Merge branch 'develop' of https://github.com/explosion/spaCy into develop

This commit is contained in:
Matthew Honnibal 2017-05-18 08:32:22 -05:00
commit 238be0f16a
8 changed files with 104 additions and 80 deletions

94
.gitignore vendored
View File

@ -1,50 +1,43 @@
# Vim # spaCy
*.swp spacy/data/
*.sw*
Profile.prof
tmp/
.dev
.denv
.pypyenv
.eggs
*.tgz
.sass-cache
.python-version
MANIFEST
corpora/ corpora/
models/ models/
keys/ keys/
spacy/syntax/*.cpp # Website
spacy/syntax/*.html website/www/
spacy/en/*.cpp website/_deploy.sh
spacy/tokens/*.cpp website/package.json
spacy/serialize/*.cpp website/announcement.jade
spacy/en/data/* website/.gitignore
spacy/*.cpp
spacy/ner/*.cpp
spacy/orthography/*.cpp
ext/murmurhash.cpp
ext/sparsehash.cpp
/spacy/data/ # Cython / C extensions
_build/
.env/
tmp/
cythonize.json cythonize.json
spacy/*.html
# Byte-compiled / optimized / DLL files *.cpp
__pycache__/
*.py[cod]
# C extensions
*.so *.so
# Distribution / packaging # Vim / VSCode / editors
*.swp
*.sw*
Profile.prof
.vscode
.sass-cache
# Python
.Python .Python
.python-version
__pycache__/
*.py[cod]
.env/
.~env/
.venv
venv/
.dev
.denv
.pypyenv
# Distribution / packaging
env/ env/
bin/ bin/
build/ build/
@ -59,6 +52,12 @@ var/
*.egg-info/ *.egg-info/
.installed.cfg .installed.cfg
*.egg *.egg
.eggs
MANIFEST
# Temporary files
*.~*
tmp/
# Installer logs # Installer logs
pip-log.txt pip-log.txt
@ -87,25 +86,16 @@ coverage.xml
*.log *.log
*.pot *.pot
# Windows local helper files # Windows
*.bat *.bat
Thumbs.db
Desktop.ini
# Mac OS X # Mac OS X
*.DS_Store *.DS_Store
# Temporary files / Dropbox hack
*.~*
# Komodo project files # Komodo project files
*.komodoproject *.komodoproject
# Website # Other
website/_deploy.sh *.tgz
website/package.json
website/announcement.jade
website/www/
website/.gitignore
# Python virtualenv
venv
venv/*

View File

@ -16,6 +16,22 @@ try:
except ImportError: except ImportError:
import copyreg as copy_reg import copyreg as copy_reg
try:
from cupy.cuda.stream import Stream as CudaStream
except ImportError:
CudaStream = None
try:
import cupy
except ImportError:
cupy = None
pickle = pickle
copy_reg = copy_reg
CudaStream = CudaStream
cupy = cupy
fix_text = ftfy.fix_text
is_python2 = six.PY2 is_python2 = six.PY2
is_python3 = six.PY3 is_python3 = six.PY3
@ -23,8 +39,6 @@ is_windows = sys.platform.startswith('win')
is_linux = sys.platform.startswith('linux') is_linux = sys.platform.startswith('linux')
is_osx = sys.platform == 'darwin' is_osx = sys.platform == 'darwin'
fix_text = ftfy.fix_text
if is_python2: if is_python2:
bytes_ = str bytes_ = str

View File

@ -3,13 +3,14 @@ from __future__ import unicode_literals
from .render import DependencyRenderer, EntityRenderer from .render import DependencyRenderer, EntityRenderer
from ..tokens import Doc from ..tokens import Doc
from ..util import prints from ..util import prints, is_in_jupyter
_html = {} _html = {}
IS_JUPYTER = is_in_jupyter()
def render(docs, style='dep', page=False, minify=False, jupyter=False, options={}): def render(docs, style='dep', page=False, minify=False, jupyter=IS_JUPYTER, options={}):
"""Render displaCy visualisation. """Render displaCy visualisation.
docs (list or Doc): Document(s) to visualise. docs (list or Doc): Document(s) to visualise.

View File

@ -11,20 +11,12 @@ import sys
import textwrap import textwrap
from .symbols import ORTH from .symbols import ORTH
from .compat import path2str, basestring_, input_, unicode_ from .compat import cupy, CudaStream, path2str, basestring_, input_, unicode_
LANGUAGES = {} LANGUAGES = {}
_data_path = Path(__file__).parent / 'data' _data_path = Path(__file__).parent / 'data'
try:
from cupy.cuda.stream import Stream as CudaStream
except ImportError:
CudaStream = None
try:
import cupy
except ImportError:
cupy = None
def get_lang_class(lang): def get_lang_class(lang):
"""Import and load a Language class. """Import and load a Language class.
@ -151,6 +143,20 @@ def parse_package_meta(package_path, require=True):
return None return None
def is_in_jupyter():
    """Check if user is in a Jupyter notebook. Mainly used for displaCy.

    Detection is best-effort: any failure to find or read the IPython
    configuration is treated as "not in Jupyter" rather than raised, so that
    calling this function while a module is being imported can never break
    the import.

    RETURNS (bool): True if in Jupyter, False if not.
    """
    try:
        # get_ipython is not imported here: the lookup raises NameError when
        # the name is not available in the running interpreter.
        cfg = get_ipython().config
        if cfg['IPKernelApp']['parent_appname'] == 'ipython-notebook':
            return True
    except (NameError, AttributeError, KeyError, TypeError):
        # NameError: get_ipython is undefined.
        # AttributeError / TypeError: no usable config object on the shell.
        # KeyError: the config has no 'IPKernelApp' / 'parent_appname' entry.
        return False
    return False
def get_cuda_stream(require=False): def get_cuda_stream(require=False):
# TODO: Error and tell to install chainer if not found # TODO: Error and tell to install chainer if not found
# Requires GPU # Requires GPU

View File

@ -10,7 +10,10 @@
padding: 2rem 3rem padding: 2rem 3rem
.c-quickstart__input .c-quickstart__input
display: none @include size(0)
opacity: 0
position: absolute
left: -9999px
.c-quickstart__label .c-quickstart__label
cursor: pointer cursor: pointer
@ -25,6 +28,9 @@
&:hover &:hover
background: lighten($color-theme-light, 5) background: lighten($color-theme-light, 5)
.c-quickstart__input:focus + &
border: 1px solid $color-theme
.c-quickstart__input--radio:checked + & .c-quickstart__input--radio:checked + &
color: $color-back color: $color-back
border-color: $color-theme border-color: $color-theme
@ -36,8 +42,6 @@
display: inline-block display: inline-block
width: 20px width: 20px
height: 20px height: 20px
position: relative
bottom: 0.2rem
border: 1px solid $color-subtle border: 1px solid $color-subtle
vertical-align: middle vertical-align: middle
margin-right: 1rem margin-right: 1rem

View File

@ -4,4 +4,4 @@
* @author Ines Montani <ines@ines.io> * @author Ines Montani <ines@ines.io>
* @version 0.0.1 * @version 0.0.1
* @license MIT * @license MIT
*/'use strict';var _createClass=function(){function a(b,c){for(var e,d=0;d<c.length;d++)e=c[d],e.enumerable=e.enumerable||!1,e.configurable=!0,'value'in e&&(e.writable=!0),Object.defineProperty(b,e.key,e)}return function(b,c,d){return c&&a(b.prototype,c),d&&a(b,d),b}}();function _toConsumableArray(a){if(Array.isArray(a)){for(var b=0,c=Array(a.length);b<a.length;b++)c[b]=a[b];return c}return Array.from(a)}function _classCallCheck(a,b){if(!(a instanceof b))throw new TypeError('Cannot call a class as a function')}var Quickstart=function(){function a(){var b=0<arguments.length&&void 0!==arguments[0]?arguments[0]:'#quickstart',d=arguments[1],c=2<arguments.length&&void 0!==arguments[2]?arguments[2]:{};_classCallCheck(this,a),this.container='string'==typeof b?this._$(b):b,this.groups=d,this.pfx=c.prefix||'qs',this.dpfx='data-'+this.pfx,this.init=this.init.bind(this),c.noInit||document.addEventListener('DOMContentLoaded',this.init)}return _createClass(a,[{key:'init',value:function init(){this.updateContainer(),this.container.style.display='block',this.container.classList.add(''+this.pfx);var b=this.groups;b instanceof Array?b.reverse().forEach(this.createGroup.bind(this)):this._$$('['+this.dpfx+'-group]').forEach(this.updateGroup.bind(this))}},{key:'initGroup',value:function initGroup(b,c){b.addEventListener('change',this.update.bind(this)),b.dispatchEvent(new CustomEvent('change',{detail:c}))}},{key:'updateGroup',value:function updateGroup(b){var c=b.getAttribute(this.dpfx+'-group'),d=this.createStyles(c);b.insertBefore(d,b.firstChild),this.initGroup(b,c)}},{key:'update',value:function update(b){var f=this,c=b.detail||b.target.name,d=this._$$('[name='+c+']').filter(function(h){return h.checked}).map(function(h){return h.value}),e=d.map(function(h){return':not(['+f.dpfx+'-'+c+'="'+h+'"])'}).join(''),g='['+this.dpfx+'-results]>['+this.dpfx+'-'+c+']'+e+' {display: none}';this._$('['+this.dpfx+'-style="'+c+'"]').textContent=g}},{key:'updateContainer',value:function 
updateContainer(){if(!this._$('['+this.dpfx+'-results]')){var b=this.childNodes(this.container,'pre'),c=b?b[0]:this._c('pre',this.pfx+'-code'),d=this.childNodes(c,'code')||this.childNodes(this.container,'code'),e=d?d[0]:this._c('code',this.pfx+'-results');e.setAttribute(this.dpfx+'-results','');var f=this.childNodes(e,'span')||this.childNodes(c,'span')||this.childNodes(this.container,'span');f&&f.forEach(function(g){return e.appendChild(g)}),c.appendChild(e),this.container.appendChild(c)}}},{key:'createGroup',value:function createGroup(b){var d=this,c=this._c('div',this.pfx+'-group');c.setAttribute(this.dpfx+'-group',b.id),c.innerHTML=this.createStyles(b.id).outerHTML,c.innerHTML+='<div class="'+this.pfx+'-legend">'+b.title+'</div>',c.innerHTML+='<div class="'+this.pfx+'-fields">'+b.options.map(function(e){var f=b.multiple?'checkbox':'radio';return'<input class="'+d.pfx+'-input '+d.pfx+'-input--'+f+'" type="'+f+'" name="'+b.id+'" id="'+e.id+'" value="'+e.id+'" '+(e.checked?'checked':'')+' /><label class="'+d.pfx+'-label" for="'+e.id+'">'+e.title+'</label>'}).join('')+'</div>',this.container.insertBefore(c,this.container.firstChild),this.initGroup(c,b.id)}},{key:'createStyles',value:function createStyles(b){var c=this._c('style');return c.setAttribute(this.dpfx+'-style',b),c.textContent='['+this.dpfx+'-results]>['+this.dpfx+'-'+b+'] {display: none}',c}},{key:'childNodes',value:function childNodes(b,c){var d=c.toUpperCase();if(!b.hasChildNodes)return!1;var e=[].concat(_toConsumableArray(b.childNodes)).filter(function(f){return f.nodeName===d});return!!e.length&&e}},{key:'_$',value:function _$(b){return document.querySelector(b)}},{key:'_$$',value:function _$$(b){return[].concat(_toConsumableArray(document.querySelectorAll(b)))}},{key:'_c',value:function _c(b,c){var d=document.createElement(b);return c&&(d.className=c),d}}]),a}(); */'use strict';var _createClass=function(){function a(b,c){for(var 
e,d=0;d<c.length;d++)e=c[d],e.enumerable=e.enumerable||!1,e.configurable=!0,'value'in e&&(e.writable=!0),Object.defineProperty(b,e.key,e)}return function(b,c,d){return c&&a(b.prototype,c),d&&a(b,d),b}}();function _toConsumableArray(a){if(Array.isArray(a)){for(var b=0,c=Array(a.length);b<a.length;b++)c[b]=a[b];return c}return Array.from(a)}function _classCallCheck(a,b){if(!(a instanceof b))throw new TypeError('Cannot call a class as a function')}var Quickstart=function(){function a(){var b=0<arguments.length&&void 0!==arguments[0]?arguments[0]:'#quickstart',d=arguments[1],c=2<arguments.length&&void 0!==arguments[2]?arguments[2]:{};_classCallCheck(this,a),this.container='string'==typeof b?this._$(b):b,this.groups=d,this.pfx=c.prefix||'qs',this.dpfx='data-'+this.pfx,this.init=this.init.bind(this),c.noInit||document.addEventListener('DOMContentLoaded',this.init)}return _createClass(a,[{key:'init',value:function init(){this.updateContainer(),this.container.style.display='block',this.container.classList.add(''+this.pfx);var b=this.groups;b instanceof Array?b.reverse().forEach(this.createGroup.bind(this)):this._$$('['+this.dpfx+'-group]').forEach(this.updateGroup.bind(this))}},{key:'initGroup',value:function initGroup(b,c){b.addEventListener('change',this.update.bind(this)),b.dispatchEvent(new CustomEvent('change',{detail:c}))}},{key:'updateGroup',value:function updateGroup(b){var c=b.getAttribute(this.dpfx+'-group'),d=this.createStyles(c);b.insertBefore(d,b.firstChild),this.initGroup(b,c)}},{key:'update',value:function update(b){var f=this,c=b.detail||b.target.name,d=this._$$('[name='+c+']:checked').map(function(h){return h.value}),e=d.map(function(h){return':not(['+f.dpfx+'-'+c+'="'+h+'"])'}).join(''),g='['+this.dpfx+'-results]>['+this.dpfx+'-'+c+']'+e+' {display: none}';this._$('['+this.dpfx+'-style="'+c+'"]').textContent=g}},{key:'updateContainer',value:function updateContainer(){if(!this._$('['+this.dpfx+'-results]')){var 
b=this.childNodes(this.container,'pre'),c=b?b[0]:this._c('pre',this.pfx+'-code'),d=this.childNodes(c,'code')||this.childNodes(this.container,'code'),e=d?d[0]:this._c('code',this.pfx+'-results');e.setAttribute(this.dpfx+'-results','');var f=this.childNodes(e,'span')||this.childNodes(c,'span')||this.childNodes(this.container,'span');f&&f.forEach(function(g){return e.appendChild(g)}),c.appendChild(e),this.container.appendChild(c)}}},{key:'createGroup',value:function createGroup(b){var d=this,c=this._c('fieldset',this.pfx+'-group');c.setAttribute(this.dpfx+'-group',b.id),c.innerHTML=this.createStyles(b.id).outerHTML,c.innerHTML+='<legend class="'+this.pfx+'-legend">'+b.title+'</legend>',c.innerHTML+=b.options.map(function(e){var f=b.multiple?'checkbox':'radio';return'<input class="'+d.pfx+'-input '+d.pfx+'-input--'+f+'" type="'+f+'" name="'+b.id+'" id="'+e.id+'" value="'+e.id+'" '+(e.checked?'checked':'')+' /><label class="'+d.pfx+'-label" for="'+e.id+'">'+e.title+'</label>'}).join(''),this.container.insertBefore(c,this.container.firstChild),this.initGroup(c,b.id)}},{key:'createStyles',value:function createStyles(b){var c=this._c('style');return c.setAttribute(this.dpfx+'-style',b),c.textContent='['+this.dpfx+'-results]>['+this.dpfx+'-'+b+'] {display: none}',c}},{key:'childNodes',value:function childNodes(b,c){var d=c.toUpperCase();if(!b.hasChildNodes)return!1;var e=[].concat(_toConsumableArray(b.childNodes)).filter(function(f){return f.nodeName===d});return!!e.length&&e}},{key:'_$',value:function _$(b){return document.querySelector(b)}},{key:'_$$',value:function _$$(b){return[].concat(_toConsumableArray(document.querySelectorAll(b)))}},{key:'_c',value:function _c(b,c){var d=document.createElement(b);return c&&(d.className=c),d}}]),a}();

View File

@ -101,9 +101,9 @@ p Render a dependency parse tree or named entity visualization.
+cell #[code jupyter] +cell #[code jupyter]
+cell bool +cell bool
+cell +cell
| Returns markup using #[+a("http://jupyter.org/") Jupyter]'s | Explicitly enable "#[+a("http://jupyter.org/") Jupyter] mode" to
| internal methods, ready to be rendered in a notebook. | return markup ready to be rendered in a notebook.
+cell #[code False] +cell detected automatically
+row +row
+cell #[code options] +cell #[code options]

View File

@ -11,8 +11,12 @@ p
| process. Instead of printing a list of dependency labels or entity spans, | process. Instead of printing a list of dependency labels or entity spans,
| you can simply pass your #[code Doc] objects to #[code displacy] and view | you can simply pass your #[code Doc] objects to #[code displacy] and view
| the visualizations in your browser, or export them as HTML files or | the visualizations in your browser, or export them as HTML files or
| vector graphics. displaCy also comes with a #[+a("#jupyter") Jupyter hook] | vector graphics.
| that returns the markup in a format ready to be rendered in a notebook.
p
| If you're running a #[+a("https://jupyter.org") Jupyter] notebook,
| displaCy will detect this and return the markup in a format
| #[+a("#jupyter") ready to be rendered and exported].
+aside("What about the old visualizers?") +aside("What about the old visualizers?")
| Our JavaScript-based visualizers #[+src(gh("displacy")) displacy.js] and | Our JavaScript-based visualizers #[+src(gh("displacy")) displacy.js] and
@ -219,10 +223,10 @@ p
+h(2, "jupyter") Using displaCy in Jupyter notebooks +h(2, "jupyter") Using displaCy in Jupyter notebooks
p p
| If you're working with a #[+a("https://jupyter.org") Jupyter] notebook, | displaCy is able to detect whether you're within a
| you can use displaCy's "Jupyter mode" to return markup that can be | #[+a("https://jupyter.org") Jupyter] notebook, and will return markup
| rendered in a cell straight away. When you export your notebook, the | that can be rendered in a cell straight away. When you export your
| visualizations will be included as HTML. | notebook, the visualizations will be included as HTML.
+code("Jupyter Example"). +code("Jupyter Example").
# don't forget to install a model, e.g.: python -m spacy download en # don't forget to install a model, e.g.: python -m spacy download en
@ -230,10 +234,15 @@ p
from spacy import displacy from spacy import displacy
doc = nlp(u'Rats are various medium-sized, long-tailed rodents.') doc = nlp(u'Rats are various medium-sized, long-tailed rodents.')
displacy.render(doc, style='dep', jupyter=True) displacy.render(doc, style='dep')
doc2 = nlp(LONG_NEWS_ARTICLE) doc2 = nlp(LONG_NEWS_ARTICLE)
displacy.render(doc2, style='ent', jupyter=True) displacy.render(doc2, style='ent')
+aside("Enabling or disabling Jupyter mode")
| To explicitly enable or disable "Jupyter mode", you can use the
| #[code jupyter] keyword argument – for example, to return raw HTML in a
| notebook, or to force Jupyter rendering if auto-detection fails.
+image("/assets/img/docs/displacy_jupyter.jpg", 700, false, "Example of using the displaCy dependency and named entity visualizer in a Jupyter notebook") +image("/assets/img/docs/displacy_jupyter.jpg", 700, false, "Example of using the displaCy dependency and named entity visualizer in a Jupyter notebook")