Merge branch 'develop' of https://github.com/explosion/spaCy into develop

This commit is contained in:
Matthew Honnibal 2017-05-18 08:32:22 -05:00
commit 238be0f16a
8 changed files with 104 additions and 80 deletions

94
.gitignore vendored
View File

@ -1,50 +1,43 @@
# Vim
*.swp
*.sw*
Profile.prof
tmp/
.dev
.denv
.pypyenv
.eggs
*.tgz
.sass-cache
.python-version
MANIFEST
# spaCy
spacy/data/
corpora/
models/
keys/
spacy/syntax/*.cpp
spacy/syntax/*.html
spacy/en/*.cpp
spacy/tokens/*.cpp
spacy/serialize/*.cpp
spacy/en/data/*
spacy/*.cpp
spacy/ner/*.cpp
spacy/orthography/*.cpp
ext/murmurhash.cpp
ext/sparsehash.cpp
# Website
website/www/
website/_deploy.sh
website/package.json
website/announcement.jade
website/.gitignore
/spacy/data/
_build/
.env/
tmp/
# Cython / C extensions
cythonize.json
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
# C extensions
spacy/*.html
*.cpp
*.so
# Distribution / packaging
# Vim / VSCode / editors
*.swp
*.sw*
Profile.prof
.vscode
.sass-cache
# Python
.Python
.python-version
__pycache__/
*.py[cod]
.env/
.~env/
.venv
venv/
.dev
.denv
.pypyenv
# Distribution / packaging
env/
bin/
build/
@ -59,6 +52,12 @@ var/
*.egg-info/
.installed.cfg
*.egg
.eggs
MANIFEST
# Temporary files
*.~*
tmp/
# Installer logs
pip-log.txt
@ -87,25 +86,16 @@ coverage.xml
*.log
*.pot
# Windows local helper files
# Windows
*.bat
Thumbs.db
Desktop.ini
# Mac OS X
*.DS_Store
# Temporary files / Dropbox hack
*.~*
# Komodo project files
*.komodoproject
# Website
website/_deploy.sh
website/package.json
website/announcement.jade
website/www/
website/.gitignore
# Python virtualenv
venv
venv/*
# Other
*.tgz

View File

@ -16,6 +16,22 @@ try:
except ImportError:
import copyreg as copy_reg
# Optional dependency: if cupy's CUDA stream class cannot be imported,
# CudaStream is bound to None instead so the name always exists.
try:
from cupy.cuda.stream import Stream as CudaStream
except ImportError:
CudaStream = None
# Same fallback for the cupy package itself: None when the import fails.
try:
import cupy
except ImportError:
cupy = None
# Self-assignments of the names imported above.
# NOTE(review): presumably these mark the names as intentional module-level
# exports for other modules to import from here — confirm intent.
pickle = pickle
copy_reg = copy_reg
CudaStream = CudaStream
cupy = cupy
# Shorthand for ftfy's fix_text, plus Python-version flags taken from six.
fix_text = ftfy.fix_text
is_python2 = six.PY2
is_python3 = six.PY3
@ -23,8 +39,6 @@ is_windows = sys.platform.startswith('win')
is_linux = sys.platform.startswith('linux')
is_osx = sys.platform == 'darwin'
fix_text = ftfy.fix_text
if is_python2:
bytes_ = str

View File

@ -3,13 +3,14 @@ from __future__ import unicode_literals
from .render import DependencyRenderer, EntityRenderer
from ..tokens import Doc
from ..util import prints
from ..util import prints, is_in_jupyter
_html = {}
IS_JUPYTER = is_in_jupyter()
def render(docs, style='dep', page=False, minify=False, jupyter=False, options={}):
def render(docs, style='dep', page=False, minify=False, jupyter=IS_JUPYTER, options={}):
"""Render displaCy visualisation.
docs (list or Doc): Document(s) to visualise.

View File

@ -11,20 +11,12 @@ import sys
import textwrap
from .symbols import ORTH
from .compat import path2str, basestring_, input_, unicode_
from .compat import cupy, CudaStream, path2str, basestring_, input_, unicode_
LANGUAGES = {}
_data_path = Path(__file__).parent / 'data'
try:
from cupy.cuda.stream import Stream as CudaStream
except ImportError:
CudaStream = None
try:
import cupy
except ImportError:
cupy = None
def get_lang_class(lang):
"""Import and load a Language class.
@ -151,6 +143,20 @@ def parse_package_meta(package_path, require=True):
return None
def is_in_jupyter():
    """Check if user is in a Jupyter notebook. Mainly used for displaCy.

    Detection is best-effort: the function looks for the `get_ipython`
    builtin and inspects its config for the notebook kernel app. Any failure
    to find that information (no IPython shell, no usable config, missing
    keys) is treated as "not in Jupyter" rather than raised, because callers
    may evaluate this at import time.

    RETURNS (bool): True if in Jupyter, False if not.
    """
    try:
        # `get_ipython` only exists as a builtin inside an IPython session,
        # so a plain interpreter raises NameError here.
        cfg = get_ipython().config
        if cfg['IPKernelApp']['parent_appname'] == 'ipython-notebook':
            return True
    except (NameError, AttributeError, KeyError, TypeError):
        # NameError: not running under IPython at all.
        # AttributeError / TypeError: shell object without a usable config.
        # KeyError: config lacks the IPKernelApp / parent_appname entries.
        return False
    return False
def get_cuda_stream(require=False):
# TODO: Error and tell to install chainer if not found
# Requires GPU

View File

@ -10,7 +10,10 @@
padding: 2rem 3rem
.c-quickstart__input
display: none
@include size(0)
opacity: 0
position: absolute
left: -9999px
.c-quickstart__label
cursor: pointer
@ -25,6 +28,9 @@
&:hover
background: lighten($color-theme-light, 5)
.c-quickstart__input:focus + &
border: 1px solid $color-theme
.c-quickstart__input--radio:checked + &
color: $color-back
border-color: $color-theme
@ -36,8 +42,6 @@
display: inline-block
width: 20px
height: 20px
position: relative
bottom: 0.2rem
border: 1px solid $color-subtle
vertical-align: middle
margin-right: 1rem

View File

@ -4,4 +4,4 @@
* @author Ines Montani <ines@ines.io>
* @version 0.0.1
* @license MIT
*/'use strict';var _createClass=function(){function a(b,c){for(var e,d=0;d<c.length;d++)e=c[d],e.enumerable=e.enumerable||!1,e.configurable=!0,'value'in e&&(e.writable=!0),Object.defineProperty(b,e.key,e)}return function(b,c,d){return c&&a(b.prototype,c),d&&a(b,d),b}}();function _toConsumableArray(a){if(Array.isArray(a)){for(var b=0,c=Array(a.length);b<a.length;b++)c[b]=a[b];return c}return Array.from(a)}function _classCallCheck(a,b){if(!(a instanceof b))throw new TypeError('Cannot call a class as a function')}var Quickstart=function(){function a(){var b=0<arguments.length&&void 0!==arguments[0]?arguments[0]:'#quickstart',d=arguments[1],c=2<arguments.length&&void 0!==arguments[2]?arguments[2]:{};_classCallCheck(this,a),this.container='string'==typeof b?this._$(b):b,this.groups=d,this.pfx=c.prefix||'qs',this.dpfx='data-'+this.pfx,this.init=this.init.bind(this),c.noInit||document.addEventListener('DOMContentLoaded',this.init)}return _createClass(a,[{key:'init',value:function init(){this.updateContainer(),this.container.style.display='block',this.container.classList.add(''+this.pfx);var b=this.groups;b instanceof Array?b.reverse().forEach(this.createGroup.bind(this)):this._$$('['+this.dpfx+'-group]').forEach(this.updateGroup.bind(this))}},{key:'initGroup',value:function initGroup(b,c){b.addEventListener('change',this.update.bind(this)),b.dispatchEvent(new CustomEvent('change',{detail:c}))}},{key:'updateGroup',value:function updateGroup(b){var c=b.getAttribute(this.dpfx+'-group'),d=this.createStyles(c);b.insertBefore(d,b.firstChild),this.initGroup(b,c)}},{key:'update',value:function update(b){var f=this,c=b.detail||b.target.name,d=this._$$('[name='+c+']').filter(function(h){return h.checked}).map(function(h){return h.value}),e=d.map(function(h){return':not(['+f.dpfx+'-'+c+'="'+h+'"])'}).join(''),g='['+this.dpfx+'-results]>['+this.dpfx+'-'+c+']'+e+' {display: none}';this._$('['+this.dpfx+'-style="'+c+'"]').textContent=g}},{key:'updateContainer',value:function 
updateContainer(){if(!this._$('['+this.dpfx+'-results]')){var b=this.childNodes(this.container,'pre'),c=b?b[0]:this._c('pre',this.pfx+'-code'),d=this.childNodes(c,'code')||this.childNodes(this.container,'code'),e=d?d[0]:this._c('code',this.pfx+'-results');e.setAttribute(this.dpfx+'-results','');var f=this.childNodes(e,'span')||this.childNodes(c,'span')||this.childNodes(this.container,'span');f&&f.forEach(function(g){return e.appendChild(g)}),c.appendChild(e),this.container.appendChild(c)}}},{key:'createGroup',value:function createGroup(b){var d=this,c=this._c('div',this.pfx+'-group');c.setAttribute(this.dpfx+'-group',b.id),c.innerHTML=this.createStyles(b.id).outerHTML,c.innerHTML+='<div class="'+this.pfx+'-legend">'+b.title+'</div>',c.innerHTML+='<div class="'+this.pfx+'-fields">'+b.options.map(function(e){var f=b.multiple?'checkbox':'radio';return'<input class="'+d.pfx+'-input '+d.pfx+'-input--'+f+'" type="'+f+'" name="'+b.id+'" id="'+e.id+'" value="'+e.id+'" '+(e.checked?'checked':'')+' /><label class="'+d.pfx+'-label" for="'+e.id+'">'+e.title+'</label>'}).join('')+'</div>',this.container.insertBefore(c,this.container.firstChild),this.initGroup(c,b.id)}},{key:'createStyles',value:function createStyles(b){var c=this._c('style');return c.setAttribute(this.dpfx+'-style',b),c.textContent='['+this.dpfx+'-results]>['+this.dpfx+'-'+b+'] {display: none}',c}},{key:'childNodes',value:function childNodes(b,c){var d=c.toUpperCase();if(!b.hasChildNodes)return!1;var e=[].concat(_toConsumableArray(b.childNodes)).filter(function(f){return f.nodeName===d});return!!e.length&&e}},{key:'_$',value:function _$(b){return document.querySelector(b)}},{key:'_$$',value:function _$$(b){return[].concat(_toConsumableArray(document.querySelectorAll(b)))}},{key:'_c',value:function _c(b,c){var d=document.createElement(b);return c&&(d.className=c),d}}]),a}();
*/'use strict';var _createClass=function(){function a(b,c){for(var e,d=0;d<c.length;d++)e=c[d],e.enumerable=e.enumerable||!1,e.configurable=!0,'value'in e&&(e.writable=!0),Object.defineProperty(b,e.key,e)}return function(b,c,d){return c&&a(b.prototype,c),d&&a(b,d),b}}();function _toConsumableArray(a){if(Array.isArray(a)){for(var b=0,c=Array(a.length);b<a.length;b++)c[b]=a[b];return c}return Array.from(a)}function _classCallCheck(a,b){if(!(a instanceof b))throw new TypeError('Cannot call a class as a function')}var Quickstart=function(){function a(){var b=0<arguments.length&&void 0!==arguments[0]?arguments[0]:'#quickstart',d=arguments[1],c=2<arguments.length&&void 0!==arguments[2]?arguments[2]:{};_classCallCheck(this,a),this.container='string'==typeof b?this._$(b):b,this.groups=d,this.pfx=c.prefix||'qs',this.dpfx='data-'+this.pfx,this.init=this.init.bind(this),c.noInit||document.addEventListener('DOMContentLoaded',this.init)}return _createClass(a,[{key:'init',value:function init(){this.updateContainer(),this.container.style.display='block',this.container.classList.add(''+this.pfx);var b=this.groups;b instanceof Array?b.reverse().forEach(this.createGroup.bind(this)):this._$$('['+this.dpfx+'-group]').forEach(this.updateGroup.bind(this))}},{key:'initGroup',value:function initGroup(b,c){b.addEventListener('change',this.update.bind(this)),b.dispatchEvent(new CustomEvent('change',{detail:c}))}},{key:'updateGroup',value:function updateGroup(b){var c=b.getAttribute(this.dpfx+'-group'),d=this.createStyles(c);b.insertBefore(d,b.firstChild),this.initGroup(b,c)}},{key:'update',value:function update(b){var f=this,c=b.detail||b.target.name,d=this._$$('[name='+c+']:checked').map(function(h){return h.value}),e=d.map(function(h){return':not(['+f.dpfx+'-'+c+'="'+h+'"])'}).join(''),g='['+this.dpfx+'-results]>['+this.dpfx+'-'+c+']'+e+' {display: none}';this._$('['+this.dpfx+'-style="'+c+'"]').textContent=g}},{key:'updateContainer',value:function 
updateContainer(){if(!this._$('['+this.dpfx+'-results]')){var b=this.childNodes(this.container,'pre'),c=b?b[0]:this._c('pre',this.pfx+'-code'),d=this.childNodes(c,'code')||this.childNodes(this.container,'code'),e=d?d[0]:this._c('code',this.pfx+'-results');e.setAttribute(this.dpfx+'-results','');var f=this.childNodes(e,'span')||this.childNodes(c,'span')||this.childNodes(this.container,'span');f&&f.forEach(function(g){return e.appendChild(g)}),c.appendChild(e),this.container.appendChild(c)}}},{key:'createGroup',value:function createGroup(b){var d=this,c=this._c('fieldset',this.pfx+'-group');c.setAttribute(this.dpfx+'-group',b.id),c.innerHTML=this.createStyles(b.id).outerHTML,c.innerHTML+='<legend class="'+this.pfx+'-legend">'+b.title+'</legend>',c.innerHTML+=b.options.map(function(e){var f=b.multiple?'checkbox':'radio';return'<input class="'+d.pfx+'-input '+d.pfx+'-input--'+f+'" type="'+f+'" name="'+b.id+'" id="'+e.id+'" value="'+e.id+'" '+(e.checked?'checked':'')+' /><label class="'+d.pfx+'-label" for="'+e.id+'">'+e.title+'</label>'}).join(''),this.container.insertBefore(c,this.container.firstChild),this.initGroup(c,b.id)}},{key:'createStyles',value:function createStyles(b){var c=this._c('style');return c.setAttribute(this.dpfx+'-style',b),c.textContent='['+this.dpfx+'-results]>['+this.dpfx+'-'+b+'] {display: none}',c}},{key:'childNodes',value:function childNodes(b,c){var d=c.toUpperCase();if(!b.hasChildNodes)return!1;var e=[].concat(_toConsumableArray(b.childNodes)).filter(function(f){return f.nodeName===d});return!!e.length&&e}},{key:'_$',value:function _$(b){return document.querySelector(b)}},{key:'_$$',value:function _$$(b){return[].concat(_toConsumableArray(document.querySelectorAll(b)))}},{key:'_c',value:function _c(b,c){var d=document.createElement(b);return c&&(d.className=c),d}}]),a}();

View File

@ -101,9 +101,9 @@ p Render a dependency parse tree or named entity visualization.
+cell #[code jupyter]
+cell bool
+cell
| Returns markup using #[+a("http://jupyter.org/") Jupyter]'s
| internal methods, ready to be rendered in a notebook.
+cell #[code False]
| Explicitly enable "#[+a("http://jupyter.org/") Jupyter] mode" to
| return markup ready to be rendered in a notebook.
+cell detected automatically
+row
+cell #[code options]

View File

@ -11,8 +11,12 @@ p
| process. Instead of printing a list of dependency labels or entity spans,
| you can simply pass your #[code Doc] objects to #[code displacy] and view
| the visualizations in your browser, or export them as HTML files or
| vector graphics. displaCy also comes with a #[+a("#jupyter") Jupyter hook]
| that returns the markup in a format ready to be rendered in a notebook.
| vector graphics.
p
| If you're running a #[+a("https://jupyter.org") Jupyter] notebook,
| displaCy will detect this and return the markup in a format
| #[+a("#jupyter") ready to be rendered and exported].
+aside("What about the old visualizers?")
| Our JavaScript-based visualizers #[+src(gh("displacy")) displacy.js] and
@ -219,10 +223,10 @@ p
+h(2, "jupyter") Using displaCy in Jupyter notebooks
p
| If you're working with a #[+a("https://jupyter.org") Jupyter] notebook,
| you can use displaCy's "Jupyter mode" to return markup that can be
| rendered in a cell straight away. When you export your notebook, the
| visualizations will be included as HTML.
| displaCy is able to detect whether you're within a
| #[+a("https://jupyter.org") Jupyter] notebook, and will return markup
| that can be rendered in a cell straight away. When you export your
| notebook, the visualizations will be included as HTML.
+code("Jupyter Example").
# don't forget to install a model, e.g.: python -m spacy download en
@ -230,10 +234,15 @@ p
from spacy import displacy
doc = nlp(u'Rats are various medium-sized, long-tailed rodents.')
displacy.render(doc, style='dep', jupyter=True)
displacy.render(doc, style='dep')
doc2 = nlp(LONG_NEWS_ARTICLE)
displacy.render(doc2, style='ent', jupyter=True)
displacy.render(doc2, style='ent')
+aside("Enabling or disabling Jupyter mode")
| To explicitly enable or disable "Jupyter mode", you can use the
| #[code jupyter] keyword argument, e.g. to return raw HTML in a notebook
| or to force Jupyter rendering if auto-detection fails.
+image("/assets/img/docs/displacy_jupyter.jpg", 700, false, "Example of using the displaCy dependency and named entity visualizer in a Jupyter notebook")