mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
commit
238be0f16a
94
.gitignore
vendored
94
.gitignore
vendored
|
@ -1,50 +1,43 @@
|
||||||
# Vim
|
# spaCy
|
||||||
*.swp
|
spacy/data/
|
||||||
*.sw*
|
|
||||||
Profile.prof
|
|
||||||
tmp/
|
|
||||||
.dev
|
|
||||||
.denv
|
|
||||||
.pypyenv
|
|
||||||
.eggs
|
|
||||||
*.tgz
|
|
||||||
.sass-cache
|
|
||||||
.python-version
|
|
||||||
|
|
||||||
MANIFEST
|
|
||||||
|
|
||||||
corpora/
|
corpora/
|
||||||
models/
|
models/
|
||||||
keys/
|
keys/
|
||||||
|
|
||||||
spacy/syntax/*.cpp
|
# Website
|
||||||
spacy/syntax/*.html
|
website/www/
|
||||||
spacy/en/*.cpp
|
website/_deploy.sh
|
||||||
spacy/tokens/*.cpp
|
website/package.json
|
||||||
spacy/serialize/*.cpp
|
website/announcement.jade
|
||||||
spacy/en/data/*
|
website/.gitignore
|
||||||
spacy/*.cpp
|
|
||||||
spacy/ner/*.cpp
|
|
||||||
spacy/orthography/*.cpp
|
|
||||||
ext/murmurhash.cpp
|
|
||||||
ext/sparsehash.cpp
|
|
||||||
|
|
||||||
/spacy/data/
|
# Cython / C extensions
|
||||||
|
|
||||||
_build/
|
|
||||||
.env/
|
|
||||||
tmp/
|
|
||||||
cythonize.json
|
cythonize.json
|
||||||
|
spacy/*.html
|
||||||
# Byte-compiled / optimized / DLL files
|
*.cpp
|
||||||
__pycache__/
|
|
||||||
*.py[cod]
|
|
||||||
|
|
||||||
# C extensions
|
|
||||||
*.so
|
*.so
|
||||||
|
|
||||||
# Distribution / packaging
|
# Vim / VSCode / editors
|
||||||
|
*.swp
|
||||||
|
*.sw*
|
||||||
|
Profile.prof
|
||||||
|
.vscode
|
||||||
|
.sass-cache
|
||||||
|
|
||||||
|
# Python
|
||||||
.Python
|
.Python
|
||||||
|
.python-version
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
.env/
|
||||||
|
.~env/
|
||||||
|
.venv
|
||||||
|
venv/
|
||||||
|
.dev
|
||||||
|
.denv
|
||||||
|
.pypyenv
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
env/
|
env/
|
||||||
bin/
|
bin/
|
||||||
build/
|
build/
|
||||||
|
@ -59,6 +52,12 @@ var/
|
||||||
*.egg-info/
|
*.egg-info/
|
||||||
.installed.cfg
|
.installed.cfg
|
||||||
*.egg
|
*.egg
|
||||||
|
.eggs
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# Temporary files
|
||||||
|
*.~*
|
||||||
|
tmp/
|
||||||
|
|
||||||
# Installer logs
|
# Installer logs
|
||||||
pip-log.txt
|
pip-log.txt
|
||||||
|
@ -87,25 +86,16 @@ coverage.xml
|
||||||
*.log
|
*.log
|
||||||
*.pot
|
*.pot
|
||||||
|
|
||||||
# Windows local helper files
|
# Windows
|
||||||
*.bat
|
*.bat
|
||||||
|
Thumbs.db
|
||||||
|
Desktop.ini
|
||||||
|
|
||||||
# Mac OS X
|
# Mac OS X
|
||||||
*.DS_Store
|
*.DS_Store
|
||||||
|
|
||||||
# Temporary files / Dropbox hack
|
|
||||||
*.~*
|
|
||||||
|
|
||||||
# Komodo project files
|
# Komodo project files
|
||||||
*.komodoproject
|
*.komodoproject
|
||||||
|
|
||||||
# Website
|
# Other
|
||||||
website/_deploy.sh
|
*.tgz
|
||||||
website/package.json
|
|
||||||
website/announcement.jade
|
|
||||||
website/www/
|
|
||||||
website/.gitignore
|
|
||||||
|
|
||||||
# Python virtualenv
|
|
||||||
venv
|
|
||||||
venv/*
|
|
||||||
|
|
|
@ -16,6 +16,22 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
import copyreg as copy_reg
|
import copyreg as copy_reg
|
||||||
|
|
||||||
|
try:
|
||||||
|
from cupy.cuda.stream import Stream as CudaStream
|
||||||
|
except ImportError:
|
||||||
|
CudaStream = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
import cupy
|
||||||
|
except ImportError:
|
||||||
|
cupy = None
|
||||||
|
|
||||||
|
|
||||||
|
pickle = pickle
|
||||||
|
copy_reg = copy_reg
|
||||||
|
CudaStream = CudaStream
|
||||||
|
cupy = cupy
|
||||||
|
fix_text = ftfy.fix_text
|
||||||
|
|
||||||
is_python2 = six.PY2
|
is_python2 = six.PY2
|
||||||
is_python3 = six.PY3
|
is_python3 = six.PY3
|
||||||
|
@ -23,8 +39,6 @@ is_windows = sys.platform.startswith('win')
|
||||||
is_linux = sys.platform.startswith('linux')
|
is_linux = sys.platform.startswith('linux')
|
||||||
is_osx = sys.platform == 'darwin'
|
is_osx = sys.platform == 'darwin'
|
||||||
|
|
||||||
fix_text = ftfy.fix_text
|
|
||||||
|
|
||||||
|
|
||||||
if is_python2:
|
if is_python2:
|
||||||
bytes_ = str
|
bytes_ = str
|
||||||
|
|
|
@ -3,13 +3,14 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from .render import DependencyRenderer, EntityRenderer
|
from .render import DependencyRenderer, EntityRenderer
|
||||||
from ..tokens import Doc
|
from ..tokens import Doc
|
||||||
from ..util import prints
|
from ..util import prints, is_in_jupyter
|
||||||
|
|
||||||
|
|
||||||
_html = {}
|
_html = {}
|
||||||
|
IS_JUPYTER = is_in_jupyter()
|
||||||
|
|
||||||
|
|
||||||
def render(docs, style='dep', page=False, minify=False, jupyter=False, options={}):
|
def render(docs, style='dep', page=False, minify=False, jupyter=IS_JUPYTER, options={}):
|
||||||
"""Render displaCy visualisation.
|
"""Render displaCy visualisation.
|
||||||
|
|
||||||
docs (list or Doc): Document(s) to visualise.
|
docs (list or Doc): Document(s) to visualise.
|
||||||
|
|
|
@ -11,20 +11,12 @@ import sys
|
||||||
import textwrap
|
import textwrap
|
||||||
|
|
||||||
from .symbols import ORTH
|
from .symbols import ORTH
|
||||||
from .compat import path2str, basestring_, input_, unicode_
|
from .compat import cupy, CudaStream, path2str, basestring_, input_, unicode_
|
||||||
|
|
||||||
|
|
||||||
LANGUAGES = {}
|
LANGUAGES = {}
|
||||||
_data_path = Path(__file__).parent / 'data'
|
_data_path = Path(__file__).parent / 'data'
|
||||||
try:
|
|
||||||
from cupy.cuda.stream import Stream as CudaStream
|
|
||||||
except ImportError:
|
|
||||||
CudaStream = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
import cupy
|
|
||||||
except ImportError:
|
|
||||||
cupy = None
|
|
||||||
|
|
||||||
def get_lang_class(lang):
|
def get_lang_class(lang):
|
||||||
"""Import and load a Language class.
|
"""Import and load a Language class.
|
||||||
|
@ -151,6 +143,20 @@ def parse_package_meta(package_path, require=True):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def is_in_jupyter():
|
||||||
|
"""Check if user is in a Jupyter notebook. Mainly used for displaCy.
|
||||||
|
|
||||||
|
RETURNS (bool): True if in Jupyter, False if not.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
cfg = get_ipython().config
|
||||||
|
if cfg['IPKernelApp']['parent_appname'] == 'ipython-notebook':
|
||||||
|
return True
|
||||||
|
except NameError:
|
||||||
|
return False
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def get_cuda_stream(require=False):
|
def get_cuda_stream(require=False):
|
||||||
# TODO: Error and tell to install chainer if not found
|
# TODO: Error and tell to install chainer if not found
|
||||||
# Requires GPU
|
# Requires GPU
|
||||||
|
|
|
@ -10,7 +10,10 @@
|
||||||
padding: 2rem 3rem
|
padding: 2rem 3rem
|
||||||
|
|
||||||
.c-quickstart__input
|
.c-quickstart__input
|
||||||
display: none
|
@include size(0)
|
||||||
|
opacity: 0
|
||||||
|
position: absolute
|
||||||
|
left: -9999px
|
||||||
|
|
||||||
.c-quickstart__label
|
.c-quickstart__label
|
||||||
cursor: pointer
|
cursor: pointer
|
||||||
|
@ -25,6 +28,9 @@
|
||||||
&:hover
|
&:hover
|
||||||
background: lighten($color-theme-light, 5)
|
background: lighten($color-theme-light, 5)
|
||||||
|
|
||||||
|
.c-quickstart__input:focus + &
|
||||||
|
border: 1px solid $color-theme
|
||||||
|
|
||||||
.c-quickstart__input--radio:checked + &
|
.c-quickstart__input--radio:checked + &
|
||||||
color: $color-back
|
color: $color-back
|
||||||
border-color: $color-theme
|
border-color: $color-theme
|
||||||
|
@ -36,8 +42,6 @@
|
||||||
display: inline-block
|
display: inline-block
|
||||||
width: 20px
|
width: 20px
|
||||||
height: 20px
|
height: 20px
|
||||||
position: relative
|
|
||||||
bottom: 0.2rem
|
|
||||||
border: 1px solid $color-subtle
|
border: 1px solid $color-subtle
|
||||||
vertical-align: middle
|
vertical-align: middle
|
||||||
margin-right: 1rem
|
margin-right: 1rem
|
||||||
|
|
|
@ -4,4 +4,4 @@
|
||||||
* @author Ines Montani <ines@ines.io>
|
* @author Ines Montani <ines@ines.io>
|
||||||
* @version 0.0.1
|
* @version 0.0.1
|
||||||
* @license MIT
|
* @license MIT
|
||||||
*/'use strict';var _createClass=function(){function a(b,c){for(var e,d=0;d<c.length;d++)e=c[d],e.enumerable=e.enumerable||!1,e.configurable=!0,'value'in e&&(e.writable=!0),Object.defineProperty(b,e.key,e)}return function(b,c,d){return c&&a(b.prototype,c),d&&a(b,d),b}}();function _toConsumableArray(a){if(Array.isArray(a)){for(var b=0,c=Array(a.length);b<a.length;b++)c[b]=a[b];return c}return Array.from(a)}function _classCallCheck(a,b){if(!(a instanceof b))throw new TypeError('Cannot call a class as a function')}var Quickstart=function(){function a(){var b=0<arguments.length&&void 0!==arguments[0]?arguments[0]:'#quickstart',d=arguments[1],c=2<arguments.length&&void 0!==arguments[2]?arguments[2]:{};_classCallCheck(this,a),this.container='string'==typeof b?this._$(b):b,this.groups=d,this.pfx=c.prefix||'qs',this.dpfx='data-'+this.pfx,this.init=this.init.bind(this),c.noInit||document.addEventListener('DOMContentLoaded',this.init)}return _createClass(a,[{key:'init',value:function init(){this.updateContainer(),this.container.style.display='block',this.container.classList.add(''+this.pfx);var b=this.groups;b instanceof Array?b.reverse().forEach(this.createGroup.bind(this)):this._$$('['+this.dpfx+'-group]').forEach(this.updateGroup.bind(this))}},{key:'initGroup',value:function initGroup(b,c){b.addEventListener('change',this.update.bind(this)),b.dispatchEvent(new CustomEvent('change',{detail:c}))}},{key:'updateGroup',value:function updateGroup(b){var c=b.getAttribute(this.dpfx+'-group'),d=this.createStyles(c);b.insertBefore(d,b.firstChild),this.initGroup(b,c)}},{key:'update',value:function update(b){var f=this,c=b.detail||b.target.name,d=this._$$('[name='+c+']').filter(function(h){return h.checked}).map(function(h){return h.value}),e=d.map(function(h){return':not(['+f.dpfx+'-'+c+'="'+h+'"])'}).join(''),g='['+this.dpfx+'-results]>['+this.dpfx+'-'+c+']'+e+' {display: none}';this._$('['+this.dpfx+'-style="'+c+'"]').textContent=g}},{key:'updateContainer',value:function updateContainer(){if(!this._$('['+this.dpfx+'-results]')){var b=this.childNodes(this.container,'pre'),c=b?b[0]:this._c('pre',this.pfx+'-code'),d=this.childNodes(c,'code')||this.childNodes(this.container,'code'),e=d?d[0]:this._c('code',this.pfx+'-results');e.setAttribute(this.dpfx+'-results','');var f=this.childNodes(e,'span')||this.childNodes(c,'span')||this.childNodes(this.container,'span');f&&f.forEach(function(g){return e.appendChild(g)}),c.appendChild(e),this.container.appendChild(c)}}},{key:'createGroup',value:function createGroup(b){var d=this,c=this._c('div',this.pfx+'-group');c.setAttribute(this.dpfx+'-group',b.id),c.innerHTML=this.createStyles(b.id).outerHTML,c.innerHTML+='<div class="'+this.pfx+'-legend">'+b.title+'</div>',c.innerHTML+='<div class="'+this.pfx+'-fields">'+b.options.map(function(e){var f=b.multiple?'checkbox':'radio';return'<input class="'+d.pfx+'-input '+d.pfx+'-input--'+f+'" type="'+f+'" name="'+b.id+'" id="'+e.id+'" value="'+e.id+'" '+(e.checked?'checked':'')+' /><label class="'+d.pfx+'-label" for="'+e.id+'">'+e.title+'</label>'}).join('')+'</div>',this.container.insertBefore(c,this.container.firstChild),this.initGroup(c,b.id)}},{key:'createStyles',value:function createStyles(b){var c=this._c('style');return c.setAttribute(this.dpfx+'-style',b),c.textContent='['+this.dpfx+'-results]>['+this.dpfx+'-'+b+'] {display: none}',c}},{key:'childNodes',value:function childNodes(b,c){var d=c.toUpperCase();if(!b.hasChildNodes)return!1;var e=[].concat(_toConsumableArray(b.childNodes)).filter(function(f){return f.nodeName===d});return!!e.length&&e}},{key:'_$',value:function _$(b){return document.querySelector(b)}},{key:'_$$',value:function _$$(b){return[].concat(_toConsumableArray(document.querySelectorAll(b)))}},{key:'_c',value:function _c(b,c){var d=document.createElement(b);return c&&(d.className=c),d}}]),a}();
|
*/'use strict';var _createClass=function(){function a(b,c){for(var e,d=0;d<c.length;d++)e=c[d],e.enumerable=e.enumerable||!1,e.configurable=!0,'value'in e&&(e.writable=!0),Object.defineProperty(b,e.key,e)}return function(b,c,d){return c&&a(b.prototype,c),d&&a(b,d),b}}();function _toConsumableArray(a){if(Array.isArray(a)){for(var b=0,c=Array(a.length);b<a.length;b++)c[b]=a[b];return c}return Array.from(a)}function _classCallCheck(a,b){if(!(a instanceof b))throw new TypeError('Cannot call a class as a function')}var Quickstart=function(){function a(){var b=0<arguments.length&&void 0!==arguments[0]?arguments[0]:'#quickstart',d=arguments[1],c=2<arguments.length&&void 0!==arguments[2]?arguments[2]:{};_classCallCheck(this,a),this.container='string'==typeof b?this._$(b):b,this.groups=d,this.pfx=c.prefix||'qs',this.dpfx='data-'+this.pfx,this.init=this.init.bind(this),c.noInit||document.addEventListener('DOMContentLoaded',this.init)}return _createClass(a,[{key:'init',value:function init(){this.updateContainer(),this.container.style.display='block',this.container.classList.add(''+this.pfx);var b=this.groups;b instanceof Array?b.reverse().forEach(this.createGroup.bind(this)):this._$$('['+this.dpfx+'-group]').forEach(this.updateGroup.bind(this))}},{key:'initGroup',value:function initGroup(b,c){b.addEventListener('change',this.update.bind(this)),b.dispatchEvent(new CustomEvent('change',{detail:c}))}},{key:'updateGroup',value:function updateGroup(b){var c=b.getAttribute(this.dpfx+'-group'),d=this.createStyles(c);b.insertBefore(d,b.firstChild),this.initGroup(b,c)}},{key:'update',value:function update(b){var f=this,c=b.detail||b.target.name,d=this._$$('[name='+c+']:checked').map(function(h){return h.value}),e=d.map(function(h){return':not(['+f.dpfx+'-'+c+'="'+h+'"])'}).join(''),g='['+this.dpfx+'-results]>['+this.dpfx+'-'+c+']'+e+' {display: none}';this._$('['+this.dpfx+'-style="'+c+'"]').textContent=g}},{key:'updateContainer',value:function updateContainer(){if(!this._$('['+this.dpfx+'-results]')){var b=this.childNodes(this.container,'pre'),c=b?b[0]:this._c('pre',this.pfx+'-code'),d=this.childNodes(c,'code')||this.childNodes(this.container,'code'),e=d?d[0]:this._c('code',this.pfx+'-results');e.setAttribute(this.dpfx+'-results','');var f=this.childNodes(e,'span')||this.childNodes(c,'span')||this.childNodes(this.container,'span');f&&f.forEach(function(g){return e.appendChild(g)}),c.appendChild(e),this.container.appendChild(c)}}},{key:'createGroup',value:function createGroup(b){var d=this,c=this._c('fieldset',this.pfx+'-group');c.setAttribute(this.dpfx+'-group',b.id),c.innerHTML=this.createStyles(b.id).outerHTML,c.innerHTML+='<legend class="'+this.pfx+'-legend">'+b.title+'</legend>',c.innerHTML+=b.options.map(function(e){var f=b.multiple?'checkbox':'radio';return'<input class="'+d.pfx+'-input '+d.pfx+'-input--'+f+'" type="'+f+'" name="'+b.id+'" id="'+e.id+'" value="'+e.id+'" '+(e.checked?'checked':'')+' /><label class="'+d.pfx+'-label" for="'+e.id+'">'+e.title+'</label>'}).join(''),this.container.insertBefore(c,this.container.firstChild),this.initGroup(c,b.id)}},{key:'createStyles',value:function createStyles(b){var c=this._c('style');return c.setAttribute(this.dpfx+'-style',b),c.textContent='['+this.dpfx+'-results]>['+this.dpfx+'-'+b+'] {display: none}',c}},{key:'childNodes',value:function childNodes(b,c){var d=c.toUpperCase();if(!b.hasChildNodes)return!1;var e=[].concat(_toConsumableArray(b.childNodes)).filter(function(f){return f.nodeName===d});return!!e.length&&e}},{key:'_$',value:function _$(b){return document.querySelector(b)}},{key:'_$$',value:function _$$(b){return[].concat(_toConsumableArray(document.querySelectorAll(b)))}},{key:'_c',value:function _c(b,c){var d=document.createElement(b);return c&&(d.className=c),d}}]),a}();
|
||||||
|
|
|
@ -101,9 +101,9 @@ p Render a dependency parse tree or named entity visualization.
|
||||||
+cell #[code jupyter]
|
+cell #[code jupyter]
|
||||||
+cell bool
|
+cell bool
|
||||||
+cell
|
+cell
|
||||||
| Returns markup using #[+a("http://jupyter.org/") Jupyter]'s
|
| Explicitly enable "#[+a("http://jupyter.org/") Jupyter] mode" to
|
||||||
| internal methods, ready to be rendered in a notebook.
|
| return markup ready to be rendered in a notebook.
|
||||||
+cell #[code False]
|
+cell detected automatically
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code options]
|
+cell #[code options]
|
||||||
|
|
|
@ -11,8 +11,12 @@ p
|
||||||
| process. Instead of printing a list of dependency labels or entity spans,
|
| process. Instead of printing a list of dependency labels or entity spans,
|
||||||
| you can simply pass your #[code Doc] objects to #[code displacy] and view
|
| you can simply pass your #[code Doc] objects to #[code displacy] and view
|
||||||
| the visualizations in your browser, or export them as HTML files or
|
| the visualizations in your browser, or export them as HTML files or
|
||||||
| vector graphics. displaCy also comes with a #[+a("#jupyter") Jupyter hook]
|
| vector graphics.
|
||||||
| that returns the markup in a format ready to be rendered in a notebook.
|
|
||||||
|
p
|
||||||
|
| If you're running a #[+a("https://jupyter.org") Jupyter] notebook,
|
||||||
|
| displaCy will detect this and return the markup in a format
|
||||||
|
| #[+a("#jupyter") ready to be rendered and exported].
|
||||||
|
|
||||||
+aside("What about the old visualizers?")
|
+aside("What about the old visualizers?")
|
||||||
| Our JavaScript-based visualizers #[+src(gh("displacy")) displacy.js] and
|
| Our JavaScript-based visualizers #[+src(gh("displacy")) displacy.js] and
|
||||||
|
@ -219,10 +223,10 @@ p
|
||||||
+h(2, "jupyter") Using displaCy in Jupyter notebooks
|
+h(2, "jupyter") Using displaCy in Jupyter notebooks
|
||||||
|
|
||||||
p
|
p
|
||||||
| If you're working with a #[+a("https://jupyter.org") Jupyter] notebook,
|
| displaCy is able to detect whether you're within a
|
||||||
| you can use displaCy's "Jupyter mode" to return markup that can be
|
| #[+a("https://jupyter.org") Jupyter] notebook, and will return markup
|
||||||
| rendered in a cell straight away. When you export your notebook, the
|
| that can be rendered in a cell straight away. When you export your
|
||||||
| visualizations will be included as HTML.
|
| notebook, the visualizations will be included as HTML.
|
||||||
|
|
||||||
+code("Jupyter Example").
|
+code("Jupyter Example").
|
||||||
# don't forget to install a model, e.g.: python -m spacy download en
|
# don't forget to install a model, e.g.: python -m spacy download en
|
||||||
|
@ -230,10 +234,15 @@ p
|
||||||
from spacy import displacy
|
from spacy import displacy
|
||||||
|
|
||||||
doc = nlp(u'Rats are various medium-sized, long-tailed rodents.')
|
doc = nlp(u'Rats are various medium-sized, long-tailed rodents.')
|
||||||
displacy.render(doc, style='dep', jupyter=True)
|
displacy.render(doc, style='dep')
|
||||||
|
|
||||||
doc2 = nlp(LONG_NEWS_ARTICLE)
|
doc2 = nlp(LONG_NEWS_ARTICLE)
|
||||||
displacy.render(doc2, style='ent', jupyter=True)
|
displacy.render(doc2, style='ent')
|
||||||
|
|
||||||
|
+aside("Enabling or disabling Jupyter mode")
|
||||||
|
| To explicitly enable or disable "Jupyter mode", you can use the
|
||||||
|
| #[code jupyter] keyword argument – e.g. to return raw HTML in a notebook,
|
||||||
|
| or to force Jupyter rendering if auto-detection fails.
|
||||||
|
|
||||||
+image("/assets/img/docs/displacy_jupyter.jpg", 700, false, "Example of using the displaCy dependency and named entity visualizer in a Jupyter notebook")
|
+image("/assets/img/docs/displacy_jupyter.jpg", 700, false, "Example of using the displaCy dependency and named entity visualizer in a Jupyter notebook")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user