mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
commit
238be0f16a
94
.gitignore
vendored
94
.gitignore
vendored
|
@ -1,50 +1,43 @@
|
|||
# Vim
|
||||
*.swp
|
||||
*.sw*
|
||||
Profile.prof
|
||||
tmp/
|
||||
.dev
|
||||
.denv
|
||||
.pypyenv
|
||||
.eggs
|
||||
*.tgz
|
||||
.sass-cache
|
||||
.python-version
|
||||
|
||||
MANIFEST
|
||||
|
||||
# spaCy
|
||||
spacy/data/
|
||||
corpora/
|
||||
models/
|
||||
keys/
|
||||
|
||||
spacy/syntax/*.cpp
|
||||
spacy/syntax/*.html
|
||||
spacy/en/*.cpp
|
||||
spacy/tokens/*.cpp
|
||||
spacy/serialize/*.cpp
|
||||
spacy/en/data/*
|
||||
spacy/*.cpp
|
||||
spacy/ner/*.cpp
|
||||
spacy/orthography/*.cpp
|
||||
ext/murmurhash.cpp
|
||||
ext/sparsehash.cpp
|
||||
# Website
|
||||
website/www/
|
||||
website/_deploy.sh
|
||||
website/package.json
|
||||
website/announcement.jade
|
||||
website/.gitignore
|
||||
|
||||
/spacy/data/
|
||||
|
||||
_build/
|
||||
.env/
|
||||
tmp/
|
||||
# Cython / C extensions
|
||||
cythonize.json
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
# C extensions
|
||||
spacy/*.html
|
||||
*.cpp
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
# Vim / VSCode / editors
|
||||
*.swp
|
||||
*.sw*
|
||||
Profile.prof
|
||||
.vscode
|
||||
.sass-cache
|
||||
|
||||
# Python
|
||||
.Python
|
||||
.python-version
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
.env/
|
||||
.~env/
|
||||
.venv
|
||||
venv/
|
||||
.dev
|
||||
.denv
|
||||
.pypyenv
|
||||
|
||||
# Distribution / packaging
|
||||
env/
|
||||
bin/
|
||||
build/
|
||||
|
@ -59,6 +52,12 @@ var/
|
|||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
.eggs
|
||||
MANIFEST
|
||||
|
||||
# Temporary files
|
||||
*.~*
|
||||
tmp/
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
|
@ -87,25 +86,16 @@ coverage.xml
|
|||
*.log
|
||||
*.pot
|
||||
|
||||
# Windows local helper files
|
||||
# Windows
|
||||
*.bat
|
||||
Thumbs.db
|
||||
Desktop.ini
|
||||
|
||||
# Mac OS X
|
||||
*.DS_Store
|
||||
|
||||
# Temporary files / Dropbox hack
|
||||
*.~*
|
||||
|
||||
# Komodo project files
|
||||
*.komodoproject
|
||||
|
||||
# Website
|
||||
website/_deploy.sh
|
||||
website/package.json
|
||||
website/announcement.jade
|
||||
website/www/
|
||||
website/.gitignore
|
||||
|
||||
# Python virtualenv
|
||||
venv
|
||||
venv/*
|
||||
# Other
|
||||
*.tgz
|
||||
|
|
|
@ -16,6 +16,22 @@ try:
|
|||
except ImportError:
|
||||
import copyreg as copy_reg
|
||||
|
||||
try:
|
||||
from cupy.cuda.stream import Stream as CudaStream
|
||||
except ImportError:
|
||||
CudaStream = None
|
||||
|
||||
try:
|
||||
import cupy
|
||||
except ImportError:
|
||||
cupy = None
|
||||
|
||||
|
||||
pickle = pickle
|
||||
copy_reg = copy_reg
|
||||
CudaStream = CudaStream
|
||||
cupy = cupy
|
||||
fix_text = ftfy.fix_text
|
||||
|
||||
is_python2 = six.PY2
|
||||
is_python3 = six.PY3
|
||||
|
@ -23,8 +39,6 @@ is_windows = sys.platform.startswith('win')
|
|||
is_linux = sys.platform.startswith('linux')
|
||||
is_osx = sys.platform == 'darwin'
|
||||
|
||||
fix_text = ftfy.fix_text
|
||||
|
||||
|
||||
if is_python2:
|
||||
bytes_ = str
|
||||
|
|
|
@ -3,13 +3,14 @@ from __future__ import unicode_literals
|
|||
|
||||
from .render import DependencyRenderer, EntityRenderer
|
||||
from ..tokens import Doc
|
||||
from ..util import prints
|
||||
from ..util import prints, is_in_jupyter
|
||||
|
||||
|
||||
_html = {}
|
||||
IS_JUPYTER = is_in_jupyter()
|
||||
|
||||
|
||||
def render(docs, style='dep', page=False, minify=False, jupyter=False, options={}):
|
||||
def render(docs, style='dep', page=False, minify=False, jupyter=IS_JUPYTER, options={}):
|
||||
"""Render displaCy visualisation.
|
||||
|
||||
docs (list or Doc): Document(s) to visualise.
|
||||
|
|
|
@ -11,20 +11,12 @@ import sys
|
|||
import textwrap
|
||||
|
||||
from .symbols import ORTH
|
||||
from .compat import path2str, basestring_, input_, unicode_
|
||||
from .compat import cupy, CudaStream, path2str, basestring_, input_, unicode_
|
||||
|
||||
|
||||
LANGUAGES = {}
|
||||
_data_path = Path(__file__).parent / 'data'
|
||||
try:
|
||||
from cupy.cuda.stream import Stream as CudaStream
|
||||
except ImportError:
|
||||
CudaStream = None
|
||||
|
||||
try:
|
||||
import cupy
|
||||
except ImportError:
|
||||
cupy = None
|
||||
|
||||
def get_lang_class(lang):
|
||||
"""Import and load a Language class.
|
||||
|
@ -151,6 +143,20 @@ def parse_package_meta(package_path, require=True):
|
|||
return None
|
||||
|
||||
|
||||
def is_in_jupyter():
    """Check if user is in a Jupyter notebook. Mainly used for displaCy.

    The check looks up the name `get_ipython`, reads the `config` attribute
    of the object it returns, and compares the 'parent_appname' entry of the
    'IPKernelApp' section against 'ipython-notebook'.

    RETURNS (bool): True if in Jupyter, False if not.
    """
    try:
        # `get_ipython` is not imported in this function; if the name cannot
        # be resolved, the lookup raises NameError, which is handled below.
        cfg = get_ipython().config
        if cfg['IPKernelApp']['parent_appname'] == 'ipython-notebook':
            return True
    except (NameError, KeyError):
        # NameError: `get_ipython` is not defined.
        # KeyError: the config has no 'IPKernelApp' section or no
        # 'parent_appname' entry. NOTE(review): whether a missing entry raises
        # depends on the config object's implementation - confirm against the
        # supported IPython/traitlets versions. Treating it as "not in
        # Jupyter" keeps detection from raising at call time.
        return False
    return False
|
||||
|
||||
|
||||
def get_cuda_stream(require=False):
|
||||
# TODO: Error and tell to install chainer if not found
|
||||
# Requires GPU
|
||||
|
|
|
@ -10,7 +10,10 @@
|
|||
padding: 2rem 3rem
|
||||
|
||||
.c-quickstart__input
|
||||
display: none
|
||||
@include size(0)
|
||||
opacity: 0
|
||||
position: absolute
|
||||
left: -9999px
|
||||
|
||||
.c-quickstart__label
|
||||
cursor: pointer
|
||||
|
@ -25,6 +28,9 @@
|
|||
&:hover
|
||||
background: lighten($color-theme-light, 5)
|
||||
|
||||
.c-quickstart__input:focus + &
|
||||
border: 1px solid $color-theme
|
||||
|
||||
.c-quickstart__input--radio:checked + &
|
||||
color: $color-back
|
||||
border-color: $color-theme
|
||||
|
@ -36,8 +42,6 @@
|
|||
display: inline-block
|
||||
width: 20px
|
||||
height: 20px
|
||||
position: relative
|
||||
bottom: 0.2rem
|
||||
border: 1px solid $color-subtle
|
||||
vertical-align: middle
|
||||
margin-right: 1rem
|
||||
|
|
|
@ -4,4 +4,4 @@
|
|||
* @author Ines Montani <ines@ines.io>
|
||||
* @version 0.0.1
|
||||
* @license MIT
|
||||
*/'use strict';var _createClass=function(){function a(b,c){for(var e,d=0;d<c.length;d++)e=c[d],e.enumerable=e.enumerable||!1,e.configurable=!0,'value'in e&&(e.writable=!0),Object.defineProperty(b,e.key,e)}return function(b,c,d){return c&&a(b.prototype,c),d&&a(b,d),b}}();function _toConsumableArray(a){if(Array.isArray(a)){for(var b=0,c=Array(a.length);b<a.length;b++)c[b]=a[b];return c}return Array.from(a)}function _classCallCheck(a,b){if(!(a instanceof b))throw new TypeError('Cannot call a class as a function')}var Quickstart=function(){function a(){var b=0<arguments.length&&void 0!==arguments[0]?arguments[0]:'#quickstart',d=arguments[1],c=2<arguments.length&&void 0!==arguments[2]?arguments[2]:{};_classCallCheck(this,a),this.container='string'==typeof b?this._$(b):b,this.groups=d,this.pfx=c.prefix||'qs',this.dpfx='data-'+this.pfx,this.init=this.init.bind(this),c.noInit||document.addEventListener('DOMContentLoaded',this.init)}return _createClass(a,[{key:'init',value:function init(){this.updateContainer(),this.container.style.display='block',this.container.classList.add(''+this.pfx);var b=this.groups;b instanceof Array?b.reverse().forEach(this.createGroup.bind(this)):this._$$('['+this.dpfx+'-group]').forEach(this.updateGroup.bind(this))}},{key:'initGroup',value:function initGroup(b,c){b.addEventListener('change',this.update.bind(this)),b.dispatchEvent(new CustomEvent('change',{detail:c}))}},{key:'updateGroup',value:function updateGroup(b){var c=b.getAttribute(this.dpfx+'-group'),d=this.createStyles(c);b.insertBefore(d,b.firstChild),this.initGroup(b,c)}},{key:'update',value:function update(b){var f=this,c=b.detail||b.target.name,d=this._$$('[name='+c+']').filter(function(h){return h.checked}).map(function(h){return h.value}),e=d.map(function(h){return':not(['+f.dpfx+'-'+c+'="'+h+'"])'}).join(''),g='['+this.dpfx+'-results]>['+this.dpfx+'-'+c+']'+e+' {display: none}';this._$('['+this.dpfx+'-style="'+c+'"]').textContent=g}},{key:'updateContainer',value:function 
updateContainer(){if(!this._$('['+this.dpfx+'-results]')){var b=this.childNodes(this.container,'pre'),c=b?b[0]:this._c('pre',this.pfx+'-code'),d=this.childNodes(c,'code')||this.childNodes(this.container,'code'),e=d?d[0]:this._c('code',this.pfx+'-results');e.setAttribute(this.dpfx+'-results','');var f=this.childNodes(e,'span')||this.childNodes(c,'span')||this.childNodes(this.container,'span');f&&f.forEach(function(g){return e.appendChild(g)}),c.appendChild(e),this.container.appendChild(c)}}},{key:'createGroup',value:function createGroup(b){var d=this,c=this._c('div',this.pfx+'-group');c.setAttribute(this.dpfx+'-group',b.id),c.innerHTML=this.createStyles(b.id).outerHTML,c.innerHTML+='<div class="'+this.pfx+'-legend">'+b.title+'</div>',c.innerHTML+='<div class="'+this.pfx+'-fields">'+b.options.map(function(e){var f=b.multiple?'checkbox':'radio';return'<input class="'+d.pfx+'-input '+d.pfx+'-input--'+f+'" type="'+f+'" name="'+b.id+'" id="'+e.id+'" value="'+e.id+'" '+(e.checked?'checked':'')+' /><label class="'+d.pfx+'-label" for="'+e.id+'">'+e.title+'</label>'}).join('')+'</div>',this.container.insertBefore(c,this.container.firstChild),this.initGroup(c,b.id)}},{key:'createStyles',value:function createStyles(b){var c=this._c('style');return c.setAttribute(this.dpfx+'-style',b),c.textContent='['+this.dpfx+'-results]>['+this.dpfx+'-'+b+'] {display: none}',c}},{key:'childNodes',value:function childNodes(b,c){var d=c.toUpperCase();if(!b.hasChildNodes)return!1;var e=[].concat(_toConsumableArray(b.childNodes)).filter(function(f){return f.nodeName===d});return!!e.length&&e}},{key:'_$',value:function _$(b){return document.querySelector(b)}},{key:'_$$',value:function _$$(b){return[].concat(_toConsumableArray(document.querySelectorAll(b)))}},{key:'_c',value:function _c(b,c){var d=document.createElement(b);return c&&(d.className=c),d}}]),a}();
|
||||
*/'use strict';var _createClass=function(){function a(b,c){for(var e,d=0;d<c.length;d++)e=c[d],e.enumerable=e.enumerable||!1,e.configurable=!0,'value'in e&&(e.writable=!0),Object.defineProperty(b,e.key,e)}return function(b,c,d){return c&&a(b.prototype,c),d&&a(b,d),b}}();function _toConsumableArray(a){if(Array.isArray(a)){for(var b=0,c=Array(a.length);b<a.length;b++)c[b]=a[b];return c}return Array.from(a)}function _classCallCheck(a,b){if(!(a instanceof b))throw new TypeError('Cannot call a class as a function')}var Quickstart=function(){function a(){var b=0<arguments.length&&void 0!==arguments[0]?arguments[0]:'#quickstart',d=arguments[1],c=2<arguments.length&&void 0!==arguments[2]?arguments[2]:{};_classCallCheck(this,a),this.container='string'==typeof b?this._$(b):b,this.groups=d,this.pfx=c.prefix||'qs',this.dpfx='data-'+this.pfx,this.init=this.init.bind(this),c.noInit||document.addEventListener('DOMContentLoaded',this.init)}return _createClass(a,[{key:'init',value:function init(){this.updateContainer(),this.container.style.display='block',this.container.classList.add(''+this.pfx);var b=this.groups;b instanceof Array?b.reverse().forEach(this.createGroup.bind(this)):this._$$('['+this.dpfx+'-group]').forEach(this.updateGroup.bind(this))}},{key:'initGroup',value:function initGroup(b,c){b.addEventListener('change',this.update.bind(this)),b.dispatchEvent(new CustomEvent('change',{detail:c}))}},{key:'updateGroup',value:function updateGroup(b){var c=b.getAttribute(this.dpfx+'-group'),d=this.createStyles(c);b.insertBefore(d,b.firstChild),this.initGroup(b,c)}},{key:'update',value:function update(b){var f=this,c=b.detail||b.target.name,d=this._$$('[name='+c+']:checked').map(function(h){return h.value}),e=d.map(function(h){return':not(['+f.dpfx+'-'+c+'="'+h+'"])'}).join(''),g='['+this.dpfx+'-results]>['+this.dpfx+'-'+c+']'+e+' {display: none}';this._$('['+this.dpfx+'-style="'+c+'"]').textContent=g}},{key:'updateContainer',value:function 
updateContainer(){if(!this._$('['+this.dpfx+'-results]')){var b=this.childNodes(this.container,'pre'),c=b?b[0]:this._c('pre',this.pfx+'-code'),d=this.childNodes(c,'code')||this.childNodes(this.container,'code'),e=d?d[0]:this._c('code',this.pfx+'-results');e.setAttribute(this.dpfx+'-results','');var f=this.childNodes(e,'span')||this.childNodes(c,'span')||this.childNodes(this.container,'span');f&&f.forEach(function(g){return e.appendChild(g)}),c.appendChild(e),this.container.appendChild(c)}}},{key:'createGroup',value:function createGroup(b){var d=this,c=this._c('fieldset',this.pfx+'-group');c.setAttribute(this.dpfx+'-group',b.id),c.innerHTML=this.createStyles(b.id).outerHTML,c.innerHTML+='<legend class="'+this.pfx+'-legend">'+b.title+'</legend>',c.innerHTML+=b.options.map(function(e){var f=b.multiple?'checkbox':'radio';return'<input class="'+d.pfx+'-input '+d.pfx+'-input--'+f+'" type="'+f+'" name="'+b.id+'" id="'+e.id+'" value="'+e.id+'" '+(e.checked?'checked':'')+' /><label class="'+d.pfx+'-label" for="'+e.id+'">'+e.title+'</label>'}).join(''),this.container.insertBefore(c,this.container.firstChild),this.initGroup(c,b.id)}},{key:'createStyles',value:function createStyles(b){var c=this._c('style');return c.setAttribute(this.dpfx+'-style',b),c.textContent='['+this.dpfx+'-results]>['+this.dpfx+'-'+b+'] {display: none}',c}},{key:'childNodes',value:function childNodes(b,c){var d=c.toUpperCase();if(!b.hasChildNodes)return!1;var e=[].concat(_toConsumableArray(b.childNodes)).filter(function(f){return f.nodeName===d});return!!e.length&&e}},{key:'_$',value:function _$(b){return document.querySelector(b)}},{key:'_$$',value:function _$$(b){return[].concat(_toConsumableArray(document.querySelectorAll(b)))}},{key:'_c',value:function _c(b,c){var d=document.createElement(b);return c&&(d.className=c),d}}]),a}();
|
||||
|
|
|
@ -101,9 +101,9 @@ p Render a dependency parse tree or named entity visualization.
|
|||
+cell #[code jupyter]
|
||||
+cell bool
|
||||
+cell
|
||||
| Returns markup using #[+a("http://jupyter.org/") Jupyter]'s
|
||||
| internal methods, ready to be rendered in a notebook.
|
||||
+cell #[code False]
|
||||
| Explicitly enable "#[+a("http://jupyter.org/") Jupyter] mode" to
|
||||
| return markup ready to be rendered in a notebook.
|
||||
+cell detected automatically
|
||||
|
||||
+row
|
||||
+cell #[code options]
|
||||
|
|
|
@ -11,8 +11,12 @@ p
|
|||
| process. Instead of printing a list of dependency labels or entity spans,
|
||||
| you can simply pass your #[code Doc] objects to #[code displacy] and view
|
||||
| the visualizations in your browser, or export them as HTML files or
|
||||
| vector graphics. displaCy also comes with a #[+a("#jupyter") Jupyter hook]
|
||||
| that returns the markup in a format ready to be rendered in a notebook.
|
||||
| vector graphics.
|
||||
|
||||
p
|
||||
| If you're running a #[+a("https://jupyter.org") Jupyter] notebook,
|
||||
| displaCy will detect this and return the markup in a format
|
||||
| #[+a("#jupyter") ready to be rendered and exported].
|
||||
|
||||
+aside("What about the old visualizers?")
|
||||
| Our JavaScript-based visualizers #[+src(gh("displacy")) displacy.js] and
|
||||
|
@ -219,10 +223,10 @@ p
|
|||
+h(2, "jupyter") Using displaCy in Jupyter notebooks
|
||||
|
||||
p
|
||||
| If you're working with a #[+a("https://jupyter.org") Jupyter] notebook,
|
||||
| you can use displaCy's "Jupyter mode" to return markup that can be
|
||||
| rendered in a cell straight away. When you export your notebook, the
|
||||
| visualizations will be included as HTML.
|
||||
| displaCy is able to detect whether you're within a
|
||||
| #[+a("https://jupyter.org") Jupyter] notebook, and will return markup
|
||||
| that can be rendered in a cell straight away. When you export your
|
||||
| notebook, the visualizations will be included as HTML.
|
||||
|
||||
+code("Jupyter Example").
|
||||
# don't forget to install a model, e.g.: python -m spacy download en
|
||||
|
@ -230,10 +234,15 @@ p
|
|||
from spacy import displacy
|
||||
|
||||
doc = nlp(u'Rats are various medium-sized, long-tailed rodents.')
|
||||
displacy.render(doc, style='dep', jupyter=True)
|
||||
displacy.render(doc, style='dep')
|
||||
|
||||
doc2 = nlp(LONG_NEWS_ARTICLE)
|
||||
displacy.render(doc2, style='ent', jupyter=True)
|
||||
displacy.render(doc2, style='ent')
|
||||
|
||||
+aside("Enabling or disabling Jupyter mode")
|
||||
| To explicitly enable or disable "Jupyter mode", you can use the
|
||||
| #[code jupyter] keyword argument – e.g. to return raw HTML in a notebook,
|
||||
| or to force Jupyter rendering if auto-detection fails.
|
||||
|
||||
+image("/assets/img/docs/displacy_jupyter.jpg", 700, false, "Example of using the displaCy dependency and named entity visualizer in a Jupyter notebook")
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user