mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
commit
877f83807f
|
@ -13,3 +13,4 @@ requests>=2.13.0,<3.0.0
|
|||
regex==2017.4.5
|
||||
ftfy>=4.4.2,<5.0.0
|
||||
pytest>=3.0.6,<4.0.0
|
||||
pip>=9.0.0,<10.0.0
|
||||
|
|
1
setup.py
1
setup.py
|
@ -193,6 +193,7 @@ def setup_package():
|
|||
'preshed>=1.0.0,<2.0.0',
|
||||
'thinc>=6.6.0,<6.7.0',
|
||||
'plac<1.0.0,>=0.9.6',
|
||||
'pip>=9.0.0,<10.0.0',
|
||||
'six',
|
||||
'pathlib',
|
||||
'ujson>=1.35',
|
||||
|
|
|
@ -5,6 +5,7 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
|||
from .tag_map import TAG_MAP
|
||||
from .stop_words import STOP_WORDS
|
||||
from .lemmatizer import LOOKUP
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ...language import Language
|
||||
|
@ -23,6 +24,7 @@ class German(Language):
|
|||
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
|
||||
tag_map = dict(TAG_MAP)
|
||||
stop_words = set(STOP_WORDS)
|
||||
syntax_iterators = dict(SYNTAX_ITERATORS)
|
||||
|
||||
@classmethod
|
||||
def create_lemmatizer(cls, nlp=None):
|
||||
|
|
38
spacy/lang/de/syntax_iterators.py
Normal file
38
spacy/lang/de/syntax_iterators.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from ...symbols import NOUN, PROPN, PRON
|
||||
|
||||
|
||||
def noun_chunks(obj):
    """
    Detect base noun phrases from a dependency parse. Works on both Doc and Span.

    obj: the Doc or Span to iterate over; its tokens and `obj.doc` are used.
    YIELDS: (start, end, label) tuples. `start` is the document-level index
        of the head's left edge, `end` is one past the last token of the
        chunk, and `label` is the string-store ID of 'NP'.
    """
    # This iterator extracts spans headed by NOUNs, starting from the
    # left-most syntactic dependent and running up to the NOUN itself. For
    # close apposition and measurement constructions, the span is sometimes
    # extended to the right of the NOUN. Example: "eine Tasse Tee" (a cup (of)
    # tea) returns "eine Tasse Tee" and not just "eine Tasse", same for
    # "das Thema Familie".
    labels = ['sb', 'oa', 'da', 'nk', 'mo', 'ag', 'ROOT', 'root', 'cj', 'pd', 'og', 'app']
    doc = obj.doc  # Ensure works on both Doc and Span.
    np_label = doc.vocab.strings['NP']
    np_deps = set(doc.vocab.strings[label] for label in labels)
    close_app = doc.vocab.strings['nk']

    # Document-level index one past the end of the most recent chunk.
    rbracket = 0
    for word in obj:
        # Compare document-level positions: `rbracket` is derived from
        # `word.i`, so a span-relative counter would skip the wrong tokens
        # whenever `obj` is a Span that does not start at token 0.
        if word.i < rbracket:
            continue
        if word.pos in (NOUN, PROPN, PRON) and word.dep in np_deps:
            rbracket = word.i+1
            # try to extend the span to the right
            # to capture close apposition/measurement constructions
            for rdep in doc[word.i].rights:
                if rdep.pos in (NOUN, PROPN) and rdep.dep == close_app:
                    rbracket = rdep.i+1
            yield word.left_edge.i, rbracket, np_label
|
||||
|
||||
|
||||
SYNTAX_ITERATORS = {
|
||||
'noun_chunks': noun_chunks
|
||||
}
|
|
@ -7,6 +7,7 @@ from .stop_words import STOP_WORDS
|
|||
from .lex_attrs import LEX_ATTRS
|
||||
from .morph_rules import MORPH_RULES
|
||||
from .lemmatizer import LEMMA_RULES, LEMMA_INDEX, LEMMA_EXC
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ...language import Language
|
||||
|
@ -29,6 +30,7 @@ class English(Language):
|
|||
lemma_rules = dict(LEMMA_RULES)
|
||||
lemma_index = dict(LEMMA_INDEX)
|
||||
lemma_exc = dict(LEMMA_EXC)
|
||||
# Per-language copy of the syntax iterators (e.g. 'noun_chunks').
syntax_iterators = dict(SYNTAX_ITERATORS)
|
||||
|
||||
|
||||
__all__ = ['English']
|
||||
|
|
43
spacy/lang/en/syntax_iterators.py
Normal file
43
spacy/lang/en/syntax_iterators.py
Normal file
|
@ -0,0 +1,43 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from ...symbols import NOUN, PROPN, PRON
|
||||
|
||||
|
||||
def noun_chunks(obj):
    """
    Detect base noun phrases from a dependency parse. Works on both Doc and Span.

    obj: the Doc or Span to iterate over; its tokens and `obj.doc` are used.
    YIELDS: (start, end, label) tuples. `start` is the document-level index
        of the head's left edge, `end` is the head's index plus one, and
        `label` is the string-store ID of 'NP'.
    """
    labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj',
              'attr', 'ROOT']
    doc = obj.doc  # Ensure works on both Doc and Span.
    strings = doc.vocab.strings
    np_deps = [strings[label] for label in labels]
    conj = strings['conj']
    np_label = strings['NP']
    # Document-level indices of tokens already covered by a yielded chunk.
    seen = set()
    for word in obj:
        if word.pos not in (NOUN, PROPN, PRON):
            continue
        # Prevent nested chunks from being produced
        if word.i in seen:
            continue
        if word.dep in np_deps:
            heads_chunk = True
        elif word.dep == conj:
            # Walk up the chain of leftward conjuncts to the first conjunct.
            head = word.head
            while head.dep == conj and head.head.i < head.i:
                head = head.head
            # If the head is an NP, and we're coordinated to it, we're an NP
            heads_chunk = head.dep in np_deps
        else:
            heads_chunk = False
        if not heads_chunk:
            continue
        if any(w.i in seen for w in word.subtree):
            continue
        start, end = word.left_edge.i, word.i+1
        seen.update(range(start, end))
        yield start, end, np_label
|
||||
|
||||
|
||||
SYNTAX_ITERATORS = {
|
||||
'noun_chunks': noun_chunks
|
||||
}
|
|
@ -29,7 +29,7 @@
|
|||
"NAVIGATION": {
|
||||
"Home": "/",
|
||||
"Usage": "/docs/usage",
|
||||
"API": "/docs/api",
|
||||
"Reference": "/docs/api",
|
||||
"Demos": "/docs/usage/showcase",
|
||||
"Blog": "https://explosion.ai/blog"
|
||||
},
|
||||
|
@ -55,6 +55,31 @@
|
|||
}
|
||||
},
|
||||
|
||||
"QUICKSTART": [
|
||||
{ "id": "os", "title": "Operating system", "options": [
|
||||
{ "id": "mac", "title": "macOS / OSX", "checked": true },
|
||||
{ "id": "windows", "title": "Windows" },
|
||||
{ "id": "linux", "title": "Linux" }]
|
||||
},
|
||||
{ "id": "package", "title": "Package manager", "options": [
|
||||
{ "id": "pip", "title": "pip", "checked": true },
|
||||
{ "id": "conda", "title": "conda" },
|
||||
{ "id": "source", "title": "from source" }]
|
||||
},
|
||||
{ "id": "python", "title": "Python version", "options": [
|
||||
{ "id": 2, "title": "2.x" },
|
||||
{ "id": 3, "title": "3.x", "checked": true }]
|
||||
},
|
||||
{ "id": "config", "title": "Configuration", "multiple": true, "options": [
|
||||
{"id": "venv", "title": "virtualenv", "help": "Use a virtual environment and install spaCy into a user directory" }]
|
||||
},
|
||||
{ "id": "model", "title": "Models", "multiple": true, "options": [
|
||||
{ "id": "en", "title": "English", "meta": "50MB" },
|
||||
{ "id": "de", "title": "German", "meta": "645MB" },
|
||||
{ "id": "fr", "title": "French", "meta": "1.33GB" }]
|
||||
}
|
||||
],
|
||||
|
||||
"V_CSS": "1.6",
|
||||
"V_JS": "1.2",
|
||||
"DEFAULT_SYNTAX": "python",
|
||||
|
|
|
@ -86,6 +86,44 @@ mixin permalink(id)
|
|||
block
|
||||
|
||||
|
||||
//- Quickstart widget
|
||||
quickstart.js with manual markup, inspired by PyTorch's "Getting started"
|
||||
groups - [object] option groups, uses global variable QUICKSTART
|
||||
headline - [string] optional text to be rendered as widget headline
|
||||
|
||||
mixin quickstart(groups, headline)
|
||||
.c-quickstart.o-block#qs
|
||||
.c-quickstart__content
|
||||
if headline
|
||||
+h(2)=headline
|
||||
for group in groups
|
||||
.c-quickstart__group.u-text-small(data-qs-group=group.id)
|
||||
.c-quickstart__legend=group.title
|
||||
.c-quickstart__fields
|
||||
for option in group.options
|
||||
input.c-quickstart__input(class="c-quickstart__input--" + (group.multiple ? "check" : "radio") type=group.multiple ? "checkbox" : "radio" name=group.id id=option.id value=option.id checked=option.checked)
|
||||
label.c-quickstart__label(for=option.id)=option.title
|
||||
if option.meta
|
||||
| #[span.c-quickstart__label__meta (#{option.meta})]
|
||||
if option.help
|
||||
| #[+help(option.help).c-quickstart__label__meta]
|
||||
|
||||
pre.c-code-block
|
||||
code.c-code-block__content.c-quickstart__code(data-qs-results="")
|
||||
block
|
||||
|
||||
|
||||
//- Quickstart code item
|
||||
data [object] - Rendering conditions (keyed by option group ID, value: option)
|
||||
|
||||
mixin qs(data)
|
||||
- args = {}
|
||||
for value, setting in data
|
||||
- args['data-qs-' + setting] = value
|
||||
span.c-quickstart__line&attributes(args)
|
||||
block
|
||||
|
||||
|
||||
//- Terminal-style code window
|
||||
label - [string] title displayed in top bar of terminal window
|
||||
|
||||
|
|
|
@ -47,6 +47,14 @@ mixin api(path)
|
|||
| #[+icon("book", 18).o-icon--inline.u-color-subtle]
|
||||
|
||||
|
||||
//- Help icon with tooltip
|
||||
tooltip - [string] Tooltip text
|
||||
|
||||
mixin help(tooltip)
|
||||
span(data-tooltip=tooltip)&attributes(attributes)
|
||||
+icon("help", 16).o-icon--inline
|
||||
|
||||
|
||||
//- Aside for text
|
||||
label - [string] aside title (optional)
|
||||
|
||||
|
|
|
@ -1,9 +1,13 @@
|
|||
//- 💫 INCLUDES > SCRIPTS
|
||||
|
||||
script(src="/assets/js/main.js?v#{V_JS}", type="text/javascript")
|
||||
script(src="/assets/js/prism.js", type="text/javascript")
|
||||
script(src="/assets/js/main.js?v#{V_JS}")
|
||||
script(src="/assets/js/prism.js")
|
||||
|
||||
if SECTION == "docs"
|
||||
if quickstart
|
||||
script(src="/assets/js/quickstart.js")
|
||||
script var qs = new Quickstart("#qs")
|
||||
|
||||
script.
|
||||
((window.gitter = {}).chat = {}).options = {
|
||||
useStyles: false,
|
||||
|
|
83
website/assets/css/_components/_quickstart.sass
Normal file
83
website/assets/css/_components/_quickstart.sass
Normal file
|
@ -0,0 +1,83 @@
|
|||
//- 💫 CSS > COMPONENTS > QUICKSTART
|
||||
|
||||
.c-quickstart
|
||||
border: 1px solid $color-subtle
|
||||
border-radius: 2px
|
||||
display: none
|
||||
background: $color-subtle-light
|
||||
|
||||
.c-quickstart__content
|
||||
padding: 2rem 3rem
|
||||
|
||||
.c-quickstart__input
|
||||
display: none
|
||||
|
||||
.c-quickstart__label
|
||||
cursor: pointer
|
||||
background: $color-back
|
||||
border: 1px solid $color-subtle
|
||||
border-radius: 2px
|
||||
display: inline-block
|
||||
padding: 0.75rem 1.25rem
|
||||
margin: 0 0.5rem 0.5rem 0
|
||||
font-weight: bold
|
||||
|
||||
&:hover
|
||||
background: lighten($color-theme-light, 5)
|
||||
|
||||
.c-quickstart__input--radio:checked + &
|
||||
color: $color-back
|
||||
border-color: $color-theme
|
||||
background: $color-theme
|
||||
|
||||
.c-quickstart__input--check + &:before
|
||||
content: ""
|
||||
background: $color-back
|
||||
display: inline-block
|
||||
width: 20px
|
||||
height: 20px
|
||||
position: relative
|
||||
bottom: 0.2rem
|
||||
border: 1px solid $color-subtle
|
||||
vertical-align: middle
|
||||
margin-right: 1rem
|
||||
cursor: pointer
|
||||
border-radius: 50%
|
||||
|
||||
.c-quickstart__input--check:checked + &:before
|
||||
background: $color-theme url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij4gICAgPHBhdGggZmlsbD0iI2ZmZiIgZD0iTTkgMTYuMTcybDEwLjU5NC0xMC41OTQgMS40MDYgMS40MDYtMTIgMTItNS41NzgtNS41NzggMS40MDYtMS40MDZ6Ii8+PC9zdmc+)
|
||||
background-size: contain
|
||||
border-color: $color-theme
|
||||
|
||||
.c-quickstart__label__meta
|
||||
font-weight: normal
|
||||
color: $color-subtle-dark
|
||||
|
||||
.c-quickstart__group
|
||||
@include breakpoint(min, md)
|
||||
display: flex
|
||||
flex-flow: row nowrap
|
||||
|
||||
&:not(:last-child)
|
||||
margin-bottom: 1rem
|
||||
|
||||
.c-quickstart__fields
|
||||
flex: 100%
|
||||
|
||||
.c-quickstart__legend
|
||||
color: $color-subtle-dark
|
||||
margin-right: 2rem
|
||||
padding-top: 0.75rem
|
||||
flex: 1 1 35%
|
||||
font-weight: bold
|
||||
|
||||
.c-quickstart__line
|
||||
display: block
|
||||
|
||||
&:before
|
||||
color: $color-theme
|
||||
margin-right: 1em
|
||||
content: "$"
|
||||
|
||||
.c-quickstart__code
|
||||
font-size: 1.6rem
|
30
website/assets/css/_components/_tooltips.sass
Normal file
30
website/assets/css/_components/_tooltips.sass
Normal file
|
@ -0,0 +1,30 @@
|
|||
//- 💫 CSS > COMPONENTS > TOOLTIPS
|
||||
|
||||
[data-tooltip]
|
||||
position: relative
|
||||
|
||||
@include breakpoint(min, sm)
|
||||
&:before
|
||||
@include position(absolute, top, left, 125%, 50%)
|
||||
display: inline-block
|
||||
content: attr(data-tooltip)
|
||||
background: $color-front
|
||||
border-radius: 2px
|
||||
color: $color-back
|
||||
font-family: inherit
|
||||
font-size: 1.3rem
|
||||
line-height: 1.25
|
||||
opacity: 0
|
||||
padding: 0.5em 0.75em
|
||||
transform: translateX(-50%) translateY(-2px)
|
||||
transition: opacity 0.1s ease-out, transform 0.1s ease-out
|
||||
visibility: hidden
|
||||
//white-space: nowrap
|
||||
min-width: 200px
|
||||
max-width: 300px
|
||||
z-index: 200
|
||||
|
||||
&:hover:before
|
||||
opacity: 1
|
||||
transform: translateX(-50%) translateY(0)
|
||||
visibility: visible
|
|
@ -32,3 +32,5 @@ $theme: blue !default
|
|||
@import _components/navigation
|
||||
@import _components/sidebar
|
||||
@import _components/tables
|
||||
@import _components/quickstart
|
||||
@import _components/tooltips
|
||||
|
|
|
@ -27,5 +27,8 @@
|
|||
<symbol id="star" viewBox="0 0 24 24">
|
||||
<path d="M12 17.25l-6.188 3.75 1.641-7.031-5.438-4.734 7.172-0.609 2.813-6.609 2.813 6.609 7.172 0.609-5.438 4.734 1.641 7.031z"></path>
|
||||
</symbol>
|
||||
<symbol id="help" viewBox="0 0 24 24">
|
||||
<path d="M12 6c2.203 0 3.984 1.781 3.984 3.984 0 2.484-3 2.766-3 5.016h-1.969c0-3.234 3-3 3-5.016 0-1.078-0.938-1.969-2.016-1.969s-2.016 0.891-2.016 1.969h-1.969c0-2.203 1.781-3.984 3.984-3.984zM12 20.016c4.406 0 8.016-3.609 8.016-8.016s-3.609-8.016-8.016-8.016-8.016 3.609-8.016 8.016 3.609 8.016 8.016 8.016zM12 2.016c5.531 0 9.984 4.453 9.984 9.984s-4.453 9.984-9.984 9.984-9.984-4.453-9.984-9.984 4.453-9.984 9.984-9.984zM11.016 18v-2.016h1.969v2.016h-1.969z"/>
|
||||
</symbol>
|
||||
</defs>
|
||||
</svg>
|
||||
|
|
Before Width: | Height: | Size: 4.9 KiB After Width: | Height: | Size: 5.4 KiB |
|
@ -1,6 +1,4 @@
|
|||
//- ----------------------------------
|
||||
//- 💫 MAIN JAVASCRIPT
|
||||
//- ----------------------------------
|
||||
|
||||
'use strict'
|
||||
|
||||
|
|
7
website/assets/js/quickstart.js
Normal file
7
website/assets/js/quickstart.js
Normal file
|
@ -0,0 +1,7 @@
|
|||
/**
|
||||
* quickstart.js
|
||||
*
|
||||
* @author Ines Montani <ines@ines.io>
|
||||
* @version 0.0.1
|
||||
* @license MIT
|
||||
*/'use strict';var _createClass=function(){function a(b,c){for(var e,d=0;d<c.length;d++)e=c[d],e.enumerable=e.enumerable||!1,e.configurable=!0,'value'in e&&(e.writable=!0),Object.defineProperty(b,e.key,e)}return function(b,c,d){return c&&a(b.prototype,c),d&&a(b,d),b}}();function _toConsumableArray(a){if(Array.isArray(a)){for(var b=0,c=Array(a.length);b<a.length;b++)c[b]=a[b];return c}return Array.from(a)}function _classCallCheck(a,b){if(!(a instanceof b))throw new TypeError('Cannot call a class as a function')}var Quickstart=function(){function a(){var b=0<arguments.length&&void 0!==arguments[0]?arguments[0]:'#quickstart',d=arguments[1],c=2<arguments.length&&void 0!==arguments[2]?arguments[2]:{};_classCallCheck(this,a),this.container='string'==typeof b?this._$(b):b,this.groups=d,this.pfx=c.prefix||'qs',this.dpfx='data-'+this.pfx,this.init=this.init.bind(this),c.noInit||document.addEventListener('DOMContentLoaded',this.init)}return _createClass(a,[{key:'init',value:function init(){this.updateContainer(),this.container.style.display='block',this.container.classList.add(''+this.pfx);var b=this.groups;b instanceof Array?b.reverse().forEach(this.createGroup.bind(this)):this._$$('['+this.dpfx+'-group]').forEach(this.updateGroup.bind(this))}},{key:'initGroup',value:function initGroup(b,c){b.addEventListener('change',this.update.bind(this)),b.dispatchEvent(new CustomEvent('change',{detail:c}))}},{key:'updateGroup',value:function updateGroup(b){var c=b.getAttribute(this.dpfx+'-group'),d=this.createStyles(c);b.insertBefore(d,b.firstChild),this.initGroup(b,c)}},{key:'update',value:function update(b){var f=this,c=b.detail||b.target.name,d=this._$$('[name='+c+']').filter(function(h){return h.checked}).map(function(h){return h.value}),e=d.map(function(h){return':not(['+f.dpfx+'-'+c+'="'+h+'"])'}).join(''),g='['+this.dpfx+'-results]>['+this.dpfx+'-'+c+']'+e+' {display: none}';this._$('['+this.dpfx+'-style="'+c+'"]').textContent=g}},{key:'updateContainer',value:function 
updateContainer(){if(!this._$('['+this.dpfx+'-results]')){var b=this.childNodes(this.container,'pre'),c=b?b[0]:this._c('pre',this.pfx+'-code'),d=this.childNodes(c,'code')||this.childNodes(this.container,'code'),e=d?d[0]:this._c('code',this.pfx+'-results');e.setAttribute(this.dpfx+'-results','');var f=this.childNodes(e,'span')||this.childNodes(c,'span')||this.childNodes(this.container,'span');f&&f.forEach(function(g){return e.appendChild(g)}),c.appendChild(e),this.container.appendChild(c)}}},{key:'createGroup',value:function createGroup(b){var d=this,c=this._c('div',this.pfx+'-group');c.setAttribute(this.dpfx+'-group',b.id),c.innerHTML=this.createStyles(b.id).outerHTML,c.innerHTML+='<div class="'+this.pfx+'-legend">'+b.title+'</div>',c.innerHTML+='<div class="'+this.pfx+'-fields">'+b.options.map(function(e){var f=b.multiple?'checkbox':'radio';return'<input class="'+d.pfx+'-input '+d.pfx+'-input--'+f+'" type="'+f+'" name="'+b.id+'" id="'+e.id+'" value="'+e.id+'" '+(e.checked?'checked':'')+' /><label class="'+d.pfx+'-label" for="'+e.id+'">'+e.title+'</label>'}).join('')+'</div>',this.container.insertBefore(c,this.container.firstChild),this.initGroup(c,b.id)}},{key:'createStyles',value:function createStyles(b){var c=this._c('style');return c.setAttribute(this.dpfx+'-style',b),c.textContent='['+this.dpfx+'-results]>['+this.dpfx+'-'+b+'] {display: none}',c}},{key:'childNodes',value:function childNodes(b,c){var d=c.toUpperCase();if(!b.hasChildNodes)return!1;var e=[].concat(_toConsumableArray(b.childNodes)).filter(function(f){return f.nodeName===d});return!!e.length&&e}},{key:'_$',value:function _$(b){return document.querySelector(b)}},{key:'_$$',value:function _$$(b){return[].concat(_toConsumableArray(document.querySelectorAll(b)))}},{key:'_c',value:function _c(b,c){var d=document.createElement(b);return c&&(d.className=c),d}}]),a}();
|
|
@ -21,6 +21,7 @@
|
|||
"GoldParse": "goldparse"
|
||||
},
|
||||
"Other": {
|
||||
"Command line": "cli",
|
||||
"displaCy": "displacy",
|
||||
"Utility Functions": "util",
|
||||
"Annotation Specs": "annotation",
|
||||
|
@ -112,6 +113,11 @@
|
|||
"tag": "class"
|
||||
},
|
||||
|
||||
"cli": {
|
||||
"title": "Command Line Interface",
|
||||
"next": "displacy"
|
||||
},
|
||||
|
||||
"displacy": {
|
||||
"title": "displaCy",
|
||||
"tag": "module"
|
||||
|
|
|
@ -5,10 +5,11 @@
|
|||
"Models": "models",
|
||||
"Lightning tour": "lightning-tour",
|
||||
"Visualizers": "visualizers",
|
||||
"Command line": "cli",
|
||||
"Troubleshooting": "troubleshooting"
|
||||
"Troubleshooting": "troubleshooting",
|
||||
"What's new in v2.0": "v2"
|
||||
},
|
||||
"Workflows": {
|
||||
"spaCy 101": "spacy-101",
|
||||
"Loading the pipeline": "language-processing-pipeline",
|
||||
"Processing text": "processing-text",
|
||||
"spaCy's data model": "data-model",
|
||||
|
@ -33,7 +34,12 @@
|
|||
|
||||
"index": {
|
||||
"title": "Install spaCy",
|
||||
"next": "models"
|
||||
"next": "models",
|
||||
"quickstart": true
|
||||
},
|
||||
|
||||
"v2": {
|
||||
"title": "What's new in v2.0"
|
||||
},
|
||||
|
||||
"models": {
|
||||
|
@ -43,17 +49,11 @@
|
|||
|
||||
"lightning-tour": {
|
||||
"title": "Lightning tour",
|
||||
"next": "visualizers"
|
||||
"next": "spacy-101"
|
||||
},
|
||||
|
||||
"visualizers": {
|
||||
"title": "Visualizers",
|
||||
"next": "cli"
|
||||
},
|
||||
|
||||
"cli": {
|
||||
"title": "Command Line Interface",
|
||||
"next": "troubleshooting"
|
||||
"title": "Visualizers"
|
||||
},
|
||||
|
||||
"troubleshooting": {
|
||||
|
@ -65,6 +65,10 @@
|
|||
"title": "Resources"
|
||||
},
|
||||
|
||||
"spacy-101": {
|
||||
"title": "spaCy 101"
|
||||
},
|
||||
|
||||
"language-processing-pipeline": {
|
||||
"title": "Loading a language processing pipeline",
|
||||
"next": "processing-text"
|
||||
|
|
|
@ -12,6 +12,39 @@ p
|
|||
| #[a(href="#source-ubuntu") Ubuntu], #[a(href="#source-osx") macOS/OS X]
|
||||
| and #[a(href="#source-windows") Windows] for details.
|
||||
|
||||
+quickstart(QUICKSTART, "Quickstart")
|
||||
+qs({config: 'venv', python: 2}) python -m pip install -U virtualenv
|
||||
+qs({config: 'venv', python: 3}) python -m pip install -U venv
|
||||
+qs({config: 'venv', python: 2}) virtualenv .env
|
||||
+qs({config: 'venv', python: 3}) python -m venv .env
|
||||
+qs({config: 'venv', os: 'mac'}) source .env/bin/activate
|
||||
+qs({config: 'venv', os: 'linux'}) source .env/bin/activate
|
||||
+qs({config: 'venv', os: 'windows'}) .env\Scripts\activate
|
||||
|
||||
+qs({package: 'pip'}) pip install -U spacy
|
||||
|
||||
+qs({package: 'conda'}) conda config --add channels conda-forge
|
||||
+qs({package: 'conda'}) conda install spacy
|
||||
|
||||
+qs({package: 'source'}) git clone https://github.com/explosion/spaCy
|
||||
+qs({package: 'source'}) cd spaCy
|
||||
+qs({package: 'source'}) pip install -r requirements.txt
|
||||
+qs({package: 'source'}) pip install -e .
|
||||
|
||||
+qs({model: 'en'}) python -m spacy download en
|
||||
+qs({model: 'de'}) python -m spacy download de
|
||||
+qs({model: 'fr'}) python -m spacy download fr
|
||||
|
||||
+h(2, "installation") Installation instructions
|
||||
|
||||
+h(3, "pip") pip
|
||||
+badge("pipy")
|
||||
|
||||
p Using pip, spaCy releases are currently only available as source packages.
|
||||
|
||||
+code(false, "bash").
|
||||
pip install -U spacy
|
||||
|
||||
+aside("Download models")
|
||||
| After installation you need to download a language model. For more info
|
||||
| and available models, see the #[+a("/docs/usage/models") docs on models].
|
||||
|
@ -22,14 +55,6 @@ p
|
|||
>>> import spacy
|
||||
>>> nlp = spacy.load('en')
|
||||
|
||||
+h(2, "pip") pip
|
||||
+badge("pipy")
|
||||
|
||||
p Using pip, spaCy releases are currently only available as source packages.
|
||||
|
||||
+code(false, "bash").
|
||||
pip install -U spacy
|
||||
|
||||
p
|
||||
| When using pip it is generally recommended to install packages in a
|
||||
| #[code virtualenv] to avoid modifying system state:
|
||||
|
@ -39,7 +64,7 @@ p
|
|||
source .env/bin/activate
|
||||
pip install spacy
|
||||
|
||||
+h(2, "conda") conda
|
||||
+h(3, "conda") conda
|
||||
+badge("conda")
|
||||
|
||||
p
|
||||
|
|
10
website/docs/usage/spacy-101.jade
Normal file
10
website/docs/usage/spacy-101.jade
Normal file
|
@ -0,0 +1,10 @@
|
|||
//- 💫 DOCS > USAGE > SPACY 101
|
||||
|
||||
include ../../_includes/_mixins
|
||||
|
||||
+h(2, "architecture") Architecture
|
||||
|
||||
+image
|
||||
include ../../assets/img/docs/architecture.svg
|
||||
.u-text-right
|
||||
+button("/assets/img/docs/architecture.svg", false, "secondary").u-text-tag View large graphic
|
10
website/docs/usage/v2.jade
Normal file
10
website/docs/usage/v2.jade
Normal file
|
@ -0,0 +1,10 @@
|
|||
//- 💫 DOCS > USAGE > WHAT'S NEW IN V2.0
|
||||
|
||||
include ../../_includes/_mixins
|
||||
|
||||
|
||||
+h(2, "features") New features
|
||||
|
||||
+h(2, "incompat") Backwards incompatibilities
|
||||
|
||||
+h(2, "migrating") Migrating from spaCy 1.x
|
|
@ -153,6 +153,24 @@ p
|
|||
| #[+a("https://tympanus.net/codrops/css_reference/background/") valid background value]
|
||||
| or shorthand — including gradients and even images!
|
||||
|
||||
+h(3, "ent-titles") Adding titles to documents
|
||||
|
||||
p
|
||||
| Rendering several large documents on one page can easily become confusing.
|
||||
| To add a headline to each visualization, you can add a #[code title] to
|
||||
| its #[code user_data]. User data is never touched or modified by spaCy.
|
||||
|
||||
+code.
|
||||
doc = nlp(u'This is a sentence about Google.')
|
||||
doc.user_data['title'] = 'This is a title'
|
||||
displacy.serve(doc, style='ent')
|
||||
|
||||
p
|
||||
| This feature is especially handy if you're using displaCy to compare
|
||||
| performance at different stages of a process, e.g. during training. Here
|
||||
| you could use the title for a brief description of the text example and
|
||||
| the number of iterations.
|
||||
|
||||
+h(2, "render") Rendering visualizations
|
||||
|
||||
p
|
||||
|
@ -232,6 +250,25 @@ p
|
|||
|
||||
+h(2, "examples") Usage examples
|
||||
|
||||
+h(3, "examples-export-svg") Export SVG graphics of dependency parses
|
||||
|
||||
+code("Example").
|
||||
import spacy
|
||||
from spacy import displacy
|
||||
from pathlib import Path
|
||||
|
||||
nlp = spacy.load('en')
|
||||
sentences = ["This is an example.", "This is another one."]
|
||||
for sent in sentences:
|
||||
doc = nlp(sent)
|
||||
svg = displacy.render(doc, style='dep')
|
||||
file_name = '-'.join([w.text for w in doc if not w.is_punct]) + '.svg'
|
||||
output_path = Path('/images/' + file_name)
|
||||
output_path.open('w', encoding='utf-8').write(svg)
|
||||
|
||||
p
|
||||
| The above code will generate the dependency visualizations and save them to
|
||||
| two files, #[code This-is-an-example.svg] and #[code This-is-another-one.svg].
|
||||
|
||||
+h(2, "manual-usage") Rendering data manually
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user