mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-13 01:32:32 +03:00
Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
commit
877f83807f
|
@ -13,3 +13,4 @@ requests>=2.13.0,<3.0.0
|
||||||
regex==2017.4.5
|
regex==2017.4.5
|
||||||
ftfy>=4.4.2,<5.0.0
|
ftfy>=4.4.2,<5.0.0
|
||||||
pytest>=3.0.6,<4.0.0
|
pytest>=3.0.6,<4.0.0
|
||||||
|
pip>=9.0.0,<10.0.0
|
||||||
|
|
1
setup.py
1
setup.py
|
@ -193,6 +193,7 @@ def setup_package():
|
||||||
'preshed>=1.0.0,<2.0.0',
|
'preshed>=1.0.0,<2.0.0',
|
||||||
'thinc>=6.6.0,<6.7.0',
|
'thinc>=6.6.0,<6.7.0',
|
||||||
'plac<1.0.0,>=0.9.6',
|
'plac<1.0.0,>=0.9.6',
|
||||||
|
'pip>=9.0.0,<10.0.0',
|
||||||
'six',
|
'six',
|
||||||
'pathlib',
|
'pathlib',
|
||||||
'ujson>=1.35',
|
'ujson>=1.35',
|
||||||
|
|
|
@ -5,6 +5,7 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||||
from .tag_map import TAG_MAP
|
from .tag_map import TAG_MAP
|
||||||
from .stop_words import STOP_WORDS
|
from .stop_words import STOP_WORDS
|
||||||
from .lemmatizer import LOOKUP
|
from .lemmatizer import LOOKUP
|
||||||
|
from .syntax_iterators import SYNTAX_ITERATORS
|
||||||
|
|
||||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||||
from ...language import Language
|
from ...language import Language
|
||||||
|
@ -23,6 +24,7 @@ class German(Language):
|
||||||
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
|
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
|
||||||
tag_map = dict(TAG_MAP)
|
tag_map = dict(TAG_MAP)
|
||||||
stop_words = set(STOP_WORDS)
|
stop_words = set(STOP_WORDS)
|
||||||
|
syntax_iterators = dict(SYNTAX_ITERATORS)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_lemmatizer(cls, nlp=None):
|
def create_lemmatizer(cls, nlp=None):
|
||||||
|
|
38
spacy/lang/de/syntax_iterators.py
Normal file
38
spacy/lang/de/syntax_iterators.py
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from ...symbols import NOUN, PROPN, PRON
|
||||||
|
|
||||||
|
|
||||||
|
def noun_chunks(obj):
    """
    Detect base noun phrases from a dependency parse. Works on both Doc and Span.

    obj: The Doc or Span to scan. It must expose `.doc` and iterate over tokens.
    YIELDS: (start, end, label) tuples. `start` is the index of the head
        noun's left edge, `end` is one past the last token of the chunk and
        `label` is the value of `doc.vocab.strings['NP']`. Indices are the
        tokens' `.i` positions, also when `obj` is a Span.
    """
    # This iterator extracts spans headed by NOUNs, starting from the left-most
    # syntactic dependent and running until the NOUN itself. For close
    # apposition and measurement constructions, the span is sometimes extended
    # to the right of the NOUN. Example: "eine Tasse Tee" (a cup (of) tea)
    # returns "eine Tasse Tee" and not just "eine Tasse", same for
    # "das Thema Familie".
    labels = ['sb', 'oa', 'da', 'nk', 'mo', 'ag', 'ROOT', 'root', 'cj', 'pd', 'og', 'app']
    doc = obj.doc # Ensure works on both Doc and Span.
    np_label = doc.vocab.strings['NP']
    np_deps = set(doc.vocab.strings[label] for label in labels)
    close_app = doc.vocab.strings['nk']

    # Index (in doc coordinates) one past the end of the last chunk yielded.
    rbracket = 0
    for word in obj:
        # Compare against word.i rather than the position within obj:
        # rbracket is derived from Token.i, so a position relative to a Span
        # that does not start at token 0 would skip too many tokens.
        if word.i < rbracket:
            continue
        if word.pos in (NOUN, PROPN, PRON) and word.dep in np_deps:
            rbracket = word.i+1
            # try to extend the span to the right
            # to capture close apposition/measurement constructions
            for rdep in doc[word.i].rights:
                if rdep.pos in (NOUN, PROPN) and rdep.dep == close_app:
                    rbracket = rdep.i+1
            yield word.left_edge.i, rbracket, np_label
|
||||||
|
|
||||||
|
|
||||||
|
SYNTAX_ITERATORS = {
|
||||||
|
'noun_chunks': noun_chunks
|
||||||
|
}
|
|
@ -7,6 +7,7 @@ from .stop_words import STOP_WORDS
|
||||||
from .lex_attrs import LEX_ATTRS
|
from .lex_attrs import LEX_ATTRS
|
||||||
from .morph_rules import MORPH_RULES
|
from .morph_rules import MORPH_RULES
|
||||||
from .lemmatizer import LEMMA_RULES, LEMMA_INDEX, LEMMA_EXC
|
from .lemmatizer import LEMMA_RULES, LEMMA_INDEX, LEMMA_EXC
|
||||||
|
from .syntax_iterators import SYNTAX_ITERATORS
|
||||||
|
|
||||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||||
from ...language import Language
|
from ...language import Language
|
||||||
|
@ -29,6 +30,7 @@ class English(Language):
|
||||||
lemma_rules = dict(LEMMA_RULES)
|
lemma_rules = dict(LEMMA_RULES)
|
||||||
lemma_index = dict(LEMMA_INDEX)
|
lemma_index = dict(LEMMA_INDEX)
|
||||||
lemma_exc = dict(LEMMA_EXC)
|
lemma_exc = dict(LEMMA_EXC)
|
||||||
|
# Registered under `syntax_iterators`, the same attribute name the German
# language class uses; the previous spelling `sytax_iterators` left this
# attribute unset under the expected name.
syntax_iterators = dict(SYNTAX_ITERATORS)
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['English']
|
__all__ = ['English']
|
||||||
|
|
43
spacy/lang/en/syntax_iterators.py
Normal file
43
spacy/lang/en/syntax_iterators.py
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from ...symbols import NOUN, PROPN, PRON
|
||||||
|
|
||||||
|
|
||||||
|
def noun_chunks(obj):
    """
    Detect base noun phrases from a dependency parse. Works on both Doc and Span.

    obj: The Doc or Span to scan. It must expose `.doc` and iterate over tokens.
    YIELDS: (start, end, label) tuples. `start` is the index of the head
        word's left edge, `end` is one past the head word and `label` is the
        value of `doc.vocab.strings['NP']`. Indices are the tokens' `.i`
        positions.
    """
    labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj',
              'attr', 'ROOT']
    doc = obj.doc # Ensure works on both Doc and Span.
    # A set, because membership is tested for every candidate token.
    np_deps = set(doc.vocab.strings[label] for label in labels)
    conj = doc.vocab.strings['conj']
    np_label = doc.vocab.strings['NP']
    # Token indices already covered by a chunk that has been yielded.
    seen = set()
    for word in obj:
        if word.pos not in (NOUN, PROPN, PRON):
            continue
        # Prevent nested chunks from being produced
        if word.i in seen:
            continue
        if word.dep in np_deps:
            heads_chunk = True
        elif word.dep == conj:
            # Walk up the chain of conjuncts that precede this word.
            head = word.head
            while head.dep == conj and head.head.i < head.i:
                head = head.head
            # If the head is an NP, and we're coordinated to it, we're an NP
            heads_chunk = head.dep in np_deps
        else:
            heads_chunk = False
        if not heads_chunk:
            continue
        # Skip the word if any part of its subtree was already emitted.
        if any(w.i in seen for w in word.subtree):
            continue
        seen.update(range(word.left_edge.i, word.i+1))
        yield word.left_edge.i, word.i+1, np_label
|
||||||
|
|
||||||
|
|
||||||
|
SYNTAX_ITERATORS = {
|
||||||
|
'noun_chunks': noun_chunks
|
||||||
|
}
|
|
@ -29,7 +29,7 @@
|
||||||
"NAVIGATION": {
|
"NAVIGATION": {
|
||||||
"Home": "/",
|
"Home": "/",
|
||||||
"Usage": "/docs/usage",
|
"Usage": "/docs/usage",
|
||||||
"API": "/docs/api",
|
"Reference": "/docs/api",
|
||||||
"Demos": "/docs/usage/showcase",
|
"Demos": "/docs/usage/showcase",
|
||||||
"Blog": "https://explosion.ai/blog"
|
"Blog": "https://explosion.ai/blog"
|
||||||
},
|
},
|
||||||
|
@ -55,6 +55,31 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
"QUICKSTART": [
|
||||||
|
{ "id": "os", "title": "Operating system", "options": [
|
||||||
|
{ "id": "mac", "title": "macOS / OSX", "checked": true },
|
||||||
|
{ "id": "windows", "title": "Windows" },
|
||||||
|
{ "id": "linux", "title": "Linux" }]
|
||||||
|
},
|
||||||
|
{ "id": "package", "title": "Package manager", "options": [
|
||||||
|
{ "id": "pip", "title": "pip", "checked": true },
|
||||||
|
{ "id": "conda", "title": "conda" },
|
||||||
|
{ "id": "source", "title": "from source" }]
|
||||||
|
},
|
||||||
|
{ "id": "python", "title": "Python version", "options": [
|
||||||
|
{ "id": 2, "title": "2.x" },
|
||||||
|
{ "id": 3, "title": "3.x", "checked": true }]
|
||||||
|
},
|
||||||
|
{ "id": "config", "title": "Configuration", "multiple": true, "options": [
|
||||||
|
{"id": "venv", "title": "virtualenv", "help": "Use a virtual environment and install spaCy into a user directory" }]
|
||||||
|
},
|
||||||
|
{ "id": "model", "title": "Models", "multiple": true, "options": [
|
||||||
|
{ "id": "en", "title": "English", "meta": "50MB" },
|
||||||
|
{ "id": "de", "title": "German", "meta": "645MB" },
|
||||||
|
{ "id": "fr", "title": "French", "meta": "1.33GB" }]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
|
||||||
"V_CSS": "1.6",
|
"V_CSS": "1.6",
|
||||||
"V_JS": "1.2",
|
"V_JS": "1.2",
|
||||||
"DEFAULT_SYNTAX": "python",
|
"DEFAULT_SYNTAX": "python",
|
||||||
|
|
|
@ -86,6 +86,44 @@ mixin permalink(id)
|
||||||
block
|
block
|
||||||
|
|
||||||
|
|
||||||
|
//- Quickstart widget
|
||||||
|
quickstart.js with manual markup, inspired by PyTorch's "Getting started"
|
||||||
|
groups - [object] option groups, uses global variable QUICKSTART
|
||||||
|
headline - [string] optional text to be rendered as widget headline
|
||||||
|
|
||||||
|
mixin quickstart(groups, headline)
|
||||||
|
.c-quickstart.o-block#qs
|
||||||
|
.c-quickstart__content
|
||||||
|
if headline
|
||||||
|
+h(2)=headline
|
||||||
|
for group in groups
|
||||||
|
.c-quickstart__group.u-text-small(data-qs-group=group.id)
|
||||||
|
.c-quickstart__legend=group.title
|
||||||
|
.c-quickstart__fields
|
||||||
|
for option in group.options
|
||||||
|
input.c-quickstart__input(class="c-quickstart__input--" + (group.multiple ? "check" : "radio") type=group.multiple ? "checkbox" : "radio" name=group.id id=option.id value=option.id checked=option.checked)
|
||||||
|
label.c-quickstart__label(for=option.id)=option.title
|
||||||
|
if option.meta
|
||||||
|
| #[span.c-quickstart__label__meta (#{option.meta})]
|
||||||
|
if option.help
|
||||||
|
| #[+help(option.help).c-quickstart__label__meta]
|
||||||
|
|
||||||
|
pre.c-code-block
|
||||||
|
code.c-code-block__content.c-quickstart__code(data-qs-results="")
|
||||||
|
block
|
||||||
|
|
||||||
|
|
||||||
|
//- Quickstart code item
|
||||||
|
data [object] - Rendering conditions (keyed by option group ID, value: option)
|
||||||
|
|
||||||
|
mixin qs(data)
|
||||||
|
- args = {}
|
||||||
|
for value, setting in data
|
||||||
|
- args['data-qs-' + setting] = value
|
||||||
|
span.c-quickstart__line&attributes(args)
|
||||||
|
block
|
||||||
|
|
||||||
|
|
||||||
//- Terminal-style code window
|
//- Terminal-style code window
|
||||||
label - [string] title displayed in top bar of terminal window
|
label - [string] title displayed in top bar of terminal window
|
||||||
|
|
||||||
|
|
|
@ -47,6 +47,14 @@ mixin api(path)
|
||||||
| #[+icon("book", 18).o-icon--inline.u-color-subtle]
|
| #[+icon("book", 18).o-icon--inline.u-color-subtle]
|
||||||
|
|
||||||
|
|
||||||
|
//- Help icon with tooltip
    tooltip - [string] Tooltip text
    Extra attributes and classes passed to the mixin call are applied to the
    wrapping span.

mixin help(tooltip)
    span(data-tooltip=tooltip)&attributes(attributes)
        +icon("help", 16).o-icon--inline
|
||||||
|
|
||||||
|
|
||||||
//- Aside for text
|
//- Aside for text
|
||||||
label - [string] aside title (optional)
|
label - [string] aside title (optional)
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,13 @@
|
||||||
//- 💫 INCLUDES > SCRIPTS
|
//- 💫 INCLUDES > SCRIPTS
|
||||||
|
|
||||||
script(src="/assets/js/main.js?v#{V_JS}", type="text/javascript")
|
script(src="/assets/js/main.js?v#{V_JS}")
|
||||||
script(src="/assets/js/prism.js", type="text/javascript")
|
script(src="/assets/js/prism.js")
|
||||||
|
|
||||||
if SECTION == "docs"
|
if SECTION == "docs"
|
||||||
|
if quickstart
|
||||||
|
script(src="/assets/js/quickstart.js")
|
||||||
|
script var qs = new Quickstart("#qs")
|
||||||
|
|
||||||
script.
|
script.
|
||||||
((window.gitter = {}).chat = {}).options = {
|
((window.gitter = {}).chat = {}).options = {
|
||||||
useStyles: false,
|
useStyles: false,
|
||||||
|
|
83
website/assets/css/_components/_quickstart.sass
Normal file
83
website/assets/css/_components/_quickstart.sass
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
//- 💫 CSS > COMPONENTS > QUICKSTART
|
||||||
|
|
||||||
|
.c-quickstart
|
||||||
|
border: 1px solid $color-subtle
|
||||||
|
border-radius: 2px
|
||||||
|
display: none
|
||||||
|
background: $color-subtle-light
|
||||||
|
|
||||||
|
.c-quickstart__content
|
||||||
|
padding: 2rem 3rem
|
||||||
|
|
||||||
|
.c-quickstart__input
|
||||||
|
display: none
|
||||||
|
|
||||||
|
.c-quickstart__label
|
||||||
|
cursor: pointer
|
||||||
|
background: $color-back
|
||||||
|
border: 1px solid $color-subtle
|
||||||
|
border-radius: 2px
|
||||||
|
display: inline-block
|
||||||
|
padding: 0.75rem 1.25rem
|
||||||
|
margin: 0 0.5rem 0.5rem 0
|
||||||
|
font-weight: bold
|
||||||
|
|
||||||
|
&:hover
|
||||||
|
background: lighten($color-theme-light, 5)
|
||||||
|
|
||||||
|
.c-quickstart__input--radio:checked + &
|
||||||
|
color: $color-back
|
||||||
|
border-color: $color-theme
|
||||||
|
background: $color-theme
|
||||||
|
|
||||||
|
.c-quickstart__input--check + &:before
|
||||||
|
content: ""
|
||||||
|
background: $color-back
|
||||||
|
display: inline-block
|
||||||
|
width: 20px
|
||||||
|
height: 20px
|
||||||
|
position: relative
|
||||||
|
bottom: 0.2rem
|
||||||
|
border: 1px solid $color-subtle
|
||||||
|
vertical-align: middle
|
||||||
|
margin-right: 1rem
|
||||||
|
cursor: pointer
|
||||||
|
border-radius: 50%
|
||||||
|
|
||||||
|
.c-quickstart__input--check:checked + &:before
|
||||||
|
background: $color-theme url()
|
||||||
|
background-size: contain
|
||||||
|
border-color: $color-theme
|
||||||
|
|
||||||
|
.c-quickstart__label__meta
|
||||||
|
font-weight: normal
|
||||||
|
color: $color-subtle-dark
|
||||||
|
|
||||||
|
.c-quickstart__group
|
||||||
|
@include breakpoint(min, md)
|
||||||
|
display: flex
|
||||||
|
flex-flow: row nowrap
|
||||||
|
|
||||||
|
&:not(:last-child)
|
||||||
|
margin-bottom: 1rem
|
||||||
|
|
||||||
|
.c-quickstart__fields
|
||||||
|
flex: 100%
|
||||||
|
|
||||||
|
.c-quickstart__legend
|
||||||
|
color: $color-subtle-dark
|
||||||
|
margin-right: 2rem
|
||||||
|
padding-top: 0.75rem
|
||||||
|
flex: 1 1 35%
|
||||||
|
font-weight: bold
|
||||||
|
|
||||||
|
.c-quickstart__line
|
||||||
|
display: block
|
||||||
|
|
||||||
|
&:before
|
||||||
|
color: $color-theme
|
||||||
|
margin-right: 1em
|
||||||
|
content: "$"
|
||||||
|
|
||||||
|
.c-quickstart__code
|
||||||
|
font-size: 1.6rem
|
30
website/assets/css/_components/_tooltips.sass
Normal file
30
website/assets/css/_components/_tooltips.sass
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
//- 💫 CSS > COMPONENTS > TOOLTIPS
|
||||||
|
|
||||||
|
[data-tooltip]
|
||||||
|
position: relative
|
||||||
|
|
||||||
|
@include breakpoint(min, sm)
|
||||||
|
&:before
|
||||||
|
@include position(absolute, top, left, 125%, 50%)
|
||||||
|
display: inline-block
|
||||||
|
content: attr(data-tooltip)
|
||||||
|
background: $color-front
|
||||||
|
border-radius: 2px
|
||||||
|
color: $color-back
|
||||||
|
font-family: inherit
|
||||||
|
font-size: 1.3rem
|
||||||
|
line-height: 1.25
|
||||||
|
opacity: 0
|
||||||
|
padding: 0.5em 0.75em
|
||||||
|
transform: translateX(-50%) translateY(-2px)
|
||||||
|
transition: opacity 0.1s ease-out, transform 0.1s ease-out
|
||||||
|
visibility: hidden
|
||||||
|
//white-space: nowrap
|
||||||
|
min-width: 200px
|
||||||
|
max-width: 300px
|
||||||
|
z-index: 200
|
||||||
|
|
||||||
|
&:hover:before
|
||||||
|
opacity: 1
|
||||||
|
transform: translateX(-50%) translateY(0)
|
||||||
|
visibility: visible
|
|
@ -32,3 +32,5 @@ $theme: blue !default
|
||||||
@import _components/navigation
|
@import _components/navigation
|
||||||
@import _components/sidebar
|
@import _components/sidebar
|
||||||
@import _components/tables
|
@import _components/tables
|
||||||
|
@import _components/quickstart
|
||||||
|
@import _components/tooltips
|
||||||
|
|
|
@ -27,5 +27,8 @@
|
||||||
<symbol id="star" viewBox="0 0 24 24">
|
<symbol id="star" viewBox="0 0 24 24">
|
||||||
<path d="M12 17.25l-6.188 3.75 1.641-7.031-5.438-4.734 7.172-0.609 2.813-6.609 2.813 6.609 7.172 0.609-5.438 4.734 1.641 7.031z"></path>
|
<path d="M12 17.25l-6.188 3.75 1.641-7.031-5.438-4.734 7.172-0.609 2.813-6.609 2.813 6.609 7.172 0.609-5.438 4.734 1.641 7.031z"></path>
|
||||||
</symbol>
|
</symbol>
|
||||||
|
<symbol id="help" viewBox="0 0 24 24">
|
||||||
|
<path d="M12 6c2.203 0 3.984 1.781 3.984 3.984 0 2.484-3 2.766-3 5.016h-1.969c0-3.234 3-3 3-5.016 0-1.078-0.938-1.969-2.016-1.969s-2.016 0.891-2.016 1.969h-1.969c0-2.203 1.781-3.984 3.984-3.984zM12 20.016c4.406 0 8.016-3.609 8.016-8.016s-3.609-8.016-8.016-8.016-8.016 3.609-8.016 8.016 3.609 8.016 8.016 8.016zM12 2.016c5.531 0 9.984 4.453 9.984 9.984s-4.453 9.984-9.984 9.984-9.984-4.453-9.984-9.984 4.453-9.984 9.984-9.984zM11.016 18v-2.016h1.969v2.016h-1.969z"/>
|
||||||
|
</symbol>
|
||||||
</defs>
|
</defs>
|
||||||
</svg>
|
</svg>
|
||||||
|
|
Before Width: | Height: | Size: 4.9 KiB After Width: | Height: | Size: 5.4 KiB |
|
@ -1,6 +1,4 @@
|
||||||
//- ----------------------------------
|
|
||||||
//- 💫 MAIN JAVASCRIPT
|
//- 💫 MAIN JAVASCRIPT
|
||||||
//- ----------------------------------
|
|
||||||
|
|
||||||
'use strict'
|
'use strict'
|
||||||
|
|
||||||
|
|
7
website/assets/js/quickstart.js
Normal file
7
website/assets/js/quickstart.js
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
/**
|
||||||
|
* quickstart.js
|
||||||
|
*
|
||||||
|
* @author Ines Montani <ines@ines.io>
|
||||||
|
* @version 0.0.1
|
||||||
|
* @license MIT
|
||||||
|
*/'use strict';var _createClass=function(){function a(b,c){for(var e,d=0;d<c.length;d++)e=c[d],e.enumerable=e.enumerable||!1,e.configurable=!0,'value'in e&&(e.writable=!0),Object.defineProperty(b,e.key,e)}return function(b,c,d){return c&&a(b.prototype,c),d&&a(b,d),b}}();function _toConsumableArray(a){if(Array.isArray(a)){for(var b=0,c=Array(a.length);b<a.length;b++)c[b]=a[b];return c}return Array.from(a)}function _classCallCheck(a,b){if(!(a instanceof b))throw new TypeError('Cannot call a class as a function')}var Quickstart=function(){function a(){var b=0<arguments.length&&void 0!==arguments[0]?arguments[0]:'#quickstart',d=arguments[1],c=2<arguments.length&&void 0!==arguments[2]?arguments[2]:{};_classCallCheck(this,a),this.container='string'==typeof b?this._$(b):b,this.groups=d,this.pfx=c.prefix||'qs',this.dpfx='data-'+this.pfx,this.init=this.init.bind(this),c.noInit||document.addEventListener('DOMContentLoaded',this.init)}return _createClass(a,[{key:'init',value:function init(){this.updateContainer(),this.container.style.display='block',this.container.classList.add(''+this.pfx);var b=this.groups;b instanceof Array?b.reverse().forEach(this.createGroup.bind(this)):this._$$('['+this.dpfx+'-group]').forEach(this.updateGroup.bind(this))}},{key:'initGroup',value:function initGroup(b,c){b.addEventListener('change',this.update.bind(this)),b.dispatchEvent(new CustomEvent('change',{detail:c}))}},{key:'updateGroup',value:function updateGroup(b){var c=b.getAttribute(this.dpfx+'-group'),d=this.createStyles(c);b.insertBefore(d,b.firstChild),this.initGroup(b,c)}},{key:'update',value:function update(b){var f=this,c=b.detail||b.target.name,d=this._$$('[name='+c+']').filter(function(h){return h.checked}).map(function(h){return h.value}),e=d.map(function(h){return':not(['+f.dpfx+'-'+c+'="'+h+'"])'}).join(''),g='['+this.dpfx+'-results]>['+this.dpfx+'-'+c+']'+e+' {display: none}';this._$('['+this.dpfx+'-style="'+c+'"]').textContent=g}},{key:'updateContainer',value:function 
updateContainer(){if(!this._$('['+this.dpfx+'-results]')){var b=this.childNodes(this.container,'pre'),c=b?b[0]:this._c('pre',this.pfx+'-code'),d=this.childNodes(c,'code')||this.childNodes(this.container,'code'),e=d?d[0]:this._c('code',this.pfx+'-results');e.setAttribute(this.dpfx+'-results','');var f=this.childNodes(e,'span')||this.childNodes(c,'span')||this.childNodes(this.container,'span');f&&f.forEach(function(g){return e.appendChild(g)}),c.appendChild(e),this.container.appendChild(c)}}},{key:'createGroup',value:function createGroup(b){var d=this,c=this._c('div',this.pfx+'-group');c.setAttribute(this.dpfx+'-group',b.id),c.innerHTML=this.createStyles(b.id).outerHTML,c.innerHTML+='<div class="'+this.pfx+'-legend">'+b.title+'</div>',c.innerHTML+='<div class="'+this.pfx+'-fields">'+b.options.map(function(e){var f=b.multiple?'checkbox':'radio';return'<input class="'+d.pfx+'-input '+d.pfx+'-input--'+f+'" type="'+f+'" name="'+b.id+'" id="'+e.id+'" value="'+e.id+'" '+(e.checked?'checked':'')+' /><label class="'+d.pfx+'-label" for="'+e.id+'">'+e.title+'</label>'}).join('')+'</div>',this.container.insertBefore(c,this.container.firstChild),this.initGroup(c,b.id)}},{key:'createStyles',value:function createStyles(b){var c=this._c('style');return c.setAttribute(this.dpfx+'-style',b),c.textContent='['+this.dpfx+'-results]>['+this.dpfx+'-'+b+'] {display: none}',c}},{key:'childNodes',value:function childNodes(b,c){var d=c.toUpperCase();if(!b.hasChildNodes)return!1;var e=[].concat(_toConsumableArray(b.childNodes)).filter(function(f){return f.nodeName===d});return!!e.length&&e}},{key:'_$',value:function _$(b){return document.querySelector(b)}},{key:'_$$',value:function _$$(b){return[].concat(_toConsumableArray(document.querySelectorAll(b)))}},{key:'_c',value:function _c(b,c){var d=document.createElement(b);return c&&(d.className=c),d}}]),a}();
|
|
@ -21,6 +21,7 @@
|
||||||
"GoldParse": "goldparse"
|
"GoldParse": "goldparse"
|
||||||
},
|
},
|
||||||
"Other": {
|
"Other": {
|
||||||
|
"Command line": "cli",
|
||||||
"displaCy": "displacy",
|
"displaCy": "displacy",
|
||||||
"Utility Functions": "util",
|
"Utility Functions": "util",
|
||||||
"Annotation Specs": "annotation",
|
"Annotation Specs": "annotation",
|
||||||
|
@ -112,6 +113,11 @@
|
||||||
"tag": "class"
|
"tag": "class"
|
||||||
},
|
},
|
||||||
|
|
||||||
|
"cli": {
|
||||||
|
"title": "Command Line Interface",
|
||||||
|
"next": "displacy"
|
||||||
|
},
|
||||||
|
|
||||||
"displacy": {
|
"displacy": {
|
||||||
"title": "displaCy",
|
"title": "displaCy",
|
||||||
"tag": "module"
|
"tag": "module"
|
||||||
|
|
|
@ -5,10 +5,11 @@
|
||||||
"Models": "models",
|
"Models": "models",
|
||||||
"Lightning tour": "lightning-tour",
|
"Lightning tour": "lightning-tour",
|
||||||
"Visualizers": "visualizers",
|
"Visualizers": "visualizers",
|
||||||
"Command line": "cli",
|
"Troubleshooting": "troubleshooting",
|
||||||
"Troubleshooting": "troubleshooting"
|
"What's new in v2.0": "v2"
|
||||||
},
|
},
|
||||||
"Workflows": {
|
"Workflows": {
|
||||||
|
"spaCy 101": "spacy-101",
|
||||||
"Loading the pipeline": "language-processing-pipeline",
|
"Loading the pipeline": "language-processing-pipeline",
|
||||||
"Processing text": "processing-text",
|
"Processing text": "processing-text",
|
||||||
"spaCy's data model": "data-model",
|
"spaCy's data model": "data-model",
|
||||||
|
@ -33,7 +34,12 @@
|
||||||
|
|
||||||
"index": {
|
"index": {
|
||||||
"title": "Install spaCy",
|
"title": "Install spaCy",
|
||||||
"next": "models"
|
"next": "models",
|
||||||
|
"quickstart": true
|
||||||
|
},
|
||||||
|
|
||||||
|
"v2": {
|
||||||
|
"title": "What's new in v2.0"
|
||||||
},
|
},
|
||||||
|
|
||||||
"models": {
|
"models": {
|
||||||
|
@ -43,17 +49,11 @@
|
||||||
|
|
||||||
"lightning-tour": {
|
"lightning-tour": {
|
||||||
"title": "Lightning tour",
|
"title": "Lightning tour",
|
||||||
"next": "visualizers"
|
"next": "spacy-101"
|
||||||
},
|
},
|
||||||
|
|
||||||
"visualizers": {
|
"visualizers": {
|
||||||
"title": "Visualizers",
|
"title": "Visualizers"
|
||||||
"next": "cli"
|
|
||||||
},
|
|
||||||
|
|
||||||
"cli": {
|
|
||||||
"title": "Command Line Interface",
|
|
||||||
"next": "troubleshooting"
|
|
||||||
},
|
},
|
||||||
|
|
||||||
"troubleshooting": {
|
"troubleshooting": {
|
||||||
|
@ -65,6 +65,10 @@
|
||||||
"title": "Resources"
|
"title": "Resources"
|
||||||
},
|
},
|
||||||
|
|
||||||
|
"spacy-101": {
|
||||||
|
"title": "spaCy 101"
|
||||||
|
},
|
||||||
|
|
||||||
"language-processing-pipeline": {
|
"language-processing-pipeline": {
|
||||||
"title": "Loading a language processing pipeline",
|
"title": "Loading a language processing pipeline",
|
||||||
"next": "processing-text"
|
"next": "processing-text"
|
||||||
|
|
|
@ -12,6 +12,39 @@ p
|
||||||
| #[a(href="#source-ubuntu") Ubuntu], #[a(href="#source-osx") macOS/OS X]
|
| #[a(href="#source-ubuntu") Ubuntu], #[a(href="#source-osx") macOS/OS X]
|
||||||
| and #[a(href="#source-windows") Windows] for details.
|
| and #[a(href="#source-windows") Windows] for details.
|
||||||
|
|
||||||
|
+quickstart(QUICKSTART, "Quickstart")
|
||||||
|
+qs({config: 'venv', python: 2}) python -m pip install -U virtualenv
|
||||||
|
+qs({config: 'venv', python: 3}) python -m pip install -U venv
|
||||||
|
+qs({config: 'venv', python: 2}) virtualenv .env
|
||||||
|
+qs({config: 'venv', python: 3}) python -m venv .env
|
||||||
|
+qs({config: 'venv', os: 'mac'}) source .env/bin/activate
|
||||||
|
+qs({config: 'venv', os: 'linux'}) source .env/bin/activate
|
||||||
|
+qs({config: 'venv', os: 'windows'}) .env\Scripts\activate
|
||||||
|
|
||||||
|
+qs({package: 'pip'}) pip install -U spacy
|
||||||
|
|
||||||
|
+qs({package: 'conda'}) conda config --add channels conda-forge
|
||||||
|
+qs({package: 'conda'}) conda install spacy
|
||||||
|
|
||||||
|
+qs({package: 'source'}) git clone https://github.com/explosion/spaCy
|
||||||
|
+qs({package: 'source'}) cd spaCy
|
||||||
|
+qs({package: 'source'}) pip install -r requirements.txt
|
||||||
|
+qs({package: 'source'}) pip install -e .
|
||||||
|
|
||||||
|
+qs({model: 'en'}) python -m spacy download en
|
||||||
|
+qs({model: 'de'}) python -m spacy download de
|
||||||
|
+qs({model: 'fr'}) python -m spacy download fr
|
||||||
|
|
||||||
|
+h(2, "installation") Installation instructions
|
||||||
|
|
||||||
|
+h(3, "pip") pip
|
||||||
|
+badge("pipy")
|
||||||
|
|
||||||
|
p Using pip, spaCy releases are currently only available as source packages.
|
||||||
|
|
||||||
|
+code(false, "bash").
|
||||||
|
pip install -U spacy
|
||||||
|
|
||||||
+aside("Download models")
|
+aside("Download models")
|
||||||
| After installation you need to download a language model. For more info
|
| After installation you need to download a language model. For more info
|
||||||
| and available models, see the #[+a("/docs/usage/models") docs on models].
|
| and available models, see the #[+a("/docs/usage/models") docs on models].
|
||||||
|
@ -22,14 +55,6 @@ p
|
||||||
>>> import spacy
|
>>> import spacy
|
||||||
>>> nlp = spacy.load('en')
|
>>> nlp = spacy.load('en')
|
||||||
|
|
||||||
+h(2, "pip") pip
|
|
||||||
+badge("pipy")
|
|
||||||
|
|
||||||
p Using pip, spaCy releases are currently only available as source packages.
|
|
||||||
|
|
||||||
+code(false, "bash").
|
|
||||||
pip install -U spacy
|
|
||||||
|
|
||||||
p
|
p
|
||||||
| When using pip it is generally recommended to install packages in a
|
| When using pip it is generally recommended to install packages in a
|
||||||
| #[code virtualenv] to avoid modifying system state:
|
| #[code virtualenv] to avoid modifying system state:
|
||||||
|
@ -39,7 +64,7 @@ p
|
||||||
source .env/bin/activate
|
source .env/bin/activate
|
||||||
pip install spacy
|
pip install spacy
|
||||||
|
|
||||||
+h(2, "conda") conda
|
+h(3, "conda") conda
|
||||||
+badge("conda")
|
+badge("conda")
|
||||||
|
|
||||||
p
|
p
|
||||||
|
|
10
website/docs/usage/spacy-101.jade
Normal file
10
website/docs/usage/spacy-101.jade
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
//- 💫 DOCS > USAGE > SPACY 101
|
||||||
|
|
||||||
|
include ../../_includes/_mixins
|
||||||
|
|
||||||
|
+h(2, "architecture") Architecture
|
||||||
|
|
||||||
|
+image
|
||||||
|
include ../../assets/img/docs/architecture.svg
|
||||||
|
.u-text-right
|
||||||
|
+button("/assets/img/docs/architecture.svg", false, "secondary").u-text-tag View large graphic
|
10
website/docs/usage/v2.jade
Normal file
10
website/docs/usage/v2.jade
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
//- 💫 DOCS > USAGE > WHAT'S NEW IN V2.0
|
||||||
|
|
||||||
|
include ../../_includes/_mixins
|
||||||
|
|
||||||
|
|
||||||
|
+h(2, "features") New features
|
||||||
|
|
||||||
|
+h(2, "incompat") Backwards incompatibilities
|
||||||
|
|
||||||
|
+h(2, "migrating") Migrating from spaCy 1.x
|
|
@ -153,6 +153,24 @@ p
|
||||||
| #[+a("https://tympanus.net/codrops/css_reference/background/") valid background value]
|
| #[+a("https://tympanus.net/codrops/css_reference/background/") valid background value]
|
||||||
| or shorthand — including gradients and even images!
|
| or shorthand — including gradients and even images!
|
||||||
|
|
||||||
|
+h(3, "ent-titles") Adding titles to documents
|
||||||
|
|
||||||
|
p
|
||||||
|
| Rendering several large documents on one page can easily become confusing.
|
||||||
|
| To add a headline to each visualization, you can add a #[code title] to
|
||||||
|
| its #[code user_data]. User data is never touched or modified by spaCy.
|
||||||
|
|
||||||
|
+code.
|
||||||
|
doc = nlp(u'This is a sentence about Google.')
|
||||||
|
doc.user_data['title'] = 'This is a title'
|
||||||
|
displacy.serve(doc, style='ent')
|
||||||
|
|
||||||
|
p
|
||||||
|
| This feature is especially handy if you're using displaCy to compare
|
||||||
|
| performance at different stages of a process, e.g. during training. Here
|
||||||
|
| you could use the title for a brief description of the text example and
|
||||||
|
| the number of iterations.
|
||||||
|
|
||||||
+h(2, "render") Rendering visualizations
|
+h(2, "render") Rendering visualizations
|
||||||
|
|
||||||
p
|
p
|
||||||
|
@ -232,6 +250,25 @@ p
|
||||||
|
|
||||||
+h(2, "examples") Usage examples
|
+h(2, "examples") Usage examples
|
||||||
|
|
||||||
|
+h(3, "examples-export-svg") Export SVG graphics of dependency parses
|
||||||
|
|
||||||
|
+code("Example").
|
||||||
|
import spacy
|
||||||
|
from spacy import displacy
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
nlp = spacy.load('en')
|
||||||
|
sentences = ["This is an example.", "This is another one."]
|
||||||
|
for sent in sentences:
|
||||||
|
doc = nlp(sent)
|
||||||
|
svg = displacy.render(doc, style='dep')
|
||||||
|
file_name = '-'.join([w.text for w in doc if not w.is_punct]) + '.svg'
|
||||||
|
output_path = Path('/images/' + file_name)
|
||||||
|
output_path.open('w', encoding='utf-8').write(svg)
|
||||||
|
|
||||||
|
p
|
||||||
|
| The above code will generate the dependency visualizations and write them to
|
||||||
|
| two files, #[code This-is-an-example.svg] and #[code This-is-another-one.svg].
|
||||||
|
|
||||||
+h(2, "manual-usage") Rendering data manually
|
+h(2, "manual-usage") Rendering data manually
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user