Merge branch 'develop' of https://github.com/explosion/spaCy into develop

This commit is contained in:
Matthew Honnibal 2017-11-06 22:09:20 +01:00
commit 906aece532
15 changed files with 141 additions and 109 deletions

View File

@ -66,8 +66,7 @@
{ "id": 3, "title": "3.x", "checked": true }] { "id": 3, "title": "3.x", "checked": true }]
}, },
{ "id": "config", "title": "Configuration", "multiple": true, "options": [ { "id": "config", "title": "Configuration", "multiple": true, "options": [
{"id": "venv", "title": "virtualenv", "help": "Use a virtual environment and install spaCy into a user directory" }, {"id": "venv", "title": "virtualenv", "help": "Use a virtual environment and install spaCy into a user directory" }]
{"id": "gpu", "title": "GPU", "help": "Run spaCy on GPU to make it faster. Requires an NVIDIA graphics card with CUDA 2+. See section below for more info."}]
}, },
{ "id": "model", "title": "Models", "multiple": true } { "id": "model", "title": "Models", "multiple": true }
], ],

View File

@ -243,13 +243,12 @@ mixin code(label, language, prompt, height, icon, wrap)
pre.c-code-block.o-block(class="lang-#{(language || DEFAULT_SYNTAX)}" class=icon ? "c-code-block--has-icon" : null style=height ? "height: #{height}px" : null)&attributes(attributes) pre.c-code-block.o-block(class="lang-#{(language || DEFAULT_SYNTAX)}" class=icon ? "c-code-block--has-icon" : null style=height ? "height: #{height}px" : null)&attributes(attributes)
if label if label
h4.u-text-label.u-text-label--dark=label h4.u-text-label.u-text-label--dark=label
- var icon = icon || (prompt == 'accept' || prompt == 'reject')
if icon if icon
- var classes = {'accept': 'u-color-green', 'reject': 'u-color-red'} - var classes = {'accept': 'u-color-green', 'reject': 'u-color-red'}
.c-code-block__icon(class=classes[icon] || null class=classes[icon] ? "c-code-block__icon--border" : null) .c-code-block__icon(class=classes[icon] || null class=classes[icon] ? "c-code-block__icon--border" : null)
+icon(icon, 18) +icon(icon, 18)
code.c-code-block__content(class=wrap ? "u-wrap" : null data-prompt=icon ? null : prompt) code.c-code-block__content(class=wrap ? "u-wrap" : null data-prompt=prompt)
block block
@ -262,14 +261,14 @@ mixin code-wrapper()
//- Code blocks to display old/new versions //- Code blocks to display old/new versions
label - [string] ARIA label for block. Defaults to "correct"/"incorrect". label - [string] ARIA label for block. Defaults to "correct"/"incorrect".
mixin code-old(label) mixin code-old(label, lang, prompt)
- var label = label || 'incorrect' - var label = label || 'incorrect'
+code(false, false, false, false, "reject").o-block-small(aria-label=label) +code(false, lang, prompt, false, "reject").o-block-small(aria-label=label)
block block
mixin code-new(label) mixin code-new(label, lang, prompt)
- var label = label || 'correct' - var label = label || 'correct'
+code(false, false, false, false, "accept").o-block-small(aria-label=label) +code(false, lang, prompt, false, "accept").o-block-small(aria-label=label)
block block
@ -452,8 +451,8 @@ mixin head-cell()
//- Table cell (only used within +row in +table) //- Table cell (only used within +row in +table)
mixin cell(align) mixin cell(...style)
td.c-table__cell.u-text(class=align ? "u-text-" + align : null)&attributes(attributes) td.c-table__cell.u-text(class=prefixArgs(style, "c-table__cell"))&attributes(attributes)
block block

View File

@ -71,7 +71,7 @@ for id in CURRENT_MODELS
+label=label +label=label
if MODEL_META[field] if MODEL_META[field]
| #[+help(MODEL_META[field]).u-color-subtle] | #[+help(MODEL_META[field]).u-color-subtle]
+cell("right")(data-tpl=id data-tpl-key=field) +cell("num")(data-tpl=id data-tpl-key=field)
| n/a | n/a
p.u-text-small.u-color-dark(data-tpl=id data-tpl-key="notes") p.u-text-small.u-color-dark(data-tpl=id data-tpl-key="notes")

View File

@ -32,6 +32,15 @@
&:not(:last-child) &:not(:last-child)
border-right: 1px solid $color-subtle border-right: 1px solid $color-subtle
&.c-table__cell--num
text-align: right
font-feature-settings: "tnum"
font-variant-numeric: tabular-nums
& > strong
font-feature-settings: initial
font-variant-numeric: initial
//- Table head cell //- Table head cell

View File

@ -111,8 +111,8 @@ include _includes/_mixins
| deliver accuracy in line with the latest research systems, | deliver accuracy in line with the latest research systems,
| even when evaluated from raw text. With these innovations, spaCy | even when evaluated from raw text. With these innovations, spaCy
| v2.0's models are #[strong 10× smaller], | v2.0's models are #[strong 10× smaller],
| #[strong 20% more accurate], and #[strong just as fast] as the | #[strong 20% more accurate], and #[strong even cheaper to run] than
| previous generation. | the previous generation.
.o-block-small.u-text-right .o-block-small.u-text-right
+button("/models", true, "secondary-light") Download models +button("/models", true, "secondary-light") Download models

View File

@ -20,8 +20,8 @@ include ../_includes/_mixins
| deliver #[strong accuracy in line with the latest research systems], | deliver #[strong accuracy in line with the latest research systems],
| even when evaluated from raw text. With these innovations, spaCy | even when evaluated from raw text. With these innovations, spaCy
| v2.0's models are #[strong 10× smaller], | v2.0's models are #[strong 10× smaller],
| #[strong 20% more accurate], and #[strong just as fast] as the | #[strong 20% more accurate], and #[strong even cheaper to run] than
| previous generation. | the previous generation.
include ../usage/_models/_quickstart include ../usage/_models/_quickstart

View File

@ -5,41 +5,41 @@
+cell #[strong spaCy v2.x] +cell #[strong spaCy v2.x]
+cell 2017 +cell 2017
+cell Python / Cython +cell Python / Cython
+cell("right") #[strong 92.6] +cell("num") #[strong 92.6]
+cell("right") #[em n/a] +cell("num") #[em n/a]
| #[+help("This table shows speed as benchmarked by Choi et al. We therefore can't provide comparable figures, as we'd be running the benchmark on different hardware.").u-color-dark] | #[+help("This table shows speed as benchmarked by Choi et al. We therefore can't provide comparable figures, as we'd be running the benchmark on different hardware.").u-color-dark]
+row +row
+cell #[strong spaCy v1.x] +cell #[strong spaCy v1.x]
+cell 2015 +cell 2015
+cell Python / Cython +cell Python / Cython
+cell("right") 91.8 +cell("num") 91.8
+cell("right") 13,963 +cell("num") 13,963
+row +row
+cell ClearNLP +cell ClearNLP
+cell 2015 +cell 2015
+cell Java +cell Java
+cell("right") 91.7 +cell("num") 91.7
+cell("right") 10,271 +cell("num") 10,271
+row +row
+cell CoreNLP +cell CoreNLP
+cell 2015 +cell 2015
+cell Java +cell Java
+cell("right") 89.6 +cell("num") 89.6
+cell("right") 8,602 +cell("num") 8,602
+row +row
+cell MATE +cell MATE
+cell 2015 +cell 2015
+cell Java +cell Java
+cell("right") 92.5 +cell("num") 92.5
+cell("right") 550 +cell("num") 550
+row +row
+cell Turbo +cell Turbo
+cell 2015 +cell 2015
+cell C++ +cell C++
+cell("right") 92.4 +cell("num") 92.4
+cell("right") 349 +cell("num") 349

View File

@ -20,34 +20,34 @@ p
+row +row
+cell #[+a("/models/en#en_core_web_sm") #[code en_core_web_sm]] 2.0.0a8 +cell #[+a("/models/en#en_core_web_sm") #[code en_core_web_sm]] 2.0.0a8
each data in ["2.x", "neural"] each data in ["2.x", "neural"]
+cell("right")=data +cell("num")=data
+cell("right") 91.7 +cell("num") 91.7
+cell("right") 85.3 +cell("num") 85.3
+cell("right") 97.0 +cell("num") 97.0
+cell("right") 10.1k +cell("num") 10.1k
+cell("right") #[strong 35 MB] +cell("num") #[strong 35MB]
+row +row
+cell #[+a("/models/en#en_core_web_lg") #[code en_core_web_lg]] 2.0.0a3 +cell #[+a("/models/en#en_core_web_lg") #[code en_core_web_lg]] 2.0.0a3
each data in ["2.x", "neural"] each data in ["2.x", "neural"]
+cell("right")=data +cell("num")=data
+cell("right") #[strong 91.9] +cell("num") #[strong 91.9]
+cell("right") #[strong 85.9] +cell("num") #[strong 85.9]
+cell("right") #[strong 97.2] +cell("num") #[strong 97.2]
+cell("right") 5.0k +cell("num") 10.0k
+cell("right") 812 MB +cell("num") 812MB
+row("divider") +row("divider")
+cell #[code en_core_web_sm] 1.2.0 +cell #[code en_core_web_sm] 1.2.0
each data in ["1.x", "linear", 86.6, 78.5, 96.6] each data in ["1.x", "linear", 86.6, 78.5, 96.6]
+cell("right")=data +cell("num")=data
+cell("right") #[strong 25.7k] +cell("num") #[strong 25.7k]
+cell("right") 50 MB +cell("num") 50MB
+row +row
+cell #[code en_core_web_md] 1.2.1 +cell #[code en_core_web_md] 1.2.1
each data in ["1.x", "linear", 90.6, 81.4, 96.7, "18.8k", "1 GB"] each data in ["1.x", "linear", 90.6, 81.4, 96.7, "18.8k", "1GB"]
+cell("right")=data +cell("num")=data
+h(4, "benchmarks-models-spanish") Spanish +h(4, "benchmarks-models-spanish") Spanish
@ -59,29 +59,29 @@ p
+table(["Model", "spaCy", "Type", "UAS", "NER F", "POS", "WPS", "Size"]) +table(["Model", "spaCy", "Type", "UAS", "NER F", "POS", "WPS", "Size"])
+row +row
+cell #[+a("/models/es#es_core_news_sm") #[code es_core_news_sm]] 2.0.0a0 +cell #[+a("/models/es#es_core_news_sm") #[code es_core_news_sm]] 2.0.0a0
+cell("right") 2.x +cell("num") 2.x
+cell("right") neural +cell("num") neural
+cell("right") 89.8 +cell("num") 89.8
+cell("right") 88.7 +cell("num") 88.7
+cell("right") #[strong 96.9] +cell("num") #[strong 96.9]
+cell("right") #[em n/a] +cell("num") #[em n/a]
+cell("right") #[strong 35 MB] +cell("num") #[strong 35MB]
+row +row
+cell #[+a("/models/es#es_core_news_md") #[code es_core_news_md]] 2.0.0a0 +cell #[+a("/models/es#es_core_news_md") #[code es_core_news_md]] 2.0.0a0
+cell("right") 2.x +cell("num") 2.x
+cell("right") neural +cell("num") neural
+cell("right") #[strong 90.2] +cell("num") #[strong 90.2]
+cell("right") 89.0 +cell("num") 89.0
+cell("right") 97.8 +cell("num") 97.8
+cell("right") #[em n/a] +cell("num") #[em n/a]
+cell("right") 93 MB +cell("num") 93MB
+row("divider") +row("divider")
+cell #[code es_core_web_md] 1.1.0 +cell #[code es_core_web_md] 1.1.0
each data in ["1.x", "linear", 87.5] each data in ["1.x", "linear", 87.5]
+cell("right")=data +cell("num")=data
+cell("right") #[strong 94.2] +cell("num") #[strong 94.2]
+cell("right") 96.7 +cell("num") 96.7
+cell("right") #[em n/a] +cell("num") #[em n/a]
+cell("right") 377 MB +cell("num") 377MB

View File

@ -50,55 +50,55 @@ p
+cell spaCy v2.0.0 +cell spaCy v2.0.0
+cell 2017 +cell 2017
+cell neural +cell neural
+cell("right") 94.48 +cell("num") 94.48
+row +row
+cell spaCy v1.1.0 +cell spaCy v1.1.0
+cell 2016 +cell 2016
+cell linear +cell linear
+cell("right") 92.80 +cell("num") 92.80
+row("divider") +row("divider")
+cell +cell
+a("https://arxiv.org/pdf/1611.01734.pdf") Dozat and Manning +a("https://arxiv.org/pdf/1611.01734.pdf") Dozat and Manning
+cell 2017 +cell 2017
+cell neural +cell neural
+cell("right") #[strong 95.75] +cell("num") #[strong 95.75]
+row +row
+cell +cell
+a("http://arxiv.org/abs/1603.06042") Andor et al. +a("http://arxiv.org/abs/1603.06042") Andor et al.
+cell 2016 +cell 2016
+cell neural +cell neural
+cell("right") 94.44 +cell("num") 94.44
+row +row
+cell +cell
+a("https://github.com/tensorflow/models/tree/master/research/syntaxnet") SyntaxNet Parsey McParseface +a("https://github.com/tensorflow/models/tree/master/research/syntaxnet") SyntaxNet Parsey McParseface
+cell 2016 +cell 2016
+cell neural +cell neural
+cell("right") 94.15 +cell("num") 94.15
+row +row
+cell +cell
+a("http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43800.pdf") Weiss et al. +a("http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43800.pdf") Weiss et al.
+cell 2015 +cell 2015
+cell neural +cell neural
+cell("right") 93.91 +cell("num") 93.91
+row +row
+cell +cell
+a("http://research.google.com/pubs/archive/38148.pdf") Zhang and McDonald +a("http://research.google.com/pubs/archive/38148.pdf") Zhang and McDonald
+cell 2014 +cell 2014
+cell linear +cell linear
+cell("right") 93.32 +cell("num") 93.32
+row +row
+cell +cell
+a("http://www.cs.cmu.edu/~ark/TurboParser/") Martins et al. +a("http://www.cs.cmu.edu/~ark/TurboParser/") Martins et al.
+cell 2013 +cell 2013
+cell linear +cell linear
+cell("right") 93.10 +cell("num") 93.10
+h(4, "ner-accuracy-ontonotes5") NER accuracy (OntoNotes 5, no pre-process) +h(4, "ner-accuracy-ontonotes5") NER accuracy (OntoNotes 5, no pre-process)
@ -113,35 +113,35 @@ p
+cell spaCy #[+a("/models/en#en_core_web_lg") #[code en_core_web_lg]] v2.0.0a3 +cell spaCy #[+a("/models/en#en_core_web_lg") #[code en_core_web_lg]] v2.0.0a3
+cell 2017 +cell 2017
+cell neural +cell neural
+cell("right") 85.85 +cell("num") 85.85
+row("divider") +row("divider")
+cell +cell
+a("https://arxiv.org/pdf/1702.02098.pdf") Strubell et al. +a("https://arxiv.org/pdf/1702.02098.pdf") Strubell et al.
+cell 2017 +cell 2017
+cell neural +cell neural
+cell("right") #[strong 86.81] +cell("num") #[strong 86.81]
+row +row
+cell +cell
+a("https://www.semanticscholar.org/paper/Named-Entity-Recognition-with-Bidirectional-LSTM-C-Chiu-Nichols/10a4db59e81d26b2e0e896d3186ef81b4458b93f") Chiu and Nichols +a("https://www.semanticscholar.org/paper/Named-Entity-Recognition-with-Bidirectional-LSTM-C-Chiu-Nichols/10a4db59e81d26b2e0e896d3186ef81b4458b93f") Chiu and Nichols
+cell 2016 +cell 2016
+cell neural +cell neural
+cell("right") 86.19 +cell("num") 86.19
+row +row
+cell +cell
+a("https://www.semanticscholar.org/paper/A-Joint-Model-for-Entity-Analysis-Coreference-Typi-Durrett-Klein/28eb033eee5f51c5e5389cbb6b777779203a6778") Durrett and Klein +a("https://www.semanticscholar.org/paper/A-Joint-Model-for-Entity-Analysis-Coreference-Typi-Durrett-Klein/28eb033eee5f51c5e5389cbb6b777779203a6778") Durrett and Klein
+cell 2014 +cell 2014
+cell neural +cell neural
+cell("right") 84.04 +cell("num") 84.04
+row +row
+cell +cell
+a("http://www.aclweb.org/anthology/W09-1119") Ratinov and Roth +a("http://www.aclweb.org/anthology/W09-1119") Ratinov and Roth
+cell 2009 +cell 2009
+cell linear +cell linear
+cell("right") 83.45 +cell("num") 83.45
+h(3, "spacy-models") Model comparison +h(3, "spacy-models") Model comparison
@ -183,24 +183,24 @@ p
+row +row
+cell #[strong spaCy] +cell #[strong spaCy]
each data in [ "0.2ms", "1ms", "19ms"] each data in [ "0.2ms", "1ms", "19ms"]
+cell("right") #[strong=data] +cell("num") #[strong=data]
each data in ["1x", "1x", "1x"] each data in ["1x", "1x", "1x"]
+cell("right")=data +cell("num")=data
+row +row
+cell CoreNLP +cell CoreNLP
each data in ["2ms", "10ms", "49ms", "10x", "10x", "2.6x"] each data in ["2ms", "10ms", "49ms", "10x", "10x", "2.6x"]
+cell("right")=data +cell("num")=data
+row +row
+cell ZPar +cell ZPar
each data in ["1ms", "8ms", "850ms", "5x", "8x", "44.7x"] each data in ["1ms", "8ms", "850ms", "5x", "8x", "44.7x"]
+cell("right")=data +cell("num")=data
+row +row
+cell NLTK +cell NLTK
each data in ["4ms", "443ms"] each data in ["4ms", "443ms"]
+cell("right")=data +cell("num")=data
+cell("right") #[em n/a] +cell("num") #[em n/a]
each data in ["20x", "443x"] each data in ["20x", "443x"]
+cell("right")=data +cell("num")=data
+cell("right") #[em n/a] +cell("num") #[em n/a]

View File

@ -79,12 +79,19 @@ p
python -m spacy validate python -m spacy validate
+h(3, "gpu") Run spaCy with GPU +h(3, "gpu") Run spaCy with GPU
+tag experimental
+infobox("Important note", "⚠️")
| The instructions below refer to installation with CUDA 8.0. In order to
| install with CUDA 9.0, set the environment variable #[code CUDA9=1]
| before installing Thinc. You'll also need to adjust the path to the
| CUDA runtime.
p p
| As of v2.0, spaCy comes with neural network models that are implemented | As of v2.0, spaCy comes with neural network models that are implemented
| in our machine learning library, #[+a(gh("thinc")) Thinc]. For GPU | in our machine learning library, #[+a(gh("thinc")) Thinc]. For GPU
| support, we've been grateful to use the work of | support, we've been grateful to use the work of
| #[+a("http://chainer.org") Chainer]'s CuPy module, which provides | Chainer's #[+a("https://cupy.chainer.org") CuPy] module, which provides
| a NumPy-compatible interface for GPU arrays. | a NumPy-compatible interface for GPU arrays.
p p
@ -93,11 +100,11 @@ p
| CUDA. Finally, install spaCy. | CUDA. Finally, install spaCy.
+code(false, "bash"). +code(false, "bash").
export CUDA_HOME=/usr/local/cuda-8.0 # Or wherever your CUDA is export CUDA_HOME=/usr/local/cuda-8.0 # or wherever your CUDA is
export PATH=$PATH:$CUDA_HOME/bin export PATH=$PATH:$CUDA_HOME/bin
pip install spacy pip install spacy
python -c "import thinc.neural.gpu_ops" # Check the GPU ops were built python -c "import thinc.neural.gpu_ops" # check the GPU ops were built
+h(3, "source") Compile from source +h(3, "source") Compile from source

View File

@ -11,9 +11,6 @@
+qs({config: 'venv', os: 'linux'}) source .env/bin/activate +qs({config: 'venv', os: 'linux'}) source .env/bin/activate
+qs({config: 'venv', os: 'windows'}) .env\Scripts\activate +qs({config: 'venv', os: 'windows'}) .env\Scripts\activate
+qs({config: 'gpu', os: 'mac'}) export PATH=$PATH:/usr/local/cuda-8.0/bin
+qs({config: 'gpu', os: 'linux'}) export PATH=$PATH:/usr/local/cuda-8.0/bin
+qs({package: 'pip'}) pip install -U spacy +qs({package: 'pip'}) pip install -U spacy
+qs({package: 'conda'}) conda install -c conda-forge spacy +qs({package: 'conda'}) conda install -c conda-forge spacy

View File

@ -4,9 +4,8 @@ p
| Similarity is determined by comparing #[strong word vectors] or "word | Similarity is determined by comparing #[strong word vectors] or "word
| embeddings", multi-dimensional meaning representations of a word. Word | embeddings", multi-dimensional meaning representations of a word. Word
| vectors can be generated using an algorithm like | vectors can be generated using an algorithm like
| #[+a("https://en.wikipedia.org/wiki/Word2vec") word2vec]. spaCy's medium | #[+a("https://en.wikipedia.org/wiki/Word2vec") word2vec] and usually
| #[code md] and large #[code lg] #[+a("/models") models] come with | look like this:
| #[strong multi-dimensional vectors] that look like this:
+code("banana.vector", false, false, 250). +code("banana.vector", false, false, 250).
array([2.02280000e-01, -7.66180009e-02, 3.70319992e-01, array([2.02280000e-01, -7.66180009e-02, 3.70319992e-01,
@ -110,8 +109,21 @@ p
-2.97650009e-01, 7.89430022e-01, 3.31680000e-01, -2.97650009e-01, 7.89430022e-01, 3.31680000e-01,
-1.19659996e+00, -4.71559986e-02, 5.31750023e-01], dtype=float32) -1.19659996e+00, -4.71559986e-02, 5.31750023e-01], dtype=float32)
+infobox("Important note", "⚠️")
| To make them compact and fast, spaCy's small #[+a("/models") models]
| (all packages that end in #[code sm]) #[strong don't ship with word vectors], and
| only include context-sensitive #[strong tensors]. This means you can
| still use the #[code similarity()] methods to compare documents, spans
| and tokens but the result won't be as good, and individual tokens won't
| have any vectors assigned. So in order to use #[em real] word vectors,
| you need to download a larger model:
+code-wrapper
+code-new(false, "bash", "$") spacy download en_core_web_lg
p p
| The #[code .vector] attribute will return an object's vector. | Models that come with built-in word vectors make them available as the
| #[+api("token#vector") #[code Token.vector]] attribute.
| #[+api("doc#vector") #[code Doc.vector]] and | #[+api("doc#vector") #[code Doc.vector]] and
| #[+api("span#vector") #[code Span.vector]] will default to an average | #[+api("span#vector") #[code Span.vector]] will default to an average
| of their token vectors. You can also check if a token has a vector | of their token vectors. You can also check if a token has a vector
@ -119,6 +131,7 @@ p
| vectors. | vectors.
+code. +code.
nlp = spacy.load('en_core_web_lg')
tokens = nlp(u'dog cat banana sasquatch') tokens = nlp(u'dog cat banana sasquatch')
for token in tokens: for token in tokens:
@ -143,10 +156,9 @@ p
| they're part of the model's vocabulary, and come with a vector. The word | they're part of the model's vocabulary, and come with a vector. The word
| "sasquatch" on the other hand is a lot less common and out-of-vocabulary | "sasquatch" on the other hand is a lot less common and out-of-vocabulary
| so its vector representation consists of 300 dimensions of #[code 0], | so its vector representation consists of 300 dimensions of #[code 0],
| which means it's practically nonexistent. | which means it's practically nonexistent. If your application will
| benefit from a #[strong large vocabulary] with more vectors, you should
p | consider using one of the larger models or loading in a full vector
| If your application will benefit from a large vocabulary with more | package, for example,
| vectors, you should consider using one of the | #[+a("/models/en#en_vectors_web_lg") #[code en_vectors_web_lg]], which
| #[+a("/models") larger models] instead of the default, | includes over #[strong 1 million unique vectors].
| smaller ones, which usually come with a clipped vocabulary.

View File

@ -10,8 +10,9 @@ p
+h(3, "features-models") Convolutional neural network models +h(3, "features-models") Convolutional neural network models
+aside-code("Example", "bash") +aside-code("Example", "bash")
for model in ["en", "de", "fr", "es", "pt", "it"] for _, lang in MODELS
| spacy download #{model} # default #{LANGUAGES[model]} model!{'\n'} if lang != "xx"
| spacy download #{lang} # default #{LANGUAGES[lang]} model!{'\n'}
| spacy download xx_ent_wiki_sm # multi-language NER | spacy download xx_ent_wiki_sm # multi-language NER
p p
@ -20,14 +21,22 @@ p
| been designed and implemented from scratch specifically for spaCy, to | been designed and implemented from scratch specifically for spaCy, to
| give you an unmatched balance of speed, size and accuracy. The new | give you an unmatched balance of speed, size and accuracy. The new
| models are #[strong 10× smaller], #[strong 20% more accurate], | models are #[strong 10× smaller], #[strong 20% more accurate],
| and #[strong just as fast] as the previous generation. | and #[strong even cheaper to run] than the previous generation.
| #[strong GPU usage] is now supported via
| #[+a("http://chainer.org") Chainer]'s CuPy module. p
| spaCy v2.0's new neural network models bring significant improvements in
| accuracy, especially for English Named Entity Recognition. The new
| #[+a("/models/en#en_core_web_lg") #[code en_core_web_lg]] model makes
| about #[strong 25% fewer mistakes] than the corresponding v1.x model and
| is within #[strong 1% of the current state-of-the-art]
| (#[+a("https://arxiv.org/pdf/1702.02098.pdf") Strubell et al., 2017]).
| The v2.0 models are also cheaper to run at scale, as they require
| #[strong under 1 GB of memory] per process.
+infobox +infobox
| #[+label-inline Usage:] #[+a("/models") Models directory], | #[+label-inline Usage:] #[+a("/models") Models directory],
| #[+a("/models/comparison") Models comparison], | #[+a("/models/comparison") Models comparison],
| #[+a("/usage/#gpu") Using spaCy with GPU] | #[+a("#benchmarks") Benchmarks]
+h(3, "features-pipelines") Improved processing pipelines +h(3, "features-pipelines") Improved processing pipelines

View File

@ -22,7 +22,7 @@ p
| #[strong deep learning-powered models] for spaCy's tagger, | #[strong deep learning-powered models] for spaCy's tagger,
| parser and entity recognizer. The new models are | parser and entity recognizer. The new models are
| #[strong 10× smaller], #[strong 20% more accurate] and | #[strong 10× smaller], #[strong 20% more accurate] and
| just as fast as the previous generation. | #[strong even cheaper to run] than the previous generation.
p p
| We've also made several usability improvements that are | We've also made several usability improvements that are

View File

@ -3,7 +3,7 @@
include ../_includes/_mixins include ../_includes/_mixins
p p
| As of v1.7.0, models for spaCy can be installed as #[strong Python packages]. | spaCy's models can be installed as #[strong Python packages].
| This means that they're a component of your application, just like any | This means that they're a component of your application, just like any
| other module. They're versioned and can be defined as a dependency in your | other module. They're versioned and can be defined as a dependency in your
| #[code requirements.txt]. Models can be installed from a download URL or | #[code requirements.txt]. Models can be installed from a download URL or