mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 10:26:35 +03:00
Update v2 docs and benchmarks
This commit is contained in:
parent
9f55c0d4f6
commit
fd35d910b8
|
@ -22,7 +22,7 @@ p
|
||||||
| entirely new #[strong deep learning-powered models] for spaCy's tagger,
|
| entirely new #[strong deep learning-powered models] for spaCy's tagger,
|
||||||
| parser and entity recognizer. The new models are #[strong 20x smaller]
|
| parser and entity recognizer. The new models are #[strong 20x smaller]
|
||||||
| than the linear models that have powered spaCy until now: from 300 MB to
|
| than the linear models that have powered spaCy until now: from 300 MB to
|
||||||
| only 14 MB.
|
| only 15 MB.
|
||||||
|
|
||||||
p
|
p
|
||||||
| We've also made several usability improvements that are
|
| We've also made several usability improvements that are
|
||||||
|
@ -247,12 +247,12 @@ p
|
||||||
| #[code spacy.lang.xx]
|
| #[code spacy.lang.xx]
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code spacy.orth]
|
+cell #[code orth]
|
||||||
+cell #[code spacy.lang.xx.lex_attrs]
|
+cell #[code lang.xx.lex_attrs]
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code cli.model]
|
+cell #[code syntax.syntax_iterators]
|
||||||
+cell -
|
+cell #[code lang.xx.syntax_iterators]
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code Language.save_to_directory]
|
+cell #[code Language.save_to_directory]
|
||||||
|
@ -266,8 +266,6 @@ p
|
||||||
+cell
|
+cell
|
||||||
| #[code Vocab.load]
|
| #[code Vocab.load]
|
||||||
| #[code Vocab.load_lexemes]
|
| #[code Vocab.load_lexemes]
|
||||||
| #[code Vocab.load_vectors]
|
|
||||||
| #[code Vocab.load_vectors_from_bin_loc]
|
|
||||||
+cell
|
+cell
|
||||||
| #[+api("vocab#from_disk") #[code Vocab.from_disk]]
|
| #[+api("vocab#from_disk") #[code Vocab.from_disk]]
|
||||||
| #[+api("vocab#from_bytes") #[code Vocab.from_bytes]]
|
| #[+api("vocab#from_bytes") #[code Vocab.from_bytes]]
|
||||||
|
@ -275,10 +273,24 @@ p
|
||||||
+row
|
+row
|
||||||
+cell
|
+cell
|
||||||
| #[code Vocab.dump]
|
| #[code Vocab.dump]
|
||||||
|
+cell
|
||||||
|
| #[+api("vocab#to_disk") #[code Vocab.to_disk]]#[br]
|
||||||
|
| #[+api("vocab#to_bytes") #[code Vocab.to_bytes]]
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell
|
||||||
|
| #[code Vocab.load_vectors]
|
||||||
|
| #[code Vocab.load_vectors_from_bin_loc]
|
||||||
|
+cell
|
||||||
|
| #[+api("vectors#from_disk") #[code Vectors.from_disk]]
|
||||||
|
| #[+api("vectors#from_bytes") #[code Vectors.from_bytes]]
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell
|
||||||
| #[code Vocab.dump_vectors]
|
| #[code Vocab.dump_vectors]
|
||||||
+cell
|
+cell
|
||||||
| #[+api("vocab#to_disk") #[code Vocab.to_disk]]
|
| #[+api("vectors#to_disk") #[code Vectors.to_disk]]
|
||||||
| #[+api("vocab#to_bytes") #[code Vocab.to_bytes]]
|
| #[+api("vectors#to_bytes") #[code Vectors.to_bytes]]
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell
|
+cell
|
||||||
|
@ -296,7 +308,9 @@ p
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code Tokenizer.load]
|
+cell #[code Tokenizer.load]
|
||||||
+cell -
|
+cell
|
||||||
|
| #[+api("tokenizer#from_disk") #[code Tokenizer.from_disk]]
|
||||||
|
| #[+api("tokenizer#from_bytes") #[code Tokenizer.from_bytes]]
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code Tagger.load]
|
+cell #[code Tagger.load]
|
||||||
|
@ -342,6 +356,10 @@ p
|
||||||
+cell #[code Token.is_ancestor_of]
|
+cell #[code Token.is_ancestor_of]
|
||||||
+cell #[+api("token#is_ancestor") #[code Token.is_ancestor]]
|
+cell #[+api("token#is_ancestor") #[code Token.is_ancestor]]
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code cli.model]
|
||||||
|
+cell -
|
||||||
|
|
||||||
+h(2, "migrating") Migrating from spaCy 1.x
|
+h(2, "migrating") Migrating from spaCy 1.x
|
||||||
|
|
||||||
p
|
p
|
||||||
|
@ -466,18 +484,27 @@ p
|
||||||
|
|
||||||
+h(2, "benchmarks") Benchmarks
|
+h(2, "benchmarks") Benchmarks
|
||||||
|
|
||||||
|
+under-construction
|
||||||
|
|
||||||
|
+aside("Data sources")
|
||||||
|
| #[strong Parser, tagger, NER:] #[+a("https://www.gabormelli.com/RKB/OntoNotes_Corpus") OntoNotes 5]#[br]
|
||||||
|
| #[strong Word vectors:] #[+a("http://commoncrawl.org") Common Crawl]#[br]
|
||||||
|
|
||||||
|
p The evaluation was conducted on raw text with no gold standard information.
|
||||||
|
|
||||||
+table(["Model", "Version", "Type", "UAS", "LAS", "NER F", "POS", "w/s"])
|
+table(["Model", "Version", "Type", "UAS", "LAS", "NER F", "POS", "w/s"])
|
||||||
+row
|
mixin benchmark-row(name, details, values, highlight, style)
|
||||||
+cell #[code en_core_web_sm]
|
+row(style)
|
||||||
for cell in ["2.0.0", "neural", "", "", "", "", ""]
|
+cell #[code=name]
|
||||||
|
for cell in details
|
||||||
+cell=cell
|
+cell=cell
|
||||||
|
for cell, i in values
|
||||||
|
+cell.u-text-right
|
||||||
|
if highlight && highlight[i]
|
||||||
|
strong=cell
|
||||||
|
else
|
||||||
|
!=cell
|
||||||
|
|
||||||
+row
|
+benchmark-row("en_core_web_sm", ["2.0.0", "neural"], ["91.2", "89.2", "82.6", "96.6", "10,300"], [1, 1, 1, 0, 0])
|
||||||
+cell #[code es_dep_web_sm]
|
+benchmark-row("en_core_web_sm", ["1.2.0", "linear"], ["86.6", "83.8", "78.5", "96.6", "25,700"], [0, 0, 0, 0, 1], "divider")
|
||||||
for cell in ["2.0.0", "neural", "", "", "", "", ""]
|
+benchmark-row("en_core_web_md", ["1.2.1", "linear"], ["90.6", "88.5", "81.4", "96.7", "18,800"], [0, 0, 0, 1, 0])
|
||||||
+cell=cell
|
|
||||||
|
|
||||||
+row("divider")
|
|
||||||
+cell #[code en_core_web_sm]
|
|
||||||
for cell in ["1.1.0", "linear", "", "", "", "", ""]
|
|
||||||
+cell=cell
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user