Update v2 docs and benchmarks

This commit is contained in:
ines 2017-06-05 14:13:38 +02:00
parent 9f55c0d4f6
commit fd35d910b8

View File

@ -22,7 +22,7 @@ p
| entirely new #[strong deep learning-powered models] for spaCy's tagger, | entirely new #[strong deep learning-powered models] for spaCy's tagger,
| parser and entity recognizer. The new models are #[strong 20x smaller] | parser and entity recognizer. The new models are #[strong 20x smaller]
| than the linear models that have powered spaCy until now: from 300 MB to | than the linear models that have powered spaCy until now: from 300 MB to
| only 14 MB. | only 15 MB.
p p
| We've also made several usability improvements that are | We've also made several usability improvements that are
@ -247,12 +247,12 @@ p
| #[code spacy.lang.xx] | #[code spacy.lang.xx]
+row +row
+cell #[code spacy.orth] +cell #[code orth]
+cell #[code spacy.lang.xx.lex_attrs] +cell #[code lang.xx.lex_attrs]
+row +row
+cell #[code cli.model] +cell #[code syntax.syntax_iterators]
+cell - +cell #[code lang.xx.syntax_iterators]
+row +row
+cell #[code Language.save_to_directory] +cell #[code Language.save_to_directory]
@ -266,8 +266,6 @@ p
+cell +cell
| #[code Vocab.load] | #[code Vocab.load]
| #[code Vocab.load_lexemes] | #[code Vocab.load_lexemes]
| #[code Vocab.load_vectors]
| #[code Vocab.load_vectors_from_bin_loc]
+cell +cell
| #[+api("vocab#from_disk") #[code Vocab.from_disk]] | #[+api("vocab#from_disk") #[code Vocab.from_disk]]
| #[+api("vocab#from_bytes") #[code Vocab.from_bytes]] | #[+api("vocab#from_bytes") #[code Vocab.from_bytes]]
@ -275,10 +273,24 @@ p
+row +row
+cell +cell
| #[code Vocab.dump] | #[code Vocab.dump]
+cell
| #[+api("vocab#to_disk") #[code Vocab.to_disk]]#[br]
| #[+api("vocab#to_bytes") #[code Vocab.to_bytes]]
+row
+cell
| #[code Vocab.load_vectors]
| #[code Vocab.load_vectors_from_bin_loc]
+cell
| #[+api("vectors#from_disk") #[code Vectors.from_disk]]
| #[+api("vectors#from_bytes") #[code Vectors.from_bytes]]
+row
+cell
| #[code Vocab.dump_vectors] | #[code Vocab.dump_vectors]
+cell +cell
| #[+api("vocab#to_disk") #[code Vocab.to_disk]] | #[+api("vectors#to_disk") #[code Vectors.to_disk]]
| #[+api("vocab#to_bytes") #[code Vocab.to_bytes]] | #[+api("vectors#to_bytes") #[code Vectors.to_bytes]]
+row +row
+cell +cell
@ -296,7 +308,9 @@ p
+row +row
+cell #[code Tokenizer.load] +cell #[code Tokenizer.load]
+cell - +cell
| #[+api("tokenizer#from_disk") #[code Tokenizer.from_disk]]
| #[+api("tokenizer#from_bytes") #[code Tokenizer.from_bytes]]
+row +row
+cell #[code Tagger.load] +cell #[code Tagger.load]
@ -342,6 +356,10 @@ p
+cell #[code Token.is_ancestor_of] +cell #[code Token.is_ancestor_of]
+cell #[+api("token#is_ancestor") #[code Token.is_ancestor]] +cell #[+api("token#is_ancestor") #[code Token.is_ancestor]]
+row
+cell #[code cli.model]
+cell -
+h(2, "migrating") Migrating from spaCy 1.x +h(2, "migrating") Migrating from spaCy 1.x
p p
@ -466,18 +484,27 @@ p
+h(2, "benchmarks") Benchmarks +h(2, "benchmarks") Benchmarks
+under-construction
+aside("Data sources")
| #[strong Parser, tagger, NER:] #[+a("https://www.gabormelli.com/RKB/OntoNotes_Corpus") OntoNotes 5]#[br]
| #[strong Word vectors:] #[+a("http://commoncrawl.org") Common Crawl]#[br]
p The evaluation was conducted on raw text with no gold-standard information.
+table(["Model", "Version", "Type", "UAS", "LAS", "NER F", "POS", "w/s"]) +table(["Model", "Version", "Type", "UAS", "LAS", "NER F", "POS", "w/s"])
+row mixin benchmark-row(name, details, values, highlight, style)
+cell #[code en_core_web_sm] +row(style)
for cell in ["2.0.0", "neural", "", "", "", "", ""] +cell #[code=name]
for cell in details
+cell=cell +cell=cell
for cell, i in values
+cell.u-text-right
if highlight && highlight[i]
strong=cell
else
!=cell
+row +benchmark-row("en_core_web_sm", ["2.0.0", "neural"], ["91.2", "89.2", "82.6", "96.6", "10,300"], [1, 1, 1, 0, 0])
+cell #[code es_dep_web_sm] +benchmark-row("en_core_web_sm", ["1.2.0", "linear"], ["86.6", "83.8", "78.5", "96.6", "25,700"], [0, 0, 0, 0, 1], "divider")
for cell in ["2.0.0", "neural", "", "", "", "", ""] +benchmark-row("en_core_web_md", ["1.2.1", "linear"], ["90.6", "88.5", "81.4", "96.7", "18,800"], [0, 0, 0, 1, 0])
+cell=cell
+row("divider")
+cell #[code en_core_web_sm]
for cell in ["1.1.0", "linear", "", "", "", "", ""]
+cell=cell