Mirror of https://github.com/explosion/spaCy.git (synced 2024-11-11 12:18:04 +03:00)

Commit 370a757bc2: Merge branch 'develop' of https://github.com/explosion/spaCy into develop
@@ -24,6 +24,7 @@
     "Lexeme": "lexeme",
     "Vocab": "vocab",
     "StringStore": "stringstore",
+    "Vectors": "vectors",
     "GoldParse": "goldparse",
     "GoldCorpus": "goldcorpus",
     "Binder": "binder"
@@ -164,6 +165,12 @@
         "source": "spacy/tokens/binder.pyx"
     },

+    "vectors": {
+        "title": "Vectors",
+        "tag": "class",
+        "source": "spacy/vectors.pyx"
+    },
+
     "annotation": {
         "title": "Annotation Specifications"
     }
website/docs/api/vectors.jade (new file, 7 lines)
@@ -0,0 +1,7 @@
+//- 💫 DOCS > API > VECTORS
+
+include ../../_includes/_mixins
+
+p A container class for vector data keyed by string.
+
++under-construction
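The new API page is still a stub, but its one-line description pins down the core idea: vector data keyed by string. As a rough conceptual sketch only (the class name, constructor and methods below are hypothetical, not spaCy's actual Vectors API), such a container might look like:

import numpy

class VectorTable(object):
    """Hypothetical stand-in, NOT spaCy's actual Vectors class."""

    def __init__(self, width):
        self.width = width  # dimensionality shared by every vector in the table
        self.data = {}      # maps string keys to numpy arrays

    def add(self, key, vector):
        assert vector.shape == (self.width,)
        self.data[key] = vector

    def __getitem__(self, key):
        return self.data[key]

table = VectorTable(300)
table.add(u'dog', numpy.zeros((300,), dtype='float32'))
print(table[u'dog'].shape)  # (300,)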
@@ -11,7 +11,7 @@
     "POS tagging": "pos-tagging",
     "Using the parse": "dependency-parse",
     "Entity recognition": "entity-recognition",
-    "Word vectors": "word-vectors-similarities",
+    "Vectors & similarity": "word-vectors-similarities",
     "Custom tokenization": "customizing-tokenizer",
     "Rule-based matching": "rule-based-matching",
     "Adding languages": "adding-languages",
@@ -70,14 +70,57 @@ p
         +cell Map strings to and from hash values.
 
     +row
-    +row
         +cell #[+api("tokenizer") #[code Tokenizer]]
         +cell
             | Segment text, and create #[code Doc] objects with the discovered
             | segment boundaries.
 
+    +row
+        +cell #[code Lemmatizer]
+        +cell
+            | Determine the base forms of words.
+
     +row
         +cell #[+api("matcher") #[code Matcher]]
         +cell
             | Match sequences of tokens, based on pattern rules, similar to
             | regular expressions.
+
+
++h(3, "architecture-pipeline") Pipeline components
+
++table(["Name", "Description"])
+    +row
+        +cell #[+api("tagger") #[code Tagger]]
+        +cell Annotate part-of-speech tags on #[code Doc] objects.
+
+    +row
+        +cell #[+api("dependencyparser") #[code DependencyParser]]
+        +cell Annotate syntactic dependencies on #[code Doc] objects.
+
+    +row
+        +cell #[+api("entityrecognizer") #[code EntityRecognizer]]
+        +cell
+            | Annotate named entities, e.g. persons or products, on #[code Doc]
+            | objects.
+
++h(3, "architecture-other") Other classes
+
++table(["Name", "Description"])
+    +row
+        +cell #[+api("vectors") #[code Vectors]]
+        +cell Container class for vector data keyed by string.
+
+    +row
+        +cell #[+api("binder") #[code Binder]]
+        +cell Container class for serializing collections of #[code Doc] objects.
+
+    +row
+        +cell #[+api("goldparse") #[code GoldParse]]
+        +cell Collection for training annotations.
+
+    +row
+        +cell #[+api("goldcorpus") #[code GoldCorpus]]
+        +cell
+            | An annotated corpus, using the JSON file format. Manages
+            | annotations for tagging, dependency parsing and NER.
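The component tables added above describe what runs when you call nlp on a text. For orientation, a minimal usage sketch built only on API calls that appear elsewhere in these docs (it assumes an installed English model):

import spacy

nlp = spacy.load('en')  # bundles tokenizer, tagger, parser and entity recognizer
doc = nlp(u'Apple is looking at buying a U.K. startup.')

for token in doc:
    # part-of-speech tags from the Tagger, dependency labels from the DependencyParser
    print(token.text, token.pos_, token.dep_)

print(doc.ents)         # named entities from the EntityRecognizer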
@@ -29,11 +29,11 @@ p
     | #[strong #[+procon("con", 16)] similarity:] dissimilar (lower is less similar)
 
 +table(["", "dog", "cat", "banana"])
-    each cells, label in {"dog": [1.00, 0.80, 0.24], "cat": [0.80, 1.00, 0.28], "banana": [0.24, 0.28, 1.00]}
+    each cells, label in {"dog": [1, 0.8, 0.24], "cat": [0.8, 1, 0.28], "banana": [0.24, 0.28, 1]}
         +row
             +cell.u-text-label.u-color-theme=label
             for cell in cells
-                +cell #[code=cell.toFixed(2)]
+                +cell.u-text-center #[code=cell.toFixed(2)]
                     | #[+procon(cell < 0.5 ? "con" : cell != 1 ? "pro" : "neutral")]
 
 p
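The toFixed(2) cells above are pairwise similarity scores over word vectors. A short sketch that reproduces the dog/cat/banana matrix, assuming a model with word vectors is installed:

import spacy

nlp = spacy.load('en')
tokens = nlp(u'dog cat banana')

for token1 in tokens:
    for token2 in tokens:
        # cosine similarity over the tokens' word vectors; 1.00 on the diagonal
        print(token1.text, token2.text, token1.similarity(token2))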
@@ -110,6 +110,13 @@ p
             | between individual tokens, like subject or object.
         +cell #[+procon("pro")]
 
+    +row
+        +cell #[strong Lemmatization]
+        +cell
+            | Assigning the base forms of words. For example, the lemma of
+            | "was" is "be", and the lemma of "rats" is "rat".
+        +cell #[+procon("pro")]
+
     +row
         +cell #[strong Sentence Boundary Detection] (SBD)
         +cell Finding and segmenting individual sentences.
@@ -274,40 +281,6 @@ include _spacy-101/_language-data
 
 include _spacy-101/_architecture.jade
-
-+h(3, "architecture-pipeline") Pipeline components
-
-+table(["Name", "Description"])
-    +row
-        +cell #[+api("tagger") #[code Tagger]]
-        +cell Annotate part-of-speech tags on #[code Doc] objects.
-
-    +row
-        +cell #[+api("dependencyparser") #[code DependencyParser]]
-        +cell Annotate syntactic dependencies on #[code Doc] objects.
-
-    +row
-        +cell #[+api("entityrecognizer") #[code EntityRecognizer]]
-        +cell
-            | Annotate named entities, e.g. persons or products, on #[code Doc]
-            | objects.
-
-+h(3, "architecture-other") Other classes
-
-+table(["Name", "Description"])
-    +row
-        +cell #[+api("binder") #[code Binder]]
-        +cell Container class for serializing collections of #[code Doc] objects.
-
-    +row
-        +cell #[+api("goldparse") #[code GoldParse]]
-        +cell Collection for training annotations.
-
-    +row
-        +cell #[+api("goldcorpus") #[code GoldCorpus]]
-        +cell
-            | An annotated corpus, using the JSON file format. Manages
-            | annotations for tagging, dependency parsing and NER.
 
 +h(2, "community") Community & FAQ
 
 p
@@ -22,7 +22,7 @@ p
     | entirely new #[strong deep learning-powered models] for spaCy's tagger,
     | parser and entity recognizer. The new models are #[strong 20x smaller]
     | than the linear models that have powered spaCy until now: from 300 MB to
-    | only 14 MB.
+    | only 15 MB.
 
 p
     | We've also made several usability improvements that are
@@ -247,12 +247,12 @@ p
         | #[code spacy.lang.xx]
 
     +row
-        +cell #[code spacy.orth]
-        +cell #[code spacy.lang.xx.lex_attrs]
+        +cell #[code orth]
+        +cell #[code lang.xx.lex_attrs]
 
     +row
-        +cell #[code cli.model]
-        +cell -
+        +cell #[code syntax.syntax_iterators]
+        +cell #[code lang.xx.syntax_iterators]
 
     +row
         +cell #[code Language.save_to_directory]
@@ -266,8 +266,6 @@ p
         +cell
             | #[code Vocab.load]
             | #[code Vocab.load_lexemes]
-            | #[code Vocab.load_vectors]
-            | #[code Vocab.load_vectors_from_bin_loc]
         +cell
             | #[+api("vocab#from_disk") #[code Vocab.from_disk]]
             | #[+api("vocab#from_bytes") #[code Vocab.from_bytes]]
@@ -275,10 +273,24 @@ p
     +row
         +cell
             | #[code Vocab.dump]
+        +cell
+            | #[+api("vocab#to_disk") #[code Vocab.to_disk]]#[br]
+            | #[+api("vocab#to_bytes") #[code Vocab.to_bytes]]
+
+    +row
+        +cell
+            | #[code Vocab.load_vectors]
+            | #[code Vocab.load_vectors_from_bin_loc]
+        +cell
+            | #[+api("vectors#from_disk") #[code Vectors.from_disk]]
+            | #[+api("vectors#from_bytes") #[code Vectors.from_bytes]]
+
+    +row
+        +cell
             | #[code Vocab.dump_vectors]
         +cell
-            | #[+api("vocab#to_disk") #[code Vocab.to_disk]]
-            | #[+api("vocab#to_bytes") #[code Vocab.to_bytes]]
+            | #[+api("vectors#to_disk") #[code Vectors.to_disk]]
+            | #[+api("vectors#to_bytes") #[code Vectors.to_bytes]]
 
     +row
         +cell
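The rows above map the removed 1.x Vocab vector methods to the new Vectors serialization API. A hedged before-and-after sketch; the paths are placeholders, and it assumes the vector table is exposed as nlp.vocab.vectors in v2:

import spacy

nlp = spacy.load('en')

# spaCy 1.x (now deprecated):
#     nlp.vocab.dump('/tmp/vocab.bin')
#     nlp.vocab.dump_vectors('/tmp/vectors.bin')

# spaCy 2.x equivalents, following the table above:
nlp.vocab.to_disk('/tmp/vocab')              # replaces Vocab.dump
nlp.vocab.vectors.to_disk('/tmp/vectors')    # replaces Vocab.dump_vectors
nlp.vocab.vectors.from_disk('/tmp/vectors')  # replaces Vocab.load_vectors*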
@@ -296,7 +308,9 @@ p
 
     +row
         +cell #[code Tokenizer.load]
-        +cell -
+        +cell
+            | #[+api("tokenizer#from_disk") #[code Tokenizer.from_disk]]
+            | #[+api("tokenizer#from_bytes") #[code Tokenizer.from_bytes]]
 
     +row
         +cell #[code Tagger.load]
@@ -342,6 +356,10 @@ p
         +cell #[code Token.is_ancestor_of]
         +cell #[+api("token#is_ancestor") #[code Token.is_ancestor]]
 
+    +row
+        +cell #[code cli.model]
+        +cell -
+
 +h(2, "migrating") Migrating from spaCy 1.x
 
 p
@@ -466,18 +484,27 @@ p
 
 +h(2, "benchmarks") Benchmarks
 
++under-construction
+
++aside("Data sources")
+    | #[strong Parser, tagger, NER:] #[+a("https://www.gabormelli.com/RKB/OntoNotes_Corpus") OntoNotes 5]#[br]
+    | #[strong Word vectors:] #[+a("http://commoncrawl.org") Common Crawl]#[br]
+
+p The evaluation was conducted on raw text with no gold standard information.
+
 +table(["Model", "Version", "Type", "UAS", "LAS", "NER F", "POS", "w/s"])
-    +row
-        +cell #[code en_core_web_sm]
-        for cell in ["2.0.0", "neural", "", "", "", "", ""]
-            +cell=cell
+    mixin benchmark-row(name, details, values, highlight, style)
+        +row(style)
+            +cell #[code=name]
+            for cell in details
+                +cell=cell
+            for cell, i in values
+                +cell.u-text-right
+                    if highlight && highlight[i]
+                        strong=cell
+                    else
+                        !=cell
 
-    +row
-        +cell #[code es_dep_web_sm]
-        for cell in ["2.0.0", "neural", "", "", "", "", ""]
-            +cell=cell
-
-    +row("divider")
-        +cell #[code en_core_web_sm]
-        for cell in ["1.1.0", "linear", "", "", "", "", ""]
-            +cell=cell
+    +benchmark-row("en_core_web_sm", ["2.0.0", "neural"], ["91.2", "89.2", "82.6", "96.6", "10,300"], [1, 1, 1, 0, 0])
+    +benchmark-row("en_core_web_sm", ["1.2.0", "linear"], ["86.6", "83.8", "78.5", "96.6", "25,700"], [0, 0, 0, 0, 1], "divider")
+    +benchmark-row("en_core_web_md", ["1.2.1", "linear"], ["90.6", "88.5", "81.4", "96.7", "18,800"], [0, 0, 0, 1, 0])
@@ -8,10 +8,8 @@ p
     | to train these vectors is the #[+a("https://en.wikipedia.org/wiki/Word2vec") word2vec]
     | family of algorithms. The default
     | #[+a("/docs/usage/models#available") English model] installs
-    | 300-dimensional vectors trained on the Common Crawl
-    | corpus using the #[+a("http://nlp.stanford.edu/projects/glove/") GloVe]
-    | algorithm. The GloVe common crawl vectors have become a de facto
-    | standard for practical NLP.
+    | 300-dimensional vectors trained on the
+    | #[+a("http://commoncrawl.org") Common Crawl] corpus.
 
 +aside("Tip: Training a word2vec model")
     | If you need to train a word2vec model, we recommend the implementation in
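For reference, the installed vectors are exposed per lexeme, token and document. A small usage sketch, assuming an English model with the 300-dimensional vectors described above:

import spacy

nlp = spacy.load('en')                  # assumes a model with vectors installed
doc = nlp(u'I like apples')
apples = doc[2]

print(apples.has_vector)                # True if "apples" is in the vectors table
print(apples.vector.shape)              # (300,) for the default English vectors
print(nlp.vocab[u'apples'].vector[:5])  # the same data, looked up via the vocab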
@@ -23,6 +21,129 @@ p
 include _spacy-101/_similarity
 include _spacy-101/_word-vectors
 
++h(2, "similarity-context") Similarities in context
+
+p
+    | Aside from spaCy's built-in word vectors, which were trained on a lot of
+    | text with a wide vocabulary, the parsing, tagging and NER models also
+    | rely on vector representations of the #[strong meanings of words in context].
+    | As the first component of the
+    | #[+a("/docs/usage/language-processing-pipeline") processing pipeline], the
+    | tensorizer encodes a document's internal meaning representations as an
+    | array of floats, also called a tensor. This allows spaCy to make a
+    | reasonable guess at a word's meaning, based on its surrounding words.
+    | Even if a word hasn't been seen before, spaCy will know #[em something]
+    | about it. Because spaCy uses a 4-layer convolutional network, the
+    | tensors are sensitive to up to #[strong four words on either side] of a
+    | word.
+
+p
+    | For example, here are three sentences containing the out-of-vocabulary
+    | word "labrador" in different contexts.
+
++code.
+    doc1 = nlp(u"The labrador barked.")
+    doc2 = nlp(u"The labrador swam.")
+    doc3 = nlp(u"the labrador people live in canada.")
+
+    for doc in [doc1, doc2, doc3]:
+        labrador = doc[1]
+        dog = nlp(u"dog")
+        print(labrador.similarity(dog))
+
+p
+    | Even though the model has never seen the word "labrador", it can make a
+    | fairly accurate prediction of its similarity to "dog" in different
+    | contexts.
+
++table(["Context", "labrador.similarity(dog)"])
+    +row
+        +cell The #[strong labrador] barked.
+        +cell #[code 0.56] #[+procon("pro")]
+
+    +row
+        +cell The #[strong labrador] swam.
+        +cell #[code 0.48] #[+procon("con")]
+
+    +row
+        +cell the #[strong labrador] people live in canada.
+        +cell #[code 0.39] #[+procon("con")]
+
+p
+    | The same also works for whole documents. Here, the variance of the
+    | similarities is lower, as all words and their order are taken into
+    | account. However, the context-specific similarity is often still
+    | reflected pretty accurately.
+
++code.
+    doc1 = nlp(u"Paris is the largest city in France.")
+    doc2 = nlp(u"Ljubljana is the capital of Slovenia.")
+    doc3 = nlp(u"An emu is a large bird.")
+
+    for doc in [doc1, doc2, doc3]:
+        for other_doc in [doc1, doc2, doc3]:
+            print(doc.similarity(other_doc))
+
+p
+    | Even though the sentences about Paris and Ljubljana consist of different
+    | words and entities, they both describe the same concept and are seen as
+    | more similar than the sentence about emus. In this case, even a misspelled
+    | version of "Ljubljana" would still produce very similar results.
+
++table
+    - var examples = {"Paris is the largest city in France.": [1, 0.84, 0.65], "Ljubljana is the capital of Slovenia.": [0.84, 1, 0.52], "An emu is a large bird.": [0.65, 0.52, 1]}
+    - var counter = 0
+
+    +row
+        +row
+            +cell
+            for _, label in examples
+                +cell=label
+
+    each cells, label in examples
+        +row(counter ? null : "divider")
+            +cell=label
+            for cell in cells
+                +cell.u-text-center #[code=cell.toFixed(2)]
+                    | #[+procon(cell < 0.7 ? "con" : cell != 1 ? "pro" : "neutral")]
+        - counter++
+
+p
+    | Sentences that consist of the same words in different order will likely
+    | be seen as very similar – but never identical.
+
++code.
+    docs = [nlp(u"dog bites man"), nlp(u"man bites dog"),
+            nlp(u"man dog bites"), nlp(u"dog man bites")]
+
+    for doc in docs:
+        for other_doc in docs:
+            print(doc.similarity(other_doc))
+
+p
+    | Interestingly, "man bites dog" and "man dog bites" are seen as slightly
+    | more similar than "man bites dog" and "dog bites man". This may be a
+    | coincidence – or the result of "man" being interpreted as both
+    | sentences' subject.
+
++table
+    - var examples = {"dog bites man": [1, 0.9, 0.89, 0.92], "man bites dog": [0.9, 1, 0.93, 0.9], "man dog bites": [0.89, 0.93, 1, 0.92], "dog man bites": [0.92, 0.9, 0.92, 1]}
+    - var counter = 0
+
+    +row
+        +row
+            +cell
+            for _, label in examples
+                +cell.u-text-center=label
+
+    each cells, label in examples
+        +row(counter ? null : "divider")
+            +cell=label
+            for cell in cells
+                +cell.u-text-center #[code=cell.toFixed(2)]
+                    | #[+procon(cell < 0.7 ? "con" : cell != 1 ? "pro" : "neutral")]
+        - counter++
+
 +h(2, "custom") Customising word vectors
 
 +under-construction
@@ -36,7 +157,3 @@ p
     | behaviours by modifying the #[code doc.user_hooks],
     | #[code doc.user_span_hooks] and #[code doc.user_token_hooks]
     | dictionaries.
-
-+h(2, "similarity") Similarity
-
-+under-construction
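The context lines above mention overriding similarity behaviour via the doc.user_hooks dictionaries. A minimal sketch of such a hook; the Jaccard-style metric here is a made-up example for illustration, not a recommendation:

import spacy

def tokenset_similarity(doc1, doc2):
    # made-up metric: Jaccard overlap of lowercased token sets
    s1 = set(t.lower_ for t in doc1)
    s2 = set(t.lower_ for t in doc2)
    return len(s1 & s2) / float(len(s1 | s2))

nlp = spacy.load('en')
doc = nlp(u'dog bites man')
doc.user_hooks['similarity'] = tokenset_similarity  # consulted by doc.similarity()
print(doc.similarity(nlp(u'man bites dog')))        # 1.0: identical token sets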