Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in: bc4dc2da4e
@@ -11,6 +11,23 @@ mixin section(id)
    block


//- Accordion (collapsible sections)
    title - [string] Section title.
    id - [string] Optional section ID for permalinks.
    level - [integer] Headline level for section title.

mixin accordion(title, id, level)
    section.o-accordion.o-block
        +h(level || 4).o-no-block(id=id)
            button.o-accordion__button.o-grid.o-grid--vcenter.o-grid--space.js-accordion(aria-expanded="false")=title
                svg.o-accordion__icon(width="20" height="20" viewBox="0 0 10 10" aria-hidden="true" focusable="false")
                    rect.o-accordion__hide(height="8" width="2" y="1" x="4")
                    rect(height="2" width="8" y="4" x="1")

        .o-accordion__content(hidden="")
            block


//- Headlines Helper Mixin
    level - [integer] 1, 2, 3, 4, or 5
@@ -50,7 +50,7 @@ for id in CURRENT_MODELS
+cell
span(data-tpl=id data-tpl-key=field) #[em n/a]

+row(data-tpl=id data-tpl-key="compat-wrapper" style="display: none")
+row(data-tpl=id data-tpl-key="compat-wrapper" hidden="")
+cell
+label Compat #[+help("Latest compatible model version for your spaCy installation").u-color-subtle]
+cell

@@ -58,15 +58,15 @@ for id in CURRENT_MODELS
select.o-field__select.u-text-small(data-tpl=id data-tpl-key="compat")
div(data-tpl=id data-tpl-key="compat-versions")

section(data-tpl=id data-tpl-key="benchmarks" style="display: none")
section(data-tpl=id data-tpl-key="benchmarks" hidden="")
+grid.o-block-small
for keys, label in MODEL_BENCHMARKS
.u-flex-full.u-padding-small(data-tpl=id data-tpl-key=label.toLowerCase() style="display: none")
.u-flex-full.u-padding-small(data-tpl=id data-tpl-key=label.toLowerCase() hidden="")
+table.o-block-small
+row("head")
+head-cell(colspan="2")=(MODEL_META["benchmark_" + label] || label)
for label, field in keys
+row(style="display: none")
+row(hidden="")
+cell.u-nowrap
+label=label
if MODEL_META[field]
@@ -41,6 +41,7 @@ if IS_PAGE
https://medium.com/dev-channel/es6-modules-in-chrome-canary-m60-ba588dfb8ab7

- ProgressBar = "new ProgressBar('.js-progress');"
- Accordion = "new Accordion('.js-accordion');"
- Changelog = "new Changelog('" + SOCIAL.github + "', 'spacy');"
- NavHighlighter = "new NavHighlighter('data-section', 'data-nav');"
- GitHubEmbed = "new GitHubEmbed('" + SOCIAL.github + "', 'data-gh-embed');"

@@ -57,6 +58,7 @@ if environment == "deploy"
if IS_PAGE
!=NavHighlighter
!=GitHubEmbed
!=Accordion
if HAS_MODELS
!=ModelLoader
if compare_models

@@ -74,6 +76,8 @@ else
!=NavHighlighter
| import GitHubEmbed from '/assets/js/github-embed.js';
!=GitHubEmbed
| import Accordion from '/assets/js/accordion.js';
!=Accordion
if HAS_MODELS
| import { ModelLoader } from '/assets/js/models.js';
!=ModelLoader
@@ -1,108 +1,112 @@
//- 💫 DOCS > API > ANNOTATION > DEPENDENCY LABELS

+h(3, "dependency-parsing-english") English dependency labels

p
| The English dependency labels use the #[+a("http://www.clearnlp.com") ClearNLP]
| #[+a("http://www.mathcs.emory.edu/~choi/doc/clear-dependency-2012.pdf") CLEAR Style].
| This section lists the syntactic dependency labels assigned by
| spaCy's #[+a("/models") models]. The individual labels are
| language-specific and depend on the training corpus.

+table(["Label", "Description"])
+dep-row("acomp", "adjectival complement")
|
||||
+dep-row("advcl", "adverbial clause modifier")
|
||||
+dep-row("advmod", "adverbial modifier")
|
||||
+dep-row("agent", "agent")
|
||||
+dep-row("amod", "adjectival modifier")
|
||||
+dep-row("appos", "appositional modifier")
|
||||
+dep-row("attr", "attribute")
|
||||
+dep-row("aux", "auxiliary")
|
||||
+dep-row("auxpass", "auxiliary (passive)")
|
||||
+dep-row("cc", "coordinating conjunction")
|
||||
+dep-row("ccomp", "clausal complement")
|
||||
+dep-row("complm", "complementizer")
|
||||
+dep-row("conj", "conjunct")
|
||||
+dep-row("cop", "copula")
|
||||
+dep-row("csubj", "clausal subject")
|
||||
+dep-row("csubjpass", "clausal subject (passive)")
|
||||
+dep-row("dep", "unclassified dependent")
|
||||
+dep-row("det", "determiner")
|
||||
+dep-row("dobj", "direct object")
|
||||
+dep-row("expl", "expletive")
|
||||
+dep-row("hmod", "modifier in hyphenation")
|
||||
+dep-row("hyph", "hyphen")
|
||||
+dep-row("infmod", "infinitival modifier")
|
||||
+dep-row("intj", "interjection")
|
||||
+dep-row("iobj", "indirect object")
|
||||
+dep-row("mark", "marker")
|
||||
+dep-row("meta", "meta modifier")
|
||||
+dep-row("neg", "negation modifier")
|
||||
+dep-row("nmod", "modifier of nominal")
|
||||
+dep-row("nn", "noun compound modifier")
|
||||
+dep-row("npadvmod", "noun phrase as adverbial modifier")
|
||||
+dep-row("nsubj", "nominal subject")
|
||||
+dep-row("nsubjpass", "nominal subject (passive)")
|
||||
+dep-row("num", "number modifier")
|
||||
+dep-row("number", "number compound modifier")
|
||||
+dep-row("oprd", "object predicate")
|
||||
+dep-row("obj", "object")
|
||||
+dep-row("obl", "oblique nominal")
|
||||
+dep-row("parataxis", "parataxis")
|
||||
+dep-row("partmod", "participal modifier")
|
||||
+dep-row("pcomp", "complement of preposition")
|
||||
+dep-row("pobj", "object of preposition")
|
||||
+dep-row("poss", "possession modifier")
|
||||
+dep-row("possessive", "possessive modifier")
|
||||
+dep-row("preconj", "pre-correlative conjunction")
|
||||
+dep-row("prep", "prepositional modifier")
|
||||
+dep-row("prt", "particle")
|
||||
+dep-row("punct", "punctuation")
|
||||
+dep-row("quantmod", "modifier of quantifier")
|
||||
+dep-row("rcmod", "relative clause modifier")
|
||||
+dep-row("root", "root")
|
||||
+dep-row("xcomp", "open clausal complement")
+accordion("English", "dependency-parsing-english")
p
| The English dependency labels use the
| #[+a("http://www.mathcs.emory.edu/~choi/doc/clear-dependency-2012.pdf") CLEAR Style]
| by #[+a("http://www.clearnlp.com") ClearNLP].

+h(3, "dependency-parsing-german") German dependency labels
+table(["Label", "Description"])
+dep-row("acomp", "adjectival complement")
|
||||
+dep-row("advcl", "adverbial clause modifier")
|
||||
+dep-row("advmod", "adverbial modifier")
|
||||
+dep-row("agent", "agent")
|
||||
+dep-row("amod", "adjectival modifier")
|
||||
+dep-row("appos", "appositional modifier")
|
||||
+dep-row("attr", "attribute")
|
||||
+dep-row("aux", "auxiliary")
|
||||
+dep-row("auxpass", "auxiliary (passive)")
|
||||
+dep-row("cc", "coordinating conjunction")
|
||||
+dep-row("ccomp", "clausal complement")
|
||||
+dep-row("complm", "complementizer")
|
||||
+dep-row("conj", "conjunct")
|
||||
+dep-row("cop", "copula")
|
||||
+dep-row("csubj", "clausal subject")
|
||||
+dep-row("csubjpass", "clausal subject (passive)")
|
||||
+dep-row("dep", "unclassified dependent")
|
||||
+dep-row("det", "determiner")
|
||||
+dep-row("dobj", "direct object")
|
||||
+dep-row("expl", "expletive")
|
||||
+dep-row("hmod", "modifier in hyphenation")
|
||||
+dep-row("hyph", "hyphen")
|
||||
+dep-row("infmod", "infinitival modifier")
|
||||
+dep-row("intj", "interjection")
|
||||
+dep-row("iobj", "indirect object")
|
||||
+dep-row("mark", "marker")
|
||||
+dep-row("meta", "meta modifier")
|
||||
+dep-row("neg", "negation modifier")
|
||||
+dep-row("nmod", "modifier of nominal")
|
||||
+dep-row("nn", "noun compound modifier")
|
||||
+dep-row("npadvmod", "noun phrase as adverbial modifier")
|
||||
+dep-row("nsubj", "nominal subject")
|
||||
+dep-row("nsubjpass", "nominal subject (passive)")
|
||||
+dep-row("num", "number modifier")
|
||||
+dep-row("number", "number compound modifier")
|
||||
+dep-row("oprd", "object predicate")
|
||||
+dep-row("obj", "object")
|
||||
+dep-row("obl", "oblique nominal")
|
||||
+dep-row("parataxis", "parataxis")
|
||||
+dep-row("partmod", "participal modifier")
|
||||
+dep-row("pcomp", "complement of preposition")
|
||||
+dep-row("pobj", "object of preposition")
|
||||
+dep-row("poss", "possession modifier")
|
||||
+dep-row("possessive", "possessive modifier")
|
||||
+dep-row("preconj", "pre-correlative conjunction")
|
||||
+dep-row("prep", "prepositional modifier")
|
||||
+dep-row("prt", "particle")
|
||||
+dep-row("punct", "punctuation")
|
||||
+dep-row("quantmod", "modifier of quantifier")
|
||||
+dep-row("rcmod", "relative clause modifier")
|
||||
+dep-row("root", "root")
|
||||
+dep-row("xcomp", "open clausal complement")

p
| The German dependency labels use the
| #[+a("http://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/TIGERCorpus/annotation/index.html") TIGER Treebank]
| annotation scheme.
+accordion("German", "dependency-parsing-german")
p
| The German dependency labels use the
| #[+a("http://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/TIGERCorpus/annotation/index.html") TIGER Treebank]
| annotation scheme.

+table(["Label", "Description"])
+dep-row("ac", "adpositional case marker")
|
||||
+dep-row("adc", "adjective component")
|
||||
+dep-row("ag", "genitive attribute")
|
||||
+dep-row("ams", "measure argument of adjective")
|
||||
+dep-row("app", "apposition")
|
||||
+dep-row("avc", "adverbial phrase component")
|
||||
+dep-row("cc", "comparative complement")
|
||||
+dep-row("cd", "coordinating conjunction")
|
||||
+dep-row("cj", "conjunct")
|
||||
+dep-row("cm", "comparative conjunction")
|
||||
+dep-row("cp", "complementizer")
|
||||
+dep-row("cvc", "collocational verb construction")
|
||||
+dep-row("da", "dative")
|
||||
+dep-row("dh", "discourse-level head")
|
||||
+dep-row("dm", "discourse marker")
|
||||
+dep-row("ep", "expletive es")
|
||||
+dep-row("hd", "head")
|
||||
+dep-row("ju", "junctor")
|
||||
+dep-row("mnr", "postnominal modifier")
|
||||
+dep-row("mo", "modifier")
|
||||
+dep-row("ng", "negation")
|
||||
+dep-row("nk", "noun kernel element")
|
||||
+dep-row("nmc", "numerical component")
|
||||
+dep-row("oa", "accusative object")
|
||||
+dep-row("oa", "second accusative object")
|
||||
+dep-row("oc", "clausal object")
|
||||
+dep-row("og", "genitive object")
|
||||
+dep-row("op", "prepositional object")
|
||||
+dep-row("par", "parenthetical element")
|
||||
+dep-row("pd", "predicate")
|
||||
+dep-row("pg", "phrasal genitive")
|
||||
+dep-row("ph", "placeholder")
|
||||
+dep-row("pm", "morphological particle")
|
||||
+dep-row("pnc", "proper noun component")
|
||||
+dep-row("rc", "relative clause")
|
||||
+dep-row("re", "repeated element")
|
||||
+dep-row("rs", "reported speech")
|
||||
+dep-row("sb", "subject")
|
||||
+table(["Label", "Description"])
|
||||
+dep-row("ac", "adpositional case marker")
|
||||
+dep-row("adc", "adjective component")
|
||||
+dep-row("ag", "genitive attribute")
|
||||
+dep-row("ams", "measure argument of adjective")
|
||||
+dep-row("app", "apposition")
|
||||
+dep-row("avc", "adverbial phrase component")
|
||||
+dep-row("cc", "comparative complement")
|
||||
+dep-row("cd", "coordinating conjunction")
|
||||
+dep-row("cj", "conjunct")
|
||||
+dep-row("cm", "comparative conjunction")
|
||||
+dep-row("cp", "complementizer")
|
||||
+dep-row("cvc", "collocational verb construction")
|
||||
+dep-row("da", "dative")
|
||||
+dep-row("dh", "discourse-level head")
|
||||
+dep-row("dm", "discourse marker")
|
||||
+dep-row("ep", "expletive es")
|
||||
+dep-row("hd", "head")
|
||||
+dep-row("ju", "junctor")
|
||||
+dep-row("mnr", "postnominal modifier")
|
||||
+dep-row("mo", "modifier")
|
||||
+dep-row("ng", "negation")
|
||||
+dep-row("nk", "noun kernel element")
|
||||
+dep-row("nmc", "numerical component")
|
||||
+dep-row("oa", "accusative object")
|
||||
+dep-row("oa", "second accusative object")
|
||||
+dep-row("oc", "clausal object")
|
||||
+dep-row("og", "genitive object")
|
||||
+dep-row("op", "prepositional object")
|
||||
+dep-row("par", "parenthetical element")
|
||||
+dep-row("pd", "predicate")
|
||||
+dep-row("pg", "phrasal genitive")
|
||||
+dep-row("ph", "placeholder")
|
||||
+dep-row("pm", "morphological particle")
|
||||
+dep-row("pnc", "proper noun component")
|
||||
+dep-row("rc", "relative clause")
|
||||
+dep-row("re", "repeated element")
|
||||
+dep-row("rs", "reported speech")
|
||||
+dep-row("sb", "subject")
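The labels above can be inspected on a parsed Doc via each token's dep attribute. A minimal sketch, assuming a trained English pipeline such as en_core_web_sm is installed (the model name and example sentence are illustrative, not part of this commit):

    import spacy

    nlp = spacy.load('en_core_web_sm')
    doc = nlp(u"Autonomous cars shift insurance liability toward manufacturers")
    for token in doc:
        # token.dep_ holds the syntactic dependency label listed in the tables above
        print(token.text, token.dep_, token.head.text)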

@@ -1,134 +1,138 @@
//- 💫 DOCS > API > ANNOTATION > POS TAGS

+h(3, "pos-tagging-english") English part-of-speech tag scheme

p
| The English part-of-speech tagger uses the
| #[+a("https://catalog.ldc.upenn.edu/LDC2013T19") OntoNotes 5] version of
| the Penn Treebank tag set. We also map the tags to the simpler Google
| Universal POS tag set.
| This section lists the fine-grained and coarse-grained part-of-speech
| tags assigned by spaCy's #[+a("/models") models]. The individual mapping
| is specific to the training corpus and can be defined in the respective
| language data's #[+a("/usage/adding-languages#tag-map") #[code tag_map.py]].

+table(["Tag", "POS", "Morphology", "Description"])
+pos-row("-LRB-", "PUNCT", "PunctType=brck PunctSide=ini", "left round bracket")
|
||||
+pos-row("-PRB-", "PUNCT", "PunctType=brck PunctSide=fin", "right round bracket")
|
||||
+pos-row(",", "PUNCT", "PunctType=comm", "punctuation mark, comma")
|
||||
+pos-row(":", "PUNCT", "", "punctuation mark, colon or ellipsis")
|
||||
+pos-row(".", "PUNCT", "PunctType=peri", "punctuation mark, sentence closer")
|
||||
+pos-row("''", "PUNCT", "PunctType=quot PunctSide=fin", "closing quotation mark")
|
||||
+pos-row("\"\"", "PUNCT", "PunctType=quot PunctSide=fin", "closing quotation mark")
|
||||
+pos-row("#", "SYM", "SymType=numbersign", "symbol, number sign")
|
||||
+pos-row("``", "PUNCT", "PunctType=quot PunctSide=ini", "opening quotation mark")
|
||||
+pos-row("$", "SYM", "SymType=currency", "symbol, currency")
|
||||
+pos-row("ADD", "X", "", "email")
|
||||
+pos-row("AFX", "ADJ", "Hyph=yes", "affix")
|
||||
+pos-row("BES", "VERB", "", 'auxiliary "be"')
|
||||
+pos-row("CC", "CONJ", "ConjType=coor", "conjunction, coordinating")
|
||||
+pos-row("CD", "NUM", "NumType=card", "cardinal number")
|
||||
+pos-row("DT", "DET", "determiner")
|
||||
+pos-row("EX", "ADV", "AdvType=ex", "existential there")
|
||||
+pos-row("FW", "X", "Foreign=yes", "foreign word")
|
||||
+pos-row("GW", "X", "", "additional word in multi-word expression")
|
||||
+pos-row("HVS", "VERB", "", 'forms of "have"')
|
||||
+pos-row("HYPH", "PUNCT", "PunctType=dash", "punctuation mark, hyphen")
|
||||
+pos-row("IN", "ADP", "", "conjunction, subordinating or preposition")
|
||||
+pos-row("JJ", "ADJ", "Degree=pos", "adjective")
|
||||
+pos-row("JJR", "ADJ", "Degree=comp", "adjective, comparative")
|
||||
+pos-row("JJS", "ADJ", "Degree=sup", "adjective, superlative")
|
||||
+pos-row("LS", "PUNCT", "NumType=ord", "list item marker")
|
||||
+pos-row("MD", "VERB", "VerbType=mod", "verb, modal auxiliary")
|
||||
+pos-row("NFP", "PUNCT", "", "superfluous punctuation")
|
||||
+pos-row("NIL", "", "", "missing tag")
|
||||
+pos-row("NN", "NOUN", "Number=sing", "noun, singular or mass")
|
||||
+pos-row("NNP", "PROPN", "NounType=prop Number=sign", "noun, proper singular")
|
||||
+pos-row("NNPS", "PROPN", "NounType=prop Number=plur", "noun, proper plural")
|
||||
+pos-row("NNS", "NOUN", "Number=plur", "noun, plural")
|
||||
+pos-row("PDT", "ADJ", "AdjType=pdt PronType=prn", "predeterminer")
|
||||
+pos-row("POS", "PART", "Poss=yes", "possessive ending")
|
||||
+pos-row("PRP", "PRON", "PronType=prs", "pronoun, personal")
|
||||
+pos-row("PRP$", "ADJ", "PronType=prs Poss=yes", "pronoun, possessive")
|
||||
+pos-row("RB", "ADV", "Degree=pos", "adverb")
|
||||
+pos-row("RBR", "ADV", "Degree=comp", "adverb, comparative")
|
||||
+pos-row("RBS", "ADV", "Degree=sup", "adverb, superlative")
|
||||
+pos-row("RP", "PART", "", "adverb, particle")
|
||||
+pos-row("SP", "SPACE", "", "space")
|
||||
+pos-row("SYM", "SYM", "", "symbol")
|
||||
+pos-row("TO", "PART", "PartType=inf VerbForm=inf", "infinitival to")
|
||||
+pos-row("UH", "INTJ", "", "interjection")
|
||||
+pos-row("VB", "VERB", "VerbForm=inf", "verb, base form")
|
||||
+pos-row("VBD", "VERB", "VerbForm=fin Tense=past", "verb, past tense")
|
||||
+pos-row("VBG", "VERB", "VerbForm=part Tense=pres Aspect=prog", "verb, gerund or present participle")
|
||||
+pos-row("VBN", "VERB", "VerbForm=part Tense=past Aspect=perf", "verb, past participle")
|
||||
+pos-row("VBP", "VERB", "VerbForm=fin Tense=pres", "verb, non-3rd person singular present")
|
||||
+pos-row("VBZ", "VERB", "VerbForm=fin Tense=pres Number=sing Person=3", "verb, 3rd person singular present")
|
||||
+pos-row("WDT", "ADJ", "PronType=int|rel", "wh-determiner")
|
||||
+pos-row("WP", "NOUN", "PronType=int|rel", "wh-pronoun, personal")
|
||||
+pos-row("WP$", "ADJ", "Poss=yes PronType=int|rel", "wh-pronoun, possessive")
|
||||
+pos-row("WRB", "ADV", "PronType=int|rel", "wh-adverb")
|
||||
+pos-row("XX", "X", "", "unknown")
+accordion("English", "pos-tagging-english")
p
| The English part-of-speech tagger uses the
| #[+a("https://catalog.ldc.upenn.edu/LDC2013T19") OntoNotes 5] version of
| the Penn Treebank tag set. We also map the tags to the simpler Google
| Universal POS tag set.

+h(3, "pos-tagging-german") German part-of-speech tag scheme
+table(["Tag", "POS", "Morphology", "Description"])
+pos-row("-LRB-", "PUNCT", "PunctType=brck PunctSide=ini", "left round bracket")
|
||||
+pos-row("-PRB-", "PUNCT", "PunctType=brck PunctSide=fin", "right round bracket")
|
||||
+pos-row(",", "PUNCT", "PunctType=comm", "punctuation mark, comma")
|
||||
+pos-row(":", "PUNCT", "", "punctuation mark, colon or ellipsis")
|
||||
+pos-row(".", "PUNCT", "PunctType=peri", "punctuation mark, sentence closer")
|
||||
+pos-row("''", "PUNCT", "PunctType=quot PunctSide=fin", "closing quotation mark")
|
||||
+pos-row("\"\"", "PUNCT", "PunctType=quot PunctSide=fin", "closing quotation mark")
|
||||
+pos-row("#", "SYM", "SymType=numbersign", "symbol, number sign")
|
||||
+pos-row("``", "PUNCT", "PunctType=quot PunctSide=ini", "opening quotation mark")
|
||||
+pos-row("$", "SYM", "SymType=currency", "symbol, currency")
|
||||
+pos-row("ADD", "X", "", "email")
|
||||
+pos-row("AFX", "ADJ", "Hyph=yes", "affix")
|
||||
+pos-row("BES", "VERB", "", 'auxiliary "be"')
|
||||
+pos-row("CC", "CONJ", "ConjType=coor", "conjunction, coordinating")
|
||||
+pos-row("CD", "NUM", "NumType=card", "cardinal number")
|
||||
+pos-row("DT", "DET", "determiner")
|
||||
+pos-row("EX", "ADV", "AdvType=ex", "existential there")
|
||||
+pos-row("FW", "X", "Foreign=yes", "foreign word")
|
||||
+pos-row("GW", "X", "", "additional word in multi-word expression")
|
||||
+pos-row("HVS", "VERB", "", 'forms of "have"')
|
||||
+pos-row("HYPH", "PUNCT", "PunctType=dash", "punctuation mark, hyphen")
|
||||
+pos-row("IN", "ADP", "", "conjunction, subordinating or preposition")
|
||||
+pos-row("JJ", "ADJ", "Degree=pos", "adjective")
|
||||
+pos-row("JJR", "ADJ", "Degree=comp", "adjective, comparative")
|
||||
+pos-row("JJS", "ADJ", "Degree=sup", "adjective, superlative")
|
||||
+pos-row("LS", "PUNCT", "NumType=ord", "list item marker")
|
||||
+pos-row("MD", "VERB", "VerbType=mod", "verb, modal auxiliary")
|
||||
+pos-row("NFP", "PUNCT", "", "superfluous punctuation")
|
||||
+pos-row("NIL", "", "", "missing tag")
|
||||
+pos-row("NN", "NOUN", "Number=sing", "noun, singular or mass")
|
||||
+pos-row("NNP", "PROPN", "NounType=prop Number=sign", "noun, proper singular")
|
||||
+pos-row("NNPS", "PROPN", "NounType=prop Number=plur", "noun, proper plural")
|
||||
+pos-row("NNS", "NOUN", "Number=plur", "noun, plural")
|
||||
+pos-row("PDT", "ADJ", "AdjType=pdt PronType=prn", "predeterminer")
|
||||
+pos-row("POS", "PART", "Poss=yes", "possessive ending")
|
||||
+pos-row("PRP", "PRON", "PronType=prs", "pronoun, personal")
|
||||
+pos-row("PRP$", "ADJ", "PronType=prs Poss=yes", "pronoun, possessive")
|
||||
+pos-row("RB", "ADV", "Degree=pos", "adverb")
|
||||
+pos-row("RBR", "ADV", "Degree=comp", "adverb, comparative")
|
||||
+pos-row("RBS", "ADV", "Degree=sup", "adverb, superlative")
|
||||
+pos-row("RP", "PART", "", "adverb, particle")
|
||||
+pos-row("SP", "SPACE", "", "space")
|
||||
+pos-row("SYM", "SYM", "", "symbol")
|
||||
+pos-row("TO", "PART", "PartType=inf VerbForm=inf", "infinitival to")
|
||||
+pos-row("UH", "INTJ", "", "interjection")
|
||||
+pos-row("VB", "VERB", "VerbForm=inf", "verb, base form")
|
||||
+pos-row("VBD", "VERB", "VerbForm=fin Tense=past", "verb, past tense")
|
||||
+pos-row("VBG", "VERB", "VerbForm=part Tense=pres Aspect=prog", "verb, gerund or present participle")
|
||||
+pos-row("VBN", "VERB", "VerbForm=part Tense=past Aspect=perf", "verb, past participle")
|
||||
+pos-row("VBP", "VERB", "VerbForm=fin Tense=pres", "verb, non-3rd person singular present")
|
||||
+pos-row("VBZ", "VERB", "VerbForm=fin Tense=pres Number=sing Person=3", "verb, 3rd person singular present")
|
||||
+pos-row("WDT", "ADJ", "PronType=int|rel", "wh-determiner")
|
||||
+pos-row("WP", "NOUN", "PronType=int|rel", "wh-pronoun, personal")
|
||||
+pos-row("WP$", "ADJ", "Poss=yes PronType=int|rel", "wh-pronoun, possessive")
|
||||
+pos-row("WRB", "ADV", "PronType=int|rel", "wh-adverb")
|
||||
+pos-row("XX", "X", "", "unknown")

p
| The German part-of-speech tagger uses the
| #[+a("http://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/TIGERCorpus/annotation/index.html") TIGER Treebank]
| annotation scheme. We also map the tags to the simpler Google
| Universal POS tag set.
+accordion("German", "pos-tagging-german")
p
| The German part-of-speech tagger uses the
| #[+a("http://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/TIGERCorpus/annotation/index.html") TIGER Treebank]
| annotation scheme. We also map the tags to the simpler Google
| Universal POS tag set.

+table(["Tag", "POS", "Morphology", "Description"])
+pos-row("$(", "PUNCT", "PunctType=brck", "other sentence-internal punctuation mark")
|
||||
+pos-row("$,", "PUNCT", "PunctType=comm", "comma")
|
||||
+pos-row("$.", "PUNCT", "PunctType=peri", "sentence-final punctuation mark")
|
||||
+pos-row("ADJA", "ADJ", "", "adjective, attributive")
|
||||
+pos-row("ADJD", "ADJ", "Variant=short", "adjective, adverbial or predicative")
|
||||
+pos-row("ADV", "ADV", "", "adverb")
|
||||
+pos-row("APPO", "ADP", "AdpType=post", "postposition")
|
||||
+pos-row("APPR", "ADP", "AdpType=prep", "preposition; circumposition left")
|
||||
+pos-row("APPRART", "ADP", "AdpType=prep PronType=art", "preposition with article")
|
||||
+pos-row("APZR", "ADP", "AdpType=circ", "circumposition right")
|
||||
+pos-row("ART", "DET", "PronType=art", "definite or indefinite article")
|
||||
+pos-row("CARD", "NUM", "NumType=card", "cardinal number")
|
||||
+pos-row("FM", "X", "Foreign=yes", "foreign language material")
|
||||
+pos-row("ITJ", "INTJ", "", "interjection")
|
||||
+pos-row("KOKOM", "CONJ", "ConjType=comp", "comparative conjunction")
|
||||
+pos-row("KON", "CONJ", "", "coordinate conjunction")
|
||||
+pos-row("KOUI", "SCONJ", "", 'subordinate conjunction with "zu" and infinitive')
|
||||
+pos-row("KOUS", "SCONJ", "", "subordinate conjunction with sentence")
|
||||
+pos-row("NE", "PROPN", "", "proper noun")
|
||||
+pos-row("NNE", "PROPN", "", "proper noun")
|
||||
+pos-row("NN", "NOUN", "", "noun, singular or mass")
|
||||
+pos-row("PAV", "ADV", "PronType=dem", "pronominal adverb")
|
||||
+pos-row("PROAV", "ADV", "PronType=dem", "pronominal adverb")
|
||||
+pos-row("PDAT", "DET", "PronType=dem", "attributive demonstrative pronoun")
|
||||
+pos-row("PDS", "PRON", "PronType=dem", "substituting demonstrative pronoun")
|
||||
+pos-row("PIAT", "DET", "PronType=ind|neg|tot", "attributive indefinite pronoun without determiner")
|
||||
+pos-row("PIDAT", "DET", "AdjType=pdt PronType=ind|neg|tot", "attributive indefinite pronoun with determiner")
|
||||
+pos-row("PIS", "PRON", "PronType=ind|neg|tot", "substituting indefinite pronoun")
|
||||
+pos-row("PPER", "PRON", "PronType=prs", "non-reflexive personal pronoun")
|
||||
+pos-row("PPOSAT", "DET", "Poss=yes PronType=prs", "attributive possessive pronoun")
|
||||
+pos-row("PPOSS", "PRON", "PronType=rel", "substituting possessive pronoun")
|
||||
+pos-row("PRELAT", "DET", "PronType=rel", "attributive relative pronoun")
|
||||
+pos-row("PRELS", "PRON", "PronType=rel", "substituting relative pronoun")
|
||||
+pos-row("PRF", "PRON", "PronType=prs Reflex=yes", "reflexive personal pronoun")
|
||||
+pos-row("PTKA", "PART", "", "particle with adjective or adverb")
|
||||
+pos-row("PTKANT", "PART", "PartType=res", "answer particle")
|
||||
+pos-row("PTKNEG", "PART", "Negative=yes", "negative particle")
|
||||
+pos-row("PTKVZ", "PART", "PartType=vbp", "separable verbal particle")
|
||||
+pos-row("PTKZU", "PART", "PartType=inf", '"zu" before infinitive')
|
||||
+pos-row("PWAT", "DET", "PronType=int", "attributive interrogative pronoun")
|
||||
+pos-row("PWAV", "ADV", "PronType=int", "adverbial interrogative or relative pronoun")
|
||||
+pos-row("PWS", "PRON", "PronType=int", "substituting interrogative pronoun")
|
||||
+pos-row("TRUNC", "X", "Hyph=yes", "word remnant")
|
||||
+pos-row("VAFIN", "AUX", "Mood=ind VerbForm=fin", "finite verb, auxiliary")
|
||||
+pos-row("VAIMP", "AUX", "Mood=imp VerbForm=fin", "imperative, auxiliary")
|
||||
+pos-row("VAINF", "AUX", "VerbForm=inf", "infinitive, auxiliary")
|
||||
+pos-row("VAPP", "AUX", "Aspect=perf VerbForm=fin", "perfect participle, auxiliary")
|
||||
+pos-row("VMFIN", "VERB", "Mood=ind VerbForm=fin VerbType=mod", "finite verb, modal")
|
||||
+pos-row("VMINF", "VERB", "VerbForm=fin VerbType=mod", "infinitive, modal")
|
||||
+pos-row("VMPP", "VERB", "Aspect=perf VerbForm=part VerbType=mod", "perfect participle, modal")
|
||||
+pos-row("VVFIN", "VERB", "Mood=ind VerbForm=fin", "finite verb, full")
|
||||
+pos-row("VVIMP", "VERB", "Mood=imp VerbForm=fin", "imperative, full")
|
||||
+pos-row("VVINF", "VERB", "VerbForm=inf", "infinitive, full")
|
||||
+pos-row("VVIZU", "VERB", "VerbForm=inf", 'infinitive with "zu", full')
|
||||
+pos-row("VVPP", "VERB", "Aspect=perf VerbForm=part", "perfect participle, full")
|
||||
+pos-row("XY", "X", "", "non-word containing non-letter")
|
||||
+pos-row("SP", "SPACE", "", "space")
|
||||
+table(["Tag", "POS", "Morphology", "Description"])
|
||||
+pos-row("$(", "PUNCT", "PunctType=brck", "other sentence-internal punctuation mark")
|
||||
+pos-row("$,", "PUNCT", "PunctType=comm", "comma")
|
||||
+pos-row("$.", "PUNCT", "PunctType=peri", "sentence-final punctuation mark")
|
||||
+pos-row("ADJA", "ADJ", "", "adjective, attributive")
|
||||
+pos-row("ADJD", "ADJ", "Variant=short", "adjective, adverbial or predicative")
|
||||
+pos-row("ADV", "ADV", "", "adverb")
|
||||
+pos-row("APPO", "ADP", "AdpType=post", "postposition")
|
||||
+pos-row("APPR", "ADP", "AdpType=prep", "preposition; circumposition left")
|
||||
+pos-row("APPRART", "ADP", "AdpType=prep PronType=art", "preposition with article")
|
||||
+pos-row("APZR", "ADP", "AdpType=circ", "circumposition right")
|
||||
+pos-row("ART", "DET", "PronType=art", "definite or indefinite article")
|
||||
+pos-row("CARD", "NUM", "NumType=card", "cardinal number")
|
||||
+pos-row("FM", "X", "Foreign=yes", "foreign language material")
|
||||
+pos-row("ITJ", "INTJ", "", "interjection")
|
||||
+pos-row("KOKOM", "CONJ", "ConjType=comp", "comparative conjunction")
|
||||
+pos-row("KON", "CONJ", "", "coordinate conjunction")
|
||||
+pos-row("KOUI", "SCONJ", "", 'subordinate conjunction with "zu" and infinitive')
|
||||
+pos-row("KOUS", "SCONJ", "", "subordinate conjunction with sentence")
|
||||
+pos-row("NE", "PROPN", "", "proper noun")
|
||||
+pos-row("NNE", "PROPN", "", "proper noun")
|
||||
+pos-row("NN", "NOUN", "", "noun, singular or mass")
|
||||
+pos-row("PAV", "ADV", "PronType=dem", "pronominal adverb")
|
||||
+pos-row("PROAV", "ADV", "PronType=dem", "pronominal adverb")
|
||||
+pos-row("PDAT", "DET", "PronType=dem", "attributive demonstrative pronoun")
|
||||
+pos-row("PDS", "PRON", "PronType=dem", "substituting demonstrative pronoun")
|
||||
+pos-row("PIAT", "DET", "PronType=ind|neg|tot", "attributive indefinite pronoun without determiner")
|
||||
+pos-row("PIDAT", "DET", "AdjType=pdt PronType=ind|neg|tot", "attributive indefinite pronoun with determiner")
|
||||
+pos-row("PIS", "PRON", "PronType=ind|neg|tot", "substituting indefinite pronoun")
|
||||
+pos-row("PPER", "PRON", "PronType=prs", "non-reflexive personal pronoun")
|
||||
+pos-row("PPOSAT", "DET", "Poss=yes PronType=prs", "attributive possessive pronoun")
|
||||
+pos-row("PPOSS", "PRON", "PronType=rel", "substituting possessive pronoun")
|
||||
+pos-row("PRELAT", "DET", "PronType=rel", "attributive relative pronoun")
|
||||
+pos-row("PRELS", "PRON", "PronType=rel", "substituting relative pronoun")
|
||||
+pos-row("PRF", "PRON", "PronType=prs Reflex=yes", "reflexive personal pronoun")
|
||||
+pos-row("PTKA", "PART", "", "particle with adjective or adverb")
|
||||
+pos-row("PTKANT", "PART", "PartType=res", "answer particle")
|
||||
+pos-row("PTKNEG", "PART", "Negative=yes", "negative particle")
|
||||
+pos-row("PTKVZ", "PART", "PartType=vbp", "separable verbal particle")
|
||||
+pos-row("PTKZU", "PART", "PartType=inf", '"zu" before infinitive')
|
||||
+pos-row("PWAT", "DET", "PronType=int", "attributive interrogative pronoun")
|
||||
+pos-row("PWAV", "ADV", "PronType=int", "adverbial interrogative or relative pronoun")
|
||||
+pos-row("PWS", "PRON", "PronType=int", "substituting interrogative pronoun")
|
||||
+pos-row("TRUNC", "X", "Hyph=yes", "word remnant")
|
||||
+pos-row("VAFIN", "AUX", "Mood=ind VerbForm=fin", "finite verb, auxiliary")
|
||||
+pos-row("VAIMP", "AUX", "Mood=imp VerbForm=fin", "imperative, auxiliary")
|
||||
+pos-row("VAINF", "AUX", "VerbForm=inf", "infinitive, auxiliary")
|
||||
+pos-row("VAPP", "AUX", "Aspect=perf VerbForm=fin", "perfect participle, auxiliary")
|
||||
+pos-row("VMFIN", "VERB", "Mood=ind VerbForm=fin VerbType=mod", "finite verb, modal")
|
||||
+pos-row("VMINF", "VERB", "VerbForm=fin VerbType=mod", "infinitive, modal")
|
||||
+pos-row("VMPP", "VERB", "Aspect=perf VerbForm=part VerbType=mod", "perfect participle, modal")
|
||||
+pos-row("VVFIN", "VERB", "Mood=ind VerbForm=fin", "finite verb, full")
|
||||
+pos-row("VVIMP", "VERB", "Mood=imp VerbForm=fin", "imperative, full")
|
||||
+pos-row("VVINF", "VERB", "VerbForm=inf", "infinitive, full")
|
||||
+pos-row("VVIZU", "VERB", "VerbForm=inf", 'infinitive with "zu", full')
|
||||
+pos-row("VVPP", "VERB", "Aspect=perf VerbForm=part", "perfect participle, full")
|
||||
+pos-row("XY", "X", "", "non-word containing non-letter")
|
||||
+pos-row("SP", "SPACE", "", "space")
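The fine-grained tags above are exposed as token.tag_, and the mapped Universal POS tags as token.pos_. A minimal sketch, assuming an English pipeline such as en_core_web_sm is installed (the model name and sentence are illustrative, not part of this commit):

    import spacy

    nlp = spacy.load('en_core_web_sm')
    doc = nlp(u"Apple is looking at buying U.K. startup for $1 billion")
    for token in doc:
        # fine-grained Penn Treebank tag vs. coarse-grained Universal POS tag
        print(token.text, token.tag_, token.pos_, spacy.explain(token.tag_))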

website/api/_annotation/_text-processing.jade (new file, 55 lines)
@@ -0,0 +1,55 @@
//- 💫 DOCS > API > ANNOTATION > TEXT PROCESSING

+aside-code("Example").
    from spacy.lang.en import English
    nlp = English()
    tokens = nlp('Some\nspaces and\ttab characters')
    tokens_text = [t.text for t in tokens]
    assert tokens_text == ['Some', '\n', 'spaces', ' ', 'and',
                           '\t', 'tab', 'characters']

p
    | Tokenization standards are based on the
    | #[+a("https://catalog.ldc.upenn.edu/LDC2013T19") OntoNotes 5] corpus.
    | The tokenizer differs from most by including
    | #[strong tokens for significant whitespace]. Any sequence of
    | whitespace characters beyond a single space (#[code ' ']) is included
    | as a token. The whitespace tokens are useful for much the same reason
    | punctuation is – it's often an important delimiter in the text. By
    | preserving it in the token output, we are able to maintain a simple
    | alignment between the tokens and the original string, and we ensure
    | that #[strong no information is lost] during processing.

+h(3, "lemmatization") Lemmatization

+aside("Examples")
    | In English, this means:#[br]
    | #[strong Adjectives]: happier, happiest → happy#[br]
    | #[strong Adverbs]: worse, worst → badly#[br]
    | #[strong Nouns]: dogs, children → dog, child#[br]
    | #[strong Verbs]: writes, writing, wrote, written → write

p
    | A lemma is the uninflected form of a word. The English lemmatization
    | data is taken from #[+a("https://wordnet.princeton.edu") WordNet].
    | Lookup tables are taken from
    | #[+a("http://www.lexiconista.com/datasets/lemmatization/") Lexiconista].
    | spaCy also adds a #[strong special case for pronouns]: all pronouns
    | are lemmatized to the special token #[code -PRON-].

+infobox("About spaCy's custom pronoun lemma", "⚠️")
    | Unlike verbs and common nouns, there's no clear base form of a personal
    | pronoun. Should the lemma of "me" be "I", or should we normalize person
    | as well, giving "it" — or maybe "he"? spaCy's solution is to introduce a
    | novel symbol, #[code -PRON-], which is used as the lemma for
    | all personal pronouns.

+h(3, "sentence-boundary") Sentence boundary detection

p
    | Sentence boundaries are calculated from the syntactic parse tree, so
    | features such as punctuation and capitalisation play an important but
    | non-decisive role in determining the sentence boundaries. Usually this
    | means that the sentence boundaries will at least coincide with clause
    | boundaries, even given poorly punctuated text.
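A minimal sketch of the lemmatization and sentence boundary behaviour described above, assuming an English pipeline with a parser such as en_core_web_sm is installed (the model name and text are illustrative, not part of this commit):

    import spacy

    nlp = spacy.load('en_core_web_sm')
    doc = nlp(u"I was reading the paper. We wrote two more sentences.")

    # pronouns such as "I" and "We" receive the special lemma -PRON-
    print([token.lemma_ for token in doc])

    # sentence boundaries are derived from the dependency parse
    for sent in doc.sents:
        print(sent.text)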
@@ -205,10 +205,8 @@
"title": "Annotation Specifications",
"teaser": "Schemes used for labels, tags and training data.",
"menu": {
    "Tokenization": "tokenization",
    "Sentence Boundaries": "sbd",
    "Text Processing": "text-processing",
    "POS Tagging": "pos-tagging",
    "Lemmatization": "lemmatization",
    "Dependencies": "dependency-parsing",
    "Named Entities": "named-entities",
    "Models & Training": "training"
@@ -2,43 +2,9 @@

include ../_includes/_mixins

p This document describes the target annotations spaCy is trained to predict.


+section("tokenization")
    +h(2, "tokenization") Tokenization

    p
        | Tokenization standards are based on the
        | #[+a("https://catalog.ldc.upenn.edu/LDC2013T19") OntoNotes 5] corpus.
        | The tokenizer differs from most by including tokens for significant
        | whitespace. Any sequence of whitespace characters beyond a single space
        | (#[code ' ']) is included as a token.

    +aside-code("Example").
        from spacy.lang.en import English
        nlp = English()
        tokens = nlp('Some\nspaces and\ttab characters')
        tokens_text = [t.text for t in tokens]
        assert tokens_text == ['Some', '\n', 'spaces', ' ', 'and',
                               '\t', 'tab', 'characters']

    p
        | The whitespace tokens are useful for much the same reason punctuation is
        | – it's often an important delimiter in the text. By preserving it in the
        | token output, we are able to maintain a simple alignment between the
        | tokens and the original string, and we ensure that no information is
        | lost during processing.

+section("sbd")
    +h(2, "sentence-boundary") Sentence boundary detection

    p
        | Sentence boundaries are calculated from the syntactic parse tree, so
        | features such as punctuation and capitalisation play an important but
        | non-decisive role in determining the sentence boundaries. Usually this
        | means that the sentence boundaries will at least coincide with clause
        | boundaries, even given poorly punctuated text.
+section("text-processing")
    +h(2, "text-processing") Text Processing
    include _annotation/_text-processing

+section("pos-tagging")
    +h(2, "pos-tagging") Part-of-speech Tagging

@@ -50,30 +16,6 @@ p This document describes the target annotations spaCy is trained to predict.

    include _annotation/_pos-tags

+section("lemmatization")
    +h(2, "lemmatization") Lemmatization

    p A "lemma" is the uninflected form of a word. In English, this means:

    +list
        +item #[strong Adjectives]: The form like "happy", not "happier" or "happiest"
        +item #[strong Adverbs]: The form like "badly", not "worse" or "worst"
        +item #[strong Nouns]: The form like "dog", not "dogs"; like "child", not "children"
        +item #[strong Verbs]: The form like "write", not "writes", "writing", "wrote" or "written"

    p
        | The lemmatization data is taken from
        | #[+a("https://wordnet.princeton.edu") WordNet]. However, we also add a
        | special case for pronouns: all pronouns are lemmatized to the special
        | token #[code -PRON-].

    +infobox("About spaCy's custom pronoun lemma")
        | Unlike verbs and common nouns, there's no clear base form of a personal
        | pronoun. Should the lemma of "me" be "I", or should we normalize person
        | as well, giving "it" — or maybe "he"? spaCy's solution is to introduce a
        | novel symbol, #[code -PRON-], which is used as the lemma for
        | all personal pronouns.

+section("dependency-parsing")
    +h(2, "dependency-parsing") Syntactic Dependency Parsing
@@ -31,6 +31,9 @@ main > *:not(footer) li a,
main aside a
    @extend .u-link

a:focus
    outline: 1px dotted $color-theme


//- Selection
@@ -74,6 +74,42 @@
    border-radius: $border-radius
    box-shadow: $box-shadow


//- Accordion

.o-accordion
    &:not(:last-child)
        margin-bottom: 2rem

    .o-accordion__content
        margin-top: 3rem

.o-accordion__button
    font: inherit
    border-radius: $border-radius
    width: 100%
    padding: 1.5rem 2rem
    background: $color-subtle-light

    &[aria-expanded="true"]
        border-bottom: 3px solid $color-subtle
        border-bottom-left-radius: 0
        border-bottom-right-radius: 0

        .o-accordion__hide
            display: none

    &:focus:not([aria-expanded="true"])
        background: $color-subtle

.o-accordion__icon
    @include size(2.5rem)
    background: $color-theme
    color: $color-back
    border-radius: 50%
    padding: 0.35rem
    pointer-events: none

//- Box

.o-box
website/assets/js/accordion.js (new file, 25 lines)
@@ -0,0 +1,25 @@
'use strict';

import { $$ } from './util.js';

export default class Accordion {
    /**
     * Simple, collapsible accordion sections.
     * Inspired by: https://inclusive-components.design/collapsible-sections/
     * @param {string} selector - Query selector of button element.
     */
    constructor(selector) {
        [...$$(selector)].forEach(btn =>
            btn.addEventListener('click', this.onClick.bind(this)))
    }

    /**
     * Toggle aria-expanded attribute on button and visibility of section.
     * @param {node} Event.target - The accordion button.
     */
    onClick({ target }) {
        const exp = target.getAttribute('aria-expanded') === 'true' || false;
        target.setAttribute('aria-expanded', !exp);
        target.parentElement.nextElementSibling.hidden = exp;
    }
}
@@ -101,9 +101,9 @@ export class ModelLoader {
    showError(modelId) {
        const tpl = new Templater(modelId);
        tpl.get('table').removeAttribute('data-loading');
        tpl.get('error').style.display = 'block';
        tpl.get('error').hidden = false;
        for (let key of ['sources', 'pipeline', 'vecs', 'author', 'license']) {
            tpl.get(key).parentElement.parentElement.style.display = 'none';
            tpl.get(key).parentElement.parentElement.hidden = true;
        }
    }

@@ -114,13 +114,12 @@ export class ModelLoader {
        const modelId = `${data.lang}_${data.name}`;
        const model = `${modelId}-${data.version}`;
        const tpl = new Templater(modelId);
        tpl.get('error').style.display = 'none';
        this.renderDetails(tpl, data)
        this.renderBenchmarks(tpl, data.accuracy, data.speed);
        this.renderCompat(tpl, modelId);
        tpl.get('download').setAttribute('href', `${this.repo}/releases/tag/${model}`);
        tpl.get('table').removeAttribute('data-loading');
        tpl.get('error').style.display = 'none';
        tpl.get('error').hidden = true;
    }

    renderDetails(tpl, { version, size, description, notes, author, url,

@@ -133,9 +132,9 @@ export class ModelLoader {
        if (license) tpl.fill('license', formats.license(license, this.licenses[license]), true);
        if (sources) tpl.fill('sources', formats.sources(sources));
        if (vectors) tpl.fill('vecs', formats.vectors(vectors));
        else tpl.get('vecs').parentElement.parentElement.style.display = 'none';
        else tpl.get('vecs').parentElement.parentElement.hidden = true;
        if (pipeline && pipeline.length) tpl.fill('pipeline', formats.pipeline(pipeline), true);
        else tpl.get('pipeline').parentElement.parentElement.style.display = 'none';
        else tpl.get('pipeline').parentElement.parentElement.hidden = true;
    }

    renderBenchmarks(tpl, accuracy = {}, speed = {}) {

@@ -143,7 +142,7 @@ export class ModelLoader {
        this.renderTable(tpl, 'parser', accuracy, val => val.toFixed(2));
        this.renderTable(tpl, 'ner', accuracy, val => val.toFixed(2));
        this.renderTable(tpl, 'speed', speed, Math.round);
        tpl.get('benchmarks').style.display = 'block';
        tpl.get('benchmarks').hidden = false;
    }

    renderTable(tpl, id, benchmarks, converter = val => val) {

@@ -151,13 +150,13 @@ export class ModelLoader {
        for (let key of Object.keys(this.benchKeys[id])) {
            if (benchmarks[key]) tpl
                .fill(key, convertNumber(converter(benchmarks[key])))
                .parentElement.style.display = 'table-row';
                .parentElement.hidden = false;
        }
        tpl.get(id).style.display = 'block';
        tpl.get(id).hidden = false;
    }

    renderCompat(tpl, modelId) {
        tpl.get('compat-wrapper').style.display = 'table-row';
        tpl.get('compat-wrapper').hidden = false;
        const header = '<option selected disabled>spaCy version</option>';
        const options = Object.keys(this.compat)
            .map(v => `<option value="${v}">v${v}</option>`)

@@ -197,8 +196,8 @@ export class ModelComparer {
        this.colors = CHART_COLORS;
        this.fonts = CHART_FONTS;
        this.defaultModels = defaultModels;
        this.tpl.get('result').style.display = 'block';
        this.tpl.get('error').style.display = 'none';
        this.tpl.get('result').hidden = false;
        this.tpl.get('error').hidden = true;
        this.fetchCompat()
            .then(compat => this.init(compat))
            .catch(this.showError.bind(this))

@@ -257,8 +256,8 @@ export class ModelComparer {

    showError(err) {
        console.error(err || 'Error');
        this.tpl.get('result').style.display = 'none';
        this.tpl.get('error').style.display = 'block';
        this.tpl.get('result').hidden = true;
        this.tpl.get('error').hidden = false;
    }

    onSelect(ev) {

@@ -301,8 +300,8 @@ export class ModelComparer {
        this.chart.update();
        [model1, model2].forEach((model, i) => this.renderTable(metaKeys, i + 1, model));
        this.tpl.get('result').removeAttribute('data-loading');
        this.tpl.get('error').style.display = 'none';
        this.tpl.get('result').style.display = 'block';
        this.tpl.get('error').hidden = true;
        this.tpl.get('result').hidden = false;
    }

    renderTable(metaKeys, i, { lang, name, version, size, description,
@@ -12,6 +12,7 @@ import ProgressBar from './progress.js';
import NavHighlighter from './nav-highlighter.js';
import Changelog from './changelog.js';
import GitHubEmbed from './github-embed.js';
import Accordion from './accordion.js';
import { ModelLoader, ModelComparer } from './models.js';

// Assign to window so they are bundled by rollup

@@ -19,5 +20,6 @@ window.ProgressBar = ProgressBar;
window.NavHighlighter = NavHighlighter;
window.Changelog = Changelog;
window.GitHubEmbed = GitHubEmbed;
window.Accordion = Accordion;
window.ModelLoader = ModelLoader;
window.ModelComparer = ModelComparer;
@@ -30,7 +30,7 @@ div(data-tpl=TPL data-tpl-key="error")
| overview of the
| #[+a(gh("spacy-models") + "/releases") latest model releases].

div(data-tpl=TPL data-tpl-key="result" style="display: none")
div(data-tpl=TPL data-tpl-key="result" hidden="")
+chart("compare_accuracy", 350)

+aside-code("Download", "text")
@@ -181,6 +181,10 @@ p
+annotation-row(["their", "ADJ", "poss", "requests"], style)
+annotation-row(["requests", "NOUN", "dobj", "submit"], style)

+h(3, "dep-scheme") Dependency label scheme

include ../../api/_annotation/_dep-labels

+h(3, "displacy") Visualizing dependencies

p
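For the "Visualizing dependencies" heading above, the built-in displaCy visualizer can render the parse. A minimal sketch, assuming an English pipeline such as en_core_web_sm is installed (the model name and sentence are illustrative, not part of this commit):

    import spacy
    from spacy import displacy

    nlp = spacy.load('en_core_web_sm')
    doc = nlp(u"Autonomous cars shift insurance liability toward manufacturers")
    # displacy.serve() starts a local web server; in a Jupyter notebook,
    # displacy.render(doc, style='dep') returns the markup inline instead
    displacy.serve(doc, style='dep')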
@@ -2,8 +2,6 @@

include ../_spacy-101/_pos-deps

//-+aside("Help – spaCy's output is wrong!")

+h(3, "rule-based-morphology") Rule-based morphology

p

@@ -70,4 +68,6 @@ p
| list-based exception files, acquired from
| #[+a("https://wordnet.princeton.edu/") WordNet].

+h(3, "pos-scheme") Part-of-speech tag scheme

include ../../api/_annotation/_pos-tags