mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-25 11:23:40 +03:00
Use collapsible sections for pos/dep scheme and update
This will ensure a better overview as we add more schemes for more languages.
This commit is contained in:
parent
a9c77e01b4
commit
2d59dd374b
|
@ -1,12 +1,17 @@
|
||||||
//- 💫 DOCS > API > ANNOTATION > DEPENDENCY LABELS
|
//- 💫 DOCS > API > ANNOTATION > DEPENDENCY LABELS
|
||||||
|
|
||||||
+h(3, "dependency-parsing-english") English dependency labels
|
|
||||||
|
|
||||||
p
|
p
|
||||||
| The English dependency labels use the #[+a("http://www.clearnlp.com") ClearNLP]
|
| This section lists the syntactic dependency labels assigned by
|
||||||
| #[+a("http://www.mathcs.emory.edu/~choi/doc/clear-dependency-2012.pdf") CLEAR Style].
|
| spaCy's #[+a("/models") models]. The individual labels are
|
||||||
|
| language-specific and depend on the training corpus.
|
||||||
|
|
||||||
+table(["Label", "Description"])
|
+accordion("English", "dependency-parsing-english")
|
||||||
|
p
|
||||||
|
| The English dependency labels use the
|
||||||
|
| #[+a("http://www.mathcs.emory.edu/~choi/doc/clear-dependency-2012.pdf") CLEAR Style]
|
||||||
|
| by #[+a("http://www.clearnlp.com") ClearNLP].
|
||||||
|
|
||||||
|
+table(["Label", "Description"])
|
||||||
+dep-row("acomp", "adjectival complement")
|
+dep-row("acomp", "adjectival complement")
|
||||||
+dep-row("advcl", "adverbial clause modifier")
|
+dep-row("advcl", "adverbial clause modifier")
|
||||||
+dep-row("advmod", "adverbial modifier")
|
+dep-row("advmod", "adverbial modifier")
|
||||||
|
@ -60,14 +65,13 @@ p
|
||||||
+dep-row("root", "root")
|
+dep-row("root", "root")
|
||||||
+dep-row("xcomp", "open clausal complement")
|
+dep-row("xcomp", "open clausal complement")
|
||||||
|
|
||||||
+h(3, "dependency-parsing-german") German dependency labels
|
+accordion("German", "dependency-parsing-german")
|
||||||
|
p
|
||||||
p
|
|
||||||
| The German dependency labels use the
|
| The German dependency labels use the
|
||||||
| #[+a("http://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/TIGERCorpus/annotation/index.html") TIGER Treebank]
|
| #[+a("http://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/TIGERCorpus/annotation/index.html") TIGER Treebank]
|
||||||
| annotation scheme.
|
| annotation scheme.
|
||||||
|
|
||||||
+table(["Label", "Description"])
|
+table(["Label", "Description"])
|
||||||
+dep-row("ac", "adpositional case marker")
|
+dep-row("ac", "adpositional case marker")
|
||||||
+dep-row("adc", "adjective component")
|
+dep-row("adc", "adjective component")
|
||||||
+dep-row("ag", "genitive attribute")
|
+dep-row("ag", "genitive attribute")
|
||||||
|
|
|
@ -1,14 +1,19 @@
|
||||||
//- 💫 DOCS > API > ANNOTATION > POS TAGS
|
//- 💫 DOCS > API > ANNOTATION > POS TAGS
|
||||||
|
|
||||||
+h(3, "pos-tagging-english") English part-of-speech tag scheme
|
|
||||||
|
|
||||||
p
|
p
|
||||||
|
| This section lists the fine-grained and coarse-grained part-of-speech
|
||||||
|
| tags assigned by spaCy's #[+a("/models") models]. The individual mapping
|
||||||
|
| is specific to the training corpus and can be defined in the respective
|
||||||
|
| language data's #[+a("/usage/adding-languages#tag-map") #[code tag_map.py]].
|
||||||
|
|
||||||
|
+accordion("English", "pos-tagging-english")
|
||||||
|
p
|
||||||
| The English part-of-speech tagger uses the
|
| The English part-of-speech tagger uses the
|
||||||
| #[+a("https://catalog.ldc.upenn.edu/LDC2013T19") OntoNotes 5] version of
|
| #[+a("https://catalog.ldc.upenn.edu/LDC2013T19") OntoNotes 5] version of
|
||||||
| the Penn Treebank tag set. We also map the tags to the simpler Google
|
| the Penn Treebank tag set. We also map the tags to the simpler Google
|
||||||
| Universal POS tag set.
|
| Universal POS tag set.
|
||||||
|
|
||||||
+table(["Tag", "POS", "Morphology", "Description"])
|
+table(["Tag", "POS", "Morphology", "Description"])
|
||||||
+pos-row("-LRB-", "PUNCT", "PunctType=brck PunctSide=ini", "left round bracket")
|
+pos-row("-LRB-", "PUNCT", "PunctType=brck PunctSide=ini", "left round bracket")
|
||||||
+pos-row("-RRB-", "PUNCT", "PunctType=brck PunctSide=fin", "right round bracket")
|
+pos-row("-RRB-", "PUNCT", "PunctType=brck PunctSide=fin", "right round bracket")
|
||||||
+pos-row(",", "PUNCT", "PunctType=comm", "punctuation mark, comma")
|
+pos-row(",", "PUNCT", "PunctType=comm", "punctuation mark, comma")
|
||||||
|
@ -66,15 +71,14 @@ p
|
||||||
+pos-row("WRB", "ADV", "PronType=int|rel", "wh-adverb")
|
+pos-row("WRB", "ADV", "PronType=int|rel", "wh-adverb")
|
||||||
+pos-row("XX", "X", "", "unknown")
|
+pos-row("XX", "X", "", "unknown")
|
||||||
|
|
||||||
+h(3, "pos-tagging-german") German part-of-speech tag scheme
|
+accordion("German", "pos-tagging-german")
|
||||||
|
p
|
||||||
p
|
|
||||||
| The German part-of-speech tagger uses the
|
| The German part-of-speech tagger uses the
|
||||||
| #[+a("http://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/TIGERCorpus/annotation/index.html") TIGER Treebank]
|
| #[+a("http://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/TIGERCorpus/annotation/index.html") TIGER Treebank]
|
||||||
| annotation scheme. We also map the tags to the simpler Google
|
| annotation scheme. We also map the tags to the simpler Google
|
||||||
| Universal POS tag set.
|
| Universal POS tag set.
|
||||||
|
|
||||||
+table(["Tag", "POS", "Morphology", "Description"])
|
+table(["Tag", "POS", "Morphology", "Description"])
|
||||||
+pos-row("$(", "PUNCT", "PunctType=brck", "other sentence-internal punctuation mark")
|
+pos-row("$(", "PUNCT", "PunctType=brck", "other sentence-internal punctuation mark")
|
||||||
+pos-row("$,", "PUNCT", "PunctType=comm", "comma")
|
+pos-row("$,", "PUNCT", "PunctType=comm", "comma")
|
||||||
+pos-row("$.", "PUNCT", "PunctType=peri", "sentence-final punctuation mark")
|
+pos-row("$.", "PUNCT", "PunctType=peri", "sentence-final punctuation mark")
|
||||||
|
|
|
@ -181,6 +181,10 @@ p
|
||||||
+annotation-row(["their", "ADJ", "poss", "requests"], style)
|
+annotation-row(["their", "ADJ", "poss", "requests"], style)
|
||||||
+annotation-row(["requests", "NOUN", "dobj", "submit"], style)
|
+annotation-row(["requests", "NOUN", "dobj", "submit"], style)
|
||||||
|
|
||||||
|
+h(3, "dep-scheme") Dependency label scheme
|
||||||
|
|
||||||
|
include ../../api/_annotation/_dep-labels
|
||||||
|
|
||||||
+h(3, "displacy") Visualizing dependencies
|
+h(3, "displacy") Visualizing dependencies
|
||||||
|
|
||||||
p
|
p
|
||||||
|
|
|
@ -68,4 +68,6 @@ p
|
||||||
| list-based exception files, acquired from
|
| list-based exception files, acquired from
|
||||||
| #[+a("https://wordnet.princeton.edu/") WordNet].
|
| #[+a("https://wordnet.princeton.edu/") WordNet].
|
||||||
|
|
||||||
|
+h(3, "pos-scheme") Part-of-speech tag scheme
|
||||||
|
|
||||||
include ../../api/_annotation/_pos-tags
|
include ../../api/_annotation/_pos-tags
|
||||||
|
|
Loading…
Reference in New Issue
Block a user