diff --git a/website/docs/api/_annotation/_pos-tags.jade b/website/docs/api/_annotation/_pos-tags.jade index d3f561c3f..d3ceef777 100644 --- a/website/docs/api/_annotation/_pos-tags.jade +++ b/website/docs/api/_annotation/_pos-tags.jade @@ -1,136 +1,139 @@ //- 💫 DOCS > API > ANNOTATION > POS TAGS -mixin pos-row(...row) - +row - each cell in row - +cell - each item in cell.split(" ") - if item - | #[code=item] ++infobox("Tip") + | In spaCy v1.8.3+, you can also use #[code spacy.explain()] to get the + | description for the string representation of a tag. For example, + | #[code spacy.explain("RB")] will return "adverb". + ++h(3, "pos-tagging-english") English part-of-speech tag scheme p - | The part-of-speech tagger uses the + | The English part-of-speech tagger uses the | #[+a("https://catalog.ldc.upenn.edu/LDC2013T19") OntoNotes 5] version of | the Penn Treebank tag set. We also map the tags to the simpler Google | Universal POS tag set. -+h(3, "pos-tagging-english") English part-of-speech tag scheme - -+table(["Tag", "POS", "Morphology"]) - +pos-row("-LRB-", "PUNCT", "PunctType=brck PunctSide=ini") - +pos-row("-PRB-", "PUNCT", "PunctType=brck PunctSide=fin") - +pos-row(",", "PUNCT", "PunctType=comm") - +pos-row(":", "PUNCT", "") - +pos-row(".", "PUNCT", "PunctType=peri") - +pos-row("''", "PUNCT", "PunctType=quot PunctSide=fin") - +pos-row("\"\"", "PUNCT", "PunctType=quot PunctSide=fin") - +pos-row("#", "SYM", "SymType=numbersign") - +pos-row("``", "PUNCT", "PunctType=quot PunctSide=ini") - +pos-row("$", "SYM", "SymType=currency") - +pos-row("ADD", "X", "") - +pos-row("AFX", "ADJ", "Hyph=yes") - +pos-row("BES", "VERB", "") - +pos-row("CC", "CONJ", "ConjType=coor") - +pos-row("CD", "NUM", "NumType=card") - +pos-row("DT", "DET", "") - +pos-row("EX", "ADV", "AdvType=ex") - +pos-row("FW", "X", "Foreign=yes") - +pos-row("GW", "X", "") - +pos-row("HVS", "VERB", "") - +pos-row("HYPH", "PUNCT", "PunctType=dash") - +pos-row("IN", "ADP", "") - +pos-row("JJ", "ADJ", 
"Degree=pos") - +pos-row("JJR", "ADJ", "Degree=comp") - +pos-row("JJS", "ADJ", "Degree=sup") - +pos-row("LS", "PUNCT", "NumType=ord") - +pos-row("MD", "VERB", "VerbType=mod") - +pos-row("NFP", "PUNCT", "") - +pos-row("NIL", "", "") - +pos-row("NN", "NOUN", "Number=sing") - +pos-row("NNP", "PROPN", "NounType=prop Number=sign") - +pos-row("NNPS", "PROPN", "NounType=prop Number=plur") - +pos-row("NNS", "NOUN", "Number=plur") - +pos-row("PDT", "ADJ", "AdjType=pdt PronType=prn") - +pos-row("POS", "PART", "Poss=yes") - +pos-row("PRP", "PRON", "PronType=prs") - +pos-row("PRP$", "ADJ", "PronType=prs Poss=yes") - +pos-row("RB", "ADV", "Degree=pos") - +pos-row("RBR", "ADV", "Degree=comp") - +pos-row("RBS", "ADV", "Degree=sup") - +pos-row("RP", "PART", "") - +pos-row("SP", "SPACE", "") - +pos-row("SYM", "SYM", "") - +pos-row("TO", "PART", "PartType=inf VerbForm=inf") - +pos-row("UH", "INTJ", "") - +pos-row("VB", "VERB", "VerbForm=inf") - +pos-row("VBD", "VERB", "VerbForm=fin Tense=past") - +pos-row("VBG", "VERB", "VerbForm=part Tense=pres Aspect=prog") - +pos-row("VBN", "VERB", "VerbForm=part Tense=past Aspect=perf") - +pos-row("VBP", "VERB", "VerbForm=fin Tense=pres") - +pos-row("VBZ", "VERB", "VerbForm=fin Tense=pres Number=sing Person=3") - +pos-row("WDT", "ADJ", "PronType=int|rel") - +pos-row("WP", "NOUN", "PronType=int|rel") - +pos-row("WP$", "ADJ", "Poss=yes PronType=int|rel") - +pos-row("WRB", "ADV", "PronType=int|rel") - +pos-row("XX", "X", "") ++table(["Tag", "POS", "Morphology", "Description"]) + +pos-row("-LRB-", "PUNCT", "PunctType=brck PunctSide=ini", "left round bracket") + +pos-row("-PRB-", "PUNCT", "PunctType=brck PunctSide=fin", "right round bracket") + +pos-row(",", "PUNCT", "PunctType=comm", "punctuation mark, comma") + +pos-row(":", "PUNCT", "", "punctuation mark, colon or ellipsis") + +pos-row(".", "PUNCT", "PunctType=peri", "punctuation mark, sentence closer") + +pos-row("''", "PUNCT", "PunctType=quot PunctSide=fin", "closing quotation mark") + 
+pos-row("\"\"", "PUNCT", "PunctType=quot PunctSide=fin", "closing quotation mark") + +pos-row("#", "SYM", "SymType=numbersign", "symbol, number sign") + +pos-row("``", "PUNCT", "PunctType=quot PunctSide=ini", "opening quotation mark") + +pos-row("$", "SYM", "SymType=currency", "symbol, currency") + +pos-row("ADD", "X", "", "email") + +pos-row("AFX", "ADJ", "Hyph=yes", "affix") + +pos-row("BES", "VERB", "", 'auxiliary "be"') + +pos-row("CC", "CONJ", "ConjType=coor", "conjunction, coordinating") + +pos-row("CD", "NUM", "NumType=card", "cardinal number") + +pos-row("DT", "DET", "", "determiner") + +pos-row("EX", "ADV", "AdvType=ex", "existential there") + +pos-row("FW", "X", "Foreign=yes", "foreign word") + +pos-row("GW", "X", "", "additional word in multi-word expression") + +pos-row("HVS", "VERB", "", 'forms of "have"') + +pos-row("HYPH", "PUNCT", "PunctType=dash", "punctuation mark, hyphen") + +pos-row("IN", "ADP", "", "conjunction, subordinating or preposition") + +pos-row("JJ", "ADJ", "Degree=pos", "adjective") + +pos-row("JJR", "ADJ", "Degree=comp", "adjective, comparative") + +pos-row("JJS", "ADJ", "Degree=sup", "adjective, superlative") + +pos-row("LS", "PUNCT", "NumType=ord", "list item marker") + +pos-row("MD", "VERB", "VerbType=mod", "verb, modal auxiliary") + +pos-row("NFP", "PUNCT", "", "superfluous punctuation") + +pos-row("NIL", "", "", "missing tag") + +pos-row("NN", "NOUN", "Number=sing", "noun, singular or mass") + +pos-row("NNP", "PROPN", "NounType=prop Number=sing", "noun, proper singular") + +pos-row("NNPS", "PROPN", "NounType=prop Number=plur", "noun, proper plural") + +pos-row("NNS", "NOUN", "Number=plur", "noun, plural") + +pos-row("PDT", "ADJ", "AdjType=pdt PronType=prn", "predeterminer") + +pos-row("POS", "PART", "Poss=yes", "possessive ending") + +pos-row("PRP", "PRON", "PronType=prs", "pronoun, personal") + +pos-row("PRP$", "ADJ", "PronType=prs Poss=yes", "pronoun, possessive") + +pos-row("RB", "ADV", "Degree=pos", "adverb") + +pos-row("RBR",
"ADV", "Degree=comp", "adverb, comparative") + +pos-row("RBS", "ADV", "Degree=sup", "adverb, superlative") + +pos-row("RP", "PART", "", "adverb, particle") + +pos-row("SP", "SPACE", "", "space") + +pos-row("SYM", "SYM", "", "symbol") + +pos-row("TO", "PART", "PartType=inf VerbForm=inf", "infinitival to") + +pos-row("UH", "INTJ", "", "interjection") + +pos-row("VB", "VERB", "VerbForm=inf", "verb, base form") + +pos-row("VBD", "VERB", "VerbForm=fin Tense=past", "verb, past tense") + +pos-row("VBG", "VERB", "VerbForm=part Tense=pres Aspect=prog", "verb, gerund or present participle") + +pos-row("VBN", "VERB", "VerbForm=part Tense=past Aspect=perf", "verb, past participle") + +pos-row("VBP", "VERB", "VerbForm=fin Tense=pres", "verb, non-3rd person singular present") + +pos-row("VBZ", "VERB", "VerbForm=fin Tense=pres Number=sing Person=3", "verb, 3rd person singular present") + +pos-row("WDT", "ADJ", "PronType=int|rel", "wh-determiner") + +pos-row("WP", "NOUN", "PronType=int|rel", "wh-pronoun, personal") + +pos-row("WP$", "ADJ", "Poss=yes PronType=int|rel", "wh-pronoun, possessive") + +pos-row("WRB", "ADV", "PronType=int|rel", "wh-adverb") + +pos-row("XX", "X", "", "unknown") +h(3, "pos-tagging-german") German part-of-speech tag scheme -+table(["Tag", "POS", "Morphology"]) - +pos-row("$(", "PUNCT", "PunctType=brck") - +pos-row("$,", "PUNCT", "PunctType=comm") - +pos-row("$.", "PUNCT", "PunctType=peri") - +pos-row("ADJA", "ADJ", "") - +pos-row("ADJD", "ADJ", "Variant=short") - +pos-row("ADV", "ADV", "") - +pos-row("APPO", "ADP", "AdpType=post") - +pos-row("APPR", "ADP", "AdpType=prep") - +pos-row("APPRART", "ADP", "AdpType=prep PronType=art") - +pos-row("APZR", "ADP", "AdpType=circ") - +pos-row("ART", "DET", "PronType=art") - +pos-row("CARD", "NUM", "NumType=card") - +pos-row("FM", "X", "Foreign=yes") - +pos-row("ITJ", "INTJ", "") - +pos-row("KOKOM", "CONJ", "ConjType=comp") - +pos-row("KON", "CONJ", "") - +pos-row("KOUI", "SCONJ", "") - +pos-row("KOUS", "SCONJ", "") - 
+pos-row("NE", "PROPN", "") - +pos-row("NNE", "PROPN", "") - +pos-row("NN", "NOUN", "") - +pos-row("PAV", "ADV", "PronType=dem") - +pos-row("PROAV", "ADV", "PronType=dem") - +pos-row("PDAT", "DET", "PronType=dem") - +pos-row("PDS", "PRON", "PronType=dem") - +pos-row("PIAT", "DET", "PronType=ind|neg|tot") - +pos-row("PIDAT", "DET", "AdjType=pdt PronType=ind|neg|tot") - +pos-row("PIS", "PRON", "PronType=ind|neg|tot") - +pos-row("PPER", "PRON", "PronType=prs") - +pos-row("PPOSAT", "DET", "Poss=yes PronType=prs") - +pos-row("PPOSS", "PRON", "PronType=rel") - +pos-row("PRELAT", "DET", "PronType=rel") - +pos-row("PRELS", "PRON", "PronType=rel") - +pos-row("PRF", "PRON", "PronType=prs Reflex=yes") - +pos-row("PTKA", "PART", "") - +pos-row("PTKANT", "PART", "PartType=res") - +pos-row("PTKNEG", "PART", "Negative=yes") - +pos-row("PTKVZ", "PART", "PartType=vbp") - +pos-row("PTKZU", "PART", "PartType=inf") - +pos-row("PWAT", "DET", "PronType=int") - +pos-row("PWAV", "ADV", "PronType=int") - +pos-row("PWS", "PRON", "PronType=int") - +pos-row("TRUNC", "X", "Hyph=yes") - +pos-row("VAFIN", "AUX", "Mood=ind VerbForm=fin") - +pos-row("VAIMP", "AUX", "Mood=imp VerbForm=fin") - +pos-row("VAINF", "AUX", "VerbForm=inf") - +pos-row("VAPP", "AUX", "Aspect=perf VerbForm=fin") - +pos-row("VMFIN", "VERB", "Mood=ind VerbForm=fin VerbType=mod") - +pos-row("VMINF", "VERB", "VerbForm=fin VerbType=mod") - +pos-row("VMPP", "VERB", "Aspect=perf VerbForm=part VerbType=mod") - +pos-row("VVFIN", "VERB", "Mood=ind VerbForm=fin") - +pos-row("VVIMP", "VERB", "Mood=imp VerbForm=fin") - +pos-row("VVINF", "VERB", "VerbForm=inf") - +pos-row("VVIZU", "VERB", "VerbForm=inf") - +pos-row("VVPP", "VERB", "Aspect=perf VerbForm=part") - +pos-row("XY", "X", "") - +pos-row("SP", "SPACE", "") +p + | The German part-of-speech tagger uses the + | #[+a("http://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/TIGERCorpus/annotation/index.html") TIGER Treebank] + | annotation scheme. 
We also map the tags to the simpler Google + | Universal POS tag set. + ++table(["Tag", "POS", "Morphology", "Description"]) + +pos-row("$(", "PUNCT", "PunctType=brck", "other sentence-internal punctuation mark") + +pos-row("$,", "PUNCT", "PunctType=comm", "comma") + +pos-row("$.", "PUNCT", "PunctType=peri", "sentence-final punctuation mark") + +pos-row("ADJA", "ADJ", "", "adjective, attributive") + +pos-row("ADJD", "ADJ", "Variant=short", "adjective, adverbial or predicative") + +pos-row("ADV", "ADV", "", "adverb") + +pos-row("APPO", "ADP", "AdpType=post", "postposition") + +pos-row("APPR", "ADP", "AdpType=prep", "preposition; circumposition left") + +pos-row("APPRART", "ADP", "AdpType=prep PronType=art", "preposition with article") + +pos-row("APZR", "ADP", "AdpType=circ", "circumposition right") + +pos-row("ART", "DET", "PronType=art", "definite or indefinite article") + +pos-row("CARD", "NUM", "NumType=card", "cardinal number") + +pos-row("FM", "X", "Foreign=yes", "foreign language material") + +pos-row("ITJ", "INTJ", "", "interjection") + +pos-row("KOKOM", "CONJ", "ConjType=comp", "comparative conjunction") + +pos-row("KON", "CONJ", "", "coordinate conjunction") + +pos-row("KOUI", "SCONJ", "", 'subordinate conjunction with "zu" and infinitive') + +pos-row("KOUS", "SCONJ", "", "subordinate conjunction with sentence") + +pos-row("NE", "PROPN", "", "proper noun") + +pos-row("NNE", "PROPN", "", "proper noun") + +pos-row("NN", "NOUN", "", "noun, singular or mass") + +pos-row("PAV", "ADV", "PronType=dem", "pronominal adverb") + +pos-row("PROAV", "ADV", "PronType=dem", "pronominal adverb") + +pos-row("PDAT", "DET", "PronType=dem", "attributive demonstrative pronoun") + +pos-row("PDS", "PRON", "PronType=dem", "substituting demonstrative pronoun") + +pos-row("PIAT", "DET", "PronType=ind|neg|tot", "attributive indefinite pronoun without determiner") + +pos-row("PIDAT", "DET", "AdjType=pdt PronType=ind|neg|tot", "attributive indefinite pronoun with determiner") + 
+pos-row("PIS", "PRON", "PronType=ind|neg|tot", "substituting indefinite pronoun") + +pos-row("PPER", "PRON", "PronType=prs", "non-reflexive personal pronoun") + +pos-row("PPOSAT", "DET", "Poss=yes PronType=prs", "attributive possessive pronoun") + +pos-row("PPOSS", "PRON", "PronType=rel", "substituting possessive pronoun") + +pos-row("PRELAT", "DET", "PronType=rel", "attributive relative pronoun") + +pos-row("PRELS", "PRON", "PronType=rel", "substituting relative pronoun") + +pos-row("PRF", "PRON", "PronType=prs Reflex=yes", "reflexive personal pronoun") + +pos-row("PTKA", "PART", "", "particle with adjective or adverb") + +pos-row("PTKANT", "PART", "PartType=res", "answer particle") + +pos-row("PTKNEG", "PART", "Negative=yes", "negative particle") + +pos-row("PTKVZ", "PART", "PartType=vbp", "separable verbal particle") + +pos-row("PTKZU", "PART", "PartType=inf", '"zu" before infinitive') + +pos-row("PWAT", "DET", "PronType=int", "attributive interrogative pronoun") + +pos-row("PWAV", "ADV", "PronType=int", "adverbial interrogative or relative pronoun") + +pos-row("PWS", "PRON", "PronType=int", "substituting interrogative pronoun") + +pos-row("TRUNC", "X", "Hyph=yes", "word remnant") + +pos-row("VAFIN", "AUX", "Mood=ind VerbForm=fin", "finite verb, auxiliary") + +pos-row("VAIMP", "AUX", "Mood=imp VerbForm=fin", "imperative, auxiliary") + +pos-row("VAINF", "AUX", "VerbForm=inf", "infinitive, auxiliary") + +pos-row("VAPP", "AUX", "Aspect=perf VerbForm=fin", "perfect participle, auxiliary") + +pos-row("VMFIN", "VERB", "Mood=ind VerbForm=fin VerbType=mod", "finite verb, modal") + +pos-row("VMINF", "VERB", "VerbForm=fin VerbType=mod", "infinitive, modal") + +pos-row("VMPP", "VERB", "Aspect=perf VerbForm=part VerbType=mod", "perfect participle, modal") + +pos-row("VVFIN", "VERB", "Mood=ind VerbForm=fin", "finite verb, full") + +pos-row("VVIMP", "VERB", "Mood=imp VerbForm=fin", "imperative, full") + +pos-row("VVINF", "VERB", "VerbForm=inf", "infinitive, full") + 
+pos-row("VVIZU", "VERB", "VerbForm=inf", 'infinitive with "zu", full') + +pos-row("VVPP", "VERB", "Aspect=perf VerbForm=part", "perfect participle, full") + +pos-row("XY", "X", "", "non-word containing non-letter") + +pos-row("SP", "SPACE", "", "space")