mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	* Add glossary entry for root. There was already one, but it was lowercase; maybe that should be removed? * Remove lowercase root. On reflection, that was probably just a mistake. * Add lowercase root back. It's harmless to leave it there.
		
			
				
	
	
		
			360 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			360 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import warnings
 | |
| from .errors import Warnings
 | |
| 
 | |
| 
 | |
def explain(term):
    """Get a description for a given POS tag, dependency label or entity type.

    The lookup is an exact, case-sensitive match against the keys of
    `GLOSSARY`. A term that is not in the glossary is not an error: warning
    W118 is emitted through `warnings.warn` and `None` is returned.

    term (str): The term to explain.
    RETURNS (str): The explanation, or `None` if not found in the glossary.

    EXAMPLE:
        >>> spacy.explain("NORP")
        >>> doc = nlp("Hello world")
        >>> print([(w.text, w.tag_, spacy.explain(w.tag_)) for w in doc])
    """
    if term in GLOSSARY:
        return GLOSSARY[term]
    # Unknown term: warn instead of raising so callers can keep iterating
    # over tags/labels that have no glossary entry.
    warnings.warn(Warnings.W118.format(term=term))
    return None
 | |
| 
 | |
| 
 | |
# Maps a term (POS tag, chunk label, dependency label or entity type) to a
# short human-readable description. Keys are matched exactly, so upper- and
# lower-case variants (e.g. "ROOT" / "root") are separate entries.
GLOSSARY = {
    # POS tags
    # Universal POS Tags
    # http://universaldependencies.org/u/pos/
    "ADJ": "adjective",
    "ADP": "adposition",
    "ADV": "adverb",
    "AUX": "auxiliary",
    "CONJ": "conjunction",
    "CCONJ": "coordinating conjunction",
    "DET": "determiner",
    "INTJ": "interjection",
    "NOUN": "noun",
    "NUM": "numeral",
    "PART": "particle",
    "PRON": "pronoun",
    "PROPN": "proper noun",
    "PUNCT": "punctuation",
    "SCONJ": "subordinating conjunction",
    "SYM": "symbol",
    "VERB": "verb",
    "X": "other",
    "EOL": "end of line",
    "SPACE": "space",
    # POS tags (English)
    # OntoNotes 5 / Penn Treebank
    # https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
    ".": "punctuation mark, sentence closer",
    ",": "punctuation mark, comma",
    "-LRB-": "left round bracket",
    "-RRB-": "right round bracket",
    "``": "opening quotation mark",
    '""': "closing quotation mark",
    "''": "closing quotation mark",
    ":": "punctuation mark, colon or ellipsis",
    "$": "symbol, currency",
    "#": "symbol, number sign",
    "AFX": "affix",
    "CC": "conjunction, coordinating",
    "CD": "cardinal number",
    "DT": "determiner",
    "EX": "existential there",
    "FW": "foreign word",
    "HYPH": "punctuation mark, hyphen",
    "IN": "conjunction, subordinating or preposition",
    "JJ": "adjective (English), other noun-modifier (Chinese)",
    "JJR": "adjective, comparative",
    "JJS": "adjective, superlative",
    "LS": "list item marker",
    "MD": "verb, modal auxiliary",
    "NIL": "missing tag",
    "NN": "noun, singular or mass",
    "NNP": "noun, proper singular",
    "NNPS": "noun, proper plural",
    "NNS": "noun, plural",
    "PDT": "predeterminer",
    "POS": "possessive ending",
    "PRP": "pronoun, personal",
    "PRP$": "pronoun, possessive",
    "RB": "adverb",
    "RBR": "adverb, comparative",
    "RBS": "adverb, superlative",
    "RP": "adverb, particle",
    "TO": 'infinitival "to"',
    "UH": "interjection",
    "VB": "verb, base form",
    "VBD": "verb, past tense",
    "VBG": "verb, gerund or present participle",
    "VBN": "verb, past participle",
    "VBP": "verb, non-3rd person singular present",
    "VBZ": "verb, 3rd person singular present",
    "WDT": "wh-determiner",
    "WP": "wh-pronoun, personal",
    "WP$": "wh-pronoun, possessive",
    "WRB": "wh-adverb",
    "SP": "space (English), sentence-final particle (Chinese)",
    "ADD": "email",
    "NFP": "superfluous punctuation",
    "GW": "additional word in multi-word expression",
    "XX": "unknown",
    "BES": 'auxiliary "be"',
    "HVS": 'forms of "have"',
    "_SP": "whitespace",
    # POS Tags (German)
    # TIGER Treebank
    # http://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/TIGERCorpus/annotation/tiger_introduction.pdf
    "$(": "other sentence-internal punctuation mark",
    "$,": "comma",
    "$.": "sentence-final punctuation mark",
    "ADJA": "adjective, attributive",
    "ADJD": "adjective, adverbial or predicative",
    "APPO": "postposition",
    "APPR": "preposition; circumposition left",
    "APPRART": "preposition with article",
    "APZR": "circumposition right",
    "ART": "definite or indefinite article",
    "CARD": "cardinal number",
    "FM": "foreign language material",
    "ITJ": "interjection",
    "KOKOM": "comparative conjunction",
    "KON": "coordinate conjunction",
    "KOUI": 'subordinate conjunction with "zu" and infinitive',
    "KOUS": "subordinate conjunction with sentence",
    "NE": "proper noun",
    "NNE": "proper noun",
    "PAV": "pronominal adverb",
    "PROAV": "pronominal adverb",
    "PDAT": "attributive demonstrative pronoun",
    "PDS": "substituting demonstrative pronoun",
    "PIAT": "attributive indefinite pronoun without determiner",
    "PIDAT": "attributive indefinite pronoun with determiner",
    "PIS": "substituting indefinite pronoun",
    "PPER": "non-reflexive personal pronoun",
    "PPOSAT": "attributive possessive pronoun",
    "PPOSS": "substituting possessive pronoun",
    "PRELAT": "attributive relative pronoun",
    "PRELS": "substituting relative pronoun",
    "PRF": "reflexive personal pronoun",
    "PTKA": "particle with adjective or adverb",
    "PTKANT": "answer particle",
    "PTKNEG": "negative particle",
    "PTKVZ": "separable verbal particle",
    "PTKZU": '"zu" before infinitive',
    "PWAT": "attributive interrogative pronoun",
    "PWAV": "adverbial interrogative or relative pronoun",
    "PWS": "substituting interrogative pronoun",
    "TRUNC": "word remnant",
    "VAFIN": "finite verb, auxiliary",
    "VAIMP": "imperative, auxiliary",
    "VAINF": "infinitive, auxiliary",
    "VAPP": "perfect participle, auxiliary",
    "VMFIN": "finite verb, modal",
    "VMINF": "infinitive, modal",
    "VMPP": "perfect participle, modal",
    "VVFIN": "finite verb, full",
    "VVIMP": "imperative, full",
    "VVINF": "infinitive, full",
    "VVIZU": 'infinitive with "zu", full',
    "VVPP": "perfect participle, full",
    "XY": "non-word containing non-letter",
    # POS Tags (Chinese)
    # OntoNotes / Chinese Penn Treebank
    # https://repository.upenn.edu/cgi/viewcontent.cgi?article=1039&context=ircs_reports
    # NOTE: the commented-out tags below ("CD", "FW", "JJ", "NN", "SP") use
    # keys that are already defined in the English section above; a dict can
    # hold only one value per key, so they are listed here for reference only.
    "AD": "adverb",
    "AS": "aspect marker",
    "BA": "把 in ba-construction",
    # "CD": "cardinal number",
    "CS": "subordinating conjunction",
    "DEC": "的 in a relative clause",
    "DEG": "associative 的",
    "DER": "得 in V-de const. and V-de-R",
    "DEV": "地 before VP",
    "ETC": "for words 等, 等等",
    # "FW": "foreign words"
    "IJ": "interjection",
    # "JJ": "other noun-modifier",
    "LB": "被 in long bei-const",
    "LC": "localizer",
    "M": "measure word",
    "MSP": "other particle",
    # "NN": "common noun",
    "NR": "proper noun",
    "NT": "temporal noun",
    "OD": "ordinal number",
    "ON": "onomatopoeia",
    "P": "preposition excluding 把 and 被",
    "PN": "pronoun",
    "PU": "punctuation",
    "SB": "被 in short bei-const",
    # "SP": "sentence-final particle",
    "VA": "predicative adjective",
    "VC": "是 (copula)",
    "VE": "有 as the main verb",
    "VV": "other verb",
    # Noun chunks
    "NP": "noun phrase",
    "PP": "prepositional phrase",
    "VP": "verb phrase",
    "ADVP": "adverb phrase",
    "ADJP": "adjective phrase",
    "SBAR": "subordinating conjunction",
    "PRT": "particle",
    "PNP": "prepositional noun phrase",
    # Dependency Labels (English)
    # ClearNLP / Universal Dependencies
    # https://github.com/clir/clearnlp-guidelines/blob/master/md/specifications/dependency_labels.md
    "acl": "clausal modifier of noun (adjectival clause)",
    "acomp": "adjectival complement",
    "advcl": "adverbial clause modifier",
    "advmod": "adverbial modifier",
    "agent": "agent",
    "amod": "adjectival modifier",
    "appos": "appositional modifier",
    "attr": "attribute",
    "aux": "auxiliary",
    "auxpass": "auxiliary (passive)",
    "case": "case marking",
    "cc": "coordinating conjunction",
    "ccomp": "clausal complement",
    "clf": "classifier",
    "complm": "complementizer",
    "compound": "compound",
    "conj": "conjunct",
    "cop": "copula",
    "csubj": "clausal subject",
    "csubjpass": "clausal subject (passive)",
    "dative": "dative",
    "dep": "unclassified dependent",
    "det": "determiner",
    "discourse": "discourse element",
    "dislocated": "dislocated elements",
    "dobj": "direct object",
    "expl": "expletive",
    "fixed": "fixed multiword expression",
    "flat": "flat multiword expression",
    "goeswith": "goes with",
    "hmod": "modifier in hyphenation",
    "hyph": "hyphen",
    "infmod": "infinitival modifier",
    "intj": "interjection",
    "iobj": "indirect object",
    "list": "list",
    "mark": "marker",
    "meta": "meta modifier",
    "neg": "negation modifier",
    "nmod": "modifier of nominal",
    "nn": "noun compound modifier",
    "npadvmod": "noun phrase as adverbial modifier",
    "nsubj": "nominal subject",
    "nsubjpass": "nominal subject (passive)",
    "nounmod": "modifier of nominal",
    "npmod": "noun phrase as adverbial modifier",
    "num": "number modifier",
    "number": "number compound modifier",
    "nummod": "numeric modifier",
    "oprd": "object predicate",
    "obj": "object",
    "obl": "oblique nominal",
    "orphan": "orphan",
    "parataxis": "parataxis",
    "partmod": "participial modifier",
    "pcomp": "complement of preposition",
    "pobj": "object of preposition",
    "poss": "possession modifier",
    "possessive": "possessive modifier",
    "preconj": "pre-correlative conjunction",
    "prep": "prepositional modifier",
    "prt": "particle",
    "punct": "punctuation",
    "quantmod": "modifier of quantifier",
    "rcmod": "relative clause modifier",
    "relcl": "relative clause modifier",
    "reparandum": "overridden disfluency",
    "root": "root",
    "ROOT": "root",
    "vocative": "vocative",
    "xcomp": "open clausal complement",
    # Dependency labels (German)
    # TIGER Treebank
    # http://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/TIGERCorpus/annotation/tiger_introduction.pdf
    # currently missing: 'cc' (comparative complement) because of conflict
    # with English labels
    "ac": "adpositional case marker",
    "adc": "adjective component",
    "ag": "genitive attribute",
    "ams": "measure argument of adjective",
    "app": "apposition",
    "avc": "adverbial phrase component",
    "cd": "coordinating conjunction",
    "cj": "conjunct",
    "cm": "comparative conjunction",
    "cp": "complementizer",
    "cvc": "collocational verb construction",
    "da": "dative",
    "dh": "discourse-level head",
    "dm": "discourse marker",
    "ep": "expletive es",
    "hd": "head",
    "ju": "junctor",
    "mnr": "postnominal modifier",
    "mo": "modifier",
    "ng": "negation",
    "nk": "noun kernel element",
    "nmc": "numerical component",
    "oa": "accusative object",
    "oc": "clausal object",
    "og": "genitive object",
    "op": "prepositional object",
    "par": "parenthetical element",
    "pd": "predicate",
    "pg": "phrasal genitive",
    "ph": "placeholder",
    "pm": "morphological particle",
    "pnc": "proper noun component",
    "rc": "relative clause",
    "re": "repeated element",
    "rs": "reported speech",
    "sb": "subject",
    "sbp": "passivized subject (PP)",
    "sp": "subject or predicate",
    "svp": "separable verb prefix",
    "uc": "unit component",
    "vo": "vocative",
    # Named Entity Recognition
    # OntoNotes 5
    # https://catalog.ldc.upenn.edu/docs/LDC2013T19/OntoNotes-Release-5.0.pdf
    "PERSON": "People, including fictional",
    "NORP": "Nationalities or religious or political groups",
    "FACILITY": "Buildings, airports, highways, bridges, etc.",
    "FAC": "Buildings, airports, highways, bridges, etc.",
    "ORG": "Companies, agencies, institutions, etc.",
    "GPE": "Countries, cities, states",
    "LOC": "Non-GPE locations, mountain ranges, bodies of water",
    "PRODUCT": "Objects, vehicles, foods, etc. (not services)",
    "EVENT": "Named hurricanes, battles, wars, sports events, etc.",
    "WORK_OF_ART": "Titles of books, songs, etc.",
    "LAW": "Named documents made into laws.",
    "LANGUAGE": "Any named language",
    "DATE": "Absolute or relative dates or periods",
    "TIME": "Times smaller than a day",
    "PERCENT": 'Percentage, including "%"',
    "MONEY": "Monetary values, including unit",
    "QUANTITY": "Measurements, as of weight or distance",
    "ORDINAL": '"first", "second", etc.',
    "CARDINAL": "Numerals that do not fall under another type",
    # Named Entity Recognition
    # Wikipedia
    # http://www.sciencedirect.com/science/article/pii/S0004370212000276
    # https://pdfs.semanticscholar.org/5744/578cc243d92287f47448870bb426c66cc941.pdf
    "PER": "Named person or family.",
    "MISC": "Miscellaneous entities, e.g. events, nationalities, products or works of art",
    # https://github.com/ltgoslo/norne
    "EVT": "Festivals, cultural events, sports events, weather phenomena, wars, etc.",
    "PROD": "Product, i.e. artificially produced entities including speeches, radio shows, programming languages, contracts, laws and ideas",
    "DRV": "Words (and phrases?) that are derived from a name, but not a name in themselves, e.g. 'Oslo-mannen' ('the man from Oslo')",
    "GPE_LOC": "Geo-political entity, with a locative sense, e.g. 'John lives in Spain'",
    "GPE_ORG": "Geo-political entity, with an organisation sense, e.g. 'Spain declined to meet with Belgium'",
}
 |