mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	* `auxillary` -> `auxiliary` * `consistute` -> `constitute` * `earlist` -> `earliest` * `prefered` -> `preferred` * `direcory` -> `directory` * `reuseable` -> `reusable` * `idiosyncracies` -> `idiosyncrasies` * `enviroment` -> `environment` * `unecessary` -> `unnecessary` * `yesteday` -> `yesterday` * `resouces` -> `resources`
		
			
				
	
	
		
			295 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			295 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# coding: utf8
 | 
						|
from __future__ import unicode_literals
 | 
						|
 | 
						|
 | 
						|
def explain(term):
 | 
						|
    if term in GLOSSARY:
 | 
						|
        return GLOSSARY[term]
 | 
						|
 | 
						|
 | 
						|
GLOSSARY = {
 | 
						|
    # POS tags
 | 
						|
    # Universal POS Tags
 | 
						|
    # http://universaldependencies.org/u/pos/
 | 
						|
 | 
						|
    'ADJ':          'adjective',
 | 
						|
    'ADP':          'adposition',
 | 
						|
    'ADV':          'adverb',
 | 
						|
    'AUX':          'auxiliary',
 | 
						|
    'CONJ':         'conjunction',
 | 
						|
    'CCONJ':        'coordinating conjunction',
 | 
						|
    'DET':          'determiner',
 | 
						|
    'INTJ':         'interjection',
 | 
						|
    'NOUN':         'noun',
 | 
						|
    'NUM':          'numeral',
 | 
						|
    'PART':         'particle',
 | 
						|
    'PRON':         'pronoun',
 | 
						|
    'PROPN':        'proper noun',
 | 
						|
    'PUNCT':        'punctuation',
 | 
						|
    'SCONJ':        'subordinating conjunction',
 | 
						|
    'SYM':          'symbol',
 | 
						|
    'VERB':         'verb',
 | 
						|
    'X':            'other',
 | 
						|
    'EOL':          'end of line',
 | 
						|
    'SPACE':        'space',
 | 
						|
 | 
						|
 | 
						|
    # POS tags (English)
 | 
						|
    # OntoNotes 5 / Penn Treebank
 | 
						|
    # https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
 | 
						|
 | 
						|
    '.':            'punctuation mark, sentence closer',
 | 
						|
    ',':            'punctuation mark, comma',
 | 
						|
    '-LRB-':        'left round bracket',
 | 
						|
    '-RRB-':        'right round bracket',
 | 
						|
    '``':           'opening quotation mark',
 | 
						|
    '""':           'closing quotation mark',
 | 
						|
    "''":           'closing quotation mark',
 | 
						|
    ':':            'punctuation mark, colon or ellipsis',
 | 
						|
    '$':            'symbol, currency',
 | 
						|
    '#':            'symbol, number sign',
 | 
						|
    'AFX':          'affix',
 | 
						|
    'CC':           'conjunction, coordinating',
 | 
						|
    'CD':           'cardinal number',
 | 
						|
    'DT':           'determiner',
 | 
						|
    'EX':           'existential there',
 | 
						|
    'FW':           'foreign word',
 | 
						|
    'HYPH':         'punctuation mark, hyphen',
 | 
						|
    'IN':           'conjunction, subordinating or preposition',
 | 
						|
    'JJ':           'adjective',
 | 
						|
    'JJR':          'adjective, comparative',
 | 
						|
    'JJS':          'adjective, superlative',
 | 
						|
    'LS':           'list item marker',
 | 
						|
    'MD':           'verb, modal auxiliary',
 | 
						|
    'NIL':          'missing tag',
 | 
						|
    'NN':           'noun, singular or mass',
 | 
						|
    'NNP':          'noun, proper singular',
 | 
						|
    'NNPS':         'noun, proper plural',
 | 
						|
    'NNS':          'noun, plural',
 | 
						|
    'PDT':          'predeterminer',
 | 
						|
    'POS':          'possessive ending',
 | 
						|
    'PRP':          'pronoun, personal',
 | 
						|
    'PRP$':         'pronoun, possessive',
 | 
						|
    'RB':           'adverb',
 | 
						|
    'RBR':          'adverb, comparative',
 | 
						|
    'RBS':          'adverb, superlative',
 | 
						|
    'RP':           'adverb, particle',
 | 
						|
    'TO':           'infinitival to',
 | 
						|
    'UH':           'interjection',
 | 
						|
    'VB':           'verb, base form',
 | 
						|
    'VBD':          'verb, past tense',
 | 
						|
    'VBG':          'verb, gerund or present participle',
 | 
						|
    'VBN':          'verb, past participle',
 | 
						|
    'VBP':          'verb, non-3rd person singular present',
 | 
						|
    'VBZ':          'verb, 3rd person singular present',
 | 
						|
    'WDT':          'wh-determiner',
 | 
						|
    'WP':           'wh-pronoun, personal',
 | 
						|
    'WP$':          'wh-pronoun, possessive',
 | 
						|
    'WRB':          'wh-adverb',
 | 
						|
    'SP':           'space',
 | 
						|
    'ADD':          'email',
 | 
						|
    'NFP':          'superfluous punctuation',
 | 
						|
    'GW':           'additional word in multi-word expression',
 | 
						|
    'XX':           'unknown',
 | 
						|
    'BES':          'auxiliary "be"',
 | 
						|
    'HVS':          'forms of "have"',
 | 
						|
 | 
						|
 | 
						|
    # POS Tags (German)
 | 
						|
    # TIGER Treebank
 | 
						|
    # http://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/TIGERCorpus/annotation/tiger_introduction.pdf
 | 
						|
 | 
						|
    '$(':           'other sentence-internal punctuation mark',
 | 
						|
    '$,':           'comma',
 | 
						|
    '$.':           'sentence-final punctuation mark',
 | 
						|
    'ADJA':         'adjective, attributive',
 | 
						|
    'ADJD':         'adjective, adverbial or predicative',
 | 
						|
    'APPO':         'postposition',
 | 
						|
    'APRP':         'preposition; circumposition left',
 | 
						|
    'APPRART':      'preposition with article',
 | 
						|
    'APZR':         'circumposition right',
 | 
						|
    'ART':          'definite or indefinite article',
 | 
						|
    'CARD':         'cardinal number',
 | 
						|
    'FM':           'foreign language material',
 | 
						|
    'ITJ':          'interjection',
 | 
						|
    'KOKOM':        'comparative conjunction',
 | 
						|
    'KON':          'coordinate conjunction',
 | 
						|
    'KOUI':         'subordinate conjunction with "zu" and infinitive',
 | 
						|
    'KOUS':         'subordinate conjunction with sentence',
 | 
						|
    'NE':           'proper noun',
 | 
						|
    'NNE':          'proper noun',
 | 
						|
    'PAV':          'pronominal adverb',
 | 
						|
    'PROAV':        'pronominal adverb',
 | 
						|
    'PDAT':         'attributive demonstrative pronoun',
 | 
						|
    'PDS':          'substituting demonstrative pronoun',
 | 
						|
    'PIAT':         'attributive indefinite pronoun without determiner',
 | 
						|
    'PIDAT':        'attributive indefinite pronoun with determiner',
 | 
						|
    'PIS':          'substituting indefinite pronoun',
 | 
						|
    'PPER':         'non-reflexive personal pronoun',
 | 
						|
    'PPOSAT':       'attributive possessive pronoun',
 | 
						|
    'PPOSS':        'substituting possessive pronoun',
 | 
						|
    'PRELAT':       'attributive relative pronoun',
 | 
						|
    'PRELS':        'substituting relative pronoun',
 | 
						|
    'PRF':          'reflexive personal pronoun',
 | 
						|
    'PTKA':         'particle with adjective or adverb',
 | 
						|
    'PTKANT':       'answer particle',
 | 
						|
    'PTKNEG':       'negative particle',
 | 
						|
    'PTKVZ':        'separable verbal particle',
 | 
						|
    'PTKZU':        '"zu" before infinitive',
 | 
						|
    'PWAT':         'attributive interrogative pronoun',
 | 
						|
    'PWAV':         'adverbial interrogative or relative pronoun',
 | 
						|
    'PWS':          'substituting interrogative pronoun',
 | 
						|
    'TRUNC':        'word remnant',
 | 
						|
    'VAFIN':        'finite verb, auxiliary',
 | 
						|
    'VAIMP':        'imperative, auxiliary',
 | 
						|
    'VAINF':        'infinitive, auxiliary',
 | 
						|
    'VAPP':         'perfect participle, auxiliary',
 | 
						|
    'VMFIN':        'finite verb, modal',
 | 
						|
    'VMINF':        'infinitive, modal',
 | 
						|
    'VMPP':         'perfect participle, modal',
 | 
						|
    'VVFIN':        'finite verb, full',
 | 
						|
    'VVIMP':        'imperative, full',
 | 
						|
    'VVINF':        'infinitive, full',
 | 
						|
    'VVIZU':        'infinitive with "zu", full',
 | 
						|
    'VVPP':         'perfect participle, full',
 | 
						|
    'XY':           'non-word containing non-letter',
 | 
						|
 | 
						|
 | 
						|
    # Noun chunks
 | 
						|
 | 
						|
    'NP':           'noun phrase',
 | 
						|
    'PP':           'prepositional phrase',
 | 
						|
    'VP':           'verb phrase',
 | 
						|
    'ADVP':         'adverb phrase',
 | 
						|
    'ADJP':         'adjective phrase',
 | 
						|
    'SBAR':         'subordinating conjunction',
 | 
						|
    'PRT':          'particle',
 | 
						|
    'PNP':          'prepositional noun phrase',
 | 
						|
 | 
						|
 | 
						|
    # Dependency Labels (English)
 | 
						|
    # ClearNLP / Universal Dependencies
 | 
						|
    # https://github.com/clir/clearnlp-guidelines/blob/master/md/specifications/dependency_labels.md
 | 
						|
 | 
						|
    'acomp':        'adjectival complement',
 | 
						|
    'advcl':        'adverbial clause modifier',
 | 
						|
    'advmod':       'adverbial modifier',
 | 
						|
    'agent':        'agent',
 | 
						|
    'amod':         'adjectival modifier',
 | 
						|
    'appos':        'appositional modifier',
 | 
						|
    'attr':         'attribute',
 | 
						|
    'aux':          'auxiliary',
 | 
						|
    'auxpass':      'auxiliary (passive)',
 | 
						|
    'cc':           'coordinating conjunction',
 | 
						|
    'ccomp':        'clausal complement',
 | 
						|
    'complm':       'complementizer',
 | 
						|
    'conj':         'conjunct',
 | 
						|
    'cop':          'copula',
 | 
						|
    'csubj':        'clausal subject',
 | 
						|
    'csubjpass':    'clausal subject (passive)',
 | 
						|
    'dep':          'unclassified dependent',
 | 
						|
    'det':          'determiner',
 | 
						|
    'dobj':         'direct object',
 | 
						|
    'expl':         'expletive',
 | 
						|
    'hmod':         'modifier in hyphenation',
 | 
						|
    'hyph':         'hyphen',
 | 
						|
    'infmod':       'infinitival modifier',
 | 
						|
    'intj':         'interjection',
 | 
						|
    'iobj':         'indirect object',
 | 
						|
    'mark':         'marker',
 | 
						|
    'meta':         'meta modifier',
 | 
						|
    'neg':          'negation modifier',
 | 
						|
    'nmod':         'modifier of nominal',
 | 
						|
    'nn':           'noun compound modifier',
 | 
						|
    'npadvmod':     'noun phrase as adverbial modifier',
 | 
						|
    'nsubj':        'nominal subject',
 | 
						|
    'nsubjpass':    'nominal subject (passive)',
 | 
						|
    'num':          'number modifier',
 | 
						|
    'number':       'number compound modifier',
 | 
						|
    'oprd':         'object predicate',
 | 
						|
    'obj':          'object',
 | 
						|
    'obl':          'oblique nominal',
 | 
						|
    'parataxis':    'parataxis',
 | 
						|
    'partmod':      'participal modifier',
 | 
						|
    'pcomp':        'complement of preposition',
 | 
						|
    'pobj':         'object of preposition',
 | 
						|
    'poss':         'possession modifier',
 | 
						|
    'possessive':   'possessive modifier',
 | 
						|
    'preconj':      'pre-correlative conjunction',
 | 
						|
    'prep':         'prepositional modifier',
 | 
						|
    'prt':          'particle',
 | 
						|
    'punct':        'punctuation',
 | 
						|
    'quantmod':     'modifier of quantifier',
 | 
						|
    'rcmod':        'relative clause modifier',
 | 
						|
    'root':         'root',
 | 
						|
    'xcomp':        'open clausal complement',
 | 
						|
 | 
						|
 | 
						|
    # Dependency labels (German)
 | 
						|
    # TIGER Treebank
 | 
						|
    # http://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/TIGERCorpus/annotation/tiger_introduction.pdf
 | 
						|
    # currently missing: 'cc' (comparative complement) because of conflict
 | 
						|
    # with English labels
 | 
						|
 | 
						|
    'ac':           'adpositional case marker',
 | 
						|
    'adc':          'adjective component',
 | 
						|
    'ag':           'genitive attribute',
 | 
						|
    'ams':          'measure argument of adjective',
 | 
						|
    'app':          'apposition',
 | 
						|
    'avc':          'adverbial phrase component',
 | 
						|
    'cd':           'coordinating conjunction',
 | 
						|
    'cj':           'conjunct',
 | 
						|
    'cm':           'comparative conjunction',
 | 
						|
    'cp':           'complementizer',
 | 
						|
    'cvc':          'collocational verb construction',
 | 
						|
    'da':           'dative',
 | 
						|
    'dh':           'discourse-level head',
 | 
						|
    'dm':           'discourse marker',
 | 
						|
    'ep':           'expletive es',
 | 
						|
    'hd':           'head',
 | 
						|
    'ju':           'junctor',
 | 
						|
    'mnr':          'postnominal modifier',
 | 
						|
    'mo':           'modifier',
 | 
						|
    'ng':           'negation',
 | 
						|
    'nk':           'noun kernel element',
 | 
						|
    'nmc':          'numerical component',
 | 
						|
    'oa':           'accusative object',
 | 
						|
    'oa':           'second accusative object',
 | 
						|
    'oc':           'clausal object',
 | 
						|
    'og':           'genitive object',
 | 
						|
    'op':           'prepositional object',
 | 
						|
    'par':          'parenthetical element',
 | 
						|
    'pd':           'predicate',
 | 
						|
    'pg':           'phrasal genitive',
 | 
						|
    'ph':           'placeholder',
 | 
						|
    'pm':           'morphological particle',
 | 
						|
    'pnc':          'proper noun component',
 | 
						|
    'rc':           'relative clause',
 | 
						|
    're':           'repeated element',
 | 
						|
    'rs':           'reported speech',
 | 
						|
    'sb':           'subject',
 | 
						|
 | 
						|
 | 
						|
    # Named Entity Recognition
 | 
						|
    # OntoNotes 5
 | 
						|
    # https://catalog.ldc.upenn.edu/docs/LDC2013T19/OntoNotes-Release-5.0.pdf
 | 
						|
 | 
						|
    'PERSON':       'People, including fictional',
 | 
						|
    'NORP':         'Nationalities or religious or political groups',
 | 
						|
    'FACILITY':     'Buildings, airports, highways, bridges, etc.',
 | 
						|
    'ORG':          'Companies, agencies, institutions, etc.',
 | 
						|
    'GPE':          'Countries, cities, states',
 | 
						|
    'LOC':          'Non-GPE locations, mountain ranges, bodies of water',
 | 
						|
    'PRODUCT':      'Objects, vehicles, foods, etc. (not services)',
 | 
						|
    'EVENT':        'Named hurricanes, battles, wars, sports events, etc.',
 | 
						|
    'WORK_OF_ART':  'Titles of books, songs, etc.',
 | 
						|
    'LANGUAGE':     'Any named language',
 | 
						|
    'DATE':         'Absolute or relative dates or periods',
 | 
						|
    'TIME':         'Times smaller than a day',
 | 
						|
    'PERCENT':      'Percentage, including "%"',
 | 
						|
    'MONEY':        'Monetary values, including unit',
 | 
						|
    'QUANTITY':     'Measurements, as of weight or distance',
 | 
						|
    'ORDINAL':      '"first", "second", etc.',
 | 
						|
    'CARDINAL':     'Numerals that do not fall under another type'
 | 
						|
}
 |