add missing Urdu tags

This commit is contained in:
Muhammad Irfan 2020-04-05 20:55:38 +05:00
parent beef184e53
commit 406d5748b3

View File

@@ -1,66 +1,94 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON, AUX, SCONJ
from ...symbols import POS, PUNCT, SYM, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB from ...symbols import POS, PUNCT, SYM, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB
from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON
TAG_MAP = { TAG_MAP = {
".": {POS: PUNCT, "PunctType": "peri"}, "JJ-Ez": {POS: ADJ},
",": {POS: PUNCT, "PunctType": "comm"}, "INJC": {POS: X},
"-LRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "ini"}, "QFC": {POS: DET},
"-RRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "fin"}, "UNK": {POS: X},
"``": {POS: PUNCT, "PunctType": "quot", "PunctSide": "ini"}, "NSTC": {POS: ADV},
'""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"}, "NST": {POS: ADV},
"''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"}, "VMC": {POS: VERB},
"PRPC": {POS: PRON},
"RBC": {POS: ADV},
"PSPC": {POS: ADP},
"INJ": {POS: X},
"JJZ": {POS: ADJ},
"CCC": {POS: SCONJ},
"NN-Ez": {POS: NOUN},
"ECH": {POS: NOUN},
"WQ": {POS: DET},
"RDP": {POS: ADJ},
"JJC": {POS: ADJ},
"NEG": {POS: PART},
"NNZ": {POS: NOUN},
"QO": {POS: ADJ},
"INTFC": {POS: ADV},
"INTF": {POS: ADV},
"NFC": {POS: ADP},
"QCC": {POS: NUM},
"QC": {POS: NUM},
"QF": {POS: DET},
"VAUX": {POS: AUX},
"VM": {POS: VERB},
"DEM": {POS: DET},
"NNPC": {POS: PROPN},
"NNC": {POS: NOUN},
"PSP": {POS: ADP},
".": {POS: PUNCT},
",": {POS: PUNCT},
"-LRB-": {POS: PUNCT},
"-RRB-": {POS: PUNCT},
"``": {POS: PUNCT},
'""': {POS: PUNCT},
"''": {POS: PUNCT},
":": {POS: PUNCT}, ":": {POS: PUNCT},
"$": {POS: SYM, "Other": {"SymType": "currency"}}, "$": {POS: SYM},
"#": {POS: SYM, "Other": {"SymType": "numbersign"}}, "#": {POS: SYM},
"AFX": {POS: ADJ, "Hyph": "yes"}, "AFX": {POS: ADJ},
"CC": {POS: CCONJ, "ConjType": "coor"}, "CC": {POS: CCONJ},
"CD": {POS: NUM, "NumType": "card"}, "CD": {POS: NUM},
"DT": {POS: DET}, "DT": {POS: DET},
"EX": {POS: ADV, "AdvType": "ex"}, "EX": {POS: ADV},
"FW": {POS: X, "Foreign": "yes"}, "FW": {POS: X},
"HYPH": {POS: PUNCT, "PunctType": "dash"}, "HYPH": {POS: PUNCT},
"IN": {POS: ADP}, "IN": {POS: ADP},
"JJ": {POS: ADJ, "Degree": "pos"}, "JJ": {POS: ADJ},
"JJR": {POS: ADJ, "Degree": "comp"}, "JJR": {POS: ADJ},
"JJS": {POS: ADJ, "Degree": "sup"}, "JJS": {POS: ADJ},
"LS": {POS: PUNCT, "NumType": "ord"}, "LS": {POS: PUNCT},
"MD": {POS: VERB, "VerbType": "mod"}, "MD": {POS: VERB},
"NIL": {POS: ""}, "NIL": {POS: ""},
"NN": {POS: NOUN, "Number": "sing"}, "NN": {POS: NOUN},
"NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"}, "NNP": {POS: PROPN},
"NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"}, "NNPS": {POS: PROPN},
"NNS": {POS: NOUN, "Number": "plur"}, "NNS": {POS: NOUN},
"PDT": {POS: ADJ, "AdjType": "pdt", "PronType": "prn"}, "PDT": {POS: ADJ},
"POS": {POS: PART, "Poss": "yes"}, "POS": {POS: PART},
"PRP": {POS: PRON, "PronType": "prs"}, "PRP": {POS: PRON},
"PRP$": {POS: ADJ, "PronType": "prs", "Poss": "yes"}, "PRP$": {POS: ADJ},
"RB": {POS: ADV, "Degree": "pos"}, "RB": {POS: ADV},
"RBR": {POS: ADV, "Degree": "comp"}, "RBR": {POS: ADV},
"RBS": {POS: ADV, "Degree": "sup"}, "RBS": {POS: ADV},
"RP": {POS: PART}, "RP": {POS: PART},
"SP": {POS: SPACE}, "SP": {POS: SPACE},
"SYM": {POS: SYM}, "SYM": {POS: SYM},
"TO": {POS: PART, "PartType": "inf", "VerbForm": "inf"}, "TO": {POS: PART},
"UH": {POS: INTJ}, "UH": {POS: INTJ},
"VB": {POS: VERB, "VerbForm": "inf"}, "VB": {POS: VERB},
"VBD": {POS: VERB, "VerbForm": "fin", "Tense": "past"}, "VBD": {POS: VERB},
"VBG": {POS: VERB, "VerbForm": "part", "Tense": "pres", "Aspect": "prog"}, "VBG": {POS: VERB},
"VBN": {POS: VERB, "VerbForm": "part", "Tense": "past", "Aspect": "perf"}, "VBN": {POS: VERB},
"VBP": {POS: VERB, "VerbForm": "fin", "Tense": "pres"}, "VBP": {POS: VERB},
"VBZ": { "VBZ": {POS: VERB},
POS: VERB, "WDT": {POS: ADJ},
"VerbForm": "fin", "WP": {POS: NOUN},
"Tense": "pres", "WP$": {POS: ADJ},
"Number": "sing", "WRB": {POS: ADV},
"Person": 3,
},
"WDT": {POS: ADJ, "PronType": "int|rel"},
"WP": {POS: NOUN, "PronType": "int|rel"},
"WP$": {POS: ADJ, "Poss": "yes", "PronType": "int|rel"},
"WRB": {POS: ADV, "PronType": "int|rel"},
"ADD": {POS: X}, "ADD": {POS: X},
"NFP": {POS: PUNCT}, "NFP": {POS: PUNCT},
"GW": {POS: X}, "GW": {POS: X},