Fix tag map for German

This commit is contained in:
Ines Montani 2016-12-18 13:03:01 +01:00
parent 28326649f3
commit 0fc4e45cb3

View File

@@ -9,63 +9,63 @@ from ..language_data import TOKENIZER_INFIXES
# TAG_MAP: maps each fine-grained tag string of the German tag map (e.g. "ADJA",
# "NN", "VVFIN") to a dict of attributes. Every entry carries one coarse
# part-of-speech symbol (PUNCT, ADJ, NOUN, ...) plus optional string-valued
# morphological features such as "PronType" or "VerbForm".
#
# NOTE(review): this listing looks like a rendered diff hunk with the +/-
# markers stripped. The first run of entries stores the coarse symbol under
# the key TAG; the second run repeats what appear to be the same tags and
# features but stores it under the key POS. Presumably the TAG-keyed run is
# the removed side and the POS-keyed run is the added side -- confirm against
# the actual commit.
TAG_MAP = {
# --- entries keyed by TAG (presumably the pre-commit side; see note above) ---
"$(": {TAG: PUNCT, "PunctType": "brck"},
"$,": {TAG: PUNCT, "PunctType": "comm"},
"$.": {TAG: PUNCT, "PunctType": "peri"},
"ADJA": {TAG: ADJ},
"ADJD": {TAG: ADJ, "Variant": "short"},
"ADV": {TAG: ADV},
"APPO": {TAG: ADP, "AdpType": "post"},
"APPR": {TAG: ADP, "AdpType": "prep"},
"APPRART": {TAG: ADP, "AdpType": "prep", "PronType": "art"},
"APZR": {TAG: ADP, "AdpType": "circ"},
"ART": {TAG: DET, "PronType": "art"},
"CARD": {TAG: NUM, "NumType": "card"},
"FM": {TAG: X, "Foreign": "yes"},
"ITJ": {TAG: INTJ},
"KOKOM": {TAG: CONJ, "ConjType": "comp"},
"KON": {TAG: CONJ},
"KOUI": {TAG: SCONJ},
"KOUS": {TAG: SCONJ},
"NE": {TAG: PROPN},
"NNE": {TAG: PROPN},
"NN": {TAG: NOUN},
"PAV": {TAG: ADV, "PronType": "dem"},
"PROAV": {TAG: ADV, "PronType": "dem"},
"PDAT": {TAG: DET, "PronType": "dem"},
"PDS": {TAG: PRON, "PronType": "dem"},
"PIAT": {TAG: DET, "PronType": "ind|neg|tot"},
"PIDAT": {TAG: DET, "AdjType": "pdt", "PronType": "ind|neg|tot"},
"PIS": {TAG: PRON, "PronType": "ind|neg|tot"},
"PPER": {TAG: PRON, "PronType": "prs"},
"PPOSAT": {TAG: DET, "Poss": "yes", "PronType": "prs"},
"PPOSS": {TAG: PRON, "Poss": "yes", "PronType": "prs"},
"PRELAT": {TAG: DET, "PronType": "rel"},
"PRELS": {TAG: PRON, "PronType": "rel"},
"PRF": {TAG: PRON, "PronType": "prs", "Reflex": "yes"},
"PTKA": {TAG: PART},
"PTKANT": {TAG: PART, "PartType": "res"},
"PTKNEG": {TAG: PART, "Negative": "yes"},
"PTKVZ": {TAG: PART, "PartType": "vbp"},
"PTKZU": {TAG: PART, "PartType": "inf"},
"PWAT": {TAG: DET, "PronType": "int"},
"PWAV": {TAG: ADV, "PronType": "int"},
"PWS": {TAG: PRON, "PronType": "int"},
"TRUNC": {TAG: X, "Hyph": "yes"},
"VAFIN": {TAG: AUX, "Mood": "ind", "VerbForm": "fin"},
"VAIMP": {TAG: AUX, "Mood": "imp", "VerbForm": "fin"},
"VAINF": {TAG: AUX, "VerbForm": "inf"},
"VAPP": {TAG: AUX, "Aspect": "perf", "VerbForm": "part"},
"VMFIN": {TAG: VERB, "Mood": "ind", "VerbForm": "fin", "VerbType": "mod"},
"VMINF": {TAG: VERB, "VerbForm": "inf", "VerbType": "mod"},
"VMPP": {TAG: VERB, "Aspect": "perf", "VerbForm": "part", "VerbType": "mod"},
"VVFIN": {TAG: VERB, "Mood": "ind", "VerbForm": "fin"},
"VVIMP": {TAG: VERB, "Mood": "imp", "VerbForm": "fin"},
"VVINF": {TAG: VERB, "VerbForm": "inf"},
"VVIZU": {TAG: VERB, "VerbForm": "inf"},
"VVPP": {TAG: VERB, "Aspect": "perf", "VerbForm": "part"},
"XY": {TAG: X},
"SP": {TAG: SPACE}
# NOTE(review): no comma follows the "SP" entry above, so the two runs do not
# form one valid dict literal as shown -- consistent with this being two sides
# of a diff rather than a single source file.
# --- entries keyed by POS (presumably the post-commit side; see note above) ---
"$(": {POS: PUNCT, "PunctType": "brck"},
"$,": {POS: PUNCT, "PunctType": "comm"},
"$.": {POS: PUNCT, "PunctType": "peri"},
"ADJA": {POS: ADJ},
"ADJD": {POS: ADJ, "Variant": "short"},
"ADV": {POS: ADV},
"APPO": {POS: ADP, "AdpType": "post"},
"APPR": {POS: ADP, "AdpType": "prep"},
"APPRART": {POS: ADP, "AdpType": "prep", "PronType": "art"},
"APZR": {POS: ADP, "AdpType": "circ"},
"ART": {POS: DET, "PronType": "art"},
"CARD": {POS: NUM, "NumType": "card"},
"FM": {POS: X, "Foreign": "yes"},
"ITJ": {POS: INTJ},
"KOKOM": {POS: CONJ, "ConjType": "comp"},
"KON": {POS: CONJ},
"KOUI": {POS: SCONJ},
"KOUS": {POS: SCONJ},
"NE": {POS: PROPN},
"NNE": {POS: PROPN},
"NN": {POS: NOUN},
"PAV": {POS: ADV, "PronType": "dem"},
"PROAV": {POS: ADV, "PronType": "dem"},
"PDAT": {POS: DET, "PronType": "dem"},
"PDS": {POS: PRON, "PronType": "dem"},
"PIAT": {POS: DET, "PronType": "ind|neg|tot"},
"PIDAT": {POS: DET, "AdjType": "pdt", "PronType": "ind|neg|tot"},
"PIS": {POS: PRON, "PronType": "ind|neg|tot"},
"PPER": {POS: PRON, "PronType": "prs"},
"PPOSAT": {POS: DET, "Poss": "yes", "PronType": "prs"},
"PPOSS": {POS: PRON, "Poss": "yes", "PronType": "prs"},
"PRELAT": {POS: DET, "PronType": "rel"},
"PRELS": {POS: PRON, "PronType": "rel"},
"PRF": {POS: PRON, "PronType": "prs", "Reflex": "yes"},
"PTKA": {POS: PART},
"PTKANT": {POS: PART, "PartType": "res"},
"PTKNEG": {POS: PART, "Negative": "yes"},
"PTKVZ": {POS: PART, "PartType": "vbp"},
"PTKZU": {POS: PART, "PartType": "inf"},
"PWAT": {POS: DET, "PronType": "int"},
"PWAV": {POS: ADV, "PronType": "int"},
"PWS": {POS: PRON, "PronType": "int"},
"TRUNC": {POS: X, "Hyph": "yes"},
"VAFIN": {POS: AUX, "Mood": "ind", "VerbForm": "fin"},
"VAIMP": {POS: AUX, "Mood": "imp", "VerbForm": "fin"},
"VAINF": {POS: AUX, "VerbForm": "inf"},
"VAPP": {POS: AUX, "Aspect": "perf", "VerbForm": "part"},
"VMFIN": {POS: VERB, "Mood": "ind", "VerbForm": "fin", "VerbType": "mod"},
"VMINF": {POS: VERB, "VerbForm": "inf", "VerbType": "mod"},
"VMPP": {POS: VERB, "Aspect": "perf", "VerbForm": "part", "VerbType": "mod"},
"VVFIN": {POS: VERB, "Mood": "ind", "VerbForm": "fin"},
"VVIMP": {POS: VERB, "Mood": "imp", "VerbForm": "fin"},
"VVINF": {POS: VERB, "VerbForm": "inf"},
"VVIZU": {POS: VERB, "VerbForm": "inf"},
"VVPP": {POS: VERB, "Aspect": "perf", "VerbForm": "part"},
"XY": {POS: X},
"SP": {POS: SPACE}
}