diff --git a/spacy/errors.py b/spacy/errors.py index 7393ddc07..e00df2c51 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -103,6 +103,9 @@ class Warnings(object): W027 = ("Found a large training file of {size} bytes. Note that it may " "be more efficient to split your training data into multiple " "smaller JSON files instead.") + W028 = ("Skipping unsupported morphological feature(s): {feature}. " + "Provide features as a dict {{\"Field1\": \"Value1,Value2\"}} or " + "string \"Field1=Value1,Value2|Field2=Value3\".") @add_codes diff --git a/spacy/lang/bn/tag_map.py b/spacy/lang/bn/tag_map.py index 36d69ccf9..bc4c5ef6b 100644 --- a/spacy/lang/bn/tag_map.py +++ b/spacy/lang/bn/tag_map.py @@ -11,8 +11,8 @@ TAG_MAP = { '""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"}, "''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"}, ":": {POS: PUNCT}, - "৳": {POS: SYM, "Other": {"SymType": "currency"}}, - "#": {POS: SYM, "Other": {"SymType": "numbersign"}}, + "৳": {POS: SYM, "SymType": "currency"}, + "#": {POS: SYM, "SymType": "numbersign"}, "AFX": {POS: ADJ, "Hyph": "yes"}, "CC": {POS: CONJ, "ConjType": "coor"}, "CD": {POS: NUM, "NumType": "card"}, diff --git a/spacy/lang/ca/tag_map.py b/spacy/lang/ca/tag_map.py deleted file mode 100644 index 1ecbddc49..000000000 --- a/spacy/lang/ca/tag_map.py +++ /dev/null @@ -1,25 +0,0 @@ -from ..symbols import POS, ADV, NOUN, ADP, PRON, SCONJ, PROPN, DET, SYM, INTJ -from ..symbols import PUNCT, NUM, AUX, X, CONJ, ADJ, VERB, PART, SPACE, CCONJ - - -TAG_MAP = { - "ADV": {POS: ADV}, - "NOUN": {POS: NOUN}, - "ADP": {POS: ADP}, - "PRON": {POS: PRON}, - "SCONJ": {POS: SCONJ}, - "PROPN": {POS: PROPN}, - "DET": {POS: DET}, - "SYM": {POS: SYM}, - "INTJ": {POS: INTJ}, - "PUNCT": {POS: PUNCT}, - "NUM": {POS: NUM}, - "AUX": {POS: AUX}, - "X": {POS: X}, - "CONJ": {POS: CONJ}, - "CCONJ": {POS: CCONJ}, - "ADJ": {POS: ADJ}, - "VERB": {POS: VERB}, - "PART": {POS: PART}, - "SP": {POS: SPACE}, -} diff --git a/spacy/lang/da/__init__.py b/spacy/lang/da/__init__.py index 2828c014b..6d1e33986 100644 --- a/spacy/lang/da/__init__.py +++ b/spacy/lang/da/__init__.py @@ -4,7 +4,6 @@ from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES from .stop_words import STOP_WORDS from .lex_attrs import LEX_ATTRS from .morph_rules import MORPH_RULES -from ..tag_map import TAG_MAP from ..tokenizer_exceptions import BASE_EXCEPTIONS from ..norm_exceptions import BASE_NORMS @@ -24,7 +23,6 @@ class DanishDefaults(Language.Defaults): morph_rules = MORPH_RULES infixes = TOKENIZER_INFIXES suffixes = TOKENIZER_SUFFIXES - tag_map = TAG_MAP stop_words = STOP_WORDS diff --git a/spacy/lang/el/tag_map.py b/spacy/lang/el/tag_map.py index adfacd025..f37f84c57 100644 --- a/spacy/lang/el/tag_map.py +++ b/spacy/lang/el/tag_map.py @@ -656,7 +656,7 @@ TAG_MAP = { "Gender": "Fem", "Number": "Plur", "Case": "Acc", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfFePlGe": { POS: DET, @@ -664,7 +664,7 @@ TAG_MAP = { "Gender": "Fem", "Number": "Plur", "Case": "Gen", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfFePlNm": { POS: DET, @@ -672,7 +672,7 @@ TAG_MAP = { "Gender": "Fem", "Number": "Plur", "Case": "Nom", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfFeSgAc": { POS: DET, @@ -680,7 +680,7 @@ TAG_MAP = { "Gender": "Fem", "Number": "Sing", "Case": "Acc", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfFeSgDa": { POS: DET, @@ -688,7 +688,7 @@ TAG_MAP = { "Gender": "Fem", "Number": "Sing", "Case": "Dat", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfFeSgGe": { POS: DET, @@ -696,7 +696,7 @@ TAG_MAP = { "Gender": "Fem", "Number": "Sing", "Case": "Gen", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfFeSgNm": { POS: DET, @@ -704,7 +704,7 @@ TAG_MAP = { "Gender": "Fem", "Number": "Sing", "Case": "Nom", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfMaPlAc": { POS: DET, @@ -712,7 +712,7 @@ TAG_MAP = { "Gender": "Masc", "Number": "Plur", "Case": "Acc", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfMaPlGe": { POS: DET, @@ -720,7 +720,7 @@ TAG_MAP = { "Gender": "Masc", "Number": "Plur", "Case": "Gen", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfMaPlNm": { POS: DET, @@ -728,7 +728,7 @@ TAG_MAP = { "Gender": "Masc", "Number": "Plur", "Case": "Nom", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfMaSgAc": { POS: DET, @@ -736,7 +736,7 @@ TAG_MAP = { "Gender": "Masc", "Number": "Sing", "Case": "Acc", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfMaSgDa": { POS: DET, @@ -744,7 +744,7 @@ TAG_MAP = { "Gender": "Masc", "Number": "Sing", "Case": "Dat", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfMaSgGe": { POS: DET, @@ -752,7 +752,7 @@ TAG_MAP = { "Gender": "Masc", "Number": "Sing", "Case": "Gen", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfMaSgNm": { POS: DET, @@ -760,7 +760,7 @@ TAG_MAP = { "Gender": "Masc", "Number": "Sing", "Case": "Nom", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfNePlAc": { POS: DET, @@ -768,7 +768,7 @@ TAG_MAP = { "Gender": "Neut", "Number": "Plur", "Case": "Acc", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfNePlDa": { POS: DET, @@ -776,7 +776,7 @@ TAG_MAP = { "Gender": "Neut", "Number": "Plur", "Case": "Dat", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfNePlGe": { POS: DET, @@ -784,7 +784,7 @@ TAG_MAP = { "Gender": "Neut", "Number": "Plur", "Case": "Gen", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfNePlNm": { POS: DET, @@ -792,7 +792,7 @@ TAG_MAP = { "Gender": "Neut", "Number": "Plur", "Case": "Nom", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfNeSgAc": { POS: DET, @@ -800,7 +800,7 @@ TAG_MAP = { "Gender": "Neut", "Number": "Sing", "Case": "Acc", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfNeSgDa": { POS: DET, @@ -808,7 +808,7 @@ TAG_MAP = { "Gender": "Neut", "Number": "Sing", "Case": "Dat", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfNeSgGe": { POS: DET, @@ -816,7 +816,7 @@ TAG_MAP = { "Gender": "Neut", "Number": "Sing", "Case": "Gen", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtDfNeSgNm": { POS: DET, @@ -824,7 +824,7 @@ TAG_MAP = { "Gender": "Neut", "Number": "Sing", "Case": "Nom", - "Other": {"Definite": "Def"}, + "Definite": "Def", }, "AtIdFeSgAc": { POS: DET, @@ -832,7 +832,7 @@ TAG_MAP = { "Gender": "Fem", "Number": "Sing", "Case": "Acc", - "Other": {"Definite": "Ind"}, + "Definite": "Ind", }, "AtIdFeSgDa": { POS: DET, @@ -840,7 +840,7 @@ TAG_MAP = { "Gender": "Fem", "Number": "Sing", "Case": "Dat", - "Other": {"Definite": "Ind"}, + "Definite": "Ind", }, "AtIdFeSgGe": { POS: DET, @@ -848,7 +848,7 @@ TAG_MAP = { "Gender": "Fem", "Number": "Sing", "Case": "Gen", - "Other": {"Definite": "Ind"}, + "Definite": "Ind", }, "AtIdFeSgNm": { POS: DET, @@ -856,7 +856,7 @@ TAG_MAP = { "Gender": "Fem", "Number": "Sing", "Case": "Nom", - "Other": {"Definite": "Ind"}, + "Definite": "Ind", }, "AtIdMaSgAc": { POS: DET, @@ -864,7 +864,7 @@ TAG_MAP = { "Gender": "Masc", "Number": "Sing", "Case": "Acc", - "Other": {"Definite": "Ind"}, + "Definite": "Ind", }, "AtIdMaSgGe": { POS: DET, @@ -872,7 +872,7 @@ TAG_MAP = { "Gender": "Masc", "Number": "Sing", "Case": "Gen", - "Other": {"Definite": "Ind"}, + "Definite": "Ind", }, "AtIdMaSgNm": { POS: DET, @@ -880,7 +880,7 @@ TAG_MAP = { "Gender": "Masc", "Number": "Sing", "Case": "Nom", - "Other": {"Definite": "Ind"}, + "Definite": "Ind", }, "AtIdNeSgAc": { POS: DET, @@ -888,7 +888,7 @@ TAG_MAP = { "Gender": "Neut", "Number": "Sing", "Case": "Acc", - "Other": {"Definite": "Ind"}, + "Definite": "Ind", }, "AtIdNeSgGe": { POS: DET, @@ -896,7 +896,7 @@ TAG_MAP = { "Gender": "Neut", "Number": "Sing", "Case": "Gen", - "Other": {"Definite": "Ind"}, + "Definite": "Ind", }, "AtIdNeSgNm": { POS: DET, @@ -904,7 +904,7 @@ TAG_MAP = { "Gender": "Neut", "Number": "Sing", "Case": "Nom", - "Other": {"Definite": "Ind"}, + "Definite": "Ind", }, "CjCo": {POS: CCONJ}, "CjSb": {POS: SCONJ}, diff --git a/spacy/lang/fr/lemmatizer.py b/spacy/lang/fr/lemmatizer.py index 84e55d509..fe128df1f 100644 --- a/spacy/lang/fr/lemmatizer.py +++ b/spacy/lang/fr/lemmatizer.py @@ -1,7 +1,6 @@ from ...lemmatizer import Lemmatizer from ...symbols import POS, NOUN, VERB, ADJ, ADV, PRON, DET, AUX, PUNCT, ADP from ...symbols import SCONJ, CCONJ -from ...symbols import VerbForm_inf, VerbForm_none, Number_sing, Degree_pos class FrenchLemmatizer(Lemmatizer): @@ -82,13 +81,13 @@ class FrenchLemmatizer(Lemmatizer): return True elif univ_pos == "adj" and morphology.get("Degree") == "pos": return True - elif VerbForm_inf in morphology: + elif "VerbForm=inf" in morphology: return True - elif VerbForm_none in morphology: + elif "VerbForm=none" in morphology: return True - elif Number_sing in morphology: + elif "Number=sing" in morphology: return True - elif Degree_pos in morphology: + elif "Degree=pos" in morphology: return True else: return False diff --git a/spacy/lang/ga/__init__.py b/spacy/lang/ga/__init__.py index cea7c0e94..4c3d219c7 100644 --- a/spacy/lang/ga/__init__.py +++ b/spacy/lang/ga/__init__.py @@ -1,5 +1,6 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS from .stop_words import STOP_WORDS +from .tag_map import TAG_MAP from ..tokenizer_exceptions import BASE_EXCEPTIONS from ...language import Language @@ -13,6 +14,7 @@ class IrishDefaults(Language.Defaults): tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) stop_words = set(STOP_WORDS) + tag_map = TAG_MAP class Irish(Language): diff --git a/spacy/lang/ga/tag_map.py b/spacy/lang/ga/tag_map.py index baf64c1b8..efcaf5d1f 100644 --- a/spacy/lang/ga/tag_map.py +++ b/spacy/lang/ga/tag_map.py @@ -1,26 +1,26 @@ # fmt: off TAG_MAP = { - "ADJ__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}}, + "ADJ__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "masc", "Number": "sing", "Form": "len"}, "ADJ__Case=Gen|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "fem", "Number": "sing"}, "ADJ__Case=Gen|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "masc", "Number": "sing"}, - "ADJ__Case=Gen|NounType=Strong|Number=Plur": {"pos": "ADJ", "Case": "gen", "Number": "plur", "Other": {"NounType": "strong"}}, - "ADJ__Case=Gen|NounType=Weak|Number=Plur": {"pos": "ADJ", "Case": "gen", "Number": "plur", "Other": {"NounType": "weak"}}, - "ADJ__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}}, - "ADJ__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}}, + "ADJ__Case=Gen|NounType=Strong|Number=Plur": {"pos": "ADJ", "Case": "gen", "Number": "plur", "NounType": "strong"}, + "ADJ__Case=Gen|NounType=Weak|Number=Plur": {"pos": "ADJ", "Case": "gen", "Number": "plur", "NounType": "weak"}, + "ADJ__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "len"}, + "ADJ__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "len"}, "ADJ__Case=NomAcc|Gender=Fem|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "plur"}, "ADJ__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "sing"}, "ADJ__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "plur"}, "ADJ__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "sing"}, - "ADJ__Case=NomAcc|NounType=NotSlender|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Number": "plur", "Other": {"NounType": "notslender"}}, - "ADJ__Case=NomAcc|NounType=Slender|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Number": "plur", "Other": {"NounType": "slender"}}, - "ADJ__Degree=Cmp,Sup|Form=Len": {"pos": "ADJ", "Degree": "cmp|sup", "Other": {"Form": "len"}}, + "ADJ__Case=NomAcc|NounType=NotSlender|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Number": "plur", "NounType": "notslender"}, + "ADJ__Case=NomAcc|NounType=Slender|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Number": "plur", "NounType": "slender"}, + "ADJ__Degree=Cmp,Sup|Form=Len": {"pos": "ADJ", "Degree": "cmp|sup", "Form": "len"}, "ADJ__Degree=Cmp,Sup": {"pos": "ADJ", "Degree": "cmp|sup"}, - "ADJ__Degree=Pos|Form=Ecl": {"pos": "ADJ", "Degree": "pos", "Other": {"Form": "ecl"}}, - "ADJ__Degree=Pos|Form=HPref": {"pos": "ADJ", "Degree": "pos", "Other": {"Form": "hpref"}}, - "ADJ__Degree=Pos|Form=Len": {"pos": "ADJ", "Degree": "pos", "Other": {"Form": "len"}}, + "ADJ__Degree=Pos|Form=Ecl": {"pos": "ADJ", "Degree": "pos", "Form": "ecl"}, + "ADJ__Degree=Pos|Form=HPref": {"pos": "ADJ", "Degree": "pos", "Form": "hpref"}, + "ADJ__Degree=Pos|Form=Len": {"pos": "ADJ", "Degree": "pos", "Form": "len"}, "ADJ__Degree=Pos": {"pos": "ADJ", "Degree": "pos"}, "ADJ__Foreign=Yes": {"pos": "ADJ", "Foreign": "yes"}, - "ADJ__Form=Len|VerbForm=Part": {"pos": "ADJ", "VerbForm": "part", "Other": {"Form": "len"}}, + "ADJ__Form=Len|VerbForm=Part": {"pos": "ADJ", "VerbForm": "part", "Form": "len"}, "ADJ__Gender=Masc|Number=Sing|PartType=Voc": {"pos": "ADJ", "Gender": "masc", "Number": "sing", "Case": "voc"}, "ADJ__Gender=Masc|Number=Sing|Case=Voc": {"pos": "ADJ", "Gender": "masc", "Number": "sing", "Case": "voc"}, "ADJ__Number=Plur|PartType=Voc": {"pos": "ADJ", "Number": "plur", "Case": "voc"}, @@ -29,9 +29,9 @@ TAG_MAP = { "ADJ___": {"pos": "ADJ"}, "ADJ__VerbForm=Part": {"pos": "ADJ", "VerbForm": "part"}, "ADP__Foreign=Yes": {"pos": "ADP", "Foreign": "yes"}, - "ADP__Form=Len|Number=Plur|Person=1": {"pos": "ADP", "Number": "plur", "Person": 1, "Other": {"Form": "len"}}, - "ADP__Form=Len|Number=Plur|Person=3": {"pos": "ADP", "Number": "plur", "Person": 3, "Other": {"Form": "len"}}, - "ADP__Form=Len|Number=Sing|Person=1": {"pos": "ADP", "Number": "sing", "Person": 1, "Other": {"Form": "len"}}, + "ADP__Form=Len|Number=Plur|Person=1": {"pos": "ADP", "Number": "plur", "Person": 1, "Form": "len"}, + "ADP__Form=Len|Number=Plur|Person=3": {"pos": "ADP", "Number": "plur", "Person": 3, "Form": "len"}, + "ADP__Form=Len|Number=Sing|Person=1": {"pos": "ADP", "Number": "sing", "Person": 1, "Form": "len"}, "ADP__Gender=Fem|Number=Sing|Person=3": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3}, "ADP__Gender=Fem|Number=Sing|Person=3|Poss=Yes": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes"}, "ADP__Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes", "PronType": "prs"}, @@ -57,41 +57,41 @@ TAG_MAP = { "ADP__Person=3|Poss=Yes": {"pos": "ADP", "Person": 3, "Poss": "yes"}, "ADP___": {"pos": "ADP"}, "ADP__Poss=Yes": {"pos": "ADP", "Poss": "yes"}, - "ADP__PrepForm=Cmpd": {"pos": "ADP", "Other": {"PrepForm": "cmpd"}}, + "ADP__PrepForm=Cmpd": {"pos": "ADP", "PrepForm": "cmpd"}, "ADP__PronType=Art": {"pos": "ADP", "PronType": "art"}, - "ADV__Form=Len": {"pos": "ADV", "Other": {"Form": "len"}}, + "ADV__Form=Len": {"pos": "ADV", "Form": "len"}, "ADV___": {"pos": "ADV"}, "ADV__PronType=Int": {"pos": "ADV", "PronType": "int"}, - "AUX__Form=VF|Polarity=Neg|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}}, - "AUX__Form=VF|Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}}, - "AUX__Form=VF|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}}, - "AUX__Form=VF|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}}, - "AUX__Form=VF|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Other": {"Form": "vf", "VerbForm": "cop"}}, - "AUX__Gender=Masc|Number=Sing|Person=3|VerbForm=Cop": {"pos": "AUX", "Gender": "masc", "Number": "sing", "Person": 3, "Other": {"VerbForm": "cop"}}, - "AUX__Mood=Int|Number=Sing|PronType=Art|VerbForm=Cop": {"pos": "AUX", "Number": "sing", "PronType": "art", "Other": {"Mood": "int", "VerbForm": "cop"}}, - "AUX__Mood=Int|Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Other": {"Mood": "int", "VerbForm": "cop"}}, - "AUX__Mood=Int|Polarity=Neg|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "pres", "Other": {"Mood": "int", "VerbForm": "cop"}}, - "AUX__Mood=Int|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Other": {"Mood": "int", "VerbForm": "cop"}}, - "AUX__PartType=Comp|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Other": {"PartType": "comp", "VerbForm": "cop"}}, - "AUX__Polarity=Neg|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Other": {"VerbForm": "cop"}}, - "AUX__Polarity=Neg|PronType=Rel|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "pres", "Other": {"VerbForm": "cop"}}, - "AUX__Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Other": {"VerbForm": "cop"}}, - "AUX__Polarity=Neg|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "pres", "Other": {"VerbForm": "cop"}}, + "AUX__Form=VF|Polarity=Neg|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Form": "vf", "VerbForm": "cop"}, + "AUX__Form=VF|Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Form": "vf", "VerbForm": "cop"}, + "AUX__Form=VF|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "past", "Form": "vf", "VerbForm": "cop"}, + "AUX__Form=VF|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Form": "vf", "VerbForm": "cop"}, + "AUX__Form=VF|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Form": "vf", "VerbForm": "cop"}, + "AUX__Gender=Masc|Number=Sing|Person=3|VerbForm=Cop": {"pos": "AUX", "Gender": "masc", "Number": "sing", "Person": 3, "VerbForm": "cop"}, + "AUX__Mood=Int|Number=Sing|PronType=Art|VerbForm=Cop": {"pos": "AUX", "Number": "sing", "PronType": "art", "Mood": "int", "VerbForm": "cop"}, + "AUX__Mood=Int|Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Mood": "int", "VerbForm": "cop"}, + "AUX__Mood=Int|Polarity=Neg|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "pres", "Mood": "int", "VerbForm": "cop"}, + "AUX__Mood=Int|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Mood": "int", "VerbForm": "cop"}, + "AUX__PartType=Comp|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "PartType": "comp", "VerbForm": "cop"}, + "AUX__Polarity=Neg|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "past", "VerbForm": "cop"}, + "AUX__Polarity=Neg|PronType=Rel|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "pres", "VerbForm": "cop"}, + "AUX__Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "VerbForm": "cop"}, + "AUX__Polarity=Neg|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "pres", "VerbForm": "cop"}, "AUX___": {"pos": "AUX"}, - "AUX__PronType=Dem|VerbForm=Cop": {"pos": "AUX", "PronType": "dem", "Other": {"VerbForm": "cop"}}, - "AUX__PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "past", "Other": {"VerbForm": "cop"}}, - "AUX__PronType=Rel|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "pres", "Other": {"VerbForm": "cop"}}, - "AUX__Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Other": {"VerbForm": "cop"}}, - "AUX__Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Other": {"VerbForm": "cop"}}, - "AUX__VerbForm=Cop": {"pos": "AUX", "Other": {"VerbForm": "cop"}}, + "AUX__PronType=Dem|VerbForm=Cop": {"pos": "AUX", "PronType": "dem", "VerbForm": "cop"}, + "AUX__PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "past", "VerbForm": "cop"}, + "AUX__PronType=Rel|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "pres", "VerbForm": "cop"}, + "AUX__Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "VerbForm": "cop"}, + "AUX__Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "VerbForm": "cop"}, + "AUX__VerbForm=Cop": {"pos": "AUX", "VerbForm": "cop"}, "CCONJ___": {"pos": "CCONJ"}, "DET__Case=Gen|Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {"pos": "DET", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing", "PronType": "art"}, - "DET__Definite=Def|Form=Ecl": {"pos": "DET", "Definite": "def", "Other": {"Form": "ecl"}}, + "DET__Definite=Def|Form=Ecl": {"pos": "DET", "Definite": "def", "Form": "ecl"}, "DET__Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {"pos": "DET", "Definite": "def", "Gender": "fem", "Number": "sing", "PronType": "art"}, "DET__Definite=Def|Number=Plur|PronType=Art": {"pos": "DET", "Definite": "def", "Number": "plur", "PronType": "art"}, "DET__Definite=Def|Number=Sing|PronType=Art": {"pos": "DET", "Definite": "def", "Number": "sing", "PronType": "art"}, "DET__Definite=Def": {"pos": "DET", "Definite": "def"}, - "DET__Form=HPref|PronType=Ind": {"pos": "DET", "PronType": "ind", "Other": {"Form": "hpref"}}, + "DET__Form=HPref|PronType=Ind": {"pos": "DET", "PronType": "ind", "Form": "hpref"}, "DET__Gender=Fem|Number=Sing|Person=3|Poss=Yes": {"pos": "DET", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes"}, "DET__Gender=Masc|Number=Sing|Person=3|Poss=Yes": {"pos": "DET", "Gender": "masc", "Number": "sing", "Person": 3, "Poss": "yes"}, "DET__Number=Plur|Person=1|Poss=Yes": {"pos": "DET", "Number": "plur", "Person": 1, "Poss": "yes"}, @@ -103,33 +103,33 @@ TAG_MAP = { "DET__PronType=Dem": {"pos": "DET", "PronType": "dem"}, "DET__PronType=Ind": {"pos": "DET", "PronType": "ind"}, "NOUN__Case=Dat|Definite=Ind|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Definite": "ind", "Gender": "fem", "Number": "sing"}, - "NOUN__Case=Dat|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}}, - "NOUN__Case=Dat|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}}, + "NOUN__Case=Dat|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing", "Form": "ecl"}, + "NOUN__Case=Dat|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing", "Form": "len"}, "NOUN__Case=Dat|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing"}, "NOUN__Case=Dat|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "masc", "Number": "sing"}, - "NOUN__Case=Gen|Definite=Def|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "plur", "Other": {"NounType": "strong"}}, + "NOUN__Case=Gen|Definite=Def|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "plur", "NounType": "strong"}, "NOUN__Case=Gen|Definite=Def|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing"}, - "NOUN__Case=Gen|Definite=Def|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "plur", "Other": {"NounType": "strong"}}, - "NOUN__Case=Gen|Definite=Def|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "plur", "Other": {"NounType": "weak"}}, + "NOUN__Case=Gen|Definite=Def|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "plur", "NounType": "strong"}, + "NOUN__Case=Gen|Definite=Def|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "plur", "NounType": "weak"}, "NOUN__Case=Gen|Definite=Def|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "sing"}, "NOUN__Case=Gen|Definite=Ind|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "ind", "Gender": "fem", "Number": "sing"}, - "NOUN__Case=Gen|Form=Ecl|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"Form": "ecl", "NounType": "strong"}}, - "NOUN__Case=Gen|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}}, - "NOUN__Case=Gen|Form=Ecl|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl", "NounType": "strong"}}, - "NOUN__Case=Gen|Form=Ecl|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl", "NounType": "weak"}}, - "NOUN__Case=Gen|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "ecl"}}, - "NOUN__Case=Gen|Form=HPref|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "hpref"}}, - "NOUN__Case=Gen|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}}, - "NOUN__Case=Gen|Form=Len|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "len", "NounType": "strong"}}, - "NOUN__Case=Gen|Form=Len|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "len", "NounType": "weak"}}, - "NOUN__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}}, - "NOUN__Case=Gen|Form=Len|VerbForm=Inf": {"pos": "NOUN", "Case": "gen", "VerbForm": "inf", "Other": {"Form": "len"}}, - "NOUN__Case=Gen|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"NounType": "strong"}}, - "NOUN__Case=Gen|Gender=Fem|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"NounType": "weak"}}, + "NOUN__Case=Gen|Form=Ecl|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Form": "ecl", "NounType": "strong"}, + "NOUN__Case=Gen|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Form": "ecl"}, + "NOUN__Case=Gen|Form=Ecl|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Form": "ecl", "NounType": "strong"}, + "NOUN__Case=Gen|Form=Ecl|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Form": "ecl", "NounType": "weak"}, + "NOUN__Case=Gen|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing", "Form": "ecl"}, + "NOUN__Case=Gen|Form=HPref|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Form": "hpref"}, + "NOUN__Case=Gen|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Form": "len"}, + "NOUN__Case=Gen|Form=Len|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Form": "len", "NounType": "strong"}, + "NOUN__Case=Gen|Form=Len|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Form": "len", "NounType": "weak"}, + "NOUN__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing", "Form": "len"}, + "NOUN__Case=Gen|Form=Len|VerbForm=Inf": {"pos": "NOUN", "Case": "gen", "VerbForm": "inf", "Form": "len"}, + "NOUN__Case=Gen|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "NounType": "strong"}, + "NOUN__Case=Gen|Gender=Fem|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "NounType": "weak"}, "NOUN__Case=Gen|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur"}, "NOUN__Case=Gen|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing"}, - "NOUN__Case=Gen|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"NounType": "strong"}}, - "NOUN__Case=Gen|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"NounType": "weak"}}, + "NOUN__Case=Gen|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "NounType": "strong"}, + "NOUN__Case=Gen|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "NounType": "weak"}, "NOUN__Case=Gen|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur"}, "NOUN__Case=Gen|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing"}, "NOUN__Case=Gen|Number=Sing": {"pos": "NOUN", "Case": "gen", "Number": "sing"}, @@ -140,79 +140,79 @@ TAG_MAP = { "NOUN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "plur"}, "NOUN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "sing"}, "NOUN__Case=NomAcc|Definite=Ind|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Definite": "ind", "Gender": "masc", "Number": "plur"}, - "NOUN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Other": {"Form": "ecl"}}, - "NOUN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}}, - "NOUN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl"}}, - "NOUN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "ecl"}}, - "NOUN__Case=NomAcc|Form=Emp|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "emp"}}, - "NOUN__Case=NomAcc|Form=HPref|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Other": {"Form": "hpref"}}, - "NOUN__Case=NomAcc|Form=HPref|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "hpref"}}, - "NOUN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Other": {"Form": "hpref"}}, - "NOUN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "hpref"}}, - "NOUN__Case=NomAcc|Form=Len|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Other": {"Form": "len"}}, - "NOUN__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}}, - "NOUN__Case=NomAcc|Form=Len|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Other": {"Form": "len"}}, - "NOUN__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}}, + "NOUN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Form": "ecl"}, + "NOUN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "ecl"}, + "NOUN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Form": "ecl"}, + "NOUN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "ecl"}, + "NOUN__Case=NomAcc|Form=Emp|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "emp"}, + "NOUN__Case=NomAcc|Form=HPref|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Form": "hpref"}, + "NOUN__Case=NomAcc|Form=HPref|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "hpref"}, + "NOUN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Form": "hpref"}, + "NOUN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "hpref"}, + "NOUN__Case=NomAcc|Form=Len|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Form": "len"}, + "NOUN__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "len"}, + "NOUN__Case=NomAcc|Form=Len|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Form": "len"}, + "NOUN__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "len"}, "NOUN__Case=NomAcc|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur"}, "NOUN__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing"}, "NOUN__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur"}, "NOUN__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing"}, "NOUN__Case=Voc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "voc", "Definite": "def", "Gender": "masc", "Number": "plur"}, - "NOUN__Case=Voc|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}}, - "NOUN__Case=Voc|Form=Len|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "plur", "Other": {"Form": "len"}}, - "NOUN__Case=Voc|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}}, + "NOUN__Case=Voc|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "fem", "Number": "sing", "Form": "len"}, + "NOUN__Case=Voc|Form=Len|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "plur", "Form": "len"}, + "NOUN__Case=Voc|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "sing", "Form": "len"}, "NOUN__Case=Voc|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "sing"}, "NOUN__Degree=Pos": {"pos": "NOUN", "Degree": "pos"}, "NOUN__Foreign=Yes": {"pos": "NOUN", "Foreign": "yes"}, - "NOUN__Form=Ecl|Number=Sing": {"pos": "NOUN", "Number": "sing", "Other": {"Form": "ecl"}}, - "NOUN__Form=Ecl|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Other": {"Form": "ecl"}}, - "NOUN__Form=Ecl|VerbForm=Vnoun": {"pos": "NOUN", "VerbForm": "vnoun", "Other": {"Form": "ecl"}}, - "NOUN__Form=HPref|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Other": {"Form": "hpref"}}, - "NOUN__Form=Len|Number=Sing": {"pos": "NOUN", "Number": "sing", "Other": {"Form": "len"}}, - "NOUN__Form=Len|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Other": {"Form": "len"}}, + "NOUN__Form=Ecl|Number=Sing": {"pos": "NOUN", "Number": "sing", "Form": "ecl"}, + "NOUN__Form=Ecl|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Form": "ecl"}, + "NOUN__Form=Ecl|VerbForm=Vnoun": {"pos": "NOUN", "VerbForm": "vnoun", "Form": "ecl"}, + "NOUN__Form=HPref|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Form": "hpref"}, + "NOUN__Form=Len|Number=Sing": {"pos": "NOUN", "Number": "sing", "Form": "len"}, + "NOUN__Form=Len|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Form": "len"}, "NOUN__Gender=Fem|Number=Sing": {"pos": "NOUN", "Gender": "fem", "Number": "sing"}, - "NOUN__Number=Sing|PartType=Comp": {"pos": "NOUN", "Number": "sing", "Other": {"PartType": "comp"}}, + "NOUN__Number=Sing|PartType=Comp": {"pos": "NOUN", "Number": "sing", "PartType": "comp"}, "NOUN__Number=Sing": {"pos": "NOUN", "Number": "sing"}, "NOUN___": {"pos": "NOUN"}, "NOUN__Reflex=Yes": {"pos": "NOUN", "Reflex": "yes"}, "NOUN__VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf"}, "NOUN__VerbForm=Vnoun": {"pos": "NOUN", "VerbForm": "vnoun"}, "NUM__Definite=Def|NumType=Card": {"pos": "NUM", "Definite": "def", "NumType": "card"}, - "NUM__Form=Ecl|NumType=Card": {"pos": "NUM", "NumType": "card", "Other": {"Form": "ecl"}}, - "NUM__Form=Ecl|NumType=Ord": {"pos": "NUM", "NumType": "ord", "Other": {"Form": "ecl"}}, - "NUM__Form=HPref|NumType=Card": {"pos": "NUM", "NumType": "card", "Other": {"Form": "hpref"}}, - "NUM__Form=Len|NumType=Card": {"pos": "NUM", "NumType": "card", "Other": {"Form": "len"}}, - "NUM__Form=Len|NumType=Ord": {"pos": "NUM", "NumType": "ord", "Other": {"Form": "len"}}, + "NUM__Form=Ecl|NumType=Card": {"pos": "NUM", "NumType": "card", "Form": "ecl"}, + "NUM__Form=Ecl|NumType=Ord": {"pos": "NUM", "NumType": "ord", "Form": "ecl"}, + "NUM__Form=HPref|NumType=Card": {"pos": "NUM", "NumType": "card", "Form": "hpref"}, + "NUM__Form=Len|NumType=Card": {"pos": "NUM", "NumType": "card", "Form": "len"}, + "NUM__Form=Len|NumType=Ord": {"pos": "NUM", "NumType": "ord", "Form": "len"}, "NUM__NumType=Card": {"pos": "NUM", "NumType": "card"}, "NUM__NumType=Ord": {"pos": "NUM", "NumType": "ord"}, "NUM___": {"pos": "NUM"}, - "PART__Form=Ecl|PartType=Vb|PronType=Rel": {"pos": "PART", "PronType": "rel", "Other": {"Form": "ecl", "PartType": "vb"}}, - "PART__Mood=Imp|PartType=Vb|Polarity=Neg": {"pos": "PART", "Mood": "imp", "Polarity": "neg", "Other": {"PartType": "vb"}}, - "PART__Mood=Imp|PartType=Vb": {"pos": "PART", "Mood": "imp", "Other": {"PartType": "vb"}}, - "PART__Mood=Int|PartType=Vb|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Other": {"Mood": "int", "PartType": "vb"}}, - "PART__PartType=Ad": {"pos": "PART", "Other": {"PartType": "ad"}}, - "PART__PartType=Cmpl|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Other": {"PartType": "cmpl"}}, - "PART__PartType=Cmpl|Polarity=Neg|Tense=Past": {"pos": "PART", "Polarity": "neg", "Tense": "past", "Other": {"PartType": "cmpl"}}, - "PART__PartType=Cmpl": {"pos": "PART", "Other": {"PartType": "cmpl"}}, - "PART__PartType=Comp": {"pos": "PART", "Other": {"PartType": "comp"}}, - "PART__PartType=Cop|PronType=Rel": {"pos": "PART", "PronType": "rel", "Other": {"PartType": "cop"}}, - "PART__PartType=Deg": {"pos": "PART", "Other": {"PartType": "deg"}}, + "PART__Form=Ecl|PartType=Vb|PronType=Rel": {"pos": "PART", "PronType": "rel", "Form": "ecl", "PartType": "vb"}, + "PART__Mood=Imp|PartType=Vb|Polarity=Neg": {"pos": "PART", "Mood": "imp", "Polarity": "neg", "PartType": "vb"}, + "PART__Mood=Imp|PartType=Vb": {"pos": "PART", "Mood": "imp", "PartType": "vb"}, + "PART__Mood=Int|PartType=Vb|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Mood": "int", "PartType": "vb"}, + "PART__PartType=Ad": {"pos": "PART", "PartType": "ad"}, + "PART__PartType=Cmpl|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "PartType": "cmpl"}, + "PART__PartType=Cmpl|Polarity=Neg|Tense=Past": {"pos": "PART", "Polarity": "neg", "Tense": "past", "PartType": "cmpl"}, + "PART__PartType=Cmpl": {"pos": "PART", "PartType": "cmpl"}, + "PART__PartType=Comp": {"pos": "PART", "PartType": "comp"}, + "PART__PartType=Cop|PronType=Rel": {"pos": "PART", "PronType": "rel", "PartType": "cop"}, + "PART__PartType=Deg": {"pos": "PART", "PartType": "deg"}, "PART__PartType=Inf": {"pos": "PART", "PartType": "inf"}, - "PART__PartType=Num": {"pos": "PART", "Other": {"PartType": "num"}}, - "PART__PartType=Pat": {"pos": "PART", "Other": {"PartType": "pat"}}, - "PART__PartType=Vb|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Other": {"PartType": "vb"}}, - "PART__PartType=Vb|Polarity=Neg|PronType=Rel": {"pos": "PART", "Polarity": "neg", "PronType": "rel", "Other": {"PartType": "vb"}}, - "PART__PartType=Vb|Polarity=Neg|PronType=Rel|Tense=Past": {"pos": "PART", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Other": {"PartType": "vb"}}, - "PART__PartType=Vb|Polarity=Neg|Tense=Past": {"pos": "PART", "Polarity": "neg", "Tense": "past", "Other": {"PartType": "vb"}}, - "PART__PartType=Vb": {"pos": "PART", "Other": {"PartType": "vb"}}, - "PART__PartType=Vb|PronType=Rel": {"pos": "PART", "PronType": "rel", "Other": {"PartType": "vb"}}, - "PART__PartType=Vb|PronType=Rel|Tense=Past": {"pos": "PART", "PronType": "rel", "Tense": "past", "Other": {"PartType": "vb"}}, - "PART__PartType=Vb|Tense=Past": {"pos": "PART", "Tense": "past", "Other": {"PartType": "vb"}}, - "PART__PartType=Voc": {"pos": "PART", "Other": {"PartType": "voc"}}, + "PART__PartType=Num": {"pos": "PART", "PartType": "num"}, + "PART__PartType=Pat": {"pos": "PART", "PartType": "pat"}, + "PART__PartType=Vb|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "PartType": "vb"}, + "PART__PartType=Vb|Polarity=Neg|PronType=Rel": {"pos": "PART", "Polarity": "neg", "PronType": "rel", "PartType": "vb"}, + "PART__PartType=Vb|Polarity=Neg|PronType=Rel|Tense=Past": {"pos": "PART", "Polarity": "neg", "PronType": "rel", "Tense": "past", "PartType": "vb"}, + "PART__PartType=Vb|Polarity=Neg|Tense=Past": {"pos": "PART", "Polarity": "neg", "Tense": "past", "PartType": "vb"}, + "PART__PartType=Vb": {"pos": "PART", "PartType": "vb"}, + "PART__PartType=Vb|PronType=Rel": {"pos": "PART", "PronType": "rel", "PartType": "vb"}, + "PART__PartType=Vb|PronType=Rel|Tense=Past": {"pos": "PART", "PronType": "rel", "Tense": "past", "PartType": "vb"}, + "PART__PartType=Vb|Tense=Past": {"pos": "PART", "Tense": "past", "PartType": "vb"}, + "PART__PartType=Voc": {"pos": "PART", "PartType": "voc"}, "PART___": {"pos": "PART"}, "PART__PronType=Rel": {"pos": "PART", "PronType": "rel"}, - "PRON__Form=Len|Number=Sing|Person=2": {"pos": "PRON", "Number": "sing", "Person": 2, "Other": {"Form": "len"}}, - "PRON__Form=Len|PronType=Ind": {"pos": "PRON", "PronType": "ind", "Other": {"Form": "len"}}, + "PRON__Form=Len|Number=Sing|Person=2": {"pos": "PRON", "Number": "sing", "Person": 2, "Form": "len"}, + "PRON__Form=Len|PronType=Ind": {"pos": "PRON", "PronType": "ind", "Form": "len"}, "PRON__Gender=Fem|Number=Sing|Person=3": {"pos": "PRON", "Gender": "fem", "Number": "sing", "Person": 3}, "PRON__Gender=Masc|Number=Sing|Person=3": {"pos": "PRON", "Gender": "masc", "Number": "sing", "Person": 3}, "PRON__Gender=Masc|Number=Sing|Person=3|PronType=Emp": {"pos": "PRON", "Gender": "masc", "Number": "sing", "Person": 3, "PronType": "emp"}, @@ -232,103 +232,103 @@ TAG_MAP = { "PRON__PronType=Ind": {"pos": "PRON", "PronType": "ind"}, "PRON__PronType=Int": {"pos": "PRON", "PronType": "int"}, "PRON__Reflex=Yes": {"pos": "PRON", "Reflex": "yes"}, - "PROPN__Abbr=Yes": {"pos": "PROPN", "Other": {"Abbr": "yes"}}, + "PROPN__Abbr=Yes": {"pos": "PROPN", "Abbr": "yes"}, "PROPN__Case=Dat|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "dat", "Gender": "fem", "Number": "sing"}, "PROPN__Case=Gen|Definite=Def|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing"}, - "PROPN__Case=Gen|Form=Ecl|Gender=Fem|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"Form": "ecl"}}, - "PROPN__Case=Gen|Form=Ecl|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl"}}, - "PROPN__Case=Gen|Form=HPref|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "hpref"}}, - "PROPN__Case=Gen|Form=Len|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}}, - "PROPN__Case=Gen|Form=Len|Gender=Fem": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Other": {"Form": "len"}}, - "PROPN__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}}, - "PROPN__Case=Gen|Form=Len|Gender=Masc": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Other": {"Form": "len"}}, + "PROPN__Case=Gen|Form=Ecl|Gender=Fem|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "plur", "Form": "ecl"}, + "PROPN__Case=Gen|Form=Ecl|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "plur", "Form": "ecl"}, + "PROPN__Case=Gen|Form=HPref|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing", "Form": "hpref"}, + "PROPN__Case=Gen|Form=Len|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing", "Form": "len"}, + "PROPN__Case=Gen|Form=Len|Gender=Fem": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Form": "len"}, + "PROPN__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "sing", "Form": "len"}, + "PROPN__Case=Gen|Form=Len|Gender=Masc": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Form": "len"}, "PROPN__Case=Gen|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing"}, "PROPN__Case=Gen|Gender=Fem": {"pos": "PROPN", "Case": "gen", "Gender": "fem"}, - "PROPN__Case=Gen|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"NounType": "weak"}}, + "PROPN__Case=Gen|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "plur", "NounType": "weak"}, "PROPN__Case=Gen|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "sing"}, "PROPN__Case=Gen|Gender=Masc": {"pos": "PROPN", "Case": "gen", "Gender": "masc"}, "PROPN__Case=NomAcc|Definite=Def|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "fem", "Number": "sing"}, "PROPN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "plur"}, "PROPN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "sing"}, - "PROPN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}}, - "PROPN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "ecl"}}, - "PROPN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "hpref"}}, - "PROPN__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}}, - "PROPN__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}}, + "PROPN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "ecl"}, + "PROPN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "ecl"}, + "PROPN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "hpref"}, + "PROPN__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "len"}, + "PROPN__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "len"}, "PROPN__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing"}, "PROPN__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "plur"}, "PROPN__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing"}, "PROPN__Case=NomAcc|Gender=Masc": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc"}, - "PROPN__Case=Voc|Form=Len|Gender=Fem": {"pos": "PROPN", "Case": "voc", "Gender": "fem", "Other": {"Form": "len"}}, + "PROPN__Case=Voc|Form=Len|Gender=Fem": {"pos": "PROPN", "Case": "voc", "Gender": "fem", "Form": "len"}, "PROPN__Case=Voc|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "voc", "Gender": "masc", "Number": "sing"}, "PROPN__Gender=Masc|Number=Sing": {"pos": "PROPN", "Gender": "masc", "Number": "sing"}, "PROPN___": {"pos": "PROPN"}, "PUNCT___": {"pos": "PUNCT"}, "SCONJ___": {"pos": "SCONJ"}, - "SCONJ__Tense=Past|VerbForm=Cop": {"pos": "SCONJ", "Tense": "past", "Other": {"VerbForm": "cop"}}, - "SCONJ__VerbForm=Cop": {"pos": "SCONJ", "Other": {"VerbForm": "cop"}}, - "SYM__Abbr=Yes": {"pos": "SYM", "Other": {"Abbr": "yes"}}, + "SCONJ__Tense=Past|VerbForm=Cop": {"pos": "SCONJ", "Tense": "past", "VerbForm": "cop"}, + "SCONJ__VerbForm=Cop": {"pos": "SCONJ", "VerbForm": "cop"}, + "SYM__Abbr=Yes": {"pos": "SYM", "Abbr": "yes"}, "VERB__Case=NomAcc|Gender=Masc|Mood=Ind|Number=Sing|Tense=Pres": {"pos": "VERB", "Case": "nom|acc", "Gender": "masc", "Mood": "ind", "Number": "sing", "Tense": "pres"}, - "VERB__Dialect=Munster|Form=Len|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Dialect": "munster", "Form": "len"}}, + "VERB__Dialect=Munster|Form=Len|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Dialect": "munster", "Form": "len"}, "VERB__Foreign=Yes": {"pos": "VERB", "Foreign": "yes"}, - "VERB__Form=Ecl|Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1, "Other": {"Form": "ecl"}}, - "VERB__Form=Ecl|Mood=Cnd|Polarity=Neg": {"pos": "VERB", "Mood": "cnd", "Polarity": "neg", "Other": {"Form": "ecl"}}, - "VERB__Form=Ecl|Mood=Cnd": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "ecl"}}, - "VERB__Form=Ecl|Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "ecl", "Voice": "auto"}}, - "VERB__Form=Ecl|Mood=Imp|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "imp", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Form": "ecl"}}, - "VERB__Form=Ecl|Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Other": {"Form": "ecl"}}, - "VERB__Form=Ecl|Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres", "Other": {"Form": "ecl"}}, - "VERB__Form=Ecl|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Form": "ecl"}}, - "VERB__Form=Ecl|Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres", "Other": {"Form": "ecl"}}, - "VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "ecl"}}, - "VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "ecl", "Voice": "auto"}}, - "VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Form": "ecl"}}, - "VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "ecl"}}, - "VERB__Form=Ecl|Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "ecl"}}, - "VERB__Form=Ecl|Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "ecl", "Voice": "auto"}}, - "VERB__Form=Ecl|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Form": "ecl"}}, - "VERB__Form=Ecl|Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "ecl"}}, - "VERB__Form=Ecl|Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "ecl", "Voice": "auto"}}, - "VERB__Form=Ecl|Mood=Sub|Tense=Pres": {"pos": "VERB", "Mood": "sub", "Tense": "pres", "Other": {"Form": "ecl"}}, - "VERB__Form=Ecl": {"pos": "VERB", "Other": {"Form": "ecl"}}, - "VERB__Form=Emp|Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres", "Other": {"Form": "emp"}}, - "VERB__Form=Emp|Mood=Ind|Number=Sing|Person=1|PronType=Rel|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "PronType": "rel", "Tense": "pres", "Other": {"Form": "emp"}}, - "VERB__Form=Emp|Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres", "Other": {"Form": "emp"}}, - "VERB__Form=Len|Mood=Cnd|Number=Plur|Person=3": {"pos": "VERB", "Mood": "cnd", "Number": "plur", "Person": 3, "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1, "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Cnd|Number=Sing|Person=2": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 2, "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Cnd|Polarity=Neg": {"pos": "VERB", "Mood": "cnd", "Polarity": "neg", "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Cnd": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "len", "Voice": "auto"}}, - "VERB__Form=Len|Mood=Imp|Number=Plur|Person=3|Tense=Past": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 3, "Tense": "past", "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Imp|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Other": {"Form": "len", "Voice": "auto"}}, - "VERB__Form=Len|Mood=Imp|Voice=Auto": {"pos": "VERB", "Mood": "imp", "Other": {"Form": "len", "Voice": "auto"}}, - "VERB__Form=Len|Mood=Ind|Number=Plur|Person=1|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "fut", "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Ind|Number=Plur|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "past", "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Ind|Number=Plur|Person=3|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 3, "Tense": "past", "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Polarity": "neg", "Tense": "past", "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "len", "Voice": "auto"}}, - "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Form": "len", "Voice": "auto"}}, - "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len", "Voice": "auto"}}, - "VERB__Form=Len|Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "len", "Voice": "auto"}}, - "VERB__Form=Len|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Ind|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Form": "len", "Voice": "auto"}}, - "VERB__Form=Len|Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "len"}}, - "VERB__Form=Len|Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "len", "Voice": "auto"}}, - "VERB__Form=Len|Mood=Sub|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "sub", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len"}}, - "VERB__Form=Len|Polarity=Neg": {"pos": "VERB", "Polarity": "neg", "Other": {"Form": "len"}}, - "VERB__Form=Len": {"pos": "VERB", "Other": {"Form": "len"}}, + "VERB__Form=Ecl|Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1, "Form": "ecl"}, + "VERB__Form=Ecl|Mood=Cnd|Polarity=Neg": {"pos": "VERB", "Mood": "cnd", "Polarity": "neg", "Form": "ecl"}, + "VERB__Form=Ecl|Mood=Cnd": {"pos": "VERB", "Mood": "cnd", "Form": "ecl"}, + "VERB__Form=Ecl|Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Form": "ecl", "Voice": "auto"}, + "VERB__Form=Ecl|Mood=Imp|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "imp", "Number": "sing", "Person": 1, "Tense": "past", "Form": "ecl"}, + "VERB__Form=Ecl|Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Form": "ecl"}, + "VERB__Form=Ecl|Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres", "Form": "ecl"}, + "VERB__Form=Ecl|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Form": "ecl"}, + "VERB__Form=Ecl|Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres", "Form": "ecl"}, + "VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Form": "ecl"}, + "VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Form": "ecl", "Voice": "auto"}, + "VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Form": "ecl"}, + "VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Form": "ecl"}, + "VERB__Form=Ecl|Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Form": "ecl"}, + "VERB__Form=Ecl|Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Form": "ecl", "Voice": "auto"}, + "VERB__Form=Ecl|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Form": "ecl"}, + "VERB__Form=Ecl|Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Form": "ecl"}, + "VERB__Form=Ecl|Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Form": "ecl", "Voice": "auto"}, + "VERB__Form=Ecl|Mood=Sub|Tense=Pres": {"pos": "VERB", "Mood": "sub", "Tense": "pres", "Form": "ecl"}, + "VERB__Form=Ecl": {"pos": "VERB", "Form": "ecl"}, + "VERB__Form=Emp|Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres", "Form": "emp"}, + "VERB__Form=Emp|Mood=Ind|Number=Sing|Person=1|PronType=Rel|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "PronType": "rel", "Tense": "pres", "Form": "emp"}, + "VERB__Form=Emp|Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres", "Form": "emp"}, + "VERB__Form=Len|Mood=Cnd|Number=Plur|Person=3": {"pos": "VERB", "Mood": "cnd", "Number": "plur", "Person": 3, "Form": "len"}, + "VERB__Form=Len|Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1, "Form": "len"}, + "VERB__Form=Len|Mood=Cnd|Number=Sing|Person=2": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 2, "Form": "len"}, + "VERB__Form=Len|Mood=Cnd|Polarity=Neg": {"pos": "VERB", "Mood": "cnd", "Polarity": "neg", "Form": "len"}, + "VERB__Form=Len|Mood=Cnd": {"pos": "VERB", "Mood": "cnd", "Form": "len"}, + "VERB__Form=Len|Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Form": "len", "Voice": "auto"}, + "VERB__Form=Len|Mood=Imp|Number=Plur|Person=3|Tense=Past": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 3, "Tense": "past", "Form": "len"}, + "VERB__Form=Len|Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Form": "len"}, + "VERB__Form=Len|Mood=Imp|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Form": "len", "Voice": "auto"}, + "VERB__Form=Len|Mood=Imp|Voice=Auto": {"pos": "VERB", "Mood": "imp", "Form": "len", "Voice": "auto"}, + "VERB__Form=Len|Mood=Ind|Number=Plur|Person=1|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "fut", "Form": "len"}, + "VERB__Form=Len|Mood=Ind|Number=Plur|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "past", "Form": "len"}, + "VERB__Form=Len|Mood=Ind|Number=Plur|Person=3|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 3, "Tense": "past", "Form": "len"}, + "VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Polarity": "neg", "Tense": "past", "Form": "len"}, + "VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Polarity": "neg", "Tense": "pres", "Form": "len"}, + "VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Form": "len"}, + "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Form": "len"}, + "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Form": "len", "Voice": "auto"}, + "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Form": "len"}, + "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Form": "len", "Voice": "auto"}, + "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Form": "len"}, + "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Form": "len", "Voice": "auto"}, + "VERB__Form=Len|Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Form": "len"}, + "VERB__Form=Len|Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Form": "len", "Voice": "auto"}, + "VERB__Form=Len|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Form": "len"}, + "VERB__Form=Len|Mood=Ind|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Form": "len", "Voice": "auto"}, + "VERB__Form=Len|Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Form": "len"}, + "VERB__Form=Len|Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Form": "len", "Voice": "auto"}, + "VERB__Form=Len|Mood=Sub|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "sub", "Polarity": "neg", "Tense": "pres", "Form": "len"}, + "VERB__Form=Len|Polarity=Neg": {"pos": "VERB", "Polarity": "neg", "Form": "len"}, + "VERB__Form=Len": {"pos": "VERB", "Form": "len"}, "VERB__Mood=Cnd|Number=Plur|Person=3": {"pos": "VERB", "Mood": "cnd", "Number": "plur", "Person": 3}, "VERB__Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1}, "VERB__Mood=Cnd": {"pos": "VERB", "Mood": "cnd"}, - "VERB__Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Other": {"Voice": "auto"}}, + "VERB__Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Voice": "auto"}, "VERB__Mood=Imp|Number=Plur|Person=1|Polarity=Neg": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 1, "Polarity": "neg"}, "VERB__Mood=Imp|Number=Plur|Person=1": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 1}, "VERB__Mood=Imp|Number=Plur|Person=2": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 2}, @@ -338,28 +338,28 @@ TAG_MAP = { "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres"}, "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past"}, "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres"}, - "VERB__Mood=Ind|Polarity=Neg|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Voice": "auto"}}, + "VERB__Mood=Ind|Polarity=Neg|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Voice": "auto"}, "VERB__Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres"}, "VERB__Mood=Ind|PronType=Rel|Tense=Fut": {"pos": "VERB", "Mood": "ind", "PronType": "rel", "Tense": "fut"}, "VERB__Mood=Ind|PronType=Rel|Tense=Pres": {"pos": "VERB", "Mood": "ind", "PronType": "rel", "Tense": "pres"}, "VERB__Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut"}, - "VERB__Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Voice": "auto"}}, + "VERB__Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Voice": "auto"}, "VERB__Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past"}, - "VERB__Mood=Ind|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Voice": "auto"}}, + "VERB__Mood=Ind|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Voice": "auto"}, "VERB__Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres"}, - "VERB__Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Voice": "auto"}}, + "VERB__Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Voice": "auto"}, "VERB___": {"pos": "VERB"}, - "X__Abbr=Yes": {"pos": "X", "Other": {"Abbr": "yes"}}, + "X__Abbr=Yes": {"pos": "X", "Abbr": "yes"}, "X__Case=NomAcc|Foreign=Yes|Gender=Fem|Number=Sing": {"pos": "X", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Foreign": "yes"}, - "X__Definite=Def|Dialect=Ulster": {"pos": "X", "Definite": "def", "Other": {"Dialect": "ulster"}}, - "X__Dialect=Munster|Form=Len|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "X", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Dialect": "munster", "Form": "len"}}, - "X__Dialect=Munster|Mood=Imp|Number=Sing|Person=2|Polarity=Neg": {"pos": "X", "Mood": "imp", "Number": "sing", "Person": 2, "Polarity": "neg", "Other": {"Dialect": "munster"}}, - "X__Dialect=Munster|Mood=Ind|Tense=Past|Voice=Auto": {"pos": "X", "Mood": "ind", "Tense": "past", "Other": {"Dialect": "munster", "Voice": "auto"}}, - "X__Dialect=Munster": {"pos": "X", "Other": {"Dialect": "munster"}}, - "X__Dialect=Munster|PronType=Dem": {"pos": "X", "PronType": "dem", "Other": {"Dialect": "munster"}}, - "X__Dialect=Ulster|Gender=Masc|Number=Sing|Person=3": {"pos": "X", "Gender": "masc", "Number": "sing", "Person": 3, "Other": {"Dialect": "ulster"}}, - "X__Dialect=Ulster|PartType=Vb|Polarity=Neg": {"pos": "X", "Polarity": "neg", "Other": {"Dialect": "ulster", "PartType": "vb"}}, - "X__Dialect=Ulster|VerbForm=Cop": {"pos": "X", "Other": {"Dialect": "ulster", "VerbForm": "cop"}}, + "X__Definite=Def|Dialect=Ulster": {"pos": "X", "Definite": "def", "Dialect": "ulster"}, + "X__Dialect=Munster|Form=Len|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "X", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Dialect": "munster", "Form": "len"}, + "X__Dialect=Munster|Mood=Imp|Number=Sing|Person=2|Polarity=Neg": {"pos": "X", "Mood": "imp", "Number": "sing", "Person": 2, "Polarity": "neg", "Dialect": "munster"}, + "X__Dialect=Munster|Mood=Ind|Tense=Past|Voice=Auto": {"pos": "X", "Mood": "ind", "Tense": "past", "Dialect": "munster", "Voice": "auto"}, + "X__Dialect=Munster": {"pos": "X", "Dialect": "munster"}, + "X__Dialect=Munster|PronType=Dem": {"pos": "X", "PronType": "dem", "Dialect": "munster"}, + "X__Dialect=Ulster|Gender=Masc|Number=Sing|Person=3": {"pos": "X", "Gender": "masc", "Number": "sing", "Person": 3, "Dialect": "ulster"}, + "X__Dialect=Ulster|PartType=Vb|Polarity=Neg": {"pos": "X", "Polarity": "neg", "Dialect": "ulster", "PartType": "vb"}, + "X__Dialect=Ulster|VerbForm=Cop": {"pos": "X", "Dialect": "ulster", "VerbForm": "cop"}, "X__Foreign=Yes": {"pos": "X", "Foreign": "yes"}, "X___": {"pos": "X"} } diff --git a/spacy/lang/nb/morph_rules.py b/spacy/lang/nb/morph_rules.py index b1799fca8..e96b9fd6b 100644 --- a/spacy/lang/nb/morph_rules.py +++ b/spacy/lang/nb/morph_rules.py @@ -195,7 +195,7 @@ MORPH_RULES = { "seg": { LEMMA: PRON_LEMMA, "Person": "Three", - "Number": ("Sing", "Plur"), + "Number": "Sing,Plur", "Reflex": "Yes", } }, @@ -248,7 +248,7 @@ MORPH_RULES = { }, "deres": { LEMMA: "deres", - "Person": ("Two", "Three"), + "Person": "Two,Three", "Number": "Sing", "Poss": "Yes", "Gender": "Masc", @@ -309,7 +309,7 @@ MORPH_RULES = { }, "deres": { LEMMA: "deres", - "Person": ("Two", "Three"), + "Person": "Two,Three", "Number": "Sing", "Poss": "Yes", "Gender": "Fem", @@ -370,7 +370,7 @@ MORPH_RULES = { }, "deres": { LEMMA: "deres", - "Person": ("Two", "Three"), + "Person": "Two,Three", "Number": "Sing", "Poss": "Yes", "Gender": "Neut", @@ -400,7 +400,7 @@ MORPH_RULES = { "våre": {LEMMA: "vår", "Person": "One", "Number": "Plur", "Poss": "Yes"}, "deres": { LEMMA: "deres", - "Person": ("Two", "Three"), + "Person": "Two,Three", "Number": "Plur", "Poss": "Yes", }, @@ -448,21 +448,21 @@ MORPH_RULES = { "PronType": "Prs", "Number": "Sing", "Person": "Three", - "Gender": ("Fem", "Masc"), + "Gender": "Fem,Masc", }, "den": { LEMMA: PRON_LEMMA, "PronType": "Prs", "Number": "Sing", "Person": "Three", - "Gender": ("Fem", "Masc"), + "Gender": "Fem,Masc", }, "ingen": { LEMMA: PRON_LEMMA, "PronType": "Prs", "Number": "Sing", "Person": "Three", - "Gender": ("Fem", "Masc"), + "Gender": "Fem,Masc", "Polarity": "Neg", }, }, @@ -475,7 +475,7 @@ MORPH_RULES = { LEMMA: PRON_LEMMA, "PronType": "Prs", "Number": "Sing", - "Case": ("Gen", "Nom"), + "Case": "Gen,Nom", } }, "PRON__Animacy=Anim|Case=Gen|Number=Sing|PronType=Prs": { diff --git a/spacy/lang/sv/morph_rules.py b/spacy/lang/sv/morph_rules.py index 8fca20a49..3ef6aedc5 100644 --- a/spacy/lang/sv/morph_rules.py +++ b/spacy/lang/sv/morph_rules.py @@ -105,7 +105,7 @@ MORPH_RULES = { "PronType": "Prs", "Person": "Three", "Number": "Plur", - "Case": ("Nom", "Acc"), + "Case": "Nom,Acc", }, "dem": { LEMMA: PRON_LEMMA, @@ -166,7 +166,7 @@ MORPH_RULES = { LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", - "Number": ("Sing", "Plur"), + "Number": "Sing,Plur", "Gender": "Masc", "Poss": "Yes", "Reflex": "Yes", @@ -175,7 +175,7 @@ MORPH_RULES = { LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", - "Number": ("Sing", "Plur"), + "Number": "Sing,Plur", "Gender": "Fem", "Poss": "Yes", "Reflex": "Yes", @@ -184,7 +184,7 @@ MORPH_RULES = { LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", - "Number": ("Sing", "Plur"), + "Number": "Sing,Plur", "Poss": "Yes", "Reflex": "Yes", }, @@ -272,7 +272,7 @@ MORPH_RULES = { "VBZ": { "är": { "VerbForm": "Fin", - "Person": ("One", "Two", "Three"), + "Person": "One,Two,Three", "Tense": "Pres", "Mood": "Ind", } diff --git a/spacy/lang/uk/tag_map.py b/spacy/lang/uk/tag_map.py deleted file mode 100644 index 1ecbddc49..000000000 --- a/spacy/lang/uk/tag_map.py +++ /dev/null @@ -1,25 +0,0 @@ -from ..symbols import POS, ADV, NOUN, ADP, PRON, SCONJ, PROPN, DET, SYM, INTJ -from ..symbols import PUNCT, NUM, AUX, X, CONJ, ADJ, VERB, PART, SPACE, CCONJ - - -TAG_MAP = { - "ADV": {POS: ADV}, - "NOUN": {POS: NOUN}, - "ADP": {POS: ADP}, - "PRON": {POS: PRON}, - "SCONJ": {POS: SCONJ}, - "PROPN": {POS: PROPN}, - "DET": {POS: DET}, - "SYM": {POS: SYM}, - "INTJ": {POS: INTJ}, - "PUNCT": {POS: PUNCT}, - "NUM": {POS: NUM}, - "AUX": {POS: AUX}, - "X": {POS: X}, - "CONJ": {POS: CONJ}, - "CCONJ": {POS: CCONJ}, - "ADJ": {POS: ADJ}, - "VERB": {POS: VERB}, - "PART": {POS: PART}, - "SP": {POS: SPACE}, -} diff --git a/spacy/lang/ur/tag_map.py b/spacy/lang/ur/tag_map.py index e0940edb7..d990fd46a 100644 --- a/spacy/lang/ur/tag_map.py +++ b/spacy/lang/ur/tag_map.py @@ -10,8 +10,8 @@ TAG_MAP = { '""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"}, "''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"}, ":": {POS: PUNCT}, - "$": {POS: SYM, "Other": {"SymType": "currency"}}, - "#": {POS: SYM, "Other": {"SymType": "numbersign"}}, + "$": {POS: SYM, "SymType": "currency"}, + "#": {POS: SYM, "SymType": "numbersign"}, "AFX": {POS: ADJ, "Hyph": "yes"}, "CC": {POS: CCONJ, "ConjType": "coor"}, "CD": {POS: NUM, "NumType": "card"}, @@ -52,7 +52,7 @@ TAG_MAP = { "VerbForm": "fin", "Tense": "pres", "Number": "sing", - "Person": 3, + "Person": "3", }, "WDT": {POS: ADJ, "PronType": "int|rel"}, "WP": {POS: NOUN, "PronType": "int|rel"}, diff --git a/spacy/lang/vi/tag_map.py b/spacy/lang/vi/tag_map.py deleted file mode 100644 index 1ecbddc49..000000000 --- a/spacy/lang/vi/tag_map.py +++ /dev/null @@ -1,25 +0,0 @@ -from ..symbols import POS, ADV, NOUN, ADP, PRON, SCONJ, PROPN, DET, SYM, INTJ -from ..symbols import PUNCT, NUM, AUX, X, CONJ, ADJ, VERB, PART, SPACE, CCONJ - - -TAG_MAP = { - "ADV": {POS: ADV}, - "NOUN": {POS: NOUN}, - "ADP": {POS: ADP}, - "PRON": {POS: PRON}, - "SCONJ": {POS: SCONJ}, - "PROPN": {POS: PROPN}, - "DET": {POS: DET}, - "SYM": {POS: SYM}, - "INTJ": {POS: INTJ}, - "PUNCT": {POS: PUNCT}, - "NUM": {POS: NUM}, - "AUX": {POS: AUX}, - "X": {POS: X}, - "CONJ": {POS: CONJ}, - "CCONJ": {POS: CCONJ}, - "ADJ": {POS: ADJ}, - "VERB": {POS: VERB}, - "PART": {POS: PART}, - "SP": {POS: SPACE}, -} diff --git a/spacy/morphology.pxd b/spacy/morphology.pxd index 1a3cedf97..1e8c255b8 100644 --- a/spacy/morphology.pxd +++ b/spacy/morphology.pxd @@ -2,6 +2,7 @@ from cymem.cymem cimport Pool from preshed.maps cimport PreshMap, PreshMapArray from libc.stdint cimport uint64_t from murmurhash cimport mrmr +cimport numpy as np from .structs cimport TokenC, MorphAnalysisC from .strings cimport StringStore @@ -20,12 +21,11 @@ cdef class Morphology: cdef readonly object tag_names cdef readonly object reverse_index cdef readonly object exc - cdef readonly object _feat_map cdef readonly PreshMapArray _cache cdef readonly int n_tags - cpdef update(self, hash_t morph, features) - cdef hash_t insert(self, MorphAnalysisC tag) except 0 + cdef MorphAnalysisC create_morph_tag(self, field_feature_pairs) except * + cdef int insert(self, MorphAnalysisC tag) except -1 cdef int assign_untagged(self, TokenC* token) except -1 cdef int assign_tag(self, TokenC* token, tag) except -1 @@ -34,8 +34,7 @@ cdef class Morphology: cdef int _assign_tag_from_exceptions(self, TokenC* token, int tag_id) except -1 -cdef int check_feature(const MorphAnalysisC* tag, attr_t feature) nogil -cdef attr_t get_field(const MorphAnalysisC* tag, int field) nogil -cdef list list_features(const MorphAnalysisC* tag) - -cdef tag_to_json(const MorphAnalysisC* tag) +cdef int check_feature(const MorphAnalysisC* morph, attr_t feature) nogil +cdef list list_features(const MorphAnalysisC* morph) +cdef np.ndarray get_by_field(const MorphAnalysisC* morph, attr_t field) +cdef int get_n_by_field(attr_t* results, const MorphAnalysisC* morph, attr_t field) nogil diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index 8030a9a28..3003d118f 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -2,6 +2,7 @@ from libc.string cimport memset import srsly from collections import Counter +import numpy from .strings import get_string_id from . import symbols @@ -10,130 +11,38 @@ from .attrs import LEMMA, intify_attrs from .parts_of_speech cimport SPACE from .parts_of_speech import IDS as POS_IDS from .lexeme cimport Lexeme -from .errors import Errors +from .errors import Errors, Warnings, user_warning from .util import ensure_path -cdef enum univ_field_t: - Field_POS - Field_Abbr - Field_AdpType - Field_AdvType - Field_Animacy - Field_Aspect - Field_Case - Field_ConjType - Field_Connegative - Field_Definite - Field_Degree - Field_Derivation - Field_Echo - Field_Foreign - Field_Gender - Field_Hyph - Field_InfForm - Field_Mood - Field_NameType - Field_Negative - Field_NounType - Field_Number - Field_NumForm - Field_NumType - Field_NumValue - Field_PartForm - Field_PartType - Field_Person - Field_Polarity - Field_Polite - Field_Poss - Field_Prefix - Field_PrepCase - Field_PronType - Field_PunctSide - Field_PunctType - Field_Reflex - Field_Style - Field_StyleVariant - Field_Tense - Field_Typo - Field_VerbForm - Field_VerbType - Field_Voice - - def _normalize_props(props): - """Transform deprecated string keys to correct names.""" + """Convert attrs dict so that POS is always by ID, other features are left + as is as long as they are strings or IDs. + """ out = {} props = dict(props) - for key in FIELDS: - if key in props: - value = str(props[key]).lower() - # We don't have support for disjunctive int|rel features, so - # just take the first one :( - if "|" in value: - value = value.split("|")[0] - attr = f"{key}_{value}" - if attr in FEATURES: - props.pop(key) - props[attr] = True for key, value in props.items(): + # convert POS value to ID if key == POS: if hasattr(value, 'upper'): value = value.upper() if value in POS_IDS: value = POS_IDS[value] out[key] = value - elif isinstance(key, int): - out[key] = value - elif value is True: - out[key] = value - elif key.lower() == 'pos': + elif isinstance(key, str) and key.lower() == 'pos': out[POS] = POS_IDS[value.upper()] - elif key.lower() != 'morph': + # sort values + elif isinstance(value, str) and Morphology.VALUE_SEP in value: + out[key] = Morphology.VALUE_SEP.join( + sorted(value.split(Morphology.VALUE_SEP))) + # accept any string or ID fields and values + elif isinstance(key, (int, str)) and isinstance(value, (int, str)): out[key] = value + else: + user_warning(Warnings.W028.format(feature={key: value})) return out -class MorphologyClassMap(object): - def __init__(self, features): - self.features = tuple(features) - self.fields = [] - self.feat2field = {} - seen_fields = set() - for feature in features: - field = feature.split("_", 1)[0] - if field not in seen_fields: - self.fields.append(field) - seen_fields.add(field) - self.feat2field[feature] = FIELDS[field] - self.id2feat = {get_string_id(name): name for name in features} - self.field2feats = {"POS": []} - self.col2info = [] - self.attr2field = dict(LOWER_FIELDS.items()) - self.feat2offset = {} - self.field2col = {} - self.field2id = dict(FIELDS.items()) - self.fieldid2field = {field_id: field for field, field_id in FIELDS.items()} - for feature in features: - field = self.fields[self.feat2field[feature]] - if field not in self.field2col: - self.field2col[field] = len(self.col2info) - if field != "POS" and field not in self.field2feats: - self.col2info.append((field, 0, "NIL")) - self.field2feats.setdefault(field, ["NIL"]) - offset = len(self.field2feats[field]) - self.field2feats[field].append(feature) - self.col2info.append((field, offset, feature)) - self.feat2offset[feature] = offset - - @property - def field_sizes(self): - return [len(self.field2feats[field]) for field in self.fields] - - def get_field_offset(self, field): - return self.field2col[field] - - cdef class Morphology: '''Store the possible morphological analyses for a language, and index them by hash. @@ -142,9 +51,15 @@ cdef class Morphology: analysis, so queries of morphological attributes are delegated to this class. ''' - def __init__(self, StringStore string_store, tag_map, lemmatizer, exc=None): + + FEATURE_SEP = "|" + FIELD_SEP = "=" + VALUE_SEP = "," + EMPTY_MORPH = "_" + + def __init__(self, StringStore strings, tag_map, lemmatizer, exc=None): self.mem = Pool() - self.strings = string_store + self.strings = strings self.tags = PreshMap() # Add special space symbol. We prefix with underscore, to make sure it # always sorts to the end. @@ -158,7 +73,6 @@ cdef class Morphology: self.lemmatizer = lemmatizer self.n_tags = len(tag_map) self.reverse_index = {} - self._feat_map = MorphologyClassMap(FEATURES) self._load_from_tag_map(tag_map) self._cache = PreshMapArray(self.n_tags) @@ -172,8 +86,7 @@ cdef class Morphology: def _load_from_tag_map(self, tag_map): for i, (tag_str, attrs) in enumerate(sorted(tag_map.items())): attrs = _normalize_props(attrs) - self.add({self._feat_map.id2feat[feat] for feat in attrs - if feat in self._feat_map.id2feat}) + self.add(attrs) self.tag_map[tag_str] = dict(attrs) self.reverse_index[self.strings.add(tag_str)] = i @@ -182,40 +95,78 @@ cdef class Morphology: self.exc), None, None) def add(self, features): - """Insert a morphological analysis in the morphology table, if not already - present. Returns the hash of the new analysis. + """Insert a morphological analysis in the morphology table, if not + already present. The morphological analysis may be provided in the UD + FEATS format as a string or in the tag map dict format. + Returns the hash of the new analysis. + """ + cdef MorphAnalysisC* tag_ptr + if features == self.EMPTY_MORPH: + features = "" + if isinstance(features, str): + tag_ptr = self.tags.get(self.strings[features]) + if tag_ptr != NULL: + return tag_ptr.key + features = self.feats_to_dict(features) + if not isinstance(features, dict): + user_warning(Warnings.W028.format(feature=features)) + features = {} + features = _normalize_props(features) + string_features = {self.strings.as_string(field): self.strings.as_string(values) for field, values in features.items()} + # normalized UFEATS string with sorted fields and values + norm_feats_string = self.FEATURE_SEP.join(sorted([ + self.FIELD_SEP.join([field, values]) + for field, values in string_features.items() + ])) + # intified ("Field", "Field=Value") pairs + field_feature_pairs = [] + for field in sorted(string_features): + values = string_features[field] + for value in values.split(self.VALUE_SEP): + field_feature_pairs.append(( + self.strings.add(field), + self.strings.add(field + self.FIELD_SEP + value), + )) + cdef MorphAnalysisC tag = self.create_morph_tag(field_feature_pairs) + # the hash key for the tag is either the hash of the normalized UFEATS + # string or the hash of an empty placeholder (using the empty string + # would give a hash key of 0, which is not good for PreshMap) + if norm_feats_string: + tag.key = self.strings.add(norm_feats_string) + else: + tag.key = self.strings.add(self.EMPTY_MORPH) + self.insert(tag) + return tag.key + + cdef MorphAnalysisC create_morph_tag(self, field_feature_pairs) except *: + """Creates a MorphAnalysisC from a list of intified + ("Field", "Field=Value") tuples where fields with multiple values have + been split into individual tuples, e.g.: + [("Field1", "Field1=Value1"), ("Field1", "Field1=Value2"), + ("Field2", "Field2=Value3")] """ - for f in features: - if isinstance(f, str): - self.strings.add(f) - string_features = features - features = intify_features(features) - cdef attr_t feature - for feature in features: - if feature != 0 and feature not in self._feat_map.id2feat: - raise ValueError(Errors.E167.format(feat=self.strings[feature], feat_id=feature)) cdef MorphAnalysisC tag - tag = create_rich_tag(features) - cdef hash_t key = self.insert(tag) - return key + tag.length = len(field_feature_pairs) + tag.fields = self.mem.alloc(tag.length, sizeof(attr_t)) + tag.features = self.mem.alloc(tag.length, sizeof(attr_t)) + for i, (field, feature) in enumerate(field_feature_pairs): + tag.fields[i] = field + tag.features[i] = feature + return tag + + cdef int insert(self, MorphAnalysisC tag) except -1: + cdef hash_t key = tag.key + if self.tags.get(key) == NULL: + tag_ptr = self.mem.alloc(1, sizeof(MorphAnalysisC)) + tag_ptr[0] = tag + self.tags.set(key, tag_ptr) def get(self, hash_t morph): tag = self.tags.get(morph) if tag == NULL: return [] else: - return tag_to_json(tag) - - cpdef update(self, hash_t morph, features): - """Update a morphological analysis with new feature values.""" - tag = (self.tags.get(morph))[0] - features = intify_features(features) - cdef attr_t feature - for feature in features: - field = FEATURE_FIELDS[FEATURE_NAMES[feature]] - set_feature(&tag, field, feature, 1) - morph = self.insert(tag) - return morph + return self.strings[tag.key] def lemmatize(self, const univ_pos_t univ_pos, attr_t orth, morphology): if orth not in self.strings: @@ -249,19 +200,10 @@ cdef class Morphology: """ attrs = dict(attrs) attrs = _normalize_props(attrs) - self.add({self._feat_map.id2feat[feat] for feat in attrs - if feat in self._feat_map.id2feat}) + self.add(attrs) attrs = intify_attrs(attrs, self.strings, _do_deprecated=True) self.exc[(tag_str, self.strings.add(orth_str))] = attrs - cdef hash_t insert(self, MorphAnalysisC tag) except 0: - cdef hash_t key = hash_tag(tag) - if self.tags.get(key) == NULL: - tag_ptr = self.mem.alloc(1, sizeof(MorphAnalysisC)) - tag_ptr[0] = tag - self.tags.set(key, tag_ptr) - return key - cdef int assign_untagged(self, TokenC* token) except -1: """Set morphological attributes on a token without a POS tag. Uses the lemmatizer's lookup() method, which looks up the string in the @@ -322,782 +264,60 @@ cdef class Morphology: for form_str, attrs in entries.items(): self.add_special_case(tag_str, form_str, attrs) - @classmethod - def create_class_map(cls): - return MorphologyClassMap(FEATURES) + @staticmethod + def feats_to_dict(feats): + if not feats: + return {} + return {field: Morphology.VALUE_SEP.join(sorted(values.split(Morphology.VALUE_SEP))) for field, values in + [feat.split(Morphology.FIELD_SEP) for feat in feats.split(Morphology.FEATURE_SEP)]} + + @staticmethod + def dict_to_feats(feats_dict): + if len(feats_dict) == 0: + return "" + return Morphology.FEATURE_SEP.join(sorted([Morphology.FIELD_SEP.join([field, Morphology.VALUE_SEP.join(sorted(values.split(Morphology.VALUE_SEP)))]) for field, values in feats_dict.items()])) + + @staticmethod + def list_to_feats(feats_list): + if len(feats_list) == 0: + return "" + feats_dict = {} + for feat in feats_list: + field, value = feat.split(Morphology.FIELD_SEP) + if field not in feats_dict: + feats_dict[field] = set() + feats_dict[field].add(value) + feats_dict = {field: Morphology.VALUE_SEP.join(sorted(values)) for field, values in feats_dict.items()} + return Morphology.dict_to_feats(feats_dict) -cpdef univ_pos_t get_int_tag(pos_): - return 0 - -cpdef intify_features(features): - return {get_string_id(feature) for feature in features} - -cdef hash_t hash_tag(MorphAnalysisC tag) nogil: - return mrmr.hash64(&tag, sizeof(tag), 0) +cdef int check_feature(const MorphAnalysisC* morph, attr_t feature) nogil: + cdef int i + for i in range(morph.length): + if morph.features[i] == feature: + return True + return False -cdef MorphAnalysisC create_rich_tag(features) except *: - cdef MorphAnalysisC tag - cdef attr_t feature - memset(&tag, 0, sizeof(tag)) - for feature in features: - field = FEATURE_FIELDS[FEATURE_NAMES[feature]] - set_feature(&tag, field, feature, 1) - return tag +cdef list list_features(const MorphAnalysisC* morph): + cdef int i + features = [] + for i in range(morph.length): + features.append(morph.features[i]) + return features -cdef tag_to_json(const MorphAnalysisC* tag): - return [FEATURE_NAMES[f] for f in list_features(tag)] +cdef np.ndarray get_by_field(const MorphAnalysisC* morph, attr_t field): + cdef np.ndarray results = numpy.zeros((morph.length,), dtype="uint64") + n = get_n_by_field(results.data, morph, field) + return results[:n] -cdef MorphAnalysisC tag_from_json(json_tag): - raise NotImplementedError - - -cdef list list_features(const MorphAnalysisC* tag): - output = [] - if tag.abbr != 0: - output.append(tag.abbr) - if tag.adp_type != 0: - output.append(tag.adp_type) - if tag.adv_type != 0: - output.append(tag.adv_type) - if tag.animacy != 0: - output.append(tag.animacy) - if tag.aspect != 0: - output.append(tag.aspect) - if tag.case != 0: - output.append(tag.case) - if tag.conj_type != 0: - output.append(tag.conj_type) - if tag.connegative != 0: - output.append(tag.connegative) - if tag.definite != 0: - output.append(tag.definite) - if tag.degree != 0: - output.append(tag.degree) - if tag.derivation != 0: - output.append(tag.derivation) - if tag.echo != 0: - output.append(tag.echo) - if tag.foreign != 0: - output.append(tag.foreign) - if tag.gender != 0: - output.append(tag.gender) - if tag.hyph != 0: - output.append(tag.hyph) - if tag.inf_form != 0: - output.append(tag.inf_form) - if tag.mood != 0: - output.append(tag.mood) - if tag.negative != 0: - output.append(tag.negative) - if tag.number != 0: - output.append(tag.number) - if tag.name_type != 0: - output.append(tag.name_type) - if tag.noun_type != 0: - output.append(tag.noun_type) - if tag.part_form != 0: - output.append(tag.part_form) - if tag.part_type != 0: - output.append(tag.part_type) - if tag.person != 0: - output.append(tag.person) - if tag.polite != 0: - output.append(tag.polite) - if tag.polarity != 0: - output.append(tag.polarity) - if tag.poss != 0: - output.append(tag.poss) - if tag.prefix != 0: - output.append(tag.prefix) - if tag.prep_case != 0: - output.append(tag.prep_case) - if tag.pron_type != 0: - output.append(tag.pron_type) - if tag.punct_type != 0: - output.append(tag.punct_type) - if tag.reflex != 0: - output.append(tag.reflex) - if tag.style != 0: - output.append(tag.style) - if tag.style_variant != 0: - output.append(tag.style_variant) - if tag.typo != 0: - output.append(tag.typo) - if tag.verb_form != 0: - output.append(tag.verb_form) - if tag.voice != 0: - output.append(tag.voice) - if tag.verb_type != 0: - output.append(tag.verb_type) - return output - - -cdef attr_t get_field(const MorphAnalysisC* tag, int field_id) nogil: - field = field_id - if field == Field_POS: - return tag.pos - if field == Field_Abbr: - return tag.abbr - elif field == Field_AdpType: - return tag.adp_type - elif field == Field_AdvType: - return tag.adv_type - elif field == Field_Animacy: - return tag.animacy - elif field == Field_Aspect: - return tag.aspect - elif field == Field_Case: - return tag.case - elif field == Field_ConjType: - return tag.conj_type - elif field == Field_Connegative: - return tag.connegative - elif field == Field_Definite: - return tag.definite - elif field == Field_Degree: - return tag.degree - elif field == Field_Derivation: - return tag.derivation - elif field == Field_Echo: - return tag.echo - elif field == Field_Foreign: - return tag.foreign - elif field == Field_Gender: - return tag.gender - elif field == Field_Hyph: - return tag.hyph - elif field == Field_InfForm: - return tag.inf_form - elif field == Field_Mood: - return tag.mood - elif field == Field_Negative: - return tag.negative - elif field == Field_Number: - return tag.number - elif field == Field_NameType: - return tag.name_type - elif field == Field_NounType: - return tag.noun_type - elif field == Field_NumForm: - return tag.num_form - elif field == Field_NumType: - return tag.num_type - elif field == Field_NumValue: - return tag.num_value - elif field == Field_PartForm: - return tag.part_form - elif field == Field_PartType: - return tag.part_type - elif field == Field_Person: - return tag.person - elif field == Field_Polite: - return tag.polite - elif field == Field_Polarity: - return tag.polarity - elif field == Field_Poss: - return tag.poss - elif field == Field_Prefix: - return tag.prefix - elif field == Field_PrepCase: - return tag.prep_case - elif field == Field_PronType: - return tag.pron_type - elif field == Field_PunctSide: - return tag.punct_side - elif field == Field_PunctType: - return tag.punct_type - elif field == Field_Reflex: - return tag.reflex - elif field == Field_Style: - return tag.style - elif field == Field_StyleVariant: - return tag.style_variant - elif field == Field_Tense: - return tag.tense - elif field == Field_Typo: - return tag.typo - elif field == Field_VerbForm: - return tag.verb_form - elif field == Field_Voice: - return tag.voice - elif field == Field_VerbType: - return tag.verb_type - else: - raise ValueError(Errors.E168.format(field=field_id)) - - -cdef int check_feature(const MorphAnalysisC* tag, attr_t feature) nogil: - if tag.abbr == feature: - return 1 - elif tag.adp_type == feature: - return 1 - elif tag.adv_type == feature: - return 1 - elif tag.animacy == feature: - return 1 - elif tag.aspect == feature: - return 1 - elif tag.case == feature: - return 1 - elif tag.conj_type == feature: - return 1 - elif tag.connegative == feature: - return 1 - elif tag.definite == feature: - return 1 - elif tag.degree == feature: - return 1 - elif tag.derivation == feature: - return 1 - elif tag.echo == feature: - return 1 - elif tag.foreign == feature: - return 1 - elif tag.gender == feature: - return 1 - elif tag.hyph == feature: - return 1 - elif tag.inf_form == feature: - return 1 - elif tag.mood == feature: - return 1 - elif tag.negative == feature: - return 1 - elif tag.number == feature: - return 1 - elif tag.name_type == feature: - return 1 - elif tag.noun_type == feature: - return 1 - elif tag.num_form == feature: - return 1 - elif tag.num_type == feature: - return 1 - elif tag.num_value == feature: - return 1 - elif tag.part_form == feature: - return 1 - elif tag.part_type == feature: - return 1 - elif tag.person == feature: - return 1 - elif tag.polite == feature: - return 1 - elif tag.polarity == feature: - return 1 - elif tag.poss == feature: - return 1 - elif tag.prefix == feature: - return 1 - elif tag.prep_case == feature: - return 1 - elif tag.pron_type == feature: - return 1 - elif tag.punct_side == feature: - return 1 - elif tag.punct_type == feature: - return 1 - elif tag.reflex == feature: - return 1 - elif tag.style == feature: - return 1 - elif tag.style_variant == feature: - return 1 - elif tag.tense == feature: - return 1 - elif tag.typo == feature: - return 1 - elif tag.verb_form == feature: - return 1 - elif tag.voice == feature: - return 1 - elif tag.verb_type == feature: - return 1 - else: - return 0 - -cdef int set_feature(MorphAnalysisC* tag, - univ_field_t field, attr_t feature, int value) except -1: - if value == True: - value_ = feature - else: - value_ = 0 - prev_value = get_field(tag, field) - if prev_value != 0 and value_ == 0 and field != Field_POS: - tag.length -= 1 - elif prev_value == 0 and value_ != 0 and field != Field_POS: - tag.length += 1 - if feature == 0: - pass - elif field == Field_POS: - tag.pos = get_string_id(FEATURE_NAMES[value_].split('_')[1]) - elif field == Field_Abbr: - tag.abbr = value_ - elif field == Field_AdpType: - tag.adp_type = value_ - elif field == Field_AdvType: - tag.adv_type = value_ - elif field == Field_Animacy: - tag.animacy = value_ - elif field == Field_Aspect: - tag.aspect = value_ - elif field == Field_Case: - tag.case = value_ - elif field == Field_ConjType: - tag.conj_type = value_ - elif field == Field_Connegative: - tag.connegative = value_ - elif field == Field_Definite: - tag.definite = value_ - elif field == Field_Degree: - tag.degree = value_ - elif field == Field_Derivation: - tag.derivation = value_ - elif field == Field_Echo: - tag.echo = value_ - elif field == Field_Foreign: - tag.foreign = value_ - elif field == Field_Gender: - tag.gender = value_ - elif field == Field_Hyph: - tag.hyph = value_ - elif field == Field_InfForm: - tag.inf_form = value_ - elif field == Field_Mood: - tag.mood = value_ - elif field == Field_Negative: - tag.negative = value_ - elif field == Field_Number: - tag.number = value_ - elif field == Field_NameType: - tag.name_type = value_ - elif field == Field_NounType: - tag.noun_type = value_ - elif field == Field_NumForm: - tag.num_form = value_ - elif field == Field_NumType: - tag.num_type = value_ - elif field == Field_NumValue: - tag.num_value = value_ - elif field == Field_PartForm: - tag.part_form = value_ - elif field == Field_PartType: - tag.part_type = value_ - elif field == Field_Person: - tag.person = value_ - elif field == Field_Polite: - tag.polite = value_ - elif field == Field_Polarity: - tag.polarity = value_ - elif field == Field_Poss: - tag.poss = value_ - elif field == Field_Prefix: - tag.prefix = value_ - elif field == Field_PrepCase: - tag.prep_case = value_ - elif field == Field_PronType: - tag.pron_type = value_ - elif field == Field_PunctSide: - tag.punct_side = value_ - elif field == Field_PunctType: - tag.punct_type = value_ - elif field == Field_Reflex: - tag.reflex = value_ - elif field == Field_Style: - tag.style = value_ - elif field == Field_StyleVariant: - tag.style_variant = value_ - elif field == Field_Tense: - tag.tense = value_ - elif field == Field_Typo: - tag.typo = value_ - elif field == Field_VerbForm: - tag.verb_form = value_ - elif field == Field_Voice: - tag.voice = value_ - elif field == Field_VerbType: - tag.verb_type = value_ - else: - raise ValueError(Errors.E167.format(field=FEATURE_NAMES.get(feature), field_id=feature)) - - -FIELDS = { - 'POS': Field_POS, - 'Abbr': Field_Abbr, - 'AdpType': Field_AdpType, - 'AdvType': Field_AdvType, - 'Animacy': Field_Animacy, - 'Aspect': Field_Aspect, - 'Case': Field_Case, - 'ConjType': Field_ConjType, - 'Connegative': Field_Connegative, - 'Definite': Field_Definite, - 'Degree': Field_Degree, - 'Derivation': Field_Derivation, - 'Echo': Field_Echo, - 'Foreign': Field_Foreign, - 'Gender': Field_Gender, - 'Hyph': Field_Hyph, - 'InfForm': Field_InfForm, - 'Mood': Field_Mood, - 'NameType': Field_NameType, - 'Negative': Field_Negative, - 'NounType': Field_NounType, - 'Number': Field_Number, - 'NumForm': Field_NumForm, - 'NumType': Field_NumType, - 'NumValue': Field_NumValue, - 'PartForm': Field_PartForm, - 'PartType': Field_PartType, - 'Person': Field_Person, - 'Polite': Field_Polite, - 'Polarity': Field_Polarity, - 'Poss': Field_Poss, - 'Prefix': Field_Prefix, - 'PrepCase': Field_PrepCase, - 'PronType': Field_PronType, - 'PunctSide': Field_PunctSide, - 'PunctType': Field_PunctType, - 'Reflex': Field_Reflex, - 'Style': Field_Style, - 'StyleVariant': Field_StyleVariant, - 'Tense': Field_Tense, - 'Typo': Field_Typo, - 'VerbForm': Field_VerbForm, - 'VerbType': Field_VerbType, - 'Voice': Field_Voice, -} - -LOWER_FIELDS = { - 'pos': Field_POS, - 'abbr': Field_Abbr, - 'adp_type': Field_AdpType, - 'adv_type': Field_AdvType, - 'animacy': Field_Animacy, - 'aspect': Field_Aspect, - 'case': Field_Case, - 'conj_type': Field_ConjType, - 'connegative': Field_Connegative, - 'definite': Field_Definite, - 'degree': Field_Degree, - 'derivation': Field_Derivation, - 'echo': Field_Echo, - 'foreign': Field_Foreign, - 'gender': Field_Gender, - 'hyph': Field_Hyph, - 'inf_form': Field_InfForm, - 'mood': Field_Mood, - 'name_type': Field_NameType, - 'negative': Field_Negative, - 'noun_type': Field_NounType, - 'number': Field_Number, - 'num_form': Field_NumForm, - 'num_type': Field_NumType, - 'num_value': Field_NumValue, - 'part_form': Field_PartForm, - 'part_type': Field_PartType, - 'person': Field_Person, - 'polarity': Field_Polarity, - 'polite': Field_Polite, - 'poss': Field_Poss, - 'prefix': Field_Prefix, - 'prep_case': Field_PrepCase, - 'pron_type': Field_PronType, - 'punct_side': Field_PunctSide, - 'punct_type': Field_PunctType, - 'reflex': Field_Reflex, - 'style': Field_Style, - 'style_variant': Field_StyleVariant, - 'tense': Field_Tense, - 'typo': Field_Typo, - 'verb_form': Field_VerbForm, - 'verb_type': Field_VerbType, - 'voice': Field_Voice, -} - - -FEATURES = [ - "POS_ADJ", - "POS_ADP", - "POS_ADV", - "POS_AUX", - "POS_CONJ", - "POS_CCONJ", - "POS_DET", - "POS_INTJ", - "POS_NOUN", - "POS_NUM", - "POS_PART", - "POS_PRON", - "POS_PROPN", - "POS_PUNCT", - "POS_SCONJ", - "POS_SYM", - "POS_VERB", - "POS_X", - "POS_EOL", - "POS_SPACE", - "Abbr_yes", - "AdpType_circ", - "AdpType_comprep", - "AdpType_prep", - "AdpType_post", - "AdpType_voc", - "AdvType_adadj", - "AdvType_cau", - "AdvType_deg", - "AdvType_ex", - "AdvType_loc", - "AdvType_man", - "AdvType_mod", - "AdvType_sta", - "AdvType_tim", - "Animacy_anim", - "Animacy_hum", - "Animacy_inan", - "Animacy_nhum", - "Aspect_hab", - "Aspect_imp", - "Aspect_iter", - "Aspect_perf", - "Aspect_prog", - "Aspect_prosp", - "Aspect_none", - "Case_abe", - "Case_abl", - "Case_abs", - "Case_acc", - "Case_ade", - "Case_all", - "Case_cau", - "Case_com", - "Case_dat", - "Case_del", - "Case_dis", - "Case_ela", - "Case_ess", - "Case_gen", - "Case_ill", - "Case_ine", - "Case_ins", - "Case_loc", - "Case_lat", - "Case_nom", - "Case_par", - "Case_sub", - "Case_sup", - "Case_tem", - "Case_ter", - "Case_tra", - "Case_voc", - "ConjType_comp", - "ConjType_oper", - "Connegative_yes", - "Definite_cons", - "Definite_def", - "Definite_ind", - "Definite_red", - "Definite_two", - "Degree_abs", - "Degree_cmp", - "Degree_comp", - "Degree_none", - "Degree_pos", - "Degree_sup", - "Degree_com", - "Degree_dim", - "Derivation_minen", - "Derivation_sti", - "Derivation_inen", - "Derivation_lainen", - "Derivation_ja", - "Derivation_ton", - "Derivation_vs", - "Derivation_ttain", - "Derivation_ttaa", - "Echo_rdp", - "Echo_ech", - "Foreign_foreign", - "Foreign_fscript", - "Foreign_tscript", - "Foreign_yes", - "Gender_com", - "Gender_fem", - "Gender_masc", - "Gender_neut", - "Gender_dat_masc", - "Gender_dat_fem", - "Gender_erg_masc", - "Gender_erg_fem", - "Gender_psor_masc", - "Gender_psor_fem", - "Gender_psor_neut", - "Hyph_yes", - "InfForm_one", - "InfForm_two", - "InfForm_three", - "Mood_cnd", - "Mood_imp", - "Mood_ind", - "Mood_n", - "Mood_pot", - "Mood_sub", - "Mood_opt", - "NameType_geo", - "NameType_prs", - "NameType_giv", - "NameType_sur", - "NameType_nat", - "NameType_com", - "NameType_pro", - "NameType_oth", - "Negative_neg", - "Negative_pos", - "Negative_yes", - "NounType_com", - "NounType_prop", - "NounType_class", - "Number_com", - "Number_dual", - "Number_none", - "Number_plur", - "Number_sing", - "Number_ptan", - "Number_count", - "Number_abs_sing", - "Number_abs_plur", - "Number_dat_sing", - "Number_dat_plur", - "Number_erg_sing", - "Number_erg_plur", - "Number_psee_sing", - "Number_psee_plur", - "Number_psor_sing", - "Number_psor_plur", - "NumForm_digit", - "NumForm_roman", - "NumForm_word", - "NumForm_combi", - "NumType_card", - "NumType_dist", - "NumType_frac", - "NumType_gen", - "NumType_mult", - "NumType_none", - "NumType_ord", - "NumType_sets", - "NumType_dual", - "NumValue_one", - "NumValue_two", - "NumValue_three", - "PartForm_pres", - "PartForm_past", - "PartForm_agt", - "PartForm_neg", - "PartType_mod", - "PartType_emp", - "PartType_res", - "PartType_inf", - "PartType_vbp", - "Person_one", - "Person_two", - "Person_three", - "Person_none", - "Person_abs_one", - "Person_abs_two", - "Person_abs_three", - "Person_dat_one", - "Person_dat_two", - "Person_dat_three", - "Person_erg_one", - "Person_erg_two", - "Person_erg_three", - "Person_psor_one", - "Person_psor_two", - "Person_psor_three", - "Polarity_neg", - "Polarity_pos", - "Polite_inf", - "Polite_pol", - "Polite_abs_inf", - "Polite_abs_pol", - "Polite_erg_inf", - "Polite_erg_pol", - "Polite_dat_inf", - "Polite_dat_pol", - "Poss_yes", - "Prefix_yes", - "PrepCase_npr", - "PrepCase_pre", - "PronType_advPart", - "PronType_art", - "PronType_default", - "PronType_dem", - "PronType_ind", - "PronType_int", - "PronType_neg", - "PronType_prs", - "PronType_rcp", - "PronType_rel", - "PronType_tot", - "PronType_clit", - "PronType_exc", - "PunctSide_ini", - "PunctSide_fin", - "PunctType_peri", - "PunctType_qest", - "PunctType_excl", - "PunctType_quot", - "PunctType_brck", - "PunctType_comm", - "PunctType_colo", - "PunctType_semi", - "PunctType_dash", - "Reflex_yes", - "Style_arch", - "Style_rare", - "Style_poet", - "Style_norm", - "Style_coll", - "Style_vrnc", - "Style_sing", - "Style_expr", - "Style_derg", - "Style_vulg", - "Style_yes", - "StyleVariant_styleShort", - "StyleVariant_styleBound", - "Tense_fut", - "Tense_imp", - "Tense_past", - "Tense_pres", - "Typo_yes", - "VerbForm_fin", - "VerbForm_ger", - "VerbForm_inf", - "VerbForm_none", - "VerbForm_part", - "VerbForm_partFut", - "VerbForm_partPast", - "VerbForm_partPres", - "VerbForm_sup", - "VerbForm_trans", - "VerbForm_conv", - "VerbForm_gdv", - "VerbType_aux", - "VerbType_cop", - "VerbType_mod", - "VerbType_light", - "Voice_act", - "Voice_cau", - "Voice_pass", - "Voice_mid", - "Voice_int", -] - -FEATURE_NAMES = {get_string_id(f): f for f in FEATURES} -FEATURE_FIELDS = {f: FIELDS[f.split('_', 1)[0]] for f in FEATURES} +cdef int get_n_by_field(attr_t* results, const MorphAnalysisC* morph, attr_t field) nogil: + cdef int n_results = 0 + cdef int i + for i in range(morph.length): + if morph.fields[i] == field: + results[n_results] = morph.features[i] + n_results += 1 + return n_results diff --git a/spacy/structs.pxd b/spacy/structs.pxd index b3878db3f..259fd657d 100644 --- a/spacy/structs.pxd +++ b/spacy/structs.pxd @@ -82,52 +82,11 @@ cdef struct TokenC: cdef struct MorphAnalysisC: - univ_pos_t pos + hash_t key int length - - attr_t abbr - attr_t adp_type - attr_t adv_type - attr_t animacy - attr_t aspect - attr_t case - attr_t conj_type - attr_t connegative - attr_t definite - attr_t degree - attr_t derivation - attr_t echo - attr_t foreign - attr_t gender - attr_t hyph - attr_t inf_form - attr_t mood - attr_t negative - attr_t number - attr_t name_type - attr_t noun_type - attr_t num_form - attr_t num_type - attr_t num_value - attr_t part_form - attr_t part_type - attr_t person - attr_t polite - attr_t polarity - attr_t poss - attr_t prefix - attr_t prep_case - attr_t pron_type - attr_t punct_side - attr_t punct_type - attr_t reflex - attr_t style - attr_t style_variant - attr_t tense - attr_t typo - attr_t verb_form - attr_t voice - attr_t verb_type + attr_t* fields + attr_t* features + # Internal struct, for storage and disambiguation of entities. cdef struct KBEntryC: diff --git a/spacy/symbols.pxd b/spacy/symbols.pxd index 5922ee588..b95b4b805 100644 --- a/spacy/symbols.pxd +++ b/spacy/symbols.pxd @@ -108,282 +108,282 @@ cdef enum symbol_t: EOL SPACE - Animacy_anim - Animacy_inan - Animacy_hum # U20 - Animacy_nhum - Aspect_freq - Aspect_imp - Aspect_mod - Aspect_none - Aspect_perf - Aspect_iter # U20 - Aspect_hab # U20 - Case_abe - Case_abl - Case_abs - Case_acc - Case_ade - Case_all - Case_cau - Case_com - Case_cmp # U20 - Case_dat - Case_del - Case_dis - Case_ela - Case_equ # U20 - Case_ess - Case_gen - Case_ill - Case_ine - Case_ins - Case_loc - Case_lat - Case_nom - Case_par - Case_sub - Case_sup - Case_tem - Case_ter - Case_tra - Case_voc - Definite_two - Definite_def - Definite_red - Definite_cons # U20 - Definite_ind - Definite_spec # U20 - Degree_cmp - Degree_comp - Degree_none - Degree_pos - Degree_sup - Degree_abs - Degree_com - Degree_dim # du - Degree_equ # U20 - Evident_nfh # U20 - Gender_com - Gender_fem - Gender_masc - Gender_neut - Mood_cnd - Mood_imp - Mood_ind - Mood_n - Mood_pot - Mood_sub - Mood_opt - Mood_prp # U20 - Mood_adm # U20 - Negative_neg - Negative_pos - Negative_yes - Polarity_neg # U20 - Polarity_pos # U20 - Number_com - Number_dual - Number_none - Number_plur - Number_sing - Number_ptan # bg - Number_count # bg, U20 - Number_tri # U20 - NumType_card - NumType_dist - NumType_frac - NumType_gen - NumType_mult - NumType_none - NumType_ord - NumType_sets - Person_one - Person_two - Person_three - Person_none - Poss_yes - PronType_advPart - PronType_art - PronType_default - PronType_dem - PronType_ind - PronType_int - PronType_neg - PronType_prs - PronType_rcp - PronType_rel - PronType_tot - PronType_clit - PronType_exc # es, ca, it, fa, U20 - PronType_emp # U20 - Reflex_yes - Tense_fut - Tense_imp - Tense_past - Tense_pres - VerbForm_fin - VerbForm_ger - VerbForm_inf - VerbForm_none - VerbForm_part - VerbForm_partFut - VerbForm_partPast - VerbForm_partPres - VerbForm_sup - VerbForm_trans - VerbForm_conv # U20 - VerbForm_gdv # la - VerbForm_vnoun # U20 - Voice_act - Voice_cau - Voice_pass - Voice_mid # gkc, U20 - Voice_int # hb - Voice_antip # U20 - Voice_dir # U20 - Voice_inv # U20 - Abbr_yes # cz, fi, sl, U - AdpType_prep # cz, U - AdpType_post # U - AdpType_voc # cz - AdpType_comprep # cz - AdpType_circ # U - AdvType_man - AdvType_loc - AdvType_tim - AdvType_deg - AdvType_cau - AdvType_mod - AdvType_sta - AdvType_ex - AdvType_adadj - ConjType_oper # cz, U - ConjType_comp # cz, U - Connegative_yes # fi - Derivation_minen # fi - Derivation_sti # fi - Derivation_inen # fi - Derivation_lainen # fi - Derivation_ja # fi - Derivation_ton # fi - Derivation_vs # fi - Derivation_ttain # fi - Derivation_ttaa # fi - Echo_rdp # U - Echo_ech # U - Foreign_foreign # cz, fi, U - Foreign_fscript # cz, fi, U - Foreign_tscript # cz, U - Foreign_yes # sl - Gender_dat_masc # bq, U - Gender_dat_fem # bq, U - Gender_erg_masc # bq - Gender_erg_fem # bq - Gender_psor_masc # cz, sl, U - Gender_psor_fem # cz, sl, U - Gender_psor_neut # sl - Hyph_yes # cz, U - InfForm_one # fi - InfForm_two # fi - InfForm_three # fi - NameType_geo # U, cz - NameType_prs # U, cz - NameType_giv # U, cz - NameType_sur # U, cz - NameType_nat # U, cz - NameType_com # U, cz - NameType_pro # U, cz - NameType_oth # U, cz - NounType_com # U - NounType_prop # U - NounType_class # U - Number_abs_sing # bq, U - Number_abs_plur # bq, U - Number_dat_sing # bq, U - Number_dat_plur # bq, U - Number_erg_sing # bq, U - Number_erg_plur # bq, U - Number_psee_sing # U - Number_psee_plur # U - Number_psor_sing # cz, fi, sl, U - Number_psor_plur # cz, fi, sl, U - Number_pauc # U20 - Number_grpa # U20 - Number_grpl # U20 - Number_inv # U20 - NumForm_digit # cz, sl, U - NumForm_roman # cz, sl, U - NumForm_word # cz, sl, U - NumValue_one # cz, U - NumValue_two # cz, U - NumValue_three # cz, U - PartForm_pres # fi - PartForm_past # fi - PartForm_agt # fi - PartForm_neg # fi - PartType_mod # U - PartType_emp # U - PartType_res # U - PartType_inf # U - PartType_vbp # U - Person_abs_one # bq, U - Person_abs_two # bq, U - Person_abs_three # bq, U - Person_dat_one # bq, U - Person_dat_two # bq, U - Person_dat_three # bq, U - Person_erg_one # bq, U - Person_erg_two # bq, U - Person_erg_three # bq, U - Person_psor_one # fi, U - Person_psor_two # fi, U - Person_psor_three # fi, U - Person_zero # U20 - Person_four # U20 - Polite_inf # bq, U - Polite_pol # bq, U - Polite_abs_inf # bq, U - Polite_abs_pol # bq, U - Polite_erg_inf # bq, U - Polite_erg_pol # bq, U - Polite_dat_inf # bq, U - Polite_dat_pol # bq, U - Polite_infm # U20 - Polite_form # U20 - Polite_form_elev # U20 - Polite_form_humb # U20 - Prefix_yes # U - PrepCase_npr # cz - PrepCase_pre # U - PunctSide_ini # U - PunctSide_fin # U - PunctType_peri # U - PunctType_qest # U - PunctType_excl # U - PunctType_quot # U - PunctType_brck # U - PunctType_comm # U - PunctType_colo # U - PunctType_semi # U - PunctType_dash # U - Style_arch # cz, fi, U - Style_rare # cz, fi, U - Style_poet # cz, U - Style_norm # cz, U - Style_coll # cz, U - Style_vrnc # cz, U - Style_sing # cz, U - Style_expr # cz, U - Style_derg # cz, U - Style_vulg # cz, U - Style_yes # fi, U - StyleVariant_styleShort # cz - StyleVariant_styleBound # cz, sl - VerbType_aux # U - VerbType_cop # U - VerbType_mod # U - VerbType_light # U + DEPRECATED001 + DEPRECATED002 + DEPRECATED003 + DEPRECATED004 + DEPRECATED005 + DEPRECATED006 + DEPRECATED007 + DEPRECATED008 + DEPRECATED009 + DEPRECATED010 + DEPRECATED011 + DEPRECATED012 + DEPRECATED013 + DEPRECATED014 + DEPRECATED015 + DEPRECATED016 + DEPRECATED017 + DEPRECATED018 + DEPRECATED019 + DEPRECATED020 + DEPRECATED021 + DEPRECATED022 + DEPRECATED023 + DEPRECATED024 + DEPRECATED025 + DEPRECATED026 + DEPRECATED027 + DEPRECATED028 + DEPRECATED029 + DEPRECATED030 + DEPRECATED031 + DEPRECATED032 + DEPRECATED033 + DEPRECATED034 + DEPRECATED035 + DEPRECATED036 + DEPRECATED037 + DEPRECATED038 + DEPRECATED039 + DEPRECATED040 + DEPRECATED041 + DEPRECATED042 + DEPRECATED043 + DEPRECATED044 + DEPRECATED045 + DEPRECATED046 + DEPRECATED047 + DEPRECATED048 + DEPRECATED049 + DEPRECATED050 + DEPRECATED051 + DEPRECATED052 + DEPRECATED053 + DEPRECATED054 + DEPRECATED055 + DEPRECATED056 + DEPRECATED057 + DEPRECATED058 + DEPRECATED059 + DEPRECATED060 + DEPRECATED061 + DEPRECATED062 + DEPRECATED063 + DEPRECATED064 + DEPRECATED065 + DEPRECATED066 + DEPRECATED067 + DEPRECATED068 + DEPRECATED069 + DEPRECATED070 + DEPRECATED071 + DEPRECATED072 + DEPRECATED073 + DEPRECATED074 + DEPRECATED075 + DEPRECATED076 + DEPRECATED077 + DEPRECATED078 + DEPRECATED079 + DEPRECATED080 + DEPRECATED081 + DEPRECATED082 + DEPRECATED083 + DEPRECATED084 + DEPRECATED085 + DEPRECATED086 + DEPRECATED087 + DEPRECATED088 + DEPRECATED089 + DEPRECATED090 + DEPRECATED091 + DEPRECATED092 + DEPRECATED093 + DEPRECATED094 + DEPRECATED095 + DEPRECATED096 + DEPRECATED097 + DEPRECATED098 + DEPRECATED099 + DEPRECATED100 + DEPRECATED101 + DEPRECATED102 + DEPRECATED103 + DEPRECATED104 + DEPRECATED105 + DEPRECATED106 + DEPRECATED107 + DEPRECATED108 + DEPRECATED109 + DEPRECATED110 + DEPRECATED111 + DEPRECATED112 + DEPRECATED113 + DEPRECATED114 + DEPRECATED115 + DEPRECATED116 + DEPRECATED117 + DEPRECATED118 + DEPRECATED119 + DEPRECATED120 + DEPRECATED121 + DEPRECATED122 + DEPRECATED123 + DEPRECATED124 + DEPRECATED125 + DEPRECATED126 + DEPRECATED127 + DEPRECATED128 + DEPRECATED129 + DEPRECATED130 + DEPRECATED131 + DEPRECATED132 + DEPRECATED133 + DEPRECATED134 + DEPRECATED135 + DEPRECATED136 + DEPRECATED137 + DEPRECATED138 + DEPRECATED139 + DEPRECATED140 + DEPRECATED141 + DEPRECATED142 + DEPRECATED143 + DEPRECATED144 + DEPRECATED145 + DEPRECATED146 + DEPRECATED147 + DEPRECATED148 + DEPRECATED149 + DEPRECATED150 + DEPRECATED151 + DEPRECATED152 + DEPRECATED153 + DEPRECATED154 + DEPRECATED155 + DEPRECATED156 + DEPRECATED157 + DEPRECATED158 + DEPRECATED159 + DEPRECATED160 + DEPRECATED161 + DEPRECATED162 + DEPRECATED163 + DEPRECATED164 + DEPRECATED165 + DEPRECATED166 + DEPRECATED167 + DEPRECATED168 + DEPRECATED169 + DEPRECATED170 + DEPRECATED171 + DEPRECATED172 + DEPRECATED173 + DEPRECATED174 + DEPRECATED175 + DEPRECATED176 + DEPRECATED177 + DEPRECATED178 + DEPRECATED179 + DEPRECATED180 + DEPRECATED181 + DEPRECATED182 + DEPRECATED183 + DEPRECATED184 + DEPRECATED185 + DEPRECATED186 + DEPRECATED187 + DEPRECATED188 + DEPRECATED189 + DEPRECATED190 + DEPRECATED191 + DEPRECATED192 + DEPRECATED193 + DEPRECATED194 + DEPRECATED195 + DEPRECATED196 + DEPRECATED197 + DEPRECATED198 + DEPRECATED199 + DEPRECATED200 + DEPRECATED201 + DEPRECATED202 + DEPRECATED203 + DEPRECATED204 + DEPRECATED205 + DEPRECATED206 + DEPRECATED207 + DEPRECATED208 + DEPRECATED209 + DEPRECATED210 + DEPRECATED211 + DEPRECATED212 + DEPRECATED213 + DEPRECATED214 + DEPRECATED215 + DEPRECATED216 + DEPRECATED217 + DEPRECATED218 + DEPRECATED219 + DEPRECATED220 + DEPRECATED221 + DEPRECATED222 + DEPRECATED223 + DEPRECATED224 + DEPRECATED225 + DEPRECATED226 + DEPRECATED227 + DEPRECATED228 + DEPRECATED229 + DEPRECATED230 + DEPRECATED231 + DEPRECATED232 + DEPRECATED233 + DEPRECATED234 + DEPRECATED235 + DEPRECATED236 + DEPRECATED237 + DEPRECATED238 + DEPRECATED239 + DEPRECATED240 + DEPRECATED241 + DEPRECATED242 + DEPRECATED243 + DEPRECATED244 + DEPRECATED245 + DEPRECATED246 + DEPRECATED247 + DEPRECATED248 + DEPRECATED249 + DEPRECATED250 + DEPRECATED251 + DEPRECATED252 + DEPRECATED253 + DEPRECATED254 + DEPRECATED255 + DEPRECATED256 + DEPRECATED257 + DEPRECATED258 + DEPRECATED259 + DEPRECATED260 + DEPRECATED261 + DEPRECATED262 + DEPRECATED263 + DEPRECATED264 + DEPRECATED265 + DEPRECATED266 + DEPRECATED267 + DEPRECATED268 + DEPRECATED269 + DEPRECATED270 + DEPRECATED271 + DEPRECATED272 + DEPRECATED273 + DEPRECATED274 + DEPRECATED275 + DEPRECATED276 PERSON NORP diff --git a/spacy/symbols.pyx b/spacy/symbols.pyx index 85f23ccbc..36b9ffa67 100644 --- a/spacy/symbols.pyx +++ b/spacy/symbols.pyx @@ -110,282 +110,282 @@ IDS = { "EOL": EOL, "SPACE": SPACE, - "Animacy_anim": Animacy_anim, - "Animacy_inam": Animacy_inan, - "Animacy_hum": Animacy_hum, # U20 - "Animacy_nhum": Animacy_nhum, - "Aspect_freq": Aspect_freq, - "Aspect_imp": Aspect_imp, - "Aspect_mod": Aspect_mod, - "Aspect_none": Aspect_none, - "Aspect_perf": Aspect_perf, - "Aspect_iter": Aspect_iter, # U20 - "Aspect_hab": Aspect_hab, # U20 - "Case_abe": Case_abe, - "Case_abl": Case_abl, - "Case_abs": Case_abs, - "Case_acc": Case_acc, - "Case_ade": Case_ade, - "Case_all": Case_all, - "Case_cau": Case_cau, - "Case_com": Case_com, - "Case_cmp": Case_cmp, # U20 - "Case_dat": Case_dat, - "Case_del": Case_del, - "Case_dis": Case_dis, - "Case_ela": Case_ela, - "Case_equ": Case_equ, # U20 - "Case_ess": Case_ess, - "Case_gen": Case_gen, - "Case_ill": Case_ill, - "Case_ine": Case_ine, - "Case_ins": Case_ins, - "Case_loc": Case_loc, - "Case_lat": Case_lat, - "Case_nom": Case_nom, - "Case_par": Case_par, - "Case_sub": Case_sub, - "Case_sup": Case_sup, - "Case_tem": Case_tem, - "Case_ter": Case_ter, - "Case_tra": Case_tra, - "Case_voc": Case_voc, - "Definite_two": Definite_two, - "Definite_def": Definite_def, - "Definite_red": Definite_red, - "Definite_cons": Definite_cons, # U20 - "Definite_ind": Definite_ind, - "Definite_spec": Definite_spec, # U20 - "Degree_cmp": Degree_cmp, - "Degree_comp": Degree_comp, - "Degree_none": Degree_none, - "Degree_pos": Degree_pos, - "Degree_sup": Degree_sup, - "Degree_abs": Degree_abs, - "Degree_com": Degree_com, - "Degree_dim": Degree_dim, # du - "Degree_equ": Degree_equ, # U20 - "Evident_nfh": Evident_nfh, # U20 - "Gender_com": Gender_com, - "Gender_fem": Gender_fem, - "Gender_masc": Gender_masc, - "Gender_neut": Gender_neut, - "Mood_cnd": Mood_cnd, - "Mood_imp": Mood_imp, - "Mood_ind": Mood_ind, - "Mood_n": Mood_n, - "Mood_pot": Mood_pot, - "Mood_sub": Mood_sub, - "Mood_opt": Mood_opt, - "Mood_prp": Mood_prp, # U20 - "Mood_adm": Mood_adm, # U20 - "Negative_neg": Negative_neg, - "Negative_pos": Negative_pos, - "Negative_yes": Negative_yes, - "Polarity_neg": Polarity_neg, # U20 - "Polarity_pos": Polarity_pos, # U20 - "Number_com": Number_com, - "Number_dual": Number_dual, - "Number_none": Number_none, - "Number_plur": Number_plur, - "Number_sing": Number_sing, - "Number_ptan": Number_ptan, # bg - "Number_count": Number_count, # bg, U20 - "Number_tri": Number_tri, # U20 - "NumType_card": NumType_card, - "NumType_dist": NumType_dist, - "NumType_frac": NumType_frac, - "NumType_gen": NumType_gen, - "NumType_mult": NumType_mult, - "NumType_none": NumType_none, - "NumType_ord": NumType_ord, - "NumType_sets": NumType_sets, - "Person_one": Person_one, - "Person_two": Person_two, - "Person_three": Person_three, - "Person_none": Person_none, - "Poss_yes": Poss_yes, - "PronType_advPart": PronType_advPart, - "PronType_art": PronType_art, - "PronType_default": PronType_default, - "PronType_dem": PronType_dem, - "PronType_ind": PronType_ind, - "PronType_int": PronType_int, - "PronType_neg": PronType_neg, - "PronType_prs": PronType_prs, - "PronType_rcp": PronType_rcp, - "PronType_rel": PronType_rel, - "PronType_tot": PronType_tot, - "PronType_clit": PronType_clit, - "PronType_exc": PronType_exc, # es, ca, it, fa, U20 - "PronType_emp": PronType_emp, # U20 - "Reflex_yes": Reflex_yes, - "Tense_fut": Tense_fut, - "Tense_imp": Tense_imp, - "Tense_past": Tense_past, - "Tense_pres": Tense_pres, - "VerbForm_fin": VerbForm_fin, - "VerbForm_ger": VerbForm_ger, - "VerbForm_inf": VerbForm_inf, - "VerbForm_none": VerbForm_none, - "VerbForm_part": VerbForm_part, - "VerbForm_partFut": VerbForm_partFut, - "VerbForm_partPast": VerbForm_partPast, - "VerbForm_partPres": VerbForm_partPres, - "VerbForm_sup": VerbForm_sup, - "VerbForm_trans": VerbForm_trans, - "VerbForm_conv": VerbForm_conv, # U20 - "VerbForm_gdv": VerbForm_gdv, # la, - "VerbForm_vnoun": VerbForm_vnoun, # U20 - "Voice_act": Voice_act, - "Voice_cau": Voice_cau, - "Voice_pass": Voice_pass, - "Voice_mid": Voice_mid, # gkc, U20 - "Voice_int": Voice_int, # hb, - "Voice_antip": Voice_antip, # U20 - "Voice_dir": Voice_dir, # U20 - "Voice_inv": Voice_inv, # U20 - "Abbr_yes": Abbr_yes, # cz, fi, sl, U, - "AdpType_prep": AdpType_prep, # cz, U, - "AdpType_post": AdpType_post, # U, - "AdpType_voc": AdpType_voc, # cz, - "AdpType_comprep": AdpType_comprep, # cz, - "AdpType_circ": AdpType_circ, # U, - "AdvType_man": AdvType_man, - "AdvType_loc": AdvType_loc, - "AdvType_tim": AdvType_tim, - "AdvType_deg": AdvType_deg, - "AdvType_cau": AdvType_cau, - "AdvType_mod": AdvType_mod, - "AdvType_sta": AdvType_sta, - "AdvType_ex": AdvType_ex, - "AdvType_adadj": AdvType_adadj, - "ConjType_oper": ConjType_oper, # cz, U, - "ConjType_comp": ConjType_comp, # cz, U, - "Connegative_yes": Connegative_yes, # fi, - "Derivation_minen": Derivation_minen, # fi, - "Derivation_sti": Derivation_sti, # fi, - "Derivation_inen": Derivation_inen, # fi, - "Derivation_lainen": Derivation_lainen, # fi, - "Derivation_ja": Derivation_ja, # fi, - "Derivation_ton": Derivation_ton, # fi, - "Derivation_vs": Derivation_vs, # fi, - "Derivation_ttain": Derivation_ttain, # fi, - "Derivation_ttaa": Derivation_ttaa, # fi, - "Echo_rdp": Echo_rdp, # U, - "Echo_ech": Echo_ech, # U, - "Foreign_foreign": Foreign_foreign, # cz, fi, U, - "Foreign_fscript": Foreign_fscript, # cz, fi, U, - "Foreign_tscript": Foreign_tscript, # cz, U, - "Foreign_yes": Foreign_yes, # sl, - "Gender_dat_masc": Gender_dat_masc, # bq, U, - "Gender_dat_fem": Gender_dat_fem, # bq, U, - "Gender_erg_masc": Gender_erg_masc, # bq, - "Gender_erg_fem": Gender_erg_fem, # bq, - "Gender_psor_masc": Gender_psor_masc, # cz, sl, U, - "Gender_psor_fem": Gender_psor_fem, # cz, sl, U, - "Gender_psor_neut": Gender_psor_neut, # sl, - "Hyph_yes": Hyph_yes, # cz, U, - "InfForm_one": InfForm_one, # fi, - "InfForm_two": InfForm_two, # fi, - "InfForm_three": InfForm_three, # fi, - "NameType_geo": NameType_geo, # U, cz, - "NameType_prs": NameType_prs, # U, cz, - "NameType_giv": NameType_giv, # U, cz, - "NameType_sur": NameType_sur, # U, cz, - "NameType_nat": NameType_nat, # U, cz, - "NameType_com": NameType_com, # U, cz, - "NameType_pro": NameType_pro, # U, cz, - "NameType_oth": NameType_oth, # U, cz, - "NounType_com": NounType_com, # U, - "NounType_prop": NounType_prop, # U, - "NounType_class": NounType_class, # U, - "Number_abs_sing": Number_abs_sing, # bq, U, - "Number_abs_plur": Number_abs_plur, # bq, U, - "Number_dat_sing": Number_dat_sing, # bq, U, - "Number_dat_plur": Number_dat_plur, # bq, U, - "Number_erg_sing": Number_erg_sing, # bq, U, - "Number_erg_plur": Number_erg_plur, # bq, U, - "Number_psee_sing": Number_psee_sing, # U, - "Number_psee_plur": Number_psee_plur, # U, - "Number_psor_sing": Number_psor_sing, # cz, fi, sl, U, - "Number_psor_plur": Number_psor_plur, # cz, fi, sl, U, - "Number_pauc": Number_pauc, # U20 - "Number_grpa": Number_grpa, # U20 - "Number_grpl": Number_grpl, # U20 - "Number_inv": Number_inv, # U20 - "NumForm_digit": NumForm_digit, # cz, sl, U, - "NumForm_roman": NumForm_roman, # cz, sl, U, - "NumForm_word": NumForm_word, # cz, sl, U, - "NumValue_one": NumValue_one, # cz, U, - "NumValue_two": NumValue_two, # cz, U, - "NumValue_three": NumValue_three, # cz, U, - "PartForm_pres": PartForm_pres, # fi, - "PartForm_past": PartForm_past, # fi, - "PartForm_agt": PartForm_agt, # fi, - "PartForm_neg": PartForm_neg, # fi, - "PartType_mod": PartType_mod, # U, - "PartType_emp": PartType_emp, # U, - "PartType_res": PartType_res, # U, - "PartType_inf": PartType_inf, # U, - "PartType_vbp": PartType_vbp, # U, - "Person_abs_one": Person_abs_one, # bq, U, - "Person_abs_two": Person_abs_two, # bq, U, - "Person_abs_three": Person_abs_three, # bq, U, - "Person_dat_one": Person_dat_one, # bq, U, - "Person_dat_two": Person_dat_two, # bq, U, - "Person_dat_three": Person_dat_three, # bq, U, - "Person_erg_one": Person_erg_one, # bq, U, - "Person_erg_two": Person_erg_two, # bq, U, - "Person_erg_three": Person_erg_three, # bq, U, - "Person_psor_one": Person_psor_one, # fi, U, - "Person_psor_two": Person_psor_two, # fi, U, - "Person_psor_three": Person_psor_three, # fi, U, - "Person_zero": Person_zero, # U20 - "Person_four": Person_four, # U20 - "Polite_inf": Polite_inf, # bq, U, - "Polite_pol": Polite_pol, # bq, U, - "Polite_abs_inf": Polite_abs_inf, # bq, U, - "Polite_abs_pol": Polite_abs_pol, # bq, U, - "Polite_erg_inf": Polite_erg_inf, # bq, U, - "Polite_erg_pol": Polite_erg_pol, # bq, U, - "Polite_dat_inf": Polite_dat_inf, # bq, U, - "Polite_dat_pol": Polite_dat_pol, # bq, U, - "Polite_infm": Polite_infm, # U20 - "Polite_form": Polite_form, # U20 - "Polite_form_elev": Polite_form_elev, # U20 - "Polite_form_humb": Polite_form_humb, # U20 - "Prefix_yes": Prefix_yes, # U, - "PrepCase_npr": PrepCase_npr, # cz, - "PrepCase_pre": PrepCase_pre, # U, - "PunctSide_ini": PunctSide_ini, # U, - "PunctSide_fin": PunctSide_fin, # U, - "PunctType_peri": PunctType_peri, # U, - "PunctType_qest": PunctType_qest, # U, - "PunctType_excl": PunctType_excl, # U, - "PunctType_quot": PunctType_quot, # U, - "PunctType_brck": PunctType_brck, # U, - "PunctType_comm": PunctType_comm, # U, - "PunctType_colo": PunctType_colo, # U, - "PunctType_semi": PunctType_semi, # U, - "PunctType_dash": PunctType_dash, # U, - "Style_arch": Style_arch, # cz, fi, U, - "Style_rare": Style_rare, # cz, fi, U, - "Style_poet": Style_poet, # cz, U, - "Style_norm": Style_norm, # cz, U, - "Style_coll": Style_coll, # cz, U, - "Style_vrnc": Style_vrnc, # cz, U, - "Style_sing": Style_sing, # cz, U, - "Style_expr": Style_expr, # cz, U, - "Style_derg": Style_derg, # cz, U, - "Style_vulg": Style_vulg, # cz, U, - "Style_yes": Style_yes, # fi, U, - "StyleVariant_styleShort": StyleVariant_styleShort, # cz, - "StyleVariant_styleBound": StyleVariant_styleBound, # cz, sl, - "VerbType_aux": VerbType_aux, # U, - "VerbType_cop": VerbType_cop, # U, - "VerbType_mod": VerbType_mod, # U, - "VerbType_light": VerbType_light, # U, + "DEPRECATED001": DEPRECATED001, + "DEPRECATED002": DEPRECATED002, + "DEPRECATED003": DEPRECATED003, + "DEPRECATED004": DEPRECATED004, + "DEPRECATED005": DEPRECATED005, + "DEPRECATED006": DEPRECATED006, + "DEPRECATED007": DEPRECATED007, + "DEPRECATED008": DEPRECATED008, + "DEPRECATED009": DEPRECATED009, + "DEPRECATED010": DEPRECATED010, + "DEPRECATED011": DEPRECATED011, + "DEPRECATED012": DEPRECATED012, + "DEPRECATED013": DEPRECATED013, + "DEPRECATED014": DEPRECATED014, + "DEPRECATED015": DEPRECATED015, + "DEPRECATED016": DEPRECATED016, + "DEPRECATED017": DEPRECATED017, + "DEPRECATED018": DEPRECATED018, + "DEPRECATED019": DEPRECATED019, + "DEPRECATED020": DEPRECATED020, + "DEPRECATED021": DEPRECATED021, + "DEPRECATED022": DEPRECATED022, + "DEPRECATED023": DEPRECATED023, + "DEPRECATED024": DEPRECATED024, + "DEPRECATED025": DEPRECATED025, + "DEPRECATED026": DEPRECATED026, + "DEPRECATED027": DEPRECATED027, + "DEPRECATED028": DEPRECATED028, + "DEPRECATED029": DEPRECATED029, + "DEPRECATED030": DEPRECATED030, + "DEPRECATED031": DEPRECATED031, + "DEPRECATED032": DEPRECATED032, + "DEPRECATED033": DEPRECATED033, + "DEPRECATED034": DEPRECATED034, + "DEPRECATED035": DEPRECATED035, + "DEPRECATED036": DEPRECATED036, + "DEPRECATED037": DEPRECATED037, + "DEPRECATED038": DEPRECATED038, + "DEPRECATED039": DEPRECATED039, + "DEPRECATED040": DEPRECATED040, + "DEPRECATED041": DEPRECATED041, + "DEPRECATED042": DEPRECATED042, + "DEPRECATED043": DEPRECATED043, + "DEPRECATED044": DEPRECATED044, + "DEPRECATED045": DEPRECATED045, + "DEPRECATED046": DEPRECATED046, + "DEPRECATED047": DEPRECATED047, + "DEPRECATED048": DEPRECATED048, + "DEPRECATED049": DEPRECATED049, + "DEPRECATED050": DEPRECATED050, + "DEPRECATED051": DEPRECATED051, + "DEPRECATED052": DEPRECATED052, + "DEPRECATED053": DEPRECATED053, + "DEPRECATED054": DEPRECATED054, + "DEPRECATED055": DEPRECATED055, + "DEPRECATED056": DEPRECATED056, + "DEPRECATED057": DEPRECATED057, + "DEPRECATED058": DEPRECATED058, + "DEPRECATED059": DEPRECATED059, + "DEPRECATED060": DEPRECATED060, + "DEPRECATED061": DEPRECATED061, + "DEPRECATED062": DEPRECATED062, + "DEPRECATED063": DEPRECATED063, + "DEPRECATED064": DEPRECATED064, + "DEPRECATED065": DEPRECATED065, + "DEPRECATED066": DEPRECATED066, + "DEPRECATED067": DEPRECATED067, + "DEPRECATED068": DEPRECATED068, + "DEPRECATED069": DEPRECATED069, + "DEPRECATED070": DEPRECATED070, + "DEPRECATED071": DEPRECATED071, + "DEPRECATED072": DEPRECATED072, + "DEPRECATED073": DEPRECATED073, + "DEPRECATED074": DEPRECATED074, + "DEPRECATED075": DEPRECATED075, + "DEPRECATED076": DEPRECATED076, + "DEPRECATED077": DEPRECATED077, + "DEPRECATED078": DEPRECATED078, + "DEPRECATED079": DEPRECATED079, + "DEPRECATED080": DEPRECATED080, + "DEPRECATED081": DEPRECATED081, + "DEPRECATED082": DEPRECATED082, + "DEPRECATED083": DEPRECATED083, + "DEPRECATED084": DEPRECATED084, + "DEPRECATED085": DEPRECATED085, + "DEPRECATED086": DEPRECATED086, + "DEPRECATED087": DEPRECATED087, + "DEPRECATED088": DEPRECATED088, + "DEPRECATED089": DEPRECATED089, + "DEPRECATED090": DEPRECATED090, + "DEPRECATED091": DEPRECATED091, + "DEPRECATED092": DEPRECATED092, + "DEPRECATED093": DEPRECATED093, + "DEPRECATED094": DEPRECATED094, + "DEPRECATED095": DEPRECATED095, + "DEPRECATED096": DEPRECATED096, + "DEPRECATED097": DEPRECATED097, + "DEPRECATED098": DEPRECATED098, + "DEPRECATED099": DEPRECATED099, + "DEPRECATED100": DEPRECATED100, + "DEPRECATED101": DEPRECATED101, + "DEPRECATED102": DEPRECATED102, + "DEPRECATED103": DEPRECATED103, + "DEPRECATED104": DEPRECATED104, + "DEPRECATED105": DEPRECATED105, + "DEPRECATED106": DEPRECATED106, + "DEPRECATED107": DEPRECATED107, + "DEPRECATED108": DEPRECATED108, + "DEPRECATED109": DEPRECATED109, + "DEPRECATED110": DEPRECATED110, + "DEPRECATED111": DEPRECATED111, + "DEPRECATED112": DEPRECATED112, + "DEPRECATED113": DEPRECATED113, + "DEPRECATED114": DEPRECATED114, + "DEPRECATED115": DEPRECATED115, + "DEPRECATED116": DEPRECATED116, + "DEPRECATED117": DEPRECATED117, + "DEPRECATED118": DEPRECATED118, + "DEPRECATED119": DEPRECATED119, + "DEPRECATED120": DEPRECATED120, + "DEPRECATED121": DEPRECATED121, + "DEPRECATED122": DEPRECATED122, + "DEPRECATED123": DEPRECATED123, + "DEPRECATED124": DEPRECATED124, + "DEPRECATED125": DEPRECATED125, + "DEPRECATED126": DEPRECATED126, + "DEPRECATED127": DEPRECATED127, + "DEPRECATED128": DEPRECATED128, + "DEPRECATED129": DEPRECATED129, + "DEPRECATED130": DEPRECATED130, + "DEPRECATED131": DEPRECATED131, + "DEPRECATED132": DEPRECATED132, + "DEPRECATED133": DEPRECATED133, + "DEPRECATED134": DEPRECATED134, + "DEPRECATED135": DEPRECATED135, + "DEPRECATED136": DEPRECATED136, + "DEPRECATED137": DEPRECATED137, + "DEPRECATED138": DEPRECATED138, + "DEPRECATED139": DEPRECATED139, + "DEPRECATED140": DEPRECATED140, + "DEPRECATED141": DEPRECATED141, + "DEPRECATED142": DEPRECATED142, + "DEPRECATED143": DEPRECATED143, + "DEPRECATED144": DEPRECATED144, + "DEPRECATED145": DEPRECATED145, + "DEPRECATED146": DEPRECATED146, + "DEPRECATED147": DEPRECATED147, + "DEPRECATED148": DEPRECATED148, + "DEPRECATED149": DEPRECATED149, + "DEPRECATED150": DEPRECATED150, + "DEPRECATED151": DEPRECATED151, + "DEPRECATED152": DEPRECATED152, + "DEPRECATED153": DEPRECATED153, + "DEPRECATED154": DEPRECATED154, + "DEPRECATED155": DEPRECATED155, + "DEPRECATED156": DEPRECATED156, + "DEPRECATED157": DEPRECATED157, + "DEPRECATED158": DEPRECATED158, + "DEPRECATED159": DEPRECATED159, + "DEPRECATED160": DEPRECATED160, + "DEPRECATED161": DEPRECATED161, + "DEPRECATED162": DEPRECATED162, + "DEPRECATED163": DEPRECATED163, + "DEPRECATED164": DEPRECATED164, + "DEPRECATED165": DEPRECATED165, + "DEPRECATED166": DEPRECATED166, + "DEPRECATED167": DEPRECATED167, + "DEPRECATED168": DEPRECATED168, + "DEPRECATED169": DEPRECATED169, + "DEPRECATED170": DEPRECATED170, + "DEPRECATED171": DEPRECATED171, + "DEPRECATED172": DEPRECATED172, + "DEPRECATED173": DEPRECATED173, + "DEPRECATED174": DEPRECATED174, + "DEPRECATED175": DEPRECATED175, + "DEPRECATED176": DEPRECATED176, + "DEPRECATED177": DEPRECATED177, + "DEPRECATED178": DEPRECATED178, + "DEPRECATED179": DEPRECATED179, + "DEPRECATED180": DEPRECATED180, + "DEPRECATED181": DEPRECATED181, + "DEPRECATED182": DEPRECATED182, + "DEPRECATED183": DEPRECATED183, + "DEPRECATED184": DEPRECATED184, + "DEPRECATED185": DEPRECATED185, + "DEPRECATED186": DEPRECATED186, + "DEPRECATED187": DEPRECATED187, + "DEPRECATED188": DEPRECATED188, + "DEPRECATED189": DEPRECATED189, + "DEPRECATED190": DEPRECATED190, + "DEPRECATED191": DEPRECATED191, + "DEPRECATED192": DEPRECATED192, + "DEPRECATED193": DEPRECATED193, + "DEPRECATED194": DEPRECATED194, + "DEPRECATED195": DEPRECATED195, + "DEPRECATED196": DEPRECATED196, + "DEPRECATED197": DEPRECATED197, + "DEPRECATED198": DEPRECATED198, + "DEPRECATED199": DEPRECATED199, + "DEPRECATED200": DEPRECATED200, + "DEPRECATED201": DEPRECATED201, + "DEPRECATED202": DEPRECATED202, + "DEPRECATED203": DEPRECATED203, + "DEPRECATED204": DEPRECATED204, + "DEPRECATED205": DEPRECATED205, + "DEPRECATED206": DEPRECATED206, + "DEPRECATED207": DEPRECATED207, + "DEPRECATED208": DEPRECATED208, + "DEPRECATED209": DEPRECATED209, + "DEPRECATED210": DEPRECATED210, + "DEPRECATED211": DEPRECATED211, + "DEPRECATED212": DEPRECATED212, + "DEPRECATED213": DEPRECATED213, + "DEPRECATED214": DEPRECATED214, + "DEPRECATED215": DEPRECATED215, + "DEPRECATED216": DEPRECATED216, + "DEPRECATED217": DEPRECATED217, + "DEPRECATED218": DEPRECATED218, + "DEPRECATED219": DEPRECATED219, + "DEPRECATED220": DEPRECATED220, + "DEPRECATED221": DEPRECATED221, + "DEPRECATED222": DEPRECATED222, + "DEPRECATED223": DEPRECATED223, + "DEPRECATED224": DEPRECATED224, + "DEPRECATED225": DEPRECATED225, + "DEPRECATED226": DEPRECATED226, + "DEPRECATED227": DEPRECATED227, + "DEPRECATED228": DEPRECATED228, + "DEPRECATED229": DEPRECATED229, + "DEPRECATED230": DEPRECATED230, + "DEPRECATED231": DEPRECATED231, + "DEPRECATED232": DEPRECATED232, + "DEPRECATED233": DEPRECATED233, + "DEPRECATED234": DEPRECATED234, + "DEPRECATED235": DEPRECATED235, + "DEPRECATED236": DEPRECATED236, + "DEPRECATED237": DEPRECATED237, + "DEPRECATED238": DEPRECATED238, + "DEPRECATED239": DEPRECATED239, + "DEPRECATED240": DEPRECATED240, + "DEPRECATED241": DEPRECATED241, + "DEPRECATED242": DEPRECATED242, + "DEPRECATED243": DEPRECATED243, + "DEPRECATED244": DEPRECATED244, + "DEPRECATED245": DEPRECATED245, + "DEPRECATED246": DEPRECATED246, + "DEPRECATED247": DEPRECATED247, + "DEPRECATED248": DEPRECATED248, + "DEPRECATED249": DEPRECATED249, + "DEPRECATED250": DEPRECATED250, + "DEPRECATED251": DEPRECATED251, + "DEPRECATED252": DEPRECATED252, + "DEPRECATED253": DEPRECATED253, + "DEPRECATED254": DEPRECATED254, + "DEPRECATED255": DEPRECATED255, + "DEPRECATED256": DEPRECATED256, + "DEPRECATED257": DEPRECATED257, + "DEPRECATED258": DEPRECATED258, + "DEPRECATED259": DEPRECATED259, + "DEPRECATED260": DEPRECATED260, + "DEPRECATED261": DEPRECATED261, + "DEPRECATED262": DEPRECATED262, + "DEPRECATED263": DEPRECATED263, + "DEPRECATED264": DEPRECATED264, + "DEPRECATED265": DEPRECATED265, + "DEPRECATED266": DEPRECATED266, + "DEPRECATED267": DEPRECATED267, + "DEPRECATED268": DEPRECATED268, + "DEPRECATED269": DEPRECATED269, + "DEPRECATED270": DEPRECATED270, + "DEPRECATED271": DEPRECATED271, + "DEPRECATED272": DEPRECATED272, + "DEPRECATED273": DEPRECATED273, + "DEPRECATED274": DEPRECATED274, + "DEPRECATED275": DEPRECATED275, + "DEPRECATED276": DEPRECATED276, "PERSON": PERSON, "NORP": NORP, diff --git a/spacy/tests/doc/test_morphanalysis.py b/spacy/tests/doc/test_morphanalysis.py index 67ebc06d6..82fb549ba 100644 --- a/spacy/tests/doc/test_morphanalysis.py +++ b/spacy/tests/doc/test_morphanalysis.py @@ -9,22 +9,52 @@ def i_has(en_tokenizer): return doc -def test_token_morph_id(i_has): - assert i_has[0].morph.id - assert i_has[1].morph.id != 0 - assert i_has[0].morph.id != i_has[1].morph.id +def test_token_morph_eq(i_has): + assert i_has[0].morph is not i_has[0].morph + assert i_has[0].morph == i_has[0].morph + assert i_has[0].morph != i_has[1].morph + + +def test_token_morph_key(i_has): + assert i_has[0].morph.key != 0 + assert i_has[1].morph.key != 0 + assert i_has[0].morph.key == i_has[0].morph.key + assert i_has[0].morph.key != i_has[1].morph.key def test_morph_props(i_has): - assert i_has[0].morph.pron_type == i_has.vocab.strings["PronType_prs"] - assert i_has[0].morph.pron_type_ == "PronType_prs" - assert i_has[1].morph.pron_type == 0 + assert i_has[0].morph.get("PronType") == ["PronType=prs"] + assert i_has[1].morph.get("PronType") == [] def test_morph_iter(i_has): - assert list(i_has[0].morph) == ["PronType_prs"] - assert list(i_has[1].morph) == ["Number_sing", "Person_three", "VerbForm_fin"] + assert set(i_has[0].morph) == set(["PronType=prs"]) + assert set(i_has[1].morph) == set(["Number=sing", "Person=three", "Tense=pres", "VerbForm=fin"]) def test_morph_get(i_has): - assert i_has[0].morph.get("pron_type") == "PronType_prs" + assert i_has[0].morph.get("PronType") == ["PronType=prs"] + + +def test_morph_set(i_has): + assert i_has[0].morph.get("PronType") == ["PronType=prs"] + # set by string + i_has[0].morph_ = "PronType=unk" + assert i_has[0].morph.get("PronType") == ["PronType=unk"] + # set by string, fields are alphabetized + i_has[0].morph_ = "PronType=123|NounType=unk" + assert i_has[0].morph_ == "NounType=unk|PronType=123" + # set by dict + i_has[0].morph_ = {"AType": "123", "BType": "unk", "POS": "ADJ"} + assert i_has[0].morph_ == "AType=123|BType=unk|POS=ADJ" + # set by string with multiple values, fields and values are alphabetized + i_has[0].morph_ = "BType=c|AType=b,a" + assert i_has[0].morph_ == "AType=a,b|BType=c" + # set by dict with multiple values, fields and values are alphabetized + i_has[0].morph_ = {"AType": "b,a", "BType": "c"} + assert i_has[0].morph_ == "AType=a,b|BType=c" + + +def test_morph_str(i_has): + assert str(i_has[0].morph) == "PronType=prs" + assert str(i_has[1].morph) == "Number=sing|Person=three|Tense=pres|VerbForm=fin" diff --git a/spacy/tests/morphology/test_morph_converters.py b/spacy/tests/morphology/test_morph_converters.py new file mode 100644 index 000000000..3bff4f924 --- /dev/null +++ b/spacy/tests/morphology/test_morph_converters.py @@ -0,0 +1,26 @@ +import pytest +from spacy.morphology import Morphology + + +def test_feats_converters(): + feats = "Case=dat,gen|Number=sing" + feats_dict = {"Case": "dat,gen", "Number": "sing"} + feats_list = feats.split(Morphology.FEATURE_SEP) + + # simple conversions + assert Morphology.list_to_feats(feats_list) == feats + assert Morphology.dict_to_feats(feats_dict) == feats + assert Morphology.feats_to_dict(feats) == feats_dict + + # roundtrips + assert Morphology.dict_to_feats(Morphology.feats_to_dict(feats)) == feats + assert Morphology.feats_to_dict(Morphology.dict_to_feats(feats_dict)) == feats_dict + + # unsorted input is normalized + unsorted_feats = "Number=sing|Case=gen,dat" + unsorted_feats_dict = {"Case": "gen,dat", "Number": "sing"} + unsorted_feats_list = feats.split(Morphology.FEATURE_SEP) + assert Morphology.feats_to_dict(unsorted_feats) == feats_dict + assert Morphology.dict_to_feats(unsorted_feats_dict) == feats + assert Morphology.list_to_feats(unsorted_feats_list) == feats + assert Morphology.dict_to_feats(Morphology.feats_to_dict(unsorted_feats)) == feats diff --git a/spacy/tests/morphology/test_morph_features.py b/spacy/tests/morphology/test_morph_features.py index 4cf6b1206..0d8d7dea9 100644 --- a/spacy/tests/morphology/test_morph_features.py +++ b/spacy/tests/morphology/test_morph_features.py @@ -16,32 +16,30 @@ def test_init(morphology): def test_add_morphology_with_string_names(morphology): - morphology.add({"Case_gen", "Number_sing"}) + morphology.add({"Case": "gen", "Number": "sing"}) def test_add_morphology_with_int_ids(morphology): - morphology.add({get_string_id("Case_gen"), get_string_id("Number_sing")}) + morphology.strings.add("Case") + morphology.strings.add("gen") + morphology.strings.add("Number") + morphology.strings.add("sing") + morphology.add({get_string_id("Case"): get_string_id("gen"), get_string_id("Number"): get_string_id("sing")}) def test_add_morphology_with_mix_strings_and_ints(morphology): - morphology.add({get_string_id("PunctSide_ini"), "VerbType_aux"}) + morphology.strings.add("PunctSide") + morphology.strings.add("ini") + morphology.add({get_string_id("PunctSide"): get_string_id("ini"), "VerbType": "aux"}) def test_morphology_tags_hash_distinctly(morphology): - tag1 = morphology.add({"PunctSide_ini", "VerbType_aux"}) - tag2 = morphology.add({"Case_gen", "Number_sing"}) + tag1 = morphology.add({"PunctSide": "ini", "VerbType": "aux"}) + tag2 = morphology.add({"Case": "gen", "Number": "sing"}) assert tag1 != tag2 def test_morphology_tags_hash_independent_of_order(morphology): - tag1 = morphology.add({"Case_gen", "Number_sing"}) - tag2 = morphology.add({"Number_sing", "Case_gen"}) + tag1 = morphology.add({"Case": "gen", "Number": "sing"}) + tag2 = morphology.add({"Number": "sing", "Case": "gen"}) assert tag1 == tag2 - - -def test_update_morphology_tag(morphology): - tag1 = morphology.add({"Case_gen"}) - tag2 = morphology.update(tag1, {"Number_sing"}) - assert tag1 != tag2 - tag3 = morphology.add({"Number_sing", "Case_gen"}) - assert tag2 == tag3 diff --git a/spacy/tests/regression/test_issue1-1000.py b/spacy/tests/regression/test_issue1-1000.py index a3148aa90..bfca72853 100644 --- a/spacy/tests/regression/test_issue1-1000.py +++ b/spacy/tests/regression/test_issue1-1000.py @@ -2,7 +2,7 @@ import pytest import random from spacy.matcher import Matcher from spacy.attrs import IS_PUNCT, ORTH, LOWER -from spacy.symbols import POS, VERB, VerbForm_inf +from spacy.symbols import POS, VERB from spacy.vocab import Vocab from spacy.language import Language from spacy.lemmatizer import Lemmatizer @@ -164,7 +164,7 @@ def test_issue590(en_vocab): def test_issue595(): """Test lemmatization of base forms""" words = ["Do", "n't", "feed", "the", "dog"] - tag_map = {"VB": {POS: VERB, VerbForm_inf: True}} + tag_map = {"VB": {POS: VERB, "VerbForm": "inf"}} lookups = Lookups() lookups.add_table("lemma_rules", {"verb": [["ed", "e"]]}) lookups.add_table("lemma_index", {"verb": {}}) diff --git a/spacy/tests/regression/test_issue1001-1500.py b/spacy/tests/regression/test_issue1001-1500.py index 7d81c3148..aaff951e5 100644 --- a/spacy/tests/regression/test_issue1001-1500.py +++ b/spacy/tests/regression/test_issue1001-1500.py @@ -8,7 +8,7 @@ from spacy.matcher import Matcher from spacy.tokenizer import Tokenizer from spacy.lemmatizer import Lemmatizer from spacy.lookups import Lookups -from spacy.symbols import ORTH, LEMMA, POS, VERB, VerbForm_part +from spacy.symbols import ORTH, LEMMA, POS, VERB def test_issue1061(): @@ -88,7 +88,7 @@ def test_issue1375(): def test_issue1387(): - tag_map = {"VBG": {POS: VERB, VerbForm_part: True}} + tag_map = {"VBG": {POS: VERB, "VerbForm": "part"}} lookups = Lookups() lookups.add_table("lemma_index", {"verb": ("cope", "cop")}) lookups.add_table("lemma_exc", {"verb": {"coping": ("cope",)}}) diff --git a/spacy/tokens/__init__.py b/spacy/tokens/__init__.py index 88428709b..1aefa2b7c 100644 --- a/spacy/tokens/__init__.py +++ b/spacy/tokens/__init__.py @@ -2,5 +2,6 @@ from .doc import Doc from .token import Token from .span import Span from ._serialize import DocBin +from .morphanalysis import MorphAnalysis -__all__ = ["Doc", "Token", "Span", "DocBin"] +__all__ = ["Doc", "Token", "Span", "DocBin", "MorphAnalysis"] diff --git a/spacy/tokens/morphanalysis.pxd b/spacy/tokens/morphanalysis.pxd index 22844454a..9510875c9 100644 --- a/spacy/tokens/morphanalysis.pxd +++ b/spacy/tokens/morphanalysis.pxd @@ -5,5 +5,5 @@ from ..structs cimport MorphAnalysisC cdef class MorphAnalysis: cdef readonly Vocab vocab - cdef hash_t key + cdef readonly hash_t key cdef MorphAnalysisC c diff --git a/spacy/tokens/morphanalysis.pyx b/spacy/tokens/morphanalysis.pyx index e09870741..ed987f4e4 100644 --- a/spacy/tokens/morphanalysis.pyx +++ b/spacy/tokens/morphanalysis.pyx @@ -1,15 +1,14 @@ from libc.string cimport memset +cimport numpy as np from ..vocab cimport Vocab from ..typedefs cimport hash_t, attr_t -from ..morphology cimport list_features, check_feature, get_field, tag_to_json - -from ..strings import get_string_id +from ..morphology cimport list_features, check_feature, get_by_field cdef class MorphAnalysis: """Control access to morphological features for a token.""" - def __init__(self, Vocab vocab, features=tuple()): + def __init__(self, Vocab vocab, features=dict()): self.vocab = vocab self.key = self.vocab.morphology.add(features) analysis = self.vocab.morphology.tags.get(self.key) @@ -33,7 +32,7 @@ cdef class MorphAnalysis: def __contains__(self, feature): """Test whether the morphological analysis contains some feature.""" - cdef attr_t feat_id = get_string_id(feature) + cdef attr_t feat_id = self.vocab.strings.as_int(feature) return check_feature(&self.c, feat_id) def __iter__(self): @@ -55,369 +54,28 @@ cdef class MorphAnalysis: def __hash__(self): return self.key - def get(self, unicode field): + def __eq__(self, other): + return self.key == other.key + + def __ne__(self, other): + return self.key != other.key + + def get(self, field): """Retrieve a feature by field.""" - cdef int field_id = self.vocab.morphology._feat_map.attr2field[field] - return self.vocab.strings[get_field(&self.c, field_id)] + cdef attr_t field_id = self.vocab.strings.as_int(field) + cdef np.ndarray results = get_by_field(&self.c, field_id) + return [self.vocab.strings[result] for result in results] def to_json(self): - """Produce a json serializable representation, which will be a list of - strings. + """Produce a json serializable representation as a UD FEATS-style + string. """ - return tag_to_json(&self.c) - - @property - def is_base_form(self): - raise NotImplementedError - - @property - def pos(self): - return self.c.pos - - @property - def pos_(self): - return self.vocab.strings[self.c.pos] - - property id: - def __get__(self): - return self.key - - property abbr: - def __get__(self): - return self.c.abbr - - property adp_type: - def __get__(self): - return self.c.adp_type - - property adv_type: - def __get__(self): - return self.c.adv_type - - property animacy: - def __get__(self): - return self.c.animacy - - property aspect: - def __get__(self): - return self.c.aspect - - property case: - def __get__(self): - return self.c.case - - property conj_type: - def __get__(self): - return self.c.conj_type - - property connegative: - def __get__(self): - return self.c.connegative - - property definite: - def __get__(self): - return self.c.definite - - property degree: - def __get__(self): - return self.c.degree - - property derivation: - def __get__(self): - return self.c.derivation - - property echo: - def __get__(self): - return self.c.echo - - property foreign: - def __get__(self): - return self.c.foreign - - property gender: - def __get__(self): - return self.c.gender - - property hyph: - def __get__(self): - return self.c.hyph - - property inf_form: - def __get__(self): - return self.c.inf_form - - property mood: - def __get__(self): - return self.c.mood - - property name_type: - def __get__(self): - return self.c.name_type - - property negative: - def __get__(self): - return self.c.negative - - property noun_type: - def __get__(self): - return self.c.noun_type - - property number: - def __get__(self): - return self.c.number - - property num_form: - def __get__(self): - return self.c.num_form - - property num_type: - def __get__(self): - return self.c.num_type - - property num_value: - def __get__(self): - return self.c.num_value - - property part_form: - def __get__(self): - return self.c.part_form - - property part_type: - def __get__(self): - return self.c.part_type - - property person: - def __get__(self): - return self.c.person - - property polite: - def __get__(self): - return self.c.polite - - property polarity: - def __get__(self): - return self.c.polarity - - property poss: - def __get__(self): - return self.c.poss - - property prefix: - def __get__(self): - return self.c.prefix - - property prep_case: - def __get__(self): - return self.c.prep_case - - property pron_type: - def __get__(self): - return self.c.pron_type - - property punct_side: - def __get__(self): - return self.c.punct_side - - property punct_type: - def __get__(self): - return self.c.punct_type - - property reflex: - def __get__(self): - return self.c.reflex - - property style: - def __get__(self): - return self.c.style - - property style_variant: - def __get__(self): - return self.c.style_variant - - property tense: - def __get__(self): - return self.c.tense - - property typo: - def __get__(self): - return self.c.typo - - property verb_form: - def __get__(self): - return self.c.verb_form - - property voice: - def __get__(self): - return self.c.voice - - property verb_type: - def __get__(self): - return self.c.verb_type - - property abbr_: - def __get__(self): - return self.vocab.strings[self.c.abbr] - - property adp_type_: - def __get__(self): - return self.vocab.strings[self.c.adp_type] - - property adv_type_: - def __get__(self): - return self.vocab.strings[self.c.adv_type] - - property animacy_: - def __get__(self): - return self.vocab.strings[self.c.animacy] - - property aspect_: - def __get__(self): - return self.vocab.strings[self.c.aspect] - - property case_: - def __get__(self): - return self.vocab.strings[self.c.case] - - property conj_type_: - def __get__(self): - return self.vocab.strings[self.c.conj_type] - - property connegative_: - def __get__(self): - return self.vocab.strings[self.c.connegative] - - property definite_: - def __get__(self): - return self.vocab.strings[self.c.definite] - - property degree_: - def __get__(self): - return self.vocab.strings[self.c.degree] - - property derivation_: - def __get__(self): - return self.vocab.strings[self.c.derivation] - - property echo_: - def __get__(self): - return self.vocab.strings[self.c.echo] - - property foreign_: - def __get__(self): - return self.vocab.strings[self.c.foreign] - - property gender_: - def __get__(self): - return self.vocab.strings[self.c.gender] - - property hyph_: - def __get__(self): - return self.vocab.strings[self.c.hyph] - - property inf_form_: - def __get__(self): - return self.vocab.strings[self.c.inf_form] - - property name_type_: - def __get__(self): - return self.vocab.strings[self.c.name_type] - - property negative_: - def __get__(self): - return self.vocab.strings[self.c.negative] - - property mood_: - def __get__(self): - return self.vocab.strings[self.c.mood] - - property number_: - def __get__(self): - return self.vocab.strings[self.c.number] - - property num_form_: - def __get__(self): - return self.vocab.strings[self.c.num_form] - - property num_type_: - def __get__(self): - return self.vocab.strings[self.c.num_type] - - property num_value_: - def __get__(self): - return self.vocab.strings[self.c.num_value] - - property part_form_: - def __get__(self): - return self.vocab.strings[self.c.part_form] - - property part_type_: - def __get__(self): - return self.vocab.strings[self.c.part_type] - - property person_: - def __get__(self): - return self.vocab.strings[self.c.person] - - property polite_: - def __get__(self): - return self.vocab.strings[self.c.polite] - - property polarity_: - def __get__(self): - return self.vocab.strings[self.c.polarity] - - property poss_: - def __get__(self): - return self.vocab.strings[self.c.poss] - - property prefix_: - def __get__(self): - return self.vocab.strings[self.c.prefix] - - property prep_case_: - def __get__(self): - return self.vocab.strings[self.c.prep_case] - - property pron_type_: - def __get__(self): - return self.vocab.strings[self.c.pron_type] - - property punct_side_: - def __get__(self): - return self.vocab.strings[self.c.punct_side] - - property punct_type_: - def __get__(self): - return self.vocab.strings[self.c.punct_type] - - property reflex_: - def __get__(self): - return self.vocab.strings[self.c.reflex] - - property style_: - def __get__(self): - return self.vocab.strings[self.c.style] - - property style_variant_: - def __get__(self): - return self.vocab.strings[self.c.style_variant] - - property tense_: - def __get__(self): - return self.vocab.strings[self.c.tense] - - property typo_: - def __get__(self): - return self.vocab.strings[self.c.typo] - - property verb_form_: - def __get__(self): - return self.vocab.strings[self.c.verb_form] - - property voice_: - def __get__(self): - return self.vocab.strings[self.c.voice] - - property verb_type_: - def __get__(self): - return self.vocab.strings[self.c.verb_type] + morph_string = self.vocab.strings[self.c.key] + if morph_string == self.vocab.morphology.EMPTY_MORPH: + return "" + return morph_string + + def to_dict(self): + """Produce a dict representation. + """ + return self.vocab.morphology.feats_to_dict(self.to_json()) diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index 8e6290187..b159fffc1 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -217,6 +217,14 @@ cdef class Token: def morph(self): return MorphAnalysis.from_id(self.vocab, self.c.morph) + property morph_: + def __get__(self): + return str(MorphAnalysis.from_id(self.vocab, self.c.morph)) + + def __set__(self, features): + cdef hash_t key = self.vocab.morphology.add(features) + self.c.morph = key + @property def lex_id(self): """RETURNS (int): Sequential ID of the token's lexical type."""