mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Modify morphology to support arbitrary features (#4932)
* Restructure tag maps for MorphAnalysis changes Prepare tag maps for upcoming MorphAnalysis changes that allow arbitrary features. * Use default tag map rather than duplicating for ca / uk / vi * Import tag map into defaults for ga * Modify tag maps so all morphological fields and features are strings * Move features from `"Other"` to the top level * Rewrite tuples as strings separated by `","` * Rewrite morph symbols for fr lemmatizer as strings * Export MorphAnalysis under spacy.tokens * Modify morphology to support arbitrary features Modify `Morphology` and `MorphAnalysis` so that arbitrary features are supported. * Modify `MorphAnalysisC` so that it can support arbitrary features and multiple values per field. `MorphAnalysisC` is redesigned to contain: * key: hash of UD FEATS string of morphological features * array of `MorphFeatureC` structs that each contain a hash of `Field` and `Field=Value` for a given morphological feature, which makes it possible to: * find features by field * represent multiple values for a given field * `get_field()` is renamed to `get_by_field()` and is no longer `nogil`. Instead a new helper function `get_n_by_field()` is `nogil` and returns `n` features by field. * `MorphAnalysis.get()` returns all possible values for a field as a list of individual features such as `["Tense=Pres", "Tense=Past"]`. * `MorphAnalysis`'s `str()` and `repr()` are the UD FEATS string. * `Morphology.feats_to_dict()` converts a UD FEATS string to a dict where: * Each field has one entry in the dict * Multiple values remain separated by a separator in the value string * `Token.morph_` returns the UD FEATS string and you can set `Token.morph_` with a UD FEATS string or with a tag map dict. * Modify get_by_field to use np.ndarray Modify `get_by_field()` to use np.ndarray. Remove `max_results` from `get_n_by_field()` and always iterate over all the fields.
* Rewrite without MorphFeatureC * Add shortcut for existing feats strings as keys Add shortcut for existing feats strings as keys in `Morphology.add()`. * Check for '_' as empty analysis when adding morphs * Extend helper converters in Morphology Add and extend helper converters that convert and normalize between: * UD FEATS strings (`"Case=dat,gen|Number=sing"`) * per-field dict of feats (`{"Case": "dat,gen", "Number": "sing"}`) * list of individual features (`["Case=dat", "Case=gen", "Number=sing"]`) All converters sort fields and values where applicable.
This commit is contained in:
parent
0a0de85409
commit
adc9745718
|
@ -103,6 +103,9 @@ class Warnings(object):
|
||||||
W027 = ("Found a large training file of {size} bytes. Note that it may "
|
W027 = ("Found a large training file of {size} bytes. Note that it may "
|
||||||
"be more efficient to split your training data into multiple "
|
"be more efficient to split your training data into multiple "
|
||||||
"smaller JSON files instead.")
|
"smaller JSON files instead.")
|
||||||
|
W028 = ("Skipping unsupported morphological feature(s): {feature}. "
|
||||||
|
"Provide features as a dict {{\"Field1\": \"Value1,Value2\"}} or "
|
||||||
|
"string \"Field1=Value1,Value2|Field2=Value3\".")
|
||||||
|
|
||||||
|
|
||||||
@add_codes
|
@add_codes
|
||||||
|
|
|
@ -11,8 +11,8 @@ TAG_MAP = {
|
||||||
'""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
|
'""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
|
||||||
"''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
|
"''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
|
||||||
":": {POS: PUNCT},
|
":": {POS: PUNCT},
|
||||||
"৳": {POS: SYM, "Other": {"SymType": "currency"}},
|
"৳": {POS: SYM, "SymType": "currency"},
|
||||||
"#": {POS: SYM, "Other": {"SymType": "numbersign"}},
|
"#": {POS: SYM, "SymType": "numbersign"},
|
||||||
"AFX": {POS: ADJ, "Hyph": "yes"},
|
"AFX": {POS: ADJ, "Hyph": "yes"},
|
||||||
"CC": {POS: CONJ, "ConjType": "coor"},
|
"CC": {POS: CONJ, "ConjType": "coor"},
|
||||||
"CD": {POS: NUM, "NumType": "card"},
|
"CD": {POS: NUM, "NumType": "card"},
|
||||||
|
|
|
@ -1,25 +0,0 @@
|
||||||
from ..symbols import POS, ADV, NOUN, ADP, PRON, SCONJ, PROPN, DET, SYM, INTJ
|
|
||||||
from ..symbols import PUNCT, NUM, AUX, X, CONJ, ADJ, VERB, PART, SPACE, CCONJ
|
|
||||||
|
|
||||||
|
|
||||||
TAG_MAP = {
|
|
||||||
"ADV": {POS: ADV},
|
|
||||||
"NOUN": {POS: NOUN},
|
|
||||||
"ADP": {POS: ADP},
|
|
||||||
"PRON": {POS: PRON},
|
|
||||||
"SCONJ": {POS: SCONJ},
|
|
||||||
"PROPN": {POS: PROPN},
|
|
||||||
"DET": {POS: DET},
|
|
||||||
"SYM": {POS: SYM},
|
|
||||||
"INTJ": {POS: INTJ},
|
|
||||||
"PUNCT": {POS: PUNCT},
|
|
||||||
"NUM": {POS: NUM},
|
|
||||||
"AUX": {POS: AUX},
|
|
||||||
"X": {POS: X},
|
|
||||||
"CONJ": {POS: CONJ},
|
|
||||||
"CCONJ": {POS: CCONJ},
|
|
||||||
"ADJ": {POS: ADJ},
|
|
||||||
"VERB": {POS: VERB},
|
|
||||||
"PART": {POS: PART},
|
|
||||||
"SP": {POS: SPACE},
|
|
||||||
}
|
|
|
@ -4,7 +4,6 @@ from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
|
||||||
from .stop_words import STOP_WORDS
|
from .stop_words import STOP_WORDS
|
||||||
from .lex_attrs import LEX_ATTRS
|
from .lex_attrs import LEX_ATTRS
|
||||||
from .morph_rules import MORPH_RULES
|
from .morph_rules import MORPH_RULES
|
||||||
from ..tag_map import TAG_MAP
|
|
||||||
|
|
||||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||||
from ..norm_exceptions import BASE_NORMS
|
from ..norm_exceptions import BASE_NORMS
|
||||||
|
@ -24,7 +23,6 @@ class DanishDefaults(Language.Defaults):
|
||||||
morph_rules = MORPH_RULES
|
morph_rules = MORPH_RULES
|
||||||
infixes = TOKENIZER_INFIXES
|
infixes = TOKENIZER_INFIXES
|
||||||
suffixes = TOKENIZER_SUFFIXES
|
suffixes = TOKENIZER_SUFFIXES
|
||||||
tag_map = TAG_MAP
|
|
||||||
stop_words = STOP_WORDS
|
stop_words = STOP_WORDS
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -656,7 +656,7 @@ TAG_MAP = {
|
||||||
"Gender": "Fem",
|
"Gender": "Fem",
|
||||||
"Number": "Plur",
|
"Number": "Plur",
|
||||||
"Case": "Acc",
|
"Case": "Acc",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfFePlGe": {
|
"AtDfFePlGe": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -664,7 +664,7 @@ TAG_MAP = {
|
||||||
"Gender": "Fem",
|
"Gender": "Fem",
|
||||||
"Number": "Plur",
|
"Number": "Plur",
|
||||||
"Case": "Gen",
|
"Case": "Gen",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfFePlNm": {
|
"AtDfFePlNm": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -672,7 +672,7 @@ TAG_MAP = {
|
||||||
"Gender": "Fem",
|
"Gender": "Fem",
|
||||||
"Number": "Plur",
|
"Number": "Plur",
|
||||||
"Case": "Nom",
|
"Case": "Nom",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfFeSgAc": {
|
"AtDfFeSgAc": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -680,7 +680,7 @@ TAG_MAP = {
|
||||||
"Gender": "Fem",
|
"Gender": "Fem",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Acc",
|
"Case": "Acc",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfFeSgDa": {
|
"AtDfFeSgDa": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -688,7 +688,7 @@ TAG_MAP = {
|
||||||
"Gender": "Fem",
|
"Gender": "Fem",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Dat",
|
"Case": "Dat",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfFeSgGe": {
|
"AtDfFeSgGe": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -696,7 +696,7 @@ TAG_MAP = {
|
||||||
"Gender": "Fem",
|
"Gender": "Fem",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Gen",
|
"Case": "Gen",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfFeSgNm": {
|
"AtDfFeSgNm": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -704,7 +704,7 @@ TAG_MAP = {
|
||||||
"Gender": "Fem",
|
"Gender": "Fem",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Nom",
|
"Case": "Nom",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfMaPlAc": {
|
"AtDfMaPlAc": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -712,7 +712,7 @@ TAG_MAP = {
|
||||||
"Gender": "Masc",
|
"Gender": "Masc",
|
||||||
"Number": "Plur",
|
"Number": "Plur",
|
||||||
"Case": "Acc",
|
"Case": "Acc",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfMaPlGe": {
|
"AtDfMaPlGe": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -720,7 +720,7 @@ TAG_MAP = {
|
||||||
"Gender": "Masc",
|
"Gender": "Masc",
|
||||||
"Number": "Plur",
|
"Number": "Plur",
|
||||||
"Case": "Gen",
|
"Case": "Gen",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfMaPlNm": {
|
"AtDfMaPlNm": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -728,7 +728,7 @@ TAG_MAP = {
|
||||||
"Gender": "Masc",
|
"Gender": "Masc",
|
||||||
"Number": "Plur",
|
"Number": "Plur",
|
||||||
"Case": "Nom",
|
"Case": "Nom",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfMaSgAc": {
|
"AtDfMaSgAc": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -736,7 +736,7 @@ TAG_MAP = {
|
||||||
"Gender": "Masc",
|
"Gender": "Masc",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Acc",
|
"Case": "Acc",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfMaSgDa": {
|
"AtDfMaSgDa": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -744,7 +744,7 @@ TAG_MAP = {
|
||||||
"Gender": "Masc",
|
"Gender": "Masc",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Dat",
|
"Case": "Dat",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfMaSgGe": {
|
"AtDfMaSgGe": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -752,7 +752,7 @@ TAG_MAP = {
|
||||||
"Gender": "Masc",
|
"Gender": "Masc",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Gen",
|
"Case": "Gen",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfMaSgNm": {
|
"AtDfMaSgNm": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -760,7 +760,7 @@ TAG_MAP = {
|
||||||
"Gender": "Masc",
|
"Gender": "Masc",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Nom",
|
"Case": "Nom",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfNePlAc": {
|
"AtDfNePlAc": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -768,7 +768,7 @@ TAG_MAP = {
|
||||||
"Gender": "Neut",
|
"Gender": "Neut",
|
||||||
"Number": "Plur",
|
"Number": "Plur",
|
||||||
"Case": "Acc",
|
"Case": "Acc",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfNePlDa": {
|
"AtDfNePlDa": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -776,7 +776,7 @@ TAG_MAP = {
|
||||||
"Gender": "Neut",
|
"Gender": "Neut",
|
||||||
"Number": "Plur",
|
"Number": "Plur",
|
||||||
"Case": "Dat",
|
"Case": "Dat",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfNePlGe": {
|
"AtDfNePlGe": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -784,7 +784,7 @@ TAG_MAP = {
|
||||||
"Gender": "Neut",
|
"Gender": "Neut",
|
||||||
"Number": "Plur",
|
"Number": "Plur",
|
||||||
"Case": "Gen",
|
"Case": "Gen",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfNePlNm": {
|
"AtDfNePlNm": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -792,7 +792,7 @@ TAG_MAP = {
|
||||||
"Gender": "Neut",
|
"Gender": "Neut",
|
||||||
"Number": "Plur",
|
"Number": "Plur",
|
||||||
"Case": "Nom",
|
"Case": "Nom",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfNeSgAc": {
|
"AtDfNeSgAc": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -800,7 +800,7 @@ TAG_MAP = {
|
||||||
"Gender": "Neut",
|
"Gender": "Neut",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Acc",
|
"Case": "Acc",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfNeSgDa": {
|
"AtDfNeSgDa": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -808,7 +808,7 @@ TAG_MAP = {
|
||||||
"Gender": "Neut",
|
"Gender": "Neut",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Dat",
|
"Case": "Dat",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfNeSgGe": {
|
"AtDfNeSgGe": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -816,7 +816,7 @@ TAG_MAP = {
|
||||||
"Gender": "Neut",
|
"Gender": "Neut",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Gen",
|
"Case": "Gen",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtDfNeSgNm": {
|
"AtDfNeSgNm": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -824,7 +824,7 @@ TAG_MAP = {
|
||||||
"Gender": "Neut",
|
"Gender": "Neut",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Nom",
|
"Case": "Nom",
|
||||||
"Other": {"Definite": "Def"},
|
"Definite": "Def",
|
||||||
},
|
},
|
||||||
"AtIdFeSgAc": {
|
"AtIdFeSgAc": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -832,7 +832,7 @@ TAG_MAP = {
|
||||||
"Gender": "Fem",
|
"Gender": "Fem",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Acc",
|
"Case": "Acc",
|
||||||
"Other": {"Definite": "Ind"},
|
"Definite": "Ind",
|
||||||
},
|
},
|
||||||
"AtIdFeSgDa": {
|
"AtIdFeSgDa": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -840,7 +840,7 @@ TAG_MAP = {
|
||||||
"Gender": "Fem",
|
"Gender": "Fem",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Dat",
|
"Case": "Dat",
|
||||||
"Other": {"Definite": "Ind"},
|
"Definite": "Ind",
|
||||||
},
|
},
|
||||||
"AtIdFeSgGe": {
|
"AtIdFeSgGe": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -848,7 +848,7 @@ TAG_MAP = {
|
||||||
"Gender": "Fem",
|
"Gender": "Fem",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Gen",
|
"Case": "Gen",
|
||||||
"Other": {"Definite": "Ind"},
|
"Definite": "Ind",
|
||||||
},
|
},
|
||||||
"AtIdFeSgNm": {
|
"AtIdFeSgNm": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -856,7 +856,7 @@ TAG_MAP = {
|
||||||
"Gender": "Fem",
|
"Gender": "Fem",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Nom",
|
"Case": "Nom",
|
||||||
"Other": {"Definite": "Ind"},
|
"Definite": "Ind",
|
||||||
},
|
},
|
||||||
"AtIdMaSgAc": {
|
"AtIdMaSgAc": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -864,7 +864,7 @@ TAG_MAP = {
|
||||||
"Gender": "Masc",
|
"Gender": "Masc",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Acc",
|
"Case": "Acc",
|
||||||
"Other": {"Definite": "Ind"},
|
"Definite": "Ind",
|
||||||
},
|
},
|
||||||
"AtIdMaSgGe": {
|
"AtIdMaSgGe": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -872,7 +872,7 @@ TAG_MAP = {
|
||||||
"Gender": "Masc",
|
"Gender": "Masc",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Gen",
|
"Case": "Gen",
|
||||||
"Other": {"Definite": "Ind"},
|
"Definite": "Ind",
|
||||||
},
|
},
|
||||||
"AtIdMaSgNm": {
|
"AtIdMaSgNm": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -880,7 +880,7 @@ TAG_MAP = {
|
||||||
"Gender": "Masc",
|
"Gender": "Masc",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Nom",
|
"Case": "Nom",
|
||||||
"Other": {"Definite": "Ind"},
|
"Definite": "Ind",
|
||||||
},
|
},
|
||||||
"AtIdNeSgAc": {
|
"AtIdNeSgAc": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -888,7 +888,7 @@ TAG_MAP = {
|
||||||
"Gender": "Neut",
|
"Gender": "Neut",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Acc",
|
"Case": "Acc",
|
||||||
"Other": {"Definite": "Ind"},
|
"Definite": "Ind",
|
||||||
},
|
},
|
||||||
"AtIdNeSgGe": {
|
"AtIdNeSgGe": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -896,7 +896,7 @@ TAG_MAP = {
|
||||||
"Gender": "Neut",
|
"Gender": "Neut",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Gen",
|
"Case": "Gen",
|
||||||
"Other": {"Definite": "Ind"},
|
"Definite": "Ind",
|
||||||
},
|
},
|
||||||
"AtIdNeSgNm": {
|
"AtIdNeSgNm": {
|
||||||
POS: DET,
|
POS: DET,
|
||||||
|
@ -904,7 +904,7 @@ TAG_MAP = {
|
||||||
"Gender": "Neut",
|
"Gender": "Neut",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": "Nom",
|
"Case": "Nom",
|
||||||
"Other": {"Definite": "Ind"},
|
"Definite": "Ind",
|
||||||
},
|
},
|
||||||
"CjCo": {POS: CCONJ},
|
"CjCo": {POS: CCONJ},
|
||||||
"CjSb": {POS: SCONJ},
|
"CjSb": {POS: SCONJ},
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
from ...lemmatizer import Lemmatizer
|
from ...lemmatizer import Lemmatizer
|
||||||
from ...symbols import POS, NOUN, VERB, ADJ, ADV, PRON, DET, AUX, PUNCT, ADP
|
from ...symbols import POS, NOUN, VERB, ADJ, ADV, PRON, DET, AUX, PUNCT, ADP
|
||||||
from ...symbols import SCONJ, CCONJ
|
from ...symbols import SCONJ, CCONJ
|
||||||
from ...symbols import VerbForm_inf, VerbForm_none, Number_sing, Degree_pos
|
|
||||||
|
|
||||||
|
|
||||||
class FrenchLemmatizer(Lemmatizer):
|
class FrenchLemmatizer(Lemmatizer):
|
||||||
|
@ -82,13 +81,13 @@ class FrenchLemmatizer(Lemmatizer):
|
||||||
return True
|
return True
|
||||||
elif univ_pos == "adj" and morphology.get("Degree") == "pos":
|
elif univ_pos == "adj" and morphology.get("Degree") == "pos":
|
||||||
return True
|
return True
|
||||||
elif VerbForm_inf in morphology:
|
elif "VerbForm=inf" in morphology:
|
||||||
return True
|
return True
|
||||||
elif VerbForm_none in morphology:
|
elif "VerbForm=none" in morphology:
|
||||||
return True
|
return True
|
||||||
elif Number_sing in morphology:
|
elif "Number=sing" in morphology:
|
||||||
return True
|
return True
|
||||||
elif Degree_pos in morphology:
|
elif "Degree=pos" in morphology:
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||||
from .stop_words import STOP_WORDS
|
from .stop_words import STOP_WORDS
|
||||||
|
from .tag_map import TAG_MAP
|
||||||
|
|
||||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||||
from ...language import Language
|
from ...language import Language
|
||||||
|
@ -13,6 +14,7 @@ class IrishDefaults(Language.Defaults):
|
||||||
|
|
||||||
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
|
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
|
||||||
stop_words = set(STOP_WORDS)
|
stop_words = set(STOP_WORDS)
|
||||||
|
tag_map = TAG_MAP
|
||||||
|
|
||||||
|
|
||||||
class Irish(Language):
|
class Irish(Language):
|
||||||
|
|
|
@ -1,26 +1,26 @@
|
||||||
# fmt: off
|
# fmt: off
|
||||||
TAG_MAP = {
|
TAG_MAP = {
|
||||||
"ADJ__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
|
"ADJ__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "masc", "Number": "sing", "Form": "len"},
|
||||||
"ADJ__Case=Gen|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "fem", "Number": "sing"},
|
"ADJ__Case=Gen|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "fem", "Number": "sing"},
|
||||||
"ADJ__Case=Gen|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "masc", "Number": "sing"},
|
"ADJ__Case=Gen|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "masc", "Number": "sing"},
|
||||||
"ADJ__Case=Gen|NounType=Strong|Number=Plur": {"pos": "ADJ", "Case": "gen", "Number": "plur", "Other": {"NounType": "strong"}},
|
"ADJ__Case=Gen|NounType=Strong|Number=Plur": {"pos": "ADJ", "Case": "gen", "Number": "plur", "NounType": "strong"},
|
||||||
"ADJ__Case=Gen|NounType=Weak|Number=Plur": {"pos": "ADJ", "Case": "gen", "Number": "plur", "Other": {"NounType": "weak"}},
|
"ADJ__Case=Gen|NounType=Weak|Number=Plur": {"pos": "ADJ", "Case": "gen", "Number": "plur", "NounType": "weak"},
|
||||||
"ADJ__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
|
"ADJ__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "len"},
|
||||||
"ADJ__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
|
"ADJ__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "len"},
|
||||||
"ADJ__Case=NomAcc|Gender=Fem|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "plur"},
|
"ADJ__Case=NomAcc|Gender=Fem|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "plur"},
|
||||||
"ADJ__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "sing"},
|
"ADJ__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "sing"},
|
||||||
"ADJ__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "plur"},
|
"ADJ__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "plur"},
|
||||||
"ADJ__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "sing"},
|
"ADJ__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "sing"},
|
||||||
"ADJ__Case=NomAcc|NounType=NotSlender|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Number": "plur", "Other": {"NounType": "notslender"}},
|
"ADJ__Case=NomAcc|NounType=NotSlender|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Number": "plur", "NounType": "notslender"},
|
||||||
"ADJ__Case=NomAcc|NounType=Slender|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Number": "plur", "Other": {"NounType": "slender"}},
|
"ADJ__Case=NomAcc|NounType=Slender|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Number": "plur", "NounType": "slender"},
|
||||||
"ADJ__Degree=Cmp,Sup|Form=Len": {"pos": "ADJ", "Degree": "cmp|sup", "Other": {"Form": "len"}},
|
"ADJ__Degree=Cmp,Sup|Form=Len": {"pos": "ADJ", "Degree": "cmp|sup", "Form": "len"},
|
||||||
"ADJ__Degree=Cmp,Sup": {"pos": "ADJ", "Degree": "cmp|sup"},
|
"ADJ__Degree=Cmp,Sup": {"pos": "ADJ", "Degree": "cmp|sup"},
|
||||||
"ADJ__Degree=Pos|Form=Ecl": {"pos": "ADJ", "Degree": "pos", "Other": {"Form": "ecl"}},
|
"ADJ__Degree=Pos|Form=Ecl": {"pos": "ADJ", "Degree": "pos", "Form": "ecl"},
|
||||||
"ADJ__Degree=Pos|Form=HPref": {"pos": "ADJ", "Degree": "pos", "Other": {"Form": "hpref"}},
|
"ADJ__Degree=Pos|Form=HPref": {"pos": "ADJ", "Degree": "pos", "Form": "hpref"},
|
||||||
"ADJ__Degree=Pos|Form=Len": {"pos": "ADJ", "Degree": "pos", "Other": {"Form": "len"}},
|
"ADJ__Degree=Pos|Form=Len": {"pos": "ADJ", "Degree": "pos", "Form": "len"},
|
||||||
"ADJ__Degree=Pos": {"pos": "ADJ", "Degree": "pos"},
|
"ADJ__Degree=Pos": {"pos": "ADJ", "Degree": "pos"},
|
||||||
"ADJ__Foreign=Yes": {"pos": "ADJ", "Foreign": "yes"},
|
"ADJ__Foreign=Yes": {"pos": "ADJ", "Foreign": "yes"},
|
||||||
"ADJ__Form=Len|VerbForm=Part": {"pos": "ADJ", "VerbForm": "part", "Other": {"Form": "len"}},
|
"ADJ__Form=Len|VerbForm=Part": {"pos": "ADJ", "VerbForm": "part", "Form": "len"},
|
||||||
"ADJ__Gender=Masc|Number=Sing|PartType=Voc": {"pos": "ADJ", "Gender": "masc", "Number": "sing", "Case": "voc"},
|
"ADJ__Gender=Masc|Number=Sing|PartType=Voc": {"pos": "ADJ", "Gender": "masc", "Number": "sing", "Case": "voc"},
|
||||||
"ADJ__Gender=Masc|Number=Sing|Case=Voc": {"pos": "ADJ", "Gender": "masc", "Number": "sing", "Case": "voc"},
|
"ADJ__Gender=Masc|Number=Sing|Case=Voc": {"pos": "ADJ", "Gender": "masc", "Number": "sing", "Case": "voc"},
|
||||||
"ADJ__Number=Plur|PartType=Voc": {"pos": "ADJ", "Number": "plur", "Case": "voc"},
|
"ADJ__Number=Plur|PartType=Voc": {"pos": "ADJ", "Number": "plur", "Case": "voc"},
|
||||||
|
@ -29,9 +29,9 @@ TAG_MAP = {
|
||||||
"ADJ___": {"pos": "ADJ"},
|
"ADJ___": {"pos": "ADJ"},
|
||||||
"ADJ__VerbForm=Part": {"pos": "ADJ", "VerbForm": "part"},
|
"ADJ__VerbForm=Part": {"pos": "ADJ", "VerbForm": "part"},
|
||||||
"ADP__Foreign=Yes": {"pos": "ADP", "Foreign": "yes"},
|
"ADP__Foreign=Yes": {"pos": "ADP", "Foreign": "yes"},
|
||||||
"ADP__Form=Len|Number=Plur|Person=1": {"pos": "ADP", "Number": "plur", "Person": 1, "Other": {"Form": "len"}},
|
"ADP__Form=Len|Number=Plur|Person=1": {"pos": "ADP", "Number": "plur", "Person": 1, "Form": "len"},
|
||||||
"ADP__Form=Len|Number=Plur|Person=3": {"pos": "ADP", "Number": "plur", "Person": 3, "Other": {"Form": "len"}},
|
"ADP__Form=Len|Number=Plur|Person=3": {"pos": "ADP", "Number": "plur", "Person": 3, "Form": "len"},
|
||||||
"ADP__Form=Len|Number=Sing|Person=1": {"pos": "ADP", "Number": "sing", "Person": 1, "Other": {"Form": "len"}},
|
"ADP__Form=Len|Number=Sing|Person=1": {"pos": "ADP", "Number": "sing", "Person": 1, "Form": "len"},
|
||||||
"ADP__Gender=Fem|Number=Sing|Person=3": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3},
|
"ADP__Gender=Fem|Number=Sing|Person=3": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3},
|
||||||
"ADP__Gender=Fem|Number=Sing|Person=3|Poss=Yes": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes"},
|
"ADP__Gender=Fem|Number=Sing|Person=3|Poss=Yes": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes"},
|
||||||
"ADP__Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes", "PronType": "prs"},
|
"ADP__Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes", "PronType": "prs"},
|
||||||
|
@ -57,41 +57,41 @@ TAG_MAP = {
|
||||||
"ADP__Person=3|Poss=Yes": {"pos": "ADP", "Person": 3, "Poss": "yes"},
|
"ADP__Person=3|Poss=Yes": {"pos": "ADP", "Person": 3, "Poss": "yes"},
|
||||||
"ADP___": {"pos": "ADP"},
|
"ADP___": {"pos": "ADP"},
|
||||||
"ADP__Poss=Yes": {"pos": "ADP", "Poss": "yes"},
|
"ADP__Poss=Yes": {"pos": "ADP", "Poss": "yes"},
|
||||||
"ADP__PrepForm=Cmpd": {"pos": "ADP", "Other": {"PrepForm": "cmpd"}},
|
"ADP__PrepForm=Cmpd": {"pos": "ADP", "PrepForm": "cmpd"},
|
||||||
"ADP__PronType=Art": {"pos": "ADP", "PronType": "art"},
|
"ADP__PronType=Art": {"pos": "ADP", "PronType": "art"},
|
||||||
"ADV__Form=Len": {"pos": "ADV", "Other": {"Form": "len"}},
|
"ADV__Form=Len": {"pos": "ADV", "Form": "len"},
|
||||||
"ADV___": {"pos": "ADV"},
|
"ADV___": {"pos": "ADV"},
|
||||||
"ADV__PronType=Int": {"pos": "ADV", "PronType": "int"},
|
"ADV__PronType=Int": {"pos": "ADV", "PronType": "int"},
|
||||||
"AUX__Form=VF|Polarity=Neg|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}},
|
"AUX__Form=VF|Polarity=Neg|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Form": "vf", "VerbForm": "cop"},
|
||||||
"AUX__Form=VF|Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}},
|
"AUX__Form=VF|Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Form": "vf", "VerbForm": "cop"},
|
||||||
"AUX__Form=VF|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}},
|
"AUX__Form=VF|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "past", "Form": "vf", "VerbForm": "cop"},
|
||||||
"AUX__Form=VF|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}},
|
"AUX__Form=VF|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Form": "vf", "VerbForm": "cop"},
|
||||||
"AUX__Form=VF|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Other": {"Form": "vf", "VerbForm": "cop"}},
|
"AUX__Form=VF|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Form": "vf", "VerbForm": "cop"},
|
||||||
"AUX__Gender=Masc|Number=Sing|Person=3|VerbForm=Cop": {"pos": "AUX", "Gender": "masc", "Number": "sing", "Person": 3, "Other": {"VerbForm": "cop"}},
|
"AUX__Gender=Masc|Number=Sing|Person=3|VerbForm=Cop": {"pos": "AUX", "Gender": "masc", "Number": "sing", "Person": 3, "VerbForm": "cop"},
|
||||||
"AUX__Mood=Int|Number=Sing|PronType=Art|VerbForm=Cop": {"pos": "AUX", "Number": "sing", "PronType": "art", "Other": {"Mood": "int", "VerbForm": "cop"}},
|
"AUX__Mood=Int|Number=Sing|PronType=Art|VerbForm=Cop": {"pos": "AUX", "Number": "sing", "PronType": "art", "Mood": "int", "VerbForm": "cop"},
|
||||||
"AUX__Mood=Int|Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Other": {"Mood": "int", "VerbForm": "cop"}},
|
"AUX__Mood=Int|Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Mood": "int", "VerbForm": "cop"},
|
||||||
"AUX__Mood=Int|Polarity=Neg|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "pres", "Other": {"Mood": "int", "VerbForm": "cop"}},
|
"AUX__Mood=Int|Polarity=Neg|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "pres", "Mood": "int", "VerbForm": "cop"},
|
||||||
"AUX__Mood=Int|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Other": {"Mood": "int", "VerbForm": "cop"}},
|
"AUX__Mood=Int|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Mood": "int", "VerbForm": "cop"},
|
||||||
"AUX__PartType=Comp|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Other": {"PartType": "comp", "VerbForm": "cop"}},
|
"AUX__PartType=Comp|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "PartType": "comp", "VerbForm": "cop"},
|
||||||
"AUX__Polarity=Neg|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Other": {"VerbForm": "cop"}},
|
"AUX__Polarity=Neg|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "past", "VerbForm": "cop"},
|
||||||
"AUX__Polarity=Neg|PronType=Rel|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "pres", "Other": {"VerbForm": "cop"}},
|
"AUX__Polarity=Neg|PronType=Rel|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "pres", "VerbForm": "cop"},
|
||||||
"AUX__Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Other": {"VerbForm": "cop"}},
|
"AUX__Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "VerbForm": "cop"},
|
||||||
"AUX__Polarity=Neg|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "pres", "Other": {"VerbForm": "cop"}},
|
"AUX__Polarity=Neg|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "pres", "VerbForm": "cop"},
|
||||||
"AUX___": {"pos": "AUX"},
|
"AUX___": {"pos": "AUX"},
|
||||||
"AUX__PronType=Dem|VerbForm=Cop": {"pos": "AUX", "PronType": "dem", "Other": {"VerbForm": "cop"}},
|
"AUX__PronType=Dem|VerbForm=Cop": {"pos": "AUX", "PronType": "dem", "VerbForm": "cop"},
|
||||||
"AUX__PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "past", "Other": {"VerbForm": "cop"}},
|
"AUX__PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "past", "VerbForm": "cop"},
|
||||||
"AUX__PronType=Rel|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "pres", "Other": {"VerbForm": "cop"}},
|
"AUX__PronType=Rel|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "pres", "VerbForm": "cop"},
|
||||||
"AUX__Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Other": {"VerbForm": "cop"}},
|
"AUX__Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "VerbForm": "cop"},
|
||||||
"AUX__Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Other": {"VerbForm": "cop"}},
|
"AUX__Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "VerbForm": "cop"},
|
||||||
"AUX__VerbForm=Cop": {"pos": "AUX", "Other": {"VerbForm": "cop"}},
|
"AUX__VerbForm=Cop": {"pos": "AUX", "VerbForm": "cop"},
|
||||||
"CCONJ___": {"pos": "CCONJ"},
|
"CCONJ___": {"pos": "CCONJ"},
|
||||||
"DET__Case=Gen|Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {"pos": "DET", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing", "PronType": "art"},
|
"DET__Case=Gen|Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {"pos": "DET", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing", "PronType": "art"},
|
||||||
"DET__Definite=Def|Form=Ecl": {"pos": "DET", "Definite": "def", "Other": {"Form": "ecl"}},
|
"DET__Definite=Def|Form=Ecl": {"pos": "DET", "Definite": "def", "Form": "ecl"},
|
||||||
"DET__Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {"pos": "DET", "Definite": "def", "Gender": "fem", "Number": "sing", "PronType": "art"},
|
"DET__Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {"pos": "DET", "Definite": "def", "Gender": "fem", "Number": "sing", "PronType": "art"},
|
||||||
"DET__Definite=Def|Number=Plur|PronType=Art": {"pos": "DET", "Definite": "def", "Number": "plur", "PronType": "art"},
|
"DET__Definite=Def|Number=Plur|PronType=Art": {"pos": "DET", "Definite": "def", "Number": "plur", "PronType": "art"},
|
||||||
"DET__Definite=Def|Number=Sing|PronType=Art": {"pos": "DET", "Definite": "def", "Number": "sing", "PronType": "art"},
|
"DET__Definite=Def|Number=Sing|PronType=Art": {"pos": "DET", "Definite": "def", "Number": "sing", "PronType": "art"},
|
||||||
"DET__Definite=Def": {"pos": "DET", "Definite": "def"},
|
"DET__Definite=Def": {"pos": "DET", "Definite": "def"},
|
||||||
"DET__Form=HPref|PronType=Ind": {"pos": "DET", "PronType": "ind", "Other": {"Form": "hpref"}},
|
"DET__Form=HPref|PronType=Ind": {"pos": "DET", "PronType": "ind", "Form": "hpref"},
|
||||||
"DET__Gender=Fem|Number=Sing|Person=3|Poss=Yes": {"pos": "DET", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes"},
|
"DET__Gender=Fem|Number=Sing|Person=3|Poss=Yes": {"pos": "DET", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes"},
|
||||||
"DET__Gender=Masc|Number=Sing|Person=3|Poss=Yes": {"pos": "DET", "Gender": "masc", "Number": "sing", "Person": 3, "Poss": "yes"},
|
"DET__Gender=Masc|Number=Sing|Person=3|Poss=Yes": {"pos": "DET", "Gender": "masc", "Number": "sing", "Person": 3, "Poss": "yes"},
|
||||||
"DET__Number=Plur|Person=1|Poss=Yes": {"pos": "DET", "Number": "plur", "Person": 1, "Poss": "yes"},
|
"DET__Number=Plur|Person=1|Poss=Yes": {"pos": "DET", "Number": "plur", "Person": 1, "Poss": "yes"},
|
||||||
|
@ -103,33 +103,33 @@ TAG_MAP = {
|
||||||
"DET__PronType=Dem": {"pos": "DET", "PronType": "dem"},
|
"DET__PronType=Dem": {"pos": "DET", "PronType": "dem"},
|
||||||
"DET__PronType=Ind": {"pos": "DET", "PronType": "ind"},
|
"DET__PronType=Ind": {"pos": "DET", "PronType": "ind"},
|
||||||
"NOUN__Case=Dat|Definite=Ind|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Definite": "ind", "Gender": "fem", "Number": "sing"},
|
"NOUN__Case=Dat|Definite=Ind|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Definite": "ind", "Gender": "fem", "Number": "sing"},
|
||||||
"NOUN__Case=Dat|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}},
|
"NOUN__Case=Dat|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing", "Form": "ecl"},
|
||||||
"NOUN__Case=Dat|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
|
"NOUN__Case=Dat|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing", "Form": "len"},
|
||||||
"NOUN__Case=Dat|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing"},
|
"NOUN__Case=Dat|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing"},
|
||||||
"NOUN__Case=Dat|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "masc", "Number": "sing"},
|
"NOUN__Case=Dat|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "masc", "Number": "sing"},
|
||||||
"NOUN__Case=Gen|Definite=Def|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "plur", "Other": {"NounType": "strong"}},
|
"NOUN__Case=Gen|Definite=Def|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "plur", "NounType": "strong"},
|
||||||
"NOUN__Case=Gen|Definite=Def|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing"},
|
"NOUN__Case=Gen|Definite=Def|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing"},
|
||||||
"NOUN__Case=Gen|Definite=Def|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "plur", "Other": {"NounType": "strong"}},
|
"NOUN__Case=Gen|Definite=Def|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "plur", "NounType": "strong"},
|
||||||
"NOUN__Case=Gen|Definite=Def|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "plur", "Other": {"NounType": "weak"}},
|
"NOUN__Case=Gen|Definite=Def|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "plur", "NounType": "weak"},
|
||||||
"NOUN__Case=Gen|Definite=Def|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "sing"},
|
"NOUN__Case=Gen|Definite=Def|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "sing"},
|
||||||
"NOUN__Case=Gen|Definite=Ind|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "ind", "Gender": "fem", "Number": "sing"},
|
"NOUN__Case=Gen|Definite=Ind|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "ind", "Gender": "fem", "Number": "sing"},
|
||||||
"NOUN__Case=Gen|Form=Ecl|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"Form": "ecl", "NounType": "strong"}},
|
"NOUN__Case=Gen|Form=Ecl|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Form": "ecl", "NounType": "strong"},
|
||||||
"NOUN__Case=Gen|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}},
|
"NOUN__Case=Gen|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Form": "ecl"},
|
||||||
"NOUN__Case=Gen|Form=Ecl|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl", "NounType": "strong"}},
|
"NOUN__Case=Gen|Form=Ecl|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Form": "ecl", "NounType": "strong"},
|
||||||
"NOUN__Case=Gen|Form=Ecl|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl", "NounType": "weak"}},
|
"NOUN__Case=Gen|Form=Ecl|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Form": "ecl", "NounType": "weak"},
|
||||||
"NOUN__Case=Gen|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "ecl"}},
|
"NOUN__Case=Gen|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing", "Form": "ecl"},
|
||||||
"NOUN__Case=Gen|Form=HPref|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "hpref"}},
|
"NOUN__Case=Gen|Form=HPref|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Form": "hpref"},
|
||||||
"NOUN__Case=Gen|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
|
"NOUN__Case=Gen|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Form": "len"},
|
||||||
"NOUN__Case=Gen|Form=Len|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "len", "NounType": "strong"}},
|
"NOUN__Case=Gen|Form=Len|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Form": "len", "NounType": "strong"},
|
||||||
"NOUN__Case=Gen|Form=Len|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "len", "NounType": "weak"}},
|
"NOUN__Case=Gen|Form=Len|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Form": "len", "NounType": "weak"},
|
||||||
"NOUN__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
|
"NOUN__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing", "Form": "len"},
|
||||||
"NOUN__Case=Gen|Form=Len|VerbForm=Inf": {"pos": "NOUN", "Case": "gen", "VerbForm": "inf", "Other": {"Form": "len"}},
|
"NOUN__Case=Gen|Form=Len|VerbForm=Inf": {"pos": "NOUN", "Case": "gen", "VerbForm": "inf", "Form": "len"},
|
||||||
"NOUN__Case=Gen|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"NounType": "strong"}},
|
"NOUN__Case=Gen|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "NounType": "strong"},
|
||||||
"NOUN__Case=Gen|Gender=Fem|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"NounType": "weak"}},
|
"NOUN__Case=Gen|Gender=Fem|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "NounType": "weak"},
|
||||||
"NOUN__Case=Gen|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur"},
|
"NOUN__Case=Gen|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur"},
|
||||||
"NOUN__Case=Gen|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing"},
|
"NOUN__Case=Gen|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing"},
|
||||||
"NOUN__Case=Gen|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"NounType": "strong"}},
|
"NOUN__Case=Gen|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "NounType": "strong"},
|
||||||
"NOUN__Case=Gen|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"NounType": "weak"}},
|
"NOUN__Case=Gen|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "NounType": "weak"},
|
||||||
"NOUN__Case=Gen|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur"},
|
"NOUN__Case=Gen|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur"},
|
||||||
"NOUN__Case=Gen|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing"},
|
"NOUN__Case=Gen|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing"},
|
||||||
"NOUN__Case=Gen|Number=Sing": {"pos": "NOUN", "Case": "gen", "Number": "sing"},
|
"NOUN__Case=Gen|Number=Sing": {"pos": "NOUN", "Case": "gen", "Number": "sing"},
|
||||||
|
@ -140,79 +140,79 @@ TAG_MAP = {
|
||||||
"NOUN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "plur"},
|
"NOUN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "plur"},
|
||||||
"NOUN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "sing"},
|
"NOUN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "sing"},
|
||||||
"NOUN__Case=NomAcc|Definite=Ind|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Definite": "ind", "Gender": "masc", "Number": "plur"},
|
"NOUN__Case=NomAcc|Definite=Ind|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Definite": "ind", "Gender": "masc", "Number": "plur"},
|
||||||
"NOUN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Other": {"Form": "ecl"}},
|
"NOUN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Form": "ecl"},
|
||||||
"NOUN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}},
|
"NOUN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "ecl"},
|
||||||
"NOUN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl"}},
|
"NOUN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Form": "ecl"},
|
||||||
"NOUN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "ecl"}},
|
"NOUN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "ecl"},
|
||||||
"NOUN__Case=NomAcc|Form=Emp|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "emp"}},
|
"NOUN__Case=NomAcc|Form=Emp|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "emp"},
|
||||||
"NOUN__Case=NomAcc|Form=HPref|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Other": {"Form": "hpref"}},
|
"NOUN__Case=NomAcc|Form=HPref|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Form": "hpref"},
|
||||||
"NOUN__Case=NomAcc|Form=HPref|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "hpref"}},
|
"NOUN__Case=NomAcc|Form=HPref|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "hpref"},
|
||||||
"NOUN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Other": {"Form": "hpref"}},
|
"NOUN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Form": "hpref"},
|
||||||
"NOUN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "hpref"}},
|
"NOUN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "hpref"},
|
||||||
"NOUN__Case=NomAcc|Form=Len|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Other": {"Form": "len"}},
|
"NOUN__Case=NomAcc|Form=Len|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Form": "len"},
|
||||||
"NOUN__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
|
"NOUN__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "len"},
|
||||||
"NOUN__Case=NomAcc|Form=Len|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Other": {"Form": "len"}},
|
"NOUN__Case=NomAcc|Form=Len|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Form": "len"},
|
||||||
"NOUN__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
|
"NOUN__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "len"},
|
||||||
"NOUN__Case=NomAcc|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur"},
|
"NOUN__Case=NomAcc|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur"},
|
||||||
"NOUN__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing"},
|
"NOUN__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing"},
|
||||||
"NOUN__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur"},
|
"NOUN__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur"},
|
||||||
"NOUN__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing"},
|
"NOUN__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing"},
|
||||||
"NOUN__Case=Voc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "voc", "Definite": "def", "Gender": "masc", "Number": "plur"},
|
"NOUN__Case=Voc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "voc", "Definite": "def", "Gender": "masc", "Number": "plur"},
|
||||||
"NOUN__Case=Voc|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
|
"NOUN__Case=Voc|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "fem", "Number": "sing", "Form": "len"},
|
||||||
"NOUN__Case=Voc|Form=Len|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "plur", "Other": {"Form": "len"}},
|
"NOUN__Case=Voc|Form=Len|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "plur", "Form": "len"},
|
||||||
"NOUN__Case=Voc|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
|
"NOUN__Case=Voc|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "sing", "Form": "len"},
|
||||||
"NOUN__Case=Voc|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "sing"},
|
"NOUN__Case=Voc|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "sing"},
|
||||||
"NOUN__Degree=Pos": {"pos": "NOUN", "Degree": "pos"},
|
"NOUN__Degree=Pos": {"pos": "NOUN", "Degree": "pos"},
|
||||||
"NOUN__Foreign=Yes": {"pos": "NOUN", "Foreign": "yes"},
|
"NOUN__Foreign=Yes": {"pos": "NOUN", "Foreign": "yes"},
|
||||||
"NOUN__Form=Ecl|Number=Sing": {"pos": "NOUN", "Number": "sing", "Other": {"Form": "ecl"}},
|
"NOUN__Form=Ecl|Number=Sing": {"pos": "NOUN", "Number": "sing", "Form": "ecl"},
|
||||||
"NOUN__Form=Ecl|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Other": {"Form": "ecl"}},
|
"NOUN__Form=Ecl|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Form": "ecl"},
|
||||||
"NOUN__Form=Ecl|VerbForm=Vnoun": {"pos": "NOUN", "VerbForm": "vnoun", "Other": {"Form": "ecl"}},
|
"NOUN__Form=Ecl|VerbForm=Vnoun": {"pos": "NOUN", "VerbForm": "vnoun", "Form": "ecl"},
|
||||||
"NOUN__Form=HPref|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Other": {"Form": "hpref"}},
|
"NOUN__Form=HPref|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Form": "hpref"},
|
||||||
"NOUN__Form=Len|Number=Sing": {"pos": "NOUN", "Number": "sing", "Other": {"Form": "len"}},
|
"NOUN__Form=Len|Number=Sing": {"pos": "NOUN", "Number": "sing", "Form": "len"},
|
||||||
"NOUN__Form=Len|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Other": {"Form": "len"}},
|
"NOUN__Form=Len|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Form": "len"},
|
||||||
"NOUN__Gender=Fem|Number=Sing": {"pos": "NOUN", "Gender": "fem", "Number": "sing"},
|
"NOUN__Gender=Fem|Number=Sing": {"pos": "NOUN", "Gender": "fem", "Number": "sing"},
|
||||||
"NOUN__Number=Sing|PartType=Comp": {"pos": "NOUN", "Number": "sing", "Other": {"PartType": "comp"}},
|
"NOUN__Number=Sing|PartType=Comp": {"pos": "NOUN", "Number": "sing", "PartType": "comp"},
|
||||||
"NOUN__Number=Sing": {"pos": "NOUN", "Number": "sing"},
|
"NOUN__Number=Sing": {"pos": "NOUN", "Number": "sing"},
|
||||||
"NOUN___": {"pos": "NOUN"},
|
"NOUN___": {"pos": "NOUN"},
|
||||||
"NOUN__Reflex=Yes": {"pos": "NOUN", "Reflex": "yes"},
|
"NOUN__Reflex=Yes": {"pos": "NOUN", "Reflex": "yes"},
|
||||||
"NOUN__VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf"},
|
"NOUN__VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf"},
|
||||||
"NOUN__VerbForm=Vnoun": {"pos": "NOUN", "VerbForm": "vnoun"},
|
"NOUN__VerbForm=Vnoun": {"pos": "NOUN", "VerbForm": "vnoun"},
|
||||||
"NUM__Definite=Def|NumType=Card": {"pos": "NUM", "Definite": "def", "NumType": "card"},
|
"NUM__Definite=Def|NumType=Card": {"pos": "NUM", "Definite": "def", "NumType": "card"},
|
||||||
"NUM__Form=Ecl|NumType=Card": {"pos": "NUM", "NumType": "card", "Other": {"Form": "ecl"}},
|
"NUM__Form=Ecl|NumType=Card": {"pos": "NUM", "NumType": "card", "Form": "ecl"},
|
||||||
"NUM__Form=Ecl|NumType=Ord": {"pos": "NUM", "NumType": "ord", "Other": {"Form": "ecl"}},
|
"NUM__Form=Ecl|NumType=Ord": {"pos": "NUM", "NumType": "ord", "Form": "ecl"},
|
||||||
"NUM__Form=HPref|NumType=Card": {"pos": "NUM", "NumType": "card", "Other": {"Form": "hpref"}},
|
"NUM__Form=HPref|NumType=Card": {"pos": "NUM", "NumType": "card", "Form": "hpref"},
|
||||||
"NUM__Form=Len|NumType=Card": {"pos": "NUM", "NumType": "card", "Other": {"Form": "len"}},
|
"NUM__Form=Len|NumType=Card": {"pos": "NUM", "NumType": "card", "Form": "len"},
|
||||||
"NUM__Form=Len|NumType=Ord": {"pos": "NUM", "NumType": "ord", "Other": {"Form": "len"}},
|
"NUM__Form=Len|NumType=Ord": {"pos": "NUM", "NumType": "ord", "Form": "len"},
|
||||||
"NUM__NumType=Card": {"pos": "NUM", "NumType": "card"},
|
"NUM__NumType=Card": {"pos": "NUM", "NumType": "card"},
|
||||||
"NUM__NumType=Ord": {"pos": "NUM", "NumType": "ord"},
|
"NUM__NumType=Ord": {"pos": "NUM", "NumType": "ord"},
|
||||||
"NUM___": {"pos": "NUM"},
|
"NUM___": {"pos": "NUM"},
|
||||||
"PART__Form=Ecl|PartType=Vb|PronType=Rel": {"pos": "PART", "PronType": "rel", "Other": {"Form": "ecl", "PartType": "vb"}},
|
"PART__Form=Ecl|PartType=Vb|PronType=Rel": {"pos": "PART", "PronType": "rel", "Form": "ecl", "PartType": "vb"},
|
||||||
"PART__Mood=Imp|PartType=Vb|Polarity=Neg": {"pos": "PART", "Mood": "imp", "Polarity": "neg", "Other": {"PartType": "vb"}},
|
"PART__Mood=Imp|PartType=Vb|Polarity=Neg": {"pos": "PART", "Mood": "imp", "Polarity": "neg", "PartType": "vb"},
|
||||||
"PART__Mood=Imp|PartType=Vb": {"pos": "PART", "Mood": "imp", "Other": {"PartType": "vb"}},
|
"PART__Mood=Imp|PartType=Vb": {"pos": "PART", "Mood": "imp", "PartType": "vb"},
|
||||||
"PART__Mood=Int|PartType=Vb|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Other": {"Mood": "int", "PartType": "vb"}},
|
"PART__Mood=Int|PartType=Vb|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Mood": "int", "PartType": "vb"},
|
||||||
"PART__PartType=Ad": {"pos": "PART", "Other": {"PartType": "ad"}},
|
"PART__PartType=Ad": {"pos": "PART", "PartType": "ad"},
|
||||||
"PART__PartType=Cmpl|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Other": {"PartType": "cmpl"}},
|
"PART__PartType=Cmpl|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "PartType": "cmpl"},
|
||||||
"PART__PartType=Cmpl|Polarity=Neg|Tense=Past": {"pos": "PART", "Polarity": "neg", "Tense": "past", "Other": {"PartType": "cmpl"}},
|
"PART__PartType=Cmpl|Polarity=Neg|Tense=Past": {"pos": "PART", "Polarity": "neg", "Tense": "past", "PartType": "cmpl"},
|
||||||
"PART__PartType=Cmpl": {"pos": "PART", "Other": {"PartType": "cmpl"}},
|
"PART__PartType=Cmpl": {"pos": "PART", "PartType": "cmpl"},
|
||||||
"PART__PartType=Comp": {"pos": "PART", "Other": {"PartType": "comp"}},
|
"PART__PartType=Comp": {"pos": "PART", "PartType": "comp"},
|
||||||
"PART__PartType=Cop|PronType=Rel": {"pos": "PART", "PronType": "rel", "Other": {"PartType": "cop"}},
|
"PART__PartType=Cop|PronType=Rel": {"pos": "PART", "PronType": "rel", "PartType": "cop"},
|
||||||
"PART__PartType=Deg": {"pos": "PART", "Other": {"PartType": "deg"}},
|
"PART__PartType=Deg": {"pos": "PART", "PartType": "deg"},
|
||||||
"PART__PartType=Inf": {"pos": "PART", "PartType": "inf"},
|
"PART__PartType=Inf": {"pos": "PART", "PartType": "inf"},
|
||||||
"PART__PartType=Num": {"pos": "PART", "Other": {"PartType": "num"}},
|
"PART__PartType=Num": {"pos": "PART", "PartType": "num"},
|
||||||
"PART__PartType=Pat": {"pos": "PART", "Other": {"PartType": "pat"}},
|
"PART__PartType=Pat": {"pos": "PART", "PartType": "pat"},
|
||||||
"PART__PartType=Vb|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Other": {"PartType": "vb"}},
|
"PART__PartType=Vb|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "PartType": "vb"},
|
||||||
"PART__PartType=Vb|Polarity=Neg|PronType=Rel": {"pos": "PART", "Polarity": "neg", "PronType": "rel", "Other": {"PartType": "vb"}},
|
"PART__PartType=Vb|Polarity=Neg|PronType=Rel": {"pos": "PART", "Polarity": "neg", "PronType": "rel", "PartType": "vb"},
|
||||||
"PART__PartType=Vb|Polarity=Neg|PronType=Rel|Tense=Past": {"pos": "PART", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Other": {"PartType": "vb"}},
|
"PART__PartType=Vb|Polarity=Neg|PronType=Rel|Tense=Past": {"pos": "PART", "Polarity": "neg", "PronType": "rel", "Tense": "past", "PartType": "vb"},
|
||||||
"PART__PartType=Vb|Polarity=Neg|Tense=Past": {"pos": "PART", "Polarity": "neg", "Tense": "past", "Other": {"PartType": "vb"}},
|
"PART__PartType=Vb|Polarity=Neg|Tense=Past": {"pos": "PART", "Polarity": "neg", "Tense": "past", "PartType": "vb"},
|
||||||
"PART__PartType=Vb": {"pos": "PART", "Other": {"PartType": "vb"}},
|
"PART__PartType=Vb": {"pos": "PART", "PartType": "vb"},
|
||||||
"PART__PartType=Vb|PronType=Rel": {"pos": "PART", "PronType": "rel", "Other": {"PartType": "vb"}},
|
"PART__PartType=Vb|PronType=Rel": {"pos": "PART", "PronType": "rel", "PartType": "vb"},
|
||||||
"PART__PartType=Vb|PronType=Rel|Tense=Past": {"pos": "PART", "PronType": "rel", "Tense": "past", "Other": {"PartType": "vb"}},
|
"PART__PartType=Vb|PronType=Rel|Tense=Past": {"pos": "PART", "PronType": "rel", "Tense": "past", "PartType": "vb"},
|
||||||
"PART__PartType=Vb|Tense=Past": {"pos": "PART", "Tense": "past", "Other": {"PartType": "vb"}},
|
"PART__PartType=Vb|Tense=Past": {"pos": "PART", "Tense": "past", "PartType": "vb"},
|
||||||
"PART__PartType=Voc": {"pos": "PART", "Other": {"PartType": "voc"}},
|
"PART__PartType=Voc": {"pos": "PART", "PartType": "voc"},
|
||||||
"PART___": {"pos": "PART"},
|
"PART___": {"pos": "PART"},
|
||||||
"PART__PronType=Rel": {"pos": "PART", "PronType": "rel"},
|
"PART__PronType=Rel": {"pos": "PART", "PronType": "rel"},
|
||||||
"PRON__Form=Len|Number=Sing|Person=2": {"pos": "PRON", "Number": "sing", "Person": 2, "Other": {"Form": "len"}},
|
"PRON__Form=Len|Number=Sing|Person=2": {"pos": "PRON", "Number": "sing", "Person": 2, "Form": "len"},
|
||||||
"PRON__Form=Len|PronType=Ind": {"pos": "PRON", "PronType": "ind", "Other": {"Form": "len"}},
|
"PRON__Form=Len|PronType=Ind": {"pos": "PRON", "PronType": "ind", "Form": "len"},
|
||||||
"PRON__Gender=Fem|Number=Sing|Person=3": {"pos": "PRON", "Gender": "fem", "Number": "sing", "Person": 3},
|
"PRON__Gender=Fem|Number=Sing|Person=3": {"pos": "PRON", "Gender": "fem", "Number": "sing", "Person": 3},
|
||||||
"PRON__Gender=Masc|Number=Sing|Person=3": {"pos": "PRON", "Gender": "masc", "Number": "sing", "Person": 3},
|
"PRON__Gender=Masc|Number=Sing|Person=3": {"pos": "PRON", "Gender": "masc", "Number": "sing", "Person": 3},
|
||||||
"PRON__Gender=Masc|Number=Sing|Person=3|PronType=Emp": {"pos": "PRON", "Gender": "masc", "Number": "sing", "Person": 3, "PronType": "emp"},
|
"PRON__Gender=Masc|Number=Sing|Person=3|PronType=Emp": {"pos": "PRON", "Gender": "masc", "Number": "sing", "Person": 3, "PronType": "emp"},
|
||||||
|
@ -232,103 +232,103 @@ TAG_MAP = {
|
||||||
"PRON__PronType=Ind": {"pos": "PRON", "PronType": "ind"},
|
"PRON__PronType=Ind": {"pos": "PRON", "PronType": "ind"},
|
||||||
"PRON__PronType=Int": {"pos": "PRON", "PronType": "int"},
|
"PRON__PronType=Int": {"pos": "PRON", "PronType": "int"},
|
||||||
"PRON__Reflex=Yes": {"pos": "PRON", "Reflex": "yes"},
|
"PRON__Reflex=Yes": {"pos": "PRON", "Reflex": "yes"},
|
||||||
"PROPN__Abbr=Yes": {"pos": "PROPN", "Other": {"Abbr": "yes"}},
|
"PROPN__Abbr=Yes": {"pos": "PROPN", "Abbr": "yes"},
|
||||||
"PROPN__Case=Dat|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "dat", "Gender": "fem", "Number": "sing"},
|
"PROPN__Case=Dat|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "dat", "Gender": "fem", "Number": "sing"},
|
||||||
"PROPN__Case=Gen|Definite=Def|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing"},
|
"PROPN__Case=Gen|Definite=Def|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing"},
|
||||||
"PROPN__Case=Gen|Form=Ecl|Gender=Fem|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"Form": "ecl"}},
|
"PROPN__Case=Gen|Form=Ecl|Gender=Fem|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "plur", "Form": "ecl"},
|
||||||
"PROPN__Case=Gen|Form=Ecl|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl"}},
|
"PROPN__Case=Gen|Form=Ecl|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "plur", "Form": "ecl"},
|
||||||
"PROPN__Case=Gen|Form=HPref|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "hpref"}},
|
"PROPN__Case=Gen|Form=HPref|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing", "Form": "hpref"},
|
||||||
"PROPN__Case=Gen|Form=Len|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
|
"PROPN__Case=Gen|Form=Len|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing", "Form": "len"},
|
||||||
"PROPN__Case=Gen|Form=Len|Gender=Fem": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Other": {"Form": "len"}},
|
"PROPN__Case=Gen|Form=Len|Gender=Fem": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Form": "len"},
|
||||||
"PROPN__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
|
"PROPN__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "sing", "Form": "len"},
|
||||||
"PROPN__Case=Gen|Form=Len|Gender=Masc": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Other": {"Form": "len"}},
|
"PROPN__Case=Gen|Form=Len|Gender=Masc": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Form": "len"},
|
||||||
"PROPN__Case=Gen|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing"},
|
"PROPN__Case=Gen|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing"},
|
||||||
"PROPN__Case=Gen|Gender=Fem": {"pos": "PROPN", "Case": "gen", "Gender": "fem"},
|
"PROPN__Case=Gen|Gender=Fem": {"pos": "PROPN", "Case": "gen", "Gender": "fem"},
|
||||||
"PROPN__Case=Gen|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"NounType": "weak"}},
|
"PROPN__Case=Gen|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "plur", "NounType": "weak"},
|
||||||
"PROPN__Case=Gen|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "sing"},
|
"PROPN__Case=Gen|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "sing"},
|
||||||
"PROPN__Case=Gen|Gender=Masc": {"pos": "PROPN", "Case": "gen", "Gender": "masc"},
|
"PROPN__Case=Gen|Gender=Masc": {"pos": "PROPN", "Case": "gen", "Gender": "masc"},
|
||||||
"PROPN__Case=NomAcc|Definite=Def|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "fem", "Number": "sing"},
|
"PROPN__Case=NomAcc|Definite=Def|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "fem", "Number": "sing"},
|
||||||
"PROPN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "plur"},
|
"PROPN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "plur"},
|
||||||
"PROPN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "sing"},
|
"PROPN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "sing"},
|
||||||
"PROPN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}},
|
"PROPN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "ecl"},
|
||||||
"PROPN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "ecl"}},
|
"PROPN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "ecl"},
|
||||||
"PROPN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "hpref"}},
|
"PROPN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "hpref"},
|
||||||
"PROPN__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
|
"PROPN__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "len"},
|
||||||
"PROPN__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
|
"PROPN__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "len"},
|
||||||
"PROPN__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing"},
|
"PROPN__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing"},
|
||||||
"PROPN__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "plur"},
|
"PROPN__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "plur"},
|
||||||
"PROPN__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing"},
|
"PROPN__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing"},
|
||||||
"PROPN__Case=NomAcc|Gender=Masc": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc"},
|
"PROPN__Case=NomAcc|Gender=Masc": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc"},
|
||||||
"PROPN__Case=Voc|Form=Len|Gender=Fem": {"pos": "PROPN", "Case": "voc", "Gender": "fem", "Other": {"Form": "len"}},
|
"PROPN__Case=Voc|Form=Len|Gender=Fem": {"pos": "PROPN", "Case": "voc", "Gender": "fem", "Form": "len"},
|
||||||
"PROPN__Case=Voc|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "voc", "Gender": "masc", "Number": "sing"},
|
"PROPN__Case=Voc|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "voc", "Gender": "masc", "Number": "sing"},
|
||||||
"PROPN__Gender=Masc|Number=Sing": {"pos": "PROPN", "Gender": "masc", "Number": "sing"},
|
"PROPN__Gender=Masc|Number=Sing": {"pos": "PROPN", "Gender": "masc", "Number": "sing"},
|
||||||
"PROPN___": {"pos": "PROPN"},
|
"PROPN___": {"pos": "PROPN"},
|
||||||
"PUNCT___": {"pos": "PUNCT"},
|
"PUNCT___": {"pos": "PUNCT"},
|
||||||
"SCONJ___": {"pos": "SCONJ"},
|
"SCONJ___": {"pos": "SCONJ"},
|
||||||
"SCONJ__Tense=Past|VerbForm=Cop": {"pos": "SCONJ", "Tense": "past", "Other": {"VerbForm": "cop"}},
|
"SCONJ__Tense=Past|VerbForm=Cop": {"pos": "SCONJ", "Tense": "past", "VerbForm": "cop"},
|
||||||
"SCONJ__VerbForm=Cop": {"pos": "SCONJ", "Other": {"VerbForm": "cop"}},
|
"SCONJ__VerbForm=Cop": {"pos": "SCONJ", "VerbForm": "cop"},
|
||||||
"SYM__Abbr=Yes": {"pos": "SYM", "Other": {"Abbr": "yes"}},
|
"SYM__Abbr=Yes": {"pos": "SYM", "Abbr": "yes"},
|
||||||
"VERB__Case=NomAcc|Gender=Masc|Mood=Ind|Number=Sing|Tense=Pres": {"pos": "VERB", "Case": "nom|acc", "Gender": "masc", "Mood": "ind", "Number": "sing", "Tense": "pres"},
|
"VERB__Case=NomAcc|Gender=Masc|Mood=Ind|Number=Sing|Tense=Pres": {"pos": "VERB", "Case": "nom|acc", "Gender": "masc", "Mood": "ind", "Number": "sing", "Tense": "pres"},
|
||||||
"VERB__Dialect=Munster|Form=Len|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Dialect": "munster", "Form": "len"}},
|
"VERB__Dialect=Munster|Form=Len|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Dialect": "munster", "Form": "len"},
|
||||||
"VERB__Foreign=Yes": {"pos": "VERB", "Foreign": "yes"},
|
"VERB__Foreign=Yes": {"pos": "VERB", "Foreign": "yes"},
|
||||||
"VERB__Form=Ecl|Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1, "Other": {"Form": "ecl"}},
|
"VERB__Form=Ecl|Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1, "Form": "ecl"},
|
||||||
"VERB__Form=Ecl|Mood=Cnd|Polarity=Neg": {"pos": "VERB", "Mood": "cnd", "Polarity": "neg", "Other": {"Form": "ecl"}},
|
"VERB__Form=Ecl|Mood=Cnd|Polarity=Neg": {"pos": "VERB", "Mood": "cnd", "Polarity": "neg", "Form": "ecl"},
|
||||||
"VERB__Form=Ecl|Mood=Cnd": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "ecl"}},
|
"VERB__Form=Ecl|Mood=Cnd": {"pos": "VERB", "Mood": "cnd", "Form": "ecl"},
|
||||||
"VERB__Form=Ecl|Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "ecl", "Voice": "auto"}},
|
"VERB__Form=Ecl|Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Form": "ecl", "Voice": "auto"},
|
||||||
"VERB__Form=Ecl|Mood=Imp|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "imp", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Form": "ecl"}},
|
"VERB__Form=Ecl|Mood=Imp|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "imp", "Number": "sing", "Person": 1, "Tense": "past", "Form": "ecl"},
|
||||||
"VERB__Form=Ecl|Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Other": {"Form": "ecl"}},
|
"VERB__Form=Ecl|Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Form": "ecl"},
|
||||||
"VERB__Form=Ecl|Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres", "Other": {"Form": "ecl"}},
|
"VERB__Form=Ecl|Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres", "Form": "ecl"},
|
||||||
"VERB__Form=Ecl|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Form": "ecl"}},
|
"VERB__Form=Ecl|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Form": "ecl"},
|
||||||
"VERB__Form=Ecl|Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres", "Other": {"Form": "ecl"}},
|
"VERB__Form=Ecl|Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres", "Form": "ecl"},
|
||||||
"VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "ecl"}},
|
"VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Form": "ecl"},
|
||||||
"VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "ecl", "Voice": "auto"}},
|
"VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Form": "ecl", "Voice": "auto"},
|
||||||
"VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Form": "ecl"}},
|
"VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Form": "ecl"},
|
||||||
"VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "ecl"}},
|
"VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Form": "ecl"},
|
||||||
"VERB__Form=Ecl|Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "ecl"}},
|
"VERB__Form=Ecl|Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Form": "ecl"},
|
||||||
"VERB__Form=Ecl|Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "ecl", "Voice": "auto"}},
|
"VERB__Form=Ecl|Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Form": "ecl", "Voice": "auto"},
|
||||||
"VERB__Form=Ecl|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Form": "ecl"}},
|
"VERB__Form=Ecl|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Form": "ecl"},
|
||||||
"VERB__Form=Ecl|Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "ecl"}},
|
"VERB__Form=Ecl|Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Form": "ecl"},
|
||||||
"VERB__Form=Ecl|Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "ecl", "Voice": "auto"}},
|
"VERB__Form=Ecl|Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Form": "ecl", "Voice": "auto"},
|
||||||
"VERB__Form=Ecl|Mood=Sub|Tense=Pres": {"pos": "VERB", "Mood": "sub", "Tense": "pres", "Other": {"Form": "ecl"}},
|
"VERB__Form=Ecl|Mood=Sub|Tense=Pres": {"pos": "VERB", "Mood": "sub", "Tense": "pres", "Form": "ecl"},
|
||||||
"VERB__Form=Ecl": {"pos": "VERB", "Other": {"Form": "ecl"}},
|
"VERB__Form=Ecl": {"pos": "VERB", "Form": "ecl"},
|
||||||
"VERB__Form=Emp|Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres", "Other": {"Form": "emp"}},
|
"VERB__Form=Emp|Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres", "Form": "emp"},
|
||||||
"VERB__Form=Emp|Mood=Ind|Number=Sing|Person=1|PronType=Rel|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "PronType": "rel", "Tense": "pres", "Other": {"Form": "emp"}},
|
"VERB__Form=Emp|Mood=Ind|Number=Sing|Person=1|PronType=Rel|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "PronType": "rel", "Tense": "pres", "Form": "emp"},
|
||||||
"VERB__Form=Emp|Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres", "Other": {"Form": "emp"}},
|
"VERB__Form=Emp|Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres", "Form": "emp"},
|
||||||
"VERB__Form=Len|Mood=Cnd|Number=Plur|Person=3": {"pos": "VERB", "Mood": "cnd", "Number": "plur", "Person": 3, "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Cnd|Number=Plur|Person=3": {"pos": "VERB", "Mood": "cnd", "Number": "plur", "Person": 3, "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1, "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1, "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Cnd|Number=Sing|Person=2": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 2, "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Cnd|Number=Sing|Person=2": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 2, "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Cnd|Polarity=Neg": {"pos": "VERB", "Mood": "cnd", "Polarity": "neg", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Cnd|Polarity=Neg": {"pos": "VERB", "Mood": "cnd", "Polarity": "neg", "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Cnd": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Cnd": {"pos": "VERB", "Mood": "cnd", "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "len", "Voice": "auto"}},
|
"VERB__Form=Len|Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Form": "len", "Voice": "auto"},
|
||||||
"VERB__Form=Len|Mood=Imp|Number=Plur|Person=3|Tense=Past": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 3, "Tense": "past", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Imp|Number=Plur|Person=3|Tense=Past": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 3, "Tense": "past", "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Imp|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Other": {"Form": "len", "Voice": "auto"}},
|
"VERB__Form=Len|Mood=Imp|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Form": "len", "Voice": "auto"},
|
||||||
"VERB__Form=Len|Mood=Imp|Voice=Auto": {"pos": "VERB", "Mood": "imp", "Other": {"Form": "len", "Voice": "auto"}},
|
"VERB__Form=Len|Mood=Imp|Voice=Auto": {"pos": "VERB", "Mood": "imp", "Form": "len", "Voice": "auto"},
|
||||||
"VERB__Form=Len|Mood=Ind|Number=Plur|Person=1|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "fut", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Ind|Number=Plur|Person=1|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "fut", "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Ind|Number=Plur|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "past", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Ind|Number=Plur|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "past", "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Ind|Number=Plur|Person=3|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 3, "Tense": "past", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Ind|Number=Plur|Person=3|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 3, "Tense": "past", "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Polarity": "neg", "Tense": "past", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Polarity": "neg", "Tense": "past", "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Polarity": "neg", "Tense": "pres", "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "len", "Voice": "auto"}},
|
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Form": "len", "Voice": "auto"},
|
||||||
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Form": "len", "Voice": "auto"}},
|
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Form": "len", "Voice": "auto"},
|
||||||
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len", "Voice": "auto"}},
|
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Form": "len", "Voice": "auto"},
|
||||||
"VERB__Form=Len|Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "len", "Voice": "auto"}},
|
"VERB__Form=Len|Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Form": "len", "Voice": "auto"},
|
||||||
"VERB__Form=Len|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Ind|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Form": "len", "Voice": "auto"}},
|
"VERB__Form=Len|Mood=Ind|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Form": "len", "Voice": "auto"},
|
||||||
"VERB__Form=Len|Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Form": "len"},
|
||||||
"VERB__Form=Len|Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "len", "Voice": "auto"}},
|
"VERB__Form=Len|Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Form": "len", "Voice": "auto"},
|
||||||
"VERB__Form=Len|Mood=Sub|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "sub", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Mood=Sub|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "sub", "Polarity": "neg", "Tense": "pres", "Form": "len"},
|
||||||
"VERB__Form=Len|Polarity=Neg": {"pos": "VERB", "Polarity": "neg", "Other": {"Form": "len"}},
|
"VERB__Form=Len|Polarity=Neg": {"pos": "VERB", "Polarity": "neg", "Form": "len"},
|
||||||
"VERB__Form=Len": {"pos": "VERB", "Other": {"Form": "len"}},
|
"VERB__Form=Len": {"pos": "VERB", "Form": "len"},
|
||||||
"VERB__Mood=Cnd|Number=Plur|Person=3": {"pos": "VERB", "Mood": "cnd", "Number": "plur", "Person": 3},
|
"VERB__Mood=Cnd|Number=Plur|Person=3": {"pos": "VERB", "Mood": "cnd", "Number": "plur", "Person": 3},
|
||||||
"VERB__Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1},
|
"VERB__Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1},
|
||||||
"VERB__Mood=Cnd": {"pos": "VERB", "Mood": "cnd"},
|
"VERB__Mood=Cnd": {"pos": "VERB", "Mood": "cnd"},
|
||||||
"VERB__Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Other": {"Voice": "auto"}},
|
"VERB__Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Voice": "auto"},
|
||||||
"VERB__Mood=Imp|Number=Plur|Person=1|Polarity=Neg": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 1, "Polarity": "neg"},
|
"VERB__Mood=Imp|Number=Plur|Person=1|Polarity=Neg": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 1, "Polarity": "neg"},
|
||||||
"VERB__Mood=Imp|Number=Plur|Person=1": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 1},
|
"VERB__Mood=Imp|Number=Plur|Person=1": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 1},
|
||||||
"VERB__Mood=Imp|Number=Plur|Person=2": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 2},
|
"VERB__Mood=Imp|Number=Plur|Person=2": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 2},
|
||||||
|
@ -338,28 +338,28 @@ TAG_MAP = {
|
||||||
"VERB__Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres"},
|
"VERB__Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres"},
|
||||||
"VERB__Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past"},
|
"VERB__Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past"},
|
||||||
"VERB__Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres"},
|
"VERB__Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres"},
|
||||||
"VERB__Mood=Ind|Polarity=Neg|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Voice": "auto"}},
|
"VERB__Mood=Ind|Polarity=Neg|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Voice": "auto"},
|
||||||
"VERB__Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres"},
|
"VERB__Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres"},
|
||||||
"VERB__Mood=Ind|PronType=Rel|Tense=Fut": {"pos": "VERB", "Mood": "ind", "PronType": "rel", "Tense": "fut"},
|
"VERB__Mood=Ind|PronType=Rel|Tense=Fut": {"pos": "VERB", "Mood": "ind", "PronType": "rel", "Tense": "fut"},
|
||||||
"VERB__Mood=Ind|PronType=Rel|Tense=Pres": {"pos": "VERB", "Mood": "ind", "PronType": "rel", "Tense": "pres"},
|
"VERB__Mood=Ind|PronType=Rel|Tense=Pres": {"pos": "VERB", "Mood": "ind", "PronType": "rel", "Tense": "pres"},
|
||||||
"VERB__Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut"},
|
"VERB__Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut"},
|
||||||
"VERB__Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Voice": "auto"}},
|
"VERB__Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Voice": "auto"},
|
||||||
"VERB__Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past"},
|
"VERB__Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past"},
|
||||||
"VERB__Mood=Ind|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Voice": "auto"}},
|
"VERB__Mood=Ind|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Voice": "auto"},
|
||||||
"VERB__Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres"},
|
"VERB__Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres"},
|
||||||
"VERB__Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Voice": "auto"}},
|
"VERB__Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Voice": "auto"},
|
||||||
"VERB___": {"pos": "VERB"},
|
"VERB___": {"pos": "VERB"},
|
||||||
"X__Abbr=Yes": {"pos": "X", "Other": {"Abbr": "yes"}},
|
"X__Abbr=Yes": {"pos": "X", "Abbr": "yes"},
|
||||||
"X__Case=NomAcc|Foreign=Yes|Gender=Fem|Number=Sing": {"pos": "X", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Foreign": "yes"},
|
"X__Case=NomAcc|Foreign=Yes|Gender=Fem|Number=Sing": {"pos": "X", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Foreign": "yes"},
|
||||||
"X__Definite=Def|Dialect=Ulster": {"pos": "X", "Definite": "def", "Other": {"Dialect": "ulster"}},
|
"X__Definite=Def|Dialect=Ulster": {"pos": "X", "Definite": "def", "Dialect": "ulster"},
|
||||||
"X__Dialect=Munster|Form=Len|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "X", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Dialect": "munster", "Form": "len"}},
|
"X__Dialect=Munster|Form=Len|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "X", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Dialect": "munster", "Form": "len"},
|
||||||
"X__Dialect=Munster|Mood=Imp|Number=Sing|Person=2|Polarity=Neg": {"pos": "X", "Mood": "imp", "Number": "sing", "Person": 2, "Polarity": "neg", "Other": {"Dialect": "munster"}},
|
"X__Dialect=Munster|Mood=Imp|Number=Sing|Person=2|Polarity=Neg": {"pos": "X", "Mood": "imp", "Number": "sing", "Person": 2, "Polarity": "neg", "Dialect": "munster"},
|
||||||
"X__Dialect=Munster|Mood=Ind|Tense=Past|Voice=Auto": {"pos": "X", "Mood": "ind", "Tense": "past", "Other": {"Dialect": "munster", "Voice": "auto"}},
|
"X__Dialect=Munster|Mood=Ind|Tense=Past|Voice=Auto": {"pos": "X", "Mood": "ind", "Tense": "past", "Dialect": "munster", "Voice": "auto"},
|
||||||
"X__Dialect=Munster": {"pos": "X", "Other": {"Dialect": "munster"}},
|
"X__Dialect=Munster": {"pos": "X", "Dialect": "munster"},
|
||||||
"X__Dialect=Munster|PronType=Dem": {"pos": "X", "PronType": "dem", "Other": {"Dialect": "munster"}},
|
"X__Dialect=Munster|PronType=Dem": {"pos": "X", "PronType": "dem", "Dialect": "munster"},
|
||||||
"X__Dialect=Ulster|Gender=Masc|Number=Sing|Person=3": {"pos": "X", "Gender": "masc", "Number": "sing", "Person": 3, "Other": {"Dialect": "ulster"}},
|
"X__Dialect=Ulster|Gender=Masc|Number=Sing|Person=3": {"pos": "X", "Gender": "masc", "Number": "sing", "Person": 3, "Dialect": "ulster"},
|
||||||
"X__Dialect=Ulster|PartType=Vb|Polarity=Neg": {"pos": "X", "Polarity": "neg", "Other": {"Dialect": "ulster", "PartType": "vb"}},
|
"X__Dialect=Ulster|PartType=Vb|Polarity=Neg": {"pos": "X", "Polarity": "neg", "Dialect": "ulster", "PartType": "vb"},
|
||||||
"X__Dialect=Ulster|VerbForm=Cop": {"pos": "X", "Other": {"Dialect": "ulster", "VerbForm": "cop"}},
|
"X__Dialect=Ulster|VerbForm=Cop": {"pos": "X", "Dialect": "ulster", "VerbForm": "cop"},
|
||||||
"X__Foreign=Yes": {"pos": "X", "Foreign": "yes"},
|
"X__Foreign=Yes": {"pos": "X", "Foreign": "yes"},
|
||||||
"X___": {"pos": "X"}
|
"X___": {"pos": "X"}
|
||||||
}
|
}
|
||||||
|
|
|
@ -195,7 +195,7 @@ MORPH_RULES = {
|
||||||
"seg": {
|
"seg": {
|
||||||
LEMMA: PRON_LEMMA,
|
LEMMA: PRON_LEMMA,
|
||||||
"Person": "Three",
|
"Person": "Three",
|
||||||
"Number": ("Sing", "Plur"),
|
"Number": "Sing,Plur",
|
||||||
"Reflex": "Yes",
|
"Reflex": "Yes",
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -248,7 +248,7 @@ MORPH_RULES = {
|
||||||
},
|
},
|
||||||
"deres": {
|
"deres": {
|
||||||
LEMMA: "deres",
|
LEMMA: "deres",
|
||||||
"Person": ("Two", "Three"),
|
"Person": "Two,Three",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Poss": "Yes",
|
"Poss": "Yes",
|
||||||
"Gender": "Masc",
|
"Gender": "Masc",
|
||||||
|
@ -309,7 +309,7 @@ MORPH_RULES = {
|
||||||
},
|
},
|
||||||
"deres": {
|
"deres": {
|
||||||
LEMMA: "deres",
|
LEMMA: "deres",
|
||||||
"Person": ("Two", "Three"),
|
"Person": "Two,Three",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Poss": "Yes",
|
"Poss": "Yes",
|
||||||
"Gender": "Fem",
|
"Gender": "Fem",
|
||||||
|
@ -370,7 +370,7 @@ MORPH_RULES = {
|
||||||
},
|
},
|
||||||
"deres": {
|
"deres": {
|
||||||
LEMMA: "deres",
|
LEMMA: "deres",
|
||||||
"Person": ("Two", "Three"),
|
"Person": "Two,Three",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Poss": "Yes",
|
"Poss": "Yes",
|
||||||
"Gender": "Neut",
|
"Gender": "Neut",
|
||||||
|
@ -400,7 +400,7 @@ MORPH_RULES = {
|
||||||
"våre": {LEMMA: "vår", "Person": "One", "Number": "Plur", "Poss": "Yes"},
|
"våre": {LEMMA: "vår", "Person": "One", "Number": "Plur", "Poss": "Yes"},
|
||||||
"deres": {
|
"deres": {
|
||||||
LEMMA: "deres",
|
LEMMA: "deres",
|
||||||
"Person": ("Two", "Three"),
|
"Person": "Two,Three",
|
||||||
"Number": "Plur",
|
"Number": "Plur",
|
||||||
"Poss": "Yes",
|
"Poss": "Yes",
|
||||||
},
|
},
|
||||||
|
@ -448,21 +448,21 @@ MORPH_RULES = {
|
||||||
"PronType": "Prs",
|
"PronType": "Prs",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Person": "Three",
|
"Person": "Three",
|
||||||
"Gender": ("Fem", "Masc"),
|
"Gender": "Fem,Masc",
|
||||||
},
|
},
|
||||||
"den": {
|
"den": {
|
||||||
LEMMA: PRON_LEMMA,
|
LEMMA: PRON_LEMMA,
|
||||||
"PronType": "Prs",
|
"PronType": "Prs",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Person": "Three",
|
"Person": "Three",
|
||||||
"Gender": ("Fem", "Masc"),
|
"Gender": "Fem,Masc",
|
||||||
},
|
},
|
||||||
"ingen": {
|
"ingen": {
|
||||||
LEMMA: PRON_LEMMA,
|
LEMMA: PRON_LEMMA,
|
||||||
"PronType": "Prs",
|
"PronType": "Prs",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Person": "Three",
|
"Person": "Three",
|
||||||
"Gender": ("Fem", "Masc"),
|
"Gender": "Fem,Masc",
|
||||||
"Polarity": "Neg",
|
"Polarity": "Neg",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -475,7 +475,7 @@ MORPH_RULES = {
|
||||||
LEMMA: PRON_LEMMA,
|
LEMMA: PRON_LEMMA,
|
||||||
"PronType": "Prs",
|
"PronType": "Prs",
|
||||||
"Number": "Sing",
|
"Number": "Sing",
|
||||||
"Case": ("Gen", "Nom"),
|
"Case": "Gen,Nom",
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"PRON__Animacy=Anim|Case=Gen|Number=Sing|PronType=Prs": {
|
"PRON__Animacy=Anim|Case=Gen|Number=Sing|PronType=Prs": {
|
||||||
|
|
|
@ -105,7 +105,7 @@ MORPH_RULES = {
|
||||||
"PronType": "Prs",
|
"PronType": "Prs",
|
||||||
"Person": "Three",
|
"Person": "Three",
|
||||||
"Number": "Plur",
|
"Number": "Plur",
|
||||||
"Case": ("Nom", "Acc"),
|
"Case": "Nom,Acc",
|
||||||
},
|
},
|
||||||
"dem": {
|
"dem": {
|
||||||
LEMMA: PRON_LEMMA,
|
LEMMA: PRON_LEMMA,
|
||||||
|
@ -166,7 +166,7 @@ MORPH_RULES = {
|
||||||
LEMMA: PRON_LEMMA,
|
LEMMA: PRON_LEMMA,
|
||||||
"PronType": "Prs",
|
"PronType": "Prs",
|
||||||
"Person": "Two",
|
"Person": "Two",
|
||||||
"Number": ("Sing", "Plur"),
|
"Number": "Sing,Plur",
|
||||||
"Gender": "Masc",
|
"Gender": "Masc",
|
||||||
"Poss": "Yes",
|
"Poss": "Yes",
|
||||||
"Reflex": "Yes",
|
"Reflex": "Yes",
|
||||||
|
@ -175,7 +175,7 @@ MORPH_RULES = {
|
||||||
LEMMA: PRON_LEMMA,
|
LEMMA: PRON_LEMMA,
|
||||||
"PronType": "Prs",
|
"PronType": "Prs",
|
||||||
"Person": "Two",
|
"Person": "Two",
|
||||||
"Number": ("Sing", "Plur"),
|
"Number": "Sing,Plur",
|
||||||
"Gender": "Fem",
|
"Gender": "Fem",
|
||||||
"Poss": "Yes",
|
"Poss": "Yes",
|
||||||
"Reflex": "Yes",
|
"Reflex": "Yes",
|
||||||
|
@ -184,7 +184,7 @@ MORPH_RULES = {
|
||||||
LEMMA: PRON_LEMMA,
|
LEMMA: PRON_LEMMA,
|
||||||
"PronType": "Prs",
|
"PronType": "Prs",
|
||||||
"Person": "Two",
|
"Person": "Two",
|
||||||
"Number": ("Sing", "Plur"),
|
"Number": "Sing,Plur",
|
||||||
"Poss": "Yes",
|
"Poss": "Yes",
|
||||||
"Reflex": "Yes",
|
"Reflex": "Yes",
|
||||||
},
|
},
|
||||||
|
@ -272,7 +272,7 @@ MORPH_RULES = {
|
||||||
"VBZ": {
|
"VBZ": {
|
||||||
"är": {
|
"är": {
|
||||||
"VerbForm": "Fin",
|
"VerbForm": "Fin",
|
||||||
"Person": ("One", "Two", "Three"),
|
"Person": "One,Two,Three",
|
||||||
"Tense": "Pres",
|
"Tense": "Pres",
|
||||||
"Mood": "Ind",
|
"Mood": "Ind",
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,25 +0,0 @@
|
||||||
from ..symbols import POS, ADV, NOUN, ADP, PRON, SCONJ, PROPN, DET, SYM, INTJ
|
|
||||||
from ..symbols import PUNCT, NUM, AUX, X, CONJ, ADJ, VERB, PART, SPACE, CCONJ
|
|
||||||
|
|
||||||
|
|
||||||
TAG_MAP = {
|
|
||||||
"ADV": {POS: ADV},
|
|
||||||
"NOUN": {POS: NOUN},
|
|
||||||
"ADP": {POS: ADP},
|
|
||||||
"PRON": {POS: PRON},
|
|
||||||
"SCONJ": {POS: SCONJ},
|
|
||||||
"PROPN": {POS: PROPN},
|
|
||||||
"DET": {POS: DET},
|
|
||||||
"SYM": {POS: SYM},
|
|
||||||
"INTJ": {POS: INTJ},
|
|
||||||
"PUNCT": {POS: PUNCT},
|
|
||||||
"NUM": {POS: NUM},
|
|
||||||
"AUX": {POS: AUX},
|
|
||||||
"X": {POS: X},
|
|
||||||
"CONJ": {POS: CONJ},
|
|
||||||
"CCONJ": {POS: CCONJ},
|
|
||||||
"ADJ": {POS: ADJ},
|
|
||||||
"VERB": {POS: VERB},
|
|
||||||
"PART": {POS: PART},
|
|
||||||
"SP": {POS: SPACE},
|
|
||||||
}
|
|
|
@ -10,8 +10,8 @@ TAG_MAP = {
|
||||||
'""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
|
'""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
|
||||||
"''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
|
"''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
|
||||||
":": {POS: PUNCT},
|
":": {POS: PUNCT},
|
||||||
"$": {POS: SYM, "Other": {"SymType": "currency"}},
|
"$": {POS: SYM, "SymType": "currency"},
|
||||||
"#": {POS: SYM, "Other": {"SymType": "numbersign"}},
|
"#": {POS: SYM, "SymType": "numbersign"},
|
||||||
"AFX": {POS: ADJ, "Hyph": "yes"},
|
"AFX": {POS: ADJ, "Hyph": "yes"},
|
||||||
"CC": {POS: CCONJ, "ConjType": "coor"},
|
"CC": {POS: CCONJ, "ConjType": "coor"},
|
||||||
"CD": {POS: NUM, "NumType": "card"},
|
"CD": {POS: NUM, "NumType": "card"},
|
||||||
|
@ -52,7 +52,7 @@ TAG_MAP = {
|
||||||
"VerbForm": "fin",
|
"VerbForm": "fin",
|
||||||
"Tense": "pres",
|
"Tense": "pres",
|
||||||
"Number": "sing",
|
"Number": "sing",
|
||||||
"Person": 3,
|
"Person": "3",
|
||||||
},
|
},
|
||||||
"WDT": {POS: ADJ, "PronType": "int|rel"},
|
"WDT": {POS: ADJ, "PronType": "int|rel"},
|
||||||
"WP": {POS: NOUN, "PronType": "int|rel"},
|
"WP": {POS: NOUN, "PronType": "int|rel"},
|
||||||
|
|
|
@ -1,25 +0,0 @@
|
||||||
from ..symbols import POS, ADV, NOUN, ADP, PRON, SCONJ, PROPN, DET, SYM, INTJ
|
|
||||||
from ..symbols import PUNCT, NUM, AUX, X, CONJ, ADJ, VERB, PART, SPACE, CCONJ
|
|
||||||
|
|
||||||
|
|
||||||
TAG_MAP = {
|
|
||||||
"ADV": {POS: ADV},
|
|
||||||
"NOUN": {POS: NOUN},
|
|
||||||
"ADP": {POS: ADP},
|
|
||||||
"PRON": {POS: PRON},
|
|
||||||
"SCONJ": {POS: SCONJ},
|
|
||||||
"PROPN": {POS: PROPN},
|
|
||||||
"DET": {POS: DET},
|
|
||||||
"SYM": {POS: SYM},
|
|
||||||
"INTJ": {POS: INTJ},
|
|
||||||
"PUNCT": {POS: PUNCT},
|
|
||||||
"NUM": {POS: NUM},
|
|
||||||
"AUX": {POS: AUX},
|
|
||||||
"X": {POS: X},
|
|
||||||
"CONJ": {POS: CONJ},
|
|
||||||
"CCONJ": {POS: CCONJ},
|
|
||||||
"ADJ": {POS: ADJ},
|
|
||||||
"VERB": {POS: VERB},
|
|
||||||
"PART": {POS: PART},
|
|
||||||
"SP": {POS: SPACE},
|
|
||||||
}
|
|
|
@ -2,6 +2,7 @@ from cymem.cymem cimport Pool
|
||||||
from preshed.maps cimport PreshMap, PreshMapArray
|
from preshed.maps cimport PreshMap, PreshMapArray
|
||||||
from libc.stdint cimport uint64_t
|
from libc.stdint cimport uint64_t
|
||||||
from murmurhash cimport mrmr
|
from murmurhash cimport mrmr
|
||||||
|
cimport numpy as np
|
||||||
|
|
||||||
from .structs cimport TokenC, MorphAnalysisC
|
from .structs cimport TokenC, MorphAnalysisC
|
||||||
from .strings cimport StringStore
|
from .strings cimport StringStore
|
||||||
|
@ -20,12 +21,11 @@ cdef class Morphology:
|
||||||
cdef readonly object tag_names
|
cdef readonly object tag_names
|
||||||
cdef readonly object reverse_index
|
cdef readonly object reverse_index
|
||||||
cdef readonly object exc
|
cdef readonly object exc
|
||||||
cdef readonly object _feat_map
|
|
||||||
cdef readonly PreshMapArray _cache
|
cdef readonly PreshMapArray _cache
|
||||||
cdef readonly int n_tags
|
cdef readonly int n_tags
|
||||||
|
|
||||||
cpdef update(self, hash_t morph, features)
|
cdef MorphAnalysisC create_morph_tag(self, field_feature_pairs) except *
|
||||||
cdef hash_t insert(self, MorphAnalysisC tag) except 0
|
cdef int insert(self, MorphAnalysisC tag) except -1
|
||||||
|
|
||||||
cdef int assign_untagged(self, TokenC* token) except -1
|
cdef int assign_untagged(self, TokenC* token) except -1
|
||||||
cdef int assign_tag(self, TokenC* token, tag) except -1
|
cdef int assign_tag(self, TokenC* token, tag) except -1
|
||||||
|
@ -34,8 +34,7 @@ cdef class Morphology:
|
||||||
cdef int _assign_tag_from_exceptions(self, TokenC* token, int tag_id) except -1
|
cdef int _assign_tag_from_exceptions(self, TokenC* token, int tag_id) except -1
|
||||||
|
|
||||||
|
|
||||||
cdef int check_feature(const MorphAnalysisC* tag, attr_t feature) nogil
|
cdef int check_feature(const MorphAnalysisC* morph, attr_t feature) nogil
|
||||||
cdef attr_t get_field(const MorphAnalysisC* tag, int field) nogil
|
cdef list list_features(const MorphAnalysisC* morph)
|
||||||
cdef list list_features(const MorphAnalysisC* tag)
|
cdef np.ndarray get_by_field(const MorphAnalysisC* morph, attr_t field)
|
||||||
|
cdef int get_n_by_field(attr_t* results, const MorphAnalysisC* morph, attr_t field) nogil
|
||||||
cdef tag_to_json(const MorphAnalysisC* tag)
|
|
||||||
|
|
1056
spacy/morphology.pyx
1056
spacy/morphology.pyx
File diff suppressed because it is too large
Load Diff
|
@ -82,52 +82,11 @@ cdef struct TokenC:
|
||||||
|
|
||||||
|
|
||||||
cdef struct MorphAnalysisC:
|
cdef struct MorphAnalysisC:
|
||||||
univ_pos_t pos
|
hash_t key
|
||||||
int length
|
int length
|
||||||
|
attr_t* fields
|
||||||
|
attr_t* features
|
||||||
|
|
||||||
attr_t abbr
|
|
||||||
attr_t adp_type
|
|
||||||
attr_t adv_type
|
|
||||||
attr_t animacy
|
|
||||||
attr_t aspect
|
|
||||||
attr_t case
|
|
||||||
attr_t conj_type
|
|
||||||
attr_t connegative
|
|
||||||
attr_t definite
|
|
||||||
attr_t degree
|
|
||||||
attr_t derivation
|
|
||||||
attr_t echo
|
|
||||||
attr_t foreign
|
|
||||||
attr_t gender
|
|
||||||
attr_t hyph
|
|
||||||
attr_t inf_form
|
|
||||||
attr_t mood
|
|
||||||
attr_t negative
|
|
||||||
attr_t number
|
|
||||||
attr_t name_type
|
|
||||||
attr_t noun_type
|
|
||||||
attr_t num_form
|
|
||||||
attr_t num_type
|
|
||||||
attr_t num_value
|
|
||||||
attr_t part_form
|
|
||||||
attr_t part_type
|
|
||||||
attr_t person
|
|
||||||
attr_t polite
|
|
||||||
attr_t polarity
|
|
||||||
attr_t poss
|
|
||||||
attr_t prefix
|
|
||||||
attr_t prep_case
|
|
||||||
attr_t pron_type
|
|
||||||
attr_t punct_side
|
|
||||||
attr_t punct_type
|
|
||||||
attr_t reflex
|
|
||||||
attr_t style
|
|
||||||
attr_t style_variant
|
|
||||||
attr_t tense
|
|
||||||
attr_t typo
|
|
||||||
attr_t verb_form
|
|
||||||
attr_t voice
|
|
||||||
attr_t verb_type
|
|
||||||
|
|
||||||
# Internal struct, for storage and disambiguation of entities.
|
# Internal struct, for storage and disambiguation of entities.
|
||||||
cdef struct KBEntryC:
|
cdef struct KBEntryC:
|
||||||
|
|
|
@ -108,282 +108,282 @@ cdef enum symbol_t:
|
||||||
EOL
|
EOL
|
||||||
SPACE
|
SPACE
|
||||||
|
|
||||||
Animacy_anim
|
DEPRECATED001
|
||||||
Animacy_inan
|
DEPRECATED002
|
||||||
Animacy_hum # U20
|
DEPRECATED003
|
||||||
Animacy_nhum
|
DEPRECATED004
|
||||||
Aspect_freq
|
DEPRECATED005
|
||||||
Aspect_imp
|
DEPRECATED006
|
||||||
Aspect_mod
|
DEPRECATED007
|
||||||
Aspect_none
|
DEPRECATED008
|
||||||
Aspect_perf
|
DEPRECATED009
|
||||||
Aspect_iter # U20
|
DEPRECATED010
|
||||||
Aspect_hab # U20
|
DEPRECATED011
|
||||||
Case_abe
|
DEPRECATED012
|
||||||
Case_abl
|
DEPRECATED013
|
||||||
Case_abs
|
DEPRECATED014
|
||||||
Case_acc
|
DEPRECATED015
|
||||||
Case_ade
|
DEPRECATED016
|
||||||
Case_all
|
DEPRECATED017
|
||||||
Case_cau
|
DEPRECATED018
|
||||||
Case_com
|
DEPRECATED019
|
||||||
Case_cmp # U20
|
DEPRECATED020
|
||||||
Case_dat
|
DEPRECATED021
|
||||||
Case_del
|
DEPRECATED022
|
||||||
Case_dis
|
DEPRECATED023
|
||||||
Case_ela
|
DEPRECATED024
|
||||||
Case_equ # U20
|
DEPRECATED025
|
||||||
Case_ess
|
DEPRECATED026
|
||||||
Case_gen
|
DEPRECATED027
|
||||||
Case_ill
|
DEPRECATED028
|
||||||
Case_ine
|
DEPRECATED029
|
||||||
Case_ins
|
DEPRECATED030
|
||||||
Case_loc
|
DEPRECATED031
|
||||||
Case_lat
|
DEPRECATED032
|
||||||
Case_nom
|
DEPRECATED033
|
||||||
Case_par
|
DEPRECATED034
|
||||||
Case_sub
|
DEPRECATED035
|
||||||
Case_sup
|
DEPRECATED036
|
||||||
Case_tem
|
DEPRECATED037
|
||||||
Case_ter
|
DEPRECATED038
|
||||||
Case_tra
|
DEPRECATED039
|
||||||
Case_voc
|
DEPRECATED040
|
||||||
Definite_two
|
DEPRECATED041
|
||||||
Definite_def
|
DEPRECATED042
|
||||||
Definite_red
|
DEPRECATED043
|
||||||
Definite_cons # U20
|
DEPRECATED044
|
||||||
Definite_ind
|
DEPRECATED045
|
||||||
Definite_spec # U20
|
DEPRECATED046
|
||||||
Degree_cmp
|
DEPRECATED047
|
||||||
Degree_comp
|
DEPRECATED048
|
||||||
Degree_none
|
DEPRECATED049
|
||||||
Degree_pos
|
DEPRECATED050
|
||||||
Degree_sup
|
DEPRECATED051
|
||||||
Degree_abs
|
DEPRECATED052
|
||||||
Degree_com
|
DEPRECATED053
|
||||||
Degree_dim # du
|
DEPRECATED054
|
||||||
Degree_equ # U20
|
DEPRECATED055
|
||||||
Evident_nfh # U20
|
DEPRECATED056
|
||||||
Gender_com
|
DEPRECATED057
|
||||||
Gender_fem
|
DEPRECATED058
|
||||||
Gender_masc
|
DEPRECATED059
|
||||||
Gender_neut
|
DEPRECATED060
|
||||||
Mood_cnd
|
DEPRECATED061
|
||||||
Mood_imp
|
DEPRECATED062
|
||||||
Mood_ind
|
DEPRECATED063
|
||||||
Mood_n
|
DEPRECATED064
|
||||||
Mood_pot
|
DEPRECATED065
|
||||||
Mood_sub
|
DEPRECATED066
|
||||||
Mood_opt
|
DEPRECATED067
|
||||||
Mood_prp # U20
|
DEPRECATED068
|
||||||
Mood_adm # U20
|
DEPRECATED069
|
||||||
Negative_neg
|
DEPRECATED070
|
||||||
Negative_pos
|
DEPRECATED071
|
||||||
Negative_yes
|
DEPRECATED072
|
||||||
Polarity_neg # U20
|
DEPRECATED073
|
||||||
Polarity_pos # U20
|
DEPRECATED074
|
||||||
Number_com
|
DEPRECATED075
|
||||||
Number_dual
|
DEPRECATED076
|
||||||
Number_none
|
DEPRECATED077
|
||||||
Number_plur
|
DEPRECATED078
|
||||||
Number_sing
|
DEPRECATED079
|
||||||
Number_ptan # bg
|
DEPRECATED080
|
||||||
Number_count # bg, U20
|
DEPRECATED081
|
||||||
Number_tri # U20
|
DEPRECATED082
|
||||||
NumType_card
|
DEPRECATED083
|
||||||
NumType_dist
|
DEPRECATED084
|
||||||
NumType_frac
|
DEPRECATED085
|
||||||
NumType_gen
|
DEPRECATED086
|
||||||
NumType_mult
|
DEPRECATED087
|
||||||
NumType_none
|
DEPRECATED088
|
||||||
NumType_ord
|
DEPRECATED089
|
||||||
NumType_sets
|
DEPRECATED090
|
||||||
Person_one
|
DEPRECATED091
|
||||||
Person_two
|
DEPRECATED092
|
||||||
Person_three
|
DEPRECATED093
|
||||||
Person_none
|
DEPRECATED094
|
||||||
Poss_yes
|
DEPRECATED095
|
||||||
PronType_advPart
|
DEPRECATED096
|
||||||
PronType_art
|
DEPRECATED097
|
||||||
PronType_default
|
DEPRECATED098
|
||||||
PronType_dem
|
DEPRECATED099
|
||||||
PronType_ind
|
DEPRECATED100
|
||||||
PronType_int
|
DEPRECATED101
|
||||||
PronType_neg
|
DEPRECATED102
|
||||||
PronType_prs
|
DEPRECATED103
|
||||||
PronType_rcp
|
DEPRECATED104
|
||||||
PronType_rel
|
DEPRECATED105
|
||||||
PronType_tot
|
DEPRECATED106
|
||||||
PronType_clit
|
DEPRECATED107
|
||||||
PronType_exc # es, ca, it, fa, U20
|
DEPRECATED108
|
||||||
PronType_emp # U20
|
DEPRECATED109
|
||||||
Reflex_yes
|
DEPRECATED110
|
||||||
Tense_fut
|
DEPRECATED111
|
||||||
Tense_imp
|
DEPRECATED112
|
||||||
Tense_past
|
DEPRECATED113
|
||||||
Tense_pres
|
DEPRECATED114
|
||||||
VerbForm_fin
|
DEPRECATED115
|
||||||
VerbForm_ger
|
DEPRECATED116
|
||||||
VerbForm_inf
|
DEPRECATED117
|
||||||
VerbForm_none
|
DEPRECATED118
|
||||||
VerbForm_part
|
DEPRECATED119
|
||||||
VerbForm_partFut
|
DEPRECATED120
|
||||||
VerbForm_partPast
|
DEPRECATED121
|
||||||
VerbForm_partPres
|
DEPRECATED122
|
||||||
VerbForm_sup
|
DEPRECATED123
|
||||||
VerbForm_trans
|
DEPRECATED124
|
||||||
VerbForm_conv # U20
|
DEPRECATED125
|
||||||
VerbForm_gdv # la
|
DEPRECATED126
|
||||||
VerbForm_vnoun # U20
|
DEPRECATED127
|
||||||
Voice_act
|
DEPRECATED128
|
||||||
Voice_cau
|
DEPRECATED129
|
||||||
Voice_pass
|
DEPRECATED130
|
||||||
Voice_mid # gkc, U20
|
DEPRECATED131
|
||||||
Voice_int # hb
|
DEPRECATED132
|
||||||
Voice_antip # U20
|
DEPRECATED133
|
||||||
Voice_dir # U20
|
DEPRECATED134
|
||||||
Voice_inv # U20
|
DEPRECATED135
|
||||||
Abbr_yes # cz, fi, sl, U
|
DEPRECATED136
|
||||||
AdpType_prep # cz, U
|
DEPRECATED137
|
||||||
AdpType_post # U
|
DEPRECATED138
|
||||||
AdpType_voc # cz
|
DEPRECATED139
|
||||||
AdpType_comprep # cz
|
DEPRECATED140
|
||||||
AdpType_circ # U
|
DEPRECATED141
|
||||||
AdvType_man
|
DEPRECATED142
|
||||||
AdvType_loc
|
DEPRECATED143
|
||||||
AdvType_tim
|
DEPRECATED144
|
||||||
AdvType_deg
|
DEPRECATED145
|
||||||
AdvType_cau
|
DEPRECATED146
|
||||||
AdvType_mod
|
DEPRECATED147
|
||||||
AdvType_sta
|
DEPRECATED148
|
||||||
AdvType_ex
|
DEPRECATED149
|
||||||
AdvType_adadj
|
DEPRECATED150
|
||||||
ConjType_oper # cz, U
|
DEPRECATED151
|
||||||
ConjType_comp # cz, U
|
DEPRECATED152
|
||||||
Connegative_yes # fi
|
DEPRECATED153
|
||||||
Derivation_minen # fi
|
DEPRECATED154
|
||||||
Derivation_sti # fi
|
DEPRECATED155
|
||||||
Derivation_inen # fi
|
DEPRECATED156
|
||||||
Derivation_lainen # fi
|
DEPRECATED157
|
||||||
Derivation_ja # fi
|
DEPRECATED158
|
||||||
Derivation_ton # fi
|
DEPRECATED159
|
||||||
Derivation_vs # fi
|
DEPRECATED160
|
||||||
Derivation_ttain # fi
|
DEPRECATED161
|
||||||
Derivation_ttaa # fi
|
DEPRECATED162
|
||||||
Echo_rdp # U
|
DEPRECATED163
|
||||||
Echo_ech # U
|
DEPRECATED164
|
||||||
Foreign_foreign # cz, fi, U
|
DEPRECATED165
|
||||||
Foreign_fscript # cz, fi, U
|
DEPRECATED166
|
||||||
Foreign_tscript # cz, U
|
DEPRECATED167
|
||||||
Foreign_yes # sl
|
DEPRECATED168
|
||||||
Gender_dat_masc # bq, U
|
DEPRECATED169
|
||||||
Gender_dat_fem # bq, U
|
DEPRECATED170
|
||||||
Gender_erg_masc # bq
|
DEPRECATED171
|
||||||
Gender_erg_fem # bq
|
DEPRECATED172
|
||||||
Gender_psor_masc # cz, sl, U
|
DEPRECATED173
|
||||||
Gender_psor_fem # cz, sl, U
|
DEPRECATED174
|
||||||
Gender_psor_neut # sl
|
DEPRECATED175
|
||||||
Hyph_yes # cz, U
|
DEPRECATED176
|
||||||
InfForm_one # fi
|
DEPRECATED177
|
||||||
InfForm_two # fi
|
DEPRECATED178
|
||||||
InfForm_three # fi
|
DEPRECATED179
|
||||||
NameType_geo # U, cz
|
DEPRECATED180
|
||||||
NameType_prs # U, cz
|
DEPRECATED181
|
||||||
NameType_giv # U, cz
|
DEPRECATED182
|
||||||
NameType_sur # U, cz
|
DEPRECATED183
|
||||||
NameType_nat # U, cz
|
DEPRECATED184
|
||||||
NameType_com # U, cz
|
DEPRECATED185
|
||||||
NameType_pro # U, cz
|
DEPRECATED186
|
||||||
NameType_oth # U, cz
|
DEPRECATED187
|
||||||
NounType_com # U
|
DEPRECATED188
|
||||||
NounType_prop # U
|
DEPRECATED189
|
||||||
NounType_class # U
|
DEPRECATED190
|
||||||
Number_abs_sing # bq, U
|
DEPRECATED191
|
||||||
Number_abs_plur # bq, U
|
DEPRECATED192
|
||||||
Number_dat_sing # bq, U
|
DEPRECATED193
|
||||||
Number_dat_plur # bq, U
|
DEPRECATED194
|
||||||
Number_erg_sing # bq, U
|
DEPRECATED195
|
||||||
Number_erg_plur # bq, U
|
DEPRECATED196
|
||||||
Number_psee_sing # U
|
DEPRECATED197
|
||||||
Number_psee_plur # U
|
DEPRECATED198
|
||||||
Number_psor_sing # cz, fi, sl, U
|
DEPRECATED199
|
||||||
Number_psor_plur # cz, fi, sl, U
|
DEPRECATED200
|
||||||
Number_pauc # U20
|
DEPRECATED201
|
||||||
Number_grpa # U20
|
DEPRECATED202
|
||||||
Number_grpl # U20
|
DEPRECATED203
|
||||||
Number_inv # U20
|
DEPRECATED204
|
||||||
NumForm_digit # cz, sl, U
|
DEPRECATED205
|
||||||
NumForm_roman # cz, sl, U
|
DEPRECATED206
|
||||||
NumForm_word # cz, sl, U
|
DEPRECATED207
|
||||||
NumValue_one # cz, U
|
DEPRECATED208
|
||||||
NumValue_two # cz, U
|
DEPRECATED209
|
||||||
NumValue_three # cz, U
|
DEPRECATED210
|
||||||
PartForm_pres # fi
|
DEPRECATED211
|
||||||
PartForm_past # fi
|
DEPRECATED212
|
||||||
PartForm_agt # fi
|
DEPRECATED213
|
||||||
PartForm_neg # fi
|
DEPRECATED214
|
||||||
PartType_mod # U
|
DEPRECATED215
|
||||||
PartType_emp # U
|
DEPRECATED216
|
||||||
PartType_res # U
|
DEPRECATED217
|
||||||
PartType_inf # U
|
DEPRECATED218
|
||||||
PartType_vbp # U
|
DEPRECATED219
|
||||||
Person_abs_one # bq, U
|
DEPRECATED220
|
||||||
Person_abs_two # bq, U
|
DEPRECATED221
|
||||||
Person_abs_three # bq, U
|
DEPRECATED222
|
||||||
Person_dat_one # bq, U
|
DEPRECATED223
|
||||||
Person_dat_two # bq, U
|
DEPRECATED224
|
||||||
Person_dat_three # bq, U
|
DEPRECATED225
|
||||||
Person_erg_one # bq, U
|
DEPRECATED226
|
||||||
Person_erg_two # bq, U
|
DEPRECATED227
|
||||||
Person_erg_three # bq, U
|
DEPRECATED228
|
||||||
Person_psor_one # fi, U
|
DEPRECATED229
|
||||||
Person_psor_two # fi, U
|
DEPRECATED230
|
||||||
Person_psor_three # fi, U
|
DEPRECATED231
|
||||||
Person_zero # U20
|
DEPRECATED232
|
||||||
Person_four # U20
|
DEPRECATED233
|
||||||
Polite_inf # bq, U
|
DEPRECATED234
|
||||||
Polite_pol # bq, U
|
DEPRECATED235
|
||||||
Polite_abs_inf # bq, U
|
DEPRECATED236
|
||||||
Polite_abs_pol # bq, U
|
DEPRECATED237
|
||||||
Polite_erg_inf # bq, U
|
DEPRECATED238
|
||||||
Polite_erg_pol # bq, U
|
DEPRECATED239
|
||||||
Polite_dat_inf # bq, U
|
DEPRECATED240
|
||||||
Polite_dat_pol # bq, U
|
DEPRECATED241
|
||||||
Polite_infm # U20
|
DEPRECATED242
|
||||||
Polite_form # U20
|
DEPRECATED243
|
||||||
Polite_form_elev # U20
|
DEPRECATED244
|
||||||
Polite_form_humb # U20
|
DEPRECATED245
|
||||||
Prefix_yes # U
|
DEPRECATED246
|
||||||
PrepCase_npr # cz
|
DEPRECATED247
|
||||||
PrepCase_pre # U
|
DEPRECATED248
|
||||||
PunctSide_ini # U
|
DEPRECATED249
|
||||||
PunctSide_fin # U
|
DEPRECATED250
|
||||||
PunctType_peri # U
|
DEPRECATED251
|
||||||
PunctType_qest # U
|
DEPRECATED252
|
||||||
PunctType_excl # U
|
DEPRECATED253
|
||||||
PunctType_quot # U
|
DEPRECATED254
|
||||||
PunctType_brck # U
|
DEPRECATED255
|
||||||
PunctType_comm # U
|
DEPRECATED256
|
||||||
PunctType_colo # U
|
DEPRECATED257
|
||||||
PunctType_semi # U
|
DEPRECATED258
|
||||||
PunctType_dash # U
|
DEPRECATED259
|
||||||
Style_arch # cz, fi, U
|
DEPRECATED260
|
||||||
Style_rare # cz, fi, U
|
DEPRECATED261
|
||||||
Style_poet # cz, U
|
DEPRECATED262
|
||||||
Style_norm # cz, U
|
DEPRECATED263
|
||||||
Style_coll # cz, U
|
DEPRECATED264
|
||||||
Style_vrnc # cz, U
|
DEPRECATED265
|
||||||
Style_sing # cz, U
|
DEPRECATED266
|
||||||
Style_expr # cz, U
|
DEPRECATED267
|
||||||
Style_derg # cz, U
|
DEPRECATED268
|
||||||
Style_vulg # cz, U
|
DEPRECATED269
|
||||||
Style_yes # fi, U
|
DEPRECATED270
|
||||||
StyleVariant_styleShort # cz
|
DEPRECATED271
|
||||||
StyleVariant_styleBound # cz, sl
|
DEPRECATED272
|
||||||
VerbType_aux # U
|
DEPRECATED273
|
||||||
VerbType_cop # U
|
DEPRECATED274
|
||||||
VerbType_mod # U
|
DEPRECATED275
|
||||||
VerbType_light # U
|
DEPRECATED276
|
||||||
|
|
||||||
PERSON
|
PERSON
|
||||||
NORP
|
NORP
|
||||||
|
|
|
@ -110,282 +110,282 @@ IDS = {
|
||||||
"EOL": EOL,
|
"EOL": EOL,
|
||||||
"SPACE": SPACE,
|
"SPACE": SPACE,
|
||||||
|
|
||||||
"Animacy_anim": Animacy_anim,
|
"DEPRECATED001": DEPRECATED001,
|
||||||
"Animacy_inam": Animacy_inan,
|
"DEPRECATED002": DEPRECATED002,
|
||||||
"Animacy_hum": Animacy_hum, # U20
|
"DEPRECATED003": DEPRECATED003,
|
||||||
"Animacy_nhum": Animacy_nhum,
|
"DEPRECATED004": DEPRECATED004,
|
||||||
"Aspect_freq": Aspect_freq,
|
"DEPRECATED005": DEPRECATED005,
|
||||||
"Aspect_imp": Aspect_imp,
|
"DEPRECATED006": DEPRECATED006,
|
||||||
"Aspect_mod": Aspect_mod,
|
"DEPRECATED007": DEPRECATED007,
|
||||||
"Aspect_none": Aspect_none,
|
"DEPRECATED008": DEPRECATED008,
|
||||||
"Aspect_perf": Aspect_perf,
|
"DEPRECATED009": DEPRECATED009,
|
||||||
"Aspect_iter": Aspect_iter, # U20
|
"DEPRECATED010": DEPRECATED010,
|
||||||
"Aspect_hab": Aspect_hab, # U20
|
"DEPRECATED011": DEPRECATED011,
|
||||||
"Case_abe": Case_abe,
|
"DEPRECATED012": DEPRECATED012,
|
||||||
"Case_abl": Case_abl,
|
"DEPRECATED013": DEPRECATED013,
|
||||||
"Case_abs": Case_abs,
|
"DEPRECATED014": DEPRECATED014,
|
||||||
"Case_acc": Case_acc,
|
"DEPRECATED015": DEPRECATED015,
|
||||||
"Case_ade": Case_ade,
|
"DEPRECATED016": DEPRECATED016,
|
||||||
"Case_all": Case_all,
|
"DEPRECATED017": DEPRECATED017,
|
||||||
"Case_cau": Case_cau,
|
"DEPRECATED018": DEPRECATED018,
|
||||||
"Case_com": Case_com,
|
"DEPRECATED019": DEPRECATED019,
|
||||||
"Case_cmp": Case_cmp, # U20
|
"DEPRECATED020": DEPRECATED020,
|
||||||
"Case_dat": Case_dat,
|
"DEPRECATED021": DEPRECATED021,
|
||||||
"Case_del": Case_del,
|
"DEPRECATED022": DEPRECATED022,
|
||||||
"Case_dis": Case_dis,
|
"DEPRECATED023": DEPRECATED023,
|
||||||
"Case_ela": Case_ela,
|
"DEPRECATED024": DEPRECATED024,
|
||||||
"Case_equ": Case_equ, # U20
|
"DEPRECATED025": DEPRECATED025,
|
||||||
"Case_ess": Case_ess,
|
"DEPRECATED026": DEPRECATED026,
|
||||||
"Case_gen": Case_gen,
|
"DEPRECATED027": DEPRECATED027,
|
||||||
"Case_ill": Case_ill,
|
"DEPRECATED028": DEPRECATED028,
|
||||||
"Case_ine": Case_ine,
|
"DEPRECATED029": DEPRECATED029,
|
||||||
"Case_ins": Case_ins,
|
"DEPRECATED030": DEPRECATED030,
|
||||||
"Case_loc": Case_loc,
|
"DEPRECATED031": DEPRECATED031,
|
||||||
"Case_lat": Case_lat,
|
"DEPRECATED032": DEPRECATED032,
|
||||||
"Case_nom": Case_nom,
|
"DEPRECATED033": DEPRECATED033,
|
||||||
"Case_par": Case_par,
|
"DEPRECATED034": DEPRECATED034,
|
||||||
"Case_sub": Case_sub,
|
"DEPRECATED035": DEPRECATED035,
|
||||||
"Case_sup": Case_sup,
|
"DEPRECATED036": DEPRECATED036,
|
||||||
"Case_tem": Case_tem,
|
"DEPRECATED037": DEPRECATED037,
|
||||||
"Case_ter": Case_ter,
|
"DEPRECATED038": DEPRECATED038,
|
||||||
"Case_tra": Case_tra,
|
"DEPRECATED039": DEPRECATED039,
|
||||||
"Case_voc": Case_voc,
|
"DEPRECATED040": DEPRECATED040,
|
||||||
"Definite_two": Definite_two,
|
"DEPRECATED041": DEPRECATED041,
|
||||||
"Definite_def": Definite_def,
|
"DEPRECATED042": DEPRECATED042,
|
||||||
"Definite_red": Definite_red,
|
"DEPRECATED043": DEPRECATED043,
|
||||||
"Definite_cons": Definite_cons, # U20
|
"DEPRECATED044": DEPRECATED044,
|
||||||
"Definite_ind": Definite_ind,
|
"DEPRECATED045": DEPRECATED045,
|
||||||
"Definite_spec": Definite_spec, # U20
|
"DEPRECATED046": DEPRECATED046,
|
||||||
"Degree_cmp": Degree_cmp,
|
"DEPRECATED047": DEPRECATED047,
|
||||||
"Degree_comp": Degree_comp,
|
"DEPRECATED048": DEPRECATED048,
|
||||||
"Degree_none": Degree_none,
|
"DEPRECATED049": DEPRECATED049,
|
||||||
"Degree_pos": Degree_pos,
|
"DEPRECATED050": DEPRECATED050,
|
||||||
"Degree_sup": Degree_sup,
|
"DEPRECATED051": DEPRECATED051,
|
||||||
"Degree_abs": Degree_abs,
|
"DEPRECATED052": DEPRECATED052,
|
||||||
"Degree_com": Degree_com,
|
"DEPRECATED053": DEPRECATED053,
|
||||||
"Degree_dim": Degree_dim, # du
|
"DEPRECATED054": DEPRECATED054,
|
||||||
"Degree_equ": Degree_equ, # U20
|
"DEPRECATED055": DEPRECATED055,
|
||||||
"Evident_nfh": Evident_nfh, # U20
|
"DEPRECATED056": DEPRECATED056,
|
||||||
"Gender_com": Gender_com,
|
"DEPRECATED057": DEPRECATED057,
|
||||||
"Gender_fem": Gender_fem,
|
"DEPRECATED058": DEPRECATED058,
|
||||||
"Gender_masc": Gender_masc,
|
"DEPRECATED059": DEPRECATED059,
|
||||||
"Gender_neut": Gender_neut,
|
"DEPRECATED060": DEPRECATED060,
|
||||||
"Mood_cnd": Mood_cnd,
|
"DEPRECATED061": DEPRECATED061,
|
||||||
"Mood_imp": Mood_imp,
|
"DEPRECATED062": DEPRECATED062,
|
||||||
"Mood_ind": Mood_ind,
|
"DEPRECATED063": DEPRECATED063,
|
||||||
"Mood_n": Mood_n,
|
"DEPRECATED064": DEPRECATED064,
|
||||||
"Mood_pot": Mood_pot,
|
"DEPRECATED065": DEPRECATED065,
|
||||||
"Mood_sub": Mood_sub,
|
"DEPRECATED066": DEPRECATED066,
|
||||||
"Mood_opt": Mood_opt,
|
"DEPRECATED067": DEPRECATED067,
|
||||||
"Mood_prp": Mood_prp, # U20
|
"DEPRECATED068": DEPRECATED068,
|
||||||
"Mood_adm": Mood_adm, # U20
|
"DEPRECATED069": DEPRECATED069,
|
||||||
"Negative_neg": Negative_neg,
|
"DEPRECATED070": DEPRECATED070,
|
||||||
"Negative_pos": Negative_pos,
|
"DEPRECATED071": DEPRECATED071,
|
||||||
"Negative_yes": Negative_yes,
|
"DEPRECATED072": DEPRECATED072,
|
||||||
"Polarity_neg": Polarity_neg, # U20
|
"DEPRECATED073": DEPRECATED073,
|
||||||
"Polarity_pos": Polarity_pos, # U20
|
"DEPRECATED074": DEPRECATED074,
|
||||||
"Number_com": Number_com,
|
"DEPRECATED075": DEPRECATED075,
|
||||||
"Number_dual": Number_dual,
|
"DEPRECATED076": DEPRECATED076,
|
||||||
"Number_none": Number_none,
|
"DEPRECATED077": DEPRECATED077,
|
||||||
"Number_plur": Number_plur,
|
"DEPRECATED078": DEPRECATED078,
|
||||||
"Number_sing": Number_sing,
|
"DEPRECATED079": DEPRECATED079,
|
||||||
"Number_ptan": Number_ptan, # bg
|
"DEPRECATED080": DEPRECATED080,
|
||||||
"Number_count": Number_count, # bg, U20
|
"DEPRECATED081": DEPRECATED081,
|
||||||
"Number_tri": Number_tri, # U20
|
"DEPRECATED082": DEPRECATED082,
|
||||||
"NumType_card": NumType_card,
|
"DEPRECATED083": DEPRECATED083,
|
||||||
"NumType_dist": NumType_dist,
|
"DEPRECATED084": DEPRECATED084,
|
||||||
"NumType_frac": NumType_frac,
|
"DEPRECATED085": DEPRECATED085,
|
||||||
"NumType_gen": NumType_gen,
|
"DEPRECATED086": DEPRECATED086,
|
||||||
"NumType_mult": NumType_mult,
|
"DEPRECATED087": DEPRECATED087,
|
||||||
"NumType_none": NumType_none,
|
"DEPRECATED088": DEPRECATED088,
|
||||||
"NumType_ord": NumType_ord,
|
"DEPRECATED089": DEPRECATED089,
|
||||||
"NumType_sets": NumType_sets,
|
"DEPRECATED090": DEPRECATED090,
|
||||||
"Person_one": Person_one,
|
"DEPRECATED091": DEPRECATED091,
|
||||||
"Person_two": Person_two,
|
"DEPRECATED092": DEPRECATED092,
|
||||||
"Person_three": Person_three,
|
"DEPRECATED093": DEPRECATED093,
|
||||||
"Person_none": Person_none,
|
"DEPRECATED094": DEPRECATED094,
|
||||||
"Poss_yes": Poss_yes,
|
"DEPRECATED095": DEPRECATED095,
|
||||||
"PronType_advPart": PronType_advPart,
|
"DEPRECATED096": DEPRECATED096,
|
||||||
"PronType_art": PronType_art,
|
"DEPRECATED097": DEPRECATED097,
|
||||||
"PronType_default": PronType_default,
|
"DEPRECATED098": DEPRECATED098,
|
||||||
"PronType_dem": PronType_dem,
|
"DEPRECATED099": DEPRECATED099,
|
||||||
"PronType_ind": PronType_ind,
|
"DEPRECATED100": DEPRECATED100,
|
||||||
"PronType_int": PronType_int,
|
"DEPRECATED101": DEPRECATED101,
|
||||||
"PronType_neg": PronType_neg,
|
"DEPRECATED102": DEPRECATED102,
|
||||||
"PronType_prs": PronType_prs,
|
"DEPRECATED103": DEPRECATED103,
|
||||||
"PronType_rcp": PronType_rcp,
|
"DEPRECATED104": DEPRECATED104,
|
||||||
"PronType_rel": PronType_rel,
|
"DEPRECATED105": DEPRECATED105,
|
||||||
"PronType_tot": PronType_tot,
|
"DEPRECATED106": DEPRECATED106,
|
||||||
"PronType_clit": PronType_clit,
|
"DEPRECATED107": DEPRECATED107,
|
||||||
"PronType_exc": PronType_exc, # es, ca, it, fa, U20
|
"DEPRECATED108": DEPRECATED108,
|
||||||
"PronType_emp": PronType_emp, # U20
|
"DEPRECATED109": DEPRECATED109,
|
||||||
"Reflex_yes": Reflex_yes,
|
"DEPRECATED110": DEPRECATED110,
|
||||||
"Tense_fut": Tense_fut,
|
"DEPRECATED111": DEPRECATED111,
|
||||||
"Tense_imp": Tense_imp,
|
"DEPRECATED112": DEPRECATED112,
|
||||||
"Tense_past": Tense_past,
|
"DEPRECATED113": DEPRECATED113,
|
||||||
"Tense_pres": Tense_pres,
|
"DEPRECATED114": DEPRECATED114,
|
||||||
"VerbForm_fin": VerbForm_fin,
|
"DEPRECATED115": DEPRECATED115,
|
||||||
"VerbForm_ger": VerbForm_ger,
|
"DEPRECATED116": DEPRECATED116,
|
||||||
"VerbForm_inf": VerbForm_inf,
|
"DEPRECATED117": DEPRECATED117,
|
||||||
"VerbForm_none": VerbForm_none,
|
"DEPRECATED118": DEPRECATED118,
|
||||||
"VerbForm_part": VerbForm_part,
|
"DEPRECATED119": DEPRECATED119,
|
||||||
"VerbForm_partFut": VerbForm_partFut,
|
"DEPRECATED120": DEPRECATED120,
|
||||||
"VerbForm_partPast": VerbForm_partPast,
|
"DEPRECATED121": DEPRECATED121,
|
||||||
"VerbForm_partPres": VerbForm_partPres,
|
"DEPRECATED122": DEPRECATED122,
|
||||||
"VerbForm_sup": VerbForm_sup,
|
"DEPRECATED123": DEPRECATED123,
|
||||||
"VerbForm_trans": VerbForm_trans,
|
"DEPRECATED124": DEPRECATED124,
|
||||||
"VerbForm_conv": VerbForm_conv, # U20
|
"DEPRECATED125": DEPRECATED125,
|
||||||
"VerbForm_gdv": VerbForm_gdv, # la,
|
"DEPRECATED126": DEPRECATED126,
|
||||||
"VerbForm_vnoun": VerbForm_vnoun, # U20
|
"DEPRECATED127": DEPRECATED127,
|
||||||
"Voice_act": Voice_act,
|
"DEPRECATED128": DEPRECATED128,
|
||||||
"Voice_cau": Voice_cau,
|
"DEPRECATED129": DEPRECATED129,
|
||||||
"Voice_pass": Voice_pass,
|
"DEPRECATED130": DEPRECATED130,
|
||||||
"Voice_mid": Voice_mid, # gkc, U20
|
"DEPRECATED131": DEPRECATED131,
|
||||||
"Voice_int": Voice_int, # hb,
|
"DEPRECATED132": DEPRECATED132,
|
||||||
"Voice_antip": Voice_antip, # U20
|
"DEPRECATED133": DEPRECATED133,
|
||||||
"Voice_dir": Voice_dir, # U20
|
"DEPRECATED134": DEPRECATED134,
|
||||||
"Voice_inv": Voice_inv, # U20
|
"DEPRECATED135": DEPRECATED135,
|
||||||
"Abbr_yes": Abbr_yes, # cz, fi, sl, U,
|
"DEPRECATED136": DEPRECATED136,
|
||||||
"AdpType_prep": AdpType_prep, # cz, U,
|
"DEPRECATED137": DEPRECATED137,
|
||||||
"AdpType_post": AdpType_post, # U,
|
"DEPRECATED138": DEPRECATED138,
|
||||||
"AdpType_voc": AdpType_voc, # cz,
|
"DEPRECATED139": DEPRECATED139,
|
||||||
"AdpType_comprep": AdpType_comprep, # cz,
|
"DEPRECATED140": DEPRECATED140,
|
||||||
"AdpType_circ": AdpType_circ, # U,
|
"DEPRECATED141": DEPRECATED141,
|
||||||
"AdvType_man": AdvType_man,
|
"DEPRECATED142": DEPRECATED142,
|
||||||
"AdvType_loc": AdvType_loc,
|
"DEPRECATED143": DEPRECATED143,
|
||||||
"AdvType_tim": AdvType_tim,
|
"DEPRECATED144": DEPRECATED144,
|
||||||
"AdvType_deg": AdvType_deg,
|
"DEPRECATED145": DEPRECATED145,
|
||||||
"AdvType_cau": AdvType_cau,
|
"DEPRECATED146": DEPRECATED146,
|
||||||
"AdvType_mod": AdvType_mod,
|
"DEPRECATED147": DEPRECATED147,
|
||||||
"AdvType_sta": AdvType_sta,
|
"DEPRECATED148": DEPRECATED148,
|
||||||
"AdvType_ex": AdvType_ex,
|
"DEPRECATED149": DEPRECATED149,
|
||||||
"AdvType_adadj": AdvType_adadj,
|
"DEPRECATED150": DEPRECATED150,
|
||||||
"ConjType_oper": ConjType_oper, # cz, U,
|
"DEPRECATED151": DEPRECATED151,
|
||||||
"ConjType_comp": ConjType_comp, # cz, U,
|
"DEPRECATED152": DEPRECATED152,
|
||||||
"Connegative_yes": Connegative_yes, # fi,
|
"DEPRECATED153": DEPRECATED153,
|
||||||
"Derivation_minen": Derivation_minen, # fi,
|
"DEPRECATED154": DEPRECATED154,
|
||||||
"Derivation_sti": Derivation_sti, # fi,
|
"DEPRECATED155": DEPRECATED155,
|
||||||
"Derivation_inen": Derivation_inen, # fi,
|
"DEPRECATED156": DEPRECATED156,
|
||||||
"Derivation_lainen": Derivation_lainen, # fi,
|
"DEPRECATED157": DEPRECATED157,
|
||||||
"Derivation_ja": Derivation_ja, # fi,
|
"DEPRECATED158": DEPRECATED158,
|
||||||
"Derivation_ton": Derivation_ton, # fi,
|
"DEPRECATED159": DEPRECATED159,
|
||||||
"Derivation_vs": Derivation_vs, # fi,
|
"DEPRECATED160": DEPRECATED160,
|
||||||
"Derivation_ttain": Derivation_ttain, # fi,
|
"DEPRECATED161": DEPRECATED161,
|
||||||
"Derivation_ttaa": Derivation_ttaa, # fi,
|
"DEPRECATED162": DEPRECATED162,
|
||||||
"Echo_rdp": Echo_rdp, # U,
|
"DEPRECATED163": DEPRECATED163,
|
||||||
"Echo_ech": Echo_ech, # U,
|
"DEPRECATED164": DEPRECATED164,
|
||||||
"Foreign_foreign": Foreign_foreign, # cz, fi, U,
|
"DEPRECATED165": DEPRECATED165,
|
||||||
"Foreign_fscript": Foreign_fscript, # cz, fi, U,
|
"DEPRECATED166": DEPRECATED166,
|
||||||
"Foreign_tscript": Foreign_tscript, # cz, U,
|
"DEPRECATED167": DEPRECATED167,
|
||||||
"Foreign_yes": Foreign_yes, # sl,
|
"DEPRECATED168": DEPRECATED168,
|
||||||
"Gender_dat_masc": Gender_dat_masc, # bq, U,
|
"DEPRECATED169": DEPRECATED169,
|
||||||
"Gender_dat_fem": Gender_dat_fem, # bq, U,
|
"DEPRECATED170": DEPRECATED170,
|
||||||
"Gender_erg_masc": Gender_erg_masc, # bq,
|
"DEPRECATED171": DEPRECATED171,
|
||||||
"Gender_erg_fem": Gender_erg_fem, # bq,
|
"DEPRECATED172": DEPRECATED172,
|
||||||
"Gender_psor_masc": Gender_psor_masc, # cz, sl, U,
|
"DEPRECATED173": DEPRECATED173,
|
||||||
"Gender_psor_fem": Gender_psor_fem, # cz, sl, U,
|
"DEPRECATED174": DEPRECATED174,
|
||||||
"Gender_psor_neut": Gender_psor_neut, # sl,
|
"DEPRECATED175": DEPRECATED175,
|
||||||
"Hyph_yes": Hyph_yes, # cz, U,
|
"DEPRECATED176": DEPRECATED176,
|
||||||
"InfForm_one": InfForm_one, # fi,
|
"DEPRECATED177": DEPRECATED177,
|
||||||
"InfForm_two": InfForm_two, # fi,
|
"DEPRECATED178": DEPRECATED178,
|
||||||
"InfForm_three": InfForm_three, # fi,
|
"DEPRECATED179": DEPRECATED179,
|
||||||
"NameType_geo": NameType_geo, # U, cz,
|
"DEPRECATED180": DEPRECATED180,
|
||||||
"NameType_prs": NameType_prs, # U, cz,
|
"DEPRECATED181": DEPRECATED181,
|
||||||
"NameType_giv": NameType_giv, # U, cz,
|
"DEPRECATED182": DEPRECATED182,
|
||||||
"NameType_sur": NameType_sur, # U, cz,
|
"DEPRECATED183": DEPRECATED183,
|
||||||
"NameType_nat": NameType_nat, # U, cz,
|
"DEPRECATED184": DEPRECATED184,
|
||||||
"NameType_com": NameType_com, # U, cz,
|
"DEPRECATED185": DEPRECATED185,
|
||||||
"NameType_pro": NameType_pro, # U, cz,
|
"DEPRECATED186": DEPRECATED186,
|
||||||
"NameType_oth": NameType_oth, # U, cz,
|
"DEPRECATED187": DEPRECATED187,
|
||||||
"NounType_com": NounType_com, # U,
|
"DEPRECATED188": DEPRECATED188,
|
||||||
"NounType_prop": NounType_prop, # U,
|
"DEPRECATED189": DEPRECATED189,
|
||||||
"NounType_class": NounType_class, # U,
|
"DEPRECATED190": DEPRECATED190,
|
||||||
"Number_abs_sing": Number_abs_sing, # bq, U,
|
"DEPRECATED191": DEPRECATED191,
|
||||||
"Number_abs_plur": Number_abs_plur, # bq, U,
|
"DEPRECATED192": DEPRECATED192,
|
||||||
"Number_dat_sing": Number_dat_sing, # bq, U,
|
"DEPRECATED193": DEPRECATED193,
|
||||||
"Number_dat_plur": Number_dat_plur, # bq, U,
|
"DEPRECATED194": DEPRECATED194,
|
||||||
"Number_erg_sing": Number_erg_sing, # bq, U,
|
"DEPRECATED195": DEPRECATED195,
|
||||||
"Number_erg_plur": Number_erg_plur, # bq, U,
|
"DEPRECATED196": DEPRECATED196,
|
||||||
"Number_psee_sing": Number_psee_sing, # U,
|
"DEPRECATED197": DEPRECATED197,
|
||||||
"Number_psee_plur": Number_psee_plur, # U,
|
"DEPRECATED198": DEPRECATED198,
|
||||||
"Number_psor_sing": Number_psor_sing, # cz, fi, sl, U,
|
"DEPRECATED199": DEPRECATED199,
|
||||||
"Number_psor_plur": Number_psor_plur, # cz, fi, sl, U,
|
"DEPRECATED200": DEPRECATED200,
|
||||||
"Number_pauc": Number_pauc, # U20
|
"DEPRECATED201": DEPRECATED201,
|
||||||
"Number_grpa": Number_grpa, # U20
|
"DEPRECATED202": DEPRECATED202,
|
||||||
"Number_grpl": Number_grpl, # U20
|
"DEPRECATED203": DEPRECATED203,
|
||||||
"Number_inv": Number_inv, # U20
|
"DEPRECATED204": DEPRECATED204,
|
||||||
"NumForm_digit": NumForm_digit, # cz, sl, U,
|
"DEPRECATED205": DEPRECATED205,
|
||||||
"NumForm_roman": NumForm_roman, # cz, sl, U,
|
"DEPRECATED206": DEPRECATED206,
|
||||||
"NumForm_word": NumForm_word, # cz, sl, U,
|
"DEPRECATED207": DEPRECATED207,
|
||||||
"NumValue_one": NumValue_one, # cz, U,
|
"DEPRECATED208": DEPRECATED208,
|
||||||
"NumValue_two": NumValue_two, # cz, U,
|
"DEPRECATED209": DEPRECATED209,
|
||||||
"NumValue_three": NumValue_three, # cz, U,
|
"DEPRECATED210": DEPRECATED210,
|
||||||
"PartForm_pres": PartForm_pres, # fi,
|
"DEPRECATED211": DEPRECATED211,
|
||||||
"PartForm_past": PartForm_past, # fi,
|
"DEPRECATED212": DEPRECATED212,
|
||||||
"PartForm_agt": PartForm_agt, # fi,
|
"DEPRECATED213": DEPRECATED213,
|
||||||
"PartForm_neg": PartForm_neg, # fi,
|
"DEPRECATED214": DEPRECATED214,
|
||||||
"PartType_mod": PartType_mod, # U,
|
"DEPRECATED215": DEPRECATED215,
|
||||||
"PartType_emp": PartType_emp, # U,
|
"DEPRECATED216": DEPRECATED216,
|
||||||
"PartType_res": PartType_res, # U,
|
"DEPRECATED217": DEPRECATED217,
|
||||||
"PartType_inf": PartType_inf, # U,
|
"DEPRECATED218": DEPRECATED218,
|
||||||
"PartType_vbp": PartType_vbp, # U,
|
"DEPRECATED219": DEPRECATED219,
|
||||||
"Person_abs_one": Person_abs_one, # bq, U,
|
"DEPRECATED220": DEPRECATED220,
|
||||||
"Person_abs_two": Person_abs_two, # bq, U,
|
"DEPRECATED221": DEPRECATED221,
|
||||||
"Person_abs_three": Person_abs_three, # bq, U,
|
"DEPRECATED222": DEPRECATED222,
|
||||||
"Person_dat_one": Person_dat_one, # bq, U,
|
"DEPRECATED223": DEPRECATED223,
|
||||||
"Person_dat_two": Person_dat_two, # bq, U,
|
"DEPRECATED224": DEPRECATED224,
|
||||||
"Person_dat_three": Person_dat_three, # bq, U,
|
"DEPRECATED225": DEPRECATED225,
|
||||||
"Person_erg_one": Person_erg_one, # bq, U,
|
"DEPRECATED226": DEPRECATED226,
|
||||||
"Person_erg_two": Person_erg_two, # bq, U,
|
"DEPRECATED227": DEPRECATED227,
|
||||||
"Person_erg_three": Person_erg_three, # bq, U,
|
"DEPRECATED228": DEPRECATED228,
|
||||||
"Person_psor_one": Person_psor_one, # fi, U,
|
"DEPRECATED229": DEPRECATED229,
|
||||||
"Person_psor_two": Person_psor_two, # fi, U,
|
"DEPRECATED230": DEPRECATED230,
|
||||||
"Person_psor_three": Person_psor_three, # fi, U,
|
"DEPRECATED231": DEPRECATED231,
|
||||||
"Person_zero": Person_zero, # U20
|
"DEPRECATED232": DEPRECATED232,
|
||||||
"Person_four": Person_four, # U20
|
"DEPRECATED233": DEPRECATED233,
|
||||||
"Polite_inf": Polite_inf, # bq, U,
|
"DEPRECATED234": DEPRECATED234,
|
||||||
"Polite_pol": Polite_pol, # bq, U,
|
"DEPRECATED235": DEPRECATED235,
|
||||||
"Polite_abs_inf": Polite_abs_inf, # bq, U,
|
"DEPRECATED236": DEPRECATED236,
|
||||||
"Polite_abs_pol": Polite_abs_pol, # bq, U,
|
"DEPRECATED237": DEPRECATED237,
|
||||||
"Polite_erg_inf": Polite_erg_inf, # bq, U,
|
"DEPRECATED238": DEPRECATED238,
|
||||||
"Polite_erg_pol": Polite_erg_pol, # bq, U,
|
"DEPRECATED239": DEPRECATED239,
|
||||||
"Polite_dat_inf": Polite_dat_inf, # bq, U,
|
"DEPRECATED240": DEPRECATED240,
|
||||||
"Polite_dat_pol": Polite_dat_pol, # bq, U,
|
"DEPRECATED241": DEPRECATED241,
|
||||||
"Polite_infm": Polite_infm, # U20
|
"DEPRECATED242": DEPRECATED242,
|
||||||
"Polite_form": Polite_form, # U20
|
"DEPRECATED243": DEPRECATED243,
|
||||||
"Polite_form_elev": Polite_form_elev, # U20
|
"DEPRECATED244": DEPRECATED244,
|
||||||
"Polite_form_humb": Polite_form_humb, # U20
|
"DEPRECATED245": DEPRECATED245,
|
||||||
"Prefix_yes": Prefix_yes, # U,
|
"DEPRECATED246": DEPRECATED246,
|
||||||
"PrepCase_npr": PrepCase_npr, # cz,
|
"DEPRECATED247": DEPRECATED247,
|
||||||
"PrepCase_pre": PrepCase_pre, # U,
|
"DEPRECATED248": DEPRECATED248,
|
||||||
"PunctSide_ini": PunctSide_ini, # U,
|
"DEPRECATED249": DEPRECATED249,
|
||||||
"PunctSide_fin": PunctSide_fin, # U,
|
"DEPRECATED250": DEPRECATED250,
|
||||||
"PunctType_peri": PunctType_peri, # U,
|
"DEPRECATED251": DEPRECATED251,
|
||||||
"PunctType_qest": PunctType_qest, # U,
|
"DEPRECATED252": DEPRECATED252,
|
||||||
"PunctType_excl": PunctType_excl, # U,
|
"DEPRECATED253": DEPRECATED253,
|
||||||
"PunctType_quot": PunctType_quot, # U,
|
"DEPRECATED254": DEPRECATED254,
|
||||||
"PunctType_brck": PunctType_brck, # U,
|
"DEPRECATED255": DEPRECATED255,
|
||||||
"PunctType_comm": PunctType_comm, # U,
|
"DEPRECATED256": DEPRECATED256,
|
||||||
"PunctType_colo": PunctType_colo, # U,
|
"DEPRECATED257": DEPRECATED257,
|
||||||
"PunctType_semi": PunctType_semi, # U,
|
"DEPRECATED258": DEPRECATED258,
|
||||||
"PunctType_dash": PunctType_dash, # U,
|
"DEPRECATED259": DEPRECATED259,
|
||||||
"Style_arch": Style_arch, # cz, fi, U,
|
"DEPRECATED260": DEPRECATED260,
|
||||||
"Style_rare": Style_rare, # cz, fi, U,
|
"DEPRECATED261": DEPRECATED261,
|
||||||
"Style_poet": Style_poet, # cz, U,
|
"DEPRECATED262": DEPRECATED262,
|
||||||
"Style_norm": Style_norm, # cz, U,
|
"DEPRECATED263": DEPRECATED263,
|
||||||
"Style_coll": Style_coll, # cz, U,
|
"DEPRECATED264": DEPRECATED264,
|
||||||
"Style_vrnc": Style_vrnc, # cz, U,
|
"DEPRECATED265": DEPRECATED265,
|
||||||
"Style_sing": Style_sing, # cz, U,
|
"DEPRECATED266": DEPRECATED266,
|
||||||
"Style_expr": Style_expr, # cz, U,
|
"DEPRECATED267": DEPRECATED267,
|
||||||
"Style_derg": Style_derg, # cz, U,
|
"DEPRECATED268": DEPRECATED268,
|
||||||
"Style_vulg": Style_vulg, # cz, U,
|
"DEPRECATED269": DEPRECATED269,
|
||||||
"Style_yes": Style_yes, # fi, U,
|
"DEPRECATED270": DEPRECATED270,
|
||||||
"StyleVariant_styleShort": StyleVariant_styleShort, # cz,
|
"DEPRECATED271": DEPRECATED271,
|
||||||
"StyleVariant_styleBound": StyleVariant_styleBound, # cz, sl,
|
"DEPRECATED272": DEPRECATED272,
|
||||||
"VerbType_aux": VerbType_aux, # U,
|
"DEPRECATED273": DEPRECATED273,
|
||||||
"VerbType_cop": VerbType_cop, # U,
|
"DEPRECATED274": DEPRECATED274,
|
||||||
"VerbType_mod": VerbType_mod, # U,
|
"DEPRECATED275": DEPRECATED275,
|
||||||
"VerbType_light": VerbType_light, # U,
|
"DEPRECATED276": DEPRECATED276,
|
||||||
|
|
||||||
"PERSON": PERSON,
|
"PERSON": PERSON,
|
||||||
"NORP": NORP,
|
"NORP": NORP,
|
||||||
|
|
|
@ -9,22 +9,52 @@ def i_has(en_tokenizer):
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
|
|
||||||
def test_token_morph_id(i_has):
|
def test_token_morph_eq(i_has):
|
||||||
assert i_has[0].morph.id
|
assert i_has[0].morph is not i_has[0].morph
|
||||||
assert i_has[1].morph.id != 0
|
assert i_has[0].morph == i_has[0].morph
|
||||||
assert i_has[0].morph.id != i_has[1].morph.id
|
assert i_has[0].morph != i_has[1].morph
|
||||||
|
|
||||||
|
|
||||||
|
def test_token_morph_key(i_has):
    """Check the `key` exposed by each token's morphological analysis.

    The keys must be non-zero, equal when read twice from the same token,
    and different between the first and second token of the fixture doc.
    """
    pronoun, verb = i_has[0], i_has[1]
    # Both tokens carry an analysis, so neither key may be zero.
    assert pronoun.morph.key != 0
    assert verb.morph.key != 0
    # `.morph` is read twice on purpose: two separate accesses on the same
    # token must report the same key.
    assert pronoun.morph.key == pronoun.morph.key
    # Tokens with different features must not share a key.
    assert pronoun.morph.key != verb.morph.key
|
||||||
|
|
||||||
|
|
||||||
def test_morph_props(i_has):
|
def test_morph_props(i_has):
|
||||||
assert i_has[0].morph.pron_type == i_has.vocab.strings["PronType_prs"]
|
assert i_has[0].morph.get("PronType") == ["PronType=prs"]
|
||||||
assert i_has[0].morph.pron_type_ == "PronType_prs"
|
assert i_has[1].morph.get("PronType") == []
|
||||||
assert i_has[1].morph.pron_type == 0
|
|
||||||
|
|
||||||
|
|
||||||
def test_morph_iter(i_has):
|
def test_morph_iter(i_has):
|
||||||
assert list(i_has[0].morph) == ["PronType_prs"]
|
assert set(i_has[0].morph) == set(["PronType=prs"])
|
||||||
assert list(i_has[1].morph) == ["Number_sing", "Person_three", "VerbForm_fin"]
|
assert set(i_has[1].morph) == set(["Number=sing", "Person=three", "Tense=pres", "VerbForm=fin"])
|
||||||
|
|
||||||
|
|
||||||
def test_morph_get(i_has):
|
def test_morph_get(i_has):
|
||||||
assert i_has[0].morph.get("pron_type") == "PronType_prs"
|
assert i_has[0].morph.get("PronType") == ["PronType=prs"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_morph_set(i_has):
    """Check that `Token.morph_` can be assigned from a string or a dict.

    After each assignment the value read back is compared with the expected
    string, in which fields (and multiple values of a field) appear in
    alphabetical order.
    """
    token = i_has[0]
    assert token.morph.get("PronType") == ["PronType=prs"]

    # set by string
    token.morph_ = "PronType=unk"
    assert token.morph.get("PronType") == ["PronType=unk"]

    # (value assigned to morph_, string expected when reading morph_ back)
    round_trips = [
        # set by string, fields are alphabetized
        ("PronType=123|NounType=unk", "NounType=unk|PronType=123"),
        # set by dict
        (
            {"AType": "123", "BType": "unk", "POS": "ADJ"},
            "AType=123|BType=unk|POS=ADJ",
        ),
        # set by string with multiple values, fields and values are alphabetized
        ("BType=c|AType=b,a", "AType=a,b|BType=c"),
        # set by dict with multiple values, fields and values are alphabetized
        ({"AType": "b,a", "BType": "c"}, "AType=a,b|BType=c"),
    ]
    for assigned, expected in round_trips:
        token.morph_ = assigned
        assert token.morph_ == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_morph_str(i_has):
    """Check that `str()` of each token's morph gives the expected features string."""
    # Expected string per token, in document order.
    expected_strings = (
        "PronType=prs",
        "Number=sing|Person=three|Tense=pres|VerbForm=fin",
    )
    for index, expected in enumerate(expected_strings):
        assert str(i_has[index].morph) == expected
|
||||||
|
|
26
spacy/tests/morphology/test_morph_converters.py
Normal file
26
spacy/tests/morphology/test_morph_converters.py
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
import pytest
|
||||||
|
from spacy.morphology import Morphology
|
||||||
|
|
||||||
|
|
||||||
|
def test_feats_converters():
    """Exercise the `Morphology` converters between feature representations.

    Covers `list_to_feats`, `dict_to_feats` and `feats_to_dict` on
    already-sorted input, on round trips, and on unsorted input that is
    expected to come back in the same normalized form as the sorted input.
    """
    feats = "Case=dat,gen|Number=sing"
    feats_dict = {"Case": "dat,gen", "Number": "sing"}
    feats_list = feats.split(Morphology.FEATURE_SEP)

    # simple conversions
    assert Morphology.list_to_feats(feats_list) == feats
    assert Morphology.dict_to_feats(feats_dict) == feats
    assert Morphology.feats_to_dict(feats) == feats_dict

    # roundtrips
    assert Morphology.dict_to_feats(Morphology.feats_to_dict(feats)) == feats
    assert Morphology.feats_to_dict(Morphology.dict_to_feats(feats_dict)) == feats_dict

    # unsorted input is normalized
    unsorted_feats = "Number=sing|Case=gen,dat"
    unsorted_feats_dict = {"Case": "gen,dat", "Number": "sing"}
    # Build the list from the *unsorted* string; splitting `feats` here would
    # only repeat the sorted-list check from the "simple conversions" section.
    unsorted_feats_list = unsorted_feats.split(Morphology.FEATURE_SEP)
    assert Morphology.feats_to_dict(unsorted_feats) == feats_dict
    assert Morphology.dict_to_feats(unsorted_feats_dict) == feats
    assert Morphology.list_to_feats(unsorted_feats_list) == feats
    assert Morphology.dict_to_feats(Morphology.feats_to_dict(unsorted_feats)) == feats
|
|
@ -16,32 +16,30 @@ def test_init(morphology):
|
||||||
|
|
||||||
|
|
||||||
def test_add_morphology_with_string_names(morphology):
|
def test_add_morphology_with_string_names(morphology):
|
||||||
morphology.add({"Case_gen", "Number_sing"})
|
morphology.add({"Case": "gen", "Number": "sing"})
|
||||||
|
|
||||||
|
|
||||||
def test_add_morphology_with_int_ids(morphology):
|
def test_add_morphology_with_int_ids(morphology):
|
||||||
morphology.add({get_string_id("Case_gen"), get_string_id("Number_sing")})
|
morphology.strings.add("Case")
|
||||||
|
morphology.strings.add("gen")
|
||||||
|
morphology.strings.add("Number")
|
||||||
|
morphology.strings.add("sing")
|
||||||
|
morphology.add({get_string_id("Case"): get_string_id("gen"), get_string_id("Number"): get_string_id("sing")})
|
||||||
|
|
||||||
|
|
||||||
def test_add_morphology_with_mix_strings_and_ints(morphology):
|
def test_add_morphology_with_mix_strings_and_ints(morphology):
|
||||||
morphology.add({get_string_id("PunctSide_ini"), "VerbType_aux"})
|
morphology.strings.add("PunctSide")
|
||||||
|
morphology.strings.add("ini")
|
||||||
|
morphology.add({get_string_id("PunctSide"): get_string_id("ini"), "VerbType": "aux"})
|
||||||
|
|
||||||
|
|
||||||
def test_morphology_tags_hash_distinctly(morphology):
|
def test_morphology_tags_hash_distinctly(morphology):
|
||||||
tag1 = morphology.add({"PunctSide_ini", "VerbType_aux"})
|
tag1 = morphology.add({"PunctSide": "ini", "VerbType": "aux"})
|
||||||
tag2 = morphology.add({"Case_gen", "Number_sing"})
|
tag2 = morphology.add({"Case": "gen", "Number": "sing"})
|
||||||
assert tag1 != tag2
|
assert tag1 != tag2
|
||||||
|
|
||||||
|
|
||||||
def test_morphology_tags_hash_independent_of_order(morphology):
|
def test_morphology_tags_hash_independent_of_order(morphology):
|
||||||
tag1 = morphology.add({"Case_gen", "Number_sing"})
|
tag1 = morphology.add({"Case": "gen", "Number": "sing"})
|
||||||
tag2 = morphology.add({"Number_sing", "Case_gen"})
|
tag2 = morphology.add({"Number": "sing", "Case": "gen"})
|
||||||
assert tag1 == tag2
|
assert tag1 == tag2
|
||||||
|
|
||||||
|
|
||||||
def test_update_morphology_tag(morphology):
|
|
||||||
tag1 = morphology.add({"Case_gen"})
|
|
||||||
tag2 = morphology.update(tag1, {"Number_sing"})
|
|
||||||
assert tag1 != tag2
|
|
||||||
tag3 = morphology.add({"Number_sing", "Case_gen"})
|
|
||||||
assert tag2 == tag3
|
|
||||||
|
|
|
@ -2,7 +2,7 @@ import pytest
|
||||||
import random
|
import random
|
||||||
from spacy.matcher import Matcher
|
from spacy.matcher import Matcher
|
||||||
from spacy.attrs import IS_PUNCT, ORTH, LOWER
|
from spacy.attrs import IS_PUNCT, ORTH, LOWER
|
||||||
from spacy.symbols import POS, VERB, VerbForm_inf
|
from spacy.symbols import POS, VERB
|
||||||
from spacy.vocab import Vocab
|
from spacy.vocab import Vocab
|
||||||
from spacy.language import Language
|
from spacy.language import Language
|
||||||
from spacy.lemmatizer import Lemmatizer
|
from spacy.lemmatizer import Lemmatizer
|
||||||
|
@ -164,7 +164,7 @@ def test_issue590(en_vocab):
|
||||||
def test_issue595():
|
def test_issue595():
|
||||||
"""Test lemmatization of base forms"""
|
"""Test lemmatization of base forms"""
|
||||||
words = ["Do", "n't", "feed", "the", "dog"]
|
words = ["Do", "n't", "feed", "the", "dog"]
|
||||||
tag_map = {"VB": {POS: VERB, VerbForm_inf: True}}
|
tag_map = {"VB": {POS: VERB, "VerbForm": "inf"}}
|
||||||
lookups = Lookups()
|
lookups = Lookups()
|
||||||
lookups.add_table("lemma_rules", {"verb": [["ed", "e"]]})
|
lookups.add_table("lemma_rules", {"verb": [["ed", "e"]]})
|
||||||
lookups.add_table("lemma_index", {"verb": {}})
|
lookups.add_table("lemma_index", {"verb": {}})
|
||||||
|
|
|
@ -8,7 +8,7 @@ from spacy.matcher import Matcher
|
||||||
from spacy.tokenizer import Tokenizer
|
from spacy.tokenizer import Tokenizer
|
||||||
from spacy.lemmatizer import Lemmatizer
|
from spacy.lemmatizer import Lemmatizer
|
||||||
from spacy.lookups import Lookups
|
from spacy.lookups import Lookups
|
||||||
from spacy.symbols import ORTH, LEMMA, POS, VERB, VerbForm_part
|
from spacy.symbols import ORTH, LEMMA, POS, VERB
|
||||||
|
|
||||||
|
|
||||||
def test_issue1061():
|
def test_issue1061():
|
||||||
|
@ -88,7 +88,7 @@ def test_issue1375():
|
||||||
|
|
||||||
|
|
||||||
def test_issue1387():
|
def test_issue1387():
|
||||||
tag_map = {"VBG": {POS: VERB, VerbForm_part: True}}
|
tag_map = {"VBG": {POS: VERB, "VerbForm": "part"}}
|
||||||
lookups = Lookups()
|
lookups = Lookups()
|
||||||
lookups.add_table("lemma_index", {"verb": ("cope", "cop")})
|
lookups.add_table("lemma_index", {"verb": ("cope", "cop")})
|
||||||
lookups.add_table("lemma_exc", {"verb": {"coping": ("cope",)}})
|
lookups.add_table("lemma_exc", {"verb": {"coping": ("cope",)}})
|
||||||
|
|
|
@ -2,5 +2,6 @@ from .doc import Doc
|
||||||
from .token import Token
|
from .token import Token
|
||||||
from .span import Span
|
from .span import Span
|
||||||
from ._serialize import DocBin
|
from ._serialize import DocBin
|
||||||
|
from .morphanalysis import MorphAnalysis
|
||||||
|
|
||||||
__all__ = ["Doc", "Token", "Span", "DocBin"]
|
__all__ = ["Doc", "Token", "Span", "DocBin", "MorphAnalysis"]
|
||||||
|
|
|
@ -5,5 +5,5 @@ from ..structs cimport MorphAnalysisC
|
||||||
|
|
||||||
cdef class MorphAnalysis:
|
cdef class MorphAnalysis:
|
||||||
cdef readonly Vocab vocab
|
cdef readonly Vocab vocab
|
||||||
cdef hash_t key
|
cdef readonly hash_t key
|
||||||
cdef MorphAnalysisC c
|
cdef MorphAnalysisC c
|
||||||
|
|
|
@ -1,15 +1,14 @@
|
||||||
from libc.string cimport memset
|
from libc.string cimport memset
|
||||||
|
cimport numpy as np
|
||||||
|
|
||||||
from ..vocab cimport Vocab
|
from ..vocab cimport Vocab
|
||||||
from ..typedefs cimport hash_t, attr_t
|
from ..typedefs cimport hash_t, attr_t
|
||||||
from ..morphology cimport list_features, check_feature, get_field, tag_to_json
|
from ..morphology cimport list_features, check_feature, get_by_field
|
||||||
|
|
||||||
from ..strings import get_string_id
|
|
||||||
|
|
||||||
|
|
||||||
cdef class MorphAnalysis:
|
cdef class MorphAnalysis:
|
||||||
"""Control access to morphological features for a token."""
|
"""Control access to morphological features for a token."""
|
||||||
def __init__(self, Vocab vocab, features=tuple()):
|
def __init__(self, Vocab vocab, features=dict()):
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.key = self.vocab.morphology.add(features)
|
self.key = self.vocab.morphology.add(features)
|
||||||
analysis = <const MorphAnalysisC*>self.vocab.morphology.tags.get(self.key)
|
analysis = <const MorphAnalysisC*>self.vocab.morphology.tags.get(self.key)
|
||||||
|
@ -33,7 +32,7 @@ cdef class MorphAnalysis:
|
||||||
|
|
||||||
def __contains__(self, feature):
|
def __contains__(self, feature):
|
||||||
"""Test whether the morphological analysis contains some feature."""
|
"""Test whether the morphological analysis contains some feature."""
|
||||||
cdef attr_t feat_id = get_string_id(feature)
|
cdef attr_t feat_id = self.vocab.strings.as_int(feature)
|
||||||
return check_feature(&self.c, feat_id)
|
return check_feature(&self.c, feat_id)
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
|
@ -55,369 +54,28 @@ cdef class MorphAnalysis:
|
||||||
def __hash__(self):
|
def __hash__(self):
|
||||||
return self.key
|
return self.key
|
||||||
|
|
||||||
def get(self, unicode field):
|
def __eq__(self, other):
|
||||||
|
return self.key == other.key
|
||||||
|
|
||||||
|
def __ne__(self, other):
|
||||||
|
return self.key != other.key
|
||||||
|
|
||||||
|
def get(self, field):
|
||||||
"""Retrieve a feature by field."""
|
"""Retrieve a feature by field."""
|
||||||
cdef int field_id = self.vocab.morphology._feat_map.attr2field[field]
|
cdef attr_t field_id = self.vocab.strings.as_int(field)
|
||||||
return self.vocab.strings[get_field(&self.c, field_id)]
|
cdef np.ndarray results = get_by_field(&self.c, field_id)
|
||||||
|
return [self.vocab.strings[result] for result in results]
|
||||||
|
|
||||||
def to_json(self):
|
def to_json(self):
|
||||||
"""Produce a json serializable representation, which will be a list of
|
"""Produce a json serializable representation as a UD FEATS-style
|
||||||
strings.
|
string.
|
||||||
"""
|
"""
|
||||||
return tag_to_json(&self.c)
|
morph_string = self.vocab.strings[self.c.key]
|
||||||
|
if morph_string == self.vocab.morphology.EMPTY_MORPH:
|
||||||
@property
|
return ""
|
||||||
def is_base_form(self):
|
return morph_string
|
||||||
raise NotImplementedError
|
|
||||||
|
def to_dict(self):
|
||||||
@property
|
"""Produce a dict representation.
|
||||||
def pos(self):
|
"""
|
||||||
return self.c.pos
|
return self.vocab.morphology.feats_to_dict(self.to_json())
|
||||||
|
|
||||||
@property
|
|
||||||
def pos_(self):
|
|
||||||
return self.vocab.strings[self.c.pos]
|
|
||||||
|
|
||||||
property id:
|
|
||||||
def __get__(self):
|
|
||||||
return self.key
|
|
||||||
|
|
||||||
property abbr:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.abbr
|
|
||||||
|
|
||||||
property adp_type:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.adp_type
|
|
||||||
|
|
||||||
property adv_type:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.adv_type
|
|
||||||
|
|
||||||
property animacy:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.animacy
|
|
||||||
|
|
||||||
property aspect:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.aspect
|
|
||||||
|
|
||||||
property case:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.case
|
|
||||||
|
|
||||||
property conj_type:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.conj_type
|
|
||||||
|
|
||||||
property connegative:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.connegative
|
|
||||||
|
|
||||||
property definite:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.definite
|
|
||||||
|
|
||||||
property degree:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.degree
|
|
||||||
|
|
||||||
property derivation:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.derivation
|
|
||||||
|
|
||||||
property echo:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.echo
|
|
||||||
|
|
||||||
property foreign:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.foreign
|
|
||||||
|
|
||||||
property gender:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.gender
|
|
||||||
|
|
||||||
property hyph:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.hyph
|
|
||||||
|
|
||||||
property inf_form:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.inf_form
|
|
||||||
|
|
||||||
property mood:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.mood
|
|
||||||
|
|
||||||
property name_type:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.name_type
|
|
||||||
|
|
||||||
property negative:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.negative
|
|
||||||
|
|
||||||
property noun_type:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.noun_type
|
|
||||||
|
|
||||||
property number:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.number
|
|
||||||
|
|
||||||
property num_form:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.num_form
|
|
||||||
|
|
||||||
property num_type:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.num_type
|
|
||||||
|
|
||||||
property num_value:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.num_value
|
|
||||||
|
|
||||||
property part_form:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.part_form
|
|
||||||
|
|
||||||
property part_type:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.part_type
|
|
||||||
|
|
||||||
property person:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.person
|
|
||||||
|
|
||||||
property polite:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.polite
|
|
||||||
|
|
||||||
property polarity:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.polarity
|
|
||||||
|
|
||||||
property poss:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.poss
|
|
||||||
|
|
||||||
property prefix:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.prefix
|
|
||||||
|
|
||||||
property prep_case:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.prep_case
|
|
||||||
|
|
||||||
property pron_type:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.pron_type
|
|
||||||
|
|
||||||
property punct_side:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.punct_side
|
|
||||||
|
|
||||||
property punct_type:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.punct_type
|
|
||||||
|
|
||||||
property reflex:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.reflex
|
|
||||||
|
|
||||||
property style:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.style
|
|
||||||
|
|
||||||
property style_variant:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.style_variant
|
|
||||||
|
|
||||||
property tense:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.tense
|
|
||||||
|
|
||||||
property typo:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.typo
|
|
||||||
|
|
||||||
property verb_form:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.verb_form
|
|
||||||
|
|
||||||
property voice:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.voice
|
|
||||||
|
|
||||||
property verb_type:
|
|
||||||
def __get__(self):
|
|
||||||
return self.c.verb_type
|
|
||||||
|
|
||||||
property abbr_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.abbr]
|
|
||||||
|
|
||||||
property adp_type_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.adp_type]
|
|
||||||
|
|
||||||
property adv_type_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.adv_type]
|
|
||||||
|
|
||||||
property animacy_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.animacy]
|
|
||||||
|
|
||||||
property aspect_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.aspect]
|
|
||||||
|
|
||||||
property case_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.case]
|
|
||||||
|
|
||||||
property conj_type_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.conj_type]
|
|
||||||
|
|
||||||
property connegative_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.connegative]
|
|
||||||
|
|
||||||
property definite_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.definite]
|
|
||||||
|
|
||||||
property degree_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.degree]
|
|
||||||
|
|
||||||
property derivation_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.derivation]
|
|
||||||
|
|
||||||
property echo_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.echo]
|
|
||||||
|
|
||||||
property foreign_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.foreign]
|
|
||||||
|
|
||||||
property gender_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.gender]
|
|
||||||
|
|
||||||
property hyph_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.hyph]
|
|
||||||
|
|
||||||
property inf_form_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.inf_form]
|
|
||||||
|
|
||||||
property name_type_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.name_type]
|
|
||||||
|
|
||||||
property negative_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.negative]
|
|
||||||
|
|
||||||
property mood_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.mood]
|
|
||||||
|
|
||||||
property number_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.number]
|
|
||||||
|
|
||||||
property num_form_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.num_form]
|
|
||||||
|
|
||||||
property num_type_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.num_type]
|
|
||||||
|
|
||||||
property num_value_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.num_value]
|
|
||||||
|
|
||||||
property part_form_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.part_form]
|
|
||||||
|
|
||||||
property part_type_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.part_type]
|
|
||||||
|
|
||||||
property person_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.person]
|
|
||||||
|
|
||||||
property polite_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.polite]
|
|
||||||
|
|
||||||
property polarity_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.polarity]
|
|
||||||
|
|
||||||
property poss_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.poss]
|
|
||||||
|
|
||||||
property prefix_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.prefix]
|
|
||||||
|
|
||||||
property prep_case_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.prep_case]
|
|
||||||
|
|
||||||
property pron_type_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.pron_type]
|
|
||||||
|
|
||||||
property punct_side_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.punct_side]
|
|
||||||
|
|
||||||
property punct_type_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.punct_type]
|
|
||||||
|
|
||||||
property reflex_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.reflex]
|
|
||||||
|
|
||||||
property style_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.style]
|
|
||||||
|
|
||||||
property style_variant_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.style_variant]
|
|
||||||
|
|
||||||
property tense_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.tense]
|
|
||||||
|
|
||||||
property typo_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.typo]
|
|
||||||
|
|
||||||
property verb_form_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.verb_form]
|
|
||||||
|
|
||||||
property voice_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.voice]
|
|
||||||
|
|
||||||
property verb_type_:
|
|
||||||
def __get__(self):
|
|
||||||
return self.vocab.strings[self.c.verb_type]
|
|
||||||
|
|
|
@ -217,6 +217,14 @@ cdef class Token:
|
||||||
def morph(self):
|
def morph(self):
|
||||||
return MorphAnalysis.from_id(self.vocab, self.c.morph)
|
return MorphAnalysis.from_id(self.vocab, self.c.morph)
|
||||||
|
|
||||||
|
property morph_:
|
||||||
|
def __get__(self):
|
||||||
|
return str(MorphAnalysis.from_id(self.vocab, self.c.morph))
|
||||||
|
|
||||||
|
def __set__(self, features):
|
||||||
|
cdef hash_t key = self.vocab.morphology.add(features)
|
||||||
|
self.c.morph = key
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def lex_id(self):
|
def lex_id(self):
|
||||||
"""RETURNS (int): Sequential ID of the token's lexical type."""
|
"""RETURNS (int): Sequential ID of the token's lexical type."""
|
||||||
|
|
Loading…
Reference in New Issue
Block a user