From c693d40791726bab752ca1f43b20cfda82b9b13c Mon Sep 17 00:00:00 2001 From: oeg Date: Thu, 6 Apr 2017 18:48:45 +0200 Subject: [PATCH] feature(model): Add support for creating the Spanish model, including rich tagset, configuration, and basic tests --- spacy/es/__init__.py | 1 + spacy/es/language_data.py | 5 +- spacy/es/tag_map.py | 1045 +++++++++--------------------- spacy/tests/conftest.py | 8 +- spacy/tests/es/__init__.py | 0 spacy/tests/es/test_exception.py | 24 + spacy/tests/es/test_text.py | 35 + 7 files changed, 376 insertions(+), 742 deletions(-) create mode 100644 spacy/tests/es/__init__.py create mode 100644 spacy/tests/es/test_exception.py create mode 100644 spacy/tests/es/test_text.py diff --git a/spacy/es/__init__.py b/spacy/es/__init__.py index b925e648a..97cca637a 100644 --- a/spacy/es/__init__.py +++ b/spacy/es/__init__.py @@ -17,4 +17,5 @@ class Spanish(Language): lex_attr_getters[LANG] = lambda text: 'es' tokenizer_exceptions = TOKENIZER_EXCEPTIONS + tag_map = TAG_MAP stop_words = STOP_WORDS diff --git a/spacy/es/language_data.py b/spacy/es/language_data.py index 3b6ae5023..1758efefa 100644 --- a/spacy/es/language_data.py +++ b/spacy/es/language_data.py @@ -5,6 +5,7 @@ from .. 
import language_data as base from ..language_data import update_exc, strings_to_exc from ..symbols import ORTH, LEMMA +from .tag_map import TAG_MAP from .stop_words import STOP_WORDS from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, ORTH_ONLY @@ -39,7 +40,7 @@ def get_time_exc(hours): ] return exc - +TAG_MAP = dict(TAG_MAP) STOP_WORDS = set(STOP_WORDS) @@ -51,4 +52,4 @@ update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.EMOTICONS)) update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.ABBREVIATIONS)) -__all__ = ["TOKENIZER_EXCEPTIONS", "STOP_WORDS"] +__all__ = ["TOKENIZER_EXCEPTIONS", "TAG_MAP", "STOP_WORDS"] diff --git a/spacy/es/tag_map.py b/spacy/es/tag_map.py index 6054d147d..bdeb7250f 100644 --- a/spacy/es/tag_map.py +++ b/spacy/es/tag_map.py @@ -1,738 +1,307 @@ -{ - "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin": { - "freq": 865, - "morph": "Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin", - "pos": "AUX" - }, - "PUNCT__PunctSide=Fin|PunctType=Brck": { - "freq": 1476, - "morph": "PunctSide=Fin|PunctType=Brck", - "pos": "PUNCT" - }, - "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin": { - "freq": 7033, - "morph": "Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin", - "pos": "VERB" - }, - "PRON__Number=Sing|Person=2|PronType=Prs": { - "freq": 132, - "morph": "Number=Sing|Person=2|PronType=Prs", - "pos": "PRON" - }, - "PRON": { - "pos": "PRON" - }, - "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": { - "freq": 525, - "morph": "Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin", - "pos": "VERB" - }, - "SYM__NumForm=Digit|NumType=Frac": { - "freq": 236, - "morph": "NumForm=Digit|NumType=Frac", - "pos": "SYM" - }, - "ADJ___": { - "freq": 515, - "morph": "_", - "pos": "ADJ" - }, - "PRON__Person=3": { - "freq": 3185, - "morph": "Person=3", - "pos": "PRON" - }, - "PRON__Case=Acc|Gender=Masc|Number=Plur|Person=3|PronType=Prs": { - "freq": 104, - "morph": 
"Case=Acc|Gender=Masc|Number=Plur|Person=3|PronType=Prs", - "pos": "PRON" - }, - "DET__Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": { - "freq": 148, - "morph": "Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", - "pos": "DET" - }, - "CONJ": { - "pos": "CONJ" - }, - "PUNCT__PunctType=Comm": { - "freq": 24475, - "morph": "PunctType=Comm", - "pos": "PUNCT" - }, - "ADV": { - "pos": "ADV" - }, - "ADV__AdpType=Prep": { - "freq": 161, - "morph": "AdpType=Prep", - "pos": "ADV" - }, - "ADJ__Number=Plur": { - "freq": 2617, - "morph": "Number=Plur", - "pos": "ADJ" - }, - "AUX__Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": { - "freq": 149, - "morph": "Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", - "pos": "AUX" - }, - "ADJ__Gender=Masc|Number=Sing|NumType=Ord": { - "freq": 654, - "morph": "Gender=Masc|Number=Sing|NumType=Ord", - "pos": "ADJ" - }, - "AUX__Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin": { - "freq": 272, - "morph": "Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin", - "pos": "AUX" - }, - "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": { - "freq": 388, - "morph": "Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin", - "pos": "AUX" - }, - "ADJ__Gender=Masc|Number=Plur": { - "freq": 1995, - "morph": "Gender=Masc|Number=Plur", - "pos": "ADJ" - }, - "DET": { - "pos": "DET" - }, - "VERB__VerbForm=Inf": { - "freq": 8204, - "morph": "VerbForm=Inf", - "pos": "VERB" - }, - "DET__Definite=Def|Gender=Fem|Number=Plur|PronType=Art": { - "freq": 4275, - "morph": "Definite=Def|Gender=Fem|Number=Plur|PronType=Art", - "pos": "DET" - }, - "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": { - "freq": 495, - "morph": "Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", - "pos": "VERB" - }, - "DET__Definite=Def|Gender=Masc|Number=Plur|PronType=Art": { - "freq": 6951, - "morph": "Definite=Def|Gender=Masc|Number=Plur|PronType=Art", - "pos": "DET" - }, - "PRON___": { - "freq": 1871, - "morph": "_", - "pos": "PRON" - }, 
- "DET__Definite=Ind|Gender=Masc|Number=Plur|PronType=Art": { - "freq": 113, - "morph": "Definite=Ind|Gender=Masc|Number=Plur|PronType=Art", - "pos": "DET" - }, - "NOUN__Number=Sing": { - "freq": 1977, - "morph": "Number=Sing", - "pos": "NOUN" - }, - "ADJ__Gender=Fem|Number=Sing|NumType=Ord": { - "freq": 568, - "morph": "Gender=Fem|Number=Sing|NumType=Ord", - "pos": "ADJ" - }, - "NOUN__Gender=Masc|Number=Sing": { - "freq": 25557, - "morph": "Gender=Masc|Number=Sing", - "pos": "NOUN" - }, - "PART": { - "pos": "PART" - }, - "ADJ__Number=Sing": { - "freq": 6619, - "morph": "Number=Sing", - "pos": "ADJ" - }, - "NUM": { - "pos": "NUM" - }, - "DET__Number=Sing|PronType=Ind": { - "freq": 309, - "morph": "Number=Sing|PronType=Ind", - "pos": "DET" - }, - "ADJ__Gender=Fem|Number=Sing|VerbForm=Part": { - "freq": 1387, - "morph": "Gender=Fem|Number=Sing|VerbForm=Part", - "pos": "ADJ" - }, - "VERB__Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": { - "freq": 272, - "morph": "Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", - "pos": "VERB" - }, - "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin": { - "freq": 1574, - "morph": "Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin", - "pos": "VERB" - }, - "PRON__Gender=Masc|Number=Sing|PronType=Dem": { - "freq": 115, - "morph": "Gender=Masc|Number=Sing|PronType=Dem", - "pos": "PRON" - }, - "ADP": { - "pos": "ADP" - }, - "NOUN__AdvType=Tim": { - "freq": 1504, - "morph": "AdvType=Tim", - "pos": "NOUN" - }, - "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": { - "freq": 130, - "morph": "Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin", - "pos": "AUX" - }, - "PRON__Case=Nom|Number=Sing|Person=1|PronType=Prs": { - "freq": 115, - "morph": "Case=Nom|Number=Sing|Person=1|PronType=Prs", - "pos": "PRON" - }, - "PUNCT__PunctType=Semi": { - "freq": 259, - "morph": "PunctType=Semi", - "pos": "PUNCT" - }, - "PUNCT__PunctSide=Ini|PunctType=Qest": { - "freq": 206, - "morph": "PunctSide=Ini|PunctType=Qest", 
- "pos": "PUNCT" - }, - "PRON__Case=Dat|Number=Sing|Person=3|PronType=Prs": { - "freq": 754, - "morph": "Case=Dat|Number=Sing|Person=3|PronType=Prs", - "pos": "PRON" - }, - "PRON__Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs": { - "freq": 624, - "morph": "Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs", - "pos": "PRON" - }, - "NUM__NumForm=Digit": { - "freq": 2979, - "morph": "NumForm=Digit", - "pos": "NUM" - }, - "PUNCT__PunctType=Colo": { - "freq": 638, - "morph": "PunctType=Colo", - "pos": "PUNCT" - }, - "PROPN": { - "pos": "PROPN" - }, - "X": { - "pos": "X" - }, - "NOUN__NumForm=Digit": { - "freq": 555, - "morph": "NumForm=Digit", - "pos": "NOUN" - }, - "VERB__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part": { - "freq": 3297, - "morph": "Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part", - "pos": "VERB" - }, - "ADJ__Gender=Masc|Number=Plur|NumType=Ord": { - "freq": 227, - "morph": "Gender=Masc|Number=Plur|NumType=Ord", - "pos": "ADJ" - }, - "PRON__Gender=Masc|Number=Sing|Person=3|PronType=Prs": { - "freq": 205, - "morph": "Gender=Masc|Number=Sing|Person=3|PronType=Prs", - "pos": "PRON" - }, - "NOUN__Number=Plur": { - "freq": 1463, - "morph": "Number=Plur", - "pos": "NOUN" - }, - "DET__Number=Sing|Person=3|Poss=Yes|PronType=Prs": { - "freq": 2909, - "morph": "Number=Sing|Person=3|Poss=Yes|PronType=Prs", - "pos": "DET" - }, - "VERB__VerbForm=Ger": { - "freq": 994, - "morph": "VerbForm=Ger", - "pos": "VERB" - }, - "INTJ": { - "pos": "INTJ" - }, - "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": { - "freq": 398, - "morph": "Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin", - "pos": "VERB" - }, - "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": { - "freq": 1403, - "morph": "Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", - "pos": "AUX" - }, - "PRON__Number=Plur|Person=1|PronType=Prs": { - "freq": 264, - "morph": "Number=Plur|Person=1|PronType=Prs", - "pos": "PRON" - }, - "ADV__Negative=Neg": { - "freq": 
2960, - "morph": "Negative=Neg", - "pos": "ADV" - }, - "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": { - "freq": 2488, - "morph": "Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", - "pos": "VERB" - }, - "DET__Gender=Masc|Number=Sing|PronType=Ind": { - "freq": 855, - "morph": "Gender=Masc|Number=Sing|PronType=Ind", - "pos": "DET" - }, - "VERB__Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": { - "freq": 408, - "morph": "Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", - "pos": "VERB" - }, - "PRON__Gender=Fem|Number=Sing|PronType=Ind": { - "freq": 237, - "morph": "Gender=Fem|Number=Sing|PronType=Ind", - "pos": "PRON" - }, - "DET__Gender=Fem|Number=Plur|PronType=Ind": { - "freq": 592, - "morph": "Gender=Fem|Number=Plur|PronType=Ind", - "pos": "DET" - }, - "ADJ__Gender=Fem|Number=Plur|VerbForm=Part": { - "freq": 614, - "morph": "Gender=Fem|Number=Plur|VerbForm=Part", - "pos": "ADJ" - }, - "DET__Gender=Fem|Number=Sing|PronType=Dem": { - "freq": 808, - "morph": "Gender=Fem|Number=Sing|PronType=Dem", - "pos": "DET" - }, - "DET__Gender=Fem|Number=Sing|PronType=Ind": { - "freq": 613, - "morph": "Gender=Fem|Number=Sing|PronType=Ind", - "pos": "DET" - }, - "DET__Definite=Ind|Gender=Masc|Number=Sing|PronType=Art": { - "freq": 4277, - "morph": "Definite=Ind|Gender=Masc|Number=Sing|PronType=Art", - "pos": "DET" - }, - "VERB__Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": { - "freq": 788, - "morph": "Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", - "pos": "VERB" - }, - "NOUN__Gender=Fem": { - "freq": 145, - "morph": "Gender=Fem", - "pos": "NOUN" - }, - "PRON__Gender=Fem|Number=Plur|PronType=Ind": { - "freq": 127, - "morph": "Gender=Fem|Number=Plur|PronType=Ind", - "pos": "PRON" - }, - "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": { - "freq": 729, - "morph": "Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", - "pos": "AUX" - }, - "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": { - "freq": 
1223, - "morph": "Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", - "pos": "VERB" - }, - "AUX__Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": { - "freq": 164, - "morph": "Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin", - "pos": "AUX" - }, - "PRON__PronType=Rel": { - "freq": 7301, - "morph": "PronType=Rel", - "pos": "PRON" - }, - "DET__Definite=Def|Number=Sing|PronType=Art": { - "freq": 928, - "morph": "Definite=Def|Number=Sing|PronType=Art", - "pos": "DET" - }, - "ADV___": { - "freq": 11334, - "morph": "_", - "pos": "ADV" - }, - "ADJ": { - "pos": "ADJ" - }, - "AUX__VerbForm=Ger": { - "freq": 154, - "morph": "VerbForm=Ger", - "pos": "AUX" - }, - "PRON__Number=Sing|PronType=Int": { - "freq": 201, - "morph": "Number=Sing|PronType=Int", - "pos": "PRON" - }, - "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": { - "freq": 1236, - "morph": "Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin", - "pos": "VERB" - }, - "NOUN__Gender=Masc|Number=Plur": { - "freq": 12310, - "morph": "Gender=Masc|Number=Plur", - "pos": "NOUN" - }, - "NOUN__Gender=Fem|Number=Plur": { - "freq": 8612, - "morph": "Gender=Fem|Number=Plur", - "pos": "NOUN" - }, - "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": { - "freq": 6343, - "morph": "Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", - "pos": "VERB" - }, - "PRON__Gender=Masc|Number=Plur|PronType=Ind": { - "freq": 460, - "morph": "Gender=Masc|Number=Plur|PronType=Ind", - "pos": "PRON" - }, - "VERB__Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": { - "freq": 100, - "morph": "Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", - "pos": "VERB" - }, - "PUNCT__PunctSide=Ini|PunctType=Brck": { - "freq": 1482, - "morph": "PunctSide=Ini|PunctType=Brck", - "pos": "PUNCT" - }, - "PRON__Gender=Masc|Number=Sing|PronType=Tot": { - "freq": 111, - "morph": "Gender=Masc|Number=Sing|PronType=Tot", - "pos": "PRON" - }, - "SCONJ": { - "pos": "SCONJ" - }, - "AUX__VerbForm=Inf": { - "freq": 1495, - 
"morph": "VerbForm=Inf", - "pos": "AUX" - }, - "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": { - "freq": 5227, - "morph": "Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", - "pos": "AUX" - }, - "ADJ__AdpType=Prep": { - "freq": 124, - "morph": "AdpType=Prep", - "pos": "ADJ" - }, - "PRON__Gender=Masc|Number=Sing|PronType=Ind": { - "freq": 624, - "morph": "Gender=Masc|Number=Sing|PronType=Ind", - "pos": "PRON" - }, - "DET__Gender=Masc|Number=Plur|PronType=Dem": { - "freq": 269, - "morph": "Gender=Masc|Number=Plur|PronType=Dem", - "pos": "DET" - }, - "ADJ__Gender=Fem|Number=Plur": { - "freq": 1612, - "morph": "Gender=Fem|Number=Plur", - "pos": "ADJ" - }, - "NUM__Gender=Masc|Number=Plur|NumType=Card": { - "freq": 104, - "morph": "Gender=Masc|Number=Plur|NumType=Card", - "pos": "NUM" - }, - "NUM__NumType=Card": { - "freq": 533, - "morph": "NumType=Card", - "pos": "NUM" - }, - "SCONJ___": { - "freq": 10129, - "morph": "_", - "pos": "SCONJ" - }, - "PRON__Number=Sing|PronType=Rel": { - "freq": 318, - "morph": "Number=Sing|PronType=Rel", - "pos": "PRON" - }, - "VERB__Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin": { - "freq": 253, - "morph": "Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin", - "pos": "VERB" - }, - "NOUN": { - "pos": "NOUN" - }, - "NOUN__Gender=Masc": { - "freq": 153, - "morph": "Gender=Masc", - "pos": "NOUN" - }, - "DET__Definite=Ind|Gender=Fem|Number=Sing|PronType=Art": { - "freq": 3087, - "morph": "Definite=Ind|Gender=Fem|Number=Sing|PronType=Art", - "pos": "DET" - }, - "ADJ__Gender=Masc|Number=Plur|VerbForm=Part": { - "freq": 997, - "morph": "Gender=Masc|Number=Plur|VerbForm=Part", - "pos": "ADJ" - }, - "PRON__Number=Sing|PronType=Dem": { - "freq": 302, - "morph": "Number=Sing|PronType=Dem", - "pos": "PRON" - }, - "PRON__Number=Sing|Person=3|PronType=Prs": { - "freq": 116, - "morph": "Number=Sing|Person=3|PronType=Prs", - "pos": "PRON" - }, - "PRON__Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs": { - "freq": 173, - "morph": 
"Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs", - "pos": "PRON" - }, - "PUNCT": { - "pos": "PUNCT" - }, - "DET__Gender=Masc|Number=Sing|PronType=Dem": { - "freq": 962, - "morph": "Gender=Masc|Number=Sing|PronType=Dem", - "pos": "DET" - }, - "PRON__Number=Plur|PronType=Rel": { - "freq": 102, - "morph": "Number=Plur|PronType=Rel", - "pos": "PRON" - }, - "ADJ__Gender=Masc|Number=Sing": { - "freq": 5136, - "morph": "Gender=Masc|Number=Sing", - "pos": "ADJ" - }, - "DET__Definite=Def|Gender=Masc|Number=Sing|PronType=Art": { - "freq": 22962, - "morph": "Definite=Def|Gender=Masc|Number=Sing|PronType=Art", - "pos": "DET" - }, - "AUX__Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": { - "freq": 107, - "morph": "Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", - "pos": "AUX" - }, - "PRON__Case=Dat|Number=Plur|Person=3|PronType=Prs": { - "freq": 220, - "morph": "Case=Dat|Number=Plur|Person=3|PronType=Prs", - "pos": "PRON" - }, - "VERB__Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part": { - "freq": 206, - "morph": "Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part", - "pos": "VERB" - }, - "DET__Number=Plur|Person=3|Poss=Yes|PronType=Prs": { - "freq": 1021, - "morph": "Number=Plur|Person=3|Poss=Yes|PronType=Prs", - "pos": "DET" - }, - "ADJ__Gender=Fem|Number=Plur|NumType=Ord": { - "freq": 101, - "morph": "Gender=Fem|Number=Plur|NumType=Ord", - "pos": "ADJ" - }, - "PRON__PronType=Int": { - "freq": 137, - "morph": "PronType=Int", - "pos": "PRON" - }, - "ADP__AdpType=Prep": { - "freq": 71133, - "morph": "AdpType=Prep", - "pos": "ADP" - }, - "DET__Gender=Masc|Number=Plur|PronType=Ind": { - "freq": 904, - "morph": "Gender=Masc|Number=Plur|PronType=Ind", - "pos": "DET" - }, - "AUX__Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": { - "freq": 299, - "morph": "Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", - "pos": "AUX" - }, - "DET__Gender=Fem|Number=Plur|PronType=Dem": { - "freq": 188, - "morph": "Gender=Fem|Number=Plur|PronType=Dem", - "pos": "DET" 
- }, - "NUM__NumForm=Digit|NumType=Card": { - "freq": 1108, - "morph": "NumForm=Digit|NumType=Card", - "pos": "NUM" - }, - "PUNCT__PunctType=Quot": { - "freq": 7380, - "morph": "PunctType=Quot", - "pos": "PUNCT" - }, - "VERB__Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part": { - "freq": 184, - "morph": "Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part", - "pos": "VERB" - }, - "PUNCT__PunctType=Dash": { - "freq": 2345, - "morph": "PunctType=Dash", - "pos": "PUNCT" - }, - "ADJ__Gender=Fem|Number=Sing": { - "freq": 3935, - "morph": "Gender=Fem|Number=Sing", - "pos": "ADJ" - }, - "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": { - "freq": 215, - "morph": "Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin", - "pos": "AUX" - }, - "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin": { - "freq": 218, - "morph": "Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin", - "pos": "AUX" - }, - "PROPN___": { - "freq": 34454, - "morph": "_", - "pos": "PROPN" - }, - "PRON__Number=Sing|PronType=Ind": { - "freq": 421, - "morph": "Number=Sing|PronType=Ind", - "pos": "PRON" - }, - "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": { - "freq": 359, - "morph": "Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin", - "pos": "VERB" - }, - "PUNCT__PunctSide=Fin|PunctType=Qest": { - "freq": 312, - "morph": "PunctSide=Fin|PunctType=Qest", - "pos": "PUNCT" - }, - "PRON__Number=Sing|Person=1|PronType=Prs": { - "freq": 298, - "morph": "Number=Sing|Person=1|PronType=Prs", - "pos": "PRON" - }, - "PART__Negative=Neg": { - "freq": 122, - "morph": "Negative=Neg", - "pos": "PART" - }, - "PRON__Gender=Masc|Number=Plur|Person=3|PronType=Prs": { - "freq": 176, - "morph": "Gender=Masc|Number=Plur|Person=3|PronType=Prs", - "pos": "PRON" - }, - "NOUN__Gender=Fem|Number=Sing": { - "freq": 24416, - "morph": "Gender=Fem|Number=Sing", - "pos": "NOUN" - }, - "ADJ__Gender=Masc|Number=Sing|VerbForm=Part": { - "freq": 2297, - "morph": 
"Gender=Masc|Number=Sing|VerbForm=Part", - "pos": "ADJ" - }, - "CONJ___": { - "freq": 12225, - "morph": "_", - "pos": "CONJ" - }, - "NUM__Number=Plur|NumType=Card": { - "freq": 2057, - "morph": "Number=Plur|NumType=Card", - "pos": "NUM" - }, - "NOUN___": { - "freq": 4829, - "morph": "_", - "pos": "NOUN" - }, - "VERB": { - "pos": "VERB" - }, - "DET__Definite=Def|Gender=Fem|Number=Sing|PronType=Art": { - "freq": 16487, - "morph": "Definite=Def|Gender=Fem|Number=Sing|PronType=Art", - "pos": "DET" - }, - "SYM": { - "pos": "SYM" - }, - "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin": { - "freq": 130, - "morph": "Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin", - "pos": "VERB" - }, - "AUX": { - "pos": "AUX" - }, - "AUX__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part": { - "freq": 494, - "morph": "Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part", - "pos": "AUX" - }, - "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": { - "freq": 199, - "morph": "Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", - "pos": "AUX" - }, - "VERB__Mood=Imp|Number=Sing|Person=2|VerbForm=Fin": { - "freq": 100, - "morph": "Mood=Imp|Number=Sing|Person=2|VerbForm=Fin", - "pos": "VERB" - }, - "PUNCT__PunctType=Peri": { - "freq": 14170, - "morph": "PunctType=Peri", - "pos": "PUNCT" - } -} \ No newline at end of file +# coding: utf8 +from __future__ import unicode_literals + +from ..symbols import * + + +TAG_MAP = { + "ADJ___": {"morph": "_", "pos": "ADJ"}, + "ADJ__AdpType=Prep": {"morph": "AdpType=Prep", "pos": "ADJ"}, + "ADJ__AdpType=Preppron|Gender=Masc|Number=Sing": {"morph": "AdpType=Preppron|Gender=Masc|Number=Sing", "pos": "ADV"}, + "ADJ__AdvType=Tim": {"morph": "AdvType=Tim", "pos": "ADJ"}, + "ADJ__Gender=Fem|Number=Plur": {"morph": "Gender=Fem|Number=Plur", "pos": "ADJ"}, + "ADJ__Gender=Fem|Number=Plur|NumType=Ord": {"morph": "Gender=Fem|Number=Plur|NumType=Ord", "pos": "ADJ"}, + "ADJ__Gender=Fem|Number=Plur|VerbForm=Part": {"morph": 
"Gender=Fem|Number=Plur|VerbForm=Part", "pos": "ADJ"}, + "ADJ__Gender=Fem|Number=Sing": {"morph": "Gender=Fem|Number=Sing", "pos": "ADJ"}, + "ADJ__Gender=Fem|Number=Sing|NumType=Ord": {"morph": "Gender=Fem|Number=Sing|NumType=Ord", "pos": "ADJ"}, + "ADJ__Gender=Fem|Number=Sing|VerbForm=Part": {"morph": "Gender=Fem|Number=Sing|VerbForm=Part", "pos": "ADJ"}, + "ADJ__Gender=Masc": {"morph": "Gender=Masc", "pos": "ADJ"}, + "ADJ__Gender=Masc|Number=Plur": {"morph": "Gender=Masc|Number=Plur", "pos": "ADJ"}, + "ADJ__Gender=Masc|Number=Plur|NumType=Ord": {"morph": "Gender=Masc|Number=Plur|NumType=Ord", "pos": "ADJ"}, + "ADJ__Gender=Masc|Number=Plur|VerbForm=Part": {"morph": "Gender=Masc|Number=Plur|VerbForm=Part", "pos": "ADJ"}, + "ADJ__Gender=Masc|Number=Sing": {"morph": "Gender=Masc|Number=Sing", "pos": "ADJ"}, + "ADJ__Gender=Masc|Number=Sing|NumType=Ord": {"morph": "Gender=Masc|Number=Sing|NumType=Ord", "pos": "ADJ"}, + "ADJ__Gender=Masc|Number=Sing|VerbForm=Part": {"morph": "Gender=Masc|Number=Sing|VerbForm=Part", "pos": "ADJ"}, + "ADJ__Number=Plur": {"morph": "Number=Plur", "pos": "ADJ"}, + "ADJ__Number=Sing": {"morph": "Number=Sing", "pos": "ADJ"}, + "ADP__AdpType=Prep": {"morph": "AdpType=Prep", "pos": "ADP"}, + "ADP__AdpType=Preppron|Gender=Fem|Number=Sing": {"morph": "AdpType=Preppron|Gender=Fem|Number=Sing", "pos": "ADP"}, + "ADP__AdpType=Preppron|Gender=Masc|Number=Plur": {"morph": "AdpType=Preppron|Gender=Masc|Number=Plur", "pos": "ADP"}, + "ADP__AdpType=Preppron|Gender=Masc|Number=Sing": {"morph": "AdpType=Preppron|Gender=Masc|Number=Sing", "pos": "ADP"}, + "ADP": { "pos": "ADP"}, + "ADV___": {"morph": "_", "pos": "ADV"}, + "ADV__AdpType=Prep": {"morph": "AdpType=Prep", "pos": "ADV"}, + "ADV__AdpType=Preppron|Gender=Masc|Number=Sing": {"morph": "AdpType=Preppron|Gender=Masc|Number=Sing", "pos": "ADV"}, + "ADV__AdvType=Tim": {"morph": "AdvType=Tim", "pos": "ADV"}, + "ADV__Gender=Masc|Number=Sing": {"morph": "Gender=Masc|Number=Sing", "pos": "ADV"}, + 
"ADV__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", "pos": "ADV"}, + "ADV__Negative=Neg": {"morph": "Negative=Neg", "pos": "ADV"}, + "ADV__Number=Plur": {"morph": "Number=Plur", "pos": "ADV"}, + "ADV__Polarity=Neg": {"morph": "Polarity=Neg", "pos": "ADV"}, + "AUX__Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part": {"morph": "Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part", "pos": "AUX"}, + "AUX__Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part": {"morph": "Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part", "pos": "AUX"}, + "AUX__Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part": {"morph": "Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part", "pos": "AUX"}, + "AUX__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part": {"morph": "Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part", "pos": "AUX"}, + "AUX__Mood=Cnd|Number=Plur|Person=1|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Plur|Person=1|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Cnd|Number=Plur|Person=3|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Plur|Person=3|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Cnd|Number=Sing|Person=1|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=1|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Cnd|Number=Sing|Person=2|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=2|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Imp|Number=Plur|Person=3|VerbForm=Fin": {"morph": "Mood=Imp|Number=Plur|Person=3|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Imp|Number=Sing|Person=2|VerbForm=Fin": {"morph": "Mood=Imp|Number=Sing|Person=2|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Imp|Number=Sing|Person=3|VerbForm=Fin": {"morph": "Mood=Imp|Number=Sing|Person=3|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin", "pos": 
"AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Past|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=2|Tense=Imp|VerbForm=Fin": {"morph": 
"Mood=Ind|Number=Sing|Person=2|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Sub|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Sub|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Sub|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Sub|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Sub|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + 
"AUX__Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__VerbForm=Ger": {"morph": "VerbForm=Ger", "pos": "AUX"}, + "AUX__VerbForm=Inf": {"morph": "VerbForm=Inf", "pos": "AUX"}, + "CCONJ___": {"morph": "_", "pos": "CONJ"}, + "CONJ___": {"morph": "_", "pos": "CONJ"}, + "DET__Definite=Def|Gender=Fem|Number=Plur|PronType=Art": {"morph": "Definite=Def|Gender=Fem|Number=Plur|PronType=Art", "pos": "DET"}, + "DET__Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {"morph": "Definite=Def|Gender=Fem|Number=Sing|PronType=Art", "pos": "DET"}, + "DET__Definite=Def|Gender=Masc|Number=Plur|PronType=Art": {"morph": "Definite=Def|Gender=Masc|Number=Plur|PronType=Art", "pos": "DET"}, + "DET__Definite=Def|Gender=Masc|Number=Sing|PronType=Art": {"morph": "Definite=Def|Gender=Masc|Number=Sing|PronType=Art", "pos": "DET"}, + "DET__Definite=Def|Gender=Masc|PronType=Art": {"morph": "Definite=Def|Gender=Masc|PronType=Art", "pos": "DET"}, + "DET__Definite=Def|Number=Sing|PronType=Art": {"morph": "Definite=Def|Number=Sing|PronType=Art", "pos": "DET"}, + "DET__Definite=Ind|Gender=Fem|Number=Plur|PronType=Art": {"morph": "Definite=Ind|Gender=Fem|Number=Plur|PronType=Art", "pos": "DET"}, + "DET__Definite=Ind|Gender=Fem|Number=Sing|NumType=Card|PronType=Art": {"morph": "Definite=Ind|Gender=Fem|Number=Sing|NumType=Card|PronType=Art", "pos": "DET"}, + "DET__Definite=Ind|Gender=Fem|Number=Sing|PronType=Art": {"morph": "Definite=Ind|Gender=Fem|Number=Sing|PronType=Art", "pos": "DET"}, + "DET__Definite=Ind|Gender=Masc|Number=Plur|PronType=Art": {"morph": "Definite=Ind|Gender=Masc|Number=Plur|PronType=Art", "pos": "DET"}, + "DET__Definite=Ind|Gender=Masc|Number=Sing|NumType=Card|PronType=Art": {"morph": "Definite=Ind|Gender=Masc|Number=Sing|NumType=Card|PronType=Art", "pos": "DET"}, + "DET__Definite=Ind|Gender=Masc|Number=Sing|PronType=Art": {"morph": "Definite=Ind|Gender=Masc|Number=Sing|PronType=Art", 
"pos": "DET"}, + "DET__Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Fem|Number=Plur|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Fem|Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Person=3|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Fem|Number=Plur|PronType=Art": {"morph": "Gender=Fem|Number=Plur|PronType=Art", "pos": "DET"}, + "DET__Gender=Fem|Number=Plur|PronType=Dem": {"morph": "Gender=Fem|Number=Plur|PronType=Dem", "pos": "DET"}, + "DET__Gender=Fem|Number=Plur|PronType=Ind": {"morph": "Gender=Fem|Number=Plur|PronType=Ind", "pos": "DET"}, + "DET__Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Fem|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Fem|Number=Sing|PronType=Art": {"morph": "Gender=Fem|Number=Sing|PronType=Art", "pos": "DET"}, + "DET__Gender=Fem|Number=Sing|PronType=Dem": {"morph": "Gender=Fem|Number=Sing|PronType=Dem", "pos": "DET"}, + "DET__Gender=Fem|Number=Sing|PronType=Ind": {"morph": "Gender=Fem|Number=Sing|PronType=Ind", "pos": "DET"}, + "DET__Gender=Fem|Number=Sing|PronType=Int": {"morph": "Gender=Fem|Number=Sing|PronType=Int", "pos": "DET"}, + 
"DET__Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Masc|Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Plur|Person=3|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Masc|Number=Plur|PronType=Art": {"morph": "Gender=Masc|Number=Plur|PronType=Art", "pos": "DET"}, + "DET__Gender=Masc|Number=Plur|PronType=Dem": {"morph": "Gender=Masc|Number=Plur|PronType=Dem", "pos": "DET"}, + "DET__Gender=Masc|Number=Plur|PronType=Ind": {"morph": "Gender=Masc|Number=Plur|PronType=Ind", "pos": "DET"}, + "DET__Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Masc|Number=Sing|PronType=Art": {"morph": "Gender=Masc|Number=Sing|PronType=Art", "pos": "DET"}, + "DET__Gender=Masc|Number=Sing|PronType=Dem": {"morph": "Gender=Masc|Number=Sing|PronType=Dem", "pos": "DET"}, + "DET__Gender=Masc|Number=Sing|PronType=Ind": {"morph": "Gender=Masc|Number=Sing|PronType=Ind", "pos": "DET"}, + "DET__Gender=Masc|Number=Sing|PronType=Int": {"morph": "Gender=Masc|Number=Sing|PronType=Int", "pos": "DET"}, + "DET__Gender=Masc|Number=Sing|PronType=Tot": {"morph": "Gender=Masc|Number=Sing|PronType=Tot", "pos": "DET"}, + "DET__Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": "Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {"morph": 
"Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"morph": "Number=Plur|Person=3|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Number=Plur|PronType=Dem": {"morph": "Number=Plur|PronType=Dem", "pos": "DET"}, + "DET__Number=Plur|PronType=Ind": {"morph": "Number=Plur|PronType=Ind", "pos": "DET"}, + "DET__Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": "Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {"morph": "Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Number=Sing|Person=3|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Number=Sing|PronType=Dem": {"morph": "Number=Sing|PronType=Dem", "pos": "DET"}, + "DET__Number=Sing|PronType=Ind": {"morph": "Number=Sing|PronType=Ind", "pos": "DET"}, + "DET__PronType=Int": {"morph": "PronType=Int", "pos": "DET"}, + "DET__PronType=Rel": {"morph": "PronType=Rel", "pos": "DET"}, + "DET": { "pos": "DET"}, + "INTJ___": {"morph": "_", "pos": "INTJ"}, + "NOUN___": {"morph": "_", "pos": "NOUN"}, + "NOUN__AdvType=Tim": {"morph": "AdvType=Tim", "pos": "NOUN"}, + "NOUN__AdvType=Tim|Gender=Masc|Number=Sing": {"morph": "AdvType=Tim|Gender=Masc|Number=Sing", "pos": "NOUN"}, + "NOUN__Gender=Fem": {"morph": "Gender=Fem", "pos": "NOUN"}, + "NOUN__Gender=Fem|Number=Plur": {"morph": "Gender=Fem|Number=Plur", "pos": "NOUN"}, + "NOUN__Gender=Fem|Number=Sing": {"morph": "Gender=Fem|Number=Sing", "pos": "NOUN"}, + "NOUN__Gender=Masc": {"morph": "Gender=Masc", "pos": "NOUN"}, + "NOUN__Gender=Masc|Number=Plur": {"morph": "Gender=Masc|Number=Plur", "pos": "NOUN"}, + "NOUN__Gender=Masc|Number=Sing": {"morph": "Gender=Masc|Number=Sing", "pos": "NOUN"}, + "NOUN__Gender=Masc|Number=Sing|VerbForm=Part": {"morph": "Gender=Masc|Number=Sing|VerbForm=Part", "pos": 
"NOUN"}, + "NOUN__Number=Plur": {"morph": "Number=Plur", "pos": "NOUN"}, + "NOUN__Number=Sing": {"morph": "Number=Sing", "pos": "NOUN"}, + "NOUN__NumForm=Digit": {"morph": "NumForm=Digit", "pos": "NOUN"}, + "NUM__Gender=Fem|Number=Plur|NumType=Card": {"morph": "Gender=Fem|Number=Plur|NumType=Card", "pos": "NUM"}, + "NUM__Gender=Fem|Number=Sing|NumType=Card": {"morph": "Gender=Fem|Number=Sing|NumType=Card", "pos": "NUM"}, + "NUM__Gender=Masc|Number=Plur|NumType=Card": {"morph": "Gender=Masc|Number=Plur|NumType=Card", "pos": "NUM"}, + "NUM__Gender=Masc|Number=Sing|NumType=Card": {"morph": "Gender=Masc|Number=Sing|NumType=Card", "pos": "NUM"}, + "NUM__Number=Plur|NumType=Card": {"morph": "Number=Plur|NumType=Card", "pos": "NUM"}, + "NUM__Number=Sing|NumType=Card": {"morph": "Number=Sing|NumType=Card", "pos": "NUM"}, + "NUM__NumForm=Digit": {"morph": "NumForm=Digit", "pos": "NUM"}, + "NUM__NumForm=Digit|NumType=Card": {"morph": "NumForm=Digit|NumType=Card", "pos": "NUM"}, + "NUM__NumForm=Digit|NumType=Frac": {"morph": "NumForm=Digit|NumType=Frac", "pos": "NUM"}, + "NUM__NumType=Card": {"morph": "NumType=Card", "pos": "NUM"}, + "PART___": {"morph": "_", "pos": "PART"}, + "PART__Negative=Neg": {"morph": "Negative=Neg", "pos": "PART"}, + "PRON___": {"morph": "_", "pos": "PRON"}, + "PRON__Case=Acc|Gender=Fem|Number=Plur|Person=3|PronType=Prs": {"morph": "Case=Acc|Gender=Fem|Number=Plur|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs": {"morph": "Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Acc|Gender=Masc|Number=Plur|Person=3|PronType=Prs": {"morph": "Case=Acc|Gender=Masc|Number=Plur|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs": {"morph": "Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Acc|Number=Plur|Person=3|PronType=Prs": {"morph": "Case=Acc|Number=Plur|Person=3|PronType=Prs", 
"pos": "PRON"}, + "PRON__Case=Acc|Number=Sing|Person=3|PronType=Prs": {"morph": "Case=Acc|Number=Sing|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Acc|Person=3|PronType=Prs": {"morph": "Case=Acc|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Dat|Number=Plur|Person=3|PronType=Prs": {"morph": "Case=Dat|Number=Plur|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Dat|Number=Sing|Person=3|PronType=Prs": {"morph": "Case=Dat|Number=Sing|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Nom|Number=Sing|Person=1|PronType=Prs": {"morph": "Case=Nom|Number=Sing|Person=1|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Nom|Number=Sing|Person=2|PronType=Prs": {"morph": "Case=Nom|Number=Sing|Person=2|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Person=3|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Plur|Person=3|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Plur|PronType=Dem": {"morph": "Gender=Fem|Number=Plur|PronType=Dem", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Plur|PronType=Ind": {"morph": "Gender=Fem|Number=Plur|PronType=Ind", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Plur|PronType=Int": {"morph": "Gender=Fem|Number=Plur|PronType=Int", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Plur|PronType=Rel": {"morph": "Gender=Fem|Number=Plur|PronType=Rel", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": 
"Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|Person=1|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Person=1|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|Person=3|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|PronType=Dem": {"morph": "Gender=Fem|Number=Sing|PronType=Dem", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|PronType=Ind": {"morph": "Gender=Fem|Number=Sing|PronType=Ind", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|PronType=Rel": {"morph": "Gender=Fem|Number=Sing|PronType=Rel", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Plur|Person=1|PronType=Prs": {"morph": "Gender=Masc|Number=Plur|Person=1|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Plur|Person=2|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Plur|Person=2|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Plur|Person=3|PronType=Prs": {"morph": "Gender=Masc|Number=Plur|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Plur|PronType=Dem": {"morph": "Gender=Masc|Number=Plur|PronType=Dem", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Plur|PronType=Ind": {"morph": "Gender=Masc|Number=Plur|PronType=Ind", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Plur|PronType=Int": {"morph": "Gender=Masc|Number=Plur|PronType=Int", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Plur|PronType=Rel": {"morph": "Gender=Masc|Number=Plur|PronType=Rel", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": 
"Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|Person=3|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|PronType=Dem": {"morph": "Gender=Masc|Number=Sing|PronType=Dem", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|PronType=Ind": {"morph": "Gender=Masc|Number=Sing|PronType=Ind", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|PronType=Int": {"morph": "Gender=Masc|Number=Sing|PronType=Int", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|PronType=Rel": {"morph": "Gender=Masc|Number=Sing|PronType=Rel", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|PronType=Tot": {"morph": "Gender=Masc|Number=Sing|PronType=Tot", "pos": "PRON"}, + "PRON__Number=Plur|Person=1": {"morph": "Number=Plur|Person=1", "pos": "PRON"}, + "PRON__Number=Plur|Person=1|PronType=Prs": {"morph": "Number=Plur|Person=1|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Plur|Person=2|Polite=Form|PronType=Prs": {"morph": "Number=Plur|Person=2|Polite=Form|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Plur|Person=2|PronType=Prs": {"morph": "Number=Plur|Person=2|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"morph": "Number=Plur|Person=3|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Plur|Person=3|PronType=Prs": {"morph": "Number=Plur|Person=3|PronType=Prs", "pos": "PRON"}, + 
"PRON__Number=Plur|PronType=Dem": {"morph": "Number=Plur|PronType=Dem", "pos": "PRON"}, + "PRON__Number=Plur|PronType=Ind": {"morph": "Number=Plur|PronType=Ind", "pos": "PRON"}, + "PRON__Number=Plur|PronType=Int": {"morph": "Number=Plur|PronType=Int", "pos": "PRON"}, + "PRON__Number=Plur|PronType=Rel": {"morph": "Number=Plur|PronType=Rel", "pos": "PRON"}, + "PRON__Number=Sing|Person=1": {"morph": "Number=Sing|Person=1", "pos": "PRON"}, + "PRON__Number=Sing|Person=1|PrepCase=Pre|PronType=Prs": {"morph": "Number=Sing|Person=1|PrepCase=Pre|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Sing|Person=1|PronType=Prs": {"morph": "Number=Sing|Person=1|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Sing|Person=2": {"morph": "Number=Sing|Person=2", "pos": "PRON"}, + "PRON__Number=Sing|Person=2|Polite=Form|PronType=Prs": {"morph": "Number=Sing|Person=2|Polite=Form|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Sing|Person=2|PrepCase=Pre|PronType=Prs": {"morph": "Number=Sing|Person=2|PrepCase=Pre|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Sing|Person=2|PronType=Prs": {"morph": "Number=Sing|Person=2|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Number=Sing|Person=3|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Sing|Person=3|PronType=Prs": {"morph": "Number=Sing|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Sing|PronType=Dem": {"morph": "Number=Sing|PronType=Dem", "pos": "PRON"}, + "PRON__Number=Sing|PronType=Ind": {"morph": "Number=Sing|PronType=Ind", "pos": "PRON"}, + "PRON__Number=Sing|PronType=Int": {"morph": "Number=Sing|PronType=Int", "pos": "PRON"}, + "PRON__Number=Sing|PronType=Rel": {"morph": "Number=Sing|PronType=Rel", "pos": "PRON"}, + "PRON__Person=1|PronType=Prs": {"morph": "Person=1|PronType=Prs", "pos": "PRON"}, + "PRON__Person=3": {"morph": "Person=3", "pos": "PRON"}, + "PRON__Person=3|PrepCase=Pre|PronType=Prs": {"morph": "Person=3|PrepCase=Pre|PronType=Prs", "pos": "PRON"}, + 
"PRON__Person=3|PronType=Prs": {"morph": "Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__PronType=Ind": {"morph": "PronType=Ind", "pos": "PRON"}, + "PRON__PronType=Int": {"morph": "PronType=Int", "pos": "PRON"}, + "PRON__PronType=Rel": {"morph": "PronType=Rel", "pos": "PRON"}, + "PROPN___": {"morph": "_", "pos": "PROPN"}, + "PUNCT___": {"morph": "_", "pos": "PUNCT"}, + "PUNCT__PunctSide=Fin|PunctType=Brck": {"morph": "PunctSide=Fin|PunctType=Brck", "pos": "PUNCT"}, + "PUNCT__PunctSide=Fin|PunctType=Excl": {"morph": "PunctSide=Fin|PunctType=Excl", "pos": "PUNCT"}, + "PUNCT__PunctSide=Fin|PunctType=Qest": {"morph": "PunctSide=Fin|PunctType=Qest", "pos": "PUNCT"}, + "PUNCT__PunctSide=Ini|PunctType=Brck": {"morph": "PunctSide=Ini|PunctType=Brck", "pos": "PUNCT"}, + "PUNCT__PunctSide=Ini|PunctType=Excl": {"morph": "PunctSide=Ini|PunctType=Excl", "pos": "PUNCT"}, + "PUNCT__PunctSide=Ini|PunctType=Qest": {"morph": "PunctSide=Ini|PunctType=Qest", "pos": "PUNCT"}, + "PUNCT__PunctType=Colo": {"morph": "PunctType=Colo", "pos": "PUNCT"}, + "PUNCT__PunctType=Comm": {"morph": "PunctType=Comm", "pos": "PUNCT"}, + "PUNCT__PunctType=Dash": {"morph": "PunctType=Dash", "pos": "PUNCT"}, + "PUNCT__PunctType=Peri": {"morph": "PunctType=Peri", "pos": "PUNCT"}, + "PUNCT__PunctType=Quot": {"morph": "PunctType=Quot", "pos": "PUNCT"}, + "PUNCT__PunctType=Semi": {"morph": "PunctType=Semi", "pos": "PUNCT"}, + "SCONJ___": {"morph": "_", "pos": "SCONJ"}, + "SYM___": {"morph": "_", "pos": "SYM"}, + "SYM__NumForm=Digit": {"morph": "NumForm=Digit", "pos": "SYM"}, + "SYM__NumForm=Digit|NumType=Frac": {"morph": "NumForm=Digit|NumType=Frac", "pos": "SYM"}, + "VERB__Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part": {"morph": "Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part", "pos": "VERB"}, + "VERB__Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part": {"morph": "Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part", "pos": "VERB"}, + "VERB__Gender=Masc|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": 
{"morph": "Gender=Masc|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part": {"morph": "Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part", "pos": "VERB"}, + "VERB__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part": {"morph": "Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part", "pos": "VERB"}, + "VERB__Mood=Cnd|Number=Plur|Person=1|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Plur|Person=1|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Cnd|Number=Plur|Person=3|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Plur|Person=3|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Cnd|Number=Sing|Person=1|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=1|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Cnd|Number=Sing|Person=2|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=2|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Imp|Number=Plur|Person=1|VerbForm=Fin": {"morph": "Mood=Imp|Number=Plur|Person=1|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Imp|Number=Plur|Person=2|VerbForm=Fin": {"morph": "Mood=Imp|Number=Plur|Person=2|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Imp|Number=Plur|Person=3|VerbForm=Fin": {"morph": "Mood=Imp|Number=Plur|Person=3|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Imp|Number=Sing|Person=2|VerbForm=Fin": {"morph": "Mood=Imp|Number=Sing|Person=2|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Imp|Number=Sing|Person=3|VerbForm=Fin": {"morph": "Mood=Imp|Number=Sing|Person=3|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Past|VerbForm=Fin": {"morph": 
"Mood=Ind|Number=Plur|Person=1|Tense=Past|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=2|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=2|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Past|VerbForm=Fin", "pos": "VERB"}, + 
"VERB__Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Person=3|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {"morph": 
"Mood=Sub|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__VerbForm=Ger": {"morph": "VerbForm=Ger", "pos": "VERB"}, + "VERB__VerbForm=Inf": {"morph": "VerbForm=Inf", "pos": "VERB"}, + "X___": {"morph": "_", "pos": "X"}, +} diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py index b2627f96f..37d3180d0 100644 --- a/spacy/tests/conftest.py +++ b/spacy/tests/conftest.py @@ -49,6 +49,10 @@ def en_vocab(): def en_parser(): return English.Defaults.create_parser() +@pytest.fixture +def es_tokenizer(): + return Spanish.Defaults.create_tokenizer() + @pytest.fixture def de_tokenizer(): @@ -79,8 +83,8 @@ def sv_tokenizer(): def bn_tokenizer(): return Bengali.Defaults.create_tokenizer() - -@pytest.fixture + +@pytest.fixture def he_tokenizer(): return Hebrew.Defaults.create_tokenizer() diff --git a/spacy/tests/es/__init__.py b/spacy/tests/es/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/spacy/tests/es/test_exception.py b/spacy/tests/es/test_exception.py new file mode 100644 index 000000000..2303e6095 --- /dev/null +++ b/spacy/tests/es/test_exception.py @@ -0,0 +1,24 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +import pytest + + +@pytest.mark.parametrize('text,lemma', [("aprox.", "aproximadamente"), + ("esq.", "esquina"), + ("pág.", "página"), + ("p.ej.", "por ejemplo") + ]) +def test_tokenizer_handles_abbr(es_tokenizer, text, lemma): + tokens = es_tokenizer(text) + assert len(tokens) == 1 + assert tokens[0].lemma_ == lemma + + +def test_tokenizer_handles_exc_in_text(es_tokenizer): + text = "Mariano Rajoy ha corrido aprox. 
medio kilómetro" + tokens = es_tokenizer(text) + assert len(tokens) == 7 + assert tokens[4].text == "aprox." + assert tokens[4].lemma_ == "aproximadamente" diff --git a/spacy/tests/es/test_text.py b/spacy/tests/es/test_text.py new file mode 100644 index 000000000..7081ea12d --- /dev/null +++ b/spacy/tests/es/test_text.py @@ -0,0 +1,35 @@ +# coding: utf-8 + +"""Test that longer and mixed texts are tokenized correctly.""" + + +from __future__ import unicode_literals + +import pytest + + +def test_tokenizer_handles_long_text(es_tokenizer): + text = """Cuando a José Mujica lo invitaron a dar una conferencia + +en Oxford este verano, su cabeza hizo "crac". La "más antigua" universidad de habla + +inglesa, esa que cobra decenas de miles de euros de matrícula a sus alumnos + +y en cuyos salones han disertado desde Margaret Thatcher hasta Stephen Hawking, + +reclamaba los servicios de este viejo de 81 años, formado en un colegio público + +en Montevideo y que pregona las bondades de la vida austera.""" + tokens = es_tokenizer(text) + assert len(tokens) == 90 + + +@pytest.mark.parametrize('text,length', [ + ("¿Por qué José Mujica?", 6), + ("“¿Oh no?”", 6), + ("""¡Sí! "Vámonos", contestó José Arcadio Buendía""", 11), + ("Corrieron aprox. 10km.", 5), + ("Y entonces por qué...", 5)]) +def test_tokenizer_handles_cnts(es_tokenizer, text, length): + tokens = es_tokenizer(text) + assert len(tokens) == length