From c693d40791726bab752ca1f43b20cfda82b9b13c Mon Sep 17 00:00:00 2001 From: oeg Date: Thu, 6 Apr 2017 18:48:45 +0200 Subject: [PATCH 1/2] feature(model): Add support for creating the Spanish model, including rich tagset, configuration, and basic tests --- spacy/es/__init__.py | 1 + spacy/es/language_data.py | 5 +- spacy/es/tag_map.py | 1045 +++++++++--------------------- spacy/tests/conftest.py | 8 +- spacy/tests/es/__init__.py | 0 spacy/tests/es/test_exception.py | 24 + spacy/tests/es/test_text.py | 35 + 7 files changed, 376 insertions(+), 742 deletions(-) create mode 100644 spacy/tests/es/__init__.py create mode 100644 spacy/tests/es/test_exception.py create mode 100644 spacy/tests/es/test_text.py diff --git a/spacy/es/__init__.py b/spacy/es/__init__.py index b925e648a..97cca637a 100644 --- a/spacy/es/__init__.py +++ b/spacy/es/__init__.py @@ -17,4 +17,5 @@ class Spanish(Language): lex_attr_getters[LANG] = lambda text: 'es' tokenizer_exceptions = TOKENIZER_EXCEPTIONS + tag_map = TAG_MAP stop_words = STOP_WORDS diff --git a/spacy/es/language_data.py b/spacy/es/language_data.py index 3b6ae5023..1758efefa 100644 --- a/spacy/es/language_data.py +++ b/spacy/es/language_data.py @@ -5,6 +5,7 @@ from .. 
import language_data as base from ..language_data import update_exc, strings_to_exc from ..symbols import ORTH, LEMMA +from .tag_map import TAG_MAP from .stop_words import STOP_WORDS from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, ORTH_ONLY @@ -39,7 +40,7 @@ def get_time_exc(hours): ] return exc - +TAG_MAP = dict(TAG_MAP) STOP_WORDS = set(STOP_WORDS) @@ -51,4 +52,4 @@ update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.EMOTICONS)) update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.ABBREVIATIONS)) -__all__ = ["TOKENIZER_EXCEPTIONS", "STOP_WORDS"] +__all__ = ["TOKENIZER_EXCEPTIONS", "TAG_MAP", "STOP_WORDS"] diff --git a/spacy/es/tag_map.py b/spacy/es/tag_map.py index 6054d147d..bdeb7250f 100644 --- a/spacy/es/tag_map.py +++ b/spacy/es/tag_map.py @@ -1,738 +1,307 @@ -{ - "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin": { - "freq": 865, - "morph": "Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin", - "pos": "AUX" - }, - "PUNCT__PunctSide=Fin|PunctType=Brck": { - "freq": 1476, - "morph": "PunctSide=Fin|PunctType=Brck", - "pos": "PUNCT" - }, - "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin": { - "freq": 7033, - "morph": "Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin", - "pos": "VERB" - }, - "PRON__Number=Sing|Person=2|PronType=Prs": { - "freq": 132, - "morph": "Number=Sing|Person=2|PronType=Prs", - "pos": "PRON" - }, - "PRON": { - "pos": "PRON" - }, - "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": { - "freq": 525, - "morph": "Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin", - "pos": "VERB" - }, - "SYM__NumForm=Digit|NumType=Frac": { - "freq": 236, - "morph": "NumForm=Digit|NumType=Frac", - "pos": "SYM" - }, - "ADJ___": { - "freq": 515, - "morph": "_", - "pos": "ADJ" - }, - "PRON__Person=3": { - "freq": 3185, - "morph": "Person=3", - "pos": "PRON" - }, - "PRON__Case=Acc|Gender=Masc|Number=Plur|Person=3|PronType=Prs": { - "freq": 104, - "morph": 
"Case=Acc|Gender=Masc|Number=Plur|Person=3|PronType=Prs", - "pos": "PRON" - }, - "DET__Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": { - "freq": 148, - "morph": "Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", - "pos": "DET" - }, - "CONJ": { - "pos": "CONJ" - }, - "PUNCT__PunctType=Comm": { - "freq": 24475, - "morph": "PunctType=Comm", - "pos": "PUNCT" - }, - "ADV": { - "pos": "ADV" - }, - "ADV__AdpType=Prep": { - "freq": 161, - "morph": "AdpType=Prep", - "pos": "ADV" - }, - "ADJ__Number=Plur": { - "freq": 2617, - "morph": "Number=Plur", - "pos": "ADJ" - }, - "AUX__Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": { - "freq": 149, - "morph": "Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", - "pos": "AUX" - }, - "ADJ__Gender=Masc|Number=Sing|NumType=Ord": { - "freq": 654, - "morph": "Gender=Masc|Number=Sing|NumType=Ord", - "pos": "ADJ" - }, - "AUX__Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin": { - "freq": 272, - "morph": "Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin", - "pos": "AUX" - }, - "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": { - "freq": 388, - "morph": "Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin", - "pos": "AUX" - }, - "ADJ__Gender=Masc|Number=Plur": { - "freq": 1995, - "morph": "Gender=Masc|Number=Plur", - "pos": "ADJ" - }, - "DET": { - "pos": "DET" - }, - "VERB__VerbForm=Inf": { - "freq": 8204, - "morph": "VerbForm=Inf", - "pos": "VERB" - }, - "DET__Definite=Def|Gender=Fem|Number=Plur|PronType=Art": { - "freq": 4275, - "morph": "Definite=Def|Gender=Fem|Number=Plur|PronType=Art", - "pos": "DET" - }, - "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": { - "freq": 495, - "morph": "Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", - "pos": "VERB" - }, - "DET__Definite=Def|Gender=Masc|Number=Plur|PronType=Art": { - "freq": 6951, - "morph": "Definite=Def|Gender=Masc|Number=Plur|PronType=Art", - "pos": "DET" - }, - "PRON___": { - "freq": 1871, - "morph": "_", - "pos": "PRON" - }, 
- "DET__Definite=Ind|Gender=Masc|Number=Plur|PronType=Art": { - "freq": 113, - "morph": "Definite=Ind|Gender=Masc|Number=Plur|PronType=Art", - "pos": "DET" - }, - "NOUN__Number=Sing": { - "freq": 1977, - "morph": "Number=Sing", - "pos": "NOUN" - }, - "ADJ__Gender=Fem|Number=Sing|NumType=Ord": { - "freq": 568, - "morph": "Gender=Fem|Number=Sing|NumType=Ord", - "pos": "ADJ" - }, - "NOUN__Gender=Masc|Number=Sing": { - "freq": 25557, - "morph": "Gender=Masc|Number=Sing", - "pos": "NOUN" - }, - "PART": { - "pos": "PART" - }, - "ADJ__Number=Sing": { - "freq": 6619, - "morph": "Number=Sing", - "pos": "ADJ" - }, - "NUM": { - "pos": "NUM" - }, - "DET__Number=Sing|PronType=Ind": { - "freq": 309, - "morph": "Number=Sing|PronType=Ind", - "pos": "DET" - }, - "ADJ__Gender=Fem|Number=Sing|VerbForm=Part": { - "freq": 1387, - "morph": "Gender=Fem|Number=Sing|VerbForm=Part", - "pos": "ADJ" - }, - "VERB__Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": { - "freq": 272, - "morph": "Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", - "pos": "VERB" - }, - "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin": { - "freq": 1574, - "morph": "Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin", - "pos": "VERB" - }, - "PRON__Gender=Masc|Number=Sing|PronType=Dem": { - "freq": 115, - "morph": "Gender=Masc|Number=Sing|PronType=Dem", - "pos": "PRON" - }, - "ADP": { - "pos": "ADP" - }, - "NOUN__AdvType=Tim": { - "freq": 1504, - "morph": "AdvType=Tim", - "pos": "NOUN" - }, - "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": { - "freq": 130, - "morph": "Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin", - "pos": "AUX" - }, - "PRON__Case=Nom|Number=Sing|Person=1|PronType=Prs": { - "freq": 115, - "morph": "Case=Nom|Number=Sing|Person=1|PronType=Prs", - "pos": "PRON" - }, - "PUNCT__PunctType=Semi": { - "freq": 259, - "morph": "PunctType=Semi", - "pos": "PUNCT" - }, - "PUNCT__PunctSide=Ini|PunctType=Qest": { - "freq": 206, - "morph": "PunctSide=Ini|PunctType=Qest", 
- "pos": "PUNCT" - }, - "PRON__Case=Dat|Number=Sing|Person=3|PronType=Prs": { - "freq": 754, - "morph": "Case=Dat|Number=Sing|Person=3|PronType=Prs", - "pos": "PRON" - }, - "PRON__Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs": { - "freq": 624, - "morph": "Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs", - "pos": "PRON" - }, - "NUM__NumForm=Digit": { - "freq": 2979, - "morph": "NumForm=Digit", - "pos": "NUM" - }, - "PUNCT__PunctType=Colo": { - "freq": 638, - "morph": "PunctType=Colo", - "pos": "PUNCT" - }, - "PROPN": { - "pos": "PROPN" - }, - "X": { - "pos": "X" - }, - "NOUN__NumForm=Digit": { - "freq": 555, - "morph": "NumForm=Digit", - "pos": "NOUN" - }, - "VERB__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part": { - "freq": 3297, - "morph": "Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part", - "pos": "VERB" - }, - "ADJ__Gender=Masc|Number=Plur|NumType=Ord": { - "freq": 227, - "morph": "Gender=Masc|Number=Plur|NumType=Ord", - "pos": "ADJ" - }, - "PRON__Gender=Masc|Number=Sing|Person=3|PronType=Prs": { - "freq": 205, - "morph": "Gender=Masc|Number=Sing|Person=3|PronType=Prs", - "pos": "PRON" - }, - "NOUN__Number=Plur": { - "freq": 1463, - "morph": "Number=Plur", - "pos": "NOUN" - }, - "DET__Number=Sing|Person=3|Poss=Yes|PronType=Prs": { - "freq": 2909, - "morph": "Number=Sing|Person=3|Poss=Yes|PronType=Prs", - "pos": "DET" - }, - "VERB__VerbForm=Ger": { - "freq": 994, - "morph": "VerbForm=Ger", - "pos": "VERB" - }, - "INTJ": { - "pos": "INTJ" - }, - "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": { - "freq": 398, - "morph": "Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin", - "pos": "VERB" - }, - "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": { - "freq": 1403, - "morph": "Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", - "pos": "AUX" - }, - "PRON__Number=Plur|Person=1|PronType=Prs": { - "freq": 264, - "morph": "Number=Plur|Person=1|PronType=Prs", - "pos": "PRON" - }, - "ADV__Negative=Neg": { - "freq": 
2960, - "morph": "Negative=Neg", - "pos": "ADV" - }, - "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": { - "freq": 2488, - "morph": "Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", - "pos": "VERB" - }, - "DET__Gender=Masc|Number=Sing|PronType=Ind": { - "freq": 855, - "morph": "Gender=Masc|Number=Sing|PronType=Ind", - "pos": "DET" - }, - "VERB__Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": { - "freq": 408, - "morph": "Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", - "pos": "VERB" - }, - "PRON__Gender=Fem|Number=Sing|PronType=Ind": { - "freq": 237, - "morph": "Gender=Fem|Number=Sing|PronType=Ind", - "pos": "PRON" - }, - "DET__Gender=Fem|Number=Plur|PronType=Ind": { - "freq": 592, - "morph": "Gender=Fem|Number=Plur|PronType=Ind", - "pos": "DET" - }, - "ADJ__Gender=Fem|Number=Plur|VerbForm=Part": { - "freq": 614, - "morph": "Gender=Fem|Number=Plur|VerbForm=Part", - "pos": "ADJ" - }, - "DET__Gender=Fem|Number=Sing|PronType=Dem": { - "freq": 808, - "morph": "Gender=Fem|Number=Sing|PronType=Dem", - "pos": "DET" - }, - "DET__Gender=Fem|Number=Sing|PronType=Ind": { - "freq": 613, - "morph": "Gender=Fem|Number=Sing|PronType=Ind", - "pos": "DET" - }, - "DET__Definite=Ind|Gender=Masc|Number=Sing|PronType=Art": { - "freq": 4277, - "morph": "Definite=Ind|Gender=Masc|Number=Sing|PronType=Art", - "pos": "DET" - }, - "VERB__Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": { - "freq": 788, - "morph": "Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", - "pos": "VERB" - }, - "NOUN__Gender=Fem": { - "freq": 145, - "morph": "Gender=Fem", - "pos": "NOUN" - }, - "PRON__Gender=Fem|Number=Plur|PronType=Ind": { - "freq": 127, - "morph": "Gender=Fem|Number=Plur|PronType=Ind", - "pos": "PRON" - }, - "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": { - "freq": 729, - "morph": "Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", - "pos": "AUX" - }, - "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": { - "freq": 
1223, - "morph": "Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", - "pos": "VERB" - }, - "AUX__Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": { - "freq": 164, - "morph": "Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin", - "pos": "AUX" - }, - "PRON__PronType=Rel": { - "freq": 7301, - "morph": "PronType=Rel", - "pos": "PRON" - }, - "DET__Definite=Def|Number=Sing|PronType=Art": { - "freq": 928, - "morph": "Definite=Def|Number=Sing|PronType=Art", - "pos": "DET" - }, - "ADV___": { - "freq": 11334, - "morph": "_", - "pos": "ADV" - }, - "ADJ": { - "pos": "ADJ" - }, - "AUX__VerbForm=Ger": { - "freq": 154, - "morph": "VerbForm=Ger", - "pos": "AUX" - }, - "PRON__Number=Sing|PronType=Int": { - "freq": 201, - "morph": "Number=Sing|PronType=Int", - "pos": "PRON" - }, - "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": { - "freq": 1236, - "morph": "Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin", - "pos": "VERB" - }, - "NOUN__Gender=Masc|Number=Plur": { - "freq": 12310, - "morph": "Gender=Masc|Number=Plur", - "pos": "NOUN" - }, - "NOUN__Gender=Fem|Number=Plur": { - "freq": 8612, - "morph": "Gender=Fem|Number=Plur", - "pos": "NOUN" - }, - "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": { - "freq": 6343, - "morph": "Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", - "pos": "VERB" - }, - "PRON__Gender=Masc|Number=Plur|PronType=Ind": { - "freq": 460, - "morph": "Gender=Masc|Number=Plur|PronType=Ind", - "pos": "PRON" - }, - "VERB__Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": { - "freq": 100, - "morph": "Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", - "pos": "VERB" - }, - "PUNCT__PunctSide=Ini|PunctType=Brck": { - "freq": 1482, - "morph": "PunctSide=Ini|PunctType=Brck", - "pos": "PUNCT" - }, - "PRON__Gender=Masc|Number=Sing|PronType=Tot": { - "freq": 111, - "morph": "Gender=Masc|Number=Sing|PronType=Tot", - "pos": "PRON" - }, - "SCONJ": { - "pos": "SCONJ" - }, - "AUX__VerbForm=Inf": { - "freq": 1495, - 
"morph": "VerbForm=Inf", - "pos": "AUX" - }, - "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": { - "freq": 5227, - "morph": "Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", - "pos": "AUX" - }, - "ADJ__AdpType=Prep": { - "freq": 124, - "morph": "AdpType=Prep", - "pos": "ADJ" - }, - "PRON__Gender=Masc|Number=Sing|PronType=Ind": { - "freq": 624, - "morph": "Gender=Masc|Number=Sing|PronType=Ind", - "pos": "PRON" - }, - "DET__Gender=Masc|Number=Plur|PronType=Dem": { - "freq": 269, - "morph": "Gender=Masc|Number=Plur|PronType=Dem", - "pos": "DET" - }, - "ADJ__Gender=Fem|Number=Plur": { - "freq": 1612, - "morph": "Gender=Fem|Number=Plur", - "pos": "ADJ" - }, - "NUM__Gender=Masc|Number=Plur|NumType=Card": { - "freq": 104, - "morph": "Gender=Masc|Number=Plur|NumType=Card", - "pos": "NUM" - }, - "NUM__NumType=Card": { - "freq": 533, - "morph": "NumType=Card", - "pos": "NUM" - }, - "SCONJ___": { - "freq": 10129, - "morph": "_", - "pos": "SCONJ" - }, - "PRON__Number=Sing|PronType=Rel": { - "freq": 318, - "morph": "Number=Sing|PronType=Rel", - "pos": "PRON" - }, - "VERB__Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin": { - "freq": 253, - "morph": "Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin", - "pos": "VERB" - }, - "NOUN": { - "pos": "NOUN" - }, - "NOUN__Gender=Masc": { - "freq": 153, - "morph": "Gender=Masc", - "pos": "NOUN" - }, - "DET__Definite=Ind|Gender=Fem|Number=Sing|PronType=Art": { - "freq": 3087, - "morph": "Definite=Ind|Gender=Fem|Number=Sing|PronType=Art", - "pos": "DET" - }, - "ADJ__Gender=Masc|Number=Plur|VerbForm=Part": { - "freq": 997, - "morph": "Gender=Masc|Number=Plur|VerbForm=Part", - "pos": "ADJ" - }, - "PRON__Number=Sing|PronType=Dem": { - "freq": 302, - "morph": "Number=Sing|PronType=Dem", - "pos": "PRON" - }, - "PRON__Number=Sing|Person=3|PronType=Prs": { - "freq": 116, - "morph": "Number=Sing|Person=3|PronType=Prs", - "pos": "PRON" - }, - "PRON__Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs": { - "freq": 173, - "morph": 
"Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs", - "pos": "PRON" - }, - "PUNCT": { - "pos": "PUNCT" - }, - "DET__Gender=Masc|Number=Sing|PronType=Dem": { - "freq": 962, - "morph": "Gender=Masc|Number=Sing|PronType=Dem", - "pos": "DET" - }, - "PRON__Number=Plur|PronType=Rel": { - "freq": 102, - "morph": "Number=Plur|PronType=Rel", - "pos": "PRON" - }, - "ADJ__Gender=Masc|Number=Sing": { - "freq": 5136, - "morph": "Gender=Masc|Number=Sing", - "pos": "ADJ" - }, - "DET__Definite=Def|Gender=Masc|Number=Sing|PronType=Art": { - "freq": 22962, - "morph": "Definite=Def|Gender=Masc|Number=Sing|PronType=Art", - "pos": "DET" - }, - "AUX__Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": { - "freq": 107, - "morph": "Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", - "pos": "AUX" - }, - "PRON__Case=Dat|Number=Plur|Person=3|PronType=Prs": { - "freq": 220, - "morph": "Case=Dat|Number=Plur|Person=3|PronType=Prs", - "pos": "PRON" - }, - "VERB__Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part": { - "freq": 206, - "morph": "Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part", - "pos": "VERB" - }, - "DET__Number=Plur|Person=3|Poss=Yes|PronType=Prs": { - "freq": 1021, - "morph": "Number=Plur|Person=3|Poss=Yes|PronType=Prs", - "pos": "DET" - }, - "ADJ__Gender=Fem|Number=Plur|NumType=Ord": { - "freq": 101, - "morph": "Gender=Fem|Number=Plur|NumType=Ord", - "pos": "ADJ" - }, - "PRON__PronType=Int": { - "freq": 137, - "morph": "PronType=Int", - "pos": "PRON" - }, - "ADP__AdpType=Prep": { - "freq": 71133, - "morph": "AdpType=Prep", - "pos": "ADP" - }, - "DET__Gender=Masc|Number=Plur|PronType=Ind": { - "freq": 904, - "morph": "Gender=Masc|Number=Plur|PronType=Ind", - "pos": "DET" - }, - "AUX__Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": { - "freq": 299, - "morph": "Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", - "pos": "AUX" - }, - "DET__Gender=Fem|Number=Plur|PronType=Dem": { - "freq": 188, - "morph": "Gender=Fem|Number=Plur|PronType=Dem", - "pos": "DET" 
- }, - "NUM__NumForm=Digit|NumType=Card": { - "freq": 1108, - "morph": "NumForm=Digit|NumType=Card", - "pos": "NUM" - }, - "PUNCT__PunctType=Quot": { - "freq": 7380, - "morph": "PunctType=Quot", - "pos": "PUNCT" - }, - "VERB__Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part": { - "freq": 184, - "morph": "Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part", - "pos": "VERB" - }, - "PUNCT__PunctType=Dash": { - "freq": 2345, - "morph": "PunctType=Dash", - "pos": "PUNCT" - }, - "ADJ__Gender=Fem|Number=Sing": { - "freq": 3935, - "morph": "Gender=Fem|Number=Sing", - "pos": "ADJ" - }, - "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": { - "freq": 215, - "morph": "Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin", - "pos": "AUX" - }, - "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin": { - "freq": 218, - "morph": "Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin", - "pos": "AUX" - }, - "PROPN___": { - "freq": 34454, - "morph": "_", - "pos": "PROPN" - }, - "PRON__Number=Sing|PronType=Ind": { - "freq": 421, - "morph": "Number=Sing|PronType=Ind", - "pos": "PRON" - }, - "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": { - "freq": 359, - "morph": "Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin", - "pos": "VERB" - }, - "PUNCT__PunctSide=Fin|PunctType=Qest": { - "freq": 312, - "morph": "PunctSide=Fin|PunctType=Qest", - "pos": "PUNCT" - }, - "PRON__Number=Sing|Person=1|PronType=Prs": { - "freq": 298, - "morph": "Number=Sing|Person=1|PronType=Prs", - "pos": "PRON" - }, - "PART__Negative=Neg": { - "freq": 122, - "morph": "Negative=Neg", - "pos": "PART" - }, - "PRON__Gender=Masc|Number=Plur|Person=3|PronType=Prs": { - "freq": 176, - "morph": "Gender=Masc|Number=Plur|Person=3|PronType=Prs", - "pos": "PRON" - }, - "NOUN__Gender=Fem|Number=Sing": { - "freq": 24416, - "morph": "Gender=Fem|Number=Sing", - "pos": "NOUN" - }, - "ADJ__Gender=Masc|Number=Sing|VerbForm=Part": { - "freq": 2297, - "morph": 
"Gender=Masc|Number=Sing|VerbForm=Part", - "pos": "ADJ" - }, - "CONJ___": { - "freq": 12225, - "morph": "_", - "pos": "CONJ" - }, - "NUM__Number=Plur|NumType=Card": { - "freq": 2057, - "morph": "Number=Plur|NumType=Card", - "pos": "NUM" - }, - "NOUN___": { - "freq": 4829, - "morph": "_", - "pos": "NOUN" - }, - "VERB": { - "pos": "VERB" - }, - "DET__Definite=Def|Gender=Fem|Number=Sing|PronType=Art": { - "freq": 16487, - "morph": "Definite=Def|Gender=Fem|Number=Sing|PronType=Art", - "pos": "DET" - }, - "SYM": { - "pos": "SYM" - }, - "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin": { - "freq": 130, - "morph": "Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin", - "pos": "VERB" - }, - "AUX": { - "pos": "AUX" - }, - "AUX__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part": { - "freq": 494, - "morph": "Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part", - "pos": "AUX" - }, - "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": { - "freq": 199, - "morph": "Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", - "pos": "AUX" - }, - "VERB__Mood=Imp|Number=Sing|Person=2|VerbForm=Fin": { - "freq": 100, - "morph": "Mood=Imp|Number=Sing|Person=2|VerbForm=Fin", - "pos": "VERB" - }, - "PUNCT__PunctType=Peri": { - "freq": 14170, - "morph": "PunctType=Peri", - "pos": "PUNCT" - } -} \ No newline at end of file +# coding: utf8 +from __future__ import unicode_literals + +from ..symbols import * + + +TAG_MAP = { + "ADJ___": {"morph": "_", "pos": "ADJ"}, + "ADJ__AdpType=Prep": {"morph": "AdpType=Prep", "pos": "ADJ"}, + "ADJ__AdpType=Preppron|Gender=Masc|Number=Sing": {"morph": "AdpType=Preppron|Gender=Masc|Number=Sing", "pos": "ADV"}, + "ADJ__AdvType=Tim": {"morph": "AdvType=Tim", "pos": "ADJ"}, + "ADJ__Gender=Fem|Number=Plur": {"morph": "Gender=Fem|Number=Plur", "pos": "ADJ"}, + "ADJ__Gender=Fem|Number=Plur|NumType=Ord": {"morph": "Gender=Fem|Number=Plur|NumType=Ord", "pos": "ADJ"}, + "ADJ__Gender=Fem|Number=Plur|VerbForm=Part": {"morph": 
"Gender=Fem|Number=Plur|VerbForm=Part", "pos": "ADJ"}, + "ADJ__Gender=Fem|Number=Sing": {"morph": "Gender=Fem|Number=Sing", "pos": "ADJ"}, + "ADJ__Gender=Fem|Number=Sing|NumType=Ord": {"morph": "Gender=Fem|Number=Sing|NumType=Ord", "pos": "ADJ"}, + "ADJ__Gender=Fem|Number=Sing|VerbForm=Part": {"morph": "Gender=Fem|Number=Sing|VerbForm=Part", "pos": "ADJ"}, + "ADJ__Gender=Masc": {"morph": "Gender=Masc", "pos": "ADJ"}, + "ADJ__Gender=Masc|Number=Plur": {"morph": "Gender=Masc|Number=Plur", "pos": "ADJ"}, + "ADJ__Gender=Masc|Number=Plur|NumType=Ord": {"morph": "Gender=Masc|Number=Plur|NumType=Ord", "pos": "ADJ"}, + "ADJ__Gender=Masc|Number=Plur|VerbForm=Part": {"morph": "Gender=Masc|Number=Plur|VerbForm=Part", "pos": "ADJ"}, + "ADJ__Gender=Masc|Number=Sing": {"morph": "Gender=Masc|Number=Sing", "pos": "ADJ"}, + "ADJ__Gender=Masc|Number=Sing|NumType=Ord": {"morph": "Gender=Masc|Number=Sing|NumType=Ord", "pos": "ADJ"}, + "ADJ__Gender=Masc|Number=Sing|VerbForm=Part": {"morph": "Gender=Masc|Number=Sing|VerbForm=Part", "pos": "ADJ"}, + "ADJ__Number=Plur": {"morph": "Number=Plur", "pos": "ADJ"}, + "ADJ__Number=Sing": {"morph": "Number=Sing", "pos": "ADJ"}, + "ADP__AdpType=Prep": {"morph": "AdpType=Prep", "pos": "ADP"}, + "ADP__AdpType=Preppron|Gender=Fem|Number=Sing": {"morph": "AdpType=Preppron|Gender=Fem|Number=Sing", "pos": "ADP"}, + "ADP__AdpType=Preppron|Gender=Masc|Number=Plur": {"morph": "AdpType=Preppron|Gender=Masc|Number=Plur", "pos": "ADP"}, + "ADP__AdpType=Preppron|Gender=Masc|Number=Sing": {"morph": "AdpType=Preppron|Gender=Masc|Number=Sing", "pos": "ADP"}, + "ADP": { "pos": "ADP"}, + "ADV___": {"morph": "_", "pos": "ADV"}, + "ADV__AdpType=Prep": {"morph": "AdpType=Prep", "pos": "ADV"}, + "ADV__AdpType=Preppron|Gender=Masc|Number=Sing": {"morph": "AdpType=Preppron|Gender=Masc|Number=Sing", "pos": "ADV"}, + "ADV__AdvType=Tim": {"morph": "AdvType=Tim", "pos": "ADV"}, + "ADV__Gender=Masc|Number=Sing": {"morph": "Gender=Masc|Number=Sing", "pos": "ADV"}, + 
"ADV__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", "pos": "ADV"}, + "ADV__Negative=Neg": {"morph": "Negative=Neg", "pos": "ADV"}, + "ADV__Number=Plur": {"morph": "Number=Plur", "pos": "ADV"}, + "ADV__Polarity=Neg": {"morph": "Polarity=Neg", "pos": "ADV"}, + "AUX__Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part": {"morph": "Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part", "pos": "AUX"}, + "AUX__Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part": {"morph": "Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part", "pos": "AUX"}, + "AUX__Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part": {"morph": "Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part", "pos": "AUX"}, + "AUX__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part": {"morph": "Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part", "pos": "AUX"}, + "AUX__Mood=Cnd|Number=Plur|Person=1|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Plur|Person=1|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Cnd|Number=Plur|Person=3|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Plur|Person=3|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Cnd|Number=Sing|Person=1|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=1|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Cnd|Number=Sing|Person=2|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=2|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Imp|Number=Plur|Person=3|VerbForm=Fin": {"morph": "Mood=Imp|Number=Plur|Person=3|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Imp|Number=Sing|Person=2|VerbForm=Fin": {"morph": "Mood=Imp|Number=Sing|Person=2|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Imp|Number=Sing|Person=3|VerbForm=Fin": {"morph": "Mood=Imp|Number=Sing|Person=3|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin", "pos": 
"AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Past|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=2|Tense=Imp|VerbForm=Fin": {"morph": 
"Mood=Ind|Number=Sing|Person=2|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Sub|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Sub|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Sub|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Sub|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Sub|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", "pos": "AUX"}, + 
"AUX__Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", "pos": "AUX"}, + "AUX__VerbForm=Ger": {"morph": "VerbForm=Ger", "pos": "AUX"}, + "AUX__VerbForm=Inf": {"morph": "VerbForm=Inf", "pos": "AUX"}, + "CCONJ___": {"morph": "_", "pos": "CONJ"}, + "CONJ___": {"morph": "_", "pos": "CONJ"}, + "DET__Definite=Def|Gender=Fem|Number=Plur|PronType=Art": {"morph": "Definite=Def|Gender=Fem|Number=Plur|PronType=Art", "pos": "DET"}, + "DET__Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {"morph": "Definite=Def|Gender=Fem|Number=Sing|PronType=Art", "pos": "DET"}, + "DET__Definite=Def|Gender=Masc|Number=Plur|PronType=Art": {"morph": "Definite=Def|Gender=Masc|Number=Plur|PronType=Art", "pos": "DET"}, + "DET__Definite=Def|Gender=Masc|Number=Sing|PronType=Art": {"morph": "Definite=Def|Gender=Masc|Number=Sing|PronType=Art", "pos": "DET"}, + "DET__Definite=Def|Gender=Masc|PronType=Art": {"morph": "Definite=Def|Gender=Masc|PronType=Art", "pos": "DET"}, + "DET__Definite=Def|Number=Sing|PronType=Art": {"morph": "Definite=Def|Number=Sing|PronType=Art", "pos": "DET"}, + "DET__Definite=Ind|Gender=Fem|Number=Plur|PronType=Art": {"morph": "Definite=Ind|Gender=Fem|Number=Plur|PronType=Art", "pos": "DET"}, + "DET__Definite=Ind|Gender=Fem|Number=Sing|NumType=Card|PronType=Art": {"morph": "Definite=Ind|Gender=Fem|Number=Sing|NumType=Card|PronType=Art", "pos": "DET"}, + "DET__Definite=Ind|Gender=Fem|Number=Sing|PronType=Art": {"morph": "Definite=Ind|Gender=Fem|Number=Sing|PronType=Art", "pos": "DET"}, + "DET__Definite=Ind|Gender=Masc|Number=Plur|PronType=Art": {"morph": "Definite=Ind|Gender=Masc|Number=Plur|PronType=Art", "pos": "DET"}, + "DET__Definite=Ind|Gender=Masc|Number=Sing|NumType=Card|PronType=Art": {"morph": "Definite=Ind|Gender=Masc|Number=Sing|NumType=Card|PronType=Art", "pos": "DET"}, + "DET__Definite=Ind|Gender=Masc|Number=Sing|PronType=Art": {"morph": "Definite=Ind|Gender=Masc|Number=Sing|PronType=Art", 
"pos": "DET"}, + "DET__Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Fem|Number=Plur|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Fem|Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Person=3|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Fem|Number=Plur|PronType=Art": {"morph": "Gender=Fem|Number=Plur|PronType=Art", "pos": "DET"}, + "DET__Gender=Fem|Number=Plur|PronType=Dem": {"morph": "Gender=Fem|Number=Plur|PronType=Dem", "pos": "DET"}, + "DET__Gender=Fem|Number=Plur|PronType=Ind": {"morph": "Gender=Fem|Number=Plur|PronType=Ind", "pos": "DET"}, + "DET__Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Fem|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Fem|Number=Sing|PronType=Art": {"morph": "Gender=Fem|Number=Sing|PronType=Art", "pos": "DET"}, + "DET__Gender=Fem|Number=Sing|PronType=Dem": {"morph": "Gender=Fem|Number=Sing|PronType=Dem", "pos": "DET"}, + "DET__Gender=Fem|Number=Sing|PronType=Ind": {"morph": "Gender=Fem|Number=Sing|PronType=Ind", "pos": "DET"}, + "DET__Gender=Fem|Number=Sing|PronType=Int": {"morph": "Gender=Fem|Number=Sing|PronType=Int", "pos": "DET"}, + 
"DET__Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Masc|Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Plur|Person=3|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Masc|Number=Plur|PronType=Art": {"morph": "Gender=Masc|Number=Plur|PronType=Art", "pos": "DET"}, + "DET__Gender=Masc|Number=Plur|PronType=Dem": {"morph": "Gender=Masc|Number=Plur|PronType=Dem", "pos": "DET"}, + "DET__Gender=Masc|Number=Plur|PronType=Ind": {"morph": "Gender=Masc|Number=Plur|PronType=Ind", "pos": "DET"}, + "DET__Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Gender=Masc|Number=Sing|PronType=Art": {"morph": "Gender=Masc|Number=Sing|PronType=Art", "pos": "DET"}, + "DET__Gender=Masc|Number=Sing|PronType=Dem": {"morph": "Gender=Masc|Number=Sing|PronType=Dem", "pos": "DET"}, + "DET__Gender=Masc|Number=Sing|PronType=Ind": {"morph": "Gender=Masc|Number=Sing|PronType=Ind", "pos": "DET"}, + "DET__Gender=Masc|Number=Sing|PronType=Int": {"morph": "Gender=Masc|Number=Sing|PronType=Int", "pos": "DET"}, + "DET__Gender=Masc|Number=Sing|PronType=Tot": {"morph": "Gender=Masc|Number=Sing|PronType=Tot", "pos": "DET"}, + "DET__Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": "Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {"morph": 
"Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"morph": "Number=Plur|Person=3|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Number=Plur|PronType=Dem": {"morph": "Number=Plur|PronType=Dem", "pos": "DET"}, + "DET__Number=Plur|PronType=Ind": {"morph": "Number=Plur|PronType=Ind", "pos": "DET"}, + "DET__Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": "Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {"morph": "Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Number=Sing|Person=3|Poss=Yes|PronType=Prs", "pos": "DET"}, + "DET__Number=Sing|PronType=Dem": {"morph": "Number=Sing|PronType=Dem", "pos": "DET"}, + "DET__Number=Sing|PronType=Ind": {"morph": "Number=Sing|PronType=Ind", "pos": "DET"}, + "DET__PronType=Int": {"morph": "PronType=Int", "pos": "DET"}, + "DET__PronType=Rel": {"morph": "PronType=Rel", "pos": "DET"}, + "DET": { "pos": "DET"}, + "INTJ___": {"morph": "_", "pos": "INTJ"}, + "NOUN___": {"morph": "_", "pos": "NOUN"}, + "NOUN__AdvType=Tim": {"morph": "AdvType=Tim", "pos": "NOUN"}, + "NOUN__AdvType=Tim|Gender=Masc|Number=Sing": {"morph": "AdvType=Tim|Gender=Masc|Number=Sing", "pos": "NOUN"}, + "NOUN__Gender=Fem": {"morph": "Gender=Fem", "pos": "NOUN"}, + "NOUN__Gender=Fem|Number=Plur": {"morph": "Gender=Fem|Number=Plur", "pos": "NOUN"}, + "NOUN__Gender=Fem|Number=Sing": {"morph": "Gender=Fem|Number=Sing", "pos": "NOUN"}, + "NOUN__Gender=Masc": {"morph": "Gender=Masc", "pos": "NOUN"}, + "NOUN__Gender=Masc|Number=Plur": {"morph": "Gender=Masc|Number=Plur", "pos": "NOUN"}, + "NOUN__Gender=Masc|Number=Sing": {"morph": "Gender=Masc|Number=Sing", "pos": "NOUN"}, + "NOUN__Gender=Masc|Number=Sing|VerbForm=Part": {"morph": "Gender=Masc|Number=Sing|VerbForm=Part", "pos": 
"NOUN"}, + "NOUN__Number=Plur": {"morph": "Number=Plur", "pos": "NOUN"}, + "NOUN__Number=Sing": {"morph": "Number=Sing", "pos": "NOUN"}, + "NOUN__NumForm=Digit": {"morph": "NumForm=Digit", "pos": "NOUN"}, + "NUM__Gender=Fem|Number=Plur|NumType=Card": {"morph": "Gender=Fem|Number=Plur|NumType=Card", "pos": "NUM"}, + "NUM__Gender=Fem|Number=Sing|NumType=Card": {"morph": "Gender=Fem|Number=Sing|NumType=Card", "pos": "NUM"}, + "NUM__Gender=Masc|Number=Plur|NumType=Card": {"morph": "Gender=Masc|Number=Plur|NumType=Card", "pos": "NUM"}, + "NUM__Gender=Masc|Number=Sing|NumType=Card": {"morph": "Gender=Masc|Number=Sing|NumType=Card", "pos": "NUM"}, + "NUM__Number=Plur|NumType=Card": {"morph": "Number=Plur|NumType=Card", "pos": "NUM"}, + "NUM__Number=Sing|NumType=Card": {"morph": "Number=Sing|NumType=Card", "pos": "NUM"}, + "NUM__NumForm=Digit": {"morph": "NumForm=Digit", "pos": "NUM"}, + "NUM__NumForm=Digit|NumType=Card": {"morph": "NumForm=Digit|NumType=Card", "pos": "NUM"}, + "NUM__NumForm=Digit|NumType=Frac": {"morph": "NumForm=Digit|NumType=Frac", "pos": "NUM"}, + "NUM__NumType=Card": {"morph": "NumType=Card", "pos": "NUM"}, + "PART___": {"morph": "_", "pos": "PART"}, + "PART__Negative=Neg": {"morph": "Negative=Neg", "pos": "PART"}, + "PRON___": {"morph": "_", "pos": "PRON"}, + "PRON__Case=Acc|Gender=Fem|Number=Plur|Person=3|PronType=Prs": {"morph": "Case=Acc|Gender=Fem|Number=Plur|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs": {"morph": "Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Acc|Gender=Masc|Number=Plur|Person=3|PronType=Prs": {"morph": "Case=Acc|Gender=Masc|Number=Plur|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs": {"morph": "Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Acc|Number=Plur|Person=3|PronType=Prs": {"morph": "Case=Acc|Number=Plur|Person=3|PronType=Prs", 
"pos": "PRON"}, + "PRON__Case=Acc|Number=Sing|Person=3|PronType=Prs": {"morph": "Case=Acc|Number=Sing|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Acc|Person=3|PronType=Prs": {"morph": "Case=Acc|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Dat|Number=Plur|Person=3|PronType=Prs": {"morph": "Case=Dat|Number=Plur|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Dat|Number=Sing|Person=3|PronType=Prs": {"morph": "Case=Dat|Number=Sing|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Nom|Number=Sing|Person=1|PronType=Prs": {"morph": "Case=Nom|Number=Sing|Person=1|PronType=Prs", "pos": "PRON"}, + "PRON__Case=Nom|Number=Sing|Person=2|PronType=Prs": {"morph": "Case=Nom|Number=Sing|Person=2|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Person=3|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Plur|Person=3|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Plur|PronType=Dem": {"morph": "Gender=Fem|Number=Plur|PronType=Dem", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Plur|PronType=Ind": {"morph": "Gender=Fem|Number=Plur|PronType=Ind", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Plur|PronType=Int": {"morph": "Gender=Fem|Number=Plur|PronType=Int", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Plur|PronType=Rel": {"morph": "Gender=Fem|Number=Plur|PronType=Rel", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": 
"Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|Person=1|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Person=1|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|Person=3|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|PronType=Dem": {"morph": "Gender=Fem|Number=Sing|PronType=Dem", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|PronType=Ind": {"morph": "Gender=Fem|Number=Sing|PronType=Ind", "pos": "PRON"}, + "PRON__Gender=Fem|Number=Sing|PronType=Rel": {"morph": "Gender=Fem|Number=Sing|PronType=Rel", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Plur|Person=1|PronType=Prs": {"morph": "Gender=Masc|Number=Plur|Person=1|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Plur|Person=2|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Plur|Person=2|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Plur|Person=3|PronType=Prs": {"morph": "Gender=Masc|Number=Plur|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Plur|PronType=Dem": {"morph": "Gender=Masc|Number=Plur|PronType=Dem", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Plur|PronType=Ind": {"morph": "Gender=Masc|Number=Plur|PronType=Ind", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Plur|PronType=Int": {"morph": "Gender=Masc|Number=Plur|PronType=Int", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Plur|PronType=Rel": {"morph": "Gender=Masc|Number=Plur|PronType=Rel", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": 
"Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|Person=3|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|PronType=Dem": {"morph": "Gender=Masc|Number=Sing|PronType=Dem", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|PronType=Ind": {"morph": "Gender=Masc|Number=Sing|PronType=Ind", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|PronType=Int": {"morph": "Gender=Masc|Number=Sing|PronType=Int", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|PronType=Rel": {"morph": "Gender=Masc|Number=Sing|PronType=Rel", "pos": "PRON"}, + "PRON__Gender=Masc|Number=Sing|PronType=Tot": {"morph": "Gender=Masc|Number=Sing|PronType=Tot", "pos": "PRON"}, + "PRON__Number=Plur|Person=1": {"morph": "Number=Plur|Person=1", "pos": "PRON"}, + "PRON__Number=Plur|Person=1|PronType=Prs": {"morph": "Number=Plur|Person=1|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Plur|Person=2|Polite=Form|PronType=Prs": {"morph": "Number=Plur|Person=2|Polite=Form|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Plur|Person=2|PronType=Prs": {"morph": "Number=Plur|Person=2|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"morph": "Number=Plur|Person=3|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Plur|Person=3|PronType=Prs": {"morph": "Number=Plur|Person=3|PronType=Prs", "pos": "PRON"}, + 
"PRON__Number=Plur|PronType=Dem": {"morph": "Number=Plur|PronType=Dem", "pos": "PRON"}, + "PRON__Number=Plur|PronType=Ind": {"morph": "Number=Plur|PronType=Ind", "pos": "PRON"}, + "PRON__Number=Plur|PronType=Int": {"morph": "Number=Plur|PronType=Int", "pos": "PRON"}, + "PRON__Number=Plur|PronType=Rel": {"morph": "Number=Plur|PronType=Rel", "pos": "PRON"}, + "PRON__Number=Sing|Person=1": {"morph": "Number=Sing|Person=1", "pos": "PRON"}, + "PRON__Number=Sing|Person=1|PrepCase=Pre|PronType=Prs": {"morph": "Number=Sing|Person=1|PrepCase=Pre|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Sing|Person=1|PronType=Prs": {"morph": "Number=Sing|Person=1|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Sing|Person=2": {"morph": "Number=Sing|Person=2", "pos": "PRON"}, + "PRON__Number=Sing|Person=2|Polite=Form|PronType=Prs": {"morph": "Number=Sing|Person=2|Polite=Form|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Sing|Person=2|PrepCase=Pre|PronType=Prs": {"morph": "Number=Sing|Person=2|PrepCase=Pre|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Sing|Person=2|PronType=Prs": {"morph": "Number=Sing|Person=2|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Number=Sing|Person=3|Poss=Yes|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Sing|Person=3|PronType=Prs": {"morph": "Number=Sing|Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__Number=Sing|PronType=Dem": {"morph": "Number=Sing|PronType=Dem", "pos": "PRON"}, + "PRON__Number=Sing|PronType=Ind": {"morph": "Number=Sing|PronType=Ind", "pos": "PRON"}, + "PRON__Number=Sing|PronType=Int": {"morph": "Number=Sing|PronType=Int", "pos": "PRON"}, + "PRON__Number=Sing|PronType=Rel": {"morph": "Number=Sing|PronType=Rel", "pos": "PRON"}, + "PRON__Person=1|PronType=Prs": {"morph": "Person=1|PronType=Prs", "pos": "PRON"}, + "PRON__Person=3": {"morph": "Person=3", "pos": "PRON"}, + "PRON__Person=3|PrepCase=Pre|PronType=Prs": {"morph": "Person=3|PrepCase=Pre|PronType=Prs", "pos": "PRON"}, + 
"PRON__Person=3|PronType=Prs": {"morph": "Person=3|PronType=Prs", "pos": "PRON"}, + "PRON__PronType=Ind": {"morph": "PronType=Ind", "pos": "PRON"}, + "PRON__PronType=Int": {"morph": "PronType=Int", "pos": "PRON"}, + "PRON__PronType=Rel": {"morph": "PronType=Rel", "pos": "PRON"}, + "PROPN___": {"morph": "_", "pos": "PROPN"}, + "PUNCT___": {"morph": "_", "pos": "PUNCT"}, + "PUNCT__PunctSide=Fin|PunctType=Brck": {"morph": "PunctSide=Fin|PunctType=Brck", "pos": "PUNCT"}, + "PUNCT__PunctSide=Fin|PunctType=Excl": {"morph": "PunctSide=Fin|PunctType=Excl", "pos": "PUNCT"}, + "PUNCT__PunctSide=Fin|PunctType=Qest": {"morph": "PunctSide=Fin|PunctType=Qest", "pos": "PUNCT"}, + "PUNCT__PunctSide=Ini|PunctType=Brck": {"morph": "PunctSide=Ini|PunctType=Brck", "pos": "PUNCT"}, + "PUNCT__PunctSide=Ini|PunctType=Excl": {"morph": "PunctSide=Ini|PunctType=Excl", "pos": "PUNCT"}, + "PUNCT__PunctSide=Ini|PunctType=Qest": {"morph": "PunctSide=Ini|PunctType=Qest", "pos": "PUNCT"}, + "PUNCT__PunctType=Colo": {"morph": "PunctType=Colo", "pos": "PUNCT"}, + "PUNCT__PunctType=Comm": {"morph": "PunctType=Comm", "pos": "PUNCT"}, + "PUNCT__PunctType=Dash": {"morph": "PunctType=Dash", "pos": "PUNCT"}, + "PUNCT__PunctType=Peri": {"morph": "PunctType=Peri", "pos": "PUNCT"}, + "PUNCT__PunctType=Quot": {"morph": "PunctType=Quot", "pos": "PUNCT"}, + "PUNCT__PunctType=Semi": {"morph": "PunctType=Semi", "pos": "PUNCT"}, + "SCONJ___": {"morph": "_", "pos": "SCONJ"}, + "SYM___": {"morph": "_", "pos": "SYM"}, + "SYM__NumForm=Digit": {"morph": "NumForm=Digit", "pos": "SYM"}, + "SYM__NumForm=Digit|NumType=Frac": {"morph": "NumForm=Digit|NumType=Frac", "pos": "SYM"}, + "VERB__Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part": {"morph": "Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part", "pos": "VERB"}, + "VERB__Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part": {"morph": "Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part", "pos": "VERB"}, + "VERB__Gender=Masc|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": 
{"morph": "Gender=Masc|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part": {"morph": "Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part", "pos": "VERB"}, + "VERB__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part": {"morph": "Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part", "pos": "VERB"}, + "VERB__Mood=Cnd|Number=Plur|Person=1|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Plur|Person=1|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Cnd|Number=Plur|Person=3|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Plur|Person=3|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Cnd|Number=Sing|Person=1|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=1|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Cnd|Number=Sing|Person=2|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=2|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Imp|Number=Plur|Person=1|VerbForm=Fin": {"morph": "Mood=Imp|Number=Plur|Person=1|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Imp|Number=Plur|Person=2|VerbForm=Fin": {"morph": "Mood=Imp|Number=Plur|Person=2|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Imp|Number=Plur|Person=3|VerbForm=Fin": {"morph": "Mood=Imp|Number=Plur|Person=3|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Imp|Number=Sing|Person=2|VerbForm=Fin": {"morph": "Mood=Imp|Number=Sing|Person=2|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Imp|Number=Sing|Person=3|VerbForm=Fin": {"morph": "Mood=Imp|Number=Sing|Person=3|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Past|VerbForm=Fin": {"morph": 
"Mood=Ind|Number=Plur|Person=1|Tense=Past|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=2|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=2|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Past|VerbForm=Fin", "pos": "VERB"}, + 
"VERB__Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Ind|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Person=3|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {"morph": 
"Mood=Sub|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", "pos": "VERB"}, + "VERB__Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", "pos": "VERB"}, + "VERB__VerbForm=Ger": {"morph": "VerbForm=Ger", "pos": "VERB"}, + "VERB__VerbForm=Inf": {"morph": "VerbForm=Inf", "pos": "VERB"}, + "X___": {"morph": "_", "pos": "X"}, +} diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py index b2627f96f..37d3180d0 100644 --- a/spacy/tests/conftest.py +++ b/spacy/tests/conftest.py @@ -49,6 +49,10 @@ def en_vocab(): def en_parser(): return English.Defaults.create_parser() +@pytest.fixture +def es_tokenizer(): + return Spanish.Defaults.create_tokenizer() + @pytest.fixture def de_tokenizer(): @@ -79,8 +83,8 @@ def sv_tokenizer(): def bn_tokenizer(): return Bengali.Defaults.create_tokenizer() - -@pytest.fixture + +@pytest.fixture def he_tokenizer(): return Hebrew.Defaults.create_tokenizer() diff --git a/spacy/tests/es/__init__.py b/spacy/tests/es/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/spacy/tests/es/test_exception.py b/spacy/tests/es/test_exception.py new file mode 100644 index 000000000..2303e6095 --- /dev/null +++ b/spacy/tests/es/test_exception.py @@ -0,0 +1,24 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +import pytest + + +@pytest.mark.parametrize('text,lemma', [("aprox.", "aproximadamente"), + ("esq.", "esquina"), + ("pág.", "página"), + ("p.ej.", "por ejemplo") + ]) +def test_tokenizer_handles_abbr(es_tokenizer, text, lemma): + tokens = es_tokenizer(text) + assert len(tokens) == 1 + assert tokens[0].lemma_ == lemma + + +def test_tokenizer_handles_exc_in_text(es_tokenizer): + text = "Mariano Rajoy ha corrido aprox. 
medio kilómetro" + tokens = es_tokenizer(text) + assert len(tokens) == 7 + assert tokens[4].text == "aprox." + assert tokens[4].lemma_ == "aproximadamente" diff --git a/spacy/tests/es/test_text.py b/spacy/tests/es/test_text.py new file mode 100644 index 000000000..7081ea12d --- /dev/null +++ b/spacy/tests/es/test_text.py @@ -0,0 +1,35 @@ +# coding: utf-8 + +"""Test that longer and mixed texts are tokenized correctly.""" + + +from __future__ import unicode_literals + +import pytest + + +def test_tokenizer_handles_long_text(es_tokenizer): + text = """Cuando a José Mujica lo invitaron a dar una conferencia + +en Oxford este verano, su cabeza hizo "crac". La "más antigua" universidad de habla + +inglesa, esa que cobra decenas de miles de euros de matrícula a sus alumnos + +y en cuyos salones han disertado desde Margaret Thatcher hasta Stephen Hawking, + +reclamaba los servicios de este viejo de 81 años, formado en un colegio público + +en Montevideo y que pregona las bondades de la vida austera.""" + tokens = es_tokenizer(text) + assert len(tokens) == 90 + + +@pytest.mark.parametrize('text,length', [ + ("¿Por qué José Mujica?", 6), + ("“¿Oh no?”", 6), + ("""¡Sí! "Vámonos", contestó José Arcadio Buendía""", 11), + ("Corrieron aprox. 
10km.", 5), + ("Y entonces por qué...", 5)]) +def test_tokenizer_handles_cnts(es_tokenizer, text, length): + tokens = es_tokenizer(text) + assert len(tokens) == length From b10bc1a177b047cd1635c833f207733e0da2088e Mon Sep 17 00:00:00 2001 From: oeg Date: Fri, 7 Apr 2017 11:58:28 +0200 Subject: [PATCH 2/2] Adds contributor agreement dvsrepo --- .github/contributors/dvsrepo.md | 106 ++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 .github/contributors/dvsrepo.md diff --git a/.github/contributors/dvsrepo.md b/.github/contributors/dvsrepo.md new file mode 100644 index 000000000..c9edcf12a --- /dev/null +++ b/.github/contributors/dvsrepo.md @@ -0,0 +1,106 @@ +# spaCy contributor agreement + +This spaCy Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). +The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term +**"you"** shall mean the person or entity identified below. + +If you agree to be bound by these terms, fill in the information requested +below and include the filled-in version with your first pull request, under the +folder [`.github/contributors/`](/.github/contributors/). The name of the file +should be your GitHub username, with the extension `.md`. For example, the user +example_user would create the file `.github/contributors/example_user.md`. + +Read this agreement carefully before signing. These terms and conditions +constitute a binding legal agreement. + +## Contributor Agreement + +1. 
The term "contribution" or "contributed materials" means any source code, +object code, patch, tool, sample, graphic, specification, manual, +documentation, or any other material posted or submitted by you to the project. + +2. With respect to any worldwide copyrights, or copyright applications and +registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. This includes, at our option, the right to sublicense these same + rights to third parties through multiple levels of sublicensees or other + licensing arrangements; + + * you agree that each of us can do all things in relation to your + contribution as if each of us were the sole owners, and if one of us makes + a derivative work of your contribution, the one who makes the derivative + work (or has it made) will be the sole owner of that derivative work; + + * you agree that you will not assert any moral rights in your contribution + against us, our licensees or transferees; + + * you agree that we may register a copyright in your contribution and + exercise all ownership rights associated with it; and + + * you agree that neither of us has any duty to consult with, obtain the + consent of, pay or render an accounting to the other for any use or + distribution of your contribution. + +3. 
With respect to any patents you own, or that you can license without payment +to any third party, you hereby grant to us a perpetual, irrevocable, +non-exclusive, worldwide, no-charge, royalty-free license to: + + * make, have made, use, sell, offer to sell, import, and otherwise transfer + your contribution in whole or in part, alone or in combination with or + included in any product, work or materials arising out of the project to + which your contribution was submitted, and + + * at our option, to sublicense these same rights to third parties through + multiple levels of sublicensees or other licensing arrangements. + +4. Except as set out above, you keep all right, title, and interest in your +contribution. The rights that you grant to us under these terms are effective +on the date you first submitted a contribution to us, even if your submission +took place before the date you sign these terms. + +5. You covenant, represent, warrant and agree that: + + * Each contribution that you submit is and shall be an original work of + authorship and you can legally grant the rights set out in this SCA; + + * to the best of your knowledge, each contribution will not violate any + third party's copyrights, trademarks, patents, or other intellectual + property rights; and + + * each contribution shall be in compliance with U.S. export control laws and + other applicable export and import laws. You agree to notify us if you + become aware of any circumstance which would make any of the foregoing + representations inaccurate in any respect. We may publicly disclose your + participation in the project, including the fact that you have signed the SCA. + +6. This SCA is governed by the laws of the State of California and applicable +U.S. Federal law. Any choice of law rules will not apply. + +7. Please place an “x” on one of the applicable statements below. 
Please do NOT +mark both statements: + + * [x] I am signing on behalf of myself as an individual and no other person + or entity, including my employer, has or will have rights with respect to my + contributions. + + * [ ] I am signing on behalf of my employer or a legal entity and I have the + actual authority to contractually bind that entity. + +## Contributor Details + +| Field | Entry | +|------------------------------- | -------------------- | +| Name | Daniel Vila-Suero | +| Company name (if applicable) | recogn.ai | +| Title or role (if applicable) | | +| Date | 07-04-2017 | +| GitHub username | dvsrepo | +| Website (optional) | recogn.ai |