From d83e3c44c5db111706d3b2a5efdcbaa1e86b5f6e Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Wed, 15 Jul 2020 19:44:18 +0200 Subject: [PATCH] Remove corpus-specific morph rules * Remove corpus-specific morph rules * Add options similar to tag maps to provide them in the `train` and `debug-data` CLIs --- spacy/cli/debug_data.py | 7 + spacy/cli/train.py | 9 + spacy/lang/bn/morph_rules.py | 263 --- spacy/lang/da/__init__.py | 2 - spacy/lang/da/morph_rules.py | 308 ---- spacy/lang/en/__init__.py | 2 - spacy/lang/en/morph_rules.py | 490 ------ spacy/lang/lt/__init__.py | 2 - spacy/lang/lt/morph_rules.py | 3072 ---------------------------------- spacy/lang/nb/__init__.py | 2 - spacy/lang/nb/morph_rules.py | 665 -------- spacy/lang/sv/__init__.py | 3 - spacy/lang/sv/morph_rules.py | 285 ---- 13 files changed, 16 insertions(+), 5094 deletions(-) delete mode 100644 spacy/lang/bn/morph_rules.py delete mode 100644 spacy/lang/da/morph_rules.py delete mode 100644 spacy/lang/en/morph_rules.py delete mode 100644 spacy/lang/lt/morph_rules.py delete mode 100644 spacy/lang/nb/morph_rules.py delete mode 100644 spacy/lang/sv/morph_rules.py diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index 9d1986d8a..da1fa42cd 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -131,9 +131,16 @@ def debug_data( tag_map = {} if tag_map_path is not None: tag_map = srsly.read_json(tag_map_path) + morph_rules_path = util.ensure_path(config["training"]["morph_rules"]) + morph_rules = {} + if morph_rules_path is not None: + morph_rules = srsly.read_json(morph_rules_path) # Update tag map with provided mapping nlp.vocab.morphology.tag_map.update(tag_map) + # Load morph rules + nlp.vocab.morphology.load_morph_exceptions(morph_rules) + msg.divider("Data file validation") # Create the gold corpus to be able to better analyze data diff --git a/spacy/cli/train.py b/spacy/cli/train.py index feebc30d4..14ccf7299 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -127,6 +127,9 @@ def train( # Update tag map with provided mapping nlp.vocab.morphology.tag_map.update(tag_map) + # Load morph rules + nlp.vocab.morphology.load_morph_exceptions(morph_rules) + # Create empty extra lexeme tables so the data from spacy-lookups-data # isn't loaded if these features are accessed if config["training"]["omit_extra_lookups"]: @@ -482,6 +485,12 @@ def load_from_paths(config): if not tag_map_path.exists(): msg.fail("Can't find tag map path", tag_map_path, exits=1) tag_map = srsly.read_json(config["training"]["tag_map"]) + morph_rules = {} + morph_rules_path = util.ensure_path(config["training"]["morph_rules"]) + if morph_rules_path is not None: + if not morph_rules_path.exists(): + msg.fail("Can't find tag map path", morph_rules_path, exits=1) + morph_rules = srsly.read_json(config["training"]["morph_rules"]) weights_data = None init_tok2vec = util.ensure_path(config["training"]["init_tok2vec"]) if init_tok2vec is not None: diff --git a/spacy/lang/bn/morph_rules.py b/spacy/lang/bn/morph_rules.py deleted file mode 100644 index 44d6108e9..000000000 --- a/spacy/lang/bn/morph_rules.py +++ /dev/null @@ -1,263 +0,0 @@ -from ...symbols import LEMMA, PRON_LEMMA - - -MORPH_RULES = { - "PRP": { - "ঐ": {LEMMA: PRON_LEMMA, "PronType": "Dem"}, - "ওই": {LEMMA: PRON_LEMMA, "PronType": "Dem"}, - "আমাকে": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Person": "One", - "PronType": "Prs", - "Case": "Acc", - }, - "কি": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Gender": "Neut", - "PronType": "Int", - "Case": "Acc", - }, - "সে": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Person": "Three", - "PronType": "Prs", - "Case": "Nom", - }, - "কিসে": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Gender": "Neut", - "PronType": "Int", - "Case": "Acc", - }, - "তাকে": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Person": "Three", - "PronType": "Prs", - "Case": "Acc", - }, - "স্বয়ং": {LEMMA: PRON_LEMMA, "Reflex": "Yes", "PronType": "Ref"}, - "কোনগুলো": { - LEMMA: PRON_LEMMA, - "Number": "Plur", - "Gender": "Neut", - "PronType": "Int", - "Case": "Acc", - }, - "তুমি": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Person": "Two", - "PronType": "Prs", - "Case": "Nom", - }, - "তুই": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Person": "Two", - "PronType": "Prs", - "Case": "Nom", - }, - "তাদেরকে": { - LEMMA: PRON_LEMMA, - "Number": "Plur", - "Person": "Three", - "PronType": "Prs", - "Case": "Acc", - }, - "আমরা": { - LEMMA: PRON_LEMMA, - "Number": "Plur", - "Person": "One ", - "PronType": "Prs", - "Case": "Nom", - }, - "যিনি": {LEMMA: PRON_LEMMA, "Number": "Sing", "PronType": "Rel", "Case": "Nom"}, - "আমাদেরকে": { - LEMMA: PRON_LEMMA, - "Number": "Plur", - "Person": "One", - "PronType": "Prs", - "Case": "Acc", - }, - "কোন": {LEMMA: PRON_LEMMA, "Number": "Sing", "PronType": "Int", "Case": "Acc"}, - "কারা": {LEMMA: PRON_LEMMA, "Number": "Plur", "PronType": "Int", "Case": "Acc"}, - "তোমাকে": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Person": "Two", - "PronType": "Prs", - "Case": "Acc", - }, - "তোকে": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Person": "Two", - "PronType": "Prs", - "Case": "Acc", - }, - "খোদ": {LEMMA: PRON_LEMMA, "Reflex": "Yes", "PronType": "Ref"}, - "কে": {LEMMA: PRON_LEMMA, "Number": "Sing", "PronType": "Int", "Case": "Acc"}, - "যারা": {LEMMA: PRON_LEMMA, "Number": "Plur", "PronType": "Rel", "Case": "Nom"}, - "যে": {LEMMA: PRON_LEMMA, "Number": "Sing", "PronType": "Rel", "Case": "Nom"}, - "তোমরা": { - LEMMA: PRON_LEMMA, - "Number": "Plur", - "Person": "Two", - "PronType": "Prs", - "Case": "Nom", - }, - "তোরা": { - LEMMA: PRON_LEMMA, - "Number": "Plur", - "Person": "Two", - "PronType": "Prs", - "Case": "Nom", - }, - "তোমাদেরকে": { - LEMMA: PRON_LEMMA, - "Number": "Plur", - "Person": "Two", - "PronType": "Prs", - "Case": "Acc", - }, - "তোদেরকে": { - LEMMA: PRON_LEMMA, - "Number": "Plur", - "Person": "Two", - "PronType": "Prs", - "Case": "Acc", - }, - "আপন": {LEMMA: PRON_LEMMA, "Reflex": "Yes", "PronType": "Ref"}, - "এ": {LEMMA: PRON_LEMMA, "PronType": "Dem"}, - "নিজ": {LEMMA: PRON_LEMMA, "Reflex": "Yes", "PronType": "Ref"}, - "কার": {LEMMA: PRON_LEMMA, "Number": "Sing", "PronType": "Int", "Case": "Acc"}, - "যা": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Gender": "Neut", - "PronType": "Rel", - "Case": "Nom", - }, - "তারা": { - LEMMA: PRON_LEMMA, - "Number": "Plur", - "Person": "Three", - "PronType": "Prs", - "Case": "Nom", - }, - "আমি": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Person": "One", - "PronType": "Prs", - "Case": "Nom", - }, - }, - "PRP$": { - "আমার": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Person": "One", - "PronType": "Prs", - "Poss": "Yes", - "Case": "Nom", - }, - "মোর": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Person": "One", - "PronType": "Prs", - "Poss": "Yes", - "Case": "Nom", - }, - "মোদের": { - LEMMA: PRON_LEMMA, - "Number": "Plur", - "Person": "One", - "PronType": "Prs", - "Poss": "Yes", - "Case": "Nom", - }, - "তার": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Person": "Three", - "PronType": "Prs", - "Poss": "Yes", - "Case": "Nom", - }, - "তাহাার": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Person": "Three", - "PronType": "Prs", - "Poss": "Yes", - "Case": "Nom", - }, - "তোমাদের": { - LEMMA: PRON_LEMMA, - "Number": "Plur", - "Person": "Two", - "PronType": "Prs", - "Poss": "Yes", - "Case": "Nom", - }, - "আমাদের": { - LEMMA: PRON_LEMMA, - "Number": "Plur", - "Person": "One", - "PronType": "Prs", - "Poss": "Yes", - "Case": "Nom", - }, - "তোমার": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Person": "Two", - "PronType": "Prs", - "Poss": "Yes", - "Case": "Nom", - }, - "তোর": { - LEMMA: PRON_LEMMA, - "Number": "Sing", - "Person": "Two", - "PronType": "Prs", - "Poss": "Yes", - "Case": "Nom", - }, - "তাদের": { - LEMMA: PRON_LEMMA, - "Number": "Plur", - "Person": "Three", - "PronType": "Prs", - "Poss": "Yes", - "Case": "Nom", - }, - "কাদের": { - LEMMA: PRON_LEMMA, - "Number": "Plur", - "PronType": "Int", - "Case": "Acc", - }, - "তোদের": { - LEMMA: PRON_LEMMA, - "Number": "Plur", - "Person": "Two", - "PronType": "Prs", - "Poss": "Yes", - "Case": "Nom", - }, - "যাদের": { - LEMMA: PRON_LEMMA, - "Number": "Plur", - "PronType": "Int", - "Case": "Acc", - }, - }, -} diff --git a/spacy/lang/da/__init__.py b/spacy/lang/da/__init__.py index e0f0061ec..10f4e9afc 100644 --- a/spacy/lang/da/__init__.py +++ b/spacy/lang/da/__init__.py @@ -2,7 +2,6 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES from .stop_words import STOP_WORDS from .lex_attrs import LEX_ATTRS -from .morph_rules import MORPH_RULES from ..tokenizer_exceptions import BASE_EXCEPTIONS from ...language import Language @@ -15,7 +14,6 @@ class DanishDefaults(Language.Defaults): lex_attr_getters.update(LEX_ATTRS) lex_attr_getters[LANG] = lambda text: "da" tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) - morph_rules = MORPH_RULES infixes = TOKENIZER_INFIXES suffixes = TOKENIZER_SUFFIXES stop_words = STOP_WORDS diff --git a/spacy/lang/da/morph_rules.py b/spacy/lang/da/morph_rules.py deleted file mode 100644 index 06704f482..000000000 --- a/spacy/lang/da/morph_rules.py +++ /dev/null @@ -1,308 +0,0 @@ -from ...symbols import LEMMA, PRON_LEMMA - -# Source: Danish Universal Dependencies and http://fjern-uv.dk/pronom.php - -# Note: The Danish Universal Dependencies specify Case=Acc for all instances -# of "den"/"det" even when the case is in fact "Nom". In the rules below, Case -# is left unspecified for "den" and "det". - -MORPH_RULES = { - "PRON": { - "jeg": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Case": "Nom", - "Gender": "Com", - }, # Case=Nom|Gender=Com|Number=Sing|Person=1|PronType=Prs - "mig": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Case": "Acc", - "Gender": "Com", - }, # Case=Acc|Gender=Com|Number=Sing|Person=1|PronType=Prs - "min": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Com", - }, # Gender=Com|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs - "mit": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Neut", - }, # Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs - "vor": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Com", - }, # Gender=Com|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs|Style=Form - "vort": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Neut", - }, # Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs|Style=Form - "du": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Sing", - "Case": "Nom", - "Gender": "Com", - }, # Case=Nom|Gender=Com|Number=Sing|Person=2|PronType=Prs - "dig": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Sing", - "Case": "Acc", - "Gender": "Com", - }, # Case=Acc|Gender=Com|Number=Sing|Person=2|PronType=Prs - "din": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Com", - }, # Gender=Com|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs - "dit": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Neut", - }, # Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs - "han": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Case": "Nom", - "Gender": "Com", - }, # Case=Nom|Gender=Com|Number=Sing|Person=3|PronType=Prs - "hun": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Case": "Nom", - "Gender": "Com", - }, # Case=Nom|Gender=Com|Number=Sing|Person=3|PronType=Prs - "den": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Com", - }, # Case=Acc|Gender=Com|Number=Sing|Person=3|PronType=Prs, See note above. - "det": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Neut", - }, # Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs See note above. - "ham": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Case": "Acc", - "Gender": "Com", - }, # Case=Acc|Gender=Com|Number=Sing|Person=3|PronType=Prs - "hende": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Case": "Acc", - "Gender": "Com", - }, # Case=Acc|Gender=Com|Number=Sing|Person=3|PronType=Prs - "sin": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Com", - "Reflex": "Yes", - }, # Gender=Com|Number=Sing|Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs|Reflex=Yes - "sit": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Neut", - "Reflex": "Yes", - }, # Gender=Neut|Number=Sing|Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs|Reflex=Yes - "vi": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Case": "Nom", - "Gender": "Com", - }, # Case=Nom|Gender=Com|Number=Plur|Person=1|PronType=Prs - "os": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Case": "Acc", - "Gender": "Com", - }, # Case=Acc|Gender=Com|Number=Plur|Person=1|PronType=Prs - "mine": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Poss": "Yes", - }, # Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs - "vore": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Poss": "Yes", - }, # Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs|Style=Form - "I": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Plur", - "Case": "Nom", - "Gender": "Com", - }, # Case=Nom|Gender=Com|Number=Plur|Person=2|PronType=Prs - "jer": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Plur", - "Case": "Acc", - "Gender": "Com", - }, # Case=Acc|Gender=Com|Number=Plur|Person=2|PronType=Prs - "dine": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Plur", - "Poss": "Yes", - }, # Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs - "de": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Plur", - "Case": "Nom", - }, # Case=Nom|Number=Plur|Person=3|PronType=Prs - "dem": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Plur", - "Case": "Acc", - }, # Case=Acc|Number=Plur|Person=3|PronType=Prs - "sine": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, # Number=Plur|Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs|Reflex=Yes - "vores": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Poss": "Yes", - }, # Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs - "De": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Case": "Nom", - "Gender": "Com", - }, # Case=Nom|Gender=Com|Person=2|Polite=Form|PronType=Prs - "Dem": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Case": "Acc", - "Gender": "Com", - }, # Case=Acc|Gender=Com|Person=2|Polite=Form|PronType=Prs - "Deres": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Poss": "Yes", - }, # Person=2|Polite=Form|Poss=Yes|PronType=Prs - "jeres": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Poss": "Yes", - }, # Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs - "sig": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Case": "Acc", - "Reflex": "Yes", - }, # Case=Acc|Person=3|PronType=Prs|Reflex=Yes - "hans": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Poss": "Yes", - }, # Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs - "hendes": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Poss": "Yes", - }, # Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs - "dens": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Poss": "Yes", - }, # Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs - "dets": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Poss": "Yes", - }, # Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs - "deres": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Poss": "Yes", - }, # Number[psor]=Plur|Person=3|Poss=Yes|PronType=Prs - }, - "VERB": { - "er": {LEMMA: "være", "VerbForm": "Fin", "Tense": "Pres"}, - "var": {LEMMA: "være", "VerbForm": "Fin", "Tense": "Past"}, - }, -} - -for tag, rules in MORPH_RULES.items(): - for key, attrs in dict(rules).items(): - rules[key.title()] = attrs diff --git a/spacy/lang/en/__init__.py b/spacy/lang/en/__init__.py index 76aff9cd3..9626704da 100644 --- a/spacy/lang/en/__init__.py +++ b/spacy/lang/en/__init__.py @@ -1,7 +1,6 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS from .stop_words import STOP_WORDS from .lex_attrs import LEX_ATTRS -from .morph_rules import MORPH_RULES from .syntax_iterators import SYNTAX_ITERATORS from ..tokenizer_exceptions import BASE_EXCEPTIONS @@ -20,7 +19,6 @@ class EnglishDefaults(Language.Defaults): lex_attr_getters[LANG] = _return_en tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) stop_words = STOP_WORDS - morph_rules = MORPH_RULES syntax_iterators = SYNTAX_ITERATORS single_orth_variants = [ {"tags": ["NFP"], "variants": ["…", "..."]}, diff --git a/spacy/lang/en/morph_rules.py b/spacy/lang/en/morph_rules.py deleted file mode 100644 index aa3e6ce57..000000000 --- a/spacy/lang/en/morph_rules.py +++ /dev/null @@ -1,490 +0,0 @@ -from ...symbols import LEMMA, PRON_LEMMA - -# Several entries here look pretty suspicious. These will get the POS SCONJ -# given the tag IN, when an adpositional reading seems much more likely for -# a lot of these prepositions. I'm not sure what I was running in 04395ffa4 -# when I did this? It doesn't seem right. -_subordinating_conjunctions = [ - "that", - "if", - "as", - "because", - # "of", - # "for", - # "before", - # "in", - "while", - # "after", - "since", - "like", - # "with", - "so", - # "to", - # "by", - # "on", - # "about", - "than", - "whether", - "although", - # "from", - "though", - # "until", - "unless", - "once", - # "without", - # "at", - # "into", - "cause", - # "over", - "upon", - "till", - "whereas", - # "beyond", - "whilst", - "except", - "despite", - "wether", - # "then", - "but", - "becuse", - "whie", - # "below", - # "against", - "it", - "w/out", - # "toward", - "albeit", - "save", - "besides", - "becouse", - "coz", - "til", - "ask", - "i'd", - "out", - "near", - "seince", - # "towards", - "tho", - "sice", - "will", -] - -# This seems kind of wrong too? -# _relative_pronouns = ["this", "that", "those", "these"] - -MORPH_RULES = { - # "DT": {word: {"POS": "PRON"} for word in _relative_pronouns}, - "IN": {word: {"POS": "SCONJ"} for word in _subordinating_conjunctions}, - "NN": { - "something": {"POS": "PRON"}, - "anyone": {"POS": "PRON"}, - "anything": {"POS": "PRON"}, - "nothing": {"POS": "PRON"}, - "someone": {"POS": "PRON"}, - "everything": {"POS": "PRON"}, - "everyone": {"POS": "PRON"}, - "everybody": {"POS": "PRON"}, - "nobody": {"POS": "PRON"}, - "somebody": {"POS": "PRON"}, - "anybody": {"POS": "PRON"}, - "any1": {"POS": "PRON"}, - }, - "PRP": { - "I": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Case": "Nom", - }, - "me": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Case": "Acc", - }, - "you": {LEMMA: PRON_LEMMA, "POS": "PRON", "PronType": "Prs", "Person": "Two"}, - "he": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Masc", - "Case": "Nom", - }, - "him": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Masc", - "Case": "Acc", - }, - "she": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Fem", - "Case": "Nom", - }, - "her": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Fem", - "Case": "Acc", - }, - "it": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Neut", - }, - "we": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Case": "Nom", - }, - "us": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Case": "Acc", - }, - "they": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Three", - "Number": "Plur", - "Case": "Nom", - }, - "them": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Three", - "Number": "Plur", - "Case": "Acc", - }, - "mine": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Poss": "Yes", - "Reflex": "Yes", - }, - "his": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Masc", - "Poss": "Yes", - "Reflex": "Yes", - }, - "hers": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Fem", - "Poss": "Yes", - "Reflex": "Yes", - }, - "its": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Neut", - "Poss": "Yes", - "Reflex": "Yes", - }, - "ours": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - "yours": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Two", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - "theirs": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Three", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - "myself": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Case": "Acc", - "Reflex": "Yes", - }, - "yourself": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Two", - "Case": "Acc", - "Reflex": "Yes", - }, - "himself": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Case": "Acc", - "Gender": "Masc", - "Reflex": "Yes", - }, - "herself": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Case": "Acc", - "Gender": "Fem", - "Reflex": "Yes", - }, - "itself": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Case": "Acc", - "Gender": "Neut", - "Reflex": "Yes", - }, - "themself": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Case": "Acc", - "Reflex": "Yes", - }, - "ourselves": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Case": "Acc", - "Reflex": "Yes", - }, - "yourselves": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Two", - "Case": "Acc", - "Reflex": "Yes", - }, - "themselves": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "PronType": "Prs", - "Person": "Three", - "Number": "Plur", - "Case": "Acc", - "Reflex": "Yes", - }, - }, - "PRP$": { - "my": { - LEMMA: PRON_LEMMA, - "Person": "One", - "Number": "Sing", - "PronType": "Prs", - "Poss": "Yes", - }, - "your": {LEMMA: PRON_LEMMA, "Person": "Two", "PronType": "Prs", "Poss": "Yes"}, - "his": { - LEMMA: PRON_LEMMA, - "Person": "Three", - "Number": "Sing", - "Gender": "Masc", - "PronType": "Prs", - "Poss": "Yes", - }, - "her": { - LEMMA: PRON_LEMMA, - "Person": "Three", - "Number": "Sing", - "Gender": "Fem", - "PronType": "Prs", - "Poss": "Yes", - }, - "its": { - LEMMA: PRON_LEMMA, - "Person": "Three", - "Number": "Sing", - "Gender": "Neut", - "PronType": "Prs", - "Poss": "Yes", - }, - "our": { - LEMMA: PRON_LEMMA, - "Person": "One", - "Number": "Plur", - "PronType": "Prs", - "Poss": "Yes", - }, - "their": { - LEMMA: PRON_LEMMA, - "Person": "Three", - "Number": "Plur", - "PronType": "Prs", - "Poss": "Yes", - }, - }, - "RB": {word: {"POS": "PART"} for word in ["not", "n't", "nt", "n’t"]}, - "VB": { - word: {"POS": "AUX"} - for word in ["be", "have", "do", "get", "of", "am", "are", "'ve"] - }, - "VBN": {"been": {LEMMA: "be", "POS": "AUX"}}, - "VBG": {"being": {LEMMA: "be", "POS": "AUX"}}, - "VBZ": { - "am": { - LEMMA: "be", - "POS": "AUX", - "VerbForm": "Fin", - "Person": "One", - "Tense": "Pres", - "Mood": "Ind", - }, - "are": { - LEMMA: "be", - "POS": "AUX", - "VerbForm": "Fin", - "Person": "Two", - "Tense": "Pres", - "Mood": "Ind", - }, - "is": { - LEMMA: "be", - "POS": "AUX", - "VerbForm": "Fin", - "Person": "Three", - "Tense": "Pres", - "Mood": "Ind", - }, - "'re": { - LEMMA: "be", - "POS": "AUX", - "VerbForm": "Fin", - "Person": "Two", - "Tense": "Pres", - "Mood": "Ind", - }, - "'s": { - LEMMA: "be", - "POS": "AUX", - "VerbForm": "Fin", - "Person": "Three", - "Tense": "Pres", - "Mood": "Ind", - }, - "has": {LEMMA: "have", "POS": "AUX"}, - "does": {LEMMA: "do", "POS": "AUX"}, - }, - "VBP": { - "are": { - LEMMA: "be", - "POS": "AUX", - "VerbForm": "Fin", - "Tense": "Pres", - "Mood": "Ind", - }, - "'re": { - LEMMA: "be", - "POS": "AUX", - "VerbForm": "Fin", - "Tense": "Pres", - "Mood": "Ind", - }, - "am": { - LEMMA: "be", - "POS": "AUX", - "VerbForm": "Fin", - "Person": "One", - "Tense": "Pres", - "Mood": "Ind", - }, - "do": {"POS": "AUX"}, - "have": {"POS": "AUX"}, - "'m": {"POS": "AUX", LEMMA: "be"}, - "'ve": {"POS": "AUX"}, - "'s": {"POS": "AUX"}, - "is": {"POS": "AUX"}, - "'d": {"POS": "AUX"}, - }, - "VBD": { - "was": { - LEMMA: "be", - "POS": "AUX", - "VerbForm": "Fin", - "Tense": "Past", - "Number": "Sing", - }, - "were": { - LEMMA: "be", - "POS": "AUX", - "VerbForm": "Fin", - "Tense": "Past", - "Number": "Plur", - }, - "did": {LEMMA: "do", "POS": "AUX"}, - "had": {LEMMA: "have", "POS": "AUX"}, - "'d": {LEMMA: "have", "POS": "AUX"}, - }, -} - - -for tag, rules in MORPH_RULES.items(): - for key, attrs in dict(rules).items(): - rules[key.title()] = attrs diff --git a/spacy/lang/lt/__init__.py b/spacy/lang/lt/__init__.py index a4eee852f..fa3c87e21 100644 --- a/spacy/lang/lt/__init__.py +++ b/spacy/lang/lt/__init__.py @@ -2,7 +2,6 @@ from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS from .stop_words import STOP_WORDS from .lex_attrs import LEX_ATTRS -from .morph_rules import MORPH_RULES from ..tokenizer_exceptions import BASE_EXCEPTIONS from ..norm_exceptions import BASE_NORMS @@ -31,7 +30,6 @@ class LithuanianDefaults(Language.Defaults): del mod_base_exceptions["8)"] tokenizer_exceptions = update_exc(mod_base_exceptions, TOKENIZER_EXCEPTIONS) stop_words = STOP_WORDS - morph_rules = MORPH_RULES class Lithuanian(Language): diff --git a/spacy/lang/lt/morph_rules.py b/spacy/lang/lt/morph_rules.py deleted file mode 100644 index f7bfd3cc6..000000000 --- a/spacy/lang/lt/morph_rules.py +++ /dev/null @@ -1,3072 +0,0 @@ -from ...symbols import LEMMA, PRON_LEMMA - - -_coordinating_conjunctions = [ - "ar", - "arba", - "bei", - "beigi", - "bet", - "betgi", - "ir", - "kadangi", - "kuo", - "ne", - "o", - "tad", - "tai", - "tačiau", - "tegul", - "tik", - "visgi", -] - -_subordinating_conjunctions = [ - "jei", - "jeigu", - "jog", - "kad", - "kai", - "kaip", - "kol", - "lyg", - "nebent", - "negu", - "nei", - "nes", - "nors", - "tarsi", - "tuo", - "užuot", -] - -MORPH_RULES = { - "Cg": dict( - [(word, {"POS": "CCONJ"}) for word in _coordinating_conjunctions] - + [(word, {"POS": "SCONJ"}) for word in _subordinating_conjunctions] - ), - "Pg--an": { - "keletą": {LEMMA: PRON_LEMMA, "POS": "PRON", "Case": "Acc", "PronType": "Ind"}, - "save": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "PronType": "Prs", - "Reflex": "Yes", - }, - }, - "Pg--dn": { - "sau": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "PronType": "Prs", - "Reflex": "Yes", - } - }, - "Pg--gn": { - "keleto": {LEMMA: PRON_LEMMA, "POS": "PRON", "Case": "Gen", "PronType": "Ind"}, - "savo": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "PronType": "Prs", - "Reflex": "Yes", - }, - "savęs": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "PronType": "Prs", - "Reflex": "Yes", - }, - }, - "Pg--in": { - "savimi": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Ins", - "PronType": "Prs", - "Reflex": "Yes", - } - }, - "Pg--nn": { - "keletas": {LEMMA: PRON_LEMMA, "POS": "PRON", "Case": "Nom", "PronType": "Ind"} - }, - "Pg-dnn": { - "mudu": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Number": "Dual", - "Person": "1", - "PronType": "Prs", - } - }, - "Pg-pa-": { - "jus": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Number": "Plur", - "Person": "2", - "PronType": "Prs", - } - }, - "Pg-pan": { - "jus": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Number": "Plur", - "Person": "2", - "PronType": "Prs", - }, - "mus": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Number": "Plur", - "Person": "1", - "PronType": "Prs", - }, - }, - "Pg-pdn": { - "jums": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "Number": "Plur", - "Person": "2", - "PronType": "Prs", - }, - "mums": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "Number": "Plur", - "Person": "1", - "PronType": "Prs", - }, - }, - "Pg-pgn": { - "jūsų": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Number": "Plur", - "Person": "2", - "PronType": "Prs", - }, - "mūsų": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Number": "Plur", - "Person": "1", - "PronType": "Prs", - }, - }, - "Pg-pin": { - "jumis": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Ins", - "Number": "Plur", - "Person": "2", - "PronType": "Prs", - }, - "mumis": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Ins", - "Number": "Plur", - "Person": "1", - "PronType": "Prs", - }, - }, - "Pg-pln": { - "jumyse": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Loc", - "Number": "Plur", - "Person": "2", - "PronType": "Prs", - } - }, - "Pg-pnn": { - "jūs": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Number": "Plur", - "Person": "2", - "PronType": "Prs", - }, - "mes": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Number": "Plur", - "Person": "1", - "PronType": "Prs", - }, - }, - "Pg-san": { - "mane": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Number": "Sing", - "Person": "1", - "PronType": "Prs", - }, - "tave": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Number": "Sing", - "Person": "2", - "PronType": "Prs", - }, - }, - "Pg-sd-": { - "tau": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "Number": "Sing", - "Person": "2", - "PronType": "Prs", - } - }, - "Pg-sdn": { - "man": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "Number": "Sing", - "Person": "1", - "PronType": "Prs", - }, - "sau": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "Number": "Sing", - "PronType": "Prs", - "Reflex": "Yes", - }, - "tau": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "Number": "Sing", - "Person": "2", - "PronType": "Prs", - }, - }, - "Pg-sgn": { - "mano": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Number": "Sing", - "Person": "1", - "PronType": "Prs", - }, - "manęs": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Number": "Sing", - "Person": "1", - "PronType": "Prs", - }, - "tavo": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Number": "Sing", - "Person": "2", - "PronType": "Prs", - }, - "tavęs": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Number": "Sing", - "Person": "2", - "PronType": "Prs", - }, - }, - "Pg-sin": { - "manimi": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Ins", - "Number": "Sing", - "Person": "1", - "PronType": "Prs", - }, - "tavim": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Ins", - "Number": "Sing", - "Person": "2", - "PronType": "Prs", - }, - "tavimi": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Ins", - "Number": "Sing", - "Person": "2", - "PronType": "Prs", - }, - }, - "Pg-sln": { - "manyje": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Loc", - "Number": "Sing", - "Person": "1", - "PronType": "Prs", - }, - "tavyje": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Loc", - "Number": "Sing", - "Person": "2", - "PronType": "Prs", - }, - }, - "Pg-snn": { - "aš": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Number": "Sing", - "Person": "1", - "PronType": "Prs", - }, - "tu": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Number": "Sing", - "Person": "2", - "PronType": "Prs", - }, - }, - "Pgf-an": { - "kelias": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Fem", - "PronType": "Ind", - } - }, - "Pgf-dn": { - "kelioms": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "Gender": "Fem", - "PronType": "Ind", - } - }, - "Pgf-nn": { - "kelios": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Fem", - "PronType": "Ind", - } - }, - "Pgfdn-": { - "abi": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Fem", - "Number": "Dual", - "PronType": "Ind", - } - }, - "Pgfpan": { - "jas": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Fem", - "Number": "Plur", - "Person": "3", - "PronType": "Prs", - }, - "kelias": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Ind", - }, - "kitas": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Ind", - }, - "kokias": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Int", - }, - "kurias": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Int", - }, - "savas": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Ind", - }, - "tas": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Dem", - }, - "tokias": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Dem", - }, - }, - "Pgfpdn": { - "joms": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "Gender": "Fem", - "Number": "Plur", - "Person": "3", - "PronType": "Prs", - }, - "kitoms": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Ind", - }, - "kurioms": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Dat", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Int", - }, - "tokioms": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Dat", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Dem", - }, - }, - "Pgfpgn": { - "jokių": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Neg", - }, - "jų": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Gender": "Fem", - "Number": "Plur", - "Person": "3", - "PronType": "Prs", - }, - "kelių": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Ind", - }, - "kitų": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Ind", - }, - "kurių": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Int", - }, - "pačių": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Emp", - }, - "tokių": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Dem", - }, - "tų": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Dem", - }, - }, - "Pgfpin": { - "jomis": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Ins", - "Gender": "Fem", - "Number": "Plur", - "Person": "3", - "PronType": "Prs", - }, - "kitokiomis": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Ins", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Ind", - }, - "kitomis": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Ins", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Ind", - }, - "kokiomis": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Int", - }, - "kuriomis": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Int", - }, - "pačiomis": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Emp", - }, - "tomis": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Dem", - }, - }, - "Pgfpln": { - "jose": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Loc", - "Gender": "Fem", - "Number": "Plur", - "Person": "3", - "PronType": "Prs", - }, - "kitose": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Loc", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Ind", - }, - "kuriose": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Loc", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Int", - }, - "tokiose": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Loc", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Dem", - }, - "tose": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Loc", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Dem", - }, - }, - "Pgfpnn": { - "jos": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Fem", - "Number": "Plur", - "Person": "3", - "PronType": "Prs", - }, - "kitokios": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Ind", - }, - "kitos": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Ind", - }, - "kokios": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Int", - }, - "kurios": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Int", - }, - "pačios": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Emp", - }, - "tokios": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Dem", - }, - "tos": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Fem", - "Number": "Plur", - "PronType": "Dem", - }, - }, - "Pgfsan": { - "ją": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Fem", - "Number": "Sing", - "Person": "3", - "PronType": "Prs", - }, - "kiekvieną": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Tot", - }, - "kitokią": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Ind", - }, - "kitą": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Ind", - }, - "kokią": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Int", - }, - "kurią": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Int", - }, - "pačią": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Emp", - }, - "tokią": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Dem", - }, - "tą": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Dem", - }, - }, - "Pgfsdn": { - "jai": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "Gender": "Fem", - "Number": "Sing", - "Person": "3", - "PronType": "Prs", - }, - "kiekvienai": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Dat", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Tot", - }, - "kitai": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Ind", - }, - "pačiai": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Dat", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Emp", - }, - }, - "Pgfsgn": { - "jokios": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Neg", - }, - "jos": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Gender": "Fem", - "Number": "Sing", - "Person": "3", - "PronType": "Prs", - }, - "kiekvienos": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Tot", - }, - "kokios": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Int", - }, - "kurios": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Int", - }, - "pačios": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Emp", - }, - "tokios": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Dem", - }, - "tos": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Dem", - }, - }, - "Pgfsin": { - "ja": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Ins", - "Gender": "Fem", - "Number": "Sing", - "Person": "3", - "PronType": "Prs", - }, - "kiekviena": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Tot", - }, - "kita": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Ins", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Ind", - }, - "kuria": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Int", - }, - "ta": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Dem", - }, - "tokia": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Dem", - }, - }, - "Pgfsln": { - "joje": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Loc", - "Gender": "Fem", - "Number": "Sing", - "Person": "3", - "PronType": "Prs", - }, - "kiekvienoje": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Loc", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Tot", - }, - "kitoje": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Loc", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Ind", - }, - "kurioje": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Loc", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Int", - }, - "toje": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Loc", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Dem", - }, - "tokioje": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Loc", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Dem", - }, - }, - "Pgfsnn": { - "ji": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Fem", - "Number": "Sing", - "Person": "3", - "PronType": "Prs", - }, - "kiekviena": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Tot", - }, - "kita": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Ind", - }, - "kokia": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Int", - }, - "kuri": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Int", - }, - "pati": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Emp", - }, - "sava": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Ind", - }, - "ta": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Dem", - }, - "tokia": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Dem", - }, - }, - "Pgfsny": { - "jinai": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Fem", - "Number": "Sing", - "Person": "3", - "PronType": "Prs", - }, - "toji": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Fem", - "Number": "Sing", - "PronType": "Dem", - }, - }, - "Pgfsny-": { - "jinai": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Fem", - "Number": "Sing", - "Person": "3", - "PronType": "Prs", - } - }, - "Pgm-a-": { - "kelis": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Masc", - "PronType": "Ind", - } - }, - "Pgm-an": { - "kelis": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Masc", - "PronType": "Ind", - } - }, - "Pgm-dn": { - "keliems": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "Gender": "Masc", - "PronType": "Ind", - } - }, - "Pgm-gn": { - "kelių": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Gender": "Masc", - "PronType": "Ind", - } - }, - "Pgm-nn": { - "keli": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Masc", - "PronType": "Ind", - } - }, - "Pgmdan": { - "mudu": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Masc", - "Number": "Dual", - "Person": "1", - "PronType": "Prs", - } - }, - "Pgmdgn": { - "mudviejų": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Gender": "Masc", - "Number": "Dual", - "Person": "1", - "PronType": "Prs", - } - }, - "Pgmdnn": { - "jiedu": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Masc", - "Number": "Dual", - "Person": "3", - "PronType": "Prs", - }, - "mudu": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Masc", - "Number": "Dual", - "Person": "1", - "PronType": "Prs", - }, - }, - "Pgmpan": { - "juos": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Masc", - "Number": "Plur", - "Person": "3", - "PronType": "Prs", - }, - "jus": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Masc", - "Number": "Plur", - "Person": "2", - "PronType": "Prs", - }, - "kitus": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Ind", - }, - "kokius": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Int", - }, - "kuriuos": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Int", - }, - "pačius": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Emp", - }, - "tokius": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Dem", - }, - "tuos": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Dem", - }, - }, - "Pgmpan-": { - "juos": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Masc", - "Number": "Plur", - "Person": "3", - "PronType": "Prs", - } - }, - "Pgmpdn": { - "jiems": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "Gender": "Masc", - "Number": "Plur", - "Person": "3", - "PronType": "Prs", - }, - "kitiems": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Ind", - }, - "kuriems": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Dat", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Int", - }, - "patiems": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Dat", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Emp", - }, - "tiems": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Dat", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Dem", - }, - }, - "Pgmpgn": { - "jokių": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Neg", - }, - "jų": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Gender": "Masc", - "Number": "Plur", - "Person": "3", - "PronType": "Prs", - }, - "kitų": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Ind", - }, - "kokių": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Int", - }, - "kurių": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Int", - }, - "pačių": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Emp", - }, - "tokių": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Dem", - }, - "tų": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Dem", - }, - }, - "Pgmpin": { - "jais": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Ins", - "Gender": "Masc", - "Number": "Plur", - "Person": "3", - "PronType": "Prs", - }, - "jokiais": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Neg", - }, - "kitais": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Ins", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Ind", - }, - "kokiais": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Int", - }, - "savais": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Ins", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Ind", - }, - "tais": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Dem", - }, - "tokiais": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Dem", - }, - }, - "Pgmpln": { - "juose": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Loc", - "Gender": "Masc", - "Number": "Plur", - "Person": "3", - "PronType": "Prs", - }, - "kituose": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Loc", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Ind", - }, - }, - "Pgmpnn": { - "jie": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Masc", - "Number": "Plur", - "Person": "3", - "PronType": "Prs", - }, - "jūs": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Masc", - "Number": "Plur", - "Person": "2", - "PronType": "Prs", - }, - "kiti": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Ind", - }, - "kokie": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Int", - }, - "kurie": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Int", - }, - "patys": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Emp", - }, - "tie": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Dem", - }, - "tokie": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Masc", - "Number": "Plur", - "PronType": "Dem", - }, - }, - "Pgmsan": { - "jį": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Masc", - "Number": "Sing", - "Person": "3", - "PronType": "Prs", - }, - "kiekvieną": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Tot", - }, - "kitokį": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Ind", - }, - "kitą": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Acc", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Ind", - }, - "kokį": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Int", - }, - "kurį": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Int", - }, - "tokį": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Dem", - }, - "tą": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Acc", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Dem", - }, - }, - "Pgmsdn": { - "jam": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "Gender": "Masc", - "Number": "Sing", - "Person": "3", - "PronType": "Prs", - }, - "kiekvienam": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Dat", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Tot", - }, - "kitam": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Dat", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Ind", - }, - "kokiam": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Dat", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Int", - }, - "kuriam": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Dat", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Int", - }, - "pačiam": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Dat", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Emp", - }, - "tam": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Dat", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Dem", - }, - }, - "Pgmsgn": { - "jo": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Gender": "Masc", - "Number": "Sing", - "Person": "3", - "PronType": "Prs", - }, - "jokio": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Neg", - }, - "kiekvieno": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Tot", - }, - "kito": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Ind", - }, - "kokio": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Int", - }, - "kurio": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Int", - }, - "paties": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Emp", - }, - "savo": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Gen", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Ind", - }, - "to": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Dem", - }, - "tokio": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Gen", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Dem", - }, - }, - "Pgmsin": { - "juo": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Ins", - "Gender": "Masc", - "Number": "Sing", - "Person": "3", - "PronType": "Prs", - }, - "kitu": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Ins", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Ind", - }, - "kokiu": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Int", - }, - "kuriuo": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Int", - }, - "pačiu": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Emp", - }, - "tokiu": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Dem", - }, - "tuo": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Ins", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Dem", - }, - }, - "Pgmsln": { - "jame": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Loc", - "Gender": "Masc", - "Number": "Sing", - "Person": "3", - "PronType": "Prs", - }, - "kiekvienam": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Loc", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Tot", - }, - "kokiame": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Loc", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Int", - }, - "kuriame": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Loc", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Int", - }, - "tame": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Loc", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Dem", - }, - }, - "Pgmsnn": { - "jis": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Masc", - "Number": "Sing", - "Person": "3", - "PronType": "Prs", - }, - "joks": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Neg", - }, - "kiekvienas": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Tot", - }, - "kitas": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Ind", - }, - "kitoks": { - LEMMA: PRON_LEMMA, - "POS": "PRON", - "Case": "Nom", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Ind", - }, - "koks": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Int", - }, - "kuris": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Int", - }, - "pats": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Emp", - }, - "tas": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Dem", - }, - "toks": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Dem", - }, - }, - "Pgmsny": { - "patsai": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Emp", - }, - "tasai": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Dem", - }, - "toksai": { - LEMMA: PRON_LEMMA, - "POS": "DET", - "Case": "Nom", - "Gender": "Masc", - "Number": "Sing", - "PronType": "Dem", - }, - }, - "Pgn--n": { - "tai": {LEMMA: PRON_LEMMA, "POS": "DET", "Gender": "Neut", "PronType": "Dem"} - }, - "Pgnn--n": { - "tai": {LEMMA: PRON_LEMMA, "POS": "DET", "Gender": "Neut", "PronType": "Dem"} - }, - "Pgsmdn": { - "tam": {LEMMA: PRON_LEMMA, "POS": "DET", "Case": "Dat", "PronType": "Dem"} - }, - "Qg": {"tai": {LEMMA: "tas", "POS": "PART"}}, - "Vgap----n--n--": { - "esant": { - LEMMA: "būti", - "POS": "VERB", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Ger", - }, - "turint": { - LEMMA: "turėti", - "POS": "VERB", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Ger", - }, - }, - "Vgh--pm-n--n--": { - "būdami": { - LEMMA: "būti", - "POS": "VERB", - "Gender": "Masc", - "Number": "Plur", - "Polarity": "POS", - "VerbForm": "Conv", - } - }, - "Vgh--sm-n--n--": { - "būdamas": { - LEMMA: "būti", - "POS": "VERB", - "Gender": "Masc", - "Number": "Sing", - "Polarity": "POS", - "VerbForm": "Conv", - } - }, - "Vgi-----n--n--": { - "būti": {LEMMA: "būti", "POS": "VERB", "Polarity": "POS", "VerbForm": "Inf"}, - "daryti": { - LEMMA: "daryti", - "POS": "VERB", - "Polarity": "POS", - "VerbForm": "Inf", - }, - "turėti": { - LEMMA: "turėti", - "POS": "VERB", - "Polarity": "POS", - "VerbForm": "Inf", - }, - }, - "Vgm-1p--n--ns-": { - "turėtume": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Cnd", - "Number": "Plur", - "Person": "1", - "Polarity": "POS", - "VerbForm": "Fin", - } - }, - "Vgm-2p--n--nm-": { - "būkite": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Imp", - "Number": "Plur", - "Person": "2", - "Polarity": "POS", - "VerbForm": "Fin", - }, - "darykit": { - LEMMA: "daryti", - "POS": "VERB", - "Mood": "Imp", - "Number": "Plur", - "Person": "2", - "Polarity": "POS", - "VerbForm": "Fin", - }, - "darykite": { - LEMMA: "daryti", - "POS": "VERB", - "Mood": "Imp", - "Number": "Plur", - "Person": "2", - "Polarity": "POS", - "VerbForm": "Fin", - }, - "turėkite": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Imp", - "Number": "Plur", - "Person": "2", - "Polarity": "POS", - "VerbForm": "Fin", - }, - }, - "Vgm-2p--n--ns-": { - "turėtumėte": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Cnd", - "Number": "Plur", - "Person": "2", - "Polarity": "POS", - "VerbForm": "Fin", - } - }, - "Vgm-2s--n--ns-": { - "turėtum": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Cnd", - "Number": "Sing", - "Person": "2", - "Polarity": "POS", - "VerbForm": "Fin", - } - }, - "Vgm-3---n--ns-": { - "būtų": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Cnd", - "Person": "3", - "Polarity": "POS", - "VerbForm": "Fin", - }, - "turėtų": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Cnd", - "Person": "3", - "Polarity": "POS", - "VerbForm": "Fin", - }, - }, - "Vgm-3p--n--ns-": { - "būtų": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Cnd", - "Number": "Plur", - "Person": "3", - "Polarity": "POS", - "VerbForm": "Fin", - }, - "turėtų": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Cnd", - "Number": "Plur", - "Person": "3", - "Polarity": "POS", - "VerbForm": "Fin", - }, - }, - "Vgm-3s--n--ns-": { - "būtų": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Cnd", - "Number": "Sing", - "Person": "3", - "Polarity": "POS", - "VerbForm": "Fin", - }, - "turėtų": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Cnd", - "Number": "Sing", - "Person": "3", - "Polarity": "POS", - "VerbForm": "Fin", - }, - }, - "Vgma1p--n--ni-": { - "turėjom": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "1", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Fin", - } - }, - "Vgma1s--n--ni-": { - "turėjau": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Sing", - "Person": "1", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Fin", - } - }, - "Vgma3---n--ni-": { - "buvo": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Person": "3", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Fin", - }, - "turėjo": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Ind", - "Person": "3", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Fin", - }, - }, - "Vgma3p--n--ni-": { - "buvo": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "3", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Fin", - }, - "darė": { - LEMMA: "daryti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "3", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Fin", - }, - "turėjo": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "3", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Fin", - }, - }, - "Vgma3s--n--ni-": { - "buvo": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Sing", - "Person": "3", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Fin", - }, - "darė": { - LEMMA: "daryti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Sing", - "Person": "3", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Fin", - }, - "turėjo": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Sing", - "Person": "3", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Fin", - }, - }, - "Vgmf1s--n--ni-": { - "turėsiu": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Sing", - "Person": "1", - "Polarity": "POS", - "Tense": "Fut", - "VerbForm": "Fin", - } - }, - "Vgmf2p--n--ni-": { - "būsite": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "2", - "Polarity": "POS", - "Tense": "Fut", - "VerbForm": "Fin", - }, - "darysite": { - LEMMA: "daryti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "2", - "Polarity": "POS", - "Tense": "Fut", - "VerbForm": "Fin", - }, - "turėsite": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "2", - "Polarity": "POS", - "Tense": "Fut", - "VerbForm": "Fin", - }, - }, - "Vgmf3---n--ni-": { - "bus": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Person": "3", - "Polarity": "POS", - "Tense": "Fut", - "VerbForm": "Fin", - } - }, - "Vgmf3p--n--ni-": { - "bus": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "3", - "Polarity": "POS", - "Tense": "Fut", - "VerbForm": "Fin", - }, - "darys": { - LEMMA: "daryti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "3", - "Polarity": "POS", - "Tense": "Fut", - "VerbForm": "Fin", - }, - "turės": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "3", - "Polarity": "POS", - "Tense": "Fut", - "VerbForm": "Fin", - }, - }, - "Vgmf3s--n--ni-": { - "bus": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Sing", - "Person": "3", - "Polarity": "POS", - "Tense": "Fut", - "VerbForm": "Fin", - }, - "turės": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Sing", - "Person": "3", - "Polarity": "POS", - "Tense": "Fut", - "VerbForm": "Fin", - }, - }, - "Vgmp1p--n--ni-": { - "darome": { - LEMMA: "daryti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "1", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - "esame": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "1", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - "turime": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "1", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - }, - "Vgmp1s--n--ni-": { - "būnu": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Sing", - "Person": "1", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - "esu": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Sing", - "Person": "1", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - "turiu": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Sing", - "Person": "1", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - }, - "Vgmp2p--n--ni-": { - "esate": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "2", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - "turite": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "2", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - }, - "Vgmp2s--n--ni-": { - "esi": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Sing", - "Person": "2", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - "turi": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Sing", - "Person": "2", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - }, - "Vgmp3---n--ni-": { - "būna": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Person": "3", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - "turi": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Ind", - "Person": "3", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - "yra": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Person": "3", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - }, - "Vgmp3p--n--ni-": { - "būna": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "3", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - "daro": { - LEMMA: "daryti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "3", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - "turi": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "3", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - "yra": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Plur", - "Person": "3", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - }, - "Vgmp3s--n--ni-": { - "būna": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Sing", - "Person": "3", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - "daro": { - LEMMA: "daryti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Sing", - "Person": "3", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - "turi": { - LEMMA: "turėti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Sing", - "Person": "3", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - "yra": { - LEMMA: "būti", - "POS": "VERB", - "Mood": "Ind", - "Number": "Sing", - "Person": "3", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Fin", - }, - }, - "Vgmq2s--n--ni-": { - "turėdavai": { - LEMMA: "turėti", - "POS": "VERB", - "Aspect": "Hab", - "Mood": "Ind", - "Number": "Sing", - "Person": "2", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Fin", - } - }, - "Vgmq3---n--ni-": { - "būdavo": { - LEMMA: "būti", - "POS": "VERB", - "Aspect": "Hab", - "Mood": "Ind", - "Person": "3", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Fin", - } - }, - "Vgmq3s--n--ni-": { - "turėdavo": { - LEMMA: "turėti", - "POS": "VERB", - "Aspect": "Hab", - "Mood": "Ind", - "Number": "Sing", - "Person": "3", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Fin", - } - }, - "Vgp--pfnnnnn-p": { - "darytinos": { - LEMMA: "daryti", - "POS": "VERB", - "Case": "Nom", - "Degree": "POS", - "Gender": "Fem", - "Number": "Plur", - "Polarity": "POS", - "VerbForm": "Part", - } - }, - "Vgpa--nann-n-p": { - "buvę": { - LEMMA: "būti", - "POS": "VERB", - "Degree": "POS", - "Gender": "Neut", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Part", - "Voice": "Act", - } - }, - "Vgpa-pmanngn-p": { - "buvusių": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Gen", - "Degree": "POS", - "Gender": "Masc", - "Number": "Plur", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Part", - "Voice": "Act", - } - }, - "Vgpa-smanngn-p": { - "buvusio": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Gen", - "Degree": "POS", - "Gender": "Masc", - "Number": "Sing", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Part", - "Voice": "Act", - } - }, - "Vgpa-smannnn-p": { - "buvęs": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Nom", - "Degree": "POS", - "Gender": "Masc", - "Number": "Sing", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Part", - "Voice": "Act", - }, - "turėjęs": { - LEMMA: "turėti", - "POS": "VERB", - "Case": "Nom", - "Degree": "POS", - "Gender": "Masc", - "Number": "Sing", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Part", - "Voice": "Act", - }, - }, - "Vgpa-smanyin-p": { - "buvusiuoju": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Ins", - "Degree": "POS", - "Gender": "Masc", - "Number": "Sing", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Part", - "Voice": "Act", - } - }, - "Vgpf-smpnnan-p": { - "būsimą": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Acc", - "Degree": "POS", - "Gender": "Masc", - "Number": "Sing", - "Polarity": "POS", - "Tense": "Fut", - "VerbForm": "Part", - "Voice": "Pass", - } - }, - "Vgpf-smpnndn-p": { - "būsimam": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Dat", - "Degree": "POS", - "Gender": "Masc", - "Number": "Sing", - "Polarity": "POS", - "Tense": "Fut", - "VerbForm": "Part", - "Voice": "Pass", - } - }, - "Vgpp--npnn-n-p": { - "esama": { - LEMMA: "būti", - "POS": "VERB", - "Degree": "POS", - "Gender": "Neut", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Pass", - } - }, - "Vgpp-pfannan-p": { - "esančias": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Acc", - "Degree": "POS", - "Gender": "Fem", - "Number": "Plur", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Act", - } - }, - "Vgpp-pfanndn-p": { - "turinčioms": { - LEMMA: "turėti", - "POS": "VERB", - "Case": "Dat", - "Degree": "POS", - "Gender": "Fem", - "Number": "Plur", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Act", - } - }, - "Vgpp-pfannin-p": { - "esančiomis": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Ins", - "Degree": "POS", - "Gender": "Fem", - "Number": "Plur", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Act", - } - }, - "Vgpp-pfpnnan-p": { - "daromas": { - LEMMA: "daryti", - "POS": "VERB", - "Case": "Acc", - "Degree": "POS", - "Gender": "Fem", - "Number": "Plur", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Pass", - }, - "turimas": { - LEMMA: "turėti", - "POS": "VERB", - "Case": "Acc", - "Degree": "POS", - "Gender": "Fem", - "Number": "Plur", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Pass", - }, - }, - "Vgpp-pfpnnin-p": { - "turimomis": { - LEMMA: "turėti", - "POS": "VERB", - "Case": "Ins", - "Degree": "POS", - "Gender": "Fem", - "Number": "Plur", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Pass", - } - }, - "Vgpp-pmannan-p": { - "turinčius": { - LEMMA: "turėti", - "POS": "VERB", - "Case": "Acc", - "Degree": "POS", - "Gender": "Masc", - "Number": "Plur", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Act", - } - }, - "Vgpp-pmanngn-p": { - "esančių": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Gen", - "Degree": "POS", - "Gender": "Masc", - "Number": "Plur", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Act", - } - }, - "Vgpp-pmannin-p": { - "esančiais": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Ins", - "Degree": "POS", - "Gender": "Masc", - "Number": "Plur", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Act", - } - }, - "Vgpp-pmannnn-p": { - "esantys": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Nom", - "Degree": "POS", - "Gender": "Masc", - "Number": "Plur", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Act", - } - }, - "Vgpp-pmpnnan-p": { - "turimus": { - LEMMA: "turėti", - "POS": "VERB", - "Case": "Acc", - "Degree": "POS", - "Gender": "Masc", - "Number": "Plur", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Pass", - } - }, - "Vgpp-pmpnngn-p": { - "esamų": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Gen", - "Degree": "POS", - "Gender": "Masc", - "Number": "Plur", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Pass", - } - }, - "Vgpp-sfanngn-p": { - "turinčios": { - LEMMA: "turėti", - "POS": "VERB", - "Case": "Gen", - "Degree": "POS", - "Gender": "Fem", - "Number": "Sing", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Act", - } - }, - "Vgpp-sfannln-p": { - "esančioje": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Loc", - "Degree": "POS", - "Gender": "Fem", - "Number": "Sing", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Act", - } - }, - "Vgpp-sfannnn-p": { - "esanti": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Nom", - "Degree": "POS", - "Gender": "Fem", - "Number": "Sing", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Act", - } - }, - "Vgpp-sfpnnnn-p": { - "daroma": { - LEMMA: "daryti", - "POS": "VERB", - "Case": "Nom", - "Degree": "POS", - "Gender": "Fem", - "Number": "Sing", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Pass", - } - }, - "Vgpp-smanngn-p": { - "esančio": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Gen", - "Degree": "POS", - "Gender": "Masc", - "Number": "Sing", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Act", - } - }, - "Vgpp-smannnn-p": { - "esantis": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Nom", - "Degree": "POS", - "Gender": "Masc", - "Number": "Sing", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Act", - }, - "esąs": { - LEMMA: "būti", - "POS": "VERB", - "Case": "Nom", - "Degree": "POS", - "Gender": "Masc", - "Number": "Sing", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Act", - }, - "turintis": { - LEMMA: "turėti", - "POS": "VERB", - "Case": "Nom", - "Degree": "POS", - "Gender": "Masc", - "Number": "Sing", - "Polarity": "POS", - "Tense": "Pres", - "VerbForm": "Part", - "Voice": "Act", - }, - }, - "Vgps--npnn-n-p": { - "daryta": { - LEMMA: "daryti", - "POS": "VERB", - "Aspect": "Perf", - "Degree": "POS", - "Gender": "Neut", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Part", - "Voice": "Pass", - } - }, - "Vgps-pmpnnnn-p": { - "daryti": { - LEMMA: "daryti", - "POS": "VERB", - "Aspect": "Perf", - "Case": "Nom", - "Degree": "POS", - "Gender": "Masc", - "Number": "Plur", - "Polarity": "POS", - "Tense": "Past", - "VerbForm": "Part", - "Voice": "Pass", - } - }, -} - - -for tag, rules in MORPH_RULES.items(): - for key, attrs in dict(rules).items(): - rules[key.title()] = attrs diff --git a/spacy/lang/nb/__init__.py b/spacy/lang/nb/__init__.py index 4e980a20c..39df2e857 100644 --- a/spacy/lang/nb/__init__.py +++ b/spacy/lang/nb/__init__.py @@ -2,7 +2,6 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES from .punctuation import TOKENIZER_SUFFIXES from .stop_words import STOP_WORDS -from .morph_rules import MORPH_RULES from .syntax_iterators import SYNTAX_ITERATORS from ..tokenizer_exceptions import BASE_EXCEPTIONS @@ -23,7 +22,6 @@ class NorwegianDefaults(Language.Defaults): infixes = TOKENIZER_INFIXES suffixes = TOKENIZER_SUFFIXES stop_words = STOP_WORDS - morph_rules = MORPH_RULES syntax_iterators = SYNTAX_ITERATORS diff --git a/spacy/lang/nb/morph_rules.py b/spacy/lang/nb/morph_rules.py deleted file mode 100644 index e96b9fd6b..000000000 --- a/spacy/lang/nb/morph_rules.py +++ /dev/null @@ -1,665 +0,0 @@ -from ...symbols import LEMMA, PRON_LEMMA - -# This dict includes all the PRON and DET tag combinations found in the -# dataset developed by Schibsted, Nasjonalbiblioteket and LTG (to be published -# autumn 2018) and the rarely used polite form. - -MORPH_RULES = { - "PRON__Animacy=Anim|Case=Nom|Number=Sing|Person=1|PronType=Prs": { - "jeg": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Case": "Nom", - } - }, - "PRON__Animacy=Anim|Case=Nom|Number=Sing|Person=2|PronType=Prs": { - "du": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Sing", - "Case": "Nom", - }, - # polite form, not sure about the tag - "De": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Sing", - "Case": "Nom", - "Polite": "Form", - }, - }, - "PRON__Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs": { - "hun": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Fem", - "Case": "Nom", - } - }, - "PRON__Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs": { - "han": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Masc", - "Case": "Nom", - } - }, - "PRON__Gender=Neut|Number=Sing|Person=3|PronType=Prs": { - "det": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Neut", - }, - "alt": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Neut", - }, - "intet": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Neut", - }, - "noe": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Number": "Sing", - "Person": "Three", - "Gender": "Neut", - }, - }, - "PRON__Animacy=Anim|Case=Nom|Number=Plur|Person=1|PronType=Prs": { - "vi": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Case": "Nom", - } - }, - "PRON__Animacy=Anim|Case=Nom|Number=Plur|Person=2|PronType=Prs": { - "dere": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Plur", - "Case": "Nom", - } - }, - "PRON__Case=Nom|Number=Plur|Person=3|PronType=Prs": { - "de": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Plur", - "Case": "Nom", - } - }, - "PRON__Animacy=Anim|Case=Acc|Number=Sing|Person=1|PronType=Prs": { - "meg": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Case": "Acc", - } - }, - "PRON__Animacy=Anim|Case=Acc|Number=Sing|Person=2|PronType=Prs": { - "deg": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Sing", - "Case": "Acc", - }, - # polite form, not sure about the tag - "Dem": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Sing", - "Case": "Acc", - "Polite": "Form", - }, - }, - "PRON__Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs": { - "henne": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Fem", - "Case": "Acc", - } - }, - "PRON__Animacy=Anim|Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs": { - "ham": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Masc", - "Case": "Acc", - }, - "han": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Masc", - "Case": "Acc", - }, - }, - "PRON__Animacy=Anim|Case=Acc|Number=Plur|Person=1|PronType=Prs": { - "oss": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Case": "Acc", - } - }, - "PRON__Animacy=Anim|Case=Acc|Number=Plur|Person=2|PronType=Prs": { - "dere": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Plur", - "Case": "Acc", - } - }, - "PRON__Case=Acc|Number=Plur|Person=3|PronType=Prs": { - "dem": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Plur", - "Case": "Acc", - } - }, - "PRON__Case=Acc|Reflex=Yes": { - "seg": { - LEMMA: PRON_LEMMA, - "Person": "Three", - "Number": "Sing,Plur", - "Reflex": "Yes", - } - }, - "PRON__Animacy=Anim|Case=Nom|Number=Sing|PronType=Prs": { - "man": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Number": "Sing", "Case": "Nom"} - }, - "DET__Gender=Masc|Number=Sing|Poss=Yes": { - "min": { - LEMMA: "min", - "Person": "One", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Masc", - }, - "din": { - LEMMA: "din", - "Person": "Two", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Masc", - }, - "hennes": { - LEMMA: "hennes", - "Person": "Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Masc", - }, - "hans": { - LEMMA: "hans", - "Person": "Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Masc", - }, - "sin": { - LEMMA: "sin", - "Person": "Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Masc", - "Reflex": "Yes", - }, - "vår": { - LEMMA: "vår", - "Person": "One", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Masc", - }, - "deres": { - LEMMA: "deres", - "Person": "Two,Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Masc", - }, - # polite form, not sure about the tag - "Deres": { - LEMMA: "Deres", - "Person": "Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Masc", - "Polite": "Form", - }, - }, - "DET__Gender=Fem|Number=Sing|Poss=Yes": { - "mi": { - LEMMA: "min", - "Person": "One", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Fem", - }, - "di": { - LEMMA: "din", - "Person": "Two", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Fem", - }, - "hennes": { - LEMMA: "hennes", - "Person": "Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Fem", - }, - "hans": { - LEMMA: "hans", - "Person": "Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Fem", - }, - "si": { - LEMMA: "sin", - "Person": "Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Fem", - "Reflex": "Yes", - }, - "vår": { - LEMMA: "vår", - "Person": "One", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Fem", - }, - "deres": { - LEMMA: "deres", - "Person": "Two,Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Fem", - }, - # polite form, not sure about the tag - "Deres": { - LEMMA: "Deres", - "Person": "Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Fem", - "Polite": "Form", - }, - }, - "DET__Gender=Neut|Number=Sing|Poss=Yes": { - "mitt": { - LEMMA: "min", - "Person": "One", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Neut", - }, - "ditt": { - LEMMA: "din", - "Person": "Two", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Neut", - }, - "hennes": { - LEMMA: "hennes", - "Person": "Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Neut", - }, - "hans": { - LEMMA: "hans", - "Person": "Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Neut", - }, - "sitt": { - LEMMA: "sin", - "Person": "Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Neut", - "Reflex": "Yes", - }, - "vårt": { - LEMMA: "vår", - "Person": "One", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Neut", - }, - "deres": { - LEMMA: "deres", - "Person": "Two,Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Neut", - }, - # polite form, not sure about the tag - "Deres": { - LEMMA: "Deres", - "Person": "Three", - "Number": "Sing", - "Poss": "Yes", - "Gender": "Neut", - "Polite": "Form", - }, - }, - "DET__Number=Plur|Poss=Yes": { - "mine": {LEMMA: "min", "Person": "One", "Number": "Plur", "Poss": "Yes"}, - "dine": {LEMMA: "din", "Person": "Two", "Number": "Plur", "Poss": "Yes"}, - "hennes": {LEMMA: "hennes", "Person": "Three", "Number": "Plur", "Poss": "Yes"}, - "hans": {LEMMA: "hans", "Person": "Three", "Number": "Plur", "Poss": "Yes"}, - "sine": { - LEMMA: "sin", - "Person": "Three", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - "våre": {LEMMA: "vår", "Person": "One", "Number": "Plur", "Poss": "Yes"}, - "deres": { - LEMMA: "deres", - "Person": "Two,Three", - "Number": "Plur", - "Poss": "Yes", - }, - }, - "PRON__Animacy=Anim|Number=Plur|PronType=Rcp": { - "hverandre": {LEMMA: PRON_LEMMA, "PronType": "Rcp", "Number": "Plur"} - }, - "DET__Number=Plur|Poss=Yes|PronType=Rcp": { - "hverandres": { - LEMMA: "hverandres", - "PronType": "Rcp", - "Number": "Plur", - "Poss": "Yes", - } - }, - "PRON___": {"som": {LEMMA: PRON_LEMMA}, "ikkenoe": {LEMMA: PRON_LEMMA}}, - "PRON__PronType=Int": {"hva": {LEMMA: PRON_LEMMA, "PronType": "Int"}}, - "PRON__Animacy=Anim|PronType=Int": {"hvem": {LEMMA: PRON_LEMMA, "PronType": "Int"}}, - "PRON__Animacy=Anim|Poss=Yes|PronType=Int": { - "hvis": {LEMMA: PRON_LEMMA, "PronType": "Int", "Poss": "Yes"} - }, - "PRON__Number=Plur|Person=3|PronType=Prs": { - "noen": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Number": "Plur", - "Person": "Three", - }, - "ingen": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Number": "Plur", - "Person": "Three", - }, - "alle": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Number": "Plur", - "Person": "Three", - }, - }, - "PRON__Gender=Fem,Masc|Number=Sing|Person=3|PronType=Prs": { - "noen": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Number": "Sing", - "Person": "Three", - "Gender": "Fem,Masc", - }, - "den": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Number": "Sing", - "Person": "Three", - "Gender": "Fem,Masc", - }, - "ingen": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Number": "Sing", - "Person": "Three", - "Gender": "Fem,Masc", - "Polarity": "Neg", - }, - }, - "PRON__Number=Sing": {"ingenting": {LEMMA: PRON_LEMMA, "Number": "Sing"}}, - "PRON__Animacy=Anim|Number=Sing|PronType=Prs": { - "en": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Number": "Sing"} - }, - "PRON__Animacy=Anim|Case=Gen,Nom|Number=Sing|PronType=Prs": { - "ens": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Number": "Sing", - "Case": "Gen,Nom", - } - }, - "PRON__Animacy=Anim|Case=Gen|Number=Sing|PronType=Prs": { - "ens": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Number": "Sing", "Case": "Gen"} - }, - "DET__Case=Gen|Gender=Masc|Number=Sing": { - "ens": {LEMMA: "en", "Number": "Sing", "Case": "Gen"} - }, - "DET__Gender=Masc|Number=Sing": { - "enhver": {LEMMA: "enhver", "Number": "Sing", "Gender": "Masc"}, - "all": {LEMMA: "all", "Number": "Sing", "Gender": "Masc"}, - "hver": {LEMMA: "hver", "Number": "Sing", "Gender": "Masc"}, - "noen": {LEMMA: "noen", "Gender": "Masc", "Number": "Sing"}, - "noe": {LEMMA: "noen", "Gender": "Masc", "Number": "Sing"}, - "en": {LEMMA: "en", "Number": "Sing", "Gender": "Neut"}, - "ingen": {LEMMA: "ingen", "Gender": "Masc", "Number": "Sing"}, - }, - "DET__Gender=Fem|Number=Sing": { - "enhver": {LEMMA: "enhver", "Number": "Sing", "Gender": "Fem"}, - "all": {LEMMA: "all", "Number": "Sing", "Gender": "Fem"}, - "hver": {LEMMA: "hver", "Number": "Sing", "Gender": "Fem"}, - "noen": {LEMMA: "noen", "Gender": "Fem", "Number": "Sing"}, - "noe": {LEMMA: "noen", "Gender": "Fem", "Number": "Sing"}, - "ei": {LEMMA: "en", "Number": "Sing", "Gender": "Fem"}, - }, - "DET__Gender=Neut|Number=Sing": { - "ethvert": {LEMMA: "enhver", "Number": "Sing", "Gender": "Neut"}, - "alt": {LEMMA: "all", "Number": "Sing", "Gender": "Neut"}, - "hvert": {LEMMA: "hver", "Number": "Sing", "Gender": "Neut"}, - "noe": {LEMMA: "noen", "Number": "Sing", "Gender": "Neut"}, - "intet": {LEMMA: "ingen", "Gender": "Neut", "Number": "Sing"}, - "et": {LEMMA: "en", "Number": "Sing", "Gender": "Neut"}, - }, - "DET__Gender=Neut|Number=Sing|PronType=Int": { - "hvilket": { - LEMMA: "hvilken", - "PronType": "Int", - "Number": "Sing", - "Gender": "Neut", - } - }, - "DET__Gender=Fem|Number=Sing|PronType=Int": { - "hvilken": { - LEMMA: "hvilken", - "PronType": "Int", - "Number": "Sing", - "Gender": "Fem", - } - }, - "DET__Gender=Masc|Number=Sing|PronType=Int": { - "hvilken": { - LEMMA: "hvilken", - "PronType": "Int", - "Number": "Sing", - "Gender": "Masc", - } - }, - "DET__Number=Plur|PronType=Int": { - "hvilke": {LEMMA: "hvilken", "PronType": "Int", "Number": "Plur"} - }, - "DET__Number=Plur": { - "alle": {LEMMA: "all", "Number": "Plur"}, - "noen": {LEMMA: "noen", "Number": "Plur"}, - "egne": {LEMMA: "egen", "Number": "Plur"}, - "ingen": {LEMMA: "ingen", "Number": "Plur"}, - }, - "DET__Gender=Masc|Number=Sing|PronType=Dem": { - "den": {LEMMA: "den", "PronType": "Dem", "Number": "Sing", "Gender": "Masc"}, - "slik": {LEMMA: "slik", "PronType": "Dem", "Number": "Sing", "Gender": "Masc"}, - "denne": { - LEMMA: "denne", - "PronType": "Dem", - "Number": "Sing", - "Gender": "Masc", - }, - }, - "DET__Gender=Fem|Number=Sing|PronType=Dem": { - "den": {LEMMA: "den", "PronType": "Dem", "Number": "Sing", "Gender": "Fem"}, - "slik": {LEMMA: "slik", "PronType": "Dem", "Number": "Sing", "Gender": "Fem"}, - "denne": {LEMMA: "denne", "PronType": "Dem", "Number": "Sing", "Gender": "Fem"}, - }, - "DET__Gender=Neut|Number=Sing|PronType=Dem": { - "det": {LEMMA: "det", "PronType": "Dem", "Number": "Sing", "Gender": "Neut"}, - "slikt": {LEMMA: "slik", "PronType": "Dem", "Number": "Sing", "Gender": "Neut"}, - "dette": { - LEMMA: "dette", - "PronType": "Dem", - "Number": "Sing", - "Gender": "Neut", - }, - }, - "DET__Number=Plur|PronType=Dem": { - "disse": {LEMMA: "disse", "PronType": "Dem", "Number": "Plur"}, - "andre": {LEMMA: "annen", "PronType": "Dem", "Number": "Plur"}, - "de": {LEMMA: "de", "PronType": "Dem", "Number": "Plur"}, - "slike": {LEMMA: "slik", "PronType": "Dem", "Number": "Plur"}, - }, - "DET__Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem": { - "annen": {LEMMA: "annen", "PronType": "Dem", "Number": "Sing", "Gender": "Masc"} - }, - "DET__Definite=Ind|Gender=Fem|Number=Sing|PronType=Dem": { - "annen": {LEMMA: "annen", "PronType": "Dem", "Number": "Sing", "Gender": "Fem"} - }, - "DET__Definite=Ind|Gender=Neut|Number=Sing|PronType=Dem": { - "annet": {LEMMA: "annen", "PronType": "Dem", "Number": "Sing", "Gender": "Neut"} - }, - "DET__Case=Gen|Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem": { - "annens": { - LEMMA: "annnen", - "PronType": "Dem", - "Number": "Sing", - "Gender": "Masc", - "Case": "Gen", - } - }, - "DET__Case=Gen|Number=Plur|PronType=Dem": { - "andres": {LEMMA: "annen", "PronType": "Dem", "Number": "Plur", "Case": "Gen"} - }, - "DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Dem": { - "dens": { - LEMMA: "den", - "PronType": "Dem", - "Number": "Sing", - "Gender": "Fem", - "Case": "Gen", - } - }, - "DET__Case=Gen|Gender=Masc|Number=Sing|PronType=Dem": { - "hvis": { - LEMMA: "hvis", - "PronType": "Dem", - "Number": "Sing", - "Gender": "Masc", - "Case": "Gen", - }, - "dens": { - LEMMA: "den", - "PronType": "Dem", - "Number": "Sing", - "Gender": "Masc", - "Case": "Gen", - }, - }, - "DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Dem": { - "dets": { - LEMMA: "det", - "PronType": "Dem", - "Number": "Sing", - "Gender": "Neut", - "Case": "Gen", - } - }, - "DET__Case=Gen|Number=Plur": { - "alles": {LEMMA: "all", "Number": "Plur", "Case": "Gen"} - }, - "DET__Definite=Def|Number=Sing|PronType=Dem": { - "andre": {LEMMA: "annen", "Number": "Sing", "PronType": "Dem"} - }, - "DET__Definite=Def|PronType=Dem": { - "samme": {LEMMA: "samme", "PronType": "Dem"}, - "forrige": {LEMMA: "forrige", "PronType": "Dem"}, - "neste": {LEMMA: "neste", "PronType": "Dem"}, - }, - "DET__Definite=Def": {"selve": {LEMMA: "selve"}, "selveste": {LEMMA: "selveste"}}, - "DET___": {"selv": {LEMMA: "selv"}, "endel": {LEMMA: "endel"}}, - "DET__Definite=Ind|Gender=Fem|Number=Sing": { - "egen": {LEMMA: "egen", "Gender": "Fem", "Number": "Sing"} - }, - "DET__Definite=Ind|Gender=Masc|Number=Sing": { - "egen": {LEMMA: "egen", "Gender": "Masc", "Number": "Sing"} - }, - "DET__Definite=Ind|Gender=Neut|Number=Sing": { - "eget": {LEMMA: "egen", "Gender": "Neut", "Number": "Sing"} - }, - # same wordform and pos (verb), have to specify the exact features in order to not mix them up - "VERB__Mood=Ind|Tense=Pres|VerbForm=Fin": { - "så": {LEMMA: "så", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"} - }, - "VERB__Mood=Ind|Tense=Past|VerbForm=Fin": { - "så": {LEMMA: "se", "VerbForm": "Fin", "Tense": "Past", "Mood": "Ind"} - }, -} - -# copied from the English morph_rules.py -for tag, rules in MORPH_RULES.items(): - for key, attrs in dict(rules).items(): - rules[key.title()] = attrs diff --git a/spacy/lang/sv/__init__.py b/spacy/lang/sv/__init__.py index 08ae6b712..9dcdc543d 100644 --- a/spacy/lang/sv/__init__.py +++ b/spacy/lang/sv/__init__.py @@ -1,7 +1,6 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS from .stop_words import STOP_WORDS from .lex_attrs import LEX_ATTRS -from .morph_rules import MORPH_RULES # Punctuation stolen from Danish from ..da.punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES @@ -22,11 +21,9 @@ class SwedishDefaults(Language.Defaults): Language.Defaults.lex_attr_getters[NORM], BASE_NORMS ) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) - morph_rules = MORPH_RULES infixes = TOKENIZER_INFIXES suffixes = TOKENIZER_SUFFIXES stop_words = STOP_WORDS - morph_rules = MORPH_RULES syntax_iterators = SYNTAX_ITERATORS diff --git a/spacy/lang/sv/morph_rules.py b/spacy/lang/sv/morph_rules.py deleted file mode 100644 index 3ef6aedc5..000000000 --- a/spacy/lang/sv/morph_rules.py +++ /dev/null @@ -1,285 +0,0 @@ -from ...symbols import LEMMA, PRON_LEMMA - - -# Used the table of pronouns at https://sv.wiktionary.org/wiki/deras -MORPH_RULES = { - "PRP": { - "jag": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Case": "Nom", - }, - "mig": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Case": "Acc", - }, - "mej": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Case": "Acc", - }, - "du": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Sing", - "Case": "Nom", - }, - "han": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Masc", - "Case": "Nom", - }, - "honom": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Masc", - "Case": "Acc", - }, - "hon": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Fem", - "Case": "Nom", - }, - "henne": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Fem", - "Case": "Acc", - }, - "det": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Sing", - "Gender": "Neut", - }, - "vi": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Case": "Nom", - }, - "oss": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Case": "Acc", - }, - "ni": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Plur", - "Case": "Nom", - }, - "er": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", "Number": "Plur"}, - "de": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Plur", - "Case": "Nom", - }, - "dom": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Plur", - "Case": "Nom,Acc", - }, - "dem": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Plur", - "Case": "Acc", - }, - "min": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Poss": "Yes", - "Reflex": "Yes", - }, - "mitt": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Sing", - "Poss": "Yes", - "Reflex": "Yes", - }, - "mina": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - "din": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Sing", - "Poss": "Yes", - "Reflex": "Yes", - }, - "ditt": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Sing", - "Poss": "Yes", - "Reflex": "Yes", - }, - "dina": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - "hans": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Sing,Plur", - "Gender": "Masc", - "Poss": "Yes", - "Reflex": "Yes", - }, - "hennes": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Sing,Plur", - "Gender": "Fem", - "Poss": "Yes", - "Reflex": "Yes", - }, - "dess": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Sing,Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - "vår": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - "våran": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - "vårt": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - "vårat": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - "våra": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "One", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - "eran": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - "ert": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - "erat": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - "era": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Two", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - "deras": { - LEMMA: PRON_LEMMA, - "PronType": "Prs", - "Person": "Three", - "Number": "Plur", - "Poss": "Yes", - "Reflex": "Yes", - }, - }, - "VBZ": { - "är": { - "VerbForm": "Fin", - "Person": "One,Two,Three", - "Tense": "Pres", - "Mood": "Ind", - } - }, - "VBP": {"är": {"VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}}, - "VBD": { - "var": {"VerbForm": "Fin", "Tense": "Past", "Number": "Sing"}, - "vart": {"VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}, - }, -}