mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Remove corpus-specific morph rules
* Remove corpus-specific morph rules * Add options similar to tag maps to provide them in the `train` and `debug-data` CLIs
This commit is contained in:
parent
2f981d5af1
commit
d83e3c44c5
|
@ -131,9 +131,16 @@ def debug_data(
|
|||
tag_map = {}
|
||||
if tag_map_path is not None:
|
||||
tag_map = srsly.read_json(tag_map_path)
|
||||
morph_rules_path = util.ensure_path(config["training"]["morph_rules"])
|
||||
morph_rules = {}
|
||||
if morph_rules_path is not None:
|
||||
morph_rules = srsly.read_json(morph_rules_path)
|
||||
# Update tag map with provided mapping
|
||||
nlp.vocab.morphology.tag_map.update(tag_map)
|
||||
|
||||
# Load morph rules
|
||||
nlp.vocab.morphology.load_morph_exceptions(morph_rules)
|
||||
|
||||
msg.divider("Data file validation")
|
||||
|
||||
# Create the gold corpus to be able to better analyze data
|
||||
|
|
|
@ -127,6 +127,9 @@ def train(
|
|||
# Update tag map with provided mapping
|
||||
nlp.vocab.morphology.tag_map.update(tag_map)
|
||||
|
||||
# Load morph rules
|
||||
nlp.vocab.morphology.load_morph_exceptions(morph_rules)
|
||||
|
||||
# Create empty extra lexeme tables so the data from spacy-lookups-data
|
||||
# isn't loaded if these features are accessed
|
||||
if config["training"]["omit_extra_lookups"]:
|
||||
|
@ -482,6 +485,12 @@ def load_from_paths(config):
|
|||
if not tag_map_path.exists():
|
||||
msg.fail("Can't find tag map path", tag_map_path, exits=1)
|
||||
tag_map = srsly.read_json(config["training"]["tag_map"])
|
||||
morph_rules = {}
|
||||
morph_rules_path = util.ensure_path(config["training"]["morph_rules"])
|
||||
if morph_rules_path is not None:
|
||||
if not morph_rules_path.exists():
|
||||
msg.fail("Can't find morph rules path", morph_rules_path, exits=1)
|
||||
morph_rules = srsly.read_json(config["training"]["morph_rules"])
|
||||
weights_data = None
|
||||
init_tok2vec = util.ensure_path(config["training"]["init_tok2vec"])
|
||||
if init_tok2vec is not None:
|
||||
|
|
|
@ -1,263 +0,0 @@
|
|||
from ...symbols import LEMMA, PRON_LEMMA
|
||||
|
||||
|
||||
MORPH_RULES = {
|
||||
"PRP": {
|
||||
"ঐ": {LEMMA: PRON_LEMMA, "PronType": "Dem"},
|
||||
"ওই": {LEMMA: PRON_LEMMA, "PronType": "Dem"},
|
||||
"আমাকে": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Person": "One",
|
||||
"PronType": "Prs",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"কি": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Gender": "Neut",
|
||||
"PronType": "Int",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"সে": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Person": "Three",
|
||||
"PronType": "Prs",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"কিসে": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Gender": "Neut",
|
||||
"PronType": "Int",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"তাকে": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Person": "Three",
|
||||
"PronType": "Prs",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"স্বয়ং": {LEMMA: PRON_LEMMA, "Reflex": "Yes", "PronType": "Ref"},
|
||||
"কোনগুলো": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Plur",
|
||||
"Gender": "Neut",
|
||||
"PronType": "Int",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"তুমি": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Person": "Two",
|
||||
"PronType": "Prs",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"তুই": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Person": "Two",
|
||||
"PronType": "Prs",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"তাদেরকে": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Plur",
|
||||
"Person": "Three",
|
||||
"PronType": "Prs",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"আমরা": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Plur",
|
||||
"Person": "One",
|
||||
"PronType": "Prs",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"যিনি": {LEMMA: PRON_LEMMA, "Number": "Sing", "PronType": "Rel", "Case": "Nom"},
|
||||
"আমাদেরকে": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Plur",
|
||||
"Person": "One",
|
||||
"PronType": "Prs",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"কোন": {LEMMA: PRON_LEMMA, "Number": "Sing", "PronType": "Int", "Case": "Acc"},
|
||||
"কারা": {LEMMA: PRON_LEMMA, "Number": "Plur", "PronType": "Int", "Case": "Acc"},
|
||||
"তোমাকে": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Person": "Two",
|
||||
"PronType": "Prs",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"তোকে": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Person": "Two",
|
||||
"PronType": "Prs",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"খোদ": {LEMMA: PRON_LEMMA, "Reflex": "Yes", "PronType": "Ref"},
|
||||
"কে": {LEMMA: PRON_LEMMA, "Number": "Sing", "PronType": "Int", "Case": "Acc"},
|
||||
"যারা": {LEMMA: PRON_LEMMA, "Number": "Plur", "PronType": "Rel", "Case": "Nom"},
|
||||
"যে": {LEMMA: PRON_LEMMA, "Number": "Sing", "PronType": "Rel", "Case": "Nom"},
|
||||
"তোমরা": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Plur",
|
||||
"Person": "Two",
|
||||
"PronType": "Prs",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"তোরা": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Plur",
|
||||
"Person": "Two",
|
||||
"PronType": "Prs",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"তোমাদেরকে": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Plur",
|
||||
"Person": "Two",
|
||||
"PronType": "Prs",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"তোদেরকে": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Plur",
|
||||
"Person": "Two",
|
||||
"PronType": "Prs",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"আপন": {LEMMA: PRON_LEMMA, "Reflex": "Yes", "PronType": "Ref"},
|
||||
"এ": {LEMMA: PRON_LEMMA, "PronType": "Dem"},
|
||||
"নিজ": {LEMMA: PRON_LEMMA, "Reflex": "Yes", "PronType": "Ref"},
|
||||
"কার": {LEMMA: PRON_LEMMA, "Number": "Sing", "PronType": "Int", "Case": "Acc"},
|
||||
"যা": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Gender": "Neut",
|
||||
"PronType": "Rel",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"তারা": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Plur",
|
||||
"Person": "Three",
|
||||
"PronType": "Prs",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"আমি": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Person": "One",
|
||||
"PronType": "Prs",
|
||||
"Case": "Nom",
|
||||
},
|
||||
},
|
||||
"PRP$": {
|
||||
"আমার": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Person": "One",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"মোর": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Person": "One",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"মোদের": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Plur",
|
||||
"Person": "One",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"তার": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Person": "Three",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"তাহার": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Person": "Three",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"তোমাদের": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Plur",
|
||||
"Person": "Two",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"আমাদের": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Plur",
|
||||
"Person": "One",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"তোমার": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Person": "Two",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"তোর": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Sing",
|
||||
"Person": "Two",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"তাদের": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Plur",
|
||||
"Person": "Three",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"কাদের": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Plur",
|
||||
"PronType": "Int",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"তোদের": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Plur",
|
||||
"Person": "Two",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"যাদের": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Number": "Plur",
|
||||
"PronType": "Int",
|
||||
"Case": "Acc",
|
||||
},
|
||||
},
|
||||
}
|
|
@ -2,7 +2,6 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
|||
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
|
||||
from .stop_words import STOP_WORDS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .morph_rules import MORPH_RULES
|
||||
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ...language import Language
|
||||
|
@ -15,7 +14,6 @@ class DanishDefaults(Language.Defaults):
|
|||
lex_attr_getters.update(LEX_ATTRS)
|
||||
lex_attr_getters[LANG] = lambda text: "da"
|
||||
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
|
||||
morph_rules = MORPH_RULES
|
||||
infixes = TOKENIZER_INFIXES
|
||||
suffixes = TOKENIZER_SUFFIXES
|
||||
stop_words = STOP_WORDS
|
||||
|
|
|
@ -1,308 +0,0 @@
|
|||
from ...symbols import LEMMA, PRON_LEMMA
|
||||
|
||||
# Source: Danish Universal Dependencies and http://fjern-uv.dk/pronom.php
|
||||
|
||||
# Note: The Danish Universal Dependencies specify Case=Acc for all instances
|
||||
# of "den"/"det" even when the case is in fact "Nom". In the rules below, Case
|
||||
# is left unspecified for "den" and "det".
|
||||
|
||||
MORPH_RULES = {
|
||||
"PRON": {
|
||||
"jeg": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Case": "Nom",
|
||||
"Gender": "Com",
|
||||
}, # Case=Nom|Gender=Com|Number=Sing|Person=1|PronType=Prs
|
||||
"mig": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Case": "Acc",
|
||||
"Gender": "Com",
|
||||
}, # Case=Acc|Gender=Com|Number=Sing|Person=1|PronType=Prs
|
||||
"min": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Com",
|
||||
}, # Gender=Com|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs
|
||||
"mit": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Neut",
|
||||
}, # Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs
|
||||
"vor": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Com",
|
||||
}, # Gender=Com|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs|Style=Form
|
||||
"vort": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Neut",
|
||||
}, # Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs|Style=Form
|
||||
"du": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Sing",
|
||||
"Case": "Nom",
|
||||
"Gender": "Com",
|
||||
}, # Case=Nom|Gender=Com|Number=Sing|Person=2|PronType=Prs
|
||||
"dig": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Sing",
|
||||
"Case": "Acc",
|
||||
"Gender": "Com",
|
||||
}, # Case=Acc|Gender=Com|Number=Sing|Person=2|PronType=Prs
|
||||
"din": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Com",
|
||||
}, # Gender=Com|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs
|
||||
"dit": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Neut",
|
||||
}, # Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs
|
||||
"han": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Case": "Nom",
|
||||
"Gender": "Com",
|
||||
}, # Case=Nom|Gender=Com|Number=Sing|Person=3|PronType=Prs
|
||||
"hun": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Case": "Nom",
|
||||
"Gender": "Com",
|
||||
}, # Case=Nom|Gender=Com|Number=Sing|Person=3|PronType=Prs
|
||||
"den": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Com",
|
||||
}, # Case=Acc|Gender=Com|Number=Sing|Person=3|PronType=Prs, See note above.
|
||||
"det": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Neut",
|
||||
}, # Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs See note above.
|
||||
"ham": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Case": "Acc",
|
||||
"Gender": "Com",
|
||||
}, # Case=Acc|Gender=Com|Number=Sing|Person=3|PronType=Prs
|
||||
"hende": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Case": "Acc",
|
||||
"Gender": "Com",
|
||||
}, # Case=Acc|Gender=Com|Number=Sing|Person=3|PronType=Prs
|
||||
"sin": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Com",
|
||||
"Reflex": "Yes",
|
||||
}, # Gender=Com|Number=Sing|Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs|Reflex=Yes
|
||||
"sit": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Neut",
|
||||
"Reflex": "Yes",
|
||||
}, # Gender=Neut|Number=Sing|Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs|Reflex=Yes
|
||||
"vi": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Case": "Nom",
|
||||
"Gender": "Com",
|
||||
}, # Case=Nom|Gender=Com|Number=Plur|Person=1|PronType=Prs
|
||||
"os": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Case": "Acc",
|
||||
"Gender": "Com",
|
||||
}, # Case=Acc|Gender=Com|Number=Plur|Person=1|PronType=Prs
|
||||
"mine": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
}, # Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs
|
||||
"vore": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
}, # Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs|Style=Form
|
||||
"I": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Plur",
|
||||
"Case": "Nom",
|
||||
"Gender": "Com",
|
||||
}, # Case=Nom|Gender=Com|Number=Plur|Person=2|PronType=Prs
|
||||
"jer": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Plur",
|
||||
"Case": "Acc",
|
||||
"Gender": "Com",
|
||||
}, # Case=Acc|Gender=Com|Number=Plur|Person=2|PronType=Prs
|
||||
"dine": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
}, # Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs
|
||||
"de": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Plur",
|
||||
"Case": "Nom",
|
||||
}, # Case=Nom|Number=Plur|Person=3|PronType=Prs
|
||||
"dem": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Plur",
|
||||
"Case": "Acc",
|
||||
}, # Case=Acc|Number=Plur|Person=3|PronType=Prs
|
||||
"sine": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
}, # Number=Plur|Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs|Reflex=Yes
|
||||
"vores": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Poss": "Yes",
|
||||
}, # Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs
|
||||
"De": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Case": "Nom",
|
||||
"Gender": "Com",
|
||||
}, # Case=Nom|Gender=Com|Person=2|Polite=Form|PronType=Prs
|
||||
"Dem": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Case": "Acc",
|
||||
"Gender": "Com",
|
||||
}, # Case=Acc|Gender=Com|Person=2|Polite=Form|PronType=Prs
|
||||
"Deres": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Poss": "Yes",
|
||||
}, # Person=2|Polite=Form|Poss=Yes|PronType=Prs
|
||||
"jeres": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Poss": "Yes",
|
||||
}, # Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs
|
||||
"sig": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Case": "Acc",
|
||||
"Reflex": "Yes",
|
||||
}, # Case=Acc|Person=3|PronType=Prs|Reflex=Yes
|
||||
"hans": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Poss": "Yes",
|
||||
}, # Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs
|
||||
"hendes": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Poss": "Yes",
|
||||
}, # Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs
|
||||
"dens": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Poss": "Yes",
|
||||
}, # Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs
|
||||
"dets": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Poss": "Yes",
|
||||
}, # Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs
|
||||
"deres": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Poss": "Yes",
|
||||
}, # Number[psor]=Plur|Person=3|Poss=Yes|PronType=Prs
|
||||
},
|
||||
"VERB": {
|
||||
"er": {LEMMA: "være", "VerbForm": "Fin", "Tense": "Pres"},
|
||||
"var": {LEMMA: "være", "VerbForm": "Fin", "Tense": "Past"},
|
||||
},
|
||||
}
|
||||
|
||||
# For every tag's rule table, add a title-cased alias of each key that points
# at the very same attrs dict (e.g. "jeg" -> also "Jeg").
# dict(rules) takes a snapshot so new keys can be inserted while looping.
# NOTE(review): presumably meant to cover capitalised (sentence-initial) forms — confirm.
for tag, rules in MORPH_RULES.items():
|
||||
for key, attrs in dict(rules).items():
|
||||
rules[key.title()] = attrs
|
|
@ -1,7 +1,6 @@
|
|||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from .stop_words import STOP_WORDS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .morph_rules import MORPH_RULES
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
|
@ -20,7 +19,6 @@ class EnglishDefaults(Language.Defaults):
|
|||
lex_attr_getters[LANG] = _return_en
|
||||
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
|
||||
stop_words = STOP_WORDS
|
||||
morph_rules = MORPH_RULES
|
||||
syntax_iterators = SYNTAX_ITERATORS
|
||||
single_orth_variants = [
|
||||
{"tags": ["NFP"], "variants": ["…", "..."]},
|
||||
|
|
|
@ -1,490 +0,0 @@
|
|||
from ...symbols import LEMMA, PRON_LEMMA
|
||||
|
||||
# Several entries here look pretty suspicious. These will get the POS SCONJ
|
||||
# given the tag IN, when an adpositional reading seems much more likely for
|
||||
# a lot of these prepositions. I'm not sure what I was running in 04395ffa4
|
||||
# when I did this? It doesn't seem right.
|
||||
_subordinating_conjunctions = [
|
||||
"that",
|
||||
"if",
|
||||
"as",
|
||||
"because",
|
||||
# "of",
|
||||
# "for",
|
||||
# "before",
|
||||
# "in",
|
||||
"while",
|
||||
# "after",
|
||||
"since",
|
||||
"like",
|
||||
# "with",
|
||||
"so",
|
||||
# "to",
|
||||
# "by",
|
||||
# "on",
|
||||
# "about",
|
||||
"than",
|
||||
"whether",
|
||||
"although",
|
||||
# "from",
|
||||
"though",
|
||||
# "until",
|
||||
"unless",
|
||||
"once",
|
||||
# "without",
|
||||
# "at",
|
||||
# "into",
|
||||
"cause",
|
||||
# "over",
|
||||
"upon",
|
||||
"till",
|
||||
"whereas",
|
||||
# "beyond",
|
||||
"whilst",
|
||||
"except",
|
||||
"despite",
|
||||
"wether",
|
||||
# "then",
|
||||
"but",
|
||||
"becuse",
|
||||
"whie",
|
||||
# "below",
|
||||
# "against",
|
||||
"it",
|
||||
"w/out",
|
||||
# "toward",
|
||||
"albeit",
|
||||
"save",
|
||||
"besides",
|
||||
"becouse",
|
||||
"coz",
|
||||
"til",
|
||||
"ask",
|
||||
"i'd",
|
||||
"out",
|
||||
"near",
|
||||
"seince",
|
||||
# "towards",
|
||||
"tho",
|
||||
"sice",
|
||||
"will",
|
||||
]
|
||||
|
||||
# This seems kind of wrong too?
|
||||
# _relative_pronouns = ["this", "that", "those", "these"]
|
||||
|
||||
MORPH_RULES = {
|
||||
# "DT": {word: {"POS": "PRON"} for word in _relative_pronouns},
|
||||
"IN": {word: {"POS": "SCONJ"} for word in _subordinating_conjunctions},
|
||||
"NN": {
|
||||
"something": {"POS": "PRON"},
|
||||
"anyone": {"POS": "PRON"},
|
||||
"anything": {"POS": "PRON"},
|
||||
"nothing": {"POS": "PRON"},
|
||||
"someone": {"POS": "PRON"},
|
||||
"everything": {"POS": "PRON"},
|
||||
"everyone": {"POS": "PRON"},
|
||||
"everybody": {"POS": "PRON"},
|
||||
"nobody": {"POS": "PRON"},
|
||||
"somebody": {"POS": "PRON"},
|
||||
"anybody": {"POS": "PRON"},
|
||||
"any1": {"POS": "PRON"},
|
||||
},
|
||||
"PRP": {
|
||||
"I": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"me": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"you": {LEMMA: PRON_LEMMA, "POS": "PRON", "PronType": "Prs", "Person": "Two"},
|
||||
"he": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Masc",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"him": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Masc",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"she": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Fem",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"her": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Fem",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"it": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Neut",
|
||||
},
|
||||
"we": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"us": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"they": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Plur",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"them": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Plur",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"mine": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"his": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Masc",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"hers": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Fem",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"its": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Neut",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"ours": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"yours": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"theirs": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"myself": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Case": "Acc",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"yourself": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Case": "Acc",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"himself": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Case": "Acc",
|
||||
"Gender": "Masc",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"herself": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Case": "Acc",
|
||||
"Gender": "Fem",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"itself": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Case": "Acc",
|
||||
"Gender": "Neut",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"themself": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Case": "Acc",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"ourselves": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Case": "Acc",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"yourselves": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Case": "Acc",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"themselves": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"POS": "PRON",
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Plur",
|
||||
"Case": "Acc",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
},
|
||||
"PRP$": {
|
||||
"my": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
},
|
||||
"your": {LEMMA: PRON_LEMMA, "Person": "Two", "PronType": "Prs", "Poss": "Yes"},
|
||||
"his": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Masc",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
},
|
||||
"her": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Fem",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
},
|
||||
"its": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Neut",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
},
|
||||
"our": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
},
|
||||
"their": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Person": "Three",
|
||||
"Number": "Plur",
|
||||
"PronType": "Prs",
|
||||
"Poss": "Yes",
|
||||
},
|
||||
},
|
||||
"RB": {word: {"POS": "PART"} for word in ["not", "n't", "nt", "n’t"]},
|
||||
"VB": {
|
||||
word: {"POS": "AUX"}
|
||||
for word in ["be", "have", "do", "get", "of", "am", "are", "'ve"]
|
||||
},
|
||||
"VBN": {"been": {LEMMA: "be", "POS": "AUX"}},
|
||||
"VBG": {"being": {LEMMA: "be", "POS": "AUX"}},
|
||||
"VBZ": {
|
||||
"am": {
|
||||
LEMMA: "be",
|
||||
"POS": "AUX",
|
||||
"VerbForm": "Fin",
|
||||
"Person": "One",
|
||||
"Tense": "Pres",
|
||||
"Mood": "Ind",
|
||||
},
|
||||
"are": {
|
||||
LEMMA: "be",
|
||||
"POS": "AUX",
|
||||
"VerbForm": "Fin",
|
||||
"Person": "Two",
|
||||
"Tense": "Pres",
|
||||
"Mood": "Ind",
|
||||
},
|
||||
"is": {
|
||||
LEMMA: "be",
|
||||
"POS": "AUX",
|
||||
"VerbForm": "Fin",
|
||||
"Person": "Three",
|
||||
"Tense": "Pres",
|
||||
"Mood": "Ind",
|
||||
},
|
||||
"'re": {
|
||||
LEMMA: "be",
|
||||
"POS": "AUX",
|
||||
"VerbForm": "Fin",
|
||||
"Person": "Two",
|
||||
"Tense": "Pres",
|
||||
"Mood": "Ind",
|
||||
},
|
||||
"'s": {
|
||||
LEMMA: "be",
|
||||
"POS": "AUX",
|
||||
"VerbForm": "Fin",
|
||||
"Person": "Three",
|
||||
"Tense": "Pres",
|
||||
"Mood": "Ind",
|
||||
},
|
||||
"has": {LEMMA: "have", "POS": "AUX"},
|
||||
"does": {LEMMA: "do", "POS": "AUX"},
|
||||
},
|
||||
"VBP": {
|
||||
"are": {
|
||||
LEMMA: "be",
|
||||
"POS": "AUX",
|
||||
"VerbForm": "Fin",
|
||||
"Tense": "Pres",
|
||||
"Mood": "Ind",
|
||||
},
|
||||
"'re": {
|
||||
LEMMA: "be",
|
||||
"POS": "AUX",
|
||||
"VerbForm": "Fin",
|
||||
"Tense": "Pres",
|
||||
"Mood": "Ind",
|
||||
},
|
||||
"am": {
|
||||
LEMMA: "be",
|
||||
"POS": "AUX",
|
||||
"VerbForm": "Fin",
|
||||
"Person": "One",
|
||||
"Tense": "Pres",
|
||||
"Mood": "Ind",
|
||||
},
|
||||
"do": {"POS": "AUX"},
|
||||
"have": {"POS": "AUX"},
|
||||
"'m": {"POS": "AUX", LEMMA: "be"},
|
||||
"'ve": {"POS": "AUX"},
|
||||
"'s": {"POS": "AUX"},
|
||||
"is": {"POS": "AUX"},
|
||||
"'d": {"POS": "AUX"},
|
||||
},
|
||||
"VBD": {
|
||||
"was": {
|
||||
LEMMA: "be",
|
||||
"POS": "AUX",
|
||||
"VerbForm": "Fin",
|
||||
"Tense": "Past",
|
||||
"Number": "Sing",
|
||||
},
|
||||
"were": {
|
||||
LEMMA: "be",
|
||||
"POS": "AUX",
|
||||
"VerbForm": "Fin",
|
||||
"Tense": "Past",
|
||||
"Number": "Plur",
|
||||
},
|
||||
"did": {LEMMA: "do", "POS": "AUX"},
|
||||
"had": {LEMMA: "have", "POS": "AUX"},
|
||||
"'d": {LEMMA: "have", "POS": "AUX"},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# For every tag's rule table, add a title-cased alias of each key that points
# at the very same attrs dict (e.g. "me" -> also "Me").
# dict(rules) takes a snapshot so new keys can be inserted while looping.
# NOTE(review): presumably meant to cover capitalised (sentence-initial) forms — confirm.
for tag, rules in MORPH_RULES.items():
|
||||
for key, attrs in dict(rules).items():
|
||||
rules[key.title()] = attrs
|
|
@ -2,7 +2,6 @@ from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
|
|||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from .stop_words import STOP_WORDS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .morph_rules import MORPH_RULES
|
||||
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ..norm_exceptions import BASE_NORMS
|
||||
|
@ -31,7 +30,6 @@ class LithuanianDefaults(Language.Defaults):
|
|||
del mod_base_exceptions["8)"]
|
||||
tokenizer_exceptions = update_exc(mod_base_exceptions, TOKENIZER_EXCEPTIONS)
|
||||
stop_words = STOP_WORDS
|
||||
morph_rules = MORPH_RULES
|
||||
|
||||
|
||||
class Lithuanian(Language):
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -2,7 +2,6 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
|||
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
|
||||
from .punctuation import TOKENIZER_SUFFIXES
|
||||
from .stop_words import STOP_WORDS
|
||||
from .morph_rules import MORPH_RULES
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
|
@ -23,7 +22,6 @@ class NorwegianDefaults(Language.Defaults):
|
|||
infixes = TOKENIZER_INFIXES
|
||||
suffixes = TOKENIZER_SUFFIXES
|
||||
stop_words = STOP_WORDS
|
||||
morph_rules = MORPH_RULES
|
||||
syntax_iterators = SYNTAX_ITERATORS
|
||||
|
||||
|
||||
|
|
|
@ -1,665 +0,0 @@
|
|||
from ...symbols import LEMMA, PRON_LEMMA
|
||||
|
||||
# This dict includes all the PRON and DET tag combinations found in the
|
||||
# dataset developed by Schibsted, Nasjonalbiblioteket and LTG (to be published
|
||||
# autumn 2018) and the rarely used polite form.
|
||||
|
||||
MORPH_RULES = {
|
||||
"PRON__Animacy=Anim|Case=Nom|Number=Sing|Person=1|PronType=Prs": {
|
||||
"jeg": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Case": "Nom",
|
||||
}
|
||||
},
|
||||
"PRON__Animacy=Anim|Case=Nom|Number=Sing|Person=2|PronType=Prs": {
|
||||
"du": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Sing",
|
||||
"Case": "Nom",
|
||||
},
|
||||
# polite form, not sure about the tag
|
||||
"De": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Sing",
|
||||
"Case": "Nom",
|
||||
"Polite": "Form",
|
||||
},
|
||||
},
|
||||
"PRON__Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs": {
|
||||
"hun": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Fem",
|
||||
"Case": "Nom",
|
||||
}
|
||||
},
|
||||
"PRON__Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs": {
|
||||
"han": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Masc",
|
||||
"Case": "Nom",
|
||||
}
|
||||
},
|
||||
"PRON__Gender=Neut|Number=Sing|Person=3|PronType=Prs": {
|
||||
"det": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Neut",
|
||||
},
|
||||
"alt": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Neut",
|
||||
},
|
||||
"intet": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Neut",
|
||||
},
|
||||
"noe": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Number": "Sing",
|
||||
"Person": "Three",
|
||||
"Gender": "Neut",
|
||||
},
|
||||
},
|
||||
"PRON__Animacy=Anim|Case=Nom|Number=Plur|Person=1|PronType=Prs": {
|
||||
"vi": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Case": "Nom",
|
||||
}
|
||||
},
|
||||
"PRON__Animacy=Anim|Case=Nom|Number=Plur|Person=2|PronType=Prs": {
|
||||
"dere": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Plur",
|
||||
"Case": "Nom",
|
||||
}
|
||||
},
|
||||
"PRON__Case=Nom|Number=Plur|Person=3|PronType=Prs": {
|
||||
"de": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Plur",
|
||||
"Case": "Nom",
|
||||
}
|
||||
},
|
||||
"PRON__Animacy=Anim|Case=Acc|Number=Sing|Person=1|PronType=Prs": {
|
||||
"meg": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Case": "Acc",
|
||||
}
|
||||
},
|
||||
"PRON__Animacy=Anim|Case=Acc|Number=Sing|Person=2|PronType=Prs": {
|
||||
"deg": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Sing",
|
||||
"Case": "Acc",
|
||||
},
|
||||
# polite form, not sure about the tag
|
||||
"Dem": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Sing",
|
||||
"Case": "Acc",
|
||||
"Polite": "Form",
|
||||
},
|
||||
},
|
||||
"PRON__Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs": {
|
||||
"henne": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Fem",
|
||||
"Case": "Acc",
|
||||
}
|
||||
},
|
||||
"PRON__Animacy=Anim|Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs": {
|
||||
"ham": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Masc",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"han": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Masc",
|
||||
"Case": "Acc",
|
||||
},
|
||||
},
|
||||
"PRON__Animacy=Anim|Case=Acc|Number=Plur|Person=1|PronType=Prs": {
|
||||
"oss": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Case": "Acc",
|
||||
}
|
||||
},
|
||||
"PRON__Animacy=Anim|Case=Acc|Number=Plur|Person=2|PronType=Prs": {
|
||||
"dere": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Plur",
|
||||
"Case": "Acc",
|
||||
}
|
||||
},
|
||||
"PRON__Case=Acc|Number=Plur|Person=3|PronType=Prs": {
|
||||
"dem": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Plur",
|
||||
"Case": "Acc",
|
||||
}
|
||||
},
|
||||
"PRON__Case=Acc|Reflex=Yes": {
|
||||
"seg": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"Person": "Three",
|
||||
"Number": "Sing,Plur",
|
||||
"Reflex": "Yes",
|
||||
}
|
||||
},
|
||||
"PRON__Animacy=Anim|Case=Nom|Number=Sing|PronType=Prs": {
|
||||
"man": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Number": "Sing", "Case": "Nom"}
|
||||
},
|
||||
"DET__Gender=Masc|Number=Sing|Poss=Yes": {
|
||||
"min": {
|
||||
LEMMA: "min",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Masc",
|
||||
},
|
||||
"din": {
|
||||
LEMMA: "din",
|
||||
"Person": "Two",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Masc",
|
||||
},
|
||||
"hennes": {
|
||||
LEMMA: "hennes",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Masc",
|
||||
},
|
||||
"hans": {
|
||||
LEMMA: "hans",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Masc",
|
||||
},
|
||||
"sin": {
|
||||
LEMMA: "sin",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Masc",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"vår": {
|
||||
LEMMA: "vår",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Masc",
|
||||
},
|
||||
"deres": {
|
||||
LEMMA: "deres",
|
||||
"Person": "Two,Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Masc",
|
||||
},
|
||||
# polite form, not sure about the tag
|
||||
"Deres": {
|
||||
LEMMA: "Deres",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Masc",
|
||||
"Polite": "Form",
|
||||
},
|
||||
},
|
||||
"DET__Gender=Fem|Number=Sing|Poss=Yes": {
|
||||
"mi": {
|
||||
LEMMA: "min",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Fem",
|
||||
},
|
||||
"di": {
|
||||
LEMMA: "din",
|
||||
"Person": "Two",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Fem",
|
||||
},
|
||||
"hennes": {
|
||||
LEMMA: "hennes",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Fem",
|
||||
},
|
||||
"hans": {
|
||||
LEMMA: "hans",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Fem",
|
||||
},
|
||||
"si": {
|
||||
LEMMA: "sin",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Fem",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"vår": {
|
||||
LEMMA: "vår",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Fem",
|
||||
},
|
||||
"deres": {
|
||||
LEMMA: "deres",
|
||||
"Person": "Two,Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Fem",
|
||||
},
|
||||
# polite form, not sure about the tag
|
||||
"Deres": {
|
||||
LEMMA: "Deres",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Fem",
|
||||
"Polite": "Form",
|
||||
},
|
||||
},
|
||||
"DET__Gender=Neut|Number=Sing|Poss=Yes": {
|
||||
"mitt": {
|
||||
LEMMA: "min",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Neut",
|
||||
},
|
||||
"ditt": {
|
||||
LEMMA: "din",
|
||||
"Person": "Two",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Neut",
|
||||
},
|
||||
"hennes": {
|
||||
LEMMA: "hennes",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Neut",
|
||||
},
|
||||
"hans": {
|
||||
LEMMA: "hans",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Neut",
|
||||
},
|
||||
"sitt": {
|
||||
LEMMA: "sin",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Neut",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"vårt": {
|
||||
LEMMA: "vår",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Neut",
|
||||
},
|
||||
"deres": {
|
||||
LEMMA: "deres",
|
||||
"Person": "Two,Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Neut",
|
||||
},
|
||||
# polite form, not sure about the tag
|
||||
"Deres": {
|
||||
LEMMA: "Deres",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Gender": "Neut",
|
||||
"Polite": "Form",
|
||||
},
|
||||
},
|
||||
"DET__Number=Plur|Poss=Yes": {
|
||||
"mine": {LEMMA: "min", "Person": "One", "Number": "Plur", "Poss": "Yes"},
|
||||
"dine": {LEMMA: "din", "Person": "Two", "Number": "Plur", "Poss": "Yes"},
|
||||
"hennes": {LEMMA: "hennes", "Person": "Three", "Number": "Plur", "Poss": "Yes"},
|
||||
"hans": {LEMMA: "hans", "Person": "Three", "Number": "Plur", "Poss": "Yes"},
|
||||
"sine": {
|
||||
LEMMA: "sin",
|
||||
"Person": "Three",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"våre": {LEMMA: "vår", "Person": "One", "Number": "Plur", "Poss": "Yes"},
|
||||
"deres": {
|
||||
LEMMA: "deres",
|
||||
"Person": "Two,Three",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
},
|
||||
},
|
||||
"PRON__Animacy=Anim|Number=Plur|PronType=Rcp": {
|
||||
"hverandre": {LEMMA: PRON_LEMMA, "PronType": "Rcp", "Number": "Plur"}
|
||||
},
|
||||
"DET__Number=Plur|Poss=Yes|PronType=Rcp": {
|
||||
"hverandres": {
|
||||
LEMMA: "hverandres",
|
||||
"PronType": "Rcp",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
}
|
||||
},
|
||||
"PRON___": {"som": {LEMMA: PRON_LEMMA}, "ikkenoe": {LEMMA: PRON_LEMMA}},
|
||||
"PRON__PronType=Int": {"hva": {LEMMA: PRON_LEMMA, "PronType": "Int"}},
|
||||
"PRON__Animacy=Anim|PronType=Int": {"hvem": {LEMMA: PRON_LEMMA, "PronType": "Int"}},
|
||||
"PRON__Animacy=Anim|Poss=Yes|PronType=Int": {
|
||||
"hvis": {LEMMA: PRON_LEMMA, "PronType": "Int", "Poss": "Yes"}
|
||||
},
|
||||
"PRON__Number=Plur|Person=3|PronType=Prs": {
|
||||
"noen": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Number": "Plur",
|
||||
"Person": "Three",
|
||||
},
|
||||
"ingen": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Number": "Plur",
|
||||
"Person": "Three",
|
||||
},
|
||||
"alle": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Number": "Plur",
|
||||
"Person": "Three",
|
||||
},
|
||||
},
|
||||
"PRON__Gender=Fem,Masc|Number=Sing|Person=3|PronType=Prs": {
|
||||
"noen": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Number": "Sing",
|
||||
"Person": "Three",
|
||||
"Gender": "Fem,Masc",
|
||||
},
|
||||
"den": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Number": "Sing",
|
||||
"Person": "Three",
|
||||
"Gender": "Fem,Masc",
|
||||
},
|
||||
"ingen": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Number": "Sing",
|
||||
"Person": "Three",
|
||||
"Gender": "Fem,Masc",
|
||||
"Polarity": "Neg",
|
||||
},
|
||||
},
|
||||
"PRON__Number=Sing": {"ingenting": {LEMMA: PRON_LEMMA, "Number": "Sing"}},
|
||||
"PRON__Animacy=Anim|Number=Sing|PronType=Prs": {
|
||||
"en": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Number": "Sing"}
|
||||
},
|
||||
"PRON__Animacy=Anim|Case=Gen,Nom|Number=Sing|PronType=Prs": {
|
||||
"ens": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Number": "Sing",
|
||||
"Case": "Gen,Nom",
|
||||
}
|
||||
},
|
||||
"PRON__Animacy=Anim|Case=Gen|Number=Sing|PronType=Prs": {
|
||||
"ens": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Number": "Sing", "Case": "Gen"}
|
||||
},
|
||||
"DET__Case=Gen|Gender=Masc|Number=Sing": {
|
||||
"ens": {LEMMA: "en", "Number": "Sing", "Case": "Gen"}
|
||||
},
|
||||
"DET__Gender=Masc|Number=Sing": {
|
||||
"enhver": {LEMMA: "enhver", "Number": "Sing", "Gender": "Masc"},
|
||||
"all": {LEMMA: "all", "Number": "Sing", "Gender": "Masc"},
|
||||
"hver": {LEMMA: "hver", "Number": "Sing", "Gender": "Masc"},
|
||||
"noen": {LEMMA: "noen", "Gender": "Masc", "Number": "Sing"},
|
||||
"noe": {LEMMA: "noen", "Gender": "Masc", "Number": "Sing"},
|
||||
"en": {LEMMA: "en", "Number": "Sing", "Gender": "Neut"},
|
||||
"ingen": {LEMMA: "ingen", "Gender": "Masc", "Number": "Sing"},
|
||||
},
|
||||
"DET__Gender=Fem|Number=Sing": {
|
||||
"enhver": {LEMMA: "enhver", "Number": "Sing", "Gender": "Fem"},
|
||||
"all": {LEMMA: "all", "Number": "Sing", "Gender": "Fem"},
|
||||
"hver": {LEMMA: "hver", "Number": "Sing", "Gender": "Fem"},
|
||||
"noen": {LEMMA: "noen", "Gender": "Fem", "Number": "Sing"},
|
||||
"noe": {LEMMA: "noen", "Gender": "Fem", "Number": "Sing"},
|
||||
"ei": {LEMMA: "en", "Number": "Sing", "Gender": "Fem"},
|
||||
},
|
||||
"DET__Gender=Neut|Number=Sing": {
|
||||
"ethvert": {LEMMA: "enhver", "Number": "Sing", "Gender": "Neut"},
|
||||
"alt": {LEMMA: "all", "Number": "Sing", "Gender": "Neut"},
|
||||
"hvert": {LEMMA: "hver", "Number": "Sing", "Gender": "Neut"},
|
||||
"noe": {LEMMA: "noen", "Number": "Sing", "Gender": "Neut"},
|
||||
"intet": {LEMMA: "ingen", "Gender": "Neut", "Number": "Sing"},
|
||||
"et": {LEMMA: "en", "Number": "Sing", "Gender": "Neut"},
|
||||
},
|
||||
"DET__Gender=Neut|Number=Sing|PronType=Int": {
|
||||
"hvilket": {
|
||||
LEMMA: "hvilken",
|
||||
"PronType": "Int",
|
||||
"Number": "Sing",
|
||||
"Gender": "Neut",
|
||||
}
|
||||
},
|
||||
"DET__Gender=Fem|Number=Sing|PronType=Int": {
|
||||
"hvilken": {
|
||||
LEMMA: "hvilken",
|
||||
"PronType": "Int",
|
||||
"Number": "Sing",
|
||||
"Gender": "Fem",
|
||||
}
|
||||
},
|
||||
"DET__Gender=Masc|Number=Sing|PronType=Int": {
|
||||
"hvilken": {
|
||||
LEMMA: "hvilken",
|
||||
"PronType": "Int",
|
||||
"Number": "Sing",
|
||||
"Gender": "Masc",
|
||||
}
|
||||
},
|
||||
"DET__Number=Plur|PronType=Int": {
|
||||
"hvilke": {LEMMA: "hvilken", "PronType": "Int", "Number": "Plur"}
|
||||
},
|
||||
"DET__Number=Plur": {
|
||||
"alle": {LEMMA: "all", "Number": "Plur"},
|
||||
"noen": {LEMMA: "noen", "Number": "Plur"},
|
||||
"egne": {LEMMA: "egen", "Number": "Plur"},
|
||||
"ingen": {LEMMA: "ingen", "Number": "Plur"},
|
||||
},
|
||||
"DET__Gender=Masc|Number=Sing|PronType=Dem": {
|
||||
"den": {LEMMA: "den", "PronType": "Dem", "Number": "Sing", "Gender": "Masc"},
|
||||
"slik": {LEMMA: "slik", "PronType": "Dem", "Number": "Sing", "Gender": "Masc"},
|
||||
"denne": {
|
||||
LEMMA: "denne",
|
||||
"PronType": "Dem",
|
||||
"Number": "Sing",
|
||||
"Gender": "Masc",
|
||||
},
|
||||
},
|
||||
"DET__Gender=Fem|Number=Sing|PronType=Dem": {
|
||||
"den": {LEMMA: "den", "PronType": "Dem", "Number": "Sing", "Gender": "Fem"},
|
||||
"slik": {LEMMA: "slik", "PronType": "Dem", "Number": "Sing", "Gender": "Fem"},
|
||||
"denne": {LEMMA: "denne", "PronType": "Dem", "Number": "Sing", "Gender": "Fem"},
|
||||
},
|
||||
"DET__Gender=Neut|Number=Sing|PronType=Dem": {
|
||||
"det": {LEMMA: "det", "PronType": "Dem", "Number": "Sing", "Gender": "Neut"},
|
||||
"slikt": {LEMMA: "slik", "PronType": "Dem", "Number": "Sing", "Gender": "Neut"},
|
||||
"dette": {
|
||||
LEMMA: "dette",
|
||||
"PronType": "Dem",
|
||||
"Number": "Sing",
|
||||
"Gender": "Neut",
|
||||
},
|
||||
},
|
||||
"DET__Number=Plur|PronType=Dem": {
|
||||
"disse": {LEMMA: "disse", "PronType": "Dem", "Number": "Plur"},
|
||||
"andre": {LEMMA: "annen", "PronType": "Dem", "Number": "Plur"},
|
||||
"de": {LEMMA: "de", "PronType": "Dem", "Number": "Plur"},
|
||||
"slike": {LEMMA: "slik", "PronType": "Dem", "Number": "Plur"},
|
||||
},
|
||||
"DET__Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem": {
|
||||
"annen": {LEMMA: "annen", "PronType": "Dem", "Number": "Sing", "Gender": "Masc"}
|
||||
},
|
||||
"DET__Definite=Ind|Gender=Fem|Number=Sing|PronType=Dem": {
|
||||
"annen": {LEMMA: "annen", "PronType": "Dem", "Number": "Sing", "Gender": "Fem"}
|
||||
},
|
||||
"DET__Definite=Ind|Gender=Neut|Number=Sing|PronType=Dem": {
|
||||
"annet": {LEMMA: "annen", "PronType": "Dem", "Number": "Sing", "Gender": "Neut"}
|
||||
},
|
||||
"DET__Case=Gen|Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem": {
|
||||
"annens": {
|
||||
LEMMA: "annnen",
|
||||
"PronType": "Dem",
|
||||
"Number": "Sing",
|
||||
"Gender": "Masc",
|
||||
"Case": "Gen",
|
||||
}
|
||||
},
|
||||
"DET__Case=Gen|Number=Plur|PronType=Dem": {
|
||||
"andres": {LEMMA: "annen", "PronType": "Dem", "Number": "Plur", "Case": "Gen"}
|
||||
},
|
||||
"DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Dem": {
|
||||
"dens": {
|
||||
LEMMA: "den",
|
||||
"PronType": "Dem",
|
||||
"Number": "Sing",
|
||||
"Gender": "Fem",
|
||||
"Case": "Gen",
|
||||
}
|
||||
},
|
||||
"DET__Case=Gen|Gender=Masc|Number=Sing|PronType=Dem": {
|
||||
"hvis": {
|
||||
LEMMA: "hvis",
|
||||
"PronType": "Dem",
|
||||
"Number": "Sing",
|
||||
"Gender": "Masc",
|
||||
"Case": "Gen",
|
||||
},
|
||||
"dens": {
|
||||
LEMMA: "den",
|
||||
"PronType": "Dem",
|
||||
"Number": "Sing",
|
||||
"Gender": "Masc",
|
||||
"Case": "Gen",
|
||||
},
|
||||
},
|
||||
"DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Dem": {
|
||||
"dets": {
|
||||
LEMMA: "det",
|
||||
"PronType": "Dem",
|
||||
"Number": "Sing",
|
||||
"Gender": "Neut",
|
||||
"Case": "Gen",
|
||||
}
|
||||
},
|
||||
"DET__Case=Gen|Number=Plur": {
|
||||
"alles": {LEMMA: "all", "Number": "Plur", "Case": "Gen"}
|
||||
},
|
||||
"DET__Definite=Def|Number=Sing|PronType=Dem": {
|
||||
"andre": {LEMMA: "annen", "Number": "Sing", "PronType": "Dem"}
|
||||
},
|
||||
"DET__Definite=Def|PronType=Dem": {
|
||||
"samme": {LEMMA: "samme", "PronType": "Dem"},
|
||||
"forrige": {LEMMA: "forrige", "PronType": "Dem"},
|
||||
"neste": {LEMMA: "neste", "PronType": "Dem"},
|
||||
},
|
||||
"DET__Definite=Def": {"selve": {LEMMA: "selve"}, "selveste": {LEMMA: "selveste"}},
|
||||
"DET___": {"selv": {LEMMA: "selv"}, "endel": {LEMMA: "endel"}},
|
||||
"DET__Definite=Ind|Gender=Fem|Number=Sing": {
|
||||
"egen": {LEMMA: "egen", "Gender": "Fem", "Number": "Sing"}
|
||||
},
|
||||
"DET__Definite=Ind|Gender=Masc|Number=Sing": {
|
||||
"egen": {LEMMA: "egen", "Gender": "Masc", "Number": "Sing"}
|
||||
},
|
||||
"DET__Definite=Ind|Gender=Neut|Number=Sing": {
|
||||
"eget": {LEMMA: "egen", "Gender": "Neut", "Number": "Sing"}
|
||||
},
|
||||
# same wordform and pos (verb), have to specify the exact features in order to not mix them up
|
||||
"VERB__Mood=Ind|Tense=Pres|VerbForm=Fin": {
|
||||
"så": {LEMMA: "så", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
|
||||
},
|
||||
"VERB__Mood=Ind|Tense=Past|VerbForm=Fin": {
|
||||
"så": {LEMMA: "se", "VerbForm": "Fin", "Tense": "Past", "Mood": "Ind"}
|
||||
},
|
||||
}
|
||||
|
||||
# copied from the English morph_rules.py
|
||||
for tag, rules in MORPH_RULES.items():
|
||||
for key, attrs in dict(rules).items():
|
||||
rules[key.title()] = attrs
|
|
@ -1,7 +1,6 @@
|
|||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from .stop_words import STOP_WORDS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .morph_rules import MORPH_RULES
|
||||
|
||||
# Punctuation stolen from Danish
|
||||
from ..da.punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
|
||||
|
@ -22,11 +21,9 @@ class SwedishDefaults(Language.Defaults):
|
|||
Language.Defaults.lex_attr_getters[NORM], BASE_NORMS
|
||||
)
|
||||
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
|
||||
morph_rules = MORPH_RULES
|
||||
infixes = TOKENIZER_INFIXES
|
||||
suffixes = TOKENIZER_SUFFIXES
|
||||
stop_words = STOP_WORDS
|
||||
morph_rules = MORPH_RULES
|
||||
syntax_iterators = SYNTAX_ITERATORS
|
||||
|
||||
|
||||
|
|
|
@ -1,285 +0,0 @@
|
|||
from ...symbols import LEMMA, PRON_LEMMA
|
||||
|
||||
|
||||
# Used the table of pronouns at https://sv.wiktionary.org/wiki/deras
|
||||
MORPH_RULES = {
|
||||
"PRP": {
|
||||
"jag": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"mig": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"mej": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"du": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Sing",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"han": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Masc",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"honom": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Masc",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"hon": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Fem",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"henne": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Fem",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"det": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Sing",
|
||||
"Gender": "Neut",
|
||||
},
|
||||
"vi": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"oss": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"ni": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Plur",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"er": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", "Number": "Plur"},
|
||||
"de": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Plur",
|
||||
"Case": "Nom",
|
||||
},
|
||||
"dom": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Plur",
|
||||
"Case": "Nom,Acc",
|
||||
},
|
||||
"dem": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Plur",
|
||||
"Case": "Acc",
|
||||
},
|
||||
"min": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"mitt": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"mina": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"din": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"ditt": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Sing",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"dina": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"hans": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Sing,Plur",
|
||||
"Gender": "Masc",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"hennes": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Sing,Plur",
|
||||
"Gender": "Fem",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"dess": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Sing,Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"vår": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"våran": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"vårt": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"vårat": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"våra": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "One",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"eran": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"ert": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"erat": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"era": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Two",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
"deras": {
|
||||
LEMMA: PRON_LEMMA,
|
||||
"PronType": "Prs",
|
||||
"Person": "Three",
|
||||
"Number": "Plur",
|
||||
"Poss": "Yes",
|
||||
"Reflex": "Yes",
|
||||
},
|
||||
},
|
||||
"VBZ": {
|
||||
"är": {
|
||||
"VerbForm": "Fin",
|
||||
"Person": "One,Two,Three",
|
||||
"Tense": "Pres",
|
||||
"Mood": "Ind",
|
||||
}
|
||||
},
|
||||
"VBP": {"är": {"VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}},
|
||||
"VBD": {
|
||||
"var": {"VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
|
||||
"vart": {"VerbForm": "Fin", "Tense": "Past", "Number": "Plur"},
|
||||
},
|
||||
}
|
Loading…
Reference in New Issue
Block a user