mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
Optimize Greek language support (#2658)
This commit is contained in:
parent
3953e967a0
commit
fe94e696d3
|
@ -7,6 +7,8 @@ from .tag_map_general import TAG_MAP
|
||||||
from .stop_words import STOP_WORDS
|
from .stop_words import STOP_WORDS
|
||||||
from .lex_attrs import LEX_ATTRS
|
from .lex_attrs import LEX_ATTRS
|
||||||
from .lemmatizer import LEMMA_RULES, LEMMA_INDEX, LEMMA_EXC
|
from .lemmatizer import LEMMA_RULES, LEMMA_INDEX, LEMMA_EXC
|
||||||
|
from .lemmatizer.lemmatizer import GreekLemmatizer
|
||||||
|
from .syntax_iterators import SYNTAX_ITERATORS
|
||||||
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
|
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
|
||||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||||
from .norm_exceptions import NORM_EXCEPTIONS
|
from .norm_exceptions import NORM_EXCEPTIONS
|
||||||
|
@ -20,15 +22,23 @@ class GreekDefaults(Language.Defaults):
|
||||||
lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
|
lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
|
||||||
lex_attr_getters.update(LEX_ATTRS)
|
lex_attr_getters.update(LEX_ATTRS)
|
||||||
lex_attr_getters[LANG] = lambda text: 'el' # ISO code
|
lex_attr_getters[LANG] = lambda text: 'el' # ISO code
|
||||||
lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS, NORM_EXCEPTIONS)
|
lex_attr_getters[NORM] = add_lookups(
|
||||||
|
Language.Defaults.lex_attr_getters[NORM], BASE_NORMS, NORM_EXCEPTIONS)
|
||||||
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
|
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
|
||||||
stop_words = STOP_WORDS
|
stop_words = STOP_WORDS
|
||||||
lemma_rules = LEMMA_RULES
|
|
||||||
lemma_index = LEMMA_INDEX
|
|
||||||
tag_map = TAG_MAP
|
tag_map = TAG_MAP
|
||||||
prefixes = TOKENIZER_PREFIXES
|
prefixes = TOKENIZER_PREFIXES
|
||||||
suffixes = TOKENIZER_SUFFIXES
|
suffixes = TOKENIZER_SUFFIXES
|
||||||
infixes = TOKENIZER_INFIXES
|
infixes = TOKENIZER_INFIXES
|
||||||
|
syntax_iterators = SYNTAX_ITERATORS
|
||||||
|
|
||||||
|
@classmethod
def create_lemmatizer(cls, nlp=None):
    """Build the Greek lemmatizer from the module-level lemma tables.

    `nlp` is accepted for signature compatibility but is not used here.
    """
    return GreekLemmatizer(
        index=LEMMA_INDEX,
        exceptions=LEMMA_EXC,
        rules=LEMMA_RULES,
    )
|
||||||
|
|
||||||
|
|
||||||
class Greek(Language):
|
class Greek(Language):
|
||||||
|
@ -39,4 +49,3 @@ class Greek(Language):
|
||||||
|
|
||||||
# set default export – this allows the language class to be lazy-loaded
|
# set default export – this allows the language class to be lazy-loaded
|
||||||
__all__ = ['Greek']
|
__all__ = ['Greek']
|
||||||
|
|
||||||
|
|
|
@ -9,11 +9,20 @@ Example sentences to test spaCy and its language models.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Example sentences. Long sentences are wrapped with implicit string
# concatenation (not triple-quoted literals) so that no newline characters
# end up inside the example text.
sentences = [
    ("Η άνιση κατανομή του πλούτου και του εισοδήματος, η οποία έχει λάβει "
     "τρομερές διαστάσεις, δεν δείχνει τάσεις βελτίωσης."),
    ("Ο στόχος της σύντομης αυτής έκθεσης είναι να συνοψίσει τα κυριότερα "
     "συμπεράσματα των επισκοπήσεων κάθε μιας χώρας."),
    ("Μέχρι αργά χθες το βράδυ ο πλοιοκτήτης παρέμενε έξω από το γραφείο του "
     "γενικού γραμματέα του υπουργείου, ενώ είχε μόνον τηλεφωνική επικοινωνία "
     "με τον υπουργό."),
    ("Σύμφωνα με καλά ενημερωμένη πηγή, από την επεξεργασία του προέκυψε ότι "
     "οι δράστες της επίθεσης ήταν δύο, καθώς και ότι προσέγγισαν και "
     "αποχώρησαν από το σημείο με μοτοσικλέτα."),
    "Η υποδομή καταλυμάτων στην Ελλάδα είναι πλήρης και ανανεώνεται συνεχώς.",
    ("Το επείγον ταχυδρομείο (ήτοι το παραδοτέο εντός 48 ωρών το πολύ) μπορεί "
     "να μεταφέρεται αεροπορικώς μόνον εφόσον εφαρμόζονται οι κανόνες "
     "ασφαλείας."),
    ("Στις ορεινές περιοχές του νησιού οι χιονοπτώσεις και οι παγετοί είναι "
     "περιορισμένοι ενώ στις παραθαλάσσιες περιοχές σημειώνονται σπανίως."),
]
|
||||||
|
|
|
@ -5,19 +5,29 @@ from __future__ import unicode_literals
|
||||||
ADJECTIVES_IRREG = {
|
ADJECTIVES_IRREG = {
|
||||||
"χειρότερος": ("κακός",),
|
"χειρότερος": ("κακός",),
|
||||||
"χειρότερη": ("κακός",),
|
"χειρότερη": ("κακός",),
|
||||||
|
"χειρότερης": ("κακός",),
|
||||||
"χειρότερο": ("κακός",),
|
"χειρότερο": ("κακός",),
|
||||||
|
"χειρότεροι": ("κακός",),
|
||||||
|
"χειρότερων": ("κακός",),
|
||||||
|
"χειρότερου": ("κακός",),
|
||||||
"βέλτιστος": ("καλός",),
|
"βέλτιστος": ("καλός",),
|
||||||
"βέλτιστη": ("καλός",),
|
"βέλτιστη": ("καλός",),
|
||||||
|
"βέλτιστης": ("καλός",),
|
||||||
"βέλτιστο": ("καλός",),
|
"βέλτιστο": ("καλός",),
|
||||||
"βέλτιστοι": ("καλός",),
|
"βέλτιστοι": ("καλός",),
|
||||||
"βέλτιστων": ("καλός",),
|
"βέλτιστων": ("καλός",),
|
||||||
|
"βέλτιστου": ("καλός",),
|
||||||
"ελάχιστος": ("λίγος",),
|
"ελάχιστος": ("λίγος",),
|
||||||
"ελάχιστα": ("λίγος",),
|
"ελάχιστα": ("λίγος",),
|
||||||
"ελάχιστοι": ("λίγος",),
|
"ελάχιστοι": ("λίγος",),
|
||||||
"ελάχιστων": ("λίγος",),
|
"ελάχιστων": ("λίγος",),
|
||||||
"ελάχιστη": ("λίγος",),
|
"ελάχιστη": ("λίγος",),
|
||||||
|
"ελάχιστης": ("λίγος",),
|
||||||
"ελάχιστο": ("λίγος",),
|
"ελάχιστο": ("λίγος",),
|
||||||
|
"ελάχιστου": ("λίγος",),
|
||||||
"πλείστος": ("πολύς",),
|
"πλείστος": ("πολύς",),
|
||||||
|
"πλείστου": ("πολύς",),
|
||||||
|
"πλείστων": ("πολύς",),
|
||||||
"πολλή": ("πολύ",),
|
"πολλή": ("πολύ",),
|
||||||
"πολύς": ("πολύ",),
|
"πολύς": ("πολύ",),
|
||||||
"πολλύ": ("πολύ",),
|
"πολλύ": ("πολύ",),
|
||||||
|
|
|
@ -3,94 +3,148 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
|
|
||||||
ADJECTIVE_RULES = [
|
ADJECTIVE_RULES = [
|
||||||
["οί","ός"], # καρδιακοί
|
["οί", "ός"], # καρδιακοί -> καρδιακός. Ονομαστική πλ. σε -ός. (m)
|
||||||
["ές","ός"], # επιφανειακές
|
["ών", "ός"], # καρδιακών -> καρδιακός. Γενική πλ. σε -ός. (m)
|
||||||
["ές","ος"], # καρδιακές
|
["ού", "ός"], # καρδιακού -> καρδιακός. Γενική εν. σε -ός. (m)
|
||||||
["ές","ύς"], # πολλές
|
["ή", "ός"], # καρδιακή -> καρδιακός. Ονομαστική εν. σε -ή. (f)
|
||||||
["οι","ος"],
|
["ής", "ός"], # καρδιακής -> καρδιακός. Γενική εν. σε -ή. (f)
|
||||||
["αία","ος"], # ωραία
|
["ές", "ός"], # καρδιακές -> καρδιακός. Ονομαστική πλ. σε -ή. (f)
|
||||||
["ωδη","ες"], # δασώδη
|
["οι", "ος"], # ωραίοι -> ωραίος. Ονομαστική πλ. σε -ος. (m)
|
||||||
["ώδη","ες"],
|
["ων", "ος"], # ωραίων -> ωραίος. Γενική πλ. σε -ος. (m)
|
||||||
["ότερη","ός"],
|
["ου", "ος"], # ωραίου -> ωραίος. Γενική εν. σε -ος. (m)
|
||||||
["ότερος","ός"],
|
["ο", "ος"], # ωραίο -> ωραίος. Ονομαστική εν. σε -ο. (n)
|
||||||
["ότεροι", "ός"],
|
["α", "ος"], # χυδαία -> χυδαίος. Ονομαστική πλ. σε -ο. (n)
|
||||||
["ότερων","ός"],
|
["ώδη", "ώδες"], # δασώδη -> δασώδες. Ονομαστική πλ. σε -ώδες. (n)
|
||||||
["ότερες", "ός"],
|
["ύτερη", "ός"], # καλύτερη -> καλός. Συγκριτικός βαθμός σε -ή. (f)
|
||||||
|
["ύτερης", "ός"], # καλύτερης -> καλός. (f)
|
||||||
|
["ύτερων", "ός"], # καλύτερων -> καλός. (f)
|
||||||
|
["ύτερος", "ός"], # καλύτερος -> καλός. Συγκριτικός βαθμός σε -ός. (m)
|
||||||
|
["ύτερου", "ός"], # καλύτερου -> καλός. (m)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# masculine -> m, feminine -> f, neuter -> n.
|
||||||
NOUN_RULES = [
|
NOUN_RULES = [
|
||||||
["ιά","ί"], # παιδιά
|
["ιού", "ί"], # παιδιού -> παιδί. Γενική ενικού σε -ί. (n)
|
||||||
["ια","ι"], # ποτήρια
|
["ιά", "ί"], # παιδιά -> παιδί. Ονομαστική πληθυντικού σε -ί. (n)
|
||||||
["ες","α"], # κεραμίδες
|
["ιών", "ί"], # παιδιών -> παιδί. Γενική πληθυντικού σε -ί. (n)
|
||||||
["ές","ά"],
|
["ηριού", "ήρι"], # ποτηριού -> ποτήρι. Γενική ενικού σε -ι. (n)
|
||||||
["ές","ά"],
|
["ια", "ι"], # ποτήρια -> ποτήρι. Ονομαστική πληθυντικού σε -ι. (n)
|
||||||
["ες","α"], # εσπερινές
|
["ηριών", "ήρι"], # ποτηριών -> ποτήρι. Γενική πληθυντικού σε -ι. (n)
|
||||||
["ες","η"], # ζάχαρη
|
["ας", "α"], # κεραμίδας -> κεραμίδα. Γενική ενικού σε -α. (f)
|
||||||
["ές","ή"], # φυλακές
|
["ες", "α"], # κεραμίδες -> κεραμίδα. Ονομαστική πληθυντικού σε -α. (f)
|
||||||
["ές","ής"], # καθηγητής
|
["ων", "α"], # κεραμίδων -> κεραμίδα. Γενική πληθυντικού σε -α. (f)
|
||||||
["α","ο"], # πρόβατα
|
["άς", "ά"], # βελανιδιάς -> βελανιδιά. Γενική ενικού σε -ά. (f)
|
||||||
["α","α"], # ζήτημα
|
["ές", "ά"], # βελανιδιές -> βελανιδιά. Ονομαστική πληθυντικού σε -ά. (f)
|
||||||
["ατα","α"], # στόματα
|
["ών", "ά"], # βελανιδιών -> βελανιδιά. Γενική πληθυντικού σε -ά. (f)
|
||||||
["άτα","άτα"], # ντομάτα
|
["ής", "ή"], # φυλακής -> φυλακή. Γενική ενικού σε -ή. (f)
|
||||||
["άτες","άτα"], # πατάτες
|
["ές", "ή"], # φυλακές -> φυλακή. Ονομαστική πληθυντικού σε -ή. (f)
|
||||||
["ία","ία"],
|
["ών", "ή"], # φυλακών -> φυλακή. Γενική πληθυντικού σε -ή. (f)
|
||||||
["ιά","ιά"],
|
["ές", "ής"], # καθηγητές -> καθηγητής. Ονομαστική πληθυντικού σε -ής. (m)
|
||||||
["οί","ός"], # υπουργοί
|
["ών", "ής"], # καθηγητών -> καθηγητής. Γενική πληθυντικού σε -ής. (m)
|
||||||
["ίας","ία"], # δικτατορίας, δυσωδείας, τρομοκρατίας
|
["ου", "ο"], # προβάτου -> πρόβατο. Γενική ενικού σε -ο. (n)
|
||||||
["άτων","ατα"], # δικαιωμάτων
|
["α", "ο"], # πρόβατα -> πρόβατο. Ονομαστική πληθυντικού σε -o. (n)
|
||||||
["ώπων","ωπος"], # ανθρώπων
|
["ων", "ο"], # προβάτων -> πρόβατο. Γενική πληθυντικού σε -ο. (n)
|
||||||
|
["ητήματος", "ήτημα"], # ζητήματος -> ζήτημα. Γενική ενικού σε -α (n)
|
||||||
|
# ζητήματα -> ζήτημα. Ονομαστική πληθυντικού σε -α. (n)
|
||||||
|
["ητήματα", "ήτημα"],
|
||||||
|
# ζητημάτων -> ζήτημα. Γενική πληθυντικού σε -α. (n)
|
||||||
|
["ητημάτων", "ήτημα"],
|
||||||
|
["τος", ""], # στόματος -> στόμα. Γενική ενικού σε -α. (n)
|
||||||
|
["τα", "α"], # στόματα -> στόμα. Ονομαστική πληθυντικού σε -α. (n)
|
||||||
|
["ομάτων", "όμα"], # στομάτων -> στόμα. Γενική πληθυντικού σε -α. (n)
|
||||||
|
["ού", "ός"], # υπουργού -> υπουργός. Γενική ενικού σε -ος. (m)
|
||||||
|
["οί", "ός"], # υπουργοί -> υπουργούς. Ονομαστική πληυθυντικού σε -ος. (m)
|
||||||
|
["ών", "ός"], # υπουργών -> υπουργός. Γενική πληθυντικού σε -ος. (m)
|
||||||
|
["ς", ""], # δικτατορίας -> δικτατορία. Γενική ενικού σε -ας. (f)
|
||||||
|
# δικτατορίες -> δικτατορία. Ονομαστική πληθυντικού σε -ας. (f)
|
||||||
|
["ες", "α"],
|
||||||
|
["ιών", "ία"], # δικτατοριών -> δικτατορία. Γενική πληθυντικού σε -ας. (f)
|
||||||
|
["α", "ας"], # βασιλιά -> βασιλιάς. Γενική ενικού σε -άς. (m)
|
||||||
|
["δων", ""], # βασιλιάδων -> βασιλιά. Γενική πληθυντικού σε -άς. (m)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
VERB_RULES = [
|
VERB_RULES = [
|
||||||
["εις", "ω"],
|
["εις", "ω"], # πάρεις -> πάρω. Ενεστώτας ρήματος σε -ω.
|
||||||
["εις","ώ"],
|
["ει", "ω"],
|
||||||
["ει","ω"],
|
["ουμε", "ω"],
|
||||||
["ει","ώ"],
|
["ετε", "ω"],
|
||||||
["ουμε","ω"],
|
["ουνε", "ω"],
|
||||||
["ουμε","ώ"],
|
["ουν", "ω"],
|
||||||
["ούμε","ώ"], # θεώρησα
|
["είς", "ώ"], # πονείς -> πονώ. Ενεστώτας ρήματος σε -ώ vol1.
|
||||||
["ούνε","ώ"], #
|
["εί", "ώ"], # οι κανόνες που λείπουν καλύπτονται από το αγαπώ.
|
||||||
["ετε","ω"],
|
["ούν", "ώ"],
|
||||||
["ετε","ώ"],
|
["εσαι", "ομαι"], # αισθάνεσαι -> αισθάνομαι. Ενεστώτας ρήματος σε -ομαι.
|
||||||
["ουν","ω"],
|
["εται", "ομαι"],
|
||||||
["ουν","ώ"],
|
["ανόμαστε", "άνομαι"],
|
||||||
["είς","ώ"],
|
["εστε", "ομαι"],
|
||||||
["εί","ώ"],
|
["ονται", "ομαι"],
|
||||||
["ούν","ώ"],
|
["άς", "ώ"], # αγαπάς -> αγαπάω (ή αγαπώ). Ενεστώτας ρήματος σε -ώ vol2.
|
||||||
["εσαι","ομαι"], #αισθάνεσαι
|
["άει", "ώ"],
|
||||||
["εσαι","όμαι"],
|
["άμε", "ώ"],
|
||||||
["έσαι","ομαι"],
|
["άτε", "ώ"],
|
||||||
["έσαι","όμαι"],
|
["άνε", "ώ"],
|
||||||
["εται","ομαι"],
|
["άν", "ώ"],
|
||||||
["εται","όμαι"],
|
["άω", "ώ"],
|
||||||
["έται","ομαι"],
|
["ώ", "άω"],
|
||||||
["έται","όμαι"],
|
# ζαλιζόμουν -> ζαλίζομαι. Παρατατικός ρήματος -ίζομαι.
|
||||||
["όμαστε","όμαι"],
|
["ιζόμουν", "ίζομαι"],
|
||||||
["όμαστε","ομαι"],
|
["ιζόσουν", "ίζομαι"],
|
||||||
["έσθε","όμαι"],
|
["ιζόταν", "ίζομαι"],
|
||||||
["εσθε","όμαι"],
|
["ιζόμασταν", "ίζομαι"],
|
||||||
["άς","ώ"], # αγαπάς
|
["ιζόσασταν", "ίζομαι"],
|
||||||
["άει","ώ"],
|
["ονταν", "ομαι"],
|
||||||
["άμε","ώ"],
|
["όμουν", "άμαι"], # κοιμόμουν -> κοιμάμαι. Παρατατικός ρήματος σε -άμαι.
|
||||||
["άτε","ώ"],
|
["όσουν", "άμαι"],
|
||||||
["άνε","ώ"],
|
["όταν", "άμαι"],
|
||||||
["άν","ώ"],
|
["όμασταν", "άμαι"],
|
||||||
["άμε","ώ"],
|
["όσασταν", "άμαι"],
|
||||||
["άω","ώ"], # _verbs.py could contain any of the two
|
["όντουσταν", "άμαι"],
|
||||||
["ώ","άω"],
|
["ούσα", "ώ"], # ζητούσα -> ζητώ. # Παρατατικός ρήματος σε -ώ.
|
||||||
["όμουν", "ομαι"], # ζαλιζόμουν
|
|
||||||
["όμουν", "όμαι"],
|
|
||||||
["όμουν", "αμαι"], # κοιμόμουν
|
|
||||||
["όμουν", "αμαι"],
|
|
||||||
["ούσα", "ώ"], # ζητούσα -> ζητώ
|
|
||||||
["ούσες", "ώ"],
|
["ούσες", "ώ"],
|
||||||
["ούσε", "ώ"],
|
["ούσε", "ώ"],
|
||||||
["ούσαμε", "ώ"],
|
["ούσαμε", "ώ"],
|
||||||
["ούσατε", "ώ"],
|
["ούσατε", "ώ"],
|
||||||
["ούσαν", "ώ"],
|
["ούσαν", "ώ"],
|
||||||
["ούσανε", "ώ"],
|
["ούσανε", "ώ"],
|
||||||
|
["λαμε", "ζω"], # βγάλαμε -> βγάζω. Αόριστος ρήματος σε -ω vol1.
|
||||||
|
["λατε", "ζω"],
|
||||||
|
["ήρα", "άρω"], # πήρα -> πάρω. Αόριστος ρήματος σε -ω vol2.
|
||||||
|
["ήρες", "άρω"],
|
||||||
|
["ήρε", "άρω"],
|
||||||
|
["ήραμε", "άρω"],
|
||||||
|
["ήρατε", "άρω"],
|
||||||
|
["ήρα", "άρω"],
|
||||||
|
["ένησα", "ενώ"], # φιλοξένησα -> φιλοξενώ. Αόριστος ρήματος σε -ώ vol1.
|
||||||
|
["ένησες", "ενώ"],
|
||||||
|
["ένησε", "ενώ"],
|
||||||
|
["ενήσαμε", "ενώ"],
|
||||||
|
["ένησατε", "ενώ"],
|
||||||
|
["ένησαν", "ενώ"],
|
||||||
|
["όνεσα", "ονώ"], # πόνεσα -> πονώ. Αόριστος ρήματος σε -ώ vol2.
|
||||||
|
["όνεσες", "ονώ"],
|
||||||
|
["όνεσε", "ονώ"],
|
||||||
|
["έσαμε", "ώ"],
|
||||||
|
["έσατε", "ώ"],
|
||||||
|
["ισα", "ομαι"], # κάθισα -> κάθομαι. Αόριστος ρήματος σε -ομαι.
|
||||||
|
["ισες", "ομαι"],
|
||||||
|
["ισε", "ομαι"],
|
||||||
|
["αθίσαμε", "άθομαι"],
|
||||||
|
["αθίσατε", "άθομαι"],
|
||||||
|
["ισαν", "ομαι"],
|
||||||
|
["άπα", "απώ"], # αγάπα -> αγαπώ. Προστακτική ρήματος σε -άω/ώ vol1.
|
||||||
|
["ά", "ώ"], # τιμά -> τιμώ. Προστακτική ρήματος σε άω/ώ vol2.
|
||||||
|
["οντας", "ω"], # βλέποντας -> βλέπω. Μετοχή.
|
||||||
|
["ξω", "ζω"], # παίξω -> παίζω. Μέλλοντας σε -ω.
|
||||||
|
["ξεις", "ζω"],
|
||||||
|
["ξουμε", "ζω"],
|
||||||
|
["ξετε", "ζω"],
|
||||||
|
["ξουν", "ζω"],
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,8 @@ VERBS_IRREG = {
|
||||||
"είπατε": ("λέω",),
|
"είπατε": ("λέω",),
|
||||||
"είπαν": ("λέω",),
|
"είπαν": ("λέω",),
|
||||||
"είπανε": ("λέω",),
|
"είπανε": ("λέω",),
|
||||||
|
"πει": ("λέω"),
|
||||||
|
"πω": ("λέω"),
|
||||||
"πάω": ("πηγαίνω",),
|
"πάω": ("πηγαίνω",),
|
||||||
"πάς": ("πηγαίνω",),
|
"πάς": ("πηγαίνω",),
|
||||||
"πας": ("πηγαίνω",),
|
"πας": ("πηγαίνω",),
|
||||||
|
@ -38,7 +40,7 @@ VERBS_IRREG = {
|
||||||
"έπαιζα": ("παίζω",),
|
"έπαιζα": ("παίζω",),
|
||||||
"έπαιζες": ("παίζω",),
|
"έπαιζες": ("παίζω",),
|
||||||
"έπαιζε": ("παίζω",),
|
"έπαιζε": ("παίζω",),
|
||||||
"έπαιζαν":("παίζω,",),
|
"έπαιζαν": ("παίζω,",),
|
||||||
"έπαιξα": ("παίζω",),
|
"έπαιξα": ("παίζω",),
|
||||||
"έπαιξες": ("παίζω",),
|
"έπαιξες": ("παίζω",),
|
||||||
"έπαιξε": ("παίζω",),
|
"έπαιξε": ("παίζω",),
|
||||||
|
@ -52,6 +54,7 @@ VERBS_IRREG = {
|
||||||
"είχαμε": ("έχω",),
|
"είχαμε": ("έχω",),
|
||||||
"είχατε": ("έχω",),
|
"είχατε": ("έχω",),
|
||||||
"είχαν": ("έχω",),
|
"είχαν": ("έχω",),
|
||||||
|
"είχανε": ("έχω",),
|
||||||
"έπαιρνα": ("παίρνω",),
|
"έπαιρνα": ("παίρνω",),
|
||||||
"έπαιρνες": ("παίρνω",),
|
"έπαιρνες": ("παίρνω",),
|
||||||
"έπαιρνε": ("παίρνω",),
|
"έπαιρνε": ("παίρνω",),
|
||||||
|
@ -72,6 +75,12 @@ VERBS_IRREG = {
|
||||||
"έβλεπες": ("βλέπω",),
|
"έβλεπες": ("βλέπω",),
|
||||||
"έβλεπε": ("βλέπω",),
|
"έβλεπε": ("βλέπω",),
|
||||||
"έβλεπαν": ("βλέπω",),
|
"έβλεπαν": ("βλέπω",),
|
||||||
|
"είδα": ("βλέπω",),
|
||||||
|
"είδες": ("βλέπω",),
|
||||||
|
"είδε": ("βλέπω",),
|
||||||
|
"είδαμε": ("βλέπω",),
|
||||||
|
"είδατε": ("βλέπω",),
|
||||||
|
"είδαν": ("βλέπω",),
|
||||||
"έφερνα": ("φέρνω",),
|
"έφερνα": ("φέρνω",),
|
||||||
"έφερνες": ("φέρνω",),
|
"έφερνες": ("φέρνω",),
|
||||||
"έφερνε": ("φέρνω",),
|
"έφερνε": ("φέρνω",),
|
||||||
|
@ -122,6 +131,10 @@ VERBS_IRREG = {
|
||||||
"έπεφτες": ("πέφτω",),
|
"έπεφτες": ("πέφτω",),
|
||||||
"έπεφτε": ("πέφτω",),
|
"έπεφτε": ("πέφτω",),
|
||||||
"έπεφταν": ("πέφτω",),
|
"έπεφταν": ("πέφτω",),
|
||||||
|
"έπεσα": ("πέφτω",),
|
||||||
|
"έπεσες": ("πέφτω",),
|
||||||
|
"έπεσε": ("πέφτω",),
|
||||||
|
"έπεσαν": ("πέφτω",),
|
||||||
"έστειλα": ("στέλνω",),
|
"έστειλα": ("στέλνω",),
|
||||||
"έστειλες": ("στέλνω",),
|
"έστειλες": ("στέλνω",),
|
||||||
"έστειλε": ("στέλνω",),
|
"έστειλε": ("στέλνω",),
|
||||||
|
@ -142,6 +155,12 @@ VERBS_IRREG = {
|
||||||
"έπινες": ("πίνω",),
|
"έπινες": ("πίνω",),
|
||||||
"έπινε": ("πίνω",),
|
"έπινε": ("πίνω",),
|
||||||
"έπιναν": ("πίνω",),
|
"έπιναν": ("πίνω",),
|
||||||
|
"ήπια": ("πίνω",),
|
||||||
|
"ήπιες": ("πίνω",),
|
||||||
|
"ήπιε": ("πίνω",),
|
||||||
|
"ήπιαμε": ("πίνω",),
|
||||||
|
"ήπιατε": ("πίνω",),
|
||||||
|
"ήπιαν": ("πίνω",),
|
||||||
"ετύχα": ("τυχαίνω",),
|
"ετύχα": ("τυχαίνω",),
|
||||||
"ετύχες": ("τυχαίνω",),
|
"ετύχες": ("τυχαίνω",),
|
||||||
"ετύχε": ("τυχαίνω",),
|
"ετύχε": ("τυχαίνω",),
|
||||||
|
@ -159,4 +178,23 @@ VERBS_IRREG = {
|
||||||
"τρώγατε": ("τρώω",),
|
"τρώγατε": ("τρώω",),
|
||||||
"τρώγανε": ("τρώω",),
|
"τρώγανε": ("τρώω",),
|
||||||
"τρώγαν": ("τρώω",),
|
"τρώγαν": ("τρώω",),
|
||||||
|
"πέρασα": ("περνώ",),
|
||||||
|
"πέρασες": ("περνώ",),
|
||||||
|
"πέρασε": ("περνώ",),
|
||||||
|
"πέρασαμε": ("περνώ",),
|
||||||
|
"πέρασατε": ("περνώ",),
|
||||||
|
"πέρασαν": ("περνώ",),
|
||||||
|
"έγδαρα": ("γδάρω",),
|
||||||
|
"έγδαρες": ("γδάρω",),
|
||||||
|
"έγδαρε": ("γδάρω",),
|
||||||
|
"έγδαραν": ("γδάρω",),
|
||||||
|
"έβγαλα": ("βγάλω",),
|
||||||
|
"έβγαλες": ("βγάλω",),
|
||||||
|
"έβγαλε": ("βγάλω",),
|
||||||
|
"έβγαλαν": ("βγάλω",),
|
||||||
|
"έφθασα": ("φτάνω",),
|
||||||
|
"έφθασες": ("φτάνω",),
|
||||||
|
"έφθασε": ("φτάνω",),
|
||||||
|
"έφθασαν": ("φτάνω",),
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
69
spacy/lang/el/lemmatizer/lemmatizer.py
Normal file
69
spacy/lang/el/lemmatizer/lemmatizer.py
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from ....symbols import NOUN, VERB, ADJ, PUNCT
|
||||||
|
|
||||||
|
'''
|
||||||
|
Greek language lemmatizer applies the default rule based lemmatization
|
||||||
|
procedure with some modifications for better Greek language support.
|
||||||
|
|
||||||
|
The first modification is that it checks if the word for lemmatization is
|
||||||
|
already a lemma and if yes, it just returns it.
|
||||||
|
The second modification is about removing the base forms function which is
|
||||||
|
not applicable for Greek language.
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
class GreekLemmatizer(object):
    """Rule-based lemmatizer for Greek.

    Holds four tables: `index` (known lemmas per part of speech), `exc`
    (exceptions per part of speech), `rules` (suffix rewrite rules per part
    of speech) and `lookup_table` (a plain form -> lemma mapping used when
    no rules are available). The actual rewriting is delegated to the
    module-level `lemmatize` function.
    """

    @classmethod
    def load(cls, path, index=None, exc=None, rules=None, lookup=None):
        """Alternate constructor taking the tables directly.

        `path` is accepted but not read by this implementation.
        """
        return cls(index, exc, rules, lookup)

    def __init__(self, index=None, exceptions=None, rules=None, lookup=None):
        self.index = index
        self.exc = exceptions
        self.rules = rules
        self.lookup_table = lookup if lookup is not None else {}

    def __call__(self, string, univ_pos, morphology=None):
        """Return a list of candidate lemmas for `string`.

        string: the surface form to lemmatize.
        univ_pos: part of speech, either a symbol id or its upper/lower-case
            name. Only noun, verb, adjective and punctuation are handled by
            the rules; any other value yields the lower-cased input.
        morphology: accepted but not used.
        """
        if not self.rules:
            # Without rules the lookup table is the only source of lemmas.
            return [self.lookup(string)]
        if univ_pos in (NOUN, 'NOUN', 'noun'):
            univ_pos = 'noun'
        elif univ_pos in (VERB, 'VERB', 'verb'):
            univ_pos = 'verb'
        elif univ_pos in (ADJ, 'ADJ', 'adj'):
            univ_pos = 'adj'
        elif univ_pos in (PUNCT, 'PUNCT', 'punct'):
            univ_pos = 'punct'
        else:
            return [string.lower()]
        # `index` and `exceptions` default to None in __init__; treat a
        # missing table as empty instead of failing on `.get`.
        index = self.index if self.index is not None else {}
        exceptions = self.exc if self.exc is not None else {}
        return lemmatize(string, index.get(univ_pos, {}),
                         exceptions.get(univ_pos, {}),
                         self.rules.get(univ_pos, []))

    def lookup(self, string):
        """Return the lookup-table entry for `string`, or `string` itself."""
        return self.lookup_table.get(string, string)
|
||||||
|
|
||||||
|
|
||||||
|
def lemmatize(string, index, exceptions, rules):
    """Return the candidate lemmas of `string` as a duplicate-free list.

    string: the surface form; it is lower-cased before any matching.
    index: container of known lemmas (only membership tests are used).
    exceptions: mapping from a form to its lemma(s).
    rules: iterable of (old_suffix, new_suffix) pairs.

    Resolution order:
      1. a form that is already in `index` is returned as is;
      2. otherwise an entry in `exceptions` wins;
      3. otherwise every matching suffix rule is applied; results that are
         in `index` (or are not purely alphabetic) are preferred over
         out-of-vocabulary results;
      4. if nothing was produced, the lower-cased input is returned.

    Candidates are returned in the order they were found, so the result is
    the same on every run.
    """
    string = string.lower()
    if string in index:
        return [string]

    forms = exceptions.get(string, [])
    # A single lemma written without a container (e.g. ("x") instead of
    # ("x",)) must count as one form, not as a sequence of characters.
    if not isinstance(forms, (list, tuple, set, frozenset)):
        forms = [forms]
    forms = list(forms)

    if not forms:
        oov_forms = []
        for old, new in rules:
            if not string.endswith(old):
                continue
            form = string[:len(string) - len(old)] + new
            if not form:
                # The rule consumed the whole word; nothing usable is left.
                continue
            if form in index or not form.isalpha():
                forms.append(form)
            else:
                oov_forms.append(form)
        forms = forms or oov_forms or [string]

    # Drop duplicates while keeping first-seen order.
    seen = set()
    unique = []
    for form in forms:
        if form not in seen:
            seen.add(form)
            unique.append(form)
    return unique
|
|
@ -4,14 +4,20 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from ...attrs import LIKE_NUM
|
from ...attrs import LIKE_NUM
|
||||||
|
|
||||||
_num_words = ['μηδέν', 'ένας', 'δυο', 'δυό', 'τρεις', 'τέσσερις', 'πέντε', 'έξι', 'εφτά', 'επτά', 'οκτώ', 'οχτώ',
|
_num_words = ['μηδέν', 'ένας', 'δυο', 'δυό', 'τρεις', 'τέσσερις', 'πέντε',
|
||||||
'εννιά', 'εννέα', 'δέκα', 'έντεκα', 'ένδεκα', 'δώδεκα', 'δεκατρείς', 'δεκατέσσερις', 'δεκαπέντε',
|
'έξι', 'εφτά', 'επτά', 'οκτώ', 'οχτώ',
|
||||||
'δεκαέξι', 'δεκαεπτά', 'δεκαοχτώ', 'δεκαεννέα', 'δεκαεννεα', 'είκοσι', 'τριάντα', 'σαράντα', 'πενήντα',
|
'εννιά', 'εννέα', 'δέκα', 'έντεκα', 'ένδεκα', 'δώδεκα',
|
||||||
'εξήντα', 'εβδομήντα', 'ογδόντα', 'ενενήντα', 'εκατό', 'διακόσιοι', 'διακόσοι', 'τριακόσιοι', 'τριακόσοι',
|
'δεκατρείς', 'δεκατέσσερις', 'δεκαπέντε', 'δεκαέξι', 'δεκαεπτά',
|
||||||
'τετρακόσιοι', 'τετρακόσοι', 'πεντακόσιοι', 'πεντακόσοι', 'εξακόσιοι', 'εξακόσοι', 'εφτακόσιοι',
|
'δεκαοχτώ', 'δεκαεννέα', 'δεκαεννεα', 'είκοσι', 'τριάντα',
|
||||||
'εφτακόσοι', 'επτακόσιοι', 'επτακόσοι', 'οχτακόσιοι', 'οχτακόσοι', 'οκτακόσιοι', 'οκτακόσοι',
|
'σαράντα', 'πενήντα', 'εξήντα', 'εβδομήντα', 'ογδόντα',
|
||||||
'εννιακόσιοι', 'χίλιοι', 'χιλιάδα', 'εκατομμύριο', 'δισεκατομμύριο', 'τρισεκατομμύριο', 'τετράκις',
|
'ενενήντα', 'εκατό', 'διακόσιοι', 'διακόσοι', 'τριακόσιοι',
|
||||||
'πεντάκις', 'εξάκις', 'επτάκις', 'οκτάκις', 'εννεάκις', 'ένα', 'δύο', 'τρία', 'τέσσερα', 'δις', 'χιλιάδες']
|
'τριακόσοι', 'τετρακόσιοι', 'τετρακόσοι', 'πεντακόσιοι',
|
||||||
|
'πεντακόσοι', 'εξακόσιοι', 'εξακόσοι', 'εφτακόσιοι', 'εφτακόσοι',
|
||||||
|
'επτακόσιοι', 'επτακόσοι', 'οχτακόσιοι', 'οχτακόσοι',
|
||||||
|
'οκτακόσιοι', 'οκτακόσοι', 'εννιακόσιοι', 'χίλιοι', 'χιλιάδα',
|
||||||
|
'εκατομμύριο', 'δισεκατομμύριο', 'τρισεκατομμύριο', 'τετράκις',
|
||||||
|
'πεντάκις', 'εξάκις', 'επτάκις', 'οκτάκις', 'εννεάκις', 'ένα',
|
||||||
|
'δύο', 'τρία', 'τέσσερα', 'δις', 'χιλιάδες']
|
||||||
|
|
||||||
|
|
||||||
def like_num(text):
|
def like_num(text):
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -10,7 +10,11 @@ _units = ('km km² km³ m m² m³ dm dm² dm³ cm cm² cm³ mm mm² mm³ ha µm
|
||||||
'kg g mg µg t lb oz m/s km/h kmh mph hPa Pa mbar mb MB kb KB gb GB tb '
|
'kg g mg µg t lb oz m/s km/h kmh mph hPa Pa mbar mb MB kb KB gb GB tb '
|
||||||
'TB T G M K км км² км³ м м² м³ дм дм² дм³ см см² см³ мм мм² мм³ нм '
|
'TB T G M K км км² км³ м м² м³ дм дм² дм³ см см² см³ мм мм² мм³ нм '
|
||||||
'кг г мг м/с км/ч кПа Па мбар Кб КБ кб Мб МБ мб Гб ГБ гб Тб ТБ тб')
|
'кг г мг м/с км/ч кПа Па мбар Кб КБ кб Мб МБ мб Гб ГБ гб Тб ТБ тб')
|
||||||
merge_chars = lambda char: char.strip().replace(' ', '|')
|
|
||||||
|
|
||||||
|
def merge_chars(char):
    """Trim `char` and turn every inner space into a `|` separator."""
    trimmed = char.strip()
    return '|'.join(trimmed.split(' '))
|
||||||
|
|
||||||
|
|
||||||
UNITS = merge_chars(_units)
|
UNITS = merge_chars(_units)
|
||||||
|
|
||||||
_prefixes = (['\'\'', '§', '%', '=', r'\+[0-9]+%', # 90%
|
_prefixes = (['\'\'', '§', '%', '=', r'\+[0-9]+%', # 90%
|
||||||
|
@ -42,7 +46,8 @@ _suffixes = (LIST_PUNCT + LIST_ELLIPSES + LIST_QUOTES + LIST_ICONS +
|
||||||
r'(?<=[Α-Ωα-ωίϊΐόάέύϋΰήώ])\.',
|
r'(?<=[Α-Ωα-ωίϊΐόάέύϋΰήώ])\.',
|
||||||
r'^[Α-Ω]{1}\.',
|
r'^[Α-Ω]{1}\.',
|
||||||
r'\ [Α-Ω]{1}\.',
|
r'\ [Α-Ω]{1}\.',
|
||||||
r'[ΈΆΊΑΌ-Ωα-ωίϊΐόάέύϋΰήώ]+([\-]([ΈΆΊΑΌ-Ωα-ωίϊΐόάέύϋΰήώ]+))+', # πρώτος-δεύτερος , πρώτος-δεύτερος-τρίτος
|
# πρώτος-δεύτερος , πρώτος-δεύτερος-τρίτος
|
||||||
|
r'[ΈΆΊΑΌ-Ωα-ωίϊΐόάέύϋΰήώ]+([\-]([ΈΆΊΑΌ-Ωα-ωίϊΐόάέύϋΰήώ]+))+',
|
||||||
r'([0-9]+)mg', # 13mg
|
r'([0-9]+)mg', # 13mg
|
||||||
r'([0-9]+)\.([0-9]+)m' # 1.2m
|
r'([0-9]+)\.([0-9]+)m' # 1.2m
|
||||||
])
|
])
|
||||||
|
@ -53,7 +58,8 @@ _infixes = (LIST_ELLIPSES + LIST_ICONS +
|
||||||
r'([0-9])+(\.([0-9]+))*([\-]([0-9])+)+', # 10.9 , 10.9.9 , 10.9-6
|
r'([0-9])+(\.([0-9]+))*([\-]([0-9])+)+', # 10.9 , 10.9.9 , 10.9-6
|
||||||
r'([0-9])+[,]([0-9])+[\-]([0-9])+[,]([0-9])+', # 10,11,12
|
r'([0-9])+[,]([0-9])+[\-]([0-9])+[,]([0-9])+', # 10,11,12
|
||||||
r'([0-9])+[ης]+([\-]([0-9])+)+', # 1ης-2
|
r'([0-9])+[ης]+([\-]([0-9])+)+', # 1ης-2
|
||||||
r'([0-9]){1,4}[\/]([0-9]){1,2}([\/]([0-9]){0,4}){0,1}', # 15/2 , 15/2/17 , 2017/2/15
|
# 15/2 , 15/2/17 , 2017/2/15
|
||||||
|
r'([0-9]){1,4}[\/]([0-9]){1,2}([\/]([0-9]){0,4}){0,1}',
|
||||||
r'[A-Za-z]+\@[A-Za-z]+(\-[A-Za-z]+)*\.[A-Za-z]+', # abc@cde-fgh.a
|
r'[A-Za-z]+\@[A-Za-z]+(\-[A-Za-z]+)*\.[A-Za-z]+', # abc@cde-fgh.a
|
||||||
r'([a-zA-Z]+)(\-([a-zA-Z]+))+', # abc-abc
|
r'([a-zA-Z]+)(\-([a-zA-Z]+))+', # abc-abc
|
||||||
r'(?<=[{}])\.(?=[{}])'.format(ALPHA_LOWER, ALPHA_UPPER),
|
r'(?<=[{}])\.(?=[{}])'.format(ALPHA_LOWER, ALPHA_UPPER),
|
||||||
|
|
61
spacy/lang/el/syntax_iterators.py
Normal file
61
spacy/lang/el/syntax_iterators.py
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from ...symbols import NOUN, PROPN, PRON
|
||||||
|
|
||||||
|
|
||||||
|
def noun_chunks(obj):
    """
    Yield base noun phrases as (start, end, label) triples.

    Works on both Doc and Span: tokens are taken from `obj`, while string
    ids are resolved through `obj.doc`.
    """
    # The logic mirrors the English noun chunk finder, adjusted to some
    # special characteristics of Greek.

    # The 'obj' label is included to correct some dependency tagger
    # mistakes. Further improvement of the models will eliminate the need
    # for it.
    doc = obj.doc
    add_string = doc.vocab.strings.add
    np_deps = [add_string(name)
               for name in ('nsubj', 'obj', 'iobj', 'appos', 'ROOT', 'obl')]
    conj = add_string('conj')
    nmod = add_string('nmod')
    np_label = add_string('NP')
    seen = set()
    for word in obj:
        # Skip non-nominal tokens and tokens already inside a chunk, so
        # that nested chunks are never produced.
        if word.pos not in (NOUN, PROPN, PRON) or word.i in seen:
            continue
        if word.dep in np_deps:
            if any(tok.i in seen for tok in word.subtree):
                continue
            end = word.i + 1
            if word.pos == NOUN:
                # Patterns such as "γραμμή παραγωγής": extend the chunk up
                # to the first nmod dependent on the right.
                for right in word.rights:
                    if right.dep == nmod:
                        end = right.i + 1
                        break
            start = word.left_edge.i
            seen.update(range(start, end))
            yield start, end, np_label
        elif word.dep == conj:
            # Covers the case: "έχει όμορφα και έξυπνα παιδιά".
            head = word.head
            while head.dep == conj and head.head.i < head.i:
                head = head.head
            # Coordinated to a head that is itself an NP -> also an NP.
            if head.dep not in np_deps:
                continue
            if any(tok.i in seen for tok in word.subtree):
                continue
            start = word.left_edge.i
            end = word.i + 1
            seen.update(range(start, end))
            yield start, end, np_label
|
||||||
|
|
||||||
|
|
||||||
|
# Maps each syntax iterator name to the function that implements it.
SYNTAX_ITERATORS = {'noun_chunks': noun_chunks}
|
|
@ -2,10 +2,10 @@
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
from ...symbols import POS, PUNCT, SYM, ADJ, CCONJ, SCONJ, NUM, DET, ADV, ADP, X, VERB
|
from ...symbols import POS, PUNCT, SYM, ADJ, CCONJ, SCONJ, NUM, DET, ADV, ADP, X, VERB
|
||||||
from ...symbols import NOUN, PROPN, PART, INTJ,SPACE,PRON
|
from ...symbols import NOUN, PROPN, PART, INTJ, PRON
|
||||||
|
|
||||||
TAG_MAP = {
|
TAG_MAP = {
|
||||||
"ABBR": {POS: NOUN, "Abbr":"Yes"},
|
"ABBR": {POS: NOUN, "Abbr": "Yes"},
|
||||||
"AdXxBa": {POS: ADV, "Degree": ""},
|
"AdXxBa": {POS: ADV, "Degree": ""},
|
||||||
"AdXxCp": {POS: ADV, "Degree": "Cmp"},
|
"AdXxCp": {POS: ADV, "Degree": "Cmp"},
|
||||||
"AdXxSu": {POS: ADV, "Degree": "Sup"},
|
"AdXxSu": {POS: ADV, "Degree": "Sup"},
|
||||||
|
@ -112,38 +112,38 @@ TAG_MAP = {
|
||||||
"AsPpPaNeSgAc": {POS: ADP, "Gender": "Neut", "Number": "Sing", "Case": "Acc"},
|
"AsPpPaNeSgAc": {POS: ADP, "Gender": "Neut", "Number": "Sing", "Case": "Acc"},
|
||||||
"AsPpPaNeSgGe": {POS: ADP, "Gender": "Neut", "Number": "Sing", "Case": "Gen"},
|
"AsPpPaNeSgGe": {POS: ADP, "Gender": "Neut", "Number": "Sing", "Case": "Gen"},
|
||||||
"AsPpSp": {POS: ADP},
|
"AsPpSp": {POS: ADP},
|
||||||
"AtDfFePlAc": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Plur", "Case": "Acc", "Other":{"Definite": "Def"}},
|
"AtDfFePlAc": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Plur", "Case": "Acc", "Other": {"Definite": "Def"}},
|
||||||
"AtDfFePlGe": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Plur", "Case": "Gen", "Other":{"Definite": "Def"}},
|
"AtDfFePlGe": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Plur", "Case": "Gen", "Other": {"Definite": "Def"}},
|
||||||
"AtDfFePlNm": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Plur", "Case": "Nom", "Other":{"Definite": "Def"}},
|
"AtDfFePlNm": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Plur", "Case": "Nom", "Other": {"Definite": "Def"}},
|
||||||
"AtDfFeSgAc": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Acc", "Other":{"Definite": "Def"}},
|
"AtDfFeSgAc": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Acc", "Other": {"Definite": "Def"}},
|
||||||
"AtDfFeSgDa": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Dat", "Other":{"Definite": "Def"}},
|
"AtDfFeSgDa": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Dat", "Other": {"Definite": "Def"}},
|
||||||
"AtDfFeSgGe": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Gen", "Other":{"Definite": "Def"}},
|
"AtDfFeSgGe": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Gen", "Other": {"Definite": "Def"}},
|
||||||
"AtDfFeSgNm": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Nom", "Other":{"Definite": "Def"}},
|
"AtDfFeSgNm": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Nom", "Other": {"Definite": "Def"}},
|
||||||
"AtDfMaPlAc": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Plur", "Case": "Acc", "Other":{"Definite": "Def"}},
|
"AtDfMaPlAc": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Plur", "Case": "Acc", "Other": {"Definite": "Def"}},
|
||||||
"AtDfMaPlGe": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Plur", "Case": "Gen", "Other":{"Definite": "Def"}},
|
"AtDfMaPlGe": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Plur", "Case": "Gen", "Other": {"Definite": "Def"}},
|
||||||
"AtDfMaPlNm": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Plur", "Case": "Nom", "Other":{"Definite": "Def"}},
|
"AtDfMaPlNm": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Plur", "Case": "Nom", "Other": {"Definite": "Def"}},
|
||||||
"AtDfMaSgAc": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Acc", "Other":{"Definite": "Def"}},
|
"AtDfMaSgAc": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Acc", "Other": {"Definite": "Def"}},
|
||||||
"AtDfMaSgDa": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Dat", "Other":{"Definite": "Def"}},
|
"AtDfMaSgDa": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Dat", "Other": {"Definite": "Def"}},
|
||||||
"AtDfMaSgGe": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Gen", "Other":{"Definite": "Def"}},
|
"AtDfMaSgGe": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Gen", "Other": {"Definite": "Def"}},
|
||||||
"AtDfMaSgNm": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Nom", "Other":{"Definite": "Def"}},
|
"AtDfMaSgNm": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Nom", "Other": {"Definite": "Def"}},
|
||||||
"AtDfNePlAc": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Plur", "Case": "Acc", "Other":{"Definite": "Def"}},
|
"AtDfNePlAc": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Plur", "Case": "Acc", "Other": {"Definite": "Def"}},
|
||||||
"AtDfNePlDa": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Plur", "Case": "Dat", "Other":{"Definite": "Def"}},
|
"AtDfNePlDa": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Plur", "Case": "Dat", "Other": {"Definite": "Def"}},
|
||||||
"AtDfNePlGe": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Plur", "Case": "Gen", "Other":{"Definite": "Def"}},
|
"AtDfNePlGe": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Plur", "Case": "Gen", "Other": {"Definite": "Def"}},
|
||||||
"AtDfNePlNm": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Plur", "Case": "Nom", "Other":{"Definite": "Def"}},
|
"AtDfNePlNm": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Plur", "Case": "Nom", "Other": {"Definite": "Def"}},
|
||||||
"AtDfNeSgAc": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Acc", "Other":{"Definite": "Def"}},
|
"AtDfNeSgAc": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Acc", "Other": {"Definite": "Def"}},
|
||||||
"AtDfNeSgDa": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Dat", "Other":{"Definite": "Def"}},
|
"AtDfNeSgDa": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Dat", "Other": {"Definite": "Def"}},
|
||||||
"AtDfNeSgGe": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Gen", "Other":{"Definite": "Def"}},
|
"AtDfNeSgGe": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Gen", "Other": {"Definite": "Def"}},
|
||||||
"AtDfNeSgNm": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Nom", "Other":{"Definite": "Def"}},
|
"AtDfNeSgNm": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Nom", "Other": {"Definite": "Def"}},
|
||||||
"AtIdFeSgAc": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Acc", "Other":{"Definite": "Ind"}},
|
"AtIdFeSgAc": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Acc", "Other": {"Definite": "Ind"}},
|
||||||
"AtIdFeSgDa": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Dat", "Other":{"Definite": "Ind"}},
|
"AtIdFeSgDa": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Dat", "Other": {"Definite": "Ind"}},
|
||||||
"AtIdFeSgGe": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Gen", "Other":{"Definite": "Ind"}},
|
"AtIdFeSgGe": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Gen", "Other": {"Definite": "Ind"}},
|
||||||
"AtIdFeSgNm": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Nom", "Other":{"Definite": "Ind"}},
|
"AtIdFeSgNm": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Nom", "Other": {"Definite": "Ind"}},
|
||||||
"AtIdMaSgAc": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Acc", "Other":{"Definite": "Ind"}},
|
"AtIdMaSgAc": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Acc", "Other": {"Definite": "Ind"}},
|
||||||
"AtIdMaSgGe": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Gen", "Other":{"Definite": "Ind"}},
|
"AtIdMaSgGe": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Gen", "Other": {"Definite": "Ind"}},
|
||||||
"AtIdMaSgNm": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Nom", "Other":{"Definite": "Ind"}},
|
"AtIdMaSgNm": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Nom", "Other": {"Definite": "Ind"}},
|
||||||
"AtIdNeSgAc": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Acc", "Other":{"Definite": "Ind"}},
|
"AtIdNeSgAc": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Acc", "Other": {"Definite": "Ind"}},
|
||||||
"AtIdNeSgGe": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Gen", "Other":{"Definite": "Ind"}},
|
"AtIdNeSgGe": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Gen", "Other": {"Definite": "Ind"}},
|
||||||
"AtIdNeSgNm": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Nom", "Other":{"Definite": "Ind"}},
|
"AtIdNeSgNm": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Nom", "Other": {"Definite": "Ind"}},
|
||||||
"CjCo": {POS: CCONJ},
|
"CjCo": {POS: CCONJ},
|
||||||
"CjSb": {POS: SCONJ},
|
"CjSb": {POS: SCONJ},
|
||||||
"CPUNCT": {POS: PUNCT},
|
"CPUNCT": {POS: PUNCT},
|
||||||
|
@ -152,7 +152,7 @@ TAG_MAP = {
|
||||||
"ENUM": {POS: NUM},
|
"ENUM": {POS: NUM},
|
||||||
"Ij": {POS: INTJ},
|
"Ij": {POS: INTJ},
|
||||||
"INIT": {POS: SYM},
|
"INIT": {POS: SYM},
|
||||||
"NBABBR": {POS: NOUN, "Abbr":"Yes"},
|
"NBABBR": {POS: NOUN, "Abbr": "Yes"},
|
||||||
"NmAnFePlAcAj": {POS: NUM, "NumType": "Mult", "Gender": "Fem", "Number": "Plur", "Case": "Acc"},
|
"NmAnFePlAcAj": {POS: NUM, "NumType": "Mult", "Gender": "Fem", "Number": "Plur", "Case": "Acc"},
|
||||||
"NmAnFePlGeAj": {POS: NUM, "NumType": "Mult", "Gender": "Fem", "Number": "Plur", "Case": "Gen"},
|
"NmAnFePlGeAj": {POS: NUM, "NumType": "Mult", "Gender": "Fem", "Number": "Plur", "Case": "Gen"},
|
||||||
"NmAnFePlNmAj": {POS: NUM, "NumType": "Mult", "Gender": "Fem", "Number": "Plur", "Case": "Nom"},
|
"NmAnFePlNmAj": {POS: NUM, "NumType": "Mult", "Gender": "Fem", "Number": "Plur", "Case": "Nom"},
|
||||||
|
@ -529,71 +529,70 @@ TAG_MAP = {
|
||||||
"VbMnIdPa03PlXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPa03PlXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPa03PlXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPa03PlXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPa03PlXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPa03PlXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPa03PlXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPa03PlXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPa03SgXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPa03SgXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPa03SgXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPa03SgXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPa03SgXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPa03SgXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPa03SgXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPa03SgXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPr01PlXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "1", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPr01PlXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "1", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPr01PlXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "1", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPr01PlXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "1", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPr01SgXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "1", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPr01SgXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "1", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPr01SgXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "1", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPr01SgXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "1", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPr02PlXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPr02PlXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPr02PlXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPr02PlXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPr02SgXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPr02SgXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPr02SgXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPr02SgXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPr03PlXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPr03PlXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPr03PlXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPr03PlXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPr03SgXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPr03SgXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdPr03SgXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdPr03SgXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdXx01PlXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "1", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdXx01PlXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "1", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdXx01PlXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "1", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdXx01PlXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "1", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdXx01SgXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "1", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdXx01SgXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "1", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdXx01SgXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "1", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdXx01SgXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "1", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdXx02PlXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdXx02PlXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdXx02PlXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdXx02PlXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdXx02SgXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdXx02SgXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdXx02SgXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdXx02SgXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdXx03PlXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdXx03PlXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdXx03PlXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdXx03PlXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdXx03SgXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdXx03SgXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnIdXx03SgXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnIdXx03SgXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnMpXx02PlXxIpAvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnMpXx02PlXxIpAvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnMpXx02PlXxIpPvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnMpXx02PlXxIpPvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnMpXx02PlXxPeAvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnMpXx02PlXxPeAvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnMpXx02PlXxPePvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnMpXx02PlXxPePvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnMpXx02SgXxIpAvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnMpXx02SgXxIpAvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnMpXx02SgXxIpPvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnMpXx02SgXxIpPvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnMpXx02SgXxPeAvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnMpXx02SgXxPeAvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnMpXx02SgXxPePvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnMpXx02SgXxPePvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnMpXx03SgXxIpPvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnMpXx03SgXxIpPvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnNfXxXxXxXxPeAvXx": {POS: VERB, "VerbForm": "Inf", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing|Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnNfXxXxXxXxPeAvXx": {POS: VERB, "VerbForm": "Inf", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing|Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnNfXxXxXxXxPePvXx": {POS: VERB, "VerbForm": "Inf", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing|Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnNfXxXxXxXxPePvXx": {POS: VERB, "VerbForm": "Inf", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing|Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnPpPrXxXxXxIpAvXx": {POS: VERB, "VerbForm": "Conv", "Mood": "", "Tense": "Pres", "Person": "1|2|3", "Number": "Sing|Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
"VbMnPpPrXxXxXxIpAvXx": {POS: VERB, "VerbForm": "Conv", "Mood": "", "Tense": "Pres", "Person": "1|2|3", "Number": "Sing|Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
|
||||||
"VbMnPpXxXxPlFePePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Fem", "Aspect": "Perf" , "Voice": "Pass", "Case": "Acc"},
|
"VbMnPpXxXxPlFePePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Fem", "Aspect": "Perf", "Voice": "Pass", "Case": "Acc"},
|
||||||
"VbMnPpXxXxPlFePePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Fem", "Aspect": "Perf" , "Voice": "Pass", "Case": "Gen"},
|
"VbMnPpXxXxPlFePePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Fem", "Aspect": "Perf", "Voice": "Pass", "Case": "Gen"},
|
||||||
"VbMnPpXxXxPlFePePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Fem", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom"},
|
"VbMnPpXxXxPlFePePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Fem", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom"},
|
||||||
"VbMnPpXxXxPlFePePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Fem", "Aspect": "Perf" , "Voice": "Pass", "Case": "Voc"},
|
"VbMnPpXxXxPlFePePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Fem", "Aspect": "Perf", "Voice": "Pass", "Case": "Voc"},
|
||||||
"VbMnPpXxXxPlMaPePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Masc", "Aspect": "Perf" , "Voice": "Pass", "Case": "Acc"},
|
"VbMnPpXxXxPlMaPePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Masc", "Aspect": "Perf", "Voice": "Pass", "Case": "Acc"},
|
||||||
"VbMnPpXxXxPlMaPePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Masc", "Aspect": "Perf" , "Voice": "Pass", "Case": "Gen"},
|
"VbMnPpXxXxPlMaPePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Masc", "Aspect": "Perf", "Voice": "Pass", "Case": "Gen"},
|
||||||
"VbMnPpXxXxPlMaPePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Masc", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom"},
|
"VbMnPpXxXxPlMaPePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Masc", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom"},
|
||||||
"VbMnPpXxXxPlMaPePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Masc", "Aspect": "Perf" , "Voice": "Pass", "Case": "Voc"},
|
"VbMnPpXxXxPlMaPePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Masc", "Aspect": "Perf", "Voice": "Pass", "Case": "Voc"},
|
||||||
"VbMnPpXxXxPlNePePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Acc"},
|
"VbMnPpXxXxPlNePePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Acc"},
|
||||||
"VbMnPpXxXxPlNePePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Gen"},
|
"VbMnPpXxXxPlNePePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Gen"},
|
||||||
"VbMnPpXxXxPlNePePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom"},
|
"VbMnPpXxXxPlNePePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom"},
|
||||||
"VbMnPpXxXxPlNePePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Voc"},
|
"VbMnPpXxXxPlNePePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Voc"},
|
||||||
"VbMnPpXxXxSgFePePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Fem", "Aspect": "Perf" , "Voice": "Pass", "Case": "Acc"},
|
"VbMnPpXxXxSgFePePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Fem", "Aspect": "Perf", "Voice": "Pass", "Case": "Acc"},
|
||||||
"VbMnPpXxXxSgFePePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Fem", "Aspect": "Perf" , "Voice": "Pass", "Case": "Gen"},
|
"VbMnPpXxXxSgFePePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Fem", "Aspect": "Perf", "Voice": "Pass", "Case": "Gen"},
|
||||||
"VbMnPpXxXxSgFePePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Fem", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom"},
|
"VbMnPpXxXxSgFePePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Fem", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom"},
|
||||||
"VbMnPpXxXxSgFePePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Fem", "Aspect": "Perf" , "Voice": "Pass", "Case": "Voc"},
|
"VbMnPpXxXxSgFePePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Fem", "Aspect": "Perf", "Voice": "Pass", "Case": "Voc"},
|
||||||
"VbMnPpXxXxSgMaPePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Masc", "Aspect": "Perf" , "Voice": "Pass", "Case": "Acc"},
|
"VbMnPpXxXxSgMaPePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Masc", "Aspect": "Perf", "Voice": "Pass", "Case": "Acc"},
|
||||||
"VbMnPpXxXxSgMaPePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Masc", "Aspect": "Perf" , "Voice": "Pass", "Case": "Gen"},
|
"VbMnPpXxXxSgMaPePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Masc", "Aspect": "Perf", "Voice": "Pass", "Case": "Gen"},
|
||||||
"VbMnPpXxXxSgMaPePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Masc", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom"},
|
"VbMnPpXxXxSgMaPePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Masc", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom"},
|
||||||
"VbMnPpXxXxSgMaPePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Masc", "Aspect": "Perf" , "Voice": "Pass", "Case": "Voc"},
|
"VbMnPpXxXxSgMaPePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Masc", "Aspect": "Perf", "Voice": "Pass", "Case": "Voc"},
|
||||||
"VbMnPpXxXxSgNePePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Acc"},
|
"VbMnPpXxXxSgNePePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Acc"},
|
||||||
"VbMnPpXxXxSgNePePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Gen"},
|
"VbMnPpXxXxSgNePePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Gen"},
|
||||||
"VbMnPpXxXxSgNePePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom"},
|
"VbMnPpXxXxSgNePePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom"},
|
||||||
"VbMnPpXxXxSgNePePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Voc"},
|
"VbMnPpXxXxSgNePePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Voc"},
|
||||||
"VbMnPpXxXxXxXxIpAvXx": {POS: VERB, "VerbForm": "Conv", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing|Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"}
|
"VbMnPpXxXxXxXxIpAvXx": {POS: VERB, "VerbForm": "Conv", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing|Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,27 +1,26 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from ...symbols import POS, ADV, NOUN, ADP, PRON, SCONJ, PROPN, DET, SYM, INTJ
|
from ...symbols import POS, ADV, NOUN, ADP, PRON, SCONJ, PROPN, DET, SYM, INTJ
|
||||||
from ...symbols import PUNCT, NUM, AUX, X, CONJ, ADJ, VERB, PART, SPACE, CCONJ
|
from ...symbols import PUNCT, NUM, AUX, X, ADJ, VERB, PART, SPACE, CCONJ
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
TAG_MAP = {
|
TAG_MAP = {
|
||||||
"ADJ": {POS: ADJ},
|
"ADJ": {POS: ADJ},
|
||||||
"ADV": {POS: ADV},
|
"ADV": {POS: ADV},
|
||||||
"INTJ": {POS: INTJ},
|
"INTJ": {POS: INTJ},
|
||||||
"NOUN": {POS: NOUN},
|
"NOUN": {POS: NOUN},
|
||||||
"PROPN": {POS: PROPN},
|
"PROPN": {POS: PROPN},
|
||||||
"VERB": {POS: VERB},
|
"VERB": {POS: VERB},
|
||||||
"ADP": {POS: ADP},
|
"ADP": {POS: ADP},
|
||||||
"CCONJ": {POS: CCONJ},
|
"CCONJ": {POS: CCONJ},
|
||||||
"SCONJ": {POS: SCONJ},
|
"SCONJ": {POS: SCONJ},
|
||||||
"PART": {POS: PART},
|
"PART": {POS: PART},
|
||||||
"PUNCT": {POS: PUNCT},
|
"PUNCT": {POS: PUNCT},
|
||||||
"SYM": {POS: SYM},
|
"SYM": {POS: SYM},
|
||||||
"NUM": {POS: NUM},
|
"NUM": {POS: NUM},
|
||||||
"PRON": {POS: PRON},
|
"PRON": {POS: PRON},
|
||||||
"AUX": {POS: AUX},
|
"AUX": {POS: AUX},
|
||||||
"SPACE": {POS: SPACE},
|
"SPACE": {POS: SPACE},
|
||||||
"DET": {POS: DET},
|
"DET": {POS: DET},
|
||||||
"X" : {POS: X}
|
"X": {POS: X}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from ...symbols import ORTH, LEMMA, TAG, NORM, ADP, DET
|
from ...symbols import ORTH, LEMMA, NORM
|
||||||
|
|
||||||
_exc = {}
|
_exc = {}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user