Optimize Greek language support (#2658)

This commit is contained in:
Ioannis Daras 2018-08-14 03:31:32 +03:00 committed by Matthew Honnibal
parent 3953e967a0
commit fe94e696d3
13 changed files with 3115 additions and 2880 deletions

View File

@ -7,6 +7,8 @@ from .tag_map_general import TAG_MAP
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
from .lemmatizer import LEMMA_RULES, LEMMA_INDEX, LEMMA_EXC
from .lemmatizer.lemmatizer import GreekLemmatizer
from .syntax_iterators import SYNTAX_ITERATORS
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from .norm_exceptions import NORM_EXCEPTIONS
@ -20,15 +22,23 @@ class GreekDefaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters.update(LEX_ATTRS)
lex_attr_getters[LANG] = lambda text: 'el' # ISO code
lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS, NORM_EXCEPTIONS)
lex_attr_getters[NORM] = add_lookups(
Language.Defaults.lex_attr_getters[NORM], BASE_NORMS, NORM_EXCEPTIONS)
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
stop_words = STOP_WORDS
lemma_rules = LEMMA_RULES
lemma_index = LEMMA_INDEX
tag_map = TAG_MAP
prefixes = TOKENIZER_PREFIXES
suffixes = TOKENIZER_SUFFIXES
infixes = TOKENIZER_INFIXES
syntax_iterators = SYNTAX_ITERATORS
@classmethod
def create_lemmatizer(cls, nlp=None):
lemma_rules = LEMMA_RULES
lemma_index = LEMMA_INDEX
lemma_exc = LEMMA_EXC
return GreekLemmatizer(index=lemma_index, exceptions=lemma_exc,
rules=lemma_rules)
class Greek(Language):
@ -39,4 +49,3 @@ class Greek(Language):
# set default export this allows the language class to be lazy-loaded
__all__ = ['Greek']

View File

@ -9,11 +9,20 @@ Example sentences to test spaCy and its language models.
"""
sentences = [
"Η άνιση κατανομή του πλούτου και του εισοδήματος, η οποία έχει λάβει τρομερές διαστάσεις, δεν δείχνει τάσεις βελτίωσης.",
"Ο στόχος της σύντομης αυτής έκθεσης είναι να συνοψίσει τα κυριότερα συμπεράσματα των επισκοπήσεων κάθε μιας χώρας.",
"Μέχρι αργά χθες το βράδυ ο πλοιοκτήτης παρέμενε έξω από το γραφείο του γενικού γραμματέα του υπουργείου, ενώ είχε μόνον τηλεφωνική επικοινωνία με τον υπουργό.",
"Σύμφωνα με καλά ενημερωμένη πηγή, από την επεξεργασία του προέκυψε ότι οι δράστες της επίθεσης ήταν δύο, καθώς και ότι προσέγγισαν και αποχώρησαν από το σημείο με μοτοσικλέτα.",
'''Η άνιση κατανομή του πλούτου και του εισοδήματος, η οποία έχει λάβει
τρομερές διαστάσεις, δεν δείχνει τάσεις βελτίωσης.''',
'''Ο στόχος της σύντομης αυτής έκθεσης είναι να συνοψίσει τα κυριότερα
συμπεράσματα των επισκοπήσεων κάθε μιας χώρας.''',
'''Μέχρι αργά χθες το βράδυ ο πλοιοκτήτης παρέμενε έξω από το γραφείο του
γενικού γραμματέα του υπουργείου, ενώ είχε μόνον τηλεφωνική επικοινωνία με
τον υπουργό.''',
'''Σύμφωνα με καλά ενημερωμένη πηγή, από την επεξεργασία του προέκυψε ότι
οι δράστες της επίθεσης ήταν δύο, καθώς και ότι προσέγγισαν και αποχώρησαν
από το σημείο με μοτοσικλέτα.''',
"Η υποδομή καταλυμάτων στην Ελλάδα είναι πλήρης και ανανεώνεται συνεχώς.",
"Το επείγον ταχυδρομείο (ήτοι το παραδοτέο εντός 48 ωρών το πολύ) μπορεί να μεταφέρεται αεροπορικώς μόνον εφόσον εφαρμόζονται οι κανόνες ασφαλείας.",
"Στις ορεινές περιοχές του νησιού οι χιονοπτώσεις και οι παγετοί είναι περιορισμένοι ενώ στις παραθαλάσσιες περιοχές σημειώνονται σπανίως."
'''Το επείγον ταχυδρομείο (ήτοι το παραδοτέο εντός 48 ωρών το πολύ) μπορεί
να μεταφέρεται αεροπορικώς μόνον εφόσον εφαρμόζονται οι κανόνες
ασφαλείας''',
''''Στις ορεινές περιοχές του νησιού οι χιονοπτώσεις και οι παγετοί είναι
περιορισμένοι ενώ στις παραθαλάσσιες περιοχές σημειώνονται σπανίως.'''
]

View File

@ -5,19 +5,29 @@ from __future__ import unicode_literals
ADJECTIVES_IRREG = {
"χειρότερος": ("κακός",),
"χειρότερη": ("κακός",),
"χειρότερης": ("κακός",),
"χειρότερο": ("κακός",),
"χειρότεροι": ("κακός",),
"χειρότερων": ("κακός",),
"χειρότερου": ("κακός",),
"βέλτιστος": ("καλός",),
"βέλτιστη": ("καλός",),
"βέλτιστης": ("καλός",),
"βέλτιστο": ("καλός",),
"βέλτιστοι": ("καλός",),
"βέλτιστων": ("καλός",),
"βέλτιστου": ("καλός",),
"ελάχιστος": ("λίγος",),
"ελάχιστα": ("λίγος",),
"ελάχιστοι": ("λίγος",),
"ελάχιστων": ("λίγος",),
"ελάχιστη": ("λίγος",),
"ελάχιστης": ("λίγος",),
"ελάχιστο": ("λίγος",),
"ελάχιστου": ("λίγος",),
"πλείστος": ("πολύς",),
"πλείστου": ("πολύς",),
"πλείστων": ("πολύς",),
"πολλή": ("πολύ",),
"πολύς": ("πολύ",),
"πολλύ": ("πολύ",),

View File

@ -3,94 +3,148 @@ from __future__ import unicode_literals
ADJECTIVE_RULES = [
["οί","ός"], # καρδιακοί
["ές","ός"], # επιφανειακές
["ές","ος"], # καρδιακές
["ές","ύς"], # πολλές
["οι","ος"],
["αία","ος"], # ωραία
["ωδη","ες"], # δασώδη
["ώδη","ες"],
["ότερη","ός"],
["ότερος","ός"],
["ότεροι", "ός"],
["ότερων","ός"],
["ότερες", "ός"],
["οί", "ός"], # καρδιακοί -> καρδιακός. Ονομαστική πλ. σε -ός. (m)
["ών", "ός"], # καρδιακών -> καρδιακός. Γενική πλ. σε -ός. (m)
["ού", "ός"], # καρδιακού -> καρδιακός. Γενική εν. σε -ός. (m)
["ή", "ός"], # καρδιακή -> καρδιακός. Ονομαστική εν. σε -ή. (f)
["ής", "ός"], # καρδιακής -> καρδιακός. Γενική εν. σε -ή. (f)
["ές", "ός"], # καρδιακές -> καρδιακός. Ονομαστική πλ. σε -ή. (f)
["οι", "ος"], # ωραίοι -> ωραίος. Ονομαστική πλ. σε -ος. (m)
["ων", "ος"], # ωραίων -> ωραίος. Γενική πλ. σε -ος. (m)
["ου", "ος"], # ωραίου -> ωραίος. Γενική εν. σε -ος. (m)
["ο", "ος"], # ωραίο -> ωραίος. Ονομαστική εν. σε -ο. (n)
["α", "ος"], # χυδαία -> χυδαίος. Ονομαστική πλ. σε -ο. (n)
["ώδη", "ώδες"], # δασώδη -> δασώδες. Ονομαστική πλ. σε -ώδες. (n)
["ύτερη", "ός"], # καλύτερη -> καλός. Συγκριτικός βαθμός σε -ή. (f)
["ύτερης", "ός"], # καλύτερης -> καλός. (f)
["ύτερων", "ός"], # καλύτερων -> καλός. (f)
["ύτερος", "ός"], # καλύτερος -> καλός. Συγκριτικός βαθμός σε -ός. (m)
["ύτερου", "ός"], # καλύτερου -> καλός. (m)
]
# masculine -> m, feminine -> f, neuter -> n.
NOUN_RULES = [
["ιά","ί"], # παιδιά
["ια","ι"], # ποτήρια
["ες","α"], # κεραμίδες
["ές","ά"],
["ές","ά"],
["ες","α"], # εσπερινές
["ες","η"], # ζάχαρη
["ές","ή"], # φυλακές
["ές","ής"], # καθηγητής
["α","ο"], # πρόβατα
["α","α"], # ζήτημα
["ατα","α"], # στόματα
["άτα","άτα"], # ντομάτα
["άτες","άτα"], # πατάτες
["ία","ία"],
["ιά","ιά"],
["οί","ός"], # υπουργοί
["ίας","ία"], # δικτατορίας, δυσωδείας, τρομοκρατίας
["άτων","ατα"], # δικαιωμάτων
["ώπων","ωπος"], # ανθρώπων
["ιού", "ί"], # παιδιού -> παιδί. Γενική ενικού σε -ί. (n)
["ιά", "ί"], # παιδιά -> παιδί. Ονομαστική πληθυντικού σε -ί. (n)
["ιών", "ί"], # παιδιών -> παιδί. Γενική πληθυντικού σε -ί. (n)
["ηριού", "ήρι"], # ποτηριού -> ποτήρι. Γενική ενικού σε -ι. (n)
["ια", "ι"], # ποτήρια -> ποτήρι. Ονομαστική πληθυντικού σε -ι. (n)
["ηριών", "ήρι"], # ποτηριών -> ποτήρι. Γενική πληθυντικού σε -ι. (n)
["ας", "α"], # κεραμίδας -> κεραμίδα. Γενική ενικού σε -α. (f)
["ες", "α"], # κεραμίδες -> κεραμίδα. Ονομαστική πληθυντικού σε -α. (f)
["ων", "α"], # κεραμίδων -> κεραμίδα. Γενική πληθυντικού σε -α. (f)
["άς", "ά"], # βελανιδιάς -> βελανιδιά. Γενική ενικού σε -ά. (f)
["ές", "ά"], # βελανιδιές -> βελανιδιά. Ονομαστική πληθυντικού σε -ά. (f)
["ών", "ά"], # βελανιδιών -> βελανιδιά. Γενική πληθυντικού σε -ά. (f)
["ής", "ή"], # φυλακής -> φυλακή. Γενική ενικού σε -ή. (f)
["ές", "ή"], # φυλακές -> φυλακή. Ονομαστική πληθυντικού σε -ή. (f)
["ών", "ή"], # φυλακών -> φυλακή. Γενική πληθυντικού σε -ή. (f)
["ές", "ής"], # καθηγητές -> καθηγητής. Ονομαστική πληθυντικού σε -ής. (m)
["ών", "ής"], # καθηγητών -> καθηγητής. Γενική πληθυντικού σε -ής. (m)
["ου", "ο"], # προβάτου -> πρόβατο. Γενική ενικού σε -ο. (n)
["α", "ο"], # πρόβατα -> πρόβατο. Ονομαστική πληθυντικού σε -o. (n)
["ων", "ο"], # προβάτων -> πρόβατο. Γενική πληθυντικού σε -ο. (n)
["ητήματος", "ήτημα"], # ζητήματος -> ζήτημα. Γενική ενικού σε -α (n)
# ζητήματα -> ζήτημα. Ονομαστική πληθυντικού σε -α. (n)
["ητήματα", "ήτημα"],
# ζητημάτων -> ζήτημα. Γενική πληθυντικού σε -α. (n)
["ητημάτων", "ήτημα"],
["τος", ""], # στόματος -> στόμα. Γενική ενικού σε -α. (n)
["τα", "α"], # στόματα -> στόμα. Ονομαστική πληθυντικού σε -α. (n)
["ομάτων", "όμα"], # στομάτων -> στόμα. Γενική πληθυντικού σε -α. (n)
["ού", "ός"], # υπουργού -> υπουργός. Γενική ενικού σε -ος. (m)
["οί", "ός"], # υπουργοί -> υπουργούς. Ονομαστική πληυθυντικού σε -ος. (m)
["ών", "ός"], # υπουργών -> υπουργός. Γενική πληθυντικού σε -ος. (m)
["ς", ""], # δικτατορίας -> δικτατορία. Γενική ενικού σε -ας. (f)
# δικτατορίες -> δικτατορία. Ονομαστική πληθυντικού σε -ας. (f)
["ες", "α"],
["ιών", "ία"], # δικτατοριών -> δικτατορία. Γενική πληθυντικού σε -ας. (f)
["α", "ας"], # βασιλιά -> βασιλιάς. Γενική ενικού σε -άς. (m)
["δων", ""], # βασιλιάδων -> βασιλιά. Γενική πληθυντικού σε -άς. (m)
]
VERB_RULES = [
["εις", "ω"],
["εις","ώ"],
["ει","ω"],
["ει","ώ"],
["ουμε","ω"],
["ουμε","ώ"],
["ούμε","ώ"], # θεώρησα
["ούνε","ώ"], #
["ετε","ω"],
["ετε","ώ"],
["ουν","ω"],
["ουν","ώ"],
["είς","ώ"],
["εί","ώ"],
["ούν","ώ"],
["εσαι","ομαι"], #αισθάνεσαι
["εσαι","όμαι"],
["έσαι","ομαι"],
["έσαι","όμαι"],
["εται","ομαι"],
["εται","όμαι"],
["έται","ομαι"],
["έται","όμαι"],
["όμαστε","όμαι"],
["όμαστε","ομαι"],
["έσθε","όμαι"],
["εσθε","όμαι"],
["άς","ώ"], # αγαπάς
["άει","ώ"],
["άμε","ώ"],
["άτε","ώ"],
["άνε","ώ"],
["άν","ώ"],
["άμε","ώ"],
["άω","ώ"], # _verbs.py could contain any of the two
["ώ","άω"],
["όμουν", "ομαι"], # ζαλιζόμουν
["όμουν", "όμαι"],
["όμουν", "αμαι"], # κοιμόμουν
["όμουν", "αμαι"],
["ούσα", "ώ"], # ζητούσα -> ζητώ
["ούσες", "ώ"],
["ούσε", "ώ"],
["ούσαμε", "ώ"],
["ούσατε", "ώ"],
["ούσαν", "ώ"],
["ούσανε", "ώ"],
["εις", "ω"], # πάρεις -> πάρω. Ενεστώτας ρήματος σε -ω.
["ει", "ω"],
["ουμε", "ω"],
["ετε", "ω"],
["ουνε", "ω"],
["ουν", "ω"],
["είς", "ώ"], # πονείς -> πονώ. Ενεστώτας ρήματος σε -ώ vol1.
["εί", "ώ"], # οι κανόνες που λείπουν καλύπτονται από το αγαπώ.
["ούν", "ώ"],
["εσαι", "ομαι"], # αισθάνεσαι -> αισθάνομαι. Ενεστώτας ρήματος σε -ομαι.
["εται", "ομαι"],
["ανόμαστε", "άνομαι"],
["εστε", "ομαι"],
["ονται", "ομαι"],
["άς", "ώ"], # αγαπάς -> αγαπάω (ή αγαπώ). Ενεστώτας ρήματος σε -ώ vol2.
["άει", "ώ"],
["άμε", "ώ"],
["άτε", "ώ"],
["άνε", "ώ"],
["άν", "ώ"],
["άω", "ώ"],
["ώ", "άω"],
# ζαλιζόμουν -> ζαλίζομαι. Παρατατικός ρήματος -ίζομαι.
["ιζόμουν", "ίζομαι"],
["ιζόσουν", "ίζομαι"],
["ιζόταν", "ίζομαι"],
["ιζόμασταν", "ίζομαι"],
["ιζόσασταν", "ίζομαι"],
["ονταν", "ομαι"],
["όμουν", "άμαι"], # κοιμόμουν -> κοιμάμαι. Παρατατικός ρήματος σε -άμαι.
["όσουν", "άμαι"],
["όταν", "άμαι"],
["όμασταν", "άμαι"],
["όσασταν", "άμαι"],
["όντουσταν", "άμαι"],
["ούσα", "ώ"], # ζητούσα -> ζητώ. # Παρατατικός ρήματος σε -ώ.
["ούσες", "ώ"],
["ούσε", "ώ"],
["ούσαμε", "ώ"],
["ούσατε", "ώ"],
["ούσαν", "ώ"],
["ούσανε", "ώ"],
["λαμε", "ζω"], # βγάλαμε -> βγάζω. Αόριστος ρήματος σε -ω vol1.
["λατε", "ζω"],
["ήρα", "άρω"], # πήρα -> πάρω. Αόριστος ρήματος σε -ω vol2.
["ήρες", "άρω"],
["ήρε", "άρω"],
["ήραμε", "άρω"],
["ήρατε", "άρω"],
["ήρα", "άρω"],
["ένησα", "ενώ"], # φιλοξένησα -> φιλοξενώ. Αόριστος ρήματος σε -ώ vol1.
["ένησες", "ενώ"],
["ένησε", "ενώ"],
["ενήσαμε", "ενώ"],
["ένησατε", "ενώ"],
["ένησαν", "ενώ"],
["όνεσα", "ονώ"], # πόνεσα -> πονώ. Αόριστος ρήματος σε -ώ vol2.
["όνεσες", "ονώ"],
["όνεσε", "ονώ"],
["έσαμε", "ώ"],
["έσατε", "ώ"],
["ισα", "ομαι"], # κάθισα -> κάθομαι. Αόριστος ρήματος σε -ομαι.
["ισες", "ομαι"],
["ισε", "ομαι"],
["αθίσαμε", "άθομαι"],
["αθίσατε", "άθομαι"],
["ισαν", "ομαι"],
["άπα", "απώ"], # αγάπα -> αγαπώ. Προστακτική ρήματος σε -άω/ώ vol1.
["ά", "ώ"], # τιμά -> τιμώ. Προστακτική ρήματος σε άω/ώ vol2.
["οντας", "ω"], # βλέποντας -> βλέπω. Μετοχή.
["ξω", "ζω"], # παίξω -> παίζω. Μέλλοντας σε -ω.
["ξεις", "ζω"],
["ξουμε", "ζω"],
["ξετε", "ζω"],
["ξουν", "ζω"],
]

View File

@ -21,6 +21,8 @@ VERBS_IRREG = {
"είπατε": ("λέω",),
"είπαν": ("λέω",),
"είπανε": ("λέω",),
"πει": ("λέω"),
"πω": ("λέω"),
"πάω": ("πηγαίνω",),
"πάς": ("πηγαίνω",),
"πας": ("πηγαίνω",),
@ -38,7 +40,7 @@ VERBS_IRREG = {
"έπαιζα": ("παίζω",),
"έπαιζες": ("παίζω",),
"έπαιζε": ("παίζω",),
"έπαιζαν":("παίζω,",),
"έπαιζαν": ("παίζω,",),
"έπαιξα": ("παίζω",),
"έπαιξες": ("παίζω",),
"έπαιξε": ("παίζω",),
@ -52,6 +54,7 @@ VERBS_IRREG = {
"είχαμε": ("έχω",),
"είχατε": ("έχω",),
"είχαν": ("έχω",),
"είχανε": ("έχω",),
"έπαιρνα": ("παίρνω",),
"έπαιρνες": ("παίρνω",),
"έπαιρνε": ("παίρνω",),
@ -72,6 +75,12 @@ VERBS_IRREG = {
"έβλεπες": ("βλέπω",),
"έβλεπε": ("βλέπω",),
"έβλεπαν": ("βλέπω",),
"είδα": ("βλέπω",),
"είδες": ("βλέπω",),
"είδε": ("βλέπω",),
"είδαμε": ("βλέπω",),
"είδατε": ("βλέπω",),
"είδαν": ("βλέπω",),
"έφερνα": ("φέρνω",),
"έφερνες": ("φέρνω",),
"έφερνε": ("φέρνω",),
@ -122,6 +131,10 @@ VERBS_IRREG = {
"έπεφτες": ("πέφτω",),
"έπεφτε": ("πέφτω",),
"έπεφταν": ("πέφτω",),
"έπεσα": ("πέφτω",),
"έπεσες": ("πέφτω",),
"έπεσε": ("πέφτω",),
"έπεσαν": ("πέφτω",),
"έστειλα": ("στέλνω",),
"έστειλες": ("στέλνω",),
"έστειλε": ("στέλνω",),
@ -142,6 +155,12 @@ VERBS_IRREG = {
"έπινες": ("πίνω",),
"έπινε": ("πίνω",),
"έπιναν": ("πίνω",),
"ήπια": ("πίνω",),
"ήπιες": ("πίνω",),
"ήπιε": ("πίνω",),
"ήπιαμε": ("πίνω",),
"ήπιατε": ("πίνω",),
"ήπιαν": ("πίνω",),
"ετύχα": ("τυχαίνω",),
"ετύχες": ("τυχαίνω",),
"ετύχε": ("τυχαίνω",),
@ -159,4 +178,23 @@ VERBS_IRREG = {
"τρώγατε": ("τρώω",),
"τρώγανε": ("τρώω",),
"τρώγαν": ("τρώω",),
"πέρασα": ("περνώ",),
"πέρασες": ("περνώ",),
"πέρασε": ("περνώ",),
"πέρασαμε": ("περνώ",),
"πέρασατε": ("περνώ",),
"πέρασαν": ("περνώ",),
"έγδαρα": ("γδάρω",),
"έγδαρες": ("γδάρω",),
"έγδαρε": ("γδάρω",),
"έγδαραν": ("γδάρω",),
"έβγαλα": ("βγάλω",),
"έβγαλες": ("βγάλω",),
"έβγαλε": ("βγάλω",),
"έβγαλαν": ("βγάλω",),
"έφθασα": ("φτάνω",),
"έφθασες": ("φτάνω",),
"έφθασε": ("φτάνω",),
"έφθασαν": ("φτάνω",),
}

View File

@ -0,0 +1,69 @@
# coding: utf8
from __future__ import unicode_literals
from ....symbols import NOUN, VERB, ADJ, PUNCT
'''
Greek language lemmatizer applies the default rule based lemmatization
procedure with some modifications for better Greek language support.
The first modification is that it checks if the word for lemmatization is
already a lemma and if yes, it just returns it.
The second modification is about removing the base forms function which is
not applicable for Greek language.
'''
class GreekLemmatizer(object):
@classmethod
def load(cls, path, index=None, exc=None, rules=None, lookup=None):
return cls(index, exc, rules, lookup)
def __init__(self, index=None, exceptions=None, rules=None, lookup=None):
self.index = index
self.exc = exceptions
self.rules = rules
self.lookup_table = lookup if lookup is not None else {}
def __call__(self, string, univ_pos, morphology=None):
if not self.rules:
return [self.lookup_table.get(string, string)]
if univ_pos in (NOUN, 'NOUN', 'noun'):
univ_pos = 'noun'
elif univ_pos in (VERB, 'VERB', 'verb'):
univ_pos = 'verb'
elif univ_pos in (ADJ, 'ADJ', 'adj'):
univ_pos = 'adj'
elif univ_pos in (PUNCT, 'PUNCT', 'punct'):
univ_pos = 'punct'
else:
return list(set([string.lower()]))
lemmas = lemmatize(string, self.index.get(univ_pos, {}),
self.exc.get(univ_pos, {}),
self.rules.get(univ_pos, []))
return lemmas
def lemmatize(string, index, exceptions, rules):
string = string.lower()
forms = []
if (string in index):
forms.append(string)
return forms
forms.extend(exceptions.get(string, []))
oov_forms = []
if not forms:
for old, new in rules:
if string.endswith(old):
form = string[:len(string) - len(old)] + new
if not form:
pass
elif form in index or not form.isalpha():
forms.append(form)
else:
oov_forms.append(form)
if not forms:
forms.extend(oov_forms)
if not forms:
forms.append(string)
return list(set(forms))

View File

@ -4,14 +4,20 @@ from __future__ import unicode_literals
from ...attrs import LIKE_NUM
_num_words = ['μηδέν', 'ένας', 'δυο', 'δυό', 'τρεις', 'τέσσερις', 'πέντε', 'έξι', 'εφτά', 'επτά', 'οκτώ', 'οχτώ',
'εννιά', 'εννέα', 'δέκα', 'έντεκα', 'ένδεκα', 'δώδεκα', 'δεκατρείς', 'δεκατέσσερις', 'δεκαπέντε',
'δεκαέξι', 'δεκαεπτά', 'δεκαοχτώ', 'δεκαεννέα', 'δεκαεννεα', 'είκοσι', 'τριάντα', 'σαράντα', 'πενήντα',
'εξήντα', 'εβδομήντα', 'ογδόντα', 'ενενήντα', 'εκατό', 'διακόσιοι', 'διακόσοι', 'τριακόσιοι', 'τριακόσοι',
'τετρακόσιοι', 'τετρακόσοι', 'πεντακόσιοι', 'πεντακόσοι', 'εξακόσιοι', 'εξακόσοι', 'εφτακόσιοι',
'εφτακόσοι', 'επτακόσιοι', 'επτακόσοι', 'οχτακόσιοι', 'οχτακόσοι', 'οκτακόσιοι', 'οκτακόσοι',
'εννιακόσιοι', 'χίλιοι', 'χιλιάδα', 'εκατομμύριο', 'δισεκατομμύριο', 'τρισεκατομμύριο', 'τετράκις',
'πεντάκις', 'εξάκις', 'επτάκις', 'οκτάκις', 'εννεάκις', 'ένα', 'δύο', 'τρία', 'τέσσερα', 'δις', 'χιλιάδες']
_num_words = ['μηδέν', 'ένας', 'δυο', 'δυό', 'τρεις', 'τέσσερις', 'πέντε',
'έξι', 'εφτά', 'επτά', 'οκτώ', 'οχτώ',
'εννιά', 'εννέα', 'δέκα', 'έντεκα', 'ένδεκα', 'δώδεκα',
'δεκατρείς', 'δεκατέσσερις', 'δεκαπέντε', 'δεκαέξι', 'δεκαεπτά',
'δεκαοχτώ', 'δεκαεννέα', 'δεκαεννεα', 'είκοσι', 'τριάντα',
'σαράντα', 'πενήντα', 'εξήντα', 'εβδομήντα', 'ογδόντα',
'ενενήντα', 'εκατό', 'διακόσιοι', 'διακόσοι', 'τριακόσιοι',
'τριακόσοι', 'τετρακόσιοι', 'τετρακόσοι', 'πεντακόσιοι',
'πεντακόσοι', 'εξακόσιοι', 'εξακόσοι', 'εφτακόσιοι', 'εφτακόσοι',
'επτακόσιοι', 'επτακόσοι', 'οχτακόσιοι', 'οχτακόσοι',
'οκτακόσιοι', 'οκτακόσοι', 'εννιακόσιοι', 'χίλιοι', 'χιλιάδα',
'εκατομμύριο', 'δισεκατομμύριο', 'τρισεκατομμύριο', 'τετράκις',
'πεντάκις', 'εξάκις', 'επτάκις', 'οκτάκις', 'εννεάκις', 'ένα',
'δύο', 'τρία', 'τέσσερα', 'δις', 'χιλιάδες']
def like_num(text):

File diff suppressed because it is too large Load Diff

View File

@ -10,7 +10,11 @@ _units = ('km km² km³ m m² m³ dm dm² dm³ cm cm² cm³ mm mm² mm³ ha µm
'kg g mg µg t lb oz m/s km/h kmh mph hPa Pa mbar mb MB kb KB gb GB tb '
'TB T G M K км км² км³ м м² м³ дм дм² дм³ см см² см³ мм мм² мм³ нм '
'кг г мг м/с км/ч кПа Па мбар Кб КБ кб Мб МБ мб Гб ГБ гб Тб ТБ тб')
merge_chars = lambda char: char.strip().replace(' ', '|')
def merge_chars(char): return char.strip().replace(' ', '|')
UNITS = merge_chars(_units)
_prefixes = (['\'\'', '§', '%', '=', r'\+[0-9]+%', # 90%
@ -42,7 +46,8 @@ _suffixes = (LIST_PUNCT + LIST_ELLIPSES + LIST_QUOTES + LIST_ICONS +
r'(?<=[Α-Ωα-ωίϊΐόάέύϋΰήώ])\.',
r'^[Α-Ω]{1}\.',
r'\ [Α-Ω]{1}\.',
r'[ΈΆΊΑΌ-Ωα-ωίϊΐόάέύϋΰήώ]+([\-]([ΈΆΊΑΌ-Ωα-ωίϊΐόάέύϋΰήώ]+))+', # πρώτος-δεύτερος , πρώτος-δεύτερος-τρίτος
# πρώτος-δεύτερος , πρώτος-δεύτερος-τρίτος
r'[ΈΆΊΑΌ-Ωα-ωίϊΐόάέύϋΰήώ]+([\-]([ΈΆΊΑΌ-Ωα-ωίϊΐόάέύϋΰήώ]+))+',
r'([0-9]+)mg', # 13mg
r'([0-9]+)\.([0-9]+)m' # 1.2m
])
@ -53,7 +58,8 @@ _infixes = (LIST_ELLIPSES + LIST_ICONS +
r'([0-9])+(\.([0-9]+))*([\-]([0-9])+)+', # 10.9 , 10.9.9 , 10.9-6
r'([0-9])+[,]([0-9])+[\-]([0-9])+[,]([0-9])+', # 10,11,12
r'([0-9])+[ης]+([\-]([0-9])+)+', # 1ης-2
r'([0-9]){1,4}[\/]([0-9]){1,2}([\/]([0-9]){0,4}){0,1}', # 15/2 , 15/2/17 , 2017/2/15
# 15/2 , 15/2/17 , 2017/2/15
r'([0-9]){1,4}[\/]([0-9]){1,2}([\/]([0-9]){0,4}){0,1}',
r'[A-Za-z]+\@[A-Za-z]+(\-[A-Za-z]+)*\.[A-Za-z]+', # abc@cde-fgh.a
r'([a-zA-Z]+)(\-([a-zA-Z]+))+', # abc-abc
r'(?<=[{}])\.(?=[{}])'.format(ALPHA_LOWER, ALPHA_UPPER),

View File

@ -0,0 +1,61 @@
# coding: utf8
from __future__ import unicode_literals
from ...symbols import NOUN, PROPN, PRON
def noun_chunks(obj):
"""
Detect base noun phrases. Works on both Doc and Span.
"""
# it follows the logic of the noun chunks finder of English language,
# adjusted to some Greek language special characteristics.
# obj tag corrects some DEP tagger mistakes.
# Further improvement of the models will eliminate the need for this tag.
labels = ['nsubj', 'obj', 'iobj', 'appos', 'ROOT', 'obl']
doc = obj.doc # Ensure works on both Doc and Span.
np_deps = [doc.vocab.strings.add(label) for label in labels]
conj = doc.vocab.strings.add('conj')
nmod = doc.vocab.strings.add('nmod')
np_label = doc.vocab.strings.add('NP')
seen = set()
for i, word in enumerate(obj):
if word.pos not in (NOUN, PROPN, PRON):
continue
# Prevent nested chunks from being produced
if word.i in seen:
continue
if word.dep in np_deps:
if any(w.i in seen for w in word.subtree):
continue
flag = False
if (word.pos == NOUN):
# check for patterns such as γραμμή παραγωγής
for potential_nmod in word.rights:
if (potential_nmod.dep == nmod):
seen.update(j for j in range(
word.left_edge.i, potential_nmod.i + 1))
yield word.left_edge.i, potential_nmod.i + 1, np_label
flag = True
break
if (flag is False):
seen.update(j for j in range(word.left_edge.i, word.i + 1))
yield word.left_edge.i, word.i + 1, np_label
elif word.dep == conj:
# covers the case: έχει όμορφα και έξυπνα παιδιά
head = word.head
while head.dep == conj and head.head.i < head.i:
head = head.head
# If the head is an NP, and we're coordinated to it, we're an NP
if head.dep in np_deps:
if any(w.i in seen for w in word.subtree):
continue
seen.update(j for j in range(word.left_edge.i, word.i + 1))
yield word.left_edge.i, word.i + 1, np_label
SYNTAX_ITERATORS = {
'noun_chunks': noun_chunks
}

View File

@ -2,10 +2,10 @@
from __future__ import unicode_literals
from ...symbols import POS, PUNCT, SYM, ADJ, CCONJ, SCONJ, NUM, DET, ADV, ADP, X, VERB
from ...symbols import NOUN, PROPN, PART, INTJ,SPACE,PRON
from ...symbols import NOUN, PROPN, PART, INTJ, PRON
TAG_MAP = {
"ABBR": {POS: NOUN, "Abbr":"Yes"},
"ABBR": {POS: NOUN, "Abbr": "Yes"},
"AdXxBa": {POS: ADV, "Degree": ""},
"AdXxCp": {POS: ADV, "Degree": "Cmp"},
"AdXxSu": {POS: ADV, "Degree": "Sup"},
@ -112,38 +112,38 @@ TAG_MAP = {
"AsPpPaNeSgAc": {POS: ADP, "Gender": "Neut", "Number": "Sing", "Case": "Acc"},
"AsPpPaNeSgGe": {POS: ADP, "Gender": "Neut", "Number": "Sing", "Case": "Gen"},
"AsPpSp": {POS: ADP},
"AtDfFePlAc": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Plur", "Case": "Acc", "Other":{"Definite": "Def"}},
"AtDfFePlGe": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Plur", "Case": "Gen", "Other":{"Definite": "Def"}},
"AtDfFePlNm": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Plur", "Case": "Nom", "Other":{"Definite": "Def"}},
"AtDfFeSgAc": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Acc", "Other":{"Definite": "Def"}},
"AtDfFeSgDa": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Dat", "Other":{"Definite": "Def"}},
"AtDfFeSgGe": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Gen", "Other":{"Definite": "Def"}},
"AtDfFeSgNm": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Nom", "Other":{"Definite": "Def"}},
"AtDfMaPlAc": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Plur", "Case": "Acc", "Other":{"Definite": "Def"}},
"AtDfMaPlGe": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Plur", "Case": "Gen", "Other":{"Definite": "Def"}},
"AtDfMaPlNm": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Plur", "Case": "Nom", "Other":{"Definite": "Def"}},
"AtDfMaSgAc": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Acc", "Other":{"Definite": "Def"}},
"AtDfMaSgDa": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Dat", "Other":{"Definite": "Def"}},
"AtDfMaSgGe": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Gen", "Other":{"Definite": "Def"}},
"AtDfMaSgNm": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Nom", "Other":{"Definite": "Def"}},
"AtDfNePlAc": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Plur", "Case": "Acc", "Other":{"Definite": "Def"}},
"AtDfNePlDa": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Plur", "Case": "Dat", "Other":{"Definite": "Def"}},
"AtDfNePlGe": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Plur", "Case": "Gen", "Other":{"Definite": "Def"}},
"AtDfNePlNm": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Plur", "Case": "Nom", "Other":{"Definite": "Def"}},
"AtDfNeSgAc": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Acc", "Other":{"Definite": "Def"}},
"AtDfNeSgDa": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Dat", "Other":{"Definite": "Def"}},
"AtDfNeSgGe": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Gen", "Other":{"Definite": "Def"}},
"AtDfNeSgNm": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Nom", "Other":{"Definite": "Def"}},
"AtIdFeSgAc": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Acc", "Other":{"Definite": "Ind"}},
"AtIdFeSgDa": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Dat", "Other":{"Definite": "Ind"}},
"AtIdFeSgGe": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Gen", "Other":{"Definite": "Ind"}},
"AtIdFeSgNm": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Nom", "Other":{"Definite": "Ind"}},
"AtIdMaSgAc": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Acc", "Other":{"Definite": "Ind"}},
"AtIdMaSgGe": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Gen", "Other":{"Definite": "Ind"}},
"AtIdMaSgNm": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Nom", "Other":{"Definite": "Ind"}},
"AtIdNeSgAc": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Acc", "Other":{"Definite": "Ind"}},
"AtIdNeSgGe": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Gen", "Other":{"Definite": "Ind"}},
"AtIdNeSgNm": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Nom", "Other":{"Definite": "Ind"}},
"AtDfFePlAc": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Plur", "Case": "Acc", "Other": {"Definite": "Def"}},
"AtDfFePlGe": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Plur", "Case": "Gen", "Other": {"Definite": "Def"}},
"AtDfFePlNm": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Plur", "Case": "Nom", "Other": {"Definite": "Def"}},
"AtDfFeSgAc": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Acc", "Other": {"Definite": "Def"}},
"AtDfFeSgDa": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Dat", "Other": {"Definite": "Def"}},
"AtDfFeSgGe": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Gen", "Other": {"Definite": "Def"}},
"AtDfFeSgNm": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Nom", "Other": {"Definite": "Def"}},
"AtDfMaPlAc": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Plur", "Case": "Acc", "Other": {"Definite": "Def"}},
"AtDfMaPlGe": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Plur", "Case": "Gen", "Other": {"Definite": "Def"}},
"AtDfMaPlNm": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Plur", "Case": "Nom", "Other": {"Definite": "Def"}},
"AtDfMaSgAc": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Acc", "Other": {"Definite": "Def"}},
"AtDfMaSgDa": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Dat", "Other": {"Definite": "Def"}},
"AtDfMaSgGe": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Gen", "Other": {"Definite": "Def"}},
"AtDfMaSgNm": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Nom", "Other": {"Definite": "Def"}},
"AtDfNePlAc": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Plur", "Case": "Acc", "Other": {"Definite": "Def"}},
"AtDfNePlDa": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Plur", "Case": "Dat", "Other": {"Definite": "Def"}},
"AtDfNePlGe": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Plur", "Case": "Gen", "Other": {"Definite": "Def"}},
"AtDfNePlNm": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Plur", "Case": "Nom", "Other": {"Definite": "Def"}},
"AtDfNeSgAc": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Acc", "Other": {"Definite": "Def"}},
"AtDfNeSgDa": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Dat", "Other": {"Definite": "Def"}},
"AtDfNeSgGe": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Gen", "Other": {"Definite": "Def"}},
"AtDfNeSgNm": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Nom", "Other": {"Definite": "Def"}},
"AtIdFeSgAc": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Acc", "Other": {"Definite": "Ind"}},
"AtIdFeSgDa": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Dat", "Other": {"Definite": "Ind"}},
"AtIdFeSgGe": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Gen", "Other": {"Definite": "Ind"}},
"AtIdFeSgNm": {POS: DET, "PronType": "Art", "Gender": "Fem", "Number": "Sing", "Case": "Nom", "Other": {"Definite": "Ind"}},
"AtIdMaSgAc": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Acc", "Other": {"Definite": "Ind"}},
"AtIdMaSgGe": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Gen", "Other": {"Definite": "Ind"}},
"AtIdMaSgNm": {POS: DET, "PronType": "Art", "Gender": "Masc", "Number": "Sing", "Case": "Nom", "Other": {"Definite": "Ind"}},
"AtIdNeSgAc": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Acc", "Other": {"Definite": "Ind"}},
"AtIdNeSgGe": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Gen", "Other": {"Definite": "Ind"}},
"AtIdNeSgNm": {POS: DET, "PronType": "Art", "Gender": "Neut", "Number": "Sing", "Case": "Nom", "Other": {"Definite": "Ind"}},
"CjCo": {POS: CCONJ},
"CjSb": {POS: SCONJ},
"CPUNCT": {POS: PUNCT},
@ -152,7 +152,7 @@ TAG_MAP = {
"ENUM": {POS: NUM},
"Ij": {POS: INTJ},
"INIT": {POS: SYM},
"NBABBR": {POS: NOUN, "Abbr":"Yes"},
"NBABBR": {POS: NOUN, "Abbr": "Yes"},
"NmAnFePlAcAj": {POS: NUM, "NumType": "Mult", "Gender": "Fem", "Number": "Plur", "Case": "Acc"},
"NmAnFePlGeAj": {POS: NUM, "NumType": "Mult", "Gender": "Fem", "Number": "Plur", "Case": "Gen"},
"NmAnFePlNmAj": {POS: NUM, "NumType": "Mult", "Gender": "Fem", "Number": "Plur", "Case": "Nom"},
@ -529,71 +529,70 @@ TAG_MAP = {
"VbMnIdPa03PlXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPa03PlXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPa03PlXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPa03PlXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPa03SgXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPa03SgXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPa03SgXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPa03SgXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr01PlXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "1", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr01PlXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "1", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr01SgXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "1", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr01SgXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "1", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr02PlXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr02PlXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr02SgXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr02SgXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr03PlXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr03PlXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr03SgXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr03SgXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx01PlXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "1", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx01PlXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "1", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx01SgXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "1", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx01SgXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "1", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx02PlXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx02PlXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx02SgXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx02SgXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx03PlXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx03PlXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx03SgXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx03SgXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx02PlXxIpAvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx02PlXxIpPvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx02PlXxPeAvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx02PlXxPePvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx02SgXxIpAvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx02SgXxIpPvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx02SgXxPeAvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx02SgXxPePvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx03SgXxIpPvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnNfXxXxXxXxPeAvXx": {POS: VERB, "VerbForm": "Inf", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing|Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnNfXxXxXxXxPePvXx": {POS: VERB, "VerbForm": "Inf", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing|Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnPpPrXxXxXxIpAvXx": {POS: VERB, "VerbForm": "Conv", "Mood": "", "Tense": "Pres", "Person": "1|2|3", "Number": "Sing|Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnPpXxXxPlFePePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Fem", "Aspect": "Perf" , "Voice": "Pass", "Case": "Acc"},
"VbMnPpXxXxPlFePePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Fem", "Aspect": "Perf" , "Voice": "Pass", "Case": "Gen"},
"VbMnPpXxXxPlFePePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Fem", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom"},
"VbMnPpXxXxPlFePePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Fem", "Aspect": "Perf" , "Voice": "Pass", "Case": "Voc"},
"VbMnPpXxXxPlMaPePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Masc", "Aspect": "Perf" , "Voice": "Pass", "Case": "Acc"},
"VbMnPpXxXxPlMaPePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Masc", "Aspect": "Perf" , "Voice": "Pass", "Case": "Gen"},
"VbMnPpXxXxPlMaPePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Masc", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom"},
"VbMnPpXxXxPlMaPePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Masc", "Aspect": "Perf" , "Voice": "Pass", "Case": "Voc"},
"VbMnPpXxXxPlNePePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Acc"},
"VbMnPpXxXxPlNePePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Gen"},
"VbMnPpXxXxPlNePePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom"},
"VbMnPpXxXxPlNePePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Voc"},
"VbMnPpXxXxSgFePePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Fem", "Aspect": "Perf" , "Voice": "Pass", "Case": "Acc"},
"VbMnPpXxXxSgFePePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Fem", "Aspect": "Perf" , "Voice": "Pass", "Case": "Gen"},
"VbMnPpXxXxSgFePePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Fem", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom"},
"VbMnPpXxXxSgFePePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Fem", "Aspect": "Perf" , "Voice": "Pass", "Case": "Voc"},
"VbMnPpXxXxSgMaPePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Masc", "Aspect": "Perf" , "Voice": "Pass", "Case": "Acc"},
"VbMnPpXxXxSgMaPePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Masc", "Aspect": "Perf" , "Voice": "Pass", "Case": "Gen"},
"VbMnPpXxXxSgMaPePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Masc", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom"},
"VbMnPpXxXxSgMaPePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Masc", "Aspect": "Perf" , "Voice": "Pass", "Case": "Voc"},
"VbMnPpXxXxSgNePePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Acc"},
"VbMnPpXxXxSgNePePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Gen"},
"VbMnPpXxXxSgNePePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Nom"},
"VbMnPpXxXxSgNePePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Neut", "Aspect": "Perf" , "Voice": "Pass", "Case": "Voc"},
"VbMnPpXxXxXxXxIpAvXx": {POS: VERB, "VerbForm": "Conv", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing|Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp" , "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"}
"VbMnIdPa03PlXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPa03SgXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPa03SgXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPa03SgXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPa03SgXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr01PlXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "1", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr01PlXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "1", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr01SgXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "1", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr01SgXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "1", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr02PlXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr02PlXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr02SgXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr02SgXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr03PlXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr03PlXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr03SgXxIpAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdPr03SgXxIpPvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx01PlXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "1", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx01PlXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "1", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx01SgXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "1", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx01SgXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "1", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx02PlXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx02PlXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx02SgXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx02SgXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx03PlXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx03PlXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "3", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx03SgXxPeAvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnIdXx03SgXxPePvXx": {POS: VERB, "VerbForm": "Fin", "Mood": "Ind", "Tense": "Pres|Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx02PlXxIpAvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx02PlXxIpPvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx02PlXxPeAvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx02PlXxPePvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx02SgXxIpAvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx02SgXxIpPvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx02SgXxPeAvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx02SgXxPePvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "2", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnMpXx03SgXxIpPvXx": {POS: VERB, "VerbForm": "", "Mood": "Imp", "Tense": "Pres|Past", "Person": "3", "Number": "Sing", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnNfXxXxXxXxPeAvXx": {POS: VERB, "VerbForm": "Inf", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing|Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnNfXxXxXxXxPePvXx": {POS: VERB, "VerbForm": "Inf", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing|Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnPpPrXxXxXxIpAvXx": {POS: VERB, "VerbForm": "Conv", "Mood": "", "Tense": "Pres", "Person": "1|2|3", "Number": "Sing|Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"},
"VbMnPpXxXxPlFePePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Fem", "Aspect": "Perf", "Voice": "Pass", "Case": "Acc"},
"VbMnPpXxXxPlFePePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Fem", "Aspect": "Perf", "Voice": "Pass", "Case": "Gen"},
"VbMnPpXxXxPlFePePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Fem", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom"},
"VbMnPpXxXxPlFePePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Fem", "Aspect": "Perf", "Voice": "Pass", "Case": "Voc"},
"VbMnPpXxXxPlMaPePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Masc", "Aspect": "Perf", "Voice": "Pass", "Case": "Acc"},
"VbMnPpXxXxPlMaPePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Masc", "Aspect": "Perf", "Voice": "Pass", "Case": "Gen"},
"VbMnPpXxXxPlMaPePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Masc", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom"},
"VbMnPpXxXxPlMaPePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Masc", "Aspect": "Perf", "Voice": "Pass", "Case": "Voc"},
"VbMnPpXxXxPlNePePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Acc"},
"VbMnPpXxXxPlNePePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Gen"},
"VbMnPpXxXxPlNePePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom"},
"VbMnPpXxXxPlNePePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Plur", "Gender": "Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Voc"},
"VbMnPpXxXxSgFePePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Fem", "Aspect": "Perf", "Voice": "Pass", "Case": "Acc"},
"VbMnPpXxXxSgFePePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Fem", "Aspect": "Perf", "Voice": "Pass", "Case": "Gen"},
"VbMnPpXxXxSgFePePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Fem", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom"},
"VbMnPpXxXxSgFePePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Fem", "Aspect": "Perf", "Voice": "Pass", "Case": "Voc"},
"VbMnPpXxXxSgMaPePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Masc", "Aspect": "Perf", "Voice": "Pass", "Case": "Acc"},
"VbMnPpXxXxSgMaPePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Masc", "Aspect": "Perf", "Voice": "Pass", "Case": "Gen"},
"VbMnPpXxXxSgMaPePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Masc", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom"},
"VbMnPpXxXxSgMaPePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Masc", "Aspect": "Perf", "Voice": "Pass", "Case": "Voc"},
"VbMnPpXxXxSgNePePvAc": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Acc"},
"VbMnPpXxXxSgNePePvGe": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Gen"},
"VbMnPpXxXxSgNePePvNm": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Nom"},
"VbMnPpXxXxSgNePePvVo": {POS: VERB, "VerbForm": "Part", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing", "Gender": "Neut", "Aspect": "Perf", "Voice": "Pass", "Case": "Voc"},
"VbMnPpXxXxXxXxIpAvXx": {POS: VERB, "VerbForm": "Conv", "Mood": "", "Tense": "Pres|Past", "Person": "1|2|3", "Number": "Sing|Plur", "Gender": "Masc|Fem|Neut", "Aspect": "Imp", "Voice": "Act", "Case": "Nom|Gen|Dat|Acc|Voc"}
}

View File

@ -1,27 +1,26 @@
from __future__ import unicode_literals
from ...symbols import POS, ADV, NOUN, ADP, PRON, SCONJ, PROPN, DET, SYM, INTJ
from ...symbols import PUNCT, NUM, AUX, X, CONJ, ADJ, VERB, PART, SPACE, CCONJ
from ...symbols import PUNCT, NUM, AUX, X, ADJ, VERB, PART, SPACE, CCONJ
TAG_MAP = {
"ADJ": {POS: ADJ},
"ADV": {POS: ADV},
"INTJ": {POS: INTJ},
"NOUN": {POS: NOUN},
"PROPN": {POS: PROPN},
"VERB": {POS: VERB},
"ADP": {POS: ADP},
"CCONJ": {POS: CCONJ},
"SCONJ": {POS: SCONJ},
"PART": {POS: PART},
"PUNCT": {POS: PUNCT},
"SYM": {POS: SYM},
"NUM": {POS: NUM},
"PRON": {POS: PRON},
"AUX": {POS: AUX},
"SPACE": {POS: SPACE},
"DET": {POS: DET},
"X" : {POS: X}
"ADJ": {POS: ADJ},
"ADV": {POS: ADV},
"INTJ": {POS: INTJ},
"NOUN": {POS: NOUN},
"PROPN": {POS: PROPN},
"VERB": {POS: VERB},
"ADP": {POS: ADP},
"CCONJ": {POS: CCONJ},
"SCONJ": {POS: SCONJ},
"PART": {POS: PART},
"PUNCT": {POS: PUNCT},
"SYM": {POS: SYM},
"NUM": {POS: NUM},
"PRON": {POS: PRON},
"AUX": {POS: AUX},
"SPACE": {POS: SPACE},
"DET": {POS: DET},
"X": {POS: X}
}

View File

@ -2,7 +2,7 @@
from __future__ import unicode_literals
from ...symbols import ORTH, LEMMA, TAG, NORM, ADP, DET
from ...symbols import ORTH, LEMMA, NORM
_exc = {}