spaCy/spacy/lang/el/lemmatizer/_lemma_rules.py

103 lines
2.7 KiB
Python
Raw Normal View History

# coding: utf8
from __future__ import unicode_literals
# Ending pairs for Greek adjectives, each written as [inflected, base].
# Presumably the lemmatizer swaps the first ending for the second to build
# candidate lemmas; the consumer is not part of this module, so verify there.
# Several inflected endings map to more than one base ending on purpose.
ADJECTIVE_RULES = [
    # Plural / feminine endings.
    ["οί", "ός"],  # e.g. καρδιακοί
    ["ές", "ός"],  # e.g. επιφανειακές
    ["ές", "ος"],  # e.g. καρδιακές
    ["ές", "ύς"],  # e.g. πολλές
    ["οι", "ος"],
    # NOTE(review): under plain suffix replacement ωραία would give "ωρος",
    # not "ωραίος" -- confirm whether "αίος" was intended.
    ["αία", "ος"],  # e.g. ωραία
    # NOTE(review): under plain suffix replacement δασώδη would give "δασες",
    # not "δασώδης" -- confirm the intended base ending.
    ["ωδη", "ες"],  # e.g. δασώδη
    ["ώδη", "ες"],
    # Comparative forms, all reduced to the positive -ός ending.
    ["ότερη", "ός"],
    ["ότερος", "ός"],
    ["ότεροι", "ός"],
    ["ότερων", "ός"],
    ["ότερες", "ός"],
]
# Ending pairs for Greek nouns, each written as [inflected, base].
# Presumably the lemmatizer swaps the first ending for the second to build
# candidate lemmas; the consumer is not part of this module, so verify there.
# Every pair is listed exactly once: a repeated pair can only regenerate a
# candidate that an earlier identical pair already produced.
# Pairs whose two endings are equal keep the word unchanged, for forms that
# are already the base form.
NOUN_RULES = [
    ["ιά", "ί"],  # παιδιά
    ["ια", "ι"],  # ποτήρια
    ["ες", "α"],  # κεραμίδες
    ["ές", "ά"],
    ["ες", "η"],  # ζάχαρη
    ["ές", "ή"],  # φυλακές
    ["ές", "ής"],  # καθηγητής
    ["α", "ο"],  # πρόβατα
    ["α", "α"],  # ζήτημα
    ["ατα", "α"],  # στόματα
    ["άτα", "άτα"],  # ντομάτα
    ["άτες", "άτα"],  # πατάτες
    ["ία", "ία"],
    ["ιά", "ιά"],
    ["οί", "ός"],  # υπουργοί
    ["ίας", "ία"],  # δικτατορίας, δυσωδείας, τρομοκρατίας
    # NOTE(review): under plain suffix replacement δικαιωμάτων would give
    # "δικαιωματα", not "δικαίωμα" -- confirm the intended base ending.
    ["άτων", "ατα"],  # δικαιωμάτων
    ["ώπων", "ωπος"],  # ανθρώπων
]
# Ending pairs for Greek verbs, each written as [inflected, base].
# Presumably the lemmatizer swaps the first ending for the second to build
# candidate lemmas; the consumer is not part of this module, so verify there.
# Most inflected endings are listed with both an unaccented and an accented
# base ending, since either spelling may be the correct lemma.
# Every pair is listed exactly once.
VERB_RULES = [
    # Active voice, present tense.
    ["εις", "ω"],
    ["εις", "ώ"],
    ["ει", "ω"],
    ["ει", "ώ"],
    ["ουμε", "ω"],
    ["ουμε", "ώ"],
    ["ούμε", "ώ"],  # θεωρούμε -> θεωρώ
    ["ούνε", "ώ"],
    ["ετε", "ω"],
    ["ετε", "ώ"],
    ["ουν", "ω"],
    ["ουν", "ώ"],
    ["είς", "ώ"],
    ["εί", "ώ"],
    ["ούν", "ώ"],
    # Passive / middle voice, present tense.
    ["εσαι", "ομαι"],  # αισθάνεσαι
    ["εσαι", "όμαι"],
    ["έσαι", "ομαι"],
    ["έσαι", "όμαι"],
    ["εται", "ομαι"],
    ["εται", "όμαι"],
    ["έται", "ομαι"],
    ["έται", "όμαι"],
    ["όμαστε", "όμαι"],
    ["όμαστε", "ομαι"],
    ["έσθε", "όμαι"],
    ["εσθε", "όμαι"],
    # Verbs in -άω / -ώ.
    ["άς", "ώ"],  # αγαπάς
    ["άει", "ώ"],
    ["άμε", "ώ"],
    ["άτε", "ώ"],
    ["άνε", "ώ"],
    ["άν", "ώ"],
    ["άω", "ώ"],  # _verbs.py could contain any of the two
    ["ώ", "άω"],
    # Passive / middle voice, past tense.
    ["όμουν", "ομαι"],  # ζαλιζόμουν
    ["όμουν", "όμαι"],
    ["όμουν", "αμαι"],
    # Accented variant: the stress sits on the ending in verbs such as
    # κοιμάμαι, so the unaccented pair alone cannot produce that lemma.
    ["όμουν", "άμαι"],  # κοιμόμουν -> κοιμάμαι
    # Active voice, past tense in -ούσα.
    ["ούσα", "ώ"],  # ζητούσα -> ζητώ
    ["ούσες", "ώ"],
    ["ούσε", "ώ"],
    ["ούσαμε", "ώ"],
    ["ούσατε", "ώ"],
    ["ούσαν", "ώ"],
    ["ούσανε", "ώ"],
]
# Punctuation rules, each written as [typographic form, ASCII form].
# The typographic quotes are spelled as Unicode escapes so they cannot be
# lost when the file is copied or re-encoded. An empty first element must
# never appear here: every string ends with "", so such a rule would apply
# to every token.
PUNCT_RULES = [
    ["\u201c", '"'],  # left double quotation mark
    ["\u201d", '"'],  # right double quotation mark
    ["\u2018", "'"],  # left single quotation mark
    ["\u2019", "'"],  # right single quotation mark
]