Update Ukrainian lemmatizer with new lookups (#4359)

* Update Ukrainian lemmatizer with new lookups

* Add missing import


Co-authored-by: Ines Montani <ines@ines.io>
This commit is contained in:
adrianeboyd 2019-10-02 12:04:06 +02:00 committed by Ines Montani
parent b6670bf0c2
commit dda86118bd
2 changed files with 7 additions and 17 deletions

View File

@@ -9,6 +9,7 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ..norm_exceptions import BASE_NORMS from ..norm_exceptions import BASE_NORMS
from ...util import update_exc, add_lookups from ...util import update_exc, add_lookups
from ...language import Language from ...language import Language
from ...lookups import Lookups
from ...attrs import LANG, NORM from ...attrs import LANG, NORM
from .lemmatizer import UkrainianLemmatizer from .lemmatizer import UkrainianLemmatizer
@@ -24,8 +25,10 @@ class UkrainianDefaults(Language.Defaults):
stop_words = STOP_WORDS stop_words = STOP_WORDS
@classmethod @classmethod
def create_lemmatizer(cls, nlp=None, **kwargs): def create_lemmatizer(cls, nlp=None, lookups=None):
return UkrainianLemmatizer() if lookups is None:
lookups = Lookups()
return UkrainianLemmatizer(lookups)
class Ukrainian(Language): class Ukrainian(Language):

View File

@@ -6,8 +6,8 @@ from ...lemmatizer import Lemmatizer
class UkrainianLemmatizer(Lemmatizer): class UkrainianLemmatizer(Lemmatizer):
_morph = None _morph = None
def __init__(self): def __init__(self, lookups=None):
super(UkrainianLemmatizer, self).__init__() super(UkrainianLemmatizer, self).__init__(lookups)
try: try:
from pymorphy2 import MorphAnalyzer from pymorphy2 import MorphAnalyzer
@@ -99,19 +99,6 @@ class UkrainianLemmatizer(Lemmatizer):
return symbols_to_str[univ_pos] return symbols_to_str[univ_pos]
return None return None
def is_base_form(self, univ_pos, morphology=None):
# TODO
raise NotImplementedError
def det(self, string, morphology=None):
return self(string, "det", morphology)
def num(self, string, morphology=None):
return self(string, "num", morphology)
def pron(self, string, morphology=None):
return self(string, "pron", morphology)
def lookup(self, string, orth=None): def lookup(self, string, orth=None):
analyses = self._morph.parse(string) analyses = self._morph.parse(string)
if len(analyses) == 1: if len(analyses) == 1: