Update Ukrainian lemmatizer with new lookups (#4359)

* Update Ukrainian lemmatizer with new lookups

* Add missing import


Co-authored-by: Ines Montani <ines@ines.io>
This commit is contained in:
adrianeboyd 2019-10-02 12:04:06 +02:00 committed by Ines Montani
parent b6670bf0c2
commit dda86118bd
2 changed files with 7 additions and 17 deletions

View File

@@ -9,6 +9,7 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ..norm_exceptions import BASE_NORMS
from ...util import update_exc, add_lookups
from ...language import Language
from ...lookups import Lookups
from ...attrs import LANG, NORM
from .lemmatizer import UkrainianLemmatizer
@@ -24,8 +25,10 @@ class UkrainianDefaults(Language.Defaults):
stop_words = STOP_WORDS
@classmethod
def create_lemmatizer(cls, nlp=None, **kwargs):
return UkrainianLemmatizer()
def create_lemmatizer(cls, nlp=None, lookups=None):
if lookups is None:
lookups = Lookups()
return UkrainianLemmatizer(lookups)
class Ukrainian(Language):

View File

@@ -6,8 +6,8 @@ from ...lemmatizer import Lemmatizer
class UkrainianLemmatizer(Lemmatizer):
_morph = None
def __init__(self):
super(UkrainianLemmatizer, self).__init__()
def __init__(self, lookups=None):
super(UkrainianLemmatizer, self).__init__(lookups)
try:
from pymorphy2 import MorphAnalyzer
@@ -99,19 +99,6 @@ class UkrainianLemmatizer(Lemmatizer):
return symbols_to_str[univ_pos]
return None
def is_base_form(self, univ_pos, morphology=None):
# TODO
raise NotImplementedError
def det(self, string, morphology=None):
return self(string, "det", morphology)
def num(self, string, morphology=None):
return self(string, "num", morphology)
def pron(self, string, morphology=None):
return self(string, "pron", morphology)
def lookup(self, string, orth=None):
analyses = self._morph.parse(string)
if len(analyses) == 1: