mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
Update Ukrainian lemmatizer with new lookups (#4359)
* Update Ukrainian lemmatizer with new lookups
* Add missing import

Co-authored-by: Ines Montani <ines@ines.io>
This commit is contained in:
parent
b6670bf0c2
commit
dda86118bd
|
@ -9,6 +9,7 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
|||
from ..norm_exceptions import BASE_NORMS
|
||||
from ...util import update_exc, add_lookups
|
||||
from ...language import Language
|
||||
from ...lookups import Lookups
|
||||
from ...attrs import LANG, NORM
|
||||
from .lemmatizer import UkrainianLemmatizer
|
||||
|
||||
|
@ -24,8 +25,10 @@ class UkrainianDefaults(Language.Defaults):
|
|||
stop_words = STOP_WORDS
|
||||
|
||||
@classmethod
|
||||
def create_lemmatizer(cls, nlp=None, **kwargs):
|
||||
return UkrainianLemmatizer()
|
||||
def create_lemmatizer(cls, nlp=None, lookups=None):
|
||||
if lookups is None:
|
||||
lookups = Lookups()
|
||||
return UkrainianLemmatizer(lookups)
|
||||
|
||||
|
||||
class Ukrainian(Language):
|
||||
|
|
|
@ -6,8 +6,8 @@ from ...lemmatizer import Lemmatizer
|
|||
class UkrainianLemmatizer(Lemmatizer):
|
||||
_morph = None
|
||||
|
||||
def __init__(self):
|
||||
super(UkrainianLemmatizer, self).__init__()
|
||||
def __init__(self, lookups=None):
|
||||
super(UkrainianLemmatizer, self).__init__(lookups)
|
||||
try:
|
||||
from pymorphy2 import MorphAnalyzer
|
||||
|
||||
|
@ -99,19 +99,6 @@ class UkrainianLemmatizer(Lemmatizer):
|
|||
return symbols_to_str[univ_pos]
|
||||
return None
|
||||
|
||||
def is_base_form(self, univ_pos, morphology=None):
|
||||
# TODO
|
||||
raise NotImplementedError
|
||||
|
||||
def det(self, string, morphology=None):
|
||||
return self(string, "det", morphology)
|
||||
|
||||
def num(self, string, morphology=None):
|
||||
return self(string, "num", morphology)
|
||||
|
||||
def pron(self, string, morphology=None):
|
||||
return self(string, "pron", morphology)
|
||||
|
||||
def lookup(self, string, orth=None):
|
||||
analyses = self._morph.parse(string)
|
||||
if len(analyses) == 1:
|
||||
|
|
Loading…
Reference in New Issue
Block a user