mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Update Ukrainian lemmatizer with new lookups (#4359)
* Update Ukrainian lemmatizer with new lookups
* Add missing import

Co-authored-by: Ines Montani <ines@ines.io>
This commit is contained in:
parent
b6670bf0c2
commit
dda86118bd
|
@ -9,6 +9,7 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||||
from ..norm_exceptions import BASE_NORMS
|
from ..norm_exceptions import BASE_NORMS
|
||||||
from ...util import update_exc, add_lookups
|
from ...util import update_exc, add_lookups
|
||||||
from ...language import Language
|
from ...language import Language
|
||||||
|
from ...lookups import Lookups
|
||||||
from ...attrs import LANG, NORM
|
from ...attrs import LANG, NORM
|
||||||
from .lemmatizer import UkrainianLemmatizer
|
from .lemmatizer import UkrainianLemmatizer
|
||||||
|
|
||||||
|
@ -24,8 +25,10 @@ class UkrainianDefaults(Language.Defaults):
|
||||||
stop_words = STOP_WORDS
|
stop_words = STOP_WORDS
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_lemmatizer(cls, nlp=None, **kwargs):
|
def create_lemmatizer(cls, nlp=None, lookups=None):
|
||||||
return UkrainianLemmatizer()
|
if lookups is None:
|
||||||
|
lookups = Lookups()
|
||||||
|
return UkrainianLemmatizer(lookups)
|
||||||
|
|
||||||
|
|
||||||
class Ukrainian(Language):
|
class Ukrainian(Language):
|
||||||
|
|
|
@ -6,8 +6,8 @@ from ...lemmatizer import Lemmatizer
|
||||||
class UkrainianLemmatizer(Lemmatizer):
|
class UkrainianLemmatizer(Lemmatizer):
|
||||||
_morph = None
|
_morph = None
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, lookups=None):
|
||||||
super(UkrainianLemmatizer, self).__init__()
|
super(UkrainianLemmatizer, self).__init__(lookups)
|
||||||
try:
|
try:
|
||||||
from pymorphy2 import MorphAnalyzer
|
from pymorphy2 import MorphAnalyzer
|
||||||
|
|
||||||
|
@ -99,19 +99,6 @@ class UkrainianLemmatizer(Lemmatizer):
|
||||||
return symbols_to_str[univ_pos]
|
return symbols_to_str[univ_pos]
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def is_base_form(self, univ_pos, morphology=None):
|
|
||||||
# TODO
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
def det(self, string, morphology=None):
|
|
||||||
return self(string, "det", morphology)
|
|
||||||
|
|
||||||
def num(self, string, morphology=None):
|
|
||||||
return self(string, "num", morphology)
|
|
||||||
|
|
||||||
def pron(self, string, morphology=None):
|
|
||||||
return self(string, "pron", morphology)
|
|
||||||
|
|
||||||
def lookup(self, string, orth=None):
|
def lookup(self, string, orth=None):
|
||||||
analyses = self._morph.parse(string)
|
analyses = self._morph.parse(string)
|
||||||
if len(analyses) == 1:
|
if len(analyses) == 1:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user