Fix init for pymorphy2_lookup lemmatizer mode (#11631)
parent 2e52479eec
commit fe06e037bc
@@ -23,7 +23,7 @@ class RussianLemmatizer(Lemmatizer):
         overwrite: bool = False,
         scorer: Optional[Callable] = lemmatizer_score,
     ) -> None:
-        if mode == "pymorphy2":
+        if mode in {"pymorphy2", "pymorphy2_lookup"}:
             try:
                 from pymorphy2 import MorphAnalyzer
             except ImportError:
@@ -18,7 +18,7 @@ class UkrainianLemmatizer(RussianLemmatizer):
         overwrite: bool = False,
         scorer: Optional[Callable] = lemmatizer_score,
     ) -> None:
-        if mode == "pymorphy2":
+        if mode in {"pymorphy2", "pymorphy2_lookup"}:
             try:
                 from pymorphy2 import MorphAnalyzer
             except ImportError:
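The change itself is one line per lemmatizer: the pymorphy2-backed initialization now also runs for the "pymorphy2_lookup" mode, so the MorphAnalyzer that mode relies on actually gets created. A minimal sketch of exercising the fixed path (illustration only, not part of this commit; assumes spaCy v3 with pymorphy2 and its Russian dictionaries installed):

import spacy

# A blank Russian pipeline; the "lemmatizer" factory here resolves to
# RussianLemmatizer, whose __init__ is patched by this commit.
nlp = spacy.blank("ru")
nlp.add_pipe("lemmatizer", config={"mode": "pymorphy2_lookup"})

# Before the fix this mode skipped the pymorphy2 import in __init__ and
# failed at call time; now the analyzer is set up for both modes.
doc = nlp("мама мыла раму")
print([token.lemma_ for token in doc])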
@@ -343,6 +343,14 @@ def ru_lemmatizer():
     return get_lang_class("ru")().add_pipe("lemmatizer")
 
 
+@pytest.fixture
+def ru_lookup_lemmatizer():
+    pytest.importorskip("pymorphy2")
+    return get_lang_class("ru")().add_pipe(
+        "lemmatizer", config={"mode": "pymorphy2_lookup"}
+    )
+
+
 @pytest.fixture(scope="session")
 def sa_tokenizer():
     return get_lang_class("sa")().tokenizer
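The fixture builds its pipeline through get_lang_class, spaCy's registry lookup for Language subclasses; get_lang_class("ru")() is essentially what spacy.blank("ru") does under the hood. A short illustrative sketch:

from spacy.util import get_lang_class

# Look up the Russian Language subclass in the registry and instantiate it,
# then add a lemmatizer pipe running in the lookup mode under test.
Russian = get_lang_class("ru")
nlp = Russian()
lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "pymorphy2_lookup"})
print(type(lemmatizer).__name__)  # RussianLemmatizer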
@@ -422,6 +430,15 @@ def uk_lemmatizer():
     return get_lang_class("uk")().add_pipe("lemmatizer")
 
 
+@pytest.fixture
+def uk_lookup_lemmatizer():
+    pytest.importorskip("pymorphy2")
+    pytest.importorskip("pymorphy2_dicts_uk")
+    return get_lang_class("uk")().add_pipe(
+        "lemmatizer", config={"mode": "pymorphy2_lookup"}
+    )
+
+
 @pytest.fixture(scope="session")
 def ur_tokenizer():
     return get_lang_class("ur")().tokenizer
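Both fixtures guard optional dependencies with pytest.importorskip, which skips (rather than fails) every test requesting the fixture when the package is absent; the Ukrainian fixture additionally needs the pymorphy2_dicts_uk dictionary package. A self-contained sketch of the pattern (standalone example, not part of the spaCy test suite):

import pytest

def test_needs_pymorphy2():
    # Skips with a clear reason if pymorphy2 cannot be imported;
    # otherwise returns the imported module for direct use.
    pymorphy2 = pytest.importorskip("pymorphy2")
    assert hasattr(pymorphy2, "MorphAnalyzer")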
@@ -78,3 +78,17 @@ def test_ru_lemmatizer_punct(ru_lemmatizer):
     assert ru_lemmatizer.pymorphy2_lemmatize(doc[0]) == ['"']
     doc = Doc(ru_lemmatizer.vocab, words=["»"], pos=["PUNCT"])
     assert ru_lemmatizer.pymorphy2_lemmatize(doc[0]) == ['"']
+
+
+def test_ru_doc_lookup_lemmatization(ru_lookup_lemmatizer):
+    words = ["мама", "мыла", "раму"]
+    pos = ["NOUN", "VERB", "NOUN"]
+    morphs = [
+        "Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing",
+        "Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act",
+        "Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing",
+    ]
+    doc = Doc(ru_lookup_lemmatizer.vocab, words=words, pos=pos, morphs=morphs)
+    doc = ru_lookup_lemmatizer(doc)
+    lemmas = [token.lemma_ for token in doc]
+    assert lemmas == ["мама", "мыла", "раму"]
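The new test annotates the Doc by hand instead of running a tagger, so only the lemmatizer pipe is exercised; note that the expected lemmas equal the input surface forms, so the assertion checks that the lookup mode runs end to end rather than testing disambiguation. A minimal sketch of the same hand-annotation pattern (illustrative values, assuming spaCy v3):

from spacy.tokens import Doc
from spacy.vocab import Vocab

# Build a fully annotated Doc without any pipeline components: tokens,
# coarse POS tags, and morphological features are all set directly.
doc = Doc(
    Vocab(),
    words=["мама", "мыла", "раму"],
    pos=["NOUN", "VERB", "NOUN"],
    morphs=["Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing", "", ""],
)
assert doc[0].pos_ == "NOUN"
assert doc[0].morph.get("Case") == ["Nom"]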
@@ -9,3 +9,11 @@ def test_uk_lemmatizer(uk_lemmatizer):
     """Check that the default uk lemmatizer runs."""
     doc = Doc(uk_lemmatizer.vocab, words=["a", "b", "c"])
     uk_lemmatizer(doc)
+    assert [token.lemma for token in doc]
+
+
+def test_uk_lookup_lemmatizer(uk_lookup_lemmatizer):
+    """Check that the lookup uk lemmatizer runs."""
+    doc = Doc(uk_lookup_lemmatizer.vocab, words=["a", "b", "c"])
+    uk_lookup_lemmatizer(doc)
+    assert [token.lemma for token in doc]
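A detail worth noting in both uk tests: token.lemma is the integer hash of the lemma string (token.lemma_ is the string itself), and the assertion passes for any non-empty doc, so these are smoke tests that the pipes run without raising rather than checks of specific lemmas. The hash/string relationship, sketched:

from spacy.tokens import Doc
from spacy.vocab import Vocab

vocab = Vocab()
doc = Doc(vocab, words=["кіт"])
doc[0].lemma_ = "кіт"
# lemma_ resolves through the vocab's shared StringStore; lemma is the hash.
assert doc[0].lemma == vocab.strings["кіт"]
assert doc[0].lemma_ == "кіт"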