mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
Restrict pymorphy2 requirement to pymorphy2 mode (#8299)
For the Russian and Ukrainian lemmatizers, restrict the `pymorphy2` requirement to the mode `pymorphy2` so that lookup or other lemmatizer modes can be loaded without installing `pymorphy2`.
This commit is contained in:
parent
0a1a4c665d
commit
f4008bdb13
|
@@ -23,15 +23,16 @@ class RussianLemmatizer(Lemmatizer):
|
||||||
mode: str = "pymorphy2",
|
mode: str = "pymorphy2",
|
||||||
overwrite: bool = False,
|
overwrite: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
try:
|
if mode == "pymorphy2":
|
||||||
from pymorphy2 import MorphAnalyzer
|
try:
|
||||||
except ImportError:
|
from pymorphy2 import MorphAnalyzer
|
||||||
raise ImportError(
|
except ImportError:
|
||||||
"The Russian lemmatizer requires the pymorphy2 library: "
|
raise ImportError(
|
||||||
'try to fix it with "pip install pymorphy2"'
|
"The Russian lemmatizer mode 'pymorphy2' requires the "
|
||||||
) from None
|
"pymorphy2 library. Install it with: pip install pymorphy2"
|
||||||
if RussianLemmatizer._morph is None:
|
) from None
|
||||||
RussianLemmatizer._morph = MorphAnalyzer()
|
if RussianLemmatizer._morph is None:
|
||||||
|
RussianLemmatizer._morph = MorphAnalyzer()
|
||||||
super().__init__(vocab, model, name, mode=mode, overwrite=overwrite)
|
super().__init__(vocab, model, name, mode=mode, overwrite=overwrite)
|
||||||
|
|
||||||
def pymorphy2_lemmatize(self, token: Token) -> List[str]:
|
def pymorphy2_lemmatize(self, token: Token) -> List[str]:
|
||||||
|
|
|
@@ -18,14 +18,15 @@ class UkrainianLemmatizer(RussianLemmatizer):
|
||||||
mode: str = "pymorphy2",
|
mode: str = "pymorphy2",
|
||||||
overwrite: bool = False,
|
overwrite: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
try:
|
if mode == "pymorphy2":
|
||||||
from pymorphy2 import MorphAnalyzer
|
try:
|
||||||
except ImportError:
|
from pymorphy2 import MorphAnalyzer
|
||||||
raise ImportError(
|
except ImportError:
|
||||||
"The Ukrainian lemmatizer requires the pymorphy2 library and "
|
raise ImportError(
|
||||||
"dictionaries: try to fix it with "
|
"The Ukrainian lemmatizer mode 'pymorphy2' requires the "
|
||||||
'"pip install pymorphy2 pymorphy2-dicts-uk"'
|
"pymorphy2 library and dictionaries. Install them with: "
|
||||||
) from None
|
"pip install pymorphy2 pymorphy2-dicts-uk"
|
||||||
if UkrainianLemmatizer._morph is None:
|
) from None
|
||||||
UkrainianLemmatizer._morph = MorphAnalyzer(lang="uk")
|
if UkrainianLemmatizer._morph is None:
|
||||||
|
UkrainianLemmatizer._morph = MorphAnalyzer(lang="uk")
|
||||||
super().__init__(vocab, model, name, mode=mode, overwrite=overwrite)
|
super().__init__(vocab, model, name, mode=mode, overwrite=overwrite)
|
||||||
|
|
|
@@ -281,6 +281,13 @@ def uk_tokenizer():
|
||||||
return get_lang_class("uk")().tokenizer
|
return get_lang_class("uk")().tokenizer
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def uk_lemmatizer():
|
||||||
|
pytest.importorskip("pymorphy2")
|
||||||
|
pytest.importorskip("pymorphy2_dicts_uk")
|
||||||
|
return get_lang_class("uk")().add_pipe("lemmatizer")
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
def ur_tokenizer():
|
def ur_tokenizer():
|
||||||
return get_lang_class("ur")().tokenizer
|
return get_lang_class("ur")().tokenizer
|
||||||
|
|
8
spacy/tests/lang/uk/test_lemmatizer.py
Normal file
8
spacy/tests/lang/uk/test_lemmatizer.py
Normal file
|
@@ -0,0 +1,8 @@
|
||||||
|
import pytest
|
||||||
|
from spacy.tokens import Doc
|
||||||
|
|
||||||
|
|
||||||
|
def test_uk_lemmatizer(uk_lemmatizer):
|
||||||
|
"""Check that the default uk lemmatizer runs."""
|
||||||
|
doc = Doc(uk_lemmatizer.vocab, words=["a", "b", "c"])
|
||||||
|
uk_lemmatizer(doc)
|
Loading…
Reference in New Issue
Block a user