From f4008bdb13e262c389e3d0c7017a634605f6e706 Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Fri, 11 Jun 2021 10:19:22 +0200
Subject: [PATCH] Restrict pymorphy2 requirement to pymorphy2 mode (#8299)

For the Russian and Ukrainian lemmatizers, restrict the `pymorphy2`
requirement to the mode `pymorphy2` so that lookup or other lemmatizer
modes can be loaded without installing `pymorphy2`.
---
 spacy/lang/ru/lemmatizer.py            | 19 ++++++++++---------
 spacy/lang/uk/lemmatizer.py            | 21 +++++++++++----------
 spacy/tests/conftest.py                |  7 +++++++
 spacy/tests/lang/uk/test_lemmatizer.py |  8 ++++++++
 4 files changed, 36 insertions(+), 19 deletions(-)
 create mode 100644 spacy/tests/lang/uk/test_lemmatizer.py

diff --git a/spacy/lang/ru/lemmatizer.py b/spacy/lang/ru/lemmatizer.py
index c337b9bc3..63aa94a36 100644
--- a/spacy/lang/ru/lemmatizer.py
+++ b/spacy/lang/ru/lemmatizer.py
@@ -23,15 +23,16 @@ class RussianLemmatizer(Lemmatizer):
         mode: str = "pymorphy2",
         overwrite: bool = False,
     ) -> None:
-        try:
-            from pymorphy2 import MorphAnalyzer
-        except ImportError:
-            raise ImportError(
-                "The Russian lemmatizer requires the pymorphy2 library: "
-                'try to fix it with "pip install pymorphy2"'
-            ) from None
-        if RussianLemmatizer._morph is None:
-            RussianLemmatizer._morph = MorphAnalyzer()
+        if mode == "pymorphy2":
+            try:
+                from pymorphy2 import MorphAnalyzer
+            except ImportError:
+                raise ImportError(
+                    "The Russian lemmatizer mode 'pymorphy2' requires the "
+                    "pymorphy2 library. Install it with: pip install pymorphy2"
+                ) from None
+            if RussianLemmatizer._morph is None:
+                RussianLemmatizer._morph = MorphAnalyzer()
         super().__init__(vocab, model, name, mode=mode, overwrite=overwrite)
 
     def pymorphy2_lemmatize(self, token: Token) -> List[str]:
diff --git a/spacy/lang/uk/lemmatizer.py b/spacy/lang/uk/lemmatizer.py
index 0b4435a21..e1fdf39fc 100644
--- a/spacy/lang/uk/lemmatizer.py
+++ b/spacy/lang/uk/lemmatizer.py
@@ -18,14 +18,15 @@ class UkrainianLemmatizer(RussianLemmatizer):
         mode: str = "pymorphy2",
         overwrite: bool = False,
     ) -> None:
-        try:
-            from pymorphy2 import MorphAnalyzer
-        except ImportError:
-            raise ImportError(
-                "The Ukrainian lemmatizer requires the pymorphy2 library and "
-                "dictionaries: try to fix it with "
-                '"pip install pymorphy2 pymorphy2-dicts-uk"'
-            ) from None
-        if UkrainianLemmatizer._morph is None:
-            UkrainianLemmatizer._morph = MorphAnalyzer(lang="uk")
+        if mode == "pymorphy2":
+            try:
+                from pymorphy2 import MorphAnalyzer
+            except ImportError:
+                raise ImportError(
+                    "The Ukrainian lemmatizer mode 'pymorphy2' requires the "
+                    "pymorphy2 library and dictionaries. Install them with: "
+                    "pip install pymorphy2 pymorphy2-dicts-uk"
+                ) from None
+            if UkrainianLemmatizer._morph is None:
+                UkrainianLemmatizer._morph = MorphAnalyzer(lang="uk")
         super().__init__(vocab, model, name, mode=mode, overwrite=overwrite)
diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py
index 04e254c50..c6be15189 100644
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@@ -281,6 +281,13 @@ def uk_tokenizer():
     return get_lang_class("uk")().tokenizer
 
 
+@pytest.fixture
+def uk_lemmatizer():
+    pytest.importorskip("pymorphy2")
+    pytest.importorskip("pymorphy2_dicts_uk")
+    return get_lang_class("uk")().add_pipe("lemmatizer")
+
+
 @pytest.fixture(scope="session")
 def ur_tokenizer():
     return get_lang_class("ur")().tokenizer
diff --git a/spacy/tests/lang/uk/test_lemmatizer.py b/spacy/tests/lang/uk/test_lemmatizer.py
new file mode 100644
index 000000000..4a0d91f7e
--- /dev/null
+++ b/spacy/tests/lang/uk/test_lemmatizer.py
@@ -0,0 +1,8 @@
+import pytest
+from spacy.tokens import Doc
+
+
+def test_uk_lemmatizer(uk_lemmatizer):
+    """Check that the default uk lemmatizer runs."""
+    doc = Doc(uk_lemmatizer.vocab, words=["a", "b", "c"])
+    uk_lemmatizer(doc)