spaCy/spacy/tests/lang/uk/test_lemmatizer.py
Adriane Boyd 30d31fd335
Update Russian and Ukrainian lemmatizers (#11811)
* pymorphy2 issues #11620, #11626, #11625:
- #11620: pymorphy2_lookup
- #11626: handle multiple forms pointing to the same normal form + handling empty POS tag
- #11625: matching DET that are labelled as PRON by pymorphy2

* Move lemmatizer algorithm changes back into RussianLemmatizer

* Fix uk pymorphy3_lookup mode init

* Move and update tests for ru/uk lookup lemmatizer modes

* Fix typo

* Remove traces of previous behavior for uninflected POS

* Refactor to private generic-looking pymorphy methods

* Remove xfailed uk lemmatizer cases

* Update spacy/lang/ru/lemmatizer.py

Co-authored-by: Richard Hudson <richard@explosion.ai>

Co-authored-by: Dmytro S Lituiev <d.lituiev@gmail.com>
Co-authored-by: Richard Hudson <richard@explosion.ai>
2022-11-25 11:12:46 +01:00

28 lines
837 B
Python

import pytest
from spacy.tokens import Doc
# Module-level pytest mark: applies the "ignore::DeprecationWarning" warning
# filter to the tests in this file.
# NOTE(review): the source of the DeprecationWarning is not visible here
# (presumably the pymorphy3 dependency) -- confirm before removing.
pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_uk_lemmatizer(uk_lemmatizer):
    """Check that the default uk lemmatizer runs and assigns lemmas.

    Builds a three-token Doc on the lemmatizer's own vocab, confirms the
    component is in the "pymorphy3" mode, applies it to the Doc, and then
    verifies that every token carries a lemma afterwards.
    """
    doc = Doc(uk_lemmatizer.vocab, words=["a", "b", "c"])
    assert uk_lemmatizer.mode == "pymorphy3"
    uk_lemmatizer(doc)
    # Asserting on the list itself would always pass: a list with three
    # items is truthy regardless of its contents. Check each token's lemma
    # ID individually instead (a token without a lemma has ID 0, which is
    # falsy).
    assert all(token.lemma for token in doc)
@pytest.mark.parametrize(
    "word,lemma",
    [
        ("якийсь", "якийсь"),
        ("розповідають", "розповідати"),
        ("розповіси", "розповісти"),
    ],
)
def test_uk_lookup_lemmatizer(uk_lookup_lemmatizer, word, lemma):
    """Check the lemma produced for a single word in lookup mode.

    Each parametrized case wraps one word in a Doc, runs the lookup
    lemmatizer over it, and compares the first token's lemma text with the
    expected lemma.
    """
    expected_mode = "pymorphy3_lookup"
    assert uk_lookup_lemmatizer.mode == expected_mode

    single_word_doc = Doc(uk_lookup_lemmatizer.vocab, words=[word])
    processed = uk_lookup_lemmatizer(single_word_doc)
    first_token = processed[0]
    assert first_token.lemma_ == lemma