spaCy/spacy/tests/lang/ja/test_lemmatization.py
Matthew Honnibal ba7468e32e
Update requirements, fixing windows crashes (#13727)
* Re-enable pretraining test

* Require thinc 8.3.4

* Reformat

* Re-enable test
2025-01-13 16:39:46 +01:00

31 lines
741 B
Python

import pytest
@pytest.mark.parametrize(
    "word,lemma",
    [
        ("新しく", "新しい"),
        ("赤く", "赤い"),
        ("すごく", "すごい"),
        ("いただきました", "いただく"),
        ("なった", "なる"),
    ],
)
def test_ja_lemmatizer_assigns(ja_tokenizer, word, lemma):
    """Check the lemma assigned to the first token of ``word``.

    ``ja_tokenizer`` is called on ``word`` and only the first resulting
    token is inspected; its ``lemma_`` must equal ``lemma``.
    """
    # Only index 0 is checked, even if the input yields several tokens.
    first_token = ja_tokenizer(word)[0]
    assert first_token.lemma_ == lemma
@pytest.mark.parametrize(
    "word,norm",
    [
        ("SUMMER", "サマー"),
        ("食べ物", "食べ物"),
        ("綜合", "総合"),
        ("コンピュータ", "コンピューター"),
    ],
)
def test_ja_lemmatizer_norm(ja_tokenizer, word, norm):
    """Check the normalized form assigned to the first token of ``word``.

    ``ja_tokenizer`` is called on ``word`` and only the first resulting
    token is inspected; its ``norm_`` must equal ``norm``.
    """
    # Only index 0 is checked, even if the input yields several tokens.
    first_token = ja_tokenizer(word)[0]
    assert first_token.norm_ == norm