spaCy/spacy/tests/lang/ko/test_lemmatization.py

14 lines
371 B
Python
Raw Normal View History

# coding: utf-8
from __future__ import unicode_literals
import pytest
@pytest.mark.parametrize(
    "word,lemma",
    [
        ("새로운", "새롭"),
        ("빨간", "빨갛"),
        # NOTE(review): the three cases below expect an empty lemma. Confirm
        # that this is intended and that the expected values were not lost.
        ("클수록", ""),
        ("뭡니까", ""),
        ("됐다", ""),
    ],
)
def test_ko_lemmatizer_assigns(ko_tokenizer, word, lemma):
    """Check the lemma assigned to the first token of a tokenized word.

    The ``ko_tokenizer`` fixture is called on ``word``; the ``lemma_``
    attribute of the first resulting token must equal ``lemma``.
    """
    test_lemma = ko_tokenizer(word)[0].lemma_
    assert test_lemma == lemma