mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
58f06e6180
* start lang/ko * add test codes * using natto-py * add test_ko_tokenizer_full_tags() * spaCy contributor agreement * external dependency for ko * collections.namedtuple for python version < 3.5 * case fix * tuple unpacking * add jongseong(final consonant) * apply mecab option * Remove Pipfile for now Co-authored-by: Ines Montani <ines@ines.io>
14 lines
371 B
Python
14 lines
371 B
Python
# coding: utf-8
|
|
from __future__ import unicode_literals
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.mark.parametrize(
    "word,lemma",
    [
        ("새로운", "새롭"),
        ("빨간", "빨갛"),
        ("클수록", "크"),
        ("뭡니까", "뭣"),
        ("됐다", "되"),
    ],
)
def test_ko_lemmatizer_assigns(ko_tokenizer, word, lemma):
    """Check that the first token produced for ``word`` has lemma ``lemma``.

    ``ko_tokenizer`` is a fixture supplied outside this file; it is called
    with the raw string and the result is indexed at position 0.
    """
    doc = ko_tokenizer(word)
    first_token = doc[0]
    # Only the leading token's lemma is compared against the expected value.
    assert first_token.lemma_ == lemma
|