mirror of https://github.com/explosion/spaCy.git
add th test

commit 1abf472068
parent 39bb5690f0
spacy/tests/conftest.py

@@ -15,6 +15,7 @@ from ..fi import Finnish
 from ..bn import Bengali
 from ..he import Hebrew
 from ..nb import Norwegian
+from ..th import Thai


 from ..tokens import Doc
@@ -101,6 +102,11 @@ def he_tokenizer():
 def nb_tokenizer():
     return Norwegian.Defaults.create_tokenizer()

+@pytest.fixture
+def th_tokenizer():
+    pythainlp = pytest.importorskip("pythainlp")
+    return Thai.Defaults.create_tokenizer()
+
 @pytest.fixture
 def stringstore():
     return StringStore()
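The new `th_tokenizer` fixture calls `pytest.importorskip`, so tests that depend on the optional `pythainlp` package are skipped rather than failed on machines where it is not installed. A minimal sketch of that pattern in isolation (the fixture and test names here are illustrative, not part of the commit):

import pytest


@pytest.fixture
def pythainlp_module():
    # importorskip imports and returns the named module, or marks the
    # requesting test as skipped when the package is not installed.
    return pytest.importorskip("pythainlp")


def test_optional_dependency(pythainlp_module):
    assert pythainlp_module.__name__ == "pythainlp"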
spacy/tests/th/test_tokenizer.py (new file, 13 lines)
@@ -0,0 +1,13 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import pytest
+
+TOKENIZER_TESTS = [
+    ("คุณรักผมไหม", ['คุณ', 'รัก', 'ผม', 'ไหม'])
+]
+
+@pytest.mark.parametrize('text,expected_tokens', TOKENIZER_TESTS)
+def test_thai_tokenizer(th_tokenizer, text, expected_tokens):
+    tokens = [token.text for token in th_tokenizer(text)]
+    assert tokens == expected_tokens
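The test is parametrized, so each `(text, expected_tokens)` pair in `TOKENIZER_TESTS` becomes its own test case, with the `th_tokenizer` fixture injected from conftest.py. A hypothetical standalone equivalent of the same check, assuming `pythainlp` is installed and the spaCy 1.x-era layout this commit targets (`spacy.th.Thai` with `Defaults.create_tokenizer()`):

# coding: utf-8
from __future__ import unicode_literals

from spacy.th import Thai  # spaCy 1.x-era import path; an assumption here

tokenizer = Thai.Defaults.create_tokenizer()
# "คุณรักผมไหม" is roughly "Do you love me?"; Thai is written without
# spaces between words, so the tokenizer must segment it itself.
tokens = [token.text for token in tokenizer("คุณรักผมไหม")]
assert tokens == ['คุณ', 'รัก', 'ผม', 'ไหม']

The committed test can be run with `pytest spacy/tests/th/test_tokenizer.py`; thanks to the importorskip in the fixture, it reports as skipped rather than failed when pythainlp is unavailable.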