Mirror of https://github.com/explosion/spaCy.git

Commit 1abf472068: add th test
Parent: 39bb5690f0
spacy/tests/conftest.py

@@ -15,6 +15,7 @@ from ..fi import Finnish
 from ..bn import Bengali
 from ..he import Hebrew
 from ..nb import Norwegian
+from ..th import Thai
 
 
 from ..tokens import Doc
@@ -101,6 +102,11 @@ def he_tokenizer():
 def nb_tokenizer():
     return Norwegian.Defaults.create_tokenizer()
 
+@pytest.fixture
+def th_tokenizer():
+    pythainlp = pytest.importorskip("pythainlp")
+    return Thai.Defaults.create_tokenizer()
+
 @pytest.fixture
 def stringstore():
     return StringStore()
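Note on the fixture above: pytest.importorskip("pythainlp") imports pythainlp if it is available and skips the requesting test otherwise, so the Thai tests never fail on machines that lack the optional dependency. A minimal self-contained sketch of that behaviour (the test name below is illustrative, not part of this commit):

# illustrative only, not part of this commit
import pytest

def test_skips_without_pythainlp():
    # Returns the imported module when pythainlp is installed; otherwise
    # raises pytest's internal Skipped exception, marking the test skipped.
    pythainlp = pytest.importorskip("pythainlp")
    assert pythainlp is not None

Because importorskip runs inside the th_tokenizer fixture, every test that requests the fixture inherits this skip behaviour automatically.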
spacy/tests/th/test_tokenizer.py (new file, 13 lines added)
@@ -0,0 +1,13 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import pytest
+
+TOKENIZER_TESTS = [
+    ("คุณรักผมไหม", ['คุณ', 'รัก', 'ผม', 'ไหม'])
+]
+
+@pytest.mark.parametrize('text,expected_tokens', TOKENIZER_TESTS)
+def test_thai_tokenizer(th_tokenizer, text, expected_tokens):
+    tokens = [token.text for token in th_tokenizer(text)]
+    assert tokens == expected_tokens
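For context on why the test expects these four tokens: Thai script is written without spaces between words, so tokenization requires dictionary-based word segmentation rather than whitespace splitting. A rough sketch of producing the same segmentation directly with pythainlp (assuming its word_tokenize helper; this is illustrative, not spaCy's actual wiring):

# illustrative only: shows the segmentation the test above asserts,
# assuming pythainlp's word_tokenize API
from pythainlp.tokenize import word_tokenize

# Thai text has no inter-word spaces; segmentation is dictionary-based.
print(word_tokenize("คุณรักผมไหม"))  # test expects: ['คุณ', 'รัก', 'ผม', 'ไหม']

The new test can be run with a standard pytest invocation, e.g. pytest spacy/tests/th/test_tokenizer.py, and is skipped automatically when pythainlp is not installed, thanks to the th_tokenizer fixture added in conftest.py above.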