Add basic Japanese tokenizer test

This commit is contained in:
Paul O'Leary McCann 2017-06-28 01:24:25 +09:00
parent 84041a2bb5
commit e56fea14eb
3 changed files with 15 additions and 1 deletion

View File

@@ -5,6 +5,7 @@ from ..en import English
from ..de import German
from ..es import Spanish
from ..it import Italian
from ..ja import Japanese
from ..fr import French
from ..pt import Portuguese
from ..nl import Dutch
@@ -27,7 +28,7 @@ import os
import pytest
LANGUAGES = [English, German, Spanish, Italian, French, Portuguese, Dutch,
LANGUAGES = [English, German, Spanish, Italian, Japanese, French, Portuguese, Dutch,
Swedish, Hungarian, Finnish, Bengali, Norwegian]
@@ -76,6 +77,11 @@ def fi_tokenizer():
return Finnish.Defaults.create_tokenizer()
@pytest.fixture
def ja_tokenizer():
    """Pytest fixture: build and return a fresh Japanese tokenizer."""
    tokenizer = Japanese.Defaults.create_tokenizer()
    return tokenizer
@pytest.fixture
def sv_tokenizer():
    """Pytest fixture: build and return a fresh Swedish tokenizer."""
    tokenizer = Swedish.Defaults.create_tokenizer()
    return tokenizer

View File

View File

@ -0,0 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import pytest
def test_japanese_tokenizer(ja_tokenizer):
    """Smoke test: the sample sentence should split into exactly three tokens."""
    doc = ja_tokenizer("日本語だよ")
    token_count = len(doc)
    assert token_count == 3