mirror of https://github.com/explosion/spaCy.git (synced 2025-01-27 17:54:39 +03:00)
Add basic Japanese tokenizer test

commit e56fea14eb (parent 84041a2bb5)
spacy/tests/conftest.py
@@ -5,6 +5,7 @@ from ..en import English
 from ..de import German
 from ..es import Spanish
 from ..it import Italian
+from ..ja import Japanese
 from ..fr import French
 from ..pt import Portuguese
 from ..nl import Dutch
@@ -27,7 +28,7 @@ import os
 import pytest


-LANGUAGES = [English, German, Spanish, Italian, French, Portuguese, Dutch,
+LANGUAGES = [English, German, Spanish, Italian, Japanese, French, Portuguese, Dutch,
              Swedish, Hungarian, Finnish, Bengali, Norwegian]


@@ -76,6 +77,11 @@ def fi_tokenizer():
     return Finnish.Defaults.create_tokenizer()


+@pytest.fixture
+def ja_tokenizer():
+    return Japanese.Defaults.create_tokenizer()
+
+
 @pytest.fixture
 def sv_tokenizer():
     return Swedish.Defaults.create_tokenizer()
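A note on the mechanism, not part of the diff: pytest collects fixtures defined in conftest.py and injects them into any test that declares one as a parameter, so the new test file below can take ja_tokenizer as an argument without importing anything. A minimal, self-contained sketch of that pattern, with hypothetical names:

    import pytest

    @pytest.fixture
    def tokenizer():
        # stand-in for Japanese.Defaults.create_tokenizer()
        return lambda text: text.split()

    def test_uses_fixture(tokenizer):
        # pytest matches the parameter name against known fixtures
        assert tokenizer("a b c") == ["a", "b", "c"]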
spacy/tests/ja/__init__.py (new file, empty)

spacy/tests/ja/test_tokenizer.py (new file, 8 lines)
@@ -0,0 +1,8 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import pytest
+
+def test_japanese_tokenizer(ja_tokenizer):
+    tokens = ja_tokenizer("日本語だよ")
+    assert len(tokens) == 3
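For reference, the new test exercises the tokenizer end to end: the fixture builds it from the language defaults, and the assertion checks only the token count, not the token strings. A minimal sketch of the same call outside pytest, assuming spaCy's Japanese language data and its tokenizer dependency import cleanly (at this point in spaCy's history that was likely the Janome morphological analyzer):

    # sketch, not part of the commit
    from spacy.ja import Japanese

    tokenizer = Japanese.Defaults.create_tokenizer()
    tokens = tokenizer("日本語だよ")    # roughly, "It's Japanese"
    print([t.text for t in tokens])    # three tokens, per the assertion above

With the fixture in place, the new test can be run in isolation with something like python -m pytest spacy/tests/ja/.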