added tests for Russian language

added tests for creating a Russian Language instance and the Russian tokenizer
yuukos 2017-10-13 14:04:37 +07:00
parent 622b6d6270
commit a229b6e0de
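
The test module this commit adds is not shown in this excerpt; only the conftest.py changes appear below. A minimal sketch of the kind of test the commit message describes, using the ru_tokenizer and russian fixtures introduced in this diff (the test names, sample text, and assertions are illustrative, not the commit's actual code):

# Hypothetical tests mirroring the commit message: creating a Russian
# Language instance and a Russian tokenizer. The `russian` and
# `ru_tokenizer` fixtures come from the conftest.py changes below.

def test_ru_tokenizer_handles_text(ru_tokenizer):
    # Expect one token per word plus the trailing punctuation mark.
    tokens = ru_tokenizer("Привет, мир!")
    assert len(tokens) == 4

def test_russian_language_creates_doc(russian):
    # Calling the Language instance on text should return a Doc.
    doc = russian("Привет, мир!")
    assert doc.text == "Привет, мир!"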

conftest.py

@@ -16,7 +16,7 @@ from ..bn import Bengali
 from ..he import Hebrew
 from ..nb import Norwegian
 from ..th import Thai
+from ..ru import Russian
 from ..tokens import Doc
 from ..strings import StringStore
@@ -30,7 +30,7 @@ import pytest
 # These languages get run through generic tokenizer tests
 LANGUAGES = [English, German, Spanish, Italian, French, Portuguese, Dutch,
-             Swedish, Hungarian, Finnish, Bengali, Norwegian]
+             Swedish, Hungarian, Finnish, Bengali, Norwegian, Russian]
 
 
 @pytest.fixture(params=LANGUAGES)
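
Adding Russian to LANGUAGES is what pulls the new language into the generic, language-agnostic tokenizer tests: the parametrized fixture below the list yields one tokenizer per entry. A sketch of that pattern, inferred from the @pytest.fixture(params=LANGUAGES) line above (the fixture body and test are illustrative):

# Inferred from the params=LANGUAGES usage above: each Language class
# becomes one parametrized case, so every generic test now also runs
# against Russian.
@pytest.fixture(params=LANGUAGES)
def tokenizer(request):
    return request.param.Defaults.create_tokenizer()

def test_tokenizer_handles_empty_string(tokenizer):
    # A generic check that runs once per language in LANGUAGES.
    tokens = tokenizer("")
    assert len(tokens) == 0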
@@ -53,6 +53,7 @@ def en_vocab():
 def en_parser():
     return English.Defaults.create_parser()
 
+
 @pytest.fixture
 def es_tokenizer():
     return Spanish.Defaults.create_tokenizer()
@@ -83,11 +84,13 @@ def ja_tokenizer():
     pytest.importorskip("MeCab")
     return Japanese.Defaults.create_tokenizer()
 
+
 @pytest.fixture
 def japanese():
     pytest.importorskip("MeCab")
     return Japanese()
 
+
 @pytest.fixture
 def sv_tokenizer():
     return Swedish.Defaults.create_tokenizer()
@@ -102,15 +105,30 @@ def bn_tokenizer():
 def he_tokenizer():
     return Hebrew.Defaults.create_tokenizer()
 
+
 @pytest.fixture
 def nb_tokenizer():
     return Norwegian.Defaults.create_tokenizer()
 
+
 @pytest.fixture
 def th_tokenizer():
     pythainlp = pytest.importorskip("pythainlp")
     return Thai.Defaults.create_tokenizer()
 
+
+@pytest.fixture
+def ru_tokenizer():
+    pytest.importorskip("pymorphy2")
+    return Russian.Defaults.create_tokenizer()
+
+
+@pytest.fixture
+def russian():
+    pytest.importorskip("pymorphy2")
+    return Russian()
+
+
 @pytest.fixture
 def stringstore():
     return StringStore()
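
The pytest.importorskip("pymorphy2") calls make both new fixtures degrade gracefully: if pymorphy2 is not installed, any test requesting ru_tokenizer or russian is skipped rather than failed, matching the existing MeCab and pythainlp fixtures above. For example (an illustrative test, not part of the commit):

def test_ru_tokenizer_splits_punctuation(ru_tokenizer):
    # Skipped automatically on machines without pymorphy2, because the
    # fixture calls pytest.importorskip before building the tokenizer.
    tokens = ru_tokenizer("Это тест.")
    assert tokens[-1].text == "."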
@@ -118,7 +136,7 @@ def stringstore():
 @pytest.fixture
 def en_entityrecognizer():
     return English.Defaults.create_entity()
 
 
 @pytest.fixture
@@ -130,6 +148,7 @@ def lemmatizer():
 def text_file():
     return StringIO()
 
+
 @pytest.fixture
 def text_file_b():
     return BytesIO()
@@ -149,11 +168,11 @@ def DE():
 def pytest_addoption(parser):
     parser.addoption("--models", action="store_true",
                      help="include tests that require full models")
     parser.addoption("--vectors", action="store_true",
                      help="include word vectors tests")
     parser.addoption("--slow", action="store_true",
                      help="include slow tests")
 
 
 def pytest_runtest_setup(item):
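
The diff is truncated at pytest_runtest_setup, the hook where the options registered above take effect. A hedged sketch of the usual pairing, assuming markers named after the options (the actual body is not shown in this excerpt):

import pytest

def pytest_runtest_setup(item):
    # Assumed pattern: skip tests carrying a `models`, `vectors` or
    # `slow` marker unless the matching --<opt> flag was passed.
    for opt in ['models', 'vectors', 'slow']:
        if opt in item.keywords and not item.config.getoption("--%s" % opt):
            pytest.skip("need --%s option to run" % opt)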