Mirror of https://github.com/explosion/spaCy.git, synced 2025-01-27 01:34:30 +03:00
added tests for Russian language

Added tests of creating a Russian Language instance and of the Russian tokenizer.
parent 622b6d6270
commit a229b6e0de
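The new Russian test module itself is not part of the view below; the hunks only touch the shared pytest fixture file (the fixtures and hooks have the shape of spaCy's tests/conftest.py). As a rough illustration of the kind of test the commit message describes, a creation test might look like the sketch here; the test name and assertion are assumptions for illustration, not the commit's actual contents:

# Hypothetical sketch; the commit's real test module is not shown in this diff.

def test_ru_create_russian(russian):
    # `russian` is the fixture added below: it importorskips pymorphy2 and
    # returns a freshly constructed Russian() Language instance. The assert
    # assumes the subclass exposes its language code via `lang`, as the
    # spaCy 1.x Language subclasses did.
    assert russian.lang == 'ru'

Both helpers it relies on (`russian`, and `ru_tokenizer` further down) are defined in the largest hunk of this diff.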
@@ -16,7 +16,7 @@ from ..bn import Bengali
 from ..he import Hebrew
 from ..nb import Norwegian
 from ..th import Thai
-
+from ..ru import Russian
 
 from ..tokens import Doc
 from ..strings import StringStore
@@ -30,7 +30,7 @@ import pytest
 
 # These languages get run through generic tokenizer tests
 LANGUAGES = [English, German, Spanish, Italian, French, Portuguese, Dutch,
-             Swedish, Hungarian, Finnish, Bengali, Norwegian]
+             Swedish, Hungarian, Finnish, Bengali, Norwegian, Russian]
 
 
 @pytest.fixture(params=LANGUAGES)
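Appending Russian to LANGUAGES is what opts the new language into the generic tokenizer tests: the @pytest.fixture(params=LANGUAGES) fixture directly below the list is instantiated once per entry, so every test requesting it now also runs against Russian. A minimal self-contained sketch of that pattern, with illustrative names and an abridged list (import paths assume an installed spaCy 1.x-era layout, matching the relative ..en/..ru imports above):

import pytest
from spacy.en import English
from spacy.ru import Russian

LANGUAGES = [English, Russian]  # abridged stand-in for the full list above


@pytest.fixture(params=LANGUAGES)
def tokenizer(request):
    # pytest re-runs every dependent test once per param, i.e. per language
    return request.param.Defaults.create_tokenizer()


def test_tokenizer_handles_empty_string(tokenizer):
    # an empty input should tokenize to an empty Doc for every language
    assert len(tokenizer("")) == 0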
@@ -53,6 +53,7 @@ def en_vocab():
 def en_parser():
     return English.Defaults.create_parser()
 
+
 @pytest.fixture
 def es_tokenizer():
     return Spanish.Defaults.create_tokenizer()
@@ -83,11 +84,13 @@ def ja_tokenizer():
     pytest.importorskip("MeCab")
     return Japanese.Defaults.create_tokenizer()
 
+
 @pytest.fixture
 def japanese():
     pytest.importorskip("MeCab")
     return Japanese()
 
+
 @pytest.fixture
 def sv_tokenizer():
     return Swedish.Defaults.create_tokenizer()
@@ -102,15 +105,30 @@ def bn_tokenizer():
 def he_tokenizer():
     return Hebrew.Defaults.create_tokenizer()
 
+
 @pytest.fixture
 def nb_tokenizer():
     return Norwegian.Defaults.create_tokenizer()
 
+
 @pytest.fixture
 def th_tokenizer():
     pythainlp = pytest.importorskip("pythainlp")
     return Thai.Defaults.create_tokenizer()
 
+
+@pytest.fixture
+def ru_tokenizer():
+    pytest.importorskip("pymorphy2")
+    return Russian.Defaults.create_tokenizer()
+
+
+@pytest.fixture
+def russian():
+    pytest.importorskip("pymorphy2")
+    return Russian()
+
+
 @pytest.fixture
 def stringstore():
     return StringStore()
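Both new fixtures call pytest.importorskip("pymorphy2") first, so on machines without the Russian morphology backend the dependent tests are skipped rather than failed, mirroring how the Japanese fixtures gate on MeCab and the Thai one on pythainlp. A hedged sketch of a test consuming ru_tokenizer (the sample sentence and expected split are illustrative, assuming spaCy's default punctuation rules apply to Russian):

def test_ru_tokenizer_splits_punctuation(ru_tokenizer):
    # the fixture has already handled the pymorphy2 importorskip
    tokens = ru_tokenizer("Привет, мир!")
    assert [t.text for t in tokens] == ['Привет', ',', 'мир', '!']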
@@ -118,7 +136,7 @@ def stringstore():
 
 @pytest.fixture
 def en_entityrecognizer():
     return English.Defaults.create_entity()
 
 
 @pytest.fixture
@@ -130,6 +148,7 @@ def lemmatizer():
 def text_file():
     return StringIO()
 
+
 @pytest.fixture
 def text_file_b():
     return BytesIO()
@@ -149,11 +168,11 @@ def DE():
 
 def pytest_addoption(parser):
     parser.addoption("--models", action="store_true",
                      help="include tests that require full models")
     parser.addoption("--vectors", action="store_true",
                      help="include word vectors tests")
     parser.addoption("--slow", action="store_true",
                      help="include slow tests")
 
 
 def pytest_runtest_setup(item):
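The final hunk is pure context around the suite's custom command-line flags, and the body of pytest_runtest_setup is cut off at the bottom of the view. Given the three options registered above, its conventional shape would be roughly the following sketch (not the file's verbatim body, which this page truncates):

import pytest


def pytest_runtest_setup(item):
    # skip any test marked `models`, `vectors`, or `slow` unless the
    # matching flag was passed on the command line
    for opt in ('models', 'vectors', 'slow'):
        if opt in item.keywords and not item.config.getoption("--%s" % opt):
            pytest.skip("need --%s option to run" % opt)

With that in place, a run like `python -m pytest spacy/tests --slow` includes the slow set, while a plain run skips it.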