spaCy/spacy/tests/conftest.py

182 lines
3.7 KiB
Python
Raw Normal View History

2017-01-11 15:56:32 +03:00
# coding: utf-8
from __future__ import unicode_literals
from ..en import English
from ..de import German
2017-01-11 15:56:32 +03:00
from ..es import Spanish
from ..it import Italian
2017-06-27 19:24:25 +03:00
from ..ja import Japanese
2017-01-11 15:56:32 +03:00
from ..fr import French
from ..pt import Portuguese
from ..nl import Dutch
from ..sv import Swedish
from ..hu import Hungarian
from ..fi import Finnish
2017-03-05 04:11:26 +03:00
from ..bn import Bengali
2017-03-24 18:27:44 +03:00
from ..he import Hebrew
2017-04-27 00:21:41 +03:00
from ..nb import Norwegian
2017-09-21 08:56:58 +03:00
from ..th import Thai
from ..ru import Russian
2017-03-25 12:03:47 +03:00
2017-01-11 15:56:32 +03:00
from ..tokens import Doc
2017-01-12 17:05:40 +03:00
from ..strings import StringStore
2017-01-13 01:38:55 +03:00
from ..lemmatizer import Lemmatizer
2017-01-11 15:56:32 +03:00
from ..attrs import ORTH, TAG, HEAD, DEP
2017-01-13 04:23:50 +03:00
from io import StringIO, BytesIO
2017-01-13 01:38:47 +03:00
from pathlib import Path
import os
2017-01-11 15:56:32 +03:00
import pytest
# Language classes that are run through the generic tokenizer tests; the
# parametrized ``tokenizer`` fixture produces one tokenizer per entry.
# NOTE(review): Russian is listed here, while the dedicated ``ru_tokenizer``
# fixture skips when pymorphy2 is not importable -- confirm the generic
# tokenizer tests do not need the same guard.
LANGUAGES = [
    English,
    German,
    Spanish,
    Italian,
    French,
    Portuguese,
    Dutch,
    Swedish,
    Hungarian,
    Finnish,
    Bengali,
    Norwegian,
    Russian,
]
2017-01-11 15:56:32 +03:00
@pytest.fixture(params=LANGUAGES)
def tokenizer(request):
    """Return a tokenizer for the language class selected by the current param.

    The fixture is parametrized over ``LANGUAGES``, so a test requesting it
    runs once per listed language class.
    """
    language_cls = request.param
    defaults = language_cls.Defaults
    return defaults.create_tokenizer()
@pytest.fixture
def en_tokenizer():
    """Return a tokenizer built from the English language defaults."""
    defaults = English.Defaults
    return defaults.create_tokenizer()
2017-01-11 15:56:32 +03:00
@pytest.fixture
def en_vocab():
    """Return a vocab built from the English language defaults."""
    defaults = English.Defaults
    return defaults.create_vocab()
2017-01-11 23:29:59 +03:00
@pytest.fixture
def en_parser():
    """Return a parser built from the English language defaults."""
    defaults = English.Defaults
    return defaults.create_parser()
@pytest.fixture
def es_tokenizer():
    """Return a tokenizer built from the Spanish language defaults."""
    defaults = Spanish.Defaults
    return defaults.create_tokenizer()
2017-01-11 23:29:59 +03:00
2017-01-11 15:56:32 +03:00
@pytest.fixture
def de_tokenizer():
    """Return a tokenizer built from the German language defaults."""
    defaults = German.Defaults
    return defaults.create_tokenizer()
@pytest.fixture(scope='module')
def fr_tokenizer():
    """Return a tokenizer built from the French language defaults.

    Unlike the other per-language tokenizer fixtures in this file, this one
    is module-scoped, so tests in the same module share one instance.
    """
    defaults = French.Defaults
    return defaults.create_tokenizer()
2017-01-11 15:56:32 +03:00
@pytest.fixture
def hu_tokenizer():
    """Return a tokenizer built from the Hungarian language defaults."""
    defaults = Hungarian.Defaults
    return defaults.create_tokenizer()
2017-01-12 18:49:19 +03:00
@pytest.fixture
def fi_tokenizer():
    """Return a tokenizer built from the Finnish language defaults."""
    defaults = Finnish.Defaults
    return defaults.create_tokenizer()
2017-06-27 19:24:25 +03:00
@pytest.fixture
def ja_tokenizer():
    """Return a tokenizer built from the Japanese language defaults.

    The requesting test is skipped when the ``MeCab`` module cannot be
    imported.
    """
    # Skip before touching the Japanese defaults if MeCab is unavailable.
    pytest.importorskip("MeCab")
    defaults = Japanese.Defaults
    return defaults.create_tokenizer()
@pytest.fixture
def japanese():
    """Return a ``Japanese()`` language instance.

    The requesting test is skipped when the ``MeCab`` module cannot be
    imported.
    """
    # Skip before constructing the language object if MeCab is unavailable.
    pytest.importorskip("MeCab")
    nlp = Japanese()
    return nlp
2017-06-27 19:24:25 +03:00
@pytest.fixture
def sv_tokenizer():
    """Return a tokenizer built from the Swedish language defaults."""
    defaults = Swedish.Defaults
    return defaults.create_tokenizer()
2017-03-24 18:27:44 +03:00
@pytest.fixture
def bn_tokenizer():
    """Return a tokenizer built from the Bengali language defaults."""
    defaults = Bengali.Defaults
    return defaults.create_tokenizer()
@pytest.fixture
def he_tokenizer():
    """Return a tokenizer built from the Hebrew language defaults."""
    defaults = Hebrew.Defaults
    return defaults.create_tokenizer()
2017-04-27 00:21:41 +03:00
@pytest.fixture
def nb_tokenizer():
    """Return a tokenizer built from the Norwegian language defaults."""
    defaults = Norwegian.Defaults
    return defaults.create_tokenizer()
2017-03-24 18:27:44 +03:00
2017-09-21 08:56:58 +03:00
@pytest.fixture
def th_tokenizer():
    """Return a tokenizer built from the Thai language defaults.

    The requesting test is skipped when the ``pythainlp`` module cannot be
    imported.
    """
    # Only the skip side effect is needed; the imported module itself is not
    # used here, so it is not bound to a local name.
    pytest.importorskip("pythainlp")
    return Thai.Defaults.create_tokenizer()
@pytest.fixture
def ru_tokenizer():
    """Return a tokenizer built from the Russian language defaults.

    The requesting test is skipped when the ``pymorphy2`` module cannot be
    imported.
    """
    # Skip before touching the Russian defaults if pymorphy2 is unavailable.
    pytest.importorskip("pymorphy2")
    defaults = Russian.Defaults
    return defaults.create_tokenizer()
@pytest.fixture
def russian():
    """Return a ``Russian()`` language instance.

    The requesting test is skipped when the ``pymorphy2`` module cannot be
    imported.
    """
    # Skip before constructing the language object if pymorphy2 is unavailable.
    pytest.importorskip("pymorphy2")
    nlp = Russian()
    return nlp
2017-01-12 17:05:40 +03:00
@pytest.fixture
def stringstore():
    """Return a new ``StringStore`` constructed with no arguments."""
    store = StringStore()
    return store
2017-01-11 15:56:32 +03:00
2017-01-12 18:49:19 +03:00
2017-01-12 23:56:32 +03:00
@pytest.fixture
def en_entityrecognizer():
    """Return the object produced by ``English.Defaults.create_entity()``."""
    defaults = English.Defaults
    return defaults.create_entity()
2017-01-12 23:56:32 +03:00
2017-01-13 01:38:55 +03:00
@pytest.fixture
def lemmatizer():
    """Return a lemmatizer built from the English language defaults."""
    defaults = English.Defaults
    return defaults.create_lemmatizer()
2017-01-13 01:38:55 +03:00
2017-01-11 15:56:32 +03:00
@pytest.fixture
def text_file():
    """Return a new, empty in-memory text buffer (``io.StringIO``)."""
    buffer = StringIO()
    return buffer
2017-01-13 04:23:50 +03:00
@pytest.fixture
def text_file_b():
    """Return a new, empty in-memory bytes buffer (``io.BytesIO``)."""
    buffer = BytesIO()
    return buffer
2017-01-11 15:56:32 +03:00
2017-01-13 00:03:07 +03:00
# Only used for tests that require loading the models; in all other cases,
# use the specific instances.
@pytest.fixture(scope="session")
def EN():
    """Return an ``English()`` instance shared across the whole test session."""
    nlp = English()
    return nlp
2016-05-03 13:51:47 +03:00
2017-01-11 15:56:32 +03:00
@pytest.fixture(scope="session")
def DE():
    """Return a ``German()`` instance shared across the whole test session."""
    nlp = German()
    return nlp
def pytest_addoption(parser):
    """Register the opt-in command-line flags of the test suite.

    Adds ``--models``, ``--vectors`` and ``--slow`` to ``parser``, each as a
    ``store_true`` switch.
    """
    opt_in_flags = (
        ("--models", "include tests that require full models"),
        ("--vectors", "include word vectors tests"),
        ("--slow", "include slow tests"),
    )
    for flag, description in opt_in_flags:
        parser.addoption(flag, action="store_true", help=description)
def pytest_runtest_setup(item):
    """Skip a test that carries an opt-in keyword whose flag was not given.

    For each of ``models``, ``vectors`` and ``slow``: if the name is among
    ``item.keywords`` and the matching ``--<name>`` option is not set, the
    test is skipped via ``pytest.skip``.
    """
    for name in ('models', 'vectors', 'slow'):
        if name not in item.keywords:
            continue
        # The option is only looked up for keywords the test actually has.
        if item.config.getoption("--%s" % name):
            continue
        pytest.skip("need --%s option to run" % name)