spaCy/spacy/tests/conftest.py

142 lines
2.9 KiB
Python
Raw Normal View History

2017-01-11 15:56:32 +03:00
# coding: utf-8
from __future__ import unicode_literals
from ..en import English
from ..de import German
2017-01-11 15:56:32 +03:00
from ..es import Spanish
from ..it import Italian
from ..fr import French
from ..pt import Portuguese
from ..nl import Dutch
from ..sv import Swedish
from ..hu import Hungarian
from ..fi import Finnish
2017-03-05 04:11:26 +03:00
from ..bn import Bengali
2017-01-11 15:56:32 +03:00
from ..tokens import Doc
2017-01-12 17:05:40 +03:00
from ..strings import StringStore
2017-01-13 01:38:55 +03:00
from ..lemmatizer import Lemmatizer
2017-01-11 15:56:32 +03:00
from ..attrs import ORTH, TAG, HEAD, DEP
2017-01-13 01:38:47 +03:00
from ..util import match_best_version, get_data_path
2017-01-11 15:56:32 +03:00
2017-01-13 04:23:50 +03:00
from io import StringIO, BytesIO
2017-01-13 01:38:47 +03:00
from pathlib import Path
import os
2017-01-11 15:56:32 +03:00
import pytest
# Language classes used as the parameter set of the `tokenizer` fixture.
LANGUAGES = [
    English,
    German,
    Spanish,
    Italian,
    French,
    Portuguese,
    Dutch,
    Swedish,
    Hungarian,
    Finnish,
    Bengali,
]
2017-01-11 15:56:32 +03:00
@pytest.fixture(params=LANGUAGES)
def tokenizer(request):
    """Build a tokenizer for the language class supplied as the fixture param.

    The fixture is parametrized over ``LANGUAGES``, so ``request.param`` is
    one of those language classes; its ``Defaults.create_tokenizer()`` result
    is returned.
    """
    language_cls = request.param
    defaults = language_cls.Defaults
    return defaults.create_tokenizer()
@pytest.fixture
def en_tokenizer():
    """Return the tokenizer created by ``English.Defaults.create_tokenizer()``."""
    defaults = English.Defaults
    return defaults.create_tokenizer()
2017-01-11 15:56:32 +03:00
@pytest.fixture
def en_vocab():
    """Return the vocab created by ``English.Defaults.create_vocab()``."""
    defaults = English.Defaults
    return defaults.create_vocab()
2017-01-11 23:29:59 +03:00
@pytest.fixture
def en_parser():
    """Return the parser created by ``English.Defaults.create_parser()``."""
    defaults = English.Defaults
    return defaults.create_parser()
2017-01-11 15:56:32 +03:00
@pytest.fixture
def de_tokenizer():
    """Return the tokenizer created by ``German.Defaults.create_tokenizer()``."""
    defaults = German.Defaults
    return defaults.create_tokenizer()
@pytest.fixture(scope='module')
def fr_tokenizer():
    """Return the tokenizer created by ``French.Defaults.create_tokenizer()``.

    Declared with ``scope='module'``, unlike the other per-language
    tokenizer fixtures in this file.
    """
    defaults = French.Defaults
    return defaults.create_tokenizer()
2017-01-11 15:56:32 +03:00
@pytest.fixture
def hu_tokenizer():
    """Return the tokenizer created by ``Hungarian.Defaults.create_tokenizer()``."""
    defaults = Hungarian.Defaults
    return defaults.create_tokenizer()
2017-01-12 18:49:19 +03:00
@pytest.fixture
def fi_tokenizer():
    """Return the tokenizer created by ``Finnish.Defaults.create_tokenizer()``."""
    defaults = Finnish.Defaults
    return defaults.create_tokenizer()
@pytest.fixture
def sv_tokenizer():
    """Return the tokenizer created by ``Swedish.Defaults.create_tokenizer()``."""
    defaults = Swedish.Defaults
    return defaults.create_tokenizer()
2017-03-05 04:11:26 +03:00
@pytest.fixture
def bn_tokenizer():
    """Return the tokenizer created by ``Bengali.Defaults.create_tokenizer()``."""
    defaults = Bengali.Defaults
    return defaults.create_tokenizer()
2017-01-12 17:05:40 +03:00
@pytest.fixture
def stringstore():
    """Return a new ``StringStore`` constructed with no arguments."""
    store = StringStore()
    return store
2017-01-11 15:56:32 +03:00
2017-01-12 18:49:19 +03:00
2017-01-12 23:56:32 +03:00
@pytest.fixture
def en_entityrecognizer():
    """Return the object created by ``English.Defaults.create_entity()``."""
    defaults = English.Defaults
    return defaults.create_entity()
2017-01-13 01:38:55 +03:00
@pytest.fixture
def lemmatizer(path):
    """Load a ``Lemmatizer`` from the ``path`` fixture, if one is available.

    Returns ``Lemmatizer.load(path)`` when ``path`` is not ``None``;
    otherwise returns ``None`` so callers can detect the missing data.
    """
    return None if path is None else Lemmatizer.load(path)
2017-01-11 15:56:32 +03:00
@pytest.fixture
def text_file():
    """Return a new, empty in-memory text buffer (``io.StringIO``)."""
    buffer = StringIO()
    return buffer
2017-01-13 04:23:50 +03:00
@pytest.fixture
def text_file_b():
    """Return a new, empty in-memory bytes buffer (``io.BytesIO``)."""
    buffer = BytesIO()
    return buffer
2017-01-11 15:56:32 +03:00
2017-01-13 01:38:47 +03:00
@pytest.fixture
def path():
    """Locate the data path used by fixtures that load from disk.

    If the ``SPACY_DATA`` environment variable is set, its value is wrapped
    in a ``Path`` and returned. Otherwise the result of
    ``match_best_version('en', None, get_data_path())`` is returned.
    """
    env_value = os.environ.get('SPACY_DATA')
    if env_value is not None:
        return Path(env_value)
    return match_best_version('en', None, get_data_path())
2017-01-13 00:03:07 +03:00
# Only used for tests that require loading the models.
# In all other cases, use specific instances instead.
@pytest.fixture(scope="session")
def EN():
    """Return an ``English()`` instance; the fixture is session-scoped."""
    nlp = English()
    return nlp
2016-05-03 13:51:47 +03:00
2017-01-11 15:56:32 +03:00
@pytest.fixture(scope="session")
def DE():
    """Return a ``German()`` instance; the fixture is session-scoped."""
    nlp = German()
    return nlp
def pytest_addoption(parser):
    """Register the opt-in command-line flags for this test suite.

    Adds ``--models``, ``--vectors`` and ``--slow`` (in that order) to
    ``parser`` as ``store_true`` switches.
    """
    opt_in_flags = (
        ("--models", "include tests that require full models"),
        ("--vectors", "include word vectors tests"),
        ("--slow", "include slow tests"),
    )
    for flag, description in opt_in_flags:
        parser.addoption(flag, action="store_true", help=description)
def pytest_runtest_setup(item):
    """Skip a test whose opt-in keyword was not enabled on the command line.

    For each of ``models``, ``vectors`` and ``slow`` (checked in that order):
    if the name is present in ``item.keywords`` and the matching ``--<name>``
    option is falsy, ``pytest.skip`` is called with an explanatory message.
    """
    for marker in ('models', 'vectors', 'slow'):
        if marker not in item.keywords:
            continue
        # Only query the option for keywords actually present on the item.
        if item.config.getoption("--%s" % marker):
            continue
        pytest.skip("need --%s option to run" % marker)