Fix formatting and add comment on languages
commit 9b3f8f9ec3
parent a4d974d97b
spacy/tests/conftest.py
@@ -11,8 +11,12 @@ from ..strings import StringStore
 from .. import util
 
+
+# These languages are used for generic tokenizer tests – only add a language
+# here if it's using spaCy's tokenizer (not a different library)
+# TODO: re-implement generic tokenizer tests
 _languages = ['bn', 'da', 'de', 'en', 'es', 'fi', 'fr', 'he', 'hu', 'id',
-              'it', 'nb', 'nl', 'pl', 'pt', 'sv', 'th','xx']
+              'it', 'nb', 'nl', 'pl', 'pt', 'sv', 'xx']
 
 _models = {'en': ['en_core_web_sm'],
            'de': ['de_core_news_md'],
            'fr': ['fr_depvec_web_lg'],
@@ -42,6 +46,7 @@ def FR(request):
 #lang = util.get_lang_class(request.param)
 #return lang.Defaults.create_tokenizer()
 
+
 @pytest.fixture
 def tokenizer():
     return util.get_lang_class('xx').Defaults.create_tokenizer()
@@ -87,10 +92,12 @@ def hu_tokenizer():
 def fi_tokenizer():
     return util.get_lang_class('fi').Defaults.create_tokenizer()
 
+
 @pytest.fixture
 def id_tokenizer():
     return util.get_lang_class('id').Defaults.create_tokenizer()
 
+
 @pytest.fixture
 def sv_tokenizer():
     return util.get_lang_class('sv').Defaults.create_tokenizer()
@@ -105,6 +112,7 @@ def bn_tokenizer():
 def he_tokenizer():
     return util.get_lang_class('he').Defaults.create_tokenizer()
 
+
 @pytest.fixture
 def nb_tokenizer():
     return util.get_lang_class('nb').Defaults.create_tokenizer()
@@ -129,6 +137,7 @@ def en_entityrecognizer():
 def text_file():
     return StringIO()
 
+
 @pytest.fixture
 def text_file_b():
     return BytesIO()
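The commented-out lines under def FR(request):, together with the new "TODO: re-implement generic tokenizer tests" comment, outline how _languages was meant to drive a single parametrized tokenizer fixture instead of one fixture per language. A minimal sketch of that pattern, assuming pytest and the util.get_lang_class(...).Defaults.create_tokenizer() API used throughout this conftest; the test body and sample text are illustrative, not part of the commit:

    import pytest

    from .. import util

    # Language codes whose tokenizers are implemented in spaCy itself
    # (mirrors the _languages list from the diff above).
    _languages = ['bn', 'da', 'de', 'en', 'es', 'fi', 'fr', 'he', 'hu', 'id',
                  'it', 'nb', 'nl', 'pl', 'pt', 'sv', 'xx']


    @pytest.fixture(params=_languages)
    def tokenizer(request):
        # One fixture instance per language code: look up the Language
        # subclass and build a bare tokenizer from its defaults.
        lang = util.get_lang_class(request.param)
        return lang.Defaults.create_tokenizer()


    def test_tokenizer_splits_trailing_punct(tokenizer):
        # Generic check: the default suffix rules should split a trailing
        # period into its own token for every language listed above.
        tokens = tokenizer("hello.")
        assert tokens[-1].text == '.'

Parametrizing the fixture keeps one generic test body while pytest expands it into a case per language code, which is why the new comment warns against listing languages whose tokenizers come from a different library.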