mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Fix formatting and add comment on languages
This commit is contained in:
parent
a4d974d97b
commit
9b3f8f9ec3
|
@ -11,8 +11,12 @@ from ..strings import StringStore
|
|||
from .. import util
|
||||
|
||||
|
||||
# These languages are used for generic tokenizer tests – only add a language
|
||||
# here if it's using spaCy's tokenizer (not a different library)
|
||||
# TODO: re-implement generic tokenizer tests
|
||||
_languages = ['bn', 'da', 'de', 'en', 'es', 'fi', 'fr', 'he', 'hu', 'id',
|
||||
'it', 'nb', 'nl', 'pl', 'pt', 'sv', 'th','xx']
|
||||
'it', 'nb', 'nl', 'pl', 'pt', 'sv', 'xx']
|
||||
|
||||
_models = {'en': ['en_core_web_sm'],
|
||||
'de': ['de_core_news_md'],
|
||||
'fr': ['fr_depvec_web_lg'],
|
||||
|
@ -42,6 +46,7 @@ def FR(request):
|
|||
#lang = util.get_lang_class(request.param)
|
||||
#return lang.Defaults.create_tokenizer()
|
||||
|
||||
|
||||
@pytest.fixture
def tokenizer():
    """Return a tokenizer built from the defaults of the 'xx' language class."""
    # Resolve the language class first, then let its Defaults build the tokenizer.
    lang_cls = util.get_lang_class('xx')
    return lang_cls.Defaults.create_tokenizer()
|
||||
|
@ -87,10 +92,12 @@ def hu_tokenizer():
|
|||
def fi_tokenizer():
|
||||
return util.get_lang_class('fi').Defaults.create_tokenizer()
|
||||
|
||||
|
||||
@pytest.fixture
def id_tokenizer():
    """Return a tokenizer built from the defaults of the 'id' language class."""
    # Resolve the language class first, then let its Defaults build the tokenizer.
    lang_cls = util.get_lang_class('id')
    return lang_cls.Defaults.create_tokenizer()
|
||||
|
||||
|
||||
@pytest.fixture
def sv_tokenizer():
    """Return a tokenizer built from the defaults of the 'sv' language class."""
    # Resolve the language class first, then let its Defaults build the tokenizer.
    lang_cls = util.get_lang_class('sv')
    return lang_cls.Defaults.create_tokenizer()
|
||||
|
@ -105,6 +112,7 @@ def bn_tokenizer():
|
|||
def he_tokenizer():
|
||||
return util.get_lang_class('he').Defaults.create_tokenizer()
|
||||
|
||||
|
||||
@pytest.fixture
def nb_tokenizer():
    """Return a tokenizer built from the defaults of the 'nb' language class."""
    # Resolve the language class first, then let its Defaults build the tokenizer.
    lang_cls = util.get_lang_class('nb')
    return lang_cls.Defaults.create_tokenizer()
|
||||
|
@ -129,6 +137,7 @@ def en_entityrecognizer():
|
|||
def text_file():
|
||||
return StringIO()
|
||||
|
||||
|
||||
@pytest.fixture
def text_file_b():
    """Return a newly constructed, empty ``BytesIO`` in-memory buffer."""
    buffer = BytesIO()
    return buffer
|
||||
|
|
Loading…
Reference in New Issue
Block a user