Merge load_lang_class and get_lang_class

This commit is contained in:
ines 2017-05-14 01:31:10 +02:00
parent 36bebe7164
commit b462076d80
5 changed files with 39 additions and 39 deletions

View File

@@ -16,7 +16,7 @@ def load(name, **overrides):
meta = util.parse_package_meta(model_path)
if 'lang' not in meta:
raise IOError('No language setting found in model meta.')
cls = util.load_lang_class(meta['lang'])
cls = util.get_lang_class(meta['lang'])
overrides['meta'] = meta
overrides['path'] = model_path
return cls(**overrides)

View File

@@ -18,67 +18,67 @@ _languages = ['bn', 'da', 'de', 'en', 'es', 'fi', 'fr', 'he', 'hu', 'it', 'nb',
@pytest.fixture(params=_languages)
def tokenizer(request):
lang = util.load_lang_class(request.param)
lang = util.get_lang_class(request.param)
return lang.Defaults.create_tokenizer()
@pytest.fixture
def en_tokenizer():
return util.load_lang_class('en').Defaults.create_tokenizer()
return util.get_lang_class('en').Defaults.create_tokenizer()
@pytest.fixture
def en_vocab():
return util.load_lang_class('en').Defaults.create_vocab()
return util.get_lang_class('en').Defaults.create_vocab()
@pytest.fixture
def en_parser():
return util.load_lang_class('en').Defaults.create_parser()
return util.get_lang_class('en').Defaults.create_parser()
@pytest.fixture
def es_tokenizer():
return util.load_lang_class('es').Defaults.create_tokenizer()
return util.get_lang_class('es').Defaults.create_tokenizer()
@pytest.fixture
def de_tokenizer():
return util.load_lang_class('de').Defaults.create_tokenizer()
return util.get_lang_class('de').Defaults.create_tokenizer()
@pytest.fixture(scope='module')
def fr_tokenizer():
return util.load_lang_class('fr').Defaults.create_tokenizer()
return util.get_lang_class('fr').Defaults.create_tokenizer()
@pytest.fixture
def hu_tokenizer():
return util.load_lang_class('hu').Defaults.create_tokenizer()
return util.get_lang_class('hu').Defaults.create_tokenizer()
@pytest.fixture
def fi_tokenizer():
return util.load_lang_class('fi').Defaults.create_tokenizer()
return util.get_lang_class('fi').Defaults.create_tokenizer()
@pytest.fixture
def sv_tokenizer():
return util.load_lang_class('sv').Defaults.create_tokenizer()
return util.get_lang_class('sv').Defaults.create_tokenizer()
@pytest.fixture
def bn_tokenizer():
return util.load_lang_class('bn').Defaults.create_tokenizer()
return util.get_lang_class('bn').Defaults.create_tokenizer()
@pytest.fixture
def he_tokenizer():
return util.load_lang_class('he').Defaults.create_tokenizer()
return util.get_lang_class('he').Defaults.create_tokenizer()
@pytest.fixture
def nb_tokenizer():
return util.load_lang_class('nb').Defaults.create_tokenizer()
return util.get_lang_class('nb').Defaults.create_tokenizer()
@pytest.fixture
@@ -88,12 +88,12 @@ def stringstore():
@pytest.fixture
def en_entityrecognizer():
return util.load_lang_class('en').Defaults.create_entity()
return util.get_lang_class('en').Defaults.create_entity()
@pytest.fixture
def lemmatizer():
return util.load_lang_class('en').Defaults.create_lemmatizer()
return util.get_lang_class('en').Defaults.create_lemmatizer()
@pytest.fixture

View File

@@ -17,30 +17,30 @@ LANGUAGES = {}
_data_path = Path(__file__).parent / 'data'
def set_lang_class(name, cls):
def get_lang_class(lang):
"""Import and load a Language class.
lang (unicode): Two-letter language code, e.g. 'en'.
RETURNS (Language): Language class.
"""
global LANGUAGES
LANGUAGES[name] = cls
def get_lang_class(name):
if name in LANGUAGES:
return LANGUAGES[name]
lang = re.split('[^a-zA-Z0-9]', name, 1)[0]
if lang not in LANGUAGES:
raise RuntimeError('Language not supported: %s' % name)
if not lang in LANGUAGES:
try:
module = importlib.import_module('.lang.%s' % lang, 'spacy')
except ImportError:
raise ImportError("Can't import language %s from spacy.lang." %lang)
LANGUAGES[lang] = getattr(module, module.__all__[0])
return LANGUAGES[lang]
def load_lang_class(lang):
"""Import and load a Language class.
def set_lang_class(name, cls):
"""Set a custom Language class name that can be loaded via get_lang_class.
Args:
lang (unicode): Two-letter language code, e.g. 'en'.
Returns:
Language: Language class.
name (unicode): Name of Language class.
cls (Language): Language class.
"""
module = importlib.import_module('.lang.%s' % lang, 'spacy')
return getattr(module, module.__all__[0])
global LANGUAGES
LANGUAGES[name] = cls
def get_data_path(require_exists=True):

View File

@@ -49,7 +49,7 @@ p
+cell unicode or #[code Path]
+cell Path to new data directory.
+h(2, "load_lang_class") load_lang_class
+h(2, "get_lang_class") get_lang_class
+tag function
p
@@ -59,7 +59,7 @@ p
+aside-code("Example").
for lang_id in ['en', 'de']:
lang_class = util.load_lang_class(lang_id)
lang_class = util.get_lang_class(lang_id)
lang = lang_class()
tokenizer = lang.Defaults.create_tokenizer()

View File

@@ -80,7 +80,7 @@ p
| compute. As of spaCy v2.0, #[code Language] classes are not imported on
| initialisation and are only loaded when you import them directly, or load
| a model that requires a language to be loaded. To lazy-load languages in
| your application, you can use the #[code util.load_lang_class()] helper
| your application, you can use the #[code util.get_lang_class()] helper
| function with the two-letter language code as its argument.
+h(2, "language-data") Adding language data
@@ -486,7 +486,7 @@ p
| #[+src(gh("spaCy", "spacy/tests/lang")) tests/lang] in a directory named
| after the language ID. You'll also need to create a fixture for your
| tokenizer in the #[+src(gh("spaCy", "spacy/tests/conftest.py")) conftest.py].
| Always use the #[code load_lang_class()] helper function within the fixture,
| Always use the #[code get_lang_class()] helper function within the fixture,
| instead of importing the class at the top of the file. This will load the
| language data only when it's needed. (Otherwise, #[em all data] would be
| loaded every time you run a test.)
@@ -494,7 +494,7 @@ p
+code.
@pytest.fixture
def en_tokenizer():
return util.load_lang_class('en').Defaults.create_tokenizer()
return util.get_lang_class('en').Defaults.create_tokenizer()
p
| When adding test cases, always