Mirror of https://github.com/explosion/spaCy.git

Merge load_lang_class and get_lang_class

commit b462076d80 (parent 36bebe7164)
@@ -16,7 +16,7 @@ def load(name, **overrides):
     meta = util.parse_package_meta(model_path)
     if 'lang' not in meta:
         raise IOError('No language setting found in model meta.')
-    cls = util.load_lang_class(meta['lang'])
+    cls = util.get_lang_class(meta['lang'])
     overrides['meta'] = meta
     overrides['path'] = model_path
     return cls(**overrides)
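In practice, the updated load() path resolves the Language class from the model's meta. A minimal sketch of that resolution follows; the meta dict is an illustrative stand-in for a model's meta.json and is not part of the commit:

    from spacy import util

    # Illustrative stand-in for a model's meta.json contents.
    meta = {'lang': 'en', 'name': 'core_web_sm'}

    if 'lang' not in meta:
        raise IOError('No language setting found in model meta.')

    # get_lang_class imports spacy.lang.en on first use and caches it.
    cls = util.get_lang_class(meta['lang'])
    nlp = cls()  # instantiate the language class with default settings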
@@ -18,67 +18,67 @@ _languages = ['bn', 'da', 'de', 'en', 'es', 'fi', 'fr', 'he', 'hu', 'it', 'nb',

 @pytest.fixture(params=_languages)
 def tokenizer(request):
-    lang = util.load_lang_class(request.param)
+    lang = util.get_lang_class(request.param)
     return lang.Defaults.create_tokenizer()


 @pytest.fixture
 def en_tokenizer():
-    return util.load_lang_class('en').Defaults.create_tokenizer()
+    return util.get_lang_class('en').Defaults.create_tokenizer()


 @pytest.fixture
 def en_vocab():
-    return util.load_lang_class('en').Defaults.create_vocab()
+    return util.get_lang_class('en').Defaults.create_vocab()


 @pytest.fixture
 def en_parser():
-    return util.load_lang_class('en').Defaults.create_parser()
+    return util.get_lang_class('en').Defaults.create_parser()


 @pytest.fixture
 def es_tokenizer():
-    return util.load_lang_class('es').Defaults.create_tokenizer()
+    return util.get_lang_class('es').Defaults.create_tokenizer()


 @pytest.fixture
 def de_tokenizer():
-    return util.load_lang_class('de').Defaults.create_tokenizer()
+    return util.get_lang_class('de').Defaults.create_tokenizer()


 @pytest.fixture(scope='module')
 def fr_tokenizer():
-    return util.load_lang_class('fr').Defaults.create_tokenizer()
+    return util.get_lang_class('fr').Defaults.create_tokenizer()


 @pytest.fixture
 def hu_tokenizer():
-    return util.load_lang_class('hu').Defaults.create_tokenizer()
+    return util.get_lang_class('hu').Defaults.create_tokenizer()


 @pytest.fixture
 def fi_tokenizer():
-    return util.load_lang_class('fi').Defaults.create_tokenizer()
+    return util.get_lang_class('fi').Defaults.create_tokenizer()


 @pytest.fixture
 def sv_tokenizer():
-    return util.load_lang_class('sv').Defaults.create_tokenizer()
+    return util.get_lang_class('sv').Defaults.create_tokenizer()


 @pytest.fixture
 def bn_tokenizer():
-    return util.load_lang_class('bn').Defaults.create_tokenizer()
+    return util.get_lang_class('bn').Defaults.create_tokenizer()


 @pytest.fixture
 def he_tokenizer():
-    return util.load_lang_class('he').Defaults.create_tokenizer()
+    return util.get_lang_class('he').Defaults.create_tokenizer()

 @pytest.fixture
 def nb_tokenizer():
-    return util.load_lang_class('nb').Defaults.create_tokenizer()
+    return util.get_lang_class('nb').Defaults.create_tokenizer()


 @pytest.fixture
@@ -88,12 +88,12 @@ def stringstore():

 @pytest.fixture
 def en_entityrecognizer():
-    return util.load_lang_class('en').Defaults.create_entity()
+    return util.get_lang_class('en').Defaults.create_entity()


 @pytest.fixture
 def lemmatizer():
-    return util.load_lang_class('en').Defaults.create_lemmatizer()
+    return util.get_lang_class('en').Defaults.create_lemmatizer()


 @pytest.fixture
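As a usage note, a hypothetical test consuming one of these fixtures might look as follows; pytest injects the fixture by parameter name, so the tokenizer and its language data are only created when the test actually runs:

    # Hypothetical test (not part of the commit), illustrating fixture use.
    def test_en_tokenizer_splits_punctuation(en_tokenizer):
        tokens = en_tokenizer("Hello world!")
        assert [t.text for t in tokens] == ['Hello', 'world', '!']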
@@ -17,30 +17,30 @@ LANGUAGES = {}
 _data_path = Path(__file__).parent / 'data'


-def set_lang_class(name, cls):
+def get_lang_class(lang):
+    """Import and load a Language class.
+
+    lang (unicode): Two-letter language code, e.g. 'en'.
+    RETURNS (Language): Language class.
+    """
     global LANGUAGES
-    LANGUAGES[name] = cls
-
-
-def get_lang_class(name):
-    if name in LANGUAGES:
-        return LANGUAGES[name]
-    lang = re.split('[^a-zA-Z0-9]', name, 1)[0]
-    if lang not in LANGUAGES:
-        raise RuntimeError('Language not supported: %s' % name)
+    if not lang in LANGUAGES:
+        try:
+            module = importlib.import_module('.lang.%s' % lang, 'spacy')
+        except ImportError:
+            raise ImportError("Can't import language %s from spacy.lang." %lang)
+        LANGUAGES[lang] = getattr(module, module.__all__[0])
     return LANGUAGES[lang]


-def load_lang_class(lang):
-    """Import and load a Language class.
+def set_lang_class(name, cls):
+    """Set a custom Language class name that can be loaded via get_lang_class.

-    Args:
-        lang (unicode): Two-letter language code, e.g. 'en'.
-    Returns:
-        Language: Language class.
+    name (unicode): Name of Language class.
+    cls (Language): Language class.
     """
-    module = importlib.import_module('.lang.%s' % lang, 'spacy')
-    return getattr(module, module.__all__[0])
+    global LANGUAGES
+    LANGUAGES[name] = cls


 def get_data_path(require_exists=True):
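A short sketch of how the merged helpers behave after this change, assuming the v2-era spacy.util shown above: get_lang_class() imports and caches on first use, and set_lang_class() registers a custom class under a name that get_lang_class() then resolves without an import.

    from spacy import util

    # First call triggers importlib.import_module('.lang.en', 'spacy')
    # and caches the class in LANGUAGES; later calls hit the cache.
    English = util.get_lang_class('en')
    assert util.get_lang_class('en') is English

    # Register a custom subclass under a new name.
    class CustomEnglish(English):
        pass

    util.set_lang_class('custom_en', CustomEnglish)
    assert util.get_lang_class('custom_en') is CustomEnglish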
@@ -49,7 +49,7 @@ p
         +cell unicode or #[code Path]
         +cell Path to new data directory.

-+h(2, "load_lang_class") load_lang_class
++h(2, "get_lang_class") get_lang_class
 +tag function

 p
@@ -59,7 +59,7 @@ p

 +aside-code("Example").
     for lang_id in ['en', 'de']:
-        lang_class = util.load_lang_class(lang_id)
+        lang_class = util.get_lang_class(lang_id)
         lang = lang_class()
         tokenizer = lang.Defaults.create_tokenizer()

@@ -80,7 +80,7 @@ p
     | compute. As of spaCy v2.0, #[code Language] classes are not imported on
     | initialisation and are only loaded when you import them directly, or load
     | a model that requires a language to be loaded. To lazy-load languages in
-    | your application, you can use the #[code util.load_lang_class()] helper
+    | your application, you can use the #[code util.get_lang_class()] helper
     | function with the two-letter language code as its argument.

 +h(2, "language-data") Adding language data
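One way to observe the lazy behaviour described in that paragraph (a sketch, assuming the spacy.lang.<code> module layout and a fresh interpreter):

    import sys
    from spacy import util

    assert 'spacy.lang.fi' not in sys.modules  # not imported at startup
    fi_cls = util.get_lang_class('fi')         # first use triggers the import
    assert 'spacy.lang.fi' in sys.modules      # now loaded and cached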
@@ -486,7 +486,7 @@ p
     | #[+src(gh("spaCy", "spacy/tests/lang")) tests/lang] in a directory named
     | after the language ID. You'll also need to create a fixture for your
     | tokenizer in the #[+src(gh("spaCy", "spacy/tests/conftest.py")) conftest.py].
-    | Always use the #[code load_lang_class()] helper function within the fixture,
+    | Always use the #[code get_lang_class()] helper function within the fixture,
     | instead of importing the class at the top of the file. This will load the
     | language data only when it's needed. (Otherwise, #[em all data] would be
     | loaded every time you run a test.)
@@ -494,7 +494,7 @@ p
 +code.
     @pytest.fixture
     def en_tokenizer():
-        return util.load_lang_class('en').Defaults.create_tokenizer()
+        return util.get_lang_class('en').Defaults.create_tokenizer()

 p
     | When adding test cases, always
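For example, a test built on that fixture might look like this (a hypothetical test with illustrative names; the expected count assumes the English tokenizer splits the contraction):

    import pytest

    @pytest.mark.parametrize('text,length', [("I'm fine.", 4)])
    def test_en_tokenizer_handles_contractions(en_tokenizer, text, length):
        tokens = en_tokenizer(text)
        assert len(tokens) == length  # I / 'm / fine / .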