mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
Fix lang check and error handling in Language.from_config
This commit is contained in:
parent
2ed6e2a218
commit
d3d7f92f05
|
@ -552,7 +552,10 @@ class Errors:
|
|||
"to register a simple stateless function component that just takes "
|
||||
"a Doc and returns it.")
|
||||
E958 = ("Language code defined in config ({bad_lang_code}) does not match "
|
||||
"language code of current Language subclass {lang} ({lang_code})")
|
||||
"language code of current Language subclass {lang} ({lang_code}). "
|
||||
"If you want to create an nlp object from a config, make sure to "
|
||||
"use the matching subclass with the language-specific settings and "
|
||||
"data.")
|
||||
E959 = ("Can't insert component {dir} index {idx}. Existing components: {opts}")
|
||||
E960 = ("No config data found for component '{name}'. This is likely a bug "
|
||||
"in spaCy.")
|
||||
|
|
|
@ -1487,7 +1487,7 @@ class Language:
|
|||
if "nlp" not in config:
|
||||
raise ValueError(Errors.E985.format(config=config))
|
||||
config_lang = config["nlp"]["lang"]
|
||||
if cls.lang is not None and config_lang is not None and config_lang != cls.lang:
|
||||
if config_lang is not None and config_lang != cls.lang:
|
||||
raise ValueError(
|
||||
Errors.E958.format(
|
||||
bad_lang_code=config["nlp"]["lang"],
|
||||
|
|
|
@ -5,6 +5,7 @@ from spacy.tokens import Doc, Span
|
|||
from spacy.vocab import Vocab
|
||||
from spacy.training import Example
|
||||
from spacy.lang.en import English
|
||||
from spacy.lang.de import German
|
||||
from spacy.util import registry
|
||||
|
||||
from .util import add_vecs_to_vocab, assert_docs_equal
|
||||
|
@ -266,3 +267,13 @@ def test_language_custom_tokenizer():
|
|||
assert [t.text for t in doc] == ["_hello", "_world"]
|
||||
doc = list(nlp.pipe(["hello world"]))[0]
|
||||
assert [t.text for t in doc] == ["_hello", "_world"]
|
||||
|
||||
|
||||
def test_language_from_config_invalid_lang():
    """Check that from_config rejects a config whose lang does not match.

    The config declares "en", and both the base Language class and the German
    subclass are expected to raise a ValueError when built from it.
    """
    config = {"nlp": {"lang": "en"}}
    # The same config object is used for both classes, in the original order.
    for lang_cls in (Language, German):
        with pytest.raises(ValueError):
            lang_cls.from_config(config)
|
||||
|
|
Loading…
Reference in New Issue
Block a user