Add new error for renamed lang codes

This commit is contained in:
thomashacker 2022-11-11 16:11:38 +01:00
parent 6b4b68e22e
commit cb0a7aa230
2 changed files with 20 additions and 6 deletions

View File

@ -957,6 +957,9 @@ class Errors(metaclass=ErrorsWithCodes):
E4000 = ("Expected a Doc as input, but got: '{type}'")
E4001 = ("Expected input to be one of the following types: ({expected_types}), "
"but got '{received_type}'")
E4002 = ("Language code defined in config ({bad_lang_code}) does not match "
"language code of current Language subclass {lang} ({lang_code}) because it was renamed in v4. "
"Please change your current defined language code from ({bad_lang_code}) to ({lang_code}).")
# Deprecated model shortcuts, only used in errors and warnings

View File

@ -1736,13 +1736,24 @@ class Language:
raise ValueError(Errors.E985.format(config=config))
config_lang = config["nlp"].get("lang")
if config_lang is not None and config_lang != cls.lang:
raise ValueError(
Errors.E958.format(
bad_lang_code=config["nlp"]["lang"],
lang_code=cls.lang,
lang=util.get_object_name(cls),
# Set of language codes that were renamed in v4
renamed_lang_codes = set(["xx","is"])
if config_lang in renamed_lang_codes:
raise ValueError(
Errors.E4002.format(
bad_lang_code=config["nlp"]["lang"],
lang_code=cls.lang,
lang=util.get_object_name(cls),
)
)
else:
raise ValueError(
Errors.E958.format(
bad_lang_code=config["nlp"]["lang"],
lang_code=cls.lang,
lang=util.get_object_name(cls),
)
)
)
config["nlp"]["lang"] = cls.lang
# This isn't very elegant, but we remove the [components] block here to prevent
# it from getting resolved (causes problems because we expect to pass in