diff --git a/spacy/errors.py b/spacy/errors.py
index c86c431ce..7f3c58af7 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -957,6 +957,9 @@ class Errors(metaclass=ErrorsWithCodes):
     E4000 = ("Expected a Doc as input, but got: '{type}'")
     E4001 = ("Expected input to be one of the following types: ({expected_types}), "
              "but got '{received_type}'")
+    E4002 = ("Language code defined in config ({bad_lang_code}) does not match "
+             "language code of current Language subclass {lang} ({lang_code}) because it was renamed in v4. "
+             "Please change your current defined language code from ({bad_lang_code}) to ({lang_code}).")
 
 
 # Deprecated model shortcuts, only used in errors and warnings
diff --git a/spacy/language.py b/spacy/language.py
index ca1fad413..fc8993f98 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1736,13 +1736,20 @@ class Language:
             raise ValueError(Errors.E985.format(config=config))
         config_lang = config["nlp"].get("lang")
         if config_lang is not None and config_lang != cls.lang:
+            # Language codes that were renamed in v4 get a dedicated error
+            # (E4002) that tells the user which code to switch to; any other
+            # mismatch keeps the generic E958 message.
+            renamed_lang_codes = {"xx", "is"}
+            if config_lang in renamed_lang_codes:
+                error_template = Errors.E4002
+            else:
+                error_template = Errors.E958
             raise ValueError(
-                Errors.E958.format(
+                error_template.format(
                     bad_lang_code=config["nlp"]["lang"],
                     lang_code=cls.lang,
                     lang=util.get_object_name(cls),
                 )
             )
         config["nlp"]["lang"] = cls.lang
         # This isn't very elegant, but we remove the [components] block here to prevent
         # it from getting resolved (causes problems because we expect to pass in