diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py index ea09163a0..7024d6b78 100644 --- a/spacy/cli/init_config.py +++ b/spacy/cli/init_config.py @@ -67,7 +67,6 @@ def init_config_cli( "The provided output file already exists. To force overwriting the config file, set the --force or -F flag.", exits=1, ) - lang = util.find_matching_language(lang) config = init_config( lang=lang, pipeline=pipeline, diff --git a/spacy/errors.py b/spacy/errors.py index fd6824338..c86c431ce 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -215,7 +215,7 @@ class Warnings(metaclass=ErrorsWithCodes): W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be " "aware that this might affect other components in your pipeline.") # New warnings added in v4.x - W124 = ("Language code {lang} has been renamed to {renamed_lang} in spaCy v4") + W124 = ("Language code '{lang}' was not found and replaced with '{renamed_lang}'. Language codes like 'xx' and 'is' were renamed to 'mul' and 'isl' in v4.") class Errors(metaclass=ErrorsWithCodes): diff --git a/spacy/util.py b/spacy/util.py index 9ef78df77..3ed9f7b4c 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -302,6 +302,8 @@ def find_matching_language(lang: str) -> Optional[str]: # is labeled that way is probably trying to be distinct from 'zh' and # shouldn't automatically match. match = langcodes.closest_supported_match(lang, possible_languages, max_distance=9) + if match is not None: + warnings.warn(Warnings.W124.format(lang=lang, renamed_lang=match)) return match