Add more detail to the renamed-language-code warning (W124)

This commit is contained in:
thomashacker 2022-11-11 10:46:59 +01:00
parent eaeb197d62
commit bf5790f6cc
3 changed files with 3 additions and 2 deletions

View File

@ -67,7 +67,6 @@ def init_config_cli(
"The provided output file already exists. To force overwriting the config file, set the --force or -F flag.",
exits=1,
)
lang = util.find_matching_language(lang)
config = init_config(
lang=lang,
pipeline=pipeline,

View File

@ -215,7 +215,7 @@ class Warnings(metaclass=ErrorsWithCodes):
W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be "
"aware that this might affect other components in your pipeline.")
# New warnings added in v4.x
W124 = ("Language code {lang} has been renamed to {renamed_lang} in spaCy v4")
W124 = ("Language code '{lang}' was not found and replaced with '{renamed_lang}'. Language codes like 'xx' and 'is' were renamed to 'mul' and 'isl' in v4.")
class Errors(metaclass=ErrorsWithCodes):

View File

@ -302,6 +302,8 @@ def find_matching_language(lang: str) -> Optional[str]:
# is labeled that way is probably trying to be distinct from 'zh' and
# shouldn't automatically match.
match = langcodes.closest_supported_match(lang, possible_languages, max_distance=9)
if match is not None:
warnings.warn(Warnings.W124.format(lang=lang, renamed_lang=match))
return match