From eaeb197d62bfcc80b7566aaae0988c054ae0677c Mon Sep 17 00:00:00 2001 From: thomashacker Date: Thu, 10 Nov 2022 16:52:35 +0100 Subject: [PATCH] Add warning --- spacy/cli/init_config.py | 3 ++- spacy/errors.py | 2 ++ spacy/language.py | 12 ++++++------ spacy/util.py | 2 ++ 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py index b634caa4c..ea09163a0 100644 --- a/spacy/cli/init_config.py +++ b/spacy/cli/init_config.py @@ -43,7 +43,7 @@ class InitValues: def init_config_cli( # fmt: off output_file: Path = Arg(..., help="File to save the config to or - for stdout (will only output config and no additional logging info)", allow_dash=True), - lang: str = Opt(InitValues.lang, "--lang", "-l", help="Two-letter code of the language to use"), + lang: str = Opt(InitValues.lang, "--lang", "-l", help="Code of the language to use"), pipeline: str = Opt(",".join(InitValues.pipeline), "--pipeline", "-p", help="Comma-separated names of trainable pipeline components to include (without 'tok2vec' or 'transformer')"), optimize: Optimizations = Opt(InitValues.optimize, "--optimize", "-o", help="Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters."), gpu: bool = Opt(InitValues.gpu, "--gpu", "-G", help="Whether the model can run on GPU. This will impact the choice of architecture, pretrained weights and related hyperparameters."), @@ -67,6 +67,7 @@ def init_config_cli( "The provided output file already exists. To force overwriting the config file, set the --force or -F flag.", exits=1, ) + lang = util.find_matching_language(lang) config = init_config( lang=lang, pipeline=pipeline, diff --git a/spacy/errors.py b/spacy/errors.py index 820f7352e..fd6824338 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -214,6 +214,8 @@ class Warnings(metaclass=ErrorsWithCodes): "is a Cython extension type.") W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be " "aware that this might affect other components in your pipeline.") +# New warnings added in v4.x + W124 = ("Language code {lang} has been renamed to {renamed_lang} in spaCy v4") class Errors(metaclass=ErrorsWithCodes): diff --git a/spacy/language.py b/spacy/language.py index d391f15ab..ca1fad413 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -1736,13 +1736,13 @@ class Language: raise ValueError(Errors.E985.format(config=config)) config_lang = config["nlp"].get("lang") if config_lang is not None and config_lang != cls.lang: - raise ValueError( - Errors.E958.format( - bad_lang_code=config["nlp"]["lang"], - lang_code=cls.lang, - lang=util.get_object_name(cls), + raise ValueError( + Errors.E958.format( + bad_lang_code=config["nlp"]["lang"], + lang_code=cls.lang, + lang=util.get_object_name(cls), + ) ) - ) config["nlp"]["lang"] = cls.lang # This isn't very elegant, but we remove the [components] block here to prevent # it from getting resolved (causes problems because we expect to pass in diff --git a/spacy/util.py b/spacy/util.py index 7a0b28665..9ef78df77 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -19,6 +19,7 @@ from catalogue import RegistryError, Registry import langcodes import sys import warnings +from .errors import Warnings from packaging.specifiers import SpecifierSet, InvalidSpecifier from packaging.version import Version, InvalidVersion from packaging.requirements import Requirement @@ -282,6 +283,7 @@ def find_matching_language(lang: str) -> Optional[str]: import spacy.lang # noqa: F401 if lang == "xx": + warnings.warn(Warnings.W124.format(lang=lang, renamed_lang="mul")) return "mul" # Find out which language modules we have