From e9959fa913972a9c911084e176aa9a876941eaf7 Mon Sep 17 00:00:00 2001 From: thomashacker Date: Thu, 17 Nov 2022 12:46:11 +0100 Subject: [PATCH] Change to msg.fail --- spacy/cli/convert.py | 12 ++++++------ spacy/cli/init_config.py | 9 +++++++++ spacy/cli/init_pipeline.py | 9 +++++++++ spacy/errors.py | 11 +++++------ spacy/language.py | 25 +++++++------------------ 5 files changed, 36 insertions(+), 30 deletions(-) diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py index 2e150d3ae..ed6186e56 100644 --- a/spacy/cli/convert.py +++ b/spacy/cli/convert.py @@ -13,8 +13,7 @@ from ..tokens import Doc, DocBin from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs from ..training.converters import conllu_to_docs -import warnings -from ..errors import Warnings +from ..errors import RENAMED_LANGUAGE_CODES # Converters are matched by file extension except for ner/iob, which are # matched by file extension and content. To add a converter, add a new @@ -114,10 +113,11 @@ def convert( msg = Printer(no_print=silent) # Add warnings for renamed language code in v4 - renamed_lang_codes = {"xx": "mul", "is": "isl"} - if lang in renamed_lang_codes: - warnings.warn( - Warnings.W124.format(lang=lang, renamed_lang=renamed_lang_codes[lang]) + if lang in RENAMED_LANGUAGE_CODES: + msg.fail( + title="Renamed language code", + text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.", + exits=1, ) ner_map = srsly.read_json(ner_map) if ner_map is not None else None diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py index 7024d6b78..b29bd0cb9 100644 --- a/spacy/cli/init_config.py +++ b/spacy/cli/init_config.py @@ -14,6 +14,8 @@ from ..util import SimpleFrozenList from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND from ._util import string_to_list, import_code +from ..errors import RENAMED_LANGUAGE_CODES + ROOT = Path(__file__).parent / "templates" TEMPLATE_PATH = ROOT / "quickstart_training.jinja" @@ -158,6 +160,13 @@ def init_config( msg = Printer(no_print=silent) with TEMPLATE_PATH.open("r") as f: template = Template(f.read()) + # Add warnings for renamed language code in v4 + if lang in RENAMED_LANGUAGE_CODES: + msg.fail( + title="Renamed language code", + text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.", + exits=1, + ) # Filter out duplicates since tok2vec and transformer are added by template pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")] defaults = RECOMMENDATIONS["__default__"] diff --git a/spacy/cli/init_pipeline.py b/spacy/cli/init_pipeline.py index d53a61b8e..09d41f01c 100644 --- a/spacy/cli/init_pipeline.py +++ b/spacy/cli/init_pipeline.py @@ -11,6 +11,8 @@ from ..language import Language from ._util import init_cli, Arg, Opt, parse_config_overrides, show_validation_error from ._util import import_code, setup_gpu +from ..errors import RENAMED_LANGUAGE_CODES + @init_cli.command("vectors") def init_vectors_cli( @@ -31,6 +33,13 @@ def init_vectors_cli( a model with vectors. """ util.logger.setLevel(logging.DEBUG if verbose else logging.INFO) + # Add warnings for renamed language code in v4 + if lang in RENAMED_LANGUAGE_CODES: + msg.fail( + title="Renamed language code", + text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.", + exits=1, + ) msg.info(f"Creating blank nlp object for language '{lang}'") nlp = util.get_lang_class(lang)() if jsonl_loc is not None: diff --git a/spacy/errors.py b/spacy/errors.py index 596a989b3..d4a7ac191 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -214,8 +214,6 @@ class Warnings(metaclass=ErrorsWithCodes): "is a Cython extension type.") W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be " "aware that this might affect other components in your pipeline.") -# New warnings added in v4.x - W124 = ("Language code '{lang}' was replaced with '{renamed_lang}' in v4.") class Errors(metaclass=ErrorsWithCodes): @@ -957,10 +955,6 @@ class Errors(metaclass=ErrorsWithCodes): E4000 = ("Expected a Doc as input, but got: '{type}'") E4001 = ("Expected input to be one of the following types: ({expected_types}), " "but got '{received_type}'") - E4002 = ("Language code defined in config ({bad_lang_code}) does not match " - "language code of current Language subclass {lang} ({lang_code}) because it was renamed in v4. " - "Please change your current defined language code from ({bad_lang_code}) to ({lang_code}).") - # Deprecated model shortcuts, only used in errors and warnings OLD_MODEL_SHORTCUTS = { @@ -970,6 +964,11 @@ OLD_MODEL_SHORTCUTS = { "lt": "lt_core_news_sm", "xx": "xx_ent_wiki_sm" } +# Renamed language codes in v4 +RENAMED_LANGUAGE_CODES = { + "xx":"mul", "is":"isl" +} + # fmt: on diff --git a/spacy/language.py b/spacy/language.py index fc8993f98..d391f15ab 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -1736,24 +1736,13 @@ class Language: raise ValueError(Errors.E985.format(config=config)) config_lang = config["nlp"].get("lang") if config_lang is not None and config_lang != cls.lang: - # Set of language codes that were renamed in v4 - renamed_lang_codes = set(["xx","is"]) - if config_lang in renamed_lang_codes: - raise ValueError( - Errors.E4002.format( - bad_lang_code=config["nlp"]["lang"], - lang_code=cls.lang, - lang=util.get_object_name(cls), - ) - ) - else: - raise ValueError( - Errors.E958.format( - bad_lang_code=config["nlp"]["lang"], - lang_code=cls.lang, - lang=util.get_object_name(cls), - ) - ) + raise ValueError( + Errors.E958.format( + bad_lang_code=config["nlp"]["lang"], + lang_code=cls.lang, + lang=util.get_object_name(cls), + ) + ) config["nlp"]["lang"] = cls.lang # This isn't very elegant, but we remove the [components] block here to prevent # it from getting resolved (causes problems because we expect to pass in