Change renamed language code handling to msg.fail

This commit is contained in:
thomashacker 2022-11-17 12:46:11 +01:00
parent de838a0866
commit e9959fa913
5 changed files with 36 additions and 30 deletions

View File

@ -13,8 +13,7 @@ from ..tokens import Doc, DocBin
from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs
from ..training.converters import conllu_to_docs
import warnings
from ..errors import Warnings
from ..errors import RENAMED_LANGUAGE_CODES
# Converters are matched by file extension except for ner/iob, which are
# matched by file extension and content. To add a converter, add a new
@ -114,10 +113,11 @@ def convert(
msg = Printer(no_print=silent)
# Exit with an error if the language code was renamed in v4
renamed_lang_codes = {"xx": "mul", "is": "isl"}
if lang in renamed_lang_codes:
warnings.warn(
Warnings.W124.format(lang=lang, renamed_lang=renamed_lang_codes[lang])
if lang in RENAMED_LANGUAGE_CODES:
msg.fail(
title="Renamed language code",
text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.",
exits=1,
)
ner_map = srsly.read_json(ner_map) if ner_map is not None else None

View File

@ -14,6 +14,8 @@ from ..util import SimpleFrozenList
from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND
from ._util import string_to_list, import_code
from ..errors import RENAMED_LANGUAGE_CODES
ROOT = Path(__file__).parent / "templates"
TEMPLATE_PATH = ROOT / "quickstart_training.jinja"
@ -158,6 +160,13 @@ def init_config(
msg = Printer(no_print=silent)
with TEMPLATE_PATH.open("r") as f:
template = Template(f.read())
# Exit with an error if the language code was renamed in v4
if lang in RENAMED_LANGUAGE_CODES:
msg.fail(
title="Renamed language code",
text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.",
exits=1,
)
# Filter out duplicates since tok2vec and transformer are added by template
pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")]
defaults = RECOMMENDATIONS["__default__"]

View File

@ -11,6 +11,8 @@ from ..language import Language
from ._util import init_cli, Arg, Opt, parse_config_overrides, show_validation_error
from ._util import import_code, setup_gpu
from ..errors import RENAMED_LANGUAGE_CODES
@init_cli.command("vectors")
def init_vectors_cli(
@ -31,6 +33,13 @@ def init_vectors_cli(
a model with vectors.
"""
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
# Exit with an error if the language code was renamed in v4
if lang in RENAMED_LANGUAGE_CODES:
msg.fail(
title="Renamed language code",
text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.",
exits=1,
)
msg.info(f"Creating blank nlp object for language '{lang}'")
nlp = util.get_lang_class(lang)()
if jsonl_loc is not None:

View File

@ -214,8 +214,6 @@ class Warnings(metaclass=ErrorsWithCodes):
"is a Cython extension type.")
W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be "
"aware that this might affect other components in your pipeline.")
# New warnings added in v4.x
W124 = ("Language code '{lang}' was replaced with '{renamed_lang}' in v4.")
class Errors(metaclass=ErrorsWithCodes):
@ -957,10 +955,6 @@ class Errors(metaclass=ErrorsWithCodes):
E4000 = ("Expected a Doc as input, but got: '{type}'")
E4001 = ("Expected input to be one of the following types: ({expected_types}), "
"but got '{received_type}'")
E4002 = ("Language code defined in config ({bad_lang_code}) does not match "
"language code of current Language subclass {lang} ({lang_code}) because it was renamed in v4. "
"Please change your current defined language code from ({bad_lang_code}) to ({lang_code}).")
# Deprecated model shortcuts, only used in errors and warnings
OLD_MODEL_SHORTCUTS = {
@ -970,6 +964,11 @@ OLD_MODEL_SHORTCUTS = {
"lt": "lt_core_news_sm", "xx": "xx_ent_wiki_sm"
}
# Language codes that were renamed in v4, keyed by the old code and mapping
# to the code that replaced it.
RENAMED_LANGUAGE_CODES = {
    "xx": "mul",
    "is": "isl",
}
# fmt: on

View File

@ -1736,24 +1736,13 @@ class Language:
raise ValueError(Errors.E985.format(config=config))
config_lang = config["nlp"].get("lang")
if config_lang is not None and config_lang != cls.lang:
# Set of language codes that were renamed in v4
renamed_lang_codes = set(["xx","is"])
if config_lang in renamed_lang_codes:
raise ValueError(
Errors.E4002.format(
bad_lang_code=config["nlp"]["lang"],
lang_code=cls.lang,
lang=util.get_object_name(cls),
)
)
else:
raise ValueError(
Errors.E958.format(
bad_lang_code=config["nlp"]["lang"],
lang_code=cls.lang,
lang=util.get_object_name(cls),
)
)
raise ValueError(
Errors.E958.format(
bad_lang_code=config["nlp"]["lang"],
lang_code=cls.lang,
lang=util.get_object_name(cls),
)
)
config["nlp"]["lang"] = cls.lang
# This isn't very elegant, but we remove the [components] block here to prevent
# it from getting resolved (causes problems because we expect to pass in