Change renamed-language-code warnings to msg.fail errors

This commit is contained in:
thomashacker 2022-11-17 12:46:11 +01:00
parent de838a0866
commit e9959fa913
5 changed files with 36 additions and 30 deletions

View File

@ -13,8 +13,7 @@ from ..tokens import Doc, DocBin
from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs
from ..training.converters import conllu_to_docs from ..training.converters import conllu_to_docs
import warnings from ..errors import RENAMED_LANGUAGE_CODES
from ..errors import Warnings
# Converters are matched by file extension except for ner/iob, which are # Converters are matched by file extension except for ner/iob, which are
# matched by file extension and content. To add a converter, add a new # matched by file extension and content. To add a converter, add a new
@ -114,10 +113,11 @@ def convert(
msg = Printer(no_print=silent) msg = Printer(no_print=silent)
# Add warnings for renamed language code in v4 # Add warnings for renamed language code in v4
renamed_lang_codes = {"xx": "mul", "is": "isl"} if lang in RENAMED_LANGUAGE_CODES:
if lang in renamed_lang_codes: msg.fail(
warnings.warn( title="Renamed language code",
Warnings.W124.format(lang=lang, renamed_lang=renamed_lang_codes[lang]) text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.",
exits=1,
) )
ner_map = srsly.read_json(ner_map) if ner_map is not None else None ner_map = srsly.read_json(ner_map) if ner_map is not None else None

View File

@ -14,6 +14,8 @@ from ..util import SimpleFrozenList
from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND
from ._util import string_to_list, import_code from ._util import string_to_list, import_code
from ..errors import RENAMED_LANGUAGE_CODES
ROOT = Path(__file__).parent / "templates" ROOT = Path(__file__).parent / "templates"
TEMPLATE_PATH = ROOT / "quickstart_training.jinja" TEMPLATE_PATH = ROOT / "quickstart_training.jinja"
@ -158,6 +160,13 @@ def init_config(
msg = Printer(no_print=silent) msg = Printer(no_print=silent)
with TEMPLATE_PATH.open("r") as f: with TEMPLATE_PATH.open("r") as f:
template = Template(f.read()) template = Template(f.read())
# Fail with an error for language codes that were renamed in v4
if lang in RENAMED_LANGUAGE_CODES:
msg.fail(
title="Renamed language code",
text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.",
exits=1,
)
# Filter out duplicates since tok2vec and transformer are added by template # Filter out duplicates since tok2vec and transformer are added by template
pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")] pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")]
defaults = RECOMMENDATIONS["__default__"] defaults = RECOMMENDATIONS["__default__"]

View File

@ -11,6 +11,8 @@ from ..language import Language
from ._util import init_cli, Arg, Opt, parse_config_overrides, show_validation_error from ._util import init_cli, Arg, Opt, parse_config_overrides, show_validation_error
from ._util import import_code, setup_gpu from ._util import import_code, setup_gpu
from ..errors import RENAMED_LANGUAGE_CODES
@init_cli.command("vectors") @init_cli.command("vectors")
def init_vectors_cli( def init_vectors_cli(
@ -31,6 +33,13 @@ def init_vectors_cli(
a model with vectors. a model with vectors.
""" """
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO) util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
# Fail with an error for language codes that were renamed in v4
if lang in RENAMED_LANGUAGE_CODES:
msg.fail(
title="Renamed language code",
text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.",
exits=1,
)
msg.info(f"Creating blank nlp object for language '{lang}'") msg.info(f"Creating blank nlp object for language '{lang}'")
nlp = util.get_lang_class(lang)() nlp = util.get_lang_class(lang)()
if jsonl_loc is not None: if jsonl_loc is not None:

View File

@ -214,8 +214,6 @@ class Warnings(metaclass=ErrorsWithCodes):
"is a Cython extension type.") "is a Cython extension type.")
W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be " W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be "
"aware that this might affect other components in your pipeline.") "aware that this might affect other components in your pipeline.")
# New warnings added in v4.x
W124 = ("Language code '{lang}' was replaced with '{renamed_lang}' in v4.")
class Errors(metaclass=ErrorsWithCodes): class Errors(metaclass=ErrorsWithCodes):
@ -957,10 +955,6 @@ class Errors(metaclass=ErrorsWithCodes):
E4000 = ("Expected a Doc as input, but got: '{type}'") E4000 = ("Expected a Doc as input, but got: '{type}'")
E4001 = ("Expected input to be one of the following types: ({expected_types}), " E4001 = ("Expected input to be one of the following types: ({expected_types}), "
"but got '{received_type}'") "but got '{received_type}'")
E4002 = ("Language code defined in config ({bad_lang_code}) does not match "
"language code of current Language subclass {lang} ({lang_code}) because it was renamed in v4. "
"Please change your current defined language code from ({bad_lang_code}) to ({lang_code}).")
# Deprecated model shortcuts, only used in errors and warnings # Deprecated model shortcuts, only used in errors and warnings
OLD_MODEL_SHORTCUTS = { OLD_MODEL_SHORTCUTS = {
@ -970,6 +964,11 @@ OLD_MODEL_SHORTCUTS = {
"lt": "lt_core_news_sm", "xx": "xx_ent_wiki_sm" "lt": "lt_core_news_sm", "xx": "xx_ent_wiki_sm"
} }
# Renamed language codes in v4
RENAMED_LANGUAGE_CODES = {
"xx":"mul", "is":"isl"
}
# fmt: on # fmt: on

View File

@ -1736,17 +1736,6 @@ class Language:
raise ValueError(Errors.E985.format(config=config)) raise ValueError(Errors.E985.format(config=config))
config_lang = config["nlp"].get("lang") config_lang = config["nlp"].get("lang")
if config_lang is not None and config_lang != cls.lang: if config_lang is not None and config_lang != cls.lang:
# Set of language codes that were renamed in v4
renamed_lang_codes = set(["xx","is"])
if config_lang in renamed_lang_codes:
raise ValueError(
Errors.E4002.format(
bad_lang_code=config["nlp"]["lang"],
lang_code=cls.lang,
lang=util.get_object_name(cls),
)
)
else:
raise ValueError( raise ValueError(
Errors.E958.format( Errors.E958.format(
bad_lang_code=config["nlp"]["lang"], bad_lang_code=config["nlp"]["lang"],