mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 04:40:20 +03:00
Change to msg.fail
This commit is contained in:
parent
de838a0866
commit
e9959fa913
|
@ -13,8 +13,7 @@ from ..tokens import Doc, DocBin
|
|||
from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs
|
||||
from ..training.converters import conllu_to_docs
|
||||
|
||||
import warnings
|
||||
from ..errors import Warnings
|
||||
from ..errors import RENAMED_LANGUAGE_CODES
|
||||
|
||||
# Converters are matched by file extension except for ner/iob, which are
|
||||
# matched by file extension and content. To add a converter, add a new
|
||||
|
@ -114,10 +113,11 @@ def convert(
|
|||
msg = Printer(no_print=silent)
|
||||
|
||||
# Fail with an error if the language code was renamed in v4
|
||||
renamed_lang_codes = {"xx": "mul", "is": "isl"}
|
||||
if lang in renamed_lang_codes:
|
||||
warnings.warn(
|
||||
Warnings.W124.format(lang=lang, renamed_lang=renamed_lang_codes[lang])
|
||||
if lang in RENAMED_LANGUAGE_CODES:
|
||||
msg.fail(
|
||||
title="Renamed language code",
|
||||
text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.",
|
||||
exits=1,
|
||||
)
|
||||
|
||||
ner_map = srsly.read_json(ner_map) if ner_map is not None else None
|
||||
|
|
|
@ -14,6 +14,8 @@ from ..util import SimpleFrozenList
|
|||
from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND
|
||||
from ._util import string_to_list, import_code
|
||||
|
||||
from ..errors import RENAMED_LANGUAGE_CODES
|
||||
|
||||
|
||||
ROOT = Path(__file__).parent / "templates"
|
||||
TEMPLATE_PATH = ROOT / "quickstart_training.jinja"
|
||||
|
@ -158,6 +160,13 @@ def init_config(
|
|||
msg = Printer(no_print=silent)
|
||||
with TEMPLATE_PATH.open("r") as f:
|
||||
template = Template(f.read())
|
||||
# Fail with an error if the language code was renamed in v4
|
||||
if lang in RENAMED_LANGUAGE_CODES:
|
||||
msg.fail(
|
||||
title="Renamed language code",
|
||||
text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.",
|
||||
exits=1,
|
||||
)
|
||||
# Filter out duplicates since tok2vec and transformer are added by template
|
||||
pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")]
|
||||
defaults = RECOMMENDATIONS["__default__"]
|
||||
|
|
|
@ -11,6 +11,8 @@ from ..language import Language
|
|||
from ._util import init_cli, Arg, Opt, parse_config_overrides, show_validation_error
|
||||
from ._util import import_code, setup_gpu
|
||||
|
||||
from ..errors import RENAMED_LANGUAGE_CODES
|
||||
|
||||
|
||||
@init_cli.command("vectors")
|
||||
def init_vectors_cli(
|
||||
|
@ -31,6 +33,13 @@ def init_vectors_cli(
|
|||
a model with vectors.
|
||||
"""
|
||||
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
|
||||
# Fail with an error if the language code was renamed in v4
|
||||
if lang in RENAMED_LANGUAGE_CODES:
|
||||
msg.fail(
|
||||
title="Renamed language code",
|
||||
text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.",
|
||||
exits=1,
|
||||
)
|
||||
msg.info(f"Creating blank nlp object for language '{lang}'")
|
||||
nlp = util.get_lang_class(lang)()
|
||||
if jsonl_loc is not None:
|
||||
|
|
|
@ -214,8 +214,6 @@ class Warnings(metaclass=ErrorsWithCodes):
|
|||
"is a Cython extension type.")
|
||||
W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be "
|
||||
"aware that this might affect other components in your pipeline.")
|
||||
# New warnings added in v4.x
|
||||
W124 = ("Language code '{lang}' was replaced with '{renamed_lang}' in v4.")
|
||||
|
||||
|
||||
class Errors(metaclass=ErrorsWithCodes):
|
||||
|
@ -957,10 +955,6 @@ class Errors(metaclass=ErrorsWithCodes):
|
|||
E4000 = ("Expected a Doc as input, but got: '{type}'")
|
||||
E4001 = ("Expected input to be one of the following types: ({expected_types}), "
|
||||
"but got '{received_type}'")
|
||||
E4002 = ("Language code defined in config ({bad_lang_code}) does not match "
|
||||
"language code of current Language subclass {lang} ({lang_code}) because it was renamed in v4. "
|
||||
"Please change your current defined language code from ({bad_lang_code}) to ({lang_code}).")
|
||||
|
||||
|
||||
# Deprecated model shortcuts, only used in errors and warnings
|
||||
OLD_MODEL_SHORTCUTS = {
|
||||
|
@ -970,6 +964,11 @@ OLD_MODEL_SHORTCUTS = {
|
|||
"lt": "lt_core_news_sm", "xx": "xx_ent_wiki_sm"
|
||||
}
|
||||
|
||||
# Renamed language codes in v4
|
||||
RENAMED_LANGUAGE_CODES = {
|
||||
"xx":"mul", "is":"isl"
|
||||
}
|
||||
|
||||
|
||||
# fmt: on
|
||||
|
||||
|
|
|
@ -1736,24 +1736,13 @@ class Language:
|
|||
raise ValueError(Errors.E985.format(config=config))
|
||||
config_lang = config["nlp"].get("lang")
|
||||
if config_lang is not None and config_lang != cls.lang:
|
||||
# Set of language codes that were renamed in v4
|
||||
renamed_lang_codes = set(["xx","is"])
|
||||
if config_lang in renamed_lang_codes:
|
||||
raise ValueError(
|
||||
Errors.E4002.format(
|
||||
bad_lang_code=config["nlp"]["lang"],
|
||||
lang_code=cls.lang,
|
||||
lang=util.get_object_name(cls),
|
||||
)
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
Errors.E958.format(
|
||||
bad_lang_code=config["nlp"]["lang"],
|
||||
lang_code=cls.lang,
|
||||
lang=util.get_object_name(cls),
|
||||
)
|
||||
)
|
||||
raise ValueError(
|
||||
Errors.E958.format(
|
||||
bad_lang_code=config["nlp"]["lang"],
|
||||
lang_code=cls.lang,
|
||||
lang=util.get_object_name(cls),
|
||||
)
|
||||
)
|
||||
config["nlp"]["lang"] = cls.lang
|
||||
# This isn't very elegant, but we remove the [components] block here to prevent
|
||||
# it from getting resolved (causes problems because we expect to pass in
|
||||
|
|
Loading…
Reference in New Issue
Block a user