mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-06 21:30:22 +03:00
Change to msg.fail
This commit is contained in:
parent
de838a0866
commit
e9959fa913
|
@ -13,8 +13,7 @@ from ..tokens import Doc, DocBin
|
||||||
from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs
|
from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs
|
||||||
from ..training.converters import conllu_to_docs
|
from ..training.converters import conllu_to_docs
|
||||||
|
|
||||||
import warnings
|
from ..errors import RENAMED_LANGUAGE_CODES
|
||||||
from ..errors import Warnings
|
|
||||||
|
|
||||||
# Converters are matched by file extension except for ner/iob, which are
|
# Converters are matched by file extension except for ner/iob, which are
|
||||||
# matched by file extension and content. To add a converter, add a new
|
# matched by file extension and content. To add a converter, add a new
|
||||||
|
@ -114,10 +113,11 @@ def convert(
|
||||||
msg = Printer(no_print=silent)
|
msg = Printer(no_print=silent)
|
||||||
|
|
||||||
# Add warnings for renamed language code in v4
|
# Add warnings for renamed language code in v4
|
||||||
renamed_lang_codes = {"xx": "mul", "is": "isl"}
|
if lang in RENAMED_LANGUAGE_CODES:
|
||||||
if lang in renamed_lang_codes:
|
msg.fail(
|
||||||
warnings.warn(
|
title="Renamed language code",
|
||||||
Warnings.W124.format(lang=lang, renamed_lang=renamed_lang_codes[lang])
|
text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.",
|
||||||
|
exits=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
ner_map = srsly.read_json(ner_map) if ner_map is not None else None
|
ner_map = srsly.read_json(ner_map) if ner_map is not None else None
|
||||||
|
|
|
@ -14,6 +14,8 @@ from ..util import SimpleFrozenList
|
||||||
from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND
|
from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND
|
||||||
from ._util import string_to_list, import_code
|
from ._util import string_to_list, import_code
|
||||||
|
|
||||||
|
from ..errors import RENAMED_LANGUAGE_CODES
|
||||||
|
|
||||||
|
|
||||||
ROOT = Path(__file__).parent / "templates"
|
ROOT = Path(__file__).parent / "templates"
|
||||||
TEMPLATE_PATH = ROOT / "quickstart_training.jinja"
|
TEMPLATE_PATH = ROOT / "quickstart_training.jinja"
|
||||||
|
@ -158,6 +160,13 @@ def init_config(
|
||||||
msg = Printer(no_print=silent)
|
msg = Printer(no_print=silent)
|
||||||
with TEMPLATE_PATH.open("r") as f:
|
with TEMPLATE_PATH.open("r") as f:
|
||||||
template = Template(f.read())
|
template = Template(f.read())
|
||||||
|
# Add warnings for renamed language code in v4
|
||||||
|
if lang in RENAMED_LANGUAGE_CODES:
|
||||||
|
msg.fail(
|
||||||
|
title="Renamed language code",
|
||||||
|
text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.",
|
||||||
|
exits=1,
|
||||||
|
)
|
||||||
# Filter out duplicates since tok2vec and transformer are added by template
|
# Filter out duplicates since tok2vec and transformer are added by template
|
||||||
pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")]
|
pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")]
|
||||||
defaults = RECOMMENDATIONS["__default__"]
|
defaults = RECOMMENDATIONS["__default__"]
|
||||||
|
|
|
@ -11,6 +11,8 @@ from ..language import Language
|
||||||
from ._util import init_cli, Arg, Opt, parse_config_overrides, show_validation_error
|
from ._util import init_cli, Arg, Opt, parse_config_overrides, show_validation_error
|
||||||
from ._util import import_code, setup_gpu
|
from ._util import import_code, setup_gpu
|
||||||
|
|
||||||
|
from ..errors import RENAMED_LANGUAGE_CODES
|
||||||
|
|
||||||
|
|
||||||
@init_cli.command("vectors")
|
@init_cli.command("vectors")
|
||||||
def init_vectors_cli(
|
def init_vectors_cli(
|
||||||
|
@ -31,6 +33,13 @@ def init_vectors_cli(
|
||||||
a model with vectors.
|
a model with vectors.
|
||||||
"""
|
"""
|
||||||
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
|
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
|
||||||
|
# Add warnings for renamed language code in v4
|
||||||
|
if lang in RENAMED_LANGUAGE_CODES:
|
||||||
|
msg.fail(
|
||||||
|
title="Renamed language code",
|
||||||
|
text=f"Language code '{lang}' was replaced with '{RENAMED_LANGUAGE_CODES[lang]}' in v4. Please change your current defined language code from '{lang}' to '{RENAMED_LANGUAGE_CODES[lang]}'.",
|
||||||
|
exits=1,
|
||||||
|
)
|
||||||
msg.info(f"Creating blank nlp object for language '{lang}'")
|
msg.info(f"Creating blank nlp object for language '{lang}'")
|
||||||
nlp = util.get_lang_class(lang)()
|
nlp = util.get_lang_class(lang)()
|
||||||
if jsonl_loc is not None:
|
if jsonl_loc is not None:
|
||||||
|
|
|
@ -214,8 +214,6 @@ class Warnings(metaclass=ErrorsWithCodes):
|
||||||
"is a Cython extension type.")
|
"is a Cython extension type.")
|
||||||
W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be "
|
W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be "
|
||||||
"aware that this might affect other components in your pipeline.")
|
"aware that this might affect other components in your pipeline.")
|
||||||
# New warnings added in v4.x
|
|
||||||
W124 = ("Language code '{lang}' was replaced with '{renamed_lang}' in v4.")
|
|
||||||
|
|
||||||
|
|
||||||
class Errors(metaclass=ErrorsWithCodes):
|
class Errors(metaclass=ErrorsWithCodes):
|
||||||
|
@ -957,10 +955,6 @@ class Errors(metaclass=ErrorsWithCodes):
|
||||||
E4000 = ("Expected a Doc as input, but got: '{type}'")
|
E4000 = ("Expected a Doc as input, but got: '{type}'")
|
||||||
E4001 = ("Expected input to be one of the following types: ({expected_types}), "
|
E4001 = ("Expected input to be one of the following types: ({expected_types}), "
|
||||||
"but got '{received_type}'")
|
"but got '{received_type}'")
|
||||||
E4002 = ("Language code defined in config ({bad_lang_code}) does not match "
|
|
||||||
"language code of current Language subclass {lang} ({lang_code}) because it was renamed in v4. "
|
|
||||||
"Please change your current defined language code from ({bad_lang_code}) to ({lang_code}).")
|
|
||||||
|
|
||||||
|
|
||||||
# Deprecated model shortcuts, only used in errors and warnings
|
# Deprecated model shortcuts, only used in errors and warnings
|
||||||
OLD_MODEL_SHORTCUTS = {
|
OLD_MODEL_SHORTCUTS = {
|
||||||
|
@ -970,6 +964,11 @@ OLD_MODEL_SHORTCUTS = {
|
||||||
"lt": "lt_core_news_sm", "xx": "xx_ent_wiki_sm"
|
"lt": "lt_core_news_sm", "xx": "xx_ent_wiki_sm"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Renamed language codes in v4
|
||||||
|
RENAMED_LANGUAGE_CODES = {
|
||||||
|
"xx":"mul", "is":"isl"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# fmt: on
|
# fmt: on
|
||||||
|
|
||||||
|
|
|
@ -1736,17 +1736,6 @@ class Language:
|
||||||
raise ValueError(Errors.E985.format(config=config))
|
raise ValueError(Errors.E985.format(config=config))
|
||||||
config_lang = config["nlp"].get("lang")
|
config_lang = config["nlp"].get("lang")
|
||||||
if config_lang is not None and config_lang != cls.lang:
|
if config_lang is not None and config_lang != cls.lang:
|
||||||
# Set of language codes that were renamed in v4
|
|
||||||
renamed_lang_codes = set(["xx","is"])
|
|
||||||
if config_lang in renamed_lang_codes:
|
|
||||||
raise ValueError(
|
|
||||||
Errors.E4002.format(
|
|
||||||
bad_lang_code=config["nlp"]["lang"],
|
|
||||||
lang_code=cls.lang,
|
|
||||||
lang=util.get_object_name(cls),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
Errors.E958.format(
|
Errors.E958.format(
|
||||||
bad_lang_code=config["nlp"]["lang"],
|
bad_lang_code=config["nlp"]["lang"],
|
||||||
|
|
Loading…
Reference in New Issue
Block a user