Add warning for renamed language code "xx" (now "mul")

This commit is contained in:
thomashacker 2022-11-10 16:52:35 +01:00
parent 597d0a84d6
commit eaeb197d62
4 changed files with 12 additions and 7 deletions

View File

@@ -43,7 +43,7 @@ class InitValues:
def init_config_cli(
# fmt: off
output_file: Path = Arg(..., help="File to save the config to or - for stdout (will only output config and no additional logging info)", allow_dash=True),
lang: str = Opt(InitValues.lang, "--lang", "-l", help="Two-letter code of the language to use"),
lang: str = Opt(InitValues.lang, "--lang", "-l", help="Code of the language to use"),
pipeline: str = Opt(",".join(InitValues.pipeline), "--pipeline", "-p", help="Comma-separated names of trainable pipeline components to include (without 'tok2vec' or 'transformer')"),
optimize: Optimizations = Opt(InitValues.optimize, "--optimize", "-o", help="Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters."),
gpu: bool = Opt(InitValues.gpu, "--gpu", "-G", help="Whether the model can run on GPU. This will impact the choice of architecture, pretrained weights and related hyperparameters."),
@@ -67,6 +67,7 @@ def init_config_cli(
"The provided output file already exists. To force overwriting the config file, set the --force or -F flag.",
exits=1,
)
lang = util.find_matching_language(lang)
config = init_config(
lang=lang,
pipeline=pipeline,

View File

@@ -214,6 +214,8 @@ class Warnings(metaclass=ErrorsWithCodes):
"is a Cython extension type.")
W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be "
"aware that this might affect other components in your pipeline.")
# New warnings added in v4.x
W124 = ("Language code {lang} has been renamed to {renamed_lang} in spaCy v4")
class Errors(metaclass=ErrorsWithCodes):

View File

@@ -1736,13 +1736,13 @@ class Language:
raise ValueError(Errors.E985.format(config=config))
config_lang = config["nlp"].get("lang")
if config_lang is not None and config_lang != cls.lang:
raise ValueError(
Errors.E958.format(
bad_lang_code=config["nlp"]["lang"],
lang_code=cls.lang,
lang=util.get_object_name(cls),
raise ValueError(
Errors.E958.format(
bad_lang_code=config["nlp"]["lang"],
lang_code=cls.lang,
lang=util.get_object_name(cls),
)
)
)
config["nlp"]["lang"] = cls.lang
# This isn't very elegant, but we remove the [components] block here to prevent
# it from getting resolved (causes problems because we expect to pass in

View File

@@ -19,6 +19,7 @@ from catalogue import RegistryError, Registry
import langcodes
import sys
import warnings
from .errors import Warnings
from packaging.specifiers import SpecifierSet, InvalidSpecifier
from packaging.version import Version, InvalidVersion
from packaging.requirements import Requirement
@@ -282,6 +283,7 @@ def find_matching_language(lang: str) -> Optional[str]:
import spacy.lang # noqa: F401
if lang == "xx":
warnings.warn(Warnings.W124.format(lang=lang, renamed_lang="mul"))
return "mul"
# Find out which language modules we have