mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-23 15:54:13 +03:00
Improve CLI config validation with latest Thinc
This commit is contained in:
parent
702edf52a0
commit
ca3c997062
|
@ -6,7 +6,7 @@ requires = [
|
|||
"cymem>=2.0.2,<2.1.0",
|
||||
"preshed>=3.0.2,<3.1.0",
|
||||
"murmurhash>=0.28.0,<1.1.0",
|
||||
"thinc>=8.0.0a35,<8.0.0a40",
|
||||
"thinc>=8.0.0a36,<8.0.0a40",
|
||||
"blis>=0.4.0,<0.5.0",
|
||||
"pytokenizations",
|
||||
"pathy"
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# Our libraries
|
||||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
thinc>=8.0.0a35,<8.0.0a40
|
||||
thinc>=8.0.0a36,<8.0.0a40
|
||||
blis>=0.4.0,<0.5.0
|
||||
ml_datasets==0.2.0a0
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
|
|
|
@ -34,13 +34,13 @@ setup_requires =
|
|||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
thinc>=8.0.0a35,<8.0.0a40
|
||||
thinc>=8.0.0a36,<8.0.0a40
|
||||
install_requires =
|
||||
# Our libraries
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
thinc>=8.0.0a35,<8.0.0a40
|
||||
thinc>=8.0.0a36,<8.0.0a40
|
||||
blis>=0.4.0,<0.5.0
|
||||
wasabi>=0.8.0,<1.1.0
|
||||
srsly>=2.1.0,<3.0.0
|
||||
|
|
|
@ -10,7 +10,7 @@ from click import NoSuchOption
|
|||
from click.parser import split_arg_string
|
||||
from typer.main import get_command
|
||||
from contextlib import contextmanager
|
||||
from thinc.config import Config, ConfigValidationError
|
||||
from thinc.api import Config, ConfigValidationError
|
||||
from configparser import InterpolationError
|
||||
import os
|
||||
|
||||
|
@ -226,24 +226,28 @@ def get_checksum(path: Union[Path, str]) -> str:
|
|||
def show_validation_error(
|
||||
file_path: Optional[Union[str, Path]] = None,
|
||||
*,
|
||||
title: str = "Config validation error",
|
||||
title: Optional[str] = None,
|
||||
desc: str = "",
|
||||
show_config: Optional[bool] = None,
|
||||
hint_fill: bool = True,
|
||||
):
|
||||
"""Helper to show custom config validation errors on the CLI.
|
||||
|
||||
file_path (str / Path): Optional file path of config file, used in hints.
|
||||
title (str): Title of the custom formatted error.
|
||||
title (str): Override title of custom formatted error.
|
||||
desc (str): Override description of custom formatted error.
|
||||
show_config (bool): Whether to output the config the error refers to.
|
||||
hint_fill (bool): Show hint about filling config.
|
||||
"""
|
||||
try:
|
||||
yield
|
||||
except (ConfigValidationError, InterpolationError) as e:
|
||||
msg.fail(title, spaced=True)
|
||||
# TODO: This is kinda hacky and we should probably provide a better
|
||||
# helper for this in Thinc
|
||||
err_text = str(e).replace("Config validation error", "").strip()
|
||||
print(err_text)
|
||||
if hint_fill and "field required" in err_text:
|
||||
except ConfigValidationError as e:
|
||||
title = title if title is not None else e.title
|
||||
# Re-generate a new error object with overrides
|
||||
err = e.from_error(e, title="", desc=desc, show_config=show_config)
|
||||
msg.fail(title)
|
||||
print(err.text.strip())
|
||||
if hint_fill and "value_error.missing" in err.error_types:
|
||||
config_path = file_path if file_path is not None else "config.cfg"
|
||||
msg.text(
|
||||
"If your config contains missing values, you can run the 'init "
|
||||
|
@ -252,6 +256,8 @@ def show_validation_error(
|
|||
)
|
||||
print(f"{COMMAND} init fill-config {config_path} --base {config_path}\n")
|
||||
sys.exit(1)
|
||||
except InterpolationError as e:
|
||||
msg.fail("Config validation error", e, exits=1)
|
||||
|
||||
|
||||
def import_code(code_path: Optional[Union[Path, str]]) -> None:
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
from typing import Optional, Dict, Any, Union, List
|
||||
from pathlib import Path
|
||||
from wasabi import msg, table
|
||||
from thinc.api import Config
|
||||
from thinc.config import VARIABLE_RE, ConfigValidationError
|
||||
from thinc.api import Config, ConfigValidationError
|
||||
from thinc.config import VARIABLE_RE
|
||||
import typer
|
||||
|
||||
from ._util import Arg, Opt, show_validation_error, parse_config_overrides
|
||||
|
@ -115,4 +115,4 @@ def check_section_refs(config: Config, fields: List[str]) -> None:
|
|||
msg = f"not a valid section reference: {value}"
|
||||
errors.append({"loc": field.split("."), "msg": msg})
|
||||
if errors:
|
||||
raise ConfigValidationError(config, errors)
|
||||
raise ConfigValidationError(config=config, errors=errors)
|
||||
|
|
|
@ -89,7 +89,7 @@ def train(
|
|||
nlp, config = util.load_model_from_config(config)
|
||||
util.load_vocab_data_into_model(nlp, lookups=config["training"]["lookups"])
|
||||
if config["training"]["vectors"] is not None:
|
||||
util.load_vectors_into_model(nlp, config["training"]["vectors"])
|
||||
add_vectors(nlp, config["training"]["vectors"])
|
||||
raw_text, tag_map, morph_rules, weights_data = load_from_paths(config)
|
||||
T_cfg = config["training"]
|
||||
optimizer = T_cfg["optimizer"]
|
||||
|
@ -195,6 +195,19 @@ def train(
|
|||
msg.good(f"Saved pipeline to output directory {final_model_path}")
|
||||
|
||||
|
||||
def add_vectors(nlp: Language, vectors: str) -> None:
|
||||
title = f"Config validation error for vectors {vectors}"
|
||||
desc = (
|
||||
"This typically means that there's a problem in the config.cfg included "
|
||||
"with the packaged vectors. Make sure that the vectors package you're "
|
||||
"loading is compatible with the current version of spaCy."
|
||||
)
|
||||
with show_validation_error(
|
||||
title=title, desc=desc, hint_fill=False, show_config=False
|
||||
):
|
||||
util.load_vectors_into_model(nlp, vectors)
|
||||
|
||||
|
||||
def create_train_batches(iterator, batcher, max_epochs: int):
|
||||
epoch = 0
|
||||
examples = list(iterator)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import pytest
|
||||
from spacy.lang.zh import Chinese, _get_pkuseg_trie_data
|
||||
from thinc.config import ConfigValidationError
|
||||
from thinc.api import ConfigValidationError
|
||||
|
||||
|
||||
# fmt: off
|
||||
|
|
|
@ -4,8 +4,7 @@ from spacy.lang.en import English
|
|||
from spacy.lang.de import German
|
||||
from spacy.tokens import Doc
|
||||
from spacy.util import registry, SimpleFrozenDict, combine_score_weights
|
||||
from thinc.api import Model, Linear
|
||||
from thinc.config import ConfigValidationError
|
||||
from thinc.api import Model, Linear, ConfigValidationError
|
||||
from pydantic import StrictInt, StrictStr
|
||||
|
||||
from ..util import make_tempdir
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import pytest
|
||||
from thinc.config import Config, ConfigValidationError
|
||||
from thinc.api import Config, ConfigValidationError
|
||||
import spacy
|
||||
from spacy.lang.en import English
|
||||
from spacy.lang.de import German
|
||||
|
|
|
@ -8,7 +8,7 @@ from spacy.cli._util import validate_project_commands, parse_config_overrides
|
|||
from spacy.cli._util import load_project_config, substitute_project_variables
|
||||
from spacy.cli._util import string_to_list, OVERRIDES_ENV_VAR
|
||||
from spacy.cli.debug_config import check_section_refs
|
||||
from thinc.config import ConfigValidationError, Config
|
||||
from thinc.api import ConfigValidationError, Config
|
||||
import srsly
|
||||
import os
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user