Improve CLI config validation with latest Thinc

This commit is contained in:
Ines Montani 2020-09-26 13:13:57 +02:00
parent 702edf52a0
commit ca3c997062
10 changed files with 41 additions and 23 deletions

View File

@ -6,7 +6,7 @@ requires = [
"cymem>=2.0.2,<2.1.0",
"preshed>=3.0.2,<3.1.0",
"murmurhash>=0.28.0,<1.1.0",
"thinc>=8.0.0a35,<8.0.0a40",
"thinc>=8.0.0a36,<8.0.0a40",
"blis>=0.4.0,<0.5.0",
"pytokenizations",
"pathy"

View File

@ -1,7 +1,7 @@
# Our libraries
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
thinc>=8.0.0a35,<8.0.0a40
thinc>=8.0.0a36,<8.0.0a40
blis>=0.4.0,<0.5.0
ml_datasets==0.2.0a0
murmurhash>=0.28.0,<1.1.0

View File

@ -34,13 +34,13 @@ setup_requires =
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
murmurhash>=0.28.0,<1.1.0
thinc>=8.0.0a35,<8.0.0a40
thinc>=8.0.0a36,<8.0.0a40
install_requires =
# Our libraries
murmurhash>=0.28.0,<1.1.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
thinc>=8.0.0a35,<8.0.0a40
thinc>=8.0.0a36,<8.0.0a40
blis>=0.4.0,<0.5.0
wasabi>=0.8.0,<1.1.0
srsly>=2.1.0,<3.0.0

View File

@ -10,7 +10,7 @@ from click import NoSuchOption
from click.parser import split_arg_string
from typer.main import get_command
from contextlib import contextmanager
from thinc.config import Config, ConfigValidationError
from thinc.api import Config, ConfigValidationError
from configparser import InterpolationError
import os
@ -226,24 +226,28 @@ def get_checksum(path: Union[Path, str]) -> str:
def show_validation_error(
file_path: Optional[Union[str, Path]] = None,
*,
title: str = "Config validation error",
title: Optional[str] = None,
desc: str = "",
show_config: Optional[bool] = None,
hint_fill: bool = True,
):
"""Helper to show custom config validation errors on the CLI.
file_path (str / Path): Optional file path of config file, used in hints.
title (str): Title of the custom formatted error.
title (str): Override title of custom formatted error.
desc (str): Override description of custom formatted error.
show_config (bool): Whether to output the config the error refers to.
hint_fill (bool): Show hint about filling config.
"""
try:
yield
except (ConfigValidationError, InterpolationError) as e:
msg.fail(title, spaced=True)
# TODO: This is kinda hacky and we should probably provide a better
# helper for this in Thinc
err_text = str(e).replace("Config validation error", "").strip()
print(err_text)
if hint_fill and "field required" in err_text:
except ConfigValidationError as e:
title = title if title is not None else e.title
# Re-generate a new error object with overrides
err = e.from_error(e, title="", desc=desc, show_config=show_config)
msg.fail(title)
print(err.text.strip())
if hint_fill and "value_error.missing" in err.error_types:
config_path = file_path if file_path is not None else "config.cfg"
msg.text(
"If your config contains missing values, you can run the 'init "
@ -252,6 +256,8 @@ def show_validation_error(
)
print(f"{COMMAND} init fill-config {config_path} --base {config_path}\n")
sys.exit(1)
except InterpolationError as e:
msg.fail("Config validation error", e, exits=1)
def import_code(code_path: Optional[Union[Path, str]]) -> None:

View File

@ -1,8 +1,8 @@
from typing import Optional, Dict, Any, Union, List
from pathlib import Path
from wasabi import msg, table
from thinc.api import Config
from thinc.config import VARIABLE_RE, ConfigValidationError
from thinc.api import Config, ConfigValidationError
from thinc.config import VARIABLE_RE
import typer
from ._util import Arg, Opt, show_validation_error, parse_config_overrides
@ -115,4 +115,4 @@ def check_section_refs(config: Config, fields: List[str]) -> None:
msg = f"not a valid section reference: {value}"
errors.append({"loc": field.split("."), "msg": msg})
if errors:
raise ConfigValidationError(config, errors)
raise ConfigValidationError(config=config, errors=errors)

View File

@ -89,7 +89,7 @@ def train(
nlp, config = util.load_model_from_config(config)
util.load_vocab_data_into_model(nlp, lookups=config["training"]["lookups"])
if config["training"]["vectors"] is not None:
util.load_vectors_into_model(nlp, config["training"]["vectors"])
add_vectors(nlp, config["training"]["vectors"])
raw_text, tag_map, morph_rules, weights_data = load_from_paths(config)
T_cfg = config["training"]
optimizer = T_cfg["optimizer"]
@ -195,6 +195,19 @@ def train(
msg.good(f"Saved pipeline to output directory {final_model_path}")
def add_vectors(nlp: Language, vectors: str) -> None:
    """Load vectors into the pipeline and show config validation errors
    raised while doing so as a custom formatted CLI error.

    nlp (Language): The pipeline object handed to
        util.load_vectors_into_model.
    vectors (str): The vectors setting handed to
        util.load_vectors_into_model; also included in the error title.
    """
    # The fill-config hint and the config dump are both switched off for
    # errors reported from this helper.
    error_handler = show_validation_error(
        title=f"Config validation error for vectors {vectors}",
        desc=(
            "This typically means that there's a problem in the config.cfg "
            "included with the packaged vectors. Make sure that the vectors "
            "package you're loading is compatible with the current version "
            "of spaCy."
        ),
        hint_fill=False,
        show_config=False,
    )
    with error_handler:
        util.load_vectors_into_model(nlp, vectors)
def create_train_batches(iterator, batcher, max_epochs: int):
epoch = 0
examples = list(iterator)

View File

@ -1,6 +1,6 @@
import pytest
from spacy.lang.zh import Chinese, _get_pkuseg_trie_data
from thinc.config import ConfigValidationError
from thinc.api import ConfigValidationError
# fmt: off

View File

@ -4,8 +4,7 @@ from spacy.lang.en import English
from spacy.lang.de import German
from spacy.tokens import Doc
from spacy.util import registry, SimpleFrozenDict, combine_score_weights
from thinc.api import Model, Linear
from thinc.config import ConfigValidationError
from thinc.api import Model, Linear, ConfigValidationError
from pydantic import StrictInt, StrictStr
from ..util import make_tempdir

View File

@ -1,5 +1,5 @@
import pytest
from thinc.config import Config, ConfigValidationError
from thinc.api import Config, ConfigValidationError
import spacy
from spacy.lang.en import English
from spacy.lang.de import German

View File

@ -8,7 +8,7 @@ from spacy.cli._util import validate_project_commands, parse_config_overrides
from spacy.cli._util import load_project_config, substitute_project_variables
from spacy.cli._util import string_to_list, OVERRIDES_ENV_VAR
from spacy.cli.debug_config import check_section_refs
from thinc.config import ConfigValidationError, Config
from thinc.api import ConfigValidationError, Config
import srsly
import os