mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
Merge branch 'develop' into fix/train-config-interpolation
This commit is contained in:
commit
b4486d747d
|
@ -6,7 +6,7 @@ requires = [
|
|||
"cymem>=2.0.2,<2.1.0",
|
||||
"preshed>=3.0.2,<3.1.0",
|
||||
"murmurhash>=0.28.0,<1.1.0",
|
||||
"thinc>=8.0.0a35,<8.0.0a40",
|
||||
"thinc>=8.0.0a36,<8.0.0a40",
|
||||
"blis>=0.4.0,<0.5.0",
|
||||
"pytokenizations",
|
||||
"pathy"
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# Our libraries
|
||||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
thinc>=8.0.0a35,<8.0.0a40
|
||||
thinc>=8.0.0a36,<8.0.0a40
|
||||
blis>=0.4.0,<0.5.0
|
||||
ml_datasets==0.2.0a0
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
|
|
|
@ -34,13 +34,13 @@ setup_requires =
|
|||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
thinc>=8.0.0a35,<8.0.0a40
|
||||
thinc>=8.0.0a36,<8.0.0a40
|
||||
install_requires =
|
||||
# Our libraries
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
thinc>=8.0.0a35,<8.0.0a40
|
||||
thinc>=8.0.0a36,<8.0.0a40
|
||||
blis>=0.4.0,<0.5.0
|
||||
wasabi>=0.8.0,<1.1.0
|
||||
srsly>=2.1.0,<3.0.0
|
||||
|
|
|
@ -10,7 +10,7 @@ from click import NoSuchOption
|
|||
from click.parser import split_arg_string
|
||||
from typer.main import get_command
|
||||
from contextlib import contextmanager
|
||||
from thinc.config import Config, ConfigValidationError
|
||||
from thinc.api import Config, ConfigValidationError
|
||||
from configparser import InterpolationError
|
||||
import os
|
||||
|
||||
|
@ -226,24 +226,28 @@ def get_checksum(path: Union[Path, str]) -> str:
|
|||
def show_validation_error(
|
||||
file_path: Optional[Union[str, Path]] = None,
|
||||
*,
|
||||
title: str = "Config validation error",
|
||||
title: Optional[str] = None,
|
||||
desc: str = "",
|
||||
show_config: Optional[bool] = None,
|
||||
hint_fill: bool = True,
|
||||
):
|
||||
"""Helper to show custom config validation errors on the CLI.
|
||||
|
||||
file_path (str / Path): Optional file path of config file, used in hints.
|
||||
title (str): Title of the custom formatted error.
|
||||
title (str): Override title of custom formatted error.
|
||||
desc (str): Override description of custom formatted error.
|
||||
show_config (bool): Whether to output the config the error refers to.
|
||||
hint_fill (bool): Show hint about filling config.
|
||||
"""
|
||||
try:
|
||||
yield
|
||||
except (ConfigValidationError, InterpolationError) as e:
|
||||
msg.fail(title, spaced=True)
|
||||
# TODO: This is kinda hacky and we should probably provide a better
|
||||
# helper for this in Thinc
|
||||
err_text = str(e).replace("Config validation error", "").strip()
|
||||
print(err_text)
|
||||
if hint_fill and "field required" in err_text:
|
||||
except ConfigValidationError as e:
|
||||
title = title if title is not None else e.title
|
||||
# Re-generate a new error object with overrides
|
||||
err = e.from_error(e, title="", desc=desc, show_config=show_config)
|
||||
msg.fail(title)
|
||||
print(err.text.strip())
|
||||
if hint_fill and "value_error.missing" in err.error_types:
|
||||
config_path = file_path if file_path is not None else "config.cfg"
|
||||
msg.text(
|
||||
"If your config contains missing values, you can run the 'init "
|
||||
|
@ -252,6 +256,8 @@ def show_validation_error(
|
|||
)
|
||||
print(f"{COMMAND} init fill-config {config_path} --base {config_path}\n")
|
||||
sys.exit(1)
|
||||
except InterpolationError as e:
|
||||
msg.fail("Config validation error", e, exits=1)
|
||||
|
||||
|
||||
def import_code(code_path: Optional[Union[Path, str]]) -> None:
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
from typing import Optional, Dict, Any, Union, List
|
||||
from pathlib import Path
|
||||
from wasabi import msg, table
|
||||
from thinc.api import Config
|
||||
from thinc.config import VARIABLE_RE, ConfigValidationError
|
||||
from thinc.api import Config, ConfigValidationError
|
||||
from thinc.config import VARIABLE_RE
|
||||
import typer
|
||||
|
||||
from ._util import Arg, Opt, show_validation_error, parse_config_overrides
|
||||
|
@ -115,4 +115,4 @@ def check_section_refs(config: Config, fields: List[str]) -> None:
|
|||
msg = f"not a valid section reference: {value}"
|
||||
errors.append({"loc": field.split("."), "msg": msg})
|
||||
if errors:
|
||||
raise ConfigValidationError(config, errors)
|
||||
raise ConfigValidationError(config=config, errors=errors)
|
||||
|
|
|
@ -92,7 +92,7 @@ def train(
|
|||
nlp, config = util.load_model_from_config(raw_config)
|
||||
util.load_vocab_data_into_model(nlp, lookups=config["training"]["lookups"])
|
||||
if config["training"]["vectors"] is not None:
|
||||
util.load_vectors_into_model(nlp, config["training"]["vectors"])
|
||||
add_vectors(nlp, config["training"]["vectors"])
|
||||
raw_text, tag_map, morph_rules, weights_data = load_from_paths(config)
|
||||
T_cfg = config["training"]
|
||||
optimizer = T_cfg["optimizer"]
|
||||
|
@ -198,6 +198,19 @@ def train(
|
|||
msg.good(f"Saved pipeline to output directory {final_model_path}")
|
||||
|
||||
|
||||
def add_vectors(nlp: Language, vectors: str) -> None:
|
||||
title = f"Config validation error for vectors {vectors}"
|
||||
desc = (
|
||||
"This typically means that there's a problem in the config.cfg included "
|
||||
"with the packaged vectors. Make sure that the vectors package you're "
|
||||
"loading is compatible with the current version of spaCy."
|
||||
)
|
||||
with show_validation_error(
|
||||
title=title, desc=desc, hint_fill=False, show_config=False
|
||||
):
|
||||
util.load_vectors_into_model(nlp, vectors)
|
||||
|
||||
|
||||
def create_train_batches(iterator, batcher, max_epochs: int):
|
||||
epoch = 0
|
||||
examples = list(iterator)
|
||||
|
|
|
@ -82,7 +82,7 @@ class AttributeRuler(Pipe):
|
|||
matches = self.matcher(doc, allow_missing=True)
|
||||
# Sort by the attribute ID, so that later rules have precedence
|
||||
matches = [
|
||||
(_parse_key(self.vocab.strings[m_id]), m_id, s, e)
|
||||
(int(self.vocab.strings[m_id]), m_id, s, e)
|
||||
for m_id, s, e in matches
|
||||
]
|
||||
matches.sort()
|
||||
|
@ -184,7 +184,7 @@ class AttributeRuler(Pipe):
|
|||
"""
|
||||
# We need to make a string here, because otherwise the ID we pass back
|
||||
# will be interpreted as the hash of a string, rather than an ordinal.
|
||||
key = _make_key(len(self.attrs))
|
||||
key = str(len(self.attrs))
|
||||
self.matcher.add(self.vocab.strings.add(key), patterns)
|
||||
self._attrs_unnormed.append(attrs)
|
||||
attrs = normalize_token_attrs(self.vocab, attrs)
|
||||
|
@ -209,7 +209,7 @@ class AttributeRuler(Pipe):
|
|||
all_patterns = []
|
||||
for i in range(len(self.attrs)):
|
||||
p = {}
|
||||
p["patterns"] = self.matcher.get(_make_key(i))[1]
|
||||
p["patterns"] = self.matcher.get(str(i))[1]
|
||||
p["attrs"] = self._attrs_unnormed[i]
|
||||
p["index"] = self.indices[i]
|
||||
all_patterns.append(p)
|
||||
|
@ -313,12 +313,6 @@ class AttributeRuler(Pipe):
|
|||
|
||||
return self
|
||||
|
||||
def _make_key(n_attr):
|
||||
return f"attr_rule_{n_attr}"
|
||||
|
||||
def _parse_key(key):
|
||||
return int(key.rsplit("_", 1)[1])
|
||||
|
||||
|
||||
def _split_morph_attrs(attrs):
|
||||
"""Split entries from a tag map or morph rules dict into two dicts, one
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import pytest
|
||||
from spacy.lang.zh import Chinese, _get_pkuseg_trie_data
|
||||
from thinc.config import ConfigValidationError
|
||||
from thinc.api import ConfigValidationError
|
||||
|
||||
|
||||
# fmt: off
|
||||
|
|
|
@ -4,8 +4,7 @@ from spacy.lang.en import English
|
|||
from spacy.lang.de import German
|
||||
from spacy.tokens import Doc
|
||||
from spacy.util import registry, SimpleFrozenDict, combine_score_weights
|
||||
from thinc.api import Model, Linear
|
||||
from thinc.config import ConfigValidationError
|
||||
from thinc.api import Model, Linear, ConfigValidationError
|
||||
from pydantic import StrictInt, StrictStr
|
||||
|
||||
from ..util import make_tempdir
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import pytest
|
||||
from thinc.config import Config, ConfigValidationError
|
||||
from thinc.api import Config, ConfigValidationError
|
||||
import spacy
|
||||
from spacy.lang.en import English
|
||||
from spacy.lang.de import German
|
||||
|
|
|
@ -8,7 +8,7 @@ from spacy.cli._util import validate_project_commands, parse_config_overrides
|
|||
from spacy.cli._util import load_project_config, substitute_project_variables
|
||||
from spacy.cli._util import string_to_list, OVERRIDES_ENV_VAR
|
||||
from spacy.cli.debug_config import check_section_refs
|
||||
from thinc.config import ConfigValidationError, Config
|
||||
from thinc.api import ConfigValidationError, Config
|
||||
import srsly
|
||||
import os
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user