mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
Merge pull request #6720 from adrianeboyd/feature/improved-init-training-config-validation
This commit is contained in:
commit
e8a97a2bd6
|
@ -7,7 +7,7 @@ import typer
|
||||||
|
|
||||||
from ._util import Arg, Opt, show_validation_error, parse_config_overrides
|
from ._util import Arg, Opt, show_validation_error, parse_config_overrides
|
||||||
from ._util import import_code, debug_cli
|
from ._util import import_code, debug_cli
|
||||||
from ..schemas import ConfigSchemaTraining
|
from ..schemas import ConfigSchemaInit, ConfigSchemaTraining
|
||||||
from ..util import registry
|
from ..util import registry
|
||||||
from .. import util
|
from .. import util
|
||||||
|
|
||||||
|
@ -55,6 +55,11 @@ def debug_config(
|
||||||
config = util.load_config(config_path, overrides=overrides)
|
config = util.load_config(config_path, overrides=overrides)
|
||||||
nlp = util.load_model_from_config(config)
|
nlp = util.load_model_from_config(config)
|
||||||
config = nlp.config.interpolate()
|
config = nlp.config.interpolate()
|
||||||
|
msg.divider("Config validation for [initialize]")
|
||||||
|
with show_validation_error(config_path):
|
||||||
|
T = registry.resolve(config["initialize"], schema=ConfigSchemaInit)
|
||||||
|
msg.divider("Config validation for [training]")
|
||||||
|
with show_validation_error(config_path):
|
||||||
T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
|
T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
|
||||||
dot_names = [T["train_corpus"], T["dev_corpus"]]
|
dot_names = [T["train_corpus"], T["dev_corpus"]]
|
||||||
util.resolve_dot_names(config, dot_names)
|
util.resolve_dot_names(config, dot_names)
|
||||||
|
|
|
@ -730,6 +730,8 @@ class Errors:
|
||||||
"DocBin (.spacy) format. If your data is in spaCy v2's JSON "
|
"DocBin (.spacy) format. If your data is in spaCy v2's JSON "
|
||||||
"training format, convert it using `python -m spacy convert "
|
"training format, convert it using `python -m spacy convert "
|
||||||
"file.json .`.")
|
"file.json .`.")
|
||||||
|
E1015 = ("Can't initialize model from config: no {value} found. For more "
|
||||||
|
"information, run: python -m spacy debug config config.cfg")
|
||||||
|
|
||||||
|
|
||||||
# Deprecated model shortcuts, only used in errors and warnings
|
# Deprecated model shortcuts, only used in errors and warnings
|
||||||
|
|
|
@ -23,6 +23,10 @@ if TYPE_CHECKING:
|
||||||
def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
|
def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
|
||||||
raw_config = config
|
raw_config = config
|
||||||
config = raw_config.interpolate()
|
config = raw_config.interpolate()
|
||||||
|
if "seed" not in config["training"]:
|
||||||
|
raise ValueError(Errors.E1015.format(value="[training] seed"))
|
||||||
|
if "gpu_allocator" not in config["training"]:
|
||||||
|
raise ValueError(Errors.E1015.format(value="[training] gpu_allocator"))
|
||||||
if config["training"]["seed"] is not None:
|
if config["training"]["seed"] is not None:
|
||||||
fix_random_seed(config["training"]["seed"])
|
fix_random_seed(config["training"]["seed"])
|
||||||
allocator = config["training"]["gpu_allocator"]
|
allocator = config["training"]["gpu_allocator"]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user