From 05af71cc394a5bf9371bc8f0307d178de9488557 Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Thu, 12 Jan 2023 19:05:09 +0900 Subject: [PATCH] Add support for multiple code files to all relevant commands Prior to this, only the package command supported multiple code files. --- spacy/cli/_util.py | 7 +++++++ spacy/cli/assemble.py | 6 +++--- spacy/cli/debug_config.py | 6 +++--- spacy/cli/debug_data.py | 6 +++--- spacy/cli/evaluate.py | 6 +++--- spacy/cli/package.py | 2 +- spacy/cli/pretrain.py | 6 +++--- spacy/cli/train.py | 6 +++--- 8 files changed, 26 insertions(+), 19 deletions(-) diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index 0f4e9f599..6a47ce9da 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -318,6 +318,13 @@ def show_validation_error( msg.fail("Config validation error", e, exits=1) +def import_code_paths(code_paths: str) -> None: + """Helper to import comma-separated list of code paths.""" + code_paths = [Path(p.strip()) for p in string_to_list(code_paths)] + for code_path in code_paths: + import_code(code_path) + + def import_code(code_path: Optional[Union[Path, str]]) -> None: """Helper to import Python file provided in training commands / commands using the config. This makes custom registered functions available. diff --git a/spacy/cli/assemble.py b/spacy/cli/assemble.py index 1cfa290a3..9c8971510 100644 --- a/spacy/cli/assemble.py +++ b/spacy/cli/assemble.py @@ -5,7 +5,7 @@ import typer import logging from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error -from ._util import import_code +from ._util import import_code_paths from .. 
import util from ..util import get_sourced_components, load_model_from_config @@ -19,7 +19,7 @@ def assemble_cli( ctx: typer.Context, # This is only used to read additional arguments config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True), output_path: Path = Arg(..., help="Output directory to store assembled pipeline in"), - code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), + code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"), verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"), # fmt: on ): @@ -38,7 +38,7 @@ def assemble_cli( if not config_path or (str(config_path) != "-" and not config_path.exists()): msg.fail("Config file not found", config_path, exits=1) overrides = parse_config_overrides(ctx.args) - import_code(code_path) + import_code_paths(code_paths) with show_validation_error(config_path): config = util.load_config(config_path, overrides=overrides, interpolate=False) msg.divider("Initializing pipeline") diff --git a/spacy/cli/debug_config.py b/spacy/cli/debug_config.py index 409fac4ed..c63d95d66 100644 --- a/spacy/cli/debug_config.py +++ b/spacy/cli/debug_config.py @@ -6,7 +6,7 @@ from thinc.config import VARIABLE_RE import typer from ._util import Arg, Opt, show_validation_error, parse_config_overrides -from ._util import import_code, debug_cli +from ._util import import_code_paths, debug_cli from ..schemas import ConfigSchemaInit, ConfigSchemaTraining from ..util import registry from .. 
import util @@ -20,7 +20,7 @@ def debug_config_cli( # fmt: off ctx: typer.Context, # This is only used to read additional arguments config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True), - code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), + code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"), show_funcs: bool = Opt(False, "--show-functions", "-F", help="Show an overview of all registered functions used in the config and where they come from (modules, files etc.)"), show_vars: bool = Opt(False, "--show-variables", "-V", help="Show an overview of all variables referenced in the config and their values. This will also reflect variables overwritten on the CLI.") # fmt: on @@ -37,7 +37,7 @@ def debug_config_cli( DOCS: https://spacy.io/api/cli#debug-config """ overrides = parse_config_overrides(ctx.args) - import_code(code_path) + import_code_paths(code_paths) debug_config( config_path, overrides=overrides, show_funcs=show_funcs, show_vars=show_vars ) diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index a85324e87..1866f847b 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -9,7 +9,7 @@ import typer import math from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides -from ._util import import_code, debug_cli, _format_number +from ._util import import_code_paths, debug_cli, _format_number from ..training import Example, remove_bilu_prefix from ..training.initialize import get_sourced_components from ..schemas import ConfigSchemaTraining @@ -52,7 +52,7 @@ def debug_data_cli( # fmt: off ctx: typer.Context, # This is only used to read additional arguments config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True), - code_path: Optional[Path] = 
Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), + code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"), ignore_warnings: bool = Opt(False, "--ignore-warnings", "-IW", help="Ignore warnings, only show stats and errors"), verbose: bool = Opt(False, "--verbose", "-V", help="Print additional information and explanations"), no_format: bool = Opt(False, "--no-format", "-NF", help="Don't pretty-print the results"), @@ -72,7 +72,7 @@ def debug_data_cli( "--help for an overview of the other available debugging commands." ) overrides = parse_config_overrides(ctx.args) - import_code(code_path) + import_code_paths(code_paths) debug_data( config_path, config_overrides=overrides, diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py index 0d08d2c5e..45f4e4029 100644 --- a/spacy/cli/evaluate.py +++ b/spacy/cli/evaluate.py @@ -7,7 +7,7 @@ from thinc.api import fix_random_seed from ..training import Corpus from ..tokens import Doc -from ._util import app, Arg, Opt, setup_gpu, import_code +from ._util import app, Arg, Opt, setup_gpu, import_code_paths from ..scorer import Scorer from .. import util from .. 
import displacy @@ -19,7 +19,7 @@ def evaluate_cli( model: str = Arg(..., help="Model name or path"), data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True), output: Optional[Path] = Opt(None, "--output", "-o", help="Output JSON file for metrics", dir_okay=False), - code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), + code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"), use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"), gold_preproc: bool = Opt(False, "--gold-preproc", "-G", help="Use gold preprocessing"), displacy_path: Optional[Path] = Opt(None, "--displacy-path", "-dp", help="Directory to output rendered parses as HTML", exists=True, file_okay=False), @@ -38,7 +38,7 @@ def evaluate_cli( DOCS: https://spacy.io/api/cli#evaluate """ - import_code(code_path) + import_code_paths(code_paths) evaluate( model, data_path, diff --git a/spacy/cli/package.py b/spacy/cli/package.py index 324c5d1bb..056b1c2e6 100644 --- a/spacy/cli/package.py +++ b/spacy/cli/package.py @@ -20,7 +20,7 @@ def package_cli( # fmt: off input_dir: Path = Arg(..., help="Directory with pipeline data", exists=True, file_okay=False), output_dir: Path = Arg(..., help="Output parent directory", exists=True, file_okay=False), - code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python file with additional code (registered functions) to be included in the package"), + code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be included in the package"), meta_path: Optional[Path] = Opt(None, "--meta-path", "--meta", "-m", help="Path to meta.json", exists=True, dir_okay=False), create_meta: bool = Opt(False, "--create-meta", "-C", help="Create 
meta.json, even if one exists"), name: Optional[str] = Opt(None, "--name", "-n", help="Package name to override meta"), diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py index 381d589cf..b80f95355 100644 --- a/spacy/cli/pretrain.py +++ b/spacy/cli/pretrain.py @@ -5,7 +5,7 @@ import typer import re from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error -from ._util import import_code, setup_gpu +from ._util import import_code_paths, setup_gpu from ..training.pretrain import pretrain from ..util import load_config @@ -19,7 +19,7 @@ def pretrain_cli( ctx: typer.Context, # This is only used to read additional arguments config_path: Path = Arg(..., help="Path to config file", exists=True, dir_okay=False, allow_dash=True), output_dir: Path = Arg(..., help="Directory to write weights to on each epoch"), - code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), + code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"), resume_path: Optional[Path] = Opt(None, "--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"), epoch_resume: Optional[int] = Opt(None, "--epoch-resume", "-er", help="The epoch to resume counting from when using --resume-path. 
Prevents unintended overwriting of existing weight files."), use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"), @@ -47,7 +47,7 @@ def pretrain_cli( DOCS: https://spacy.io/api/cli#pretrain """ config_overrides = parse_config_overrides(ctx.args) - import_code(code_path) + import_code_paths(code_paths) verify_cli_args(config_path, output_dir, resume_path, epoch_resume) setup_gpu(use_gpu) msg.info(f"Loading config from: {config_path}") diff --git a/spacy/cli/train.py b/spacy/cli/train.py index cc22cbba6..e259458ac 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -6,7 +6,7 @@ import logging import sys from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error -from ._util import import_code, setup_gpu +from ._util import import_code_paths, setup_gpu from ..training.loop import train as train_nlp from ..training.initialize import init_nlp from .. import util @@ -20,7 +20,7 @@ def train_cli( ctx: typer.Context, # This is only used to read additional arguments config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True), output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory to store trained pipeline in"), - code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), + code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"), verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"), use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU") # fmt: on @@ -41,7 +41,7 @@ def train_cli( """ util.logger.setLevel(logging.DEBUG if verbose else logging.INFO) overrides = parse_config_overrides(ctx.args) - import_code(code_path) + import_code_paths(code_paths) train(config_path, output_path, use_gpu=use_gpu, 
overrides=overrides)