from typing import Optional
from pathlib import Path
from wasabi import msg
import typer
import logging

from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
from ._util import import_code
from ..training.initialize import init_nlp
from .. import util
from ..util import get_sourced_components, load_model_from_config


@app.command(
    "assemble",
    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
)
def assemble_cli(
    # fmt: off
    ctx: typer.Context,  # This is only used to read additional arguments
    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
    output_path: Path = Arg(..., help="Output directory to store assembled pipeline in"),
    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
    # fmt: on
):
    """
    Assemble a spaCy pipeline from a config file. The config file includes
    all settings for initializing the pipeline. To override settings in the
    config, e.g. settings that point to local paths or that you want to
    experiment with, you can override them as command line options. The
    --code argument lets you pass in a Python file that can be used to
    register custom functions that are referenced in the config.

    DOCS: https://spacy.io/api/cli#assemble
    """
    # NOTE(review): the docstring above is kept word-for-word; it is presumably
    # rendered as the command's help text, so treat it as user-facing output.
    log_level = logging.DEBUG if verbose else logging.INFO
    util.logger.setLevel(log_level)

    # A config path of "-" skips the existence check; every other path must
    # exist on disk, otherwise the command fails (exits=1).
    config_missing = not config_path or not (
        str(config_path) == "-" or config_path.exists()
    )
    if config_missing:
        msg.fail("Config file not found", config_path, exits=1)

    # The extra command line arguments collected on the context are parsed
    # as config overrides; custom code is imported before the config loads.
    config_overrides = parse_config_overrides(ctx.args)
    import_code(code_path)
    with show_validation_error(config_path):
        raw_config = util.load_config(
            config_path, overrides=config_overrides, interpolate=False
        )

    msg.divider("Initializing pipeline")
    nlp = load_model_from_config(raw_config, auto_fill=True)
    interpolated_config = raw_config.interpolate()
    sourced = get_sourced_components(interpolated_config)
    # Listeners have to be linked up before initialization goes any further
    nlp._link_components()
    # Sourced components are disabled while the rest is initialized.
    with nlp.select_pipes(disable=list(sourced)):
        nlp.initialize()
    msg.good("Initialized pipeline")

    msg.divider("Serializing to disk")
    must_create_dir = output_path is not None and not output_path.exists()
    if must_create_dir:
        output_path.mkdir(parents=True)
        msg.good(f"Created output directory: {output_path}")
    nlp.to_disk(output_path)
Expects a +[config file](/api/data-formats#config) with all settings and hyperparameters. +The `--code` argument can be used to import a Python file that lets you register +[custom functions](/usage/training#custom-functions) and refer to them in your +config. + +> #### Example +> +> ```cli +> $ python -m spacy assemble config.cfg ./output +> ``` + +```cli +$ python -m spacy assemble [config_path] [output_dir] [--code] [--verbose] [overrides] +``` + +| Name | Description | +| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `config_path` | Path to the [config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ | +| `output_dir` | Directory to store the final pipeline in. Will be created if it doesn't exist. ~~Path \(positional)~~ | +| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions). ~~Optional[Path] \(option)~~ | +| `--verbose`, `-V` | Show more detailed messages during processing. ~~bool (flag)~~ | +| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ | +| overrides | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.data ./data`. ~~Any (option/flag)~~ | +| **CREATES** | The final assembled pipeline. | + ## package {#package tag="command"} Generate an installable [Python package](/usage/training#models-generating) from