mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
assemble CLI command (#7783)
* assemble CLI command * ensure assemble runs even without training section * cleanup
This commit is contained in:
parent
15bd230413
commit
c786e98e56
|
@ -9,6 +9,7 @@ from .info import info # noqa: F401
|
|||
from .package import package # noqa: F401
|
||||
from .profile import profile # noqa: F401
|
||||
from .train import train_cli # noqa: F401
|
||||
from .assemble import assemble_cli # noqa: F401
|
||||
from .pretrain import pretrain # noqa: F401
|
||||
from .debug_data import debug_data # noqa: F401
|
||||
from .debug_config import debug_config # noqa: F401
|
||||
|
|
58
spacy/cli/assemble.py
Normal file
58
spacy/cli/assemble.py
Normal file
|
@ -0,0 +1,58 @@
|
|||
from typing import Optional
|
||||
from pathlib import Path
|
||||
from wasabi import msg
|
||||
import typer
|
||||
import logging
|
||||
|
||||
from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
|
||||
from ._util import import_code
|
||||
from ..training.initialize import init_nlp
|
||||
from .. import util
|
||||
from ..util import get_sourced_components, load_model_from_config
|
||||
|
||||
|
||||
@app.command(
    "assemble",
    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
)
def assemble_cli(
    # fmt: off
    ctx: typer.Context,  # This is only used to read additional arguments
    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
    output_path: Path = Arg(..., help="Output directory to store assembled pipeline in"),
    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
    # fmt: on
):
    """
    Assemble a spaCy pipeline from a config file. The config file includes
    all settings for initializing the pipeline. To override settings in the
    config, e.g. settings that point to local paths or that you want to
    experiment with, you can override them as command line options. The
    --code argument lets you pass in a Python file that can be used to
    register custom functions that are referenced in the config.

    DOCS: https://spacy.io/api/cli#assemble
    """
    # --verbose raises util.logger to DEBUG; otherwise it is set to INFO.
    if verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    util.logger.setLevel(log_level)

    # A config path of "-" is exempt from the existence check; any other
    # value has to point at something that exists on disk.
    config_found = config_path and (
        str(config_path) == "-" or config_path.exists()
    )
    if not config_found:
        msg.fail("Config file not found", config_path, exits=1)

    # Extra command-line arguments collected on the context are parsed as
    # config overrides. The code file is imported before the config is
    # loaded and the pipeline is built from it.
    config_overrides = parse_config_overrides(ctx.args)
    import_code(code_path)
    with show_validation_error(config_path):
        config = util.load_config(
            config_path,
            overrides=config_overrides,
            interpolate=False,
        )

    msg.divider("Initializing pipeline")
    # The pipeline is built from the non-interpolated config; the
    # interpolated copy is only used to look up the sourced components.
    nlp = load_model_from_config(config, auto_fill=True)
    config = config.interpolate()
    sourced_components = get_sourced_components(config)
    # Listeners have to be linked up before the pipeline is initialized.
    nlp._link_components()
    # Sourced components are disabled while initialize() runs
    # (presumably so their existing state is left untouched -- TODO confirm).
    with nlp.select_pipes(disable=list(sourced_components)):
        nlp.initialize()
    msg.good("Initialized pipeline")

    msg.divider("Serializing to disk")
    # Create the output directory (including parents) when it is missing.
    needs_output_dir = output_path is not None and not output_path.exists()
    if needs_output_dir:
        output_path.mkdir(parents=True)
        msg.good(f"Created output directory: {output_path}")
    nlp.to_disk(output_path)
|
|
@ -12,6 +12,7 @@ menu:
|
|||
- ['train', 'train']
|
||||
- ['pretrain', 'pretrain']
|
||||
- ['evaluate', 'evaluate']
|
||||
- ['assemble', 'assemble']
|
||||
- ['package', 'package']
|
||||
- ['project', 'project']
|
||||
- ['ray', 'ray']
|
||||
|
@ -892,6 +893,34 @@ $ python -m spacy evaluate [model] [data_path] [--output] [--code] [--gold-prepr
|
|||
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
|
||||
| **CREATES** | Training results and optional metrics and visualizations. |
|
||||
|
||||
## assemble {#assemble tag="command"}
|
||||
|
||||
Assemble a pipeline from a config file without additional training. Expects a
|
||||
[config file](/api/data-formats#config) with all settings and hyperparameters.
|
||||
The `--code` argument can be used to import a Python file that lets you register
|
||||
[custom functions](/usage/training#custom-functions) and refer to them in your
|
||||
config.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```cli
|
||||
> $ python -m spacy assemble config.cfg ./output
|
||||
> ```
|
||||
|
||||
```cli
|
||||
$ python -m spacy assemble [config_path] [output_dir] [--code] [--verbose] [overrides]
|
||||
```
|
||||
|
||||
| Name | Description |
|
||||
| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `config_path` | Path to the [config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
|
||||
| `output_dir` | Directory to store the final pipeline in. Will be created if it doesn't exist. ~~Path \(positional)~~ |
|
||||
| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions). ~~Optional[Path] \(option)~~ |
|
||||
| `--verbose`, `-V` | Show more detailed messages during processing. ~~bool (flag)~~ |
|
||||
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
|
||||
| overrides | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.data ./data`. ~~Any (option/flag)~~ |
|
||||
| **CREATES** | The final assembled pipeline. |
|
||||
|
||||
## package {#package tag="command"}
|
||||
|
||||
Generate an installable [Python package](/usage/training#models-generating) from
|
||||
|
|
Loading…
Reference in New Issue
Block a user