diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
index f2ac20ae5..ca92cdd23 100644
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@@ -350,6 +350,13 @@ def show_validation_error(
msg.fail("Config validation error", e, exits=1)
+def import_code_paths(code_paths: str) -> None:
+ """Helper to import comma-separated list of code paths."""
+ code_paths = [Path(p.strip()) for p in string_to_list(code_paths)]
+ for code_path in code_paths:
+ import_code(code_path)
+
+
def import_code(code_path: Optional[Union[Path, str]]) -> None:
"""Helper to import Python file provided in training commands / commands
using the config. This makes custom registered functions available.
diff --git a/spacy/cli/assemble.py b/spacy/cli/assemble.py
index ee2500b27..f552c8459 100644
--- a/spacy/cli/assemble.py
+++ b/spacy/cli/assemble.py
@@ -11,7 +11,7 @@ from ._util import (
Arg,
Opt,
app,
- import_code,
+ import_code_paths,
parse_config_overrides,
show_validation_error,
)
@@ -26,7 +26,7 @@ def assemble_cli(
ctx: typer.Context, # This is only used to read additional arguments
config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
output_path: Path = Arg(..., help="Output directory to store assembled pipeline in"),
- code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+ code_path: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"),
verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
# fmt: on
):
@@ -45,7 +45,7 @@ def assemble_cli(
if not config_path or (str(config_path) != "-" and not config_path.exists()):
msg.fail("Config file not found", config_path, exits=1)
overrides = parse_config_overrides(ctx.args)
- import_code(code_path)
+ import_code_paths(code_path)
with show_validation_error(config_path):
config = util.load_config(config_path, overrides=overrides, interpolate=False)
msg.divider("Initializing pipeline")
diff --git a/spacy/cli/debug_config.py b/spacy/cli/debug_config.py
index 0e5382cd9..7818b4087 100644
--- a/spacy/cli/debug_config.py
+++ b/spacy/cli/debug_config.py
@@ -13,7 +13,7 @@ from ._util import (
Arg,
Opt,
debug_cli,
- import_code,
+ import_code_paths,
parse_config_overrides,
show_validation_error,
)
@@ -27,7 +27,7 @@ def debug_config_cli(
# fmt: off
ctx: typer.Context, # This is only used to read additional arguments
config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
- code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+ code_path: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"),
show_funcs: bool = Opt(False, "--show-functions", "-F", help="Show an overview of all registered functions used in the config and where they come from (modules, files etc.)"),
show_vars: bool = Opt(False, "--show-variables", "-V", help="Show an overview of all variables referenced in the config and their values. This will also reflect variables overwritten on the CLI.")
# fmt: on
@@ -44,7 +44,7 @@ def debug_config_cli(
DOCS: https://spacy.io/api/cli#debug-config
"""
overrides = parse_config_overrides(ctx.args)
- import_code(code_path)
+ import_code_paths(code_path)
debug_config(
config_path, overrides=overrides, show_funcs=show_funcs, show_vars=show_vars
)
diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index 4c44a8c0e..714969be1 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -40,7 +40,7 @@ from ._util import (
_format_number,
app,
debug_cli,
- import_code,
+ import_code_paths,
parse_config_overrides,
show_validation_error,
)
@@ -72,7 +72,7 @@ def debug_data_cli(
# fmt: off
ctx: typer.Context, # This is only used to read additional arguments
config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
- code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+ code_path: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"),
ignore_warnings: bool = Opt(False, "--ignore-warnings", "-IW", help="Ignore warnings, only show stats and errors"),
verbose: bool = Opt(False, "--verbose", "-V", help="Print additional information and explanations"),
no_format: bool = Opt(False, "--no-format", "-NF", help="Don't pretty-print the results"),
@@ -92,7 +92,7 @@ def debug_data_cli(
"--help for an overview of the other available debugging commands."
)
overrides = parse_config_overrides(ctx.args)
- import_code(code_path)
+ import_code_paths(code_path)
debug_data(
config_path,
config_overrides=overrides,
diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py
index 6235b658d..f035aa3ce 100644
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@@ -10,7 +10,7 @@ from .. import displacy, util
from ..scorer import Scorer
from ..tokens import Doc
from ..training import Corpus
-from ._util import Arg, Opt, app, benchmark_cli, import_code, setup_gpu
+from ._util import Arg, Opt, app, benchmark_cli, import_code_paths, setup_gpu
@benchmark_cli.command(
@@ -22,7 +22,7 @@ def evaluate_cli(
model: str = Arg(..., help="Model name or path"),
data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True),
output: Optional[Path] = Opt(None, "--output", "-o", help="Output JSON file for metrics", dir_okay=False),
- code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+ code_path: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"),
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
gold_preproc: bool = Opt(False, "--gold-preproc", "-G", help="Use gold preprocessing"),
displacy_path: Optional[Path] = Opt(None, "--displacy-path", "-dp", help="Directory to output rendered parses as HTML", exists=True, file_okay=False),
@@ -42,7 +42,7 @@ def evaluate_cli(
DOCS: https://spacy.io/api/cli#benchmark-accuracy
"""
- import_code(code_path)
+ import_code_paths(code_path)
evaluate(
model,
data_path,
diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index 4545578e6..01449f957 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -20,7 +20,7 @@ def package_cli(
# fmt: off
input_dir: Path = Arg(..., help="Directory with pipeline data", exists=True, file_okay=False),
output_dir: Path = Arg(..., help="Output parent directory", exists=True, file_okay=False),
- code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python file with additional code (registered functions) to be included in the package"),
+ code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be included in the package"),
meta_path: Optional[Path] = Opt(None, "--meta-path", "--meta", "-m", help="Path to meta.json", exists=True, dir_okay=False),
create_meta: bool = Opt(False, "--create-meta", "-C", help="Create meta.json, even if one exists"),
name: Optional[str] = Opt(None, "--name", "-n", help="Package name to override meta"),
diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index 446c40510..73337a7ca 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -11,7 +11,7 @@ from ._util import (
Arg,
Opt,
app,
- import_code,
+ import_code_paths,
parse_config_overrides,
setup_gpu,
show_validation_error,
@@ -27,7 +27,7 @@ def pretrain_cli(
ctx: typer.Context, # This is only used to read additional arguments
config_path: Path = Arg(..., help="Path to config file", exists=True, dir_okay=False, allow_dash=True),
output_dir: Path = Arg(..., help="Directory to write weights to on each epoch"),
- code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+ code_path: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"),
resume_path: Optional[Path] = Opt(None, "--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"),
epoch_resume: Optional[int] = Opt(None, "--epoch-resume", "-er", help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files."),
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
@@ -56,7 +56,7 @@ def pretrain_cli(
DOCS: https://spacy.io/api/cli#pretrain
"""
config_overrides = parse_config_overrides(ctx.args)
- import_code(code_path)
+ import_code_paths(code_path)
verify_cli_args(config_path, output_dir, resume_path, epoch_resume)
setup_gpu(use_gpu)
msg.info(f"Loading config from: {config_path}")
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 8bdabd39c..eb1a1a2c1 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -13,7 +13,7 @@ from ._util import (
Arg,
Opt,
app,
- import_code,
+ import_code_paths,
parse_config_overrides,
setup_gpu,
show_validation_error,
@@ -28,7 +28,7 @@ def train_cli(
ctx: typer.Context, # This is only used to read additional arguments
config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory to store trained pipeline in"),
- code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+ code_path: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"),
verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
# fmt: on
@@ -49,7 +49,7 @@ def train_cli(
"""
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
overrides = parse_config_overrides(ctx.args)
- import_code(code_path)
+ import_code_paths(code_path)
train(config_path, output_path, use_gpu=use_gpu, overrides=overrides)
diff --git a/spacy/tests/test_cli_app.py b/spacy/tests/test_cli_app.py
index 3a426113b..2424138d3 100644
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@@ -1,10 +1,13 @@
import os
+import subprocess
+import sys
from pathlib import Path
import pytest
import srsly
from typer.testing import CliRunner
+import spacy
from spacy.cli._util import app, get_git_version
from spacy.tokens import Doc, DocBin
@@ -46,6 +49,210 @@ def test_convert_auto_conflict():
assert len(out_files) == 0
+NOOP_CONFIG = """
+[paths]
+train = null
+dev = null
+vectors = null
+init_tok2vec = null
+
+[system]
+seed = 0
+gpu_allocator = null
+
+[nlp]
+lang = "mul"
+pipeline = ["noop", "noop2"]
+disabled = []
+before_creation = null
+after_creation = null
+after_pipeline_creation = null
+batch_size = 1000
+tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
+
+[components]
+
+[components.noop]
+factory = "noop"
+
+[components.noop2]
+factory = "noop2"
+
+[corpora]
+
+[corpora.dev]
+@readers = "spacy.Corpus.v1"
+path = ${paths.dev}
+gold_preproc = false
+max_length = 0
+limit = 0
+augmenter = null
+
+[corpora.train]
+@readers = "spacy.Corpus.v1"
+path = ${paths.train}
+gold_preproc = false
+max_length = 0
+limit = 0
+augmenter = null
+
+[training]
+seed = ${system.seed}
+gpu_allocator = ${system.gpu_allocator}
+dropout = 0.1
+accumulate_gradient = 1
+patience = 1600
+max_epochs = 0
+max_steps = 100
+eval_frequency = 200
+frozen_components = []
+annotating_components = []
+dev_corpus = "corpora.dev"
+
+train_corpus = "corpora.train"
+before_to_disk = null
+before_update = null
+
+[training.batcher]
+@batchers = "spacy.batch_by_words.v1"
+discard_oversize = false
+tolerance = 0.2
+get_length = null
+
+[training.batcher.size]
+@schedules = "compounding.v1"
+start = 100
+stop = 1000
+compound = 1.001
+t = 0.0
+
+[training.logger]
+@loggers = "spacy.ConsoleLogger.v1"
+progress_bar = false
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+beta1 = 0.9
+beta2 = 0.999
+L2_is_weight_decay = true
+L2 = 0.01
+grad_clip = 1.0
+use_averages = false
+eps = 0.00000001
+learn_rate = 0.001
+
+[training.score_weights]
+
+[pretraining]
+
+[initialize]
+vectors = ${paths.vectors}
+init_tok2vec = ${paths.init_tok2vec}
+vocab_data = null
+lookups = null
+before_init = null
+after_init = null
+
+[initialize.components]
+
+[initialize.tokenizer]
+"""
+
+
+@pytest.fixture
+def data_paths():
+ nlp = spacy.blank("mul")
+ doc = nlp("ok")
+ with make_tempdir() as tdir:
+ db = DocBin()
+ # debug data will *fail* if there aren't enough docs
+ for ii in range(100):
+ db.add(doc)
+ fpath = tdir / "data.spacy"
+ db.to_disk(fpath)
+
+ args = [
+ "--paths.train",
+ str(fpath),
+ "--paths.dev",
+ str(fpath),
+ ]
+ yield args
+
+
+@pytest.fixture
+def code_paths():
+ noop_base = """
+from spacy.language import Language
+
+@Language.component("{}")
+def noop(doc):
+ return doc
+"""
+
+ with make_tempdir() as temp_d:
+ # write code files to load
+ paths = []
+ for ff in ["noop", "noop2"]:
+ pyfile = temp_d / f"{ff}.py"
+ pyfile.write_text(noop_base.format(ff))
+ paths.append(pyfile)
+
+ args = ["--code", ",".join([str(pp) for pp in paths])]
+ yield args
+
+
+@pytest.fixture
+def noop_config():
+ with make_tempdir() as temp_d:
+ cfg = temp_d / "config.cfg"
+ cfg.write_text(NOOP_CONFIG)
+
+ yield cfg
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize(
+ "cmd",
+ ["debug config", "debug data", "train", "assemble"],
+)
+def test_multi_code(cmd, code_paths, data_paths, noop_config):
+ # check that it fails without the code arg
+ cmd = cmd.split()
+ output = ["."] if cmd[0] == "assemble" else []
+ cmd = [sys.executable, "-m", "spacy"] + cmd
+ result = subprocess.run([*cmd, str(noop_config), *output, *data_paths])
+ assert result.returncode == 1
+
+ # check that it succeeds with the code arg
+ result = subprocess.run([*cmd, str(noop_config), *output, *data_paths, *code_paths])
+ assert result.returncode == 0
+
+
+@pytest.mark.slow
+def test_multi_code_evaluate(code_paths, data_paths, noop_config):
+ # Evaluation requires a model, not a config, so this works differently from
+ # the other commands.
+
+ # Train a model to evaluate
+ cmd = f"{sys.executable} -m spacy train {noop_config} -o model".split()
+ result = subprocess.run([*cmd, *data_paths, *code_paths])
+ assert result.returncode == 0
+
+ # now do the evaluation
+
+ eval_data = data_paths[-1]
+ cmd = f"{sys.executable} -m spacy evaluate model/model-best {eval_data}".split()
+
+ # check that it fails without the code arg
+ result = subprocess.run(cmd)
+ assert result.returncode == 1
+
+ # check that it succeeds with the code arg
+ result = subprocess.run([*cmd, *code_paths])
+ assert result.returncode == 0
+
+
def test_benchmark_accuracy_alias():
# Verify that the `evaluate` alias works correctly.
result_benchmark = CliRunner().invoke(app, ["benchmark", "accuracy", "--help"])
diff --git a/website/docs/api/cli.mdx b/website/docs/api/cli.mdx
index b6a32f722..002b0b39d 100644
--- a/website/docs/api/cli.mdx
+++ b/website/docs/api/cli.mdx
@@ -175,15 +175,15 @@ validation error with more details.
$ python -m spacy init fill-config [base_path] [output_file] [--diff]
```
-| Name | Description |
-| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `base_path` | Path to base config to fill, e.g. generated by the [quickstart widget](/usage/training#quickstart). ~~Path (positional)~~ |
-| `output_file` | Path to output `.cfg` file or "-" to write to stdout so you can pipe it to a file. Defaults to "-" (stdout). ~~Path (positional)~~ |
-| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
-| `--pretraining`, `-pt` | Include config for pretraining (with [`spacy pretrain`](/api/cli#pretrain)). Defaults to `False`. ~~bool (flag)~~ |
-| `--diff`, `-D` | Print a visual diff highlighting the changes. ~~bool (flag)~~ |
-| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
-| **CREATES** | Complete and auto-filled config file for training. |
+| Name | Description |
+| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `base_path` | Path to base config to fill, e.g. generated by the [quickstart widget](/usage/training#quickstart). ~~Path (positional)~~ |
+| `output_file` | Path to output `.cfg` file or "-" to write to stdout so you can pipe it to a file. Defaults to "-" (stdout). ~~Path (positional)~~ |
+| `--code`, `-c` | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--pretraining`, `-pt` | Include config for pretraining (with [`spacy pretrain`](/api/cli#pretrain)). Defaults to `False`. ~~bool (flag)~~ |
+| `--diff`, `-D` | Print a visual diff highlighting the changes. ~~bool (flag)~~ |
+| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
+| **CREATES** | Complete and auto-filled config file for training. |
### init vectors {id="init-vectors",version="3",tag="command"}
@@ -242,7 +242,7 @@ $ python -m spacy init labels [config_path] [output_path] [--code] [--verbose] [
| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `config_path` | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
| `output_path` | Output directory for the label files. Will create one JSON file per component. ~~Path (positional)~~ |
-| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--code`, `-c` | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
| `--verbose`, `-V` | Show more detailed messages for debugging purposes. ~~bool (flag)~~ |
| `--gpu-id`, `-g` | GPU ID or `-1` for CPU. Defaults to `-1`. ~~int (option)~~ |
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
@@ -446,7 +446,7 @@ File /path/to/thinc/thinc/schedules.py (line 91)
| Name | Description |
| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `config_path` | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
-| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--code`, `-c` | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
| `--show-functions`, `-F` | Show an overview of all registered function blocks used in the config and where those functions come from, including the module name, Python file and line number. ~~bool (flag)~~ |
| `--show-variables`, `-V` | Show an overview of all variables referenced in the config, e.g. `${paths.train}` and their values that will be used. This also reflects any config overrides provided on the CLI, e.g. `--paths.train /path`. ~~bool (flag)~~ |
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
@@ -631,7 +631,7 @@ will not be available.
| Name | Description |
| -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `config_path` | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
-| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--code`, `-c` | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
| `--ignore-warnings`, `-IW` | Ignore warnings, only show stats and errors. ~~bool (flag)~~ |
| `--verbose`, `-V` | Print additional information and explanations. ~~bool (flag)~~ |
| `--no-format`, `-NF` | Don't pretty-print the results. Use this if you want to write to a file. ~~bool (flag)~~ |
@@ -1055,7 +1055,7 @@ $ python -m spacy train [config_path] [--output] [--code] [--verbose] [--gpu-id]
| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `config_path` | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
| `--output`, `-o` | Directory to store trained pipeline in. Will be created if it doesn't exist. ~~Optional[Path] \(option)~~ |
-| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--code`, `-c` | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
| `--verbose`, `-V` | Show more detailed messages during training. ~~bool (flag)~~ |
| `--gpu-id`, `-g` | GPU ID or `-1` for CPU. Defaults to `-1`. ~~int (option)~~ |
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
@@ -1125,6 +1125,7 @@ $ python -m spacy pretrain [config_path] [output_dir] [--code] [--resume-path] [
| -------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `config_path` | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
| `output_dir` | Directory to save binary weights to on each epoch. ~~Path (positional)~~ |
+| `--code`, `-c` | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
| `--resume-path`, `-r` | Path to pretrained weights from which to resume pretraining. ~~Optional[Path] \(option)~~ |
| `--epoch-resume`, `-er` | The epoch to resume counting from when using `--resume-path`. Prevents unintended overwriting of existing weight files. ~~Optional[int] \(option)~~ |
@@ -1162,19 +1163,19 @@ skew. To render a sample of dependency parses in a HTML file using the
$ python -m spacy benchmark accuracy [model] [data_path] [--output] [--code] [--gold-preproc] [--gpu-id] [--displacy-path] [--displacy-limit]
```
-| Name | Description |
-| ---------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `model` | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~ |
-| `data_path` | Location of evaluation data in spaCy's [binary format](/api/data-formats#training). ~~Path (positional)~~ |
-| `--output`, `-o` | Output JSON file for metrics. If not set, no metrics will be exported. ~~Optional[Path] \(option)~~ |
-| `--code`, `-c` 3 | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
-| `--gold-preproc`, `-G` | Use gold preprocessing. ~~bool (flag)~~ |
-| `--gpu-id`, `-g` | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~ |
-| `--displacy-path`, `-dp` | Directory to output rendered parses as HTML. If not set, no visualizations will be generated. ~~Optional[Path] \(option)~~ |
-| `--displacy-limit`, `-dl` | Number of parses to generate per file. Defaults to `25`. Keep in mind that a significantly higher number might cause the `.html` files to render slowly. ~~int (option)~~ |
-| `--per-component`, `-P` 3.6 | Whether to return the scores keyed by component name. Defaults to `False`. ~~bool (flag)~~ |
-| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
-| **CREATES** | Training results and optional metrics and visualizations. |
+| Name | Description |
+| ---------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `model` | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~ |
+| `data_path` | Location of evaluation data in spaCy's [binary format](/api/data-formats#training). ~~Path (positional)~~ |
+| `--output`, `-o` | Output JSON file for metrics. If not set, no metrics will be exported. ~~Optional[Path] \(option)~~ |
+| `--code`, `-c` 3 | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--gold-preproc`, `-G` | Use gold preprocessing. ~~bool (flag)~~ |
+| `--gpu-id`, `-g` | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~ |
+| `--displacy-path`, `-dp` | Directory to output rendered parses as HTML. If not set, no visualizations will be generated. ~~Optional[Path] \(option)~~ |
+| `--displacy-limit`, `-dl` | Number of parses to generate per file. Defaults to `25`. Keep in mind that a significantly higher number might cause the `.html` files to render slowly. ~~int (option)~~ |
+| `--per-component`, `-P` 3.6 | Whether to return the scores keyed by component name. Defaults to `False`. ~~bool (flag)~~ |
+| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
+| **CREATES** | Training results and optional metrics and visualizations. |
### speed {id="benchmark-speed", version="3.5", tag="command"}
@@ -1216,19 +1217,19 @@ When a directory is provided it is traversed recursively to collect all files.
$ python -m spacy apply [model] [data-path] [output-file] [--code] [--text-key] [--force-overwrite] [--gpu-id] [--batch-size] [--n-process]
```
-| Name | Description |
-| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `model` | Pipeline to apply to the data. Can be a package or a path to a data directory. ~~str (positional)~~ |
-| `data_path` | Location of data to be evaluated in spaCy's [binary format](/api/data-formats#training), jsonl, or plain text. ~~Path (positional)~~ |
-| `output-file` | Output `DocBin` path. ~~str (positional)~~ |
-| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
-| `--text-key`, `-tk` | The key for `.jsonl` files to use to grab the texts from. Defaults to `text`. ~~Optional[str] \(option)~~ |
-| `--force-overwrite`, `-F` | If the provided `output-file` already exists, then force `apply` to overwrite it. If this is `False` (default) then quits with a warning instead. ~~bool (flag)~~ |
-| `--gpu-id`, `-g` | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~ |
-| `--batch-size`, `-b` | Batch size to use for prediction. Defaults to `1`. ~~int (option)~~ |
-| `--n-process`, `-n` | Number of processes to use for prediction. Defaults to `1`. ~~int (option)~~ |
-| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
-| **CREATES** | A `DocBin` with the annotations from the `model` for all the files found in `data-path`. |
+| Name | Description |
+| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `model` | Pipeline to apply to the data. Can be a package or a path to a data directory. ~~str (positional)~~ |
+| `data_path` | Location of data to be evaluated in spaCy's [binary format](/api/data-formats#training), jsonl, or plain text. ~~Path (positional)~~ |
+| `output-file` | Output `DocBin` path. ~~str (positional)~~ |
+| `--code`, `-c` | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--text-key`, `-tk` | The key for `.jsonl` files to use to grab the texts from. Defaults to `text`. ~~Optional[str] \(option)~~ |
+| `--force-overwrite`, `-F` | If the provided `output-file` already exists, then force `apply` to overwrite it. If this is `False` (default) then quits with a warning instead. ~~bool (flag)~~ |
+| `--gpu-id`, `-g` | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~ |
+| `--batch-size`, `-b` | Batch size to use for prediction. Defaults to `1`. ~~int (option)~~ |
+| `--n-process`, `-n` | Number of processes to use for prediction. Defaults to `1`. ~~int (option)~~ |
+| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
+| **CREATES** | A `DocBin` with the annotations from the `model` for all the files found in `data-path`. |
## find-threshold {id="find-threshold",version="3.5",tag="command"}
@@ -1255,19 +1256,19 @@ be provided.
> $ python -m spacy find-threshold my_nlp data.spacy spancat threshold spans_sc_f
> ```
-| Name | Description |
-| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `model` | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~ |
-| `data_path` | Path to file with DocBin with docs to use for threshold search. ~~Path (positional)~~ |
-| `pipe_name` | Name of pipe to examine thresholds for. ~~str (positional)~~ |
-| `threshold_key` | Key of threshold attribute in component's configuration. ~~str (positional)~~ |
-| `scores_key` | Name of score to metric to optimize. ~~str (positional)~~ |
-| `--n_trials`, `-n` | Number of trials to determine optimal thresholds. ~~int (option)~~ |
-| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
-| `--gpu-id`, `-g` | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~ |
-| `--gold-preproc`, `-G` | Use gold preprocessing. ~~bool (flag)~~ |
-| `--verbose`, `-V`, `-VV` | Display more information for debugging purposes. ~~bool (flag)~~ |
-| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
+| Name | Description |
+| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `model` | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~ |
+| `data_path` | Path to file with DocBin with docs to use for threshold search. ~~Path (positional)~~ |
+| `pipe_name` | Name of pipe to examine thresholds for. ~~str (positional)~~ |
+| `threshold_key` | Key of threshold attribute in component's configuration. ~~str (positional)~~ |
+| `scores_key` | Name of score to metric to optimize. ~~str (positional)~~ |
+| `--n_trials`, `-n` | Number of trials to determine optimal thresholds. ~~int (option)~~ |
+| `--code`, `-c` | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--gpu-id`, `-g` | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~ |
+| `--gold-preproc`, `-G` | Use gold preprocessing. ~~bool (flag)~~ |
+| `--verbose`, `-V`, `-VV` | Display more information for debugging purposes. ~~bool (flag)~~ |
+| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
## assemble {id="assemble",tag="command"}
@@ -1291,7 +1292,7 @@ $ python -m spacy assemble [config_path] [output_dir] [--code] [--verbose] [over
| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `config_path` | Path to the [config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
| `output_dir` | Directory to store the final pipeline in. Will be created if it doesn't exist. ~~Optional[Path] \(option)~~ |
-| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions). ~~Optional[Path] \(option)~~ |
+| `--code`, `-c` | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
| `--verbose`, `-V` | Show more detailed messages during processing. ~~bool (flag)~~ |
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
| overrides | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.data ./data`. ~~Any (option/flag)~~ |