Accept multiple code files in all CLI commands (#12101)

* Add support for multiple code files to all relevant commands Prior to this, only the package command supported multiple code files. * Update docs * Add debug data test, plus generic fixtures One tricky thing here: it's tempting to create the config by creating a pipeline in code, but that requires declaring the custom components here. However the CliRunner appears to be run in the same process or otherwise have access to our registry, so it works even without any code arguments. So it's necessary to avoid declaring the components in the tests. * Add debug config test and restructure The code argument imports the provided file. If it adds item to the registry, that affects global state, which CliRunner doesn't isolate. Since there's no standard way to remove things from the registry, this instead uses subprocess.run to run commands. * Use a more generic, parametrized test * Add output arg for assemble and pretrain Assemble and pretrain require an output argument. This commit adds assemble testing, but not pretrain, as that requires an actual trainable component, which is not currently in the test config. * Add evaluate test and some cleanup * Mark tests as slow * Revert argument name change * Apply suggestions from code review Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Format API CLI docs * isort * Fix imports in tests * isort * Undo changes to package CLI help * Fix python executable and lang code in test * Fix executable in another test --------- Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> Co-authored-by: Raphael Mitsch <r.mitsch@outlook.com>
2025-07-03 11:23:12 +03:00 · 2023-08-01 22:24:02 +09:00 · 2023-08-01 22:24:02 +09:00 · b4e457d9fe
commit b4e457d9fe
parent 2702db9fef
10 changed files with 287 additions and 72 deletions
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@ -350,6 +350,13 @@ def show_validation_error(
        msg.fail("Config validation error", e, exits=1)
 def import_code_paths(code_paths: str) -> None:
    """Helper to import comma-separated list of code paths."""
    code_paths = [Path(p.strip()) for p in string_to_list(code_paths)]
    for code_path in code_paths:
        import_code(code_path)
 def import_code(code_path: Optional[Union[Path, str]]) -> None:
    """Helper to import Python file provided in training commands / commands
    using the config. This makes custom registered functions available.
--- a/spacy/cli/assemble.py
+++ b/spacy/cli/assemble.py
@ -11,7 +11,7 @@ from ._util import (
    Arg,
    Opt,
    app,
-    import_code,
+    import_code_paths,
    parse_config_overrides,
    show_validation_error,
 )
@ -26,7 +26,7 @@ def assemble_cli(
    ctx: typer.Context,  # This is only used to read additional arguments
    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
    output_path: Path = Arg(..., help="Output directory to store assembled pipeline in"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+    code_path: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"),
    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
    # fmt: on
 ):
@ -45,7 +45,7 @@ def assemble_cli(
    if not config_path or (str(config_path) != "-" and not config_path.exists()):
        msg.fail("Config file not found", config_path, exits=1)
    overrides = parse_config_overrides(ctx.args)
-    import_code(code_path)
+    import_code_paths(code_path)
    with show_validation_error(config_path):
        config = util.load_config(config_path, overrides=overrides, interpolate=False)
    msg.divider("Initializing pipeline")
--- a/spacy/cli/debug_config.py
+++ b/spacy/cli/debug_config.py
@ -13,7 +13,7 @@ from ._util import (
    Arg,
    Opt,
    debug_cli,
-    import_code,
+    import_code_paths,
    parse_config_overrides,
    show_validation_error,
 )
@ -27,7 +27,7 @@ def debug_config_cli(
    # fmt: off
    ctx: typer.Context,  # This is only used to read additional arguments
    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
-    code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+    code_path: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"),
    show_funcs: bool = Opt(False, "--show-functions", "-F", help="Show an overview of all registered functions used in the config and where they come from (modules, files etc.)"),
    show_vars: bool = Opt(False, "--show-variables", "-V", help="Show an overview of all variables referenced in the config and their values. This will also reflect variables overwritten on the CLI.")
    # fmt: on
@ -44,7 +44,7 @@ def debug_config_cli(
    DOCS: https://spacy.io/api/cli#debug-config
    """
    overrides = parse_config_overrides(ctx.args)
-    import_code(code_path)
+    import_code_paths(code_path)
    debug_config(
        config_path, overrides=overrides, show_funcs=show_funcs, show_vars=show_vars
    )
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@ -40,7 +40,7 @@ from ._util import (
    _format_number,
    app,
    debug_cli,
-    import_code,
+    import_code_paths,
    parse_config_overrides,
    show_validation_error,
 )
@ -72,7 +72,7 @@ def debug_data_cli(
    # fmt: off
    ctx: typer.Context,  # This is only used to read additional arguments
    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
-    code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+    code_path: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"),
    ignore_warnings: bool = Opt(False, "--ignore-warnings", "-IW", help="Ignore warnings, only show stats and errors"),
    verbose: bool = Opt(False, "--verbose", "-V", help="Print additional information and explanations"),
    no_format: bool = Opt(False, "--no-format", "-NF", help="Don't pretty-print the results"),
@ -92,7 +92,7 @@ def debug_data_cli(
            "--help for an overview of the other available debugging commands."
        )
    overrides = parse_config_overrides(ctx.args)
-    import_code(code_path)
+    import_code_paths(code_path)
    debug_data(
        config_path,
        config_overrides=overrides,
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@ -10,7 +10,7 @@ from .. import displacy, util
 from ..scorer import Scorer
 from ..tokens import Doc
 from ..training import Corpus
-from ._util import Arg, Opt, app, benchmark_cli, import_code, setup_gpu
+from ._util import Arg, Opt, app, benchmark_cli, import_code_paths, setup_gpu
@benchmark_cli.command(
@ -22,7 +22,7 @@ def evaluate_cli(
    model: str = Arg(..., help="Model name or path"),
    data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True),
    output: Optional[Path] = Opt(None, "--output", "-o", help="Output JSON file for metrics", dir_okay=False),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+    code_path: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"),
    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
    gold_preproc: bool = Opt(False, "--gold-preproc", "-G", help="Use gold preprocessing"),
    displacy_path: Optional[Path] = Opt(None, "--displacy-path", "-dp", help="Directory to output rendered parses as HTML", exists=True, file_okay=False),
@ -42,7 +42,7 @@ def evaluate_cli(
    DOCS: https://spacy.io/api/cli#benchmark-accuracy
    """
-    import_code(code_path)
+    import_code_paths(code_path)
    evaluate(
        model,
        data_path,
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@ -20,7 +20,7 @@ def package_cli(
    # fmt: off
    input_dir: Path = Arg(..., help="Directory with pipeline data", exists=True, file_okay=False),
    output_dir: Path = Arg(..., help="Output parent directory", exists=True, file_okay=False),
-    code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python file with additional code (registered functions) to be included in the package"),
+    code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be included in the package"),
    meta_path: Optional[Path] = Opt(None, "--meta-path", "--meta", "-m", help="Path to meta.json", exists=True, dir_okay=False),
    create_meta: bool = Opt(False, "--create-meta", "-C", help="Create meta.json, even if one exists"),
    name: Optional[str] = Opt(None, "--name", "-n", help="Package name to override meta"),
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@ -11,7 +11,7 @@ from ._util import (
    Arg,
    Opt,
    app,
-    import_code,
+    import_code_paths,
    parse_config_overrides,
    setup_gpu,
    show_validation_error,
@ -27,7 +27,7 @@ def pretrain_cli(
    ctx: typer.Context,  # This is only used to read additional arguments
    config_path: Path = Arg(..., help="Path to config file", exists=True, dir_okay=False, allow_dash=True),
    output_dir: Path = Arg(..., help="Directory to write weights to on each epoch"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+    code_path: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"),
    resume_path: Optional[Path] = Opt(None, "--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"),
    epoch_resume: Optional[int] = Opt(None, "--epoch-resume", "-er", help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files."),
    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
@ -56,7 +56,7 @@ def pretrain_cli(
    DOCS: https://spacy.io/api/cli#pretrain
    """
    config_overrides = parse_config_overrides(ctx.args)
-    import_code(code_path)
+    import_code_paths(code_path)
    verify_cli_args(config_path, output_dir, resume_path, epoch_resume)
    setup_gpu(use_gpu)
    msg.info(f"Loading config from: {config_path}")
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@ -13,7 +13,7 @@ from ._util import (
    Arg,
    Opt,
    app,
-    import_code,
+    import_code_paths,
    parse_config_overrides,
    setup_gpu,
    show_validation_error,
@ -28,7 +28,7 @@ def train_cli(
    ctx: typer.Context,  # This is only used to read additional arguments
    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
    output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory to store trained pipeline in"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+    code_path: str = Opt("", "--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be imported"),
    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
    # fmt: on
@ -49,7 +49,7 @@ def train_cli(
    """
    util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
    overrides = parse_config_overrides(ctx.args)
-    import_code(code_path)
+    import_code_paths(code_path)
    train(config_path, output_path, use_gpu=use_gpu, overrides=overrides)
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@ -1,10 +1,13 @@
 import os
 import subprocess
 import sys
 from pathlib import Path
 import pytest
 import srsly
 from typer.testing import CliRunner
 import spacy
 from spacy.cli._util import app, get_git_version
 from spacy.tokens import Doc, DocBin
@ -46,6 +49,210 @@ def test_convert_auto_conflict():
        assert len(out_files) == 0
 NOOP_CONFIG = """
 [paths]
 train = null
 dev = null
 vectors = null
 init_tok2vec = null
 [system]
 seed = 0
 gpu_allocator = null
 [nlp]
 lang = "mul"
 pipeline = ["noop", "noop2"]
 disabled = []
 before_creation = null
 after_creation = null
 after_pipeline_creation = null
 batch_size = 1000
 tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
 [components]
 [components.noop]
 factory = "noop"
 [components.noop2]
 factory = "noop2"
 [corpora]
 [corpora.dev]
@readers = "spacy.Corpus.v1"
 path = ${paths.dev}
 gold_preproc = false
 max_length = 0
 limit = 0
 augmenter = null
 [corpora.train]
@readers = "spacy.Corpus.v1"
 path = ${paths.train}
 gold_preproc = false
 max_length = 0
 limit = 0
 augmenter = null
 [training]
 seed = ${system.seed}
 gpu_allocator = ${system.gpu_allocator}
 dropout = 0.1
 accumulate_gradient = 1
 patience = 1600
 max_epochs = 0
 max_steps = 100
 eval_frequency = 200
 frozen_components = []
 annotating_components = []
 dev_corpus = "corpora.dev"
 train_corpus = "corpora.train"
 before_to_disk = null
 before_update = null
 [training.batcher]
@batchers = "spacy.batch_by_words.v1"
 discard_oversize = false
 tolerance = 0.2
 get_length = null
 [training.batcher.size]
@schedules = "compounding.v1"
 start = 100
 stop = 1000
 compound = 1.001
 t = 0.0
 [training.logger]
@loggers = "spacy.ConsoleLogger.v1"
 progress_bar = false
 [training.optimizer]
@optimizers = "Adam.v1"
 beta1 = 0.9
 beta2 = 0.999
 L2_is_weight_decay = true
 L2 = 0.01
 grad_clip = 1.0
 use_averages = false
 eps = 0.00000001
 learn_rate = 0.001
 [training.score_weights]
 [pretraining]
 [initialize]
 vectors = ${paths.vectors}
 init_tok2vec = ${paths.init_tok2vec}
 vocab_data = null
 lookups = null
 before_init = null
 after_init = null
 [initialize.components]
 [initialize.tokenizer]
 """
@pytest.fixture
 def data_paths():
    nlp = spacy.blank("mul")
    doc = nlp("ok")
    with make_tempdir() as tdir:
        db = DocBin()
        # debug data will *fail* if there aren't enough docs
        for ii in range(100):
            db.add(doc)
        fpath = tdir / "data.spacy"
        db.to_disk(fpath)
        args = [
            "--paths.train",
            str(fpath),
            "--paths.dev",
            str(fpath),
        ]
        yield args
@pytest.fixture
 def code_paths():
    noop_base = """
 from spacy.language import Language
@Language.component("{}")
 def noop(doc):
    return doc
 """
    with make_tempdir() as temp_d:
        # write code files to load
        paths = []
        for ff in ["noop", "noop2"]:
            pyfile = temp_d / f"{ff}.py"
            pyfile.write_text(noop_base.format(ff))
            paths.append(pyfile)
        args = ["--code", ",".join([str(pp) for pp in paths])]
        yield args
@pytest.fixture
 def noop_config():
    with make_tempdir() as temp_d:
        cfg = temp_d / "config.cfg"
        cfg.write_text(NOOP_CONFIG)
        yield cfg
@pytest.mark.slow
@pytest.mark.parametrize(
    "cmd",
    ["debug config", "debug data", "train", "assemble"],
 )
 def test_multi_code(cmd, code_paths, data_paths, noop_config):
    # check that it fails without the code arg
    cmd = cmd.split()
    output = ["."] if cmd[0] == "assemble" else []
    cmd = [sys.executable, "-m", "spacy"] + cmd
    result = subprocess.run([*cmd, str(noop_config), *output, *data_paths])
    assert result.returncode == 1
    # check that it succeeds with the code arg
    result = subprocess.run([*cmd, str(noop_config), *output, *data_paths, *code_paths])
    assert result.returncode == 0
@pytest.mark.slow
 def test_multi_code_evaluate(code_paths, data_paths, noop_config):
    # Evaluation requires a model, not a config, so this works differently from
    # the other commands.
    # Train a model to evaluate
    cmd = f"{sys.executable} -m spacy train {noop_config} -o model".split()
    result = subprocess.run([*cmd, *data_paths, *code_paths])
    assert result.returncode == 0
    # now do the evaluation
    eval_data = data_paths[-1]
    cmd = f"{sys.executable} -m spacy evaluate model/model-best {eval_data}".split()
    # check that it fails without the code arg
    result = subprocess.run(cmd)
    assert result.returncode == 1
    # check that it succeeds with the code arg
    result = subprocess.run([*cmd, *code_paths])
    assert result.returncode == 0
 def test_benchmark_accuracy_alias():
    # Verify that the `evaluate` alias works correctly.
    result_benchmark = CliRunner().invoke(app, ["benchmark", "accuracy", "--help"])
--- a/website/docs/api/cli.mdx
+++ b/website/docs/api/cli.mdx
@ -175,15 +175,15 @@ validation error with more details.
 $ python -m spacy init fill-config [base_path] [output_file] [--diff]
 ```
-| Name                   | Description                                                                                                                                                                          |
+| Name                   | Description                                                                                                                                                                                            |
-| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `base_path`            | Path to base config to fill, e.g. generated by the [quickstart widget](/usage/training#quickstart). ~~Path (positional)~~                                                            |
+| `base_path`            | Path to base config to fill, e.g. generated by the [quickstart widget](/usage/training#quickstart). ~~Path (positional)~~                                                                              |
-| `output_file`          | Path to output `.cfg` file or "-" to write to stdout so you can pipe it to a file. Defaults to "-" (stdout). ~~Path (positional)~~                                                   |
+| `output_file`          | Path to output `.cfg` file or "-" to write to stdout so you can pipe it to a file. Defaults to "-" (stdout). ~~Path (positional)~~                                                                     |
-| `--code`, `-c`         | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--code`, `-c`         | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
-| `--pretraining`, `-pt` | Include config for pretraining (with [`spacy pretrain`](/api/cli#pretrain)). Defaults to `False`. ~~bool (flag)~~                                                                    |
+| `--pretraining`, `-pt` | Include config for pretraining (with [`spacy pretrain`](/api/cli#pretrain)). Defaults to `False`. ~~bool (flag)~~                                                                                      |
-| `--diff`, `-D`         | Print a visual diff highlighting the changes. ~~bool (flag)~~                                                                                                                        |
+| `--diff`, `-D`         | Print a visual diff highlighting the changes. ~~bool (flag)~~                                                                                                                                          |
-| `--help`, `-h`         | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           |
+| `--help`, `-h`         | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                             |
-| **CREATES**            | Complete and auto-filled config file for training.                                                                                                                                   |
+| **CREATES**            | Complete and auto-filled config file for training.                                                                                                                                                     |
 ### init vectors {id="init-vectors",version="3",tag="command"}
@ -242,7 +242,7 @@ $ python -m spacy init labels [config_path] [output_path] [--code] [--verbose] [
 | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `config_path`     | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
 | `output_path`     | Output directory for the label files. Will create one JSON file per component. ~~Path (positional)~~                                                                                                               |
-| `--code`, `-c`    | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~                               |
+| `--code`, `-c`    | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~             |
 | `--verbose`, `-V` | Show more detailed messages for debugging purposes. ~~bool (flag)~~                                                                                                                                                |
 | `--gpu-id`, `-g`  | GPU ID or `-1` for CPU. Defaults to `-1`. ~~int (option)~~                                                                                                                                                         |
 | `--help`, `-h`    | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                         |
@ -446,7 +446,7 @@ File       /path/to/thinc/thinc/schedules.py (line 91)
 | Name                     | Description                                                                                                                                                                                                                    |
 | ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `config_path`            | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~             |
-| `--code`, `-c`           | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~                                           |
+| `--code`, `-c`           | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~                         |
 | `--show-functions`, `-F` | Show an overview of all registered function blocks used in the config and where those functions come from, including the module name, Python file and line number. ~~bool (flag)~~                                             |
 | `--show-variables`, `-V` | Show an overview of all variables referenced in the config, e.g. `${paths.train}` and their values that will be used. This also reflects any config overrides provided on the CLI, e.g. `--paths.train /path`. ~~bool (flag)~~ |
 | `--help`, `-h`           | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                                     |
@ -631,7 +631,7 @@ will not be available.
 | Name                       | Description                                                                                                                                                                                                        |
 | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `config_path`              | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
-| `--code`, `-c`             | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~                               |
+| `--code`, `-c`             | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~             |
 | `--ignore-warnings`, `-IW` | Ignore warnings, only show stats and errors. ~~bool (flag)~~                                                                                                                                                       |
 | `--verbose`, `-V`          | Print additional information and explanations. ~~bool (flag)~~                                                                                                                                                     |
 | `--no-format`, `-NF`       | Don't pretty-print the results. Use this if you want to write to a file. ~~bool (flag)~~                                                                                                                           |
@ -1055,7 +1055,7 @@ $ python -m spacy train [config_path] [--output] [--code] [--verbose] [--gpu-id]
 | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `config_path`     | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
 | `--output`, `-o`  | Directory to store trained pipeline in. Will be created if it doesn't exist. ~~Optional[Path] \(option)~~                                                                                                          |
-| `--code`, `-c`    | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~                               |
+| `--code`, `-c`    | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~             |
 | `--verbose`, `-V` | Show more detailed messages during training. ~~bool (flag)~~                                                                                                                                                       |
 | `--gpu-id`, `-g`  | GPU ID or `-1` for CPU. Defaults to `-1`. ~~int (option)~~                                                                                                                                                         |
 | `--help`, `-h`    | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                         |
@ -1125,6 +1125,7 @@ $ python -m spacy pretrain [config_path] [output_dir] [--code] [--resume-path] [
 | -------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `config_path`                                      | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
 | `output_dir`                                       | Directory to save binary weights to on each epoch. ~~Path (positional)~~                                                                                                                                           |
 | `--code`, `-c`                                     | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~             |
 | `--code`, `-c`                                     | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~                               |
 | `--resume-path`, `-r`                              | Path to pretrained weights from which to resume pretraining. ~~Optional[Path] \(option)~~                                                                                                                          |
 | `--epoch-resume`, `-er`                            | The epoch to resume counting from when using `--resume-path`. Prevents unintended overwriting of existing weight files. ~~Optional[int] \(option)~~                                                                |
@ -1162,19 +1163,19 @@ skew. To render a sample of dependency parses in a HTML file using the
 $ python -m spacy benchmark accuracy [model] [data_path] [--output] [--code] [--gold-preproc] [--gpu-id] [--displacy-path] [--displacy-limit]
 ```
-| Name                                                 | Description                                                                                                                                                                          |
+| Name                                                 | Description                                                                                                                                                                                            |
-| ---------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| ---------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `model`                                              | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                           |
+| `model`                                              | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                                             |
-| `data_path`                                          | Location of evaluation data in spaCy's [binary format](/api/data-formats#training). ~~Path (positional)~~                                                                            |
+| `data_path`                                          | Location of evaluation data in spaCy's [binary format](/api/data-formats#training). ~~Path (positional)~~                                                                                              |
-| `--output`, `-o`                                     | Output JSON file for metrics. If not set, no metrics will be exported. ~~Optional[Path] \(option)~~                                                                                  |
+| `--output`, `-o`                                     | Output JSON file for metrics. If not set, no metrics will be exported. ~~Optional[Path] \(option)~~                                                                                                    |
-| `--code`, `-c` <Tag variant="new">3</Tag>            | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--code`, `-c` <Tag variant="new">3</Tag>            | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
-| `--gold-preproc`, `-G`                               | Use gold preprocessing. ~~bool (flag)~~                                                                                                                                              |
+| `--gold-preproc`, `-G`                               | Use gold preprocessing. ~~bool (flag)~~                                                                                                                                                                |
-| `--gpu-id`, `-g`                                     | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
+| `--gpu-id`, `-g`                                     | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                                         |
-| `--displacy-path`, `-dp`                             | Directory to output rendered parses as HTML. If not set, no visualizations will be generated. ~~Optional[Path] \(option)~~                                                           |
+| `--displacy-path`, `-dp`                             | Directory to output rendered parses as HTML. If not set, no visualizations will be generated. ~~Optional[Path] \(option)~~                                                                             |
-| `--displacy-limit`, `-dl`                            | Number of parses to generate per file. Defaults to `25`. Keep in mind that a significantly higher number might cause the `.html` files to render slowly. ~~int (option)~~            |
+| `--displacy-limit`, `-dl`                            | Number of parses to generate per file. Defaults to `25`. Keep in mind that a significantly higher number might cause the `.html` files to render slowly. ~~int (option)~~                              |
-| `--per-component`, `-P` <Tag variant="new">3.6</Tag> | Whether to return the scores keyed by component name. Defaults to `False`. ~~bool (flag)~~                                                                                           |
+| `--per-component`, `-P` <Tag variant="new">3.6</Tag> | Whether to return the scores keyed by component name. Defaults to `False`. ~~bool (flag)~~                                                                                                             |
-| `--help`, `-h`                                       | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           |
+| `--help`, `-h`                                       | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                             |
-| **CREATES**                                          | Training results and optional metrics and visualizations.                                                                                                                            |
+| **CREATES**                                          | Training results and optional metrics and visualizations.                                                                                                                                              |
 ### speed {id="benchmark-speed", version="3.5", tag="command"}
@ -1216,19 +1217,19 @@ When a directory is provided it is traversed recursively to collect all files.
 $ python -m spacy apply [model] [data-path] [output-file] [--code] [--text-key] [--force-overwrite] [--gpu-id] [--batch-size] [--n-process]
 ```
-| Name                      | Description                                                                                                                                                                          |
+| Name                      | Description                                                                                                                                                                                            |
-| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `model`                   | Pipeline to apply to the data. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                  |
+| `model`                   | Pipeline to apply to the data. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                                    |
-| `data_path`               | Location of data to be evaluated in spaCy's [binary format](/api/data-formats#training), jsonl, or plain text. ~~Path (positional)~~                                                 |
+| `data_path`               | Location of data to be evaluated in spaCy's [binary format](/api/data-formats#training), jsonl, or plain text. ~~Path (positional)~~                                                                   |
-| `output-file`             | Output `DocBin` path. ~~str (positional)~~                                                                                                                                           |
+| `output-file`             | Output `DocBin` path. ~~str (positional)~~                                                                                                                                                             |
-| `--code`, `-c`            | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--code`, `-c`            | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
-| `--text-key`, `-tk`       | The key for `.jsonl` files to use to grab the texts from. Defaults to `text`. ~~Optional[str] \(option)~~                                                                            |
+| `--text-key`, `-tk`       | The key for `.jsonl` files to use to grab the texts from. Defaults to `text`. ~~Optional[str] \(option)~~                                                                                              |
-| `--force-overwrite`, `-F` | If the provided `output-file` already exists, then force `apply` to overwrite it. If this is `False` (default) then quits with a warning instead. ~~bool (flag)~~                    |
+| `--force-overwrite`, `-F` | If the provided `output-file` already exists, then force `apply` to overwrite it. If this is `False` (default) then quits with a warning instead. ~~bool (flag)~~                                      |
-| `--gpu-id`, `-g`          | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
+| `--gpu-id`, `-g`          | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                                         |
-| `--batch-size`, `-b`      | Batch size to use for prediction. Defaults to `1`. ~~int (option)~~                                                                                                                  |
+| `--batch-size`, `-b`      | Batch size to use for prediction. Defaults to `1`. ~~int (option)~~                                                                                                                                    |
-| `--n-process`, `-n`       | Number of processes to use for prediction. Defaults to `1`. ~~int (option)~~                                                                                                         |
+| `--n-process`, `-n`       | Number of processes to use for prediction. Defaults to `1`. ~~int (option)~~                                                                                                                           |
-| `--help`, `-h`            | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           |
+| `--help`, `-h`            | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                             |
-| **CREATES**               | A `DocBin` with the annotations from the `model` for all the files found in `data-path`.                                                                                             |
+| **CREATES**               | A `DocBin` with the annotations from the `model` for all the files found in `data-path`.                                                                                                               |
 ## find-threshold {id="find-threshold",version="3.5",tag="command"}
@ -1255,19 +1256,19 @@ be provided.
 > $ python -m spacy find-threshold my_nlp data.spacy spancat threshold spans_sc_f
 > ```
-| Name                     | Description                                                                                                                                                                          |
+| Name                     | Description                                                                                                                                                                                            |
-| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `model`                  | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                           |
+| `model`                  | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                                             |
-| `data_path`              | Path to file with DocBin with docs to use for threshold search. ~~Path (positional)~~                                                                                                |
+| `data_path`              | Path to file with DocBin with docs to use for threshold search. ~~Path (positional)~~                                                                                                                  |
-| `pipe_name`              | Name of pipe to examine thresholds for. ~~str (positional)~~                                                                                                                         |
+| `pipe_name`              | Name of pipe to examine thresholds for. ~~str (positional)~~                                                                                                                                           |
-| `threshold_key`          | Key of threshold attribute in component's configuration. ~~str (positional)~~                                                                                                        |
+| `threshold_key`          | Key of threshold attribute in component's configuration. ~~str (positional)~~                                                                                                                          |
-| `scores_key`             | Name of score to metric to optimize. ~~str (positional)~~                                                                                                                            |
+| `scores_key`             | Name of score to metric to optimize. ~~str (positional)~~                                                                                                                                              |
-| `--n_trials`, `-n`       | Number of trials to determine optimal thresholds. ~~int (option)~~                                                                                                                   |
+| `--n_trials`, `-n`       | Number of trials to determine optimal thresholds. ~~int (option)~~                                                                                                                                     |
-| `--code`, `-c`           | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--code`, `-c`           | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
-| `--gpu-id`, `-g`         | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
+| `--gpu-id`, `-g`         | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                                         |
-| `--gold-preproc`, `-G`   | Use gold preprocessing. ~~bool (flag)~~                                                                                                                                              |
+| `--gold-preproc`, `-G`   | Use gold preprocessing. ~~bool (flag)~~                                                                                                                                                                |
-| `--verbose`, `-V`, `-VV` | Display more information for debugging purposes. ~~bool (flag)~~                                                                                                                     |
+| `--verbose`, `-V`, `-VV` | Display more information for debugging purposes. ~~bool (flag)~~                                                                                                                                       |
-| `--help`, `-h`           | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           |
+| `--help`, `-h`           | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                             |
 ## assemble {id="assemble",tag="command"}
@ -1291,7 +1292,7 @@ $ python -m spacy assemble [config_path] [output_dir] [--code] [--verbose] [over
 | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `config_path`     | Path to the [config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
 | `output_dir`      | Directory to store the final pipeline in. Will be created if it doesn't exist. ~~Optional[Path] \(option)~~                                                                                                   |
-| `--code`, `-c`    | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions). ~~Optional[Path] \(option)~~                                                |
+| `--code`, `-c`    | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~        |
 | `--verbose`, `-V` | Show more detailed messages during processing. ~~bool (flag)~~                                                                                                                                                |
 | `--help`, `-h`    | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                    |
 | overrides         | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.data ./data`. ~~Any (option/flag)~~                            |