From c14be8513f8268fd4a3059a1abdefa3217c7f8a5 Mon Sep 17 00:00:00 2001
From: Paul O'Leary McCann <polm@dampfkraft.com>
Date: Fri, 13 Jan 2023 18:22:39 +0900
Subject: [PATCH] Update docs

---
 website/docs/api/cli.mdx | 107 +++++++++++++++++++--------------------
 1 file changed, 53 insertions(+), 54 deletions(-)

diff --git a/website/docs/api/cli.mdx b/website/docs/api/cli.mdx
index 8b84a02ff..2c59d2d69 100644
--- a/website/docs/api/cli.mdx
+++ b/website/docs/api/cli.mdx
@@ -174,15 +174,15 @@ validation error with more details.
 $ python -m spacy init fill-config [base_path] [output_file] [--diff]
 ```
 
-| Name                   | Description                                                                                                                                                                          |
-| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `base_path`            | Path to base config to fill, e.g. generated by the [quickstart widget](/usage/training#quickstart). ~~Path (positional)~~                                                            |
-| `output_file`          | Path to output `.cfg` file or "-" to write to stdout so you can pipe it to a file. Defaults to "-" (stdout). ~~Path (positional)~~                                                   |
-| `--code`, `-c`         | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
-| `--pretraining`, `-pt` | Include config for pretraining (with [`spacy pretrain`](/api/cli#pretrain)). Defaults to `False`. ~~bool (flag)~~                                                                    |
-| `--diff`, `-D`         | Print a visual diff highlighting the changes. ~~bool (flag)~~                                                                                                                        |
-| `--help`, `-h`         | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           |
-| **CREATES**            | Complete and auto-filled config file for training.                                                                                                                                   |
+| Name                   | Description                                                                                                                                                                                            |
+| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `base_path`            | Path to base config to fill, e.g. generated by the [quickstart widget](/usage/training#quickstart). ~~Path (positional)~~                                                                              |
+| `output_file`          | Path to output `.cfg` file or "-" to write to stdout so you can pipe it to a file. Defaults to "-" (stdout). ~~Path (positional)~~                                                                     |
+| `--code`, `-c`         | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--pretraining`, `-pt` | Include config for pretraining (with [`spacy pretrain`](/api/cli#pretrain)). Defaults to `False`. ~~bool (flag)~~                                                                                      |
+| `--diff`, `-D`         | Print a visual diff highlighting the changes. ~~bool (flag)~~                                                                                                                                          |
+| `--help`, `-h`         | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                             |
+| **CREATES**            | Complete and auto-filled config file for training.                                                                                                                                                     |
 
 ### init vectors {id="init-vectors",version="3",tag="command"}
 
@@ -242,7 +242,7 @@ $ python -m spacy init labels [config_path] [output_path] [--code] [--verbose] [
 | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `config_path`     | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
 | `output_path`     | Output directory for the label files. Will create one JSON file per component. ~~Path (positional)~~                                                                                                               |
-| `--code`, `-c`    | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~                               |
+| `--code`, `-c`    | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~             |
 | `--verbose`, `-V` | Show more detailed messages for debugging purposes. ~~bool (flag)~~                                                                                                                                                |
 | `--gpu-id`, `-g`  | GPU ID or `-1` for CPU. Defaults to `-1`. ~~int (option)~~                                                                                                                                                         |
 | `--help`, `-h`    | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                         |
@@ -446,7 +446,7 @@ File       /path/to/thinc/thinc/schedules.py (line 91)
 | Name                     | Description                                                                                                                                                                                                                    |
 | ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `config_path`            | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~             |
-| `--code`, `-c`           | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~                                           |
+| `--code`, `-c`           | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~                         |
 | `--show-functions`, `-F` | Show an overview of all registered function blocks used in the config and where those functions come from, including the module name, Python file and line number. ~~bool (flag)~~                                             |
 | `--show-variables`, `-V` | Show an overview of all variables referenced in the config, e.g. `${paths.train}` and their values that will be used. This also reflects any config overrides provided on the CLI, e.g. `--paths.train /path`. ~~bool (flag)~~ |
 | `--help`, `-h`           | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                                     |
@@ -631,7 +631,7 @@ will not be available.
 | Name                       | Description                                                                                                                                                                                                        |
 | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `config_path`              | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
-| `--code`, `-c`             | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~                               |
+| `--code`, `-c`             | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~             |
 | `--ignore-warnings`, `-IW` | Ignore warnings, only show stats and errors. ~~bool (flag)~~                                                                                                                                                       |
 | `--verbose`, `-V`          | Print additional information and explanations. ~~bool (flag)~~                                                                                                                                                     |
 | `--no-format`, `-NF`       | Don't pretty-print the results. Use this if you want to write to a file. ~~bool (flag)~~                                                                                                                           |
@@ -1055,7 +1055,7 @@ $ python -m spacy train [config_path] [--output] [--code] [--verbose] [--gpu-id]
 | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `config_path`     | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
 | `--output`, `-o`  | Directory to store trained pipeline in. Will be created if it doesn't exist. ~~Optional[Path] \(option)~~                                                                                                          |
-| `--code`, `-c`    | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~                               |
+| `--code`, `-c`    | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~             |
 | `--verbose`, `-V` | Show more detailed messages during training. ~~bool (flag)~~                                                                                                                                                       |
 | `--gpu-id`, `-g`  | GPU ID or `-1` for CPU. Defaults to `-1`. ~~int (option)~~                                                                                                                                                         |
 | `--help`, `-h`    | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                         |
@@ -1125,7 +1125,7 @@ $ python -m spacy pretrain [config_path] [output_dir] [--code] [--resume-path] [
 | ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `config_path`           | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
 | `output_dir`            | Directory to save binary weights to on each epoch. ~~Path (positional)~~                                                                                                                                           |
-| `--code`, `-c`          | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~                               |
+| `--code`, `-c`          | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~             |
 | `--resume-path`, `-r`   | Path to pretrained weights from which to resume pretraining. ~~Optional[Path] \(option)~~                                                                                                                          |
 | `--epoch-resume`, `-er` | The epoch to resume counting from when using `--resume-path`. Prevents unintended overwriting of existing weight files. ~~Optional[int] \(option)~~                                                                |
 | `--gpu-id`, `-g`        | GPU ID or `-1` for CPU. Defaults to `-1`. ~~int (option)~~                                                                                                                                                         |
@@ -1150,18 +1150,18 @@ skew. To render a sample of dependency parses in a HTML file using the
 $ python -m spacy evaluate [model] [data_path] [--output] [--code] [--gold-preproc] [--gpu-id] [--displacy-path] [--displacy-limit]
 ```
 
-| Name                                      | Description                                                                                                                                                                          |
-| ----------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `model`                                   | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                           |
-| `data_path`                               | Location of evaluation data in spaCy's [binary format](/api/data-formats#training). ~~Path (positional)~~                                                                            |
-| `--output`, `-o`                          | Output JSON file for metrics. If not set, no metrics will be exported. ~~Optional[Path] \(option)~~                                                                                  |
-| `--code`, `-c` <Tag variant="new">3</Tag> | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
-| `--gold-preproc`, `-G`                    | Use gold preprocessing. ~~bool (flag)~~                                                                                                                                              |
-| `--gpu-id`, `-g`                          | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
-| `--displacy-path`, `-dp`                  | Directory to output rendered parses as HTML. If not set, no visualizations will be generated. ~~Optional[Path] \(option)~~                                                           |
-| `--displacy-limit`, `-dl`                 | Number of parses to generate per file. Defaults to `25`. Keep in mind that a significantly higher number might cause the `.html` files to render slowly. ~~int (option)~~            |
-| `--help`, `-h`                            | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           |
-| **CREATES**                               | Training results and optional metrics and visualizations.                                                                                                                            |
+| Name                      | Description                                                                                                                                                                                            |
+| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `model`                   | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                                             |
+| `data_path`               | Location of evaluation data in spaCy's [binary format](/api/data-formats#training). ~~Path (positional)~~                                                                                              |
+| `--output`, `-o`          | Output JSON file for metrics. If not set, no metrics will be exported. ~~Optional[Path] \(option)~~                                                                                                    |
+| `--code`, `-c`            | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--gold-preproc`, `-G`    | Use gold preprocessing. ~~bool (flag)~~                                                                                                                                                                |
+| `--gpu-id`, `-g`          | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                                         |
+| `--displacy-path`, `-dp`  | Directory to output rendered parses as HTML. If not set, no visualizations will be generated. ~~Optional[Path] \(option)~~                                                                             |
+| `--displacy-limit`, `-dl` | Number of parses to generate per file. Defaults to `25`. Keep in mind that a significantly higher number might cause the `.html` files to render slowly. ~~int (option)~~                              |
+| `--help`, `-h`            | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                             |
+| **CREATES**               | Training results and optional metrics and visualizations.                                                                                                                                              |
 
 ## apply {id="apply", version="3.5", tag="command"}
 
@@ -1180,20 +1180,19 @@ When a directory is provided it is traversed recursively to collect all files.
 $ python -m spacy apply [model] [data-path] [output-file] [--code] [--text-key] [--force-overwrite] [--gpu-id] [--batch-size] [--n-process]
 ```
 
-| Name                                      | Description                                                                                                                                                                          |
-| ----------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `model`                                   | Pipeline to apply to the data. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                  |
-| `data_path`                               | Location of data to be evaluated in spaCy's [binary format](/api/data-formats#training), jsonl, or plain text. ~~Path (positional)~~                                                 |
-| `output-file`, `-o`                       | Output `DocBin` path. ~~str (positional)~~                                                                                                                                           |
-| `--code`, `-c` <Tag variant="new">3</Tag> | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
-| `--text-key`, `-tk`                       | The key for `.jsonl` files to use to grab the texts from. Defaults to `text`. ~~Optional[str] \(option)~~                                                                            |
-| `--force-overwrite`, `-F`                 | If the provided `output-file` already exists, then force `apply` to overwrite it. If this is `False` (default) then quits with a warning instead. ~~bool (flag)~~                    |
-| `--gpu-id`, `-g`                          | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
-| `--batch-size`, `-b`                      | Batch size to use for prediction. Defaults to `1`. ~~int (option)~~                                                                                                                  |
-| `--n-process`, `-n`                       | Number of processes to use for prediction. Defaults to `1`. ~~int (option)~~                                                                                                         |
-| `--help`, `-h`                            | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           |
-| **CREATES**                               | A `DocBin` with the annotations from the `model` for all the files found in `data-path`.                                                                                             |
-
+| Name                      | Description                                                                                                                                                                                            |
+| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `model`                   | Pipeline to apply to the data. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                                    |
+| `data_path`               | Location of data to be evaluated in spaCy's [binary format](/api/data-formats#training), jsonl, or plain text. ~~Path (positional)~~                                                                   |
+| `output-file`, `-o`       | Output `DocBin` path. ~~str (positional)~~                                                                                                                                                             |
+| `--code`, `-c`            | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--text-key`, `-tk`       | The key for `.jsonl` files to use to grab the texts from. Defaults to `text`. ~~Optional[str] \(option)~~                                                                                              |
+| `--force-overwrite`, `-F` | If the provided `output-file` already exists, then force `apply` to overwrite it. If this is `False` (default) then quits with a warning instead. ~~bool (flag)~~                                      |
+| `--gpu-id`, `-g`          | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                                         |
+| `--batch-size`, `-b`      | Batch size to use for prediction. Defaults to `1`. ~~int (option)~~                                                                                                                                    |
+| `--n-process`, `-n`       | Number of processes to use for prediction. Defaults to `1`. ~~int (option)~~                                                                                                                           |
+| `--help`, `-h`            | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                             |
+| **CREATES**               | A `DocBin` with the annotations from the `model` for all the files found in `data-path`.                                                                                                               |
 
 ## find-threshold {id="find-threshold",version="3.5",tag="command"}
 
@@ -1220,19 +1219,19 @@ be provided.
 > $ python -m spacy find-threshold my_nlp data.spacy spancat threshold spans_sc_f
 > ```
 
-| Name                    | Description                                                                                                                                                                          |
-| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `model`                 | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                           |
-| `data_path`             | Path to file with DocBin with docs to use for threshold search. ~~Path (positional)~~                                                                                                |
-| `pipe_name`             | Name of pipe to examine thresholds for. ~~str (positional)~~                                                                                                                         |
-| `threshold_key`         | Key of threshold attribute in component's configuration. ~~str (positional)~~                                                                                                        |
-| `scores_key`            | Name of score to metric to optimize. ~~str (positional)~~                                                                                                                            |
-| `--n_trials`, `-n`      | Number of trials to determine optimal thresholds. ~~int (option)~~                                                                                                                   |
-| `--code`, `-c`          | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
-| `--gpu-id`, `-g`        | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
-| `--gold-preproc`, `-G`  | Use gold preprocessing. ~~bool (flag)~~                                                                                                                                              |
-| `--silent`, `-V`, `-VV` | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
-| `--help`, `-h`          | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           |
+| Name                    | Description                                                                                                                                                                                            |
+| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `model`                 | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                                             |
+| `data_path`             | Path to file with DocBin with docs to use for threshold search. ~~Path (positional)~~                                                                                                                  |
+| `pipe_name`             | Name of pipe to examine thresholds for. ~~str (positional)~~                                                                                                                                           |
+| `threshold_key`         | Key of threshold attribute in component's configuration. ~~str (positional)~~                                                                                                                          |
+| `scores_key`            | Name of score to metric to optimize. ~~str (positional)~~                                                                                                                                              |
+| `--n_trials`, `-n`      | Number of trials to determine optimal thresholds. ~~int (option)~~                                                                                                                                     |
+| `--code`, `-c`          | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--gpu-id`, `-g`        | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                                         |
+| `--gold-preproc`, `-G`  | Use gold preprocessing. ~~bool (flag)~~                                                                                                                                                                |
+| `--silent`, `-V`, `-VV` | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                                         |
+| `--help`, `-h`          | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                             |
 
 ## assemble {id="assemble",tag="command"}
 
@@ -1256,7 +1255,7 @@ $ python -m spacy assemble [config_path] [output_dir] [--code] [--verbose] [over
 | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `config_path`     | Path to the [config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
 | `output_dir`      | Directory to store the final pipeline in. Will be created if it doesn't exist. ~~Optional[Path] \(option)~~                                                                                                   |
-| `--code`, `-c`    | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions). ~~Optional[Path] \(option)~~                                                |
+| `--code`, `-c`    | Comma-separated paths to Python files with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~        |
 | `--verbose`, `-V` | Show more detailed messages during processing. ~~bool (flag)~~                                                                                                                                                |
 | `--help`, `-h`    | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                    |
 | overrides         | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.data ./data`. ~~Any (option/flag)~~                            |