Update docs & docstring.

2025-08-07 13:44:55 +03:00 · 2022-11-17 12:37:22 +01:00 · 2022-11-17 12:37:22 +01:00 · 809588de30
commit 809588de30
parent 7b4da3f36d
2 changed files with 44 additions and 12 deletions
--- a/spacy/cli/find_threshold.py
+++ b/spacy/cli/find_threshold.py
@ -39,15 +39,16 @@ def find_threshold_cli(
    # fmt: on
 ):
    """
-    Runs prediction trials for models with varying tresholds to maximize the
-    specified metric. The search space for the threshold is traversed
-    linearly from 0 to 1 in n_trials steps.
+    Runs prediction trials for a trained model with varying thresholds to maximize
+    the specified metric. The search space for the threshold is traversed linearly
+    from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`
+    (the corresponding API call to `spacy.cli.find_threshold.find_threshold()`
+    returns all results).

-    This is applicable only for components whose predictions are influenced
-    by thresholds (e.g. textcat_multilabel and spancat, but not textcat).
-
-    Note that the full path to the corresponding threshold attribute in the
-    config has to be provided.
+    This is applicable only for components whose predictions are influenced by
+    thresholds - e.g. `textcat_multilabel` and `spancat`, but not `textcat`. Note
+    that the full path to the corresponding threshold attribute in the config has to
+    be provided.

    DOCS: https://spacy.io/api/cli#find-threshold
    """
@ -81,8 +82,8 @@ def find_threshold(
 ) -> Tuple[float, float, Dict[float, float]]:
    """
    Runs prediction trials for models with varying tresholds to maximize the specified metric.
-    model (Union[str, Path]): Path to file with trained model.
-    data_path (Union[str, Path]): Path to file with DocBin with docs to use for threshold search.
+    model (Union[str, Path]): Pipeline to evaluate. Can be a package or a path to a data directory.
+    data_path (Path): Path to file with DocBin with docs to use for threshold search.
    pipe_name (str): Name of pipe to examine thresholds for.
    threshold_key (str): Key of threshold attribute in component's configuration.
    scores_key (str): Name of score to metric to optimize.
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@ -12,6 +12,7 @@ menu:
  - ['train', 'train']
  - ['pretrain', 'pretrain']
  - ['evaluate', 'evaluate']
+  - ['find-threshold', 'find-threshold']
  - ['assemble', 'assemble']
  - ['package', 'package']
  - ['project', 'project']
@ -474,8 +475,7 @@ report span characteristics such as the average span length and the span (or
 span boundary) distinctiveness. The distinctiveness measure shows how different
 the tokens are with respect to the rest of the corpus using the KL-divergence of
 the token distributions. To learn more, you can check out Papay et al.'s work on
-[*Dissecting Span Identification Tasks with Performance Prediction* (EMNLP
-2020)](https://aclanthology.org/2020.emnlp-main.396/).
+[_Dissecting Span Identification Tasks with Performance Prediction_ (EMNLP 2020)](https://aclanthology.org/2020.emnlp-main.396/).

 </Infobox>

@ -1163,6 +1163,37 @@ $ python -m spacy evaluate [model] [data_path] [--output] [--code] [--gold-prepr
 | `--help`, `-h`                            | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           |
 | **CREATES**                               | Training results and optional metrics and visualizations.                                                                                                                            |

+## find-threshold {#find-threshold new="3.5" tag="command"}
+
+Runs prediction trials for a trained model with varying thresholds to maximize
+the specified metric. The search space for the threshold is traversed linearly
+from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`
+(the corresponding API call to `spacy.cli.find_threshold.find_threshold()`
+returns all results).
+
+This is applicable only for components whose predictions are influenced by
+thresholds - e.g. `textcat_multilabel` and `spancat`, but not `textcat`. Note
+that the full path to the corresponding threshold attribute in the config has to
+be provided.
+
+```cli
+$ python -m spacy find-threshold [model] [data_path] [pipe_name] [threshold_key] [scores_key] [--n_trials] [--code] [--gpu-id] [--gold-preproc] [--verbose]
+```
+
+| Name                    | Description                                                                                                                                                                          |
+| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `model`                 | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                           |
+| `data_path`             | Path to file with DocBin with docs to use for threshold search. ~~Path (positional)~~                                                                                                |
+| `pipe_name`             | Name of pipe to examine thresholds for. ~~str (positional)~~                                                                                                                         |
+| `threshold_key`         | Key of threshold attribute in component's configuration. ~~str (positional)~~                                                                                                        |
+| `scores_key`            | Name of the score metric to optimize. ~~str (positional)~~                                                                                                                          |
+| `--n_trials`, `-n`      | Number of trials to determine optimal thresholds. ~~int (option)~~                                                                                                                   |
+| `--code`, `-c`          | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--gpu-id`, `-g`        | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
+| `--gold-preproc`, `-G`  | Use gold preprocessing. ~~bool (flag)~~                                                                                                                                              |
+| `--verbose`, `-V`, `-VV` | Display more information for debugging purposes. ~~bool (flag)~~                                                                                                                    |
+| `--help`, `-h`          | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           |
+
 ## assemble {#assemble tag="command"}

 Assemble a pipeline from a config file without additional training. Expects a