Update CLI docs and evaluate command [ci skip]

2025-12-16 06:34:20 +03:00 · 2020-08-07 14:40:58 +02:00 · 2020-08-07 14:40:58 +02:00 · 1d01d89b79
commit 1d01d89b79
parent ef2c67cca5
2 changed files with 29 additions and 22 deletions
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@ -17,23 +17,28 @@ from .. import displacy
 def evaluate_cli(
    # fmt: off
    model: str = Arg(..., help="Model name or path"),
-    data_path: Path = Arg(..., help="Location of JSON-formatted evaluation data", exists=True),
+    data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True),
    output: Optional[Path] = Opt(None, "--output", "-o", help="Output JSON file for metrics", dir_okay=False),
-    gpu_id: int = Opt(-1, "--gpu-id", "-g", help="Use GPU"),
+    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
    gold_preproc: bool = Opt(False, "--gold-preproc", "-G", help="Use gold preprocessing"),
    displacy_path: Optional[Path] = Opt(None, "--displacy-path", "-dp", help="Directory to output rendered parses as HTML", exists=True, file_okay=False),
    displacy_limit: int = Opt(25, "--displacy-limit", "-dl", help="Limit of parses to render as HTML"),
    # fmt: on
 ):
    """
-    Evaluate a model. To render a sample of parses in a HTML file, set an
+    Evaluate a model. Expects a loadable spaCy model and evaluation data in the
-    output directory as the displacy_path argument.
+    binary .spacy format. The --gold-preproc option sets up the evaluation
    examples with gold-standard sentences and tokens for the predictions. Gold
    preprocessing helps the annotations align to the tokenization, and may
    result in sequences of more consistent length. However, it may reduce
    runtime accuracy due to train/test skew. To render a sample of dependency
    parses in an HTML file, set an output directory as the displacy_path argument.
    """
    evaluate(
        model,
        data_path,
        output=output,
-        gpu_id=gpu_id,
+        use_gpu=use_gpu,
        gold_preproc=gold_preproc,
        displacy_path=displacy_path,
        displacy_limit=displacy_limit,
@ -45,7 +50,7 @@ def evaluate(
    model: str,
    data_path: Path,
    output: Optional[Path] = None,
-    gpu_id: int = -1,
+    use_gpu: int = -1,
    gold_preproc: bool = False,
    displacy_path: Optional[Path] = None,
    displacy_limit: int = 25,
@ -53,8 +58,8 @@ def evaluate(
 ) -> Scorer:
    msg = Printer(no_print=silent, pretty=not silent)
    fix_random_seed()
-    if gpu_id >= 0:
+    if use_gpu >= 0:
-        require_gpu(gpu_id)
+        require_gpu(use_gpu)
    util.set_env_log(False)
    data_path = util.ensure_path(data_path)
    output_path = util.ensure_path(output)
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@ -132,7 +132,7 @@ $ python -m spacy init config [output] [--base] [--lang] [--model] [--pipeline]
 | `--base`, `-b`     | option     | Optional base config file to auto-fill with defaults.                                                                                                                 |
 | `--lang`, `-l`     | option     | Optional language code to use for blank config. If a `--pipeline` is specified, the components will be added in order.                                                |
 | `--model`, `-m`    | option     | Optional base model to copy config from. If a `--pipeline` is specified, only those components will be kept, and all other components not in the model will be added. |
-| `--pipeline`, `-p` | option     | Optional comma-separated pipeline of components to add to blank language or model.                                                                                     |
+| `--pipeline`, `-p` | option     | Optional comma-separated pipeline of components to add to blank language or model.                                                                                    |
 | **CREATES**        | config     | Complete and auto-filled config file for training.                                                                                                                    |
 ### init model {#init-model new="2"}
@ -446,7 +446,8 @@ Debug a Thinc [`Model`](https://thinc.ai/docs/api-model) by running it on a
 sample text and checking how it updates its internal weights and parameters.
 ```bash
-$ python -m spacy debug model [config_path] [component] [--layers] [-DIM] [-PAR] [-GRAD] [-ATTR] [-P0] [-P1] [-P2] [P3] [--gpu_id]
+$ python -m spacy debug model [config_path] [component] [--layers] [-DIM]
 [-PAR] [-GRAD] [-ATTR] [-P0] [-P1] [-P2] [-P3] [--gpu-id]
 ```
 <Accordion title="Example outputs" spaced>
@ -641,18 +642,19 @@ $ python -m spacy pretrain [texts_loc] [output_dir] [config_path]
 ## Evaluate {#evaluate new="2"}
-<!-- TODO: document new evaluate command -->
+Evaluate a model. Expects a loadable spaCy model and evaluation data in the
-
+[binary `.spacy` format](/api/data-formats#binary-training). The
-Evaluate a model's accuracy and speed on JSON-formatted annotated data. Will
+`--gold-preproc` option sets up the evaluation examples with gold-standard
-print the results and optionally export
+sentences and tokens for the predictions. Gold preprocessing helps the
-[displaCy visualizations](/usage/visualizers) of a sample set of parses to
+annotations align to the tokenization, and may result in sequences of more
-`.html` files. Visualizations for the dependency parse and NER will be exported
+consistent length. However, it may reduce runtime accuracy due to train/test
-as separate files if the respective component is present in the model's
+skew. To render a sample of dependency parses in an HTML file using the
-pipeline.
+[displaCy visualizations](/usage/visualizers), set an output directory as the
 `--displacy-path` argument.
 ```bash
-$ python -m spacy evaluate [model] [data_path] [--output] [--displacy-path]
+$ python -m spacy evaluate [model] [data_path] [--output] [--gold-preproc]
-[--displacy-limit] [--gpu-id] [--gold-preproc]
+[--gpu-id] [--displacy-path] [--displacy-limit]
 ```
 | Argument                  | Type                 | Description                                                                                                                                              |
@ -660,10 +662,10 @@ $ python -m spacy evaluate [model] [data_path] [--output] [--displacy-path]
 | `model`                   | positional           | Model to evaluate. Can be a package or a path to a model data directory.                                                                                 |
 | `data_path`               | positional           | Location of evaluation data in spaCy's [binary format](/api/data-formats#training).                                                                      |
 | `--output`, `-o`          | option               | Output JSON file for metrics. If not set, no metrics will be exported.                                                                                   |
 | `--gold-preproc`, `-G`    | flag                 | Use gold preprocessing.                                                                                                                                  |
 | `--gpu-id`, `-g`          | option               | GPU to use, if any. Defaults to `-1` for CPU.                                                                                                            |
 | `--displacy-path`, `-dp`  | option               | Directory to output rendered parses as HTML. If not set, no visualizations will be generated.                                                            |
 | `--displacy-limit`, `-dl` | option               | Number of parses to generate per file. Defaults to `25`. Keep in mind that a significantly higher number might cause the `.html` files to render slowly. |
 | `--gpu-id`, `-g`          | option               | GPU to use, if any. Defaults to `-1` for CPU.                                                                                                            |
 | `--gold-preproc`, `-G`    | flag                 | Use gold preprocessing.                                                                                                                                  |
 | **CREATES**               | `stdout`, JSON, HTML | Training results and optional metrics and visualizations.                                                                                                |
 ## Package {#package}