Add docs for configure command

This also renames the `output_path` arg to `output_file` to match other commands.
2025-08-23 21:44:54 +03:00 · 2023-01-11 16:06:50 +09:00 · 2023-01-11 16:06:50 +09:00 · f2bbab4623
commit f2bbab4623
parent 2791f0b552
2 changed files with 104 additions and 15 deletions
--- a/spacy/cli/configure.py
+++ b/spacy/cli/configure.py
@ -137,7 +137,7 @@ def _check_pipeline_names(nlp, nlp2):
 def configure_resume_cli(
    # fmt: off
    base_model: Path = Arg(..., help="Path or name of base model to use for config"),
-    output_path: Path = Arg(..., help="File to save the config to or - for stdout (will only output config and no additional logging info)", allow_dash=True),
+    output_file: Path = Arg(..., help="File to save the config to or - for stdout (will only output config and no additional logging info)", allow_dash=True),
    # fmt: on
 ):
    """Create a config for resuming training.
@ -155,18 +155,18 @@ def configure_resume_cli(
    for comp in nlp.pipe_names:
        conf["components"][comp] = {"source": path_str}
-    if str(output_path) == "-":
+    if str(output_file) == "-":
        print(conf.to_str())
    else:
-        conf.to_disk(output_path)
+        conf.to_disk(output_file)
-        msg.good("Saved config", output_path)
+        msg.good("Saved config", output_file)
    return conf
@configure_cli.command("transformer")
 def use_transformer(
-    base_model: str, output_path: Path, transformer_name: str = "roberta-base"
+    base_model: str, output_file: Path, transformer_name: str = "roberta-base"
 ) -> Config:
    """Replace pipeline tok2vec with transformer."""
@ -208,17 +208,17 @@ def use_transformer(
        }
        nlp.config["components"][listener]["model"]["tok2vec"] = listener_config
-    if str(output_path) == "-":
+    if str(output_file) == "-":
        print(nlp.config.to_str())
    else:
-        nlp.config.to_disk(output_path)
+        nlp.config.to_disk(output_file)
-        msg.good("Saved config", output_path)
+        msg.good("Saved config", output_file)
    return nlp.config
@configure_cli.command("tok2vec")
-def use_tok2vec(base_model: str, output_path: Path) -> Config:
+def use_tok2vec(base_model: str, output_file: Path) -> Config:
    """Replace pipeline tok2vec with CNN tok2vec."""
    nlp = spacy.load(base_model)
    _check_single_tok2vec(base_model, nlp.config)
@ -240,11 +240,11 @@ def use_tok2vec(base_model: str, output_path: Path) -> Config:
        }
        nlp.config["components"][listener]["model"]["tok2vec"] = listener_config
-    if str(output_path) == "-":
+    if str(output_file) == "-":
        print(nlp.config.to_str())
    else:
-        nlp.config.to_disk(output_path)
+        nlp.config.to_disk(output_file)
-        msg.good("Saved config", output_path)
+        msg.good("Saved config", output_file)
    return nlp.config
@ -298,7 +298,7 @@ def _inner_merge(nlp, nlp2, replace_listeners=False) -> Language:
@configure_cli.command("merge")
-def merge_pipelines(base_model: str, added_model: str, output_path: Path) -> Language:
+def merge_pipelines(base_model: str, added_model: str, output_file: Path) -> Language:
    """Combine components from multiple pipelines."""
    nlp = spacy.load(base_model)
    nlp2 = spacy.load(added_model)
@ -336,7 +336,7 @@ def merge_pipelines(base_model: str, added_model: str, output_path: Path) -> Lan
    nlp_out = _inner_merge(nlp, nlp2, replace_listeners=replace_listeners)
    # write the final pipeline
-    nlp.to_disk(output_path)
+    nlp.to_disk(output_file)
-    msg.info(f"Saved pipeline to: {output_path}")
+    msg.info(f"Saved pipeline to: {output_file}")
    return nlp
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@ -7,6 +7,7 @@ menu:
  - ['info', 'info']
  - ['validate', 'validate']
  - ['init', 'init']
  - ['configure', 'configure']
  - ['convert', 'convert']
  - ['debug', 'debug']
  - ['train', 'train']
@ -249,6 +250,94 @@ $ python -m spacy init labels [config_path] [output_path] [--code] [--verbose] [
 | overrides         | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. ~~Any (option/flag)~~                         |
 | **CREATES**       | The label files.                                                                                                                                                                                                   |
 ## configure {#configure new="TODO"}
 Modify or combine existing configs in high-level ways. Can be used to automate
 config changes made as part of the development cycle.
 ### configure resume {#configure-resume tag="command"}
 Modify the input config for use in resuming training. When resuming training,
 all components are sourced from the previously trained pipeline.
 ```cli
 $ python -m spacy configure resume [base_model] [output_file]
 ```
 | Name          | Description                                                                                                                                                                                                                           |
 | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `base_model`  | A trained pipeline to resume training (package name or path). ~~str (positional)~~                                                                                                                                                    |
 | `output_file` | Path to output `.cfg` file or `-` to write the config to stdout (so you can pipe it forward to a file or to the `train` command). Note that if you're writing to stdout, no additional logging info is printed. ~~Path (positional)~~ |
 ### configure transformer {#configure-transformer tag="command"}
 Modify the base config to use a transformer component, optionally specifying the
 base transformer to use. Useful for converting a CNN tok2vec pipeline to use
 transformers.
 During development of a model, you can use a CNN tok2vec for faster training
 time and reduced hardware requirements, and then use this command to convert
 your pipeline to use a transformer once you've verified a proof of concept. This
 can also help isolate whether any training issues are transformer-related or
 not.
 ```cli
 $ python -m spacy configure transformer [base_model] [output_file] [--transformer-name]
 ```
 | Name               | Description                                                                                                                                                                                                                           |
 | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `base_model`       | A trained pipeline (package name or path) to use as a base. ~~str (positional)~~                                                                                                                                                      |
 | `output_file`      | Path to output `.cfg` file or `-` to write the config to stdout (so you can pipe it forward to a file or to the `train` command). Note that if you're writing to stdout, no additional logging info is printed. ~~Path (positional)~~ |
 | `transformer_name` | The name of the base HuggingFace model to use. Defaults to `roberta-base`. ~~str (option)~~                                                                                                                                           |
 ### configure tok2vec {#configure-tok2vec tag="command"}
 Modify the base model config to use a CNN tok2vec component. Useful for
 generating a config from a transformer-based model for faster training
 iteration.
 ```cli
 $ python -m spacy configure tok2vec [base_model] [output_file]
 ```
 | Name          | Description                                                                                                                                                                                                                           |
 | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `base_model`  | A trained pipeline (package name or path) to use as a base. ~~str (positional)~~                                                                                                                                                      |
 | `output_file` | Path to output `.cfg` file or `-` to write the config to stdout (so you can pipe it forward to a file or to the `train` command). Note that if you're writing to stdout, no additional logging info is printed. ~~Path (positional)~~ |
 ### configure merge {#configure-merge tag="command"}
 Take two pipelines and create a new one with components from both of them,
 handling the configuration of listeners. Note that unlike other commands, this
 produces a whole pipeline, not just a config.
 Components in the final pipeline are in the same order as in the original
 pipelines, with the base pipeline first and the added pipeline after. Because
 pipeline names must be unique, if there is a name collision in components, the
 later components will be automatically renamed.
 For components with listeners, the resulting pipeline structure depends on the
 number of listeners. If the second pipeline has only one listener, then
 [`replace_listeners`](https://spacy.io/api/language/#replace_listeners) will be
 used. If there is more than one listener, `replace_listeners` will not be used.
 In the multi-listener case, the resulting pipeline may require more adjustment
 for training to work.
 This is useful if you have trained a specialized component, such as NER or
 textcat, and want to provide it together with one of the official pretrained
 pipelines or another pipeline.
 ```cli
 $ python -m spacy configure merge [base_model] [added_model] [output_file]
 ```
 | Name          | Description                                                                              |
 | ------------- | ---------------------------------------------------------------------------------------- |
 | `base_model`  | A trained pipeline (package name or path) to use as a base. ~~str (positional)~~         |
 | `added_model` | A trained pipeline (package name or path) to combine with the base. ~~str (positional)~~ |
 | `output_file` | Path to output pipeline. ~~Path (positional)~~                                           |
 ## convert {#convert tag="command"}
 Convert files into spaCy's