mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-04 20:03:13 +03:00
Add docs for configure command
This also renames the `output_path` arg to `output_file` to match other commands.
This commit is contained in:
parent
2791f0b552
commit
f2bbab4623
|
@ -137,7 +137,7 @@ def _check_pipeline_names(nlp, nlp2):
|
|||
def configure_resume_cli(
|
||||
# fmt: off
|
||||
base_model: Path = Arg(..., help="Path or name of base model to use for config"),
|
||||
output_path: Path = Arg(..., help="File to save the config to or - for stdout (will only output config and no additional logging info)", allow_dash=True),
|
||||
output_file: Path = Arg(..., help="File to save the config to or - for stdout (will only output config and no additional logging info)", allow_dash=True),
|
||||
# fmt: on
|
||||
):
|
||||
"""Create a config for resuming training.
|
||||
|
@ -155,18 +155,18 @@ def configure_resume_cli(
|
|||
for comp in nlp.pipe_names:
|
||||
conf["components"][comp] = {"source": path_str}
|
||||
|
||||
if str(output_path) == "-":
|
||||
if str(output_file) == "-":
|
||||
print(conf.to_str())
|
||||
else:
|
||||
conf.to_disk(output_path)
|
||||
msg.good("Saved config", output_path)
|
||||
conf.to_disk(output_file)
|
||||
msg.good("Saved config", output_file)
|
||||
|
||||
return conf
|
||||
|
||||
|
||||
@configure_cli.command("transformer")
|
||||
def use_transformer(
|
||||
base_model: str, output_path: Path, transformer_name: str = "roberta-base"
|
||||
base_model: str, output_file: Path, transformer_name: str = "roberta-base"
|
||||
) -> Config:
|
||||
"""Replace pipeline tok2vec with transformer."""
|
||||
|
||||
|
@ -208,17 +208,17 @@ def use_transformer(
|
|||
}
|
||||
nlp.config["components"][listener]["model"]["tok2vec"] = listener_config
|
||||
|
||||
if str(output_path) == "-":
|
||||
if str(output_file) == "-":
|
||||
print(nlp.config.to_str())
|
||||
else:
|
||||
nlp.config.to_disk(output_path)
|
||||
msg.good("Saved config", output_path)
|
||||
nlp.config.to_disk(output_file)
|
||||
msg.good("Saved config", output_file)
|
||||
|
||||
return nlp.config
|
||||
|
||||
|
||||
@configure_cli.command("tok2vec")
|
||||
def use_tok2vec(base_model: str, output_path: Path) -> Config:
|
||||
def use_tok2vec(base_model: str, output_file: Path) -> Config:
|
||||
"""Replace pipeline tok2vec with CNN tok2vec."""
|
||||
nlp = spacy.load(base_model)
|
||||
_check_single_tok2vec(base_model, nlp.config)
|
||||
|
@ -240,11 +240,11 @@ def use_tok2vec(base_model: str, output_path: Path) -> Config:
|
|||
}
|
||||
nlp.config["components"][listener]["model"]["tok2vec"] = listener_config
|
||||
|
||||
if str(output_path) == "-":
|
||||
if str(output_file) == "-":
|
||||
print(nlp.config.to_str())
|
||||
else:
|
||||
nlp.config.to_disk(output_path)
|
||||
msg.good("Saved config", output_path)
|
||||
nlp.config.to_disk(output_file)
|
||||
msg.good("Saved config", output_file)
|
||||
|
||||
return nlp.config
|
||||
|
||||
|
@ -298,7 +298,7 @@ def _inner_merge(nlp, nlp2, replace_listeners=False) -> Language:
|
|||
|
||||
|
||||
@configure_cli.command("merge")
|
||||
def merge_pipelines(base_model: str, added_model: str, output_path: Path) -> Language:
|
||||
def merge_pipelines(base_model: str, added_model: str, output_file: Path) -> Language:
|
||||
"""Combine components from multiple pipelines."""
|
||||
nlp = spacy.load(base_model)
|
||||
nlp2 = spacy.load(added_model)
|
||||
|
@ -336,7 +336,7 @@ def merge_pipelines(base_model: str, added_model: str, output_path: Path) -> Lan
|
|||
nlp_out = _inner_merge(nlp, nlp2, replace_listeners=replace_listeners)
|
||||
|
||||
# write the final pipeline
|
||||
nlp.to_disk(output_path)
|
||||
msg.info(f"Saved pipeline to: {output_path}")
|
||||
nlp.to_disk(output_file)
|
||||
msg.info(f"Saved pipeline to: {output_file}")
|
||||
|
||||
return nlp
|
||||
|
|
|
@ -7,6 +7,7 @@ menu:
|
|||
- ['info', 'info']
|
||||
- ['validate', 'validate']
|
||||
- ['init', 'init']
|
||||
- ['configure', 'configure']
|
||||
- ['convert', 'convert']
|
||||
- ['debug', 'debug']
|
||||
- ['train', 'train']
|
||||
|
@ -249,6 +250,94 @@ $ python -m spacy init labels [config_path] [output_path] [--code] [--verbose] [
|
|||
| overrides | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. ~~Any (option/flag)~~ |
|
||||
| **CREATES** | The label files. |
|
||||
|
||||
## configure {#configure new="TODO"}
|
||||
|
||||
Modify or combine existing configs in high-level ways. Can be used to automate
|
||||
config changes made as part of the development cycle.
|
||||
|
||||
### configure resume {#configure-resume tag="command"}
|
||||
|
||||
Modify the input config for use in resuming training. When resuming training,
|
||||
all components are sourced from the previously trained pipeline.
|
||||
|
||||
```cli
|
||||
$ python -m spacy configure resume [base_model] [output_file]
|
||||
```
|
||||
|
||||
| Name | Description |
|
||||
| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `base_model` | A trained pipeline to resume training (package name or path). ~~str (positional)~~ |
|
||||
| `output_file` | Path to output `.cfg` file or `-` to write the config to stdout (so you can pipe it forward to a file or to the `train` command). Note that if you're writing to stdout, no additional logging info is printed. ~~Path (positional)~~ |
|
||||
|
||||
### configure transformer {#configure-transformer tag="command"}
|
||||
|
||||
Modify the base config to use a transformer component, optionally specifying the
|
||||
base transformer to use. Useful for converting a CNN tok2vec pipeline to use
|
||||
transformers.
|
||||
|
||||
During development of a model, you can use a CNN tok2vec for faster training
|
||||
time and reduced hardware requirements, and then use this command to convert
|
||||
your pipeline to use a transformer once you've verified a proof of concept. This
|
||||
can also help isolate whether any training issues are transformer-related or
|
||||
not.
|
||||
|
||||
```cli
|
||||
$ python -m spacy configure transformer [base_model] [output_file] [--transformer-name]
|
||||
```
|
||||
|
||||
| Name | Description |
|
||||
| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `base_model` | A trained pipeline whose config is used as the base (package name or path). ~~str (positional)~~ |
|
||||
| `output_file` | Path to output `.cfg` file or `-` to write the config to stdout (so you can pipe it forward to a file or to the `train` command). Note that if you're writing to stdout, no additional logging info is printed. ~~Path (positional)~~ |
|
||||
| `--transformer-name` | The name of the base HuggingFace model to use. Defaults to `roberta-base`. ~~str (option)~~ |
|
||||
|
||||
### configure tok2vec {#configure-tok2vec tag="command"}
|
||||
|
||||
Modify the base model config to use a CNN tok2vec component. Useful for
|
||||
generating a config from a transformer-based model for faster training
|
||||
iteration.
|
||||
|
||||
```cli
|
||||
$ python -m spacy configure tok2vec [base_model] [output_file]
|
||||
```
|
||||
|
||||
| Name | Description |
|
||||
| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `base_model` | A trained pipeline whose config is used as the base (package name or path). ~~str (positional)~~ |
|
||||
| `output_file` | Path to output `.cfg` file or `-` to write the config to stdout (so you can pipe it forward to a file or to the `train` command). Note that if you're writing to stdout, no additional logging info is printed. ~~Path (positional)~~ |
|
||||
|
||||
### configure merge {#configure-merge tag="command"}
|
||||
|
||||
Take two pipelines and create a new one with components from both of them,
|
||||
handling the configuration of listeners. Note that unlike other commands, this
|
||||
produces a whole pipeline, not just a config.
|
||||
|
||||
Components in the final pipeline are in the same order as in the original
|
||||
pipelines, with the base pipeline first and the added pipeline after. Because
|
||||
pipeline names must be unique, if there is a name collision in components, the
|
||||
later components will be automatically renamed.
|
||||
|
||||
For components with listeners, the resulting pipeline structure depends on the
|
||||
number of listeners. If the second pipeline has only one listener, then
|
||||
[`replace_listeners`](https://spacy.io/api/language/#replace_listeners) will be
|
||||
used. If there is more than one listener, `replace_listeners` will not be used.
|
||||
In the multi-listener case, the resulting pipeline may require more adjustment
|
||||
for training to work.
|
||||
|
||||
This is useful if you have trained a specialized component, such as NER or
|
||||
textcat, and want to combine it with one of the official pretrained pipelines or
|
||||
another pipeline.
|
||||
|
||||
```cli
|
||||
$ python -m spacy configure merge [base_model] [added_model] [output_file]
|
||||
```
|
||||
|
||||
| Name | Description |
|
||||
| ------------- | ---------------------------------------------------------------------------------------- |
|
||||
| `base_model` | A trained pipeline (package name or path) to use as a base. ~~str (positional)~~ |
|
||||
| `added_model` | A trained pipeline (package name or path) to combine with the base. ~~str (positional)~~ |
|
||||
| `output_file` | Path to output pipeline. ~~Path (positional)~~ |
|
||||
|
||||
## convert {#convert tag="command"}
|
||||
|
||||
Convert files into spaCy's
|
||||
|
|
Loading…
Reference in New Issue
Block a user