mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 13:11:03 +03:00 
			
		
		
		
	Add docs for configure command
This also changes the `output_file` arg to match other commands.
This commit is contained in:
		
							parent
							
								
									2791f0b552
								
							
						
					
					
						commit
						f2bbab4623
					
				|  | @ -137,7 +137,7 @@ def _check_pipeline_names(nlp, nlp2): | ||||||
| def configure_resume_cli( | def configure_resume_cli( | ||||||
|     # fmt: off |     # fmt: off | ||||||
|     base_model: Path = Arg(..., help="Path or name of base model to use for config"), |     base_model: Path = Arg(..., help="Path or name of base model to use for config"), | ||||||
|     output_path: Path = Arg(..., help="File to save the config to or - for stdout (will only output config and no additional logging info)", allow_dash=True), |     output_file: Path = Arg(..., help="File to save the config to or - for stdout (will only output config and no additional logging info)", allow_dash=True), | ||||||
|     # fmt: on |     # fmt: on | ||||||
| ): | ): | ||||||
|     """Create a config for resuming training. |     """Create a config for resuming training. | ||||||
|  | @ -155,18 +155,18 @@ def configure_resume_cli( | ||||||
|     for comp in nlp.pipe_names: |     for comp in nlp.pipe_names: | ||||||
|         conf["components"][comp] = {"source": path_str} |         conf["components"][comp] = {"source": path_str} | ||||||
| 
 | 
 | ||||||
|     if str(output_path) == "-": |     if str(output_file) == "-": | ||||||
|         print(conf.to_str()) |         print(conf.to_str()) | ||||||
|     else: |     else: | ||||||
|         conf.to_disk(output_path) |         conf.to_disk(output_file) | ||||||
|         msg.good("Saved config", output_path) |         msg.good("Saved config", output_file) | ||||||
| 
 | 
 | ||||||
|     return conf |     return conf | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @configure_cli.command("transformer") | @configure_cli.command("transformer") | ||||||
| def use_transformer( | def use_transformer( | ||||||
|     base_model: str, output_path: Path, transformer_name: str = "roberta-base" |     base_model: str, output_file: Path, transformer_name: str = "roberta-base" | ||||||
| ) -> Config: | ) -> Config: | ||||||
|     """Replace pipeline tok2vec with transformer.""" |     """Replace pipeline tok2vec with transformer.""" | ||||||
| 
 | 
 | ||||||
|  | @ -208,17 +208,17 @@ def use_transformer( | ||||||
|         } |         } | ||||||
|         nlp.config["components"][listener]["model"]["tok2vec"] = listener_config |         nlp.config["components"][listener]["model"]["tok2vec"] = listener_config | ||||||
| 
 | 
 | ||||||
|     if str(output_path) == "-": |     if str(output_file) == "-": | ||||||
|         print(nlp.config.to_str()) |         print(nlp.config.to_str()) | ||||||
|     else: |     else: | ||||||
|         nlp.config.to_disk(output_path) |         nlp.config.to_disk(output_file) | ||||||
|         msg.good("Saved config", output_path) |         msg.good("Saved config", output_file) | ||||||
| 
 | 
 | ||||||
|     return nlp.config |     return nlp.config | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @configure_cli.command("tok2vec") | @configure_cli.command("tok2vec") | ||||||
| def use_tok2vec(base_model: str, output_path: Path) -> Config: | def use_tok2vec(base_model: str, output_file: Path) -> Config: | ||||||
|     """Replace pipeline tok2vec with CNN tok2vec.""" |     """Replace pipeline tok2vec with CNN tok2vec.""" | ||||||
|     nlp = spacy.load(base_model) |     nlp = spacy.load(base_model) | ||||||
|     _check_single_tok2vec(base_model, nlp.config) |     _check_single_tok2vec(base_model, nlp.config) | ||||||
|  | @ -240,11 +240,11 @@ def use_tok2vec(base_model: str, output_path: Path) -> Config: | ||||||
|         } |         } | ||||||
|         nlp.config["components"][listener]["model"]["tok2vec"] = listener_config |         nlp.config["components"][listener]["model"]["tok2vec"] = listener_config | ||||||
| 
 | 
 | ||||||
|     if str(output_path) == "-": |     if str(output_file) == "-": | ||||||
|         print(nlp.config.to_str()) |         print(nlp.config.to_str()) | ||||||
|     else: |     else: | ||||||
|         nlp.config.to_disk(output_path) |         nlp.config.to_disk(output_file) | ||||||
|         msg.good("Saved config", output_path) |         msg.good("Saved config", output_file) | ||||||
| 
 | 
 | ||||||
|     return nlp.config |     return nlp.config | ||||||
| 
 | 
 | ||||||
|  | @ -298,7 +298,7 @@ def _inner_merge(nlp, nlp2, replace_listeners=False) -> Language: | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @configure_cli.command("merge") | @configure_cli.command("merge") | ||||||
| def merge_pipelines(base_model: str, added_model: str, output_path: Path) -> Language: | def merge_pipelines(base_model: str, added_model: str, output_file: Path) -> Language: | ||||||
|     """Combine components from multiple pipelines.""" |     """Combine components from multiple pipelines.""" | ||||||
|     nlp = spacy.load(base_model) |     nlp = spacy.load(base_model) | ||||||
|     nlp2 = spacy.load(added_model) |     nlp2 = spacy.load(added_model) | ||||||
|  | @ -336,7 +336,7 @@ def merge_pipelines(base_model: str, added_model: str, output_path: Path) -> Lan | ||||||
|     nlp_out = _inner_merge(nlp, nlp2, replace_listeners=replace_listeners) |     nlp_out = _inner_merge(nlp, nlp2, replace_listeners=replace_listeners) | ||||||
| 
 | 
 | ||||||
|     # write the final pipeline |     # write the final pipeline | ||||||
|     nlp.to_disk(output_path) |     nlp.to_disk(output_file) | ||||||
|     msg.info(f"Saved pipeline to: {output_path}") |     msg.info(f"Saved pipeline to: {output_file}") | ||||||
| 
 | 
 | ||||||
|     return nlp |     return nlp | ||||||
|  |  | ||||||
|  | @ -7,6 +7,7 @@ menu: | ||||||
|   - ['info', 'info'] |   - ['info', 'info'] | ||||||
|   - ['validate', 'validate'] |   - ['validate', 'validate'] | ||||||
|   - ['init', 'init'] |   - ['init', 'init'] | ||||||
|  |   - ['configure', 'configure'] | ||||||
|   - ['convert', 'convert'] |   - ['convert', 'convert'] | ||||||
|   - ['debug', 'debug'] |   - ['debug', 'debug'] | ||||||
|   - ['train', 'train'] |   - ['train', 'train'] | ||||||
|  | @ -249,6 +250,94 @@ $ python -m spacy init labels [config_path] [output_path] [--code] [--verbose] [ | ||||||
| | overrides         | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. ~~Any (option/flag)~~                         | | | overrides         | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. ~~Any (option/flag)~~                         | | ||||||
| | **CREATES**       | The label files.                                                                                                                                                                                                   | | | **CREATES**       | The label files.                                                                                                                                                                                                   | | ||||||
| 
 | 
 | ||||||
|  | ## configure {#configure new="TODO"} | ||||||
|  | 
 | ||||||
|  | Modify or combine existing configs in high-level ways. Can be used to automate | ||||||
|  | config changes made as part of the development cycle. | ||||||
|  | 
 | ||||||
|  | ### configure resume {#configure-resume tag="command"} | ||||||
|  | 
 | ||||||
|  | Modify the input config for use in resuming training. When resuming training, | ||||||
|  | all components are sourced from the previously trained pipeline. | ||||||
|  | 
 | ||||||
|  | ```cli | ||||||
|  | $ python -m spacy configure resume [base_model] [output_file] | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | | Name          | Description                                                                                                                                                                                                                           | | ||||||
|  | | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||||
|  | | `base_model`  | A trained pipeline to resume training (package name or path). ~~str (positional)~~                                                                                                                                                    | | ||||||
|  | | `output_file` | Path to output `.cfg` file or `-` to write the config to stdout (so you can pipe it forward to a file or to the `train` command). Note that if you're writing to stdout, no additional logging info is printed. ~~Path (positional)~~ | | ||||||
|  | 
 | ||||||
|  | ### configure transformer {#configure-transformer tag="command"} | ||||||
|  | 
 | ||||||
|  | Modify the base config to use a transformer component, optionally specifying the | ||||||
|  | base transformer to use. Useful for converting a CNN tok2vec pipeline to use | ||||||
|  | transformers. | ||||||
|  | 
 | ||||||
|  | During development of a model, you can use a CNN tok2vec for faster training | ||||||
|  | time and reduced hardware requirements, and then use this command to convert | ||||||
|  | your pipeline to use a transformer once you've verified a proof of concept. This | ||||||
|  | can also help isolate whether any training issues are transformer-related or | ||||||
|  | not. | ||||||
|  | 
 | ||||||
|  | ```cli | ||||||
|  | $ python -m spacy configure transformer [base_model] [output_file] [--transformer-name] | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | | Name               | Description                                                                                                                                                                                                                           | | ||||||
|  | | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||||
|  | | `base_model`       | A trained pipeline to use as the base (package name or path). ~~str (positional)~~                                                                                                                                                    | | ||||||
|  | | `output_file`      | Path to output `.cfg` file or `-` to write the config to stdout (so you can pipe it forward to a file or to the `train` command). Note that if you're writing to stdout, no additional logging info is printed. ~~Path (positional)~~ | | ||||||
|  | | `transformer_name` | The name of the base HuggingFace model to use. Defaults to `roberta-base`. ~~str (option)~~                                                                                                                                           | | ||||||
|  | 
 | ||||||
|  | ### configure tok2vec {#configure-tok2vec tag="command"} | ||||||
|  | 
 | ||||||
|  | Modify the base model config to use a CNN tok2vec component. Useful for | ||||||
|  | generating a config from a transformer-based model for faster training | ||||||
|  | iteration. | ||||||
|  | 
 | ||||||
|  | ```cli | ||||||
|  | $ python -m spacy configure tok2vec [base_model] [output_file] | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | | Name          | Description                                                                                                                                                                                                                           | | ||||||
|  | | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||||
|  | | `base_model`  | A trained pipeline to use as the base (package name or path). ~~str (positional)~~                                                                                                                                                    | | ||||||
|  | | `output_file` | Path to output `.cfg` file or `-` to write the config to stdout (so you can pipe it forward to a file or to the `train` command). Note that if you're writing to stdout, no additional logging info is printed. ~~Path (positional)~~ | | ||||||
|  | 
 | ||||||
|  | ### configure merge {#configure-merge tag="command"} | ||||||
|  | 
 | ||||||
|  | Take two pipelines and create a new one with components from both of them, | ||||||
|  | handling the configuration of listeners. Note that unlike other commands, this | ||||||
|  | produces a whole pipeline, not just a config. | ||||||
|  | 
 | ||||||
|  | Components in the final pipeline are in the same order as in the original | ||||||
|  | pipelines, with the base pipeline first and the added pipeline after. Because | ||||||
|  | pipeline names must be unique, if there is a name collision in components, the | ||||||
|  | later components will be automatically renamed. | ||||||
|  | 
 | ||||||
|  | For components with listeners, the resulting pipeline structure depends on the | ||||||
|  | number of listeners. If the second pipeline has only one listener, then | ||||||
|  | [`replace_listeners`](https://spacy.io/api/language/#replace_listeners) will be | ||||||
|  | used. If there is more than one listener, `replace_listeners` will not be used. | ||||||
|  | In the multi-listener case, the resulting pipeline may require more adjustment | ||||||
|  | for training to work. | ||||||
|  | 
 | ||||||
|  | This is useful if you have trained a specialized component, such as NER or | ||||||
|  | textcat, and want to combine it with one of the official pretrained pipelines | ||||||
|  | or another pipeline. | ||||||
|  | 
 | ||||||
|  | ```cli | ||||||
|  | $ python -m spacy configure merge [base_model] [added_model] [output_file] | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | | Name          | Description                                                                              | | ||||||
|  | | ------------- | ---------------------------------------------------------------------------------------- | | ||||||
|  | | `base_model`  | A trained pipeline (package name or path) to use as a base. ~~str (positional)~~         | | ||||||
|  | | `added_model` | A trained pipeline (package name or path) to combine with the base. ~~str (positional)~~ | | ||||||
|  | | `output_file` | Path to output pipeline. ~~Path (positional)~~                                           | | ||||||
|  | 
 | ||||||
| ## convert {#convert tag="command"} | ## convert {#convert tag="command"} | ||||||
| 
 | 
 | ||||||
| Convert files into spaCy's | Convert files into spaCy's | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user