mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-23 23:20:52 +03:00
Merge pull request #6084 from svlandeg/feature/init-config-pretrain [ci skip]
This commit is contained in:
commit
2c80f41852
|
@ -30,6 +30,7 @@ def init_config_cli(
|
||||||
pipeline: Optional[str] = Opt("tagger,parser,ner", "--pipeline", "-p", help="Comma-separated names of trainable pipeline components to include (without 'tok2vec' or 'transformer')"),
|
pipeline: Optional[str] = Opt("tagger,parser,ner", "--pipeline", "-p", help="Comma-separated names of trainable pipeline components to include (without 'tok2vec' or 'transformer')"),
|
||||||
optimize: Optimizations = Opt(Optimizations.efficiency.value, "--optimize", "-o", help="Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters."),
|
optimize: Optimizations = Opt(Optimizations.efficiency.value, "--optimize", "-o", help="Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters."),
|
||||||
cpu: bool = Opt(False, "--cpu", "-C", help="Whether the model needs to run on CPU. This will impact the choice of architecture, pretrained weights and related hyperparameters."),
|
cpu: bool = Opt(False, "--cpu", "-C", help="Whether the model needs to run on CPU. This will impact the choice of architecture, pretrained weights and related hyperparameters."),
|
||||||
|
pretraining: bool = Opt(False, "--pretraining", "-pt", help="Include config for pretraining (with 'spacy pretrain')"),
|
||||||
# fmt: on
|
# fmt: on
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
|
@ -43,7 +44,14 @@ def init_config_cli(
|
||||||
if isinstance(optimize, Optimizations): # instance of enum from the CLI
|
if isinstance(optimize, Optimizations): # instance of enum from the CLI
|
||||||
optimize = optimize.value
|
optimize = optimize.value
|
||||||
pipeline = string_to_list(pipeline)
|
pipeline = string_to_list(pipeline)
|
||||||
init_config(output_file, lang=lang, pipeline=pipeline, optimize=optimize, cpu=cpu)
|
init_config(
|
||||||
|
output_file,
|
||||||
|
lang=lang,
|
||||||
|
pipeline=pipeline,
|
||||||
|
optimize=optimize,
|
||||||
|
cpu=cpu,
|
||||||
|
pretraining=pretraining,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@init_cli.command("fill-config")
|
@init_cli.command("fill-config")
|
||||||
|
@ -51,7 +59,7 @@ def init_fill_config_cli(
|
||||||
# fmt: off
|
# fmt: off
|
||||||
base_path: Path = Arg(..., help="Base config to fill", exists=True, dir_okay=False),
|
base_path: Path = Arg(..., help="Base config to fill", exists=True, dir_okay=False),
|
||||||
output_file: Path = Arg("-", help="File to save config.cfg to (or - for stdout)", allow_dash=True),
|
output_file: Path = Arg("-", help="File to save config.cfg to (or - for stdout)", allow_dash=True),
|
||||||
pretraining: bool = Opt(False, "--pretraining", "-p", help="Include config for pretraining (with 'spacy pretrain')"),
|
pretraining: bool = Opt(False, "--pretraining", "-pt", help="Include config for pretraining (with 'spacy pretrain')"),
|
||||||
diff: bool = Opt(False, "--diff", "-D", help="Print a visual diff highlighting the changes")
|
diff: bool = Opt(False, "--diff", "-D", help="Print a visual diff highlighting the changes")
|
||||||
# fmt: on
|
# fmt: on
|
||||||
):
|
):
|
||||||
|
@ -109,7 +117,13 @@ def fill_config(
|
||||||
|
|
||||||
|
|
||||||
def init_config(
|
def init_config(
|
||||||
output_file: Path, *, lang: str, pipeline: List[str], optimize: str, cpu: bool
|
output_file: Path,
|
||||||
|
*,
|
||||||
|
lang: str,
|
||||||
|
pipeline: List[str],
|
||||||
|
optimize: str,
|
||||||
|
cpu: bool,
|
||||||
|
pretraining: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
is_stdout = str(output_file) == "-"
|
is_stdout = str(output_file) == "-"
|
||||||
msg = Printer(no_print=is_stdout)
|
msg = Printer(no_print=is_stdout)
|
||||||
|
@ -156,8 +170,13 @@ def init_config(
|
||||||
with show_validation_error(hint_fill=False):
|
with show_validation_error(hint_fill=False):
|
||||||
config = util.load_config_from_str(base_template)
|
config = util.load_config_from_str(base_template)
|
||||||
nlp, _ = util.load_model_from_config(config, auto_fill=True)
|
nlp, _ = util.load_model_from_config(config, auto_fill=True)
|
||||||
|
config = nlp.config
|
||||||
|
if pretraining:
|
||||||
|
validate_config_for_pretrain(config, msg)
|
||||||
|
pretrain_config = util.load_config(DEFAULT_CONFIG_PRETRAIN_PATH)
|
||||||
|
config = pretrain_config.merge(config)
|
||||||
msg.good("Auto-filled config with all values")
|
msg.good("Auto-filled config with all values")
|
||||||
save_config(nlp.config, output_file, is_stdout=is_stdout)
|
save_config(config, output_file, is_stdout=is_stdout)
|
||||||
|
|
||||||
|
|
||||||
def save_config(
|
def save_config(
|
||||||
|
|
|
@ -121,18 +121,19 @@ customize those settings in your config file later.
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
```cli
|
```cli
|
||||||
$ python -m spacy init config [output_file] [--lang] [--pipeline] [--optimize] [--cpu]
|
$ python -m spacy init config [output_file] [--lang] [--pipeline] [--optimize] [--cpu] [--pretraining]
|
||||||
```
|
```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `output_file` | Path to output `.cfg` file or `-` to write the config to stdout (so you can pipe it forward to a file). Note that if you're writing to stdout, no additional logging info is printed. ~~Path (positional)~~ |
|
| `output_file` | Path to output `.cfg` file or `-` to write the config to stdout (so you can pipe it forward to a file). Note that if you're writing to stdout, no additional logging info is printed. ~~Path (positional)~~ |
|
||||||
| `--lang`, `-l` | Optional code of the [language](/usage/models#languages) to use. Defaults to `"en"`. ~~str (option)~~ |
|
| `--lang`, `-l` | Optional code of the [language](/usage/models#languages) to use. Defaults to `"en"`. ~~str (option)~~ |
|
||||||
| `--pipeline`, `-p` | Comma-separated list of trainable [pipeline components](/usage/processing-pipelines#built-in) to include. Defaults to `"tagger,parser,ner"`. ~~str (option)~~ |
|
| `--pipeline`, `-p` | Comma-separated list of trainable [pipeline components](/usage/processing-pipelines#built-in) to include. Defaults to `"tagger,parser,ner"`. ~~str (option)~~ |
|
||||||
| `--optimize`, `-o` | `"efficiency"` or `"accuracy"`. Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters. Defaults to `"efficiency"`. ~~str (option)~~ |
|
| `--optimize`, `-o` | `"efficiency"` or `"accuracy"`. Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters. Defaults to `"efficiency"`. ~~str (option)~~ |
|
||||||
| `--cpu`, `-C` | Whether the model needs to run on CPU. This will impact the choice of architecture, pretrained weights and related hyperparameters. ~~bool (flag)~~ |
|
| `--cpu`, `-C` | Whether the model needs to run on CPU. This will impact the choice of architecture, pretrained weights and related hyperparameters. ~~bool (flag)~~ |
|
||||||
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
|
| `--pretraining`, `-pt` | Include config for pretraining (with [`spacy pretrain`](/api/cli#pretrain)). Defaults to `False`. ~~bool (flag)~~ |
|
||||||
| **CREATES** | The config file for training. |
|
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
|
||||||
|
| **CREATES** | The config file for training. |
|
||||||
|
|
||||||
### init fill-config {#init-fill-config new="3"}
|
### init fill-config {#init-fill-config new="3"}
|
||||||
|
|
||||||
|
@ -160,13 +161,14 @@ validation error with more details.
|
||||||
$ python -m spacy init fill-config [base_path] [output_file] [--diff]
|
$ python -m spacy init fill-config [base_path] [output_file] [--diff]
|
||||||
```
|
```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| -------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
|
| --------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `base_path` | Path to base config to fill, e.g. generated by the [quickstart widget](/usage/training#quickstart). ~~Path (positional)~~ |
|
| `base_path` | Path to base config to fill, e.g. generated by the [quickstart widget](/usage/training#quickstart). ~~Path (positional)~~ |
|
||||||
| `output_file` | Path to output `.cfg` file. If not set, the config is written to stdout so you can pipe it forward to a file. ~~Path (positional)~~ |
|
| `output_file` | Path to output `.cfg` file. If not set, the config is written to stdout so you can pipe it forward to a file. ~~Path (positional)~~ |
|
||||||
| `--diff`, `-D` | Print a visual diff highlighting the changes. ~~bool (flag)~~ |
|
| `--pretraining`, `-pt` | Include config for pretraining (with [`spacy pretrain`](/api/cli#pretrain)). Defaults to `False`. ~~bool (flag)~~ |
|
||||||
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
|
| `--diff`, `-D` | Print a visual diff highlighting the changes. ~~bool (flag)~~ |
|
||||||
| **CREATES** | Complete and auto-filled config file for training. |
|
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
|
||||||
|
| **CREATES** | Complete and auto-filled config file for training. |
|
||||||
|
|
||||||
### init vocab {#init-vocab new="3" tag="command"}
|
### init vocab {#init-vocab new="3" tag="command"}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user