Fix CLI consistency [ci skip]

This commit is contained in:
Ines Montani 2020-08-16 15:46:29 +02:00
parent 45f13cbf64
commit 6ae83bde0c
4 changed files with 99 additions and 103 deletions

View File

@ -49,10 +49,7 @@ def debug_config_cli(
import_code(code_path) import_code(code_path)
with show_validation_error(config_path): with show_validation_error(config_path):
config = util.load_config(config_path, overrides=overrides) config = util.load_config(config_path, overrides=overrides)
try:
nlp, _ = util.load_model_from_config(config, auto_fill=auto_fill) nlp, _ = util.load_model_from_config(config, auto_fill=auto_fill)
except ValueError as e:
msg.fail(str(e), exits=1)
if auto_fill: if auto_fill:
orig_config = config.to_str() orig_config = config.to_str()
filled_config = nlp.config.to_str() filled_config = nlp.config.to_str()

View File

@ -50,10 +50,7 @@ def debug_model_cli(
config_overrides = parse_config_overrides(ctx.args) config_overrides = parse_config_overrides(ctx.args)
with show_validation_error(config_path): with show_validation_error(config_path):
config = util.load_config(config_path, overrides=config_overrides) config = util.load_config(config_path, overrides=config_overrides)
try:
nlp, config = util.load_model_from_config(config_path) nlp, config = util.load_model_from_config(config_path)
except ValueError as e:
msg.fail(str(e), exits=1)
seed = config["pretraining"]["seed"] seed = config["pretraining"]["seed"]
if seed is not None: if seed is not None:
msg.info(f"Fixing random seed: {seed}") msg.info(f"Fixing random seed: {seed}")

View File

@ -82,12 +82,8 @@ def fill_config(
is_stdout = str(output_file) == "-" is_stdout = str(output_file) == "-"
msg = Printer(no_print=is_stdout) msg = Printer(no_print=is_stdout)
with show_validation_error(hint_fill=False): with show_validation_error(hint_fill=False):
with msg.loading("Auto-filling config..."):
config = util.load_config(base_path) config = util.load_config(base_path)
try:
nlp, _ = util.load_model_from_config(config, auto_fill=True) nlp, _ = util.load_model_from_config(config, auto_fill=True)
except ValueError as e:
msg.fail(str(e), exits=1)
before = config.to_str() before = config.to_str()
after = nlp.config.to_str() after = nlp.config.to_str()
if before == after: if before == after:
@ -152,10 +148,7 @@ def init_config(
require_spacy_transformers(msg) require_spacy_transformers(msg)
with show_validation_error(hint_fill=False): with show_validation_error(hint_fill=False):
config = util.load_config_from_str(base_template) config = util.load_config_from_str(base_template)
try:
nlp, _ = util.load_model_from_config(config, auto_fill=True) nlp, _ = util.load_model_from_config(config, auto_fill=True)
except ValueError as e:
msg.fail(str(e), exits=1)
if use_transformer: if use_transformer:
nlp.config.pop("pretraining", {}) # TODO: solve this better nlp.config.pop("pretraining", {}) # TODO: solve this better
msg.good("Auto-filled config with all values") msg.good("Auto-filled config with all values")

View File

@ -16,9 +16,11 @@ menu:
- ['Project', 'project'] - ['Project', 'project']
--- ---
For a list of available commands, type `spacy --help`. spaCy's CLI provides a range of helpful commands for downloading and training
models, converting data and debugging your config, data and installation. For a
<!-- TODO: add notes on autocompletion etc. --> list of available commands, you can type `python -m spacy --help`. You can also
add the `--help` flag to any command or subcommand to see the description,
available arguments and usage.
## Download {#download} ## Download {#download}
@ -42,11 +44,11 @@ $ python -m spacy download [model] [--direct] [pip args]
``` ```
| Argument | Type | Description | | Argument | Type | Description |
| ------------------------------------- | ---------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ------------------------------------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `model` | positional | Model name, e.g. [`en_core_web_sm`](/models/en#en_core_web_sm). | | `model` | positional | Model name, e.g. [`en_core_web_sm`](/models/en#en_core_web_sm). |
| `--direct`, `-d` | flag | Force direct download of exact model version. | | `--direct`, `-d` | flag | Force direct download of exact model version. |
| pip args <Tag variant="new">2.1</Tag> | - | Additional installation options to be passed to `pip install` when installing the model package. For example, `--user` to install to the user home directory or `--no-deps` to not install model dependencies. |
| `--help`, `-h` | flag | Show help message and available arguments. | | `--help`, `-h` | flag | Show help message and available arguments. |
| pip args <Tag variant="new">2.1</Tag> | option / flag | Additional installation options to be passed to `pip install` when installing the model package. For example, `--user` to install to the user home directory or `--no-deps` to not install model dependencies. |
| **CREATES** | directory | The installed model package in your `site-packages` directory. | | **CREATES** | directory | The installed model package in your `site-packages` directory. |
## Info {#info} ## Info {#info}
@ -189,6 +191,7 @@ $ python -m spacy init model [lang] [output_dir] [--jsonl-loc] [--vectors-loc]
| `--truncate-vectors`, `-t` <Tag variant="new">2.3</Tag> | option | Number of vectors to truncate to when reading in vectors file. Defaults to `0` for no truncation. | | `--truncate-vectors`, `-t` <Tag variant="new">2.3</Tag> | option | Number of vectors to truncate to when reading in vectors file. Defaults to `0` for no truncation. |
| `--prune-vectors`, `-V` | option | Number of vectors to prune the vocabulary to. Defaults to `-1` for no pruning. | | `--prune-vectors`, `-V` | option | Number of vectors to prune the vocabulary to. Defaults to `-1` for no pruning. |
| `--vectors-name`, `-vn` | option | Name to assign to the word vectors in the `meta.json`, e.g. `en_core_web_md.vectors`. | | `--vectors-name`, `-vn` | option | Name to assign to the word vectors in the `meta.json`, e.g. `en_core_web_md.vectors`. |
| `--help`, `-h` | flag | Show help message and available arguments. |
| **CREATES** | model | A spaCy model containing the vocab and vectors. | | **CREATES** | model | A spaCy model containing the vocab and vectors. |
## Convert {#convert} ## Convert {#convert}
@ -257,34 +260,33 @@ $ python -m spacy debug config [config_path] [--code_path] [--output] [--auto_fi
<Accordion title="Example output" spaced> <Accordion title="Example output" spaced>
<!-- TODO: update examples with validation error of final config -->
``` ```
✘ Config validation error ✘ Config validation error
training -> use_gpu field required training -> dropout field required
training -> omit_extra_lookups field required training -> optimizer field required
training -> batch_by field required training -> optimize extra fields not permitted
training -> raw_text field required
training -> tag_map field required
training -> evaluation_batch_size extra fields not permitted
training -> vectors extra fields not permitted
training -> width extra fields not permitted
{'gold_preproc': False, 'max_length': 3000, 'limit': 0, 'orth_variant_level': 0.0, 'dropout': 0.1, 'patience': 6000, 'max_epochs': 0, 'max_steps': 100000, 'eval_frequency': 400, 'seed': 0, 'accumulate_gradient': 4, 'width': 768, 'use_pytorch_for_gpu_memory': True, 'scores': ['speed', 'tags_acc', 'uas', 'las', 'ents_f'], 'score_weights': {'las': 0.4, 'ents_f': 0.4, 'tags_acc': 0.2}, 'init_tok2vec': None, 'vectors': None, 'discard_oversize': True, 'evaluation_batch_size': 16, 'batch_size': {'@schedules': 'compounding.v1', 'start': 800, 'stop': 800, 'compound': 1.001}, 'optimizer': {'@optimizers': 'Adam.v1', 'beta1': 0.9, 'beta2': 0.999, 'L2_is_weight_decay': True, 'L2': 0.01, 'grad_clip': 1.0, 'use_averages': False, 'eps': 1e-08, 'learn_rate': {'@schedules': 'warmup_linear.v1', 'warmup_steps': 250, 'total_steps': 20000, 'initial_rate': 5e-05}}} {'vectors': 'en_vectors_web_lg', 'seed': 0, 'accumulate_gradient': 1, 'init_tok2vec': None, 'raw_text': None, 'patience': 1600, 'max_epochs': 0, 'max_steps': 20000, 'eval_frequency': 200, 'frozen_components': [], 'optimize': None, 'batcher': {'@batchers': 'batch_by_words.v1', 'discard_oversize': False, 'tolerance': 0.2, 'get_length': None, 'size': {'@schedules': 'compounding.v1', 'start': 100, 'stop': 1000, 'compound': 1.001, 't': 0.0}}, 'dev_corpus': {'@readers': 'spacy.Corpus.v1', 'path': '', 'max_length': 0, 'gold_preproc': False, 'limit': 0}, 'score_weights': {'tag_acc': 0.5, 'dep_uas': 0.25, 'dep_las': 0.25, 'sents_f': 0.0}, 'train_corpus': {'@readers': 'spacy.Corpus.v1', 'path': '', 'max_length': 0, 'gold_preproc': False, 'limit': 0}}
If your config contains missing values, you can run the 'init fill-config'
command to fill in all the defaults, if possible:
python -m spacy init fill-config tmp/starter-config_invalid.cfg --base tmp/starter-config_invalid.cfg
``` ```
</Accordion> </Accordion>
| Argument | Type | Default | Description | | Argument | Type | Default | Description |
| --------------------- | ---------- | ------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | --------------------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `config_path` | positional | - | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. | | `config_path` | positional | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. |
| `--code_path`, `-c` | option | `None` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-models) for new architectures. | | `--code_path`, `-c` | option | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-models) for new architectures. |
| `--auto_fill`, `-F` | option | `False` | Whether or not to auto-fill the config with built-in defaults if possible. If `False`, the provided config needs to be complete. | | `--auto_fill`, `-F` | option | Whether or not to auto-fill the config with built-in defaults if possible. If `False`, the provided config needs to be complete. |
| `--output_path`, `-o` | option | `None` | Output path where the filled config can be stored. Use '-' for standard output. | | `--output_path`, `-o` | option | Output path where the filled config can be stored. Use '-' for standard output. |
| `--diff`, `-D` | option | `False` | Show a visual diff if config was auto-filled. | | `--diff`, `-D` | option | Show a visual diff if config was auto-filled. |
| `--help`, `-h` | flag | `False` | Show help message and available arguments. | | `--help`, `-h` | flag | Show help message and available arguments. |
| overrides | | `None` | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. | | overrides | option / flag | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. |
| **PRINTS** | stdout | Config validation errors, if available. |
### debug data {#debug-data} ### debug data {#debug-data}
@ -452,14 +454,15 @@ will not be available.
</Accordion> </Accordion>
| Argument | Type | Description | | Argument | Type | Description |
| -------------------------- | ---------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | -------------------------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `config_path` | positional | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. | | `config_path` | positional | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. |
| `--code`, `-c` | option | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-models) for new architectures. | | `--code`, `-c` | option | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-models) for new architectures. |
| `--ignore-warnings`, `-IW` | flag | Ignore warnings, only show stats and errors. | | `--ignore-warnings`, `-IW` | flag | Ignore warnings, only show stats and errors. |
| `--verbose`, `-V` | flag | Print additional information and explanations. | | `--verbose`, `-V` | flag | Print additional information and explanations. |
| `--no-format`, `-NF` | flag | Don't pretty-print the results. Use this if you want to write to a file. | | `--no-format`, `-NF` | flag | Don't pretty-print the results. Use this if you want to write to a file. |
| `--help`, `-h` | flag | Show help message and available arguments. | | `--help`, `-h` | flag | Show help message and available arguments. |
| overrides | | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. | | overrides | option / flag | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. |
| **PRINTS** | stdout | Debugging information. |
### debug profile {#debug-profile} ### debug profile {#debug-profile}
@ -480,10 +483,12 @@ $ python -m spacy debug profile [model] [inputs] [--n-texts]
``` ```
| Argument | Type | Description | | Argument | Type | Description |
| ----------------- | ----------------------------------------------------------------- | ------------------------------------------------------- | | ----------------- | ---------- | ----------------------------------------------------------------- |
| `model` | positional | A loadable spaCy model. | | `model` | positional | A loadable spaCy model. |
| `inputs` | positional | Optional path to input file, or `-` for standard input. | | `inputs` | positional | Optional path to input file, or `-` for standard input. |
| `--n-texts`, `-n` | Maximum number of texts to use if available. Defaults to `10000`. | | `--n-texts`, `-n` | option | Maximum number of texts to use if available. Defaults to `10000`. |
| `--help`, `-h` | flag | Show help message and available arguments. |
| **PRINTS** | stdout | Profiling information for the model. |
### debug model {#debug-model} ### debug model {#debug-model}
@ -591,20 +596,21 @@ $ python -m spacy debug model ./config.cfg tagger -l "5,15" -DIM -PAR -P0 -P1 -P
</Accordion> </Accordion>
| Argument | Type | Description | Default | | Argument | Type | Description |
| ----------------------- | ---------- | ----------------------------------------------------------------------------------------------------- | ------- | | ----------------------- | ---------- | ----------------------------------------------------------------------------------------------------- |
| `config_path` | positional | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. | | | `config_path` | positional | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. |
| `component` | positional | Name of the pipeline component of which the model should be analyzed. |   | | `component` | positional | Name of the pipeline component of which the model should be analyzed. |
| `--layers`, `-l` | option | Comma-separated names of layer IDs to print. | | | `--layers`, `-l` | option | Comma-separated names of layer IDs to print. |
| `--dimensions`, `-DIM` | option | Show dimensions of each layer. | `False` | | `--dimensions`, `-DIM` | flag | Show dimensions of each layer. |
| `--parameters`, `-PAR` | option | Show parameters of each layer. | `False` | | `--parameters`, `-PAR` | flag | Show parameters of each layer. |
| `--gradients`, `-GRAD` | option | Show gradients of each layer. | `False` | | `--gradients`, `-GRAD` | flag | Show gradients of each layer. |
| `--attributes`, `-ATTR` | option | Show attributes of each layer. | `False` | | `--attributes`, `-ATTR` | flag | Show attributes of each layer. |
| `--print-step0`, `-P0` | option | Print model before training. | `False` | | `--print-step0`, `-P0` | flag | Print model before training. |
| `--print-step1`, `-P1` | option | Print model after initialization. | `False` | | `--print-step1`, `-P1` | flag | Print model after initialization. |
| `--print-step2`, `-P2` | option | Print model after training. | `False` | | `--print-step2`, `-P2` | flag | Print model after training. |
| `--print-step3`, `-P3` | option | Print final predictions. | `False` | | `--print-step3`, `-P3` | flag | Print final predictions. |
| `--help`, `-h` | flag | Show help message and available arguments. | | | `--help`, `-h` | flag | Show help message and available arguments. |
| **PRINTS** | stdout | Debugging information. |
## Train {#train} ## Train {#train}
@ -635,13 +641,13 @@ $ python -m spacy train [config_path] [--output] [--code] [--verbose] [overrides
``` ```
| Argument | Type | Description | | Argument | Type | Description |
| ----------------- | ---------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ----------------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `config_path` | positional | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. | | `config_path` | positional | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. |
| `--output`, `-o` | positional | Directory to store model in. Will be created if it doesn't exist. | | `--output`, `-o` | option | Directory to store model in. Will be created if it doesn't exist. |
| `--code`, `-c` | option | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-models) for new architectures. | | `--code`, `-c` | option | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-models) for new architectures. |
| `--verbose`, `-V` | flag | Show more detailed messages during training. | | `--verbose`, `-V` | flag | Show more detailed messages during training. |
| `--help`, `-h` | flag | Show help message and available arguments. | | `--help`, `-h` | flag | Show help message and available arguments. |
| overrides | | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. | | overrides | option / flag | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. |
| **CREATES** | model | The final model and the best model. | | **CREATES** | model | The final model and the best model. |
## Pretrain {#pretrain new="2.1" tag="experimental"} ## Pretrain {#pretrain new="2.1" tag="experimental"}
@ -673,7 +679,7 @@ $ python -m spacy pretrain [texts_loc] [output_dir] [config_path]
``` ```
| Argument | Type | Description | | Argument | Type | Description |
| ----------------------- | ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ----------------------- | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `texts_loc` | positional | Path to JSONL file with raw texts to learn from, with text provided as the key `"text"` or tokens as the key `"tokens"`. [See here](/api/data-formats#pretrain) for details. | | `texts_loc` | positional | Path to JSONL file with raw texts to learn from, with text provided as the key `"text"` or tokens as the key `"tokens"`. [See here](/api/data-formats#pretrain) for details. |
| `output_dir` | positional | Directory to write models to on each epoch. | | `output_dir` | positional | Directory to write models to on each epoch. |
| `config_path` | positional | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. | | `config_path` | positional | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. |
@ -681,7 +687,7 @@ $ python -m spacy pretrain [texts_loc] [output_dir] [config_path]
| `--resume-path`, `-r` | option | Path to pretrained weights from which to resume pretraining. | | `--resume-path`, `-r` | option | Path to pretrained weights from which to resume pretraining. |
| `--epoch-resume`, `-er` | option | The epoch to resume counting from when using `--resume-path`. Prevents unintended overwriting of existing weight files. | | `--epoch-resume`, `-er` | option | The epoch to resume counting from when using `--resume-path`. Prevents unintended overwriting of existing weight files. |
| `--help`, `-h` | flag | Show help message and available arguments. | | `--help`, `-h` | flag | Show help message and available arguments. |
| overrides | | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--training.use_gpu 1`. | | overrides | option / flag | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--training.use_gpu 1`. |
| **CREATES** | weights | The pretrained weights that can be used to initialize `spacy train`. | | **CREATES** | weights | The pretrained weights that can be used to initialize `spacy train`. |
## Evaluate {#evaluate new="2"} ## Evaluate {#evaluate new="2"}
@ -710,6 +716,7 @@ $ python -m spacy evaluate [model] [data_path] [--output] [--gold-preproc]
| `--gpu-id`, `-g` | option | GPU to use, if any. Defaults to `-1` for CPU. | | `--gpu-id`, `-g` | option | GPU to use, if any. Defaults to `-1` for CPU. |
| `--displacy-path`, `-dp` | option | Directory to output rendered parses as HTML. If not set, no visualizations will be generated. | | `--displacy-path`, `-dp` | option | Directory to output rendered parses as HTML. If not set, no visualizations will be generated. |
| `--displacy-limit`, `-dl` | option | Number of parses to generate per file. Defaults to `25`. Keep in mind that a significantly higher number might cause the `.html` files to render slowly. | | `--displacy-limit`, `-dl` | option | Number of parses to generate per file. Defaults to `25`. Keep in mind that a significantly higher number might cause the `.html` files to render slowly. |
| `--help`, `-h` | flag | Show help message and available arguments. |
| **CREATES** | `stdout`, JSON, HTML | Training results and optional metrics and visualizations. | | **CREATES** | `stdout`, JSON, HTML | Training results and optional metrics and visualizations. |
## Package {#package} ## Package {#package}
@ -849,6 +856,7 @@ $ python -m spacy project run [subcommand] [project_dir] [--force] [--dry]
| `--force`, `-F` | flag | Force re-running steps, even if nothing changed. | | `--force`, `-F` | flag | Force re-running steps, even if nothing changed. |
| `--dry`, `-D` | flag |  Perform a dry run and don't execute scripts. | | `--dry`, `-D` | flag |  Perform a dry run and don't execute scripts. |
| `--help`, `-h` | flag | Show help message and available arguments. | | `--help`, `-h` | flag | Show help message and available arguments. |
| **EXECUTES** | script | The command defined in the `project.yml`. |
### project dvc {#project-dvc} ### project dvc {#project-dvc}
@ -883,9 +891,10 @@ $ python -m spacy project dvc [project_dir] [workflow] [--force] [--verbose]
> ``` > ```
| Argument | Type | Description | | Argument | Type | Description |
| ----------------- | ---------- | --------------------------------------------------------------------------------- | | ----------------- | ---------- | --------------------------------------------------------------------------------------------- |
| `project_dir` | positional | Path to project directory. Defaults to current working directory. | | `project_dir` | positional | Path to project directory. Defaults to current working directory. |
| `workflow` | positional | Name of workflow defined in `project.yml`. Defaults to first workflow if not set. | | `workflow` | positional | Name of workflow defined in `project.yml`. Defaults to first workflow if not set. |
| `--force`, `-F` | flag | Force-updating config file. | | `--force`, `-F` | flag | Force-updating config file. |
| `--verbose`, `-V` | flag |  Print more output generated by DVC. | | `--verbose`, `-V` | flag |  Print more output generated by DVC. |
| `--help`, `-h` | flag | Show help message and available arguments. | | `--help`, `-h` | flag | Show help message and available arguments. |
| **CREATES** | file | A `dvc.yaml` file in the project directory, based on the steps defined in the given workflow. |