Fix references to config file in the docs & UX (#9961)

* doc fixes around config file

* fix typo

* clarify default
Sofie Van Landeghem 2022-01-04 14:31:26 +01:00 committed by GitHub
parent 029a48e340
commit 56dcb39fb7
6 changed files with 16 additions and 16 deletions

@@ -25,7 +25,7 @@ def debug_config_cli(
     show_vars: bool = Opt(False, "--show-variables", "-V", help="Show an overview of all variables referenced in the config and their values. This will also reflect variables overwritten on the CLI.")
     # fmt: on
 ):
-    """Debug a config.cfg file and show validation errors. The command will
+    """Debug a config file and show validation errors. The command will
     create all objects in the tree and validate them. Note that some config
     validation errors are blocking and will prevent the rest of the config from
     being resolved. This means that you may not see all validation errors at
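
For context, this help text belongs to the `spacy debug config` command; a typical invocation looks roughly like this (the config path is an illustrative placeholder):

$ python -m spacy debug config config.cfg --show-variables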

@@ -27,7 +27,7 @@ class Optimizations(str, Enum):
 @init_cli.command("config")
 def init_config_cli(
     # fmt: off
-    output_file: Path = Arg(..., help="File to save config.cfg to or - for stdout (will only output config and no additional logging info)", allow_dash=True),
+    output_file: Path = Arg(..., help="File to save the config to or - for stdout (will only output config and no additional logging info)", allow_dash=True),
     lang: str = Opt("en", "--lang", "-l", help="Two-letter code of the language to use"),
     pipeline: str = Opt("tagger,parser,ner", "--pipeline", "-p", help="Comma-separated names of trainable pipeline components to include (without 'tok2vec' or 'transformer')"),
     optimize: Optimizations = Opt(Optimizations.efficiency.value, "--optimize", "-o", help="Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters."),
@@ -37,7 +37,7 @@ def init_config_cli(
     # fmt: on
 ):
     """
-    Generate a starter config.cfg for training. Based on your requirements
+    Generate a starter config file for training. Based on your requirements
     specified via the CLI arguments, this command generates a config with the
     optimal settings for your use case. This includes the choice of architecture,
     pretrained weights and related hyperparameters.
@@ -66,15 +66,15 @@ def init_config_cli(
 @init_cli.command("fill-config")
 def init_fill_config_cli(
     # fmt: off
-    base_path: Path = Arg(..., help="Base config to fill", exists=True, dir_okay=False),
-    output_file: Path = Arg("-", help="File to save config.cfg to (or - for stdout)", allow_dash=True),
+    base_path: Path = Arg(..., help="Path to base config to fill", exists=True, dir_okay=False),
+    output_file: Path = Arg("-", help="Path to output .cfg file (or - for stdout)", allow_dash=True),
     pretraining: bool = Opt(False, "--pretraining", "-pt", help="Include config for pretraining (with 'spacy pretrain')"),
     diff: bool = Opt(False, "--diff", "-D", help="Print a visual diff highlighting the changes"),
     code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
     # fmt: on
 ):
     """
-    Fill partial config.cfg with default values. Will add all missing settings
+    Fill partial config file with default values. Will add all missing settings
     from the default config and will create all objects, check the registered
     functions for their default values and update the base config. This command
     can be used with a config generated via the training quickstart widget:
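
Together, the two commands above are typically invoked along these lines (file names are placeholders; the flags correspond to the arguments shown in the diff):

$ python -m spacy init config config.cfg --lang en --pipeline ner --optimize efficiency
$ python -m spacy init fill-config base_config.cfg config.cfg --diff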

@@ -642,7 +642,7 @@ class Errors(metaclass=ErrorsWithCodes):
     E912 = ("Failed to initialize lemmatizer. Missing lemmatizer table(s) found "
             "for mode '{mode}'. Required tables: {tables}. Found: {found}.")
     E913 = ("Corpus path can't be None. Maybe you forgot to define it in your "
-            "config.cfg or override it on the CLI?")
+            ".cfg file or override it on the CLI?")
     E914 = ("Executing {name} callback failed. Expected the function to "
             "return the nlp object but got: {value}. Maybe you forgot to return "
             "the modified object in your function?")

@@ -63,7 +63,7 @@ OOV_RANK = numpy.iinfo(numpy.uint64).max
 DEFAULT_OOV_PROB = -20
 LEXEME_NORM_LANGS = ["cs", "da", "de", "el", "en", "id", "lb", "mk", "pt", "ru", "sr", "ta", "th"]
-# Default order of sections in the config.cfg. Not all sections needs to exist,
+# Default order of sections in the config file. Not all sections needs to exist,
 # and additional sections are added at the end, in alphabetical order.
 CONFIG_SECTION_ORDER = ["paths", "variables", "system", "nlp", "components", "corpora", "training", "pretraining", "initialize"]
 # fmt: on
@@ -465,7 +465,7 @@ def load_model_from_path(
     """Load a model from a data directory path. Creates Language class with
     pipeline from config.cfg and then calls from_disk() with path.
-    model_path (Path): Mmodel path.
+    model_path (Path): Model path.
     meta (Dict[str, Any]): Optional model meta.
     vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
         a new Vocab object will be created.
@@ -642,8 +642,8 @@ def load_config(
             sys.stdin.read(), overrides=overrides, interpolate=interpolate
         )
     else:
-        if not config_path or not config_path.exists() or not config_path.is_file():
-            raise IOError(Errors.E053.format(path=config_path, name="config.cfg"))
+        if not config_path or not config_path.is_file():
+            raise IOError(Errors.E053.format(path=config_path, name="config file"))
         return config.from_disk(
             config_path, overrides=overrides, interpolate=interpolate
         )
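
As a minimal sketch of how the `load_config` helper changed above is used from Python (the config path is an illustrative placeholder):

from spacy import util

# Load a config from disk and interpolate variables; passing "-" reads from stdin.
config = util.load_config("config.cfg", interpolate=True)
print(list(config.keys()))  # top-level sections, e.g. "paths", "nlp", "training"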

@@ -148,8 +148,8 @@ $ python -m spacy init config [output_file] [--lang] [--pipeline] [--optimize] [
 ### init fill-config {#init-fill-config new="3"}
 
-Auto-fill a partial [`config.cfg` file](/usage/training#config) file with **all
-default values**, e.g. a config generated with the
+Auto-fill a partial [.cfg file](/usage/training#config) with **all default
+values**, e.g. a config generated with the
 [quickstart widget](/usage/training#quickstart). Config files used for training
 should always be complete and not contain any hidden defaults or missing values,
 so this command helps you create your final training config. In order to find
@@ -175,7 +175,7 @@ $ python -m spacy init fill-config [base_path] [output_file] [--diff]
 | Name | Description |
 | ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `base_path` | Path to base config to fill, e.g. generated by the [quickstart widget](/usage/training#quickstart). ~~Path (positional)~~ |
-| `output_file` | Path to output `.cfg` file. If not set, the config is written to stdout so you can pipe it forward to a file. ~~Path (positional)~~ |
+| `output_file` | Path to output `.cfg` file or "-" to write to stdout so you can pipe it to a file. Defaults to "-" (stdout). ~~Path (positional)~~ |
 | `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
 | `--pretraining`, `-pt` | Include config for pretraining (with [`spacy pretrain`](/api/cli#pretrain)). Defaults to `False`. ~~bool (flag)~~ |
 | `--diff`, `-D` | Print a visual diff highlighting the changes. ~~bool (flag)~~ |
@@ -208,7 +208,7 @@ $ python -m spacy init vectors [lang] [vectors_loc] [output_dir] [--prune] [--tr
 | `output_dir` | Pipeline output directory. Will be created if it doesn't exist. ~~Path (positional)~~ |
 | `--truncate`, `-t` | Number of vectors to truncate to when reading in vectors file. Defaults to `0` for no truncation. ~~int (option)~~ |
 | `--prune`, `-p` | Number of vectors to prune the vocabulary to. Defaults to `-1` for no pruning. ~~int (option)~~ |
 | `--mode`, `-m` | Vectors mode: `default` or [`floret`](https://github.com/explosion/floret). Defaults to `default`. ~~Optional[str] \(option)~~ |
 | `--name`, `-n` | Name to assign to the word vectors in the `meta.json`, e.g. `en_core_web_md.vectors`. ~~Optional[str] \(option)~~ |
 | `--verbose`, `-V` | Print additional information and explanations. ~~bool (flag)~~ |
 | `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
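
For reference, the `init vectors` options documented above are used roughly like this (the vectors file and output directory are placeholders):

$ python -m spacy init vectors en ./vectors.floret ./en_vectors --mode floret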

@@ -535,7 +535,7 @@ As of spaCy v3.0, the `meta.json` **isn't** used to construct the language class
 and pipeline anymore and only contains meta information for reference and for
 creating a Python package with [`spacy package`](/api/cli#package). How to set
 up the `nlp` object is now defined in the
-[`config.cfg`](/api/data-formats#config), which includes detailed information
+[config file](/api/data-formats#config), which includes detailed information
 about the pipeline components and their model architectures, and all other
 settings and hyperparameters used to train the pipeline. It's the **single
 source of truth** used for loading a pipeline.
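
As a small illustration of that single source of truth, the [nlp] block of such a config declares the language and pipeline layout (the component list here is just an example):

[nlp]
lang = "en"
pipeline = ["tagger", "parser", "ner"]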