Add more info to debug config

2026-01-06 00:39:25 +03:00 · 2020-08-27 18:17:58 +02:00 · 2020-08-27 18:17:58 +02:00 · ff4175e839
commit ff4175e839
parent e1e1760fd6
6 changed files with 221 additions and 41 deletions
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,7 +7,7 @@ ml_datasets>=0.1.1
 murmurhash>=0.28.0,<1.1.0
 wasabi>=0.8.0,<1.1.0
 srsly>=2.1.0,<3.0.0
-catalogue>=0.0.7,<1.1.0
+catalogue>=2.0.1,<2.1.0
 typer>=0.3.0,<0.4.0
 pathy
 # Third party dependencies
--- a/setup.cfg
+++ b/setup.cfg
@@ -44,7 +44,7 @@ install_requires =
    blis>=0.4.0,<0.5.0
    wasabi>=0.8.0,<1.1.0
    srsly>=2.1.0,<3.0.0
-    catalogue>=0.0.7,<1.1.0
+    catalogue>=2.0.1,<2.1.0
    typer>=0.3.0,<0.4.0
    pathy
    # Third-party dependencies
--- a/spacy/cli/__init__.py
+++ b/spacy/cli/__init__.py
@@ -11,6 +11,7 @@ from .profile import profile  # noqa: F401
 from .train import train_cli  # noqa: F401
 from .pretrain import pretrain  # noqa: F401
 from .debug_data import debug_data  # noqa: F401
+from .debug_config import debug_config  # noqa: F401
 from .debug_model import debug_model  # noqa: F401
 from .evaluate import evaluate  # noqa: F401
 from .convert import convert  # noqa: F401
--- a/spacy/cli/debug_config.py
+++ b/spacy/cli/debug_config.py
@@ -0,0 +1,93 @@
+from typing import Optional, Dict, Any, Union, List
+from pathlib import Path
+from wasabi import msg, table
+from thinc.api import Config
+from thinc.config import VARIABLE_RE
+import typer
+
+from ._util import Arg, Opt, show_validation_error, parse_config_overrides
+from ._util import import_code, debug_cli
+from .. import util
+
+
+@debug_cli.command(
+    "config",
+    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
+)
+def debug_config_cli(
+    # fmt: off
+    ctx: typer.Context,  # This is only used to read additional arguments
+    config_path: Path = Arg(..., help="Path to config file", exists=True),
+    code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+    show_funcs: bool = Opt(False, "--show-functions", "-F", help="Show an overview of all registered functions used in the config and where they come from (modules, files etc.)"),
+    show_vars: bool = Opt(False, "--show-variables", "-V", help="Show an overview of all variables referenced in the config and their values. This will also reflect variables overwritten on the CLI.")
+    # fmt: on
+):
+    """Debug a config.cfg file and show validation errors. The command will
+    create all objects in the tree and validate them. Note that some config
+    validation errors are blocking and will prevent the rest of the config from
+    being resolved. This means that you may not see all validation errors at
+    once and some issues are only shown once previous errors have been fixed.
+    Similar as with the 'train' command, you can override settings from the config
+    as command line options. For instance, --training.batch_size 128 overrides
+    the value of "batch_size" in the block "[training]".
+    """
+    overrides = parse_config_overrides(ctx.args)
+    import_code(code_path)
+    debug_config(
+        config_path, overrides=overrides, show_funcs=show_funcs, show_vars=show_vars
+    )
+
+
+def debug_config(
+    config_path: Path,
+    *,
+    overrides: Dict[str, Any] = {},
+    show_funcs: bool = False,
+    show_vars: bool = False,
+):
+    msg.divider("Config validation")
+    with show_validation_error(config_path):
+        config = util.load_config(config_path, overrides=overrides)
+        nlp, _ = util.load_model_from_config(config)
+    msg.good("Config is valid")
+    if show_vars:
+        variables = get_variables(config)
+        msg.divider(f"Variables ({len(variables)})")
+        head = ("Variable", "Value")
+        msg.table(variables, header=head, divider=True, widths=(41, 34), spacing=2)
+    if show_funcs:
+        funcs = get_registered_funcs(config)
+        msg.divider(f"Registered functions ({len(funcs)})")
+        for func in funcs:
+            func_data = {
+                "Registry": f"@{func['registry']}",
+                "Name": func["name"],
+                "Module": func["module"],
+                "File": f"{func['file']} (line {func['line_no']})",
+            }
+            msg.info(f"[{func['path']}]")
+            print(table(func_data).strip())
+
+
+def get_registered_funcs(config: Config) -> List[Dict[str, Optional[Union[str, int]]]]:
+    result = []
+    for key, value in util.walk_dict(config):
+        if not key[-1].startswith("@"):
+            continue
+        # We have a reference to a registered function
+        reg_name = key[-1][1:]
+        registry = getattr(util.registry, reg_name)
+        path = ".".join(key[:-1])
+        info = registry.find(value)
+        result.append({"name": value, "registry": reg_name, "path": path, **info})
+    return result
+
+
+def get_variables(config: Config) -> Dict[str, Any]:
+    result = {}
+    for variable in sorted(set(VARIABLE_RE.findall(config.to_str()))):
+        path = variable[2:-1].replace(":", ".")
+        value = util.dot_to_object(config, path)
+        result[variable] = repr(value)
+    return result
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -23,34 +23,6 @@ BLANK_MODEL_MIN_THRESHOLD = 100
 BLANK_MODEL_THRESHOLD = 2000


-@debug_cli.command(
-    "config",
-    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
-)
-def debug_config_cli(
-    # fmt: off
-    ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True),
-    code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    # fmt: on
-):
-    """Debug a config.cfg file and show validation errors. The command will
-    create all objects in the tree and validate them. Note that some config
-    validation errors are blocking and will prevent the rest of the config from
-    being resolved. This means that you may not see all validation errors at
-    once and some issues are only shown once previous errors have been fixed.
-    Similar as with the 'train' command, you can override settings from the config
-    as command line options. For instance, --training.batch_size 128 overrides
-    the value of "batch_size" in the block "[training]".
-    """
-    overrides = parse_config_overrides(ctx.args)
-    import_code(code_path)
-    with show_validation_error(config_path):
-        config = util.load_config(config_path, overrides=overrides)
-        nlp, _ = util.load_model_from_config(config)
-    msg.good("Original config is valid")
-
-
@debug_cli.command(
    "data", context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
 )
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@@ -246,19 +246,19 @@ some config validation errors are blocking and will prevent the rest of the
 config from being resolved. This means that you may not see all validation
 errors at once and some issues are only shown once previous errors have been
 fixed. To auto-fill a partial config and save the result, you can use the
-[`init fillconfig`](/api/cli#init-fill-config) command.
+[`init fill-config`](/api/cli#init-fill-config) command.

 ```cli
-$ python -m spacy debug config [config_path] [--code_path] [overrides]
+$ python -m spacy debug config [config_path] [--code-path] [--show-functions] [--show-variables] [overrides]
 ```

 > #### Example
 >
 > ```cli
-> $ python -m spacy debug config ./config.cfg
+> $ python -m spacy debug config config.cfg
 > ```

-<Accordion title="Example output" spaced>
+<Accordion title="Example output (validation error)">

 ```
 ✘ Config validation error
@@ -277,10 +277,124 @@ python -m spacy init fill-config tmp/starter-config_invalid.cfg --base tmp/start

 </Accordion>

+<Accordion title="Example output (valid config and all options)" spaced>
+
+```cli
+$ python -m spacy debug config config.cfg --show-functions --show-variables
+```
+
+```
+============================= Config validation =============================
+✔ Config is valid
+
+=============================== Variables (6) ===============================
+
+Variable                                   Value
+-----------------------------------------  ----------------------------------
+${components.tok2vec.model.encode.width}   96
+${paths.dev}                               'hello'
+${paths.init_tok2vec}                      None
+${paths.raw}                               None
+${paths.train}                             ''
+${system.seed}                             0
+
+
+========================= Registered functions (17) =========================
+ℹ [nlp.tokenizer]
+Registry   @tokenizers
+Name       spacy.Tokenizer.v1
+Module     spacy.language
+File       /path/to/spacy/language.py (line 64)
+ℹ [components.ner.model]
+Registry   @architectures
+Name       spacy.TransitionBasedParser.v1
+Module     spacy.ml.models.parser
+File       /path/to/spacy/ml/models/parser.py (line 11)
+ℹ [components.ner.model.tok2vec]
+Registry   @architectures
+Name       spacy.Tok2VecListener.v1
+Module     spacy.ml.models.tok2vec
+File       /path/to/spacy/ml/models/tok2vec.py (line 16)
+ℹ [components.parser.model]
+Registry   @architectures
+Name       spacy.TransitionBasedParser.v1
+Module     spacy.ml.models.parser
+File       /path/to/spacy/ml/models/parser.py (line 11)
+ℹ [components.parser.model.tok2vec]
+Registry   @architectures
+Name       spacy.Tok2VecListener.v1
+Module     spacy.ml.models.tok2vec
+File       /path/to/spacy/ml/models/tok2vec.py (line 16)
+ℹ [components.tagger.model]
+Registry   @architectures
+Name       spacy.Tagger.v1
+Module     spacy.ml.models.tagger
+File       /path/to/spacy/ml/models/tagger.py (line 9)
+ℹ [components.tagger.model.tok2vec]
+Registry   @architectures
+Name       spacy.Tok2VecListener.v1
+Module     spacy.ml.models.tok2vec
+File       /path/to/spacy/ml/models/tok2vec.py (line 16)
+ℹ [components.tok2vec.model]
+Registry   @architectures
+Name       spacy.Tok2Vec.v1
+Module     spacy.ml.models.tok2vec
+File       /path/to/spacy/ml/models/tok2vec.py (line 72)
+ℹ [components.tok2vec.model.embed]
+Registry   @architectures
+Name       spacy.MultiHashEmbed.v1
+Module     spacy.ml.models.tok2vec
+File       /path/to/spacy/ml/models/tok2vec.py (line 93)
+ℹ [components.tok2vec.model.encode]
+Registry   @architectures
+Name       spacy.MaxoutWindowEncoder.v1
+Module     spacy.ml.models.tok2vec
+File       /path/to/spacy/ml/models/tok2vec.py (line 207)
+ℹ [training.logger]
+Registry   @loggers
+Name       spacy.ConsoleLogger.v1
+Module     spacy.gold.loggers
+File       /path/to/spacy/gold/loggers.py (line 8)
+ℹ [training.batcher]
+Registry   @batchers
+Name       batch_by_words.v1
+Module     spacy.gold.batchers
+File       /path/to/spacy/gold/batchers.py (line 49)
+ℹ [training.batcher.size]
+Registry   @schedules
+Name       compounding.v1
+Module     thinc.schedules
+File       /path/to/thinc/schedules.py (line 43)
+ℹ [training.dev_corpus]
+Registry   @readers
+Name       spacy.Corpus.v1
+Module     spacy.gold.corpus
+File       /path/to/spacy/gold/corpus.py (line 18)
+ℹ [training.optimizer]
+Registry   @optimizers
+Name       Adam.v1
+Module     thinc.optimizers
+File       /path/to/thinc/optimizers.py (line 58)
+ℹ [training.optimizer.learn_rate]
+Registry   @schedules
+Name       warmup_linear.v1
+Module     thinc.schedules
+File       /path/to/thinc/schedules.py (line 91)
+ℹ [training.train_corpus]
+Registry   @readers
+Name       spacy.Corpus.v1
+Module     spacy.gold.corpus
+File       /path/to/spacy/gold/corpus.py (line 18)
+```
+
+</Accordion>
+
 | Name                     | Description                                                                                                                                                                                                                    |
-| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `config_path`            | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. ~~Path (positional)~~                                                                                                    |
-| `--code_path`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~       |
+| `--code-path`, `-c`      | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~                                           |
+| `--show-functions`, `-F` | Show an overview of all registered function blocks used in the config and where those functions come from, including the module name, Python file and line number. ~~bool (flag)~~                                             |
+| `--show-variables`, `-V` | Show an overview of all variables referenced in the config, e.g. `${paths.train}` and their values that will be used. This also reflects any config overrides provided on the CLI, e.g. `--paths.train /path`. ~~bool (flag)~~ |
 | `--help`, `-h`           | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                                     |
 | overrides                | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. ~~Any (option/flag)~~                                     |
 | **PRINTS**               | Config validation errors, if available.                                                                                                                                                                                        |