Make debug commands subcommands of spacy debug

Also handle backwards-compatibility so the old commands don't break
This commit is contained in:
Ines Montani 2020-07-12 13:53:41 +02:00
parent 3f948b9c74
commit 0ab483037c
4 changed files with 48 additions and 16 deletions

View File

@ -22,10 +22,14 @@ HELP = """spaCy Command-line Interface
DOCS: https://spacy.io/api/cli DOCS: https://spacy.io/api/cli
""" """
PROJECT_HELP = f"""Command-line interface for spaCy projects and working with PROJECT_HELP = f"""Command-line interface for spaCy projects and templates.
project templates. You'd typically start by cloning a project template to a local You'd typically start by cloning a project template to a local directory and
directory and fetching its assets like datasets etc. See the project's fetching its assets like datasets etc. See the project's {PROJECT_FILE} for the
{PROJECT_FILE} for the available commands. available commands.
"""
DEBUG_HELP = """Suite of helpful commands for debugging and profiling. Includes
commands to check and validate your config files, training and evaluation data,
and custom model implementations.
""" """
# Wrappers for Typer's annotations. Initially created to set defaults and to # Wrappers for Typer's annotations. Initially created to set defaults and to
@ -35,7 +39,10 @@ Opt = typer.Option
app = typer.Typer(name=NAME, help=HELP) app = typer.Typer(name=NAME, help=HELP)
project_cli = typer.Typer(name="project", help=PROJECT_HELP, no_args_is_help=True) project_cli = typer.Typer(name="project", help=PROJECT_HELP, no_args_is_help=True)
debug_cli = typer.Typer(name="debug", help=DEBUG_HELP, no_args_is_help=True)
app.add_typer(project_cli) app.add_typer(project_cli)
app.add_typer(debug_cli)
def setup_cli() -> None: def setup_cli() -> None:

View File

@ -7,7 +7,7 @@ from wasabi import Printer, MESSAGES, msg
import typer import typer
from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides
from ._util import import_code from ._util import import_code, debug_cli
from ..schemas import ConfigSchema from ..schemas import ConfigSchema
from ..gold import Corpus, Example from ..gold import Corpus, Example
from ..syntax import nonproj from ..syntax import nonproj
@ -24,8 +24,8 @@ BLANK_MODEL_MIN_THRESHOLD = 100
BLANK_MODEL_THRESHOLD = 2000 BLANK_MODEL_THRESHOLD = 2000
@app.command( @debug_cli.command(
"debug-config", "config",
context_settings={"allow_extra_args": True, "ignore_unknown_options": True}, context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
) )
def debug_config_cli( def debug_config_cli(
@ -35,7 +35,12 @@ def debug_config_cli(
code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"), code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
# fmt: on # fmt: on
): ):
"""Debug a config file and show validation errors.""" """Debug a config.cfg file and show validation errors. The command will
create all objects in the tree and validate them. Note that some config
validation errors are blocking and will prevent the rest of the config from
being resolved. This means that you may not see all validation errors at
once and some issues are only shown once previous errors have been fixed.
"""
overrides = parse_config_overrides(ctx.args) overrides = parse_config_overrides(ctx.args)
import_code(code_path) import_code(code_path)
with show_validation_error(): with show_validation_error():
@ -45,9 +50,13 @@ def debug_config_cli(
msg.good("Config is valid") msg.good("Config is valid")
@debug_cli.command(
"data", context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
)
@app.command( @app.command(
"debug-data", "debug-data",
context_settings={"allow_extra_args": True, "ignore_unknown_options": True}, context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
hidden=True, # hide this from main CLI help but still allow it to work with warning
) )
def debug_data_cli( def debug_data_cli(
# fmt: off # fmt: off
@ -62,10 +71,16 @@ def debug_data_cli(
# fmt: on # fmt: on
): ):
""" """
Analyze, debug and validate your training and development data, get useful Analyze, debug and validate your training and development data. Outputs
stats, and find problems like invalid entity annotations, cyclic useful stats, and can help you find problems like invalid entity annotations,
dependencies, low data labels and more. cyclic dependencies, low data labels and more.
""" """
if ctx.command.name == "debug-data":
msg.warn(
"The debug-data command is now available via the 'debug data' "
"subcommand (without the hyphen). You can run python -m spacy debug "
"--help for an overview of the other available debugging commands."
)
overrides = parse_config_overrides(ctx.args) overrides = parse_config_overrides(ctx.args)
import_code(code_path) import_code(code_path)
debug_data( debug_data(

View File

@ -2,12 +2,12 @@ from pathlib import Path
from wasabi import msg from wasabi import msg
from thinc.api import require_gpu, fix_random_seed, set_dropout_rate, Adam from thinc.api import require_gpu, fix_random_seed, set_dropout_rate, Adam
from ._util import app, Arg, Opt from ._util import Arg, Opt, debug_cli
from .. import util from .. import util
from ..lang.en import English from ..lang.en import English
@app.command("debug-model", hidden=True) @debug_cli.command("model")
def debug_model_cli( def debug_model_cli(
# fmt: off # fmt: off
config_path: Path = Arg(..., help="Path to config file", exists=True), config_path: Path = Arg(..., help="Path to config file", exists=True),
@ -25,7 +25,8 @@ def debug_model_cli(
# fmt: on # fmt: on
): ):
""" """
Analyze a Thinc ML model - internal structure and activations during training Analyze a Thinc model implementation. Includes checks for internal structure
and activations during training.
""" """
print_settings = { print_settings = {
"dimensions": dimensions, "dimensions": dimensions,

View File

@ -7,15 +7,18 @@ import pstats
import sys import sys
import itertools import itertools
from wasabi import msg, Printer from wasabi import msg, Printer
import typer
from ._util import app, Arg, Opt from ._util import app, debug_cli, Arg, Opt, NAME
from ..language import Language from ..language import Language
from ..util import load_model from ..util import load_model
@app.command("profile") @debug_cli.command("profile")
@app.command("profile", hidden=True)
def profile_cli( def profile_cli(
# fmt: off # fmt: off
ctx: typer.Context, # This is only used to read current calling context
model: str = Arg(..., help="Model to load"), model: str = Arg(..., help="Model to load"),
inputs: Optional[Path] = Arg(None, help="Location of input file. '-' for stdin.", exists=True, allow_dash=True), inputs: Optional[Path] = Arg(None, help="Location of input file. '-' for stdin.", exists=True, allow_dash=True),
n_texts: int = Opt(10000, "--n-texts", "-n", help="Maximum number of texts to use if available"), n_texts: int = Opt(10000, "--n-texts", "-n", help="Maximum number of texts to use if available"),
@ -27,6 +30,12 @@ def profile_cli(
It can either be provided as a JSONL file, or be read from sys.stdin. It can either be provided as a JSONL file, or be read from sys.stdin.
If no input file is specified, the IMDB dataset is loaded via Thinc. If no input file is specified, the IMDB dataset is loaded via Thinc.
""" """
if ctx.parent.command.name == NAME: # called as top-level command
msg.warn(
"The profile command is now available via the 'debug profile' "
"subcommand. You can run python -m spacy debug --help for an "
"overview of the other available debugging commands."
)
profile(model, inputs=inputs, n_texts=n_texts) profile(model, inputs=inputs, n_texts=n_texts)