From 0b23594953fea4bf17e438de8e8fef1f42ddd145 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 30 Jul 2020 20:06:28 +0200 Subject: [PATCH 1/7] pipe_name instead of section in debug_model --- spacy/cli/debug_model.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py index 88e060238..0aea38e5c 100644 --- a/spacy/cli/debug_model.py +++ b/spacy/cli/debug_model.py @@ -16,7 +16,7 @@ def debug_model_cli( # fmt: off ctx: typer.Context, # This is only used to read additional arguments config_path: Path = Arg(..., help="Path to config file", exists=True), - section: str = Arg(..., help="Section that defines the model to be analysed"), + pipe_name: str = Arg(..., help="Name of the pipe of which the model should be analysed"), layers: str = Opt("", "--layers", "-l", help="Comma-separated names of layer IDs to print"), dimensions: bool = Opt(False, "--dimensions", "-DIM", help="Show dimensions"), parameters: bool = Opt(False, "--parameters", "-PAR", help="Show parameters"), @@ -53,20 +53,20 @@ def debug_model_cli( cfg = Config().from_disk(config_path) with show_validation_error(): try: - _, config = util.load_model_from_config(cfg, overrides=config_overrides) + nlp, config = util.load_model_from_config(cfg, overrides=config_overrides) except ValueError as e: msg.fail(str(e), exits=1) - seed = config["pretraining"]["seed"] + seed = config.get("training", {}).get("seed", None) if seed is not None: msg.info(f"Fixing random seed: {seed}") fix_random_seed(seed) - component = dot_to_object(config, section) + component = nlp.get_pipe(pipe_name) if hasattr(component, "model"): model = component.model else: msg.fail( - f"The section '{section}' does not specify an object that holds a Model.", + f"The component '{pipe_name}' does not specify an object that holds a Model.", exits=1, ) debug_model(model, print_settings=print_settings) From 08d3c36c204a59fb294d1b8a286c764da6f992cb Mon Sep 17 00:00:00 2001 From: svlandeg Date: Fri, 31 Jul 2020 14:18:27 +0200 Subject: [PATCH 2/7] bugfix in train CLI --- spacy/cli/train.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/spacy/cli/train.py b/spacy/cli/train.py index b0bc145ff..9cc36f77b 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -51,7 +51,7 @@ def train_cli( referenced in the config. """ util.set_env_log(verbose) - verify_cli_args(train_path, dev_path, config_path) + verify_cli_args(train_path, dev_path, config_path, output_path) overrides = parse_config_overrides(ctx.args) import_code(code_path) train( @@ -173,7 +173,6 @@ def train( progress = tqdm.tqdm(total=training["eval_frequency"], leave=False) except Exception as e: if output_path is not None: - raise e msg.warn( f"Aborting and saving the final best model. " f"Encountered exception: {str(e)}", From 5fa3235d0604a7a73a0601c67a6a89ca721b2757 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Fri, 31 Jul 2020 14:19:10 +0200 Subject: [PATCH 3/7] set DATA_VALIDATION to False for debug_model (upgrade thinc) --- pyproject.toml | 2 +- requirements.txt | 2 +- setup.cfg | 4 ++-- spacy/cli/debug_model.py | 4 +++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 91f1464df..5989889d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ requires = [ "cymem>=2.0.2,<2.1.0", "preshed>=3.0.2,<3.1.0", "murmurhash>=0.28.0,<1.1.0", - "thinc>=8.0.0a19,<8.0.0a30", + "thinc>=8.0.0a21,<8.0.0a30", "blis>=0.4.0,<0.5.0", "pytokenizations", "smart_open>=2.0.0,<3.0.0" diff --git a/requirements.txt b/requirements.txt index d0413825b..ee892b393 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ # Our libraries cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 -thinc>=8.0.0a19,<8.0.0a30 +thinc>=8.0.0a21,<8.0.0a30 blis>=0.4.0,<0.5.0 ml_datasets>=0.1.1 murmurhash>=0.28.0,<1.1.0 diff --git a/setup.cfg b/setup.cfg index d2cb7c92a..f8f4561bf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,13 +34,13 @@ setup_requires = cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 murmurhash>=0.28.0,<1.1.0 - thinc>=8.0.0a19,<8.0.0a30 + thinc>=8.0.0a21,<8.0.0a30 install_requires = # Our libraries murmurhash>=0.28.0,<1.1.0 cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 - thinc>=8.0.0a19,<8.0.0a30 + thinc>=8.0.0a21,<8.0.0a30 blis>=0.4.0,<0.5.0 wasabi>=0.7.1,<1.1.0 srsly>=2.1.0,<3.0.0 diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py index 0aea38e5c..8c0726cc3 100644 --- a/spacy/cli/debug_model.py +++ b/spacy/cli/debug_model.py @@ -2,7 +2,7 @@ from typing import Dict, Any, Optional from pathlib import Path from wasabi import msg from thinc.api import require_gpu, fix_random_seed, set_dropout_rate, Adam, Config -from thinc.api import Model +from thinc.api import Model, DATA_VALIDATION import typer from ._util import Arg, Opt, debug_cli, show_validation_error, parse_config_overrides @@ -90,7 +90,9 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None # STEP 1: Initializing the model and printing again Y = _get_output(model.ops.xp) _set_output_dim(nO=Y.shape[-1], model=model) + DATA_VALIDATION.set(False) # The output vector might differ from the official type of the output layer model.initialize(X=_get_docs(), Y=Y) + DATA_VALIDATION.set(True) if print_settings.get("print_after_init"): msg.info(f"After initialization:") _print_model(model, print_settings) From cc2f58a1b06773bb8ee6aed5ec05f231737e1777 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Fri, 31 Jul 2020 16:49:42 +0200 Subject: [PATCH 4/7] use data_validation context manager --- spacy/cli/debug_model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py index 8c0726cc3..84a496e12 100644 --- a/spacy/cli/debug_model.py +++ b/spacy/cli/debug_model.py @@ -2,7 +2,7 @@ from typing import Dict, Any, Optional from pathlib import Path from wasabi import msg from thinc.api import require_gpu, fix_random_seed, set_dropout_rate, Adam, Config -from thinc.api import Model, DATA_VALIDATION +from thinc.api import Model, data_validation import typer from ._util import Arg, Opt, debug_cli, show_validation_error, parse_config_overrides @@ -90,9 +90,9 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None # STEP 1: Initializing the model and printing again Y = _get_output(model.ops.xp) _set_output_dim(nO=Y.shape[-1], model=model) - DATA_VALIDATION.set(False) # The output vector might differ from the official type of the output layer - model.initialize(X=_get_docs(), Y=Y) - DATA_VALIDATION.set(True) + # The output vector might differ from the official type of the output layer + with data_validation(False): + model.initialize(X=_get_docs(), Y=Y) if print_settings.get("print_after_init"): msg.info(f"After initialization:") _print_model(model, print_settings) From 878327d38e1e3c7f2284c9aa442b0566d519b2e6 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Fri, 31 Jul 2020 17:36:32 +0200 Subject: [PATCH 5/7] printing final predictions by default to False --- spacy/cli/debug_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py index 84a496e12..bf565df0b 100644 --- a/spacy/cli/debug_model.py +++ b/spacy/cli/debug_model.py @@ -25,7 +25,7 @@ def debug_model_cli( P0: bool = Opt(False, "--print-step0", "-P0", help="Print model before training"), P1: bool = Opt(False, "--print-step1", "-P1", help="Print model after initialization"), P2: bool = Opt(False, "--print-step2", "-P2", help="Print model after training"), - P3: bool = Opt(True, "--print-step3", "-P3", help="Print final predictions"), + P3: bool = Opt(False, "--print-step3", "-P3", help="Print final predictions"), use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU") # fmt: on ): From 51ffc4a16674e69809b874f86daea50db70601af Mon Sep 17 00:00:00 2001 From: svlandeg Date: Fri, 31 Jul 2020 17:58:55 +0200 Subject: [PATCH 6/7] rename pipe_name to component --- spacy/cli/debug_model.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py index bf565df0b..de5583b03 100644 --- a/spacy/cli/debug_model.py +++ b/spacy/cli/debug_model.py @@ -16,7 +16,7 @@ def debug_model_cli( # fmt: off ctx: typer.Context, # This is only used to read additional arguments config_path: Path = Arg(..., help="Path to config file", exists=True), - pipe_name: str = Arg(..., help="Name of the pipe of which the model should be analysed"), + component: str = Arg(..., help="Name of the pipeline component of which the model should be analysed"), layers: str = Opt("", "--layers", "-l", help="Comma-separated names of layer IDs to print"), dimensions: bool = Opt(False, "--dimensions", "-DIM", help="Show dimensions"), parameters: bool = Opt(False, "--parameters", "-PAR", help="Show parameters"), @@ -61,12 +61,12 @@ def debug_model_cli( msg.info(f"Fixing random seed: {seed}") fix_random_seed(seed) - component = nlp.get_pipe(pipe_name) - if hasattr(component, "model"): - model = component.model + pipe = nlp.get_pipe(component) + if hasattr(pipe, "model"): + model = pipe.model else: msg.fail( - f"The component '{pipe_name}' does not specify an object that holds a Model.", + f"The component '{component}' does not specify an object that holds a Model.", exits=1, ) debug_model(model, print_settings=print_settings) From 9b719dfb1aa1226425fac3bc00b047857d522089 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Fri, 31 Jul 2020 18:06:48 +0200 Subject: [PATCH 7/7] use divider inbetween steps --- spacy/cli/debug_model.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py index de5583b03..132d2a2f1 100644 --- a/spacy/cli/debug_model.py +++ b/spacy/cli/debug_model.py @@ -84,7 +84,7 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None # STEP 0: Printing before training msg.info(f"Analysing model with ID {model.id}") if print_settings.get("print_before_training"): - msg.info(f"Before training:") + msg.divider(f"STEP 0 - before training") _print_model(model, print_settings) # STEP 1: Initializing the model and printing again @@ -94,7 +94,7 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None with data_validation(False): model.initialize(X=_get_docs(), Y=Y) if print_settings.get("print_after_init"): - msg.info(f"After initialization:") + msg.divider(f"STEP 1 - after initialization") _print_model(model, print_settings) # STEP 2: Updating the model and printing again @@ -106,13 +106,14 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None get_dX(dY) model.finish_update(optimizer) if print_settings.get("print_after_training"): - msg.info(f"After training:") + msg.divider(f"STEP 2 - after training") _print_model(model, print_settings) # STEP 3: the final prediction prediction = model.predict(_get_docs()) if print_settings.get("print_prediction"): - msg.info(f"Prediction:", str(prediction)) + msg.divider(f"STEP 3 - prediction") + msg.info(str(prediction)) def get_gradient(model, Y):