From 0b23594953fea4bf17e438de8e8fef1f42ddd145 Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Thu, 30 Jul 2020 20:06:28 +0200
Subject: [PATCH 1/7] pipe_name instead of section in debug_model

---
 spacy/cli/debug_model.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index 88e060238..0aea38e5c 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -16,7 +16,7 @@ def debug_model_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
     config_path: Path = Arg(..., help="Path to config file", exists=True),
-    section: str = Arg(..., help="Section that defines the model to be analysed"),
+    pipe_name: str = Arg(..., help="Name of the pipe of which the model should be analysed"),
     layers: str = Opt("", "--layers", "-l", help="Comma-separated names of layer IDs to print"),
     dimensions: bool = Opt(False, "--dimensions", "-DIM", help="Show dimensions"),
     parameters: bool = Opt(False, "--parameters", "-PAR", help="Show parameters"),
@@ -53,20 +53,20 @@ def debug_model_cli(
     cfg = Config().from_disk(config_path)
     with show_validation_error():
         try:
-            _, config = util.load_model_from_config(cfg, overrides=config_overrides)
+            nlp, config = util.load_model_from_config(cfg, overrides=config_overrides)
         except ValueError as e:
             msg.fail(str(e), exits=1)
-    seed = config["pretraining"]["seed"]
+    seed = config.get("training", {}).get("seed", None)
     if seed is not None:
         msg.info(f"Fixing random seed: {seed}")
         fix_random_seed(seed)
 
-    component = dot_to_object(config, section)
+    component = nlp.get_pipe(pipe_name)
     if hasattr(component, "model"):
         model = component.model
     else:
         msg.fail(
-            f"The section '{section}' does not specify an object that holds a Model.",
+            f"The component '{pipe_name}' does not specify an object that holds a Model.",
             exits=1,
         )
     debug_model(model, print_settings=print_settings)

From 08d3c36c204a59fb294d1b8a286c764da6f992cb Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Fri, 31 Jul 2020 14:18:27 +0200
Subject: [PATCH 2/7] bugfix in train CLI: pass output_path to verify_cli_args, remove stray raise

---
 spacy/cli/train.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index b0bc145ff..9cc36f77b 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -51,7 +51,7 @@ def train_cli(
     referenced in the config.
     """
     util.set_env_log(verbose)
-    verify_cli_args(train_path, dev_path, config_path)
+    verify_cli_args(train_path, dev_path, config_path, output_path)
     overrides = parse_config_overrides(ctx.args)
     import_code(code_path)
     train(
@@ -173,7 +173,6 @@ def train(
                 progress = tqdm.tqdm(total=training["eval_frequency"], leave=False)
     except Exception as e:
         if output_path is not None:
-            raise e
             msg.warn(
                 f"Aborting and saving the final best model. "
                 f"Encountered exception: {str(e)}",

From 5fa3235d0604a7a73a0601c67a6a89ca721b2757 Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Fri, 31 Jul 2020 14:19:10 +0200
Subject: [PATCH 3/7] set DATA_VALIDATION to False for debug_model (upgrade
 thinc)

---
 pyproject.toml           | 2 +-
 requirements.txt         | 2 +-
 setup.cfg                | 4 ++--
 spacy/cli/debug_model.py | 4 +++-
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 91f1464df..5989889d8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ requires = [
     "cymem>=2.0.2,<2.1.0",
     "preshed>=3.0.2,<3.1.0",
     "murmurhash>=0.28.0,<1.1.0",
-    "thinc>=8.0.0a19,<8.0.0a30",
+    "thinc>=8.0.0a21,<8.0.0a30",
     "blis>=0.4.0,<0.5.0",
     "pytokenizations",
     "smart_open>=2.0.0,<3.0.0"
diff --git a/requirements.txt b/requirements.txt
index d0413825b..ee892b393 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 # Our libraries
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
-thinc>=8.0.0a19,<8.0.0a30
+thinc>=8.0.0a21,<8.0.0a30
 blis>=0.4.0,<0.5.0
 ml_datasets>=0.1.1
 murmurhash>=0.28.0,<1.1.0
diff --git a/setup.cfg b/setup.cfg
index d2cb7c92a..f8f4561bf 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,13 +34,13 @@ setup_requires =
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
     murmurhash>=0.28.0,<1.1.0
-    thinc>=8.0.0a19,<8.0.0a30
+    thinc>=8.0.0a21,<8.0.0a30
 install_requires =
     # Our libraries
     murmurhash>=0.28.0,<1.1.0
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
-    thinc>=8.0.0a19,<8.0.0a30
+    thinc>=8.0.0a21,<8.0.0a30
     blis>=0.4.0,<0.5.0
     wasabi>=0.7.1,<1.1.0
     srsly>=2.1.0,<3.0.0
diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index 0aea38e5c..8c0726cc3 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -2,7 +2,7 @@ from typing import Dict, Any, Optional
 from pathlib import Path
 from wasabi import msg
 from thinc.api import require_gpu, fix_random_seed, set_dropout_rate, Adam, Config
-from thinc.api import Model
+from thinc.api import Model, DATA_VALIDATION
 import typer
 
 from ._util import Arg, Opt, debug_cli, show_validation_error, parse_config_overrides
@@ -90,7 +90,9 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None
     # STEP 1: Initializing the model and printing again
     Y = _get_output(model.ops.xp)
     _set_output_dim(nO=Y.shape[-1], model=model)
+    DATA_VALIDATION.set(False)   # The output vector might differ from the official type of the output layer
     model.initialize(X=_get_docs(), Y=Y)
+    DATA_VALIDATION.set(True)
     if print_settings.get("print_after_init"):
         msg.info(f"After initialization:")
         _print_model(model, print_settings)

From cc2f58a1b06773bb8ee6aed5ec05f231737e1777 Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Fri, 31 Jul 2020 16:49:42 +0200
Subject: [PATCH 4/7] use data_validation context manager

---
 spacy/cli/debug_model.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index 8c0726cc3..84a496e12 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -2,7 +2,7 @@ from typing import Dict, Any, Optional
 from pathlib import Path
 from wasabi import msg
 from thinc.api import require_gpu, fix_random_seed, set_dropout_rate, Adam, Config
-from thinc.api import Model, DATA_VALIDATION
+from thinc.api import Model, data_validation
 import typer
 
 from ._util import Arg, Opt, debug_cli, show_validation_error, parse_config_overrides
@@ -90,9 +90,9 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None
     # STEP 1: Initializing the model and printing again
     Y = _get_output(model.ops.xp)
     _set_output_dim(nO=Y.shape[-1], model=model)
-    DATA_VALIDATION.set(False)   # The output vector might differ from the official type of the output layer
-    model.initialize(X=_get_docs(), Y=Y)
-    DATA_VALIDATION.set(True)
+    # The output vector might differ from the official type of the output layer
+    with data_validation(False):
+        model.initialize(X=_get_docs(), Y=Y)
     if print_settings.get("print_after_init"):
         msg.info(f"After initialization:")
         _print_model(model, print_settings)

From 878327d38e1e3c7f2284c9aa442b0566d519b2e6 Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Fri, 31 Jul 2020 17:36:32 +0200
Subject: [PATCH 5/7] set printing of final predictions to False by default

---
 spacy/cli/debug_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index 84a496e12..bf565df0b 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -25,7 +25,7 @@ def debug_model_cli(
     P0: bool = Opt(False, "--print-step0", "-P0", help="Print model before training"),
     P1: bool = Opt(False, "--print-step1", "-P1", help="Print model after initialization"),
     P2: bool = Opt(False, "--print-step2", "-P2", help="Print model after training"),
-    P3: bool = Opt(True, "--print-step3", "-P3", help="Print final predictions"),
+    P3: bool = Opt(False, "--print-step3", "-P3", help="Print final predictions"),
     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
     # fmt: on
 ):

From 51ffc4a16674e69809b874f86daea50db70601af Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Fri, 31 Jul 2020 17:58:55 +0200
Subject: [PATCH 6/7] rename pipe_name to component

---
 spacy/cli/debug_model.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index bf565df0b..de5583b03 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -16,7 +16,7 @@ def debug_model_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
     config_path: Path = Arg(..., help="Path to config file", exists=True),
-    pipe_name: str = Arg(..., help="Name of the pipe of which the model should be analysed"),
+    component: str = Arg(..., help="Name of the pipeline component of which the model should be analysed"),
     layers: str = Opt("", "--layers", "-l", help="Comma-separated names of layer IDs to print"),
     dimensions: bool = Opt(False, "--dimensions", "-DIM", help="Show dimensions"),
     parameters: bool = Opt(False, "--parameters", "-PAR", help="Show parameters"),
@@ -61,12 +61,12 @@ def debug_model_cli(
         msg.info(f"Fixing random seed: {seed}")
         fix_random_seed(seed)
 
-    component = nlp.get_pipe(pipe_name)
-    if hasattr(component, "model"):
-        model = component.model
+    pipe = nlp.get_pipe(component)
+    if hasattr(pipe, "model"):
+        model = pipe.model
     else:
         msg.fail(
-            f"The component '{pipe_name}' does not specify an object that holds a Model.",
+            f"The component '{component}' does not specify an object that holds a Model.",
             exits=1,
         )
     debug_model(model, print_settings=print_settings)

From 9b719dfb1aa1226425fac3bc00b047857d522089 Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Fri, 31 Jul 2020 18:06:48 +0200
Subject: [PATCH 7/7] use divider between steps

---
 spacy/cli/debug_model.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index de5583b03..132d2a2f1 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -84,7 +84,7 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None
     # STEP 0: Printing before training
     msg.info(f"Analysing model with ID {model.id}")
     if print_settings.get("print_before_training"):
-        msg.info(f"Before training:")
+        msg.divider(f"STEP 0 - before training")
         _print_model(model, print_settings)
 
     # STEP 1: Initializing the model and printing again
@@ -94,7 +94,7 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None
     with data_validation(False):
         model.initialize(X=_get_docs(), Y=Y)
     if print_settings.get("print_after_init"):
-        msg.info(f"After initialization:")
+        msg.divider(f"STEP 1 - after initialization")
         _print_model(model, print_settings)
 
     # STEP 2: Updating the model and printing again
@@ -106,13 +106,14 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None
         get_dX(dY)
         model.finish_update(optimizer)
     if print_settings.get("print_after_training"):
-        msg.info(f"After training:")
+        msg.divider(f"STEP 2 - after training")
         _print_model(model, print_settings)
 
     # STEP 3: the final prediction
     prediction = model.predict(_get_docs())
     if print_settings.get("print_prediction"):
-        msg.info(f"Prediction:", str(prediction))
+        msg.divider(f"STEP 3 - prediction")
+        msg.info(str(prediction))
 
 
 def get_gradient(model, Y):