From a3667394b4a0f6b15ad59ecc403f31333c28705e Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Fri, 10 Jul 2020 19:47:05 +0200
Subject: [PATCH] Integrate with latest Thinc and config overrides

---
 pyproject.toml     |  2 +-
 requirements.txt   |  2 +-
 setup.cfg          |  4 ++--
 spacy/cli/train.py | 14 ++++++++++++--
 spacy/util.py      |  8 ++++++--
 5 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2c020ef66..ec69e3f84 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ requires = [
     "cymem>=2.0.2,<2.1.0",
     "preshed>=3.0.2,<3.1.0",
     "murmurhash>=0.28.0,<1.1.0",
-    "thinc>=8.0.0a12,<8.0.0a20",
+    "thinc>=8.0.0a14,<8.0.0a20",
     "blis>=0.4.0,<0.5.0",
     "pytokenizations"
 ]
diff --git a/requirements.txt b/requirements.txt
index 3e1329de9..7bac27023 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 # Our libraries
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
-thinc>=8.0.0a12,<8.0.0a20
+thinc>=8.0.0a14,<8.0.0a20
 blis>=0.4.0,<0.5.0
 ml_datasets>=0.1.1
 murmurhash>=0.28.0,<1.1.0
diff --git a/setup.cfg b/setup.cfg
index 9793bbb08..91d573536 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,13 +34,13 @@ setup_requires =
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
     murmurhash>=0.28.0,<1.1.0
-    thinc>=8.0.0a12,<8.0.0a20
+    thinc>=8.0.0a14,<8.0.0a20
 install_requires =
     # Our libraries
     murmurhash>=0.28.0,<1.1.0
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
-    thinc>=8.0.0a12,<8.0.0a20
+    thinc>=8.0.0a14,<8.0.0a20
     blis>=0.4.0,<0.5.0
     wasabi>=0.7.0,<1.1.0
     srsly>=2.1.0,<3.0.0
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 32d1a456e..594a552b6 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -81,7 +81,12 @@ def train(
 ) -> None:
     msg.info(f"Loading config from: {config_path}")
     # Read the config first without creating objects, to get to the original nlp_config
-    config = util.load_config(config_path, create_objects=False, schema=ConfigSchema)
+    config = util.load_config(
+        config_path,
+        create_objects=False,
+        schema=ConfigSchema,
+        overrides=config_overrides,
+    )
     use_gpu = config["training"]["use_gpu"]
     if use_gpu >= 0:
         msg.info(f"Using GPU: {use_gpu}")
@@ -94,7 +99,12 @@ def train(
         # It feels kind of weird to not have a default for this.
         use_pytorch_for_gpu_memory()
     nlp_config = config["nlp"]
-    config = util.load_config(config_path, create_objects=True, schema=ConfigSchema)
+    config = util.load_config(
+        config_path,
+        create_objects=True,
+        schema=ConfigSchema,
+        overrides=config_overrides,
+    )
     training = config["training"]
     msg.info("Creating nlp from config")
     nlp = util.load_model_from_config(nlp_config)
diff --git a/spacy/util.py b/spacy/util.py
index c91c2af25..ccfc1c03a 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -334,6 +334,7 @@ def load_config(
     *,
     create_objects: bool = False,
     schema: Type[BaseModel] = EmptySchema,
+    overrides: Dict[str, Any] = {},
     validate: bool = True,
 ) -> Dict[str, Any]:
     """Load a Thinc-formatted config file, optionally filling in objects where
@@ -343,15 +344,18 @@ def load_config(
     create_objects (bool): Whether to automatically create objects when the config
         references registry entries. Defaults to False.
     schema (BaseModel): Optional pydantic base schema to use for validation.
+    overrides (Dict[str, Any]): Optional overrides to substitute in config.
+    validate (bool): Whether to validate against schema.
     RETURNS (dict): The objects from the config file.
     """
     config = thinc.config.Config().from_disk(path)
+    kwargs = {"validate": validate, "schema": schema, "overrides": overrides}
     if create_objects:
-        return registry.make_from_config(config, validate=validate, schema=schema)
+        return registry.make_from_config(config, **kwargs)
     else:
         # Just fill config here so we can validate and fail early
         if validate and schema:
-            registry.fill_config(config, validate=validate, schema=schema)
+            registry.fill_config(config, **kwargs)
         return config