Integrate with latest Thinc and config overrides

2025-07-31 10:29:46 +03:00 · 2020-07-10 19:47:05 +02:00 · 2020-07-10 19:47:05 +02:00 · a3667394b4
commit a3667394b4
parent 5cfc3edcaa
5 changed files with 22 additions and 8 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ requires = [
    "cymem>=2.0.2,<2.1.0",
    "preshed>=3.0.2,<3.1.0",
    "murmurhash>=0.28.0,<1.1.0",
-    "thinc>=8.0.0a12,<8.0.0a20",
+    "thinc>=8.0.0a14,<8.0.0a20",
    "blis>=0.4.0,<0.5.0",
    "pytokenizations"
 ]
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 # Our libraries
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
-thinc>=8.0.0a12,<8.0.0a20
+thinc>=8.0.0a14,<8.0.0a20
 blis>=0.4.0,<0.5.0
 ml_datasets>=0.1.1
 murmurhash>=0.28.0,<1.1.0
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,13 +34,13 @@ setup_requires =
    cymem>=2.0.2,<2.1.0
    preshed>=3.0.2,<3.1.0
    murmurhash>=0.28.0,<1.1.0
-    thinc>=8.0.0a12,<8.0.0a20
+    thinc>=8.0.0a14,<8.0.0a20
 install_requires =
    # Our libraries
    murmurhash>=0.28.0,<1.1.0
    cymem>=2.0.2,<2.1.0
    preshed>=3.0.2,<3.1.0
-    thinc>=8.0.0a12,<8.0.0a20
+    thinc>=8.0.0a14,<8.0.0a20
    blis>=0.4.0,<0.5.0
    wasabi>=0.7.0,<1.1.0
    srsly>=2.1.0,<3.0.0
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -81,7 +81,12 @@ def train(
 ) -> None:
    msg.info(f"Loading config from: {config_path}")
    # Read the config first without creating objects, to get to the original nlp_config
-    config = util.load_config(config_path, create_objects=False, schema=ConfigSchema)
+    config = util.load_config(
+        config_path,
+        create_objects=False,
+        schema=ConfigSchema,
+        overrides=config_overrides,
+    )
    use_gpu = config["training"]["use_gpu"]
    if use_gpu >= 0:
        msg.info(f"Using GPU: {use_gpu}")
@@ -94,7 +99,12 @@ def train(
        # It feels kind of weird to not have a default for this.
        use_pytorch_for_gpu_memory()
    nlp_config = config["nlp"]
-    config = util.load_config(config_path, create_objects=True, schema=ConfigSchema)
+    config = util.load_config(
+        config_path,
+        create_objects=True,
+        schema=ConfigSchema,
+        overrides=config_overrides,
+    )
    training = config["training"]
    msg.info("Creating nlp from config")
    nlp = util.load_model_from_config(nlp_config)
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -334,6 +334,7 @@ def load_config(
    *,
    create_objects: bool = False,
    schema: Type[BaseModel] = EmptySchema,
+    overrides: Dict[str, Any] = {},
    validate: bool = True,
 ) -> Dict[str, Any]:
    """Load a Thinc-formatted config file, optionally filling in objects where
@@ -343,15 +344,18 @@ def load_config(
    create_objects (bool): Whether to automatically create objects when the config
        references registry entries. Defaults to False.
    schema (BaseModel): Optional pydantic base schema to use for validation.
+    overrides (Dict[str, Any]): Optional overrides to substitute in config.
+    validate (bool): Whether to validate against schema.
    RETURNS (dict): The objects from the config file.
    """
    config = thinc.config.Config().from_disk(path)
+    kwargs = {"validate": validate, "schema": schema, "overrides": overrides}
    if create_objects:
-        return registry.make_from_config(config, validate=validate, schema=schema)
+        return registry.make_from_config(config, **kwargs)
    else:
        # Just fill config here so we can validate and fail early
        if validate and schema:
-            registry.fill_config(config, validate=validate, schema=schema)
+            registry.fill_config(config, **kwargs)
        return config