From a3667394b4a0f6b15ad59ecc403f31333c28705e Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Fri, 10 Jul 2020 19:47:05 +0200
Subject: [PATCH] Integrate with latest Thinc and config overrides

---
 pyproject.toml     |  2 +-
 requirements.txt   |  2 +-
 setup.cfg          |  4 ++--
 spacy/cli/train.py | 14 ++++++++++++--
 spacy/util.py      |  8 ++++++--
 5 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2c020ef66..ec69e3f84 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ requires = [
     "cymem>=2.0.2,<2.1.0",
     "preshed>=3.0.2,<3.1.0",
     "murmurhash>=0.28.0,<1.1.0",
-    "thinc>=8.0.0a12,<8.0.0a20",
+    "thinc>=8.0.0a14,<8.0.0a20",
     "blis>=0.4.0,<0.5.0",
     "pytokenizations"
 ]
diff --git a/requirements.txt b/requirements.txt
index 3e1329de9..7bac27023 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 # Our libraries
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
-thinc>=8.0.0a12,<8.0.0a20
+thinc>=8.0.0a14,<8.0.0a20
 blis>=0.4.0,<0.5.0
 ml_datasets>=0.1.1
 murmurhash>=0.28.0,<1.1.0
diff --git a/setup.cfg b/setup.cfg
index 9793bbb08..91d573536 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,13 +34,13 @@ setup_requires =
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
     murmurhash>=0.28.0,<1.1.0
-    thinc>=8.0.0a12,<8.0.0a20
+    thinc>=8.0.0a14,<8.0.0a20
 install_requires =
     # Our libraries
     murmurhash>=0.28.0,<1.1.0
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
-    thinc>=8.0.0a12,<8.0.0a20
+    thinc>=8.0.0a14,<8.0.0a20
     blis>=0.4.0,<0.5.0
     wasabi>=0.7.0,<1.1.0
     srsly>=2.1.0,<3.0.0
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 32d1a456e..594a552b6 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -81,7 +81,12 @@ def train(
 ) -> None:
     msg.info(f"Loading config from: {config_path}")
     # Read the config first without creating objects, to get to the original nlp_config
-    config = util.load_config(config_path, create_objects=False, schema=ConfigSchema)
+    config = util.load_config(
+        config_path,
+        create_objects=False,
+        schema=ConfigSchema,
+        overrides=config_overrides,
+    )
     use_gpu = config["training"]["use_gpu"]
     if use_gpu >= 0:
         msg.info(f"Using GPU: {use_gpu}")
@@ -94,7 +99,12 @@ def train(
         # It feels kind of weird to not have a default for this.
         use_pytorch_for_gpu_memory()
     nlp_config = config["nlp"]
-    config = util.load_config(config_path, create_objects=True, schema=ConfigSchema)
+    config = util.load_config(
+        config_path,
+        create_objects=True,
+        schema=ConfigSchema,
+        overrides=config_overrides,
+    )
     training = config["training"]
     msg.info("Creating nlp from config")
     nlp = util.load_model_from_config(nlp_config)
diff --git a/spacy/util.py b/spacy/util.py
index c91c2af25..ccfc1c03a 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -334,6 +334,7 @@ def load_config(
     *,
     create_objects: bool = False,
     schema: Type[BaseModel] = EmptySchema,
+    overrides: Dict[str, Any] = {},
     validate: bool = True,
 ) -> Dict[str, Any]:
     """Load a Thinc-formatted config file, optionally filling in objects where
@@ -343,15 +344,18 @@ def load_config(
     create_objects (bool): Whether to automatically create objects when the config
         references registry entries. Defaults to False.
     schema (BaseModel): Optional pydantic base schema to use for validation.
+    overrides (Dict[str, Any]): Optional overrides to substitute in config.
+    validate (bool): Whether to validate against schema.
     RETURNS (dict): The objects from the config file.
     """
     config = thinc.config.Config().from_disk(path)
+    kwargs = {"validate": validate, "schema": schema, "overrides": overrides}
     if create_objects:
-        return registry.make_from_config(config, validate=validate, schema=schema)
+        return registry.make_from_config(config, **kwargs)
     else:
         # Just fill config here so we can validate and fail early
         if validate and schema:
-            registry.fill_config(config, validate=validate, schema=schema)
+            registry.fill_config(config, **kwargs)
         return config