Integrate with latest Thinc and config overrides

This commit is contained in:
Ines Montani 2020-07-10 19:47:05 +02:00
parent 5cfc3edcaa
commit a3667394b4
5 changed files with 22 additions and 8 deletions

View File

@ -6,7 +6,7 @@ requires = [
"cymem>=2.0.2,<2.1.0", "cymem>=2.0.2,<2.1.0",
"preshed>=3.0.2,<3.1.0", "preshed>=3.0.2,<3.1.0",
"murmurhash>=0.28.0,<1.1.0", "murmurhash>=0.28.0,<1.1.0",
"thinc>=8.0.0a12,<8.0.0a20", "thinc>=8.0.0a14,<8.0.0a20",
"blis>=0.4.0,<0.5.0", "blis>=0.4.0,<0.5.0",
"pytokenizations" "pytokenizations"
] ]

View File

@ -1,7 +1,7 @@
# Our libraries # Our libraries
cymem>=2.0.2,<2.1.0 cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0 preshed>=3.0.2,<3.1.0
thinc>=8.0.0a12,<8.0.0a20 thinc>=8.0.0a14,<8.0.0a20
blis>=0.4.0,<0.5.0 blis>=0.4.0,<0.5.0
ml_datasets>=0.1.1 ml_datasets>=0.1.1
murmurhash>=0.28.0,<1.1.0 murmurhash>=0.28.0,<1.1.0

View File

@ -34,13 +34,13 @@ setup_requires =
cymem>=2.0.2,<2.1.0 cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0 preshed>=3.0.2,<3.1.0
murmurhash>=0.28.0,<1.1.0 murmurhash>=0.28.0,<1.1.0
thinc>=8.0.0a12,<8.0.0a20 thinc>=8.0.0a14,<8.0.0a20
install_requires = install_requires =
# Our libraries # Our libraries
murmurhash>=0.28.0,<1.1.0 murmurhash>=0.28.0,<1.1.0
cymem>=2.0.2,<2.1.0 cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0 preshed>=3.0.2,<3.1.0
thinc>=8.0.0a12,<8.0.0a20 thinc>=8.0.0a14,<8.0.0a20
blis>=0.4.0,<0.5.0 blis>=0.4.0,<0.5.0
wasabi>=0.7.0,<1.1.0 wasabi>=0.7.0,<1.1.0
srsly>=2.1.0,<3.0.0 srsly>=2.1.0,<3.0.0

View File

@ -81,7 +81,12 @@ def train(
) -> None: ) -> None:
msg.info(f"Loading config from: {config_path}") msg.info(f"Loading config from: {config_path}")
# Read the config first without creating objects, to get to the original nlp_config # Read the config first without creating objects, to get to the original nlp_config
config = util.load_config(config_path, create_objects=False, schema=ConfigSchema) config = util.load_config(
config_path,
create_objects=False,
schema=ConfigSchema,
overrides=config_overrides,
)
use_gpu = config["training"]["use_gpu"] use_gpu = config["training"]["use_gpu"]
if use_gpu >= 0: if use_gpu >= 0:
msg.info(f"Using GPU: {use_gpu}") msg.info(f"Using GPU: {use_gpu}")
@ -94,7 +99,12 @@ def train(
# It feels kind of weird to not have a default for this. # It feels kind of weird to not have a default for this.
use_pytorch_for_gpu_memory() use_pytorch_for_gpu_memory()
nlp_config = config["nlp"] nlp_config = config["nlp"]
config = util.load_config(config_path, create_objects=True, schema=ConfigSchema) config = util.load_config(
config_path,
create_objects=True,
schema=ConfigSchema,
overrides=config_overrides,
)
training = config["training"] training = config["training"]
msg.info("Creating nlp from config") msg.info("Creating nlp from config")
nlp = util.load_model_from_config(nlp_config) nlp = util.load_model_from_config(nlp_config)

View File

@ -334,6 +334,7 @@ def load_config(
*, *,
create_objects: bool = False, create_objects: bool = False,
schema: Type[BaseModel] = EmptySchema, schema: Type[BaseModel] = EmptySchema,
overrides: Dict[str, Any] = {},
validate: bool = True, validate: bool = True,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Load a Thinc-formatted config file, optionally filling in objects where """Load a Thinc-formatted config file, optionally filling in objects where
@ -343,15 +344,18 @@ def load_config(
create_objects (bool): Whether to automatically create objects when the config create_objects (bool): Whether to automatically create objects when the config
references registry entries. Defaults to False. references registry entries. Defaults to False.
schema (BaseModel): Optional pydantic base schema to use for validation. schema (BaseModel): Optional pydantic base schema to use for validation.
overrides (Dict[str, Any]): Optional overrides to substitute in config.
validate (bool): Whether to validate against schema.
RETURNS (dict): The objects from the config file. RETURNS (dict): The objects from the config file.
""" """
config = thinc.config.Config().from_disk(path) config = thinc.config.Config().from_disk(path)
kwargs = {"validate": validate, "schema": schema, "overrides": overrides}
if create_objects: if create_objects:
return registry.make_from_config(config, validate=validate, schema=schema) return registry.make_from_config(config, **kwargs)
else: else:
# Just fill config here so we can validate and fail early # Just fill config here so we can validate and fail early
if validate and schema: if validate and schema:
registry.fill_config(config, validate=validate, schema=schema) registry.fill_config(config, **kwargs)
return config return config