Integrate with latest Thinc and config overrides

This commit is contained in:
Ines Montani 2020-07-10 19:47:05 +02:00
parent 5cfc3edcaa
commit a3667394b4
5 changed files with 22 additions and 8 deletions

View File

@ -6,7 +6,7 @@ requires = [
"cymem>=2.0.2,<2.1.0",
"preshed>=3.0.2,<3.1.0",
"murmurhash>=0.28.0,<1.1.0",
"thinc>=8.0.0a12,<8.0.0a20",
"thinc>=8.0.0a14,<8.0.0a20",
"blis>=0.4.0,<0.5.0",
"pytokenizations"
]

View File

@ -1,7 +1,7 @@
# Our libraries
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
thinc>=8.0.0a12,<8.0.0a20
thinc>=8.0.0a14,<8.0.0a20
blis>=0.4.0,<0.5.0
ml_datasets>=0.1.1
murmurhash>=0.28.0,<1.1.0

View File

@ -34,13 +34,13 @@ setup_requires =
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
murmurhash>=0.28.0,<1.1.0
thinc>=8.0.0a12,<8.0.0a20
thinc>=8.0.0a14,<8.0.0a20
install_requires =
# Our libraries
murmurhash>=0.28.0,<1.1.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
thinc>=8.0.0a12,<8.0.0a20
thinc>=8.0.0a14,<8.0.0a20
blis>=0.4.0,<0.5.0
wasabi>=0.7.0,<1.1.0
srsly>=2.1.0,<3.0.0

View File

@ -81,7 +81,12 @@ def train(
) -> None:
msg.info(f"Loading config from: {config_path}")
# Read the config first without creating objects, to get to the original nlp_config
config = util.load_config(config_path, create_objects=False, schema=ConfigSchema)
config = util.load_config(
config_path,
create_objects=False,
schema=ConfigSchema,
overrides=config_overrides,
)
use_gpu = config["training"]["use_gpu"]
if use_gpu >= 0:
msg.info(f"Using GPU: {use_gpu}")
@ -94,7 +99,12 @@ def train(
# It feels kind of weird to not have a default for this.
use_pytorch_for_gpu_memory()
nlp_config = config["nlp"]
config = util.load_config(config_path, create_objects=True, schema=ConfigSchema)
config = util.load_config(
config_path,
create_objects=True,
schema=ConfigSchema,
overrides=config_overrides,
)
training = config["training"]
msg.info("Creating nlp from config")
nlp = util.load_model_from_config(nlp_config)

View File

@ -334,6 +334,7 @@ def load_config(
*,
create_objects: bool = False,
schema: Type[BaseModel] = EmptySchema,
overrides: Dict[str, Any] = {},
validate: bool = True,
) -> Dict[str, Any]:
"""Load a Thinc-formatted config file, optionally filling in objects where
@ -343,15 +344,18 @@ def load_config(
create_objects (bool): Whether to automatically create objects when the config
references registry entries. Defaults to False.
schema (BaseModel): Optional pydantic base schema to use for validation.
overrides (Dict[str, Any]): Optional overrides to substitute in config.
validate (bool): Whether to validate against schema.
RETURNS (dict): The objects from the config file.
"""
config = thinc.config.Config().from_disk(path)
kwargs = {"validate": validate, "schema": schema, "overrides": overrides}
if create_objects:
return registry.make_from_config(config, validate=validate, schema=schema)
return registry.make_from_config(config, **kwargs)
else:
# Just fill config here so we can validate and fail early
if validate and schema:
registry.fill_config(config, validate=validate, schema=schema)
registry.fill_config(config, **kwargs)
return config