mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Integrate with latest Thinc and config overrides
This commit is contained in:
parent
5cfc3edcaa
commit
a3667394b4
|
@ -6,7 +6,7 @@ requires = [
|
|||
"cymem>=2.0.2,<2.1.0",
|
||||
"preshed>=3.0.2,<3.1.0",
|
||||
"murmurhash>=0.28.0,<1.1.0",
|
||||
"thinc>=8.0.0a12,<8.0.0a20",
|
||||
"thinc>=8.0.0a14,<8.0.0a20",
|
||||
"blis>=0.4.0,<0.5.0",
|
||||
"pytokenizations"
|
||||
]
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# Our libraries
|
||||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
thinc>=8.0.0a12,<8.0.0a20
|
||||
thinc>=8.0.0a14,<8.0.0a20
|
||||
blis>=0.4.0,<0.5.0
|
||||
ml_datasets>=0.1.1
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
|
|
|
@ -34,13 +34,13 @@ setup_requires =
|
|||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
thinc>=8.0.0a12,<8.0.0a20
|
||||
thinc>=8.0.0a14,<8.0.0a20
|
||||
install_requires =
|
||||
# Our libraries
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
thinc>=8.0.0a12,<8.0.0a20
|
||||
thinc>=8.0.0a14,<8.0.0a20
|
||||
blis>=0.4.0,<0.5.0
|
||||
wasabi>=0.7.0,<1.1.0
|
||||
srsly>=2.1.0,<3.0.0
|
||||
|
|
|
@ -81,7 +81,12 @@ def train(
|
|||
) -> None:
|
||||
msg.info(f"Loading config from: {config_path}")
|
||||
# Read the config first without creating objects, to get to the original nlp_config
|
||||
config = util.load_config(config_path, create_objects=False, schema=ConfigSchema)
|
||||
config = util.load_config(
|
||||
config_path,
|
||||
create_objects=False,
|
||||
schema=ConfigSchema,
|
||||
overrides=config_overrides,
|
||||
)
|
||||
use_gpu = config["training"]["use_gpu"]
|
||||
if use_gpu >= 0:
|
||||
msg.info(f"Using GPU: {use_gpu}")
|
||||
|
@ -94,7 +99,12 @@ def train(
|
|||
# It feels kind of weird to not have a default for this.
|
||||
use_pytorch_for_gpu_memory()
|
||||
nlp_config = config["nlp"]
|
||||
config = util.load_config(config_path, create_objects=True, schema=ConfigSchema)
|
||||
config = util.load_config(
|
||||
config_path,
|
||||
create_objects=True,
|
||||
schema=ConfigSchema,
|
||||
overrides=config_overrides,
|
||||
)
|
||||
training = config["training"]
|
||||
msg.info("Creating nlp from config")
|
||||
nlp = util.load_model_from_config(nlp_config)
|
||||
|
|
|
@ -334,6 +334,7 @@ def load_config(
|
|||
*,
|
||||
create_objects: bool = False,
|
||||
schema: Type[BaseModel] = EmptySchema,
|
||||
overrides: Dict[str, Any] = {},
|
||||
validate: bool = True,
|
||||
) -> Dict[str, Any]:
|
||||
"""Load a Thinc-formatted config file, optionally filling in objects where
|
||||
|
@ -343,15 +344,18 @@ def load_config(
|
|||
create_objects (bool): Whether to automatically create objects when the config
|
||||
references registry entries. Defaults to False.
|
||||
schema (BaseModel): Optional pydantic base schema to use for validation.
|
||||
overrides (Dict[str, Any]): Optional overrides to substitute in config.
|
||||
validate (bool): Whether to validate against schema.
|
||||
RETURNS (dict): The objects from the config file.
|
||||
"""
|
||||
config = thinc.config.Config().from_disk(path)
|
||||
kwargs = {"validate": validate, "schema": schema, "overrides": overrides}
|
||||
if create_objects:
|
||||
return registry.make_from_config(config, validate=validate, schema=schema)
|
||||
return registry.make_from_config(config, **kwargs)
|
||||
else:
|
||||
# Just fill config here so we can validate and fail early
|
||||
if validate and schema:
|
||||
registry.fill_config(config, validate=validate, schema=schema)
|
||||
registry.fill_config(config, **kwargs)
|
||||
return config
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user