mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 17:54:39 +03:00
Integrate with latest Thinc and config overrides
This commit is contained in:
parent
5cfc3edcaa
commit
a3667394b4
|
@ -6,7 +6,7 @@ requires = [
|
||||||
"cymem>=2.0.2,<2.1.0",
|
"cymem>=2.0.2,<2.1.0",
|
||||||
"preshed>=3.0.2,<3.1.0",
|
"preshed>=3.0.2,<3.1.0",
|
||||||
"murmurhash>=0.28.0,<1.1.0",
|
"murmurhash>=0.28.0,<1.1.0",
|
||||||
"thinc>=8.0.0a12,<8.0.0a20",
|
"thinc>=8.0.0a14,<8.0.0a20",
|
||||||
"blis>=0.4.0,<0.5.0",
|
"blis>=0.4.0,<0.5.0",
|
||||||
"pytokenizations"
|
"pytokenizations"
|
||||||
]
|
]
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# Our libraries
|
# Our libraries
|
||||||
cymem>=2.0.2,<2.1.0
|
cymem>=2.0.2,<2.1.0
|
||||||
preshed>=3.0.2,<3.1.0
|
preshed>=3.0.2,<3.1.0
|
||||||
thinc>=8.0.0a12,<8.0.0a20
|
thinc>=8.0.0a14,<8.0.0a20
|
||||||
blis>=0.4.0,<0.5.0
|
blis>=0.4.0,<0.5.0
|
||||||
ml_datasets>=0.1.1
|
ml_datasets>=0.1.1
|
||||||
murmurhash>=0.28.0,<1.1.0
|
murmurhash>=0.28.0,<1.1.0
|
||||||
|
|
|
@ -34,13 +34,13 @@ setup_requires =
|
||||||
cymem>=2.0.2,<2.1.0
|
cymem>=2.0.2,<2.1.0
|
||||||
preshed>=3.0.2,<3.1.0
|
preshed>=3.0.2,<3.1.0
|
||||||
murmurhash>=0.28.0,<1.1.0
|
murmurhash>=0.28.0,<1.1.0
|
||||||
thinc>=8.0.0a12,<8.0.0a20
|
thinc>=8.0.0a14,<8.0.0a20
|
||||||
install_requires =
|
install_requires =
|
||||||
# Our libraries
|
# Our libraries
|
||||||
murmurhash>=0.28.0,<1.1.0
|
murmurhash>=0.28.0,<1.1.0
|
||||||
cymem>=2.0.2,<2.1.0
|
cymem>=2.0.2,<2.1.0
|
||||||
preshed>=3.0.2,<3.1.0
|
preshed>=3.0.2,<3.1.0
|
||||||
thinc>=8.0.0a12,<8.0.0a20
|
thinc>=8.0.0a14,<8.0.0a20
|
||||||
blis>=0.4.0,<0.5.0
|
blis>=0.4.0,<0.5.0
|
||||||
wasabi>=0.7.0,<1.1.0
|
wasabi>=0.7.0,<1.1.0
|
||||||
srsly>=2.1.0,<3.0.0
|
srsly>=2.1.0,<3.0.0
|
||||||
|
|
|
@ -81,7 +81,12 @@ def train(
|
||||||
) -> None:
|
) -> None:
|
||||||
msg.info(f"Loading config from: {config_path}")
|
msg.info(f"Loading config from: {config_path}")
|
||||||
# Read the config first without creating objects, to get to the original nlp_config
|
# Read the config first without creating objects, to get to the original nlp_config
|
||||||
config = util.load_config(config_path, create_objects=False, schema=ConfigSchema)
|
config = util.load_config(
|
||||||
|
config_path,
|
||||||
|
create_objects=False,
|
||||||
|
schema=ConfigSchema,
|
||||||
|
overrides=config_overrides,
|
||||||
|
)
|
||||||
use_gpu = config["training"]["use_gpu"]
|
use_gpu = config["training"]["use_gpu"]
|
||||||
if use_gpu >= 0:
|
if use_gpu >= 0:
|
||||||
msg.info(f"Using GPU: {use_gpu}")
|
msg.info(f"Using GPU: {use_gpu}")
|
||||||
|
@ -94,7 +99,12 @@ def train(
|
||||||
# It feels kind of weird to not have a default for this.
|
# It feels kind of weird to not have a default for this.
|
||||||
use_pytorch_for_gpu_memory()
|
use_pytorch_for_gpu_memory()
|
||||||
nlp_config = config["nlp"]
|
nlp_config = config["nlp"]
|
||||||
config = util.load_config(config_path, create_objects=True, schema=ConfigSchema)
|
config = util.load_config(
|
||||||
|
config_path,
|
||||||
|
create_objects=True,
|
||||||
|
schema=ConfigSchema,
|
||||||
|
overrides=config_overrides,
|
||||||
|
)
|
||||||
training = config["training"]
|
training = config["training"]
|
||||||
msg.info("Creating nlp from config")
|
msg.info("Creating nlp from config")
|
||||||
nlp = util.load_model_from_config(nlp_config)
|
nlp = util.load_model_from_config(nlp_config)
|
||||||
|
|
|
@ -334,6 +334,7 @@ def load_config(
|
||||||
*,
|
*,
|
||||||
create_objects: bool = False,
|
create_objects: bool = False,
|
||||||
schema: Type[BaseModel] = EmptySchema,
|
schema: Type[BaseModel] = EmptySchema,
|
||||||
|
overrides: Dict[str, Any] = {},
|
||||||
validate: bool = True,
|
validate: bool = True,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Load a Thinc-formatted config file, optionally filling in objects where
|
"""Load a Thinc-formatted config file, optionally filling in objects where
|
||||||
|
@ -343,15 +344,18 @@ def load_config(
|
||||||
create_objects (bool): Whether to automatically create objects when the config
|
create_objects (bool): Whether to automatically create objects when the config
|
||||||
references registry entries. Defaults to False.
|
references registry entries. Defaults to False.
|
||||||
schema (BaseModel): Optional pydantic base schema to use for validation.
|
schema (BaseModel): Optional pydantic base schema to use for validation.
|
||||||
|
overrides (Dict[str, Any]): Optional overrides to substitute in config.
|
||||||
|
validate (bool): Whether to validate against schema.
|
||||||
RETURNS (dict): The objects from the config file.
|
RETURNS (dict): The objects from the config file.
|
||||||
"""
|
"""
|
||||||
config = thinc.config.Config().from_disk(path)
|
config = thinc.config.Config().from_disk(path)
|
||||||
|
kwargs = {"validate": validate, "schema": schema, "overrides": overrides}
|
||||||
if create_objects:
|
if create_objects:
|
||||||
return registry.make_from_config(config, validate=validate, schema=schema)
|
return registry.make_from_config(config, **kwargs)
|
||||||
else:
|
else:
|
||||||
# Just fill config here so we can validate and fail early
|
# Just fill config here so we can validate and fail early
|
||||||
if validate and schema:
|
if validate and schema:
|
||||||
registry.fill_config(config, validate=validate, schema=schema)
|
registry.fill_config(config, **kwargs)
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user