mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-26 20:03:40 +03:00
Don't support init path for now
This commit is contained in:
parent
f49288ab81
commit
c22ecc66bb
|
@ -20,7 +20,9 @@ DEFAULT_OOV_PROB = -20
|
||||||
|
|
||||||
|
|
||||||
@init_cli.command(
|
@init_cli.command(
|
||||||
"nlp", context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
|
"nlp",
|
||||||
|
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
|
||||||
|
hidden=True,
|
||||||
)
|
)
|
||||||
def init_pipeline_cli(
|
def init_pipeline_cli(
|
||||||
# fmt: off
|
# fmt: off
|
||||||
|
@ -38,7 +40,6 @@ def init_pipeline_cli(
|
||||||
config = util.load_config(config_path, overrides=overrides)
|
config = util.load_config(config_path, overrides=overrides)
|
||||||
nlp = init_pipeline(config)
|
nlp = init_pipeline(config)
|
||||||
nlp.to_disk(output_path)
|
nlp.to_disk(output_path)
|
||||||
# TODO: add more instructions
|
|
||||||
msg.good(f"Saved initialized pipeline to {output_path}")
|
msg.good(f"Saved initialized pipeline to {output_path}")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -30,7 +30,6 @@ def train_cli(
|
||||||
config_path: Path = Arg(..., help="Path to config file", exists=True),
|
config_path: Path = Arg(..., help="Path to config file", exists=True),
|
||||||
output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory to store trained pipeline in"),
|
output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory to store trained pipeline in"),
|
||||||
code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
|
code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
|
||||||
init_path: Optional[Path] = Opt(None, "--init", "-i", help="Path to already initialized pipeline directory, e.g. created with 'spacy init pipeline' (will speed up training)"),
|
|
||||||
verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
|
verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
|
||||||
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
|
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
|
||||||
# fmt: on
|
# fmt: on
|
||||||
|
@ -60,46 +59,34 @@ def train_cli(
|
||||||
msg.info("Using CPU")
|
msg.info("Using CPU")
|
||||||
config = util.load_config(config_path, overrides=overrides, interpolate=False)
|
config = util.load_config(config_path, overrides=overrides, interpolate=False)
|
||||||
msg.divider("Initializing pipeline")
|
msg.divider("Initializing pipeline")
|
||||||
nlp = init_nlp(config, output_path, init_path)
|
nlp = init_nlp(config, output_path)
|
||||||
msg.divider("Training pipeline")
|
msg.divider("Training pipeline")
|
||||||
train(nlp, output_path, use_gpu=use_gpu)
|
train(nlp, output_path, use_gpu=use_gpu)
|
||||||
|
|
||||||
|
|
||||||
def init_nlp(
|
def init_nlp(config: Config, output_path: Optional[Path]) -> Language:
|
||||||
config: Config, output_path: Optional[Path], init_path: Optional[Path]
|
|
||||||
) -> None:
|
|
||||||
if init_path is not None:
|
|
||||||
nlp = util.load_model(init_path)
|
|
||||||
if must_reinitialize(config, nlp.config):
|
|
||||||
msg.fail(
|
|
||||||
f"Config has changed: can't use initialized pipeline from "
|
|
||||||
f"{init_path}. Please re-run 'spacy init nlp'.",
|
|
||||||
exits=1,
|
|
||||||
)
|
|
||||||
msg.good(f"Loaded initialized pipeline from {init_path}")
|
|
||||||
return nlp
|
|
||||||
if output_path is not None:
|
if output_path is not None:
|
||||||
output_init_path = output_path / "model-initial"
|
init_path = output_path / "model-initial"
|
||||||
if not output_init_path.exists():
|
if not init_path.exists():
|
||||||
msg.info(f"Initializing the pipeline in {output_init_path}")
|
msg.info(f"Initializing the pipeline in {init_path}")
|
||||||
nlp = init_pipeline(config)
|
nlp = init_pipeline(config)
|
||||||
nlp.to_disk(output_init_path)
|
nlp.to_disk(init_path)
|
||||||
msg.good(f"Saved initialized pipeline to {output_init_path}")
|
msg.good(f"Saved initialized pipeline to {init_path}")
|
||||||
else:
|
else:
|
||||||
nlp = util.load_model(output_init_path)
|
nlp = util.load_model(init_path)
|
||||||
if must_reinitialize(config, nlp.config):
|
if must_reinitialize(config, nlp.config):
|
||||||
msg.warn("Config has changed: need to re-initialize pipeline")
|
msg.warn("Config has changed: need to re-initialize pipeline")
|
||||||
nlp = init_pipeline(config)
|
nlp = init_pipeline(config)
|
||||||
nlp.to_disk(output_init_path)
|
nlp.to_disk(init_path)
|
||||||
msg.good(f"Re-initialized pipeline in {output_init_path}")
|
msg.good(f"Re-initialized pipeline in {init_path}")
|
||||||
else:
|
else:
|
||||||
msg.good(f"Loaded initialized pipeline from {output_init_path}")
|
msg.good(f"Loaded initialized pipeline from {init_path}")
|
||||||
return nlp
|
return nlp
|
||||||
msg.warn(
|
msg.warn(
|
||||||
"Not saving initialized model: no output directory specified. "
|
"Not saving initialized model: no output directory specified. "
|
||||||
"To speed up training, spaCy can save the initialized nlp object with "
|
"To speed up training, spaCy can save the initialized nlp object with "
|
||||||
"the vocabulary, vectors and label scheme. To take advantage of this, "
|
"the vocabulary, vectors and label scheme. To take advantage of this, "
|
||||||
"provide an output directory or use the 'spacy init nlp' command."
|
"provide an output directory."
|
||||||
)
|
)
|
||||||
return init_pipeline(config)
|
return init_pipeline(config)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user