mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Merge branch 'develop' into fix/default-corpus-values
This commit is contained in:
commit
df8dd91b6f
|
@ -49,35 +49,11 @@ def train_cli(
|
||||||
config = util.load_config(config_path, overrides=overrides, interpolate=False)
|
config = util.load_config(config_path, overrides=overrides, interpolate=False)
|
||||||
msg.divider("Initializing pipeline")
|
msg.divider("Initializing pipeline")
|
||||||
with show_validation_error(config_path, hint_fill=False):
|
with show_validation_error(config_path, hint_fill=False):
|
||||||
nlp = init_pipeline(config, output_path, use_gpu=use_gpu)
|
nlp = init_nlp(config, use_gpu=use_gpu)
|
||||||
msg.divider("Training pipeline")
|
msg.divider("Training pipeline")
|
||||||
train(nlp, output_path, use_gpu=use_gpu, silent=False)
|
train(nlp, output_path, use_gpu=use_gpu, silent=False)
|
||||||
|
|
||||||
|
|
||||||
def init_pipeline(
|
|
||||||
config: Config, output_path: Optional[Path], *, use_gpu: int = -1
|
|
||||||
) -> Language:
|
|
||||||
init_kwargs = {"use_gpu": use_gpu}
|
|
||||||
if output_path is not None:
|
|
||||||
init_path = output_path / "model-initial"
|
|
||||||
if not init_path.exists():
|
|
||||||
msg.info(f"Initializing the pipeline in {init_path}")
|
|
||||||
nlp = init_nlp(config, **init_kwargs)
|
|
||||||
nlp.to_disk(init_path)
|
|
||||||
msg.good(f"Saved initialized pipeline to {init_path}")
|
|
||||||
else:
|
|
||||||
nlp = util.load_model(init_path)
|
|
||||||
if must_reinitialize(config, nlp.config):
|
|
||||||
msg.warn("Config has changed: need to re-initialize pipeline")
|
|
||||||
nlp = init_nlp(config, **init_kwargs)
|
|
||||||
nlp.to_disk(init_path)
|
|
||||||
msg.good(f"Re-initialized pipeline in {init_path}")
|
|
||||||
else:
|
|
||||||
msg.good(f"Loaded initialized pipeline from {init_path}")
|
|
||||||
return nlp
|
|
||||||
return init_nlp(config, **init_kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
def verify_cli_args(config_path: Path, output_path: Optional[Path] = None) -> None:
|
def verify_cli_args(config_path: Path, output_path: Optional[Path] = None) -> None:
|
||||||
# Make sure all files and paths exist if they are needed
|
# Make sure all files and paths exist if they are needed
|
||||||
if not config_path or not config_path.exists():
|
if not config_path or not config_path.exists():
|
||||||
|
|
|
@ -28,6 +28,7 @@ def create_docbin_reader(
|
||||||
) -> Callable[["Language"], Iterable[Example]]:
|
) -> Callable[["Language"], Iterable[Example]]:
|
||||||
if path is None:
|
if path is None:
|
||||||
raise ValueError(Errors.E913)
|
raise ValueError(Errors.E913)
|
||||||
|
util.logger.debug(f"Loading corpus from path: {path}")
|
||||||
return Corpus(
|
return Corpus(
|
||||||
path,
|
path,
|
||||||
gold_preproc=gold_preproc,
|
gold_preproc=gold_preproc,
|
||||||
|
|
|
@ -94,6 +94,7 @@ def init_vocab(
|
||||||
if vectors is not None:
|
if vectors is not None:
|
||||||
load_vectors_into_model(nlp, vectors)
|
load_vectors_into_model(nlp, vectors)
|
||||||
logger.info(f"Added vectors: {vectors}")
|
logger.info(f"Added vectors: {vectors}")
|
||||||
|
logger.info("Finished initializing nlp object")
|
||||||
|
|
||||||
|
|
||||||
def load_vectors_into_model(
|
def load_vectors_into_model(
|
||||||
|
|
Loading…
Reference in New Issue
Block a user