Merge branch 'develop' into fix/default-corpus-values

This commit is contained in:
Ines Montani 2020-09-29 22:55:39 +02:00
commit df8dd91b6f
3 changed files with 3 additions and 25 deletions

View File

@ -49,35 +49,11 @@ def train_cli(
config = util.load_config(config_path, overrides=overrides, interpolate=False) config = util.load_config(config_path, overrides=overrides, interpolate=False)
msg.divider("Initializing pipeline") msg.divider("Initializing pipeline")
with show_validation_error(config_path, hint_fill=False): with show_validation_error(config_path, hint_fill=False):
nlp = init_pipeline(config, output_path, use_gpu=use_gpu) nlp = init_nlp(config, use_gpu=use_gpu)
msg.divider("Training pipeline") msg.divider("Training pipeline")
train(nlp, output_path, use_gpu=use_gpu, silent=False) train(nlp, output_path, use_gpu=use_gpu, silent=False)
def init_pipeline(
    config: Config, output_path: Optional[Path], *, use_gpu: int = -1
) -> Language:
    """Return an initialized pipeline, caching it under the output directory.

    When ``output_path`` is None, the pipeline is built with ``init_nlp`` and
    nothing is written to disk. Otherwise the directory
    ``output_path / "model-initial"`` acts as a cache:

    * if it does not exist, the pipeline is initialized and saved there;
    * if it exists, the saved pipeline is loaded, and it is re-initialized
      and saved again only when ``must_reinitialize`` reports that the
      given config differs from the loaded pipeline's config.

    config: The training config passed on to ``init_nlp``.
    output_path: Directory holding the cached initial pipeline, or None.
    use_gpu: Forwarded to ``init_nlp`` as the ``use_gpu`` keyword argument.
    RETURNS: The freshly initialized or loaded pipeline object.
    """
    # No output directory: nothing to cache, just build the pipeline.
    if output_path is None:
        return init_nlp(config, use_gpu=use_gpu)

    init_path = output_path / "model-initial"

    if init_path.exists():
        nlp = util.load_model(init_path)
        if not must_reinitialize(config, nlp.config):
            msg.good(f"Loaded initialized pipeline from {init_path}")
            return nlp
        # The cached pipeline no longer matches the config: rebuild and
        # overwrite the cached copy.
        msg.warn("Config has changed: need to re-initialize pipeline")
        nlp = init_nlp(config, use_gpu=use_gpu)
        nlp.to_disk(init_path)
        msg.good(f"Re-initialized pipeline in {init_path}")
        return nlp

    # First run for this output directory: build and cache the pipeline.
    msg.info(f"Initializing the pipeline in {init_path}")
    nlp = init_nlp(config, use_gpu=use_gpu)
    nlp.to_disk(init_path)
    msg.good(f"Saved initialized pipeline to {init_path}")
    return nlp
def verify_cli_args(config_path: Path, output_path: Optional[Path] = None) -> None: def verify_cli_args(config_path: Path, output_path: Optional[Path] = None) -> None:
# Make sure all files and paths exists if they are needed # Make sure all files and paths exists if they are needed
if not config_path or not config_path.exists(): if not config_path or not config_path.exists():

View File

@ -28,6 +28,7 @@ def create_docbin_reader(
) -> Callable[["Language"], Iterable[Example]]: ) -> Callable[["Language"], Iterable[Example]]:
if path is None: if path is None:
raise ValueError(Errors.E913) raise ValueError(Errors.E913)
util.logger.debug(f"Loading corpus from path: {path}")
return Corpus( return Corpus(
path, path,
gold_preproc=gold_preproc, gold_preproc=gold_preproc,

View File

@ -94,6 +94,7 @@ def init_vocab(
if vectors is not None: if vectors is not None:
load_vectors_into_model(nlp, vectors) load_vectors_into_model(nlp, vectors)
logger.info(f"Added vectors: {vectors}") logger.info(f"Added vectors: {vectors}")
logger.info("Finished initializing nlp object")
def load_vectors_into_model( def load_vectors_into_model(