From ad6d40d028192aa8e974f8ac69ba965a2b4fa978 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 29 Sep 2020 22:53:14 +0200 Subject: [PATCH 1/2] Add logging --- spacy/training/corpus.py | 1 + spacy/training/initialize.py | 1 + 2 files changed, 2 insertions(+) diff --git a/spacy/training/corpus.py b/spacy/training/corpus.py index 90eb62474..85079f41c 100644 --- a/spacy/training/corpus.py +++ b/spacy/training/corpus.py @@ -26,6 +26,7 @@ def create_docbin_reader( limit: int = 0, augmenter: Optional[Callable] = None, ) -> Callable[["Language"], Iterable[Example]]: + util.logger.debug(f"Loading corpus from path: {path}") return Corpus( path, gold_preproc=gold_preproc, diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py index 267b77f05..e248cf314 100644 --- a/spacy/training/initialize.py +++ b/spacy/training/initialize.py @@ -94,6 +94,7 @@ def init_vocab( if vectors is not None: load_vectors_into_model(nlp, vectors) logger.info(f"Added vectors: {vectors}") + logger.info("Finished initializing nlp object") def load_vectors_into_model( From 0a1ee109db2fc30d98e41b269a751b0d3dcd8168 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 29 Sep 2020 22:53:18 +0200 Subject: [PATCH 2/2] Remove init form path --- spacy/cli/train.py | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/spacy/cli/train.py b/spacy/cli/train.py index 0b3e2580e..36a9d08d9 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -49,35 +49,11 @@ def train_cli( config = util.load_config(config_path, overrides=overrides, interpolate=False) msg.divider("Initializing pipeline") with show_validation_error(config_path, hint_fill=False): - nlp = init_pipeline(config, output_path, use_gpu=use_gpu) + nlp = init_nlp(config, use_gpu=use_gpu) msg.divider("Training pipeline") train(nlp, output_path, use_gpu=use_gpu, silent=False) -def init_pipeline( - config: Config, output_path: Optional[Path], *, use_gpu: int = -1 -) -> Language: - init_kwargs = {"use_gpu": use_gpu} - if output_path is not None: - init_path = output_path / "model-initial" - if not init_path.exists(): - msg.info(f"Initializing the pipeline in {init_path}") - nlp = init_nlp(config, **init_kwargs) - nlp.to_disk(init_path) - msg.good(f"Saved initialized pipeline to {init_path}") - else: - nlp = util.load_model(init_path) - if must_reinitialize(config, nlp.config): - msg.warn("Config has changed: need to re-initialize pipeline") - nlp = init_nlp(config, **init_kwargs) - nlp.to_disk(init_path) - msg.good(f"Re-initialized pipeline in {init_path}") - else: - msg.good(f"Loaded initialized pipeline from {init_path}") - return nlp - return init_nlp(config, **init_kwargs) - - def verify_cli_args(config_path: Path, output_path: Optional[Path] = None) -> None: # Make sure all files and paths exists if they are needed if not config_path or not config_path.exists():