From 3b5cfec1fcf34e45d86fd2b133120be13141488a Mon Sep 17 00:00:00 2001 From: Matthw Honnibal Date: Thu, 21 May 2020 19:32:04 +0200 Subject: [PATCH] Tweak memory management in train_from_config --- spacy/cli/train_from_config.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/spacy/cli/train_from_config.py b/spacy/cli/train_from_config.py index c75c861cc..eeb21c10c 100644 --- a/spacy/cli/train_from_config.py +++ b/spacy/cli/train_from_config.py @@ -213,6 +213,12 @@ def train_from_config( if is_best_checkpoint and output_path is not None: nlp.to_disk(output_path) progress = tqdm.tqdm(total=training["eval_frequency"], leave=False) + # Clean up the objects to facilitate garbage collection. + for eg in batch: + eg.doc = None + eg.goldparse = None + eg.doc_annotation = None + eg.token_annotation = None finally: if output_path is not None: final_model_path = output_path / "model-final"