From ab80dea14f2f0bfe705b671c8dac7b2624046765 Mon Sep 17 00:00:00 2001
From: kadarakos
Date: Thu, 25 Aug 2022 13:28:39 +0000
Subject: [PATCH] add batch-size and n_process

---
 spacy/cli/annotate.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/spacy/cli/annotate.py b/spacy/cli/annotate.py
index 954d96a9d..3643277c9 100644
--- a/spacy/cli/annotate.py
+++ b/spacy/cli/annotate.py
@@ -94,13 +94,15 @@ def annotate_cli(
     """
     import_code(code_path)
     setup_gpu(use_gpu)
-    annotate(data_path, output, model)
+    annotate(data_path, output, model, batch_size, n_process)
 
 
 def annotate(
     data_path: Path,
     output: Path,
-    model: str
+    model: str,
+    batch_size: int,
+    n_process: int
 ):
     data_path = util.ensure_path(data_path)
     output_path = util.ensure_path(output)
@@ -110,7 +112,8 @@ def annotate(
     msg.good(f"Loaded model {model}")
     vocab = nlp.vocab
     docbin = DocBin()
-    for doc in tqdm.tqdm(nlp.pipe(_stream_data(data_path, vocab))):
+    datagen = _stream_data(data_path, vocab)
+    for doc in tqdm.tqdm(nlp.pipe(datagen, batch_size=batch_size, n_process=n_process)):
         docbin.add(doc)
     if output_path.is_dir():
         output_path = output_path / "predictions.spacy"