Add batch-size and n_process options to annotate

This commit is contained in:
kadarakos 2022-08-25 13:28:39 +00:00
parent fe563e6ba9
commit ab80dea14f

View File

@ -94,13 +94,15 @@ def annotate_cli(
""" """
import_code(code_path) import_code(code_path)
setup_gpu(use_gpu) setup_gpu(use_gpu)
annotate(data_path, output, model) annotate(data_path, output, model, batch_size, n_process)
def annotate( def annotate(
data_path: Path, data_path: Path,
output: Path, output: Path,
model: str model: str,
batch_size: int,
n_process: int
): ):
data_path = util.ensure_path(data_path) data_path = util.ensure_path(data_path)
output_path = util.ensure_path(output) output_path = util.ensure_path(output)
@ -110,7 +112,8 @@ def annotate(
msg.good(f"Loaded model {model}") msg.good(f"Loaded model {model}")
vocab = nlp.vocab vocab = nlp.vocab
docbin = DocBin() docbin = DocBin()
for doc in tqdm.tqdm(nlp.pipe(_stream_data(data_path, vocab))): datagen = _stream_data(data_path, vocab)
for doc in tqdm.tqdm(nlp.pipe(datagen, batch_size=batch_size, n_process=n_process)):
docbin.add(doc) docbin.add(doc)
if output_path.is_dir(): if output_path.is_dir():
output_path = output_path / "predictions.spacy" output_path = output_path / "predictions.spacy"