add batch-size and n_process

2025-08-09 22:54:53 +03:00 · 2022-08-25 13:28:39 +00:00 · 2022-08-25 13:28:39 +00:00 · ab80dea14f
commit ab80dea14f
parent fe563e6ba9
1 changed file with 6 additions and 3 deletions
--- a/spacy/cli/annotate.py
+++ b/spacy/cli/annotate.py
@@ -94,13 +94,15 @@ def annotate_cli(
    """
    import_code(code_path)
    setup_gpu(use_gpu)
-    annotate(data_path, output, model)
+    annotate(data_path, output, model, batch_size, n_process)
 def annotate(
    data_path: Path,
    output: Path,
-    model: str
+    model: str,
+    batch_size: int,
+    n_process: int
 ):
    data_path = util.ensure_path(data_path)
    output_path = util.ensure_path(output)
@@ -110,7 +112,8 @@ def annotate(
    msg.good(f"Loaded model {model}")
    vocab = nlp.vocab
    docbin = DocBin()
-    for doc in tqdm.tqdm(nlp.pipe(_stream_data(data_path, vocab))):
+    datagen = _stream_data(data_path, vocab)
+    for doc in tqdm.tqdm(nlp.pipe(datagen, batch_size=batch_size, n_process=n_process)):
        docbin.add(doc)
    if output_path.is_dir():
        output_path = output_path / "predictions.spacy"