mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-09 22:54:53 +03:00
add batch-size and n_process
This commit is contained in:
parent
fe563e6ba9
commit
ab80dea14f
|
@ -94,13 +94,15 @@ def annotate_cli(
|
||||||
"""
|
"""
|
||||||
import_code(code_path)
|
import_code(code_path)
|
||||||
setup_gpu(use_gpu)
|
setup_gpu(use_gpu)
|
||||||
annotate(data_path, output, model)
|
annotate(data_path, output, model, batch_size, n_process)
|
||||||
|
|
||||||
|
|
||||||
def annotate(
|
def annotate(
|
||||||
data_path: Path,
|
data_path: Path,
|
||||||
output: Path,
|
output: Path,
|
||||||
model: str
|
model: str,
|
||||||
|
batch_size: int,
|
||||||
|
n_process: int
|
||||||
):
|
):
|
||||||
data_path = util.ensure_path(data_path)
|
data_path = util.ensure_path(data_path)
|
||||||
output_path = util.ensure_path(output)
|
output_path = util.ensure_path(output)
|
||||||
|
@ -110,7 +112,8 @@ def annotate(
|
||||||
msg.good(f"Loaded model {model}")
|
msg.good(f"Loaded model {model}")
|
||||||
vocab = nlp.vocab
|
vocab = nlp.vocab
|
||||||
docbin = DocBin()
|
docbin = DocBin()
|
||||||
for doc in tqdm.tqdm(nlp.pipe(_stream_data(data_path, vocab))):
|
datagen = _stream_data(data_path, vocab)
|
||||||
|
for doc in tqdm.tqdm(nlp.pipe(datagen, batch_size=batch_size, n_process=n_process)):
|
||||||
docbin.add(doc)
|
docbin.add(doc)
|
||||||
if output_path.is_dir():
|
if output_path.is_dir():
|
||||||
output_path = output_path / "predictions.spacy"
|
output_path = output_path / "predictions.spacy"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user