Update multi_processing example

This commit is contained in:
Matthew Honnibal 2019-02-21 10:33:16 +01:00
parent a137e8b418
commit 582be8746c

View File

@@ -10,12 +10,13 @@ Compatible with: spaCy v2.0.0+
""" """
from __future__ import print_function, unicode_literals from __future__ import print_function, unicode_literals
from toolz import partition_all
from pathlib import Path from pathlib import Path
from joblib import Parallel, delayed from joblib import Parallel, delayed
from functools import partial
import thinc.extra.datasets import thinc.extra.datasets
import plac import plac
import spacy import spacy
from spacy.util import minibatch
@plac.annotations( @plac.annotations(
@@ -35,10 +36,10 @@ def main(output_dir, model="en_core_web_sm", n_jobs=4, batch_size=1000, limit=10
data, _ = thinc.extra.datasets.imdb() data, _ = thinc.extra.datasets.imdb()
texts, _ = zip(*data[-limit:]) texts, _ = zip(*data[-limit:])
print("Processing texts...") print("Processing texts...")
partitions = partition_all(batch_size, texts) partitions = minibatch(texts, size=batch_size)
executor = Parallel(n_jobs=n_jobs) executor = Parallel(n_jobs=n_jobs, backend="multiprocessing", prefer="processes")
do = delayed(transform_texts) do = delayed(partial(transform_texts, nlp))
tasks = (do(nlp, i, batch, output_dir) for i, batch in enumerate(partitions)) tasks = (do(i, batch, output_dir) for i, batch in enumerate(partitions))
executor(tasks) executor(tasks)