* Tweak usage example for multi-processing

This commit is contained in:
Matthew Honnibal 2016-02-06 14:44:11 +01:00
parent 963ccc2aee
commit 161b01d4c0

View File

@@ -4,6 +4,12 @@ import spacy
import os import os
try:
xrange
except NameError:
xrange = range
@pytest.fixture() @pytest.fixture()
def token(doc): def token(doc):
return doc[0] return doc[0]
@@ -164,11 +170,11 @@ def test_efficient_binary_serialization(doc):
@pytest.mark.models @pytest.mark.models
def test_multithreading(nlp): def test_multithreading(nlp):
texts = [ texts = [u'One document.', u'...', u'Lots of documents']
u'One document.', # .pipe streams input, and produces streaming output
u'A second document.', iter_texts = (texts[i % 3] for i in xrange(100000000))
u'Another document (you should use a generator!).' for i, doc in enumerate(nlp.pipe(iter_texts, batch_size=50, n_threads=4)):
]
for doc in nlp.pipe(texts, batch_size=1000, n_threads=4):
assert doc.is_parsed assert doc.is_parsed
if i == 100:
break