mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
* Tweak usage example for multi-processing
This commit is contained in:
parent
963ccc2aee
commit
161b01d4c0
|
@ -4,6 +4,12 @@ import spacy
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
xrange
|
||||||
|
except NameError:
|
||||||
|
xrange = range
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
def token(doc):
|
def token(doc):
|
||||||
return doc[0]
|
return doc[0]
|
||||||
|
@ -164,11 +170,11 @@ def test_efficient_binary_serialization(doc):
|
||||||
|
|
||||||
@pytest.mark.models
|
@pytest.mark.models
|
||||||
def test_multithreading(nlp):
|
def test_multithreading(nlp):
|
||||||
texts = [
|
texts = [u'One document.', u'...', u'Lots of documents']
|
||||||
u'One document.',
|
# .pipe streams input, and produces streaming output
|
||||||
u'A second document.',
|
iter_texts = (texts[i % 3] for i in xrange(100000000))
|
||||||
u'Another document (you should use a generator!).'
|
for i, doc in enumerate(nlp.pipe(iter_texts, batch_size=50, n_threads=4)):
|
||||||
]
|
|
||||||
for doc in nlp.pipe(texts, batch_size=1000, n_threads=4):
|
|
||||||
assert doc.is_parsed
|
assert doc.is_parsed
|
||||||
|
if i == 100:
|
||||||
|
break
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user