Mirror of https://github.com/explosion/spaCy.git (synced 2025-01-12 02:06:31 +03:00).
Commit 161b01d4c0 — "Tweak usage example for multi-processing" (parent: 963ccc2aee).
|
import os

# Py2/Py3 compatibility shim: the streaming example below uses xrange to
# build a very long lazy sequence. On Python 3 the builtin was removed, so
# alias it to range (which is already lazy there).
try:
    xrange
except NameError:
    xrange = range
@pytest.fixture()
def token(doc):
    """Fixture: the first token of the shared `doc` fixture."""
    return doc[0]
@pytest.mark.models
def test_multithreading(nlp):
    """Check that nlp.pipe parses documents when run with worker threads.

    Also serves as a usage example: .pipe streams its input and produces
    streaming output, so it can consume an effectively unbounded generator
    without materialising it.
    """
    texts = [u'One document.', u'...', u'Lots of documents']
    # .pipe streams input, and produces streaming output
    iter_texts = (texts[i % 3] for i in xrange(100000000))
    for i, doc in enumerate(nlp.pipe(iter_texts, batch_size=50, n_threads=4)):
        assert doc.is_parsed
        if i == 100:
            # The generator is (practically) endless; stop after a sample.
            break
Loading…
Reference in New Issue
Block a user