Tidy up references to n_threads and fix default

This commit is contained in:
parent 852e1f105c
commit cb5dbfa63a
@@ -49,7 +49,7 @@ class SentimentAnalyser(object):
         y = self._model.predict(X)
         self.set_sentiment(doc, y)

-    def pipe(self, docs, batch_size=1000, n_threads=2):
+    def pipe(self, docs, batch_size=1000):
         for minibatch in cytoolz.partition_all(batch_size, docs):
             minibatch = list(minibatch)
             sentences = []
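The component's `pipe` is now driven by `batch_size` alone. As a minimal sketch of the batching pattern it relies on (the data here is illustrative):

    import cytoolz

    # partition_all yields chunks of up to batch_size items, so a stream can
    # be processed in fixed-size minibatches without materialising it fully.
    docs = ("document %d" % i for i in range(2500))
    for minibatch in cytoolz.partition_all(1000, docs):
        minibatch = list(minibatch)
        print(len(minibatch))  # 1000, 1000, 500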
@@ -176,7 +176,7 @@ def evaluate(model_dir, texts, labels, max_length=100):

     correct = 0
     i = 0
-    for doc in nlp.pipe(texts, batch_size=1000, n_threads=4):
+    for doc in nlp.pipe(texts, batch_size=1000):
         correct += bool(doc.sentiment >= 0.5) == bool(labels[i])
         i += 1
     return float(correct) / i
@@ -644,7 +644,7 @@ class Language(object):
         self,
         texts,
         as_tuples=False,
-        n_threads=2,
+        n_threads=-1,
         batch_size=1000,
         disable=[],
         cleanup=False,
@@ -656,7 +656,6 @@ class Language(object):
         as_tuples (bool): If set to True, inputs should be a sequence of
             (text, context) tuples. Output will then be a sequence of
             (doc, context) tuples. Defaults to False.
-        n_threads (int): Currently inactive.
         batch_size (int): The number of texts to buffer.
         disable (list): Names of the pipeline components to disable.
         cleanup (bool): If True, unneeded strings are freed to control memory
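Since `n_threads` is now documented as inactive and defaults to -1, callers can simply drop it. A minimal usage sketch (the model name is an assumption; any installed model works):

    import spacy

    nlp = spacy.load("en_core_web_sm")  # model name is illustrative
    texts = ["This is the first text.", "This is the second text."]
    # No n_threads argument needed; batch_size alone controls buffering.
    for doc in nlp.pipe(texts, batch_size=1000):
        print([token.text for token in doc])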
@@ -673,7 +672,6 @@ class Language(object):
             contexts = (tc[1] for tc in text_context2)
             docs = self.pipe(
                 texts,
-                n_threads=n_threads,
                 batch_size=batch_size,
                 disable=disable,
                 component_cfg=component_cfg,
@@ -690,7 +688,6 @@ class Language(object):
             kwargs = component_cfg.get(name, {})
             # Allow component_cfg to overwrite the top-level kwargs.
             kwargs.setdefault("batch_size", batch_size)
-            kwargs.setdefault("n_threads", n_threads)
             if hasattr(proc, "pipe"):
                 docs = proc.pipe(docs, **kwargs)
             else:
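With the `n_threads` default removed, only `batch_size` is seeded into each component's kwargs. A small sketch of the `setdefault` pattern (the `component_cfg` values are hypothetical):

    component_cfg = {"tagger": {"batch_size": 128}}  # hypothetical override
    batch_size = 1000

    for name in ("tagger", "parser"):
        kwargs = component_cfg.get(name, {})
        # setdefault keeps an explicit per-component value if one was given,
        # and falls back to the top-level default otherwise.
        kwargs.setdefault("batch_size", batch_size)
        print(name, kwargs)
    # tagger {'batch_size': 128}
    # parser {'batch_size': 1000}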
@@ -153,13 +153,11 @@ cdef class Matcher:
             return default
         return (self._callbacks[key], self._patterns[key])

-    def pipe(self, docs, batch_size=1000, n_threads=2):
+    def pipe(self, docs, batch_size=1000, n_threads=-1):
         """Match a stream of documents, yielding them in turn.

         docs (iterable): A stream of documents.
         batch_size (int): Number of documents to accumulate into a working set.
-        n_threads (int): The number of threads with which to work on the buffer
-            in parallel, if the implementation supports multi-threading.
         YIELDS (Doc): Documents, in order.
         """
         for doc in docs:
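`Matcher.pipe` keeps accepting `n_threads` for backwards compatibility but ignores it. A hedged usage sketch (pattern and texts are illustrative, using the v2-style `Matcher.add` signature):

    import spacy
    from spacy.matcher import Matcher

    nlp = spacy.blank("en")
    matcher = Matcher(nlp.vocab)
    matcher.add("HELLO", None, [{"LOWER": "hello"}])  # v2-style add
    docs = nlp.pipe(["hello world", "nothing to see"])
    # n_threads can simply be omitted; matching runs single-threaded.
    for doc in matcher.pipe(docs, batch_size=1000):
        print(doc.text, matcher(doc))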
@@ -166,14 +166,12 @@ cdef class PhraseMatcher:
             on_match(self, doc, i, matches)
         return matches

-    def pipe(self, stream, batch_size=1000, n_threads=1, return_matches=False,
+    def pipe(self, stream, batch_size=1000, n_threads=-1, return_matches=False,
              as_tuples=False):
         """Match a stream of documents, yielding them in turn.

         docs (iterable): A stream of documents.
         batch_size (int): Number of documents to accumulate into a working set.
-        n_threads (int): The number of threads with which to work on the buffer
-            in parallel, if the implementation supports multi-threading.
         return_matches (bool): Yield the match lists along with the docs, making
             results (doc, matches) tuples.
         as_tuples (bool): Interpret the input stream as (doc, context) tuples,
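`PhraseMatcher.pipe` gets the same treatment; the `return_matches` flag documented above can be sketched like this (the terminology pattern is illustrative, again using the v2-style `add`):

    import spacy
    from spacy.matcher import PhraseMatcher

    nlp = spacy.blank("en")
    matcher = PhraseMatcher(nlp.vocab)
    matcher.add("GREETING", None, nlp("hello world"))  # v2-style add
    docs = nlp.pipe(["hello world again", "nothing here"])
    # With return_matches=True the stream yields (doc, matches) tuples.
    for doc, matches in matcher.pipe(docs, return_matches=True):
        print(doc.text, matches)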
@@ -257,7 +257,6 @@ class Tensorizer(Pipe):

         stream (iterator): A sequence of `Doc` objects to process.
         batch_size (int): Number of `Doc` objects to group.
-        n_threads (int): Number of threads.
         YIELDS (iterator): A sequence of `Doc` objects, in order of input.
         """
         for docs in util.minibatch(stream, size=batch_size):
@@ -205,13 +205,11 @@ cdef class Parser:
         self.set_annotations([doc], states, tensors=None)
         return doc

-    def pipe(self, docs, int batch_size=256, int n_threads=2, beam_width=None):
+    def pipe(self, docs, int batch_size=256, int n_threads=-1, beam_width=None):
         """Process a stream of documents.

         stream: The sequence of documents to process.
         batch_size (int): Number of documents to accumulate into a working set.
-        n_threads (int): The number of threads with which to work on the buffer
-            in parallel.
         YIELDS (Doc): Documents, in order.
         """
         if beam_width is None:
@@ -125,7 +125,7 @@ cdef class Tokenizer:
         doc.c[doc.length - 1].spacy = string[-1] == " " and not in_ws
         return doc

-    def pipe(self, texts, batch_size=1000, n_threads=2):
+    def pipe(self, texts, batch_size=1000, n_threads=-1):
         """Tokenize a stream of texts.

         texts: A sequence of unicode texts.
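And on the tokenizer itself, `batch_size` remains the only effective knob; a minimal sketch using a blank pipeline:

    import spacy

    nlp = spacy.blank("en")  # tokenizer-only pipeline
    texts = ["One short text.", "Another short text."]
    # n_threads now defaults to -1 and is ignored.
    for doc in nlp.tokenizer.pipe(texts, batch_size=1000):
        print([t.text for t in doc])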