mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
Tidy up references to n_threads and fix default
This commit is contained in:
parent
852e1f105c
commit
cb5dbfa63a
|
@ -49,7 +49,7 @@ class SentimentAnalyser(object):
|
||||||
y = self._model.predict(X)
|
y = self._model.predict(X)
|
||||||
self.set_sentiment(doc, y)
|
self.set_sentiment(doc, y)
|
||||||
|
|
||||||
def pipe(self, docs, batch_size=1000, n_threads=2):
|
def pipe(self, docs, batch_size=1000):
|
||||||
for minibatch in cytoolz.partition_all(batch_size, docs):
|
for minibatch in cytoolz.partition_all(batch_size, docs):
|
||||||
minibatch = list(minibatch)
|
minibatch = list(minibatch)
|
||||||
sentences = []
|
sentences = []
|
||||||
|
@ -176,7 +176,7 @@ def evaluate(model_dir, texts, labels, max_length=100):
|
||||||
|
|
||||||
correct = 0
|
correct = 0
|
||||||
i = 0
|
i = 0
|
||||||
for doc in nlp.pipe(texts, batch_size=1000, n_threads=4):
|
for doc in nlp.pipe(texts, batch_size=1000):
|
||||||
correct += bool(doc.sentiment >= 0.5) == bool(labels[i])
|
correct += bool(doc.sentiment >= 0.5) == bool(labels[i])
|
||||||
i += 1
|
i += 1
|
||||||
return float(correct) / i
|
return float(correct) / i
|
||||||
|
|
|
@ -644,7 +644,7 @@ class Language(object):
|
||||||
self,
|
self,
|
||||||
texts,
|
texts,
|
||||||
as_tuples=False,
|
as_tuples=False,
|
||||||
n_threads=2,
|
n_threads=-1,
|
||||||
batch_size=1000,
|
batch_size=1000,
|
||||||
disable=[],
|
disable=[],
|
||||||
cleanup=False,
|
cleanup=False,
|
||||||
|
@ -656,7 +656,6 @@ class Language(object):
|
||||||
as_tuples (bool): If set to True, inputs should be a sequence of
|
as_tuples (bool): If set to True, inputs should be a sequence of
|
||||||
(text, context) tuples. Output will then be a sequence of
|
(text, context) tuples. Output will then be a sequence of
|
||||||
(doc, context) tuples. Defaults to False.
|
(doc, context) tuples. Defaults to False.
|
||||||
n_threads (int): Currently inactive.
|
|
||||||
batch_size (int): The number of texts to buffer.
|
batch_size (int): The number of texts to buffer.
|
||||||
disable (list): Names of the pipeline components to disable.
|
disable (list): Names of the pipeline components to disable.
|
||||||
cleanup (bool): If True, unneeded strings are freed to control memory
|
cleanup (bool): If True, unneeded strings are freed to control memory
|
||||||
|
@ -673,7 +672,6 @@ class Language(object):
|
||||||
contexts = (tc[1] for tc in text_context2)
|
contexts = (tc[1] for tc in text_context2)
|
||||||
docs = self.pipe(
|
docs = self.pipe(
|
||||||
texts,
|
texts,
|
||||||
n_threads=n_threads,
|
|
||||||
batch_size=batch_size,
|
batch_size=batch_size,
|
||||||
disable=disable,
|
disable=disable,
|
||||||
component_cfg=component_cfg,
|
component_cfg=component_cfg,
|
||||||
|
@ -690,7 +688,6 @@ class Language(object):
|
||||||
kwargs = component_cfg.get(name, {})
|
kwargs = component_cfg.get(name, {})
|
||||||
# Allow component_cfg to overwrite the top-level kwargs.
|
# Allow component_cfg to overwrite the top-level kwargs.
|
||||||
kwargs.setdefault("batch_size", batch_size)
|
kwargs.setdefault("batch_size", batch_size)
|
||||||
kwargs.setdefault("n_threads", n_threads)
|
|
||||||
if hasattr(proc, "pipe"):
|
if hasattr(proc, "pipe"):
|
||||||
docs = proc.pipe(docs, **kwargs)
|
docs = proc.pipe(docs, **kwargs)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -153,13 +153,11 @@ cdef class Matcher:
|
||||||
return default
|
return default
|
||||||
return (self._callbacks[key], self._patterns[key])
|
return (self._callbacks[key], self._patterns[key])
|
||||||
|
|
||||||
def pipe(self, docs, batch_size=1000, n_threads=2):
|
def pipe(self, docs, batch_size=1000, n_threads=-1):
|
||||||
"""Match a stream of documents, yielding them in turn.
|
"""Match a stream of documents, yielding them in turn.
|
||||||
|
|
||||||
docs (iterable): A stream of documents.
|
docs (iterable): A stream of documents.
|
||||||
batch_size (int): Number of documents to accumulate into a working set.
|
batch_size (int): Number of documents to accumulate into a working set.
|
||||||
n_threads (int): The number of threads with which to work on the buffer
|
|
||||||
in parallel, if the implementation supports multi-threading.
|
|
||||||
YIELDS (Doc): Documents, in order.
|
YIELDS (Doc): Documents, in order.
|
||||||
"""
|
"""
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
|
|
|
@ -166,14 +166,12 @@ cdef class PhraseMatcher:
|
||||||
on_match(self, doc, i, matches)
|
on_match(self, doc, i, matches)
|
||||||
return matches
|
return matches
|
||||||
|
|
||||||
def pipe(self, stream, batch_size=1000, n_threads=1, return_matches=False,
|
def pipe(self, stream, batch_size=1000, n_threads=-1, return_matches=False,
|
||||||
as_tuples=False):
|
as_tuples=False):
|
||||||
"""Match a stream of documents, yielding them in turn.
|
"""Match a stream of documents, yielding them in turn.
|
||||||
|
|
||||||
docs (iterable): A stream of documents.
|
docs (iterable): A stream of documents.
|
||||||
batch_size (int): Number of documents to accumulate into a working set.
|
batch_size (int): Number of documents to accumulate into a working set.
|
||||||
n_threads (int): The number of threads with which to work on the buffer
|
|
||||||
in parallel, if the implementation supports multi-threading.
|
|
||||||
return_matches (bool): Yield the match lists along with the docs, making
|
return_matches (bool): Yield the match lists along with the docs, making
|
||||||
results (doc, matches) tuples.
|
results (doc, matches) tuples.
|
||||||
as_tuples (bool): Interpret the input stream as (doc, context) tuples,
|
as_tuples (bool): Interpret the input stream as (doc, context) tuples,
|
||||||
|
|
|
@ -257,7 +257,6 @@ class Tensorizer(Pipe):
|
||||||
|
|
||||||
stream (iterator): A sequence of `Doc` objects to process.
|
stream (iterator): A sequence of `Doc` objects to process.
|
||||||
batch_size (int): Number of `Doc` objects to group.
|
batch_size (int): Number of `Doc` objects to group.
|
||||||
n_threads (int): Number of threads.
|
|
||||||
YIELDS (iterator): A sequence of `Doc` objects, in order of input.
|
YIELDS (iterator): A sequence of `Doc` objects, in order of input.
|
||||||
"""
|
"""
|
||||||
for docs in util.minibatch(stream, size=batch_size):
|
for docs in util.minibatch(stream, size=batch_size):
|
||||||
|
|
|
@ -205,13 +205,11 @@ cdef class Parser:
|
||||||
self.set_annotations([doc], states, tensors=None)
|
self.set_annotations([doc], states, tensors=None)
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
def pipe(self, docs, int batch_size=256, int n_threads=2, beam_width=None):
|
def pipe(self, docs, int batch_size=256, int n_threads=-1, beam_width=None):
|
||||||
"""Process a stream of documents.
|
"""Process a stream of documents.
|
||||||
|
|
||||||
stream: The sequence of documents to process.
|
stream: The sequence of documents to process.
|
||||||
batch_size (int): Number of documents to accumulate into a working set.
|
batch_size (int): Number of documents to accumulate into a working set.
|
||||||
n_threads (int): The number of threads with which to work on the buffer
|
|
||||||
in parallel.
|
|
||||||
YIELDS (Doc): Documents, in order.
|
YIELDS (Doc): Documents, in order.
|
||||||
"""
|
"""
|
||||||
if beam_width is None:
|
if beam_width is None:
|
||||||
|
|
|
@ -125,7 +125,7 @@ cdef class Tokenizer:
|
||||||
doc.c[doc.length - 1].spacy = string[-1] == " " and not in_ws
|
doc.c[doc.length - 1].spacy = string[-1] == " " and not in_ws
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
def pipe(self, texts, batch_size=1000, n_threads=2):
|
def pipe(self, texts, batch_size=1000, n_threads=-1):
|
||||||
"""Tokenize a stream of texts.
|
"""Tokenize a stream of texts.
|
||||||
|
|
||||||
texts: A sequence of unicode texts.
|
texts: A sequence of unicode texts.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user