diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index c3b8f5fae..b1e046b5b 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -5,7 +5,6 @@ from __future__ import unicode_literals
 
 import numpy
 cimport numpy as np
-import cytoolz
 from collections import OrderedDict, defaultdict
 import srsly
 
@@ -302,7 +301,7 @@ class Pipe(object):
         Both __call__ and pipe should delegate to the `predict()` and
         `set_annotations()` methods.
         """
-        for docs in cytoolz.partition_all(batch_size, stream):
+        for docs in util.minibatch(stream, size=batch_size):
             docs = list(docs)
             scores, tensors = self.predict(docs)
             self.set_annotations(docs, scores, tensor=tensors)
@@ -479,7 +478,7 @@ class Tensorizer(Pipe):
         n_threads (int): Number of threads.
         YIELDS (iterator): A sequence of `Doc` objects, in order of input.
         """
-        for docs in cytoolz.partition_all(batch_size, stream):
+        for docs in util.minibatch(stream, size=batch_size):
             docs = list(docs)
             tensors = self.predict(docs)
             self.set_annotations(docs, tensors)
@@ -588,7 +587,7 @@ class Tagger(Pipe):
         return doc
 
     def pipe(self, stream, batch_size=128, n_threads=-1):
-        for docs in cytoolz.partition_all(batch_size, stream):
+        for docs in util.minibatch(stream, size=batch_size):
             docs = list(docs)
             tag_ids, tokvecs = self.predict(docs)
             self.set_annotations(docs, tag_ids, tensors=tokvecs)
@@ -1073,7 +1072,7 @@ class TextCategorizer(Pipe):
         return doc
 
     def pipe(self, stream, batch_size=128, n_threads=-1):
-        for docs in cytoolz.partition_all(batch_size, stream):
+        for docs in util.minibatch(stream, size=batch_size):
             docs = list(docs)
             scores, tensors = self.predict(docs)
             self.set_annotations(docs, scores, tensors=tensors)
diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 186c5c16c..61bbbc967 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -7,7 +7,6 @@ from __future__ import unicode_literals, print_function
 from collections import OrderedDict
 import numpy
 cimport cython.parallel
-import cytoolz
 import numpy.random
 cimport numpy as np
 from cpython.ref cimport PyObject, Py_XDECREF
@@ -213,10 +212,10 @@ cdef class Parser:
         beam_width = self.cfg.get('beam_width', 1)
         beam_density = self.cfg.get('beam_density', 0.)
         cdef Doc doc
-        for batch in cytoolz.partition_all(batch_size, docs):
+        for batch in util.minibatch(docs, size=batch_size):
             batch_in_order = list(batch)
             by_length = sorted(batch_in_order, key=lambda doc: len(doc))
-            for subbatch in cytoolz.partition_all(8, by_length):
+            for subbatch in util.minibatch(by_length, size=8):
                 subbatch = list(subbatch)
                 parse_states = self.predict(subbatch, beam_width=beam_width,
                                             beam_density=beam_density)
diff --git a/spacy/util.py b/spacy/util.py
index 7e700be03..0a682fcaa 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -11,7 +11,6 @@ from collections import OrderedDict
 from thinc.neural._classes.model import Model
 from thinc.neural.ops import NumpyOps
 import functools
-import cytoolz
 import itertools
 import numpy.random
 import srsly
@@ -403,7 +402,7 @@ def minibatch(items, size=8):
     items = iter(items)
     while True:
         batch_size = next(size_)
-        batch = list(cytoolz.take(int(batch_size), items))
+        batch = list(itertools.islice(items, int(batch_size)))
        if len(batch) == 0:
             break
         yield list(batch)