mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Remove cytoolz usage from spaCy
This commit is contained in:
parent
a7b085ae46
commit
1c71fdb805
|
@@ -5,7 +5,6 @@ from __future__ import unicode_literals
|
|||
|
||||
import numpy
|
||||
cimport numpy as np
|
||||
-import cytoolz
|
||||
from collections import OrderedDict, defaultdict
|
||||
import srsly
|
||||
|
||||
|
@@ -302,7 +301,7 @@ class Pipe(object):
|
|||
Both __call__ and pipe should delegate to the `predict()`
|
||||
and `set_annotations()` methods.
|
||||
"""
|
||||
-for docs in cytoolz.partition_all(batch_size, stream):
|
||||
+for docs in util.minibatch(stream, size=batch_size):
|
||||
docs = list(docs)
|
||||
scores, tensors = self.predict(docs)
|
||||
self.set_annotations(docs, scores, tensor=tensors)
|
||||
|
@@ -479,7 +478,7 @@ class Tensorizer(Pipe):
|
|||
n_threads (int): Number of threads.
|
||||
YIELDS (iterator): A sequence of `Doc` objects, in order of input.
|
||||
"""
|
||||
-for docs in cytoolz.partition_all(batch_size, stream):
|
||||
+for docs in util.minibatch(stream, size=batch_size):
|
||||
docs = list(docs)
|
||||
tensors = self.predict(docs)
|
||||
self.set_annotations(docs, tensors)
|
||||
|
@@ -588,7 +587,7 @@ class Tagger(Pipe):
|
|||
return doc
|
||||
|
||||
def pipe(self, stream, batch_size=128, n_threads=-1):
|
||||
-for docs in cytoolz.partition_all(batch_size, stream):
|
||||
+for docs in util.minibatch(stream, size=batch_size):
|
||||
docs = list(docs)
|
||||
tag_ids, tokvecs = self.predict(docs)
|
||||
self.set_annotations(docs, tag_ids, tensors=tokvecs)
|
||||
|
@@ -1073,7 +1072,7 @@ class TextCategorizer(Pipe):
|
|||
return doc
|
||||
|
||||
def pipe(self, stream, batch_size=128, n_threads=-1):
|
||||
-for docs in cytoolz.partition_all(batch_size, stream):
|
||||
+for docs in util.minibatch(stream, size=batch_size):
|
||||
docs = list(docs)
|
||||
scores, tensors = self.predict(docs)
|
||||
self.set_annotations(docs, scores, tensors=tensors)
|
||||
|
|
|
@@ -7,7 +7,6 @@ from __future__ import unicode_literals, print_function
|
|||
from collections import OrderedDict
|
||||
import numpy
|
||||
cimport cython.parallel
|
||||
-import cytoolz
|
||||
import numpy.random
|
||||
cimport numpy as np
|
||||
from cpython.ref cimport PyObject, Py_XDECREF
|
||||
|
@@ -213,10 +212,10 @@ cdef class Parser:
|
|||
beam_width = self.cfg.get('beam_width', 1)
|
||||
beam_density = self.cfg.get('beam_density', 0.)
|
||||
cdef Doc doc
|
||||
-for batch in cytoolz.partition_all(batch_size, docs):
|
||||
+for batch in util.minibatch(docs, size=batch_size):
|
||||
batch_in_order = list(batch)
|
||||
by_length = sorted(batch_in_order, key=lambda doc: len(doc))
|
||||
-for subbatch in cytoolz.partition_all(8, by_length):
|
||||
+for subbatch in util.minibatch(by_length, size=8):
|
||||
subbatch = list(subbatch)
|
||||
parse_states = self.predict(subbatch, beam_width=beam_width,
|
||||
beam_density=beam_density)
|
||||
|
|
|
@@ -11,7 +11,6 @@ from collections import OrderedDict
|
|||
from thinc.neural._classes.model import Model
|
||||
from thinc.neural.ops import NumpyOps
|
||||
import functools
|
||||
-import cytoolz
|
||||
import itertools
|
||||
import numpy.random
|
||||
import srsly
|
||||
|
@@ -403,7 +402,7 @@ def minibatch(items, size=8):
|
|||
items = iter(items)
|
||||
while True:
|
||||
batch_size = next(size_)
|
||||
-batch = list(cytoolz.take(int(batch_size), items))
|
||||
+batch = list(itertools.islice(items, int(batch_size)))
|
||||
if len(batch) == 0:
|
||||
break
|
||||
yield list(batch)
|
||||
|
|
Loading…
Reference in New Issue
Block a user