mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 09:44:36 +03:00
Remove cytoolz usage from spaCy
This commit is contained in:
parent
a7b085ae46
commit
1c71fdb805
|
@@ -5,7 +5,6 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
import cytoolz
|
|
||||||
from collections import OrderedDict, defaultdict
|
from collections import OrderedDict, defaultdict
|
||||||
import srsly
|
import srsly
|
||||||
|
|
||||||
|
@@ -302,7 +301,7 @@ class Pipe(object):
|
||||||
Both __call__ and pipe should delegate to the `predict()`
|
Both __call__ and pipe should delegate to the `predict()`
|
||||||
and `set_annotations()` methods.
|
and `set_annotations()` methods.
|
||||||
"""
|
"""
|
||||||
for docs in cytoolz.partition_all(batch_size, stream):
|
for docs in util.minibatch(stream, size=batch_size):
|
||||||
docs = list(docs)
|
docs = list(docs)
|
||||||
scores, tensors = self.predict(docs)
|
scores, tensors = self.predict(docs)
|
||||||
self.set_annotations(docs, scores, tensor=tensors)
|
self.set_annotations(docs, scores, tensor=tensors)
|
||||||
|
@@ -479,7 +478,7 @@ class Tensorizer(Pipe):
|
||||||
n_threads (int): Number of threads.
|
n_threads (int): Number of threads.
|
||||||
YIELDS (iterator): A sequence of `Doc` objects, in order of input.
|
YIELDS (iterator): A sequence of `Doc` objects, in order of input.
|
||||||
"""
|
"""
|
||||||
for docs in cytoolz.partition_all(batch_size, stream):
|
for docs in util.minibatch(stream, size=batch_size):
|
||||||
docs = list(docs)
|
docs = list(docs)
|
||||||
tensors = self.predict(docs)
|
tensors = self.predict(docs)
|
||||||
self.set_annotations(docs, tensors)
|
self.set_annotations(docs, tensors)
|
||||||
|
@@ -588,7 +587,7 @@ class Tagger(Pipe):
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
def pipe(self, stream, batch_size=128, n_threads=-1):
|
def pipe(self, stream, batch_size=128, n_threads=-1):
|
||||||
for docs in cytoolz.partition_all(batch_size, stream):
|
for docs in util.minibatch(stream, size=batch_size):
|
||||||
docs = list(docs)
|
docs = list(docs)
|
||||||
tag_ids, tokvecs = self.predict(docs)
|
tag_ids, tokvecs = self.predict(docs)
|
||||||
self.set_annotations(docs, tag_ids, tensors=tokvecs)
|
self.set_annotations(docs, tag_ids, tensors=tokvecs)
|
||||||
|
@@ -1073,7 +1072,7 @@ class TextCategorizer(Pipe):
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
def pipe(self, stream, batch_size=128, n_threads=-1):
|
def pipe(self, stream, batch_size=128, n_threads=-1):
|
||||||
for docs in cytoolz.partition_all(batch_size, stream):
|
for docs in util.minibatch(stream, size=batch_size):
|
||||||
docs = list(docs)
|
docs = list(docs)
|
||||||
scores, tensors = self.predict(docs)
|
scores, tensors = self.predict(docs)
|
||||||
self.set_annotations(docs, scores, tensors=tensors)
|
self.set_annotations(docs, scores, tensors=tensors)
|
||||||
|
|
|
@@ -7,7 +7,6 @@ from __future__ import unicode_literals, print_function
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
import numpy
|
import numpy
|
||||||
cimport cython.parallel
|
cimport cython.parallel
|
||||||
import cytoolz
|
|
||||||
import numpy.random
|
import numpy.random
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
from cpython.ref cimport PyObject, Py_XDECREF
|
from cpython.ref cimport PyObject, Py_XDECREF
|
||||||
|
@@ -213,10 +212,10 @@ cdef class Parser:
|
||||||
beam_width = self.cfg.get('beam_width', 1)
|
beam_width = self.cfg.get('beam_width', 1)
|
||||||
beam_density = self.cfg.get('beam_density', 0.)
|
beam_density = self.cfg.get('beam_density', 0.)
|
||||||
cdef Doc doc
|
cdef Doc doc
|
||||||
for batch in cytoolz.partition_all(batch_size, docs):
|
for batch in util.minibatch(docs, size=batch_size):
|
||||||
batch_in_order = list(batch)
|
batch_in_order = list(batch)
|
||||||
by_length = sorted(batch_in_order, key=lambda doc: len(doc))
|
by_length = sorted(batch_in_order, key=lambda doc: len(doc))
|
||||||
for subbatch in cytoolz.partition_all(8, by_length):
|
for subbatch in util.minibatch(by_length, size=8):
|
||||||
subbatch = list(subbatch)
|
subbatch = list(subbatch)
|
||||||
parse_states = self.predict(subbatch, beam_width=beam_width,
|
parse_states = self.predict(subbatch, beam_width=beam_width,
|
||||||
beam_density=beam_density)
|
beam_density=beam_density)
|
||||||
|
|
|
@@ -11,7 +11,6 @@ from collections import OrderedDict
|
||||||
from thinc.neural._classes.model import Model
|
from thinc.neural._classes.model import Model
|
||||||
from thinc.neural.ops import NumpyOps
|
from thinc.neural.ops import NumpyOps
|
||||||
import functools
|
import functools
|
||||||
import cytoolz
|
|
||||||
import itertools
|
import itertools
|
||||||
import numpy.random
|
import numpy.random
|
||||||
import srsly
|
import srsly
|
||||||
|
@@ -403,7 +402,7 @@ def minibatch(items, size=8):
|
||||||
items = iter(items)
|
items = iter(items)
|
||||||
while True:
|
while True:
|
||||||
batch_size = next(size_)
|
batch_size = next(size_)
|
||||||
batch = list(cytoolz.take(int(batch_size), items))
|
batch = list(itertools.islice(items, int(batch_size)))
|
||||||
if len(batch) == 0:
|
if len(batch) == 0:
|
||||||
break
|
break
|
||||||
yield list(batch)
|
yield list(batch)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user