mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Remove cytoolz usage from spaCy
This commit is contained in:
		
							parent
							
								
									a7b085ae46
								
							
						
					
					
						commit
						1c71fdb805
					
				| 
						 | 
				
			
			@ -5,7 +5,6 @@ from __future__ import unicode_literals
 | 
			
		|||
 | 
			
		||||
import numpy
 | 
			
		||||
cimport numpy as np
 | 
			
		||||
-import cytoolz
 | 
			
		||||
from collections import OrderedDict, defaultdict
 | 
			
		||||
import srsly
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -302,7 +301,7 @@ class Pipe(object):
 | 
			
		|||
        Both __call__ and pipe should delegate to the `predict()`
 | 
			
		||||
        and `set_annotations()` methods.
 | 
			
		||||
        """
 | 
			
		||||
-        for docs in cytoolz.partition_all(batch_size, stream):
 | 
			
		||||
+        for docs in util.minibatch(stream, size=batch_size):
 | 
			
		||||
            docs = list(docs)
 | 
			
		||||
            scores, tensors = self.predict(docs)
 | 
			
		||||
            self.set_annotations(docs, scores, tensor=tensors)
 | 
			
		||||
| 
						 | 
				
			
			@ -479,7 +478,7 @@ class Tensorizer(Pipe):
 | 
			
		|||
        n_threads (int): Number of threads.
 | 
			
		||||
        YIELDS (iterator): A sequence of `Doc` objects, in order of input.
 | 
			
		||||
        """
 | 
			
		||||
-        for docs in cytoolz.partition_all(batch_size, stream):
 | 
			
		||||
+        for docs in util.minibatch(stream, size=batch_size):
 | 
			
		||||
            docs = list(docs)
 | 
			
		||||
            tensors = self.predict(docs)
 | 
			
		||||
            self.set_annotations(docs, tensors)
 | 
			
		||||
| 
						 | 
				
			
			@ -588,7 +587,7 @@ class Tagger(Pipe):
 | 
			
		|||
        return doc
 | 
			
		||||
 | 
			
		||||
    def pipe(self, stream, batch_size=128, n_threads=-1):
 | 
			
		||||
-        for docs in cytoolz.partition_all(batch_size, stream):
 | 
			
		||||
+        for docs in util.minibatch(stream, size=batch_size):
 | 
			
		||||
            docs = list(docs)
 | 
			
		||||
            tag_ids, tokvecs = self.predict(docs)
 | 
			
		||||
            self.set_annotations(docs, tag_ids, tensors=tokvecs)
 | 
			
		||||
| 
						 | 
				
			
			@ -1073,7 +1072,7 @@ class TextCategorizer(Pipe):
 | 
			
		|||
        return doc
 | 
			
		||||
 | 
			
		||||
    def pipe(self, stream, batch_size=128, n_threads=-1):
 | 
			
		||||
-        for docs in cytoolz.partition_all(batch_size, stream):
 | 
			
		||||
+        for docs in util.minibatch(stream, size=batch_size):
 | 
			
		||||
            docs = list(docs)
 | 
			
		||||
            scores, tensors = self.predict(docs)
 | 
			
		||||
            self.set_annotations(docs, scores, tensors=tensors)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -7,7 +7,6 @@ from __future__ import unicode_literals, print_function
 | 
			
		|||
from collections import OrderedDict
 | 
			
		||||
import numpy
 | 
			
		||||
cimport cython.parallel
 | 
			
		||||
-import cytoolz
 | 
			
		||||
import numpy.random
 | 
			
		||||
cimport numpy as np
 | 
			
		||||
from cpython.ref cimport PyObject, Py_XDECREF
 | 
			
		||||
| 
						 | 
				
			
			@ -213,10 +212,10 @@ cdef class Parser:
 | 
			
		|||
            beam_width = self.cfg.get('beam_width', 1)
 | 
			
		||||
        beam_density = self.cfg.get('beam_density', 0.)
 | 
			
		||||
        cdef Doc doc
 | 
			
		||||
-        for batch in cytoolz.partition_all(batch_size, docs):
 | 
			
		||||
+        for batch in util.minibatch(docs, size=batch_size):
 | 
			
		||||
            batch_in_order = list(batch)
 | 
			
		||||
            by_length = sorted(batch_in_order, key=lambda doc: len(doc))
 | 
			
		||||
-            for subbatch in cytoolz.partition_all(8, by_length):
 | 
			
		||||
+            for subbatch in util.minibatch(by_length, size=8):
 | 
			
		||||
                subbatch = list(subbatch)
 | 
			
		||||
                parse_states = self.predict(subbatch, beam_width=beam_width,
 | 
			
		||||
                                            beam_density=beam_density)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -11,7 +11,6 @@ from collections import OrderedDict
 | 
			
		|||
from thinc.neural._classes.model import Model
 | 
			
		||||
from thinc.neural.ops import NumpyOps
 | 
			
		||||
import functools
 | 
			
		||||
-import cytoolz
 | 
			
		||||
import itertools
 | 
			
		||||
import numpy.random
 | 
			
		||||
import srsly
 | 
			
		||||
| 
						 | 
				
			
			@ -403,7 +402,7 @@ def minibatch(items, size=8):
 | 
			
		|||
    items = iter(items)
 | 
			
		||||
    while True:
 | 
			
		||||
        batch_size = next(size_)
 | 
			
		||||
-        batch = list(cytoolz.take(int(batch_size), items))
 | 
			
		||||
+        batch = list(itertools.islice(items, int(batch_size)))
 | 
			
		||||
        if len(batch) == 0:
 | 
			
		||||
            break
 | 
			
		||||
        yield list(batch)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user