mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	* Add pipe() method to tokenizer
This commit is contained in:
		
							parent
							
								
									4cbad510ff
								
							
						
					
					
						commit
						f9e765cae7
					
				| 
						 | 
					@ -133,6 +133,10 @@ cdef class Tokenizer:
 | 
				
			||||||
            tokens.c[tokens.length - 1].spacy = string[-1] == ' ' and not in_ws
 | 
					            tokens.c[tokens.length - 1].spacy = string[-1] == ' ' and not in_ws
 | 
				
			||||||
        return tokens
 | 
					        return tokens
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def pipe(self, texts, batch_size=1000, n_threads=2):
 | 
				
			||||||
 | 
					        for text in texts:
 | 
				
			||||||
 | 
					            yield self(text)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef int _try_cache(self, hash_t key, Doc tokens) except -1:
 | 
					    cdef int _try_cache(self, hash_t key, Doc tokens) except -1:
 | 
				
			||||||
        cached = <_Cached*>self._cache.get(key)
 | 
					        cached = <_Cached*>self._cache.get(key)
 | 
				
			||||||
        if cached == NULL:
 | 
					        if cached == NULL:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user