mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	* Ugly but seemingly working fix to the token memory leak
This commit is contained in:
		
							parent
							
								
									c7bb6b329c
								
							
						
					
					
						commit
						fc7c10d7f8
					
				|  | @ -42,4 +42,6 @@ cdef class Language: | |||
|                              int split, size_t length) | ||||
|     cdef Orthography* init_orth(self, StringHash hashed, unicode lex) | ||||
| 
 | ||||
|     cdef int _happax_to_vocab(self, StringHash hashed, Lexeme_addr addr) | ||||
| 
 | ||||
|   | ||||
|  |  | |||
|  | @ -85,6 +85,7 @@ cdef class Language: | |||
|                 length = len(token_string) | ||||
|                 hashed = self.hash_string(token_string, length) | ||||
|                 word.tail = self._add(hashed, lex, 0, len(lex)) | ||||
|                 self._happax_to_vocab(hashed, <Lexeme_addr>word.tail) | ||||
|                 word = word.tail | ||||
| 
 | ||||
|     def load_clusters(self): | ||||
|  | @ -133,19 +134,19 @@ cdef class Language: | |||
|                 word_ptr = self._add(hashed, string, start, length) | ||||
|             else: | ||||
|                 # Second time word seen, move to vocab | ||||
|                 self.vocab[0][hashed] = <Lexeme_addr>word_ptr | ||||
|                 self.happax.erase(hashed) | ||||
|                 self._happax_to_vocab(hashed, <Lexeme_addr>word_ptr) | ||||
|         return <Lexeme_addr>word_ptr | ||||
| 
 | ||||
|     cdef int _happax_to_vocab(self, StringHash hashed, Lexeme_addr word_ptr): | ||||
|         self.vocab[0][hashed] = <Lexeme_addr>word_ptr | ||||
|         self.happax.erase(hashed) | ||||
| 
 | ||||
|     cdef Lexeme* _add(self, StringHash hashed, unicode string, int split, size_t length): | ||||
|         cdef size_t i | ||||
|         word = self.init_lexeme(string, hashed, split, length) | ||||
|         cdef Lexeme* clobbered = <Lexeme*>self.happax.insert(hashed, <size_t>word) | ||||
|         if clobbered != NULL: | ||||
|             # Can't do this --- we might be pointing to the Lexeme in .tail. | ||||
|             # Fix that to reduce memory, probably. | ||||
|             #free(clobbered) | ||||
|             pass | ||||
|             free(clobbered) | ||||
|         self.bacov[hashed] = string | ||||
|         return word    | ||||
| 
 | ||||
|  | @ -210,6 +211,7 @@ cdef class Language: | |||
|         # Now recurse, and deal with the tail | ||||
|         if tail_string: | ||||
|             word.tail = <Lexeme*>self.lookup(-1, tail_string, len(tail_string)) | ||||
|             self._happax_to_vocab(word.tail.sic, <Lexeme_addr>word.tail) | ||||
|         return word | ||||
| 
 | ||||
|     cdef Orthography* init_orth(self, StringHash hashed, unicode lex): | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user