mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 07:27:28 +03:00 
			
		
		
		
	* Free lexemes clobbered as happaxes
This commit is contained in:
		
							parent
							
								
									d8cb2288ce
								
							
						
					
					
						commit
						5b6457e80e
					
				|  | @ -6,6 +6,6 @@ cdef class FixedTable: | ||||||
|     cdef uint64_t* keys |     cdef uint64_t* keys | ||||||
|     cdef size_t* values |     cdef size_t* values | ||||||
| 
 | 
 | ||||||
|     cdef int insert(self, uint64_t key, size_t value) nogil |     cdef size_t insert(self, uint64_t key, size_t value) nogil | ||||||
|     cdef size_t get(self, uint64_t key) nogil |     cdef size_t get(self, uint64_t key) nogil | ||||||
|     cdef int erase(self, uint64_t key) nogil |     cdef int erase(self, uint64_t key) nogil | ||||||
|  |  | ||||||
|  | @ -24,10 +24,16 @@ cdef class FixedTable: | ||||||
|     def bucket(self, uint64_t key): |     def bucket(self, uint64_t key): | ||||||
|  |         # Python-callable helper: return whatever _find yields for `key` given the table's size. | ||||||
|         return _find(key, self.size) |         return _find(key, self.size) | ||||||
| 
 | 
 | ||||||
|     cdef int insert(self, uint64_t key, size_t value) nogil: |     cdef size_t insert(self, uint64_t key, size_t value) nogil: | ||||||
|  |         # Write key/value into the bucket selected by _find, overwriting the | ||||||
|  |         # bucket's previous contents. Returns the value that was overwritten, | ||||||
|  |         # or 0 when the bucket already held exactly `value`. | ||||||
|         cdef size_t bucket = _find(key, self.size) |         cdef size_t bucket = _find(key, self.size) | ||||||
|  |         cdef size_t clobbered | ||||||
|  |         if self.values[bucket] == value: | ||||||
|  |             clobbered = 0 | ||||||
|  |         else: | ||||||
|  |             # Index with `bucket`: `clobbered` is still uninitialised at this | ||||||
|  |             # point, so using it as the index would read an arbitrary slot. | ||||||
|  |             clobbered = self.values[bucket] | ||||||
|         self.keys[bucket] = key |         self.keys[bucket] = key | ||||||
|         self.values[bucket] = value |         self.values[bucket] = value | ||||||
|  |         return clobbered | ||||||
| 
 | 
 | ||||||
|     cdef size_t get(self, uint64_t key) nogil: |     cdef size_t get(self, uint64_t key) nogil: | ||||||
|         cdef size_t bucket = _find(key, self.size) |         cdef size_t bucket = _find(key, self.size) | ||||||
|  | @ -39,6 +45,7 @@ cdef class FixedTable: | ||||||
|     cdef int erase(self, uint64_t key) nogil: |     cdef int erase(self, uint64_t key) nogil: | ||||||
|  |         # Zero both the key slot and the value slot of the bucket `key` maps to. | ||||||
|  |         # The stored key is not compared with `key` before the slots are cleared. | ||||||
|         cdef size_t bucket = _find(key, self.size) |         cdef size_t bucket = _find(key, self.size) | ||||||
|         self.keys[bucket] = 0 |         self.keys[bucket] = 0 | ||||||
|  |         self.values[bucket] = 0 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @cython.cdivision | @cython.cdivision | ||||||
|  |  | ||||||
|  | @ -140,7 +140,9 @@ cdef class Language: | ||||||
|     cdef Lexeme* _add(self, StringHash hashed, unicode string, int split, size_t length): |     cdef Lexeme* _add(self, StringHash hashed, unicode string, int split, size_t length): | ||||||
|  |         # Create a lexeme via init_lexeme, store its address in the happax table | ||||||
|  |         # under `hashed`, record `string` in bacov, and return the new lexeme. | ||||||
|         cdef size_t i |         cdef size_t i | ||||||
|         word = self.init_lexeme(string, hashed, split, length) |         word = self.init_lexeme(string, hashed, split, length) | ||||||
|  |         # The table stores lexeme addresses as size_t; insert() hands back the | ||||||
|  |         # address it displaced (0 if none), which is released here. | ||||||
|         self.happax.insert(hashed, <size_t>word) |         cdef Lexeme* clobbered = <Lexeme*>self.happax.insert(hashed, <size_t>word) | ||||||
|  |         if clobbered != NULL: | ||||||
|  |             free(clobbered) | ||||||
|         self.bacov[hashed] = string |         self.bacov[hashed] = string | ||||||
|         return word    |         return word    | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user