mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 02:36:32 +03:00
* Free lexemes clobbered as happaxes
This commit is contained in:
parent
d8cb2288ce
commit
5b6457e80e
|
@ -6,6 +6,6 @@ cdef class FixedTable:
|
|||
cdef uint64_t* keys
|
||||
cdef size_t* values
|
||||
|
||||
cdef int insert(self, uint64_t key, size_t value) nogil
|
||||
cdef size_t insert(self, uint64_t key, size_t value) nogil
|
||||
cdef size_t get(self, uint64_t key) nogil
|
||||
cdef int erase(self, uint64_t key) nogil
|
||||
|
|
|
@ -24,10 +24,16 @@ cdef class FixedTable:
|
|||
def bucket(self, uint64_t key):
    """Return the bucket index that _find() selects for `key` in this table."""
    slot = _find(key, self.size)
    return slot
|
||||
|
||||
cdef int insert(self, uint64_t key, size_t value) nogil:
|
||||
cdef size_t insert(self, uint64_t key, size_t value) nogil:
    # Store `value` under `key` in the bucket chosen by _find(), overwriting
    # whatever that bucket held before.
    #
    # Returns the value that was displaced from the bucket, or 0 when the
    # bucket already held `value` (nothing was displaced).
    cdef size_t bucket = _find(key, self.size)
    cdef size_t clobbered
    if self.values[bucket] == value:
        # Re-inserting the same value displaces nothing.
        clobbered = 0
    else:
        # Must index by `bucket`: `clobbered` is still uninitialised here, so
        # using it as the index would read an arbitrary memory location.
        clobbered = self.values[bucket]
    self.keys[bucket] = key
    self.values[bucket] = value
    return clobbered
|
||||
|
||||
cdef size_t get(self, uint64_t key) nogil:
|
||||
cdef size_t bucket = _find(key, self.size)
|
||||
|
@ -39,6 +45,7 @@ cdef class FixedTable:
|
|||
cdef int erase(self, uint64_t key) nogil:
    # Reset the bucket that _find() selects for `key`: both the stored value
    # and the stored key are set to 0.
    cdef size_t slot = _find(key, self.size)
    self.values[slot] = 0
    self.keys[slot] = 0
|
||||
|
||||
|
||||
@cython.cdivision
|
||||
|
|
|
@ -140,7 +140,9 @@ cdef class Language:
|
|||
cdef Lexeme* _add(self, StringHash hashed, unicode string, int split, size_t length):
    # Create a lexeme for `string`, store it in the happax table under
    # `hashed`, record the hash -> string mapping in `bacov`, and return the
    # new lexeme.
    word = self.init_lexeme(string, hashed, split, length)
    # Insert exactly once. A second insert of the same pointer would find the
    # bucket already holding `word` and report nothing displaced, so the
    # previous occupant would never be freed.
    cdef Lexeme* clobbered = <Lexeme*>self.happax.insert(hashed, <size_t>word)
    if clobbered != NULL:
        # The table handed back the lexeme it displaced; release it here.
        free(clobbered)
    self.bacov[hashed] = string
    return word
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user