mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 17:54:39 +03:00
* Free lexemes clobbered as happaxes
This commit is contained in:
parent
d8cb2288ce
commit
5b6457e80e
|
@ -6,6 +6,6 @@ cdef class FixedTable:
|
||||||
cdef uint64_t* keys
|
cdef uint64_t* keys
|
||||||
cdef size_t* values
|
cdef size_t* values
|
||||||
|
|
||||||
cdef int insert(self, uint64_t key, size_t value) nogil
|
cdef size_t insert(self, uint64_t key, size_t value) nogil
|
||||||
cdef size_t get(self, uint64_t key) nogil
|
cdef size_t get(self, uint64_t key) nogil
|
||||||
cdef int erase(self, uint64_t key) nogil
|
cdef int erase(self, uint64_t key) nogil
|
||||||
|
|
|
@ -24,10 +24,16 @@ cdef class FixedTable:
|
||||||
def bucket(self, uint64_t key):
|
def bucket(self, uint64_t key):
|
||||||
return _find(key, self.size)
|
return _find(key, self.size)
|
||||||
|
|
||||||
cdef int insert(self, uint64_t key, size_t value) nogil:
|
cdef size_t insert(self, uint64_t key, size_t value) nogil:
|
||||||
cdef size_t bucket = _find(key, self.size)
|
cdef size_t bucket = _find(key, self.size)
|
||||||
|
cdef size_t clobbered
|
||||||
|
if self.values[bucket] == value:
|
||||||
|
clobbered = 0
|
||||||
|
else:
|
||||||
|
clobbered = self.values[clobbered]
|
||||||
self.keys[bucket] = key
|
self.keys[bucket] = key
|
||||||
self.values[bucket] = value
|
self.values[bucket] = value
|
||||||
|
return clobbered
|
||||||
|
|
||||||
cdef size_t get(self, uint64_t key) nogil:
|
cdef size_t get(self, uint64_t key) nogil:
|
||||||
cdef size_t bucket = _find(key, self.size)
|
cdef size_t bucket = _find(key, self.size)
|
||||||
|
@ -39,6 +45,7 @@ cdef class FixedTable:
|
||||||
cdef int erase(self, uint64_t key) nogil:
|
cdef int erase(self, uint64_t key) nogil:
|
||||||
cdef size_t bucket = _find(key, self.size)
|
cdef size_t bucket = _find(key, self.size)
|
||||||
self.keys[bucket] = 0
|
self.keys[bucket] = 0
|
||||||
|
self.values[bucket] = 0
|
||||||
|
|
||||||
|
|
||||||
@cython.cdivision
|
@cython.cdivision
|
||||||
|
|
|
@ -140,7 +140,9 @@ cdef class Language:
|
||||||
cdef Lexeme* _add(self, StringHash hashed, unicode string, int split, size_t length):
|
cdef Lexeme* _add(self, StringHash hashed, unicode string, int split, size_t length):
|
||||||
cdef size_t i
|
cdef size_t i
|
||||||
word = self.init_lexeme(string, hashed, split, length)
|
word = self.init_lexeme(string, hashed, split, length)
|
||||||
self.happax.insert(hashed, <size_t>word)
|
cdef Lexeme* clobbered = <Lexeme*>self.happax.insert(hashed, <size_t>word)
|
||||||
|
if clobbered != NULL:
|
||||||
|
free(clobbered)
|
||||||
self.bacov[hashed] = string
|
self.bacov[hashed] = string
|
||||||
return word
|
return word
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user