* Free lexemes clobbered as happaxes

This commit is contained in:
Matthew Honnibal 2014-08-01 07:37:50 +01:00
parent d8cb2288ce
commit 5b6457e80e
3 changed files with 12 additions and 3 deletions

View File

@ -6,6 +6,6 @@ cdef class FixedTable:
cdef uint64_t* keys cdef uint64_t* keys
cdef size_t* values cdef size_t* values
cdef int insert(self, uint64_t key, size_t value) nogil cdef size_t insert(self, uint64_t key, size_t value) nogil
cdef size_t get(self, uint64_t key) nogil cdef size_t get(self, uint64_t key) nogil
cdef int erase(self, uint64_t key) nogil cdef int erase(self, uint64_t key) nogil

View File

@ -24,10 +24,16 @@ cdef class FixedTable:
def bucket(self, uint64_t key):
    # Python-visible wrapper: report the slot index that _find selects for
    # `key` given the table's current size.
    return _find(key, self.size)
cdef size_t insert(self, uint64_t key, size_t value) nogil:
    # Store (key, value) in the bucket chosen by _find, overwriting whatever
    # currently occupies it.
    #
    # Returns the value that was evicted from the bucket so the caller can
    # release it, or 0 when the bucket already held `value` (nothing to
    # release). An evicted value of 0 is indistinguishable from "nothing
    # evicted".
    cdef size_t bucket = _find(key, self.size)
    cdef size_t clobbered
    if self.values[bucket] == value:
        clobbered = 0
    else:
        # Read the previous occupant from the bucket itself. Indexing with
        # the still-uninitialised `clobbered` would read an arbitrary
        # (possibly out-of-bounds) slot and hand garbage back to the caller.
        clobbered = self.values[bucket]
    self.keys[bucket] = key
    self.values[bucket] = value
    return clobbered
cdef size_t get(self, uint64_t key) nogil: cdef size_t get(self, uint64_t key) nogil:
cdef size_t bucket = _find(key, self.size) cdef size_t bucket = _find(key, self.size)
@ -39,6 +45,7 @@ cdef class FixedTable:
cdef int erase(self, uint64_t key) nogil:
    # Reset the slot that _find selects for `key`: both the stored key and
    # the stored value are zeroed. No explicit return statement is issued.
    # NOTE(review): the slot is cleared without comparing its stored key to
    # `key` -- confirm callers only erase keys they know are present.
    cdef size_t slot = _find(key, self.size)
    self.keys[slot] = 0
    self.values[slot] = 0
@cython.cdivision @cython.cdivision

View File

@ -140,7 +140,9 @@ cdef class Language:
cdef Lexeme* _add(self, StringHash hashed, unicode string, int split, size_t length):
    # Build a lexeme for `string`, register it in the happax table under
    # `hashed`, and record the hash -> string mapping in `bacov`.
    #
    # Whatever entry the happax insert reports as displaced is freed here,
    # so the table never holds the only pointer to a lexeme it has dropped.
    cdef size_t i
    word = self.init_lexeme(string, hashed, split, length)
    cdef Lexeme* evicted = <Lexeme*>self.happax.insert(hashed, <size_t>word)
    # insert() reports 0 (NULL) when there is nothing to release.
    if evicted != NULL:
        free(evicted)
    self.bacov[hashed] = string
    return word