mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
* Add initial work on simple hash table
This commit is contained in:
parent
afdc9b7ac2
commit
c8db76e3e1
18
spacy/_hashing.pxd
Normal file
18
spacy/_hashing.pxd
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
# Integer types used for hash-table keys and values.
# Cython's syntax is `ctypedef <existing type> <new name>`, so the alias being
# introduced goes last.
ctypedef size_t key_t
ctypedef size_t val_t
|
||||||
|
|
||||||
|
|
||||||
|
# One slot of the open-addressing table used by PointerHash.
cdef struct Cell:
    # Key stored in this slot; the implementation treats key == 0 as "empty".
    key_t key
    # Value associated with `key`.
    val_t value
|
||||||
|
|
||||||
|
|
||||||
|
cdef class PointerHash:
    # Number of slots allocated in `cells` (the implementation requires a
    # power of two).
    cdef size_t size
    # Number of slots currently holding a key.
    cdef size_t filled
    # Heap-allocated array of `size` cells.
    cdef Cell* cells

    # Index of the slot holding `key`, or of the empty slot where it belongs.
    cdef size_t find_slot(self, key_t key)
    # Pointer to the cell for `key` (an empty cell if the key is absent).
    cdef Cell* lookup(self, key_t key)
    # Store `value` under `key`. The declaration must carry both arguments to
    # match the implementation in the .pyx file.
    cdef void insert(self, key_t key, val_t value)
    # Re-allocate the table with `new_size` slots and rehash the entries.
    cdef void resize(self, size_t new_size)
|
51
spacy/_hashing.pyx
Normal file
51
spacy/_hashing.pyx
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
# calloc/free are needed by PointerHash below; a cimport must live at module
# level, so it sits directly above the class.
from libc.stdlib cimport calloc, free


cdef class PointerHash:
    """Open-addressing hash table mapping non-zero integer keys to integers.

    Collisions are resolved by linear probing. A cell whose key is 0 is
    treated as empty, so 0 cannot be stored as a key. The table doubles in
    size once it is roughly three-quarters full.
    """

    def __cinit__(self, size_t initial_size=8):
        """Allocate a zeroed table with `initial_size` slots.

        `initial_size` must be a non-zero power of two.
        Raises MemoryError if the allocation fails.
        """
        # Size must be a non-zero power of two. Zero would pass the bit test
        # alone but makes `key % self.size` divide by zero.
        assert initial_size != 0 and (initial_size & (initial_size - 1)) == 0
        self.size = initial_size
        self.filled = 0
        self.cells = <Cell*>calloc(self.size, sizeof(Cell))
        if self.cells == NULL:
            raise MemoryError()

    def __dealloc__(self):
        # free(NULL) is a no-op, so this is safe even if __cinit__ failed
        # before the allocation happened.
        free(self.cells)

    def __getitem__(self, key_t key):
        """Return the value stored under `key`, or None if it is absent."""
        cdef Cell* cell = self.lookup(key)
        return cell.value if cell.key != 0 else None

    def __setitem__(self, key_t key, val_t value):
        """Store `value` under `key`.

        Raises ValueError for key 0, which is reserved as the empty marker.
        """
        if key == 0:
            raise ValueError("key 0 is reserved to mark empty cells")
        self.insert(key, value)

    cdef size_t find_slot(self, key_t key):
        """Return the index of the cell holding `key`, or of the first empty
        cell on its probe sequence if the key is not present."""
        cdef size_t i = key % self.size
        # Linear probing. insert() keeps at least one cell empty, so the
        # loop terminates.
        while self.cells[i].key != 0 and self.cells[i].key != key:
            i = (i + 1) % self.size
        return i

    cdef Cell* lookup(self, key_t key):
        """Return a pointer to the cell for `key`; its key field is 0 when
        the key is absent."""
        cdef size_t i = self.find_slot(key)
        return &self.cells[i]

    cdef void insert(self, key_t key, val_t value):
        """Insert or overwrite the entry for `key` (which must be non-zero),
        growing the table when it becomes about three-quarters full."""
        cdef size_t i = self.find_slot(key)
        if self.cells[i].key == 0:
            # Newly occupied slot.
            self.cells[i].key = key
            self.filled += 1
        self.cells[i].value = value
        if (self.filled + 1) * 4 >= (self.size * 3):
            self.resize(self.size * 2)

    cdef void resize(self, size_t new_size):
        """Re-allocate the table with `new_size` slots and rehash every
        occupied cell into it.

        `new_size` must be a non-zero power of two large enough to keep the
        table at most three-quarters full. On allocation failure the existing
        table is left untouched and MemoryError is raised.
        NOTE(review): whether that exception reaches the caller depends on
        the exception spec of this `cdef void` method -- confirm.
        """
        cdef Cell* old_cells = self.cells
        cdef size_t old_size = self.size
        cdef Cell* new_cells
        cdef size_t i
        cdef size_t slot

        assert new_size != 0 and (new_size & (new_size - 1)) == 0  # Must be a power of 2
        assert self.filled * 4 <= new_size * 3

        new_cells = <Cell*>calloc(new_size, sizeof(Cell))
        if new_cells == NULL:
            raise MemoryError()

        self.cells = new_cells
        self.size = new_size

        # Rehash straight from the old array. Going through insert() would
        # double-count `filled` and could re-enter resize() mid-copy.
        for i in range(old_size):
            if old_cells[i].key != 0:
                slot = self.find_slot(old_cells[i].key)
                self.cells[slot].key = old_cells[i].key
                self.cells[slot].value = old_cells[i].value
        free(old_cells)
|
Loading…
Reference in New Issue
Block a user