enable nogil for cython functions in kb.pxd

This commit is contained in:
svlandeg 2019-04-10 17:25:10 +02:00
parent 61a33f55d2
commit 9a7d534b1b
4 changed files with 52 additions and 39 deletions

View File

@@ -28,7 +28,7 @@ def create_kb(vocab):
print() print()
alias_0 = "Douglas" alias_0 = "Douglas"
print("adding alias", alias_0) print("adding alias", alias_0)
kb.add_alias(alias=alias_0, entities=[entity_0, entity_1, entity_2], probabilities=[0.1, 0.6, 0.2]) kb.add_alias(alias=alias_0, entities=[entity_0, entity_1, entity_2], probabilities=[0.6, 0.1, 0.2])
alias_1 = "Douglas Adams" alias_1 = "Douglas Adams"
print("adding alias", alias_1) print("adding alias", alias_1)

View File

@@ -97,58 +97,64 @@ cdef class KnowledgeBase:
cdef object _features_table cdef object _features_table
cdef inline int64_t c_add_entity(self, hash_t entity_hash, float prob, cdef inline int64_t c_add_entity(self, hash_t entity_hash, float prob,
int32_t* vector_rows, int feats_row): int32_t* vector_rows, int feats_row) nogil:
"""Add an entry to the knowledge base.""" """Add an entry to the vector of entries.
After calling this method, make sure to update also the _entry_index using the return value"""
# This is what we'll map the entity hash key to. It's where the entry will sit # This is what we'll map the entity hash key to. It's where the entry will sit
# in the vector of entries, so we can get it later. # in the vector of entries, so we can get it later.
cdef int64_t new_index = self._entries.size() cdef int64_t new_index = self._entries.size()
self._entries.push_back(
_EntryC( # Avoid struct initializer to enable nogil, cf https://github.com/cython/cython/issues/1642
entity_hash=entity_hash, cdef _EntryC entry
vector_rows=vector_rows, entry.entity_hash = entity_hash
feats_row=feats_row, entry.vector_rows = vector_rows
prob=prob entry.feats_row = feats_row
)) entry.prob = prob
self._entry_index[entity_hash] = new_index
self._entries.push_back(entry)
return new_index return new_index
cdef inline int64_t c_add_aliases(self, hash_t alias_hash, vector[int64_t] entry_indices, vector[float] probs): cdef inline int64_t c_add_aliases(self, hash_t alias_hash, vector[int64_t] entry_indices, vector[float] probs) nogil:
"""Connect a mention to a list of potential entities with their prior probabilities .""" """Connect a mention to a list of potential entities with their prior probabilities .
After calling this method, make sure to update also the _alias_index using the return value"""
# This is what we'll map the alias hash key to. It's where the alias will be defined # This is what we'll map the alias hash key to. It's where the alias will be defined
# in the vector of aliases. # in the vector of aliases.
cdef int64_t new_index = self._aliases_table.size() cdef int64_t new_index = self._aliases_table.size()
self._aliases_table.push_back( # Avoid struct initializer to enable nogil
_AliasC( cdef _AliasC alias
entry_indices=entry_indices, alias.entry_indices = entry_indices
probs=probs alias.probs = probs
))
self._alias_index[alias_hash] = new_index self._aliases_table.push_back(alias)
return new_index return new_index
cdef inline _create_empty_vectors(self): cdef inline void _create_empty_vectors(self, hash_t dummy_hash) nogil:
""" """
Initializing the vectors and making sure the first element of each vector is a dummy, Initializing the vectors and making sure the first element of each vector is a dummy,
because the PreshMap maps pointing to indices in these vectors can not contain 0 as value because the PreshMap maps pointing to indices in these vectors can not contain 0 as value
cf. https://github.com/explosion/preshed/issues/17 cf. https://github.com/explosion/preshed/issues/17
""" """
cdef int32_t dummy_value = 0 cdef int32_t dummy_value = 0
self.vocab.strings.add("")
self._entry_index = PreshMap() # Avoid struct initializer to enable nogil
self._entries.push_back( cdef _EntryC entry
_EntryC( entry.entity_hash = dummy_hash
entity_hash=self.vocab.strings[""], entry.vector_rows = &dummy_value
vector_rows=&dummy_value, entry.feats_row = dummy_value
feats_row=dummy_value, entry.prob = dummy_value
prob=dummy_value
))
self._alias_index = PreshMap() # Avoid struct initializer to enable nogil
self._aliases_table.push_back( cdef vector[int64_t] dummy_entry_indices
_AliasC( dummy_entry_indices.push_back(0)
entry_indices=[dummy_value], cdef vector[float] dummy_probs
probs=[dummy_value] dummy_probs.push_back(0)
))
cdef _AliasC alias
alias.entry_indices = dummy_entry_indices
alias.probs = dummy_probs
self._entries.push_back(entry)
self._aliases_table.push_back(alias)

View File

@@ -42,7 +42,11 @@ cdef class KnowledgeBase:
def __init__(self, Vocab vocab): def __init__(self, Vocab vocab):
self.vocab = vocab self.vocab = vocab
self.mem = Pool() self.mem = Pool()
self._create_empty_vectors() self._entry_index = PreshMap()
self._alias_index = PreshMap()
self.vocab.strings.add("")
self._create_empty_vectors(dummy_hash=self.vocab.strings[""])
def __len__(self): def __len__(self):
return self.get_size_entities() return self.get_size_entities()
@@ -66,8 +70,10 @@ cdef class KnowledgeBase:
return return
cdef int32_t dummy_value = 342 cdef int32_t dummy_value = 342
self.c_add_entity(entity_hash=entity_hash, prob=prob, new_index = self.c_add_entity(entity_hash=entity_hash, prob=prob,
vector_rows=&dummy_value, feats_row=dummy_value) vector_rows=&dummy_value, feats_row=dummy_value)
self._entry_index[entity_hash] = new_index
# TODO self._vectors_table.get_pointer(vectors), # TODO self._vectors_table.get_pointer(vectors),
# self._features_table.get(features)) # self._features_table.get(features))
@@ -109,7 +115,8 @@ cdef class KnowledgeBase:
entry_indices.push_back(int(entry_index)) entry_indices.push_back(int(entry_index))
probs.push_back(float(prob)) probs.push_back(float(prob))
self.c_add_aliases(alias_hash=alias_hash, entry_indices=entry_indices, probs=probs) new_index = self.c_add_aliases(alias_hash=alias_hash, entry_indices=entry_indices, probs=probs)
self._alias_index[alias_hash] = new_index
return alias_hash return alias_hash