bugfix adding aliases

This commit is contained in:
svlandeg 2019-03-19 16:15:38 +01:00
parent c4ba942765
commit 51560bf0ed
2 changed files with 12 additions and 10 deletions

View File

@ -98,18 +98,10 @@ cdef class KnowledgeBase:
self._entry_index[entity_key] = entity_index self._entry_index[entity_key] = entity_index
return entity_index return entity_index
cdef inline int64_t c_add_aliases(self, hash_t alias_key, entities, probabilities): cdef inline int64_t c_add_aliases(self, hash_t alias_key, vector[int64_t] entry_indices, vector[float] probs):
"""Connect a mention to a list of potential entities with their prior probabilities.""" """Connect a mention to a list of potential entities with their prior probabilities."""
cdef int64_t alias_index = self._aliases_table.size() cdef int64_t alias_index = self._aliases_table.size()
cdef vector[int64_t] entry_indices
cdef vector[float] probs
for entity, prob in zip(entities, probs):
entry_index = self._entry_index[hash_string(entity)]
entry_indices.push_back(entry_index)
probs.push_back(prob)
self._aliases_table.push_back( self._aliases_table.push_back(
_AliasC( _AliasC(
entry_indices=entry_indices, entry_indices=entry_indices,

View File

@ -35,13 +35,23 @@ cdef class KnowledgeBase:
def add_alias(self, unicode alias, entities, probabilities): def add_alias(self, unicode alias, entities, probabilities):
"""For a given alias, add its potential entities and prior probabilities to the KB.""" """For a given alias, add its potential entities and prior probabilities to the KB."""
cdef hash_t alias_hash = hash_string(alias) cdef hash_t alias_hash = hash_string(alias)
cdef hash_t entity_hash
cdef vector[int64_t] entry_indices
cdef vector[float] probs
for entity, prob in zip(entities, probabilities):
entity_hash = hash_string(entity)
entry_index = <int64_t>self._entry_index.get(entity_hash)
entry_indices.push_back(int(entry_index))
probs.push_back(float(prob))
# TODO: check that alias hadn't been defined before # TODO: check that alias hadn't been defined before
# TODO: check that entity is already in this KB (entity_index is OK) # TODO: check that entity is already in this KB (entity_index is OK)
# TODO: check sum(probabilities) <= 1 # TODO: check sum(probabilities) <= 1
# TODO: check len(entities) == len(probabilities) # TODO: check len(entities) == len(probabilities)
self.c_add_aliases(alias_key=alias_hash, entities=entities, probabilities=probabilities) self.c_add_aliases(alias_key=alias_hash, entry_indices=entry_indices, probs=probs)
def get_candidates(self, unicode alias): def get_candidates(self, unicode alias):
cdef hash_t alias_hash = hash_string(alias) cdef hash_t alias_hash = hash_string(alias)