mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
bugfix adding aliases
This commit is contained in:
parent
c4ba942765
commit
51560bf0ed
10
spacy/kb.pxd
10
spacy/kb.pxd
|
@ -98,18 +98,10 @@ cdef class KnowledgeBase:
|
|||
self._entry_index[entity_key] = entity_index
|
||||
return entity_index
|
||||
|
||||
cdef inline int64_t c_add_aliases(self, hash_t alias_key, entities, probabilities):
|
||||
cdef inline int64_t c_add_aliases(self, hash_t alias_key, vector[int64_t] entry_indices, vector[float] probs):
|
||||
"""Connect a mention to a list of potential entities with their prior probabilities."""
|
||||
cdef int64_t alias_index = self._aliases_table.size()
|
||||
|
||||
cdef vector[int64_t] entry_indices
|
||||
cdef vector[float] probs
|
||||
|
||||
for entity, prob in zip(entities, probs):
|
||||
entry_index = self._entry_index[hash_string(entity)]
|
||||
entry_indices.push_back(entry_index)
|
||||
probs.push_back(prob)
|
||||
|
||||
self._aliases_table.push_back(
|
||||
_AliasC(
|
||||
entry_indices=entry_indices,
|
||||
|
|
12
spacy/kb.pyx
12
spacy/kb.pyx
|
@ -35,13 +35,23 @@ cdef class KnowledgeBase:
|
|||
def add_alias(self, unicode alias, entities, probabilities):
|
||||
"""For a given alias, add its potential entities and prior probabilities to the KB."""
|
||||
cdef hash_t alias_hash = hash_string(alias)
|
||||
cdef hash_t entity_hash
|
||||
|
||||
cdef vector[int64_t] entry_indices
|
||||
cdef vector[float] probs
|
||||
|
||||
for entity, prob in zip(entities, probabilities):
|
||||
entity_hash = hash_string(entity)
|
||||
entry_index = <int64_t>self._entry_index.get(entity_hash)
|
||||
entry_indices.push_back(int(entry_index))
|
||||
probs.push_back(float(prob))
|
||||
|
||||
# TODO: check that alias hadn't been defined before
|
||||
# TODO: check that entity is already in this KB (entity_index is OK)
|
||||
# TODO: check sum(probabilities) <= 1
|
||||
# TODO: check len(entities) == len(probabilities)
|
||||
|
||||
self.c_add_aliases(alias_key=alias_hash, entities=entities, probabilities=probabilities)
|
||||
self.c_add_aliases(alias_key=alias_hash, entry_indices=entry_indices, probs=probs)
|
||||
|
||||
def get_candidates(self, unicode alias):
|
||||
cdef hash_t alias_hash = hash_string(alias)
|
||||
|
|
Loading…
Reference in New Issue
Block a user