mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 10:46:29 +03:00
bugfix adding aliases
This commit is contained in:
parent
c4ba942765
commit
51560bf0ed
10
spacy/kb.pxd
10
spacy/kb.pxd
|
@ -98,18 +98,10 @@ cdef class KnowledgeBase:
|
||||||
self._entry_index[entity_key] = entity_index
|
self._entry_index[entity_key] = entity_index
|
||||||
return entity_index
|
return entity_index
|
||||||
|
|
||||||
cdef inline int64_t c_add_aliases(self, hash_t alias_key, entities, probabilities):
|
cdef inline int64_t c_add_aliases(self, hash_t alias_key, vector[int64_t] entry_indices, vector[float] probs):
|
||||||
"""Connect a mention to a list of potential entities with their prior probabilities."""
|
"""Connect a mention to a list of potential entities with their prior probabilities."""
|
||||||
cdef int64_t alias_index = self._aliases_table.size()
|
cdef int64_t alias_index = self._aliases_table.size()
|
||||||
|
|
||||||
cdef vector[int64_t] entry_indices
|
|
||||||
cdef vector[float] probs
|
|
||||||
|
|
||||||
for entity, prob in zip(entities, probs):
|
|
||||||
entry_index = self._entry_index[hash_string(entity)]
|
|
||||||
entry_indices.push_back(entry_index)
|
|
||||||
probs.push_back(prob)
|
|
||||||
|
|
||||||
self._aliases_table.push_back(
|
self._aliases_table.push_back(
|
||||||
_AliasC(
|
_AliasC(
|
||||||
entry_indices=entry_indices,
|
entry_indices=entry_indices,
|
||||||
|
|
12
spacy/kb.pyx
12
spacy/kb.pyx
|
@ -35,13 +35,23 @@ cdef class KnowledgeBase:
|
||||||
def add_alias(self, unicode alias, entities, probabilities):
|
def add_alias(self, unicode alias, entities, probabilities):
|
||||||
"""For a given alias, add its potential entities and prior probabilities to the KB."""
|
"""For a given alias, add its potential entities and prior probabilities to the KB."""
|
||||||
cdef hash_t alias_hash = hash_string(alias)
|
cdef hash_t alias_hash = hash_string(alias)
|
||||||
|
cdef hash_t entity_hash
|
||||||
|
|
||||||
|
cdef vector[int64_t] entry_indices
|
||||||
|
cdef vector[float] probs
|
||||||
|
|
||||||
|
for entity, prob in zip(entities, probabilities):
|
||||||
|
entity_hash = hash_string(entity)
|
||||||
|
entry_index = <int64_t>self._entry_index.get(entity_hash)
|
||||||
|
entry_indices.push_back(int(entry_index))
|
||||||
|
probs.push_back(float(prob))
|
||||||
|
|
||||||
# TODO: check that alias hadn't been defined before
|
# TODO: check that alias hadn't been defined before
|
||||||
# TODO: check that entity is already in this KB (entity_index is OK)
|
# TODO: check that entity is already in this KB (entity_index is OK)
|
||||||
# TODO: check sum(probabilities) <= 1
|
# TODO: check sum(probabilities) <= 1
|
||||||
# TODO: check len(entities) == len(probabilities)
|
# TODO: check len(entities) == len(probabilities)
|
||||||
|
|
||||||
self.c_add_aliases(alias_key=alias_hash, entities=entities, probabilities=probabilities)
|
self.c_add_aliases(alias_key=alias_hash, entry_indices=entry_indices, probs=probs)
|
||||||
|
|
||||||
def get_candidates(self, unicode alias):
|
def get_candidates(self, unicode alias):
|
||||||
cdef hash_t alias_hash = hash_string(alias)
|
cdef hash_t alias_hash = hash_string(alias)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user