From 51560bf0edff4ae6f37c80401cfb2c738a4c9e3a Mon Sep 17 00:00:00 2001 From: svlandeg Date: Tue, 19 Mar 2019 16:15:38 +0100 Subject: [PATCH] bugfix adding aliases --- spacy/kb.pxd | 10 +--------- spacy/kb.pyx | 12 +++++++++++- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/spacy/kb.pxd b/spacy/kb.pxd index d96502f41..9f0a5e68d 100644 --- a/spacy/kb.pxd +++ b/spacy/kb.pxd @@ -98,18 +98,10 @@ cdef class KnowledgeBase: self._entry_index[entity_key] = entity_index return entity_index - cdef inline int64_t c_add_aliases(self, hash_t alias_key, entities, probabilities): + cdef inline int64_t c_add_aliases(self, hash_t alias_key, vector[int64_t] entry_indices, vector[float] probs): """Connect a mention to a list of potential entities with their prior probabilities .""" cdef int64_t alias_index = self._aliases_table.size() - cdef vector[int64_t] entry_indices - cdef vector[float] probs - - for entity, prob in zip(entities, probs): - entry_index = self._entry_index[hash_string(entity)] - entry_indices.push_back(entry_index) - probs.push_back(prob) - self._aliases_table.push_back( _AliasC( entry_indices=entry_indices, diff --git a/spacy/kb.pyx b/spacy/kb.pyx index b4369d59b..854feb069 100644 --- a/spacy/kb.pyx +++ b/spacy/kb.pyx @@ -35,13 +35,23 @@ cdef class KnowledgeBase: def add_alias(self, unicode alias, entities, probabilities): """For a given alias, add its potential entities and prior probabilies to the KB.""" cdef hash_t alias_hash = hash_string(alias) + cdef hash_t entity_hash + + cdef vector[int64_t] entry_indices + cdef vector[float] probs + + for entity, prob in zip(entities, probabilities): + entity_hash = hash_string(entity) + entry_index = self._entry_index.get(entity_hash) + entry_indices.push_back(int(entry_index)) + probs.push_back(float(prob)) # TODO: check that alias hadn't been defined before # TODO: check that entity is already in this KB (entity_index is OK) # TODO: check sum(probabilities) <= 1 # TODO: check len(entities) == len(probabilities) - self.c_add_aliases(alias_key=alias_hash, entities=entities, probabilities=probabilities) + self.c_add_aliases(alias_key=alias_hash, entry_indices=entry_indices, probs=probs) def get_candidates(self, unicode alias): cdef hash_t alias_hash = hash_string(alias)