little fixes

This commit is contained in:
svlandeg 2019-04-10 16:06:09 +02:00
parent 6ae3b5699e
commit 61a33f55d2
2 changed files with 14 additions and 11 deletions

View File

@ -13,7 +13,7 @@ from .typedefs cimport hash_t
# of bits we need to keep track of the answers. # of bits we need to keep track of the answers.
cdef struct _EntryC: cdef struct _EntryC:
# The hash of this entry's unique ID and name in the KB # The hash of this entry's unique ID/name in the KB
hash_t entity_hash hash_t entity_hash
# Allows retrieval of one or more vectors. # Allows retrieval of one or more vectors.
@ -99,7 +99,7 @@ cdef class KnowledgeBase:
cdef inline int64_t c_add_entity(self, hash_t entity_hash, float prob, cdef inline int64_t c_add_entity(self, hash_t entity_hash, float prob,
int32_t* vector_rows, int feats_row): int32_t* vector_rows, int feats_row):
"""Add an entry to the knowledge base.""" """Add an entry to the knowledge base."""
# This is what we'll map the hash key to. It's where the entry will sit # This is what we'll map the entity hash key to. It's where the entry will sit
# in the vector of entries, so we can get it later. # in the vector of entries, so we can get it later.
cdef int64_t new_index = self._entries.size() cdef int64_t new_index = self._entries.size()
self._entries.push_back( self._entries.push_back(
@ -114,6 +114,8 @@ cdef class KnowledgeBase:
cdef inline int64_t c_add_aliases(self, hash_t alias_hash, vector[int64_t] entry_indices, vector[float] probs): cdef inline int64_t c_add_aliases(self, hash_t alias_hash, vector[int64_t] entry_indices, vector[float] probs):
"""Connect a mention to a list of potential entities with their prior probabilities .""" """Connect a mention to a list of potential entities with their prior probabilities ."""
# This is what we'll map the alias hash key to. It's where the alias will be defined
# in the vector of aliases.
cdef int64_t new_index = self._aliases_table.size() cdef int64_t new_index = self._aliases_table.size()
self._aliases_table.push_back( self._aliases_table.push_back(
@ -126,12 +128,14 @@ cdef class KnowledgeBase:
cdef inline _create_empty_vectors(self): cdef inline _create_empty_vectors(self):
""" """
Making sure the first element of each vector is a dummy, Initializing the vectors and making sure the first element of each vector is a dummy,
because the PreshMap maps pointing to indices in these vectors cannot contain 0 as a value because the PreshMap maps pointing to indices in these vectors cannot contain 0 as a value
cf. https://github.com/explosion/preshed/issues/17 cf. https://github.com/explosion/preshed/issues/17
""" """
cdef int32_t dummy_value = 0 cdef int32_t dummy_value = 0
self.vocab.strings.add("") self.vocab.strings.add("")
self._entry_index = PreshMap()
self._entries.push_back( self._entries.push_back(
_EntryC( _EntryC(
entity_hash=self.vocab.strings[""], entity_hash=self.vocab.strings[""],
@ -139,6 +143,8 @@ cdef class KnowledgeBase:
feats_row=dummy_value, feats_row=dummy_value,
prob=dummy_value prob=dummy_value
)) ))
self._alias_index = PreshMap()
self._aliases_table.push_back( self._aliases_table.push_back(
_AliasC( _AliasC(
entry_indices=[dummy_value], entry_indices=[dummy_value],

View File

@ -1,3 +1,4 @@
# cython: infer_types=True
# cython: profile=True # cython: profile=True
# coding: utf8 # coding: utf8
from spacy.errors import Errors, Warnings, user_warning from spacy.errors import Errors, Warnings, user_warning
@ -19,7 +20,7 @@ cdef class Candidate:
@property @property
def entity_(self): def entity_(self):
"""RETURNS (unicode): ID/name of this entity in the KB""" """RETURNS (unicode): ID/name of this entity in the KB"""
return self.kb.vocab.strings[self.entity] return self.kb.vocab.strings[self.entity_hash]
@property @property
def alias(self): def alias(self):
@ -29,7 +30,7 @@ cdef class Candidate:
@property @property
def alias_(self): def alias_(self):
"""RETURNS (unicode): ID of the original alias""" """RETURNS (unicode): ID of the original alias"""
return self.kb.vocab.strings[self.alias] return self.kb.vocab.strings[self.alias_hash]
@property @property
def prior_prob(self): def prior_prob(self):
@ -40,8 +41,6 @@ cdef class KnowledgeBase:
def __init__(self, Vocab vocab): def __init__(self, Vocab vocab):
self.vocab = vocab self.vocab = vocab
self._entry_index = PreshMap()
self._alias_index = PreshMap()
self.mem = Pool() self.mem = Pool()
self._create_empty_vectors() self._create_empty_vectors()
@ -56,8 +55,8 @@ cdef class KnowledgeBase:
def add_entity(self, unicode entity, float prob=0.5, vectors=None, features=None): def add_entity(self, unicode entity, float prob=0.5, vectors=None, features=None):
""" """
Add an entity to the KB. Add an entity to the KB, optionally specifying its log probability based on corpus frequency.
Return the hash of the entity ID at the end Return the hash of the entity ID/name at the end
""" """
cdef hash_t entity_hash = self.vocab.strings.add(entity) cdef hash_t entity_hash = self.vocab.strings.add(entity)
@ -98,8 +97,6 @@ cdef class KnowledgeBase:
user_warning(Warnings.W017.format(alias=alias)) user_warning(Warnings.W017.format(alias=alias))
return return
cdef hash_t entity_hash
cdef vector[int64_t] entry_indices cdef vector[int64_t] entry_indices
cdef vector[float] probs cdef vector[float] probs