Drop storing mention.

This commit is contained in:
Raphael Mitsch 2023-03-09 15:04:10 +01:00
parent b0ee34185d
commit c61654eef8
3 changed files with 11 additions and 10 deletions

View File

@ -5,7 +5,6 @@ from ..typedefs cimport hash_t
cdef class Candidate: cdef class Candidate:
cdef readonly str _entity_id_ cdef readonly str _entity_id_
cdef readonly hash_t _entity_id cdef readonly hash_t _entity_id
cdef readonly str _mention
cpdef vector[float] _entity_vector cpdef vector[float] _entity_vector
cdef float _prior_prob cdef float _prior_prob
@ -14,4 +13,4 @@ cdef class InMemoryCandidate(Candidate):
cdef readonly InMemoryLookupKB _kb cdef readonly InMemoryLookupKB _kb
cdef hash_t _entity_hash cdef hash_t _entity_hash
cdef float _entity_freq cdef float _entity_freq
cdef hash_t _alias_hash cdef hash_t _mention

View File

@ -17,13 +17,11 @@ cdef class Candidate:
def __init__( def __init__(
self, self,
mention: str,
entity_id: str, entity_id: str,
entity_vector: vector[float], entity_vector: vector[float],
prior_prob: float, prior_prob: float,
): ):
"""Initializes properties of abstract base class `Candidate`. """Initializes properties of abstract base class `Candidate`.
mention (str): Mention text for this candidate.
entity_id (Union[str, int]): Unique entity ID. entity_id (Union[str, int]): Unique entity ID.
entity_vector (List[float]): Entity embedding. entity_vector (List[float]): Entity embedding.
prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
@ -37,7 +35,6 @@ cdef class Candidate:
Errors.E1046.format(cls_name=self.__class__.__name__) Errors.E1046.format(cls_name=self.__class__.__name__)
) )
self._mention = mention
self._entity_id_ = entity_id self._entity_id_ = entity_id
# Note that hashing an int value yields the same int value. # Note that hashing an int value yields the same int value.
self._entity_id = hash(entity_id) self._entity_id = hash(entity_id)
@ -58,7 +55,7 @@ cdef class Candidate:
@property @property
def mention(self) -> str: def mention(self) -> str:
"""RETURNS (str): Mention.""" """RETURNS (str): Mention."""
return self._mention raise NotImplementedError
@property @property
def entity_vector(self) -> vector[float]: def entity_vector(self) -> vector[float]:
@ -78,7 +75,7 @@ cdef class InMemoryCandidate(Candidate):
self, self,
kb: InMemoryLookupKB, kb: InMemoryLookupKB,
entity_hash: int, entity_hash: int,
mention: str, mention_hash: int,
entity_vector: vector[float], entity_vector: vector[float],
prior_prob: float, prior_prob: float,
entity_freq: float entity_freq: float
@ -88,22 +85,27 @@ cdef class InMemoryCandidate(Candidate):
entity_id (int): Entity ID as hash that can be looked up with InMemoryKB.vocab.strings.__getitem__(). entity_id (int): Entity ID as hash that can be looked up with InMemoryKB.vocab.strings.__getitem__().
entity_freq (int): Entity frequency in KB corpus. entity_freq (int): Entity frequency in KB corpus.
entity_vector (List[float]): Entity embedding. entity_vector (List[float]): Entity embedding.
mention (str): Mention. mention_hash (int): Mention hash.
prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In
cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
doesn't) it might be better to eschew this information and always supply the same value. doesn't) it might be better to eschew this information and always supply the same value.
""" """
super().__init__( super().__init__(
mention=mention,
entity_id=kb.vocab.strings[entity_hash], entity_id=kb.vocab.strings[entity_hash],
entity_vector=entity_vector, entity_vector=entity_vector,
prior_prob=prior_prob, prior_prob=prior_prob,
) )
self._kb = kb self._kb = kb
self._mention = mention_hash
self._entity_id = entity_hash self._entity_id = entity_hash
self._entity_freq = entity_freq self._entity_freq = entity_freq
@property
def mention(self) -> str:
"""RETURNS (str): Mention text, looked up in the KB's string store from the stored mention hash."""
return self._kb.vocab.strings[self._mention]
@property @property
def entity_id_(self) -> str: def entity_id_(self) -> str:
"""RETURNS (str): ID/name of this entity in the KB""" """RETURNS (str): ID/name of this entity in the KB"""

View File

@ -245,7 +245,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
InMemoryCandidate( InMemoryCandidate(
kb=self, kb=self,
entity_hash=self._entries[entry_index].entity_hash, entity_hash=self._entries[entry_index].entity_hash,
mention=alias, mention_hash=alias_hash,
entity_vector=self._vectors_table[self._entries[entry_index].vector_index], entity_vector=self._vectors_table[self._entries[entry_index].vector_index],
prior_prob=prior_prob, prior_prob=prior_prob,
entity_freq=self._entries[entry_index].freq entity_freq=self._entries[entry_index].freq