Move attributes out of Candidate.

This commit is contained in:
Raphael Mitsch 2023-03-13 09:21:08 +01:00
parent ce23942320
commit 649c146e2c
2 changed files with 27 additions and 37 deletions

View File

@ -3,13 +3,13 @@ from .kb_in_memory cimport InMemoryLookupKB
from ..typedefs cimport hash_t
cdef class Candidate:
cdef readonly str _entity_id_
cdef readonly hash_t _entity_id
cpdef vector[float] _entity_vector
cdef float _prior_prob
pass
cdef class InMemoryCandidate(Candidate):
cdef readonly hash_t _entity_hash
cpdef vector[float] _entity_vector
cdef float _prior_prob
cdef readonly InMemoryLookupKB _kb
cdef float _entity_freq
cdef hash_t _mention

View File

@ -12,42 +12,23 @@ cdef class Candidate:
DOCS: https://spacy.io/api/kb/#candidate-init
"""
def __init__(
self,
entity_id: str,
entity_vector: vector[float],
prior_prob: float,
):
"""Initializes properties of abstract base class `Candidate`.
entity_id (Union[str, int]): Unique entity ID.
entity_vector (List[float]): Entity embedding.
prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In
cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
doesn't) it might be better to eschew this information and always supply the same value.
"""
# Make sure abstract KB is not instantiated.
def __init__(self):
# Make sure abstract Candidate is not instantiated.
if self.__class__ == Candidate:
raise TypeError(
Errors.E1046.format(cls_name=self.__class__.__name__)
)
self._entity_id_ = entity_id
# Note that hashing an int value yields the same int value.
self._entity_id = hash(entity_id)
self._entity_vector = entity_vector
self._prior_prob = prior_prob
@property
def entity_id(self) -> int:
"""RETURNS (int): Numerical representation of entity ID (if entity ID is numerical, this is just the entity ID,
otherwise the hash of the entity ID string)."""
return self._entity_id
raise NotImplementedError
@property
def entity_id_(self) -> str:
"""RETURNS (str): String representation of entity ID."""
return self._entity_id_
raise NotImplementedError
@property
def mention(self) -> str:
@ -57,12 +38,12 @@ cdef class Candidate:
@property
def entity_vector(self) -> vector[float]:
"""RETURNS (vector[float]): Entity vector."""
return self._entity_vector
raise NotImplementedError
@property
def prior_prob(self) -> float:
"""RETURNS (float): Prior probability of the entity for this mention."""
return self._prior_prob
raise NotImplementedError
cdef class InMemoryCandidate(Candidate):
@ -88,27 +69,36 @@ cdef class InMemoryCandidate(Candidate):
cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
doesn't) it might be better to eschew this information and always supply the same value.
"""
super().__init__(
entity_id=kb.vocab.strings[entity_hash],
entity_vector=entity_vector,
prior_prob=prior_prob,
)
super().__init__()
self._entity_hash = entity_hash
self._entity_vector = entity_vector
self._prior_prob = prior_prob
self._kb = kb
self._mention = mention_hash
self._entity_id = entity_hash
self._entity_freq = entity_freq
@property
def entity_id(self) -> int:
return self._entity_hash
@property
def entity_vector(self) -> vector[float]:
return self._entity_vector
@property
def prior_prob(self) -> float:
return self._prior_prob
@property
def mention(self) -> str:
"""RETURNS (str): Mention."""
return self._kb.vocab.strings[self._mention]
@property
def entity_id_(self) -> str:
"""RETURNS (str): ID/name of this entity in the KB."""
return self._kb.vocab.strings[self._entity_id]
@property
def entity_freq(self) -> float:
"""RETURNS (float): Entity frequency of this candidate's entity in the KB."""
return self._entity_freq