Move attributes out of Candidate.

This commit is contained in:
Raphael Mitsch 2023-03-13 09:21:08 +01:00
parent ce23942320
commit 649c146e2c
2 changed files with 27 additions and 37 deletions

View File

@ -3,13 +3,13 @@ from .kb_in_memory cimport InMemoryLookupKB
from ..typedefs cimport hash_t from ..typedefs cimport hash_t
cdef class Candidate: cdef class Candidate:
cdef readonly str _entity_id_ pass
cdef readonly hash_t _entity_id
cpdef vector[float] _entity_vector
cdef float _prior_prob
cdef class InMemoryCandidate(Candidate): cdef class InMemoryCandidate(Candidate):
cdef readonly hash_t _entity_hash
cpdef vector[float] _entity_vector
cdef float _prior_prob
cdef readonly InMemoryLookupKB _kb cdef readonly InMemoryLookupKB _kb
cdef float _entity_freq cdef float _entity_freq
cdef hash_t _mention cdef hash_t _mention

View File

@ -12,42 +12,23 @@ cdef class Candidate:
DOCS: https://spacy.io/api/kb/#candidate-init DOCS: https://spacy.io/api/kb/#candidate-init
""" """
def __init__( def __init__(self):
self, # Make sure abstract Candidate is not instantiated.
entity_id: str,
entity_vector: vector[float],
prior_prob: float,
):
"""Initializes properties of abstract base class `Candidate`.
entity_id (Union[str, int]): Unique entity ID.
entity_vector (List[float]): Entity embedding.
prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In
cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
doesn't) it might be better to eschew this information and always supply the same value.
"""
# Make sure abstract KB is not instantiated.
if self.__class__ == Candidate: if self.__class__ == Candidate:
raise TypeError( raise TypeError(
Errors.E1046.format(cls_name=self.__class__.__name__) Errors.E1046.format(cls_name=self.__class__.__name__)
) )
self._entity_id_ = entity_id
# Note that hashing an int value yields the same int value.
self._entity_id = hash(entity_id)
self._entity_vector = entity_vector
self._prior_prob = prior_prob
@property @property
def entity_id(self) -> int: def entity_id(self) -> int:
"""RETURNS (int): Numerical representation of entity ID (if entity ID is numerical, this is just the entity ID, """RETURNS (int): Numerical representation of entity ID (if entity ID is numerical, this is just the entity ID,
otherwise the hash of the entity ID string).""" otherwise the hash of the entity ID string)."""
return self._entity_id raise NotImplementedError
@property @property
def entity_id_(self) -> str: def entity_id_(self) -> str:
"""RETURNS (str): String representation of entity ID.""" """RETURNS (str): String representation of entity ID."""
return self._entity_id_ raise NotImplementedError
@property @property
def mention(self) -> str: def mention(self) -> str:
@ -57,12 +38,12 @@ cdef class Candidate:
@property @property
def entity_vector(self) -> vector[float]: def entity_vector(self) -> vector[float]:
"""RETURNS (vector[float]): Entity vector.""" """RETURNS (vector[float]): Entity vector."""
return self._entity_vector raise NotImplementedError
@property @property
def prior_prob(self) -> float: def prior_prob(self) -> float:
"""RETURNS (float): Prior probability.""" """RETURNS (float): Prior probability."""
return self._prior_prob raise NotImplementedError
cdef class InMemoryCandidate(Candidate): cdef class InMemoryCandidate(Candidate):
@ -88,27 +69,36 @@ cdef class InMemoryCandidate(Candidate):
cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
doesn't) it might be better to eschew this information and always supply the same value. doesn't) it might be better to eschew this information and always supply the same value.
""" """
super().__init__( super().__init__()
entity_id=kb.vocab.strings[entity_hash],
entity_vector=entity_vector, self._entity_hash = entity_hash
prior_prob=prior_prob, self._entity_vector = entity_vector
) self._prior_prob = prior_prob
self._kb = kb self._kb = kb
self._mention = mention_hash self._mention = mention_hash
self._entity_id = entity_hash self._entity_id = entity_hash
self._entity_freq = entity_freq self._entity_freq = entity_freq
@property
def entity_id(self) -> int:
return self._entity_hash
@property
def entity_vector(self) -> vector[float]:
return self._entity_vector
@property
def prior_prob(self) -> float:
return self._prior_prob
@property @property
def mention(self) -> str: def mention(self) -> str:
"""RETURNS (str): Mention."""
return self._kb.vocab.strings[self._mention] return self._kb.vocab.strings[self._mention]
@property @property
def entity_id_(self) -> str: def entity_id_(self) -> str:
"""RETURNS (str): ID/name of this entity in the KB."""
return self._kb.vocab.strings[self._entity_id] return self._kb.vocab.strings[self._entity_id]
@property @property
def entity_freq(self) -> float: def entity_freq(self) -> float:
"""RETURNS (float): Entity frequency of this candidate's entity in the KB."""
return self._entity_freq return self._entity_freq