diff --git a/spacy/kb/candidate.pxd b/spacy/kb/candidate.pxd index ddb3dbca8..0e753bf99 100644 --- a/spacy/kb/candidate.pxd +++ b/spacy/kb/candidate.pxd @@ -3,13 +3,13 @@ from .kb_in_memory cimport InMemoryLookupKB from ..typedefs cimport hash_t cdef class Candidate: - cdef readonly str _entity_id_ - cdef readonly hash_t _entity_id - cpdef vector[float] _entity_vector - cdef float _prior_prob + pass cdef class InMemoryCandidate(Candidate): + cdef readonly hash_t _entity_hash + cpdef vector[float] _entity_vector + cdef float _prior_prob cdef readonly InMemoryLookupKB _kb cdef float _entity_freq cdef hash_t _mention diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx index d45d130c5..9d3a05ec8 100644 --- a/spacy/kb/candidate.pyx +++ b/spacy/kb/candidate.pyx @@ -12,42 +12,23 @@ cdef class Candidate: DOCS: https://spacy.io/api/kb/#candidate-init """ - def __init__( - self, - entity_id: str, - entity_vector: vector[float], - prior_prob: float, - ): - """Initializes properties of abstract base class `Candidate`. - entity_id (Union[str, int]): Unique entity ID. - entity_vector (List[float]): Entity embedding. - prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of - the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In - cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus - doesn't) it might be better to eschew this information and always supply the same value. - """ - # Make sure abstract KB is not instantiated. + def __init__(self): + # Make sure abstract Candidate is not instantiated. if self.__class__ == Candidate: raise TypeError( Errors.E1046.format(cls_name=self.__class__.__name__) ) - self._entity_id_ = entity_id - # Note that hashing an int value yields the same int value. 
- self._entity_id = hash(entity_id) - self._entity_vector = entity_vector - self._prior_prob = prior_prob - @property def entity_id(self) -> int: """RETURNS (int): Numerical representation of entity ID (if entity ID is numerical, this is just the entity ID, otherwise the hash of the entity ID string).""" - return self._entity_id + raise NotImplementedError @property def entity_id_(self) -> str: """RETURNS (str): String representation of entity ID.""" - return self._entity_id_ + raise NotImplementedError @property def mention(self) -> str: @@ -57,12 +38,12 @@ cdef class Candidate: @property def entity_vector(self) -> vector[float]: """RETURNS (vector[float]): Entity vector.""" - return self._entity_vector + raise NotImplementedError @property def prior_prob(self) -> float: - """RETURNS (List[float]): Entity vector.""" - return self._prior_prob + """RETURNS (float): Prior probability of entity for this mention.""" + raise NotImplementedError cdef class InMemoryCandidate(Candidate): @@ -88,27 +69,35 @@ cdef class InMemoryCandidate(Candidate): cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus doesn't) it might be better to eschew this information and always supply the same value. 
""" - super().__init__( - entity_id=kb.vocab.strings[entity_hash], - entity_vector=entity_vector, - prior_prob=prior_prob, - ) + super().__init__() + + self._entity_hash = entity_hash + self._entity_vector = entity_vector + self._prior_prob = prior_prob self._kb = kb self._mention = mention_hash - self._entity_id = entity_hash self._entity_freq = entity_freq + @property + def entity_id(self) -> int: + return self._entity_hash + + @property + def entity_vector(self) -> vector[float]: + return self._entity_vector + + @property + def prior_prob(self) -> float: + return self._prior_prob + @property def mention(self) -> str: - """RETURNS (str): Mention.""" return self._kb.vocab.strings[self._mention] @property def entity_id_(self) -> str: - """RETURNS (str): ID/name of this entity in the KB.""" - return self._kb.vocab.strings[self._entity_id] + return self._kb.vocab.strings[self._entity_hash] @property def entity_freq(self) -> float: - """RETURNS (float): Entity frequence of this candidate's entity in the KB.""" return self._entity_freq