From fa390618c8397be456f53c47c11a12214d679cb4 Mon Sep 17 00:00:00 2001 From: Raphael Mitsch Date: Wed, 1 Mar 2023 14:50:58 +0100 Subject: [PATCH] Adjust Candidate to support and mandate numerical entity IDs. --- spacy/kb/candidate.py | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/spacy/kb/candidate.py b/spacy/kb/candidate.py index 47eac08ba..a62249065 100644 --- a/spacy/kb/candidate.py +++ b/spacy/kb/candidate.py @@ -12,26 +12,33 @@ class Candidate(abc.ABC): """ def __init__( - self, mention: str, entity_id: Union[int, str], entity_vector: List[float] + self, mention: str, entity_id: int, entity_name: str, entity_vector: List[float], prior_prob: float ): """Initializes properties of `Candidate` instance. mention (str): Mention text for this candidate. - entity_id (Union[int, str]): Unique entity ID. + entity_id (int): Unique entity ID. + entity_name (str): Entity name. entity_vector (List[float]): Entity embedding. + prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of + the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In + cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus + doesn't) it might be better to eschew this information and always supply the same value. """ self._mention = mention self._entity_id = entity_id + self._entity_name = entity_name self._entity_vector = entity_vector + self._prior_prob = prior_prob @property - def entity(self) -> Union[int, str]: - """RETURNS (Union[int, str]): Entity ID.""" + def entity(self) -> int: + """RETURNS (int): Unique entity ID.""" return self._entity_id @property - @abc.abstractmethod def entity_(self) -> str: - """RETURNS (str): Entity name.""" + """RETURNS (int): Entity name.""" + return self._entity_name @property def mention(self) -> str: @@ -43,6 +50,11 @@ class Candidate(abc.ABC): """RETURNS (List[float]): Entity vector.""" return self._entity_vector + @property + def prior_prob(self) -> float: + """RETURNS (List[float]): Entity vector.""" + return self._prior_prob + class InMemoryCandidate(Candidate): """Candidate for InMemoryLookupKB.""" @@ -71,7 +83,9 @@ class InMemoryCandidate(Candidate): super().__init__( mention=retrieve_string_from_hash(alias_hash), entity_id=entity_hash, + entity_name=retrieve_string_from_hash(entity_hash), entity_vector=entity_vector, + prior_prob=prior_prob, ) self._retrieve_string_from_hash = retrieve_string_from_hash self._entity_hash = entity_hash @@ -84,11 +98,6 @@ class InMemoryCandidate(Candidate): """RETURNS (int): hash of the entity_id's KB ID/name""" return self._entity_hash - @property - def entity_(self) -> str: - """RETURNS (str): ID/name of this entity_id in the KB""" - return self._retrieve_string_from_hash(self._entity_hash) - @property def alias(self) -> int: """RETURNS (int): hash of the alias""" @@ -102,8 +111,3 @@ class InMemoryCandidate(Candidate): @property def entity_freq(self) -> float: return self._entity_freq - - @property - def prior_prob(self) -> float: - """RETURNS (List[float]): Entity vector.""" - return self._prior_prob