From 6adc15178f74529aa1c01a390ab0c09c682e2329 Mon Sep 17 00:00:00 2001 From: Raphael Mitsch Date: Mon, 13 Mar 2023 14:26:14 +0100 Subject: [PATCH] Partially fix alias/mention terminology usage. Convert Candidate to interface. --- spacy/errors.py | 2 +- spacy/kb/candidate.pyx | 6 +++--- spacy/kb/kb.pyx | 2 +- spacy/kb/kb_in_memory.pyx | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/spacy/errors.py b/spacy/errors.py index 92770b8a8..30446e7ea 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -82,7 +82,7 @@ class Warnings(metaclass=ErrorsWithCodes): "ignoring the duplicate entry.") W021 = ("Unexpected hash collision in PhraseMatcher. Matches may be " "incorrect. Modify PhraseMatcher._terminal_hash to fix.") - W024 = ("Entity '{entity}' - mention '{mention}' combination already exists in " + W024 = ("Entity '{entity}' - alias '{alias}' combination already exists in " "the Knowledge Base.") W026 = ("Unable to set all sentence boundaries from dependency parses. If " "you are constructing a parse tree incrementally by setting " diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx index 9d3a05ec8..ac19df671 100644 --- a/spacy/kb/candidate.pyx +++ b/spacy/kb/candidate.pyx @@ -7,7 +7,8 @@ cdef class Candidate: """A `Candidate` object refers to a textual mention that may or may not be resolved to a specific entity from a Knowledge Base. This will be used as input for the entity linking algorithm which will disambiguate the various candidates to the correct one. - Each candidate (mention, entity_id) pair is assigned a certain prior probability. + Each candidate, which represents a possible link between one textual mention and one entity in the knowledge base, + is assigned a certain prior probability. DOCS: https://spacy.io/api/kb/#candidate-init """ @@ -76,7 +77,6 @@ cdef class InMemoryCandidate(Candidate): self._prior_prob = prior_prob self._kb = kb self._mention = mention_hash - self._entity_id = entity_hash self._entity_freq = entity_freq @property @@ -97,7 +97,7 @@ cdef class InMemoryCandidate(Candidate): @property def entity_id_(self) -> str: - return self._kb.vocab.strings[self._entity_id] + return self._kb.vocab.strings[self._entity_hash] @property def entity_freq(self) -> float: diff --git a/spacy/kb/kb.pyx b/spacy/kb/kb.pyx index 158c3304f..7da312863 100644 --- a/spacy/kb/kb.pyx +++ b/spacy/kb/kb.pyx @@ -11,7 +11,7 @@ from ..errors import Errors cdef class KnowledgeBase: - """A `KnowledgeBase` instance stores unique identifiers for entities and their textual mentions, + """A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases, to support entity linking of named entities to real-world concepts. This is an abstract class and requires its operations to be implemented. diff --git a/spacy/kb/kb_in_memory.pyx b/spacy/kb/kb_in_memory.pyx index 059f3a140..4ceb87888 100644 --- a/spacy/kb/kb_in_memory.pyx +++ b/spacy/kb/kb_in_memory.pyx @@ -22,7 +22,7 @@ from .candidate import InMemoryCandidate cdef class InMemoryLookupKB(KnowledgeBase): - """An `InMemoryLookupKB` instance stores unique identifiers for entities and their textual mentions, + """An `InMemoryLookupKB` instance stores unique identifiers for entities and their textual aliases, to support entity linking of named entities to real-world concepts. DOCS: https://spacy.io/api/inmemorylookupkb @@ -216,7 +216,7 @@ cdef class InMemoryLookupKB(KnowledgeBase): if is_present: if not ignore_warnings: - warnings.warn(Warnings.W024.format(entity=entity, mention=alias)) + warnings.warn(Warnings.W024.format(entity=entity, alias=alias)) else: entry_indices.push_back(int(entry_index)) alias_entry.entry_indices = entry_indices