Partially fix alias/mention terminology usage. Convert Candidate to an interface.

This commit is contained in:
Raphael Mitsch 2023-03-13 14:26:14 +01:00
parent 649c146e2c
commit 6adc15178f
4 changed files with 7 additions and 7 deletions

View File

@ -82,7 +82,7 @@ class Warnings(metaclass=ErrorsWithCodes):
"ignoring the duplicate entry.")
W021 = ("Unexpected hash collision in PhraseMatcher. Matches may be "
"incorrect. Modify PhraseMatcher._terminal_hash to fix.")
W024 = ("Entity '{entity}' - mention '{mention}' combination already exists in "
W024 = ("Entity '{entity}' - alias '{alias}' combination already exists in "
"the Knowledge Base.")
W026 = ("Unable to set all sentence boundaries from dependency parses. If "
"you are constructing a parse tree incrementally by setting "

View File

@ -7,7 +7,8 @@ cdef class Candidate:
"""A `Candidate` object refers to a textual mention that may or may not be resolved
to a specific entity from a Knowledge Base. This will be used as input for the entity linking
algorithm which will disambiguate the various candidates to the correct one.
Each candidate (mention, entity_id) pair is assigned a certain prior probability.
Each candidate, which represents a possible link between one textual mention and one entity in the knowledge base,
is assigned a certain prior probability.
DOCS: https://spacy.io/api/kb/#candidate-init
"""
@ -76,7 +77,6 @@ cdef class InMemoryCandidate(Candidate):
self._prior_prob = prior_prob
self._kb = kb
self._mention = mention_hash
self._entity_id = entity_hash
self._entity_freq = entity_freq
@property
@ -97,7 +97,7 @@ cdef class InMemoryCandidate(Candidate):
@property
def entity_id_(self) -> str:
return self._kb.vocab.strings[self._entity_id]
return self._kb.vocab.strings[self._entity_hash]
@property
def entity_freq(self) -> float:

View File

@ -11,7 +11,7 @@ from ..errors import Errors
cdef class KnowledgeBase:
"""A `KnowledgeBase` instance stores unique identifiers for entities and their textual mentions,
"""A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases,
to support entity linking of named entities to real-world concepts.
This is an abstract class and requires its operations to be implemented.

View File

@ -22,7 +22,7 @@ from .candidate import InMemoryCandidate
cdef class InMemoryLookupKB(KnowledgeBase):
"""An `InMemoryLookupKB` instance stores unique identifiers for entities and their textual mentions,
"""An `InMemoryLookupKB` instance stores unique identifiers for entities and their textual aliases,
to support entity linking of named entities to real-world concepts.
DOCS: https://spacy.io/api/inmemorylookupkb
@ -216,7 +216,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
if is_present:
if not ignore_warnings:
warnings.warn(Warnings.W024.format(entity=entity, mention=alias))
warnings.warn(Warnings.W024.format(entity=entity, alias=alias))
else:
entry_indices.push_back(int(entry_index))
alias_entry.entry_indices = entry_indices