Partially fix alias/mention terminology usage. Convert Candidate to interface.

This commit is contained in:
Raphael Mitsch 2023-03-13 14:26:14 +01:00
parent 649c146e2c
commit 6adc15178f
4 changed files with 7 additions and 7 deletions

View File

@@ -82,7 +82,7 @@ class Warnings(metaclass=ErrorsWithCodes):
"ignoring the duplicate entry.") "ignoring the duplicate entry.")
W021 = ("Unexpected hash collision in PhraseMatcher. Matches may be " W021 = ("Unexpected hash collision in PhraseMatcher. Matches may be "
"incorrect. Modify PhraseMatcher._terminal_hash to fix.") "incorrect. Modify PhraseMatcher._terminal_hash to fix.")
W024 = ("Entity '{entity}' - mention '{mention}' combination already exists in " W024 = ("Entity '{entity}' - alias '{alias}' combination already exists in "
"the Knowledge Base.") "the Knowledge Base.")
W026 = ("Unable to set all sentence boundaries from dependency parses. If " W026 = ("Unable to set all sentence boundaries from dependency parses. If "
"you are constructing a parse tree incrementally by setting " "you are constructing a parse tree incrementally by setting "

View File

@@ -7,7 +7,8 @@ cdef class Candidate:
"""A `Candidate` object refers to a textual mention that may or may not be resolved """A `Candidate` object refers to a textual mention that may or may not be resolved
to a specific entity from a Knowledge Base. This will be used as input for the entity linking to a specific entity from a Knowledge Base. This will be used as input for the entity linking
algorithm which will disambiguate the various candidates to the correct one. algorithm which will disambiguate the various candidates to the correct one.
Each candidate (mention, entity_id) pair is assigned a certain prior probability. Each candidate, which represents a possible link between one textual mention and one entity in the knowledge base,
is assigned a certain prior probability.
DOCS: https://spacy.io/api/kb/#candidate-init DOCS: https://spacy.io/api/kb/#candidate-init
""" """
@@ -76,7 +77,6 @@ cdef class InMemoryCandidate(Candidate):
self._prior_prob = prior_prob self._prior_prob = prior_prob
self._kb = kb self._kb = kb
self._mention = mention_hash self._mention = mention_hash
self._entity_id = entity_hash
self._entity_freq = entity_freq self._entity_freq = entity_freq
@property @property
@@ -97,7 +97,7 @@ cdef class InMemoryCandidate(Candidate):
@property @property
def entity_id_(self) -> str: def entity_id_(self) -> str:
return self._kb.vocab.strings[self._entity_id] return self._kb.vocab.strings[self._entity_hash]
@property @property
def entity_freq(self) -> float: def entity_freq(self) -> float:

View File

@@ -11,7 +11,7 @@ from ..errors import Errors
cdef class KnowledgeBase: cdef class KnowledgeBase:
"""A `KnowledgeBase` instance stores unique identifiers for entities and their textual mentions, """A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases,
to support entity linking of named entities to real-world concepts. to support entity linking of named entities to real-world concepts.
This is an abstract class and requires its operations to be implemented. This is an abstract class and requires its operations to be implemented.

View File

@@ -22,7 +22,7 @@ from .candidate import InMemoryCandidate
cdef class InMemoryLookupKB(KnowledgeBase): cdef class InMemoryLookupKB(KnowledgeBase):
"""An `InMemoryLookupKB` instance stores unique identifiers for entities and their textual mentions, """An `InMemoryLookupKB` instance stores unique identifiers for entities and their textual aliases,
to support entity linking of named entities to real-world concepts. to support entity linking of named entities to real-world concepts.
DOCS: https://spacy.io/api/inmemorylookupkb DOCS: https://spacy.io/api/inmemorylookupkb
@@ -216,7 +216,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
if is_present: if is_present:
if not ignore_warnings: if not ignore_warnings:
warnings.warn(Warnings.W024.format(entity=entity, mention=alias)) warnings.warn(Warnings.W024.format(entity=entity, alias=alias))
else: else:
entry_indices.push_back(int(entry_index)) entry_indices.push_back(int(entry_index))
alias_entry.entry_indices = entry_indices alias_entry.entry_indices = entry_indices