diff --git a/setup.py b/setup.py index 9b8897233..d5b82ec68 100755 --- a/setup.py +++ b/setup.py @@ -30,6 +30,7 @@ MOD_NAMES = [ "spacy.lexeme", "spacy.vocab", "spacy.attrs", + "spacy.kb.candidate", "spacy.kb.kb", "spacy.kb.kb_in_memory", "spacy.ml.tb_framework", diff --git a/spacy/errors.py b/spacy/errors.py index bd4da19d0..c6a3e8161 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -82,7 +82,7 @@ class Warnings(metaclass=ErrorsWithCodes): "ignoring the duplicate entry.") W021 = ("Unexpected hash collision in PhraseMatcher. Matches may be " "incorrect. Modify PhraseMatcher._terminal_hash to fix.") - W024 = ("Entity '{entity}' - Alias '{alias}' combination already exists in " + W024 = ("Entity '{entity}' - alias '{alias}' combination already exists in " "the Knowledge Base.") W026 = ("Unable to set all sentence boundaries from dependency parses. If " "you are constructing a parse tree incrementally by setting " @@ -209,7 +209,11 @@ class Warnings(metaclass=ErrorsWithCodes): "`enabled` ({enabled}). Be aware that this might affect other components in your pipeline.") W124 = ("{host}:{port} is already in use, using the nearest available port {serve_port} as an alternative.") + # v4 warning strings W400 = ("`use_upper=False` is ignored, the upper layer is always enabled") + W401 = ("`incl_prior is True`, but the selected knowledge base type {kb_type} doesn't support prior probability " + "lookups so this setting will be ignored. If your KB does support prior probability lookups, make sure " + "to return `True` in `.supports_prior_probs`.") class Errors(metaclass=ErrorsWithCodes): @@ -960,6 +964,9 @@ class Errors(metaclass=ErrorsWithCodes): E4003 = ("Training examples for distillation must have the exact same tokens in the " "reference and predicted docs.") E4004 = ("Backprop is not supported when is_train is not set.") + E4005 = ("EntityLinker_v1 is not supported in spaCy v4. 
Update your configuration.") + E4006 = ("Expected `entity_id` to be of type {exp_type}, but is of type {found_type}.") + RENAMED_LANGUAGE_CODES = {"xx": "mul", "is": "isl"} diff --git a/spacy/kb/__init__.py b/spacy/kb/__init__.py index c8a657d62..ff0e209e3 100644 --- a/spacy/kb/__init__.py +++ b/spacy/kb/__init__.py @@ -2,4 +2,5 @@ from .kb import KnowledgeBase from .kb_in_memory import InMemoryLookupKB from .candidate import Candidate, InMemoryCandidate + __all__ = ["KnowledgeBase", "InMemoryLookupKB", "Candidate", "InMemoryCandidate"] diff --git a/spacy/kb/candidate.pxd b/spacy/kb/candidate.pxd new file mode 100644 index 000000000..f21f423e4 --- /dev/null +++ b/spacy/kb/candidate.pxd @@ -0,0 +1,15 @@ +from libcpp.vector cimport vector +from .kb_in_memory cimport InMemoryLookupKB +from ..typedefs cimport hash_t + +cdef class Candidate: + pass + + +cdef class InMemoryCandidate(Candidate): + cdef readonly hash_t _entity_hash + cdef readonly hash_t _alias_hash + cpdef vector[float] _entity_vector + cdef float _prior_prob + cdef readonly InMemoryLookupKB _kb + cdef float _entity_freq diff --git a/spacy/kb/candidate.py b/spacy/kb/candidate.py deleted file mode 100644 index 22c054ab2..000000000 --- a/spacy/kb/candidate.py +++ /dev/null @@ -1,118 +0,0 @@ -import abc -from typing import List, Callable - - -class Candidate(abc.ABC): - """A `Candidate` object refers to a textual mention (`alias`) that may or may not be resolved - to a specific `entity_id` from a Knowledge Base. This will be used as input for the entity_id linking - algorithm which will disambiguate the various candidates to the correct one. - Each candidate (alias, entity_id) pair is assigned a certain prior probability. - - DOCS: https://spacy.io/api/kb/#candidate-init - """ - - def __init__( - self, - mention: str, - entity_id: int, - entity_name: str, - entity_vector: List[float], - prior_prob: float, - ): - """Initializes properties of `Candidate` instance. 
- mention (str): Mention text for this candidate. - entity_id (int): Unique entity ID. - entity_name (str): Entity name. - entity_vector (List[float]): Entity embedding. - prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of - the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In - cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus - doesn't) it might be better to eschew this information and always supply the same value. - """ - self._mention = mention - self._entity_id = entity_id - self._entity_name = entity_name - self._entity_vector = entity_vector - self._prior_prob = prior_prob - - @property - def entity(self) -> int: - """RETURNS (int): Unique entity ID.""" - return self._entity_id - - @property - def entity_(self) -> str: - """RETURNS (int): Entity name.""" - return self._entity_name - - @property - def mention(self) -> str: - """RETURNS (str): Mention.""" - return self._mention - - @property - def entity_vector(self) -> List[float]: - """RETURNS (List[float]): Entity vector.""" - return self._entity_vector - - @property - def prior_prob(self) -> float: - """RETURNS (List[float]): Entity vector.""" - return self._prior_prob - - -class InMemoryCandidate(Candidate): - """Candidate for InMemoryLookupKB.""" - - def __init__( - self, - retrieve_string_from_hash: Callable[[int], str], - entity_hash: int, - entity_freq: int, - entity_vector: List[float], - alias_hash: int, - prior_prob: float, - ): - """ - retrieve_string_from_hash (Callable[[int], str]): Callable retrieving entity name from provided entity/vocab - hash. - entity_hash (str): Hashed entity name /ID. - entity_freq (int): Entity frequency in KB corpus. - entity_vector (List[float]): Entity embedding. - alias_hash (int): Hashed alias. - prior_prob (float): Prior probability of entity for this mention - i.e. 
the probability that, independent of - the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In - cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus - doesn't) it might be better to eschew this information and always supply the same value. - """ - super().__init__( - mention=retrieve_string_from_hash(alias_hash), - entity_id=entity_hash, - entity_name=retrieve_string_from_hash(entity_hash), - entity_vector=entity_vector, - prior_prob=prior_prob, - ) - self._retrieve_string_from_hash = retrieve_string_from_hash - self._entity_hash = entity_hash - self._entity_freq = entity_freq - self._alias_hash = alias_hash - self._prior_prob = prior_prob - - @property - def entity(self) -> int: - """RETURNS (int): hash of the entity_id's KB ID/name""" - return self._entity_hash - - @property - def alias(self) -> int: - """RETURNS (int): hash of the alias""" - return self._alias_hash - - @property - def alias_(self) -> str: - """RETURNS (str): ID of the original alias""" - return self._retrieve_string_from_hash(self._alias_hash) - - @property - def entity_freq(self) -> float: - return self._entity_freq diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx new file mode 100644 index 000000000..3d8da4b95 --- /dev/null +++ b/spacy/kb/candidate.pyx @@ -0,0 +1,96 @@ +# cython: infer_types=True, profile=True + +from .kb_in_memory cimport InMemoryLookupKB +from ..errors import Errors + +cdef class Candidate: + """A `Candidate` object refers to a textual mention that may or may not be resolved + to a specific entity from a Knowledge Base. This will be used as input for the entity linking + algorithm which will disambiguate the various candidates to the correct one. + Each candidate, which represents a possible link between one textual mention and one entity in the knowledge base, + is assigned a certain prior probability. 
+ + DOCS: https://spacy.io/api/kb/#candidate-init + """ + + def __init__(self): + # Make sure abstract Candidate is not instantiated. + if self.__class__ == Candidate: + raise TypeError( + Errors.E1046.format(cls_name=self.__class__.__name__) + ) + + @property + def entity_id(self) -> int: + """RETURNS (int): Numerical representation of entity ID (if entity ID is numerical, this is just the entity ID, + otherwise the hash of the entity ID string).""" + raise NotImplementedError + + @property + def entity_id_(self) -> str: + """RETURNS (str): String representation of entity ID.""" + raise NotImplementedError + + @property + def entity_vector(self) -> vector[float]: + """RETURNS (vector[float]): Entity vector.""" + raise NotImplementedError + + +cdef class InMemoryCandidate(Candidate): + """Candidate for InMemoryLookupKB.""" + + def __init__( + self, + kb: InMemoryLookupKB, + entity_hash: int, + alias_hash: int, + entity_vector: vector[float], + prior_prob: float, + entity_freq: float + ): + """ + kb (InMemoryLookupKB): InMemoryLookupKB instance. + entity_hash (int): Entity ID as hash that can be looked up with InMemoryLookupKB.vocab.strings.__getitem__(). + entity_freq (float): Entity frequency in KB corpus. + entity_vector (vector[float]): Entity embedding. + alias_hash (int): Alias hash. + prior_prob (float): Prior probability of entity for this alias. I.e. the probability that, independent of + the context, this alias - which matches one of this entity's aliases - resolves to this entity. 
+ """ + super().__init__() + + self._entity_hash = entity_hash + self._entity_vector = entity_vector + self._prior_prob = prior_prob + self._kb = kb + self._alias_hash = alias_hash + self._entity_freq = entity_freq + + @property + def entity_id(self) -> int: + return self._entity_hash + + @property + def entity_vector(self) -> vector[float]: + return self._entity_vector + + @property + def prior_prob(self) -> float: + """RETURNS (float): Prior probability that this alias, which matches one of this entity's synonyms, resolves to + this entity.""" + return self._prior_prob + + @property + def alias(self) -> str: + """RETURNS (str): Alias.""" + return self._kb.vocab.strings[self._alias_hash] + + @property + def entity_id_(self) -> str: + return self._kb.vocab.strings[self._entity_hash] + + @property + def entity_freq(self) -> float: + """RETURNS (float): Entity frequency in KB corpus.""" + return self._entity_freq diff --git a/spacy/kb/kb.pyx b/spacy/kb/kb.pyx index 636fca74d..622d2fdbb 100644 --- a/spacy/kb/kb.pyx +++ b/spacy/kb/kb.pyx @@ -32,11 +32,12 @@ cdef class KnowledgeBase: def get_candidates(self, mentions: Iterator[SpanGroup]) -> Iterator[Iterable[Iterable[Candidate]]]: """ - Return candidate entities for mentions stored in `ent` attribute in passed docs. Each candidate defines the - entity, the original alias, and the prior probability of that alias resolving to that entity. - If no candidate is found for a given mention, an empty list is returned. - mentions (Iterator[SpanGroup]): Mentions per doc as SpanGroup instance. - RETURNS (Iterator[Iterable[Iterable[Candidate]]]): Identified candidates per document. + Return candidate entities for the specified mentions, grouped per doc as SpanGroup instances. Each candidate defines at least the entity and the + entity's embedding vector. Depending on the KB implementation, further properties - such as the prior + probability of the specified mention text resolving to that entity - might be included. 
+ If no candidates are found for a given mention, an empty list is returned. + mentions (Iterator[SpanGroup]): Mentions for which to get candidates. + RETURNS (Iterator[Iterable[Iterable[Candidate]]]): Identified candidates. """ raise NotImplementedError( Errors.E1045.format(parent="KnowledgeBase", method="get_candidates", name=self.__name__) @@ -96,3 +97,10 @@ cdef class KnowledgeBase: raise NotImplementedError( Errors.E1045.format(parent="KnowledgeBase", method="from_disk", name=self.__name__) ) + + @property + def supports_prior_probs(self) -> bool: + """RETURNS (bool): Whether this KB type supports looking up prior probabilities for entity mentions.""" + raise NotImplementedError( + Errors.E1045.format(parent="KnowledgeBase", method="supports_prior_probs", name=self.__name__) + ) diff --git a/spacy/kb/kb_in_memory.pyx b/spacy/kb/kb_in_memory.pyx index b221b6cb1..b93fa5a4d 100644 --- a/spacy/kb/kb_in_memory.pyx +++ b/spacy/kb/kb_in_memory.pyx @@ -230,7 +230,7 @@ cdef class InMemoryLookupKB(KnowledgeBase): for mentions_for_doc in mentions: - yield [self.get_alias_candidates(ent_span.text) for ent_span in mentions_for_doc] + yield [self._get_alias_candidates(ent_span.text) for ent_span in mentions_for_doc] - def get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]: + def _get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]: """ Return candidate entities for an alias. Each candidate defines the entity, the original alias, and the prior probability of that alias resolving to that entity. 
@@ -244,12 +244,12 @@ cdef class InMemoryLookupKB(KnowledgeBase): return [ InMemoryCandidate( - retrieve_string_from_hash=self.vocab.strings.__getitem__, + kb=self, entity_hash=self._entries[entry_index].entity_hash, - entity_freq=self._entries[entry_index].freq, - entity_vector=self._vectors_table[self._entries[entry_index].vector_index], alias_hash=alias_hash, - prior_prob=prior_prob + entity_vector=self._vectors_table[self._entries[entry_index].vector_index], + prior_prob=prior_prob, + entity_freq=self._entries[entry_index].freq ) for (entry_index, prior_prob) in zip(alias_entry.entry_indices, alias_entry.probs) if entry_index != 0 @@ -284,6 +284,11 @@ cdef class InMemoryLookupKB(KnowledgeBase): return 0.0 + @property + def supports_prior_probs(self) -> bool: + """RETURNS (bool): Always True - InMemoryLookupKB stores a prior probability for each alias-entity pair.""" + return True + def to_bytes(self, **kwargs): """Serialize the current state to a binary string. """ diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py index daf914fa7..abfc14d84 100644 --- a/spacy/ml/models/entity_linker.py +++ b/spacy/ml/models/entity_linker.py @@ -108,7 +108,7 @@ def empty_kb( @registry.misc("spacy.CandidateGenerator.v1") -def create_candidates_all() -> Callable[ +def create_get_candidates() -> Callable[ [KnowledgeBase, Iterator[SpanGroup]], Iterator[Iterable[Iterable[Candidate]]], ]: diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py index cf3d7839e..e8ea9eb94 100644 --- a/spacy/pipeline/entity_linker.py +++ b/spacy/pipeline/entity_linker.py @@ -1,4 +1,6 @@ +import warnings from typing import ( + cast, Optional, Iterable, Callable, @@ -9,7 +11,6 @@ from typing import ( Any, Iterator, ) -from typing import cast from numpy import dtype from thinc.types import Floats1d, Floats2d, Ints1d, Ragged from pathlib import Path @@ -27,7 +28,7 @@ from .trainable_pipe import TrainablePipe from ..language import Language from ..vocab import Vocab from ..training import Example, validate_examples, validate_get_examples -from ..errors import Errors +from 
..errors import Errors, Warnings from ..util import SimpleFrozenList, registry from .. import util from ..scorer import Scorer @@ -120,28 +121,9 @@ def make_entity_linker( prediction is discarded. If None, predictions are not filtered by any threshold. save_activations (bool): save model activations in Doc when annotating. """ - if not model.attrs.get("include_span_maker", False): - try: - from spacy_legacy.components.entity_linker import EntityLinker_v1 - except: - raise ImportError( - "In order to use v1 of the EntityLinker, you must use spacy-legacy>=3.0.12." - ) - # The only difference in arguments here is that use_gold_ents and threshold aren't available. - return EntityLinker_v1( - nlp.vocab, - model, - name, - labels_discard=labels_discard, - n_sents=n_sents, - incl_prior=incl_prior, - incl_context=incl_context, - entity_vector_length=entity_vector_length, - get_candidates=get_candidates, - overwrite=overwrite, - scorer=scorer, - ) + raise ValueError(Errors.E4005) + return EntityLinker( nlp.vocab, model, @@ -251,6 +233,10 @@ class EntityLinker(TrainablePipe): self.threshold = threshold self.save_activations = save_activations + if self.incl_prior and not self.kb.supports_prior_probs: + # W401 carries a {kb_type} placeholder, so fill in the concrete KB class name. + warnings.warn(Warnings.W401.format(kb_type=type(self.kb).__name__)) + def set_kb(self, kb_loader: Callable[[Vocab], KnowledgeBase]): """Define the KB of this pipe by providing a function that will create it using this object's vocab.""" diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py index e84dc7382..406bfc841 100644 --- a/spacy/tests/pipeline/test_entity_linker.py +++ b/spacy/tests/pipeline/test_entity_linker.py @@ -7,7 +7,7 @@ from thinc.types import Ragged from spacy import registry, util from spacy.attrs import ENT_KB_ID from spacy.compat import pickle -from spacy.kb import InMemoryCandidate, InMemoryLookupKB, KnowledgeBase +from spacy.kb import Candidate, InMemoryLookupKB, KnowledgeBase from spacy.lang.en import English from spacy.ml import load_kb from 
spacy.ml.models.entity_linker import build_span_maker, get_candidates @@ -479,8 +479,8 @@ def test_candidate_generation(nlp): ) # test the content of the candidates - assert adam_ent_cands[0].entity_ == "Q2" - assert adam_ent_cands[0].alias_ == "adam" + assert adam_ent_cands[0].entity_id_ == "Q2" + assert adam_ent_cands[0].alias == "adam" assert_almost_equal(adam_ent_cands[0].entity_freq, 12) assert_almost_equal(adam_ent_cands[0].prior_prob, 0.9) @@ -519,7 +519,7 @@ def test_el_pipe_configuration(nlp): @registry.misc("spacy.LowercaseCandidateGenerator.v1") def create_candidates() -> Callable[ [InMemoryLookupKB, Iterator[SpanGroup]], - Iterator[Iterable[Iterable[InMemoryCandidate]]], + Iterator[Iterable[Iterable[Candidate]]], ]: return get_lowercased_candidates @@ -562,24 +562,22 @@ def test_vocab_serialization(nlp): mykb.add_alias(alias="douglas", entities=["Q2", "Q3"], probabilities=[0.4, 0.1]) adam_hash = mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9]) - candidates = mykb.get_alias_candidates("adam") + candidates = mykb._get_alias_candidates("adam") assert len(candidates) == 1 - assert candidates[0].entity == q2_hash - assert candidates[0].entity_ == "Q2" - assert candidates[0].alias == adam_hash - assert candidates[0].alias_ == "adam" + assert candidates[0].entity_id == q2_hash + assert candidates[0].entity_id_ == "Q2" + assert candidates[0].alias == "adam" with make_tempdir() as d: mykb.to_disk(d / "kb") kb_new_vocab = InMemoryLookupKB(Vocab(), entity_vector_length=1) kb_new_vocab.from_disk(d / "kb") - candidates = kb_new_vocab.get_alias_candidates("adam") + candidates = kb_new_vocab._get_alias_candidates("adam") assert len(candidates) == 1 - assert candidates[0].entity == q2_hash - assert candidates[0].entity_ == "Q2" - assert candidates[0].alias == adam_hash - assert candidates[0].alias_ == "adam" + assert candidates[0].entity_id == q2_hash + assert candidates[0].entity_id_ == "Q2" + assert candidates[0].alias == "adam" assert 
kb_new_vocab.get_vector("Q2") == [2] assert_almost_equal(kb_new_vocab.get_prior_prob("Q2", "douglas"), 0.4) @@ -599,20 +597,20 @@ def test_append_alias(nlp): mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9]) # test the size of the relevant candidates - assert len(mykb.get_alias_candidates("douglas")) == 2 + assert len(mykb._get_alias_candidates("douglas")) == 2 # append an alias mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.2) # test the size of the relevant candidates has been incremented - assert len(mykb.get_alias_candidates("douglas")) == 3 + assert len(mykb._get_alias_candidates("douglas")) == 3 # append the same alias-entity pair again should not work (will throw a warning) with pytest.warns(UserWarning): mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.3) # test the size of the relevant candidates remained unchanged - assert len(mykb.get_alias_candidates("douglas")) == 3 + assert len(mykb._get_alias_candidates("douglas")) == 3 @pytest.mark.filterwarnings("ignore:\\[W036") @@ -909,11 +907,11 @@ def test_kb_to_bytes(): assert kb_2.contains_alias("Russ Cochran") assert kb_1.get_size_aliases() == kb_2.get_size_aliases() assert kb_1.get_alias_strings() == kb_2.get_alias_strings() - assert len(kb_1.get_alias_candidates("Russ Cochran")) == len( - kb_2.get_alias_candidates("Russ Cochran") + assert len(kb_1._get_alias_candidates("Russ Cochran")) == len( + kb_2._get_alias_candidates("Russ Cochran") ) - assert len(kb_1.get_alias_candidates("Randomness")) == len( - kb_2.get_alias_candidates("Randomness") + assert len(kb_1._get_alias_candidates("Randomness")) == len( + kb_2._get_alias_candidates("Randomness") ) @@ -994,14 +992,11 @@ def test_scorer_links(): @pytest.mark.parametrize( "name,config", [ - ("entity_linker", {"@architectures": "spacy.EntityLinker.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL}), ("entity_linker", {"@architectures": "spacy.EntityLinker.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL}), ], ) # fmt: on def 
test_legacy_architectures(name, config): - from spacy_legacy.components.entity_linker import EntityLinker_v1 - # Ensure that the legacy architectures still work vector_length = 3 nlp = English() @@ -1023,10 +1018,7 @@ def test_legacy_architectures(name, config): return mykb entity_linker = nlp.add_pipe(name, config={"model": config}) - if config["@architectures"] == "spacy.EntityLinker.v1": - assert isinstance(entity_linker, EntityLinker_v1) - else: - assert isinstance(entity_linker, EntityLinker) + assert isinstance(entity_linker, EntityLinker) entity_linker.set_kb(create_kb) optimizer = nlp.initialize(get_examples=lambda: train_examples) diff --git a/spacy/tests/serialize/test_serialize_kb.py b/spacy/tests/serialize/test_serialize_kb.py index f9d2e226b..eb4254d31 100644 --- a/spacy/tests/serialize/test_serialize_kb.py +++ b/spacy/tests/serialize/test_serialize_kb.py @@ -66,19 +66,21 @@ def _check_kb(kb): assert alias_string not in kb.get_alias_strings() # check candidates & probabilities - candidates = sorted(kb.get_alias_candidates("double07"), key=lambda x: x.entity_) + candidates = sorted( + kb._get_alias_candidates("double07"), key=lambda x: x.entity_id_ + ) assert len(candidates) == 2 - assert candidates[0].entity_ == "Q007" + assert candidates[0].entity_id_ == "Q007" assert 6.999 < candidates[0].entity_freq < 7.01 assert candidates[0].entity_vector == [0, 0, 7] - assert candidates[0].alias_ == "double07" + assert candidates[0].alias == "double07" assert 0.899 < candidates[0].prior_prob < 0.901 - assert candidates[1].entity_ == "Q17" + assert candidates[1].entity_id_ == "Q17" assert 1.99 < candidates[1].entity_freq < 2.01 assert candidates[1].entity_vector == [7, 1, 0] - assert candidates[1].alias_ == "double07" + assert candidates[1].alias == "double07" assert 0.099 < candidates[1].prior_prob < 0.101 diff --git a/website/docs/api/inmemorylookupkb.mdx b/website/docs/api/inmemorylookupkb.mdx index 789c28293..791f6e8e9 100644 --- 
a/website/docs/api/inmemorylookupkb.mdx +++ b/website/docs/api/inmemorylookupkb.mdx @@ -178,22 +178,6 @@ implementation of `KnowledgeBase.get_candidates()`. | `mentions` | The textual mention or alias. ~~Iterable[SpanGroup]~~ | | **RETURNS** | An iterator over iterables of iterables with relevant [`InMemoryCandidate`](/api/kb#candidate) objects (per mention and doc). ~~Iterator[Iterable[Iterable[InMemoryCandidate]]]~~ | -## InMemoryLookupKB.get_alias_candidates {id="get_alias_candidates",tag="method"} - -Given a certain textual mention as input, retrieve a list of candidate entities -of type [`InMemoryCandidate`](/api/kb#candidate). - -> #### Example -> -> ```python -> candidates = kb.get_alias_candidates("Douglas") -> ``` - -| Name | Description | -| ----------- | ----------------------------------------------------------------------------- | -| `alias` | The textual mention or alias. ~~str~~ | -| **RETURNS** | The list of relevant `InMemoryCandidate` objects. ~~List[InMemoryCandidate]~~ | - ## InMemoryLookupKB.get_vector {id="get_vector",tag="method"} Given a certain entity ID, retrieve its pretrained entity vector. diff --git a/website/docs/api/kb.mdx b/website/docs/api/kb.mdx index 82e5979d9..0740dd9c7 100644 --- a/website/docs/api/kb.mdx +++ b/website/docs/api/kb.mdx @@ -155,15 +155,15 @@ Restore the state of the knowledge base from a given directory. Note that the ## InMemoryCandidate {id="candidate",tag="class"} -A `InMemoryCandidate` object refers to a textual mention that may or may not be -resolved to a specific entity from a `KnowledgeBase`. This will be used as input -for the entity linking algorithm which will disambiguate the various candidates -to the correct one. Each candidate `(mention, entity)` pair is assigned to a -certain prior probability. +An `InMemoryCandidate` object refers to a textual mention (alias) that may or +may not be resolved to a specific entity from a `KnowledgeBase`. 
This will be +used as input for the entity linking algorithm which will disambiguate the +various candidates to the correct one. Each candidate `(alias, entity)` pair is +assigned a certain prior probability. ### InMemoryCandidate.\_\_init\_\_ {id="candidate-init",tag="method"} -Construct a `InMemoryCandidate` object. Usually this constructor is not called +Construct an `InMemoryCandidate` object. Usually this constructor is not called directly, but instead these objects are returned by the `get_candidates` method of the [`entity_linker`](/api/entitylinker) pipe. @@ -181,7 +181,7 @@ of the [`entity_linker`](/api/entitylinker) pipe. | `kb` | The knowledge base that defined this candidate. ~~KnowledgeBase~~ | | `entity_hash` | The hash of the entity's KB ID. ~~int~~ | | `entity_freq` | The entity frequency as recorded in the KB. ~~float~~ | -| `alias_hash` | The hash of the textual mention or alias. ~~int~~ | +| `alias_hash` | The hash of the entity alias. ~~int~~ | | `prior_prob` | The prior probability of the `alias` referring to the `entity`. ~~float~~ | ## InMemoryCandidate attributes {id="candidate-attributes"}