Merge branch 'refactor/el-candidates' into feature/docwise-generator-batching

# Conflicts: # spacy/kb/candidate.py # spacy/kb/kb.pyx # spacy/kb/kb_in_memory.pyx # spacy/ml/models/entity_linker.py # spacy/pipeline/entity_linker.py # spacy/tests/pipeline/test_entity_linker.py # website/docs/api/inmemorylookupkb.mdx # website/docs/api/kb.mdx
2025-08-13 00:24:54 +03:00 · 2023-03-20 10:24:17 +01:00 · 2023-03-20 10:24:17 +01:00 · 73bdeb01e4
commit 73bdeb01e4
parent 4bdb359711 b83407388a
14 changed files with 185 additions and 209 deletions
--- a/setup.py
+++ b/setup.py
@ -30,6 +30,7 @@ MOD_NAMES = [
    "spacy.lexeme",
    "spacy.vocab",
    "spacy.attrs",
+    "spacy.kb.candidate",
    "spacy.kb.kb",
    "spacy.kb.kb_in_memory",
    "spacy.ml.tb_framework",
--- a/spacy/errors.py
+++ b/spacy/errors.py
@ -82,7 +82,7 @@ class Warnings(metaclass=ErrorsWithCodes):
            "ignoring the duplicate entry.")
    W021 = ("Unexpected hash collision in PhraseMatcher. Matches may be "
            "incorrect. Modify PhraseMatcher._terminal_hash to fix.")
-    W024 = ("Entity '{entity}' - Alias '{alias}' combination already exists in "
+    W024 = ("Entity '{entity}' - alias '{alias}' combination already exists in "
            "the Knowledge Base.")
    W026 = ("Unable to set all sentence boundaries from dependency parses. If "
            "you are constructing a parse tree incrementally by setting "
@ -209,7 +209,11 @@ class Warnings(metaclass=ErrorsWithCodes):
            "`enabled` ({enabled}). Be aware that this might affect other components in your pipeline.")
    W124 = ("{host}:{port} is already in use, using the nearest available port {serve_port} as an alternative.")

+    # v4 warning strings
    W400 = ("`use_upper=False` is ignored, the upper layer is always enabled")
+    W401 = ("`incl_prior is True`, but the selected knowledge base type {kb_type} doesn't support prior probability "
+            "lookups so this setting will be ignored. If your KB does support prior probability lookups, make sure "
+            "to return `True` in `.supports_prior_probs`.")


 class Errors(metaclass=ErrorsWithCodes):
@ -960,6 +964,9 @@ class Errors(metaclass=ErrorsWithCodes):
    E4003 = ("Training examples for distillation must have the exact same tokens in the "
             "reference and predicted docs.")
    E4004 = ("Backprop is not supported when is_train is not set.")
+    E4005 = ("EntityLinker_v1 is not supported in spaCy v4. Update your configuration.")
+    E4006 = ("Expected `entity_id` to be of type {exp_type}, but is of type {found_type}.")
+

 RENAMED_LANGUAGE_CODES = {"xx": "mul", "is": "isl"}

--- a/spacy/kb/__init__.py
+++ b/spacy/kb/__init__.py
@ -2,4 +2,5 @@ from .kb import KnowledgeBase
 from .kb_in_memory import InMemoryLookupKB
 from .candidate import Candidate, InMemoryCandidate

+
 __all__ = ["KnowledgeBase", "InMemoryLookupKB", "Candidate", "InMemoryCandidate"]
--- a/spacy/kb/candidate.pxd
+++ b/spacy/kb/candidate.pxd
@ -0,0 +1,15 @@
+from libcpp.vector cimport vector
+from .kb_in_memory cimport InMemoryLookupKB
+from ..typedefs cimport hash_t
+
+cdef class Candidate:
+    pass  # abstract base declares no C-level attributes; see InMemoryCandidate for a concrete layout
+
+
+cdef class InMemoryCandidate(Candidate):
+    cdef readonly hash_t _entity_hash  # hash of the entity ID string
+    cdef readonly hash_t _alias_hash  # hash of the alias string
+    cdef vector[float] _entity_vector  # plain `cdef`: `cpdef` only applies to functions, not attributes
+    cdef float _prior_prob
+    cdef readonly InMemoryLookupKB _kb  # KB whose vocab resolves the hashes above back to strings
+    cdef float _entity_freq
--- a/spacy/kb/candidate.py
+++ b/spacy/kb/candidate.py
@ -1,118 +0,0 @@
-import abc
-from typing import List, Callable
-
-
-class Candidate(abc.ABC):
-    """A `Candidate` object refers to a textual mention (`alias`) that may or may not be resolved
-    to a specific `entity_id` from a Knowledge Base. This will be used as input for the entity_id linking
-    algorithm which will disambiguate the various candidates to the correct one.
-    Each candidate (alias, entity_id) pair is assigned a certain prior probability.
-
-    DOCS: https://spacy.io/api/kb/#candidate-init
-    """
-
-    def __init__(
-        self,
-        mention: str,
-        entity_id: int,
-        entity_name: str,
-        entity_vector: List[float],
-        prior_prob: float,
-    ):
-        """Initializes properties of `Candidate` instance.
-        mention (str): Mention text for this candidate.
-        entity_id (int): Unique entity ID.
-        entity_name (str): Entity name.
-        entity_vector (List[float]): Entity embedding.
-        prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
-            the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In
-            cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
-            doesn't) it might be better to eschew this information and always supply the same value.
-        """
-        self._mention = mention
-        self._entity_id = entity_id
-        self._entity_name = entity_name
-        self._entity_vector = entity_vector
-        self._prior_prob = prior_prob
-
-    @property
-    def entity(self) -> int:
-        """RETURNS (int): Unique entity ID."""
-        return self._entity_id
-
-    @property
-    def entity_(self) -> str:
-        """RETURNS (int): Entity name."""
-        return self._entity_name
-
-    @property
-    def mention(self) -> str:
-        """RETURNS (str): Mention."""
-        return self._mention
-
-    @property
-    def entity_vector(self) -> List[float]:
-        """RETURNS (List[float]): Entity vector."""
-        return self._entity_vector
-
-    @property
-    def prior_prob(self) -> float:
-        """RETURNS (List[float]): Entity vector."""
-        return self._prior_prob
-
-
-class InMemoryCandidate(Candidate):
-    """Candidate for InMemoryLookupKB."""
-
-    def __init__(
-        self,
-        retrieve_string_from_hash: Callable[[int], str],
-        entity_hash: int,
-        entity_freq: int,
-        entity_vector: List[float],
-        alias_hash: int,
-        prior_prob: float,
-    ):
-        """
-        retrieve_string_from_hash (Callable[[int], str]): Callable retrieving entity name from provided entity/vocab
-            hash.
-        entity_hash (str): Hashed entity name /ID.
-        entity_freq (int): Entity frequency in KB corpus.
-        entity_vector (List[float]): Entity embedding.
-        alias_hash (int): Hashed alias.
-        prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
-            the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In
-            cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
-            doesn't) it might be better to eschew this information and always supply the same value.
-        """
-        super().__init__(
-            mention=retrieve_string_from_hash(alias_hash),
-            entity_id=entity_hash,
-            entity_name=retrieve_string_from_hash(entity_hash),
-            entity_vector=entity_vector,
-            prior_prob=prior_prob,
-        )
-        self._retrieve_string_from_hash = retrieve_string_from_hash
-        self._entity_hash = entity_hash
-        self._entity_freq = entity_freq
-        self._alias_hash = alias_hash
-        self._prior_prob = prior_prob
-
-    @property
-    def entity(self) -> int:
-        """RETURNS (int): hash of the entity_id's KB ID/name"""
-        return self._entity_hash
-
-    @property
-    def alias(self) -> int:
-        """RETURNS (int): hash of the alias"""
-        return self._alias_hash
-
-    @property
-    def alias_(self) -> str:
-        """RETURNS (str): ID of the original alias"""
-        return self._retrieve_string_from_hash(self._alias_hash)
-
-    @property
-    def entity_freq(self) -> float:
-        return self._entity_freq
--- a/spacy/kb/candidate.pyx
+++ b/spacy/kb/candidate.pyx
@ -0,0 +1,96 @@
+# cython: infer_types=True, profile=True
+
+from .kb_in_memory cimport InMemoryLookupKB
+from ..errors import Errors
+
+cdef class Candidate:
+    """A `Candidate` object refers to a textual mention that may or may not be resolved
+    to a specific entity from a Knowledge Base. This will be used as input for the entity linking
+    algorithm which will disambiguate the various candidates to the correct one.
+    Each candidate represents a possible link between one textual mention and one entity in the knowledge base.
+    This abstract base only defines `entity_id`, `entity_id_` and `entity_vector`; subclasses may expose more.
+
+    DOCS: https://spacy.io/api/kb/#candidate-init
+    """
+
+    def __init__(self):
+        # Candidate is abstract: instantiating it directly raises TypeError, subclasses pass this check.
+        if self.__class__ == Candidate:
+            raise TypeError(
+                Errors.E1046.format(cls_name=self.__class__.__name__)
+            )
+
+    @property
+    def entity_id(self) -> int:
+        """RETURNS (int): Numerical representation of entity ID (if entity ID is numerical, this is just the entity ID,
+        otherwise the hash of the entity ID string). Raises NotImplementedError unless overridden by a subclass."""
+        raise NotImplementedError
+
+    @property
+    def entity_id_(self) -> str:
+        """RETURNS (str): String representation of entity ID. Raises NotImplementedError unless overridden."""
+        raise NotImplementedError
+
+    @property
+    def entity_vector(self) -> vector[float]:
+        """RETURNS (vector[float]): Entity vector. Raises NotImplementedError unless overridden by a subclass."""
+        raise NotImplementedError
+
+
+cdef class InMemoryCandidate(Candidate):
+    """Candidate for InMemoryLookupKB: one (alias, entity) pair with its prior probability and entity frequency."""
+
+    def __init__(
+        self,
+        kb: InMemoryLookupKB,
+        entity_hash: int,
+        alias_hash: int,
+        entity_vector: vector[float],
+        prior_prob: float,
+        entity_freq: float
+    ):
+        """
+        kb (InMemoryLookupKB): InMemoryLookupKB instance.
+        entity_hash (int): Entity ID as hash that can be looked up with InMemoryLookupKB.vocab.strings.__getitem__().
+        alias_hash (int): Alias hash.
+        entity_vector (vector[float]): Entity embedding.
+        prior_prob (float): Prior probability of entity for this alias, i.e. the probability that, independent of
+            the context, this alias - which matches one of this entity's aliases - resolves to this entity.
+        entity_freq (float): Entity frequency in KB corpus.
+        """
+        super().__init__()
+
+        self._entity_hash = entity_hash
+        self._entity_vector = entity_vector
+        self._prior_prob = prior_prob
+        self._kb = kb
+        self._alias_hash = alias_hash
+        self._entity_freq = entity_freq
+
+    @property
+    def entity_id(self) -> int:
+        return self._entity_hash
+
+    @property
+    def entity_vector(self) -> vector[float]:
+        return self._entity_vector
+
+    @property
+    def prior_prob(self) -> float:
+        """RETURNS (float): Prior probability that this alias, which matches one of this entity's synonyms, resolves to
+        this entity."""
+        return self._prior_prob
+
+    @property
+    def alias(self) -> str:
+        """RETURNS (str): Alias string, looked up from the alias hash in the KB's vocab."""
+        return self._kb.vocab.strings[self._alias_hash]
+
+    @property
+    def entity_id_(self) -> str:
+        return self._kb.vocab.strings[self._entity_hash]
+
+    @property
+    def entity_freq(self) -> float:
+        """RETURNS (float): Entity frequency in KB corpus."""
+        return self._entity_freq
--- a/spacy/kb/kb.pyx
+++ b/spacy/kb/kb.pyx
@ -32,11 +32,12 @@ cdef class KnowledgeBase:

    def get_candidates(self, mentions: Iterator[SpanGroup]) -> Iterator[Iterable[Iterable[Candidate]]]:
        """
-        Return candidate entities for mentions stored in `ent` attribute in passed docs. Each candidate defines the
-        entity, the original alias, and the prior probability of that alias resolving to that entity.
-        If no candidate is found for a given mention, an empty list is returned.
-        mentions (Iterator[SpanGroup]): Mentions per doc as SpanGroup instance.
-        RETURNS (Iterator[Iterable[Iterable[Candidate]]]): Identified candidates per document.
+        Return candidate entities for the mentions in each passed SpanGroup. Each candidate defines at least the entity
+        and the entity's embedding vector. Depending on the KB implementation, further properties - such as the prior
+        probability of the specified mention text resolving to that entity - might be included.
+        If no candidates are found for a given mention, an empty list is returned.
+        mentions (Iterator[SpanGroup]): Mentions per doc as SpanGroup instance.
+        RETURNS (Iterator[Iterable[Iterable[Candidate]]]): Identified candidates per document.
        """
        raise NotImplementedError(
            Errors.E1045.format(parent="KnowledgeBase", method="get_candidates", name=self.__name__)
@ -96,3 +97,10 @@ cdef class KnowledgeBase:
        raise NotImplementedError(
            Errors.E1045.format(parent="KnowledgeBase", method="from_disk", name=self.__name__)
        )
+
+    @property
+    def supports_prior_probs(self) -> bool:
+        """RETURNS (bool): Whether this KB type supports looking up prior probabilities for entity mentions."""
+        raise NotImplementedError(
+            Errors.E1045.format(parent="KnowledgeBase", method="supports_prior_probs", name=self.__name__)
+        )
--- a/spacy/kb/kb_in_memory.pyx
+++ b/spacy/kb/kb_in_memory.pyx
@ -230,7 +230,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
        for mentions_for_doc in mentions:
-            yield [self.get_alias_candidates(ent_span.text) for ent_span in mentions_for_doc]
+            yield [self._get_alias_candidates(ent_span.text) for ent_span in mentions_for_doc]

-    def get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]:
+    def _get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]:
        """
        Return candidate entities for an alias. Each candidate defines the entity, the original alias,
        and the prior probability of that alias resolving to that entity.
@ -244,12 +244,12 @@ cdef class InMemoryLookupKB(KnowledgeBase):

        return [
            InMemoryCandidate(
-                retrieve_string_from_hash=self.vocab.strings.__getitem__,
+                kb=self,
                entity_hash=self._entries[entry_index].entity_hash,
-                entity_freq=self._entries[entry_index].freq,
-                entity_vector=self._vectors_table[self._entries[entry_index].vector_index],
                alias_hash=alias_hash,
-                prior_prob=prior_prob
+                entity_vector=self._vectors_table[self._entries[entry_index].vector_index],
+                prior_prob=prior_prob,
+                entity_freq=self._entries[entry_index].freq
            )
            for (entry_index, prior_prob) in zip(alias_entry.entry_indices, alias_entry.probs)
            if entry_index != 0
@ -284,6 +284,11 @@ cdef class InMemoryLookupKB(KnowledgeBase):

        return 0.0

+    @property
+    def supports_prior_probs(self) -> bool:
+        """RETURNS (bool): Always True, as this KB stores a prior probability for each alias-entity pair."""
+        return True
+
    def to_bytes(self, **kwargs):
        """Serialize the current state to a binary string.
        """
--- a/spacy/ml/models/entity_linker.py
+++ b/spacy/ml/models/entity_linker.py
@ -108,7 +108,7 @@ def empty_kb(


@registry.misc("spacy.CandidateGenerator.v1")
-def create_candidates_all() -> Callable[
+def create_get_candidates() -> Callable[
    [KnowledgeBase, Iterator[SpanGroup]],
    Iterator[Iterable[Iterable[Candidate]]],
 ]:
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@ -1,4 +1,6 @@
+import warnings
 from typing import (
+    cast,
    Optional,
    Iterable,
    Callable,
@ -9,7 +11,6 @@ from typing import (
    Any,
    Iterator,
 )
-from typing import cast
 from numpy import dtype
 from thinc.types import Floats1d, Floats2d, Ints1d, Ragged
 from pathlib import Path
@ -27,7 +28,7 @@ from .trainable_pipe import TrainablePipe
 from ..language import Language
 from ..vocab import Vocab
 from ..training import Example, validate_examples, validate_get_examples
-from ..errors import Errors
+from ..errors import Errors, Warnings
 from ..util import SimpleFrozenList, registry
 from .. import util
 from ..scorer import Scorer
@ -120,28 +121,9 @@ def make_entity_linker(
        prediction is discarded. If None, predictions are not filtered by any threshold.
    save_activations (bool): save model activations in Doc when annotating.
    """
-
    if not model.attrs.get("include_span_maker", False):
-        try:
-            from spacy_legacy.components.entity_linker import EntityLinker_v1
-        except:
-            raise ImportError(
-                "In order to use v1 of the EntityLinker, you must use spacy-legacy>=3.0.12."
-            )
-        # The only difference in arguments here is that use_gold_ents and threshold aren't available.
-        return EntityLinker_v1(
-            nlp.vocab,
-            model,
-            name,
-            labels_discard=labels_discard,
-            n_sents=n_sents,
-            incl_prior=incl_prior,
-            incl_context=incl_context,
-            entity_vector_length=entity_vector_length,
-            get_candidates=get_candidates,
-            overwrite=overwrite,
-            scorer=scorer,
-        )
+        raise ValueError(Errors.E4005)
+
    return EntityLinker(
        nlp.vocab,
        model,
@ -251,6 +233,9 @@ class EntityLinker(TrainablePipe):
        self.threshold = threshold
        self.save_activations = save_activations

+        if self.incl_prior and not self.kb.supports_prior_probs:
+            warnings.warn(Warnings.W401.format(kb_type=type(self.kb).__name__))
+
    def set_kb(self, kb_loader: Callable[[Vocab], KnowledgeBase]):
        """Define the KB of this pipe by providing a function that will
        create it using this object's vocab."""
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@ -7,7 +7,7 @@ from thinc.types import Ragged
 from spacy import registry, util
 from spacy.attrs import ENT_KB_ID
 from spacy.compat import pickle
-from spacy.kb import InMemoryCandidate, InMemoryLookupKB, KnowledgeBase
+from spacy.kb import Candidate, InMemoryLookupKB, KnowledgeBase
 from spacy.lang.en import English
 from spacy.ml import load_kb
 from spacy.ml.models.entity_linker import build_span_maker, get_candidates
@ -479,8 +479,8 @@ def test_candidate_generation(nlp):
    )

    # test the content of the candidates
-    assert adam_ent_cands[0].entity_ == "Q2"
-    assert adam_ent_cands[0].alias_ == "adam"
+    assert adam_ent_cands[0].entity_id_ == "Q2"
+    assert adam_ent_cands[0].alias == "adam"
    assert_almost_equal(adam_ent_cands[0].entity_freq, 12)
    assert_almost_equal(adam_ent_cands[0].prior_prob, 0.9)

@ -519,7 +519,7 @@ def test_el_pipe_configuration(nlp):
    @registry.misc("spacy.LowercaseCandidateGenerator.v1")
    def create_candidates() -> Callable[
        [InMemoryLookupKB, Iterator[SpanGroup]],
-        Iterator[Iterable[Iterable[InMemoryCandidate]]],
+        Iterator[Iterable[Iterable[Candidate]]],
    ]:
        return get_lowercased_candidates

@ -562,24 +562,22 @@ def test_vocab_serialization(nlp):
    mykb.add_alias(alias="douglas", entities=["Q2", "Q3"], probabilities=[0.4, 0.1])
    adam_hash = mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])

-    candidates = mykb.get_alias_candidates("adam")
+    candidates = mykb._get_alias_candidates("adam")
    assert len(candidates) == 1
-    assert candidates[0].entity == q2_hash
-    assert candidates[0].entity_ == "Q2"
-    assert candidates[0].alias == adam_hash
-    assert candidates[0].alias_ == "adam"
+    assert candidates[0].entity_id == q2_hash
+    assert candidates[0].entity_id_ == "Q2"
+    assert candidates[0].alias == "adam"

    with make_tempdir() as d:
        mykb.to_disk(d / "kb")
        kb_new_vocab = InMemoryLookupKB(Vocab(), entity_vector_length=1)
        kb_new_vocab.from_disk(d / "kb")

-        candidates = kb_new_vocab.get_alias_candidates("adam")
+        candidates = kb_new_vocab._get_alias_candidates("adam")
        assert len(candidates) == 1
-        assert candidates[0].entity == q2_hash
-        assert candidates[0].entity_ == "Q2"
-        assert candidates[0].alias == adam_hash
-        assert candidates[0].alias_ == "adam"
+        assert candidates[0].entity_id == q2_hash
+        assert candidates[0].entity_id_ == "Q2"
+        assert candidates[0].alias == "adam"

        assert kb_new_vocab.get_vector("Q2") == [2]
        assert_almost_equal(kb_new_vocab.get_prior_prob("Q2", "douglas"), 0.4)
@ -599,20 +597,20 @@ def test_append_alias(nlp):
    mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])

    # test the size of the relevant candidates
-    assert len(mykb.get_alias_candidates("douglas")) == 2
+    assert len(mykb._get_alias_candidates("douglas")) == 2

    # append an alias
    mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.2)

    # test the size of the relevant candidates has been incremented
-    assert len(mykb.get_alias_candidates("douglas")) == 3
+    assert len(mykb._get_alias_candidates("douglas")) == 3

    # append the same alias-entity pair again should not work (will throw a warning)
    with pytest.warns(UserWarning):
        mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.3)

    # test the size of the relevant candidates remained unchanged
-    assert len(mykb.get_alias_candidates("douglas")) == 3
+    assert len(mykb._get_alias_candidates("douglas")) == 3


@pytest.mark.filterwarnings("ignore:\\[W036")
@ -909,11 +907,11 @@ def test_kb_to_bytes():
    assert kb_2.contains_alias("Russ Cochran")
    assert kb_1.get_size_aliases() == kb_2.get_size_aliases()
    assert kb_1.get_alias_strings() == kb_2.get_alias_strings()
-    assert len(kb_1.get_alias_candidates("Russ Cochran")) == len(
-        kb_2.get_alias_candidates("Russ Cochran")
+    assert len(kb_1._get_alias_candidates("Russ Cochran")) == len(
+        kb_2._get_alias_candidates("Russ Cochran")
    )
-    assert len(kb_1.get_alias_candidates("Randomness")) == len(
-        kb_2.get_alias_candidates("Randomness")
+    assert len(kb_1._get_alias_candidates("Randomness")) == len(
+        kb_2._get_alias_candidates("Randomness")
    )


@ -994,14 +992,11 @@ def test_scorer_links():
@pytest.mark.parametrize(
    "name,config",
    [
-        ("entity_linker", {"@architectures": "spacy.EntityLinker.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL}),
        ("entity_linker", {"@architectures": "spacy.EntityLinker.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL}),
    ],
 )
 # fmt: on
 def test_legacy_architectures(name, config):
-    from spacy_legacy.components.entity_linker import EntityLinker_v1
-
    # Ensure that the legacy architectures still work
    vector_length = 3
    nlp = English()
@ -1023,10 +1018,7 @@ def test_legacy_architectures(name, config):
        return mykb

    entity_linker = nlp.add_pipe(name, config={"model": config})
-    if config["@architectures"] == "spacy.EntityLinker.v1":
-        assert isinstance(entity_linker, EntityLinker_v1)
-    else:
-        assert isinstance(entity_linker, EntityLinker)
+    assert isinstance(entity_linker, EntityLinker)
    entity_linker.set_kb(create_kb)
    optimizer = nlp.initialize(get_examples=lambda: train_examples)

--- a/spacy/tests/serialize/test_serialize_kb.py
+++ b/spacy/tests/serialize/test_serialize_kb.py
@ -66,19 +66,21 @@ def _check_kb(kb):
        assert alias_string not in kb.get_alias_strings()

    # check candidates & probabilities
-    candidates = sorted(kb.get_alias_candidates("double07"), key=lambda x: x.entity_)
+    candidates = sorted(
+        kb._get_alias_candidates("double07"), key=lambda x: x.entity_id_
+    )
    assert len(candidates) == 2

-    assert candidates[0].entity_ == "Q007"
+    assert candidates[0].entity_id_ == "Q007"
    assert 6.999 < candidates[0].entity_freq < 7.01
    assert candidates[0].entity_vector == [0, 0, 7]
-    assert candidates[0].alias_ == "double07"
+    assert candidates[0].alias == "double07"
    assert 0.899 < candidates[0].prior_prob < 0.901

-    assert candidates[1].entity_ == "Q17"
+    assert candidates[1].entity_id_ == "Q17"
    assert 1.99 < candidates[1].entity_freq < 2.01
    assert candidates[1].entity_vector == [7, 1, 0]
-    assert candidates[1].alias_ == "double07"
+    assert candidates[1].alias == "double07"
    assert 0.099 < candidates[1].prior_prob < 0.101


--- a/website/docs/api/inmemorylookupkb.mdx
+++ b/website/docs/api/inmemorylookupkb.mdx
@ -178,22 +178,6 @@ implementation of `KnowledgeBase.get_candidates()`.
 | `mentions`  | The textual mention or alias. ~~Iterable[SpanGroup]~~                                                                                                                             |
 | **RETURNS** | An iterator over iterables of iterables with relevant [`InMemoryCandidate`](/api/kb#candidate) objects (per mention and doc). ~~Iterator[Iterable[Iterable[InMemoryCandidate]]]~~ |

-## InMemoryLookupKB.get_alias_candidates {id="get_alias_candidates",tag="method"}
-
-Given a certain textual mention as input, retrieve a list of candidate entities
-of type [`InMemoryCandidate`](/api/kb#candidate).
-
-> #### Example
->
-> ```python
-> candidates = kb.get_alias_candidates("Douglas")
-> ```
-
-| Name        | Description                                                                   |
-| ----------- | ----------------------------------------------------------------------------- |
-| `alias`     | The textual mention or alias. ~~str~~                                         |
-| **RETURNS** | The list of relevant `InMemoryCandidate` objects. ~~List[InMemoryCandidate]~~ |
-
 ## InMemoryLookupKB.get_vector {id="get_vector",tag="method"}

 Given a certain entity ID, retrieve its pretrained entity vector.
--- a/website/docs/api/kb.mdx
+++ b/website/docs/api/kb.mdx
@ -155,15 +155,15 @@ Restore the state of the knowledge base from a given directory. Note that the

 ## InMemoryCandidate {id="candidate",tag="class"}

-A `InMemoryCandidate` object refers to a textual mention that may or may not be
-resolved to a specific entity from a `KnowledgeBase`. This will be used as input
-for the entity linking algorithm which will disambiguate the various candidates
-to the correct one. Each candidate `(mention, entity)` pair is assigned to a
-certain prior probability.
+An `InMemoryCandidate` object refers to a textual mention (alias) that may or
+may not be resolved to a specific entity from a `KnowledgeBase`. This will be
+used as input for the entity linking algorithm which will disambiguate the
+various candidates to the correct one. Each candidate `(alias, entity)` pair is
+assigned a certain prior probability.

 ### InMemoryCandidate.\_\_init\_\_ {id="candidate-init",tag="method"}

-Construct a `InMemoryCandidate` object. Usually this constructor is not called
+Construct an `InMemoryCandidate` object. Usually this constructor is not called
 directly, but instead these objects are returned by the `get_candidates` method
 of the [`entity_linker`](/api/entitylinker) pipe.

@ -181,7 +181,7 @@ of the [`entity_linker`](/api/entitylinker) pipe.
 | `kb`          | The knowledge base that defined this candidate. ~~KnowledgeBase~~         |
 | `entity_hash` | The hash of the entity's KB ID. ~~int~~                                   |
 | `entity_freq` | The entity frequency as recorded in the KB. ~~float~~                     |
-| `alias_hash`  | The hash of the textual mention or alias. ~~int~~                         |
+| `alias_hash`  | The hash of the entity alias. ~~int~~                                     |
 | `prior_prob`  | The prior probability of the `alias` referring to the `entity`. ~~float~~ |

 ## InMemoryCandidate attributes {id="candidate-attributes"}