From 61bacf81bd97f6ee671428cae861ceb2600ff50c Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Fri, 3 Mar 2023 09:54:28 +0100
Subject: [PATCH 01/39] Update website/docs/api/kb.mdx

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
---
 website/docs/api/kb.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/docs/api/kb.mdx b/website/docs/api/kb.mdx
index 5c5abaef9..3a1cefe8d 100644
--- a/website/docs/api/kb.mdx
+++ b/website/docs/api/kb.mdx
@@ -200,7 +200,7 @@ to a certain prior probability.
 
 ### InMemoryCandidate.\_\_init\_\_ {id="candidate-init",tag="method"}
 
-Construct a `InMemoryCandidate` object. Usually this constructor is not called
+Construct an `InMemoryCandidate` object. Usually this constructor is not called
 directly, but instead these objects are returned by the `get_candidates` method
 of the [`entity_linker`](/api/entitylinker) pipe.
 

From 46fe069f8731c3d591963fe4a1e3e1c1a4b1eef9 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Fri, 3 Mar 2023 10:29:53 +0100
Subject: [PATCH 02/39] Rename alias -> mention.

---
 spacy/kb/candidate.py | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/spacy/kb/candidate.py b/spacy/kb/candidate.py
index 3cc3a6c59..af691b415 100644
--- a/spacy/kb/candidate.py
+++ b/spacy/kb/candidate.py
@@ -3,10 +3,10 @@ from typing import List, Union, Callable
 
 
 class Candidate(abc.ABC):
-    """A `Candidate` object refers to a textual mention (`alias`) that may or may not be resolved
+    """A `Candidate` object refers to a textual mention that may or may not be resolved
     to a specific `entity_id` from a Knowledge Base. This will be used as input for the entity_id linking
     algorithm which will disambiguate the various candidates to the correct one.
-    Each candidate (alias, entity_id) pair is assigned a certain prior probability.
+    Each candidate (mention, entity_id) pair is assigned a certain prior probability.
 
     DOCS: https://spacy.io/api/kb/#candidate-init
     """
@@ -70,7 +70,7 @@ class InMemoryCandidate(Candidate):
         entity_hash: int,
         entity_freq: int,
         entity_vector: List[float],
-        alias_hash: int,
+        mention_hash: int,
         prior_prob: float,
     ):
         """
@@ -79,14 +79,14 @@ class InMemoryCandidate(Candidate):
         entity_hash (str): Hashed entity name /ID.
         entity_freq (int): Entity frequency in KB corpus.
         entity_vector (List[float]): Entity embedding.
-        alias_hash (int): Hashed alias.
+        mention_hash (int): Hashed mention.
         prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
             the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In
             cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
             doesn't) it might be better to eschew this information and always supply the same value.
         """
         super().__init__(
-            mention=retrieve_string_from_hash(alias_hash),
+            mention=retrieve_string_from_hash(mention_hash),
             entity_id=entity_hash,
             entity_name=retrieve_string_from_hash(entity_hash),
             entity_vector=entity_vector,
@@ -95,7 +95,7 @@ class InMemoryCandidate(Candidate):
         self._retrieve_string_from_hash = retrieve_string_from_hash
         self._entity_hash = entity_hash
         self._entity_freq = entity_freq
-        self._alias_hash = alias_hash
+        self._mention_hash = mention_hash
         self._prior_prob = prior_prob
 
     @property
@@ -104,15 +104,11 @@ class InMemoryCandidate(Candidate):
         return self._entity_hash
 
     @property
-    def alias(self) -> int:
-        """RETURNS (int): hash of the alias"""
-        return self._alias_hash
-
-    @property
-    def alias_(self) -> str:
-        """RETURNS (str): ID of the original alias"""
-        return self._retrieve_string_from_hash(self._alias_hash)
+    def mention_hash(self) -> int:
+        """RETURNS (int): Mention hash."""
+        return self._mention_hash
 
     @property
     def entity_freq(self) -> float:
+        """RETURNS (float): Relative entity frequency."""
         return self._entity_freq

From 94e57d0ed5fe1981cf5ac2b54964d0e3f14533a4 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Fri, 3 Mar 2023 11:08:17 +0100
Subject: [PATCH 03/39] Refactor Candidate attribute names. Update docs and
 tests accordingly.

---
 spacy/kb/candidate.py                      | 24 ++++++++--------
 spacy/kb/kb_in_memory.pyx                  |  6 ++--
 spacy/tests/pipeline/test_entity_linker.py | 30 ++++++++++----------
 spacy/tests/serialize/test_serialize_kb.py |  6 ++--
 website/docs/api/kb.mdx                    | 33 ++++++----------------
 5 files changed, 41 insertions(+), 58 deletions(-)

diff --git a/spacy/kb/candidate.py b/spacy/kb/candidate.py
index af691b415..b8d26832a 100644
--- a/spacy/kb/candidate.py
+++ b/spacy/kb/candidate.py
@@ -29,26 +29,26 @@ class Candidate(abc.ABC):
             cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
             doesn't) it might be better to eschew this information and always supply the same value.
         """
-        self._mention = mention
-        self._entity_id = entity_id
-        self._entity_name = entity_name
+        self._mention_ = mention
+        self._entity = entity_id
+        self._entity_ = entity_name
         self._entity_vector = entity_vector
         self._prior_prob = prior_prob
 
     @property
     def entity(self) -> int:
         """RETURNS (int): Unique entity ID."""
-        return self._entity_id
+        return self._entity
 
     @property
     def entity_(self) -> str:
         """RETURNS (int): Entity name."""
-        return self._entity_name
+        return self._entity_
 
     @property
-    def mention(self) -> str:
+    def mention_(self) -> str:
         """RETURNS (str): Mention."""
-        return self._mention
+        return self._mention_
 
     @property
     def entity_vector(self) -> List[float]:
@@ -93,20 +93,20 @@ class InMemoryCandidate(Candidate):
             prior_prob=prior_prob,
         )
         self._retrieve_string_from_hash = retrieve_string_from_hash
-        self._entity_hash = entity_hash
+        self._entity = entity_hash
         self._entity_freq = entity_freq
-        self._mention_hash = mention_hash
+        self._mention = mention_hash
         self._prior_prob = prior_prob
 
     @property
     def entity(self) -> int:
         """RETURNS (int): hash of the entity_id's KB ID/name"""
-        return self._entity_hash
+        return self._entity
 
     @property
-    def mention_hash(self) -> int:
+    def mention(self) -> int:
         """RETURNS (int): Mention hash."""
-        return self._mention_hash
+        return self._mention
 
     @property
     def entity_freq(self) -> float:
diff --git a/spacy/kb/kb_in_memory.pyx b/spacy/kb/kb_in_memory.pyx
index f39432f5e..d7a986320 100644
--- a/spacy/kb/kb_in_memory.pyx
+++ b/spacy/kb/kb_in_memory.pyx
@@ -224,9 +224,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
             self._aliases_table[alias_index] = alias_entry
 
     def get_candidates(self, mention: Span) -> Iterable[InMemoryCandidate]:
-        return self.get_alias_candidates(mention.text)  # type: ignore
+        return self._get_alias_candidates(mention.text)  # type: ignore
 
-    def get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]:
+    def _get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]:
         """
         Return candidate entities for an alias. Each candidate defines the entity, the original alias,
         and the prior probability of that alias resolving to that entity.
@@ -244,7 +244,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
                 entity_hash=self._entries[entry_index].entity_hash,
                 entity_freq=self._entries[entry_index].freq,
                 entity_vector=self._vectors_table[self._entries[entry_index].vector_index],
-                alias_hash=alias_hash,
+                mention_hash=alias_hash,
                 prior_prob=prior_prob
             )
             for (entry_index, prior_prob) in zip(alias_entry.entry_indices, alias_entry.probs)
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index cb1e4a733..23eb5e205 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -469,7 +469,7 @@ def test_candidate_generation(nlp):
 
     # test the content of the candidates
     assert get_candidates(mykb, adam_ent)[0].entity_ == "Q2"
-    assert get_candidates(mykb, adam_ent)[0].alias_ == "adam"
+    assert get_candidates(mykb, adam_ent)[0].mention_ == "adam"
     assert_almost_equal(get_candidates(mykb, adam_ent)[0].entity_freq, 12)
     assert_almost_equal(get_candidates(mykb, adam_ent)[0].prior_prob, 0.9)
 
@@ -499,7 +499,7 @@ def test_el_pipe_configuration(nlp):
     assert doc[2].ent_kb_id_ == "Q2"
 
     def get_lowercased_candidates(kb, span):
-        return kb.get_alias_candidates(span.text.lower())
+        return kb._get_alias_candidates(span.text.lower())
 
     def get_lowercased_candidates_batch(kb, spans):
         return [get_lowercased_candidates(kb, span) for span in spans]
@@ -558,24 +558,24 @@ def test_vocab_serialization(nlp):
     mykb.add_alias(alias="douglas", entities=["Q2", "Q3"], probabilities=[0.4, 0.1])
     adam_hash = mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])
 
-    candidates = mykb.get_alias_candidates("adam")
+    candidates = mykb._get_alias_candidates("adam")
     assert len(candidates) == 1
     assert candidates[0].entity == q2_hash
     assert candidates[0].entity_ == "Q2"
-    assert candidates[0].alias == adam_hash
-    assert candidates[0].alias_ == "adam"
+    assert candidates[0].mention == adam_hash
+    assert candidates[0].mention_ == "adam"
 
     with make_tempdir() as d:
         mykb.to_disk(d / "kb")
         kb_new_vocab = InMemoryLookupKB(Vocab(), entity_vector_length=1)
         kb_new_vocab.from_disk(d / "kb")
 
-        candidates = kb_new_vocab.get_alias_candidates("adam")
+        candidates = kb_new_vocab._get_alias_candidates("adam")
         assert len(candidates) == 1
         assert candidates[0].entity == q2_hash
         assert candidates[0].entity_ == "Q2"
-        assert candidates[0].alias == adam_hash
-        assert candidates[0].alias_ == "adam"
+        assert candidates[0].mention == adam_hash
+        assert candidates[0].mention_ == "adam"
 
         assert kb_new_vocab.get_vector("Q2") == [2]
         assert_almost_equal(kb_new_vocab.get_prior_prob("Q2", "douglas"), 0.4)
@@ -595,20 +595,20 @@ def test_append_alias(nlp):
     mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])
 
     # test the size of the relevant candidates
-    assert len(mykb.get_alias_candidates("douglas")) == 2
+    assert len(mykb._get_alias_candidates("douglas")) == 2
 
     # append an alias
     mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.2)
 
     # test the size of the relevant candidates has been incremented
-    assert len(mykb.get_alias_candidates("douglas")) == 3
+    assert len(mykb._get_alias_candidates("douglas")) == 3
 
     # append the same alias-entity pair again should not work (will throw a warning)
     with pytest.warns(UserWarning):
         mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.3)
 
     # test the size of the relevant candidates remained unchanged
-    assert len(mykb.get_alias_candidates("douglas")) == 3
+    assert len(mykb._get_alias_candidates("douglas")) == 3
 
 
 @pytest.mark.filterwarnings("ignore:\\[W036")
@@ -905,11 +905,11 @@ def test_kb_to_bytes():
     assert kb_2.contains_alias("Russ Cochran")
     assert kb_1.get_size_aliases() == kb_2.get_size_aliases()
     assert kb_1.get_alias_strings() == kb_2.get_alias_strings()
-    assert len(kb_1.get_alias_candidates("Russ Cochran")) == len(
-        kb_2.get_alias_candidates("Russ Cochran")
+    assert len(kb_1._get_alias_candidates("Russ Cochran")) == len(
+        kb_2._get_alias_candidates("Russ Cochran")
     )
-    assert len(kb_1.get_alias_candidates("Randomness")) == len(
-        kb_2.get_alias_candidates("Randomness")
+    assert len(kb_1._get_alias_candidates("Randomness")) == len(
+        kb_2._get_alias_candidates("Randomness")
     )
 
 
diff --git a/spacy/tests/serialize/test_serialize_kb.py b/spacy/tests/serialize/test_serialize_kb.py
index 8d3653ab1..9e501c32f 100644
--- a/spacy/tests/serialize/test_serialize_kb.py
+++ b/spacy/tests/serialize/test_serialize_kb.py
@@ -63,19 +63,19 @@ def _check_kb(kb):
         assert alias_string not in kb.get_alias_strings()
 
     # check candidates & probabilities
-    candidates = sorted(kb.get_alias_candidates("double07"), key=lambda x: x.entity_)
+    candidates = sorted(kb._get_alias_candidates("double07"), key=lambda x: x.entity_)
     assert len(candidates) == 2
 
     assert candidates[0].entity_ == "Q007"
     assert 6.999 < candidates[0].entity_freq < 7.01
     assert candidates[0].entity_vector == [0, 0, 7]
-    assert candidates[0].alias_ == "double07"
+    assert candidates[0].mention_ == "double07"
     assert 0.899 < candidates[0].prior_prob < 0.901
 
     assert candidates[1].entity_ == "Q17"
     assert 1.99 < candidates[1].entity_freq < 2.01
     assert candidates[1].entity_vector == [7, 1, 0]
-    assert candidates[1].alias_ == "double07"
+    assert candidates[1].mention_ == "double07"
     assert 0.099 < candidates[1].prior_prob < 0.101
 
 
diff --git a/website/docs/api/kb.mdx b/website/docs/api/kb.mdx
index 3a1cefe8d..4d51dbc16 100644
--- a/website/docs/api/kb.mdx
+++ b/website/docs/api/kb.mdx
@@ -103,23 +103,6 @@ to you.
 | `mentions`  | The textual mention or alias. ~~Iterable[Span]~~                                             |
 | **RETURNS** | An iterable of iterable with relevant `Candidate` objects. ~~Iterable[Iterable[Candidate]]~~ |
 
-## KnowledgeBase.get_alias_candidates {id="get_alias_candidates",tag="method"}
-
-<Infobox variant="warning">
-  This method is _not_ available from spaCy 3.5 onwards.
-</Infobox>
-
-From spaCy 3.5 on `KnowledgeBase` is an abstract class (with
-[`InMemoryLookupKB`](/api/inmemorylookupkb) being a drop-in replacement) to
-allow more flexibility in customizing knowledge bases. Some of its methods were
-moved to [`InMemoryLookupKB`](/api/inmemorylookupkb) during this refactoring,
-one of those being `get_alias_candidates()`. This method is now available as
-[`InMemoryLookupKB.get_alias_candidates()`](/api/inmemorylookupkb#get_alias_candidates).
-Note:
-[`InMemoryLookupKB.get_candidates()`](/api/inmemorylookupkb#get_candidates)
-defaults to
-[`InMemoryLookupKB.get_alias_candidates()`](/api/inmemorylookupkb#get_alias_candidates).
-
 ## KnowledgeBase.get_vector {id="get_vector",tag="method"}
 
 Given a certain entity ID, retrieve its pretrained entity vector.
@@ -207,19 +190,19 @@ of the [`entity_linker`](/api/entitylinker) pipe.
 > #### Example```python
 >
 > from spacy.kb import InMemoryCandidate candidate = InMemoryCandidate(kb,
-> entity_hash, entity_freq, entity_vector, alias_hash, prior_prob)
+> entity_hash, entity_freq, entity_vector, mention_hash, prior_prob)
 >
 > ```
 >
 > ```
 
-| Name          | Description                                                               |
-| ------------- | ------------------------------------------------------------------------- |
-| `kb`          | The knowledge base that defined this candidate. ~~KnowledgeBase~~         |
-| `entity_hash` | The hash of the entity's KB ID. ~~int~~                                   |
-| `entity_freq` | The entity frequency as recorded in the KB. ~~float~~                     |
-| `alias_hash`  | The hash of the textual mention or alias. ~~int~~                         |
-| `prior_prob`  | The prior probability of the `alias` referring to the `entity`. ~~float~~ |
+| Name           | Description                                                               |
+| -------------- | ------------------------------------------------------------------------- |
+| `kb`           | The knowledge base that defined this candidate. ~~KnowledgeBase~~         |
+| `entity_hash`  | The hash of the entity's KB ID. ~~int~~                                   |
+| `entity_freq`  | The entity frequency as recorded in the KB. ~~float~~                     |
+| `mention_hash` | The hash of the textual mention. ~~int~~                                  |
+| `prior_prob`   | The prior probability of the `alias` referring to the `entity`. ~~float~~ |
 
 ## InMemoryCandidate attributes {id="candidate-attributes"}
 

From 38dce966e55dbeb61d1d085ff97ce50f5095dea8 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Sun, 5 Mar 2023 13:49:13 +0100
Subject: [PATCH 04/39] Refactor Candidate attributes and their usage.

---
 spacy/kb/candidate.py                      | 76 ++++++++++------------
 spacy/kb/kb_in_memory.pyx                  | 10 +--
 spacy/pipeline/entity_linker.py            |  8 +--
 spacy/tests/pipeline/test_entity_linker.py | 18 +++--
 spacy/tests/serialize/test_serialize_kb.py | 10 +--
 5 files changed, 58 insertions(+), 64 deletions(-)

diff --git a/spacy/kb/candidate.py b/spacy/kb/candidate.py
index b8d26832a..663b68168 100644
--- a/spacy/kb/candidate.py
+++ b/spacy/kb/candidate.py
@@ -14,41 +14,46 @@ class Candidate(abc.ABC):
     def __init__(
         self,
         mention: str,
-        entity_id: int,
-        entity_name: str,
+        entity_id: Union[str, int],
         entity_vector: List[float],
         prior_prob: float,
     ):
         """Initializes properties of `Candidate` instance.
         mention (str): Mention text for this candidate.
-        entity_id (int): Unique entity ID.
-        entity_name (str): Entity name.
+        entity_id (Union[str, int]): Unique entity ID.
         entity_vector (List[float]): Entity embedding.
         prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
             the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In
             cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
             doesn't) it might be better to eschew this information and always supply the same value.
         """
-        self._mention_ = mention
-        self._entity = entity_id
-        self._entity_ = entity_name
+        self._mention = mention
+        self._entity_id = entity_id
+        # Note that hashing an int value yields the same int value.
+        self._entity_id_hash = hash(entity_id)
         self._entity_vector = entity_vector
         self._prior_prob = prior_prob
 
     @property
-    def entity(self) -> int:
-        """RETURNS (int): Unique entity ID."""
-        return self._entity
+    def entity_id(self) -> Union[str, int]:
+        """RETURNS (Union[str, int]): Unique entity ID."""
+        return self._entity_id
 
     @property
-    def entity_(self) -> str:
-        """RETURNS (int): Entity name."""
-        return self._entity_
+    def entity_id_int(self) -> int:
+        """RETURNS (int): Numerical representation of entity ID (if entity ID is numerical, this is just the entity ID,
+        otherwise the hash of the entity ID string)."""
+        return self._entity_id_hash
 
     @property
-    def mention_(self) -> str:
+    def entity_id_str(self) -> str:
+        """RETURNS (str): String representation of entity ID."""
+        return str(self._entity_id)
+
+    @property
+    def mention(self) -> str:
         """RETURNS (str): Mention."""
-        return self._mention_
+        return self._mention
 
     @property
     def entity_vector(self) -> List[float]:
@@ -66,49 +71,40 @@ class InMemoryCandidate(Candidate):
 
     def __init__(
         self,
-        retrieve_string_from_hash: Callable[[int], str],
-        entity_hash: int,
-        entity_freq: int,
+        hash_to_str: Callable[[int], str],
+        entity_id: int,
+        mention: str,
         entity_vector: List[float],
-        mention_hash: int,
         prior_prob: float,
+        entity_freq: int
     ):
         """
-        retrieve_string_from_hash (Callable[[int], str]): Callable retrieving entity name from provided entity/vocab
-            hash.
-        entity_hash (str): Hashed entity name /ID.
+        hash_to_str (Callable[[int], str]): Callable retrieving entity name from provided entity/vocab hash.
+        entity_id (str): Entity ID as hash that can be looked up with InMemoryKB.vocab.strings.__getitem__().
         entity_freq (int): Entity frequency in KB corpus.
         entity_vector (List[float]): Entity embedding.
-        mention_hash (int): Hashed mention.
+        mention (str): Mention.
         prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
             the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In
             cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
             doesn't) it might be better to eschew this information and always supply the same value.
         """
         super().__init__(
-            mention=retrieve_string_from_hash(mention_hash),
-            entity_id=entity_hash,
-            entity_name=retrieve_string_from_hash(entity_hash),
+            mention=mention,
+            entity_id=entity_id,
             entity_vector=entity_vector,
             prior_prob=prior_prob,
         )
-        self._retrieve_string_from_hash = retrieve_string_from_hash
-        self._entity = entity_hash
+        self._hash_to_str = hash_to_str
         self._entity_freq = entity_freq
-        self._mention = mention_hash
-        self._prior_prob = prior_prob
-
-    @property
-    def entity(self) -> int:
-        """RETURNS (int): hash of the entity_id's KB ID/name"""
-        return self._entity
-
-    @property
-    def mention(self) -> int:
-        """RETURNS (int): Mention hash."""
-        return self._mention
+        self._entity_id_str = self._hash_to_str(self._entity_id)
 
     @property
     def entity_freq(self) -> float:
         """RETURNS (float): Relative entity frequency."""
         return self._entity_freq
+
+    @property
+    def entity_id_str(self) -> str:
+        """RETURNS (str): String representation of entity ID."""
+        return self._entity_id_str
diff --git a/spacy/kb/kb_in_memory.pyx b/spacy/kb/kb_in_memory.pyx
index d7a986320..ecdc148f4 100644
--- a/spacy/kb/kb_in_memory.pyx
+++ b/spacy/kb/kb_in_memory.pyx
@@ -240,12 +240,12 @@ cdef class InMemoryLookupKB(KnowledgeBase):
 
         return [
             InMemoryCandidate(
-                retrieve_string_from_hash=self.vocab.strings.__getitem__,
-                entity_hash=self._entries[entry_index].entity_hash,
-                entity_freq=self._entries[entry_index].freq,
+                hash_to_str=self.vocab.strings.__getitem__,
+                entity_id=self._entries[entry_index].entity_hash,
+                mention=alias,
                 entity_vector=self._vectors_table[self._entries[entry_index].vector_index],
-                mention_hash=alias_hash,
-                prior_prob=prior_prob
+                prior_prob=prior_prob,
+                entity_freq=self._entries[entry_index].freq
             )
             for (entry_index, prior_prob) in zip(alias_entry.entry_indices, alias_entry.probs)
             if entry_index != 0
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index 907307056..e892141cc 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -535,12 +535,12 @@ class EntityLinker(TrainablePipe):
                             )
                         elif len(candidates) == 1 and self.threshold is None:
                             # shortcut for efficiency reasons: take the 1 candidate
-                            final_kb_ids.append(candidates[0].entity_)
+                            final_kb_ids.append(candidates[0].entity_id_str)
                             self._add_activations(
                                 doc_scores=doc_scores,
                                 doc_ents=doc_ents,
                                 scores=[1.0],
-                                ents=[candidates[0].entity],
+                                ents=[candidates[0].entity_id_int],
                             )
                         else:
                             random.shuffle(candidates)
@@ -570,7 +570,7 @@ class EntityLinker(TrainablePipe):
                                     raise ValueError(Errors.E161)
                                 scores = prior_probs + sims - (prior_probs * sims)
                             final_kb_ids.append(
-                                candidates[scores.argmax().item()].entity_
+                                candidates[scores.argmax().item()].entity_id_str
                                 if self.threshold is None
                                 or scores.max() >= self.threshold
                                 else EntityLinker.NIL
@@ -579,7 +579,7 @@ class EntityLinker(TrainablePipe):
                                 doc_scores=doc_scores,
                                 doc_ents=doc_ents,
                                 scores=scores,
-                                ents=[c.entity for c in candidates],
+                                ents=[c.entity_id_int for c in candidates],
                             )
             self._add_doc_activations(
                 docs_scores=docs_scores,
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index 23eb5e205..7c82db3c7 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -468,8 +468,8 @@ def test_candidate_generation(nlp):
     assert len(get_candidates(mykb, shrubbery_ent)) == 0
 
     # test the content of the candidates
-    assert get_candidates(mykb, adam_ent)[0].entity_ == "Q2"
-    assert get_candidates(mykb, adam_ent)[0].mention_ == "adam"
+    assert get_candidates(mykb, adam_ent)[0].entity_id_str == "Q2"
+    assert get_candidates(mykb, adam_ent)[0].mention == "adam"
     assert_almost_equal(get_candidates(mykb, adam_ent)[0].entity_freq, 12)
     assert_almost_equal(get_candidates(mykb, adam_ent)[0].prior_prob, 0.9)
 
@@ -560,10 +560,9 @@ def test_vocab_serialization(nlp):
 
     candidates = mykb._get_alias_candidates("adam")
     assert len(candidates) == 1
-    assert candidates[0].entity == q2_hash
-    assert candidates[0].entity_ == "Q2"
-    assert candidates[0].mention == adam_hash
-    assert candidates[0].mention_ == "adam"
+    assert candidates[0].entity_id_int == q2_hash
+    assert candidates[0].entity_id_str == "Q2"
+    assert candidates[0].mention == "adam"
 
     with make_tempdir() as d:
         mykb.to_disk(d / "kb")
@@ -572,10 +571,9 @@ def test_vocab_serialization(nlp):
 
         candidates = kb_new_vocab._get_alias_candidates("adam")
         assert len(candidates) == 1
-        assert candidates[0].entity == q2_hash
-        assert candidates[0].entity_ == "Q2"
-        assert candidates[0].mention == adam_hash
-        assert candidates[0].mention_ == "adam"
+        assert candidates[0].entity_id_int == q2_hash
+        assert candidates[0].entity_id_str == "Q2"
+        assert candidates[0].mention == "adam"
 
         assert kb_new_vocab.get_vector("Q2") == [2]
         assert_almost_equal(kb_new_vocab.get_prior_prob("Q2", "douglas"), 0.4)
diff --git a/spacy/tests/serialize/test_serialize_kb.py b/spacy/tests/serialize/test_serialize_kb.py
index 9e501c32f..860b9f8c7 100644
--- a/spacy/tests/serialize/test_serialize_kb.py
+++ b/spacy/tests/serialize/test_serialize_kb.py
@@ -63,19 +63,19 @@ def _check_kb(kb):
         assert alias_string not in kb.get_alias_strings()
 
     # check candidates & probabilities
-    candidates = sorted(kb._get_alias_candidates("double07"), key=lambda x: x.entity_)
+    candidates = sorted(kb._get_alias_candidates("double07"), key=lambda x: x.entity_id_str)
     assert len(candidates) == 2
 
-    assert candidates[0].entity_ == "Q007"
+    assert candidates[0].entity_id_str == "Q007"
     assert 6.999 < candidates[0].entity_freq < 7.01
     assert candidates[0].entity_vector == [0, 0, 7]
-    assert candidates[0].mention_ == "double07"
+    assert candidates[0].mention == "double07"
     assert 0.899 < candidates[0].prior_prob < 0.901
 
-    assert candidates[1].entity_ == "Q17"
+    assert candidates[1].entity_id_str == "Q17"
     assert 1.99 < candidates[1].entity_freq < 2.01
     assert candidates[1].entity_vector == [7, 1, 0]
-    assert candidates[1].mention_ == "double07"
+    assert candidates[1].mention == "double07"
     assert 0.099 < candidates[1].prior_prob < 0.101
 
 

From 5f40b3e5231ef686ecc7682444d23757a2ea6b3a Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Sun, 5 Mar 2023 14:14:16 +0100
Subject: [PATCH 05/39] Format.

---
 spacy/kb/candidate.py                      | 2 +-
 spacy/tests/serialize/test_serialize_kb.py | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/spacy/kb/candidate.py b/spacy/kb/candidate.py
index 663b68168..b32b72dd8 100644
--- a/spacy/kb/candidate.py
+++ b/spacy/kb/candidate.py
@@ -76,7 +76,7 @@ class InMemoryCandidate(Candidate):
         mention: str,
         entity_vector: List[float],
         prior_prob: float,
-        entity_freq: int
+        entity_freq: int,
     ):
         """
         hash_to_str (Callable[[int], str]): Callable retrieving entity name from provided entity/vocab hash.
diff --git a/spacy/tests/serialize/test_serialize_kb.py b/spacy/tests/serialize/test_serialize_kb.py
index 860b9f8c7..3d75862c6 100644
--- a/spacy/tests/serialize/test_serialize_kb.py
+++ b/spacy/tests/serialize/test_serialize_kb.py
@@ -63,7 +63,9 @@ def _check_kb(kb):
         assert alias_string not in kb.get_alias_strings()
 
     # check candidates & probabilities
-    candidates = sorted(kb._get_alias_candidates("double07"), key=lambda x: x.entity_id_str)
+    candidates = sorted(
+        kb._get_alias_candidates("double07"), key=lambda x: x.entity_id_str
+    )
     assert len(candidates) == 2
 
     assert candidates[0].entity_id_str == "Q007"

From 670e1ca7c5a1dfc29f97e13c71b54c2485ba0353 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Sun, 5 Mar 2023 14:33:32 +0100
Subject: [PATCH 06/39] Fix mypy error.

---
 spacy/errors.py       | 1 +
 spacy/kb/candidate.py | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/spacy/errors.py b/spacy/errors.py
index eadbf63d6..9473f1cf7 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -952,6 +952,7 @@ class Errors(metaclass=ErrorsWithCodes):
              "with `displacy.serve(doc, port=port)`")
     E1050 = ("Port {port} is already in use. Please specify an available port with `displacy.serve(doc, port=port)` "
              "or use `auto_switch_port=True` to pick an available port automatically.")
+    E1051 = ("Expected `entity_id` to be of type {should_type}, but is of type {is_type}.")
 
     # v4 error strings
     E4000 = ("Expected a Doc as input, but got: '{type}'")
diff --git a/spacy/kb/candidate.py b/spacy/kb/candidate.py
index b32b72dd8..a481d88f5 100644
--- a/spacy/kb/candidate.py
+++ b/spacy/kb/candidate.py
@@ -1,6 +1,8 @@
 import abc
 from typing import List, Union, Callable
 
+from ..errors import Errors
+
 
 class Candidate(abc.ABC):
     """A `Candidate` object refers to a textual mention that may or may not be resolved
@@ -97,6 +99,10 @@ class InMemoryCandidate(Candidate):
         )
         self._hash_to_str = hash_to_str
         self._entity_freq = entity_freq
+        if not isinstance(self._entity_id, int):
+            raise ValueError(
+                Errors.E1051.format(should_type="int", is_type=str(type(entity_id)))
+            )
         self._entity_id_str = self._hash_to_str(self._entity_id)
 
     @property

From 2ac586fdb5681119a4a7842577764fd7940977cc Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Sun, 5 Mar 2023 14:43:32 +0100
Subject: [PATCH 07/39] Update error code in line with v4 convention.

---
 spacy/errors.py       | 2 +-
 spacy/kb/candidate.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 9473f1cf7..74111e48d 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -952,7 +952,6 @@ class Errors(metaclass=ErrorsWithCodes):
              "with `displacy.serve(doc, port=port)`")
     E1050 = ("Port {port} is already in use. Please specify an available port with `displacy.serve(doc, port=port)` "
              "or use `auto_switch_port=True` to pick an available port automatically.")
-    E1051 = ("Expected `entity_id` to be of type {should_type}, but is of type {is_type}.")
 
     # v4 error strings
     E4000 = ("Expected a Doc as input, but got: '{type}'")
@@ -962,6 +961,7 @@ class Errors(metaclass=ErrorsWithCodes):
     E4003 = ("Training examples for distillation must have the exact same tokens in the "
              "reference and predicted docs.")
     E4004 = ("Backprop is not supported when is_train is not set.")
+    E4005 = ("Expected `entity_id` to be of type {should_type}, but is of type {is_type}.")
 
 RENAMED_LANGUAGE_CODES = {"xx": "mul", "is": "isl"}
 
diff --git a/spacy/kb/candidate.py b/spacy/kb/candidate.py
index a481d88f5..9778b2880 100644
--- a/spacy/kb/candidate.py
+++ b/spacy/kb/candidate.py
@@ -101,7 +101,7 @@ class InMemoryCandidate(Candidate):
         self._entity_freq = entity_freq
         if not isinstance(self._entity_id, int):
             raise ValueError(
-                Errors.E1051.format(should_type="int", is_type=str(type(entity_id)))
+                Errors.E4005.format(should_type="int", is_type=str(type(entity_id)))
             )
         self._entity_id_str = self._hash_to_str(self._entity_id)
 

From 41b3a0d932aafb4db9db02ae2e03b560305e0d53 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Tue, 7 Mar 2023 13:10:45 +0100
Subject: [PATCH 08/39] Drop support for EntityLinker_v1. (#12377)

---
 spacy/errors.py                            |  1 +
 spacy/pipeline/entity_linker.py            | 23 ++--------------------
 spacy/tests/pipeline/test_entity_linker.py |  7 +------
 3 files changed, 4 insertions(+), 27 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 5049100d8..390de126e 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -960,6 +960,7 @@ class Errors(metaclass=ErrorsWithCodes):
     E4003 = ("Training examples for distillation must have the exact same tokens in the "
              "reference and predicted docs.")
     E4004 = ("Backprop is not supported when is_train is not set.")
+    E4005 = ("EntityLinker_v1 is not supported in spaCy v4. Update your configuration.")
 
 RENAMED_LANGUAGE_CODES = {"xx": "mul", "is": "isl"}
 
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index cd13a4b21..6a187b6c3 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -117,28 +117,9 @@ def make_entity_linker(
         prediction is discarded. If None, predictions are not filtered by any threshold.
     save_activations (bool): save model activations in Doc when annotating.
     """
-
     if not model.attrs.get("include_span_maker", False):
-        try:
-            from spacy_legacy.components.entity_linker import EntityLinker_v1
-        except:
-            raise ImportError(
-                "In order to use v1 of the EntityLinker, you must use spacy-legacy>=3.0.12."
-            )
-        # The only difference in arguments here is that use_gold_ents and threshold aren't available.
-        return EntityLinker_v1(
-            nlp.vocab,
-            model,
-            name,
-            labels_discard=labels_discard,
-            n_sents=n_sents,
-            incl_prior=incl_prior,
-            incl_context=incl_context,
-            entity_vector_length=entity_vector_length,
-            get_candidates=get_candidates,
-            overwrite=overwrite,
-            scorer=scorer,
-        )
+        raise ValueError(Errors.E4005)
+
     return EntityLinker(
         nlp.vocab,
         model,
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index ed84ce674..87cacfc9d 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -993,13 +993,11 @@ def test_scorer_links():
 @pytest.mark.parametrize(
     "name,config",
     [
-        ("entity_linker", {"@architectures": "spacy.EntityLinker.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL}),
         ("entity_linker", {"@architectures": "spacy.EntityLinker.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL}),
     ],
 )
 # fmt: on
 def test_legacy_architectures(name, config):
-    from spacy_legacy.components.entity_linker import EntityLinker_v1
 
     # Ensure that the legacy architectures still work
     vector_length = 3
@@ -1022,10 +1020,7 @@ def test_legacy_architectures(name, config):
         return mykb
 
     entity_linker = nlp.add_pipe(name, config={"model": config})
-    if config["@architectures"] == "spacy.EntityLinker.v1":
-        assert isinstance(entity_linker, EntityLinker_v1)
-    else:
-        assert isinstance(entity_linker, EntityLinker)
+    assert isinstance(entity_linker, EntityLinker)
     entity_linker.set_kb(create_kb)
     optimizer = nlp.initialize(get_examples=lambda: train_examples)
 

From 082992aebb45773b5334b41b125886b1bb03bb7d Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Tue, 7 Mar 2023 13:54:11 +0100
Subject: [PATCH 09/39] Update spacy/kb/candidate.py

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
---
 spacy/kb/candidate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/kb/candidate.py b/spacy/kb/candidate.py
index 9778b2880..2a18b753c 100644
--- a/spacy/kb/candidate.py
+++ b/spacy/kb/candidate.py
@@ -82,7 +82,7 @@ class InMemoryCandidate(Candidate):
     ):
         """
         hash_to_str (Callable[[int], str]): Callable retrieving entity name from provided entity/vocab hash.
-        entity_id (str): Entity ID as hash that can be looked up with InMemoryKB.vocab.strings.__getitem__().
+        entity_id (int): Entity ID as hash that can be looked up with InMemoryKB.vocab.strings.__getitem__().
         entity_freq (int): Entity frequency in KB corpus.
         entity_vector (List[float]): Entity embedding.
         mention (str): Mention.

From f8a02f7fef740ac8ed4e5db4dc6f0f9f8a86b4c4 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Tue, 7 Mar 2023 13:58:42 +0100
Subject: [PATCH 10/39] Updated error code.

---
 spacy/errors.py       | 2 +-
 spacy/kb/candidate.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 58d2c81a0..73d867792 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -960,7 +960,7 @@ class Errors(metaclass=ErrorsWithCodes):
     E4003 = ("Training examples for distillation must have the exact same tokens in the "
              "reference and predicted docs.")
     E4004 = ("Backprop is not supported when is_train is not set.")
-    E4005 = ("Expected `entity_id` to be of type {should_type}, but is of type {is_type}.")
+    E4006 = ("Expected `entity_id` to be of type {exp_type}, but is of type {found_type}.")
 
 RENAMED_LANGUAGE_CODES = {"xx": "mul", "is": "isl"}
 
diff --git a/spacy/kb/candidate.py b/spacy/kb/candidate.py
index 2a18b753c..911af1127 100644
--- a/spacy/kb/candidate.py
+++ b/spacy/kb/candidate.py
@@ -101,7 +101,7 @@ class InMemoryCandidate(Candidate):
         self._entity_freq = entity_freq
         if not isinstance(self._entity_id, int):
             raise ValueError(
-                Errors.E4005.format(should_type="int", is_type=str(type(entity_id)))
+                Errors.E4006.format(exp_type="int", found_type=str(type(entity_id)))
             )
         self._entity_id_str = self._hash_to_str(self._entity_id)
 

From cea58ade8969592266483848feae3fd06f537e6b Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Tue, 7 Mar 2023 14:35:38 +0100
Subject: [PATCH 11/39] Simplify interface for int/str representations.

---
 spacy/kb/candidate.py                      | 11 +++--------
 spacy/pipeline/entity_linker.py            |  8 ++++----
 spacy/tests/pipeline/test_entity_linker.py | 10 +++++-----
 spacy/tests/serialize/test_serialize_kb.py |  6 +++---
 4 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/spacy/kb/candidate.py b/spacy/kb/candidate.py
index 911af1127..b1c188e09 100644
--- a/spacy/kb/candidate.py
+++ b/spacy/kb/candidate.py
@@ -37,18 +37,13 @@ class Candidate(abc.ABC):
         self._prior_prob = prior_prob
 
     @property
-    def entity_id(self) -> Union[str, int]:
-        """RETURNS (Union[str, int]): Unique entity ID."""
-        return self._entity_id
-
-    @property
-    def entity_id_int(self) -> int:
+    def entity_id(self) -> int:
         """RETURNS (int): Numerical representation of entity ID (if entity ID is numerical, this is just the entity ID,
         otherwise the hash of the entity ID string)."""
         return self._entity_id_hash
 
     @property
-    def entity_id_str(self) -> str:
+    def entity_id_(self) -> str:
         """RETURNS (str): String representation of entity ID."""
         return str(self._entity_id)
 
@@ -111,6 +106,6 @@ class InMemoryCandidate(Candidate):
         return self._entity_freq
 
     @property
-    def entity_id_str(self) -> str:
+    def entity_id_(self) -> str:
         """RETURNS (str): String representation of entity ID."""
         return self._entity_id_str
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index 4d8370fd3..39cff218a 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -522,12 +522,12 @@ class EntityLinker(TrainablePipe):
                             )
                         elif len(candidates) == 1 and self.threshold is None:
                             # shortcut for efficiency reasons: take the 1 candidate
-                            final_kb_ids.append(candidates[0].entity_id_str)
+                            final_kb_ids.append(candidates[0].entity_id_)
                             self._add_activations(
                                 doc_scores=doc_scores,
                                 doc_ents=doc_ents,
                                 scores=[1.0],
-                                ents=[candidates[0].entity_id_int],
+                                ents=[candidates[0].entity_id],
                             )
                         else:
                             random.shuffle(candidates)
@@ -557,7 +557,7 @@ class EntityLinker(TrainablePipe):
                                     raise ValueError(Errors.E161)
                                 scores = prior_probs + sims - (prior_probs * sims)
                             final_kb_ids.append(
-                                candidates[scores.argmax().item()].entity_id_str
+                                candidates[scores.argmax().item()].entity_id_
                                 if self.threshold is None
                                 or scores.max() >= self.threshold
                                 else EntityLinker.NIL
@@ -566,7 +566,7 @@ class EntityLinker(TrainablePipe):
                                 doc_scores=doc_scores,
                                 doc_ents=doc_ents,
                                 scores=scores,
-                                ents=[c.entity_id_int for c in candidates],
+                                ents=[c.entity_id for c in candidates],
                             )
             self._add_doc_activations(
                 docs_scores=docs_scores,
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index 1b5117947..d0cfdd3c7 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -471,7 +471,7 @@ def test_candidate_generation(nlp):
     assert len(get_candidates(mykb, shrubbery_ent)) == 0
 
     # test the content of the candidates
-    assert get_candidates(mykb, adam_ent)[0].entity_id_str == "Q2"
+    assert get_candidates(mykb, adam_ent)[0].entity_id_ == "Q2"
     assert get_candidates(mykb, adam_ent)[0].mention == "adam"
     assert_almost_equal(get_candidates(mykb, adam_ent)[0].entity_freq, 12)
     assert_almost_equal(get_candidates(mykb, adam_ent)[0].prior_prob, 0.9)
@@ -563,8 +563,8 @@ def test_vocab_serialization(nlp):
 
     candidates = mykb._get_alias_candidates("adam")
     assert len(candidates) == 1
-    assert candidates[0].entity_id_int == q2_hash
-    assert candidates[0].entity_id_str == "Q2"
+    assert candidates[0].entity_id == q2_hash
+    assert candidates[0].entity_id_ == "Q2"
     assert candidates[0].mention == "adam"
 
     with make_tempdir() as d:
@@ -574,8 +574,8 @@ def test_vocab_serialization(nlp):
 
         candidates = kb_new_vocab._get_alias_candidates("adam")
         assert len(candidates) == 1
-        assert candidates[0].entity_id_int == q2_hash
-        assert candidates[0].entity_id_str == "Q2"
+        assert candidates[0].entity_id == q2_hash
+        assert candidates[0].entity_id_ == "Q2"
         assert candidates[0].mention == "adam"
 
         assert kb_new_vocab.get_vector("Q2") == [2]
diff --git a/spacy/tests/serialize/test_serialize_kb.py b/spacy/tests/serialize/test_serialize_kb.py
index 7365765cb..336fd16fe 100644
--- a/spacy/tests/serialize/test_serialize_kb.py
+++ b/spacy/tests/serialize/test_serialize_kb.py
@@ -67,17 +67,17 @@ def _check_kb(kb):
 
     # check candidates & probabilities
     candidates = sorted(
-        kb._get_alias_candidates("double07"), key=lambda x: x.entity_id_str
+        kb._get_alias_candidates("double07"), key=lambda x: x.entity_id_
     )
     assert len(candidates) == 2
 
-    assert candidates[0].entity_id_str == "Q007"
+    assert candidates[0].entity_id_ == "Q007"
     assert 6.999 < candidates[0].entity_freq < 7.01
     assert candidates[0].entity_vector == [0, 0, 7]
     assert candidates[0].mention == "double07"
     assert 0.899 < candidates[0].prior_prob < 0.901
 
-    assert candidates[1].entity_id_str == "Q17"
+    assert candidates[1].entity_id_ == "Q17"
     assert 1.99 < candidates[1].entity_freq < 2.01
     assert candidates[1].entity_vector == [7, 1, 0]
     assert candidates[1].mention == "double07"

From 1ba2fc42070f2065b766deb8a582ad10c84615ac Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Thu, 9 Mar 2023 12:01:42 +0100
Subject: [PATCH 12/39] Update website/docs/api/kb.mdx

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
---
 website/docs/api/kb.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/docs/api/kb.mdx b/website/docs/api/kb.mdx
index 4d51dbc16..e3b699140 100644
--- a/website/docs/api/kb.mdx
+++ b/website/docs/api/kb.mdx
@@ -175,7 +175,7 @@ Restore the state of the knowledge base from a given directory. Note that the
 
 ## InMemoryCandidate {id="candidate",tag="class"}
 
-A `InMemoryCandidate` object refers to a textual mention (alias) that may or may
+An `InMemoryCandidate` object refers to a textual mention (alias) that may or may
 not be resolved to a specific entity from a `KnowledgeBase`. This will be used
 as input for the entity linking algorithm which will disambiguate the various
 candidates to the correct one. Each candidate `(alias, entity)` pair is assigned

From 1c937db3af6a66fa36fbb0538dc975450254d301 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Thu, 9 Mar 2023 12:06:15 +0100
Subject: [PATCH 13/39] Rename 'alias' to 'mention'.

---
 spacy/errors.py           |  2 +-
 spacy/kb/kb.pyx           | 10 +++++-----
 spacy/kb/kb_in_memory.pyx |  4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 51a10be17..92770b8a8 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -82,7 +82,7 @@ class Warnings(metaclass=ErrorsWithCodes):
             "ignoring the duplicate entry.")
     W021 = ("Unexpected hash collision in PhraseMatcher. Matches may be "
             "incorrect. Modify PhraseMatcher._terminal_hash to fix.")
-    W024 = ("Entity '{entity}' - Alias '{alias}' combination already exists in "
+    W024 = ("Entity '{entity}' - mention '{mention}' combination already exists in "
             "the Knowledge Base.")
     W026 = ("Unable to set all sentence boundaries from dependency parses. If "
             "you are constructing a parse tree incrementally by setting "
diff --git a/spacy/kb/kb.pyx b/spacy/kb/kb.pyx
index ce4bc0138..158c3304f 100644
--- a/spacy/kb/kb.pyx
+++ b/spacy/kb/kb.pyx
@@ -11,7 +11,7 @@ from ..errors import Errors
 
 
 cdef class KnowledgeBase:
-    """A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases,
+    """A `KnowledgeBase` instance stores unique identifiers for entities and their textual mentions,
     to support entity linking of named entities to real-world concepts.
     This is an abstract class and requires its operations to be implemented.
 
@@ -32,8 +32,8 @@ cdef class KnowledgeBase:
 
     def get_candidates_batch(self, mentions: Iterable[Span]) -> Iterable[Iterable[Candidate]]:
         """
-        Return candidate entities for specified texts. Each candidate defines the entity, the original alias,
-        and the prior probability of that alias resolving to that entity.
+        Return candidate entities for specified texts. Each candidate defines the entity, the original mention,
+        and the prior probability of this mention resolving to that entity.
         If no candidate is found for a given text, an empty list is returned.
         mentions (Iterable[Span]): Mentions for which to get candidates.
         RETURNS (Iterable[Iterable[Candidate]]): Identified candidates.
@@ -42,8 +42,8 @@ cdef class KnowledgeBase:
 
     def get_candidates(self, mention: Span) -> Iterable[Candidate]:
         """
-        Return candidate entities for specified text. Each candidate defines the entity, the original alias,
-        and the prior probability of that alias resolving to that entity.
+        Return candidate entities for specified text. Each candidate defines the entity, the original mention,
+        and the prior probability of that mention resolving to that entity.
         If the no candidate is found for a given text, an empty list is returned.
         mention (Span): Mention for which to get candidates.
         RETURNS (Iterable[Candidate]): Identified candidates.
diff --git a/spacy/kb/kb_in_memory.pyx b/spacy/kb/kb_in_memory.pyx
index d456f729b..692390f10 100644
--- a/spacy/kb/kb_in_memory.pyx
+++ b/spacy/kb/kb_in_memory.pyx
@@ -22,7 +22,7 @@ from .candidate import InMemoryCandidate
 
 
 cdef class InMemoryLookupKB(KnowledgeBase):
-    """An `InMemoryLookupKB` instance stores unique identifiers for entities and their textual aliases,
+    """An `InMemoryLookupKB` instance stores unique identifiers for entities and their textual mentions,
     to support entity linking of named entities to real-world concepts.
 
     DOCS: https://spacy.io/api/inmemorylookupkb
@@ -216,7 +216,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
 
         if is_present:
             if not ignore_warnings:
-                warnings.warn(Warnings.W024.format(entity=entity, alias=alias))
+                warnings.warn(Warnings.W024.format(entity=entity, mention=alias))
         else:
             entry_indices.push_back(int(entry_index))
             alias_entry.entry_indices = entry_indices

From b4760414173ed03799559fc5713f6c5e17943998 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Thu, 9 Mar 2023 14:44:41 +0100
Subject: [PATCH 14/39] Port Candidate and InMemoryCandidate to Cython.

---
 setup.py                                   |  2 +
 spacy/kb/__init__.py                       |  1 +
 spacy/kb/candidate.pxd                     | 17 ++++++
 spacy/kb/{candidate.py => candidate.pyx}   | 62 +++++++++++-----------
 spacy/kb/kb_in_memory.pyx                  |  4 +-
 spacy/tests/pipeline/test_entity_linker.py | 11 ++--
 6 files changed, 58 insertions(+), 39 deletions(-)
 create mode 100644 spacy/kb/candidate.pxd
 rename spacy/kb/{candidate.py => candidate.pyx} (73%)

diff --git a/setup.py b/setup.py
index 9b8897233..2768455b9 100755
--- a/setup.py
+++ b/setup.py
@@ -30,6 +30,8 @@ MOD_NAMES = [
     "spacy.lexeme",
     "spacy.vocab",
     "spacy.attrs",
+    "spacy.kb.candidate",
+    # "spacy.kb.inmemorycandidate",
     "spacy.kb.kb",
     "spacy.kb.kb_in_memory",
     "spacy.ml.tb_framework",
diff --git a/spacy/kb/__init__.py b/spacy/kb/__init__.py
index c8a657d62..ff0e209e3 100644
--- a/spacy/kb/__init__.py
+++ b/spacy/kb/__init__.py
@@ -2,4 +2,5 @@ from .kb import KnowledgeBase
 from .kb_in_memory import InMemoryLookupKB
 from .candidate import Candidate, InMemoryCandidate
 
+
 __all__ = ["KnowledgeBase", "InMemoryLookupKB", "Candidate", "InMemoryCandidate"]
diff --git a/spacy/kb/candidate.pxd b/spacy/kb/candidate.pxd
new file mode 100644
index 000000000..17056f615
--- /dev/null
+++ b/spacy/kb/candidate.pxd
@@ -0,0 +1,17 @@
+from libcpp.vector cimport vector
+from .kb_in_memory cimport InMemoryLookupKB
+from ..typedefs cimport hash_t
+
+cdef class Candidate:
+    cdef readonly str _entity_id_
+    cdef readonly hash_t _entity_id
+    cdef readonly str _mention
+    cpdef vector[float] _entity_vector
+    cdef float _prior_prob
+
+
+cdef class InMemoryCandidate(Candidate):
+    cdef readonly InMemoryLookupKB _kb
+    cdef hash_t _entity_hash
+    cdef float _entity_freq
+    cdef hash_t _alias_hash
\ No newline at end of file
diff --git a/spacy/kb/candidate.py b/spacy/kb/candidate.pyx
similarity index 73%
rename from spacy/kb/candidate.py
rename to spacy/kb/candidate.pyx
index b1c188e09..3e61c4444 100644
--- a/spacy/kb/candidate.py
+++ b/spacy/kb/candidate.pyx
@@ -1,10 +1,12 @@
-import abc
-from typing import List, Union, Callable
+# cython: infer_types=True, profile=True
 
-from ..errors import Errors
+from ..typedefs cimport hash_t
+
+from .kb cimport KnowledgeBase
+from .kb_in_memory cimport InMemoryLookupKB
 
 
-class Candidate(abc.ABC):
+cdef class Candidate:
     """A `Candidate` object refers to a textual mention that may or may not be resolved
     to a specific `entity_id` from a Knowledge Base. This will be used as input for the entity_id linking
     algorithm which will disambiguate the various candidates to the correct one.
@@ -16,8 +18,8 @@ class Candidate(abc.ABC):
     def __init__(
         self,
         mention: str,
-        entity_id: Union[str, int],
-        entity_vector: List[float],
+        entity_id: str,
+        entity_vector: vector[float],
         prior_prob: float,
     ):
         """Initializes properties of `Candidate` instance.
@@ -30,22 +32,23 @@ class Candidate(abc.ABC):
             doesn't) it might be better to eschew this information and always supply the same value.
         """
         self._mention = mention
-        self._entity_id = entity_id
+        self._entity_id_ = entity_id
         # Note that hashing an int value yields the same int value.
-        self._entity_id_hash = hash(entity_id)
+        self._entity_id = hash(entity_id)
         self._entity_vector = entity_vector
         self._prior_prob = prior_prob
+        # todo raise exception if this is instantiated class
 
     @property
     def entity_id(self) -> int:
         """RETURNS (int): Numerical representation of entity ID (if entity ID is numerical, this is just the entity ID,
         otherwise the hash of the entity ID string)."""
-        return self._entity_id_hash
+        return self._entity_id
 
     @property
     def entity_id_(self) -> str:
         """RETURNS (str): String representation of entity ID."""
-        return str(self._entity_id)
+        return self._entity_id_
 
     @property
     def mention(self) -> str:
@@ -53,8 +56,8 @@ class Candidate(abc.ABC):
         return self._mention
 
     @property
-    def entity_vector(self) -> List[float]:
-        """RETURNS (List[float]): Entity vector."""
+    def entity_vector(self) -> vector[float]:
+        """RETURNS (vector[float]): Entity vector."""
         return self._entity_vector
 
     @property
@@ -63,20 +66,20 @@ class Candidate(abc.ABC):
         return self._prior_prob
 
 
-class InMemoryCandidate(Candidate):
+cdef class InMemoryCandidate(Candidate):
     """Candidate for InMemoryLookupKB."""
 
     def __init__(
         self,
-        hash_to_str: Callable[[int], str],
-        entity_id: int,
+        kb: InMemoryLookupKB,
+        entity_hash: int,
         mention: str,
-        entity_vector: List[float],
+        entity_vector: vector[float],
         prior_prob: float,
-        entity_freq: int,
+        entity_freq: float
     ):
         """
-        hash_to_str (Callable[[int], str]): Callable retrieving entity name from provided entity/vocab hash.
+        kb (InMemoryLookupKB): InMemoryLookupKB instance.
         entity_id (int): Entity ID as hash that can be looked up with InMemoryKB.vocab.strings.__getitem__().
         entity_freq (int): Entity frequency in KB corpus.
         entity_vector (List[float]): Entity embedding.
@@ -88,24 +91,19 @@ class InMemoryCandidate(Candidate):
         """
         super().__init__(
             mention=mention,
-            entity_id=entity_id,
+            entity_id=kb.vocab.strings[entity_hash],
             entity_vector=entity_vector,
             prior_prob=prior_prob,
         )
-        self._hash_to_str = hash_to_str
+        self._kb = kb
+        self._entity_id = entity_hash
         self._entity_freq = entity_freq
-        if not isinstance(self._entity_id, int):
-            raise ValueError(
-                Errors.E4006.format(exp_type="int", found_type=str(type(entity_id)))
-            )
-        self._entity_id_str = self._hash_to_str(self._entity_id)
-
-    @property
-    def entity_freq(self) -> float:
-        """RETURNS (float): Relative entity frequency."""
-        return self._entity_freq
 
     @property
     def entity_id_(self) -> str:
-        """RETURNS (str): String representation of entity ID."""
-        return self._entity_id_str
+        """RETURNS (str): ID/name of this entity in the KB"""
+        return self._kb.vocab.strings[self._entity_id]
+
+    @property
+    def entity_freq(self) -> float:
+        return self._entity_freq
diff --git a/spacy/kb/kb_in_memory.pyx b/spacy/kb/kb_in_memory.pyx
index 692390f10..3e9001da9 100644
--- a/spacy/kb/kb_in_memory.pyx
+++ b/spacy/kb/kb_in_memory.pyx
@@ -243,8 +243,8 @@ cdef class InMemoryLookupKB(KnowledgeBase):
 
         return [
             InMemoryCandidate(
-                hash_to_str=self.vocab.strings.__getitem__,
-                entity_id=self._entries[entry_index].entity_hash,
+                kb=self,
+                entity_hash=self._entries[entry_index].entity_hash,
                 mention=alias,
                 entity_vector=self._vectors_table[self._entries[entry_index].vector_index],
                 prior_prob=prior_prob,
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index d0cfdd3c7..cd1dc90e4 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -465,16 +465,17 @@ def test_candidate_generation(nlp):
     mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])
 
     # test the size of the relevant candidates
+    adam_ent_cands = get_candidates(mykb, adam_ent)
     assert len(get_candidates(mykb, douglas_ent)) == 2
-    assert len(get_candidates(mykb, adam_ent)) == 1
+    assert len(adam_ent_cands) == 1
     assert len(get_candidates(mykb, Adam_ent)) == 0  # default case sensitive
     assert len(get_candidates(mykb, shrubbery_ent)) == 0
 
     # test the content of the candidates
-    assert get_candidates(mykb, adam_ent)[0].entity_id_ == "Q2"
-    assert get_candidates(mykb, adam_ent)[0].mention == "adam"
-    assert_almost_equal(get_candidates(mykb, adam_ent)[0].entity_freq, 12)
-    assert_almost_equal(get_candidates(mykb, adam_ent)[0].prior_prob, 0.9)
+    assert adam_ent_cands[0].entity_id_ == "Q2"
+    assert adam_ent_cands[0].mention == "adam"
+    assert_almost_equal(adam_ent_cands[0].entity_freq, 12)
+    assert_almost_equal(adam_ent_cands[0].prior_prob, 0.9)
 
 
 def test_el_pipe_configuration(nlp):

From 845864beb4ab16e8a9e96621e3a3e5227032220b Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Thu, 9 Mar 2023 14:55:10 +0100
Subject: [PATCH 15/39] Remove redundant entry in setup.py.

---
 setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/setup.py b/setup.py
index 2768455b9..d5b82ec68 100755
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,6 @@ MOD_NAMES = [
     "spacy.vocab",
     "spacy.attrs",
     "spacy.kb.candidate",
-    # "spacy.kb.inmemorycandidate",
     "spacy.kb.kb",
     "spacy.kb.kb_in_memory",
     "spacy.ml.tb_framework",

From b0ee34185da4877f8045f0016316a7120a255d61 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Thu, 9 Mar 2023 14:56:44 +0100
Subject: [PATCH 16/39] Add abstract class check.

---
 spacy/kb/candidate.pyx | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx
index 3e61c4444..4f75ac687 100644
--- a/spacy/kb/candidate.pyx
+++ b/spacy/kb/candidate.pyx
@@ -4,7 +4,7 @@ from ..typedefs cimport hash_t
 
 from .kb cimport KnowledgeBase
 from .kb_in_memory cimport InMemoryLookupKB
-
+from ..errors import Errors
 
 cdef class Candidate:
     """A `Candidate` object refers to a textual mention that may or may not be resolved
@@ -22,7 +22,7 @@ cdef class Candidate:
         entity_vector: vector[float],
         prior_prob: float,
     ):
-        """Initializes properties of `Candidate` instance.
+        """Initializes properties of abstract base class `Candidate`.
         mention (str): Mention text for this candidate.
         entity_id (Union[str, int]): Unique entity ID.
         entity_vector (List[float]): Entity embedding.
@@ -31,13 +31,18 @@ cdef class Candidate:
             cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
             doesn't) it might be better to eschew this information and always supply the same value.
         """
+        # Make sure abstract Candidate is not instantiated.
+        if self.__class__ == Candidate:
+            raise TypeError(
+                Errors.E1046.format(cls_name=self.__class__.__name__)
+            )
+
         self._mention = mention
         self._entity_id_ = entity_id
         # Note that hashing an int value yields the same int value.
         self._entity_id = hash(entity_id)
         self._entity_vector = entity_vector
         self._prior_prob = prior_prob
-        # todo raise exception if this is instantiated class
 
     @property
     def entity_id(self) -> int:

From c61654eef8cd7afa3b1d00f9c90f3f11b6528c08 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Thu, 9 Mar 2023 15:04:10 +0100
Subject: [PATCH 17/39] Drop storing mention.

---
 spacy/kb/candidate.pxd    |  3 +--
 spacy/kb/candidate.pyx    | 16 +++++++++-------
 spacy/kb/kb_in_memory.pyx |  2 +-
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/spacy/kb/candidate.pxd b/spacy/kb/candidate.pxd
index 17056f615..23da038d3 100644
--- a/spacy/kb/candidate.pxd
+++ b/spacy/kb/candidate.pxd
@@ -5,7 +5,6 @@ from ..typedefs cimport hash_t
 cdef class Candidate:
     cdef readonly str _entity_id_
     cdef readonly hash_t _entity_id
-    cdef readonly str _mention
     cpdef vector[float] _entity_vector
     cdef float _prior_prob
 
@@ -14,4 +13,4 @@ cdef class InMemoryCandidate(Candidate):
     cdef readonly InMemoryLookupKB _kb
     cdef hash_t _entity_hash
     cdef float _entity_freq
-    cdef hash_t _alias_hash
\ No newline at end of file
+    cdef hash_t _mention
\ No newline at end of file
diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx
index 4f75ac687..dcf302112 100644
--- a/spacy/kb/candidate.pyx
+++ b/spacy/kb/candidate.pyx
@@ -17,13 +17,11 @@ cdef class Candidate:
 
     def __init__(
         self,
-        mention: str,
         entity_id: str,
         entity_vector: vector[float],
         prior_prob: float,
     ):
         """Initializes properties of abstract base class `Candidate`.
-        mention (str): Mention text for this candidate.
         entity_id (Union[str, int]): Unique entity ID.
         entity_vector (List[float]): Entity embedding.
         prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
@@ -37,7 +35,6 @@ cdef class Candidate:
                 Errors.E1046.format(cls_name=self.__class__.__name__)
             )
 
-        self._mention = mention
         self._entity_id_ = entity_id
         # Note that hashing an int value yields the same int value.
         self._entity_id = hash(entity_id)
@@ -58,7 +55,7 @@ cdef class Candidate:
     @property
     def mention(self) -> str:
         """RETURNS (str): Mention."""
-        return self._mention
+        raise NotImplementedError
 
     @property
     def entity_vector(self) -> vector[float]:
@@ -78,7 +75,7 @@ cdef class InMemoryCandidate(Candidate):
         self,
         kb: InMemoryLookupKB,
         entity_hash: int,
-        mention: str,
+        mention_hash: int,
         entity_vector: vector[float],
         prior_prob: float,
         entity_freq: float
@@ -88,22 +85,27 @@ cdef class InMemoryCandidate(Candidate):
         entity_id (int): Entity ID as hash that can be looked up with InMemoryKB.vocab.strings.__getitem__().
         entity_freq (int): Entity frequency in KB corpus.
         entity_vector (List[float]): Entity embedding.
-        mention (str): Mention.
+        mention_hash (int): Mention hash.
         prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
             the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In
             cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
             doesn't) it might be better to eschew this information and always supply the same value.
         """
         super().__init__(
-            mention=mention,
             entity_id=kb.vocab.strings[entity_hash],
             entity_vector=entity_vector,
             prior_prob=prior_prob,
         )
         self._kb = kb
+        self._mention = mention_hash
         self._entity_id = entity_hash
         self._entity_freq = entity_freq
 
+    @property
+    def mention(self) -> str:
+        """RETURNS (str): ID/name of this entity in the KB"""
+        return self._kb.vocab.strings[self._mention]
+
     @property
     def entity_id_(self) -> str:
         """RETURNS (str): ID/name of this entity in the KB"""
diff --git a/spacy/kb/kb_in_memory.pyx b/spacy/kb/kb_in_memory.pyx
index 3e9001da9..059f3a140 100644
--- a/spacy/kb/kb_in_memory.pyx
+++ b/spacy/kb/kb_in_memory.pyx
@@ -245,7 +245,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
             InMemoryCandidate(
                 kb=self,
                 entity_hash=self._entries[entry_index].entity_hash,
-                mention=alias,
+                mention_hash=alias_hash,
                 entity_vector=self._vectors_table[self._entries[entry_index].vector_index],
                 prior_prob=prior_prob,
                 entity_freq=self._entries[entry_index].freq

From 34e092e4e522600c264499bfcc678995616b295f Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Thu, 9 Mar 2023 16:15:39 +0100
Subject: [PATCH 18/39] Update spacy/kb/candidate.pxd

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
---
 spacy/kb/candidate.pxd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/kb/candidate.pxd b/spacy/kb/candidate.pxd
index 23da038d3..920580d33 100644
--- a/spacy/kb/candidate.pxd
+++ b/spacy/kb/candidate.pxd
@@ -13,4 +13,4 @@ cdef class InMemoryCandidate(Candidate):
     cdef readonly InMemoryLookupKB _kb
     cdef hash_t _entity_hash
     cdef float _entity_freq
-    cdef hash_t _mention
\ No newline at end of file
+    cdef hash_t _mention

From 6fc7997c06ca8aded44194f4d9f9e81b9d112139 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Fri, 10 Mar 2023 08:55:32 +0100
Subject: [PATCH 19/39] Fix entity_id refactoring problems in docstrings.

---
 spacy/kb/candidate.pyx | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx
index dcf302112..6f0bd061e 100644
--- a/spacy/kb/candidate.pyx
+++ b/spacy/kb/candidate.pyx
@@ -1,14 +1,11 @@
 # cython: infer_types=True, profile=True
 
-from ..typedefs cimport hash_t
-
-from .kb cimport KnowledgeBase
 from .kb_in_memory cimport InMemoryLookupKB
 from ..errors import Errors
 
 cdef class Candidate:
     """A `Candidate` object refers to a textual mention that may or may not be resolved
-    to a specific `entity_id` from a Knowledge Base. This will be used as input for the entity_id linking
+    to a specific entity from a Knowledge Base. This will be used as input for the entity linking
     algorithm which will disambiguate the various candidates to the correct one.
     Each candidate (mention, entity_id) pair is assigned a certain prior probability.
 
@@ -87,7 +84,7 @@ cdef class InMemoryCandidate(Candidate):
         entity_vector (List[float]): Entity embedding.
         mention_hash (int): Mention hash.
         prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
-            the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In
+            the context, this mention resolves to this entity in the corpus used to build the knowledge base. In
             cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
             doesn't) it might be better to eschew this information and always supply the same value.
         """

From 27053912da0fbaae216e2d862371298228346063 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Fri, 10 Mar 2023 09:00:30 +0100
Subject: [PATCH 20/39] Drop unused InMemoryCandidate._entity_hash.

---
 spacy/kb/candidate.pxd | 1 -
 1 file changed, 1 deletion(-)

diff --git a/spacy/kb/candidate.pxd b/spacy/kb/candidate.pxd
index 23da038d3..924357ad0 100644
--- a/spacy/kb/candidate.pxd
+++ b/spacy/kb/candidate.pxd
@@ -11,6 +11,5 @@ cdef class Candidate:
 
 cdef class InMemoryCandidate(Candidate):
     cdef readonly InMemoryLookupKB _kb
-    cdef hash_t _entity_hash
     cdef float _entity_freq
     cdef hash_t _mention
\ No newline at end of file

From 348dd1c87ec853f837907ef8b804b103be1ebfcc Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Fri, 10 Mar 2023 09:03:41 +0100
Subject: [PATCH 21/39] Update docstrings.

---
 spacy/kb/candidate.pyx | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx
index 6f0bd061e..d45d130c5 100644
--- a/spacy/kb/candidate.pyx
+++ b/spacy/kb/candidate.pyx
@@ -100,14 +100,15 @@ cdef class InMemoryCandidate(Candidate):
 
     @property
     def mention(self) -> str:
-        """RETURNS (str): ID/name of this entity in the KB"""
+        """RETURNS (str): Mention."""
         return self._kb.vocab.strings[self._mention]
 
     @property
     def entity_id_(self) -> str:
-        """RETURNS (str): ID/name of this entity in the KB"""
+        """RETURNS (str): ID/name of this entity in the KB."""
         return self._kb.vocab.strings[self._entity_id]
 
     @property
     def entity_freq(self) -> float:
+        """RETURNS (float): Entity frequence of this candidate's entity in the KB."""
         return self._entity_freq

From 649c146e2c60d71dbdabe4e7532bc985da7b038c Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Mon, 13 Mar 2023 09:21:08 +0100
Subject: [PATCH 22/39] Move attributes out of Candidate.

---
 spacy/kb/candidate.pxd |  8 +++---
 spacy/kb/candidate.pyx | 56 +++++++++++++++++-------------------------
 2 files changed, 27 insertions(+), 37 deletions(-)

diff --git a/spacy/kb/candidate.pxd b/spacy/kb/candidate.pxd
index ddb3dbca8..0e753bf99 100644
--- a/spacy/kb/candidate.pxd
+++ b/spacy/kb/candidate.pxd
@@ -3,13 +3,13 @@ from .kb_in_memory cimport InMemoryLookupKB
 from ..typedefs cimport hash_t
 
 cdef class Candidate:
-    cdef readonly str _entity_id_
-    cdef readonly hash_t _entity_id
-    cpdef vector[float] _entity_vector
-    cdef float _prior_prob
+    pass
 
 
 cdef class InMemoryCandidate(Candidate):
+    cdef readonly hash_t _entity_hash
+    cpdef vector[float] _entity_vector
+    cdef float _prior_prob
     cdef readonly InMemoryLookupKB _kb
     cdef float _entity_freq
     cdef hash_t _mention
diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx
index d45d130c5..9d3a05ec8 100644
--- a/spacy/kb/candidate.pyx
+++ b/spacy/kb/candidate.pyx
@@ -12,42 +12,23 @@ cdef class Candidate:
     DOCS: https://spacy.io/api/kb/#candidate-init
     """
 
-    def __init__(
-        self,
-        entity_id: str,
-        entity_vector: vector[float],
-        prior_prob: float,
-    ):
-        """Initializes properties of abstract base class `Candidate`.
-        entity_id (Union[str, int]): Unique entity ID.
-        entity_vector (List[float]): Entity embedding.
-        prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
-            the context, this mention resolves to this entity_id in the corpus used to build the knowledge base. In
-            cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
-            doesn't) it might be better to eschew this information and always supply the same value.
-        """
-        # Make sure abstract KB is not instantiated.
+    def __init__(self):
+        # Make sure abstract Candidate is not instantiated.
         if self.__class__ == Candidate:
             raise TypeError(
                 Errors.E1046.format(cls_name=self.__class__.__name__)
             )
 
-        self._entity_id_ = entity_id
-        # Note that hashing an int value yields the same int value.
-        self._entity_id = hash(entity_id)
-        self._entity_vector = entity_vector
-        self._prior_prob = prior_prob
-
     @property
     def entity_id(self) -> int:
         """RETURNS (int): Numerical representation of entity ID (if entity ID is numerical, this is just the entity ID,
         otherwise the hash of the entity ID string)."""
-        return self._entity_id
+        raise NotImplementedError
 
     @property
     def entity_id_(self) -> str:
         """RETURNS (str): String representation of entity ID."""
-        return self._entity_id_
+        raise NotImplementedError
 
     @property
     def mention(self) -> str:
@@ -57,12 +38,12 @@ cdef class Candidate:
     @property
     def entity_vector(self) -> vector[float]:
         """RETURNS (vector[float]): Entity vector."""
-        return self._entity_vector
+        raise NotImplementedError
 
     @property
     def prior_prob(self) -> float:
         """RETURNS (List[float]): Entity vector."""
-        return self._prior_prob
+        raise NotImplementedError
 
 
 cdef class InMemoryCandidate(Candidate):
@@ -88,27 +69,36 @@ cdef class InMemoryCandidate(Candidate):
             cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
             doesn't) it might be better to eschew this information and always supply the same value.
         """
-        super().__init__(
-            entity_id=kb.vocab.strings[entity_hash],
-            entity_vector=entity_vector,
-            prior_prob=prior_prob,
-        )
+        super().__init__()
+
+        self._entity_hash = entity_hash
+        self._entity_vector = entity_vector
+        self._prior_prob = prior_prob
         self._kb = kb
         self._mention = mention_hash
         self._entity_id = entity_hash
         self._entity_freq = entity_freq
 
+    @property
+    def entity_id(self) -> int:
+        return self._entity_hash
+
+    @property
+    def entity_vector(self) -> vector[float]:
+        return self._entity_vector
+
+    @property
+    def prior_prob(self) -> float:
+        return self._prior_prob
+
     @property
     def mention(self) -> str:
-        """RETURNS (str): Mention."""
         return self._kb.vocab.strings[self._mention]
 
     @property
     def entity_id_(self) -> str:
-        """RETURNS (str): ID/name of this entity in the KB."""
         return self._kb.vocab.strings[self._entity_id]
 
     @property
     def entity_freq(self) -> float:
-        """RETURNS (float): Entity frequence of this candidate's entity in the KB."""
         return self._entity_freq

From 6adc15178f74529aa1c01a390ab0c09c682e2329 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Mon, 13 Mar 2023 14:26:14 +0100
Subject: [PATCH 23/39] Partially fix alias/mention terminology usage. Convert
 Candidate to interface.

---
 spacy/errors.py           | 2 +-
 spacy/kb/candidate.pyx    | 6 +++---
 spacy/kb/kb.pyx           | 2 +-
 spacy/kb/kb_in_memory.pyx | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 92770b8a8..30446e7ea 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -82,7 +82,7 @@ class Warnings(metaclass=ErrorsWithCodes):
             "ignoring the duplicate entry.")
     W021 = ("Unexpected hash collision in PhraseMatcher. Matches may be "
             "incorrect. Modify PhraseMatcher._terminal_hash to fix.")
-    W024 = ("Entity '{entity}' - mention '{mention}' combination already exists in "
+    W024 = ("Entity '{entity}' - alias '{alias}' combination already exists in "
             "the Knowledge Base.")
     W026 = ("Unable to set all sentence boundaries from dependency parses. If "
             "you are constructing a parse tree incrementally by setting "
diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx
index 9d3a05ec8..ac19df671 100644
--- a/spacy/kb/candidate.pyx
+++ b/spacy/kb/candidate.pyx
@@ -7,7 +7,8 @@ cdef class Candidate:
     """A `Candidate` object refers to a textual mention that may or may not be resolved
     to a specific entity from a Knowledge Base. This will be used as input for the entity linking
     algorithm which will disambiguate the various candidates to the correct one.
-    Each candidate (mention, entity_id) pair is assigned a certain prior probability.
+    Each candidate, which represents a possible link between one textual mention and one entity in the knowledge base,
+    is assigned a certain prior probability.
 
     DOCS: https://spacy.io/api/kb/#candidate-init
     """
@@ -76,7 +77,6 @@ cdef class InMemoryCandidate(Candidate):
         self._prior_prob = prior_prob
         self._kb = kb
         self._mention = mention_hash
-        self._entity_id = entity_hash
         self._entity_freq = entity_freq
 
     @property
@@ -97,7 +97,7 @@ cdef class InMemoryCandidate(Candidate):
 
     @property
     def entity_id_(self) -> str:
-        return self._kb.vocab.strings[self._entity_id]
+        return self._kb.vocab.strings[self._entity_hash]
 
     @property
     def entity_freq(self) -> float:
diff --git a/spacy/kb/kb.pyx b/spacy/kb/kb.pyx
index 158c3304f..7da312863 100644
--- a/spacy/kb/kb.pyx
+++ b/spacy/kb/kb.pyx
@@ -11,7 +11,7 @@ from ..errors import Errors
 
 
 cdef class KnowledgeBase:
-    """A `KnowledgeBase` instance stores unique identifiers for entities and their textual mentions,
+    """A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases,
     to support entity linking of named entities to real-world concepts.
     This is an abstract class and requires its operations to be implemented.
 
diff --git a/spacy/kb/kb_in_memory.pyx b/spacy/kb/kb_in_memory.pyx
index 059f3a140..4ceb87888 100644
--- a/spacy/kb/kb_in_memory.pyx
+++ b/spacy/kb/kb_in_memory.pyx
@@ -22,7 +22,7 @@ from .candidate import InMemoryCandidate
 
 
 cdef class InMemoryLookupKB(KnowledgeBase):
-    """An `InMemoryLookupKB` instance stores unique identifiers for entities and their textual mentions,
+    """An `InMemoryLookupKB` instance stores unique identifiers for entities and their textual aliases,
     to support entity linking of named entities to real-world concepts.
 
     DOCS: https://spacy.io/api/inmemorylookupkb
@@ -216,7 +216,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
 
         if is_present:
             if not ignore_warnings:
-                warnings.warn(Warnings.W024.format(entity=entity, mention=alias))
+                warnings.warn(Warnings.W024.format(entity=entity, alias=alias))
         else:
             entry_indices.push_back(int(entry_index))
             alias_entry.entry_indices = entry_indices

From 4a921766f128755fb733e899e6701599008184a5 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Mon, 13 Mar 2023 16:54:38 +0100
Subject: [PATCH 24/39] Remove prior_prob from supported properties in
 Candidate. Introduce KnowledgeBase.supports_prior_probs().

---
 spacy/errors.py                 |  3 +++
 spacy/kb/candidate.pyx          |  7 ++-----
 spacy/kb/kb.pyx                 |  7 +++++++
 spacy/kb/kb_in_memory.pyx       |  3 +++
 spacy/pipeline/entity_linker.py | 14 ++++++++------
 5 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 30446e7ea..0f8091e3a 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -209,7 +209,10 @@ class Warnings(metaclass=ErrorsWithCodes):
             "`enabled` ({enabled}). Be aware that this might affect other components in your pipeline.")
     W124 = ("{host}:{port} is already in use, using the nearest available port {serve_port} as an alternative.")
 
+    # v4 warning strings
     W400 = ("`use_upper=False` is ignored, the upper layer is always enabled")
+    W401 = ("`incl_prior is True`, but the selected knowledge base type {kb_type} doesn't support prior probability "
+            "lookups.")
 
 
 class Errors(metaclass=ErrorsWithCodes):
diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx
index ac19df671..9e4e9f321 100644
--- a/spacy/kb/candidate.pyx
+++ b/spacy/kb/candidate.pyx
@@ -41,11 +41,6 @@ cdef class Candidate:
         """RETURNS (vector[float]): Entity vector."""
         raise NotImplementedError
 
-    @property
-    def prior_prob(self) -> float:
-        """RETURNS (List[float]): Entity vector."""
-        raise NotImplementedError
-
 
 cdef class InMemoryCandidate(Candidate):
     """Candidate for InMemoryLookupKB."""
@@ -89,6 +84,7 @@ cdef class InMemoryCandidate(Candidate):
 
     @property
     def prior_prob(self) -> float:
+        """RETURNS (float): Prior probability that this mention resolves to this entity."""
         return self._prior_prob
 
     @property
@@ -101,4 +97,5 @@ cdef class InMemoryCandidate(Candidate):
 
     @property
     def entity_freq(self) -> float:
+        """RETURNS (float): Entity frequency in KB corpus."""
         return self._entity_freq
diff --git a/spacy/kb/kb.pyx b/spacy/kb/kb.pyx
index 7da312863..d10123e37 100644
--- a/spacy/kb/kb.pyx
+++ b/spacy/kb/kb.pyx
@@ -106,3 +106,10 @@ cdef class KnowledgeBase:
         raise NotImplementedError(
             Errors.E1045.format(parent="KnowledgeBase", method="from_disk", name=self.__name__)
         )
+
+    @property
+    def supports_prior_probs(self) -> bool:
+        """RETURNS (bool): Whether this KB type supports looking up prior probabilities for entity mentions."""
+        raise NotImplementedError(
+            Errors.E1045.format(parent="KnowledgeBase", method="supports_prior_probs", name=self.__name__)
+        )
diff --git a/spacy/kb/kb_in_memory.pyx b/spacy/kb/kb_in_memory.pyx
index 4ceb87888..e3b9dfcb3 100644
--- a/spacy/kb/kb_in_memory.pyx
+++ b/spacy/kb/kb_in_memory.pyx
@@ -283,6 +283,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
 
         return 0.0
 
+    def supports_prior_probs(self) -> bool:
+        return True
+
     def to_bytes(self, **kwargs):
         """Serialize the current state to a binary string.
         """
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index 39cff218a..caced9cfd 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -1,5 +1,5 @@
-from typing import Optional, Iterable, Callable, Dict, Sequence, Union, List, Any
-from typing import cast
+import warnings
+from typing import Optional, Iterable, Callable, Dict, Sequence, Union, List, Any, cast
 from numpy import dtype
 from thinc.types import Floats1d, Floats2d, Ints1d, Ragged
 from pathlib import Path
@@ -10,14 +10,13 @@ from thinc.api import CosineDistance, Model, Optimizer, Config
 from thinc.api import set_dropout_rate
 
 from ..kb import KnowledgeBase, Candidate
-from ..ml import empty_kb
 from ..tokens import Doc, Span
 from .pipe import deserialize_config
 from .trainable_pipe import TrainablePipe
 from ..language import Language
 from ..vocab import Vocab
 from ..training import Example, validate_examples, validate_get_examples
-from ..errors import Errors
+from ..errors import Errors, Warnings
 from ..util import SimpleFrozenList, registry
 from .. import util
 from ..scorer import Scorer
@@ -240,6 +239,8 @@ class EntityLinker(TrainablePipe):
 
         if candidates_batch_size < 1:
             raise ValueError(Errors.E1044)
+        if self.incl_prior and not self.kb.supports_prior_probs:
+            warnings.warn(Warnings.W401)
 
     def set_kb(self, kb_loader: Callable[[Vocab], KnowledgeBase]):
         """Define the KB of this pipe by providing a function that will
@@ -532,8 +533,9 @@ class EntityLinker(TrainablePipe):
                         else:
                             random.shuffle(candidates)
                             # set all prior probabilities to 0 if incl_prior=False
-                            prior_probs = xp.asarray([c.prior_prob for c in candidates])
-                            if not self.incl_prior:
+                            if self.incl_prior and self.kb.supports_prior_probs:
+                                prior_probs = xp.asarray([c.prior_prob for c in candidates])  # type: ignore
+                            else:
                                 prior_probs = xp.asarray([0.0 for _ in candidates])
                             scores = prior_probs
                             # add in similarity from the context

From be858981e6984a31b04a4b9068603a6cb8a07412 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Mon, 13 Mar 2023 17:01:20 +0100
Subject: [PATCH 25/39] Update docstrings related to prior_prob.

---
 spacy/kb/candidate.pyx | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx
index 9e4e9f321..6707d32ed 100644
--- a/spacy/kb/candidate.pyx
+++ b/spacy/kb/candidate.pyx
@@ -60,10 +60,8 @@ cdef class InMemoryCandidate(Candidate):
         entity_freq (int): Entity frequency in KB corpus.
         entity_vector (List[float]): Entity embedding.
         mention_hash (int): Mention hash.
-        prior_prob (float): Prior probability of entity for this mention - i.e. the probability that, independent of
-            the context, this mention resolves to this entity in the corpus used to build the knowledge base. In
-            cases in which this isn't always possible (e.g.: the corpus to analyse contains mentions that the KB corpus
-            doesn't) it might be better to eschew this information and always supply the same value.
+        prior_prob (float): Prior probability of entity for this mention. I. e. the probability that, independent of
+            the context, this mention - which matches one of this entity's aliases - resolves to one this entity.
         """
         super().__init__()
 
@@ -84,7 +82,8 @@ cdef class InMemoryCandidate(Candidate):
 
     @property
     def prior_prob(self) -> float:
-        """RETURNS (float): Prior probability that this mention resolves to this entity."""
+        """RETURNS (float): Prior probability that this mention, which matches one of this entity's aliases, resolves to
+        this entity."""
         return self._prior_prob
 
     @property

From 28dbed64cbd6f21041691890d4b5b9a348d6ebfa Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Tue, 14 Mar 2023 13:33:05 +0100
Subject: [PATCH 26/39] Update alias/mention usage in doc(strings).

---
 spacy/kb/candidate.pyx    | 12 ++++++------
 spacy/kb/kb_in_memory.pyx |  2 +-
 website/docs/api/kb.mdx   | 26 +++++++++++++-------------
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx
index 6707d32ed..94445f27d 100644
--- a/spacy/kb/candidate.pyx
+++ b/spacy/kb/candidate.pyx
@@ -49,7 +49,7 @@ cdef class InMemoryCandidate(Candidate):
         self,
         kb: InMemoryLookupKB,
         entity_hash: int,
-        mention_hash: int,
+        alias_hash: int,
         entity_vector: vector[float],
         prior_prob: float,
         entity_freq: float
@@ -59,9 +59,9 @@ cdef class InMemoryCandidate(Candidate):
         entity_id (int): Entity ID as hash that can be looked up with InMemoryKB.vocab.strings.__getitem__().
         entity_freq (int): Entity frequency in KB corpus.
         entity_vector (List[float]): Entity embedding.
-        mention_hash (int): Mention hash.
-        prior_prob (float): Prior probability of entity for this mention. I. e. the probability that, independent of
-            the context, this mention - which matches one of this entity's aliases - resolves to one this entity.
+        alias_hash (int): Alias hash.
+        prior_prob (float): Prior probability of entity for this alias, i.e. the probability that, independent of
+            the context, this alias - which matches one of this entity's aliases - resolves to this entity.
         """
         super().__init__()
 
@@ -69,7 +69,7 @@ cdef class InMemoryCandidate(Candidate):
         self._entity_vector = entity_vector
         self._prior_prob = prior_prob
         self._kb = kb
-        self._mention = mention_hash
+        self._mention = alias_hash
         self._entity_freq = entity_freq
 
     @property
@@ -82,7 +82,7 @@ cdef class InMemoryCandidate(Candidate):
 
     @property
     def prior_prob(self) -> float:
-        """RETURNS (float): Prior probability that this mention, which matches one of this entity's aliases, resolves to
+        """RETURNS (float): Prior probability that this alias, which matches one of this entity's synonyms, resolves to
         this entity."""
         return self._prior_prob
 
diff --git a/spacy/kb/kb_in_memory.pyx b/spacy/kb/kb_in_memory.pyx
index e3b9dfcb3..c9ced8309 100644
--- a/spacy/kb/kb_in_memory.pyx
+++ b/spacy/kb/kb_in_memory.pyx
@@ -245,7 +245,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
             InMemoryCandidate(
                 kb=self,
                 entity_hash=self._entries[entry_index].entity_hash,
-                mention_hash=alias_hash,
+                alias_hash=alias_hash,
                 entity_vector=self._vectors_table[self._entries[entry_index].vector_index],
                 prior_prob=prior_prob,
                 entity_freq=self._entries[entry_index].freq
diff --git a/website/docs/api/kb.mdx b/website/docs/api/kb.mdx
index e3b699140..9536a3fe3 100644
--- a/website/docs/api/kb.mdx
+++ b/website/docs/api/kb.mdx
@@ -175,11 +175,11 @@ Restore the state of the knowledge base from a given directory. Note that the
 
 ## InMemoryCandidate {id="candidate",tag="class"}
 
-An `InMemoryCandidate` object refers to a textual mention (alias) that may or may
-not be resolved to a specific entity from a `KnowledgeBase`. This will be used
-as input for the entity linking algorithm which will disambiguate the various
-candidates to the correct one. Each candidate `(alias, entity)` pair is assigned
-to a certain prior probability.
+An `InMemoryCandidate` object refers to a textual mention (alias) that may or
+may not be resolved to a specific entity from a `KnowledgeBase`. This will be
+used as input for the entity linking algorithm which will disambiguate the
+various candidates to the correct one. Each candidate `(alias, entity)` pair is
+assigned a certain prior probability.
 
 ### InMemoryCandidate.\_\_init\_\_ {id="candidate-init",tag="method"}
 
@@ -190,19 +190,19 @@ of the [`entity_linker`](/api/entitylinker) pipe.
 > #### Example```python
 >
 > from spacy.kb import InMemoryCandidate candidate = InMemoryCandidate(kb,
-> entity_hash, entity_freq, entity_vector, mention_hash, prior_prob)
+> entity_hash, entity_freq, entity_vector, alias_hash, prior_prob)
 >
 > ```
 >
 > ```
 
-| Name           | Description                                                               |
-| -------------- | ------------------------------------------------------------------------- |
-| `kb`           | The knowledge base that defined this candidate. ~~KnowledgeBase~~         |
-| `entity_hash`  | The hash of the entity's KB ID. ~~int~~                                   |
-| `entity_freq`  | The entity frequency as recorded in the KB. ~~float~~                     |
-| `mention_hash` | The hash of the textual mention. ~~int~~                                  |
-| `prior_prob`   | The prior probability of the `alias` referring to the `entity`. ~~float~~ |
+| Name          | Description                                                               |
+| ------------- | ------------------------------------------------------------------------- |
+| `kb`          | The knowledge base that defined this candidate. ~~KnowledgeBase~~         |
+| `entity_hash` | The hash of the entity's KB ID. ~~int~~                                   |
+| `entity_freq` | The entity frequency as recorded in the KB. ~~float~~                     |
+| `alias_hash`  | The hash of the entity alias. ~~int~~                                     |
+| `prior_prob`  | The prior probability of the `alias` referring to the `entity`. ~~float~~ |
 
 ## InMemoryCandidate attributes {id="candidate-attributes"}
 

From b7b4282821fb2b14035773de03f1891363b733ec Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Wed, 15 Mar 2023 09:20:07 +0100
Subject: [PATCH 27/39] Update spacy/ml/models/entity_linker.py

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
---
 spacy/ml/models/entity_linker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py
index 50ad4daba..5da0544a9 100644
--- a/spacy/ml/models/entity_linker.py
+++ b/spacy/ml/models/entity_linker.py
@@ -124,7 +124,7 @@ def get_candidates(kb: KnowledgeBase, mention: Span) -> Iterable[Candidate]:
     Return candidate entities for a given mention and fetching appropriate entries from the index.
     kb (KnowledgeBase): Knowledge base to query.
     mention (Span): Entity mention for which to identify candidates.
-    RETURNS (Iterable[InMemoryCandidate]): Identified candidates.
+    RETURNS (Iterable[Candidate]): Identified candidates.
     """
     return kb.get_candidates(mention)
 

From 961795d9f17d676f0ff46491c7b789106dbfaf0e Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Wed, 15 Mar 2023 09:20:25 +0100
Subject: [PATCH 28/39] Update spacy/ml/models/entity_linker.py

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
---
 spacy/ml/models/entity_linker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py
index 5da0544a9..ea8882430 100644
--- a/spacy/ml/models/entity_linker.py
+++ b/spacy/ml/models/entity_linker.py
@@ -136,6 +136,6 @@ def get_candidates_batch(
     Return candidate entities for the given mentions and fetching appropriate entries from the index.
     kb (KnowledgeBase): Knowledge base to query.
     mention (Iterable[Span]): Entity mentions for which to identify candidates.
-    RETURNS (Iterable[Iterable[InMemoryCandidate]]): Identified candidates.
+    RETURNS (Iterable[Iterable[Candidate]]): Identified candidates.
     """
     return kb.get_candidates_batch(mentions)

From 3cfc1c6accb4f75b141b075aa53d8d9ae12166f0 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Wed, 15 Mar 2023 09:23:31 +0100
Subject: [PATCH 29/39] Mention -> alias renaming. Drop Candidate.mention.
 Drop InMemoryLookupKB.get_alias_candidates() from docs.

---
 spacy/kb/candidate.pyx                | 12 ++++--------
 spacy/kb/kb.pyx                       |  5 +++--
 website/docs/api/inmemorylookupkb.mdx | 16 ----------------
 3 files changed, 7 insertions(+), 26 deletions(-)

diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx
index 94445f27d..9f141b20a 100644
--- a/spacy/kb/candidate.pyx
+++ b/spacy/kb/candidate.pyx
@@ -31,11 +31,6 @@ cdef class Candidate:
         """RETURNS (str): String representation of entity ID."""
         raise NotImplementedError
 
-    @property
-    def mention(self) -> str:
-        """RETURNS (str): Mention."""
-        raise NotImplementedError
-
     @property
     def entity_vector(self) -> vector[float]:
         """RETURNS (vector[float]): Entity vector."""
@@ -69,7 +64,7 @@ cdef class InMemoryCandidate(Candidate):
         self._entity_vector = entity_vector
         self._prior_prob = prior_prob
         self._kb = kb
-        self._mention = alias_hash
+        self._alias = alias_hash
         self._entity_freq = entity_freq
 
     @property
@@ -87,8 +82,9 @@ cdef class InMemoryCandidate(Candidate):
         return self._prior_prob
 
     @property
-    def mention(self) -> str:
-        return self._kb.vocab.strings[self._mention]
+    def alias(self) -> str:
+        """RETURNS (str): Alias."""
+        return self._kb.vocab.strings[self._alias]
 
     @property
     def entity_id_(self) -> str:
diff --git a/spacy/kb/kb.pyx b/spacy/kb/kb.pyx
index d10123e37..e4165301e 100644
--- a/spacy/kb/kb.pyx
+++ b/spacy/kb/kb.pyx
@@ -42,8 +42,9 @@ cdef class KnowledgeBase:
 
     def get_candidates(self, mention: Span) -> Iterable[Candidate]:
         """
-        Return candidate entities for specified text. Each candidate defines the entity, the original mention,
-        and the prior probability of that mention resolving to that entity.
+        Return candidate entities for specified text. Each candidate defines at least the entity and the entity's
+        embedding vector. Depending on the KB implementation, further properties - such as the prior probability of the
+        specified mention text resolving to that entity - might be included.
         If the no candidate is found for a given text, an empty list is returned.
         mention (Span): Mention for which to get candidates.
         RETURNS (Iterable[Candidate]): Identified candidates.
diff --git a/website/docs/api/inmemorylookupkb.mdx b/website/docs/api/inmemorylookupkb.mdx
index e88e4a500..9063939a3 100644
--- a/website/docs/api/inmemorylookupkb.mdx
+++ b/website/docs/api/inmemorylookupkb.mdx
@@ -199,22 +199,6 @@ to you.
 | `mentions`  | The textual mention or alias. ~~Iterable[Span]~~                                                             |
 | **RETURNS** | An iterable of iterable with relevant `InMemoryCandidate` objects. ~~Iterable[Iterable[InMemoryCandidate]]~~ |
 
-## InMemoryLookupKB.get_alias_candidates {id="get_alias_candidates",tag="method"}
-
-Given a certain textual mention as input, retrieve a list of candidate entities
-of type [`InMemoryCandidate`](/api/kb#candidate).
-
-> #### Example
->
-> ```python
-> candidates = kb.get_alias_candidates("Douglas")
-> ```
-
-| Name        | Description                                                                   |
-| ----------- | ----------------------------------------------------------------------------- |
-| `alias`     | The textual mention or alias. ~~str~~                                         |
-| **RETURNS** | The list of relevant `InMemoryCandidate` objects. ~~List[InMemoryCandidate]~~ |
-
 ## InMemoryLookupKB.get_vector {id="get_vector",tag="method"}
 
 Given a certain entity ID, retrieve its pretrained entity vector.

From 80fb0666b98ff0bd96c36daf5f7d0e5e00ecdb25 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Wed, 15 Mar 2023 09:25:41 +0100
Subject: [PATCH 30/39] Update docstrings.

---
 spacy/kb/kb.pyx | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/spacy/kb/kb.pyx b/spacy/kb/kb.pyx
index e4165301e..30694dc2a 100644
--- a/spacy/kb/kb.pyx
+++ b/spacy/kb/kb.pyx
@@ -32,9 +32,10 @@ cdef class KnowledgeBase:
 
     def get_candidates_batch(self, mentions: Iterable[Span]) -> Iterable[Iterable[Candidate]]:
         """
-        Return candidate entities for specified texts. Each candidate defines the entity, the original mention,
-        and the prior probability of this mention resolving to that entity.
-        If no candidate is found for a given text, an empty list is returned.
+        Return candidate entities for specified mention texts. Each candidate defines at least the entity and the
+        entity's embedding vector. Depending on the KB implementation, further properties - such as the prior
+        probability of the specified mention text resolving to that entity - might be included.
+        If the no candidates are found for a given mention text, an empty list is returned.
         mentions (Iterable[Span]): Mentions for which to get candidates.
         RETURNS (Iterable[Iterable[Candidate]]): Identified candidates.
         """
@@ -42,10 +43,10 @@ cdef class KnowledgeBase:
 
     def get_candidates(self, mention: Span) -> Iterable[Candidate]:
         """
-        Return candidate entities for specified text. Each candidate defines at least the entity and the entity's
-        embedding vector. Depending on the KB implementation, further properties - such as the prior probability of the
-        specified mention text resolving to that entity - might be included.
-        If the no candidate is found for a given text, an empty list is returned.
+        Return candidate entities for  specified mention text. Each candidate defines at least the entity and the
+        entity's embedding vector. Depending on the KB implementation, further properties - such as the prior
+        probability of the specified mention text resolving to that entity - might be included.
+        If the no candidate is found for the given mention text, an empty list is returned.
         mention (Span): Mention for which to get candidates.
         RETURNS (Iterable[Candidate]): Identified candidates.
         """

From 830939ee648e32ca5c5dd05e342b8c22ea9ffc96 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Wed, 15 Mar 2023 10:51:34 +0100
Subject: [PATCH 31/39] Fix InMemoryCandidate attribute names.

---
 spacy/kb/candidate.pxd                     | 2 +-
 spacy/kb/candidate.pyx                     | 4 ++--
 spacy/tests/pipeline/test_entity_linker.py | 6 +++---
 spacy/tests/serialize/test_serialize_kb.py | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/spacy/kb/candidate.pxd b/spacy/kb/candidate.pxd
index 0e753bf99..f21f423e4 100644
--- a/spacy/kb/candidate.pxd
+++ b/spacy/kb/candidate.pxd
@@ -8,8 +8,8 @@ cdef class Candidate:
 
 cdef class InMemoryCandidate(Candidate):
     cdef readonly hash_t _entity_hash
+    cdef readonly hash_t _alias_hash
     cpdef vector[float] _entity_vector
     cdef float _prior_prob
     cdef readonly InMemoryLookupKB _kb
     cdef float _entity_freq
-    cdef hash_t _mention
diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx
index 9f141b20a..3d8da4b95 100644
--- a/spacy/kb/candidate.pyx
+++ b/spacy/kb/candidate.pyx
@@ -64,7 +64,7 @@ cdef class InMemoryCandidate(Candidate):
         self._entity_vector = entity_vector
         self._prior_prob = prior_prob
         self._kb = kb
-        self._alias = alias_hash
+        self._alias_hash = alias_hash
         self._entity_freq = entity_freq
 
     @property
@@ -84,7 +84,7 @@ cdef class InMemoryCandidate(Candidate):
     @property
     def alias(self) -> str:
         """RETURNS (str): Alias."""
-        return self._kb.vocab.strings[self._alias]
+        return self._kb.vocab.strings[self._alias_hash]
 
     @property
     def entity_id_(self) -> str:
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index cd1dc90e4..e29e3920b 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -473,7 +473,7 @@ def test_candidate_generation(nlp):
 
     # test the content of the candidates
     assert adam_ent_cands[0].entity_id_ == "Q2"
-    assert adam_ent_cands[0].mention == "adam"
+    assert adam_ent_cands[0].alias == "adam"
     assert_almost_equal(adam_ent_cands[0].entity_freq, 12)
     assert_almost_equal(adam_ent_cands[0].prior_prob, 0.9)
 
@@ -566,7 +566,7 @@ def test_vocab_serialization(nlp):
     assert len(candidates) == 1
     assert candidates[0].entity_id == q2_hash
     assert candidates[0].entity_id_ == "Q2"
-    assert candidates[0].mention == "adam"
+    assert candidates[0].alias == "adam"
 
     with make_tempdir() as d:
         mykb.to_disk(d / "kb")
@@ -577,7 +577,7 @@ def test_vocab_serialization(nlp):
         assert len(candidates) == 1
         assert candidates[0].entity_id == q2_hash
         assert candidates[0].entity_id_ == "Q2"
-        assert candidates[0].mention == "adam"
+        assert candidates[0].alias == "adam"
 
         assert kb_new_vocab.get_vector("Q2") == [2]
         assert_almost_equal(kb_new_vocab.get_prior_prob("Q2", "douglas"), 0.4)
diff --git a/spacy/tests/serialize/test_serialize_kb.py b/spacy/tests/serialize/test_serialize_kb.py
index 336fd16fe..eb4254d31 100644
--- a/spacy/tests/serialize/test_serialize_kb.py
+++ b/spacy/tests/serialize/test_serialize_kb.py
@@ -74,13 +74,13 @@ def _check_kb(kb):
     assert candidates[0].entity_id_ == "Q007"
     assert 6.999 < candidates[0].entity_freq < 7.01
     assert candidates[0].entity_vector == [0, 0, 7]
-    assert candidates[0].mention == "double07"
+    assert candidates[0].alias == "double07"
     assert 0.899 < candidates[0].prior_prob < 0.901
 
     assert candidates[1].entity_id_ == "Q17"
     assert 1.99 < candidates[1].entity_freq < 2.01
     assert candidates[1].entity_vector == [7, 1, 0]
-    assert candidates[1].mention == "double07"
+    assert candidates[1].alias == "double07"
     assert 0.099 < candidates[1].prior_prob < 0.101
 
 

From 978fbdcee1b45f5a88c21da5a219125f711e2b9f Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Fri, 17 Mar 2023 08:58:17 +0100
Subject: [PATCH 32/39] Update spacy/kb/kb.pyx

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
---
 spacy/kb/kb.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/kb/kb.pyx b/spacy/kb/kb.pyx
index 30694dc2a..6260dfb72 100644
--- a/spacy/kb/kb.pyx
+++ b/spacy/kb/kb.pyx
@@ -32,10 +32,10 @@ cdef class KnowledgeBase:
 
     def get_candidates_batch(self, mentions: Iterable[Span]) -> Iterable[Iterable[Candidate]]:
         """
-        Return candidate entities for specified mention texts. Each candidate defines at least the entity and the
+        Return candidate entities for a specified Span mention. Each candidate defines at least the entity and the
         entity's embedding vector. Depending on the KB implementation, further properties - such as the prior
         probability of the specified mention text resolving to that entity - might be included.
-        If the no candidates are found for a given mention text, an empty list is returned.
+        If no candidates are found for a given mention, an empty list is returned.
         mentions (Iterable[Span]): Mentions for which to get candidates.
         RETURNS (Iterable[Iterable[Candidate]]): Identified candidates.
         """

From 307bbab285bb2aa7ab5a696ed3638e6559ba3633 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Fri, 17 Mar 2023 08:58:28 +0100
Subject: [PATCH 33/39] Update spacy/ml/models/entity_linker.py

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
---
 spacy/ml/models/entity_linker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py
index ea8882430..7fe0b4741 100644
--- a/spacy/ml/models/entity_linker.py
+++ b/spacy/ml/models/entity_linker.py
@@ -135,7 +135,7 @@ def get_candidates_batch(
     """
     Return candidate entities for the given mentions and fetching appropriate entries from the index.
     kb (KnowledgeBase): Knowledge base to query.
-    mention (Iterable[Span]): Entity mentions for which to identify candidates.
+    mentions (Iterable[Span]): Entity mentions for which to identify candidates.
     RETURNS (Iterable[Iterable[Candidate]]): Identified candidates.
     """
     return kb.get_candidates_batch(mentions)

From 2377b67f81d5707f46ba6ca085591dcc9256f334 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Fri, 17 Mar 2023 08:59:52 +0100
Subject: [PATCH 34/39] Update W401 text.

---
 spacy/errors.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 0f8091e3a..e7f59c091 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -212,7 +212,8 @@ class Warnings(metaclass=ErrorsWithCodes):
     # v4 warning strings
     W400 = ("`use_upper=False` is ignored, the upper layer is always enabled")
     W401 = ("`incl_prior is True`, but the selected knowledge base type {kb_type} doesn't support prior probability "
-            "lookups.")
+            "lookups (`.supports_prior_probs is False`). If your KB does support prior probability lookups, make sure "
+            "to return True in `.supports_prior_probs`.")
 
 
 class Errors(metaclass=ErrorsWithCodes):

From 4d8dce5ba2a74faf699b9fd78a3b773a871682e7 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Fri, 17 Mar 2023 11:28:18 +0100
Subject: [PATCH 35/39] Update spacy/errors.py

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
---
 spacy/errors.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index e7f59c091..e1f7e7400 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -212,8 +212,8 @@ class Warnings(metaclass=ErrorsWithCodes):
     # v4 warning strings
     W400 = ("`use_upper=False` is ignored, the upper layer is always enabled")
     W401 = ("`incl_prior is True`, but the selected knowledge base type {kb_type} doesn't support prior probability "
-            "lookups (`.supports_prior_probs is False`). If your KB does support prior probability lookups, make sure "
-            "to return True in `.supports_prior_probs`.")
+            "lookups so this setting will be ignored. If your KB does support prior probability lookups, make sure "
+            "to return `True` in `.supports_prior_probs`.")
 
 
 class Errors(metaclass=ErrorsWithCodes):

From faede7155ccb553ee04c409d873eaf193f9ad86e Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Fri, 17 Mar 2023 11:32:41 +0100
Subject: [PATCH 36/39] Update spacy/kb/kb.pyx

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
---
 spacy/kb/kb.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/kb/kb.pyx b/spacy/kb/kb.pyx
index 6260dfb72..1cb08f488 100644
--- a/spacy/kb/kb.pyx
+++ b/spacy/kb/kb.pyx
@@ -43,10 +43,10 @@ cdef class KnowledgeBase:
 
     def get_candidates(self, mention: Span) -> Iterable[Candidate]:
         """
-        Return candidate entities for  specified mention text. Each candidate defines at least the entity and the
+        Return candidate entities for a specific mention. Each candidate defines at least the entity and the
         entity's embedding vector. Depending on the KB implementation, further properties - such as the prior
         probability of the specified mention text resolving to that entity - might be included.
-        If the no candidate is found for the given mention text, an empty list is returned.
+        If no candidate is found for the given mention, an empty list is returned.
         mention (Span): Mention for which to get candidates.
         RETURNS (Iterable[Candidate]): Identified candidates.
         """

From 9e71adc0743d123fdc19688865dd468dbdf02776 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Sun, 19 Mar 2023 23:27:20 +0100
Subject: [PATCH 37/39] Use Candidate output type for toy generators in the
 test suite to mimic best practices

---
 spacy/tests/pipeline/test_entity_linker.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index e29e3920b..5d2f0c430 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -510,13 +510,13 @@ def test_el_pipe_configuration(nlp):
 
     @registry.misc("spacy.LowercaseCandidateGenerator.v1")
     def create_candidates() -> Callable[
-        [InMemoryLookupKB, "Span"], Iterable[InMemoryCandidate]
+        [InMemoryLookupKB, "Span"], Iterable[Candidate]
     ]:
         return get_lowercased_candidates
 
     @registry.misc("spacy.LowercaseCandidateBatchGenerator.v1")
     def create_candidates_batch() -> Callable[
-        [InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[InMemoryCandidate]]
+        [InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[Candidate]]
     ]:
         return get_lowercased_candidates_batch
 

From 0365d3d2e2ff7e22b928b7bf1a54485a7565a5a6 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Sun, 19 Mar 2023 23:31:02 +0100
Subject: [PATCH 38/39] fix docs

---
 website/docs/api/inmemorylookupkb.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/docs/api/inmemorylookupkb.mdx b/website/docs/api/inmemorylookupkb.mdx
index 9063939a3..6fa6cb235 100644
--- a/website/docs/api/inmemorylookupkb.mdx
+++ b/website/docs/api/inmemorylookupkb.mdx
@@ -196,7 +196,7 @@ to you.
 
 | Name        | Description                                                                                                  |
 | ----------- | ------------------------------------------------------------------------------------------------------------ |
-| `mentions`  | The textual mention or alias. ~~Iterable[Span]~~                                                             |
+| `mentions`  | The textual mentions. ~~Iterable[Span]~~                                                                     |
 | **RETURNS** | An iterable of iterable with relevant `InMemoryCandidate` objects. ~~Iterable[Iterable[InMemoryCandidate]]~~ |
 
 ## InMemoryLookupKB.get_vector {id="get_vector",tag="method"}

From b83407388af7bf5b6ae7065b5416ba707d283641 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Sun, 19 Mar 2023 23:34:00 +0100
Subject: [PATCH 39/39] fix import

---
 spacy/tests/pipeline/test_entity_linker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index 5d2f0c430..65406a36e 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -7,7 +7,7 @@ from thinc.types import Ragged
 from spacy import registry, util
 from spacy.attrs import ENT_KB_ID
 from spacy.compat import pickle
-from spacy.kb import InMemoryCandidate, InMemoryLookupKB, KnowledgeBase
+from spacy.kb import Candidate, InMemoryLookupKB, KnowledgeBase
 from spacy.lang.en import English
 from spacy.ml import load_kb
 from spacy.ml.models.entity_linker import build_span_maker, get_candidates