From 41b3a0d932aafb4db9db02ae2e03b560305e0d53 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch
Date: Tue, 7 Mar 2023 13:10:45 +0100
Subject: [PATCH 1/4] Drop support for EntityLinker_v1. (#12377)

---
 spacy/errors.py                            |  1 +
 spacy/pipeline/entity_linker.py            | 23 ++--------------------
 spacy/tests/pipeline/test_entity_linker.py |  7 +------
 3 files changed, 4 insertions(+), 27 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 5049100d8..390de126e 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -960,6 +960,7 @@ class Errors(metaclass=ErrorsWithCodes):
     E4003 = ("Training examples for distillation must have the exact same tokens in the "
              "reference and predicted docs.")
     E4004 = ("Backprop is not supported when is_train is not set.")
+    E4005 = ("EntityLinker_v1 is not supported in spaCy v4. Update your configuration.")

 RENAMED_LANGUAGE_CODES = {"xx": "mul", "is": "isl"}

diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index cd13a4b21..6a187b6c3 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -117,28 +117,10 @@ def make_entity_linker(
         prediction is discarded. If None, predictions are not filtered by any threshold.
     save_activations (bool): save model activations in Doc when annotating.
     """
     if not model.attrs.get("include_span_maker", False):
-        try:
-            from spacy_legacy.components.entity_linker import EntityLinker_v1
-        except:
-            raise ImportError(
-                "In order to use v1 of the EntityLinker, you must use spacy-legacy>=3.0.12."
-            )
-        # The only difference in arguments here is that use_gold_ents and threshold aren't available.
-        return EntityLinker_v1(
-            nlp.vocab,
-            model,
-            name,
-            labels_discard=labels_discard,
-            n_sents=n_sents,
-            incl_prior=incl_prior,
-            incl_context=incl_context,
-            entity_vector_length=entity_vector_length,
-            get_candidates=get_candidates,
-            overwrite=overwrite,
-            scorer=scorer,
-        )
+        raise ValueError(Errors.E4005)
+
     return EntityLinker(
         nlp.vocab,
         model,
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index ed84ce674..87cacfc9d 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -993,13 +993,11 @@ def test_scorer_links():
 @pytest.mark.parametrize(
     "name,config",
     [
-        ("entity_linker", {"@architectures": "spacy.EntityLinker.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL}),
         ("entity_linker", {"@architectures": "spacy.EntityLinker.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL}),
     ],
 )
 # fmt: on
 def test_legacy_architectures(name, config):
-    from spacy_legacy.components.entity_linker import EntityLinker_v1
-
     # Ensure that the legacy architectures still work
     vector_length = 3
@@ -1022,10 +1020,7 @@
         return mykb

     entity_linker = nlp.add_pipe(name, config={"model": config})
-    if config["@architectures"] == "spacy.EntityLinker.v1":
-        assert isinstance(entity_linker, EntityLinker_v1)
-    else:
-        assert isinstance(entity_linker, EntityLinker)
+    assert isinstance(entity_linker, EntityLinker)
     entity_linker.set_kb(create_kb)
     optimizer = nlp.initialize(get_examples=lambda: train_examples)

From 520279ff7c9af199928e2a727999162cb79c38a3 Mon Sep 17 00:00:00 2001
From: Madeesh Kannan
Date: Thu, 9 Mar 2023 09:37:19 +0100
Subject: [PATCH 2/4] `Tok2Vec`: Add `distill` method (#12108)

* `Tok2Vec`: Add `distill` method
* `Tok2Vec`: Refactor `update`
* Add `Tok2Vec.distill` test
* Update `distill` signature to accept `Example`s instead of separate teacher and student docs
* Add docs
* Remove 
docstring * Update test * Remove `update` calls from test * Update `Tok2Vec.distill` docstring --- spacy/pipeline/tok2vec.py | 125 ++++++++++++++++++++------- spacy/tests/pipeline/test_tok2vec.py | 83 ++++++++++++++++++ website/docs/api/tok2vec.mdx | 37 ++++++++ 3 files changed, 213 insertions(+), 32 deletions(-) diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py index c742aaeaa..d9639f8d5 100644 --- a/spacy/pipeline/tok2vec.py +++ b/spacy/pipeline/tok2vec.py @@ -1,5 +1,6 @@ -from typing import Sequence, Iterable, Optional, Dict, Callable, List, Any +from typing import Sequence, Iterable, Optional, Dict, Callable, List, Any, Tuple from thinc.api import Model, set_dropout_rate, Optimizer, Config +from thinc.types import Floats2d from itertools import islice from .trainable_pipe import TrainablePipe @@ -157,39 +158,9 @@ class Tok2Vec(TrainablePipe): DOCS: https://spacy.io/api/tok2vec#update """ - if losses is None: - losses = {} validate_examples(examples, "Tok2Vec.update") docs = [eg.predicted for eg in examples] - set_dropout_rate(self.model, drop) - tokvecs, bp_tokvecs = self.model.begin_update(docs) - d_tokvecs = [self.model.ops.alloc2f(*t2v.shape) for t2v in tokvecs] - losses.setdefault(self.name, 0.0) - - def accumulate_gradient(one_d_tokvecs): - """Accumulate tok2vec loss and gradient. This is passed as a callback - to all but the last listener. Only the last one does the backprop. - """ - nonlocal d_tokvecs - for i in range(len(one_d_tokvecs)): - d_tokvecs[i] += one_d_tokvecs[i] - losses[self.name] += float((one_d_tokvecs[i] ** 2).sum()) - return [self.model.ops.alloc2f(*t2v.shape) for t2v in tokvecs] - - def backprop(one_d_tokvecs): - """Callback to actually do the backprop. Passed to last listener.""" - accumulate_gradient(one_d_tokvecs) - d_docs = bp_tokvecs(d_tokvecs) - if sgd is not None: - self.finish_update(sgd) - return d_docs - - batch_id = Tok2VecListener.get_batch_id(docs) - for listener in self.listeners[:-1]: - listener.receive(batch_id, tokvecs, accumulate_gradient) - if self.listeners: - self.listeners[-1].receive(batch_id, tokvecs, backprop) - return losses + return self._update_with_docs(docs, drop=drop, sgd=sgd, losses=losses) def get_loss(self, examples, scores) -> None: pass @@ -219,6 +190,96 @@ class Tok2Vec(TrainablePipe): def add_label(self, label): raise NotImplementedError + def distill( + self, + teacher_pipe: Optional["TrainablePipe"], + examples: Iterable["Example"], + *, + drop: float = 0.0, + sgd: Optional[Optimizer] = None, + losses: Optional[Dict[str, float]] = None, + ) -> Dict[str, float]: + """Performs an update of the student pipe's model using the + student's distillation examples and sets the annotations + of the teacher's distillation examples using the teacher pipe. + + teacher_pipe (Optional[TrainablePipe]): The teacher pipe to use + for prediction. + examples (Iterable[Example]): Distillation examples. The reference (teacher) + and predicted (student) docs must have the same number of tokens and the + same orthography. + drop (float): dropout rate. + sgd (Optional[Optimizer]): An optimizer. Will be created via + create_optimizer if not set. + losses (Optional[Dict[str, float]]): Optional record of loss during + distillation. + RETURNS: The updated losses dictionary. + + DOCS: https://spacy.io/api/tok2vec#distill + """ + # By default we require a teacher pipe, but there are downstream + # implementations that don't require a pipe. 
+ if teacher_pipe is None: + raise ValueError(Errors.E4002.format(name=self.name)) + teacher_docs = [eg.reference for eg in examples] + student_docs = [eg.predicted for eg in examples] + teacher_preds = teacher_pipe.predict(teacher_docs) + teacher_pipe.set_annotations(teacher_docs, teacher_preds) + return self._update_with_docs(student_docs, drop=drop, sgd=sgd, losses=losses) + + def _update_with_docs( + self, + docs: Iterable[Doc], + *, + drop: float = 0.0, + sgd: Optional[Optimizer] = None, + losses: Optional[Dict[str, float]] = None, + ): + if losses is None: + losses = {} + losses.setdefault(self.name, 0.0) + set_dropout_rate(self.model, drop) + + tokvecs, accumulate_gradient, backprop = self._create_backprops( + docs, losses, sgd=sgd + ) + batch_id = Tok2VecListener.get_batch_id(docs) + for listener in self.listeners[:-1]: + listener.receive(batch_id, tokvecs, accumulate_gradient) + if self.listeners: + self.listeners[-1].receive(batch_id, tokvecs, backprop) + return losses + + def _create_backprops( + self, + docs: Iterable[Doc], + losses: Dict[str, float], + *, + sgd: Optional[Optimizer] = None, + ) -> Tuple[Floats2d, Callable, Callable]: + tokvecs, bp_tokvecs = self.model.begin_update(docs) + d_tokvecs = [self.model.ops.alloc2f(*t2v.shape) for t2v in tokvecs] + + def accumulate_gradient(one_d_tokvecs): + """Accumulate tok2vec loss and gradient. This is passed as a callback + to all but the last listener. Only the last one does the backprop. + """ + nonlocal d_tokvecs + for i in range(len(one_d_tokvecs)): + d_tokvecs[i] += one_d_tokvecs[i] + losses[self.name] += float((one_d_tokvecs[i] ** 2).sum()) + return [self.model.ops.alloc2f(*t2v.shape) for t2v in tokvecs] + + def backprop(one_d_tokvecs): + """Callback to actually do the backprop. Passed to last listener.""" + accumulate_gradient(one_d_tokvecs) + d_docs = bp_tokvecs(d_tokvecs) + if sgd is not None: + self.finish_update(sgd) + return d_docs + + return tokvecs, accumulate_gradient, backprop + class Tok2VecListener(Model): """A layer that gets fed its answers from an upstream connection, diff --git a/spacy/tests/pipeline/test_tok2vec.py b/spacy/tests/pipeline/test_tok2vec.py index ee62b1ab4..6929b76fa 100644 --- a/spacy/tests/pipeline/test_tok2vec.py +++ b/spacy/tests/pipeline/test_tok2vec.py @@ -540,3 +540,86 @@ def test_tok2vec_listeners_textcat(): assert cats1["imperative"] < 0.9 assert [t.tag_ for t in docs[0]] == ["V", "J", "N"] assert [t.tag_ for t in docs[1]] == ["N", "V", "J", "N"] + + +cfg_string_distillation = """ + [nlp] + lang = "en" + pipeline = ["tok2vec","tagger"] + + [components] + + [components.tagger] + factory = "tagger" + + [components.tagger.model] + @architectures = "spacy.Tagger.v2" + nO = null + + [components.tagger.model.tok2vec] + @architectures = "spacy.Tok2VecListener.v1" + width = ${components.tok2vec.model.encode.width} + + [components.tok2vec] + factory = "tok2vec" + + [components.tok2vec.model] + @architectures = "spacy.Tok2Vec.v2" + + [components.tok2vec.model.embed] + @architectures = "spacy.MultiHashEmbed.v2" + width = ${components.tok2vec.model.encode.width} + rows = [2000, 1000, 1000, 1000] + attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"] + include_static_vectors = false + + [components.tok2vec.model.encode] + @architectures = "spacy.MaxoutWindowEncoder.v2" + width = 96 + depth = 4 + window_size = 1 + maxout_pieces = 3 + """ + + +def test_tok2vec_distillation_teacher_annotations(): + orig_config = Config().from_str(cfg_string_distillation) + teacher_nlp = util.load_model_from_config( + 
orig_config, auto_fill=True, validate=True
+    )
+    student_nlp = util.load_model_from_config(
+        orig_config, auto_fill=True, validate=True
+    )
+
+    train_examples_teacher = []
+    train_examples_student = []
+    for t in TRAIN_DATA:
+        train_examples_teacher.append(
+            Example.from_dict(teacher_nlp.make_doc(t[0]), t[1])
+        )
+        train_examples_student.append(
+            Example.from_dict(student_nlp.make_doc(t[0]), t[1])
+        )
+
+    optimizer = teacher_nlp.initialize(lambda: train_examples_teacher)
+    student_nlp.initialize(lambda: train_examples_student)
+
+    # Since Language.distill creates a copy of the examples to use as
+    # its internal teacher/student docs, we'll need to monkey-patch the
+    # tok2vec pipe's distill method.
+    student_tok2vec = student_nlp.get_pipe("tok2vec")
+    student_tok2vec._old_distill = student_tok2vec.distill
+
+    def tok2vec_distill_wrapper(
+        self,
+        teacher_pipe,
+        examples,
+        **kwargs,
+    ):
+        assert all(not eg.reference.tensor.any() for eg in examples)
+        out = self._old_distill(teacher_pipe, examples, **kwargs)
+        assert all(eg.reference.tensor.any() for eg in examples)
+        return out
+
+    student_tok2vec.distill = tok2vec_distill_wrapper.__get__(student_tok2vec, Tok2Vec)
+    student_nlp.distill(teacher_nlp, train_examples_student, sgd=optimizer, losses={})
diff --git a/website/docs/api/tok2vec.mdx b/website/docs/api/tok2vec.mdx
index a1bb1265e..8b6d2380b 100644
--- a/website/docs/api/tok2vec.mdx
+++ b/website/docs/api/tok2vec.mdx
@@ -100,6 +100,43 @@ pipeline components are applied to the `Doc` in order. Both
 | `doc`       | The document to process. ~~Doc~~ |
 | **RETURNS** | The processed document. ~~Doc~~  |
 
+## Tok2Vec.distill {id="distill", tag="method,experimental", version="4"}
+
+Performs an update of the student pipe's model using the student's distillation
+examples and sets the annotations of the teacher's distillation examples using
+the teacher pipe.
+
+Unlike other trainable pipes, the student pipe doesn't directly learn its
+representations from the teacher. However, since downstream pipes that do
+perform distillation expect the tok2vec annotations to be present on the
+correct distillation examples, we need to ensure that they are set beforehand.
+
+The distillation is performed on ~~Example~~ objects. The `Example.reference`
+and `Example.predicted` ~~Doc~~s must have the same number of tokens and the
+same orthography. Even though the reference does not have to have gold
+annotations, the teacher can add its own annotations when necessary.
+
+This feature is experimental.
+
+> #### Example
+>
+> ```python
+> teacher_pipe = teacher.add_pipe("tok2vec")
+> student_pipe = student.add_pipe("tok2vec")
+> optimizer = nlp.resume_training()
+> losses = student.distill(teacher_pipe, examples, sgd=optimizer)
+> ```
+
+| Name           | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------- |
+| `teacher_pipe` | The teacher pipe to use for prediction. ~~Optional[TrainablePipe]~~ |
+| `examples`     | Distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
+| _keyword-only_ | |
+| `drop`         | Dropout rate. ~~float~~ |
+| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. 
~~Optional[Dict[str, float]]~~ | +| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ | + ## Tok2Vec.pipe {id="pipe",tag="method"} Apply the pipe to a stream of documents. This usually happens under the hood From 6ae7618418d1a2514d55bff041fb196957844de6 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Sun, 19 Mar 2023 23:41:20 +0100 Subject: [PATCH 3/4] Clean up Vocab constructor (#12290) * Clean up Vocab constructor * Change effective type of `strings` from `Iterable[str]` to `Optional[StringStore]` * Don't automatically add strings to vocab * Change default values to `None` * Remove `**deprecated_kwargs` * Format --- spacy/strings.pyi | 2 +- spacy/tests/pipeline/test_pipe_methods.py | 3 ++- .../serialize/test_serialize_vocab_strings.py | 27 +++++++++++-------- spacy/tests/vocab_vectors/test_lexeme.py | 2 +- spacy/vocab.pyi | 2 +- spacy/vocab.pyx | 18 +++++++------ website/docs/api/vocab.mdx | 5 ++-- 7 files changed, 34 insertions(+), 25 deletions(-) diff --git a/spacy/strings.pyi b/spacy/strings.pyi index d9509ff57..38dee7034 100644 --- a/spacy/strings.pyi +++ b/spacy/strings.pyi @@ -2,7 +2,7 @@ from typing import List, Optional, Iterable, Iterator, Union, Any, Tuple, overlo from pathlib import Path class StringStore: - def __init__(self, strings: Optional[Iterable[str]]) -> None: ... + def __init__(self, strings: Optional[Iterable[str]] = None) -> None: ... @overload def __getitem__(self, string_or_hash: str) -> int: ... @overload diff --git a/spacy/tests/pipeline/test_pipe_methods.py b/spacy/tests/pipeline/test_pipe_methods.py index 9b9786f04..39611a742 100644 --- a/spacy/tests/pipeline/test_pipe_methods.py +++ b/spacy/tests/pipeline/test_pipe_methods.py @@ -9,6 +9,7 @@ from spacy.lang.en import English from spacy.lang.en.syntax_iterators import noun_chunks from spacy.language import Language from spacy.pipeline import TrainablePipe +from spacy.strings import StringStore from spacy.tokens import Doc from spacy.training import Example from spacy.util import SimpleFrozenList, get_arg_names, make_tempdir @@ -131,7 +132,7 @@ def test_issue5458(): # Test that the noun chuncker does not generate overlapping spans # fmt: off words = ["In", "an", "era", "where", "markets", "have", "brought", "prosperity", "and", "empowerment", "."] - vocab = Vocab(strings=words) + vocab = Vocab(strings=StringStore(words)) deps = ["ROOT", "det", "pobj", "advmod", "nsubj", "aux", "relcl", "dobj", "cc", "conj", "punct"] pos = ["ADP", "DET", "NOUN", "ADV", "NOUN", "AUX", "VERB", "NOUN", "CCONJ", "NOUN", "PUNCT"] heads = [0, 2, 0, 9, 6, 6, 2, 6, 7, 7, 0] diff --git a/spacy/tests/serialize/test_serialize_vocab_strings.py b/spacy/tests/serialize/test_serialize_vocab_strings.py index fd80c3d8e..f6356ac9e 100644 --- a/spacy/tests/serialize/test_serialize_vocab_strings.py +++ b/spacy/tests/serialize/test_serialize_vocab_strings.py @@ -13,8 +13,11 @@ from spacy.vocab import Vocab from ..util import make_tempdir -test_strings = [([], []), (["rats", "are", "cute"], ["i", "like", "rats"])] -test_strings_attrs = [(["rats", "are", "cute"], "Hello")] +test_strings = [ + (StringStore(), StringStore()), + (StringStore(["rats", "are", "cute"]), StringStore(["i", "like", "rats"])), +] +test_strings_attrs = [(StringStore(["rats", "are", "cute"]), "Hello")] @pytest.mark.issue(599) @@ -81,7 +84,7 @@ def test_serialize_vocab_roundtrip_bytes(strings1, strings2): vocab2 = Vocab(strings=strings2) vocab1_b = vocab1.to_bytes() vocab2_b = vocab2.to_bytes() - if strings1 == strings2: + if strings1.to_bytes() == 
strings2.to_bytes(): assert vocab1_b == vocab2_b else: assert vocab1_b != vocab2_b @@ -117,11 +120,12 @@ def test_serialize_vocab_roundtrip_disk(strings1, strings2): def test_serialize_vocab_lex_attrs_bytes(strings, lex_attr): vocab1 = Vocab(strings=strings) vocab2 = Vocab() - vocab1[strings[0]].norm_ = lex_attr - assert vocab1[strings[0]].norm_ == lex_attr - assert vocab2[strings[0]].norm_ != lex_attr + s = next(iter(vocab1.strings)) + vocab1[s].norm_ = lex_attr + assert vocab1[s].norm_ == lex_attr + assert vocab2[s].norm_ != lex_attr vocab2 = vocab2.from_bytes(vocab1.to_bytes()) - assert vocab2[strings[0]].norm_ == lex_attr + assert vocab2[s].norm_ == lex_attr @pytest.mark.parametrize("strings,lex_attr", test_strings_attrs) @@ -136,14 +140,15 @@ def test_deserialize_vocab_seen_entries(strings, lex_attr): def test_serialize_vocab_lex_attrs_disk(strings, lex_attr): vocab1 = Vocab(strings=strings) vocab2 = Vocab() - vocab1[strings[0]].norm_ = lex_attr - assert vocab1[strings[0]].norm_ == lex_attr - assert vocab2[strings[0]].norm_ != lex_attr + s = next(iter(vocab1.strings)) + vocab1[s].norm_ = lex_attr + assert vocab1[s].norm_ == lex_attr + assert vocab2[s].norm_ != lex_attr with make_tempdir() as d: file_path = d / "vocab" vocab1.to_disk(file_path) vocab2 = vocab2.from_disk(file_path) - assert vocab2[strings[0]].norm_ == lex_attr + assert vocab2[s].norm_ == lex_attr @pytest.mark.parametrize("strings1,strings2", test_strings) diff --git a/spacy/tests/vocab_vectors/test_lexeme.py b/spacy/tests/vocab_vectors/test_lexeme.py index d91f41db3..cd7f954ae 100644 --- a/spacy/tests/vocab_vectors/test_lexeme.py +++ b/spacy/tests/vocab_vectors/test_lexeme.py @@ -17,7 +17,7 @@ def test_issue361(en_vocab, text1, text2): @pytest.mark.issue(600) def test_issue600(): - vocab = Vocab(tag_map={"NN": {"pos": "NOUN"}}) + vocab = Vocab() doc = Doc(vocab, words=["hello"]) doc[0].tag_ = "NN" diff --git a/spacy/vocab.pyi b/spacy/vocab.pyi index 871044fff..e4a88bfd8 100644 --- a/spacy/vocab.pyi +++ b/spacy/vocab.pyi @@ -26,7 +26,7 @@ class Vocab: def __init__( self, lex_attr_getters: Optional[Dict[str, Callable[[str], Any]]] = ..., - strings: Optional[Union[List[str], StringStore]] = ..., + strings: Optional[StringStore] = ..., lookups: Optional[Lookups] = ..., oov_prob: float = ..., writing_system: Dict[str, Any] = ..., diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index f3c3595ef..0d3c9c883 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -49,9 +49,8 @@ cdef class Vocab: DOCS: https://spacy.io/api/vocab """ - def __init__(self, lex_attr_getters=None, strings=tuple(), lookups=None, - oov_prob=-20., writing_system={}, get_noun_chunks=None, - **deprecated_kwargs): + def __init__(self, lex_attr_getters=None, strings=None, lookups=None, + oov_prob=-20., writing_system=None, get_noun_chunks=None): """Create the vocabulary. 
lex_attr_getters (dict): A dictionary mapping attribute IDs to @@ -69,16 +68,19 @@ cdef class Vocab: self.cfg = {'oov_prob': oov_prob} self.mem = Pool() self._by_orth = PreshMap() - self.strings = StringStore() self.length = 0 - if strings: - for string in strings: - _ = self[string] + if strings is None: + self.strings = StringStore() + else: + self.strings = strings self.lex_attr_getters = lex_attr_getters self.morphology = Morphology(self.strings) self.vectors = Vectors(strings=self.strings) self.lookups = lookups - self.writing_system = writing_system + if writing_system is None: + self.writing_system = {} + else: + self.writing_system = writing_system self.get_noun_chunks = get_noun_chunks property vectors: diff --git a/website/docs/api/vocab.mdx b/website/docs/api/vocab.mdx index 3faf1f1a0..304040f9c 100644 --- a/website/docs/api/vocab.mdx +++ b/website/docs/api/vocab.mdx @@ -17,14 +17,15 @@ Create the vocabulary. > #### Example > > ```python +> from spacy.strings import StringStore > from spacy.vocab import Vocab -> vocab = Vocab(strings=["hello", "world"]) +> vocab = Vocab(strings=StringStore(["hello", "world"])) > ``` | Name | Description | | ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `lex_attr_getters` | A dictionary mapping attribute IDs to functions to compute them. Defaults to `None`. ~~Optional[Dict[str, Callable[[str], Any]]]~~ | -| `strings` | A [`StringStore`](/api/stringstore) that maps strings to hash values, and vice versa, or a list of strings. ~~Union[List[str], StringStore]~~ | +| `strings` | A [`StringStore`](/api/stringstore) that maps strings to hash values. ~~Optional[StringStore]~~ | | `lookups` | A [`Lookups`](/api/lookups) that stores the `lexeme_norm` and other large lookup tables. Defaults to `None`. ~~Optional[Lookups]~~ | | `oov_prob` | The default OOV probability. Defaults to `-20.0`. ~~float~~ | | `writing_system` | A dictionary describing the language's writing system. Typically provided by [`Language.Defaults`](/api/language#defaults). ~~Dict[str, Any]~~ | From 9340eb8ad2a7525096c902112c7cf1a21d145df2 Mon Sep 17 00:00:00 2001 From: Raphael Mitsch Date: Mon, 20 Mar 2023 00:34:35 +0100 Subject: [PATCH 4/4] Introduce hierarchy for EL `Candidate` objects (#12341) * Convert Candidate from Cython to Python class. * Format. * Fix .entity_ typo in _add_activations() usage. * Update spacy/kb/candidate.py Co-authored-by: Sofie Van Landeghem * Update doc string of BaseCandidate.__init__(). * Update spacy/kb/candidate.py Co-authored-by: Sofie Van Landeghem * Rename Candidate to InMemoryCandidate, BaseCandidate to Candidate. * Adjust Candidate to support and mandate numerical entity IDs. * Format. * Fix docstring and docs. * Update website/docs/api/kb.mdx Co-authored-by: Sofie Van Landeghem * Rename alias -> mention. * Refactor Candidate attribute names. Update docs and tests accordingly. * Refacor Candidate attributes and their usage. * Format. * Fix mypy error. * Update error code in line with v4 convention. * Update spacy/kb/candidate.py Co-authored-by: Sofie Van Landeghem * Updated error code. * Simplify interface for int/str representations. * Update website/docs/api/kb.mdx Co-authored-by: Sofie Van Landeghem * Rename 'alias' to 'mention'. * Port Candidate and InMemoryCandidate to Cython. * Remove redundant entry in setup.py. * Add abstract class check. * Drop storing mention. 
* Update spacy/kb/candidate.pxd Co-authored-by: Sofie Van Landeghem * Fix entity_id refactoring problems in docstrings. * Drop unused InMemoryCandidate._entity_hash. * Update docstrings. * Move attributes out of Candidate. * Partially fix alias/mention terminology usage. Convert Candidate to interface. * Remove prior_prob from supported properties in Candidate. Introduce KnowledgeBase.supports_prior_probs(). * Update docstrings related to prior_prob. * Update alias/mention usage in doc(strings). * Update spacy/ml/models/entity_linker.py Co-authored-by: Sofie Van Landeghem * Update spacy/ml/models/entity_linker.py Co-authored-by: Sofie Van Landeghem * Mention -> alias renaming. Drop Candidate.mentions(). Drop InMemoryLookupKB.get_alias_candidates() from docs. * Update docstrings. * Fix InMemoryCandidate attribute names. * Update spacy/kb/kb.pyx Co-authored-by: Sofie Van Landeghem * Update spacy/ml/models/entity_linker.py Co-authored-by: Sofie Van Landeghem * Update W401 test. * Update spacy/errors.py Co-authored-by: Sofie Van Landeghem * Update spacy/kb/kb.pyx Co-authored-by: Sofie Van Landeghem * Use Candidate output type for toy generators in the test suite to mimick best practices * fix docs * fix import --------- Co-authored-by: Sofie Van Landeghem --- spacy/errors.py | 8 +- spacy/kb/__init__.py | 5 +- spacy/kb/candidate.pxd | 19 ++-- spacy/kb/candidate.pyx | 120 ++++++++++++--------- spacy/kb/kb.pyx | 21 ++-- spacy/kb/kb_in_memory.pyx | 31 +++--- spacy/ml/models/entity_linker.py | 24 ++++- spacy/pipeline/entity_linker.py | 22 ++-- spacy/tests/pipeline/test_entity_linker.py | 49 +++++---- spacy/tests/serialize/test_serialize_kb.py | 12 ++- website/docs/api/inmemorylookupkb.mdx | 40 +++---- website/docs/api/kb.mdx | 51 ++++----- 12 files changed, 223 insertions(+), 179 deletions(-) diff --git a/spacy/errors.py b/spacy/errors.py index 390de126e..e1f7e7400 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -82,7 +82,7 @@ class Warnings(metaclass=ErrorsWithCodes): "ignoring the duplicate entry.") W021 = ("Unexpected hash collision in PhraseMatcher. Matches may be " "incorrect. Modify PhraseMatcher._terminal_hash to fix.") - W024 = ("Entity '{entity}' - Alias '{alias}' combination already exists in " + W024 = ("Entity '{entity}' - alias '{alias}' combination already exists in " "the Knowledge Base.") W026 = ("Unable to set all sentence boundaries from dependency parses. If " "you are constructing a parse tree incrementally by setting " @@ -209,7 +209,11 @@ class Warnings(metaclass=ErrorsWithCodes): "`enabled` ({enabled}). Be aware that this might affect other components in your pipeline.") W124 = ("{host}:{port} is already in use, using the nearest available port {serve_port} as an alternative.") + # v4 warning strings W400 = ("`use_upper=False` is ignored, the upper layer is always enabled") + W401 = ("`incl_prior is True`, but the selected knowledge base type {kb_type} doesn't support prior probability " + "lookups so this setting will be ignored. If your KB does support prior probability lookups, make sure " + "to return `True` in `.supports_prior_probs`.") class Errors(metaclass=ErrorsWithCodes): @@ -961,6 +965,8 @@ class Errors(metaclass=ErrorsWithCodes): "reference and predicted docs.") E4004 = ("Backprop is not supported when is_train is not set.") E4005 = ("EntityLinker_v1 is not supported in spaCy v4. 
Update your configuration.") + E4006 = ("Expected `entity_id` to be of type {exp_type}, but is of type {found_type}.") + RENAMED_LANGUAGE_CODES = {"xx": "mul", "is": "isl"} diff --git a/spacy/kb/__init__.py b/spacy/kb/__init__.py index 1d70a9b34..ff0e209e3 100644 --- a/spacy/kb/__init__.py +++ b/spacy/kb/__init__.py @@ -1,3 +1,6 @@ from .kb import KnowledgeBase from .kb_in_memory import InMemoryLookupKB -from .candidate import Candidate, get_candidates, get_candidates_batch +from .candidate import Candidate, InMemoryCandidate + + +__all__ = ["KnowledgeBase", "InMemoryLookupKB", "Candidate", "InMemoryCandidate"] diff --git a/spacy/kb/candidate.pxd b/spacy/kb/candidate.pxd index 942ce9dd0..f21f423e4 100644 --- a/spacy/kb/candidate.pxd +++ b/spacy/kb/candidate.pxd @@ -1,12 +1,15 @@ -from .kb cimport KnowledgeBase from libcpp.vector cimport vector +from .kb_in_memory cimport InMemoryLookupKB from ..typedefs cimport hash_t -# Object used by the Entity Linker that summarizes one entity-alias candidate combination. cdef class Candidate: - cdef readonly KnowledgeBase kb - cdef hash_t entity_hash - cdef float entity_freq - cdef vector[float] entity_vector - cdef hash_t alias_hash - cdef float prior_prob + pass + + +cdef class InMemoryCandidate(Candidate): + cdef readonly hash_t _entity_hash + cdef readonly hash_t _alias_hash + cpdef vector[float] _entity_vector + cdef float _prior_prob + cdef readonly InMemoryLookupKB _kb + cdef float _entity_freq diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx index c89efeb03..3d8da4b95 100644 --- a/spacy/kb/candidate.pyx +++ b/spacy/kb/candidate.pyx @@ -1,74 +1,96 @@ # cython: infer_types=True, profile=True -from typing import Iterable -from .kb cimport KnowledgeBase -from ..tokens import Span +from .kb_in_memory cimport InMemoryLookupKB +from ..errors import Errors cdef class Candidate: - """A `Candidate` object refers to a textual mention (`alias`) that may or may not be resolved - to a specific `entity` from a Knowledge Base. This will be used as input for the entity linking + """A `Candidate` object refers to a textual mention that may or may not be resolved + to a specific entity from a Knowledge Base. This will be used as input for the entity linking algorithm which will disambiguate the various candidates to the correct one. - Each candidate (alias, entity) pair is assigned a certain prior probability. + Each candidate, which represents a possible link between one textual mention and one entity in the knowledge base, + is assigned a certain prior probability. DOCS: https://spacy.io/api/kb/#candidate-init """ - def __init__(self, KnowledgeBase kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob): - self.kb = kb - self.entity_hash = entity_hash - self.entity_freq = entity_freq - self.entity_vector = entity_vector - self.alias_hash = alias_hash - self.prior_prob = prior_prob + def __init__(self): + # Make sure abstract Candidate is not instantiated. 
+ if self.__class__ == Candidate: + raise TypeError( + Errors.E1046.format(cls_name=self.__class__.__name__) + ) @property - def entity(self) -> int: - """RETURNS (uint64): hash of the entity's KB ID/name""" - return self.entity_hash + def entity_id(self) -> int: + """RETURNS (int): Numerical representation of entity ID (if entity ID is numerical, this is just the entity ID, + otherwise the hash of the entity ID string).""" + raise NotImplementedError @property - def entity_(self) -> str: - """RETURNS (str): ID/name of this entity in the KB""" - return self.kb.vocab.strings[self.entity_hash] + def entity_id_(self) -> str: + """RETURNS (str): String representation of entity ID.""" + raise NotImplementedError @property - def alias(self) -> int: - """RETURNS (uint64): hash of the alias""" - return self.alias_hash + def entity_vector(self) -> vector[float]: + """RETURNS (vector[float]): Entity vector.""" + raise NotImplementedError + + +cdef class InMemoryCandidate(Candidate): + """Candidate for InMemoryLookupKB.""" + + def __init__( + self, + kb: InMemoryLookupKB, + entity_hash: int, + alias_hash: int, + entity_vector: vector[float], + prior_prob: float, + entity_freq: float + ): + """ + kb (InMemoryLookupKB]): InMemoryLookupKB instance. + entity_id (int): Entity ID as hash that can be looked up with InMemoryKB.vocab.strings.__getitem__(). + entity_freq (int): Entity frequency in KB corpus. + entity_vector (List[float]): Entity embedding. + alias_hash (int): Alias hash. + prior_prob (float): Prior probability of entity for this alias. I. e. the probability that, independent of + the context, this alias - which matches one of this entity's aliases - resolves to one this entity. + """ + super().__init__() + + self._entity_hash = entity_hash + self._entity_vector = entity_vector + self._prior_prob = prior_prob + self._kb = kb + self._alias_hash = alias_hash + self._entity_freq = entity_freq @property - def alias_(self) -> str: - """RETURNS (str): ID of the original alias""" - return self.kb.vocab.strings[self.alias_hash] + def entity_id(self) -> int: + return self._entity_hash @property - def entity_freq(self) -> float: - return self.entity_freq - - @property - def entity_vector(self) -> Iterable[float]: - return self.entity_vector + def entity_vector(self) -> vector[float]: + return self._entity_vector @property def prior_prob(self) -> float: - return self.prior_prob + """RETURNS (float): Prior probability that this alias, which matches one of this entity's synonyms, resolves to + this entity.""" + return self._prior_prob + @property + def alias(self) -> str: + """RETURNS (str): Alias.""" + return self._kb.vocab.strings[self._alias_hash] -def get_candidates(kb: KnowledgeBase, mention: Span) -> Iterable[Candidate]: - """ - Return candidate entities for a given mention and fetching appropriate entries from the index. - kb (KnowledgeBase): Knowledge base to query. - mention (Span): Entity mention for which to identify candidates. - RETURNS (Iterable[Candidate]): Identified candidates. - """ - return kb.get_candidates(mention) + @property + def entity_id_(self) -> str: + return self._kb.vocab.strings[self._entity_hash] - -def get_candidates_batch(kb: KnowledgeBase, mentions: Iterable[Span]) -> Iterable[Iterable[Candidate]]: - """ - Return candidate entities for the given mentions and fetching appropriate entries from the index. - kb (KnowledgeBase): Knowledge base to query. - mention (Iterable[Span]): Entity mentions for which to identify candidates. 
- RETURNS (Iterable[Iterable[Candidate]]): Identified candidates. - """ - return kb.get_candidates_batch(mentions) + @property + def entity_freq(self) -> float: + """RETURNS (float): Entity frequency in KB corpus.""" + return self._entity_freq diff --git a/spacy/kb/kb.pyx b/spacy/kb/kb.pyx index ce4bc0138..1cb08f488 100644 --- a/spacy/kb/kb.pyx +++ b/spacy/kb/kb.pyx @@ -32,9 +32,10 @@ cdef class KnowledgeBase: def get_candidates_batch(self, mentions: Iterable[Span]) -> Iterable[Iterable[Candidate]]: """ - Return candidate entities for specified texts. Each candidate defines the entity, the original alias, - and the prior probability of that alias resolving to that entity. - If no candidate is found for a given text, an empty list is returned. + Return candidate entities for a specified Span mention. Each candidate defines at least the entity and the + entity's embedding vector. Depending on the KB implementation, further properties - such as the prior + probability of the specified mention text resolving to that entity - might be included. + If no candidates are found for a given mention, an empty list is returned. mentions (Iterable[Span]): Mentions for which to get candidates. RETURNS (Iterable[Iterable[Candidate]]): Identified candidates. """ @@ -42,9 +43,10 @@ cdef class KnowledgeBase: def get_candidates(self, mention: Span) -> Iterable[Candidate]: """ - Return candidate entities for specified text. Each candidate defines the entity, the original alias, - and the prior probability of that alias resolving to that entity. - If the no candidate is found for a given text, an empty list is returned. + Return candidate entities for a specific mention. Each candidate defines at least the entity and the + entity's embedding vector. Depending on the KB implementation, further properties - such as the prior + probability of the specified mention text resolving to that entity - might be included. + If no candidate is found for the given mention, an empty list is returned. mention (Span): Mention for which to get candidates. RETURNS (Iterable[Candidate]): Identified candidates. """ @@ -106,3 +108,10 @@ cdef class KnowledgeBase: raise NotImplementedError( Errors.E1045.format(parent="KnowledgeBase", method="from_disk", name=self.__name__) ) + + @property + def supports_prior_probs(self) -> bool: + """RETURNS (bool): Whether this KB type supports looking up prior probabilities for entity mentions.""" + raise NotImplementedError( + Errors.E1045.format(parent="KnowledgeBase", method="supports_prior_probs", name=self.__name__) + ) diff --git a/spacy/kb/kb_in_memory.pyx b/spacy/kb/kb_in_memory.pyx index 2a74d047b..c9ced8309 100644 --- a/spacy/kb/kb_in_memory.pyx +++ b/spacy/kb/kb_in_memory.pyx @@ -18,7 +18,7 @@ from .. 
import util from ..util import SimpleFrozenList, ensure_path from ..vocab cimport Vocab from .kb cimport KnowledgeBase -from .candidate import Candidate as Candidate +from .candidate import InMemoryCandidate cdef class InMemoryLookupKB(KnowledgeBase): @@ -226,10 +226,10 @@ cdef class InMemoryLookupKB(KnowledgeBase): alias_entry.probs = probs self._aliases_table[alias_index] = alias_entry - def get_candidates(self, mention: Span) -> Iterable[Candidate]: - return self.get_alias_candidates(mention.text) # type: ignore + def get_candidates(self, mention: Span) -> Iterable[InMemoryCandidate]: + return self._get_alias_candidates(mention.text) # type: ignore - def get_alias_candidates(self, str alias) -> Iterable[Candidate]: + def _get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]: """ Return candidate entities for an alias. Each candidate defines the entity, the original alias, and the prior probability of that alias resolving to that entity. @@ -241,14 +241,18 @@ cdef class InMemoryLookupKB(KnowledgeBase): alias_index = self._alias_index.get(alias_hash) alias_entry = self._aliases_table[alias_index] - return [Candidate(kb=self, - entity_hash=self._entries[entry_index].entity_hash, - entity_freq=self._entries[entry_index].freq, - entity_vector=self._vectors_table[self._entries[entry_index].vector_index], - alias_hash=alias_hash, - prior_prob=prior_prob) - for (entry_index, prior_prob) in zip(alias_entry.entry_indices, alias_entry.probs) - if entry_index != 0] + return [ + InMemoryCandidate( + kb=self, + entity_hash=self._entries[entry_index].entity_hash, + alias_hash=alias_hash, + entity_vector=self._vectors_table[self._entries[entry_index].vector_index], + prior_prob=prior_prob, + entity_freq=self._entries[entry_index].freq + ) + for (entry_index, prior_prob) in zip(alias_entry.entry_indices, alias_entry.probs) + if entry_index != 0 + ] def get_vector(self, str entity): cdef hash_t entity_hash = self.vocab.strings[entity] @@ -279,6 +283,9 @@ cdef class InMemoryLookupKB(KnowledgeBase): return 0.0 + def supports_prior_probs(self) -> bool: + return True + def to_bytes(self, **kwargs): """Serialize the current state to a binary string. """ diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py index 7332ca199..7fe0b4741 100644 --- a/spacy/ml/models/entity_linker.py +++ b/spacy/ml/models/entity_linker.py @@ -6,7 +6,7 @@ from thinc.api import Model, Maxout, Linear, tuplify, Ragged from ...util import registry from ...kb import KnowledgeBase, InMemoryLookupKB -from ...kb import Candidate, get_candidates, get_candidates_batch +from ...kb import Candidate from ...vocab import Vocab from ...tokens import Span, Doc from ..extract_spans import extract_spans @@ -117,3 +117,25 @@ def create_candidates_batch() -> Callable[ [KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]] ]: return get_candidates_batch + + +def get_candidates(kb: KnowledgeBase, mention: Span) -> Iterable[Candidate]: + """ + Return candidate entities for a given mention and fetching appropriate entries from the index. + kb (KnowledgeBase): Knowledge base to query. + mention (Span): Entity mention for which to identify candidates. + RETURNS (Iterable[Candidate]): Identified candidates. + """ + return kb.get_candidates(mention) + + +def get_candidates_batch( + kb: KnowledgeBase, mentions: Iterable[Span] +) -> Iterable[Iterable[Candidate]]: + """ + Return candidate entities for the given mentions and fetching appropriate entries from the index. 
+ kb (KnowledgeBase): Knowledge base to query. + mentions (Iterable[Span]): Entity mentions for which to identify candidates. + RETURNS (Iterable[Iterable[Candidate]]): Identified candidates. + """ + return kb.get_candidates_batch(mentions) diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py index 6a187b6c3..caced9cfd 100644 --- a/spacy/pipeline/entity_linker.py +++ b/spacy/pipeline/entity_linker.py @@ -1,5 +1,5 @@ -from typing import Optional, Iterable, Callable, Dict, Sequence, Union, List, Any -from typing import cast +import warnings +from typing import Optional, Iterable, Callable, Dict, Sequence, Union, List, Any, cast from numpy import dtype from thinc.types import Floats1d, Floats2d, Ints1d, Ragged from pathlib import Path @@ -10,14 +10,13 @@ from thinc.api import CosineDistance, Model, Optimizer, Config from thinc.api import set_dropout_rate from ..kb import KnowledgeBase, Candidate -from ..ml import empty_kb from ..tokens import Doc, Span from .pipe import deserialize_config from .trainable_pipe import TrainablePipe from ..language import Language from ..vocab import Vocab from ..training import Example, validate_examples, validate_get_examples -from ..errors import Errors +from ..errors import Errors, Warnings from ..util import SimpleFrozenList, registry from .. import util from ..scorer import Scorer @@ -240,6 +239,8 @@ class EntityLinker(TrainablePipe): if candidates_batch_size < 1: raise ValueError(Errors.E1044) + if self.incl_prior and not self.kb.supports_prior_probs: + warnings.warn(Warnings.W401) def set_kb(self, kb_loader: Callable[[Vocab], KnowledgeBase]): """Define the KB of this pipe by providing a function that will @@ -522,18 +523,19 @@ class EntityLinker(TrainablePipe): ) elif len(candidates) == 1 and self.threshold is None: # shortcut for efficiency reasons: take the 1 candidate - final_kb_ids.append(candidates[0].entity_) + final_kb_ids.append(candidates[0].entity_id_) self._add_activations( doc_scores=doc_scores, doc_ents=doc_ents, scores=[1.0], - ents=[candidates[0].entity_], + ents=[candidates[0].entity_id], ) else: random.shuffle(candidates) # set all prior probabilities to 0 if incl_prior=False - prior_probs = xp.asarray([c.prior_prob for c in candidates]) - if not self.incl_prior: + if self.incl_prior and self.kb.supports_prior_probs: + prior_probs = xp.asarray([c.prior_prob for c in candidates]) # type: ignore + else: prior_probs = xp.asarray([0.0 for _ in candidates]) scores = prior_probs # add in similarity from the context @@ -557,7 +559,7 @@ class EntityLinker(TrainablePipe): raise ValueError(Errors.E161) scores = prior_probs + sims - (prior_probs * sims) final_kb_ids.append( - candidates[scores.argmax().item()].entity_ + candidates[scores.argmax().item()].entity_id_ if self.threshold is None or scores.max() >= self.threshold else EntityLinker.NIL @@ -566,7 +568,7 @@ class EntityLinker(TrainablePipe): doc_scores=doc_scores, doc_ents=doc_ents, scores=scores, - ents=[c.entity for c in candidates], + ents=[c.entity_id for c in candidates], ) self._add_doc_activations( docs_scores=docs_scores, diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py index 87cacfc9d..65406a36e 100644 --- a/spacy/tests/pipeline/test_entity_linker.py +++ b/spacy/tests/pipeline/test_entity_linker.py @@ -7,10 +7,10 @@ from thinc.types import Ragged from spacy import registry, util from spacy.attrs import ENT_KB_ID from spacy.compat import pickle -from spacy.kb import Candidate, InMemoryLookupKB, 
get_candidates, KnowledgeBase +from spacy.kb import Candidate, InMemoryLookupKB, KnowledgeBase from spacy.lang.en import English from spacy.ml import load_kb -from spacy.ml.models.entity_linker import build_span_maker +from spacy.ml.models.entity_linker import build_span_maker, get_candidates from spacy.pipeline import EntityLinker, TrainablePipe from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL from spacy.scorer import Scorer @@ -465,16 +465,17 @@ def test_candidate_generation(nlp): mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9]) # test the size of the relevant candidates + adam_ent_cands = get_candidates(mykb, adam_ent) assert len(get_candidates(mykb, douglas_ent)) == 2 - assert len(get_candidates(mykb, adam_ent)) == 1 + assert len(adam_ent_cands) == 1 assert len(get_candidates(mykb, Adam_ent)) == 0 # default case sensitive assert len(get_candidates(mykb, shrubbery_ent)) == 0 # test the content of the candidates - assert get_candidates(mykb, adam_ent)[0].entity_ == "Q2" - assert get_candidates(mykb, adam_ent)[0].alias_ == "adam" - assert_almost_equal(get_candidates(mykb, adam_ent)[0].entity_freq, 12) - assert_almost_equal(get_candidates(mykb, adam_ent)[0].prior_prob, 0.9) + assert adam_ent_cands[0].entity_id_ == "Q2" + assert adam_ent_cands[0].alias == "adam" + assert_almost_equal(adam_ent_cands[0].entity_freq, 12) + assert_almost_equal(adam_ent_cands[0].prior_prob, 0.9) def test_el_pipe_configuration(nlp): @@ -502,7 +503,7 @@ def test_el_pipe_configuration(nlp): assert doc[2].ent_kb_id_ == "Q2" def get_lowercased_candidates(kb, span): - return kb.get_alias_candidates(span.text.lower()) + return kb._get_alias_candidates(span.text.lower()) def get_lowercased_candidates_batch(kb, spans): return [get_lowercased_candidates(kb, span) for span in spans] @@ -561,24 +562,22 @@ def test_vocab_serialization(nlp): mykb.add_alias(alias="douglas", entities=["Q2", "Q3"], probabilities=[0.4, 0.1]) adam_hash = mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9]) - candidates = mykb.get_alias_candidates("adam") + candidates = mykb._get_alias_candidates("adam") assert len(candidates) == 1 - assert candidates[0].entity == q2_hash - assert candidates[0].entity_ == "Q2" - assert candidates[0].alias == adam_hash - assert candidates[0].alias_ == "adam" + assert candidates[0].entity_id == q2_hash + assert candidates[0].entity_id_ == "Q2" + assert candidates[0].alias == "adam" with make_tempdir() as d: mykb.to_disk(d / "kb") kb_new_vocab = InMemoryLookupKB(Vocab(), entity_vector_length=1) kb_new_vocab.from_disk(d / "kb") - candidates = kb_new_vocab.get_alias_candidates("adam") + candidates = kb_new_vocab._get_alias_candidates("adam") assert len(candidates) == 1 - assert candidates[0].entity == q2_hash - assert candidates[0].entity_ == "Q2" - assert candidates[0].alias == adam_hash - assert candidates[0].alias_ == "adam" + assert candidates[0].entity_id == q2_hash + assert candidates[0].entity_id_ == "Q2" + assert candidates[0].alias == "adam" assert kb_new_vocab.get_vector("Q2") == [2] assert_almost_equal(kb_new_vocab.get_prior_prob("Q2", "douglas"), 0.4) @@ -598,20 +597,20 @@ def test_append_alias(nlp): mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9]) # test the size of the relevant candidates - assert len(mykb.get_alias_candidates("douglas")) == 2 + assert len(mykb._get_alias_candidates("douglas")) == 2 # append an alias mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.2) # test the size of the relevant candidates has been incremented - 
assert len(mykb.get_alias_candidates("douglas")) == 3 + assert len(mykb._get_alias_candidates("douglas")) == 3 # append the same alias-entity pair again should not work (will throw a warning) with pytest.warns(UserWarning): mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.3) # test the size of the relevant candidates remained unchanged - assert len(mykb.get_alias_candidates("douglas")) == 3 + assert len(mykb._get_alias_candidates("douglas")) == 3 @pytest.mark.filterwarnings("ignore:\\[W036") @@ -908,11 +907,11 @@ def test_kb_to_bytes(): assert kb_2.contains_alias("Russ Cochran") assert kb_1.get_size_aliases() == kb_2.get_size_aliases() assert kb_1.get_alias_strings() == kb_2.get_alias_strings() - assert len(kb_1.get_alias_candidates("Russ Cochran")) == len( - kb_2.get_alias_candidates("Russ Cochran") + assert len(kb_1._get_alias_candidates("Russ Cochran")) == len( + kb_2._get_alias_candidates("Russ Cochran") ) - assert len(kb_1.get_alias_candidates("Randomness")) == len( - kb_2.get_alias_candidates("Randomness") + assert len(kb_1._get_alias_candidates("Randomness")) == len( + kb_2._get_alias_candidates("Randomness") ) diff --git a/spacy/tests/serialize/test_serialize_kb.py b/spacy/tests/serialize/test_serialize_kb.py index f9d2e226b..eb4254d31 100644 --- a/spacy/tests/serialize/test_serialize_kb.py +++ b/spacy/tests/serialize/test_serialize_kb.py @@ -66,19 +66,21 @@ def _check_kb(kb): assert alias_string not in kb.get_alias_strings() # check candidates & probabilities - candidates = sorted(kb.get_alias_candidates("double07"), key=lambda x: x.entity_) + candidates = sorted( + kb._get_alias_candidates("double07"), key=lambda x: x.entity_id_ + ) assert len(candidates) == 2 - assert candidates[0].entity_ == "Q007" + assert candidates[0].entity_id_ == "Q007" assert 6.999 < candidates[0].entity_freq < 7.01 assert candidates[0].entity_vector == [0, 0, 7] - assert candidates[0].alias_ == "double07" + assert candidates[0].alias == "double07" assert 0.899 < candidates[0].prior_prob < 0.901 - assert candidates[1].entity_ == "Q17" + assert candidates[1].entity_id_ == "Q17" assert 1.99 < candidates[1].entity_freq < 2.01 assert candidates[1].entity_vector == [7, 1, 0] - assert candidates[1].alias_ == "double07" + assert candidates[1].alias == "double07" assert 0.099 < candidates[1].prior_prob < 0.101 diff --git a/website/docs/api/inmemorylookupkb.mdx b/website/docs/api/inmemorylookupkb.mdx index c24fe78d6..6fa6cb235 100644 --- a/website/docs/api/inmemorylookupkb.mdx +++ b/website/docs/api/inmemorylookupkb.mdx @@ -10,9 +10,9 @@ version: 3.5 The `InMemoryLookupKB` class inherits from [`KnowledgeBase`](/api/kb) and implements all of its methods. It stores all KB data in-memory and generates -[`Candidate`](/api/kb#candidate) objects by exactly matching mentions with -entity names. It's highly optimized for both a low memory footprint and speed of -retrieval. +[`InMemoryCandidate`](/api/kb#candidate) objects by exactly matching mentions +with entity names. It's highly optimized for both a low memory footprint and +speed of retrieval. ## InMemoryLookupKB.\_\_init\_\_ {id="init",tag="method"} @@ -156,7 +156,7 @@ Get a list of all aliases in the knowledge base. ## InMemoryLookupKB.get_candidates {id="get_candidates",tag="method"} Given a certain textual mention as input, retrieve a list of candidate entities -of type [`Candidate`](/api/kb#candidate). Wraps +of type [`InMemoryCandidate`](/api/kb#candidate). Wraps [`get_alias_candidates()`](/api/inmemorylookupkb#get_alias_candidates). 
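Because the candidates returned here are `InMemoryCandidate` objects, the
renamed accessors from this PR apply: `entity_id_` replaces `entity_`, and
`alias` replaces `alias_`. A minimal end-to-end sketch, assuming this branch;
the toy `"Q2"`/`"adam"` entries are illustrative and mirror the fixtures in
`test_candidate_generation`:

```python
import spacy
from spacy.kb import InMemoryLookupKB

nlp = spacy.blank("en")
# Toy KB with a single entity and one alias that carries a prior probability.
kb = InMemoryLookupKB(nlp.vocab, entity_vector_length=1)
kb.add_entity(entity="Q2", freq=12, entity_vector=[2])
kb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])

mention = nlp("adam")[0:1]  # get_candidates expects a Span, not a string
candidate = kb.get_candidates(mention)[0]
print(candidate.entity_id_, candidate.alias, candidate.prior_prob)
# entity ID "Q2", alias "adam", prior probability ~0.9
```

Note that matching is exact on the mention text (`"Adam"` would yield no
candidates); the test suite works around this with a lowercasing wrapper
around the now-private `_get_alias_candidates`.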
> #### Example @@ -168,10 +168,10 @@ of type [`Candidate`](/api/kb#candidate). Wraps > candidates = kb.get_candidates(doc[0:2]) > ``` -| Name | Description | -| ----------- | -------------------------------------------------------------------- | -| `mention` | The textual mention or alias. ~~Span~~ | -| **RETURNS** | An iterable of relevant `Candidate` objects. ~~Iterable[Candidate]~~ | +| Name | Description | +| ----------- | ------------------------------------------------------------------------------------ | +| `mention` | The textual mention or alias. ~~Span~~ | +| **RETURNS** | An iterable of relevant `InMemoryCandidate` objects. ~~Iterable[InMemoryCandidate]~~ | ## InMemoryLookupKB.get_candidates_batch {id="get_candidates_batch",tag="method"} @@ -194,26 +194,10 @@ to you. > candidates = kb.get_candidates((doc[0:2], doc[3:])) > ``` -| Name | Description | -| ----------- | -------------------------------------------------------------------------------------------- | -| `mentions` | The textual mention or alias. ~~Iterable[Span]~~ | -| **RETURNS** | An iterable of iterable with relevant `Candidate` objects. ~~Iterable[Iterable[Candidate]]~~ | - -## InMemoryLookupKB.get_alias_candidates {id="get_alias_candidates",tag="method"} - -Given a certain textual mention as input, retrieve a list of candidate entities -of type [`Candidate`](/api/kb#candidate). - -> #### Example -> -> ```python -> candidates = kb.get_alias_candidates("Douglas") -> ``` - -| Name | Description | -| ----------- | ------------------------------------------------------------- | -| `alias` | The textual mention or alias. ~~str~~ | -| **RETURNS** | The list of relevant `Candidate` objects. ~~List[Candidate]~~ | +| Name | Description | +| ----------- | ------------------------------------------------------------------------------------------------------------ | +| `mentions` | The textual mentions. ~~Iterable[Span]~~ | +| **RETURNS** | An iterable of iterable with relevant `InMemoryCandidate` objects. ~~Iterable[Iterable[InMemoryCandidate]]~~ | ## InMemoryLookupKB.get_vector {id="get_vector",tag="method"} diff --git a/website/docs/api/kb.mdx b/website/docs/api/kb.mdx index 2b0d4d9d6..9536a3fe3 100644 --- a/website/docs/api/kb.mdx +++ b/website/docs/api/kb.mdx @@ -103,23 +103,6 @@ to you. | `mentions` | The textual mention or alias. ~~Iterable[Span]~~ | | **RETURNS** | An iterable of iterable with relevant `Candidate` objects. ~~Iterable[Iterable[Candidate]]~~ | -## KnowledgeBase.get_alias_candidates {id="get_alias_candidates",tag="method"} - - - This method is _not_ available from spaCy 3.5 onwards. - - -From spaCy 3.5 on `KnowledgeBase` is an abstract class (with -[`InMemoryLookupKB`](/api/inmemorylookupkb) being a drop-in replacement) to -allow more flexibility in customizing knowledge bases. Some of its methods were -moved to [`InMemoryLookupKB`](/api/inmemorylookupkb) during this refactoring, -one of those being `get_alias_candidates()`. This method is now available as -[`InMemoryLookupKB.get_alias_candidates()`](/api/inmemorylookupkb#get_alias_candidates). -Note: -[`InMemoryLookupKB.get_candidates()`](/api/inmemorylookupkb#get_candidates) -defaults to -[`InMemoryLookupKB.get_alias_candidates()`](/api/inmemorylookupkb#get_alias_candidates). - ## KnowledgeBase.get_vector {id="get_vector",tag="method"} Given a certain entity ID, retrieve its pretrained entity vector. @@ -190,25 +173,27 @@ Restore the state of the knowledge base from a given directory. Note that the | `exclude` | List of components to exclude. 
~~Iterable[str]~~ |
| **RETURNS** | The modified `KnowledgeBase` object. ~~KnowledgeBase~~ |

## InMemoryCandidate {id="candidate",tag="class"}

An `InMemoryCandidate` object refers to a textual mention (alias) that may or
may not be resolved to a specific entity from a `KnowledgeBase`. This will be
used as input for the entity linking algorithm which will disambiguate the
various candidates to the correct one. Each candidate `(alias, entity)` pair is
assigned a certain prior probability.

### InMemoryCandidate.\_\_init\_\_ {id="candidate-init",tag="method"}

Construct an `InMemoryCandidate` object. Usually this constructor is not called
directly, but instead these objects are returned by the `get_candidates` method
of the [`entity_linker`](/api/entitylinker) pipe.

> #### Example
>
> ```python
> from spacy.kb import InMemoryCandidate
> candidate = InMemoryCandidate(kb, entity_hash, alias_hash, entity_vector, prior_prob, entity_freq)
> ```

| Name            | Description                                                                |
| --------------- | -------------------------------------------------------------------------- |
| `kb`            | The knowledge base that defined this candidate. ~~InMemoryLookupKB~~        |
| `entity_hash`   | The hash of the entity's KB ID. ~~int~~                                     |
| `alias_hash`    | The hash of the entity alias. ~~int~~                                       |
| `entity_vector` | The entity's embedding vector. ~~List[float]~~                              |
| `prior_prob`    | The prior probability of the `alias` referring to the `entity`. ~~float~~   |
| `entity_freq`   | The entity frequency as recorded in the KB. ~~float~~                       |

## InMemoryCandidate attributes {id="candidate-attributes"}

| Name | Description |
| --------------- | ------------------------------------------------------------------------ |
| `entity_id`     | Numerical representation of the entity ID (the hash of the entity ID string, if the ID is not numerical). ~~int~~ |
| `entity_id_`    | String representation of the entity ID. ~~str~~                           |
| `entity_vector` | The entity's embedding vector. ~~List[float]~~                            |
| `alias`         | The alias. ~~str~~                                                        |
| `prior_prob`    | The prior probability that this alias resolves to this entity. ~~float~~  |
| `entity_freq`   | The entity frequency as recorded in the KB corpus. ~~float~~              |
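As a closing illustration of the new `supports_prior_probs` contract introduced
in this PR: `EntityLinker` consults the flag before reading `prior_prob` from
candidates, and emits W401 when `incl_prior` is set but the KB reports `False`.
The sketch below shows a KB that opts out of priors; the class name and the
empty lookup body are hypothetical placeholders, not part of this patch:

```python
from typing import Iterable

from spacy.kb import Candidate, KnowledgeBase
from spacy.tokens import Span


class ContextOnlyKB(KnowledgeBase):
    """Hypothetical KB whose candidates carry no prior probabilities."""

    def get_candidates(self, mention: Span) -> Iterable[Candidate]:
        # Placeholder: a real implementation would look up candidates
        # for the mention's surface form here.
        return []

    @property
    def supports_prior_probs(self) -> bool:
        # EntityLinker checks this flag: with incl_prior=True it warns (W401)
        # and scores candidates as if all priors were 0.0.
        return False
```

With this split, `prior_prob` lives on `InMemoryCandidate` rather than on the
abstract `Candidate`, so custom knowledge bases are free to omit it entirely.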