diff --git a/spacy/language.py b/spacy/language.py
index d2b89029d..fb86689bc 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1248,17 +1248,12 @@ class Language:
             component_cfg[name].setdefault("drop", drop)
             pipe_kwargs[name].setdefault("batch_size", self.batch_size)
         for name, proc in self.pipeline:
-            # ignore statements are used here because mypy ignores hasattr
-            if name not in exclude and hasattr(proc, "update"):
-                proc.update(examples, sgd=None, losses=losses, **component_cfg[name])  # type: ignore
-            if sgd not in (None, False):
-                if (
-                    name not in exclude
-                    and isinstance(proc, ty.TrainableComponent)
-                    and proc.is_trainable
-                    and proc.model not in (True, False, None)
-                ):
-                    proc.finish_update(sgd)
+            if (
+                name not in exclude
+                and isinstance(proc, ty.TrainableComponent)
+                and proc.is_trainable
+            ):
+                proc.update(examples, sgd=None, losses=losses, **component_cfg[name])
             if name in annotates:
                 for doc, eg in zip(
                     _pipe(
@@ -1271,6 +1266,17 @@ class Language:
                     examples,
                 ):
                     eg.predicted = doc
+        # Only finish the update after all component updates are done. Some
+        # components may share weights (such as tok2vec) and we only want
+        # to apply weight updates after all gradients are accumulated.
+        for name, proc in self.pipeline:
+            if (
+                name not in exclude
+                and isinstance(proc, ty.TrainableComponent)
+                and proc.is_trainable
+            ):
+                proc.finish_update(sgd)
+
         return losses
 
     def rehearse(
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index 6fe322b62..63d5cccc2 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -27,9 +27,6 @@ ActivationsT = Dict[str, Union[List[Ragged], List[str]]]
 
 KNOWLEDGE_BASE_IDS = "kb_ids"
 
-# See #9050
-BACKWARD_OVERWRITE = True
-
 default_model_config = """
 [model]
 @architectures = "spacy.EntityLinker.v2"
@@ -60,7 +57,7 @@ DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"]
         "entity_vector_length": 64,
         "get_candidates": {"@misc": "spacy.CandidateGenerator.v1"},
         "get_candidates_batch": {"@misc": "spacy.CandidateBatchGenerator.v1"},
-        "overwrite": True,
+        "overwrite": False,
         "scorer": {"@scorers": "spacy.entity_linker_scorer.v1"},
         "use_gold_ents": True,
         "candidates_batch_size": 1,
@@ -191,7 +188,7 @@ class EntityLinker(TrainablePipe):
         get_candidates_batch: Callable[
             [KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]
         ],
-        overwrite: bool = BACKWARD_OVERWRITE,
+        overwrite: bool = False,
         scorer: Optional[Callable] = entity_linker_score,
         use_gold_ents: bool,
         candidates_batch_size: int,
@@ -215,6 +212,7 @@ class EntityLinker(TrainablePipe):
             Callable[[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]],
             Iterable[Candidate]]
             ): Function that produces a list of candidates, given a certain knowledge base and several textual mentions.
+        overwrite (bool): Whether to overwrite existing non-empty annotations.
         scorer (Optional[Callable]): The scoring method. Defaults to Scorer.score_links.
         use_gold_ents (bool): Whether to copy entities from gold docs or not. If false, another
             component must provide entity annotations.
diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx
index 293add9e1..fabc51fee 100644
--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@@ -21,10 +21,6 @@ from ..scorer import Scorer
 from ..training import validate_examples, validate_get_examples
 from ..util import registry
 
-# See #9050
-BACKWARD_OVERWRITE = True
-BACKWARD_EXTEND = False
-
 default_model_config = """
 [model]
 @architectures = "spacy.Tagger.v2"
@@ -102,8 +98,8 @@ class Morphologizer(Tagger):
         model: Model,
         name: str = "morphologizer",
         *,
-        overwrite: bool = BACKWARD_OVERWRITE,
-        extend: bool = BACKWARD_EXTEND,
+        overwrite: bool = False,
+        extend: bool = False,
         scorer: Optional[Callable] = morphologizer_score,
         save_activations: bool = False,
     ):
@@ -113,6 +109,8 @@ class Morphologizer(Tagger):
         model (thinc.api.Model): The Thinc Model powering the pipeline component.
         name (str): The component instance name, used to add entries to the
             losses during training.
+        overwrite (bool): Whether to overwrite existing annotations.
+        extend (bool): Whether to extend existing annotations.
         scorer (Optional[Callable]): The scoring method. Defaults to
             Scorer.score_token_attr for the attributes "pos" and "morph" and
             Scorer.score_token_attr_per_feat for the attribute "morph".
diff --git a/spacy/pipeline/sentencizer.pyx b/spacy/pipeline/sentencizer.pyx
index 77f4e8adb..6c2565170 100644
--- a/spacy/pipeline/sentencizer.pyx
+++ b/spacy/pipeline/sentencizer.pyx
@@ -10,9 +10,6 @@ from ..language import Language
 from ..scorer import Scorer
 from .. import util
 
-# see #9050
-BACKWARD_OVERWRITE = False
-
 @Language.factory(
     "sentencizer",
     assigns=["token.is_sent_start", "doc.sents"],
@@ -52,13 +49,14 @@ class Sentencizer(Pipe):
         name="sentencizer",
         *,
         punct_chars=None,
-        overwrite=BACKWARD_OVERWRITE,
+        overwrite=False,
         scorer=senter_score,
     ):
         """Initialize the sentencizer.
 
         punct_chars (list): Punctuation characters to split on. Will be
             serialized with the nlp object.
+        overwrite (bool): Whether to overwrite existing annotations.
         scorer (Optional[Callable]): The scoring method. Defaults to
             Scorer.score_spans for the attribute "sents".
 
diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx
index 42feeb277..a7d263e94 100644
--- a/spacy/pipeline/senter.pyx
+++ b/spacy/pipeline/senter.pyx
@@ -18,8 +18,6 @@ from ..training import validate_examples, validate_get_examples
 from ..util import registry
 from .. import util
 
-# See #9050
-BACKWARD_OVERWRITE = False
 
 default_model_config = """
 [model]
@@ -83,7 +81,7 @@ class SentenceRecognizer(Tagger):
         model,
         name="senter",
         *,
-        overwrite=BACKWARD_OVERWRITE,
+        overwrite=False,
         scorer=senter_score,
         save_activations: bool = False,
     ):
@@ -93,6 +91,7 @@ class SentenceRecognizer(Tagger):
         model (thinc.api.Model): The Thinc Model powering the pipeline component.
         name (str): The component instance name, used to add entries to the
             losses during training.
+        overwrite (bool): Whether to overwrite existing annotations.
         scorer (Optional[Callable]): The scoring method. Defaults to
             Scorer.score_spans for the attribute "sents".
         save_activations (bool): save model activations in Doc when annotating.
diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx
index a6be51c3c..101d8bcea 100644
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -27,9 +27,6 @@ from .. import util
 
 ActivationsT = Dict[str, Union[List[Floats2d], List[Ints1d]]]
 
-# See #9050
-BACKWARD_OVERWRITE = False
-
 default_model_config = """
 [model]
 @architectures = "spacy.Tagger.v2"
@@ -99,7 +96,7 @@ class Tagger(TrainablePipe):
         model,
         name="tagger",
         *,
-        overwrite=BACKWARD_OVERWRITE,
+        overwrite=False,
         scorer=tagger_score,
         neg_prefix="!",
         save_activations: bool = False,
@@ -110,6 +107,7 @@ class Tagger(TrainablePipe):
         model (thinc.api.Model): The Thinc Model powering the pipeline component.
         name (str): The component instance name, used to add entries to the
             losses during training.
+        overwrite (bool): Whether to overwrite existing annotations.
         scorer (Optional[Callable]): The scoring method. Defaults to
             Scorer.score_token_attr for the attribute "tag".
         save_activations (bool): save model activations in Doc when annotating.
diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py
index 21d247b74..a99f8b561 100644
--- a/spacy/tests/doc/test_span.py
+++ b/spacy/tests/doc/test_span.py
@@ -175,6 +175,18 @@ def test_modify_span_group(doc):
     assert group[0].label == doc.vocab.strings["TEST"]
 
 
+def test_char_span_attributes(doc):
+    """Doc.char_span and Span.char_span should attach the same attributes."""
+    attrs = {"label": "LABEL", "kb_id": "KB_ID", "span_id": "SPAN_ID"}
+    from_doc = doc.char_span(20, 45, **attrs)
+    # Offsets passed to Span.char_span are relative to the span's first character.
+    from_span = doc[1:].char_span(15, 40, **attrs)
+    assert from_doc.text == from_span.text
+    assert from_doc.label_ == from_span.label_ == attrs["label"]
+    assert from_doc.kb_id_ == from_span.kb_id_ == attrs["kb_id"]
+    assert from_doc.id_ == from_span.id_ == attrs["span_id"]
+
+
 def test_spans_sent_spans(doc):
     sents = list(doc.sents)
     assert sents[0].start == 0
@@ -354,6 +366,14 @@ def test_spans_by_character(doc):
             span1.start_char + 1, span1.end_char, label="GPE", alignment_mode="unk"
         )
 
+    # Span.char_span + alignment mode "contract"
+    span2 = doc[0:2].char_span(
+        span1.start_char - 3, span1.end_char, label="GPE", alignment_mode="contract"
+    )
+    assert span1.start_char == span2.start_char
+    assert span1.end_char == span2.end_char
+    assert span2.label_ == "GPE"
+
 
 def test_span_to_array(doc):
     span = doc[1:-2]
diff --git a/spacy/tests/pipeline/test_annotates_on_update.py b/spacy/tests/pipeline/test_annotates_on_update.py
index 869b8b874..10fb22c97 100644
--- a/spacy/tests/pipeline/test_annotates_on_update.py
+++ b/spacy/tests/pipeline/test_annotates_on_update.py
@@ -54,9 +54,11 @@ def test_annotates_on_update():
         return AssertSents(name)
 
     class AssertSents:
+        model = None
+        is_trainable = True
+
         def __init__(self, name, **cfg):
             self.name = name
-            pass
 
         def __call__(self, doc):
             if not doc.has_annotation("SENT_START"):
@@ -64,10 +66,16 @@ def test_annotates_on_update():
             return doc
 
         def update(self, examples, *, drop=0.0, sgd=None, losses=None):
+            losses.setdefault(self.name, 0.0)
+
             for example in examples:
                 if not example.predicted.has_annotation("SENT_START"):
                     raise ValueError("No sents")
-            return {}
+
+            return losses
+
+        def finish_update(self, sgd=None):
+            pass
 
     nlp = English()
     nlp.add_pipe("sentencizer")
diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index 42ffae22d..dc7ce46fe 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -1017,8 +1017,6 @@ def test_local_remote_storage_pull_missing():
 
 
 def test_cli_find_threshold(capsys):
-    thresholds = numpy.linspace(0, 1, 10)
-
     def make_examples(nlp: Language) -> List[Example]:
         docs: List[Example] = []
 
@@ -1082,8 +1080,6 @@ def test_cli_find_threshold(capsys):
                 scores_key="cats_macro_f",
                 silent=True,
             )
-            assert best_threshold != thresholds[0]
-            assert thresholds[0] < best_threshold < thresholds[9]
             assert best_score == max(res.values())
             assert res[1.0] == 0.0
 
@@ -1091,7 +1087,7 @@ def test_cli_find_threshold(capsys):
         nlp, _ = init_nlp((("spancat", {}),))
         with make_tempdir() as nlp_dir:
             nlp.to_disk(nlp_dir)
-            res = find_threshold(
+            best_threshold, best_score, res = find_threshold(
                 model=nlp_dir,
                 data_path=docs_dir / "docs.spacy",
                 pipe_name="spancat",
@@ -1099,10 +1095,8 @@ def test_cli_find_threshold(capsys):
                 scores_key="spans_sc_f",
                 silent=True,
             )
-            assert res[0] != thresholds[0]
-            assert thresholds[0] < res[0] < thresholds[8]
-            assert res[1] >= 0.6
-            assert res[2][1.0] == 0.0
+            assert best_score == max(res.values())
+            assert res[1.0] == 0.0
 
         # Having multiple textcat_multilabel components should work, since the name has to be specified.
         nlp, _ = init_nlp((("textcat_multilabel", {}),))
diff --git a/spacy/tests/test_cli_app.py b/spacy/tests/test_cli_app.py
index 9b099ccb5..648a52374 100644
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@@ -9,7 +9,7 @@ import spacy
 from spacy.cli._util import app
 from spacy.language import Language
 from spacy.tokens import DocBin
-from .util import make_tempdir
+from .util import make_tempdir, normalize_whitespace
 
 
 def test_convert_auto():
@@ -247,8 +247,8 @@ def test_benchmark_accuracy_alias():
     # Verify that the `evaluate` alias works correctly.
     result_benchmark = CliRunner().invoke(app, ["benchmark", "accuracy", "--help"])
     result_evaluate = CliRunner().invoke(app, ["evaluate", "--help"])
-    assert result_benchmark.stdout == result_evaluate.stdout.replace(
-        "spacy evaluate", "spacy benchmark accuracy"
+    assert normalize_whitespace(result_benchmark.stdout) == normalize_whitespace(
+        result_evaluate.stdout.replace("spacy evaluate", "spacy benchmark accuracy")
     )
 
 
diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py
index f2d6d5fc0..3d0905dd3 100644
--- a/spacy/tests/test_language.py
+++ b/spacy/tests/test_language.py
@@ -10,8 +10,9 @@ from spacy.training import Example
 from spacy.lang.en import English
 from spacy.lang.de import German
 from spacy.util import registry, ignore_error, raise_error, find_matching_language
+from spacy.util import load_model_from_config
 import spacy
-from thinc.api import CupyOps, NumpyOps, get_current_ops
+from thinc.api import Config, CupyOps, NumpyOps, get_array_module, get_current_ops
 
 from .util import add_vecs_to_vocab, assert_docs_equal
 
@@ -25,6 +26,46 @@ try:
 except ImportError:
     pass
 
+# Config for a tok2vec + tagger pipeline in which the tagger listens to the shared tok2vec.
+TAGGER_CFG_STRING = """
+    [nlp]
+    lang = "en"
+    pipeline = ["tok2vec","tagger"]
+
+    [components]
+
+    [components.tagger]
+    factory = "tagger"
+
+    [components.tagger.model]
+    @architectures = "spacy.Tagger.v2"
+    nO = null
+
+    [components.tagger.model.tok2vec]
+    @architectures = "spacy.Tok2VecListener.v1"
+    width = ${components.tok2vec.model.encode.width}
+
+    [components.tok2vec]
+    factory = "tok2vec"
+
+    [components.tok2vec.model]
+    @architectures = "spacy.Tok2Vec.v2"
+
+    [components.tok2vec.model.embed]
+    @architectures = "spacy.MultiHashEmbed.v1"
+    width = ${components.tok2vec.model.encode.width}
+    rows = [2000, 1000, 1000, 1000]
+    attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"]
+    include_static_vectors = false
+
+    [components.tok2vec.model.encode]
+    @architectures = "spacy.MaxoutWindowEncoder.v2"
+    width = 96
+    depth = 4
+    window_size = 1
+    maxout_pieces = 3
+    """
+
 
 TAGGER_TRAIN_DATA = [
     ("I like green eggs", {"tags": ["N", "V", "J", "N"]}),
@@ -91,6 +132,26 @@ def test_language_update(nlp):
         example = Example.from_dict(doc, wrongkeyannots)
 
 
+def test_language_update_updates():
+    """Check that `Language.update` with an optimizer changes `doc.tensor`."""
+    config = Config().from_str(TAGGER_CFG_STRING)
+    nlp = load_model_from_config(config, auto_fill=True, validate=True)
+    train_examples = [
+        Example.from_dict(nlp.make_doc(text), annots)
+        for text, annots in TAGGER_TRAIN_DATA
+    ]
+    optimizer = nlp.initialize(get_examples=lambda: train_examples)
+
+    def first_tensor():
+        return list(nlp.pipe([eg.predicted.copy() for eg in train_examples]))[0].tensor
+
+    before = first_tensor()
+    nlp.update(train_examples, sgd=optimizer)
+    after = first_tensor()
+    xp = get_array_module(after)
+    assert xp.any(xp.not_equal(before, after))
+
+
 def test_language_evaluate(nlp):
     text = "hello world"
     annots = {"doc_annotation": {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}}
diff --git a/spacy/tests/util.py b/spacy/tests/util.py
index d5f3c39ff..c2647558d 100644
--- a/spacy/tests/util.py
+++ b/spacy/tests/util.py
@@ -1,6 +1,7 @@
 import numpy
 import tempfile
 import contextlib
+import re
 import srsly
 from spacy.tokens import Doc
 from spacy.vocab import Vocab
@@ -95,3 +96,7 @@ def assert_packed_msg_equal(b1, b2):
     for (k1, v1), (k2, v2) in zip(sorted(msg1.items()), sorted(msg2.items())):
         assert k1 == k2
         assert v1 == v2
+
+
+def normalize_whitespace(s: str) -> str:  # collapse each whitespace run to one space
+    return re.sub(r"\s+", " ", s)
diff --git a/spacy/tokens/doc.pyi b/spacy/tokens/doc.pyi
index 1c7c18bf3..93cd8de05 100644
--- a/spacy/tokens/doc.pyi
+++ b/spacy/tokens/doc.pyi
@@ -108,6 +108,7 @@ class Doc:
         kb_id: Union[int, str] = ...,
         vector: Optional[Floats1d] = ...,
         alignment_mode: str = ...,
+        span_id: Union[int, str] = ...,
     ) -> Span: ...
     def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ...
     @property
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 2b3b83e6a..2eca1aafd 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -528,9 +528,9 @@ cdef class Doc:
         doc (Doc): The parent document.
         start_idx (int): The index of the first character of the span.
         end_idx (int): The index of the first character after the span.
-        label (uint64 or string): A label to attach to the Span, e.g. for
+        label (Union[int, str]): A label to attach to the Span, e.g. for
             named entities.
-        kb_id (uint64 or string):  An ID from a KB to capture the meaning of a
+        kb_id (Union[int, str]):  An ID from a KB to capture the meaning of a
             named entity.
         vector (ndarray[ndim=1, dtype='float32']): A meaning representation of
             the span.
@@ -539,6 +539,7 @@ cdef class Doc:
             with token boundaries), "contract" (span of all tokens completely
             within the character span), "expand" (span of all tokens at least
             partially covered by the character span). Defaults to "strict".
+        span_id (Union[int, str]): An identifier to associate with the span.
         RETURNS (Span): The newly constructed object.
 
         DOCS: https://spacy.io/api/doc#char_span
diff --git a/spacy/tokens/span.pyi b/spacy/tokens/span.pyi
index 5168f3b03..979e74e7e 100644
--- a/spacy/tokens/span.pyi
+++ b/spacy/tokens/span.pyi
@@ -96,6 +96,9 @@ class Span:
         label: Union[int, str] = ...,
         kb_id: Union[int, str] = ...,
         vector: Optional[Floats1d] = ...,
+        id: Union[int, str] = ...,
+        alignment_mode: str = ...,
+        span_id: Union[int, str] = ...,
     ) -> Span: ...
     @property
     def conjuncts(self) -> Tuple[Token]: ...
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index b605434fd..aefea4f71 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -382,7 +382,7 @@ cdef class Span:
         result = xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm)
         # ensure we get a scalar back (numpy does this automatically but cupy doesn't)
         return result.item()
-    
+
     cpdef np.ndarray to_array(self, object py_attr_ids):
         """Given a list of M attribute IDs, export the tokens to a numpy
         `ndarray` of shape `(N, M)`, where `N` is the length of the document.
@@ -656,22 +656,29 @@ cdef class Span:
         else:
             return self.doc[root]
 
-    def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, id=0):
+    def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, id=0, alignment_mode="strict", span_id=0):
         """Create a `Span` object from the slice `span.text[start : end]`.
 
         start (int): The index of the first character of the span.
         end (int): The index of the first character after the span.
-        label (uint64 or string): A label to attach to the Span, e.g. for
+        label (Union[int, str]): A label to attach to the Span, e.g. for
             named entities.
-        kb_id (uint64 or string):  An ID from a KB to capture the meaning of a named entity.
+        kb_id (Union[int, str]):  An ID from a KB to capture the meaning of a named entity.
         vector (ndarray[ndim=1, dtype='float32']): A meaning representation of
             the span.
+        id (Union[int, str]): Unused.
+        alignment_mode (str): How character indices are aligned to token
+            boundaries. Options: "strict" (character indices must be aligned
+            with token boundaries), "contract" (span of all tokens completely
+            within the character span), "expand" (span of all tokens at least
+            partially covered by the character span). Defaults to "strict".
+        span_id (Union[int, str]): An identifier to associate with the span.
         RETURNS (Span): The newly constructed object.
         """
         cdef SpanC* span_c = self.span_c()
         start_idx += span_c.start_char
         end_idx += span_c.start_char
-        return self.doc.char_span(start_idx, end_idx, label=label, kb_id=kb_id, vector=vector)
+        return self.doc.char_span(start_idx, end_idx, label=label, kb_id=kb_id, vector=vector, alignment_mode=alignment_mode, span_id=span_id)
 
     @property
     def conjuncts(self):
diff --git a/spacy/training/loop.py b/spacy/training/loop.py
index fc929816d..fcc023a0d 100644
--- a/spacy/training/loop.py
+++ b/spacy/training/loop.py
@@ -210,7 +210,7 @@ def train_while_improving(
                 subbatch,
                 drop=dropout,
                 losses=losses,
-                sgd=False,  # type: ignore[arg-type]
+                sgd=None,
                 exclude=exclude,
                 annotates=annotating_components,
             )
diff --git a/website/docs/api/cli.mdx b/website/docs/api/cli.mdx
index 0bf708183..9777650a9 100644
--- a/website/docs/api/cli.mdx
+++ b/website/docs/api/cli.mdx
@@ -1410,12 +1410,13 @@ $ python -m spacy project assets [project_dir]
 > $ python -m spacy project assets [--sparse]
 > ```
 
-| Name             | Description                                                                                                                                               |
-| ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `project_dir`    | Path to project directory. Defaults to current working directory. ~~Path (positional)~~                                                                   |
-| `--sparse`, `-S` | Enable [sparse checkout](https://git-scm.com/docs/git-sparse-checkout) to only check out and download what's needed. Requires Git v22.2+. ~~bool (flag)~~ |
-| `--help`, `-h`   | Show help message and available arguments. ~~bool (flag)~~                                                                                                |
-| **CREATES**      | Downloaded or copied assets defined in the `project.yml`.                                                                                                 |
+| Name                                           | Description                                                                                                                                               |
+| ---------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `project_dir`                                  | Path to project directory. Defaults to current working directory. ~~Path (positional)~~                                                                   |
+| `--extra`, `-e` <Tag variant="new">3.3.1</Tag> | Download assets marked as "extra". Default false. ~~bool (flag)~~                                                                                         |
+| `--sparse`, `-S`                               | Enable [sparse checkout](https://git-scm.com/docs/git-sparse-checkout) to only check out and download what's needed. Requires Git v2.22+. ~~bool (flag)~~ |
+| `--help`, `-h`                                 | Show help message and available arguments. ~~bool (flag)~~                                                                                                |
+| **CREATES**                                    | Downloaded or copied assets defined in the `project.yml`.                                                                                                 |
 
 ### project run {id="project-run",tag="command"}
 
diff --git a/website/docs/api/doc.mdx b/website/docs/api/doc.mdx
index a303d628e..1a3f6179f 100644
--- a/website/docs/api/doc.mdx
+++ b/website/docs/api/doc.mdx
@@ -37,7 +37,7 @@ Construct a `Doc` object. The most common way to get a `Doc` object is via the
 | `words`                                  | A list of strings or integer hash values to add to the document as words. ~~Optional[List[Union[str,int]]]~~                                                                                            |
 | `spaces`                                 | A list of boolean values indicating whether each word has a subsequent space. Must have the same length as `words`, if specified. Defaults to a sequence of `True`. ~~Optional[List[bool]]~~            |
 | _keyword-only_                           |                                                                                                                                                                                                         |
-| `user\_data`                             | Optional extra data to attach to the Doc. ~~Dict~~                                                                                                                                                      |
+| `user_data`                              | Optional extra data to attach to the Doc. ~~Dict~~                                                                                                                                                      |
 | `tags` <Tag variant="new">3</Tag>        | A list of strings, of the same length as `words`, to assign as `token.tag` for each word. Defaults to `None`. ~~Optional[List[str]]~~                                                                   |
 | `pos` <Tag variant="new">3</Tag>         | A list of strings, of the same length as `words`, to assign as `token.pos` for each word. Defaults to `None`. ~~Optional[List[str]]~~                                                                   |
 | `morphs` <Tag variant="new">3</Tag>      | A list of strings, of the same length as `words`, to assign as `token.morph` for each word. Defaults to `None`. ~~Optional[List[str]]~~                                                                 |
@@ -209,15 +209,16 @@ alignment mode `"strict".
 > assert span.text == "New York"
 > ```
 
-| Name             | Description                                                                                                                                                                                                                                                                  |
-| ---------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `start`          | The index of the first character of the span. ~~int~~                                                                                                                                                                                                                        |
-| `end`            | The index of the last character after the span. ~~int~~                                                                                                                                                                                                                      |
-| `label`          | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~                                                                                                                                                                                                  |
-| `kb_id`          | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~                                                                                                                                                                                    |
-| `vector`         | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~                                                                                                                                                                                               |
-| `alignment_mode` | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |
-| **RETURNS**      | The newly constructed object or `None`. ~~Optional[Span]~~                                                                                                                                                                                                                   |
+| Name                                     | Description                                                                                                                                                                                                                                                                  |
+| ---------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `start`                                  | The index of the first character of the span. ~~int~~                                                                                                                                                                                                                        |
+| `end`                                    | The index of the last character after the span. ~~int~~                                                                                                                                                                                                                      |
+| `label`                                  | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~                                                                                                                                                                                                  |
+| `kb_id`                                  | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~                                                                                                                                                                                    |
+| `vector`                                 | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~                                                                                                                                                                                               |
+| `alignment_mode`                         | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |
+| `span_id` <Tag variant="new">3.3.1</Tag> | An identifier to associate with the span. ~~Union[int, str]~~                                                                                                                                                                                                                |
+| **RETURNS**                              | The newly constructed object or `None`. ~~Optional[Span]~~                                                                                                                                                                                                                   |
 
 ## Doc.set_ents {id="set_ents",tag="method",version="3"}
 
diff --git a/website/docs/api/entitylinker.mdx b/website/docs/api/entitylinker.mdx
index 238b62a2e..12b2f6bef 100644
--- a/website/docs/api/entitylinker.mdx
+++ b/website/docs/api/entitylinker.mdx
@@ -63,7 +63,7 @@ architectures and their arguments and hyperparameters.
 | `entity_vector_length`                          | Size of encoding vectors in the KB. Defaults to `64`. ~~int~~                                                                                                                                                                                                                               |
 | `use_gold_ents`                                 | Whether to copy entities from the gold docs or not. Defaults to `True`. If `False`, entities must be set in the training data or by an annotating component in the pipeline. ~~int~~                                                                                                        |
 | `get_candidates`                                | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~                    |
-| `overwrite` <Tag variant="new">3.2</Tag>        | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~                                                                                                                                                                                                                    |
+| `overwrite` <Tag variant="new">3.2</Tag>        | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~                                                                                                                                                                                                                   |
 | `scorer` <Tag variant="new">3.2</Tag>           | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~                                                                                                                                                                                     |
 | `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"ents"` and `"scores"`. ~~Union[bool, list[str]]~~                                                                                                                                                                        |
 | `threshold` <Tag variant="new">3.4</Tag>        | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the treshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ |
diff --git a/website/docs/api/morphologizer.mdx b/website/docs/api/morphologizer.mdx
index 4660ec312..9514bc773 100644
--- a/website/docs/api/morphologizer.mdx
+++ b/website/docs/api/morphologizer.mdx
@@ -45,7 +45,7 @@ architectures and their arguments and hyperparameters.
 | Setting                                         | Description                                                                                                                                                                                                                                                            |
 | ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `model`                                         | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                |
-| `overwrite` <Tag variant="new">3.2</Tag>        | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~                                                                                                                                                                                  |
+| `overwrite` <Tag variant="new">3.2</Tag>        | Whether the values of existing features are overwritten. Defaults to `False`. ~~bool~~                                                                                                                                                                                 |
 | `extend` <Tag variant="new">3.2</Tag>           | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~                                                                                                                      |
 | `scorer` <Tag variant="new">3.2</Tag>           | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
 | `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~                                                                                                                                       |
diff --git a/website/docs/api/span.mdx b/website/docs/api/span.mdx
index 878bb30c3..e62d9c724 100644
--- a/website/docs/api/span.mdx
+++ b/website/docs/api/span.mdx
@@ -186,14 +186,17 @@ the character indices don't map to a valid span.
 > assert span.text == "New York"
 > ```
 
-| Name        | Description                                                                               |
-| ----------- | ----------------------------------------------------------------------------------------- |
-| `start`     | The index of the first character of the span. ~~int~~                                     |
-| `end`       | The index of the last character after the span. ~~int~~                                   |
-| `label`     | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~               |
-| `kb_id`     | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~ |
-| `vector`    | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~            |
-| **RETURNS** | The newly constructed object or `None`. ~~Optional[Span]~~                                |
+| Name                                            | Description                                                                                                                                                                                                                                                                  |
+| ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `start`                                         | The index of the first character of the span. ~~int~~                                                                                                                                                                                                                        |
+| `end`                                           | The index of the last character after the span. ~~int~~                                                                                                                                                                                                                      |
+| `label`                                         | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~                                                                                                                                                                                                  |
+| `kb_id`                                         | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~                                                                                                                                                                                    |
+| `vector`                                        | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~                                                                                                                                                                                               |
+| `id`                                            | Unused. ~~Union[int, str]~~                                                                                                                                                                                                                                                  |
+| `alignment_mode` <Tag variant="new">3.5.1</Tag> | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |
+| `span_id` <Tag variant="new">3.5.1</Tag>        | An identifier to associate with the span. ~~Union[int, str]~~                                                                                                                                                                                                                |
+| **RETURNS**                                     | The newly constructed object or `None`. ~~Optional[Span]~~                                                                                                                                                                                                                   |
 
 ## Span.similarity {id="similarity",tag="method",model="vectors"}
 
diff --git a/website/docs/models/index.mdx b/website/docs/models/index.mdx
index 371e4460f..366d44f0e 100644
--- a/website/docs/models/index.mdx
+++ b/website/docs/models/index.mdx
@@ -21,8 +21,8 @@ menu:
 ## Package naming conventions {id="conventions"}
 
 In general, spaCy expects all pipeline packages to follow the naming convention
-of `[lang]\_[name]`. For spaCy's pipelines, we also chose to divide the name
-into three components:
+of `[lang]_[name]`. For spaCy's pipelines, we also chose to divide the name into
+three components:
 
 1. **Type:** Capabilities (e.g. `core` for general-purpose pipeline with
    tagging, parsing, lemmatization and named entity recognition, or `dep` for
diff --git a/website/docs/usage/v3-5.mdx b/website/docs/usage/v3-5.mdx
index ac61338e3..3ca64f8a2 100644
--- a/website/docs/usage/v3-5.mdx
+++ b/website/docs/usage/v3-5.mdx
@@ -155,6 +155,21 @@ An error is now raised when unsupported values are given as input to train a
 `textcat` or `textcat_multilabel` model - ensure that values are `0.0` or `1.0`
 as explained in the [docs](/api/textcategorizer#assigned-attributes).
 
+### Using the default knowledge base
+
+As `KnowledgeBase` is now an abstract class, you should call the constructor of
+the new `InMemoryLookupKB` instead when you want to use spaCy's default KB
+implementation:
+
+```diff
+- kb = KnowledgeBase()
++ kb = InMemoryLookupKB()
+```
+
+If you've written a custom KB that inherits from `KnowledgeBase`, you'll need to
+implement its abstract methods, or alternatively inherit from `InMemoryLookupKB`
+instead.
+
 ### Updated scorers for tokenization and textcat {id="scores"}
 
 We fixed a bug that inflated the `token_acc` scores in v3.0-v3.4. The reported