Merge pull request #12494 from adrianeboyd/backport/v3.5.2-1

Backports for v3.5.2
2025-07-26 16:09:47 +03:00 · 2023-04-06 16:18:59 +02:00 · 2023-04-06 16:18:59 +02:00 · e4bbdf7b50
commit e4bbdf7b50
parent 8153bd573f f66d55fe5b
40 changed files with 604 additions and 240 deletions
--- a/.github/azure-steps.yml
+++ b/.github/azure-steps.yml
@@ -57,51 +57,51 @@ steps:
      python -m spacy download ca_core_news_md
      python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
    displayName: 'Test download CLI'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')

  - script: |
      python -W error -m spacy info ca_core_news_sm | grep -q download_url
    displayName: 'Test download_url in info CLI'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')

  - script: |
      python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
    displayName: 'Test no warnings on load (#11713)'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')

  - script: |
      python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json .
    displayName: 'Test convert CLI'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')

  - script: |
      python -m spacy init config -p ner -l ca ner.cfg
      python -m spacy debug config ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy
    displayName: 'Test debug config CLI'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')

  - script: |
      # will have errors due to sparse data, check for summary in output
      python -m spacy debug data ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy | grep -q Summary
    displayName: 'Test debug data CLI'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')

  - script: |
      python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1
    displayName: 'Test train CLI'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')

  - script: |
      python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
      PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
    displayName: 'Test assemble CLI'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')

  - script: |
      python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
      python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
    displayName: 'Test assemble CLI vectors warning'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')

  - script: |
      python -m pip install -U -r requirements.txt
@@ -116,9 +116,3 @@ steps:
      python -m pytest --pyargs spacy
    displayName: "Run CPU tests with thinc-apple-ops"
    condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.11'))
-
-  - script: |
-      python .github/validate_universe_json.py website/meta/universe.json
-    displayName: 'Test website/meta/universe.json'
-    condition: eq(variables['python_version'], '3.8')
-
--- a/.github/workflows/autoblack.yml
+++ b/.github/workflows/autoblack.yml
@@ -1,45 +0,0 @@
-# GitHub Action that uses Black to reformat all Python code and submits a PR
-# in regular intervals. Inspired by: https://github.com/cclauss/autoblack
-
-name: autoblack
-on:
-  workflow_dispatch:  # allow manual trigger
-  schedule:
-    - cron: '0 8 * * 5'  # every Friday at 8am UTC
-
-jobs:
-  autoblack:
-    if: github.repository_owner == 'explosion'
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-        with:
-            ref: ${{ github.head_ref }}
-      - uses: actions/setup-python@v4
-      - run: pip install black -c requirements.txt
-      - name: Auto-format code if needed
-        run: black spacy
-      # We can't run black --check here because that returns a non-zero excit
-      # code and makes GitHub think the action failed
-      - name: Check for modified files
-        id: git-check
-        run: echo modified=$(if git diff-index --quiet HEAD --; then echo "false"; else echo "true"; fi) >> $GITHUB_OUTPUT
-
-      - name: Create Pull Request
-        if: steps.git-check.outputs.modified == 'true'
-        uses: peter-evans/create-pull-request@v4
-        with:
-            title: Auto-format code with black
-            labels: meta
-            commit-message: Auto-format code with black
-            committer: GitHub <noreply@github.com>
-            author: explosion-bot <explosion-bot@users.noreply.github.com>
-            body: _This PR is auto-generated._
-            branch: autoblack
-            delete-branch: true
-            draft: false
-      - name: Check outputs
-        if: steps.git-check.outputs.modified == 'true'
-        run: |
-          echo "Pull Request Number - ${{ steps.cpr.outputs.pull-request-number }}"
-          echo "Pull Request URL - ${{ steps.cpr.outputs.pull-request-url }}"
--- a/.github/workflows/explosionbot.yml
+++ b/.github/workflows/explosionbot.yml
@@ -8,6 +8,7 @@ on:

 jobs:
  explosion-bot:
+    if: github.repository_owner == 'explosion'
    runs-on: ubuntu-latest
    steps:
      - name: Dump GitHub context
--- a/.github/workflows/issue-manager.yml
+++ b/.github/workflows/issue-manager.yml
@@ -13,6 +13,7 @@ on:

 jobs:
  issue-manager:
+    if: github.repository_owner == 'explosion'
    runs-on: ubuntu-latest
    steps:
      - uses: tiangolo/issue-manager@0.4.0
--- a/.github/workflows/lock.yml
+++ b/.github/workflows/lock.yml
@@ -13,6 +13,7 @@ concurrency:

 jobs:
  action:
+    if: github.repository_owner == 'explosion'
    runs-on: ubuntu-latest
    steps:
      - uses: dessant/lock-threads@v4
--- a/.github/workflows/spacy_universe_alert.yml
+++ b/.github/workflows/spacy_universe_alert.yml
@@ -7,6 +7,7 @@ on:

 jobs:
  build:
+    if: github.repository_owner == 'explosion'
    runs-on: ubuntu-latest

    steps:
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,173 @@
+name: tests
+
+on:
+  push:
+    branches-ignore:
+      - "spacy.io"
+      - "nightly.spacy.io"
+      - "v2.spacy.io"
+    paths-ignore:
+      - "*.md"
+      - "*.mdx"
+      - "website/**"
+      - ".github/workflows/**"
+  pull_request:
+    types: [opened, synchronize, reopened, edited]
+    paths-ignore:
+      - "*.md"
+      - "*.mdx"
+      - "website/**"
+
+jobs:
+  validate:
+    name: Validate
+    if: github.repository_owner == 'explosion'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v3
+
+      - name: Configure Python version
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.7"
+          architecture: x64
+
+      - name: black
+        run: |
+          python -m pip install black -c requirements.txt
+          python -m black spacy --check
+      - name: flake8
+        run: |
+          python -m pip install flake8==5.0.4
+          python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
+  tests:
+    name: Test
+    needs: Validate
+    strategy:
+      fail-fast: true
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        python_version: ["3.11"]
+        include:
+          - os: ubuntu-20.04
+            python_version: "3.6"
+          - os: windows-latest
+            python_version: "3.7"
+          - os: macos-latest
+            python_version: "3.8"
+          - os: ubuntu-latest
+            python_version: "3.9"
+          - os: windows-latest
+            python_version: "3.10"
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v3
+
+      - name: Configure Python version
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python_version }}
+          architecture: x64
+
+      - name: Install dependencies
+        run: |
+          python -m pip install -U build pip setuptools
+          python -m pip install -U -r requirements.txt
+
+      - name: Build sdist
+        run: |
+          python -m build --sdist
+
+      - name: Run mypy
+        run: |
+          python -m mypy spacy
+        if: matrix.python_version != '3.6'
+
+      - name: Delete source directory and .egg-info
+        run: |
+          rm -rf spacy *.egg-info
+        shell: bash
+
+      - name: Uninstall all packages
+        run: |
+          python -m pip freeze
+          python -m pip freeze --exclude pywin32 > installed.txt
+          python -m pip uninstall -y -r installed.txt
+
+      - name: Install from sdist
+        run: |
+          SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
+          SPACY_NUM_BUILD_JOBS=2 python -m pip install dist/$SDIST
+        shell: bash
+
+      - name: Test import
+        run: python -W error -c "import spacy"
+
+      - name: "Test download CLI"
+        run: |
+          python -m spacy download ca_core_news_sm
+          python -m spacy download ca_core_news_md
+          python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
+        if: matrix.python_version == '3.9'
+
+      - name: "Test download_url in info CLI"
+        run: |
+          python -W error -m spacy info ca_core_news_sm | grep -q download_url
+        if: matrix.python_version == '3.9'
+
+      - name: "Test no warnings on load (#11713)"
+        run: |
+          python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
+        if: matrix.python_version == '3.9'
+
+      - name: "Test convert CLI"
+        run: |
+          python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json .
+        if: matrix.python_version == '3.9'
+
+      - name: "Test debug config CLI"
+        run: |
+          python -m spacy init config -p ner -l ca ner.cfg
+          python -m spacy debug config ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy
+        if: matrix.python_version == '3.9'
+
+      - name: "Test debug data CLI"
+        run: |
+          # will have errors due to sparse data, check for summary in output
+          python -m spacy debug data ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy | grep -q Summary
+        if: matrix.python_version == '3.9'
+
+      - name: "Test train CLI"
+        run: |
+          python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1
+        if: matrix.python_version == '3.9'
+
+      - name: "Test assemble CLI"
+        run: |
+          python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
+          PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
+        if: matrix.python_version == '3.9'
+
+      - name: "Test assemble CLI vectors warning"
+        run: |
+          python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
+          python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
+        if: matrix.python_version == '3.9'
+
+      - name: "Install test requirements"
+        run: |
+          python -m pip install -U -r requirements.txt
+
+      - name: "Run CPU tests"
+        run: |
+          python -m pytest --pyargs spacy -W error
+
+      - name: "Run CPU tests with thinc-apple-ops"
+        run: |
+          python -m pip install 'spacy[apple]'
+          python -m pytest --pyargs spacy
+        if: startsWith(matrix.os, 'macos') && matrix.python_version == '3.11'
--- a/.github/workflows/universe_validation.yml
+++ b/.github/workflows/universe_validation.yml
@@ -0,0 +1,33 @@
+name: universe validation
+
+on:
+  push:
+    branches-ignore:
+      - "spacy.io"
+      - "nightly.spacy.io"
+      - "v2.spacy.io"
+    paths:
+      - "website/meta/universe.json"
+  pull_request:
+    types: [opened, synchronize, reopened, edited]
+    paths:
+      - "website/meta/universe.json"
+
+jobs:
+  validate:
+    name: Validate
+    if: github.repository_owner == 'explosion'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v3
+
+      - name: Configure Python version
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.7"
+          architecture: x64
+
+      - name: Validate website/meta/universe.json
+        run: |
+          python .github/validate_universe_json.py website/meta/universe.json
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -48,6 +48,9 @@ jobs:
          pip install flake8==5.0.4
          python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
        displayName: "flake8"
+      - script: |
+          python .github/validate_universe_json.py website/meta/universe.json
+        displayName: 'Validate website/meta/universe.json'

  - job: "Test"
    dependsOn: "Validate"
--- a/setup.cfg
+++ b/setup.cfg
@@ -78,41 +78,41 @@ transformers =
 ray =
    spacy_ray>=0.1.0,<1.0.0
 cuda =
-    cupy>=5.0.0b4,<12.0.0
+    cupy>=5.0.0b4,<13.0.0
 cuda80 =
-    cupy-cuda80>=5.0.0b4,<12.0.0
+    cupy-cuda80>=5.0.0b4,<13.0.0
 cuda90 =
-    cupy-cuda90>=5.0.0b4,<12.0.0
+    cupy-cuda90>=5.0.0b4,<13.0.0
 cuda91 =
-    cupy-cuda91>=5.0.0b4,<12.0.0
+    cupy-cuda91>=5.0.0b4,<13.0.0
 cuda92 =
-    cupy-cuda92>=5.0.0b4,<12.0.0
+    cupy-cuda92>=5.0.0b4,<13.0.0
 cuda100 =
-    cupy-cuda100>=5.0.0b4,<12.0.0
+    cupy-cuda100>=5.0.0b4,<13.0.0
 cuda101 =
-    cupy-cuda101>=5.0.0b4,<12.0.0
+    cupy-cuda101>=5.0.0b4,<13.0.0
 cuda102 =
-    cupy-cuda102>=5.0.0b4,<12.0.0
+    cupy-cuda102>=5.0.0b4,<13.0.0
 cuda110 =
-    cupy-cuda110>=5.0.0b4,<12.0.0
+    cupy-cuda110>=5.0.0b4,<13.0.0
 cuda111 =
-    cupy-cuda111>=5.0.0b4,<12.0.0
+    cupy-cuda111>=5.0.0b4,<13.0.0
 cuda112 =
-    cupy-cuda112>=5.0.0b4,<12.0.0
+    cupy-cuda112>=5.0.0b4,<13.0.0
 cuda113 =
-    cupy-cuda113>=5.0.0b4,<12.0.0
+    cupy-cuda113>=5.0.0b4,<13.0.0
 cuda114 =
-    cupy-cuda114>=5.0.0b4,<12.0.0
+    cupy-cuda114>=5.0.0b4,<13.0.0
 cuda115 =
-    cupy-cuda115>=5.0.0b4,<12.0.0
+    cupy-cuda115>=5.0.0b4,<13.0.0
 cuda116 =
-    cupy-cuda116>=5.0.0b4,<12.0.0
+    cupy-cuda116>=5.0.0b4,<13.0.0
 cuda117 =
-    cupy-cuda117>=5.0.0b4,<12.0.0
+    cupy-cuda117>=5.0.0b4,<13.0.0
 cuda11x =
-    cupy-cuda11x>=11.0.0,<12.0.0
+    cupy-cuda11x>=11.0.0,<13.0.0
 cuda-autodetect =
-    cupy-wheel>=11.0.0,<12.0.0
+    cupy-wheel>=11.0.0,<13.0.0
 apple =
    thinc-apple-ops>=0.1.0.dev0,<1.0.0
 # Language tokenizers with external dependencies
--- a/spacy/cli/find_threshold.py
+++ b/spacy/cli/find_threshold.py
@@ -35,7 +35,7 @@ def find_threshold_cli(
    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
    use_gpu: int = Opt(_DEFAULTS["use_gpu"], "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
    gold_preproc: bool = Opt(_DEFAULTS["gold_preproc"], "--gold-preproc", "-G", help="Use gold preprocessing"),
-    verbose: bool = Opt(False, "--silent", "-V", "-VV", help="Display more information for debugging purposes"),
+    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
    # fmt: on
 ):
    """
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -23,6 +23,7 @@ def pretrain_cli(
    resume_path: Optional[Path] = Opt(None, "--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"),
    epoch_resume: Optional[int] = Opt(None, "--epoch-resume", "-er", help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files."),
    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
+    skip_last: bool = Opt(False, "--skip-last", "-L", help="Skip saving model-last.bin"),
    # fmt: on
 ):
    """
@@ -74,6 +75,7 @@ def pretrain_cli(
        epoch_resume=epoch_resume,
        use_gpu=use_gpu,
        silent=False,
+        skip_last=skip_last,
    )
    msg.good("Successfully finished pretrain")

--- a/spacy/displacy/__init__.py
+++ b/spacy/displacy/__init__.py
@@ -125,13 +125,17 @@ def app(environ, start_response):
    return [res]


-def parse_deps(orig_doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
+def parse_deps(
+    orig_doc: Union[Doc, Span], options: Dict[str, Any] = {}
+) -> Dict[str, Any]:
    """Generate dependency parse in {'words': [], 'arcs': []} format.

-    orig_doc (Doc): Document to parse.
+    orig_doc (Union[Doc, Span]): Document to parse.
    options (Dict[str, Any]): Dependency parse specific visualisation options.
    RETURNS (dict): Generated dependency parse keyed by words and arcs.
    """
+    if isinstance(orig_doc, Span):
+        orig_doc = orig_doc.as_doc()
    doc = Doc(orig_doc.vocab).from_bytes(
        orig_doc.to_bytes(exclude=["user_data", "user_hooks"])
    )
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -549,8 +549,8 @@ class Errors(metaclass=ErrorsWithCodes):
            "during training, make sure to include it in 'annotating components'")

    # New errors added in v3.x
-    E850 = ("The PretrainVectors objective currently only supports default "
-            "vectors, not {mode} vectors.")
+    E850 = ("The PretrainVectors objective currently only supports default or "
+            "floret vectors, not {mode} vectors.")
    E851 = ("The 'textcat' component labels should only have values of 0 or 1, "
            "but found value of '{val}'.")
    E852 = ("The tar file pulled from the remote attempted an unsafe path "
--- a/spacy/ml/models/multi_task.py
+++ b/spacy/ml/models/multi_task.py
@@ -1,5 +1,5 @@
 from typing import Any, Optional, Iterable, Tuple, List, Callable, TYPE_CHECKING, cast
-from thinc.types import Floats2d
+from thinc.types import Floats2d, Ints1d
 from thinc.api import chain, Maxout, LayerNorm, Softmax, Linear, zero_init, Model
 from thinc.api import MultiSoftmax, list2array
 from thinc.api import to_categorical, CosineDistance, L2Distance
@@ -7,7 +7,7 @@ from thinc.loss import Loss

 from ...util import registry, OOV_RANK
 from ...errors import Errors
-from ...attrs import ID
+from ...attrs import ID, ORTH
 from ...vectors import Mode as VectorsMode

 import numpy
@@ -24,8 +24,6 @@ def create_pretrain_vectors(
    maxout_pieces: int, hidden_size: int, loss: str
 ) -> Callable[["Vocab", Model], Model]:
    def create_vectors_objective(vocab: "Vocab", tok2vec: Model) -> Model:
-        if vocab.vectors.mode != VectorsMode.default:
-            raise ValueError(Errors.E850.format(mode=vocab.vectors.mode))
        if vocab.vectors.shape[1] == 0:
            raise ValueError(Errors.E875)
        model = build_cloze_multi_task_model(
@@ -70,14 +68,23 @@ def get_vectors_loss(ops, docs, prediction, distance):
    """Compute a loss based on a distance between the documents' vectors and
    the prediction.
    """
-    # The simplest way to implement this would be to vstack the
-    # token.vector values, but that's a bit inefficient, especially on GPU.
-    # Instead we fetch the index into the vectors table for each of our tokens,
-    # and look them up all at once. This prevents data copying.
-    ids = ops.flatten([doc.to_array(ID).ravel() for doc in docs])
-    target = docs[0].vocab.vectors.data[ids]
-    target[ids == OOV_RANK] = 0
-    d_target, loss = distance(prediction, target)
+    vocab = docs[0].vocab
+    if vocab.vectors.mode == VectorsMode.default:
+        # The simplest way to implement this would be to vstack the
+        # token.vector values, but that's a bit inefficient, especially on GPU.
+        # Instead we fetch the index into the vectors table for each of our
+        # tokens, and look them up all at once. This prevents data copying.
+        ids = ops.flatten([doc.to_array(ID).ravel() for doc in docs])
+        target = docs[0].vocab.vectors.data[ids]
+        target[ids == OOV_RANK] = 0
+        d_target, loss = distance(prediction, target)
+    elif vocab.vectors.mode == VectorsMode.floret:
+        keys = ops.flatten([cast(Ints1d, doc.to_array(ORTH)) for doc in docs])
+        target = vocab.vectors.get_batch(keys)
+        target = ops.as_contig(target)
+        d_target, loss = distance(prediction, target)
+    else:
+        raise ValueError(Errors.E850.format(mode=vocab.vectors.mode))
    return loss, d_target


--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -474,18 +474,24 @@ class EntityLinker(TrainablePipe):

                # Looping through each entity in batch (TODO: rewrite)
                for j, ent in enumerate(ent_batch):
-                    sent_index = sentences.index(ent.sent)
-                    assert sent_index >= 0
+                    assert hasattr(ent, "sents")
+                    sents = list(ent.sents)
+                    sent_indices = (
+                        sentences.index(sents[0]),
+                        sentences.index(sents[-1]),
+                    )
+                    assert sent_indices[1] >= sent_indices[0] >= 0

                    if self.incl_context:
                        # get n_neighbour sentences, clipped to the length of the document
-                        start_sentence = max(0, sent_index - self.n_sents)
+                        start_sentence = max(0, sent_indices[0] - self.n_sents)
                        end_sentence = min(
-                            len(sentences) - 1, sent_index + self.n_sents
+                            len(sentences) - 1, sent_indices[1] + self.n_sents
                        )
                        start_token = sentences[start_sentence].start
                        end_token = sentences[end_sentence].end
                        sent_doc = doc[start_token:end_token].as_doc()
+
                        # currently, the context is the same for each entity in a sentence (should be refined)
                        sentence_encoding = self.model.predict([sent_doc])[0]
                        sentence_encoding_t = sentence_encoding.T
--- a/spacy/pipeline/spancat.py
+++ b/spacy/pipeline/spancat.py
@@ -1,5 +1,6 @@
 from typing import List, Dict, Callable, Tuple, Optional, Iterable, Any, cast, Union
 from dataclasses import dataclass
+from functools import partial
 from thinc.api import Config, Model, get_current_ops, set_dropout_rate, Ops
 from thinc.api import Optimizer
 from thinc.types import Ragged, Ints2d, Floats2d
@@ -82,39 +83,42 @@ class Suggester(Protocol):
        ...


+def ngram_suggester(
+    docs: Iterable[Doc], sizes: List[int], *, ops: Optional[Ops] = None
+) -> Ragged:
+    if ops is None:
+        ops = get_current_ops()
+    spans = []
+    lengths = []
+    for doc in docs:
+        starts = ops.xp.arange(len(doc), dtype="i")
+        starts = starts.reshape((-1, 1))
+        length = 0
+        for size in sizes:
+            if size <= len(doc):
+                starts_size = starts[: len(doc) - (size - 1)]
+                spans.append(ops.xp.hstack((starts_size, starts_size + size)))
+                length += spans[-1].shape[0]
+            if spans:
+                assert spans[-1].ndim == 2, spans[-1].shape
+        lengths.append(length)
+    lengths_array = ops.asarray1i(lengths)
+    if len(spans) > 0:
+        output = Ragged(ops.xp.vstack(spans), lengths_array)
+    else:
+        output = Ragged(ops.xp.zeros((0, 0), dtype="i"), lengths_array)
+
+    assert output.dataXd.ndim == 2
+    return output
+
+
@registry.misc("spacy.ngram_suggester.v1")
 def build_ngram_suggester(sizes: List[int]) -> Suggester:
    """Suggest all spans of the given lengths. Spans are returned as a ragged
    array of integers. The array has two columns, indicating the start and end
    position."""

-    def ngram_suggester(docs: Iterable[Doc], *, ops: Optional[Ops] = None) -> Ragged:
-        if ops is None:
-            ops = get_current_ops()
-        spans = []
-        lengths = []
-        for doc in docs:
-            starts = ops.xp.arange(len(doc), dtype="i")
-            starts = starts.reshape((-1, 1))
-            length = 0
-            for size in sizes:
-                if size <= len(doc):
-                    starts_size = starts[: len(doc) - (size - 1)]
-                    spans.append(ops.xp.hstack((starts_size, starts_size + size)))
-                    length += spans[-1].shape[0]
-                if spans:
-                    assert spans[-1].ndim == 2, spans[-1].shape
-            lengths.append(length)
-        lengths_array = ops.asarray1i(lengths)
-        if len(spans) > 0:
-            output = Ragged(ops.xp.vstack(spans), lengths_array)
-        else:
-            output = Ragged(ops.xp.zeros((0, 0), dtype="i"), lengths_array)
-
-        assert output.dataXd.ndim == 2
-        return output
-
-    return ngram_suggester
+    return partial(ngram_suggester, sizes=sizes)


@registry.misc("spacy.ngram_range_suggester.v1")
@@ -726,6 +730,7 @@ class SpanCategorizer(TrainablePipe):
        if not allow_overlap:
            # Get the probabilities
            sort_idx = (argmax_scores.squeeze() * -1).argsort()
+            argmax_scores = argmax_scores[sort_idx]
            predicted = predicted[sort_idx]
            indices = indices[sort_idx]
            keeps = keeps[sort_idx]
@@ -748,4 +753,5 @@ class SpanCategorizer(TrainablePipe):
            attrs_scores.append(argmax_scores[i])
            spans.append(Span(doc, start, end, label=self.labels[label]))

+        spans.attrs["scores"] = numpy.array(attrs_scores)
        return spans
--- a/spacy/tests/doc/test_span.py
+++ b/spacy/tests/doc/test_span.py
@@ -700,3 +700,34 @@ def test_span_group_copy(doc):
    assert len(doc.spans["test"]) == 3
    # check that the copy spans were not modified and this is an isolated doc
    assert len(doc_copy.spans["test"]) == 2
+
+
+def test_for_partial_ent_sents():
+    """Spans may be associated with multiple sentences. These .sents should always be complete, not partial, sentences,
+    which this tests for.
+    """
+    doc = Doc(
+        English().vocab,
+        words=["Mahler's", "Symphony", "No.", "8", "was", "beautiful."],
+        sent_starts=[1, 0, 0, 1, 0, 0],
+    )
+    doc.set_ents([Span(doc, 1, 4, "WORK")])
+    # The specified entity is associated with both sentences in this doc, so we expect all sentences in the doc to be
+    # equal to the sentences referenced in ent.sents.
+    for doc_sent, ent_sent in zip(doc.sents, doc.ents[0].sents):
+        assert doc_sent == ent_sent
+
+
+def test_for_no_ent_sents():
+    """Span.sents() should set .sents correctly, even if Span in question is trailing and doesn't form a full
+    sentence.
+    """
+    doc = Doc(
+        English().vocab,
+        words=["This", "is", "a", "test.", "ENTITY"],
+        sent_starts=[1, 0, 0, 0, 1],
+    )
+    doc.set_ents([Span(doc, 4, 5, "WORK")])
+    sents = list(doc.ents[0].sents)
+    assert len(sents) == 1
+    assert str(sents[0]) == str(doc.ents[0].sent) == "ENTITY"
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -1,9 +1,9 @@
-from typing import Callable, Iterable, Dict, Any
+from typing import Callable, Iterable, Dict, Any, Tuple

 import pytest
 from numpy.testing import assert_equal

-from spacy import registry, util
+from spacy import registry, util, Language
 from spacy.attrs import ENT_KB_ID
 from spacy.compat import pickle
 from spacy.kb import Candidate, InMemoryLookupKB, get_candidates, KnowledgeBase
@@ -108,18 +108,23 @@ def test_issue7065():


@pytest.mark.issue(7065)
-def test_issue7065_b():
+@pytest.mark.parametrize("entity_in_first_sentence", [True, False])
+def test_sentence_crossing_ents(entity_in_first_sentence: bool):
+    """Tests if NEL crashes if entities cross sentence boundaries and the first associated sentence doesn't have an
+    entity.
+    entity_in_prior_sentence (bool): Whether to include an entity in the first sentence associated with the
+    sentence-crossing entity.
+    """
    # Test that the NEL doesn't crash when an entity crosses a sentence boundary
    nlp = English()
    vector_length = 3
-    nlp.add_pipe("sentencizer")
    text = "Mahler 's Symphony No. 8 was beautiful."
-    entities = [(0, 6, "PERSON"), (10, 24, "WORK")]
-    links = {
-        (0, 6): {"Q7304": 1.0, "Q270853": 0.0},
-        (10, 24): {"Q7304": 0.0, "Q270853": 1.0},
-    }
-    sent_starts = [1, -1, 0, 0, 0, 0, 0, 0, 0]
+    entities = [(10, 24, "WORK")]
+    links = {(10, 24): {"Q7304": 0.0, "Q270853": 1.0}}
+    if entity_in_first_sentence:
+        entities.append((0, 6, "PERSON"))
+        links[(0, 6)] = {"Q7304": 1.0, "Q270853": 0.0}
+    sent_starts = [1, -1, 0, 0, 0, 1, 0, 0, 0]
    doc = nlp(text)
    example = Example.from_dict(
        doc, {"entities": entities, "links": links, "sent_starts": sent_starts}
@@ -145,31 +150,14 @@ def test_issue7065_b():

    # Create the Entity Linker component and add it to the pipeline
    entity_linker = nlp.add_pipe("entity_linker", last=True)
-    entity_linker.set_kb(create_kb)
+    entity_linker.set_kb(create_kb)  # type: ignore
    # train the NEL pipe
    optimizer = nlp.initialize(get_examples=lambda: train_examples)
    for i in range(2):
-        losses = {}
-        nlp.update(train_examples, sgd=optimizer, losses=losses)
+        nlp.update(train_examples, sgd=optimizer)

-    # Add a custom rule-based component to mimick NER
-    patterns = [
-        {"label": "PERSON", "pattern": [{"LOWER": "mahler"}]},
-        {
-            "label": "WORK",
-            "pattern": [
-                {"LOWER": "symphony"},
-                {"LOWER": "no"},
-                {"LOWER": "."},
-                {"LOWER": "8"},
-            ],
-        },
-    ]
-    ruler = nlp.add_pipe("entity_ruler", before="entity_linker")
-    ruler.add_patterns(patterns)
-    # test the trained model - this should not throw E148
-    doc = nlp(text)
-    assert doc
+    # This shouldn't crash.
+    entity_linker.predict([example.reference])  # type: ignore


 def test_no_entities():
--- a/spacy/tests/pipeline/test_spancat.py
+++ b/spacy/tests/pipeline/test_spancat.py
@@ -1,7 +1,7 @@
 import pytest
 import numpy
 from numpy.testing import assert_array_equal, assert_almost_equal
-from thinc.api import get_current_ops, Ragged
+from thinc.api import get_current_ops, NumpyOps, Ragged

 from spacy import util
 from spacy.lang.en import English
@ -190,17 +190,19 @@ def test_make_spangroup_singlelabel(threshold, allow_overlap, nr_results):
    spangroup = spancat._make_span_group_singlelabel(
        doc, indices, scores, allow_overlap
    )
-    assert len(spangroup) == nr_results
    if threshold > 0.4:
        if allow_overlap:
            assert spangroup[0].text == "London"
            assert spangroup[0].label_ == "City"
+            assert_almost_equal(0.6, spangroup.attrs["scores"][0], 5)
            assert spangroup[1].text == "Greater London"
            assert spangroup[1].label_ == "GreatCity"
-
+            assert spangroup.attrs["scores"][1] == 0.9
+            assert_almost_equal(0.9, spangroup.attrs["scores"][1], 5)
        else:
            assert spangroup[0].text == "Greater London"
            assert spangroup[0].label_ == "GreatCity"
+            assert spangroup.attrs["scores"][0] == 0.9
    else:
        if allow_overlap:
            assert spangroup[0].text == "Greater"
@ -256,22 +258,32 @@ def test_make_spangroup_negative_label():
    assert len(spangroup_single) == 2
    assert spangroup_single[0].text == "Greater"
    assert spangroup_single[0].label_ == "City"
+    assert_almost_equal(0.4, spangroup_single.attrs["scores"][0], 5)
    assert spangroup_single[1].text == "Greater London"
    assert spangroup_single[1].label_ == "GreatCity"
+    assert spangroup_single.attrs["scores"][1] == 0.9
+    assert_almost_equal(0.9, spangroup_single.attrs["scores"][1], 5)

    assert len(spangroup_multi) == 6
    assert spangroup_multi[0].text == "Greater"
    assert spangroup_multi[0].label_ == "City"
+    assert_almost_equal(0.4, spangroup_multi.attrs["scores"][0], 5)
    assert spangroup_multi[1].text == "Greater"
    assert spangroup_multi[1].label_ == "Person"
+    assert_almost_equal(0.3, spangroup_multi.attrs["scores"][1], 5)
    assert spangroup_multi[2].text == "London"
    assert spangroup_multi[2].label_ == "City"
+    assert_almost_equal(0.6, spangroup_multi.attrs["scores"][2], 5)
    assert spangroup_multi[3].text == "London"
    assert spangroup_multi[3].label_ == "GreatCity"
+    assert_almost_equal(0.4, spangroup_multi.attrs["scores"][3], 5)
    assert spangroup_multi[4].text == "Greater London"
    assert spangroup_multi[4].label_ == "Thing"
+    assert spangroup_multi[4].text == "Greater London"
+    assert_almost_equal(0.8, spangroup_multi.attrs["scores"][4], 5)
    assert spangroup_multi[5].text == "Greater London"
    assert spangroup_multi[5].label_ == "GreatCity"
+    assert_almost_equal(0.9, spangroup_multi.attrs["scores"][5], 5)


 def test_ngram_suggester(en_tokenizer):
@ -565,3 +577,21 @@ def test_set_candidates(name):
    assert len(docs[0].spans["candidates"]) == 9
    assert docs[0].spans["candidates"][0].text == "Just"
    assert docs[0].spans["candidates"][4].text == "Just a"
+
+
+@pytest.mark.parametrize("name", SPANCAT_COMPONENTS)
+@pytest.mark.parametrize("n_process", [1, 2])
+def test_spancat_multiprocessing(name, n_process):
+    if isinstance(get_current_ops, NumpyOps) or n_process < 2:
+        nlp = Language()
+        spancat = nlp.add_pipe(name, config={"spans_key": SPAN_KEY})
+        train_examples = make_examples(nlp)
+        nlp.initialize(get_examples=lambda: train_examples)
+        texts = [
+            "Just a sentence.",
+            "I like London and Berlin",
+            "I like Berlin",
+            "I eat ham.",
+        ]
+        docs = list(nlp.pipe(texts, n_process=n_process))
+        assert len(docs) == len(texts)
--- a/spacy/tests/serialize/test_serialize_doc.py
+++ b/spacy/tests/serialize/test_serialize_doc.py
@ -213,6 +213,13 @@ def test_serialize_doc_exclude(en_vocab):

 def test_serialize_doc_span_groups(en_vocab):
    doc = Doc(en_vocab, words=["hello", "world", "!"])
-    doc.spans["content"] = [doc[0:2]]
+    span = doc[0:2]
+    span.label_ = "test_serialize_doc_span_groups_label"
+    span.id_ = "test_serialize_doc_span_groups_id"
+    span.kb_id_ = "test_serialize_doc_span_groups_kb_id"
+    doc.spans["content"] = [span]
    new_doc = Doc(en_vocab).from_bytes(doc.to_bytes())
    assert len(new_doc.spans["content"]) == 1
+    assert new_doc.spans["content"][0].label_ == "test_serialize_doc_span_groups_label"
+    assert new_doc.spans["content"][0].id_ == "test_serialize_doc_span_groups_id"
+    assert new_doc.spans["content"][0].kb_id_ == "test_serialize_doc_span_groups_kb_id"
--- a/spacy/tests/serialize/test_serialize_docbin.py
+++ b/spacy/tests/serialize/test_serialize_docbin.py
@ -49,7 +49,11 @@ def test_serialize_doc_bin():
    nlp = English()
    for doc in nlp.pipe(texts):
        doc.cats = cats
-        doc.spans["start"] = [doc[0:2]]
+        span = doc[0:2]
+        span.label_ = "UNUSUAL_SPAN_LABEL"
+        span.id_ = "UNUSUAL_SPAN_ID"
+        span.kb_id_ = "UNUSUAL_SPAN_KB_ID"
+        doc.spans["start"] = [span]
        doc[0].norm_ = "UNUSUAL_TOKEN_NORM"
        doc[0].ent_id_ = "UNUSUAL_TOKEN_ENT_ID"
        doc_bin.add(doc)
@ -63,6 +67,9 @@ def test_serialize_doc_bin():
        assert doc.text == texts[i]
        assert doc.cats == cats
        assert len(doc.spans) == 1
+        assert doc.spans["start"][0].label_ == "UNUSUAL_SPAN_LABEL"
+        assert doc.spans["start"][0].id_ == "UNUSUAL_SPAN_ID"
+        assert doc.spans["start"][0].kb_id_ == "UNUSUAL_SPAN_KB_ID"
        assert doc[0].norm_ == "UNUSUAL_TOKEN_NORM"
        assert doc[0].ent_id_ == "UNUSUAL_TOKEN_ENT_ID"

--- a/spacy/tests/test_displacy.py
+++ b/spacy/tests/test_displacy.py
@ -275,6 +275,20 @@ def test_displacy_parse_deps(en_vocab):
        {"start": 2, "end": 3, "label": "det", "dir": "left"},
        {"start": 1, "end": 3, "label": "attr", "dir": "right"},
    ]
+    # Test that displacy.parse_deps converts Span to Doc
+    deps = displacy.parse_deps(doc[:])
+    assert isinstance(deps, dict)
+    assert deps["words"] == [
+        {"lemma": None, "text": words[0], "tag": pos[0]},
+        {"lemma": None, "text": words[1], "tag": pos[1]},
+        {"lemma": None, "text": words[2], "tag": pos[2]},
+        {"lemma": None, "text": words[3], "tag": pos[3]},
+    ]
+    assert deps["arcs"] == [
+        {"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
+        {"start": 2, "end": 3, "label": "det", "dir": "left"},
+        {"start": 1, "end": 3, "label": "attr", "dir": "right"},
+    ]


 def test_displacy_invalid_arcs():
--- a/spacy/tests/training/test_pretraining.py
+++ b/spacy/tests/training/test_pretraining.py
@ -165,7 +165,8 @@ def test_pretraining_default():


@pytest.mark.parametrize("objective", CHAR_OBJECTIVES)
-def test_pretraining_tok2vec_characters(objective):
+@pytest.mark.parametrize("skip_last", (True, False))
+def test_pretraining_tok2vec_characters(objective, skip_last):
    """Test that pretraining works with the character objective"""
    config = Config().from_str(pretrain_string_listener)
    config["pretraining"]["objective"] = objective
@ -178,10 +179,14 @@ def test_pretraining_tok2vec_characters(objective):
        filled["paths"]["raw_text"] = file_path
        filled = filled.interpolate()
        assert filled["pretraining"]["component"] == "tok2vec"
-        pretrain(filled, tmp_dir)
+        pretrain(filled, tmp_dir, skip_last=skip_last)
        assert Path(tmp_dir / "model0.bin").exists()
        assert Path(tmp_dir / "model4.bin").exists()
        assert not Path(tmp_dir / "model5.bin").exists()
+        if skip_last:
+            assert not Path(tmp_dir / "model-last.bin").exists()
+        else:
+            assert Path(tmp_dir / "model-last.bin").exists()


@pytest.mark.parametrize("objective", VECTOR_OBJECTIVES)
@ -237,6 +242,7 @@ def test_pretraining_tagger_tok2vec(config):
        pretrain(filled, tmp_dir)
        assert Path(tmp_dir / "model0.bin").exists()
        assert Path(tmp_dir / "model4.bin").exists()
+        assert Path(tmp_dir / "model-last.bin").exists()
        assert not Path(tmp_dir / "model5.bin").exists()


@ -359,19 +365,15 @@ def test_pretrain_default_vectors():
    nlp.vocab.vectors = Vectors(shape=(10, 10))
    create_pretrain_vectors(1, 1, "cosine")(nlp.vocab, nlp.get_pipe("tok2vec").model)

+    # floret vectors are supported
+    nlp.vocab.vectors = Vectors(
+        data=get_current_ops().xp.zeros((10, 10)), mode="floret", hash_count=1
+    )
+    create_pretrain_vectors(1, 1, "cosine")(nlp.vocab, nlp.get_pipe("tok2vec").model)
+
    # error for no vectors
    with pytest.raises(ValueError, match="E875"):
        nlp.vocab.vectors = Vectors()
        create_pretrain_vectors(1, 1, "cosine")(
            nlp.vocab, nlp.get_pipe("tok2vec").model
        )
-
-    # error for floret vectors
-    with pytest.raises(ValueError, match="E850"):
-        ops = get_current_ops()
-        nlp.vocab.vectors = Vectors(
-            data=ops.xp.zeros((10, 10)), mode="floret", hash_count=1
-        )
-        create_pretrain_vectors(1, 1, "cosine")(
-            nlp.vocab, nlp.get_pipe("tok2vec").model
-        )
--- a/spacy/tokens/_serialize.py
+++ b/spacy/tokens/_serialize.py
@ -124,6 +124,10 @@ class DocBin:
        for key, group in doc.spans.items():
            for span in group:
                self.strings.add(span.label_)
+                if span.kb_id in span.doc.vocab.strings:
+                    self.strings.add(span.kb_id_)
+                if span.id in span.doc.vocab.strings:
+                    self.strings.add(span.id_)

    def get_docs(self, vocab: Vocab) -> Iterator[Doc]:
        """Recover Doc objects from the annotations, using the given vocab.
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@ -544,10 +544,6 @@ cdef class Doc:

        DOCS: https://spacy.io/api/doc#char_span
        """
-        if not isinstance(label, int):
-            label = self.vocab.strings.add(label)
-        if not isinstance(kb_id, int):
-            kb_id = self.vocab.strings.add(kb_id)
        alignment_modes = ("strict", "contract", "expand")
        if alignment_mode not in alignment_modes:
            raise ValueError(
@ -1350,6 +1346,10 @@ cdef class Doc:
        for group in self.spans.values():
            for span in group:
                strings.add(span.label_)
+                if span.kb_id in span.doc.vocab.strings:
+                    strings.add(span.kb_id_)
+                if span.id in span.doc.vocab.strings:
+                    strings.add(span.id_)
        # Msgpack doesn't distinguish between lists and tuples, which is
        # vexing for user data. As a best guess, we *know* that within
        # keys, we must have tuples. In values we just have to hope
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@ -460,9 +460,12 @@ cdef class Span:
                    start = i
                    if start >= self.end:
                        break
-            if start < self.end:
-                yield Span(self.doc, start, self.end)
+                elif i == self.doc.length - 1:
+                    yield Span(self.doc, start, self.doc.length)

+            # Ensure that trailing parts of the Span instance are included in last element of .sents.
+            if start == self.doc.length - 1:
+                yield Span(self.doc, start, self.doc.length)

    @property
    def ents(self):
--- a/spacy/training/pretrain.py
+++ b/spacy/training/pretrain.py
@ -24,6 +24,7 @@ def pretrain(
    epoch_resume: Optional[int] = None,
    use_gpu: int = -1,
    silent: bool = True,
+    skip_last: bool = False,
 ):
    msg = Printer(no_print=silent)
    if config["training"]["seed"] is not None:
@ -60,10 +61,14 @@ def pretrain(
    row_settings = {"widths": (3, 10, 10, 6, 4), "aligns": ("r", "r", "r", "r", "r")}
    msg.row(("#", "# Words", "Total Loss", "Loss", "w/s"), **row_settings)

-    def _save_model(epoch, is_temp=False):
+    def _save_model(epoch, is_temp=False, is_last=False):
        is_temp_str = ".temp" if is_temp else ""
        with model.use_params(optimizer.averages):
-            with (output_dir / f"model{epoch}{is_temp_str}.bin").open("wb") as file_:
+            if is_last:
+                save_path = output_dir / f"model-last.bin"
+            else:
+                save_path = output_dir / f"model{epoch}{is_temp_str}.bin"
+            with (save_path).open("wb") as file_:
                file_.write(model.get_ref("tok2vec").to_bytes())
            log = {
                "nr_word": tracker.nr_word,
@ -76,22 +81,26 @@ def pretrain(

    # TODO: I think we probably want this to look more like the
    # 'create_train_batches' function?
-    for epoch in range(epoch_resume, P["max_epochs"]):
-        for batch_id, batch in enumerate(batcher(corpus(nlp))):
-            docs = ensure_docs(batch)
-            loss = make_update(model, docs, optimizer, objective)
-            progress = tracker.update(epoch, loss, docs)
-            if progress:
-                msg.row(progress, **row_settings)
-            if P["n_save_every"] and (batch_id % P["n_save_every"] == 0):
-                _save_model(epoch, is_temp=True)
+    try:
+        for epoch in range(epoch_resume, P["max_epochs"]):
+            for batch_id, batch in enumerate(batcher(corpus(nlp))):
+                docs = ensure_docs(batch)
+                loss = make_update(model, docs, optimizer, objective)
+                progress = tracker.update(epoch, loss, docs)
+                if progress:
+                    msg.row(progress, **row_settings)
+                if P["n_save_every"] and (batch_id % P["n_save_every"] == 0):
+                    _save_model(epoch, is_temp=True)

-        if P["n_save_epoch"]:
-            if epoch % P["n_save_epoch"] == 0 or epoch == P["max_epochs"] - 1:
+            if P["n_save_epoch"]:
+                if epoch % P["n_save_epoch"] == 0 or epoch == P["max_epochs"] - 1:
+                    _save_model(epoch)
+            else:
                _save_model(epoch)
-        else:
-            _save_model(epoch)
-        tracker.epoch_loss = 0.0
+            tracker.epoch_loss = 0.0
+    finally:
+        if not skip_last:
+            _save_model(P["max_epochs"], is_last=True)


 def ensure_docs(examples_or_docs: Iterable[Union[Doc, Example]]) -> List[Doc]:
--- a/website/docs/api/cli.mdx
+++ b/website/docs/api/cli.mdx
@ -1122,17 +1122,18 @@ auto-generated by setting `--pretraining` on
 $ python -m spacy pretrain [config_path] [output_dir] [--code] [--resume-path] [--epoch-resume] [--gpu-id] [overrides]
 ```

-| Name                    | Description                                                                                                                                                                                                        |
-| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `config_path`           | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
-| `output_dir`            | Directory to save binary weights to on each epoch. ~~Path (positional)~~                                                                                                                                           |
-| `--code`, `-c`          | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~                               |
-| `--resume-path`, `-r`   | Path to pretrained weights from which to resume pretraining. ~~Optional[Path] \(option)~~                                                                                                                          |
-| `--epoch-resume`, `-er` | The epoch to resume counting from when using `--resume-path`. Prevents unintended overwriting of existing weight files. ~~Optional[int] \(option)~~                                                                |
-| `--gpu-id`, `-g`        | GPU ID or `-1` for CPU. Defaults to `-1`. ~~int (option)~~                                                                                                                                                         |
-| `--help`, `-h`          | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                         |
-| overrides               | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--training.dropout 0.2`. ~~Any (option/flag)~~                              |
-| **CREATES**             | The pretrained weights that can be used to initialize `spacy train`.                                                                                                                                               |
+| Name                                               | Description                                                                                                                                                                                                        |
+| -------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `config_path`                                      | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
+| `output_dir`                                       | Directory to save binary weights to on each epoch. ~~Path (positional)~~                                                                                                                                           |
+| `--code`, `-c`                                     | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~                               |
+| `--resume-path`, `-r`                              | Path to pretrained weights from which to resume pretraining. ~~Optional[Path] \(option)~~                                                                                                                          |
+| `--epoch-resume`, `-er`                            | The epoch to resume counting from when using `--resume-path`. Prevents unintended overwriting of existing weight files. ~~Optional[int] \(option)~~                                                                |
+| `--gpu-id`, `-g`                                   | GPU ID or `-1` for CPU. Defaults to `-1`. ~~int (option)~~                                                                                                                                                         |
+| `--skip-last`, `-L` <Tag variant="new">3.5.2</Tag> | Skip saving `model-last.bin`. Defaults to `False`. ~~bool (flag)~~                                                                                                                                                 |
+| `--help`, `-h`                                     | Show help message and available arguments. ~~bool (flag)~~                                                                                                                                                         |
+| overrides                                          | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--training.dropout 0.2`. ~~Any (option/flag)~~                              |
+| **CREATES**                                        | The pretrained weights that can be used to initialize `spacy train`.                                                                                                                                               |

 ## evaluate {id="evaluate",version="2",tag="command"}

@ -1254,19 +1255,19 @@ be provided.
 > $ python -m spacy find-threshold my_nlp data.spacy spancat threshold spans_sc_f
 > ```

-| Name                    | Description                                                                                                                                                                          |
-| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `model`                 | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                           |
-| `data_path`             | Path to file with DocBin with docs to use for threshold search. ~~Path (positional)~~                                                                                                |
-| `pipe_name`             | Name of pipe to examine thresholds for. ~~str (positional)~~                                                                                                                         |
-| `threshold_key`         | Key of threshold attribute in component's configuration. ~~str (positional)~~                                                                                                        |
-| `scores_key`            | Name of score to metric to optimize. ~~str (positional)~~                                                                                                                            |
-| `--n_trials`, `-n`      | Number of trials to determine optimal thresholds. ~~int (option)~~                                                                                                                   |
-| `--code`, `-c`          | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
-| `--gpu-id`, `-g`        | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
-| `--gold-preproc`, `-G`  | Use gold preprocessing. ~~bool (flag)~~                                                                                                                                              |
-| `--silent`, `-V`, `-VV` | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
-| `--help`, `-h`          | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           |
+| Name                     | Description                                                                                                                                                                          |
+| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `model`                  | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                           |
+| `data_path`              | Path to file with DocBin with docs to use for threshold search. ~~Path (positional)~~                                                                                                |
+| `pipe_name`              | Name of pipe to examine thresholds for. ~~str (positional)~~                                                                                                                         |
+| `threshold_key`          | Key of threshold attribute in component's configuration. ~~str (positional)~~                                                                                                        |
+| `scores_key`             | Name of score metric to optimize. ~~str (positional)~~                                                                                                                               |
+| `--n_trials`, `-n`       | Number of trials to determine optimal thresholds. ~~int (option)~~                                                                                                                   |
+| `--code`, `-c`           | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--gpu-id`, `-g`         | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
+| `--gold-preproc`, `-G`   | Use gold preprocessing. ~~bool (flag)~~                                                                                                                                              |
+| `--verbose`, `-V`, `-VV` | Display more information for debugging purposes. ~~bool (flag)~~                                                                                                                     |
+| `--help`, `-h`           | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           |

 ## assemble {id="assemble",tag="command"}

--- a/website/docs/api/coref.mdx
+++ b/website/docs/api/coref.mdx
@ -64,7 +64,7 @@ details on the architectures and their arguments and hyperparameters.
 > config={
 >     "model": DEFAULT_COREF_MODEL,
 >     "span_cluster_prefix": DEFAULT_CLUSTER_PREFIX,
-> },
+> }
 > nlp.add_pipe("experimental_coref", config=config)
 > ```

--- a/website/docs/api/spancategorizer.mdx
+++ b/website/docs/api/spancategorizer.mdx
@ -20,8 +20,9 @@ output class probabilities are independent for each class. However, if you need
 to predict at most one true class for a span, then use `spancat_singlelabel`. It
 uses a `Softmax` layer and treats the task as a multi-class problem.

-Predicted spans will be saved in a [`SpanGroup`](/api/spangroup) on the doc.
-Individual span scores can be found in `spangroup.attrs["scores"]`.
+Predicted spans will be saved in a [`SpanGroup`](/api/spangroup) on the doc
+under `doc.spans[spans_key]`, where `spans_key` is a component config setting.
+Individual span scores are stored in `doc.spans[spans_key].attrs["scores"]`.

 ## Assigned Attributes {id="assigned-attributes"}

@ -29,7 +30,9 @@ Predictions will be saved to `Doc.spans[spans_key]` as a
 [`SpanGroup`](/api/spangroup). The scores for the spans in the `SpanGroup` will
 be saved in `SpanGroup.attrs["scores"]`.

-`spans_key` defaults to `"sc"`, but can be passed as a parameter.
+`spans_key` defaults to `"sc"`, but can be passed as a parameter. The `spancat`
+component will overwrite any existing spans under the spans key
+`doc.spans[spans_key]`.

 | Location                               | Value                                                    |
 | -------------------------------------- | -------------------------------------------------------- |
--- a/website/docs/api/stringstore.mdx
+++ b/website/docs/api/stringstore.mdx
@ -8,6 +8,13 @@ Look up strings by 64-bit hashes. As of v2.0, spaCy uses hash values instead of
 integer IDs. This ensures that strings always map to the same ID, even from
 different `StringStores`.

+<Infobox variant="warning">
+
+Note that a `StringStore` instance is not static. It increases in size as texts
+with new tokens are processed.
+
+</Infobox>
+
 ## StringStore.\_\_init\_\_ {id="init",tag="method"}

 Create the `StringStore`.
--- a/website/docs/api/top-level.mdx
+++ b/website/docs/api/top-level.mdx
@ -25,7 +25,10 @@ and call the package's own `load()` method. If a pipeline is loaded from a path,
 spaCy will assume it's a data directory, load its
 [`config.cfg`](/api/data-formats#config) and use the language and pipeline
 information to construct the `Language` class. The data will be loaded in via
-[`Language.from_disk`](/api/language#from_disk).
+[`Language.from_disk`](/api/language#from_disk). Loading a pipeline from a
+package will also import any custom code, if present, whereas loading from a
+directory does not. For these cases, you need to manually import your custom
+code.

 <Infobox variant="warning" title="Changed in v3.0">

@ -291,7 +294,7 @@ the `manual=True` argument in `displacy.render`.

 | Name        | Description                                                         |
 | ----------- | ------------------------------------------------------------------- |
-| `orig_doc`  | Doc to parse dependencies. ~~Doc~~                                  |
+| `orig_doc`  | Doc or span to parse dependencies. ~~Union[Doc, Span]~~             |
 | `options`   | Dependency parse specific visualisation options. ~~Dict[str, Any]~~ |
 | **RETURNS** | Generated dependency parse keyed by words and arcs. ~~dict~~        |

@ -577,7 +580,7 @@ start decreasing across epochs.
 > ```ini
 > [training.logger]
 > @loggers = "spacy.ConsoleLogger.v3"
-> progress_bar = "all_steps"
+> progress_bar = "eval"
 > console_output = true
 > output_file = "training_log.jsonl"
 > ```
--- a/website/docs/api/vocab.mdx
+++ b/website/docs/api/vocab.mdx
@ -10,6 +10,13 @@ The `Vocab` object provides a lookup table that allows you to access
 [`StringStore`](/api/stringstore). It also owns underlying C-data that is shared
 between `Doc` objects.

+<Infobox variant="warning">
+
+Note that a `Vocab` instance is not static. It increases in size as texts with
+new tokens are processed.
+
+</Infobox>
+
 ## Vocab.\_\_init\_\_ {id="init",tag="method"}

 Create the vocabulary.
--- a/website/docs/usage/embeddings-transformers.mdx
+++ b/website/docs/usage/embeddings-transformers.mdx
@ -746,13 +746,16 @@ this by setting `initialize.init_tok2vec` to the filename of the `.bin` file
 that you want to use from pretraining.

 A pretraining step that runs for 5 epochs with an output path of `pretrain/`, as
-an example, produces `pretrain/model0.bin` through `pretrain/model4.bin`. To
-make use of the final output, you could fill in this value in your config file:
+an example, produces `pretrain/model0.bin` through `pretrain/model4.bin` plus a
+copy of the last iteration as `pretrain/model-last.bin`. Additionally, you can
+configure `n_save_epoch` to tell pretraining in which epoch interval it should
+save the current training progress. To use the final output to initialize your
+`tok2vec` layer, you could fill in this value in your config file:

 ```ini {title="config.cfg"}

 [paths]
-init_tok2vec = "pretrain/model4.bin"
+init_tok2vec = "pretrain/model-last.bin"

 [initialize]
 init_tok2vec = ${paths.init_tok2vec}
--- a/website/docs/usage/rule-based-matching.mdx
+++ b/website/docs/usage/rule-based-matching.mdx
@ -1682,6 +1682,8 @@ def expand_person_entities(doc):
            if prev_token.text in ("Dr", "Dr.", "Mr", "Mr.", "Ms", "Ms."):
                new_ent = Span(doc, ent.start - 1, ent.end, label=ent.label)
                new_ents.append(new_ent)
+            else:
+                new_ents.append(ent)
        else:
            new_ents.append(ent)
    doc.ents = new_ents
--- a/website/docs/usage/training.mdx
+++ b/website/docs/usage/training.mdx
@ -758,6 +758,15 @@ any custom architectures, functions or
 your pipeline and registered when it's loaded. See the documentation on
 [saving and loading pipelines](/usage/saving-loading#models-custom) for details.

+<Infobox variant="warning">
+
+Note that the unpackaged models produced by `spacy train` are data directories
+that **do not include custom code**. You need to import the code in your script
+before loading unpackaged models. For more details, see
+[`spacy.load`](/api/top-level#spacy.load).
+
+</Infobox>
+
 #### Example: Modifying the nlp object {id="custom-code-nlp-callbacks"}

 For many use cases, you don't necessarily want to implement the whole `Language`
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@ -3215,6 +3215,51 @@
            "category": ["pipeline"],
            "tags": ["syllables", "multilingual"]
        },
+        {
+            "id": "sentimental-onix",
+            "title": "Sentimental Onix",
+            "slogan": "Use onnx for sentiment models",
+            "description": "spaCy pipeline component for sentiment analysis using onnx",
+            "github": "sloev/sentimental-onix",
+            "pip": "sentimental-onix",
+            "code_example": [
+                "# Download model:",
+                "#   python -m sentimental_onix download en",
+                "import spacy",
+                "from sentimental_onix import pipeline",
+                "",
+                "nlp = spacy.load(\"en_core_web_sm\")",
+                "nlp.add_pipe(\"sentencizer\")",
+                "nlp.add_pipe(\"sentimental_onix\", after=\"sentencizer\")",
+                "",
+                "sentences = [",
+                "    (sent.text, sent._.sentiment)",
+                "    for doc in nlp.pipe(",
+                "        [",
+                "            \"i hate pasta on tuesdays\",",
+                "            \"i like movies on wednesdays\",",
+                "            \"i find your argument ridiculous\",",
+                "            \"soda with straws are my favorite\",",
+                "        ]",
+                "    )",
+                "    for sent in doc.sents",
+                "]",
+                "",
+                "assert sentences == [",
+                "    (\"i hate pasta on tuesdays\", \"Negative\"),",
+                "    (\"i like movies on wednesdays\", \"Positive\"),",
+                "    (\"i find your argument ridiculous\", \"Negative\"),",
+                "    (\"soda with straws are my favorite\", \"Positive\"),",
+                "]"
+            ],
+            "thumb": "https://raw.githubusercontent.com/sloev/sentimental-onix/master/.github/onix.webp",
+            "author": "Johannes Valbjørn",
+            "author_links": {
+                "github": "sloev"
+            },
+            "category": ["pipeline"],
+            "tags": ["sentiment", "english"]
+        },
        {
            "id": "gobbli",
            "title": "gobbli",
--- a/website/src/styles/navigation.module.sass
+++ b/website/src/styles/navigation.module.sass
@ -111,11 +111,12 @@
    line-height: var(--line-height-xs)
    text-align: center

-@include breakpoint(max, xs)
-    .list
+@include breakpoint(max, md)
+    .alert
        display: none

-    .alert
+@include breakpoint(max, xs)
+    .list
        display: none

    .has-alert
--- a/website/src/templates/index.js
+++ b/website/src/templates/index.js
@ -25,11 +25,6 @@ const AlertSpace = ({ nightly, legacy }) => {
    const isOnline = useOnlineStatus()
    return (
        <>
-            {isOnline && (
-                <Alert title="💥 We'd love to learn more about your experience with spaCy!">
-                    <Link to="https://form.typeform.com/to/aMel9q9f">Take our survey here.</Link>
-                </Alert>
-            )}
            {nightly && (
                <Alert
                    title="You're viewing the pre-release docs."
@ -62,9 +57,15 @@ const AlertSpace = ({ nightly, legacy }) => {
    )
 }

+// const navAlert = (
+//     <Link to="/usage/v3-5" noLinkLayout>
+//         <strong>💥 Out now:</strong> spaCy v3.5
+//     </Link>
+// )
+
 const navAlert = (
-    <Link to="/usage/v3-5" noLinkLayout>
-        <strong>💥 Out now:</strong> spaCy v3.5
+    <Link to="https://form.typeform.com/to/aMel9q9f" noLinkLayout>
+        <strong>💥 Take the user survey!</strong>
    </Link>
 )