diff --git a/.github/azure-steps.yml b/.github/azure-steps.yml
index 7c3c3e0a6..0b9b0731f 100644
--- a/.github/azure-steps.yml
+++ b/.github/azure-steps.yml
@@ -1,6 +1,6 @@
 parameters:
-  python_version: ''
-  architecture: 'x64'
+  python_version: ""
+  architecture: "x64"
   num_build_jobs: 2
 
 steps:
@@ -12,7 +12,7 @@ steps:
 
   - bash: |
       echo "##vso[task.setvariable variable=python_version]${{ parameters.python_version }}"
-    displayName: 'Set variables'
+    displayName: "Set variables"
 
   - script: |
       python -m pip install -U build pip setuptools
@@ -25,7 +25,7 @@ steps:
 
   - script: |
       python -m mypy spacy
-    displayName: 'Run mypy'
+    displayName: "Run mypy"
     condition: ne(variables['python_version'], '3.6')
 
   - task: DeleteFiles@1
@@ -52,56 +52,56 @@ steps:
       python -W error -c "import spacy"
     displayName: "Test import"
 
-#  - script: |
-#      python -m spacy download ca_core_news_sm
-#      python -m spacy download ca_core_news_md
-#      python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
-#    displayName: 'Test download CLI'
-#    condition: eq(variables['python_version'], '3.8')
-#
-#  - script: |
-#      python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
-#    displayName: 'Test no warnings on load (#11713)'
-#    condition: eq(variables['python_version'], '3.8')
-#
-#  - script: |
-#      python -m spacy download ca_core_news_sm 2>&1 | grep -q skipping
-#    displayName: 'Test skip re-download (#12188)'
-#    condition: eq(variables['python_version'], '3.8')
+  - script: |
+      python -m spacy download ca_core_news_sm
+      python -m spacy download ca_core_news_md
+      python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
+    displayName: "Test download CLI"
+    condition: eq(variables['python_version'], '3.9')
+
+  - script: |
+      python -W error -m spacy info ca_core_news_sm | grep -q download_url
+    displayName: "Test download_url in info CLI"
+    condition: eq(variables['python_version'], '3.9')
+
+  - script: |
+      python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
+    displayName: "Test no warnings on load (#11713)"
+    condition: eq(variables['python_version'], '3.9')
 
   - script: |
       python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json .
-    displayName: 'Test convert CLI'
-    condition: eq(variables['python_version'], '3.8')
+    displayName: "Test convert CLI"
+    condition: eq(variables['python_version'], '3.9')
 
   - script: |
       python -m spacy init config -p ner -l ca ner.cfg
       python -m spacy debug config ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy
-    displayName: 'Test debug config CLI'
-    condition: eq(variables['python_version'], '3.8')
+    displayName: "Test debug config CLI"
+    condition: eq(variables['python_version'], '3.9')
 
   - script: |
       # will have errors due to sparse data, check for summary in output
       python -m spacy debug data ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy | grep -q Summary
-    displayName: 'Test debug data CLI'
-    condition: eq(variables['python_version'], '3.8')
+    displayName: "Test debug data CLI"
+    condition: eq(variables['python_version'], '3.9')
 
   - script: |
       python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1
-    displayName: 'Test train CLI'
-    condition: eq(variables['python_version'], '3.8')
+    displayName: "Test train CLI"
+    condition: eq(variables['python_version'], '3.9')
 
-#  - script: |
-#      python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
-#      PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
-#    displayName: 'Test assemble CLI'
-#    condition: eq(variables['python_version'], '3.8')
-#
-#  - script: |
-#      python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
-#      python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
-#    displayName: 'Test assemble CLI vectors warning'
-#    condition: eq(variables['python_version'], '3.8')
+  - script: |
+      python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
+      PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
+    displayName: "Test assemble CLI"
+    condition: eq(variables['python_version'], '3.9')
+
+  - script: |
+      python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
+      python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
+    displayName: "Test assemble CLI vectors warning"
+    condition: eq(variables['python_version'], '3.9')
 
   - script: |
       python -m pip install -U -r requirements.txt
@@ -116,9 +116,3 @@ steps:
       python -m pytest --pyargs spacy
     displayName: "Run CPU tests with thinc-apple-ops"
     condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.11'))
-
-  - script: |
-      python .github/validate_universe_json.py website/meta/universe.json
-    displayName: 'Test website/meta/universe.json'
-    condition: eq(variables['python_version'], '3.8')
-
diff --git a/.github/workflows/autoblack.yml b/.github/workflows/autoblack.yml
deleted file mode 100644
index 555322782..000000000
--- a/.github/workflows/autoblack.yml
+++ /dev/null
@@ -1,45 +0,0 @@
-# GitHub Action that uses Black to reformat all Python code and submits a PR
-# in regular intervals. Inspired by: https://github.com/cclauss/autoblack
-
-name: autoblack
-on:
-  workflow_dispatch:  # allow manual trigger
-  schedule:
-    - cron: '0 8 * * 5'  # every Friday at 8am UTC
-
-jobs:
-  autoblack:
-    if: github.repository_owner == 'explosion'
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-        with:
-            ref: ${{ github.head_ref }}
-      - uses: actions/setup-python@v4
-      - run: pip install black -c requirements.txt
-      - name: Auto-format code if needed
-        run: black spacy
-      # We can't run black --check here because that returns a non-zero excit
-      # code and makes GitHub think the action failed
-      - name: Check for modified files
-        id: git-check
-        run: echo modified=$(if git diff-index --quiet HEAD --; then echo "false"; else echo "true"; fi) >> $GITHUB_OUTPUT
-
-      - name: Create Pull Request
-        if: steps.git-check.outputs.modified == 'true'
-        uses: peter-evans/create-pull-request@v4
-        with:
-            title: Auto-format code with black
-            labels: meta
-            commit-message: Auto-format code with black
-            committer: GitHub <noreply@github.com>
-            author: explosion-bot <explosion-bot@users.noreply.github.com>
-            body: _This PR is auto-generated._
-            branch: autoblack
-            delete-branch: true
-            draft: false
-      - name: Check outputs
-        if: steps.git-check.outputs.modified == 'true'
-        run: |
-          echo "Pull Request Number - ${{ steps.cpr.outputs.pull-request-number }}"
-          echo "Pull Request URL - ${{ steps.cpr.outputs.pull-request-url }}"
diff --git a/.github/workflows/explosionbot.yml b/.github/workflows/explosionbot.yml
index 6b472cd12..910cfdc40 100644
--- a/.github/workflows/explosionbot.yml
+++ b/.github/workflows/explosionbot.yml
@@ -8,6 +8,7 @@ on:
 
 jobs:
   explosion-bot:
+    if: github.repository_owner == 'explosion'
     runs-on: ubuntu-latest
     steps:
       - name: Dump GitHub context
diff --git a/.github/workflows/issue-manager.yml b/.github/workflows/issue-manager.yml
index 8f3a151ea..6c7d7d5a6 100644
--- a/.github/workflows/issue-manager.yml
+++ b/.github/workflows/issue-manager.yml
@@ -13,6 +13,7 @@ on:
 
 jobs:
   issue-manager:
+    if: github.repository_owner == 'explosion'
     runs-on: ubuntu-latest
     steps:
       - uses: tiangolo/issue-manager@0.4.0
diff --git a/.github/workflows/lock.yml b/.github/workflows/lock.yml
index 794adee85..6c3985a93 100644
--- a/.github/workflows/lock.yml
+++ b/.github/workflows/lock.yml
@@ -13,6 +13,7 @@ concurrency:
 
 jobs:
   action:
+    if: github.repository_owner == 'explosion'
     runs-on: ubuntu-latest
     steps:
       - uses: dessant/lock-threads@v4
diff --git a/.github/workflows/spacy_universe_alert.yml b/.github/workflows/spacy_universe_alert.yml
index 837aaeb33..33851fbcc 100644
--- a/.github/workflows/spacy_universe_alert.yml
+++ b/.github/workflows/spacy_universe_alert.yml
@@ -7,6 +7,7 @@ on:
 
 jobs:
   build:
+    if: github.repository_owner == 'explosion'
     runs-on: ubuntu-latest
 
     steps:
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 000000000..f226057c9
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,173 @@
+name: tests
+
+on:
+  push:
+    branches-ignore:
+      - "spacy.io"
+      - "nightly.spacy.io"
+      - "v2.spacy.io"
+    paths-ignore:
+      - "*.md"
+      - "*.mdx"
+      - "website/**"
+      - ".github/workflows/**"
+  pull_request:
+    types: [opened, synchronize, reopened, edited]
+    paths-ignore:
+      - "*.md"
+      - "*.mdx"
+      - "website/**"
+
+jobs:
+  validate:
+    name: Validate
+    if: github.repository_owner == 'explosion'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v3
+
+      - name: Configure Python version
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.7"
+          architecture: x64
+
+      - name: black
+        run: |
+          python -m pip install black -c requirements.txt
+          python -m black spacy --check
+      - name: flake8
+        run: |
+          python -m pip install flake8==5.0.4
+          python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
+  tests:
+    name: Test
+    needs: validate  # reference the job id declared above, not its display name
+    strategy:
+      fail-fast: true
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        python_version: ["3.11"]
+        include:
+          - os: ubuntu-20.04
+            python_version: "3.6"
+          - os: windows-latest
+            python_version: "3.7"
+          - os: macos-latest
+            python_version: "3.8"
+          - os: ubuntu-latest
+            python_version: "3.9"
+          - os: windows-latest
+            python_version: "3.10"
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v3
+
+      - name: Configure Python version
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python_version }}
+          architecture: x64
+
+      - name: Install dependencies
+        run: |
+          python -m pip install -U build pip setuptools
+          python -m pip install -U -r requirements.txt
+
+      - name: Build sdist
+        run: |
+          python -m build --sdist
+
+      - name: Run mypy
+        run: |
+          python -m mypy spacy
+        if: matrix.python_version != '3.6'
+
+      - name: Delete source directory and .egg-info
+        run: |
+          rm -rf spacy *.egg-info
+        shell: bash
+
+      - name: Uninstall all packages
+        run: |
+          python -m pip freeze
+          python -m pip freeze --exclude pywin32 > installed.txt
+          python -m pip uninstall -y -r installed.txt
+
+      - name: Install from sdist
+        run: |
+          SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
+          SPACY_NUM_BUILD_JOBS=2 python -m pip install dist/$SDIST
+        shell: bash
+
+      - name: Test import
+        run: python -W error -c "import spacy"
+
+      - name: "Test download CLI"
+        run: |
+          python -m spacy download ca_core_news_sm
+          python -m spacy download ca_core_news_md
+          python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
+        if: matrix.python_version == '3.9'
+
+      - name: "Test download_url in info CLI"
+        run: |
+          python -W error -m spacy info ca_core_news_sm | grep -q download_url
+        if: matrix.python_version == '3.9'
+
+      - name: "Test no warnings on load (#11713)"
+        run: |
+          python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
+        if: matrix.python_version == '3.9'
+
+      - name: "Test convert CLI"
+        run: |
+          python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json .
+        if: matrix.python_version == '3.9'
+
+      - name: "Test debug config CLI"
+        run: |
+          python -m spacy init config -p ner -l ca ner.cfg
+          python -m spacy debug config ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy
+        if: matrix.python_version == '3.9'
+
+      - name: "Test debug data CLI"
+        run: |
+          # will have errors due to sparse data, check for summary in output
+          python -m spacy debug data ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy | grep -q Summary
+        if: matrix.python_version == '3.9'
+
+      - name: "Test train CLI"
+        run: |
+          python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1
+        if: matrix.python_version == '3.9'
+
+      - name: "Test assemble CLI"
+        run: |
+          python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
+          PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
+        if: matrix.python_version == '3.9'
+
+      - name: "Test assemble CLI vectors warning"
+        run: |
+          python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
+          python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
+        if: matrix.python_version == '3.9'
+
+      - name: "Install test requirements"
+        run: |
+          python -m pip install -U -r requirements.txt
+
+      - name: "Run CPU tests"
+        run: |
+          python -m pytest --pyargs spacy -W error
+
+      - name: "Run CPU tests with thinc-apple-ops"
+        run: |
+          python -m pip install 'spacy[apple]'
+          python -m pytest --pyargs spacy
+        if: startsWith(matrix.os, 'macos') && matrix.python_version == '3.11'
diff --git a/.github/workflows/universe_validation.yml b/.github/workflows/universe_validation.yml
new file mode 100644
index 000000000..a1e3253a9
--- /dev/null
+++ b/.github/workflows/universe_validation.yml
@@ -0,0 +1,33 @@
+name: universe validation
+
+on:
+  push:
+    branches-ignore:
+      - "spacy.io"
+      - "nightly.spacy.io"
+      - "v2.spacy.io"
+    paths:
+      - "website/meta/universe.json"
+  pull_request:
+    types: [opened, synchronize, reopened, edited]
+    paths:
+      - "website/meta/universe.json"
+
+jobs:
+  validate:
+    name: Validate
+    if: github.repository_owner == 'explosion'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v3
+
+      - name: Configure Python version
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.7"
+          architecture: x64
+
+      - name: Validate website/meta/universe.json
+        run: |
+          python .github/validate_universe_json.py website/meta/universe.json
diff --git a/README.md b/README.md
index bf8083e0e..3216a8890 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,9 @@ production-ready [**training system**](https://spacy.io/usage/training) and easy
 model packaging, deployment and workflow management. spaCy is commercial
 open-source software, released under the [MIT license](https://github.com/explosion/spaCy/blob/master/LICENSE).
 
+💥 **We'd love to hear more about your experience with spaCy!**
+[Fill out our survey here.](https://form.typeform.com/to/aMel9q9f)
+
 💫 **Version 3.5 out now!**
 [Check out the release notes here.](https://github.com/explosion/spaCy/releases)
 
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 9b7ebbe01..9ac5fcf1b 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -48,6 +48,9 @@ jobs:
           pip install flake8==5.0.4
           python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
         displayName: "flake8"
+      - script: |
+          python .github/validate_universe_json.py website/meta/universe.json
+        displayName: "Validate website/meta/universe.json"
 
   - job: "Test"
     dependsOn: "Validate"
diff --git a/pyproject.toml b/pyproject.toml
index 837cf1fd8..9cd96ac2d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ requires = [
     "cymem>=2.0.2,<2.1.0",
     "preshed>=3.0.2,<3.1.0",
     "murmurhash>=0.28.0,<1.1.0",
-    "thinc>=9.0.0.dev2,<9.1.0",
+    "thinc>=8.1.8,<8.2.0",
     "numpy>=1.15.0",
 ]
 build-backend = "setuptools.build_meta"
diff --git a/requirements.txt b/requirements.txt
index 6f4b61918..30a87c4cc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ spacy-legacy>=4.0.0.dev0,<4.1.0
 spacy-loggers>=1.0.0,<2.0.0
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
-thinc>=9.0.0.dev2,<9.1.0
+thinc>=8.1.8,<8.2.0
 ml_datasets>=0.2.0,<0.3.0
 murmurhash>=0.28.0,<1.1.0
 wasabi>=0.9.1,<1.2.0
diff --git a/setup.cfg b/setup.cfg
index 975ec03ce..859df09a9 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -29,7 +29,15 @@ project_urls =
 [options]
 zip_safe = false
 include_package_data = true
-python_requires = >=3.8
+python_requires = >=3.6
+setup_requires =
+    cython>=0.25,<3.0
+    numpy>=1.15.0
+    # We also need our Cython packages here to compile against
+    cymem>=2.0.2,<2.1.0
+    preshed>=3.0.2,<3.1.0
+    murmurhash>=0.28.0,<1.1.0
+    thinc>=8.1.8,<8.2.0
 install_requires =
     # Our libraries
     spacy-legacy>=4.0.0.dev0,<4.1.0
@@ -37,7 +45,7 @@ install_requires =
     murmurhash>=0.28.0,<1.1.0
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
-    thinc>=9.0.0.dev2,<9.1.0
+    thinc>=8.1.8,<8.2.0
     wasabi>=0.9.1,<1.2.0
     srsly>=2.4.3,<3.0.0
     catalogue>=2.0.6,<2.1.0
diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index 1c242cec8..61d563cb9 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -7,6 +7,7 @@ import srsly
 from wasabi import Printer, MESSAGES, msg
 import typer
 import math
+import numpy
 
 from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides
 from ._util import import_code, debug_cli, _format_number
@@ -520,9 +521,17 @@ def debug_data(
 
     if "tagger" in factory_names:
         msg.divider("Part-of-speech Tagging")
-        label_list = [label for label in gold_train_data["tags"]]
-        model_labels = _get_labels_from_model(nlp, "tagger")
+        label_list = list(gold_train_data["tags"].keys())
+        counts = list(gold_train_data["tags"].values())
         msg.info(f"{len(label_list)} label(s) in train data")
+        # Normalised entropy divides by log2(number of labels), which is 0 for
+        # a single label and undefined for none, so only report it for 2+ labels.
+        if len(label_list) > 1:
+            p = numpy.array(counts)
+            p = p / p.sum()
+            norm_entropy = (-p * numpy.log2(p)).sum() / numpy.log2(len(label_list))
+            msg.info(f"{norm_entropy} is the normalised label entropy")
+        model_labels = _get_labels_from_model(nlp, "tagger")
         labels = set(label_list)
         missing_labels = model_labels - labels
         if missing_labels:
diff --git a/spacy/cli/find_threshold.py b/spacy/cli/find_threshold.py
index efa664832..6d591053d 100644
--- a/spacy/cli/find_threshold.py
+++ b/spacy/cli/find_threshold.py
@@ -35,7 +35,7 @@ def find_threshold_cli(
     code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
     use_gpu: int = Opt(_DEFAULTS["use_gpu"], "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
     gold_preproc: bool = Opt(_DEFAULTS["gold_preproc"], "--gold-preproc", "-G", help="Use gold preprocessing"),
-    verbose: bool = Opt(False, "--silent", "-V", "-VV", help="Display more information for debugging purposes"),
+    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/info.py b/spacy/cli/info.py
index 974bc0f4e..d82bf3fbc 100644
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@@ -1,6 +1,5 @@
 from typing import Optional, Dict, Any, Union, List
 import platform
-import pkg_resources
 import json
 from pathlib import Path
 from wasabi import Printer, MarkdownRenderer
@@ -10,6 +9,7 @@ from ._util import app, Arg, Opt, string_to_list
 from .download import get_model_filename, get_latest_version
 from .. import util
 from .. import about
+from ..compat import importlib_metadata
 
 
 @app.command("info")
@@ -137,15 +137,14 @@ def info_installed_model_url(model: str) -> Optional[str]:
     dist-info available.
     """
     try:
-        dist = pkg_resources.get_distribution(model)
-        data = json.loads(dist.get_metadata("direct_url.json"))
-        return data["url"]
-    except pkg_resources.DistributionNotFound:
-        # no such package
-        return None
+        dist = importlib_metadata.distribution(model)
+        text = dist.read_text("direct_url.json")
+        if isinstance(text, str):
+            data = json.loads(text)
+            return data["url"]
     except Exception:
-        # something else, like no file or invalid JSON
-        return None
+        pass
+    return None
 
 
 def info_model_url(model: str) -> Dict[str, Any]:
diff --git a/spacy/cli/project/run.py b/spacy/cli/project/run.py
index 6dd174902..0f4858a99 100644
--- a/spacy/cli/project/run.py
+++ b/spacy/cli/project/run.py
@@ -2,7 +2,6 @@ from typing import Optional, List, Dict, Sequence, Any, Iterable, Tuple
 import os.path
 from pathlib import Path
 
-import pkg_resources
 from wasabi import msg
 from wasabi.util import locale_escape
 import sys
@@ -331,6 +330,7 @@ def _check_requirements(requirements: List[str]) -> Tuple[bool, bool]:
     RETURNS (Tuple[bool, bool]): Whether (1) any packages couldn't be imported, (2) any packages with version conflicts
         exist.
     """
+    import pkg_resources
 
     failed_pkgs_msgs: List[str] = []
     conflicting_pkgs_msgs: List[str] = []
diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index eb48d1de5..09df6a05b 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -3,7 +3,7 @@ the docs and the init config command. It encodes various best practices and
 can help generate the best possible configuration, given a user's requirements. #}
 {%- set use_transformer = hardware != "cpu" and transformer_data -%}
 {%- set transformer = transformer_data[optimize] if use_transformer else {} -%}
-{%- set listener_components = ["tagger", "morphologizer", "parser", "ner", "textcat", "textcat_multilabel", "entity_linker", "spancat", "trainable_lemmatizer"] -%}
+{%- set listener_components = ["tagger", "morphologizer", "parser", "ner", "textcat", "textcat_multilabel", "entity_linker", "spancat", "spancat_singlelabel", "trainable_lemmatizer"] -%}
 [paths]
 train = null
 dev = null
@@ -24,8 +24,11 @@ gpu_allocator = null
 lang = "{{ lang }}"
 {%- set has_textcat = ("textcat" in components or "textcat_multilabel" in components) -%}
 {%- set with_accuracy = optimize == "accuracy" -%}
-{%- set has_accurate_textcat = has_textcat and with_accuracy -%}
-{%- if ("tagger" in components or "morphologizer" in components or "parser" in components or "ner" in components or "spancat" in components or "trainable_lemmatizer" in components or "entity_linker" in components or has_accurate_textcat) -%}
+{# The BOW textcat doesn't need a source of features, so it can omit the
+tok2vec/transformer. #}
+{%- set with_accuracy_or_transformer = (use_transformer or with_accuracy) -%}
+{%- set textcat_needs_features = has_textcat and with_accuracy_or_transformer -%}
+{%- if ("tagger" in components or "morphologizer" in components or "parser" in components or "ner" in components or "spancat" in components or "spancat_singlelabel" in components or "trainable_lemmatizer" in components or "entity_linker" in components or textcat_needs_features) -%}
 {%- set full_pipeline = ["transformer" if use_transformer else "tok2vec"] + components -%}
 {%- else -%}
 {%- set full_pipeline = components -%}
@@ -154,6 +157,36 @@ grad_factor = 1.0
 sizes = [1,2,3]
 {% endif -%}
 
+{% if "spancat_singlelabel" in components %}
+[components.spancat_singlelabel]
+factory = "spancat_singlelabel"
+negative_weight = 1.0
+allow_overlap = true
+scorer = {"@scorers":"spacy.spancat_scorer.v1"}
+spans_key = "sc"
+
+[components.spancat_singlelabel.model]
+@architectures = "spacy.SpanCategorizer.v1"
+
+[components.spancat_singlelabel.model.reducer]
+@layers = "spacy.mean_max_reducer.v1"
+hidden_size = 128
+
+[components.spancat_singlelabel.model.scorer]
+@layers = "Softmax.v2"
+
+[components.spancat_singlelabel.model.tok2vec]
+@architectures = "spacy-transformers.TransformerListener.v1"
+grad_factor = 1.0
+
+[components.spancat_singlelabel.model.tok2vec.pooling]
+@layers = "reduce_mean.v1"
+
+[components.spancat_singlelabel.suggester]
+@misc = "spacy.ngram_suggester.v1"
+sizes = [1,2,3]
+{% endif %}
+
 {% if "trainable_lemmatizer" in components -%}
 [components.trainable_lemmatizer]
 factory = "trainable_lemmatizer"
@@ -219,10 +252,16 @@ no_output_layer = false
 
 {% else -%}
 [components.textcat.model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatCNN.v2"
 exclusive_classes = true
-ngram_size = 1
-no_output_layer = false
+nO = null
+
+[components.textcat.model.tok2vec]
+@architectures = "spacy-transformers.TransformerListener.v1"
+grad_factor = 1.0
+
+[components.textcat.model.tok2vec.pooling]
+@layers = "reduce_mean.v1"
 {%- endif %}
 {%- endif %}
 
@@ -250,10 +289,16 @@ no_output_layer = false
 
 {% else -%}
 [components.textcat_multilabel.model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatCNN.v2"
 exclusive_classes = false
-ngram_size = 1
-no_output_layer = false
+nO = null
+
+[components.textcat_multilabel.model.tok2vec]
+@architectures = "spacy-transformers.TransformerListener.v1"
+grad_factor = 1.0
+
+[components.textcat_multilabel.model.tok2vec.pooling]
+@layers = "reduce_mean.v1"
 {%- endif %}
 {%- endif %}
 
@@ -284,6 +329,7 @@ maxout_pieces = 3
 {% if "morphologizer" in components %}
 [components.morphologizer]
 factory = "morphologizer"
+label_smoothing = 0.05
 
 [components.morphologizer.model]
 @architectures = "spacy.Tagger.v2"
@@ -297,6 +343,7 @@ width = ${components.tok2vec.model.encode.width}
 {% if "tagger" in components %}
 [components.tagger]
 factory = "tagger"
+label_smoothing = 0.05
 
 [components.tagger.model]
 @architectures = "spacy.Tagger.v2"
@@ -370,6 +417,33 @@ width = ${components.tok2vec.model.encode.width}
 sizes = [1,2,3]
 {% endif %}
 
+{% if "spancat_singlelabel" in components %}
+[components.spancat_singlelabel]
+factory = "spancat_singlelabel"
+negative_weight = 1.0
+allow_overlap = true
+scorer = {"@scorers":"spacy.spancat_scorer.v1"}
+spans_key = "sc"
+
+[components.spancat_singlelabel.model]
+@architectures = "spacy.SpanCategorizer.v1"
+
+[components.spancat_singlelabel.model.reducer]
+@layers = "spacy.mean_max_reducer.v1"
+hidden_size = 128
+
+[components.spancat_singlelabel.model.scorer]
+@layers = "Softmax.v2"
+
+[components.spancat_singlelabel.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode.width}
+
+[components.spancat_singlelabel.suggester]
+@misc = "spacy.ngram_suggester.v1"
+sizes = [1,2,3]
+{% endif %}
+
 {% if "trainable_lemmatizer" in components -%}
 [components.trainable_lemmatizer]
 factory = "trainable_lemmatizer"
diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py
index ea6bba2c9..f42dad0c9 100644
--- a/spacy/displacy/__init__.py
+++ b/spacy/displacy/__init__.py
@@ -125,13 +125,17 @@ def app(environ, start_response):
     return [res]
 
 
-def parse_deps(orig_doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
+def parse_deps(
+    orig_doc: Union[Doc, Span], options: Dict[str, Any] = {}
+) -> Dict[str, Any]:
     """Generate dependency parse in {'words': [], 'arcs': []} format.
 
-    orig_doc (Doc): Document to parse.
+    orig_doc (Union[Doc, Span]): Document to parse.
     options (Dict[str, Any]): Dependency parse specific visualisation options.
     RETURNS (dict): Generated dependency parse keyed by words and arcs.
     """
+    if isinstance(orig_doc, Span):
+        orig_doc = orig_doc.as_doc()
     doc = Doc(orig_doc.vocab).from_bytes(
         orig_doc.to_bytes(exclude=["user_data", "user_hooks"])
     )
diff --git a/spacy/errors.py b/spacy/errors.py
index 5049100d8..26774aac3 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -542,6 +542,8 @@ class Errors(metaclass=ErrorsWithCodes):
             "during training, make sure to include it in 'annotating components'")
 
     # New errors added in v3.x
+    E850 = ("The PretrainVectors objective currently only supports default or "
+            "floret vectors, not {mode} vectors.")
     E851 = ("The 'textcat' component labels should only have values of 0 or 1, "
             "but found value of '{val}'.")
     E852 = ("The tar file pulled from the remote attempted an unsafe path "
@@ -951,6 +953,7 @@ class Errors(metaclass=ErrorsWithCodes):
              "with `displacy.serve(doc, port=port)`")
     E1050 = ("Port {port} is already in use. Please specify an available port with `displacy.serve(doc, port=port)` "
              "or use `auto_select_port=True` to pick an available port automatically.")
+    E1051 = ("'allow_overlap' can only be False when max_positive is 1, but found 'max_positive': {max_positive}.")
 
     # v4 error strings
     E4000 = ("Expected a Doc as input, but got: '{type}'")
diff --git a/spacy/lang/sr/__init__.py b/spacy/lang/sr/__init__.py
index fd0c8c832..b99ce96ec 100644
--- a/spacy/lang/sr/__init__.py
+++ b/spacy/lang/sr/__init__.py
@@ -1,11 +1,14 @@
 from .stop_words import STOP_WORDS
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 from .lex_attrs import LEX_ATTRS
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
 from ...language import Language, BaseDefaults
 
 
 class SerbianDefaults(BaseDefaults):
     tokenizer_exceptions = TOKENIZER_EXCEPTIONS
+    infixes = TOKENIZER_INFIXES
+    suffixes = TOKENIZER_SUFFIXES
     lex_attr_getters = LEX_ATTRS
     stop_words = STOP_WORDS
 
diff --git a/spacy/lang/sr/punctuation.py b/spacy/lang/sr/punctuation.py
new file mode 100644
index 000000000..793a20ec2
--- /dev/null
+++ b/spacy/lang/sr/punctuation.py
@@ -0,0 +1,36 @@
+from ..char_classes import LIST_ELLIPSES, LIST_ICONS, LIST_PUNCT, LIST_QUOTES
+from ..char_classes import CURRENCY, UNITS, PUNCT
+from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
+
+
+_infixes = (  # infix patterns, exported below as TOKENIZER_INFIXES
+    LIST_ELLIPSES
+    + LIST_ICONS
+    + [
+        r"(?<=[0-9])[+\-\*^](?=[0-9-])",  # one of + - * ^ after a digit, before a digit or "-"
+        r"(?<=[{al}{q}])\.(?=[{au}{q}])".format(  # "." between an al/q char and an au/q char
+            al=ALPHA_LOWER, au=ALPHA_UPPER, q=CONCAT_QUOTES
+        ),
+        r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA),  # "," with an ALPHA char on both sides
+        r"(?<=[{a}0-9])[:<>=/](?=[{a}])".format(a=ALPHA),  # one of : < > = / after ALPHA/digit, before ALPHA
+    ]
+)
+
+_suffixes = (  # suffix patterns, exported below as TOKENIZER_SUFFIXES
+    LIST_PUNCT
+    + LIST_ELLIPSES
+    + LIST_QUOTES
+    + LIST_ICONS
+    + [
+        r"(?<=[0-9])\+",  # "+" directly after a digit
+        r"(?<=°[FfCcKk])\.",  # "." after a degree sign followed by F, C or K (either case)
+        r"(?<=[0-9])(?:{c})".format(c=CURRENCY),  # CURRENCY alternative directly after a digit
+        r"(?<=[0-9])(?:{u})".format(u=UNITS),  # UNITS alternative directly after a digit
+        r"(?<=[{a}{e}{p}(?:{q})])\.".format(  # "." after any char of the class built from a, e, p, q
+            a=ALPHA, e=r"%²\-\+", q=CONCAT_QUOTES, p=PUNCT  # "(?:" and ")" are literal inside [...]
+        ),
+    ]
+)
+
+TOKENIZER_INFIXES = _infixes  # public names imported by spacy/lang/sr/__init__.py
+TOKENIZER_SUFFIXES = _suffixes
diff --git a/spacy/lexeme.pyi b/spacy/lexeme.pyi
index 4942b18aa..a83be974e 100644
--- a/spacy/lexeme.pyi
+++ b/spacy/lexeme.pyi
@@ -24,7 +24,8 @@ class Lexeme:
     def orth_(self) -> str: ...
     @property
     def text(self) -> str: ...
-    lower: str
+    orth: int
+    lower: int
     norm: int
     shape: int
     prefix: int
diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx
index e57098f17..3d94542cf 100644
--- a/spacy/lexeme.pyx
+++ b/spacy/lexeme.pyx
@@ -186,7 +186,7 @@ cdef class Lexeme:
         return self.orth_
 
     property lower:
-        """RETURNS (str): Lowercase form of the lexeme."""
+        """RETURNS (uint64): Lowercase form of the lexeme."""
         def __get__(self):
             return self.c.lower
 
diff --git a/spacy/ml/models/multi_task.py b/spacy/ml/models/multi_task.py
index a7d67c6dd..7eb13b608 100644
--- a/spacy/ml/models/multi_task.py
+++ b/spacy/ml/models/multi_task.py
@@ -1,5 +1,5 @@
 from typing import Any, Optional, Iterable, Tuple, List, Callable, TYPE_CHECKING, cast
-from thinc.types import Floats2d
+from thinc.types import Floats2d, Ints1d
 from thinc.api import chain, Maxout, LayerNorm, Softmax, Linear, zero_init, Model
 from thinc.api import MultiSoftmax, list2array
 from thinc.api import to_categorical, CosineDistance, L2Distance
@@ -7,7 +7,8 @@ from thinc.loss import Loss
 
 from ...util import registry, OOV_RANK
 from ...errors import Errors
-from ...attrs import ID
+from ...attrs import ID, ORTH
+from ...vectors import Mode as VectorsMode
 
 import numpy
 from functools import partial
@@ -67,14 +68,23 @@ def get_vectors_loss(ops, docs, prediction, distance):
     """Compute a loss based on a distance between the documents' vectors and
     the prediction.
     """
-    # The simplest way to implement this would be to vstack the
-    # token.vector values, but that's a bit inefficient, especially on GPU.
-    # Instead we fetch the index into the vectors table for each of our tokens,
-    # and look them up all at once. This prevents data copying.
-    ids = ops.flatten([doc.to_array(ID).ravel() for doc in docs])
-    target = docs[0].vocab.vectors.data[ids]
-    target[ids == OOV_RANK] = 0
-    d_target, loss = distance(prediction, target)
+    vocab = docs[0].vocab
+    if vocab.vectors.mode == VectorsMode.default:
+        # The simplest way to implement this would be to vstack the
+        # token.vector values, but that's a bit inefficient, especially on GPU.
+        # Instead we fetch the index into the vectors table for each of our
+        # tokens, and look them up all at once. This prevents data copying.
+        ids = ops.flatten([doc.to_array(ID).ravel() for doc in docs])
+        target = docs[0].vocab.vectors.data[ids]
+        target[ids == OOV_RANK] = 0
+        d_target, loss = distance(prediction, target)
+    elif vocab.vectors.mode == VectorsMode.floret:
+        keys = ops.flatten([cast(Ints1d, doc.to_array(ORTH)) for doc in docs])
+        target = vocab.vectors.get_batch(keys)
+        target = ops.as_contig(target)
+        d_target, loss = distance(prediction, target)
+    else:
+        raise ValueError(Errors.E850.format(mode=vocab.vectors.mode))
     return loss, d_target
 
 
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index cd13a4b21..c3e4e8957 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -502,18 +502,24 @@ class EntityLinker(TrainablePipe):
 
                 # Looping through each entity in batch (TODO: rewrite)
                 for j, ent in enumerate(ent_batch):
-                    sent_index = sentences.index(ent.sent)
-                    assert sent_index >= 0
+                    assert hasattr(ent, "sents")
+                    sents = list(ent.sents)
+                    sent_indices = (
+                        sentences.index(sents[0]),
+                        sentences.index(sents[-1]),
+                    )
+                    assert sent_indices[1] >= sent_indices[0] >= 0
 
                     if self.incl_context:
                         # get n_neighbour sentences, clipped to the length of the document
-                        start_sentence = max(0, sent_index - self.n_sents)
+                        start_sentence = max(0, sent_indices[0] - self.n_sents)
                         end_sentence = min(
-                            len(sentences) - 1, sent_index + self.n_sents
+                            len(sentences) - 1, sent_indices[1] + self.n_sents
                         )
                         start_token = sentences[start_sentence].start
                         end_token = sentences[end_sentence].end
                         sent_doc = doc[start_token:end_token].as_doc()
+
                         # currently, the context is the same for each entity in a sentence (should be refined)
                         sentence_encoding = self.model.predict([sent_doc])[0]
                         sentence_encoding_t = sentence_encoding.T
diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx
index fabc51fee..3146c7907 100644
--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@@ -50,13 +50,8 @@ DEFAULT_MORPH_MODEL = Config().from_str(default_model_config)["model"]
 @Language.factory(
     "morphologizer",
     assigns=["token.morph", "token.pos"],
-    default_config={
-        "model": DEFAULT_MORPH_MODEL,
-        "overwrite": True,
-        "extend": False,
-        "scorer": {"@scorers": "spacy.morphologizer_scorer.v1"},
-        "save_activations": False,
-    },
+    default_config={"model": DEFAULT_MORPH_MODEL, "overwrite": True, "extend": False,
+                    "scorer": {"@scorers": "spacy.morphologizer_scorer.v1"}, "label_smoothing": 0.0},
     default_score_weights={"pos_acc": 0.5, "morph_acc": 0.5, "morph_per_feat": None},
 )
 def make_morphologizer(
@@ -65,11 +60,11 @@ def make_morphologizer(
     name: str,
     overwrite: bool,
     extend: bool,
+    label_smoothing: float,
     scorer: Optional[Callable],
     save_activations: bool,
 ):
-    return Morphologizer(nlp.vocab, model, name, overwrite=overwrite, extend=extend, scorer=scorer,
-                         save_activations=save_activations)
+    return Morphologizer(nlp.vocab, model, name, overwrite=overwrite, extend=extend, label_smoothing=label_smoothing, scorer=scorer)
 
 
 def morphologizer_score(examples, **kwargs):
@@ -98,8 +93,9 @@ class Morphologizer(Tagger):
         model: Model,
         name: str = "morphologizer",
         *,
-        overwrite: bool = False,
-        extend: bool = False,
+        overwrite: bool = BACKWARD_OVERWRITE,
+        extend: bool = BACKWARD_EXTEND,
+        label_smoothing: float = 0.0,
         scorer: Optional[Callable] = morphologizer_score,
         save_activations: bool = False,
     ):
@@ -131,6 +127,7 @@ class Morphologizer(Tagger):
             "labels_pos": {},
             "overwrite": overwrite,
             "extend": extend,
+            "label_smoothing": label_smoothing,
         }
         self.cfg = dict(sorted(cfg.items()))
         self.scorer = scorer
@@ -289,7 +286,8 @@ class Morphologizer(Tagger):
         DOCS: https://spacy.io/api/morphologizer#get_loss
         """
         validate_examples(examples, "Morphologizer.get_loss")
-        loss_func = LegacySequenceCategoricalCrossentropy(names=tuple(self.labels), normalize=False)
+        loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False,
+                                                    label_smoothing=self.cfg["label_smoothing"])
         truths = []
         for eg in examples:
             eg_truths = []
diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py
index 33e1c87dc..cca1a0bee 100644
--- a/spacy/pipeline/spancat.py
+++ b/spacy/pipeline/spancat.py
@@ -1,5 +1,5 @@
-from typing import List, Dict, Callable, Tuple, Optional, Iterable, Any, cast
-from typing import Union, Protocol, runtime_checkable
+from typing import List, Dict, Callable, Tuple, Optional, Iterable, Any, cast, Union
+from dataclasses import dataclass
 from thinc.api import Config, Model, get_current_ops, set_dropout_rate, Ops
 from thinc.api import Optimizer
 from thinc.types import Ragged, Ints2d, Floats2d
@@ -46,7 +46,36 @@ maxout_pieces = 3
 depth = 4
 """
 
+spancat_singlelabel_default_config = """
+[model]
+@architectures = "spacy.SpanCategorizer.v1"
+scorer = {"@layers": "Softmax.v2"}
+
+[model.reducer]
+@layers = spacy.mean_max_reducer.v1
+hidden_size = 128
+
+[model.tok2vec]
+@architectures = "spacy.Tok2Vec.v2"
+[model.tok2vec.embed]
+@architectures = "spacy.MultiHashEmbed.v1"
+width = 96
+rows = [5000, 1000, 2500, 1000]
+attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"]
+include_static_vectors = false
+
+[model.tok2vec.encode]
+@architectures = "spacy.MaxoutWindowEncoder.v2"
+width = ${model.tok2vec.embed.width}
+window_size = 1
+maxout_pieces = 3
+depth = 4
+"""
+
 DEFAULT_SPANCAT_MODEL = Config().from_str(spancat_default_config)["model"]
+DEFAULT_SPANCAT_SINGLELABEL_MODEL = Config().from_str(
+    spancat_singlelabel_default_config
+)["model"]
 
 
 @runtime_checkable
@@ -124,10 +153,14 @@ def make_spancat(
     max_positive: Optional[int],
     save_activations: bool,
 ) -> "SpanCategorizer":
-    """Create a SpanCategorizer component. The span categorizer consists of two
+    """Create a SpanCategorizer component and configure it for multi-label
+    classification to be able to assign multiple labels for each span.
+    The span categorizer consists of two
     parts: a suggester function that proposes candidate spans, and a labeller
     model that predicts one or more labels for each span.
 
+    name (str): The component instance name, used to add entries to the
+        losses during training.
     suggester (Callable[[Iterable[Doc], Optional[Ops]], Ragged]): A function that suggests spans.
         Spans are returned as a ragged array with two integer columns, for the
         start and end positions.
@@ -150,12 +183,80 @@ def make_spancat(
     """
     return SpanCategorizer(
         nlp.vocab,
-        suggester=suggester,
         model=model,
-        spans_key=spans_key,
-        threshold=threshold,
-        max_positive=max_positive,
+        suggester=suggester,
         name=name,
+        spans_key=spans_key,
+        negative_weight=None,
+        allow_overlap=True,
+        max_positive=max_positive,
+        threshold=threshold,
+        scorer=scorer,
+        add_negative_label=False,
+    )
+
+
+@Language.factory(
+    "spancat_singlelabel",
+    assigns=["doc.spans"],
+    default_config={
+        "spans_key": "sc",
+        "model": DEFAULT_SPANCAT_SINGLELABEL_MODEL,
+        "negative_weight": 1.0,
+        "suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]},
+        "scorer": {"@scorers": "spacy.spancat_scorer.v1"},
+        "allow_overlap": True,
+    },
+    default_score_weights={"spans_sc_f": 1.0, "spans_sc_p": 0.0, "spans_sc_r": 0.0},
+)
+def make_spancat_singlelabel(
+    nlp: Language,
+    name: str,
+    suggester: Suggester,
+    model: Model[Tuple[List[Doc], Ragged], Floats2d],
+    spans_key: str,
+    negative_weight: float,
+    allow_overlap: bool,
+    scorer: Optional[Callable],
+) -> "SpanCategorizer":
+    """Create a SpanCategorizer component and configure it for multi-class
+    classification. With this configuration each span can get at most one
+    label. The span categorizer consists of two
+    parts: a suggester function that proposes candidate spans, and a labeller
+    model that predicts at most one label for each span.
+
+    name (str): The component instance name, used to add entries to the
+        losses during training.
+    suggester (Callable[[Iterable[Doc], Optional[Ops]], Ragged]): A function that suggests spans.
+        Spans are returned as a ragged array with two integer columns, for the
+        start and end positions.
+    model (Model[Tuple[List[Doc], Ragged], Floats2d]): A model instance that
+        is given a list of documents and (start, end) indices representing
+        candidate span offsets. The model predicts a probability for each category
+        for each span.
+    spans_key (str): Key of the doc.spans dict to save the spans under. During
+        initialization and training, the component will look for spans on the
+        reference document under the same key.
+    scorer (Optional[Callable]): The scoring method. Defaults to
+        Scorer.score_spans for the Doc.spans[spans_key] with overlapping
+        spans allowed.
+    negative_weight (float): Multiplier for the loss terms.
+        Can be used to downweight the negative samples if there are too many.
+    allow_overlap (bool): If True the data is assumed to contain overlapping spans.
+        Otherwise it produces non-overlapping spans greedily prioritizing
+        higher assigned label scores.
+    """
+    return SpanCategorizer(
+        nlp.vocab,
+        model=model,
+        suggester=suggester,
+        name=name,
+        spans_key=spans_key,
+        negative_weight=negative_weight,
+        allow_overlap=allow_overlap,
+        max_positive=1,
+        add_negative_label=True,
+        threshold=None,
         scorer=scorer,
         save_activations=save_activations,
     )
@@ -179,6 +280,27 @@ def make_spancat_scorer():
     return spancat_score
 
 
+@dataclass
+class _Intervals:
+    """Bookkeeping helper used to avoid storing overlapping spans.
+
+    Every position of an added interval is kept in one flat set; a
+    `(start, end)` pair is "contained" if any of its positions is in it.
+    """
+
+    def __init__(self):
+        self.ranges = set()
+
+    def add(self, i, j):
+        """Register every position of range(i, j)."""
+        self.ranges.update(range(i, j))
+
+    def __contains__(self, rang):
+        first, last = rang
+        taken = self.ranges
+        return any(pos in taken for pos in range(first, last))
+
+
 class SpanCategorizer(TrainablePipe):
     """Pipeline component to label spans of text.
 
@@ -192,26 +314,44 @@ class SpanCategorizer(TrainablePipe):
         suggester: Suggester,
         name: str = "spancat",
         *,
+        add_negative_label: bool = False,
         spans_key: str = "spans",
-        threshold: float = 0.5,
+        negative_weight: Optional[float] = 1.0,
+        allow_overlap: Optional[bool] = True,
         max_positive: Optional[int] = None,
+        threshold: Optional[float] = 0.5,
         scorer: Optional[Callable] = spancat_score,
         save_activations: bool = False,
     ) -> None:
-        """Initialize the span categorizer.
+        """Initialize the multi-label or multi-class span categorizer.
+
         vocab (Vocab): The shared vocabulary.
         model (thinc.api.Model): The Thinc Model powering the pipeline component.
+            For multi-class classification (single label per span) we recommend
+            using a Softmax classifier as the final layer, while for multi-label
+            classification (multiple possible labels per span) we recommend Logistic.
+        suggester (Callable[[Iterable[Doc], Optional[Ops]], Ragged]): A function that suggests spans.
+            Spans are returned as a ragged array with two integer columns, for the
+            start and end positions.
         name (str): The component instance name, used to add entries to the
             losses during training.
         spans_key (str): Key of the Doc.spans dict to save the spans under.
             During initialization and training, the component will look for
             spans on the reference document under the same key. Defaults to
             `"spans"`.
-        threshold (float): Minimum probability to consider a prediction
-            positive. Spans with a positive prediction will be saved on the Doc.
-            Defaults to 0.5.
+        add_negative_label (bool): Learn to predict a special 'negative_label'
+            when a Span is not annotated.
+        threshold (Optional[float]): Minimum probability to consider a prediction
+            positive. Defaults to 0.5. Spans with a positive prediction will be saved
+            on the Doc.
         max_positive (Optional[int]): Maximum number of labels to consider
             positive per span. Defaults to None, indicating no limit.
+        negative_weight (float): Multiplier for the loss terms.
+            Can be used to downweight the negative samples if there are too many
+            when add_negative_label is True. Otherwise it's unused.
+        allow_overlap (bool): If True the data is assumed to contain overlapping spans.
+            Otherwise it produces non-overlapping spans greedily prioritizing
+            higher assigned label scores. Only used when max_positive is 1.
         scorer (Optional[Callable]): The scoring method. Defaults to
             Scorer.score_spans for the Doc.spans[spans_key] with overlapping
             spans allowed.
@@ -223,13 +363,17 @@ class SpanCategorizer(TrainablePipe):
             "spans_key": spans_key,
             "threshold": threshold,
             "max_positive": max_positive,
+            "negative_weight": negative_weight,
+            "allow_overlap": allow_overlap,
         }
         self.vocab = vocab
         self.suggester = suggester
         self.model = model
         self.name = name
         self.scorer = scorer
-        self.save_activations = save_activations
+        self.add_negative_label = add_negative_label
+        if not allow_overlap and max_positive is not None and max_positive > 1:
+            raise ValueError(Errors.E1051.format(max_positive=max_positive))
 
     @property
     def key(self) -> str:
@@ -239,6 +383,21 @@ class SpanCategorizer(TrainablePipe):
         """
         return str(self.cfg["spans_key"])
 
+    def _allow_extra_label(self) -> None:
+        """Raise ValueError (E922) if the output layer is full and not resizable.
+
+        "nO" is read from the model or, failing that, its "output_layer" ref.
+        """
+        nO = None
+        if self.model.has_dim("nO"):
+            nO = self.model.get_dim("nO")
+        elif self.model.has_ref("output_layer"):
+            output_layer = self.model.get_ref("output_layer")
+            nO = output_layer.get_dim("nO") if output_layer.has_dim("nO") else None
+        if nO is not None and nO == self._n_labels and not self.is_resizable:
+            # Report the width found above: the model may lack its own "nO".
+            raise ValueError(Errors.E922.format(name=self.name, nO=nO))
+
     def add_label(self, label: str) -> int:
         """Add a new label to the pipe.
 
@@ -272,7 +431,28 @@ class SpanCategorizer(TrainablePipe):
         """
         return list(self.labels)
 
-    def predict(self, docs: Iterable[Doc]) -> ActivationsT:
+    @property
+    def _label_map(self) -> Dict[str, int]:
+        """RETURNS (Dict[str, int]): The label map."""
+        return {label: i for i, label in enumerate(self.labels)}
+
+    @property
+    def _n_labels(self) -> int:
+        """RETURNS (int): Number of labels."""
+        if self.add_negative_label:
+            return len(self.labels) + 1
+        else:
+            return len(self.labels)
+
+    @property
+    def _negative_label_i(self) -> Union[int, None]:
+        """RETURNS (Union[int, None]): Index of the negative label."""
+        if self.add_negative_label:
+            return len(self.label_data)
+        else:
+            return None
+
+    def predict(self, docs: Iterable[Doc]):
         """Apply the pipeline's model to a batch of docs, without modifying them.
 
         docs (Iterable[Doc]): The documents to predict.
@@ -313,24 +493,24 @@ class SpanCategorizer(TrainablePipe):
 
         DOCS: https://spacy.io/api/spancategorizer#set_annotations
         """
-        labels = self.labels
-
-        indices = activations["indices"]
-        assert isinstance(indices, Ragged)
-        scores = cast(Floats2d, activations["scores"])
-
+        indices, scores = indices_scores
         offset = 0
         for i, doc in enumerate(docs):
             indices_i = indices[i].dataXd
-            if self.save_activations:
-                doc.activations[self.name] = {}
-                doc.activations[self.name]["indices"] = indices_i
-                doc.activations[self.name]["scores"] = scores[
-                    offset : offset + indices.lengths[i]
-                ]
-            doc.spans[self.key] = self._make_span_group(
-                doc, indices_i, scores[offset : offset + indices.lengths[i]], labels  # type: ignore[arg-type]
-            )
+            allow_overlap = cast(bool, self.cfg["allow_overlap"])
+            if self.cfg["max_positive"] == 1:
+                doc.spans[self.key] = self._make_span_group_singlelabel(
+                    doc,
+                    indices_i,
+                    scores[offset : offset + indices.lengths[i]],
+                    allow_overlap,
+                )
+            else:
+                doc.spans[self.key] = self._make_span_group_multilabel(
+                    doc,
+                    indices_i,
+                    scores[offset : offset + indices.lengths[i]],
+                )
             offset += indices.lengths[i]
 
     def update(
@@ -390,9 +570,11 @@ class SpanCategorizer(TrainablePipe):
         spans = Ragged(
             self.model.ops.to_numpy(spans.data), self.model.ops.to_numpy(spans.lengths)
         )
-        label_map = {label: i for i, label in enumerate(self.labels)}
         target = numpy.zeros(scores.shape, dtype=scores.dtype)
+        if self.add_negative_label:
+            negative_spans = numpy.ones((scores.shape[0]))
         offset = 0
+        label_map = self._label_map
         for i, eg in enumerate(examples):
             # Map (start, end) offset of spans to the row in the d_scores array,
             # so that we can adjust the gradient for predictions that were
@@ -409,10 +591,16 @@ class SpanCategorizer(TrainablePipe):
                     row = spans_index[key]
                     k = label_map[gold_span.label_]
                     target[row, k] = 1.0
+                    if self.add_negative_label:
+                        # delete negative label target.
+                        negative_spans[row] = 0.0
             # The target is a flat array for all docs. Track the position
             # we're at within the flat array.
             offset += spans.lengths[i]
         target = self.model.ops.asarray(target, dtype="f")  # type: ignore
+        if self.add_negative_label:
+            negative_samples = numpy.nonzero(negative_spans)[0]
+            target[negative_samples, self._negative_label_i] = 1.0  # type: ignore
         # The target will have the values 0 (for untrue predictions) or 1
         # (for true predictions).
         # The scores should be in the range [0, 1].
@@ -421,6 +609,10 @@ class SpanCategorizer(TrainablePipe):
         # If the prediction is 0.9 and it's false, the gradient will be
         # 0.9 (0.9 - 0.0)
         d_scores = scores - target
+        if self.add_negative_label:
+            neg_weight = cast(float, self.cfg["negative_weight"])
+            if neg_weight != 1.0:
+                d_scores[negative_samples] *= neg_weight
         loss = float((d_scores**2).sum())
         return loss, d_scores
 
@@ -457,7 +649,7 @@ class SpanCategorizer(TrainablePipe):
         if subbatch:
             docs = [eg.x for eg in subbatch]
             spans = build_ngram_suggester(sizes=[1])(docs)
-            Y = self.model.ops.alloc2f(spans.dataXd.shape[0], len(self.labels))
+            Y = self.model.ops.alloc2f(spans.dataXd.shape[0], self._n_labels)
             self.model.initialize(X=(docs, spans), Y=Y)
         else:
             self.model.initialize()
@@ -471,31 +663,98 @@ class SpanCategorizer(TrainablePipe):
             eg.reference.spans.get(self.key, []), allow_overlap=True
         )
 
-    def _make_span_group(
-        self, doc: Doc, indices: Ints2d, scores: Floats2d, labels: List[str]
+    def _make_span_group_multilabel(
+        self,
+        doc: Doc,
+        indices: Ints2d,
+        scores: Floats2d,
     ) -> SpanGroup:
+        """Find the top-k labels for each span (k=max_positive)."""
         spans = SpanGroup(doc, name=self.key)
-        max_positive = self.cfg["max_positive"]
+        if scores.size == 0:
+            return spans
+        scores = self.model.ops.to_numpy(scores)
+        indices = self.model.ops.to_numpy(indices)
         threshold = self.cfg["threshold"]
+        max_positive = self.cfg["max_positive"]
 
         keeps = scores >= threshold
-        ranked = (scores * -1).argsort()  # type: ignore
         if max_positive is not None:
             assert isinstance(max_positive, int)
+            if self.add_negative_label:
+                negative_scores = numpy.copy(scores[:, self._negative_label_i])
+                scores[:, self._negative_label_i] = -numpy.inf
+                ranked = (scores * -1).argsort()  # type: ignore
+                scores[:, self._negative_label_i] = negative_scores
+            else:
+                ranked = (scores * -1).argsort()  # type: ignore
             span_filter = ranked[:, max_positive:]
             for i, row in enumerate(span_filter):
                 keeps[i, row] = False
-        spans.attrs["scores"] = scores[keeps].flatten()
-
-        indices = self.model.ops.to_numpy(indices)
-        keeps = self.model.ops.to_numpy(keeps)
 
+        attrs_scores = []
         for i in range(indices.shape[0]):
             start = indices[i, 0]
             end = indices[i, 1]
-
             for j, keep in enumerate(keeps[i]):
                 if keep:
-                    spans.append(Span(doc, start, end, label=labels[j]))
-
+                    if j != self._negative_label_i:
+                        spans.append(Span(doc, start, end, label=self.labels[j]))
+                        attrs_scores.append(scores[i, j])
+        spans.attrs["scores"] = numpy.array(attrs_scores)
+        return spans
+
+    def _make_span_group_singlelabel(
+        self,
+        doc: Doc,
+        indices: Ints2d,
+        scores: Floats2d,
+        allow_overlap: bool = True,
+    ) -> SpanGroup:
+        """Find the argmax label for each span."""
+        # Handle cases when there are zero suggestions
+        if scores.size == 0:
+            return SpanGroup(doc, name=self.key)
+        scores = self.model.ops.to_numpy(scores)
+        indices = self.model.ops.to_numpy(indices)
+        predicted = scores.argmax(axis=1)
+        argmax_scores = numpy.take_along_axis(
+            scores, numpy.expand_dims(predicted, 1), axis=1
+        )
+        keeps = numpy.ones(predicted.shape, dtype=bool)
+        # Remove samples where the negative label is the argmax.
+        if self.add_negative_label:
+            keeps = numpy.logical_and(keeps, predicted != self._negative_label_i)
+        # Filter samples according to threshold.
+        threshold = self.cfg["threshold"]
+        if threshold is not None:
+            keeps = numpy.logical_and(keeps, (argmax_scores >= threshold).squeeze())
+        # Sort spans according to argmax probability
+        if not allow_overlap:
+            # Get the probabilities
+            sort_idx = (argmax_scores.squeeze() * -1).argsort()
+            argmax_scores = argmax_scores[sort_idx]
+            predicted = predicted[sort_idx]
+            indices = indices[sort_idx]
+            keeps = keeps[sort_idx]
+        seen = _Intervals()
+        spans = SpanGroup(doc, name=self.key)
+        attrs_scores = []
+        for i in range(indices.shape[0]):
+            if not keeps[i]:
+                continue
+
+            label = predicted[i]
+            start = indices[i, 0]
+            end = indices[i, 1]
+
+            if not allow_overlap:
+                if (start, end) in seen:
+                    continue
+                else:
+                    seen.add(start, end)
+            attrs_scores.append(argmax_scores[i])
+            spans.append(Span(doc, start, end, label=self.labels[label]))
+
+        spans.attrs["scores"] = numpy.array(attrs_scores)
         return spans
diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx
index 101d8bcea..2dc64c30b 100644
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -47,13 +47,7 @@ DEFAULT_TAGGER_MODEL = Config().from_str(default_model_config)["model"]
 @Language.factory(
     "tagger",
     assigns=["token.tag"],
-    default_config={
-        "model": DEFAULT_TAGGER_MODEL,
-        "overwrite": False,
-        "scorer": {"@scorers": "spacy.tagger_scorer.v1"},
-        "neg_prefix": "!",
-        "save_activations": False,
-    },
+    default_config={"model": DEFAULT_TAGGER_MODEL, "overwrite": False, "scorer": {"@scorers": "spacy.tagger_scorer.v1"}, "neg_prefix": "!", "label_smoothing": 0.0},
     default_score_weights={"tag_acc": 1.0},
 )
 def make_tagger(
@@ -63,7 +57,7 @@ def make_tagger(
     overwrite: bool,
     scorer: Optional[Callable],
     neg_prefix: str,
-    save_activations: bool,
+    label_smoothing: float,
 ):
     """Construct a part-of-speech tagger component.
 
@@ -72,8 +66,7 @@ def make_tagger(
         in size, and be normalized as probabilities (all scores between 0 and 1,
         with the rows summing to 1).
     """
-    return Tagger(nlp.vocab, model, name, overwrite=overwrite, scorer=scorer, neg_prefix=neg_prefix,
-                  save_activations=save_activations)
+    return Tagger(nlp.vocab, model, name, overwrite=overwrite, scorer=scorer, neg_prefix=neg_prefix, label_smoothing=label_smoothing)
 
 
 def tagger_score(examples, **kwargs):
@@ -99,7 +92,7 @@ class Tagger(TrainablePipe):
         overwrite=False,
         scorer=tagger_score,
         neg_prefix="!",
-        save_activations: bool = False,
+        label_smoothing=0.0,
     ):
         """Initialize a part-of-speech tagger.
 
@@ -118,7 +111,7 @@ class Tagger(TrainablePipe):
         self.model = model
         self.name = name
         self._rehearsal_model = None
-        cfg = {"labels": [], "overwrite": overwrite, "neg_prefix": neg_prefix}
+        cfg = {"labels": [], "overwrite": overwrite, "neg_prefix": neg_prefix, "label_smoothing": label_smoothing}
         self.cfg = dict(sorted(cfg.items()))
         self.scorer = scorer
         self.save_activations = save_activations
@@ -294,7 +287,7 @@ class Tagger(TrainablePipe):
         DOCS: https://spacy.io/api/tagger#get_loss
         """
         validate_examples(examples, "Tagger.get_loss")
-        loss_func = LegacySequenceCategoricalCrossentropy(names=self.labels, normalize=False, neg_prefix=self.cfg["neg_prefix"])
+        loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False, neg_prefix=self.cfg["neg_prefix"], label_smoothing=self.cfg["label_smoothing"])
         # Convert empty tag "" to missing value None so that both misaligned
         # tokens and tokens with missing annotation have the default missing
         # value None.
diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py
index a99f8b561..df650eaa9 100644
--- a/spacy/tests/doc/test_span.py
+++ b/spacy/tests/doc/test_span.py
@@ -689,21 +689,32 @@ def test_span_group_copy(doc):
     assert len(doc_copy.spans["test"]) == 2
 
 
-@pytest.mark.issue(11113)
-def test_span_ent_id(en_tokenizer):
-    doc = en_tokenizer("a b c d")
-    doc.ents = [Span(doc, 1, 3, label="A", span_id="ID0")]
-    span = doc.ents[0]
-    assert doc[1].ent_id_ == "ID0"
+def test_for_partial_ent_sents():
+    """Spans may be associated with multiple sentences. These .sents should always be complete, not partial, sentences,
+    which this tests for.
+    """
+    doc = Doc(
+        English().vocab,
+        words=["Mahler's", "Symphony", "No.", "8", "was", "beautiful."],
+        sent_starts=[1, 0, 0, 1, 0, 0],
+    )
+    doc.set_ents([Span(doc, 1, 4, "WORK")])
+    # The specified entity is associated with both sentences in this doc, so we expect all sentences in the doc to be
+    # equal to the sentences referenced in ent.sents.
+    for doc_sent, ent_sent in zip(doc.sents, doc.ents[0].sents):
+        assert doc_sent == ent_sent
 
-    # setting Span.id sets Token.ent_id
-    span.id_ = "ID1"
-    doc.ents = [span]
-    assert doc.ents[0].ent_id_ == "ID1"
-    assert doc[1].ent_id_ == "ID1"
 
-    # Span.ent_id is an alias of Span.id
-    span.ent_id_ = "ID2"
-    doc.ents = [span]
-    assert doc.ents[0].ent_id_ == "ID2"
-    assert doc[1].ent_id_ == "ID2"
+def test_for_no_ent_sents():
+    """Span.sents should be computed correctly, even if the Span in question is trailing and doesn't form a full
+    sentence.
+    """
+    doc = Doc(
+        English().vocab,
+        words=["This", "is", "a", "test.", "ENTITY"],
+        sent_starts=[1, 0, 0, 0, 1],
+    )
+    doc.set_ents([Span(doc, 4, 5, "WORK")])
+    sents = list(doc.ents[0].sents)
+    assert len(sents) == 1
+    assert str(sents[0]) == str(doc.ents[0].sent) == "ENTITY"
diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py
index 62b8f9704..403c3fed0 100644
--- a/spacy/tests/parser/test_ner.py
+++ b/spacy/tests/parser/test_ner.py
@@ -9,6 +9,8 @@ from spacy.lang.en import English
 from spacy.lang.it import Italian
 from spacy.language import Language
 from spacy.lookups import Lookups
+from spacy.pipeline import EntityRecognizer
+from spacy.pipeline.ner import DEFAULT_NER_MODEL
 from spacy.pipeline._parser_internals.ner import BiluoPushDown
 from spacy.training import Example, iob_to_biluo, split_bilu_label
 from spacy.tokens import Doc, Span
@@ -17,8 +19,6 @@ from thinc.api import fix_random_seed
 import logging
 
 from ..util import make_tempdir
-from ...pipeline import EntityRecognizer
-from ...pipeline.ner import DEFAULT_NER_MODEL
 
 TRAIN_DATA = [
     ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
diff --git a/spacy/tests/parser/test_parse.py b/spacy/tests/parser/test_parse.py
index 2f2fa397e..c964166f5 100644
--- a/spacy/tests/parser/test_parse.py
+++ b/spacy/tests/parser/test_parse.py
@@ -10,13 +10,11 @@ from spacy.lang.en import English
 from spacy.training import Example
 from spacy.tokens import Doc
 from spacy.vocab import Vocab
-from spacy import util, registry
-from thinc.api import fix_random_seed
+from spacy.pipeline import DependencyParser
+from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
+from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
 
-from ...pipeline import DependencyParser
-from ...pipeline.dep_parser import DEFAULT_PARSER_MODEL
 from ..util import apply_transition_sequence, make_tempdir
-from ...pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
 
 TRAIN_DATA = [
     (
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index ed84ce674..7afb7c804 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -1,10 +1,10 @@
-from typing import Callable, Iterable, Dict, Any, cast
+from typing import Callable, Iterable, Dict, Any, Tuple
 
 import pytest
 from numpy.testing import assert_equal
 from thinc.types import Ragged
 
-from spacy import registry, util
+from spacy import registry, util, Language
 from spacy.attrs import ENT_KB_ID
 from spacy.compat import pickle
 from spacy.kb import Candidate, InMemoryLookupKB, get_candidates, KnowledgeBase
@@ -108,18 +108,23 @@ def test_issue7065():
 
 
 @pytest.mark.issue(7065)
-def test_issue7065_b():
+@pytest.mark.parametrize("entity_in_first_sentence", [True, False])
+def test_sentence_crossing_ents(entity_in_first_sentence: bool):
+    """Tests if NEL crashes if entities cross sentence boundaries and the first associated sentence doesn't have an
+    entity.
+    entity_in_first_sentence (bool): Whether to include an entity in the first sentence associated with the
+    sentence-crossing entity.
+    """
     # Test that the NEL doesn't crash when an entity crosses a sentence boundary
     nlp = English()
     vector_length = 3
-    nlp.add_pipe("sentencizer")
     text = "Mahler 's Symphony No. 8 was beautiful."
-    entities = [(0, 6, "PERSON"), (10, 24, "WORK")]
-    links = {
-        (0, 6): {"Q7304": 1.0, "Q270853": 0.0},
-        (10, 24): {"Q7304": 0.0, "Q270853": 1.0},
-    }
-    sent_starts = [1, -1, 0, 0, 0, 0, 0, 0, 0]
+    entities = [(10, 24, "WORK")]
+    links = {(10, 24): {"Q7304": 0.0, "Q270853": 1.0}}
+    if entity_in_first_sentence:
+        entities.append((0, 6, "PERSON"))
+        links[(0, 6)] = {"Q7304": 1.0, "Q270853": 0.0}
+    sent_starts = [1, -1, 0, 0, 0, 1, 0, 0, 0]
     doc = nlp(text)
     example = Example.from_dict(
         doc, {"entities": entities, "links": links, "sent_starts": sent_starts}
@@ -145,31 +150,14 @@ def test_issue7065_b():
 
     # Create the Entity Linker component and add it to the pipeline
     entity_linker = nlp.add_pipe("entity_linker", last=True)
-    entity_linker.set_kb(create_kb)
+    entity_linker.set_kb(create_kb)  # type: ignore
     # train the NEL pipe
     optimizer = nlp.initialize(get_examples=lambda: train_examples)
     for i in range(2):
-        losses = {}
-        nlp.update(train_examples, sgd=optimizer, losses=losses)
+        nlp.update(train_examples, sgd=optimizer)
 
-    # Add a custom rule-based component to mimick NER
-    patterns = [
-        {"label": "PERSON", "pattern": [{"LOWER": "mahler"}]},
-        {
-            "label": "WORK",
-            "pattern": [
-                {"LOWER": "symphony"},
-                {"LOWER": "no"},
-                {"LOWER": "."},
-                {"LOWER": "8"},
-            ],
-        },
-    ]
-    ruler = nlp.add_pipe("entity_ruler", before="entity_linker")
-    ruler.add_patterns(patterns)
-    # test the trained model - this should not throw E148
-    doc = nlp(text)
-    assert doc
+    # This shouldn't crash.
+    entity_linker.predict([example.reference])  # type: ignore
 
 
 def test_no_entities():
diff --git a/spacy/tests/pipeline/test_morphologizer.py b/spacy/tests/pipeline/test_morphologizer.py
index 5b9b17c01..fcbe62cf3 100644
--- a/spacy/tests/pipeline/test_morphologizer.py
+++ b/spacy/tests/pipeline/test_morphologizer.py
@@ -1,6 +1,6 @@
 from typing import cast
 import pytest
-from numpy.testing import assert_equal
+from numpy.testing import assert_equal, assert_almost_equal
 
 from spacy import util
 from spacy.training import Example
@@ -21,6 +21,8 @@ def test_label_types():
         morphologizer.add_label(9)
 
 
+TAGS = ["Feat=N", "Feat=V", "Feat=J"]
+
 TRAIN_DATA = [
     (
         "I like green eggs",
@@ -34,6 +36,29 @@ TRAIN_DATA = [
 ]
 
 
+def test_label_smoothing():
+    nlp = Language()
+    morph_no_ls = nlp.add_pipe("morphologizer", "no_label_smoothing")
+    morph_ls = nlp.add_pipe(
+        "morphologizer", "label_smoothing", config=dict(label_smoothing=0.05)
+    )
+    train_examples = []
+    losses = {}
+    for tag in TAGS:
+        morph_no_ls.add_label(tag)
+        morph_ls.add_label(tag)
+    for t in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
+
+    nlp.initialize(get_examples=lambda: train_examples)
+    tag_scores, bp_tag_scores = morph_ls.model.begin_update(
+        [eg.predicted for eg in train_examples]
+    )
+    no_ls_grads = morph_no_ls.get_loss(train_examples, tag_scores)[1][0]
+    ls_grads = morph_ls.get_loss(train_examples, tag_scores)[1][0]
+    assert_almost_equal(ls_grads / no_ls_grads, 0.94285715)
+
+
 def test_no_label():
     nlp = Language()
     nlp.add_pipe("morphologizer")
diff --git a/spacy/tests/pipeline/test_spancat.py b/spacy/tests/pipeline/test_spancat.py
index da9bffbc8..b216f9f6c 100644
--- a/spacy/tests/pipeline/test_spancat.py
+++ b/spacy/tests/pipeline/test_spancat.py
@@ -15,6 +15,8 @@ OPS = get_current_ops()
 
 SPAN_KEY = "labeled_spans"
 
+SPANCAT_COMPONENTS = ["spancat", "spancat_singlelabel"]
+
 TRAIN_DATA = [
     ("Who is Shaka Khan?", {"spans": {SPAN_KEY: [(7, 17, "PERSON")]}}),
     (
@@ -41,38 +43,42 @@ def make_examples(nlp, data=TRAIN_DATA):
     return train_examples
 
 
-def test_no_label():
+@pytest.mark.parametrize("name", SPANCAT_COMPONENTS)
+def test_no_label(name):
     nlp = Language()
-    nlp.add_pipe("spancat", config={"spans_key": SPAN_KEY})
+    nlp.add_pipe(name, config={"spans_key": SPAN_KEY})
     with pytest.raises(ValueError):
         nlp.initialize()
 
 
-def test_no_resize():
+@pytest.mark.parametrize("name", SPANCAT_COMPONENTS)
+def test_no_resize(name):
     nlp = Language()
-    spancat = nlp.add_pipe("spancat", config={"spans_key": SPAN_KEY})
+    spancat = nlp.add_pipe(name, config={"spans_key": SPAN_KEY})
     spancat.add_label("Thing")
     spancat.add_label("Phrase")
     assert spancat.labels == ("Thing", "Phrase")
     nlp.initialize()
-    assert spancat.model.get_dim("nO") == 2
+    assert spancat.model.get_dim("nO") == spancat._n_labels
     # this throws an error because the spancat can't be resized after initialization
     with pytest.raises(ValueError):
         spancat.add_label("Stuff")
 
 
-def test_implicit_labels():
+@pytest.mark.parametrize("name", SPANCAT_COMPONENTS)
+def test_implicit_labels(name):
     nlp = Language()
-    spancat = nlp.add_pipe("spancat", config={"spans_key": SPAN_KEY})
+    spancat = nlp.add_pipe(name, config={"spans_key": SPAN_KEY})
     assert len(spancat.labels) == 0
     train_examples = make_examples(nlp)
     nlp.initialize(get_examples=lambda: train_examples)
     assert spancat.labels == ("PERSON", "LOC")
 
 
-def test_explicit_labels():
+@pytest.mark.parametrize("name", SPANCAT_COMPONENTS)
+def test_explicit_labels(name):
     nlp = Language()
-    spancat = nlp.add_pipe("spancat", config={"spans_key": SPAN_KEY})
+    spancat = nlp.add_pipe(name, config={"spans_key": SPAN_KEY})
     assert len(spancat.labels) == 0
     spancat.add_label("PERSON")
     spancat.add_label("LOC")
@@ -102,13 +108,13 @@ def test_doc_gc():
             # XXX This fails with length 0 sometimes
             assert len(spangroup) > 0
             with pytest.raises(RuntimeError):
-                span = spangroup[0]
+                spangroup[0]
 
 
 @pytest.mark.parametrize(
     "max_positive,nr_results", [(None, 4), (1, 2), (2, 3), (3, 4), (4, 4)]
 )
-def test_make_spangroup(max_positive, nr_results):
+def test_make_spangroup_multilabel(max_positive, nr_results):
     fix_random_seed(0)
     nlp = Language()
     spancat = nlp.add_pipe(
@@ -120,10 +126,12 @@ def test_make_spangroup(max_positive, nr_results):
     indices = ngram_suggester([doc])[0].dataXd
     assert_array_equal(OPS.to_numpy(indices), numpy.asarray([[0, 1], [1, 2], [0, 2]]))
     labels = ["Thing", "City", "Person", "GreatCity"]
+    for label in labels:
+        spancat.add_label(label)
     scores = numpy.asarray(
         [[0.2, 0.4, 0.3, 0.1], [0.1, 0.6, 0.2, 0.4], [0.8, 0.7, 0.3, 0.9]], dtype="f"
     )
-    spangroup = spancat._make_span_group(doc, indices, scores, labels)
+    spangroup = spancat._make_span_group_multilabel(doc, indices, scores)
     assert len(spangroup) == nr_results
 
     # first span is always the second token "London"
@@ -154,6 +162,130 @@ def test_make_spangroup(max_positive, nr_results):
     assert_almost_equal(0.9, spangroup.attrs["scores"][-1], 5)
 
 
+@pytest.mark.parametrize(
+    "threshold,allow_overlap,nr_results",
+    [(0.05, True, 3), (0.05, False, 1), (0.5, True, 2), (0.5, False, 1)],
+)
+def test_make_spangroup_singlelabel(threshold, allow_overlap, nr_results):
+    fix_random_seed(0)
+    nlp = Language()
+    spancat = nlp.add_pipe(
+        "spancat",
+        config={
+            "spans_key": SPAN_KEY,
+            "threshold": threshold,
+            "max_positive": 1,
+        },
+    )
+    doc = nlp.make_doc("Greater London")
+    ngram_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[1, 2])
+    indices = ngram_suggester([doc])[0].dataXd
+    assert_array_equal(OPS.to_numpy(indices), numpy.asarray([[0, 1], [1, 2], [0, 2]]))
+    labels = ["Thing", "City", "Person", "GreatCity"]
+    for label in labels:
+        spancat.add_label(label)
+    scores = numpy.asarray(
+        [[0.2, 0.4, 0.3, 0.1], [0.1, 0.6, 0.2, 0.4], [0.8, 0.7, 0.3, 0.9]], dtype="f"
+    )
+    spangroup = spancat._make_span_group_singlelabel(
+        doc, indices, scores, allow_overlap
+    )
+    if threshold > 0.4:
+        if allow_overlap:
+            assert spangroup[0].text == "London"
+            assert spangroup[0].label_ == "City"
+            assert_almost_equal(0.6, spangroup.attrs["scores"][0], 5)
+            assert spangroup[1].text == "Greater London"
+            assert spangroup[1].label_ == "GreatCity"
+            assert spangroup.attrs["scores"][1] == 0.9
+            assert_almost_equal(0.9, spangroup.attrs["scores"][1], 5)
+        else:
+            assert spangroup[0].text == "Greater London"
+            assert spangroup[0].label_ == "GreatCity"
+            assert spangroup.attrs["scores"][0] == 0.9
+    else:
+        if allow_overlap:
+            assert spangroup[0].text == "Greater"
+            assert spangroup[0].label_ == "City"
+            assert spangroup[1].text == "London"
+            assert spangroup[1].label_ == "City"
+            assert spangroup[2].text == "Greater London"
+            assert spangroup[2].label_ == "GreatCity"
+        else:
+            assert spangroup[0].text == "Greater London"
+
+
+def test_make_spangroup_negative_label():
+    fix_random_seed(0)
+    nlp_single = Language()
+    nlp_multi = Language()
+    spancat_single = nlp_single.add_pipe(
+        "spancat",
+        config={
+            "spans_key": SPAN_KEY,
+            "threshold": 0.1,
+            "max_positive": 1,
+        },
+    )
+    spancat_multi = nlp_multi.add_pipe(
+        "spancat",
+        config={
+            "spans_key": SPAN_KEY,
+            "threshold": 0.1,
+            "max_positive": 2,
+        },
+    )
+    spancat_single.add_negative_label = True
+    spancat_multi.add_negative_label = True
+    doc = nlp_single.make_doc("Greater London")
+    labels = ["Thing", "City", "Person", "GreatCity"]
+    for label in labels:
+        spancat_multi.add_label(label)
+        spancat_single.add_label(label)
+    ngram_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[1, 2])
+    indices = ngram_suggester([doc])[0].dataXd
+    assert_array_equal(OPS.to_numpy(indices), numpy.asarray([[0, 1], [1, 2], [0, 2]]))
+    scores = numpy.asarray(
+        [
+            [0.2, 0.4, 0.3, 0.1, 0.1],
+            [0.1, 0.6, 0.2, 0.4, 0.9],
+            [0.8, 0.7, 0.3, 0.9, 0.1],
+        ],
+        dtype="f",
+    )
+    spangroup_multi = spancat_multi._make_span_group_multilabel(doc, indices, scores)
+    spangroup_single = spancat_single._make_span_group_singlelabel(doc, indices, scores)
+    assert len(spangroup_single) == 2
+    assert spangroup_single[0].text == "Greater"
+    assert spangroup_single[0].label_ == "City"
+    assert_almost_equal(0.4, spangroup_single.attrs["scores"][0], 5)
+    assert spangroup_single[1].text == "Greater London"
+    assert spangroup_single[1].label_ == "GreatCity"
+    assert spangroup_single.attrs["scores"][1] == 0.9
+    assert_almost_equal(0.9, spangroup_single.attrs["scores"][1], 5)
+
+    assert len(spangroup_multi) == 6
+    assert spangroup_multi[0].text == "Greater"
+    assert spangroup_multi[0].label_ == "City"
+    assert_almost_equal(0.4, spangroup_multi.attrs["scores"][0], 5)
+    assert spangroup_multi[1].text == "Greater"
+    assert spangroup_multi[1].label_ == "Person"
+    assert_almost_equal(0.3, spangroup_multi.attrs["scores"][1], 5)
+    assert spangroup_multi[2].text == "London"
+    assert spangroup_multi[2].label_ == "City"
+    assert_almost_equal(0.6, spangroup_multi.attrs["scores"][2], 5)
+    assert spangroup_multi[3].text == "London"
+    assert spangroup_multi[3].label_ == "GreatCity"
+    assert_almost_equal(0.4, spangroup_multi.attrs["scores"][3], 5)
+    assert spangroup_multi[4].text == "Greater London"
+    assert spangroup_multi[4].label_ == "Thing"
+    assert spangroup_multi[4].text == "Greater London"
+    assert_almost_equal(0.8, spangroup_multi.attrs["scores"][4], 5)
+    assert spangroup_multi[5].text == "Greater London"
+    assert spangroup_multi[5].label_ == "GreatCity"
+    assert_almost_equal(0.9, spangroup_multi.attrs["scores"][5], 5)
+
+
 def test_ngram_suggester(en_tokenizer):
     # test different n-gram lengths
     for size in [1, 2, 3]:
@@ -371,9 +503,9 @@ def test_overfitting_IO_overlapping():
         assert set([span.label_ for span in spans2]) == {"LOC", "DOUBLE_LOC"}
 
 
-def test_zero_suggestions():
+@pytest.mark.parametrize("name", SPANCAT_COMPONENTS)
+def test_zero_suggestions(name):
     # Test with a suggester that can return 0 suggestions
-
     @registry.misc("test_mixed_zero_suggester")
     def make_mixed_zero_suggester():
         def mixed_zero_suggester(docs, *, ops=None):
@@ -400,7 +532,7 @@ def test_zero_suggestions():
     fix_random_seed(0)
     nlp = English()
     spancat = nlp.add_pipe(
-        "spancat",
+        name,
         config={
             "suggester": {"@misc": "test_mixed_zero_suggester"},
             "spans_key": SPAN_KEY,
@@ -408,7 +540,7 @@ def test_zero_suggestions():
     )
     train_examples = make_examples(nlp)
     optimizer = nlp.initialize(get_examples=lambda: train_examples)
-    assert spancat.model.get_dim("nO") == 2
+    assert spancat.model.get_dim("nO") == spancat._n_labels
     assert set(spancat.labels) == {"LOC", "PERSON"}
 
     nlp.update(train_examples, sgd=optimizer)
@@ -424,9 +556,10 @@ def test_zero_suggestions():
     list(nlp.pipe(["", "one", "three three three"]))
 
 
-def test_set_candidates():
+@pytest.mark.parametrize("name", SPANCAT_COMPONENTS)
+def test_set_candidates(name):
     nlp = Language()
-    spancat = nlp.add_pipe("spancat", config={"spans_key": SPAN_KEY})
+    spancat = nlp.add_pipe(name, config={"spans_key": SPAN_KEY})
     train_examples = make_examples(nlp)
     nlp.initialize(get_examples=lambda: train_examples)
     texts = [
diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py
index 505b41f8c..defb12365 100644
--- a/spacy/tests/pipeline/test_tagger.py
+++ b/spacy/tests/pipeline/test_tagger.py
@@ -1,6 +1,6 @@
 from typing import cast
 import pytest
-from numpy.testing import assert_equal
+from numpy.testing import assert_equal, assert_almost_equal
 from spacy.attrs import TAG
 
 from spacy import util
@@ -71,6 +71,29 @@ PARTIAL_DATA = [
 ]
 
 
+def test_label_smoothing():
+    nlp = Language()
+    tagger_no_ls = nlp.add_pipe("tagger", "no_label_smoothing")
+    tagger_ls = nlp.add_pipe(
+        "tagger", "label_smoothing", config=dict(label_smoothing=0.05)
+    )
+    train_examples = []
+    losses = {}
+    for tag in TAGS:
+        tagger_no_ls.add_label(tag)
+        tagger_ls.add_label(tag)
+    for t in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
+
+    nlp.initialize(get_examples=lambda: train_examples)
+    tag_scores, bp_tag_scores = tagger_ls.model.begin_update(
+        [eg.predicted for eg in train_examples]
+    )
+    no_ls_grads = tagger_no_ls.get_loss(train_examples, tag_scores)[1][0]
+    ls_grads = tagger_ls.get_loss(train_examples, tag_scores)[1][0]
+    assert_almost_equal(ls_grads / no_ls_grads, 0.925)
+
+
 def test_no_label():
     nlp = Language()
     nlp.add_pipe("tagger")
diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index dc7ce46fe..1fdf059b3 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -2,7 +2,6 @@ import os
 import math
 from collections import Counter
 from typing import Tuple, List, Dict, Any
-import pkg_resources
 import time
 from pathlib import Path
 
@@ -29,6 +28,7 @@ from spacy.cli.debug_data import _print_span_characteristics
 from spacy.cli.debug_data import _get_spans_length_freq_dist
 from spacy.cli.download import get_compatibility, get_version
 from spacy.cli.init_config import RECOMMENDATIONS, init_config, fill_config
+from spacy.cli.init_pipeline import _init_labels
 from spacy.cli.package import get_third_party_dependencies
 from spacy.cli.package import _is_permitted_package_name
 from spacy.cli.project.remote_storage import RemoteStorage
@@ -47,7 +47,6 @@ from spacy.training.converters import conll_ner_to_docs, conllu_to_docs
 from spacy.training.converters import iob_to_docs
 from spacy.util import ENV_VARS, get_minor_version, load_model_from_config, load_config
 
-from ..cli.init_pipeline import _init_labels
 from .util import make_tempdir
 
 
@@ -553,7 +552,14 @@ def test_parse_cli_overrides():
 
 @pytest.mark.parametrize("lang", ["en", "nl"])
 @pytest.mark.parametrize(
-    "pipeline", [["tagger", "parser", "ner"], [], ["ner", "textcat", "sentencizer"]]
+    "pipeline",
+    [
+        ["tagger", "parser", "ner"],
+        [],
+        ["ner", "textcat", "sentencizer"],
+        ["morphologizer", "spancat", "entity_linker"],
+        ["spancat_singlelabel", "textcat_multilabel"],
+    ],
 )
 @pytest.mark.parametrize("optimize", ["efficiency", "accuracy"])
 @pytest.mark.parametrize("pretraining", [True, False])
@@ -1126,6 +1132,7 @@ def test_cli_find_threshold(capsys):
                 )
 
 
+@pytest.mark.filterwarnings("ignore::DeprecationWarning")
 @pytest.mark.parametrize(
     "reqs,output",
     [
@@ -1158,6 +1165,8 @@ def test_cli_find_threshold(capsys):
     ],
 )
 def test_project_check_requirements(reqs, output):
+    import pkg_resources
+
     # excessive guard against unlikely package name
     try:
         pkg_resources.require("spacyunknowndoesnotexist12345")
diff --git a/spacy/tests/test_cli_app.py b/spacy/tests/test_cli_app.py
index 8aaadf686..9ba4f0e5c 100644
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@@ -5,10 +5,18 @@ import srsly
 from typer.testing import CliRunner
 from spacy.tokens import DocBin, Doc
 
-from spacy.cli._util import app
+from spacy.cli._util import app, get_git_version
 from .util import make_tempdir, normalize_whitespace
 
 
+def has_git():
+    try:
+        get_git_version()
+        return True
+    except RuntimeError:
+        return False
+
+
 def test_convert_auto():
     with make_tempdir() as d_in, make_tempdir() as d_out:
         for f in ["data1.iob", "data2.iob", "data3.iob"]:
@@ -181,6 +189,7 @@ def test_project_run(project_dir):
     assert "okokok" in result.stdout
 
 
+@pytest.mark.skipif(not has_git(), reason="git not installed")
 @pytest.mark.parametrize(
     "options",
     [
diff --git a/spacy/tests/test_displacy.py b/spacy/tests/test_displacy.py
index f298b38e0..837a92e02 100644
--- a/spacy/tests/test_displacy.py
+++ b/spacy/tests/test_displacy.py
@@ -275,6 +275,20 @@ def test_displacy_parse_deps(en_vocab):
         {"start": 2, "end": 3, "label": "det", "dir": "left"},
         {"start": 1, "end": 3, "label": "attr", "dir": "right"},
     ]
+    # Test that displacy.parse_deps converts Span to Doc
+    deps = displacy.parse_deps(doc[:])
+    assert isinstance(deps, dict)
+    assert deps["words"] == [
+        {"lemma": None, "text": words[0], "tag": pos[0]},
+        {"lemma": None, "text": words[1], "tag": pos[1]},
+        {"lemma": None, "text": words[2], "tag": pos[2]},
+        {"lemma": None, "text": words[3], "tag": pos[3]},
+    ]
+    assert deps["arcs"] == [
+        {"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
+        {"start": 2, "end": 3, "label": "det", "dir": "left"},
+        {"start": 1, "end": 3, "label": "attr", "dir": "right"},
+    ]
 
 
 def test_displacy_invalid_arcs():
diff --git a/spacy/tests/training/test_pretraining.py b/spacy/tests/training/test_pretraining.py
index 9359c8485..d1db92de5 100644
--- a/spacy/tests/training/test_pretraining.py
+++ b/spacy/tests/training/test_pretraining.py
@@ -2,17 +2,19 @@ from pathlib import Path
 import numpy as np
 import pytest
 import srsly
-from spacy.vocab import Vocab
-from thinc.api import Config
+from thinc.api import Config, get_current_ops
 
+from spacy import util
+from spacy.lang.en import English
+from spacy.training.initialize import init_nlp
+from spacy.training.loop import train
+from spacy.training.pretrain import pretrain
+from spacy.tokens import Doc, DocBin
+from spacy.language import DEFAULT_CONFIG_PRETRAIN_PATH, DEFAULT_CONFIG_PATH
+from spacy.ml.models.multi_task import create_pretrain_vectors
+from spacy.vectors import Vectors
+from spacy.vocab import Vocab
 from ..util import make_tempdir
-from ... import util
-from ...lang.en import English
-from ...training.initialize import init_nlp
-from ...training.loop import train
-from ...training.pretrain import pretrain
-from ...tokens import Doc, DocBin
-from ...language import DEFAULT_CONFIG_PRETRAIN_PATH, DEFAULT_CONFIG_PATH
 
 pretrain_string_listener = """
 [nlp]
@@ -346,3 +348,26 @@ def write_vectors_model(tmp_dir):
     nlp = English(vocab)
     nlp.to_disk(nlp_path)
     return str(nlp_path)
+
+
+def test_pretrain_default_vectors():
+    nlp = English()
+    nlp.add_pipe("tok2vec")
+    nlp.initialize()
+
+    # default vectors are supported
+    nlp.vocab.vectors = Vectors(shape=(10, 10))
+    create_pretrain_vectors(1, 1, "cosine")(nlp.vocab, nlp.get_pipe("tok2vec").model)
+
+    # floret vectors are supported
+    nlp.vocab.vectors = Vectors(
+        data=get_current_ops().xp.zeros((10, 10)), mode="floret", hash_count=1
+    )
+    create_pretrain_vectors(1, 1, "cosine")(nlp.vocab, nlp.get_pipe("tok2vec").model)
+
+    # error for no vectors
+    with pytest.raises(ValueError, match="E875"):
+        nlp.vocab.vectors = Vectors()
+        create_pretrain_vectors(1, 1, "cosine")(
+            nlp.vocab, nlp.get_pipe("tok2vec").model
+        )
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index 75f7db7ca..8227cf453 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -494,10 +494,12 @@ cdef class Span:
                     start = i
                     if start >= self.end:
                         break
-            if start < self.end:
-                spans.append(Span(self.doc, start, self.end))
-        return tuple(spans)
+                elif i == self.doc.length - 1:
+                    yield Span(self.doc, start, self.doc.length)
 
+            # Ensure that trailing parts of the Span instance are included in last element of .sents.
+            if start == self.doc.length - 1:
+                yield Span(self.doc, start, self.doc.length)
 
     @property
     def ents(self):
diff --git a/website/docs/api/cli.mdx b/website/docs/api/cli.mdx
index 1a3f15e48..cbce62dad 100644
--- a/website/docs/api/cli.mdx
+++ b/website/docs/api/cli.mdx
@@ -1253,19 +1253,19 @@ be provided.
 > $ python -m spacy find-threshold my_nlp data.spacy spancat threshold spans_sc_f
 > ```
 
-| Name                    | Description                                                                                                                                                                          |
-| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `model`                 | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                           |
-| `data_path`             | Path to file with DocBin with docs to use for threshold search. ~~Path (positional)~~                                                                                                |
-| `pipe_name`             | Name of pipe to examine thresholds for. ~~str (positional)~~                                                                                                                         |
-| `threshold_key`         | Key of threshold attribute in component's configuration. ~~str (positional)~~                                                                                                        |
-| `scores_key`            | Name of score to metric to optimize. ~~str (positional)~~                                                                                                                            |
-| `--n_trials`, `-n`      | Number of trials to determine optimal thresholds. ~~int (option)~~                                                                                                                   |
-| `--code`, `-c`          | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
-| `--gpu-id`, `-g`        | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
-| `--gold-preproc`, `-G`  | Use gold preprocessing. ~~bool (flag)~~                                                                                                                                              |
-| `--silent`, `-V`, `-VV` | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
-| `--help`, `-h`          | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           |
+| Name                     | Description                                                                                                                                                                          |
+| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `model`                  | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                           |
+| `data_path`              | Path to file with DocBin with docs to use for threshold search. ~~Path (positional)~~                                                                                                |
+| `pipe_name`              | Name of pipe to examine thresholds for. ~~str (positional)~~                                                                                                                         |
+| `threshold_key`          | Key of threshold attribute in component's configuration. ~~str (positional)~~                                                                                                        |
+| `scores_key`             | Name of score metric to optimize. ~~str (positional)~~                                                                                                                               |
+| `--n_trials`, `-n`       | Number of trials to determine optimal thresholds. ~~int (option)~~                                                                                                                   |
+| `--code`, `-c`           | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--gpu-id`, `-g`         | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
+| `--gold-preproc`, `-G`   | Use gold preprocessing. ~~bool (flag)~~                                                                                                                                              |
+| `--verbose`, `-V`, `-VV` | Display more information for debugging purposes. ~~bool (flag)~~                                                                                                                     |
+| `--help`, `-h`           | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           |
 
 ## assemble {id="assemble",tag="command"}
 
diff --git a/website/docs/api/coref.mdx b/website/docs/api/coref.mdx
index 8647f35d1..0b9ebb888 100644
--- a/website/docs/api/coref.mdx
+++ b/website/docs/api/coref.mdx
@@ -64,7 +64,7 @@ details on the architectures and their arguments and hyperparameters.
 > config={
 >     "model": DEFAULT_COREF_MODEL,
 >     "span_cluster_prefix": DEFAULT_CLUSTER_PREFIX,
-> },
+> }
 > nlp.add_pipe("experimental_coref", config=config)
 > ```
 
diff --git a/website/docs/api/morphologizer.mdx b/website/docs/api/morphologizer.mdx
index 9514bc773..d509d30d5 100644
--- a/website/docs/api/morphologizer.mdx
+++ b/website/docs/api/morphologizer.mdx
@@ -42,13 +42,13 @@ architectures and their arguments and hyperparameters.
 > nlp.add_pipe("morphologizer", config=config)
 > ```
 
-| Setting                                         | Description                                                                                                                                                                                                                                                            |
-| ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `model`                                         | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                |
-| `overwrite` <Tag variant="new">3.2</Tag>        | Whether the values of existing features are overwritten. Defaults to `False`. ~~bool~~                                                                                                                                                                                 |
-| `extend` <Tag variant="new">3.2</Tag>           | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~                                                                                                                      |
-| `scorer` <Tag variant="new">3.2</Tag>           | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~                                                                                                                                       |
+| Setting                                        | Description                                                                                                                                                                                                                                                            |
+| ---------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `model`                                        | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                |
+| `overwrite` <Tag variant="new">3.2</Tag>       | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~                                                                                                                                                                                  |
+| `extend` <Tag variant="new">3.2</Tag>          | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~                                                                                                                      |
+| `scorer` <Tag variant="new">3.2</Tag>          | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
+| `label_smoothing` <Tag variant="new">3.6</Tag> | [Label smoothing](https://arxiv.org/abs/1906.02629) factor. Defaults to `0.0`. ~~float~~                                                                                                                                                                               |
 
 ```python
 %%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx
diff --git a/website/docs/api/spancategorizer.mdx b/website/docs/api/spancategorizer.mdx
index c51b32671..f54a8687b 100644
--- a/website/docs/api/spancategorizer.mdx
+++ b/website/docs/api/spancategorizer.mdx
@@ -13,8 +13,16 @@ A span categorizer consists of two parts: a [suggester function](#suggesters)
 that proposes candidate spans, which may or may not overlap, and a labeler model
 that predicts zero or more labels for each candidate.
 
-Predicted spans will be saved in a [`SpanGroup`](/api/spangroup) on the doc.
-Individual span scores can be found in `spangroup.attrs["scores"]`.
+This component comes in two forms: `spancat` and `spancat_singlelabel` (added in
+spaCy v3.5.1). When you need to perform multi-label classification on your
+spans, use `spancat`. The `spancat` component uses a `Logistic` layer where the
+output class probabilities are independent for each class. However, if you need
+to predict at most one true class for a span, then use `spancat_singlelabel`. It
+uses a `Softmax` layer and treats the task as a multi-class problem.
+
+Predicted spans will be saved in a [`SpanGroup`](/api/spangroup) on the doc
+under `doc.spans[spans_key]`, where `spans_key` is a component config setting.
+Individual span scores are stored in `doc.spans[spans_key].attrs["scores"]`.
 
 ## Assigned Attributes {id="assigned-attributes"}
 
@@ -22,7 +30,9 @@ Predictions will be saved to `Doc.spans[spans_key]` as a
 [`SpanGroup`](/api/spangroup). The scores for the spans in the `SpanGroup` will
 be saved in `SpanGroup.attrs["scores"]`.
 
-`spans_key` defaults to `"sc"`, but can be passed as a parameter.
+`spans_key` defaults to `"sc"`, but can be passed as a parameter. The `spancat`
+component will overwrite any existing spans under the spans key
+`doc.spans[spans_key]`.
 
 | Location                               | Value                                                    |
 | -------------------------------------- | -------------------------------------------------------- |
@@ -38,7 +48,7 @@ how the component should be configured. You can override its settings via the
 [model architectures](/api/architectures) documentation for details on the
 architectures and their arguments and hyperparameters.
 
-> #### Example
+> #### Example (spancat)
 >
 > ```python
 > from spacy.pipeline.spancat import DEFAULT_SPANCAT_MODEL
@@ -52,15 +62,33 @@ architectures and their arguments and hyperparameters.
 > nlp.add_pipe("spancat", config=config)
 > ```
 
-| Setting                                         | Description                                                                                                                                                                                                                                                                                             |
-| ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `suggester`                                     | A function that [suggests spans](#suggesters). Spans are returned as a ragged array with two integer columns, for the start and end positions. Defaults to [`ngram_suggester`](#ngram_suggester). ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~                                                  |
-| `model`                                         | A model instance that is given a a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. Defaults to [SpanCategorizer](/api/architectures#SpanCategorizer). ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~ |
-| `spans_key`                                     | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~                                                                                  |
-| `threshold`                                     | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~                                                                                                                                                          |
-| `max_positive`                                  | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. ~~Optional[int]~~                                                                                                                                                                                      |
-| `scorer`                                        | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~                                                                                                                                       |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"indices"` and `"scores"`. ~~Union[bool, list[str]]~~                                                                                                                                                                                 |
+> #### Example (spancat_singlelabel)
+>
+> ```python
+> from spacy.pipeline.spancat import DEFAULT_SPANCAT_SINGLELABEL_MODEL
+> config = {
+>     "threshold": 0.5,
+>     "spans_key": "labeled_spans",
+>     "model": DEFAULT_SPANCAT_SINGLELABEL_MODEL,
+>     "suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]},
+>     # Additional spancat_singlelabel parameters
+>     "negative_weight": 0.8,
+>     "allow_overlap": True,
+> }
+> nlp.add_pipe("spancat_singlelabel", config=config)
+> ```
+
+| Setting                                             | Description                                                                                                                                                                                                                                                                                             |
+| --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `suggester`                                         | A function that [suggests spans](#suggesters). Spans are returned as a ragged array with two integer columns, for the start and end positions. Defaults to [`ngram_suggester`](#ngram_suggester). ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~                                                  |
+| `model`                                             | A model instance that is given a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. Defaults to [SpanCategorizer](/api/architectures#SpanCategorizer). ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~   |
+| `spans_key`                                         | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~                                                                                  |
+| `threshold`                                         | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Meant to be used in combination with the multi-label `spancat` component with a `Logistic` scoring layer. Defaults to `0.5`. ~~float~~                                                |
+| `max_positive`                                      | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. Meant to be used together with the `spancat` component and defaults to 0 with `spancat_singlelabel`. ~~Optional[int]~~                                                                                 |
+| `scorer`                                            | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~                                                                                                                                       |
+| `add_negative_label` <Tag variant="new">3.5.1</Tag> | Whether to learn to predict a special negative label for each unannotated `Span`. This should be `True` when using a `Softmax` classifier layer and so it's `True` by default for `spancat_singlelabel`. Spans with negative labels and their scores are not stored as annotations. ~~bool~~            |
+| `negative_weight` <Tag variant="new">3.5.1</Tag>    | Multiplier for the loss terms. It can be used to downweight the negative samples if there are too many. It is only used when `add_negative_label` is `True`. Defaults to `1.0`. ~~float~~                                                                                                               |
+| `allow_overlap` <Tag variant="new">3.5.1</Tag>      | If `True`, the data is assumed to contain overlapping spans. It is only available when `max_positive` is exactly 1. Defaults to `True`. ~~bool~~                                                                                                                                                        |
 
 ```python
 %%GITHUB_SPACY/spacy/pipeline/spancat.py
@@ -72,6 +100,7 @@ architectures and their arguments and hyperparameters.
 >
 > ```python
 > # Construction via add_pipe with default model
+> # Replace 'spancat' with 'spancat_singlelabel' for exclusive classes
 > spancat = nlp.add_pipe("spancat")
 >
 > # Construction via add_pipe with custom model
@@ -87,16 +116,19 @@ Create a new pipeline instance. In your application, you would normally use a
 shortcut for this and instantiate the component using its string name and
 [`nlp.add_pipe`](/api/language#create_pipe).
 
-| Name           | Description                                                                                                                                                                                                                          |
-| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `vocab`        | The shared vocabulary. ~~Vocab~~                                                                                                                                                                                                     |
-| `model`        | A model instance that is given a a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~ |
-| `suggester`    | A function that [suggests spans](#suggesters). Spans are returned as a ragged array with two integer columns, for the start and end positions. ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~                                  |
-| `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                                                                                                                  |
-| _keyword-only_ |                                                                                                                                                                                                                                      |
-| `spans_key`    | Key of the [`Doc.spans`](/api/doc#sans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~                |
-| `threshold`    | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~                                                                                       |
-| `max_positive` | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. ~~Optional[int]~~                                                                                                                   |
+| Name                                                | Description                                                                                                                                                                                                                                                                                  |
+| --------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `vocab`                                             | The shared vocabulary. ~~Vocab~~                                                                                                                                                                                                                                                             |
+| `model`                                             | A model instance that is given a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~                                                           |
+| `suggester`                                         | A function that [suggests spans](#suggesters). Spans are returned as a ragged array with two integer columns, for the start and end positions. ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~                                                                                          |
+| `name`                                              | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                                                                                                                                                                          |
+| _keyword-only_                                      |                                                                                                                                                                                                                                                                                              |
+| `spans_key`                                         | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~                                                                       |
+| `threshold`                                         | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~                                                                                                                                               |
+| `max_positive`                                      | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. ~~Optional[int]~~                                                                                                                                                                           |
+| `allow_overlap` <Tag variant="new">3.5.1</Tag>      | If `True`, the data is assumed to contain overlapping spans. It is only available when `max_positive` is exactly 1. Defaults to `True`. ~~bool~~                                                                                                                                             |
+| `add_negative_label` <Tag variant="new">3.5.1</Tag> | Whether to learn to predict a special negative label for each unannotated `Span`. This should be `True` when using a `Softmax` classifier layer and so it's `True` by default for `spancat_singlelabel`. Spans with negative labels and their scores are not stored as annotations. ~~bool~~ |
+| `negative_weight` <Tag variant="new">3.5.1</Tag>    | Multiplier for the loss terms. It can be used to downweight the negative samples if there are too many. It is only used when `add_negative_label` is `True`. Defaults to `1.0`. ~~float~~                                                                                                    |
 
 ## SpanCategorizer.\_\_call\_\_ {id="call",tag="method"}
 
diff --git a/website/docs/api/stringstore.mdx b/website/docs/api/stringstore.mdx
index 7e380f5f8..ea7e985e3 100644
--- a/website/docs/api/stringstore.mdx
+++ b/website/docs/api/stringstore.mdx
@@ -8,6 +8,13 @@ Look up strings by 64-bit hashes. As of v2.0, spaCy uses hash values instead of
 integer IDs. This ensures that strings always map to the same ID, even from
 different `StringStores`.
 
+<Infobox variant="warning">
+
+Note that a `StringStore` instance is not static. It increases in size as texts
+with new tokens are processed.
+
+</Infobox>
+
 ## StringStore.\_\_init\_\_ {id="init",tag="method"}
 
 Create the `StringStore`.
diff --git a/website/docs/api/tagger.mdx b/website/docs/api/tagger.mdx
index 35e7a23b1..7548b309a 100644
--- a/website/docs/api/tagger.mdx
+++ b/website/docs/api/tagger.mdx
@@ -40,13 +40,13 @@ architectures and their arguments and hyperparameters.
 > nlp.add_pipe("tagger", config=config)
 > ```
 
-| Setting                                         | Description                                                                                                                                                                                                                                                                                            |
-| ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `model`                                         | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
-| `overwrite` <Tag variant="new">3.2</Tag>        | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~                                                                                                                                                                                                                              |
-| `scorer` <Tag variant="new">3.2</Tag>           | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~                                                                                                                                                            |
-| `neg_prefix` <Tag variant="new">3.2.1</Tag>     | The prefix used to specify incorrect tags while training. The tagger will learn not to predict exactly this tag. Defaults to `!`. ~~str~~                                                                                                                                                              |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~                                                                                                                                                                       |
+| Setting                                        | Description                                                                                                                                                                                                                                                                                            |
+| ---------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `model`                                        | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
+| `overwrite` <Tag variant="new">3.2</Tag>       | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~                                                                                                                                                                                                                              |
+| `scorer` <Tag variant="new">3.2</Tag>          | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~                                                                                                                                                            |
+| `neg_prefix` <Tag variant="new">3.2.1</Tag>    | The prefix used to specify incorrect tags while training. The tagger will learn not to predict exactly this tag. Defaults to `!`. ~~str~~                                                                                                                                                              |
+| `label_smoothing` <Tag variant="new">3.6</Tag> | [Label smoothing](https://arxiv.org/abs/1906.02629) factor. Defaults to `0.0`. ~~float~~                                                                                                                                                                                                               |
 
 ```python
 %%GITHUB_SPACY/spacy/pipeline/tagger.pyx
diff --git a/website/docs/api/top-level.mdx b/website/docs/api/top-level.mdx
index b5f561cae..aa7c51750 100644
--- a/website/docs/api/top-level.mdx
+++ b/website/docs/api/top-level.mdx
@@ -294,7 +294,7 @@ the `manual=True` argument in `displacy.render`.
 
 | Name        | Description                                                         |
 | ----------- | ------------------------------------------------------------------- |
-| `orig_doc`  | Doc to parse dependencies. ~~Doc~~                                  |
+| `orig_doc`  | Doc or span to parse dependencies. ~~Union[Doc, Span]~~             |
 | `options`   | Dependency parse specific visualisation options. ~~Dict[str, Any]~~ |
 | **RETURNS** | Generated dependency parse keyed by words and arcs. ~~dict~~        |
 
diff --git a/website/docs/api/vocab.mdx b/website/docs/api/vocab.mdx
index 3faf1f1a0..d03e34785 100644
--- a/website/docs/api/vocab.mdx
+++ b/website/docs/api/vocab.mdx
@@ -10,6 +10,13 @@ The `Vocab` object provides a lookup table that allows you to access
 [`StringStore`](/api/stringstore). It also owns underlying C-data that is shared
 between `Doc` objects.
 
+<Infobox variant="warning">
+
+Note that a `Vocab` instance is not static. It increases in size as texts with
+new tokens are processed.
+
+</Infobox>
+
 ## Vocab.\_\_init\_\_ {id="init",tag="method"}
 
 Create the vocabulary.
diff --git a/website/meta/universe.json b/website/meta/universe.json
index e35a4f045..039d42b33 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -3219,6 +3219,51 @@
             "category": ["pipeline"],
             "tags": ["syllables", "multilingual"]
         },
+        {
+            "id": "sentimental-onix",
+            "title": "Sentimental Onix",
+            "slogan": "Use onnx for sentiment models",
+            "description": "spaCy pipeline component for sentiment analysis using onnx",
+            "github": "sloev/sentimental-onix",
+            "pip": "sentimental-onix",
+            "code_example": [
+                "# Download model:",
+                "#   python -m sentimental_onix download en",
+                "import spacy",
+                "from sentimental_onix import pipeline",
+                "",
+                "nlp = spacy.load(\"en_core_web_sm\")",
+                "nlp.add_pipe(\"sentencizer\")",
+                "nlp.add_pipe(\"sentimental_onix\", after=\"sentencizer\")",
+                "",
+                "sentences = [",
+                "    (sent.text, sent._.sentiment)",
+                "    for doc in nlp.pipe(",
+                "        [",
+                "            \"i hate pasta on tuesdays\",",
+                "            \"i like movies on wednesdays\",",
+                "            \"i find your argument ridiculous\",",
+                "            \"soda with straws are my favorite\",",
+                "        ]",
+                "    )",
+                "    for sent in doc.sents",
+                "]",
+                "",
+                "assert sentences == [",
+                "    (\"i hate pasta on tuesdays\", \"Negative\"),",
+                "    (\"i like movies on wednesdays\", \"Positive\"),",
+                "    (\"i find your argument ridiculous\", \"Negative\"),",
+                "    (\"soda with straws are my favorite\", \"Positive\"),",
+                "]"
+            ],
+            "thumb": "https://raw.githubusercontent.com/sloev/sentimental-onix/master/.github/onix.webp",
+            "author": "Johannes Valbjørn",
+            "author_links": {
+                "github": "sloev"
+            },
+            "category": ["pipeline"],
+            "tags": ["sentiment", "english"]
+        },
         {
             "id": "gobbli",
             "title": "gobbli",
diff --git a/website/package.json b/website/package.json
index eeefe32df..5f8bae47e 100644
--- a/website/package.json
+++ b/website/package.json
@@ -6,6 +6,7 @@
     "dev": "next dev",
     "build": "next build && npm run sitemap && next export",
     "prebuild": "pip install -r setup/requirements.txt && sh setup/setup.sh",
+    "predev": "npm run prebuild",
     "sitemap": "next-sitemap --config next-sitemap.config.mjs",
     "start": "next start",
     "lint": "next lint",
diff --git a/website/src/styles/navigation.module.sass b/website/src/styles/navigation.module.sass
index da5c18b6f..3adc5cd03 100644
--- a/website/src/styles/navigation.module.sass
+++ b/website/src/styles/navigation.module.sass
@@ -111,11 +111,12 @@
     line-height: var(--line-height-xs)
     text-align: center
 
-@include breakpoint(max, xs)
-    .list
+@include breakpoint(max, md)
+    .alert
         display: none
 
-    .alert
+@include breakpoint(max, xs)
+    .list
         display: none
 
     .has-alert
diff --git a/website/src/templates/index.js b/website/src/templates/index.js
index 227b25be8..4c10e09c5 100644
--- a/website/src/templates/index.js
+++ b/website/src/templates/index.js
@@ -57,9 +57,15 @@ const AlertSpace = ({ nightly, legacy }) => {
     )
 }
 
+// const navAlert = (
+//     <Link to="/usage/v3-5" noLinkLayout>
+//         <strong>💥 Out now:</strong> spaCy v3.5
+//     </Link>
+// )
+
 const navAlert = (
-    <Link to="/usage/v3-5" noLinkLayout>
-        <strong>💥 Out now:</strong> spaCy v3.5
+    <Link to="https://form.typeform.com/to/aMel9q9f" noLinkLayout>
+        <strong>💥 Take the user survey!</strong>
     </Link>
 )