Merge pull request #13490 from svlandeg/feat/update_v4

Update v4 branch with latest from master
2025-10-20 02:34:33 +03:00 · 2024-05-14 22:41:17 +02:00 · 2024-05-14 22:41:17 +02:00 · 818fdb537e
commit 818fdb537e
parent 287deee02c e32a394ff0
47 changed files with 1073 additions and 731 deletions
--- a/.github/workflows/explosionbot.yml
+++ b/.github/workflows/explosionbot.yml
@ -15,7 +15,7 @@ jobs:
        env:
          GITHUB_CONTEXT: ${{ toJson(github) }}
        run: echo "$GITHUB_CONTEXT"
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - uses: actions/setup-python@v4
      - name: Install and run explosion-bot
        run: |
--- a/.github/workflows/gputests.yml
+++ b/.github/workflows/gputests.yml
@ -9,7 +9,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        branch: [master, main]
+        branch: [master, v4]
    if: github.repository_owner == 'explosion'
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/lock.yml
+++ b/.github/workflows/lock.yml
@ -16,7 +16,7 @@ jobs:
    if: github.repository_owner == 'explosion'
    runs-on: ubuntu-latest
    steps:
-      - uses: dessant/lock-threads@v4
+      - uses: dessant/lock-threads@v5
        with:
          process-only: 'issues'
          issue-inactive-days: '30'
--- a/.github/workflows/slowtests.yml
+++ b/.github/workflows/slowtests.yml
@ -9,12 +9,12 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        branch: [master, main]
+        branch: [master, v4]
    if: github.repository_owner == 'explosion'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          ref: ${{ matrix.branch }}
      - name: Get commits from past 24 hours
--- a/.github/workflows/spacy_universe_alert.yml
+++ b/.github/workflows/spacy_universe_alert.yml
@ -18,7 +18,7 @@ jobs:
        run: |
          echo "$GITHUB_CONTEXT"

-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@ -25,13 +25,12 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

      - name: Configure Python version
        uses: actions/setup-python@v4
        with:
          python-version: "3.9"
-          architecture: x64

      - name: black
        run: |
@ -71,13 +70,12 @@ jobs:

    steps:
      - name: Check out repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

      - name: Configure Python version
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python_version }}
-          architecture: x64

      - name: Install dependencies
        run: |
--- a/.github/workflows/universe_validation.yml
+++ b/.github/workflows/universe_validation.yml
@ -20,13 +20,12 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

      - name: Configure Python version
        uses: actions/setup-python@v4
        with:
          python-version: "3.9"
-          architecture: x64

      - name: Validate website/meta/universe.json
        run: |
--- a/2
+++ b/2
@ -1,6 +1,6 @@
 The MIT License (MIT)

-Copyright (C) 2016-2023 ExplosionAI GmbH, 2016 spaCy GmbH, 2015 Matthew Honnibal
+Copyright (C) 2016-2024 ExplosionAI GmbH, 2016 spaCy GmbH, 2015 Matthew Honnibal

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/requirements.txt
+++ b/requirements.txt
@ -9,9 +9,8 @@ murmurhash>=0.28.0,<1.1.0
 wasabi>=0.9.1,<1.2.0
 srsly>=2.4.3,<3.0.0
 catalogue>=2.0.6,<2.1.0
-typer>=0.3.0,<0.10.0
-smart-open>=5.2.1,<7.0.0
-weasel>=0.1.0,<0.4.0
+typer>=0.3.0,<1.0.0
+weasel>=0.1.0,<0.5.0
 # Third party dependencies
 numpy>=1.15.0; python_version < "3.9"
 numpy>=1.19.0; python_version >= "3.9"
--- a/setup.cfg
+++ b/setup.cfg
@ -41,10 +41,9 @@ install_requires =
    wasabi>=0.9.1,<1.2.0
    srsly>=2.4.3,<3.0.0
    catalogue>=2.0.6,<2.1.0
-    weasel>=0.1.0,<0.4.0
+    weasel>=0.1.0,<0.5.0
    # Third-party dependencies
-    typer>=0.3.0,<0.10.0
-    smart-open>=5.2.1,<7.0.0
+    typer>=0.3.0,<1.0.0
    tqdm>=4.38.0,<5.0.0
    numpy>=1.15.0; python_version < "3.9"
    numpy>=1.19.0; python_version >= "3.9"
--- a/spacy/cli/init.py
+++ b/spacy/cli/init.py
@ -1,5 +1,7 @@
 from wasabi import msg

+# Needed for testing
+from . import download as download_module  # noqa: F401
 from ._util import app, setup_cli  # noqa: F401
 from .apply import apply  # noqa: F401
 from .assemble import assemble_cli  # noqa: F401
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@ -1,5 +1,6 @@
 import sys
 from typing import Optional, Sequence
+from urllib.parse import urljoin

 import requests
 import typer
@ -64,6 +65,13 @@ def download(
        )
        pip_args = pip_args + ("--no-deps",)
    if direct:
+        # Reject model names with '/', in order to prevent shenanigans.
+        if "/" in model:
+            msg.fail(
+                title="Model download rejected",
+                text=f"Cannot download model '{model}'. Models are expected to be file names, not URLs or fragments",
+                exits=True,
+            )
        components = model.split("-")
        model_name = "".join(components[:-1])
        version = components[-1]
@ -156,7 +164,16 @@ def get_latest_version(model: str) -> str:
 def download_model(
     filename: str, user_pip_args: Optional[Sequence[str]] = None
 ) -> None:
-    download_url = about.__download_url__ + "/" + filename
+    # Construct the download URL carefully: relative paths or other shenanigans
+    # must not trick us into downloading from outside our own repo.
+    base_url = about.__download_url__
+    # urljoin drops the last path part unless the base ends with "/". Checking
+    # against this slash-terminated base also rejects ".../download-evil/...".
+    if not base_url.endswith("/"):
+        base_url += "/"
+    download_url = urljoin(base_url, filename)
+    if not download_url.startswith(base_url):
+        raise ValueError(f"Download from {filename} rejected. Was it a relative path?")
     pip_args = list(user_pip_args) if user_pip_args is not None else []
     cmd = [sys.executable, "-m", "pip", "install"] + pip_args + [download_url]
     run_command(cmd)
--- a/spacy/cli/find_threshold.py
+++ b/spacy/cli/find_threshold.py
@ -39,7 +39,7 @@ def find_threshold_cli(
    # fmt: on
 ):
    """
-    Runs prediction trials for a trained model with varying tresholds to maximize
+    Runs prediction trials for a trained model with varying thresholds to maximize
    the specified metric. The search space for the threshold is traversed linearly
    from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`
    (the corresponding API call to `spacy.cli.find_threshold.find_threshold()`
@ -81,7 +81,7 @@ def find_threshold(
    silent: bool = True,
 ) -> Tuple[float, float, Dict[float, float]]:
    """
-    Runs prediction trials for models with varying tresholds to maximize the specified metric.
+    Runs prediction trials for models with varying thresholds to maximize the specified metric.
    model (Union[str, Path]): Pipeline to evaluate. Can be a package or a path to a data directory.
    data_path (Path): Path to file with DocBin with docs to use for threshold search.
    pipe_name (str): Name of pipe to examine thresholds for.
--- a/spacy/errors.py
+++ b/spacy/errors.py
@ -215,6 +215,7 @@ class Warnings(metaclass=ErrorsWithCodes):
            "key attribute for vectors, configure it through Vectors(attr=) or "
            "'spacy init vectors --attr'")
    W126 = ("These keys are unsupported: {unsupported}")
+    W127 = ("Not all `Language.pipe` worker processes completed successfully")

    # v4 warning strings
    W401 = ("`incl_prior is True`, but the selected knowledge base type {kb_type} doesn't support prior probability "
--- a/spacy/language.py
+++ b/spacy/language.py
@ -1844,6 +1844,9 @@ class Language:
            for proc in procs:
                proc.join()

+            if not all(proc.exitcode == 0 for proc in procs):
+                warnings.warn(Warnings.W127)
+
    def _link_components(self) -> None:
        """Register 'listeners' within pipeline components, to allow them to
        effectively share weights.
@ -2467,6 +2470,7 @@ def _apply_pipes(
            if isinstance(texts_with_ctx, _WorkDoneSentinel):
                sender.close()
                receiver.close()
+                return

            docs = (
                ensure_doc(doc_like, context) for doc_like, context in texts_with_ctx
@ -2492,6 +2496,7 @@ def _apply_pipes(
            # stop processing.
            sender.close()
            receiver.close()
+            return


 class _Sender:
--- a/spacy/lexeme.pyx
+++ b/spacy/lexeme.pyx
@ -164,32 +164,34 @@ cdef class Lexeme:
        vector = self.vector
        return numpy.sqrt((vector**2).sum())

-    property vector:
+    @property
+    def vector(self):
        """A real-valued meaning representation.

        RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
            representing the lexeme's semantics.
        """
-        def __get__(self):
-            cdef int length = self.vocab.vectors_length
-            if length == 0:
-                raise ValueError(Errors.E010)
-            return self.vocab.get_vector(self.c.orth)
+        cdef int length = self.vocab.vectors_length
+        if length == 0:
+            raise ValueError(Errors.E010)
+        return self.vocab.get_vector(self.c.orth)

-        def __set__(self, vector):
-            if len(vector) != self.vocab.vectors_length:
-                raise ValueError(Errors.E073.format(new_length=len(vector),
-                                                    length=self.vocab.vectors_length))
-            self.vocab.set_vector(self.c.orth, vector)
+    @vector.setter
+    def vector(self, vector):
+        if len(vector) != self.vocab.vectors_length:
+            raise ValueError(Errors.E073.format(new_length=len(vector),
+                                                length=self.vocab.vectors_length))
+        self.vocab.set_vector(self.c.orth, vector)

-    property rank:
+    @property
+    def rank(self):
        """RETURNS (str): Sequential ID of the lexeme's lexical type, used
            to index into tables, e.g. for word vectors."""
-        def __get__(self):
-            return self.c.id
+        return self.c.id

-        def __set__(self, value):
-            self.c.id = value
+    @rank.setter
+    def rank(self, value):
+        self.c.id = value

    @property
    def orth_(self):
@ -203,306 +205,338 @@ cdef class Lexeme:
        """RETURNS (str): The original verbatim text of the lexeme."""
        return self.orth_

-    property lower:
+    @property
+    def lower(self):
        """RETURNS (uint64): Lowercase form of the lexeme."""
-        def __get__(self):
-            return self.c.lower
+        return self.c.lower

-        def __set__(self, attr_t x):
-            self.c.lower = x
+    @lower.setter
+    def lower(self, attr_t x):
+        self.c.lower = x

-    property norm:
+    @property
+    def norm(self):
        """RETURNS (uint64): The lexeme's norm, i.e. a normalised form of the
            lexeme text.
        """
-        def __get__(self):
-            return self.c.norm
+        return self.c.norm

-        def __set__(self, attr_t x):
-            if "lexeme_norm" not in self.vocab.lookups:
-                self.vocab.lookups.add_table("lexeme_norm")
-            norm_table = self.vocab.lookups.get_table("lexeme_norm")
-            norm_table[self.c.orth] = self.vocab.strings[x]
-            self.c.norm = x
+    @norm.setter
+    def norm(self, attr_t x):
+        if "lexeme_norm" not in self.vocab.lookups:
+            self.vocab.lookups.add_table("lexeme_norm")
+        norm_table = self.vocab.lookups.get_table("lexeme_norm")
+        norm_table[self.c.orth] = self.vocab.strings[x]
+        self.c.norm = x

-    property shape:
+    @property
+    def shape(self):
        """RETURNS (uint64): Transform of the word's string, to show
            orthographic features.
        """
-        def __get__(self):
-            return self.c.shape
+        return self.c.shape

-        def __set__(self, attr_t x):
-            self.c.shape = x
+    @shape.setter
+    def shape(self, attr_t x):
+        self.c.shape = x

-    property prefix:
+    @property
+    def prefix(self):
        """RETURNS (uint64): Length-N substring from the start of the word.
            Defaults to `N=1`.
        """
-        def __get__(self):
-            return self.c.prefix
+        return self.c.prefix

-        def __set__(self, attr_t x):
-            self.c.prefix = x
+    @prefix.setter
+    def prefix(self, attr_t x):
+        self.c.prefix = x

-    property suffix:
+    @property
+    def suffix(self):
        """RETURNS (uint64): Length-N substring from the end of the word.
            Defaults to `N=3`.
        """
-        def __get__(self):
-            return self.c.suffix
+        return self.c.suffix

-        def __set__(self, attr_t x):
-            self.c.suffix = x
+    @suffix.setter
+    def suffix(self, attr_t x):
+        self.c.suffix = x

-    property cluster:
+    @property
+    def cluster(self):
        """RETURNS (int): Brown cluster ID."""
-        def __get__(self):
-            cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {})
-            return cluster_table.get(self.c.orth, 0)
+        cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {})
+        return cluster_table.get(self.c.orth, 0)

-        def __set__(self, int x):
-            cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {})
-            cluster_table[self.c.orth] = x
+    @cluster.setter
+    def cluster(self, int x):
+        cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {})
+        cluster_table[self.c.orth] = x

-    property lang:
+    @property
+    def lang(self):
        """RETURNS (uint64): Language of the parent vocabulary."""
-        def __get__(self):
-            return self.c.lang
+        return self.c.lang

-        def __set__(self, attr_t x):
-            self.c.lang = x
+    @lang.setter
+    def lang(self, attr_t x):
+        self.c.lang = x

-    property prob:
+    @property
+    def prob(self):
        """RETURNS (float): Smoothed log probability estimate of the lexeme's
            type."""
-        def __get__(self):
-            prob_table = self.vocab.lookups.get_table("lexeme_prob", {})
-            settings_table = self.vocab.lookups.get_table("lexeme_settings", {})
-            default_oov_prob = settings_table.get("oov_prob", -20.0)
-            return prob_table.get(self.c.orth, default_oov_prob)
+        prob_table = self.vocab.lookups.get_table("lexeme_prob", {})
+        settings_table = self.vocab.lookups.get_table("lexeme_settings", {})
+        default_oov_prob = settings_table.get("oov_prob", -20.0)
+        return prob_table.get(self.c.orth, default_oov_prob)

-        def __set__(self, float x):
-            prob_table = self.vocab.lookups.get_table("lexeme_prob", {})
-            prob_table[self.c.orth] = x
+    @prob.setter
+    def prob(self, float x):
+        prob_table = self.vocab.lookups.get_table("lexeme_prob", {})
+        prob_table[self.c.orth] = x

-    property lower_:
+    @property
+    def lower_(self):
        """RETURNS (str): Lowercase form of the word."""
-        def __get__(self):
-            return self.vocab.strings[self.c.lower]
+        return self.vocab.strings[self.c.lower]

-        def __set__(self, str x):
-            self.c.lower = self.vocab.strings.add(x)
+    @lower_.setter
+    def lower_(self, str x):
+        self.c.lower = self.vocab.strings.add(x)

-    property norm_:
+    @property
+    def norm_(self):
        """RETURNS (str): The lexeme's norm, i.e. a normalised form of the
            lexeme text.
        """
-        def __get__(self):
-            return self.vocab.strings[self.c.norm]
+        return self.vocab.strings[self.c.norm]

-        def __set__(self, str x):
-            self.norm = self.vocab.strings.add(x)
+    @norm_.setter
+    def norm_(self, str x):
+        self.norm = self.vocab.strings.add(x)

-    property shape_:
+    @property
+    def shape_(self):
        """RETURNS (str): Transform of the word's string, to show
            orthographic features.
        """
-        def __get__(self):
-            return self.vocab.strings[self.c.shape]
+        return self.vocab.strings[self.c.shape]

-        def __set__(self, str x):
-            self.c.shape = self.vocab.strings.add(x)
+    @shape_.setter
+    def shape_(self, str x):
+        self.c.shape = self.vocab.strings.add(x)

-    property prefix_:
+    @property
+    def prefix_(self):
        """RETURNS (str): Length-N substring from the start of the word.
            Defaults to `N=1`.
        """
-        def __get__(self):
-            return self.vocab.strings[self.c.prefix]
+        return self.vocab.strings[self.c.prefix]

-        def __set__(self, str x):
-            self.c.prefix = self.vocab.strings.add(x)
+    @prefix_.setter
+    def prefix_(self, str x):
+        self.c.prefix = self.vocab.strings.add(x)

-    property suffix_:
+    @property
+    def suffix_(self):
        """RETURNS (str): Length-N substring from the end of the word.
            Defaults to `N=3`.
        """
-        def __get__(self):
-            return self.vocab.strings[self.c.suffix]
+        return self.vocab.strings[self.c.suffix]

-        def __set__(self, str x):
-            self.c.suffix = self.vocab.strings.add(x)
+    @suffix_.setter
+    def suffix_(self, str x):
+        self.c.suffix = self.vocab.strings.add(x)

-    property lang_:
+    @property
+    def lang_(self):
        """RETURNS (str): Language of the parent vocabulary."""
-        def __get__(self):
-            return self.vocab.strings[self.c.lang]
+        return self.vocab.strings[self.c.lang]

-        def __set__(self, str x):
-            self.c.lang = self.vocab.strings.add(x)
+    @lang_.setter
+    def lang_(self, str x):
+        self.c.lang = self.vocab.strings.add(x)

-    property flags:
+    @property
+    def flags(self):
        """RETURNS (uint64): Container of the lexeme's binary flags."""
-        def __get__(self):
-            return self.c.flags
+        return self.c.flags

-        def __set__(self, flags_t x):
-            self.c.flags = x
+    @flags.setter
+    def flags(self, flags_t x):
+        self.c.flags = x

    @property
    def is_oov(self):
        """RETURNS (bool): Whether the lexeme is out-of-vocabulary."""
        return self.orth not in self.vocab.vectors

-    property is_stop:
+    @property
+    def is_stop(self):
        """RETURNS (bool): Whether the lexeme is a stop word."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, IS_STOP)
+        return Lexeme.c_check_flag(self.c, IS_STOP)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, IS_STOP, x)
+    @is_stop.setter
+    def is_stop(self, bint x):
+        Lexeme.c_set_flag(self.c, IS_STOP, x)

-    property is_alpha:
+    @property
+    def is_alpha(self):
        """RETURNS (bool): Whether the lexeme consists of alphabetic
            characters. Equivalent to `lexeme.text.isalpha()`.
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, IS_ALPHA)
+        return Lexeme.c_check_flag(self.c, IS_ALPHA)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, IS_ALPHA, x)
+    @is_alpha.setter
+    def is_alpha(self, bint x):
+        Lexeme.c_set_flag(self.c, IS_ALPHA, x)

-    property is_ascii:
+    @property
+    def is_ascii(self):
        """RETURNS (bool): Whether the lexeme consists of ASCII characters.
            Equivalent to `not any(ord(c) >= 128 for c in lexeme.text)`.
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, IS_ASCII)
+        return Lexeme.c_check_flag(self.c, IS_ASCII)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, IS_ASCII, x)
+    @is_ascii.setter
+    def is_ascii(self, bint x):
+        Lexeme.c_set_flag(self.c, IS_ASCII, x)

-    property is_digit:
+    @property
+    def is_digit(self):
        """RETURNS (bool): Whether the lexeme consists of digits. Equivalent
            to `lexeme.text.isdigit()`.
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, IS_DIGIT)
+        return Lexeme.c_check_flag(self.c, IS_DIGIT)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, IS_DIGIT, x)
+    @is_digit.setter
+    def is_digit(self, bint x):
+        Lexeme.c_set_flag(self.c, IS_DIGIT, x)

-    property is_lower:
+    @property
+    def is_lower(self):
        """RETURNS (bool): Whether the lexeme is in lowercase. Equivalent to
            `lexeme.text.islower()`.
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, IS_LOWER)
+        return Lexeme.c_check_flag(self.c, IS_LOWER)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, IS_LOWER, x)
+    @is_lower.setter
+    def is_lower(self, bint x):
+        Lexeme.c_set_flag(self.c, IS_LOWER, x)

-    property is_upper:
+    @property
+    def is_upper(self):
        """RETURNS (bool): Whether the lexeme is in uppercase. Equivalent to
            `lexeme.text.isupper()`.
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, IS_UPPER)
+        return Lexeme.c_check_flag(self.c, IS_UPPER)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, IS_UPPER, x)
+    @is_upper.setter
+    def is_upper(self, bint x):
+        Lexeme.c_set_flag(self.c, IS_UPPER, x)

-    property is_title:
+    @property
+    def is_title(self):
        """RETURNS (bool): Whether the lexeme is in titlecase. Equivalent to
            `lexeme.text.istitle()`.
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, IS_TITLE)
+        return Lexeme.c_check_flag(self.c, IS_TITLE)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, IS_TITLE, x)
+    @is_title.setter
+    def is_title(self, bint x):
+        Lexeme.c_set_flag(self.c, IS_TITLE, x)

-    property is_punct:
+    @property
+    def is_punct(self):
        """RETURNS (bool): Whether the lexeme is punctuation."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, IS_PUNCT)
+        return Lexeme.c_check_flag(self.c, IS_PUNCT)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, IS_PUNCT, x)
+    @is_punct.setter
+    def is_punct(self, bint x):
+        Lexeme.c_set_flag(self.c, IS_PUNCT, x)

-    property is_space:
+    @property
+    def is_space(self):
        """RETURNS (bool): Whether the lexeme consists of whitespace characters.
            Equivalent to `lexeme.text.isspace()`.
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, IS_SPACE)
+        return Lexeme.c_check_flag(self.c, IS_SPACE)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, IS_SPACE, x)
+    @is_space.setter
+    def is_space(self, bint x):
+        Lexeme.c_set_flag(self.c, IS_SPACE, x)

-    property is_bracket:
+    @property
+    def is_bracket(self):
        """RETURNS (bool): Whether the lexeme is a bracket."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, IS_BRACKET)
+        return Lexeme.c_check_flag(self.c, IS_BRACKET)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, IS_BRACKET, x)
+    @is_bracket.setter
+    def is_bracket(self, bint x):
+        Lexeme.c_set_flag(self.c, IS_BRACKET, x)

-    property is_quote:
+    @property
+    def is_quote(self):
        """RETURNS (bool): Whether the lexeme is a quotation mark."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, IS_QUOTE)
+        return Lexeme.c_check_flag(self.c, IS_QUOTE)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, IS_QUOTE, x)
+    @is_quote.setter
+    def is_quote(self, bint x):
+        Lexeme.c_set_flag(self.c, IS_QUOTE, x)

-    property is_left_punct:
+    @property
+    def is_left_punct(self):
        """RETURNS (bool): Whether the lexeme is left punctuation, e.g. (."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, IS_LEFT_PUNCT)
+        return Lexeme.c_check_flag(self.c, IS_LEFT_PUNCT)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, IS_LEFT_PUNCT, x)
+    @is_left_punct.setter
+    def is_left_punct(self, bint x):
+        Lexeme.c_set_flag(self.c, IS_LEFT_PUNCT, x)

-    property is_right_punct:
+    @property
+    def is_right_punct(self):
        """RETURNS (bool): Whether the lexeme is right punctuation, e.g. )."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, IS_RIGHT_PUNCT)
+        return Lexeme.c_check_flag(self.c, IS_RIGHT_PUNCT)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, IS_RIGHT_PUNCT, x)
+    @is_right_punct.setter
+    def is_right_punct(self, bint x):
+        Lexeme.c_set_flag(self.c, IS_RIGHT_PUNCT, x)

-    property is_currency:
+    @property
+    def is_currency(self):
        """RETURNS (bool): Whether the lexeme is a currency symbol, e.g. $, €."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, IS_CURRENCY)
+        return Lexeme.c_check_flag(self.c, IS_CURRENCY)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, IS_CURRENCY, x)
+    @is_currency.setter
+    def is_currency(self, bint x):
+        Lexeme.c_set_flag(self.c, IS_CURRENCY, x)

-    property like_url:
+    @property
+    def like_url(self):
        """RETURNS (bool): Whether the lexeme resembles a URL."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, LIKE_URL)
+        return Lexeme.c_check_flag(self.c, LIKE_URL)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, LIKE_URL, x)
+    @like_url.setter
+    def like_url(self, bint x):
+        Lexeme.c_set_flag(self.c, LIKE_URL, x)

-    property like_num:
+    @property
+    def like_num(self):
        """RETURNS (bool): Whether the lexeme represents a number, e.g. "10.9",
            "10", "ten", etc.
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, LIKE_NUM)
+        return Lexeme.c_check_flag(self.c, LIKE_NUM)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, LIKE_NUM, x)
+    @like_num.setter
+    def like_num(self, bint x):
+        Lexeme.c_set_flag(self.c, LIKE_NUM, x)

-    property like_email:
+    @property
+    def like_email(self):
        """RETURNS (bool): Whether the lexeme resembles an email address."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c, LIKE_EMAIL)
+        return Lexeme.c_check_flag(self.c, LIKE_EMAIL)

-        def __set__(self, bint x):
-            Lexeme.c_set_flag(self.c, LIKE_EMAIL, x)
+    @like_email.setter
+    def like_email(self, bint x):
+        Lexeme.c_set_flag(self.c, LIKE_EMAIL, x)
--- a/spacy/ml/models/textcat.py
+++ b/spacy/ml/models/textcat.py
@ -241,6 +241,7 @@ def _build_parametric_attention_with_residual_nonlinear(

        parametric_attention.set_ref("tok2vec", tok2vec)
        parametric_attention.set_ref("attention_layer", attention_layer)
+        parametric_attention.set_ref("key_transform", key_transform)
        parametric_attention.set_ref("nonlinear_layer", nonlinear_layer)
        parametric_attention.set_ref("norm_layer", norm_layer)

@ -248,10 +249,19 @@ def _build_parametric_attention_with_residual_nonlinear(


 def _init_parametric_attention_with_residual_nonlinear(model, X, Y) -> Model:
+    # When tok2vec is lazily initialized, we need to initialize it before
+    # the rest of the chain to ensure that we can get its width.
+    tok2vec = model.get_ref("tok2vec")
+    tok2vec.initialize(X)
+
    tok2vec_width = get_tok2vec_width(model)
    model.get_ref("attention_layer").set_dim("nO", tok2vec_width)
-    model.get_ref("nonlinear_layer").set_dim("nO", tok2vec_width)
+    if model.get_ref("key_transform").has_dim("nI") is None:
+        model.get_ref("key_transform").set_dim("nI", tok2vec_width)
+    if model.get_ref("key_transform").has_dim("nO") is None:
+        model.get_ref("key_transform").set_dim("nO", tok2vec_width)
    model.get_ref("nonlinear_layer").set_dim("nI", tok2vec_width)
+    model.get_ref("nonlinear_layer").set_dim("nO", tok2vec_width)
    model.get_ref("norm_layer").set_dim("nI", tok2vec_width)
    model.get_ref("norm_layer").set_dim("nO", tok2vec_width)
    init_chain(model, X, Y)
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@ -24,7 +24,6 @@ from .. import util
 from ..errors import Errors, Warnings
 from ..kb import Candidate, KnowledgeBase
 from ..language import Language
-from ..ml import empty_kb
 from ..scorer import Scorer
 from ..tokens import Doc, Span, SpanGroup
 from ..training import Example, validate_examples, validate_get_examples
@ -114,7 +113,7 @@ def make_entity_linker(
        documents with textual mentions.
    generate_empty_kb (Callable[[Vocab, int], KnowledgeBase]): Callable returning empty KnowledgeBase.
    scorer (Optional[Callable]): The scoring method.
-    use_gold_ents (bool): Whether to copy entities from gold docs or not. If false, another
+    use_gold_ents (bool): Whether to copy entities from gold docs during training or not. If false, another
        component must provide entity annotations.
    threshold (Optional[float]): Confidence threshold for entity predictions. If confidence is below the threshold,
        prediction is discarded. If None, predictions are not filtered by any threshold.
@ -227,7 +226,6 @@ class EntityLinker(TrainablePipe):
        self.cfg: Dict[str, Any] = {"overwrite": overwrite}
        self.distance = CosineDistance(normalize=False)
        self.kb = generate_empty_kb(self.vocab, entity_vector_length)
-        self.scorer = scorer
        self.use_gold_ents = use_gold_ents
        self.threshold = threshold
        self.save_activations = save_activations
@ -235,6 +233,37 @@ class EntityLinker(TrainablePipe):
        if self.incl_prior and not self.kb.supports_prior_probs:
            warnings.warn(Warnings.W401)

+        def _score_with_ents_set(examples: Iterable[Example], **kwargs):
+            # Because of how spaCy works, we can't just score immediately, because Language.evaluate
+            # calls pipe() on the predicted docs, which won't have entities if there is no NER in the pipeline.
+            if not scorer:
+                return scorer
+            if not self.use_gold_ents:
+                return scorer(examples, **kwargs)
+            else:
+                examples = self._ensure_ents(examples)
+                docs = self.pipe(
+                    (eg.predicted for eg in examples),
+                )
+                for eg, doc in zip(examples, docs):
+                    eg.predicted = doc
+                return scorer(examples, **kwargs)
+
+        self.scorer = _score_with_ents_set
+
+    def _ensure_ents(self, examples: Iterable[Example]) -> Iterable[Example]:
+        """If use_gold_ents is true, set the gold entities to (a copy of) eg.predicted."""
+        if not self.use_gold_ents:
+            return examples
+
+        new_examples = []
+        for eg in examples:
+            ents, _ = eg.get_aligned_ents_and_ner()
+            new_eg = eg.copy()
+            new_eg.predicted.ents = ents
+            new_examples.append(new_eg)
+        return new_examples
+
    def set_kb(self, kb_loader: Callable[[Vocab], KnowledgeBase]):
        """Define the KB of this pipe by providing a function that will
        create it using this object's vocab."""
@ -276,11 +305,9 @@ class EntityLinker(TrainablePipe):
        nO = self.kb.entity_vector_length
        doc_sample = []
        vector_sample = []
-        for eg in islice(get_examples(), 10):
+        examples = self._ensure_ents(islice(get_examples(), 10))
+        for eg in examples:
            doc = eg.x
-            if self.use_gold_ents:
-                ents, _ = eg.get_aligned_ents_and_ner()
-                doc.ents = ents
            doc_sample.append(doc)
            vector_sample.append(self.model.ops.alloc1f(nO))
        assert len(doc_sample) > 0, Errors.E923.format(name=self.name)
@ -347,31 +374,17 @@ class EntityLinker(TrainablePipe):
        losses.setdefault(self.name, 0.0)
        if not examples:
            return losses
+        examples = self._ensure_ents(examples)
        validate_examples(examples, "EntityLinker.update")

-        set_dropout_rate(self.model, drop)
-        docs = [eg.predicted for eg in examples]
-        # save to restore later
-        old_ents = [doc.ents for doc in docs]
-
-        for doc, ex in zip(docs, examples):
-            if self.use_gold_ents:
-                ents, _ = ex.get_aligned_ents_and_ner()
-                doc.ents = ents
-            else:
-                # only keep matching ents
-                doc.ents = ex.get_matching_ents()
-
        # make sure we have something to learn from, if not, short-circuit
        if not self.batch_has_learnable_example(examples):
            return losses

+        set_dropout_rate(self.model, drop)
+        docs = [eg.predicted for eg in examples]
        sentence_encodings, bp_context = self.model.begin_update(docs)

-        # now restore the ents
-        for doc, old in zip(docs, old_ents):
-            doc.ents = old
-
        loss, d_scores = self.get_loss(
            sentence_encodings=sentence_encodings, examples=examples
        )
@ -379,11 +392,13 @@ class EntityLinker(TrainablePipe):
        if sgd is not None:
            self.finish_update(sgd)
        losses[self.name] += loss
+
        return losses

    def get_loss(self, examples: Iterable[Example], sentence_encodings: Floats2d):
        validate_examples(examples, "EntityLinker.get_loss")
        entity_encodings = []
+        # We assume that get_loss is called with gold ents set in the examples if need be
        eidx = 0  # indices in gold entities to keep
        keep_ents = []  # indices in sentence_encodings to keep

--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@ -799,7 +799,7 @@ GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"]
 # fmt: on


-def test_overfitting_IO():
+def test_overfitting_IO_gold_entities():
    # Simple test to try and quickly overfit the NEL component - ensuring the ML models work correctly
    nlp = English()
    vector_length = 3
@ -826,7 +826,9 @@ def test_overfitting_IO():
        return mykb

    # Create the Entity Linker component and add it to the pipeline
-    entity_linker = nlp.add_pipe("entity_linker", last=True)
+    entity_linker = nlp.add_pipe(
+        "entity_linker", last=True, config={"use_gold_ents": True}
+    )
    assert isinstance(entity_linker, EntityLinker)
    entity_linker.set_kb(create_kb)
    assert "Q2146908" in entity_linker.vocab.strings
@ -889,6 +891,107 @@ def test_overfitting_IO():
    assert_equal(batch_deps_1, batch_deps_2)
    assert_equal(batch_deps_1, no_batch_deps)

+    eval = nlp.evaluate(train_examples)
+    assert "nel_macro_p" in eval
+    assert "nel_macro_r" in eval
+    assert "nel_macro_f" in eval
+    assert "nel_micro_p" in eval
+    assert "nel_micro_r" in eval
+    assert "nel_micro_f" in eval
+    assert "nel_f_per_type" in eval
+    assert "PERSON" in eval["nel_f_per_type"]
+
+    assert eval["nel_macro_f"] > 0
+    assert eval["nel_micro_f"] > 0
+
+
+def test_overfitting_IO_with_ner():
+    # Simple test to try and overfit the NER and NEL component in combination - ensuring the ML models work correctly
+    nlp = English()
+    vector_length = 3
+    assert "Q2146908" not in nlp.vocab.strings
+
+    def create_kb(vocab):
+        # create artificial KB - assign same prior weight to the two russ cochran's
+        # Q2146908 (Russ Cochran): American golfer
+        # Q7381115 (Russ Cochran): publisher
+        mykb = InMemoryLookupKB(vocab, entity_vector_length=vector_length)
+        mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
+        mykb.add_entity(entity="Q7381115", freq=12, entity_vector=[9, 1, -7])
+        mykb.add_alias(
+            alias="Russ Cochran",
+            entities=["Q2146908", "Q7381115"],
+            probabilities=[0.5, 0.5],
+        )
+        return mykb
+
+    # Create the NER and EL components and add them to the pipeline
+    ner = nlp.add_pipe("ner", first=True)
+    entity_linker = nlp.add_pipe(
+        "entity_linker", last=True, config={"use_gold_ents": False}
+    )
+    entity_linker.set_kb(create_kb)
+
+    # Build the training examples from unprocessed docs and register every
+    # gold entity label with the NER component
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for ent in annotations.get("entities"):
+            ner.add_label(ent[2])
+    optimizer = nlp.initialize()
+
+    # train the NER and NEL pipes
+    for _ in range(50):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+    assert losses["ner"] < 0.001
+    assert losses["entity_linker"] < 0.001
+
+    # adding additional components that are required for the entity_linker
+    nlp.add_pipe("sentencizer", first=True)
+
+    # test the trained model
+    test_text = "Russ Cochran captured his first major title with his son as caddie."
+    doc = nlp(test_text)
+    ents = doc.ents
+    assert len(ents) == 1
+    assert ents[0].text == "Russ Cochran"
+    assert ents[0].label_ == "PERSON"
+    assert ents[0].kb_id_ != "NIL"
+
+    # TODO: below assert is still flaky - EL doesn't properly overfit quite yet
+    # assert ents[0].kb_id_ == "Q2146908"
+
+    # Also test the results are still the same after IO
+    with make_tempdir() as tmp_dir:
+        nlp.to_disk(tmp_dir)
+        nlp2 = util.load_model_from_path(tmp_dir)
+        assert nlp2.pipe_names == nlp.pipe_names
+        doc2 = nlp2(test_text)
+        ents2 = doc2.ents
+        assert len(ents2) == 1
+        assert ents2[0].text == "Russ Cochran"
+        assert ents2[0].label_ == "PERSON"
+        assert ents2[0].kb_id_ != "NIL"
+
+    # `scores` rather than `eval`, to avoid shadowing the builtin
+    scores = nlp.evaluate(train_examples)
+    assert "nel_macro_f" in scores
+    assert "nel_micro_f" in scores
+    assert "ents_f" in scores
+    assert "nel_f_per_type" in scores
+    assert "ents_per_type" in scores
+    assert "PERSON" in scores["nel_f_per_type"]
+    assert "PERSON" in scores["ents_per_type"]
+
+    assert scores["nel_macro_f"] > 0
+    assert scores["nel_micro_f"] > 0
+    assert scores["ents_f"] > 0
+

 def test_kb_serialization():
    # Test that the KB can be used in a pipeline with a different vocab
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@ -29,6 +29,8 @@ from spacy.tokens import Doc, DocBin
 from spacy.training import Example
 from spacy.training.initialize import init_nlp

+# Ensure that the architecture gets added to the registry.
+from ..tok2vec import build_lazy_init_tok2vec as _
 from ..util import make_tempdir

 TRAIN_DATA_SINGLE_LABEL = [
@ -41,6 +43,13 @@ TRAIN_DATA_MULTI_LABEL = [
    ("I'm confused but happy", {"cats": {"ANGRY": 0.0, "CONFUSED": 1.0, "HAPPY": 1.0}}),
 ]

+lazy_init_model_config = """
+[model]
+@architectures = "test.LazyInitTok2Vec.v1"
+width = 96
+"""
+LAZY_INIT_TOK2VEC_MODEL = Config().from_str(lazy_init_model_config)["model"]
+

 def make_get_examples_single_label(nlp):
    train_examples = []
@ -551,6 +560,34 @@ def test_error_with_multi_labels():
        nlp.initialize(get_examples=lambda: train_examples)


+# fmt: off
+@pytest.mark.parametrize(
+    "name,textcat_config",
+    [
+        # ENSEMBLE V2
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": LAZY_INIT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}}),
+        ("textcat", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": LAZY_INIT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": True, "ngram_size": 5, "no_output_layer": False}}),
+        # PARAMETRIC ATTENTION V1
+        ("textcat", {"@architectures": "spacy.TextCatParametricAttention.v1", "tok2vec": LAZY_INIT_TOK2VEC_MODEL, "exclusive_classes": True}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatParametricAttention.v1", "tok2vec": LAZY_INIT_TOK2VEC_MODEL, "exclusive_classes": False}),
+        # REDUCE
+        ("textcat", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": LAZY_INIT_TOK2VEC_MODEL, "exclusive_classes": True, "use_reduce_first": True, "use_reduce_last": True, "use_reduce_max": True, "use_reduce_mean": True}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": LAZY_INIT_TOK2VEC_MODEL, "exclusive_classes": False, "use_reduce_first": True, "use_reduce_last": True, "use_reduce_max": True, "use_reduce_mean": True}),
+    ],
+)
+# fmt: on
+def test_tok2vec_lazy_init(name, textcat_config):
+    # Check that we can properly initialize and use a textcat model using
+    # a lazily-initialized tok2vec.
+    nlp = English()
+    pipe_config = {"model": textcat_config}
+    textcat = nlp.add_pipe(name, config=pipe_config)
+    textcat.add_label("POSITIVE")
+    textcat.add_label("NEGATIVE")
+    nlp.initialize()
+    # nlp.pipe() yields docs lazily, so the result has to be consumed for the
+    # pipeline (and with it the tok2vec forward pass) to actually run.
+    docs = list(nlp.pipe(["This is a test."]))
+    assert len(docs) == 1
+
+
@pytest.mark.parametrize(
    "name,get_examples, train_data",
    [
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@ -12,7 +12,7 @@ from thinc.api import Config

 import spacy
 from spacy import about
-from spacy.cli import info
+from spacy.cli import download_module, info
 from spacy.cli._util import parse_config_overrides, string_to_list, walk_directory
 from spacy.cli.apply import apply
 from spacy.cli.debug_data import (
@ -1066,3 +1066,15 @@ def test_debug_data_trainable_lemmatizer_not_annotated():
 def test_project_api_imports():
    from spacy.cli import project_run
    from spacy.cli.project.run import project_run  # noqa: F401, F811
+
+
+def test_download_rejects_relative_urls(monkeypatch):
+    """Check that spacy download refuses a model name that contains a
+    relative path, so it can't be pointed at an arbitrary model."""
+
+    def skip_run_command(cmd):
+        return None
+
+    monkeypatch.setattr(download_module, "run_command", skip_run_command)
+
+    model_name = "en_core_web_sm-3.7.1"
+    # A plain model name goes through without raising
+    download_module.download(model_name, direct=True)
+    # The same name behind a relative path has to be rejected
+    with pytest.raises(SystemExit):
+        download_module.download("../" + model_name, direct=True)
--- a/spacy/tests/test_language.py
+++ b/spacy/tests/test_language.py
@ -1,5 +1,6 @@
 import itertools
 import logging
+import warnings
 from unittest import mock

 import pytest
@ -423,7 +424,7 @@ def test_language_pipe_error_handler(n_process):
        nlp.set_error_handler(raise_error)
        with pytest.raises(ValueError):
            list(nlp.pipe(texts, n_process=n_process))
-        # set explicitely to ignoring
+        # set explicitly to ignoring
        nlp.set_error_handler(ignore_error)
        docs = list(nlp.pipe(texts, n_process=n_process))
        assert len(docs) == 0
@ -834,9 +835,13 @@ def test_pass_doc_to_pipeline(nlp, n_process):
    assert doc.text == texts[0]
    assert len(doc.cats) > 0
    if isinstance(get_current_ops(), NumpyOps) or n_process < 2:
-        docs = nlp.pipe(docs, n_process=n_process)
-        assert [doc.text for doc in docs] == texts
-        assert all(len(doc.cats) for doc in docs)
+        # Catch warnings to ensure that all worker processes exited
+        # successfully.
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+            docs = nlp.pipe(docs, n_process=n_process)
+            assert [doc.text for doc in docs] == texts
+            assert all(len(doc.cats) for doc in docs)


 def test_invalid_arg_to_pipeline(nlp):
--- a/spacy/tests/tok2vec.py
+++ b/spacy/tests/tok2vec.py
@ -0,0 +1,36 @@
+from typing import List
+
+from thinc.api import Model
+from thinc.types import Floats2d
+
+from spacy.tokens import Doc
+from spacy.util import registry
+
+
+@registry.architectures("test.LazyInitTok2Vec.v1")
+def build_lazy_init_tok2vec(*, width: int) -> Model[List[Doc], List[Floats2d]]:
+    """Create a test-only tok2vec model whose output size ("nO") is left
+    unset at construction and only filled in by its init function. The
+    embeddings it produces are not meaningful."""
+    unset_dims = {"nO": None}
+    model_attrs = {"width": width}
+    return Model(
+        "lazy_init_tok2vec",
+        lazy_init_tok2vec_forward,
+        init=lazy_init_tok2vec_init,
+        dims=unset_dims,
+        attrs=model_attrs,
+    )
+
+
+def lazy_init_tok2vec_init(model: Model, X=None, Y=None):
+    """Set the model's "nO" dimension from its "width" attribute. X and Y
+    are accepted but not used."""
+    model.set_dim("nO", model.attrs["width"])
+
+
+def lazy_init_tok2vec_forward(model: Model, X: List[Doc], is_train: bool):
+    """Allocate one array per doc with model.ops.alloc2f(len(doc), nO) and
+    return the list together with a backprop callback that returns an
+    empty list."""
+    n_out = model.get_dim("nO")
+
+    def backprop(dY):
+        return []
+
+    outputs = []
+    for doc in X:
+        outputs.append(model.ops.alloc2f(len(doc), n_out))
+    return outputs, backprop
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@ -71,65 +71,72 @@ cdef class Tokenizer:
        self._special_matcher = PhraseMatcher(self.vocab)
        self._load_special_cases(rules)

-    property token_match:
-        def __get__(self):
-            return self._token_match
+    @property
+    def token_match(self):
+        return self._token_match

-        def __set__(self, token_match):
-            self._token_match = token_match
-            self._reload_special_cases()
+    @token_match.setter
+    def token_match(self, token_match):
+        self._token_match = token_match
+        self._reload_special_cases()

-    property url_match:
-        def __get__(self):
-            return self._url_match
+    @property
+    def url_match(self):
+        return self._url_match

-        def __set__(self, url_match):
-            self._url_match = url_match
-            self._reload_special_cases()
+    @url_match.setter
+    def url_match(self, url_match):
+        self._url_match = url_match
+        self._reload_special_cases()

-    property prefix_search:
-        def __get__(self):
-            return self._prefix_search
+    @property
+    def prefix_search(self):
+        return self._prefix_search

-        def __set__(self, prefix_search):
-            self._prefix_search = prefix_search
-            self._reload_special_cases()
+    @prefix_search.setter
+    def prefix_search(self, prefix_search):
+        self._prefix_search = prefix_search
+        self._reload_special_cases()

-    property suffix_search:
-        def __get__(self):
-            return self._suffix_search
+    @property
+    def suffix_search(self):
+        return self._suffix_search

-        def __set__(self, suffix_search):
-            self._suffix_search = suffix_search
-            self._reload_special_cases()
+    @suffix_search.setter
+    def suffix_search(self, suffix_search):
+        self._suffix_search = suffix_search
+        self._reload_special_cases()

-    property infix_finditer:
-        def __get__(self):
-            return self._infix_finditer
+    @property
+    def infix_finditer(self):
+        return self._infix_finditer

-        def __set__(self, infix_finditer):
-            self._infix_finditer = infix_finditer
-            self._reload_special_cases()
+    @infix_finditer.setter
+    def infix_finditer(self, infix_finditer):
+        self._infix_finditer = infix_finditer
+        self._reload_special_cases()

-    property rules:
-        def __get__(self):
-            return self._rules
+    @property
+    def rules(self):
+        return self._rules

-        def __set__(self, rules):
-            self._rules = {}
-            self._flush_cache()
-            self._flush_specials()
-            self._cache = PreshMap()
-            self._specials = PreshMap()
-            self._load_special_cases(rules)
+    @rules.setter
+    def rules(self, rules):
+        self._rules = {}
+        self._flush_cache()
+        self._flush_specials()
+        self._cache = PreshMap()
+        self._specials = PreshMap()
+        self._load_special_cases(rules)

-    property faster_heuristics:
-        def __get__(self):
-            return self._faster_heuristics
+    @property
+    def faster_heuristics(self):
+        return self._faster_heuristics

-        def __set__(self, faster_heuristics):
-            self._faster_heuristics = faster_heuristics
-            self._reload_special_cases()
+    @faster_heuristics.setter
+    def faster_heuristics(self, faster_heuristics):
+        self._faster_heuristics = faster_heuristics
+        self._reload_special_cases()

    def __reduce__(self):
        args = (self.vocab,
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@ -667,7 +667,8 @@ cdef class Doc:
        else:
            return False

-    property vector:
+    @property
+    def vector(self):
        """A real-valued meaning representation. Defaults to an average of the
        token vectors.

@ -676,45 +677,46 @@ cdef class Doc:

        DOCS: https://spacy.io/api/doc#vector
        """
-        def __get__(self):
-            if "vector" in self.user_hooks:
-                return self.user_hooks["vector"](self)
-            if self._vector is not None:
-                return self._vector
-            xp = get_array_module(self.vocab.vectors.data)
-            if not len(self):
-                self._vector = xp.zeros((self.vocab.vectors_length,), dtype="f")
-                return self._vector
-            elif self.vocab.vectors.size > 0:
-                self._vector = sum(t.vector for t in self) / len(self)
-                return self._vector
-            else:
-                return xp.zeros((self.vocab.vectors_length,), dtype="float32")
+        if "vector" in self.user_hooks:
+            return self.user_hooks["vector"](self)
+        if self._vector is not None:
+            return self._vector
+        xp = get_array_module(self.vocab.vectors.data)
+        if not len(self):
+            self._vector = xp.zeros((self.vocab.vectors_length,), dtype="f")
+            return self._vector
+        elif self.vocab.vectors.size > 0:
+            self._vector = sum(t.vector for t in self) / len(self)
+            return self._vector
+        else:
+            return xp.zeros((self.vocab.vectors_length,), dtype="float32")

-        def __set__(self, value):
-            self._vector = value
+    @vector.setter
+    def vector(self, value):
+        self._vector = value

-    property vector_norm:
+    @property
+    def vector_norm(self):
        """The L2 norm of the document's vector representation.

        RETURNS (float): The L2 norm of the vector representation.

        DOCS: https://spacy.io/api/doc#vector_norm
        """
-        def __get__(self):
-            if "vector_norm" in self.user_hooks:
-                return self.user_hooks["vector_norm"](self)
-            cdef float value
-            cdef double norm = 0
-            if self._vector_norm is None:
-                norm = 0.0
-                for value in self.vector:
-                    norm += value * value
-                self._vector_norm = sqrt(norm) if norm != 0 else 0
-            return self._vector_norm
+        if "vector_norm" in self.user_hooks:
+            return self.user_hooks["vector_norm"](self)
+        cdef float value
+        cdef double norm = 0
+        if self._vector_norm is None:
+            norm = 0.0
+            for value in self.vector:
+                norm += value * value
+            self._vector_norm = sqrt(norm) if norm != 0 else 0
+        return self._vector_norm

-        def __set__(self, value):
-            self._vector_norm = value
+    @vector_norm.setter
+    def vector_norm(self, value):
+        self._vector_norm = value

    @property
    def text(self):
@ -733,7 +735,8 @@ cdef class Doc:
        """
        return self.text

-    property ents:
+    @property
+    def ents(self):
        """The named entities in the document. Returns a list of named entity
        `Span` objects, if the entity recognizer has been applied.

@ -741,55 +744,55 @@ cdef class Doc:

        DOCS: https://spacy.io/api/doc#ents
        """
-        def __get__(self):
-            cdef int i
-            cdef const TokenC* token
-            cdef int start = -1
-            cdef attr_t label = 0
-            cdef attr_t kb_id = 0
-            cdef attr_t ent_id = 0
-            output = []
-            for i in range(self.length):
-                token = &self.c[i]
-                if token.ent_iob == 1:
-                    if start == -1:
-                        seq = [f"{t.text}|{t.ent_iob_}" for t in self[i-5:i+5]]
-                        raise ValueError(Errors.E093.format(seq=" ".join(seq)))
-                elif token.ent_iob == 2 or token.ent_iob == 0 or \
-                        (token.ent_iob == 3 and token.ent_type == 0):
-                    if start != -1:
-                        output.append(Span(self, start, i, label=label, kb_id=kb_id, span_id=ent_id))
-                    start = -1
-                    label = 0
-                    kb_id = 0
-                    ent_id = 0
-                elif token.ent_iob == 3:
-                    if start != -1:
-                        output.append(Span(self, start, i, label=label, kb_id=kb_id, span_id=ent_id))
-                    start = i
-                    label = token.ent_type
-                    kb_id = token.ent_kb_id
-                    ent_id = token.ent_id
-            if start != -1:
-                output.append(Span(self, start, self.length, label=label, kb_id=kb_id, span_id=ent_id))
-            # remove empty-label spans
-            output = [o for o in output if o.label_ != ""]
-            return tuple(output)
+        cdef int i
+        cdef const TokenC* token
+        cdef int start = -1
+        cdef attr_t label = 0
+        cdef attr_t kb_id = 0
+        cdef attr_t ent_id = 0
+        output = []
+        for i in range(self.length):
+            token = &self.c[i]
+            if token.ent_iob == 1:
+                if start == -1:
+                    seq = [f"{t.text}|{t.ent_iob_}" for t in self[i-5:i+5]]
+                    raise ValueError(Errors.E093.format(seq=" ".join(seq)))
+            elif token.ent_iob == 2 or token.ent_iob == 0 or \
+                    (token.ent_iob == 3 and token.ent_type == 0):
+                if start != -1:
+                    output.append(Span(self, start, i, label=label, kb_id=kb_id, span_id=ent_id))
+                start = -1
+                label = 0
+                kb_id = 0
+                ent_id = 0
+            elif token.ent_iob == 3:
+                if start != -1:
+                    output.append(Span(self, start, i, label=label, kb_id=kb_id, span_id=ent_id))
+                start = i
+                label = token.ent_type
+                kb_id = token.ent_kb_id
+                ent_id = token.ent_id
+        if start != -1:
+            output.append(Span(self, start, self.length, label=label, kb_id=kb_id, span_id=ent_id))
+        # remove empty-label spans
+        output = [o for o in output if o.label_ != ""]
+        return tuple(output)

-        def __set__(self, ents):
-            # TODO:
-            # 1. Test basic data-driven ORTH gazetteer
-            # 2. Test more nuanced date and currency regex
-            cdef attr_t kb_id, ent_id
-            cdef int ent_start, ent_end
-            ent_spans = []
-            for ent_info in ents:
-                entity_type_, kb_id, ent_start, ent_end, ent_id = get_entity_info(ent_info)
-                if isinstance(entity_type_, str):
-                    self.vocab.strings.add(entity_type_)
-                span = Span(self, ent_start, ent_end, label=entity_type_, kb_id=kb_id, span_id=ent_id)
-                ent_spans.append(span)
-            self.set_ents(ent_spans, default=SetEntsDefault.outside)
+    @ents.setter
+    def ents(self, ents):
+        # TODO:
+        # 1. Test basic data-driven ORTH gazetteer
+        # 2. Test more nuanced date and currency regex
+        cdef attr_t kb_id, ent_id
+        cdef int ent_start, ent_end
+        ent_spans = []
+        for ent_info in ents:
+            entity_type_, kb_id, ent_start, ent_end, ent_id = get_entity_info(ent_info)
+            if isinstance(entity_type_, str):
+                self.vocab.strings.add(entity_type_)
+            span = Span(self, ent_start, ent_end, label=entity_type_, kb_id=kb_id, span_id=ent_id)
+            ent_spans.append(span)
+        self.set_ents(ent_spans, default=SetEntsDefault.outside)

    def set_ents(self, entities, *, blocked=None, missing=None, outside=None, default=SetEntsDefault.outside):
        """Set entity annotation.
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@ -786,110 +786,130 @@ cdef class Span:
        for word in self.rights:
            yield from word.subtree

-    property start:
-        def __get__(self):
-            return self.span_c().start
+    @property
+    def start(self):
+        return self.span_c().start

-        def __set__(self, int start):
-            if start < 0 or start > self.doc.length:
-                raise IndexError(Errors.E1032.format(var="start", obj="Doc", length=self.doc.length, value=start))
-            cdef SpanC* span_c = self.span_c()
-            if start > span_c.end:
-                raise ValueError(Errors.E4007.format(var="start", value=start, op="<=", existing_var="end", existing_value=span_c.end))
-            span_c.start = start
-            span_c.start_char = self.doc.c[start].idx
+    @start.setter
+    def start(self, int start):
+        if start < 0 or start > self.doc.length:
+            raise IndexError(Errors.E1032.format(var="start", obj="Doc", length=self.doc.length, value=start))
+        cdef SpanC * span_c = self.span_c()
+        if start > span_c.end:
+            raise ValueError(
+                Errors.E4007.format(var="start", value=start, op="<=", existing_var="end", existing_value=span_c.end))
+        span_c.start = start
+        span_c.start_char = self.doc.c[start].idx

-    property end:
-        def __get__(self):
-            return self.span_c().end
+    @property
+    def end(self):
+        return self.span_c().end

-        def __set__(self, int end):
-            if end < 0 or end > self.doc.length:
-                raise IndexError(Errors.E1032.format(var="end", obj="Doc", length=self.doc.length, value=end))
-            cdef SpanC* span_c = self.span_c()
-            if span_c.start > end:
-                raise ValueError(Errors.E4007.format(var="end", value=end, op=">=", existing_var="start", existing_value=span_c.start))
-            span_c.end = end
-            if end > 0:
-                span_c.end_char = self.doc.c[end-1].idx + self.doc.c[end-1].lex.length
-            else:
-                span_c.end_char = 0
+    @end.setter
+    def end(self, int end):
+        if end < 0 or end > self.doc.length:
+            raise IndexError(Errors.E1032.format(var="end", obj="Doc", length=self.doc.length, value=end))
+        cdef SpanC * span_c = self.span_c()
+        if span_c.start > end:
+            raise ValueError(
+                Errors.E4007.format(var="end", value=end, op=">=", existing_var="start", existing_value=span_c.start))
+        span_c.end = end
+        if end > 0:
+            span_c.end_char = self.doc.c[end - 1].idx + self.doc.c[end - 1].lex.length
+        else:
+            span_c.end_char = 0

-    property start_char:
-        def __get__(self):
-            return self.span_c().start_char
+    @property
+    def start_char(self):
+        return self.span_c().start_char

-        def __set__(self, int start_char):
-            if start_char < 0 or start_char > len(self.doc.text):
-                raise IndexError(Errors.E1032.format(var="start_char", obj="Doc text", length=len(self.doc.text), value=start_char))
-            cdef int start = token_by_start(self.doc.c, self.doc.length, start_char)
-            if start < 0:
-                raise ValueError(Errors.E4008.format(value=start_char, pos="start"))
-            cdef SpanC* span_c = self.span_c()
-            if start_char > span_c.end_char:
-                raise ValueError(Errors.E4007.format(var="start_char", value=start_char, op="<=", existing_var="end_char", existing_value=span_c.end_char))
-            span_c.start_char = start_char
-            span_c.start = start
+    @start_char.setter
+    def start_char(self, int start_char):
+        if start_char < 0 or start_char > len(self.doc.text):
+            raise IndexError(
+                Errors.E1032.format(var="start_char", obj="Doc text", length=len(self.doc.text), value=start_char))
+        cdef int start = token_by_start(self.doc.c, self.doc.length, start_char)
+        if start < 0:
+            raise ValueError(Errors.E4008.format(value=start_char, pos="start"))
+        cdef SpanC * span_c = self.span_c()
+        if start_char > span_c.end_char:
+            raise ValueError(Errors.E4007.format(var="start_char", value=start_char, op="<=", existing_var="end_char",
+                                                 existing_value=span_c.end_char))
+        span_c.start_char = start_char
+        span_c.start = start

-    property end_char:
-        def __get__(self):
-            return self.span_c().end_char
+    @property
+    def end_char(self):
+        return self.span_c().end_char

-        def __set__(self, int end_char):
-            if end_char < 0 or end_char > len(self.doc.text):
-                raise IndexError(Errors.E1032.format(var="end_char", obj="Doc text", length=len(self.doc.text), value=end_char))
-            cdef int end = token_by_end(self.doc.c, self.doc.length, end_char)
-            if end < 0:
-                raise ValueError(Errors.E4008.format(value=end_char, pos="end"))
-            cdef SpanC* span_c = self.span_c()
-            if span_c.start_char > end_char:
-                raise ValueError(Errors.E4007.format(var="end_char", value=end_char, op=">=", existing_var="start_char", existing_value=span_c.start_char))
-            span_c.end_char = end_char
-            span_c.end = end
+    @end_char.setter
+    def end_char(self, int end_char):
+        if end_char < 0 or end_char > len(self.doc.text):
+            raise IndexError(
+                Errors.E1032.format(var="end_char", obj="Doc text", length=len(self.doc.text), value=end_char))
+        cdef int end = token_by_end(self.doc.c, self.doc.length, end_char)
+        if end < 0:
+            raise ValueError(Errors.E4008.format(value=end_char, pos="end"))
+        cdef SpanC * span_c = self.span_c()
+        if span_c.start_char > end_char:
+            raise ValueError(Errors.E4007.format(var="end_char", value=end_char, op=">=", existing_var="start_char",
+                                                 existing_value=span_c.start_char))
+        span_c.end_char = end_char
+        span_c.end = end

-    property label:
-        def __get__(self):
-            return self.span_c().label
+    @property
+    def label(self):
+        return self.span_c().label

-        def __set__(self, attr_t label):
-            if label != self.span_c().label :
-                old_label = self.span_c().label
-                self.span_c().label = label
-                new = Underscore(Underscore.span_extensions, self, start=self.span_c().start_char, end=self.span_c().end_char, label=self.label, kb_id=self.kb_id, span_id=self.id)
-                old = Underscore(Underscore.span_extensions, self, start=self.span_c().start_char, end=self.span_c().end_char, label=old_label, kb_id=self.kb_id, span_id=self.id)
-                Underscore._replace_keys(old, new)
+    @label.setter
+    def label(self, attr_t label):
+        if label != self.span_c().label:
+            old_label = self.span_c().label
+            self.span_c().label = label
+            new = Underscore(Underscore.span_extensions, self, start=self.span_c().start_char,
+                             end=self.span_c().end_char, label=self.label, kb_id=self.kb_id, span_id=self.id)
+            old = Underscore(Underscore.span_extensions, self, start=self.span_c().start_char,
+                             end=self.span_c().end_char, label=old_label, kb_id=self.kb_id, span_id=self.id)
+            Underscore._replace_keys(old, new)

-    property kb_id:
-        def __get__(self):
-            return self.span_c().kb_id
+    @property
+    def kb_id(self):
+        return self.span_c().kb_id

-        def __set__(self, attr_t kb_id):
-            if kb_id != self.span_c().kb_id :
-                old_kb_id = self.span_c().kb_id
-                self.span_c().kb_id = kb_id
-                new = Underscore(Underscore.span_extensions, self, start=self.span_c().start_char, end=self.span_c().end_char, label=self.label, kb_id=self.kb_id, span_id=self.id)
-                old = Underscore(Underscore.span_extensions, self, start=self.span_c().start_char, end=self.span_c().end_char, label=self.label, kb_id=old_kb_id, span_id=self.id)
-                Underscore._replace_keys(old, new)
+    @kb_id.setter
+    def kb_id(self, attr_t kb_id):
+        if kb_id != self.span_c().kb_id:
+            old_kb_id = self.span_c().kb_id
+            self.span_c().kb_id = kb_id
+            new = Underscore(Underscore.span_extensions, self, start=self.span_c().start_char,
+                             end=self.span_c().end_char, label=self.label, kb_id=self.kb_id, span_id=self.id)
+            old = Underscore(Underscore.span_extensions, self, start=self.span_c().start_char,
+                             end=self.span_c().end_char, label=self.label, kb_id=old_kb_id, span_id=self.id)
+            Underscore._replace_keys(old, new)

-    property id:
-        def __get__(self):
-            return self.span_c().id
+    @property
+    def id(self):
+        return self.span_c().id

-        def __set__(self, attr_t id):
-            if id != self.span_c().id :
-                old_id = self.span_c().id
-                self.span_c().id = id
-                new = Underscore(Underscore.span_extensions, self, start=self.span_c().start_char, end=self.span_c().end_char, label=self.label, kb_id=self.kb_id, span_id=self.id)
-                old = Underscore(Underscore.span_extensions, self, start=self.span_c().start_char, end=self.span_c().end_char, label=self.label, kb_id=self.kb_id, span_id=old_id)
-                Underscore._replace_keys(old, new)
+    @id.setter
+    def id(self, attr_t id):
+        if id != self.span_c().id:
+            old_id = self.span_c().id
+            self.span_c().id = id
+            new = Underscore(Underscore.span_extensions, self, start=self.span_c().start_char,
+                             end=self.span_c().end_char, label=self.label, kb_id=self.kb_id, span_id=self.id)
+            old = Underscore(Underscore.span_extensions, self, start=self.span_c().start_char,
+                             end=self.span_c().end_char, label=self.label, kb_id=self.kb_id, span_id=old_id)
+            Underscore._replace_keys(old, new)

-    property ent_id:
+    @property
+    def ent_id(self):
        """Alias for the span's ID."""
-        def __get__(self):
-            return self.id
+        return self.id

-        def __set__(self, attr_t ent_id):
-            self.id = ent_id
+    @ent_id.setter
+    def ent_id(self, attr_t ent_id):
+        self.id = ent_id

    @property
    def orth_(self):
@ -904,29 +924,32 @@ cdef class Span:
        """RETURNS (str): The span's lemma."""
        return "".join([t.lemma_ + t.whitespace_ for t in self]).strip()

-    property label_:
+    @property
+    def label_(self):
        """The span's label."""
-        def __get__(self):
-            return self.doc.vocab.strings[self.label]
+        return self.doc.vocab.strings[self.label]

-        def __set__(self, str label_):
-            self.label = self.doc.vocab.strings.add(label_)
+    @label_.setter
+    def label_(self, str label_):
+        self.label = self.doc.vocab.strings.add(label_)

-    property kb_id_:
+    @property
+    def kb_id_(self):
        """The span's KB ID."""
-        def __get__(self):
-            return self.doc.vocab.strings[self.kb_id]
+        return self.doc.vocab.strings[self.kb_id]

-        def __set__(self, str kb_id_):
-            self.kb_id = self.doc.vocab.strings.add(kb_id_)
+    @kb_id_.setter
+    def kb_id_(self, str kb_id_):
+        self.kb_id = self.doc.vocab.strings.add(kb_id_)

-    property id_:
+    @property
+    def id_(self):
        """The span's ID."""
-        def __get__(self):
-            return self.doc.vocab.strings[self.id]
+        return self.doc.vocab.strings[self.id]

-        def __set__(self, str id_):
-            self.id = self.doc.vocab.strings.add(id_)
+    @id_.setter
+    def id_(self, str id_):
+        self.id = self.doc.vocab.strings.add(id_)

    property ent_id_:
        """Alias for the span's ID."""
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@ -250,15 +250,16 @@ cdef class Token:
        """
        return not self.c.morph == 0

-    property morph:
-        def __get__(self):
-            return MorphAnalysis.from_id(self.vocab, self.c.morph)
+    @property
+    def morph(self):
+        return MorphAnalysis.from_id(self.vocab, self.c.morph)

-        def __set__(self, MorphAnalysis morph):
-            # Check that the morph has the same vocab
-            if self.vocab != morph.vocab:
-                raise ValueError(Errors.E1013)
-            self.c.morph = deref(morph.c).key
+    @morph.setter
+    def morph(self, MorphAnalysis morph):
+        # Check that the morph has the same vocab
+        if self.vocab != morph.vocab:
+            raise ValueError(Errors.E1013)
+        self.c.morph = deref(morph.c).key

    def set_morph(self, features):
        cdef hash_t key
@ -370,39 +371,43 @@ cdef class Token:
        """
        return self.c.lex.suffix

-    property lemma:
+    @property
+    def lemma(self):
        """RETURNS (uint64): ID of the base form of the word, with no
            inflectional suffixes.
        """
-        def __get__(self):
-            return self.c.lemma
+        return self.c.lemma

-        def __set__(self, attr_t lemma):
-            self.c.lemma = lemma
+    @lemma.setter
+    def lemma(self, attr_t lemma):
+        self.c.lemma = lemma

-    property pos:
+    @property
+    def pos(self):
        """RETURNS (uint64): ID of coarse-grained part-of-speech tag."""
-        def __get__(self):
-            return self.c.pos
+        return self.c.pos

-        def __set__(self, pos):
-            self.c.pos = pos
+    @pos.setter
+    def pos(self, pos):
+        self.c.pos = pos

-    property tag:
+    @property
+    def tag(self):
        """RETURNS (uint64): ID of fine-grained part-of-speech tag."""
-        def __get__(self):
-            return self.c.tag
+        return self.c.tag

-        def __set__(self, attr_t tag):
-            self.c.tag = tag
+    @tag.setter
+    def tag(self, attr_t tag):
+        self.c.tag = tag

-    property dep:
+    @property
+    def dep(self):
        """RETURNS (uint64): ID of syntactic dependency label."""
-        def __get__(self):
-            return self.c.dep
+        return self.c.dep

-        def __set__(self, attr_t label):
-            self.c.dep = label
+    @dep.setter
+    def dep(self, attr_t label):
+        self.c.dep = label

    @property
    def has_vector(self):
@ -483,48 +488,51 @@ cdef class Token:
            return self.doc.user_token_hooks["sent"](self)
        return self.doc[self.i : self.i+1].sent

-    property sent_start:
-        def __get__(self):
-            """Deprecated: use Token.is_sent_start instead."""
-            # Raising a deprecation warning here causes errors for autocomplete
-            # Handle broken backwards compatibility case: doc[0].sent_start
-            # was False.
-            if self.i == 0:
-                return False
-            else:
-                return self.c.sent_start
+    @property
+    def sent_start(self):
+        """Deprecated: use Token.is_sent_start instead."""
+        # Raising a deprecation warning here causes errors for autocomplete
+        # Handle broken backwards compatibility case: doc[0].sent_start
+        # was False.
+        if self.i == 0:
+            return False
+        else:
+            return self.c.sent_start

-        def __set__(self, value):
-            self.is_sent_start = value
+    @sent_start.setter
+    def sent_start(self, value):
+        self.is_sent_start = value

-    property is_sent_start:
+    @property
+    def is_sent_start(self):
        """A boolean value indicating whether the token starts a sentence.
        `None` if unknown. Defaults to `True` for the first token in the `Doc`.

        RETURNS (bool / None): Whether the token starts a sentence.
            None if unknown.
        """
-        def __get__(self):
-            if self.c.sent_start == 0:
-                return None
-            elif self.c.sent_start < 0:
-                return False
-            else:
-                return True
+        if self.c.sent_start == 0:
+            return None
+        elif self.c.sent_start < 0:
+            return False
+        else:
+            return True

-        def __set__(self, value):
-            if self.doc.has_annotation("DEP"):
-                raise ValueError(Errors.E043)
-            if value is None:
-                self.c.sent_start = 0
-            elif value is True:
-                self.c.sent_start = 1
-            elif value is False:
-                self.c.sent_start = -1
-            else:
-                raise ValueError(Errors.E044.format(value=value))
+    @is_sent_start.setter
+    def is_sent_start(self, value):
+        if self.doc.has_annotation("DEP"):
+            raise ValueError(Errors.E043)
+        if value is None:
+            self.c.sent_start = 0
+        elif value is True:
+            self.c.sent_start = 1
+        elif value is False:
+            self.c.sent_start = -1
+        else:
+            raise ValueError(Errors.E044.format(value=value))

-    property is_sent_end:
+    @property
+    def is_sent_end(self):
        """A boolean value indicating whether the token ends a sentence.
        `None` if unknown. Defaults to `True` for the last token in the `Doc`.

@ -533,18 +541,18 @@ cdef class Token:

        DOCS: https://spacy.io/api/token#is_sent_end
        """
-        def __get__(self):
-            if self.i + 1 == len(self.doc):
-                return True
-            elif self.doc[self.i+1].is_sent_start is None:
-                return None
-            elif self.doc[self.i+1].is_sent_start is True:
-                return True
-            else:
-                return False
+        if self.i + 1 == len(self.doc):
+            return True
+        elif self.doc[self.i+1].is_sent_start is None:
+            return None
+        elif self.doc[self.i+1].is_sent_start is True:
+            return True
+        else:
+            return False

-        def __set__(self, value):
-            raise ValueError(Errors.E196)
+    @is_sent_end.setter
+    def is_sent_end(self, value):
+        raise ValueError(Errors.E196)

    @property
    def lefts(self):
@ -671,41 +679,42 @@ cdef class Token:
        """
        return not Token.missing_head(self.c)

-    property head:
+    @property
+    def head(self):
        """The syntactic parent, or "governor", of this token.
        If token.has_head() is `False`, this method will return itself.

        RETURNS (Token): The token predicted by the parser to be the head of
            the current token.
        """
-        def __get__(self):
-            if not self.has_head():
-                return self
-            else:
-                return self.doc[self.i + self.c.head]
+        if not self.has_head():
+            return self
+        else:
+            return self.doc[self.i + self.c.head]

-        def __set__(self, Token new_head):
-            # This function sets the head of self to new_head and updates the
-            # counters for left/right dependents and left/right corner for the
-            # new and the old head
-            # Check that token is from the same document
-            if self.doc != new_head.doc:
-                raise ValueError(Errors.E191)
-            # Do nothing if old head is new head
-            if self.i + self.c.head == new_head.i:
-                return
-            # Find the widest l/r_edges of the roots of the two tokens involved
-            # to limit the number of tokens for set_children_from_heads
-            cdef Token self_root, new_head_root
-            self_root = ([self] + list(self.ancestors))[-1]
-            new_head_ancestors = list(new_head.ancestors)
-            new_head_root = new_head_ancestors[-1] if new_head_ancestors else new_head
-            start = self_root.c.l_edge if self_root.c.l_edge < new_head_root.c.l_edge else new_head_root.c.l_edge
-            end = self_root.c.r_edge if self_root.c.r_edge > new_head_root.c.r_edge else new_head_root.c.r_edge
-            # Set new head
-            self.c.head = new_head.i - self.i
-            # Adjust parse properties and sentence starts
-            set_children_from_heads(self.doc.c, start, end + 1)
+    @head.setter
+    def head(self, Token new_head):
+        # This function sets the head of self to new_head and updates the
+        # counters for left/right dependents and left/right corner for the
+        # new and the old head
+        # Check that token is from the same document
+        if self.doc != new_head.doc:
+            raise ValueError(Errors.E191)
+        # Do nothing if old head is new head
+        if self.i + self.c.head == new_head.i:
+            return
+        # Find the widest l/r_edges of the roots of the two tokens involved
+        # to limit the number of tokens for set_children_from_heads
+        cdef Token self_root, new_head_root
+        self_root = ([self] + list(self.ancestors))[-1]
+        new_head_ancestors = list(new_head.ancestors)
+        new_head_root = new_head_ancestors[-1] if new_head_ancestors else new_head
+        start = self_root.c.l_edge if self_root.c.l_edge < new_head_root.c.l_edge else new_head_root.c.l_edge
+        end = self_root.c.r_edge if self_root.c.r_edge > new_head_root.c.r_edge else new_head_root.c.r_edge
+        # Set new head
+        self.c.head = new_head.i - self.i
+        # Adjust parse properties and sentence starts
+        set_children_from_heads(self.doc.c, start, end + 1)

    @property
    def conjuncts(self):
@ -733,21 +742,23 @@ cdef class Token:
                    queue.append(child)
        return tuple([w for w in output if w.i != self.i])

-    property ent_type:
+    @property
+    def ent_type(self):
        """RETURNS (uint64): Named entity type."""
-        def __get__(self):
-            return self.c.ent_type
+        return self.c.ent_type

-        def __set__(self, ent_type):
-            self.c.ent_type = ent_type
+    @ent_type.setter
+    def ent_type(self, ent_type):
+        self.c.ent_type = ent_type

-    property ent_type_:
+    @property
+    def ent_type_(self):
        """RETURNS (str): Named entity type."""
-        def __get__(self):
-            return self.vocab.strings[self.c.ent_type]
+        return self.vocab.strings[self.c.ent_type]

-        def __set__(self, ent_type):
-            self.c.ent_type = self.vocab.strings.add(ent_type)
+    @ent_type_.setter
+    def ent_type_(self, ent_type):
+        self.c.ent_type = self.vocab.strings.add(ent_type)

    @property
    def ent_iob(self):
@ -773,41 +784,45 @@ cdef class Token:
        """
        return self.iob_strings()[self.c.ent_iob]

-    property ent_id:
+    @property
+    def ent_id(self):
        """RETURNS (uint64): ID of the entity the token is an instance of,
            if any.
        """
-        def __get__(self):
-            return self.c.ent_id
+        return self.c.ent_id

-        def __set__(self, hash_t key):
-            self.c.ent_id = key
+    @ent_id.setter
+    def ent_id(self, hash_t key):
+        self.c.ent_id = key

-    property ent_id_:
+    @property
+    def ent_id_(self):
        """RETURNS (str): ID of the entity the token is an instance of,
            if any.
        """
-        def __get__(self):
-            return self.vocab.strings[self.c.ent_id]
+        return self.vocab.strings[self.c.ent_id]

-        def __set__(self, name):
-            self.c.ent_id = self.vocab.strings.add(name)
+    @ent_id_.setter
+    def ent_id_(self, name):
+        self.c.ent_id = self.vocab.strings.add(name)

-    property ent_kb_id:
+    @property
+    def ent_kb_id(self):
        """RETURNS (uint64): Named entity KB ID."""
-        def __get__(self):
-            return self.c.ent_kb_id
+        return self.c.ent_kb_id

-        def __set__(self, attr_t ent_kb_id):
-            self.c.ent_kb_id = ent_kb_id
+    @ent_kb_id.setter
+    def ent_kb_id(self, attr_t ent_kb_id):
+        self.c.ent_kb_id = ent_kb_id

-    property ent_kb_id_:
+    @property
+    def ent_kb_id_(self):
        """RETURNS (str): Named entity KB ID."""
-        def __get__(self):
-            return self.vocab.strings[self.c.ent_kb_id]
+        return self.vocab.strings[self.c.ent_kb_id]

-        def __set__(self, ent_kb_id):
-            self.c.ent_kb_id = self.vocab.strings.add(ent_kb_id)
+    @ent_kb_id_.setter
+    def ent_kb_id_(self, ent_kb_id):
+        self.c.ent_kb_id = self.vocab.strings.add(ent_kb_id)

    @property
    def whitespace_(self):
@ -829,16 +844,17 @@ cdef class Token:
        """
        return self.vocab.strings[self.c.lex.lower]

-    property norm_:
+    @property
+    def norm_(self):
        """RETURNS (str): The token's norm, i.e. a normalised form of the
            token text. Usually set in the language's tokenizer exceptions or
            norm exceptions.
        """
-        def __get__(self):
-            return self.vocab.strings[self.norm]
+        return self.vocab.strings[self.norm]

-        def __set__(self, str norm_):
-            self.c.norm = self.vocab.strings.add(norm_)
+    @norm_.setter
+    def norm_(self, str norm_):
+        self.c.norm = self.vocab.strings.add(norm_)

    @property
    def shape_(self):
@ -868,33 +884,36 @@ cdef class Token:
        """
        return self.vocab.strings[self.c.lex.lang]

-    property lemma_:
+    @property
+    def lemma_(self):
        """RETURNS (str): The token lemma, i.e. the base form of the word,
            with no inflectional suffixes.
        """
-        def __get__(self):
-            return self.vocab.strings[self.c.lemma]
+        return self.vocab.strings[self.c.lemma]

-        def __set__(self, str lemma_):
-            self.c.lemma = self.vocab.strings.add(lemma_)
+    @lemma_.setter
+    def lemma_(self, str lemma_):
+        self.c.lemma = self.vocab.strings.add(lemma_)

-    property pos_:
+    @property
+    def pos_(self):
        """RETURNS (str): Coarse-grained part-of-speech tag."""
-        def __get__(self):
-            return parts_of_speech.NAMES[self.c.pos]
+        return parts_of_speech.NAMES[self.c.pos]

-        def __set__(self, pos_name):
-            if pos_name not in parts_of_speech.IDS:
-                raise ValueError(Errors.E1021.format(pp=pos_name))
-            self.c.pos = parts_of_speech.IDS[pos_name]
+    @pos_.setter
+    def pos_(self, pos_name):
+        if pos_name not in parts_of_speech.IDS:
+            raise ValueError(Errors.E1021.format(pp=pos_name))
+        self.c.pos = parts_of_speech.IDS[pos_name]

-    property tag_:
+    @property
+    def tag_(self):
        """RETURNS (str): Fine-grained part-of-speech tag."""
-        def __get__(self):
-            return self.vocab.strings[self.c.tag]
+        return self.vocab.strings[self.c.tag]

-        def __set__(self, tag):
-            self.tag = self.vocab.strings.add(tag)
+    @tag_.setter
+    def tag_(self, tag):
+        self.tag = self.vocab.strings.add(tag)

    def has_dep(self):
        """Check whether the token has annotated dep information.
@ -904,13 +923,14 @@ cdef class Token:
        """
        return not Token.missing_dep(self.c)

-    property dep_:
+    @property
+    def dep_(self):
        """RETURNS (str): The syntactic dependency label."""
-        def __get__(self):
-            return self.vocab.strings[self.c.dep]
+        return self.vocab.strings[self.c.dep]

-        def __set__(self, str label):
-            self.c.dep = self.vocab.strings.add(label)
+    @dep_.setter
+    def dep_(self, str label):
+        self.c.dep = self.vocab.strings.add(label)

    @property
    def is_oov(self):
--- a/spacy/training/example.pyx
+++ b/spacy/training/example.pyx
@ -101,23 +101,25 @@ cdef class Example:
    def __len__(self):
        return len(self.predicted)

-    property predicted:
-        def __get__(self):
-            return self.x
+    @property
+    def predicted(self):
+        return self.x

-        def __set__(self, doc):
-            self.x = doc
-            self._cached_alignment = None
-            self._cached_words_x = [t.text for t in doc]
+    @predicted.setter
+    def predicted(self, doc):
+        self.x = doc
+        self._cached_alignment = None
+        self._cached_words_x = [t.text for t in doc]

-    property reference:
-        def __get__(self):
-            return self.y
+    @property
+    def reference(self):
+        return self.y

-        def __set__(self, doc):
-            self.y = doc
-            self._cached_alignment = None
-            self._cached_words_y = [t.text for t in doc]
+    @reference.setter
+    def reference(self, doc):
+        self.y = doc
+        self._cached_alignment = None
+        self._cached_words_y = [t.text for t in doc]

    def copy(self):
        return Example(
@ -433,9 +435,9 @@ cdef class Example:
                seen_indices.update(indices)
        return output

-    property text:
-        def __get__(self):
-            return self.x.text
+    @property
+    def text(self):
+        return self.x.text

    def __str__(self):
        return str(self.to_dict())
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@ -87,16 +87,17 @@ cdef class Vocab:
            self.writing_system = writing_system
        self.get_noun_chunks = get_noun_chunks

-    property vectors:
-        def __get__(self):
-            return self._vectors
+    @property
+    def vectors(self):
+        return self._vectors

-        def __set__(self, vectors):
-            if hasattr(vectors, "strings"):
-                for s in vectors.strings:
-                    self.strings.add(s)
-            self._vectors = vectors
-            self._vectors.strings = self.strings
+    @vectors.setter
+    def vectors(self, vectors):
+        if hasattr(vectors, "strings"):
+            for s in vectors.strings:
+                self.strings.add(s)
+        self._vectors = vectors
+        self._vectors.strings = self.strings

    @property
    def lang(self):
@ -450,17 +451,18 @@ cdef class Vocab:
        key = Lexeme.get_struct_attr(lex.c, self.vectors.attr)
        return key in self.vectors

-    property lookups:
-        def __get__(self):
-            return self._lookups
+    @property
+    def lookups(self):
+        return self._lookups

-        def __set__(self, lookups):
-            self._lookups = lookups
-            if lookups.has_table("lexeme_norm"):
-                self.lex_attr_getters[NORM] = util.add_lookups(
-                    self.lex_attr_getters.get(NORM, LEX_ATTRS[NORM]),
-                    self.lookups.get_table("lexeme_norm"),
-                )
+    @lookups.setter
+    def lookups(self, lookups):
+        self._lookups = lookups
+        if lookups.has_table("lexeme_norm"):
+            self.lex_attr_getters[NORM] = util.add_lookups(
+                self.lex_attr_getters.get(NORM, LEX_ATTRS[NORM]),
+                self.lookups.get_table("lexeme_norm"),
+            )

    def to_disk(self, path, *, exclude=tuple()):
        """Save the current state to a directory.
--- a/website/docs/api/attributes.mdx
+++ b/website/docs/api/attributes.mdx
@ -45,33 +45,33 @@ For attributes that represent string values, the internal integer ID is accessed
 as `Token.attr`, e.g. `token.dep`, while the string value can be retrieved by
 appending `_` as in `token.dep_`.

-| Attribute    | Description                                                                                                                                                   |
-| ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `DEP`        | The token's dependency label. ~~str~~                                                                                                                         |
-| `ENT_ID`     | The token's entity ID (`ent_id`). ~~str~~                                                                                                                     |
-| `ENT_IOB`    | The IOB part of the token's entity tag. Uses custom integer vaues rather than the string store: unset is `0`, `I` is `1`, `O` is `2`, and `B` is `3`. ~~str~~ |
-| `ENT_KB_ID`  | The token's entity knowledge base ID. ~~str~~                                                                                                                 |
-| `ENT_TYPE`   | The token's entity label. ~~str~~                                                                                                                             |
-| `IS_ALPHA`   | Token text consists of alphabetic characters. ~~bool~~                                                                                                        |
-| `IS_ASCII`   | Token text consists of ASCII characters. ~~bool~~                                                                                                             |
-| `IS_DIGIT`   | Token text consists of digits. ~~bool~~                                                                                                                       |
-| `IS_LOWER`   | Token text is in lowercase. ~~bool~~                                                                                                                          |
-| `IS_PUNCT`   | Token is punctuation. ~~bool~~                                                                                                                                |
-| `IS_SPACE`   | Token is whitespace. ~~bool~~                                                                                                                                 |
-| `IS_STOP`    | Token is a stop word. ~~bool~~                                                                                                                                |
-| `IS_TITLE`   | Token text is in titlecase. ~~bool~~                                                                                                                          |
-| `IS_UPPER`   | Token text is in uppercase. ~~bool~~                                                                                                                          |
-| `LEMMA`      | The token's lemma. ~~str~~                                                                                                                                    |
-| `LENGTH`     | The length of the token text. ~~int~~                                                                                                                         |
-| `LIKE_EMAIL` | Token text resembles an email address. ~~bool~~                                                                                                               |
-| `LIKE_NUM`   | Token text resembles a number. ~~bool~~                                                                                                                       |
-| `LIKE_URL`   | Token text resembles a URL. ~~bool~~                                                                                                                          |
-| `LOWER`      | The lowercase form of the token text. ~~str~~                                                                                                                 |
-| `MORPH`      | The token's morphological analysis. ~~MorphAnalysis~~                                                                                                         |
-| `NORM`       | The normalized form of the token text. ~~str~~                                                                                                                |
-| `ORTH`       | The exact verbatim text of a token. ~~str~~                                                                                                                   |
-| `POS`        | The token's universal part of speech (UPOS). ~~str~~                                                                                                          |
-| `SENT_START` | Token is start of sentence. ~~bool~~                                                                                                                          |
-| `SHAPE`      | The token's shape. ~~str~~                                                                                                                                    |
-| `SPACY`      | Token has a trailing space. ~~bool~~                                                                                                                          |
-| `TAG`        | The token's fine-grained part of speech. ~~str~~                                                                                                              |
+| Attribute    | Description                                                                                                                                                    |
+| ------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `DEP`        | The token's dependency label. ~~str~~                                                                                                                          |
+| `ENT_ID`     | The token's entity ID (`ent_id`). ~~str~~                                                                                                                      |
+| `ENT_IOB`    | The IOB part of the token's entity tag. Uses custom integer values rather than the string store: unset is `0`, `I` is `1`, `O` is `2`, and `B` is `3`. ~~str~~ |
+| `ENT_KB_ID`  | The token's entity knowledge base ID. ~~str~~                                                                                                                  |
+| `ENT_TYPE`   | The token's entity label. ~~str~~                                                                                                                              |
+| `IS_ALPHA`   | Token text consists of alphabetic characters. ~~bool~~                                                                                                         |
+| `IS_ASCII`   | Token text consists of ASCII characters. ~~bool~~                                                                                                              |
+| `IS_DIGIT`   | Token text consists of digits. ~~bool~~                                                                                                                        |
+| `IS_LOWER`   | Token text is in lowercase. ~~bool~~                                                                                                                           |
+| `IS_PUNCT`   | Token is punctuation. ~~bool~~                                                                                                                                 |
+| `IS_SPACE`   | Token is whitespace. ~~bool~~                                                                                                                                  |
+| `IS_STOP`    | Token is a stop word. ~~bool~~                                                                                                                                 |
+| `IS_TITLE`   | Token text is in titlecase. ~~bool~~                                                                                                                           |
+| `IS_UPPER`   | Token text is in uppercase. ~~bool~~                                                                                                                           |
+| `LEMMA`      | The token's lemma. ~~str~~                                                                                                                                     |
+| `LENGTH`     | The length of the token text. ~~int~~                                                                                                                          |
+| `LIKE_EMAIL` | Token text resembles an email address. ~~bool~~                                                                                                                |
+| `LIKE_NUM`   | Token text resembles a number. ~~bool~~                                                                                                                        |
+| `LIKE_URL`   | Token text resembles a URL. ~~bool~~                                                                                                                           |
+| `LOWER`      | The lowercase form of the token text. ~~str~~                                                                                                                  |
+| `MORPH`      | The token's morphological analysis. ~~MorphAnalysis~~                                                                                                          |
+| `NORM`       | The normalized form of the token text. ~~str~~                                                                                                                 |
+| `ORTH`       | The exact verbatim text of a token. ~~str~~                                                                                                                    |
+| `POS`        | The token's universal part of speech (UPOS). ~~str~~                                                                                                           |
+| `SENT_START` | Token is start of sentence. ~~bool~~                                                                                                                           |
+| `SHAPE`      | The token's shape. ~~str~~                                                                                                                                     |
+| `SPACY`      | Token has a trailing space. ~~bool~~                                                                                                                           |
+| `TAG`        | The token's fine-grained part of speech. ~~str~~                                                                                                               |
--- a/website/docs/api/cli.mdx
+++ b/website/docs/api/cli.mdx
@ -566,7 +566,7 @@ New: 'ORG' (23860), 'PERSON' (21395), 'GPE' (21193), 'DATE' (18080), 'CARDINAL'
 'LOC' (2113), 'TIME' (1616), 'WORK_OF_ART' (1229), 'QUANTITY' (1150), 'FAC'
 (1134), 'EVENT' (974), 'PRODUCT' (935), 'LAW' (444), 'LANGUAGE' (338)
 ✔ Good amount of examples for all labels
-✔ Examples without occurences available for all labels
+✔ Examples without occurrences available for all labels
 ✔ No entities consisting of or starting/ending with whitespace

 =========================== Part-of-speech Tagging ===========================
@ -1322,7 +1322,7 @@ $ python -m spacy apply [model] [data-path] [output-file] [--code] [--text-key]

 ## find-threshold {id="find-threshold",version="3.5",tag="command"}

-Runs prediction trials for a trained model with varying tresholds to maximize
+Runs prediction trials for a trained model with varying thresholds to maximize
 the specified metric. The search space for the threshold is traversed linearly
 from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`
 (the corresponding API call to `spacy.cli.find_threshold.find_threshold()`
--- a/website/docs/api/entitylinker.mdx
+++ b/website/docs/api/entitylinker.mdx
@ -61,13 +61,13 @@ architectures and their arguments and hyperparameters.
 | `incl_context`                                   | Whether the local context is included in the model. Defaults to `True`. ~~bool~~                                                                                                                                                                                                                                                               |
 | `model`                                          | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [`EntityLinker`](/api/architectures#EntityLinker). ~~Model~~                                                                                                                                                                                       |
 | `entity_vector_length`                           | Size of encoding vectors in the KB. Defaults to `64`. ~~int~~                                                                                                                                                                                                                                                                                  |
-| `use_gold_ents`                                  | Whether entities are copied from the gold docs. Defaults to `True`. If `False`, entities must be set in the training data or by an annotating component in the pipeline. ~~int~~                                                                                                                                                               |
+| `use_gold_ents`                                  | Whether entities are copied from the gold docs. Defaults to `True`. If `False`, entities must be set in the training data or by an annotating component in the pipeline. ~~bool~~                                                                                                                                                              |
 | `get_candidates` <Tag variant="new">4.0</Tag>    | Function that retrieves plausible candidates per entity mention in a given `Iterator[SpanGroup]` (one `SpanGroup` includes all mentions found in a given `Doc` instance). Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator). ~~Callable[[KnowledgeBase, Iterator[SpanGroup]], Iterator[Iterable[Iterable[Candidate]]]]~~ |
 | `generate_empty_kb` <Tag variant="new">3.6</Tag> | Function that generates an empty `KnowledgeBase` object. Defaults to [`spacy.EmptyKB.v2`](/api/architectures#EmptyKB), which generates an empty [`InMemoryLookupKB`](/api/inmemorylookupkb). ~~Callable[[Vocab, int], KnowledgeBase]~~                                                                                                         |
 | `overwrite` <Tag variant="new">3.2</Tag>         | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~                                                                                                                                                                                                                                                                       |
 | `scorer` <Tag variant="new">3.2</Tag>            | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~                                                                                                                                                                                                                                        |
 | `save_activations` <Tag variant="new">4.0</Tag>  | Save activations in `Doc` when annotating. Saved activations are `"ents"` and `"scores"`. ~~Union[bool, list[str]]~~                                                                                                                                                                                                                           |
-| `threshold` <Tag variant="new">3.4</Tag>         | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the treshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~                                                    |
+| `threshold` <Tag variant="new">3.4</Tag>         | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the threshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~                                                   |

 <Infobox variant="warning">

@ -114,21 +114,21 @@ custom knowledge base, you should either call
 [`set_kb`](/api/entitylinker#set_kb) or provide a `kb_loader` in the
 [`initialize`](/api/entitylinker#initialize) call.

-| Name                                     | Description                                                                                                                                                                                                                                                                                 |
-| ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `vocab`                                  | The shared vocabulary. ~~Vocab~~                                                                                                                                                                                                                                                            |
-| `model`                                  | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model~~                                                                                                                                                                                                   |
-| `name`                                   | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                                                                                                                                                                         |
-| _keyword-only_                           |                                                                                                                                                                                                                                                                                             |
-| `entity_vector_length`                   | Size of encoding vectors in the KB. ~~int~~                                                                                                                                                                                                                                                 |
-| `get_candidates`                         | Function that retrieves plausible candidates per entity mention in a given `SpanGroup`. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator). ~~Callable[[KnowledgeBase, Iterator[SpanGroup]], Iterator[Iterable[Iterable[Candidate]]]]~~                                |
-| `labels_discard`                         | NER labels that will automatically get a `"NIL"` prediction. ~~Iterable[str]~~                                                                                                                                                                                                              |
-| `n_sents`                                | The number of neighbouring sentences to take into account. ~~int~~                                                                                                                                                                                                                          |
-| `incl_prior`                             | Whether or not to include prior probabilities from the KB in the model. ~~bool~~                                                                                                                                                                                                            |
-| `incl_context`                           | Whether or not to include the local context in the model. ~~bool~~                                                                                                                                                                                                                          |
-| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~                                                                                                                                                                                                                    |
-| `scorer` <Tag variant="new">3.2</Tag>    | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~                                                                                                                                                                                     |
-| `threshold` <Tag variant="new">3.4</Tag> | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the treshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ |
+| Name                                     | Description                                                                                                                                                                                                                                                                                  |
+| ---------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `vocab`                                  | The shared vocabulary. ~~Vocab~~                                                                                                                                                                                                                                                             |
+| `model`                                  | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model~~                                                                                                                                                                                                    |
+| `name`                                   | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                                                                                                                                                                          |
+| _keyword-only_                           |                                                                                                                                                                                                                                                                                              |
+| `entity_vector_length`                   | Size of encoding vectors in the KB. ~~int~~                                                                                                                                                                                                                                                  |
+| `get_candidates`                         | Function that retrieves plausible candidates per entity mention in a given `SpanGroup`. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator). ~~Callable[[KnowledgeBase, Iterator[SpanGroup]], Iterator[Iterable[Iterable[Candidate]]]]~~                                 |
+| `labels_discard`                         | NER labels that will automatically get a `"NIL"` prediction. ~~Iterable[str]~~                                                                                                                                                                                                               |
+| `n_sents`                                | The number of neighbouring sentences to take into account. ~~int~~                                                                                                                                                                                                                           |
+| `incl_prior`                             | Whether or not to include prior probabilities from the KB in the model. ~~bool~~                                                                                                                                                                                                             |
+| `incl_context`                           | Whether or not to include the local context in the model. ~~bool~~                                                                                                                                                                                                                           |
+| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~                                                                                                                                                                                                                     |
+| `scorer` <Tag variant="new">3.2</Tag>    | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~                                                                                                                                                                                      |
+| `threshold` <Tag variant="new">3.4</Tag> | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the threshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ |

 ## EntityLinker.\_\_call\_\_ {id="call",tag="method"}

--- a/website/docs/api/entityruler.mdx
+++ b/website/docs/api/entityruler.mdx
@ -69,7 +69,7 @@ how the component should be configured. You can override its settings via the
 | Setting                                              | Description                                                                                                                                                                                   |
 | ---------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `phrase_matcher_attr`                                | Optional attribute to match on for the internal [`PhraseMatcher`](/api/phrasematcher), e.g. `LOWER` to match on the lowercase token text. Defaults to `None`. ~~Optional[Union[int, str]]~~   |
-| `matcher_fuzzy_compare` <Tag variant="new">3.5</Tag> | The fuzzy comparison method, passed on to the internal `Matcher`. Defaults to `spacy.matcher.levenshtein.levenshtein_compare`. ~~Callable~~                                                             |
+| `matcher_fuzzy_compare` <Tag variant="new">3.5</Tag> | The fuzzy comparison method, passed on to the internal `Matcher`. Defaults to `spacy.matcher.levenshtein.levenshtein_compare`. ~~Callable~~                                                   |
 | `validate`                                           | Whether patterns should be validated (passed to the `Matcher` and `PhraseMatcher`). Defaults to `False`. ~~bool~~                                                                             |
 | `overwrite_ents`                                     | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. Defaults to `False`. ~~bool~~                                                     |
 | `ent_id_sep`                                         | Separator used internally for entity IDs. Defaults to `"\|\|"`. ~~str~~                                                                                                                       |
--- a/website/docs/api/morphology.mdx
+++ b/website/docs/api/morphology.mdx
@ -147,9 +147,10 @@ Whether a feature/value pair is in the analysis.
 > assert "Feat1=Val1" in morph
 > ```

-| Name        | Description                                   |
-| ----------- | --------------------------------------------- |
-| **RETURNS** | A feature/value pair in the analysis. ~~str~~ |
+| Name         | Description                                                           |
+| ------------ | --------------------------------------------------------------------- |
+| `feature`    | A feature/value pair. ~~str~~                                         |
+| **RETURNS**  | Whether the feature/value pair is contained in the analysis. ~~bool~~ |

 ### MorphAnalysis.\_\_iter\_\_ {id="morphanalysis-iter",tag="method"}

--- a/website/docs/api/span.mdx
+++ b/website/docs/api/span.mdx
@ -287,7 +287,7 @@ does not permit other NPs to be nested within it – so no NP-level coordination
 no prepositional phrases, and no relative clauses.

 If the `noun_chunk` [syntax iterator](/usage/linguistic-features#language-data)
-has not been implemeted for the given language, a `NotImplementedError` is
+has not been implemented for the given language, a `NotImplementedError` is
 raised.

 > #### Example
--- a/website/docs/api/transformer.mdx
+++ b/website/docs/api/transformer.mdx
@ -416,7 +416,7 @@ by this class. Instances of this class are typically assigned to the
 | `align`        | Alignment from the `Doc`'s tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~                                                                                 |
 | `width`        | The width of the last hidden layer. ~~int~~                                                                                                                                                                                                                                                                                          |

-### TransformerData.empty {id="transformerdata-emoty",tag="classmethod"}
+### TransformerData.empty {id="transformerdata-empty",tag="classmethod"}

 Create an empty `TransformerData` container.

--- a/website/docs/usage/layers-architectures.mdx
+++ b/website/docs/usage/layers-architectures.mdx
@ -832,7 +832,7 @@ retrieve and add to them.

 After creation, the component needs to be
 [initialized](/usage/training#initialization). This method can define the
-relevant labels in two ways: explicitely by setting the `labels` argument in the
+relevant labels in two ways: explicitly by setting the `labels` argument in the
 [`initialize` block](/api/data-formats#config-initialize) of the config, or
 implicitly by deducing them from the `get_examples` callback that generates the
 full **training data set**, or a representative sample.
--- a/website/docs/usage/linguistic-features.mdx
+++ b/website/docs/usage/linguistic-features.mdx
@ -1899,7 +1899,7 @@ the two words.
    "Shore": ("coast", 0.732257),
    "Precautionary": ("caution", 0.490973),
    "hopelessness": ("sadness", 0.742366),
-    "Continous": ("continuous", 0.732549),
+    "Continuous": ("continuous", 0.732549),
    "Disemboweled": ("corpse", 0.499432),
    "biostatistician": ("scientist", 0.339724),
    "somewheres": ("somewheres", 0.402736),
--- a/website/docs/usage/models.mdx
+++ b/website/docs/usage/models.mdx
@ -530,13 +530,17 @@ application's `requirements.txt`. If you're running your own internal PyPi
 installation, you can upload the pipeline packages there. pip's
 [requirements file format](https://pip.pypa.io/en/latest/reference/requirements-file-format/)
 supports both package names to download via a PyPi server, as well as
-[direct URLs](#pipeline-urls).
+[direct URLs](#pipeline-urls). For instance, you can specify the
+`en_core_web_sm` model for spaCy 3.7.x as follows:

 ```text {title="requirements.txt"}
 spacy>=3.0.0,<4.0.0
-en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
 ```

+See the [list of models](https://spacy.io/models) for model download links for
+the current spaCy version.
+
 All pipeline packages are versioned and specify their spaCy dependency. This
 ensures cross-compatibility and lets you specify exact version requirements for
 each pipeline. If you've [trained](/usage/training) your own pipeline, you can
--- a/website/docs/usage/projects.mdx
+++ b/website/docs/usage/projects.mdx
@ -173,7 +173,7 @@ detected, a corresponding warning is displayed. If you'd like to disable the
 dependency check, set `check_requirements: false` in your project's
 `project.yml`.

-### 4. Run a workflow {id="run-workfow"}
+### 4. Run a workflow {id="run-workflow"}

 > #### project.yml
 >
@ -286,7 +286,7 @@ pipelines.
 | --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `title`                                             | An optional project title used in `--help` message and [auto-generated docs](#custom-docs).                                                                                                                                                                                                                                                                                                                                                                                                                  |
 | `description`                                       | An optional project description used in [auto-generated docs](#custom-docs).                                                                                                                                                                                                                                                                                                                                                                                                                                 |
-| `vars`                                              | A dictionary of variables that can be referenced in paths, URLs and scripts and overriden on the CLI, just like [`config.cfg` variables](/usage/training#config-interpolation). For example, `${vars.name}` will use the value of the variable `name`. Variables need to be defined in the section `vars`, but can be a nested dict, so you're able to reference `${vars.model.name}`.                                                                                                                       |
+| `vars`                                              | A dictionary of variables that can be referenced in paths, URLs and scripts and overridden on the CLI, just like [`config.cfg` variables](/usage/training#config-interpolation). For example, `${vars.name}` will use the value of the variable `name`. Variables need to be defined in the section `vars`, but can be a nested dict, so you're able to reference `${vars.model.name}`.                                                                                                                      |
 | `env`                                               | A dictionary of variables, mapped to the names of environment variables that will be read in when running the project. For example, `${env.name}` will use the value of the environment variable defined as `name`.                                                                                                                                                                                                                                                                                          |
 | `directories`                                       | An optional list of [directories](#project-files) that should be created in the project for assets, training outputs, metrics etc. spaCy will make sure that these directories always exist.                                                                                                                                                                                                                                                                                                                 |
 | `assets`                                            | A list of assets that can be fetched with the [`project assets`](/api/cli#project-assets) command. `url` defines a URL or local path, `dest` is the destination file relative to the project directory, and an optional `checksum` ensures that an error is raised if the file's checksum doesn't match. Instead of `url`, you can also provide a `git` block with the keys `repo`, `branch` and `path`, to download from a Git repo.                                                                        |
--- a/website/docs/usage/saving-loading.mdx
+++ b/website/docs/usage/saving-loading.mdx
@ -306,7 +306,9 @@ installed in the same environment – that's it.

 ### Loading probability tables into existing models

-You can load a probability table from [spacy-lookups-data](https://github.com/explosion/spacy-lookups-data) into an existing spaCy model like `en_core_web_sm`.
+You can load a probability table from
+[spacy-lookups-data](https://github.com/explosion/spacy-lookups-data) into an
+existing spaCy model like `en_core_web_sm`.

 ```python
 # Requirements: pip install spacy-lookups-data
@ -317,7 +319,8 @@ lookups = load_lookups("en", ["lexeme_prob"])
 nlp.vocab.lookups.add_table("lexeme_prob", lookups.get_table("lexeme_prob"))
 ```

-When training a model from scratch you can also specify probability tables in the `config.cfg`.
+When training a model from scratch you can also specify probability tables in
+the `config.cfg`.

 ```ini {title="config.cfg (excerpt)"}
 [initialize.lookups]
@ -346,8 +349,8 @@ them**!
 To stick with the theme of
 [this entry points blog post](https://amir.rachum.com/blog/2017/07/28/python-entry-points/),
 consider the following custom spaCy
-[pipeline component](/usage/processing-pipelines#custom-coponents) that prints a
-snake when it's called:
+[pipeline component](/usage/processing-pipelines#custom-components) that prints
+a snake when it's called:

 > #### Package directory structure
 >
--- a/website/docs/usage/v2-2.mdx
+++ b/website/docs/usage/v2-2.mdx
@ -185,7 +185,7 @@ New: 'ORG' (23860), 'PERSON' (21395), 'GPE' (21193), 'DATE' (18080), 'CARDINAL'
 'LOC' (2113), 'TIME' (1616), 'WORK_OF_ART' (1229), 'QUANTITY' (1150), 'FAC'
 (1134), 'EVENT' (974), 'PRODUCT' (935), 'LAW' (444), 'LANGUAGE' (338)
 ✔ Good amount of examples for all labels
-✔ Examples without occurences available for all labels
+✔ Examples without occurrences available for all labels
 ✔ No entities consisting of or starting/ending with whitespace

 =========================== Part-of-speech Tagging ===========================
--- a/website/docs/usage/v3-2.mdx
+++ b/website/docs/usage/v3-2.mdx
@ -138,7 +138,7 @@ backwards compatibility, the tuple format remains available under
 `TransformerData.tensors` and `FullTransformerBatch.tensors`. See more details
 in the [transformer API docs](/api/architectures#TransformerModel).

-`spacy-transfomers` v1.1 also adds support for `transformer_config` settings
+`spacy-transformers` v1.1 also adds support for `transformer_config` settings
 such as `output_attentions`. Additional output is stored under
 `TransformerData.model_output`. More details are in the
 [TransformerModel docs](/api/architectures#TransformerModel). The training speed
--- a/website/meta/site.json
+++ b/website/meta/site.json
@ -23,7 +23,6 @@
    },
    "docSearch": {
        "appId": "Y1LB128RON",
-        "apiKey": "bb601a1daab73e2dc66faf2b79564807",
        "indexName": "spacy"
    },
    "binderUrl": "explosion/spacy-io-binder",
--- a/website/next.config.mjs
+++ b/website/next.config.mjs
@ -32,6 +32,9 @@ const nextConfig = withPWA(
            ignoreBuildErrors: true,
        },
        images: { unoptimized: true },
+        env: {
+            DOCSEARCH_API_KEY: process.env.DOCSEARCH_API_KEY
+        }
    })
 )

--- a/website/src/components/search.js
+++ b/website/src/components/search.js
@ -1,4 +1,4 @@
-import React, { useEffect, useState } from 'react'
+import React from 'react'
 import PropTypes from 'prop-types'
 import { DocSearch } from '@docsearch/react'
 import '@docsearch/css'
@ -6,7 +6,8 @@ import '@docsearch/css'
 import siteMetadata from '../../meta/site.json'

 export default function Search({ placeholder = 'Search docs' }) {
-    const { apiKey, indexName, appId } = siteMetadata.docSearch
+    const apiKey = process.env.DOCSEARCH_API_KEY
+    const { indexName, appId } = siteMetadata.docSearch
    return (
        <DocSearch appId={appId} indexName={indexName} apiKey={apiKey} placeholder={placeholder} />
    )
--- a/website/src/styles/code.module.sass
+++ b/website/src/styles/code.module.sass
@ -109,6 +109,8 @@
    box-shadow: inset 1px 1px 1px rgba(0, 0, 0, 0.25)
    background: var(--color-dark)
    margin: 1.5rem 0 0 2rem
+    position: sticky
+    left: 2rem

 .header
    width: 100%