Merge pull request #12842 from svlandeg/sync_v4

Sync v4 with latest from master and develop
Sofie Van Landeghem committed 2023-07-24 12:13:04 +02:00 (via GitHub)
commit eaaac5a08c
86 changed files with 1311 additions and 806 deletions


@@ -45,6 +45,12 @@ jobs:
run: |
python -m pip install flake8==5.0.4
python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
- name: cython-lint
run: |
python -m pip install cython-lint -c requirements.txt
# E501: line too long, W291: trailing whitespace, E266: too many leading '#' for block comment
cython-lint spacy --ignore E501,W291,E266
tests:
name: Test
needs: Validate


@@ -1,7 +1,7 @@
SHELL := /bin/bash
ifndef SPACY_EXTRAS
override SPACY_EXTRAS = spacy-lookups-data==1.0.2 jieba spacy-pkuseg==0.0.28 sudachipy sudachidict_core pymorphy2
override SPACY_EXTRAS = spacy-lookups-data==1.0.3
endif
ifndef PYVER


@@ -36,4 +36,5 @@ types-setuptools>=57.0.0
types-requests
types-setuptools>=57.0.0
black==22.3.0
cython-lint>=0.15.0; python_version >= "3.7"
isort>=5.0,<6.0


@@ -32,6 +32,7 @@ def init_vectors_cli(
mode: str = Opt("default", "--mode", "-m", help="Vectors mode: default or floret"),
verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
jsonl_loc: Optional[Path] = Opt(None, "--lexemes-jsonl", "-j", help="Location of JSONL-formatted attributes file", hidden=True),
attr: str = Opt("ORTH", "--attr", "-a", help="Optional token attribute to use for vectors, e.g. LOWER or NORM"),
# fmt: on
):
"""Convert word vectors for use with spaCy. Will export an nlp object that
@@ -53,6 +54,7 @@ def init_vectors_cli(
truncate=truncate,
prune=prune,
mode=mode,
attr=attr,
)
msg.good(f"Successfully converted {len(nlp.vocab.vectors)} vectors")
nlp.to_disk(output_dir)
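
For reference, the new `--attr` option keys the exported vectors on a token attribute other than `ORTH`. A minimal sketch, assuming the `Vectors(attr=...)` argument that warning W125 below points to (file names and data here are hypothetical):

    # python -m spacy init vectors en my_vectors.txt ./out --attr LOWER
    import numpy
    from spacy.strings import StringStore
    from spacy.vectors import Vectors

    # Rows are matched on the LOWER attribute, so keys are lowercased forms.
    data = numpy.zeros((2, 8), dtype="f")
    vectors = Vectors(strings=StringStore(), data=data, keys=["apple", "banana"], attr="LOWER")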


@@ -128,7 +128,7 @@ grad_factor = 1.0
{% if "span_finder" in components -%}
[components.span_finder]
factory = "span_finder"
max_length = null
max_length = 25
min_length = null
scorer = {"@scorers":"spacy.span_finder_scorer.v1"}
spans_key = "sc"
@@ -415,7 +415,7 @@ width = ${components.tok2vec.model.encode.width}
{% if "span_finder" in components %}
[components.span_finder]
factory = "span_finder"
max_length = null
max_length = 25
min_length = null
scorer = {"@scorers":"spacy.span_finder_scorer.v1"}
spans_key = "sc"
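
The template now caps candidate spans at 25 tokens instead of leaving `max_length` unbounded. A hedged sketch of the equivalent runtime setup, using the component options shown above:

    import spacy

    nlp = spacy.blank("en")
    # Mirror the template defaults: candidate spans of at most 25 tokens,
    # written to doc.spans["sc"].
    nlp.add_pipe("span_finder", config={"max_length": 25, "spans_key": "sc"})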


@@ -1,4 +1,3 @@
import itertools
import uuid
from typing import Any, Dict, List, Optional, Tuple, Union
@@ -218,7 +217,7 @@ class SpanRenderer:
+ (self.offset_step * (len(entities) - 1))
)
markup += self.span_template.format(
text=token["text"],
text=escape_html(token["text"]),
span_slices=slices,
span_starts=starts,
total_height=total_height,
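
The change above HTML-escapes the raw token text before it is interpolated into the span markup. A small illustration of what it guards against (the input text is made up):

    import spacy
    from spacy import displacy
    from spacy.tokens import Span

    nlp = spacy.blank("en")
    doc = nlp("one <b>two</b> three")
    doc.spans["sc"] = [Span(doc, 0, 2, "EXAMPLE")]
    html = displacy.render(doc, style="span")
    # With the fix, literal markup in token text comes out escaped
    # ("&lt;b&gt;...") instead of being injected into the rendered HTML.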


@@ -208,6 +208,9 @@ class Warnings(metaclass=ErrorsWithCodes):
W123 = ("Argument `enable` with value {enable} does not contain all values specified in the config option "
"`enabled` ({enabled}). Be aware that this might affect other components in your pipeline.")
W124 = ("{host}:{port} is already in use, using the nearest available port {serve_port} as an alternative.")
W125 = ("The StaticVectors key_attr is no longer used. To set a custom "
"key attribute for vectors, configure it through Vectors(attr=) or "
"'spacy init vectors --attr'")
# v4 warning strings
W400 = ("`use_upper=False` is ignored, the upper layer is always enabled")


@@ -12,8 +12,9 @@ from .candidate import Candidate
cdef class KnowledgeBase:
"""A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases,
to support entity linking of named entities to real-world concepts.
"""A `KnowledgeBase` instance stores unique identifiers for entities and
their textual aliases, to support entity linking of named entities to
real-world concepts.
This is an abstract class and requires its operations to be implemented.
DOCS: https://spacy.io/api/kb
@@ -31,7 +32,9 @@ cdef class KnowledgeBase:
self.entity_vector_length = entity_vector_length
self.mem = Pool()
def get_candidates_batch(self, mentions: SpanGroup) -> Iterable[Iterable[Candidate]]:
def get_candidates_batch(
self, mentions: SpanGroup
) -> Iterable[Iterable[Candidate]]:
"""
Return candidate entities for a specified Span mention. Each candidate defines at least the entity and the
entity's embedding vector. Depending on the KB implementation, further properties - such as the prior
@@ -52,7 +55,9 @@ cdef class KnowledgeBase:
RETURNS (Iterable[Candidate]): Identified candidates.
"""
raise NotImplementedError(
Errors.E1045.format(parent="KnowledgeBase", method="get_candidates", name=self.__name__)
Errors.E1045.format(
parent="KnowledgeBase", method="get_candidates", name=self.__name__
)
)
def get_vectors(self, entities: Iterable[str]) -> Iterable[Iterable[float]]:
@@ -70,7 +75,9 @@ cdef class KnowledgeBase:
RETURNS (Iterable[float]): Vector for specified entity.
"""
raise NotImplementedError(
Errors.E1045.format(parent="KnowledgeBase", method="get_vector", name=self.__name__)
Errors.E1045.format(
parent="KnowledgeBase", method="get_vector", name=self.__name__
)
)
def to_bytes(self, **kwargs) -> bytes:
@@ -78,7 +85,9 @@ cdef class KnowledgeBase:
RETURNS (bytes): Current state as binary string.
"""
raise NotImplementedError(
Errors.E1045.format(parent="KnowledgeBase", method="to_bytes", name=self.__name__)
Errors.E1045.format(
parent="KnowledgeBase", method="to_bytes", name=self.__name__
)
)
def from_bytes(self, bytes_data: bytes, *, exclude: Tuple[str] = tuple()):
@@ -87,27 +96,37 @@ cdef class KnowledgeBase:
exclude (Tuple[str]): Properties to exclude when restoring KB.
"""
raise NotImplementedError(
Errors.E1045.format(parent="KnowledgeBase", method="from_bytes", name=self.__name__)
Errors.E1045.format(
parent="KnowledgeBase", method="from_bytes", name=self.__name__
)
)
def to_disk(self, path: Union[str, Path], exclude: Iterable[str] = SimpleFrozenList()) -> None:
def to_disk(
self, path: Union[str, Path], exclude: Iterable[str] = SimpleFrozenList()
) -> None:
"""
Write KnowledgeBase content to disk.
path (Union[str, Path]): Target file path.
exclude (Iterable[str]): List of components to exclude.
"""
raise NotImplementedError(
Errors.E1045.format(parent="KnowledgeBase", method="to_disk", name=self.__name__)
Errors.E1045.format(
parent="KnowledgeBase", method="to_disk", name=self.__name__
)
)
def from_disk(self, path: Union[str, Path], exclude: Iterable[str] = SimpleFrozenList()) -> None:
def from_disk(
self, path: Union[str, Path], exclude: Iterable[str] = SimpleFrozenList()
) -> None:
"""
Load KnowledgeBase content from disk.
path (Union[str, Path]): Target file path.
exclude (Iterable[str]): List of components to exclude.
"""
raise NotImplementedError(
Errors.E1045.format(parent="KnowledgeBase", method="from_disk", name=self.__name__)
Errors.E1045.format(
parent="KnowledgeBase", method="from_disk", name=self.__name__
)
)
@property
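
Since every operation on the abstract base raises `NotImplementedError` via E1045, custom knowledge bases subclass it and override what they need. A hedged sketch (the subclass name, vector length, and minimal override are made up):

    from spacy.kb import KnowledgeBase
    from spacy.vocab import Vocab

    class DummyKB(KnowledgeBase):
        def get_candidates(self, mention):
            return []  # a real KB would query its backing store here

    kb = DummyKB(vocab=Vocab(), entity_vector_length=64)
    try:
        kb.to_bytes()  # not overridden, so it still raises E1045
    except NotImplementedError:
        pass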


@@ -55,23 +55,28 @@ cdef class InMemoryLookupKB(KnowledgeBase):
# optional data, we can let users configure a DB as the backend for this.
cdef object _features_table
cdef inline int64_t c_add_vector(self, vector[float] entity_vector) nogil:
"""Add an entity vector to the vectors table."""
cdef int64_t new_index = self._vectors_table.size()
self._vectors_table.push_back(entity_vector)
return new_index
cdef inline int64_t c_add_entity(self, hash_t entity_hash, float freq,
int32_t vector_index, int feats_row) nogil:
cdef inline int64_t c_add_entity(
self,
hash_t entity_hash,
float freq,
int32_t vector_index,
int feats_row
) nogil:
"""Add an entry to the vector of entries.
After calling this method, make sure to update also the _entry_index using the return value"""
After calling this method, make sure to update also the _entry_index
using the return value"""
# This is what we'll map the entity hash key to. It's where the entry will sit
# in the vector of entries, so we can get it later.
cdef int64_t new_index = self._entries.size()
# Avoid struct initializer to enable nogil, cf https://github.com/cython/cython/issues/1642
# Avoid struct initializer to enable nogil, cf.
# https://github.com/cython/cython/issues/1642
cdef KBEntryC entry
entry.entity_hash = entity_hash
entry.vector_index = vector_index
@@ -81,11 +86,17 @@ cdef class InMemoryLookupKB(KnowledgeBase):
self._entries.push_back(entry)
return new_index
cdef inline int64_t c_add_aliases(self, hash_t alias_hash, vector[int64_t] entry_indices, vector[float] probs) nogil:
"""Connect a mention to a list of potential entities with their prior probabilities .
After calling this method, make sure to update also the _alias_index using the return value"""
# This is what we'll map the alias hash key to. It's where the alias will be defined
# in the vector of aliases.
cdef inline int64_t c_add_aliases(
self,
hash_t alias_hash,
vector[int64_t] entry_indices,
vector[float] probs
) nogil:
"""Connect a mention to a list of potential entities with their prior
probabilities. After calling this method, make sure to update also the
_alias_index using the return value"""
# This is what we'll map the alias hash key to. It's where the alias will be
# defined in the vector of aliases.
cdef int64_t new_index = self._aliases_table.size()
# Avoid struct initializer to enable nogil
@@ -98,8 +109,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
cdef inline void _create_empty_vectors(self, hash_t dummy_hash) nogil:
"""
Initializing the vectors and making sure the first element of each vector is a dummy,
because the PreshMap maps pointing to indices in these vectors can not contain 0 as value
Initializing the vectors and making sure the first element of each vector is a
dummy, because the PreshMap maps pointing to indices in these vectors can not
contain 0 as value.
cf. https://github.com/explosion/preshed/issues/17
"""
cdef int32_t dummy_value = 0
@@ -130,12 +142,18 @@ cdef class InMemoryLookupKB(KnowledgeBase):
cdef class Writer:
cdef FILE* _fp
cdef int write_header(self, int64_t nr_entries, int64_t entity_vector_length) except -1
cdef int write_header(
self, int64_t nr_entries, int64_t entity_vector_length
) except -1
cdef int write_vector_element(self, float element) except -1
cdef int write_entry(self, hash_t entry_hash, float entry_freq, int32_t vector_index) except -1
cdef int write_entry(
self, hash_t entry_hash, float entry_freq, int32_t vector_index
) except -1
cdef int write_alias_length(self, int64_t alias_length) except -1
cdef int write_alias_header(self, hash_t alias_hash, int64_t candidate_length) except -1
cdef int write_alias_header(
self, hash_t alias_hash, int64_t candidate_length
) except -1
cdef int write_alias(self, int64_t entry_index, float prob) except -1
cdef int _write(self, void* value, size_t size) except -1
@@ -143,12 +161,18 @@ cdef class Writer:
cdef class Reader:
cdef FILE* _fp
cdef int read_header(self, int64_t* nr_entries, int64_t* entity_vector_length) except -1
cdef int read_header(
self, int64_t* nr_entries, int64_t* entity_vector_length
) except -1
cdef int read_vector_element(self, float* element) except -1
cdef int read_entry(self, hash_t* entity_hash, float* freq, int32_t* vector_index) except -1
cdef int read_entry(
self, hash_t* entity_hash, float* freq, int32_t* vector_index
) except -1
cdef int read_alias_length(self, int64_t* alias_length) except -1
cdef int read_alias_header(self, hash_t* alias_hash, int64_t* candidate_length) except -1
cdef int read_alias_header(
self, hash_t* alias_hash, int64_t* candidate_length
) except -1
cdef int read_alias(self, int64_t* entry_index, float* prob) except -1
cdef int _read(self, void* value, size_t size) except -1


@@ -1,5 +1,5 @@
# cython: infer_types=True, profile=True
from typing import Any, Callable, Dict, Iterable, Union
from typing import Any, Callable, Dict, Iterable
import srsly
@@ -27,8 +27,9 @@ from .candidate import InMemoryCandidate
cdef class InMemoryLookupKB(KnowledgeBase):
"""An `InMemoryLookupKB` instance stores unique identifiers for entities and their textual aliases,
to support entity linking of named entities to real-world concepts.
"""An `InMemoryLookupKB` instance stores unique identifiers for entities
and their textual aliases, to support entity linking of named entities to
real-world concepts.
DOCS: https://spacy.io/api/inmemorylookupkb
"""
@@ -71,7 +72,8 @@ cdef class InMemoryLookupKB(KnowledgeBase):
def add_entity(self, str entity, float freq, vector[float] entity_vector):
"""
Add an entity to the KB, optionally specifying its log probability based on corpus frequency
Add an entity to the KB, optionally specifying its log probability
based on corpus frequency.
Return the hash of the entity ID/name at the end.
"""
cdef hash_t entity_hash = self.vocab.strings.add(entity)
@@ -83,14 +85,20 @@ cdef class InMemoryLookupKB(KnowledgeBase):
# Raise an error if the provided entity vector is not of the correct length
if len(entity_vector) != self.entity_vector_length:
raise ValueError(Errors.E141.format(found=len(entity_vector), required=self.entity_vector_length))
raise ValueError(
Errors.E141.format(
found=len(entity_vector), required=self.entity_vector_length
)
)
vector_index = self.c_add_vector(entity_vector=entity_vector)
new_index = self.c_add_entity(entity_hash=entity_hash,
freq=freq,
vector_index=vector_index,
feats_row=-1) # Features table currently not implemented
new_index = self.c_add_entity(
entity_hash=entity_hash,
freq=freq,
vector_index=vector_index,
feats_row=-1
) # Features table currently not implemented
self._entry_index[entity_hash] = new_index
return entity_hash
@@ -115,7 +123,12 @@ cdef class InMemoryLookupKB(KnowledgeBase):
else:
entity_vector = vector_list[i]
if len(entity_vector) != self.entity_vector_length:
raise ValueError(Errors.E141.format(found=len(entity_vector), required=self.entity_vector_length))
raise ValueError(
Errors.E141.format(
found=len(entity_vector),
required=self.entity_vector_length
)
)
entry.entity_hash = entity_hash
entry.freq = freq_list[i]
@@ -149,11 +162,15 @@ cdef class InMemoryLookupKB(KnowledgeBase):
previous_alias_nr = self.get_size_aliases()
# Throw an error if the lengths of entities and probabilities are not the same
if not len(entities) == len(probabilities):
raise ValueError(Errors.E132.format(alias=alias,
entities_length=len(entities),
probabilities_length=len(probabilities)))
raise ValueError(
Errors.E132.format(
alias=alias,
entities_length=len(entities),
probabilities_length=len(probabilities))
)
# Throw an error if the probabilities sum up to more than 1 (allow for some rounding errors)
# Throw an error if the probabilities sum up to more than 1 (allow for
# some rounding errors)
prob_sum = sum(probabilities)
if prob_sum > 1.00001:
raise ValueError(Errors.E133.format(alias=alias, sum=prob_sum))
@@ -170,40 +187,47 @@ cdef class InMemoryLookupKB(KnowledgeBase):
for entity, prob in zip(entities, probabilities):
entity_hash = self.vocab.strings[entity]
if not entity_hash in self._entry_index:
if entity_hash not in self._entry_index:
raise ValueError(Errors.E134.format(entity=entity))
entry_index = <int64_t>self._entry_index.get(entity_hash)
entry_indices.push_back(int(entry_index))
probs.push_back(float(prob))
new_index = self.c_add_aliases(alias_hash=alias_hash, entry_indices=entry_indices, probs=probs)
new_index = self.c_add_aliases(
alias_hash=alias_hash, entry_indices=entry_indices, probs=probs
)
self._alias_index[alias_hash] = new_index
if previous_alias_nr + 1 != self.get_size_aliases():
raise RuntimeError(Errors.E891.format(alias=alias))
return alias_hash
def append_alias(self, str alias, str entity, float prior_prob, ignore_warnings=False):
def append_alias(
self, str alias, str entity, float prior_prob, ignore_warnings=False
):
"""
For an alias already existing in the KB, extend its potential entities with one more.
For an alias already existing in the KB, extend its potential entities
with one more.
Throw a warning if either the alias or the entity is unknown,
or when the combination is already previously recorded.
Throw an error if this entity+prior prob would exceed the sum of 1.
For efficiency, it's best to use the method `add_alias` as much as possible instead of this one.
For efficiency, it's best to use the method `add_alias` as much as
possible instead of this one.
"""
# Check if the alias exists in the KB
cdef hash_t alias_hash = self.vocab.strings[alias]
if not alias_hash in self._alias_index:
if alias_hash not in self._alias_index:
raise ValueError(Errors.E176.format(alias=alias))
# Check if the entity exists in the KB
cdef hash_t entity_hash = self.vocab.strings[entity]
if not entity_hash in self._entry_index:
if entity_hash not in self._entry_index:
raise ValueError(Errors.E134.format(entity=entity))
entry_index = <int64_t>self._entry_index.get(entity_hash)
# Throw an error if the prior probabilities (including the new one) sum up to more than 1
# Throw an error if the prior probabilities (including the new one)
# sum up to more than 1
alias_index = <int64_t>self._alias_index.get(alias_hash)
alias_entry = self._aliases_table[alias_index]
current_sum = sum([p for p in alias_entry.probs])
@@ -236,12 +260,13 @@ cdef class InMemoryLookupKB(KnowledgeBase):
def _get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]:
"""
Return candidate entities for an alias. Each candidate defines the entity, the original alias,
and the prior probability of that alias resolving to that entity.
Return candidate entities for an alias. Each candidate defines the
entity, the original alias, and the prior probability of that alias
resolving to that entity.
If the alias is not known in the KB, an empty list is returned.
"""
cdef hash_t alias_hash = self.vocab.strings[alias]
if not alias_hash in self._alias_index:
if alias_hash not in self._alias_index:
return []
alias_index = <int64_t>self._alias_index.get(alias_hash)
alias_entry = self._aliases_table[alias_index]
@@ -270,8 +295,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
return self._vectors_table[self._entries[entry_index].vector_index]
def get_prior_prob(self, str entity, str alias):
""" Return the prior probability of a given alias being linked to a given entity,
or return 0.0 when this combination is not known in the knowledge base"""
""" Return the prior probability of a given alias being linked to a
given entity, or return 0.0 when this combination is not known in the
knowledge base."""
cdef hash_t alias_hash = self.vocab.strings[alias]
cdef hash_t entity_hash = self.vocab.strings[entity]
@@ -282,7 +308,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
entry_index = self._entry_index[entity_hash]
alias_entry = self._aliases_table[alias_index]
for (entry_index, prior_prob) in zip(alias_entry.entry_indices, alias_entry.probs):
for (entry_index, prior_prob) in zip(
alias_entry.entry_indices, alias_entry.probs
):
if self._entries[entry_index].entity_hash == entity_hash:
return prior_prob
@@ -295,13 +323,19 @@ cdef class InMemoryLookupKB(KnowledgeBase):
"""Serialize the current state to a binary string.
"""
def serialize_header():
header = (self.get_size_entities(), self.get_size_aliases(), self.entity_vector_length)
header = (
self.get_size_entities(),
self.get_size_aliases(),
self.entity_vector_length
)
return srsly.json_dumps(header)
def serialize_entries():
i = 1
tuples = []
for entry_hash, entry_index in sorted(self._entry_index.items(), key=lambda x: x[1]):
for entry_hash, entry_index in sorted(
self._entry_index.items(), key=lambda x: x[1]
):
entry = self._entries[entry_index]
assert entry.entity_hash == entry_hash
assert entry_index == i
@@ -314,7 +348,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
headers = []
indices_lists = []
probs_lists = []
for alias_hash, alias_index in sorted(self._alias_index.items(), key=lambda x: x[1]):
for alias_hash, alias_index in sorted(
self._alias_index.items(), key=lambda x: x[1]
):
alias = self._aliases_table[alias_index]
assert alias_index == i
candidate_length = len(alias.entry_indices)
@@ -372,7 +408,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
indices = srsly.json_loads(all_data[1])
probs = srsly.json_loads(all_data[2])
for header, indices, probs in zip(headers, indices, probs):
alias_hash, candidate_length = header
alias_hash, _candidate_length = header
alias.entry_indices = indices
alias.probs = probs
self._aliases_table[i] = alias
@@ -421,10 +457,14 @@ cdef class InMemoryLookupKB(KnowledgeBase):
writer.write_vector_element(element)
i = i+1
# dumping the entry records in the order in which they are in the _entries vector.
# index 0 is a dummy object not stored in the _entry_index and can be ignored.
# dumping the entry records in the order in which they are in the
# _entries vector.
# index 0 is a dummy object not stored in the _entry_index and can
# be ignored.
i = 1
for entry_hash, entry_index in sorted(self._entry_index.items(), key=lambda x: x[1]):
for entry_hash, entry_index in sorted(
self._entry_index.items(), key=lambda x: x[1]
):
entry = self._entries[entry_index]
assert entry.entity_hash == entry_hash
assert entry_index == i
@@ -436,7 +476,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
# dumping the aliases in the order in which they are in the _alias_index vector.
# index 0 is a dummy object not stored in the _aliases_table and can be ignored.
i = 1
for alias_hash, alias_index in sorted(self._alias_index.items(), key=lambda x: x[1]):
for alias_hash, alias_index in sorted(
self._alias_index.items(), key=lambda x: x[1]
):
alias = self._aliases_table[alias_index]
assert alias_index == i
@@ -542,7 +584,8 @@ cdef class Writer:
def __init__(self, path):
assert isinstance(path, Path)
content = bytes(path)
cdef bytes bytes_loc = content.encode('utf8') if type(content) == str else content
cdef bytes bytes_loc = content.encode('utf8') \
if type(content) == str else content
self._fp = fopen(<char*>bytes_loc, 'wb')
if not self._fp:
raise IOError(Errors.E146.format(path=path))
@@ -552,14 +595,18 @@ cdef class Writer:
cdef size_t status = fclose(self._fp)
assert status == 0
cdef int write_header(self, int64_t nr_entries, int64_t entity_vector_length) except -1:
cdef int write_header(
self, int64_t nr_entries, int64_t entity_vector_length
) except -1:
self._write(&nr_entries, sizeof(nr_entries))
self._write(&entity_vector_length, sizeof(entity_vector_length))
cdef int write_vector_element(self, float element) except -1:
self._write(&element, sizeof(element))
cdef int write_entry(self, hash_t entry_hash, float entry_freq, int32_t vector_index) except -1:
cdef int write_entry(
self, hash_t entry_hash, float entry_freq, int32_t vector_index
) except -1:
self._write(&entry_hash, sizeof(entry_hash))
self._write(&entry_freq, sizeof(entry_freq))
self._write(&vector_index, sizeof(vector_index))
@@ -568,7 +615,9 @@ cdef class Writer:
cdef int write_alias_length(self, int64_t alias_length) except -1:
self._write(&alias_length, sizeof(alias_length))
cdef int write_alias_header(self, hash_t alias_hash, int64_t candidate_length) except -1:
cdef int write_alias_header(
self, hash_t alias_hash, int64_t candidate_length
) except -1:
self._write(&alias_hash, sizeof(alias_hash))
self._write(&candidate_length, sizeof(candidate_length))
@@ -584,16 +633,19 @@ cdef class Writer:
cdef class Reader:
def __init__(self, path):
content = bytes(path)
cdef bytes bytes_loc = content.encode('utf8') if type(content) == str else content
cdef bytes bytes_loc = content.encode('utf8') \
if type(content) == str else content
self._fp = fopen(<char*>bytes_loc, 'rb')
if not self._fp:
PyErr_SetFromErrno(IOError)
status = fseek(self._fp, 0, 0) # this can be 0 if there is no header
fseek(self._fp, 0, 0) # this can be 0 if there is no header
def __dealloc__(self):
fclose(self._fp)
cdef int read_header(self, int64_t* nr_entries, int64_t* entity_vector_length) except -1:
cdef int read_header(
self, int64_t* nr_entries, int64_t* entity_vector_length
) except -1:
status = self._read(nr_entries, sizeof(int64_t))
if status < 1:
if feof(self._fp):
@@ -613,7 +665,9 @@ cdef class Reader:
return 0 # end of file
raise IOError(Errors.E145.format(param="vector element"))
cdef int read_entry(self, hash_t* entity_hash, float* freq, int32_t* vector_index) except -1:
cdef int read_entry(
self, hash_t* entity_hash, float* freq, int32_t* vector_index
) except -1:
status = self._read(entity_hash, sizeof(hash_t))
if status < 1:
if feof(self._fp):
@@ -644,7 +698,9 @@ cdef class Reader:
return 0 # end of file
raise IOError(Errors.E145.format(param="alias length"))
cdef int read_alias_header(self, hash_t* alias_hash, int64_t* candidate_length) except -1:
cdef int read_alias_header(
self, hash_t* alias_hash, int64_t* candidate_length
) except -1:
status = self._read(alias_hash, sizeof(hash_t))
if status < 1:
if feof(self._fp):
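
For orientation, a hedged usage sketch of the `InMemoryLookupKB` API whose internals are reformatted above (entity IDs, frequencies, and probabilities are made up):

    from spacy.kb import InMemoryLookupKB
    from spacy.vocab import Vocab

    kb = InMemoryLookupKB(vocab=Vocab(), entity_vector_length=3)
    kb.add_entity(entity="Q1", freq=100.0, entity_vector=[1.0, 0.0, 0.0])
    kb.add_entity(entity="Q2", freq=20.0, entity_vector=[0.0, 1.0, 0.0])
    # Priors for a single alias may not sum to more than 1 (error E133).
    kb.add_alias(alias="apple", entities=["Q1", "Q2"], probabilities=[0.7, 0.2])
    assert round(kb.get_prior_prob(entity="Q1", alias="apple"), 2) == 0.7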


@@ -740,6 +740,11 @@ class Language:
)
)
pipe = source.get_pipe(source_name)
# There is no actual solution here. Either the component has the right
# name for the source pipeline or the component has the right name for
# the current pipeline. This prioritizes the current pipeline.
if hasattr(pipe, "name"):
pipe.name = name
# Make sure the source config is interpolated so we don't end up with
# orphaned variables in our final config
source_config = source.config.interpolate()
@@ -817,6 +822,7 @@ class Language:
pipe_index = self._get_pipe_index(before, after, first, last)
self._pipe_meta[name] = self.get_factory_meta(factory_name)
self._components.insert(pipe_index, (name, pipe_component))
self._link_components()
return pipe_component
def _get_pipe_index(
@@ -956,6 +962,7 @@ class Language:
if old_name in self._config["initialize"]["components"]:
init_cfg = self._config["initialize"]["components"].pop(old_name)
self._config["initialize"]["components"][new_name] = init_cfg
self._link_components()
def remove_pipe(self, name: str) -> Tuple[str, PipeCallable]:
"""Remove a component from the pipeline.
@@ -979,6 +986,7 @@ class Language:
# Make sure the name is also removed from the set of disabled components
if name in self.disabled:
self._disabled.remove(name)
self._link_components()
return removed
def disable_pipe(self, name: str) -> None:
@@ -1823,8 +1831,16 @@ class Language:
# The problem is we need to do it during deserialization...And the
# components don't receive the pipeline then. So this does have to be
# here :(
# First, fix up all the internal component names in case they have
# gotten out of sync due to sourcing components from different
# pipelines, since find_listeners uses proc2.name for the listener
# map.
for name, proc in self.pipeline:
if hasattr(proc, "name"):
proc.name = name
for i, (name1, proc1) in enumerate(self.pipeline):
if isinstance(proc1, ty.ListenedToComponent):
proc1.listener_map = {}
for name2, proc2 in self.pipeline[i + 1 :]:
proc1.find_listeners(proc2)
@@ -1934,7 +1950,6 @@ class Language:
# Later we replace the component config with the raw config again.
interpolated = filled.interpolate() if not filled.is_interpolated else filled
pipeline = interpolated.get("components", {})
sourced = util.get_sourced_components(interpolated)
# If components are loaded from a source (existing models), we cache
# them here so they're only loaded once
source_nlps = {}
@@ -1962,6 +1977,7 @@ class Language:
raw_config=raw_config,
)
else:
assert "source" in pipe_cfg
# We need the sourced components to reference the same
# vocab without modifying the current vocab state **AND**
# we still want to load the source model vectors to perform
@@ -1981,6 +1997,10 @@ class Language:
source_name = pipe_cfg.get("component", pipe_name)
listeners_replaced = False
if "replace_listeners" in pipe_cfg:
# Make sure that the listened-to component has the
# state of the source pipeline listener map so that the
# replace_listeners method below works as intended.
source_nlps[model]._link_components()
for name, proc in source_nlps[model].pipeline:
if source_name in getattr(proc, "listening_components", []):
source_nlps[model].replace_listeners(
@@ -1992,6 +2012,8 @@ class Language:
nlp.add_pipe(
source_name, source=source_nlps[model], name=pipe_name
)
# At this point after nlp.add_pipe, the listener map
# corresponds to the new pipeline.
if model not in source_nlp_vectors_hashes:
source_nlp_vectors_hashes[model] = hash(
source_nlps[model].vocab.vectors.to_bytes(
@@ -2046,27 +2068,6 @@ class Language:
raise ValueError(
Errors.E942.format(name="pipeline_creation", value=type(nlp))
)
# Detect components with listeners that are not frozen consistently
for name, proc in nlp.pipeline:
if isinstance(proc, ty.ListenedToComponent):
# Remove listeners not in the pipeline
listener_names = proc.listening_components
unused_listener_names = [
ll for ll in listener_names if ll not in nlp.pipe_names
]
for listener_name in unused_listener_names:
for listener in proc.listener_map.get(listener_name, []):
proc.remove_listener(listener, listener_name)
for listener_name in proc.listening_components:
# e.g. tok2vec/transformer
# If it's a component sourced from another pipeline, we check if
# the tok2vec listeners should be replaced with standalone tok2vec
# models (e.g. so component can be frozen without its performance
# degrading when other components/tok2vec are updated)
paths = sourced.get(listener_name, {}).get("replace_listeners", [])
if paths:
nlp.replace_listeners(name, listener_name, paths)
return nlp
def replace_listeners(
@@ -2081,7 +2082,7 @@ class Language:
useful when training a pipeline with components sourced from an existing
pipeline: if multiple components (e.g. tagger, parser, NER) listen to
the same tok2vec component, but some of them are frozen and not updated,
their performance may degrade significally as the tok2vec component is
their performance may degrade significantly as the tok2vec component is
updated with new data. To prevent this, listeners can be replaced with
a standalone tok2vec layer that is owned by the component and doesn't
change if the component isn't updated.
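
A hedged sketch of the scenario this docstring describes; the pipeline name is an assumption, and any pipeline whose tagger listens to a shared tok2vec works the same way:

    import spacy

    nlp = spacy.load("en_core_web_sm")
    # Give the tagger its own copy of the tok2vec layer, so other components
    # can keep updating the shared tok2vec without degrading the tagger.
    nlp.replace_listeners("tok2vec", "tagger", ["model.tok2vec"])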


@@ -1,7 +1,6 @@
# cython: embedsignature=True
# Compiler crashes on memory view coercion without this. Should report bug.
cimport numpy as np
from cython.view cimport array as cvarray
from libc.string cimport memset
np.import_array()
@@ -35,7 +34,7 @@ from .typedefs cimport attr_t, flags_t
from .attrs import intify_attrs
from .errors import Errors, Warnings
OOV_RANK = 0xffffffffffffffff # UINT64_MAX
OOV_RANK = 0xffffffffffffffff # UINT64_MAX
memset(&EMPTY_LEXEME, 0, sizeof(LexemeC))
EMPTY_LEXEME.id = OOV_RANK
@@ -105,7 +104,7 @@ cdef class Lexeme:
if isinstance(value, float):
continue
elif isinstance(value, (int, long)):
Lexeme.set_struct_attr(self.c, attr, value)
Lexeme.set_struct_attr(self.c, attr, value)
else:
Lexeme.set_struct_attr(self.c, attr, self.vocab.strings.add(value))
@@ -137,10 +136,12 @@ cdef class Lexeme:
if hasattr(other, "orth"):
if self.c.orth == other.orth:
return 1.0
elif hasattr(other, "__len__") and len(other) == 1 \
and hasattr(other[0], "orth"):
if self.c.orth == other[0].orth:
return 1.0
elif (
hasattr(other, "__len__") and len(other) == 1
and hasattr(other[0], "orth")
and self.c.orth == other[0].orth
):
return 1.0
if self.vector_norm == 0 or other.vector_norm == 0:
warnings.warn(Warnings.W008.format(obj="Lexeme"))
return 0.0
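
The reshaped condition keeps the original short-circuit: identical orth IDs score 1.0 before any vector lookup, and missing vectors fall through to the W008 warning and 0.0. A quick illustration (a blank pipeline, so no vectors are loaded):

    import spacy

    nlp = spacy.blank("en")
    apple = nlp.vocab["apple"]
    assert apple.similarity(nlp.vocab["apple"]) == 1.0  # same orth, no vectors needed
    # Comparing different lexemes here would warn W008 and return 0.0.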


@@ -108,7 +108,7 @@ cdef class DependencyMatcher:
key (str): The match ID.
RETURNS (bool): Whether the matcher contains rules for this match ID.
"""
return self.has_key(key)
return self.has_key(key) # no-cython-lint: W601
def _validate_input(self, pattern, key):
idx = 0
@@ -264,7 +264,7 @@ cdef class DependencyMatcher:
def remove(self, key):
key = self._normalize_key(key)
if not key in self._patterns:
if key not in self._patterns:
raise ValueError(Errors.E175.format(key=key))
self._patterns.pop(key)
self._raw_patterns.pop(key)
@@ -382,7 +382,7 @@ cdef class DependencyMatcher:
return []
return [doc[node].head]
def _gov(self,doc,node):
def _gov(self, doc, node):
return list(doc[node].children)
def _dep_chain(self, doc, node):


@@ -12,25 +12,13 @@ import warnings
import srsly
from ..attrs cimport (
DEP,
ENT_IOB,
ID,
LEMMA,
MORPH,
NULL_ATTR,
ORTH,
POS,
TAG,
attr_id_t,
)
from ..attrs cimport DEP, ENT_IOB, ID, LEMMA, MORPH, NULL_ATTR, POS, TAG
from ..structs cimport TokenC
from ..tokens.doc cimport Doc, get_token_attr_for_matcher
from ..tokens.morphanalysis cimport MorphAnalysis
from ..tokens.span cimport Span
from ..tokens.token cimport Token
from ..typedefs cimport attr_t
from ..vocab cimport Vocab
from ..errors import Errors, MatchPatternError, Warnings
from ..schemas import validate_token_pattern
@@ -42,7 +30,6 @@ from ..attrs import IDS
from ..errors import Errors, MatchPatternError, Warnings
from ..schemas import validate_token_pattern
from ..strings import get_string_id
from ..util import registry
from .levenshtein import levenshtein_compare
DEF PADDING = 5
@@ -93,9 +80,9 @@ cdef class Matcher:
key (str): The match ID.
RETURNS (bool): Whether the matcher contains rules for this match ID.
"""
return self.has_key(key)
return self.has_key(key) # no-cython-lint: W601
def add(self, key, patterns, *, on_match=None, greedy: str=None):
def add(self, key, patterns, *, on_match=None, greedy: str = None):
"""Add a match-rule to the matcher. A match-rule consists of: an ID
key, an on_match callback, and one or more patterns.
@@ -149,8 +136,13 @@ cdef class Matcher:
key = self._normalize_key(key)
for pattern in patterns:
try:
specs = _preprocess_pattern(pattern, self.vocab,
self._extensions, self._extra_predicates, self._fuzzy_compare)
specs = _preprocess_pattern(
pattern,
self.vocab,
self._extensions,
self._extra_predicates,
self._fuzzy_compare
)
self.patterns.push_back(init_pattern(self.mem, key, specs))
for spec in specs:
for attr, _ in spec[1]:
@@ -174,7 +166,7 @@ cdef class Matcher:
key (str): The ID of the match rule.
"""
norm_key = self._normalize_key(key)
if not norm_key in self._patterns:
if norm_key not in self._patterns:
raise ValueError(Errors.E175.format(key=key))
self._patterns.pop(norm_key)
self._callbacks.pop(norm_key)
@@ -274,8 +266,15 @@ cdef class Matcher:
if self.patterns.empty():
matches = []
else:
matches = find_matches(&self.patterns[0], self.patterns.size(), doclike, length,
extensions=self._extensions, predicates=self._extra_predicates, with_alignments=with_alignments)
matches = find_matches(
&self.patterns[0],
self.patterns.size(),
doclike,
length,
extensions=self._extensions,
predicates=self._extra_predicates,
with_alignments=with_alignments
)
final_matches = []
pairs_by_id = {}
# For each key, either add all matches, or only the filtered,
@@ -299,9 +298,9 @@ cdef class Matcher:
memset(matched, 0, length * sizeof(matched[0]))
span_filter = self._filter.get(key)
if span_filter == "FIRST":
sorted_pairs = sorted(pairs, key=lambda x: (x[0], -x[1]), reverse=False) # sort by start
sorted_pairs = sorted(pairs, key=lambda x: (x[0], -x[1]), reverse=False) # sort by start
elif span_filter == "LONGEST":
sorted_pairs = sorted(pairs, key=lambda x: (x[1]-x[0], -x[0]), reverse=True) # reverse sort by length
sorted_pairs = sorted(pairs, key=lambda x: (x[1]-x[0], -x[0]), reverse=True) # reverse sort by length
else:
raise ValueError(Errors.E947.format(expected=["FIRST", "LONGEST"], arg=span_filter))
for match in sorted_pairs:
@@ -373,7 +372,6 @@ cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, e
cdef vector[MatchC] matches
cdef vector[vector[MatchAlignmentC]] align_states
cdef vector[vector[MatchAlignmentC]] align_matches
cdef PatternStateC state
cdef int i, j, nr_extra_attr
cdef Pool mem = Pool()
output = []
@@ -395,14 +393,22 @@ cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, e
value = token.vocab.strings[value]
extra_attr_values[i * nr_extra_attr + index] = value
# Main loop
cdef int nr_predicate = len(predicates)
for i in range(length):
for j in range(n):
states.push_back(PatternStateC(patterns[j], i, 0))
if with_alignments != 0:
align_states.resize(states.size())
transition_states(states, matches, align_states, align_matches, predicate_cache,
doclike[i], extra_attr_values, predicates, with_alignments)
transition_states(
states,
matches,
align_states,
align_matches,
predicate_cache,
doclike[i],
extra_attr_values,
predicates,
with_alignments
)
extra_attr_values += nr_extra_attr
predicate_cache += len(predicates)
# Handle matches that end in 0-width patterns
@@ -428,18 +434,28 @@ cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, e
return output
cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& matches,
vector[vector[MatchAlignmentC]]& align_states, vector[vector[MatchAlignmentC]]& align_matches,
int8_t* cached_py_predicates,
Token token, const attr_t* extra_attrs, py_predicates, bint with_alignments) except *:
cdef void transition_states(
vector[PatternStateC]& states,
vector[MatchC]& matches,
vector[vector[MatchAlignmentC]]& align_states,
vector[vector[MatchAlignmentC]]& align_matches,
int8_t* cached_py_predicates,
Token token,
const attr_t* extra_attrs,
py_predicates,
bint with_alignments
) except *:
cdef int q = 0
cdef vector[PatternStateC] new_states
cdef vector[vector[MatchAlignmentC]] align_new_states
cdef int nr_predicate = len(py_predicates)
for i in range(states.size()):
if states[i].pattern.nr_py >= 1:
update_predicate_cache(cached_py_predicates,
states[i].pattern, token, py_predicates)
update_predicate_cache(
cached_py_predicates,
states[i].pattern,
token,
py_predicates
)
action = get_action(states[i], token.c, extra_attrs,
cached_py_predicates)
if action == REJECT:
@@ -475,8 +491,12 @@ cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& match
align_new_states.push_back(align_states[q])
states[q].pattern += 1
if states[q].pattern.nr_py != 0:
update_predicate_cache(cached_py_predicates,
states[q].pattern, token, py_predicates)
update_predicate_cache(
cached_py_predicates,
states[q].pattern,
token,
py_predicates
)
action = get_action(states[q], token.c, extra_attrs,
cached_py_predicates)
# Update alignment before the transition of current state
@@ -492,8 +512,12 @@ cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& match
ent_id = get_ent_id(state.pattern)
if action == MATCH:
matches.push_back(
MatchC(pattern_id=ent_id, start=state.start,
length=state.length+1))
MatchC(
pattern_id=ent_id,
start=state.start,
length=state.length+1
)
)
# `align_matches` always corresponds to `matches` 1:1
if with_alignments != 0:
align_matches.push_back(align_states[q])
@@ -501,23 +525,35 @@ cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& match
# push match without last token if length > 0
if state.length > 0:
matches.push_back(
MatchC(pattern_id=ent_id, start=state.start,
length=state.length))
MatchC(
pattern_id=ent_id,
start=state.start,
length=state.length
)
)
# MATCH_DOUBLE emits matches twice,
# add one more to align_matches in order to keep 1:1 relationship
if with_alignments != 0:
align_matches.push_back(align_states[q])
# push match with last token
matches.push_back(
MatchC(pattern_id=ent_id, start=state.start,
length=state.length+1))
MatchC(
pattern_id=ent_id,
start=state.start,
length=state.length + 1
)
)
# `align_matches` always corresponds to `matches` 1:1
if with_alignments != 0:
align_matches.push_back(align_states[q])
elif action == MATCH_REJECT:
matches.push_back(
MatchC(pattern_id=ent_id, start=state.start,
length=state.length))
MatchC(
pattern_id=ent_id,
start=state.start,
length=state.length
)
)
# `align_matches` always corresponds to `matches` 1:1
if with_alignments != 0:
align_matches.push_back(align_states[q])
@@ -540,8 +576,12 @@ cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& match
align_states.push_back(align_new_states[i])
cdef int update_predicate_cache(int8_t* cache,
const TokenPatternC* pattern, Token token, predicates) except -1:
cdef int update_predicate_cache(
int8_t* cache,
const TokenPatternC* pattern,
Token token,
predicates
) except -1:
# If the state references any extra predicates, check whether they match.
# These are cached, so that we don't call these potentially expensive
# Python functions more than we need to.
@@ -587,10 +627,12 @@ cdef void finish_states(vector[MatchC]& matches, vector[PatternStateC]& states,
else:
state.pattern += 1
cdef action_t get_action(PatternStateC state,
const TokenC* token, const attr_t* extra_attrs,
const int8_t* predicate_matches) nogil:
cdef action_t get_action(
PatternStateC state,
const TokenC * token,
const attr_t * extra_attrs,
const int8_t * predicate_matches
) nogil:
"""We need to consider:
a) Does the token match the specification? [Yes, No]
b) What's the quantifier? [1, 0+, ?]
@@ -656,53 +698,56 @@ cdef action_t get_action(PatternStateC state,
is_match = not is_match
quantifier = ONE
if quantifier == ONE:
if is_match and is_final:
# Yes, final: 1000
return MATCH
elif is_match and not is_final:
# Yes, non-final: 0100
return ADVANCE
elif not is_match and is_final:
# No, final: 0000
return REJECT
else:
return REJECT
if is_match and is_final:
# Yes, final: 1000
return MATCH
elif is_match and not is_final:
# Yes, non-final: 0100
return ADVANCE
elif not is_match and is_final:
# No, final: 0000
return REJECT
else:
return REJECT
elif quantifier == ZERO_PLUS:
if is_match and is_final:
# Yes, final: 1001
return MATCH_EXTEND
elif is_match and not is_final:
# Yes, non-final: 0011
return RETRY_EXTEND
elif not is_match and is_final:
# No, final 2000 (note: Don't include last token!)
return MATCH_REJECT
else:
# No, non-final 0010
return RETRY
if is_match and is_final:
# Yes, final: 1001
return MATCH_EXTEND
elif is_match and not is_final:
# Yes, non-final: 0011
return RETRY_EXTEND
elif not is_match and is_final:
# No, final 2000 (note: Don't include last token!)
return MATCH_REJECT
else:
# No, non-final 0010
return RETRY
elif quantifier == ZERO_ONE:
if is_match and is_final:
# Yes, final: 3000
# To cater for a pattern ending in "?", we need to add
# a match both with and without the last token
return MATCH_DOUBLE
elif is_match and not is_final:
# Yes, non-final: 0110
# We need both branches here, consider a pair like:
# pattern: .?b string: b
# If we 'ADVANCE' on the .?, we miss the match.
return RETRY_ADVANCE
elif not is_match and is_final:
# No, final 2000 (note: Don't include last token!)
return MATCH_REJECT
else:
# No, non-final 0010
return RETRY
if is_match and is_final:
# Yes, final: 3000
# To cater for a pattern ending in "?", we need to add
# a match both with and without the last token
return MATCH_DOUBLE
elif is_match and not is_final:
# Yes, non-final: 0110
# We need both branches here, consider a pair like:
# pattern: .?b string: b
# If we 'ADVANCE' on the .?, we miss the match.
return RETRY_ADVANCE
elif not is_match and is_final:
# No, final 2000 (note: Don't include last token!)
return MATCH_REJECT
else:
# No, non-final 0010
return RETRY
cdef int8_t get_is_match(PatternStateC state,
const TokenC* token, const attr_t* extra_attrs,
const int8_t* predicate_matches) nogil:
cdef int8_t get_is_match(
PatternStateC state,
const TokenC* token,
const attr_t* extra_attrs,
const int8_t* predicate_matches
) nogil:
for i in range(state.pattern.nr_py):
if predicate_matches[state.pattern.py_predicates[i]] == -1:
return 0
@@ -867,7 +912,7 @@ class _FuzzyPredicate:
self.is_extension = is_extension
if self.predicate not in self.operators:
raise ValueError(Errors.E126.format(good=self.operators, bad=self.predicate))
fuzz = self.predicate[len("FUZZY"):] # number after prefix
fuzz = self.predicate[len("FUZZY"):] # number after prefix
self.fuzzy = int(fuzz) if fuzz else -1
self.fuzzy_compare = fuzzy_compare
self.key = _predicate_cache_key(self.attr, self.predicate, value, fuzzy=self.fuzzy)
@@ -1089,7 +1134,7 @@ def _get_extra_predicates_dict(attr, value_dict, vocab, predicate_types,
elif cls == _FuzzyPredicate:
if isinstance(value, dict):
# add predicates inside fuzzy operator
fuzz = type_[len("FUZZY"):] # number after prefix
fuzz = type_[len("FUZZY"):] # number after prefix
fuzzy_val = int(fuzz) if fuzz else -1
output.extend(_get_extra_predicates_dict(attr, value, vocab, predicate_types,
extra_predicates, seen_predicates,
@@ -1108,8 +1153,9 @@ def _get_extra_predicates_dict(attr, value_dict, vocab, predicate_types,
return output
def _get_extension_extra_predicates(spec, extra_predicates, predicate_types,
seen_predicates):
def _get_extension_extra_predicates(
spec, extra_predicates, predicate_types, seen_predicates
):
output = []
for attr, value in spec.items():
if isinstance(value, dict):
@@ -1138,7 +1184,7 @@ def _get_operators(spec):
return (ONE,)
elif spec["OP"] in lookup:
return lookup[spec["OP"]]
#Min_max {n,m}
# Min_max {n,m}
elif spec["OP"].startswith("{") and spec["OP"].endswith("}"):
# {n} --> {n,n} exactly n ONE,(n)
# {n,m}--> {n,m} min of n, max of m ONE,(n),ZERO_ONE,(m)
@@ -1149,8 +1195,8 @@ def _get_operators(spec):
min_max = min_max if "," in min_max else f"{min_max},{min_max}"
n, m = min_max.split(",")
#1. Either n or m is a blank string and the other is numeric -->isdigit
#2. Both are numeric and n <= m
# 1. Either n or m is a blank string and the other is numeric -->isdigit
# 2. Both are numeric and n <= m
if (not n.isdecimal() and not m.isdecimal()) or (n.isdecimal() and m.isdecimal() and int(n) > int(m)):
keys = ", ".join(lookup.keys()) + ", {n}, {n,m}, {n,}, {,m} where n and m are integers and n <= m "
raise ValueError(Errors.E011.format(op=spec["OP"], opts=keys))
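
A hedged sketch of the `{n,m}` quantifier this validation covers (pattern and text are made up):

    import spacy
    from spacy.matcher import Matcher

    nlp = spacy.blank("en")
    matcher = Matcher(nlp.vocab)
    # "very" must occur at least once and at most three times.
    pattern = [{"LOWER": "a"}, {"LOWER": "very", "OP": "{1,3}"}, {"LOWER": "nice"}]
    matcher.add("VERY_NICE", [pattern])
    doc = nlp("a very very nice day")
    assert len(matcher(doc)) >= 1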


@@ -2,16 +2,14 @@
from collections import defaultdict
from typing import List
from libc.stdint cimport uintptr_t
from preshed.maps cimport map_clear, map_get, map_init, map_iter, map_set
import warnings
from ..attrs cimport DEP, LEMMA, MORPH, ORTH, POS, TAG
from ..attrs cimport DEP, LEMMA, MORPH, POS, TAG
from ..attrs import IDS
from ..structs cimport TokenC
from ..tokens.span cimport Span
from ..tokens.token cimport Token
from ..typedefs cimport attr_t
@@ -160,7 +158,6 @@ cdef class PhraseMatcher:
del self._callbacks[key]
del self._docs[key]
def _add_from_arrays(self, key, specs, *, on_match=None):
"""Add a preprocessed list of specs, with an optional callback.
@@ -196,7 +193,6 @@ cdef class PhraseMatcher:
result = internal_node
map_set(self.mem, <MapStruct*>result, self.vocab.strings[key], NULL)
def add(self, key, docs, *, on_match=None):
"""Add a match-rule to the phrase-matcher. A match-rule consists of: an ID
key, a list of one or more patterns, and (optionally) an on_match callback.


@@ -1,3 +1,4 @@
import warnings
from typing import Callable, List, Optional, Sequence, Tuple, cast
from thinc.api import Model, Ops, registry
@@ -5,7 +6,8 @@ from thinc.initializers import glorot_uniform_init
from thinc.types import Floats1d, Floats2d, Ints1d, Ragged
from thinc.util import partial
from ..errors import Errors
from ..attrs import ORTH
from ..errors import Errors, Warnings
from ..tokens import Doc
from ..vectors import Mode
from ..vocab import Vocab
@@ -24,6 +26,8 @@ def StaticVectors(
linear projection to control the dimensionality. If a dropout rate is
specified, the dropout is applied per dimension over the whole batch.
"""
if key_attr != "ORTH":
warnings.warn(Warnings.W125, DeprecationWarning)
return Model(
"static_vectors",
forward,
@@ -40,9 +44,9 @@ def forward(
token_count = sum(len(doc) for doc in docs)
if not token_count:
return _handle_empty(model.ops, model.get_dim("nO"))
key_attr: int = model.attrs["key_attr"]
keys = model.ops.flatten([cast(Ints1d, doc.to_array(key_attr)) for doc in docs])
vocab: Vocab = docs[0].vocab
key_attr: int = getattr(vocab.vectors, "attr", ORTH)
keys = model.ops.flatten([cast(Ints1d, doc.to_array(key_attr)) for doc in docs])
W = cast(Floats2d, model.ops.as_contig(model.get_param("W")))
if vocab.vectors.mode == Mode.default:
V = model.ops.asarray(vocab.vectors.data)
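
The forward pass now takes the key attribute from the shared vectors table rather than the model's own `key_attr` setting. A minimal illustration of that lookup, mirroring the `getattr` fallback above:

    import spacy
    from spacy.attrs import ORTH

    nlp = spacy.blank("en")
    doc = nlp("Apple apple")
    # Falls back to ORTH for vectors tables that predate the attr field.
    key_attr = getattr(nlp.vocab.vectors, "attr", ORTH)
    keys = doc.to_array(key_attr)  # per-token keys into the vectors rows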


@@ -1,5 +1,5 @@
# cython: infer_types=True, cdivision=True, boundscheck=False
from typing import Any, List, Optional, Tuple, TypeVar, cast
from typing import Any, List, Optional, Tuple, cast
from libc.stdlib cimport calloc, free, realloc
from libc.string cimport memcpy, memset
@@ -23,7 +23,7 @@ from thinc.api import (
from thinc.backends.cblas cimport CBlas, saxpy, sgemm
from thinc.types import Floats1d, Floats2d, Floats3d, Floats4d, Ints1d, Ints2d
from thinc.types import Floats2d, Floats3d, Floats4d, Ints1d, Ints2d
from ..errors import Errors
from ..pipeline._parser_internals import _beam_utils
@@ -136,7 +136,7 @@ def init(
Y: Optional[Tuple[List[State], List[Floats2d]]] = None,
):
if X is not None:
docs, moves = X
docs, _ = X
model.get_ref("tok2vec").initialize(X=docs)
else:
model.get_ref("tok2vec").initialize()
@@ -145,7 +145,6 @@ def init(
current_nO = model.maybe_get_dim("nO")
if current_nO is None or current_nO != inferred_nO:
model.attrs["resize_output"](model, inferred_nO)
nO = model.get_dim("nO")
nP = model.get_dim("nP")
nH = model.get_dim("nH")
nI = model.get_dim("nI")
@@ -192,9 +191,10 @@ class TransitionModelInputs:
self,
docs: List[Doc],
moves: TransitionSystem,
actions: Optional[List[Ints1d]]=None,
max_moves: int=0,
states: Optional[List[State]]=None):
actions: Optional[List[Ints1d]] = None,
max_moves: int = 0,
states: Optional[List[State]] = None,
):
"""
actions (Optional[List[Ints1d]]): actions to apply for each Doc.
docs (List[Doc]): Docs to predict transition sequences for.
@@ -234,12 +234,12 @@ def forward(model, inputs: TransitionModelInputs, is_train: bool):
return _forward_greedy_cpu(model, moves, states, feats, seen_mask, actions=actions)
else:
return _forward_fallback(model, moves, states, tokvecs, backprop_tok2vec,
feats, backprop_feats, seen_mask, is_train, actions=actions,
max_moves=inputs.max_moves)
feats, backprop_feats, seen_mask, is_train, actions=actions,
max_moves=inputs.max_moves)
def _forward_greedy_cpu(model: Model, TransitionSystem moves, states: List[StateClass], np.ndarray feats,
np.ndarray[np.npy_bool, ndim=1] seen_mask, actions: Optional[List[Ints1d]]=None):
np.ndarray[np.npy_bool, ndim = 1] seen_mask, actions: Optional[List[Ints1d]] = None):
cdef vector[StateC*] c_states
cdef StateClass state
for state in states:
@@ -257,9 +257,10 @@ def _forward_greedy_cpu(model: Model, TransitionSystem moves, states: List[State
return (states, scores), backprop
cdef list _parse_batch(CBlas cblas, TransitionSystem moves, StateC** states,
WeightsC weights, SizesC sizes, actions: Optional[List[Ints1d]]=None):
cdef int i, j
cdef int i
cdef vector[StateC *] unfinished
cdef ActivationsC activations = _alloc_activations(sizes)
cdef np.ndarray step_scores
@@ -276,7 +277,7 @@ cdef list _parse_batch(CBlas cblas, TransitionSystem moves, StateC** states,
if actions is None:
# Validate actions, argmax, take action.
c_transition_batch(moves, states, <const float*>step_scores.data, sizes.classes,
sizes.states)
sizes.states)
else:
c_apply_actions(moves, states, <const int*>step_actions.data, sizes.states)
for i in range(sizes.states):
@@ -302,8 +303,9 @@ def _forward_fallback(
backprop_feats,
seen_mask,
is_train: bool,
actions: Optional[List[Ints1d]]=None,
max_moves: int=0):
actions: Optional[List[Ints1d]] = None,
max_moves: int = 0,
):
nF = model.get_dim("nF")
output = model.get_ref("output")
hidden_b = model.get_param("hidden_b")
@@ -371,7 +373,7 @@ def _forward_fallback(
for clas in set(model.attrs["unseen_classes"]):
if (d_scores[:, clas] < 0).any():
model.attrs["unseen_classes"].remove(clas)
d_scores *= seen_mask == False
d_scores *= seen_mask == False # no-cython-lint
# Calculate the gradients for the parameters of the output layer.
# The weight gemm is (nS, nO) @ (nS, nH).T
output.inc_grad("b", d_scores.sum(axis=0))
@@ -571,13 +573,13 @@ cdef void _resize_activations(ActivationsC* A, SizesC n) nogil:
A._max_size = n.states
else:
A.token_ids = <int*>realloc(A.token_ids,
n.states * n.feats * sizeof(A.token_ids[0]))
n.states * n.feats * sizeof(A.token_ids[0]))
A.unmaxed = <float*>realloc(A.unmaxed,
n.states * n.hiddens * n.pieces * sizeof(A.unmaxed[0]))
n.states * n.hiddens * n.pieces * sizeof(A.unmaxed[0]))
A.hiddens = <float*>realloc(A.hiddens,
n.states * n.hiddens * sizeof(A.hiddens[0]))
n.states * n.hiddens * sizeof(A.hiddens[0]))
A.is_valid = <int*>realloc(A.is_valid,
n.states * n.classes * sizeof(A.is_valid[0]))
n.states * n.classes * sizeof(A.is_valid[0]))
A._max_size = n.states
A._curr_size = n.states
@@ -599,9 +601,9 @@ cdef void _predict_states(CBlas cblas, ActivationsC* A, float* scores, StateC**
else:
# Compute hidden-to-output
sgemm(cblas)(False, True, n.states, n.classes, n.hiddens,
1.0, <const float *>A.hiddens, n.hiddens,
<const float *>W.hidden_weights, n.hiddens,
0.0, scores, n.classes)
1.0, <const float *>A.hiddens, n.hiddens,
<const float *>W.hidden_weights, n.hiddens,
0.0, scores, n.classes)
# Add bias
for i in range(n.states):
saxpy(cblas)(n.classes, 1., W.hidden_bias, 1, &scores[i*n.classes], 1)
@@ -617,12 +619,12 @@ cdef void _predict_states(CBlas cblas, ActivationsC* A, float* scores, StateC**
scores[i*n.classes+j] = min_
cdef void _sum_state_features(CBlas cblas, float* output,
const float* cached, const int* token_ids, SizesC n) nogil:
cdef int idx, b, f, i
cdef void _sum_state_features(CBlas cblas, float* output, const float* cached,
const int* token_ids, SizesC n) nogil:
cdef int idx, b, f
cdef const float* feature
cdef int B = n.states
cdef int O = n.hiddens * n.pieces
cdef int O = n.hiddens * n.pieces # no-cython-lint
cdef int F = n.feats
cdef int T = n.tokens
padding = cached + (T * F * O)
@@ -637,4 +639,3 @@ cdef void _sum_state_features(CBlas cblas, float* output,
feature = &cached[idx]
saxpy(cblas)(O, one, <const float*>feature, 1, &output[b*O], 1)
token_ids += F


@@ -80,15 +80,13 @@ cdef class Morphology:
out.sort(key=lambda x: x[0])
return dict(out)
def _normalized_feat_dict_to_str(self, feats: Dict[str, str]) -> str:
norm_feats_string = self.FEATURE_SEP.join([
self.FIELD_SEP.join([field, self.VALUE_SEP.join(values) if isinstance(values, list) else values])
self.FIELD_SEP.join([field, self.VALUE_SEP.join(values) if isinstance(values, list) else values])
for field, values in feats.items()
])
])
return norm_feats_string or self.EMPTY_MORPH
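
As a worked example of the join above, fields are separated with "|", field and value with "=", and multiple values with ",", following the UD FEATS format:

    feats = {"Case": "Nom", "Number": ["Sing", "Plur"]}
    out = "|".join(
        "=".join([field, ",".join(values) if isinstance(values, list) else values])
        for field, values in feats.items()
    )
    assert out == "Case=Nom|Number=Sing,Plur"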
cdef hash_t _add(self, features):
"""Insert a morphological analysis in the morphology table, if not
already present. The morphological analysis may be provided in the UD
@@ -246,6 +244,7 @@ cdef int get_n_by_field(attr_t* results, const shared_ptr[MorphAnalysisC] morph,
n_results += 1
return n_results
def unpickle_morphology(strings, tags):
cdef Morphology morphology = Morphology(strings)
for tag in tags:


@@ -8,7 +8,7 @@ cpdef enum univ_pos_t:
ADV = symbols.ADV
AUX = symbols.AUX
CONJ = symbols.CONJ
CCONJ = symbols.CCONJ # U20
CCONJ = symbols.CCONJ # U20
DET = symbols.DET
INTJ = symbols.INTJ
NOUN = symbols.NOUN


@@ -46,11 +46,18 @@ cdef struct EditTreeC:
bint is_match_node
NodeC inner
cdef inline EditTreeC edittree_new_match(len_t prefix_len, len_t suffix_len,
uint32_t prefix_tree, uint32_t suffix_tree):
cdef MatchNodeC match_node = MatchNodeC(prefix_len=prefix_len,
suffix_len=suffix_len, prefix_tree=prefix_tree,
suffix_tree=suffix_tree)
cdef inline EditTreeC edittree_new_match(
len_t prefix_len,
len_t suffix_len,
uint32_t prefix_tree,
uint32_t suffix_tree
):
cdef MatchNodeC match_node = MatchNodeC(
prefix_len=prefix_len,
suffix_len=suffix_len,
prefix_tree=prefix_tree,
suffix_tree=suffix_tree
)
cdef NodeC inner = NodeC(match_node=match_node)
return EditTreeC(is_match_node=True, inner=inner)


@@ -5,8 +5,6 @@ from libc.string cimport memset
from libcpp.pair cimport pair
from libcpp.vector cimport vector
from pathlib import Path
from ...typedefs cimport hash_t
from ... import util
@@ -25,17 +23,16 @@ cdef LCS find_lcs(str source, str target):
target (str): The second string.
RETURNS (LCS): The spans of the longest common subsequences.
"""
cdef Py_ssize_t source_len = len(source)
cdef Py_ssize_t target_len = len(target)
cdef size_t longest_align = 0;
cdef size_t longest_align = 0
cdef int source_idx, target_idx
cdef LCS lcs
cdef Py_UCS4 source_cp, target_cp
memset(&lcs, 0, sizeof(lcs))
cdef vector[size_t] prev_aligns = vector[size_t](target_len);
cdef vector[size_t] cur_aligns = vector[size_t](target_len);
cdef vector[size_t] prev_aligns = vector[size_t](target_len)
cdef vector[size_t] cur_aligns = vector[size_t](target_len)
for (source_idx, source_cp) in enumerate(source):
for (target_idx, target_cp) in enumerate(target):
@@ -89,7 +86,7 @@ cdef class EditTrees:
cdef LCS lcs = find_lcs(form, lemma)
cdef EditTreeC tree
cdef uint32_t tree_id, prefix_tree, suffix_tree
cdef uint32_t prefix_tree, suffix_tree
if lcs_is_empty(lcs):
tree = edittree_new_subst(self.strings.add(form), self.strings.add(lemma))
else:
@@ -108,7 +105,7 @@ cdef class EditTrees:
return self._tree_id(tree)
cdef uint32_t _tree_id(self, EditTreeC tree):
# If this tree has been constructed before, return its identifier.
# If this tree has been constructed before, return its identifier.
cdef hash_t hash = edittree_hash(tree)
cdef unordered_map[hash_t, uint32_t].iterator iter = self.map.find(hash)
if iter != self.map.end():
@ -289,6 +286,7 @@ def _tree2dict(tree):
tree = tree["inner"]["subst_node"]
return(dict(tree))
def _dict2tree(tree):
errors = validate_edit_tree(tree)
if errors:
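
For readers following the edit-tree changes, a rough pure-Python version of the find_lcs dynamic program (a sketch: "LCS" here is the longest common contiguous substring, returned as hypothetical (source_begin, source_end, target_begin, target_end) offsets):

def find_lcs(source, target):
    longest = 0
    best = (0, 0, 0, 0)
    prev = [0] * len(target)
    for i, source_cp in enumerate(source):
        cur = [0] * len(target)
        for j, target_cp in enumerate(target):
            if source_cp == target_cp:
                # Extend the common substring ending at (i - 1, j - 1).
                cur[j] = (prev[j - 1] if i and j else 0) + 1
                if cur[j] > longest:
                    longest = cur[j]
                    best = (i - longest + 1, i + 1, j - longest + 1, j + 1)
        prev = cur
    return best

assert find_lcs("deeper", "deepest") == (0, 5, 0, 5)  # "deepe"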

View File

@ -1,12 +1,8 @@
# cython: infer_types=True
# cython: profile=True
cimport numpy as np
import numpy
from cpython.ref cimport Py_XDECREF, PyObject
from ...typedefs cimport class_t, hash_t
from ...typedefs cimport class_t
from .transition_system cimport Transition, TransitionSystem
from ...errors import Errors
@ -146,7 +142,6 @@ def update_beam(TransitionSystem moves, states, golds, model, int width, beam_de
cdef MaxViolation violn
pbeam = BeamBatch(moves, states, golds, width=width, density=beam_density)
gbeam = BeamBatch(moves, states, golds, width=width, density=0.0)
cdef StateClass state
beam_maps = []
backprops = []
violns = [MaxViolation() for _ in range(len(states))]

View File

@ -280,7 +280,6 @@ cdef cppclass StateC:
return n
int n_L(int head) nogil const:
return n_arcs(this._left_arcs, head)

View File

@ -9,7 +9,7 @@ from ...strings cimport hash_string
from ...structs cimport TokenC
from ...tokens.doc cimport Doc, set_children_from_heads
from ...tokens.token cimport MISSING_DEP
from ...typedefs cimport attr_t, hash_t
from ...typedefs cimport attr_t
from ...training import split_bilu_label
@ -68,8 +68,9 @@ cdef struct GoldParseStateC:
weight_t pop_cost
cdef GoldParseStateC create_gold_state(Pool mem, const StateC* state,
heads, labels, sent_starts) except *:
cdef GoldParseStateC create_gold_state(
Pool mem, const StateC* state, heads, labels, sent_starts
) except *:
cdef GoldParseStateC gs
gs.length = len(heads)
gs.stride = 1
@ -82,7 +83,7 @@ cdef GoldParseStateC create_gold_state(Pool mem, const StateC* state,
gs.n_kids_in_stack = <int32_t*>mem.alloc(gs.length, sizeof(gs.n_kids_in_stack[0]))
for i, is_sent_start in enumerate(sent_starts):
if is_sent_start == True:
if is_sent_start is True:
gs.state_bits[i] = set_state_flag(
gs.state_bits[i],
IS_SENT_START,
@ -210,6 +211,7 @@ cdef class ArcEagerGold:
def update(self, StateClass stcls):
update_gold_state(&self.c, stcls.c)
def _get_aligned_sent_starts(example):
"""Get list of SENT_START attributes aligned to the predicted tokenization.
If the reference does not have sentence starts, return a list of None values.
@ -524,7 +526,6 @@ cdef class Break:
"""
@staticmethod
cdef bint is_valid(const StateC* st, attr_t label) nogil:
cdef int i
if st.buffer_length() < 2:
return False
elif st.B(1) != st.B(0) + 1:
@ -556,8 +557,8 @@ cdef class Break:
cost -= 1
if gold.heads[si] == b0:
cost -= 1
if not is_sent_start(gold, state.B(1)) \
and not is_sent_start_unknown(gold, state.B(1)):
if not is_sent_start(gold, state.B(1)) and\
not is_sent_start_unknown(gold, state.B(1)):
cost += 1
return cost
@ -805,7 +806,6 @@ cdef class ArcEager(TransitionSystem):
raise TypeError(Errors.E909.format(name="ArcEagerGold"))
cdef ArcEagerGold gold_ = gold
gold_state = gold_.c
n_gold = 0
if self.c[i].is_valid(stcls.c, self.c[i].label):
cost = self.c[i].get_cost(stcls.c, &gold_state, self.c[i].label)
else:
@ -878,7 +878,7 @@ cdef class ArcEager(TransitionSystem):
print("Gold")
for token in example.y:
print(token.i, token.text, token.dep_, token.head.text)
aligned_heads, aligned_labels = example.get_aligned_parse()
aligned_heads, _aligned_labels = example.get_aligned_parse()
print("Aligned heads")
for i, head in enumerate(aligned_heads):
print(example.x[i], example.x[head] if head is not None else "__")

View File

@ -1,8 +1,4 @@
import os
import random
from cymem.cymem cimport Pool
from libc.stdint cimport int32_t
from libcpp.memory cimport shared_ptr
from libcpp.vector cimport vector
@ -14,7 +10,7 @@ from ...tokens.span import Span
from ...attrs cimport IS_SPACE
from ...lexeme cimport Lexeme
from ...structs cimport SpanC, TokenC
from ...structs cimport SpanC
from ...tokens.span cimport Span
from ...typedefs cimport attr_t, weight_t
@ -138,11 +134,10 @@ cdef class BiluoPushDown(TransitionSystem):
OUT: Counter()
}
actions[OUT][''] = 1 # Represents a token predicted to be outside of any entity
actions[UNIT][''] = 1 # Represents a token prohibited to be in an entity
actions[UNIT][''] = 1 # Represents a token prohibited to be in an entity
for entity_type in kwargs.get('entity_types', []):
for action in (BEGIN, IN, LAST, UNIT):
actions[action][entity_type] = 1
moves = ('M', 'B', 'I', 'L', 'U')
for example in kwargs.get('examples', []):
for token in example.y:
ent_type = token.ent_type_
@ -324,7 +319,6 @@ cdef class BiluoPushDown(TransitionSystem):
raise TypeError(Errors.E909.format(name="BiluoGold"))
cdef BiluoGold gold_ = gold
gold_state = gold_.c
n_gold = 0
if self.c[i].is_valid(stcls.c, self.c[i].label):
cost = self.c[i].get_cost(stcls.c, &gold_state, self.c[i].label)
else:
@ -487,10 +481,8 @@ cdef class In:
@staticmethod
cdef weight_t cost(const StateC* s, const void* _gold, attr_t label) nogil:
gold = <GoldNERStateC*>_gold
move = IN
cdef int next_act = gold.ner[s.B(1)].move if s.B(1) >= 0 else OUT
cdef int g_act = gold.ner[s.B(0)].move
cdef attr_t g_tag = gold.ner[s.B(0)].label
cdef bint is_sunk = _entity_is_sunk(s, gold.ner)
if g_act == MISSING:
@ -550,12 +542,10 @@ cdef class Last:
@staticmethod
cdef weight_t cost(const StateC* s, const void* _gold, attr_t label) nogil:
gold = <GoldNERStateC*>_gold
move = LAST
b0 = s.B(0)
ent_start = s.E(0)
cdef int g_act = gold.ner[b0].move
cdef attr_t g_tag = gold.ner[b0].label
cdef int cost = 0
@ -655,7 +645,6 @@ cdef class Unit:
return cost
cdef class Out:
@staticmethod
cdef bint is_valid(const StateC* st, attr_t label) nogil:
@ -678,7 +667,6 @@ cdef class Out:
cdef weight_t cost(const StateC* s, const void* _gold, attr_t label) nogil:
gold = <GoldNERStateC*>_gold
cdef int g_act = gold.ner[s.B(0)].move
cdef attr_t g_tag = gold.ner[s.B(0)].label
cdef weight_t cost = 0
if g_act == MISSING:
pass

View File

@ -125,14 +125,17 @@ def decompose(label):
def is_decorated(label):
return DELIMITER in label
def count_decorated_labels(gold_data):
freqs = {}
for example in gold_data:
proj_heads, deco_deps = projectivize(example.get_aligned("HEAD"),
example.get_aligned("DEP"))
# set the label to ROOT for each root dependent
deco_deps = ['ROOT' if head == i else deco_deps[i]
for i, head in enumerate(proj_heads)]
deco_deps = [
'ROOT' if head == i else deco_deps[i]
for i, head in enumerate(proj_heads)
]
# count label frequencies
for label in deco_deps:
if is_decorated(label):
@ -160,9 +163,9 @@ def projectivize(heads, labels):
cdef vector[int] _heads_to_c(heads):
cdef vector[int] c_heads;
cdef vector[int] c_heads
for head in heads:
if head == None:
if head is None:
c_heads.push_back(-1)
else:
assert head < len(heads)
@ -199,6 +202,7 @@ def _decorate(heads, proj_heads, labels):
deco_labels.append(labels[tokenid])
return deco_labels
def get_smallest_nonproj_arc_slow(heads):
cdef vector[int] c_heads = _heads_to_c(heads)
return _get_smallest_nonproj_arc(c_heads)
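
The root-relabeling step in count_decorated_labels is easy to miss; a small sketch with hypothetical heads and labels (any token whose projectivized head is itself is a root and gets the label ROOT):

proj_heads = [1, 1, 1, 5, 5, 5]  # hypothetical heads for two sentences
deco_deps = ["det", "nsubj", "dep", "det", "nsubj", "dep"]
deco_deps = [
    "ROOT" if head == i else deco_deps[i]
    for i, head in enumerate(proj_heads)
]
assert deco_deps == ["det", "ROOT", "dep", "det", "nsubj", "ROOT"]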

View File

@ -58,7 +58,6 @@ cdef class Beam:
void* extra_args) except -1
cdef int check_done(self, finish_func_t finish_func, void* extra_args) except -1
cdef inline void set_cell(self, int i, int j, weight_t score, int is_valid, weight_t cost) nogil:
self.scores[i][j] = score
self.is_valid[i][j] = is_valid

View File

@ -1,11 +1,8 @@
# cython: profile=True, experimental_cpp_class_def=True, cdivision=True, infer_types=True
cimport cython
from libc.math cimport exp, log
from libc.string cimport memcpy, memset
import math
from cymem.cymem cimport Pool
from libc.math cimport exp
from libc.string cimport memcpy, memset
from preshed.maps cimport PreshMap
@ -70,7 +67,7 @@ cdef class Beam:
self.costs[i][j] = costs[j]
cdef int set_table(self, weight_t** scores, int** is_valid, weight_t** costs) except -1:
cdef int i, j
cdef int i
for i in range(self.width):
memcpy(self.scores[i], scores[i], sizeof(weight_t) * self.nr_class)
memcpy(self.is_valid[i], is_valid[i], sizeof(bint) * self.nr_class)
@ -176,7 +173,6 @@ cdef class Beam:
beam-width, and n is the number of classes.
"""
cdef Entry entry
cdef weight_t score
cdef _State* s
cdef int i, j, move_id
assert self.size >= 1
@ -269,7 +265,7 @@ cdef class MaxViolation:
# This can happen from non-monotonic actions
# If we find a better gold analysis this way, be sure to keep it.
elif pred._states[i].loss <= 0 \
and tuple(pred.histories[i]) not in seen_golds:
and tuple(pred.histories[i]) not in seen_golds:
g_scores.append(pred._states[i].score)
g_hist.append(list(pred.histories[i]))
for i in range(gold.size):

View File

@ -1,6 +1,4 @@
# cython: infer_types=True
import numpy
from libcpp.vector cimport vector
from ...tokens.doc cimport Doc
@ -42,11 +40,11 @@ cdef class StateClass:
cdef vector[ArcC] arcs
self.c.get_arcs(&arcs)
return list(arcs)
#py_arcs = []
#for arc in arcs:
# if arc.head != -1 and arc.child != -1:
# py_arcs.append((arc.head, arc.child, arc.label))
#return arcs
# py_arcs = []
# for arc in arcs:
# if arc.head != -1 and arc.child != -1:
# py_arcs.append((arc.head, arc.child, arc.label))
# return arcs
def add_arc(self, int head, int child, int label):
self.c.add_arc(head, child, label)
@ -138,7 +136,7 @@ cdef class StateClass:
def at_break(self):
return False
#return self.c.at_break()
# return self.c.at_break()
def has_head(self, int i):
return self.c.has_head(i)

View File

@ -20,11 +20,15 @@ cdef struct Transition:
int (*do)(StateC* state, attr_t label) nogil
ctypedef weight_t (*get_cost_func_t)(const StateC* state, const void* gold,
attr_t label) nogil
ctypedef weight_t (*move_cost_func_t)(const StateC* state, const void* gold) nogil
ctypedef weight_t (*label_cost_func_t)(const StateC* state, const void*
gold, attr_t label) nogil
ctypedef weight_t (*get_cost_func_t)(
const StateC* state, const void* gold, attr_t label
) nogil
ctypedef weight_t (*move_cost_func_t)(
const StateC* state, const void* gold
) nogil
ctypedef weight_t (*label_cost_func_t)(
const StateC* state, const void* gold, attr_t label
) nogil
ctypedef int (*do_func_t)(StateC* state, attr_t label) nogil
@ -56,7 +60,7 @@ cdef class TransitionSystem:
cdef void c_apply_actions(TransitionSystem moves, StateC** states, const int* actions,
int batch_size) nogil
int batch_size) nogil
cdef void c_transition_batch(TransitionSystem moves, StateC** states, const float* scores,
int nr_class, int batch_size) nogil
int nr_class, int batch_size) nogil

View File

@ -10,9 +10,7 @@ from collections import Counter
import srsly
from ...structs cimport TokenC
from ...tokens.doc cimport Doc
from ...typedefs cimport attr_t, weight_t
from . cimport _beam_utils
from ._parser_utils cimport arg_max_if_valid
from .stateclass cimport StateClass
@ -270,7 +268,6 @@ cdef class TransitionSystem:
return self
def to_bytes(self, exclude=tuple()):
transitions = []
serializers = {
'moves': lambda: srsly.json_dumps(self.labels),
'strings': lambda: self.strings.to_bytes(),
@ -294,19 +291,19 @@ cdef class TransitionSystem:
cdef void c_apply_actions(TransitionSystem moves, StateC** states, const int* actions,
int batch_size) nogil:
cdef int i
cdef Transition action
cdef StateC* state
for i in range(batch_size):
state = states[i]
action = moves.c[actions[i]]
action.do(state, action.label)
state.history.push_back(action.clas)
int batch_size) nogil:
cdef int i
cdef Transition action
cdef StateC* state
for i in range(batch_size):
state = states[i]
action = moves.c[actions[i]]
action.do(state, action.label)
state.history.push_back(action.clas)
cdef void c_transition_batch(TransitionSystem moves, StateC** states, const float* scores,
int nr_class, int batch_size) nogil:
int nr_class, int batch_size) nogil:
is_valid = <int*>calloc(moves.n_moves, sizeof(int))
cdef int i, guess
cdef Transition action
@ -322,4 +319,3 @@ cdef void c_transition_batch(TransitionSystem moves, StateC** states, const floa
action.do(states[i], action.label)
states[i].history.push_back(guess)
free(is_valid)
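
A hedged Python sketch of this batched greedy decoding loop: mask invalid actions, take the best-scoring valid one, apply it, and record the chosen class in the state history. The moves.is_valid and moves.apply calls stand in for the C-level TransitionSystem API and are not real spaCy methods.

import numpy

def transition_batch(moves, states, scores, nr_class):
    for i, state in enumerate(states):
        state_scores = scores[i * nr_class:(i + 1) * nr_class]
        masked = [
            s if moves.is_valid(state, clas) else -numpy.inf
            for clas, s in enumerate(state_scores)
        ]
        guess = int(numpy.argmax(masked))
        moves.apply(state, guess)      # perform the transition
        state.history.append(guess)   # keep the action for later updates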

View File

@ -1,6 +1,6 @@
# cython: infer_types=True, profile=True, binding=True
from collections import defaultdict
from typing import Callable, Iterable, Optional
from typing import Callable, Optional
from thinc.api import Config, Model

View File

@ -1,11 +1,9 @@
# cython: infer_types=True, profile=True, binding=True
from itertools import islice
from typing import Callable, Dict, Iterable, List, Optional, Union
from typing import Callable, Dict, Iterable, Optional, Union
import srsly
from thinc.api import Config, Model
from thinc.legacy import LegacySequenceCategoricalCrossentropy
from thinc.types import Floats2d, Ints1d
from ..morphology cimport Morphology
from ..tokens.doc cimport Doc
@ -16,10 +14,8 @@ from ..errors import Errors
from ..language import Language
from ..parts_of_speech import IDS as POS_IDS
from ..scorer import Scorer
from ..symbols import POS
from ..training import validate_examples, validate_get_examples
from ..util import registry
from .pipe import deserialize_config
from .tagger import ActivationsT, Tagger
# See #9050
@ -86,8 +82,11 @@ def morphologizer_score(examples, **kwargs):
results = {}
results.update(Scorer.score_token_attr(examples, "pos", **kwargs))
results.update(Scorer.score_token_attr(examples, "morph", getter=morph_key_getter, **kwargs))
results.update(Scorer.score_token_attr_per_feat(examples,
"morph", getter=morph_key_getter, **kwargs))
results.update(
Scorer.score_token_attr_per_feat(
examples, "morph", getter=morph_key_getter, **kwargs
)
)
return results
@ -249,7 +248,6 @@ class Morphologizer(Tagger):
if isinstance(docs, Doc):
docs = [docs]
cdef Doc doc
cdef Vocab vocab = self.vocab
cdef bint overwrite = self.cfg["overwrite"]
cdef bint extend = self.cfg["extend"]

View File

@ -1,12 +1,12 @@
# cython: infer_types=True, profile=True, binding=True
from collections import defaultdict
from typing import Callable, Iterable, Optional
from typing import Callable, Optional
from thinc.api import Config, Model
from ..language import Language
from ..scorer import PRFScore, get_ner_prf
from ..training import remove_bilu_prefix, validate_examples
from ..scorer import get_ner_prf
from ..training import remove_bilu_prefix
from ..util import registry
from ._parser_internals.ner import BiluoPushDown
from ._parser_internals.transition_system import TransitionSystem

View File

@ -1,12 +1,11 @@
# cython: infer_types=True, profile=True, binding=True
import warnings
from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple, Union
from typing import Callable, Dict, Iterable, Iterator, Tuple, Union
import srsly
from ..tokens.doc cimport Doc
from ..errors import Errors, Warnings
from ..errors import Errors
from ..language import Language
from ..training import Example
from ..util import raise_error
@ -33,7 +32,7 @@ cdef class Pipe:
"""
raise NotImplementedError(Errors.E931.format(parent="Pipe", method="__call__", name=self.name))
def pipe(self, stream: Iterable[Doc], *, batch_size: int=128) -> Iterator[Doc]:
def pipe(self, stream: Iterable[Doc], *, batch_size: int = 128) -> Iterator[Doc]:
"""Apply the pipe to a stream of documents. This usually happens under
the hood when the nlp object is called on a text and all components are
applied to the Doc.
@ -52,7 +51,7 @@ cdef class Pipe:
except Exception as e:
error_handler(self.name, self, [doc], e)
def initialize(self, get_examples: Callable[[], Iterable[Example]], *, nlp: Language=None):
def initialize(self, get_examples: Callable[[], Iterable[Example]], *, nlp: Language = None):
"""Initialize the pipe. For non-trainable components, this method
is optional. For trainable components, which should inherit
from the subclass TrainablePipe, the provided data examples

View File

@ -7,7 +7,6 @@ from ..tokens.doc cimport Doc
from .. import util
from ..language import Language
from ..scorer import Scorer
from .pipe import Pipe
from .senter import senter_score
@ -34,17 +33,19 @@ class Sentencizer(Pipe):
DOCS: https://spacy.io/api/sentencizer
"""
default_punct_chars = ['!', '.', '?', '։', '؟', '۔', '܀', '܁', '܂', '߹',
'।', '॥', '၊', '။', '።', '፧', '፨', '᙮', '᜵', '᜶', '᠃', '᠉', '᥄',
'᥅', '᪨', '᪩', '᪪', '᪫', '᭚', '᭛', '᭞', '᭟', '᰻', '᰼', '᱾', '᱿',
'‼', '‽', '⁇', '⁈', '⁉', '⸮', '⸼', '꓿', '꘎', '꘏', '꛳', '꛷', '꡶',
'꡷', '꣎', '꣏', '꤯', '꧈', '꧉', '꩝', '꩞', '꩟', '꫰', '꫱', '꯫', '﹒',
'﹖', '﹗', '！', '．', '？', '𐩖', '𐩗', '𑁇', '𑁈', '𑂾', '𑂿', '𑃀',
'𑃁', '𑅁', '𑅂', '𑅃', '𑇅', '𑇆', '𑇍', '𑇞', '𑇟', '𑈸', '𑈹', '𑈻', '𑈼',
'𑊩', '𑑋', '𑑌', '𑗂', '𑗃', '𑗉', '𑗊', '𑗋', '𑗌', '𑗍', '𑗎', '𑗏', '𑗐',
'𑗑', '𑗒', '𑗓', '𑗔', '𑗕', '𑗖', '𑗗', '𑙁', '𑙂', '𑜼', '𑜽', '𑜾', '𑩂',
'𑩃', '𑪛', '𑪜', '𑱁', '𑱂', '𖩮', '𖩯', '𖫵', '𖬷', '𖬸', '𖭄', '𛲟', '𝪈',
'｡', '。']
default_punct_chars = [
'!', '.', '?', '։', '؟', '۔', '܀', '܁', '܂', '߹',
'।', '॥', '၊', '။', '።', '፧', '፨', '᙮', '᜵', '᜶', '᠃', '᠉', '᥄',
'᥅', '᪨', '᪩', '᪪', '᪫', '᭚', '᭛', '᭞', '᭟', '᰻', '᰼', '᱾', '᱿',
'‼', '‽', '⁇', '⁈', '⁉', '⸮', '⸼', '꓿', '꘎', '꘏', '꛳', '꛷', '꡶',
'꡷', '꣎', '꣏', '꤯', '꧈', '꧉', '꩝', '꩞', '꩟', '꫰', '꫱', '꯫', '﹒',
'﹖', '﹗', '！', '．', '？', '𐩖', '𐩗', '𑁇', '𑁈', '𑂾', '𑂿', '𑃀',
'𑃁', '𑅁', '𑅂', '𑅃', '𑇅', '𑇆', '𑇍', '𑇞', '𑇟', '𑈸', '𑈹', '𑈻', '𑈼',
'𑊩', '𑑋', '𑑌', '𑗂', '𑗃', '𑗉', '𑗊', '𑗋', '𑗌', '𑗍', '𑗎', '𑗏', '𑗐',
'𑗑', '𑗒', '𑗓', '𑗔', '𑗕', '𑗖', '𑗗', '𑙁', '𑙂', '𑜼', '𑜽', '𑜾', '𑩂',
'𑩃', '𑪛', '𑪜', '𑱁', '𑱂', '𖩮', '𖩯', '𖫵', '𖬷', '𖬸', '𖭄', '𛲟', '𝪈',
'｡', '。'
]
def __init__(
self,
@ -127,7 +128,6 @@ class Sentencizer(Pipe):
if isinstance(docs, Doc):
docs = [docs]
cdef Doc doc
cdef int idx = 0
for i, doc in enumerate(docs):
doc_tag_ids = batch_tag_ids[i]
for j, tag_id in enumerate(doc_tag_ids):
@ -168,7 +168,6 @@ class Sentencizer(Pipe):
path = path.with_suffix(".json")
srsly.write_json(path, {"punct_chars": list(self.punct_chars), "overwrite": self.overwrite})
def from_disk(self, path, *, exclude=tuple()):
"""Load the sentencizer from disk.

View File

@ -1,11 +1,9 @@
# cython: infer_types=True, profile=True, binding=True
from itertools import islice
from typing import Callable, Dict, Iterable, List, Optional, Union
from typing import Callable, Iterable, Optional
import srsly
from thinc.api import Config, Model
from thinc.legacy import LegacySequenceCategoricalCrossentropy
from thinc.types import Floats2d, Ints1d
from ..tokens.doc cimport Doc

View File

@ -48,14 +48,14 @@ DEFAULT_SPAN_FINDER_MODEL = Config().from_str(span_finder_default_config)["model
"threshold": 0.5,
"model": DEFAULT_SPAN_FINDER_MODEL,
"spans_key": DEFAULT_SPANS_KEY,
"max_length": None,
"max_length": 25,
"min_length": None,
"scorer": {"@scorers": "spacy.span_finder_scorer.v1"},
},
default_score_weights={
f"span_finder_{DEFAULT_SPANS_KEY}_f": 1.0,
f"span_finder_{DEFAULT_SPANS_KEY}_p": 0.0,
f"span_finder_{DEFAULT_SPANS_KEY}_r": 0.0,
f"spans_{DEFAULT_SPANS_KEY}_f": 1.0,
f"spans_{DEFAULT_SPANS_KEY}_p": 0.0,
f"spans_{DEFAULT_SPANS_KEY}_r": 0.0,
},
)
def make_span_finder(
@ -104,7 +104,7 @@ def make_span_finder_scorer():
def span_finder_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
kwargs = dict(kwargs)
attr_prefix = "span_finder_"
attr_prefix = "spans_"
key = kwargs["spans_key"]
kwargs.setdefault("attr", f"{attr_prefix}{key}")
kwargs.setdefault(
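
With these new defaults, adding the component looks roughly like this (a sketch; span_finder must still be trained before it predicts anything):

import spacy

nlp = spacy.blank("en")
# max_length now defaults to 25 tokens rather than unlimited.
nlp.add_pipe("span_finder", config={"spans_key": "sc", "max_length": 25})
# After training, nlp.evaluate(...) reports spans_sc_f / spans_sc_p / spans_sc_r.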

View File

@ -1,27 +1,20 @@
# cython: infer_types=True, profile=True, binding=True
import warnings
from itertools import islice
from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union
import numpy
import srsly
from thinc.api import Config, Model, set_dropout_rate
from thinc.legacy import LegacySequenceCategoricalCrossentropy
from thinc.types import Floats2d, Ints1d
from ..morphology cimport Morphology
from ..tokens.doc cimport Doc
from ..vocab cimport Vocab
from .. import util
from ..attrs import ID, POS
from ..errors import Errors, Warnings
from ..errors import Errors
from ..language import Language
from ..parts_of_speech import X
from ..scorer import Scorer
from ..training import validate_examples, validate_get_examples
from ..util import registry
from .pipe import deserialize_config
from .trainable_pipe import TrainablePipe
ActivationsT = Dict[str, Union[List[Floats2d], List[Ints1d]]]
@ -188,7 +181,6 @@ class Tagger(TrainablePipe):
if isinstance(docs, Doc):
docs = [docs]
cdef Doc doc
cdef Vocab vocab = self.vocab
cdef bint overwrite = self.cfg["overwrite"]
labels = self.labels
for i, doc in enumerate(docs):

View File

@ -1,5 +1,4 @@
# cython: infer_types=True, profile=True, binding=True
import warnings
from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple
import srsly
@ -8,7 +7,7 @@ from thinc.api import Model, Optimizer, set_dropout_rate
from ..tokens.doc cimport Doc
from .. import util
from ..errors import Errors, Warnings
from ..errors import Errors
from ..language import Language
from ..training import Example, validate_distillation_examples, validate_examples
from ..vocab import Vocab
@ -56,14 +55,14 @@ cdef class TrainablePipe(Pipe):
except Exception as e:
error_handler(self.name, self, [doc], e)
def distill(self,
teacher_pipe: Optional["TrainablePipe"],
examples: Iterable["Example"],
*,
drop: float=0.0,
sgd: Optional[Optimizer]=None,
losses: Optional[Dict[str, float]]=None) -> Dict[str, float]:
teacher_pipe: Optional["TrainablePipe"],
examples: Iterable["Example"],
*,
drop: float = 0.0,
sgd: Optional[Optimizer] = None,
losses: Optional[Dict[str, float]] = None
) -> Dict[str, float]:
"""Train a pipe (the student) on the predictions of another pipe
(the teacher). The student is typically trained on the probability
distribution of the teacher, but details may differ per pipe.
@ -103,7 +102,7 @@ cdef class TrainablePipe(Pipe):
losses[self.name] += loss
return losses
def pipe(self, stream: Iterable[Doc], *, batch_size: int=128) -> Iterator[Doc]:
def pipe(self, stream: Iterable[Doc], *, batch_size: int = 128) -> Iterator[Doc]:
"""Apply the pipe to a stream of documents. This usually happens under
the hood when the nlp object is called on a text and all components are
applied to the Doc.
@ -150,9 +149,9 @@ cdef class TrainablePipe(Pipe):
def update(self,
examples: Iterable["Example"],
*,
drop: float=0.0,
sgd: Optimizer=None,
losses: Optional[Dict[str, float]]=None) -> Dict[str, float]:
drop: float = 0.0,
sgd: Optimizer = None,
losses: Optional[Dict[str, float]] = None) -> Dict[str, float]:
"""Learn from a batch of documents and gold-standard information,
updating the pipe's model. Delegates to predict and get_loss.
@ -186,8 +185,8 @@ cdef class TrainablePipe(Pipe):
def rehearse(self,
examples: Iterable[Example],
*,
sgd: Optimizer=None,
losses: Dict[str, float]=None,
sgd: Optimizer = None,
losses: Dict[str, float] = None,
**config) -> Dict[str, float]:
"""Perform a "rehearsal" update from a batch of data. Rehearsal updates
teach the current model to make predictions similar to an initial model,
@ -238,7 +237,7 @@ cdef class TrainablePipe(Pipe):
"""
return util.create_default_optimizer()
def initialize(self, get_examples: Callable[[], Iterable[Example]], *, nlp: Language=None):
def initialize(self, get_examples: Callable[[], Iterable[Example]], *, nlp: Language = None):
"""Initialize the pipe for training, using data examples if available.
This method needs to be implemented by each TrainablePipe component,
ensuring the internal model (if available) is initialized properly

View File

@ -6,15 +6,9 @@ from typing import Dict, Iterable, List, Optional, Tuple
cimport numpy as np
from cymem.cymem cimport Pool
from itertools import islice
from libc.stdlib cimport calloc, free
from libc.string cimport memcpy, memset
from libcpp.vector cimport vector
import contextlib
import random
import warnings
from itertools import islice
import numpy
import numpy.random
@ -23,44 +17,36 @@ from thinc.api import (
CupyOps,
NumpyOps,
Optimizer,
chain,
get_array_module,
get_ops,
set_dropout_rate,
softmax_activation,
use_ops,
)
from thinc.legacy import LegacySequenceCategoricalCrossentropy
from thinc.types import Floats2d, Ints1d
from ..ml.tb_framework import TransitionModelInputs
from ..tokens.doc cimport Doc
from ._parser_internals cimport _beam_utils
from ._parser_internals.search cimport Beam
from ._parser_internals.stateclass cimport StateC, StateClass
from .trainable_pipe cimport TrainablePipe
from ._parser_internals import _beam_utils
from ..typedefs cimport weight_t
from ..vocab cimport Vocab
from ._parser_internals cimport _beam_utils
from ._parser_internals.stateclass cimport StateC, StateClass
from ._parser_internals.transition_system cimport Transition, TransitionSystem
from .trainable_pipe cimport TrainablePipe
from .. import util
from ..errors import Errors, Warnings
from ..errors import Errors
from ..training import (
validate_distillation_examples,
validate_examples,
validate_get_examples,
)
from ._parser_internals import _beam_utils
# TODO: Remove when we switch to Cython 3.
cdef extern from "<algorithm>" namespace "std" nogil:
bint equal[InputIt1, InputIt2](InputIt1 first1, InputIt1 last1, InputIt2 first2) except +
NUMPY_OPS = NumpyOps()
@ -236,12 +222,13 @@ class Parser(TrainablePipe):
raise NotImplementedError
def distill(self,
teacher_pipe: Optional[TrainablePipe],
examples: Iterable["Example"],
*,
drop: float=0.0,
sgd: Optional[Optimizer]=None,
losses: Optional[Dict[str, float]]=None):
teacher_pipe: Optional[TrainablePipe],
examples: Iterable["Example"],
*,
drop: float = 0.0,
sgd: Optional[Optimizer] = None,
losses: Optional[Dict[str, float]] = None
):
"""Train a pipe (the student) on the predictions of another pipe
(the teacher). The student is trained on the transition probabilities
of the teacher.
@ -291,11 +278,13 @@ class Parser(TrainablePipe):
# teacher's distributions.
student_inputs = TransitionModelInputs(docs=student_docs,
states=[state.copy() for state in states], moves=self.moves, max_moves=max_moves)
states=[state.copy() for state in states],
moves=self.moves,
max_moves=max_moves)
(student_states, student_scores), backprop_scores = self.model.begin_update(student_inputs)
actions = _states_diff_to_actions(states, student_states)
teacher_inputs = TransitionModelInputs(docs=[eg.reference for eg in examples],
states=states, moves=teacher_pipe.moves, actions=actions)
states=states, moves=teacher_pipe.moves, actions=actions)
(_, teacher_scores) = teacher_pipe.model.predict(teacher_inputs)
loss, d_scores = self.get_teacher_student_loss(teacher_scores, student_scores)
@ -308,10 +297,9 @@ class Parser(TrainablePipe):
return losses
def get_teacher_student_loss(
self, teacher_scores: List[Floats2d], student_scores: List[Floats2d],
normalize: bool=False,
self, teacher_scores: List[Floats2d], student_scores: List[Floats2d],
normalize: bool = False,
) -> Tuple[float, List[Floats2d]]:
"""Calculate the loss and its gradient for a batch of student
scores, relative to teacher scores.
@ -334,9 +322,9 @@ class Parser(TrainablePipe):
# ourselves.
teacher_scores = self.model.ops.softmax(self.model.ops.xp.vstack(teacher_scores),
axis=-1, inplace=True)
axis=-1, inplace=True)
student_scores = self.model.ops.softmax(self.model.ops.xp.vstack(student_scores),
axis=-1, inplace=True)
axis=-1, inplace=True)
assert teacher_scores.shape == student_scores.shape
@ -384,7 +372,6 @@ class Parser(TrainablePipe):
except Exception as e:
error_handler(self.name, self, batch_in_order, e)
def predict(self, docs):
if isinstance(docs, Doc):
docs = [docs]
@ -414,7 +401,6 @@ class Parser(TrainablePipe):
def set_annotations(self, docs, states_or_beams):
cdef StateClass state
cdef Beam beam
cdef Doc doc
states = _beam_utils.collect_states(states_or_beams, docs)
for i, (state, doc) in enumerate(zip(states, docs)):
@ -423,7 +409,6 @@ class Parser(TrainablePipe):
hook(doc)
def update(self, examples, *, drop=0., sgd=None, losses=None):
cdef StateClass state
if losses is None:
losses = {}
losses.setdefault(self.name, 0.)
@ -453,13 +438,15 @@ class Parser(TrainablePipe):
else:
init_states, gold_states, _ = self.moves.init_gold_batch(examples)
inputs = TransitionModelInputs(docs=docs, moves=self.moves,
max_moves=max_moves, states=[state.copy() for state in init_states])
inputs = TransitionModelInputs(docs=docs,
moves=self.moves,
max_moves=max_moves,
states=[state.copy() for state in init_states])
(pred_states, scores), backprop_scores = self.model.begin_update(inputs)
if sum(s.shape[0] for s in scores) == 0:
return losses
d_scores = self.get_loss((gold_states, init_states, pred_states, scores),
examples, max_moves)
examples, max_moves)
backprop_scores((pred_states, d_scores))
if sgd not in (None, False):
self.finish_update(sgd)
@ -500,9 +487,7 @@ class Parser(TrainablePipe):
cdef TransitionSystem moves = self.moves
cdef StateClass state
cdef int clas
cdef int nF = self.model.get_dim("nF")
cdef int nO = moves.n_moves
cdef int nS = sum([len(history) for history in histories])
cdef Pool mem = Pool()
cdef np.ndarray costs_i
is_valid = <int*>mem.alloc(nO, sizeof(int))
@ -569,8 +554,8 @@ class Parser(TrainablePipe):
return losses
def update_beam(self, examples, *, beam_width,
drop=0., sgd=None, losses=None, beam_density=0.0):
def update_beam(self, examples, *, beam_width, drop=0.,
sgd=None, losses=None, beam_density=0.0):
raise NotImplementedError
def set_output(self, nO):
@ -695,9 +680,10 @@ class Parser(TrainablePipe):
return states
# Parse the states that are too long with the teacher's parsing model.
teacher_inputs = TransitionModelInputs(docs=docs, moves=moves,
states=[state.copy() for state in to_cut])
(teacher_states, _ ) = teacher_pipe.model.predict(teacher_inputs)
teacher_inputs = TransitionModelInputs(docs=docs,
moves=moves,
states=[state.copy() for state in to_cut])
(teacher_states, _) = teacher_pipe.model.predict(teacher_inputs)
# Step through the teacher's actions and store every state after
# each multiple of max_length.
@ -795,6 +781,7 @@ def _states_to_actions(states: List[StateClass]) -> List[Ints1d]:
return actions
def _states_diff_to_actions(
before_states: List[StateClass],
after_states: List[StateClass]
@ -815,8 +802,9 @@ def _states_diff_to_actions(
c_state_before = before_state.c
c_state_after = after_state.c
assert equal(c_state_before.history.begin(), c_state_before.history.end(),
c_state_after.history.begin())
assert equal(c_state_before.history.begin(),
c_state_before.history.end(),
c_state_after.history.begin())
actions = []
while True:

View File

@ -1,10 +1,8 @@
# cython: infer_types=True
from typing import Any, Callable, Iterable, Iterator, List, Optional, Tuple, Union
from typing import Iterable, Iterator, List, Optional, Tuple, Union
cimport cython
from libc.stdint cimport uint32_t
from libc.string cimport memcpy
from libcpp.set cimport set
from murmurhash.mrmr cimport hash64
import srsly
@ -244,7 +242,6 @@ cdef class StringStore:
cdef int n_length_bytes
cdef int i
cdef Utf8Str* string = <Utf8Str*>self.mem.alloc(1, sizeof(Utf8Str))
cdef uint32_t ulength = length
if length < sizeof(string.s):
string.s[0] = <unsigned char>length
memcpy(&string.s[1], chars, length)
@ -302,7 +299,7 @@ cpdef hash_t get_string_id(object string_or_hash) except -1:
try:
return hash_string(string_or_hash)
except:
except: # no-cython-lint
if _try_coerce_to_hash(string_or_hash, &str_hash):
# Coerce the integral key to the expected primitive hash type.
# This ensures that custom/overloaded "primitive" data types
@ -319,6 +316,5 @@ cdef inline bint _try_coerce_to_hash(object key, hash_t* out_hash):
try:
out_hash[0] = key
return True
except:
except: # no-cython-lint
return False
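
A rough pure-Python picture of this lookup fallback: strings go through the string hasher, and anything else must coerce to an unsigned 64-bit integer. Here hash_string stands in for spaCy's murmurhash-based hasher; the error messages are placeholders.

def get_string_id(string_or_hash, hash_string):
    try:
        return hash_string(string_or_hash)
    except Exception:
        try:
            key = int(string_or_hash)  # coerce integral "hash" keys
        except (TypeError, ValueError):
            raise TypeError(f"unsupported key: {string_or_hash!r}")
        if 0 <= key < 2**64:
            return key
        raise OverflowError(f"key out of hash_t range: {key}")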

View File

@ -52,7 +52,7 @@ cdef struct TokenC:
int sent_start
int ent_iob
attr_t ent_type # TODO: Is there a better way to do this? Multiple sources of truth..
attr_t ent_type # TODO: Is there a better way to do this? Multiple sources of truth..
attr_t ent_kb_id
hash_t ent_id

View File

@ -93,7 +93,7 @@ cdef enum symbol_t:
ADV
AUX
CONJ
CCONJ # U20
CCONJ # U20
DET
INTJ
NOUN
@ -419,7 +419,7 @@ cdef enum symbol_t:
ccomp
complm
conj
cop # U20
cop # U20
csubj
csubjpass
dep
@ -442,8 +442,8 @@ cdef enum symbol_t:
num
number
oprd
obj # U20
obl # U20
obj # U20
obl # U20
parataxis
partmod
pcomp

View File

@ -96,7 +96,7 @@ IDS = {
"ADV": ADV,
"AUX": AUX,
"CONJ": CONJ,
"CCONJ": CCONJ, # U20
"CCONJ": CCONJ, # U20
"DET": DET,
"INTJ": INTJ,
"NOUN": NOUN,
@ -421,7 +421,7 @@ IDS = {
"ccomp": ccomp,
"complm": complm,
"conj": conj,
"cop": cop, # U20
"cop": cop, # U20
"csubj": csubj,
"csubjpass": csubjpass,
"dep": dep,
@ -444,8 +444,8 @@ IDS = {
"num": num,
"number": number,
"oprd": oprd,
"obj": obj, # U20
"obl": obl, # U20
"obj": obj, # U20
"obl": obl, # U20
"parataxis": parataxis,
"partmod": partmod,
"pcomp": pcomp,

View File

@ -52,7 +52,8 @@ TEST_PATTERNS = [
@pytest.mark.parametrize(
"pattern", [[{"XX": "y"}, {"LENGTH": "2"}, {"TEXT": {"IN": 5}}]]
"pattern",
[[{"XX": "y"}], [{"LENGTH": "2"}], [{"TEXT": {"IN": 5}}], [{"text": {"in": 6}}]],
)
def test_matcher_pattern_validation(en_vocab, pattern):
matcher = Matcher(en_vocab, validate=True)

View File

@ -11,6 +11,7 @@ def test_build_dependencies():
"flake8",
"hypothesis",
"pre-commit",
"cython-lint",
"black",
"isort",
"mypy",

View File

@ -2,7 +2,7 @@
from cymem.cymem cimport Pool
from spacy.pipeline._parser_internals.search cimport Beam, MaxViolation
from spacy.typedefs cimport class_t, weight_t
from spacy.typedefs cimport class_t
import pytest
@ -42,32 +42,35 @@ cdef int destroy(Pool mem, void* state, void* extra_args) except -1:
state = <TestState*>state
mem.free(state)
@cytest
@pytest.mark.parametrize("nr_class,beam_width",
[
(2, 3),
(3, 6),
(4, 20),
]
)
[
(2, 3),
(3, 6),
(4, 20),
]
)
def test_init(nr_class, beam_width):
b = Beam(nr_class, beam_width)
assert b.size == 1
assert b.width == beam_width
assert b.nr_class == nr_class
@cytest
def test_init_violn():
MaxViolation()
@cytest
@pytest.mark.parametrize("nr_class,beam_width,length",
[
(2, 3, 3),
(3, 6, 15),
(4, 20, 32),
]
)
[
(2, 3, 3),
(3, 6, 15),
(4, 20, 32),
]
)
def test_initialize(nr_class, beam_width, length):
b = Beam(nr_class, beam_width)
b.initialize(initialize, destroy, length, NULL)
@ -79,11 +82,11 @@ def test_initialize(nr_class, beam_width, length):
@cytest
@pytest.mark.parametrize("nr_class,beam_width,length,extra",
[
(2, 3, 4, None),
(3, 6, 15, u"test beam 1"),
]
)
[
(2, 3, 4, None),
(3, 6, 15, u"test beam 1"),
]
)
def test_initialize_extra(nr_class, beam_width, length, extra):
b = Beam(nr_class, beam_width)
if extra is None:
@ -97,11 +100,11 @@ def test_initialize_extra(nr_class, beam_width, length, extra):
@cytest
@pytest.mark.parametrize("nr_class,beam_width,length",
[
(3, 6, 15),
(4, 20, 32),
]
)
[
(3, 6, 15),
(4, 20, 32),
]
)
def test_transition(nr_class, beam_width, length):
b = Beam(nr_class, beam_width)
b.initialize(initialize, destroy, length, NULL)

View File

@ -230,10 +230,10 @@ def test_overfitting_IO():
# Test scoring
scores = nlp.evaluate(train_examples)
assert f"span_finder_{SPANS_KEY}_f" in scores
assert f"spans_{SPANS_KEY}_f" in scores
# It's not perfect 1.0 F1 because it's designed to overgenerate for now.
assert scores[f"span_finder_{SPANS_KEY}_p"] == 0.75
assert scores[f"span_finder_{SPANS_KEY}_r"] == 1.0
assert scores[f"spans_{SPANS_KEY}_p"] == 0.75
assert scores[f"spans_{SPANS_KEY}_r"] == 1.0
# also test that the spancat works for just a single entity in a sentence
doc = nlp("London")

View File

@ -192,8 +192,7 @@ def test_tok2vec_listener(with_vectors):
for tag in t[1]["tags"]:
tagger.add_label(tag)
# Check that the Tok2Vec component finds it listeners
assert tok2vec.listeners == []
# Check that the Tok2Vec component finds its listeners
optimizer = nlp.initialize(lambda: train_examples)
assert tok2vec.listeners == [tagger_tok2vec]
@ -221,7 +220,6 @@ def test_tok2vec_listener_callback():
assert nlp.pipe_names == ["tok2vec", "tagger"]
tagger = nlp.get_pipe("tagger")
tok2vec = nlp.get_pipe("tok2vec")
nlp._link_components()
docs = [nlp.make_doc("A random sentence")]
tok2vec.model.initialize(X=docs)
gold_array = [[1.0 for tag in ["V", "Z"]] for word in docs]
@ -430,29 +428,46 @@ def test_replace_listeners_from_config():
nlp.to_disk(dir_path)
base_model = str(dir_path)
new_config = {
"nlp": {"lang": "en", "pipeline": ["tok2vec", "tagger", "ner"]},
"nlp": {
"lang": "en",
"pipeline": ["tok2vec", "tagger2", "ner3", "tagger4"],
},
"components": {
"tok2vec": {"source": base_model},
"tagger": {
"tagger2": {
"source": base_model,
"component": "tagger",
"replace_listeners": ["model.tok2vec"],
},
"ner": {"source": base_model},
"ner3": {
"source": base_model,
"component": "ner",
},
"tagger4": {
"source": base_model,
"component": "tagger",
},
},
}
new_nlp = util.load_model_from_config(new_config, auto_fill=True)
new_nlp.initialize(lambda: examples)
tok2vec = new_nlp.get_pipe("tok2vec")
tagger = new_nlp.get_pipe("tagger")
ner = new_nlp.get_pipe("ner")
assert tok2vec.listening_components == ["ner"]
tagger = new_nlp.get_pipe("tagger2")
ner = new_nlp.get_pipe("ner3")
assert "ner" not in new_nlp.pipe_names
assert "tagger" not in new_nlp.pipe_names
assert tok2vec.listening_components == ["ner3", "tagger4"]
assert any(isinstance(node, Tok2VecListener) for node in ner.model.walk())
assert not any(isinstance(node, Tok2VecListener) for node in tagger.model.walk())
t2v_cfg = new_nlp.config["components"]["tok2vec"]["model"]
assert t2v_cfg["@architectures"] == "spacy.Tok2Vec.v2"
assert new_nlp.config["components"]["tagger"]["model"]["tok2vec"] == t2v_cfg
assert new_nlp.config["components"]["tagger2"]["model"]["tok2vec"] == t2v_cfg
assert (
new_nlp.config["components"]["ner"]["model"]["tok2vec"]["@architectures"]
new_nlp.config["components"]["ner3"]["model"]["tok2vec"]["@architectures"]
== "spacy.Tok2VecListener.v1"
)
assert (
new_nlp.config["components"]["tagger4"]["model"]["tok2vec"]["@architectures"]
== "spacy.Tok2VecListener.v1"
)
@ -627,3 +642,57 @@ def test_tok2vec_distillation_teacher_annotations():
student_tok2vec.distill = tok2vec_distill_wrapper.__get__(student_tok2vec, Tok2Vec)
student_nlp.distill(teacher_nlp, train_examples_student, sgd=optimizer, losses={})
def test_tok2vec_listener_source_link_name():
"""The component's internal name and the tok2vec listener map correspond
to the most recently modified pipeline.
"""
orig_config = Config().from_str(cfg_string_multi)
nlp1 = util.load_model_from_config(orig_config, auto_fill=True, validate=True)
assert nlp1.get_pipe("tok2vec").listening_components == ["tagger", "ner"]
nlp2 = English()
nlp2.add_pipe("tok2vec", source=nlp1)
nlp2.add_pipe("tagger", name="tagger2", source=nlp1)
# there is no way to have the component have the right name for both
# pipelines; right now the most recently modified pipeline is prioritized
assert nlp1.get_pipe("tagger").name == nlp2.get_pipe("tagger2").name == "tagger2"
# there is no way to have the tok2vec have the right listener map for both
# pipelines; right now the most recently modified pipeline is prioritized
assert nlp2.get_pipe("tok2vec").listening_components == ["tagger2"]
nlp2.add_pipe("ner", name="ner3", source=nlp1)
assert nlp2.get_pipe("tok2vec").listening_components == ["tagger2", "ner3"]
nlp2.remove_pipe("ner3")
assert nlp2.get_pipe("tok2vec").listening_components == ["tagger2"]
nlp2.remove_pipe("tagger2")
assert nlp2.get_pipe("tok2vec").listening_components == []
# at this point the tok2vec component corresponds to nlp2
assert nlp1.get_pipe("tok2vec").listening_components == []
# modifying the nlp1 pipeline syncs the tok2vec listener map back to nlp1
nlp1.add_pipe("sentencizer")
assert nlp1.get_pipe("tok2vec").listening_components == ["tagger", "ner"]
# modifying nlp2 syncs it back to nlp2
nlp2.add_pipe("sentencizer")
assert nlp1.get_pipe("tok2vec").listening_components == []
def test_tok2vec_listener_source_replace_listeners():
orig_config = Config().from_str(cfg_string_multi)
nlp1 = util.load_model_from_config(orig_config, auto_fill=True, validate=True)
assert nlp1.get_pipe("tok2vec").listening_components == ["tagger", "ner"]
nlp1.replace_listeners("tok2vec", "tagger", ["model.tok2vec"])
assert nlp1.get_pipe("tok2vec").listening_components == ["ner"]
nlp2 = English()
nlp2.add_pipe("tok2vec", source=nlp1)
assert nlp2.get_pipe("tok2vec").listening_components == []
nlp2.add_pipe("tagger", source=nlp1)
assert nlp2.get_pipe("tok2vec").listening_components == []
nlp2.add_pipe("ner", name="ner2", source=nlp1)
assert nlp2.get_pipe("tok2vec").listening_components == ["ner2"]

View File

@ -18,6 +18,7 @@ from spacy.ml.models import (
build_Tok2Vec_model,
)
from spacy.schemas import ConfigSchema, ConfigSchemaDistill, ConfigSchemaPretrain
from spacy.training import Example
from spacy.util import (
load_config,
load_config_from_str,
@ -469,6 +470,55 @@ def test_config_overrides():
assert nlp.pipe_names == ["tok2vec", "tagger"]
@pytest.mark.filterwarnings("ignore:\\[W036")
def test_config_overrides_registered_functions():
nlp = spacy.blank("en")
nlp.add_pipe("attribute_ruler")
with make_tempdir() as d:
nlp.to_disk(d)
nlp_re1 = spacy.load(
d,
config={
"components": {
"attribute_ruler": {
"scorer": {"@scorers": "spacy.tagger_scorer.v1"}
}
}
},
)
assert (
nlp_re1.config["components"]["attribute_ruler"]["scorer"]["@scorers"]
== "spacy.tagger_scorer.v1"
)
@registry.misc("test_some_other_key")
def misc_some_other_key():
return "some_other_key"
nlp_re2 = spacy.load(
d,
config={
"components": {
"attribute_ruler": {
"scorer": {
"@scorers": "spacy.overlapping_labeled_spans_scorer.v1",
"spans_key": {"@misc": "test_some_other_key"},
}
}
}
},
)
assert nlp_re2.config["components"]["attribute_ruler"]["scorer"][
"spans_key"
] == {"@misc": "test_some_other_key"}
# run dummy evaluation (will return None scores) in order to test that
# the spans_key value in the nested override is working as intended in
# the config
example = Example.from_dict(nlp_re2.make_doc("a b c"), {})
scores = nlp_re2.evaluate([example])
assert "spans_some_other_key_f" in scores
def test_config_interpolation():
config = Config().from_str(nlp_config_string, interpolate=False)
assert config["corpora"]["train"]["path"] == "${paths.train}"

View File

@ -697,7 +697,6 @@ def test_string_to_list_intify(value):
assert string_to_list(value, intify=True) == [1, 2, 3]
@pytest.mark.skip(reason="Temporarily skip before models are published")
def test_download_compatibility():
spec = SpecifierSet("==" + about.__version__)
spec.prereleases = False
@ -708,7 +707,6 @@ def test_download_compatibility():
assert get_minor_version(about.__version__) == get_minor_version(version)
@pytest.mark.skip(reason="Temporarily skip before models are published")
def test_validate_compatibility_table():
spec = SpecifierSet("==" + about.__version__)
spec.prereleases = False

View File

@ -377,3 +377,22 @@ def test_displacy_manual_sorted_entities():
html = displacy.render(doc, style="ent", manual=True)
assert html.find("FIRST") < html.find("SECOND")
@pytest.mark.issue(12816)
def test_issue12816(en_vocab) -> None:
"""Test that displaCy's span visualizer escapes annotated HTML tags correctly."""
# Create a doc containing an annotated word and an unannotated HTML tag
doc = Doc(en_vocab, words=["test", "<TEST>"])
doc.spans["sc"] = [Span(doc, 0, 1, label="test")]
# Verify that the HTML tag is escaped when unannotated
html = displacy.render(doc, style="span")
assert "&lt;TEST&gt;" in html
# Annotate the HTML tag
doc.spans["sc"].append(Span(doc, 1, 2, label="test"))
# Verify that the HTML tag is still escaped
html = displacy.render(doc, style="span")
assert "&lt;TEST&gt;" in html

View File

@ -220,6 +220,10 @@ def test_minor_version(a1, a2, b1, b2, is_match):
{"training.batch_size": 128, "training.optimizer.learn_rate": 0.01},
{"training": {"batch_size": 128, "optimizer": {"learn_rate": 0.01}}},
),
(
{"attribute_ruler.scorer.@scorers": "spacy.tagger_scorer.v1"},
{"attribute_ruler": {"scorer": {"@scorers": "spacy.tagger_scorer.v1"}}},
),
],
)
def test_dot_to_dict(dot_notation, expected):
@ -228,6 +232,29 @@ def test_dot_to_dict(dot_notation, expected):
assert util.dict_to_dot(result) == dot_notation
@pytest.mark.parametrize(
"dot_notation,expected",
[
(
{"token.pos": True, "token._.xyz": True},
{"token": {"pos": True, "_": {"xyz": True}}},
),
(
{"training.batch_size": 128, "training.optimizer.learn_rate": 0.01},
{"training": {"batch_size": 128, "optimizer": {"learn_rate": 0.01}}},
),
(
{"attribute_ruler.scorer": {"@scorers": "spacy.tagger_scorer.v1"}},
{"attribute_ruler": {"scorer": {"@scorers": "spacy.tagger_scorer.v1"}}},
),
],
)
def test_dot_to_dict_overrides(dot_notation, expected):
result = util.dot_to_dict(dot_notation)
assert result == expected
assert util.dict_to_dot(result, for_overrides=True) == dot_notation
def test_set_dot_to_object():
config = {"foo": {"bar": 1, "baz": {"x": "y"}}, "test": {"a": {"b": "c"}}}
with pytest.raises(KeyError):
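
The distinction these two parametrizations draw, in isolation (util.dot_to_dict and util.dict_to_dot are public spaCy helpers; the new for_overrides flag keeps registry blocks such as {"@scorers": ...} intact as values instead of splitting them on the dot):

from spacy import util

nested = {"attribute_ruler": {"scorer": {"@scorers": "spacy.tagger_scorer.v1"}}}
# The plain round-trip flattens every level, including the registry block:
assert util.dict_to_dot(nested) == {"attribute_ruler.scorer.@scorers": "spacy.tagger_scorer.v1"}
# With for_overrides=True the registry block survives as a dict value:
assert util.dict_to_dot(nested, for_overrides=True) == {"attribute_ruler.scorer": {"@scorers": "spacy.tagger_scorer.v1"}}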

View File

@ -401,6 +401,7 @@ def test_vectors_serialize():
row_r = v_r.add("D", vector=OPS.asarray([10, 20, 30, 40], dtype="f"))
assert row == row_r
assert_equal(OPS.to_numpy(v.data), OPS.to_numpy(v_r.data))
assert v.attr == v_r.attr
def test_vector_is_oov():
@ -645,3 +646,32 @@ def test_equality():
vectors1.resize((5, 9))
vectors2.resize((5, 9))
assert vectors1 == vectors2
def test_vectors_attr():
data = numpy.asarray([[0, 0, 0], [1, 2, 3], [9, 8, 7]], dtype="f")
# default ORTH
nlp = English()
nlp.vocab.vectors = Vectors(data=data, keys=["A", "B", "C"])
assert nlp.vocab.strings["A"] in nlp.vocab.vectors.key2row
assert nlp.vocab.strings["a"] not in nlp.vocab.vectors.key2row
assert nlp.vocab["A"].has_vector is True
assert nlp.vocab["a"].has_vector is False
assert nlp("A")[0].has_vector is True
assert nlp("a")[0].has_vector is False
# custom LOWER
nlp = English()
nlp.vocab.vectors = Vectors(data=data, keys=["a", "b", "c"], attr="LOWER")
assert nlp.vocab.strings["A"] not in nlp.vocab.vectors.key2row
assert nlp.vocab.strings["a"] in nlp.vocab.vectors.key2row
assert nlp.vocab["A"].has_vector is True
assert nlp.vocab["a"].has_vector is True
assert nlp("A")[0].has_vector is True
assert nlp("a")[0].has_vector is True
# add a new vectors entry
assert nlp.vocab["D"].has_vector is False
assert nlp.vocab["d"].has_vector is False
nlp.vocab.set_vector("D", numpy.asarray([4, 5, 6]))
assert nlp.vocab["D"].has_vector is True
assert nlp.vocab["d"].has_vector is True

View File

@ -26,24 +26,57 @@ cdef class Tokenizer:
cdef Doc _tokenize_affixes(self, str string, bint with_special_cases)
cdef int _apply_special_cases(self, Doc doc) except -1
cdef void _filter_special_spans(self, vector[SpanC] &original,
vector[SpanC] &filtered, int doc_len) nogil
cdef object _prepare_special_spans(self, Doc doc,
vector[SpanC] &filtered)
cdef int _retokenize_special_spans(self, Doc doc, TokenC* tokens,
object span_data)
cdef int _try_specials_and_cache(self, hash_t key, Doc tokens,
int* has_special,
bint with_special_cases) except -1
cdef int _tokenize(self, Doc tokens, str span, hash_t key,
int* has_special, bint with_special_cases) except -1
cdef str _split_affixes(self, str string,
vector[LexemeC*] *prefixes,
vector[LexemeC*] *suffixes, int* has_special,
bint with_special_cases)
cdef int _attach_tokens(self, Doc tokens, str string,
vector[LexemeC*] *prefixes,
vector[LexemeC*] *suffixes, int* has_special,
bint with_special_cases) except -1
cdef int _save_cached(self, const TokenC* tokens, hash_t key,
int* has_special, int n) except -1
cdef void _filter_special_spans(
self,
vector[SpanC] &original,
vector[SpanC] &filtered,
int doc_len,
) nogil
cdef object _prepare_special_spans(
self,
Doc doc,
vector[SpanC] &filtered,
)
cdef int _retokenize_special_spans(
self,
Doc doc,
TokenC* tokens,
object span_data,
)
cdef int _try_specials_and_cache(
self,
hash_t key,
Doc tokens,
int* has_special,
bint with_special_cases,
) except -1
cdef int _tokenize(
self,
Doc tokens,
str span,
hash_t key,
int* has_special,
bint with_special_cases,
) except -1
cdef str _split_affixes(
self,
str string,
vector[LexemeC*] *prefixes,
vector[LexemeC*] *suffixes, int* has_special,
bint with_special_cases,
)
cdef int _attach_tokens(
self,
Doc tokens,
str string,
vector[LexemeC*] *prefixes,
vector[LexemeC*] *suffixes, int* has_special,
bint with_special_cases,
) except -1
cdef int _save_cached(
self,
const TokenC* tokens,
hash_t key,
int* has_special,
int n,
) except -1

View File

@ -323,7 +323,7 @@ cdef class Tokenizer:
cdef int span_start
cdef int span_end
while i < doc.length:
if not i in span_data:
if i not in span_data:
tokens[i + offset] = doc.c[i]
i += 1
else:
@ -394,12 +394,14 @@ cdef class Tokenizer:
self._save_cached(&tokens.c[orig_size], orig_key, has_special,
tokens.length - orig_size)
cdef str _split_affixes(self, str string,
vector[const LexemeC*] *prefixes,
vector[const LexemeC*] *suffixes,
int* has_special,
bint with_special_cases):
cdef size_t i
cdef str _split_affixes(
self,
str string,
vector[const LexemeC*] *prefixes,
vector[const LexemeC*] *suffixes,
int* has_special,
bint with_special_cases
):
cdef str prefix
cdef str suffix
cdef str minus_pre
@ -444,10 +446,6 @@ cdef class Tokenizer:
vector[const LexemeC*] *suffixes,
int* has_special,
bint with_special_cases) except -1:
cdef bint specials_hit = 0
cdef bint cache_hit = 0
cdef int split, end
cdef const LexemeC* const* lexemes
cdef const LexemeC* lexeme
cdef str span
cdef int i
@ -457,9 +455,11 @@ cdef class Tokenizer:
if string:
if self._try_specials_and_cache(hash_string(string), tokens, has_special, with_special_cases):
pass
elif (self.token_match and self.token_match(string)) or \
(self.url_match and \
self.url_match(string)):
elif (
(self.token_match and self.token_match(string)) or
(self.url_match and self.url_match(string))
):
# We're always saying 'no' to spaces here -- the caller will
# fix up the outermost one, with reference to the original.
# See Issue #859
@ -820,7 +820,7 @@ cdef class Tokenizer:
self.infix_finditer = None
self.token_match = None
self.url_match = None
msg = util.from_bytes(bytes_data, deserializers, exclude)
util.from_bytes(bytes_data, deserializers, exclude)
if "prefix_search" in data and isinstance(data["prefix_search"], str):
self.prefix_search = re.compile(data["prefix_search"]).search
if "suffix_search" in data and isinstance(data["suffix_search"], str):

View File

@ -31,7 +31,7 @@ cdef int token_by_start(const TokenC* tokens, int length, int start_char) except
cdef int token_by_end(const TokenC* tokens, int length, int end_char) except -2
cdef int [:,:] _get_lca_matrix(Doc, int start, int end)
cdef int [:, :] _get_lca_matrix(Doc, int start, int end)
cdef class Doc:
@ -61,7 +61,6 @@ cdef class Doc:
cdef int length
cdef int max_length
cdef public object noun_chunks_iterator
cdef object __weakref__

View File

@ -35,6 +35,7 @@ from ..attrs cimport (
LENGTH,
MORPH,
NORM,
ORTH,
POS,
SENT_START,
SPACY,
@ -42,14 +43,13 @@ from ..attrs cimport (
attr_id_t,
)
from ..lexeme cimport EMPTY_LEXEME, Lexeme
from ..typedefs cimport attr_t, flags_t
from ..typedefs cimport attr_t
from .token cimport Token
from .. import parts_of_speech, schemas, util
from ..attrs import IDS, intify_attr
from ..compat import copy_reg, pickle
from ..compat import copy_reg
from ..errors import Errors, Warnings
from ..morphology import Morphology
from ..util import get_words_and_spaces
from .retokenizer import Retokenizer
from .underscore import Underscore, get_ext_args
@ -613,13 +613,26 @@ cdef class Doc:
"""
if "similarity" in self.user_hooks:
return self.user_hooks["similarity"](self, other)
if isinstance(other, (Lexeme, Token)) and self.length == 1:
if self.c[0].lex.orth == other.orth:
attr = getattr(self.vocab.vectors, "attr", ORTH)
cdef Token this_token
cdef Token other_token
cdef Lexeme other_lex
if len(self) == 1 and isinstance(other, Token):
this_token = self[0]
other_token = other
if Token.get_struct_attr(this_token.c, attr) == Token.get_struct_attr(other_token.c, attr):
return 1.0
elif isinstance(other, (Span, Doc)) and len(self) == len(other):
elif len(self) == 1 and isinstance(other, Lexeme):
this_token = self[0]
other_lex = other
if Token.get_struct_attr(this_token.c, attr) == Lexeme.get_struct_attr(other_lex.c, attr):
return 1.0
elif isinstance(other, (Doc, Span)) and len(self) == len(other):
similar = True
for i in range(self.length):
if self[i].orth != other[i].orth:
for i in range(len(self)):
this_token = self[i]
other_token = other[i]
if Token.get_struct_attr(this_token.c, attr) != Token.get_struct_attr(other_token.c, attr):
similar = False
break
if similar:
@ -767,7 +780,7 @@ cdef class Doc:
# TODO:
# 1. Test basic data-driven ORTH gazetteer
# 2. Test more nuanced date and currency regex
cdef attr_t entity_type, kb_id, ent_id
cdef attr_t kb_id, ent_id
cdef int ent_start, ent_end
ent_spans = []
for ent_info in ents:
@ -975,7 +988,6 @@ cdef class Doc:
>>> np_array = doc.to_array([LOWER, POS, ENT_TYPE, IS_ALPHA])
"""
cdef int i, j
cdef attr_id_t feature
cdef np.ndarray[attr_t, ndim=2] output
# Handle scalar/list inputs of strings/ints for py_attr_ids
# See also #3064
@ -987,8 +999,10 @@ cdef class Doc:
py_attr_ids = [py_attr_ids]
# Allow strings, e.g. 'lemma' or 'LEMMA'
try:
py_attr_ids = [(IDS[id_.upper()] if hasattr(id_, "upper") else id_)
for id_ in py_attr_ids]
py_attr_ids = [
(IDS[id_.upper()] if hasattr(id_, "upper") else id_)
for id_ in py_attr_ids
]
except KeyError as msg:
keys = list(IDS.keys())
raise KeyError(Errors.E983.format(dict="IDS", key=msg, keys=keys)) from None
@ -1022,8 +1036,6 @@ cdef class Doc:
DOCS: https://spacy.io/api/doc#count_by
"""
cdef int i
cdef attr_t attr
cdef size_t count
if counts is None:
counts = Counter()
@ -1085,7 +1097,6 @@ cdef class Doc:
cdef int i, col
cdef int32_t abs_head_index
cdef attr_id_t attr_id
cdef TokenC* tokens = self.c
cdef int length = len(array)
if length != len(self):
raise ValueError(Errors.E971.format(array_length=length, doc_length=len(self)))
@ -1226,7 +1237,7 @@ cdef class Doc:
span.label,
span.kb_id,
span.id,
span.text, # included as a check
span.text, # included as a check
))
char_offset += len(doc.text)
if len(doc) > 0 and ensure_whitespace and not doc[-1].is_space and not bool(doc[-1].whitespace_):
@ -1505,7 +1516,6 @@ cdef class Doc:
attributes are inherited from the syntactic root of the span.
RETURNS (Token): The first newly merged token.
"""
cdef str tag, lemma, ent_type
attr_len = len(attributes)
span_len = len(spans)
if not attr_len == span_len:
@ -1621,7 +1631,6 @@ cdef class Doc:
for token in char_span[1:]:
token.is_sent_start = False
for span_group in doc_json.get("spans", {}):
spans = []
for span in doc_json["spans"][span_group]:
@ -1750,7 +1759,7 @@ cdef class Doc:
data["underscore_span"] = {}
if attr not in data["underscore_span"]:
data["underscore_span"][attr] = []
data["underscore_span"][attr].append({"start": start, "end": end, "value": value, "label": _label, "kb_id": _kb_id, "id":_span_id})
data["underscore_span"][attr].append({"start": start, "end": end, "value": value, "label": _label, "kb_id": _kb_id, "id": _span_id})
for attr in underscore:
if attr not in user_keys:
@ -1773,7 +1782,6 @@ cdef class Doc:
output.fill(255)
cdef int i, j, start_idx, end_idx
cdef bytes byte_string
cdef unsigned char utf8_char
for i, byte_string in enumerate(byte_strings):
j = 0
start_idx = 0
@ -1826,8 +1834,6 @@ cdef int token_by_char(const TokenC* tokens, int length, int char_idx) except -2
cdef int set_children_from_heads(TokenC* tokens, int start, int end) except -1:
# note: end is exclusive
cdef TokenC* head
cdef TokenC* child
cdef int i
# Set number of left/right children to 0. We'll increment it in the loops.
for i in range(start, end):
@ -1927,7 +1933,7 @@ cdef int _get_tokens_lca(Token token_j, Token token_k):
return -1
cdef int [:,:] _get_lca_matrix(Doc doc, int start, int end):
cdef int [:, :] _get_lca_matrix(Doc doc, int start, int end):
"""Given a doc and a start and end position defining a set of contiguous
tokens within it, returns a matrix of Lowest Common Ancestors (LCA), where
LCA[i, j] is the index of the lowest common ancestor among token i and j.
@ -1940,7 +1946,7 @@ cdef int [:,:] _get_lca_matrix(Doc doc, int start, int end):
RETURNS (int [:, :]): memoryview of numpy.array[ndim=2, dtype=numpy.int32],
with shape (n, n), where n = len(doc).
"""
cdef int [:,:] lca_matrix
cdef int [:, :] lca_matrix
cdef int j, k
n_tokens = end - start
lca_mat = numpy.empty((n_tokens, n_tokens), dtype=numpy.int32)
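
The attribute-aware similarity change earlier in this file can be checked end to end; a short sketch reusing the Vectors(attr="LOWER") setup from the vectors tests above:

import numpy
from spacy.lang.en import English
from spacy.vectors import Vectors

nlp = English()
data = numpy.asarray([[1, 2, 3]], dtype="f")
nlp.vocab.vectors = Vectors(data=data, keys=["apple"], attr="LOWER")
# One-token docs that agree on the vector key attribute are trivially
# similar, even though their ORTH forms differ.
assert nlp("Apple").similarity(nlp("APPLE")) == 1.0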

View File

@ -3,7 +3,7 @@ from typing import Generator, List, Tuple
cimport cython
from cython.operator cimport dereference
from libc.stdint cimport int32_t, int64_t
from libc.stdint cimport int32_t
from libcpp.pair cimport pair
from libcpp.unordered_map cimport unordered_map
from libcpp.unordered_set cimport unordered_set
@ -11,7 +11,6 @@ from libcpp.unordered_set cimport unordered_set
import weakref
from murmurhash.mrmr cimport hash64
from preshed.maps cimport map_get_unless_missing
from .. import Errors
@ -370,7 +369,9 @@ cdef class Graph:
>>> assert graph.has_node((0,))
>>> assert graph.has_edge((0,), (1,3), label="agent")
"""
def __init__(self, doc, *, name="", nodes=[], edges=[], labels=None, weights=None):
def __init__(
self, doc, *, name="", nodes=[], edges=[], labels=None, weights=None # no-cython-lint
):
"""Create a Graph object.
doc (Doc): The Doc object the graph will refer to.
@ -441,8 +442,6 @@ cdef class Graph:
be returned, and no new edge will be created. The weight of the edge
will be updated if a weight is specified.
"""
label_hash = self.doc.vocab.strings.as_int(label)
weight_float = weight if weight is not None else 0.0
edge_index = add_edge(
&self.c,
EdgeC(

View File

@ -1,5 +1,4 @@
cimport numpy as np
from libc.string cimport memset
from ..errors import Errors
from ..morphology import Morphology
@ -94,4 +93,3 @@ cdef class MorphAnalysis:
def __repr__(self):
return self.to_json()

View File

@ -1,7 +1,6 @@
# cython: infer_types=True, bounds_check=False, profile=True
from cymem.cymem cimport Pool
from libc.stdlib cimport free, malloc
from libc.string cimport memcpy, memset
from libc.string cimport memset
import numpy
from thinc.api import get_array_module
@ -10,7 +9,7 @@ from ..attrs cimport MORPH, NORM
from ..lexeme cimport EMPTY_LEXEME, Lexeme
from ..structs cimport LexemeC, TokenC
from ..vocab cimport Vocab
from .doc cimport Doc, set_children_from_heads, token_by_end, token_by_start
from .doc cimport Doc, set_children_from_heads, token_by_start
from .span cimport Span
from .token cimport Token
@ -148,7 +147,7 @@ def _merge(Doc doc, merges):
syntactic root of the span.
RETURNS (Token): The first newly merged token.
"""
cdef int i, merge_index, start, end, token_index, current_span_index, current_offset, offset, span_index
cdef int i, merge_index, start, token_index, current_span_index, current_offset, offset, span_index
cdef Span span
cdef const LexemeC* lex
cdef TokenC* token
@ -166,7 +165,6 @@ def _merge(Doc doc, merges):
merges.sort(key=_get_start)
for merge_index, (span, attributes) in enumerate(merges):
start = span.start
end = span.end
spans.append(span)
# House the new merged token where it starts
token = &doc.c[start]
@ -204,8 +202,9 @@ def _merge(Doc doc, merges):
# for the merged region. To do this, we create a boolean array indicating
# whether the row is to be deleted, then use numpy.delete
if doc.tensor is not None and doc.tensor.size != 0:
doc.tensor = _resize_tensor(doc.tensor,
[(m[0].start, m[0].end) for m in merges])
doc.tensor = _resize_tensor(
doc.tensor, [(m[0].start, m[0].end) for m in merges]
)
# Memorize span roots and sets dependencies of the newly merged
# tokens to the dependencies of their roots.
span_roots = []
@ -268,11 +267,11 @@ def _merge(Doc doc, merges):
span_index += 1
if span_index < len(spans) and i == spans[span_index].start:
# First token in a span
doc.c[i - offset] = doc.c[i] # move token to its place
doc.c[i - offset] = doc.c[i] # move token to its place
offset += (spans[span_index].end - spans[span_index].start) - 1
in_span = True
if not in_span:
doc.c[i - offset] = doc.c[i] # move token to its place
doc.c[i - offset] = doc.c[i] # move token to its place
for i in range(doc.length - offset, doc.length):
memset(&doc.c[i], 0, sizeof(TokenC))
@ -346,7 +345,11 @@ def _split(Doc doc, int token_index, orths, heads, attrs):
if to_process_tensor:
xp = get_array_module(doc.tensor)
if xp is numpy:
doc.tensor = xp.append(doc.tensor, xp.zeros((nb_subtokens,doc.tensor.shape[1]), dtype="float32"), axis=0)
doc.tensor = xp.append(
doc.tensor,
xp.zeros((nb_subtokens, doc.tensor.shape[1]), dtype="float32"),
axis=0
)
else:
shape = (doc.tensor.shape[0] + nb_subtokens, doc.tensor.shape[1])
resized_array = xp.zeros(shape, dtype="float32")
@ -368,7 +371,8 @@ def _split(Doc doc, int token_index, orths, heads, attrs):
token.norm = 0 # reset norm
if to_process_tensor:
# setting the tensors of the split tokens to array of zeros
doc.tensor[token_index + i:token_index + i + 1] = xp.zeros((1,doc.tensor.shape[1]), dtype="float32")
doc.tensor[token_index + i:token_index + i + 1] = \
xp.zeros((1, doc.tensor.shape[1]), dtype="float32")
# Update the character offset of the subtokens
if i != 0:
token.idx = orig_token.idx + idx_offset
@ -456,7 +460,6 @@ def normalize_token_attrs(Vocab vocab, attrs):
def set_token_attrs(Token py_token, attrs):
cdef TokenC* token = py_token.c
cdef const LexemeC* lex = token.lex
cdef Doc doc = py_token.doc
# Assign attributes
for attr_name, attr_value in attrs.items():
if attr_name == "_": # Set extension attributes

View File

@ -1,5 +1,4 @@
cimport numpy as np
from libc.math cimport sqrt
from libcpp.memory cimport make_shared
import copy
@ -9,13 +8,13 @@ import numpy
from thinc.api import get_array_module
from ..attrs cimport *
from ..attrs cimport attr_id_t
from ..attrs cimport ORTH, attr_id_t
from ..lexeme cimport Lexeme
from ..parts_of_speech cimport univ_pos_t
from ..structs cimport LexemeC, TokenC
from ..structs cimport TokenC
from ..symbols cimport dep
from ..typedefs cimport attr_t, flags_t, hash_t
from ..typedefs cimport attr_t
from .doc cimport _get_lca_matrix, get_token_attr, token_by_end, token_by_start
from .token cimport Token
from ..errors import Errors, Warnings
from ..util import normalize_slice
@ -226,8 +225,8 @@ cdef class Span:
@property
def _(self):
cdef SpanC* span_c = self.span_c()
"""Custom extension attributes registered via `set_extension`."""
cdef SpanC* span_c = self.span_c()
return Underscore(Underscore.span_extensions, self,
start=span_c.start_char, end=span_c.end_char, label=self.label, kb_id=self.kb_id, span_id=self.id)
@ -371,13 +370,26 @@ cdef class Span:
"""
if "similarity" in self.doc.user_span_hooks:
return self.doc.user_span_hooks["similarity"](self, other)
if len(self) == 1 and hasattr(other, "orth"):
if self[0].orth == other.orth:
attr = getattr(self.doc.vocab.vectors, "attr", ORTH)
cdef Token this_token
cdef Token other_token
cdef Lexeme other_lex
if len(self) == 1 and isinstance(other, Token):
this_token = self[0]
other_token = other
if Token.get_struct_attr(this_token.c, attr) == Token.get_struct_attr(other_token.c, attr):
return 1.0
elif len(self) == 1 and isinstance(other, Lexeme):
this_token = self[0]
other_lex = other
if Token.get_struct_attr(this_token.c, attr) == Lexeme.get_struct_attr(other_lex.c, attr):
return 1.0
elif isinstance(other, (Doc, Span)) and len(self) == len(other):
similar = True
for i in range(len(self)):
if self[i].orth != getattr(other[i], "orth", None):
this_token = self[i]
other_token = other[i]
if Token.get_struct_attr(this_token.c, attr) != Token.get_struct_attr(other_token.c, attr):
similar = False
break
if similar:
@ -607,7 +619,6 @@ cdef class Span:
"""
return "".join([t.text_with_ws for t in self])
@property
def noun_chunks(self):
"""Iterate over the base noun phrases in the span. Yields base
@ -922,7 +933,6 @@ cdef class Span:
self.id_ = ent_id_
cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1:
# Don't allow spaces to be the root, if there are
# better candidates

View File

@ -1,7 +1,7 @@
import struct
import weakref
from copy import deepcopy
from typing import TYPE_CHECKING, Iterable, Optional, Tuple, Union
from typing import Iterable, Optional, Union
import srsly
@ -36,7 +36,7 @@ cdef class SpanGroup:
DOCS: https://spacy.io/api/spangroup
"""
def __init__(self, doc, *, name="", attrs={}, spans=[]):
def __init__(self, doc, *, name="", attrs={}, spans=[]): # no-cython-lint
"""Create a SpanGroup.
doc (Doc): The reference Doc object.
@ -315,7 +315,7 @@ cdef class SpanGroup:
other_attrs = deepcopy(other_group.attrs)
span_group.attrs.update({
key: value for key, value in other_attrs.items() \
key: value for key, value in other_attrs.items()
if key not in span_group.attrs
})
if len(other_group):

View File

@ -26,7 +26,7 @@ cdef class Token:
cdef Token self = Token.__new__(Token, vocab, doc, offset)
return self
#cdef inline TokenC struct_from_attrs(Vocab vocab, attrs):
# cdef inline TokenC struct_from_attrs(Vocab vocab, attrs):
# cdef TokenC token
# attrs = normalize_attrs(attrs)
@ -98,12 +98,10 @@ cdef class Token:
elif feat_name == SENT_START:
token.sent_start = value
@staticmethod
cdef inline int missing_dep(const TokenC* token) nogil:
return token.dep == MISSING_DEP
@staticmethod
cdef inline int missing_head(const TokenC* token) nogil:
return Token.missing_dep(token)

View File

@ -1,13 +1,11 @@
# cython: infer_types=True
# Compiler crashes on memory view coercion without this. Should report bug.
cimport numpy as np
from cython.view cimport array as cvarray
np.import_array()
import warnings
import numpy
from thinc.api import get_array_module
from ..attrs cimport (
@ -216,11 +214,17 @@ cdef class Token:
"""
if "similarity" in self.doc.user_token_hooks:
return self.doc.user_token_hooks["similarity"](self, other)
if hasattr(other, "__len__") and len(other) == 1 and hasattr(other, "__getitem__"):
if self.c.lex.orth == getattr(other[0], "orth", None):
attr = getattr(self.doc.vocab.vectors, "attr", ORTH)
cdef Token this_token = self
cdef Token other_token
cdef Lexeme other_lex
if isinstance(other, Token):
other_token = other
if Token.get_struct_attr(this_token.c, attr) == Token.get_struct_attr(other_token.c, attr):
return 1.0
elif hasattr(other, "orth"):
if self.c.lex.orth == other.orth:
elif isinstance(other, Lexeme):
other_lex = other
if Token.get_struct_attr(this_token.c, attr) == Lexeme.get_struct_attr(other_lex.c, attr):
return 1.0
if self.vocab.vectors.n_keys == 0:
warnings.warn(Warnings.W007.format(obj="Token"))
@ -528,9 +532,9 @@ cdef class Token:
def __get__(self):
if self.i + 1 == len(self.doc):
return True
elif self.doc[self.i+1].is_sent_start == None:
elif self.doc[self.i+1].is_sent_start is None:
return None
elif self.doc[self.i+1].is_sent_start == True:
elif self.doc[self.i+1].is_sent_start is True:
return True
else:
return False

View File

@ -37,10 +37,14 @@ def get_alignments(A: List[str], B: List[str]) -> Tuple[List[List[int]], List[Li
b2a.append(set())
# Process the alignment at the current position
if A[token_idx_a] == B[token_idx_b] and \
(char_idx_a == 0 or \
char_to_token_a[char_idx_a - 1] < token_idx_a) and \
(char_idx_b == 0 or \
char_to_token_b[char_idx_b - 1] < token_idx_b):
(
char_idx_a == 0 or
char_to_token_a[char_idx_a - 1] < token_idx_a
) and \
(
char_idx_b == 0 or
char_to_token_b[char_idx_b - 1] < token_idx_b
):
# Current tokens are identical and both character offsets are the
# start of a token (either at the beginning of the document or the
# previous character belongs to a different token)

View File

@ -1,4 +1,3 @@
import warnings
from collections.abc import Iterable as IterableInstance
import numpy
@ -31,9 +30,9 @@ cpdef Doc annotations_to_doc(vocab, tok_annot, doc_annot):
attrs, array = _annot2array(vocab, tok_annot, doc_annot)
output = Doc(vocab, words=tok_annot["ORTH"], spaces=tok_annot["SPACY"])
if "entities" in doc_annot:
_add_entities_to_doc(output, doc_annot["entities"])
_add_entities_to_doc(output, doc_annot["entities"])
if "spans" in doc_annot:
_add_spans_to_doc(output, doc_annot["spans"])
_add_spans_to_doc(output, doc_annot["spans"])
if array.size:
output = output.from_array(attrs, array)
# links are currently added with ENT_KB_ID on the token level
@ -168,7 +167,6 @@ cdef class Example:
self._y_sig = y_sig
return self._cached_alignment
def _get_aligned_vectorized(self, align, gold_values):
# Fast path for Doc attributes/fields that are predominantly a single value,
# i.e., TAG, POS, MORPH.
@ -211,7 +209,6 @@ cdef class Example:
return output.tolist()
def _get_aligned_non_vectorized(self, align, gold_values):
# Slower path for fields that return multiple values (resulting
# in ragged arrays that cannot be vectorized trivially).
@ -228,7 +225,6 @@ cdef class Example:
return output
def get_aligned(self, field, as_string=False):
"""Return an aligned array for a token attribute."""
align = self.alignment.x2y
@ -337,7 +333,7 @@ cdef class Example:
missing=None
)
# Now fill the tokens we can align to O.
O = 2 # I=1, O=2, B=3
O = 2 # I=1, O=2, B=3 # no-cython-lint: E741
for i, ent_iob in enumerate(self.get_aligned("ENT_IOB")):
if x_tags[i] is None:
if ent_iob == O:
@ -347,7 +343,7 @@ cdef class Example:
return x_ents, x_tags
def get_aligned_ner(self):
x_ents, x_tags = self.get_aligned_ents_and_ner()
_x_ents, x_tags = self.get_aligned_ents_and_ner()
return x_tags
def get_matching_ents(self, check_label=True):
@ -405,7 +401,6 @@ cdef class Example:
return span_dict
def _links_to_dict(self):
links = {}
for ent in self.reference.ents:
@ -596,6 +591,7 @@ def _fix_legacy_dict_data(example_dict):
"doc_annotation": doc_dict
}
def _has_field(annot, field):
if field not in annot:
return False
@ -632,6 +628,7 @@ def _parse_ner_tags(biluo_or_offsets, vocab, words, spaces):
ent_types.append("")
return ent_iobs, ent_types
def _parse_links(vocab, words, spaces, links):
reference = Doc(vocab, words=words, spaces=spaces)
starts = {token.idx: token.i for token in reference}

View File

@ -1,4 +1,3 @@
import json
import warnings
import srsly
@ -6,7 +5,7 @@ import srsly
from .. import util
from ..errors import Warnings
from ..tokens import Doc
from .iob_utils import offsets_to_biluo_tags, tags_to_entities
from .iob_utils import offsets_to_biluo_tags
def docs_to_json(docs, doc_id=0, ner_missing_tag="O"):
@ -23,7 +22,13 @@ def docs_to_json(docs, doc_id=0, ner_missing_tag="O"):
json_doc = {"id": doc_id, "paragraphs": []}
for i, doc in enumerate(docs):
raw = None if doc.has_unknown_spaces else doc.text
json_para = {'raw': raw, "sentences": [], "cats": [], "entities": [], "links": []}
json_para = {
'raw': raw,
"sentences": [],
"cats": [],
"entities": [],
"links": []
}
for cat, val in doc.cats.items():
json_cat = {"label": cat, "value": val}
json_para["cats"].append(json_cat)
@ -35,13 +40,17 @@ def docs_to_json(docs, doc_id=0, ner_missing_tag="O"):
if ent.kb_id_:
link_dict = {(ent.start_char, ent.end_char): {ent.kb_id_: 1.0}}
json_para["links"].append(link_dict)
biluo_tags = offsets_to_biluo_tags(doc, json_para["entities"], missing=ner_missing_tag)
biluo_tags = offsets_to_biluo_tags(
doc, json_para["entities"], missing=ner_missing_tag
)
attrs = ("TAG", "POS", "MORPH", "LEMMA", "DEP", "ENT_IOB")
include_annotation = {attr: doc.has_annotation(attr) for attr in attrs}
for j, sent in enumerate(doc.sents):
json_sent = {"tokens": [], "brackets": []}
for token in sent:
json_token = {"id": token.i, "orth": token.text, "space": token.whitespace_}
json_token = {
"id": token.i, "orth": token.text, "space": token.whitespace_
}
if include_annotation["TAG"]:
json_token["tag"] = token.tag_
if include_annotation["POS"]:
@ -125,9 +134,14 @@ def json_to_annotations(doc):
else:
sent_starts.append(-1)
if "brackets" in sent:
brackets.extend((b["first"] + sent_start_i,
b["last"] + sent_start_i, b["label"])
for b in sent["brackets"])
brackets.extend(
(
b["first"] + sent_start_i,
b["last"] + sent_start_i,
b["label"]
)
for b in sent["brackets"]
)
example["token_annotation"] = dict(
ids=ids,
@ -160,6 +174,7 @@ def json_to_annotations(doc):
)
yield example
def json_iterate(bytes utf8_str):
# We should've made these files jsonl...But since we didn't, parse out
# the docs one-by-one to reduce memory usage.

View File

@ -71,7 +71,8 @@ def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
with nlp.select_pipes(enable=resume_components):
logger.info("Resuming training for: %s", resume_components)
nlp.resume_training(sgd=optimizer)
# Make sure that listeners are defined before initializing further
# Make sure that internal component names are synced and listeners are
# defined before initializing further
nlp._link_components()
with nlp.select_pipes(disable=[*frozen_components, *resume_components]):
if T["max_epochs"] == -1:
@ -305,9 +306,14 @@ def convert_vectors(
truncate: int,
prune: int,
mode: str = VectorsMode.default,
attr: str = "ORTH",
) -> None:
vectors_loc = ensure_path(vectors_loc)
if vectors_loc and vectors_loc.parts[-1].endswith(".npz"):
if attr != "ORTH":
raise ValueError(
"ORTH is the only attribute supported for vectors in .npz format."
)
nlp.vocab.vectors = Vectors(
strings=nlp.vocab.strings, data=numpy.load(vectors_loc.open("rb"))
)
@ -335,11 +341,15 @@ def convert_vectors(
nlp.vocab.vectors = Vectors(
strings=nlp.vocab.strings,
data=vectors_data,
attr=attr,
**floret_settings,
)
else:
nlp.vocab.vectors = Vectors(
strings=nlp.vocab.strings, data=vectors_data, keys=vector_keys
strings=nlp.vocab.strings,
data=vectors_data,
keys=vector_keys,
attr=attr,
)
nlp.vocab.deduplicate_vectors()
if prune >= 1 and mode != VectorsMode.floret:

View File

@ -518,7 +518,7 @@ def load_model_from_path(
if not meta:
meta = get_model_meta(model_path)
config_path = model_path / "config.cfg"
overrides = dict_to_dot(config)
overrides = dict_to_dot(config, for_overrides=True)
config = load_config(config_path, overrides=overrides)
nlp = load_model_from_config(
config,
@ -1486,14 +1486,19 @@ def dot_to_dict(values: Dict[str, Any]) -> Dict[str, dict]:
return result
def dict_to_dot(obj: Dict[str, dict]) -> Dict[str, Any]:
def dict_to_dot(obj: Dict[str, dict], *, for_overrides: bool = False) -> Dict[str, Any]:
"""Convert dot notation to a dict. For example: {"token": {"pos": True,
"_": {"xyz": True }}} becomes {"token.pos": True, "token._.xyz": True}.
values (Dict[str, dict]): The dict to convert.
obj (Dict[str, dict]): The dict to convert.
for_overrides (bool): Whether to enable special handling for registered
functions in overrides.
RETURNS (Dict[str, Any]): The key/value pairs.
"""
return {".".join(key): value for key, value in walk_dict(obj)}
return {
".".join(key): value
for key, value in walk_dict(obj, for_overrides=for_overrides)
}
def dot_to_object(config: Config, section: str):
@ -1535,13 +1540,20 @@ def set_dot_to_object(config: Config, section: str, value: Any) -> None:
def walk_dict(
node: Dict[str, Any], parent: List[str] = []
node: Dict[str, Any], parent: List[str] = [], *, for_overrides: bool = False
) -> Iterator[Tuple[List[str], Any]]:
"""Walk a dict and yield the path and values of the leaves."""
"""Walk a dict and yield the path and values of the leaves.
for_overrides (bool): Whether to treat registered functions that start with
@ as final values rather than dicts to traverse.
"""
for key, value in node.items():
key_parent = [*parent, key]
if isinstance(value, dict):
yield from walk_dict(value, key_parent)
if isinstance(value, dict) and (
not for_overrides
or not any(value_key.startswith("@") for value_key in value)
):
yield from walk_dict(value, key_parent, for_overrides=for_overrides)
else:
yield (key_parent, value)

View File

@ -1,10 +1,8 @@
cimport numpy as np
from cython.operator cimport dereference as deref
from libc.stdint cimport uint32_t, uint64_t
from libcpp.set cimport set as cppset
from murmurhash.mrmr cimport hash128_x64
import functools
import warnings
from enum import Enum
from typing import cast
@ -15,9 +13,11 @@ from thinc.api import Ops, get_array_module, get_current_ops
from thinc.backends import get_array_ops
from thinc.types import Floats2d
from .attrs cimport ORTH, attr_id_t
from .strings cimport StringStore
from . import util
from .attrs import IDS
from .errors import Errors, Warnings
from .strings import get_string_id
@ -63,8 +63,9 @@ cdef class Vectors:
cdef readonly uint32_t hash_seed
cdef readonly unicode bow
cdef readonly unicode eow
cdef readonly attr_id_t attr
def __init__(self, *, strings=None, shape=None, data=None, keys=None, mode=Mode.default, minn=0, maxn=0, hash_count=1, hash_seed=0, bow="<", eow=">"):
def __init__(self, *, strings=None, shape=None, data=None, keys=None, mode=Mode.default, minn=0, maxn=0, hash_count=1, hash_seed=0, bow="<", eow=">", attr="ORTH"):
"""Create a new vector store.
strings (StringStore): The string store.
@ -78,6 +79,8 @@ cdef class Vectors:
hash_seed (int): The floret hash seed (default: 0).
bow (str): The floret BOW string (default: "<").
eow (str): The floret EOW string (default: ">").
attr (Union[int, str]): The token attribute for the vector keys
(default: "ORTH").
DOCS: https://spacy.io/api/vectors#init
"""
@ -100,10 +103,18 @@ cdef class Vectors:
self.hash_seed = hash_seed
self.bow = bow
self.eow = eow
if isinstance(attr, (int, long)):
self.attr = attr
else:
attr = attr.upper()
if attr == "TEXT":
attr = "ORTH"
self.attr = IDS.get(attr, ORTH)
if self.mode == Mode.default:
if data is None:
if shape is None:
shape = (0,0)
shape = (0, 0)
ops = get_current_ops()
data = ops.xp.zeros(shape, dtype="f")
self._unset = cppset[int]({i for i in range(data.shape[0])})
@ -244,11 +255,10 @@ cdef class Vectors:
def __eq__(self, other):
# Check for equality, with faster checks first
return (
self.shape == other.shape
and self.key2row == other.key2row
and self.to_bytes(exclude=["strings"])
== other.to_bytes(exclude=["strings"])
)
self.shape == other.shape
and self.key2row == other.key2row
and self.to_bytes(exclude=["strings"]) == other.to_bytes(exclude=["strings"])
)
def resize(self, shape, inplace=False):
"""Resize the underlying vectors array. If inplace=True, the memory
@ -504,11 +514,12 @@ cdef class Vectors:
# vectors e.g. (10000, 300)
# sims e.g. (1024, 10000)
sims = xp.dot(batch, vectors.T)
best_rows[i:i+batch_size] = xp.argpartition(sims, -n, axis=1)[:,-n:]
scores[i:i+batch_size] = xp.partition(sims, -n, axis=1)[:,-n:]
best_rows[i:i+batch_size] = xp.argpartition(sims, -n, axis=1)[:, -n:]
scores[i:i+batch_size] = xp.partition(sims, -n, axis=1)[:, -n:]
if sort and n >= 2:
sorted_index = xp.arange(scores.shape[0])[:,None][i:i+batch_size],xp.argsort(scores[i:i+batch_size], axis=1)[:,::-1]
sorted_index = xp.arange(scores.shape[0])[:, None][i:i+batch_size], \
xp.argsort(scores[i:i+batch_size], axis=1)[:, ::-1]
scores[i:i+batch_size] = scores[sorted_index]
best_rows[i:i+batch_size] = best_rows[sorted_index]
@ -522,8 +533,12 @@ cdef class Vectors:
numpy_rows = get_current_ops().to_numpy(best_rows)
keys = xp.asarray(
[[row2key[row] for row in numpy_rows[i] if row in row2key]
for i in range(len(queries)) ], dtype="uint64")
[
[row2key[row] for row in numpy_rows[i] if row in row2key]
for i in range(len(queries))
],
dtype="uint64"
)
return (keys, best_rows, scores)
def to_ops(self, ops: Ops):
@ -543,6 +558,7 @@ cdef class Vectors:
"hash_seed": self.hash_seed,
"bow": self.bow,
"eow": self.eow,
"attr": self.attr,
}
def _set_cfg(self, cfg):
@ -553,6 +569,7 @@ cdef class Vectors:
self.hash_seed = cfg.get("hash_seed", 0)
self.bow = cfg.get("bow", "<")
self.eow = cfg.get("eow", ">")
self.attr = cfg.get("attr", ORTH)
def to_disk(self, path, *, exclude=tuple()):
"""Save the current state to a directory.
@ -564,9 +581,9 @@ cdef class Vectors:
"""
xp = get_array_module(self.data)
if xp is numpy:
save_array = lambda arr, file_: xp.save(file_, arr, allow_pickle=False)
save_array = lambda arr, file_: xp.save(file_, arr, allow_pickle=False) # no-cython-lint
else:
save_array = lambda arr, file_: xp.save(file_, arr)
save_array = lambda arr, file_: xp.save(file_, arr) # no-cython-lint
def save_vectors(path):
# the source of numpy.save indicates that the file object is closed after use.

View File

@ -1,6 +1,4 @@
# cython: profile=True
from libc.string cimport memcpy
import functools
import numpy
@ -19,7 +17,6 @@ from .errors import Errors
from .lang.lex_attrs import LEX_ATTRS, get_lang, is_stop
from .lang.norm_exceptions import BASE_NORMS
from .lookups import Lookups
from .util import registry
from .vectors import Mode as VectorsMode
from .vectors import Vectors
@ -50,8 +47,15 @@ cdef class Vocab:
DOCS: https://spacy.io/api/vocab
"""
def __init__(self, lex_attr_getters=None, strings=None, lookups=None,
oov_prob=-20., writing_system=None, get_noun_chunks=None):
def __init__(
self,
lex_attr_getters=None,
strings=None,
lookups=None,
oov_prob=-20.,
writing_system=None,
get_noun_chunks=None
):
"""Create the vocabulary.
lex_attr_getters (dict): A dictionary mapping attribute IDs to
@ -150,7 +154,6 @@ cdef class Vocab:
cdef LexemeC* lex
cdef hash_t key = self.strings[string]
lex = <LexemeC*>self._by_orth.get(key)
cdef size_t addr
if lex != NULL:
assert lex.orth in self.strings
if lex.orth != key:
@ -352,8 +355,13 @@ cdef class Vocab:
self[orth]
# Make prob negative so it sorts by rank ascending
# (key2row contains the rank)
priority = [(-lex.prob, self.vectors.key2row[lex.orth], lex.orth)
for lex in self if lex.orth in self.vectors.key2row]
priority = []
cdef Lexeme lex
cdef attr_t value
for lex in self:
value = Lexeme.get_struct_attr(lex.c, self.vectors.attr)
if value in self.vectors.key2row:
priority.append((-lex.prob, self.vectors.key2row[value], value))
priority.sort()
indices = xp.asarray([i for (prob, i, key) in priority], dtype="uint64")
keys = xp.asarray([key for (prob, i, key) in priority], dtype="uint64")
@ -386,8 +394,10 @@ cdef class Vocab:
"""
if isinstance(orth, str):
orth = self.strings.add(orth)
if self.has_vector(orth):
return self.vectors[orth]
cdef Lexeme lex = self[orth]
key = Lexeme.get_struct_attr(lex.c, self.vectors.attr)
if self.has_vector(key):
return self.vectors[key]
xp = get_array_module(self.vectors.data)
vectors = xp.zeros((self.vectors_length,), dtype="f")
return vectors
@ -403,15 +413,16 @@ cdef class Vocab:
"""
if isinstance(orth, str):
orth = self.strings.add(orth)
if self.vectors.is_full and orth not in self.vectors:
cdef Lexeme lex = self[orth]
key = Lexeme.get_struct_attr(lex.c, self.vectors.attr)
if self.vectors.is_full and key not in self.vectors:
new_rows = max(100, int(self.vectors.shape[0]*1.3))
if self.vectors.shape[1] == 0:
width = vector.size
else:
width = self.vectors.shape[1]
self.vectors.resize((new_rows, width))
lex = self[orth] # Add word to vocab if necessary
row = self.vectors.add(orth, vector=vector)
row = self.vectors.add(key, vector=vector)
if row >= 0:
lex.rank = row
@ -426,7 +437,9 @@ cdef class Vocab:
"""
if isinstance(orth, str):
orth = self.strings.add(orth)
return orth in self.vectors
cdef Lexeme lex = self[orth]
key = Lexeme.get_struct_attr(lex.c, self.vectors.attr)
return key in self.vectors
property lookups:
def __get__(self):
@ -440,7 +453,6 @@ cdef class Vocab:
self.lookups.get_table("lexeme_norm"),
)
def to_disk(self, path, *, exclude=tuple()):
"""Save the current state to a directory.
@ -453,7 +465,6 @@ cdef class Vocab:
path = util.ensure_path(path)
if not path.exists():
path.mkdir()
setters = ["strings", "vectors"]
if "strings" not in exclude:
self.strings.to_disk(path / "strings.json")
if "vectors" not in exclude:
@ -472,7 +483,6 @@ cdef class Vocab:
DOCS: https://spacy.io/api/vocab#to_disk
"""
path = util.ensure_path(path)
getters = ["strings", "vectors"]
if "strings" not in exclude:
self.strings.from_disk(path / "strings.json") # TODO: add exclude?
if "vectors" not in exclude:

View File

@ -303,7 +303,7 @@ mapped to a zero vector. See the documentation on
| `nM` | The width of the static vectors. ~~Optional[int]~~ |
| `dropout` | Optional dropout rate. If set, it's applied per dimension over the whole batch. Defaults to `None`. ~~Optional[float]~~ |
| `init_W` | The [initialization function](https://thinc.ai/docs/api-initializers). Defaults to [`glorot_uniform_init`](https://thinc.ai/docs/api-initializers#glorot_uniform_init). ~~Callable[[Ops, Tuple[int, ...]], FloatsXd]~~ |
| `key_attr` | Defaults to `"ORTH"`. ~~str~~ |
| `key_attr` | This setting is ignored in spaCy v3.6+. To set a custom key attribute for vectors, configure it through [`Vectors`](/api/vectors) or [`spacy init vectors`](/api/cli#init-vectors). Defaults to `"ORTH"`. ~~str~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], Ragged]~~ |
### spacy.FeatureExtractor.v1 {id="FeatureExtractor"}

View File

@ -876,7 +876,7 @@ token-to-vector embedding component like [`Tok2Vec`](/api/tok2vec) or
training a pipeline with components sourced from an existing pipeline: if
multiple components (e.g. tagger, parser, NER) listen to the same
token-to-vector component, but some of them are frozen and not updated, their
performance may degrade significally as the token-to-vector component is updated
performance may degrade significantly as the token-to-vector component is updated
with new data. To prevent this, listeners can be replaced with a standalone
token-to-vector layer that is owned by the component and doesn't change if the
component isn't updated.
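A minimal sketch of this replacement, assuming a pipeline whose `tagger` listens to a shared `tok2vec` component (the listener path `"model.tok2vec"` is the conventional location of the listener inside the component's model):

```python
import spacy

# A minimal sketch: copy the shared tok2vec layer into the tagger so that
# freezing or updating other components no longer affects it.
nlp = spacy.load("en_core_web_sm")
nlp.replace_listeners("tok2vec", "tagger", ["model.tok2vec"])
```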

View File

@ -60,7 +60,7 @@ architectures and their arguments and hyperparameters.
| `model` | A model instance that is given a list of documents and predicts a probability for each token. ~~Model[List[Doc], Floats2d]~~ |
| `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ |
| `threshold` | Minimum probability to consider a prediction positive. Defaults to `0.5`. ~~float~~ |
| `max_length` | Maximum length of the produced spans, defaults to `None` meaning unlimited length. ~~Optional[int]~~ |
| `max_length` | Maximum length of the produced spans, defaults to `25`. ~~Optional[int]~~ |
| `min_length` | Minimum length of the produced spans, defaults to `None`, meaning spans can be as short as a single token. ~~Optional[int]~~ |
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ |
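As a quick illustration, a minimal sketch of adding the component with explicit length limits (the `min_length` value here is a hypothetical choice):

```python
import spacy

# A minimal sketch: only propose spans between 2 and 25 tokens long,
# stored under the key "sc" in doc.spans.
nlp = spacy.blank("en")
nlp.add_pipe(
    "span_finder",
    config={"spans_key": "sc", "max_length": 25, "min_length": 2},
)
```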

View File

@ -59,6 +59,7 @@ modified later.
| `hash_seed` <Tag variant="new">3.2</Tag> | The floret hash seed (default: `0`). ~~int~~ |
| `bow` <Tag variant="new">3.2</Tag> | The floret BOW string (default: `"<"`). ~~str~~ |
| `eow` <Tag variant="new">3.2</Tag> | The floret EOW string (default: `">"`). ~~str~~ |
| `attr` <Tag variant="new">3.6</Tag> | The token attribute for the vector keys (default: `"ORTH"`). ~~Union[int, str]~~ |
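For example, a minimal sketch of a table keyed by `LOWER` rather than the default `ORTH` (the data and keys here are toy placeholders):

```python
import numpy
from spacy.strings import StringStore
from spacy.vectors import Vectors

# A minimal sketch: key the vectors by the LOWER attribute, so that
# "Apple" and "apple" resolve to the same row.
data = numpy.zeros((2, 300), dtype="f")
vectors = Vectors(
    strings=StringStore(), data=data, keys=["apple", "orange"], attr="LOWER"
)
```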
## Vectors.\_\_getitem\_\_ {id="getitem",tag="method"}
@ -452,8 +453,9 @@ Load state from a binary string.
## Attributes {id="attributes"}
| Name | Description |
| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `data` | Stored vectors data. `numpy` is used for CPU vectors, `cupy` for GPU vectors. ~~Union[numpy.ndarray[ndim=1, dtype=float32], cupy.ndarray[ndim=1, dtype=float32]]~~ |
| `key2row` | Dictionary mapping word hashes to rows in the `Vectors.data` table. ~~Dict[int, int]~~ |
| `keys` | Array keeping the keys in order, such that `keys[vectors.key2row[key]] == key`. ~~Union[numpy.ndarray[ndim=1, dtype=float32], cupy.ndarray[ndim=1, dtype=float32]]~~ |
| Name | Description |
| ----------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `data` | Stored vectors data. `numpy` is used for CPU vectors, `cupy` for GPU vectors. ~~Union[numpy.ndarray[ndim=1, dtype=float32], cupy.ndarray[ndim=1, dtype=float32]]~~ |
| `key2row` | Dictionary mapping word hashes to rows in the `Vectors.data` table. ~~Dict[int, int]~~ |
| `keys` | Array keeping the keys in order, such that `keys[vectors.key2row[key]] == key`. ~~Union[numpy.ndarray[ndim=1, dtype=float32], cupy.ndarray[ndim=1, dtype=float32]]~~ |
| `attr` <Tag variant="new">3.6</Tag> | The token attribute for the vector keys. ~~int~~ |

View File

@ -113,7 +113,7 @@ print(doc[2].morph) # 'Case=Nom|Person=2|PronType=Prs'
print(doc[2].pos_) # 'PRON'
```
## Lemmatization {id="lemmatization",model="lemmatizer",version="3"}
## Lemmatization {id="lemmatization",version="3"}
spaCy provides two pipeline components for lemmatization:
@ -170,7 +170,7 @@ nlp = spacy.blank("sv")
nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
```
### Rule-based lemmatizer {id="lemmatizer-rule"}
### Rule-based lemmatizer {id="lemmatizer-rule",model="morphologizer"}
When training pipelines that include a component that assigns part-of-speech
tags (a morphologizer or a tagger with a [POS mapping](#mappings-exceptions)), a
@ -194,7 +194,7 @@ information, without consulting the context of the token. The rule-based
lemmatizer also accepts list-based exception files. For English, these are
acquired from [WordNet](https://wordnet.princeton.edu/).
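A minimal assembly sketch, assuming the lemmatizer tables from `spacy-lookups-data` are installed (the components still need to be trained or initialized before use):

```python
import spacy

# A minimal assembly sketch: the rule-based lemmatizer needs POS tags,
# so a morphologizer (or a tagger with a POS mapping) must precede it.
nlp = spacy.blank("en")
nlp.add_pipe("morphologizer")
nlp.add_pipe("lemmatizer", config={"mode": "rule"})
```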
### Trainable lemmatizer
### Trainable lemmatizer {id="lemmatizer-train",model="trainable_lemmatizer"}
The [`EditTreeLemmatizer`](/api/edittreelemmatizer) can learn form-to-lemma
transformations from a training corpus that includes lemma annotations. This

View File

@ -11,7 +11,6 @@ menu:
- ['Custom Functions', 'custom-functions']
- ['Initialization', 'initialization']
- ['Data Utilities', 'data']
- ['Parallel Training', 'parallel-training']
- ['Internal API', 'api']
---
@ -1565,77 +1564,6 @@ token-based annotations like the dependency parse or entity labels, you'll need
to take care to adjust the `Example` object so its annotations match and remain
valid.
## Parallel & distributed training with Ray {id="parallel-training"}
> #### Installation
>
> ```bash
> $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS
> # Check that the CLI is registered
> $ python -m spacy ray --help
> ```
[Ray](https://ray.io/) is a fast and simple framework for building and running
**distributed applications**. You can use Ray to train spaCy on one or more
remote machines, potentially speeding up your training process. Parallel
training won't always be faster, though: it depends on your batch size, models,
and hardware.
<Infobox variant="warning">
To use Ray with spaCy, you need the
[`spacy-ray`](https://github.com/explosion/spacy-ray) package installed.
Installing the package will automatically add the `ray` command to the spaCy
CLI.
</Infobox>
The [`spacy ray train`](/api/cli#ray-train) command follows the same API as
[`spacy train`](/api/cli#train), with a few extra options to configure the Ray
setup. You can optionally set the `--address` option to point to your Ray
cluster. If it's not set, Ray will run locally.
```bash
python -m spacy ray train config.cfg --n-workers 2
```
<Project id="integrations/ray">
Get started with parallel training using our project template. It trains a
simple model on a Universal Dependencies Treebank and lets you parallelize the
training with Ray.
</Project>
### How parallel training works {id="parallel-training-details"}
Each worker receives a shard of the **data** and builds a copy of the **model
and optimizer** from the [`config.cfg`](#config). It also has a communication
channel to **pass gradients and parameters** to the other workers. Additionally,
each worker is given ownership of a subset of the parameter arrays. Every
parameter array is owned by exactly one worker, and the workers are given a
mapping so they know which worker owns which parameter.
![Illustration of setup](/images/spacy-ray.svg)
As training proceeds, every worker will be computing gradients for **all** of
the model parameters. When they compute gradients for parameters they don't own,
they'll **send them to the worker** that does own that parameter, along with a
version identifier so that the owner can decide whether to discard the gradient.
Workers use the gradients they receive and the ones they compute locally to
update the parameters they own, and then broadcast the updated array and a new
version ID to the other workers.
This training procedure is **asynchronous** and **non-blocking**. Workers always
push their gradient increments and parameter updates, they do not have to pull
them and block on the result, so the transfers can happen in the background,
overlapped with the actual training work. The workers also do not have to stop
and wait for each other ("synchronize") at the start of each batch. This is very
useful for spaCy, because spaCy is often trained on long documents, which means
**batches can vary in size** significantly. Uneven workloads make synchronous
gradient descent inefficient, because if one batch is slow, all of the other
workers are stuck waiting for it to complete before they can continue.
## Internal training API {id="api"}
<Infobox variant="danger">

website/docs/usage/v3-6.mdx (new file, 143 lines)
View File

@ -0,0 +1,143 @@
---
title: What's New in v3.6
teaser: New features and how to upgrade
menu:
- ['New Features', 'features']
- ['Upgrading Notes', 'upgrading']
---
## New features {id="features",hidden="true"}
spaCy v3.6 adds the new [`SpanFinder`](/api/spanfinder) component to the core
spaCy library and new trained pipelines for Slovenian.
### SpanFinder {id="spanfinder"}
The [`SpanFinder`](/api/spanfinder) component identifies potentially
overlapping, unlabeled spans by predicting likely span start and end tokens. It is
intended for use in combination with a component like
[`SpanCategorizer`](/api/spancategorizer) that may further filter or label the
spans. See our
[Spancat blog post](https://explosion.ai/blog/spancat#span-finder) for a more
detailed introduction to the span finder.
To train a pipeline with `span_finder` + `spancat`, remember to add
`span_finder` (and its `tok2vec` or `transformer` if required) to
`[training.annotating_components]` so that the `spancat` component can be
trained directly from its predictions:
```ini
[nlp]
pipeline = ["tok2vec","span_finder","spancat"]
[training]
annotating_components = ["tok2vec","span_finder"]
```
In practice it can be helpful to initially train the `span_finder` separately
before [sourcing](/usage/processing-pipelines#sourced-components) it (along with
its `tok2vec`) into the `spancat` pipeline for further training. Otherwise the
memory usage can spike for `spancat` in the first few training steps if the
`span_finder` makes a large number of predictions.
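For experimentation outside a training config, a minimal assembly sketch (initialization and training are omitted; the preset spans suggester here points `spancat` at the spans predicted by `span_finder`):

```python
import spacy

# A minimal assembly sketch: span_finder proposes boundaries, spancat labels
# the spans, and both components share the spans key "sc".
nlp = spacy.blank("en")
nlp.add_pipe("span_finder", config={"spans_key": "sc"})
nlp.add_pipe(
    "spancat",
    config={
        "spans_key": "sc",
        "suggester": {"@misc": "spacy.preset_spans_suggester.v1", "spans_key": "sc"},
    },
)
```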
### Additional features and improvements {id="additional-features-and-improvements"}
- Language updates:
- Add initial support for Malay.
- Update Latin defaults to support noun chunks, update lexical/tokenizer
settings and add example sentences.
- Support `spancat_singlelabel` in `spacy debug data` CLI.
- Add `doc.spans` rendering to `spacy evaluate` CLI displaCy output.
- Support custom token/lexeme attribute for vectors.
- Add option to return scores separately keyed by component name with
  `spacy evaluate --per-component`, `Language.evaluate(per_component=True)` and
  `Scorer.score(per_component=True)`. This is useful when the pipeline contains
  more than one of the same component, like `textcat`, that may have overlapping
  score keys; see the sketch after this list.
- Typing updates for `PhraseMatcher` and `SpanGroup`.
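
A minimal sketch of the per-component scoring mentioned above (assuming a trained `nlp` pipeline and a list of `Example` objects):

```python
# A minimal sketch: `nlp` is a trained pipeline and `examples` is a list of
# spacy.training.Example objects.
scores = nlp.evaluate(examples, per_component=True)
print(scores["textcat"])  # scores are keyed by component name
```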
## Trained pipelines {id="pipelines"}
### New trained pipelines {id="new-pipelines"}
v3.6 introduces new pipelines for Slovenian, which use the trainable lemmatizer
and [floret vectors](https://github.com/explosion/floret).
| Package | UPOS | Parser LAS | NER F |
| ------------------------------------------------- | ---: | ---------: | ----: |
| [`sl_core_news_sm`](/models/sl#sl_core_news_sm) | 96.9 | 82.1 | 62.9 |
| [`sl_core_news_md`](/models/sl#sl_core_news_md) | 97.6 | 84.3 | 73.5 |
| [`sl_core_news_lg`](/models/sl#sl_core_news_lg) | 97.7 | 84.3 | 79.0 |
| [`sl_core_news_trf`](/models/sl#sl_core_news_trf) | 99.0 | 91.7 | 90.0 |
### Pipeline updates {id="pipeline-updates"}
The English pipelines have been updated to improve handling of contractions with
various apostrophes and to lemmatize "get" as a passive auxiliary.
The Danish pipeline `da_core_news_trf` has been updated to use
[`vesteinn/DanskBERT`](https://huggingface.co/vesteinn/DanskBERT) with
performance improvements across the board.
## Notes about upgrading from v3.5 {id="upgrading"}
### SpanGroup spans are now required to be from the same doc {id="spangroup-spans"}
When initializing a `SpanGroup`, there is a new check to verify that all added
spans refer to the current doc. Without this check, it was possible to run into
string store or other errors.
One place this may crop up is when creating `Example` objects for training with
custom spans:
```diff
doc = Doc(nlp.vocab, words=tokens) # predicted doc
example = Example.from_dict(doc, {"ner": iob_tags})
# use the reference doc when creating reference spans
- span = Span(doc, 0, 5, "ORG")
+ span = Span(example.reference, 0, 5, "ORG")
example.reference.spans[spans_key] = [span]
```
### Pipeline package version compatibility {id="version-compat"}
> #### Using legacy implementations
>
> In spaCy v3, you'll still be able to load and reference legacy implementations
> via [`spacy-legacy`](https://github.com/explosion/spacy-legacy), even if the
> components or architectures change and newer versions are available in the
> core library.
When you're loading a pipeline package trained with an earlier version of spaCy
v3, you will see a warning telling you that the pipeline may be incompatible.
The warning doesn't necessarily mean the pipeline is broken, but we recommend running your
pipelines against your test suite or evaluation data to make sure there are no
unexpected results.
If you're using one of the [trained pipelines](/models) we provide, you should
run [`spacy download`](/api/cli#download) to update to the latest version. To
see an overview of all installed packages and their compatibility, you can run
[`spacy validate`](/api/cli#validate).
If you've trained your own custom pipeline and you've confirmed that it's still
working as expected, you can update the spaCy version requirements in the
[`meta.json`](/api/data-formats#meta):
```diff
- "spacy_version": ">=3.5.0,<3.6.0",
+ "spacy_version": ">=3.5.0,<3.7.0",
```
### Updating v3.5 configs
To update a config from spaCy v3.5 with the new v3.6 settings, run
[`init fill-config`](/api/cli#init-fill-config):
```cli
$ python -m spacy init fill-config config-v3.5.cfg config-v3.6.cfg
```
In many cases ([`spacy train`](/api/cli#train),
[`spacy.load`](/api/top-level#spacy.load)), the new defaults will be filled in
automatically, but you'll need to fill in the new settings to run
[`debug config`](/api/cli#debug) and [`debug data`](/api/cli#debug-data).

View File

@ -222,7 +222,9 @@
},
{
"code": "la",
"name": "Latin"
"name": "Latin",
"example": "In principio creavit Deus caelum et terram.",
"has_examples": true
},
{
"code": "lb",
@ -339,7 +341,10 @@
},
{
"code": "sl",
"name": "Slovenian"
"name": "Slovenian",
"example": "France Prešeren je umrl 8. februarja 1849 v Kranju",
"has_examples": true,
"models": ["sl_core_news_sm", "sl_core_news_md", "sl_core_news_lg", "sl_core_news_trf"]
},
{
"code": "sq",

View File

@ -14,7 +14,8 @@
{ "text": "New in v3.2", "url": "/usage/v3-2" },
{ "text": "New in v3.3", "url": "/usage/v3-3" },
{ "text": "New in v3.4", "url": "/usage/v3-4" },
{ "text": "New in v3.5", "url": "/usage/v3-5" }
{ "text": "New in v3.5", "url": "/usage/v3-5" },
{ "text": "New in v3.6", "url": "/usage/v3-6" }
]
},
{

View File

@ -27,7 +27,7 @@
"indexName": "spacy"
},
"binderUrl": "explosion/spacy-io-binder",
"binderVersion": "3.5",
"binderVersion": "3.6",
"sections": [
{ "id": "usage", "title": "Usage Documentation", "theme": "blue" },
{ "id": "models", "title": "Models Documentation", "theme": "blue" },

View File

@ -4376,7 +4376,7 @@
"code_example": [
"import spacy",
"",
"nlp = spacy.load(\"en_core_web_sm\", disable=[\"ner\"])",
"nlp = spacy.load(\"en_core_web_sm\", exclude=[\"ner\"])",
"nlp.add_pipe(\"span_marker\", config={\"model\": \"tomaarsen/span-marker-roberta-large-ontonotes5\"})",
"",
"text = \"\"\"Cleopatra VII, also known as Cleopatra the Great, was the last active ruler of the \\",

View File

@ -13,6 +13,8 @@ import 'prismjs/components/prism-json.min.js'
import 'prismjs/components/prism-markdown.min.js'
import 'prismjs/components/prism-python.min.js'
import 'prismjs/components/prism-yaml.min.js'
import 'prismjs/components/prism-docker.min.js'
import 'prismjs/components/prism-r.min.js'
import { isString } from './util'
import Link, { OptionalLink } from './link'
@ -172,7 +174,7 @@ const convertLine = ({ line, prompt, lang }) => {
return handlePromot({ lineFlat, prompt })
}
return lang === 'none' || !lineFlat ? (
return lang === 'none' || !lineFlat || !(lang in Prism.languages) ? (
lineFlat
) : (
<span

View File

@ -58,8 +58,8 @@ const AlertSpace = ({ nightly, legacy }) => {
}
const navAlert = (
<Link to="/usage/v3-5" noLinkLayout>
<strong>💥 Out now:</strong> spaCy v3.5
<Link to="/usage/v3-6" noLinkLayout>
<strong>💥 Out now:</strong> spaCy v3.6
</Link>
)