diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 8822e0722..987298b7b 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -45,6 +45,12 @@ jobs:
         run: |
           python -m pip install flake8==5.0.4
           python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
+      - name: cython-lint
+        run: |
+          python -m pip install cython-lint -c requirements.txt
+          # E501: line too long, W291: trailing whitespace, E266: too many leading '#' for block comment
+          cython-lint spacy --ignore E501,W291,E266
+
   tests:
     name: Test
     needs: Validate
diff --git a/Makefile b/Makefile
index 24a9bcee4..c8f68be7f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 SHELL := /bin/bash
 
 ifndef SPACY_EXTRAS
-override SPACY_EXTRAS = spacy-lookups-data==1.0.2 jieba spacy-pkuseg==0.0.28 sudachipy sudachidict_core pymorphy2
+override SPACY_EXTRAS = spacy-lookups-data==1.0.3
 endif
 
 ifndef PYVER
diff --git a/requirements.txt b/requirements.txt
index 4342af047..0b8d9a5de 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -36,4 +36,5 @@ types-setuptools>=57.0.0
 types-requests
 types-setuptools>=57.0.0
 black==22.3.0
+cython-lint>=0.15.0; python_version >= "3.7"
 isort>=5.0,<6.0
diff --git a/spacy/cli/init_pipeline.py b/spacy/cli/init_pipeline.py
index b4b013832..4b4fe93af 100644
--- a/spacy/cli/init_pipeline.py
+++ b/spacy/cli/init_pipeline.py
@@ -32,6 +32,7 @@ def init_vectors_cli(
     mode: str = Opt("default", "--mode", "-m", help="Vectors mode: default or floret"),
     verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
     jsonl_loc: Optional[Path] = Opt(None, "--lexemes-jsonl", "-j", help="Location of JSONL-formatted attributes file", hidden=True),
+    attr: str = Opt("ORTH", "--attr", "-a", help="Optional token attribute to use for vectors, e.g. LOWER or NORM"),
     # fmt: on
 ):
     """Convert word vectors for use with spaCy. Will export an nlp object that
@@ -53,6 +54,7 @@ def init_vectors_cli(
         truncate=truncate,
         prune=prune,
         mode=mode,
+        attr=attr,
     )
     msg.good(f"Successfully converted {len(nlp.vocab.vectors)} vectors")
     nlp.to_disk(output_dir)
diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index c4a6ad035..1c1650cd1 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -128,7 +128,7 @@ grad_factor = 1.0
 {% if "span_finder" in components -%}
 [components.span_finder]
 factory = "span_finder"
-max_length = null
+max_length = 25
 min_length = null
 scorer = {"@scorers":"spacy.span_finder_scorer.v1"}
 spans_key = "sc"
@@ -415,7 +415,7 @@ width = ${components.tok2vec.model.encode.width}
 {% if "span_finder" in components %}
 [components.span_finder]
 factory = "span_finder"
-max_length = null
+max_length = 25
 min_length = null
 scorer = {"@scorers":"spacy.span_finder_scorer.v1"}
 spans_key = "sc"
diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py
index 86869e3b8..47407bcb7 100644
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@@ -1,4 +1,3 @@
-import itertools
 import uuid
 from typing import Any, Dict, List, Optional, Tuple, Union
 
@@ -218,7 +217,7 @@ class SpanRenderer:
                     + (self.offset_step * (len(entities) - 1))
                 )
                 markup += self.span_template.format(
-                    text=token["text"],
+                    text=escape_html(token["text"]),
                     span_slices=slices,
                     span_starts=starts,
                     total_height=total_height,
diff --git a/spacy/errors.py b/spacy/errors.py
index faae74781..164110e3f 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -208,6 +208,9 @@ class Warnings(metaclass=ErrorsWithCodes):
     W123 = ("Argument `enable` with value {enable} does not contain all values specified in the config option "
             "`enabled` ({enabled}). Be aware that this might affect other components in your pipeline.")
     W124 = ("{host}:{port} is already in use, using the nearest available port {serve_port} as an alternative.")
+    W125 = ("The StaticVectors key_attr is no longer used. To set a custom "
+            "key attribute for vectors, configure it through Vectors(attr=) or "
+            "'spacy init vectors --attr'")
 
     # v4 warning strings
     W400 = ("`use_upper=False` is ignored, the upper layer is always enabled")
diff --git a/spacy/kb/kb.pyx b/spacy/kb/kb.pyx
index 80fc8c2c5..22a67ed75 100644
--- a/spacy/kb/kb.pyx
+++ b/spacy/kb/kb.pyx
@@ -12,8 +12,9 @@ from .candidate import Candidate
 
 
 cdef class KnowledgeBase:
-    """A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases,
-    to support entity linking of named entities to real-world concepts.
+    """A `KnowledgeBase` instance stores unique identifiers for entities and
+    their textual aliases, to support entity linking of named entities to
+    real-world concepts.
     This is an abstract class and requires its operations to be implemented.
 
     DOCS: https://spacy.io/api/kb
@@ -31,7 +32,9 @@ cdef class KnowledgeBase:
         self.entity_vector_length = entity_vector_length
         self.mem = Pool()
 
-    def get_candidates_batch(self, mentions: SpanGroup) -> Iterable[Iterable[Candidate]]:
+    def get_candidates_batch(
+            self, mentions: SpanGroup
+    ) -> Iterable[Iterable[Candidate]]:
         """
         Return candidate entities for a specified Span mention. Each candidate defines at least the entity and the
         entity's embedding vector. Depending on the KB implementation, further properties - such as the prior
@@ -52,7 +55,9 @@ cdef class KnowledgeBase:
         RETURNS (Iterable[Candidate]): Identified candidates.
         """
         raise NotImplementedError(
-            Errors.E1045.format(parent="KnowledgeBase", method="get_candidates", name=self.__name__)
+            Errors.E1045.format(
+                parent="KnowledgeBase", method="get_candidates", name=self.__name__
+            )
         )
 
     def get_vectors(self, entities: Iterable[str]) -> Iterable[Iterable[float]]:
@@ -70,7 +75,9 @@ cdef class KnowledgeBase:
         RETURNS (Iterable[float]): Vector for specified entity.
         """
         raise NotImplementedError(
-            Errors.E1045.format(parent="KnowledgeBase", method="get_vector", name=self.__name__)
+            Errors.E1045.format(
+                parent="KnowledgeBase", method="get_vector", name=self.__name__
+            )
         )
 
     def to_bytes(self, **kwargs) -> bytes:
@@ -78,7 +85,9 @@ cdef class KnowledgeBase:
         RETURNS (bytes): Current state as binary string.
         """
         raise NotImplementedError(
-            Errors.E1045.format(parent="KnowledgeBase", method="to_bytes", name=self.__name__)
+            Errors.E1045.format(
+                parent="KnowledgeBase", method="to_bytes", name=self.__name__
+            )
         )
 
     def from_bytes(self, bytes_data: bytes, *, exclude: Tuple[str] = tuple()):
@@ -87,27 +96,37 @@ cdef class KnowledgeBase:
         exclude (Tuple[str]): Properties to exclude when restoring KB.
         """
         raise NotImplementedError(
-            Errors.E1045.format(parent="KnowledgeBase", method="from_bytes", name=self.__name__)
+            Errors.E1045.format(
+                parent="KnowledgeBase", method="from_bytes", name=self.__name__
+            )
         )
 
-    def to_disk(self, path: Union[str, Path], exclude: Iterable[str] = SimpleFrozenList()) -> None:
+    def to_disk(
+            self, path: Union[str, Path], exclude: Iterable[str] = SimpleFrozenList()
+    ) -> None:
         """
         Write KnowledgeBase content to disk.
         path (Union[str, Path]): Target file path.
         exclude (Iterable[str]): List of components to exclude.
         """
         raise NotImplementedError(
-            Errors.E1045.format(parent="KnowledgeBase", method="to_disk", name=self.__name__)
+            Errors.E1045.format(
+                parent="KnowledgeBase", method="to_disk", name=self.__name__
+            )
         )
 
-    def from_disk(self, path: Union[str, Path], exclude: Iterable[str] = SimpleFrozenList()) -> None:
+    def from_disk(
+            self, path: Union[str, Path], exclude: Iterable[str] = SimpleFrozenList()
+    ) -> None:
         """
         Load KnowledgeBase content from disk.
         path (Union[str, Path]): Target file path.
         exclude (Iterable[str]): List of components to exclude.
         """
         raise NotImplementedError(
-            Errors.E1045.format(parent="KnowledgeBase", method="from_disk", name=self.__name__)
+            Errors.E1045.format(
+                parent="KnowledgeBase", method="from_disk", name=self.__name__
+            )
         )
 
     @property
diff --git a/spacy/kb/kb_in_memory.pxd b/spacy/kb/kb_in_memory.pxd
index 08ec6b2a3..e0e33301a 100644
--- a/spacy/kb/kb_in_memory.pxd
+++ b/spacy/kb/kb_in_memory.pxd
@@ -55,23 +55,28 @@ cdef class InMemoryLookupKB(KnowledgeBase):
     # optional data, we can let users configure a DB as the backend for this.
     cdef object _features_table
 
-
     cdef inline int64_t c_add_vector(self, vector[float] entity_vector) nogil:
         """Add an entity vector to the vectors table."""
         cdef int64_t new_index = self._vectors_table.size()
         self._vectors_table.push_back(entity_vector)
         return new_index
 
-
-    cdef inline int64_t c_add_entity(self, hash_t entity_hash, float freq,
-                                     int32_t vector_index, int feats_row) nogil:
+    cdef inline int64_t c_add_entity(
+        self,
+        hash_t entity_hash,
+        float freq,
+        int32_t vector_index,
+        int feats_row
+    ) nogil:
         """Add an entry to the vector of entries.
-        After calling this method, make sure to update also the _entry_index using the return value"""
+        After calling this method, make sure to update also the _entry_index
+        using the return value"""
         # This is what we'll map the entity hash key to. It's where the entry will sit
         # in the vector of entries, so we can get it later.
         cdef int64_t new_index = self._entries.size()
 
-        # Avoid struct initializer to enable nogil, cf https://github.com/cython/cython/issues/1642
+        # Avoid struct initializer to enable nogil, cf.
+        # https://github.com/cython/cython/issues/1642
         cdef KBEntryC entry
         entry.entity_hash = entity_hash
         entry.vector_index = vector_index
@@ -81,11 +86,17 @@ cdef class InMemoryLookupKB(KnowledgeBase):
         self._entries.push_back(entry)
         return new_index
 
-    cdef inline int64_t c_add_aliases(self, hash_t alias_hash, vector[int64_t] entry_indices, vector[float] probs) nogil:
-        """Connect a mention to a list of potential entities with their prior probabilities .
-        After calling this method, make sure to update also the _alias_index using the return value"""
-        # This is what we'll map the alias hash key to. It's where the alias will be defined
-        # in the vector of aliases.
+    cdef inline int64_t c_add_aliases(
+        self,
+        hash_t alias_hash,
+        vector[int64_t] entry_indices,
+        vector[float] probs
+    ) nogil:
+        """Connect a mention to a list of potential entities with their prior
+        probabilities. After calling this method, make sure to update also the
+        _alias_index using the return value"""
+        # This is what we'll map the alias hash key to. It's where the alias will be
+        # defined in the vector of aliases.
         cdef int64_t new_index = self._aliases_table.size()
 
         # Avoid struct initializer to enable nogil
@@ -98,8 +109,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
 
     cdef inline void _create_empty_vectors(self, hash_t dummy_hash) nogil:
         """
-        Initializing the vectors and making sure the first element of each vector is a dummy,
-        because the PreshMap maps pointing to indices in these vectors can not contain 0 as value
+        Initializing the vectors and making sure the first element of each vector is a
+        dummy, because the PreshMap maps pointing to indices in these vectors can not
+        contain 0 as value.
         cf. https://github.com/explosion/preshed/issues/17
         """
         cdef int32_t dummy_value = 0
@@ -130,12 +142,18 @@ cdef class InMemoryLookupKB(KnowledgeBase):
 cdef class Writer:
     cdef FILE* _fp
 
-    cdef int write_header(self, int64_t nr_entries, int64_t entity_vector_length) except -1
+    cdef int write_header(
+        self, int64_t nr_entries, int64_t entity_vector_length
+    ) except -1
     cdef int write_vector_element(self, float element) except -1
-    cdef int write_entry(self, hash_t entry_hash, float entry_freq, int32_t vector_index) except -1
+    cdef int write_entry(
+        self, hash_t entry_hash, float entry_freq, int32_t vector_index
+    ) except -1
 
     cdef int write_alias_length(self, int64_t alias_length) except -1
-    cdef int write_alias_header(self, hash_t alias_hash, int64_t candidate_length) except -1
+    cdef int write_alias_header(
+        self, hash_t alias_hash, int64_t candidate_length
+    ) except -1
     cdef int write_alias(self, int64_t entry_index, float prob) except -1
 
     cdef int _write(self, void* value, size_t size) except -1
@@ -143,12 +161,18 @@ cdef class Writer:
 cdef class Reader:
     cdef FILE* _fp
 
-    cdef int read_header(self, int64_t* nr_entries, int64_t* entity_vector_length) except -1
+    cdef int read_header(
+        self, int64_t* nr_entries, int64_t* entity_vector_length
+    ) except -1
     cdef int read_vector_element(self, float* element) except -1
-    cdef int read_entry(self, hash_t* entity_hash, float* freq, int32_t* vector_index) except -1
+    cdef int read_entry(
+        self, hash_t* entity_hash, float* freq, int32_t* vector_index
+    ) except -1
 
     cdef int read_alias_length(self, int64_t* alias_length) except -1
-    cdef int read_alias_header(self, hash_t* alias_hash, int64_t* candidate_length) except -1
+    cdef int read_alias_header(
+        self, hash_t* alias_hash, int64_t* candidate_length
+    ) except -1
     cdef int read_alias(self, int64_t* entry_index, float* prob) except -1
 
     cdef int _read(self, void* value, size_t size) except -1
diff --git a/spacy/kb/kb_in_memory.pyx b/spacy/kb/kb_in_memory.pyx
index 54bb1d0e7..0cf1f7ec1 100644
--- a/spacy/kb/kb_in_memory.pyx
+++ b/spacy/kb/kb_in_memory.pyx
@@ -1,5 +1,5 @@
 # cython: infer_types=True, profile=True
-from typing import Any, Callable, Dict, Iterable, Union
+from typing import Any, Callable, Dict, Iterable
 
 import srsly
 
@@ -27,8 +27,9 @@ from .candidate import InMemoryCandidate
 
 
 cdef class InMemoryLookupKB(KnowledgeBase):
-    """An `InMemoryLookupKB` instance stores unique identifiers for entities and their textual aliases,
-    to support entity linking of named entities to real-world concepts.
+    """An `InMemoryLookupKB` instance stores unique identifiers for entities
+    and their textual aliases, to support entity linking of named entities to
+    real-world concepts.
 
     DOCS: https://spacy.io/api/inmemorylookupkb
     """
@@ -71,7 +72,8 @@ cdef class InMemoryLookupKB(KnowledgeBase):
 
     def add_entity(self, str entity, float freq, vector[float] entity_vector):
         """
-        Add an entity to the KB, optionally specifying its log probability based on corpus frequency
+        Add an entity to the KB, optionally specifying its log probability
+        based on corpus frequency.
         Return the hash of the entity ID/name at the end.
         """
         cdef hash_t entity_hash = self.vocab.strings.add(entity)
@@ -83,14 +85,20 @@ cdef class InMemoryLookupKB(KnowledgeBase):
 
         # Raise an error if the provided entity vector is not of the correct length
         if len(entity_vector) != self.entity_vector_length:
-            raise ValueError(Errors.E141.format(found=len(entity_vector), required=self.entity_vector_length))
+            raise ValueError(
+                Errors.E141.format(
+                    found=len(entity_vector), required=self.entity_vector_length
+                )
+            )
 
         vector_index = self.c_add_vector(entity_vector=entity_vector)
 
-        new_index = self.c_add_entity(entity_hash=entity_hash,
-                                      freq=freq,
-                                      vector_index=vector_index,
-                                      feats_row=-1)  # Features table currently not implemented
+        new_index = self.c_add_entity(
+            entity_hash=entity_hash,
+            freq=freq,
+            vector_index=vector_index,
+            feats_row=-1
+        )  # Features table currently not implemented
         self._entry_index[entity_hash] = new_index
 
         return entity_hash
@@ -115,7 +123,12 @@ cdef class InMemoryLookupKB(KnowledgeBase):
             else:
                 entity_vector = vector_list[i]
                 if len(entity_vector) != self.entity_vector_length:
-                    raise ValueError(Errors.E141.format(found=len(entity_vector), required=self.entity_vector_length))
+                    raise ValueError(
+                        Errors.E141.format(
+                            found=len(entity_vector),
+                            required=self.entity_vector_length
+                        )
+                    )
 
                 entry.entity_hash = entity_hash
                 entry.freq = freq_list[i]
@@ -149,11 +162,15 @@ cdef class InMemoryLookupKB(KnowledgeBase):
         previous_alias_nr = self.get_size_aliases()
         # Throw an error if the length of entities and probabilities are not the same
         if not len(entities) == len(probabilities):
-            raise ValueError(Errors.E132.format(alias=alias,
-                                                entities_length=len(entities),
-                                                probabilities_length=len(probabilities)))
+            raise ValueError(
+                Errors.E132.format(
+                    alias=alias,
+                    entities_length=len(entities),
+                    probabilities_length=len(probabilities))
+            )
 
-        # Throw an error if the probabilities sum up to more than 1 (allow for some rounding errors)
+        # Throw an error if the probabilities sum up to more than 1 (allow for
+        # some rounding errors)
         prob_sum = sum(probabilities)
         if prob_sum > 1.00001:
             raise ValueError(Errors.E133.format(alias=alias, sum=prob_sum))
@@ -170,40 +187,47 @@ cdef class InMemoryLookupKB(KnowledgeBase):
 
         for entity, prob in zip(entities, probabilities):
             entity_hash = self.vocab.strings[entity]
-            if not entity_hash in self._entry_index:
+            if entity_hash not in self._entry_index:
                 raise ValueError(Errors.E134.format(entity=entity))
 
             entry_index = <int64_t>self._entry_index.get(entity_hash)
             entry_indices.push_back(int(entry_index))
             probs.push_back(float(prob))
 
-        new_index = self.c_add_aliases(alias_hash=alias_hash, entry_indices=entry_indices, probs=probs)
+        new_index = self.c_add_aliases(
+            alias_hash=alias_hash, entry_indices=entry_indices, probs=probs
+        )
         self._alias_index[alias_hash] = new_index
 
         if previous_alias_nr + 1 != self.get_size_aliases():
             raise RuntimeError(Errors.E891.format(alias=alias))
         return alias_hash
 
-    def append_alias(self, str alias, str entity, float prior_prob, ignore_warnings=False):
+    def append_alias(
+        self, str alias, str entity, float prior_prob, ignore_warnings=False
+    ):
         """
-        For an alias already existing in the KB, extend its potential entities with one more.
+        For an alias already existing in the KB, extend its potential entities
+        with one more.
         Throw a warning if either the alias or the entity is unknown,
         or when the combination is already previously recorded.
         Throw an error if this entity+prior prob would exceed the sum of 1.
-        For efficiency, it's best to use the method `add_alias` as much as possible instead of this one.
+        For efficiency, it's best to use the method `add_alias` as much as
+        possible instead of this one.
         """
         # Check if the alias exists in the KB
         cdef hash_t alias_hash = self.vocab.strings[alias]
-        if not alias_hash in self._alias_index:
+        if alias_hash not in self._alias_index:
             raise ValueError(Errors.E176.format(alias=alias))
 
         # Check if the entity exists in the KB
         cdef hash_t entity_hash = self.vocab.strings[entity]
-        if not entity_hash in self._entry_index:
+        if entity_hash not in self._entry_index:
             raise ValueError(Errors.E134.format(entity=entity))
         entry_index = <int64_t>self._entry_index.get(entity_hash)
 
-        # Throw an error if the prior probabilities (including the new one) sum up to more than 1
+        # Throw an error if the prior probabilities (including the new one)
+        # sum up to more than 1
         alias_index = <int64_t>self._alias_index.get(alias_hash)
         alias_entry = self._aliases_table[alias_index]
         current_sum = sum([p for p in alias_entry.probs])
@@ -236,12 +260,13 @@ cdef class InMemoryLookupKB(KnowledgeBase):
 
     def _get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]:
         """
-        Return candidate entities for an alias. Each candidate defines the entity, the original alias,
-        and the prior probability of that alias resolving to that entity.
+        Return candidate entities for an alias. Each candidate defines the
+        entity, the original alias, and the prior probability of that alias
+        resolving to that entity.
         If the alias is not known in the KB, and empty list is returned.
         """
         cdef hash_t alias_hash = self.vocab.strings[alias]
-        if not alias_hash in self._alias_index:
+        if alias_hash not in self._alias_index:
             return []
         alias_index = <int64_t>self._alias_index.get(alias_hash)
         alias_entry = self._aliases_table[alias_index]
@@ -270,8 +295,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
         return self._vectors_table[self._entries[entry_index].vector_index]
 
     def get_prior_prob(self, str entity, str alias):
-        """ Return the prior probability of a given alias being linked to a given entity,
-        or return 0.0 when this combination is not known in the knowledge base"""
+        """ Return the prior probability of a given alias being linked to a
+        given entity, or return 0.0 when this combination is not known in the
+        knowledge base."""
         cdef hash_t alias_hash = self.vocab.strings[alias]
         cdef hash_t entity_hash = self.vocab.strings[entity]
 
@@ -282,7 +308,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
         entry_index = self._entry_index[entity_hash]
 
         alias_entry = self._aliases_table[alias_index]
-        for (entry_index, prior_prob) in zip(alias_entry.entry_indices, alias_entry.probs):
+        for (entry_index, prior_prob) in zip(
+            alias_entry.entry_indices, alias_entry.probs
+        ):
             if self._entries[entry_index].entity_hash == entity_hash:
                 return prior_prob
 
@@ -295,13 +323,19 @@ cdef class InMemoryLookupKB(KnowledgeBase):
         """Serialize the current state to a binary string.
         """
         def serialize_header():
-            header = (self.get_size_entities(), self.get_size_aliases(), self.entity_vector_length)
+            header = (
+                self.get_size_entities(),
+                self.get_size_aliases(),
+                self.entity_vector_length
+            )
             return srsly.json_dumps(header)
 
         def serialize_entries():
             i = 1
             tuples = []
-            for entry_hash, entry_index in sorted(self._entry_index.items(), key=lambda x: x[1]):
+            for entry_hash, entry_index in sorted(
+                self._entry_index.items(), key=lambda x: x[1]
+            ):
                 entry = self._entries[entry_index]
                 assert entry.entity_hash == entry_hash
                 assert entry_index == i
@@ -314,7 +348,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
             headers = []
             indices_lists = []
             probs_lists = []
-            for alias_hash, alias_index in sorted(self._alias_index.items(), key=lambda x: x[1]):
+            for alias_hash, alias_index in sorted(
+                self._alias_index.items(), key=lambda x: x[1]
+            ):
                 alias = self._aliases_table[alias_index]
                 assert alias_index == i
                 candidate_length = len(alias.entry_indices)
@@ -372,7 +408,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
             indices = srsly.json_loads(all_data[1])
             probs = srsly.json_loads(all_data[2])
             for header, indices, probs in zip(headers, indices, probs):
-                alias_hash, candidate_length = header
+                alias_hash, _candidate_length = header
                 alias.entry_indices = indices
                 alias.probs = probs
                 self._aliases_table[i] = alias
@@ -421,10 +457,14 @@ cdef class InMemoryLookupKB(KnowledgeBase):
                 writer.write_vector_element(element)
             i = i+1
 
-        # dumping the entry records in the order in which they are in the _entries vector.
-        # index 0 is a dummy object not stored in the _entry_index and can be ignored.
+        # dumping the entry records in the order in which they are in the
+        # _entries vector.
+        # index 0 is a dummy object not stored in the _entry_index and can
+        # be ignored.
         i = 1
-        for entry_hash, entry_index in sorted(self._entry_index.items(), key=lambda x: x[1]):
+        for entry_hash, entry_index in sorted(
+            self._entry_index.items(), key=lambda x: x[1]
+        ):
             entry = self._entries[entry_index]
             assert entry.entity_hash == entry_hash
             assert entry_index == i
@@ -436,7 +476,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
         # dumping the aliases in the order in which they are in the _alias_index vector.
         # index 0 is a dummy object not stored in the _aliases_table and can be ignored.
         i = 1
-        for alias_hash, alias_index in sorted(self._alias_index.items(), key=lambda x: x[1]):
+        for alias_hash, alias_index in sorted(
+                self._alias_index.items(), key=lambda x: x[1]
+        ):
             alias = self._aliases_table[alias_index]
             assert alias_index == i
 
@@ -542,7 +584,8 @@ cdef class Writer:
     def __init__(self, path):
         assert isinstance(path, Path)
         content = bytes(path)
-        cdef bytes bytes_loc = content.encode('utf8') if type(content) == str else content
+        cdef bytes bytes_loc = content.encode('utf8') \
+            if type(content) == str else content
         self._fp = fopen(<char*>bytes_loc, 'wb')
         if not self._fp:
             raise IOError(Errors.E146.format(path=path))
@@ -552,14 +595,18 @@ cdef class Writer:
         cdef size_t status = fclose(self._fp)
         assert status == 0
 
-    cdef int write_header(self, int64_t nr_entries, int64_t entity_vector_length) except -1:
+    cdef int write_header(
+        self, int64_t nr_entries, int64_t entity_vector_length
+    ) except -1:
         self._write(&nr_entries, sizeof(nr_entries))
         self._write(&entity_vector_length, sizeof(entity_vector_length))
 
     cdef int write_vector_element(self, float element) except -1:
         self._write(&element, sizeof(element))
 
-    cdef int write_entry(self, hash_t entry_hash, float entry_freq, int32_t vector_index) except -1:
+    cdef int write_entry(
+        self, hash_t entry_hash, float entry_freq, int32_t vector_index
+    ) except -1:
         self._write(&entry_hash, sizeof(entry_hash))
         self._write(&entry_freq, sizeof(entry_freq))
         self._write(&vector_index, sizeof(vector_index))
@@ -568,7 +615,9 @@ cdef class Writer:
     cdef int write_alias_length(self, int64_t alias_length) except -1:
         self._write(&alias_length, sizeof(alias_length))
 
-    cdef int write_alias_header(self, hash_t alias_hash, int64_t candidate_length) except -1:
+    cdef int write_alias_header(
+        self, hash_t alias_hash, int64_t candidate_length
+    ) except -1:
         self._write(&alias_hash, sizeof(alias_hash))
         self._write(&candidate_length, sizeof(candidate_length))
 
@@ -584,16 +633,19 @@ cdef class Writer:
 cdef class Reader:
     def __init__(self, path):
         content = bytes(path)
-        cdef bytes bytes_loc = content.encode('utf8') if type(content) == str else content
+        cdef bytes bytes_loc = content.encode('utf8') \
+            if type(content) == str else content
         self._fp = fopen(<char*>bytes_loc, 'rb')
         if not self._fp:
             PyErr_SetFromErrno(IOError)
-        status = fseek(self._fp, 0, 0)  # this can be 0 if there is no header
+        fseek(self._fp, 0, 0)  # this can be 0 if there is no header
 
     def __dealloc__(self):
         fclose(self._fp)
 
-    cdef int read_header(self, int64_t* nr_entries, int64_t* entity_vector_length) except -1:
+    cdef int read_header(
+        self, int64_t* nr_entries, int64_t* entity_vector_length
+    ) except -1:
         status = self._read(nr_entries, sizeof(int64_t))
         if status < 1:
             if feof(self._fp):
@@ -613,7 +665,9 @@ cdef class Reader:
                 return 0  # end of file
             raise IOError(Errors.E145.format(param="vector element"))
 
-    cdef int read_entry(self, hash_t* entity_hash, float* freq, int32_t* vector_index) except -1:
+    cdef int read_entry(
+        self, hash_t* entity_hash, float* freq, int32_t* vector_index
+    ) except -1:
         status = self._read(entity_hash, sizeof(hash_t))
         if status < 1:
             if feof(self._fp):
@@ -644,7 +698,9 @@ cdef class Reader:
                 return 0  # end of file
             raise IOError(Errors.E145.format(param="alias length"))
 
-    cdef int read_alias_header(self, hash_t* alias_hash, int64_t* candidate_length) except -1:
+    cdef int read_alias_header(
+        self, hash_t* alias_hash, int64_t* candidate_length
+    ) except -1:
         status = self._read(alias_hash, sizeof(hash_t))
         if status < 1:
             if feof(self._fp):
diff --git a/spacy/language.py b/spacy/language.py
index 51a4a7f93..555f30db0 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -740,6 +740,11 @@ class Language:
                 )
             )
         pipe = source.get_pipe(source_name)
+        # There is no actual solution here. Either the component has the right
+        # name for the source pipeline or the component has the right name for
+        # the current pipeline. This prioritizes the current pipeline.
+        if hasattr(pipe, "name"):
+            pipe.name = name
         # Make sure the source config is interpolated so we don't end up with
         # orphaned variables in our final config
         source_config = source.config.interpolate()
@@ -817,6 +822,7 @@ class Language:
         pipe_index = self._get_pipe_index(before, after, first, last)
         self._pipe_meta[name] = self.get_factory_meta(factory_name)
         self._components.insert(pipe_index, (name, pipe_component))
+        self._link_components()
         return pipe_component
 
     def _get_pipe_index(
@@ -956,6 +962,7 @@ class Language:
         if old_name in self._config["initialize"]["components"]:
             init_cfg = self._config["initialize"]["components"].pop(old_name)
             self._config["initialize"]["components"][new_name] = init_cfg
+        self._link_components()
 
     def remove_pipe(self, name: str) -> Tuple[str, PipeCallable]:
         """Remove a component from the pipeline.
@@ -979,6 +986,7 @@ class Language:
         # Make sure the name is also removed from the set of disabled components
         if name in self.disabled:
             self._disabled.remove(name)
+        self._link_components()
         return removed
 
     def disable_pipe(self, name: str) -> None:
@@ -1823,8 +1831,16 @@ class Language:
         # The problem is we need to do it during deserialization...And the
         # components don't receive the pipeline then. So this does have to be
         # here :(
+        # First, fix up all the internal component names in case they have
+        # gotten out of sync due to sourcing components from different
+        # pipelines, since find_listeners uses proc2.name for the listener
+        # map.
+        for name, proc in self.pipeline:
+            if hasattr(proc, "name"):
+                proc.name = name
         for i, (name1, proc1) in enumerate(self.pipeline):
             if isinstance(proc1, ty.ListenedToComponent):
+                proc1.listener_map = {}
                 for name2, proc2 in self.pipeline[i + 1 :]:
                     proc1.find_listeners(proc2)
 
@@ -1934,7 +1950,6 @@ class Language:
         # Later we replace the component config with the raw config again.
         interpolated = filled.interpolate() if not filled.is_interpolated else filled
         pipeline = interpolated.get("components", {})
-        sourced = util.get_sourced_components(interpolated)
         # If components are loaded from a source (existing models), we cache
         # them here so they're only loaded once
         source_nlps = {}
@@ -1962,6 +1977,7 @@ class Language:
                         raw_config=raw_config,
                     )
                 else:
+                    assert "source" in pipe_cfg
                     # We need the sourced components to reference the same
                     # vocab without modifying the current vocab state **AND**
                     # we still want to load the source model vectors to perform
@@ -1981,6 +1997,10 @@ class Language:
                     source_name = pipe_cfg.get("component", pipe_name)
                     listeners_replaced = False
                     if "replace_listeners" in pipe_cfg:
+                        # Make sure that the listened-to component has the
+                        # state of the source pipeline listener map so that the
+                        # replace_listeners method below works as intended.
+                        source_nlps[model]._link_components()
                         for name, proc in source_nlps[model].pipeline:
                             if source_name in getattr(proc, "listening_components", []):
                                 source_nlps[model].replace_listeners(
@@ -1992,6 +2012,8 @@ class Language:
                         nlp.add_pipe(
                             source_name, source=source_nlps[model], name=pipe_name
                         )
+                        # At this point after nlp.add_pipe, the listener map
+                        # corresponds to the new pipeline.
                     if model not in source_nlp_vectors_hashes:
                         source_nlp_vectors_hashes[model] = hash(
                             source_nlps[model].vocab.vectors.to_bytes(
@@ -2046,27 +2068,6 @@ class Language:
                 raise ValueError(
                     Errors.E942.format(name="pipeline_creation", value=type(nlp))
                 )
-        # Detect components with listeners that are not frozen consistently
-        for name, proc in nlp.pipeline:
-            if isinstance(proc, ty.ListenedToComponent):
-                # Remove listeners not in the pipeline
-                listener_names = proc.listening_components
-                unused_listener_names = [
-                    ll for ll in listener_names if ll not in nlp.pipe_names
-                ]
-                for listener_name in unused_listener_names:
-                    for listener in proc.listener_map.get(listener_name, []):
-                        proc.remove_listener(listener, listener_name)
-
-                for listener_name in proc.listening_components:
-                    # e.g. tok2vec/transformer
-                    # If it's a component sourced from another pipeline, we check if
-                    # the tok2vec listeners should be replaced with standalone tok2vec
-                    # models (e.g. so component can be frozen without its performance
-                    # degrading when other components/tok2vec are updated)
-                    paths = sourced.get(listener_name, {}).get("replace_listeners", [])
-                    if paths:
-                        nlp.replace_listeners(name, listener_name, paths)
         return nlp
 
     def replace_listeners(
@@ -2081,7 +2082,7 @@ class Language:
         useful when training a pipeline with components sourced from an existing
         pipeline: if multiple components (e.g. tagger, parser, NER) listen to
         the same tok2vec component, but some of them are frozen and not updated,
-        their performance may degrade significally as the tok2vec component is
+        their performance may degrade significantly as the tok2vec component is
         updated with new data. To prevent this, listeners can be replaced with
         a standalone tok2vec layer that is owned by the component and doesn't
         change if the component isn't updated.
diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx
index 7e7c35be2..b882a5479 100644
--- a/spacy/lexeme.pyx
+++ b/spacy/lexeme.pyx
@@ -1,7 +1,6 @@
 # cython: embedsignature=True
 # Compiler crashes on memory view coercion without this. Should report bug.
 cimport numpy as np
-from cython.view cimport array as cvarray
 from libc.string cimport memset
 
 np.import_array()
@@ -35,7 +34,7 @@ from .typedefs cimport attr_t, flags_t
 from .attrs import intify_attrs
 from .errors import Errors, Warnings
 
-OOV_RANK = 0xffffffffffffffff # UINT64_MAX
+OOV_RANK = 0xffffffffffffffff  # UINT64_MAX
 memset(&EMPTY_LEXEME, 0, sizeof(LexemeC))
 EMPTY_LEXEME.id = OOV_RANK
 
@@ -105,7 +104,7 @@ cdef class Lexeme:
             if isinstance(value, float):
                 continue
             elif isinstance(value, (int, long)):
-                 Lexeme.set_struct_attr(self.c, attr, value)
+                Lexeme.set_struct_attr(self.c, attr, value)
             else:
                 Lexeme.set_struct_attr(self.c, attr, self.vocab.strings.add(value))
 
@@ -137,10 +136,12 @@ cdef class Lexeme:
         if hasattr(other, "orth"):
             if self.c.orth == other.orth:
                 return 1.0
-        elif hasattr(other, "__len__") and len(other) == 1 \
-        and hasattr(other[0], "orth"):
-            if self.c.orth == other[0].orth:
-                return 1.0
+        elif (
+            hasattr(other, "__len__") and len(other) == 1
+            and hasattr(other[0], "orth")
+            and self.c.orth == other[0].orth
+        ):
+            return 1.0
         if self.vector_norm == 0 or other.vector_norm == 0:
             warnings.warn(Warnings.W008.format(obj="Lexeme"))
             return 0.0
@@ -149,7 +150,7 @@ cdef class Lexeme:
         result = xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm)
         # ensure we get a scalar back (numpy does this automatically but cupy doesn't)
         return result.item()
-    
+
     @property
     def has_vector(self):
         """RETURNS (bool): Whether a word vector is associated with the object.
diff --git a/spacy/matcher/dependencymatcher.pyx b/spacy/matcher/dependencymatcher.pyx
index dabd0c397..b8b7828dd 100644
--- a/spacy/matcher/dependencymatcher.pyx
+++ b/spacy/matcher/dependencymatcher.pyx
@@ -108,7 +108,7 @@ cdef class DependencyMatcher:
         key (str): The match ID.
         RETURNS (bool): Whether the matcher contains rules for this match ID.
         """
-        return self.has_key(key)
+        return self.has_key(key)  # no-cython-lint: W601
 
     def _validate_input(self, pattern, key):
         idx = 0
@@ -264,7 +264,7 @@ cdef class DependencyMatcher:
 
     def remove(self, key):
         key = self._normalize_key(key)
-        if not key in self._patterns:
+        if key not in self._patterns:
             raise ValueError(Errors.E175.format(key=key))
         self._patterns.pop(key)
         self._raw_patterns.pop(key)
@@ -382,7 +382,7 @@ cdef class DependencyMatcher:
             return []
         return [doc[node].head]
 
-    def _gov(self,doc,node):
+    def _gov(self, doc, node):
         return list(doc[node].children)
 
     def _dep_chain(self, doc, node):
@@ -443,7 +443,7 @@ cdef class DependencyMatcher:
 
     def _right_child(self, doc, node):
         return [child for child in doc[node].rights]
-    
+
     def _left_child(self, doc, node):
         return [child for child in doc[node].lefts]
 
@@ -461,7 +461,7 @@ cdef class DependencyMatcher:
         if doc[node].head.i > node:
             return [doc[node].head]
         return []
-    
+
     def _left_parent(self, doc, node):
         if doc[node].head.i < node:
             return [doc[node].head]
diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx
index 42b8a8f9a..f926608b8 100644
--- a/spacy/matcher/matcher.pyx
+++ b/spacy/matcher/matcher.pyx
@@ -12,25 +12,13 @@ import warnings
 
 import srsly
 
-from ..attrs cimport (
-    DEP,
-    ENT_IOB,
-    ID,
-    LEMMA,
-    MORPH,
-    NULL_ATTR,
-    ORTH,
-    POS,
-    TAG,
-    attr_id_t,
-)
+from ..attrs cimport DEP, ENT_IOB, ID, LEMMA, MORPH, NULL_ATTR, POS, TAG
 from ..structs cimport TokenC
 from ..tokens.doc cimport Doc, get_token_attr_for_matcher
 from ..tokens.morphanalysis cimport MorphAnalysis
 from ..tokens.span cimport Span
 from ..tokens.token cimport Token
 from ..typedefs cimport attr_t
-from ..vocab cimport Vocab
 
 from ..errors import Errors, MatchPatternError, Warnings
 from ..schemas import validate_token_pattern
@@ -42,7 +30,6 @@ from ..attrs import IDS
 from ..errors import Errors, MatchPatternError, Warnings
 from ..schemas import validate_token_pattern
 from ..strings import get_string_id
-from ..util import registry
 from .levenshtein import levenshtein_compare
 
 DEF PADDING = 5
@@ -93,9 +80,9 @@ cdef class Matcher:
         key (str): The match ID.
         RETURNS (bool): Whether the matcher contains rules for this match ID.
         """
-        return self.has_key(key)
+        return self.has_key(key)  # no-cython-lint: W601
 
-    def add(self, key, patterns, *, on_match=None, greedy: str=None):
+    def add(self, key, patterns, *, on_match=None, greedy: str = None):
         """Add a match-rule to the matcher. A match-rule consists of: an ID
         key, an on_match callback, and one or more patterns.
 
@@ -149,8 +136,13 @@ cdef class Matcher:
         key = self._normalize_key(key)
         for pattern in patterns:
             try:
-                specs = _preprocess_pattern(pattern, self.vocab,
-                    self._extensions, self._extra_predicates, self._fuzzy_compare)
+                specs = _preprocess_pattern(
+                    pattern,
+                    self.vocab,
+                    self._extensions,
+                    self._extra_predicates,
+                    self._fuzzy_compare
+                )
                 self.patterns.push_back(init_pattern(self.mem, key, specs))
                 for spec in specs:
                     for attr, _ in spec[1]:
@@ -174,7 +166,7 @@ cdef class Matcher:
         key (str): The ID of the match rule.
         """
         norm_key = self._normalize_key(key)
-        if not norm_key in self._patterns:
+        if norm_key not in self._patterns:
             raise ValueError(Errors.E175.format(key=key))
         self._patterns.pop(norm_key)
         self._callbacks.pop(norm_key)
@@ -274,8 +266,15 @@ cdef class Matcher:
         if self.patterns.empty():
             matches = []
         else:
-            matches = find_matches(&self.patterns[0], self.patterns.size(), doclike, length,
-                                    extensions=self._extensions, predicates=self._extra_predicates, with_alignments=with_alignments)
+            matches = find_matches(
+                &self.patterns[0],
+                self.patterns.size(),
+                doclike,
+                length,
+                extensions=self._extensions,
+                predicates=self._extra_predicates,
+                with_alignments=with_alignments
+            )
         final_matches = []
         pairs_by_id = {}
         # For each key, either add all matches, or only the filtered,
@@ -299,9 +298,9 @@ cdef class Matcher:
             memset(matched, 0, length * sizeof(matched[0]))
             span_filter = self._filter.get(key)
             if span_filter == "FIRST":
-                sorted_pairs = sorted(pairs, key=lambda x: (x[0], -x[1]), reverse=False) # sort by start
+                sorted_pairs = sorted(pairs, key=lambda x: (x[0], -x[1]), reverse=False)  # sort by start
             elif span_filter == "LONGEST":
-                sorted_pairs = sorted(pairs, key=lambda x: (x[1]-x[0], -x[0]), reverse=True) # reverse sort by length
+                sorted_pairs = sorted(pairs, key=lambda x: (x[1]-x[0], -x[0]), reverse=True)  # reverse sort by length
             else:
                 raise ValueError(Errors.E947.format(expected=["FIRST", "LONGEST"], arg=span_filter))
             for match in sorted_pairs:
@@ -373,7 +372,6 @@ cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, e
     cdef vector[MatchC] matches
     cdef vector[vector[MatchAlignmentC]] align_states
     cdef vector[vector[MatchAlignmentC]] align_matches
-    cdef PatternStateC state
     cdef int i, j, nr_extra_attr
     cdef Pool mem = Pool()
     output = []
@@ -395,14 +393,22 @@ cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, e
                 value = token.vocab.strings[value]
             extra_attr_values[i * nr_extra_attr + index] = value
     # Main loop
-    cdef int nr_predicate = len(predicates)
     for i in range(length):
         for j in range(n):
             states.push_back(PatternStateC(patterns[j], i, 0))
         if with_alignments != 0:
             align_states.resize(states.size())
-        transition_states(states, matches, align_states, align_matches, predicate_cache,
-            doclike[i], extra_attr_values, predicates, with_alignments)
+        transition_states(
+            states,
+            matches,
+            align_states,
+            align_matches,
+            predicate_cache,
+            doclike[i],
+            extra_attr_values,
+            predicates,
+            with_alignments
+        )
         extra_attr_values += nr_extra_attr
         predicate_cache += len(predicates)
     # Handle matches that end in 0-width patterns
@@ -428,18 +434,28 @@ cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, e
     return output
 
 
-cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& matches,
-                            vector[vector[MatchAlignmentC]]& align_states, vector[vector[MatchAlignmentC]]& align_matches,
-                            int8_t* cached_py_predicates,
-        Token token, const attr_t* extra_attrs, py_predicates, bint with_alignments) except *:
+cdef void transition_states(
+    vector[PatternStateC]& states,
+    vector[MatchC]& matches,
+    vector[vector[MatchAlignmentC]]& align_states,
+    vector[vector[MatchAlignmentC]]& align_matches,
+    int8_t* cached_py_predicates,
+    Token token,
+    const attr_t* extra_attrs,
+    py_predicates,
+    bint with_alignments
+) except *:
     cdef int q = 0
     cdef vector[PatternStateC] new_states
     cdef vector[vector[MatchAlignmentC]] align_new_states
-    cdef int nr_predicate = len(py_predicates)
     for i in range(states.size()):
         if states[i].pattern.nr_py >= 1:
-            update_predicate_cache(cached_py_predicates,
-                states[i].pattern, token, py_predicates)
+            update_predicate_cache(
+                cached_py_predicates,
+                states[i].pattern,
+                token,
+                py_predicates
+            )
         action = get_action(states[i], token.c, extra_attrs,
                             cached_py_predicates)
         if action == REJECT:
@@ -475,8 +491,12 @@ cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& match
                     align_new_states.push_back(align_states[q])
             states[q].pattern += 1
             if states[q].pattern.nr_py != 0:
-                update_predicate_cache(cached_py_predicates,
-                    states[q].pattern, token, py_predicates)
+                update_predicate_cache(
+                    cached_py_predicates,
+                    states[q].pattern,
+                    token,
+                    py_predicates
+                )
             action = get_action(states[q], token.c, extra_attrs,
                                 cached_py_predicates)
         # Update alignment before the transition of current state
@@ -492,8 +512,12 @@ cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& match
             ent_id = get_ent_id(state.pattern)
             if action == MATCH:
                 matches.push_back(
-                    MatchC(pattern_id=ent_id, start=state.start,
-                            length=state.length+1))
+                    MatchC(
+                        pattern_id=ent_id,
+                        start=state.start,
+                        length=state.length+1
+                    )
+                )
                 # `align_matches` always corresponds to `matches` 1:1
                 if with_alignments != 0:
                     align_matches.push_back(align_states[q])
@@ -501,23 +525,35 @@ cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& match
                 # push match without last token if length > 0
                 if state.length > 0:
                     matches.push_back(
-                        MatchC(pattern_id=ent_id, start=state.start,
-                                length=state.length))
+                        MatchC(
+                            pattern_id=ent_id,
+                            start=state.start,
+                            length=state.length
+                        )
+                    )
                     # MATCH_DOUBLE emits matches twice,
                     # add one more to align_matches in order to keep 1:1 relationship
                     if with_alignments != 0:
                         align_matches.push_back(align_states[q])
                 # push match with last token
                 matches.push_back(
-                    MatchC(pattern_id=ent_id, start=state.start,
-                            length=state.length+1))
+                    MatchC(
+                        pattern_id=ent_id,
+                        start=state.start,
+                        length=state.length + 1
+                    )
+                )
                 # `align_matches` always corresponds to `matches` 1:1
                 if with_alignments != 0:
                     align_matches.push_back(align_states[q])
             elif action == MATCH_REJECT:
                 matches.push_back(
-                    MatchC(pattern_id=ent_id, start=state.start,
-                            length=state.length))
+                    MatchC(
+                        pattern_id=ent_id,
+                        start=state.start,
+                        length=state.length
+                    )
+                )
                 # `align_matches` always corresponds to `matches` 1:1
                 if with_alignments != 0:
                     align_matches.push_back(align_states[q])
@@ -540,8 +576,12 @@ cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& match
             align_states.push_back(align_new_states[i])
 
 
-cdef int update_predicate_cache(int8_t* cache,
-        const TokenPatternC* pattern, Token token, predicates) except -1:
+cdef int update_predicate_cache(
+    int8_t* cache,
+    const TokenPatternC* pattern,
+    Token token,
+    predicates
+) except -1:
     # If the state references any extra predicates, check whether they match.
     # These are cached, so that we don't call these potentially expensive
     # Python functions more than we need to.
@@ -587,10 +627,12 @@ cdef void finish_states(vector[MatchC]& matches, vector[PatternStateC]& states,
             else:
                 state.pattern += 1
 
-
-cdef action_t get_action(PatternStateC state,
-        const TokenC* token, const attr_t* extra_attrs,
-        const int8_t* predicate_matches) nogil:
+cdef action_t get_action(
+    PatternStateC state,
+    const TokenC * token,
+    const attr_t * extra_attrs,
+    const int8_t * predicate_matches
+) nogil:
     """We need to consider:
     a) Does the token match the specification? [Yes, No]
     b) What's the quantifier? [1, 0+, ?]
@@ -656,53 +698,56 @@ cdef action_t get_action(PatternStateC state,
         is_match = not is_match
         quantifier = ONE
     if quantifier == ONE:
-      if is_match and is_final:
-          # Yes, final: 1000
-          return MATCH
-      elif is_match and not is_final:
-          # Yes, non-final: 0100
-          return ADVANCE
-      elif not is_match and is_final:
-          # No, final: 0000
-          return REJECT
-      else:
-          return REJECT
+        if is_match and is_final:
+            # Yes, final: 1000
+            return MATCH
+        elif is_match and not is_final:
+            # Yes, non-final: 0100
+            return ADVANCE
+        elif not is_match and is_final:
+            # No, final: 0000
+            return REJECT
+        else:
+            return REJECT
     elif quantifier == ZERO_PLUS:
-      if is_match and is_final:
-          # Yes, final: 1001
-          return MATCH_EXTEND
-      elif is_match and not is_final:
-          # Yes, non-final: 0011
-          return RETRY_EXTEND
-      elif not is_match and is_final:
-          # No, final 2000 (note: Don't include last token!)
-          return MATCH_REJECT
-      else:
-          # No, non-final 0010
-          return RETRY
+        if is_match and is_final:
+            # Yes, final: 1001
+            return MATCH_EXTEND
+        elif is_match and not is_final:
+            # Yes, non-final: 0011
+            return RETRY_EXTEND
+        elif not is_match and is_final:
+            # No, final 2000 (note: Don't include last token!)
+            return MATCH_REJECT
+        else:
+            # No, non-final 0010
+            return RETRY
     elif quantifier == ZERO_ONE:
-      if is_match and is_final:
-          # Yes, final: 3000
-          # To cater for a pattern ending in "?", we need to add
-          # a match both with and without the last token
-          return MATCH_DOUBLE
-      elif is_match and not is_final:
-          # Yes, non-final: 0110
-          # We need both branches here, consider a pair like:
-          # pattern: .?b string: b
-          # If we 'ADVANCE' on the .?, we miss the match.
-          return RETRY_ADVANCE
-      elif not is_match and is_final:
-          # No, final 2000 (note: Don't include last token!)
-          return MATCH_REJECT
-      else:
-          # No, non-final 0010
-          return RETRY
+        if is_match and is_final:
+            # Yes, final: 3000
+            # To cater for a pattern ending in "?", we need to add
+            # a match both with and without the last token
+            return MATCH_DOUBLE
+        elif is_match and not is_final:
+            # Yes, non-final: 0110
+            # We need both branches here, consider a pair like:
+            # pattern: .?b string: b
+            # If we 'ADVANCE' on the .?, we miss the match.
+            return RETRY_ADVANCE
+        elif not is_match and is_final:
+            # No, final 2000 (note: Don't include last token!)
+            return MATCH_REJECT
+        else:
+            # No, non-final 0010
+            return RETRY
 
 
-cdef int8_t get_is_match(PatternStateC state,
-        const TokenC* token, const attr_t* extra_attrs,
-        const int8_t* predicate_matches) nogil:
+cdef int8_t get_is_match(
+    PatternStateC state,
+    const TokenC* token,
+    const attr_t* extra_attrs,
+    const int8_t* predicate_matches
+) nogil:
     for i in range(state.pattern.nr_py):
         if predicate_matches[state.pattern.py_predicates[i]] == -1:
             return 0
@@ -867,7 +912,7 @@ class _FuzzyPredicate:
         self.is_extension = is_extension
         if self.predicate not in self.operators:
             raise ValueError(Errors.E126.format(good=self.operators, bad=self.predicate))
-        fuzz = self.predicate[len("FUZZY"):] # number after prefix
+        fuzz = self.predicate[len("FUZZY"):]  # number after prefix
         self.fuzzy = int(fuzz) if fuzz else -1
         self.fuzzy_compare = fuzzy_compare
         self.key = _predicate_cache_key(self.attr, self.predicate, value, fuzzy=self.fuzzy)
@@ -1089,7 +1134,7 @@ def _get_extra_predicates_dict(attr, value_dict, vocab, predicate_types,
         elif cls == _FuzzyPredicate:
             if isinstance(value, dict):
                 # add predicates inside fuzzy operator
-                fuzz = type_[len("FUZZY"):] # number after prefix
+                fuzz = type_[len("FUZZY"):]  # number after prefix
                 fuzzy_val = int(fuzz) if fuzz else -1
                 output.extend(_get_extra_predicates_dict(attr, value, vocab, predicate_types,
                                                          extra_predicates, seen_predicates,
@@ -1108,8 +1153,9 @@ def _get_extra_predicates_dict(attr, value_dict, vocab, predicate_types,
     return output
 
 
-def _get_extension_extra_predicates(spec, extra_predicates, predicate_types,
-        seen_predicates):
+def _get_extension_extra_predicates(
+    spec, extra_predicates, predicate_types, seen_predicates
+):
     output = []
     for attr, value in spec.items():
         if isinstance(value, dict):
@@ -1138,7 +1184,7 @@ def _get_operators(spec):
         return (ONE,)
     elif spec["OP"] in lookup:
         return lookup[spec["OP"]]
-    #Min_max {n,m}
+    # Min_max {n,m}
     elif spec["OP"].startswith("{") and spec["OP"].endswith("}"):
         # {n}  --> {n,n}  exactly n                 ONE,(n)
         # {n,m}--> {n,m}  min of n, max of m        ONE,(n),ZERO_ONE,(m)
@@ -1149,8 +1195,8 @@ def _get_operators(spec):
         min_max = min_max if "," in min_max else f"{min_max},{min_max}"
         n, m = min_max.split(",")
 
-        #1. Either n or m is a blank string and the other is numeric -->isdigit
-        #2. Both are numeric and n <= m
+        # 1. Either n or m is a blank string and the other is numeric --> isdecimal
+        # 2. Both are numeric and n <= m
         if (not n.isdecimal() and not m.isdecimal()) or (n.isdecimal() and m.isdecimal() and int(n) > int(m)):
             keys = ", ".join(lookup.keys()) + ", {n}, {n,m}, {n,}, {,m} where n and m are integers and n <= m "
             raise ValueError(Errors.E011.format(op=spec["OP"], opts=keys))
diff --git a/spacy/matcher/phrasematcher.pyx b/spacy/matcher/phrasematcher.pyx
index 107d7d926..eb9ca675f 100644
--- a/spacy/matcher/phrasematcher.pyx
+++ b/spacy/matcher/phrasematcher.pyx
@@ -2,16 +2,14 @@
 from collections import defaultdict
 from typing import List
 
-from libc.stdint cimport uintptr_t
 from preshed.maps cimport map_clear, map_get, map_init, map_iter, map_set
 
 import warnings
 
-from ..attrs cimport DEP, LEMMA, MORPH, ORTH, POS, TAG
+from ..attrs cimport DEP, LEMMA, MORPH, POS, TAG
 
 from ..attrs import IDS
 
-from ..structs cimport TokenC
 from ..tokens.span cimport Span
 from ..tokens.token cimport Token
 from ..typedefs cimport attr_t
@@ -160,7 +158,6 @@ cdef class PhraseMatcher:
         del self._callbacks[key]
         del self._docs[key]
 
-
     def _add_from_arrays(self, key, specs, *, on_match=None):
         """Add a preprocessed list of specs, with an optional callback.
 
@@ -196,7 +193,6 @@ cdef class PhraseMatcher:
                 result = internal_node
             map_set(self.mem, <MapStruct*>result, self.vocab.strings[key], NULL)
 
-
     def add(self, key, docs, *, on_match=None):
         """Add a match-rule to the phrase-matcher. A match-rule consists of: an ID
         key, a list of one or more patterns, and (optionally) an on_match callback.
diff --git a/spacy/ml/staticvectors.py b/spacy/ml/staticvectors.py
index 6fcb13ad0..b75240c5d 100644
--- a/spacy/ml/staticvectors.py
+++ b/spacy/ml/staticvectors.py
@@ -1,3 +1,4 @@
+import warnings
 from typing import Callable, List, Optional, Sequence, Tuple, cast
 
 from thinc.api import Model, Ops, registry
@@ -5,7 +6,8 @@ from thinc.initializers import glorot_uniform_init
 from thinc.types import Floats1d, Floats2d, Ints1d, Ragged
 from thinc.util import partial
 
-from ..errors import Errors
+from ..attrs import ORTH
+from ..errors import Errors, Warnings
 from ..tokens import Doc
 from ..vectors import Mode
 from ..vocab import Vocab
@@ -24,6 +26,8 @@ def StaticVectors(
     linear projection to control the dimensionality. If a dropout rate is
     specified, the dropout is applied per dimension over the whole batch.
     """
+    if key_attr != "ORTH":
+        warnings.warn(Warnings.W125, DeprecationWarning)
     return Model(
         "static_vectors",
         forward,
@@ -40,9 +44,9 @@ def forward(
     token_count = sum(len(doc) for doc in docs)
     if not token_count:
         return _handle_empty(model.ops, model.get_dim("nO"))
-    key_attr: int = model.attrs["key_attr"]
-    keys = model.ops.flatten([cast(Ints1d, doc.to_array(key_attr)) for doc in docs])
     vocab: Vocab = docs[0].vocab
+    key_attr: int = getattr(vocab.vectors, "attr", ORTH)
+    keys = model.ops.flatten([cast(Ints1d, doc.to_array(key_attr)) for doc in docs])
     W = cast(Floats2d, model.ops.as_contig(model.get_param("W")))
     if vocab.vectors.mode == Mode.default:
         V = model.ops.asarray(vocab.vectors.data)
diff --git a/spacy/ml/tb_framework.pyx b/spacy/ml/tb_framework.pyx
index fd0af12ce..6c5c29d85 100644
--- a/spacy/ml/tb_framework.pyx
+++ b/spacy/ml/tb_framework.pyx
@@ -1,5 +1,5 @@
 # cython: infer_types=True, cdivision=True, boundscheck=False
-from typing import Any, List, Optional, Tuple, TypeVar, cast
+from typing import Any, List, Optional, Tuple, cast
 
 from libc.stdlib cimport calloc, free, realloc
 from libc.string cimport memcpy, memset
@@ -23,7 +23,7 @@ from thinc.api import (
 
 from thinc.backends.cblas cimport CBlas, saxpy, sgemm
 
-from thinc.types import Floats1d, Floats2d, Floats3d, Floats4d, Ints1d, Ints2d
+from thinc.types import Floats2d, Floats3d, Floats4d, Ints1d, Ints2d
 
 from ..errors import Errors
 from ..pipeline._parser_internals import _beam_utils
@@ -136,7 +136,7 @@ def init(
     Y: Optional[Tuple[List[State], List[Floats2d]]] = None,
 ):
     if X is not None:
-        docs, moves = X
+        docs, _ = X
         model.get_ref("tok2vec").initialize(X=docs)
     else:
         model.get_ref("tok2vec").initialize()
@@ -145,7 +145,6 @@ def init(
         current_nO = model.maybe_get_dim("nO")
         if current_nO is None or current_nO != inferred_nO:
             model.attrs["resize_output"](model, inferred_nO)
-    nO = model.get_dim("nO")
     nP = model.get_dim("nP")
     nH = model.get_dim("nH")
     nI = model.get_dim("nI")
@@ -192,9 +191,10 @@ class TransitionModelInputs:
         self,
         docs: List[Doc],
         moves: TransitionSystem,
-        actions: Optional[List[Ints1d]]=None,
-        max_moves: int=0,
-        states: Optional[List[State]]=None):
+        actions: Optional[List[Ints1d]] = None,
+        max_moves: int = 0,
+        states: Optional[List[State]] = None,
+    ):
         """
         actions (Optional[List[Ints1d]]): actions to apply for each Doc.
         docs (List[Doc]): Docs to predict transition sequences for.
@@ -234,12 +234,12 @@ def forward(model, inputs: TransitionModelInputs, is_train: bool):
         return _forward_greedy_cpu(model, moves, states, feats, seen_mask, actions=actions)
     else:
         return _forward_fallback(model, moves, states, tokvecs, backprop_tok2vec,
-            feats, backprop_feats, seen_mask, is_train, actions=actions,
-            max_moves=inputs.max_moves)
+                                 feats, backprop_feats, seen_mask, is_train, actions=actions,
+                                 max_moves=inputs.max_moves)
 
 
 def _forward_greedy_cpu(model: Model, TransitionSystem moves, states: List[StateClass], np.ndarray feats,
-                np.ndarray[np.npy_bool, ndim=1] seen_mask, actions: Optional[List[Ints1d]]=None):
+                        np.ndarray[np.npy_bool, ndim = 1] seen_mask, actions: Optional[List[Ints1d]] = None):
     cdef vector[StateC*] c_states
     cdef StateClass state
     for state in states:
@@ -257,9 +257,10 @@ def _forward_greedy_cpu(model: Model, TransitionSystem moves, states: List[State
 
     return (states, scores), backprop
 
+
 cdef list _parse_batch(CBlas cblas, TransitionSystem moves, StateC** states,
                        WeightsC weights, SizesC sizes, actions: Optional[List[Ints1d]]=None):
-    cdef int i, j
+    cdef int i
     cdef vector[StateC *] unfinished
     cdef ActivationsC activations = _alloc_activations(sizes)
     cdef np.ndarray step_scores
@@ -276,7 +277,7 @@ cdef list _parse_batch(CBlas cblas, TransitionSystem moves, StateC** states,
             if actions is None:
                 # Validate actions, argmax, take action.
                 c_transition_batch(moves, states, <const float*>step_scores.data, sizes.classes,
-                    sizes.states)
+                                   sizes.states)
             else:
                 c_apply_actions(moves, states, <const int*>step_actions.data, sizes.states)
             for i in range(sizes.states):
@@ -302,8 +303,9 @@ def _forward_fallback(
     backprop_feats,
     seen_mask,
     is_train: bool,
-    actions: Optional[List[Ints1d]]=None,
-    max_moves: int=0):
+    actions: Optional[List[Ints1d]] = None,
+    max_moves: int = 0,
+):
     nF = model.get_dim("nF")
     output = model.get_ref("output")
     hidden_b = model.get_param("hidden_b")
@@ -371,7 +373,7 @@ def _forward_fallback(
             for clas in set(model.attrs["unseen_classes"]):
                 if (d_scores[:, clas] < 0).any():
                     model.attrs["unseen_classes"].remove(clas)
-        d_scores *= seen_mask == False
+        d_scores *= seen_mask == False  # no-cython-lint
         # Calculate the gradients for the parameters of the output layer.
         # The weight gemm is (nS, nO) @ (nS, nH).T
         output.inc_grad("b", d_scores.sum(axis=0))
@@ -571,13 +573,13 @@ cdef void _resize_activations(ActivationsC* A, SizesC n) nogil:
         A._max_size = n.states
     else:
         A.token_ids = <int*>realloc(A.token_ids,
-            n.states * n.feats * sizeof(A.token_ids[0]))
+                                    n.states * n.feats * sizeof(A.token_ids[0]))
         A.unmaxed = <float*>realloc(A.unmaxed,
-            n.states * n.hiddens * n.pieces * sizeof(A.unmaxed[0]))
+                                    n.states * n.hiddens * n.pieces * sizeof(A.unmaxed[0]))
         A.hiddens = <float*>realloc(A.hiddens,
-            n.states * n.hiddens * sizeof(A.hiddens[0]))
+                                    n.states * n.hiddens * sizeof(A.hiddens[0]))
         A.is_valid = <int*>realloc(A.is_valid,
-            n.states * n.classes * sizeof(A.is_valid[0]))
+                                   n.states * n.classes * sizeof(A.is_valid[0]))
         A._max_size = n.states
     A._curr_size = n.states
 
@@ -599,9 +601,9 @@ cdef void _predict_states(CBlas cblas, ActivationsC* A, float* scores, StateC**
     else:
         # Compute hidden-to-output
         sgemm(cblas)(False, True, n.states, n.classes, n.hiddens,
-                      1.0, <const float *>A.hiddens, n.hiddens,
-                      <const float *>W.hidden_weights, n.hiddens,
-                      0.0, scores, n.classes)
+                     1.0, <const float *>A.hiddens, n.hiddens,
+                     <const float *>W.hidden_weights, n.hiddens,
+                     0.0, scores, n.classes)
         # Add bias
         for i in range(n.states):
             saxpy(cblas)(n.classes, 1., W.hidden_bias, 1, &scores[i*n.classes], 1)
@@ -617,12 +619,12 @@ cdef void _predict_states(CBlas cblas, ActivationsC* A, float* scores, StateC**
                 scores[i*n.classes+j] = min_
 
 
-cdef void _sum_state_features(CBlas cblas, float* output,
-        const float* cached, const int* token_ids, SizesC n) nogil:
-    cdef int idx, b, f, i
+cdef void _sum_state_features(CBlas cblas, float* output, const float* cached,
+                              const int* token_ids, SizesC n) nogil:
+    cdef int idx, b, f
     cdef const float* feature
     cdef int B = n.states
-    cdef int O = n.hiddens * n.pieces
+    cdef int O = n.hiddens * n.pieces  # no-cython-lint
     cdef int F = n.feats
     cdef int T = n.tokens
     padding = cached + (T * F * O)
@@ -637,4 +639,3 @@ cdef void _sum_state_features(CBlas cblas, float* output,
                 feature = &cached[idx]
             saxpy(cblas)(O, one, <const float*>feature, 1, &output[b*O], 1)
         token_ids += F
-
diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx
index f5fe81c53..665e964bf 100644
--- a/spacy/morphology.pyx
+++ b/spacy/morphology.pyx
@@ -80,15 +80,13 @@ cdef class Morphology:
         out.sort(key=lambda x: x[0])
         return dict(out)
 
-
     def _normalized_feat_dict_to_str(self, feats: Dict[str, str]) -> str:
         norm_feats_string = self.FEATURE_SEP.join([
-                self.FIELD_SEP.join([field, self.VALUE_SEP.join(values) if isinstance(values, list) else values])
+            self.FIELD_SEP.join([field, self.VALUE_SEP.join(values) if isinstance(values, list) else values])
             for field, values in feats.items()
-        ])
+            ])
         return norm_feats_string or self.EMPTY_MORPH
 
-
     cdef hash_t _add(self, features):
         """Insert a morphological analysis in the morphology table, if not
         already present. The morphological analysis may be provided in the UD
@@ -140,7 +138,7 @@ cdef class Morphology:
                     field_feature_pairs.append((field_key, value_key))
             else:
                 # We could box scalar values into a list and use a common
-                # code path to generate features but that incurs a small 
+                # code path to generate features but that incurs a small
                 # but measurable allocation/iteration overhead (as this
                 # branch is taken often enough).
                 value_key = self.strings.add(field + self.FIELD_SEP + values)
@@ -246,6 +244,7 @@ cdef int get_n_by_field(attr_t* results, const shared_ptr[MorphAnalysisC] morph,
             n_results += 1
     return n_results
 
+
 def unpickle_morphology(strings, tags):
     cdef Morphology morphology = Morphology(strings)
     for tag in tags:
diff --git a/spacy/parts_of_speech.pxd b/spacy/parts_of_speech.pxd
index 01f116ea6..22a571be7 100644
--- a/spacy/parts_of_speech.pxd
+++ b/spacy/parts_of_speech.pxd
@@ -8,7 +8,7 @@ cpdef enum univ_pos_t:
     ADV = symbols.ADV
     AUX = symbols.AUX
     CONJ = symbols.CONJ
-    CCONJ  = symbols.CCONJ  # U20
+    CCONJ = symbols.CCONJ  # U20
     DET = symbols.DET
     INTJ = symbols.INTJ
     NOUN = symbols.NOUN
diff --git a/spacy/pipeline/_edit_tree_internals/edit_trees.pxd b/spacy/pipeline/_edit_tree_internals/edit_trees.pxd
index 3d63af921..41acd2b07 100644
--- a/spacy/pipeline/_edit_tree_internals/edit_trees.pxd
+++ b/spacy/pipeline/_edit_tree_internals/edit_trees.pxd
@@ -46,11 +46,18 @@ cdef struct EditTreeC:
     bint is_match_node
     NodeC inner
 
-cdef inline EditTreeC edittree_new_match(len_t prefix_len, len_t suffix_len,
-        uint32_t prefix_tree, uint32_t suffix_tree):
-    cdef MatchNodeC match_node = MatchNodeC(prefix_len=prefix_len,
-            suffix_len=suffix_len, prefix_tree=prefix_tree,
-            suffix_tree=suffix_tree)
+cdef inline EditTreeC edittree_new_match(
+    len_t prefix_len,
+    len_t suffix_len,
+    uint32_t prefix_tree,
+    uint32_t suffix_tree
+):
+    cdef MatchNodeC match_node = MatchNodeC(
+        prefix_len=prefix_len,
+        suffix_len=suffix_len,
+        prefix_tree=prefix_tree,
+        suffix_tree=suffix_tree
+    )
     cdef NodeC inner = NodeC(match_node=match_node)
     return EditTreeC(is_match_node=True, inner=inner)
 
diff --git a/spacy/pipeline/_edit_tree_internals/edit_trees.pyx b/spacy/pipeline/_edit_tree_internals/edit_trees.pyx
index daab0d204..78cd25622 100644
--- a/spacy/pipeline/_edit_tree_internals/edit_trees.pyx
+++ b/spacy/pipeline/_edit_tree_internals/edit_trees.pyx
@@ -5,8 +5,6 @@ from libc.string cimport memset
 from libcpp.pair cimport pair
 from libcpp.vector cimport vector
 
-from pathlib import Path
-
 from ...typedefs cimport hash_t
 
 from ... import util
@@ -25,17 +23,16 @@ cdef LCS find_lcs(str source, str target):
     target (str): The second string.
     RETURNS (LCS): The spans of the longest common subsequences.
     """
-    cdef Py_ssize_t source_len = len(source)
     cdef Py_ssize_t target_len = len(target)
-    cdef size_t longest_align = 0;
+    cdef size_t longest_align = 0
     cdef int source_idx, target_idx
     cdef LCS lcs
     cdef Py_UCS4 source_cp, target_cp
 
     memset(&lcs, 0, sizeof(lcs))
 
-    cdef vector[size_t] prev_aligns = vector[size_t](target_len);
-    cdef vector[size_t] cur_aligns = vector[size_t](target_len);
+    cdef vector[size_t] prev_aligns = vector[size_t](target_len)
+    cdef vector[size_t] cur_aligns = vector[size_t](target_len)
 
     for (source_idx, source_cp) in enumerate(source):
         for (target_idx, target_cp) in enumerate(target):
@@ -89,7 +86,7 @@ cdef class EditTrees:
         cdef LCS lcs = find_lcs(form, lemma)
 
         cdef EditTreeC tree
-        cdef uint32_t tree_id, prefix_tree, suffix_tree
+        cdef uint32_t prefix_tree, suffix_tree
         if lcs_is_empty(lcs):
             tree = edittree_new_subst(self.strings.add(form), self.strings.add(lemma))
         else:
@@ -108,7 +105,7 @@ cdef class EditTrees:
         return self._tree_id(tree)
 
     cdef uint32_t _tree_id(self, EditTreeC tree):
-         # If this tree has been constructed before, return its identifier.
+        # If this tree has been constructed before, return its identifier.
         cdef hash_t hash = edittree_hash(tree)
         cdef unordered_map[hash_t, uint32_t].iterator iter = self.map.find(hash)
         if iter != self.map.end():
@@ -289,6 +286,7 @@ def _tree2dict(tree):
         tree = tree["inner"]["subst_node"]
     return(dict(tree))
 
+
 def _dict2tree(tree):
     errors = validate_edit_tree(tree)
     if errors:
diff --git a/spacy/pipeline/_parser_internals/_beam_utils.pyx b/spacy/pipeline/_parser_internals/_beam_utils.pyx
index e4383c732..fff8d63e9 100644
--- a/spacy/pipeline/_parser_internals/_beam_utils.pyx
+++ b/spacy/pipeline/_parser_internals/_beam_utils.pyx
@@ -1,12 +1,8 @@
 # cython: infer_types=True
 # cython: profile=True
-cimport numpy as np
-
 import numpy
 
-from cpython.ref cimport Py_XDECREF, PyObject
-
-from ...typedefs cimport class_t, hash_t
+from ...typedefs cimport class_t
 from .transition_system cimport Transition, TransitionSystem
 
 from ...errors import Errors
@@ -146,7 +142,6 @@ def update_beam(TransitionSystem moves, states, golds, model, int width, beam_de
     cdef MaxViolation violn
     pbeam = BeamBatch(moves, states, golds, width=width, density=beam_density)
     gbeam = BeamBatch(moves, states, golds, width=width, density=0.0)
-    cdef StateClass state
     beam_maps = []
     backprops = []
     violns = [MaxViolation() for _ in range(len(states))]
diff --git a/spacy/pipeline/_parser_internals/_state.pxd b/spacy/pipeline/_parser_internals/_state.pxd
index 6a8a11dee..1c61ac271 100644
--- a/spacy/pipeline/_parser_internals/_state.pxd
+++ b/spacy/pipeline/_parser_internals/_state.pxd
@@ -280,7 +280,6 @@ cdef cppclass StateC:
 
         return n
 
-
     int n_L(int head) nogil const:
         return n_arcs(this._left_arcs, head)
 
diff --git a/spacy/pipeline/_parser_internals/arc_eager.pyx b/spacy/pipeline/_parser_internals/arc_eager.pyx
index b1bae00c8..b2653bce3 100644
--- a/spacy/pipeline/_parser_internals/arc_eager.pyx
+++ b/spacy/pipeline/_parser_internals/arc_eager.pyx
@@ -9,7 +9,7 @@ from ...strings cimport hash_string
 from ...structs cimport TokenC
 from ...tokens.doc cimport Doc, set_children_from_heads
 from ...tokens.token cimport MISSING_DEP
-from ...typedefs cimport attr_t, hash_t
+from ...typedefs cimport attr_t
 
 from ...training import split_bilu_label
 
@@ -68,8 +68,9 @@ cdef struct GoldParseStateC:
     weight_t pop_cost
 
 
-cdef GoldParseStateC create_gold_state(Pool mem, const StateC* state,
-        heads, labels, sent_starts) except *:
+cdef GoldParseStateC create_gold_state(
+    Pool mem, const StateC* state, heads, labels, sent_starts
+) except *:
     cdef GoldParseStateC gs
     gs.length = len(heads)
     gs.stride = 1
@@ -82,7 +83,7 @@ cdef GoldParseStateC create_gold_state(Pool mem, const StateC* state,
     gs.n_kids_in_stack = <int32_t*>mem.alloc(gs.length, sizeof(gs.n_kids_in_stack[0]))
 
     for i, is_sent_start in enumerate(sent_starts):
-        if is_sent_start == True:
+        if is_sent_start is True:
             gs.state_bits[i] = set_state_flag(
                 gs.state_bits[i],
                 IS_SENT_START,
@@ -210,6 +211,7 @@ cdef class ArcEagerGold:
     def update(self, StateClass stcls):
         update_gold_state(&self.c, stcls.c)
 
+
 def _get_aligned_sent_starts(example):
     """Get list of SENT_START attributes aligned to the predicted tokenization.
     If the reference has not sentence starts, return a list of None values.
@@ -524,7 +526,6 @@ cdef class Break:
     """
     @staticmethod
     cdef bint is_valid(const StateC* st, attr_t label) nogil:
-        cdef int i
         if st.buffer_length() < 2:
             return False
         elif st.B(1) != st.B(0) + 1:
@@ -556,8 +557,8 @@ cdef class Break:
                 cost -= 1
             if gold.heads[si] == b0:
                 cost -= 1
-        if not is_sent_start(gold, state.B(1)) \
-        and not is_sent_start_unknown(gold, state.B(1)):
+        if not is_sent_start(gold, state.B(1)) and\
+                not is_sent_start_unknown(gold, state.B(1)):
             cost += 1
         return cost
 
@@ -805,7 +806,6 @@ cdef class ArcEager(TransitionSystem):
             raise TypeError(Errors.E909.format(name="ArcEagerGold"))
         cdef ArcEagerGold gold_ = gold
         gold_state = gold_.c
-        n_gold = 0
         if self.c[i].is_valid(stcls.c, self.c[i].label):
             cost = self.c[i].get_cost(stcls.c, &gold_state, self.c[i].label)
         else:
@@ -878,7 +878,7 @@ cdef class ArcEager(TransitionSystem):
             print("Gold")
             for token in example.y:
                 print(token.i, token.text, token.dep_, token.head.text)
-            aligned_heads, aligned_labels = example.get_aligned_parse()
+            aligned_heads, _aligned_labels = example.get_aligned_parse()
             print("Aligned heads")
             for i, head in enumerate(aligned_heads):
                 print(example.x[i], example.x[head] if head is not None else "__")
diff --git a/spacy/pipeline/_parser_internals/ner.pyx b/spacy/pipeline/_parser_internals/ner.pyx
index 18e103b19..9220bb522 100644
--- a/spacy/pipeline/_parser_internals/ner.pyx
+++ b/spacy/pipeline/_parser_internals/ner.pyx
@@ -1,8 +1,4 @@
-import os
-import random
-
 from cymem.cymem cimport Pool
-from libc.stdint cimport int32_t
 from libcpp.memory cimport shared_ptr
 from libcpp.vector cimport vector
 
@@ -14,7 +10,7 @@ from ...tokens.span import Span
 
 from ...attrs cimport IS_SPACE
 from ...lexeme cimport Lexeme
-from ...structs cimport SpanC, TokenC
+from ...structs cimport SpanC
 from ...tokens.span cimport Span
 from ...typedefs cimport attr_t, weight_t
 
@@ -138,11 +134,10 @@ cdef class BiluoPushDown(TransitionSystem):
             OUT: Counter()
         }
         actions[OUT][''] = 1  # Represents a token predicted to be outside of any entity
-        actions[UNIT][''] = 1 # Represents a token prohibited to be in an entity
+        actions[UNIT][''] = 1  # Represents a token prohibited to be in an entity
         for entity_type in kwargs.get('entity_types', []):
             for action in (BEGIN, IN, LAST, UNIT):
                 actions[action][entity_type] = 1
-        moves = ('M', 'B', 'I', 'L', 'U')
         for example in kwargs.get('examples', []):
             for token in example.y:
                 ent_type = token.ent_type_
@@ -324,7 +319,6 @@ cdef class BiluoPushDown(TransitionSystem):
             raise TypeError(Errors.E909.format(name="BiluoGold"))
         cdef BiluoGold gold_ = gold
         gold_state = gold_.c
-        n_gold = 0
         if self.c[i].is_valid(stcls.c, self.c[i].label):
             cost = self.c[i].get_cost(stcls.c, &gold_state, self.c[i].label)
         else:
@@ -487,10 +481,8 @@ cdef class In:
     @staticmethod
     cdef weight_t cost(const StateC* s, const void* _gold, attr_t label) nogil:
         gold = <GoldNERStateC*>_gold
-        move = IN
         cdef int next_act = gold.ner[s.B(1)].move if s.B(1) >= 0 else OUT
         cdef int g_act = gold.ner[s.B(0)].move
-        cdef attr_t g_tag = gold.ner[s.B(0)].label
         cdef bint is_sunk = _entity_is_sunk(s, gold.ner)
 
         if g_act == MISSING:
@@ -550,12 +542,10 @@ cdef class Last:
     @staticmethod
     cdef weight_t cost(const StateC* s, const void* _gold, attr_t label) nogil:
         gold = <GoldNERStateC*>_gold
-        move = LAST
         b0 = s.B(0)
         ent_start = s.E(0)
 
         cdef int g_act = gold.ner[b0].move
-        cdef attr_t g_tag = gold.ner[b0].label
 
         cdef int cost = 0
 
@@ -655,7 +645,6 @@ cdef class Unit:
         return cost
 
 
-
 cdef class Out:
     @staticmethod
     cdef bint is_valid(const StateC* st, attr_t label) nogil:
@@ -678,7 +667,6 @@ cdef class Out:
     cdef weight_t cost(const StateC* s, const void* _gold, attr_t label) nogil:
         gold = <GoldNERStateC*>_gold
         cdef int g_act = gold.ner[s.B(0)].move
-        cdef attr_t g_tag = gold.ner[s.B(0)].label
         cdef weight_t cost = 0
         if g_act == MISSING:
             pass
diff --git a/spacy/pipeline/_parser_internals/nonproj.pyx b/spacy/pipeline/_parser_internals/nonproj.pyx
index 66f423b3b..93ad14feb 100644
--- a/spacy/pipeline/_parser_internals/nonproj.pyx
+++ b/spacy/pipeline/_parser_internals/nonproj.pyx
@@ -125,14 +125,17 @@ def decompose(label):
 def is_decorated(label):
     return DELIMITER in label
 
+
 def count_decorated_labels(gold_data):
     freqs = {}
     for example in gold_data:
         proj_heads, deco_deps = projectivize(example.get_aligned("HEAD"),
                                              example.get_aligned("DEP"))
         # set the label to ROOT for each root dependent
-        deco_deps = ['ROOT' if head == i else deco_deps[i]
-                       for i, head in enumerate(proj_heads)]
+        deco_deps = [
+            'ROOT' if head == i else deco_deps[i]
+            for i, head in enumerate(proj_heads)
+        ]
         # count label frequencies
         for label in deco_deps:
             if is_decorated(label):
@@ -160,9 +163,9 @@ def projectivize(heads, labels):
 
 
 cdef vector[int] _heads_to_c(heads):
-    cdef vector[int] c_heads;
+    cdef vector[int] c_heads
     for head in heads:
-        if head == None:
+        if head is None:
             c_heads.push_back(-1)
         else:
             assert head < len(heads)
@@ -199,6 +202,7 @@ def _decorate(heads, proj_heads, labels):
             deco_labels.append(labels[tokenid])
     return deco_labels
 
+
 def get_smallest_nonproj_arc_slow(heads):
     cdef vector[int] c_heads = _heads_to_c(heads)
     return _get_smallest_nonproj_arc(c_heads)
diff --git a/spacy/pipeline/_parser_internals/search.pxd b/spacy/pipeline/_parser_internals/search.pxd
index 462649633..ad68dc5c7 100644
--- a/spacy/pipeline/_parser_internals/search.pxd
+++ b/spacy/pipeline/_parser_internals/search.pxd
@@ -57,7 +57,6 @@ cdef class Beam:
     cdef int advance(self, trans_func_t transition_func, hash_func_t hash_func,
                      void* extra_args) except -1
     cdef int check_done(self, finish_func_t finish_func, void* extra_args) except -1
- 
 
     cdef inline void set_cell(self, int i, int j, weight_t score, int is_valid, weight_t cost) nogil:
         self.scores[i][j] = score
diff --git a/spacy/pipeline/_parser_internals/search.pyx b/spacy/pipeline/_parser_internals/search.pyx
index 251eaa805..578299b56 100644
--- a/spacy/pipeline/_parser_internals/search.pyx
+++ b/spacy/pipeline/_parser_internals/search.pyx
@@ -1,11 +1,8 @@
 # cython: profile=True, experimental_cpp_class_def=True, cdivision=True, infer_types=True
 cimport cython
-from libc.math cimport exp, log
-from libc.string cimport memcpy, memset
-
-import math
-
 from cymem.cymem cimport Pool
+from libc.math cimport exp
+from libc.string cimport memcpy, memset
 from preshed.maps cimport PreshMap
 
 
@@ -70,7 +67,7 @@ cdef class Beam:
             self.costs[i][j] = costs[j]
 
     cdef int set_table(self, weight_t** scores, int** is_valid, weight_t** costs) except -1:
-        cdef int i, j
+        cdef int i
         for i in range(self.width):
             memcpy(self.scores[i], scores[i], sizeof(weight_t) * self.nr_class)
             memcpy(self.is_valid[i], is_valid[i], sizeof(bint) * self.nr_class)
@@ -176,7 +173,6 @@ cdef class Beam:
         beam-width, and n is the number of classes.
         """
         cdef Entry entry
-        cdef weight_t score
         cdef _State* s
         cdef int i, j, move_id
         assert self.size >= 1
@@ -269,7 +265,7 @@ cdef class MaxViolation:
                 # This can happen from non-monotonic actions
                 # If we find a better gold analysis this way, be sure to keep it.
                 elif pred._states[i].loss <= 0 \
-                and tuple(pred.histories[i]) not in seen_golds:
+                        and tuple(pred.histories[i]) not in seen_golds:
                     g_scores.append(pred._states[i].score)
                     g_hist.append(list(pred.histories[i]))
             for i in range(gold.size):
diff --git a/spacy/pipeline/_parser_internals/stateclass.pyx b/spacy/pipeline/_parser_internals/stateclass.pyx
index 5d98c0314..bdb4d1cf0 100644
--- a/spacy/pipeline/_parser_internals/stateclass.pyx
+++ b/spacy/pipeline/_parser_internals/stateclass.pyx
@@ -1,6 +1,4 @@
 # cython: infer_types=True
-import numpy
-
 from libcpp.vector cimport vector
 
 from ...tokens.doc cimport Doc
@@ -42,11 +40,11 @@ cdef class StateClass:
         cdef vector[ArcC] arcs
         self.c.get_arcs(&arcs)
         return list(arcs)
-        #py_arcs = []
-        #for arc in arcs:
-        #    if arc.head != -1 and arc.child != -1:
-        #        py_arcs.append((arc.head, arc.child, arc.label))
-        #return arcs
+        # py_arcs = []
+        # for arc in arcs:
+        #     if arc.head != -1 and arc.child != -1:
+        #         py_arcs.append((arc.head, arc.child, arc.label))
+        # return arcs
 
     def add_arc(self, int head, int child, int label):
         self.c.add_arc(head, child, label)
@@ -56,10 +54,10 @@ cdef class StateClass:
 
     def H(self, int child):
         return self.c.H(child)
-    
+
     def L(self, int head, int idx):
         return self.c.L(head, idx)
-    
+
     def R(self, int head, int idx):
         return self.c.R(head, idx)
 
@@ -102,7 +100,7 @@ cdef class StateClass:
 
     def H(self, int i):
         return self.c.H(i)
-    
+
     def E(self, int i):
         return self.c.E(i)
 
@@ -120,7 +118,7 @@ cdef class StateClass:
 
     def H_(self, int i):
         return self.doc[self.c.H(i)]
-    
+
     def E_(self, int i):
         return self.doc[self.c.E(i)]
 
@@ -129,7 +127,7 @@ cdef class StateClass:
 
     def R_(self, int i, int idx):
         return self.doc[self.c.R(i, idx)]
- 
+
     def empty(self):
         return self.c.empty()
 
@@ -138,7 +136,7 @@ cdef class StateClass:
 
     def at_break(self):
         return False
-        #return self.c.at_break()
+        # return self.c.at_break()
 
     def has_head(self, int i):
         return self.c.has_head(i)
diff --git a/spacy/pipeline/_parser_internals/transition_system.pxd b/spacy/pipeline/_parser_internals/transition_system.pxd
index 296b99ef8..08baed932 100644
--- a/spacy/pipeline/_parser_internals/transition_system.pxd
+++ b/spacy/pipeline/_parser_internals/transition_system.pxd
@@ -20,11 +20,15 @@ cdef struct Transition:
     int (*do)(StateC* state, attr_t label) nogil
 
 
-ctypedef weight_t (*get_cost_func_t)(const StateC* state, const void* gold,
-        attr_tlabel) nogil
-ctypedef weight_t (*move_cost_func_t)(const StateC* state, const void* gold) nogil
-ctypedef weight_t (*label_cost_func_t)(const StateC* state, const void*
-        gold, attr_t label) nogil
+ctypedef weight_t (*get_cost_func_t)(
+    const StateC* state, const void* gold, attr_t label
+) nogil
+ctypedef weight_t (*move_cost_func_t)(
+    const StateC* state, const void* gold
+) nogil
+ctypedef weight_t (*label_cost_func_t)(
+    const StateC* state, const void* gold, attr_t label
+) nogil
 
 ctypedef int (*do_func_t)(StateC* state, attr_t label) nogil
 
@@ -56,7 +60,7 @@ cdef class TransitionSystem:
 
 
 cdef void c_apply_actions(TransitionSystem moves, StateC** states, const int* actions,
-    int batch_size) nogil
+                          int batch_size) nogil
 
 cdef void c_transition_batch(TransitionSystem moves, StateC** states, const float* scores,
-        int nr_class, int batch_size) nogil
+                             int nr_class, int batch_size) nogil
diff --git a/spacy/pipeline/_parser_internals/transition_system.pyx b/spacy/pipeline/_parser_internals/transition_system.pyx
index 6c8b77ff0..aaafe2aa0 100644
--- a/spacy/pipeline/_parser_internals/transition_system.pyx
+++ b/spacy/pipeline/_parser_internals/transition_system.pyx
@@ -10,9 +10,7 @@ from collections import Counter
 import srsly
 
 from ...structs cimport TokenC
-from ...tokens.doc cimport Doc
 from ...typedefs cimport attr_t, weight_t
-from . cimport _beam_utils
 from ._parser_utils cimport arg_max_if_valid
 from .stateclass cimport StateClass
 
@@ -270,7 +268,6 @@ cdef class TransitionSystem:
         return self
 
     def to_bytes(self, exclude=tuple()):
-        transitions = []
         serializers = {
             'moves': lambda: srsly.json_dumps(self.labels),
             'strings': lambda: self.strings.to_bytes(),
@@ -294,19 +291,19 @@ cdef class TransitionSystem:
 
 
 cdef void c_apply_actions(TransitionSystem moves, StateC** states, const int* actions,
-    int batch_size) nogil:
-        cdef int i
-        cdef Transition action
-        cdef StateC* state
-        for i in range(batch_size):
-            state = states[i]
-            action = moves.c[actions[i]]
-            action.do(state, action.label)
-            state.history.push_back(action.clas)
+                          int batch_size) nogil:
+    cdef int i
+    cdef Transition action
+    cdef StateC* state
+    for i in range(batch_size):
+        state = states[i]
+        action = moves.c[actions[i]]
+        action.do(state, action.label)
+        state.history.push_back(action.clas)
 
 
 cdef void c_transition_batch(TransitionSystem moves, StateC** states, const float* scores,
-    int nr_class, int batch_size) nogil:
+                             int nr_class, int batch_size) nogil:
     is_valid = <int*>calloc(moves.n_moves, sizeof(int))
     cdef int i, guess
     cdef Transition action
@@ -322,4 +319,3 @@ cdef void c_transition_batch(TransitionSystem moves, StateC** states, const floa
             action.do(states[i], action.label)
             states[i].history.push_back(guess)
     free(is_valid)
-
diff --git a/spacy/pipeline/dep_parser.py b/spacy/pipeline/dep_parser.py
index ce882b5ed..f2472451b 100644
--- a/spacy/pipeline/dep_parser.py
+++ b/spacy/pipeline/dep_parser.py
@@ -1,6 +1,6 @@
 # cython: infer_types=True, profile=True, binding=True
 from collections import defaultdict
-from typing import Callable, Iterable, Optional
+from typing import Callable, Optional
 
 from thinc.api import Config, Model
 
diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx
index bf3fe3221..4b95f1ff7 100644
--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@@ -1,11 +1,9 @@
 # cython: infer_types=True, profile=True, binding=True
 from itertools import islice
-from typing import Callable, Dict, Iterable, List, Optional, Union
+from typing import Callable, Dict, Iterable, Optional, Union
 
-import srsly
 from thinc.api import Config, Model
 from thinc.legacy import LegacySequenceCategoricalCrossentropy
-from thinc.types import Floats2d, Ints1d
 
 from ..morphology cimport Morphology
 from ..tokens.doc cimport Doc
@@ -16,10 +14,8 @@ from ..errors import Errors
 from ..language import Language
 from ..parts_of_speech import IDS as POS_IDS
 from ..scorer import Scorer
-from ..symbols import POS
 from ..training import validate_examples, validate_get_examples
 from ..util import registry
-from .pipe import deserialize_config
 from .tagger import ActivationsT, Tagger
 
 # See #9050
@@ -86,8 +82,11 @@ def morphologizer_score(examples, **kwargs):
     results = {}
     results.update(Scorer.score_token_attr(examples, "pos", **kwargs))
     results.update(Scorer.score_token_attr(examples, "morph", getter=morph_key_getter, **kwargs))
-    results.update(Scorer.score_token_attr_per_feat(examples,
-        "morph", getter=morph_key_getter, **kwargs))
+    results.update(
+        Scorer.score_token_attr_per_feat(
+            examples, "morph", getter=morph_key_getter, **kwargs
+        )
+    )
     return results
 
 
@@ -249,7 +248,6 @@ class Morphologizer(Tagger):
         if isinstance(docs, Doc):
             docs = [docs]
         cdef Doc doc
-        cdef Vocab vocab = self.vocab
         cdef bint overwrite = self.cfg["overwrite"]
         cdef bint extend = self.cfg["extend"]
 
diff --git a/spacy/pipeline/ner.py b/spacy/pipeline/ner.py
index 3b84126ed..445ed7663 100644
--- a/spacy/pipeline/ner.py
+++ b/spacy/pipeline/ner.py
@@ -1,12 +1,12 @@
 # cython: infer_types=True, profile=True, binding=True
 from collections import defaultdict
-from typing import Callable, Iterable, Optional
+from typing import Callable, Optional
 
 from thinc.api import Config, Model
 
 from ..language import Language
-from ..scorer import PRFScore, get_ner_prf
-from ..training import remove_bilu_prefix, validate_examples
+from ..scorer import get_ner_prf
+from ..training import remove_bilu_prefix
 from ..util import registry
 from ._parser_internals.ner import BiluoPushDown
 from ._parser_internals.transition_system import TransitionSystem
diff --git a/spacy/pipeline/pipe.pyx b/spacy/pipeline/pipe.pyx
index cbdfdaac0..8409e64c3 100644
--- a/spacy/pipeline/pipe.pyx
+++ b/spacy/pipeline/pipe.pyx
@@ -1,12 +1,11 @@
 # cython: infer_types=True, profile=True, binding=True
-import warnings
-from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple, Union
+from typing import Callable, Dict, Iterable, Iterator, Tuple, Union
 
 import srsly
 
 from ..tokens.doc cimport Doc
 
-from ..errors import Errors, Warnings
+from ..errors import Errors
 from ..language import Language
 from ..training import Example
 from ..util import raise_error
@@ -33,7 +32,7 @@ cdef class Pipe:
         """
         raise NotImplementedError(Errors.E931.format(parent="Pipe", method="__call__", name=self.name))
 
-    def pipe(self, stream: Iterable[Doc], *, batch_size: int=128) -> Iterator[Doc]:
+    def pipe(self, stream: Iterable[Doc], *, batch_size: int = 128) -> Iterator[Doc]:
         """Apply the pipe to a stream of documents. This usually happens under
         the hood when the nlp object is called on a text and all components are
         applied to the Doc.
@@ -52,7 +51,7 @@ cdef class Pipe:
             except Exception as e:
                 error_handler(self.name, self, [doc], e)
 
-    def initialize(self, get_examples: Callable[[], Iterable[Example]], *, nlp: Language=None):
+    def initialize(self, get_examples: Callable[[], Iterable[Example]], *, nlp: Language = None):
         """Initialize the pipe. For non-trainable components, this method
         is optional. For trainable components, which should inherit
         from the subclass TrainablePipe, the provided data examples
diff --git a/spacy/pipeline/sentencizer.pyx b/spacy/pipeline/sentencizer.pyx
index 599554814..28cf5d6b4 100644
--- a/spacy/pipeline/sentencizer.pyx
+++ b/spacy/pipeline/sentencizer.pyx
@@ -7,7 +7,6 @@ from ..tokens.doc cimport Doc
 
 from .. import util
 from ..language import Language
-from ..scorer import Scorer
 from .pipe import Pipe
 from .senter import senter_score
 
@@ -34,17 +33,19 @@ class Sentencizer(Pipe):
     DOCS: https://spacy.io/api/sentencizer
     """
 
-    default_punct_chars = ['!', '.', '?', '։', '؟', '۔', '܀', '܁', '܂', '߹',
-            '।', '॥', '၊', '။', '።', '፧', '፨', '᙮', '᜵', '᜶', '᠃', '᠉', '᥄',
-            '᥅', '᪨', '᪩', '᪪', '᪫', '᭚', '᭛', '᭞', '᭟', '᰻', '᰼', '᱾', '᱿',
-            '‼', '‽', '⁇', '⁈', '⁉', '⸮', '⸼', '꓿', '꘎', '꘏', '꛳', '꛷', '꡶',
-            '꡷', '꣎', '꣏', '꤯', '꧈', '꧉', '꩝', '꩞', '꩟', '꫰', '꫱', '꯫', '﹒',
-            '﹖', '﹗', '！', '．', '？', '𐩖', '𐩗', '𑁇', '𑁈', '𑂾', '𑂿', '𑃀',
-            '𑃁', '𑅁', '𑅂', '𑅃', '𑇅', '𑇆', '𑇍', '𑇞', '𑇟', '𑈸', '𑈹', '𑈻', '𑈼',
-            '𑊩', '𑑋', '𑑌', '𑗂', '𑗃', '𑗉', '𑗊', '𑗋', '𑗌', '𑗍', '𑗎', '𑗏', '𑗐',
-            '𑗑', '𑗒', '𑗓', '𑗔', '𑗕', '𑗖', '𑗗', '𑙁', '𑙂', '𑜼', '𑜽', '𑜾', '𑩂',
-            '𑩃', '𑪛', '𑪜', '𑱁', '𑱂', '𖩮', '𖩯', '𖫵', '𖬷', '𖬸', '𖭄', '𛲟', '𝪈',
-            '｡', '。']
+    default_punct_chars = [
+        '!', '.', '?', '։', '؟', '۔', '܀', '܁', '܂', '߹',
+        '।', '॥', '၊', '။', '።', '፧', '፨', '᙮', '᜵', '᜶', '᠃', '᠉', '᥄',
+        '᥅', '᪨', '᪩', '᪪', '᪫', '᭚', '᭛', '᭞', '᭟', '᰻', '᰼', '᱾', '᱿',
+        '‼', '‽', '⁇', '⁈', '⁉', '⸮', '⸼', '꓿', '꘎', '꘏', '꛳', '꛷', '꡶',
+        '꡷', '꣎', '꣏', '꤯', '꧈', '꧉', '꩝', '꩞', '꩟', '꫰', '꫱', '꯫', '﹒',
+        '﹖', '﹗', '！', '．', '？', '𐩖', '𐩗', '𑁇', '𑁈', '𑂾', '𑂿', '𑃀',
+        '𑃁', '𑅁', '𑅂', '𑅃', '𑇅', '𑇆', '𑇍', '𑇞', '𑇟', '𑈸', '𑈹', '𑈻', '𑈼',
+        '𑊩', '𑑋', '𑑌', '𑗂', '𑗃', '𑗉', '𑗊', '𑗋', '𑗌', '𑗍', '𑗎', '𑗏', '𑗐',
+        '𑗑', '𑗒', '𑗓', '𑗔', '𑗕', '𑗖', '𑗗', '𑙁', '𑙂', '𑜼', '𑜽', '𑜾', '𑩂',
+        '𑩃', '𑪛', '𑪜', '𑱁', '𑱂', '𖩮', '𖩯', '𖫵', '𖬷', '𖬸', '𖭄', '𛲟', '𝪈',
+        '｡', '。'
+    ]
 
     def __init__(
         self,
@@ -127,7 +128,6 @@ class Sentencizer(Pipe):
         if isinstance(docs, Doc):
             docs = [docs]
         cdef Doc doc
-        cdef int idx = 0
         for i, doc in enumerate(docs):
             doc_tag_ids = batch_tag_ids[i]
             for j, tag_id in enumerate(doc_tag_ids):
@@ -168,7 +168,6 @@ class Sentencizer(Pipe):
         path = path.with_suffix(".json")
         srsly.write_json(path, {"punct_chars": list(self.punct_chars), "overwrite": self.overwrite})
 
-
     def from_disk(self, path, *, exclude=tuple()):
         """Load the sentencizer from disk.
 
diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx
index 42615e194..2ef08efaa 100644
--- a/spacy/pipeline/senter.pyx
+++ b/spacy/pipeline/senter.pyx
@@ -1,11 +1,9 @@
 # cython: infer_types=True, profile=True, binding=True
 from itertools import islice
-from typing import Callable, Dict, Iterable, List, Optional, Union
+from typing import Callable, Iterable, Optional
 
-import srsly
 from thinc.api import Config, Model
 from thinc.legacy import LegacySequenceCategoricalCrossentropy
-from thinc.types import Floats2d, Ints1d
 
 from ..tokens.doc cimport Doc
 
diff --git a/spacy/pipeline/span_finder.py b/spacy/pipeline/span_finder.py
index 91be2f2ae..a12d52911 100644
--- a/spacy/pipeline/span_finder.py
+++ b/spacy/pipeline/span_finder.py
@@ -48,14 +48,14 @@ DEFAULT_SPAN_FINDER_MODEL = Config().from_str(span_finder_default_config)["model
         "threshold": 0.5,
         "model": DEFAULT_SPAN_FINDER_MODEL,
         "spans_key": DEFAULT_SPANS_KEY,
-        "max_length": None,
+        "max_length": 25,
         "min_length": None,
         "scorer": {"@scorers": "spacy.span_finder_scorer.v1"},
     },
     default_score_weights={
-        f"span_finder_{DEFAULT_SPANS_KEY}_f": 1.0,
-        f"span_finder_{DEFAULT_SPANS_KEY}_p": 0.0,
-        f"span_finder_{DEFAULT_SPANS_KEY}_r": 0.0,
+        f"spans_{DEFAULT_SPANS_KEY}_f": 1.0,
+        f"spans_{DEFAULT_SPANS_KEY}_p": 0.0,
+        f"spans_{DEFAULT_SPANS_KEY}_r": 0.0,
     },
 )
 def make_span_finder(
@@ -104,7 +104,7 @@ def make_span_finder_scorer():
 
 def span_finder_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
     kwargs = dict(kwargs)
-    attr_prefix = "span_finder_"
+    attr_prefix = "spans_"
     key = kwargs["spans_key"]
     kwargs.setdefault("attr", f"{attr_prefix}{key}")
     kwargs.setdefault(
diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx
index 53693584d..b05a7835f 100644
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -1,27 +1,20 @@
 # cython: infer_types=True, profile=True, binding=True
-import warnings
 from itertools import islice
 from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union
 
 import numpy
-import srsly
 from thinc.api import Config, Model, set_dropout_rate
 from thinc.legacy import LegacySequenceCategoricalCrossentropy
 from thinc.types import Floats2d, Ints1d
 
-from ..morphology cimport Morphology
 from ..tokens.doc cimport Doc
-from ..vocab cimport Vocab
 
 from .. import util
-from ..attrs import ID, POS
-from ..errors import Errors, Warnings
+from ..errors import Errors
 from ..language import Language
-from ..parts_of_speech import X
 from ..scorer import Scorer
 from ..training import validate_examples, validate_get_examples
 from ..util import registry
-from .pipe import deserialize_config
 from .trainable_pipe import TrainablePipe
 
 ActivationsT = Dict[str, Union[List[Floats2d], List[Ints1d]]]
@@ -188,7 +181,6 @@ class Tagger(TrainablePipe):
         if isinstance(docs, Doc):
             docs = [docs]
         cdef Doc doc
-        cdef Vocab vocab = self.vocab
         cdef bint overwrite = self.cfg["overwrite"]
         labels = self.labels
         for i, doc in enumerate(docs):
@@ -281,7 +273,7 @@ class Tagger(TrainablePipe):
         student_scores: Scores representing the student model's predictions.
 
         RETURNS (Tuple[float, float]): The loss and the gradient.
-        
+
         DOCS: https://spacy.io/api/tagger#get_teacher_student_loss
         """
         loss_func = LegacySequenceCategoricalCrossentropy(normalize=False)
diff --git a/spacy/pipeline/trainable_pipe.pyx b/spacy/pipeline/trainable_pipe.pyx
index 38b83f2c2..42394c907 100644
--- a/spacy/pipeline/trainable_pipe.pyx
+++ b/spacy/pipeline/trainable_pipe.pyx
@@ -1,5 +1,4 @@
 # cython: infer_types=True, profile=True, binding=True
-import warnings
 from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple
 
 import srsly
@@ -8,7 +7,7 @@ from thinc.api import Model, Optimizer, set_dropout_rate
 from ..tokens.doc cimport Doc
 
 from .. import util
-from ..errors import Errors, Warnings
+from ..errors import Errors
 from ..language import Language
 from ..training import Example, validate_distillation_examples, validate_examples
 from ..vocab import Vocab
@@ -56,14 +55,14 @@ cdef class TrainablePipe(Pipe):
         except Exception as e:
             error_handler(self.name, self, [doc], e)
 
-
     def distill(self,
-               teacher_pipe: Optional["TrainablePipe"],
-               examples: Iterable["Example"],
-               *,
-               drop: float=0.0,
-               sgd: Optional[Optimizer]=None,
-               losses: Optional[Dict[str, float]]=None) -> Dict[str, float]:
+                teacher_pipe: Optional["TrainablePipe"],
+                examples: Iterable["Example"],
+                *,
+                drop: float = 0.0,
+                sgd: Optional[Optimizer] = None,
+                losses: Optional[Dict[str, float]] = None
+                ) -> Dict[str, float]:
         """Train a pipe (the student) on the predictions of another pipe
         (the teacher). The student is typically trained on the probability
         distribution of the teacher, but details may differ per pipe.
@@ -79,7 +78,7 @@ cdef class TrainablePipe(Pipe):
         losses (Optional[Dict[str, float]]): Optional record of loss during
             distillation.
         RETURNS: The updated losses dictionary.
-        
+
         DOCS: https://spacy.io/api/pipe#distill
         """
         # By default we require a teacher pipe, but there are downstream
@@ -103,7 +102,7 @@ cdef class TrainablePipe(Pipe):
         losses[self.name] += loss
         return losses
 
-    def pipe(self, stream: Iterable[Doc], *, batch_size: int=128) -> Iterator[Doc]:
+    def pipe(self, stream: Iterable[Doc], *, batch_size: int = 128) -> Iterator[Doc]:
         """Apply the pipe to a stream of documents. This usually happens under
         the hood when the nlp object is called on a text and all components are
         applied to the Doc.
@@ -150,9 +149,9 @@ cdef class TrainablePipe(Pipe):
     def update(self,
                examples: Iterable["Example"],
                *,
-               drop: float=0.0,
-               sgd: Optimizer=None,
-               losses: Optional[Dict[str, float]]=None) -> Dict[str, float]:
+               drop: float = 0.0,
+               sgd: Optimizer = None,
+               losses: Optional[Dict[str, float]] = None) -> Dict[str, float]:
         """Learn from a batch of documents and gold-standard information,
         updating the pipe's model. Delegates to predict and get_loss.
 
@@ -186,8 +185,8 @@ cdef class TrainablePipe(Pipe):
     def rehearse(self,
                  examples: Iterable[Example],
                  *,
-                 sgd: Optimizer=None,
-                 losses: Dict[str, float]=None,
+                 sgd: Optimizer = None,
+                 losses: Dict[str, float] = None,
                  **config) -> Dict[str, float]:
         """Perform a "rehearsal" update from a batch of data. Rehearsal updates
         teach the current model to make predictions similar to an initial model,
@@ -224,7 +223,7 @@ cdef class TrainablePipe(Pipe):
         student_scores: Scores representing the student model's predictions.
 
         RETURNS (Tuple[float, float]): The loss and the gradient.
-        
+
         DOCS: https://spacy.io/api/pipe#get_teacher_student_loss
         """
         raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="get_teacher_student_loss", name=self.name))
@@ -238,7 +237,7 @@ cdef class TrainablePipe(Pipe):
         """
         return util.create_default_optimizer()
 
-    def initialize(self, get_examples: Callable[[], Iterable[Example]], *, nlp: Language=None):
+    def initialize(self, get_examples: Callable[[], Iterable[Example]], *, nlp: Language = None):
         """Initialize the pipe for training, using data examples if available.
         This method needs to be implemented by each TrainablePipe component,
         ensuring the internal model (if available) is initialized properly
diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx
index c3ee4d50a..2496d1376 100644
--- a/spacy/pipeline/transition_parser.pyx
+++ b/spacy/pipeline/transition_parser.pyx
@@ -6,15 +6,9 @@ from typing import Dict, Iterable, List, Optional, Tuple
 cimport numpy as np
 from cymem.cymem cimport Pool
 
-from itertools import islice
-
-from libc.stdlib cimport calloc, free
-from libc.string cimport memcpy, memset
-from libcpp.vector cimport vector
-
 import contextlib
 import random
-import warnings
+from itertools import islice
 
 import numpy
 import numpy.random
@@ -23,44 +17,36 @@ from thinc.api import (
     CupyOps,
     NumpyOps,
     Optimizer,
-    chain,
     get_array_module,
     get_ops,
     set_dropout_rate,
-    softmax_activation,
-    use_ops,
 )
-from thinc.legacy import LegacySequenceCategoricalCrossentropy
 from thinc.types import Floats2d, Ints1d
 
 from ..ml.tb_framework import TransitionModelInputs
 
 from ..tokens.doc cimport Doc
-from ._parser_internals cimport _beam_utils
-from ._parser_internals.search cimport Beam
-from ._parser_internals.stateclass cimport StateC, StateClass
-from .trainable_pipe cimport TrainablePipe
-
-from ._parser_internals import _beam_utils
-
 from ..typedefs cimport weight_t
 from ..vocab cimport Vocab
+from ._parser_internals cimport _beam_utils
+from ._parser_internals.stateclass cimport StateC, StateClass
 from ._parser_internals.transition_system cimport Transition, TransitionSystem
+from .trainable_pipe cimport TrainablePipe
 
 from .. import util
-from ..errors import Errors, Warnings
+from ..errors import Errors
 from ..training import (
     validate_distillation_examples,
     validate_examples,
     validate_get_examples,
 )
+from ._parser_internals import _beam_utils
 
 
 # TODO: Remove when we switch to Cython 3.
 cdef extern from "<algorithm>" namespace "std" nogil:
     bint equal[InputIt1, InputIt2](InputIt1 first1, InputIt1 last1, InputIt2 first2) except +
 
-
 NUMPY_OPS = NumpyOps()
 
 
@@ -236,12 +222,13 @@ class Parser(TrainablePipe):
         raise NotImplementedError
 
     def distill(self,
-               teacher_pipe: Optional[TrainablePipe],
-               examples: Iterable["Example"],
-               *,
-               drop: float=0.0,
-               sgd: Optional[Optimizer]=None,
-               losses: Optional[Dict[str, float]]=None):
+                teacher_pipe: Optional[TrainablePipe],
+                examples: Iterable["Example"],
+                *,
+                drop: float = 0.0,
+                sgd: Optional[Optimizer] = None,
+                losses: Optional[Dict[str, float]] = None
+                ):
         """Train a pipe (the student) on the predictions of another pipe
         (the teacher). The student is trained on the transition probabilities
         of the teacher.
@@ -257,7 +244,7 @@ class Parser(TrainablePipe):
         losses (Optional[Dict[str, float]]): Optional record of loss during
             distillation.
         RETURNS: The updated losses dictionary.
-        
+
         DOCS: https://spacy.io/api/dependencyparser#distill
         """
         if teacher_pipe is None:
@@ -291,11 +278,13 @@ class Parser(TrainablePipe):
         # teacher's distributions.
 
         student_inputs = TransitionModelInputs(docs=student_docs,
-            states=[state.copy() for state in states], moves=self.moves, max_moves=max_moves)
+                                               states=[state.copy() for state in states],
+                                               moves=self.moves,
+                                               max_moves=max_moves)
         (student_states, student_scores), backprop_scores = self.model.begin_update(student_inputs)
         actions = _states_diff_to_actions(states, student_states)
         teacher_inputs = TransitionModelInputs(docs=[eg.reference for eg in examples],
-            states=states, moves=teacher_pipe.moves, actions=actions)
+                                               states=states, moves=teacher_pipe.moves, actions=actions)
         (_, teacher_scores) = teacher_pipe.model.predict(teacher_inputs)
 
         loss, d_scores = self.get_teacher_student_loss(teacher_scores, student_scores)
@@ -308,10 +297,9 @@ class Parser(TrainablePipe):
 
         return losses
 
-
     def get_teacher_student_loss(
-        self, teacher_scores: List[Floats2d], student_scores: List[Floats2d],
-        normalize: bool=False,
+            self, teacher_scores: List[Floats2d], student_scores: List[Floats2d],
+            normalize: bool = False,
     ) -> Tuple[float, List[Floats2d]]:
         """Calculate the loss and its gradient for a batch of student
         scores, relative to teacher scores.
@@ -320,7 +308,7 @@ class Parser(TrainablePipe):
         student_scores: Scores representing the student model's predictions.
 
         RETURNS (Tuple[float, float]): The loss and the gradient.
-        
+
         DOCS: https://spacy.io/api/dependencyparser#get_teacher_student_loss
         """
 
@@ -334,9 +322,9 @@ class Parser(TrainablePipe):
         # ourselves.
 
         teacher_scores = self.model.ops.softmax(self.model.ops.xp.vstack(teacher_scores),
-            axis=-1, inplace=True)
+                                                axis=-1, inplace=True)
         student_scores = self.model.ops.softmax(self.model.ops.xp.vstack(student_scores),
-            axis=-1, inplace=True)
+                                                axis=-1, inplace=True)
 
         assert teacher_scores.shape == student_scores.shape
 
@@ -384,7 +372,6 @@ class Parser(TrainablePipe):
             except Exception as e:
                 error_handler(self.name, self, batch_in_order, e)
 
-
     def predict(self, docs):
         if isinstance(docs, Doc):
             docs = [docs]
@@ -414,7 +401,6 @@ class Parser(TrainablePipe):
 
     def set_annotations(self, docs, states_or_beams):
         cdef StateClass state
-        cdef Beam beam
         cdef Doc doc
         states = _beam_utils.collect_states(states_or_beams, docs)
         for i, (state, doc) in enumerate(zip(states, docs)):
@@ -423,7 +409,6 @@ class Parser(TrainablePipe):
                 hook(doc)
 
     def update(self, examples, *, drop=0., sgd=None, losses=None):
-        cdef StateClass state
         if losses is None:
             losses = {}
         losses.setdefault(self.name, 0.)
@@ -453,13 +438,15 @@ class Parser(TrainablePipe):
         else:
             init_states, gold_states, _ = self.moves.init_gold_batch(examples)
 
-        inputs = TransitionModelInputs(docs=docs, moves=self.moves,
-            max_moves=max_moves, states=[state.copy() for state in init_states])
+        inputs = TransitionModelInputs(docs=docs,
+                                       moves=self.moves,
+                                       max_moves=max_moves,
+                                       states=[state.copy() for state in init_states])
         (pred_states, scores), backprop_scores = self.model.begin_update(inputs)
         if sum(s.shape[0] for s in scores) == 0:
             return losses
         d_scores = self.get_loss((gold_states, init_states, pred_states, scores),
-            examples, max_moves)
+                                 examples, max_moves)
         backprop_scores((pred_states, d_scores))
         if sgd not in (None, False):
             self.finish_update(sgd)
@@ -500,9 +487,7 @@ class Parser(TrainablePipe):
         cdef TransitionSystem moves = self.moves
         cdef StateClass state
         cdef int clas
-        cdef int nF = self.model.get_dim("nF")
         cdef int nO = moves.n_moves
-        cdef int nS = sum([len(history) for history in histories])
         cdef Pool mem = Pool()
         cdef np.ndarray costs_i
         is_valid = <int*>mem.alloc(nO, sizeof(int))
@@ -569,8 +554,8 @@ class Parser(TrainablePipe):
 
         return losses
 
-    def update_beam(self, examples, *, beam_width,
-            drop=0., sgd=None, losses=None, beam_density=0.0):
+    def update_beam(self, examples, *, beam_width, drop=0.,
+                    sgd=None, losses=None, beam_density=0.0):
         raise NotImplementedError
 
     def set_output(self, nO):
@@ -695,9 +680,10 @@ class Parser(TrainablePipe):
             return states
 
         # Parse the states that are too long with the teacher's parsing model.
-        teacher_inputs = TransitionModelInputs(docs=docs, moves=moves,
-            states=[state.copy() for state in to_cut])
-        (teacher_states, _ ) = teacher_pipe.model.predict(teacher_inputs)
+        teacher_inputs = TransitionModelInputs(docs=docs,
+                                               moves=moves,
+                                               states=[state.copy() for state in to_cut])
+        (teacher_states, _) = teacher_pipe.model.predict(teacher_inputs)
 
         # Step through the teacher's actions and store every state after
         # each multiple of max_length.
@@ -795,6 +781,7 @@ def _states_to_actions(states: List[StateClass]) -> List[Ints1d]:
 
     return actions
 
+
 def _states_diff_to_actions(
     before_states: List[StateClass],
     after_states: List[StateClass]
@@ -815,8 +802,9 @@ def _states_diff_to_actions(
         c_state_before = before_state.c
         c_state_after = after_state.c
 
-        assert equal(c_state_before.history.begin(), c_state_before.history.end(),
-            c_state_after.history.begin())
+        assert equal(c_state_before.history.begin(),
+                     c_state_before.history.end(),
+                     c_state_after.history.begin())
 
     actions = []
     while True:
diff --git a/spacy/strings.pyx b/spacy/strings.pyx
index 43826f07c..62ab9c20d 100644
--- a/spacy/strings.pyx
+++ b/spacy/strings.pyx
@@ -1,10 +1,8 @@
 # cython: infer_types=True
-from typing import Any, Callable, Iterable, Iterator, List, Optional, Tuple, Union
+from typing import Iterable, Iterator, List, Optional, Tuple, Union
 
-cimport cython
 from libc.stdint cimport uint32_t
 from libc.string cimport memcpy
-from libcpp.set cimport set
 from murmurhash.mrmr cimport hash64
 
 import srsly
@@ -244,7 +242,6 @@ cdef class StringStore:
         cdef int n_length_bytes
         cdef int i
         cdef Utf8Str* string = <Utf8Str*>self.mem.alloc(1, sizeof(Utf8Str))
-        cdef uint32_t ulength = length
         if length < sizeof(string.s):
             string.s[0] = <unsigned char>length
             memcpy(&string.s[1], chars, length)
@@ -302,7 +299,7 @@ cpdef hash_t get_string_id(object string_or_hash) except -1:
 
     try:
         return hash_string(string_or_hash)
-    except:
+    except:   # no-cython-lint
         if _try_coerce_to_hash(string_or_hash, &str_hash):
             # Coerce the integral key to the expected primitive hash type.
             # This ensures that custom/overloaded "primitive" data types
@@ -319,6 +316,5 @@ cdef inline bint _try_coerce_to_hash(object key, hash_t* out_hash):
     try:
         out_hash[0] = key
         return True
-    except:
+    except:  # no-cython-lint
         return False
-
diff --git a/spacy/structs.pxd b/spacy/structs.pxd
index 0730c4c73..e7513cc11 100644
--- a/spacy/structs.pxd
+++ b/spacy/structs.pxd
@@ -52,7 +52,7 @@ cdef struct TokenC:
 
     int sent_start
     int ent_iob
-    attr_t ent_type # TODO: Is there a better way to do this? Multiple sources of truth..
+    attr_t ent_type  # TODO: Is there a better way to do this? Multiple sources of truth..
     attr_t ent_kb_id
     hash_t ent_id
 
diff --git a/spacy/symbols.pxd b/spacy/symbols.pxd
index f5d7784dc..9e74bf676 100644
--- a/spacy/symbols.pxd
+++ b/spacy/symbols.pxd
@@ -93,7 +93,7 @@ cdef enum symbol_t:
     ADV
     AUX
     CONJ
-    CCONJ # U20
+    CCONJ  # U20
     DET
     INTJ
     NOUN
@@ -419,7 +419,7 @@ cdef enum symbol_t:
     ccomp
     complm
     conj
-    cop # U20
+    cop  # U20
     csubj
     csubjpass
     dep
@@ -442,8 +442,8 @@ cdef enum symbol_t:
     num
     number
     oprd
-    obj # U20
-    obl # U20
+    obj  # U20
+    obl  # U20
     parataxis
     partmod
     pcomp
diff --git a/spacy/symbols.pyx b/spacy/symbols.pyx
index fbfc6f10d..98c517aad 100644
--- a/spacy/symbols.pyx
+++ b/spacy/symbols.pyx
@@ -96,7 +96,7 @@ IDS = {
     "ADV": ADV,
     "AUX": AUX,
     "CONJ": CONJ,
-    "CCONJ": CCONJ, # U20
+    "CCONJ": CCONJ,  # U20
     "DET": DET,
     "INTJ": INTJ,
     "NOUN": NOUN,
@@ -421,7 +421,7 @@ IDS = {
     "ccomp": ccomp,
     "complm": complm,
     "conj": conj,
-    "cop": cop, # U20
+    "cop": cop,  # U20
     "csubj": csubj,
     "csubjpass": csubjpass,
     "dep": dep,
@@ -444,8 +444,8 @@ IDS = {
     "num": num,
     "number": number,
     "oprd": oprd,
-    "obj": obj, # U20
-    "obl": obl, # U20
+    "obj": obj,  # U20
+    "obl": obl,  # U20
     "parataxis": parataxis,
     "partmod": partmod,
     "pcomp": pcomp,
diff --git a/spacy/tests/matcher/test_pattern_validation.py b/spacy/tests/matcher/test_pattern_validation.py
index 21fa36865..45f9f4ee7 100644
--- a/spacy/tests/matcher/test_pattern_validation.py
+++ b/spacy/tests/matcher/test_pattern_validation.py
@@ -52,7 +52,8 @@ TEST_PATTERNS = [
 
 
 @pytest.mark.parametrize(
-    "pattern", [[{"XX": "y"}, {"LENGTH": "2"}, {"TEXT": {"IN": 5}}]]
+    "pattern",
+    [[{"XX": "y"}], [{"LENGTH": "2"}], [{"TEXT": {"IN": 5}}], [{"text": {"in": 6}}]],
 )
 def test_matcher_pattern_validation(en_vocab, pattern):
     matcher = Matcher(en_vocab, validate=True)
diff --git a/spacy/tests/package/test_requirements.py b/spacy/tests/package/test_requirements.py
index 2576e5a8b..99027ddeb 100644
--- a/spacy/tests/package/test_requirements.py
+++ b/spacy/tests/package/test_requirements.py
@@ -11,6 +11,7 @@ def test_build_dependencies():
         "flake8",
         "hypothesis",
         "pre-commit",
+        "cython-lint",
         "black",
         "isort",
         "mypy",
diff --git a/spacy/tests/parser/_search.pyx b/spacy/tests/parser/_search.pyx
index 0983159b7..cd9e6b2f5 100644
--- a/spacy/tests/parser/_search.pyx
+++ b/spacy/tests/parser/_search.pyx
@@ -2,7 +2,7 @@
 from cymem.cymem cimport Pool
 
 from spacy.pipeline._parser_internals.search cimport Beam, MaxViolation
-from spacy.typedefs cimport class_t, weight_t
+from spacy.typedefs cimport class_t
 
 import pytest
 
@@ -42,32 +42,35 @@ cdef int destroy(Pool mem, void* state, void* extra_args) except -1:
     state = <TestState*>state
     mem.free(state)
 
+
 @cytest
 @pytest.mark.parametrize("nr_class,beam_width",
-    [
-        (2, 3),
-        (3, 6),
-        (4, 20),
-    ]
-)
+                         [
+                             (2, 3),
+                             (3, 6),
+                             (4, 20),
+                         ]
+                         )
 def test_init(nr_class, beam_width):
     b = Beam(nr_class, beam_width)
     assert b.size == 1
     assert b.width == beam_width
     assert b.nr_class == nr_class
 
+
 @cytest
 def test_init_violn():
     MaxViolation()
 
+
 @cytest
 @pytest.mark.parametrize("nr_class,beam_width,length",
-    [
-        (2, 3, 3),
-        (3, 6, 15),
-        (4, 20, 32),
-    ]
-)
+                         [
+                             (2, 3, 3),
+                             (3, 6, 15),
+                             (4, 20, 32),
+                         ]
+                         )
 def test_initialize(nr_class, beam_width, length):
     b = Beam(nr_class, beam_width)
     b.initialize(initialize, destroy, length, NULL)
@@ -79,11 +82,11 @@ def test_initialize(nr_class, beam_width, length):
 
 @cytest
 @pytest.mark.parametrize("nr_class,beam_width,length,extra",
-    [
-        (2, 3, 4, None),
-        (3, 6, 15, u"test beam 1"),
-    ]
-)
+                         [
+                             (2, 3, 4, None),
+                             (3, 6, 15, u"test beam 1"),
+                         ]
+                         )
 def test_initialize_extra(nr_class, beam_width, length, extra):
     b = Beam(nr_class, beam_width)
     if extra is None:
@@ -97,11 +100,11 @@ def test_initialize_extra(nr_class, beam_width, length, extra):
 
 @cytest
 @pytest.mark.parametrize("nr_class,beam_width,length",
-    [
-        (3, 6, 15),
-        (4, 20, 32),
-    ]
-)
+                         [
+                             (3, 6, 15),
+                             (4, 20, 32),
+                         ]
+                         )
 def test_transition(nr_class, beam_width, length):
     b = Beam(nr_class, beam_width)
     b.initialize(initialize, destroy, length, NULL)
diff --git a/spacy/tests/pipeline/test_span_finder.py b/spacy/tests/pipeline/test_span_finder.py
index 1a8789fff..47a8a34a8 100644
--- a/spacy/tests/pipeline/test_span_finder.py
+++ b/spacy/tests/pipeline/test_span_finder.py
@@ -230,10 +230,10 @@ def test_overfitting_IO():
 
     # Test scoring
     scores = nlp.evaluate(train_examples)
-    assert f"span_finder_{SPANS_KEY}_f" in scores
+    assert f"spans_{SPANS_KEY}_f" in scores
     # It's not perfect 1.0 F1 because it's designed to overgenerate for now.
-    assert scores[f"span_finder_{SPANS_KEY}_p"] == 0.75
-    assert scores[f"span_finder_{SPANS_KEY}_r"] == 1.0
+    assert scores[f"spans_{SPANS_KEY}_p"] == 0.75
+    assert scores[f"spans_{SPANS_KEY}_r"] == 1.0
 
     # also test that the spancat works for just a single entity in a sentence
     doc = nlp("London")
diff --git a/spacy/tests/pipeline/test_tok2vec.py b/spacy/tests/pipeline/test_tok2vec.py
index 2c5e6a167..f6cefbc1f 100644
--- a/spacy/tests/pipeline/test_tok2vec.py
+++ b/spacy/tests/pipeline/test_tok2vec.py
@@ -192,8 +192,7 @@ def test_tok2vec_listener(with_vectors):
         for tag in t[1]["tags"]:
             tagger.add_label(tag)
 
-    # Check that the Tok2Vec component finds it listeners
-    assert tok2vec.listeners == []
+    # Check that the Tok2Vec component finds its listeners
     optimizer = nlp.initialize(lambda: train_examples)
     assert tok2vec.listeners == [tagger_tok2vec]
 
@@ -221,7 +220,6 @@ def test_tok2vec_listener_callback():
     assert nlp.pipe_names == ["tok2vec", "tagger"]
     tagger = nlp.get_pipe("tagger")
     tok2vec = nlp.get_pipe("tok2vec")
-    nlp._link_components()
     docs = [nlp.make_doc("A random sentence")]
     tok2vec.model.initialize(X=docs)
     gold_array = [[1.0 for tag in ["V", "Z"]] for word in docs]
@@ -430,29 +428,46 @@ def test_replace_listeners_from_config():
         nlp.to_disk(dir_path)
         base_model = str(dir_path)
         new_config = {
-            "nlp": {"lang": "en", "pipeline": ["tok2vec", "tagger", "ner"]},
+            "nlp": {
+                "lang": "en",
+                "pipeline": ["tok2vec", "tagger2", "ner3", "tagger4"],
+            },
             "components": {
                 "tok2vec": {"source": base_model},
-                "tagger": {
+                "tagger2": {
                     "source": base_model,
+                    "component": "tagger",
                     "replace_listeners": ["model.tok2vec"],
                 },
-                "ner": {"source": base_model},
+                "ner3": {
+                    "source": base_model,
+                    "component": "ner",
+                },
+                "tagger4": {
+                    "source": base_model,
+                    "component": "tagger",
+                },
             },
         }
         new_nlp = util.load_model_from_config(new_config, auto_fill=True)
     new_nlp.initialize(lambda: examples)
     tok2vec = new_nlp.get_pipe("tok2vec")
-    tagger = new_nlp.get_pipe("tagger")
-    ner = new_nlp.get_pipe("ner")
-    assert tok2vec.listening_components == ["ner"]
+    tagger = new_nlp.get_pipe("tagger2")
+    ner = new_nlp.get_pipe("ner3")
+    assert "ner" not in new_nlp.pipe_names
+    assert "tagger" not in new_nlp.pipe_names
+    assert tok2vec.listening_components == ["ner3", "tagger4"]
     assert any(isinstance(node, Tok2VecListener) for node in ner.model.walk())
     assert not any(isinstance(node, Tok2VecListener) for node in tagger.model.walk())
     t2v_cfg = new_nlp.config["components"]["tok2vec"]["model"]
     assert t2v_cfg["@architectures"] == "spacy.Tok2Vec.v2"
-    assert new_nlp.config["components"]["tagger"]["model"]["tok2vec"] == t2v_cfg
+    assert new_nlp.config["components"]["tagger2"]["model"]["tok2vec"] == t2v_cfg
     assert (
-        new_nlp.config["components"]["ner"]["model"]["tok2vec"]["@architectures"]
+        new_nlp.config["components"]["ner3"]["model"]["tok2vec"]["@architectures"]
+        == "spacy.Tok2VecListener.v1"
+    )
+    assert (
+        new_nlp.config["components"]["tagger4"]["model"]["tok2vec"]["@architectures"]
         == "spacy.Tok2VecListener.v1"
     )
 
@@ -627,3 +642,57 @@ def test_tok2vec_distillation_teacher_annotations():
 
     student_tok2vec.distill = tok2vec_distill_wrapper.__get__(student_tok2vec, Tok2Vec)
     student_nlp.distill(teacher_nlp, train_examples_student, sgd=optimizer, losses={})
+
+
+def test_tok2vec_listener_source_link_name():
+    """When a component is sourced into a second pipeline, its internal name
+    and the tok2vec listener map follow the most recently modified pipeline.
+    """
+    orig_config = Config().from_str(cfg_string_multi)
+    nlp1 = util.load_model_from_config(orig_config, auto_fill=True, validate=True)
+    assert nlp1.get_pipe("tok2vec").listening_components == ["tagger", "ner"]
+
+    nlp2 = English()
+    nlp2.add_pipe("tok2vec", source=nlp1)
+    nlp2.add_pipe("tagger", name="tagger2", source=nlp1)
+
+    # The component cannot carry the right name for both pipelines at once;
+    # for now, the most recently modified pipeline takes priority.
+    assert nlp1.get_pipe("tagger").name == nlp2.get_pipe("tagger2").name == "tagger2"
+
+    # Likewise, the tok2vec cannot hold the right listener map for both
+    # pipelines; for now, the most recently modified pipeline takes priority.
+    assert nlp2.get_pipe("tok2vec").listening_components == ["tagger2"]
+    nlp2.add_pipe("ner", name="ner3", source=nlp1)
+    assert nlp2.get_pipe("tok2vec").listening_components == ["tagger2", "ner3"]
+    nlp2.remove_pipe("ner3")
+    assert nlp2.get_pipe("tok2vec").listening_components == ["tagger2"]
+    nlp2.remove_pipe("tagger2")
+    assert nlp2.get_pipe("tok2vec").listening_components == []
+
+    # at this point the tok2vec listener map corresponds to nlp2, not nlp1
+    assert nlp1.get_pipe("tok2vec").listening_components == []
+
+    # modifying the nlp1 pipeline syncs the tok2vec listener map back to nlp1
+    nlp1.add_pipe("sentencizer")
+    assert nlp1.get_pipe("tok2vec").listening_components == ["tagger", "ner"]
+
+    # modifying nlp2 syncs the listener map back to nlp2 (now empty)
+    nlp2.add_pipe("sentencizer")
+    assert nlp1.get_pipe("tok2vec").listening_components == []
+
+
+def test_tok2vec_listener_source_replace_listeners():
+    orig_config = Config().from_str(cfg_string_multi)
+    nlp1 = util.load_model_from_config(orig_config, auto_fill=True, validate=True)
+    assert nlp1.get_pipe("tok2vec").listening_components == ["tagger", "ner"]
+    nlp1.replace_listeners("tok2vec", "tagger", ["model.tok2vec"])
+    assert nlp1.get_pipe("tok2vec").listening_components == ["ner"]  # tagger no longer listens
+
+    nlp2 = English()
+    nlp2.add_pipe("tok2vec", source=nlp1)
+    assert nlp2.get_pipe("tok2vec").listening_components == []
+    nlp2.add_pipe("tagger", source=nlp1)  # its listener was replaced in nlp1
+    assert nlp2.get_pipe("tok2vec").listening_components == []  # so no link is added here
+    nlp2.add_pipe("ner", name="ner2", source=nlp1)
+    assert nlp2.get_pipe("tok2vec").listening_components == ["ner2"]  # linked under the new name
diff --git a/spacy/tests/serialize/test_serialize_config.py b/spacy/tests/serialize/test_serialize_config.py
index 18c91c3ac..b351ea801 100644
--- a/spacy/tests/serialize/test_serialize_config.py
+++ b/spacy/tests/serialize/test_serialize_config.py
@@ -18,6 +18,7 @@ from spacy.ml.models import (
     build_Tok2Vec_model,
 )
 from spacy.schemas import ConfigSchema, ConfigSchemaDistill, ConfigSchemaPretrain
+from spacy.training import Example
 from spacy.util import (
     load_config,
     load_config_from_str,
@@ -469,6 +470,55 @@ def test_config_overrides():
     assert nlp.pipe_names == ["tok2vec", "tagger"]
 
 
+@pytest.mark.filterwarnings("ignore:\\[W036")
+def test_config_overrides_registered_functions():
+    nlp = spacy.blank("en")
+    nlp.add_pipe("attribute_ruler")
+    with make_tempdir() as d:
+        nlp.to_disk(d)
+        nlp_re1 = spacy.load(  # override attribute_ruler's scorer via the config argument
+            d,
+            config={
+                "components": {
+                    "attribute_ruler": {
+                        "scorer": {"@scorers": "spacy.tagger_scorer.v1"}
+                    }
+                }
+            },
+        )
+        assert (
+            nlp_re1.config["components"]["attribute_ruler"]["scorer"]["@scorers"]
+            == "spacy.tagger_scorer.v1"
+        )
+
+        @registry.misc("test_some_other_key")
+        def misc_some_other_key():
+            return "some_other_key"
+
+        nlp_re2 = spacy.load(  # override whose spans_key argument is itself a registered function
+            d,
+            config={
+                "components": {
+                    "attribute_ruler": {
+                        "scorer": {
+                            "@scorers": "spacy.overlapping_labeled_spans_scorer.v1",
+                            "spans_key": {"@misc": "test_some_other_key"},
+                        }
+                    }
+                }
+            },
+        )
+        assert nlp_re2.config["components"]["attribute_ruler"]["scorer"][
+            "spans_key"
+        ] == {"@misc": "test_some_other_key"}  # kept as an unresolved reference in the config
+        # run a dummy evaluation (the scores themselves will be None) to check
+        # that the nested spans_key override is resolved to the value returned
+        # by the registered function, which then shows up in the score keys
+        example = Example.from_dict(nlp_re2.make_doc("a b c"), {})
+        scores = nlp_re2.evaluate([example])
+        assert "spans_some_other_key_f" in scores
+
+
 def test_config_interpolation():
     config = Config().from_str(nlp_config_string, interpolate=False)
     assert config["corpora"]["train"]["path"] == "${paths.train}"
diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index 9a2d7705f..8e1c9ca32 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -697,7 +697,6 @@ def test_string_to_list_intify(value):
     assert string_to_list(value, intify=True) == [1, 2, 3]
 
 
-@pytest.mark.skip(reason="Temporarily skip before models are published")
 def test_download_compatibility():
     spec = SpecifierSet("==" + about.__version__)
     spec.prereleases = False
@@ -708,7 +707,6 @@ def test_download_compatibility():
         assert get_minor_version(about.__version__) == get_minor_version(version)
 
 
-@pytest.mark.skip(reason="Temporarily skip before models are published")
 def test_validate_compatibility_table():
     spec = SpecifierSet("==" + about.__version__)
     spec.prereleases = False
diff --git a/spacy/tests/test_displacy.py b/spacy/tests/test_displacy.py
index ce103068a..1570f8d09 100644
--- a/spacy/tests/test_displacy.py
+++ b/spacy/tests/test_displacy.py
@@ -377,3 +377,22 @@ def test_displacy_manual_sorted_entities():
 
     html = displacy.render(doc, style="ent", manual=True)
     assert html.find("FIRST") < html.find("SECOND")
+
+
+@pytest.mark.issue(12816)
+def test_issue12816(en_vocab) -> None:
+    """Test that displaCy's span visualizer escapes annotated HTML tags correctly."""
+    # Create a doc containing an annotated word and an unannotated HTML tag
+    doc = Doc(en_vocab, words=["test", "<TEST>"])
+    doc.spans["sc"] = [Span(doc, 0, 1, label="test")]  # covers only the word "test"
+
+    # Verify that the HTML tag is escaped when unannotated
+    html = displacy.render(doc, style="span")
+    assert "&lt;TEST&gt;" in html
+
+    # Annotate the HTML tag
+    doc.spans["sc"].append(Span(doc, 1, 2, label="test"))  # covers the "<TEST>" token
+
+    # Verify that the HTML tag is still escaped now that it is inside a span
+    html = displacy.render(doc, style="span")
+    assert "&lt;TEST&gt;" in html
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index 5a7dc1e6f..cd1a25768 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -220,6 +220,10 @@ def test_minor_version(a1, a2, b1, b2, is_match):
             {"training.batch_size": 128, "training.optimizer.learn_rate": 0.01},
             {"training": {"batch_size": 128, "optimizer": {"learn_rate": 0.01}}},
         ),
+        (
+            {"attribute_ruler.scorer.@scorers": "spacy.tagger_scorer.v1"},
+            {"attribute_ruler": {"scorer": {"@scorers": "spacy.tagger_scorer.v1"}}},
+        ),
     ],
 )
 def test_dot_to_dict(dot_notation, expected):
@@ -228,6 +232,29 @@ def test_dot_to_dict(dot_notation, expected):
     assert util.dict_to_dot(result) == dot_notation
 
 
+@pytest.mark.parametrize(
+    "dot_notation,expected",
+    [
+        (
+            {"token.pos": True, "token._.xyz": True},
+            {"token": {"pos": True, "_": {"xyz": True}}},
+        ),
+        (
+            {"training.batch_size": 128, "training.optimizer.learn_rate": 0.01},
+            {"training": {"batch_size": 128, "optimizer": {"learn_rate": 0.01}}},
+        ),
+        (
+            {"attribute_ruler.scorer": {"@scorers": "spacy.tagger_scorer.v1"}},
+            {"attribute_ruler": {"scorer": {"@scorers": "spacy.tagger_scorer.v1"}}},
+        ),
+    ],
+)
+def test_dot_to_dict_overrides(dot_notation, expected):
+    result = util.dot_to_dict(dot_notation)
+    assert result == expected
+    assert util.dict_to_dot(result, for_overrides=True) == dot_notation  # "@scorers" dict stays a leaf value
+
+
 def test_set_dot_to_object():
     config = {"foo": {"bar": 1, "baz": {"x": "y"}}, "test": {"a": {"b": "c"}}}
     with pytest.raises(KeyError):
diff --git a/spacy/tests/vocab_vectors/test_vectors.py b/spacy/tests/vocab_vectors/test_vectors.py
index ed1322908..16574656b 100644
--- a/spacy/tests/vocab_vectors/test_vectors.py
+++ b/spacy/tests/vocab_vectors/test_vectors.py
@@ -401,6 +401,7 @@ def test_vectors_serialize():
         row_r = v_r.add("D", vector=OPS.asarray([10, 20, 30, 40], dtype="f"))
         assert row == row_r
         assert_equal(OPS.to_numpy(v.data), OPS.to_numpy(v_r.data))
+        assert v.attr == v_r.attr
 
 
 def test_vector_is_oov():
@@ -645,3 +646,32 @@ def test_equality():
     vectors1.resize((5, 9))
     vectors2.resize((5, 9))
     assert vectors1 == vectors2
+
+
+def test_vectors_attr():
+    data = numpy.asarray([[0, 0, 0], [1, 2, 3], [9, 8, 7]], dtype="f")
+    # default attr (ORTH): only the exact key "A" has a vector, "a" does not
+    nlp = English()
+    nlp.vocab.vectors = Vectors(data=data, keys=["A", "B", "C"])
+    assert nlp.vocab.strings["A"] in nlp.vocab.vectors.key2row
+    assert nlp.vocab.strings["a"] not in nlp.vocab.vectors.key2row
+    assert nlp.vocab["A"].has_vector is True
+    assert nlp.vocab["a"].has_vector is False
+    assert nlp("A")[0].has_vector is True
+    assert nlp("a")[0].has_vector is False
+
+    # attr="LOWER": the table is keyed by "a", yet "A" and "a" both have a vector
+    nlp = English()
+    nlp.vocab.vectors = Vectors(data=data, keys=["a", "b", "c"], attr="LOWER")
+    assert nlp.vocab.strings["A"] not in nlp.vocab.vectors.key2row
+    assert nlp.vocab.strings["a"] in nlp.vocab.vectors.key2row
+    assert nlp.vocab["A"].has_vector is True
+    assert nlp.vocab["a"].has_vector is True
+    assert nlp("A")[0].has_vector is True
+    assert nlp("a")[0].has_vector is True
+    # add a new vectors entry: setting "D" gives both "D" and "d" a vector
+    assert nlp.vocab["D"].has_vector is False
+    assert nlp.vocab["d"].has_vector is False
+    nlp.vocab.set_vector("D", numpy.asarray([4, 5, 6]))
+    assert nlp.vocab["D"].has_vector is True
+    assert nlp.vocab["d"].has_vector is True
diff --git a/spacy/tokenizer.pxd b/spacy/tokenizer.pxd
index b2e509694..b8ccc76e0 100644
--- a/spacy/tokenizer.pxd
+++ b/spacy/tokenizer.pxd
@@ -26,24 +26,59 @@ cdef class Tokenizer:
 
     cdef Doc _tokenize_affixes(self, str string, bint with_special_cases)
     cdef int _apply_special_cases(self, Doc doc) except -1
-    cdef void _filter_special_spans(self, vector[SpanC] &original,
-                            vector[SpanC] &filtered, int doc_len) nogil
-    cdef object _prepare_special_spans(self, Doc doc,
-                                       vector[SpanC] &filtered)
-    cdef int _retokenize_special_spans(self, Doc doc, TokenC* tokens,
-                                       object span_data)
-    cdef int _try_specials_and_cache(self, hash_t key, Doc tokens,
-                                     int* has_special,
-                                     bint with_special_cases) except -1
-    cdef int _tokenize(self, Doc tokens, str span, hash_t key,
-                       int* has_special, bint with_special_cases) except -1
-    cdef str _split_affixes(self, str string,
-                                vector[LexemeC*] *prefixes,
-                                vector[LexemeC*] *suffixes, int* has_special,
-                                bint with_special_cases)
-    cdef int _attach_tokens(self, Doc tokens, str string,
-                            vector[LexemeC*] *prefixes,
-                            vector[LexemeC*] *suffixes, int* has_special,
-                            bint with_special_cases) except -1
-    cdef int _save_cached(self, const TokenC* tokens, hash_t key,
-                          int* has_special, int n) except -1
+    cdef void _filter_special_spans(
+        self,
+        vector[SpanC] &original,
+        vector[SpanC] &filtered,
+        int doc_len,
+    ) nogil
+    cdef object _prepare_special_spans(
+        self,
+        Doc doc,
+        vector[SpanC] &filtered,
+    )
+    cdef int _retokenize_special_spans(
+        self,
+        Doc doc,
+        TokenC* tokens,
+        object span_data,
+    )
+    cdef int _try_specials_and_cache(
+        self,
+        hash_t key,
+        Doc tokens,
+        int* has_special,
+        bint with_special_cases,
+    ) except -1
+    cdef int _tokenize(
+        self,
+        Doc tokens,
+        str span,
+        hash_t key,
+        int* has_special,
+        bint with_special_cases,
+    ) except -1
+    cdef str _split_affixes(
+        self,
+        str string,
+        vector[LexemeC*] *prefixes,
+        vector[LexemeC*] *suffixes,
+        int* has_special,
+        bint with_special_cases,
+    )
+    cdef int _attach_tokens(
+        self,
+        Doc tokens,
+        str string,
+        vector[LexemeC*] *prefixes,
+        vector[LexemeC*] *suffixes,
+        int* has_special,
+        bint with_special_cases,
+    ) except -1
+    cdef int _save_cached(
+        self,
+        const TokenC* tokens,
+        hash_t key,
+        int* has_special,
+        int n,
+    ) except -1
diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index 442c80c8b..0b9d6e22a 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -323,7 +323,7 @@ cdef class Tokenizer:
         cdef int span_start
         cdef int span_end
         while i < doc.length:
-            if not i in span_data:
+            if i not in span_data:
                 tokens[i + offset] = doc.c[i]
                 i += 1
             else:
@@ -394,12 +394,14 @@ cdef class Tokenizer:
         self._save_cached(&tokens.c[orig_size], orig_key, has_special,
                           tokens.length - orig_size)
 
-    cdef str _split_affixes(self, str string,
-                                vector[const LexemeC*] *prefixes,
-                                vector[const LexemeC*] *suffixes,
-                                int* has_special,
-                                bint with_special_cases):
-        cdef size_t i
+    cdef str _split_affixes(
+        self,
+        str string,
+        vector[const LexemeC*] *prefixes,
+        vector[const LexemeC*] *suffixes,
+        int* has_special,
+        bint with_special_cases
+    ):
         cdef str prefix
         cdef str suffix
         cdef str minus_pre
@@ -444,10 +446,6 @@ cdef class Tokenizer:
                             vector[const LexemeC*] *suffixes,
                             int* has_special,
                             bint with_special_cases) except -1:
-        cdef bint specials_hit = 0
-        cdef bint cache_hit = 0
-        cdef int split, end
-        cdef const LexemeC* const* lexemes
         cdef const LexemeC* lexeme
         cdef str span
         cdef int i
@@ -457,9 +455,11 @@ cdef class Tokenizer:
         if string:
             if self._try_specials_and_cache(hash_string(string), tokens, has_special, with_special_cases):
                 pass
-            elif (self.token_match and self.token_match(string)) or \
-                    (self.url_match and \
-                    self.url_match(string)):
+            elif (
+                (self.token_match and self.token_match(string)) or
+                (self.url_match and self.url_match(string))
+            ):
+
                 # We're always saying 'no' to spaces here -- the caller will
                 # fix up the outermost one, with reference to the original.
                 # See Issue #859
@@ -820,7 +820,7 @@ cdef class Tokenizer:
         self.infix_finditer = None
         self.token_match = None
         self.url_match = None
-        msg = util.from_bytes(bytes_data, deserializers, exclude)
+        util.from_bytes(bytes_data, deserializers, exclude)
         if "prefix_search" in data and isinstance(data["prefix_search"], str):
             self.prefix_search = re.compile(data["prefix_search"]).search
         if "suffix_search" in data and isinstance(data["suffix_search"], str):
diff --git a/spacy/tokens/doc.pxd b/spacy/tokens/doc.pxd
index 4565832ec..9fb6a72c8 100644
--- a/spacy/tokens/doc.pxd
+++ b/spacy/tokens/doc.pxd
@@ -31,7 +31,7 @@ cdef int token_by_start(const TokenC* tokens, int length, int start_char) except
 cdef int token_by_end(const TokenC* tokens, int length, int end_char) except -2
 
 
-cdef int [:,:] _get_lca_matrix(Doc, int start, int end)
+cdef int [:, :] _get_lca_matrix(Doc, int start, int end)
 
 
 cdef class Doc:
@@ -61,7 +61,6 @@ cdef class Doc:
     cdef int length
     cdef int max_length
 
-
     cdef public object noun_chunks_iterator
 
     cdef object __weakref__
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 541178aff..df012a28a 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -35,6 +35,7 @@ from ..attrs cimport (
     LENGTH,
     MORPH,
     NORM,
+    ORTH,
     POS,
     SENT_START,
     SPACY,
@@ -42,14 +43,13 @@ from ..attrs cimport (
     attr_id_t,
 )
 from ..lexeme cimport EMPTY_LEXEME, Lexeme
-from ..typedefs cimport attr_t, flags_t
+from ..typedefs cimport attr_t
 from .token cimport Token
 
 from .. import parts_of_speech, schemas, util
 from ..attrs import IDS, intify_attr
-from ..compat import copy_reg, pickle
+from ..compat import copy_reg
 from ..errors import Errors, Warnings
-from ..morphology import Morphology
 from ..util import get_words_and_spaces
 from .retokenizer import Retokenizer
 from .underscore import Underscore, get_ext_args
@@ -613,13 +613,26 @@ cdef class Doc:
         """
         if "similarity" in self.user_hooks:
             return self.user_hooks["similarity"](self, other)
-        if isinstance(other, (Lexeme, Token)) and self.length == 1:
-            if self.c[0].lex.orth == other.orth:
+        attr = getattr(self.vocab.vectors, "attr", ORTH)
+        cdef Token this_token
+        cdef Token other_token
+        cdef Lexeme other_lex
+        if len(self) == 1 and isinstance(other, Token):
+            this_token = self[0]
+            other_token = other
+            if Token.get_struct_attr(this_token.c, attr) == Token.get_struct_attr(other_token.c, attr):
                 return 1.0
-        elif isinstance(other, (Span, Doc)) and len(self) == len(other):
+        elif len(self) == 1 and isinstance(other, Lexeme):
+            this_token = self[0]
+            other_lex = other
+            if Token.get_struct_attr(this_token.c, attr) == Lexeme.get_struct_attr(other_lex.c, attr):
+                return 1.0
+        elif isinstance(other, (Doc, Span)) and len(self) == len(other):
             similar = True
-            for i in range(self.length):
-                if self[i].orth != other[i].orth:
+            for i in range(len(self)):
+                this_token = self[i]
+                other_token = other[i]
+                if Token.get_struct_attr(this_token.c, attr) != Token.get_struct_attr(other_token.c, attr):
                     similar = False
                     break
             if similar:
@@ -767,7 +780,7 @@ cdef class Doc:
             # TODO:
             # 1. Test basic data-driven ORTH gazetteer
             # 2. Test more nuanced date and currency regex
-            cdef attr_t entity_type, kb_id, ent_id
+            cdef attr_t kb_id, ent_id
             cdef int ent_start, ent_end
             ent_spans = []
             for ent_info in ents:
@@ -975,7 +988,6 @@ cdef class Doc:
             >>> np_array = doc.to_array([LOWER, POS, ENT_TYPE, IS_ALPHA])
         """
         cdef int i, j
-        cdef attr_id_t feature
         cdef np.ndarray[attr_t, ndim=2] output
         # Handle scalar/list inputs of strings/ints for py_attr_ids
         # See also #3064
@@ -987,8 +999,10 @@ cdef class Doc:
             py_attr_ids = [py_attr_ids]
         # Allow strings, e.g. 'lemma' or 'LEMMA'
         try:
-            py_attr_ids = [(IDS[id_.upper()] if hasattr(id_, "upper") else id_)
-                       for id_ in py_attr_ids]
+            py_attr_ids = [
+                (IDS[id_.upper()] if hasattr(id_, "upper") else id_)
+                for id_ in py_attr_ids
+            ]
         except KeyError as msg:
             keys = list(IDS.keys())
             raise KeyError(Errors.E983.format(dict="IDS", key=msg, keys=keys)) from None
@@ -1022,8 +1036,6 @@ cdef class Doc:
         DOCS: https://spacy.io/api/doc#count_by
         """
         cdef int i
-        cdef attr_t attr
-        cdef size_t count
 
         if counts is None:
             counts = Counter()
@@ -1085,7 +1097,6 @@ cdef class Doc:
         cdef int i, col
         cdef int32_t abs_head_index
         cdef attr_id_t attr_id
-        cdef TokenC* tokens = self.c
         cdef int length = len(array)
         if length != len(self):
             raise ValueError(Errors.E971.format(array_length=length, doc_length=len(self)))
@@ -1226,7 +1237,7 @@ cdef class Doc:
                             span.label,
                             span.kb_id,
                             span.id,
-                            span.text, # included as a check
+                            span.text,  # included as a check
                         ))
             char_offset += len(doc.text)
             if len(doc) > 0 and ensure_whitespace and not doc[-1].is_space and not bool(doc[-1].whitespace_):
@@ -1505,7 +1516,6 @@ cdef class Doc:
             attributes are inherited from the syntactic root of the span.
         RETURNS (Token): The first newly merged token.
         """
-        cdef str tag, lemma, ent_type
         attr_len = len(attributes)
         span_len = len(spans)
         if not attr_len == span_len:
@@ -1621,7 +1631,6 @@ cdef class Doc:
                 for token in char_span[1:]:
                     token.is_sent_start = False
 
-
         for span_group in doc_json.get("spans", {}):
             spans = []
             for span in doc_json["spans"][span_group]:
@@ -1653,7 +1662,7 @@ cdef class Doc:
                 start = token_by_char(self.c, self.length, token_data["start"])
                 value = token_data["value"]
                 self[start]._.set(token_attr, value)
-                
+
         for span_attr in doc_json.get("underscore_span", {}):
             if not Span.has_extension(span_attr):
                 Span.set_extension(span_attr)
@@ -1699,7 +1708,7 @@ cdef class Doc:
                 token_data["dep"] = token.dep_
                 token_data["head"] = token.head.i
             data["tokens"].append(token_data)
-        
+
         if self.spans:
             data["spans"] = {}
             for span_group in self.spans:
@@ -1750,7 +1759,7 @@ cdef class Doc:
                                     data["underscore_span"] = {}
                                 if attr not in data["underscore_span"]:
                                     data["underscore_span"][attr] = []
-                                data["underscore_span"][attr].append({"start": start, "end": end, "value": value, "label": _label, "kb_id": _kb_id, "id":_span_id})
+                                data["underscore_span"][attr].append({"start": start, "end": end, "value": value, "label": _label, "kb_id": _kb_id, "id": _span_id})
 
             for attr in underscore:
                 if attr not in user_keys:
@@ -1773,7 +1782,6 @@ cdef class Doc:
         output.fill(255)
         cdef int i, j, start_idx, end_idx
         cdef bytes byte_string
-        cdef unsigned char utf8_char
         for i, byte_string in enumerate(byte_strings):
             j = 0
             start_idx = 0
@@ -1826,8 +1834,6 @@ cdef int token_by_char(const TokenC* tokens, int length, int char_idx) except -2
 
 cdef int set_children_from_heads(TokenC* tokens, int start, int end) except -1:
     # note: end is exclusive
-    cdef TokenC* head
-    cdef TokenC* child
     cdef int i
     # Set number of left/right children to 0. We'll increment it in the loops.
     for i in range(start, end):
@@ -1927,7 +1933,7 @@ cdef int _get_tokens_lca(Token token_j, Token token_k):
     return -1
 
 
-cdef int [:,:] _get_lca_matrix(Doc doc, int start, int end):
+cdef int [:, :] _get_lca_matrix(Doc doc, int start, int end):
     """Given a doc and a start and end position defining a set of contiguous
     tokens within it, returns a matrix of Lowest Common Ancestors (LCA), where
     LCA[i, j] is the index of the lowest common ancestor among token i and j.
@@ -1940,7 +1946,7 @@ cdef int [:,:] _get_lca_matrix(Doc doc, int start, int end):
     RETURNS (int [:, :]): memoryview of numpy.array[ndim=2, dtype=numpy.int32],
         with shape (n, n), where n = len(doc).
     """
-    cdef int [:,:] lca_matrix
+    cdef int [:, :] lca_matrix
     cdef int j, k
     n_tokens= end - start
     lca_mat = numpy.empty((n_tokens, n_tokens), dtype=numpy.int32)
diff --git a/spacy/tokens/graph.pyx b/spacy/tokens/graph.pyx
index 0466ed6f3..e789d1a37 100644
--- a/spacy/tokens/graph.pyx
+++ b/spacy/tokens/graph.pyx
@@ -3,7 +3,7 @@ from typing import Generator, List, Tuple
 
 cimport cython
 from cython.operator cimport dereference
-from libc.stdint cimport int32_t, int64_t
+from libc.stdint cimport int32_t
 from libcpp.pair cimport pair
 from libcpp.unordered_map cimport unordered_map
 from libcpp.unordered_set cimport unordered_set
@@ -11,7 +11,6 @@ from libcpp.unordered_set cimport unordered_set
 import weakref
 
 from murmurhash.mrmr cimport hash64
-from preshed.maps cimport map_get_unless_missing
 
 from .. import Errors
 
@@ -26,7 +25,7 @@ from .token import Token
 cdef class Edge:
     cdef readonly Graph graph
     cdef readonly int i
-    
+
     def __init__(self, Graph graph, int i):
         self.graph = graph
         self.i = i
@@ -42,7 +41,7 @@ cdef class Edge:
     @property
     def head(self) -> "Node":
         return Node(self.graph, self.graph.c.edges[self.i].head)
-    
+
     @property
     def tail(self) -> "Tail":
         return Node(self.graph, self.graph.c.edges[self.i].tail)
@@ -68,7 +67,7 @@ cdef class Node:
     def __init__(self, Graph graph, int i):
         """A reference to a node of an annotation graph. Each node is made up of
         an ordered set of zero or more token indices.
-        
+
         Node references are usually created by the Graph object itself, or from
         the Node or Edge objects. You usually won't need to instantiate this
         class yourself.
@@ -107,13 +106,13 @@ cdef class Node:
     @property
     def is_none(self) -> bool:
         """Whether the node is a special value, indicating 'none'.
-        
+
         The NoneNode type is returned by the Graph, Edge and Node objects when
         there is no match to a query. It has the same API as Node, but it always
         returns NoneNode, NoneEdge or empty lists for its queries.
         """
         return False
- 
+
     @property
     def doc(self) -> "Doc":
         """The Doc object that the graph refers to."""
@@ -128,19 +127,19 @@ cdef class Node:
     def head(self, i=None, label=None) -> "Node":
         """Get the head of the first matching edge, searching by index, label,
         both or neither.
-        
+
         For instance, `node.head(i=1)` will get the head of the second edge that
         this node is a tail of. `node.head(i=1, label="ARG0")` will further
         check that the second edge has the label `"ARG0"`. 
-        
+
         If no matching node can be found, the graph's NoneNode is returned. 
         """
         return self.headed(i=i, label=label)
-    
+
     def tail(self, i=None, label=None) -> "Node":
         """Get the tail of the first matching edge, searching by index, label,
         both or neither.
- 
+
         If no matching node can be found, the graph's NoneNode is returned. 
         """
         return self.tailed(i=i, label=label).tail
@@ -169,7 +168,7 @@ cdef class Node:
         cdef vector[int] edge_indices
         self._find_edges(edge_indices, "head", label)
         return [Node(self.graph, self.graph.c.edges[i].head) for i in edge_indices]
-     
+
     def tails(self, label=None) -> List["Node"]:
         """Find all matching tails of this node."""
         cdef vector[int] edge_indices
@@ -198,7 +197,7 @@ cdef class Node:
             return NoneEdge(self.graph)
         else:
             return Edge(self.graph, idx)
-    
+
     def tailed(self, i=None, label=None) -> Edge:
         """Find the first matching edge tailed by this node.
         If no matching edge can be found, the graph's NoneEdge is returned.
@@ -281,7 +280,7 @@ cdef class NoneEdge(Edge):
     def __init__(self, graph):
         self.graph = graph
         self.i = -1
-   
+
     @property
     def doc(self) -> "Doc":
         return self.graph.doc
@@ -289,7 +288,7 @@ cdef class NoneEdge(Edge):
     @property
     def head(self) -> "NoneNode":
         return NoneNode(self.graph)
-    
+
     @property
     def tail(self) -> "NoneNode":
         return NoneNode(self.graph)
@@ -317,7 +316,7 @@ cdef class NoneNode(Node):
 
     def __len__(self):
         return 0
- 
+
     @property
     def is_none(self):
         return -1
@@ -338,14 +337,14 @@ cdef class NoneNode(Node):
 
     def walk_heads(self):
         yield from [] 
-    
+
     def walk_tails(self):
         yield from [] 
- 
+
 
 cdef class Graph:
     """A set of directed labelled relationships between sets of tokens.
-    
+
     EXAMPLE:
         Construction 1
         >>> graph = Graph(doc, name="srl")
@@ -370,7 +369,9 @@ cdef class Graph:
         >>> assert graph.has_node((0,))
         >>> assert graph.has_edge((0,), (1,3), label="agent")
     """
-    def __init__(self, doc, *, name="", nodes=[], edges=[], labels=None, weights=None):
+    def __init__(
+        self, doc, *, name="", nodes=[], edges=[], labels=None, weights=None  # no-cython-lint
+    ):
         """Create a Graph object.
 
         doc (Doc): The Doc object the graph will refer to.
@@ -436,13 +437,11 @@ cdef class Graph:
 
     def add_edge(self, head, tail, *, label="", weight=None) -> Edge:
         """Add an edge to the graph, connecting two groups of tokens.
-       
+
         If there is already an edge for the (head, tail, label) triple, it will
         be returned, and no new edge will be created. The weight of the edge
         will be updated if a weight is specified.
         """
-        label_hash = self.doc.vocab.strings.as_int(label)
-        weight_float = weight if weight is not None else 0.0
         edge_index = add_edge(
             &self.c,
             EdgeC(
@@ -476,11 +475,11 @@ cdef class Graph:
     def has_edge(self, head, tail, label) -> bool:
         """Check whether a (head, tail, label) triple is an edge in the graph."""
         return not self.get_edge(head, tail, label=label).is_none
-    
+
     def add_node(self, indices) -> Node:
         """Add a node to the graph and return it. Nodes refer to ordered sets
         of token indices.
-        
+
         This method is idempotent: if there is already a node for the given
         indices, it is returned without a new node being created.
         """
@@ -508,7 +507,7 @@ cdef class Graph:
             return NoneNode(self)
         else:
             return Node(self, node_index)
- 
+
     def has_node(self, tuple indices) -> bool:
         """Check whether the graph has a node for the given indices."""
         return not self.get_node(indices).is_none
@@ -568,7 +567,7 @@ cdef int add_node(GraphC* graph, vector[int32_t]& node) nogil:
         graph.roots.insert(index)
         graph.node_map.insert(pair[hash_t, int](key, index))
         return index
- 
+
 
 cdef int get_node(const GraphC* graph, vector[int32_t] node) nogil:
     key = hash64(&node[0], node.size() * sizeof(node[0]), 0)
diff --git a/spacy/tokens/morphanalysis.pyx b/spacy/tokens/morphanalysis.pyx
index 15a33dcfe..7ff08c4bd 100644
--- a/spacy/tokens/morphanalysis.pyx
+++ b/spacy/tokens/morphanalysis.pyx
@@ -1,5 +1,4 @@
 cimport numpy as np
-from libc.string cimport memset
 
 from ..errors import Errors
 from ..morphology import Morphology
@@ -94,4 +93,3 @@ cdef class MorphAnalysis:
 
     def __repr__(self):
         return self.to_json()
-
diff --git a/spacy/tokens/retokenizer.pyx b/spacy/tokens/retokenizer.pyx
index 134756c3f..6c5e25fa1 100644
--- a/spacy/tokens/retokenizer.pyx
+++ b/spacy/tokens/retokenizer.pyx
@@ -1,7 +1,6 @@
 # cython: infer_types=True, bounds_check=False, profile=True
 from cymem.cymem cimport Pool
-from libc.stdlib cimport free, malloc
-from libc.string cimport memcpy, memset
+from libc.string cimport memset
 
 import numpy
 from thinc.api import get_array_module
@@ -10,7 +9,7 @@ from ..attrs cimport MORPH, NORM
 from ..lexeme cimport EMPTY_LEXEME, Lexeme
 from ..structs cimport LexemeC, TokenC
 from ..vocab cimport Vocab
-from .doc cimport Doc, set_children_from_heads, token_by_end, token_by_start
+from .doc cimport Doc, set_children_from_heads, token_by_start
 from .span cimport Span
 from .token cimport Token
 
@@ -148,7 +147,7 @@ def _merge(Doc doc, merges):
         syntactic root of the span.
     RETURNS (Token): The first newly merged token.
     """
-    cdef int i, merge_index, start, end, token_index, current_span_index, current_offset, offset, span_index
+    cdef int i, merge_index, start, token_index, current_span_index, current_offset, offset, span_index
     cdef Span span
     cdef const LexemeC* lex
     cdef TokenC* token
@@ -166,7 +165,6 @@ def _merge(Doc doc, merges):
     merges.sort(key=_get_start)
     for merge_index, (span, attributes) in enumerate(merges):
         start = span.start
-        end = span.end
         spans.append(span)
         # House the new merged token where it starts
         token = &doc.c[start]
@@ -204,8 +202,9 @@ def _merge(Doc doc, merges):
     # for the merged region. To do this, we create a boolean array indicating
     # whether the row is to be deleted, then use numpy.delete
     if doc.tensor is not None and doc.tensor.size != 0:
-        doc.tensor = _resize_tensor(doc.tensor,
-            [(m[0].start, m[0].end) for m in merges])
+        doc.tensor = _resize_tensor(
+            doc.tensor, [(m[0].start, m[0].end) for m in merges]
+        )
     # Memorize span roots and sets dependencies of the newly merged
     # tokens to the dependencies of their roots.
     span_roots = []
@@ -268,11 +267,11 @@ def _merge(Doc doc, merges):
             span_index += 1
         if span_index < len(spans) and i == spans[span_index].start:
             # First token in a span
-            doc.c[i - offset] = doc.c[i] # move token to its place
+            doc.c[i - offset] = doc.c[i]  # move token to its place
             offset += (spans[span_index].end - spans[span_index].start) - 1
             in_span = True
         if not in_span:
-            doc.c[i - offset] = doc.c[i] # move token to its place
+            doc.c[i - offset] = doc.c[i]  # move token to its place
 
     for i in range(doc.length - offset, doc.length):
         memset(&doc.c[i], 0, sizeof(TokenC))
@@ -346,7 +345,11 @@ def _split(Doc doc, int token_index, orths, heads, attrs):
     if to_process_tensor:
         xp = get_array_module(doc.tensor)
         if xp is numpy:
-            doc.tensor = xp.append(doc.tensor, xp.zeros((nb_subtokens,doc.tensor.shape[1]), dtype="float32"), axis=0)
+            doc.tensor = xp.append(
+                doc.tensor,
+                xp.zeros((nb_subtokens, doc.tensor.shape[1]), dtype="float32"),
+                axis=0
+            )
         else:
             shape = (doc.tensor.shape[0] + nb_subtokens, doc.tensor.shape[1])
             resized_array = xp.zeros(shape, dtype="float32")
@@ -368,7 +371,8 @@ def _split(Doc doc, int token_index, orths, heads, attrs):
         token.norm = 0  # reset norm
         if to_process_tensor:
             # setting the tensors of the split tokens to array of zeros
-            doc.tensor[token_index + i:token_index + i + 1] = xp.zeros((1,doc.tensor.shape[1]), dtype="float32")
+            doc.tensor[token_index + i:token_index + i + 1] = \
+                xp.zeros((1, doc.tensor.shape[1]), dtype="float32")
         # Update the character offset of the subtokens
         if i != 0:
             token.idx = orig_token.idx + idx_offset
@@ -456,7 +460,6 @@ def normalize_token_attrs(Vocab vocab, attrs):
 def set_token_attrs(Token py_token, attrs):
     cdef TokenC* token = py_token.c
     cdef const LexemeC* lex = token.lex
-    cdef Doc doc = py_token.doc
     # Assign attributes
     for attr_name, attr_value in attrs.items():
         if attr_name == "_":  # Set extension attributes
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index 4d4864949..26e5920c0 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -1,5 +1,4 @@
 cimport numpy as np
-from libc.math cimport sqrt
 from libcpp.memory cimport make_shared
 
 import copy
@@ -9,13 +8,13 @@ import numpy
 from thinc.api import get_array_module
 
 from ..attrs cimport *
-from ..attrs cimport attr_id_t
+from ..attrs cimport ORTH, attr_id_t
 from ..lexeme cimport Lexeme
-from ..parts_of_speech cimport univ_pos_t
-from ..structs cimport LexemeC, TokenC
+from ..structs cimport TokenC
 from ..symbols cimport dep
-from ..typedefs cimport attr_t, flags_t, hash_t
+from ..typedefs cimport attr_t
 from .doc cimport _get_lca_matrix, get_token_attr, token_by_end, token_by_start
+from .token cimport Token
 
 from ..errors import Errors, Warnings
 from ..util import normalize_slice
@@ -226,8 +225,8 @@ cdef class Span:
 
     @property
     def _(self):
-        cdef SpanC* span_c = self.span_c()
         """Custom extension attributes registered via `set_extension`."""
+        cdef SpanC* span_c = self.span_c()
         return Underscore(Underscore.span_extensions, self,
                           start=span_c.start_char, end=span_c.end_char, label=self.label, kb_id=self.kb_id, span_id=self.id)
 
@@ -371,13 +370,26 @@ cdef class Span:
         """
         if "similarity" in self.doc.user_span_hooks:
             return self.doc.user_span_hooks["similarity"](self, other)
-        if len(self) == 1 and hasattr(other, "orth"):
-            if self[0].orth == other.orth:
+        attr = getattr(self.doc.vocab.vectors, "attr", ORTH)
+        cdef Token this_token
+        cdef Token other_token
+        cdef Lexeme other_lex
+        if len(self) == 1 and isinstance(other, Token):
+            this_token = self[0]
+            other_token = other
+            if Token.get_struct_attr(this_token.c, attr) == Token.get_struct_attr(other_token.c, attr):
+                return 1.0
+        elif len(self) == 1 and isinstance(other, Lexeme):
+            this_token = self[0]
+            other_lex = other
+            if Token.get_struct_attr(this_token.c, attr) == Lexeme.get_struct_attr(other_lex.c, attr):
                 return 1.0
         elif isinstance(other, (Doc, Span)) and len(self) == len(other):
             similar = True
             for i in range(len(self)):
-                if self[i].orth != getattr(other[i], "orth", None):
+                this_token = self[i]
+                other_token = other[i]
+                if Token.get_struct_attr(this_token.c, attr) != Token.get_struct_attr(other_token.c, attr):
                     similar = False
                     break
             if similar:
@@ -607,7 +619,6 @@ cdef class Span:
         """
         return "".join([t.text_with_ws for t in self])
 
-
     @property
     def noun_chunks(self):
         """Iterate over the base noun phrases in the span. Yields base
@@ -922,7 +933,6 @@ cdef class Span:
             self.id_ = ent_id_
 
 
-
 cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1:
     # Don't allow spaces to be the root, if there are
     # better candidates
diff --git a/spacy/tokens/span_group.pyx b/spacy/tokens/span_group.pyx
index 69c9efb9d..c2f5ce1c8 100644
--- a/spacy/tokens/span_group.pyx
+++ b/spacy/tokens/span_group.pyx
@@ -1,7 +1,7 @@
 import struct
 import weakref
 from copy import deepcopy
-from typing import TYPE_CHECKING, Iterable, Optional, Tuple, Union
+from typing import Iterable, Optional, Union
 
 import srsly
 
@@ -36,7 +36,7 @@ cdef class SpanGroup:
 
     DOCS: https://spacy.io/api/spangroup
     """
-    def __init__(self, doc, *, name="", attrs={}, spans=[]):
+    def __init__(self, doc, *, name="", attrs={}, spans=[]):  # no-cython-lint
         """Create a SpanGroup.
 
         doc (Doc): The reference Doc object.
@@ -315,7 +315,7 @@ cdef class SpanGroup:
 
             other_attrs = deepcopy(other_group.attrs)
             span_group.attrs.update({
-                key: value for key, value in other_attrs.items() \
+                key: value for key, value in other_attrs.items()
                 if key not in span_group.attrs
             })
             if len(other_group):
diff --git a/spacy/tokens/token.pxd b/spacy/tokens/token.pxd
index fc02ff624..f4e4611df 100644
--- a/spacy/tokens/token.pxd
+++ b/spacy/tokens/token.pxd
@@ -26,7 +26,7 @@ cdef class Token:
         cdef Token self = Token.__new__(Token, vocab, doc, offset)
         return self
 
-    #cdef inline TokenC struct_from_attrs(Vocab vocab, attrs):
+    # cdef inline TokenC struct_from_attrs(Vocab vocab, attrs):
     #    cdef TokenC token
     #    attrs = normalize_attrs(attrs)
 
@@ -98,12 +98,10 @@ cdef class Token:
         elif feat_name == SENT_START:
             token.sent_start = value
 
-
     @staticmethod
     cdef inline int missing_dep(const TokenC* token) nogil:
         return token.dep == MISSING_DEP
 
-
     @staticmethod
     cdef inline int missing_head(const TokenC* token) nogil:
         return Token.missing_dep(token)
diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx
index 2c8b173e7..ff1120b7b 100644
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@@ -1,13 +1,11 @@
 # cython: infer_types=True
 # Compiler crashes on memory view coercion without this. Should report bug.
 cimport numpy as np
-from cython.view cimport array as cvarray
 
 np.import_array()
 
 import warnings
 
-import numpy
 from thinc.api import get_array_module
 
 from ..attrs cimport (
@@ -216,11 +214,17 @@ cdef class Token:
         """
         if "similarity" in self.doc.user_token_hooks:
             return self.doc.user_token_hooks["similarity"](self, other)
-        if hasattr(other, "__len__") and len(other) == 1 and hasattr(other, "__getitem__"):
-            if self.c.lex.orth == getattr(other[0], "orth", None):
+        attr = getattr(self.doc.vocab.vectors, "attr", ORTH)
+        cdef Token this_token = self
+        cdef Token other_token
+        cdef Lexeme other_lex
+        if isinstance(other, Token):
+            other_token = other
+            if Token.get_struct_attr(this_token.c, attr) == Token.get_struct_attr(other_token.c, attr):
                 return 1.0
-        elif hasattr(other, "orth"):
-            if self.c.lex.orth == other.orth:
+        elif isinstance(other, Lexeme):
+            other_lex = other
+            if Token.get_struct_attr(this_token.c, attr) == Lexeme.get_struct_attr(other_lex.c, attr):
                 return 1.0
         if self.vocab.vectors.n_keys == 0:
             warnings.warn(Warnings.W007.format(obj="Token"))
@@ -233,7 +237,7 @@ cdef class Token:
         result = xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm)
         # ensure we get a scalar back (numpy does this automatically but cupy doesn't)
         return result.item()
-    
+
     def has_morph(self):
         """Check whether the token has annotated morph information.
         Return False when the morph annotation is unset/missing.
@@ -528,9 +532,9 @@ cdef class Token:
         def __get__(self):
             if self.i + 1 == len(self.doc):
                 return True
-            elif self.doc[self.i+1].is_sent_start == None:
+            elif self.doc[self.i+1].is_sent_start is None:
                 return None
-            elif self.doc[self.i+1].is_sent_start == True:
+            elif self.doc[self.i+1].is_sent_start is True:
                 return True
             else:
                 return False
diff --git a/spacy/training/align.pyx b/spacy/training/align.pyx
index 8bd43b048..79fec73c4 100644
--- a/spacy/training/align.pyx
+++ b/spacy/training/align.pyx
@@ -37,10 +37,14 @@ def get_alignments(A: List[str], B: List[str]) -> Tuple[List[List[int]], List[Li
             b2a.append(set())
         # Process the alignment at the current position
         if A[token_idx_a] == B[token_idx_b] and \
-                (char_idx_a == 0 or \
-                    char_to_token_a[char_idx_a - 1] < token_idx_a) and \
-                (char_idx_b == 0 or \
-                    char_to_token_b[char_idx_b - 1] < token_idx_b):
+                (
+                    char_idx_a == 0 or
+                    char_to_token_a[char_idx_a - 1] < token_idx_a
+                ) and \
+                (
+                    char_idx_b == 0 or
+                    char_to_token_b[char_idx_b - 1] < token_idx_b
+                ):
             # Current tokens are identical and both character offsets are the
             # start of a token (either at the beginning of the document or the
             # previous character belongs to a different token)
diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx
index 1c3cd9939..efca4bcb0 100644
--- a/spacy/training/example.pyx
+++ b/spacy/training/example.pyx
@@ -1,4 +1,3 @@
-import warnings
 from collections.abc import Iterable as IterableInstance
 
 import numpy
@@ -31,9 +30,9 @@ cpdef Doc annotations_to_doc(vocab, tok_annot, doc_annot):
     attrs, array = _annot2array(vocab, tok_annot, doc_annot)
     output = Doc(vocab, words=tok_annot["ORTH"], spaces=tok_annot["SPACY"])
     if "entities" in doc_annot:
-       _add_entities_to_doc(output, doc_annot["entities"])
+        _add_entities_to_doc(output, doc_annot["entities"])
     if "spans" in doc_annot:
-       _add_spans_to_doc(output, doc_annot["spans"])
+        _add_spans_to_doc(output, doc_annot["spans"])
     if array.size:
         output = output.from_array(attrs, array)
     # links are currently added with ENT_KB_ID on the token level
@@ -168,7 +167,6 @@ cdef class Example:
                 self._y_sig = y_sig
                 return self._cached_alignment
 
-
     def _get_aligned_vectorized(self, align, gold_values):
         # Fast path for Doc attributes/fields that are predominantly a single value,
         # i.e., TAG, POS, MORPH.
@@ -211,7 +209,6 @@ cdef class Example:
 
         return output.tolist()
 
-
     def _get_aligned_non_vectorized(self, align, gold_values):
         # Slower path for fields that return multiple values (resulting
         # in ragged arrays that cannot be vectorized trivially).
@@ -228,7 +225,6 @@ cdef class Example:
 
         return output
 
-
     def get_aligned(self, field, as_string=False):
         """Return an aligned array for a token attribute."""
         align = self.alignment.x2y
@@ -337,7 +333,7 @@ cdef class Example:
             missing=None
         )
         # Now fill the tokens we can align to O.
-        O = 2 # I=1, O=2, B=3
+        O = 2 # I=1, O=2, B=3  # no-cython-lint: E741
         for i, ent_iob in enumerate(self.get_aligned("ENT_IOB")):
             if x_tags[i] is None:
                 if ent_iob == O:
@@ -347,7 +343,7 @@ cdef class Example:
         return x_ents, x_tags
 
     def get_aligned_ner(self):
-        x_ents, x_tags = self.get_aligned_ents_and_ner()
+        _x_ents, x_tags = self.get_aligned_ents_and_ner()
         return x_tags
 
     def get_matching_ents(self, check_label=True):
@@ -405,7 +401,6 @@ cdef class Example:
 
         return span_dict
 
-
     def _links_to_dict(self):
         links = {}
         for ent in self.reference.ents:
@@ -596,6 +591,7 @@ def _fix_legacy_dict_data(example_dict):
         "doc_annotation": doc_dict
     }
 
+
 def _has_field(annot, field):
     if field not in annot:
         return False
@@ -632,6 +628,7 @@ def _parse_ner_tags(biluo_or_offsets, vocab, words, spaces):
                 ent_types.append("")
     return ent_iobs, ent_types
 
+
 def _parse_links(vocab, words, spaces, links):
     reference = Doc(vocab, words=words, spaces=spaces)
     starts = {token.idx: token.i for token in reference}
diff --git a/spacy/training/gold_io.pyx b/spacy/training/gold_io.pyx
index 1e7b3681d..2fc36e41f 100644
--- a/spacy/training/gold_io.pyx
+++ b/spacy/training/gold_io.pyx
@@ -1,4 +1,3 @@
-import json
 import warnings
 
 import srsly
@@ -6,7 +5,7 @@ import srsly
 from .. import util
 from ..errors import Warnings
 from ..tokens import Doc
-from .iob_utils import offsets_to_biluo_tags, tags_to_entities
+from .iob_utils import offsets_to_biluo_tags
 
 
 def docs_to_json(docs, doc_id=0, ner_missing_tag="O"):
@@ -23,7 +22,13 @@ def docs_to_json(docs, doc_id=0, ner_missing_tag="O"):
     json_doc = {"id": doc_id, "paragraphs": []}
     for i, doc in enumerate(docs):
         raw = None if doc.has_unknown_spaces else doc.text
-        json_para = {'raw': raw, "sentences": [], "cats": [], "entities": [], "links": []}
+        json_para = {
+            'raw': raw,
+            "sentences": [],
+            "cats": [],
+            "entities": [],
+            "links": []
+        }
         for cat, val in doc.cats.items():
             json_cat = {"label": cat, "value": val}
             json_para["cats"].append(json_cat)
@@ -35,13 +40,17 @@ def docs_to_json(docs, doc_id=0, ner_missing_tag="O"):
             if ent.kb_id_:
                 link_dict = {(ent.start_char, ent.end_char): {ent.kb_id_: 1.0}}
                 json_para["links"].append(link_dict)
-        biluo_tags = offsets_to_biluo_tags(doc, json_para["entities"], missing=ner_missing_tag)
+        biluo_tags = offsets_to_biluo_tags(
+            doc, json_para["entities"], missing=ner_missing_tag
+        )
         attrs = ("TAG", "POS", "MORPH", "LEMMA", "DEP", "ENT_IOB")
         include_annotation = {attr: doc.has_annotation(attr) for attr in attrs}
         for j, sent in enumerate(doc.sents):
             json_sent = {"tokens": [], "brackets": []}
             for token in sent:
-                json_token = {"id": token.i, "orth": token.text, "space": token.whitespace_}
+                json_token = {
+                    "id": token.i, "orth": token.text, "space": token.whitespace_
+                }
                 if include_annotation["TAG"]:
                     json_token["tag"] = token.tag_
                 if include_annotation["POS"]:
@@ -125,9 +134,14 @@ def json_to_annotations(doc):
                 else:
                     sent_starts.append(-1)
             if "brackets" in sent:
-                brackets.extend((b["first"] + sent_start_i,
-                                 b["last"] + sent_start_i, b["label"])
-                                 for b in sent["brackets"])
+                brackets.extend(
+                    (
+                        b["first"] + sent_start_i,
+                        b["last"] + sent_start_i,
+                        b["label"]
+                    )
+                    for b in sent["brackets"]
+                )
 
         example["token_annotation"] = dict(
             ids=ids,
@@ -160,6 +174,7 @@ def json_to_annotations(doc):
         )
         yield example
 
+
 def json_iterate(bytes utf8_str):
     # We should've made these files jsonl...But since we didn't, parse out
     # the docs one-by-one to reduce memory usage.
diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py
index efe323202..7a883ce50 100644
--- a/spacy/training/initialize.py
+++ b/spacy/training/initialize.py
@@ -71,7 +71,8 @@ def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
         with nlp.select_pipes(enable=resume_components):
             logger.info("Resuming training for: %s", resume_components)
             nlp.resume_training(sgd=optimizer)
-    # Make sure that listeners are defined before initializing further
+    # Make sure that internal component names are synced and listeners are
+    # defined before initializing further
     nlp._link_components()
     with nlp.select_pipes(disable=[*frozen_components, *resume_components]):
         if T["max_epochs"] == -1:
@@ -305,9 +306,14 @@ def convert_vectors(
     truncate: int,
     prune: int,
     mode: str = VectorsMode.default,
+    attr: str = "ORTH",
 ) -> None:
     vectors_loc = ensure_path(vectors_loc)
     if vectors_loc and vectors_loc.parts[-1].endswith(".npz"):
+        if attr != "ORTH":
+            raise ValueError(
+                "ORTH is the only attribute supported for vectors in .npz format."
+            )
         nlp.vocab.vectors = Vectors(
             strings=nlp.vocab.strings, data=numpy.load(vectors_loc.open("rb"))
         )
@@ -335,11 +341,15 @@ def convert_vectors(
                 nlp.vocab.vectors = Vectors(
                     strings=nlp.vocab.strings,
                     data=vectors_data,
+                    attr=attr,
                     **floret_settings,
                 )
             else:
                 nlp.vocab.vectors = Vectors(
-                    strings=nlp.vocab.strings, data=vectors_data, keys=vector_keys
+                    strings=nlp.vocab.strings,
+                    data=vectors_data,
+                    keys=vector_keys,
+                    attr=attr,
                 )
                 nlp.vocab.deduplicate_vectors()
     if prune >= 1 and mode != VectorsMode.floret:
diff --git a/spacy/util.py b/spacy/util.py
index bf438f9e2..30300e019 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -518,7 +518,7 @@ def load_model_from_path(
     if not meta:
         meta = get_model_meta(model_path)
     config_path = model_path / "config.cfg"
-    overrides = dict_to_dot(config)
+    overrides = dict_to_dot(config, for_overrides=True)
     config = load_config(config_path, overrides=overrides)
     nlp = load_model_from_config(
         config,
@@ -1486,14 +1486,19 @@ def dot_to_dict(values: Dict[str, Any]) -> Dict[str, dict]:
     return result
 
 
-def dict_to_dot(obj: Dict[str, dict]) -> Dict[str, Any]:
+def dict_to_dot(obj: Dict[str, dict], *, for_overrides: bool = False) -> Dict[str, Any]:
-    """Convert dot notation to a dict. For example: {"token": {"pos": True,
+    """Convert a dict to dot notation. For example: {"token": {"pos": True,
     "_": {"xyz": True }}} becomes {"token.pos": True, "token._.xyz": True}.
 
-    values (Dict[str, dict]): The dict to convert.
+    obj (Dict[str, dict]): The dict to convert.
+    for_overrides (bool): Whether to keep dicts that contain a key starting
+        with "@" (registered functions) as leaf values instead of flattening them.
     RETURNS (Dict[str, Any]): The key/value pairs.
     """
-    return {".".join(key): value for key, value in walk_dict(obj)}
+    return {
+        ".".join(key): value
+        for key, value in walk_dict(obj, for_overrides=for_overrides)
+    }
 
 
 def dot_to_object(config: Config, section: str):
@@ -1535,13 +1540,20 @@ def set_dot_to_object(config: Config, section: str, value: Any) -> None:
 
 
 def walk_dict(
-    node: Dict[str, Any], parent: List[str] = []
+    node: Dict[str, Any], parent: List[str] = [], *, for_overrides: bool = False
 ) -> Iterator[Tuple[List[str], Any]]:
-    """Walk a dict and yield the path and values of the leaves."""
+    """Walk a dict and yield the path and values of the leaves.
+
+    for_overrides (bool): Whether to treat dicts that contain a key starting
+        with "@" (registered functions) as final values rather than traversing them.
+    """
     for key, value in node.items():
         key_parent = [*parent, key]
-        if isinstance(value, dict):
-            yield from walk_dict(value, key_parent)
+        if isinstance(value, dict) and (
+            not for_overrides
+            or not any(value_key.startswith("@") for value_key in value)
+        ):
+            yield from walk_dict(value, key_parent, for_overrides=for_overrides)
         else:
             yield (key_parent, value)
 
diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx
index bc6d72e14..783e6d00a 100644
--- a/spacy/vectors.pyx
+++ b/spacy/vectors.pyx
@@ -1,10 +1,8 @@
-cimport numpy as np
 from cython.operator cimport dereference as deref
 from libc.stdint cimport uint32_t, uint64_t
 from libcpp.set cimport set as cppset
 from murmurhash.mrmr cimport hash128_x64
 
-import functools
 import warnings
 from enum import Enum
 from typing import cast
@@ -15,9 +13,11 @@ from thinc.api import Ops, get_array_module, get_current_ops
 from thinc.backends import get_array_ops
 from thinc.types import Floats2d
 
+from .attrs cimport ORTH, attr_id_t
 from .strings cimport StringStore
 
 from . import util
+from .attrs import IDS
 from .errors import Errors, Warnings
 from .strings import get_string_id
 
@@ -63,8 +63,9 @@ cdef class Vectors:
     cdef readonly uint32_t hash_seed
     cdef readonly unicode bow
     cdef readonly unicode eow
+    cdef readonly attr_id_t attr
 
-    def __init__(self, *, strings=None, shape=None, data=None, keys=None, mode=Mode.default, minn=0, maxn=0, hash_count=1, hash_seed=0, bow="<", eow=">"):
+    def __init__(self, *, strings=None, shape=None, data=None, keys=None, mode=Mode.default, minn=0, maxn=0, hash_count=1, hash_seed=0, bow="<", eow=">", attr="ORTH"):
         """Create a new vector store.
 
         strings (StringStore): The string store.
@@ -78,6 +79,8 @@ cdef class Vectors:
         hash_seed (int): The floret hash seed (default: 0).
         bow (str): The floret BOW string (default: "<").
         eow (str): The floret EOW string (default: ">").
+        attr (Union[int, str]): The token attribute for the vector keys
+            (default: "ORTH").
 
         DOCS: https://spacy.io/api/vectors#init
         """
@@ -100,10 +103,18 @@ cdef class Vectors:
         self.hash_seed = hash_seed
         self.bow = bow
         self.eow = eow
+        if isinstance(attr, (int, long)):
+            self.attr = attr
+        else:
+            attr = attr.upper()
+            if attr == "TEXT":
+                attr = "ORTH"
+            self.attr = IDS.get(attr, ORTH)
+
         if self.mode == Mode.default:
             if data is None:
                 if shape is None:
-                    shape = (0,0)
+                    shape = (0, 0)
                 ops = get_current_ops()
                 data = ops.xp.zeros(shape, dtype="f")
                 self._unset = cppset[int]({i for i in range(data.shape[0])})
@@ -244,11 +255,10 @@ cdef class Vectors:
     def __eq__(self, other):
         # Check for equality, with faster checks first
         return (
-                self.shape == other.shape
-                and self.key2row == other.key2row
-                and self.to_bytes(exclude=["strings"])
-                  == other.to_bytes(exclude=["strings"])
-               )
+            self.shape == other.shape
+            and self.key2row == other.key2row
+            and self.to_bytes(exclude=["strings"]) == other.to_bytes(exclude=["strings"])
+        )
 
     def resize(self, shape, inplace=False):
         """Resize the underlying vectors array. If inplace=True, the memory
@@ -504,11 +514,12 @@ cdef class Vectors:
             # vectors e.g. (10000, 300)
             # sims    e.g. (1024, 10000)
             sims = xp.dot(batch, vectors.T)
-            best_rows[i:i+batch_size] = xp.argpartition(sims, -n, axis=1)[:,-n:]
-            scores[i:i+batch_size] = xp.partition(sims, -n, axis=1)[:,-n:]
+            best_rows[i:i+batch_size] = xp.argpartition(sims, -n, axis=1)[:, -n:]
+            scores[i:i+batch_size] = xp.partition(sims, -n, axis=1)[:, -n:]
 
             if sort and n >= 2:
-                sorted_index = xp.arange(scores.shape[0])[:,None][i:i+batch_size],xp.argsort(scores[i:i+batch_size], axis=1)[:,::-1]
+                sorted_index = xp.arange(scores.shape[0])[:, None][i:i+batch_size], \
+                    xp.argsort(scores[i:i+batch_size], axis=1)[:, ::-1]
                 scores[i:i+batch_size] = scores[sorted_index]
                 best_rows[i:i+batch_size] = best_rows[sorted_index]
 
@@ -522,8 +533,12 @@ cdef class Vectors:
 
         numpy_rows = get_current_ops().to_numpy(best_rows)
         keys = xp.asarray(
-            [[row2key[row] for row in numpy_rows[i] if row in row2key]
-                    for i in range(len(queries)) ], dtype="uint64")
+            [
+                [row2key[row] for row in numpy_rows[i] if row in row2key]
+                for i in range(len(queries))
+            ],
+            dtype="uint64"
+        )
         return (keys, best_rows, scores)
 
     def to_ops(self, ops: Ops):
@@ -543,6 +558,7 @@ cdef class Vectors:
                 "hash_seed": self.hash_seed,
                 "bow": self.bow,
                 "eow": self.eow,
+                "attr": self.attr,
             }
 
     def _set_cfg(self, cfg):
@@ -553,6 +569,7 @@ cdef class Vectors:
         self.hash_seed = cfg.get("hash_seed", 0)
         self.bow = cfg.get("bow", "<")
         self.eow = cfg.get("eow", ">")
+        self.attr = cfg.get("attr", ORTH)
 
     def to_disk(self, path, *, exclude=tuple()):
         """Save the current state to a directory.
@@ -564,9 +581,9 @@ cdef class Vectors:
         """
         xp = get_array_module(self.data)
         if xp is numpy:
-            save_array = lambda arr, file_: xp.save(file_, arr, allow_pickle=False)
+            save_array = lambda arr, file_: xp.save(file_, arr, allow_pickle=False)  # no-cython-lint
         else:
-            save_array = lambda arr, file_: xp.save(file_, arr)
+            save_array = lambda arr, file_: xp.save(file_, arr)  # no-cython-lint
 
         def save_vectors(path):
             # the source of numpy.save indicates that the file object is closed after use.
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index db259f117..0129862c1 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -1,6 +1,4 @@
 # cython: profile=True
-from libc.string cimport memcpy
-
 import functools
 
 import numpy
@@ -19,7 +17,6 @@ from .errors import Errors
 from .lang.lex_attrs import LEX_ATTRS, get_lang, is_stop
 from .lang.norm_exceptions import BASE_NORMS
 from .lookups import Lookups
-from .util import registry
 from .vectors import Mode as VectorsMode
 from .vectors import Vectors
 
@@ -50,8 +47,15 @@ cdef class Vocab:
 
     DOCS: https://spacy.io/api/vocab
     """
-    def __init__(self, lex_attr_getters=None, strings=None, lookups=None,
-            oov_prob=-20., writing_system=None, get_noun_chunks=None):
+    def __init__(
+        self,
+        lex_attr_getters=None,
+        strings=None,
+        lookups=None,
+        oov_prob=-20.,
+        writing_system=None,
+        get_noun_chunks=None
+    ):
         """Create the vocabulary.
 
         lex_attr_getters (dict): A dictionary mapping attribute IDs to
@@ -150,7 +154,6 @@ cdef class Vocab:
         cdef LexemeC* lex
         cdef hash_t key = self.strings[string]
         lex = <LexemeC*>self._by_orth.get(key)
-        cdef size_t addr
         if lex != NULL:
             assert lex.orth in self.strings
             if lex.orth != key:
@@ -352,8 +355,13 @@ cdef class Vocab:
             self[orth]
         # Make prob negative so it sorts by rank ascending
         # (key2row contains the rank)
-        priority = [(-lex.prob, self.vectors.key2row[lex.orth], lex.orth)
-                    for lex in self if lex.orth in self.vectors.key2row]
+        priority = []
+        cdef Lexeme lex
+        cdef attr_t value
+        for lex in self:
+            value = Lexeme.get_struct_attr(lex.c, self.vectors.attr)
+            if value in self.vectors.key2row:
+                priority.append((-lex.prob, self.vectors.key2row[value], value))
         priority.sort()
         indices = xp.asarray([i for (prob, i, key) in priority], dtype="uint64")
         keys = xp.asarray([key for (prob, i, key) in priority], dtype="uint64")
@@ -386,8 +394,10 @@ cdef class Vocab:
         """
         if isinstance(orth, str):
             orth = self.strings.add(orth)
-        if self.has_vector(orth):
-            return self.vectors[orth]
+        cdef Lexeme lex = self[orth]
+        key = Lexeme.get_struct_attr(lex.c, self.vectors.attr)
+        if self.has_vector(key):
+            return self.vectors[key]
         xp = get_array_module(self.vectors.data)
         vectors = xp.zeros((self.vectors_length,), dtype="f")
         return vectors
@@ -403,15 +413,16 @@ cdef class Vocab:
         """
         if isinstance(orth, str):
             orth = self.strings.add(orth)
-        if self.vectors.is_full and orth not in self.vectors:
+        cdef Lexeme lex = self[orth]
+        key = Lexeme.get_struct_attr(lex.c, self.vectors.attr)
+        if self.vectors.is_full and key not in self.vectors:
             new_rows = max(100, int(self.vectors.shape[0]*1.3))
             if self.vectors.shape[1] == 0:
                 width = vector.size
             else:
                 width = self.vectors.shape[1]
             self.vectors.resize((new_rows, width))
-        lex = self[orth]  # Add word to vocab if necessary
-        row = self.vectors.add(orth, vector=vector)
+        row = self.vectors.add(key, vector=vector)
         if row >= 0:
             lex.rank = row
 
@@ -426,7 +437,9 @@ cdef class Vocab:
         """
         if isinstance(orth, str):
             orth = self.strings.add(orth)
-        return orth in self.vectors
+        cdef Lexeme lex = self[orth]
+        key = Lexeme.get_struct_attr(lex.c, self.vectors.attr)
+        return key in self.vectors
 
     property lookups:
         def __get__(self):
@@ -440,7 +453,6 @@ cdef class Vocab:
                     self.lookups.get_table("lexeme_norm"),
                 )
 
-
     def to_disk(self, path, *, exclude=tuple()):
         """Save the current state to a directory.
 
@@ -453,7 +465,6 @@ cdef class Vocab:
         path = util.ensure_path(path)
         if not path.exists():
             path.mkdir()
-        setters = ["strings", "vectors"]
         if "strings" not in exclude:
             self.strings.to_disk(path / "strings.json")
         if "vectors" not in exclude:
@@ -472,7 +483,6 @@ cdef class Vocab:
         DOCS: https://spacy.io/api/vocab#to_disk
         """
         path = util.ensure_path(path)
-        getters = ["strings", "vectors"]
         if "strings" not in exclude:
             self.strings.from_disk(path / "strings.json")  # TODO: add exclude?
         if "vectors" not in exclude:
diff --git a/website/docs/api/architectures.mdx b/website/docs/api/architectures.mdx
index ee41144f6..3c3597d6a 100644
--- a/website/docs/api/architectures.mdx
+++ b/website/docs/api/architectures.mdx
@@ -303,7 +303,7 @@ mapped to a zero vector. See the documentation on
 | `nM`        | The width of the static vectors. ~~Optional[int]~~                                                                                                                                                                      |
 | `dropout`   | Optional dropout rate. If set, it's applied per dimension over the whole batch. Defaults to `None`. ~~Optional[float]~~                                                                                                 |
 | `init_W`    | The [initialization function](https://thinc.ai/docs/api-initializers). Defaults to [`glorot_uniform_init`](https://thinc.ai/docs/api-initializers#glorot_uniform_init). ~~Callable[[Ops, Tuple[int, ...]]], FloatsXd]~~ |
-| `key_attr`  | Defaults to `"ORTH"`. ~~str~~                                                                                                                                                                                           |
+| `key_attr`  | This setting is ignored in spaCy v3.6+. To set a custom key attribute for vectors, configure it through [`Vectors`](/api/vectors) or [`spacy init vectors`](/api/cli#init-vectors). Defaults to `"ORTH"`. ~~str~~       |
 | **CREATES** | The model using the architecture. ~~Model[List[Doc], Ragged]~~                                                                                                                                                          |
 
 ### spacy.FeatureExtractor.v1 {id="FeatureExtractor"}
diff --git a/website/docs/api/language.mdx b/website/docs/api/language.mdx
index d26d7b96b..d65ea3764 100644
--- a/website/docs/api/language.mdx
+++ b/website/docs/api/language.mdx
@@ -876,7 +876,7 @@ token-to-vector embedding component like [`Tok2Vec`](/api/tok2vec) or
 training a pipeline with components sourced from an existing pipeline: if
 multiple components (e.g. tagger, parser, NER) listen to the same
 token-to-vector component, but some of them are frozen and not updated, their
-performance may degrade significally as the token-to-vector component is updated
+performance may degrade significantly as the token-to-vector component is updated
 with new data. To prevent this, listeners can be replaced with a standalone
 token-to-vector layer that is owned by the component and doesn't change if the
 component isn't updated.
diff --git a/website/docs/api/spanfinder.mdx b/website/docs/api/spanfinder.mdx
index ca3104c85..ef4a6baa5 100644
--- a/website/docs/api/spanfinder.mdx
+++ b/website/docs/api/spanfinder.mdx
@@ -60,7 +60,7 @@ architectures and their arguments and hyperparameters.
 | `model`      | A model instance that is given a list of documents and predicts a probability for each token. ~~Model[List[Doc], Floats2d]~~                                                                                           |
 | `spans_key`  | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ |
 | `threshold`  | Minimum probability to consider a prediction positive. Defaults to `0.5`. ~~float~~                                                                                                                                    |
-| `max_length` | Maximum length of the produced spans, defaults to `None` meaning unlimited length. ~~Optional[int]~~                                                                                                                   |
+| `max_length` | Maximum length of the produced spans, defaults to `25`. ~~Optional[int]~~                                                                                                                                              |
 | `min_length` | Minimum length of the produced spans, defaults to `None` meaning shortest span length is 1. ~~Optional[int]~~                                                                                                          |
 | `scorer`     | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~                                                      |
 
diff --git a/website/docs/api/vectors.mdx b/website/docs/api/vectors.mdx
index 021484a1b..787f10fa3 100644
--- a/website/docs/api/vectors.mdx
+++ b/website/docs/api/vectors.mdx
@@ -59,6 +59,7 @@ modified later.
 | `hash_seed` <Tag variant="new">3.2</Tag>  | The floret hash seed (default: `0`). ~~int~~                                                                                                                                           |
 | `bow` <Tag variant="new">3.2</Tag>        | The floret BOW string (default: `"<"`). ~~str~~                                                                                                                                        |
 | `eow` <Tag variant="new">3.2</Tag>        | The floret EOW string (default: `">"`). ~~str~~                                                                                                                                        |
+| `attr` <Tag variant="new">3.6</Tag>       | The token attribute for the vector keys (default: `"ORTH"`). ~~Union[int, str]~~                                                                                                       |
 
 ## Vectors.\_\_getitem\_\_ {id="getitem",tag="method"}
 
@@ -452,8 +453,9 @@ Load state from a binary string.
 
 ## Attributes {id="attributes"}
 
-| Name      | Description                                                                                                                                                          |
-| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `data`    | Stored vectors data. `numpy` is used for CPU vectors, `cupy` for GPU vectors. ~~Union[numpy.ndarray[ndim=1, dtype=float32], cupy.ndarray[ndim=1, dtype=float32]]~~   |
-| `key2row` | Dictionary mapping word hashes to rows in the `Vectors.data` table. ~~Dict[int, int]~~                                                                               |
-| `keys`    | Array keeping the keys in order, such that `keys[vectors.key2row[key]] == key`. ~~Union[numpy.ndarray[ndim=1, dtype=float32], cupy.ndarray[ndim=1, dtype=float32]]~~ |
+| Name                                | Description                                                                                                                                                          |
+| ----------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `data`                              | Stored vectors data. `numpy` is used for CPU vectors, `cupy` for GPU vectors. ~~Union[numpy.ndarray[ndim=1, dtype=float32], cupy.ndarray[ndim=1, dtype=float32]]~~   |
+| `key2row`                           | Dictionary mapping word hashes to rows in the `Vectors.data` table. ~~Dict[int, int]~~                                                                               |
+| `keys`                              | Array keeping the keys in order, such that `keys[vectors.key2row[key]] == key`. ~~Union[numpy.ndarray[ndim=1, dtype=float32], cupy.ndarray[ndim=1, dtype=float32]]~~ |
+| `attr` <Tag variant="new">3.6</Tag> | The token attribute for the vector keys. ~~int~~                                                                                                                     |
diff --git a/website/docs/usage/linguistic-features.mdx b/website/docs/usage/linguistic-features.mdx
index 55d5680fe..90f305ada 100644
--- a/website/docs/usage/linguistic-features.mdx
+++ b/website/docs/usage/linguistic-features.mdx
@@ -113,7 +113,7 @@ print(doc[2].morph)  # 'Case=Nom|Person=2|PronType=Prs'
 print(doc[2].pos_)  # 'PRON'
 ```
 
-## Lemmatization {id="lemmatization",model="lemmatizer",version="3"}
+## Lemmatization {id="lemmatization",version="3"}
 
 spaCy provides two pipeline components for lemmatization:
 
@@ -170,7 +170,7 @@ nlp = spacy.blank("sv")
 nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
 ```
 
-### Rule-based lemmatizer {id="lemmatizer-rule"}
+### Rule-based lemmatizer {id="lemmatizer-rule",model="morphologizer"}
 
 When training pipelines that include a component that assigns part-of-speech
 tags (a morphologizer or a tagger with a [POS mapping](#mappings-exceptions)), a
@@ -194,7 +194,7 @@ information, without consulting the context of the token. The rule-based
 lemmatizer also accepts list-based exception files. For English, these are
 acquired from [WordNet](https://wordnet.princeton.edu/).
 
-### Trainable lemmatizer
+### Trainable lemmatizer {id="lemmatizer-train",model="trainable_lemmatizer"}
 
 The [`EditTreeLemmatizer`](/api/edittreelemmatizer) can learn form-to-lemma
 transformations from a training corpus that includes lemma annotations. This
diff --git a/website/docs/usage/training.mdx b/website/docs/usage/training.mdx
index 16c3433ac..ebbdd6bdc 100644
--- a/website/docs/usage/training.mdx
+++ b/website/docs/usage/training.mdx
@@ -11,7 +11,6 @@ menu:
   - ['Custom Functions', 'custom-functions']
   - ['Initialization', 'initialization']
   - ['Data Utilities', 'data']
-  - ['Parallel Training', 'parallel-training']
   - ['Internal API', 'api']
 ---
 
@@ -1565,77 +1564,6 @@ token-based annotations like the dependency parse or entity labels, you'll need
 to take care to adjust the `Example` object so its annotations match and remain
 valid.
 
-## Parallel & distributed training with Ray {id="parallel-training"}
-
-> #### Installation
->
-> ```bash
-> $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS
-> # Check that the CLI is registered
-> $ python -m spacy ray --help
-> ```
-
-[Ray](https://ray.io/) is a fast and simple framework for building and running
-**distributed applications**. You can use Ray to train spaCy on one or more
-remote machines, potentially speeding up your training process. Parallel
-training won't always be faster though – it depends on your batch size, models,
-and hardware.
-
-<Infobox variant="warning">
-
-To use Ray with spaCy, you need the
-[`spacy-ray`](https://github.com/explosion/spacy-ray) package installed.
-Installing the package will automatically add the `ray` command to the spaCy
-CLI.
-
-</Infobox>
-
-The [`spacy ray train`](/api/cli#ray-train) command follows the same API as
-[`spacy train`](/api/cli#train), with a few extra options to configure the Ray
-setup. You can optionally set the `--address` option to point to your Ray
-cluster. If it's not set, Ray will run locally.
-
-```bash
-python -m spacy ray train config.cfg --n-workers 2
-```
-
-<Project id="integrations/ray">
-
-Get started with parallel training using our project template. It trains a
-simple model on a Universal Dependencies Treebank and lets you parallelize the
-training with Ray.
-
-</Project>
-
-### How parallel training works {id="parallel-training-details"}
-
-Each worker receives a shard of the **data** and builds a copy of the **model
-and optimizer** from the [`config.cfg`](#config). It also has a communication
-channel to **pass gradients and parameters** to the other workers. Additionally,
-each worker is given ownership of a subset of the parameter arrays. Every
-parameter array is owned by exactly one worker, and the workers are given a
-mapping so they know which worker owns which parameter.
-
-![Illustration of setup](/images/spacy-ray.svg)
-
-As training proceeds, every worker will be computing gradients for **all** of
-the model parameters. When they compute gradients for parameters they don't own,
-they'll **send them to the worker** that does own that parameter, along with a
-version identifier so that the owner can decide whether to discard the gradient.
-Workers use the gradients they receive and the ones they compute locally to
-update the parameters they own, and then broadcast the updated array and a new
-version ID to the other workers.
-
-This training procedure is **asynchronous** and **non-blocking**. Workers always
-push their gradient increments and parameter updates, they do not have to pull
-them and block on the result, so the transfers can happen in the background,
-overlapped with the actual training work. The workers also do not have to stop
-and wait for each other ("synchronize") at the start of each batch. This is very
-useful for spaCy, because spaCy is often trained on long documents, which means
-**batches can vary in size** significantly. Uneven workloads make synchronous
-gradient descent inefficient, because if one batch is slow, all of the other
-workers are stuck waiting for it to complete before they can continue.
-
 ## Internal training API {id="api"}
 
 <Infobox variant="danger">
diff --git a/website/docs/usage/v3-6.mdx b/website/docs/usage/v3-6.mdx
new file mode 100644
index 000000000..eda46b365
--- /dev/null
+++ b/website/docs/usage/v3-6.mdx
@@ -0,0 +1,143 @@
+---
+title: What's New in v3.6
+teaser: New features and how to upgrade
+menu:
+  - ['New Features', 'features']
+  - ['Upgrading Notes', 'upgrading']
+---
+
+## New features {id="features",hidden="true"}
+
+spaCy v3.6 adds the new [`SpanFinder`](/api/spanfinder) component to the core
+spaCy library and new trained pipelines for Slovenian.
+
+### SpanFinder {id="spanfinder"}
+
+The [`SpanFinder`](/api/spanfinder) component identifies potentially
+overlapping, unlabeled spans by predicting span start and end tokens. It is
+intended for use in combination with a component like
+[`SpanCategorizer`](/api/spancategorizer) that may further filter or label the
+spans. See our
+[Spancat blog post](https://explosion.ai/blog/spancat#span-finder) for a more
+detailed introduction to the span finder.
+
+To train a pipeline with `span_finder` + `spancat`, remember to add
+`span_finder` (and its `tok2vec` or `transformer` if required) to
+`[training.annotating_components]` so that the `spancat` component can be
+trained directly from its predictions:
+
+```ini
+[nlp]
+pipeline = ["tok2vec","span_finder","spancat"]
+
+[training]
+annotating_components = ["tok2vec","span_finder"]
+```
+
+In practice it can be helpful to initially train the `span_finder` separately
+before [sourcing](/usage/processing-pipelines#sourced-components) it (along with
+its `tok2vec`) into the `spancat` pipeline for further training. Otherwise the
+memory usage can spike for `spancat` in the first few training steps if the
+`span_finder` makes a large number of predictions.
+
+### Additional features and improvements {id="additional-features-and-improvements"}
+
+- Language updates:
+  - Add initial support for Malay.
+  - Update Latin defaults to support noun chunks, update lexical/tokenizer
+    settings and add example sentences.
+- Support `spancat_singlelabel` in `spacy debug data` CLI.
+- Add `doc.spans` rendering to `spacy evaluate` CLI displaCy output.
+- Support custom token/lexeme attribute for vectors.
+- Add option to return scores separately keyed by component name with
+  `spacy evaluate --per-component`, `Language.evaluate(per_component=True)` and
+  `Scorer.score(per_component=True)`. This is useful when the pipeline contains
+  more than one instance of the same component, such as `textcat`, whose score
+  keys would otherwise overlap.
+- Typing updates for `PhraseMatcher` and `SpanGroup`.
+
+## Trained pipelines {id="pipelines"}
+
+### New trained pipelines {id="new-pipelines"}
+
+v3.6 introduces new pipelines for Slovenian, which use the trainable lemmatizer
+and [floret vectors](https://github.com/explosion/floret).
+
+| Package                                           | UPOS | Parser LAS | NER F |
+| ------------------------------------------------- | ---: | ---------: | ----: |
+| [`sl_core_news_sm`](/models/sl#sl_core_news_sm)   | 96.9 |       82.1 |  62.9 |
+| [`sl_core_news_md`](/models/sl#sl_core_news_md)   | 97.6 |       84.3 |  73.5 |
+| [`sl_core_news_lg`](/models/sl#sl_core_news_lg)   | 97.7 |       84.3 |  79.0 |
+| [`sl_core_news_trf`](/models/sl#sl_core_news_trf) | 99.0 |       91.7 |  90.0 |
+
+### Pipeline updates {id="pipeline-updates"}
+
+The English pipelines have been updated to improve handling of contractions with
+various apostrophes and to lemmatize "get" as a passive auxiliary.
+
+The Danish pipeline `da_core_news_trf` has been updated to use
+[`vesteinn/DanskBERT`](https://huggingface.co/vesteinn/DanskBERT) with
+performance improvements across the board.
+
+## Notes about upgrading from v3.5 {id="upgrading"}
+
+### SpanGroup spans are now required to be from the same doc {id="spangroup-spans"}
+
+When initializing a `SpanGroup`, there is a new check to verify that all added
+spans refer to the current doc. Without this check, it was possible to run into
+string store or other errors.
+
+One place this may crop up is when creating `Example` objects for training with
+custom spans:
+
+```diff
+     doc = Doc(nlp.vocab, words=tokens)  # predicted doc
+     example = Example.from_dict(doc, {"ner": iob_tags})
+     # use the reference doc when creating reference spans
+-    span = Span(doc, 0, 5, "ORG")
++    span = Span(example.reference, 0, 5, "ORG")
+     example.reference.spans[spans_key] = [span]
+```
+
+### Pipeline package version compatibility {id="version-compat"}
+
+> #### Using legacy implementations
+>
+> In spaCy v3, you'll still be able to load and reference legacy implementations
+> via [`spacy-legacy`](https://github.com/explosion/spacy-legacy), even if the
+> components or architectures change and newer versions are available in the
+> core library.
+
+When you're loading a pipeline package trained with an earlier version of spaCy
+v3, you will see a warning telling you that the pipeline may be incompatible.
+This doesn't necessarily have to be true, but we recommend running your
+pipelines against your test suite or evaluation data to make sure there are no
+unexpected results.
+
+If you're using one of the [trained pipelines](/models) we provide, you should
+run [`spacy download`](/api/cli#download) to update to the latest version. To
+see an overview of all installed packages and their compatibility, you can run
+[`spacy validate`](/api/cli#validate).
+
+If you've trained your own custom pipeline and you've confirmed that it's still
+working as expected, you can update the spaCy version requirements in the
+[`meta.json`](/api/data-formats#meta):
+
+```diff
+- "spacy_version": ">=3.5.0,<3.6.0",
++ "spacy_version": ">=3.5.0,<3.7.0",
+```
+
+### Updating v3.5 configs
+
+To update a config from spaCy v3.5 with the new v3.6 settings, run
+[`init fill-config`](/api/cli#init-fill-config):
+
+```cli
+$ python -m spacy init fill-config config-v3.5.cfg config-v3.6.cfg
+```
+
+In many cases ([`spacy train`](/api/cli#train),
+[`spacy.load`](/api/top-level#spacy.load)), the new defaults will be filled in
+automatically, but you'll need to fill in the new settings to run
+[`debug config`](/api/cli#debug) and [`debug data`](/api/cli#debug-data).
diff --git a/website/meta/languages.json b/website/meta/languages.json
index 4c99004eb..17b58679f 100644
--- a/website/meta/languages.json
+++ b/website/meta/languages.json
@@ -222,7 +222,9 @@
         },
         {
             "code": "la",
-            "name": "Latin"
+            "name": "Latin",
+            "example": "In principio creavit Deus caelum et terram.",
+            "has_examples": true
         },
         {
             "code": "lb",
@@ -339,7 +341,10 @@
         },
         {
             "code": "sl",
-            "name": "Slovenian"
+            "name": "Slovenian",
+            "example": "France Prešeren je umrl 8. februarja 1849 v Kranju.",
+            "has_examples": true,
+            "models": ["sl_core_news_sm", "sl_core_news_md", "sl_core_news_lg", "sl_core_news_trf"]
         },
         {
             "code": "sq",
diff --git a/website/meta/sidebars.json b/website/meta/sidebars.json
index 12c3fce35..04102095f 100644
--- a/website/meta/sidebars.json
+++ b/website/meta/sidebars.json
@@ -14,7 +14,8 @@
                     { "text": "New in v3.2", "url": "/usage/v3-2" },
                     { "text": "New in v3.3", "url": "/usage/v3-3" },
                     { "text": "New in v3.4", "url": "/usage/v3-4" },
-                    { "text": "New in v3.5", "url": "/usage/v3-5" }
+                    { "text": "New in v3.5", "url": "/usage/v3-5" },
+                    { "text": "New in v3.6", "url": "/usage/v3-6" }
                 ]
             },
             {
diff --git a/website/meta/site.json b/website/meta/site.json
index 3d4f2d5ee..08fcde62e 100644
--- a/website/meta/site.json
+++ b/website/meta/site.json
@@ -27,7 +27,7 @@
         "indexName": "spacy"
     },
     "binderUrl": "explosion/spacy-io-binder",
-    "binderVersion": "3.5",
+    "binderVersion": "3.6",
     "sections": [
         { "id": "usage", "title": "Usage Documentation", "theme": "blue" },
         { "id": "models", "title": "Models Documentation", "theme": "blue" },
diff --git a/website/meta/universe.json b/website/meta/universe.json
index 967d9eb06..9f7b485bc 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -4376,7 +4376,7 @@
             "code_example": [
                 "import spacy",
                 "",
-                "nlp = spacy.load(\"en_core_web_sm\", disable=[\"ner\"])",
+                "nlp = spacy.load(\"en_core_web_sm\", exclude=[\"ner\"])",
                 "nlp.add_pipe(\"span_marker\", config={\"model\": \"tomaarsen/span-marker-roberta-large-ontonotes5\"})",
                 "",
                 "text = \"\"\"Cleopatra VII, also known as Cleopatra the Great, was the last active ruler of the \\",
diff --git a/website/src/components/code.js b/website/src/components/code.js
index 09c2fabfc..e733dba77 100644
--- a/website/src/components/code.js
+++ b/website/src/components/code.js
@@ -13,6 +13,8 @@ import 'prismjs/components/prism-json.min.js'
 import 'prismjs/components/prism-markdown.min.js'
 import 'prismjs/components/prism-python.min.js'
 import 'prismjs/components/prism-yaml.min.js'
+import 'prismjs/components/prism-docker.min.js'
+import 'prismjs/components/prism-r.min.js'
 
 import { isString } from './util'
 import Link, { OptionalLink } from './link'
@@ -172,7 +174,7 @@ const convertLine = ({ line, prompt, lang }) => {
         return handlePromot({ lineFlat, prompt })
     }
 
-    return lang === 'none' || !lineFlat ? (
+    return lang === 'none' || !lineFlat || !(lang in Prism.languages) ? (
         lineFlat
     ) : (
         <span
diff --git a/website/src/templates/index.js b/website/src/templates/index.js
index 227b25be8..c8295593c 100644
--- a/website/src/templates/index.js
+++ b/website/src/templates/index.js
@@ -58,8 +58,8 @@ const AlertSpace = ({ nightly, legacy }) => {
 }
 
 const navAlert = (
-    <Link to="/usage/v3-5" noLinkLayout>
-        <strong>💥 Out now:</strong> spaCy v3.5
+    <Link to="/usage/v3-6" noLinkLayout>
+        <strong>💥 Out now:</strong> spaCy v3.6
     </Link>
 )