From 5eeb25f0432d8a23246eed6fcb75eee2da8e5a63 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Mon, 28 Jun 2021 11:48:00 +0200
Subject: [PATCH] Tidy up code

---
 spacy/__init__.py                             |  1 +
 spacy/cli/debug_model.py                      |  5 ++-
 spacy/cli/evaluate.py                         |  4 +-
 spacy/displacy/__init__.py                    |  4 +-
 spacy/lang/bg/lex_attrs.py                    |  8 ++--
 spacy/lang/bg/tokenizer_exceptions.py         |  1 -
 spacy/lang/fi/tokenizer_exceptions.py         | 28 +++++++++---
 spacy/lang/it/lemmatizer.py                   |  2 +-
 spacy/lang/it/tokenizer_exceptions.py         |  2 +-
 spacy/language.py                             | 43 +++++++++++++------
 spacy/lookups.py                              |  4 +-
 spacy/ml/models/__init__.py                   |  2 +-
 spacy/pipeline/entity_linker.py               | 26 ++++-------
 spacy/pipeline/entityruler.py                 |  4 +-
 spacy/pipeline/spancat.py                     |  4 +-
 spacy/pipeline/textcat.py                     |  4 +-
 spacy/scorer.py                               | 16 ++++---
 spacy/tests/lang/bg/test_text.py              |  1 +
 spacy/tests/lang/fi/test_tokenizer.py         |  7 +--
 spacy/tests/matcher/test_matcher_logic.py     | 16 +++++--
 spacy/tests/parser/test_ner.py                |  6 ++-
 spacy/tests/pipeline/test_entity_linker.py    | 20 ++++++---
 spacy/tests/pipeline/test_models.py           |  4 +-
 spacy/tests/pipeline/test_pipe_factories.py   | 18 ++++++--
 spacy/tests/pipeline/test_pipe_methods.py     |  7 ++-
 spacy/tests/pipeline/test_spancat.py          |  5 +--
 spacy/tests/pipeline/test_textcat.py          | 16 ++++---
 spacy/tests/pipeline/test_tok2vec.py          | 14 ++++--
 spacy/tests/regression/test_issue6501-7000.py |  3 +-
 spacy/tests/regression/test_issue7056.py      |  3 +-
 spacy/tests/regression/test_issue7062.py      |  2 +-
 spacy/tests/regression/test_issue7065.py      | 32 +++++++++++---
 spacy/tests/regression/test_issue8168.py      | 19 ++++++--
 spacy/tests/regression/test_issue8190.py      | 11 +----
 .../tests/serialize/test_serialize_config.py  |  9 +++-
 spacy/tests/serialize/test_serialize_doc.py   |  4 +-
 spacy/tests/test_architectures.py             |  1 -
 spacy/tests/test_language.py                  |  4 +-
 spacy/tests/test_scorer.py                    |  8 +++-
 spacy/tests/training/test_pretraining.py      |  7 ++-
 spacy/tests/training/test_training.py         | 14 ++++--
 spacy/tests/vocab_vectors/test_vectors.py     |  1 +
 spacy/training/batchers.py                    |  6 ++-
 spacy/training/initialize.py                  |  8 +++-
 spacy/training/loop.py                        |  3 +-
 spacy/util.py                                 |  5 +--
 46 files changed, 276 insertions(+), 136 deletions(-)

diff --git a/spacy/__init__.py b/spacy/__init__.py
index d07931cfd..f20c32eb5 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -4,6 +4,7 @@ import sys
 
 # set library-specific custom warning handling before doing anything else
 from .errors import setup_default_warnings
+
 setup_default_warnings()
 
 # These are imported as part of the API
diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index 015e3a76b..ed4293902 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -139,7 +139,10 @@ def debug_model(
     upstream_component = None
     if model.has_ref("tok2vec") and "tok2vec-listener" in model.get_ref("tok2vec").name:
         upstream_component = nlp.get_pipe("tok2vec")
-    if model.has_ref("tok2vec") and "transformer-listener" in model.get_ref("tok2vec").name:
+    if (
+        model.has_ref("tok2vec")
+        and "transformer-listener" in model.get_ref("tok2vec").name
+    ):
         upstream_component = nlp.get_pipe("transformer")
     goldY = None
     for e in range(3):
diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py
index 687d63ad2..c563f24d3 100644
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@@ -127,7 +127,9 @@ def evaluate(
             data["ents_per_type"] = scores["ents_per_type"]
     if f"spans_{spans_key}_per_type" in scores:
         if scores[f"spans_{spans_key}_per_type"]:
-            print_prf_per_type(msg, scores[f"spans_{spans_key}_per_type"], "SPANS", "type")
+            print_prf_per_type(
+                msg, scores[f"spans_{spans_key}_per_type"], "SPANS", "type"
+            )
             data[f"spans_{spans_key}_per_type"] = scores[f"spans_{spans_key}_per_type"]
     if "cats_f_per_type" in scores:
         if scores["cats_f_per_type"]:
diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py
index aa61fb9f7..78b83f2e5 100644
--- a/spacy/displacy/__init__.py
+++ b/spacy/displacy/__init__.py
@@ -120,7 +120,9 @@ def parse_deps(orig_doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
     doc (Doc): Document do parse.
     RETURNS (dict): Generated dependency parse keyed by words and arcs.
     """
-    doc = Doc(orig_doc.vocab).from_bytes(orig_doc.to_bytes(exclude=["user_data", "user_hooks"]))
+    doc = Doc(orig_doc.vocab).from_bytes(
+        orig_doc.to_bytes(exclude=["user_data", "user_hooks"])
+    )
     if not doc.has_annotation("DEP"):
         warnings.warn(Warnings.W005)
     if options.get("collapse_phrases", False):
diff --git a/spacy/lang/bg/lex_attrs.py b/spacy/lang/bg/lex_attrs.py
index 62b69d6cc..bba3c74cd 100644
--- a/spacy/lang/bg/lex_attrs.py
+++ b/spacy/lang/bg/lex_attrs.py
@@ -22,13 +22,13 @@ _num_words = [
     "тринадесет",
     "тринайсет",
     "четиринадесет",
-    "четиринайсет"
+    "четиринайсет",
     "петнадесет",
-    "петнайсет"
+    "петнайсет",
     "шестнадесет",
     "шестнайсет",
     "седемнадесет",
-    "седемнайсет"
+    "седемнайсет",
     "осемнадесет",
     "осемнайсет",
     "деветнадесет",
@@ -36,7 +36,7 @@ _num_words = [
     "двадесет",
     "двайсет",
     "тридесет",
-    "трийсет"
+    "трийсет",
     "четиридесет",
     "четиресет",
     "петдесет",
diff --git a/spacy/lang/bg/tokenizer_exceptions.py b/spacy/lang/bg/tokenizer_exceptions.py
index defa00ef7..0b7487c64 100644
--- a/spacy/lang/bg/tokenizer_exceptions.py
+++ b/spacy/lang/bg/tokenizer_exceptions.py
@@ -58,7 +58,6 @@ _abbr_dot_exc = [
     {ORTH: "стр.", NORM: "страница"},
     {ORTH: "ул.", NORM: "улица"},
     {ORTH: "чл.", NORM: "член"},
-
 ]
 
 for abbr in _abbr_dot_exc:
diff --git a/spacy/lang/fi/tokenizer_exceptions.py b/spacy/lang/fi/tokenizer_exceptions.py
index f0161f8b3..465333b0a 100644
--- a/spacy/lang/fi/tokenizer_exceptions.py
+++ b/spacy/lang/fi/tokenizer_exceptions.py
@@ -81,16 +81,32 @@ for exc_data in [
 
 # Source: https://kaino.kotus.fi/visk/sisallys.php?p=141
 conj_contraction_bases = [
-    ("ett", "että"), ("jott", "jotta"), ("kosk", "koska"), ("mutt", "mutta"),
-    ("vaikk", "vaikka"), ("ehk", "ehkä"), ("miks", "miksi"), ("siks", "siksi"),
-    ("joll", "jos"), ("ell", "jos")
+    ("ett", "että"),
+    ("jott", "jotta"),
+    ("kosk", "koska"),
+    ("mutt", "mutta"),
+    ("vaikk", "vaikka"),
+    ("ehk", "ehkä"),
+    ("miks", "miksi"),
+    ("siks", "siksi"),
+    ("joll", "jos"),
+    ("ell", "jos"),
 ]
 conj_contraction_negations = [
-    ("en", "en"), ("et", "et"), ("ei", "ei"), ("emme", "emme"),
-    ("ette", "ette"), ("eivat", "eivät"), ("eivät", "eivät")]
+    ("en", "en"),
+    ("et", "et"),
+    ("ei", "ei"),
+    ("emme", "emme"),
+    ("ette", "ette"),
+    ("eivat", "eivät"),
+    ("eivät", "eivät"),
+]
 for (base_lower, base_norm) in conj_contraction_bases:
     for base in [base_lower, base_lower.title()]:
         for (suffix, suffix_norm) in conj_contraction_negations:
-            _exc[base + suffix] = [{ORTH: base, NORM: base_norm}, {ORTH: suffix, NORM: suffix_norm}]
+            _exc[base + suffix] = [
+                {ORTH: base, NORM: base_norm},
+                {ORTH: suffix, NORM: suffix_norm},
+            ]
 
 TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/it/lemmatizer.py b/spacy/lang/it/lemmatizer.py
index fced97d35..e44e64e3a 100644
--- a/spacy/lang/it/lemmatizer.py
+++ b/spacy/lang/it/lemmatizer.py
@@ -4,12 +4,12 @@ from ...pipeline import Lemmatizer
 from ...tokens import Token
 
 
-
 class ItalianLemmatizer(Lemmatizer):
     """This lemmatizer was adapted from the Polish one (version of April 2021).
     It implements lookup lemmatization based on the morphological lexicon
     morph-it (Baroni and Zanchetta). The table lemma_lookup with non-POS-aware
     entries is used as a backup for words that aren't handled by morph-it."""
+
     @classmethod
     def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:
         if mode == "pos_lookup":
diff --git a/spacy/lang/it/tokenizer_exceptions.py b/spacy/lang/it/tokenizer_exceptions.py
index 87c2929bf..42883863b 100644
--- a/spacy/lang/it/tokenizer_exceptions.py
+++ b/spacy/lang/it/tokenizer_exceptions.py
@@ -25,7 +25,7 @@ for orth in [
     "artt.",
     "att.",
     "avv.",
-    "Avv."
+    "Avv.",
     "by-pass",
     "c.d.",
     "c/c",
diff --git a/spacy/language.py b/spacy/language.py
index c35a8c016..e1cb1cb05 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -687,9 +687,11 @@ class Language:
         if not isinstance(source, Language):
             raise ValueError(Errors.E945.format(name=source_name, source=type(source)))
         # Check vectors, with faster checks first
-        if self.vocab.vectors.shape != source.vocab.vectors.shape or \
-                self.vocab.vectors.key2row != source.vocab.vectors.key2row or \
-                self.vocab.vectors.to_bytes() != source.vocab.vectors.to_bytes():
+        if (
+            self.vocab.vectors.shape != source.vocab.vectors.shape
+            or self.vocab.vectors.key2row != source.vocab.vectors.key2row
+            or self.vocab.vectors.to_bytes() != source.vocab.vectors.to_bytes()
+        ):
             warnings.warn(Warnings.W113.format(name=source_name))
         if not source_name in source.component_names:
             raise KeyError(
@@ -1539,15 +1541,21 @@ class Language:
 
         # Cycle channels not to break the order of docs.
         # The received object is a batch of byte-encoded docs, so flatten them with chain.from_iterable.
-        byte_tuples = chain.from_iterable(recv.recv() for recv in cycle(bytedocs_recv_ch))
+        byte_tuples = chain.from_iterable(
+            recv.recv() for recv in cycle(bytedocs_recv_ch)
+        )
         try:
-            for i, (_, (byte_doc, byte_error)) in enumerate(zip(raw_texts, byte_tuples), 1):
+            for i, (_, (byte_doc, byte_error)) in enumerate(
+                zip(raw_texts, byte_tuples), 1
+            ):
                 if byte_doc is not None:
                     doc = Doc(self.vocab).from_bytes(byte_doc)
                     yield doc
                 elif byte_error is not None:
                     error = srsly.msgpack_loads(byte_error)
-                    self.default_error_handler(None, None, None, ValueError(Errors.E871.format(error=error)))
+                    self.default_error_handler(
+                        None, None, None, ValueError(Errors.E871.format(error=error))
+                    )
                 if i % batch_size == 0:
                     # tell `sender` that one batch was consumed.
                     sender.step()
@@ -1707,7 +1715,9 @@ class Language:
                     if "replace_listeners" in pipe_cfg:
                         for name, proc in source_nlps[model].pipeline:
                             if source_name in getattr(proc, "listening_components", []):
-                                source_nlps[model].replace_listeners(name, source_name, pipe_cfg["replace_listeners"])
+                                source_nlps[model].replace_listeners(
+                                    name, source_name, pipe_cfg["replace_listeners"]
+                                )
                                 listeners_replaced = True
                     nlp.add_pipe(source_name, source=source_nlps[model], name=pipe_name)
                     # Delete from cache if listeners were replaced
@@ -1727,12 +1737,16 @@ class Language:
         for name, proc in nlp.pipeline:
             # Remove listeners not in the pipeline
             listener_names = getattr(proc, "listening_components", [])
-            unused_listener_names = [ll for ll in listener_names if ll not in nlp.pipe_names]
+            unused_listener_names = [
+                ll for ll in listener_names if ll not in nlp.pipe_names
+            ]
             for listener_name in unused_listener_names:
                 for listener in proc.listener_map.get(listener_name, []):
                     proc.remove_listener(listener, listener_name)
 
-            for listener in getattr(proc, "listening_components", []):  # e.g. tok2vec/transformer
+            for listener in getattr(
+                proc, "listening_components", []
+            ):  # e.g. tok2vec/transformer
                 # If it's a component sourced from another pipeline, we check if
                 # the tok2vec listeners should be replaced with standalone tok2vec
                 # models (e.g. so component can be frozen without its performance
@@ -1827,7 +1841,9 @@ class Language:
                 new_config = tok2vec_cfg["model"]
                 if "replace_listener_cfg" in tok2vec_model.attrs:
                     replace_func = tok2vec_model.attrs["replace_listener_cfg"]
-                    new_config = replace_func(tok2vec_cfg["model"], pipe_cfg["model"]["tok2vec"])
+                    new_config = replace_func(
+                        tok2vec_cfg["model"], pipe_cfg["model"]["tok2vec"]
+                    )
                 util.set_dot_to_object(pipe_cfg, listener_path, new_config)
             # Go over the listener layers and replace them
             for listener in pipe_listeners:
@@ -1866,8 +1882,11 @@ class Language:
         util.to_disk(path, serializers, exclude)
 
     def from_disk(
-        self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList(),
-            overrides: Dict[str, Any] = SimpleFrozenDict(),
+        self,
+        path: Union[str, Path],
+        *,
+        exclude: Iterable[str] = SimpleFrozenList(),
+        overrides: Dict[str, Any] = SimpleFrozenDict(),
     ) -> "Language":
         """Loads state from a directory. Modifies the object in place and
         returns it. If the saved `Language` object contains a model, the
diff --git a/spacy/lookups.py b/spacy/lookups.py
index f635f0dcf..025afa04b 100644
--- a/spacy/lookups.py
+++ b/spacy/lookups.py
@@ -12,9 +12,7 @@ from .strings import get_string_id
 UNSET = object()
 
 
-def load_lookups(
-    lang: str, tables: List[str], strict: bool = True
-) -> 'Lookups':
+def load_lookups(lang: str, tables: List[str], strict: bool = True) -> "Lookups":
     """Load the data from the spacy-lookups-data package for a given language,
     if available. Returns an empty `Lookups` container if there's no data or if the package
     is not installed.
diff --git a/spacy/ml/models/__init__.py b/spacy/ml/models/__init__.py
index daf47ef27..9b7628f0e 100644
--- a/spacy/ml/models/__init__.py
+++ b/spacy/ml/models/__init__.py
@@ -1,7 +1,7 @@
 from .entity_linker import *  # noqa
 from .multi_task import *  # noqa
 from .parser import *  # noqa
-from .spancat import * # noqa
+from .spancat import *  # noqa
 from .tagger import *  # noqa
 from .textcat import *  # noqa
 from .tok2vec import *  # noqa
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index a8c80df05..1c7f0ac8a 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -309,9 +309,7 @@ class EntityLinker(TrainablePipe):
                     assert sent_index >= 0
                     # get n_neighbour sentences, clipped to the length of the document
                     start_sentence = max(0, sent_index - self.n_sents)
-                    end_sentence = min(
-                        len(sentences) - 1, sent_index + self.n_sents
-                    )
+                    end_sentence = min(len(sentences) - 1, sent_index + self.n_sents)
                     start_token = sentences[start_sentence].start
                     end_token = sentences[end_sentence].end
                     sent_doc = doc[start_token:end_token].as_doc()
@@ -337,22 +335,16 @@ class EntityLinker(TrainablePipe):
                         else:
                             random.shuffle(candidates)
                             # set all prior probabilities to 0 if incl_prior=False
-                            prior_probs = xp.asarray(
-                                [c.prior_prob for c in candidates]
-                            )
+                            prior_probs = xp.asarray([c.prior_prob for c in candidates])
                             if not self.incl_prior:
-                                prior_probs = xp.asarray(
-                                    [0.0 for _ in candidates]
-                                )
+                                prior_probs = xp.asarray([0.0 for _ in candidates])
                             scores = prior_probs
                             # add in similarity from the context
                             if self.incl_context:
                                 entity_encodings = xp.asarray(
                                     [c.entity_vector for c in candidates]
                                 )
-                                entity_norm = xp.linalg.norm(
-                                    entity_encodings, axis=1
-                                )
+                                entity_norm = xp.linalg.norm(entity_encodings, axis=1)
                                 if len(entity_encodings) != len(prior_probs):
                                     raise RuntimeError(
                                         Errors.E147.format(
@@ -361,14 +353,12 @@ class EntityLinker(TrainablePipe):
                                         )
                                     )
                                 # cosine similarity
-                                sims = xp.dot(
-                                    entity_encodings, sentence_encoding_t
-                                ) / (sentence_norm * entity_norm)
+                                sims = xp.dot(entity_encodings, sentence_encoding_t) / (
+                                    sentence_norm * entity_norm
+                                )
                                 if sims.shape != prior_probs.shape:
                                     raise ValueError(Errors.E161)
-                                scores = (
-                                    prior_probs + sims - (prior_probs * sims)
-                                )
+                                scores = prior_probs + sims - (prior_probs * sims)
                             # TODO: thresholding
                             best_index = scores.argmax().item()
                             best_candidate = candidates[best_index]
diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py
index 761ff12bf..ea14dae69 100644
--- a/spacy/pipeline/entityruler.py
+++ b/spacy/pipeline/entityruler.py
@@ -278,9 +278,7 @@ class EntityRuler(Pipe):
                 if self == pipe:
                     current_index = i
                     break
-            subsequent_pipes = [
-                pipe for pipe in self.nlp.pipe_names[current_index :]
-            ]
+            subsequent_pipes = [pipe for pipe in self.nlp.pipe_names[current_index:]]
         except ValueError:
             subsequent_pipes = []
         with self.nlp.select_pipes(disable=subsequent_pipes):
diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py
index f59d8ddc9..fdf6f9f5e 100644
--- a/spacy/pipeline/spancat.py
+++ b/spacy/pipeline/spancat.py
@@ -61,7 +61,7 @@ def build_ngram_suggester(sizes: List[int]) -> Callable[[List[Doc]], Ragged]:
             length = 0
             for size in sizes:
                 if size <= len(doc):
-                    starts_size = starts[:len(doc) - (size - 1)]
+                    starts_size = starts[: len(doc) - (size - 1)]
                     spans.append(ops.xp.hstack((starts_size, starts_size + size)))
                     length += spans[-1].shape[0]
                 if spans:
@@ -70,7 +70,7 @@ def build_ngram_suggester(sizes: List[int]) -> Callable[[List[Doc]], Ragged]:
         if len(spans) > 0:
             output = Ragged(ops.xp.vstack(spans), ops.asarray(lengths, dtype="i"))
         else:
-            output = Ragged(ops.xp.zeros((0,0)), ops.asarray(lengths, dtype="i"))
+            output = Ragged(ops.xp.zeros((0, 0)), ops.asarray(lengths, dtype="i"))
 
         assert output.dataXd.ndim == 2
         return output
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index 0d3bbdf35..72a6dcd61 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -299,7 +299,9 @@ class TextCategorizer(TrainablePipe):
         self._allow_extra_label()
         self.cfg["labels"].append(label)
         if self.model and "resize_output" in self.model.attrs:
-            self.model = self.model.attrs["resize_output"](self.model, len(self.cfg["labels"]))
+            self.model = self.model.attrs["resize_output"](
+                self.model, len(self.cfg["labels"])
+            )
         self.vocab.strings.add(label)
         return 1
 
diff --git a/spacy/scorer.py b/spacy/scorer.py
index 25df44f14..f4ccb2269 100644
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@@ -365,7 +365,9 @@ class Scorer:
                 gold_spans.add(gold_span)
                 gold_per_type[span.label_].add(gold_span)
             pred_per_type = {label: set() for label in labels}
-            for span in example.get_aligned_spans_x2y(getter(pred_doc, attr), allow_overlap):
+            for span in example.get_aligned_spans_x2y(
+                getter(pred_doc, attr), allow_overlap
+            ):
                 if labeled:
                     pred_span = (span.label_, span.start, span.end - 1)
                 else:
@@ -381,10 +383,10 @@ class Scorer:
             score.score_set(pred_spans, gold_spans)
         # Assemble final result
         final_scores = {
-                f"{attr}_p": None,
-                f"{attr}_r": None,
-                f"{attr}_f": None,
-            }
+            f"{attr}_p": None,
+            f"{attr}_r": None,
+            f"{attr}_f": None,
+        }
         if labeled:
             final_scores[f"{attr}_per_type"] = None
         if len(score) > 0:
@@ -392,7 +394,9 @@ class Scorer:
             final_scores[f"{attr}_r"] = score.recall
             final_scores[f"{attr}_f"] = score.fscore
             if labeled:
-                final_scores[f"{attr}_per_type"] = {k: v.to_dict() for k, v in score_per_type.items()}
+                final_scores[f"{attr}_per_type"] = {
+                    k: v.to_dict() for k, v in score_per_type.items()
+                }
         return final_scores
 
     @staticmethod
diff --git a/spacy/tests/lang/bg/test_text.py b/spacy/tests/lang/bg/test_text.py
index 3d35ba997..63ae4ffd8 100644
--- a/spacy/tests/lang/bg/test_text.py
+++ b/spacy/tests/lang/bg/test_text.py
@@ -1,6 +1,7 @@
 import pytest
 from spacy.lang.bg.lex_attrs import like_num
 
+
 @pytest.mark.parametrize(
     "word,match",
     [
diff --git a/spacy/tests/lang/fi/test_tokenizer.py b/spacy/tests/lang/fi/test_tokenizer.py
index b2f23f7fd..dc40e18a3 100644
--- a/spacy/tests/lang/fi/test_tokenizer.py
+++ b/spacy/tests/lang/fi/test_tokenizer.py
@@ -40,20 +40,21 @@ CONTRACTION_TESTS = [
     (
         "Päätimme ettemme tule.",
         ["Päätimme", "ett", "emme", "tule", "."],
-        ["päätimme", "että", "emme", "tule", "."]
+        ["päätimme", "että", "emme", "tule", "."],
     ),
     (
         "Miksei puhuttaisi?",
         ["Miks", "ei", "puhuttaisi", "?"],
-        ["miksi", "ei", "puhuttaisi", "?"]
+        ["miksi", "ei", "puhuttaisi", "?"],
     ),
     (
         "He tottelivat vaikkeivat halunneet",
         ["He", "tottelivat", "vaikk", "eivat", "halunneet"],
-        ["he", "tottelivat", "vaikka", "eivät", "halunneet"]
+        ["he", "tottelivat", "vaikka", "eivät", "halunneet"],
     ),
 ]
 
+
 @pytest.mark.parametrize("text,expected_tokens", ABBREVIATION_TESTS)
 def test_fi_tokenizer_abbreviations(fi_tokenizer, text, expected_tokens):
     tokens = fi_tokenizer(text)
diff --git a/spacy/tests/matcher/test_matcher_logic.py b/spacy/tests/matcher/test_matcher_logic.py
index 36708edd0..dcbe1ff33 100644
--- a/spacy/tests/matcher/test_matcher_logic.py
+++ b/spacy/tests/matcher/test_matcher_logic.py
@@ -255,13 +255,23 @@ def test_matcher_with_alignments_nongreedy(en_vocab):
         (0, "aaab", "a* b", [[0, 1], [0, 0, 1], [0, 0, 0, 1], [1]]),
         (1, "baab", "b a* b", [[0, 1, 1, 2]]),
         (2, "aaab", "a a a b", [[0, 1, 2, 3]]),
-        (3, "aaab", "a+ b",   [[0, 1], [0, 0, 1], [0, 0, 0, 1]]),
+        (3, "aaab", "a+ b", [[0, 1], [0, 0, 1], [0, 0, 0, 1]]),
         (4, "aaba", "a+ b a+", [[0, 1, 2], [0, 0, 1, 2]]),
-        (5, "aabaa", "a+ b a+", [[0, 1, 2], [0, 0, 1, 2], [0, 0, 1, 2, 2], [0, 1, 2, 2] ]),
+        (
+            5,
+            "aabaa",
+            "a+ b a+",
+            [[0, 1, 2], [0, 0, 1, 2], [0, 0, 1, 2, 2], [0, 1, 2, 2]],
+        ),
         (6, "aaba", "a+ b a*", [[0, 1], [0, 0, 1], [0, 0, 1, 2], [0, 1, 2]]),
         (7, "aaaa", "a*", [[0], [0, 0], [0, 0, 0], [0, 0, 0, 0]]),
         (8, "baab", "b a* b b*", [[0, 1, 1, 2]]),
-        (9, "aabb", "a* b* a*", [[1], [2], [2, 2], [0, 1], [0, 0, 1], [0, 0, 1, 1], [0, 1, 1], [1, 1]]),
+        (
+            9,
+            "aabb",
+            "a* b* a*",
+            [[1], [2], [2, 2], [0, 1], [0, 0, 1], [0, 0, 1, 1], [0, 1, 1], [1, 1]],
+        ),
         (10, "aaab", "a+ a+ a b", [[0, 1, 2, 3]]),
         (11, "aaab", "a+ a+ a+ b", [[0, 1, 2, 3]]),
         (12, "aaab", "a+ a a b", [[0, 1, 2, 3]]),
diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py
index eccfbf174..00617df56 100644
--- a/spacy/tests/parser/test_ner.py
+++ b/spacy/tests/parser/test_ner.py
@@ -557,7 +557,11 @@ def test_neg_annotation(neg_key):
     ner.add_label("PERSON")
     ner.add_label("ORG")
     example = Example.from_dict(neg_doc, {"entities": [(7, 17, "PERSON")]})
-    example.reference.spans[neg_key] = [Span(neg_doc, 2, 4, "ORG"), Span(neg_doc, 2, 3, "PERSON"), Span(neg_doc, 1, 4, "PERSON")]
+    example.reference.spans[neg_key] = [
+        Span(neg_doc, 2, 4, "ORG"),
+        Span(neg_doc, 2, 3, "PERSON"),
+        Span(neg_doc, 1, 4, "PERSON"),
+    ]
 
     optimizer = nlp.initialize()
     for i in range(2):
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index 13c8cb72e..78259be6a 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -254,7 +254,9 @@ def test_nel_nsents(nlp):
     """Test that n_sents can be set through the configuration"""
     entity_linker = nlp.add_pipe("entity_linker", config={})
     assert entity_linker.n_sents == 0
-    entity_linker = nlp.replace_pipe("entity_linker", "entity_linker", config={"n_sents": 2})
+    entity_linker = nlp.replace_pipe(
+        "entity_linker", "entity_linker", config={"n_sents": 2}
+    )
     assert entity_linker.n_sents == 2
 
 
@@ -596,7 +598,9 @@ def test_kb_to_bytes():
     kb_1.add_entity(entity="Q66", freq=9, entity_vector=[1, 2, 3])
     kb_1.add_alias(alias="Russ Cochran", entities=["Q2146908"], probabilities=[0.8])
     kb_1.add_alias(alias="Boeing", entities=["Q66"], probabilities=[0.5])
-    kb_1.add_alias(alias="Randomness", entities=["Q66", "Q2146908"], probabilities=[0.1, 0.2])
+    kb_1.add_alias(
+        alias="Randomness", entities=["Q66", "Q2146908"], probabilities=[0.1, 0.2]
+    )
     assert kb_1.contains_alias("Russ Cochran")
     kb_bytes = kb_1.to_bytes()
     kb_2 = KnowledgeBase(nlp.vocab, entity_vector_length=3)
@@ -611,8 +615,12 @@ def test_kb_to_bytes():
     assert kb_2.contains_alias("Russ Cochran")
     assert kb_1.get_size_aliases() == kb_2.get_size_aliases()
     assert kb_1.get_alias_strings() == kb_2.get_alias_strings()
-    assert len(kb_1.get_alias_candidates("Russ Cochran")) == len(kb_2.get_alias_candidates("Russ Cochran"))
-    assert len(kb_1.get_alias_candidates("Randomness")) == len(kb_2.get_alias_candidates("Randomness"))
+    assert len(kb_1.get_alias_candidates("Russ Cochran")) == len(
+        kb_2.get_alias_candidates("Russ Cochran")
+    )
+    assert len(kb_1.get_alias_candidates("Randomness")) == len(
+        kb_2.get_alias_candidates("Randomness")
+    )
 
 
 def test_nel_to_bytes():
@@ -640,7 +648,9 @@ def test_nel_to_bytes():
     kb_2 = nlp_2.get_pipe("entity_linker").kb
     assert kb_2.contains_alias("Russ Cochran")
     assert kb_2.get_vector("Q2146908") == [6, -4, 3]
-    assert_almost_equal(kb_2.get_prior_prob(entity="Q2146908", alias="Russ Cochran"), 0.8)
+    assert_almost_equal(
+        kb_2.get_prior_prob(entity="Q2146908", alias="Russ Cochran"), 0.8
+    )
 
 
 def test_scorer_links():
diff --git a/spacy/tests/pipeline/test_models.py b/spacy/tests/pipeline/test_models.py
index 302c307e2..e3fd28d0f 100644
--- a/spacy/tests/pipeline/test_models.py
+++ b/spacy/tests/pipeline/test_models.py
@@ -82,7 +82,9 @@ def util_batch_unbatch_docs_list(
         Y_batched = model.predict(in_data)
         Y_not_batched = [model.predict([u])[0] for u in in_data]
         for i in range(len(Y_batched)):
-            assert_almost_equal(OPS.to_numpy(Y_batched[i]), OPS.to_numpy(Y_not_batched[i]), decimal=4)
+            assert_almost_equal(
+                OPS.to_numpy(Y_batched[i]), OPS.to_numpy(Y_not_batched[i]), decimal=4
+            )
 
 
 def util_batch_unbatch_docs_array(
diff --git a/spacy/tests/pipeline/test_pipe_factories.py b/spacy/tests/pipeline/test_pipe_factories.py
index b28886925..f1f0c8a6e 100644
--- a/spacy/tests/pipeline/test_pipe_factories.py
+++ b/spacy/tests/pipeline/test_pipe_factories.py
@@ -351,9 +351,21 @@ def test_language_factories_invalid():
         ([{"a": 0.5, "b": 0.5}, {"b": 1.0}], {"a": 0.0}, {"a": 0.0, "b": 1.0}),
         ([{"a": 0.0, "b": 0.0}, {"c": 0.0}], {}, {"a": 0.0, "b": 0.0, "c": 0.0}),
         ([{"a": 0.0, "b": 0.0}, {"c": 1.0}], {}, {"a": 0.0, "b": 0.0, "c": 1.0}),
-        ([{"a": 0.0, "b": 0.0}, {"c": 0.0}], {"c": 0.2}, {"a": 0.0, "b": 0.0, "c": 1.0}),
-        ([{"a": 0.5, "b": 0.5, "c": 1.0, "d": 1.0}], {"a": 0.0, "b": 0.0}, {"a": 0.0, "b": 0.0, "c": 0.5, "d": 0.5}),
-        ([{"a": 0.5, "b": 0.5, "c": 1.0, "d": 1.0}], {"a": 0.0, "b": 0.0, "f": 0.0}, {"a": 0.0, "b": 0.0, "c": 0.5, "d": 0.5, "f": 0.0}),
+        (
+            [{"a": 0.0, "b": 0.0}, {"c": 0.0}],
+            {"c": 0.2},
+            {"a": 0.0, "b": 0.0, "c": 1.0},
+        ),
+        (
+            [{"a": 0.5, "b": 0.5, "c": 1.0, "d": 1.0}],
+            {"a": 0.0, "b": 0.0},
+            {"a": 0.0, "b": 0.0, "c": 0.5, "d": 0.5},
+        ),
+        (
+            [{"a": 0.5, "b": 0.5, "c": 1.0, "d": 1.0}],
+            {"a": 0.0, "b": 0.0, "f": 0.0},
+            {"a": 0.0, "b": 0.0, "c": 0.5, "d": 0.5, "f": 0.0},
+        ),
     ],
 )
 def test_language_factories_combine_score_weights(weights, override, expected):
diff --git a/spacy/tests/pipeline/test_pipe_methods.py b/spacy/tests/pipeline/test_pipe_methods.py
index 0b84db4c0..e530cb5c4 100644
--- a/spacy/tests/pipeline/test_pipe_methods.py
+++ b/spacy/tests/pipeline/test_pipe_methods.py
@@ -446,7 +446,12 @@ def test_update_with_annotates():
     for text in texts:
         examples.append(Example(nlp.make_doc(text), nlp.make_doc(text)))
 
-    for components_to_annotate in [[], [f"{name}1"], [f"{name}1", f"{name}2"], [f"{name}2", f"{name}1"]]:
+    for components_to_annotate in [
+        [],
+        [f"{name}1"],
+        [f"{name}1", f"{name}2"],
+        [f"{name}2", f"{name}1"],
+    ]:
         for key in results:
             results[key] = ""
         nlp = English(vocab=nlp.vocab)
diff --git a/spacy/tests/pipeline/test_spancat.py b/spacy/tests/pipeline/test_spancat.py
index d5e5db63c..f70df7478 100644
--- a/spacy/tests/pipeline/test_spancat.py
+++ b/spacy/tests/pipeline/test_spancat.py
@@ -79,10 +79,7 @@ def test_ngram_suggester(en_tokenizer):
             assert spans.shape[0] == len(spans_set)
             offset += ngrams.lengths[i]
         # the number of spans is correct
-        assert_equal(
-            ngrams.lengths,
-            [max(0, len(doc) - (size - 1)) for doc in docs]
-        )
+        assert_equal(ngrams.lengths, [max(0, len(doc) - (size - 1)) for doc in docs])
 
     # test 1-3-gram suggestions
     ngram_suggester = registry.misc.get("ngram_suggester.v1")(sizes=[1, 2, 3])
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 6f1d22eba..fdb44b412 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -131,7 +131,7 @@ def test_implicit_label(name, get_examples):
     nlp.initialize(get_examples=get_examples(nlp))
 
 
-#fmt: off
+# fmt: off
 @pytest.mark.parametrize(
     "name,textcat_config",
     [
@@ -150,7 +150,7 @@ def test_implicit_label(name, get_examples):
         ("textcat_multilabel", {"@architectures": "spacy.TextCatCNN.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
     ],
 )
-#fmt: on
+# fmt: on
 def test_no_resize(name, textcat_config):
     """The old textcat architectures weren't resizable"""
     nlp = Language()
@@ -165,7 +165,7 @@ def test_no_resize(name, textcat_config):
         textcat.add_label("NEUTRAL")
 
 
-#fmt: off
+# fmt: off
 @pytest.mark.parametrize(
     "name,textcat_config",
     [
@@ -179,7 +179,7 @@ def test_no_resize(name, textcat_config):
         ("textcat_multilabel", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
     ],
 )
-#fmt: on
+# fmt: on
 def test_resize(name, textcat_config):
     """The new textcat architectures are resizable"""
     nlp = Language()
@@ -194,7 +194,7 @@ def test_resize(name, textcat_config):
     assert textcat.model.maybe_get_dim("nO") in [3, None]
 
 
-#fmt: off
+# fmt: off
 @pytest.mark.parametrize(
     "name,textcat_config",
     [
@@ -208,7 +208,7 @@ def test_resize(name, textcat_config):
         ("textcat_multilabel", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
     ],
 )
-#fmt: on
+# fmt: on
 def test_resize_same_results(name, textcat_config):
     # Ensure that the resized textcat classifiers still produce the same results for old labels
     fix_random_seed(0)
@@ -511,7 +511,9 @@ def test_textcat_threshold():
     macro_f = scores["cats_score"]
     assert scores["cats_f_per_type"]["POSITIVE"]["r"] == 1.0
 
-    scores = nlp.evaluate(train_examples, scorer_cfg={"threshold": 0, "positive_label": "POSITIVE"})
+    scores = nlp.evaluate(
+        train_examples, scorer_cfg={"threshold": 0, "positive_label": "POSITIVE"}
+    )
     pos_f = scores["cats_score"]
     assert scores["cats_f_per_type"]["POSITIVE"]["r"] == 1.0
     assert pos_f > macro_f
diff --git a/spacy/tests/pipeline/test_tok2vec.py b/spacy/tests/pipeline/test_tok2vec.py
index 809a79dd6..eeea906bb 100644
--- a/spacy/tests/pipeline/test_tok2vec.py
+++ b/spacy/tests/pipeline/test_tok2vec.py
@@ -129,8 +129,14 @@ cfg_string = """
     """
 
 TRAIN_DATA = [
-    ("I like green eggs", {"tags": ["N", "V", "J", "N"], "cats": {"preference": 1.0, "imperative": 0.0}}),
-    ("Eat blue ham", {"tags": ["V", "J", "N"], "cats": {"preference": 0.0, "imperative": 1.0}}),
+    (
+        "I like green eggs",
+        {"tags": ["N", "V", "J", "N"], "cats": {"preference": 1.0, "imperative": 0.0}},
+    ),
+    (
+        "Eat blue ham",
+        {"tags": ["V", "J", "N"], "cats": {"preference": 0.0, "imperative": 1.0}},
+    ),
 ]
 
 
@@ -405,5 +411,5 @@ def test_tok2vec_listeners_textcat():
     cats1 = docs[1].cats
     assert cats1["preference"] > 0.1
     assert cats1["imperative"] < 0.9
-    assert([t.tag_ for t in docs[0]] == ["V", "J", "N"])
-    assert([t.tag_ for t in docs[1]] == ["N", "V", "J", "N"])
+    assert [t.tag_ for t in docs[0]] == ["V", "J", "N"]
+    assert [t.tag_ for t in docs[1]] == ["N", "V", "J", "N"]
diff --git a/spacy/tests/regression/test_issue6501-7000.py b/spacy/tests/regression/test_issue6501-7000.py
index 3007f1dc6..f57e4085c 100644
--- a/spacy/tests/regression/test_issue6501-7000.py
+++ b/spacy/tests/regression/test_issue6501-7000.py
@@ -152,7 +152,8 @@ labels = ['label1', 'label2']
 
 
 @pytest.mark.parametrize(
-    "component_name", ["textcat", "textcat_multilabel"],
+    "component_name",
+    ["textcat", "textcat_multilabel"],
 )
 def test_issue6908(component_name):
     """Test intializing textcat with labels in a list"""
diff --git a/spacy/tests/regression/test_issue7056.py b/spacy/tests/regression/test_issue7056.py
index 541144877..e94a975d4 100644
--- a/spacy/tests/regression/test_issue7056.py
+++ b/spacy/tests/regression/test_issue7056.py
@@ -8,8 +8,7 @@ def test_issue7056():
     sentence segmentation errors."""
     vocab = Vocab()
     ae = ArcEager(
-        vocab.strings,
-        ArcEager.get_actions(left_labels=["amod"], right_labels=["pobj"])
+        vocab.strings, ArcEager.get_actions(left_labels=["amod"], right_labels=["pobj"])
     )
     doc = Doc(vocab, words="Severe pain , after trauma".split())
     state = ae.init_batch([doc])[0]
diff --git a/spacy/tests/regression/test_issue7062.py b/spacy/tests/regression/test_issue7062.py
index 88e5d2520..66bf09523 100644
--- a/spacy/tests/regression/test_issue7062.py
+++ b/spacy/tests/regression/test_issue7062.py
@@ -41,7 +41,7 @@ def test_partial_links():
     nlp.add_pipe("sentencizer", first=True)
     patterns = [
         {"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]},
-        {"label": "ORG", "pattern": [{"LOWER": "ec"}, {"LOWER": "comics"}]}
+        {"label": "ORG", "pattern": [{"LOWER": "ec"}, {"LOWER": "comics"}]},
     ]
     ruler = nlp.add_pipe("entity_ruler", before="entity_linker")
     ruler.add_patterns(patterns)
diff --git a/spacy/tests/regression/test_issue7065.py b/spacy/tests/regression/test_issue7065.py
index 63d36552a..d40763c63 100644
--- a/spacy/tests/regression/test_issue7065.py
+++ b/spacy/tests/regression/test_issue7065.py
@@ -8,7 +8,17 @@ def test_issue7065():
     nlp = English()
     nlp.add_pipe("sentencizer")
     ruler = nlp.add_pipe("entity_ruler")
-    patterns = [{"label": "THING", "pattern": [{"LOWER": "symphony"}, {"LOWER": "no"}, {"LOWER": "."}, {"LOWER": "8"}]}]
+    patterns = [
+        {
+            "label": "THING",
+            "pattern": [
+                {"LOWER": "symphony"},
+                {"LOWER": "no"},
+                {"LOWER": "."},
+                {"LOWER": "8"},
+            ],
+        }
+    ]
     ruler.add_patterns(patterns)
 
     doc = nlp(text)
@@ -28,11 +38,15 @@ def test_issue7065_b():
 
     text = "Mahler 's Symphony No. 8 was beautiful."
     entities = [(0, 6, "PERSON"), (10, 24, "WORK")]
-    links = {(0, 6): {"Q7304": 1.0, "Q270853": 0.0},
-             (10, 24): {"Q7304": 0.0, "Q270853": 1.0}}
+    links = {
+        (0, 6): {"Q7304": 1.0, "Q270853": 0.0},
+        (10, 24): {"Q7304": 0.0, "Q270853": 1.0},
+    }
     sent_starts = [1, -1, 0, 0, 0, 0, 0, 0, 0]
     doc = nlp(text)
-    example = Example.from_dict(doc, {"entities": entities, "links": links, "sent_starts": sent_starts})
+    example = Example.from_dict(
+        doc, {"entities": entities, "links": links, "sent_starts": sent_starts}
+    )
     train_examples = [example]
 
     def create_kb(vocab):
@@ -65,7 +79,15 @@ def test_issue7065_b():
     # Add a custom rule-based component to mimick NER
     patterns = [
         {"label": "PERSON", "pattern": [{"LOWER": "mahler"}]},
-        {"label": "WORK", "pattern": [{"LOWER": "symphony"}, {"LOWER": "no"}, {"LOWER": "."}, {"LOWER": "8"}]}
+        {
+            "label": "WORK",
+            "pattern": [
+                {"LOWER": "symphony"},
+                {"LOWER": "no"},
+                {"LOWER": "."},
+                {"LOWER": "8"},
+            ],
+        },
     ]
     ruler = nlp.add_pipe("entity_ruler", before="entity_linker")
     ruler.add_patterns(patterns)
diff --git a/spacy/tests/regression/test_issue8168.py b/spacy/tests/regression/test_issue8168.py
index cf5a9fc7a..fbddf643c 100644
--- a/spacy/tests/regression/test_issue8168.py
+++ b/spacy/tests/regression/test_issue8168.py
@@ -1,11 +1,22 @@
 from spacy.lang.en import English
 
+
 def test_issue8168():
     nlp = English()
     ruler = nlp.add_pipe("entity_ruler")
-    patterns = [{"label": "ORG", "pattern": "Apple"},
-                {"label": "GPE", "pattern": [{"LOWER": "san"}, {"LOWER": "francisco"}], "id": "san-francisco"},
-                {"label": "GPE", "pattern": [{"LOWER": "san"}, {"LOWER": "fran"}], "id": "san-francisco"}]
+    patterns = [
+        {"label": "ORG", "pattern": "Apple"},
+        {
+            "label": "GPE",
+            "pattern": [{"LOWER": "san"}, {"LOWER": "francisco"}],
+            "id": "san-francisco",
+        },
+        {
+            "label": "GPE",
+            "pattern": [{"LOWER": "san"}, {"LOWER": "fran"}],
+            "id": "san-francisco",
+        },
+    ]
     ruler.add_patterns(patterns)
 
-    assert ruler._ent_ids == {8043148519967183733: ('GPE', 'san-francisco')}
\ No newline at end of file
+    assert ruler._ent_ids == {8043148519967183733: ("GPE", "san-francisco")}
diff --git a/spacy/tests/regression/test_issue8190.py b/spacy/tests/regression/test_issue8190.py
index 800a1638d..6ddbe53e0 100644
--- a/spacy/tests/regression/test_issue8190.py
+++ b/spacy/tests/regression/test_issue8190.py
@@ -9,20 +9,13 @@ def test_issue8190():
         "nlp": {
             "lang": "en",
         },
-        "custom": {
-            "key": "value"
-        }
-
+        "custom": {"key": "value"},
     }
     source_nlp = English.from_config(source_cfg)
     with make_tempdir() as dir_path:
         # We need to create a loadable source pipeline
         source_path = dir_path / "test_model"
         source_nlp.to_disk(source_path)
-        nlp = spacy.load(source_path, config={
-            "custom": {
-                "key": "updated_value"
-            }
-        })
+        nlp = spacy.load(source_path, config={"custom": {"key": "updated_value"}})
 
         assert nlp.config["custom"]["key"] == "updated_value"
diff --git a/spacy/tests/serialize/test_serialize_config.py b/spacy/tests/serialize/test_serialize_config.py
index 2cd0e4ab6..114d4865c 100644
--- a/spacy/tests/serialize/test_serialize_config.py
+++ b/spacy/tests/serialize/test_serialize_config.py
@@ -4,7 +4,12 @@ import spacy
 from spacy.lang.en import English
 from spacy.lang.de import German
 from spacy.language import Language, DEFAULT_CONFIG, DEFAULT_CONFIG_PRETRAIN_PATH
-from spacy.util import registry, load_model_from_config, load_config, load_config_from_str
+from spacy.util import (
+    registry,
+    load_model_from_config,
+    load_config,
+    load_config_from_str,
+)
 from spacy.ml.models import build_Tok2Vec_model, build_tb_parser_model
 from spacy.ml.models import MultiHashEmbed, MaxoutWindowEncoder
 from spacy.schemas import ConfigSchema, ConfigSchemaPretrain
@@ -493,4 +498,4 @@ def test_hyphen_in_config():
             self.punctuation = punctuation
 
     nlp = English.from_config(load_config_from_str(hyphen_config_str))
-    assert nlp.get_pipe("my_punctual_component").punctuation == ['?', '-']
+    assert nlp.get_pipe("my_punctual_component").punctuation == ["?", "-"]
diff --git a/spacy/tests/serialize/test_serialize_doc.py b/spacy/tests/serialize/test_serialize_doc.py
index 5ce2549aa..e51c7f45b 100644
--- a/spacy/tests/serialize/test_serialize_doc.py
+++ b/spacy/tests/serialize/test_serialize_doc.py
@@ -64,7 +64,9 @@ def test_serialize_doc_span_groups(en_vocab):
 
 
 def test_serialize_doc_bin():
-    doc_bin = DocBin(attrs=["LEMMA", "ENT_IOB", "ENT_TYPE", "NORM", "ENT_ID"], store_user_data=True)
+    doc_bin = DocBin(
+        attrs=["LEMMA", "ENT_IOB", "ENT_TYPE", "NORM", "ENT_ID"], store_user_data=True
+    )
     texts = ["Some text", "Lots of texts...", "..."]
     cats = {"A": 0.5}
     nlp = English()
diff --git a/spacy/tests/test_architectures.py b/spacy/tests/test_architectures.py
index c9e451471..26eabd4e5 100644
--- a/spacy/tests/test_architectures.py
+++ b/spacy/tests/test_architectures.py
@@ -5,7 +5,6 @@ from catalogue import RegistryError
 
 
 def test_get_architecture():
-
     @registry.architectures("my_test_function")
     def create_model(nr_in, nr_out):
         return Linear(nr_in, nr_out)
diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py
index 916247d4d..57ec4bbb8 100644
--- a/spacy/tests/test_language.py
+++ b/spacy/tests/test_language.py
@@ -143,7 +143,9 @@ def sample_vectors():
 
 @pytest.fixture
 def nlp2(nlp, sample_vectors):
-    Language.component("test_language_vector_modification_pipe", func=vector_modification_pipe)
+    Language.component(
+        "test_language_vector_modification_pipe", func=vector_modification_pipe
+    )
     Language.component("test_language_userdata_pipe", func=userdata_pipe)
     Language.component("test_language_ner_pipe", func=ner_pipe)
     add_vecs_to_vocab(nlp.vocab, sample_vectors)
diff --git a/spacy/tests/test_scorer.py b/spacy/tests/test_scorer.py
index c044d8afe..16cc97f6d 100644
--- a/spacy/tests/test_scorer.py
+++ b/spacy/tests/test_scorer.py
@@ -444,7 +444,9 @@ def test_score_spans():
     assert f"{key}_per_type" in scores
 
     # Discard labels from the evaluation
-    scores = Scorer.score_spans([eg], attr=key, getter=span_getter, allow_overlap=True, labeled=False)
+    scores = Scorer.score_spans(
+        [eg], attr=key, getter=span_getter, allow_overlap=True, labeled=False
+    )
     assert scores[f"{key}_p"] == 1.0
     assert scores[f"{key}_r"] == 1.0
     assert f"{key}_per_type" not in scores
@@ -467,4 +469,6 @@ def test_prf_score():
     assert (c.precision, c.recall, c.fscore) == approx((0.25, 0.5, 0.33333333))
 
     a += b
-    assert (a.precision, a.recall, a.fscore) == approx((c.precision, c.recall, c.fscore))
\ No newline at end of file
+    assert (a.precision, a.recall, a.fscore) == approx(
+        (c.precision, c.recall, c.fscore)
+    )
diff --git a/spacy/tests/training/test_pretraining.py b/spacy/tests/training/test_pretraining.py
index bd8810a5c..8ee54b544 100644
--- a/spacy/tests/training/test_pretraining.py
+++ b/spacy/tests/training/test_pretraining.py
@@ -278,7 +278,9 @@ def test_pretraining_training():
         filled = filled.interpolate()
         P = filled["pretraining"]
         nlp_base = init_nlp(filled)
-        model_base = nlp_base.get_pipe(P["component"]).model.get_ref(P["layer"]).get_ref("embed")
+        model_base = (
+            nlp_base.get_pipe(P["component"]).model.get_ref(P["layer"]).get_ref("embed")
+        )
         embed_base = None
         for node in model_base.walk():
             if node.name == "hashembed":
@@ -331,11 +333,12 @@ def write_sample_training(tmp_dir):
 
 def write_vectors_model(tmp_dir):
     import numpy
+
     vocab = Vocab()
     vector_data = {
         "dog": numpy.random.uniform(-1, 1, (300,)),
         "cat": numpy.random.uniform(-1, 1, (300,)),
-        "orange": numpy.random.uniform(-1, 1, (300,))
+        "orange": numpy.random.uniform(-1, 1, (300,)),
     }
     for word, vector in vector_data.items():
         vocab.set_vector(word, vector)
diff --git a/spacy/tests/training/test_training.py b/spacy/tests/training/test_training.py
index 0ea5f0fcc..cd428be15 100644
--- a/spacy/tests/training/test_training.py
+++ b/spacy/tests/training/test_training.py
@@ -434,8 +434,14 @@ def test_aligned_spans_y2x_overlap(en_vocab, en_tokenizer):
     gold_doc = nlp.make_doc(text)
     spans = []
     prefix = "I flew to "
-    spans.append(gold_doc.char_span(len(prefix), len(prefix + "San Francisco"), label="CITY"))
-    spans.append(gold_doc.char_span(len(prefix), len(prefix + "San Francisco Valley"), label="VALLEY"))
+    spans.append(
+        gold_doc.char_span(len(prefix), len(prefix + "San Francisco"), label="CITY")
+    )
+    spans.append(
+        gold_doc.char_span(
+            len(prefix), len(prefix + "San Francisco Valley"), label="VALLEY"
+        )
+    )
     spans_key = "overlap_ents"
     gold_doc.spans[spans_key] = spans
     example = Example(doc, gold_doc)
@@ -443,7 +449,9 @@ def test_aligned_spans_y2x_overlap(en_vocab, en_tokenizer):
     assert [(ent.start, ent.end) for ent in spans_gold] == [(3, 5), (3, 6)]
 
     # Ensure that 'get_aligned_spans_y2x' has the aligned entities correct
-    spans_y2x_no_overlap = example.get_aligned_spans_y2x(spans_gold, allow_overlap=False)
+    spans_y2x_no_overlap = example.get_aligned_spans_y2x(
+        spans_gold, allow_overlap=False
+    )
     assert [(ent.start, ent.end) for ent in spans_y2x_no_overlap] == [(3, 5)]
     spans_y2x_overlap = example.get_aligned_spans_y2x(spans_gold, allow_overlap=True)
     assert [(ent.start, ent.end) for ent in spans_y2x_overlap] == [(3, 5), (3, 6)]
diff --git a/spacy/tests/vocab_vectors/test_vectors.py b/spacy/tests/vocab_vectors/test_vectors.py
index 37d48ad0f..8a7dd22c3 100644
--- a/spacy/tests/vocab_vectors/test_vectors.py
+++ b/spacy/tests/vocab_vectors/test_vectors.py
@@ -12,6 +12,7 @@ from ..util import add_vecs_to_vocab, get_cosine, make_tempdir
 
 OPS = get_current_ops()
 
+
 @pytest.fixture
 def strings():
     return ["apple", "orange"]
diff --git a/spacy/training/batchers.py b/spacy/training/batchers.py
index e9fa86c83..e79ba79b0 100644
--- a/spacy/training/batchers.py
+++ b/spacy/training/batchers.py
@@ -66,7 +66,11 @@ def configure_minibatch_by_words(
     """
     optionals = {"get_length": get_length} if get_length is not None else {}
     return partial(
-        minibatch_by_words, size=size, tolerance=tolerance, discard_oversize=discard_oversize, **optionals
+        minibatch_by_words,
+        size=size,
+        tolerance=tolerance,
+        discard_oversize=discard_oversize,
+        **optionals
     )
 
 
diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py
index 36384d67b..c1fda9181 100644
--- a/spacy/training/initialize.py
+++ b/spacy/training/initialize.py
@@ -70,14 +70,18 @@ def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
     nlp._link_components()
     with nlp.select_pipes(disable=[*frozen_components, *resume_components]):
         if T["max_epochs"] == -1:
-            logger.debug("Due to streamed train corpus, using only first 100 examples for initialization. If necessary, provide all labels in [initialize]. More info: https://spacy.io/api/cli#init_labels")
+            logger.debug(
+                "Due to streamed train corpus, using only first 100 examples for initialization. If necessary, provide all labels in [initialize]. More info: https://spacy.io/api/cli#init_labels"
+            )
             nlp.initialize(lambda: islice(train_corpus(nlp), 100), sgd=optimizer)
         else:
             nlp.initialize(lambda: train_corpus(nlp), sgd=optimizer)
         logger.info(f"Initialized pipeline components: {nlp.pipe_names}")
     # Detect components with listeners that are not frozen consistently
     for name, proc in nlp.pipeline:
-        for listener in getattr(proc, "listening_components", []):  # e.g. tok2vec/transformer
+        for listener in getattr(
+            proc, "listening_components", []
+        ):  # e.g. tok2vec/transformer
             # Don't warn about components not in the pipeline
             if listener not in nlp.pipe_names:
                 continue
diff --git a/spacy/training/loop.py b/spacy/training/loop.py
index 85aa458f0..0c4aba7e3 100644
--- a/spacy/training/loop.py
+++ b/spacy/training/loop.py
@@ -96,8 +96,7 @@ def train(
         stdout.write(msg.info(f"Frozen components: {frozen_components}") + "\n")
     if annotating_components:
         stdout.write(
-            msg.info(f"Set annotations on update for: {annotating_components}")
-            + "\n"
+            msg.info(f"Set annotations on update for: {annotating_components}") + "\n"
         )
     stdout.write(msg.info(f"Initial learn rate: {optimizer.learn_rate}") + "\n")
     with nlp.select_pipes(disable=frozen_components):
diff --git a/spacy/util.py b/spacy/util.py
index 984445d81..421287ce2 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -57,13 +57,13 @@ if TYPE_CHECKING:
     from .vocab import Vocab  # noqa: F401
 
 
+# fmt: off
 OOV_RANK = numpy.iinfo(numpy.uint64).max
 DEFAULT_OOV_PROB = -20
 LEXEME_NORM_LANGS = ["cs", "da", "de", "el", "en", "id", "lb", "mk", "pt", "ru", "sr", "ta", "th"]
 
 # Default order of sections in the config.cfg. Not all sections needs to exist,
 # and additional sections are added at the end, in alphabetical order.
-# fmt: off
 CONFIG_SECTION_ORDER = ["paths", "variables", "system", "nlp", "components", "corpora", "training", "pretraining", "initialize"]
 # fmt: on
 
@@ -649,8 +649,7 @@ def get_model_version_range(spacy_version: str) -> str:
 
 
 def get_model_lower_version(constraint: str) -> Optional[str]:
-    """From a version range like >=1.2.3,<1.3.0 return the lower pin.
-    """
+    """From a version range like >=1.2.3,<1.3.0 return the lower pin."""
     try:
         specset = SpecifierSet(constraint)
         for spec in specset: