From 8283df80e91d7fba385b12c42eb976ab30ca1e2a Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Sat, 20 Jun 2020 14:15:04 +0200
Subject: [PATCH] Tidy up and auto-format

---
 spacy/cli/pretrain.py                         |  4 +-
 spacy/cli/train_from_config.py                | 86 ++++++++++---------
 spacy/lemmatizer.py                           |  9 +-
 spacy/ml/__init__.py                          |  2 +-
 spacy/ml/_biluo.py                            | 23 +++--
 spacy/ml/_iob.py                              | 22 +++--
 spacy/ml/_precomputable_affine.py             |  2 +-
 spacy/ml/models/__init__.py                   |  2 +-
 spacy/ml/models/multi_task.py                 | 17 +++-
 spacy/ml/models/parser.py                     |  8 +-
 spacy/ml/models/simple_ner.py                 | 29 ++++---
 spacy/ml/models/tagger.py                     |  3 +-
 spacy/ml/models/textcat.py                    | 76 ++++++++++------
 spacy/ml/models/tok2vec.py                    | 55 ++++++++----
 spacy/ml/tb_framework.py                      | 16 ++--
 spacy/pipeline/simple_ner.py                  | 36 ++++----
 spacy/scorer.py                               | 41 ++++++---
 spacy/tests/doc/test_add_entities.py          | 14 ++-
 spacy/tests/parser/test_add_label.py          | 17 +++-
 spacy/tests/parser/test_arc_eager_oracle.py   |  7 +-
 spacy/tests/parser/test_ner.py                | 21 ++++-
 spacy/tests/parser/test_neural_parser.py      |  7 +-
 spacy/tests/parser/test_nn_beam.py            |  7 +-
 spacy/tests/parser/test_preset_sbd.py         |  7 +-
 spacy/tests/pipeline/test_entity_linker.py    | 12 ++-
 spacy/tests/pipeline/test_morphologizer.py    | 20 ++++-
 spacy/tests/pipeline/test_simple_ner.py       | 27 +++---
 spacy/tests/regression/test_issue1501-2000.py |  7 +-
 spacy/tests/regression/test_issue3001-3500.py |  7 +-
 spacy/tests/regression/test_issue3830.py      | 14 ++-
 spacy/tests/regression/test_issue4042.py      |  7 +-
 spacy/tests/regression/test_issue4313.py      |  7 +-
 spacy/tests/regression/test_issue4924.py      |  1 -
 .../tests/serialize/test_serialize_config.py  |  4 +-
 .../serialize/test_serialize_pipeline.py      |  7 +-
 .../serialize/test_serialize_vocab_strings.py |  6 +-
 spacy/tests/test_scorer.py                    |  3 +-
 spacy/tests/test_util.py                      | 16 ++--
 spacy/util.py                                 |  4 +-
 39 files changed, 421 insertions(+), 232 deletions(-)

diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index 4f4707b52..4f4029834 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -24,8 +24,8 @@ from ..gold import Example
     output_dir=("Directory to write models to on each epoch", "positional", None, Path),
     config_path=("Path to config file", "positional", None, Path),
     use_gpu=("Use GPU", "option", "g", int),
-    resume_path=("Path to pretrained weights from which to resume pretraining", "option","r", Path),
-    epoch_resume=("The epoch to resume counting from when using '--resume_path'. Prevents unintended overwriting of existing weight files.","option", "er", int),
+    resume_path=("Path to pretrained weights from which to resume pretraining", "option", "r", Path),
+    epoch_resume=("The epoch to resume counting from when using '--resume_path'. Prevents unintended overwriting of existing weight files.", "option", "er", int),
     # fmt: on
 )
 def pretrain(
diff --git a/spacy/cli/train_from_config.py b/spacy/cli/train_from_config.py
index f24feffab..6080b698b 100644
--- a/spacy/cli/train_from_config.py
+++ b/spacy/cli/train_from_config.py
@@ -3,7 +3,6 @@
 from timeit import default_timer as timer
 import srsly
 from pydantic import BaseModel, FilePath
-import plac
 import tqdm
 from pathlib import Path
 from wasabi import msg
@@ -16,7 +15,9 @@
 from ..gold import GoldCorpus
 from ..lookups import Lookups
 from ..
import util from ..errors import Errors -from ..ml import models # don't remove - required to load the built-in architectures + +# Don't remove - required to load the built-in architectures +from ..ml import models # noqa: F401 registry = util.registry @@ -114,33 +115,19 @@ class ConfigSchema(BaseModel): extra = "allow" -@plac.annotations( - # fmt: off - train_path=("Location of JSON-formatted training data", "positional", None, Path), - dev_path=("Location of JSON-formatted development data", "positional", None, Path), - config_path=("Path to config file", "positional", None, Path), - output_path=("Output directory to store model in", "option", "o", Path), - init_tok2vec=( - "Path to pretrained weights for the tok2vec components. See 'spacy pretrain'. Experimental.", "option", "t2v", - Path), - raw_text=("Path to jsonl file with unlabelled text documents.", "option", "rt", Path), - verbose=("Display more information for debugging purposes", "flag", "VV", bool), - use_gpu=("Use GPU", "option", "g", int), - tag_map_path=("Location of JSON-formatted tag map", "option", "tm", Path), - omit_extra_lookups=("Don't include extra lookups in model", "flag", "OEL", bool), - # fmt: on -) def train_cli( - train_path, - dev_path, - config_path, - output_path=None, - init_tok2vec=None, - raw_text=None, - verbose=False, - use_gpu=-1, - tag_map_path=None, - omit_extra_lookups=False, + # fmt: off + train_path: ("Location of JSON-formatted training data", "positional", None, Path), + dev_path: ("Location of JSON-formatted development data", "positional", None, Path), + config_path: ("Path to config file", "positional", None, Path), + output_path: ("Output directory to store model in", "option", "o", Path) = None, + init_tok2vec: ("Path to pretrained weights for the tok2vec components. See 'spacy pretrain'. Experimental.", "option", "t2v", Path) = None, + raw_text: ("Path to jsonl file with unlabelled text documents.", "option", "rt", Path) = None, + verbose: ("Display more information for debugging purposes", "flag", "VV", bool) = False, + use_gpu: ("Use GPU", "option", "g", int) = -1, + tag_map_path: ("Location of JSON-formatted tag map", "option", "tm", Path) = None, + omit_extra_lookups: ("Don't include extra lookups in model", "flag", "OEL", bool) = False, + # fmt: on ): """ Train or update a spaCy model. Requires data to be formatted in spaCy's @@ -212,7 +199,7 @@ def train( config = util.load_config(config_path, create_objects=False) util.fix_random_seed(config["training"]["seed"]) if config["training"].get("use_pytorch_for_gpu_memory"): - # It feels kind of weird to not have a default for this. + # It feels kind of weird to not have a default for this. use_pytorch_for_gpu_memory() nlp_config = config["nlp"] config = util.load_config(config_path, create_objects=True) @@ -227,7 +214,9 @@ def train( # verify textcat config if "textcat" in nlp_config["pipeline"]: textcat_labels = set(nlp.get_pipe("textcat").labels) - textcat_multilabel = not nlp_config["pipeline"]["textcat"]["model"]["exclusive_classes"] + textcat_multilabel = not nlp_config["pipeline"]["textcat"]["model"][ + "exclusive_classes" + ] # check whether the setting 'exclusive_classes' corresponds to the provided training data if textcat_multilabel: @@ -255,7 +244,9 @@ def train( "to 'false' in the config to train a classifier with classes " "that are not mutually exclusive." 
) - msg.info(f"Initialized textcat component for {len(textcat_labels)} unique labels") + msg.info( + f"Initialized textcat component for {len(textcat_labels)} unique labels" + ) nlp.get_pipe("textcat").labels = tuple(textcat_labels) # if 'positive_label' is provided: double check whether it's in the data and the task is binary @@ -281,9 +272,7 @@ def train( nlp.resume_training() else: msg.info(f"Initializing the nlp pipeline: {nlp.pipe_names}") - nlp.begin_training( - lambda: corpus.train_examples - ) + nlp.begin_training(lambda: corpus.train_examples) # Update tag map with provided mapping nlp.vocab.morphology.tag_map.update(tag_map) @@ -310,8 +299,7 @@ def train( tok2vec = tok2vec.get(subpath) if not tok2vec: msg.fail( - f"Could not locate the tok2vec model at {tok2vec_path}.", - exits=1, + f"Could not locate the tok2vec model at {tok2vec_path}.", exits=1, ) tok2vec.from_bytes(weights_data) @@ -376,7 +364,7 @@ def create_train_batches(nlp, corpus, cfg): train_examples = list( corpus.train_dataset( nlp, - noise_level=0.0, # I think this is deprecated? + noise_level=0.0, # I think this is deprecated? orth_variant_level=cfg["orth_variant_level"], gold_preproc=cfg["gold_preproc"], max_length=cfg["max_length"], @@ -429,7 +417,11 @@ def create_evaluation_callback(nlp, optimizer, corpus, cfg): try: weighted_score = sum(scores[s] * weights.get(s, 0.0) for s in weights) except KeyError as e: - raise KeyError(Errors.E983.format(dict_name='score_weights', key=str(e), keys=list(scores.keys()))) + raise KeyError( + Errors.E983.format( + dict_name="score_weights", key=str(e), keys=list(scores.keys()) + ) + ) scores["speed"] = wps return weighted_score, scores @@ -578,15 +570,25 @@ def setup_printer(training, nlp): ] except KeyError as e: raise KeyError( - Errors.E983.format(dict_name='scores (losses)', key=str(e), keys=list(info["losses"].keys()))) + Errors.E983.format( + dict_name="scores (losses)", + key=str(e), + keys=list(info["losses"].keys()), + ) + ) try: scores = [ - "{0:.2f}".format(float(info["other_scores"][col])) - for col in score_cols + "{0:.2f}".format(float(info["other_scores"][col])) for col in score_cols ] except KeyError as e: - raise KeyError(Errors.E983.format(dict_name='scores (other)', key=str(e), keys=list(info["other_scores"].keys()))) + raise KeyError( + Errors.E983.format( + dict_name="scores (other)", + key=str(e), + keys=list(info["other_scores"].keys()), + ) + ) data = ( [info["step"]] + losses + scores + ["{0:.2f}".format(float(info["score"]))] ) diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py index c4944407f..7d6bfbc12 100644 --- a/spacy/lemmatizer.py +++ b/spacy/lemmatizer.py @@ -1,4 +1,3 @@ -from .symbols import NOUN, VERB, ADJ, PUNCT, PROPN from .errors import Errors from .lookups import Lookups from .parts_of_speech import NAMES as UPOS_NAMES @@ -51,7 +50,13 @@ class Lemmatizer(object): index_table = self.lookups.get_table("lemma_index", {}) exc_table = self.lookups.get_table("lemma_exc", {}) rules_table = self.lookups.get_table("lemma_rules", {}) - if not any((index_table.get(univ_pos), exc_table.get(univ_pos), rules_table.get(univ_pos))): + if not any( + ( + index_table.get(univ_pos), + exc_table.get(univ_pos), + rules_table.get(univ_pos), + ) + ): if univ_pos == "propn": return [string] else: diff --git a/spacy/ml/__init__.py b/spacy/ml/__init__.py index aed4fa323..c382d915b 100644 --- a/spacy/ml/__init__.py +++ b/spacy/ml/__init__.py @@ -1 +1 @@ -from .models import * +from .models import * # noqa: F401, F403 diff --git a/spacy/ml/_biluo.py 
b/spacy/ml/_biluo.py index 28339089a..77a2a6a77 100644 --- a/spacy/ml/_biluo.py +++ b/spacy/ml/_biluo.py @@ -1,11 +1,8 @@ """Thinc layer to do simpler transition-based parsing, NER, etc.""" -from typing import List, Tuple, Dict, Optional +from typing import Dict, Optional import numpy -from thinc.api import Ops, Model, with_array, softmax_activation, padded2list -from thinc.api import to_numpy -from thinc.types import Padded, Ints1d, Ints3d, Floats2d, Floats3d - -from ..tokens import Doc +from thinc.api import Model +from thinc.types import Padded, Floats3d def BILUO() -> Model[Padded, Padded]: @@ -14,11 +11,11 @@ def BILUO() -> Model[Padded, Padded]: forward, init=init, dims={"nO": None}, - attrs={"get_num_actions": get_num_actions} + attrs={"get_num_actions": get_num_actions}, ) -def init(model, X: Optional[Padded]=None, Y: Optional[Padded]=None): +def init(model, X: Optional[Padded] = None, Y: Optional[Padded] = None): if X is not None and Y is not None: if X.data.shape != Y.data.shape: # TODO: Fix error @@ -49,12 +46,12 @@ def forward(model: Model[Padded, Padded], Xp: Padded, is_train: bool): masks = model.ops.alloc3f(*Y.shape) max_value = Xp.data.max() for t in range(Xp.data.shape[0]): - is_last = (Xp.lengths < (t+2)).astype("i") + is_last = (Xp.lengths < (t + 2)).astype("i") masks[t] = valid_transitions[is_last, prev_actions] # Don't train the out-of-bounds sequences. - masks[t, Xp.size_at_t[t]:] = 0 + masks[t, Xp.size_at_t[t] :] = 0 # Valid actions get 0*10e8, invalid get large negative value - Y[t] = Xp.data[t] + ((masks[t]-1) * max_value * 10) + Y[t] = Xp.data[t] + ((masks[t] - 1) * max_value * 10) prev_actions = Y[t].argmax(axis=-1) def backprop_biluo(dY: Padded) -> Padded: @@ -83,13 +80,13 @@ def _get_transition_table( B_start, B_end = (0, n_labels) I_start, I_end = (B_end, B_end + n_labels) L_start, L_end = (I_end, I_end + n_labels) - U_start, U_end = (L_end, L_end + n_labels) + U_start, U_end = (L_end, L_end + n_labels) # noqa: F841 # Using ranges allows us to set specific cells, which is necessary to express # that only actions of the same label are valid continuations. 
B_range = numpy.arange(B_start, B_end) I_range = numpy.arange(I_start, I_end) L_range = numpy.arange(L_start, L_end) - O_action = U_end + O_action = U_end # noqa: F841 # If this is the last token and the previous action was B or I, only L # of that label is valid table[1, B_range, L_range] = 1 diff --git a/spacy/ml/_iob.py b/spacy/ml/_iob.py index 0ce9a71e6..9f385ec0d 100644 --- a/spacy/ml/_iob.py +++ b/spacy/ml/_iob.py @@ -1,9 +1,7 @@ """Thinc layer to do simpler transition-based parsing, NER, etc.""" -from typing import List, Tuple, Dict, Optional -from thinc.api import Ops, Model, with_array, softmax_activation, padded2list -from thinc.types import Padded, Ints1d, Ints3d, Floats2d, Floats3d - -from ..tokens import Doc +from typing import Dict, Optional +from thinc.api import Ops, Model +from thinc.types import Padded, Floats3d def IOB() -> Model[Padded, Padded]: @@ -12,11 +10,11 @@ def IOB() -> Model[Padded, Padded]: forward, init=init, dims={"nO": None}, - attrs={"get_num_actions": get_num_actions} + attrs={"get_num_actions": get_num_actions}, ) -def init(model, X: Optional[Padded]=None, Y: Optional[Padded]=None): +def init(model, X: Optional[Padded] = None, Y: Optional[Padded] = None): if X is not None and Y is not None: if X.data.shape != Y.data.shape: # TODO: Fix error @@ -48,14 +46,14 @@ def forward(model: Model[Padded, Padded], Xp: Padded, is_train: bool): for t in range(Xp.data.shape[0]): masks[t] = valid_transitions[prev_actions] # Don't train the out-of-bounds sequences. - masks[t, Xp.size_at_t[t]:] = 0 + masks[t, Xp.size_at_t[t] :] = 0 # Valid actions get 0*10e8, invalid get -1*10e8 - Y[t] = Xp.data[t] + ((masks[t]-1) * 10e8) + Y[t] = Xp.data[t] + ((masks[t] - 1) * 10e8) prev_actions = Y[t].argmax(axis=-1) def backprop_biluo(dY: Padded) -> Padded: # Masking the gradient seems to do poorly here. But why? 
- #dY.data *= masks + # dY.data *= masks return dY return Padded(Y, Xp.size_at_t, Xp.lengths, Xp.indices), backprop_biluo @@ -83,10 +81,10 @@ def _get_transition_table( B_range = ops.xp.arange(B_start, B_end) I_range = ops.xp.arange(I_start, I_end) # B and O are always valid - table[:, B_start : B_end] = 1 + table[:, B_start:B_end] = 1 table[:, O_action] = 1 # I can only follow a matching B table[B_range, I_range] = 1 - + _cache[n_actions] = table return table diff --git a/spacy/ml/_precomputable_affine.py b/spacy/ml/_precomputable_affine.py index f4b5b16fe..215cdeda1 100644 --- a/spacy/ml/_precomputable_affine.py +++ b/spacy/ml/_precomputable_affine.py @@ -84,7 +84,7 @@ def _backprop_precomputable_affine_padding(model, dY, ids): # # (ids < 0).T @ dY mask = model.ops.asarray(ids < 0, dtype="f") - d_pad = model.ops.gemm(mask, dY.reshape(nB, nO*nP), trans1=True) + d_pad = model.ops.gemm(mask, dY.reshape(nB, nO * nP), trans1=True) return d_pad.reshape((1, nF, nO, nP)) diff --git a/spacy/ml/models/__init__.py b/spacy/ml/models/__init__.py index 40cde2437..dd58dab00 100644 --- a/spacy/ml/models/__init__.py +++ b/spacy/ml/models/__init__.py @@ -1,6 +1,6 @@ from .entity_linker import * # noqa from .parser import * # noqa -from .simple_ner import * +from .simple_ner import * # noqa from .tagger import * # noqa from .textcat import * # noqa from .tok2vec import * # noqa diff --git a/spacy/ml/models/multi_task.py b/spacy/ml/models/multi_task.py index 4a360a9e6..b3a9e0815 100644 --- a/spacy/ml/models/multi_task.py +++ b/spacy/ml/models/multi_task.py @@ -7,7 +7,12 @@ def build_multi_task_model(tok2vec, maxout_pieces, token_vector_width, nO=None): softmax = Softmax(nO=nO, nI=token_vector_width * 2) model = chain( tok2vec, - Maxout(nO=token_vector_width * 2, nI=token_vector_width, nP=maxout_pieces, dropout=0.0), + Maxout( + nO=token_vector_width * 2, + nI=token_vector_width, + nP=maxout_pieces, + dropout=0.0, + ), LayerNorm(token_vector_width * 2), softmax, ) @@ -20,7 +25,11 @@ def build_cloze_multi_task_model(vocab, tok2vec, maxout_pieces, nO=None): # nO = vocab.vectors.data.shape[1] output_layer = chain( Maxout( - nO=nO, nI=tok2vec.get_dim("nO"), nP=maxout_pieces, normalize=True, dropout=0.0 + nO=nO, + nI=tok2vec.get_dim("nO"), + nP=maxout_pieces, + normalize=True, + dropout=0.0, ), Linear(nO=nO, nI=nO, init_W=zero_init), ) @@ -39,7 +48,9 @@ def build_masked_language_model(vocab, wrapped_model, mask_prob=0.15): def mlm_forward(model, docs, is_train): mask, docs = _apply_mask(docs, random_words, mask_prob=mask_prob) mask = model.ops.asarray(mask).reshape((mask.shape[0], 1)) - output, backprop = model.get_ref("wrapped-model").begin_update(docs) # drop=drop + output, backprop = model.get_ref("wrapped-model").begin_update( + docs + ) # drop=drop def mlm_backward(d_output): d_output *= 1 - mask diff --git a/spacy/ml/models/parser.py b/spacy/ml/models/parser.py index bdcd709b1..47c94cfa1 100644 --- a/spacy/ml/models/parser.py +++ b/spacy/ml/models/parser.py @@ -16,18 +16,14 @@ def build_tb_parser_model( nO=None, ): t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None - tok2vec = chain( - tok2vec, - with_array(Linear(hidden_width, t2v_width)), - list2array(), - ) + tok2vec = chain(tok2vec, with_array(Linear(hidden_width, t2v_width)), list2array(),) tok2vec.set_dim("nO", hidden_width) lower = PrecomputableAffine( nO=hidden_width if use_upper else nO, nF=nr_feature_tokens, nI=tok2vec.get_dim("nO"), - nP=maxout_pieces + nP=maxout_pieces, ) if use_upper: with use_ops("numpy"): diff --git 
a/spacy/ml/models/simple_ner.py b/spacy/ml/models/simple_ner.py index 01661f55b..1fb5a71c0 100644 --- a/spacy/ml/models/simple_ner.py +++ b/spacy/ml/models/simple_ner.py @@ -1,9 +1,8 @@ -import functools -from typing import List, Tuple, Dict, Optional -from thinc.api import Ops, Model, Linear, Softmax, with_array, softmax_activation, padded2list +from typing import List +from thinc.api import Model, Linear, with_array, softmax_activation, padded2list from thinc.api import chain, list2padded, configure_normal_init from thinc.api import Dropout -from thinc.types import Padded, Ints1d, Ints3d, Floats2d, Floats3d +from thinc.types import Floats2d from ...tokens import Doc from .._biluo import BILUO @@ -12,12 +11,12 @@ from ...util import registry @registry.architectures.register("spacy.BiluoTagger.v1") -def BiluoTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], List[Floats2d]]: +def BiluoTagger( + tok2vec: Model[List[Doc], List[Floats2d]] +) -> Model[List[Doc], List[Floats2d]]: biluo = BILUO() linear = Linear( - nO=None, - nI=tok2vec.get_dim("nO"), - init_W=configure_normal_init(mean=0.02) + nO=None, nI=tok2vec.get_dim("nO"), init_W=configure_normal_init(mean=0.02) ) model = chain( tok2vec, @@ -25,7 +24,7 @@ def BiluoTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], L with_array(chain(Dropout(0.1), linear)), biluo, with_array(softmax_activation()), - padded2list() + padded2list(), ) return Model( @@ -35,11 +34,14 @@ def BiluoTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], L layers=[model, linear], refs={"tok2vec": tok2vec, "linear": linear, "biluo": biluo}, dims={"nO": None}, - attrs={"get_num_actions": biluo.attrs["get_num_actions"]} + attrs={"get_num_actions": biluo.attrs["get_num_actions"]}, ) + @registry.architectures.register("spacy.IOBTagger.v1") -def IOBTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], List[Floats2d]]: +def IOBTagger( + tok2vec: Model[List[Doc], List[Floats2d]] +) -> Model[List[Doc], List[Floats2d]]: biluo = IOB() linear = Linear(nO=None, nI=tok2vec.get_dim("nO")) model = chain( @@ -48,7 +50,7 @@ def IOBTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], Lis with_array(linear), biluo, with_array(softmax_activation()), - padded2list() + padded2list(), ) return Model( @@ -58,11 +60,10 @@ def IOBTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], Lis layers=[model], refs={"tok2vec": tok2vec, "linear": linear, "biluo": biluo}, dims={"nO": None}, - attrs={"get_num_actions": biluo.attrs["get_num_actions"]} + attrs={"get_num_actions": biluo.attrs["get_num_actions"]}, ) - def init(model: Model[List[Doc], List[Floats2d]], X=None, Y=None) -> None: if model.get_dim("nO") is None and Y: model.set_dim("nO", Y[0].shape[1]) diff --git a/spacy/ml/models/tagger.py b/spacy/ml/models/tagger.py index 00e268ede..7fe417321 100644 --- a/spacy/ml/models/tagger.py +++ b/spacy/ml/models/tagger.py @@ -1,5 +1,4 @@ -from thinc.api import zero_init, with_array, Softmax, chain, Model, Dropout -from thinc.api import glorot_uniform_init +from thinc.api import zero_init, with_array, Softmax, chain, Model from ...util import registry diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py index a02e1a5a1..9db6f982f 100644 --- a/spacy/ml/models/textcat.py +++ b/spacy/ml/models/textcat.py @@ -1,11 +1,12 @@ -from thinc.api import Model, reduce_mean, Linear, list2ragged, Logistic, ParametricAttention -from thinc.api import chain, concatenate, clone, Dropout -from thinc.api import 
SparseLinear, Softmax, softmax_activation, Maxout, reduce_sum, Relu, residual, expand_window -from thinc.api import HashEmbed, with_ragged, with_array, with_cpu, uniqued, FeatureExtractor +from thinc.api import Model, reduce_mean, Linear, list2ragged, Logistic +from thinc.api import ParametricAttention, chain, concatenate, clone, Dropout +from thinc.api import SparseLinear, Softmax, softmax_activation, Maxout +from thinc.api import reduce_sum, Relu, residual, expand_window, HashEmbed +from thinc.api import with_ragged, with_array, with_cpu, uniqued, FeatureExtractor from ..spacy_vectors import SpacyVectors from ... import util -from ...attrs import ID, ORTH, NORM, PREFIX, SUFFIX, SHAPE, LOWER +from ...attrs import ID, ORTH, PREFIX, SUFFIX, SHAPE, LOWER from ...util import registry from ..extract_ngrams import extract_ngrams @@ -50,14 +51,31 @@ def build_bow_text_classifier(exclusive_classes, ngram_size, no_output_layer, nO @registry.architectures.register("spacy.TextCat.v1") -def build_text_classifier(width, embed_size, pretrained_vectors, exclusive_classes, ngram_size, - window_size, conv_depth, dropout, nO=None): +def build_text_classifier( + width, + embed_size, + pretrained_vectors, + exclusive_classes, + ngram_size, + window_size, + conv_depth, + dropout, + nO=None, +): cols = [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID] with Model.define_operators({">>": chain, "|": concatenate, "**": clone}): - lower = HashEmbed(nO=width, nV=embed_size, column=cols.index(LOWER), dropout=dropout) - prefix = HashEmbed(nO=width // 2, nV=embed_size, column=cols.index(PREFIX), dropout=dropout) - suffix = HashEmbed(nO=width // 2, nV=embed_size, column=cols.index(SUFFIX), dropout=dropout) - shape = HashEmbed(nO=width // 2, nV=embed_size, column=cols.index(SHAPE), dropout=dropout) + lower = HashEmbed( + nO=width, nV=embed_size, column=cols.index(LOWER), dropout=dropout + ) + prefix = HashEmbed( + nO=width // 2, nV=embed_size, column=cols.index(PREFIX), dropout=dropout + ) + suffix = HashEmbed( + nO=width // 2, nV=embed_size, column=cols.index(SUFFIX), dropout=dropout + ) + shape = HashEmbed( + nO=width // 2, nV=embed_size, column=cols.index(SHAPE), dropout=dropout + ) width_nI = sum(layer.get_dim("nO") for layer in [lower, prefix, suffix, shape]) trained_vectors = FeatureExtractor(cols) >> with_array( @@ -83,30 +101,38 @@ def build_text_classifier(width, embed_size, pretrained_vectors, exclusive_class vectors_width = width tok2vec = vector_layer >> with_array( Maxout(width, vectors_width, normalize=True) - >> residual((expand_window(window_size=window_size) - >> Maxout(nO=width, nI=width * ((window_size * 2) + 1), normalize=True))) ** conv_depth, + >> residual( + ( + expand_window(window_size=window_size) + >> Maxout( + nO=width, nI=width * ((window_size * 2) + 1), normalize=True + ) + ) + ) + ** conv_depth, pad=conv_depth, ) cnn_model = ( - tok2vec - >> list2ragged() - >> ParametricAttention(width) - >> reduce_sum() - >> residual(Maxout(nO=width, nI=width)) - >> Linear(nO=nO, nI=width) - >> Dropout(0.0) + tok2vec + >> list2ragged() + >> ParametricAttention(width) + >> reduce_sum() + >> residual(Maxout(nO=width, nI=width)) + >> Linear(nO=nO, nI=width) + >> Dropout(0.0) ) linear_model = build_bow_text_classifier( - nO=nO, ngram_size=ngram_size, exclusive_classes=exclusive_classes, no_output_layer=False + nO=nO, + ngram_size=ngram_size, + exclusive_classes=exclusive_classes, + no_output_layer=False, ) - nO_double = nO*2 if nO else None + nO_double = nO * 2 if nO else None if exclusive_classes: output_layer = 
Softmax(nO=nO, nI=nO_double) else: - output_layer = ( - Linear(nO=nO, nI=nO_double) >> Dropout(0.0) >> Logistic() - ) + output_layer = Linear(nO=nO, nI=nO_double) >> Dropout(0.0) >> Logistic() model = (linear_model | cnn_model) >> output_layer model.set_ref("tok2vec", tok2vec) if model.has_dim("nO") is not False: diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py index 53798e57c..b1bed1ea1 100644 --- a/spacy/ml/models/tok2vec.py +++ b/spacy/ml/models/tok2vec.py @@ -99,7 +99,13 @@ def hash_charembed_cnn( @registry.architectures.register("spacy.HashEmbedBiLSTM.v1") def hash_embed_bilstm_v1( - pretrained_vectors, width, depth, embed_size, subword_features, maxout_pieces, dropout + pretrained_vectors, + width, + depth, + embed_size, + subword_features, + maxout_pieces, + dropout, ): # Does not use character embeddings: set to False by default return build_Tok2Vec_model( @@ -141,21 +147,24 @@ def hash_char_embed_bilstm_v1( @registry.architectures.register("spacy.LayerNormalizedMaxout.v1") def LayerNormalizedMaxout(width, maxout_pieces): - return Maxout( - nO=width, - nP=maxout_pieces, - dropout=0.0, - normalize=True, - ) + return Maxout(nO=width, nP=maxout_pieces, dropout=0.0, normalize=True,) @registry.architectures.register("spacy.MultiHashEmbed.v1") -def MultiHashEmbed(columns, width, rows, use_subwords, pretrained_vectors, mix, dropout): +def MultiHashEmbed( + columns, width, rows, use_subwords, pretrained_vectors, mix, dropout +): norm = HashEmbed(nO=width, nV=rows, column=columns.index("NORM"), dropout=dropout) if use_subwords: - prefix = HashEmbed(nO=width, nV=rows // 2, column=columns.index("PREFIX"), dropout=dropout) - suffix = HashEmbed(nO=width, nV=rows // 2, column=columns.index("SUFFIX"), dropout=dropout) - shape = HashEmbed(nO=width, nV=rows // 2, column=columns.index("SHAPE"), dropout=dropout) + prefix = HashEmbed( + nO=width, nV=rows // 2, column=columns.index("PREFIX"), dropout=dropout + ) + suffix = HashEmbed( + nO=width, nV=rows // 2, column=columns.index("SUFFIX"), dropout=dropout + ) + shape = HashEmbed( + nO=width, nV=rows // 2, column=columns.index("SHAPE"), dropout=dropout + ) if pretrained_vectors: glove = StaticVectors( @@ -195,7 +204,13 @@ def CharacterEmbed(columns, width, rows, nM, nC, features, dropout): def MaxoutWindowEncoder(width, window_size, maxout_pieces, depth): cnn = chain( expand_window(window_size=window_size), - Maxout(nO=width, nI=width * ((window_size * 2) + 1), nP=maxout_pieces, dropout=0.0, normalize=True), + Maxout( + nO=width, + nI=width * ((window_size * 2) + 1), + nP=maxout_pieces, + dropout=0.0, + normalize=True, + ), ) model = clone(residual(cnn), depth) model.set_dim("nO", width) @@ -247,11 +262,19 @@ def build_Tok2Vec_model( subword_features = False cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH] with Model.define_operators({">>": chain, "|": concatenate, "**": clone}): - norm = HashEmbed(nO=width, nV=embed_size, column=cols.index(NORM), dropout=dropout) + norm = HashEmbed( + nO=width, nV=embed_size, column=cols.index(NORM), dropout=dropout + ) if subword_features: - prefix = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(PREFIX), dropout=dropout) - suffix = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(SUFFIX), dropout=dropout) - shape = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(SHAPE), dropout=dropout) + prefix = HashEmbed( + nO=width, nV=embed_size // 2, column=cols.index(PREFIX), dropout=dropout + ) + suffix = HashEmbed( + nO=width, nV=embed_size // 2, column=cols.index(SUFFIX), 
dropout=dropout + ) + shape = HashEmbed( + nO=width, nV=embed_size // 2, column=cols.index(SHAPE), dropout=dropout + ) else: prefix, suffix, shape = (None, None, None) if pretrained_vectors is not None: diff --git a/spacy/ml/tb_framework.py b/spacy/ml/tb_framework.py index 251189389..69b40cbcf 100644 --- a/spacy/ml/tb_framework.py +++ b/spacy/ml/tb_framework.py @@ -20,8 +20,8 @@ def TransitionModel(tok2vec, lower, upper, unseen_classes=set()): attrs={ "has_upper": has_upper, "unseen_classes": set(unseen_classes), - "resize_output": resize_output - } + "resize_output": resize_output, + }, ) @@ -31,14 +31,14 @@ def forward(model, X, is_train): model.layers, unseen_classes=model.attrs["unseen_classes"], train=is_train, - has_upper=model.attrs["has_upper"] + has_upper=model.attrs["has_upper"], ) return step_model, step_model.finish_steps def init(model, X=None, Y=None): - tok2vec = model.get_ref("tok2vec").initialize(X=X) + tok2vec = model.get_ref("tok2vec").initialize(X=X) # noqa: F841 lower = model.get_ref("lower").initialize() if model.attrs["has_upper"]: statevecs = model.ops.alloc2f(2, lower.get_dim("nO")) @@ -46,7 +46,7 @@ def init(model, X=None, Y=None): def resize_output(model, new_nO): - tok2vec = model.get_ref("tok2vec") + tok2vec = model.get_ref("tok2vec") # noqa: F841 lower = model.get_ref("lower") upper = model.get_ref("upper") if not model.attrs["has_upper"]: @@ -62,7 +62,7 @@ def resize_output(model, new_nO): nI = None if smaller.has_dim("nI"): nI = smaller.get_dim("nI") - with use_ops('numpy'): + with use_ops("numpy"): larger = Linear(nO=new_nO, nI=nI) larger.init = smaller.init # it could be that the model is not initialized yet, then skip this bit @@ -74,8 +74,8 @@ def resize_output(model, new_nO): # Weights are stored in (nr_out, nr_in) format, so we're basically # just adding rows here. 
if smaller.has_dim("nO"): - larger_W[:smaller.get_dim("nO")] = smaller_W - larger_b[:smaller.get_dim("nO")] = smaller_b + larger_W[: smaller.get_dim("nO")] = smaller_W + larger_b[: smaller.get_dim("nO")] = smaller_b for i in range(smaller.get_dim("nO"), new_nO): model.attrs["unseen_classes"].add(i) diff --git a/spacy/pipeline/simple_ner.py b/spacy/pipeline/simple_ner.py index c674046af..58f647b67 100644 --- a/spacy/pipeline/simple_ner.py +++ b/spacy/pipeline/simple_ner.py @@ -21,9 +21,7 @@ class SimpleNER(Pipe): self.model = model self.cfg = {"labels": []} self.loss_func = SequenceCategoricalCrossentropy( - names=self.get_tag_names(), - normalize=True, - missing_value=None + names=self.get_tag_names(), normalize=True, missing_value=None ) assert self.model is not None @@ -38,21 +36,21 @@ class SimpleNER(Pipe): def add_label(self, label): if label not in self.cfg["labels"]: self.cfg["labels"].append(label) - + def get_tag_names(self): if self.is_biluo: return ( - [f"B-{label}" for label in self.labels] + - [f"I-{label}" for label in self.labels] + - [f"L-{label}" for label in self.labels] + - [f"U-{label}" for label in self.labels] + - ["O"] + [f"B-{label}" for label in self.labels] + + [f"I-{label}" for label in self.labels] + + [f"L-{label}" for label in self.labels] + + [f"U-{label}" for label in self.labels] + + ["O"] ) else: return ( - [f"B-{label}" for label in self.labels] + - [f"I-{label}" for label in self.labels] + - ["O"] + [f"B-{label}" for label in self.labels] + + [f"I-{label}" for label in self.labels] + + ["O"] ) def predict(self, docs: List[Doc]) -> List[Floats2d]: @@ -108,7 +106,7 @@ class SimpleNER(Pipe): def begin_training(self, get_examples, pipeline=None, sgd=None, **kwargs): self.cfg.update(kwargs) - if not hasattr(get_examples, '__call__'): + if not hasattr(get_examples, "__call__"): gold_tuples = get_examples get_examples = lambda: gold_tuples labels = _get_labels(get_examples()) @@ -117,14 +115,12 @@ class SimpleNER(Pipe): labels = self.labels n_actions = self.model.attrs["get_num_actions"](len(labels)) self.model.set_dim("nO", n_actions) - self.model.initialize() + self.model.initialize() if pipeline is not None: self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg) link_vectors_to_models(self.vocab) self.loss_func = SequenceCategoricalCrossentropy( - names=self.get_tag_names(), - normalize=True, - missing_value=None + names=self.get_tag_names(), normalize=True, missing_value=None ) return sgd @@ -135,7 +131,7 @@ class SimpleNER(Pipe): def _has_ner(eg): for ner_tag in eg.gold.ner: - if ner_tag != "-" and ner_tag != None: + if ner_tag != "-" and ner_tag is not None: return True else: return False @@ -145,7 +141,7 @@ def _get_labels(examples): labels = set() for eg in examples: for ner_tag in eg.token_annotation.entities: - if ner_tag != 'O' and ner_tag != '-': - _, label = ner_tag.split('-', 1) + if ner_tag != "O" and ner_tag != "-": + _, label = ner_tag.split("-", 1) labels.add(label) return list(sorted(labels)) diff --git a/spacy/scorer.py b/spacy/scorer.py index 288da23aa..af74db80e 100644 --- a/spacy/scorer.py +++ b/spacy/scorer.py @@ -98,7 +98,9 @@ class Scorer(object): for name, component in pipeline: if name == "textcat": self.textcat_multilabel = component.model.attrs["multi_label"] - self.textcat_positive_label = component.cfg.get("positive_label", None) + self.textcat_positive_label = component.cfg.get( + "positive_label", None + ) for label in component.cfg.get("labels", []): self.textcat_auc_per_cat[label] = ROCAUCScore() 
self.textcat_f_per_cat[label] = PRFScore() @@ -119,19 +121,19 @@ class Scorer(object): @property def morphs_acc(self): - """RETURNS (float): Morph tag accuracy (morphological features, + """RETURNS (float): Morph tag accuracy (morphological features, i.e. `Token.morph`). """ - return self.morphs.fscore * 100 + return self.morphs.fscore * 100 @property def morphs_per_type(self): - """RETURNS (dict): Scores per dependency label. + """RETURNS (dict): Scores per dependency label. """ - return { - k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100} - for k, v in self.morphs_per_feat.items() - } + return { + k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100} + for k, v in self.morphs_per_feat.items() + } @property def sent_p(self): @@ -302,7 +304,15 @@ class Scorer(object): gold_morphs_per_feat = {} gold_sent_starts = set() gold_ents = set(tags_to_entities(orig.entities)) - for id_, tag, pos, morph, head, dep, sent_start in zip(orig.ids, orig.tags, orig.pos, orig.morphs, orig.heads, orig.deps, orig.sent_starts): + for id_, tag, pos, morph, head, dep, sent_start in zip( + orig.ids, + orig.tags, + orig.pos, + orig.morphs, + orig.heads, + orig.deps, + orig.sent_starts, + ): gold_tags.add((id_, tag)) gold_pos.add((id_, pos)) gold_morphs.add((id_, morph)) @@ -400,7 +410,10 @@ class Scorer(object): self.pos.score_set(cand_pos, gold_pos) self.morphs.score_set(cand_morphs, gold_morphs) for field in self.morphs_per_feat: - self.morphs_per_feat[field].score_set(cand_morphs_per_feat.get(field, set()), gold_morphs_per_feat.get(field, set())) + self.morphs_per_feat[field].score_set( + cand_morphs_per_feat.get(field, set()), + gold_morphs_per_feat.get(field, set()), + ) self.sent_starts.score_set(cand_sent_starts, gold_sent_starts) self.labelled.score_set(cand_deps, gold_deps) for dep in self.labelled_per_dep: @@ -412,7 +425,9 @@ class Scorer(object): ) if ( len(gold.cats) > 0 - and set(self.textcat_f_per_cat) == set(self.textcat_auc_per_cat) == set(gold.cats) + and set(self.textcat_f_per_cat) + == set(self.textcat_auc_per_cat) + == set(gold.cats) and set(gold.cats) == set(doc.cats) ): goldcat = max(gold.cats, key=gold.cats.get) @@ -424,10 +439,10 @@ class Scorer(object): ) for label in set(gold.cats): self.textcat_auc_per_cat[label].score_set( - doc.cats[label], gold.cats[label] + doc.cats[label], gold.cats[label] ) self.textcat_f_per_cat[label].score_set( - set([label]) & set([candcat]), set([label]) & set([goldcat]) + set([label]) & set([candcat]), set([label]) & set([goldcat]) ) elif len(self.textcat_f_per_cat) > 0: model_labels = set(self.textcat_f_per_cat) diff --git a/spacy/tests/doc/test_add_entities.py b/spacy/tests/doc/test_add_entities.py index 879334056..b9c230516 100644 --- a/spacy/tests/doc/test_add_entities.py +++ b/spacy/tests/doc/test_add_entities.py @@ -9,7 +9,12 @@ from spacy.pipeline.defaults import default_ner def test_doc_add_entities_set_ents_iob(en_vocab): text = ["This", "is", "a", "lion"] doc = get_doc(en_vocab, text) - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } ner = EntityRecognizer(en_vocab, default_ner(), **config) ner.begin_training([]) ner(doc) @@ -26,7 +31,12 @@ def test_doc_add_entities_set_ents_iob(en_vocab): def test_ents_reset(en_vocab): text = ["This", "is", "a", "lion"] doc = get_doc(en_vocab, text) - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 
1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } ner = EntityRecognizer(en_vocab, default_ner(), **config) ner.begin_training([]) ner(doc) diff --git a/spacy/tests/parser/test_add_label.py b/spacy/tests/parser/test_add_label.py index f9663ba32..893465b45 100644 --- a/spacy/tests/parser/test_add_label.py +++ b/spacy/tests/parser/test_add_label.py @@ -1,9 +1,8 @@ import pytest -from thinc.api import Adam, NumpyOps +from thinc.api import Adam from spacy.attrs import NORM from spacy.gold import GoldParse from spacy.vocab import Vocab - from spacy.pipeline.defaults import default_parser, default_ner from spacy.tokens import Doc from spacy.pipeline import DependencyParser, EntityRecognizer @@ -17,7 +16,12 @@ def vocab(): @pytest.fixture def parser(vocab): - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } parser = DependencyParser(vocab, default_parser(), **config) return parser @@ -58,7 +62,12 @@ def test_add_label(parser): def test_add_label_deserializes_correctly(): - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } ner1 = EntityRecognizer(Vocab(), default_ner(), **config) ner1.add_label("C") ner1.add_label("B") diff --git a/spacy/tests/parser/test_arc_eager_oracle.py b/spacy/tests/parser/test_arc_eager_oracle.py index 5d265261f..42b62251e 100644 --- a/spacy/tests/parser/test_arc_eager_oracle.py +++ b/spacy/tests/parser/test_arc_eager_oracle.py @@ -138,7 +138,12 @@ def test_get_oracle_actions(): deps.append(dep) ents.append(ent) doc = Doc(Vocab(), words=[t[1] for t in annot_tuples]) - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } parser = DependencyParser(doc.vocab, default_parser(), **config) parser.moves.add_action(0, "") parser.moves.add_action(1, "") diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py index b0a8109dc..e82de03bf 100644 --- a/spacy/tests/parser/test_ner.py +++ b/spacy/tests/parser/test_ner.py @@ -138,7 +138,12 @@ def test_accept_blocked_token(): # 1. test normal behaviour nlp1 = English() doc1 = nlp1("I live in New York") - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } ner1 = EntityRecognizer(doc1.vocab, default_ner(), **config) assert [token.ent_iob_ for token in doc1] == ["", "", "", "", ""] assert [token.ent_type_ for token in doc1] == ["", "", "", "", ""] @@ -157,7 +162,12 @@ def test_accept_blocked_token(): # 2. 
test blocking behaviour nlp2 = English() doc2 = nlp2("I live in New York") - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } ner2 = EntityRecognizer(doc2.vocab, default_ner(), **config) # set "New York" to a blocked entity @@ -215,7 +225,12 @@ def test_overwrite_token(): assert [token.ent_type_ for token in doc] == ["", "", "", "", ""] # Check that a new ner can overwrite O - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } ner2 = EntityRecognizer(doc.vocab, default_ner(), **config) ner2.moves.add_action(5, "") ner2.add_label("GPE") diff --git a/spacy/tests/parser/test_neural_parser.py b/spacy/tests/parser/test_neural_parser.py index 7f3e981ea..d88517fb5 100644 --- a/spacy/tests/parser/test_neural_parser.py +++ b/spacy/tests/parser/test_neural_parser.py @@ -28,7 +28,12 @@ def tok2vec(): @pytest.fixture def parser(vocab, arc_eager): - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } return Parser(vocab, model=default_parser(), moves=arc_eager, **config) diff --git a/spacy/tests/parser/test_nn_beam.py b/spacy/tests/parser/test_nn_beam.py index fa5d59f9e..841eb058c 100644 --- a/spacy/tests/parser/test_nn_beam.py +++ b/spacy/tests/parser/test_nn_beam.py @@ -94,7 +94,12 @@ def test_beam_advance_too_few_scores(beam, scores): def test_beam_parse(): nlp = Language() - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } nlp.add_pipe(DependencyParser(nlp.vocab, default_parser(), **config), name="parser") nlp.parser.add_label("nsubj") nlp.parser.begin_training([], token_vector_width=8, hidden_width=8) diff --git a/spacy/tests/parser/test_preset_sbd.py b/spacy/tests/parser/test_preset_sbd.py index ccf7d3ba3..37a9136aa 100644 --- a/spacy/tests/parser/test_preset_sbd.py +++ b/spacy/tests/parser/test_preset_sbd.py @@ -16,7 +16,12 @@ def vocab(): @pytest.fixture def parser(vocab): - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } parser = DependencyParser(vocab, default_parser(), **config) parser.cfg["token_vector_width"] = 4 parser.cfg["hidden_width"] = 32 diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py index 32b434e04..62c7fbf17 100644 --- a/spacy/tests/pipeline/test_entity_linker.py +++ b/spacy/tests/pipeline/test_entity_linker.py @@ -264,11 +264,13 @@ GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"] def test_overfitting_IO(): # Simple test to try and quickly overfit the NEL component - ensuring the ML models work correctly nlp = English() - nlp.add_pipe(nlp.create_pipe('sentencizer')) + nlp.add_pipe(nlp.create_pipe("sentencizer")) # Add a custom component to recognize "Russ Cochran" as an entity for the example training data ruler = EntityRuler(nlp) - patterns = [{"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}] + 
patterns = [ + {"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]} + ] ruler.add_patterns(patterns) nlp.add_pipe(ruler) @@ -285,7 +287,11 @@ def test_overfitting_IO(): mykb = KnowledgeBase(nlp.vocab, entity_vector_length=3) mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3]) mykb.add_entity(entity="Q7381115", freq=12, entity_vector=[9, 1, -7]) - mykb.add_alias(alias="Russ Cochran", entities=["Q2146908", "Q7381115"], probabilities=[0.5, 0.5]) + mykb.add_alias( + alias="Russ Cochran", + entities=["Q2146908", "Q7381115"], + probabilities=[0.5, 0.5], + ) # Create the Entity Linker component and add it to the pipeline entity_linker = nlp.create_pipe("entity_linker", config={"kb": mykb}) diff --git a/spacy/tests/pipeline/test_morphologizer.py b/spacy/tests/pipeline/test_morphologizer.py index f9307afc2..f052c4380 100644 --- a/spacy/tests/pipeline/test_morphologizer.py +++ b/spacy/tests/pipeline/test_morphologizer.py @@ -15,8 +15,17 @@ def test_label_types(): TRAIN_DATA = [ - ("I like green eggs", {"morphs": ["Feat=N", "Feat=V", "Feat=J", "Feat=N"], "pos": ["NOUN", "VERB", "ADJ", "NOUN"]}), - ("Eat blue ham", {"morphs": ["Feat=V", "Feat=J", "Feat=N"], "pos": ["VERB", "ADJ", "NOUN"]}), + ( + "I like green eggs", + { + "morphs": ["Feat=N", "Feat=V", "Feat=J", "Feat=N"], + "pos": ["NOUN", "VERB", "ADJ", "NOUN"], + }, + ), + ( + "Eat blue ham", + {"morphs": ["Feat=V", "Feat=J", "Feat=N"], "pos": ["VERB", "ADJ", "NOUN"]}, + ), ] @@ -38,7 +47,12 @@ def test_overfitting_IO(): # test the trained model test_text = "I like blue eggs" doc = nlp(test_text) - gold_morphs = ["Feat=N|POS=NOUN", "Feat=V|POS=VERB", "Feat=J|POS=ADJ", "Feat=N|POS=NOUN"] + gold_morphs = [ + "Feat=N|POS=NOUN", + "Feat=V|POS=VERB", + "Feat=J|POS=ADJ", + "Feat=N|POS=NOUN", + ] assert gold_morphs == [t.morph_ for t in doc] # Also test the results are still the same after IO diff --git a/spacy/tests/pipeline/test_simple_ner.py b/spacy/tests/pipeline/test_simple_ner.py index 9d4acf2fd..024d7bd26 100644 --- a/spacy/tests/pipeline/test_simple_ner.py +++ b/spacy/tests/pipeline/test_simple_ner.py @@ -1,30 +1,31 @@ import pytest from collections import namedtuple - from thinc.api import NumpyOps from spacy.ml._biluo import BILUO, _get_transition_table -from spacy.pipeline.simple_ner import SimpleNER -import spacy -@pytest.fixture(params=[ - ["PER", "ORG", "LOC", "MISC"], - ["GPE", "PERSON", "NUMBER", "CURRENCY", "EVENT"] -]) +@pytest.fixture( + params=[ + ["PER", "ORG", "LOC", "MISC"], + ["GPE", "PERSON", "NUMBER", "CURRENCY", "EVENT"], + ] +) def labels(request): return request.param + @pytest.fixture def ops(): return NumpyOps() + def _get_actions(labels): action_names = ( - [f"B{label}" for label in labels] + \ - [f"I{label}" for label in labels] + \ - [f"L{label}" for label in labels] + \ - [f"U{label}" for label in labels] + \ - ["O"] + [f"B{label}" for label in labels] + + [f"I{label}" for label in labels] + + [f"L{label}" for label in labels] + + [f"U{label}" for label in labels] + + ["O"] ) A = namedtuple("actions", action_names) return A(**{name: i for i, name in enumerate(action_names)}) @@ -228,7 +229,7 @@ def test_transition_table(ops): assert table[0, a.O, a.Uloc] == 1 assert table[0, a.O, a.Uorg] == 1 assert table[0, a.O, a.O] == 1 - + # Last token, prev action was B assert table[1, a.Bper, a.Bper] == 0 assert table[1, a.Bper, a.Bloc] == 0 diff --git a/spacy/tests/regression/test_issue1501-2000.py b/spacy/tests/regression/test_issue1501-2000.py index 177b6bb3d..6a2d16733 100644 --- 
a/spacy/tests/regression/test_issue1501-2000.py +++ b/spacy/tests/regression/test_issue1501-2000.py @@ -270,7 +270,12 @@ def test_issue1963(en_tokenizer): @pytest.mark.parametrize("label", ["U-JOB-NAME"]) def test_issue1967(label): - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } ner = EntityRecognizer(Vocab(), default_ner(), **config) example = Example(doc=None) example.set_token_annotation( diff --git a/spacy/tests/regression/test_issue3001-3500.py b/spacy/tests/regression/test_issue3001-3500.py index 6df437b3c..a37707379 100644 --- a/spacy/tests/regression/test_issue3001-3500.py +++ b/spacy/tests/regression/test_issue3001-3500.py @@ -196,7 +196,12 @@ def test_issue3345(): doc = Doc(nlp.vocab, words=["I", "live", "in", "New", "York"]) doc[4].is_sent_start = True ruler = EntityRuler(nlp, patterns=[{"label": "GPE", "pattern": "New York"}]) - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } ner = EntityRecognizer(doc.vocab, default_ner(), **config) # Add the OUT action. I wouldn't have thought this would be necessary... ner.moves.add_action(5, "") diff --git a/spacy/tests/regression/test_issue3830.py b/spacy/tests/regression/test_issue3830.py index 15632bdf8..06b7893a7 100644 --- a/spacy/tests/regression/test_issue3830.py +++ b/spacy/tests/regression/test_issue3830.py @@ -6,7 +6,12 @@ from spacy.pipeline.defaults import default_parser def test_issue3830_no_subtok(): """Test that the parser doesn't have subtok label if not learn_tokens""" - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } parser = DependencyParser(Vocab(), default_parser(), **config) parser.add_label("nsubj") assert "subtok" not in parser.labels @@ -16,7 +21,12 @@ def test_issue3830_no_subtok(): def test_issue3830_with_subtok(): """Test that the parser does have subtok label if learn_tokens=True.""" - config = {"learn_tokens": True, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": True, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } parser = DependencyParser(Vocab(), default_parser(), **config) parser.add_label("nsubj") assert "subtok" not in parser.labels diff --git a/spacy/tests/regression/test_issue4042.py b/spacy/tests/regression/test_issue4042.py index 4978aba44..f47290b92 100644 --- a/spacy/tests/regression/test_issue4042.py +++ b/spacy/tests/regression/test_issue4042.py @@ -74,7 +74,12 @@ def test_issue4042_bug2(): output_dir.mkdir() ner1.to_disk(output_dir) - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } ner2 = EntityRecognizer(vocab, default_ner(), **config) ner2.from_disk(output_dir) assert len(ner2.labels) == 2 diff --git a/spacy/tests/regression/test_issue4313.py b/spacy/tests/regression/test_issue4313.py index 946316d85..5e2764618 100644 --- a/spacy/tests/regression/test_issue4313.py +++ b/spacy/tests/regression/test_issue4313.py @@ -12,7 +12,12 @@ def test_issue4313(): beam_width = 16 beam_density 
= 0.0001 nlp = English() - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } ner = EntityRecognizer(nlp.vocab, default_ner(), **config) ner.add_label("SOME_LABEL") ner.begin_training([]) diff --git a/spacy/tests/regression/test_issue4924.py b/spacy/tests/regression/test_issue4924.py index b240f6d4a..10c7868a0 100644 --- a/spacy/tests/regression/test_issue4924.py +++ b/spacy/tests/regression/test_issue4924.py @@ -1,4 +1,3 @@ -import pytest from spacy.language import Language diff --git a/spacy/tests/serialize/test_serialize_config.py b/spacy/tests/serialize/test_serialize_config.py index 870a980f2..cfb9d7381 100644 --- a/spacy/tests/serialize/test_serialize_config.py +++ b/spacy/tests/serialize/test_serialize_config.py @@ -112,7 +112,7 @@ def test_serialize_custom_nlp(): nlp.to_disk(d) nlp2 = spacy.load(d) model = nlp2.get_pipe("parser").model - tok2vec = model.get_ref("tok2vec") + tok2vec = model.get_ref("tok2vec") # noqa: F841 upper = model.get_ref("upper") # check that we have the correct settings, not the default ones @@ -132,7 +132,7 @@ def test_serialize_parser(): nlp.to_disk(d) nlp2 = spacy.load(d) model = nlp2.get_pipe("parser").model - tok2vec = model.get_ref("tok2vec") + tok2vec = model.get_ref("tok2vec") # noqa: F841 upper = model.get_ref("upper") # check that we have the correct settings, not the default ones diff --git a/spacy/tests/serialize/test_serialize_pipeline.py b/spacy/tests/serialize/test_serialize_pipeline.py index 9c4e1f61e..abb5ccb27 100644 --- a/spacy/tests/serialize/test_serialize_pipeline.py +++ b/spacy/tests/serialize/test_serialize_pipeline.py @@ -12,7 +12,12 @@ test_parsers = [DependencyParser, EntityRecognizer] @pytest.fixture def parser(en_vocab): - config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} + config = { + "learn_tokens": False, + "min_action_freq": 30, + "beam_width": 1, + "beam_update_prob": 1.0, + } parser = DependencyParser(en_vocab, default_parser(), **config) parser.add_label("nsubj") return parser diff --git a/spacy/tests/serialize/test_serialize_vocab_strings.py b/spacy/tests/serialize/test_serialize_vocab_strings.py index d3e82296e..e570b1025 100644 --- a/spacy/tests/serialize/test_serialize_vocab_strings.py +++ b/spacy/tests/serialize/test_serialize_vocab_strings.py @@ -35,8 +35,10 @@ def test_serialize_vocab_roundtrip_bytes(strings1, strings2): assert vocab1.to_bytes() == vocab1_b new_vocab1 = Vocab().from_bytes(vocab1_b) assert new_vocab1.to_bytes() == vocab1_b - assert len(new_vocab1.strings) == len(strings1) + 2 # adds _SP and POS=SPACE - assert sorted([s for s in new_vocab1.strings]) == sorted(strings1 + list(default_strings)) + assert len(new_vocab1.strings) == len(strings1) + 2 # adds _SP and POS=SPACE + assert sorted([s for s in new_vocab1.strings]) == sorted( + strings1 + list(default_strings) + ) @pytest.mark.parametrize("strings1,strings2", test_strings) diff --git a/spacy/tests/test_scorer.py b/spacy/tests/test_scorer.py index d750a8202..2e1cf2730 100644 --- a/spacy/tests/test_scorer.py +++ b/spacy/tests/test_scorer.py @@ -40,6 +40,7 @@ test_ner_apple = [ ] ] + @pytest.fixture def tagged_doc(): text = "Sarah's sister flew to Silicon Valley via London." 
@@ -184,7 +185,7 @@ def test_tag_score(tagged_doc): tagged_doc, tags=[t.tag_ for t in tagged_doc], pos=[t.pos_ for t in tagged_doc], - morphs=[t.morph_ for t in tagged_doc] + morphs=[t.morph_ for t in tagged_doc], ) scorer.score((tagged_doc, gold)) results = scorer.scores diff --git a/spacy/tests/test_util.py b/spacy/tests/test_util.py index 1410755db..a7258449d 100644 --- a/spacy/tests/test_util.py +++ b/spacy/tests/test_util.py @@ -13,7 +13,7 @@ from spacy.util import minibatch_by_words ([400, 400, 199, 3], [4]), ([400, 400, 199, 3, 200], [3, 2]), ([400, 400, 199, 3, 1], [5]), - ([400, 400, 199, 3, 1, 1500], [5]), # 1500 will be discarded + ([400, 400, 199, 3, 1, 1500], [5]), # 1500 will be discarded ([400, 400, 199, 3, 1, 200], [3, 3]), ([400, 400, 199, 3, 1, 999], [3, 3]), ([400, 400, 199, 3, 1, 999, 999], [3, 2, 1, 1]), @@ -28,7 +28,11 @@ def test_util_minibatch(doc_sizes, expected_batches): examples = [Example(doc=doc) for doc in docs] tol = 0.2 batch_size = 1000 - batches = list(minibatch_by_words(examples=examples, size=batch_size, tolerance=tol, discard_oversize=True)) + batches = list( + minibatch_by_words( + examples=examples, size=batch_size, tolerance=tol, discard_oversize=True + ) + ) assert [len(batch) for batch in batches] == expected_batches max_size = batch_size + batch_size * tol @@ -53,7 +57,9 @@ def test_util_minibatch_oversize(doc_sizes, expected_batches): examples = [Example(doc=doc) for doc in docs] tol = 0.2 batch_size = 1000 - batches = list(minibatch_by_words(examples=examples, size=batch_size, tolerance=tol, discard_oversize=False)) + batches = list( + minibatch_by_words( + examples=examples, size=batch_size, tolerance=tol, discard_oversize=False + ) + ) assert [len(batch) for batch in batches] == expected_batches - - diff --git a/spacy/util.py b/spacy/util.py index d2d87bef9..ad3dc3635 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -697,7 +697,9 @@ def decaying(start, stop, decay): curr -= decay -def minibatch_by_words(examples, size, count_words=len, tolerance=0.2, discard_oversize=False): +def minibatch_by_words( + examples, size, count_words=len, tolerance=0.2, discard_oversize=False +): """Create minibatches of roughly a given number of words. If any examples are longer than the specified batch length, they will appear in a batch by themselves, or be discarded if discard_oversize=True."""
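
Note on the final hunk: the reformatted signature of minibatch_by_words in spacy/util.py documents that batches are filled up to a rough word budget, that any example longer than the budget ends up in a batch by itself, and that such examples are dropped entirely when discard_oversize=True. The snippet below is a minimal, standalone sketch of that batching behaviour for illustration only. It is not spaCy's implementation: the real function also applies the tolerance margin seen in the signature and counts words via count_words over Example objects, and the function name here is invented for the sketch.

def minibatch_by_words_sketch(examples, size, count_words=len, discard_oversize=False):
    """Yield lists of examples whose summed word counts stay within `size`.

    Simplified illustration of the behaviour described in the docstring above;
    spaCy's real minibatch_by_words additionally allows a tolerance margin.
    """
    batch = []
    batch_words = 0
    for eg in examples:
        n_words = count_words(eg)
        if n_words > size:
            # The example alone exceeds the budget: flush the open batch,
            # then emit the oversized example on its own, or drop it.
            if batch:
                yield batch
                batch, batch_words = [], 0
            if not discard_oversize:
                yield [eg]
            continue
        if batch_words + n_words > size:
            # Adding this example would overflow the budget: start a new batch.
            yield batch
            batch, batch_words = [], 0
        batch.append(eg)
        batch_words += n_words
    if batch:
        yield batch


if __name__ == "__main__":
    docs = ["a b c".split(), "d e".split(), ("x " * 50).split()]
    print([len(b) for b in minibatch_by_words_sketch(docs, size=10)])
    # prints [2, 1]: the two short docs share a batch, the long doc is on its own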