diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py
index f4bddac39..40c6d861f 100644
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@@ -14,9 +14,9 @@ from ..gold.converters import iob2docs, conll_ner2docs, json2docs
 # imported from /converters.
 
 CONVERTERS = {
-    #"conllubio": conllu2docs, TODO
-    #"conllu": conllu2docs, TODO
-    #"conll": conllu2docs, TODO
+    # "conllubio": conllu2docs, TODO
+    # "conllu": conllu2docs, TODO
+    # "conll": conllu2docs, TODO
     "ner": conll_ner2docs,
     "iob": iob2docs,
     "json": json2docs,
@@ -134,7 +134,7 @@ def verify_cli_args(
     merge_subtokens,
     converter,
     ner_map,
-    lang
+    lang,
 ):
     if converter == "ner" or converter == "iob":
         input_data = input_path.open("r", encoding="utf-8").read()
@@ -148,7 +148,7 @@ def verify_cli_args(
         else:
             msg.warn(
                 "Can't automatically detect NER format. Conversion may not",
-                "succeed. See https://spacy.io/api/cli#convert"
+                "succeed. See https://spacy.io/api/cli#convert",
             )
     if file_type not in FILE_TYPES_STDOUT and output_dir == "-":
         # TODO: support msgpack via stdout in srsly?
@@ -176,7 +176,7 @@ def verify_cli_args(
     if converter not in CONVERTERS:
         msg.fail(f"Can't find converter for {converter}", exits=1)
     return converter
- 
+
 
 def _get_converter(msg, converter, input_path):
     if input_path.is_dir():
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 06e9be15b..ff8b9dc96 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -158,7 +158,7 @@ def train_cli(
 
     weights_data = None
     if init_tok2vec is not None:
-       with init_tok2vec.open("rb") as file_:
+        with init_tok2vec.open("rb") as file_:
             weights_data = file_.read()
 
     if use_gpu >= 0:
@@ -178,7 +178,6 @@ def train_cli(
     )
 
 
-
 def train(
     config_path,
     data_paths,
@@ -193,7 +192,7 @@ def train(
     config = util.load_config(config_path, create_objects=False)
     util.fix_random_seed(config["training"]["seed"])
     if config["training"].get("use_pytorch_for_gpu_memory"):
-        # It feels kind of weird to not have a default for this. 
+        # It feels kind of weird to not have a default for this.
         use_pytorch_for_gpu_memory()
     nlp_config = config["nlp"]
     config = util.load_config(config_path, create_objects=True)
@@ -238,8 +237,7 @@ def train(
             tok2vec = tok2vec.get(subpath)
         if not tok2vec:
             msg.fail(
-                f"Could not locate the tok2vec model at {tok2vec_path}.",
-                exits=1,
+                f"Could not locate the tok2vec model at {tok2vec_path}.", exits=1,
             )
         tok2vec.from_bytes(weights_data)
 
@@ -351,7 +349,11 @@ def create_evaluation_callback(nlp, optimizer, corpus, cfg):
         try:
             weighted_score = sum(scores[s] * weights.get(s, 0.0) for s in weights)
         except KeyError as e:
-            raise KeyError(Errors.E983.format(dict='score_weights', key=str(e), keys=list(scores.keys())))
+            raise KeyError(
+                Errors.E983.format(
+                    dict="score_weights", key=str(e), keys=list(scores.keys())
+                )
+            )
 
         scores["speed"] = wps
         return weighted_score, scores
@@ -500,15 +502,23 @@ def setup_printer(training, nlp):
             ]
         except KeyError as e:
             raise KeyError(
-                Errors.E983.format(dict='scores (losses)', key=str(e), keys=list(info["losses"].keys())))
+                Errors.E983.format(
+                    dict="scores (losses)", key=str(e), keys=list(info["losses"].keys())
+                )
+            )
 
         try:
             scores = [
-                "{0:.2f}".format(float(info["other_scores"][col]))
-                for col in score_cols
+                "{0:.2f}".format(float(info["other_scores"][col])) for col in score_cols
             ]
         except KeyError as e:
-            raise KeyError(Errors.E983.format(dict='scores (other)', key=str(e), keys=list(info["other_scores"].keys())))
+            raise KeyError(
+                Errors.E983.format(
+                    dict="scores (other)",
+                    key=str(e),
+                    keys=list(info["other_scores"].keys()),
+                )
+            )
         data = (
             [info["step"]] + losses + scores + ["{0:.2f}".format(float(info["score"]))]
         )
@@ -564,7 +574,7 @@ def verify_cli_args(
 def verify_textcat_config(nlp, nlp_config):
     msg.info(f"Initialized textcat component for {len(textcat_labels)} unique labels")
     nlp.get_pipe("textcat").labels = tuple(textcat_labels)
-    # if 'positive_label' is provided: double check whether it's in the data and 
+    # if 'positive_label' is provided: double check whether it's in the data and
     # the task is binary
     if nlp_config["pipeline"]["textcat"].get("positive_label", None):
         textcat_labels = nlp.get_pipe("textcat").cfg.get("labels", [])
diff --git a/spacy/gold/augment.py b/spacy/gold/augment.py
index dda51cda6..45cfc0abe 100644
--- a/spacy/gold/augment.py
+++ b/spacy/gold/augment.py
@@ -5,7 +5,9 @@ import itertools
 def make_orth_variants_example(nlp, example, orth_variant_level=0.0):  # TODO: naming
     raw_text = example.text
     orig_dict = example.to_dict()
-    variant_text, variant_token_annot = make_orth_variants(nlp, raw_text, orig_dict["token_annotation"], orth_variant_level)
+    variant_text, variant_token_annot = make_orth_variants(
+        nlp, raw_text, orig_dict["token_annotation"], orth_variant_level
+    )
     doc = nlp.make_doc(variant_text)
     orig_dict["token_annotation"] = variant_token_annot
     return example.from_dict(doc, orig_dict)
diff --git a/spacy/gold/converters/__init__.py b/spacy/gold/converters/__init__.py
index 0a1242fb4..3e366933a 100644
--- a/spacy/gold/converters/__init__.py
+++ b/spacy/gold/converters/__init__.py
@@ -1,6 +1,6 @@
-from .iob2docs import iob2docs # noqa: F401
+from .iob2docs import iob2docs  # noqa: F401
 from .conll_ner2docs import conll_ner2docs  # noqa: F401
 from .json2docs import json2docs
 
 # TODO: Update this one
-#from .conllu2docs import conllu2docs  # noqa: F401
+# from .conllu2docs import conllu2docs  # noqa: F401
diff --git a/spacy/gold/converters/conll_ner2docs.py b/spacy/gold/converters/conll_ner2docs.py
index 7042bd7d6..4b32893f4 100644
--- a/spacy/gold/converters/conll_ner2docs.py
+++ b/spacy/gold/converters/conll_ner2docs.py
@@ -119,7 +119,7 @@ def conll_ner2docs(
             token.tag_ = pos_tags[i]
             token.is_sent_start = sent_starts[i]
         entities = tags_to_entities(biluo_tags)
-        doc.ents = [Span(doc, start=s, end=e+1, label=L) for L, s, e in entities]
+        doc.ents = [Span(doc, start=s, end=e + 1, label=L) for L, s, e in entities]
         output_docs.append(doc)
     return output_docs
 
diff --git a/spacy/gold/converters/conllu2json.py b/spacy/gold/converters/conllu2json.py
index 25ca1d4eb..8f54965f6 100644
--- a/spacy/gold/converters/conllu2json.py
+++ b/spacy/gold/converters/conllu2json.py
@@ -43,10 +43,7 @@ def conllu2json(
         raw += example.text
         sentences.append(
             generate_sentence(
-                example.to_dict(),
-                has_ner_tags,
-                MISC_NER_PATTERN,
-                ner_map=ner_map,
+                example.to_dict(), has_ner_tags, MISC_NER_PATTERN, ner_map=ner_map,
             )
         )
         # Real-sized documents could be extracted using the comments on the
diff --git a/spacy/gold/converters/json2docs.py b/spacy/gold/converters/json2docs.py
index 98219bb04..8f94e169e 100644
--- a/spacy/gold/converters/json2docs.py
+++ b/spacy/gold/converters/json2docs.py
@@ -8,6 +8,7 @@ from ..example import _fix_legacy_dict_data, _parse_example_dict_data
 from ...util import load_model
 from ...lang.xx import MultiLanguage
 
+
 @contextlib.contextmanager
 def make_tempdir():
     d = Path(tempfile.mkdtemp())
@@ -15,11 +16,7 @@ def make_tempdir():
     shutil.rmtree(str(d))
 
 
-def json2docs(
-    input_data,
-    model=None,
-    **kwargs
-):
+def json2docs(input_data, model=None, **kwargs):
     nlp = load_model(model) if model is not None else MultiLanguage()
     docs = []
     with make_tempdir() as tmp_dir:
@@ -29,10 +26,6 @@ def json2docs(
         for json_annot in read_json_file(json_path):
             example_dict = _fix_legacy_dict_data(json_annot)
             tok_dict, doc_dict = _parse_example_dict_data(example_dict)
-            doc = annotations2doc(
-                nlp.vocab,
-                tok_dict,
-                doc_dict
-            )
+            doc = annotations2doc(nlp.vocab, tok_dict, doc_dict)
             docs.append(doc)
     return docs
diff --git a/spacy/gold/corpus.py b/spacy/gold/corpus.py
index 8e1c1d204..9efa71ff7 100644
--- a/spacy/gold/corpus.py
+++ b/spacy/gold/corpus.py
@@ -12,6 +12,7 @@ class Corpus:
 
     DOCS: https://spacy.io/api/goldcorpus
     """
+
     def __init__(self, train_loc, dev_loc, limit=0):
         """Create a GoldCorpus.
 
@@ -19,7 +20,7 @@ class Corpus:
         dev (str / Path): File or directory of development data.
         RETURNS (GoldCorpus): The newly created object.
         """
-        self.train_loc = train_loc 
+        self.train_loc = train_loc
         self.dev_loc = dev_loc
 
     @staticmethod
@@ -56,7 +57,7 @@ class Corpus:
                 with loc.open("rb") as file_:
                     doc_bin = DocBin().from_bytes(file_.read())
                 yield from doc_bin.get_docs(vocab)
-    
+
     def count_train(self, nlp):
         """Returns count of words in train examples"""
         n = 0
diff --git a/spacy/gold/iob_utils.py b/spacy/gold/iob_utils.py
index c74ef5671..3ae911418 100644
--- a/spacy/gold/iob_utils.py
+++ b/spacy/gold/iob_utils.py
@@ -54,7 +54,7 @@ def biluo_tags_from_doc(doc, missing="O"):
     return biluo_tags_from_offsets(
         doc,
         [(ent.start_char, ent.end_char, ent.label_) for ent in doc.ents],
-        missing=missing
+        missing=missing,
     )
 
 
diff --git a/spacy/language.py b/spacy/language.py
index 01a31400a..1eb53149c 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -542,7 +542,6 @@ class Language(object):
                 raise ValueError(Errors.E979.format(type=type(eg)))
         return converted_examples
 
-
     def update(
         self,
         examples,
@@ -822,7 +821,7 @@ class Language(object):
                 batch_size=batch_size,
                 disable=disable,
                 n_process=n_process,
-                component_cfg=component_cfg
+                component_cfg=component_cfg,
             )
             for doc, context in zip(docs, contexts):
                 yield (doc, context)
diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py
index c4944407f..b8a45db01 100644
--- a/spacy/lemmatizer.py
+++ b/spacy/lemmatizer.py
@@ -51,7 +51,13 @@ class Lemmatizer(object):
         index_table = self.lookups.get_table("lemma_index", {})
         exc_table = self.lookups.get_table("lemma_exc", {})
         rules_table = self.lookups.get_table("lemma_rules", {})
-        if not any((index_table.get(univ_pos), exc_table.get(univ_pos), rules_table.get(univ_pos))):
+        if not any(
+            (
+                index_table.get(univ_pos),
+                exc_table.get(univ_pos),
+                rules_table.get(univ_pos),
+            )
+        ):
             if univ_pos == "propn":
                 return [string]
             else:
diff --git a/spacy/ml/_biluo.py b/spacy/ml/_biluo.py
index 28339089a..16dcae792 100644
--- a/spacy/ml/_biluo.py
+++ b/spacy/ml/_biluo.py
@@ -14,11 +14,11 @@ def BILUO() -> Model[Padded, Padded]:
         forward,
         init=init,
         dims={"nO": None},
-        attrs={"get_num_actions": get_num_actions}
+        attrs={"get_num_actions": get_num_actions},
     )
 
 
-def init(model, X: Optional[Padded]=None, Y: Optional[Padded]=None):
+def init(model, X: Optional[Padded] = None, Y: Optional[Padded] = None):
     if X is not None and Y is not None:
         if X.data.shape != Y.data.shape:
             # TODO: Fix error
@@ -49,12 +49,12 @@ def forward(model: Model[Padded, Padded], Xp: Padded, is_train: bool):
     masks = model.ops.alloc3f(*Y.shape)
     max_value = Xp.data.max()
     for t in range(Xp.data.shape[0]):
-        is_last = (Xp.lengths < (t+2)).astype("i")
+        is_last = (Xp.lengths < (t + 2)).astype("i")
         masks[t] = valid_transitions[is_last, prev_actions]
         # Don't train the out-of-bounds sequences.
-        masks[t, Xp.size_at_t[t]:] = 0
+        masks[t, Xp.size_at_t[t] :] = 0
         # Valid actions get 0*10e8, invalid get large negative value
-        Y[t] = Xp.data[t] + ((masks[t]-1) * max_value * 10)
+        Y[t] = Xp.data[t] + ((masks[t] - 1) * max_value * 10)
         prev_actions = Y[t].argmax(axis=-1)
 
     def backprop_biluo(dY: Padded) -> Padded:
diff --git a/spacy/ml/_iob.py b/spacy/ml/_iob.py
index 0ce9a71e6..39feb3285 100644
--- a/spacy/ml/_iob.py
+++ b/spacy/ml/_iob.py
@@ -12,11 +12,11 @@ def IOB() -> Model[Padded, Padded]:
         forward,
         init=init,
         dims={"nO": None},
-        attrs={"get_num_actions": get_num_actions}
+        attrs={"get_num_actions": get_num_actions},
     )
 
 
-def init(model, X: Optional[Padded]=None, Y: Optional[Padded]=None):
+def init(model, X: Optional[Padded] = None, Y: Optional[Padded] = None):
     if X is not None and Y is not None:
         if X.data.shape != Y.data.shape:
             # TODO: Fix error
@@ -48,14 +48,14 @@ def forward(model: Model[Padded, Padded], Xp: Padded, is_train: bool):
     for t in range(Xp.data.shape[0]):
         masks[t] = valid_transitions[prev_actions]
         # Don't train the out-of-bounds sequences.
-        masks[t, Xp.size_at_t[t]:] = 0
+        masks[t, Xp.size_at_t[t] :] = 0
         # Valid actions get 0*10e8, invalid get -1*10e8
-        Y[t] = Xp.data[t] + ((masks[t]-1) * 10e8)
+        Y[t] = Xp.data[t] + ((masks[t] - 1) * 10e8)
         prev_actions = Y[t].argmax(axis=-1)
 
     def backprop_biluo(dY: Padded) -> Padded:
         # Masking the gradient seems to do poorly here. But why?
-        #dY.data *= masks
+        # dY.data *= masks
         return dY
 
     return Padded(Y, Xp.size_at_t, Xp.lengths, Xp.indices), backprop_biluo
@@ -83,10 +83,10 @@ def _get_transition_table(
     B_range = ops.xp.arange(B_start, B_end)
     I_range = ops.xp.arange(I_start, I_end)
     # B and O are always valid
-    table[:, B_start : B_end] = 1
+    table[:, B_start:B_end] = 1
     table[:, O_action] = 1
     # I can only follow a matching B
     table[B_range, I_range] = 1
- 
+
     _cache[n_actions] = table
     return table
diff --git a/spacy/ml/_precomputable_affine.py b/spacy/ml/_precomputable_affine.py
index f4b5b16fe..215cdeda1 100644
--- a/spacy/ml/_precomputable_affine.py
+++ b/spacy/ml/_precomputable_affine.py
@@ -84,7 +84,7 @@ def _backprop_precomputable_affine_padding(model, dY, ids):
     #
     # (ids < 0).T @ dY
     mask = model.ops.asarray(ids < 0, dtype="f")
-    d_pad = model.ops.gemm(mask, dY.reshape(nB, nO*nP), trans1=True)
+    d_pad = model.ops.gemm(mask, dY.reshape(nB, nO * nP), trans1=True)
     return d_pad.reshape((1, nF, nO, nP))
 
 
diff --git a/spacy/ml/models/multi_task.py b/spacy/ml/models/multi_task.py
index 4a360a9e6..b3a9e0815 100644
--- a/spacy/ml/models/multi_task.py
+++ b/spacy/ml/models/multi_task.py
@@ -7,7 +7,12 @@ def build_multi_task_model(tok2vec, maxout_pieces, token_vector_width, nO=None):
     softmax = Softmax(nO=nO, nI=token_vector_width * 2)
     model = chain(
         tok2vec,
-        Maxout(nO=token_vector_width * 2, nI=token_vector_width, nP=maxout_pieces, dropout=0.0),
+        Maxout(
+            nO=token_vector_width * 2,
+            nI=token_vector_width,
+            nP=maxout_pieces,
+            dropout=0.0,
+        ),
         LayerNorm(token_vector_width * 2),
         softmax,
     )
@@ -20,7 +25,11 @@ def build_cloze_multi_task_model(vocab, tok2vec, maxout_pieces, nO=None):
     # nO = vocab.vectors.data.shape[1]
     output_layer = chain(
         Maxout(
-            nO=nO, nI=tok2vec.get_dim("nO"), nP=maxout_pieces, normalize=True, dropout=0.0
+            nO=nO,
+            nI=tok2vec.get_dim("nO"),
+            nP=maxout_pieces,
+            normalize=True,
+            dropout=0.0,
         ),
         Linear(nO=nO, nI=nO, init_W=zero_init),
     )
@@ -39,7 +48,9 @@ def build_masked_language_model(vocab, wrapped_model, mask_prob=0.15):
     def mlm_forward(model, docs, is_train):
         mask, docs = _apply_mask(docs, random_words, mask_prob=mask_prob)
         mask = model.ops.asarray(mask).reshape((mask.shape[0], 1))
-        output, backprop = model.get_ref("wrapped-model").begin_update(docs)  # drop=drop
+        output, backprop = model.get_ref("wrapped-model").begin_update(
+            docs
+        )  # drop=drop
 
         def mlm_backward(d_output):
             d_output *= 1 - mask
diff --git a/spacy/ml/models/parser.py b/spacy/ml/models/parser.py
index bdcd709b1..47c94cfa1 100644
--- a/spacy/ml/models/parser.py
+++ b/spacy/ml/models/parser.py
@@ -16,18 +16,14 @@ def build_tb_parser_model(
     nO=None,
 ):
     t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
-    tok2vec = chain(
-        tok2vec,
-        with_array(Linear(hidden_width, t2v_width)),
-        list2array(),
-    )
+    tok2vec = chain(tok2vec, with_array(Linear(hidden_width, t2v_width)), list2array(),)
     tok2vec.set_dim("nO", hidden_width)
 
     lower = PrecomputableAffine(
         nO=hidden_width if use_upper else nO,
         nF=nr_feature_tokens,
         nI=tok2vec.get_dim("nO"),
-        nP=maxout_pieces
+        nP=maxout_pieces,
     )
     if use_upper:
         with use_ops("numpy"):
diff --git a/spacy/ml/models/simple_ner.py b/spacy/ml/models/simple_ner.py
index 01661f55b..d857813ac 100644
--- a/spacy/ml/models/simple_ner.py
+++ b/spacy/ml/models/simple_ner.py
@@ -1,6 +1,14 @@
 import functools
 from typing import List, Tuple, Dict, Optional
-from thinc.api import Ops, Model, Linear, Softmax, with_array, softmax_activation, padded2list
+from thinc.api import (
+    Ops,
+    Model,
+    Linear,
+    Softmax,
+    with_array,
+    softmax_activation,
+    padded2list,
+)
 from thinc.api import chain, list2padded, configure_normal_init
 from thinc.api import Dropout
 from thinc.types import Padded, Ints1d, Ints3d, Floats2d, Floats3d
@@ -12,12 +20,12 @@ from ...util import registry
 
 
 @registry.architectures.register("spacy.BiluoTagger.v1")
-def BiluoTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], List[Floats2d]]:
+def BiluoTagger(
+    tok2vec: Model[List[Doc], List[Floats2d]]
+) -> Model[List[Doc], List[Floats2d]]:
     biluo = BILUO()
     linear = Linear(
-        nO=None,
-        nI=tok2vec.get_dim("nO"),
-        init_W=configure_normal_init(mean=0.02)
+        nO=None, nI=tok2vec.get_dim("nO"), init_W=configure_normal_init(mean=0.02)
     )
     model = chain(
         tok2vec,
@@ -25,7 +33,7 @@ def BiluoTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], L
         with_array(chain(Dropout(0.1), linear)),
         biluo,
         with_array(softmax_activation()),
-        padded2list()
+        padded2list(),
     )
 
     return Model(
@@ -35,11 +43,14 @@ def BiluoTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], L
         layers=[model, linear],
         refs={"tok2vec": tok2vec, "linear": linear, "biluo": biluo},
         dims={"nO": None},
-        attrs={"get_num_actions": biluo.attrs["get_num_actions"]}
+        attrs={"get_num_actions": biluo.attrs["get_num_actions"]},
     )
 
+
 @registry.architectures.register("spacy.IOBTagger.v1")
-def IOBTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], List[Floats2d]]:
+def IOBTagger(
+    tok2vec: Model[List[Doc], List[Floats2d]]
+) -> Model[List[Doc], List[Floats2d]]:
     biluo = IOB()
     linear = Linear(nO=None, nI=tok2vec.get_dim("nO"))
     model = chain(
@@ -48,7 +59,7 @@ def IOBTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], Lis
         with_array(linear),
         biluo,
         with_array(softmax_activation()),
-        padded2list()
+        padded2list(),
     )
 
     return Model(
@@ -58,11 +69,10 @@ def IOBTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], Lis
         layers=[model],
         refs={"tok2vec": tok2vec, "linear": linear, "biluo": biluo},
         dims={"nO": None},
-        attrs={"get_num_actions": biluo.attrs["get_num_actions"]}
+        attrs={"get_num_actions": biluo.attrs["get_num_actions"]},
     )
 
 
-
 def init(model: Model[List[Doc], List[Floats2d]], X=None, Y=None) -> None:
     if model.get_dim("nO") is None and Y:
         model.set_dim("nO", Y[0].shape[1])
diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py
index a02e1a5a1..12a60345c 100644
--- a/spacy/ml/models/textcat.py
+++ b/spacy/ml/models/textcat.py
@@ -1,7 +1,30 @@
-from thinc.api import Model, reduce_mean, Linear, list2ragged, Logistic, ParametricAttention
+from thinc.api import (
+    Model,
+    reduce_mean,
+    Linear,
+    list2ragged,
+    Logistic,
+    ParametricAttention,
+)
 from thinc.api import chain, concatenate, clone, Dropout
-from thinc.api import SparseLinear, Softmax, softmax_activation, Maxout, reduce_sum, Relu, residual, expand_window
-from thinc.api import HashEmbed, with_ragged, with_array, with_cpu, uniqued, FeatureExtractor
+from thinc.api import (
+    SparseLinear,
+    Softmax,
+    softmax_activation,
+    Maxout,
+    reduce_sum,
+    Relu,
+    residual,
+    expand_window,
+)
+from thinc.api import (
+    HashEmbed,
+    with_ragged,
+    with_array,
+    with_cpu,
+    uniqued,
+    FeatureExtractor,
+)
 
 from ..spacy_vectors import SpacyVectors
 from ... import util
@@ -50,14 +73,31 @@ def build_bow_text_classifier(exclusive_classes, ngram_size, no_output_layer, nO
 
 
 @registry.architectures.register("spacy.TextCat.v1")
-def build_text_classifier(width, embed_size, pretrained_vectors, exclusive_classes, ngram_size,
-                          window_size, conv_depth, dropout, nO=None):
+def build_text_classifier(
+    width,
+    embed_size,
+    pretrained_vectors,
+    exclusive_classes,
+    ngram_size,
+    window_size,
+    conv_depth,
+    dropout,
+    nO=None,
+):
     cols = [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
     with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
-        lower = HashEmbed(nO=width, nV=embed_size, column=cols.index(LOWER), dropout=dropout)
-        prefix = HashEmbed(nO=width // 2, nV=embed_size, column=cols.index(PREFIX), dropout=dropout)
-        suffix = HashEmbed(nO=width // 2, nV=embed_size, column=cols.index(SUFFIX), dropout=dropout)
-        shape = HashEmbed(nO=width // 2, nV=embed_size, column=cols.index(SHAPE), dropout=dropout)
+        lower = HashEmbed(
+            nO=width, nV=embed_size, column=cols.index(LOWER), dropout=dropout
+        )
+        prefix = HashEmbed(
+            nO=width // 2, nV=embed_size, column=cols.index(PREFIX), dropout=dropout
+        )
+        suffix = HashEmbed(
+            nO=width // 2, nV=embed_size, column=cols.index(SUFFIX), dropout=dropout
+        )
+        shape = HashEmbed(
+            nO=width // 2, nV=embed_size, column=cols.index(SHAPE), dropout=dropout
+        )
 
         width_nI = sum(layer.get_dim("nO") for layer in [lower, prefix, suffix, shape])
         trained_vectors = FeatureExtractor(cols) >> with_array(
@@ -83,30 +123,38 @@ def build_text_classifier(width, embed_size, pretrained_vectors, exclusive_class
             vectors_width = width
         tok2vec = vector_layer >> with_array(
             Maxout(width, vectors_width, normalize=True)
-            >> residual((expand_window(window_size=window_size)
-                         >> Maxout(nO=width, nI=width * ((window_size * 2) + 1), normalize=True))) ** conv_depth,
+            >> residual(
+                (
+                    expand_window(window_size=window_size)
+                    >> Maxout(
+                        nO=width, nI=width * ((window_size * 2) + 1), normalize=True
+                    )
+                )
+            )
+            ** conv_depth,
             pad=conv_depth,
         )
         cnn_model = (
-                tok2vec
-                >> list2ragged()
-                >> ParametricAttention(width)
-                >> reduce_sum()
-                >> residual(Maxout(nO=width, nI=width))
-                >> Linear(nO=nO, nI=width)
-                >> Dropout(0.0)
+            tok2vec
+            >> list2ragged()
+            >> ParametricAttention(width)
+            >> reduce_sum()
+            >> residual(Maxout(nO=width, nI=width))
+            >> Linear(nO=nO, nI=width)
+            >> Dropout(0.0)
         )
 
         linear_model = build_bow_text_classifier(
-            nO=nO, ngram_size=ngram_size, exclusive_classes=exclusive_classes, no_output_layer=False
+            nO=nO,
+            ngram_size=ngram_size,
+            exclusive_classes=exclusive_classes,
+            no_output_layer=False,
         )
-        nO_double = nO*2 if nO else None
+        nO_double = nO * 2 if nO else None
         if exclusive_classes:
             output_layer = Softmax(nO=nO, nI=nO_double)
         else:
-            output_layer = (
-                    Linear(nO=nO, nI=nO_double) >> Dropout(0.0) >> Logistic()
-            )
+            output_layer = Linear(nO=nO, nI=nO_double) >> Dropout(0.0) >> Logistic()
         model = (linear_model | cnn_model) >> output_layer
         model.set_ref("tok2vec", tok2vec)
     if model.has_dim("nO") is not False:
diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py
index 53798e57c..b1bed1ea1 100644
--- a/spacy/ml/models/tok2vec.py
+++ b/spacy/ml/models/tok2vec.py
@@ -99,7 +99,13 @@ def hash_charembed_cnn(
 
 @registry.architectures.register("spacy.HashEmbedBiLSTM.v1")
 def hash_embed_bilstm_v1(
-    pretrained_vectors, width, depth, embed_size, subword_features, maxout_pieces, dropout
+    pretrained_vectors,
+    width,
+    depth,
+    embed_size,
+    subword_features,
+    maxout_pieces,
+    dropout,
 ):
     # Does not use character embeddings: set to False by default
     return build_Tok2Vec_model(
@@ -141,21 +147,24 @@ def hash_char_embed_bilstm_v1(
 
 @registry.architectures.register("spacy.LayerNormalizedMaxout.v1")
 def LayerNormalizedMaxout(width, maxout_pieces):
-    return Maxout(
-        nO=width,
-        nP=maxout_pieces,
-        dropout=0.0,
-        normalize=True,
-    )
+    return Maxout(nO=width, nP=maxout_pieces, dropout=0.0, normalize=True,)
 
 
 @registry.architectures.register("spacy.MultiHashEmbed.v1")
-def MultiHashEmbed(columns, width, rows, use_subwords, pretrained_vectors, mix, dropout):
+def MultiHashEmbed(
+    columns, width, rows, use_subwords, pretrained_vectors, mix, dropout
+):
     norm = HashEmbed(nO=width, nV=rows, column=columns.index("NORM"), dropout=dropout)
     if use_subwords:
-        prefix = HashEmbed(nO=width, nV=rows // 2, column=columns.index("PREFIX"), dropout=dropout)
-        suffix = HashEmbed(nO=width, nV=rows // 2, column=columns.index("SUFFIX"), dropout=dropout)
-        shape = HashEmbed(nO=width, nV=rows // 2, column=columns.index("SHAPE"), dropout=dropout)
+        prefix = HashEmbed(
+            nO=width, nV=rows // 2, column=columns.index("PREFIX"), dropout=dropout
+        )
+        suffix = HashEmbed(
+            nO=width, nV=rows // 2, column=columns.index("SUFFIX"), dropout=dropout
+        )
+        shape = HashEmbed(
+            nO=width, nV=rows // 2, column=columns.index("SHAPE"), dropout=dropout
+        )
 
     if pretrained_vectors:
         glove = StaticVectors(
@@ -195,7 +204,13 @@ def CharacterEmbed(columns, width, rows, nM, nC, features, dropout):
 def MaxoutWindowEncoder(width, window_size, maxout_pieces, depth):
     cnn = chain(
         expand_window(window_size=window_size),
-        Maxout(nO=width, nI=width * ((window_size * 2) + 1), nP=maxout_pieces, dropout=0.0, normalize=True),
+        Maxout(
+            nO=width,
+            nI=width * ((window_size * 2) + 1),
+            nP=maxout_pieces,
+            dropout=0.0,
+            normalize=True,
+        ),
     )
     model = clone(residual(cnn), depth)
     model.set_dim("nO", width)
@@ -247,11 +262,19 @@ def build_Tok2Vec_model(
         subword_features = False
     cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
     with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
-        norm = HashEmbed(nO=width, nV=embed_size, column=cols.index(NORM), dropout=dropout)
+        norm = HashEmbed(
+            nO=width, nV=embed_size, column=cols.index(NORM), dropout=dropout
+        )
         if subword_features:
-            prefix = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(PREFIX), dropout=dropout)
-            suffix = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(SUFFIX), dropout=dropout)
-            shape = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(SHAPE), dropout=dropout)
+            prefix = HashEmbed(
+                nO=width, nV=embed_size // 2, column=cols.index(PREFIX), dropout=dropout
+            )
+            suffix = HashEmbed(
+                nO=width, nV=embed_size // 2, column=cols.index(SUFFIX), dropout=dropout
+            )
+            shape = HashEmbed(
+                nO=width, nV=embed_size // 2, column=cols.index(SHAPE), dropout=dropout
+            )
         else:
             prefix, suffix, shape = (None, None, None)
         if pretrained_vectors is not None:
diff --git a/spacy/ml/tb_framework.py b/spacy/ml/tb_framework.py
index 251189389..f7dad565e 100644
--- a/spacy/ml/tb_framework.py
+++ b/spacy/ml/tb_framework.py
@@ -20,8 +20,8 @@ def TransitionModel(tok2vec, lower, upper, unseen_classes=set()):
         attrs={
             "has_upper": has_upper,
             "unseen_classes": set(unseen_classes),
-            "resize_output": resize_output
-        }
+            "resize_output": resize_output,
+        },
     )
 
 
@@ -31,7 +31,7 @@ def forward(model, X, is_train):
         model.layers,
         unseen_classes=model.attrs["unseen_classes"],
         train=is_train,
-        has_upper=model.attrs["has_upper"]
+        has_upper=model.attrs["has_upper"],
     )
 
     return step_model, step_model.finish_steps
@@ -62,7 +62,7 @@ def resize_output(model, new_nO):
     nI = None
     if smaller.has_dim("nI"):
         nI = smaller.get_dim("nI")
-    with use_ops('numpy'):
+    with use_ops("numpy"):
         larger = Linear(nO=new_nO, nI=nI)
         larger.init = smaller.init
     # it could be that the model is not initialized yet, then skip this bit
@@ -74,8 +74,8 @@ def resize_output(model, new_nO):
         # Weights are stored in (nr_out, nr_in) format, so we're basically
         # just adding rows here.
         if smaller.has_dim("nO"):
-            larger_W[:smaller.get_dim("nO")] = smaller_W
-            larger_b[:smaller.get_dim("nO")] = smaller_b
+            larger_W[: smaller.get_dim("nO")] = smaller_W
+            larger_b[: smaller.get_dim("nO")] = smaller_b
             for i in range(smaller.get_dim("nO"), new_nO):
                 model.attrs["unseen_classes"].add(i)
 
diff --git a/spacy/pipeline/simple_ner.py b/spacy/pipeline/simple_ner.py
index 3ef6a48ce..08453becc 100644
--- a/spacy/pipeline/simple_ner.py
+++ b/spacy/pipeline/simple_ner.py
@@ -21,9 +21,7 @@ class SimpleNER(Pipe):
         self.model = model
         self.cfg = {"labels": []}
         self.loss_func = SequenceCategoricalCrossentropy(
-            names=self.get_tag_names(),
-            normalize=True,
-            missing_value=None
+            names=self.get_tag_names(), normalize=True, missing_value=None
         )
         assert self.model is not None
 
@@ -38,21 +36,21 @@ class SimpleNER(Pipe):
     def add_label(self, label):
         if label not in self.cfg["labels"]:
             self.cfg["labels"].append(label)
- 
+
     def get_tag_names(self):
         if self.is_biluo:
             return (
-                [f"B-{label}" for label in self.labels] +
-                [f"I-{label}" for label in self.labels] +
-                [f"L-{label}" for label in self.labels] +
-                [f"U-{label}" for label in self.labels] +
-                ["O"]
+                [f"B-{label}" for label in self.labels]
+                + [f"I-{label}" for label in self.labels]
+                + [f"L-{label}" for label in self.labels]
+                + [f"U-{label}" for label in self.labels]
+                + ["O"]
             )
         else:
             return (
-                [f"B-{label}" for label in self.labels] +
-                [f"I-{label}" for label in self.labels] +
-                ["O"]
+                [f"B-{label}" for label in self.labels]
+                + [f"I-{label}" for label in self.labels]
+                + ["O"]
             )
 
     def predict(self, docs: List[Doc]) -> List[Floats2d]:
@@ -107,7 +105,7 @@ class SimpleNER(Pipe):
 
     def begin_training(self, get_examples, pipeline=None, sgd=None, **kwargs):
         self.cfg.update(kwargs)
-        if not hasattr(get_examples, '__call__'):
+        if not hasattr(get_examples, "__call__"):
             gold_tuples = get_examples
             get_examples = lambda: gold_tuples
         labels = _get_labels(get_examples())
@@ -116,14 +114,12 @@ class SimpleNER(Pipe):
         labels = self.labels
         n_actions = self.model.attrs["get_num_actions"](len(labels))
         self.model.set_dim("nO", n_actions)
-        self.model.initialize() 
+        self.model.initialize()
         if pipeline is not None:
             self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg)
         link_vectors_to_models(self.vocab)
         self.loss_func = SequenceCategoricalCrossentropy(
-            names=self.get_tag_names(),
-            normalize=True,
-            missing_value=None
+            names=self.get_tag_names(), normalize=True, missing_value=None
         )
 
         return sgd
@@ -144,6 +140,6 @@ def _get_labels(examples):
     labels = set()
     for eg in examples:
         for ner_tag in eg.get_aligned("ENT_TYPE", as_string=True):
-            if ner_tag != 'O' and ner_tag != '-':
+            if ner_tag != "O" and ner_tag != "-":
                 labels.add(ner_tag)
     return list(sorted(labels))
diff --git a/spacy/scorer.py b/spacy/scorer.py
index 71cbc019a..31ff9ca33 100644
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@@ -97,7 +97,9 @@ class Scorer(object):
             for name, component in pipeline:
                 if name == "textcat":
                     self.textcat_multilabel = component.model.attrs["multi_label"]
-                    self.textcat_positive_label = component.cfg.get("positive_label", None)
+                    self.textcat_positive_label = component.cfg.get(
+                        "positive_label", None
+                    )
                     for label in component.cfg.get("labels", []):
                         self.textcat_auc_per_cat[label] = ROCAUCScore()
                         self.textcat_f_per_cat[label] = PRFScore()
@@ -118,19 +120,19 @@ class Scorer(object):
 
     @property
     def morphs_acc(self):
-       """RETURNS (float): Morph tag accuracy (morphological features,
+        """RETURNS (float): Morph tag accuracy (morphological features,
            i.e. `Token.morph`).
        """
-       return self.morphs.fscore * 100
+        return self.morphs.fscore * 100
 
     @property
     def morphs_per_type(self):
-       """RETURNS (dict): Scores per dependency label.
+        """RETURNS (dict): Scores per morphological feature.
        """
-       return {
-           k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100}
-           for k, v in self.morphs_per_feat.items()
-       }
+        return {
+            k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100}
+            for k, v in self.morphs_per_feat.items()
+        }
 
     @property
     def sent_p(self):
@@ -359,7 +361,9 @@ class Scorer(object):
                         (gold_i, gold_head, token.dep_.lower())
                     )
         # Find all NER labels in gold and doc
-        ent_labels = set([k.label_ for k in gold_doc.ents] + [k.label_ for k in doc.ents])
+        ent_labels = set(
+            [k.label_ for k in gold_doc.ents] + [k.label_ for k in doc.ents]
+        )
         # Set up all labels for per type scoring and prepare gold per type
         gold_per_ents = {ent_label: set() for ent_label in ent_labels}
         for ent_label in ent_labels:
@@ -392,7 +396,10 @@ class Scorer(object):
         self.pos.score_set(cand_pos, gold_pos)
         self.morphs.score_set(cand_morphs, gold_morphs)
         for field in self.morphs_per_feat:
-            self.morphs_per_feat[field].score_set(cand_morphs_per_feat.get(field, set()), gold_morphs_per_feat.get(field, set()))
+            self.morphs_per_feat[field].score_set(
+                cand_morphs_per_feat.get(field, set()),
+                gold_morphs_per_feat.get(field, set()),
+            )
         self.sent_starts.score_set(cand_sent_starts, gold_sent_starts)
         self.labelled.score_set(cand_deps, gold_deps)
         for dep in self.labelled_per_dep:
@@ -404,7 +411,9 @@ class Scorer(object):
         )
         if (
             len(gold_doc.cats) > 0
-            and set(self.textcat_f_per_cat) == set(self.textcat_auc_per_cat) == set(gold_doc.cats)
+            and set(self.textcat_f_per_cat)
+            == set(self.textcat_auc_per_cat)
+            == set(gold_doc.cats)
             and set(gold_doc.cats) == set(doc.cats)
         ):
             goldcat = max(gold_doc.cats, key=gold_doc.cats.get)
@@ -416,10 +425,10 @@ class Scorer(object):
                 )
             for label in set(gold_doc.cats):
                 self.textcat_auc_per_cat[label].score_set(
-                        doc.cats[label], gold_doc.cats[label]
+                    doc.cats[label], gold_doc.cats[label]
                 )
                 self.textcat_f_per_cat[label].score_set(
-                        set([label]) & set([candcat]), set([label]) & set([goldcat])
+                    set([label]) & set([candcat]), set([label]) & set([goldcat])
                 )
         elif len(self.textcat_f_per_cat) > 0:
             model_labels = set(self.textcat_f_per_cat)
diff --git a/spacy/tests/doc/test_add_entities.py b/spacy/tests/doc/test_add_entities.py
index 879334056..b9c230516 100644
--- a/spacy/tests/doc/test_add_entities.py
+++ b/spacy/tests/doc/test_add_entities.py
@@ -9,7 +9,12 @@ from spacy.pipeline.defaults import default_ner
 def test_doc_add_entities_set_ents_iob(en_vocab):
     text = ["This", "is", "a", "lion"]
     doc = get_doc(en_vocab, text)
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner = EntityRecognizer(en_vocab, default_ner(), **config)
     ner.begin_training([])
     ner(doc)
@@ -26,7 +31,12 @@ def test_doc_add_entities_set_ents_iob(en_vocab):
 def test_ents_reset(en_vocab):
     text = ["This", "is", "a", "lion"]
     doc = get_doc(en_vocab, text)
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner = EntityRecognizer(en_vocab, default_ner(), **config)
     ner.begin_training([])
     ner(doc)
diff --git a/spacy/tests/parser/test_add_label.py b/spacy/tests/parser/test_add_label.py
index 4afa11963..87675e94d 100644
--- a/spacy/tests/parser/test_add_label.py
+++ b/spacy/tests/parser/test_add_label.py
@@ -17,7 +17,12 @@ def vocab():
 
 @pytest.fixture
 def parser(vocab):
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width":  1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(vocab, default_parser(), **config)
     return parser
 
@@ -35,10 +40,7 @@ def _train_parser(parser):
     for i in range(5):
         losses = {}
         doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
-        gold = {
-            "heads": [1, 1, 3, 3],
-            "deps": ["left", "ROOT", "left", "ROOT"]
-        }
+        gold = {"heads": [1, 1, 3, 3], "deps": ["left", "ROOT", "left", "ROOT"]}
         example = Example.from_dict(doc, gold)
         parser.update([example], sgd=sgd, losses=losses)
     return parser
@@ -51,10 +53,7 @@ def test_add_label(parser):
     for i in range(100):
         losses = {}
         doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
-        gold = {
-            "heads": [1, 1, 3, 3],
-            "deps": ["right", "ROOT", "left", "ROOT"]
-        }
+        gold = {"heads": [1, 1, 3, 3], "deps": ["right", "ROOT", "left", "ROOT"]}
         parser.update((doc, gold), sgd=sgd, losses=losses)
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc = parser(doc)
@@ -63,7 +62,12 @@ def test_add_label(parser):
 
 
 def test_add_label_deserializes_correctly():
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner1 = EntityRecognizer(Vocab(), default_ner(), **config)
     ner1.add_label("C")
     ner1.add_label("B")
@@ -78,6 +82,7 @@ def test_add_label_deserializes_correctly():
     for i in range(ner1.moves.n_moves):
         assert ner1.moves.get_class_name(i) == ner2.moves.get_class_name(i)
 
+
 @pytest.mark.parametrize(
     "pipe_cls,n_moves,model",
     [(DependencyParser, 5, default_parser()), (EntityRecognizer, 4, default_ner())],
diff --git a/spacy/tests/parser/test_arc_eager_oracle.py b/spacy/tests/parser/test_arc_eager_oracle.py
index 12883ee08..f0f41e645 100644
--- a/spacy/tests/parser/test_arc_eager_oracle.py
+++ b/spacy/tests/parser/test_arc_eager_oracle.py
@@ -139,7 +139,12 @@ def test_get_oracle_actions():
         deps.append(dep)
         ents.append(ent)
     doc = Doc(Vocab(), words=[t[1] for t in annot_tuples])
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(doc.vocab, default_parser(), **config)
     parser.moves.add_action(0, "")
     parser.moves.add_action(1, "")
@@ -151,7 +156,9 @@ def test_get_oracle_actions():
             parser.moves.add_action(2, dep)
         elif head < i:
             parser.moves.add_action(3, dep)
-    example = Example.from_dict(doc, {"words": words, "tags": tags, "heads": heads, "deps": deps})
+    example = Example.from_dict(
+        doc, {"words": words, "tags": tags, "heads": heads, "deps": deps}
+    )
     parser.moves.get_oracle_sequence(example)
 
 
@@ -179,41 +186,41 @@ def test_oracle_dev_sentence(vocab, arc_eager):
         . punct said
     """
     expected_transitions = [
-        "S", # Shift 'Motor'
-        "S", # Shift 'Cars'
-        "L-nn", # Attach 'Cars' to 'Inc.'
-        "L-nn", # Attach 'Motor' to 'Inc.'
-        "L-nn", # Attach 'Rolls-Royce' to 'Inc.', force shift
-        "L-nsubj", # Attach 'Inc.' to 'said'
-        "S", # Shift 'it'
-        "L-nsubj", # Attach 'it.' to 'expects'
-        "R-ccomp", # Attach 'expects' to 'said'
-        "S", # Shift 'its'
-        "S", # Shift 'U.S.'
-        "L-nn", # Attach 'U.S.' to 'sales'
-        "L-poss", # Attach 'its' to 'sales'
-        "S", # Shift 'sales'
-        "S", # Shift 'to'
-        "S", # Shift 'remain'
-        "L-cop", # Attach 'remain' to 'steady'
-        "L-aux", # Attach 'to' to 'steady'
-        "L-nsubj", # Attach 'sales' to 'steady'
-        "R-xcomp", # Attach 'steady' to 'expects'
-        "R-prep", # Attach 'at' to 'steady'
-        "S", # Shift 'about'
-        "L-quantmod", # Attach "about" to "1,200"
-        "S", # Shift "1,200"
-        "L-num", # Attach "1,200" to "cars"
-        "R-pobj", # Attach "cars" to "at"
-        "D", # Reduce "cars"
-        "D", # Reduce "at"
-        "R-prep", # Attach "in" to "steady"
-        "R-pobj", # Attach "1990" to "in"
-        "D", # Reduce "1990"
-        "D", # Reduce "in"
-        "D", # Reduce "steady"
-        "D", # Reduce "expects"
-        "R-punct", # Attach "." to "said"
+        "S",  # Shift 'Motor'
+        "S",  # Shift 'Cars'
+        "L-nn",  # Attach 'Cars' to 'Inc.'
+        "L-nn",  # Attach 'Motor' to 'Inc.'
+        "L-nn",  # Attach 'Rolls-Royce' to 'Inc.', force shift
+        "L-nsubj",  # Attach 'Inc.' to 'said'
+        "S",  # Shift 'it'
+        "L-nsubj",  # Attach 'it' to 'expects'
+        "R-ccomp",  # Attach 'expects' to 'said'
+        "S",  # Shift 'its'
+        "S",  # Shift 'U.S.'
+        "L-nn",  # Attach 'U.S.' to 'sales'
+        "L-poss",  # Attach 'its' to 'sales'
+        "S",  # Shift 'sales'
+        "S",  # Shift 'to'
+        "S",  # Shift 'remain'
+        "L-cop",  # Attach 'remain' to 'steady'
+        "L-aux",  # Attach 'to' to 'steady'
+        "L-nsubj",  # Attach 'sales' to 'steady'
+        "R-xcomp",  # Attach 'steady' to 'expects'
+        "R-prep",  # Attach 'at' to 'steady'
+        "S",  # Shift 'about'
+        "L-quantmod",  # Attach "about" to "1,200"
+        "S",  # Shift "1,200"
+        "L-num",  # Attach "1,200" to "cars"
+        "R-pobj",  # Attach "cars" to "at"
+        "D",  # Reduce "cars"
+        "D",  # Reduce "at"
+        "R-prep",  # Attach "in" to "steady"
+        "R-pobj",  # Attach "1990" to "in"
+        "D",  # Reduce "1990"
+        "D",  # Reduce "in"
+        "D",  # Reduce "steady"
+        "D",  # Reduce "expects"
+        "R-punct",  # Attach "." to "said"
     ]
 
     gold_words = []
@@ -229,8 +236,8 @@ def test_oracle_dev_sentence(vocab, arc_eager):
         gold_heads.append(head)
     gold_heads = [gold_words.index(head) for head in gold_heads]
     for dep in gold_deps:
-        arc_eager.add_action(2, dep) # Left
-        arc_eager.add_action(3, dep) # Right
+        arc_eager.add_action(2, dep)  # Left
+        arc_eager.add_action(3, dep)  # Right
 
     doc = Doc(Vocab(), words=gold_words)
     example = Example.from_dict(doc, {"heads": gold_heads, "deps": gold_deps})
diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py
index ff8117196..61e25ffee 100644
--- a/spacy/tests/parser/test_ner.py
+++ b/spacy/tests/parser/test_ner.py
@@ -143,7 +143,12 @@ def test_accept_blocked_token():
     # 1. test normal behaviour
     nlp1 = English()
     doc1 = nlp1("I live in New York")
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner1 = EntityRecognizer(doc1.vocab, default_ner(), **config)
     assert [token.ent_iob_ for token in doc1] == ["", "", "", "", ""]
     assert [token.ent_type_ for token in doc1] == ["", "", "", "", ""]
@@ -162,7 +167,12 @@ def test_accept_blocked_token():
     # 2. test blocking behaviour
     nlp2 = English()
     doc2 = nlp2("I live in New York")
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner2 = EntityRecognizer(doc2.vocab, default_ner(), **config)
 
     # set "New York" to a blocked entity
@@ -220,7 +230,12 @@ def test_overwrite_token():
     assert [token.ent_type_ for token in doc] == ["", "", "", "", ""]
 
     # Check that a new ner can overwrite O
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner2 = EntityRecognizer(doc.vocab, default_ner(), **config)
     ner2.moves.add_action(5, "")
     ner2.add_label("GPE")
diff --git a/spacy/tests/parser/test_neural_parser.py b/spacy/tests/parser/test_neural_parser.py
index 32177d947..93d92e26b 100644
--- a/spacy/tests/parser/test_neural_parser.py
+++ b/spacy/tests/parser/test_neural_parser.py
@@ -29,7 +29,12 @@ def tok2vec():
 
 @pytest.fixture
 def parser(vocab, arc_eager):
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     return Parser(vocab, model=default_parser(), moves=arc_eager, **config)
 
 
diff --git a/spacy/tests/parser/test_parse.py b/spacy/tests/parser/test_parse.py
index 0d9e257b9..06e363b6b 100644
--- a/spacy/tests/parser/test_parse.py
+++ b/spacy/tests/parser/test_parse.py
@@ -33,7 +33,7 @@ def test_parser_root(en_tokenizer):
 
 
 @pytest.mark.xfail
-#@pytest.mark.parametrize("text", ["Hello"])
+# @pytest.mark.parametrize("text", ["Hello"])
 def test_parser_parse_one_word_sentence(en_tokenizer, en_parser, text):
     tokens = en_tokenizer(text)
     doc = get_doc(
@@ -180,6 +180,7 @@ def test_parser_set_sent_starts(en_vocab):
         for token in sent:
             assert token.head in sent
 
+
 def test_overfitting_IO():
     # Simple test to try and quickly overfit the dependency parser - ensuring the ML models work correctly
     nlp = English()
diff --git a/spacy/tests/parser/test_preset_sbd.py b/spacy/tests/parser/test_preset_sbd.py
index 5a29d84f4..ffd0c5df4 100644
--- a/spacy/tests/parser/test_preset_sbd.py
+++ b/spacy/tests/parser/test_preset_sbd.py
@@ -16,7 +16,12 @@ def vocab():
 
 @pytest.fixture
 def parser(vocab):
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(vocab, default_parser(), **config)
     parser.cfg["token_vector_width"] = 4
     parser.cfg["hidden_width"] = 32
@@ -28,7 +33,9 @@ def parser(vocab):
     for i in range(10):
         losses = {}
         doc = Doc(vocab, words=["a", "b", "c", "d"])
-        example = Example.from_dict(doc, {"heads": [1, 1, 3, 3], "deps": ["left", "ROOT", "left", "ROOT"]})
+        example = Example.from_dict(
+            doc, {"heads": [1, 1, 3, 3], "deps": ["left", "ROOT", "left", "ROOT"]}
+        )
         parser.update([example], sgd=sgd, losses=losses)
     return parser
 
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index 37dddc63e..a50ad8499 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -272,11 +272,13 @@ GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"]
 def test_overfitting_IO():
     # Simple test to try and quickly overfit the NEL component - ensuring the ML models work correctly
     nlp = English()
-    nlp.add_pipe(nlp.create_pipe('sentencizer'))
+    nlp.add_pipe(nlp.create_pipe("sentencizer"))
 
     # Add a custom component to recognize "Russ Cochran" as an entity for the example training data
     ruler = EntityRuler(nlp)
-    patterns = [{"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}]
+    patterns = [
+        {"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}
+    ]
     ruler.add_patterns(patterns)
     nlp.add_pipe(ruler)
 
@@ -293,7 +295,11 @@ def test_overfitting_IO():
     mykb = KnowledgeBase(nlp.vocab, entity_vector_length=3)
     mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
     mykb.add_entity(entity="Q7381115", freq=12, entity_vector=[9, 1, -7])
-    mykb.add_alias(alias="Russ Cochran", entities=["Q2146908", "Q7381115"], probabilities=[0.5, 0.5])
+    mykb.add_alias(
+        alias="Russ Cochran",
+        entities=["Q2146908", "Q7381115"],
+        probabilities=[0.5, 0.5],
+    )
 
     # Create the Entity Linker component and add it to the pipeline
     entity_linker = nlp.create_pipe("entity_linker", config={"kb": mykb})
diff --git a/spacy/tests/pipeline/test_morphologizer.py b/spacy/tests/pipeline/test_morphologizer.py
index 647e1a429..c853de232 100644
--- a/spacy/tests/pipeline/test_morphologizer.py
+++ b/spacy/tests/pipeline/test_morphologizer.py
@@ -15,8 +15,17 @@ def test_label_types():
 
 
 TRAIN_DATA = [
-    ("I like green eggs", {"morphs": ["Feat=N", "Feat=V", "Feat=J", "Feat=N"], "pos": ["NOUN", "VERB", "ADJ", "NOUN"]}),
-    ("Eat blue ham", {"morphs": ["Feat=V", "Feat=J", "Feat=N"], "pos": ["VERB", "ADJ", "NOUN"]}),
+    (
+        "I like green eggs",
+        {
+            "morphs": ["Feat=N", "Feat=V", "Feat=J", "Feat=N"],
+            "pos": ["NOUN", "VERB", "ADJ", "NOUN"],
+        },
+    ),
+    (
+        "Eat blue ham",
+        {"morphs": ["Feat=V", "Feat=J", "Feat=N"], "pos": ["VERB", "ADJ", "NOUN"]},
+    ),
 ]
 
 
@@ -38,7 +47,12 @@ def test_overfitting_IO():
     # test the trained model
     test_text = "I like blue eggs"
     doc = nlp(test_text)
-    gold_morphs = ["Feat=N|POS=NOUN", "Feat=V|POS=VERB", "Feat=J|POS=ADJ", "Feat=N|POS=NOUN"]
+    gold_morphs = [
+        "Feat=N|POS=NOUN",
+        "Feat=V|POS=VERB",
+        "Feat=J|POS=ADJ",
+        "Feat=N|POS=NOUN",
+    ]
     assert [t.morph_ for t in doc] == gold_morphs
 
     # Also test the results are still the same after IO
diff --git a/spacy/tests/pipeline/test_simple_ner.py b/spacy/tests/pipeline/test_simple_ner.py
index 9d4acf2fd..939786b0a 100644
--- a/spacy/tests/pipeline/test_simple_ner.py
+++ b/spacy/tests/pipeline/test_simple_ner.py
@@ -7,24 +7,28 @@ from spacy.pipeline.simple_ner import SimpleNER
 import spacy
 
 
-@pytest.fixture(params=[
-    ["PER", "ORG", "LOC", "MISC"],
-    ["GPE", "PERSON", "NUMBER", "CURRENCY", "EVENT"]
-])
+@pytest.fixture(
+    params=[
+        ["PER", "ORG", "LOC", "MISC"],
+        ["GPE", "PERSON", "NUMBER", "CURRENCY", "EVENT"],
+    ]
+)
 def labels(request):
     return request.param
 
+
 @pytest.fixture
 def ops():
     return NumpyOps()
 
+
 def _get_actions(labels):
     action_names = (
-        [f"B{label}" for label in labels] + \
-        [f"I{label}" for label in labels] + \
-        [f"L{label}" for label in labels] + \
-        [f"U{label}" for label in labels] + \
-        ["O"]
+        [f"B{label}" for label in labels]
+        + [f"I{label}" for label in labels]
+        + [f"L{label}" for label in labels]
+        + [f"U{label}" for label in labels]
+        + ["O"]
     )
     A = namedtuple("actions", action_names)
     return A(**{name: i for i, name in enumerate(action_names)})
@@ -228,7 +232,7 @@ def test_transition_table(ops):
     assert table[0, a.O, a.Uloc] == 1
     assert table[0, a.O, a.Uorg] == 1
     assert table[0, a.O, a.O] == 1
-    
+
     # Last token, prev action was B
     assert table[1, a.Bper, a.Bper] == 0
     assert table[1, a.Bper, a.Bloc] == 0
diff --git a/spacy/tests/regression/test_issue1501-2000.py b/spacy/tests/regression/test_issue1501-2000.py
index 129c00d99..94996c410 100644
--- a/spacy/tests/regression/test_issue1501-2000.py
+++ b/spacy/tests/regression/test_issue1501-2000.py
@@ -270,7 +270,12 @@ def test_issue1963(en_tokenizer):
 
 @pytest.mark.parametrize("label", ["U-JOB-NAME"])
 def test_issue1967(label):
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner = EntityRecognizer(Vocab(), default_ner(), **config)
     example = Example.from_dict(
         Doc(ner.vocab, words=["word"]),
@@ -280,8 +285,8 @@ def test_issue1967(label):
             "tags": ["tag"],
             "heads": [0],
             "deps": ["dep"],
-            "entities": [label]
-        }
+            "entities": [label],
+        },
     )
     assert "JOB-NAME" in ner.moves.get_actions(gold_parses=[example])[1]
 
diff --git a/spacy/tests/regression/test_issue3001-3500.py b/spacy/tests/regression/test_issue3001-3500.py
index 6df437b3c..a37707379 100644
--- a/spacy/tests/regression/test_issue3001-3500.py
+++ b/spacy/tests/regression/test_issue3001-3500.py
@@ -196,7 +196,12 @@ def test_issue3345():
     doc = Doc(nlp.vocab, words=["I", "live", "in", "New", "York"])
     doc[4].is_sent_start = True
     ruler = EntityRuler(nlp, patterns=[{"label": "GPE", "pattern": "New York"}])
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner = EntityRecognizer(doc.vocab, default_ner(), **config)
     # Add the OUT action. I wouldn't have thought this would be necessary...
     ner.moves.add_action(5, "")
diff --git a/spacy/tests/regression/test_issue3830.py b/spacy/tests/regression/test_issue3830.py
index 15632bdf8..06b7893a7 100644
--- a/spacy/tests/regression/test_issue3830.py
+++ b/spacy/tests/regression/test_issue3830.py
@@ -6,7 +6,12 @@ from spacy.pipeline.defaults import default_parser
 
 def test_issue3830_no_subtok():
     """Test that the parser doesn't have subtok label if not learn_tokens"""
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width":  1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(Vocab(), default_parser(), **config)
     parser.add_label("nsubj")
     assert "subtok" not in parser.labels
@@ -16,7 +21,12 @@ def test_issue3830_no_subtok():
 
 def test_issue3830_with_subtok():
     """Test that the parser does have subtok label if learn_tokens=True."""
-    config = {"learn_tokens": True, "min_action_freq": 30, "beam_width":  1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": True,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(Vocab(), default_parser(), **config)
     parser.add_label("nsubj")
     assert "subtok" not in parser.labels
diff --git a/spacy/tests/regression/test_issue4042.py b/spacy/tests/regression/test_issue4042.py
index 4978aba44..f47290b92 100644
--- a/spacy/tests/regression/test_issue4042.py
+++ b/spacy/tests/regression/test_issue4042.py
@@ -74,7 +74,12 @@ def test_issue4042_bug2():
             output_dir.mkdir()
         ner1.to_disk(output_dir)
 
-        config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+        config = {
+            "learn_tokens": False,
+            "min_action_freq": 30,
+            "beam_width": 1,
+            "beam_update_prob": 1.0,
+        }
         ner2 = EntityRecognizer(vocab, default_ner(), **config)
         ner2.from_disk(output_dir)
         assert len(ner2.labels) == 2
diff --git a/spacy/tests/regression/test_issue4313.py b/spacy/tests/regression/test_issue4313.py
index 46f79d6f5..3bddc26ca 100644
--- a/spacy/tests/regression/test_issue4313.py
+++ b/spacy/tests/regression/test_issue4313.py
@@ -16,7 +16,12 @@ def test_issue4313():
     beam_width = 16
     beam_density = 0.0001
     nlp = English()
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner = EntityRecognizer(nlp.vocab, default_ner(), **config)
     ner.add_label("SOME_LABEL")
     ner.begin_training([])
diff --git a/spacy/tests/regression/test_issue4665.py b/spacy/tests/regression/test_issue4665.py
index cb9279250..2e1a6e549 100644
--- a/spacy/tests/regression/test_issue4665.py
+++ b/spacy/tests/regression/test_issue4665.py
@@ -1,6 +1,7 @@
 import pytest
+
 # TODO
-#from spacy.gold.converters.conllu2docs import conllu2docs
+# from spacy.gold.converters.conllu2docs import conllu2docs
 
 input_data = """
 1	[	_	PUNCT	-LRB-	_	_	punct	_	_
diff --git a/spacy/tests/serialize/test_serialize_pipeline.py b/spacy/tests/serialize/test_serialize_pipeline.py
index 9c4e1f61e..abb5ccb27 100644
--- a/spacy/tests/serialize/test_serialize_pipeline.py
+++ b/spacy/tests/serialize/test_serialize_pipeline.py
@@ -12,7 +12,12 @@ test_parsers = [DependencyParser, EntityRecognizer]
 
 @pytest.fixture
 def parser(en_vocab):
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width":  1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(en_vocab, default_parser(), **config)
     parser.add_label("nsubj")
     return parser
diff --git a/spacy/tests/serialize/test_serialize_vocab_strings.py b/spacy/tests/serialize/test_serialize_vocab_strings.py
index d3e82296e..e570b1025 100644
--- a/spacy/tests/serialize/test_serialize_vocab_strings.py
+++ b/spacy/tests/serialize/test_serialize_vocab_strings.py
@@ -35,8 +35,10 @@ def test_serialize_vocab_roundtrip_bytes(strings1, strings2):
     assert vocab1.to_bytes() == vocab1_b
     new_vocab1 = Vocab().from_bytes(vocab1_b)
     assert new_vocab1.to_bytes() == vocab1_b
-    assert len(new_vocab1.strings) == len(strings1) + 2 # adds _SP and POS=SPACE
-    assert sorted([s for s in new_vocab1.strings]) == sorted(strings1 + list(default_strings))
+    assert len(new_vocab1.strings) == len(strings1) + 2  # adds _SP and POS=SPACE
+    assert sorted([s for s in new_vocab1.strings]) == sorted(
+        strings1 + list(default_strings)
+    )
 
 
 @pytest.mark.parametrize("strings1,strings2", test_strings)
diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index 4b244a3ce..3eb43ab92 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -3,6 +3,7 @@ import pytest
 from spacy.lang.en import English
 from spacy.gold.converters import iob2docs, conll_ner2docs
 from spacy.cli.pretrain import make_docs
+
 # TODO
 # from spacy.gold.converters import conllu2docs
 
diff --git a/spacy/tests/test_gold.py b/spacy/tests/test_gold.py
index 7af62accb..61b9ca57c 100644
--- a/spacy/tests/test_gold.py
+++ b/spacy/tests/test_gold.py
@@ -155,7 +155,18 @@ def test_gold_biluo_misalign(en_vocab):
 def test_split_sentences(en_vocab):
     words = ["I", "flew", "to", "San Francisco Valley", "had", "loads of fun"]
     doc = Doc(en_vocab, words=words)
-    gold_words = ["I", "flew", "to", "San", "Francisco", "Valley", "had", "loads", "of", "fun"]
+    gold_words = [
+        "I",
+        "flew",
+        "to",
+        "San",
+        "Francisco",
+        "Valley",
+        "had",
+        "loads",
+        "of",
+        "fun",
+    ]
     sent_starts = [True, False, False, False, False, False, True, False, False, False]
     example = Example.from_dict(doc, {"words": gold_words, "sent_starts": sent_starts})
     assert example.text == "I flew to San Francisco Valley had loads of fun "
@@ -166,7 +177,16 @@ def test_split_sentences(en_vocab):
 
     words = ["I", "flew", "to", "San", "Francisco", "Valley", "had", "loads", "of fun"]
     doc = Doc(en_vocab, words=words)
-    gold_words = ["I", "flew", "to", "San Francisco", "Valley", "had", "loads of", "fun"]
+    gold_words = [
+        "I",
+        "flew",
+        "to",
+        "San Francisco",
+        "Valley",
+        "had",
+        "loads of",
+        "fun",
+    ]
     sent_starts = [True, False, False, False, False, True, False, False]
     example = Example.from_dict(doc, {"words": gold_words, "sent_starts": sent_starts})
     assert example.text == "I flew to San Francisco Valley had loads of fun "
@@ -195,7 +215,15 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
     gold_words = ["I", "flew to", "San Francisco Valley", "."]
     example = Example.from_dict(doc, {"words": gold_words, "entities": entities})
     assert example.get_aligned("ENT_IOB") == [2, 2, 2, 3, 1, 1, 2]
-    assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "", "LOC", "LOC", "LOC", ""]
+    assert example.get_aligned("ENT_TYPE", as_string=True) == [
+        "",
+        "",
+        "",
+        "LOC",
+        "LOC",
+        "LOC",
+        "",
+    ]
 
     # misaligned
     words = ["I flew", "to", "San Francisco", "Valley", "."]
@@ -206,11 +234,21 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
     entities = [(offset_start, offset_end, "LOC")]
     links = {(offset_start, offset_end): {"Q816843": 1.0}}
     gold_words = ["I", "flew to", "San", "Francisco Valley", "."]
-    example = Example.from_dict(doc, {"words": gold_words, "entities": entities, "links": links})
+    example = Example.from_dict(
+        doc, {"words": gold_words, "entities": entities, "links": links}
+    )
     assert example.get_aligned("ENT_IOB") == [2, 2, 3, 1, 2]
     assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "LOC", "LOC", ""]
-    assert example.get_aligned("ENT_KB_ID", as_string=True) == ["", "", "Q816843", "Q816843", ""]
-    assert example.to_dict()["doc_annotation"]["links"][(offset_start, offset_end)] == {"Q816843": 1.0}
+    assert example.get_aligned("ENT_KB_ID", as_string=True) == [
+        "",
+        "",
+        "Q816843",
+        "Q816843",
+        "",
+    ]
+    assert example.to_dict()["doc_annotation"]["links"][(offset_start, offset_end)] == {
+        "Q816843": 1.0
+    }
 
     # additional whitespace tokens in GoldParse words
     words, spaces = get_words_and_spaces(
@@ -221,26 +259,55 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
     entities = [(len("I flew  to "), len("I flew  to San Francisco Valley"), "LOC")]
     gold_words = ["I", "flew", " ", "to", "San Francisco Valley", "."]
     gold_spaces = [True, True, False, True, False, False]
-    example = Example.from_dict(doc, {"words": gold_words, "spaces": gold_spaces, "entities": entities})
+    example = Example.from_dict(
+        doc, {"words": gold_words, "spaces": gold_spaces, "entities": entities}
+    )
     assert example.get_aligned("ENT_IOB") == [2, 2, 2, 2, 3, 1, 2]
-    assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "", "", "LOC", "LOC", ""]
+    assert example.get_aligned("ENT_TYPE", as_string=True) == [
+        "",
+        "",
+        "",
+        "",
+        "LOC",
+        "LOC",
+        "",
+    ]
 
     # from issue #4791
     doc = en_tokenizer("I'll return the ₹54 amount")
     gold_words = ["I", "'ll", "return", "the", "₹", "54", "amount"]
     gold_spaces = [False, True, True, True, False, True, False]
     entities = [(16, 19, "MONEY")]
-    example = Example.from_dict(doc, {"words": gold_words, "spaces": gold_spaces, "entities": entities})
+    example = Example.from_dict(
+        doc, {"words": gold_words, "spaces": gold_spaces, "entities": entities}
+    )
     assert example.get_aligned("ENT_IOB") == [2, 2, 2, 2, 3, 2]
-    assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "", "", "MONEY", ""]
+    assert example.get_aligned("ENT_TYPE", as_string=True) == [
+        "",
+        "",
+        "",
+        "",
+        "MONEY",
+        "",
+    ]
 
     doc = en_tokenizer("I'll return the $54 amount")
     gold_words = ["I", "'ll", "return", "the", "$", "54", "amount"]
     gold_spaces = [False, True, True, True, False, True, False]
     entities = [(16, 19, "MONEY")]
-    example = Example.from_dict(doc, {"words": gold_words, "spaces": gold_spaces, "entities": entities})
+    example = Example.from_dict(
+        doc, {"words": gold_words, "spaces": gold_spaces, "entities": entities}
+    )
     assert example.get_aligned("ENT_IOB") == [2, 2, 2, 2, 3, 1, 2]
-    assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "", "", "MONEY", "MONEY", ""]
+    assert example.get_aligned("ENT_TYPE", as_string=True) == [
+        "",
+        "",
+        "",
+        "",
+        "MONEY",
+        "MONEY",
+        "",
+    ]
 
 
 def test_roundtrip_offsets_biluo_conversion(en_tokenizer):
@@ -311,14 +378,16 @@ def test_roundtrip_docs_to_json(doc):
     assert lemmas == [t.lemma_ for t in reloaded_example.reference]
     assert deps == [t.dep_ for t in reloaded_example.reference]
     assert heads == [t.head.i for t in reloaded_example.reference]
-    assert ents == [(e.start_char, e.end_char, e.label_) for e in  reloaded_example.reference.ents]
+    assert ents == [
+        (e.start_char, e.end_char, e.label_) for e in reloaded_example.reference.ents
+    ]
     assert "TRAVEL" in reloaded_example.reference.cats
     assert "BAKING" in reloaded_example.reference.cats
     assert cats["TRAVEL"] == reloaded_example.reference.cats["TRAVEL"]
     assert cats["BAKING"] == reloaded_example.reference.cats["BAKING"]
 
 
-@pytest.mark.xfail # TODO do we need to do the projectivity differently?
+@pytest.mark.xfail  # TODO do we need to do the projectivity differently?
 def test_projective_train_vs_nonprojective_dev(doc):
     nlp = English()
     deps = [t.dep_ for t in doc]
@@ -348,9 +417,9 @@ def test_projective_train_vs_nonprojective_dev(doc):
 
 
 # Hm, not sure where misalignment check would be handled? In the components too?
-# I guess that does make sense. A text categorizer doesn't care if it's 
+# I guess that does make sense. A text categorizer doesn't care if it's
 # misaligned...
-@pytest.mark.xfail # TODO
+@pytest.mark.xfail  # TODO
 def test_ignore_misaligned(doc):
     nlp = English()
     text = doc.text
@@ -375,7 +444,9 @@ def test_ignore_misaligned(doc):
 
         # doesn't raise an AlignmentError, but there is nothing to iterate over
         # because the only example can't be aligned
-        train_reloaded_example = list(goldcorpus.train_dataset(nlp, ignore_misaligned=True))
+        train_reloaded_example = list(
+            goldcorpus.train_dataset(nlp, ignore_misaligned=True)
+        )
         assert len(train_reloaded_example) == 0
 
 
@@ -389,7 +460,9 @@ def test_make_orth_variants(doc):
 
         # due to randomness, test only that this runs with no errors for now
         train_example = next(goldcorpus.train_dataset(nlp))
-        variant_example = make_orth_variants_example(nlp, train_example, orth_variant_level=0.2)
+        variant_example = make_orth_variants_example(
+            nlp, train_example, orth_variant_level=0.2
+        )
 
 
 @pytest.mark.parametrize(
@@ -430,7 +503,9 @@ def test_goldparse_startswith_space(en_tokenizer):
     entities = ["U-DATE"]
     deps = ["ROOT"]
     heads = [0]
-    example = Example.from_dict(doc, {"words": gold_words, "entities": entities, "deps":deps, "heads": heads})
+    example = Example.from_dict(
+        doc, {"words": gold_words, "entities": entities, "deps": deps, "heads": heads}
+    )
     assert example.get_aligned("ENT_IOB") == [None, 3]
     assert example.get_aligned("ENT_TYPE", as_string=True) == [None, "DATE"]
     assert example.get_aligned("DEP", as_string=True) == [None, "ROOT"]
@@ -441,7 +516,12 @@ def test_gold_constructor():
     nlp = English()
     doc = nlp("This is a sentence")
     example = Example.from_dict(doc, {"cats": {"cat1": 1.0, "cat2": 0.0}})
-    assert example.get_aligned("ORTH", as_string=True) == ["This", "is", "a", "sentence"]
+    assert example.get_aligned("ORTH", as_string=True) == [
+        "This",
+        "is",
+        "a",
+        "sentence",
+    ]
     assert example.reference.cats["cat1"]
     assert not example.reference.cats["cat2"]
 
@@ -496,7 +576,7 @@ def test_split_sents(merged_dict):
     nlp = English()
     example = Example.from_dict(
         Doc(nlp.vocab, words=merged_dict["words"], spaces=merged_dict["spaces"]),
-        merged_dict
+        merged_dict,
     )
     assert example.text == "Hi there everyone It is just me"
 
@@ -517,15 +597,12 @@ def test_split_sents(merged_dict):
 
 
 # This fails on some None value? Need to look into that.
-@pytest.mark.xfail # TODO
+@pytest.mark.xfail  # TODO
 def test_tuples_to_example(vocab, merged_dict):
     cats = {"TRAVEL": 1.0, "BAKING": 0.0}
     merged_dict = dict(merged_dict)
     merged_dict["cats"] = cats
-    ex = Example.from_dict(
-        Doc(vocab, words=merged_dict["words"]),
-        merged_dict
-    )
+    ex = Example.from_dict(Doc(vocab, words=merged_dict["words"]), merged_dict)
     words = [token.text for token in ex.reference]
     assert words == merged_dict["words"]
     tags = [token.tag_ for token in ex.reference]
diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py
index 9da89e947..e5555bbc7 100644
--- a/spacy/tests/test_language.py
+++ b/spacy/tests/test_language.py
@@ -36,9 +36,7 @@ def test_language_update(nlp):
 
 def test_language_evaluate(nlp):
     text = "hello world"
-    annots = {
-        "doc_annotation": {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
-    }
+    annots = {"doc_annotation": {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}}
     doc = Doc(nlp.vocab, words=text.split(" "))
     # Evaluate with text and dict
     nlp.evaluate([(text, annots)])
diff --git a/spacy/tests/test_new_example.py b/spacy/tests/test_new_example.py
index 4c44543c4..b89654554 100644
--- a/spacy/tests/test_new_example.py
+++ b/spacy/tests/test_new_example.py
@@ -32,7 +32,9 @@ def test_Example_from_dict_invalid(annots):
         Example.from_dict(predicted, annots)
 
 
-@pytest.mark.parametrize("pred_words", [["ice", "cream"], ["icecream"], ["i", "ce", "cream"]])
+@pytest.mark.parametrize(
+    "pred_words", [["ice", "cream"], ["icecream"], ["i", "ce", "cream"]]
+)
 @pytest.mark.parametrize("annots", [{"words": ["icecream"], "tags": ["NN"]}])
 def test_Example_from_dict_with_tags(pred_words, annots):
     vocab = Vocab()
@@ -161,7 +163,15 @@ def test_Example_from_dict_with_entities(annots):
     example = Example.from_dict(predicted, annots)
 
     assert len(list(example.reference.ents)) == 2
-    assert [example.reference[i].ent_iob_ for i in range(7)] == ["O", "O", "B", "I", "O", "B", "O"]
+    assert [example.reference[i].ent_iob_ for i in range(7)] == [
+        "O",
+        "O",
+        "B",
+        "I",
+        "O",
+        "B",
+        "O",
+    ]
     assert example.get_aligned("ENT_IOB") == [2, 2, 3, 1, 2, 3, 2]
 
     assert example.reference[2].ent_type_ == "LOC"
@@ -174,7 +184,10 @@ def test_Example_from_dict_with_entities(annots):
     [
         {
             "words": ["I", "like", "New", "York", "and", "Berlin", "."],
-            "entities": [(0, 4, "LOC"), (21, 27, "LOC")],   # not aligned to token boundaries
+            "entities": [
+                (0, 4, "LOC"),
+                (21, 27, "LOC"),
+            ],  # not aligned to token boundaries
         }
     ],
 )
@@ -182,7 +195,7 @@ def test_Example_from_dict_with_entities_invalid(annots):
     vocab = Vocab()
     predicted = Doc(vocab, words=annots["words"])
     example = Example.from_dict(predicted, annots)
-     # TODO: shouldn't this throw some sort of warning ?
+    # TODO: shouldn't this throw some sort of warning ?
     assert len(list(example.reference.ents)) == 0
 
 
@@ -192,7 +205,10 @@ def test_Example_from_dict_with_entities_invalid(annots):
         {
             "words": ["I", "like", "New", "York", "and", "Berlin", "."],
             "entities": [(7, 15, "LOC"), (20, 26, "LOC")],
-            "links": {(7, 15): {"Q60": 1.0, "Q64": 0.0}, (20, 26): {"Q60": 0.0, "Q64": 1.0}},
+            "links": {
+                (7, 15): {"Q60": 1.0, "Q64": 0.0},
+                (20, 26): {"Q60": 0.0, "Q64": 1.0},
+            },
         }
     ],
 )
@@ -224,4 +240,3 @@ def test_Example_from_dict_with_links_invalid(annots):
     predicted = Doc(vocab, words=annots["words"])
     with pytest.raises(ValueError):
         Example.from_dict(predicted, annots)
-
diff --git a/spacy/tests/test_scorer.py b/spacy/tests/test_scorer.py
index 5574b7d6a..a6684b706 100644
--- a/spacy/tests/test_scorer.py
+++ b/spacy/tests/test_scorer.py
@@ -42,6 +42,7 @@ test_ner_apple = [
     ]
 ]
 
+
 @pytest.fixture
 def tagged_doc():
     text = "Sarah's sister flew to Silicon Valley via London."
diff --git a/spacy/tests/test_util.py b/spacy/tests/test_util.py
index 9d02c6c6a..65c33c54a 100644
--- a/spacy/tests/test_util.py
+++ b/spacy/tests/test_util.py
@@ -12,7 +12,7 @@ from spacy.util import minibatch_by_words
         ([400, 400, 199, 3], [4]),
         ([400, 400, 199, 3, 200], [3, 2]),
         ([400, 400, 199, 3, 1], [5]),
-        ([400, 400, 199, 3, 1, 1500], [5]),    # 1500 will be discarded
+        ([400, 400, 199, 3, 1, 1500], [5]),  # 1500 will be discarded
         ([400, 400, 199, 3, 1, 200], [3, 3]),
         ([400, 400, 199, 3, 1, 999], [3, 3]),
         ([400, 400, 199, 3, 1, 999, 999], [3, 2, 1, 1]),
@@ -26,7 +26,9 @@ def test_util_minibatch(doc_sizes, expected_batches):
     docs = [get_random_doc(doc_size) for doc_size in doc_sizes]
     tol = 0.2
     batch_size = 1000
-    batches = list(minibatch_by_words(docs, size=batch_size, tolerance=tol, discard_oversize=True))
+    batches = list(
+        minibatch_by_words(docs, size=batch_size, tolerance=tol, discard_oversize=True)
+    )
     assert [len(batch) for batch in batches] == expected_batches
 
     max_size = batch_size + batch_size * tol
@@ -50,7 +52,7 @@ def test_util_minibatch_oversize(doc_sizes, expected_batches):
     docs = [get_random_doc(doc_size) for doc_size in doc_sizes]
     tol = 0.2
     batch_size = 1000
-    batches = list(minibatch_by_words(docs, size=batch_size, tolerance=tol, discard_oversize=False))
+    batches = list(
+        minibatch_by_words(docs, size=batch_size, tolerance=tol, discard_oversize=False)
+    )
     assert [len(batch) for batch in batches] == expected_batches
-
-
diff --git a/spacy/tests/util.py b/spacy/tests/util.py
index a5d1737f1..7c3eaf8ad 100644
--- a/spacy/tests/util.py
+++ b/spacy/tests/util.py
@@ -27,7 +27,15 @@ def make_tempdir():
 
 
 def get_doc(
-    vocab, words=[], pos=None, heads=None, deps=None, tags=None, ents=None, lemmas=None, morphs=None
+    vocab,
+    words=[],
+    pos=None,
+    heads=None,
+    deps=None,
+    tags=None,
+    ents=None,
+    lemmas=None,
+    morphs=None,
 ):
     """Create Doc object from given vocab, words and annotations."""
     if deps and not heads:
diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py
index 97f336eb3..a3b089222 100644
--- a/spacy/tokens/_serialize.py
+++ b/spacy/tokens/_serialize.py
@@ -9,16 +9,7 @@ from ..attrs import SPACY, ORTH, intify_attr
 from ..errors import Errors
 
 
-ALL_ATTRS = (
-    "ORTH",
-    "TAG",
-    "HEAD",
-    "DEP",
-    "ENT_IOB",
-    "ENT_TYPE",
-    "LEMMA",
-    "MORPH"
-)
+ALL_ATTRS = ("ORTH", "TAG", "HEAD", "DEP", "ENT_IOB", "ENT_TYPE", "LEMMA", "MORPH")
 
 
 class DocBin(object):