Merge pull request #5617 from explosion/chore/tidy-auto-format
Commit dbe9c29f61
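The hunks below are mechanical tidying of the spaCy codebase, apparently the output of an auto-formatter (black, judging by the `# fmt: off` / `# fmt: on` markers): long calls and literals are wrapped to one argument per line with trailing commas, single quotes become double quotes, operator and slice spacing is normalized, unused imports are dropped, and `# noqa` markers are added where star imports or side-effect imports are intentional. As a minimal, self-contained sketch of the recurring wrapping pattern — the `msg.info` call is taken from the train CLI hunk below, while the example label set is an assumed value, not part of the diff:

from wasabi import msg

textcat_labels = {"POSITIVE", "NEGATIVE"}  # assumed example value, not from the diff

# Before: the whole call sits on one long line
msg.info(f"Initialized textcat component for {len(textcat_labels)} unique labels")

# After: the formatter wraps the argument onto its own line so the call fits the line-length limit
msg.info(
    f"Initialized textcat component for {len(textcat_labels)} unique labels"
)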
@@ -24,8 +24,8 @@ from ..gold import Example
     output_dir=("Directory to write models to on each epoch", "positional", None, Path),
     config_path=("Path to config file", "positional", None, Path),
     use_gpu=("Use GPU", "option", "g", int),
-    resume_path=("Path to pretrained weights from which to resume pretraining", "option","r", Path),
-    epoch_resume=("The epoch to resume counting from when using '--resume_path'. Prevents unintended overwriting of existing weight files.","option", "er", int),
+    resume_path=("Path to pretrained weights from which to resume pretraining", "option", "r", Path),
+    epoch_resume=("The epoch to resume counting from when using '--resume_path'. Prevents unintended overwriting of existing weight files.", "option", "er", int),
     # fmt: on
 )
 def pretrain(
@@ -3,7 +3,6 @@ from timeit import default_timer as timer

 import srsly
 from pydantic import BaseModel, FilePath
-import plac
 import tqdm
 from pathlib import Path
 from wasabi import msg
@@ -16,7 +15,9 @@ from ..gold import GoldCorpus
 from ..lookups import Lookups
 from .. import util
 from ..errors import Errors
-from ..ml import models  # don't remove - required to load the built-in architectures
+
+# Don't remove - required to load the built-in architectures
+from ..ml import models  # noqa: F401

 registry = util.registry

@@ -114,33 +115,19 @@ class ConfigSchema(BaseModel):
         extra = "allow"


-@plac.annotations(
-    # fmt: off
-    train_path=("Location of JSON-formatted training data", "positional", None, Path),
-    dev_path=("Location of JSON-formatted development data", "positional", None, Path),
-    config_path=("Path to config file", "positional", None, Path),
-    output_path=("Output directory to store model in", "option", "o", Path),
-    init_tok2vec=(
-        "Path to pretrained weights for the tok2vec components. See 'spacy pretrain'. Experimental.", "option", "t2v",
-        Path),
-    raw_text=("Path to jsonl file with unlabelled text documents.", "option", "rt", Path),
-    verbose=("Display more information for debugging purposes", "flag", "VV", bool),
-    use_gpu=("Use GPU", "option", "g", int),
-    tag_map_path=("Location of JSON-formatted tag map", "option", "tm", Path),
-    omit_extra_lookups=("Don't include extra lookups in model", "flag", "OEL", bool),
-    # fmt: on
-)
 def train_cli(
-    train_path,
-    dev_path,
-    config_path,
-    output_path=None,
-    init_tok2vec=None,
-    raw_text=None,
-    verbose=False,
-    use_gpu=-1,
-    tag_map_path=None,
-    omit_extra_lookups=False,
+    # fmt: off
+    train_path: ("Location of JSON-formatted training data", "positional", None, Path),
+    dev_path: ("Location of JSON-formatted development data", "positional", None, Path),
+    config_path: ("Path to config file", "positional", None, Path),
+    output_path: ("Output directory to store model in", "option", "o", Path) = None,
+    init_tok2vec: ("Path to pretrained weights for the tok2vec components. See 'spacy pretrain'. Experimental.", "option", "t2v", Path) = None,
+    raw_text: ("Path to jsonl file with unlabelled text documents.", "option", "rt", Path) = None,
+    verbose: ("Display more information for debugging purposes", "flag", "VV", bool) = False,
+    use_gpu: ("Use GPU", "option", "g", int) = -1,
+    tag_map_path: ("Location of JSON-formatted tag map", "option", "tm", Path) = None,
+    omit_extra_lookups: ("Don't include extra lookups in model", "flag", "OEL", bool) = False,
+    # fmt: on
 ):
     """
     Train or update a spaCy model. Requires data to be formatted in spaCy's
@@ -212,7 +199,7 @@ def train(
     config = util.load_config(config_path, create_objects=False)
     util.fix_random_seed(config["training"]["seed"])
     if config["training"].get("use_pytorch_for_gpu_memory"):
         # It feels kind of weird to not have a default for this.
         use_pytorch_for_gpu_memory()
     nlp_config = config["nlp"]
     config = util.load_config(config_path, create_objects=True)
@@ -227,7 +214,9 @@ def train(
     # verify textcat config
     if "textcat" in nlp_config["pipeline"]:
         textcat_labels = set(nlp.get_pipe("textcat").labels)
-        textcat_multilabel = not nlp_config["pipeline"]["textcat"]["model"]["exclusive_classes"]
+        textcat_multilabel = not nlp_config["pipeline"]["textcat"]["model"][
+            "exclusive_classes"
+        ]

         # check whether the setting 'exclusive_classes' corresponds to the provided training data
         if textcat_multilabel:
@@ -255,7 +244,9 @@ def train(
                     "to 'false' in the config to train a classifier with classes "
                     "that are not mutually exclusive."
                 )
-        msg.info(f"Initialized textcat component for {len(textcat_labels)} unique labels")
+        msg.info(
+            f"Initialized textcat component for {len(textcat_labels)} unique labels"
+        )
         nlp.get_pipe("textcat").labels = tuple(textcat_labels)

     # if 'positive_label' is provided: double check whether it's in the data and the task is binary
@@ -281,9 +272,7 @@ def train(
         nlp.resume_training()
     else:
         msg.info(f"Initializing the nlp pipeline: {nlp.pipe_names}")
-        nlp.begin_training(
-            lambda: corpus.train_examples
-        )
+        nlp.begin_training(lambda: corpus.train_examples)

     # Update tag map with provided mapping
     nlp.vocab.morphology.tag_map.update(tag_map)
@@ -310,8 +299,7 @@ def train(
             tok2vec = tok2vec.get(subpath)
         if not tok2vec:
             msg.fail(
-                f"Could not locate the tok2vec model at {tok2vec_path}.",
-                exits=1,
+                f"Could not locate the tok2vec model at {tok2vec_path}.", exits=1,
             )
         tok2vec.from_bytes(weights_data)

@@ -376,7 +364,7 @@ def create_train_batches(nlp, corpus, cfg):
     train_examples = list(
         corpus.train_dataset(
             nlp,
             noise_level=0.0,  # I think this is deprecated?
             orth_variant_level=cfg["orth_variant_level"],
             gold_preproc=cfg["gold_preproc"],
             max_length=cfg["max_length"],
@@ -429,7 +417,11 @@ def create_evaluation_callback(nlp, optimizer, corpus, cfg):
         try:
             weighted_score = sum(scores[s] * weights.get(s, 0.0) for s in weights)
         except KeyError as e:
-            raise KeyError(Errors.E983.format(dict_name='score_weights', key=str(e), keys=list(scores.keys())))
+            raise KeyError(
+                Errors.E983.format(
+                    dict_name="score_weights", key=str(e), keys=list(scores.keys())
+                )
+            )

         scores["speed"] = wps
         return weighted_score, scores
@@ -578,15 +570,25 @@ def setup_printer(training, nlp):
             ]
         except KeyError as e:
             raise KeyError(
-                Errors.E983.format(dict_name='scores (losses)', key=str(e), keys=list(info["losses"].keys())))
+                Errors.E983.format(
+                    dict_name="scores (losses)",
+                    key=str(e),
+                    keys=list(info["losses"].keys()),
+                )
+            )

         try:
             scores = [
-                "{0:.2f}".format(float(info["other_scores"][col]))
-                for col in score_cols
+                "{0:.2f}".format(float(info["other_scores"][col])) for col in score_cols
             ]
         except KeyError as e:
-            raise KeyError(Errors.E983.format(dict_name='scores (other)', key=str(e), keys=list(info["other_scores"].keys())))
+            raise KeyError(
+                Errors.E983.format(
+                    dict_name="scores (other)",
+                    key=str(e),
+                    keys=list(info["other_scores"].keys()),
+                )
+            )
         data = (
             [info["step"]] + losses + scores + ["{0:.2f}".format(float(info["score"]))]
         )
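Besides the reformatting, the `train_cli` hunks above move the plac argument declarations out of a `@plac.annotations(...)` decorator and into inline parameter annotations, which plac reads directly from the function signature. A minimal, runnable sketch of that pattern with a hypothetical `greet` command (not part of the diff; the tuple format is plac's (help, kind, abbreviation, type)):

import plac
from pathlib import Path


def greet(
    # fmt: off
    name: ("Name to greet", "positional", None, str),
    out_path: ("Optional file to write the greeting to", "option", "o", Path) = None,
    loud: ("Shout the greeting", "flag", "l", bool) = False,
    # fmt: on
):
    # Build the greeting and either print it or write it to the given file.
    text = f"HELLO {name}!" if loud else f"Hello {name}"
    if out_path is not None:
        Path(out_path).write_text(text + "\n")
    else:
        print(text)


if __name__ == "__main__":
    # plac parses sys.argv according to the annotations above.
    plac.call(greet)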
@@ -1,4 +1,3 @@
-from .symbols import NOUN, VERB, ADJ, PUNCT, PROPN
 from .errors import Errors
 from .lookups import Lookups
 from .parts_of_speech import NAMES as UPOS_NAMES
@@ -51,7 +50,13 @@ class Lemmatizer(object):
         index_table = self.lookups.get_table("lemma_index", {})
         exc_table = self.lookups.get_table("lemma_exc", {})
         rules_table = self.lookups.get_table("lemma_rules", {})
-        if not any((index_table.get(univ_pos), exc_table.get(univ_pos), rules_table.get(univ_pos))):
+        if not any(
+            (
+                index_table.get(univ_pos),
+                exc_table.get(univ_pos),
+                rules_table.get(univ_pos),
+            )
+        ):
             if univ_pos == "propn":
                 return [string]
             else:
@@ -1 +1 @@
-from .models import *
+from .models import *  # noqa: F401, F403
@@ -1,11 +1,8 @@
 """Thinc layer to do simpler transition-based parsing, NER, etc."""
-from typing import List, Tuple, Dict, Optional
+from typing import Dict, Optional
 import numpy
-from thinc.api import Ops, Model, with_array, softmax_activation, padded2list
-from thinc.api import to_numpy
-from thinc.types import Padded, Ints1d, Ints3d, Floats2d, Floats3d
-
-from ..tokens import Doc
+from thinc.api import Model
+from thinc.types import Padded, Floats3d


 def BILUO() -> Model[Padded, Padded]:
@@ -14,11 +11,11 @@ def BILUO() -> Model[Padded, Padded]:
         forward,
         init=init,
         dims={"nO": None},
-        attrs={"get_num_actions": get_num_actions}
+        attrs={"get_num_actions": get_num_actions},
     )


-def init(model, X: Optional[Padded]=None, Y: Optional[Padded]=None):
+def init(model, X: Optional[Padded] = None, Y: Optional[Padded] = None):
     if X is not None and Y is not None:
         if X.data.shape != Y.data.shape:
             # TODO: Fix error
@@ -49,12 +46,12 @@ def forward(model: Model[Padded, Padded], Xp: Padded, is_train: bool):
     masks = model.ops.alloc3f(*Y.shape)
     max_value = Xp.data.max()
     for t in range(Xp.data.shape[0]):
-        is_last = (Xp.lengths < (t+2)).astype("i")
+        is_last = (Xp.lengths < (t + 2)).astype("i")
         masks[t] = valid_transitions[is_last, prev_actions]
         # Don't train the out-of-bounds sequences.
-        masks[t, Xp.size_at_t[t]:] = 0
+        masks[t, Xp.size_at_t[t] :] = 0
         # Valid actions get 0*10e8, invalid get large negative value
-        Y[t] = Xp.data[t] + ((masks[t]-1) * max_value * 10)
+        Y[t] = Xp.data[t] + ((masks[t] - 1) * max_value * 10)
         prev_actions = Y[t].argmax(axis=-1)

     def backprop_biluo(dY: Padded) -> Padded:
@@ -1,9 +1,7 @@
 """Thinc layer to do simpler transition-based parsing, NER, etc."""
-from typing import List, Tuple, Dict, Optional
-from thinc.api import Ops, Model, with_array, softmax_activation, padded2list
-from thinc.types import Padded, Ints1d, Ints3d, Floats2d, Floats3d
-
-from ..tokens import Doc
+from typing import Dict, Optional
+from thinc.api import Ops, Model
+from thinc.types import Padded, Floats3d


 def IOB() -> Model[Padded, Padded]:
@@ -12,11 +10,11 @@ def IOB() -> Model[Padded, Padded]:
         forward,
         init=init,
         dims={"nO": None},
-        attrs={"get_num_actions": get_num_actions}
+        attrs={"get_num_actions": get_num_actions},
     )


-def init(model, X: Optional[Padded]=None, Y: Optional[Padded]=None):
+def init(model, X: Optional[Padded] = None, Y: Optional[Padded] = None):
     if X is not None and Y is not None:
         if X.data.shape != Y.data.shape:
             # TODO: Fix error
@@ -48,14 +46,14 @@ def forward(model: Model[Padded, Padded], Xp: Padded, is_train: bool):
     for t in range(Xp.data.shape[0]):
         masks[t] = valid_transitions[prev_actions]
         # Don't train the out-of-bounds sequences.
-        masks[t, Xp.size_at_t[t]:] = 0
+        masks[t, Xp.size_at_t[t] :] = 0
         # Valid actions get 0*10e8, invalid get -1*10e8
-        Y[t] = Xp.data[t] + ((masks[t]-1) * 10e8)
+        Y[t] = Xp.data[t] + ((masks[t] - 1) * 10e8)
         prev_actions = Y[t].argmax(axis=-1)

     def backprop_biluo(dY: Padded) -> Padded:
         # Masking the gradient seems to do poorly here. But why?
-        #dY.data *= masks
+        # dY.data *= masks
         return dY

     return Padded(Y, Xp.size_at_t, Xp.lengths, Xp.indices), backprop_biluo
@@ -83,10 +81,10 @@ def _get_transition_table(
     B_range = ops.xp.arange(B_start, B_end)
     I_range = ops.xp.arange(I_start, I_end)
     # B and O are always valid
-    table[:, B_start : B_end] = 1
+    table[:, B_start:B_end] = 1
     table[:, O_action] = 1
     # I can only follow a matching B
     table[B_range, I_range] = 1

     _cache[n_actions] = table
     return table
@@ -84,7 +84,7 @@ def _backprop_precomputable_affine_padding(model, dY, ids):
     #
     # (ids < 0).T @ dY
     mask = model.ops.asarray(ids < 0, dtype="f")
-    d_pad = model.ops.gemm(mask, dY.reshape(nB, nO*nP), trans1=True)
+    d_pad = model.ops.gemm(mask, dY.reshape(nB, nO * nP), trans1=True)
     return d_pad.reshape((1, nF, nO, nP))

@@ -1,6 +1,6 @@
 from .entity_linker import *  # noqa
 from .parser import *  # noqa
-from .simple_ner import *
+from .simple_ner import *  # noqa
 from .tagger import *  # noqa
 from .textcat import *  # noqa
 from .tok2vec import *  # noqa
@@ -7,7 +7,12 @@ def build_multi_task_model(tok2vec, maxout_pieces, token_vector_width, nO=None):
     softmax = Softmax(nO=nO, nI=token_vector_width * 2)
     model = chain(
         tok2vec,
-        Maxout(nO=token_vector_width * 2, nI=token_vector_width, nP=maxout_pieces, dropout=0.0),
+        Maxout(
+            nO=token_vector_width * 2,
+            nI=token_vector_width,
+            nP=maxout_pieces,
+            dropout=0.0,
+        ),
         LayerNorm(token_vector_width * 2),
         softmax,
     )
@@ -20,7 +25,11 @@ def build_cloze_multi_task_model(vocab, tok2vec, maxout_pieces, nO=None):
     # nO = vocab.vectors.data.shape[1]
     output_layer = chain(
         Maxout(
-            nO=nO, nI=tok2vec.get_dim("nO"), nP=maxout_pieces, normalize=True, dropout=0.0
+            nO=nO,
+            nI=tok2vec.get_dim("nO"),
+            nP=maxout_pieces,
+            normalize=True,
+            dropout=0.0,
         ),
         Linear(nO=nO, nI=nO, init_W=zero_init),
     )
@@ -39,7 +48,9 @@ def build_masked_language_model(vocab, wrapped_model, mask_prob=0.15):
     def mlm_forward(model, docs, is_train):
         mask, docs = _apply_mask(docs, random_words, mask_prob=mask_prob)
         mask = model.ops.asarray(mask).reshape((mask.shape[0], 1))
-        output, backprop = model.get_ref("wrapped-model").begin_update(docs)  # drop=drop
+        output, backprop = model.get_ref("wrapped-model").begin_update(
+            docs
+        )  # drop=drop

         def mlm_backward(d_output):
             d_output *= 1 - mask
@@ -16,18 +16,14 @@ def build_tb_parser_model(
     nO=None,
 ):
     t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
-    tok2vec = chain(
-        tok2vec,
-        with_array(Linear(hidden_width, t2v_width)),
-        list2array(),
-    )
+    tok2vec = chain(tok2vec, with_array(Linear(hidden_width, t2v_width)), list2array(),)
     tok2vec.set_dim("nO", hidden_width)

     lower = PrecomputableAffine(
         nO=hidden_width if use_upper else nO,
         nF=nr_feature_tokens,
         nI=tok2vec.get_dim("nO"),
-        nP=maxout_pieces
+        nP=maxout_pieces,
     )
     if use_upper:
         with use_ops("numpy"):
@@ -1,9 +1,8 @@
-import functools
-from typing import List, Tuple, Dict, Optional
-from thinc.api import Ops, Model, Linear, Softmax, with_array, softmax_activation, padded2list
+from typing import List
+from thinc.api import Model, Linear, with_array, softmax_activation, padded2list
 from thinc.api import chain, list2padded, configure_normal_init
 from thinc.api import Dropout
-from thinc.types import Padded, Ints1d, Ints3d, Floats2d, Floats3d
+from thinc.types import Floats2d

 from ...tokens import Doc
 from .._biluo import BILUO
@@ -12,12 +11,12 @@ from ...util import registry


 @registry.architectures.register("spacy.BiluoTagger.v1")
-def BiluoTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], List[Floats2d]]:
+def BiluoTagger(
+    tok2vec: Model[List[Doc], List[Floats2d]]
+) -> Model[List[Doc], List[Floats2d]]:
     biluo = BILUO()
     linear = Linear(
-        nO=None,
-        nI=tok2vec.get_dim("nO"),
-        init_W=configure_normal_init(mean=0.02)
+        nO=None, nI=tok2vec.get_dim("nO"), init_W=configure_normal_init(mean=0.02)
     )
     model = chain(
         tok2vec,
@@ -25,7 +24,7 @@ def BiluoTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], L
         with_array(chain(Dropout(0.1), linear)),
         biluo,
         with_array(softmax_activation()),
-        padded2list()
+        padded2list(),
     )

     return Model(
@@ -35,11 +34,14 @@ def BiluoTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], L
         layers=[model, linear],
         refs={"tok2vec": tok2vec, "linear": linear, "biluo": biluo},
         dims={"nO": None},
-        attrs={"get_num_actions": biluo.attrs["get_num_actions"]}
+        attrs={"get_num_actions": biluo.attrs["get_num_actions"]},
     )

+
 @registry.architectures.register("spacy.IOBTagger.v1")
-def IOBTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], List[Floats2d]]:
+def IOBTagger(
+    tok2vec: Model[List[Doc], List[Floats2d]]
+) -> Model[List[Doc], List[Floats2d]]:
     biluo = IOB()
     linear = Linear(nO=None, nI=tok2vec.get_dim("nO"))
     model = chain(
@@ -48,7 +50,7 @@ def IOBTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], Lis
         with_array(linear),
         biluo,
         with_array(softmax_activation()),
-        padded2list()
+        padded2list(),
     )

     return Model(
@@ -58,11 +60,10 @@ def IOBTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], Lis
         layers=[model],
         refs={"tok2vec": tok2vec, "linear": linear, "biluo": biluo},
         dims={"nO": None},
-        attrs={"get_num_actions": biluo.attrs["get_num_actions"]}
+        attrs={"get_num_actions": biluo.attrs["get_num_actions"]},
     )
-


 def init(model: Model[List[Doc], List[Floats2d]], X=None, Y=None) -> None:
     if model.get_dim("nO") is None and Y:
         model.set_dim("nO", Y[0].shape[1])
@@ -1,5 +1,4 @@
-from thinc.api import zero_init, with_array, Softmax, chain, Model, Dropout
-from thinc.api import glorot_uniform_init
+from thinc.api import zero_init, with_array, Softmax, chain, Model

 from ...util import registry

@@ -1,11 +1,12 @@
-from thinc.api import Model, reduce_mean, Linear, list2ragged, Logistic, ParametricAttention
-from thinc.api import chain, concatenate, clone, Dropout
-from thinc.api import SparseLinear, Softmax, softmax_activation, Maxout, reduce_sum, Relu, residual, expand_window
-from thinc.api import HashEmbed, with_ragged, with_array, with_cpu, uniqued, FeatureExtractor
+from thinc.api import Model, reduce_mean, Linear, list2ragged, Logistic
+from thinc.api import ParametricAttention, chain, concatenate, clone, Dropout
+from thinc.api import SparseLinear, Softmax, softmax_activation, Maxout
+from thinc.api import reduce_sum, Relu, residual, expand_window, HashEmbed
+from thinc.api import with_ragged, with_array, with_cpu, uniqued, FeatureExtractor

 from ..spacy_vectors import SpacyVectors
 from ... import util
-from ...attrs import ID, ORTH, NORM, PREFIX, SUFFIX, SHAPE, LOWER
+from ...attrs import ID, ORTH, PREFIX, SUFFIX, SHAPE, LOWER
 from ...util import registry
 from ..extract_ngrams import extract_ngrams

@@ -50,14 +51,31 @@ def build_bow_text_classifier(exclusive_classes, ngram_size, no_output_layer, nO


 @registry.architectures.register("spacy.TextCat.v1")
-def build_text_classifier(width, embed_size, pretrained_vectors, exclusive_classes, ngram_size,
-                          window_size, conv_depth, dropout, nO=None):
+def build_text_classifier(
+    width,
+    embed_size,
+    pretrained_vectors,
+    exclusive_classes,
+    ngram_size,
+    window_size,
+    conv_depth,
+    dropout,
+    nO=None,
+):
     cols = [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
     with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
-        lower = HashEmbed(nO=width, nV=embed_size, column=cols.index(LOWER), dropout=dropout)
-        prefix = HashEmbed(nO=width // 2, nV=embed_size, column=cols.index(PREFIX), dropout=dropout)
-        suffix = HashEmbed(nO=width // 2, nV=embed_size, column=cols.index(SUFFIX), dropout=dropout)
-        shape = HashEmbed(nO=width // 2, nV=embed_size, column=cols.index(SHAPE), dropout=dropout)
+        lower = HashEmbed(
+            nO=width, nV=embed_size, column=cols.index(LOWER), dropout=dropout
+        )
+        prefix = HashEmbed(
+            nO=width // 2, nV=embed_size, column=cols.index(PREFIX), dropout=dropout
+        )
+        suffix = HashEmbed(
+            nO=width // 2, nV=embed_size, column=cols.index(SUFFIX), dropout=dropout
+        )
+        shape = HashEmbed(
+            nO=width // 2, nV=embed_size, column=cols.index(SHAPE), dropout=dropout
+        )

         width_nI = sum(layer.get_dim("nO") for layer in [lower, prefix, suffix, shape])
         trained_vectors = FeatureExtractor(cols) >> with_array(
@@ -83,30 +101,38 @@ def build_text_classifier(width, embed_size, pretrained_vectors, exclusive_class
             vectors_width = width
         tok2vec = vector_layer >> with_array(
             Maxout(width, vectors_width, normalize=True)
-            >> residual((expand_window(window_size=window_size)
-            >> Maxout(nO=width, nI=width * ((window_size * 2) + 1), normalize=True))) ** conv_depth,
+            >> residual(
+                (
+                    expand_window(window_size=window_size)
+                    >> Maxout(
+                        nO=width, nI=width * ((window_size * 2) + 1), normalize=True
+                    )
+                )
+            )
+            ** conv_depth,
             pad=conv_depth,
         )
         cnn_model = (
             tok2vec
             >> list2ragged()
             >> ParametricAttention(width)
            >> reduce_sum()
             >> residual(Maxout(nO=width, nI=width))
             >> Linear(nO=nO, nI=width)
             >> Dropout(0.0)
         )

         linear_model = build_bow_text_classifier(
-            nO=nO, ngram_size=ngram_size, exclusive_classes=exclusive_classes, no_output_layer=False
+            nO=nO,
+            ngram_size=ngram_size,
+            exclusive_classes=exclusive_classes,
+            no_output_layer=False,
         )
-        nO_double = nO*2 if nO else None
+        nO_double = nO * 2 if nO else None
         if exclusive_classes:
             output_layer = Softmax(nO=nO, nI=nO_double)
         else:
-            output_layer = (
-                Linear(nO=nO, nI=nO_double) >> Dropout(0.0) >> Logistic()
-            )
+            output_layer = Linear(nO=nO, nI=nO_double) >> Dropout(0.0) >> Logistic()
         model = (linear_model | cnn_model) >> output_layer
         model.set_ref("tok2vec", tok2vec)
         if model.has_dim("nO") is not False:
@@ -99,7 +99,13 @@ def hash_charembed_cnn(

 @registry.architectures.register("spacy.HashEmbedBiLSTM.v1")
 def hash_embed_bilstm_v1(
-    pretrained_vectors, width, depth, embed_size, subword_features, maxout_pieces, dropout
+    pretrained_vectors,
+    width,
+    depth,
+    embed_size,
+    subword_features,
+    maxout_pieces,
+    dropout,
 ):
     # Does not use character embeddings: set to False by default
     return build_Tok2Vec_model(
@@ -141,21 +147,24 @@ def hash_char_embed_bilstm_v1(

 @registry.architectures.register("spacy.LayerNormalizedMaxout.v1")
 def LayerNormalizedMaxout(width, maxout_pieces):
-    return Maxout(
-        nO=width,
-        nP=maxout_pieces,
-        dropout=0.0,
-        normalize=True,
-    )
+    return Maxout(nO=width, nP=maxout_pieces, dropout=0.0, normalize=True,)


 @registry.architectures.register("spacy.MultiHashEmbed.v1")
-def MultiHashEmbed(columns, width, rows, use_subwords, pretrained_vectors, mix, dropout):
+def MultiHashEmbed(
+    columns, width, rows, use_subwords, pretrained_vectors, mix, dropout
+):
     norm = HashEmbed(nO=width, nV=rows, column=columns.index("NORM"), dropout=dropout)
     if use_subwords:
-        prefix = HashEmbed(nO=width, nV=rows // 2, column=columns.index("PREFIX"), dropout=dropout)
-        suffix = HashEmbed(nO=width, nV=rows // 2, column=columns.index("SUFFIX"), dropout=dropout)
-        shape = HashEmbed(nO=width, nV=rows // 2, column=columns.index("SHAPE"), dropout=dropout)
+        prefix = HashEmbed(
+            nO=width, nV=rows // 2, column=columns.index("PREFIX"), dropout=dropout
+        )
+        suffix = HashEmbed(
+            nO=width, nV=rows // 2, column=columns.index("SUFFIX"), dropout=dropout
+        )
+        shape = HashEmbed(
+            nO=width, nV=rows // 2, column=columns.index("SHAPE"), dropout=dropout
+        )

     if pretrained_vectors:
         glove = StaticVectors(
@@ -195,7 +204,13 @@ def CharacterEmbed(columns, width, rows, nM, nC, features, dropout):
 def MaxoutWindowEncoder(width, window_size, maxout_pieces, depth):
     cnn = chain(
         expand_window(window_size=window_size),
-        Maxout(nO=width, nI=width * ((window_size * 2) + 1), nP=maxout_pieces, dropout=0.0, normalize=True),
+        Maxout(
+            nO=width,
+            nI=width * ((window_size * 2) + 1),
+            nP=maxout_pieces,
+            dropout=0.0,
+            normalize=True,
+        ),
     )
     model = clone(residual(cnn), depth)
     model.set_dim("nO", width)
@@ -247,11 +262,19 @@ def build_Tok2Vec_model(
         subword_features = False
     cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
     with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
-        norm = HashEmbed(nO=width, nV=embed_size, column=cols.index(NORM), dropout=dropout)
+        norm = HashEmbed(
+            nO=width, nV=embed_size, column=cols.index(NORM), dropout=dropout
+        )
         if subword_features:
-            prefix = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(PREFIX), dropout=dropout)
-            suffix = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(SUFFIX), dropout=dropout)
-            shape = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(SHAPE), dropout=dropout)
+            prefix = HashEmbed(
+                nO=width, nV=embed_size // 2, column=cols.index(PREFIX), dropout=dropout
+            )
+            suffix = HashEmbed(
+                nO=width, nV=embed_size // 2, column=cols.index(SUFFIX), dropout=dropout
+            )
+            shape = HashEmbed(
+                nO=width, nV=embed_size // 2, column=cols.index(SHAPE), dropout=dropout
+            )
        else:
             prefix, suffix, shape = (None, None, None)
         if pretrained_vectors is not None:
@@ -20,8 +20,8 @@ def TransitionModel(tok2vec, lower, upper, unseen_classes=set()):
         attrs={
             "has_upper": has_upper,
             "unseen_classes": set(unseen_classes),
-            "resize_output": resize_output
-        }
+            "resize_output": resize_output,
+        },
     )


@@ -31,7 +31,7 @@ def forward(model, X, is_train):
         model.layers,
         unseen_classes=model.attrs["unseen_classes"],
         train=is_train,
-        has_upper=model.attrs["has_upper"]
+        has_upper=model.attrs["has_upper"],
     )

     return step_model, step_model.finish_steps
@@ -62,7 +62,7 @@ def resize_output(model, new_nO):
     nI = None
     if smaller.has_dim("nI"):
         nI = smaller.get_dim("nI")
-    with use_ops('numpy'):
+    with use_ops("numpy"):
         larger = Linear(nO=new_nO, nI=nI)
     larger.init = smaller.init
     # it could be that the model is not initialized yet, then skip this bit
@@ -74,8 +74,8 @@ def resize_output(model, new_nO):
     # Weights are stored in (nr_out, nr_in) format, so we're basically
     # just adding rows here.
     if smaller.has_dim("nO"):
-        larger_W[:smaller.get_dim("nO")] = smaller_W
-        larger_b[:smaller.get_dim("nO")] = smaller_b
+        larger_W[: smaller.get_dim("nO")] = smaller_W
+        larger_b[: smaller.get_dim("nO")] = smaller_b
         for i in range(smaller.get_dim("nO"), new_nO):
             model.attrs["unseen_classes"].add(i)

@@ -21,9 +21,7 @@ class SimpleNER(Pipe):
         self.model = model
         self.cfg = {"labels": []}
         self.loss_func = SequenceCategoricalCrossentropy(
-            names=self.get_tag_names(),
-            normalize=True,
-            missing_value=None
+            names=self.get_tag_names(), normalize=True, missing_value=None
         )
         assert self.model is not None

@@ -38,21 +36,21 @@ class SimpleNER(Pipe):
     def add_label(self, label):
         if label not in self.cfg["labels"]:
             self.cfg["labels"].append(label)

     def get_tag_names(self):
         if self.is_biluo:
             return (
-                [f"B-{label}" for label in self.labels] +
-                [f"I-{label}" for label in self.labels] +
-                [f"L-{label}" for label in self.labels] +
-                [f"U-{label}" for label in self.labels] +
-                ["O"]
+                [f"B-{label}" for label in self.labels]
+                + [f"I-{label}" for label in self.labels]
+                + [f"L-{label}" for label in self.labels]
+                + [f"U-{label}" for label in self.labels]
+                + ["O"]
             )
         else:
             return (
-                [f"B-{label}" for label in self.labels] +
-                [f"I-{label}" for label in self.labels] +
-                ["O"]
+                [f"B-{label}" for label in self.labels]
+                + [f"I-{label}" for label in self.labels]
+                + ["O"]
             )

     def predict(self, docs: List[Doc]) -> List[Floats2d]:
@@ -108,7 +106,7 @@ class SimpleNER(Pipe):

     def begin_training(self, get_examples, pipeline=None, sgd=None, **kwargs):
         self.cfg.update(kwargs)
-        if not hasattr(get_examples, '__call__'):
+        if not hasattr(get_examples, "__call__"):
             gold_tuples = get_examples
             get_examples = lambda: gold_tuples
         labels = _get_labels(get_examples())
@@ -117,14 +115,12 @@ class SimpleNER(Pipe):
             labels = self.labels
         n_actions = self.model.attrs["get_num_actions"](len(labels))
         self.model.set_dim("nO", n_actions)
         self.model.initialize()
         if pipeline is not None:
             self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg)
         link_vectors_to_models(self.vocab)
         self.loss_func = SequenceCategoricalCrossentropy(
-            names=self.get_tag_names(),
-            normalize=True,
-            missing_value=None
+            names=self.get_tag_names(), normalize=True, missing_value=None
         )

         return sgd
@@ -135,7 +131,7 @@ class SimpleNER(Pipe):

 def _has_ner(eg):
     for ner_tag in eg.gold.ner:
-        if ner_tag != "-" and ner_tag != None:
+        if ner_tag != "-" and ner_tag is not None:
             return True
     else:
         return False
@@ -145,7 +141,7 @@ def _get_labels(examples):
     labels = set()
     for eg in examples:
         for ner_tag in eg.token_annotation.entities:
-            if ner_tag != 'O' and ner_tag != '-':
-                _, label = ner_tag.split('-', 1)
+            if ner_tag != "O" and ner_tag != "-":
+                _, label = ner_tag.split("-", 1)
                 labels.add(label)
     return list(sorted(labels))
@@ -98,7 +98,9 @@ class Scorer(object):
         for name, component in pipeline:
             if name == "textcat":
                 self.textcat_multilabel = component.model.attrs["multi_label"]
-                self.textcat_positive_label = component.cfg.get("positive_label", None)
+                self.textcat_positive_label = component.cfg.get(
+                    "positive_label", None
+                )
                 for label in component.cfg.get("labels", []):
                     self.textcat_auc_per_cat[label] = ROCAUCScore()
                     self.textcat_f_per_cat[label] = PRFScore()
@@ -119,19 +121,19 @@ class Scorer(object):

     @property
     def morphs_acc(self):
         """RETURNS (float): Morph tag accuracy (morphological features,
        i.e. `Token.morph`).
        """
         return self.morphs.fscore * 100

     @property
     def morphs_per_type(self):
         """RETURNS (dict): Scores per dependency label.
        """
         return {
             k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100}
             for k, v in self.morphs_per_feat.items()
         }

     @property
     def sent_p(self):
@@ -302,7 +304,15 @@ class Scorer(object):
         gold_morphs_per_feat = {}
         gold_sent_starts = set()
         gold_ents = set(tags_to_entities(orig.entities))
-        for id_, tag, pos, morph, head, dep, sent_start in zip(orig.ids, orig.tags, orig.pos, orig.morphs, orig.heads, orig.deps, orig.sent_starts):
+        for id_, tag, pos, morph, head, dep, sent_start in zip(
+            orig.ids,
+            orig.tags,
+            orig.pos,
+            orig.morphs,
+            orig.heads,
+            orig.deps,
+            orig.sent_starts,
+        ):
             gold_tags.add((id_, tag))
             gold_pos.add((id_, pos))
             gold_morphs.add((id_, morph))
@@ -400,7 +410,10 @@ class Scorer(object):
         self.pos.score_set(cand_pos, gold_pos)
         self.morphs.score_set(cand_morphs, gold_morphs)
         for field in self.morphs_per_feat:
-            self.morphs_per_feat[field].score_set(cand_morphs_per_feat.get(field, set()), gold_morphs_per_feat.get(field, set()))
+            self.morphs_per_feat[field].score_set(
+                cand_morphs_per_feat.get(field, set()),
+                gold_morphs_per_feat.get(field, set()),
+            )
         self.sent_starts.score_set(cand_sent_starts, gold_sent_starts)
         self.labelled.score_set(cand_deps, gold_deps)
         for dep in self.labelled_per_dep:
@@ -412,7 +425,9 @@ class Scorer(object):
         )
         if (
             len(gold.cats) > 0
-            and set(self.textcat_f_per_cat) == set(self.textcat_auc_per_cat) == set(gold.cats)
+            and set(self.textcat_f_per_cat)
+            == set(self.textcat_auc_per_cat)
+            == set(gold.cats)
             and set(gold.cats) == set(doc.cats)
         ):
             goldcat = max(gold.cats, key=gold.cats.get)
@@ -424,10 +439,10 @@ class Scorer(object):
             )
             for label in set(gold.cats):
                 self.textcat_auc_per_cat[label].score_set(
                     doc.cats[label], gold.cats[label]
                 )
                 self.textcat_f_per_cat[label].score_set(
                     set([label]) & set([candcat]), set([label]) & set([goldcat])
                 )
         elif len(self.textcat_f_per_cat) > 0:
             model_labels = set(self.textcat_f_per_cat)
@@ -9,7 +9,12 @@ from spacy.pipeline.defaults import default_ner
 def test_doc_add_entities_set_ents_iob(en_vocab):
     text = ["This", "is", "a", "lion"]
     doc = get_doc(en_vocab, text)
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner = EntityRecognizer(en_vocab, default_ner(), **config)
     ner.begin_training([])
     ner(doc)
@@ -26,7 +31,12 @@ def test_doc_add_entities_set_ents_iob(en_vocab):
 def test_ents_reset(en_vocab):
     text = ["This", "is", "a", "lion"]
     doc = get_doc(en_vocab, text)
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner = EntityRecognizer(en_vocab, default_ner(), **config)
     ner.begin_training([])
     ner(doc)
@@ -1,9 +1,8 @@
 import pytest
-from thinc.api import Adam, NumpyOps
+from thinc.api import Adam
 from spacy.attrs import NORM
 from spacy.gold import GoldParse
 from spacy.vocab import Vocab

 from spacy.pipeline.defaults import default_parser, default_ner
 from spacy.tokens import Doc
 from spacy.pipeline import DependencyParser, EntityRecognizer
@@ -17,7 +16,12 @@ def vocab():

 @pytest.fixture
 def parser(vocab):
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(vocab, default_parser(), **config)
     return parser

@@ -58,7 +62,12 @@ def test_add_label(parser):


 def test_add_label_deserializes_correctly():
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner1 = EntityRecognizer(Vocab(), default_ner(), **config)
     ner1.add_label("C")
     ner1.add_label("B")
@@ -138,7 +138,12 @@ def test_get_oracle_actions():
         deps.append(dep)
         ents.append(ent)
     doc = Doc(Vocab(), words=[t[1] for t in annot_tuples])
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(doc.vocab, default_parser(), **config)
     parser.moves.add_action(0, "")
     parser.moves.add_action(1, "")
@@ -138,7 +138,12 @@ def test_accept_blocked_token():
     # 1. test normal behaviour
     nlp1 = English()
     doc1 = nlp1("I live in New York")
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner1 = EntityRecognizer(doc1.vocab, default_ner(), **config)
     assert [token.ent_iob_ for token in doc1] == ["", "", "", "", ""]
     assert [token.ent_type_ for token in doc1] == ["", "", "", "", ""]
@@ -157,7 +162,12 @@ def test_accept_blocked_token():
     # 2. test blocking behaviour
     nlp2 = English()
     doc2 = nlp2("I live in New York")
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner2 = EntityRecognizer(doc2.vocab, default_ner(), **config)

     # set "New York" to a blocked entity
@@ -215,7 +225,12 @@ def test_overwrite_token():
     assert [token.ent_type_ for token in doc] == ["", "", "", "", ""]

     # Check that a new ner can overwrite O
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner2 = EntityRecognizer(doc.vocab, default_ner(), **config)
     ner2.moves.add_action(5, "")
     ner2.add_label("GPE")
@@ -28,7 +28,12 @@ def tok2vec():

 @pytest.fixture
 def parser(vocab, arc_eager):
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     return Parser(vocab, model=default_parser(), moves=arc_eager, **config)

@@ -94,7 +94,12 @@ def test_beam_advance_too_few_scores(beam, scores):

 def test_beam_parse():
     nlp = Language()
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     nlp.add_pipe(DependencyParser(nlp.vocab, default_parser(), **config), name="parser")
     nlp.parser.add_label("nsubj")
     nlp.parser.begin_training([], token_vector_width=8, hidden_width=8)
@@ -16,7 +16,12 @@ def vocab():

 @pytest.fixture
 def parser(vocab):
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(vocab, default_parser(), **config)
     parser.cfg["token_vector_width"] = 4
     parser.cfg["hidden_width"] = 32
@ -264,11 +264,13 @@ GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"]
|
||||||
def test_overfitting_IO():
|
def test_overfitting_IO():
|
||||||
# Simple test to try and quickly overfit the NEL component - ensuring the ML models work correctly
|
# Simple test to try and quickly overfit the NEL component - ensuring the ML models work correctly
|
||||||
nlp = English()
|
nlp = English()
|
||||||
nlp.add_pipe(nlp.create_pipe('sentencizer'))
|
nlp.add_pipe(nlp.create_pipe("sentencizer"))
|
||||||
|
|
||||||
# Add a custom component to recognize "Russ Cochran" as an entity for the example training data
|
# Add a custom component to recognize "Russ Cochran" as an entity for the example training data
|
||||||
ruler = EntityRuler(nlp)
|
ruler = EntityRuler(nlp)
|
||||||
patterns = [{"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}]
|
patterns = [
|
||||||
|
{"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}
|
||||||
|
]
|
||||||
ruler.add_patterns(patterns)
|
ruler.add_patterns(patterns)
|
||||||
nlp.add_pipe(ruler)
|
nlp.add_pipe(ruler)
|
||||||
|
|
||||||
|
@ -285,7 +287,11 @@ def test_overfitting_IO():
|
||||||
mykb = KnowledgeBase(nlp.vocab, entity_vector_length=3)
|
mykb = KnowledgeBase(nlp.vocab, entity_vector_length=3)
|
||||||
mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
|
mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
|
||||||
mykb.add_entity(entity="Q7381115", freq=12, entity_vector=[9, 1, -7])
|
mykb.add_entity(entity="Q7381115", freq=12, entity_vector=[9, 1, -7])
|
||||||
mykb.add_alias(alias="Russ Cochran", entities=["Q2146908", "Q7381115"], probabilities=[0.5, 0.5])
|
mykb.add_alias(
|
||||||
|
alias="Russ Cochran",
|
||||||
|
entities=["Q2146908", "Q7381115"],
|
||||||
|
probabilities=[0.5, 0.5],
|
||||||
|
)
|
||||||
|
|
||||||
# Create the Entity Linker component and add it to the pipeline
|
# Create the Entity Linker component and add it to the pipeline
|
||||||
entity_linker = nlp.create_pipe("entity_linker", config={"kb": mykb})
|
entity_linker = nlp.create_pipe("entity_linker", config={"kb": mykb})
|
||||||
|
|
|
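Taken together, the two hunks above wire a toy KnowledgeBase into the pipeline used by test_overfitting_IO. A hedged sketch of that setup as straight-line code, using the calls that appear in the diff; the import paths and the final nlp.add_pipe(entity_linker) step are assumptions:

from spacy.kb import KnowledgeBase  # assumed import path
from spacy.lang.en import English
from spacy.pipeline import EntityRuler  # assumed import path

nlp = English()
nlp.add_pipe(nlp.create_pipe("sentencizer"))

# Rule-based patterns so "Russ Cochran" is recognised as a PERSON entity.
ruler = EntityRuler(nlp)
ruler.add_patterns(
    [{"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}]
)
nlp.add_pipe(ruler)

# Toy knowledge base with two candidate entities behind the same alias.
mykb = KnowledgeBase(nlp.vocab, entity_vector_length=3)
mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
mykb.add_entity(entity="Q7381115", freq=12, entity_vector=[9, 1, -7])
mykb.add_alias(
    alias="Russ Cochran",
    entities=["Q2146908", "Q7381115"],
    probabilities=[0.5, 0.5],
)

# The entity linker then disambiguates between the two candidates via the KB.
entity_linker = nlp.create_pipe("entity_linker", config={"kb": mykb})
nlp.add_pipe(entity_linker)  # assumed, per the comment in the diff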
@@ -15,8 +15,17 @@ def test_label_types():


 TRAIN_DATA = [
-    ("I like green eggs", {"morphs": ["Feat=N", "Feat=V", "Feat=J", "Feat=N"], "pos": ["NOUN", "VERB", "ADJ", "NOUN"]}),
-    ("Eat blue ham", {"morphs": ["Feat=V", "Feat=J", "Feat=N"], "pos": ["VERB", "ADJ", "NOUN"]}),
+    (
+        "I like green eggs",
+        {
+            "morphs": ["Feat=N", "Feat=V", "Feat=J", "Feat=N"],
+            "pos": ["NOUN", "VERB", "ADJ", "NOUN"],
+        },
+    ),
+    (
+        "Eat blue ham",
+        {"morphs": ["Feat=V", "Feat=J", "Feat=N"], "pos": ["VERB", "ADJ", "NOUN"]},
+    ),
 ]


@@ -38,7 +47,12 @@ def test_overfitting_IO():
     # test the trained model
     test_text = "I like blue eggs"
     doc = nlp(test_text)
-    gold_morphs = ["Feat=N|POS=NOUN", "Feat=V|POS=VERB", "Feat=J|POS=ADJ", "Feat=N|POS=NOUN"]
+    gold_morphs = [
+        "Feat=N|POS=NOUN",
+        "Feat=V|POS=VERB",
+        "Feat=J|POS=ADJ",
+        "Feat=N|POS=NOUN",
+    ]
     assert gold_morphs == [t.morph_ for t in doc]

     # Also test the results are still the same after IO
@@ -1,30 +1,31 @@
 import pytest
 from collections import namedtuple

 from thinc.api import NumpyOps
 from spacy.ml._biluo import BILUO, _get_transition_table
-from spacy.pipeline.simple_ner import SimpleNER
-import spacy


-@pytest.fixture(params=[
-    ["PER", "ORG", "LOC", "MISC"],
-    ["GPE", "PERSON", "NUMBER", "CURRENCY", "EVENT"]
-])
+@pytest.fixture(
+    params=[
+        ["PER", "ORG", "LOC", "MISC"],
+        ["GPE", "PERSON", "NUMBER", "CURRENCY", "EVENT"],
+    ]
+)
 def labels(request):
     return request.param


 @pytest.fixture
 def ops():
     return NumpyOps()


 def _get_actions(labels):
     action_names = (
-        [f"B{label}" for label in labels] + \
-        [f"I{label}" for label in labels] + \
-        [f"L{label}" for label in labels] + \
-        [f"U{label}" for label in labels] + \
-        ["O"]
+        [f"B{label}" for label in labels]
+        + [f"I{label}" for label in labels]
+        + [f"L{label}" for label in labels]
+        + [f"U{label}" for label in labels]
+        + ["O"]
     )
     A = namedtuple("actions", action_names)
     return A(**{name: i for i, name in enumerate(action_names)})
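To make the reflowed concatenation in _get_actions concrete, here is a small worked example with a hypothetical two-label set; it mirrors the expression above and runs on its own:

from collections import namedtuple

labels = ["PER", "LOC"]  # hypothetical two-label set
action_names = (
    [f"B{label}" for label in labels]
    + [f"I{label}" for label in labels]
    + [f"L{label}" for label in labels]
    + [f"U{label}" for label in labels]
    + ["O"]
)
# One action name per BILUO move for each label, plus the "outside" action.
assert action_names == [
    "BPER", "BLOC", "IPER", "ILOC", "LPER", "LLOC", "UPER", "ULOC", "O"
]
A = namedtuple("actions", action_names)
actions = A(**{name: i for i, name in enumerate(action_names)})
assert actions.BPER == 0 and actions.O == 8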
@@ -228,7 +229,7 @@ def test_transition_table(ops):
     assert table[0, a.O, a.Uloc] == 1
     assert table[0, a.O, a.Uorg] == 1
     assert table[0, a.O, a.O] == 1

     # Last token, prev action was B
     assert table[1, a.Bper, a.Bper] == 0
     assert table[1, a.Bper, a.Bloc] == 0
@@ -270,7 +270,12 @@ def test_issue1963(en_tokenizer):

 @pytest.mark.parametrize("label", ["U-JOB-NAME"])
 def test_issue1967(label):
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner = EntityRecognizer(Vocab(), default_ner(), **config)
     example = Example(doc=None)
     example.set_token_annotation(
@@ -196,7 +196,12 @@ def test_issue3345():
     doc = Doc(nlp.vocab, words=["I", "live", "in", "New", "York"])
     doc[4].is_sent_start = True
     ruler = EntityRuler(nlp, patterns=[{"label": "GPE", "pattern": "New York"}])
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner = EntityRecognizer(doc.vocab, default_ner(), **config)
     # Add the OUT action. I wouldn't have thought this would be necessary...
     ner.moves.add_action(5, "")
@@ -6,7 +6,12 @@ from spacy.pipeline.defaults import default_parser

 def test_issue3830_no_subtok():
     """Test that the parser doesn't have subtok label if not learn_tokens"""
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(Vocab(), default_parser(), **config)
     parser.add_label("nsubj")
     assert "subtok" not in parser.labels
@@ -16,7 +21,12 @@ def test_issue3830_no_subtok():

 def test_issue3830_with_subtok():
     """Test that the parser does have subtok label if learn_tokens=True."""
-    config = {"learn_tokens": True, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": True,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(Vocab(), default_parser(), **config)
     parser.add_label("nsubj")
     assert "subtok" not in parser.labels
@@ -74,7 +74,12 @@ def test_issue4042_bug2():
     output_dir.mkdir()
     ner1.to_disk(output_dir)

-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner2 = EntityRecognizer(vocab, default_ner(), **config)
     ner2.from_disk(output_dir)
     assert len(ner2.labels) == 2
@@ -12,7 +12,12 @@ def test_issue4313():
     beam_width = 16
     beam_density = 0.0001
     nlp = English()
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner = EntityRecognizer(nlp.vocab, default_ner(), **config)
     ner.add_label("SOME_LABEL")
     ner.begin_training([])
@@ -1,4 +1,3 @@
-import pytest
 from spacy.language import Language


@@ -12,7 +12,12 @@ test_parsers = [DependencyParser, EntityRecognizer]

 @pytest.fixture
 def parser(en_vocab):
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(en_vocab, default_parser(), **config)
     parser.add_label("nsubj")
     return parser
@@ -35,8 +35,10 @@ def test_serialize_vocab_roundtrip_bytes(strings1, strings2):
     assert vocab1.to_bytes() == vocab1_b
     new_vocab1 = Vocab().from_bytes(vocab1_b)
     assert new_vocab1.to_bytes() == vocab1_b
     assert len(new_vocab1.strings) == len(strings1) + 2  # adds _SP and POS=SPACE
-    assert sorted([s for s in new_vocab1.strings]) == sorted(strings1 + list(default_strings))
+    assert sorted([s for s in new_vocab1.strings]) == sorted(
+        strings1 + list(default_strings)
+    )


 @pytest.mark.parametrize("strings1,strings2", test_strings)
@@ -40,6 +40,7 @@ test_ner_apple = [
     ]
 ]

+
 @pytest.fixture
 def tagged_doc():
     text = "Sarah's sister flew to Silicon Valley via London."
@@ -184,7 +185,7 @@ def test_tag_score(tagged_doc):
         tagged_doc,
         tags=[t.tag_ for t in tagged_doc],
         pos=[t.pos_ for t in tagged_doc],
-        morphs=[t.morph_ for t in tagged_doc]
+        morphs=[t.morph_ for t in tagged_doc],
     )
     scorer.score((tagged_doc, gold))
     results = scorer.scores
@@ -13,7 +13,7 @@ from spacy.util import minibatch_by_words
     ([400, 400, 199, 3], [4]),
     ([400, 400, 199, 3, 200], [3, 2]),
     ([400, 400, 199, 3, 1], [5]),
     ([400, 400, 199, 3, 1, 1500], [5]),  # 1500 will be discarded
     ([400, 400, 199, 3, 1, 200], [3, 3]),
     ([400, 400, 199, 3, 1, 999], [3, 3]),
     ([400, 400, 199, 3, 1, 999, 999], [3, 2, 1, 1]),
@@ -28,7 +28,11 @@ def test_util_minibatch(doc_sizes, expected_batches):
     examples = [Example(doc=doc) for doc in docs]
     tol = 0.2
     batch_size = 1000
-    batches = list(minibatch_by_words(examples=examples, size=batch_size, tolerance=tol, discard_oversize=True))
+    batches = list(
+        minibatch_by_words(
+            examples=examples, size=batch_size, tolerance=tol, discard_oversize=True
+        )
+    )
     assert [len(batch) for batch in batches] == expected_batches

     max_size = batch_size + batch_size * tol
@@ -53,7 +57,9 @@ def test_util_minibatch_oversize(doc_sizes, expected_batches):
     examples = [Example(doc=doc) for doc in docs]
     tol = 0.2
     batch_size = 1000
-    batches = list(minibatch_by_words(examples=examples, size=batch_size, tolerance=tol, discard_oversize=False))
+    batches = list(
+        minibatch_by_words(
+            examples=examples, size=batch_size, tolerance=tol, discard_oversize=False
+        )
+    )
     assert [len(batch) for batch in batches] == expected_batches


@@ -697,7 +697,9 @@ def decaying(start, stop, decay):
         curr -= decay


-def minibatch_by_words(examples, size, count_words=len, tolerance=0.2, discard_oversize=False):
+def minibatch_by_words(
+    examples, size, count_words=len, tolerance=0.2, discard_oversize=False
+):
     """Create minibatches of roughly a given number of words. If any examples
     are longer than the specified batch length, they will appear in a batch by
     themselves, or be discarded if discard_oversize=True."""
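The reflowed signature and its docstring describe the contract the two minibatch_by_words tests exercise: batches target `size` words, may exceed it by up to `size * tolerance`, and oversized examples either land in a batch of their own or are dropped when `discard_oversize=True`. A hedged sketch of the call pattern; building the docs from word counts is an assumption about how the test constructs its inputs, and the batch sizes are printed rather than asserted:

from spacy.gold import Example
from spacy.tokens import Doc
from spacy.util import minibatch_by_words
from spacy.vocab import Vocab

vocab = Vocab()
# Assumed input construction: one Doc per requested word count.
doc_sizes = [400, 400, 199, 3, 200]
docs = [Doc(vocab, words=["word"] * size) for size in doc_sizes]
examples = [Example(doc=doc) for doc in docs]

batches = list(
    minibatch_by_words(
        examples=examples, size=1000, tolerance=0.2, discard_oversize=True
    )
)
# Each batch holds roughly 1000 words, with up to 1000 * 0.2 = 200 words of slack.
print([len(batch) for batch in batches])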