commit 6a75992af6 (parent 455dc0d9e2)
Author: Matthew Honnibal
Date: 2020-06-22 01:11:43 +02:00

48 changed files with 593 additions and 287 deletions

View File

@@ -134,7 +134,7 @@ def verify_cli_args(
     merge_subtokens,
     converter,
     ner_map,
-    lang
+    lang,
 ):
     if converter == "ner" or converter == "iob":
         input_data = input_path.open("r", encoding="utf-8").read()
@@ -148,7 +148,7 @@ def verify_cli_args(
         else:
             msg.warn(
                 "Can't automatically detect NER format. Conversion may not",
-                "succeed. See https://spacy.io/api/cli#convert"
+                "succeed. See https://spacy.io/api/cli#convert",
             )
     if file_type not in FILE_TYPES_STDOUT and output_dir == "-":
         # TODO: support msgpack via stdout in srsly?

View File

@@ -178,7 +178,6 @@ def train_cli(
 )
 
 
-
 def train(
     config_path,
     data_paths,
@@ -238,8 +237,7 @@ def train(
                 tok2vec = tok2vec.get(subpath)
             if not tok2vec:
                 msg.fail(
-                    f"Could not locate the tok2vec model at {tok2vec_path}.",
-                    exits=1,
+                    f"Could not locate the tok2vec model at {tok2vec_path}.", exits=1,
                 )
             tok2vec.from_bytes(weights_data)
@@ -351,7 +349,11 @@ def create_evaluation_callback(nlp, optimizer, corpus, cfg):
         try:
             weighted_score = sum(scores[s] * weights.get(s, 0.0) for s in weights)
         except KeyError as e:
-            raise KeyError(Errors.E983.format(dict='score_weights', key=str(e), keys=list(scores.keys())))
+            raise KeyError(
+                Errors.E983.format(
+                    dict="score_weights", key=str(e), keys=list(scores.keys())
+                )
+            )
         scores["speed"] = wps
         return weighted_score, scores
@@ -500,15 +502,23 @@ def setup_printer(training, nlp):
             ]
         except KeyError as e:
             raise KeyError(
-                Errors.E983.format(dict='scores (losses)', key=str(e), keys=list(info["losses"].keys())))
+                Errors.E983.format(
+                    dict="scores (losses)", key=str(e), keys=list(info["losses"].keys())
+                )
+            )
         try:
             scores = [
-                "{0:.2f}".format(float(info["other_scores"][col]))
-                for col in score_cols
+                "{0:.2f}".format(float(info["other_scores"][col])) for col in score_cols
             ]
         except KeyError as e:
-            raise KeyError(Errors.E983.format(dict='scores (other)', key=str(e), keys=list(info["other_scores"].keys())))
+            raise KeyError(
+                Errors.E983.format(
+                    dict="scores (other)",
+                    key=str(e),
+                    keys=list(info["other_scores"].keys()),
+                )
+            )
         data = (
             [info["step"]] + losses + scores + ["{0:.2f}".format(float(info["score"]))]
         )

View File

@@ -5,7 +5,9 @@ import itertools
 def make_orth_variants_example(nlp, example, orth_variant_level=0.0):  # TODO: naming
     raw_text = example.text
     orig_dict = example.to_dict()
-    variant_text, variant_token_annot = make_orth_variants(nlp, raw_text, orig_dict["token_annotation"], orth_variant_level)
+    variant_text, variant_token_annot = make_orth_variants(
+        nlp, raw_text, orig_dict["token_annotation"], orth_variant_level
+    )
     doc = nlp.make_doc(variant_text)
     orig_dict["token_annotation"] = variant_token_annot
     return example.from_dict(doc, orig_dict)

View File

@@ -43,10 +43,7 @@ def conllu2json(
         raw += example.text
         sentences.append(
             generate_sentence(
-                example.to_dict(),
-                has_ner_tags,
-                MISC_NER_PATTERN,
-                ner_map=ner_map,
+                example.to_dict(), has_ner_tags, MISC_NER_PATTERN, ner_map=ner_map,
             )
         )
         # Real-sized documents could be extracted using the comments on the

View File

@@ -8,6 +8,7 @@ from ..example import _fix_legacy_dict_data, _parse_example_dict_data
 from ...util import load_model
 from ...lang.xx import MultiLanguage
 
+
 @contextlib.contextmanager
 def make_tempdir():
     d = Path(tempfile.mkdtemp())
@@ -15,11 +16,7 @@ def make_tempdir():
     shutil.rmtree(str(d))
 
 
-def json2docs(
-    input_data,
-    model=None,
-    **kwargs
-):
+def json2docs(input_data, model=None, **kwargs):
     nlp = load_model(model) if model is not None else MultiLanguage()
     docs = []
     with make_tempdir() as tmp_dir:
@@ -29,10 +26,6 @@ def json2docs(
         for json_annot in read_json_file(json_path):
             example_dict = _fix_legacy_dict_data(json_annot)
             tok_dict, doc_dict = _parse_example_dict_data(example_dict)
-            doc = annotations2doc(
-                nlp.vocab,
-                tok_dict,
-                doc_dict
-            )
+            doc = annotations2doc(nlp.vocab, tok_dict, doc_dict)
             docs.append(doc)
     return docs

View File

@@ -12,6 +12,7 @@ class Corpus:
     DOCS: https://spacy.io/api/goldcorpus
     """
+
     def __init__(self, train_loc, dev_loc, limit=0):
         """Create a GoldCorpus.

View File

@@ -54,7 +54,7 @@ def biluo_tags_from_doc(doc, missing="O"):
     return biluo_tags_from_offsets(
         doc,
         [(ent.start_char, ent.end_char, ent.label_) for ent in doc.ents],
-        missing=missing
+        missing=missing,
     )

View File

@@ -542,7 +542,6 @@ class Language(object):
                 raise ValueError(Errors.E979.format(type=type(eg)))
         return converted_examples
 
-
     def update(
         self,
         examples,
@@ -822,7 +821,7 @@ class Language(object):
                 batch_size=batch_size,
                 disable=disable,
                 n_process=n_process,
-                component_cfg=component_cfg
+                component_cfg=component_cfg,
             )
             for doc, context in zip(docs, contexts):
                 yield (doc, context)

View File

@@ -51,7 +51,13 @@ class Lemmatizer(object):
         index_table = self.lookups.get_table("lemma_index", {})
         exc_table = self.lookups.get_table("lemma_exc", {})
         rules_table = self.lookups.get_table("lemma_rules", {})
-        if not any((index_table.get(univ_pos), exc_table.get(univ_pos), rules_table.get(univ_pos))):
+        if not any(
+            (
+                index_table.get(univ_pos),
+                exc_table.get(univ_pos),
+                rules_table.get(univ_pos),
+            )
+        ):
             if univ_pos == "propn":
                 return [string]
             else:

View File

@@ -14,7 +14,7 @@ def BILUO() -> Model[Padded, Padded]:
         forward,
         init=init,
         dims={"nO": None},
-        attrs={"get_num_actions": get_num_actions}
+        attrs={"get_num_actions": get_num_actions},
     )

View File

@@ -12,7 +12,7 @@ def IOB() -> Model[Padded, Padded]:
         forward,
         init=init,
         dims={"nO": None},
-        attrs={"get_num_actions": get_num_actions}
+        attrs={"get_num_actions": get_num_actions},
     )

View File

@@ -7,7 +7,12 @@ def build_multi_task_model(tok2vec, maxout_pieces, token_vector_width, nO=None):
     softmax = Softmax(nO=nO, nI=token_vector_width * 2)
     model = chain(
         tok2vec,
-        Maxout(nO=token_vector_width * 2, nI=token_vector_width, nP=maxout_pieces, dropout=0.0),
+        Maxout(
+            nO=token_vector_width * 2,
+            nI=token_vector_width,
+            nP=maxout_pieces,
+            dropout=0.0,
+        ),
         LayerNorm(token_vector_width * 2),
         softmax,
     )
@@ -20,7 +25,11 @@ def build_cloze_multi_task_model(vocab, tok2vec, maxout_pieces, nO=None):
     # nO = vocab.vectors.data.shape[1]
     output_layer = chain(
         Maxout(
-            nO=nO, nI=tok2vec.get_dim("nO"), nP=maxout_pieces, normalize=True, dropout=0.0
+            nO=nO,
+            nI=tok2vec.get_dim("nO"),
+            nP=maxout_pieces,
+            normalize=True,
+            dropout=0.0,
         ),
         Linear(nO=nO, nI=nO, init_W=zero_init),
     )
@@ -39,7 +48,9 @@ def build_masked_language_model(vocab, wrapped_model, mask_prob=0.15):
     def mlm_forward(model, docs, is_train):
         mask, docs = _apply_mask(docs, random_words, mask_prob=mask_prob)
         mask = model.ops.asarray(mask).reshape((mask.shape[0], 1))
-        output, backprop = model.get_ref("wrapped-model").begin_update(docs)  # drop=drop
+        output, backprop = model.get_ref("wrapped-model").begin_update(
+            docs
+        )  # drop=drop
 
         def mlm_backward(d_output):
             d_output *= 1 - mask

View File

@@ -16,18 +16,14 @@ def build_tb_parser_model(
     nO=None,
 ):
     t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
-    tok2vec = chain(
-        tok2vec,
-        with_array(Linear(hidden_width, t2v_width)),
-        list2array(),
-    )
+    tok2vec = chain(tok2vec, with_array(Linear(hidden_width, t2v_width)), list2array(),)
     tok2vec.set_dim("nO", hidden_width)
     lower = PrecomputableAffine(
         nO=hidden_width if use_upper else nO,
         nF=nr_feature_tokens,
         nI=tok2vec.get_dim("nO"),
-        nP=maxout_pieces
+        nP=maxout_pieces,
     )
     if use_upper:
         with use_ops("numpy"):

View File

@@ -1,6 +1,14 @@
 import functools
 from typing import List, Tuple, Dict, Optional
-from thinc.api import Ops, Model, Linear, Softmax, with_array, softmax_activation, padded2list
+from thinc.api import (
+    Ops,
+    Model,
+    Linear,
+    Softmax,
+    with_array,
+    softmax_activation,
+    padded2list,
+)
 from thinc.api import chain, list2padded, configure_normal_init
 from thinc.api import Dropout
 from thinc.types import Padded, Ints1d, Ints3d, Floats2d, Floats3d
@@ -12,12 +20,12 @@ from ...util import registry
 @registry.architectures.register("spacy.BiluoTagger.v1")
-def BiluoTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], List[Floats2d]]:
+def BiluoTagger(
+    tok2vec: Model[List[Doc], List[Floats2d]]
+) -> Model[List[Doc], List[Floats2d]]:
     biluo = BILUO()
     linear = Linear(
-        nO=None,
-        nI=tok2vec.get_dim("nO"),
-        init_W=configure_normal_init(mean=0.02)
+        nO=None, nI=tok2vec.get_dim("nO"), init_W=configure_normal_init(mean=0.02)
     )
     model = chain(
         tok2vec,
@@ -25,7 +33,7 @@ def BiluoTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], L
         with_array(chain(Dropout(0.1), linear)),
         biluo,
         with_array(softmax_activation()),
-        padded2list()
+        padded2list(),
     )
 
     return Model(
@@ -35,11 +43,14 @@ def BiluoTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], L
         layers=[model, linear],
         refs={"tok2vec": tok2vec, "linear": linear, "biluo": biluo},
         dims={"nO": None},
-        attrs={"get_num_actions": biluo.attrs["get_num_actions"]}
+        attrs={"get_num_actions": biluo.attrs["get_num_actions"]},
     )
 
+
 @registry.architectures.register("spacy.IOBTagger.v1")
-def IOBTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], List[Floats2d]]:
+def IOBTagger(
+    tok2vec: Model[List[Doc], List[Floats2d]]
+) -> Model[List[Doc], List[Floats2d]]:
     biluo = IOB()
     linear = Linear(nO=None, nI=tok2vec.get_dim("nO"))
     model = chain(
@@ -48,7 +59,7 @@ def IOBTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], Lis
         with_array(linear),
         biluo,
         with_array(softmax_activation()),
-        padded2list()
+        padded2list(),
     )
 
     return Model(
@@ -58,11 +69,10 @@ def IOBTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], Lis
         layers=[model],
         refs={"tok2vec": tok2vec, "linear": linear, "biluo": biluo},
         dims={"nO": None},
-        attrs={"get_num_actions": biluo.attrs["get_num_actions"]}
+        attrs={"get_num_actions": biluo.attrs["get_num_actions"]},
     )
 
-
 def init(model: Model[List[Doc], List[Floats2d]], X=None, Y=None) -> None:
     if model.get_dim("nO") is None and Y:
         model.set_dim("nO", Y[0].shape[1])

View File

@@ -1,7 +1,30 @@
-from thinc.api import Model, reduce_mean, Linear, list2ragged, Logistic, ParametricAttention
+from thinc.api import (
+    Model,
+    reduce_mean,
+    Linear,
+    list2ragged,
+    Logistic,
+    ParametricAttention,
+)
 from thinc.api import chain, concatenate, clone, Dropout
-from thinc.api import SparseLinear, Softmax, softmax_activation, Maxout, reduce_sum, Relu, residual, expand_window
-from thinc.api import HashEmbed, with_ragged, with_array, with_cpu, uniqued, FeatureExtractor
+from thinc.api import (
+    SparseLinear,
+    Softmax,
+    softmax_activation,
+    Maxout,
+    reduce_sum,
+    Relu,
+    residual,
+    expand_window,
+)
+from thinc.api import (
+    HashEmbed,
+    with_ragged,
+    with_array,
+    with_cpu,
+    uniqued,
+    FeatureExtractor,
+)
 from ..spacy_vectors import SpacyVectors
 from ... import util
@@ -50,14 +73,31 @@ def build_bow_text_classifier(exclusive_classes, ngram_size, no_output_layer, nO
 @registry.architectures.register("spacy.TextCat.v1")
-def build_text_classifier(width, embed_size, pretrained_vectors, exclusive_classes, ngram_size,
-                          window_size, conv_depth, dropout, nO=None):
+def build_text_classifier(
+    width,
+    embed_size,
+    pretrained_vectors,
+    exclusive_classes,
+    ngram_size,
+    window_size,
+    conv_depth,
+    dropout,
+    nO=None,
+):
     cols = [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
     with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
-        lower = HashEmbed(nO=width, nV=embed_size, column=cols.index(LOWER), dropout=dropout)
-        prefix = HashEmbed(nO=width // 2, nV=embed_size, column=cols.index(PREFIX), dropout=dropout)
-        suffix = HashEmbed(nO=width // 2, nV=embed_size, column=cols.index(SUFFIX), dropout=dropout)
-        shape = HashEmbed(nO=width // 2, nV=embed_size, column=cols.index(SHAPE), dropout=dropout)
+        lower = HashEmbed(
+            nO=width, nV=embed_size, column=cols.index(LOWER), dropout=dropout
+        )
+        prefix = HashEmbed(
+            nO=width // 2, nV=embed_size, column=cols.index(PREFIX), dropout=dropout
+        )
+        suffix = HashEmbed(
+            nO=width // 2, nV=embed_size, column=cols.index(SUFFIX), dropout=dropout
+        )
+        shape = HashEmbed(
+            nO=width // 2, nV=embed_size, column=cols.index(SHAPE), dropout=dropout
+        )
 
         width_nI = sum(layer.get_dim("nO") for layer in [lower, prefix, suffix, shape])
         trained_vectors = FeatureExtractor(cols) >> with_array(
@@ -83,8 +123,15 @@ def build_text_classifier(width, embed_size, pretrained_vectors, exclusive_class
             vectors_width = width
         tok2vec = vector_layer >> with_array(
             Maxout(width, vectors_width, normalize=True)
-            >> residual((expand_window(window_size=window_size)
-            >> Maxout(nO=width, nI=width * ((window_size * 2) + 1), normalize=True))) ** conv_depth,
+            >> residual(
+                (
+                    expand_window(window_size=window_size)
+                    >> Maxout(
+                        nO=width, nI=width * ((window_size * 2) + 1), normalize=True
+                    )
+                )
+            )
+            ** conv_depth,
             pad=conv_depth,
         )
         cnn_model = (
@@ -98,15 +145,16 @@ def build_text_classifier(width, embed_size, pretrained_vectors, exclusive_class
         )
 
         linear_model = build_bow_text_classifier(
-            nO=nO, ngram_size=ngram_size, exclusive_classes=exclusive_classes, no_output_layer=False
+            nO=nO,
+            ngram_size=ngram_size,
+            exclusive_classes=exclusive_classes,
+            no_output_layer=False,
         )
         nO_double = nO * 2 if nO else None
         if exclusive_classes:
             output_layer = Softmax(nO=nO, nI=nO_double)
         else:
-            output_layer = (
-                Linear(nO=nO, nI=nO_double) >> Dropout(0.0) >> Logistic()
-            )
+            output_layer = Linear(nO=nO, nI=nO_double) >> Dropout(0.0) >> Logistic()
         model = (linear_model | cnn_model) >> output_layer
         model.set_ref("tok2vec", tok2vec)
         if model.has_dim("nO") is not False:

View File

@@ -99,7 +99,13 @@ def hash_charembed_cnn(
 @registry.architectures.register("spacy.HashEmbedBiLSTM.v1")
 def hash_embed_bilstm_v1(
-    pretrained_vectors, width, depth, embed_size, subword_features, maxout_pieces, dropout
+    pretrained_vectors,
+    width,
+    depth,
+    embed_size,
+    subword_features,
+    maxout_pieces,
+    dropout,
 ):
     # Does not use character embeddings: set to False by default
     return build_Tok2Vec_model(
@@ -141,21 +147,24 @@ def hash_char_embed_bilstm_v1(
 @registry.architectures.register("spacy.LayerNormalizedMaxout.v1")
 def LayerNormalizedMaxout(width, maxout_pieces):
-    return Maxout(
-        nO=width,
-        nP=maxout_pieces,
-        dropout=0.0,
-        normalize=True,
-    )
+    return Maxout(nO=width, nP=maxout_pieces, dropout=0.0, normalize=True,)
 
 
 @registry.architectures.register("spacy.MultiHashEmbed.v1")
-def MultiHashEmbed(columns, width, rows, use_subwords, pretrained_vectors, mix, dropout):
+def MultiHashEmbed(
+    columns, width, rows, use_subwords, pretrained_vectors, mix, dropout
+):
     norm = HashEmbed(nO=width, nV=rows, column=columns.index("NORM"), dropout=dropout)
     if use_subwords:
-        prefix = HashEmbed(nO=width, nV=rows // 2, column=columns.index("PREFIX"), dropout=dropout)
-        suffix = HashEmbed(nO=width, nV=rows // 2, column=columns.index("SUFFIX"), dropout=dropout)
-        shape = HashEmbed(nO=width, nV=rows // 2, column=columns.index("SHAPE"), dropout=dropout)
+        prefix = HashEmbed(
+            nO=width, nV=rows // 2, column=columns.index("PREFIX"), dropout=dropout
+        )
+        suffix = HashEmbed(
+            nO=width, nV=rows // 2, column=columns.index("SUFFIX"), dropout=dropout
+        )
+        shape = HashEmbed(
+            nO=width, nV=rows // 2, column=columns.index("SHAPE"), dropout=dropout
+        )
 
     if pretrained_vectors:
         glove = StaticVectors(
@@ -195,7 +204,13 @@ def CharacterEmbed(columns, width, rows, nM, nC, features, dropout):
 def MaxoutWindowEncoder(width, window_size, maxout_pieces, depth):
     cnn = chain(
         expand_window(window_size=window_size),
-        Maxout(nO=width, nI=width * ((window_size * 2) + 1), nP=maxout_pieces, dropout=0.0, normalize=True),
+        Maxout(
+            nO=width,
+            nI=width * ((window_size * 2) + 1),
+            nP=maxout_pieces,
+            dropout=0.0,
+            normalize=True,
+        ),
     )
     model = clone(residual(cnn), depth)
     model.set_dim("nO", width)
@@ -247,11 +262,19 @@ def build_Tok2Vec_model(
         subword_features = False
     cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
     with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
-        norm = HashEmbed(nO=width, nV=embed_size, column=cols.index(NORM), dropout=dropout)
+        norm = HashEmbed(
+            nO=width, nV=embed_size, column=cols.index(NORM), dropout=dropout
+        )
         if subword_features:
-            prefix = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(PREFIX), dropout=dropout)
-            suffix = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(SUFFIX), dropout=dropout)
-            shape = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(SHAPE), dropout=dropout)
+            prefix = HashEmbed(
+                nO=width, nV=embed_size // 2, column=cols.index(PREFIX), dropout=dropout
+            )
+            suffix = HashEmbed(
+                nO=width, nV=embed_size // 2, column=cols.index(SUFFIX), dropout=dropout
+            )
+            shape = HashEmbed(
+                nO=width, nV=embed_size // 2, column=cols.index(SHAPE), dropout=dropout
+            )
        else:
            prefix, suffix, shape = (None, None, None)
        if pretrained_vectors is not None:

View File

@@ -20,8 +20,8 @@ def TransitionModel(tok2vec, lower, upper, unseen_classes=set()):
         attrs={
             "has_upper": has_upper,
             "unseen_classes": set(unseen_classes),
-            "resize_output": resize_output
-        }
+            "resize_output": resize_output,
+        },
     )
@@ -31,7 +31,7 @@ def forward(model, X, is_train):
         model.layers,
         unseen_classes=model.attrs["unseen_classes"],
         train=is_train,
-        has_upper=model.attrs["has_upper"]
+        has_upper=model.attrs["has_upper"],
     )
     return step_model, step_model.finish_steps
@@ -62,7 +62,7 @@ def resize_output(model, new_nO):
     nI = None
     if smaller.has_dim("nI"):
         nI = smaller.get_dim("nI")
-    with use_ops('numpy'):
+    with use_ops("numpy"):
         larger = Linear(nO=new_nO, nI=nI)
         larger.init = smaller.init
         # it could be that the model is not initialized yet, then skip this bit

View File

@@ -21,9 +21,7 @@ class SimpleNER(Pipe):
         self.model = model
         self.cfg = {"labels": []}
         self.loss_func = SequenceCategoricalCrossentropy(
-            names=self.get_tag_names(),
-            normalize=True,
-            missing_value=None
+            names=self.get_tag_names(), normalize=True, missing_value=None
         )
         assert self.model is not None
@@ -42,17 +40,17 @@ class SimpleNER(Pipe):
     def get_tag_names(self):
         if self.is_biluo:
             return (
-                [f"B-{label}" for label in self.labels] +
-                [f"I-{label}" for label in self.labels] +
-                [f"L-{label}" for label in self.labels] +
-                [f"U-{label}" for label in self.labels] +
-                ["O"]
+                [f"B-{label}" for label in self.labels]
+                + [f"I-{label}" for label in self.labels]
+                + [f"L-{label}" for label in self.labels]
+                + [f"U-{label}" for label in self.labels]
+                + ["O"]
             )
         else:
             return (
-                [f"B-{label}" for label in self.labels] +
-                [f"I-{label}" for label in self.labels] +
-                ["O"]
+                [f"B-{label}" for label in self.labels]
+                + [f"I-{label}" for label in self.labels]
+                + ["O"]
             )
 
     def predict(self, docs: List[Doc]) -> List[Floats2d]:
@@ -107,7 +105,7 @@ class SimpleNER(Pipe):
     def begin_training(self, get_examples, pipeline=None, sgd=None, **kwargs):
         self.cfg.update(kwargs)
-        if not hasattr(get_examples, '__call__'):
+        if not hasattr(get_examples, "__call__"):
             gold_tuples = get_examples
             get_examples = lambda: gold_tuples
         labels = _get_labels(get_examples())
@@ -121,9 +119,7 @@ class SimpleNER(Pipe):
         self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg)
         link_vectors_to_models(self.vocab)
         self.loss_func = SequenceCategoricalCrossentropy(
-            names=self.get_tag_names(),
-            normalize=True,
-            missing_value=None
+            names=self.get_tag_names(), normalize=True, missing_value=None
         )
         return sgd
@@ -144,6 +140,6 @@ def _get_labels(examples):
     labels = set()
     for eg in examples:
         for ner_tag in eg.get_aligned("ENT_TYPE", as_string=True):
-            if ner_tag != 'O' and ner_tag != '-':
+            if ner_tag != "O" and ner_tag != "-":
                 labels.add(ner_tag)
     return list(sorted(labels))

View File

@@ -97,7 +97,9 @@ class Scorer(object):
             for name, component in pipeline:
                 if name == "textcat":
                     self.textcat_multilabel = component.model.attrs["multi_label"]
-                    self.textcat_positive_label = component.cfg.get("positive_label", None)
+                    self.textcat_positive_label = component.cfg.get(
+                        "positive_label", None
+                    )
                     for label in component.cfg.get("labels", []):
                         self.textcat_auc_per_cat[label] = ROCAUCScore()
                         self.textcat_f_per_cat[label] = PRFScore()
@@ -359,7 +361,9 @@ class Scorer(object):
                         (gold_i, gold_head, token.dep_.lower())
                     )
        # Find all NER labels in gold and doc
-        ent_labels = set([k.label_ for k in gold_doc.ents] + [k.label_ for k in doc.ents])
+        ent_labels = set(
+            [k.label_ for k in gold_doc.ents] + [k.label_ for k in doc.ents]
+        )
        # Set up all labels for per type scoring and prepare gold per type
        gold_per_ents = {ent_label: set() for ent_label in ent_labels}
        for ent_label in ent_labels:
@@ -392,7 +396,10 @@ class Scorer(object):
        self.pos.score_set(cand_pos, gold_pos)
        self.morphs.score_set(cand_morphs, gold_morphs)
        for field in self.morphs_per_feat:
-            self.morphs_per_feat[field].score_set(cand_morphs_per_feat.get(field, set()), gold_morphs_per_feat.get(field, set()))
+            self.morphs_per_feat[field].score_set(
+                cand_morphs_per_feat.get(field, set()),
+                gold_morphs_per_feat.get(field, set()),
+            )
        self.sent_starts.score_set(cand_sent_starts, gold_sent_starts)
        self.labelled.score_set(cand_deps, gold_deps)
        for dep in self.labelled_per_dep:
@@ -404,7 +411,9 @@ class Scorer(object):
            )
        if (
            len(gold_doc.cats) > 0
-            and set(self.textcat_f_per_cat) == set(self.textcat_auc_per_cat) == set(gold_doc.cats)
+            and set(self.textcat_f_per_cat)
+            == set(self.textcat_auc_per_cat)
+            == set(gold_doc.cats)
            and set(gold_doc.cats) == set(doc.cats)
        ):
            goldcat = max(gold_doc.cats, key=gold_doc.cats.get)

View File

@@ -9,7 +9,12 @@ from spacy.pipeline.defaults import default_ner
 def test_doc_add_entities_set_ents_iob(en_vocab):
     text = ["This", "is", "a", "lion"]
     doc = get_doc(en_vocab, text)
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner = EntityRecognizer(en_vocab, default_ner(), **config)
     ner.begin_training([])
     ner(doc)
@@ -26,7 +31,12 @@ def test_doc_add_entities_set_ents_iob(en_vocab):
 def test_ents_reset(en_vocab):
     text = ["This", "is", "a", "lion"]
     doc = get_doc(en_vocab, text)
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner = EntityRecognizer(en_vocab, default_ner(), **config)
     ner.begin_training([])
     ner(doc)

View File

@@ -17,7 +17,12 @@ def vocab():
 @pytest.fixture
 def parser(vocab):
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(vocab, default_parser(), **config)
     return parser
@@ -35,10 +40,7 @@ def _train_parser(parser):
     for i in range(5):
         losses = {}
         doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
-        gold = {
-            "heads": [1, 1, 3, 3],
-            "deps": ["left", "ROOT", "left", "ROOT"]
-        }
+        gold = {"heads": [1, 1, 3, 3], "deps": ["left", "ROOT", "left", "ROOT"]}
         example = Example.from_dict(doc, gold)
         parser.update([example], sgd=sgd, losses=losses)
     return parser
@@ -51,10 +53,7 @@ def test_add_label(parser):
     for i in range(100):
         losses = {}
         doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
-        gold = {
-            "heads": [1, 1, 3, 3],
-            "deps": ["right", "ROOT", "left", "ROOT"]
-        }
+        gold = {"heads": [1, 1, 3, 3], "deps": ["right", "ROOT", "left", "ROOT"]}
         parser.update((doc, gold), sgd=sgd, losses=losses)
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc = parser(doc)
@@ -63,7 +62,12 @@ def test_add_label(parser):
 def test_add_label_deserializes_correctly():
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner1 = EntityRecognizer(Vocab(), default_ner(), **config)
     ner1.add_label("C")
     ner1.add_label("B")
@@ -78,6 +82,7 @@ def test_add_label_deserializes_correctly():
     for i in range(ner1.moves.n_moves):
         assert ner1.moves.get_class_name(i) == ner2.moves.get_class_name(i)
 
+
 @pytest.mark.parametrize(
     "pipe_cls,n_moves,model",
     [(DependencyParser, 5, default_parser()), (EntityRecognizer, 4, default_ner())],

View File

@@ -139,7 +139,12 @@ def test_get_oracle_actions():
         deps.append(dep)
         ents.append(ent)
     doc = Doc(Vocab(), words=[t[1] for t in annot_tuples])
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(doc.vocab, default_parser(), **config)
     parser.moves.add_action(0, "")
     parser.moves.add_action(1, "")
@@ -151,7 +156,9 @@ def test_get_oracle_actions():
             parser.moves.add_action(2, dep)
         elif head < i:
             parser.moves.add_action(3, dep)
-    example = Example.from_dict(doc, {"words": words, "tags": tags, "heads": heads, "deps": deps})
+    example = Example.from_dict(
+        doc, {"words": words, "tags": tags, "heads": heads, "deps": deps}
+    )
     parser.moves.get_oracle_sequence(example)

View File

@@ -143,7 +143,12 @@ def test_accept_blocked_token():
     # 1. test normal behaviour
     nlp1 = English()
     doc1 = nlp1("I live in New York")
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner1 = EntityRecognizer(doc1.vocab, default_ner(), **config)
     assert [token.ent_iob_ for token in doc1] == ["", "", "", "", ""]
     assert [token.ent_type_ for token in doc1] == ["", "", "", "", ""]
@@ -162,7 +167,12 @@ def test_accept_blocked_token():
     # 2. test blocking behaviour
     nlp2 = English()
     doc2 = nlp2("I live in New York")
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner2 = EntityRecognizer(doc2.vocab, default_ner(), **config)
 
     # set "New York" to a blocked entity
@@ -220,7 +230,12 @@ def test_overwrite_token():
     assert [token.ent_type_ for token in doc] == ["", "", "", "", ""]
     # Check that a new ner can overwrite O
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner2 = EntityRecognizer(doc.vocab, default_ner(), **config)
     ner2.moves.add_action(5, "")
     ner2.add_label("GPE")

View File

@@ -29,7 +29,12 @@ def tok2vec():
 @pytest.fixture
 def parser(vocab, arc_eager):
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     return Parser(vocab, model=default_parser(), moves=arc_eager, **config)

View File

@@ -180,6 +180,7 @@ def test_parser_set_sent_starts(en_vocab):
         for token in sent:
             assert token.head in sent
 
+
 def test_overfitting_IO():
     # Simple test to try and quickly overfit the dependency parser - ensuring the ML models work correctly
     nlp = English()

View File

@@ -16,7 +16,12 @@ def vocab():
 @pytest.fixture
 def parser(vocab):
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(vocab, default_parser(), **config)
     parser.cfg["token_vector_width"] = 4
     parser.cfg["hidden_width"] = 32
@@ -28,7 +33,9 @@ def parser(vocab):
     for i in range(10):
         losses = {}
         doc = Doc(vocab, words=["a", "b", "c", "d"])
-        example = Example.from_dict(doc, {"heads": [1, 1, 3, 3], "deps": ["left", "ROOT", "left", "ROOT"]})
+        example = Example.from_dict(
+            doc, {"heads": [1, 1, 3, 3], "deps": ["left", "ROOT", "left", "ROOT"]}
+        )
         parser.update([example], sgd=sgd, losses=losses)
     return parser

View File

@@ -272,11 +272,13 @@ GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"]
 def test_overfitting_IO():
     # Simple test to try and quickly overfit the NEL component - ensuring the ML models work correctly
     nlp = English()
-    nlp.add_pipe(nlp.create_pipe('sentencizer'))
+    nlp.add_pipe(nlp.create_pipe("sentencizer"))
 
     # Add a custom component to recognize "Russ Cochran" as an entity for the example training data
     ruler = EntityRuler(nlp)
-    patterns = [{"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}]
+    patterns = [
+        {"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}
+    ]
     ruler.add_patterns(patterns)
     nlp.add_pipe(ruler)
@@ -293,7 +295,11 @@ def test_overfitting_IO():
     mykb = KnowledgeBase(nlp.vocab, entity_vector_length=3)
     mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
     mykb.add_entity(entity="Q7381115", freq=12, entity_vector=[9, 1, -7])
-    mykb.add_alias(alias="Russ Cochran", entities=["Q2146908", "Q7381115"], probabilities=[0.5, 0.5])
+    mykb.add_alias(
+        alias="Russ Cochran",
+        entities=["Q2146908", "Q7381115"],
+        probabilities=[0.5, 0.5],
+    )
 
     # Create the Entity Linker component and add it to the pipeline
     entity_linker = nlp.create_pipe("entity_linker", config={"kb": mykb})

View File

@@ -15,8 +15,17 @@ def test_label_types():
 TRAIN_DATA = [
-    ("I like green eggs", {"morphs": ["Feat=N", "Feat=V", "Feat=J", "Feat=N"], "pos": ["NOUN", "VERB", "ADJ", "NOUN"]}),
-    ("Eat blue ham", {"morphs": ["Feat=V", "Feat=J", "Feat=N"], "pos": ["VERB", "ADJ", "NOUN"]}),
+    (
+        "I like green eggs",
+        {
+            "morphs": ["Feat=N", "Feat=V", "Feat=J", "Feat=N"],
+            "pos": ["NOUN", "VERB", "ADJ", "NOUN"],
+        },
+    ),
+    (
+        "Eat blue ham",
+        {"morphs": ["Feat=V", "Feat=J", "Feat=N"], "pos": ["VERB", "ADJ", "NOUN"]},
+    ),
 ]
@@ -38,7 +47,12 @@ def test_overfitting_IO():
     # test the trained model
     test_text = "I like blue eggs"
     doc = nlp(test_text)
-    gold_morphs = ["Feat=N|POS=NOUN", "Feat=V|POS=VERB", "Feat=J|POS=ADJ", "Feat=N|POS=NOUN"]
+    gold_morphs = [
+        "Feat=N|POS=NOUN",
+        "Feat=V|POS=VERB",
+        "Feat=J|POS=ADJ",
+        "Feat=N|POS=NOUN",
+    ]
     assert [t.morph_ for t in doc] == gold_morphs
 
     # Also test the results are still the same after IO

View File

@@ -7,24 +7,28 @@ from spacy.pipeline.simple_ner import SimpleNER
 import spacy
 
 
-@pytest.fixture(params=[
-    ["PER", "ORG", "LOC", "MISC"],
-    ["GPE", "PERSON", "NUMBER", "CURRENCY", "EVENT"]
-])
+@pytest.fixture(
+    params=[
+        ["PER", "ORG", "LOC", "MISC"],
+        ["GPE", "PERSON", "NUMBER", "CURRENCY", "EVENT"],
+    ]
+)
 def labels(request):
     return request.param
 
 
 @pytest.fixture
 def ops():
     return NumpyOps()
 
 
 def _get_actions(labels):
     action_names = (
-        [f"B{label}" for label in labels] + \
-        [f"I{label}" for label in labels] + \
-        [f"L{label}" for label in labels] + \
-        [f"U{label}" for label in labels] + \
-        ["O"]
+        [f"B{label}" for label in labels]
+        + [f"I{label}" for label in labels]
+        + [f"L{label}" for label in labels]
+        + [f"U{label}" for label in labels]
+        + ["O"]
     )
     A = namedtuple("actions", action_names)
     return A(**{name: i for i, name in enumerate(action_names)})

View File

@@ -270,7 +270,12 @@ def test_issue1963(en_tokenizer):
 @pytest.mark.parametrize("label", ["U-JOB-NAME"])
 def test_issue1967(label):
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner = EntityRecognizer(Vocab(), default_ner(), **config)
     example = Example.from_dict(
         Doc(ner.vocab, words=["word"]),
@@ -280,8 +285,8 @@ def test_issue1967(label):
             "tags": ["tag"],
             "heads": [0],
             "deps": ["dep"],
-            "entities": [label]
-        }
+            "entities": [label],
+        },
     )
     assert "JOB-NAME" in ner.moves.get_actions(gold_parses=[example])[1]

View File

@@ -196,7 +196,12 @@ def test_issue3345():
     doc = Doc(nlp.vocab, words=["I", "live", "in", "New", "York"])
     doc[4].is_sent_start = True
     ruler = EntityRuler(nlp, patterns=[{"label": "GPE", "pattern": "New York"}])
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner = EntityRecognizer(doc.vocab, default_ner(), **config)
     # Add the OUT action. I wouldn't have thought this would be necessary...
     ner.moves.add_action(5, "")

View File

@@ -6,7 +6,12 @@ from spacy.pipeline.defaults import default_parser
 def test_issue3830_no_subtok():
     """Test that the parser doesn't have subtok label if not learn_tokens"""
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(Vocab(), default_parser(), **config)
     parser.add_label("nsubj")
     assert "subtok" not in parser.labels
@@ -16,7 +21,12 @@ def test_issue3830_no_subtok():
 def test_issue3830_with_subtok():
     """Test that the parser does have subtok label if learn_tokens=True."""
-    config = {"learn_tokens": True, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": True,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(Vocab(), default_parser(), **config)
     parser.add_label("nsubj")
     assert "subtok" not in parser.labels

View File

@@ -74,7 +74,12 @@ def test_issue4042_bug2():
         output_dir.mkdir()
         ner1.to_disk(output_dir)
-        config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+        config = {
+            "learn_tokens": False,
+            "min_action_freq": 30,
+            "beam_width": 1,
+            "beam_update_prob": 1.0,
+        }
         ner2 = EntityRecognizer(vocab, default_ner(), **config)
         ner2.from_disk(output_dir)
         assert len(ner2.labels) == 2

View File

@@ -16,7 +16,12 @@ def test_issue4313():
     beam_width = 16
     beam_density = 0.0001
     nlp = English()
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     ner = EntityRecognizer(nlp.vocab, default_ner(), **config)
     ner.add_label("SOME_LABEL")
     ner.begin_training([])

View File

@@ -1,4 +1,5 @@
 import pytest
 
+
 # TODO
 # from spacy.gold.converters.conllu2docs import conllu2docs

View File

@@ -12,7 +12,12 @@ test_parsers = [DependencyParser, EntityRecognizer]
 @pytest.fixture
 def parser(en_vocab):
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
+    config = {
+        "learn_tokens": False,
+        "min_action_freq": 30,
+        "beam_width": 1,
+        "beam_update_prob": 1.0,
+    }
     parser = DependencyParser(en_vocab, default_parser(), **config)
     parser.add_label("nsubj")
     return parser

View File

@@ -36,7 +36,9 @@ def test_serialize_vocab_roundtrip_bytes(strings1, strings2):
     new_vocab1 = Vocab().from_bytes(vocab1_b)
     assert new_vocab1.to_bytes() == vocab1_b
     assert len(new_vocab1.strings) == len(strings1) + 2  # adds _SP and POS=SPACE
-    assert sorted([s for s in new_vocab1.strings]) == sorted(strings1 + list(default_strings))
+    assert sorted([s for s in new_vocab1.strings]) == sorted(
+        strings1 + list(default_strings)
+    )
 
 
 @pytest.mark.parametrize("strings1,strings2", test_strings)

View File

@@ -3,6 +3,7 @@ import pytest
 from spacy.lang.en import English
 from spacy.gold.converters import iob2docs, conll_ner2docs
 from spacy.cli.pretrain import make_docs
+
 # TODO
 # from spacy.gold.converters import conllu2docs

View File

@ -155,7 +155,18 @@ def test_gold_biluo_misalign(en_vocab):
def test_split_sentences(en_vocab): def test_split_sentences(en_vocab):
words = ["I", "flew", "to", "San Francisco Valley", "had", "loads of fun"] words = ["I", "flew", "to", "San Francisco Valley", "had", "loads of fun"]
doc = Doc(en_vocab, words=words) doc = Doc(en_vocab, words=words)
gold_words = ["I", "flew", "to", "San", "Francisco", "Valley", "had", "loads", "of", "fun"] gold_words = [
"I",
"flew",
"to",
"San",
"Francisco",
"Valley",
"had",
"loads",
"of",
"fun",
]
sent_starts = [True, False, False, False, False, False, True, False, False, False] sent_starts = [True, False, False, False, False, False, True, False, False, False]
example = Example.from_dict(doc, {"words": gold_words, "sent_starts": sent_starts}) example = Example.from_dict(doc, {"words": gold_words, "sent_starts": sent_starts})
assert example.text == "I flew to San Francisco Valley had loads of fun " assert example.text == "I flew to San Francisco Valley had loads of fun "
@ -166,7 +177,16 @@ def test_split_sentences(en_vocab):
words = ["I", "flew", "to", "San", "Francisco", "Valley", "had", "loads", "of fun"] words = ["I", "flew", "to", "San", "Francisco", "Valley", "had", "loads", "of fun"]
doc = Doc(en_vocab, words=words) doc = Doc(en_vocab, words=words)
gold_words = ["I", "flew", "to", "San Francisco", "Valley", "had", "loads of", "fun"] gold_words = [
"I",
"flew",
"to",
"San Francisco",
"Valley",
"had",
"loads of",
"fun",
]
sent_starts = [True, False, False, False, False, True, False, False] sent_starts = [True, False, False, False, False, True, False, False]
example = Example.from_dict(doc, {"words": gold_words, "sent_starts": sent_starts}) example = Example.from_dict(doc, {"words": gold_words, "sent_starts": sent_starts})
assert example.text == "I flew to San Francisco Valley had loads of fun " assert example.text == "I flew to San Francisco Valley had loads of fun "
@ -195,7 +215,15 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
gold_words = ["I", "flew to", "San Francisco Valley", "."] gold_words = ["I", "flew to", "San Francisco Valley", "."]
example = Example.from_dict(doc, {"words": gold_words, "entities": entities}) example = Example.from_dict(doc, {"words": gold_words, "entities": entities})
assert example.get_aligned("ENT_IOB") == [2, 2, 2, 3, 1, 1, 2] assert example.get_aligned("ENT_IOB") == [2, 2, 2, 3, 1, 1, 2]
assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "", "LOC", "LOC", "LOC", ""] assert example.get_aligned("ENT_TYPE", as_string=True) == [
"",
"",
"",
"LOC",
"LOC",
"LOC",
"",
]
# misaligned # misaligned
words = ["I flew", "to", "San Francisco", "Valley", "."] words = ["I flew", "to", "San Francisco", "Valley", "."]
@ -206,11 +234,21 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
entities = [(offset_start, offset_end, "LOC")] entities = [(offset_start, offset_end, "LOC")]
links = {(offset_start, offset_end): {"Q816843": 1.0}} links = {(offset_start, offset_end): {"Q816843": 1.0}}
gold_words = ["I", "flew to", "San", "Francisco Valley", "."] gold_words = ["I", "flew to", "San", "Francisco Valley", "."]
example = Example.from_dict(doc, {"words": gold_words, "entities": entities, "links": links}) example = Example.from_dict(
doc, {"words": gold_words, "entities": entities, "links": links}
)
assert example.get_aligned("ENT_IOB") == [2, 2, 3, 1, 2] assert example.get_aligned("ENT_IOB") == [2, 2, 3, 1, 2]
assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "LOC", "LOC", ""] assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "LOC", "LOC", ""]
assert example.get_aligned("ENT_KB_ID", as_string=True) == ["", "", "Q816843", "Q816843", ""] assert example.get_aligned("ENT_KB_ID", as_string=True) == [
assert example.to_dict()["doc_annotation"]["links"][(offset_start, offset_end)] == {"Q816843": 1.0} "",
"",
"Q816843",
"Q816843",
"",
]
assert example.to_dict()["doc_annotation"]["links"][(offset_start, offset_end)] == {
"Q816843": 1.0
}
# additional whitespace tokens in GoldParse words # additional whitespace tokens in GoldParse words
words, spaces = get_words_and_spaces( words, spaces = get_words_and_spaces(
@ -221,26 +259,55 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
entities = [(len("I flew to "), len("I flew to San Francisco Valley"), "LOC")] entities = [(len("I flew to "), len("I flew to San Francisco Valley"), "LOC")]
gold_words = ["I", "flew", " ", "to", "San Francisco Valley", "."] gold_words = ["I", "flew", " ", "to", "San Francisco Valley", "."]
gold_spaces = [True, True, False, True, False, False] gold_spaces = [True, True, False, True, False, False]
example = Example.from_dict(doc, {"words": gold_words, "spaces": gold_spaces, "entities": entities}) example = Example.from_dict(
doc, {"words": gold_words, "spaces": gold_spaces, "entities": entities}
)
assert example.get_aligned("ENT_IOB") == [2, 2, 2, 2, 3, 1, 2] assert example.get_aligned("ENT_IOB") == [2, 2, 2, 2, 3, 1, 2]
assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "", "", "LOC", "LOC", ""] assert example.get_aligned("ENT_TYPE", as_string=True) == [
"",
"",
"",
"",
"LOC",
"LOC",
"",
]
# from issue #4791 # from issue #4791
doc = en_tokenizer("I'll return the ₹54 amount") doc = en_tokenizer("I'll return the ₹54 amount")
gold_words = ["I", "'ll", "return", "the", "", "54", "amount"] gold_words = ["I", "'ll", "return", "the", "", "54", "amount"]
gold_spaces = [False, True, True, True, False, True, False] gold_spaces = [False, True, True, True, False, True, False]
entities = [(16, 19, "MONEY")] entities = [(16, 19, "MONEY")]
example = Example.from_dict(doc, {"words": gold_words, "spaces": gold_spaces, "entities": entities}) example = Example.from_dict(
doc, {"words": gold_words, "spaces": gold_spaces, "entities": entities}
)
assert example.get_aligned("ENT_IOB") == [2, 2, 2, 2, 3, 2] assert example.get_aligned("ENT_IOB") == [2, 2, 2, 2, 3, 2]
assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "", "", "MONEY", ""] assert example.get_aligned("ENT_TYPE", as_string=True) == [
"",
"",
"",
"",
"MONEY",
"",
]
doc = en_tokenizer("I'll return the $54 amount") doc = en_tokenizer("I'll return the $54 amount")
gold_words = ["I", "'ll", "return", "the", "$", "54", "amount"] gold_words = ["I", "'ll", "return", "the", "$", "54", "amount"]
gold_spaces = [False, True, True, True, False, True, False] gold_spaces = [False, True, True, True, False, True, False]
entities = [(16, 19, "MONEY")] entities = [(16, 19, "MONEY")]
example = Example.from_dict(doc, {"words": gold_words, "spaces": gold_spaces, "entities": entities}) example = Example.from_dict(
doc, {"words": gold_words, "spaces": gold_spaces, "entities": entities}
)
assert example.get_aligned("ENT_IOB") == [2, 2, 2, 2, 3, 1, 2] assert example.get_aligned("ENT_IOB") == [2, 2, 2, 2, 3, 1, 2]
assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "", "", "MONEY", "MONEY", ""] assert example.get_aligned("ENT_TYPE", as_string=True) == [
"",
"",
"",
"",
"MONEY",
"MONEY",
"",
]
def test_roundtrip_offsets_biluo_conversion(en_tokenizer): def test_roundtrip_offsets_biluo_conversion(en_tokenizer):
@ -311,7 +378,9 @@ def test_roundtrip_docs_to_json(doc):
assert lemmas == [t.lemma_ for t in reloaded_example.reference] assert lemmas == [t.lemma_ for t in reloaded_example.reference]
assert deps == [t.dep_ for t in reloaded_example.reference] assert deps == [t.dep_ for t in reloaded_example.reference]
assert heads == [t.head.i for t in reloaded_example.reference] assert heads == [t.head.i for t in reloaded_example.reference]
assert ents == [(e.start_char, e.end_char, e.label_) for e in reloaded_example.reference.ents] assert ents == [
(e.start_char, e.end_char, e.label_) for e in reloaded_example.reference.ents
]
assert "TRAVEL" in reloaded_example.reference.cats assert "TRAVEL" in reloaded_example.reference.cats
assert "BAKING" in reloaded_example.reference.cats assert "BAKING" in reloaded_example.reference.cats
assert cats["TRAVEL"] == reloaded_example.reference.cats["TRAVEL"] assert cats["TRAVEL"] == reloaded_example.reference.cats["TRAVEL"]
@@ -375,7 +444,9 @@ def test_ignore_misaligned(doc):
     # doesn't raise an AlignmentError, but there is nothing to iterate over
     # because the only example can't be aligned
-    train_reloaded_example = list(goldcorpus.train_dataset(nlp, ignore_misaligned=True))
+    train_reloaded_example = list(
+        goldcorpus.train_dataset(nlp, ignore_misaligned=True)
+    )
     assert len(train_reloaded_example) == 0
@@ -389,7 +460,9 @@ def test_make_orth_variants(doc):
     # due to randomness, test only that this runs with no errors for now
     train_example = next(goldcorpus.train_dataset(nlp))
-    variant_example = make_orth_variants_example(nlp, train_example, orth_variant_level=0.2)
+    variant_example = make_orth_variants_example(
+        nlp, train_example, orth_variant_level=0.2
+    )
 @pytest.mark.parametrize(
@@ -430,7 +503,9 @@ def test_goldparse_startswith_space(en_tokenizer):
     entities = ["U-DATE"]
     deps = ["ROOT"]
     heads = [0]
-    example = Example.from_dict(doc, {"words": gold_words, "entities": entities, "deps":deps, "heads": heads})
+    example = Example.from_dict(
+        doc, {"words": gold_words, "entities": entities, "deps": deps, "heads": heads}
+    )
     assert example.get_aligned("ENT_IOB") == [None, 3]
     assert example.get_aligned("ENT_TYPE", as_string=True) == [None, "DATE"]
     assert example.get_aligned("DEP", as_string=True) == [None, "ROOT"]
@@ -441,7 +516,12 @@ def test_gold_constructor():
     nlp = English()
     doc = nlp("This is a sentence")
    example = Example.from_dict(doc, {"cats": {"cat1": 1.0, "cat2": 0.0}})
-    assert example.get_aligned("ORTH", as_string=True) == ["This", "is", "a", "sentence"]
+    assert example.get_aligned("ORTH", as_string=True) == [
+        "This",
+        "is",
+        "a",
+        "sentence",
+    ]
     assert example.reference.cats["cat1"]
     assert not example.reference.cats["cat2"]
@@ -496,7 +576,7 @@ def test_split_sents(merged_dict):
     nlp = English()
     example = Example.from_dict(
         Doc(nlp.vocab, words=merged_dict["words"], spaces=merged_dict["spaces"]),
-        merged_dict
+        merged_dict,
     )
     assert example.text == "Hi there everyone It is just me"
@@ -522,10 +602,7 @@ def test_tuples_to_example(vocab, merged_dict):
     cats = {"TRAVEL": 1.0, "BAKING": 0.0}
     merged_dict = dict(merged_dict)
     merged_dict["cats"] = cats
-    ex = Example.from_dict(
-        Doc(vocab, words=merged_dict["words"]),
-        merged_dict
-    )
+    ex = Example.from_dict(Doc(vocab, words=merged_dict["words"]), merged_dict)
     words = [token.text for token in ex.reference]
     assert words == merged_dict["words"]
     tags = [token.tag_ for token in ex.reference]

@@ -36,9 +36,7 @@ def test_language_update(nlp):
 def test_language_evaluate(nlp):
     text = "hello world"
-    annots = {
-        "doc_annotation": {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
-    }
+    annots = {"doc_annotation": {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}}
     doc = Doc(nlp.vocab, words=text.split(" "))
     # Evaluate with text and dict
     nlp.evaluate([(text, annots)])

@@ -32,7 +32,9 @@ def test_Example_from_dict_invalid(annots):
         Example.from_dict(predicted, annots)
-@pytest.mark.parametrize("pred_words", [["ice", "cream"], ["icecream"], ["i", "ce", "cream"]])
+@pytest.mark.parametrize(
+    "pred_words", [["ice", "cream"], ["icecream"], ["i", "ce", "cream"]]
+)
 @pytest.mark.parametrize("annots", [{"words": ["icecream"], "tags": ["NN"]}])
 def test_Example_from_dict_with_tags(pred_words, annots):
     vocab = Vocab()
@@ -161,7 +163,15 @@ def test_Example_from_dict_with_entities(annots):
     example = Example.from_dict(predicted, annots)
     assert len(list(example.reference.ents)) == 2
-    assert [example.reference[i].ent_iob_ for i in range(7)] == ["O", "O", "B", "I", "O", "B", "O"]
+    assert [example.reference[i].ent_iob_ for i in range(7)] == [
+        "O",
+        "O",
+        "B",
+        "I",
+        "O",
+        "B",
+        "O",
+    ]
     assert example.get_aligned("ENT_IOB") == [2, 2, 3, 1, 2, 3, 2]
     assert example.reference[2].ent_type_ == "LOC"
@@ -174,7 +184,10 @@ def test_Example_from_dict_with_entities(annots):
     [
         {
             "words": ["I", "like", "New", "York", "and", "Berlin", "."],
-            "entities": [(0, 4, "LOC"), (21, 27, "LOC")], # not aligned to token boundaries
+            "entities": [
+                (0, 4, "LOC"),
+                (21, 27, "LOC"),
+            ],  # not aligned to token boundaries
         }
     ],
 )
@@ -192,7 +205,10 @@ def test_Example_from_dict_with_entities_invalid(annots):
         {
             "words": ["I", "like", "New", "York", "and", "Berlin", "."],
             "entities": [(7, 15, "LOC"), (20, 26, "LOC")],
-            "links": {(7, 15): {"Q60": 1.0, "Q64": 0.0}, (20, 26): {"Q60": 0.0, "Q64": 1.0}},
+            "links": {
+                (7, 15): {"Q60": 1.0, "Q64": 0.0},
+                (20, 26): {"Q60": 0.0, "Q64": 1.0},
+            },
         }
     ],
 )
@@ -224,4 +240,3 @@ def test_Example_from_dict_with_links_invalid(annots):
     predicted = Doc(vocab, words=annots["words"])
     with pytest.raises(ValueError):
         Example.from_dict(predicted, annots)

@@ -42,6 +42,7 @@ test_ner_apple = [
     ]
 ]
 @pytest.fixture
 def tagged_doc():
     text = "Sarah's sister flew to Silicon Valley via London."

@@ -26,7 +26,9 @@ def test_util_minibatch(doc_sizes, expected_batches):
     docs = [get_random_doc(doc_size) for doc_size in doc_sizes]
     tol = 0.2
     batch_size = 1000
-    batches = list(minibatch_by_words(docs, size=batch_size, tolerance=tol, discard_oversize=True))
+    batches = list(
+        minibatch_by_words(docs, size=batch_size, tolerance=tol, discard_oversize=True)
+    )
     assert [len(batch) for batch in batches] == expected_batches
     max_size = batch_size + batch_size * tol
@@ -50,7 +52,7 @@ def test_util_minibatch_oversize(doc_sizes, expected_batches):
     docs = [get_random_doc(doc_size) for doc_size in doc_sizes]
     tol = 0.2
     batch_size = 1000
-    batches = list(minibatch_by_words(docs, size=batch_size, tolerance=tol, discard_oversize=False))
+    batches = list(
+        minibatch_by_words(docs, size=batch_size, tolerance=tol, discard_oversize=False)
+    )
     assert [len(batch) for batch in batches] == expected_batches

@@ -27,7 +27,15 @@ def make_tempdir():
 def get_doc(
-    vocab, words=[], pos=None, heads=None, deps=None, tags=None, ents=None, lemmas=None, morphs=None
+    vocab,
+    words=[],
+    pos=None,
+    heads=None,
+    deps=None,
+    tags=None,
+    ents=None,
+    lemmas=None,
+    morphs=None,
 ):
     """Create Doc object from given vocab, words and annotations."""
     if deps and not heads:

@@ -9,16 +9,7 @@ from ..attrs import SPACY, ORTH, intify_attr
 from ..errors import Errors
-ALL_ATTRS = (
-    "ORTH",
-    "TAG",
-    "HEAD",
-    "DEP",
-    "ENT_IOB",
-    "ENT_TYPE",
-    "LEMMA",
-    "MORPH"
-)
+ALL_ATTRS = ("ORTH", "TAG", "HEAD", "DEP", "ENT_IOB", "ENT_TYPE", "LEMMA", "MORPH")
 class DocBin(object):