mirror of https://github.com/explosion/spaCy.git (synced 2025-03-04 11:25:51 +03:00)

Commit d53723aa4f: Merge from whatif/arrow
setup.py (5 changes)

@@ -23,6 +23,8 @@ Options.docstrings = True
 PACKAGES = find_packages()
 MOD_NAMES = [
+    "spacy.gold.align",
+    "spacy.gold.new_example",
     "spacy.parts_of_speech",
     "spacy.strings",
     "spacy.lexeme",
@@ -35,13 +37,14 @@ MOD_NAMES = [
     "spacy.syntax.stateclass",
     "spacy.syntax._state",
     "spacy.tokenizer",
+    "spacy.syntax.gold_parse",
     "spacy.syntax.nn_parser",
     "spacy.syntax._parser_model",
     "spacy.syntax._beam_utils",
     "spacy.syntax.nonproj",
     "spacy.syntax.transition_system",
     "spacy.syntax.arc_eager",
-    "spacy.gold",
+    "spacy.gold.gold_io",
     "spacy.tokens.doc",
     "spacy.tokens.span",
     "spacy.tokens.token",
(next changed file, name not captured)

@@ -2,6 +2,7 @@ import re
 from ...gold import Example
 from ...gold import iob_to_biluo, spans_from_biluo_tags, biluo_tags_from_offsets
+from ...gold import TokenAnnotation
 from ...language import Language
 from ...tokens import Doc, Token
 from .conll_ner2json import n_sents_info
@@ -284,13 +285,8 @@ def example_from_conllu_sentence(
         spaces.append(t._.merged_spaceafter)
     ent_offsets = [(e.start_char, e.end_char, e.label_) for e in doc.ents]
     ents = biluo_tags_from_offsets(doc, ent_offsets)
-    raw = ""
-    for word, space in zip(words, spaces):
-        raw += word
-        if space:
-            raw += " "
-    example = Example(doc=raw)
-    example.set_token_annotation(
+    example = Example(doc=Doc(vocab, words=words, spaces=spaces))
+    example.token_annotation = TokenAnnotation(
         ids=ids,
         words=words,
         tags=tags,
(next changed file, name not captured)

@@ -13,7 +13,11 @@ from thinc.api import Model, use_pytorch_for_gpu_memory
 import random
 
 from ..gold import GoldCorpus
+<<<<<<< HEAD
+from ..gold import Example
+=======
 from ..lookups import Lookups
+>>>>>>> origin/develop
 from .. import util
 from ..errors import Errors
 from ..ml import models  # don't remove - required to load the built-in architectures
@@ -223,7 +227,6 @@ def train(
     limit = training["limit"]
     msg.info("Loading training corpus")
     corpus = GoldCorpus(data_paths["train"], data_paths["dev"], limit=limit)
-
     # verify textcat config
     if "textcat" in nlp_config["pipeline"]:
         textcat_labels = set(nlp.get_pipe("textcat").labels)
@@ -281,9 +284,7 @@ def train(
         nlp.resume_training()
     else:
         msg.info(f"Initializing the nlp pipeline: {nlp.pipe_names}")
-        nlp.begin_training(
-            lambda: corpus.train_examples
-        )
+        nlp.begin_training(lambda: corpus.train_dataset(nlp))
 
     # Update tag map with provided mapping
     nlp.vocab.morphology.tag_map.update(tag_map)
@@ -373,6 +374,16 @@ def train(
 def create_train_batches(nlp, corpus, cfg):
     epochs_todo = cfg.get("max_epochs", 0)
     while True:
+<<<<<<< HEAD
+        train_examples = list(corpus.train_dataset(
+            nlp,
+            noise_level=0.0,
+            orth_variant_level=cfg["orth_variant_level"],
+            gold_preproc=cfg["gold_preproc"],
+            max_length=cfg["max_length"],
+            ignore_misaligned=True
+        ))
+=======
         train_examples = list(
             corpus.train_dataset(
                 nlp,
@@ -383,6 +394,7 @@ def create_train_batches(nlp, corpus, cfg):
                 ignore_misaligned=True,
             )
         )
+>>>>>>> origin/develop
         if len(train_examples) == 0:
             raise ValueError(Errors.E988)
         random.shuffle(train_examples)
@@ -413,6 +425,7 @@ def create_evaluation_callback(nlp, optimizer, corpus, cfg):
             nlp, gold_preproc=cfg["gold_preproc"], ignore_misaligned=True
         )
     )
+
     n_words = sum(len(ex.doc) for ex in dev_examples)
     start_time = timer()
(next changed file, name not captured)

@@ -620,6 +620,14 @@ class Errors(object):
     E999 = ("Encountered an unexpected format for the dictionary holding "
             "gold annotations: {gold_dict}")
 
+    # TODO: These were left over after a merge, but I couldn't find them?
+    #E983 = ("Each link annotation should refer to a dictionary with at most one "
+    #        "identifier mapping to 1.0, and all others to 0.0.")
+    #E984 = ("The offsets of the annotations for 'links' need to refer exactly "
+    #        "to the offsets of the 'entities' annotations.")
+    #E985 = ("The 'ent_iob' attribute of a Token should be an integer indexing "
+    #        "into {values}, but found {value}.")
+
 
 @add_codes
 class TempErrors(object):
(deleted file, 68 lines, name not captured)

@@ -1,68 +0,0 @@
from cymem.cymem cimport Pool

from .typedefs cimport attr_t
from .syntax.transition_system cimport Transition

from .tokens import Doc


cdef struct GoldParseC:
    int* tags
    int* heads
    int* has_dep
    int* sent_start
    attr_t* labels
    int** brackets
    Transition* ner


cdef class GoldParse:
    cdef Pool mem

    cdef GoldParseC c
    cdef readonly TokenAnnotation orig

    cdef int length
    cdef public int loss
    cdef public list words
    cdef public list tags
    cdef public list pos
    cdef public list morphs
    cdef public list lemmas
    cdef public list sent_starts
    cdef public list heads
    cdef public list labels
    cdef public dict orths
    cdef public list ner
    cdef public dict brackets
    cdef public dict cats
    cdef public dict links

    cdef readonly list cand_to_gold
    cdef readonly list gold_to_cand


cdef class TokenAnnotation:
    cdef public list ids
    cdef public list words
    cdef public list tags
    cdef public list pos
    cdef public list morphs
    cdef public list lemmas
    cdef public list heads
    cdef public list deps
    cdef public list entities
    cdef public list sent_starts
    cdef public dict brackets_by_start


cdef class DocAnnotation:
    cdef public object cats
    cdef public object links


cdef class Example:
    cdef public object doc
    cdef public TokenAnnotation token_annotation
    cdef public DocAnnotation doc_annotation
    cdef public object goldparse
spacy/gold.pyx (1419 changes): file diff suppressed because it is too large.
spacy/gold/__init__.pxd (new file, empty)
spacy/gold/__init__.py (new file, 13 lines)

from .corpus import GoldCorpus
from ..syntax.gold_parse import GoldParse
from .example import Example
from .annotation import TokenAnnotation, DocAnnotation
from .align import align

from .iob_utils import iob_to_biluo, biluo_to_iob
from .iob_utils import biluo_tags_from_offsets, offsets_from_biluo_tags
from .iob_utils import spans_from_biluo_tags
from .iob_utils import tags_to_entities

from .gold_io import docs_to_json
from .gold_io import read_json_file
spacy/gold/align.pxd (new file, 8 lines)

cdef class Alignment:
    cdef public object cost
    cdef public object i2j
    cdef public object j2i
    cdef public object i2j_multi
    cdef public object j2i_multi
    cdef public object cand_to_gold
    cdef public object gold_to_cand
spacy/gold/align.pyx (new file, 101 lines)

import numpy
from ..errors import Errors, AlignmentError


cdef class Alignment:
    def __init__(self, spacy_words, gold_words):
        # Do many-to-one alignment for misaligned tokens.
        # If we over-segment, we'll have one gold word that covers a sequence
        # of predicted words
        # If we under-segment, we'll have one predicted word that covers a
        # sequence of gold words.
        # If we "mis-segment", we'll have a sequence of predicted words covering
        # a sequence of gold words. That's many-to-many -- we don't do that
        # except for NER spans where the start and end can be aligned.
        cost, i2j, j2i, i2j_multi, j2i_multi = align(spacy_words, gold_words)
        self.cost = cost
        self.i2j = i2j
        self.j2i = j2i
        self.i2j_multi = i2j_multi
        self.j2i_multi = j2i_multi
        self.cand_to_gold = [(j if j >= 0 else None) for j in i2j]
        self.gold_to_cand = [(i if i >= 0 else None) for i in j2i]


def align(tokens_a, tokens_b):
    """Calculate alignment tables between two tokenizations.

    tokens_a (List[str]): The candidate tokenization.
    tokens_b (List[str]): The reference tokenization.
    RETURNS: (tuple): A 5-tuple consisting of the following information:
      * cost (int): The number of misaligned tokens.
      * a2b (List[int]): Mapping of indices in `tokens_a` to indices in `tokens_b`.
        For instance, if `a2b[4] == 6`, that means that `tokens_a[4]` aligns
        to `tokens_b[6]`. If there's no one-to-one alignment for a token,
        it has the value -1.
      * b2a (List[int]): The same as `a2b`, but mapping the other direction.
      * a2b_multi (Dict[int, int]): A dictionary mapping indices in `tokens_a`
        to indices in `tokens_b`, where multiple tokens of `tokens_a` align to
        the same token of `tokens_b`.
      * b2a_multi (Dict[int, int]): As with `a2b_multi`, but mapping the other
        direction.
    """
    tokens_a = _normalize_for_alignment(tokens_a)
    tokens_b = _normalize_for_alignment(tokens_b)
    cost = 0
    a2b = numpy.empty(len(tokens_a), dtype="i")
    b2a = numpy.empty(len(tokens_b), dtype="i")
    a2b.fill(-1)
    b2a.fill(-1)
    a2b_multi = {}
    b2a_multi = {}
    i = 0
    j = 0
    offset_a = 0
    offset_b = 0
    while i < len(tokens_a) and j < len(tokens_b):
        a = tokens_a[i][offset_a:]
        b = tokens_b[j][offset_b:]
        if a == b:
            if offset_a == offset_b == 0:
                a2b[i] = j
                b2a[j] = i
            elif offset_a == 0:
                cost += 2
                a2b_multi[i] = j
            elif offset_b == 0:
                cost += 2
                b2a_multi[j] = i
            offset_a = offset_b = 0
            i += 1
            j += 1
        elif a == "":
            assert offset_a == 0
            cost += 1
            i += 1
        elif b == "":
            assert offset_b == 0
            cost += 1
            j += 1
        elif b.startswith(a):
            cost += 1
            if offset_a == 0:
                a2b_multi[i] = j
            i += 1
            offset_a = 0
            offset_b += len(a)
        elif a.startswith(b):
            cost += 1
            if offset_b == 0:
                b2a_multi[j] = i
            j += 1
            offset_b = 0
            offset_a += len(b)
        else:
            assert "".join(tokens_a) != "".join(tokens_b)
            raise AlignmentError(Errors.E186.format(tok_a=tokens_a, tok_b=tokens_b))
    return cost, a2b, b2a, a2b_multi, b2a_multi


def _normalize_for_alignment(tokens):
    return [w.replace(" ", "").lower() for w in tokens]
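A quick illustration of the align() helper added above (a minimal sketch with made-up tokenizations, not part of the commit):

    from spacy.gold.align import align

    # Candidate vs. reference tokenizations that disagree on "'s"
    cost, a2b, b2a, a2b_multi, b2a_multi = align(
        ["I", "listened", "to", "obama", "'", "s", "podcasts", "."],
        ["i", "listened", "to", "obama", "'s", "podcasts", "."],
    )
    # "'" and "s" have no one-to-one match, so a2b[4] == a2b[5] == -1,
    # while a2b_multi maps both candidate tokens 4 and 5 onto reference token 4.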
spacy/gold/annotation.py (new file, 150 lines)

from .iob_utils import biluo_tags_from_offsets


class TokenAnnotation:
    def __init__(
        self,
        ids=None,
        words=None,
        tags=None,
        pos=None,
        morphs=None,
        lemmas=None,
        heads=None,
        deps=None,
        entities=None,
        sent_starts=None,
        brackets=None,
    ):
        self.ids = ids if ids else []
        self.words = words if words else []
        self.tags = tags if tags else []
        self.pos = pos if pos else []
        self.morphs = morphs if morphs else []
        self.lemmas = lemmas if lemmas else []
        self.heads = heads if heads else []
        self.deps = deps if deps else []
        self.entities = entities if entities else []
        self.sent_starts = sent_starts if sent_starts else []
        self.brackets_by_start = {}
        if brackets:
            for b_start, b_end, b_label in brackets:
                self.brackets_by_start.setdefault(b_start, []).append((b_end, b_label))

    def get_field(self, field):
        if field == "id":
            return self.ids
        elif field == "word":
            return self.words
        elif field == "tag":
            return self.tags
        elif field == "pos":
            return self.pos
        elif field == "morph":
            return self.morphs
        elif field == "lemma":
            return self.lemmas
        elif field == "head":
            return self.heads
        elif field == "dep":
            return self.deps
        elif field == "ner":
            return self.entities
        elif field == "sent_start":
            return self.sent_starts
        else:
            raise ValueError(f"Unknown field: {field}")

    @property
    def brackets(self):
        brackets = []
        for start, ends_labels in self.brackets_by_start.items():
            for end, label in ends_labels:
                brackets.append((start, end, label))
        return brackets

    @classmethod
    def from_dict(cls, token_dict):
        return cls(
            ids=token_dict.get("ids", None),
            words=token_dict.get("words", None),
            tags=token_dict.get("tags", None),
            pos=token_dict.get("pos", None),
            morphs=token_dict.get("morphs", None),
            lemmas=token_dict.get("lemmas", None),
            heads=token_dict.get("heads", None),
            deps=token_dict.get("deps", None),
            entities=token_dict.get("entities", None),
            sent_starts=token_dict.get("sent_starts", None),
            brackets=token_dict.get("brackets", None),
        )

    def to_dict(self):
        return {
            "ids": self.ids,
            "words": self.words,
            "tags": self.tags,
            "pos": self.pos,
            "morphs": self.morphs,
            "lemmas": self.lemmas,
            "heads": self.heads,
            "deps": self.deps,
            "entities": self.entities,
            "sent_starts": self.sent_starts,
            "brackets": self.brackets,
        }

    def get_id(self, i):
        return self.ids[i] if i < len(self.ids) else i

    def get_word(self, i):
        return self.words[i] if i < len(self.words) else ""

    def get_tag(self, i):
        return self.tags[i] if i < len(self.tags) else "-"

    def get_pos(self, i):
        return self.pos[i] if i < len(self.pos) else ""

    def get_morph(self, i):
        return self.morphs[i] if i < len(self.morphs) else ""

    def get_lemma(self, i):
        return self.lemmas[i] if i < len(self.lemmas) else ""

    def get_head(self, i):
        return self.heads[i] if i < len(self.heads) else i

    def get_dep(self, i):
        return self.deps[i] if i < len(self.deps) else ""

    def get_entity(self, i):
        return self.entities[i] if i < len(self.entities) else "-"

    def get_sent_start(self, i):
        return self.sent_starts[i] if i < len(self.sent_starts) else None

    def __str__(self):
        return str(self.to_dict())

    def __repr__(self):
        return self.__str__()


class DocAnnotation:
    def __init__(self, cats=None, links=None):
        self.cats = cats if cats else {}
        self.links = links if links else {}

    @classmethod
    def from_dict(cls, doc_dict):
        return cls(cats=doc_dict.get("cats", None), links=doc_dict.get("links", None))

    def to_dict(self):
        return {"cats": self.cats, "links": self.links}

    def __str__(self):
        return str(self.to_dict())

    def __repr__(self):
        return self.__str__()
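For orientation, the TokenAnnotation container above round-trips through plain dicts, and out-of-range lookups fall back to per-field defaults (a small sketch, not part of the diff):

    annot = TokenAnnotation.from_dict({"words": ["London", "calling"], "tags": ["NNP", "VBG"]})
    assert annot.get_field("tag") == ["NNP", "VBG"]
    assert annot.get_tag(5) == "-"      # missing tags default to "-"
    assert annot.get_head(5) == 5       # missing heads default to the index itself
    assert annot.to_dict()["words"] == ["London", "calling"]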
spacy/gold/augment.py (new file, 131 lines)

import random
import itertools
from .example import Example
from .annotation import TokenAnnotation


def make_orth_variants(nlp, example, orth_variant_level=0.0):
    if random.random() >= orth_variant_level:
        return example
    if not example.token_annotation:
        return example
    raw = example.text
    lower = False
    if random.random() >= 0.5:
        lower = True
        if raw is not None:
            raw = raw.lower()
    ndsv = nlp.Defaults.single_orth_variants
    ndpv = nlp.Defaults.paired_orth_variants
    # modify words in paragraph_tuples
    variant_example = Example(doc=nlp.make_doc(raw))
    token_annotation = example.token_annotation
    words = token_annotation.words
    tags = token_annotation.tags
    if not words or not tags:
        # add the unmodified annotation
        token_dict = token_annotation.to_dict()
        variant_example.token_annotation = TokenAnnotation(**token_dict)
    else:
        if lower:
            words = [w.lower() for w in words]
        # single variants
        punct_choices = [random.choice(x["variants"]) for x in ndsv]
        for word_idx in range(len(words)):
            for punct_idx in range(len(ndsv)):
                if (
                    tags[word_idx] in ndsv[punct_idx]["tags"]
                    and words[word_idx] in ndsv[punct_idx]["variants"]
                ):
                    words[word_idx] = punct_choices[punct_idx]
        # paired variants
        punct_choices = [random.choice(x["variants"]) for x in ndpv]
        for word_idx in range(len(words)):
            for punct_idx in range(len(ndpv)):
                if tags[word_idx] in ndpv[punct_idx]["tags"] and words[
                    word_idx
                ] in itertools.chain.from_iterable(ndpv[punct_idx]["variants"]):
                    # backup option: random left vs. right from pair
                    pair_idx = random.choice([0, 1])
                    # best option: rely on paired POS tags like `` / ''
                    if len(ndpv[punct_idx]["tags"]) == 2:
                        pair_idx = ndpv[punct_idx]["tags"].index(tags[word_idx])
                    # next best option: rely on position in variants
                    # (may not be unambiguous, so order of variants matters)
                    else:
                        for pair in ndpv[punct_idx]["variants"]:
                            if words[word_idx] in pair:
                                pair_idx = pair.index(words[word_idx])
                    words[word_idx] = punct_choices[punct_idx][pair_idx]

        token_dict = token_annotation.to_dict()
        token_dict["words"] = words
        token_dict["tags"] = tags
        variant_example.token_annotation = TokenAnnotation(**token_dict)
    # modify raw to match variant_paragraph_tuples
    if raw is not None:
        variants = []
        for single_variants in ndsv:
            variants.extend(single_variants["variants"])
        for paired_variants in ndpv:
            variants.extend(
                list(itertools.chain.from_iterable(paired_variants["variants"]))
            )
        # store variants in reverse length order to be able to prioritize
        # longer matches (e.g., "---" before "--")
        variants = sorted(variants, key=lambda x: len(x))
        variants.reverse()
        variant_raw = ""
        raw_idx = 0
        # add initial whitespace
        while raw_idx < len(raw) and raw[raw_idx].isspace():
            variant_raw += raw[raw_idx]
            raw_idx += 1
        for word in variant_example.token_annotation.words:
            match_found = False
            # skip whitespace words
            if word.isspace():
                match_found = True
            # add identical word
            elif word not in variants and raw[raw_idx:].startswith(word):
                variant_raw += word
                raw_idx += len(word)
                match_found = True
            # add variant word
            else:
                for variant in variants:
                    if not match_found and raw[raw_idx:].startswith(variant):
                        raw_idx += len(variant)
                        variant_raw += word
                        match_found = True
            # something went wrong, abort
            # (add a warning message?)
            if not match_found:
                return example
            # add following whitespace
            while raw_idx < len(raw) and raw[raw_idx].isspace():
                variant_raw += raw[raw_idx]
                raw_idx += 1
        variant_example.doc = variant_raw
        return variant_example
    return variant_example


def add_noise(orig, noise_level):
    if random.random() >= noise_level:
        return orig
    elif type(orig) == list:
        corrupted = [_corrupt(word, noise_level) for word in orig]
        corrupted = [w for w in corrupted if w]
        return corrupted
    else:
        return "".join(_corrupt(c, noise_level) for c in orig)


def _corrupt(c, noise_level):
    if random.random() >= noise_level:
        return c
    elif c in [".", "'", "!", "?", ","]:
        return "\n"
    else:
        return c.lower()
spacy/gold/corpus.py (new file, 226 lines)

import random
import shutil
import tempfile
import srsly
from pathlib import Path
import itertools
from ..tokens import Doc
from .. import util
from ..errors import Errors, AlignmentError
from .gold_io import read_json_file, json_to_annotations
from .augment import make_orth_variants, add_noise
from .new_example import NewExample as Example


class GoldCorpus(object):
    """An annotated corpus, using the JSON file format. Manages
    annotations for tagging, dependency parsing and NER.

    DOCS: https://spacy.io/api/goldcorpus
    """

    def __init__(self, train, dev, gold_preproc=False, limit=None):
        """Create a GoldCorpus.

        train (str / Path): File or directory of training data.
        dev (str / Path): File or directory of development data.
        RETURNS (GoldCorpus): The newly created object.
        """
        self.limit = limit
        if isinstance(train, str) or isinstance(train, Path):
            train = self.read_annotations(self.walk_corpus(train))
            dev = self.read_annotations(self.walk_corpus(dev))
        # Write temp directory with one doc per file, so we can shuffle and stream
        self.tmp_dir = Path(tempfile.mkdtemp())
        self.write_msgpack(self.tmp_dir / "train", train, limit=self.limit)
        self.write_msgpack(self.tmp_dir / "dev", dev, limit=self.limit)

    def __del__(self):
        shutil.rmtree(self.tmp_dir)

    @staticmethod
    def write_msgpack(directory, examples, limit=0):
        if not directory.exists():
            directory.mkdir()
        n = 0
        for i, ex_dict in enumerate(examples):
            text = ex_dict["text"]
            srsly.write_msgpack(directory / f"{i}.msg", (text, ex_dict))
            n += 1
            if limit and n >= limit:
                break

    @staticmethod
    def walk_corpus(path):
        path = util.ensure_path(path)
        if not path.is_dir():
            return [path]
        paths = [path]
        locs = []
        seen = set()
        for path in paths:
            if str(path) in seen:
                continue
            seen.add(str(path))
            if path.parts[-1].startswith("."):
                continue
            elif path.is_dir():
                paths.extend(path.iterdir())
            elif path.parts[-1].endswith((".json", ".jsonl")):
                locs.append(path)
        return locs

    @staticmethod
    def read_annotations(locs, limit=0):
        """ Yield training examples """
        i = 0
        for loc in locs:
            loc = util.ensure_path(loc)
            file_name = loc.parts[-1]
            if file_name.endswith("json"):
                examples = read_json_file(loc)
            elif file_name.endswith("jsonl"):
                gold_tuples = srsly.read_jsonl(loc)
                first_gold_tuple = next(gold_tuples)
                gold_tuples = itertools.chain([first_gold_tuple], gold_tuples)
                # TODO: proper format checks with schemas
                if isinstance(first_gold_tuple, dict):
                    if first_gold_tuple.get("paragraphs", None):
                        examples = []
                        for json_doc in gold_tuples:
                            examples.extend(json_to_annotations(json_doc))
                    elif first_gold_tuple.get("doc_annotation", None):
                        examples = []
                        for ex_dict in gold_tuples:
                            doc = ex_dict.get("doc", None)
                            if doc is None:
                                doc = ex_dict.get("text", None)
                            if not (
                                doc is None
                                or isinstance(doc, Doc)
                                or isinstance(doc, str)
                            ):
                                raise ValueError(Errors.E987.format(type=type(doc)))
                            examples.append(ex_dict)

            elif file_name.endswith("msg"):
                text, ex_dict = srsly.read_msgpack(loc)
                examples = [ex_dict]
            else:
                supported = ("json", "jsonl", "msg")
                raise ValueError(Errors.E124.format(path=loc, formats=supported))
            try:
                for example in examples:
                    yield example
                    i += 1
                    if limit and i >= limit:
                        return
            except KeyError as e:
                msg = "Missing key {}".format(e)
                raise KeyError(Errors.E996.format(file=file_name, msg=msg))
            except UnboundLocalError as e:
                msg = "Unexpected document structure"
                raise ValueError(Errors.E996.format(file=file_name, msg=msg))

    @property
    def dev_annotations(self):
        locs = (self.tmp_dir / "dev").iterdir()
        yield from self.read_annotations(locs, limit=self.limit)

    @property
    def train_annotations(self):
        locs = (self.tmp_dir / "train").iterdir()
        yield from self.read_annotations(locs, limit=self.limit)

    def count_train(self):
        """Returns count of words in train examples"""
        n = 0
        i = 0
        for eg_dict in self.train_annotations:
            n += len(eg_dict["token_annotation"]["words"])
            if self.limit and i >= self.limit:
                break
            i += 1
        return n

    def train_dataset(
        self,
        nlp,
        gold_preproc=False,
        max_length=None,
        noise_level=0.0,
        orth_variant_level=0.0,
        ignore_misaligned=False,
    ):
        locs = list((self.tmp_dir / "train").iterdir())
        random.shuffle(locs)
        train_annotations = self.read_annotations(locs, limit=self.limit)
        examples = self.iter_examples(
            nlp,
            train_annotations,
            gold_preproc,
            max_length=max_length,
            noise_level=noise_level,
            orth_variant_level=orth_variant_level,
            make_projective=True,
            ignore_misaligned=ignore_misaligned,
        )
        yield from examples

    def train_dataset_without_preprocessing(
        self, nlp, gold_preproc=False, ignore_misaligned=False
    ):
        examples = self.iter_examples(
            nlp,
            self.train_annotations,
            gold_preproc=gold_preproc,
            ignore_misaligned=ignore_misaligned,
        )
        yield from examples

    def dev_dataset(self, nlp, gold_preproc=False, ignore_misaligned=False):
        examples = self.iter_examples(
            nlp,
            self.dev_annotations,
            gold_preproc=gold_preproc,
            ignore_misaligned=ignore_misaligned,
        )
        yield from examples

    @classmethod
    def iter_examples(
        cls,
        nlp,
        annotations,
        gold_preproc,
        max_length=None,
        noise_level=0.0,
        orth_variant_level=0.0,
        make_projective=False,
        ignore_misaligned=False,
    ):
        """ Setting gold_preproc will result in creating a doc per sentence """
        for eg_dict in annotations:
            if eg_dict["text"]:
                example = Example.from_dict(
                    nlp.make_doc(eg_dict["text"]),
                    eg_dict
                )
            else:
                example = Example.from_dict(
                    Doc(nlp.vocab, words=eg_dict["words"]),
                    eg_dict
                )
            if gold_preproc:
                # TODO: Data augmentation
                examples = example.split_sents()
            else:
                examples = [example]
            for ex in examples:
                if (not max_length) or len(ex.predicted) < max_length:
                    if ignore_misaligned:
                        try:
                            _ = ex._deprecated_get_gold()
                        except AlignmentError:
                            continue
                    yield ex
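As a usage sketch (the file paths and limit are only placeholders, not from the commit), the GoldCorpus above is constructed from train/dev JSON paths and streams Example objects:

    import spacy
    from spacy.gold import GoldCorpus

    nlp = spacy.blank("en")
    corpus = GoldCorpus("train.json", "dev.json", limit=1000)
    print(corpus.count_train())  # number of training words
    for example in corpus.train_dataset(nlp, gold_preproc=False, ignore_misaligned=True):
        pass  # each item pairs a predicted Doc with its gold annotations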
spacy/gold/example.py (new file, 261 lines)

import numpy
from .annotation import TokenAnnotation, DocAnnotation
from .iob_utils import spans_from_biluo_tags, biluo_tags_from_offsets
from .align import Alignment
from ..errors import Errors, AlignmentError
from ..tokens import Doc


def annotations2doc(doc, doc_annot, tok_annot):
    # TODO: Improve and test this
    words = tok_annot.words or [tok.text for tok in doc]
    fields = {
        "tags": "TAG",
        "pos": "POS",
        "lemmas": "LEMMA",
        "deps": "DEP",
    }
    attrs = []
    values = []
    for field, attr in fields.items():
        value = getattr(tok_annot, field)
        # Unset fields will be empty lists.
        if value:
            attrs.append(attr)
            values.append([doc.vocab.strings.add(v) for v in value])
    if tok_annot.heads:
        attrs.append("HEAD")
        values.append([h - i for i, h in enumerate(tok_annot.heads)])
    output = Doc(doc.vocab, words=words)
    if values:
        array = numpy.array(values, dtype="uint64")
        output = output.from_array(attrs, array.T)
    if tok_annot.entities:
        output.ents = spans_from_biluo_tags(output, tok_annot.entities)
    doc.cats = dict(doc_annot.cats)
    # TODO: Calculate token.ent_kb_id from links.
    # We need to fix this and the doc.ents thing, both should be doc
    # annotations.
    return doc


class Example:
    def __init__(self, doc, doc_annotation=None, token_annotation=None):
        """ Doc can either be text, or an actual Doc """
        if not isinstance(doc, Doc):
            raise TypeError("Must pass Doc instance")
        self.predicted = doc
        self.doc = doc
        self.doc_annotation = doc_annotation if doc_annotation else DocAnnotation()
        self.token_annotation = (
            token_annotation if token_annotation else TokenAnnotation()
        )
        self._alignment = None
        self.reference = annotations2doc(
            self.doc,
            self.doc_annotation,
            self.token_annotation
        )

    @property
    def x(self):
        return self.predicted

    @property
    def y(self):
        return self.reference

    def _deprecated_get_gold(self, make_projective=False):
        from ..syntax.gold_parse import get_parses_from_example

        _, gold = get_parses_from_example(self, make_projective=make_projective)[0]
        return gold

    @classmethod
    def from_dict(cls, example_dict, doc=None):
        if example_dict is None:
            raise ValueError("Example.from_dict expected dict, received None")
        if doc is None:
            raise ValueError("Must pass doc")
        # TODO: This is ridiculous...
        token_dict = example_dict.get("token_annotation", {})
        doc_dict = example_dict.get("doc_annotation", {})
        for key, value in example_dict.items():
            if key in ("token_annotation", "doc_annotation"):
                pass
            elif key in ("cats", "links"):
                doc_dict[key] = value
            else:
                token_dict[key] = value
        if token_dict.get("entities"):
            entities = token_dict["entities"]
            if isinstance(entities[0], (list, tuple)):
                token_dict["entities"] = biluo_tags_from_offsets(doc, entities)
        token_annotation = TokenAnnotation.from_dict(token_dict)
        doc_annotation = DocAnnotation.from_dict(doc_dict)
        return cls(
            doc=doc, doc_annotation=doc_annotation, token_annotation=token_annotation
        )

    @property
    def alignment(self):
        if self._alignment is None:
            if self.doc is None:
                return None
            spacy_words = [token.orth_ for token in self.predicted]
            gold_words = [token.orth_ for token in self.reference]
            if gold_words == []:
                gold_words = spacy_words
            self._alignment = Alignment(spacy_words, gold_words)
        return self._alignment

    def to_dict(self):
        """ Note that this method does NOT export the doc, only the annotations ! """
        token_dict = self.token_annotation.to_dict()
        doc_dict = self.doc_annotation.to_dict()
        return {"token_annotation": token_dict, "doc_annotation": doc_dict}

    @property
    def text(self):
        if self.doc is None:
            return None
        if isinstance(self.doc, Doc):
            return self.doc.text
        return self.doc

    def get_aligned(self, field):
        """Return an aligned array for a token annotation field."""
        if self.doc is None:
            return self.token_annotation.get_field(field)
        doc = self.doc
        if field == "word":
            return [token.orth_ for token in doc]
        gold_values = self.token_annotation.get_field(field)
        alignment = self.alignment
        i2j_multi = alignment.i2j_multi
        gold_to_cand = alignment.gold_to_cand
        cand_to_gold = alignment.cand_to_gold

        output = []
        for i, gold_i in enumerate(cand_to_gold):
            if doc[i].text.isspace():
                output.append(None)
            elif gold_i is None:
                if i in i2j_multi:
                    output.append(gold_values[i2j_multi[i]])
                else:
                    output.append(None)
            else:
                output.append(gold_values[gold_i])
        return output

    def set_doc_annotation(self, cats=None, links=None):
        if cats:
            self.doc_annotation.cats = cats
        if links:
            self.doc_annotation.links = links

    def split_sents(self):
        """ Split the token annotations into multiple Examples based on
        sent_starts and return a list of the new Examples"""
        if not self.token_annotation.words:
            return [self]
        s_ids, s_words, s_tags, s_pos, s_morphs = [], [], [], [], []
        s_lemmas, s_heads, s_deps, s_ents, s_sent_starts = [], [], [], [], []
        s_brackets = []
        sent_start_i = 0
        t = self.token_annotation
        split_examples = []
        for i in range(len(t.words)):
            if i > 0 and t.sent_starts[i] == 1:
                split_examples.append(
                    Example(
                        doc=Doc(self.doc.vocab, words=s_words),
                        token_annotation=TokenAnnotation(
                            ids=s_ids,
                            words=s_words,
                            tags=s_tags,
                            pos=s_pos,
                            morphs=s_morphs,
                            lemmas=s_lemmas,
                            heads=s_heads,
                            deps=s_deps,
                            entities=s_ents,
                            sent_starts=s_sent_starts,
                            brackets=s_brackets,
                        ),
                        doc_annotation=self.doc_annotation
                    )
                )
                s_ids, s_words, s_tags, s_pos, s_heads = [], [], [], [], []
                s_deps, s_ents, s_morphs, s_lemmas = [], [], [], []
                s_sent_starts, s_brackets = [], []
                sent_start_i = i
            s_ids.append(t.get_id(i))
            s_words.append(t.get_word(i))
            s_tags.append(t.get_tag(i))
            s_pos.append(t.get_pos(i))
            s_morphs.append(t.get_morph(i))
            s_lemmas.append(t.get_lemma(i))
            s_heads.append(t.get_head(i) - sent_start_i)
            s_deps.append(t.get_dep(i))
            s_ents.append(t.get_entity(i))
            s_sent_starts.append(t.get_sent_start(i))
            for b_end, b_label in t.brackets_by_start.get(i, []):
                s_brackets.append((i - sent_start_i, b_end - sent_start_i, b_label))
            i += 1
        split_examples.append(
            Example(
                doc=Doc(self.doc.vocab, words=s_words),
                token_annotation=TokenAnnotation(
                    ids=s_ids,
                    words=s_words,
                    tags=s_tags,
                    pos=s_pos,
                    morphs=s_morphs,
                    lemmas=s_lemmas,
                    heads=s_heads,
                    deps=s_deps,
                    entities=s_ents,
                    sent_starts=s_sent_starts,
                    brackets=s_brackets,
                ),
                doc_annotation=self.doc_annotation
            )
        )
        return split_examples

    @classmethod
    def to_example_objects(cls, examples, make_doc=None, keep_raw_text=False):
        """
        Return a list of Example objects, from a variety of input formats.
        make_doc needs to be provided when the examples contain text strings and keep_raw_text=False
        """
        if isinstance(examples, Example):
            return [examples]
        if isinstance(examples, tuple):
            examples = [examples]
        converted_examples = []
        for ex in examples:
            if isinstance(ex, Example):
                converted_examples.append(ex)
            # convert string to Doc to Example
            elif isinstance(ex, str):
                if keep_raw_text:
                    converted_examples.append(Example(doc=ex))
                else:
                    doc = make_doc(ex)
                    converted_examples.append(Example(doc=doc))
            # convert tuples to Example
            elif isinstance(ex, tuple) and len(ex) == 2:
                doc, gold = ex
                # convert string to Doc
                if isinstance(doc, str) and not keep_raw_text:
                    doc = make_doc(doc)
                converted_examples.append(Example.from_dict(gold, doc=doc))
            # convert Doc to Example
            elif isinstance(ex, Doc):
                converted_examples.append(Example(doc=ex))
            else:
                converted_examples.append(ex)
        return converted_examples
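A small sketch of the dict-based constructor above (illustrative values; assumes an existing nlp object, not part of the commit):

    doc = nlp.make_doc("I like London")
    eg = Example.from_dict(
        {"words": ["I", "like", "London"], "entities": [(7, 13, "LOC")]},
        doc=doc,
    )
    # Offset entities are converted to BILUO tags on the way in, and
    # get_aligned() projects gold fields onto the predicted tokenization.
    ner_tags = eg.get_aligned("ner")   # ["O", "O", "U-LOC"]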
spacy/gold/gold_io.pyx (new file, 198 lines)

import warnings
import srsly
from .. import util
from ..errors import Warnings
from ..tokens import Token, Doc
from .iob_utils import biluo_tags_from_offsets


def merge_sents(sents):
    m_deps = [[], [], [], [], [], []]
    m_cats = {}
    m_brackets = []
    i = 0
    for (ids, words, tags, heads, labels, ner), (cats, brackets) in sents:
        m_deps[0].extend(id_ + i for id_ in ids)
        m_deps[1].extend(words)
        m_deps[2].extend(tags)
        m_deps[3].extend(head + i for head in heads)
        m_deps[4].extend(labels)
        m_deps[5].extend(ner)
        m_brackets.extend((b["first"] + i, b["last"] + i, b["label"])
                          for b in brackets)
        m_cats.update(cats)
        i += len(ids)
    return [(m_deps, (m_cats, m_brackets))]


def docs_to_json(docs, id=0, ner_missing_tag="O"):
    """Convert a list of Doc objects into the JSON-serializable format used by
    the spacy train command.

    docs (iterable / Doc): The Doc object(s) to convert.
    id (int): Id for the JSON.
    RETURNS (dict): The data in spaCy's JSON format
        - each input doc will be treated as a paragraph in the output doc
    """
    if isinstance(docs, Doc):
        docs = [docs]
    json_doc = {"id": id, "paragraphs": []}
    for i, doc in enumerate(docs):
        json_para = {'raw': doc.text, "sentences": [], "cats": []}
        for cat, val in doc.cats.items():
            json_cat = {"label": cat, "value": val}
            json_para["cats"].append(json_cat)
        ent_offsets = [(e.start_char, e.end_char, e.label_) for e in doc.ents]
        biluo_tags = biluo_tags_from_offsets(doc, ent_offsets, missing=ner_missing_tag)
        for j, sent in enumerate(doc.sents):
            json_sent = {"tokens": [], "brackets": []}
            for token in sent:
                json_token = {"id": token.i, "orth": token.text}
                if doc.is_tagged:
                    json_token["tag"] = token.tag_
                    json_token["pos"] = token.pos_
                    json_token["morph"] = token.morph_
                    json_token["lemma"] = token.lemma_
                if doc.is_parsed:
                    json_token["head"] = token.head.i-token.i
                    json_token["dep"] = token.dep_
                json_token["ner"] = biluo_tags[token.i]
                json_sent["tokens"].append(json_token)
            json_para["sentences"].append(json_sent)
        json_doc["paragraphs"].append(json_para)
    return json_doc


def read_json_file(loc, docs_filter=None, limit=None):
    loc = util.ensure_path(loc)
    if loc.is_dir():
        for filename in loc.iterdir():
            yield from read_json_file(loc / filename, limit=limit)
    else:
        for doc in json_iterate(loc):
            if docs_filter is not None and not docs_filter(doc):
                continue
            for json_data in json_to_annotations(doc):
                yield json_data


def json_to_annotations(doc):
    """Convert an item in the JSON-formatted training data to the format
    used by GoldParse.

    doc (dict): One entry in the training data.
    YIELDS (tuple): The reformatted data - one training example per paragraph
    """
    for paragraph in doc["paragraphs"]:
        example = {"text": paragraph.get("raw", None)}
        words = []
        ids = []
        tags = []
        pos = []
        morphs = []
        lemmas = []
        heads = []
        labels = []
        ner = []
        sent_starts = []
        brackets = []
        for sent in paragraph["sentences"]:
            sent_start_i = len(words)
            for i, token in enumerate(sent["tokens"]):
                words.append(token["orth"])
                ids.append(token.get('id', sent_start_i + i))
                tags.append(token.get('tag', "-"))
                pos.append(token.get("pos", ""))
                morphs.append(token.get("morph", ""))
                lemmas.append(token.get("lemma", ""))
                heads.append(token.get("head", 0) + sent_start_i + i)
                labels.append(token.get("dep", ""))
                # Ensure ROOT label is case-insensitive
                if labels[-1].lower() == "root":
                    labels[-1] = "ROOT"
                ner.append(token.get("ner", "-"))
                if i == 0:
                    sent_starts.append(1)
                else:
                    sent_starts.append(0)
            if "brackets" in sent:
                brackets.extend((b["first"] + sent_start_i,
                                 b["last"] + sent_start_i, b["label"])
                                for b in sent["brackets"])
        cats = {}
        for cat in paragraph.get("cats", {}):
            cats[cat["label"]] = cat["value"]
        example["token_annotation"] = dict(
            ids=ids,
            words=words,
            tags=tags,
            pos=pos,
            morphs=morphs,
            lemmas=lemmas,
            heads=heads,
            deps=labels,
            entities=ner,
            sent_starts=sent_starts,
            brackets=brackets
        )
        example["doc_annotation"] = dict(cats=cats)
        yield example


def json_iterate(loc):
    # We should've made these files jsonl...But since we didn't, parse out
    # the docs one-by-one to reduce memory usage.
    # It's okay to read in the whole file -- just don't parse it into JSON.
    cdef bytes py_raw
    loc = util.ensure_path(loc)
    with loc.open("rb") as file_:
        py_raw = file_.read()
    cdef long file_length = len(py_raw)
    if file_length > 2 ** 30:
        warnings.warn(Warnings.W027.format(size=file_length))

    raw = <char*>py_raw
    cdef int square_depth = 0
    cdef int curly_depth = 0
    cdef int inside_string = 0
    cdef int escape = 0
    cdef long start = -1
    cdef char c
    cdef char quote = ord('"')
    cdef char backslash = ord("\\")
    cdef char open_square = ord("[")
    cdef char close_square = ord("]")
    cdef char open_curly = ord("{")
    cdef char close_curly = ord("}")
    for i in range(file_length):
        c = raw[i]
        if escape:
            escape = False
            continue
        if c == backslash:
            escape = True
            continue
        if c == quote:
            inside_string = not inside_string
            continue
        if inside_string:
            continue
        if c == open_square:
            square_depth += 1
        elif c == close_square:
            square_depth -= 1
        elif c == open_curly:
            if square_depth == 1 and curly_depth == 0:
                start = i
            curly_depth += 1
        elif c == close_curly:
            curly_depth -= 1
            if square_depth == 1 and curly_depth == 0:
                py_str = py_raw[start : i + 1].decode("utf8")
                try:
                    yield srsly.json_loads(py_str)
                except Exception:
                    print(py_str)
                    raise
                start = -1
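For context, docs_to_json above produces the paragraph-per-Doc JSON consumed by spacy train; a minimal sketch (the model name and output path are placeholders only):

    import srsly
    import spacy
    from spacy.gold import docs_to_json

    nlp = spacy.load("en_core_web_sm")   # placeholder model name
    doc = nlp("Flights to London were delayed.")
    json_doc = docs_to_json([doc], id=0)
    srsly.write_json("train.json", [json_doc])  # a training file holds a list of such docs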
197
spacy/gold/iob_utils.py
Normal file
197
spacy/gold/iob_utils.py
Normal file
|
@ -0,0 +1,197 @@
|
||||||
|
import warnings
|
||||||
|
from ..errors import Errors, Warnings
|
||||||
|
from ..tokens import Span
|
||||||
|
|
||||||
|
|
||||||
|
def iob_to_biluo(tags):
|
||||||
|
out = []
|
||||||
|
tags = list(tags)
|
||||||
|
while tags:
|
||||||
|
out.extend(_consume_os(tags))
|
||||||
|
out.extend(_consume_ent(tags))
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def biluo_to_iob(tags):
|
||||||
|
out = []
|
||||||
|
for tag in tags:
|
||||||
|
tag = tag.replace("U-", "B-", 1).replace("L-", "I-", 1)
|
||||||
|
out.append(tag)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _consume_os(tags):
|
||||||
|
while tags and tags[0] == "O":
|
||||||
|
yield tags.pop(0)
|
||||||
|
|
||||||
|
|
||||||
|
def _consume_ent(tags):
|
||||||
|
if not tags:
|
||||||
|
return []
|
||||||
|
tag = tags.pop(0)
|
||||||
|
target_in = "I" + tag[1:]
|
||||||
|
target_last = "L" + tag[1:]
|
||||||
|
length = 1
|
||||||
|
while tags and tags[0] in {target_in, target_last}:
|
||||||
|
length += 1
|
||||||
|
tags.pop(0)
|
||||||
|
label = tag[2:]
|
||||||
|
if length == 1:
|
||||||
|
if len(label) == 0:
|
||||||
|
raise ValueError(Errors.E177.format(tag=tag))
|
||||||
|
return ["U-" + label]
|
||||||
|
else:
|
||||||
|
start = "B-" + label
|
||||||
|
end = "L-" + label
|
||||||
|
middle = [f"I-{label}" for _ in range(1, length - 1)]
|
||||||
|
return [start] + middle + [end]
|
||||||
|
|
||||||
|
|
||||||
|
def biluo_tags_from_doc(doc, missing="O"):
|
||||||
|
return biluo_tags_from_offsets(
|
||||||
|
doc,
|
||||||
|
[(ent.start_char, ent.end_char, ent.label_) for ent in doc.ents],
|
||||||
|
missing=missing
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def biluo_tags_from_offsets(doc, entities, missing="O"):
|
||||||
|
"""Encode labelled spans into per-token tags, using the
|
||||||
|
Begin/In/Last/Unit/Out scheme (BILUO).
|
||||||
|
|
||||||
|
doc (Doc): The document that the entity offsets refer to. The output tags
|
||||||
|
will refer to the token boundaries within the document.
|
||||||
|
entities (iterable): A sequence of `(start, end, label)` triples. `start`
|
||||||
|
and `end` should be character-offset integers denoting the slice into
|
||||||
|
the original string.
|
||||||
|
    RETURNS (list): A list of unicode strings, describing the tags. Each tag
        string will be of the form either "", "O" or "{action}-{label}", where
        action is one of "B", "I", "L", "U". The string "-" is used where the
        entity offsets don't align with the tokenization in the `Doc` object.
        The training algorithm will view these as missing values. "O" denotes a
        non-entity token. "B" denotes the beginning of a multi-token entity,
        "I" the inside of an entity of three or more tokens, and "L" the end
        of an entity of two or more tokens. "U" denotes a single-token entity.

    EXAMPLE:
        >>> text = 'I like London.'
        >>> entities = [(len('I like '), len('I like London'), 'LOC')]
        >>> doc = nlp.tokenizer(text)
        >>> tags = biluo_tags_from_offsets(doc, entities)
        >>> assert tags == ["O", "O", 'U-LOC', "O"]
    """
    # Ensure no overlapping entity labels exist
    tokens_in_ents = {}

    starts = {token.idx: token.i for token in doc}
    ends = {token.idx + len(token): token.i for token in doc}
    biluo = ["-" for _ in doc]
    # Handle entity cases
    for start_char, end_char, label in entities:
        for token_index in range(start_char, end_char):
            if token_index in tokens_in_ents.keys():
                raise ValueError(
                    Errors.E103.format(
                        span1=(
                            tokens_in_ents[token_index][0],
                            tokens_in_ents[token_index][1],
                            tokens_in_ents[token_index][2],
                        ),
                        span2=(start_char, end_char, label),
                    )
                )
            tokens_in_ents[token_index] = (start_char, end_char, label)

        start_token = starts.get(start_char)
        end_token = ends.get(end_char)
        # Only interested if the tokenization is correct
        if start_token is not None and end_token is not None:
            if start_token == end_token:
                biluo[start_token] = f"U-{label}"
            else:
                biluo[start_token] = f"B-{label}"
                for i in range(start_token + 1, end_token):
                    biluo[i] = f"I-{label}"
                biluo[end_token] = f"L-{label}"
    # Now distinguish the O cases from ones where we miss the tokenization
    entity_chars = set()
    for start_char, end_char, label in entities:
        for i in range(start_char, end_char):
            entity_chars.add(i)
    for token in doc:
        for i in range(token.idx, token.idx + len(token)):
            if i in entity_chars:
                break
        else:
            biluo[token.i] = missing
    if "-" in biluo:
        ent_str = str(entities)
        warnings.warn(
            Warnings.W030.format(
                text=doc.text[:50] + "..." if len(doc.text) > 50 else doc.text,
                entities=ent_str[:50] + "..." if len(ent_str) > 50 else ent_str,
            )
        )
    return biluo


def spans_from_biluo_tags(doc, tags):
    """Encode per-token tags following the BILUO scheme into Span object, e.g.
    to overwrite the doc.ents.

    doc (Doc): The document that the BILUO tags refer to.
    entities (iterable): A sequence of BILUO tags with each tag describing one
        token. Each tags string will be of the form of either "", "O" or
        "{action}-{label}", where action is one of "B", "I", "L", "U".
    RETURNS (list): A sequence of Span objects.
    """
    token_offsets = tags_to_entities(tags)
    spans = []
    for label, start_idx, end_idx in token_offsets:
        span = Span(doc, start_idx, end_idx + 1, label=label)
        spans.append(span)
    return spans


def offsets_from_biluo_tags(doc, tags):
    """Encode per-token tags following the BILUO scheme into entity offsets.

    doc (Doc): The document that the BILUO tags refer to.
    entities (iterable): A sequence of BILUO tags with each tag describing one
        token. Each tags string will be of the form of either "", "O" or
        "{action}-{label}", where action is one of "B", "I", "L", "U".
    RETURNS (list): A sequence of `(start, end, label)` triples. `start` and
        `end` will be character-offset integers denoting the slice into the
        original string.
    """
    spans = spans_from_biluo_tags(doc, tags)
    return [(span.start_char, span.end_char, span.label_) for span in spans]


def tags_to_entities(tags):
    entities = []
    start = None
    for i, tag in enumerate(tags):
        if tag is None:
            continue
        if tag.startswith("O"):
            # TODO: We shouldn't be getting these malformed inputs. Fix this.
            if start is not None:
                start = None
            continue
        elif tag == "-":
            continue
        elif tag.startswith("I"):
            if start is None:
                raise ValueError(Errors.E067.format(tags=tags[: i + 1]))
            continue
        if tag.startswith("U"):
            entities.append((tag[2:], i, i))
        elif tag.startswith("B"):
            start = i
        elif tag.startswith("L"):
            entities.append((tag[2:], start, i))
            start = None
        else:
            raise ValueError(Errors.E068.format(tag=tag))
    return entities
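Taken together, the helpers above convert between character offsets, per-token BILUO tags, and Span objects. A minimal round-trip sketch (assuming a blank English pipeline and that the helpers are importable from spacy.gold, as elsewhere on this branch; this snippet is not part of the diff itself):

import spacy
from spacy.gold import biluo_tags_from_offsets, offsets_from_biluo_tags, spans_from_biluo_tags

nlp = spacy.blank("en")
doc = nlp("I like London.")
offsets = [(7, 13, "LOC")]                    # character span covering "London"
tags = biluo_tags_from_offsets(doc, offsets)  # ["O", "O", "U-LOC", "O"]
spans = spans_from_biluo_tags(doc, tags)      # one Span over "London" with label "LOC"
doc.ents = spans                              # e.g. overwrite doc.ents from decoded tags
assert offsets_from_biluo_tags(doc, tags) == offsets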
|
8
spacy/gold/new_example.pxd
Normal file
8
spacy/gold/new_example.pxd
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
from ..tokens.doc cimport Doc
|
||||||
|
from .align cimport Alignment
|
||||||
|
|
||||||
|
|
||||||
|
cdef class NewExample:
|
||||||
|
cdef readonly Doc x
|
||||||
|
cdef readonly Doc y
|
||||||
|
cdef readonly Alignment _alignment
|
434  spacy/gold/new_example.pyx  Normal file

@@ -0,0 +1,434 @@
import numpy

from ..tokens import Token
from ..tokens.doc cimport Doc
from ..attrs import IDS
from .align cimport Alignment
from .annotation import TokenAnnotation, DocAnnotation
from .iob_utils import biluo_to_iob, biluo_tags_from_offsets, biluo_tags_from_doc
from .align import Alignment
from ..errors import Errors, AlignmentError


cpdef Doc annotations2doc(Doc predicted, tok_annot, doc_annot):
    # TODO: Improve and test this
    words = tok_annot.get("ORTH", [tok.text for tok in predicted])
    attrs, array = _annot2array(predicted.vocab, tok_annot, doc_annot)
    output = Doc(predicted.vocab, words=words)
    if array.size:
        output = output.from_array(attrs, array)
    output.cats.update(doc_annot.get("cats", {}))
    return output


cdef class NewExample:
    def __init__(self, Doc predicted, Doc reference, *, Alignment alignment=None):
        """ Doc can either be text, or an actual Doc """
        msg = "Example.__init__ got None for '{arg}'. Requires Doc."
        if predicted is None:
            raise TypeError(msg.format(arg="predicted"))
        if reference is None:
            raise TypeError(msg.format(arg="reference"))
        self.x = predicted
        self.y = reference
        self._alignment = alignment

    property predicted:
        def __get__(self):
            return self.x

        def __set__(self, doc):
            self.x = doc

    property reference:
        def __get__(self):
            return self.y

        def __set__(self, doc):
            self.y = doc

    @classmethod
    def from_dict(cls, Doc predicted, dict example_dict):
        if example_dict is None:
            raise ValueError("Example.from_dict expected dict, received None")
        if not isinstance(predicted, Doc):
            raise TypeError(f"Argument 1 should be Doc. Got {type(predicted)}")
        example_dict = _fix_legacy_dict_data(predicted, example_dict)
        tok_dict, doc_dict = _parse_example_dict_data(example_dict)
        return NewExample(
            predicted,
            annotations2doc(predicted, tok_dict, doc_dict)
        )

    @property
    def alignment(self):
        if self._alignment is None:
            if self.doc is None:
                return None
            spacy_words = [token.orth_ for token in self.predicted]
            gold_words = [token.orth_ for token in self.reference]
            if gold_words == []:
                gold_words = spacy_words
            self._alignment = Alignment(spacy_words, gold_words)
        return self._alignment

    def get_aligned(self, field):
        """Return an aligned array for a token attribute."""
        # TODO: This is probably wrong. I just bashed this out and there's probably
        # all sorts of edge-cases.
        alignment = self.alignment
        i2j_multi = alignment.i2j_multi
        gold_to_cand = alignment.gold_to_cand
        cand_to_gold = alignment.cand_to_gold

        gold_values = self.reference.to_array([field])
        output = []
        for i, gold_i in enumerate(cand_to_gold):
            if self.predicted[i].text.isspace():
                output.append(None)
            elif gold_i is None:
                if i in i2j_multi:
                    output.append(gold_values[i2j_multi[i]])
                else:
                    output.append(None)
            else:
                output.append(gold_values[gold_i])
        return output

    def to_dict(self):
        return {
            "doc_annotation": {
                "cats": dict(self.reference.cats),
                "links": [], # TODO
            },
            "token_annotation": {
                "ids": [t.i+1 for t in self.reference],
                "words": [t.text for t in self.reference],
                "tags": [t.tag_ for t in self.reference],
                "lemmas": [t.lemma_ for t in self.reference],
                "pos": [t.pos_ for t in self.reference],
                "morphs": [t.morph_ for t in self.reference],
                "heads": [t.head.i for t in self.reference],
                "deps": [t.dep_ for t in self.reference],
                "sent_starts": [int(bool(t.is_sent_start)) for t in self.reference],
                "entities": biluo_tags_from_doc(self.reference)
            }
        }

    def split_sents(self):
        """ Split the token annotations into multiple Examples based on
        sent_starts and return a list of the new Examples"""
        if not self.reference.is_sentenced:
            return [self]
        # TODO: Do this for misaligned somehow?
        predicted_words = [t.text for t in self.predicted]
        reference_words = [t.text for t in self.reference]
        if predicted_words != reference_words:
            raise NotImplementedError("TODO: Implement this")
        # Implement the easy case.
        output = []
        cls = self.__class__
        for sent in self.reference.sents:
            # I guess for misaligned we just need to use the gold_to_cand?
            output.append(
                cls(
                    self.predicted[sent.start : sent.end + 1].as_doc(),
                    sent.as_doc()
                )
            )
        return output

    def text(self):
        return self.x.text


def _annot2array(vocab, tok_annot, doc_annot):
    attrs = []
    values = []

    for key, value in doc_annot.items():
        if key == "entities":
            words = tok_annot["ORTH"]
            ent_iobs, ent_types = _parse_ner_tags(vocab, words, value)
            tok_annot["ENT_IOB"] = ent_iobs
            tok_annot["ENT_TYPE"] = ent_types
        elif key == "links":
            entities = doc_annot.get("entities", {})
            if value and not entities:
                raise ValueError(Errors.E984)
            ent_kb_ids = _parse_links(vocab, words, value, entities)
            tok_annot["ENT_KB_ID"] = ent_kb_ids
        elif key == "cats":
            pass
        else:
            raise ValueError(f"Unknown doc attribute: {key}")

    for key, value in tok_annot.items():
        if key not in IDS:
            raise ValueError(f"Unknown token attribute: {key}")
        elif key == "ORTH":
            pass
        elif key == "HEAD":
            attrs.append(key)
            values.append([h-i for i, h in enumerate(value)])
        elif key == "SENT_START":
            attrs.append(key)
            values.append(value)
        elif key == "MORPH":
            attrs.append(key)
            values.append([vocab.morphology.add(v) for v in value])
        elif key == "ENT_IOB":
            iob_strings = Token.iob_strings()
            attrs.append(key)
            try:
                values.append([iob_strings.index(v) for v in value])
            except ValueError:
                raise ValueError(Errors.E985.format(values=iob_strings, value=values))
        else:
            attrs.append(key)
            values.append([vocab.strings.add(v) for v in value])

    array = numpy.asarray(values, dtype="uint64")
    return attrs, array.T


def _parse_example_dict_data(example_dict):
    return (
        example_dict["token_annotation"],
        example_dict["doc_annotation"]
    )


def _fix_legacy_dict_data(predicted, example_dict):
    token_dict = example_dict.get("token_annotation", {})
    doc_dict = example_dict.get("doc_annotation", {})
    for key, value in example_dict.items():
        if key in ("token_annotation", "doc_annotation"):
            pass
        elif key == "ids":
            pass
        elif key in ("cats", "links") and value:
            doc_dict[key] = value
        elif key in ("ner", "entities") and value:
            doc_dict["entities"] = value
        else:
            token_dict[key] = value
    # Remap keys
    remapping = {
        "words": "ORTH",
        "tags": "TAG",
        "pos": "POS",
        "lemmas": "LEMMA",
        "deps": "DEP",
        "heads": "HEAD",
        "sent_starts": "SENT_START",
        "morphs": "MORPH",
    }
    old_token_dict = token_dict
    token_dict = {}
    for key, value in old_token_dict.items():
        if key in ("text", "ids", "entities", "ner", "brackets"):
            pass
        elif key in remapping:
            token_dict[remapping[key]] = value
        else:
            raise ValueError(f"Unknown attr: {key}")
    if "HEAD" in token_dict and "SENT_START" in token_dict:
        # If heads are set, we don't also redundantly specify SENT_START.
        token_dict.pop("SENT_START")
    return {
        "token_annotation": token_dict,
        "doc_annotation": doc_dict
    }


def _parse_ner_tags(vocab, words, biluo_or_offsets):
    if isinstance(biluo_or_offsets[0], (list, tuple)):
        # Convert to biluo if necessary
        # This is annoying but to convert the offsets we need a Doc
        # that has the target tokenization.
        reference = Doc(vocab, words=words)
        biluo = biluo_tags_from_offsets(reference, biluo_or_offsets)
    else:
        biluo = biluo_or_offsets
    ent_iobs = []
    ent_types = []
    for iob_tag in biluo_to_iob(biluo):
        ent_iobs.append(iob_tag.split("-")[0])
        if iob_tag.startswith("I") or iob_tag.startswith("B"):
            ent_types.append(iob_tag.split("-", 1)[1])
        else:
            ent_types.append("")
    return ent_iobs, ent_types


def _parse_links(vocab, words, links, entities):
    reference = Doc(vocab, words=words)

    starts = {token.idx: token.i for token in reference}
    ends = {token.idx + len(token): token.i for token in reference}
    ent_kb_ids = ["" for _ in reference]
    entity_map = [(ent[0], ent[1]) for ent in entities]

    # links annotations need to refer 1-1 to entity annotations - throw error otherwise
    for index, annot_dict in links.items():
        start_char, end_char = index
        if (start_char, end_char) not in entity_map:
            raise ValueError(Errors.E984)

    for index, annot_dict in links.items():
        true_kb_ids = []
        for key, value in annot_dict.items():
            if value == 1.0:
                true_kb_ids.append(key)
        if len(true_kb_ids) > 1:
            raise ValueError(Errors.E983)

        if len(true_kb_ids) == 1:
            start_char, end_char = index
            start_token = starts.get(start_char)
            end_token = ends.get(end_char)
            for i in range(start_token, end_token+1):
                ent_kb_ids[i] = true_kb_ids[0]

    return ent_kb_ids


class Example:
    def get_aligned(self, field):
        """Return an aligned array for a token annotation field."""
        if self.doc is None:
            return self.token_annotation.get_field(field)
        doc = self.doc
        if field == "word":
            return [token.orth_ for token in doc]
        gold_values = self.token_annotation.get_field(field)
        alignment = self.alignment
        i2j_multi = alignment.i2j_multi
        gold_to_cand = alignment.gold_to_cand
        cand_to_gold = alignment.cand_to_gold

        output = []
        for i, gold_i in enumerate(cand_to_gold):
            if doc[i].text.isspace():
                output.append(None)
            elif gold_i is None:
                if i in i2j_multi:
                    output.append(gold_values[i2j_multi[i]])
                else:
                    output.append(None)
            else:
                output.append(gold_values[gold_i])
        return output

    def split_sents(self):
        """ Split the token annotations into multiple Examples based on
        sent_starts and return a list of the new Examples"""
        if not self.token_annotation.words:
            return [self]
        s_ids, s_words, s_tags, s_pos, s_morphs = [], [], [], [], []
        s_lemmas, s_heads, s_deps, s_ents, s_sent_starts = [], [], [], [], []
        s_brackets = []
        sent_start_i = 0
        t = self.token_annotation
        split_examples = []
        for i in range(len(t.words)):
            if i > 0 and t.sent_starts[i] == 1:
                split_examples.append(
                    Example(
                        doc=Doc(self.doc.vocab, words=s_words),
                        token_annotation=TokenAnnotation(
                            ids=s_ids,
                            words=s_words,
                            tags=s_tags,
                            pos=s_pos,
                            morphs=s_morphs,
                            lemmas=s_lemmas,
                            heads=s_heads,
                            deps=s_deps,
                            entities=s_ents,
                            sent_starts=s_sent_starts,
                            brackets=s_brackets,
                        ),
                        doc_annotation=self.doc_annotation
                    )
                )
                s_ids, s_words, s_tags, s_pos, s_heads = [], [], [], [], []
                s_deps, s_ents, s_morphs, s_lemmas = [], [], [], []
                s_sent_starts, s_brackets = [], []
                sent_start_i = i
            s_ids.append(t.get_id(i))
            s_words.append(t.get_word(i))
            s_tags.append(t.get_tag(i))
            s_pos.append(t.get_pos(i))
            s_morphs.append(t.get_morph(i))
            s_lemmas.append(t.get_lemma(i))
            s_heads.append(t.get_head(i) - sent_start_i)
            s_deps.append(t.get_dep(i))
            s_ents.append(t.get_entity(i))
            s_sent_starts.append(t.get_sent_start(i))
            for b_end, b_label in t.brackets_by_start.get(i, []):
                s_brackets.append((i - sent_start_i, b_end - sent_start_i, b_label))
            i += 1
        split_examples.append(
            Example(
                doc=Doc(self.doc.vocab, words=s_words),
                token_annotation=TokenAnnotation(
                    ids=s_ids,
                    words=s_words,
                    tags=s_tags,
                    pos=s_pos,
                    morphs=s_morphs,
                    lemmas=s_lemmas,
                    heads=s_heads,
                    deps=s_deps,
                    entities=s_ents,
                    sent_starts=s_sent_starts,
                    brackets=s_brackets,
                ),
                doc_annotation=self.doc_annotation
            )
        )
        return split_examples

    @classmethod
    def to_example_objects(cls, examples, make_doc=None, keep_raw_text=False):
        """
        Return a list of Example objects, from a variety of input formats.
        make_doc needs to be provided when the examples contain text strings and keep_raw_text=False
        """
        if isinstance(examples, Example):
            return [examples]
        if isinstance(examples, tuple):
            examples = [examples]
        converted_examples = []
        for ex in examples:
            if isinstance(ex, Example):
                converted_examples.append(ex)
            # convert string to Doc to Example
            elif isinstance(ex, str):
                if keep_raw_text:
                    converted_examples.append(Example(doc=ex))
                else:
                    doc = make_doc(ex)
                    converted_examples.append(Example(doc=doc))
            # convert tuples to Example
            elif isinstance(ex, tuple) and len(ex) == 2:
                doc, gold = ex
                # convert string to Doc
                if isinstance(doc, str) and not keep_raw_text:
                    doc = make_doc(doc)
                converted_examples.append(Example.from_dict(gold, doc=doc))
            # convert Doc to Example
            elif isinstance(ex, Doc):
                converted_examples.append(Example(doc=ex))
            else:
                converted_examples.append(ex)
        return converted_examples

    def _deprecated_get_gold(self, make_projective=False):
        from ..syntax.gold_parse import get_parses_from_example

        _, gold = get_parses_from_example(self, make_projective=make_projective)[0]
        return gold
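A rough usage sketch for NewExample.from_dict as defined above (hypothetical values; the class is explicitly marked as work-in-progress in this diff, so details may change): the predicted Doc carries the current tokenization, and the reference Doc is rebuilt from the annotation dict.

from spacy.vocab import Vocab
from spacy.tokens import Doc
from spacy.gold.new_example import NewExample

vocab = Vocab()
predicted = Doc(vocab, words=["London", "and", "Berlin"])
annots = {
    "words": ["London", "and", "Berlin"],
    "tags": ["NNP", "CC", "NNP"],
    "entities": [(0, 6, "LOC"), (11, 17, "LOC")],  # character offsets into "London and Berlin"
}
eg = NewExample.from_dict(predicted, annots)
# The legacy keys are remapped to ORTH/TAG/ENT_IOB/ENT_TYPE and written onto eg.reference:
print([t.tag_ for t in eg.reference])
print([(ent.text, ent.label_) for ent in eg.reference.ents])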
@@ -636,6 +636,7 @@ class Language(object):
        examples (iterable): `Example` objects.
        YIELDS (tuple): `Example` objects.
        """
+        # TODO: This is deprecated right?
        for name, proc in self.pipeline:
            if hasattr(proc, "preprocess_gold"):
                examples = proc.preprocess_gold(examples)
@@ -722,24 +723,26 @@ class Language(object):

        DOCS: https://spacy.io/api/language#evaluate
        """
-        examples = Example.to_example_objects(examples, make_doc=self.make_doc)
+        examples = Example.to_example_objects(examples)
        if scorer is None:
            scorer = Scorer(pipeline=self.pipeline)
        if component_cfg is None:
            component_cfg = {}
+        docs = (eg.predicted for eg in examples)
        for name, pipe in self.pipeline:
            kwargs = component_cfg.get(name, {})
            kwargs.setdefault("batch_size", batch_size)
            if not hasattr(pipe, "pipe"):
-                examples = _pipe(examples, pipe, kwargs)
+                docs = _pipe(docs, pipe, kwargs)
            else:
-                examples = pipe.pipe(examples, as_example=True, **kwargs)
-        for ex in examples:
+                docs = pipe.pipe(docs, **kwargs)
+        for doc, eg in zip(docs, examples):
            if verbose:
                print(ex.doc)
+            eg.predicted = doc
            kwargs = component_cfg.get("scorer", {})
            kwargs.setdefault("verbose", verbose)
-            scorer.score(ex, **kwargs)
+            scorer.score(eg, **kwargs)
        return scorer

    @contextmanager
@@ -51,9 +51,9 @@ class Morphologizer(Tagger):
    def begin_training(self, get_examples=lambda: [], pipeline=None, sgd=None,
                       **kwargs):
        for example in get_examples():
-            for i, morph in enumerate(example.token_annotation.morphs):
-                pos = example.token_annotation.get_pos(i)
-                morph = Morphology.feats_to_dict(morph)
+            for i, token in enumerate(example.reference):
+                pos = token.pos_
+                morph = token.morph
                norm_morph = self.vocab.strings[self.vocab.morphology.add(morph)]
                if pos:
                    morph["POS"] = pos
@@ -92,7 +92,7 @@ class Morphologizer(Tagger):
        guesses = scores.argmax(axis=1)
        known_labels = numpy.ones((scores.shape[0], 1), dtype="f")
        for ex in examples:
-            gold = ex.gold
+            gold = ex._deprecated_get_gold()
            for i in range(len(gold.morphs)):
                pos = gold.pos[i] if i < len(gold.pos) else ""
                morph = gold.morphs[i]
@@ -20,7 +20,7 @@ from .defaults import default_nel, default_senter
from .functions import merge_subtokens
from ..language import Language, component
from ..syntax import nonproj
-from ..gold import Example
+from ..gold.new_example import NewExample as Example
from ..attrs import POS, ID
from ..util import link_vectors_to_models, create_default_optimizer
from ..parts_of_speech import X
@@ -48,55 +48,38 @@ class Pipe(object):
    def from_nlp(cls, nlp, model, **cfg):
        return cls(nlp.vocab, model, **cfg)

-    def _get_doc(self, example):
-        """ Use this method if the `example` can be both a Doc or an Example """
-        if isinstance(example, Doc):
-            return example
-        return example.doc
-
    def __init__(self, vocab, model, **cfg):
        """Create a new pipe instance."""
        raise NotImplementedError

-    def __call__(self, example):
+    def __call__(self, Doc doc):
        """Apply the pipe to one document. The document is
        modified in-place, and returned.

        Both __call__ and pipe should delegate to the `predict()`
        and `set_annotations()` methods.
        """
-        doc = self._get_doc(example)
        predictions = self.predict([doc])
        if isinstance(predictions, tuple) and len(predictions) == 2:
            scores, tensors = predictions
            self.set_annotations([doc], scores, tensors=tensors)
        else:
            self.set_annotations([doc], predictions)
-        if isinstance(example, Example):
-            example.doc = doc
-            return example
        return doc

-    def pipe(self, stream, batch_size=128, n_threads=-1, as_example=False):
+    def pipe(self, stream, batch_size=128, n_threads=-1):
        """Apply the pipe to a stream of documents.

        Both __call__ and pipe should delegate to the `predict()`
        and `set_annotations()` methods.
        """
-        for examples in util.minibatch(stream, size=batch_size):
-            docs = [self._get_doc(ex) for ex in examples]
+        for docs in util.minibatch(stream, size=batch_size):
            predictions = self.predict(docs)
            if isinstance(predictions, tuple) and len(tuple) == 2:
                scores, tensors = predictions
                self.set_annotations(docs, scores, tensors=tensors)
            else:
                self.set_annotations(docs, predictions)

-            if as_example:
-                for ex, doc in zip(examples, docs):
-                    ex.doc = doc
-                    yield ex
-            else:
-                yield from docs
+            yield from docs

    def predict(self, docs):
@@ -109,14 +92,13 @@ class Pipe(object):
        """Modify a batch of documents, using pre-computed scores."""
        raise NotImplementedError

-    def update(self, examples, set_annotations=False, drop=0.0, sgd=None, losses=None):
+    def update(self, docs, set_annotations=False, drop=0.0, sgd=None, losses=None):
        """Learn from a batch of documents and gold-standard information,
        updating the pipe's model.

        Delegates to predict() and get_loss().
        """
        if set_annotations:
-            docs = (self._get_doc(ex) for ex in examples)
            docs = list(self.pipe(docs))

    def rehearse(self, examples, sgd=None, losses=None, **config):
@@ -255,28 +237,15 @@ class Tagger(Pipe):
    def labels(self):
        return tuple(self.vocab.morphology.tag_names)

-    def __call__(self, example):
-        doc = self._get_doc(example)
+    def __call__(self, doc):
        tags = self.predict([doc])
        self.set_annotations([doc], tags)
-        if isinstance(example, Example):
-            example.doc = doc
-            return example
        return doc

-    def pipe(self, stream, batch_size=128, n_threads=-1, as_example=False):
-        for examples in util.minibatch(stream, size=batch_size):
-            docs = [self._get_doc(ex) for ex in examples]
+    def pipe(self, stream, batch_size=128, n_threads=-1):
+        for docs in util.minibatch(stream, size=batch_size):
            tag_ids = self.predict(docs)
-            assert len(docs) == len(examples)
-            assert len(tag_ids) == len(examples)
            self.set_annotations(docs, tag_ids)

-            if as_example:
-                for ex, doc in zip(examples, docs):
-                    ex.doc = doc
-                    yield ex
-            else:
-                yield from docs
+            yield from docs

    def predict(self, docs):
@@ -327,15 +296,17 @@ class Tagger(Pipe):
        doc.is_tagged = True

    def update(self, examples, drop=0., sgd=None, losses=None, set_annotations=False):
-        examples = Example.to_example_objects(examples)
+        for eg in examples:
+            assert isinstance(eg, Example)
        if losses is not None and self.name not in losses:
            losses[self.name] = 0.

-        if not any(len(ex.doc) if ex.doc else 0 for ex in examples):
+        if not any(len(eg.predicted) if eg.predicted else 0 for eg in examples):
            # Handle cases where there are no tokens in any docs.
            return
        set_dropout_rate(self.model, drop)
-        tag_scores, bp_tag_scores = self.model.begin_update([ex.doc for ex in examples])
+        tag_scores, bp_tag_scores = self.model.begin_update(
+            [eg.predicted for eg in examples])
        for sc in tag_scores:
            if self.model.ops.xp.isnan(sc.sum()):
                raise ValueError("nan value in scores")
@@ -347,17 +318,16 @@ class Tagger(Pipe):
        if losses is not None:
            losses[self.name] += loss
        if set_annotations:
-            docs = [ex.doc for ex in examples]
+            docs = [eg.predicted for eg in examples]
            self.set_annotations(docs, self._scores2guesses(tag_scores))

    def rehearse(self, examples, drop=0., sgd=None, losses=None):
        """Perform a 'rehearsal' update, where we try to match the output of
        an initial model.
        """
+        docs = [eg.predicted for eg in examples]
        if self._rehearsal_model is None:
            return
-        examples = Example.to_example_objects(examples)
-        docs = [ex.doc for ex in examples]
        if not any(len(doc) for doc in docs):
            # Handle cases where there are no tokens in any docs.
            return
@@ -373,7 +343,7 @@ class Tagger(Pipe):

    def get_loss(self, examples, scores):
        loss_func = SequenceCategoricalCrossentropy(names=self.labels)
-        truths = [eg.gold.tags for eg in examples]
+        truths = [eg.get_aligned("tag") for eg in examples]
        d_scores, loss = loss_func(scores, truths)
        if self.model.ops.xp.isnan(loss):
            raise ValueError("nan value when computing loss")
@@ -387,7 +357,8 @@ class Tagger(Pipe):
        orig_tag_map = dict(self.vocab.morphology.tag_map)
        new_tag_map = {}
        for example in get_examples():
-            for tag in example.token_annotation.tags:
+            for token in example.y:
+                tag = token.tag_
                if tag in orig_tag_map:
                    new_tag_map[tag] = orig_tag_map[tag]
                else:
@@ -560,9 +531,9 @@ class SentenceRecognizer(Tagger):
        correct = numpy.zeros((scores.shape[0],), dtype="i")
        guesses = scores.argmax(axis=1)
        known_labels = numpy.ones((scores.shape[0], 1), dtype="f")
-        for ex in examples:
-            gold = ex.gold
-            for sent_start in gold.sent_starts:
+        for eg in examples:
+            sent_starts = eg.get_aligned("sent_start")
+            for sent_start in sent_starts:
                if sent_start is None:
                    correct[idx] = guesses[idx]
                elif sent_start in tag_index:
@@ -575,7 +546,7 @@ class SentenceRecognizer(Tagger):
        d_scores = scores - to_categorical(correct, n_classes=scores.shape[1])
        d_scores *= self.model.ops.asarray(known_labels)
        loss = (d_scores**2).sum()
-        docs = [ex.doc for ex in examples]
+        docs = [eg.predicted for eg in examples]
        d_scores = self.model.ops.unflatten(d_scores, [len(d) for d in docs])
        return float(loss), d_scores

@@ -686,8 +657,8 @@ class MultitaskObjective(Tagger):
        gold_examples = nonproj.preprocess_training_data(get_examples())
        # for raw_text, doc_annot in gold_tuples:
        for example in gold_examples:
-            for i in range(len(example.token_annotation.ids)):
-                label = self.make_label(i, example.token_annotation)
+            for token in example.y:
+                label = self.make_label(token)
                if label is not None and label not in self.labels:
                    self.labels[label] = len(self.labels)
        self.model.initialize()
@@ -705,13 +676,13 @@ class MultitaskObjective(Tagger):
        cdef int idx = 0
        correct = numpy.zeros((scores.shape[0],), dtype="i")
        guesses = scores.argmax(axis=1)
-        golds = [ex.gold for ex in examples]
-        docs = [ex.doc for ex in examples]
-        for i, gold in enumerate(golds):
-            for j in range(len(docs[i])):
-                # Handels alignment for tokenization differences
-                token_annotation = gold.get_token_annotation()
-                label = self.make_label(j, token_annotation)
+        docs = [eg.predicted for eg in examples]
+        for i, eg in enumerate(examples):
+            # Handles alignment for tokenization differences
+            doc_annots = eg.get_aligned()
+            for j in range(len(eg.predicted)):
+                tok_annots = {key: values[j] for key, values in tok_annots.items()}
+                label = self.make_label(j, tok_annots)
                if label is None or label not in self.labels:
                    correct[idx] = guesses[idx]
                else:
@@ -723,83 +694,49 @@ class MultitaskObjective(Tagger):
        return float(loss), d_scores

    @staticmethod
-    def make_dep(i, token_annotation):
-        if token_annotation.deps[i] is None or token_annotation.heads[i] is None:
-            return None
-        return token_annotation.deps[i]
+    def make_dep(token):
+        return token.dep_

    @staticmethod
-    def make_tag(i, token_annotation):
-        return token_annotation.tags[i]
+    def make_tag(token):
+        return token.tag_

    @staticmethod
-    def make_ent(i, token_annotation):
-        if token_annotation.entities is None:
-            return None
-        return token_annotation.entities[i]
+    def make_ent(token):
+        if token.ent_iob_ == "O":
+            return "O"
+        else:
+            return token.ent_iob_ + "-" + token.ent_type_

    @staticmethod
-    def make_dep_tag_offset(i, token_annotation):
-        if token_annotation.deps[i] is None or token_annotation.heads[i] is None:
-            return None
-        offset = token_annotation.heads[i] - i
+    def make_dep_tag_offset(token):
+        dep = token.dep_
+        tag = token.tag_
+        offset = token.head.i - token.i
        offset = min(offset, 2)
        offset = max(offset, -2)
-        return f"{token_annotation.deps[i]}-{token_annotation.tags[i]}:{offset}"
+        return f"{dep}-{tag}:{offset}"

    @staticmethod
-    def make_ent_tag(i, token_annotation):
-        if token_annotation.entities is None or token_annotation.entities[i] is None:
-            return None
+    def make_ent_tag(token):
+        if token.ent_iob_ == "O":
+            ent = "O"
        else:
-            return f"{token_annotation.tags[i]}-{token_annotation.entities[i]}"
+            ent = token.ent_iob_ + "-" + token.ent_type_
+        tag = token.tag_
+        return f"{tag}-{ent}"

    @staticmethod
-    def make_sent_start(target, token_annotation, cache=True, _cache={}):
+    def make_sent_start(token):
        """A multi-task objective for representing sentence boundaries,
        using BILU scheme. (O is impossible)
-
-        The implementation of this method uses an internal cache that relies
-        on the identity of the heads array, to avoid requiring a new piece
-        of gold data. You can pass cache=False if you know the cache will
-        do the wrong thing.
        """
-        words = token_annotation.words
-        heads = token_annotation.heads
-        assert len(words) == len(heads)
-        assert target < len(words), (target, len(words))
-        if cache:
-            if id(heads) in _cache:
-                return _cache[id(heads)][target]
-            else:
-                for key in list(_cache.keys()):
-                    _cache.pop(key)
-                sent_tags = ["I-SENT"] * len(words)
-                _cache[id(heads)] = sent_tags
-        else:
-            sent_tags = ["I-SENT"] * len(words)
-
-        def _find_root(child):
-            seen = set([child])
-            while child is not None and heads[child] != child:
-                seen.add(child)
-                child = heads[child]
-            return child
-
-        sentences = {}
-        for i in range(len(words)):
-            root = _find_root(i)
-            if root is None:
-                sent_tags[i] = None
-            else:
-                sentences.setdefault(root, []).append(i)
-        for root, span in sorted(sentences.items()):
-            if len(span) == 1:
-                sent_tags[span[0]] = "U-SENT"
-            else:
-                sent_tags[span[0]] = "B-SENT"
-                sent_tags[span[-1]] = "L-SENT"
-        return sent_tags[target]
+        if token.is_sent_start and token.is_sent_end:
+            return "U-SENT"
+        elif token.is_sent_start:
+            return "B-SENT"
+        else:
+            return "I-SENT"


class ClozeMultitask(Pipe):
@@ -832,7 +769,7 @@ class ClozeMultitask(Pipe):
        # token.vector values, but that's a bit inefficient, especially on GPU.
        # Instead we fetch the index into the vectors table for each of our tokens,
        # and look them up all at once. This prevents data copying.
-        ids = self.model.ops.flatten([ex.doc.to_array(ID).ravel() for ex in examples])
+        ids = self.model.ops.flatten([eg.predicted.to_array(ID).ravel() for eg in examples])
        target = vectors[ids]
        gradient = self.distance.get_grad(prediction, target)
        loss = self.distance.get_loss(prediction, target)
@@ -842,11 +779,12 @@ class ClozeMultitask(Pipe):
        pass

    def rehearse(self, examples, drop=0., sgd=None, losses=None):
-        examples = Example.to_example_objects(examples)
        if losses is not None and self.name not in losses:
            losses[self.name] = 0.
+        docs = [eg.predicted for eg in examples]
        set_dropout_rate(self.model, drop)
-        predictions, bp_predictions = self.model.begin_update([ex.doc for ex in examples])
+        predictions, bp_predictions = self.model.begin_update(
+            [eg.predicted for eg in examples])
        loss, d_predictions = self.get_loss(examples, self.vocab.vectors.data, predictions)
        bp_predictions(d_predictions)
        if sgd is not None:
@@ -881,17 +819,10 @@ class TextCategorizer(Pipe):
    def labels(self, value):
        self.cfg["labels"] = tuple(value)

-    def pipe(self, stream, batch_size=128, n_threads=-1, as_example=False):
-        for examples in util.minibatch(stream, size=batch_size):
-            docs = [self._get_doc(ex) for ex in examples]
+    def pipe(self, stream, batch_size=128, n_threads=-1):
+        for docs in util.minibatch(stream, size=batch_size):
            scores, tensors = self.predict(docs)
            self.set_annotations(docs, scores, tensors=tensors)

-            if as_example:
-                for ex, doc in zip(examples, docs):
-                    ex.doc = doc
-                    yield ex
-            else:
-                yield from docs
+            yield from docs

    def predict(self, docs):
@@ -913,12 +844,15 @@ class TextCategorizer(Pipe):
            doc.cats[label] = float(scores[i, j])

    def update(self, examples, state=None, drop=0., set_annotations=False, sgd=None, losses=None):
-        examples = Example.to_example_objects(examples)
-        if not any(len(ex.doc) if ex.doc else 0 for ex in examples):
+        for eg in examples:
+            assert isinstance(eg, Example)
+        if not any(len(eg.predicted) if eg.predicted else 0 for eg in examples):
            # Handle cases where there are no tokens in any docs.
            return
        set_dropout_rate(self.model, drop)
-        scores, bp_scores = self.model.begin_update([ex.doc for ex in examples])
+        scores, bp_scores = self.model.begin_update(
+            [eg.predicted for eg in examples]
+        )
        loss, d_scores = self.get_loss(examples, scores)
        bp_scores(d_scores)
        if sgd is not None:
@@ -927,14 +861,15 @@ class TextCategorizer(Pipe):
            losses.setdefault(self.name, 0.0)
            losses[self.name] += loss
        if set_annotations:
-            docs = [ex.doc for ex in examples]
+            docs = [eg.predicted for eg in examples]
            self.set_annotations(docs, scores=scores)

    def rehearse(self, examples, drop=0., sgd=None, losses=None):
        if self._rehearsal_model is None:
            return
-        examples = Example.to_example_objects(examples)
-        docs=[ex.doc for ex in examples]
+        for eg in examples:
+            assert isinstance(eg, Example)
+        docs = [eg.predicted for eg in examples]
        if not any(len(doc) for doc in docs):
            # Handle cases where there are no tokens in any docs.
            return
@@ -950,13 +885,12 @@ class TextCategorizer(Pipe):
            losses[self.name] += (gradient**2).sum()

    def _examples_to_truth(self, examples):
-        gold_cats = [ex.doc_annotation.cats for ex in examples]
-        truths = numpy.zeros((len(gold_cats), len(self.labels)), dtype="f")
-        not_missing = numpy.ones((len(gold_cats), len(self.labels)), dtype="f")
-        for i, gold_cat in enumerate(gold_cats):
+        truths = numpy.zeros((len(examples), len(self.labels)), dtype="f")
+        not_missing = numpy.ones((len(examples), len(self.labels)), dtype="f")
+        for i, eg in enumerate(examples):
            for j, label in enumerate(self.labels):
-                if label in gold_cat:
-                    truths[i, j] = gold_cat[label]
+                if label in eg.predicted.cats:
+                    truths[i, j] = eg.reference.cats[label]
                else:
                    not_missing[i, j] = 0.
        truths = self.model.ops.asarray(truths)
@@ -993,7 +927,7 @@ class TextCategorizer(Pipe):
        # TODO: begin_training is not guaranteed to see all data / labels ?
        examples = list(get_examples())
        for example in examples:
-            for cat in example.doc_annotation.cats:
+            for cat in example.y.cats:
                self.add_label(cat)
        self.require_labels()
        docs = [Doc(Vocab(), words=["hello"])]
@@ -1150,21 +1084,22 @@ class EntityLinker(Pipe):
            losses.setdefault(self.name, 0.0)
        if not examples:
            return 0
-        examples = Example.to_example_objects(examples)
+        for eg in examples:
+            assert isinstance(eg, Example)
        sentence_docs = []
-        docs = [ex.doc for ex in examples]
+        docs = [eg.predicted for eg in examples]
        if set_annotations:
            # This seems simpler than other ways to get that exact output -- but
            # it does run the model twice :(
            predictions = self.model.predict(docs)
-        golds = [ex.gold for ex in examples]

-        for doc, gold in zip(docs, golds):
+        for eg in examples:
+            doc = eg.predicted
            ents_by_offset = dict()
            for ent in doc.ents:
                ents_by_offset[(ent.start_char, ent.end_char)] = ent
+            links = self._get_links_from_doc(eg.reference)
-            for entity, kb_dict in gold.links.items():
+            for entity, kb_dict in links.items():
                if isinstance(entity, str):
                    entity = literal_eval(entity)
                start, end = entity
@@ -1185,7 +1120,10 @@ class EntityLinker(Pipe):
            raise RuntimeError(Errors.E030)
        set_dropout_rate(self.model, drop)
        sentence_encodings, bp_context = self.model.begin_update(sentence_docs)
-        loss, d_scores = self.get_similarity_loss(scores=sentence_encodings, golds=golds)
+        loss, d_scores = self.get_similarity_loss(
+            scores=sentence_encodings,
+            examples=examples
+        )
        bp_context(d_scores)
        if sgd is not None:
            self.model.finish_update(sgd)
@@ -1196,10 +1134,11 @@ class EntityLinker(Pipe):
            self.set_annotations(docs, predictions)
        return loss

-    def get_similarity_loss(self, golds, scores):
+    def get_similarity_loss(self, examples, scores):
        entity_encodings = []
-        for gold in golds:
-            for entity, kb_dict in gold.links.items():
+        for eg in examples:
+            links = self._get_links_from_doc(eg.reference)
+            for entity, kb_dict in links.items():
                for kb_id, value in kb_dict.items():
                    # this loss function assumes we're only using positive examples
                    if value:
@@ -1218,8 +1157,9 @@ class EntityLinker(Pipe):

    def get_loss(self, examples, scores):
        cats = []
-        for ex in examples:
-            for entity, kb_dict in ex.gold.links.items():
+        for eg in examples:
+            links = self._get_links_from_doc(eg.reference)
+            for entity, kb_dict in links.items():
                for kb_id, value in kb_dict.items():
                    cats.append([value])

@@ -1232,26 +1172,18 @@ class EntityLinker(Pipe):
        loss = loss / len(cats)
        return loss, d_scores

-    def __call__(self, example):
-        doc = self._get_doc(example)
+    def _get_links_from_doc(self, doc):
+        return {}
+
+    def __call__(self, doc):
        kb_ids, tensors = self.predict([doc])
        self.set_annotations([doc], kb_ids, tensors=tensors)
-        if isinstance(example, Example):
-            example.doc = doc
-            return example
        return doc

-    def pipe(self, stream, batch_size=128, n_threads=-1, as_example=False):
-        for examples in util.minibatch(stream, size=batch_size):
-            docs = [self._get_doc(ex) for ex in examples]
+    def pipe(self, stream, batch_size=128, n_threads=-1):
+        for docs in util.minibatch(stream, size=batch_size):
            kb_ids, tensors = self.predict(docs)
            self.set_annotations(docs, kb_ids, tensors=tensors)

-            if as_example:
-                for ex, doc in zip(examples, docs):
-                    ex.doc = doc
-                    yield ex
-            else:
-                yield from docs
+            yield from docs

    def predict(self, docs):
@@ -1428,7 +1360,7 @@ class Sentencizer(Pipe):
    ):
        pass

-    def __call__(self, example):
+    def __call__(self, doc):
        """Apply the sentencizer to a Doc and set Token.is_sent_start.

        example (Doc or Example): The document to process.
@@ -1436,7 +1368,6 @@ class Sentencizer(Pipe):

        DOCS: https://spacy.io/api/sentencizer#call
        """
-        doc = self._get_doc(example)
        start = 0
        seen_period = False
        for i, token in enumerate(doc):
@@ -1450,25 +1381,16 @@ class Sentencizer(Pipe):
            seen_period = True
        if start < len(doc):
            doc[start].is_sent_start = True
-        if isinstance(example, Example):
-            example.doc = doc
-            return example
        return doc

-    def pipe(self, stream, batch_size=128, n_threads=-1, as_example=False):
-        for examples in util.minibatch(stream, size=batch_size):
-            docs = [self._get_doc(ex) for ex in examples]
+    def pipe(self, stream, batch_size=128, n_threads=-1):
+        for docs in util.minibatch(stream, size=batch_size):
            predictions = self.predict(docs)
            if isinstance(predictions, tuple) and len(tuple) == 2:
                scores, tensors = predictions
                self.set_annotations(docs, scores, tensors=tensors)
            else:
                self.set_annotations(docs, predictions)
-            if as_example:
-                for ex, doc in zip(examples, docs):
-                    ex.doc = doc
-                    yield ex
-            else:
-                yield from docs
+            yield from docs

    def predict(self, docs):
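The net effect of the pipeline changes above is that annotation and training now use different types: __call__ and pipe() deal only in Doc objects, while update() receives Example objects and reads the tokenized input from eg.predicted. A hypothetical driver function sketching that calling convention (names are illustrative, not part of the diff):

def run_component(pipe, docs, examples, optimizer):
    """Illustrative only: exercise a pipeline component under the new API."""
    # Annotation: __call__ and pipe() take and yield Doc objects, with no
    # as_example flag.
    annotated = [pipe(doc) for doc in docs]
    streamed = list(pipe.pipe(docs, batch_size=32))
    # Training: update() expects Example objects; components read the current
    # tokenization from eg.predicted and gold values via eg.get_aligned(...)
    # or eg.reference.
    losses = {}
    pipe.update(examples, sgd=optimizer, losses=losses, set_annotations=False)
    return annotated, streamed, losses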
@@ -286,7 +286,7 @@ class Scorer(object):
        if isinstance(example, tuple) and len(example) == 2:
            doc, gold = example
        else:
-            gold = example.gold
+            gold = example._deprecated_get_gold()
            doc = example.doc

        if len(doc) != len(gold):
@@ -3,7 +3,7 @@ from cymem.cymem cimport Pool
from .stateclass cimport StateClass
from ..typedefs cimport weight_t, attr_t
from .transition_system cimport TransitionSystem, Transition
-from ..gold cimport GoldParseC
+from .gold_parse cimport GoldParseC


cdef class ArcEager(TransitionSystem):
39  spacy/syntax/gold_parse.pxd  Normal file

@@ -0,0 +1,39 @@
from cymem.cymem cimport Pool
from .transition_system cimport Transition
from ..typedefs cimport attr_t


cdef struct GoldParseC:
    int* tags
    int* heads
    int* has_dep
    int* sent_start
    attr_t* labels
    int** brackets
    Transition* ner


cdef class GoldParse:
    cdef Pool mem

    cdef GoldParseC c
    cdef readonly object orig

    cdef int length
    cdef public int loss
    cdef public list words
    cdef public list tags
    cdef public list pos
    cdef public list morphs
    cdef public list lemmas
    cdef public list sent_starts
    cdef public list heads
    cdef public list labels
    cdef public dict orths
    cdef public list ner
    cdef public dict brackets
    cdef public dict cats
    cdef public dict links

    cdef readonly list cand_to_gold
    cdef readonly list gold_to_cand
spacy/syntax/gold_parse.pyx (new file, 346 lines)
@@ -0,0 +1,346 @@
# cython: profile=True
import re
import random
import numpy
import tempfile
import shutil
import itertools
from pathlib import Path
import srsly
import warnings

from .. import util
from . import nonproj
from ..tokens import Doc, Span
from ..errors import Errors, AlignmentError, Warnings
from ..gold.annotation import TokenAnnotation
from ..gold.iob_utils import offsets_from_biluo_tags, biluo_tags_from_offsets
from ..gold.align import align


punct_re = re.compile(r"\W")


def is_punct_label(label):
    return label == "P" or label.lower() == "punct"


def get_parses_from_example(
    example, merge=True, vocab=None, make_projective=True, ignore_misaligned=False
):
    """Return a list of (doc, GoldParse) objects.
    If merge is set to True, keep all Token annotations as one big list."""
    # merge == do not modify Example
    if merge:
        examples = [example]
    else:
        # not merging: one GoldParse per sentence, defining docs with the words
        # from each sentence
        examples = example.split_sents()
    outputs = []
    for eg in examples:
        eg_dict = eg.to_dict()
        try:
            gp = GoldParse.from_annotation(
                eg.predicted,
                eg_dict["doc_annotation"],
                eg_dict["token_annotation"],
                make_projective=make_projective
            )
        except AlignmentError:
            if ignore_misaligned:
                gp = None
            else:
                raise
        outputs.append((eg.predicted, gp))
    return outputs


cdef class GoldParse:
    """Collection for training annotations.

    DOCS: https://spacy.io/api/goldparse
    """
    @classmethod
    def from_annotation(cls, doc, doc_annotation, token_annotation, make_projective=False):
        return cls(
            doc,
            words=token_annotation["words"],
            tags=token_annotation["tags"],
            pos=token_annotation["pos"],
            morphs=token_annotation["morphs"],
            lemmas=token_annotation["lemmas"],
            heads=token_annotation["heads"],
            deps=token_annotation["deps"],
            entities=token_annotation["entities"],
            sent_starts=token_annotation["sent_starts"],
            cats=doc_annotation["cats"],
            links=doc_annotation["links"],
            make_projective=make_projective
        )

    def get_token_annotation(self):
        ids = None
        if self.words:
            ids = list(range(len(self.words)))

        return TokenAnnotation(ids=ids, words=self.words, tags=self.tags,
                               pos=self.pos, morphs=self.morphs,
                               lemmas=self.lemmas, heads=self.heads,
                               deps=self.labels, entities=self.ner,
                               sent_starts=self.sent_starts)

    def __init__(self, doc, words=None, tags=None, pos=None, morphs=None,
                 lemmas=None, heads=None, deps=None, entities=None,
                 sent_starts=None, make_projective=False, cats=None,
                 links=None):
        """Create a GoldParse. The fields will not be initialized if len(doc) is zero.

        doc (Doc): The document the annotations refer to.
        words (iterable): A sequence of unicode word strings.
        tags (iterable): A sequence of strings, representing tag annotations.
        pos (iterable): A sequence of strings, representing UPOS annotations.
        morphs (iterable): A sequence of strings, representing morph
            annotations.
        lemmas (iterable): A sequence of strings, representing lemma
            annotations.
        heads (iterable): A sequence of integers, representing syntactic
            head offsets.
        deps (iterable): A sequence of strings, representing the syntactic
            relation types.
        entities (iterable): A sequence of named entity annotations, either as
            BILUO tag strings, or as `(start_char, end_char, label)` tuples,
            representing the entity positions.
        sent_starts (iterable): A sequence of sentence position tags, 1 for
            the first word in a sentence, 0 for all others.
        cats (dict): Labels for text classification. Each key in the dictionary
            may be a string or an int, or a `(start_char, end_char, label)`
            tuple, indicating that the label is applied to only part of the
            document (usually a sentence). Unlike entity annotations, label
            annotations can overlap, i.e. a single word can be covered by
            multiple labelled spans. The TextCategorizer component expects
            true examples of a label to have the value 1.0, and negative
            examples of a label to have the value 0.0. Labels not in the
            dictionary are treated as missing - the gradient for those labels
            will be zero.
        links (dict): A dict with `(start_char, end_char)` keys,
            and the values being dicts with kb_id:value entries,
            representing the external IDs in a knowledge base (KB)
            mapped to either 1.0 or 0.0, indicating positive and
            negative examples respectively.
        RETURNS (GoldParse): The newly constructed object.
        """
        self.mem = Pool()
        self.loss = 0
        self.length = len(doc)

        self.cats = {} if cats is None else dict(cats)
        self.links = {} if links is None else dict(links)

        # temporary doc for aligning entity annotation
        entdoc = None

        # avoid allocating memory if the doc does not contain any tokens
        if self.length == 0:
            self.words = []
            self.tags = []
            self.heads = []
            self.labels = []
            self.ner = []
            self.morphs = []
            # set a minimal orig so that the scorer can score an empty doc
            self.orig = TokenAnnotation(ids=[])
        else:
            if not words:
                words = [token.text for token in doc]
            if not tags:
                tags = [None for _ in words]
            if not pos:
                pos = [None for _ in words]
            if not morphs:
                morphs = [None for _ in words]
            if not lemmas:
                lemmas = [None for _ in words]
            if not heads:
                heads = [None for _ in words]
            if not deps:
                deps = [None for _ in words]
            if not sent_starts:
                sent_starts = [None for _ in words]
            if entities is None:
                entities = ["-" for _ in words]
            elif len(entities) == 0:
                entities = ["O" for _ in words]
            else:
                # Translate the None values to '-', to make processing easier.
                # See Issue #2603
                entities = [(ent if ent is not None else "-") for ent in entities]
                if not isinstance(entities[0], str):
                    # Assume we have entities specified by character offset.
                    # Create a temporary Doc corresponding to provided words
                    # (to preserve gold tokenization) and text (to preserve
                    # character offsets).
                    entdoc_words, entdoc_spaces = util.get_words_and_spaces(words, doc.text)
                    entdoc = Doc(doc.vocab, words=entdoc_words, spaces=entdoc_spaces)
                    entdoc_entities = biluo_tags_from_offsets(entdoc, entities)
                    # There may be some additional whitespace tokens in the
                    # temporary doc, so check that the annotations align with
                    # the provided words while building a list of BILUO labels.
                    entities = []
                    words_offset = 0
                    for i in range(len(entdoc_words)):
                        if words[i + words_offset] == entdoc_words[i]:
                            entities.append(entdoc_entities[i])
                        else:
                            words_offset -= 1
                    if len(entities) != len(words):
                        warnings.warn(Warnings.W029.format(text=doc.text))
                        entities = ["-" for _ in words]

            # These are filled by the tagger/parser/entity recogniser
            self.c.tags = <int*>self.mem.alloc(len(doc), sizeof(int))
            self.c.heads = <int*>self.mem.alloc(len(doc), sizeof(int))
            self.c.labels = <attr_t*>self.mem.alloc(len(doc), sizeof(attr_t))
            self.c.has_dep = <int*>self.mem.alloc(len(doc), sizeof(int))
            self.c.sent_start = <int*>self.mem.alloc(len(doc), sizeof(int))
            self.c.ner = <Transition*>self.mem.alloc(len(doc), sizeof(Transition))

            self.words = [None] * len(doc)
            self.tags = [None] * len(doc)
            self.pos = [None] * len(doc)
            self.morphs = [None] * len(doc)
            self.lemmas = [None] * len(doc)
            self.heads = [None] * len(doc)
            self.labels = [None] * len(doc)
            self.ner = [None] * len(doc)
            self.sent_starts = [None] * len(doc)

            # This needs to be done before we align the words
            if make_projective and any(heads) and any(deps):
                heads, deps = nonproj.projectivize(heads, deps)

            # Do many-to-one alignment for misaligned tokens.
            # If we over-segment, we'll have one gold word that covers a sequence
            # of predicted words
            # If we under-segment, we'll have one predicted word that covers a
            # sequence of gold words.
            # If we "mis-segment", we'll have a sequence of predicted words covering
            # a sequence of gold words. That's many-to-many -- we don't do that
            # except for NER spans where the start and end can be aligned.
            cost, i2j, j2i, i2j_multi, j2i_multi = align([t.orth_ for t in doc], words)

            self.cand_to_gold = [(j if j >= 0 else None) for j in i2j]
            self.gold_to_cand = [(i if i >= 0 else None) for i in j2i]

            self.orig = TokenAnnotation(ids=list(range(len(words))),
                    words=words, tags=tags, pos=pos, morphs=morphs,
                    lemmas=lemmas, heads=heads, deps=deps, entities=entities,
                    sent_starts=sent_starts, brackets=[])

            for i, gold_i in enumerate(self.cand_to_gold):
                if doc[i].text.isspace():
                    self.words[i] = doc[i].text
                    self.tags[i] = "_SP"
                    self.pos[i] = "SPACE"
                    self.morphs[i] = None
                    self.lemmas[i] = None
                    self.heads[i] = None
                    self.labels[i] = None
                    self.ner[i] = None
                    self.sent_starts[i] = 0
                if gold_i is None:
                    if i in i2j_multi:
                        self.words[i] = words[i2j_multi[i]]
                        self.tags[i] = tags[i2j_multi[i]]
                        self.pos[i] = pos[i2j_multi[i]]
                        self.morphs[i] = morphs[i2j_multi[i]]
                        self.lemmas[i] = lemmas[i2j_multi[i]]
                        self.sent_starts[i] = sent_starts[i2j_multi[i]]
                        is_last = i2j_multi[i] != i2j_multi.get(i+1)
                        # Set next word in multi-token span as head, until last
                        if not is_last:
                            self.heads[i] = i+1
                            self.labels[i] = "subtok"
                        else:
                            head_i = heads[i2j_multi[i]]
                            if head_i:
                                self.heads[i] = self.gold_to_cand[head_i]
                            self.labels[i] = deps[i2j_multi[i]]
                        ner_tag = entities[i2j_multi[i]]
                        # Assign O/- for many-to-one O/- NER tags
                        if ner_tag in ("O", "-"):
                            self.ner[i] = ner_tag
                else:
                    self.words[i] = words[gold_i]
                    self.tags[i] = tags[gold_i]
                    self.pos[i] = pos[gold_i]
                    self.morphs[i] = morphs[gold_i]
                    self.lemmas[i] = lemmas[gold_i]
                    self.sent_starts[i] = sent_starts[gold_i]
                    if heads[gold_i] is None:
                        self.heads[i] = None
                    else:
                        self.heads[i] = self.gold_to_cand[heads[gold_i]]
                    self.labels[i] = deps[gold_i]
                    self.ner[i] = entities[gold_i]
            # Assign O/- for one-to-many O/- NER tags
            for j, cand_j in enumerate(self.gold_to_cand):
                if cand_j is None:
                    if j in j2i_multi:
                        i = j2i_multi[j]
                        ner_tag = entities[j]
                        if ner_tag in ("O", "-"):
                            self.ner[i] = ner_tag

            # If there is entity annotation and some tokens remain unaligned,
            # align all entities at the character level to account for all
            # possible token misalignments within the entity spans
            if any([e not in ("O", "-") for e in entities]) and None in self.ner:
                # If the temporary entdoc wasn't created above, initialize it
                if not entdoc:
                    entdoc_words, entdoc_spaces = util.get_words_and_spaces(words, doc.text)
                    entdoc = Doc(doc.vocab, words=entdoc_words, spaces=entdoc_spaces)
                # Get offsets based on gold words and BILUO entities
                entdoc_offsets = offsets_from_biluo_tags(entdoc, entities)
                aligned_offsets = []
                aligned_spans = []
                # Filter offsets to identify those that align with doc tokens
                for offset in entdoc_offsets:
                    span = doc.char_span(offset[0], offset[1])
                    if span and not span.text.isspace():
                        aligned_offsets.append(offset)
                        aligned_spans.append(span)
                # Convert back to BILUO for doc tokens and assign NER for all
                # aligned spans
                biluo_tags = biluo_tags_from_offsets(doc, aligned_offsets, missing=None)
                for span in aligned_spans:
                    for i in range(span.start, span.end):
                        self.ner[i] = biluo_tags[i]

            # Prevent whitespace that isn't within entities from being tagged as
            # an entity.
            for i in range(len(self.ner)):
                if self.tags[i] == "_SP":
                    prev_ner = self.ner[i-1] if i >= 1 else None
                    next_ner = self.ner[i+1] if (i+1) < len(self.ner) else None
                    if prev_ner == "O" or next_ner == "O":
                        self.ner[i] = "O"

            cycle = nonproj.contains_cycle(self.heads)
            if cycle is not None:
                raise ValueError(Errors.E069.format(cycle=cycle,
                    cycle_tokens=" ".join([f"'{self.words[tok_id]}'" for tok_id in cycle]),
                    doc_tokens=" ".join(words[:50])))

    def __len__(self):
        """Get the number of gold-standard tokens.

        RETURNS (int): The number of gold-standard tokens.
        """
        return self.length

    @property
    def is_projective(self):
        """Whether the provided syntactic annotations form a projective
        dependency tree.
        """
        return not nonproj.is_nonproj_tree(self.heads)
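Note on the new module above: GoldParse now lives in spacy.syntax and is built either directly or via get_parses_from_example(). The sketch below only exercises the constructor exactly as defined in the file above; it is an illustration, not part of the commit.

# Illustrative sketch, not part of this commit: build a GoldParse with the
# constructor added in spacy/syntax/gold_parse.pyx and inspect its fields.
from spacy.lang.en import English
from spacy.syntax.gold_parse import GoldParse

nlp = English()
doc = nlp.make_doc("I like London")
gold = GoldParse(
    doc,
    words=["I", "like", "London"],
    heads=[1, 1, 1],
    deps=["nsubj", "ROOT", "dobj"],
    entities=["O", "O", "U-GPE"],
)
print(len(gold))            # number of gold-standard tokens
print(gold.is_projective)   # True for this simple tree
print(gold.cand_to_gold)    # token alignment, here the identity mapping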
@@ -515,8 +515,8 @@ cdef class Parser:
         good_golds = []
         good_states = []
         for i, eg in enumerate(whole_examples):
-            doc = eg.doc
-            gold = self.moves.preprocess_gold(eg.gold)
+            parses = get_parses_from_example(eg)
+            doc, gold = parses[0]
             if gold is not None and self.moves.has_gold(gold):
                 good_docs.append(doc)
                 good_golds.append(gold)

@@ -535,8 +535,12 @@ cdef class Parser:
         cdef:
             StateClass state
             Transition action
-        whole_docs = [ex.doc for ex in whole_examples]
-        whole_golds = [ex.gold for ex in whole_examples]
+        whole_docs = []
+        whole_golds = []
+        for eg in whole_examples:
+            for doc, gold in get_parses_from_example(eg):
+                whole_docs.append(doc)
+                whole_golds.append(gold)
         whole_states = self.moves.init_batch(whole_docs)
         max_length = max(min_length, min(max_length, min([len(doc) for doc in whole_docs])))
         max_moves = 0

@@ -625,7 +629,7 @@ cdef class Parser:
         doc_sample = []
         gold_sample = []
         for example in islice(get_examples(), 10):
-            parses = example.get_gold_parses(merge=False, vocab=self.vocab)
+            parses = get_parses_from_example(example, merge=False, vocab=self.vocab)
             for doc, gold in parses:
                 if len(doc):
                     doc_sample.append(doc)
@@ -7,7 +7,7 @@ from copy import copy

 from ..tokens.doc cimport Doc, set_children_from_heads

-from ..gold import Example
+from ..gold import Example, TokenAnnotation
 from ..errors import Errors


@@ -108,7 +108,7 @@ def preprocess_training_data(gold_data, label_freq_cutoff=30):
             proj_token_dict = example.token_annotation.to_dict()
             proj_token_dict["heads"] = proj_heads
             proj_token_dict["deps"] = deco_deps
-            new_example.set_token_annotation(**proj_token_dict)
+            new_example.token_annotation = TokenAnnotation(**proj_token_dict)
             preprocessed.append(new_example)
     if label_freq_cutoff > 0:
         return _filter_labels(preprocessed, label_freq_cutoff, freqs)

@@ -216,6 +216,6 @@ def _filter_labels(examples, cutoff, freqs):
                 filtered_labels.append(label)
         filtered_token_dict = example.token_annotation.to_dict()
         filtered_token_dict["deps"] = filtered_labels
-        new_example.set_token_annotation(**filtered_token_dict)
+        new_example.token_annotation = TokenAnnotation(**filtered_token_dict)
         filtered.append(new_example)
     return filtered
@@ -35,7 +35,10 @@ def _train_parser(parser):
     for i in range(5):
         losses = {}
         doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
-        gold = GoldParse(doc, heads=[1, 1, 3, 3], deps=["left", "ROOT", "left", "ROOT"])
+        gold = {
+            "heads": [1, 1, 3, 3],
+            "deps": ["left", "ROOT", "left", "ROOT"]
+        }
         parser.update((doc, gold), sgd=sgd, losses=losses)
     return parser

@@ -47,9 +50,10 @@ def test_add_label(parser):
     for i in range(100):
         losses = {}
         doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
-        gold = GoldParse(
-            doc, heads=[1, 1, 3, 3], deps=["right", "ROOT", "left", "ROOT"]
-        )
+        gold = {
+            "heads": [1, 1, 3, 3],
+            "deps": ["right", "ROOT", "left", "ROOT"]
+        }
         parser.update((doc, gold), sgd=sgd, losses=losses)
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc = parser(doc)

@@ -47,7 +47,7 @@ def doc(vocab):

 @pytest.fixture
 def gold(doc):
-    return GoldParse(doc, heads=[1, 1, 1], deps=["L", "ROOT", "R"])
+    return {"heads": [1, 1, 1], "deps": ["L", "ROOT", "R"]}


 def test_can_init_nn_parser(parser):
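The pattern in the test changes above, where a plain dict of annotations replaces a GoldParse object, is the shape the update call is moving toward. A minimal sketch of that call; the parser and optimizer are assumed to be set up as in the test fixtures, so this is illustration only.

# Illustrative sketch, not part of this commit: update a parser from a
# (Doc, dict) pair instead of a GoldParse. `parser` and `sgd` are assumed
# to exist, as in the fixtures above.
from spacy.tokens import Doc

doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
gold = {"heads": [1, 1, 3, 3], "deps": ["left", "ROOT", "left", "ROOT"]}
losses = {}
parser.update((doc, gold), sgd=sgd, losses=losses)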
@@ -1,7 +1,6 @@
 import pytest
 from thinc.api import Adam
 from spacy.attrs import NORM
-from spacy.gold import GoldParse
 from spacy.vocab import Vocab

 from spacy.pipeline.defaults import default_parser

@@ -28,7 +27,7 @@ def parser(vocab):
     for i in range(10):
         losses = {}
         doc = Doc(vocab, words=["a", "b", "c", "d"])
-        gold = GoldParse(doc, heads=[1, 1, 3, 3], deps=["left", "ROOT", "left", "ROOT"])
+        gold = dict(heads=[1, 1, 3, 3], deps=["left", "ROOT", "left", "ROOT"])
         parser.update((doc, gold), sgd=sgd, losses=losses)
     return parser
@@ -3,7 +3,7 @@ import gc
 import numpy
 import copy

-from spacy.gold import Example
+from spacy.gold import Example, TokenAnnotation
 from spacy.lang.en import English
 from spacy.lang.en.stop_words import STOP_WORDS
 from spacy.lang.lex_attrs import is_stop

@@ -272,9 +272,16 @@ def test_issue1963(en_tokenizer):
 def test_issue1967(label):
     config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
     ner = EntityRecognizer(Vocab(), default_ner(), **config)
-    example = Example(doc=None)
-    example.set_token_annotation(
-        ids=[0], words=["word"], tags=["tag"], heads=[0], deps=["dep"], entities=[label]
+    example = Example(
+        doc=Doc(ner.vocab, words=["word"]),
+        token_annotation=TokenAnnotation(
+            ids=[0],
+            words=["word"],
+            tags=["tag"],
+            heads=[0],
+            deps=["dep"],
+            entities=[label]
+        )
     )
     ner.moves.get_actions(gold_parses=[example])
@@ -1,9 +1,12 @@
 from spacy.errors import AlignmentError
 from spacy.gold import biluo_tags_from_offsets, offsets_from_biluo_tags
-from spacy.gold import spans_from_biluo_tags, GoldParse, iob_to_biluo, align
-from spacy.gold import GoldCorpus, docs_to_json, Example, DocAnnotation
+from spacy.gold import spans_from_biluo_tags, iob_to_biluo, align
+from spacy.gold import GoldCorpus, docs_to_json, DocAnnotation
+from spacy.gold.new_example import NewExample as Example
 from spacy.lang.en import English
 from spacy.syntax.nonproj import is_nonproj_tree
+from spacy.syntax.gold_parse import GoldParse, get_parses_from_example
+from spacy.syntax.gold_parse import get_parses_from_example
 from spacy.tokens import Doc
 from spacy.util import get_words_and_spaces, compounding, minibatch
 import pytest

@@ -90,10 +93,16 @@ def merged_dict():
         "ids": [1, 2, 3, 4, 5, 6, 7],
         "words": ["Hi", "there", "everyone", "It", "is", "just", "me"],
         "tags": ["INTJ", "ADV", "PRON", "PRON", "AUX", "ADV", "PRON"],
-        "sent_starts": [1, 0, 0, 1, 0, 0, 0, 0],
+        "sent_starts": [1, 0, 0, 1, 0, 0, 0],
     }


+@pytest.fixture
+def vocab():
+    nlp = English()
+    return nlp.vocab
+
+
 def test_gold_biluo_U(en_vocab):
     words = ["I", "flew", "to", "London", "."]
     spaces = [True, True, True, False, True]
@@ -270,88 +279,38 @@ def test_roundtrip_docs_to_json(doc):
         srsly.write_json(json_file, [docs_to_json(doc)])
         goldcorpus = GoldCorpus(train=str(json_file), dev=str(json_file))

-        reloaded_example = next(goldcorpus.dev_dataset(nlp))
-        goldparse = reloaded_example.gold
+        reloaded_example = next(goldcorpus.dev_dataset(nlp=nlp))

         assert len(doc) == goldcorpus.count_train()
-        assert text == reloaded_example.text
-        assert tags == goldparse.tags
-        assert pos == goldparse.pos
-        assert morphs == goldparse.morphs
-        assert lemmas == goldparse.lemmas
-        assert deps == goldparse.labels
-        assert heads == goldparse.heads
-        assert biluo_tags == goldparse.ner
-        assert "TRAVEL" in goldparse.cats
-        assert "BAKING" in goldparse.cats
-        assert cats["TRAVEL"] == goldparse.cats["TRAVEL"]
-        assert cats["BAKING"] == goldparse.cats["BAKING"]
-
-    # roundtrip to JSONL train dicts
-    with make_tempdir() as tmpdir:
-        jsonl_file = tmpdir / "roundtrip.jsonl"
-        srsly.write_jsonl(jsonl_file, [docs_to_json(doc)])
-        goldcorpus = GoldCorpus(str(jsonl_file), str(jsonl_file))
-
-        reloaded_example = next(goldcorpus.dev_dataset(nlp))
-        goldparse = reloaded_example.gold
-
-        assert len(doc) == goldcorpus.count_train()
-        assert text == reloaded_example.text
-        assert tags == goldparse.tags
-        assert pos == goldparse.pos
-        assert morphs == goldparse.morphs
-        assert lemmas == goldparse.lemmas
-        assert deps == goldparse.labels
-        assert heads == goldparse.heads
-        assert biluo_tags == goldparse.ner
-        assert "TRAVEL" in goldparse.cats
-        assert "BAKING" in goldparse.cats
-        assert cats["TRAVEL"] == goldparse.cats["TRAVEL"]
-        assert cats["BAKING"] == goldparse.cats["BAKING"]
-
-    # roundtrip to JSONL tuples
-    with make_tempdir() as tmpdir:
-        jsonl_file = tmpdir / "roundtrip.jsonl"
-        # write to JSONL train dicts
-        srsly.write_jsonl(jsonl_file, [docs_to_json(doc)])
-        goldcorpus = GoldCorpus(str(jsonl_file), str(jsonl_file))
-        # load and rewrite as JSONL tuples
-        srsly.write_jsonl(jsonl_file, goldcorpus.train_examples)
-        goldcorpus = GoldCorpus(str(jsonl_file), str(jsonl_file))
-
-        reloaded_example = next(goldcorpus.dev_dataset(nlp))
-        goldparse = reloaded_example.gold
-
-        assert len(doc) == goldcorpus.count_train()
-        assert text == reloaded_example.text
-        assert tags == goldparse.tags
-        assert deps == goldparse.labels
-        assert heads == goldparse.heads
-        assert lemmas == goldparse.lemmas
-        assert biluo_tags == goldparse.ner
-        assert "TRAVEL" in goldparse.cats
-        assert "BAKING" in goldparse.cats
-        assert cats["TRAVEL"] == goldparse.cats["TRAVEL"]
-        assert cats["BAKING"] == goldparse.cats["BAKING"]
+        assert text == reloaded_example.predicted.text
+        assert tags == [t.tag_ for t in reloaded_example.reference]
+        assert pos == [t.pos_ for t in reloaded_example.reference]
+        assert morphs == [t.morph_ for t in reloaded_example.reference]
+        assert lemmas == [t.lemma_ for t in reloaded_example.reference]
+        assert deps == [t.dep_ for t in reloaded_example.reference]
+        assert heads == [t.head.i for t in reloaded_example.reference]
+        assert "TRAVEL" in reloaded_example.reference.cats
+        assert "BAKING" in reloaded_example.reference.cats
+        assert cats["TRAVEL"] == reloaded_example.reference.cats["TRAVEL"]
+        assert cats["BAKING"] == reloaded_example.reference.cats["BAKING"]


+@pytest.mark.xfail  # TODO do we need to do the projectivity differently?
 def test_projective_train_vs_nonprojective_dev(doc):
     nlp = English()
     deps = [t.dep_ for t in doc]
     heads = [t.head.i for t in doc]

     with make_tempdir() as tmpdir:
-        jsonl_file = tmpdir / "test.jsonl"
-        # write to JSONL train dicts
-        srsly.write_jsonl(jsonl_file, [docs_to_json(doc)])
-        goldcorpus = GoldCorpus(str(jsonl_file), str(jsonl_file))
+        json_file = tmpdir / "test.json"
+        # write to JSON train dicts
+        srsly.write_json(json_file, [docs_to_json(doc)])
+        goldcorpus = GoldCorpus(str(json_file), str(json_file))

         train_reloaded_example = next(goldcorpus.train_dataset(nlp))
-        train_goldparse = train_reloaded_example.gold
+        train_goldparse = get_parses_from_example(train_reloaded_example)[0][1]

         dev_reloaded_example = next(goldcorpus.dev_dataset(nlp))
-        dev_goldparse = dev_reloaded_example.gold
+        dev_goldparse = get_parses_from_example(dev_reloaded_example)[0][1]

         assert is_nonproj_tree([t.head.i for t in doc]) is True
         assert is_nonproj_tree(train_goldparse.heads) is False
@@ -364,27 +323,31 @@ def test_projective_train_vs_nonprojective_dev(doc):
         assert deps == dev_goldparse.labels


+# Hm, not sure where misalignment check would be handled? In the components too?
+# I guess that does make sense. A text categorizer doesn't care if it's
+# misaligned...
+@pytest.mark.xfail  # TODO
 def test_ignore_misaligned(doc):
     nlp = English()
     text = doc.text
     with make_tempdir() as tmpdir:
-        jsonl_file = tmpdir / "test.jsonl"
+        json_file = tmpdir / "test.json"
         data = [docs_to_json(doc)]
         data[0]["paragraphs"][0]["raw"] = text.replace("Sarah", "Jane")
-        # write to JSONL train dicts
-        srsly.write_jsonl(jsonl_file, data)
-        goldcorpus = GoldCorpus(str(jsonl_file), str(jsonl_file))
+        # write to JSON train dicts
+        srsly.write_json(json_file, data)
+        goldcorpus = GoldCorpus(str(json_file), str(json_file))

         with pytest.raises(AlignmentError):
             train_reloaded_example = next(goldcorpus.train_dataset(nlp))

     with make_tempdir() as tmpdir:
-        jsonl_file = tmpdir / "test.jsonl"
+        json_file = tmpdir / "test.json"
         data = [docs_to_json(doc)]
         data[0]["paragraphs"][0]["raw"] = text.replace("Sarah", "Jane")
-        # write to JSONL train dicts
-        srsly.write_jsonl(jsonl_file, data)
-        goldcorpus = GoldCorpus(str(jsonl_file), str(jsonl_file))
+        # write to JSON train dicts
+        srsly.write_json(json_file, data)
+        goldcorpus = GoldCorpus(str(json_file), str(json_file))

         # doesn't raise an AlignmentError, but there is nothing to iterate over
         # because the only example can't be aligned
@@ -395,14 +358,14 @@ def test_ignore_misaligned(doc):
 def test_make_orth_variants(doc):
     nlp = English()
     with make_tempdir() as tmpdir:
-        jsonl_file = tmpdir / "test.jsonl"
-        # write to JSONL train dicts
-        srsly.write_jsonl(jsonl_file, [docs_to_json(doc)])
-        goldcorpus = GoldCorpus(str(jsonl_file), str(jsonl_file))
+        json_file = tmpdir / "test.json"
+        # write to JSON train dicts
+        srsly.write_json(json_file, [docs_to_json(doc)])
+        goldcorpus = GoldCorpus(str(json_file), str(json_file))

         # due to randomness, test only that this runs with no errors for now
         train_reloaded_example = next(goldcorpus.train_dataset(nlp, orth_variant_level=0.2))
-        train_goldparse = train_reloaded_example.gold  # noqa: F841
+        train_goldparse = get_parses_from_example(train_reloaded_example)[0][1]


 @pytest.mark.parametrize(
@@ -456,20 +419,6 @@ def test_gold_constructor():
     assert gold.words == ["This", "is", "a", "sentence"]


-def test_gold_orig_annot():
-    nlp = English()
-    doc = nlp("This is a sentence")
-    gold = GoldParse(doc, cats={"cat1": 1.0, "cat2": 0.0})
-
-    assert gold.orig.words == ["This", "is", "a", "sentence"]
-    assert gold.cats["cat1"]
-
-    doc_annotation = DocAnnotation(cats={"cat1": 0.0, "cat2": 1.0})
-    gold2 = GoldParse.from_annotation(doc, doc_annotation, gold.orig)
-    assert gold2.orig.words == ["This", "is", "a", "sentence"]
-    assert not gold2.cats["cat1"]
-
-
 def test_tuple_format_implicit():
     """Test tuple format with implicit GoldParse creation"""
@@ -485,6 +434,7 @@ def test_tuple_format_implicit():
     _train(train_data)


+@pytest.mark.xfail  # TODO
 def test_tuple_format_implicit_invalid():
     """Test that an error is thrown for an implicit invalid GoldParse field"""
@@ -518,43 +468,51 @@ def _train(train_data):

 def test_split_sents(merged_dict):
     nlp = English()
-    example = Example()
-    example.set_token_annotation(**merged_dict)
-    assert len(example.get_gold_parses(merge=False, vocab=nlp.vocab)) == 2
-    assert len(example.get_gold_parses(merge=True, vocab=nlp.vocab)) == 1
+    example = Example.from_dict(
+        Doc(nlp.vocab, words=merged_dict["words"]),
+        merged_dict
+    )
+    assert len(get_parses_from_example(
+        example,
+        merge=False,
+        vocab=nlp.vocab,
+        make_projective=False)
+    ) == 2
+    assert len(get_parses_from_example(
+        example,
+        merge=True,
+        vocab=nlp.vocab,
+        make_projective=False
+    )) == 1

     split_examples = example.split_sents()
     assert len(split_examples) == 2

-    token_annotation_1 = split_examples[0].token_annotation
-    assert token_annotation_1.ids == [1, 2, 3]
-    assert token_annotation_1.words == ["Hi", "there", "everyone"]
-    assert token_annotation_1.tags == ["INTJ", "ADV", "PRON"]
-    assert token_annotation_1.sent_starts == [1, 0, 0]
+    token_annotation_1 = split_examples[0].to_dict()["token_annotation"]
+    assert token_annotation_1["words"] == ["Hi", "there", "everyone"]
+    assert token_annotation_1["tags"] == ["INTJ", "ADV", "PRON"]
+    assert token_annotation_1["sent_starts"] == [1, 0, 0]

-    token_annotation_2 = split_examples[1].token_annotation
-    assert token_annotation_2.ids == [4, 5, 6, 7]
-    assert token_annotation_2.words == ["It", "is", "just", "me"]
-    assert token_annotation_2.tags == ["PRON", "AUX", "ADV", "PRON"]
-    assert token_annotation_2.sent_starts == [1, 0, 0, 0]
+    token_annotation_2 = split_examples[1].to_dict()["token_annotation"]
+    assert token_annotation_2["words"] == ["It", "is", "just", "me"]
+    assert token_annotation_2["tags"] == ["PRON", "AUX", "ADV", "PRON"]
+    assert token_annotation_2["sent_starts"] == [1, 0, 0, 0]


-def test_tuples_to_example(merged_dict):
-    ex = Example()
-    ex.set_token_annotation(**merged_dict)
-    cats = {"TRAVEL": 1.0, "BAKING": 0.0}
-    ex.set_doc_annotation(cats=cats)
-    ex_dict = ex.to_dict()
-
-    assert ex_dict["token_annotation"]["ids"] == merged_dict["ids"]
-    assert ex_dict["token_annotation"]["words"] == merged_dict["words"]
-    assert ex_dict["token_annotation"]["tags"] == merged_dict["tags"]
-    assert ex_dict["token_annotation"]["sent_starts"] == merged_dict["sent_starts"]
-    assert ex_dict["doc_annotation"]["cats"] == cats
-
-
-def test_empty_example_goldparse():
-    nlp = English()
-    doc = nlp("")
-    example = Example(doc=doc)
-    assert len(example.get_gold_parses()) == 1
+# This fails on some None value? Need to look into that.
+@pytest.mark.xfail  # TODO
+def test_tuples_to_example(vocab, merged_dict):
+    cats = {"TRAVEL": 1.0, "BAKING": 0.0}
+    merged_dict = dict(merged_dict)
+    merged_dict["cats"] = cats
+    ex = Example.from_dict(
+        Doc(vocab, words=merged_dict["words"]),
+        merged_dict
+    )
+    words = [token.text for token in ex.reference]
+    assert words == merged_dict["words"]
+    tags = [token.tag_ for token in ex.reference]
+    assert tags == merged_dict["tags"]
+    sent_starts = [token.is_sent_start for token in ex.reference]
+    assert sent_starts == [bool(v) for v in merged_dict["sent_starts"]]
+    ex.reference.cats == cats
@@ -19,22 +19,16 @@ def nlp():
     return nlp


+@pytest.mark.xfail  # TODO
 def test_language_update(nlp):
     text = "hello world"
     annots = {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
     wrongkeyannots = {"LABEL": True}
     doc = Doc(nlp.vocab, words=text.split(" "))
-    gold = GoldParse(doc, **annots)
-    # Update with doc and gold objects
-    nlp.update((doc, gold))
     # Update with text and dict
     nlp.update((text, annots))
     # Update with doc object and dict
     nlp.update((doc, annots))
-    # Update with text and gold object
-    nlp.update((text, gold))
-    # Update with empty doc and gold object
-    nlp.update((None, gold))
     # Update badly
     with pytest.raises(ValueError):
         nlp.update((doc, None))
@@ -44,20 +38,16 @@ def test_language_update(nlp):

 def test_language_evaluate(nlp):
     text = "hello world"
-    annots = {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
+    annots = {
+        "doc_annotation": {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
+    }
     doc = Doc(nlp.vocab, words=text.split(" "))
-    gold = GoldParse(doc, **annots)
-    # Evaluate with doc and gold objects
-    nlp.evaluate([(doc, gold)])
     # Evaluate with text and dict
     nlp.evaluate([(text, annots)])
     # Evaluate with doc object and dict
     nlp.evaluate([(doc, annots)])
-    # Evaluate with text and gold object
-    nlp.evaluate([(text, gold)])
-    # Evaluate badly
     with pytest.raises(Exception):
-        nlp.evaluate([text, gold])
+        nlp.evaluate([text, annots])


 def test_evaluate_no_pipe(nlp):
spacy/tests/test_new_example.py (new file, 186 lines)
@@ -0,0 +1,186 @@
import pytest
from spacy.gold.new_example import NewExample as Example
from spacy.tokens import Doc
from spacy.vocab import Vocab


def test_Example_init_requires_doc_objects():
    vocab = Vocab()
    with pytest.raises(TypeError):
        eg = Example(None, None)
    with pytest.raises(TypeError):
        eg = Example(Doc(vocab, words=["hi"]), None)
    with pytest.raises(TypeError):
        eg = Example(None, Doc(vocab, words=["hi"]))


def test_Example_from_dict_basic():
    eg = Example.from_dict(
        Doc(Vocab(), words=["hello", "world"]), {"words": ["hello", "world"]}
    )
    assert isinstance(eg.x, Doc)
    assert isinstance(eg.y, Doc)


@pytest.mark.parametrize(
    "annots", [{"words": ["ice", "cream"], "weirdannots": ["something", "such"]}]
)
def test_Example_from_dict_invalid(annots):
    vocab = Vocab()
    predicted = Doc(vocab, words=annots["words"])
    with pytest.raises(ValueError):
        Example.from_dict(predicted, annots)


@pytest.mark.parametrize("annots", [{"words": ["ice", "cream"], "tags": ["NN", "NN"]}])
def test_Example_from_dict_with_tags(annots):
    vocab = Vocab()
    predicted = Doc(vocab, words=annots["words"])
    eg = Example.from_dict(predicted, annots)
    for i, token in enumerate(eg.reference):
        assert token.tag_ == annots["tags"][i]


@pytest.mark.parametrize(
    "annots",
    [
        {
            "words": ["I", "like", "London", "and", "Berlin", "."],
            "deps": ["nsubj", "ROOT", "dobj", "cc", "conj", "punct"],
            "heads": [1, 1, 1, 2, 2, 1],
        }
    ],
)
def test_Example_from_dict_with_parse(annots):
    vocab = Vocab()
    predicted = Doc(vocab, words=annots["words"])
    eg = Example.from_dict(predicted, annots)
    for i, token in enumerate(eg.reference):
        assert token.dep_ == annots["deps"][i]
        assert token.head.i == annots["heads"][i]


@pytest.mark.parametrize(
    "annots",
    [
        {
            "words": ["Sarah", "'s", "sister", "flew"],
            "morphs": [
                "NounType=prop|Number=sing",
                "Poss=yes",
                "Number=sing",
                "Tense=past|VerbForm=fin",
            ],
        }
    ],
)
def test_Example_from_dict_with_morphology(annots):
    vocab = Vocab()
    predicted = Doc(vocab, words=annots["words"])
    eg = Example.from_dict(predicted, annots)
    for i, token in enumerate(eg.reference):
        assert token.morph_ == annots["morphs"][i]


@pytest.mark.parametrize(
    "annots",
    [
        {
            "words": ["This", "is", "one", "sentence", "this", "is", "another"],
            "sent_starts": [1, 0, 0, 0, 1, 0, 0],
        }
    ],
)
def test_Example_from_dict_with_sent_start(annots):
    vocab = Vocab()
    predicted = Doc(vocab, words=annots["words"])
    eg = Example.from_dict(predicted, annots)
    assert len(list(eg.reference.sents)) == 2
    for i, token in enumerate(eg.reference):
        assert bool(token.is_sent_start) == bool(annots["sent_starts"][i])


@pytest.mark.parametrize(
    "annots",
    [
        {
            "words": ["This", "is", "a", "sentence"],
            "cats": {"cat1": 1.0, "cat2": 0.0, "cat3": 0.5},
        }
    ],
)
def test_Example_from_dict_with_cats(annots):
    vocab = Vocab()
    predicted = Doc(vocab, words=annots["words"])
    eg = Example.from_dict(predicted, annots)
    assert len(list(eg.reference.cats)) == 3
    assert eg.reference.cats["cat1"] == 1.0
    assert eg.reference.cats["cat2"] == 0.0
    assert eg.reference.cats["cat3"] == 0.5


@pytest.mark.parametrize(
    "annots",
    [
        {
            "words": ["I", "like", "New", "York", "and", "Berlin", "."],
            "entities": [(7, 15, "LOC"), (20, 26, "LOC")],
        }
    ],
)
def test_Example_from_dict_with_entities(annots):
    vocab = Vocab()
    predicted = Doc(vocab, words=annots["words"])
    eg = Example.from_dict(predicted, annots)
    assert len(list(eg.reference.ents)) == 2
    assert eg.reference[0].ent_iob_ == "O"
    assert eg.reference[1].ent_iob_ == "O"
    assert eg.reference[2].ent_iob_ == "B"
    assert eg.reference[3].ent_iob_ == "I"
    assert eg.reference[4].ent_iob_ == "O"
    assert eg.reference[5].ent_iob_ == "B"
    assert eg.reference[6].ent_iob_ == "O"
    assert eg.reference[2].ent_type_ == "LOC"
    assert eg.reference[3].ent_type_ == "LOC"
    assert eg.reference[5].ent_type_ == "LOC"


@pytest.mark.parametrize(
    "annots",
    [
        {
            "words": ["I", "like", "New", "York", "and", "Berlin", "."],
            "entities": [(7, 15, "LOC"), (20, 26, "LOC")],
            "links": {(7, 15): {"Q60": 1.0, "Q64": 0.0}, (20, 26): {"Q60": 0.0, "Q64": 1.0}},
        }
    ],
)
def test_Example_from_dict_with_links(annots):
    vocab = Vocab()
    predicted = Doc(vocab, words=annots["words"])
    eg = Example.from_dict(predicted, annots)
    assert eg.reference[0].ent_kb_id_ == ""
    assert eg.reference[1].ent_kb_id_ == ""
    assert eg.reference[2].ent_kb_id_ == "Q60"
    assert eg.reference[3].ent_kb_id_ == "Q60"
    assert eg.reference[4].ent_kb_id_ == ""
    assert eg.reference[5].ent_kb_id_ == "Q64"
    assert eg.reference[6].ent_kb_id_ == ""


@pytest.mark.parametrize(
    "annots",
    [
        {
            "words": ["I", "like", "New", "York", "and", "Berlin", "."],
            "entities": [(7, 15, "LOC"), (20, 26, "LOC")],
            "links": {(0, 1): {"Q7381115": 1.0, "Q2146908": 0.0}},
        }
    ],
)
def test_Example_from_dict_with_links_invalid(annots):
    vocab = Vocab()
    predicted = Doc(vocab, words=annots["words"])
    with pytest.raises(ValueError):
        Example.from_dict(predicted, annots)
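A compact illustration of the new Example surface exercised by the test file above. Everything here follows directly from those tests (from_dict, x/y, predicted/reference); it is an illustration of the tested behaviour, not separately documented API.

# Illustrative sketch, not part of this commit: build a NewExample from a
# predicted Doc plus a dict of gold annotations, then read the reference side.
from spacy.gold.new_example import NewExample as Example
from spacy.tokens import Doc
from spacy.vocab import Vocab

vocab = Vocab()
predicted = Doc(vocab, words=["I", "like", "London"])
eg = Example.from_dict(predicted, {"words": ["I", "like", "London"], "tags": ["PRP", "VBP", "NNP"]})

print(type(eg.x), type(eg.y))            # both sides are Doc objects
print([t.tag_ for t in eg.reference])    # gold tags: ['PRP', 'VBP', 'NNP']
print(eg.predicted.text)                 # the text the model will see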
@@ -1,12 +1,14 @@
 from numpy.testing import assert_almost_equal, assert_array_almost_equal
 import pytest
 from pytest import approx
-from spacy.gold import Example, GoldParse
+from spacy.gold import Example, GoldParse, TokenAnnotation
+from spacy.gold.iob_utils import biluo_tags_from_offsets
 from spacy.scorer import Scorer, ROCAUCScore
 from spacy.scorer import _roc_auc_score, _roc_curve
 from .util import get_doc
 from spacy.lang.en import English


 test_las_apple = [
     [
         "Apple is looking at buying U.K. startup for $ 1 billion",

@@ -134,8 +136,11 @@ def test_ner_per_type(en_vocab):
             words=input_.split(" "),
             ents=[[0, 1, "CARDINAL"], [2, 3, "CARDINAL"]],
         )
-        ex = Example(doc=doc)
-        ex.set_token_annotation(entities=annot["entities"])
+        entities = biluo_tags_from_offsets(doc, annot["entities"])
+        ex = Example(
+            doc=doc,
+            token_annotation=TokenAnnotation(entities=entities)
+        )
         scorer.score(ex)
     results = scorer.scores

@@ -155,8 +160,11 @@ def test_ner_per_type(en_vocab):
             words=input_.split(" "),
             ents=[[0, 1, "ORG"], [5, 6, "GPE"], [6, 7, "ORG"]],
         )
-        ex = Example(doc=doc)
-        ex.set_token_annotation(entities=annot["entities"])
+        entities = biluo_tags_from_offsets(doc, annot["entities"])
+        ex = Example(
+            doc=doc,
+            token_annotation=TokenAnnotation(entities=entities)
+        )
        scorer.score(ex)
     results = scorer.scores

@@ -799,6 +799,8 @@ cdef class Doc:
         cdef attr_id_t attr_id
         cdef TokenC* tokens = self.c
         cdef int length = len(array)
+        if length != len(self):
+            raise ValueError("Cannot set array values longer than the document.")
         # Get set up for fast loading
         cdef Pool mem = Pool()
         cdef int n_attrs = len(attrs)

@@ -823,6 +825,13 @@ cdef class Doc:
             for i in range(length):
                 if array[i, col] != 0:
                     self.vocab.morphology.assign_tag(&tokens[i], array[i, col])
+        # Verify ENT_IOB are proper integers
+        if ENT_IOB in attrs:
+            iob_strings = Token.iob_strings()
+            col = attrs.index(ENT_IOB)
+            for i in range(length):
+                if array[i, col] not in range(0, len(iob_strings)):
+                    raise ValueError(Errors.E985.format(values=iob_strings, value=array[i, col]))
         # Now load the data
         for i in range(length):
             token = &self.c[i]

@@ -881,6 +890,32 @@ cdef class Doc:
     def to_bytes(self, exclude=tuple(), **kwargs):
         """Serialize, i.e. export the document contents to a binary string.

+        exclude (list): String names of serialization fields to exclude.
+        RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
+            all annotations.
+
+        DOCS: https://spacy.io/api/doc#to_bytes
+        """
+        return srsly.msgpack_dumps(self.to_dict(exclude=exclude, **kwargs))
+
+    def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
+        """Deserialize, i.e. import the document contents from a binary string.
+
+        data (bytes): The string to load from.
+        exclude (list): String names of serialization fields to exclude.
+        RETURNS (Doc): Itself.
+
+        DOCS: https://spacy.io/api/doc#from_bytes
+        """
+        return self.from_dict(
+            srsly.msgpack_loads(bytes_data),
+            exclude=exclude,
+            **kwargs
+        )
+
+    def to_dict(self, exclude=tuple(), **kwargs):
+        """Export the document contents to a dictionary for serialization.
+
         exclude (list): String names of serialization fields to exclude.
         RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
             all annotations.

@@ -917,9 +952,9 @@ cdef class Doc:
             serializers["user_data_keys"] = lambda: srsly.msgpack_dumps(user_data_keys)
         if "user_data_values" not in exclude:
             serializers["user_data_values"] = lambda: srsly.msgpack_dumps(user_data_values)
-        return util.to_bytes(serializers, exclude)
+        return util.to_dict(serializers, exclude)

-    def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
+    def from_dict(self, msg, exclude=tuple(), **kwargs):
         """Deserialize, i.e. import the document contents from a binary string.

         data (bytes): The string to load from.

@@ -943,7 +978,6 @@ cdef class Doc:
         for key in kwargs:
             if key in deserializers or key in ("user_data",):
                 raise ValueError(Errors.E128.format(arg=key))
-        msg = util.from_bytes(bytes_data, deserializers, exclude)
         # Msgpack doesn't distinguish between lists and tuples, which is
         # vexing for user data. As a best guess, we *know* that within
         # keys, we must have tuples. In values we just have to hope

@@ -975,6 +1009,7 @@ cdef class Doc:
         self.from_array(msg["array_head"][2:], attrs[:, 2:])
         return self

+
     def extend_tensor(self, tensor):
         """Concatenate a new tensor onto the doc.tensor object.
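Note on the Doc serialization change above: to_bytes()/from_bytes() now delegate to the new to_dict()/from_dict() methods, with msgpack handling only the byte encoding. A small round-trip sketch of that surface, as read from the diff; it is illustration only, not part of the commit.

# Illustrative sketch, not part of this commit: the dict-based and
# bytes-based round trips added above produce the same document.
from spacy.tokens import Doc
from spacy.vocab import Vocab

vocab = Vocab()
doc = Doc(vocab, words=["hello", "world"])

msg = doc.to_dict()                   # plain dict of serialized fields
doc2 = Doc(vocab).from_dict(msg)      # rebuild a Doc from the dict

data = doc.to_bytes()                 # msgpack-encoded version of the same dict
doc3 = Doc(vocab).from_bytes(data)
assert doc2.text == doc3.text == doc.text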
@@ -778,6 +778,10 @@ cdef class Token:
         """
         return self.c.ent_iob

+    @classmethod
+    def iob_strings(cls):
+        return ("", "I", "O", "B")
+
     @property
     def ent_iob_(self):
         """IOB code of named entity tag. "B" means the token begins an entity,

@@ -787,8 +791,7 @@ cdef class Token:

         RETURNS (str): IOB code of named entity tag.
         """
-        iob_strings = ("", "I", "O", "B")
-        return iob_strings[self.c.ent_iob]
+        return self.iob_strings()[self.c.ent_iob]

     property ent_id:
         """RETURNS (uint64): ID of the entity the token is an instance of,
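The new Token.iob_strings() classmethod is the single source for the IOB code table: ent_iob_ indexes into it, and Doc.from_array uses it to validate ENT_IOB values (see the check added above). A tiny sketch:

# Illustrative sketch, not part of this commit.
from spacy.tokens import Token

print(Token.iob_strings())   # ('', 'I', 'O', 'B')
# A stored ent_iob value of 3 therefore renders as "B" via Token.ent_iob_.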
@@ -819,16 +819,23 @@ def filter_spans(spans):


 def to_bytes(getters, exclude):
+    return srsly.msgpack_dumps(to_dict(getters, exclude))
+
+
+def from_bytes(bytes_data, setters, exclude):
+    return from_dict(srsly.msgpack_loads(bytes_data), setters, exclude)
+
+
+def to_dict(getters, exclude):
     serialized = {}
     for key, getter in getters.items():
         # Split to support file names like meta.json
         if key.split(".")[0] not in exclude:
             serialized[key] = getter()
-    return srsly.msgpack_dumps(serialized)
+    return serialized


-def from_bytes(bytes_data, setters, exclude):
-    msg = srsly.msgpack_loads(bytes_data)
+def from_dict(msg, setters, exclude):
     for key, setter in setters.items():
         # Split to support file names like meta.json
         if key.split(".")[0] not in exclude and key in msg:
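Note on the util refactor above: to_bytes()/from_bytes() are now thin msgpack wrappers around to_dict()/from_dict(), so objects can expose a dict-based serialization path without double-encoding. The sketch below assumes, as in the existing helpers, that each setter receives the stored value for its key; it is illustration only.

# Illustrative sketch, not part of this commit: the dict and bytes helpers
# now compose, producing the same payload either way.
from spacy import util

getters = {"meta": lambda: {"lang": "en"}}
msg = util.to_dict(getters, exclude=[])       # {"meta": {"lang": "en"}}
data = util.to_bytes(getters, exclude=[])     # msgpack bytes of the same dict

loaded = {}
setters = {"meta": lambda value: loaded.update(value)}
util.from_bytes(data, setters, exclude=[])
assert loaded == {"lang": "en"}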