diff --git a/examples/training/rehearsal.py b/examples/training/rehearsal.py
index 8c94ab14e..a7eb120c9 100644
--- a/examples/training/rehearsal.py
+++ b/examples/training/rehearsal.py
@@ -33,7 +33,7 @@ def read_raw_data(nlp, jsonl_loc):
     for json_obj in srsly.read_jsonl(jsonl_loc):
         if json_obj["text"].strip():
             doc = nlp.make_doc(json_obj["text"])
-            yield doc
+            yield Example.from_dict(doc, {})
 
 
 def read_gold_data(nlp, gold_loc):
@@ -52,7 +52,7 @@ def main(model_name, unlabelled_loc):
     batch_size = 4
     nlp = spacy.load(model_name)
     nlp.get_pipe("ner").add_label(LABEL)
-    raw_docs = list(read_raw_data(nlp, unlabelled_loc))
+    raw_examples = list(read_raw_data(nlp, unlabelled_loc))
     optimizer = nlp.resume_training()
     # Avoid use of Adam when resuming training. I don't understand this well
     # yet, but I'm getting weird results from Adam. Try commenting out the
@@ -61,20 +61,24 @@ def main(model_name, unlabelled_loc):
     optimizer.learn_rate = 0.1
     optimizer.b1 = 0.0
     optimizer.b2 = 0.0
-
     sizes = compounding(1.0, 4.0, 1.001)
+
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+
     with nlp.select_pipes(enable="ner") and warnings.catch_warnings():
         # show warnings for misaligned entity spans once
         warnings.filterwarnings("once", category=UserWarning, module="spacy")
 
         for itn in range(n_iter):
-            random.shuffle(TRAIN_DATA)
-            random.shuffle(raw_docs)
+            random.shuffle(train_examples)
+            random.shuffle(raw_examples)
             losses = {}
             r_losses = {}
             # batch up the examples using spaCy's minibatch
-            raw_batches = minibatch(raw_docs, size=4)
-            for batch in minibatch(TRAIN_DATA, size=sizes):
+            raw_batches = minibatch(raw_examples, size=4)
+            for batch in minibatch(train_examples, size=sizes):
                 nlp.update(batch, sgd=optimizer, drop=dropout, losses=losses)
                 raw_batch = list(next(raw_batches))
                 nlp.rehearse(raw_batch, sgd=optimizer, losses=r_losses)
diff --git a/examples/training/train_entity_linker.py b/examples/training/train_entity_linker.py
index b82ff5bb4..e107b6165 100644
--- a/examples/training/train_entity_linker.py
+++ b/examples/training/train_entity_linker.py
@@ -20,6 +20,8 @@ from pathlib import Path
 from spacy.vocab import Vocab
 import spacy
 from spacy.kb import KnowledgeBase
+
+from spacy.gold import Example
 from spacy.pipeline import EntityRuler
 from spacy.util import minibatch, compounding
 
@@ -94,7 +96,7 @@ def main(kb_path, vocab_path=None, output_dir=None, n_iter=50):
     # Convert the texts to docs to make sure we have doc.ents set for the training examples.
     # Also ensure that the annotated examples correspond to known identifiers in the knowledge base.
     kb_ids = nlp.get_pipe("entity_linker").kb.get_entity_strings()
-    TRAIN_DOCS = []
+    train_examples = []
     for text, annotation in TRAIN_DATA:
         with nlp.select_pipes(disable="entity_linker"):
             doc = nlp(text)
@@ -109,17 +111,17 @@ def main(kb_path, vocab_path=None, output_dir=None, n_iter=50):
                         "Removed", kb_id, "from training because it is not in the KB."
                     )
             annotation_clean["links"][offset] = new_dict
-        TRAIN_DOCS.append((doc, annotation_clean))
+        train_examples.append(Example.from_dict(doc, annotation_clean))
 
     with nlp.select_pipes(enable="entity_linker"):  # only train entity linker
         # reset and initialize the weights randomly
         optimizer = nlp.begin_training()
 
         for itn in range(n_iter):
-            random.shuffle(TRAIN_DOCS)
+            random.shuffle(train_examples)
             losses = {}
             # batch up the examples using spaCy's minibatch
-            batches = minibatch(TRAIN_DOCS, size=compounding(4.0, 32.0, 1.001))
+            batches = minibatch(train_examples, size=compounding(4.0, 32.0, 1.001))
             for batch in batches:
                 nlp.update(
                     batch,
diff --git a/examples/training/train_intent_parser.py b/examples/training/train_intent_parser.py
index df1356e3c..fffa140f4 100644
--- a/examples/training/train_intent_parser.py
+++ b/examples/training/train_intent_parser.py
@@ -23,6 +23,7 @@ import plac
 import random
 from pathlib import Path
 import spacy
+from spacy.gold import Example
 from spacy.util import minibatch, compounding
 
 
@@ -120,17 +121,19 @@ def main(model=None, output_dir=None, n_iter=15):
     parser = nlp.create_pipe("parser")
     nlp.add_pipe(parser, first=True)
 
+    train_examples = []
     for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
         for dep in annotations.get("deps", []):
             parser.add_label(dep)
 
     with nlp.select_pipes(enable="parser"):  # only train parser
         optimizer = nlp.begin_training()
         for itn in range(n_iter):
-            random.shuffle(TRAIN_DATA)
+            random.shuffle(train_examples)
             losses = {}
             # batch up the examples using spaCy's minibatch
-            batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
+            batches = minibatch(train_examples, size=compounding(4.0, 32.0, 1.001))
             for batch in batches:
                 nlp.update(batch, sgd=optimizer, losses=losses)
             print("Losses", losses)
diff --git a/examples/training/train_morphologizer.py b/examples/training/train_morphologizer.py
index aec114de7..8c39a28a6 100644
--- a/examples/training/train_morphologizer.py
+++ b/examples/training/train_morphologizer.py
@@ -14,6 +14,7 @@ import plac
 import random
 from pathlib import Path
 import spacy
+from spacy.gold import Example
 from spacy.util import minibatch, compounding
 from spacy.morphology import Morphology
 
@@ -84,8 +85,10 @@ def main(lang="en", output_dir=None, n_iter=25):
     morphologizer = nlp.create_pipe("morphologizer")
     nlp.add_pipe(morphologizer)
 
-    # add labels
-    for _, annotations in TRAIN_DATA:
+    # add labels and create the Example instances
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
         morph_labels = annotations.get("morphs")
         pos_labels = annotations.get("pos", [""] * len(annotations.get("morphs")))
         assert len(morph_labels) == len(pos_labels)
@@ -98,10 +101,10 @@ def main(lang="en", output_dir=None, n_iter=25):
 
     optimizer = nlp.begin_training()
     for i in range(n_iter):
-        random.shuffle(TRAIN_DATA)
+        random.shuffle(train_examples)
         losses = {}
         # batch up the examples using spaCy's minibatch
-        batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
+        batches = minibatch(train_examples, size=compounding(4.0, 32.0, 1.001))
         for batch in batches:
             nlp.update(batch, sgd=optimizer, losses=losses)
         print("Losses", losses)
diff --git a/examples/training/train_ner.py b/examples/training/train_ner.py
index 98b428bf8..26b283777 100644
--- a/examples/training/train_ner.py
+++ b/examples/training/train_ner.py
@@ -17,6 +17,7 @@ import random
 import warnings
 from pathlib import Path
 import spacy
+from spacy.gold import Example
 from spacy.util import minibatch, compounding
 
 
@@ -50,8 +51,10 @@ def main(model=None, output_dir=None, n_iter=100):
     else:
         ner = nlp.get_pipe("simple_ner")
 
-    # add labels
-    for _, annotations in TRAIN_DATA:
+    # add labels and create Example objects
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
         for ent in annotations.get("entities"):
             print("Add label", ent[2])
             ner.add_label(ent[2])
@@ -68,10 +71,10 @@ def main(model=None, output_dir=None, n_iter=100):
             "Transitions", list(enumerate(nlp.get_pipe("simple_ner").get_tag_names()))
         )
         for itn in range(n_iter):
-            random.shuffle(TRAIN_DATA)
+            random.shuffle(train_examples)
             losses = {}
             # batch up the examples using spaCy's minibatch
-            batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
+            batches = minibatch(train_examples, size=compounding(4.0, 32.0, 1.001))
             for batch in batches:
                 nlp.update(
                     batch,
diff --git a/examples/training/train_new_entity_type.py b/examples/training/train_new_entity_type.py
index 5124d0a2c..c4edafac4 100644
--- a/examples/training/train_new_entity_type.py
+++ b/examples/training/train_new_entity_type.py
@@ -80,6 +80,11 @@ def main(model=None, new_model_name="animal", output_dir=None, n_iter=30):
         print("Created blank 'en' model")
     # Add entity recognizer to model if it's not in the pipeline
     # nlp.create_pipe works for built-ins that are registered with spaCy
+    from spacy.gold import Example  # local import: no module-level import is added by this patch
+    train_examples = []
+    for text, annotation in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotation))
+
     if "ner" not in nlp.pipe_names:
         ner = nlp.create_pipe("ner")
         nlp.add_pipe(ner)
@@ -102,8 +107,8 @@ def main(model=None, new_model_name="animal", output_dir=None, n_iter=30):
         sizes = compounding(1.0, 4.0, 1.001)
         # batch up the examples using spaCy's minibatch
         for itn in range(n_iter):
-            random.shuffle(TRAIN_DATA)
-            batches = minibatch(TRAIN_DATA, size=sizes)
+            random.shuffle(train_examples)
+            batches = minibatch(train_examples, size=sizes)
             losses = {}
             for batch in batches:
                 nlp.update(batch, sgd=optimizer, drop=0.35, losses=losses)
diff --git a/examples/training/train_parser.py b/examples/training/train_parser.py
index 4f4409e31..d46a8f4b9 100644
--- a/examples/training/train_parser.py
+++ b/examples/training/train_parser.py
@@ -14,6 +14,7 @@ import plac
 import random
 from pathlib import Path
 import spacy
+from spacy.gold import Example
 from spacy.util import minibatch, compounding
 
 
@@ -59,18 +60,20 @@ def main(model=None, output_dir=None, n_iter=15):
     else:
         parser = nlp.get_pipe("parser")
 
-    # add labels to the parser
-    for _, annotations in TRAIN_DATA:
+    # add labels to the parser and create the Example objects
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
         for dep in annotations.get("deps", []):
             parser.add_label(dep)
 
     with nlp.select_pipes(enable="parser"):  # only train parser
         optimizer = nlp.begin_training()
         for itn in range(n_iter):
-            random.shuffle(TRAIN_DATA)
+            random.shuffle(train_examples)
             losses = {}
             # batch up the examples using spaCy's minibatch
-            batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
+            batches = minibatch(train_examples, size=compounding(4.0, 32.0, 1.001))
             for batch in batches:
                 nlp.update(batch, sgd=optimizer, losses=losses)
             print("Losses", losses)
diff --git a/examples/training/train_tagger.py b/examples/training/train_tagger.py
index 06e05f6cd..4eeb77fb9 100644
--- a/examples/training/train_tagger.py
+++ b/examples/training/train_tagger.py
@@ -17,6 +17,7 @@ import plac
 import random
 from pathlib import Path
 import spacy
+from spacy.gold import Example
 from spacy.util import minibatch, compounding
 
 
@@ -58,12 +59,16 @@ def main(lang="en", output_dir=None, n_iter=25):
         tagger.add_label(tag, values)
     nlp.add_pipe(tagger)
 
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+
     optimizer = nlp.begin_training()
     for i in range(n_iter):
-        random.shuffle(TRAIN_DATA)
+        random.shuffle(train_examples)
         losses = {}
         # batch up the examples using spaCy's minibatch
-        batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
+        batches = minibatch(train_examples, size=compounding(4.0, 32.0, 1.001))
         for batch in batches:
             nlp.update(batch, sgd=optimizer, losses=losses)
         print("Losses", losses)
diff --git a/spacy/cli/profile.py b/spacy/cli/profile.py
index ee9f3e707..3dc9f1027 100644
--- a/spacy/cli/profile.py
+++ b/spacy/cli/profile.py
@@ -31,17 +31,20 @@ def profile_cli(
 
 
 def profile(model: str, inputs: Optional[Path] = None, n_texts: int = 10000) -> None:
-    try:
-        import ml_datasets
-    except ImportError:
-        msg.fail(
-            "This command requires the ml_datasets library to be installed:"
-            "pip install ml_datasets",
-            exits=1,
-        )
+
     if inputs is not None:
         inputs = _read_inputs(inputs, msg)
     if inputs is None:
+        try:
+            import ml_datasets
+        except ImportError:
+            msg.fail(
+                "This command, when run without an input file, "
+                "requires the ml_datasets library to be installed: "
+                "pip install ml_datasets",
+                exits=1,
+            )
+
         n_inputs = 25000
         with msg.loading("Loading IMDB dataset via Thinc..."):
             imdb_train, _ = ml_datasets.imdb()
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 92fd8c20a..b974247bd 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -12,7 +12,7 @@ from thinc.api import Model, use_pytorch_for_gpu_memory
 import random
 
 from ._app import app, Arg, Opt
-from ..gold import Corpus
+from ..gold import Corpus, Example
 from ..lookups import Lookups
 from .. import util
 from ..errors import Errors
@@ -423,9 +423,8 @@ def train_while_improving(
 
     if raw_text:
         random.shuffle(raw_text)
-        raw_batches = util.minibatch(
-            (nlp.make_doc(rt["text"]) for rt in raw_text), size=8
-        )
+        raw_examples = [Example.from_dict(nlp.make_doc(rt["text"]), {}) for rt in raw_text]
+        raw_batches = util.minibatch(raw_examples, size=8)
 
     for step, (epoch, batch) in enumerate(train_data):
         dropout = next(dropouts)
diff --git a/spacy/errors.py b/spacy/errors.py
index 07cf7bbdf..4e73aee6f 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -547,13 +547,13 @@ class Errors(object):
     E972 = ("Example.__init__ got None for '{arg}'. Requires Doc.")
     E973 = ("Unexpected type for NER data")
     E974 = ("Unknown {obj} attribute: {key}")
-    E975 = ("The method Example.from_dict expects a Doc as first argument, "
+    E975 = ("The method 'Example.from_dict' expects a Doc as first argument, "
             "but got {type}")
-    E976 = ("The method Example.from_dict expects a dict as second argument, "
+    E976 = ("The method 'Example.from_dict' expects a dict as second argument, "
             "but received None.")
     E977 = ("Can not compare a MorphAnalysis with a string object. "
             "This is likely a bug in spaCy, so feel free to open an issue.")
-    E978 = ("The {method} method of component {name} takes a list of Example objects, "
+    E978 = ("The '{method}' method of {name} takes a list of Example objects, "
             "but found {types} instead.")
     E979 = ("Cannot convert {type} to an Example object.")
     E980 = ("Each link annotation should refer to a dictionary with at most one "
diff --git a/spacy/language.py b/spacy/language.py
index 573b83e5f..dbc213574 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -2,6 +2,7 @@ import random
 import itertools
 import weakref
 import functools
+from collections.abc import Iterable
 from contextlib import contextmanager
 from copy import copy, deepcopy
 from pathlib import Path
@@ -529,22 +530,6 @@ class Language(object):
     def make_doc(self, text):
         return self.tokenizer(text)
 
-    def _convert_examples(self, examples):
-        converted_examples = []
-        if isinstance(examples, tuple):
-            examples = [examples]
-        for eg in examples:
-            if isinstance(eg, Example):
-                converted_examples.append(eg.copy())
-            elif isinstance(eg, tuple):
-                doc, annot = eg
-                if isinstance(doc, str):
-                    doc = self.make_doc(doc)
-                converted_examples.append(Example.from_dict(doc, annot))
-            else:
-                raise ValueError(Errors.E979.format(type=type(eg)))
-        return converted_examples
-
     def update(
         self,
         examples,
@@ -557,7 +542,7 @@ class Language(object):
     ):
         """Update the models in the pipeline.
 
-        examples (iterable): A batch of `Example` or `Doc` objects.
+        examples (iterable): A batch of `Example` objects.
         dummy: Should not be set - serves to catch backwards-incompatible scripts.
         drop (float): The dropout rate.
         sgd (callable): An optimizer.
@@ -569,10 +554,13 @@ class Language(object):
         """
         if dummy is not None:
             raise ValueError(Errors.E989)
-
         if len(examples) == 0:
             return
-        examples = self._convert_examples(examples)
+        if not isinstance(examples, Iterable):
+            raise TypeError(Errors.E978.format(name="language", method="update", types=type(examples)))
+        wrong_types = set([type(eg) for eg in examples if not isinstance(eg, Example)])
+        if wrong_types:
+            raise TypeError(Errors.E978.format(name="language", method="update", types=wrong_types))
 
         if sgd is None:
             if self._optimizer is None:
@@ -605,22 +593,26 @@ class Language(object):
         initial ones. This is useful for keeping a pretrained model on-track,
         even if you're updating it with a smaller set of examples.
 
-        examples (iterable): A batch of `Doc` objects.
+        examples (iterable): A batch of `Example` objects.
         drop (float): The dropout rate.
         sgd (callable): An optimizer.
         RETURNS (dict): Results from the update.
 
         EXAMPLE:
             >>> raw_text_batches = minibatch(raw_texts)
-            >>> for labelled_batch in minibatch(zip(train_docs, train_golds)):
+            >>> for labelled_batch in minibatch(examples):
             >>>     nlp.update(labelled_batch)
-            >>>     raw_batch = [nlp.make_doc(text) for text in next(raw_text_batches)]
+            >>>     raw_batch = [Example.from_dict(nlp.make_doc(text), {}) for text in next(raw_text_batches)]
             >>>     nlp.rehearse(raw_batch)
         """
         # TODO: document
         if len(examples) == 0:
             return
-        examples = self._convert_examples(examples)
+        if not isinstance(examples, Iterable):
+            raise TypeError(Errors.E978.format(name="language", method="rehearse", types=type(examples)))
+        wrong_types = set([type(eg) for eg in examples if not isinstance(eg, Example)])
+        if wrong_types:
+            raise TypeError(Errors.E978.format(name="language", method="rehearse", types=wrong_types))
         if sgd is None:
             if self._optimizer is None:
                 self._optimizer = create_default_optimizer()
@@ -696,7 +688,7 @@ class Language(object):
         component that has a .rehearse() method. Rehearsal is used to prevent
         models from "forgetting" their initialised "knowledge". To perform
         rehearsal, collect samples of text you want the models to retain performance
-        on, and call nlp.rehearse() with a batch of Doc objects.
+        on, and call nlp.rehearse() with a batch of Example objects.
         """
         if cfg.get("device", -1) >= 0:
             util.use_gpu(cfg["device"])
@@ -728,7 +720,11 @@ class Language(object):
 
         DOCS: https://spacy.io/api/language#evaluate
         """
-        examples = self._convert_examples(examples)
+        if not isinstance(examples, Iterable):
+            raise TypeError(Errors.E978.format(name="language", method="evaluate", types=type(examples)))
+        wrong_types = set([type(eg) for eg in examples if not isinstance(eg, Example)])
+        if wrong_types:
+            raise TypeError(Errors.E978.format(name="language", method="evaluate", types=wrong_types))
         if scorer is None:
             scorer = Scorer(pipeline=self.pipeline)
         if component_cfg is None:
diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx
index be28dcc85..ed700b09a 100644
--- a/spacy/pipeline/pipes.pyx
+++ b/spacy/pipeline/pipes.pyx
@@ -295,7 +295,7 @@ class Tagger(Pipe):
                 return
         except AttributeError:
             types = set([type(eg) for eg in examples])
-            raise ValueError(Errors.E978.format(name="Tagger", method="update", types=types))
+            raise TypeError(Errors.E978.format(name="Tagger", method="update", types=types))
         set_dropout_rate(self.model, drop)
         tag_scores, bp_tag_scores = self.model.begin_update(
             [eg.predicted for eg in examples])
@@ -321,7 +321,7 @@ class Tagger(Pipe):
             docs = [eg.predicted for eg in examples]
         except AttributeError:
             types = set([type(eg) for eg in examples])
-            raise ValueError(Errors.E978.format(name="Tagger", method="rehearse", types=types))
+            raise TypeError(Errors.E978.format(name="Tagger", method="rehearse", types=types))
         if self._rehearsal_model is None:
             return
         if not any(len(doc) for doc in docs):
@@ -358,7 +358,7 @@ class Tagger(Pipe):
             try:
                 y = example.y
             except AttributeError:
-                raise ValueError(Errors.E978.format(name="Tagger", method="begin_training", types=type(example)))
+                raise TypeError(Errors.E978.format(name="Tagger", method="begin_training", types=type(example)))
             for token in y:
                 tag = token.tag_
                 if tag in orig_tag_map:
@@ -790,7 +790,7 @@ class ClozeMultitask(Pipe):
             predictions, bp_predictions = self.model.begin_update([eg.predicted for eg in examples])
         except AttributeError:
             types = set([type(eg) for eg in examples])
-            raise ValueError(Errors.E978.format(name="ClozeMultitask", method="rehearse", types=types))
+            raise TypeError(Errors.E978.format(name="ClozeMultitask", method="rehearse", types=types))
         loss, d_predictions = self.get_loss(examples, self.vocab.vectors.data, predictions)
         bp_predictions(d_predictions)
         if sgd is not None:
@@ -856,7 +856,7 @@ class TextCategorizer(Pipe):
                 return
         except AttributeError:
             types = set([type(eg) for eg in examples])
-            raise ValueError(Errors.E978.format(name="TextCategorizer", method="update", types=types))
+            raise TypeError(Errors.E978.format(name="TextCategorizer", method="update", types=types))
         set_dropout_rate(self.model, drop)
         scores, bp_scores = self.model.begin_update(
             [eg.predicted for eg in examples]
@@ -879,7 +879,7 @@ class TextCategorizer(Pipe):
             docs = [eg.predicted for eg in examples]
         except AttributeError:
             types = set([type(eg) for eg in examples])
-            raise ValueError(Errors.E978.format(name="TextCategorizer", method="rehearse", types=types))
+            raise TypeError(Errors.E978.format(name="TextCategorizer", method="rehearse", types=types))
         if not any(len(doc) for doc in docs):
             # Handle cases where there are no tokens in any docs.
             return
@@ -940,7 +940,7 @@ class TextCategorizer(Pipe):
             try:
                 y = example.y
             except AttributeError:
-                raise ValueError(Errors.E978.format(name="TextCategorizer", method="update", types=type(example)))
+                raise TypeError(Errors.E978.format(name="TextCategorizer", method="begin_training", types=type(example)))
             for cat in y.cats:
                 self.add_label(cat)
         self.require_labels()
@@ -1105,7 +1105,7 @@ class EntityLinker(Pipe):
             docs = [eg.predicted for eg in examples]
         except AttributeError:
             types = set([type(eg) for eg in examples])
-            raise ValueError(Errors.E978.format(name="EntityLinker", method="update", types=types))
+            raise TypeError(Errors.E978.format(name="EntityLinker", method="update", types=types))
         if set_annotations:
             # This seems simpler than other ways to get that exact output -- but
             # it does run the model twice :(
diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py
index 81484c083..2f828e7fa 100644
--- a/spacy/tests/parser/test_ner.py
+++ b/spacy/tests/parser/test_ner.py
@@ -209,6 +209,10 @@ def test_train_empty():
     ]
 
     nlp = English()
+    train_examples = []
+    for t in train_data:
+        train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
+
     ner = nlp.create_pipe("ner")
     ner.add_label("PERSON")
     nlp.add_pipe(ner, last=True)
@@ -216,10 +220,9 @@ def test_train_empty():
     nlp.begin_training()
     for itn in range(2):
         losses = {}
-        batches = util.minibatch(train_data)
+        batches = util.minibatch(train_examples)
         for batch in batches:
-            texts, annotations = zip(*batch)
-            nlp.update(train_data, losses=losses)
+            nlp.update(batch, losses=losses)
 
 
 def test_overwrite_token():
@@ -328,7 +331,9 @@ def test_overfitting_IO():
     # Simple test to try and quickly overfit the NER component - ensuring the ML models work correctly
     nlp = English()
     ner = nlp.create_pipe("ner")
-    for _, annotations in TRAIN_DATA:
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
         for ent in annotations.get("entities"):
             ner.add_label(ent[2])
     nlp.add_pipe(ner)
@@ -336,7 +341,7 @@ def test_overfitting_IO():
 
     for i in range(50):
         losses = {}
-        nlp.update(TRAIN_DATA, sgd=optimizer, losses=losses)
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
     assert losses["ner"] < 0.00001
 
     # test the trained model
diff --git a/spacy/tests/parser/test_parse.py b/spacy/tests/parser/test_parse.py
index c54088f56..4cff31712 100644
--- a/spacy/tests/parser/test_parse.py
+++ b/spacy/tests/parser/test_parse.py
@@ -3,6 +3,7 @@ import pytest
 from spacy.lang.en import English
 from ..util import get_doc, apply_transition_sequence, make_tempdir
 from ... import util
+from ...gold import Example
 
 TRAIN_DATA = [
     (
@@ -189,7 +190,9 @@ def test_overfitting_IO():
     # Simple test to try and quickly overfit the dependency parser - ensuring the ML models work correctly
     nlp = English()
     parser = nlp.create_pipe("parser")
-    for _, annotations in TRAIN_DATA:
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
         for dep in annotations.get("deps", []):
             parser.add_label(dep)
     nlp.add_pipe(parser)
@@ -197,7 +200,7 @@ def test_overfitting_IO():
 
     for i in range(50):
         losses = {}
-        nlp.update(TRAIN_DATA, sgd=optimizer, losses=losses)
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
     assert losses["parser"] < 0.00001
 
     # test the trained model
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index a50ad8499..f91cc6f70 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -3,6 +3,7 @@ import pytest
 from spacy.kb import KnowledgeBase
 
 from spacy import util
+from spacy.gold import Example
 from spacy.lang.en import English
 from spacy.pipeline import EntityRuler
 from spacy.tests.util import make_tempdir
@@ -283,11 +284,10 @@ def test_overfitting_IO():
     nlp.add_pipe(ruler)
 
     # Convert the texts to docs to make sure we have doc.ents set for the training examples
-    TRAIN_DOCS = []
+    train_examples = []
     for text, annotation in TRAIN_DATA:
         doc = nlp(text)
-        annotation_clean = annotation
-        TRAIN_DOCS.append((doc, annotation_clean))
+        train_examples.append(Example.from_dict(doc, annotation))
 
     # create artificial KB - assign same prior weight to the two russ cochran's
     # Q2146908 (Russ Cochran): American golfer
@@ -309,7 +309,7 @@ def test_overfitting_IO():
     optimizer = nlp.begin_training()
     for i in range(50):
         losses = {}
-        nlp.update(TRAIN_DOCS, sgd=optimizer, losses=losses)
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
     assert losses["entity_linker"] < 0.001
 
     # test the trained model
diff --git a/spacy/tests/pipeline/test_morphologizer.py b/spacy/tests/pipeline/test_morphologizer.py
index c853de232..9b7e2788d 100644
--- a/spacy/tests/pipeline/test_morphologizer.py
+++ b/spacy/tests/pipeline/test_morphologizer.py
@@ -1,6 +1,7 @@
 import pytest
 
 from spacy import util
+from spacy.gold import Example
 from spacy.lang.en import English
 from spacy.language import Language
 from spacy.tests.util import make_tempdir
@@ -33,7 +34,9 @@ def test_overfitting_IO():
     # Simple test to try and quickly overfit the morphologizer - ensuring the ML models work correctly
     nlp = English()
     morphologizer = nlp.create_pipe("morphologizer")
+    train_examples = []
     for inst in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(inst[0]), inst[1]))
         for morph, pos in zip(inst[1]["morphs"], inst[1]["pos"]):
             morphologizer.add_label(morph + "|POS=" + pos)
     nlp.add_pipe(morphologizer)
@@ -41,7 +44,7 @@ def test_overfitting_IO():
 
     for i in range(50):
         losses = {}
-        nlp.update(TRAIN_DATA, sgd=optimizer, losses=losses)
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
     assert losses["morphologizer"] < 0.00001
 
     # test the trained model
diff --git a/spacy/tests/pipeline/test_senter.py b/spacy/tests/pipeline/test_senter.py
index 041da2c9f..bfa1bd65a 100644
--- a/spacy/tests/pipeline/test_senter.py
+++ b/spacy/tests/pipeline/test_senter.py
@@ -1,6 +1,7 @@
 import pytest
 
 from spacy import util
+from spacy.gold import Example
 from spacy.lang.en import English
 from spacy.language import Language
 from spacy.tests.util import make_tempdir
@@ -34,12 +35,15 @@ def test_overfitting_IO():
     # Simple test to try and quickly overfit the senter - ensuring the ML models work correctly
     nlp = English()
     senter = nlp.create_pipe("senter")
+    train_examples = []
+    for t in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
     nlp.add_pipe(senter)
     optimizer = nlp.begin_training()
 
     for i in range(200):
         losses = {}
-        nlp.update(TRAIN_DATA, sgd=optimizer, losses=losses)
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
     assert losses["senter"] < 0.001
 
     # test the trained model
diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py
index a90207a78..aedf8e2b3 100644
--- a/spacy/tests/pipeline/test_tagger.py
+++ b/spacy/tests/pipeline/test_tagger.py
@@ -1,6 +1,7 @@
 import pytest
 
 from spacy import util
+from spacy.gold import Example
 from spacy.lang.en import English
 from spacy.language import Language
 from spacy.tests.util import make_tempdir
@@ -28,12 +29,15 @@ def test_overfitting_IO():
     tagger = nlp.create_pipe("tagger")
     for tag, values in TAG_MAP.items():
         tagger.add_label(tag, values)
+    train_examples = []
+    for t in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
     nlp.add_pipe(tagger)
     optimizer = nlp.begin_training()
 
     for i in range(50):
         losses = {}
-        nlp.update(TRAIN_DATA, sgd=optimizer, losses=losses)
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
     assert losses["tagger"] < 0.00001
 
     # test the trained model
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 6f01ada69..214163a97 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -85,7 +85,9 @@ def test_overfitting_IO():
     fix_random_seed(0)
     nlp = English()
     textcat = nlp.create_pipe("textcat")
-    for _, annotations in TRAIN_DATA:
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
         for label, value in annotations.get("cats").items():
             textcat.add_label(label)
     nlp.add_pipe(textcat)
@@ -93,7 +95,7 @@ def test_overfitting_IO():
 
     for i in range(50):
         losses = {}
-        nlp.update(TRAIN_DATA, sgd=optimizer, losses=losses)
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
     assert losses["textcat"] < 0.01
 
     # test the trained model
@@ -134,11 +136,13 @@ def test_textcat_configs(textcat_config):
     pipe_config = {"model": textcat_config}
     nlp = English()
     textcat = nlp.create_pipe("textcat", pipe_config)
-    for _, annotations in TRAIN_DATA:
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
         for label, value in annotations.get("cats").items():
             textcat.add_label(label)
     nlp.add_pipe(textcat)
     optimizer = nlp.begin_training()
     for i in range(5):
         losses = {}
-        nlp.update(TRAIN_DATA, sgd=optimizer, losses=losses)
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
diff --git a/spacy/tests/regression/test_issue2501-3000.py b/spacy/tests/regression/test_issue2501-3000.py
index 033e4f83e..5d504a9c6 100644
--- a/spacy/tests/regression/test_issue2501-3000.py
+++ b/spacy/tests/regression/test_issue2501-3000.py
@@ -1,5 +1,6 @@
 import pytest
 from spacy import displacy
+from spacy.gold import Example
 from spacy.lang.en import English
 from spacy.lang.ja import Japanese
 from spacy.lang.xx import MultiLanguage
@@ -141,10 +142,10 @@ def test_issue2800():
     """Test issue that arises when too many labels are added to NER model.
     Used to cause segfault.
     """
-    train_data = []
-    train_data.extend([("One sentence", {"entities": []})])
-    entity_types = [str(i) for i in range(1000)]
     nlp = English()
+    train_data = []
+    train_data.extend([Example.from_dict(nlp.make_doc("One sentence"), {"entities": []})])
+    entity_types = [str(i) for i in range(1000)]
     ner = nlp.create_pipe("ner")
     nlp.add_pipe(ner)
     for entity_type in list(entity_types):
@@ -153,8 +154,8 @@ def test_issue2800():
     for i in range(20):
         losses = {}
         random.shuffle(train_data)
-        for statement, entities in train_data:
-            nlp.update((statement, entities), sgd=optimizer, losses=losses, drop=0.5)
+        for example in train_data:
+            nlp.update([example], sgd=optimizer, losses=losses, drop=0.5)
 
 
 def test_issue2822(it_tokenizer):
diff --git a/spacy/tests/regression/test_issue3611.py b/spacy/tests/regression/test_issue3611.py
index cab68793c..67bc88466 100644
--- a/spacy/tests/regression/test_issue3611.py
+++ b/spacy/tests/regression/test_issue3611.py
@@ -1,4 +1,5 @@
 import spacy
+from spacy.gold import Example
 from spacy.util import minibatch, compounding
 
 
@@ -12,15 +13,15 @@ def test_issue3611():
     ]
     y_train = ["offensive", "offensive", "inoffensive"]
 
-    # preparing the data
-    pos_cats = list()
-    for train_instance in y_train:
-        pos_cats.append({label: label == train_instance for label in unique_classes})
-    train_data = list(zip(x_train, [{"cats": cats} for cats in pos_cats]))
-
-    # set up the spacy model with a text categorizer component
     nlp = spacy.blank("en")
 
+    # preparing the data
+    train_data = []
+    for text, train_instance in zip(x_train, y_train):
+        cat_dict = {label: label == train_instance for label in unique_classes}
+        train_data.append(Example.from_dict(nlp.make_doc(text), {"cats": cat_dict}))
+
+    # add a text categorizer component
     textcat = nlp.create_pipe(
         "textcat",
         config={"exclusive_classes": True, "architecture": "bow", "ngram_size": 2},
diff --git a/spacy/tests/regression/test_issue4030.py b/spacy/tests/regression/test_issue4030.py
index b641213ad..12a320c71 100644
--- a/spacy/tests/regression/test_issue4030.py
+++ b/spacy/tests/regression/test_issue4030.py
@@ -1,4 +1,5 @@
 import spacy
+from spacy.gold import Example
 from spacy.util import minibatch, compounding
 
 
@@ -12,15 +13,15 @@ def test_issue4030():
     ]
     y_train = ["offensive", "offensive", "inoffensive"]
 
-    # preparing the data
-    pos_cats = list()
-    for train_instance in y_train:
-        pos_cats.append({label: label == train_instance for label in unique_classes})
-    train_data = list(zip(x_train, [{"cats": cats} for cats in pos_cats]))
-
-    # set up the spacy model with a text categorizer component
     nlp = spacy.blank("en")
 
+    # preparing the data
+    train_data = []
+    for text, train_instance in zip(x_train, y_train):
+        cat_dict = {label: label == train_instance for label in unique_classes}
+        train_data.append(Example.from_dict(nlp.make_doc(text), {"cats": cat_dict}))
+
+    # add a text categorizer component
     textcat = nlp.create_pipe(
         "textcat",
         config={"exclusive_classes": True, "architecture": "bow", "ngram_size": 2},
diff --git a/spacy/tests/regression/test_issue4348.py b/spacy/tests/regression/test_issue4348.py
index 4978e0c8e..d7a12d054 100644
--- a/spacy/tests/regression/test_issue4348.py
+++ b/spacy/tests/regression/test_issue4348.py
@@ -1,3 +1,4 @@
+from spacy.gold import Example
 from spacy.lang.en import English
 from spacy.util import minibatch, compounding
 import pytest
@@ -7,9 +8,10 @@ import pytest
 def test_issue4348():
     """Test that training the tagger with empty data, doesn't throw errors"""
 
-    TRAIN_DATA = [("", {"tags": []}), ("", {"tags": []})]
-
     nlp = English()
+    example = Example.from_dict(nlp.make_doc(""), {"tags": []})
+    TRAIN_DATA = [example, example]
+
     tagger = nlp.create_pipe("tagger")
     nlp.add_pipe(tagger)
 
diff --git a/spacy/tests/regression/test_issue4924.py b/spacy/tests/regression/test_issue4924.py
index 10c7868a0..c3d3c4326 100644
--- a/spacy/tests/regression/test_issue4924.py
+++ b/spacy/tests/regression/test_issue4924.py
@@ -1,7 +1,8 @@
+from spacy.gold import Example
 from spacy.language import Language
 
 
 def test_issue4924():
     nlp = Language()
-    docs_golds = [("", {})]
-    nlp.evaluate(docs_golds)
+    example = Example.from_dict(nlp.make_doc(""), {})
+    nlp.evaluate([example])
diff --git a/spacy/tests/test_gold.py b/spacy/tests/test_gold.py
index a5e11ea28..0ed4d50d5 100644
--- a/spacy/tests/test_gold.py
+++ b/spacy/tests/test_gold.py
@@ -589,7 +589,7 @@ def test_tuple_format_implicit():
         ("Google rebrands its business apps", {"entities": [(0, 6, "ORG")]}),
     ]
 
-    _train(train_data)
+    _train_tuples(train_data)
 
 
 def test_tuple_format_implicit_invalid():
@@ -605,20 +605,24 @@ def test_tuple_format_implicit_invalid():
     ]
 
     with pytest.raises(KeyError):
-        _train(train_data)
+        _train_tuples(train_data)
 
 
-def _train(train_data):
+def _train_tuples(train_data):
     nlp = English()
     ner = nlp.create_pipe("ner")
     ner.add_label("ORG")
     ner.add_label("LOC")
     nlp.add_pipe(ner)
 
+    train_examples = []
+    for t in train_data:
+        train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
+
     optimizer = nlp.begin_training()
     for i in range(5):
         losses = {}
-        batches = minibatch(train_data, size=compounding(4.0, 32.0, 1.001))
+        batches = minibatch(train_examples, size=compounding(4.0, 32.0, 1.001))
         for batch in batches:
             nlp.update(batch, sgd=optimizer, losses=losses)
 
diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py
index e5555bbc7..7b4c29c5a 100644
--- a/spacy/tests/test_language.py
+++ b/spacy/tests/test_language.py
@@ -5,6 +5,7 @@ from spacy.tokens import Doc, Span
 from spacy.vocab import Vocab
 
 from .util import add_vecs_to_vocab, assert_docs_equal
+from ..gold import Example
 
 
 @pytest.fixture
@@ -23,26 +24,45 @@ def test_language_update(nlp):
     annots = {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
     wrongkeyannots = {"LABEL": True}
     doc = Doc(nlp.vocab, words=text.split(" "))
-    # Update with text and dict
-    nlp.update((text, annots))
+    example = Example.from_dict(doc, annots)
+    nlp.update([example])
+
+    # A bare Example is rejected: it has to be wrapped in a batch, e.g. [example]
+    with pytest.raises(TypeError):
+        nlp.update(example)
+
+    # Updating with a (text, dict) tuple is no longer supported as of v3
+    with pytest.raises(TypeError):
+        nlp.update((text, annots))
     # Update with doc object and dict
-    nlp.update((doc, annots))
-    # Update badly
+    with pytest.raises(TypeError):
+        nlp.update((doc, annots))
+
+    # Building an Example from invalid annotations should raise an error
     with pytest.raises(ValueError):
-        nlp.update((doc, None))
+        example = Example.from_dict(doc, None)
     with pytest.raises(KeyError):
-        nlp.update((text, wrongkeyannots))
+        example = Example.from_dict(doc, wrongkeyannots)
 
 
 def test_language_evaluate(nlp):
     text = "hello world"
     annots = {"doc_annotation": {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}}
     doc = Doc(nlp.vocab, words=text.split(" "))
-    # Evaluate with text and dict
-    nlp.evaluate([(text, annots)])
+    example = Example.from_dict(doc, annots)
+    nlp.evaluate([example])
+
+    # A bare Example is rejected: it has to be wrapped in a batch, e.g. [example]
+    with pytest.raises(TypeError):
+        nlp.evaluate(example)
+
+    # Evaluating with (text, dict) tuples is no longer supported as of v3
+    with pytest.raises(TypeError):
+        nlp.evaluate([(text, annots)])
     # Evaluate with doc object and dict
-    nlp.evaluate([(doc, annots)])
-    with pytest.raises(Exception):
+    with pytest.raises(TypeError):
+        nlp.evaluate([(doc, annots)])
+    with pytest.raises(TypeError):
         nlp.evaluate([text, annots])
 
 
@@ -56,8 +76,9 @@ def test_evaluate_no_pipe(nlp):
     text = "hello world"
     annots = {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
     nlp = Language(Vocab())
+    doc = nlp(text)
     nlp.add_pipe(pipe)
-    nlp.evaluate([(text, annots)])
+    nlp.evaluate([Example.from_dict(doc, annots)])
 
 
 def vector_modification_pipe(doc):