diff --git a/examples/experiments/onto-joint/defaults.cfg b/examples/experiments/onto-joint/defaults.cfg
index f76336d84..337fe0379 100644
--- a/examples/experiments/onto-joint/defaults.cfg
+++ b/examples/experiments/onto-joint/defaults.cfg
@@ -9,7 +9,6 @@ max_length = 0
 limit = 0
 # Data augmentation
 orth_variant_level = 0.0
-noise_level = 0.0
 dropout = 0.1
 # Controls early-stopping. 0 or -1 mean unlimited.
 patience = 1600
diff --git a/examples/experiments/onto-joint/pretrain.cfg b/examples/experiments/onto-joint/pretrain.cfg
index 40885b6e8..83991f888 100644
--- a/examples/experiments/onto-joint/pretrain.cfg
+++ b/examples/experiments/onto-joint/pretrain.cfg
@@ -9,7 +9,6 @@ max_length = 0
 limit = 0
 # Data augmentation
 orth_variant_level = 0.0
-noise_level = 0.0
 dropout = 0.1
 # Controls early-stopping. 0 or -1 mean unlimited.
 patience = 1600
diff --git a/examples/experiments/ptb-joint-pos-dep/bilstm_tok2vec.cfg b/examples/experiments/ptb-joint-pos-dep/bilstm_tok2vec.cfg
index 905b5b4e0..f1b702a4e 100644
--- a/examples/experiments/ptb-joint-pos-dep/bilstm_tok2vec.cfg
+++ b/examples/experiments/ptb-joint-pos-dep/bilstm_tok2vec.cfg
@@ -6,7 +6,6 @@ init_tok2vec = null
 vectors = null
 max_epochs = 100
 orth_variant_level = 0.0
-noise_level = 0.0
 gold_preproc = true
 max_length = 0
 use_gpu = 0
diff --git a/examples/experiments/ptb-joint-pos-dep/defaults.cfg b/examples/experiments/ptb-joint-pos-dep/defaults.cfg
index 7383116e7..1c946ac60 100644
--- a/examples/experiments/ptb-joint-pos-dep/defaults.cfg
+++ b/examples/experiments/ptb-joint-pos-dep/defaults.cfg
@@ -6,7 +6,6 @@ init_tok2vec = null
 vectors = null
 max_epochs = 100
 orth_variant_level = 0.0
-noise_level = 0.0
 gold_preproc = true
 max_length = 0
 use_gpu = -1
diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py
index 2ffbe2d0c..6f09c6884 100644
--- a/spacy/cli/__init__.py
+++ b/spacy/cli/__init__.py
@@ -4,7 +4,7 @@ from .download import download  # noqa: F401
 from .info import info  # noqa: F401
 from .package import package  # noqa: F401
 from .profile import profile  # noqa: F401
-from .train_from_config import train_cli  # noqa: F401
+from .train import train_cli  # noqa: F401
 from .pretrain import pretrain  # noqa: F401
 from .debug_data import debug_data  # noqa: F401
 from .evaluate import evaluate  # noqa: F401
diff --git a/spacy/cli/train_from_config.py b/spacy/cli/train.py
similarity index 99%
rename from spacy/cli/train_from_config.py
rename to spacy/cli/train.py
index 3a4d28356..fb4347158 100644
--- a/spacy/cli/train_from_config.py
+++ b/spacy/cli/train.py
@@ -371,7 +371,6 @@ def create_train_batches(nlp, corpus, cfg):
     train_examples = list(
         corpus.train_dataset(
             nlp,
-            noise_level=cfg["noise_level"],  # I think this is deprecated?
            orth_variant_level=cfg["orth_variant_level"],
             gold_preproc=cfg["gold_preproc"],
             max_length=cfg["max_length"],
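Across the configs and the renamed CLI above, noise_level is dropped everywhere, leaving orth_variant_level as the sole data-augmentation knob. A minimal sketch of iterating training data under the new train_dataset signature; the corpus paths are hypothetical and the import paths assume this dev tree:

    from spacy.gold import GoldCorpus
    from spacy.lang.en import English

    nlp = English()
    # Hypothetical files; any corpus in spaCy's JSON training format works.
    corpus = GoldCorpus("train.json", "dev.json")
    # noise_level is no longer accepted here; orth_variant_level is the only
    # augmentation parameter left on train_dataset.
    train_examples = list(
        corpus.train_dataset(
            nlp,
            orth_variant_level=0.0,
            gold_preproc=False,
            max_length=0,
        )
    )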
diff --git a/spacy/gold/augment.py b/spacy/gold/augment.py
index a129793c8..dda51cda6 100644
--- a/spacy/gold/augment.py
+++ b/spacy/gold/augment.py
@@ -2,6 +2,15 @@ import random
 import itertools
 
 
+def make_orth_variants_example(nlp, example, orth_variant_level=0.0):  # TODO: naming
+    raw_text = example.text
+    orig_dict = example.to_dict()
+    variant_text, variant_token_annot = make_orth_variants(nlp, raw_text, orig_dict["token_annotation"], orth_variant_level)
+    doc = nlp.make_doc(variant_text)
+    orig_dict["token_annotation"] = variant_token_annot
+    return example.from_dict(doc, orig_dict)
+
+
 def make_orth_variants(nlp, raw_text, orig_token_dict, orth_variant_level=0.0):
     if random.random() >= orth_variant_level:
         return raw_text, orig_token_dict
@@ -98,23 +107,3 @@ def make_orth_variants(nlp, raw_text, orig_token_dict, orth_variant_level=0.0):
             raw_idx += 1
         raw = variant_raw
     return raw, token_dict
-
-
-def add_noise(orig, noise_level):
-    if random.random() >= noise_level:
-        return orig
-    elif type(orig) == list:
-        corrupted = [_corrupt(word, noise_level) for word in orig]
-        corrupted = [w for w in corrupted if w]
-        return corrupted
-    else:
-        return "".join(_corrupt(c, noise_level) for c in orig)
-
-
-def _corrupt(c, noise_level):
-    if random.random() >= noise_level:
-        return c
-    elif c in [".", "'", "!", "?", ","]:
-        return "\n"
-    else:
-        return c.lower()
diff --git a/spacy/gold/corpus.py b/spacy/gold/corpus.py
index d55845fb8..c84f8355f 100644
--- a/spacy/gold/corpus.py
+++ b/spacy/gold/corpus.py
@@ -8,7 +8,7 @@ from ..tokens import Doc
 from .. import util
 from ..errors import Errors, AlignmentError
 from .gold_io import read_json_file, json_to_annotations
-from .augment import make_orth_variants, add_noise
+from .augment import make_orth_variants
 from .example import Example
 
 
@@ -148,7 +148,6 @@ class GoldCorpus(object):
         nlp,
         gold_preproc=False,
         max_length=None,
-        noise_level=0.0,
         orth_variant_level=0.0,
         ignore_misaligned=False,
     ):
@@ -160,7 +159,6 @@ class GoldCorpus(object):
             train_annotations,
             gold_preproc,
             max_length=max_length,
-            noise_level=noise_level,
             orth_variant_level=orth_variant_level,
             make_projective=True,
             ignore_misaligned=ignore_misaligned,
@@ -194,33 +192,31 @@ class GoldCorpus(object):
         annotations,
         gold_preproc,
         max_length=None,
-        noise_level=0.0,
         orth_variant_level=0.0,
         make_projective=False,
         ignore_misaligned=False,
     ):
         """ Setting gold_preproc will result in creating a doc per sentence """
         for eg_dict in annotations:
+            token_annot = eg_dict.get("token_annotation", {})
             if eg_dict["text"]:
-                example = Example.from_dict(
-                    nlp.make_doc(eg_dict["text"]),
-                    eg_dict
-                )
+                doc = nlp.make_doc(eg_dict["text"])
+            elif "words" in token_annot:
+                doc = Doc(nlp.vocab, words=token_annot["words"])
             else:
-                example = Example.from_dict(
-                    Doc(nlp.vocab, words=eg_dict["words"]),
-                    eg_dict
-                )
+                raise ValueError("Expecting either 'text' or token_annotation.words annotation")
+
             if gold_preproc:
-                # TODO: Data augmentation
+                variant_text, variant_token_annot = make_orth_variants(nlp, doc.text, token_annot, orth_variant_level)
+                doc = nlp.make_doc(variant_text)
+                eg_dict["token_annotation"] = variant_token_annot
+                example = Example.from_dict(doc, eg_dict)
                 examples = example.split_sents()
             else:
+                example = Example.from_dict(doc, eg_dict)
                 examples = [example]
+
             for eg in examples:
                 if (not max_length) or len(eg.predicted) < max_length:
-                    if ignore_misaligned:
-                        try:
-                            _ = eg._deprecated_get_gold()
-                        except AlignmentError:
-                            continue
                     yield eg
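The new helper takes augmentation out of the corpus loop, so callers can apply it to an Example on demand (the updated test_make_orth_variants further down does exactly this). A minimal sketch with a made-up sentence; import paths assume this dev tree:

    from spacy.gold import Example
    from spacy.gold.augment import make_orth_variants_example
    from spacy.lang.en import English

    nlp = English()
    doc = nlp.make_doc("The quick brown fox jumps over the lazy dog.")
    example = Example.from_dict(doc, {"words": [t.text for t in doc]})
    # With orth_variant_level=0.2, each call has a 20% chance of swapping in
    # orthographic variants (casing, quote/punctuation forms) before rebuilding
    # the Example; otherwise the text comes back unchanged.
    variant = make_orth_variants_example(nlp, example, orth_variant_level=0.2)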
diff --git a/spacy/gold/example.pyx b/spacy/gold/example.pyx
index 402228994..b5d1b1402 100644
--- a/spacy/gold/example.pyx
+++ b/spacy/gold/example.pyx
@@ -126,7 +126,7 @@ cdef class Example:
             "doc_annotation": {
                 "cats": dict(self.reference.cats),
                 "entities": biluo_tags_from_doc(self.reference),
-                "links": [],  # TODO
+                "links": self._links_to_dict()
             },
             "token_annotation": {
                 "ids": [t.i+1 for t in self.reference],
@@ -141,6 +141,14 @@ cdef class Example:
             }
         }
 
+    def _links_to_dict(self):
+        links = {}
+        for ent in self.reference.ents:
+            if ent.kb_id_:
+                links[(ent.start_char, ent.end_char)] = {ent.kb_id_: 1.0}
+        return links
+
+
     def split_sents(self):
         """ Split the token annotations into multiple Examples based on sent_starts
         and return a list of the new Examples"""
diff --git a/spacy/language.py b/spacy/language.py
index c168afeea..b9a84e1bb 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -646,20 +646,6 @@ class Language(object):
             sgd(W, dW, key=key)
         return losses
 
-    def preprocess_gold(self, examples):
-        """Can be called before training to pre-process gold data. By default,
-        it handles nonprojectivity and adds missing tags to the tag map.
-
-        examples (iterable): `Example` objects.
-        YIELDS (tuple): `Example` objects.
-        """
-        # TODO: This is deprecated right?
-        for name, proc in self.pipeline:
-            if hasattr(proc, "preprocess_gold"):
-                examples = proc.preprocess_gold(examples)
-        for eg in examples:
-            yield eg
-
     def begin_training(self, get_examples=None, sgd=None, component_cfg=None, **cfg):
         """Allocate models, pre-process training data and acquire a trainer and
         optimizer. Used as a contextmanager.
diff --git a/spacy/syntax/arc_eager.pyx b/spacy/syntax/arc_eager.pyx
index 6ba7ad396..1512955a5 100644
--- a/spacy/syntax/arc_eager.pyx
+++ b/spacy/syntax/arc_eager.pyx
@@ -459,9 +459,9 @@ cdef class ArcEager(TransitionSystem):
                 actions[RIGHT][label] = 1
                 actions[REDUCE][label] = 1
         for example in kwargs.get('gold_parses', []):
-            heads, labels = nonproj.projectivize(example.token_annotation.heads,
-                                                 example.token_annotation.deps)
-            for child, head, label in zip(example.token_annotation.ids, heads, labels):
+            heads, labels = nonproj.projectivize(example.get_aligned("HEAD"),
+                                                 example.get_aligned("DEP"))
+            for child, head, label in zip(example.get_aligned("ID"), heads, labels):
                 if label.upper() == 'ROOT' :
                     label = 'ROOT'
                 if head == child:
diff --git a/spacy/syntax/nonproj.pyx b/spacy/syntax/nonproj.pyx
index 5b1f57d2b..eded53fac 100644
--- a/spacy/syntax/nonproj.pyx
+++ b/spacy/syntax/nonproj.pyx
@@ -78,8 +78,8 @@ def is_decorated(label):
 def count_decorated_labels(gold_data):
     freqs = {}
     for example in gold_data:
-        proj_heads, deco_deps = projectivize(example.token_annotation.heads,
-                                             example.token_annotation.deps)
+        proj_heads, deco_deps = projectivize(example.get_aligned("HEAD"),
+                                             example.get_aligned("DEP"))
         # set the label to ROOT for each root dependent
         deco_deps = ['ROOT' if head == i else deco_deps[i]
                      for i, head in enumerate(proj_heads)]
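With the token_annotation attribute gone, the parser code above reads gold heads and deps through Example.get_aligned, which projects reference annotations onto the predicted tokens. A rough sketch of that accessor under the dev API shown in these hunks, using identical tokenization so the projection is a straight copy:

    from spacy.gold import Example
    from spacy.lang.en import English

    nlp = English()
    doc = nlp.make_doc("She ate pizza")
    annots = {
        "words": ["She", "ate", "pizza"],
        "heads": [1, 1, 1],  # each token's head, as a token index
        "deps": ["nsubj", "ROOT", "dobj"],
    }
    example = Example.from_dict(doc, annots)
    # With matching tokenization these come back unchanged.
    print(example.get_aligned("HEAD"))                 # [1, 1, 1]
    print(example.get_aligned("DEP", as_string=True))  # ['nsubj', 'ROOT', 'dobj']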
diff --git a/spacy/tests/test_gold.py b/spacy/tests/test_gold.py
index d98a93f2f..726492138 100644
--- a/spacy/tests/test_gold.py
+++ b/spacy/tests/test_gold.py
@@ -11,6 +11,7 @@ import pytest
 import srsly
 
 from .util import make_tempdir
+from ..gold.augment import make_orth_variants_example
 
 
 @pytest.fixture
@@ -200,13 +201,16 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
     words = ["I flew", "to", "San Francisco", "Valley", "."]
     spaces = [True, True, True, False, False]
     doc = Doc(en_vocab, words=words, spaces=spaces)
-    entities = [(len("I flew to "), len("I flew to San Francisco Valley"), "LOC")]
-    links = {(len("I flew to "), len("I flew to San Francisco Valley")): {"Q816843": 1.0}}
+    offset_start = len("I flew to ")
+    offset_end = len("I flew to San Francisco Valley")
+    entities = [(offset_start, offset_end, "LOC")]
+    links = {(offset_start, offset_end): {"Q816843": 1.0}}
     gold_words = ["I", "flew to", "San", "Francisco Valley", "."]
     example = Example.from_dict(doc, {"words": gold_words, "entities": entities, "links": links})
     assert example.get_aligned("ENT_IOB") == [2, 2, 3, 1, 2]
     assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "LOC", "LOC", ""]
     assert example.get_aligned("ENT_KB_ID", as_string=True) == ["", "", "Q816843", "Q816843", ""]
+    assert example.to_dict()["doc_annotation"]["links"][(offset_start, offset_end)] == {"Q816843": 1.0}
 
     # additional whitespace tokens in GoldParse words
     words, spaces = get_words_and_spaces(
@@ -384,8 +388,8 @@ def test_make_orth_variants(doc):
     goldcorpus = GoldCorpus(str(json_file), str(json_file))
 
     # due to randomness, test only that this runs with no errors for now
-    train_reloaded_example = next(goldcorpus.train_dataset(nlp, orth_variant_level=0.2))
-    train_goldparse = get_parses_from_example(train_reloaded_example)[0][1]
+    train_example = next(goldcorpus.train_dataset(nlp))
+    variant_example = make_orth_variants_example(nlp, train_example, orth_variant_level=0.2)
 
 
 @pytest.mark.parametrize(
@@ -494,18 +498,7 @@ def test_split_sents(merged_dict):
         Doc(nlp.vocab, words=merged_dict["words"], spaces=merged_dict["spaces"]),
         merged_dict
     )
-    assert len(get_parses_from_example(
-        example,
-        merge=False,
-        vocab=nlp.vocab,
-        make_projective=False)
-    ) == 2
-    assert len(get_parses_from_example(
-        example,
-        merge=True,
-        vocab=nlp.vocab,
-        make_projective=False
-    )) == 1
+    assert example.text == "Hi there everyone It is just me"
     split_examples = example.split_sents()
     assert len(split_examples) == 2
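The strengthened test above checks that entity links now survive a to_dict() round trip. The same behaviour in isolation, as a minimal sketch against this dev API (import paths assume this dev tree):

    from spacy.gold import Example
    from spacy.lang.en import English

    nlp = English()
    doc = nlp.make_doc("I flew to San Francisco Valley.")
    start, end = len("I flew to "), len("I flew to San Francisco Valley")
    annots = {
        "words": [t.text for t in doc],
        "entities": [(start, end, "LOC")],
        "links": {(start, end): {"Q816843": 1.0}},
    }
    example = Example.from_dict(doc, annots)
    # _links_to_dict() rebuilds the links mapping from the reference doc's
    # entities and their KB IDs, instead of the old hard-coded empty list.
    assert example.to_dict()["doc_annotation"]["links"] == {(start, end): {"Q816843": 1.0}}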