diff --git a/examples/experiments/onto-joint/pretrain.cfg b/examples/experiments/onto-joint/pretrain.cfg
index 87501fb16..f1de3eab9 100644
--- a/examples/experiments/onto-joint/pretrain.cfg
+++ b/examples/experiments/onto-joint/pretrain.cfg
@@ -45,12 +45,17 @@ eps = 1e-8
 learn_rate = 0.001
 
 [pretraining]
-max_epochs = 100
+max_epochs = 1000
+# Epoch to resume from; only read when init_tok2vec is a renamed weights file (not model<N>.bin)
+epoch_start = 0
 min_length = 5
 max_length = 500
 dropout = 0.2
 n_save_every = null
 batch_size = 3000
+seed = ${training:seed}
+use_pytorch_for_gpu_memory = ${training:use_pytorch_for_gpu_memory}
+init_tok2vec = null
 
 [pretraining.model]
 @architectures = "spacy.HashEmbedCNN.v1"
diff --git a/examples/experiments/ptb-joint-pos-dep/bilstm_tok2vec.cfg b/examples/experiments/ptb-joint-pos-dep/bilstm_tok2vec.cfg
index 52faad9ec..acbcc8d41 100644
--- a/examples/experiments/ptb-joint-pos-dep/bilstm_tok2vec.cfg
+++ b/examples/experiments/ptb-joint-pos-dep/bilstm_tok2vec.cfg
@@ -66,4 +66,4 @@ depth = 4
 embed_size = 2000
 subword_features = true
 maxout_pieces = 3
-dropout = null
\ No newline at end of file
+dropout = null
diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index 0022a0d07..d6f4d484c 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -16,14 +16,15 @@ from ..tokens import Doc
 from ..attrs import ID, HEAD
 from .. import util
 from ..gold import Example
+from .deprecated_pretrain import _load_pretrained_tok2vec  # TODO
 
 
 @plac.annotations(
     # fmt: off
     texts_loc=("Path to JSONL file with raw texts to learn from, with text provided as the key 'text' or tokens as the key 'tokens'", "positional", None, str),
     vectors_model=("Name or path to spaCy model with vectors to learn from", "positional", None, str),
-    config_path=("Path to config file", "positional", None, Path),
     output_dir=("Directory to write models to on each epoch", "positional", None, Path),
+    config_path=("Path to config file", "positional", None, Path),
     use_gpu=("Use GPU", "option", "g", int),
     # fmt: on
 )
@@ -60,8 +61,8 @@ def pretrain(
 
     msg.info(f"Loading config from: {config_path}")
     config = util.load_config(config_path, create_objects=False)
-    util.fix_random_seed(config["training"]["seed"])
-    if config["training"]["use_pytorch_for_gpu_memory"]:
+    util.fix_random_seed(config["pretraining"]["seed"])
+    if config["pretraining"]["use_pytorch_for_gpu_memory"]:
         use_pytorch_for_gpu_memory()
 
     if output_dir.exists() and [p for p in output_dir.iterdir()]:
@@ -100,8 +101,33 @@ def pretrain(
     tok2vec = pretrain_config["model"]
     model = create_pretraining_model(nlp, tok2vec)
     optimizer = pretrain_config["optimizer"]
+    init_tok2vec = pretrain_config["init_tok2vec"]
+    epoch_start = pretrain_config["epoch_start"]
+
+    # Load in pretrained weights - TODO test
+    if init_tok2vec is not None:
+        components = _load_pretrained_tok2vec(nlp, init_tok2vec)
+        msg.text(f"Loaded pretrained tok2vec for: {components}")
+        # Parse the epoch number from the given weight file
+        model_name = re.search(r"model\d+\.bin", str(init_tok2vec))
+        if model_name:
+            # Default weight file name: parse the saved epoch from it by cutting off 'model' and '.bin', then resume at the following epoch
+            epoch_start = int(model_name.group(0)[5:][:-4]) + 1
+        else:
+            if not epoch_start:
+                msg.fail(
+                    "You have to use the epoch_start setting when using a renamed weight file for init_tok2vec",
+                    exits=True,
+                )
+            elif epoch_start < 0:
+                msg.fail(
+                    f"The setting epoch_start has to be greater or equal to 0. {epoch_start} is invalid",
+                    exits=True,
+                )
+    else:
+        # Without 'init_tok2vec' the 'epoch_start' setting is ignored
+        epoch_start = 0
 
-    epoch_start = 0  # TODO
     tracker = ProgressTracker(frequency=10000)
     msg.divider(f"Pre-training tok2vec layer - starting at epoch {epoch_start}")
     row_settings = {"widths": (3, 10, 10, 6, 4), "aligns": ("r", "r", "r", "r", "r")}