diff --git a/bin/ud/ud_train.py b/bin/ud/ud_train.py
index ac5987aa4..11ad564ec 100644
--- a/bin/ud/ud_train.py
+++ b/bin/ud/ud_train.py
@@ -16,7 +16,7 @@ from bin.ud import conll17_ud_eval
 from spacy.tokens import Token, Doc
 from spacy.gold import Example
 from spacy.util import compounding, minibatch, minibatch_by_words
-from spacy.syntax.nonproj import projectivize
+from spacy.pipeline._parser_internals.nonproj import projectivize
 from spacy.matcher import Matcher
 from spacy import displacy
 from collections import defaultdict
diff --git a/examples/experiments/onto-joint/defaults.cfg b/examples/experiments/onto-joint/defaults.cfg
index 0e0d4d4c3..3ab3ddaba 100644
--- a/examples/experiments/onto-joint/defaults.cfg
+++ b/examples/experiments/onto-joint/defaults.cfg
@@ -1,37 +1,46 @@
-# Training hyper-parameters and additional features.
-[training]
-# Whether to train on sequences with 'gold standard' sentence boundaries
-# and tokens. If you set this to true, take care to ensure your run-time
-# data is passed in sentence-by-sentence via some prior preprocessing.
-gold_preproc = false
-# Limitations on training document length or number of examples.
-max_length = 5000
-limit = 0
-# Data augmentation
-orth_variant_level = 0.0
-dropout = 0.1
-# Controls early-stopping. 0 or -1 mean unlimited.
-patience = 1600
-max_epochs = 0
-max_steps = 20000
-eval_frequency = 200
-# Other settings
-seed = 0
-accumulate_gradient = 1
-use_pytorch_for_gpu_memory = false
-# Control how scores are printed and checkpoints are evaluated.
-eval_batch_size = 128
-score_weights = {"dep_las": 0.4, "ents_f": 0.4, "tag_acc": 0.2}
+[paths]
+train = ""
+dev = ""
+raw = null
 init_tok2vec = null
-discard_oversize = false
-batch_by = "words"
-raw_text = null
-tag_map = null
-vectors = null
-base_model = null
-morph_rules = null
 
-[training.batch_size]
+[system]
+seed = 0
+use_pytorch_for_gpu_memory = false
+
+[training]
+seed = ${system:seed}
+dropout = 0.1
+init_tok2vec = ${paths:init_tok2vec}
+vectors = null
+accumulate_gradient = 1
+max_steps = 0
+max_epochs = 0
+patience = 10000
+eval_frequency = 200
+score_weights = {"dep_las": 0.4, "ents_f": 0.4, "tag_acc": 0.2}
+frozen_components = []
+
+[training.train_corpus]
+@readers = "spacy.Corpus.v1"
+path = ${paths:train}
+gold_preproc = true
+max_length = 0
+limit = 0
+
+[training.dev_corpus]
+@readers = "spacy.Corpus.v1"
+path = ${paths:dev}
+gold_preproc = ${training.train_corpus:gold_preproc}
+max_length = 0
+limit = 0
+
+[training.batcher]
+@batchers = "batch_by_words.v1"
+discard_oversize = false
+tolerance = 0.2
+
+[training.batcher.size]
 @schedules = "compounding.v1"
 start = 100
 stop = 1000
diff --git a/examples/experiments/ptb-joint-pos-dep/defaults.cfg b/examples/experiments/ptb-joint-pos-dep/defaults.cfg
index eed76cb7b..fc471ac43 100644
--- a/examples/experiments/ptb-joint-pos-dep/defaults.cfg
+++ b/examples/experiments/ptb-joint-pos-dep/defaults.cfg
@@ -1,30 +1,45 @@
+[paths]
+train = ""
+dev = ""
+raw = null
+init_tok2vec = null
+
+[system]
+seed = 0
+use_pytorch_for_gpu_memory = false
+
 [training]
+seed = ${system:seed}
+dropout = 0.2
+init_tok2vec = ${paths:init_tok2vec}
+vectors = null
+accumulate_gradient = 1
 max_steps = 0
+max_epochs = 0
 patience = 10000
 eval_frequency = 200
-dropout = 0.2
-init_tok2vec = null
-vectors = null
-max_epochs = 100
-orth_variant_level = 0.0
+score_weights = {"dep_las": 0.8, "tag_acc": 0.2}
+
+[training.read_train]
+@readers = "spacy.Corpus.v1"
+path = ${paths:train}
 gold_preproc = true
 max_length = 0
-scores = ["tag_acc", "dep_uas", "dep_las", "speed"]
-score_weights = {"dep_las": 0.8, "tag_acc": 0.2}
 limit = 0
-seed = 0
-accumulate_gradient = 1
+
+[training.read_dev]
+@readers = "spacy.Corpus.v1"
+path = ${paths:dev}
+gold_preproc = ${training.read_train:gold_preproc}
+max_length = 0
+limit = 0
+
+[training.batcher]
+@batchers = "batch_by_words.v1"
 discard_oversize = false
-raw_text = null
-tag_map = null
-morph_rules = null
-base_model = null
+tolerance = 0.2
 
-eval_batch_size = 128
-use_pytorch_for_gpu_memory = false
-batch_by = "words"
-
-[training.batch_size]
+[training.batcher.size]
 @schedules = "compounding.v1"
 start = 100
 stop = 1000
diff --git a/examples/training/conllu.py b/examples/training/conllu.py
index ecc07ccf2..a398b0ae0 100644
--- a/examples/training/conllu.py
+++ b/examples/training/conllu.py
@@ -13,7 +13,7 @@ import spacy
 import spacy.util
 from spacy.tokens import Token, Doc
 from spacy.gold import Example
-from spacy.syntax.nonproj import projectivize
+from spacy.pipeline._parser_internals.nonproj import projectivize
 from collections import defaultdict
 from spacy.matcher import Matcher
 
diff --git a/examples/training/create_kb.py b/examples/training/create_kb.py
index 5b17bb59e..0c6e29226 100644
--- a/examples/training/create_kb.py
+++ b/examples/training/create_kb.py
@@ -48,7 +48,8 @@ def main(model, output_dir=None):
     # You can change the dimension of vectors in your KB by using an encoder that changes the dimensionality.
     # For simplicity, we'll just use the original vector dimension here instead.
     vectors_dim = nlp.vocab.vectors.shape[1]
-    kb = KnowledgeBase(vocab=nlp.vocab, entity_vector_length=vectors_dim)
+    kb = KnowledgeBase(entity_vector_length=vectors_dim)
+    kb.initialize(nlp.vocab)
 
     # set up the data
     entity_ids = []
@@ -95,7 +96,8 @@ def main(model, output_dir=None):
         print("Loading vocab from", vocab_path)
         print("Loading KB from", kb_path)
         vocab2 = Vocab().from_disk(vocab_path)
-        kb2 = KnowledgeBase(vocab=vocab2)
+        kb2 = KnowledgeBase(entity_vector_length=1)
+        kb2.initialize(vocab2)
         kb2.load_bulk(kb_path)
         print()
         _print_kb(kb2)
diff --git a/pyproject.toml b/pyproject.toml
index 91f1464df..935b221d8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ requires = [
     "cymem>=2.0.2,<2.1.0",
     "preshed>=3.0.2,<3.1.0",
     "murmurhash>=0.28.0,<1.1.0",
-    "thinc>=8.0.0a19,<8.0.0a30",
+    "thinc>=8.0.0a22,<8.0.0a30",
     "blis>=0.4.0,<0.5.0",
     "pytokenizations",
     "smart_open>=2.0.0,<3.0.0"
diff --git a/requirements.txt b/requirements.txt
index d0413825b..a082f4b6e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 # Our libraries
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
-thinc>=8.0.0a19,<8.0.0a30
+thinc>=8.0.0a22,<8.0.0a30
 blis>=0.4.0,<0.5.0
 ml_datasets>=0.1.1
 murmurhash>=0.28.0,<1.1.0
diff --git a/setup.cfg b/setup.cfg
index d2cb7c92a..249dc9827 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,13 +34,13 @@ setup_requires =
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
     murmurhash>=0.28.0,<1.1.0
-    thinc>=8.0.0a19,<8.0.0a30
+    thinc>=8.0.0a22,<8.0.0a30
 install_requires =
     # Our libraries
     murmurhash>=0.28.0,<1.1.0
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
-    thinc>=8.0.0a19,<8.0.0a30
+    thinc>=8.0.0a22,<8.0.0a30
     blis>=0.4.0,<0.5.0
     wasabi>=0.7.1,<1.1.0
     srsly>=2.1.0,<3.0.0
diff --git a/setup.py b/setup.py
index 6d962ab59..af4cd0ec6 100755
--- a/setup.py
+++ b/setup.py
@@ -31,6 +31,7 @@ MOD_NAMES = [
     "spacy.vocab",
     "spacy.attrs",
     "spacy.kb",
+    "spacy.ml.parser_model",
     "spacy.morphology",
     "spacy.pipeline.dep_parser",
     "spacy.pipeline.morphologizer",
@@ -40,14 +41,14 @@ MOD_NAMES = [
     "spacy.pipeline.sentencizer",
     "spacy.pipeline.senter",
     "spacy.pipeline.tagger",
-    "spacy.syntax.stateclass",
-    "spacy.syntax._state",
+    "spacy.pipeline.transition_parser",
+    "spacy.pipeline._parser_internals.arc_eager",
+    "spacy.pipeline._parser_internals.ner",
+    "spacy.pipeline._parser_internals.nonproj",
+    "spacy.pipeline._parser_internals._state",
+    "spacy.pipeline._parser_internals.stateclass",
+    "spacy.pipeline._parser_internals.transition_system",
     "spacy.tokenizer",
-    "spacy.syntax.nn_parser",
-    "spacy.syntax._parser_model",
-    "spacy.syntax.nonproj",
-    "spacy.syntax.transition_system",
-    "spacy.syntax.arc_eager",
     "spacy.gold.gold_io",
     "spacy.tokens.doc",
     "spacy.tokens.span",
@@ -57,7 +58,6 @@ MOD_NAMES = [
     "spacy.matcher.matcher",
     "spacy.matcher.phrasematcher",
     "spacy.matcher.dependencymatcher",
-    "spacy.syntax.ner",
     "spacy.symbols",
     "spacy.vectors",
 ]
diff --git a/spacy/__init__.py b/spacy/__init__.py
index da2b23a20..73e828936 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -8,6 +8,7 @@ warnings.filterwarnings("ignore", message="numpy.ufunc size changed")  # noqa
 
 # These are imported as part of the API
 from thinc.api import prefer_gpu, require_gpu  # noqa: F401
+from thinc.api import Config
 
 from . import pipeline  # noqa: F401
 from .cli.info import info  # noqa: F401
@@ -26,17 +27,17 @@ if sys.maxunicode == 65535:
 def load(
     name: Union[str, Path],
     disable: Iterable[str] = tuple(),
-    component_cfg: Dict[str, Dict[str, Any]] = util.SimpleFrozenDict(),
+    config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
 ) -> Language:
     """Load a spaCy model from an installed package or a local path.
 
     name (str): Package name or model path.
     disable (Iterable[str]): Names of pipeline components to disable.
-    component_cfg (Dict[str, dict]): Config overrides for pipeline components,
-        keyed by component names.
+    config (Dict[str, Any] / Config): Config overrides as nested dict or dict
+        keyed by section values in dot notation.
     RETURNS (Language): The loaded nlp object.
     """
-    return util.load_model(name, disable=disable, component_cfg=component_cfg)
+    return util.load_model(name, disable=disable, config=config)
 
 
 def blank(name: str, **overrides) -> Language:
diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py
index 72fac05a6..bc47ffdef 100644
--- a/spacy/cli/__init__.py
+++ b/spacy/cli/__init__.py
@@ -15,6 +15,7 @@ from .debug_model import debug_model  # noqa: F401
 from .evaluate import evaluate  # noqa: F401
 from .convert import convert  # noqa: F401
 from .init_model import init_model  # noqa: F401
+from .init_config import init_config  # noqa: F401
 from .validate import validate  # noqa: F401
 from .project.clone import project_clone  # noqa: F401
 from .project.assets import project_assets  # noqa: F401
diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
index f277988f8..93ec9f31e 100644
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@@ -6,7 +6,7 @@ import hashlib
 import typer
 from typer.main import get_command
 from contextlib import contextmanager
-from thinc.config import ConfigValidationError
+from thinc.config import Config, ConfigValidationError
 from configparser import InterpolationError
 import sys
 
@@ -31,6 +31,7 @@ DEBUG_HELP = """Suite of helpful commands for debugging and profiling. Includes
 commands to check and validate your config files, training and evaluation data,
 and custom model implementations.
 """
+INIT_HELP = """Commands for initializing configs and models."""
 
 # Wrappers for Typer's annotations. Initially created to set defaults and to
 # keep the names short, but not needed at the moment.
@@ -40,9 +41,11 @@ Opt = typer.Option
 app = typer.Typer(name=NAME, help=HELP)
 project_cli = typer.Typer(name="project", help=PROJECT_HELP, no_args_is_help=True)
 debug_cli = typer.Typer(name="debug", help=DEBUG_HELP, no_args_is_help=True)
+init_cli = typer.Typer(name="init", help=INIT_HELP, no_args_is_help=True)
 
 app.add_typer(project_cli)
 app.add_typer(debug_cli)
+app.add_typer(init_cli)
 
 
 def setup_cli() -> None:
@@ -172,16 +175,34 @@ def get_checksum(path: Union[Path, str]) -> str:
 
 
 @contextmanager
-def show_validation_error(title: str = "Config validation error"):
+def show_validation_error(
+    file_path: Optional[Union[str, Path]] = None,
+    *,
+    title: str = "Config validation error",
+    hint_init: bool = True,
+):
     """Helper to show custom config validation errors on the CLI.
 
+    file_path (str / Path): Optional file path of config file, used in hints.
     title (str): Title of the custom formatted error.
+    hint_init (bool): Show hint about filling config.
     """
     try:
         yield
     except (ConfigValidationError, InterpolationError) as e:
         msg.fail(title, spaced=True)
-        print(str(e).replace("Config validation error", "").strip())
+        # TODO: This is kinda hacky and we should probably provide a better
+        # helper for this in Thinc
+        err_text = str(e).replace("Config validation error", "").strip()
+        print(err_text)
+        if hint_init and "field required" in err_text:
+            config_path = file_path if file_path is not None else "config.cfg"
+            msg.text(
+                "If your config contains missing values, you can run the 'init "
+                "config' command to fill in all the defaults, if possible:",
+                spaced=True,
+            )
+            print(f"{COMMAND} init config {config_path} --base {config_path}\n")
         sys.exit(1)
 
 
@@ -196,3 +217,15 @@ def import_code(code_path: Optional[Union[Path, str]]) -> None:
             import_file("python_code", code_path)
         except Exception as e:
             msg.fail(f"Couldn't load Python code: {code_path}", e, exits=1)
+
+
+def get_sourced_components(config: Union[Dict[str, Any], Config]) -> List[str]:
+    """RETURNS (List[str]): All sourced components in the original config,
+        e.g. {"source": "en_core_web_sm"}. If the config contains a key
+        "factory", we assume it refers to a component factory.
+    """
+    return [
+        name
+        for name, cfg in config.get("components", {}).items()
+        if "factory" not in cfg and "source" in cfg
+    ]
diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index 1ffceeca1..6c8c85e30 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -8,9 +8,9 @@ import typer
 from thinc.api import Config
 
 from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides
-from ._util import import_code, debug_cli
+from ._util import import_code, debug_cli, get_sourced_components
 from ..gold import Corpus, Example
-from ..syntax import nonproj
+from ..pipeline._parser_internals import nonproj
 from ..language import Language
 from .. import util
 
@@ -33,7 +33,6 @@ def debug_config_cli(
     ctx: typer.Context,  # This is only used to read additional arguments
     config_path: Path = Arg(..., help="Path to config file", exists=True),
     code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    output_path: Optional[Path] = Opt(None, "--output", "-o", help="Output path for filled config or '-' for standard output", allow_dash=True),
     auto_fill: bool = Opt(False, "--auto-fill", "-F", help="Whether or not to auto-fill the config with built-in defaults if possible"),
     diff: bool = Opt(False, "--diff", "-D", help="Show a visual diff if config was auto-filled")
     # fmt: on
@@ -49,15 +48,12 @@ def debug_config_cli(
     """
     overrides = parse_config_overrides(ctx.args)
     import_code(code_path)
-    with show_validation_error():
-        config = Config().from_disk(config_path)
+    with show_validation_error(config_path):
+        config = Config().from_disk(config_path, overrides=overrides)
         try:
-            nlp, _ = util.load_model_from_config(
-                config, overrides=overrides, auto_fill=auto_fill
-            )
+            nlp, _ = util.load_model_from_config(config, auto_fill=auto_fill)
         except ValueError as e:
             msg.fail(str(e), exits=1)
-    is_stdout = output_path is not None and str(output_path) == "-"
     if auto_fill:
         orig_config = config.to_str()
         filled_config = nlp.config.to_str()
@@ -68,12 +64,7 @@ def debug_config_cli(
             if diff:
                 print(diff_strings(config.to_str(), nlp.config.to_str()))
     else:
-        msg.good("Original config is valid", show=not is_stdout)
-    if is_stdout:
-        print(nlp.config.to_str())
-    elif output_path is not None:
-        nlp.config.to_disk(output_path)
-        msg.good(f"Saved updated config to {output_path}")
+        msg.good("Original config is valid")
 
 
 @debug_cli.command(
@@ -142,12 +133,13 @@ def debug_data(
         msg.fail("Development data not found", dev_path, exits=1)
     if not config_path.exists():
         msg.fail("Config file not found", config_path, exists=1)
-    with show_validation_error():
-        cfg = Config().from_disk(config_path)
-        nlp, config = util.load_model_from_config(cfg, overrides=config_overrides)
-    # TODO: handle base model
-    lang = config["nlp"]["lang"]
-    base_model = config["training"]["base_model"]
+    with show_validation_error(config_path):
+        cfg = Config().from_disk(config_path, overrides=config_overrides)
+        nlp, config = util.load_model_from_config(cfg)
+    # Use original config here, not resolved version
+    sourced_components = get_sourced_components(cfg)
+    frozen_components = config["training"]["frozen_components"]
+    resume_components = [p for p in sourced_components if p not in frozen_components]
     pipeline = nlp.pipe_names
     factory_names = [nlp.get_pipe_meta(pipe).factory for pipe in nlp.pipe_names]
     tag_map_path = util.ensure_path(config["training"]["tag_map"])
@@ -169,13 +161,12 @@ def debug_data(
     loading_train_error_message = ""
     loading_dev_error_message = ""
     with msg.loading("Loading corpus..."):
-        corpus = Corpus(train_path, dev_path)
         try:
-            train_dataset = list(corpus.train_dataset(nlp))
+            train_dataset = list(Corpus(train_path)(nlp))
         except ValueError as e:
             loading_train_error_message = f"Training data cannot be loaded: {e}"
         try:
-            dev_dataset = list(corpus.dev_dataset(nlp))
+            dev_dataset = list(Corpus(dev_path)(nlp))
         except ValueError as e:
             loading_dev_error_message = f"Development data cannot be loaded: {e}"
     if loading_train_error_message or loading_dev_error_message:
@@ -195,13 +186,15 @@ def debug_data(
 
     train_texts = gold_train_data["texts"]
     dev_texts = gold_dev_data["texts"]
+    frozen_components = config["training"]["frozen_components"]
 
     msg.divider("Training stats")
+    msg.text(f"Language: {config['nlp']['lang']}")
     msg.text(f"Training pipeline: {', '.join(pipeline)}")
-    if base_model:
-        msg.text(f"Starting with base model '{base_model}'")
-    else:
-        msg.text(f"Starting with blank model '{lang}'")
+    if resume_components:
+        msg.text(f"Components from other models: {', '.join(resume_components)}")
+    if frozen_components:
+        msg.text(f"Frozen components: {', '.join(frozen_components)}")
     msg.text(f"{len(train_dataset)} training docs")
     msg.text(f"{len(dev_dataset)} evaluation docs")
 
@@ -212,7 +205,9 @@ def debug_data(
         msg.warn(f"{overlap} training examples also in evaluation data")
     else:
         msg.good("No overlap between training and evaluation data")
-    if not base_model and len(train_dataset) < BLANK_MODEL_THRESHOLD:
+    # TODO: make this feedback more fine-grained and report on updated
+    # components vs. blank components
+    if not resume_components and len(train_dataset) < BLANK_MODEL_THRESHOLD:
         text = (
             f"Low number of examples to train from a blank model ({len(train_dataset)})"
         )
diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index 88e060238..cc6cb98ea 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -2,13 +2,11 @@ from typing import Dict, Any, Optional
 from pathlib import Path
 from wasabi import msg
 from thinc.api import require_gpu, fix_random_seed, set_dropout_rate, Adam, Config
-from thinc.api import Model
+from thinc.api import Model, data_validation
 import typer
 
 from ._util import Arg, Opt, debug_cli, show_validation_error, parse_config_overrides
 from .. import util
-from ..lang.en import English
-from ..util import dot_to_object
 
 
 @debug_cli.command("model")
@@ -16,7 +14,7 @@ def debug_model_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
     config_path: Path = Arg(..., help="Path to config file", exists=True),
-    section: str = Arg(..., help="Section that defines the model to be analysed"),
+    component: str = Arg(..., help="Name of the pipeline component of which the model should be analysed"),
     layers: str = Opt("", "--layers", "-l", help="Comma-separated names of layer IDs to print"),
     dimensions: bool = Opt(False, "--dimensions", "-DIM", help="Show dimensions"),
     parameters: bool = Opt(False, "--parameters", "-PAR", help="Show parameters"),
@@ -25,7 +23,7 @@ def debug_model_cli(
     P0: bool = Opt(False, "--print-step0", "-P0", help="Print model before training"),
     P1: bool = Opt(False, "--print-step1", "-P1", help="Print model after initialization"),
     P2: bool = Opt(False, "--print-step2", "-P2", help="Print model after training"),
-    P3: bool = Opt(True, "--print-step3", "-P3", help="Print final predictions"),
+    P3: bool = Opt(False, "--print-step3", "-P3", help="Print final predictions"),
     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
     # fmt: on
 ):
@@ -50,10 +48,10 @@ def debug_model_cli(
         "print_prediction": P3,
     }
     config_overrides = parse_config_overrides(ctx.args)
-    cfg = Config().from_disk(config_path)
-    with show_validation_error():
+    with show_validation_error(config_path):
+        cfg = Config().from_disk(config_path, overrides=config_overrides)
         try:
-            _, config = util.load_model_from_config(cfg, overrides=config_overrides)
+            nlp, config = util.load_model_from_config(cfg)
         except ValueError as e:
             msg.fail(str(e), exits=1)
     seed = config["pretraining"]["seed"]
@@ -61,12 +59,12 @@ def debug_model_cli(
         msg.info(f"Fixing random seed: {seed}")
         fix_random_seed(seed)
 
-    component = dot_to_object(config, section)
-    if hasattr(component, "model"):
-        model = component.model
+    pipe = nlp.get_pipe(component)
+    if hasattr(pipe, "model"):
+        model = pipe.model
     else:
         msg.fail(
-            f"The section '{section}' does not specify an object that holds a Model.",
+            f"The component '{component}' does not specify an object that holds a Model.",
             exits=1,
         )
     debug_model(model, print_settings=print_settings)
@@ -84,15 +82,17 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None
     # STEP 0: Printing before training
     msg.info(f"Analysing model with ID {model.id}")
     if print_settings.get("print_before_training"):
-        msg.info(f"Before training:")
+        msg.divider(f"STEP 0 - before training")
         _print_model(model, print_settings)
 
     # STEP 1: Initializing the model and printing again
     Y = _get_output(model.ops.xp)
     _set_output_dim(nO=Y.shape[-1], model=model)
-    model.initialize(X=_get_docs(), Y=Y)
+    # The output vector might differ from the official type of the output layer
+    with data_validation(False):
+        model.initialize(X=_get_docs(), Y=Y)
     if print_settings.get("print_after_init"):
-        msg.info(f"After initialization:")
+        msg.divider(f"STEP 1 - after initialization")
         _print_model(model, print_settings)
 
     # STEP 2: Updating the model and printing again
@@ -104,13 +104,14 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None
         get_dX(dY)
         model.finish_update(optimizer)
     if print_settings.get("print_after_training"):
-        msg.info(f"After training:")
+        msg.divider(f"STEP 2 - after training")
         _print_model(model, print_settings)
 
     # STEP 3: the final prediction
     prediction = model.predict(_get_docs())
     if print_settings.get("print_prediction"):
-        msg.info(f"Prediction:", str(prediction))
+        msg.divider(f"STEP 3 - prediction")
+        msg.info(str(prediction))
 
 
 def get_gradient(model, Y):
@@ -127,8 +128,8 @@ def _sentences():
     ]
 
 
-def _get_docs():
-    nlp = English()
+def _get_docs(lang: str = "en"):
+    nlp = util.get_lang_class(lang)()
     return list(nlp.pipe(_sentences()))
 
 
diff --git a/spacy/cli/download.py b/spacy/cli/download.py
index cdbd7514a..e55e6e40e 100644
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@@ -7,23 +7,7 @@ import typer
 from ._util import app, Arg, Opt
 from .. import about
 from ..util import is_package, get_base_version, run_command
-
-# These are the old shortcuts we previously supported in spacy download. As of
-# v3, shortcuts are deprecated so we're not expecting to add anything to this
-# list. It only exists to show users warnings.
-OLD_SHORTCUTS = {
-    "en": "en_core_web_sm",
-    "de": "de_core_news_sm",
-    "es": "es_core_news_sm",
-    "pt": "pt_core_news_sm",
-    "fr": "fr_core_news_sm",
-    "it": "it_core_news_sm",
-    "nl": "nl_core_news_sm",
-    "el": "el_core_news_sm",
-    "nb": "nb_core_news_sm",
-    "lt": "lt_core_news_sm",
-    "xx": "xx_ent_wiki_sm",
-}
+from ..errors import OLD_MODEL_SHORTCUTS
 
 
 @app.command(
@@ -66,12 +50,12 @@ def download(model: str, direct: bool = False, *pip_args) -> None:
         download_model(dl_tpl.format(m=model_name, v=version), pip_args)
     else:
         model_name = model
-        if model in OLD_SHORTCUTS:
+        if model in OLD_MODEL_SHORTCUTS:
             msg.warn(
-                f"As of spaCy v3.0, shortcuts like '{model}' are deprecated. "
-                f"Please use the full model name '{OLD_SHORTCUTS[model]}' instead."
+                f"As of spaCy v3.0, shortcuts like '{model}' are deprecated. Please "
+                f"use the full model name '{OLD_MODEL_SHORTCUTS[model]}' instead."
             )
-            model_name = OLD_SHORTCUTS[model]
+            model_name = OLD_MODEL_SHORTCUTS[model]
         compatibility = get_compatibility()
         version = get_version(model_name, compatibility)
         download_model(dl_tpl.format(m=model_name, v=version), pip_args)
diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py
index ee1be57a3..5b434ee32 100644
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@@ -1,5 +1,4 @@
 from typing import Optional, List, Dict
-from timeit import default_timer as timer
 from wasabi import Printer
 from pathlib import Path
 import re
@@ -64,9 +63,9 @@ def evaluate(
         msg.fail("Evaluation data not found", data_path, exits=1)
     if displacy_path and not displacy_path.exists():
         msg.fail("Visualization output directory not found", displacy_path, exits=1)
-    corpus = Corpus(data_path, data_path)
+    corpus = Corpus(data_path, gold_preproc=gold_preproc)
     nlp = util.load_model(model)
-    dev_dataset = list(corpus.dev_dataset(nlp, gold_preproc=gold_preproc))
+    dev_dataset = list(corpus(nlp))
     scores = nlp.evaluate(dev_dataset, verbose=False)
     metrics = {
         "TOK": "token_acc",
diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py
new file mode 100644
index 000000000..01664ee40
--- /dev/null
+++ b/spacy/cli/init_config.py
@@ -0,0 +1,81 @@
+from typing import Optional, List
+from pathlib import Path
+from thinc.api import Config
+from wasabi import msg
+
+from ..util import load_model_from_config, get_lang_class, load_model
+from ._util import init_cli, Arg, Opt, show_validation_error
+
+
+@init_cli.command("config")
+def init_config_cli(
+    # fmt: off
+    output_path: Path = Arg("-", help="Output path or - for stdout", allow_dash=True),
+    base_path: Optional[Path] = Opt(None, "--base", "-b", help="Optional base config to fill", exists=True, dir_okay=False),
+    model: Optional[str] = Opt(None, "--model", "-m", help="Optional model to copy config from"),
+    lang: Optional[str] = Opt(None, "--lang", "-l", help="Optional language code for blank config"),
+    pipeline: Optional[str] = Opt(None, "--pipeline", "-p", help="Optional pipeline components to use")
+    # fmt: on
+):
+    """Generate a starter config.cfg for training."""
+    validate_cli_args(base_path, model, lang)
+    is_stdout = str(output_path) == "-"
+    pipeline = [p.strip() for p in pipeline.split(",")] if pipeline else []
+    cfg = init_config(output_path, base_path, model, lang, pipeline, silent=is_stdout)
+    if is_stdout:
+        print(cfg.to_str())
+    else:
+        cfg.to_disk(output_path)
+        msg.good("Saved config", output_path)
+
+
+def init_config(
+    output_path: Path,
+    config_path: Optional[Path],
+    model: Optional[str],
+    lang: Optional[str],
+    pipeline: Optional[List[str]],
+    silent: bool = False,
+) -> Config:
+    if config_path is not None:
+        msg.info("Generating config from base config", show=not silent)
+        with show_validation_error(config_path, hint_init=False):
+            config = Config().from_disk(config_path)
+            try:
+                nlp, _ = load_model_from_config(config, auto_fill=True)
+            except ValueError as e:
+                msg.fail(str(e), exits=1)
+        return nlp.config
+    if model is not None:
+        ext = f" with pipeline {pipeline}" if pipeline else ""
+        msg.info(f"Generating config from model {model}{ext}", show=not silent)
+        nlp = load_model(model)
+        for existing_pipe_name in nlp.pipe_names:
+            if pipeline and existing_pipe_name not in pipeline:
+                nlp.remove_pipe(existing_pipe_name)
+        for pipe_name in pipeline:
+            if pipe_name not in nlp.pipe_names:
+                nlp.add_pipe(pipe_name)
+        return nlp.config
+    if lang is not None:
+        ext = f" with pipeline {pipeline}" if pipeline else ""
+        msg.info(f"Generating config for language '{lang}'{ext}", show=not silent)
+        nlp = get_lang_class(lang)()
+        for pipe_name in pipeline:
+            nlp.add_pipe(pipe_name)
+        return nlp.config
+
+
+def validate_cli_args(
+    config_path: Optional[Path], model: Optional[str], lang: Optional[str]
+) -> None:
+    args = {"--base": config_path, "--model": model, "--lang": lang}
+    if sum(arg is not None for arg in args.values()) != 1:
+        existing = " ".join(f"{a} {v}" for a, v in args.items() if v is not None)
+        msg.fail(
+            "The init config command expects only one of the following arguments: "
+            "--base (base config to fill and update), --lang (language code to "
+            "use for blank config) or --model (base model to copy config from).",
+            f"Got: {existing if existing else 'no arguments'}",
+            exits=1,
+        )
diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py
index e1dca2395..4fdd2bbbc 100644
--- a/spacy/cli/init_model.py
+++ b/spacy/cli/init_model.py
@@ -10,14 +10,14 @@ import gzip
 import zipfile
 import srsly
 import warnings
-from wasabi import Printer
+from wasabi import msg, Printer
+import typer
 
-from ._util import app, Arg, Opt
+from ._util import app, init_cli, Arg, Opt
 from ..vectors import Vectors
 from ..errors import Errors, Warnings
 from ..language import Language
 from ..util import ensure_path, get_lang_class, load_model, OOV_RANK
-from ..lookups import Lookups
 
 try:
     import ftfy
@@ -28,9 +28,15 @@ except ImportError:
 DEFAULT_OOV_PROB = -20
 
 
-@app.command("init-model")
+@init_cli.command("model")
+@app.command(
+    "init-model",
+    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
+    hidden=True,  # hide this from main CLI help but still allow it to work with warning
+)
 def init_model_cli(
     # fmt: off
+    ctx: typer.Context,  # This is only used to read additional arguments
     lang: str = Arg(..., help="Model language"),
     output_dir: Path = Arg(..., help="Model output directory"),
     freqs_loc: Optional[Path] = Arg(None, help="Location of words frequencies file", exists=True),
@@ -48,6 +54,12 @@ def init_model_cli(
     Create a new model from raw data. If vectors are provided in Word2Vec format,
     they can be either a .txt or zipped as a .zip or .tar.gz.
     """
+    if ctx.command.name == "init-model":
+        msg.warn(
+            "The init-model command is now available via the 'init model' "
+            "subcommand (without the hyphen). You can run python -m spacy init "
+            "--help for an overview of the other available initialization commands."
+        )
     init_model(
         lang,
         output_dir,
diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index 7d1a217be..7202ccacf 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -87,9 +87,9 @@ def pretrain(
     else:
         msg.info("Using CPU")
     msg.info(f"Loading config from: {config_path}")
-    config = Config().from_disk(config_path)
-    with show_validation_error():
-        nlp, config = util.load_model_from_config(config, overrides=config_overrides)
+    with show_validation_error(config_path):
+        config = Config().from_disk(config_path, overrides=config_overrides)
+        nlp, config = util.load_model_from_config(config)
     # TODO: validate that [pretraining] block exists
     if not output_dir.exists():
         output_dir.mkdir()
diff --git a/spacy/cli/project/assets.py b/spacy/cli/project/assets.py
index e42935e2f..3be784e04 100644
--- a/spacy/cli/project/assets.py
+++ b/spacy/cli/project/assets.py
@@ -1,7 +1,6 @@
 from typing import Optional
 from pathlib import Path
 from wasabi import msg
-import tqdm
 import re
 import shutil
 import requests
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 9cc36f77b..c5c6e7252 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -11,10 +11,10 @@ import random
 import typer
 
 from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
-from ._util import import_code
-from ..gold import Corpus, Example
+from ._util import import_code, get_sourced_components
 from ..language import Language
 from .. import util
+from ..gold.example import Example
 from ..errors import Errors
 
 
@@ -28,8 +28,6 @@ from ..ml import models  # noqa: F401
 def train_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    train_path: Path = Arg(..., help="Location of training data", exists=True),
-    dev_path: Path = Arg(..., help="Location of development data", exists=True),
     config_path: Path = Arg(..., help="Path to config file", exists=True),
     output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory to store model in"),
     code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
@@ -51,12 +49,11 @@ def train_cli(
     referenced in the config.
     """
     util.set_env_log(verbose)
-    verify_cli_args(train_path, dev_path, config_path, output_path)
+    verify_cli_args(config_path, output_path)
     overrides = parse_config_overrides(ctx.args)
     import_code(code_path)
     train(
         config_path,
-        {"train": train_path, "dev": dev_path},
         output_path=output_path,
         config_overrides=overrides,
         use_gpu=use_gpu,
@@ -66,8 +63,6 @@ def train_cli(
 
 def train(
     config_path: Path,
-    data_paths: Dict[str, Path],
-    raw_text: Optional[Path] = None,
     output_path: Optional[Path] = None,
     config_overrides: Dict[str, Any] = {},
     use_gpu: int = -1,
@@ -79,41 +74,37 @@ def train(
     else:
         msg.info("Using CPU")
     msg.info(f"Loading config and nlp from: {config_path}")
-    config = Config().from_disk(config_path)
+    with show_validation_error(config_path):
+        config = Config().from_disk(config_path, overrides=config_overrides)
     if config.get("training", {}).get("seed") is not None:
         fix_random_seed(config["training"]["seed"])
-    with show_validation_error():
-        nlp, config = util.load_model_from_config(config, overrides=config_overrides)
-    if config["training"]["base_model"]:
-        # TODO: do something to check base_nlp against regular nlp described in config?
-        # If everything matches it will look something like:
-        # base_nlp = util.load_model(config["training"]["base_model"])
-        # nlp = base_nlp
-        raise NotImplementedError("base_model not supported yet.")
+    # Use original config here before it's resolved to functions
+    sourced_components = get_sourced_components(config)
+    with show_validation_error(config_path):
+        nlp, config = util.load_model_from_config(config)
     if config["training"]["vectors"] is not None:
         util.load_vectors_into_model(nlp, config["training"]["vectors"])
     verify_config(nlp)
     raw_text, tag_map, morph_rules, weights_data = load_from_paths(config)
-    if config["training"]["use_pytorch_for_gpu_memory"]:
+    if config.get("system", {}).get("use_pytorch_for_gpu_memory"):
         # It feels kind of weird to not have a default for this.
         use_pytorch_for_gpu_memory()
-    training = config["training"]
-    optimizer = training["optimizer"]
-    limit = training["limit"]
-    corpus = Corpus(data_paths["train"], data_paths["dev"], limit=limit)
-    if resume_training:
-        msg.info("Resuming training")
-        nlp.resume_training()
-    else:
-        msg.info(f"Initializing the nlp pipeline: {nlp.pipe_names}")
-        train_examples = corpus.train_dataset(
-            nlp,
-            shuffle=False,
-            gold_preproc=training["gold_preproc"],
-            max_length=training["max_length"],
-        )
-        train_examples = list(train_examples)
-        nlp.begin_training(lambda: train_examples)
+    T_cfg = config["training"]
+    optimizer = T_cfg["optimizer"]
+    train_corpus = T_cfg["train_corpus"]
+    dev_corpus = T_cfg["dev_corpus"]
+    batcher = T_cfg["batcher"]
+    # Components that shouldn't be updated during training
+    frozen_components = T_cfg["frozen_components"]
+    # Sourced components that require resume_training
+    resume_components = [p for p in sourced_components if p not in frozen_components]
+    msg.info(f"Pipeline: {nlp.pipe_names}")
+    if resume_components:
+        with nlp.select_pipes(enable=resume_components):
+            msg.info(f"Resuming training for: {resume_components}")
+            nlp.resume_training()
+    with nlp.select_pipes(disable=[*frozen_components, *resume_components]):
+        nlp.begin_training(lambda: train_corpus(nlp))
 
     if tag_map:
         # Replace tag map with provided mapping
@@ -139,38 +130,36 @@ def train(
             msg.fail(err, exits=1)
         tok2vec.from_bytes(weights_data)
 
-    msg.info("Loading training corpus")
-    train_batches = create_train_batches(nlp, corpus, training)
-    evaluate = create_evaluation_callback(nlp, optimizer, corpus, training)
-
     # Create iterator, which yields out info after each optimization step.
     msg.info("Start training")
+    score_weights = T_cfg["score_weights"]
     training_step_iterator = train_while_improving(
         nlp,
         optimizer,
-        train_batches,
-        evaluate,
-        dropout=training["dropout"],
-        accumulate_gradient=training["accumulate_gradient"],
-        patience=training["patience"],
-        max_steps=training["max_steps"],
-        eval_frequency=training["eval_frequency"],
-        raw_text=raw_text,
+        create_train_batches(train_corpus(nlp), batcher, T_cfg["max_epochs"]),
+        create_evaluation_callback(nlp, dev_corpus, score_weights),
+        dropout=T_cfg["dropout"],
+        accumulate_gradient=T_cfg["accumulate_gradient"],
+        patience=T_cfg["patience"],
+        max_steps=T_cfg["max_steps"],
+        eval_frequency=T_cfg["eval_frequency"],
+        raw_text=None,
+        exclude=frozen_components,
     )
     msg.info(f"Training. Initial learn rate: {optimizer.learn_rate}")
-    print_row = setup_printer(training, nlp)
+    print_row = setup_printer(T_cfg, nlp)
 
     try:
-        progress = tqdm.tqdm(total=training["eval_frequency"], leave=False)
+        progress = tqdm.tqdm(total=T_cfg["eval_frequency"], leave=False)
         for batch, info, is_best_checkpoint in training_step_iterator:
             progress.update(1)
             if is_best_checkpoint is not None:
                 progress.close()
                 print_row(info)
                 if is_best_checkpoint and output_path is not None:
-                    update_meta(training, nlp, info)
+                    update_meta(T_cfg, nlp, info)
                     nlp.to_disk(output_path / "model-best")
-                progress = tqdm.tqdm(total=training["eval_frequency"], leave=False)
+                progress = tqdm.tqdm(total=T_cfg["eval_frequency"], leave=False)
     except Exception as e:
         if output_path is not None:
             msg.warn(
@@ -191,72 +180,32 @@ def train(
             msg.good(f"Saved model to output directory {final_model_path}")
 
 
-def create_train_batches(
-    nlp: Language, corpus: Corpus, cfg: Union[Config, Dict[str, Any]]
-):
-    max_epochs = cfg["max_epochs"]
-    train_examples = list(
-        corpus.train_dataset(
-            nlp,
-            shuffle=True,
-            gold_preproc=cfg["gold_preproc"],
-            max_length=cfg["max_length"],
-        )
-    )
-    epoch = 0
-    batch_strategy = cfg["batch_by"]
-    while True:
-        if len(train_examples) == 0:
-            raise ValueError(Errors.E988)
-        epoch += 1
-        if batch_strategy == "padded":
-            batches = util.minibatch_by_padded_size(
-                train_examples,
-                size=cfg["batch_size"],
-                buffer=256,
-                discard_oversize=cfg["discard_oversize"],
-            )
-        elif batch_strategy == "words":
-            batches = util.minibatch_by_words(
-                train_examples,
-                size=cfg["batch_size"],
-                discard_oversize=cfg["discard_oversize"],
-            )
-        else:
-            batches = util.minibatch(train_examples, size=cfg["batch_size"])
-        # make sure the minibatch_by_words result is not empty, or we'll have an infinite training loop
-        try:
-            first = next(batches)
-            yield epoch, first
-        except StopIteration:
-            raise ValueError(Errors.E986)
-        for batch in batches:
+def create_train_batches(iterator, batcher, max_epochs: int):
+    epoch = 1
+    examples, n_batches = [], 0
+    # Stream the first epoch, so we start training faster and support
+    # infinite streams. Examples are only cached if more epochs follow.
+    for n_batches, batch in enumerate(batcher(iterator), 1):
+        yield epoch, batch
+        if max_epochs != 1:
+            examples.extend(batch)
+    if not n_batches:
+        # Raise error if no data
+        raise ValueError(Errors.E986)
+    while epoch != max_epochs:
+        epoch += 1  # bump before replaying so each pass reports its own epoch
+        random.shuffle(examples)
+        for batch in batcher(examples):
             yield epoch, batch
-        if max_epochs >= 1 and epoch >= max_epochs:
-            break
-        random.shuffle(train_examples)
 
 
 def create_evaluation_callback(
-    nlp: Language,
-    optimizer: Optimizer,
-    corpus: Corpus,
-    cfg: Union[Config, Dict[str, Any]],
+    nlp: Language, dev_corpus: Callable, weights: Dict[str, float],
 ) -> Callable[[], Tuple[float, Dict[str, float]]]:
     def evaluate() -> Tuple[float, Dict[str, float]]:
-        dev_examples = corpus.dev_dataset(
-            nlp, gold_preproc=cfg["gold_preproc"]
-        )
-        dev_examples = list(dev_examples)
-        n_words = sum(len(ex.predicted) for ex in dev_examples)
-        batch_size = cfg["eval_batch_size"]
-        if optimizer.averages:
-            with nlp.use_params(optimizer.averages):
-                scores = nlp.evaluate(dev_examples, batch_size=batch_size)
-        else:
-            scores = nlp.evaluate(dev_examples, batch_size=batch_size)
+        dev_examples = list(dev_corpus(nlp))
+        scores = nlp.evaluate(dev_examples)
         # Calculate a weighted sum based on score_weights for the main score
-        weights = cfg["score_weights"]
         try:
             weighted_score = sum(scores[s] * weights.get(s, 0.0) for s in weights)
         except KeyError as e:
@@ -280,6 +229,7 @@ def train_while_improving(
     patience: int,
     max_steps: int,
     raw_text: List[Dict[str, str]],
+    exclude: List[str],
 ):
     """Train until an evaluation stops improving. Works as a generator,
     with each iteration yielding a tuple `(batch, info, is_best_checkpoint)`,
@@ -325,8 +275,6 @@ def train_while_improving(
         dropouts = dropout
     results = []
     losses = {}
-    to_enable = [name for name, proc in nlp.pipeline if hasattr(proc, "model")]
-
     if raw_text:
         random.shuffle(raw_text)
         raw_examples = [
@@ -336,20 +284,26 @@ def train_while_improving(
 
     for step, (epoch, batch) in enumerate(train_data):
         dropout = next(dropouts)
-        with nlp.select_pipes(enable=to_enable):
-            for subbatch in subdivide_batch(batch, accumulate_gradient):
-                nlp.update(subbatch, drop=dropout, losses=losses, sgd=False)
-                if raw_text:
-                    # If raw text is available, perform 'rehearsal' updates,
-                    # which use unlabelled data to reduce overfitting.
-                    raw_batch = list(next(raw_batches))
-                    nlp.rehearse(raw_batch, sgd=optimizer, losses=losses)
-            for name, proc in nlp.pipeline:
-                if hasattr(proc, "model"):
-                    proc.model.finish_update(optimizer)
+        for subbatch in subdivide_batch(batch, accumulate_gradient):
+            nlp.update(
+                subbatch, drop=dropout, losses=losses, sgd=False, exclude=exclude
+            )
+            if raw_text:
+                # If raw text is available, perform 'rehearsal' updates,
+                # which use unlabelled data to reduce overfitting.
+                raw_batch = list(next(raw_batches))
+                nlp.rehearse(raw_batch, sgd=optimizer, losses=losses, exclude=exclude)
+        # TODO: refactor this so we don't have to run it separately in here
+        for name, proc in nlp.pipeline:
+            if name not in exclude and hasattr(proc, "model"):
+                proc.model.finish_update(optimizer)
         optimizer.step_schedules()
         if not (step % eval_frequency):
-            score, other_scores = evaluate()
+            if optimizer.averages:
+                with nlp.use_params(optimizer.averages):
+                    score, other_scores = evaluate()
+            else:
+                score, other_scores = evaluate()
             results.append((score, step))
             is_best_checkpoint = score == max(results)[0]
         else:
@@ -460,17 +414,7 @@ def load_from_paths(
             msg.fail("Can't find raw text", raw_text, exits=1)
         raw_text = list(srsly.read_jsonl(config["training"]["raw_text"]))
     tag_map = {}
-    tag_map_path = util.ensure_path(config["training"]["tag_map"])
-    if tag_map_path is not None:
-        if not tag_map_path.exists():
-            msg.fail("Can't find tag map path", tag_map_path, exits=1)
-        tag_map = srsly.read_json(config["training"]["tag_map"])
     morph_rules = {}
-    morph_rules_path = util.ensure_path(config["training"]["morph_rules"])
-    if morph_rules_path is not None:
-        if not morph_rules_path.exists():
-            msg.fail("Can't find tag map path", morph_rules_path, exits=1)
-        morph_rules = srsly.read_json(config["training"]["morph_rules"])
     weights_data = None
     init_tok2vec = util.ensure_path(config["training"]["init_tok2vec"])
     if init_tok2vec is not None:
@@ -481,19 +425,10 @@ def load_from_paths(
     return raw_text, tag_map, morph_rules, weights_data
 
 
-def verify_cli_args(
-    train_path: Path,
-    dev_path: Path,
-    config_path: Path,
-    output_path: Optional[Path] = None,
-) -> None:
+def verify_cli_args(config_path: Path, output_path: Optional[Path] = None,) -> None:
     # Make sure all files and paths exists if they are needed
     if not config_path or not config_path.exists():
         msg.fail("Config file not found", config_path, exits=1)
-    if not train_path or not train_path.exists():
-        msg.fail("Training data not found", train_path, exits=1)
-    if not dev_path or not dev_path.exists():
-        msg.fail("Development data not found", dev_path, exits=1)
     if output_path is not None:
         if not output_path.exists():
             output_path.mkdir()
diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py
index 0580d34c5..e6ba284df 100644
--- a/spacy/cli/validate.py
+++ b/spacy/cli/validate.py
@@ -13,8 +13,9 @@ from ..util import get_package_path, get_model_meta, is_compatible_version
 @app.command("validate")
 def validate_cli():
     """
-    Validate that the currently installed version of spaCy is compatible
-    with the installed models. Should be run after `pip install -U spacy`.
+    Validate the currently installed models and spaCy version. Checks if the
+    installed models are compatible and shows upgrade instructions if available.
+    Should be run after `pip install -U spacy`.
     """
     validate()
 
diff --git a/spacy/default_config.cfg b/spacy/default_config.cfg
index fead996ba..353924280 100644
--- a/spacy/default_config.cfg
+++ b/spacy/default_config.cfg
@@ -1,7 +1,20 @@
+[paths]
+train = ""
+dev = ""
+raw = null
+init_tok2vec = null
+
+[system]
+seed = 0
+use_pytorch_for_gpu_memory = false
+
 [nlp]
 lang = null
 pipeline = []
 load_vocab_data = true
+before_creation = null
+after_creation = null
+after_pipeline_creation = null
 
 [nlp.tokenizer]
 @tokenizers = "spacy.Tokenizer.v1"
@@ -13,38 +26,57 @@ load_vocab_data = true
 
 # Training hyper-parameters and additional features.
 [training]
-# Whether to train on sequences with 'gold standard' sentence boundaries
-# and tokens. If you set this to true, take care to ensure your run-time
-# data is passed in sentence-by-sentence via some prior preprocessing.
-gold_preproc = false
-# Limitations on training document length or number of examples.
-max_length = 5000
-limit = 0
-# Data augmentation
-orth_variant_level = 0.0
+seed = ${system:seed}
 dropout = 0.1
+accumulate_gradient = 1
+# Extra resources for transfer-learning or pseudo-rehearsal
+init_tok2vec = ${paths:init_tok2vec}
+raw_text = ${paths:raw}
+vectors = null
 # Controls early-stopping. 0 or -1 mean unlimited.
 patience = 1600
 max_epochs = 0
 max_steps = 20000
 eval_frequency = 200
-eval_batch_size = 128
-# Other settings
-seed = 0
-accumulate_gradient = 1
-use_pytorch_for_gpu_memory = false
 # Control how scores are printed and checkpoints are evaluated.
 score_weights = {}
-# These settings are invalid for the transformer models.
-init_tok2vec = null
+# Names of pipeline components that shouldn't be updated during training
+frozen_components = []
+
+[training.train_corpus]
+@readers = "spacy.Corpus.v1"
+path = ${paths:train}
+# Whether to train on sequences with 'gold standard' sentence boundaries
+# and tokens. If you set this to true, take care to ensure your run-time
+# data is passed in sentence-by-sentence via some prior preprocessing.
+gold_preproc = false
+# Limitations on training document length
+max_length = 2000
+# Limitation on number of training examples
+limit = 0
+
+[training.dev_corpus]
+@readers = "spacy.Corpus.v1"
+path = ${paths:dev}
+# Whether to train on sequences with 'gold standard' sentence boundaries
+# and tokens. If you set this to true, take care to ensure your run-time
+# data is passed in sentence-by-sentence via some prior preprocessing.
+gold_preproc = false
+# Limitations on development document length
+max_length = 2000
+# Limitation on number of development examples
+limit = 0
+
+[training.batcher]
+@batchers = "batch_by_words.v1"
 discard_oversize = false
-raw_text = null
-tag_map = null
-morph_rules = null
-base_model = null
-vectors = null
-batch_by = "words"
-batch_size = 1000
+tolerance = 0.2
+
+[training.batcher.size]
+@schedules = "compounding.v1"
+start = 100
+stop = 1000
+compound = 1.001
 
 [training.optimizer]
 @optimizers = "Adam.v1"
@@ -69,8 +101,8 @@ max_length = 500
 dropout = 0.2
 n_save_every = null
 batch_size = 3000
-seed = ${training:seed}
-use_pytorch_for_gpu_memory = ${training:use_pytorch_for_gpu_memory}
+seed = ${system:seed}
+use_pytorch_for_gpu_memory = ${system:use_pytorch_for_gpu_memory}
 tok2vec_model = "components.tok2vec.model"
 
 [pretraining.objective]
diff --git a/spacy/errors.py b/spacy/errors.py
index 3fe53d6db..5c443ccad 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -63,8 +63,6 @@ class Warnings:
             "have the spacy-lookups-data package installed.")
     W024 = ("Entity '{entity}' - Alias '{alias}' combination already exists in "
             "the Knowledge Base.")
-    W025 = ("'{name}' requires '{attr}' to be assigned, but none of the "
-            "previous components in the pipeline declare that they assign it.")
     W026 = ("Unable to set all sentence boundaries from dependency parses.")
     W027 = ("Found a large training file of {size} bytes. Note that it may "
             "be more efficient to split your training data into multiple "
@@ -376,7 +374,8 @@ class Errors:
     E138 = ("Invalid JSONL format for raw text '{text}'. Make sure the input "
             "includes either the `text` or `tokens` key. For more info, see "
             "the docs:\nhttps://spacy.io/api/cli#pretrain-jsonl")
-    E139 = ("Knowledge Base for component '{name}' is empty.")
+    E139 = ("Knowledge Base for component '{name}' is empty. Use the methods "
+            "kb.add_entity and kb.add_alias to add entries.")
     E140 = ("The list of entities, prior probabilities and entity vectors "
             "should be of equal length.")
     E141 = ("Entity vectors should be of length {required} instead of the "
@@ -483,10 +482,31 @@ class Errors:
     E199 = ("Unable to merge 0-length span at doc[{start}:{end}].")
 
     # TODO: fix numbering after merging develop into master
+    E941 = ("Can't find model '{name}'. It looks like you're trying to load a "
+            "model from a shortcut, which is deprecated as of spaCy v3.0. To "
+            "load the model, use its full name instead:\n\n"
+            "nlp = spacy.load(\"{full}\")\n\nFor more details on the available "
+            "models, see the models directory: https://spacy.io/models. If you "
+            "want to create a blank model, use spacy.blank: "
+            "nlp = spacy.blank(\"{name}\")")
+    E942 = ("Executing after_{name} callback failed. Expected the function to "
+            "return an initialized nlp object but got: {value}. Maybe "
+            "you forgot to return the modified object in your function?")
+    E943 = ("Executing before_creation callback failed. Expected the function to "
+            "return an uninitialized Language subclass but got: {value}. Maybe "
+            "you forgot to return the modified object in your function or "
+            "returned the initialized nlp object instead?")
+    E944 = ("Can't copy pipeline component '{name}' from source model '{model}': "
+            "not found in pipeline. Available components: {opts}")
+    E945 = ("Can't copy pipeline component '{name}' from source. Expected loaded "
+            "nlp object, but got: {source}")
+    E946 = ("The Vocab for the knowledge base is not initialized. Did you forget to "
+            "call kb.initialize()?")
     E947 = ("Matcher.add received invalid 'greedy' argument: expected "
             "a string value from {expected} but got: '{arg}'")
     E948 = ("Matcher.add received invalid 'patterns' argument: expected "
             "a List, but got: {arg_type}")
+    E949 = ("Can only create an alignment when the texts are the same.")
     E952 = ("The section '{name}' is not a valid section in the provided config.")
     E953 = ("Mismatched IDs received by the Tok2Vec listener: {id1} vs. {id2}")
     E954 = ("The Tok2Vec listener did not receive a valid input.")
@@ -569,11 +589,13 @@ class Errors:
             "into {values}, but found {value}.")
     E983 = ("Invalid key for '{dict}': {key}. Available keys: "
             "{keys}")
-    E984 = ("Invalid component config for '{name}': no 'factory' key "
-            "specifying the registered function used to initialize the "
-            "component. For example, factory = \"ner\" will use the 'ner' "
-            "factory and all other settings in the block will be passed "
-            "to it as arguments.\n\n{config}")
+    E984 = ("Invalid component config for '{name}': component block needs either "
+            "a key 'factory' specifying the registered function used to "
+            "initialize the component, or a key 'source' specifying a "
+            "spaCy model to copy the component from. For example, factory = "
+            "\"ner\" will use the 'ner' factory and all other settings in the "
+            "block will be passed to it as arguments. Alternatively, source = "
+            "\"en_core_web_sm\" will copy the component from that model.\n\n{config}")
     E985 = ("Can't load model from config file: no 'nlp' section found.\n\n{config}")
     E986 = ("Could not create any training batches: check your input. "
             "Perhaps discard_oversize should be set to False ?")
@@ -608,6 +630,9 @@ class Errors:
              "initializing the pipeline:\n"
              'cfg = {"tokenizer": {"segmenter": "pkuseg", "pkuseg_model": name_or_path}}\n'
              'nlp = Chinese(config=cfg)')
+    E1001 = ("Target token outside of matched span for match with tokens "
+             "'{span}' and offset '{index}' matched by patterns '{patterns}'.")
+    E1002 = ("Span index out of range.")
 
 
 @add_codes
@@ -617,6 +642,15 @@ class TempErrors:
             "issue tracker: http://github.com/explosion/spaCy/issues")
 
 
+# Deprecated model shortcuts, only used in errors and warnings
+OLD_MODEL_SHORTCUTS = {
+    "en": "en_core_web_sm", "de": "de_core_news_sm", "es": "es_core_news_sm",
+    "pt": "pt_core_news_sm", "fr": "fr_core_news_sm", "it": "it_core_news_sm",
+    "nl": "nl_core_news_sm", "el": "el_core_news_sm", "nb": "nb_core_news_sm",
+    "lt": "lt_core_news_sm", "xx": "xx_ent_wiki_sm"
+}
+
+
 # fmt: on
 
 
diff --git a/spacy/gold/__init__.py b/spacy/gold/__init__.py
index c8b5fc44d..142c6b3a7 100644
--- a/spacy/gold/__init__.py
+++ b/spacy/gold/__init__.py
@@ -1,11 +1,8 @@
-from .corpus import Corpus
-from .example import Example
-from .align import Alignment
-
-from .iob_utils import iob_to_biluo, biluo_to_iob
-from .iob_utils import biluo_tags_from_offsets, offsets_from_biluo_tags
-from .iob_utils import spans_from_biluo_tags
-from .iob_utils import tags_to_entities
-
-from .gold_io import docs_to_json
-from .gold_io import read_json_file
+from .corpus import Corpus  # noqa: F401
+from .example import Example  # noqa: F401
+from .align import Alignment  # noqa: F401
+from .iob_utils import iob_to_biluo, biluo_to_iob  # noqa: F401
+from .iob_utils import biluo_tags_from_offsets, offsets_from_biluo_tags  # noqa: F401
+from .iob_utils import spans_from_biluo_tags, tags_to_entities  # noqa: F401
+from .gold_io import docs_to_json, read_json_file  # noqa: F401
+from .batchers import minibatch_by_padded_size, minibatch_by_words  # noqa: F401
diff --git a/spacy/gold/align.py b/spacy/gold/align.py
index af70ee5b7..e8f17a667 100644
--- a/spacy/gold/align.py
+++ b/spacy/gold/align.py
@@ -4,6 +4,8 @@ from thinc.types import Ragged
 from dataclasses import dataclass
 import tokenizations
 
+from ..errors import Errors
+
 
 @dataclass
 class Alignment:
@@ -18,6 +20,8 @@ class Alignment:
 
     @classmethod
     def from_strings(cls, A: List[str], B: List[str]) -> "Alignment":
+        if "".join(A).replace(" ", "").lower() != "".join(B).replace(" ", "").lower():
+            raise ValueError(Errors.E949)
         x2y, y2x = tokenizations.get_alignments(A, B)
         return Alignment.from_indices(x2y=x2y, y2x=y2x)
 
diff --git a/spacy/gold/batchers.py b/spacy/gold/batchers.py
new file mode 100644
index 000000000..57c6b4b3a
--- /dev/null
+++ b/spacy/gold/batchers.py
@@ -0,0 +1,171 @@
+from typing import Union, Iterator, Iterable, Sequence, TypeVar, List, Callable
+from typing import Optional, Any
+from functools import partial
+import itertools
+
+from ..util import registry, minibatch
+
+
+Sizing = Union[Iterable[int], int]
+ItemT = TypeVar("ItemT")
+BatcherT = Callable[[Iterable[ItemT]], Iterable[List[ItemT]]]
+
+
+@registry.batchers("batch_by_padded.v1")
+def configure_minibatch_by_padded_size(
+    *,
+    size: Sizing,
+    buffer: int,
+    discard_oversize: bool,
+    get_length: Optional[Callable[[ItemT], int]] = None
+) -> BatcherT:
+    # Avoid displacing optional values from the underlying function.
+    optionals = {"get_length": get_length} if get_length is not None else {}
+    return partial(
+        minibatch_by_padded_size,
+        size=size,
+        buffer=buffer,
+        discard_oversize=discard_oversize,
+        **optionals
+    )
+
+
+@registry.batchers("batch_by_words.v1")
+def configure_minibatch_by_words(
+    *,
+    size: Sizing,
+    tolerance: float,
+    discard_oversize: bool,
+    get_length: Optional[Callable[[ItemT], int]] = None
+) -> BatcherT:
+    # Forward tolerance to the batcher; only pass get_length when it was given.
+    optionals = {"get_length": get_length} if get_length is not None else {}
+    settings = {"size": size, "tolerance": tolerance, **optionals}
+    return partial(minibatch_by_words, discard_oversize=discard_oversize, **settings)
+
+
+@registry.batchers("batch_by_sequence.v1")
+def configure_minibatch(
+    size: Sizing, get_length: Optional[Callable[[ItemT], int]] = None
+) -> BatcherT:
+    optionals = {"get_length": get_length} if get_length is not None else {}
+    return partial(minibatch, size=size, **optionals)
+
+
+def minibatch_by_padded_size(
+    docs: Iterator["Doc"],
+    size: Sizing,
+    buffer: int = 256,
+    discard_oversize: bool = False,
+    get_length: Callable = len,
+) -> Iterator[Iterator["Doc"]]:
+    if isinstance(size, int):
+        size_ = itertools.repeat(size)
+    else:
+        size_ = iter(size)  # accept lists as well as iterators/schedules
+    for outer_batch in minibatch(docs, size=buffer):
+        outer_batch = list(outer_batch)
+        target_size = next(size_)
+        for indices in _batch_by_length(outer_batch, target_size, get_length):
+            subbatch = [outer_batch[i] for i in indices]
+            padded_size = max(get_length(seq) for seq in subbatch) * len(subbatch)
+            if discard_oversize and padded_size >= target_size:
+                pass
+            else:
+                yield subbatch
+
+
+def minibatch_by_words(
+    docs, size, tolerance=0.2, discard_oversize=False, get_length=len
+):
+    """Create minibatches of roughly a given number of words. If any examples
+    are longer than the specified batch length, they will appear in a batch by
+    themselves, or be discarded if discard_oversize=True.
+    The argument 'docs' can be a list of strings, Docs or Examples.
+    """
+    if isinstance(size, int):
+        size_ = itertools.repeat(size)
+    elif isinstance(size, List):
+        size_ = iter(size)
+    else:
+        size_ = size
+    target_size = next(size_)
+    tol_size = target_size * tolerance
+    batch = []
+    overflow = []
+    batch_size = 0
+    overflow_size = 0
+    for doc in docs:
+        n_words = get_length(doc)
+        # if the current example exceeds the maximum batch size, it is returned separately
+        # but only if discard_oversize=False.
+        if n_words > target_size + tol_size:
+            if not discard_oversize:
+                yield [doc]
+        # add the example to the current batch if there's no overflow yet and it still fits
+        elif overflow_size == 0 and (batch_size + n_words) <= target_size:
+            batch.append(doc)
+            batch_size += n_words
+        # add the example to the overflow buffer if it fits in the tolerance margin
+        elif (batch_size + overflow_size + n_words) <= (target_size + tol_size):
+            overflow.append(doc)
+            overflow_size += n_words
+        # yield the previous batch and start a new one. The new one gets the overflow examples.
+        else:
+            if batch:
+                yield batch
+            target_size = next(size_)
+            tol_size = target_size * tolerance
+            batch = overflow
+            batch_size = overflow_size
+            overflow = []
+            overflow_size = 0
+            # this example still fits
+            if (batch_size + n_words) <= target_size:
+                batch.append(doc)
+                batch_size += n_words
+            # this example fits in overflow
+            elif (batch_size + n_words) <= (target_size + tol_size):
+                overflow.append(doc)
+                overflow_size += n_words
+            # this example does not fit with the previous overflow: start another new batch
+            else:
+                if batch:
+                    yield batch
+                target_size = next(size_)
+                tol_size = target_size * tolerance
+                batch = [doc]
+                batch_size = n_words
+    batch.extend(overflow)
+    if batch:
+        yield batch
+
+
+def _batch_by_length(
+    seqs: Sequence[Any], max_words: int, get_length=len
+) -> List[List[Any]]:
+    """Given a list of sequences, return a batched list of indices into the
+    list, where the batches are grouped by length, in descending order.
+
+    Batches may be at most max_words in size, defined as max sequence length * size.
+    """
+    # Sort by (length, position): shortest first, ties keep their original order.
+    lengths_indices = [(get_length(seq), i) for i, seq in enumerate(seqs)]
+    lengths_indices.sort()
+    batches = []
+    batch = []
+    for length, i in lengths_indices:
+        if not batch:
+            batch.append(i)
+        elif length * (len(batch) + 1) <= max_words:
+            batch.append(i)
+        else:
+            batches.append(batch)
+            batch = [i]
+    if batch:
+        batches.append(batch)
+    # Check lengths match
+    assert sum(len(b) for b in batches) == len(seqs)
+    batches = [list(sorted(batch)) for batch in batches]
+    batches.reverse()
+    return batches
diff --git a/spacy/gold/converters/__init__.py b/spacy/gold/converters/__init__.py
index 63d52ad9d..15f025a08 100644
--- a/spacy/gold/converters/__init__.py
+++ b/spacy/gold/converters/__init__.py
@@ -1,4 +1,4 @@
 from .iob2docs import iob2docs  # noqa: F401
 from .conll_ner2docs import conll_ner2docs  # noqa: F401
-from .json2docs import json2docs
+from .json2docs import json2docs  # noqa: F401
 from .conllu2docs import conllu2docs  # noqa: F401
diff --git a/spacy/gold/corpus.py b/spacy/gold/corpus.py
index d23f70bee..4a65d8885 100644
--- a/spacy/gold/corpus.py
+++ b/spacy/gold/corpus.py
@@ -1,6 +1,5 @@
-from typing import Union, List, Iterable, Iterator, TYPE_CHECKING
+from typing import Union, List, Iterable, Iterator, TYPE_CHECKING, Callable
 from pathlib import Path
-import random
 
 from .. import util
 from .example import Example
@@ -12,26 +11,43 @@ if TYPE_CHECKING:
     from ..language import Language  # noqa: F401
 
 
+@util.registry.readers("spacy.Corpus.v1")
+def create_docbin_reader(
+    path: Path, gold_preproc: bool, max_length: int = 0, limit: int = 0
+) -> Callable[["Language"], Iterable[Example]]:
+    return Corpus(path, gold_preproc=gold_preproc, max_length=max_length, limit=limit)
+
+
 class Corpus:
-    """An annotated corpus, reading train and dev datasets from
-    the DocBin (.spacy) format.
+    """Iterate Example objects from a file or directory of DocBin (.spacy)
+    formatted data files.
+
+    path (Path): The directory or filename to read from.
+    gold_preproc (bool): Whether to set up the Example object with gold-standard
+        sentences and tokens for the predictions. Gold preprocessing helps
+        the annotations align to the tokenization, and may result in sequences
+        of more consistent length. However, it may reduce run-time accuracy due
+        to train/test skew. Defaults to False.
+    max_length (int): Maximum document length. Longer documents will be
+        split into sentences, if sentence boundaries are available. Defaults to
+        0, which indicates no limit.
+    limit (int): Limit corpus to a subset of examples, e.g. for debugging.
+        Defaults to 0, which indicates no limit.
 
     DOCS: https://spacy.io/api/corpus
     """
 
     def __init__(
-        self, train_loc: Union[str, Path], dev_loc: Union[str, Path], limit: int = 0
+        self,
+        path,
+        *,
+        limit: int = 0,
+        gold_preproc: bool = False,
+        max_length: int = 0,  # an int, not a flag: 0 means no limit
     ) -> None:
-        """Create a Corpus.
-
-        train (str / Path): File or directory of training data.
-        dev (str / Path): File or directory of development data.
-        limit (int): Max. number of examples returned.
-
-        DOCS: https://spacy.io/api/corpus#init
-        """
-        self.train_loc = train_loc
-        self.dev_loc = dev_loc
+        self.path = util.ensure_path(path)
+        self.gold_preproc = gold_preproc
+        self.max_length = max_length
         self.limit = limit
 
     @staticmethod
@@ -54,6 +70,21 @@ class Corpus:
                 locs.append(path)
         return locs
 
+    def __call__(self, nlp: "Language") -> Iterator[Example]:
+        """Yield examples built from the docs read from self.path.
+
+        nlp (Language): The current nlp object. Its vocab is used to read the docs.
+        YIELDS (Example): The examples.
+
+        DOCS: https://spacy.io/api/corpus#call
+        """
+        ref_docs = self.read_docbin(nlp.vocab, self.walk_corpus(self.path))
+        if self.gold_preproc:
+            examples = self.make_examples_gold_preproc(nlp, ref_docs)
+        else:  # max_length is only forwarded on this (non-gold) path
+            examples = self.make_examples(nlp, ref_docs, self.max_length)
+        yield from examples
+
     def _make_example(
         self, nlp: "Language", reference: Doc, gold_preproc: bool
     ) -> Example:
@@ -114,68 +145,3 @@ class Corpus:
                         i += 1
                         if self.limit >= 1 and i >= self.limit:
                             break
-
-    def count_train(self, nlp: "Language") -> int:
-        """Returns count of words in train examples.
-
-        nlp (Language): The current nlp. object.
-        RETURNS (int): The word count.
-
-        DOCS: https://spacy.io/api/corpus#count_train
-        """
-        n = 0
-        i = 0
-        for example in self.train_dataset(nlp):
-            n += len(example.predicted)
-            if self.limit >= 0 and i >= self.limit:
-                break
-            i += 1
-        return n
-
-    def train_dataset(
-        self,
-        nlp: "Language",
-        *,
-        shuffle: bool = True,
-        gold_preproc: bool = False,
-        max_length: int = 0
-    ) -> Iterator[Example]:
-        """Yield examples from the training data.
-
-        nlp (Language): The current nlp object.
-        shuffle (bool): Whether to shuffle the examples.
-        gold_preproc (bool): Whether to train on gold-standard sentences and tokens.
-        max_length (int): Maximum document length. Longer documents will be
-            split into sentences, if sentence boundaries are available. 0 for
-            no limit.
-        YIELDS (Example): The examples.
-
-        DOCS: https://spacy.io/api/corpus#train_dataset
-        """
-        ref_docs = self.read_docbin(nlp.vocab, self.walk_corpus(self.train_loc))
-        if gold_preproc:
-            examples = self.make_examples_gold_preproc(nlp, ref_docs)
-        else:
-            examples = self.make_examples(nlp, ref_docs, max_length)
-        if shuffle:
-            examples = list(examples)
-            random.shuffle(examples)
-        yield from examples
-
-    def dev_dataset(
-        self, nlp: "Language", *, gold_preproc: bool = False
-    ) -> Iterator[Example]:
-        """Yield examples from the development data.
-
-        nlp (Language): The current nlp object.
-        gold_preproc (bool): Whether to train on gold-standard sentences and tokens.
-        YIELDS (Example): The examples.
-
-        DOCS: https://spacy.io/api/corpus#dev_dataset
-        """
-        ref_docs = self.read_docbin(nlp.vocab, self.walk_corpus(self.dev_loc))
-        if gold_preproc:
-            examples = self.make_examples_gold_preproc(nlp, ref_docs)
-        else:
-            examples = self.make_examples(nlp, ref_docs, max_length=0)
-        yield from examples
diff --git a/spacy/gold/example.pxd b/spacy/gold/example.pxd
index 1f63b12d0..e06e36287 100644
--- a/spacy/gold/example.pxd
+++ b/spacy/gold/example.pxd
@@ -4,4 +4,6 @@ from ..tokens.doc cimport Doc
 cdef class Example:
     cdef readonly Doc x
     cdef readonly Doc y
-    cdef readonly object _alignment
+    cdef readonly object _cached_alignment
+    cdef readonly object _cached_words_x
+    cdef readonly object _cached_words_y
diff --git a/spacy/gold/example.pyx b/spacy/gold/example.pyx
index 9101cefce..f90d98603 100644
--- a/spacy/gold/example.pyx
+++ b/spacy/gold/example.pyx
@@ -10,7 +10,7 @@ from .align import Alignment
 from .iob_utils import biluo_to_iob, biluo_tags_from_offsets, biluo_tags_from_doc
 from .iob_utils import spans_from_biluo_tags
 from ..errors import Errors, Warnings
-from ..syntax import nonproj
+from ..pipeline._parser_internals import nonproj
 
 
 cpdef Doc annotations2doc(vocab, tok_annot, doc_annot):
@@ -32,9 +32,9 @@ cdef class Example:
             raise TypeError(Errors.E972.format(arg="predicted"))
         if reference is None:
             raise TypeError(Errors.E972.format(arg="reference"))
-        self.x = predicted
-        self.y = reference
-        self._alignment = alignment
+        self.predicted = predicted
+        self.reference = reference
+        self._cached_alignment = alignment
 
     def __len__(self):
         return len(self.predicted)
@@ -45,7 +45,8 @@ cdef class Example:
 
         def __set__(self, doc):
             self.x = doc
-            self._alignment = None
+            self._cached_alignment = None
+            self._cached_words_x = [t.text for t in doc]
 
     property reference:
         def __get__(self):
@@ -53,7 +54,8 @@ cdef class Example:
 
         def __set__(self, doc):
             self.y = doc
-            self._alignment = None
+            self._cached_alignment = None
+            self._cached_words_y = [t.text for t in doc]
 
     def copy(self):
         return Example(
@@ -79,13 +81,15 @@ cdef class Example:
 
     @property
     def alignment(self):
-        if self._alignment is None:
-            spacy_words = [token.orth_ for token in self.predicted]
-            gold_words = [token.orth_ for token in self.reference]
-            if gold_words == []:
-                gold_words = spacy_words
-            self._alignment = Alignment.from_strings(spacy_words, gold_words)
-        return self._alignment
+        words_x = [token.text for token in self.x]
+        words_y = [token.text for token in self.y]
+        if self._cached_alignment is None or \
+                words_x != self._cached_words_x or \
+                words_y != self._cached_words_y:
+            self._cached_alignment = Alignment.from_strings(words_x, words_y)
+            self._cached_words_x = words_x
+            self._cached_words_y = words_y
+        return self._cached_alignment
 
     def get_aligned(self, field, as_string=False):
         """Return an aligned array for a token attribute."""
@@ -179,15 +183,15 @@ cdef class Example:
                 "links": self._links_to_dict()
             },
             "token_annotation": {
-                "ids": [t.i+1 for t in self.reference],
-                "words": [t.text for t in self.reference],
-                "tags": [t.tag_ for t in self.reference],
-                "lemmas": [t.lemma_ for t in self.reference],
-                "pos": [t.pos_ for t in self.reference],
-                "morphs": [t.morph_ for t in self.reference],
-                "heads": [t.head.i for t in self.reference],
-                "deps": [t.dep_ for t in self.reference],
-                "sent_starts": [int(bool(t.is_sent_start)) for t in self.reference]
+                "ORTH": [t.text for t in self.reference],
+                "SPACY": [bool(t.whitespace_) for t in self.reference],
+                "TAG": [t.tag_ for t in self.reference],
+                "LEMMA": [t.lemma_ for t in self.reference],
+                "POS": [t.pos_ for t in self.reference],
+                "MORPH": [t.morph_ for t in self.reference],
+                "HEAD": [t.head.i for t in self.reference],
+                "DEP": [t.dep_ for t in self.reference],
+                "SENT_START": [int(bool(t.is_sent_start)) for t in self.reference]
             }
         }
 
@@ -331,10 +335,14 @@ def _fix_legacy_dict_data(example_dict):
     for key, value in old_token_dict.items():
         if key in ("text", "ids", "brackets"):
             pass
+        elif key in remapping.values():
+            token_dict[key] = value
         elif key.lower() in remapping:
             token_dict[remapping[key.lower()]] = value
         else:
-            raise KeyError(Errors.E983.format(key=key, dict="token_annotation", keys=remapping.keys()))
+            all_keys = set(remapping.values())
+            all_keys.update(remapping.keys())
+            raise KeyError(Errors.E983.format(key=key, dict="token_annotation", keys=all_keys))
     text = example_dict.get("text", example_dict.get("raw"))
     if _has_field(token_dict, "ORTH") and not _has_field(token_dict, "SPACY"):
         token_dict["SPACY"] = _guess_spaces(text, token_dict["ORTH"])
diff --git a/spacy/kb.pyx b/spacy/kb.pyx
index 3f226596c..9035f7e6a 100644
--- a/spacy/kb.pyx
+++ b/spacy/kb.pyx
@@ -71,17 +71,25 @@ cdef class KnowledgeBase:
     DOCS: https://spacy.io/api/kb
     """
 
-    def __init__(self, Vocab vocab, entity_vector_length=64):
-        self.vocab = vocab
+    def __init__(self, entity_vector_length):
+        """Create a KnowledgeBase. Make sure to call kb.initialize() before using it."""
         self.mem = Pool()
         self.entity_vector_length = entity_vector_length
 
         self._entry_index = PreshMap()
         self._alias_index = PreshMap()
+        self.vocab = None
 
+
+    def initialize(self, Vocab vocab):
+        self.vocab = vocab
         self.vocab.strings.add("")
         self._create_empty_vectors(dummy_hash=self.vocab.strings[""])
 
+    def require_vocab(self):
+        if self.vocab is None:
+            raise ValueError(Errors.E946)
+
     @property
     def entity_vector_length(self):
         """RETURNS (uint64): length of the entity vectors"""
@@ -94,12 +102,14 @@ cdef class KnowledgeBase:
         return len(self._entry_index)
 
     def get_entity_strings(self):
+        self.require_vocab()
         return [self.vocab.strings[x] for x in self._entry_index]
 
     def get_size_aliases(self):
         return len(self._alias_index)
 
     def get_alias_strings(self):
+        self.require_vocab()
         return [self.vocab.strings[x] for x in self._alias_index]
 
     def add_entity(self, unicode entity, float freq, vector[float] entity_vector):
@@ -107,6 +117,7 @@ cdef class KnowledgeBase:
         Add an entity to the KB, optionally specifying its log probability based on corpus frequency
         Return the hash of the entity ID/name at the end.
         """
+        self.require_vocab()
         cdef hash_t entity_hash = self.vocab.strings.add(entity)
 
         # Return if this entity was added before
@@ -129,6 +140,7 @@ cdef class KnowledgeBase:
         return entity_hash
 
     cpdef set_entities(self, entity_list, freq_list, vector_list):
+        self.require_vocab()
         if len(entity_list) != len(freq_list) or len(entity_list) != len(vector_list):
             raise ValueError(Errors.E140)
 
@@ -164,10 +176,12 @@ cdef class KnowledgeBase:
             i += 1
 
     def contains_entity(self, unicode entity):
+        self.require_vocab()
         cdef hash_t entity_hash = self.vocab.strings.add(entity)
         return entity_hash in self._entry_index
 
     def contains_alias(self, unicode alias):
+        self.require_vocab()
         cdef hash_t alias_hash = self.vocab.strings.add(alias)
         return alias_hash in self._alias_index
 
@@ -176,6 +190,7 @@ cdef class KnowledgeBase:
         For a given alias, add its potential entities and prior probabilies to the KB.
         Return the alias_hash at the end
         """
+        self.require_vocab()
         # Throw an error if the length of entities and probabilities are not the same
         if not len(entities) == len(probabilities):
             raise ValueError(Errors.E132.format(alias=alias,
@@ -219,6 +234,7 @@ cdef class KnowledgeBase:
         Throw an error if this entity+prior prob would exceed the sum of 1.
         For efficiency, it's best to use the method `add_alias` as much as possible instead of this one.
         """
+        self.require_vocab()
         # Check if the alias exists in the KB
         cdef hash_t alias_hash = self.vocab.strings[alias]
         if not alias_hash in self._alias_index:
@@ -265,6 +281,7 @@ cdef class KnowledgeBase:
         and the prior probability of that alias resolving to that entity.
         If the alias is not known in the KB, and empty list is returned.
         """
+        self.require_vocab()
         cdef hash_t alias_hash = self.vocab.strings[alias]
         if not alias_hash in self._alias_index:
             return []
@@ -281,6 +298,7 @@ cdef class KnowledgeBase:
                 if entry_index != 0]
 
     def get_vector(self, unicode entity):
+        self.require_vocab()
         cdef hash_t entity_hash = self.vocab.strings[entity]
 
         # Return an empty list if this entity is unknown in this KB
@@ -293,6 +311,7 @@ cdef class KnowledgeBase:
     def get_prior_prob(self, unicode entity, unicode alias):
         """ Return the prior probability of a given alias being linked to a given entity,
         or return 0.0 when this combination is not known in the knowledge base"""
+        self.require_vocab()
         cdef hash_t alias_hash = self.vocab.strings[alias]
         cdef hash_t entity_hash = self.vocab.strings[entity]
 
@@ -311,6 +330,7 @@ cdef class KnowledgeBase:
 
 
     def dump(self, loc):
+        self.require_vocab()
         cdef Writer writer = Writer(loc)
         writer.write_header(self.get_size_entities(), self.entity_vector_length)
 
diff --git a/spacy/language.py b/spacy/language.py
index e415869b3..e9d7e9eb6 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -18,7 +18,7 @@ from timeit import default_timer as timer
 
 from .tokens.underscore import Underscore
 from .vocab import Vocab, create_vocab
-from .pipe_analysis import analyze_pipes, analyze_all_pipes, validate_attrs
+from .pipe_analysis import validate_attrs, analyze_pipes, print_pipe_analysis
 from .gold import Example
 from .scorer import Scorer
 from .util import create_default_optimizer, registry
@@ -37,8 +37,6 @@ from . import util
 from . import about
 
 
-# TODO: integrate pipeline analyis
-ENABLE_PIPELINE_ANALYSIS = False
 # This is the base config will all settings (training etc.)
 DEFAULT_CONFIG_PATH = Path(__file__).parent / "default_config.cfg"
 DEFAULT_CONFIG = Config().from_disk(DEFAULT_CONFIG_PATH)
@@ -522,6 +520,25 @@ class Language:
             return add_component(func)
         return add_component
 
+    def analyze_pipes(
+        self,
+        *,
+        keys: List[str] = ["assigns", "requires", "scores", "retokenizes"],
+        pretty: bool = False,
+    ) -> Optional[Dict[str, Any]]:
+        """Analyze the current pipeline components (what they assign or require,
+        and whether requirements are met). A summary is only printed if pretty.
+
+        keys (List[str]): The meta values to display in the table. Corresponds
+            to values in FactoryMeta, defined by @Language.factory decorator.
+        pretty (bool): Also pretty-print the results. Defaults to False.
+        RETURNS (dict): The analysis data, returned whether or not it is printed.
+        """
+        analysis = analyze_pipes(self, keys=keys)  # imported helper, not this method
+        if pretty:
+            print_pipe_analysis(analysis, keys=keys)
+        return analysis
+
     def get_pipe(self, name: str) -> Callable[[Doc], Doc]:
         """Get a pipeline component for a given component name.
 
@@ -541,7 +558,6 @@ class Language:
         name: Optional[str] = None,
         *,
         config: Optional[Dict[str, Any]] = SimpleFrozenDict(),
-        overrides: Optional[Dict[str, Any]] = SimpleFrozenDict(),
         validate: bool = True,
     ) -> Callable[[Doc], Doc]:
         """Create a pipeline component. Mostly used internally. To create and
@@ -552,8 +568,6 @@ class Language:
             Defaults to factory name if not set.
         config (Optional[Dict[str, Any]]): Config parameters to use for this
             component. Will be merged with default config, if available.
-        overrides (Optional[Dict[str, Any]]): Config overrides, typically
-            passed in via the CLI.
         validate (bool): Whether to validate the component config against the
             arguments and types expected by the factory.
         RETURNS (Callable[[Doc], Doc]): The pipeline component.
@@ -596,13 +610,39 @@ class Language:
         # registered functions twice
         # TODO: customize validation to make it more readable / relate it to
         # pipeline component and why it failed, explain default config
-        resolved, filled = registry.resolve(cfg, validate=validate, overrides=overrides)
+        resolved, filled = registry.resolve(cfg, validate=validate)
         filled = filled[factory_name]
         filled["factory"] = factory_name
         filled.pop("@factories", None)
         self._pipe_configs[name] = filled
         return resolved[factory_name]
 
+    def create_pipe_from_source(
+        self, source_name: str, source: "Language", *, name: str,
+    ) -> Tuple[Callable[[Doc], Doc], str]:
+        """Create a pipeline component by copying it from an existing model.
+
+        source_name (str): Name of the component in the source pipeline.
+        source (Language): The source nlp object to copy from.
+        name (str): Required. Name to store the component's config under here.
+        RETURNS (Tuple[Callable, str]): The component and its factory name.
+        """
+        # TODO: handle errors and mismatches (vectors etc.)
+        if not isinstance(source, self.__class__):
+            raise ValueError(Errors.E945.format(name=source_name, source=type(source)))
+        if not source.has_pipe(source_name):
+            raise KeyError(
+                Errors.E944.format(
+                    name=source_name,
+                    model=f"{source.meta['lang']}_{source.meta['name']}",
+                    opts=", ".join(source.pipe_names),
+                )
+            )
+        pipe = source.get_pipe(source_name)  # no copy is made here, only of the config
+        pipe_config = util.copy_config(source.config["components"][source_name])
+        self._pipe_configs[name] = pipe_config
+        return pipe, pipe_config["factory"]
+
     def add_pipe(
         self,
         factory_name: str,
@@ -612,8 +652,8 @@ class Language:
         after: Optional[Union[str, int]] = None,
         first: Optional[bool] = None,
         last: Optional[bool] = None,
+        source: Optional["Language"] = None,
         config: Optional[Dict[str, Any]] = SimpleFrozenDict(),
-        overrides: Optional[Dict[str, Any]] = SimpleFrozenDict(),
         validate: bool = True,
     ) -> Callable[[Doc], Doc]:
         """Add a component to the processing pipeline. Valid components are
@@ -631,10 +671,10 @@ class Language:
             component directly after.
         first (bool): If True, insert component first in the pipeline.
         last (bool): If True, insert component last in the pipeline.
+        source (Language): Optional loaded nlp object to copy the pipeline
+            component from.
         config (Optional[Dict[str, Any]]): Config parameters to use for this
             component. Will be merged with default config, if available.
-        overrides (Optional[Dict[str, Any]]): Config overrides, typically
-            passed in via the CLI.
         validate (bool): Whether to validate the component config against the
             arguments and types expected by the factory.
         RETURNS (Callable[[Doc], Doc]): The pipeline component.
@@ -645,29 +685,30 @@ class Language:
             bad_val = repr(factory_name)
             err = Errors.E966.format(component=bad_val, name=name)
             raise ValueError(err)
-        if not self.has_factory(factory_name):
-            err = Errors.E002.format(
-                name=factory_name,
-                opts=", ".join(self.factory_names),
-                method="add_pipe",
-                lang=util.get_object_name(self),
-                lang_code=self.lang,
-            )
         name = name if name is not None else factory_name
         if name in self.pipe_names:
             raise ValueError(Errors.E007.format(name=name, opts=self.pipe_names))
-        pipe_component = self.create_pipe(
-            factory_name,
-            name=name,
-            config=config,
-            overrides=overrides,
-            validate=validate,
-        )
+        if source is not None:
+            # We're loading the component from a model. After loading the
+            # component, we know its real factory name
+            pipe_component, factory_name = self.create_pipe_from_source(
+                factory_name, source, name=name
+            )
+        else:
+            if not self.has_factory(factory_name):
+                err = Errors.E002.format(
+                    name=factory_name,
+                    opts=", ".join(self.factory_names),
+                    method="add_pipe",
+                    lang=util.get_object_name(self),
+                    lang_code=self.lang,
+                )
+            pipe_component = self.create_pipe(
+                factory_name, name=name, config=config, validate=validate,
+            )
         pipe_index = self._get_pipe_index(before, after, first, last)
         self._pipe_meta[name] = self.get_factory_meta(factory_name)
         self.pipeline.insert(pipe_index, (name, pipe_component))
-        if ENABLE_PIPELINE_ANALYSIS:
-            analyze_pipes(self, name, pipe_index)
         return pipe_component
 
     def _get_pipe_index(
@@ -754,12 +795,11 @@ class Language:
         # to Language.pipeline to make sure the configs are handled correctly
         pipe_index = self.pipe_names.index(name)
         self.remove_pipe(name)
-        if not len(self.pipeline):  # we have no components to insert before/after
+        if not len(self.pipeline) or pipe_index == len(self.pipeline):
+            # we have no components to insert before/after, or we're replacing the last component
             self.add_pipe(factory_name, name=name)
         else:
             self.add_pipe(factory_name, name=name, before=pipe_index)
-        if ENABLE_PIPELINE_ANALYSIS:
-            analyze_all_pipes(self)
 
     def rename_pipe(self, old_name: str, new_name: str) -> None:
         """Rename a pipeline component.
@@ -793,8 +833,6 @@ class Language:
         # because factory may be used for something else
         self._pipe_meta.pop(name)
         self._pipe_configs.pop(name)
-        if ENABLE_PIPELINE_ANALYSIS:
-            analyze_all_pipes(self)
         return removed
 
     def __call__(
@@ -900,6 +938,7 @@ class Language:
         sgd: Optional[Optimizer] = None,
         losses: Optional[Dict[str, float]] = None,
         component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
+        exclude: Iterable[str] = tuple(),
     ):
         """Update the models in the pipeline.
 
@@ -910,6 +949,7 @@ class Language:
         losses (Dict[str, float]): Dictionary to update with the loss, keyed by component.
         component_cfg (Dict[str, Dict]): Config parameters for specific pipeline
             components, keyed by component name.
+        exclude (Iterable[str]): Names of components that shouldn't be updated.
         RETURNS (Dict[str, float]): The updated losses dictionary
 
         DOCS: https://spacy.io/api/language#update
@@ -942,12 +982,12 @@ class Language:
             component_cfg[name].setdefault("drop", drop)
             component_cfg[name].setdefault("set_annotations", False)
         for name, proc in self.pipeline:
-            if not hasattr(proc, "update"):
+            if name in exclude or not hasattr(proc, "update"):
                 continue
             proc.update(examples, sgd=None, losses=losses, **component_cfg[name])
         if sgd not in (None, False):
             for name, proc in self.pipeline:
-                if hasattr(proc, "model"):
+                if name not in exclude and hasattr(proc, "model"):
                     proc.model.finish_update(sgd)
         return losses
 
@@ -958,6 +998,7 @@ class Language:
         sgd: Optional[Optimizer] = None,
         losses: Optional[Dict[str, float]] = None,
         component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
+        exclude: Iterable[str] = tuple(),
     ) -> Dict[str, float]:
         """Make a "rehearsal" update to the models in the pipeline, to prevent
         forgetting. Rehearsal updates run an initial copy of the model over some
@@ -969,6 +1010,7 @@ class Language:
         sgd (Optional[Optimizer]): An optimizer.
         component_cfg (Dict[str, Dict]): Config parameters for specific pipeline
             components, keyed by component name.
+        exclude (Iterable[str]): Names of components that shouldn't be updated.
         RETURNS (dict): Results from the update.
 
         EXAMPLE:
@@ -1012,7 +1054,7 @@ class Language:
         get_grads.b1 = sgd.b1
         get_grads.b2 = sgd.b2
         for name, proc in pipes:
-            if not hasattr(proc, "rehearse"):
+            if name in exclude or not hasattr(proc, "rehearse"):
                 continue
             grads = {}
             proc.rehearse(
@@ -1063,7 +1105,7 @@ class Language:
         return self._optimizer
 
     def resume_training(
-        self, *, sgd: Optional[Optimizer] = None, device: int = -1
+        self, *, sgd: Optional[Optimizer] = None, device: int = -1,
     ) -> Optimizer:
         """Continue training a pretrained model.
 
@@ -1099,6 +1141,7 @@ class Language:
         batch_size: int = 256,
         scorer: Optional[Scorer] = None,
         component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
+        scorer_cfg: Optional[Dict[str, Any]] = None,
     ) -> Dict[str, Union[float, dict]]:
         """Evaluate a model's pipeline components.
 
@@ -1109,6 +1152,8 @@ class Language:
             will be created.
         component_cfg (dict): An optional dictionary with extra keyword
             arguments for specific components.
+        scorer_cfg (dict): An optional dictionary with extra keyword arguments
+            for the scorer.
         RETURNS (Scorer): The scorer containing the evaluation results.
 
         DOCS: https://spacy.io/api/language#evaluate
@@ -1126,8 +1171,10 @@ class Language:
             raise TypeError(err)
         if component_cfg is None:
             component_cfg = {}
+        if scorer_cfg is None:
+            scorer_cfg = {}
         if scorer is None:
-            kwargs = component_cfg.get("scorer", {})
+            kwargs = dict(scorer_cfg)
             kwargs.setdefault("verbose", verbose)
             kwargs.setdefault("nlp", self)
             scorer = Scorer(**kwargs)
@@ -1136,9 +1183,9 @@ class Language:
         start_time = timer()
         # tokenize the texts only for timing purposes
         if not hasattr(self.tokenizer, "pipe"):
-            _ = [self.tokenizer(text) for text in texts]
+            _ = [self.tokenizer(text) for text in texts]  # noqa: F841
         else:
-            _ = list(self.tokenizer.pipe(texts))
+            _ = list(self.tokenizer.pipe(texts))  # noqa: F841
         for name, pipe in self.pipeline:
             kwargs = component_cfg.get(name, {})
             kwargs.setdefault("batch_size", batch_size)
@@ -1357,8 +1404,8 @@ class Language:
         cls,
         config: Union[Dict[str, Any], Config] = {},
         *,
+        vocab: Union[Vocab, bool] = True,
         disable: Iterable[str] = tuple(),
-        overrides: Dict[str, Any] = {},
         auto_fill: bool = True,
         validate: bool = True,
     ) -> "Language":
@@ -1367,6 +1414,7 @@ class Language:
         the default config of the given language is used.
 
         config (Dict[str, Any] / Config): The loaded config.
+        vocab (Vocab): A Vocab object. If True, a vocab is created.
         disable (Iterable[str]): List of pipeline component names to disable.
         auto_fill (bool): Automatically fill in missing values in config based
             on defaults and function argument annotations.
@@ -1397,43 +1445,76 @@ class Language:
         config = util.copy_config(config)
         orig_pipeline = config.pop("components", {})
         config["components"] = {}
-        non_pipe_overrides, pipe_overrides = _get_config_overrides(overrides)
         resolved, filled = registry.resolve(
-            config, validate=validate, schema=ConfigSchema, overrides=non_pipe_overrides
+            config, validate=validate, schema=ConfigSchema
         )
         filled["components"] = orig_pipeline
         config["components"] = orig_pipeline
         create_tokenizer = resolved["nlp"]["tokenizer"]
         create_lemmatizer = resolved["nlp"]["lemmatizer"]
-        nlp = cls(
-            create_tokenizer=create_tokenizer, create_lemmatizer=create_lemmatizer,
+        before_creation = resolved["nlp"]["before_creation"]
+        after_creation = resolved["nlp"]["after_creation"]
+        after_pipeline_creation = resolved["nlp"]["after_pipeline_creation"]
+        lang_cls = cls
+        if before_creation is not None:
+            lang_cls = before_creation(cls)
+            if (
+                not isinstance(lang_cls, type)
+                or not issubclass(lang_cls, cls)
+                or lang_cls is not cls
+            ):
+                raise ValueError(Errors.E943.format(value=type(lang_cls)))
+        nlp = lang_cls(
+            vocab=vocab,
+            create_tokenizer=create_tokenizer,
+            create_lemmatizer=create_lemmatizer,
         )
+        if after_creation is not None:
+            nlp = after_creation(nlp)
+            if not isinstance(nlp, cls):
+                raise ValueError(Errors.E942.format(name="creation", value=type(nlp)))
         # Note that we don't load vectors here, instead they get loaded explicitly
         # inside stuff like the spacy train function. If we loaded them here,
         # then we would load them twice at runtime: once when we make from config,
         # and then again when we load from disk.
         pipeline = config.get("components", {})
+        # If components are loaded from a source (existing models), we cache
+        # them here so they're only loaded once
+        source_nlps = {}
         for pipe_name in config["nlp"]["pipeline"]:
             if pipe_name not in pipeline:
                 opts = ", ".join(pipeline.keys())
                 raise ValueError(Errors.E956.format(name=pipe_name, opts=opts))
             pipe_cfg = util.copy_config(pipeline[pipe_name])
             if pipe_name not in disable:
-                if "factory" not in pipe_cfg:
+                if "factory" not in pipe_cfg and "source" not in pipe_cfg:
                     err = Errors.E984.format(name=pipe_name, config=pipe_cfg)
                     raise ValueError(err)
-                factory = pipe_cfg.pop("factory")
-                # The pipe name (key in the config) here is the unique name of the
-                # component, not necessarily the factory
-                nlp.add_pipe(
-                    factory,
-                    name=pipe_name,
-                    config=pipe_cfg,
-                    overrides=pipe_overrides,
-                    validate=validate,
-                )
+                if "factory" in pipe_cfg:
+                    factory = pipe_cfg.pop("factory")
+                    # The pipe name (key in the config) here is the unique name
+                    # of the component, not necessarily the factory
+                    nlp.add_pipe(
+                        factory, name=pipe_name, config=pipe_cfg, validate=validate,
+                    )
+                else:
+                    model = pipe_cfg["source"]
+                    if model not in source_nlps:
+                        # We only need the components here and we need to init
+                        # model with the same vocab as the current nlp object
+                        source_nlps[model] = util.load_model(
+                            model, vocab=nlp.vocab, disable=["vocab", "tokenizer"]
+                        )
+                    source_name = pipe_cfg.get("component", pipe_name)
+                    nlp.add_pipe(source_name, source=source_nlps[model], name=pipe_name)
         nlp.config = filled if auto_fill else config
         nlp.resolved = resolved
+        if after_pipeline_creation is not None:
+            nlp = after_pipeline_creation(nlp)
+            if not isinstance(nlp, cls):
+                raise ValueError(
+                    Errors.E942.format(name="pipeline_creation", value=type(nlp))
+                )
         return nlp
 
     def to_disk(
@@ -1599,15 +1680,6 @@ class FactoryMeta:
     default_score_weights: Optional[Dict[str, float]] = None  # noqa: E704
 
 
-def _get_config_overrides(
-    items: Dict[str, Any], prefix: str = "components"
-) -> Tuple[Dict[str, Any], Dict[str, Any]]:
-    prefix = f"{prefix}."
-    non_pipe = {k: v for k, v in items.items() if not k.startswith(prefix)}
-    pipe = {k.replace(prefix, ""): v for k, v in items.items() if k.startswith(prefix)}
-    return non_pipe, pipe
-
-
 def _fix_pretrained_vectors_name(nlp: Language) -> None:
     # TODO: Replace this once we handle vectors consistently as static
     # data
diff --git a/spacy/ml/_biluo.py b/spacy/ml/_biluo.py
index 5a8f28dfe..5a66a35bd 100644
--- a/spacy/ml/_biluo.py
+++ b/spacy/ml/_biluo.py
@@ -80,7 +80,7 @@ def _get_transition_table(
     B_start, B_end = (0, n_labels)
     I_start, I_end = (B_end, B_end + n_labels)
     L_start, L_end = (I_end, I_end + n_labels)
-    U_start, _ = (L_end, L_end + n_labels)
+    U_start, _ = (L_end, L_end + n_labels)  # noqa: F841
     # Using ranges allows us to set specific cells, which is necessary to express
     # that only actions of the same label are valid continuations.
     B_range = numpy.arange(B_start, B_end)
diff --git a/spacy/ml/_character_embed.py b/spacy/ml/_character_embed.py
index ab0cb85c7..f5c539c42 100644
--- a/spacy/ml/_character_embed.py
+++ b/spacy/ml/_character_embed.py
@@ -1,6 +1,7 @@
 from typing import List
 from thinc.api import Model
 from thinc.types import Floats2d
+
 from ..tokens import Doc
 
 
@@ -15,14 +16,14 @@ def CharacterEmbed(nM: int, nC: int) -> Model[List[Doc], List[Floats2d]]:
     )
 
 
-def init(model, X=None, Y=None):
+def init(model: Model, X=None, Y=None):
     vectors_table = model.ops.alloc3f(
         model.get_dim("nC"), model.get_dim("nV"), model.get_dim("nM")
     )
     model.set_param("E", vectors_table)
 
 
-def forward(model, docs, is_train):
+def forward(model: Model, docs: List[Doc], is_train: bool):
     if docs is None:
         return []
     ids = []
diff --git a/spacy/ml/_iob.py b/spacy/ml/_iob.py
index 9f385ec0d..4dbc79f52 100644
--- a/spacy/ml/_iob.py
+++ b/spacy/ml/_iob.py
@@ -14,7 +14,7 @@ def IOB() -> Model[Padded, Padded]:
     )
 
 
-def init(model, X: Optional[Padded] = None, Y: Optional[Padded] = None):
+def init(model: Model, X: Optional[Padded] = None, Y: Optional[Padded] = None) -> None:
     if X is not None and Y is not None:
         if X.data.shape != Y.data.shape:
             # TODO: Fix error
diff --git a/spacy/ml/extract_ngrams.py b/spacy/ml/extract_ngrams.py
index f9f691aae..bdc297232 100644
--- a/spacy/ml/extract_ngrams.py
+++ b/spacy/ml/extract_ngrams.py
@@ -4,14 +4,14 @@ from thinc.api import Model
 from ..attrs import LOWER
 
 
-def extract_ngrams(ngram_size, attr=LOWER) -> Model:
+def extract_ngrams(ngram_size: int, attr: int = LOWER) -> Model:
     model = Model("extract_ngrams", forward)
     model.attrs["ngram_size"] = ngram_size
     model.attrs["attr"] = attr
     return model
 
 
-def forward(model, docs, is_train: bool):
+def forward(model: Model, docs, is_train: bool):
     batch_keys = []
     batch_vals = []
     for doc in docs:
diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py
index ffd6c3c1c..f96d50a7b 100644
--- a/spacy/ml/models/entity_linker.py
+++ b/spacy/ml/models/entity_linker.py
@@ -1,5 +1,4 @@
-from pathlib import Path
-
+from typing import Optional
 from thinc.api import chain, clone, list2ragged, reduce_mean, residual
 from thinc.api import Model, Maxout, Linear
 
@@ -9,7 +8,7 @@ from ...vocab import Vocab
 
 
 @registry.architectures.register("spacy.EntityLinker.v1")
-def build_nel_encoder(tok2vec, nO=None):
+def build_nel_encoder(tok2vec: Model, nO: Optional[int] = None) -> Model:
     with Model.define_operators({">>": chain, "**": clone}):
         token_width = tok2vec.get_dim("nO")
         output_layer = Linear(nO=nO, nI=token_width)
@@ -26,8 +25,15 @@ def build_nel_encoder(tok2vec, nO=None):
 
 
 @registry.assets.register("spacy.KBFromFile.v1")
-def load_kb(vocab_path, kb_path) -> KnowledgeBase:
+def load_kb(vocab_path: str, kb_path: str) -> KnowledgeBase:
     vocab = Vocab().from_disk(vocab_path)
-    kb = KnowledgeBase(vocab=vocab)
+    kb = KnowledgeBase(entity_vector_length=1)
+    kb.initialize(vocab)
     kb.load_bulk(kb_path)
     return kb
+
+
+@registry.assets.register("spacy.EmptyKB.v1")
+def empty_kb(entity_vector_length: int) -> KnowledgeBase:
+    kb = KnowledgeBase(entity_vector_length=entity_vector_length)
+    return kb
diff --git a/spacy/ml/models/multi_task.py b/spacy/ml/models/multi_task.py
index ed85b1a91..ac990c015 100644
--- a/spacy/ml/models/multi_task.py
+++ b/spacy/ml/models/multi_task.py
@@ -1,10 +1,20 @@
+from typing import Optional, Iterable, Tuple, List, TYPE_CHECKING
 import numpy
-
 from thinc.api import chain, Maxout, LayerNorm, Softmax, Linear, zero_init, Model
 from thinc.api import MultiSoftmax, list2array
 
+if TYPE_CHECKING:
+    # This lets us add type hints for mypy etc. without causing circular imports
+    from ...vocab import Vocab  # noqa: F401
+    from ...tokens import Doc  # noqa: F401
 
-def build_multi_task_model(tok2vec, maxout_pieces, token_vector_width, nO=None):
+
+def build_multi_task_model(
+    tok2vec: Model,
+    maxout_pieces: int,
+    token_vector_width: int,
+    nO: Optional[int] = None,
+) -> Model:
     softmax = Softmax(nO=nO, nI=token_vector_width * 2)
     model = chain(
         tok2vec,
@@ -22,7 +32,13 @@ def build_multi_task_model(tok2vec, maxout_pieces, token_vector_width, nO=None):
     return model
 
 
-def build_cloze_multi_task_model(vocab, tok2vec, maxout_pieces, hidden_size, nO=None):
+def build_cloze_multi_task_model(
+    vocab: "Vocab",
+    tok2vec: Model,
+    maxout_pieces: int,
+    hidden_size: int,
+    nO: Optional[int] = None,
+) -> Model:
     # nO = vocab.vectors.data.shape[1]
     output_layer = chain(
         list2array(),
@@ -43,24 +59,24 @@ def build_cloze_multi_task_model(vocab, tok2vec, maxout_pieces, hidden_size, nO=
 
 
 def build_cloze_characters_multi_task_model(
-    vocab, tok2vec, maxout_pieces, hidden_size, nr_char
-):
+    vocab: "Vocab", tok2vec: Model, maxout_pieces: int, hidden_size: int, nr_char: int
+) -> Model:
     output_layer = chain(
         list2array(),
         Maxout(hidden_size, nP=maxout_pieces),
         LayerNorm(nI=hidden_size),
         MultiSoftmax([256] * nr_char, nI=hidden_size),
     )
-
     model = build_masked_language_model(vocab, chain(tok2vec, output_layer))
     model.set_ref("tok2vec", tok2vec)
     model.set_ref("output_layer", output_layer)
     return model
 
 
-def build_masked_language_model(vocab, wrapped_model, mask_prob=0.15):
+def build_masked_language_model(
+    vocab: "Vocab", wrapped_model: Model, mask_prob: float = 0.15
+) -> Model:
     """Convert a model into a BERT-style masked language model"""
-
     random_words = _RandomWords(vocab)
 
     def mlm_forward(model, docs, is_train):
@@ -74,7 +90,7 @@ def build_masked_language_model(vocab, wrapped_model, mask_prob=0.15):
 
         return output, mlm_backward
 
-    def mlm_initialize(model, X=None, Y=None):
+    def mlm_initialize(model: Model, X=None, Y=None):
         wrapped = model.layers[0]
         wrapped.initialize(X=X, Y=Y)
         for dim in wrapped.dim_names:
@@ -90,12 +106,11 @@ def build_masked_language_model(vocab, wrapped_model, mask_prob=0.15):
         dims={dim: None for dim in wrapped_model.dim_names},
     )
     mlm_model.set_ref("wrapped", wrapped_model)
-
     return mlm_model
 
 
 class _RandomWords:
-    def __init__(self, vocab):
+    def __init__(self, vocab: "Vocab") -> None:
         self.words = [lex.text for lex in vocab if lex.prob != 0.0]
         self.probs = [lex.prob for lex in vocab if lex.prob != 0.0]
         self.words = self.words[:10000]
@@ -104,7 +119,7 @@ class _RandomWords:
         self.probs /= self.probs.sum()
         self._cache = []
 
-    def next(self):
+    def next(self) -> str:
         if not self._cache:
             self._cache.extend(
                 numpy.random.choice(len(self.words), 10000, p=self.probs)
@@ -113,9 +128,11 @@ class _RandomWords:
         return self.words[index]
 
 
-def _apply_mask(docs, random_words, mask_prob=0.15):
+def _apply_mask(
+    docs: Iterable["Doc"], random_words: _RandomWords, mask_prob: float = 0.15
+) -> Tuple[numpy.ndarray, List["Doc"]]:
     # This needs to be here to avoid circular imports
-    from ...tokens import Doc
+    from ...tokens import Doc  # noqa: F811
 
     N = sum(len(doc) for doc in docs)
     mask = numpy.random.uniform(0.0, 1.0, (N,))
@@ -141,7 +158,7 @@ def _apply_mask(docs, random_words, mask_prob=0.15):
     return mask, masked_docs
 
 
-def _replace_word(word, random_words, mask="[MASK]"):
+def _replace_word(word: str, random_words: _RandomWords, mask: str = "[MASK]") -> str:
     roll = numpy.random.random()
     if roll < 0.8:
         return mask
diff --git a/spacy/ml/models/parser.py b/spacy/ml/models/parser.py
index c1e530d4a..429ceff28 100644
--- a/spacy/ml/models/parser.py
+++ b/spacy/ml/models/parser.py
@@ -1,6 +1,5 @@
-from pydantic import StrictInt
-from thinc.api import Model, chain, list2array, Linear, zero_init, use_ops, with_array
-from thinc.api import LayerNorm, Maxout, Mish
+from typing import Optional
+from thinc.api import Model, chain, list2array, Linear, zero_init, use_ops
 
 from ...util import registry
 from .._precomputable_affine import PrecomputableAffine
@@ -10,16 +9,15 @@ from ..tb_framework import TransitionModel
 @registry.architectures.register("spacy.TransitionBasedParser.v1")
 def build_tb_parser_model(
     tok2vec: Model,
-    nr_feature_tokens: StrictInt,
-    hidden_width: StrictInt,
-    maxout_pieces: StrictInt,
-    use_upper=True,
-    nO=None,
-):
+    nr_feature_tokens: int,
+    hidden_width: int,
+    maxout_pieces: int,
+    use_upper: bool = True,
+    nO: Optional[int] = None,
+) -> Model:
     t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
     tok2vec = chain(tok2vec, list2array(), Linear(hidden_width, t2v_width),)
     tok2vec.set_dim("nO", hidden_width)
-
     lower = PrecomputableAffine(
         nO=hidden_width if use_upper else nO,
         nF=nr_feature_tokens,
diff --git a/spacy/ml/models/simple_ner.py b/spacy/ml/models/simple_ner.py
index 1fb5a71c0..b2934dadc 100644
--- a/spacy/ml/models/simple_ner.py
+++ b/spacy/ml/models/simple_ner.py
@@ -26,7 +26,6 @@ def BiluoTagger(
         with_array(softmax_activation()),
         padded2list(),
     )
-
     return Model(
         "biluo-tagger",
         forward,
@@ -52,7 +51,6 @@ def IOBTagger(
         with_array(softmax_activation()),
         padded2list(),
     )
-
     return Model(
         "iob-tagger",
         forward,
diff --git a/spacy/ml/models/tagger.py b/spacy/ml/models/tagger.py
index 7fe417321..78637e8b5 100644
--- a/spacy/ml/models/tagger.py
+++ b/spacy/ml/models/tagger.py
@@ -1,10 +1,11 @@
+from typing import Optional
 from thinc.api import zero_init, with_array, Softmax, chain, Model
 
 from ...util import registry
 
 
 @registry.architectures.register("spacy.Tagger.v1")
-def build_tagger_model(tok2vec, nO=None) -> Model:
+def build_tagger_model(tok2vec: Model, nO: Optional[int] = None) -> Model:
     # TODO: glorot_uniform_init seems to work a bit better than zero_init here?!
     t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
     output_layer = Softmax(nO, t2v_width, init_W=zero_init)
diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py
index 53200c165..0a25699dc 100644
--- a/spacy/ml/models/textcat.py
+++ b/spacy/ml/models/textcat.py
@@ -2,10 +2,9 @@ from typing import Optional
 from thinc.api import Model, reduce_mean, Linear, list2ragged, Logistic
 from thinc.api import chain, concatenate, clone, Dropout, ParametricAttention
 from thinc.api import SparseLinear, Softmax, softmax_activation, Maxout, reduce_sum
-from thinc.api import HashEmbed, with_ragged, with_array, with_cpu, uniqued
+from thinc.api import HashEmbed, with_array, with_cpu, uniqued
 from thinc.api import Relu, residual, expand_window, FeatureExtractor
 
-from ... import util
 from ...attrs import ID, ORTH, PREFIX, SUFFIX, SHAPE, LOWER
 from ...util import registry
 from ..extract_ngrams import extract_ngrams
@@ -40,7 +39,12 @@ def build_simple_cnn_text_classifier(
 
 
 @registry.architectures.register("spacy.TextCatBOW.v1")
-def build_bow_text_classifier(exclusive_classes, ngram_size, no_output_layer, nO=None):
+def build_bow_text_classifier(
+    exclusive_classes: bool,
+    ngram_size: int,
+    no_output_layer: bool,
+    nO: Optional[int] = None,
+) -> Model:
     with Model.define_operators({">>": chain}):
         sparse_linear = SparseLinear(nO)
         model = extract_ngrams(ngram_size, attr=ORTH) >> sparse_linear
@@ -55,16 +59,16 @@ def build_bow_text_classifier(exclusive_classes, ngram_size, no_output_layer, nO
 
 @registry.architectures.register("spacy.TextCatEnsemble.v1")
 def build_text_classifier(
-    width,
-    embed_size,
-    pretrained_vectors,
-    exclusive_classes,
-    ngram_size,
-    window_size,
-    conv_depth,
-    dropout,
-    nO=None,
-):
+    width: int,
+    embed_size: int,
+    pretrained_vectors: Optional[bool],
+    exclusive_classes: bool,
+    ngram_size: int,
+    window_size: int,
+    conv_depth: int,
+    dropout: Optional[float],
+    nO: Optional[int] = None,
+) -> Model:
     cols = [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
     with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
         lower = HashEmbed(
@@ -91,7 +95,6 @@ def build_text_classifier(
             dropout=dropout,
             seed=13,
         )
-
         width_nI = sum(layer.get_dim("nO") for layer in [lower, prefix, suffix, shape])
         trained_vectors = FeatureExtractor(cols) >> with_array(
             uniqued(
@@ -100,7 +103,6 @@ def build_text_classifier(
                 column=cols.index(ORTH),
             )
         )
-
         if pretrained_vectors:
             static_vectors = StaticVectors(width)
             vector_layer = trained_vectors | static_vectors
@@ -152,7 +154,12 @@ def build_text_classifier(
 
 
 @registry.architectures.register("spacy.TextCatLowData.v1")
-def build_text_classifier_lowdata(width, pretrained_vectors, dropout, nO=None):
+def build_text_classifier_lowdata(
+    width: int,
+    pretrained_vectors: Optional[bool],
+    dropout: Optional[float],
+    nO: Optional[int] = None,
+) -> Model:
     # Note, before v.3, this was the default if setting "low_data" and "pretrained_dims"
     with Model.define_operators({">>": chain, "**": clone}):
         model = (
diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py
index 1460b3005..474942558 100644
--- a/spacy/ml/models/tok2vec.py
+++ b/spacy/ml/models/tok2vec.py
@@ -6,16 +6,15 @@ from thinc.api import expand_window, residual, Maxout, Mish, PyTorchLSTM
 from thinc.types import Floats2d
 
 from ...tokens import Doc
-from ... import util
 from ...util import registry
 from ...ml import _character_embed
 from ..staticvectors import StaticVectors
 from ...pipeline.tok2vec import Tok2VecListener
-from ...attrs import ID, ORTH, NORM, PREFIX, SUFFIX, SHAPE
+from ...attrs import ORTH, NORM, PREFIX, SUFFIX, SHAPE
 
 
 @registry.architectures.register("spacy.Tok2VecListener.v1")
-def tok2vec_listener_v1(width, upstream="*"):
+def tok2vec_listener_v1(width: int, upstream: str = "*"):
     tok2vec = Tok2VecListener(upstream_name=upstream, width=width)
     return tok2vec
 
@@ -45,10 +44,11 @@ def build_hash_embed_cnn_tok2vec(
             width=width,
             depth=depth,
             window_size=window_size,
-            maxout_pieces=maxout_pieces
-        )
+            maxout_pieces=maxout_pieces,
+        ),
     )
 
+
 @registry.architectures.register("spacy.Tok2Vec.v1")
 def build_Tok2Vec_model(
     embed: Model[List[Doc], List[Floats2d]],
@@ -68,7 +68,6 @@ def MultiHashEmbed(
     width: int, rows: int, also_embed_subwords: bool, also_use_static_vectors: bool
 ):
     cols = [NORM, PREFIX, SUFFIX, SHAPE, ORTH]
-
     seed = 7
 
     def make_hash_embed(feature):
@@ -124,11 +123,11 @@ def CharacterEmbed(width: int, rows: int, nM: int, nC: int):
             chain(
                 FeatureExtractor([NORM]),
                 list2ragged(),
-                with_array(HashEmbed(nO=width, nV=rows, column=0, seed=5))
-            )
+                with_array(HashEmbed(nO=width, nV=rows, column=0, seed=5)),
+            ),
         ),
         with_array(Maxout(width, nM * nC + width, nP=3, normalize=True, dropout=0.0)),
-        ragged2list()
+        ragged2list(),
     )
     return model
 
@@ -155,12 +154,7 @@ def MaxoutWindowEncoder(width: int, window_size: int, maxout_pieces: int, depth:
 def MishWindowEncoder(width, window_size, depth):
     cnn = chain(
         expand_window(window_size=window_size),
-        Mish(
-            nO=width,
-            nI=width * ((window_size * 2) + 1),
-            dropout=0.0,
-            normalize=True
-        ),
+        Mish(nO=width, nI=width * ((window_size * 2) + 1), dropout=0.0, normalize=True),
     )
     model = clone(residual(cnn), depth)
     model.set_dim("nO", width)
diff --git a/spacy/syntax/_parser_model.pxd b/spacy/ml/parser_model.pxd
similarity index 88%
rename from spacy/syntax/_parser_model.pxd
rename to spacy/ml/parser_model.pxd
index 15befb372..6582b3468 100644
--- a/spacy/syntax/_parser_model.pxd
+++ b/spacy/ml/parser_model.pxd
@@ -1,8 +1,6 @@
 from libc.string cimport memset, memcpy
-from libc.stdlib cimport calloc, free, realloc
-from ..typedefs cimport weight_t, class_t, hash_t
-
-from ._state cimport StateC
+from ..typedefs cimport weight_t, hash_t
+from ..pipeline._parser_internals._state cimport StateC
 
 
 cdef struct SizesC:
diff --git a/spacy/syntax/_parser_model.pyx b/spacy/ml/parser_model.pyx
similarity index 97%
rename from spacy/syntax/_parser_model.pyx
rename to spacy/ml/parser_model.pyx
index eedd84bac..da937ca4f 100644
--- a/spacy/syntax/_parser_model.pyx
+++ b/spacy/ml/parser_model.pyx
@@ -1,29 +1,18 @@
 # cython: infer_types=True, cdivision=True, boundscheck=False
-cimport cython.parallel
 cimport numpy as np
 from libc.math cimport exp
-from libcpp.vector cimport vector
 from libc.string cimport memset, memcpy
 from libc.stdlib cimport calloc, free, realloc
-from cymem.cymem cimport Pool
-from thinc.extra.search cimport Beam
 from thinc.backends.linalg cimport Vec, VecVec
 cimport blis.cy
 
 import numpy
 import numpy.random
-from thinc.api import Linear, Model, CupyOps, NumpyOps, use_ops, noop
+from thinc.api import Model, CupyOps, NumpyOps
 
-from ..typedefs cimport weight_t, class_t, hash_t
-from ..tokens.doc cimport Doc
-from .stateclass cimport StateClass
-from .transition_system cimport Transition
-
-from ..compat import copy_array
-from ..errors import Errors, TempErrors
-from ..util import create_default_optimizer
 from .. import util
-from . import nonproj
+from ..typedefs cimport weight_t, class_t, hash_t
+from ..pipeline._parser_internals.stateclass cimport StateClass
 
 
 cdef WeightsC get_c_weights(model) except *:
diff --git a/spacy/ml/tb_framework.py b/spacy/ml/tb_framework.py
index 39d4b0a14..44f125a04 100644
--- a/spacy/ml/tb_framework.py
+++ b/spacy/ml/tb_framework.py
@@ -1,5 +1,5 @@
 from thinc.api import Model, noop, use_ops, Linear
-from ..syntax._parser_model import ParserStepModel
+from .parser_model import ParserStepModel
 
 
 def TransitionModel(tok2vec, lower, upper, dropout=0.2, unseen_classes=set()):
diff --git a/spacy/pipe_analysis.py b/spacy/pipe_analysis.py
index b57f1524b..008ac3384 100644
--- a/spacy/pipe_analysis.py
+++ b/spacy/pipe_analysis.py
@@ -1,9 +1,8 @@
 from typing import List, Dict, Iterable, Optional, Union, TYPE_CHECKING
-from wasabi import Printer
-import warnings
+from wasabi import msg
 
 from .tokens import Doc, Token, Span
-from .errors import Errors, Warnings
+from .errors import Errors
 from .util import dot_to_dict
 
 if TYPE_CHECKING:
@@ -11,48 +10,7 @@ if TYPE_CHECKING:
     from .language import Language  # noqa: F401
 
 
-def analyze_pipes(
-    nlp: "Language", name: str, index: int, warn: bool = True
-) -> List[str]:
-    """Analyze a pipeline component with respect to its position in the current
-    pipeline and the other components. Will check whether requirements are
-    fulfilled (e.g. if previous components assign the attributes).
-
-    nlp (Language): The current nlp object.
-    name (str): The name of the pipeline component to analyze.
-    index (int): The index of the component in the pipeline.
-    warn (bool): Show user warning if problem is found.
-    RETURNS (List[str]): The problems found for the given pipeline component.
-    """
-    assert nlp.pipeline[index][0] == name
-    prev_pipes = nlp.pipeline[:index]
-    meta = nlp.get_pipe_meta(name)
-    requires = {annot: False for annot in meta.requires}
-    if requires:
-        for prev_name, prev_pipe in prev_pipes:
-            prev_meta = nlp.get_pipe_meta(prev_name)
-            for annot in prev_meta.assigns:
-                requires[annot] = True
-    problems = []
-    for annot, fulfilled in requires.items():
-        if not fulfilled:
-            problems.append(annot)
-            if warn:
-                warnings.warn(Warnings.W025.format(name=name, attr=annot))
-    return problems
-
-
-def analyze_all_pipes(nlp: "Language", warn: bool = True) -> Dict[str, List[str]]:
-    """Analyze all pipes in the pipeline in order.
-
-    nlp (Language): The current nlp object.
-    warn (bool): Show user warning if problem is found.
-    RETURNS (Dict[str, List[str]]): The problems found, keyed by component name.
-    """
-    problems = {}
-    for i, name in enumerate(nlp.pipe_names):
-        problems[name] = analyze_pipes(nlp, name, i, warn=warn)
-    return problems
+DEFAULT_KEYS = ["requires", "assigns", "scores", "retokenizes"]
 
 
 def validate_attrs(values: Iterable[str]) -> Iterable[str]:
@@ -101,89 +59,77 @@ def validate_attrs(values: Iterable[str]) -> Iterable[str]:
     return values
 
 
-def _get_feature_for_attr(nlp: "Language", attr: str, feature: str) -> List[str]:
-    assert feature in ["assigns", "requires"]
-    result = []
+def get_attr_info(nlp: "Language", attr: str) -> Dict[str, List[str]]:
+    """Check which components in the pipeline assign or require an attribute.
+
+    nlp (Language): The current nlp object.
+    attr (str): The attribute, e.g. "doc.tensor".
+    RETURNS (Dict[str, List[str]]): A dict keyed by "assigns" and "requires",
+        mapped to a list of component names.
+    """
+    result = {"assigns": [], "requires": []}
     for pipe_name in nlp.pipe_names:
         meta = nlp.get_pipe_meta(pipe_name)
-        pipe_assigns = getattr(meta, feature, [])
-        if attr in pipe_assigns:
-            result.append(pipe_name)
+        if attr in meta.assigns:
+            result["assigns"].append(pipe_name)
+        if attr in meta.requires:
+            result["requires"].append(pipe_name)
     return result
 
 
-def get_assigns_for_attr(nlp: "Language", attr: str) -> List[str]:
-    """Get all pipeline components that assign an attr, e.g. "doc.tensor".
-
-    pipeline (Language): The current nlp object.
-    attr (str): The attribute to check.
-    RETURNS (List[str]): Names of components that require the attr.
-    """
-    return _get_feature_for_attr(nlp, attr, "assigns")
-
-
-def get_requires_for_attr(nlp: "Language", attr: str) -> List[str]:
-    """Get all pipeline components that require an attr, e.g. "doc.tensor".
-
-    pipeline (Language): The current nlp object.
-    attr (str): The attribute to check.
-    RETURNS (List[str]): Names of components that require the attr.
-    """
-    return _get_feature_for_attr(nlp, attr, "requires")
-
-
-def print_summary(
-    nlp: "Language", pretty: bool = True, no_print: bool = False
-) -> Optional[Dict[str, Union[List[str], Dict[str, List[str]]]]]:
+def analyze_pipes(
+    nlp: "Language", *, keys: List[str] = DEFAULT_KEYS,
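+) -> Dict[str, Union[List[str], Dict[str, List[str]]]]:
-    """Print a formatted summary for the current nlp object's pipeline. Shows
-    a table with the pipeline components and why they assign and require, as
-    well as any problems if available.
+    """Analyze the current nlp object's pipeline. Collects the requested meta
+    keys for each pipeline component, the required annotations that no earlier
+    component assigns ("problems"), and assigns/requires info per attribute.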
 
     nlp (Language): The nlp object.
-    pretty (bool): Pretty-print the results (color etc).
-    no_print (bool): Don't print anything, just return the data.
-    RETURNS (dict): A dict with "overview" and "problems".
+    keys (List[str]): The meta keys to show in the table.
+    RETURNS (dict): A dict with "summary", "problems" and "attrs".
     """
-    msg = Printer(pretty=pretty, no_print=no_print)
-    overview = []
-    problems = {}
+    result = {"summary": {}, "problems": {}}
+    all_attrs = set()
     for i, name in enumerate(nlp.pipe_names):
         meta = nlp.get_pipe_meta(name)
-        overview.append((i, name, meta.requires, meta.assigns, meta.retokenizes))
-        problems[name] = analyze_pipes(nlp, name, i, warn=False)
+        all_attrs.update(meta.assigns)
+        all_attrs.update(meta.requires)
+        result["summary"][name] = {key: getattr(meta, key, None) for key in keys}
+        prev_pipes = nlp.pipeline[:i]
+        requires = {annot: False for annot in meta.requires}
+        if requires:
+            for prev_name, prev_pipe in prev_pipes:
+                prev_meta = nlp.get_pipe_meta(prev_name)
+                for annot in prev_meta.assigns:
+                    requires[annot] = True
+        result["problems"][name] = []
+        for annot, fulfilled in requires.items():
+            if not fulfilled:
+                result["problems"][name].append(annot)
+    result["attrs"] = {attr: get_attr_info(nlp, attr) for attr in all_attrs}
+    return result
+
+
+def print_pipe_analysis(
+    analysis: Dict[str, Union[List[str], Dict[str, List[str]]]],
+    *,
+    keys: List[str] = DEFAULT_KEYS,
+) -> Optional[Dict[str, Union[List[str], Dict[str, List[str]]]]]:
+    """Print a formatted version of the pipe analysis produced by analyze_pipes.
+
+    analysis (Dict[str, Union[List[str], Dict[str, List[str]]]]): The analysis.
+    keys (List[str]): The meta keys to show in the table, in column order.
+    """
     msg.divider("Pipeline Overview")
-    header = ("#", "Component", "Requires", "Assigns", "Retokenizes")
-    msg.table(overview, header=header, divider=True, multiline=True)
-    n_problems = sum(len(p) for p in problems.values())
-    if any(p for p in problems.values()):
+    header = ["#", "Component", *[key.capitalize() for key in keys]]
+    summary = analysis["summary"].items()
+    body = [[i, n, *[m.get(key) for key in keys]] for i, (n, m) in enumerate(summary)]
+    msg.table(body, header=header, divider=True, multiline=True)
+    n_problems = sum(len(p) for p in analysis["problems"].values())
+    if any(p for p in analysis["problems"].values()):
         msg.divider(f"Problems ({n_problems})")
-        for name, problem in problems.items():
+        for name, problem in analysis["problems"].items():
             if problem:
                 msg.warn(f"'{name}' requirements not met: {', '.join(problem)}")
     else:
         msg.good("No problems found.")
-    if no_print:
-        return {"overview": overview, "problems": problems}
-
-
-def count_pipeline_interdependencies(nlp: "Language") -> List[int]:
-    """Count how many subsequent components require an annotation set by each
-    component in the pipeline.
-
-    nlp (Language): The current nlp object.
-    RETURNS (List[int]): The interdependency counts.
-    """
-    pipe_assigns = []
-    pipe_requires = []
-    for name in nlp.pipe_names:
-        meta = nlp.get_pipe_meta(name)
-        pipe_assigns.append(set(meta.assigns))
-        pipe_requires.append(set(meta.requires))
-    counts = []
-    for i, assigns in enumerate(pipe_assigns):
-        count = 0
-        for requires in pipe_requires[i + 1 :]:
-            if assigns.intersection(requires):
-                count += 1
-        counts.append(count)
-    return counts
diff --git a/spacy/pipeline/__init__.py b/spacy/pipeline/__init__.py
index f8accd14f..7f395b5f2 100644
--- a/spacy/pipeline/__init__.py
+++ b/spacy/pipeline/__init__.py
@@ -1,3 +1,4 @@
+from .attributeruler import AttributeRuler
 from .dep_parser import DependencyParser
 from .entity_linker import EntityLinker
 from .ner import EntityRecognizer
@@ -13,6 +14,7 @@ from .tok2vec import Tok2Vec
 from .functions import merge_entities, merge_noun_chunks, merge_subtokens
 
 __all__ = [
+    "AttributeRuler",
     "DependencyParser",
     "EntityLinker",
     "EntityRecognizer",
diff --git a/spacy/syntax/__init__.py b/spacy/pipeline/_parser_internals/__init__.py
similarity index 100%
rename from spacy/syntax/__init__.py
rename to spacy/pipeline/_parser_internals/__init__.py
diff --git a/spacy/syntax/_state.pxd b/spacy/pipeline/_parser_internals/_state.pxd
similarity index 98%
rename from spacy/syntax/_state.pxd
rename to spacy/pipeline/_parser_internals/_state.pxd
index fef4f0c92..0d0dd8c05 100644
--- a/spacy/syntax/_state.pxd
+++ b/spacy/pipeline/_parser_internals/_state.pxd
@@ -1,15 +1,14 @@
-from libc.string cimport memcpy, memset, memmove
-from libc.stdlib cimport malloc, calloc, free
+from libc.string cimport memcpy, memset
+from libc.stdlib cimport calloc, free
 from libc.stdint cimport uint32_t, uint64_t
 from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
 from murmurhash.mrmr cimport hash64
 
-from ..vocab cimport EMPTY_LEXEME
-from ..structs cimport TokenC, SpanC
-from ..lexeme cimport Lexeme
-from ..symbols cimport punct
-from ..attrs cimport IS_SPACE
-from ..typedefs cimport attr_t
+from ...vocab cimport EMPTY_LEXEME
+from ...structs cimport TokenC, SpanC
+from ...lexeme cimport Lexeme
+from ...attrs cimport IS_SPACE
+from ...typedefs cimport attr_t
 
 
 cdef inline bint is_space_token(const TokenC* token) nogil:
diff --git a/spacy/syntax/_state.pyx b/spacy/pipeline/_parser_internals/_state.pyx
similarity index 100%
rename from spacy/syntax/_state.pyx
rename to spacy/pipeline/_parser_internals/_state.pyx
diff --git a/spacy/syntax/arc_eager.pxd b/spacy/pipeline/_parser_internals/arc_eager.pxd
similarity index 65%
rename from spacy/syntax/arc_eager.pxd
rename to spacy/pipeline/_parser_internals/arc_eager.pxd
index a59be716a..e05a34f56 100644
--- a/spacy/syntax/arc_eager.pxd
+++ b/spacy/pipeline/_parser_internals/arc_eager.pxd
@@ -1,8 +1,6 @@
-from cymem.cymem cimport Pool
-
 from .stateclass cimport StateClass
-from ..typedefs cimport weight_t, attr_t
-from .transition_system cimport TransitionSystem, Transition
+from ...typedefs cimport weight_t, attr_t
+from .transition_system cimport Transition, TransitionSystem
 
 
 cdef class ArcEager(TransitionSystem):
diff --git a/spacy/syntax/arc_eager.pyx b/spacy/pipeline/_parser_internals/arc_eager.pyx
similarity index 98%
rename from spacy/syntax/arc_eager.pyx
rename to spacy/pipeline/_parser_internals/arc_eager.pyx
index 6e63859f0..7db8aae0f 100644
--- a/spacy/syntax/arc_eager.pyx
+++ b/spacy/pipeline/_parser_internals/arc_eager.pyx
@@ -1,24 +1,17 @@
 # cython: profile=True, cdivision=True, infer_types=True
-from cpython.ref cimport Py_INCREF
 from cymem.cymem cimport Pool, Address
 from libc.stdint cimport int32_t
 
 from collections import defaultdict, Counter
-import json
 
-from ..typedefs cimport hash_t, attr_t
-from ..strings cimport hash_string
-from ..structs cimport TokenC
-from ..tokens.doc cimport Doc, set_children_from_heads
+from ...typedefs cimport hash_t, attr_t
+from ...strings cimport hash_string
+from ...structs cimport TokenC
+from ...tokens.doc cimport Doc, set_children_from_heads
+from ...gold.example cimport Example
+from ...errors import Errors
 from .stateclass cimport StateClass
 from ._state cimport StateC
-from .transition_system cimport move_cost_func_t, label_cost_func_t
-from ..gold.example cimport Example
-
-from ..errors import Errors
-from .nonproj import is_nonproj_tree
-from . import nonproj
-
 
 # Calculate cost as gold/not gold. We don't use scalar value anyway.
 cdef int BINARY_COSTS = 1
diff --git a/spacy/syntax/ner.pxd b/spacy/pipeline/_parser_internals/ner.pxd
similarity index 58%
rename from spacy/syntax/ner.pxd
rename to spacy/pipeline/_parser_internals/ner.pxd
index 989593a92..2264a1518 100644
--- a/spacy/syntax/ner.pxd
+++ b/spacy/pipeline/_parser_internals/ner.pxd
@@ -1,6 +1,4 @@
 from .transition_system cimport TransitionSystem
-from .transition_system cimport Transition
-from ..typedefs cimport attr_t
 
 
 cdef class BiluoPushDown(TransitionSystem):
diff --git a/spacy/syntax/ner.pyx b/spacy/pipeline/_parser_internals/ner.pyx
similarity index 98%
rename from spacy/syntax/ner.pyx
rename to spacy/pipeline/_parser_internals/ner.pyx
index c4125bbdf..2570ccdee 100644
--- a/spacy/syntax/ner.pyx
+++ b/spacy/pipeline/_parser_internals/ner.pyx
@@ -2,17 +2,14 @@ from collections import Counter
 from libc.stdint cimport int32_t
 from cymem.cymem cimport Pool
 
-from ..typedefs cimport weight_t
+from ...typedefs cimport weight_t, attr_t
+from ...lexeme cimport Lexeme
+from ...attrs cimport IS_SPACE
+from ...gold.example cimport Example
+from ...errors import Errors
 from .stateclass cimport StateClass
 from ._state cimport StateC
-from .transition_system cimport Transition
-from .transition_system cimport do_func_t
-from ..lexeme cimport Lexeme
-from ..attrs cimport IS_SPACE
-from ..gold.iob_utils import biluo_tags_from_offsets
-from ..gold.example cimport Example
-
-from ..errors import Errors
+from .transition_system cimport Transition, do_func_t
 
 
 cdef enum:
diff --git a/spacy/syntax/nonproj.pxd b/spacy/pipeline/_parser_internals/nonproj.pxd
similarity index 100%
rename from spacy/syntax/nonproj.pxd
rename to spacy/pipeline/_parser_internals/nonproj.pxd
diff --git a/spacy/syntax/nonproj.pyx b/spacy/pipeline/_parser_internals/nonproj.pyx
similarity index 98%
rename from spacy/syntax/nonproj.pyx
rename to spacy/pipeline/_parser_internals/nonproj.pyx
index 5ccb11f37..8f5fdaa71 100644
--- a/spacy/syntax/nonproj.pyx
+++ b/spacy/pipeline/_parser_internals/nonproj.pyx
@@ -5,9 +5,9 @@ scheme.
 """
 from copy import copy
 
-from ..tokens.doc cimport Doc, set_children_from_heads
+from ...tokens.doc cimport Doc, set_children_from_heads
 
-from ..errors import Errors
+from ...errors import Errors
 
 
 DELIMITER = '||'
diff --git a/spacy/syntax/stateclass.pxd b/spacy/pipeline/_parser_internals/stateclass.pxd
similarity index 95%
rename from spacy/syntax/stateclass.pxd
rename to spacy/pipeline/_parser_internals/stateclass.pxd
index 567982a3f..1d9f05538 100644
--- a/spacy/syntax/stateclass.pxd
+++ b/spacy/pipeline/_parser_internals/stateclass.pxd
@@ -1,12 +1,8 @@
-from libc.string cimport memcpy, memset
-
 from cymem.cymem cimport Pool
-cimport cython
 
-from ..structs cimport TokenC, SpanC
-from ..typedefs cimport attr_t
+from ...structs cimport TokenC, SpanC
+from ...typedefs cimport attr_t
 
-from ..vocab cimport EMPTY_LEXEME
 from ._state cimport StateC
 
 
diff --git a/spacy/syntax/stateclass.pyx b/spacy/pipeline/_parser_internals/stateclass.pyx
similarity index 97%
rename from spacy/syntax/stateclass.pyx
rename to spacy/pipeline/_parser_internals/stateclass.pyx
index e472e9861..880cf6cc5 100644
--- a/spacy/syntax/stateclass.pyx
+++ b/spacy/pipeline/_parser_internals/stateclass.pyx
@@ -1,7 +1,7 @@
 # cython: infer_types=True
 import numpy
 
-from ..tokens.doc cimport Doc
+from ...tokens.doc cimport Doc
 
 
 cdef class StateClass:
diff --git a/spacy/syntax/transition_system.pxd b/spacy/pipeline/_parser_internals/transition_system.pxd
similarity index 91%
rename from spacy/syntax/transition_system.pxd
rename to spacy/pipeline/_parser_internals/transition_system.pxd
index 836c08168..ba4c33814 100644
--- a/spacy/syntax/transition_system.pxd
+++ b/spacy/pipeline/_parser_internals/transition_system.pxd
@@ -1,11 +1,11 @@
 from cymem.cymem cimport Pool
 
-from ..typedefs cimport attr_t, weight_t
-from ..structs cimport TokenC
-from ..strings cimport StringStore
+from ...typedefs cimport attr_t, weight_t
+from ...structs cimport TokenC
+from ...strings cimport StringStore
+from ...gold.example cimport Example
 from .stateclass cimport StateClass
 from ._state cimport StateC
-from ..gold.example cimport Example
 
 
 cdef struct Transition:
diff --git a/spacy/syntax/transition_system.pyx b/spacy/pipeline/_parser_internals/transition_system.pyx
similarity index 97%
rename from spacy/syntax/transition_system.pyx
rename to spacy/pipeline/_parser_internals/transition_system.pyx
index 17166dcf5..7694e7f34 100644
--- a/spacy/syntax/transition_system.pyx
+++ b/spacy/pipeline/_parser_internals/transition_system.pyx
@@ -1,19 +1,17 @@
 # cython: infer_types=True
 from __future__ import print_function
-from cpython.ref cimport Py_INCREF
 from cymem.cymem cimport Pool
 
 from collections import Counter
 import srsly
 
-from ..typedefs cimport weight_t
-from ..tokens.doc cimport Doc
-from ..structs cimport TokenC
+from ...typedefs cimport weight_t, attr_t
+from ...tokens.doc cimport Doc
+from ...structs cimport TokenC
 from .stateclass cimport StateClass
-from ..typedefs cimport attr_t
 
-from ..errors import Errors
-from .. import util
+from ...errors import Errors
+from ... import util
 
 
 cdef weight_t MIN_SCORE = -90000
diff --git a/spacy/pipeline/attributeruler.py b/spacy/pipeline/attributeruler.py
new file mode 100644
index 000000000..1f1e63959
--- /dev/null
+++ b/spacy/pipeline/attributeruler.py
@@ -0,0 +1,266 @@
+import srsly
+from typing import List, Dict, Union, Iterable, Any, Optional
+from pathlib import Path
+
+from .pipe import Pipe
+from ..errors import Errors
+from ..language import Language
+from ..matcher import Matcher
+from ..symbols import IDS
+from ..tokens import Doc, Span
+from ..tokens._retokenize import normalize_token_attrs, set_token_attrs
+from ..vocab import Vocab
+from .. import util
+
+
+MatcherPatternType = List[Dict[Union[int, str], Any]]
+AttributeRulerPatternType = Dict[str, Union[MatcherPatternType, Dict, int]]
+
+
+@Language.factory("attribute_ruler")
+def make_attribute_ruler(
+    nlp: Language,
+    name: str,
+    pattern_dicts: Optional[Iterable[AttributeRulerPatternType]] = None,
+):
+    return AttributeRuler(nlp.vocab, name, pattern_dicts=pattern_dicts)
+
+
+class AttributeRuler(Pipe):
+    """Set token-level attributes for tokens matched by Matcher patterns.
+    Additionally supports importing patterns from tag maps and morph rules.
+
+    DOCS: https://spacy.io/api/attributeruler
+    """
+
+    def __init__(
+        self,
+        vocab: Vocab,
+        name: str = "attribute_ruler",
+        *,
+        pattern_dicts: Optional[Iterable[AttributeRulerPatternType]] = None,
+    ) -> None:
+        """Initialize the AttributeRuler.
+
+        vocab (Vocab): The vocab.
+        name (str): The pipe name. Defaults to "attribute_ruler".
+        pattern_dicts (Iterable[Dict]): A list of pattern dicts with the keys as
+        the arguments to AttributeRuler.add (`patterns`/`attrs`/`index`) to add
+        as patterns.
+
+        RETURNS (AttributeRuler): The AttributeRuler component.
+
+        DOCS: https://spacy.io/api/attributeruler#init
+        """
+        self.name = name
+        self.vocab = vocab
+        self.matcher = Matcher(self.vocab)
+        self.attrs = []
+        self._attrs_unnormed = []  # store for reference
+        self.indices = []
+
+        if pattern_dicts:
+            self.add_patterns(pattern_dicts)
+
+    def __call__(self, doc: Doc) -> Doc:
+        """Apply the attributeruler to a Doc and set all attribute exceptions.
+
+        doc (Doc): The document to process.
+        RETURNS (Doc): The processed Doc.
+
+        DOCS: https://spacy.io/api/attributeruler#call
+        """
+        matches = self.matcher(doc)
+
+        for match_id, start, end in matches:
+            span = Span(doc, start, end, label=match_id)
+            attrs = self.attrs[span.label]
+            index = self.indices[span.label]
+            try:
+                token = span[index]
+            except IndexError:
+                raise ValueError(
+                    Errors.E1001.format(
+                        patterns=self.matcher.get(span.label),
+                        span=[t.text for t in span],
+                        index=index,
+                    )
+                )
+            set_token_attrs(token, attrs)
+        return doc
+
+    def load_from_tag_map(
+        self, tag_map: Dict[str, Dict[Union[int, str], Union[int, str]]]
+    ) -> None:
+        for tag, attrs in tag_map.items():
+            pattern = [{"TAG": tag}]
+            attrs, morph_attrs = _split_morph_attrs(attrs)
+            morph = self.vocab.morphology.add(morph_attrs)
+            attrs["MORPH"] = self.vocab.strings[morph]
+            self.add([pattern], attrs)
+
+    def load_from_morph_rules(
+        self, morph_rules: Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]
+    ) -> None:
+        for tag in morph_rules:
+            for word in morph_rules[tag]:
+                pattern = [{"ORTH": word, "TAG": tag}]
+                attrs = morph_rules[tag][word]
+                attrs, morph_attrs = _split_morph_attrs(attrs)
+                morph = self.vocab.morphology.add(morph_attrs)
+                attrs["MORPH"] = self.vocab.strings[morph]
+                self.add([pattern], attrs)
+
+    def add(
+        self, patterns: Iterable[MatcherPatternType], attrs: Dict, index: int = 0
+    ) -> None:
+        """Add Matcher patterns for tokens that should be modified with the
+        provided attributes. The token at the specified index within the
+        matched span will be assigned the attributes.
+
+        patterns (Iterable[List[Dict]]): A list of Matcher patterns.
+        attrs (Dict): The attributes to assign to the target token in the
+            matched span.
+        index (int): The index of the token in the matched span to modify. May
+            be negative to index from the end of the span. Defaults to 0.
+
+        DOCS: https://spacy.io/api/attributeruler#add
+        """
+        self.matcher.add(len(self.attrs), patterns)
+        self._attrs_unnormed.append(attrs)
+        attrs = normalize_token_attrs(self.vocab, attrs)
+        self.attrs.append(attrs)
+        self.indices.append(index)
+
+    def add_patterns(self, pattern_dicts: Iterable[AttributeRulerPatternType]) -> None:
+        for p in pattern_dicts:
+            self.add(**p)
+
+    @property
+    def patterns(self) -> List[AttributeRulerPatternType]:
+        all_patterns = []
+        for i in range(len(self.attrs)):
+            p = {}
+            p["patterns"] = self.matcher.get(i)[1]
+            p["attrs"] = self._attrs_unnormed[i]
+            p["index"] = self.indices[i]
+            all_patterns.append(p)
+        return all_patterns
+
+    def to_bytes(self, exclude: Iterable[str] = tuple()) -> bytes:
+        """Serialize the attributeruler to a bytestring.
+
+        exclude (Iterable[str]): String names of serialization fields to exclude.
+        RETURNS (bytes): The serialized object.
+
+        DOCS: https://spacy.io/api/attributeruler#to_bytes
+        """
+        serialize = {}
+        serialize["vocab"] = self.vocab.to_bytes
+        patterns = {k: self.matcher.get(k)[1] for k in range(len(self.attrs))}
+        serialize["patterns"] = lambda: srsly.msgpack_dumps(patterns)
+        serialize["attrs"] = lambda: srsly.msgpack_dumps(self.attrs)
+        serialize["indices"] = lambda: srsly.msgpack_dumps(self.indices)
+        return util.to_bytes(serialize, exclude)
+
+    def from_bytes(self, bytes_data: bytes, exclude: Iterable[str] = tuple()):
+        """Load the attributeruler from a bytestring.
+
+        bytes_data (bytes): The data to load.
+        exclude (Iterable[str]): String names of serialization fields to exclude.
+        RETURNS (AttributeRuler): The loaded object.
+
+        DOCS: https://spacy.io/api/attributeruler#from_bytes
+        """
+        data = {"patterns": b""}
+
+        def load_patterns(b):
+            data["patterns"] = srsly.msgpack_loads(b)
+
+        def load_attrs(b):
+            self.attrs = srsly.msgpack_loads(b)
+
+        def load_indices(b):
+            self.indices = srsly.msgpack_loads(b)
+
+        deserialize = {
+            "vocab": lambda b: self.vocab.from_bytes(b),
+            "patterns": load_patterns,
+            "attrs": load_attrs,
+            "indices": load_indices,
+        }
+        util.from_bytes(bytes_data, deserialize, exclude)
+
+        if data["patterns"]:
+            for key, pattern in data["patterns"].items():
+                self.matcher.add(key, pattern)
+            assert len(self.attrs) == len(data["patterns"])
+            assert len(self.indices) == len(data["patterns"])
+
+        return self
+
+    def to_disk(self, path: Union[Path, str], exclude: Iterable[str] = tuple()) -> None:
+        """Serialize the attributeruler to disk.
+
+        path (Union[Path, str]): A path to a directory.
+        exclude (Iterable[str]): String names of serialization fields to exclude.
+        DOCS: https://spacy.io/api/attributeruler#to_disk
+        """
+        patterns = {k: self.matcher.get(k)[1] for k in range(len(self.attrs))}
+        serialize = {
+            "vocab": lambda p: self.vocab.to_disk(p),
+            "patterns": lambda p: srsly.write_msgpack(p, patterns),
+            "attrs": lambda p: srsly.write_msgpack(p, self.attrs),
+            "indices": lambda p: srsly.write_msgpack(p, self.indices),
+        }
+        util.to_disk(path, serialize, exclude)
+
+    def from_disk(
+        self, path: Union[Path, str], exclude: Iterable[str] = tuple()
+    ) -> "AttributeRuler":
+        """Load the attributeruler from disk.
+
+        path (Union[Path, str]): A path to a directory.
+        exclude (Iterable[str]): String names of serialization fields to exclude.
+        RETURNS (AttributeRuler): The loaded object.
+        DOCS: https://spacy.io/api/attributeruler#from_disk
+        """
+        data = {"patterns": b""}  # filled in by load_patterns below
+
+        def load_patterns(p):
+            data["patterns"] = srsly.read_msgpack(p)
+
+        def load_attrs(p):
+            self.attrs = srsly.read_msgpack(p)
+
+        def load_indices(p):
+            self.indices = srsly.read_msgpack(p)
+
+        deserialize = {
+            "vocab": lambda p: self.vocab.from_disk(p),
+            "patterns": load_patterns,
+            "attrs": load_attrs,
+            "indices": load_indices,
+        }
+        util.from_disk(path, deserialize, exclude)
+
+        if data["patterns"]:  # re-register the loaded patterns on the matcher
+            for key, pattern in data["patterns"].items():
+                self.matcher.add(key, pattern)
+            assert len(self.attrs) == len(self.indices) == len(data["patterns"])
+
+        return self
+
+
+def _split_morph_attrs(attrs):
+    """Split entries from a tag map or morph rules dict into two dicts, one
+    with the token-level features (POS, LEMMA) and one with the remaining
+    features, which are presumed to be individual MORPH features."""
+    other_attrs = {}
+    morph_attrs = {}
+    for k, v in attrs.items():  # compare "_" with ==: "in" is a substring test
+        if k == "_" or k in IDS.keys() or k in IDS.values():
+            other_attrs[k] = v
+        else:
+            morph_attrs[k] = v
+    return other_attrs, morph_attrs
diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx
index a952385b4..a022d04d6 100644
--- a/spacy/pipeline/dep_parser.pyx
+++ b/spacy/pipeline/dep_parser.pyx
@@ -1,13 +1,13 @@
 # cython: infer_types=True, profile=True, binding=True
 from typing import Optional, Iterable
-from thinc.api import CosineDistance, to_categorical, get_array_module, Model, Config
+from thinc.api import Model, Config
 
-from ..syntax.nn_parser cimport Parser
-from ..syntax.arc_eager cimport ArcEager
+from .transition_parser cimport Parser
+from ._parser_internals.arc_eager cimport ArcEager
 
 from .functions import merge_subtokens
 from ..language import Language
-from ..syntax import nonproj
+from ._parser_internals import nonproj
 from ..scorer import Scorer
 
 
@@ -34,7 +34,7 @@ DEFAULT_PARSER_MODEL = Config().from_str(default_model_config)["model"]
 
 @Language.factory(
     "parser",
-    assigns=["token.dep", "token.is_sent_start", "doc.sents"],
+    assigns=["token.dep", "token.head", "token.is_sent_start", "doc.sents"],
     default_config={
         "moves": None,
         "update_with_oracle_cut_size": 100,
@@ -120,7 +120,8 @@ cdef class DependencyParser(Parser):
             return dep
         results = {}
         results.update(Scorer.score_spans(examples, "sents", **kwargs))
-        results.update(Scorer.score_deps(examples, "dep", getter=dep_getter,
-            ignore_labels=("p", "punct"), **kwargs))
+        kwargs.setdefault("getter", dep_getter)
+        kwargs.setdefault("ignore_labels", ("p", "punct"))
+        results.update(Scorer.score_deps(examples, "dep", **kwargs))
         del results["sents_per_type"]
         return results
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index cc4e7b159..923d925dc 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -33,24 +33,31 @@ dropout = null
 """
 DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"]
 
+default_kb_config = """
+[kb]
+@assets = "spacy.EmptyKB.v1"
+entity_vector_length = 64
+"""
+DEFAULT_NEL_KB = Config().from_str(default_kb_config)["kb"]
+
 
 @Language.factory(
     "entity_linker",
     requires=["doc.ents", "doc.sents", "token.ent_iob", "token.ent_type"],
     assigns=["token.ent_kb_id"],
     default_config={
-        "kb": None,  # TODO - what kind of default makes sense here?
+        "kb": DEFAULT_NEL_KB,
+        "model": DEFAULT_NEL_MODEL,
         "labels_discard": [],
         "incl_prior": True,
         "incl_context": True,
-        "model": DEFAULT_NEL_MODEL,
     },
 )
 def make_entity_linker(
     nlp: Language,
     name: str,
     model: Model,
-    kb: Optional[KnowledgeBase],
+    kb: KnowledgeBase,
     *,
     labels_discard: Iterable[str],
     incl_prior: bool,
@@ -92,10 +99,10 @@ class EntityLinker(Pipe):
         model (thinc.api.Model): The Thinc Model powering the pipeline component.
         name (str): The component instance name, used to add entries to the
             losses during training.
-        kb (KnowledgeBase): TODO:
-        labels_discard (Iterable[str]): TODO:
-        incl_prior (bool): TODO:
-        incl_context (bool): TODO:
+        kb (KnowledgeBase): The KnowledgeBase holding all entities and their aliases.
+        labels_discard (Iterable[str]): NER labels that will automatically get a "NIL" prediction.
+        incl_prior (bool): Whether or not to include prior probabilities from the KB in the model.
+        incl_context (bool): Whether or not to include the local context in the model.
 
         DOCS: https://spacy.io/api/entitylinker#init
         """
@@ -108,14 +115,12 @@ class EntityLinker(Pipe):
             "incl_prior": incl_prior,
             "incl_context": incl_context,
         }
-        self.kb = kb
-        if self.kb is None:
-            # create an empty KB that should be filled by calling from_disk
-            self.kb = KnowledgeBase(vocab=vocab)
-        else:
-            del cfg["kb"]  # we don't want to duplicate its serialization
-        if not isinstance(self.kb, KnowledgeBase):
-            raise ValueError(Errors.E990.format(type=type(self.kb)))
+        if not isinstance(kb, KnowledgeBase):
+            raise ValueError(Errors.E990.format(type=type(kb)))
+        kb.initialize(vocab)
+        self.kb = kb
+        if "kb" in cfg:
+            del cfg["kb"]  # we don't want to duplicate its serialization
         self.cfg = dict(cfg)
         self.distance = CosineDistance(normalize=False)
         # how many neightbour sentences to take into account
@@ -222,9 +227,9 @@ class EntityLinker(Pipe):
         set_dropout_rate(self.model, drop)
         if not sentence_docs:
             warnings.warn(Warnings.W093.format(name="Entity Linker"))
-            return 0.0
+            return losses
         sentence_encodings, bp_context = self.model.begin_update(sentence_docs)
-        loss, d_scores = self.get_similarity_loss(
+        loss, d_scores = self.get_loss(
             sentence_encodings=sentence_encodings, examples=examples
         )
         bp_context(d_scores)
@@ -235,7 +240,7 @@ class EntityLinker(Pipe):
             self.set_annotations(docs, predictions)
         return losses
 
-    def get_similarity_loss(self, examples: Iterable[Example], sentence_encodings):
+    def get_loss(self, examples: Iterable[Example], sentence_encodings):
         entity_encodings = []
         for eg in examples:
             kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
@@ -247,7 +252,7 @@ class EntityLinker(Pipe):
         entity_encodings = self.model.ops.asarray(entity_encodings, dtype="float32")
         if sentence_encodings.shape != entity_encodings.shape:
             err = Errors.E147.format(
-                method="get_similarity_loss", msg="gold entities do not match up"
+                method="get_loss", msg="gold entities do not match up"
             )
             raise RuntimeError(err)
         gradients = self.distance.get_grad(sentence_encodings, entity_encodings)
@@ -337,13 +342,13 @@ class EntityLinker(Pipe):
                                     final_kb_ids.append(candidates[0].entity_)
                                 else:
                                     random.shuffle(candidates)
-                                    # this will set all prior probabilities to 0 if they should be excluded from the model
+                                    # set all prior probabilities to 0 if incl_prior=False
                                     prior_probs = xp.asarray(
                                         [c.prior_prob for c in candidates]
                                     )
                                     if not self.cfg.get("incl_prior"):
                                         prior_probs = xp.asarray(
-                                            [0.0 for c in candidates]
+                                            [0.0 for _ in candidates]
                                         )
                                     scores = prior_probs
                                     # add in similarity from the context
@@ -437,9 +442,8 @@ class EntityLinker(Pipe):
                 raise ValueError(Errors.E149)
 
         def load_kb(p):
-            self.kb = KnowledgeBase(
-                vocab=self.vocab, entity_vector_length=self.cfg["entity_width"]
-            )
+            self.kb = KnowledgeBase(entity_vector_length=self.cfg["entity_width"])
+            self.kb.initialize(self.vocab)
             self.kb.load_bulk(p)
 
         deserialize = {}
diff --git a/spacy/pipeline/multitask.pyx b/spacy/pipeline/multitask.pyx
index 97826aaa6..d85030adb 100644
--- a/spacy/pipeline/multitask.pyx
+++ b/spacy/pipeline/multitask.pyx
@@ -1,7 +1,7 @@
 # cython: infer_types=True, profile=True, binding=True
 from typing import Optional
 import numpy
-from thinc.api import CosineDistance, to_categorical, to_categorical, Model, Config
+from thinc.api import CosineDistance, to_categorical, Model, Config
 from thinc.api import set_dropout_rate
 
 from ..tokens.doc cimport Doc
@@ -9,7 +9,7 @@ from ..tokens.doc cimport Doc
 from .pipe import Pipe
 from .tagger import Tagger
 from ..language import Language
-from ..syntax import nonproj
+from ._parser_internals import nonproj
 from ..attrs import POS, ID
 from ..errors import Errors
 
@@ -219,3 +219,6 @@ class ClozeMultitask(Pipe):
 
         if losses is not None:
             losses[self.name] += loss
+
+    def add_label(self, label):
+        raise NotImplementedError
diff --git a/spacy/pipeline/ner.pyx b/spacy/pipeline/ner.pyx
index 7ee4448fb..7f4fb8363 100644
--- a/spacy/pipeline/ner.pyx
+++ b/spacy/pipeline/ner.pyx
@@ -1,9 +1,9 @@
 # cython: infer_types=True, profile=True, binding=True
 from typing import Optional, Iterable
-from thinc.api import CosineDistance, to_categorical, get_array_module, Model, Config
+from thinc.api import Model, Config
 
-from ..syntax.nn_parser cimport Parser
-from ..syntax.ner cimport BiluoPushDown
+from .transition_parser cimport Parser
+from ._parser_internals.ner cimport BiluoPushDown
 
 from ..language import Language
 from ..scorer import Scorer
diff --git a/spacy/gold.pyx b/spacy/pipeline/nn_parser.pyx
similarity index 100%
rename from spacy/gold.pyx
rename to spacy/pipeline/nn_parser.pyx
diff --git a/spacy/pipeline/pipe.pxd b/spacy/pipeline/pipe.pxd
new file mode 100644
index 000000000..bb97f79d0
--- /dev/null
+++ b/spacy/pipeline/pipe.pxd
@@ -0,0 +1,2 @@
+cdef class Pipe:
+    cdef public str name
diff --git a/spacy/pipeline/pipe.pyx b/spacy/pipeline/pipe.pyx
index 196cdebdc..1a94905a2 100644
--- a/spacy/pipeline/pipe.pyx
+++ b/spacy/pipeline/pipe.pyx
@@ -8,7 +8,7 @@ from ..errors import Errors
 from .. import util
 
 
-class Pipe:
+cdef class Pipe:
     """This class is a base class and not instantiated directly. Trainable
     pipeline components like the EntityRecognizer or TextCategorizer inherit
     from it and it defines the interface that components should follow to
@@ -17,8 +17,6 @@ class Pipe:
     DOCS: https://spacy.io/api/pipe
     """
 
-    name = None
-
     def __init__(self, vocab, model, name, **cfg):
         """Initialize a pipeline component.
 
diff --git a/spacy/pipeline/sentencizer.pyx b/spacy/pipeline/sentencizer.pyx
index 31208ea2c..be4351212 100644
--- a/spacy/pipeline/sentencizer.pyx
+++ b/spacy/pipeline/sentencizer.pyx
@@ -203,3 +203,9 @@ class Sentencizer(Pipe):
         cfg = srsly.read_json(path)
         self.punct_chars = set(cfg.get("punct_chars", self.default_punct_chars))
         return self
+
+    def get_loss(self, examples, scores):
+        raise NotImplementedError
+
+    def add_label(self, label):
+        raise NotImplementedError
diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx
index c6eb43661..620a8557e 100644
--- a/spacy/pipeline/senter.pyx
+++ b/spacy/pipeline/senter.pyx
@@ -108,8 +108,8 @@ class SentenceRecognizer(Tagger):
         truths = []
         for eg in examples:
             eg_truth = []
-            for x in eg.get_aligned("sent_start"):
-                if x == None:
+            for x in eg.get_aligned("SENT_START"):
+                if x is None:
                     eg_truth.append(None)
                 elif x == 1:
                     eg_truth.append(labels[1])
diff --git a/spacy/pipeline/simple_ner.py b/spacy/pipeline/simple_ner.py
index 9b9872b77..43a3283ca 100644
--- a/spacy/pipeline/simple_ner.py
+++ b/spacy/pipeline/simple_ner.py
@@ -4,12 +4,12 @@ from thinc.api import SequenceCategoricalCrossentropy, set_dropout_rate, Model
 from thinc.api import Optimizer, Config
 from thinc.util import to_numpy
 
+from ..errors import Errors
 from ..gold import Example, spans_from_biluo_tags, iob_to_biluo, biluo_to_iob
 from ..tokens import Doc
 from ..language import Language
 from ..vocab import Vocab
 from ..scorer import Scorer
-from .. import util
 from .pipe import Pipe
 
 
@@ -37,7 +37,6 @@ DEFAULT_SIMPLE_NER_MODEL = Config().from_str(default_model_config)["model"]
     default_config={"labels": [], "model": DEFAULT_SIMPLE_NER_MODEL},
     scores=["ents_p", "ents_r", "ents_f", "ents_per_type"],
     default_score_weights={"ents_f": 1.0, "ents_p": 0.0, "ents_r": 0.0},
-
 )
 def make_simple_ner(
     nlp: Language, name: str, model: Model, labels: Iterable[str]
@@ -60,7 +59,9 @@ class SimpleNER(Pipe):
         self.vocab = vocab
         self.model = model
         self.name = name
-        self.labels = labels
+        self.cfg = {"labels": []}
+        for label in labels:
+            self.add_label(label)
         self.loss_func = SequenceCategoricalCrossentropy(
             names=self.get_tag_names(), normalize=True, missing_value=None
         )
@@ -70,9 +71,20 @@ class SimpleNER(Pipe):
     def is_biluo(self) -> bool:
         return self.model.name.startswith("biluo")
 
+    @property
+    def labels(self) -> Tuple[str]:
+        return tuple(self.cfg["labels"])
+
     def add_label(self, label: str) -> None:
+        """Add a new label to the pipe.
+        label (str): The label to add.
+        DOCS: https://spacy.io/api/simplener#add_label
+        """
+        if not isinstance(label, str):
+            raise ValueError(Errors.E187)
         if label not in self.labels:
-            self.labels.append(label)
+            self.cfg["labels"].append(label)
+            self.vocab.strings.add(label)
 
     def get_tag_names(self) -> List[str]:
         if self.is_biluo:
@@ -131,11 +143,9 @@ class SimpleNER(Pipe):
         return losses
 
     def get_loss(self, examples: List[Example], scores) -> Tuple[List[Floats2d], float]:
-        loss = 0
-        d_scores = []
         truths = []
         for eg in examples:
-            tags = eg.get_aligned("TAG", as_string=True)
+            tags = eg.get_aligned_ner()
             gold_tags = [(tag if tag != "-" else None) for tag in tags]
             if not self.is_biluo:
                 gold_tags = biluo_to_iob(gold_tags)
@@ -159,7 +169,6 @@ class SimpleNER(Pipe):
         if not hasattr(get_examples, "__call__"):
             gold_tuples = get_examples
             get_examples = lambda: gold_tuples
-        labels = _get_labels(get_examples())
         for label in _get_labels(get_examples()):
             self.add_label(label)
         labels = self.labels
diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx
index f2e06efed..43f5b02cb 100644
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -259,7 +259,7 @@ class Tagger(Pipe):
         DOCS: https://spacy.io/api/tagger#get_loss
         """
         loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
-        truths = [eg.get_aligned("tag", as_string=True) for eg in examples]
+        truths = [eg.get_aligned("TAG", as_string=True) for eg in examples]
         d_scores, loss = loss_func(scores, truths)
         if self.model.ops.xp.isnan(loss):
             raise ValueError("nan value when computing loss")
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index 2c399defc..bc16e790f 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -238,8 +238,11 @@ class TextCategorizer(Pipe):
 
         DOCS: https://spacy.io/api/textcategorizer#rehearse
         """
+
+        if losses is not None:
+            losses.setdefault(self.name, 0.0)
         if self._rehearsal_model is None:
-            return
+            return losses
         try:
             docs = [eg.predicted for eg in examples]
         except AttributeError:
@@ -250,7 +253,7 @@ class TextCategorizer(Pipe):
             raise TypeError(err)
         if not any(len(doc) for doc in docs):
             # Handle cases where there are no tokens in any docs.
-            return
+            return losses
         set_dropout_rate(self.model, drop)
         scores, bp_scores = self.model.begin_update(docs)
         target = self._rehearsal_model(examples)
@@ -259,7 +262,6 @@ class TextCategorizer(Pipe):
         if sgd is not None:
             self.model.finish_update(sgd)
         if losses is not None:
-            losses.setdefault(self.name, 0.0)
             losses[self.name] += (gradient ** 2).sum()
         return losses
 
@@ -353,7 +355,7 @@ class TextCategorizer(Pipe):
             for cat in y.cats:
                 self.add_label(cat)
         self.require_labels()
-        docs = [Doc(Vocab(), words=["hello"])]
+        docs = [Doc(self.vocab, words=["hello"])]
         truths, _ = self._examples_to_truth(examples)
         self.set_output(len(self.labels))
         self.model.initialize(X=docs, Y=truths)
diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py
index b147cf177..31643a7d3 100644
--- a/spacy/pipeline/tok2vec.py
+++ b/spacy/pipeline/tok2vec.py
@@ -199,6 +199,9 @@ class Tok2Vec(Pipe):
         docs = [Doc(self.vocab, words=["hello"])]
         self.model.initialize(X=docs)
 
+    def add_label(self, label):
+        raise NotImplementedError
+
 
 class Tok2VecListener(Model):
     """A layer that gets fed its answers from an upstream connection,
diff --git a/spacy/syntax/nn_parser.pxd b/spacy/pipeline/transition_parser.pxd
similarity index 62%
rename from spacy/syntax/nn_parser.pxd
rename to spacy/pipeline/transition_parser.pxd
index 7840ec27a..e594a3098 100644
--- a/spacy/syntax/nn_parser.pxd
+++ b/spacy/pipeline/transition_parser.pxd
@@ -1,16 +1,15 @@
-from .stateclass cimport StateClass
-from .arc_eager cimport TransitionSystem
+from cymem.cymem cimport Pool
+
 from ..vocab cimport Vocab
-from ..tokens.doc cimport Doc
-from ..structs cimport TokenC
-from ._state cimport StateC
-from ._parser_model cimport WeightsC, ActivationsC, SizesC
+from .pipe cimport Pipe
+from ._parser_internals.transition_system cimport Transition, TransitionSystem
+from ._parser_internals._state cimport StateC
+from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC
 
 
-cdef class Parser:
+cdef class Parser(Pipe):
     cdef readonly Vocab vocab
     cdef public object model
-    cdef public str name
     cdef public object _rehearsal_model
     cdef readonly TransitionSystem moves
     cdef readonly object cfg
diff --git a/spacy/syntax/nn_parser.pyx b/spacy/pipeline/transition_parser.pyx
similarity index 95%
rename from spacy/syntax/nn_parser.pyx
rename to spacy/pipeline/transition_parser.pyx
index a0ee13a0a..b14a55cb4 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/pipeline/transition_parser.pyx
@@ -1,42 +1,32 @@
 # cython: infer_types=True, cdivision=True, boundscheck=False
-cimport cython.parallel
+from __future__ import print_function
+from cymem.cymem cimport Pool
 cimport numpy as np
 from itertools import islice
-from cpython.ref cimport PyObject, Py_XDECREF
-from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
-from libc.math cimport exp
 from libcpp.vector cimport vector
-from libc.string cimport memset, memcpy
+from libc.string cimport memset
 from libc.stdlib cimport calloc, free
-from cymem.cymem cimport Pool
-from thinc.backends.linalg cimport Vec, VecVec
 
-from thinc.api import chain, clone, Linear, list2array, NumpyOps, CupyOps, use_ops
-from thinc.api import get_array_module, zero_init, set_dropout_rate
-from itertools import islice
 import srsly
+
+from ._parser_internals.stateclass cimport StateClass
+from ..ml.parser_model cimport alloc_activations, free_activations
+from ..ml.parser_model cimport predict_states, arg_max_if_valid
+from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss
+from ..ml.parser_model cimport get_c_weights, get_c_sizes
+
+from ..tokens.doc cimport Doc
+from ..errors import Errors, Warnings
+from .. import util
+from ..util import create_default_optimizer
+
+from thinc.api import set_dropout_rate
 import numpy.random
 import numpy
 import warnings
 
-from ..tokens.doc cimport Doc
-from ..typedefs cimport weight_t, class_t, hash_t
-from ._parser_model cimport alloc_activations, free_activations
-from ._parser_model cimport predict_states, arg_max_if_valid
-from ._parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss
-from ._parser_model cimport get_c_weights, get_c_sizes
-from .stateclass cimport StateClass
-from ._state cimport StateC
-from .transition_system cimport Transition
 
-from ..util import create_default_optimizer, registry
-from ..compat import copy_array
-from ..errors import Errors, Warnings
-from .. import util
-from . import nonproj
-
-
-cdef class Parser:
+cdef class Parser(Pipe):
     """
     Base class of the DependencyParser and EntityRecognizer.
     """
@@ -107,7 +97,7 @@ cdef class Parser:
 
     @property
     def tok2vec(self):
-        '''Return the embedding and convolutional layer of the model.'''
+        """Return the embedding and convolutional layer of the model."""
         return self.model.get_ref("tok2vec")
 
     @property
@@ -138,13 +128,13 @@ cdef class Parser:
         raise NotImplementedError
 
     def init_multitask_objectives(self, get_examples, pipeline, **cfg):
-        '''Setup models for secondary objectives, to benefit from multi-task
+        """Set up models for secondary objectives, to benefit from multi-task
         learning. This method is intended to be overridden by subclasses.
 
         For instance, the dependency parser can benefit from sharing
         an input representation with a label prediction model. These auxiliary
         models are discarded after training.
-        '''
+        """
         pass
 
     def use_params(self, params):
diff --git a/spacy/schemas.py b/spacy/schemas.py
index 971d283e2..d599ccbb2 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -1,4 +1,5 @@
 from typing import Dict, List, Union, Optional, Sequence, Any, Callable, Type
+from typing import Iterable, TypeVar, TYPE_CHECKING
 from enum import Enum
 from pydantic import BaseModel, Field, ValidationError, validator
 from pydantic import StrictStr, StrictInt, StrictFloat, StrictBool
@@ -8,6 +9,16 @@ from thinc.api import Optimizer
 
 from .attrs import NAMES
 
+if TYPE_CHECKING:
+    # This lets us add type hints for mypy etc. without causing circular imports
+    from .language import Language  # noqa: F401
+    from .gold import Example  # noqa: F401
+
+
+ItemT = TypeVar("ItemT")
+Batcher = Callable[[Iterable[ItemT]], Iterable[List[ItemT]]]
+Reader = Callable[["Language", str], Iterable["Example"]]
+
 
 def validate(schema: Type[BaseModel], obj: Dict[str, Any]) -> List[str]:
     """Validate data against a given pydantic schema.
@@ -181,30 +192,22 @@ class ModelMetaSchema(BaseModel):
 
 class ConfigSchemaTraining(BaseModel):
     # fmt: off
-    base_model: Optional[StrictStr] = Field(..., title="The base model to use")
     vectors: Optional[StrictStr] = Field(..., title="Path to vectors")
-    gold_preproc: StrictBool = Field(..., title="Whether to train on gold-standard sentences and tokens")
-    max_length: StrictInt = Field(..., title="Maximum length of examples (longer examples are divided into sentences if possible)")
-    limit: StrictInt = Field(..., title="Number of examples to use (0 for all)")
-    orth_variant_level: StrictFloat = Field(..., title="Orth variants for data augmentation")
+    train_corpus: Reader = Field(..., title="Reader for the training data")
+    dev_corpus: Reader = Field(..., title="Reader for the dev data")
+    batcher: Batcher = Field(..., title="Batcher for the training data")
     dropout: StrictFloat = Field(..., title="Dropout rate")
     patience: StrictInt = Field(..., title="How many steps to continue without improvement in evaluation score")
     max_epochs: StrictInt = Field(..., title="Maximum number of epochs to train for")
     max_steps: StrictInt = Field(..., title="Maximum number of update steps to train for")
     eval_frequency: StrictInt = Field(..., title="How often to evaluate during training (steps)")
-    eval_batch_size: StrictInt = Field(..., title="Evaluation batch size")
     seed: Optional[StrictInt] = Field(..., title="Random seed")
     accumulate_gradient: StrictInt = Field(..., title="Whether to divide the batch up into substeps")
-    use_pytorch_for_gpu_memory: StrictBool = Field(..., title="Allocate memory via PyTorch")
     score_weights: Dict[StrictStr, Union[StrictFloat, StrictInt]] = Field(..., title="Scores to report and their weights for selecting final model")
     init_tok2vec: Optional[StrictStr] = Field(..., title="Path to pretrained tok2vec weights")
-    discard_oversize: StrictBool = Field(..., title="Whether to skip examples longer than batch size")
-    batch_by: StrictStr = Field(..., title="Batch examples by type")
-    raw_text: Optional[StrictStr] = Field(..., title="Raw text")
-    tag_map: Optional[StrictStr] = Field(..., title="Path to JSON-formatted tag map")
-    morph_rules: Optional[StrictStr] = Field(..., title="Path to morphology rules")
-    batch_size: Union[Sequence[int], int] = Field(..., title="The batch size or batch size schedule")
+    raw_text: Optional[StrictStr] = Field(default=None, title="Raw text")
     optimizer: Optimizer = Field(..., title="The optimizer to use")
+    frozen_components: List[str] = Field(..., title="Pipeline components that shouldn't be updated during training")
     # fmt: on
 
     class Config:
@@ -219,6 +222,9 @@ class ConfigSchemaNlp(BaseModel):
     tokenizer: Callable = Field(..., title="The tokenizer to use")
     lemmatizer: Callable = Field(..., title="The lemmatizer to use")
     load_vocab_data: StrictBool = Field(..., title="Whether to load additional vocab data from spacy-lookups-data")
+    before_creation: Optional[Callable[[Type["Language"]], Type["Language"]]] = Field(..., title="Optional callback to modify Language class before initialization")
+    after_creation: Optional[Callable[["Language"], "Language"]] = Field(..., title="Optional callback to modify nlp object after creation and before the pipeline is constructed")
+    after_pipeline_creation: Optional[Callable[["Language"], "Language"]] = Field(..., title="Optional callback to modify nlp object after the pipeline is constructed")
     # fmt: on
 
     class Config:
diff --git a/spacy/scorer.py b/spacy/scorer.py
index 702c74521..40a819e7c 100644
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@@ -1,55 +1,61 @@
+from typing import Optional, Iterable, Dict, Any, Callable, Tuple, TYPE_CHECKING
 import numpy as np
 
+from .gold import Example
+from .tokens import Token, Doc
 from .errors import Errors
 from .util import get_lang_class
 from .morphology import Morphology
 
+if TYPE_CHECKING:
+    # This lets us add type hints for mypy etc. without causing circular imports
+    from .language import Language  # noqa: F401
+
+
+DEFAULT_PIPELINE = ["senter", "tagger", "morphologizer", "parser", "ner", "textcat"]
+
 
 class PRFScore:
-    """
-    A precision / recall / F score
-    """
+    """A precision / recall / F score."""
 
-    def __init__(self):
+    def __init__(self) -> None:
         self.tp = 0
         self.fp = 0
         self.fn = 0
 
-    def score_set(self, cand, gold):
+    def score_set(self, cand: set, gold: set) -> None:
         self.tp += len(cand.intersection(gold))
         self.fp += len(cand - gold)
         self.fn += len(gold - cand)
 
     @property
-    def precision(self):
+    def precision(self) -> float:
         return self.tp / (self.tp + self.fp + 1e-100)
 
     @property
-    def recall(self):
+    def recall(self) -> float:
         return self.tp / (self.tp + self.fn + 1e-100)
 
     @property
-    def fscore(self):
+    def fscore(self) -> float:
         p = self.precision
         r = self.recall
         return 2 * ((p * r) / (p + r + 1e-100))
 
-    def to_dict(self):
+    def to_dict(self) -> Dict[str, float]:
         return {"p": self.precision, "r": self.recall, "f": self.fscore}
 
 
 class ROCAUCScore:
-    """
-    An AUC ROC score.
-    """
+    """An AUC ROC score."""
 
-    def __init__(self):
+    def __init__(self) -> None:
         self.golds = []
         self.cands = []
         self.saved_score = 0.0
         self.saved_score_at_len = 0
 
-    def score_set(self, cand, gold):
+    def score_set(self, cand, gold) -> None:
         self.cands.append(cand)
         self.golds.append(gold)
 
@@ -70,51 +76,52 @@ class ROCAUCScore:
 class Scorer:
     """Compute evaluation scores."""
 
-    def __init__(self, nlp=None, **cfg):
+    def __init__(
+        self,
+        nlp: Optional["Language"] = None,
+        default_lang: str = "xx",
+        default_pipeline=DEFAULT_PIPELINE,
+        **cfg,
+    ) -> None:
         """Initialize the Scorer.
 
         DOCS: https://spacy.io/api/scorer#init
         """
         self.nlp = nlp
         self.cfg = cfg
-
         if not nlp:
-            # create a default pipeline
-            nlp = get_lang_class("xx")()
-            nlp.add_pipe("senter")
-            nlp.add_pipe("tagger")
-            nlp.add_pipe("morphologizer")
-            nlp.add_pipe("parser")
-            nlp.add_pipe("ner")
-            nlp.add_pipe("textcat")
+            nlp = get_lang_class(default_lang)()
+            for pipe in default_pipeline:
+                nlp.add_pipe(pipe)
             self.nlp = nlp
 
-    def score(self, examples):
+    def score(self, examples: Iterable[Example]) -> Dict[str, Any]:
         """Evaluate a list of Examples.
 
         examples (Iterable[Example]): The predicted annotations + correct annotations.
         RETURNS (Dict): A dictionary of scores.
+
         DOCS: https://spacy.io/api/scorer#score
         """
         scores = {}
-
         if hasattr(self.nlp.tokenizer, "score"):
             scores.update(self.nlp.tokenizer.score(examples, **self.cfg))
         for name, component in self.nlp.pipeline:
             if hasattr(component, "score"):
                 scores.update(component.score(examples, **self.cfg))
-
         return scores
 
     @staticmethod
-    def score_tokenization(examples, **cfg):
+    def score_tokenization(examples: Iterable[Example], **cfg) -> Dict[str, float]:
         """Returns accuracy and PRF scores for tokenization.
-
         * token_acc: # correct tokens / # gold tokens
         * token_p/r/f: PRF for token character spans
 
         examples (Iterable[Example]): Examples to score
-        RETURNS (dict): A dictionary containing the scores token_acc/p/r/f.
+        RETURNS (Dict[str, float]): A dictionary containing the scores
+            token_acc/p/r/f.
+
+        DOCS: https://spacy.io/api/scorer#score_tokenization
         """
         acc_score = PRFScore()
         prf_score = PRFScore()
@@ -145,16 +152,24 @@ class Scorer:
         }
 
     @staticmethod
-    def score_token_attr(examples, attr, getter=getattr, **cfg):
+    def score_token_attr(
+        examples: Iterable[Example],
+        attr: str,
+        *,
+        getter: Callable[[Token, str], Any] = getattr,
+        **cfg,
+    ) -> Dict[str, float]:
         """Returns an accuracy score for a token-level attribute.
 
         examples (Iterable[Example]): Examples to score
         attr (str): The attribute to score.
-        getter (callable): Defaults to getattr. If provided,
+        getter (Callable[[Token, str], Any]): Defaults to getattr. If provided,
             getter(token, attr) should return the value of the attribute for an
             individual token.
-        RETURNS (dict): A dictionary containing the accuracy score under the
-            key attr_acc.
+        RETURNS (Dict[str, float]): A dictionary containing the accuracy score
+            under the key attr_acc.
+
+        DOCS: https://spacy.io/api/scorer#score_token_attr
         """
         tag_score = PRFScore()
         for example in examples:
@@ -172,17 +187,21 @@ class Scorer:
                     gold_i = align.x2y[token.i].dataXd[0, 0]
                     pred_tags.add((gold_i, getter(token, attr)))
             tag_score.score_set(pred_tags, gold_tags)
-        return {
-            attr + "_acc": tag_score.fscore,
-        }
+        return {f"{attr}_acc": tag_score.fscore}
 
     @staticmethod
-    def score_token_attr_per_feat(examples, attr, getter=getattr, **cfg):
+    def score_token_attr_per_feat(
+        examples: Iterable[Example],
+        attr: str,
+        *,
+        getter: Callable[[Token, str], Any] = getattr,
+        **cfg,
+    ):
         """Return PRF scores per feat for a token attribute in UFEATS format.
 
         examples (Iterable[Example]): Examples to score
         attr (str): The attribute to score.
-        getter (callable): Defaults to getattr. If provided,
+        getter (Callable[[Token, str], Any]): Defaults to getattr. If provided,
             getter(token, attr) should return the value of the attribute for an
             individual token.
         RETURNS (dict): A dictionary containing the per-feat PRF scores unders
@@ -223,20 +242,26 @@ class Scorer:
                 per_feat[field].score_set(
                     pred_per_feat.get(field, set()), gold_per_feat.get(field, set()),
                 )
-        return {
-            attr + "_per_feat": per_feat,
-        }
+        return {f"{attr}_per_feat": per_feat}
 
     @staticmethod
-    def score_spans(examples, attr, getter=getattr, **cfg):
+    def score_spans(
+        examples: Iterable[Example],
+        attr: str,
+        *,
+        getter: Callable[[Doc, str], Any] = getattr,
+        **cfg,
+    ) -> Dict[str, Any]:
         """Returns PRF scores for labeled spans.
 
         examples (Iterable[Example]): Examples to score
         attr (str): The attribute to score.
-        getter (callable): Defaults to getattr. If provided,
+        getter (Callable[[Doc, str], Any]): Defaults to getattr. If provided,
             getter(doc, attr) should return the spans for the individual doc.
-        RETURNS (dict): A dictionary containing the PRF scores under the
-            keys attr_p/r/f and the per-type PRF scores under attr_per_type.
+        RETURNS (Dict[str, Any]): A dictionary containing the PRF scores under
+            the keys attr_p/r/f and the per-type PRF scores under attr_per_type.
+
+        DOCS: https://spacy.io/api/scorer#score_spans
         """
         score = PRFScore()
         score_per_type = dict()
@@ -256,14 +281,12 @@ class Scorer:
             # Find all predidate labels, for all and per type
             gold_spans = set()
             pred_spans = set()
-
             # Special case for ents:
             # If we have missing values in the gold, we can't easily tell
             # whether our NER predictions are true.
             # It seems bad but it's what we've always done.
             if attr == "ents" and not all(token.ent_iob != 0 for token in gold_doc):
                 continue
-
             for span in getter(gold_doc, attr):
                 gold_span = (span.label_, span.start, span.end - 1)
                 gold_spans.add(gold_span)
@@ -279,38 +302,39 @@ class Scorer:
             # Score for all labels
             score.score_set(pred_spans, gold_spans)
         results = {
-            attr + "_p": score.precision,
-            attr + "_r": score.recall,
-            attr + "_f": score.fscore,
-            attr + "_per_type": {k: v.to_dict() for k, v in score_per_type.items()},
+            f"{attr}_p": score.precision,
+            f"{attr}_r": score.recall,
+            f"{attr}_f": score.fscore,
+            f"{attr}_per_type": {k: v.to_dict() for k, v in score_per_type.items()},
         }
         return results
 
     @staticmethod
     def score_cats(
-        examples,
-        attr,
-        getter=getattr,
-        labels=[],
-        multi_label=True,
-        positive_label=None,
-        **cfg
-    ):
+        examples: Iterable[Example],
+        attr: str,
+        *,
+        getter: Callable[[Doc, str], Any] = getattr,
+        labels: Iterable[str] = tuple(),
+        multi_label: bool = True,
+        positive_label: Optional[str] = None,
+        **cfg,
+    ) -> Dict[str, Any]:
         """Returns PRF and ROC AUC scores for a doc-level attribute with a
         dict with scores for each label like Doc.cats. The reported overall
         score depends on the scorer settings.
 
         examples (Iterable[Example]): Examples to score
         attr (str): The attribute to score.
-        getter (callable): Defaults to getattr. If provided,
+        getter (Callable[[Doc, str], Any]): Defaults to getattr. If provided,
             getter(doc, attr) should return the values for the individual doc.
         labels (Iterable[str]): The set of possible labels. Defaults to [].
         multi_label (bool): Whether the attribute allows multiple labels.
             Defaults to True.
         positive_label (str): The positive label for a binary task with
             exclusive classes. Defaults to None.
-        RETURNS (dict): A dictionary containing the scores, with inapplicable
-                scores as None:
+        RETURNS (Dict[str, Any]): A dictionary containing the scores, with
+            inapplicable scores as None:
             for all:
                 attr_score (one of attr_f / attr_macro_f / attr_macro_auc),
                 attr_score_desc (text description of the overall score),
@@ -319,6 +343,8 @@ class Scorer:
             for binary exclusive with positive label: attr_p/r/f
             for 3+ exclusive classes, macro-averaged fscore: attr_macro_f
             for multilabel, macro-averaged AUC: attr_macro_auc
+
+        DOCS: https://spacy.io/api/scorer#score_cats
         """
         score = PRFScore()
         f_per_type = dict()
@@ -367,64 +393,67 @@ class Scorer:
                     )
                 )
         results = {
-            attr + "_score": None,
-            attr + "_score_desc": None,
-            attr + "_p": None,
-            attr + "_r": None,
-            attr + "_f": None,
-            attr + "_macro_f": None,
-            attr + "_macro_auc": None,
-            attr + "_f_per_type": {k: v.to_dict() for k, v in f_per_type.items()},
-            attr + "_auc_per_type": {k: v.score for k, v in auc_per_type.items()},
+            f"{attr}_score": None,
+            f"{attr}_score_desc": None,
+            f"{attr}_p": None,
+            f"{attr}_r": None,
+            f"{attr}_f": None,
+            f"{attr}_macro_f": None,
+            f"{attr}_macro_auc": None,
+            f"{attr}_f_per_type": {k: v.to_dict() for k, v in f_per_type.items()},
+            f"{attr}_auc_per_type": {k: v.score for k, v in auc_per_type.items()},
         }
         if len(labels) == 2 and not multi_label and positive_label:
-            results[attr + "_p"] = score.precision
-            results[attr + "_r"] = score.recall
-            results[attr + "_f"] = score.fscore
-            results[attr + "_score"] = results[attr + "_f"]
-            results[attr + "_score_desc"] = "F (" + positive_label + ")"
+            results[f"{attr}_p"] = score.precision
+            results[f"{attr}_r"] = score.recall
+            results[f"{attr}_f"] = score.fscore
+            results[f"{attr}_score"] = results[f"{attr}_f"]
+            results[f"{attr}_score_desc"] = f"F ({positive_label})"
         elif not multi_label:
-            results[attr + "_macro_f"] = sum(
+            results[f"{attr}_macro_f"] = sum(
                 [score.fscore for label, score in f_per_type.items()]
             ) / (len(f_per_type) + 1e-100)
-            results[attr + "_score"] = results[attr + "_macro_f"]
-            results[attr + "_score_desc"] = "macro F"
+            results[f"{attr}_score"] = results[f"{attr}_macro_f"]
+            results[f"{attr}_score_desc"] = "macro F"
         else:
-            results[attr + "_macro_auc"] = max(
+            results[f"{attr}_macro_auc"] = max(
                 sum([score.score for label, score in auc_per_type.items()])
                 / (len(auc_per_type) + 1e-100),
                 -1,
             )
-            results[attr + "_score"] = results[attr + "_macro_auc"]
-            results[attr + "_score_desc"] = "macro AUC"
+            results[f"{attr}_score"] = results[f"{attr}_macro_auc"]
+            results[f"{attr}_score_desc"] = "macro AUC"
         return results
 
     @staticmethod
     def score_deps(
-        examples,
-        attr,
-        getter=getattr,
-        head_attr="head",
-        head_getter=getattr,
-        ignore_labels=tuple(),
-        **cfg
-    ):
+        examples: Iterable[Example],
+        attr: str,
+        *,
+        getter: Callable[[Token, str], Any] = getattr,
+        head_attr: str = "head",
+        head_getter: Callable[[Token, str], Any] = getattr,
+        ignore_labels: Tuple[str] = tuple(),
+        **cfg,
+    ) -> Dict[str, Any]:
         """Returns the UAS, LAS, and LAS per type scores for dependency
         parses.
 
         examples (Iterable[Example]): Examples to score
         attr (str): The attribute containing the dependency label.
-        getter (callable): Defaults to getattr. If provided,
+        getter (Callable[[Token, str], Any]): Defaults to getattr. If provided,
             getter(token, attr) should return the value of the attribute for an
             individual token.
         head_attr (str): The attribute containing the head token. Defaults to
             'head'.
-        head_getter (callable): Defaults to getattr. If provided,
+        head_getter (Callable[[Token, str], Any]): Defaults to getattr. If provided,
             head_getter(token, attr) should return the value of the head for an
             individual token.
         ignore_labels (Tuple): Labels to ignore while scoring (e.g., punct).
-        RETURNS (dict): A dictionary containing the scores:
+        RETURNS (Dict[str, Any]): A dictionary containing the scores:
             attr_uas, attr_las, and attr_las_per_type.
+
+        DOCS: https://spacy.io/api/scorer#score_deps
         """
         unlabelled = PRFScore()
         labelled = PRFScore()
@@ -482,10 +511,11 @@ class Scorer:
                 set(item[:2] for item in pred_deps), set(item[:2] for item in gold_deps)
             )
         return {
-            attr + "_uas": unlabelled.fscore,
-            attr + "_las": labelled.fscore,
-            attr
-            + "_las_per_type": {k: v.to_dict() for k, v in labelled_per_dep.items()},
+            f"{attr}_uas": unlabelled.fscore,
+            f"{attr}_las": labelled.fscore,
+            f"{attr}_las_per_type": {
+                k: v.to_dict() for k, v in labelled_per_dep.items()
+            },
         }
 
 
diff --git a/spacy/syntax/__init__.pxd b/spacy/syntax/__init__.pxd
deleted file mode 100644
index e69de29bb..000000000
diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py
index 91b0ec922..79e8f31c0 100644
--- a/spacy/tests/doc/test_span.py
+++ b/spacy/tests/doc/test_span.py
@@ -282,3 +282,15 @@ def test_span_eq_hash(doc, doc_not_parsed):
     assert hash(doc[0:2]) == hash(doc[0:2])
     assert hash(doc[0:2]) != hash(doc[1:3])
     assert hash(doc[0:2]) != hash(doc_not_parsed[0:2])
+
+
+def test_span_boundaries(doc):
+    start = 1
+    end = 5
+    span = doc[start:end]
+    for i in range(start, end):
+        assert span[i - start] == doc[i]
+    with pytest.raises(IndexError):
+        span[-5]
+    with pytest.raises(IndexError):
+        span[5]
diff --git a/spacy/tests/lang/zh/test_serialize.py b/spacy/tests/lang/zh/test_serialize.py
index 015f92785..1c6fdf419 100644
--- a/spacy/tests/lang/zh/test_serialize.py
+++ b/spacy/tests/lang/zh/test_serialize.py
@@ -29,9 +29,7 @@ def test_zh_tokenizer_serialize_jieba(zh_tokenizer_jieba):
 def test_zh_tokenizer_serialize_pkuseg_with_processors(zh_tokenizer_pkuseg):
     nlp = Chinese(
         meta={
-            "tokenizer": {
-                "config": {"segmenter": "pkuseg", "pkuseg_model": "medicine",}
-            }
+            "tokenizer": {"config": {"segmenter": "pkuseg", "pkuseg_model": "medicine"}}
         }
     )
     zh_tokenizer_serialize(nlp.tokenizer)
diff --git a/spacy/tests/matcher/test_matcher_logic.py b/spacy/tests/matcher/test_matcher_logic.py
index 8f4c13471..5f4c2991a 100644
--- a/spacy/tests/matcher/test_matcher_logic.py
+++ b/spacy/tests/matcher/test_matcher_logic.py
@@ -21,7 +21,7 @@ re_pattern5 = "B*A*B"
 longest1 = "A A A A A"
 longest2 = "A A A A A"
 longest3 = "A A"
-longest4 = "B A A A A A B"      # "FIRST" would be "B B"
+longest4 = "B A A A A A B"  # "FIRST" would be "B B"
 longest5 = "B B A A A A A B"
 
 
diff --git a/spacy/tests/parser/test_arc_eager_oracle.py b/spacy/tests/parser/test_arc_eager_oracle.py
index 77e142215..fd1880030 100644
--- a/spacy/tests/parser/test_arc_eager_oracle.py
+++ b/spacy/tests/parser/test_arc_eager_oracle.py
@@ -4,8 +4,8 @@ from spacy import registry
 from spacy.gold import Example
 from spacy.pipeline import DependencyParser
 from spacy.tokens import Doc
-from spacy.syntax.nonproj import projectivize
-from spacy.syntax.arc_eager import ArcEager
+from spacy.pipeline._parser_internals.nonproj import projectivize
+from spacy.pipeline._parser_internals.arc_eager import ArcEager
 from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
 
 
diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py
index 4a6bf73a5..dbeb0a9cb 100644
--- a/spacy/tests/parser/test_ner.py
+++ b/spacy/tests/parser/test_ner.py
@@ -5,7 +5,7 @@ from spacy.lang.en import English
 
 from spacy.language import Language
 from spacy.lookups import Lookups
-from spacy.syntax.ner import BiluoPushDown
+from spacy.pipeline._parser_internals.ner import BiluoPushDown
 from spacy.gold import Example
 from spacy.tokens import Doc
 from spacy.vocab import Vocab
@@ -210,7 +210,7 @@ def test_train_empty():
     nlp.begin_training()
     for itn in range(2):
         losses = {}
-        batches = util.minibatch(train_examples)
+        batches = util.minibatch(train_examples, size=8)
         for batch in batches:
             nlp.update(batch, losses=losses)
 
diff --git a/spacy/tests/parser/test_neural_parser.py b/spacy/tests/parser/test_neural_parser.py
index feae52f7f..6594c7e78 100644
--- a/spacy/tests/parser/test_neural_parser.py
+++ b/spacy/tests/parser/test_neural_parser.py
@@ -3,8 +3,8 @@ import pytest
 from spacy import registry
 from spacy.gold import Example
 from spacy.vocab import Vocab
-from spacy.syntax.arc_eager import ArcEager
-from spacy.syntax.nn_parser import Parser
+from spacy.pipeline._parser_internals.arc_eager import ArcEager
+from spacy.pipeline.transition_parser import Parser
 from spacy.tokens.doc import Doc
 from thinc.api import Model
 from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
diff --git a/spacy/tests/parser/test_nonproj.py b/spacy/tests/parser/test_nonproj.py
index 496ec7e03..41da7cf49 100644
--- a/spacy/tests/parser/test_nonproj.py
+++ b/spacy/tests/parser/test_nonproj.py
@@ -1,7 +1,7 @@
 import pytest
-from spacy.syntax.nonproj import ancestors, contains_cycle, is_nonproj_arc
-from spacy.syntax.nonproj import is_nonproj_tree
-from spacy.syntax import nonproj
+from spacy.pipeline._parser_internals.nonproj import ancestors, contains_cycle
+from spacy.pipeline._parser_internals.nonproj import is_nonproj_tree, is_nonproj_arc
+from spacy.pipeline._parser_internals import nonproj
 
 from ..util import get_doc
 
diff --git a/spacy/tests/pipeline/test_analysis.py b/spacy/tests/pipeline/test_analysis.py
index 4e1407707..df3d7dff5 100644
--- a/spacy/tests/pipeline/test_analysis.py
+++ b/spacy/tests/pipeline/test_analysis.py
@@ -1,15 +1,10 @@
-import spacy.language
 from spacy.language import Language
-from spacy.pipe_analysis import print_summary, validate_attrs
-from spacy.pipe_analysis import get_assigns_for_attr, get_requires_for_attr
-from spacy.pipe_analysis import count_pipeline_interdependencies
+from spacy.pipe_analysis import get_attr_info, validate_attrs
 from mock import Mock
 import pytest
 
 
 def test_component_decorator_assigns():
-    spacy.language.ENABLE_PIPELINE_ANALYSIS = True
-
     @Language.component("c1", assigns=["token.tag", "doc.tensor"])
     def test_component1(doc):
         return doc
@@ -32,10 +27,11 @@ def test_component_decorator_assigns():
 
     nlp = Language()
     nlp.add_pipe("c1")
-    with pytest.warns(UserWarning):
-        nlp.add_pipe("c2")
+    nlp.add_pipe("c2")
+    problems = nlp.analyze_pipes()["problems"]
+    assert problems["c2"] == ["token.pos"]
     nlp.add_pipe("c3")
-    assert get_assigns_for_attr(nlp, "doc.tensor") == ["c1", "c2"]
+    assert get_attr_info(nlp, "doc.tensor")["assigns"] == ["c1", "c2"]
     nlp.add_pipe("c1", name="c4")
     test_component4_meta = nlp.get_pipe_meta("c1")
     assert test_component4_meta.factory == "c1"
@@ -43,9 +39,8 @@ def test_component_decorator_assigns():
     assert not Language.has_factory("c4")
     assert nlp.pipe_factories["c1"] == "c1"
     assert nlp.pipe_factories["c4"] == "c1"
-    assert get_assigns_for_attr(nlp, "doc.tensor") == ["c1", "c2", "c4"]
-    assert get_requires_for_attr(nlp, "token.pos") == ["c2"]
-    assert print_summary(nlp, no_print=True)
+    assert get_attr_info(nlp, "doc.tensor")["assigns"] == ["c1", "c2", "c4"]
+    assert get_attr_info(nlp, "token.pos")["requires"] == ["c2"]
     assert nlp("hello world")
 
 
@@ -100,7 +95,6 @@ def test_analysis_validate_attrs_invalid(attr):
 
 def test_analysis_validate_attrs_remove_pipe():
     """Test that attributes are validated correctly on remove."""
-    spacy.language.ENABLE_PIPELINE_ANALYSIS = True
 
     @Language.component("pipe_analysis_c6", assigns=["token.tag"])
     def c1(doc):
@@ -112,26 +106,9 @@ def test_analysis_validate_attrs_remove_pipe():
 
     nlp = Language()
     nlp.add_pipe("pipe_analysis_c6")
-    with pytest.warns(UserWarning):
-        nlp.add_pipe("pipe_analysis_c7")
-    with pytest.warns(None) as record:
-        nlp.remove_pipe("pipe_analysis_c7")
-    assert not record.list
-
-
-def test_pipe_interdependencies():
-    prefix = "test_pipe_interdependencies"
-
-    @Language.component(f"{prefix}.fancifier", assigns=("doc._.fancy",))
-    def fancifier(doc):
-        return doc
-
-    @Language.component(f"{prefix}.needer", requires=("doc._.fancy",))
-    def needer(doc):
-        return doc
-
-    nlp = Language()
-    nlp.add_pipe(f"{prefix}.fancifier")
-    nlp.add_pipe(f"{prefix}.needer")
-    counts = count_pipeline_interdependencies(nlp)
-    assert counts == [1, 0]
+    nlp.add_pipe("pipe_analysis_c7")
+    problems = nlp.analyze_pipes()["problems"]
+    assert problems["pipe_analysis_c7"] == ["token.pos"]
+    nlp.remove_pipe("pipe_analysis_c7")
+    problems = nlp.analyze_pipes()["problems"]
+    assert all(p == [] for p in problems.values())
diff --git a/spacy/tests/pipeline/test_attributeruler.py b/spacy/tests/pipeline/test_attributeruler.py
new file mode 100644
index 000000000..bcde7bf63
--- /dev/null
+++ b/spacy/tests/pipeline/test_attributeruler.py
@@ -0,0 +1,207 @@
+import pytest
+import numpy
+from spacy.lang.en import English
+from spacy.pipeline import AttributeRuler
+from spacy import util, registry
+
+from ..util import get_doc, make_tempdir
+
+
+@pytest.fixture
+def nlp():
+    return English()
+
+
+@pytest.fixture
+def pattern_dicts():
+    return [
+        {
+            "patterns": [[{"ORTH": "a"}], [{"ORTH": "irrelevant"}]],
+            "attrs": {"LEMMA": "the", "MORPH": "Case=Nom|Number=Plur"},
+        },
+        # one pattern sets the lemma
+        {"patterns": [[{"ORTH": "test"}]], "attrs": {"LEMMA": "cat"}},
+        # another pattern sets the morphology
+        {
+            "patterns": [[{"ORTH": "test"}]],
+            "attrs": {"MORPH": "Case=Nom|Number=Sing"},
+            "index": 0,
+        },
+    ]
+
+
+@registry.assets("attribute_ruler_patterns")
+def attribute_ruler_patterns():
+    return [
+        {
+            "patterns": [[{"ORTH": "a"}], [{"ORTH": "irrelevant"}]],
+            "attrs": {"LEMMA": "the", "MORPH": "Case=Nom|Number=Plur"},
+        },
+        # one pattern sets the lemma
+        {"patterns": [[{"ORTH": "test"}]], "attrs": {"LEMMA": "cat"}},
+        # another pattern sets the morphology
+        {
+            "patterns": [[{"ORTH": "test"}]],
+            "attrs": {"MORPH": "Case=Nom|Number=Sing"},
+            "index": 0,
+        },
+    ]
+
+
+@pytest.fixture
+def tag_map():
+    return {
+        ".": {"POS": "PUNCT", "PunctType": "peri"},
+        ",": {"POS": "PUNCT", "PunctType": "comm"},
+    }
+
+
+@pytest.fixture
+def morph_rules():
+    return {"DT": {"the": {"POS": "DET", "LEMMA": "a", "Case": "Nom"}}}
+
+
+def test_attributeruler_init(nlp, pattern_dicts):
+    a = nlp.add_pipe("attribute_ruler")
+    for p in pattern_dicts:
+        a.add(**p)
+
+    doc = nlp("This is a test.")
+    assert doc[2].lemma_ == "the"
+    assert doc[2].morph_ == "Case=Nom|Number=Plur"
+    assert doc[3].lemma_ == "cat"
+    assert doc[3].morph_ == "Case=Nom|Number=Sing"
+
+
+def test_attributeruler_init_patterns(nlp, pattern_dicts):
+    # initialize with patterns
+    nlp.add_pipe("attribute_ruler", config={"pattern_dicts": pattern_dicts})
+    doc = nlp("This is a test.")
+    assert doc[2].lemma_ == "the"
+    assert doc[2].morph_ == "Case=Nom|Number=Plur"
+    assert doc[3].lemma_ == "cat"
+    assert doc[3].morph_ == "Case=Nom|Number=Sing"
+    nlp.remove_pipe("attribute_ruler")
+    # initialize with patterns from asset
+    nlp.add_pipe(
+        "attribute_ruler",
+        config={"pattern_dicts": {"@assets": "attribute_ruler_patterns"}},
+    )
+    doc = nlp("This is a test.")
+    assert doc[2].lemma_ == "the"
+    assert doc[2].morph_ == "Case=Nom|Number=Plur"
+    assert doc[3].lemma_ == "cat"
+    assert doc[3].morph_ == "Case=Nom|Number=Sing"
+
+
+def test_attributeruler_tag_map(nlp, tag_map):
+    a = AttributeRuler(nlp.vocab)
+    a.load_from_tag_map(tag_map)
+    doc = get_doc(
+        nlp.vocab,
+        words=["This", "is", "a", "test", "."],
+        tags=["DT", "VBZ", "DT", "NN", "."],
+    )
+    doc = a(doc)
+
+    for i, token in enumerate(doc):
+        if i == 4:  # only the final "." carries a tag listed in tag_map
+            assert token.pos_ == "PUNCT"
+            assert token.morph_ == "PunctType=peri"
+        else:
+            assert token.pos_ == ""
+            assert token.morph_ == ""
+
+
+def test_attributeruler_morph_rules(nlp, morph_rules):
+    a = AttributeRuler(nlp.vocab)
+    a.load_from_morph_rules(morph_rules)
+    doc = get_doc(
+        nlp.vocab,
+        words=["This", "is", "the", "test", "."],
+        tags=["DT", "VBZ", "DT", "NN", "."],
+    )
+    doc = a(doc)
+
+    for i, token in enumerate(doc):
+        if i != 2:  # morph_rules only has an entry for "the" tagged DT
+            assert token.pos_ == ""
+            assert token.morph_ == ""
+        else:
+            assert token.pos_ == "DET"
+            assert token.lemma_ == "a"
+            assert token.morph_ == "Case=Nom"
+
+
+def test_attributeruler_indices(nlp):
+    a = nlp.add_pipe("attribute_ruler")
+    a.add(
+        [[{"ORTH": "a"}, {"ORTH": "test"}]],
+        {"LEMMA": "the", "MORPH": "Case=Nom|Number=Plur"},
+        index=0,
+    )
+    a.add(
+        [[{"ORTH": "This"}, {"ORTH": "is"}]],
+        {"LEMMA": "was", "MORPH": "Case=Nom|Number=Sing"},
+        index=1,
+    )
+    a.add([[{"ORTH": "a"}, {"ORTH": "test"}]], {"LEMMA": "cat"}, index=-1)
+
+    text = "This is a test."
+    doc = nlp(text)
+
+    for i, token in enumerate(doc):
+        if i == 1:
+            assert token.lemma_ == "was"
+            assert token.morph_ == "Case=Nom|Number=Sing"
+        elif i == 2:
+            assert token.lemma_ == "the"
+            assert token.morph_ == "Case=Nom|Number=Plur"
+        elif i == 3:
+            assert token.lemma_ == "cat"
+        else:
+            assert token.morph_ == ""
+
+    # raises an error when trying to modify a token outside of the match
+    a.add([[{"ORTH": "a"}, {"ORTH": "test"}]], {"LEMMA": "cat"}, index=2)
+    with pytest.raises(ValueError):
+        doc = nlp(text)
+
+    # raises an error when trying to modify a token outside of the match
+    a.add([[{"ORTH": "a"}, {"ORTH": "test"}]], {"LEMMA": "cat"}, index=10)
+    with pytest.raises(ValueError):
+        doc = nlp(text)
+
+
+def test_attributeruler_patterns_prop(nlp, pattern_dicts):
+    a = nlp.add_pipe("attribute_ruler")
+    a.add_patterns(pattern_dicts)
+
+    for p1, p2 in zip(pattern_dicts, a.patterns):
+        assert p1["patterns"] == p2["patterns"]
+        assert p1["attrs"] == p2["attrs"]
+        if "index" in p1:  # membership test: an explicit index of 0 is falsy
+            assert p1["index"] == p2["index"]
+
+
+def test_attributeruler_serialize(nlp, pattern_dicts):
+    a = nlp.add_pipe("attribute_ruler")
+    a.add_patterns(pattern_dicts)
+
+    text = "This is a test."
+    attrs = ["ORTH", "LEMMA", "MORPH"]
+    doc = nlp(text)
+
+    # bytes roundtrip: the reloaded ruler must annotate a fresh doc identically
+    a_reloaded = AttributeRuler(nlp.vocab).from_bytes(a.to_bytes())
+    assert a.to_bytes() == a_reloaded.to_bytes()
+    doc1 = a_reloaded(nlp.make_doc(text))
+    assert numpy.array_equal(doc.to_array(attrs), doc1.to_array(attrs))
+
+    # disk roundtrip: the whole pipeline is saved and loaded back from disk
+    with make_tempdir() as tmp_dir:
+        nlp.to_disk(tmp_dir)
+        nlp2 = util.load_model_from_path(tmp_dir)
+        doc2 = nlp2(text)
+        assert nlp2.get_pipe("attribute_ruler").to_bytes() == a.to_bytes()
+        assert numpy.array_equal(doc.to_array(attrs), doc2.to_array(attrs))
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index 4002eafe3..bb93cf118 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -21,7 +21,8 @@ def assert_almost_equal(a, b):
 
 def test_kb_valid_entities(nlp):
     """Test the valid construction of a KB with 3 entities and two aliases"""
-    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=3)
+    mykb = KnowledgeBase(entity_vector_length=3)
+    mykb.initialize(nlp.vocab)
 
     # adding entities
     mykb.add_entity(entity="Q1", freq=19, entity_vector=[8, 4, 3])
@@ -50,7 +51,8 @@ def test_kb_valid_entities(nlp):
 
 def test_kb_invalid_entities(nlp):
     """Test the invalid construction of a KB with an alias linked to a non-existing entity"""
-    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
+    mykb = KnowledgeBase(entity_vector_length=1)
+    mykb.initialize(nlp.vocab)
 
     # adding entities
     mykb.add_entity(entity="Q1", freq=19, entity_vector=[1])
@@ -66,7 +68,8 @@ def test_kb_invalid_entities(nlp):
 
 def test_kb_invalid_probabilities(nlp):
     """Test the invalid construction of a KB with wrong prior probabilities"""
-    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
+    mykb = KnowledgeBase(entity_vector_length=1)
+    mykb.initialize(nlp.vocab)
 
     # adding entities
     mykb.add_entity(entity="Q1", freq=19, entity_vector=[1])
@@ -80,7 +83,8 @@ def test_kb_invalid_probabilities(nlp):
 
 def test_kb_invalid_combination(nlp):
     """Test the invalid construction of a KB with non-matching entity and probability lists"""
-    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
+    mykb = KnowledgeBase(entity_vector_length=1)
+    mykb.initialize(nlp.vocab)
 
     # adding entities
     mykb.add_entity(entity="Q1", freq=19, entity_vector=[1])
@@ -96,7 +100,8 @@ def test_kb_invalid_combination(nlp):
 
 def test_kb_invalid_entity_vector(nlp):
     """Test the invalid construction of a KB with non-matching entity vector lengths"""
-    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=3)
+    mykb = KnowledgeBase(entity_vector_length=3)
+    mykb.initialize(nlp.vocab)
 
     # adding entities
     mykb.add_entity(entity="Q1", freq=19, entity_vector=[1, 2, 3])
@@ -106,9 +111,47 @@ def test_kb_invalid_entity_vector(nlp):
         mykb.add_entity(entity="Q2", freq=5, entity_vector=[2])
 
 
+def test_kb_default(nlp):
+    """Test that the default (empty) KB is loaded when not providing a config"""
+    entity_linker = nlp.add_pipe("entity_linker", config={})
+    assert len(entity_linker.kb) == 0
+    assert entity_linker.kb.get_size_entities() == 0
+    assert entity_linker.kb.get_size_aliases() == 0
+    # default value from pipeline.entity_linker
+    assert entity_linker.kb.entity_vector_length == 64
+
+
+def test_kb_custom_length(nlp):
+    """Test that the default (empty) KB can be configured with a custom entity length"""
+    entity_linker = nlp.add_pipe(
+        "entity_linker", config={"kb": {"entity_vector_length": 35}}
+    )
+    assert len(entity_linker.kb) == 0
+    assert entity_linker.kb.get_size_entities() == 0
+    assert entity_linker.kb.get_size_aliases() == 0
+    assert entity_linker.kb.entity_vector_length == 35
+
+
+def test_kb_undefined(nlp):
+    """Test that the EL can't train without defining a KB"""
+    entity_linker = nlp.add_pipe("entity_linker", config={})
+    with pytest.raises(ValueError):
+        entity_linker.begin_training()
+
+
+def test_kb_empty(nlp):
+    """Test that the EL can't train with an empty KB"""
+    config = {"kb": {"@assets": "spacy.EmptyKB.v1", "entity_vector_length": 342}}
+    entity_linker = nlp.add_pipe("entity_linker", config=config)
+    assert len(entity_linker.kb) == 0
+    with pytest.raises(ValueError):
+        entity_linker.begin_training()
+
+
 def test_candidate_generation(nlp):
     """Test correct candidate generation"""
-    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
+    mykb = KnowledgeBase(entity_vector_length=1)
+    mykb.initialize(nlp.vocab)
 
     # adding entities
     mykb.add_entity(entity="Q1", freq=27, entity_vector=[1])
@@ -133,7 +176,8 @@ def test_candidate_generation(nlp):
 
 def test_append_alias(nlp):
     """Test that we can append additional alias-entity pairs"""
-    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
+    mykb = KnowledgeBase(entity_vector_length=1)
+    mykb.initialize(nlp.vocab)
 
     # adding entities
     mykb.add_entity(entity="Q1", freq=27, entity_vector=[1])
@@ -163,7 +207,8 @@ def test_append_alias(nlp):
 
 def test_append_invalid_alias(nlp):
     """Test that append an alias will throw an error if prior probs are exceeding 1"""
-    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
+    mykb = KnowledgeBase(entity_vector_length=1)
+    mykb.initialize(nlp.vocab)
 
     # adding entities
     mykb.add_entity(entity="Q1", freq=27, entity_vector=[1])
@@ -184,7 +229,8 @@ def test_preserving_links_asdoc(nlp):
 
     @registry.assets.register("myLocationsKB.v1")
     def dummy_kb() -> KnowledgeBase:
-        mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
+        mykb = KnowledgeBase(entity_vector_length=1)
+        mykb.initialize(nlp.vocab)
         # adding entities
         mykb.add_entity(entity="Q1", freq=19, entity_vector=[1])
         mykb.add_entity(entity="Q2", freq=8, entity_vector=[1])
@@ -289,7 +335,8 @@ def test_overfitting_IO():
         # create artificial KB - assign same prior weight to the two russ cochran's
         # Q2146908 (Russ Cochran): American golfer
         # Q7381115 (Russ Cochran): publisher
-        mykb = KnowledgeBase(nlp.vocab, entity_vector_length=3)
+        mykb = KnowledgeBase(entity_vector_length=3)
+        mykb.initialize(nlp.vocab)
         mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
         mykb.add_entity(entity="Q7381115", freq=12, entity_vector=[9, 1, -7])
         mykb.add_alias(
diff --git a/spacy/tests/pipeline/test_pipe_factories.py b/spacy/tests/pipeline/test_pipe_factories.py
index 64c6c2d6f..9948f6bcd 100644
--- a/spacy/tests/pipeline/test_pipe_factories.py
+++ b/spacy/tests/pipeline/test_pipe_factories.py
@@ -8,6 +8,8 @@ from thinc.api import Model, Linear
 from thinc.config import ConfigValidationError
 from pydantic import StrictInt, StrictStr
 
+from ..util import make_tempdir
+
 
 def test_pipe_function_component():
     name = "test_component"
@@ -374,3 +376,65 @@ def test_language_factories_scores():
     cfg = nlp.config["training"]
     expected_weights = {"a1": 0.25, "a2": 0.25, "b1": 0.1, "b2": 0.35, "b3": 0.05}
     assert cfg["score_weights"] == expected_weights
+
+
+def test_pipe_factories_from_source():
+    """Test adding components from a source model."""
+    source_nlp = English()
+    source_nlp.add_pipe("tagger", name="my_tagger")
+    nlp = English()
+    with pytest.raises(ValueError):
+        nlp.add_pipe("my_tagger", source="en_core_web_sm")
+    nlp.add_pipe("my_tagger", source=source_nlp)
+    assert "my_tagger" in nlp.pipe_names
+    with pytest.raises(KeyError):
+        nlp.add_pipe("custom", source=source_nlp)
+
+
+def test_pipe_factories_from_source_custom():
+    """Test adding components from a source model with custom components."""
+    name = "test_pipe_factories_from_source_custom"
+
+    @Language.factory(name, default_config={"arg": "hello"})
+    def test_factory(nlp, name, arg: str):
+        return lambda doc: doc
+
+    source_nlp = English()
+    source_nlp.add_pipe("tagger")
+    source_nlp.add_pipe(name, config={"arg": "world"})
+    nlp = English()
+    nlp.add_pipe(name, source=source_nlp)
+    assert name in nlp.pipe_names
+    assert nlp.get_pipe_meta(name).default_config["arg"] == "hello"
+    config = nlp.config["components"][name]
+    assert config["factory"] == name
+    assert config["arg"] == "world"
+
+
+def test_pipe_factories_from_source_config():
+    name = "test_pipe_factories_from_source_config"
+
+    @Language.factory(name, default_config={"arg": "hello"})
+    def test_factory(nlp, name, arg: str):
+        return lambda doc: doc
+
+    source_nlp = English()
+    source_nlp.add_pipe("tagger")
+    source_nlp.add_pipe(name, name="yolo", config={"arg": "world"})
+    dest_nlp_cfg = {"lang": "en", "pipeline": ["parser", "custom"]}
+    with make_tempdir() as tempdir:
+        source_nlp.to_disk(tempdir)
+        dest_components_cfg = {
+            "parser": {"factory": "parser"},
+            "custom": {"source": str(tempdir), "component": "yolo"},
+        }
+        dest_config = {"nlp": dest_nlp_cfg, "components": dest_components_cfg}
+        nlp = English.from_config(dest_config)
+    assert nlp.pipe_names == ["parser", "custom"]
+    assert nlp.pipe_factories == {"parser": "parser", "custom": name}
+    meta = nlp.get_pipe_meta("custom")
+    assert meta.factory == name
+    assert meta.default_config["arg"] == "hello"
+    config = nlp.config["components"]["custom"]
+    assert config["factory"] == name
+    assert config["arg"] == "world"
diff --git a/spacy/tests/pipeline/test_pipe_methods.py b/spacy/tests/pipeline/test_pipe_methods.py
index e37375bf1..0141708b4 100644
--- a/spacy/tests/pipeline/test_pipe_methods.py
+++ b/spacy/tests/pipeline/test_pipe_methods.py
@@ -70,6 +70,14 @@ def test_replace_pipe(nlp, name, replacement, invalid_replacement):
     assert nlp.get_pipe(name) == nlp.create_pipe(replacement)
 
 
+def test_replace_last_pipe(nlp):
+    nlp.add_pipe("sentencizer")
+    nlp.add_pipe("ner")
+    assert nlp.pipe_names == ["sentencizer", "ner"]
+    nlp.replace_pipe("ner", "ner")
+    assert nlp.pipe_names == ["sentencizer", "ner"]
+
+
 @pytest.mark.parametrize("old_name,new_name", [("old_pipe", "new_pipe")])
 def test_rename_pipe(nlp, old_name, new_name):
     with pytest.raises(ValueError):
diff --git a/spacy/tests/pipeline/test_simple_ner.py b/spacy/tests/pipeline/test_simple_ner.py
index 024d7bd26..b012a2cd6 100644
--- a/spacy/tests/pipeline/test_simple_ner.py
+++ b/spacy/tests/pipeline/test_simple_ner.py
@@ -1,418 +1,45 @@
-import pytest
-from collections import namedtuple
-from thinc.api import NumpyOps
-from spacy.ml._biluo import BILUO, _get_transition_table
+from spacy.lang.en import English
+from spacy.gold import Example
+from spacy import util
+from ..util import make_tempdir
 
 
-@pytest.fixture(
-    params=[
-        ["PER", "ORG", "LOC", "MISC"],
-        ["GPE", "PERSON", "NUMBER", "CURRENCY", "EVENT"],
-    ]
-)
-def labels(request):
-    return request.param
+TRAIN_DATA = [
+    ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
+    ("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}),
+]
 
 
-@pytest.fixture
-def ops():
-    return NumpyOps()
+def test_overfitting_IO():
+    # Simple test to try and quickly overfit the SimpleNER component - ensuring the ML models work correctly
+    nlp = English()
+    ner = nlp.add_pipe("simple_ner")
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for ent in annotations.get("entities"):
+            ner.add_label(ent[2])
+    optimizer = nlp.begin_training()
 
+    for i in range(50):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+    assert losses["ner"] < 0.0001
 
-def _get_actions(labels):
-    action_names = (
-        [f"B{label}" for label in labels]
-        + [f"I{label}" for label in labels]
-        + [f"L{label}" for label in labels]
-        + [f"U{label}" for label in labels]
-        + ["O"]
-    )
-    A = namedtuple("actions", action_names)
-    return A(**{name: i for i, name in enumerate(action_names)})
+    # test the trained model
+    test_text = "I like London."
+    doc = nlp(test_text)
+    ents = doc.ents
+    assert len(ents) == 1
+    assert ents[0].text == "London"
+    assert ents[0].label_ == "LOC"
 
-
-def test_init_biluo_layer(labels):
-    model = BILUO()
-    model.set_dim("nO", model.attrs["get_num_actions"](len(labels)))
-    model.initialize()
-    assert model.get_dim("nO") == len(labels) * 4 + 1
-
-
-def test_transition_table(ops):
-    labels = ["per", "loc", "org"]
-    table = _get_transition_table(len(labels))
-    a = _get_actions(labels)
-    assert table.shape == (2, len(a), len(a))
-    # Not last token, prev action was B
-    assert table[0, a.Bper, a.Bper] == 0
-    assert table[0, a.Bper, a.Bloc] == 0
-    assert table[0, a.Bper, a.Borg] == 0
-    assert table[0, a.Bper, a.Iper] == 1
-    assert table[0, a.Bper, a.Iloc] == 0
-    assert table[0, a.Bper, a.Iorg] == 0
-    assert table[0, a.Bper, a.Lper] == 1
-    assert table[0, a.Bper, a.Lloc] == 0
-    assert table[0, a.Bper, a.Lorg] == 0
-    assert table[0, a.Bper, a.Uper] == 0
-    assert table[0, a.Bper, a.Uloc] == 0
-    assert table[0, a.Bper, a.Uorg] == 0
-    assert table[0, a.Bper, a.O] == 0
-
-    assert table[0, a.Bloc, a.Bper] == 0
-    assert table[0, a.Bloc, a.Bloc] == 0
-    assert table[0, a.Bloc, a.Borg] == 0
-    assert table[0, a.Bloc, a.Iper] == 0
-    assert table[0, a.Bloc, a.Iloc] == 1
-    assert table[0, a.Bloc, a.Iorg] == 0
-    assert table[0, a.Bloc, a.Lper] == 0
-    assert table[0, a.Bloc, a.Lloc] == 1
-    assert table[0, a.Bloc, a.Lorg] == 0
-    assert table[0, a.Bloc, a.Uper] == 0
-    assert table[0, a.Bloc, a.Uloc] == 0
-    assert table[0, a.Bloc, a.Uorg] == 0
-    assert table[0, a.Bloc, a.O] == 0
-
-    assert table[0, a.Borg, a.Bper] == 0
-    assert table[0, a.Borg, a.Bloc] == 0
-    assert table[0, a.Borg, a.Borg] == 0
-    assert table[0, a.Borg, a.Iper] == 0
-    assert table[0, a.Borg, a.Iloc] == 0
-    assert table[0, a.Borg, a.Iorg] == 1
-    assert table[0, a.Borg, a.Lper] == 0
-    assert table[0, a.Borg, a.Lloc] == 0
-    assert table[0, a.Borg, a.Lorg] == 1
-    assert table[0, a.Borg, a.Uper] == 0
-    assert table[0, a.Borg, a.Uloc] == 0
-    assert table[0, a.Borg, a.Uorg] == 0
-    assert table[0, a.Borg, a.O] == 0
-
-    # Not last token, prev action was I
-    assert table[0, a.Iper, a.Bper] == 0
-    assert table[0, a.Iper, a.Bloc] == 0
-    assert table[0, a.Iper, a.Borg] == 0
-    assert table[0, a.Iper, a.Iper] == 1
-    assert table[0, a.Iper, a.Iloc] == 0
-    assert table[0, a.Iper, a.Iorg] == 0
-    assert table[0, a.Iper, a.Lper] == 1
-    assert table[0, a.Iper, a.Lloc] == 0
-    assert table[0, a.Iper, a.Lorg] == 0
-    assert table[0, a.Iper, a.Uper] == 0
-    assert table[0, a.Iper, a.Uloc] == 0
-    assert table[0, a.Iper, a.Uorg] == 0
-    assert table[0, a.Iper, a.O] == 0
-
-    assert table[0, a.Iloc, a.Bper] == 0
-    assert table[0, a.Iloc, a.Bloc] == 0
-    assert table[0, a.Iloc, a.Borg] == 0
-    assert table[0, a.Iloc, a.Iper] == 0
-    assert table[0, a.Iloc, a.Iloc] == 1
-    assert table[0, a.Iloc, a.Iorg] == 0
-    assert table[0, a.Iloc, a.Lper] == 0
-    assert table[0, a.Iloc, a.Lloc] == 1
-    assert table[0, a.Iloc, a.Lorg] == 0
-    assert table[0, a.Iloc, a.Uper] == 0
-    assert table[0, a.Iloc, a.Uloc] == 0
-    assert table[0, a.Iloc, a.Uorg] == 0
-    assert table[0, a.Iloc, a.O] == 0
-
-    assert table[0, a.Iorg, a.Bper] == 0
-    assert table[0, a.Iorg, a.Bloc] == 0
-    assert table[0, a.Iorg, a.Borg] == 0
-    assert table[0, a.Iorg, a.Iper] == 0
-    assert table[0, a.Iorg, a.Iloc] == 0
-    assert table[0, a.Iorg, a.Iorg] == 1
-    assert table[0, a.Iorg, a.Lper] == 0
-    assert table[0, a.Iorg, a.Lloc] == 0
-    assert table[0, a.Iorg, a.Lorg] == 1
-    assert table[0, a.Iorg, a.Uper] == 0
-    assert table[0, a.Iorg, a.Uloc] == 0
-    assert table[0, a.Iorg, a.Uorg] == 0
-    assert table[0, a.Iorg, a.O] == 0
-
-    # Not last token, prev action was L
-    assert table[0, a.Lper, a.Bper] == 1
-    assert table[0, a.Lper, a.Bloc] == 1
-    assert table[0, a.Lper, a.Borg] == 1
-    assert table[0, a.Lper, a.Iper] == 0
-    assert table[0, a.Lper, a.Iloc] == 0
-    assert table[0, a.Lper, a.Iorg] == 0
-    assert table[0, a.Lper, a.Lper] == 0
-    assert table[0, a.Lper, a.Lloc] == 0
-    assert table[0, a.Lper, a.Lorg] == 0
-    assert table[0, a.Lper, a.Uper] == 1
-    assert table[0, a.Lper, a.Uloc] == 1
-    assert table[0, a.Lper, a.Uorg] == 1
-    assert table[0, a.Lper, a.O] == 1
-
-    assert table[0, a.Lloc, a.Bper] == 1
-    assert table[0, a.Lloc, a.Bloc] == 1
-    assert table[0, a.Lloc, a.Borg] == 1
-    assert table[0, a.Lloc, a.Iper] == 0
-    assert table[0, a.Lloc, a.Iloc] == 0
-    assert table[0, a.Lloc, a.Iorg] == 0
-    assert table[0, a.Lloc, a.Lper] == 0
-    assert table[0, a.Lloc, a.Lloc] == 0
-    assert table[0, a.Lloc, a.Lorg] == 0
-    assert table[0, a.Lloc, a.Uper] == 1
-    assert table[0, a.Lloc, a.Uloc] == 1
-    assert table[0, a.Lloc, a.Uorg] == 1
-    assert table[0, a.Lloc, a.O] == 1
-
-    assert table[0, a.Lorg, a.Bper] == 1
-    assert table[0, a.Lorg, a.Bloc] == 1
-    assert table[0, a.Lorg, a.Borg] == 1
-    assert table[0, a.Lorg, a.Iper] == 0
-    assert table[0, a.Lorg, a.Iloc] == 0
-    assert table[0, a.Lorg, a.Iorg] == 0
-    assert table[0, a.Lorg, a.Lper] == 0
-    assert table[0, a.Lorg, a.Lloc] == 0
-    assert table[0, a.Lorg, a.Lorg] == 0
-    assert table[0, a.Lorg, a.Uper] == 1
-    assert table[0, a.Lorg, a.Uloc] == 1
-    assert table[0, a.Lorg, a.Uorg] == 1
-    assert table[0, a.Lorg, a.O] == 1
-
-    # Not last token, prev action was U
-    assert table[0, a.Uper, a.Bper] == 1
-    assert table[0, a.Uper, a.Bloc] == 1
-    assert table[0, a.Uper, a.Borg] == 1
-    assert table[0, a.Uper, a.Iper] == 0
-    assert table[0, a.Uper, a.Iloc] == 0
-    assert table[0, a.Uper, a.Iorg] == 0
-    assert table[0, a.Uper, a.Lper] == 0
-    assert table[0, a.Uper, a.Lloc] == 0
-    assert table[0, a.Uper, a.Lorg] == 0
-    assert table[0, a.Uper, a.Uper] == 1
-    assert table[0, a.Uper, a.Uloc] == 1
-    assert table[0, a.Uper, a.Uorg] == 1
-    assert table[0, a.Uper, a.O] == 1
-
-    assert table[0, a.Uloc, a.Bper] == 1
-    assert table[0, a.Uloc, a.Bloc] == 1
-    assert table[0, a.Uloc, a.Borg] == 1
-    assert table[0, a.Uloc, a.Iper] == 0
-    assert table[0, a.Uloc, a.Iloc] == 0
-    assert table[0, a.Uloc, a.Iorg] == 0
-    assert table[0, a.Uloc, a.Lper] == 0
-    assert table[0, a.Uloc, a.Lloc] == 0
-    assert table[0, a.Uloc, a.Lorg] == 0
-    assert table[0, a.Uloc, a.Uper] == 1
-    assert table[0, a.Uloc, a.Uloc] == 1
-    assert table[0, a.Uloc, a.Uorg] == 1
-    assert table[0, a.Uloc, a.O] == 1
-
-    assert table[0, a.Uorg, a.Bper] == 1
-    assert table[0, a.Uorg, a.Bloc] == 1
-    assert table[0, a.Uorg, a.Borg] == 1
-    assert table[0, a.Uorg, a.Iper] == 0
-    assert table[0, a.Uorg, a.Iloc] == 0
-    assert table[0, a.Uorg, a.Iorg] == 0
-    assert table[0, a.Uorg, a.Lper] == 0
-    assert table[0, a.Uorg, a.Lloc] == 0
-    assert table[0, a.Uorg, a.Lorg] == 0
-    assert table[0, a.Uorg, a.Uper] == 1
-    assert table[0, a.Uorg, a.Uloc] == 1
-    assert table[0, a.Uorg, a.Uorg] == 1
-    assert table[0, a.Uorg, a.O] == 1
-
-    # Not last token, prev action was O
-    assert table[0, a.O, a.Bper] == 1
-    assert table[0, a.O, a.Bloc] == 1
-    assert table[0, a.O, a.Borg] == 1
-    assert table[0, a.O, a.Iper] == 0
-    assert table[0, a.O, a.Iloc] == 0
-    assert table[0, a.O, a.Iorg] == 0
-    assert table[0, a.O, a.Lper] == 0
-    assert table[0, a.O, a.Lloc] == 0
-    assert table[0, a.O, a.Lorg] == 0
-    assert table[0, a.O, a.Uper] == 1
-    assert table[0, a.O, a.Uloc] == 1
-    assert table[0, a.O, a.Uorg] == 1
-    assert table[0, a.O, a.O] == 1
-
-    # Last token, prev action was B
-    assert table[1, a.Bper, a.Bper] == 0
-    assert table[1, a.Bper, a.Bloc] == 0
-    assert table[1, a.Bper, a.Borg] == 0
-    assert table[1, a.Bper, a.Iper] == 0
-    assert table[1, a.Bper, a.Iloc] == 0
-    assert table[1, a.Bper, a.Iorg] == 0
-    assert table[1, a.Bper, a.Lper] == 1
-    assert table[1, a.Bper, a.Lloc] == 0
-    assert table[1, a.Bper, a.Lorg] == 0
-    assert table[1, a.Bper, a.Uper] == 0
-    assert table[1, a.Bper, a.Uloc] == 0
-    assert table[1, a.Bper, a.Uorg] == 0
-    assert table[1, a.Bper, a.O] == 0
-
-    assert table[1, a.Bloc, a.Bper] == 0
-    assert table[1, a.Bloc, a.Bloc] == 0
-    assert table[0, a.Bloc, a.Borg] == 0
-    assert table[1, a.Bloc, a.Iper] == 0
-    assert table[1, a.Bloc, a.Iloc] == 0
-    assert table[1, a.Bloc, a.Iorg] == 0
-    assert table[1, a.Bloc, a.Lper] == 0
-    assert table[1, a.Bloc, a.Lloc] == 1
-    assert table[1, a.Bloc, a.Lorg] == 0
-    assert table[1, a.Bloc, a.Uper] == 0
-    assert table[1, a.Bloc, a.Uloc] == 0
-    assert table[1, a.Bloc, a.Uorg] == 0
-    assert table[1, a.Bloc, a.O] == 0
-
-    assert table[1, a.Borg, a.Bper] == 0
-    assert table[1, a.Borg, a.Bloc] == 0
-    assert table[1, a.Borg, a.Borg] == 0
-    assert table[1, a.Borg, a.Iper] == 0
-    assert table[1, a.Borg, a.Iloc] == 0
-    assert table[1, a.Borg, a.Iorg] == 0
-    assert table[1, a.Borg, a.Lper] == 0
-    assert table[1, a.Borg, a.Lloc] == 0
-    assert table[1, a.Borg, a.Lorg] == 1
-    assert table[1, a.Borg, a.Uper] == 0
-    assert table[1, a.Borg, a.Uloc] == 0
-    assert table[1, a.Borg, a.Uorg] == 0
-    assert table[1, a.Borg, a.O] == 0
-
-    # Last token, prev action was I
-    assert table[1, a.Iper, a.Bper] == 0
-    assert table[1, a.Iper, a.Bloc] == 0
-    assert table[1, a.Iper, a.Borg] == 0
-    assert table[1, a.Iper, a.Iper] == 0
-    assert table[1, a.Iper, a.Iloc] == 0
-    assert table[1, a.Iper, a.Iorg] == 0
-    assert table[1, a.Iper, a.Lper] == 1
-    assert table[1, a.Iper, a.Lloc] == 0
-    assert table[1, a.Iper, a.Lorg] == 0
-    assert table[1, a.Iper, a.Uper] == 0
-    assert table[1, a.Iper, a.Uloc] == 0
-    assert table[1, a.Iper, a.Uorg] == 0
-    assert table[1, a.Iper, a.O] == 0
-
-    assert table[1, a.Iloc, a.Bper] == 0
-    assert table[1, a.Iloc, a.Bloc] == 0
-    assert table[1, a.Iloc, a.Borg] == 0
-    assert table[1, a.Iloc, a.Iper] == 0
-    assert table[1, a.Iloc, a.Iloc] == 0
-    assert table[1, a.Iloc, a.Iorg] == 0
-    assert table[1, a.Iloc, a.Lper] == 0
-    assert table[1, a.Iloc, a.Lloc] == 1
-    assert table[1, a.Iloc, a.Lorg] == 0
-    assert table[1, a.Iloc, a.Uper] == 0
-    assert table[1, a.Iloc, a.Uloc] == 0
-    assert table[1, a.Iloc, a.Uorg] == 0
-    assert table[1, a.Iloc, a.O] == 0
-
-    assert table[1, a.Iorg, a.Bper] == 0
-    assert table[1, a.Iorg, a.Bloc] == 0
-    assert table[1, a.Iorg, a.Borg] == 0
-    assert table[1, a.Iorg, a.Iper] == 0
-    assert table[1, a.Iorg, a.Iloc] == 0
-    assert table[1, a.Iorg, a.Iorg] == 0
-    assert table[1, a.Iorg, a.Lper] == 0
-    assert table[1, a.Iorg, a.Lloc] == 0
-    assert table[1, a.Iorg, a.Lorg] == 1
-    assert table[1, a.Iorg, a.Uper] == 0
-    assert table[1, a.Iorg, a.Uloc] == 0
-    assert table[1, a.Iorg, a.Uorg] == 0
-    assert table[1, a.Iorg, a.O] == 0
-
-    # Last token, prev action was L
-    assert table[1, a.Lper, a.Bper] == 0
-    assert table[1, a.Lper, a.Bloc] == 0
-    assert table[1, a.Lper, a.Borg] == 0
-    assert table[1, a.Lper, a.Iper] == 0
-    assert table[1, a.Lper, a.Iloc] == 0
-    assert table[1, a.Lper, a.Iorg] == 0
-    assert table[1, a.Lper, a.Lper] == 0
-    assert table[1, a.Lper, a.Lloc] == 0
-    assert table[1, a.Lper, a.Lorg] == 0
-    assert table[1, a.Lper, a.Uper] == 1
-    assert table[1, a.Lper, a.Uloc] == 1
-    assert table[1, a.Lper, a.Uorg] == 1
-    assert table[1, a.Lper, a.O] == 1
-
-    assert table[1, a.Lloc, a.Bper] == 0
-    assert table[1, a.Lloc, a.Bloc] == 0
-    assert table[1, a.Lloc, a.Borg] == 0
-    assert table[1, a.Lloc, a.Iper] == 0
-    assert table[1, a.Lloc, a.Iloc] == 0
-    assert table[1, a.Lloc, a.Iorg] == 0
-    assert table[1, a.Lloc, a.Lper] == 0
-    assert table[1, a.Lloc, a.Lloc] == 0
-    assert table[1, a.Lloc, a.Lorg] == 0
-    assert table[1, a.Lloc, a.Uper] == 1
-    assert table[1, a.Lloc, a.Uloc] == 1
-    assert table[1, a.Lloc, a.Uorg] == 1
-    assert table[1, a.Lloc, a.O] == 1
-
-    assert table[1, a.Lorg, a.Bper] == 0
-    assert table[1, a.Lorg, a.Bloc] == 0
-    assert table[1, a.Lorg, a.Borg] == 0
-    assert table[1, a.Lorg, a.Iper] == 0
-    assert table[1, a.Lorg, a.Iloc] == 0
-    assert table[1, a.Lorg, a.Iorg] == 0
-    assert table[1, a.Lorg, a.Lper] == 0
-    assert table[1, a.Lorg, a.Lloc] == 0
-    assert table[1, a.Lorg, a.Lorg] == 0
-    assert table[1, a.Lorg, a.Uper] == 1
-    assert table[1, a.Lorg, a.Uloc] == 1
-    assert table[1, a.Lorg, a.Uorg] == 1
-    assert table[1, a.Lorg, a.O] == 1
-
-    # Last token, prev action was U
-    assert table[1, a.Uper, a.Bper] == 0
-    assert table[1, a.Uper, a.Bloc] == 0
-    assert table[1, a.Uper, a.Borg] == 0
-    assert table[1, a.Uper, a.Iper] == 0
-    assert table[1, a.Uper, a.Iloc] == 0
-    assert table[1, a.Uper, a.Iorg] == 0
-    assert table[1, a.Uper, a.Lper] == 0
-    assert table[1, a.Uper, a.Lloc] == 0
-    assert table[1, a.Uper, a.Lorg] == 0
-    assert table[1, a.Uper, a.Uper] == 1
-    assert table[1, a.Uper, a.Uloc] == 1
-    assert table[1, a.Uper, a.Uorg] == 1
-    assert table[1, a.Uper, a.O] == 1
-
-    assert table[1, a.Uloc, a.Bper] == 0
-    assert table[1, a.Uloc, a.Bloc] == 0
-    assert table[1, a.Uloc, a.Borg] == 0
-    assert table[1, a.Uloc, a.Iper] == 0
-    assert table[1, a.Uloc, a.Iloc] == 0
-    assert table[1, a.Uloc, a.Iorg] == 0
-    assert table[1, a.Uloc, a.Lper] == 0
-    assert table[1, a.Uloc, a.Lloc] == 0
-    assert table[1, a.Uloc, a.Lorg] == 0
-    assert table[1, a.Uloc, a.Uper] == 1
-    assert table[1, a.Uloc, a.Uloc] == 1
-    assert table[1, a.Uloc, a.Uorg] == 1
-    assert table[1, a.Uloc, a.O] == 1
-
-    assert table[1, a.Uorg, a.Bper] == 0
-    assert table[1, a.Uorg, a.Bloc] == 0
-    assert table[1, a.Uorg, a.Borg] == 0
-    assert table[1, a.Uorg, a.Iper] == 0
-    assert table[1, a.Uorg, a.Iloc] == 0
-    assert table[1, a.Uorg, a.Iorg] == 0
-    assert table[1, a.Uorg, a.Lper] == 0
-    assert table[1, a.Uorg, a.Lloc] == 0
-    assert table[1, a.Uorg, a.Lorg] == 0
-    assert table[1, a.Uorg, a.Uper] == 1
-    assert table[1, a.Uorg, a.Uloc] == 1
-    assert table[1, a.Uorg, a.Uorg] == 1
-    assert table[1, a.Uorg, a.O] == 1
-
-    # Last token, prev action was O
-    assert table[1, a.O, a.Bper] == 0
-    assert table[1, a.O, a.Bloc] == 0
-    assert table[1, a.O, a.Borg] == 0
-    assert table[1, a.O, a.Iper] == 0
-    assert table[1, a.O, a.Iloc] == 0
-    assert table[1, a.O, a.Iorg] == 0
-    assert table[1, a.O, a.Lper] == 0
-    assert table[1, a.O, a.Lloc] == 0
-    assert table[1, a.O, a.Lorg] == 0
-    assert table[1, a.O, a.Uper] == 1
-    assert table[1, a.O, a.Uloc] == 1
-    assert table[1, a.O, a.Uorg] == 1
-    assert table[1, a.O, a.O] == 1
+    # Also test the results are still the same after IO
+    with make_tempdir() as tmp_dir:
+        nlp.to_disk(tmp_dir)
+        nlp2 = util.load_model_from_path(tmp_dir)
+        doc2 = nlp2(test_text)
+        ents2 = doc2.ents
+        assert len(ents2) == 1
+        assert ents2[0].text == "London"
+        assert ents2[0].label_ == "LOC"
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index d5a549f13..41384897a 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -117,9 +117,7 @@ def test_overfitting_IO():
         assert cats2["POSITIVE"] + cats2["NEGATIVE"] == pytest.approx(1.0, 0.1)
 
     # Test scoring
-    scores = nlp.evaluate(
-        train_examples, component_cfg={"scorer": {"positive_label": "POSITIVE"}}
-    )
+    scores = nlp.evaluate(train_examples, scorer_cfg={"positive_label": "POSITIVE"})
     assert scores["cats_f"] == 1.0
     assert scores["cats_score"] == 1.0
     assert "cats_score_desc" in scores
diff --git a/spacy/tests/regression/test_issue4001-4500.py b/spacy/tests/regression/test_issue4001-4500.py
index 636cddcb7..27464a39a 100644
--- a/spacy/tests/regression/test_issue4001-4500.py
+++ b/spacy/tests/regression/test_issue4001-4500.py
@@ -438,9 +438,8 @@ def test_issue4402():
         data = DocBin(docs=docs, attrs=attrs).to_bytes()
         with output_file.open("wb") as file_:
             file_.write(data)
-        corpus = Corpus(train_loc=str(output_file), dev_loc=str(output_file))
-
-        train_data = list(corpus.train_dataset(nlp))
+        reader = Corpus(output_file)
+        train_data = list(reader(nlp))
         assert len(train_data) == 2
 
         split_train_data = []
diff --git a/spacy/tests/regression/test_issue4501-5000.py b/spacy/tests/regression/test_issue4501-5000.py
index 08a21e690..0b3b4a9fc 100644
--- a/spacy/tests/regression/test_issue4501-5000.py
+++ b/spacy/tests/regression/test_issue4501-5000.py
@@ -139,7 +139,8 @@ def test_issue4665():
 def test_issue4674():
     """Test that setting entities with overlapping identifiers does not mess up IO"""
     nlp = English()
-    kb = KnowledgeBase(nlp.vocab, entity_vector_length=3)
+    kb = KnowledgeBase(entity_vector_length=3)
+    kb.initialize(nlp.vocab)
     vector1 = [0.9, 1.1, 1.01]
     vector2 = [1.8, 2.25, 2.01]
     with pytest.warns(UserWarning):
@@ -156,7 +157,8 @@ def test_issue4674():
             dir_path.mkdir()
         file_path = dir_path / "kb"
         kb.dump(str(file_path))
-        kb2 = KnowledgeBase(vocab=nlp.vocab, entity_vector_length=3)
+        kb2 = KnowledgeBase(entity_vector_length=3)
+        kb2.initialize(nlp.vocab)
         kb2.load_bulk(str(file_path))
     assert kb2.get_size_entities() == 1
 
diff --git a/spacy/tests/regression/test_issue5137.py b/spacy/tests/regression/test_issue5137.py
index 095ca8495..cc7a9bd38 100644
--- a/spacy/tests/regression/test_issue5137.py
+++ b/spacy/tests/regression/test_issue5137.py
@@ -27,6 +27,6 @@ def test_issue5137():
 
     with make_tempdir() as tmpdir:
         nlp.to_disk(tmpdir)
-        overrides = {"my_component": {"categories": "my_categories"}}
-        nlp2 = spacy.load(tmpdir, component_cfg=overrides)
+        overrides = {"components": {"my_component": {"categories": "my_categories"}}}
+        nlp2 = spacy.load(tmpdir, config=overrides)
         assert nlp2.get_pipe("my_component").categories == "my_categories"
diff --git a/spacy/tests/regression/test_issue5230.py b/spacy/tests/regression/test_issue5230.py
index ae9ed1844..31292b700 100644
--- a/spacy/tests/regression/test_issue5230.py
+++ b/spacy/tests/regression/test_issue5230.py
@@ -72,7 +72,8 @@ def entity_linker():
 
     @registry.assets.register("TestIssue5230KB.v1")
     def dummy_kb() -> KnowledgeBase:
-        kb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
+        kb = KnowledgeBase(entity_vector_length=1)
+        kb.initialize(nlp.vocab)
         kb.add_entity("test", 0.0, zeros((1, 1), dtype="f"))
         return kb
 
@@ -121,7 +122,8 @@ def test_writer_with_path_py35():
 
 def test_save_and_load_knowledge_base():
     nlp = Language()
-    kb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
+    kb = KnowledgeBase(entity_vector_length=1)
+    kb.initialize(nlp.vocab)
     with make_tempdir() as d:
         path = d / "kb"
         try:
@@ -130,7 +132,8 @@ def test_save_and_load_knowledge_base():
             pytest.fail(str(e))
 
         try:
-            kb_loaded = KnowledgeBase(nlp.vocab, entity_vector_length=1)
+            kb_loaded = KnowledgeBase(entity_vector_length=1)
+            kb_loaded.initialize(nlp.vocab)
             kb_loaded.load_bulk(path)
         except Exception as e:
             pytest.fail(str(e))
diff --git a/spacy/tests/serialize/test_serialize_config.py b/spacy/tests/serialize/test_serialize_config.py
index ce35add42..0d3c90c92 100644
--- a/spacy/tests/serialize/test_serialize_config.py
+++ b/spacy/tests/serialize/test_serialize_config.py
@@ -2,6 +2,7 @@ import pytest
 from thinc.config import Config, ConfigValidationError
 import spacy
 from spacy.lang.en import English
+from spacy.lang.de import German
 from spacy.language import Language
 from spacy.util import registry, deep_merge_configs, load_model_from_config
 from spacy.ml.models import build_Tok2Vec_model, build_tb_parser_model
@@ -11,8 +12,23 @@ from ..util import make_tempdir
 
 
 nlp_config_string = """
+[paths]
+train = ""
+dev = ""
+
 [training]
-batch_size = 666
+
+[training.train_corpus]
+@readers = "spacy.Corpus.v1"
+path = ${paths:train}
+
+[training.dev_corpus]
+@readers = "spacy.Corpus.v1"
+path = ${paths:dev}
+
+[training.batcher]
+@batchers = "batch_by_words.v1"
+size = 666
 
 [nlp]
 lang = "en"
@@ -73,14 +89,9 @@ def my_parser():
             width=321,
             rows=5432,
             also_embed_subwords=True,
-            also_use_static_vectors=False
+            also_use_static_vectors=False,
         ),
-        MaxoutWindowEncoder(
-            width=321,
-            window_size=3,
-            maxout_pieces=4,
-            depth=2
-        )
+        MaxoutWindowEncoder(width=321, window_size=3, maxout_pieces=4, depth=2),
     )
     parser = build_tb_parser_model(
         tok2vec=tok2vec, nr_feature_tokens=7, hidden_width=65, maxout_pieces=5
@@ -93,7 +104,7 @@ def test_create_nlp_from_config():
     with pytest.raises(ConfigValidationError):
         nlp, _ = load_model_from_config(config, auto_fill=False)
     nlp, resolved = load_model_from_config(config, auto_fill=True)
-    assert nlp.config["training"]["batch_size"] == 666
+    assert nlp.config["training"]["batcher"]["size"] == 666
     assert len(nlp.config["training"]) > 1
     assert nlp.pipe_names == ["tok2vec", "tagger"]
     assert len(nlp.config["components"]) == 2
@@ -272,3 +283,33 @@ def test_serialize_config_missing_pipes():
     assert "tok2vec" not in config["components"]
     with pytest.raises(ValueError):
         load_model_from_config(config, auto_fill=True)
+
+
+def test_config_overrides():
+    overrides_nested = {"nlp": {"lang": "de", "pipeline": ["tagger"]}}
+    overrides_dot = {"nlp.lang": "de", "nlp.pipeline": ["tagger"]}
+    # load_model from config with overrides passed directly to Config
+    config = Config().from_str(nlp_config_string, overrides=overrides_dot)
+    nlp, _ = load_model_from_config(config, auto_fill=True)
+    assert isinstance(nlp, German)
+    assert nlp.pipe_names == ["tagger"]
+    # Serialized roundtrip with config passed in
+    base_config = Config().from_str(nlp_config_string)
+    base_nlp, _ = load_model_from_config(base_config, auto_fill=True)
+    assert isinstance(base_nlp, English)
+    assert base_nlp.pipe_names == ["tok2vec", "tagger"]
+    with make_tempdir() as d:
+        base_nlp.to_disk(d)
+        nlp = spacy.load(d, config=overrides_nested)
+    assert isinstance(nlp, German)
+    assert nlp.pipe_names == ["tagger"]
+    with make_tempdir() as d:
+        base_nlp.to_disk(d)
+        nlp = spacy.load(d, config=overrides_dot)
+    assert isinstance(nlp, German)
+    assert nlp.pipe_names == ["tagger"]
+    with make_tempdir() as d:
+        base_nlp.to_disk(d)
+        nlp = spacy.load(d)
+    assert isinstance(nlp, English)
+    assert nlp.pipe_names == ["tok2vec", "tagger"]
diff --git a/spacy/tests/serialize/test_serialize_doc.py b/spacy/tests/serialize/test_serialize_doc.py
index a547b51bc..4a976fc02 100644
--- a/spacy/tests/serialize/test_serialize_doc.py
+++ b/spacy/tests/serialize/test_serialize_doc.py
@@ -1,5 +1,4 @@
 import spacy
-import pytest
 from spacy.lang.en import English
 from spacy.tokens import Doc, DocBin
 
diff --git a/spacy/tests/serialize/test_serialize_kb.py b/spacy/tests/serialize/test_serialize_kb.py
index 91036a496..3f33c6f06 100644
--- a/spacy/tests/serialize/test_serialize_kb.py
+++ b/spacy/tests/serialize/test_serialize_kb.py
@@ -17,7 +17,8 @@ def test_serialize_kb_disk(en_vocab):
         file_path = dir_path / "kb"
         kb1.dump(str(file_path))
 
-        kb2 = KnowledgeBase(vocab=en_vocab, entity_vector_length=3)
+        kb2 = KnowledgeBase(entity_vector_length=3)
+        kb2.initialize(en_vocab)
         kb2.load_bulk(str(file_path))
 
     # final assertions
@@ -25,7 +26,8 @@ def test_serialize_kb_disk(en_vocab):
 
 
 def _get_dummy_kb(vocab):
-    kb = KnowledgeBase(vocab=vocab, entity_vector_length=3)
+    kb = KnowledgeBase(entity_vector_length=3)
+    kb.initialize(vocab)
 
     kb.add_entity(entity="Q53", freq=33, entity_vector=[0, 5, 3])
     kb.add_entity(entity="Q17", freq=2, entity_vector=[7, 1, 0])
diff --git a/spacy/tests/test_gold.py b/spacy/tests/test_gold.py
index c44daf630..16974a4c2 100644
--- a/spacy/tests/test_gold.py
+++ b/spacy/tests/test_gold.py
@@ -1,5 +1,5 @@
 import numpy
-from spacy.gold import biluo_tags_from_offsets, offsets_from_biluo_tags
+from spacy.gold import biluo_tags_from_offsets, offsets_from_biluo_tags, Alignment
 from spacy.gold import spans_from_biluo_tags, iob_to_biluo
 from spacy.gold import Corpus, docs_to_json
 from spacy.gold.example import Example
@@ -483,14 +483,14 @@ def test_roundtrip_docs_to_docbin(doc):
         reloaded_nlp = English()
         json_file = tmpdir / "roundtrip.json"
         srsly.write_json(json_file, [docs_to_json(doc)])
-        goldcorpus = Corpus(str(json_file), str(json_file))
         output_file = tmpdir / "roundtrip.spacy"
         data = DocBin(docs=[doc]).to_bytes()
         with output_file.open("wb") as file_:
             file_.write(data)
-        goldcorpus = Corpus(train_loc=str(output_file), dev_loc=str(output_file))
-        reloaded_example = next(goldcorpus.dev_dataset(nlp=reloaded_nlp))
-        assert len(doc) == goldcorpus.count_train(reloaded_nlp)
+        reader = Corpus(output_file)
+        reloaded_examples = list(reader(reloaded_nlp))
+        assert len(doc) == sum(len(eg) for eg in reloaded_examples)
+    reloaded_example = reloaded_examples[0]
     assert text == reloaded_example.reference.text
     assert idx == [t.idx for t in reloaded_example.reference]
     assert tags == [t.tag_ for t in reloaded_example.reference]
@@ -515,10 +515,9 @@ def test_make_orth_variants(doc):
         data = DocBin(docs=[doc]).to_bytes()
         with output_file.open("wb") as file_:
             file_.write(data)
-        goldcorpus = Corpus(train_loc=str(output_file), dev_loc=str(output_file))
-
         # due to randomness, test only that this runs with no errors for now
-        train_example = next(goldcorpus.train_dataset(nlp))
+        reader = Corpus(output_file)
+        train_example = next(reader(nlp))
         make_orth_variants_example(nlp, train_example, orth_variant_level=0.2)
 
 
@@ -647,11 +646,83 @@ def test_split_sents(merged_dict):
     assert split_examples[1].text == "It is just me"
 
     token_annotation_1 = split_examples[0].to_dict()["token_annotation"]
-    assert token_annotation_1["words"] == ["Hi", "there", "everyone"]
-    assert token_annotation_1["tags"] == ["INTJ", "ADV", "PRON"]
-    assert token_annotation_1["sent_starts"] == [1, 0, 0]
+    assert token_annotation_1["ORTH"] == ["Hi", "there", "everyone"]
+    assert token_annotation_1["TAG"] == ["INTJ", "ADV", "PRON"]
+    assert token_annotation_1["SENT_START"] == [1, 0, 0]
 
     token_annotation_2 = split_examples[1].to_dict()["token_annotation"]
-    assert token_annotation_2["words"] == ["It", "is", "just", "me"]
-    assert token_annotation_2["tags"] == ["PRON", "AUX", "ADV", "PRON"]
-    assert token_annotation_2["sent_starts"] == [1, 0, 0, 0]
+    assert token_annotation_2["ORTH"] == ["It", "is", "just", "me"]
+    assert token_annotation_2["TAG"] == ["PRON", "AUX", "ADV", "PRON"]
+    assert token_annotation_2["SENT_START"] == [1, 0, 0, 0]
+
+
+def test_alignment():
+    other_tokens = ["i", "listened", "to", "obama", "'", "s", "podcasts", "."]
+    spacy_tokens = ["i", "listened", "to", "obama", "'s", "podcasts", "."]
+    align = Alignment.from_strings(other_tokens, spacy_tokens)
+    assert list(align.x2y.lengths) == [1, 1, 1, 1, 1, 1, 1, 1]
+    assert list(align.x2y.dataXd) == [0, 1, 2, 3, 4, 4, 5, 6]
+    assert list(align.y2x.lengths) == [1, 1, 1, 1, 2, 1, 1]
+    assert list(align.y2x.dataXd) == [0, 1, 2, 3, 4, 5, 6, 7]
+
+
+def test_alignment_case_insensitive():
+    other_tokens = ["I", "listened", "to", "obama", "'", "s", "podcasts", "."]
+    spacy_tokens = ["i", "listened", "to", "Obama", "'s", "PODCASTS", "."]
+    align = Alignment.from_strings(other_tokens, spacy_tokens)
+    assert list(align.x2y.lengths) == [1, 1, 1, 1, 1, 1, 1, 1]
+    assert list(align.x2y.dataXd) == [0, 1, 2, 3, 4, 4, 5, 6]
+    assert list(align.y2x.lengths) == [1, 1, 1, 1, 2, 1, 1]
+    assert list(align.y2x.dataXd) == [0, 1, 2, 3, 4, 5, 6, 7]
+
+
+def test_alignment_complex():
+    other_tokens = ["i listened to", "obama", "'", "s", "podcasts", "."]
+    spacy_tokens = ["i", "listened", "to", "obama", "'s", "podcasts."]
+    align = Alignment.from_strings(other_tokens, spacy_tokens)
+    assert list(align.x2y.lengths) == [3, 1, 1, 1, 1, 1]
+    assert list(align.x2y.dataXd) == [0, 1, 2, 3, 4, 4, 5, 5]
+    assert list(align.y2x.lengths) == [1, 1, 1, 1, 2, 2]
+    assert list(align.y2x.dataXd) == [0, 0, 0, 1, 2, 3, 4, 5]
+
+
+def test_alignment_complex_example(en_vocab):
+    other_tokens = ["i listened to", "obama", "'", "s", "podcasts", "."]
+    spacy_tokens = ["i", "listened", "to", "obama", "'s", "podcasts."]
+    predicted = Doc(
+        en_vocab, words=other_tokens, spaces=[True, False, False, True, False, False]
+    )
+    reference = Doc(
+        en_vocab, words=spacy_tokens, spaces=[True, True, True, False, True, False]
+    )
+    assert predicted.text == "i listened to obama's podcasts."
+    assert reference.text == "i listened to obama's podcasts."
+    example = Example(predicted, reference)
+    align = example.alignment
+    assert list(align.x2y.lengths) == [3, 1, 1, 1, 1, 1]
+    assert list(align.x2y.dataXd) == [0, 1, 2, 3, 4, 4, 5, 5]
+    assert list(align.y2x.lengths) == [1, 1, 1, 1, 2, 2]
+    assert list(align.y2x.dataXd) == [0, 0, 0, 1, 2, 3, 4, 5]
+
+
+def test_alignment_different_texts():
+    other_tokens = ["she", "listened", "to", "obama", "'s", "podcasts", "."]
+    spacy_tokens = ["i", "listened", "to", "obama", "'s", "podcasts", "."]
+    with pytest.raises(ValueError):
+        Alignment.from_strings(other_tokens, spacy_tokens)
+
+
+def test_retokenized_docs(doc):
+    a = doc.to_array(["TAG"])
+    doc1 = Doc(doc.vocab, words=[t.text for t in doc]).from_array(["TAG"], a)
+    doc2 = Doc(doc.vocab, words=[t.text for t in doc]).from_array(["TAG"], a)
+    example = Example(doc1, doc2)
+    # fmt: off
+    expected1 = ["Sarah", "'s", "sister", "flew", "to", "Silicon", "Valley", "via", "London", "."]
+    expected2 = [None, "sister", "flew", "to", None, "via", "London", "."]
+    # fmt: on
+    assert example.get_aligned("ORTH", as_string=True) == expected1
+    with doc1.retokenize() as retokenizer:
+        retokenizer.merge(doc1[0:2])
+        retokenizer.merge(doc1[5:7])
+    assert example.get_aligned("ORTH", as_string=True) == expected2
diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py
index a63a8e24c..6865cd1e5 100644
--- a/spacy/tests/test_language.py
+++ b/spacy/tests/test_language.py
@@ -3,10 +3,11 @@ import pytest
 from spacy.language import Language
 from spacy.tokens import Doc, Span
 from spacy.vocab import Vocab
+from spacy.gold import Example
 from spacy.lang.en import English
+from spacy.util import registry
 
 from .util import add_vecs_to_vocab, assert_docs_equal
-from ..gold import Example
 
 
 @pytest.fixture
@@ -153,6 +154,85 @@ def test_language_pipe_stream(nlp2, n_process, texts):
         assert_docs_equal(doc, expected_doc)
 
 
-def test_language_from_config():
-    English.from_config()
-    # TODO: add more tests
+def test_language_from_config_before_after_init():
+    name = "test_language_from_config_before_after_init"
+    ran_before = False
+    ran_after = False
+    ran_after_pipeline = False
+
+    @registry.callbacks(f"{name}_before")
+    def make_before_creation():
+        def before_creation(lang_cls):
+            nonlocal ran_before
+            ran_before = True
+            assert lang_cls is English
+            lang_cls.Defaults.foo = "bar"
+            return lang_cls
+
+        return before_creation
+
+    @registry.callbacks(f"{name}_after")
+    def make_after_creation():
+        def after_creation(nlp):
+            nonlocal ran_after
+            ran_after = True
+            assert isinstance(nlp, English)
+            assert nlp.pipe_names == []
+            assert nlp.Defaults.foo == "bar"
+            nlp.meta["foo"] = "bar"
+            return nlp
+
+        return after_creation
+
+    @registry.callbacks(f"{name}_after_pipeline")
+    def make_after_pipeline_creation():
+        def after_pipeline_creation(nlp):
+            nonlocal ran_after_pipeline
+            ran_after_pipeline = True
+            assert isinstance(nlp, English)
+            assert nlp.pipe_names == ["sentencizer"]
+            assert nlp.Defaults.foo == "bar"
+            assert nlp.meta["foo"] == "bar"
+            nlp.meta["bar"] = "baz"
+            return nlp
+
+        return after_pipeline_creation
+
+    config = {
+        "nlp": {
+            "pipeline": ["sentencizer"],
+            "before_creation": {"@callbacks": f"{name}_before"},
+            "after_creation": {"@callbacks": f"{name}_after"},
+            "after_pipeline_creation": {"@callbacks": f"{name}_after_pipeline"},
+        },
+        "components": {"sentencizer": {"factory": "sentencizer"}},
+    }
+    nlp = English.from_config(config)
+    assert all([ran_before, ran_after, ran_after_pipeline])
+    assert nlp.Defaults.foo == "bar"
+    assert nlp.meta["foo"] == "bar"
+    assert nlp.meta["bar"] == "baz"
+    assert nlp.pipe_names == ["sentencizer"]
+    assert nlp("text")
+
+
+def test_language_from_config_before_after_init_invalid():
+    """Check that ValueError is raised if a callback returns an invalid object."""
+    name = "test_language_from_config_before_after_init_invalid"
+    registry.callbacks(f"{name}_before1", func=lambda: lambda nlp: None)
+    registry.callbacks(f"{name}_before2", func=lambda: lambda nlp: nlp())
+    registry.callbacks(f"{name}_after1", func=lambda: lambda nlp: None)
+    registry.callbacks(f"{name}_after2", func=lambda: lambda nlp: English)
+
+    for callback_name in [f"{name}_before1", f"{name}_before2"]:
+        config = {"nlp": {"before_creation": {"@callbacks": callback_name}}}
+        with pytest.raises(ValueError):
+            English.from_config(config)
+    for callback_name in [f"{name}_after1", f"{name}_after2"]:
+        config = {"nlp": {"after_creation": {"@callbacks": callback_name}}}
+        with pytest.raises(ValueError):
+            English.from_config(config)
+    for callback_name in [f"{name}_after1", f"{name}_after2"]:
+        config = {"nlp": {"after_pipeline_creation": {"@callbacks": callback_name}}}
+        with pytest.raises(ValueError):
+            English.from_config(config)
diff --git a/spacy/tests/test_models.py b/spacy/tests/test_models.py
index 4c38ea6c6..8f1bb1c3d 100644
--- a/spacy/tests/test_models.py
+++ b/spacy/tests/test_models.py
@@ -24,6 +24,7 @@ def get_textcat_kwargs():
         "nO": 7,
     }
 
+
 def get_textcat_cnn_kwargs():
     return {
         "tok2vec": test_tok2vec(),
@@ -31,6 +32,7 @@ def get_textcat_cnn_kwargs():
         "nO": 13,
     }
 
+
 def get_all_params(model):
     params = []
     for node in model.walk():
@@ -59,17 +61,11 @@ def get_tok2vec_kwargs():
     # This actually creates models, so seems best to put it in a function.
     return {
         "embed": MultiHashEmbed(
-            width=32,
-            rows=500,
-            also_embed_subwords=True,
-            also_use_static_vectors=False
+            width=32, rows=500, also_embed_subwords=True, also_use_static_vectors=False
         ),
         "encode": MaxoutWindowEncoder(
-            width=32,
-            depth=2,
-            maxout_pieces=2,
-            window_size=1,
-        )
+            width=32, depth=2, maxout_pieces=2, window_size=1,
+        ),
     }
 
 
diff --git a/spacy/tests/test_new_example.py b/spacy/tests/test_new_example.py
index 886a24a8e..df6489aa8 100644
--- a/spacy/tests/test_new_example.py
+++ b/spacy/tests/test_new_example.py
@@ -42,7 +42,7 @@ def test_Example_from_dict_with_tags(pred_words, annots):
     example = Example.from_dict(predicted, annots)
     for i, token in enumerate(example.reference):
         assert token.tag_ == annots["tags"][i]
-    aligned_tags = example.get_aligned("tag", as_string=True)
+    aligned_tags = example.get_aligned("TAG", as_string=True)
     assert aligned_tags == ["NN" for _ in predicted]
 
 
@@ -53,9 +53,13 @@ def test_aligned_tags():
     annots = {"words": gold_words, "tags": gold_tags}
     vocab = Vocab()
     predicted = Doc(vocab, words=pred_words)
-    example = Example.from_dict(predicted, annots)
-    aligned_tags = example.get_aligned("tag", as_string=True)
-    assert aligned_tags == ["VERB", "DET", "NOUN", "SCONJ", "PRON", "VERB", "VERB"]
+    example1 = Example.from_dict(predicted, annots)
+    aligned_tags1 = example1.get_aligned("TAG", as_string=True)
+    assert aligned_tags1 == ["VERB", "DET", "NOUN", "SCONJ", "PRON", "VERB", "VERB"]
+    # ensure that to_dict works correctly
+    example2 = Example.from_dict(predicted, example1.to_dict())
+    aligned_tags2 = example2.get_aligned("TAG", as_string=True)
+    assert aligned_tags2 == ["VERB", "DET", "NOUN", "SCONJ", "PRON", "VERB", "VERB"]
 
 
 def test_aligned_tags_multi():
@@ -66,7 +70,7 @@ def test_aligned_tags_multi():
     vocab = Vocab()
     predicted = Doc(vocab, words=pred_words)
     example = Example.from_dict(predicted, annots)
-    aligned_tags = example.get_aligned("tag", as_string=True)
+    aligned_tags = example.get_aligned("TAG", as_string=True)
     assert aligned_tags == [None, None, "SCONJ", "PRON", "VERB", "VERB"]
 
 
diff --git a/spacy/tests/test_tok2vec.py b/spacy/tests/test_tok2vec.py
index 76b5e64df..b30705088 100644
--- a/spacy/tests/test_tok2vec.py
+++ b/spacy/tests/test_tok2vec.py
@@ -19,14 +19,9 @@ def test_empty_doc():
             width=width,
             rows=embed_size,
             also_use_static_vectors=False,
-            also_embed_subwords=True
+            also_embed_subwords=True,
         ),
-        MaxoutWindowEncoder(
-            width=width,
-            depth=4,
-            window_size=1,
-            maxout_pieces=3
-        )
+        MaxoutWindowEncoder(width=width, depth=4, window_size=1, maxout_pieces=3),
     )
     tok2vec.initialize()
     vectors, backprop = tok2vec.begin_update([doc])
@@ -44,14 +39,9 @@ def test_tok2vec_batch_sizes(batch_size, width, embed_size):
             width=width,
             rows=embed_size,
             also_use_static_vectors=False,
-            also_embed_subwords=True
+            also_embed_subwords=True,
         ),
-        MaxoutWindowEncoder(
-            width=width,
-            depth=4,
-            window_size=1,
-            maxout_pieces=3,
-        )
+        MaxoutWindowEncoder(width=width, depth=4, window_size=1, maxout_pieces=3,),
     )
     tok2vec.initialize()
     vectors, backprop = tok2vec.begin_update(batch)
diff --git a/spacy/tests/test_util.py b/spacy/tests/test_util.py
index 3a6c0fd95..47111a902 100644
--- a/spacy/tests/test_util.py
+++ b/spacy/tests/test_util.py
@@ -3,8 +3,9 @@ import pytest
 from .util import get_random_doc
 
 from spacy import util
-from spacy.util import minibatch_by_words, dot_to_object
+from spacy.util import dot_to_object
 from thinc.api import Config, Optimizer
+from spacy.gold.batchers import minibatch_by_words
 
 from ..lang.en import English
 from ..lang.nl import Dutch
@@ -84,27 +85,24 @@ def test_util_dot_section():
     """
     nlp_config = Config().from_str(cfg_string)
     en_nlp, en_config = util.load_model_from_config(nlp_config, auto_fill=True)
-
     default_config = Config().from_disk(DEFAULT_CONFIG_PATH)
     default_config["nlp"]["lang"] = "nl"
     nl_nlp, nl_config = util.load_model_from_config(default_config, auto_fill=True)
-
     # Test that creation went OK
     assert isinstance(en_nlp, English)
     assert isinstance(nl_nlp, Dutch)
     assert nl_nlp.pipe_names == []
     assert en_nlp.pipe_names == ["textcat"]
-    assert en_nlp.get_pipe("textcat").model.attrs["multi_label"] == False   # not exclusive_classes
-
+    # not exclusive_classes
+    assert en_nlp.get_pipe("textcat").model.attrs["multi_label"] is False
     # Test that default values got overwritten
     assert not en_config["nlp"]["load_vocab_data"]
     assert nl_config["nlp"]["load_vocab_data"]  # default value True
-
     # Test proper functioning of 'dot_to_object'
     with pytest.raises(KeyError):
-        obj = dot_to_object(en_config, "nlp.pipeline.tagger")
+        dot_to_object(en_config, "nlp.pipeline.tagger")
     with pytest.raises(KeyError):
-        obj = dot_to_object(en_config, "nlp.unknownattribute")
+        dot_to_object(en_config, "nlp.unknownattribute")
     assert not dot_to_object(en_config, "nlp.load_vocab_data")
     assert dot_to_object(nl_config, "nlp.load_vocab_data")
     assert isinstance(dot_to_object(nl_config, "training.optimizer"), Optimizer)
diff --git a/spacy/tokens/_retokenize.pyx b/spacy/tokens/_retokenize.pyx
index b89ce3bdd..61f7c3db0 100644
--- a/spacy/tokens/_retokenize.pyx
+++ b/spacy/tokens/_retokenize.pyx
@@ -12,6 +12,7 @@ from .token cimport Token
 from ..lexeme cimport Lexeme, EMPTY_LEXEME
 from ..structs cimport LexemeC, TokenC
 from ..attrs cimport TAG, MORPH
+from ..vocab cimport Vocab
 
 from .underscore import is_writable_attr
 from ..attrs import intify_attrs
@@ -57,16 +58,7 @@ cdef class Retokenizer:
                 raise ValueError(Errors.E102.format(token=repr(token)))
             self.tokens_to_merge.add(token.i)
         self._spans_to_merge.append((span.start, span.end))
-        if "_" in attrs:  # Extension attributes
-            extensions = attrs["_"]
-            _validate_extensions(extensions)
-            attrs = {key: value for key, value in attrs.items() if key != "_"}
-            attrs = intify_attrs(attrs, strings_map=self.doc.vocab.strings)
-            attrs["_"] = extensions
-        else:
-            attrs = intify_attrs(attrs, strings_map=self.doc.vocab.strings)
-            if MORPH in attrs:
-                self.doc.vocab.morphology.add(self.doc.vocab.strings.as_string(attrs[MORPH]))
+        attrs = normalize_token_attrs(self.doc.vocab, attrs)
         self.merges.append((span, attrs))
 
     def split(self, Token token, orths, heads, attrs=SimpleFrozenDict()):
@@ -98,9 +90,11 @@ cdef class Retokenizer:
             # NB: Since we support {"KEY": [value, value]} syntax here, this
             # will only "intify" the keys, not the values
             attrs = intify_attrs(attrs, strings_map=self.doc.vocab.strings)
-            if MORPH in attrs:
-                for morph in attrs[MORPH]:
-                    self.doc.vocab.morphology.add(self.doc.vocab.strings.as_string(morph))
+        if MORPH in attrs:
+            for i, morph in enumerate(attrs[MORPH]):
+                # add and set to normalized value
+                morph = self.doc.vocab.morphology.add(self.doc.vocab.strings.as_string(morph))
+                attrs[MORPH][i] = morph
         head_offsets = []
         for head in heads:
             if isinstance(head, Token):
@@ -224,21 +218,7 @@ def _merge(Doc doc, merges):
         token.lex = lex
         # We set trailing space here too
         token.spacy = doc.c[spans[token_index].end-1].spacy
-        py_token = span[0]
-        # Assign attributes
-        for attr_name, attr_value in attributes.items():
-            if attr_name == "_":  # Set extension attributes
-                for ext_attr_key, ext_attr_value in attr_value.items():
-                    py_token._.set(ext_attr_key, ext_attr_value)
-            elif attr_name == TAG:
-                doc.vocab.morphology.assign_tag(token, attr_value)
-            else:
-                # Set attributes on both token and lexeme to take care of token
-                # attribute vs. lexical attribute without having to enumerate
-                # them. If an attribute name is not valid, set_struct_attr will
-                # ignore it.
-                Token.set_struct_attr(token, attr_name, attr_value)
-                Lexeme.set_struct_attr(<LexemeC*>lex, attr_name, attr_value)
+        set_token_attrs(span[0], attributes)
     # Begin by setting all the head indices to absolute token positions
     # This is easier to work with for now than the offsets
     # Before thinking of something simpler, beware the case where a
@@ -423,3 +403,40 @@ cdef make_iob_consistent(TokenC* tokens, int length):
     for i in range(1, length):
         if tokens[i].ent_iob == 1 and tokens[i - 1].ent_type != tokens[i].ent_type:
             tokens[i].ent_iob = 3
+
+
+def normalize_token_attrs(Vocab vocab, attrs):
+    """Return attrs processed by intify_attrs, with validated "_" extension
+    attributes carried over unchanged and MORPH set to its normalized value."""
+    if "_" in attrs:  # Extension attributes
+        extensions = attrs["_"]
+        _validate_extensions(extensions)
+        attrs = {key: value for key, value in attrs.items() if key != "_"}
+        attrs = intify_attrs(attrs, strings_map=vocab.strings)
+        attrs["_"] = extensions
+    else:
+        attrs = intify_attrs(attrs, strings_map=vocab.strings)
+    if MORPH in attrs:
+        # add to vocab.morphology and set to the normalized value it returns
+        attrs[MORPH] = vocab.morphology.add(vocab.strings.as_string(attrs[MORPH]))
+    return attrs
+
+
+def set_token_attrs(Token py_token, attrs):
+    """Set attrs on py_token: "_" extension values, TAG, or other struct attrs."""
+    cdef TokenC* token = py_token.c
+    cdef const LexemeC* lex = token.lex
+    cdef Doc doc = py_token.doc
+    # Assign attributes
+    for attr_name, attr_value in attrs.items():
+        if attr_name == "_":  # Set extension attributes
+            for ext_attr_key, ext_attr_value in attr_value.items():
+                py_token._.set(ext_attr_key, ext_attr_value)
+        elif attr_name == TAG:
+            doc.vocab.morphology.assign_tag(token, attr_value)
+        else:
+            # Set on both token and lexeme, which handles token vs. lexical
+            # attributes without having to enumerate them. If an attribute
+            # name is not valid, set_struct_attr will ignore it.
+            Token.set_struct_attr(token, attr_name, attr_value)
+            Lexeme.set_struct_attr(<LexemeC*>lex, attr_name, attr_value)
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index 5b55d8e88..15e6518d6 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -176,9 +176,13 @@ cdef class Span:
             return Span(self.doc, start + self.start, end + self.start)
         else:
             if i < 0:
-                return self.doc[self.end + i]
+                token_i = self.end + i
             else:
-                return self.doc[self.start + i]
+                token_i = self.start + i
+            if self.start <= token_i < self.end:
+                return self.doc[token_i]
+            else:
+                raise IndexError(Errors.E1002)
 
     def __iter__(self):
         """Iterate over `Token` objects.
diff --git a/spacy/util.py b/spacy/util.py
index 677f5e8e0..e580d6c62 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -1,5 +1,5 @@
 from typing import List, Union, Dict, Any, Optional, Iterable, Callable, Tuple
-from typing import Iterator, Type, Pattern, Sequence, TYPE_CHECKING
+from typing import Iterator, Type, Pattern, TYPE_CHECKING
 from types import ModuleType
 import os
 import importlib
@@ -7,7 +7,7 @@ import importlib.util
 import re
 from pathlib import Path
 import thinc
-from thinc.api import NumpyOps, get_current_ops, Adam, Config, Optimizer, Model
+from thinc.api import NumpyOps, get_current_ops, Adam, Config, Optimizer
 import functools
 import itertools
 import numpy.random
@@ -24,8 +24,6 @@ import tempfile
 import shutil
 import shlex
 import inspect
-from thinc.types import Unserializable
-
 
 try:
     import cupy.random
@@ -46,7 +44,7 @@ from thinc.api import fix_random_seed, compounding, decaying  # noqa: F401
 
 from .symbols import ORTH
 from .compat import cupy, CudaStream, is_windows
-from .errors import Errors, Warnings
+from .errors import Errors, Warnings, OLD_MODEL_SHORTCUTS
 from . import about
 
 if TYPE_CHECKING:
@@ -69,6 +67,10 @@ class registry(thinc.registry):
     lookups = catalogue.create("spacy", "lookups", entry_points=True)
     displacy_colors = catalogue.create("spacy", "displacy_colors", entry_points=True)
     assets = catalogue.create("spacy", "assets", entry_points=True)
+    # Callback functions used to manipulate nlp object etc.
+    callbacks = catalogue.create("spacy", "callbacks")
+    batchers = catalogue.create("spacy", "batchers", entry_points=True)
+    readers = catalogue.create("spacy", "readers", entry_points=True)
     # These are factories registered via third-party packages and the
     # spacy_factories entry point. This registry only exists so we can easily
     # load them via the entry points. The "true" factories are added via the
@@ -205,45 +207,55 @@ def load_vectors_into_model(
 
 def load_model(
     name: Union[str, Path],
+    *,
+    vocab: Union["Vocab", bool] = True,
     disable: Iterable[str] = tuple(),
-    component_cfg: Dict[str, Dict[str, Any]] = SimpleFrozenDict(),
+    config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
 ) -> "Language":
     """Load a model from a package or data path.
 
     name (str): Package name or model path.
+    vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
+        a new Vocab object will be created.
     disable (Iterable[str]): Names of pipeline components to disable.
-    component_cfg (Dict[str, dict]): Config overrides for pipeline components,
-        keyed by component names.
+    config (Dict[str, Any] / Config): Config overrides as nested dict or dict
+        keyed by section values in dot notation.
     RETURNS (Language): The loaded nlp object.
     """
-    cfg = component_cfg
+    kwargs = {"vocab": vocab, "disable": disable, "config": config}
     if isinstance(name, str):  # name or string path
         if name.startswith("blank:"):  # shortcut for blank model
             return get_lang_class(name.replace("blank:", ""))()
         if is_package(name):  # installed as package
-            return load_model_from_package(name, disable=disable, component_cfg=cfg)
+            return load_model_from_package(name, **kwargs)
         if Path(name).exists():  # path to model data directory
-            return load_model_from_path(Path(name), disable=disable, component_cfg=cfg)
+            return load_model_from_path(Path(name), **kwargs)
     elif hasattr(name, "exists"):  # Path or Path-like to model data
-        return load_model_from_path(name, disable=disable, component_cfg=cfg)
+        return load_model_from_path(name, **kwargs)
+    if name in OLD_MODEL_SHORTCUTS:
+        raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SHORTCUTS[name]))
     raise IOError(Errors.E050.format(name=name))
 
 
 def load_model_from_package(
     name: str,
+    *,
+    vocab: Union["Vocab", bool] = True,
     disable: Iterable[str] = tuple(),
-    component_cfg: Dict[str, Dict[str, Any]] = SimpleFrozenDict(),
+    config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
 ) -> "Language":
     """Load a model from an installed package."""
     cls = importlib.import_module(name)
-    return cls.load(disable=disable, component_cfg=component_cfg)
+    return cls.load(vocab=vocab, disable=disable, config=config)
 
 
 def load_model_from_path(
     model_path: Union[str, Path],
+    *,
     meta: Optional[Dict[str, Any]] = None,
+    vocab: Union["Vocab", bool] = True,
     disable: Iterable[str] = tuple(),
-    component_cfg: Dict[str, Dict[str, Any]] = SimpleFrozenDict(),
+    config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
 ) -> "Language":
     """Load a model from a data directory path. Creates Language class with
     pipeline from config.cfg and then calls from_disk() with path."""
@@ -254,17 +266,16 @@ def load_model_from_path(
     config_path = model_path / "config.cfg"
     if not config_path.exists() or not config_path.is_file():
         raise IOError(Errors.E053.format(path=config_path, name="config.cfg"))
-    config = Config().from_disk(config_path)
-    override_cfg = {"components": {p: dict_to_dot(c) for p, c in component_cfg.items()}}
-    overrides = dict_to_dot(override_cfg)
-    nlp, _ = load_model_from_config(config, disable=disable, overrides=overrides)
+    config = Config().from_disk(config_path, overrides=dict_to_dot(config))
+    nlp, _ = load_model_from_config(config, vocab=vocab, disable=disable)
     return nlp.from_disk(model_path, exclude=disable)
 
 
 def load_model_from_config(
     config: Union[Dict[str, Any], Config],
+    *,
+    vocab: Union["Vocab", bool] = True,
     disable: Iterable[str] = tuple(),
-    overrides: Dict[str, Any] = {},
     auto_fill: bool = False,
     validate: bool = True,
 ) -> Tuple["Language", Config]:
@@ -280,26 +291,20 @@ def load_model_from_config(
     # registry, including custom subclasses provided via entry points
     lang_cls = get_lang_class(nlp_config["lang"])
     nlp = lang_cls.from_config(
-        config,
-        disable=disable,
-        overrides=overrides,
-        auto_fill=auto_fill,
-        validate=validate,
+        config, vocab=vocab, disable=disable, auto_fill=auto_fill, validate=validate,
     )
     return nlp, nlp.resolved
 
 
 def load_model_from_init_py(
     init_file: Union[Path, str],
+    *,
+    vocab: Union["Vocab", bool] = True,
     disable: Iterable[str] = tuple(),
-    component_cfg: Dict[str, Dict[str, Any]] = SimpleFrozenDict(),
+    config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
 ) -> "Language":
     """Helper function to use in the `load()` method of a model package's
     __init__.py.
-
-    init_file (str): Path to model's __init__.py, i.e. `__file__`.
-    **overrides: Specific overrides, like pipeline components to disable.
-    RETURNS (Language): `Language` class with loaded model.
     """
     model_path = Path(init_file).parent
     meta = get_model_meta(model_path)
@@ -308,7 +313,7 @@ def load_model_from_init_py(
     if not model_path.exists():
         raise IOError(Errors.E052.format(path=data_path))
     return load_model_from_path(
-        data_path, meta, disable=disable, component_cfg=component_cfg
+        data_path, vocab=vocab, meta=meta, disable=disable, config=config
     )
 
 
@@ -749,145 +754,6 @@ def normalize_slice(
     return start, stop
 
 
-def minibatch(
-    items: Iterable[Any], size: Union[Iterator[int], int] = 8
-) -> Iterator[Any]:
-    """Iterate over batches of items. `size` may be an iterator,
-    so that batch-size can vary on each step.
-    """
-    if isinstance(size, int):
-        size_ = itertools.repeat(size)
-    else:
-        size_ = size
-    items = iter(items)
-    while True:
-        batch_size = next(size_)
-        batch = list(itertools.islice(items, int(batch_size)))
-        if len(batch) == 0:
-            break
-        yield list(batch)
-
-
-def minibatch_by_padded_size(
-    docs: Iterator["Doc"],
-    size: Union[Iterator[int], int],
-    buffer: int = 256,
-    discard_oversize: bool = False,
-) -> Iterator[Iterator["Doc"]]:
-    if isinstance(size, int):
-        size_ = itertools.repeat(size)
-    else:
-        size_ = size
-    for outer_batch in minibatch(docs, buffer):
-        outer_batch = list(outer_batch)
-        target_size = next(size_)
-        for indices in _batch_by_length(outer_batch, target_size):
-            subbatch = [outer_batch[i] for i in indices]
-            padded_size = max(len(seq) for seq in subbatch) * len(subbatch)
-            if discard_oversize and padded_size >= target_size:
-                pass
-            else:
-                yield subbatch
-
-
-def _batch_by_length(seqs: Sequence[Any], max_words: int) -> List[List[Any]]:
-    """Given a list of sequences, return a batched list of indices into the
-    list, where the batches are grouped by length, in descending order.
-
-    Batches may be at most max_words in size, defined as max sequence length * size.
-    """
-    # Use negative index so we can get sort by position ascending.
-    lengths_indices = [(len(seq), i) for i, seq in enumerate(seqs)]
-    lengths_indices.sort()
-    batches = []
-    batch = []
-    for length, i in lengths_indices:
-        if not batch:
-            batch.append(i)
-        elif length * (len(batch) + 1) <= max_words:
-            batch.append(i)
-        else:
-            batches.append(batch)
-            batch = [i]
-    if batch:
-        batches.append(batch)
-    # Check lengths match
-    assert sum(len(b) for b in batches) == len(seqs)
-    batches = [list(sorted(batch)) for batch in batches]
-    batches.reverse()
-    return batches
-
-
-def minibatch_by_words(docs, size, tolerance=0.2, discard_oversize=False):
-    """Create minibatches of roughly a given number of words. If any examples
-    are longer than the specified batch length, they will appear in a batch by
-    themselves, or be discarded if discard_oversize=True.
-    The argument 'docs' can be a list of strings, Doc's or Example's. """
-    from .gold import Example
-
-    if isinstance(size, int):
-        size_ = itertools.repeat(size)
-    elif isinstance(size, List):
-        size_ = iter(size)
-    else:
-        size_ = size
-    target_size = next(size_)
-    tol_size = target_size * tolerance
-    batch = []
-    overflow = []
-    batch_size = 0
-    overflow_size = 0
-    for doc in docs:
-        if isinstance(doc, Example):
-            n_words = len(doc.reference)
-        elif isinstance(doc, str):
-            n_words = len(doc.split())
-        else:
-            n_words = len(doc)
-        # if the current example exceeds the maximum batch size, it is returned separately
-        # but only if discard_oversize=False.
-        if n_words > target_size + tol_size:
-            if not discard_oversize:
-                yield [doc]
-        # add the example to the current batch if there's no overflow yet and it still fits
-        elif overflow_size == 0 and (batch_size + n_words) <= target_size:
-            batch.append(doc)
-            batch_size += n_words
-        # add the example to the overflow buffer if it fits in the tolerance margin
-        elif (batch_size + overflow_size + n_words) <= (target_size + tol_size):
-            overflow.append(doc)
-            overflow_size += n_words
-        # yield the previous batch and start a new one. The new one gets the overflow examples.
-        else:
-            if batch:
-                yield batch
-            target_size = next(size_)
-            tol_size = target_size * tolerance
-            batch = overflow
-            batch_size = overflow_size
-            overflow = []
-            overflow_size = 0
-            # this example still fits
-            if (batch_size + n_words) <= target_size:
-                batch.append(doc)
-                batch_size += n_words
-            # this example fits in overflow
-            elif (batch_size + n_words) <= (target_size + tol_size):
-                overflow.append(doc)
-                overflow_size += n_words
-            # this example does not fit with the previous overflow: start another new batch
-            else:
-                if batch:
-                    yield batch
-                target_size = next(size_)
-                tol_size = target_size * tolerance
-                batch = [doc]
-                batch_size = n_words
-    batch.extend(overflow)
-    if batch:
-        yield batch
-
-
 def filter_spans(spans: Iterable["Span"]) -> List["Span"]:
     """Filter a sequence of spans and remove duplicates or overlaps. Useful for
     creating named entities (where one token can only be part of one entity) or
@@ -1219,3 +1085,20 @@ def create_default_optimizer() -> Optimizer:
         L2_is_weight_decay=L2_is_weight_decay,
     )
     return optimizer
+
+
+def minibatch(items, size):
+    """Iterate over batches of items. `size` may be an iterator,
+    so that batch-size can vary on each step.
+    """
+    if isinstance(size, int):
+        size_ = itertools.repeat(size)
+    else:
+        size_ = size
+    items = iter(items)
+    while True:
+        batch_size = next(size_)
+        batch = list(itertools.islice(items, int(batch_size)))
+        if len(batch) == 0:
+            break
+        yield list(batch)
diff --git a/website/docs/api/architectures.md b/website/docs/api/architectures.md
index 95f7d0597..a22ee5be8 100644
--- a/website/docs/api/architectures.md
+++ b/website/docs/api/architectures.md
@@ -6,6 +6,7 @@ menu:
   - ['Tok2Vec', 'tok2vec']
   - ['Transformers', 'transformers']
   - ['Parser & NER', 'parser']
+  - ['Tagging', 'tagger']
   - ['Text Classification', 'textcat']
   - ['Entity Linking', 'entitylinker']
 ---
@@ -18,6 +19,30 @@ TODO: intro and how architectures work, link to
 
 ### spacy.HashEmbedCNN.v1 {#HashEmbedCNN}
 
+<!-- TODO: intro -->
+
+> #### Example Config
+>
+> ```ini
+> [model]
+> @architectures = "spacy.HashEmbedCNN.v1"
+> # TODO: ...
+>
+> [model.tok2vec]
+> # ...
+> ```
+
+| Name                 | Type  | Description |
+| -------------------- | ----- | ----------- |
+| `width`              | int   |             |
+| `depth`              | int   |             |
+| `embed_size`         | int   |             |
+| `window_size`        | int   |             |
+| `maxout_pieces`      | int   |             |
+| `subword_features`   | bool  |             |
+| `dropout`            | float |             |
+| `pretrained_vectors` | bool  |             |
+
 ### spacy.HashCharEmbedCNN.v1 {#HashCharEmbedCNN}
 
 ### spacy.HashCharEmbedBiLSTM.v1 {#HashCharEmbedBiLSTM}
@@ -99,6 +124,28 @@ architectures into your training config.
 | `use_upper`         | bool                                       |             |
 | `nO`                | int                                        |             |
 
+## Tagging architectures {#tagger source="spacy/ml/models/tagger.py"}
+
+### spacy.Tagger.v1 {#Tagger}
+
+<!-- TODO: intro -->
+
+> #### Example Config
+>
+> ```ini
+> [model]
+> @architectures = "spacy.Tagger.v1"
+> nO = null
+>
+> [model.tok2vec]
+> # ...
+> ```
+
+| Name      | Type                                       | Description |
+| --------- | ------------------------------------------ | ----------- |
+| `tok2vec` | [`Model`](https://thinc.ai/docs/api-model) |             |
+| `nO`      | int                                        |             |
+
 ## Text classification architectures {#textcat source="spacy/ml/models/textcat.py"}
 
 ### spacy.TextCatEnsemble.v1 {#TextCatEnsemble}
@@ -112,3 +159,21 @@ architectures into your training config.
 ## Entity linking architectures {#entitylinker source="spacy/ml/models/entity_linker.py"}
 
 ### spacy.EntityLinker.v1 {#EntityLinker}
+
+<!-- TODO: intro -->
+
+> #### Example Config
+>
+> ```ini
+> [model]
+> @architectures = "spacy.EntityLinker.v1"
+> nO = null
+>
+> [model.tok2vec]
+> # ...
+> ```
+
+| Name      | Type                                       | Description |
+| --------- | ------------------------------------------ | ----------- |
+| `tok2vec` | [`Model`](https://thinc.ai/docs/api-model) |             |
+| `nO`      | int                                        |             |
diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md
index 68aff4c46..abe050661 100644
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@@ -6,11 +6,11 @@ menu:
   - ['Download', 'download']
   - ['Info', 'info']
   - ['Validate', 'validate']
+  - ['Init', 'init']
   - ['Convert', 'convert']
   - ['Debug', 'debug']
   - ['Train', 'train']
   - ['Pretrain', 'pretrain']
-  - ['Init Model', 'init-model']
   - ['Evaluate', 'evaluate']
   - ['Package', 'package']
   - ['Project', 'project']
@@ -94,6 +94,80 @@ $ python -m spacy validate
 | ---------- | -------- | --------------------------------------------------------- |
 | **PRINTS** | `stdout` | Details about the compatibility of your installed models. |
 
+## Init {#init new="3"}
+
+The `spacy init` CLI includes helpful commands for initializing training config
+files and model directories.
+
+### init config {#init-config new="3"}
+
+Initialize and export a [`config.cfg` file](/usage/training#config) for training
+and update it with all default values, if possible. Config files used for
+training should always be complete and not contain any hidden defaults or
+missing values, so this command helps you create your final config. It takes
+**one** of the following options:
+
+- `--base`: Base **config** to auto-fill, e.g. created using the
+  [training quickstart](/usage/training#quickstart) widget.
+- `--lang`: Base **language** code to use for blank config.
+- `--model`: Base **model** to copy config from.
+
+> ```bash
+> ### with base config {wrap="true"}
+> $ python -m spacy init config config.cfg --base base.cfg
+> ```
+>
+> ```bash
+> ### blank language {wrap="true"}
+> $ python -m spacy init config config.cfg --lang en --pipeline tagger,parser
+> ```
+
+```bash
+$ python -m spacy init config [output] [--base] [--lang] [--model] [--pipeline]
+```
+
+| Argument           | Type       | Description                                                                                                                                                           |
+| ------------------ | ---------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `output`           | positional | Path to output `.cfg` file. If not set, the config is written to stdout so you can pipe it forward to a file.                                                         |
+| `--base`, `-b`     | option     | Optional base config file to auto-fill with defaults.                                                                                                                 |
+| `--lang`, `-l`     | option     | Optional language code to use for blank config. If a `--pipeline` is specified, the components will be added in order.                                                |
+| `--model`, `-m`    | option     | Optional base model to copy config from. If a `--pipeline` is specified, only those components will be kept, and all other components not in the model will be added. |
+| `--pipeline`, `-p` | option     | Optional comma-separated pipeline of components to add to blank language or model.                                                                                    |
+| **CREATES**        | config     | Complete and auto-filled config file for training.                                                                                                                    |
+
+### init model {#init-model new="2"}
+
+<!-- TODO: update for v3 -->
+
+Create a new model directory from raw data, like word frequencies, Brown
+clusters and word vectors. This command is similar to the `spacy model` command
+in v1.x. Note that in order to populate the model's vocab, you need to pass in a
+JSONL-formatted [vocabulary file](/api/data-formats#vocab-jsonl) as
+`--jsonl-loc` with optional `id` values that correspond to the vectors table.
+Just loading in vectors will not automatically populate the vocab.
+
+<Infobox title="New in v3.0" variant="warning">
+
+The `init-model` command is now available as a subcommand of `spacy init`.
+
+</Infobox>
+
+```bash
+$ python -m spacy init model [lang] [output_dir] [--jsonl-loc] [--vectors-loc]
+[--prune-vectors]
+```
+
+| Argument                                                | Type       | Description                                                                                                                                                                                                                                            |
+| ------------------------------------------------------- | ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `lang`                                                  | positional | Model language [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes), e.g. `en`.                                                                                                                                                           |
+| `output_dir`                                            | positional | Model output directory. Will be created if it doesn't exist.                                                                                                                                                                                           |
+| `--jsonl-loc`, `-j`                                     | option     | Optional location of JSONL-formatted [vocabulary file](/api/data-formats#vocab-jsonl) with lexical attributes.                                                                                                                                         |
+| `--vectors-loc`, `-v`                                   | option     | Optional location of vectors. Should be a file where the first row contains the dimensions of the vectors, followed by a space-separated Word2Vec table. File can be provided in `.txt` format or as a zipped text file in `.zip` or `.tar.gz` format. |
+| `--truncate-vectors`, `-t` <Tag variant="new">2.3</Tag> | option     | Number of vectors to truncate to when reading in vectors file. Defaults to `0` for no truncation.                                                                                                                                                      |
+| `--prune-vectors`, `-V`                                 | option     | Number of vectors to prune the vocabulary to. Defaults to `-1` for no pruning.                                                                                                                                                                         |
+| `--vectors-name`, `-vn`                                 | option     | Name to assign to the word vectors in the `meta.json`, e.g. `en_core_web_md.vectors`.                                                                                                                                                                  |
+| **CREATES**                                             | model      | A spaCy model containing the vocab and vectors.                                                                                                                                                                                                        |
+
 ## Convert {#convert}
 
 Convert files into spaCy's
@@ -619,32 +693,6 @@ tokenization can be provided.
 {"tokens": ["If", "tokens", "are", "provided", "then", "we", "can", "skip", "the", "raw", "input", "text"]}
 ```
 
-## Init Model {#init-model new="2"}
-
-Create a new model directory from raw data, like word frequencies, Brown
-clusters and word vectors. This command is similar to the `spacy model` command
-in v1.x. Note that in order to populate the model's vocab, you need to pass in a
-JSONL-formatted [vocabulary file](/api/data-formats#vocab-jsonl) as
-`--jsonl-loc` with optional `id` values that correspond to the vectors table.
-Just loading in vectors will not automatically populate the vocab.
-
-```bash
-$ python -m spacy init-model [lang] [output_dir] [--jsonl-loc] [--vectors-loc]
-[--prune-vectors]
-```
-
-| Argument                                                    | Type       | Description                                                                                                                                                                                                                                            |
-| ----------------------------------------------------------- | ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `lang`                                                      | positional | Model language [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes), e.g. `en`.                                                                                                                                                           |
-| `output_dir`                                                | positional | Model output directory. Will be created if it doesn't exist.                                                                                                                                                                                           |
-| `--jsonl-loc`, `-j`                                         | option     | Optional location of JSONL-formatted [vocabulary file](/api/data-formats#vocab-jsonl) with lexical attributes.                                                                                                                                         |
-| `--vectors-loc`, `-v`                                       | option     | Optional location of vectors. Should be a file where the first row contains the dimensions of the vectors, followed by a space-separated Word2Vec table. File can be provided in `.txt` format or as a zipped text file in `.zip` or `.tar.gz` format. |
-| `--truncate-vectors`, `-t` <Tag variant="new">2.3</Tag>     | option     | Number of vectors to truncate to when reading in vectors file. Defaults to `0` for no truncation.                                                                                                                                                      |
-| `--prune-vectors`, `-V`                                     | option     | Number of vectors to prune the vocabulary to. Defaults to `-1` for no pruning.                                                                                                                                                                         |
-| `--vectors-name`, `-vn`                                     | option     | Name to assign to the word vectors in the `meta.json`, e.g. `en_core_web_md.vectors`.                                                                                                                                                                  |
-| `--omit-extra-lookups`, `-OEL` <Tag variant="new">2.3</Tag> | flag       | Do not include any of the extra lookups tables (`cluster`/`prob`/`sentiment`) from `spacy-lookups-data` in the model.                                                                                                                                  |
-| **CREATES**                                                 | model      | A spaCy model containing the vocab and vectors.                                                                                                                                                                                                        |
-
 ## Evaluate {#evaluate new="2"}
 
 <!-- TODO: document new evaluate command -->
diff --git a/website/docs/api/corpus.md b/website/docs/api/corpus.md
index 38e19129d..5f639d050 100644
--- a/website/docs/api/corpus.md
+++ b/website/docs/api/corpus.md
@@ -6,30 +6,44 @@ source: spacy/gold/corpus.py
 new: 3
 ---
 
-This class manages annotated corpora and can read training and development
-datasets in the [DocBin](/api/docbin) (`.spacy`) format.
+This class manages annotated corpora and can be used for training and
+development datasets in the [DocBin](/api/docbin) (`.spacy`) format. To
+customize the data loading during training, you can register your own
+[data readers and batchers](/usage/training#custom-code-readers-batchers).
 
 ## Corpus.\_\_init\_\_ {#init tag="method"}
 
-Create a `Corpus`. The input data can be a file or a directory of files.
+Create a `Corpus` for iterating [Example](/api/example) objects from a file or
+directory of [`.spacy` data files](/api/data-formats#binary-training). The
+`gold_preproc` setting lets you specify whether to set up the `Example` object
+with gold-standard sentences and tokens for the predictions. Gold preprocessing
+helps the annotations align to the tokenization, and may result in sequences of
+more consistent length. However, it may reduce runtime accuracy due to
+train/test skew.
 
 > #### Example
 >
 > ```python
 > from spacy.gold import Corpus
 >
-> corpus = Corpus("./train.spacy", "./dev.spacy")
+> # With a single file
+> corpus = Corpus("./data/train.spacy")
+>
+> # With a directory
+> corpus = Corpus("./data", limit=10)
 > ```
 
-| Name    | Type         | Description                                                      |
-| ------- | ------------ | ---------------------------------------------------------------- |
-| `train` | str / `Path` | Training data (`.spacy` file or directory of `.spacy` files).    |
-| `dev`   | str / `Path` | Development data (`.spacy` file or directory of `.spacy` files). |
-| `limit` | int          | Maximum number of examples returned. `0` for no limit (default). |
+| Name            | Type         | Description                                                                                                                                 |
+| --------------- | ------------ | ------------------------------------------------------------------------------------------------------------------------------------------- |
+| `path`          | str / `Path` | The directory or filename to read from.                                                                                                     |
+| _keyword-only_  |              |                                                                                                                                             |
+| `gold_preproc`  | bool         | Whether to set up the Example object with gold-standard sentences and tokens for the predictions. Defaults to `False`.                      |
+| `max_length`    | int          | Maximum document length. Longer documents will be split into sentences, if sentence boundaries are available. Defaults to `0` for no limit. |
+| `limit`         | int          | Limit corpus to a subset of examples, e.g. for debugging. Defaults to `0` for no limit.                                                     |
 
-## Corpus.train_dataset {#train_dataset tag="method"}
+## Corpus.\_\_call\_\_ {#call tag="method"}
 
-Yield examples from the training data.
+Yield examples from the data.
 
 > #### Example
 >
@@ -37,60 +51,12 @@ Yield examples from the training data.
 > from spacy.gold import Corpus
 > import spacy
 >
-> corpus = Corpus("./train.spacy", "./dev.spacy")
+> corpus = Corpus("./train.spacy")
 > nlp = spacy.blank("en")
-> train_data = corpus.train_dataset(nlp)
+> train_data = corpus(nlp)
 > ```
 
-| Name           | Type       | Description                                                                                                                                |
-| -------------- | ---------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
-| `nlp`          | `Language` | The current `nlp` object.                                                                                                                  |
-| _keyword-only_ |            |                                                                                                                                            |
-| `shuffle`      | bool       | Whether to shuffle the examples. Defaults to `True`.                                                                                       |
-| `gold_preproc` | bool       | Whether to train on gold-standard sentences and tokens. Defaults to `False`.                                                               |
-| `max_length`   | int        | Maximum document length. Longer documents will be split into sentences, if sentence boundaries are available. `0` for no limit (default).  |
-| **YIELDS**     | `Example`  | The examples.                                                                                                                              |
-
-## Corpus.dev_dataset {#dev_dataset tag="method"}
-
-Yield examples from the development data.
-
-> #### Example
->
-> ```python
-> from spacy.gold import Corpus
-> import spacy
->
-> corpus = Corpus("./train.spacy", "./dev.spacy")
-> nlp = spacy.blank("en")
-> dev_data = corpus.dev_dataset(nlp)
-> ```
-
-| Name           | Type       | Description                                                                  |
-| -------------- | ---------- | ---------------------------------------------------------------------------- |
-| `nlp`          | `Language` | The current `nlp` object.                                                    |
-| _keyword-only_ |            |                                                                              |
-| `gold_preproc` | bool       | Whether to train on gold-standard sentences and tokens. Defaults to `False`. |
-| **YIELDS**     | `Example`  | The examples.                                                                |
-
-## Corpus.count_train {#count_train tag="method"}
-
-Get the word count of all training examples.
-
-> #### Example
->
-> ```python
-> from spacy.gold import Corpus
-> import spacy
->
-> corpus = Corpus("./train.spacy", "./dev.spacy")
-> nlp = spacy.blank("en")
-> word_count = corpus.count_train(nlp)
-> ```
-
-| Name        | Type       | Description               |
-| ----------- | ---------- | ------------------------- |
-| `nlp`       | `Language` | The current `nlp` object. |
-| **RETURNS** | int        | The word count.           |
-
-<!-- TODO: document remaining methods? / decide which to document -->
+| Name       | Type       | Description               |
+| ---------- | ---------- | ------------------------- |
+| `nlp`      | `Language` | The current `nlp` object. |
+| **YIELDS** | `Example`  | The examples.             |
diff --git a/website/docs/api/dependencyparser.md b/website/docs/api/dependencyparser.md
index a18e9e582..e56e85e64 100644
--- a/website/docs/api/dependencyparser.md
+++ b/website/docs/api/dependencyparser.md
@@ -29,9 +29,11 @@ architectures and their arguments and hyperparameters.
 > nlp.add_pipe("parser", config=config)
 > ```
 
+<!-- TODO: finish API docs -->
+
 | Setting | Type                                       | Description       | Default                                                           |
 | ------- | ------------------------------------------ | ----------------- | ----------------------------------------------------------------- |
-| `moves` | list                                       | <!-- TODO: -->    | `None`                                                            |
+| `moves` | list                                       |                   | `None`                                                            |
 | `model` | [`Model`](https://thinc.ai/docs/api-model) | The model to use. | [TransitionBasedParser](/api/architectures#TransitionBasedParser) |
 
 ```python
@@ -59,17 +61,19 @@ Create a new pipeline instance. In your application, you would normally use a
 shortcut for this and instantiate the component using its string name and
 [`nlp.add_pipe`](/api/language#add_pipe).
 
+<!-- TODO: finish API docs -->
+
 | Name                          | Type                                       | Description                                                                                 |
 | ----------------------------- | ------------------------------------------ | ------------------------------------------------------------------------------------------- |
 | `vocab`                       | `Vocab`                                    | The shared vocabulary.                                                                      |
 | `model`                       | [`Model`](https://thinc.ai/docs/api-model) | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component.             |
 | `name`                        | str                                        | String name of the component instance. Used to add entries to the `losses` during training. |
-| `moves`                       | list                                       | <!-- TODO: -->                                                                              |
+| `moves`                       | list                                       |                                                                                             |
 | _keyword-only_                |                                            |                                                                                             |
-| `update_with_oracle_cut_size` | int                                        | <!-- TODO: -->                                                                              |
-| `multitasks`                  | `Iterable`                                 | <!-- TODO: -->                                                                              |
-| `learn_tokens`                | bool                                       | <!-- TODO: -->                                                                              |
-| `min_action_freq`             | int                                        | <!-- TODO: -->                                                                              |
+| `update_with_oracle_cut_size` | int                                        |                                                                                             |
+| `multitasks`                  | `Iterable`                                 |                                                                                             |
+| `learn_tokens`                | bool                                       |                                                                                             |
+| `min_action_freq`             | int                                        |                                                                                             |
 
 ## DependencyParser.\_\_call\_\_ {#call tag="method"}
 
diff --git a/website/docs/api/entitylinker.md b/website/docs/api/entitylinker.md
index 06b4ade60..18d9c5edd 100644
--- a/website/docs/api/entitylinker.md
+++ b/website/docs/api/entitylinker.md
@@ -65,6 +65,8 @@ Create a new pipeline instance. In your application, you would normally use a
 shortcut for this and instantiate the component using its string name and
 [`nlp.add_pipe`](/api/language#add_pipe).
 
+<!-- TODO: finish API docs -->
+
 | Name             | Type            | Description                                                                                 |
 | ---------------- | --------------- | ------------------------------------------------------------------------------------------- |
 | `vocab`          | `Vocab`         | The shared vocabulary.                                                                      |
@@ -126,7 +128,7 @@ applied to the `Doc` in order. Both [`__call__`](/api/entitylinker#call) and
 ## EntityLinker.begin_training {#begin_training tag="method"}
 
 Initialize the pipe for training, using data examples if available. Returns an
-[`Optimizer`](https://thinc.ai/docs/api-optimizers) object. 
+[`Optimizer`](https://thinc.ai/docs/api-optimizers) object.
 
 > #### Example
 >
diff --git a/website/docs/api/entityrecognizer.md b/website/docs/api/entityrecognizer.md
index b5b549a04..0ab17f953 100644
--- a/website/docs/api/entityrecognizer.md
+++ b/website/docs/api/entityrecognizer.md
@@ -29,9 +29,11 @@ architectures and their arguments and hyperparameters.
 > nlp.add_pipe("ner", config=config)
 > ```
 
+<!-- TODO: finish API docs -->
+
 | Setting | Type                                       | Description       | Default                                                           |
 | ------- | ------------------------------------------ | ----------------- | ----------------------------------------------------------------- |
-| `moves` | list                                       | <!-- TODO: -->    | `None`                                                            |
+| `moves` | list                                       |                   | `None`                                                            |
 | `model` | [`Model`](https://thinc.ai/docs/api-model) | The model to use. | [TransitionBasedParser](/api/architectures#TransitionBasedParser) |
 
 ```python
@@ -59,17 +61,19 @@ Create a new pipeline instance. In your application, you would normally use a
 shortcut for this and instantiate the component using its string name and
 [`nlp.add_pipe`](/api/language#add_pipe).
 
+<!-- TODO: finish API docs -->
+
 | Name                          | Type                                       | Description                                                                                 |
 | ----------------------------- | ------------------------------------------ | ------------------------------------------------------------------------------------------- |
 | `vocab`                       | `Vocab`                                    | The shared vocabulary.                                                                      |
 | `model`                       | [`Model`](https://thinc.ai/docs/api-model) | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component.             |
 | `name`                        | str                                        | String name of the component instance. Used to add entries to the `losses` during training. |
-| `moves`                       | list                                       | <!-- TODO: -->                                                                              |
+| `moves`                       | list                                       |                                                                                             |
 | _keyword-only_                |                                            |                                                                                             |
-| `update_with_oracle_cut_size` | int                                        | <!-- TODO: -->                                                                              |
-| `multitasks`                  | `Iterable`                                 | <!-- TODO: -->                                                                              |
-| `learn_tokens`                | bool                                       | <!-- TODO: -->                                                                              |
-| `min_action_freq`             | int                                        | <!-- TODO: -->                                                                              |
+| `update_with_oracle_cut_size` | int                                        |                                                                                             |
+| `multitasks`                  | `Iterable`                                 |                                                                                             |
+| `learn_tokens`                | bool                                       |                                                                                             |
+| `min_action_freq`             | int                                        |                                                                                             |
 
 ## EntityRecognizer.\_\_call\_\_ {#call tag="method"}
 
diff --git a/website/docs/api/example.md b/website/docs/api/example.md
index d3f61c7e2..8c117aec7 100644
--- a/website/docs/api/example.md
+++ b/website/docs/api/example.md
@@ -289,7 +289,6 @@ Calculate alignment tables between two tokenizations.
 | `x2y` | [`Ragged`](https://thinc.ai/docs/api-types#ragged) | The `Ragged` object holding the alignment from `x` to `y`. |
 | `y2x` | [`Ragged`](https://thinc.ai/docs/api-types#ragged) | The `Ragged` object holding the alignment from `y` to `x`. |
 
-
 <Infobox title="Important note" variant="warning">
 
 The current implementation of the alignment algorithm assumes that both
@@ -310,8 +309,9 @@ tokenizations add up to the same string. For example, you'll be able to align
 > a2b = alignment.x2y
 > assert list(a2b.dataXd) == [0, 1, 1, 2]
 > ```
-> 
-> If `a2b.dataXd[1] == a2b.dataXd[2] == 1`, that means that `A[1]` (`"'"`) and `A[2]` (`"s"`) both align to `B[1]` (`"'s"`). 
+>
+> If `a2b.dataXd[1] == a2b.dataXd[2] == 1`, that means that `A[1]` (`"'"`) and
+> `A[2]` (`"s"`) both align to `B[1]` (`"'s"`).
 
 ### Alignment.from_strings {#classmethod tag="function"}
 
@@ -320,4 +320,3 @@ tokenizations add up to the same string. For example, you'll be able to align
 | `A`         | list        | String values of candidate tokens to align.     |
 | `B`         | list        | String values of reference tokens to align.     |
 | **RETURNS** | `Alignment` | An `Alignment` object describing the alignment. |
-
diff --git a/website/docs/api/language.md b/website/docs/api/language.md
index 7e25106d1..7464a029e 100644
--- a/website/docs/api/language.md
+++ b/website/docs/api/language.md
@@ -98,10 +98,10 @@ decorator. For more details and examples, see the
 | ----------------------- | -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `name`                  | str                  | The name of the component factory.                                                                                                                                                                                          |
 | _keyword-only_          |                      |                                                                                                                                                                                                                             |
-| `assigns`               | `Iterable[str]`      | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something -->                                                                               |
-| `requires`              | `Iterable[str]`      | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something -->                                                                               |
-| `retokenizes`           | bool                 | Whether the component changes tokenization. Used for pipeline analysis. <!-- TODO: link to something -->                                                                                                                    |
-| `scores`                | `Iterable[str]`      | All scores set by the components if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`.                                                                                                                                  |
+| `assigns`               | `Iterable[str]`      | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis).                                                                            |
+| `requires`              | `Iterable[str]`      | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis).                                                                            |
+| `retokenizes`           | bool                 | Whether the component changes tokenization. Used for [pipe analysis](/usage/processing-pipelines#analysis).                                                                                                                 |
+| `scores`                | `Iterable[str]`      | All scores set by the component if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis).                                                                   |
 | `default_score_weights` | `Dict[str, float]`   | The scores to report during training, and their default weight towards the final score used to select the best model. Weights should sum to `1.0` per component and will be combined and normalized for the whole pipeline. |
 | `func`                  | `Optional[Callable]` | Optional function if not used a a decorator.                                                                                                                                                                                |
 
@@ -146,10 +146,10 @@ examples, see the
 | `name`                  | str                  | The name of the component factory.                                                                                                                                                                                          |
 | _keyword-only_          |                      |                                                                                                                                                                                                                             |
 | `default_config`        | `Dict[str, any]`     | The default config, describing the default values of the factory arguments.                                                                                                                                                 |
-| `assigns`               | `Iterable[str]`      | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something -->                                                                               |
-| `requires`              | `Iterable[str]`      | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something -->                                                                               |
-| `retokenizes`           | bool                 | Whether the component changes tokenization. Used for pipeline analysis. <!-- TODO: link to something -->                                                                                                                    |
-| `scores`                | `Iterable[str]`      | All scores set by the components if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`.                                                                                                                                  |
+| `assigns`               | `Iterable[str]`      | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis).                                                                            |
+| `requires`              | `Iterable[str]`      | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis).                                                                            |
+| `retokenizes`           | bool                 | Whether the component changes tokenization. Used for [pipe analysis](/usage/processing-pipelines#analysis).                                                                                                                 |
+| `scores`                | `Iterable[str]`      | All scores set by the component if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis).                                                                   |
 | `default_score_weights` | `Dict[str, float]`   | The scores to report during training, and their default weight towards the final score used to select the best model. Weights should sum to `1.0` per component and will be combined and normalized for the whole pipeline. |
 | `func`                  | `Optional[Callable]` | Optional function if not used a a decorator.                                                                                                                                                                                |
 
@@ -302,6 +302,7 @@ Evaluate a model's pipeline components.
 | `batch_size`    | int                             | The batch size to use.                                                                                 |
 | `scorer`        | `Scorer`                        | Optional [`Scorer`](/api/scorer) to use. If not passed in, a new one will be created.                  |
 | `component_cfg` | `Dict[str, dict]`               | Optional dictionary of keyword arguments for components, keyed by component names. Defaults to `None`. |
+| `scorer_cfg`    | `Dict[str, Any]`                | Optional dictionary of keyword arguments for the `Scorer`. Defaults to `None`.                         |
 | **RETURNS**     | `Dict[str, Union[float, dict]]` | A dictionary of evaluation scores.                                                                     |
 
 ## Language.use_params {#use_params tag="contextmanager, method"}
@@ -362,7 +363,7 @@ that take a `Doc` object, modify it and return it. Only one of `before`,
 <Infobox title="Changed in v3.0" variant="warning">
 
 As of v3.0, the [`Language.add_pipe`](/api/language#add_pipe) method doesn't
-take callables anymore and instead expects the name of a component factory
+take callables anymore and instead expects the **name of a component factory**
 registered using [`@Language.component`](/api/language#component) or
 [`@Language.factory`](/api/language#factory). It now takes care of creating the
 component, adds it to the pipeline and returns it.
@@ -378,20 +379,25 @@ component, adds it to the pipeline and returns it.
 >
 > nlp.add_pipe("component", before="ner")
 > component = nlp.add_pipe("component", name="custom_name", last=True)
+>
+> # Add component from source model
+> source_nlp = spacy.load("en_core_web_sm")
+> nlp.add_pipe("ner", source=source_nlp)
 > ```
 
-| Name                                   | Type             | Description                                                                                                                                               |
-| -------------------------------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `factory_name`                         | str              | Name of the registered component factory.                                                                                                                 |
-| `name`                                 | str              | Optional unique name of pipeline component instance. If not set, the factory name is used. An error is raised if the name already exists in the pipeline. |
-| _keyword-only_                         |                  |                                                                                                                                                           |
-| `before`                               | str / int        | Component name or index to insert component directly before.                                                                                              |
-| `after`                                | str / int        | Component name or index to insert component directly after:                                                                                               |
-| `first`                                | bool             | Insert component first / not first in the pipeline.                                                                                                       |
-| `last`                                 | bool             | Insert component last / not last in the pipeline.                                                                                                         |
-| `config` <Tag variant="new">3</Tag>    | `Dict[str, Any]` | Optional config parameters to use for this component. Will be merged with the `default_config` specified by the component factory.                        |
-| `validate` <Tag variant="new">3</Tag>  | bool             | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`.                                     |
-| **RETURNS** <Tag variant="new">3</Tag> | callable         | The pipeline component.                                                                                                                                   |
+| Name                                   | Type             | Description                                                                                                                                                                                                                                              |
+| -------------------------------------- | ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `factory_name`                         | str              | Name of the registered component factory.                                                                                                                                                                                                                |
+| `name`                                 | str              | Optional unique name of pipeline component instance. If not set, the factory name is used. An error is raised if the name already exists in the pipeline.                                                                                                |
+| _keyword-only_                         |                  |                                                                                                                                                                                                                                                          |
+| `before`                               | str / int        | Component name or index to insert component directly before.                                                                                                                                                                                             |
+| `after`                                | str / int        | Component name or index to insert component directly after:                                                                                                                                                                                              |
+| `first`                                | bool             | Insert component first / not first in the pipeline.                                                                                                                                                                                                      |
+| `last`                                 | bool             | Insert component last / not last in the pipeline.                                                                                                                                                                                                        |
+| `config` <Tag variant="new">3</Tag>    | `Dict[str, Any]` | Optional config parameters to use for this component. Will be merged with the `default_config` specified by the component factory.                                                                                                                       |
+| `source` <Tag variant="new">3</Tag>    | `Language`       | Optional source model to copy component from. If a source is provided, the `factory_name` is interpreted as the name of the component in the source pipeline. Make sure that the vocab, vectors and settings of the source model match the target model. |
+| `validate` <Tag variant="new">3</Tag>  | bool             | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`.                                                                                                                                    |
+| **RETURNS** <Tag variant="new">3</Tag> | callable         | The pipeline component.                                                                                                                                                                                                                                  |
 
 ## Language.has_factory {#has_factory tag="classmethod" new="3"}
 
@@ -597,6 +603,97 @@ contains the information about the component and its default provided by the
 | `name`      | str                           | The pipeline component name. |
 | **RETURNS** | [`FactoryMeta`](#factorymeta) |  The factory meta.           |
 
+## Language.analyze_pipes {#analyze_pipes tag="method" new="3"}
+
+Analyze the current pipeline components and show a summary of the attributes
+they assign and require, and the scores they set. The data is based on the
+information provided in the [`@Language.component`](/api/language#component) and
+[`@Language.factory`](/api/language#factory) decorators. If requirements aren't
+met, e.g. if a component specifies a required property that is not set by a
+previous component, a warning is shown.
+
+<Infobox variant="warning" title="Important note">
+
+The pipeline analysis is static and does **not actually run the components**.
+This means that it relies on the information provided by the components
+themselves. If a custom component declares that it assigns an attribute but it
+doesn't, the pipeline analysis won't catch that.
+
+</Infobox>
+
+> #### Example
+>
+> ```python
+> nlp = spacy.blank("en")
+> nlp.add_pipe("tagger")
+> nlp.add_pipe("entity_linker")
+> analysis = nlp.analyze_pipes()
+> ```
+
+<Accordion title="Example output" spaced>
+
+```json
+### Structured
+{
+  "summary": {
+    "tagger": {
+      "assigns": ["token.tag"],
+      "requires": [],
+      "scores": ["tag_acc", "pos_acc", "lemma_acc"],
+      "retokenizes": false
+    },
+    "entity_linker": {
+      "assigns": ["token.ent_kb_id"],
+      "requires": ["doc.ents", "doc.sents", "token.ent_iob", "token.ent_type"],
+      "scores": [],
+      "retokenizes": false
+    }
+  },
+  "problems": {
+    "tagger": [],
+    "entity_linker": ["doc.ents", "doc.sents", "token.ent_iob", "token.ent_type"]
+  },
+  "attrs": {
+    "token.ent_iob": { "assigns": [], "requires": ["entity_linker"] },
+    "doc.ents": { "assigns": [], "requires": ["entity_linker"] },
+    "token.ent_kb_id": { "assigns": ["entity_linker"], "requires": [] },
+    "doc.sents": { "assigns": [], "requires": ["entity_linker"] },
+    "token.tag": { "assigns": ["tagger"], "requires": [] },
+    "token.ent_type": { "assigns": [], "requires": ["entity_linker"] }
+  }
+}
+```
+
+```
+### Pretty
+============================= Pipeline Overview =============================
+
+#   Component       Assigns           Requires         Scores      Retokenizes
+-   -------------   ---------------   --------------   ---------   -----------
+0   tagger          token.tag                          tag_acc     False
+                                                       pos_acc
+                                                       lemma_acc
+
+1   entity_linker   token.ent_kb_id   doc.ents                     False
+                                      doc.sents
+                                      token.ent_iob
+                                      token.ent_type
+
+
+================================ Problems (4) ================================
+⚠ 'entity_linker' requirements not met: doc.ents, doc.sents,
+token.ent_iob, token.ent_type
+```
+
+</Accordion>
+
+| Name           | Type        | Description                                                                                                                                                                                                    |
+| -------------- | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| _keyword-only_ |             |                                                                                                                                                                                                                |
+| `keys`         | `List[str]` | The values to display in the table. Corresponds to attributes of the [`FactoryMeta`](/api/language#factorymeta). Defaults to `["assigns", "requires", "scores", "retokenizes"]`.                               |
+| `pretty`       | bool        | Pretty-print the results as a table. Defaults to `False`.                                                                                                                                                      |
+| **RETURNS**    | dict        | Dictionary containing the pipe analysis, keyed by `"summary"` (component meta by pipe), `"problems"` (attribute names by pipe) and `"attrs"` (pipes that assign and require an attribute, keyed by attribute). |
+
 ## Language.meta {#meta tag="property"}
 
 Custom meta data for the Language class. If a model is loaded, contains meta
@@ -832,8 +929,8 @@ instance and factory instance.
 | ----------------------- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `factory`               | str                | The name of the registered component factory.                                                                                                                                                                               |
 | `default_config`        | `Dict[str, Any]`   | The default config, describing the default values of the factory arguments.                                                                                                                                                 |
-| `assigns`               | `Iterable[str]`    | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something -->                                                                               |
-| `requires`              | `Iterable[str]`    | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something -->                                                                               |
-| `retokenizes`           | bool               | Whether the component changes tokenization. Used for pipeline analysis. <!-- TODO: link to something -->                                                                                                                    |
-| `scores`                | `Iterable[str]`    | All scores set by the components if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`.                                                                                                                                  |
+| `assigns`               | `Iterable[str]`    | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis).                                                                            |
+| `requires`              | `Iterable[str]`    | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis).                                                                            |
+| `retokenizes`           | bool               | Whether the component changes tokenization. Used for [pipe analysis](/usage/processing-pipelines#analysis).                                                                                                                 |
+| `scores`                | `Iterable[str]`    | All scores set by the component if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis).                                                                   |
 | `default_score_weights` | `Dict[str, float]` | The scores to report during training, and their default weight towards the final score used to select the best model. Weights should sum to `1.0` per component and will be combined and normalized for the whole pipeline. |
diff --git a/website/docs/api/morphologizer.md b/website/docs/api/morphologizer.md
index ac7146543..bfe5c3c77 100644
--- a/website/docs/api/morphologizer.md
+++ b/website/docs/api/morphologizer.md
@@ -63,14 +63,16 @@ Create a new pipeline instance. In your application, you would normally use a
 shortcut for this and instantiate the component using its string name and
 [`nlp.add_pipe`](/api/language#add_pipe).
 
+<!-- TODO: finish API docs -->
+
 | Name           | Type    | Description                                                                                 |
 | -------------- | ------- | ------------------------------------------------------------------------------------------- |
 | `vocab`        | `Vocab` | The shared vocabulary.                                                                      |
 | `model`        | `Model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component.             |
 | `name`         | str     | String name of the component instance. Used to add entries to the `losses` during training. |
 | _keyword-only_ |         |                                                                                             |
-| `labels_morph` | dict    | <!-- TODO: -->                                                                              |
-| `labels_pos`   | dict    | <!-- TODO: -->                                                                              |
+| `labels_morph` | dict    |                                                                                             |
+| `labels_pos`   | dict    |                                                                                             |
 
 ## Morphologizer.\_\_call\_\_ {#call tag="method"}
 
diff --git a/website/docs/api/scorer.md b/website/docs/api/scorer.md
index f50a13099..2f37843a0 100644
--- a/website/docs/api/scorer.md
+++ b/website/docs/api/scorer.md
@@ -6,10 +6,9 @@ source: spacy/scorer.py
 ---
 
 The `Scorer` computes evaluation scores. It's typically created by
-[`Language.evaluate`](/api/language#evaluate).
-
-In addition, the `Scorer` provides a number of evaluation methods for evaluating
-`Token` and `Doc` attributes.
+[`Language.evaluate`](/api/language#evaluate). In addition, the `Scorer`
+provides a number of evaluation methods for evaluating [`Token`](/api/token) and
+[`Doc`](/api/doc) attributes.
 
 ## Scorer.\_\_init\_\_ {#init tag="method"}
 
@@ -20,10 +19,10 @@ Create a new `Scorer`.
 > ```python
 > from spacy.scorer import Scorer
 >
-> # default scoring pipeline
+> # Default scoring pipeline
 > scorer = Scorer()
 >
-> # provided scoring pipeline
+> # Provided scoring pipeline
 > nlp = spacy.load("en_core_web_sm")
 > scorer = Scorer(nlp)
 > ```
@@ -40,16 +39,20 @@ scoring methods provided by the components in the pipeline.
 The returned `Dict` contains the scores provided by the individual pipeline
 components. For the scoring methods provided by the `Scorer` and use by the core
 pipeline components, the individual score names start with the `Token` or `Doc`
-attribute being scored: `token_acc`, `token_p/r/f`, `sents_p/r/f`, `tag_acc`,
-`pos_acc`, `morph_acc`, `morph_per_feat`, `lemma_acc`, `dep_uas`, `dep_las`,
-`dep_las_per_type`, `ents_p/r/f`, `ents_per_type`, `textcat_macro_auc`,
-`textcat_macro_f`.
+attribute being scored:
+
+- `token_acc`, `token_p`, `token_r`, `token_f`
+- `sents_p`, `sents_r`, `sents_f`
+- `tag_acc`, `pos_acc`, `morph_acc`, `morph_per_feat`, `lemma_acc`
+- `dep_uas`, `dep_las`, `dep_las_per_type`
+- `ents_p`, `ents_r`, `ents_f`, `ents_per_type`
+- `textcat_macro_auc`, `textcat_macro_f`
 
 > #### Example
 >
 > ```python
 > scorer = Scorer()
-> scorer.score(examples)
+> scores = scorer.score(examples)
 > ```
 
 | Name        | Type                | Description                                                                                   |
@@ -57,78 +60,148 @@ attribute being scored: `token_acc`, `token_p/r/f`, `sents_p/r/f`, `tag_acc`,
 | `examples`  | `Iterable[Example]` | The `Example` objects holding both the predictions and the correct gold-standard annotations. |
 | **RETURNS** | `Dict`              | A dictionary of scores.                                                                       |
 
-## Scorer.score_tokenization {#score_tokenization tag="staticmethod"}
+## Scorer.score_tokenization {#score_tokenization tag="staticmethod" new="3"}
 
 Scores the tokenization:
 
-- `token_acc`: # correct tokens / # gold tokens
-- `token_p/r/f`: PRF for token character spans
+- `token_acc`: number of correct tokens / number of gold tokens
+- `token_p`, `token_r`, `token_f`: precision, recall and F-score for token
+  character spans
+
+> #### Example
+>
+> ```python
+> scores = Scorer.score_tokenization(examples)
+> ```
 
 | Name        | Type                | Description                                                                                   |
 | ----------- | ------------------- | --------------------------------------------------------------------------------------------- |
 | `examples`  | `Iterable[Example]` | The `Example` objects holding both the predictions and the correct gold-standard annotations. |
-| **RETURNS** | `Dict`              | A dictionary containing the scores `token_acc/p/r/f`.                                         |
+| **RETURNS** | `Dict`              | A dictionary containing the scores `token_acc`, `token_p`, `token_r`, `token_f`.              |
 
-## Scorer.score_token_attr {#score_token_attr tag="staticmethod"}
+## Scorer.score_token_attr {#score_token_attr tag="staticmethod" new="3"}
 
 Scores a single token attribute.
 
-| Name        | Type                | Description                                                                                                                   |
-| ----------- | ------------------- | ----------------------------------------------------------------------------------------------------------------------------- |
-| `examples`  | `Iterable[Example]` | The `Example` objects holding both the predictions and the correct gold-standard annotations.                                 |
-| `attr`      | `str`               | The attribute to score.                                                                                                       |
-| `getter`    | `callable`          | Defaults to `getattr`. If provided, `getter(token, attr)` should return the value of the attribute for an individual `Token`. |
-| **RETURNS** | `Dict`              | A dictionary containing the score `attr_acc`.                                                                                 |
+> #### Example
+>
+> ```python
+> scores = Scorer.score_token_attr(examples, "pos")
+> print(scores["pos_acc"])
+> ```
 
-## Scorer.score_token_attr_per_feat {#score_token_attr_per_feat tag="staticmethod"}
+| Name           | Type                | Description                                                                                                                   |
+| -------------- | ------------------- | ----------------------------------------------------------------------------------------------------------------------------- |
+| `examples`     | `Iterable[Example]` | The `Example` objects holding both the predictions and the correct gold-standard annotations.                                 |
+| `attr`         | `str`               | The attribute to score.                                                                                                       |
+| _keyword-only_ |                     |                                                                                                                               |
+| `getter`       | `Callable`          | Defaults to `getattr`. If provided, `getter(token, attr)` should return the value of the attribute for an individual `Token`. |
+| **RETURNS**    | `Dict[str, float]`  | A dictionary containing the score `{attr}_acc`.                                                                               |
 
-Scores a single token attribute per feature for a token attribute in UFEATS
+## Scorer.score_token_attr_per_feat {#score_token_attr_per_feat tag="staticmethod" new="3"}
+
+Scores a single token attribute per feature for a token attribute in
+[UFEATS](https://universaldependencies.org/format.html#morphological-annotation)
 format.
 
-| Name        | Type                | Description                                                                                                                   |
-| ----------- | ------------------- | ----------------------------------------------------------------------------------------------------------------------------- |
-| `examples`  | `Iterable[Example]` | The `Example` objects holding both the predictions and the correct gold-standard annotations.                                 |
-| `attr`      | `str`               | The attribute to score.                                                                                                       |
-| `getter`    | `callable`          | Defaults to `getattr`. If provided, `getter(token, attr)` should return the value of the attribute for an individual `Token`. |
-| **RETURNS** | `Dict`              | A dictionary containing the per-feature PRF scores unders the key `attr_per_feat`.                                            |
+> #### Example
+>
+> ```python
+> scores = Scorer.score_token_attr_per_feat(examples, "morph")
+> print(scores["morph_per_feat"])
+> ```
 
-## Scorer.score_spans {#score_spans tag="staticmethod"}
+| Name           | Type                | Description                                                                                                                   |
+| -------------- | ------------------- | ----------------------------------------------------------------------------------------------------------------------------- |
+| `examples`     | `Iterable[Example]` | The `Example` objects holding both the predictions and the correct gold-standard annotations.                                 |
+| `attr`         | `str`               | The attribute to score.                                                                                                       |
+| _keyword-only_ |                     |                                                                                                                               |
+| `getter`       | `Callable`          | Defaults to `getattr`. If provided, `getter(token, attr)` should return the value of the attribute for an individual `Token`. |
+| **RETURNS**    | `Dict`              | A dictionary containing the per-feature PRF scores under the key `{attr}_per_feat`.                                           |
+
+## Scorer.score_spans {#score_spans tag="staticmethod" new="3"}
 
 Returns PRF scores for labeled or unlabeled spans.
 
-| Name        | Type                | Description                                                                                                           |
-| ----------- | ------------------- | --------------------------------------------------------------------------------------------------------------------- |
-| `examples`  | `Iterable[Example]` | The `Example` objects holding both the predictions and the correct gold-standard annotations.                         |
-| `attr`      | `str`               | The attribute to score.                                                                                               |
-| `getter`    | `callable`          | Defaults to `getattr`. If provided, `getter(doc, attr)` should return the `Span` objects for an individual `Doc`.     |
-| **RETURNS** | `Dict`              | A dictionary containing the PRF scores under the keys `attr_p/r/f` and the per-type PRF scores under `attr_per_type`. |
+> #### Example
+>
+> ```python
+> scores = Scorer.score_spans(examples, "ents")
+> print(scores["ents_f"])
+> ```
 
-## Scorer.score_deps {#score_deps tag="staticmethod"}
+| Name           | Type                | Description                                                                                                                                   |
+| -------------- | ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
+| `examples`     | `Iterable[Example]` | The `Example` objects holding both the predictions and the correct gold-standard annotations.                                                 |
+| `attr`         | `str`               | The attribute to score.                                                                                                                       |
+| _keyword-only_ |                     |                                                                                                                                               |
+| `getter`       | `Callable`          | Defaults to `getattr`. If provided, `getter(doc, attr)` should return the `Span` objects for an individual `Doc`.                             |
+| **RETURNS**    | `Dict`              | A dictionary containing the PRF scores under the keys `{attr}_p`, `{attr}_r`, `{attr}_f` and the per-type PRF scores under `{attr}_per_type`. |
+
+## Scorer.score_deps {#score_deps tag="staticmethod" new="3"}
 
 Calculate the UAS, LAS, and LAS per type scores for dependency parses.
 
+> #### Example
+>
+> ```python
+> def dep_getter(token, attr):
+>     dep = getattr(token, attr)
+>     dep = token.vocab.strings.as_string(dep).lower()
+>     return dep
+>
+> scores = Scorer.score_deps(
+>     examples,
+>     "dep",
+>     getter=dep_getter,
+>     ignore_labels=("p", "punct")
+> )
+> print(scores["dep_uas"], scores["dep_las"])
+> ```
+
 | Name            | Type                | Description                                                                                                                   |
 | --------------- | ------------------- | ----------------------------------------------------------------------------------------------------------------------------- |
 | `examples`      | `Iterable[Example]` | The `Example` objects holding both the predictions and the correct gold-standard annotations.                                 |
 | `attr`          | `str`               | The attribute containing the dependency label.                                                                                |
-| `getter`        | `callable`          | Defaults to `getattr`. If provided, `getter(token, attr)` should return the value of the attribute for an individual `Token`. |
+| _keyword-only_  |                     |                                                                                                                               |
+| `getter`        | `Callable`          | Defaults to `getattr`. If provided, `getter(token, attr)` should return the value of the attribute for an individual `Token`. |
 | `head_attr`     | `str`               | The attribute containing the head token.                                                                                      |
 | `head_getter`   | `callable`          | Defaults to `getattr`. If provided, `head_getter(token, attr)` should return the head for an individual `Token`.              |
 | `ignore_labels` | `Tuple`             | Labels to ignore while scoring (e.g., `punct`).                                                                               |
-| **RETURNS**     | `Dict`              | A dictionary containing the scores: `attr_uas`, `attr_las`, and `attr_las_per_type`.                                          |
+| **RETURNS**     | `Dict`              | A dictionary containing the scores: `{attr}_uas`, `{attr}_las`, and `{attr}_las_per_type`.                                    |
 
-## Scorer.score_cats {#score_cats tag="staticmethod"}
+## Scorer.score_cats {#score_cats tag="staticmethod" new="3"}
 
 Calculate PRF and ROC AUC scores for a doc-level attribute that is a dict
 containing scores for each label like `Doc.cats`. The reported overall score
-depends on the scorer settings.
+depends on the scorer settings:
 
-| Name             | Type                | Description                                                                                                                                                                                                                                                                                                                                                                                                                                       |
-| ---------------- | ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `examples`       | `Iterable[Example]` | The `Example` objects holding both the predictions and the correct gold-standard annotations.                                                                                                                                                                                                                                                                                                                                                     |
-| `attr`           | `str`               | The attribute to score.                                                                                                                                                                                                                                                                                                                                                                                                                           |
-| `getter`         | `callable`          | Defaults to `getattr`. If provided, `getter(doc, attr)` should return the cats for an individual `Doc`.                                                                                                                                                                                                                                                                                                                                           |
-| labels           | `Iterable[str]`     | The set of possible labels. Defaults to `[]`.                                                                                                                                                                                                                                                                                                                                                                                                     |
-| `multi_label`    | `bool`              | Whether the attribute allows multiple labels. Defaults to `True`.                                                                                                                                                                                                                                                                                                                                                                                 |
-| `positive_label` | `str`               | The positive label for a binary task with exclusive classes. Defaults to `None`.                                                                                                                                                                                                                                                                                                                                                                  |
-| **RETURNS**      | `Dict`              | A dictionary containing the scores, with inapplicable scores as `None`: 1) for all: `attr_score` (one of `attr_f` / `attr_macro_f` / `attr_macro_auc`), `attr_score_desc` (text description of the overall score), `attr_f_per_type`, `attr_auc_per_type`; 2) for binary exclusive with positive label: `attr_p/r/f`; 3) for 3+ exclusive classes, macro-averaged fscore: `attr_macro_f`; 4) for multilabel, macro-averaged AUC: `attr_macro_auc` |
+1. **all:** `{attr}_score` (one of `{attr}_f` / `{attr}_macro_f` /
+   `{attr}_macro_auc`), `{attr}_score_desc` (text description of the overall
+   score), `{attr}_f_per_type`, `{attr}_auc_per_type`
+2. **binary exclusive with positive label:** `{attr}_p`, `{attr}_r`, `{attr}_f`
+3. **3+ exclusive classes**, macro-averaged F-score: `{attr}_macro_f`
+4. **multilabel**, macro-averaged AUC: `{attr}_macro_auc`
+
+> #### Example
+>
+> ```python
+> labels = ["LABEL_A", "LABEL_B", "LABEL_C"]
+> scores = Scorer.score_cats(
+>     examples,
+>     "cats",
+>     labels=labels
+> )
+> print(scores["cats_macro_auc"])
+> ```
+
+| Name             | Type                | Description                                                                                             |
+| ---------------- | ------------------- | ------------------------------------------------------------------------------------------------------- |
+| `examples`       | `Iterable[Example]` | The `Example` objects holding both the predictions and the correct gold-standard annotations.           |
+| `attr`           | `str`               | The attribute to score.                                                                                 |
+| _keyword-only_   |                     |                                                                                                         |
+| `getter`         | `Callable`          | Defaults to `getattr`. If provided, `getter(doc, attr)` should return the cats for an individual `Doc`. |
+| `labels`         | `Iterable[str]`     | The set of possible labels. Defaults to `[]`.                                                           |
+| `multi_label`    | `bool`              | Whether the attribute allows multiple labels. Defaults to `True`.                                       |
+| `positive_label` | `str`               | The positive label for a binary task with exclusive classes. Defaults to `None`.                        |
+| **RETURNS**      | `Dict`              | A dictionary containing the scores, with inapplicable scores as `None`.                                 |
diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md
index 68158645d..0954fb577 100644
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -4,6 +4,7 @@ menu:
   - ['spacy', 'spacy']
   - ['displacy', 'displacy']
   - ['registry', 'registry']
+  - ['Readers & Batchers', 'readers-batchers']
   - ['Data & Alignment', 'gold']
   - ['Utility Functions', 'util']
 ---
@@ -31,12 +32,13 @@ loaded in via [`Language.from_disk`](/api/language#from_disk).
 > nlp = spacy.load("en_core_web_sm", disable=["parser", "tagger"])
 > ```
 
-| Name                                       | Type              | Description                                                                       |
-| ------------------------------------------ | ----------------- | --------------------------------------------------------------------------------- |
-| `name`                                     | str / `Path`      | Model to load, i.e. package name or path.                                         |
-| `disable`                                  | `List[str]`       | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). |
-| `component_cfg` <Tag variant="new">3</Tag> | `Dict[str, dict]` | Optional config overrides for pipeline components, keyed by component names.      |
-| **RETURNS**                                | `Language`        | A `Language` object with the loaded model.                                        |
+| Name                                | Type                                                                   | Description                                                                                                                      |
+| ----------------------------------- | ---------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------- |
+| `name`                              | str / `Path`                                                           | Model to load, i.e. package name or path.                                                                                        |
+| _keyword-only_                      |                                                                        |                                                                                                                                  |
+| `disable`                           | `List[str]`                                                            | Names of pipeline components to [disable](/usage/processing-pipelines#disabling).                                                |
+| `config` <Tag variant="new">3</Tag> | `Dict[str, Any]` / [`Config`](https://thinc.ai/docs/api-config#config) | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. |
+| **RETURNS**                         | `Language`                                                             | A `Language` object with the loaded model.                                                                                       |
 
 Essentially, `spacy.load()` is a convenience wrapper that reads the language ID
 and pipeline components from a model's `meta.json`, initializes the `Language`
@@ -83,11 +85,12 @@ meta data as a dictionary instead, you can use the `meta` attribute on your
 > markdown = spacy.info(markdown=True, silent=True)
 > ```
 
-| Name       | Type | Description                                      |
-| ---------- | ---- | ------------------------------------------------ |
-| `model`    | str  | A model, i.e. a package name or path (optional). |
-| `markdown` | bool | Print information as Markdown.                   |
-| `silent`   | bool | Don't print anything, just return.               |
+| Name           | Type | Description                                      |
+| -------------- | ---- | ------------------------------------------------ |
+| `model`        | str  | A model, i.e. a package name or path (optional). |
+| _keyword-only_ |      |                                                  |
+| `markdown`     | bool | Print information as Markdown.                   |
+| `silent`       | bool | Don't print anything, just return.               |
 
 ### spacy.explain {#spacy.explain tag="function"}
 
@@ -290,6 +293,8 @@ factories.
 >     return Model("custom", forward, dims={"nO": nO})
 > ```
 
+<!-- TODO: finish table -->
+
 | Registry name     | Description                                                                                                                                                                                                                                       |
 | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `architectures`   | Registry for functions that create [model architectures](/api/architectures). Can be used to register custom model architectures and reference them in the `config.cfg`.                                                                          |
@@ -297,7 +302,10 @@ factories.
 | `languages`       | Registry for language-specific `Language` subclasses. Automatically reads from [entry points](/usage/saving-loading#entry-points).                                                                                                                |
 | `lookups`         | Registry for large lookup tables available via `vocab.lookups`.                                                                                                                                                                                   |
 | `displacy_colors` | Registry for custom color scheme for the [`displacy` NER visualizer](/usage/visualizers). Automatically reads from [entry points](/usage/saving-loading#entry-points).                                                                            |
-| `assets`          | <!-- TODO: what is this used for again?-->                                                                                                                                                                                                        |
+| `assets`          |                                                                                                                                                                                                                                                   |
+| `callbacks`       | Registry for custom callbacks to [modify the `nlp` object](/usage/training#custom-code-nlp-callbacks) before training.                                                                                                                            |
+| `readers`         | Registry for training and evaluation [data readers](#readers-batchers).                                                                                                                                                                           |
+| `batchers`        | Registry for training and evaluation [data batchers](#readers-batchers).                                                                                                                                                                          |
 | `optimizers`      | Registry for functions that create [optimizers](https://thinc.ai/docs/api-optimizers).                                                                                                                                                            |
 | `schedules`       | Registry for functions that create [schedules](https://thinc.ai/docs/api-schedules).                                                                                                                                                              |
 | `layers`          | Registry for functions that create [layers](https://thinc.ai/docs/api-layers).                                                                                                                                                                    |
@@ -324,10 +332,117 @@ See the [`Transformer`](/api/transformer) API reference and
 >     return annotation_sette
 > ```
 
-| Registry name                                                | Description                                                                                                                                                                                                                                       |
-| ------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| [`span_getters`](/api/transformer#span_getters)              | Registry for functions that take a batch of `Doc` objects and return a list of `Span` objects to process by the transformer, e.g. sentences.                                                                                                      |
-| [`annotation_setters`](/api/transformers#annotation_setters) | Registry for functions that create annotation setters. Annotation setters are functions that take a batch of `Doc` objects and a [`FullTransformerBatch`](/api/transformer#fulltransformerbatch) and can set additional annotations on the `Doc`. |
+| Registry name                                               | Description                                                                                                                                                                                                                                       |
+| ----------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| [`span_getters`](/api/transformer#span_getters)             | Registry for functions that take a batch of `Doc` objects and return a list of `Span` objects to process by the transformer, e.g. sentences.                                                                                                      |
+| [`annotation_setters`](/api/transformer#annotation_setters) | Registry for functions that create annotation setters. Annotation setters are functions that take a batch of `Doc` objects and a [`FullTransformerBatch`](/api/transformer#fulltransformerbatch) and can set additional annotations on the `Doc`. |
+
+## Data readers and batchers {#readers-batchers new="3"}
+
+<!-- TODO: -->
+
+### spacy.Corpus.v1 {#corpus tag="registered function" source="spacy/gold/corpus.py"}
+
+Registered function that creates a [`Corpus`](/api/corpus) of training or
+evaluation data. It takes the same arguments as the `Corpus` class and returns a
+callable that yields [`Example`](/api/example) objects. You can replace it with
+your own registered function in the [`@readers` registry](#registry) to
+customize the data loading and streaming.
+
+> #### Example config
+>
+> ```ini
+> [paths]
+> train = "corpus/train.spacy"
+>
+> [training.train_corpus]
+> @readers = "spacy.Corpus.v1"
+> path = ${paths:train}
+> gold_preproc = false
+> max_length = 0
+> limit = 0
+> ```
+
+| Name            | Type   | Description                                                                                                                                     |
+| --------------- | ------ | ----------------------------------------------------------------------------------------------------------------------------------------------- |
+| `path`          | `Path` | The directory or filename to read from. Expects data in spaCy's binary [`.spacy` format](/api/data-formats#binary-training).                    |
+| `gold_preproc`  | bool   | Whether to set up the Example object with gold-standard sentences and tokens for the predictions. See [`Corpus`](/api/corpus#init) for details. |
+| `max_length`    | int    | Maximum document length. Longer documents will be split into sentences, if sentence boundaries are available. Defaults to `0` for no limit.     |
+| `limit`         | int    | Limit corpus to a subset of examples, e.g. for debugging. Defaults to `0` for no limit.                                                         |
+
+### Batchers {#batchers source="spacy/gold/batchers.py"}
+
+<!-- TODO: -->
+
+#### batch_by_words.v1 {#batch_by_words tag="registered function"}
+
+Create minibatches of roughly a given number of words. If any examples are
+longer than the specified batch length, they will appear in a batch by
+themselves, or be discarded if `discard_oversize` is set to `True`. The argument
+`docs` can be a list of strings, [`Doc`](/api/doc) objects or
+[`Example`](/api/example) objects.
+
+> #### Example config
+>
+> ```ini
+> [training.batcher]
+> @batchers = "batch_by_words.v1"
+> size = 100
+> tolerance = 0.2
+> discard_oversize = false
+> get_length = null
+> ```
+
+<!-- TODO: complete table -->
+
+| Name               | Type                   | Description                                                                                                                         |
+| ------------------ | ---------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
+| `size`             | `Iterable[int]` / int  | The batch size. Can also be a block referencing a schedule, e.g. [`compounding`](https://thinc.ai/docs/api-schedules/#compounding). |
+| `tolerance`        | float                  |                                                                                                                                     |
+| `discard_oversize` | bool                   | Discard items that are longer than the specified batch length.                                                                      |
+| `get_length`       | `Callable[[Any], int]` | Optional function that receives a sequence and returns its length. Defaults to the built-in `len()` if not set.                     |
+
+#### batch_by_sequence.v1 {#batch_by_sequence tag="registered function"}
+
+<!-- TODO: -->
+
+> #### Example config
+>
+> ```ini
+> [training.batcher]
+> @batchers = "batch_by_sequence.v1"
+> size = 32
+> get_length = null
+> ```
+
+<!-- TODO: complete table -->
+
+| Name         | Type                   | Description                                                                                                                         |
+| ------------ | ---------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
+| `size`       | `Iterable[int]` / int  | The batch size. Can also be a block referencing a schedule, e.g. [`compounding`](https://thinc.ai/docs/api-schedules/#compounding). |
+| `get_length` | `Callable[[Any], int]` | Optional function that receives a sequence and returns its length. Defaults to the built-in `len()` if not set.                     |
+
+#### batch_by_padded.v1 {#batch_by_padded tag="registered function"}
+
+<!-- TODO: -->
+
+> #### Example config
+>
+> ```ini
+> [training.batcher]
+> @batchers = "batch_by_padded.v1"
+> size = 100
+> buffer = 256
+> discard_oversize = false
+> get_length = null
+> ```
+
+| Name               | Type                   | Description                                                                                                                         |
+| ------------------ | ---------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
+| `size`             | `Iterable[int]` / int  | The batch size. Can also be a block referencing a schedule, e.g. [`compounding`](https://thinc.ai/docs/api-schedules/#compounding). |
+| `buffer`           | int                    |                                                                                                                                     |
+| `discard_oversize` | bool                   | Discard items that are longer than the specified batch length.                                                                      |
+| `get_length`       | `Callable[[Any], int]` | Optional function that receives a sequence and returns its length. Defaults to the built-in `len()` if not set.                     |
 
 ## Training data and alignment {#gold source="spacy/gold"}
 
diff --git a/website/docs/api/transformer.md b/website/docs/api/transformer.md
index 70128d225..6b6be6bd0 100644
--- a/website/docs/api/transformer.md
+++ b/website/docs/api/transformer.md
@@ -347,50 +347,52 @@ serialization by passing in the string names via the `exclude` argument.
 
 Transformer tokens and outputs for one `Doc` object.
 
-| Name      | Type                                               | Description                               |
-| --------- | -------------------------------------------------- | ----------------------------------------- |
-| `tokens`  | `Dict`                                             | <!-- TODO: -->                            |
-| `tensors` | `List[FloatsXd]`                                   | <!-- TODO: -->                            |
-| `align`   | [`Ragged`](https://thinc.ai/docs/api-types#ragged) | <!-- TODO: -->                            |
-| `width`   | int                                                | <!-- TODO: also mention it's property --> |
+<!-- TODO: finish API docs, also mention "width" is property -->
+
+| Name      | Type                                               | Description |
+| --------- | -------------------------------------------------- | ----------- |
+| `tokens`  | `Dict`                                             |             |
+| `tensors` | `List[FloatsXd]`                                   |             |
+| `align`   | [`Ragged`](https://thinc.ai/docs/api-types#ragged) |             |
+| `width`   | int                                                |             |
 
 ### TransformerData.empty {#transformerdata-emoty tag="classmethod"}
 
-<!-- TODO: -->
+<!-- TODO: finish API docs -->
 
-| Name        | Type              | Description    |
-| ----------- | ----------------- | -------------- |
-| **RETURNS** | `TransformerData` | <!-- TODO: --> |
+| Name        | Type              | Description |
+| ----------- | ----------------- | ----------- |
+| **RETURNS** | `TransformerData` |             |
 
 ## FullTransformerBatch {#fulltransformerbatch tag="dataclass"}
 
-<!-- TODO: -->
+<!-- TODO: write, also mention doc_data is property -->
 
-| Name       | Type                                                                                                                       | Description                               |
-| ---------- | -------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------- |
-| `spans`    | `List[List[Span]]`                                                                                                         | <!-- TODO: -->                            |
-| `tokens`   | [`transformers.BatchEncoding`](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.BatchEncoding) | <!-- TODO: -->                            |
-| `tensors`  | `List[torch.Tensor]`                                                                                                       | <!-- TODO: -->                            |
-| `align`    | [`Ragged`](https://thinc.ai/docs/api-types#ragged)                                                                         | <!-- TODO: -->                            |
-| `doc_data` | `List[TransformerData]`                                                                                                    | <!-- TODO: also mention it's property --> |
+| Name       | Type                                                                                                                       | Description |
+| ---------- | -------------------------------------------------------------------------------------------------------------------------- | ----------- |
+| `spans`    | `List[List[Span]]`                                                                                                         |             |
+| `tokens`   | [`transformers.BatchEncoding`](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.BatchEncoding) |             |
+| `tensors`  | `List[torch.Tensor]`                                                                                                       |             |
+| `align`    | [`Ragged`](https://thinc.ai/docs/api-types#ragged)                                                                         |             |
+| `doc_data` | `List[TransformerData]`                                                                                                    |             |
 
 ### FullTransformerBatch.unsplit_by_doc {#fulltransformerbatch-unsplit_by_doc tag="method"}
 
-<!-- TODO: -->
+<!-- TODO: write -->
 
-| Name        | Type                   | Description    |
-| ----------- | ---------------------- | -------------- |
-| `arrays`    | `List[List[Floats3d]]` | <!-- TODO: --> |
-| **RETURNS** | `FullTransformerBatch` | <!-- TODO: --> |
+| Name        | Type                   | Description |
+| ----------- | ---------------------- | ----------- |
+| `arrays`    | `List[List[Floats3d]]` |             |
+| **RETURNS** | `FullTransformerBatch` |             |
 
 ### FullTransformerBatch.split_by_doc {#fulltransformerbatch-split_by_doc tag="method"}
 
 Split a `TransformerData` object that represents a batch into a list with one
 `TransformerData` per `Doc`.
 
-| Name        | Type                    | Description    |
-| ----------- | ----------------------- | -------------- |
-| **RETURNS** | `List[TransformerData]` | <!-- TODO: --> |
+| Name        | Type                    | Description |
+| ----------- | ----------------------- | ----------- |
+| **RETURNS** | `List[TransformerData]` |             |
 
 ## Span getters {#span_getters tag="registered functions" source="github.com/explosion/spacy-transformers/blob/master/spacy_transformers/span_getters.py"}
 
@@ -421,11 +423,13 @@ getters using the `@registry.span_getters` decorator.
 
 The following built-in functions are available:
 
+<!-- TODO: finish API docs -->
+
 | Name               | Description                                                        |
 | ------------------ | ------------------------------------------------------------------ |
 | `doc_spans.v1`     | Create a span for each doc (no transformation, process each text). |
 | `sent_spans.v1`    | Create a span for each sentence if sentence boundaries are set.    |
-| `strided_spans.v1` | <!-- TODO: -->                                                     |
+| `strided_spans.v1` |                                                                    |
 
 ## Annotation setters {#annotation_setters tag="registered functions" source="github.com/explosion/spacy-transformers/blob/master/spacy_transformers/annotation_setters.py"}
 
diff --git a/website/docs/usage/processing-pipelines.md b/website/docs/usage/processing-pipelines.md
index 56ade692a..7c47c0c73 100644
--- a/website/docs/usage/processing-pipelines.md
+++ b/website/docs/usage/processing-pipelines.md
@@ -231,10 +231,10 @@ available pipeline components and component functions.
 | `morphologizer` | [`Morphologizer`](/api/morphologizer)           | Assign morphological features and coarse-grained POS tags.                                |
 | `senter`        | [`SentenceRecognizer`](/api/sentencerecognizer) | Assign sentence boundaries.                                                               |
 | `sentencizer`   | [`Sentencizer`](/api/sentencizer)               | Add rule-based sentence segmentation without the dependency parse.                        |
-| `tok2vec`       | [`Tok2Vec`](/api/tok2vec)                       | <!-- TODO: -->                                                                            |
+| `tok2vec`       | [`Tok2Vec`](/api/tok2vec)                       |                                                                                           |
 | `transformer`   | [`Transformer`](/api/transformer)               | Assign the tokens and outputs of a transformer model.                                     |
 
-<!-- TODO: update with more components -->
+<!-- TODO: finish and update with more components -->
 
 <!-- TODO: explain default config and factories -->
 
@@ -311,6 +311,155 @@ nlp.rename_pipe("ner", "entityrecognizer")
 nlp.replace_pipe("tagger", my_custom_tagger)
 ```
 
+### Sourcing pipeline components from existing models {#sourced-components new="3"}
+
+Pipeline components that are independent can also be reused across models.
+Instead of adding a new blank component to a pipeline, you can also copy an
+existing component from a pretrained model by setting the `source` argument on
+[`nlp.add_pipe`](/api/language#add_pipe). The first argument will then be
+interpreted as the name of the component in the source pipeline – for instance,
+`"ner"`. This is especially useful for
+[training a model](/usage/training#config-components) because it lets you mix
+and match components and create fully custom model packages with updated
+pretrained components and new components trained on your data.
+
+<Infobox variant="warning" title="Important note for pretrained components">
+
+When reusing components across models, keep in mind that the **vocabulary**,
+**vectors** and model settings **must match**. If a pretrained model includes
+[word vectors](/usage/vectors-embeddings) and the component uses them as
+features, the model you copy it to needs to have the _same_ vectors available –
+otherwise, it won't be able to make the same predictions.
+
+</Infobox>
+
+> #### In training config
+>
+> Instead of providing a `factory`, component blocks in the training
+> [config](/usage/training#config) can also define a `source`. The string needs
+> to be a loadable spaCy model package or path.
+>
+> ```ini
+> [components.ner]
+> source = "en_core_web_sm"
+> component = "ner"
+> ```
+>
+> By default, sourced components will be updated with your data during training.
+> If you want to preserve the component as-is, you can "freeze" it:
+>
+> ```ini
+> [training]
+> frozen_components = ["ner"]
+> ```
+
+```python
+### {executable="true"}
+import spacy
+
+# The source model with different components
+source_nlp = spacy.load("en_core_web_sm")
+print(source_nlp.pipe_names)
+
+# Add only the entity recognizer to the new blank model
+nlp = spacy.blank("en")
+nlp.add_pipe("ner", source=source_nlp)
+print(nlp.pipe_names)
+```
+
+### Analyzing pipeline components {#analysis new="3"}
+
+The [`nlp.analyze_pipes`](/api/language#analyze_pipes) method analyzes the
+components in the current pipeline and outputs information about them, like the
+attributes they set on the [`Doc`](/api/doc) and [`Token`](/api/token), whether
+they retokenize the `Doc` and which scores they produce during training. It will
+also show warnings if components require values that aren't set by a previous
+component – for instance, if the entity linker is used but no component that
+runs before it sets named entities. Setting `pretty=True` will pretty-print a
+table instead of only returning the structured data.
+
+> #### ✏️ Things to try
+>
+> 1. Add the components `"ner"` and `"sentencizer"` _before_ the entity linker.
+>    The analysis should now show no problems, because requirements are met.
+
+```python
+### {executable="true"}
+import spacy
+
+nlp = spacy.blank("en")
+nlp.add_pipe("tagger")
+# This is a problem because it needs entities and sentence boundaries
+nlp.add_pipe("entity_linker")
+analysis = nlp.analyze_pipes(pretty=True)
+```
+
+<Accordion title="Example output">
+
+```json
+### Structured
+{
+  "summary": {
+    "tagger": {
+      "assigns": ["token.tag"],
+      "requires": [],
+      "scores": ["tag_acc", "pos_acc", "lemma_acc"],
+      "retokenizes": false
+    },
+    "entity_linker": {
+      "assigns": ["token.ent_kb_id"],
+      "requires": ["doc.ents", "doc.sents", "token.ent_iob", "token.ent_type"],
+      "scores": [],
+      "retokenizes": false
+    }
+  },
+  "problems": {
+    "tagger": [],
+    "entity_linker": ["doc.ents", "doc.sents", "token.ent_iob", "token.ent_type"]
+  },
+  "attrs": {
+    "token.ent_iob": { "assigns": [], "requires": ["entity_linker"] },
+    "doc.ents": { "assigns": [], "requires": ["entity_linker"] },
+    "token.ent_kb_id": { "assigns": ["entity_linker"], "requires": [] },
+    "doc.sents": { "assigns": [], "requires": ["entity_linker"] },
+    "token.tag": { "assigns": ["tagger"], "requires": [] },
+    "token.ent_type": { "assigns": [], "requires": ["entity_linker"] }
+  }
+}
+```
+
+```
+### Pretty
+============================= Pipeline Overview =============================
+
+#   Component       Assigns           Requires         Scores      Retokenizes
+-   -------------   ---------------   --------------   ---------   -----------
+0   tagger          token.tag                          tag_acc     False
+                                                       pos_acc
+                                                       lemma_acc
+
+1   entity_linker   token.ent_kb_id   doc.ents                     False
+                                      doc.sents
+                                      token.ent_iob
+                                      token.ent_type
+
+
+================================ Problems (4) ================================
+⚠ 'entity_linker' requirements not met: doc.ents, doc.sents,
+token.ent_iob, token.ent_type
+```
+
+</Accordion>
+
+<Infobox variant="warning" title="Important note">
+
+The pipeline analysis is static and does **not actually run the components**.
+This means that it relies on the information provided by the components
+themselves. If a custom component declares that it assigns an attribute but it
+doesn't, the pipeline analysis won't catch that.
+
+</Infobox>
+
 ## Creating custom pipeline components {#custom-components}
 
 A pipeline component is a function that receives a `Doc` object, modifies it and
@@ -489,6 +638,8 @@ All other settings can be passed in by the user via the `config` argument on
 [`@Language.factory`](/api/language#factory) decorator also lets you define a
 `default_config` that's used as a fallback.
 
+<!-- TODO: add example of passing in a custom Python object via the config based on a registered function -->
+
 ```python
 ### With config {highlight="4,9"}
 import spacy
diff --git a/website/docs/usage/saving-loading.md b/website/docs/usage/saving-loading.md
index cdd7d1c49..904477733 100644
--- a/website/docs/usage/saving-loading.md
+++ b/website/docs/usage/saving-loading.md
@@ -15,8 +15,6 @@ import Serialization101 from 'usage/101/\_serialization.md'
 
 ### Serializing the pipeline {#pipeline}
 
-<!-- TODO: update this -->
-
 When serializing the pipeline, keep in mind that this will only save out the
 **binary data for the individual components** to allow spaCy to restore them –
 not the entire objects. This is a good thing, because it makes serialization
diff --git a/website/docs/usage/training.md b/website/docs/usage/training.md
index 12785b6de..c0ec052b9 100644
--- a/website/docs/usage/training.md
+++ b/website/docs/usage/training.md
@@ -3,9 +3,10 @@ title: Training Models
 next: /usage/projects
 menu:
   - ['Introduction', 'basics']
-  - ['CLI & Config', 'cli-config']
-  - ['Transfer Learning', 'transfer-learning']
+  - ['Quickstart', 'quickstart']
+  - ['Config System', 'config']
   - ['Custom Models', 'custom-models']
+  - ['Transfer Learning', 'transfer-learning']
   - ['Parallel Training', 'parallel-training']
   - ['Internal API', 'api']
 ---
@@ -29,12 +30,13 @@ ready-to-use spaCy models.
 
 </Infobox>
 
-## Training CLI & config {#cli-config}
+### Training CLI & config {#cli-config}
 
 <!-- TODO: intro describing the new v3 training philosophy -->
 
 The recommended way to train your spaCy models is via the
-[`spacy train`](/api/cli#train) command on the command line.
+[`spacy train`](/api/cli#train) command on the command line. You can pass in the
+following data and information:
 
 1. The **training and evaluation data** in spaCy's
    [binary `.spacy` format](/api/data-formats#binary-training) created using
@@ -42,14 +44,43 @@ The recommended way to train your spaCy models is via the
 2. A [`config.cfg`](#config) **configuration file** with all settings and
    hyperparameters.
 3. An optional **Python file** to register
-   [custom models and architectures](#custom-models).
-
-<!-- TODO: decide how we want to present the "getting started" workflow here, get a default config etc. -->
+   [custom functions and architectures](#custom-code).
 
 ```bash
 $ python -m spacy train train.spacy dev.spacy config.cfg --output ./output
 ```
 
+<Project id="some_example_project">
+
+The easiest way to get started with an end-to-end training process is to clone a
+[project](/usage/projects) template. Projects let you manage multi-step
+workflows, from data preprocessing to training and packaging your model.
+
+</Project>
+
+## Quickstart {#quickstart}
+
+> #### Instructions
+>
+> 1. Select your requirements and settings.
+> 2. Use the buttons at the bottom to save the result to your clipboard or a
+>    file `base_config.cfg`.
+> 3. Run [`init config`](/api/cli#init-config) to create a full training config.
+> 4. Run [`train`](/api/cli#train) with your config and data.
+
+import QuickstartTraining from 'widgets/quickstart-training.js'
+
+<QuickstartTraining download="base_config.cfg" />
+
+After you've saved the starter config to a file `base_config.cfg`, you can use
+the [`init config`](/api/cli#init-config) command to fill in the remaining
+defaults. Training configs should always be **complete and without hidden
+defaults**, to keep your experiments reproducible.
+
+```bash
+$ python -m spacy init config config.cfg --base base_config.cfg
+```
+
 > #### Tip: Debug your data
 >
 > The [`debug-data` command](/api/cli#debug-data) lets you analyze and validate
@@ -60,46 +91,15 @@ $ python -m spacy train train.spacy dev.spacy config.cfg --output ./output
 > $ python -m spacy debug-data en train.spacy dev.spacy --verbose
 > ```
 
-<Project id="some_example_project">
+You can now run [`train`](/api/cli#train) with your training and development
+data and the training config. See the [`convert`](/api/cli#convert) command for
+details on how to convert your data to spaCy's binary `.spacy` format.
 
-The easiest way to get started with an end-to-end training process is to clone a
-[project](/usage/projects) template. Projects let you manage multi-step
-workflows, from data preprocessing to training and packaging your model.
+```bash
+$ python -m spacy train train.spacy dev.spacy config.cfg --output ./output
+```
 
-</Project>
-
-<Accordion title="Understanding the training output">
-
-When you train a model using the [`spacy train`](/api/cli#train) command, you'll
-see a table showing metrics after each pass over the data. Here's what those
-metrics means:
-
-<!-- TODO: update table below and include note about scores in config -->
-
-| Name       | Description                                                                                       |
-| ---------- | ------------------------------------------------------------------------------------------------- |
-| `Dep Loss` | Training loss for dependency parser. Should decrease, but usually not to 0.                       |
-| `NER Loss` | Training loss for named entity recognizer. Should decrease, but usually not to 0.                 |
-| `UAS`      | Unlabeled attachment score for parser. The percentage of unlabeled correct arcs. Should increase. |
-| `NER P.`   | NER precision on development data. Should increase.                                               |
-| `NER R.`   | NER recall on development data. Should increase.                                                  |
-| `NER F.`   | NER F-score on development data. Should increase.                                                 |
-| `Tag %`    | Fine-grained part-of-speech tag accuracy on development data. Should increase.                    |
-| `Token %`  | Tokenization accuracy on development data.                                                        |
-| `CPU WPS`  | Prediction speed on CPU in words per second, if available. Should stay stable.                    |
-| `GPU WPS`  | Prediction speed on GPU in words per second, if available. Should stay stable.                    |
-
-Note that if the development data has raw text, some of the gold-standard
-entities might not align to the predicted tokenization. These tokenization
-errors are **excluded from the NER evaluation**. If your tokenization makes it
-impossible for the model to predict 50% of your entities, your NER F-score might
-still look good.
-
-</Accordion>
-
----
-
-### Training config files {#config}
+## Training config {#config}
 
 > #### Migration from spaCy v2.x
 >
@@ -149,12 +149,14 @@ not just define static settings, but also construct objects like architectures,
 schedules, optimizers or any other custom components. The main top-level
 sections of a config file are:
 
-| Section       | Description                                                                                                           |
-| ------------- | --------------------------------------------------------------------------------------------------------------------- |
-| `training`    | Settings and controls for the training and evaluation process.                                                        |
-| `pretraining` | Optional settings and controls for the [language model pretraining](#pretraining).                                    |
-| `nlp`         | Definition of the `nlp` object, its tokenizer and [processing pipeline](/usage/processing-pipelines) component names. |
-| `components`  | Definitions of the [pipeline components](/usage/processing-pipelines) and their models.                               |
+| Section       | Description                                                                                                                                            |
+| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `nlp`         | Definition of the `nlp` object, its tokenizer and [processing pipeline](/usage/processing-pipelines) component names.                                  |
+| `components`  | Definitions of the [pipeline components](/usage/processing-pipelines) and their models.                                                                |
+| `paths`       | Paths to data and other assets. Can be re-used across the config as variables, e.g. `${paths:train}`, and [overwritten](#config-overrides) on the CLI. |
+| `system`      | Settings related to system and hardware.                                                                                                               |
+| `training`    | Settings and controls for the training and evaluation process.                                                                                         |
+| `pretraining` | Optional settings and controls for the [language model pretraining](#pretraining).                                                                     |
 
 <Infobox title="Config format and settings" emoji="📖">
 
@@ -168,7 +170,7 @@ available for the different architectures are documented with the
 
 </Infobox>
 
-#### Overwriting config settings on the command line {#config-overrides}
+### Overwriting config settings on the command line {#config-overrides}
 
 The config system means that you can define all settings **in one place** and in
 a consistent format. There are no command-line arguments that need to be set,
@@ -192,7 +194,87 @@ of the training, the final filled `config.cfg` is exported with your model, so
 you'll always have a record of the settings that were used, including your
 overrides.
 
-#### Using registered functions {#config-functions}
+### Defining pipeline components {#config-components}
+
+When you train a model, you typically train a
+[pipeline](/usage/processing-pipelines) of **one or more components**. The
+`[components]` block in the config defines the available pipeline components and
+how they should be created – either by a built-in or custom
+[factory](/usage/processing-pipelines#built-in), or
+[sourced](/usage/processing-pipelines#sourced-components) from an existing
+pretrained model. For example, `[components.parser]` defines the component named
+`"parser"` in the pipeline. There are different ways you might want to treat
+your components during training, and the most common scenarios are:
+
+1. Train a **new component** from scratch on your data.
+2. Update an existing **pretrained component** with more examples.
+3. Include an existing pretrained component without updating it.
+4. Include a non-trainable component, like a rule-based
+   [`EntityRuler`](/api/entityruler) or [`Sentencizer`](/api/sentencizer), or a
+   fully [custom component](/usage/processing-pipelines#custom-components).
+
+If a component block defines a `factory`, spaCy will look it up in the
+[built-in](/usage/processing-pipelines#built-in) or
+[custom](/usage/processing-pipelines#custom-components) components and create a
+new component from scratch. All settings defined in the config block will be
+passed to the component factory as arguments. This lets you configure the model
+settings and hyperparameters. If a component block defines a `source`, the
+component will be copied over from an existing pretrained model, with its
+existing weights. This lets you include an already trained component in your
+model pipeline, or update a pretrained component with more data specific to
+your use case.
+
+```ini
+### config.cfg (excerpt)
+[components]
+
+# "parser" and "ner" are sourced from pretrained model
+[components.parser]
+source = "en_core_web_sm"
+
+[components.ner]
+source = "en_core_web_sm"
+
+# "textcat" and "custom" are created blank from built-in / custom factory
+[components.textcat]
+factory = "textcat"
+
+[components.custom]
+factory = "your_custom_factory"
+your_custom_setting = true
+```
+
+The `pipeline` setting in the `[nlp]` block defines the pipeline components
+added to the pipeline, in order. For example, `"parser"` here references
+`[components.parser]`. By default, spaCy will **update all components that can
+be updated**. Trainable components that are created from scratch are initialized
+with random weights. For sourced components, spaCy will keep the existing
+weights and [resume training](/api/language#resume_training).
+
+If you don't want a component to be updated, you can **freeze** it by adding it
+to the `frozen_components` list in the `[training]` block. Frozen components are
+**not updated** during training and are included in the final trained model
+as-is.
+
+> #### Note on frozen components
+>
+> Even though frozen components are not **updated** during training, they will
+> still **run** during training and evaluation. This is very important, because
+> they may still impact your model's performance – for instance, a sentence
+> boundary detector can impact what the parser or entity recognizer considers a
+> valid parse. So the evaluation results should always reflect what your model
+> will produce at runtime.
+
+```ini
+[nlp]
+lang = "en"
+pipeline = ["parser", "ner", "textcat", "custom"]
+
+[training]
+frozen_components = ["parser", "custom"]
+```
+
+### Using registered functions {#config-functions}
 
 The training configuration defined in the config file doesn't have to only
 consist of static values. Some settings can also be **functions**. For instance,
@@ -233,40 +315,78 @@ stop = 1000
 compound = 1.001
 ```
 
+### Using variable interpolation {#config-interpolation}
+
+<!-- TODO: describe and come up with good example showing both values and sections -->
+
 ### Model architectures {#model-architectures}
 
 <!-- TODO: refer to architectures API: /api/architectures. This should document the architectures in spacy/ml/models -->
 
-<!-- TODO: how do we document the default configs? -->
+### Metrics, training output and weighted scores {#metrics}
 
-## Transfer learning {#transfer-learning}
+When you train a model using the [`spacy train`](/api/cli#train) command, you'll
+see a table showing the metrics after each pass over the data. The available
+metrics **depend on the pipeline components**. Pipeline components also define
+which scores are shown and how they should be **weighted in the final score**
+that determines the best model.
 
-### Using transformer models like BERT {#transformers}
+The `training.score_weights` setting in your `config.cfg` lets you customize the
+scores shown in the table and how they should be weighted. In this example, the
+labeled dependency accuracy and NER F-score count towards the final score with
+40% each and the tagging accuracy makes up the remaining 20%. The tokenization
+accuracy and speed are both shown in the table, but not counted towards the
+score.
 
-spaCy v3.0 lets you use almost any statistical model to power your pipeline. You
-can use models implemented in a variety of frameworks. A transformer model is
-just a statistical model, so the
-[`spacy-transformers`](https://github.com/explosion/spacy-transformers) package
-actually has very little work to do: it just has to provide a few functions that
-do the required plumbing. It also provides a pipeline component,
-[`Transformer`](/api/transformer), that lets you do multi-task learning and lets
-you save the transformer outputs for later use.
+> #### Why do I need score weights?
+>
+> At the end of your training process, you typically want to select the **best
+> model** – but what "best" means depends on the available components and your
+> specific use case. For instance, you may prefer a model with higher NER and
+> lower POS tagging accuracy over a model with lower NER and higher POS
+> accuracy. You can express this preference in the score weights, e.g. by
+> assigning `ents_f` (NER F-score) a higher weight.
 
-<Project id="en_core_bert">
+```ini
+[training.score_weights]
+dep_las = 0.4
+ents_f = 0.4
+tag_acc = 0.2
+token_acc = 0.0
+speed = 0.0
+```
 
-Try out a BERT-based model pipeline using this project template: swap in your
-data, edit the settings and hyperparameters and train, evaluate, package and
-visualize your model.
+The `score_weights` don't _have to_ sum to `1.0` – but it's recommended. When
+you generate a config for a given pipeline, the score weights are generated by
+combining and normalizing the default score weights of the pipeline components.
+The default score weights are defined by each pipeline component via the
+`default_score_weights` setting on the
+[`@Language.component`](/api/language#component) or
+[`@Language.factory`](/api/language#factory) decorator. By default, all pipeline
+components are weighted equally.
 
-</Project>
+<Accordion title="Understanding the training output and score types" spaced>
 
-For more details on how to integrate transformer models into your training
-config and customize the implementations, see the usage guide on
-[training transformers](/usage/transformers#training).
+<!-- TODO: come up with good short explanation of precision and recall -->
 
-### Pretraining with spaCy {#pretraining}
+| Name                       | Description                                                                                                             |
+| -------------------------- | ----------------------------------------------------------------------------------------------------------------------- |
+| **Loss**                   | The training loss representing the amount of work left for the optimizer. Should decrease, but usually not to `0`.      |
+| **Precision** (P)          | Should increase.                                                                                                        |
+| **Recall** (R)             | Should increase.                                                                                                        |
+| **F-Score** (F)            | The harmonic mean of precision and recall. Should increase.                                                             |
+| **UAS** / **LAS**          | Unlabeled and labeled attachment score for the dependency parser, i.e. the percentage of correct arcs. Should increase. |
+| **Words per second** (WPS) | Prediction speed in words per second. Should stay stable.                                                               |
 
-<!-- TODO: document spacy pretrain -->
+<!-- TODO: is this still relevant? -->
+
+Note that if the development data has raw text, some of the gold-standard
+entities might not align to the predicted tokenization. These tokenization
+errors are **excluded from the NER evaluation**. If your tokenization makes it
+impossible for the model to predict 50% of your entities, your NER F-score might
+still look good.
+
+</Accordion>
 
 ## Custom model implementations and architectures {#custom-models}
 
@@ -274,6 +394,11 @@ config and customize the implementations, see the usage guide on
 
 ### Training with custom code {#custom-code}
 
+> ```bash
+> ### Example {wrap="true"}
+> $ python -m spacy train train.spacy dev.spacy config.cfg --code functions.py
+> ```
+
 The [`spacy train`](/api/cli#train) recipe lets you specify an optional argument
 `--code` that points to a Python file. The file is imported before training and
 allows you to add custom functions and architectures to the function registry
@@ -281,6 +406,120 @@ that can then be referenced from your `config.cfg`. This lets you train spaCy
 models with custom components, without having to re-implement the whole training
 workflow.
 
+#### Example: Modifying the nlp object {#custom-code-nlp-callbacks}
+
+For many use cases, you don't necessarily want to implement the whole `Language`
+subclass and language data from scratch – it's often enough to make a few small
+modifications, like adjusting the
+[tokenization rules](/usage/linguistic-features#native-tokenizer-additions) or
+[language defaults](/api/language#defaults) like stop words. The config lets you
+provide three optional **callback functions** that give you access to the
+language class and `nlp` object at different points of the lifecycle:
+
+| Callback                  | Description                                                                                                                                                                              |
+| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `before_creation`         | Called before the `nlp` object is created and receives the language subclass like `English` (not the instance). Useful for writing to the [`Language.Defaults`](/api/language#defaults). |
+| `after_creation`          | Called right after the `nlp` object is created, but before the pipeline components are added to the pipeline and receives the `nlp` object. Useful for modifying the tokenizer.          |
+| `after_pipeline_creation` | Called right after the pipeline components are created and added and receives the `nlp` object. Useful for modifying pipeline components.                                                |
+
+The `@spacy.registry.callbacks` decorator lets you register that function in the
+`callbacks` [registry](/api/top-level#registry) under a given name. You can then
+reference the function in a config block using the `@callbacks` key. If a block
+contains a key starting with an `@`, it's interpreted as a reference to a
+function. Because you've registered the function, spaCy knows how to create it
+when you reference `"customize_language_data"` in your config. Here's an example
+of a callback that runs before the `nlp` object is created and adds a few custom
+tokenization rules to the defaults:
+
+> #### config.cfg
+>
+> ```ini
+> [nlp.before_creation]
+> @callbacks = "customize_language_data"
+> ```
+
+```python
+### functions.py {highlight="3,6"}
+import spacy
+
+@spacy.registry.callbacks("customize_language_data")
+def create_callback():
+    def customize_language_data(lang_cls):
+        lang_cls.Defaults.suffixes = lang_cls.Defaults.suffixes + (r"-+$",)
+        return lang_cls
+
+    return customize_language_data
+```
+
+<Infobox variant="warning">
+
+Remember that a registered function should always be a function that spaCy
+**calls to create something**. In this case, it **creates a callback** – it's
+not the callback itself.
+
+</Infobox>
+
+Any registered function – in this case `create_callback` – can also take
+**arguments** that can be **set by the config**. This lets you implement and
+keep track of different configurations, without having to hack at your code. You
+can choose any arguments that make sense for your use case. In this example,
+we're adding the arguments `extra_stop_words` (a list of strings) and `debug`
+(boolean) for printing additional info when the function runs.
+
+> #### config.cfg
+>
+> ```ini
+> [nlp.before_creation]
+> @callbacks = "customize_language_data"
+> extra_stop_words = ["ooh", "aah"]
+> debug = true
+> ```
+
+```python
+### functions.py {highlight="5,8-10"}
+from typing import List
+import spacy
+
+@spacy.registry.callbacks("customize_language_data")
+def create_callback(extra_stop_words: List[str] = [], debug: bool = False):
+    def customize_language_data(lang_cls):
+        lang_cls.Defaults.suffixes = lang_cls.Defaults.suffixes + (r"-+$",)
+        lang_cls.Defaults.stop_words.update(extra_stop_words)
+        if debug:
+            print("Updated stop words and tokenizer suffixes")
+        return lang_cls
+
+    return customize_language_data
+```
+
+<Infobox title="Tip: Use Python type hints" emoji="💡">
+
+spaCy's configs are powered by our machine learning library Thinc's
+[configuration system](https://thinc.ai/docs/usage-config), which supports
+[type hints](https://docs.python.org/3/library/typing.html) and even
+[advanced type annotations](https://thinc.ai/docs/usage-config#advanced-types)
+using [`pydantic`](https://github.com/samuelcolvin/pydantic). If your registered
+function provides type hints, the values that are passed in will be checked
+against the expected types. For example, `debug: bool` in the example above will
+ensure that the value received as the argument `debug` is a boolean. If the
+value can't be coerced into a boolean, spaCy will raise an error.
+`debug: pydantic.StrictBool` will force the value to be a boolean and raise an
+error if it's not – for instance, if your config defines `1` instead of `true`.
+
+</Infobox>
+
+With your `functions.py` defining additional code and the updated `config.cfg`,
+you can now run [`spacy train`](/api/cli#train) and point the argument `--code`
+to your Python file. Before loading the config, spaCy will import the
+`functions.py` module and your custom functions will be registered.
+
+```bash
+### Training with custom code {wrap="true"}
+python -m spacy train train.spacy dev.spacy config.cfg --output ./output --code ./functions.py
+```
+
+#### Example: Custom batch size schedule {#custom-code-schedule}
+
 For example, let's say you've implemented your own batch size schedule to use
 during training. The `@spacy.registry.schedules` decorator lets you register
 that function in the `schedules` [registry](/api/top-level#registry) and assign
@@ -310,9 +549,9 @@ In your config, you can now reference the schedule in the
 starting with an `@`, it's interpreted as a reference to a function. All other
 settings in the block will be passed to the function as keyword arguments. Keep
 in mind that the config shouldn't have any hidden defaults and all arguments on
-the functions need to be represented in the config.
-
-<!-- TODO: this needs to be updated once we've decided on a workflow for "fill config" -->
+the functions need to be represented in the config. If your function defines
+**default argument values**, spaCy is able to auto-fill your config when you run
+[`init config`](/api/cli#init-config).
 
 ```ini
 ### config.cfg (excerpt)
@@ -322,31 +561,9 @@ start = 2
 factor = 1.005
 ```
 
-You can now run [`spacy train`](/api/cli#train) with the `config.cfg` and your
-custom `functions.py` as the argument `--code`. Before loading the config, spaCy
-will import the `functions.py` module and your custom functions will be
-registered.
+#### Example: Custom data reading and batching {#custom-code-readers-batchers}
 
-```bash
-### Training with custom code {wrap="true"}
-python -m spacy train train.spacy dev.spacy config.cfg --output ./output --code ./functions.py
-```
-
-<Infobox title="Tip: Use Python type hints" emoji="💡">
-
-spaCy's configs are powered by our machine learning library Thinc's
-[configuration system](https://thinc.ai/docs/usage-config), which supports
-[type hints](https://docs.python.org/3/library/typing.html) and even
-[advanced type annotations](https://thinc.ai/docs/usage-config#advanced-types)
-using [`pydantic`](https://github.com/samuelcolvin/pydantic). If your registered
-function provides type hints, the values that are passed in will be checked
-against the expected types. For example, `start: int` in the example above will
-ensure that the value received as the argument `start` is an integer. If the
-value can't be cast to an integer, spaCy will raise an error.
-`start: pydantic.StrictInt` will force the value to be an integer and raise an
-error if it's not – for instance, if your config defines a float.
-
-</Infobox>
+<!-- TODO: -->
 
 ### Wrapping PyTorch and TensorFlow {#custom-frameworks}
 
@@ -364,6 +581,35 @@ mattis pretium.
 
 <!-- TODO: this could maybe be a more general example of using Thinc to compose some layers? We don't want to go too deep here and probably want to focus on a simple architecture example to show how it works -->
 
+## Transfer learning {#transfer-learning}
+
+### Using transformer models like BERT {#transformers}
+
+spaCy v3.0 lets you use almost any statistical model to power your pipeline. You
+can use models implemented in a variety of frameworks. A transformer model is
+just a statistical model, so the
+[`spacy-transformers`](https://github.com/explosion/spacy-transformers) package
+actually has very little work to do: it just has to provide a few functions that
+do the required plumbing. It also provides a pipeline component,
+[`Transformer`](/api/transformer), that lets you do multi-task learning and lets
+you save the transformer outputs for later use.
+
+<Project id="en_core_bert">
+
+Try out a BERT-based model pipeline using this project template: swap in your
+data, edit the settings and hyperparameters and train, evaluate, package and
+visualize your model.
+
+</Project>
+
+For more details on how to integrate transformer models into your training
+config and customize the implementations, see the usage guide on
+[training transformers](/usage/transformers#training).
+
+### Pretraining with spaCy {#pretraining}
+
+<!-- TODO: document spacy pretrain -->
+
 ## Parallel Training with Ray {#parallel-training}
 
 <!-- TODO: document Ray integration -->
diff --git a/website/docs/usage/transformers.md b/website/docs/usage/transformers.md
index bab1b82d3..b837c62de 100644
--- a/website/docs/usage/transformers.md
+++ b/website/docs/usage/transformers.md
@@ -88,7 +88,8 @@ The recommended workflow for training is to use spaCy's
 [`spacy train`](/api/cli#train) command. The training config defines all
 component settings and hyperparameters in one place and lets you describe a tree
 of objects by referring to creation functions, including functions you register
-yourself.
+yourself. For details on how to get started with training your own model, check
+out the [training quickstart](/usage/training#quickstart).
 
 <Project id="en_core_bert">
 
@@ -164,10 +165,8 @@ resolved, the function is created and passed into the model as an argument.
 Remember that the `config.cfg` used for training should contain **no missing
 values** and requires all settings to be defined. You don't want any hidden
 defaults creeping in and changing your results! spaCy will tell you if settings
-are missing, and you can run [`spacy debug config`](/api/cli#debug-config) with
-`--auto-fill` to automatically fill in all defaults.
-
-<!-- TODO: update with details on getting started with a config -->
+are missing, and you can run [`spacy init config`](/api/cli#init-config) to
+automatically fill in all defaults.
 
 </Infobox>
 
diff --git a/website/src/components/copy.js b/website/src/components/copy.js
index 4392273e2..f8013c5f1 100644
--- a/website/src/components/copy.js
+++ b/website/src/components/copy.js
@@ -3,21 +3,23 @@ import React, { useState, useRef } from 'react'
 import Icon from './icon'
 import classes from '../styles/copy.module.sass'
 
+export function copyToClipboard(ref, callback) {
+    const isClient = typeof window !== 'undefined'
+    if (ref.current && isClient) {
+        ref.current.select()
+        document.execCommand('copy')
+        callback(true)
+        ref.current.blur()
+        setTimeout(() => callback(false), 1000)
+    }
+}
+
 const CopyInput = ({ text, prefix }) => {
     const isClient = typeof window !== 'undefined'
     const supportsCopy = isClient && document.queryCommandSupported('copy')
     const textareaRef = useRef()
     const [copySuccess, setCopySuccess] = useState(false)
-
-    function copyToClipboard() {
-        if (textareaRef.current && isClient) {
-            textareaRef.current.select()
-            document.execCommand('copy')
-            setCopySuccess(true)
-            textareaRef.current.blur()
-            setTimeout(() => setCopySuccess(false), 1000)
-        }
-    }
+    const onClick = () => copyToClipboard(textareaRef, setCopySuccess)
 
     function selectText() {
         if (textareaRef.current && isClient) {
@@ -37,7 +39,7 @@ const CopyInput = ({ text, prefix }) => {
                 onClick={selectText}
             />
             {supportsCopy && (
-                <button title="Copy to clipboard" onClick={copyToClipboard}>
+                <button title="Copy to clipboard" onClick={onClick}>
                     <Icon width={16} name={copySuccess ? 'accept' : 'clipboard'} />
                 </button>
             )}
diff --git a/website/src/components/icon.js b/website/src/components/icon.js
index 8c917d13d..00b237795 100644
--- a/website/src/components/icon.js
+++ b/website/src/components/icon.js
@@ -22,6 +22,7 @@ import { ReactComponent as SearchIcon } from '../images/icons/search.svg'
 import { ReactComponent as MoonIcon } from '../images/icons/moon.svg'
 import { ReactComponent as ClipboardIcon } from '../images/icons/clipboard.svg'
 import { ReactComponent as NetworkIcon } from '../images/icons/network.svg'
+import { ReactComponent as DownloadIcon } from '../images/icons/download.svg'
 
 import classes from '../styles/icon.module.sass'
 
@@ -46,7 +47,8 @@ const icons = {
     search: SearchIcon,
     moon: MoonIcon,
     clipboard: ClipboardIcon,
-    network: NetworkIcon
+    network: NetworkIcon,
+    download: DownloadIcon,
 }
 
 const Icon = ({ name, width, height, inline, variant, className }) => {
diff --git a/website/src/components/quickstart.js b/website/src/components/quickstart.js
index fe73658c7..f1d3616a5 100644
--- a/website/src/components/quickstart.js
+++ b/website/src/components/quickstart.js
@@ -1,4 +1,4 @@
-import React, { Fragment, useState, useEffect } from 'react'
+import React, { Fragment, useState, useEffect, useRef } from 'react'
 import PropTypes from 'prop-types'
 import classNames from 'classnames'
 import { window } from 'browser-monads'
@@ -6,6 +6,7 @@ import { window } from 'browser-monads'
 import Section from './section'
 import Icon from './icon'
 import { H2 } from './typography'
+import { copyToClipboard } from './copy'
 import classes from '../styles/quickstart.module.sass'
 
 function getNewChecked(optionId, checkedForId, multiple) {
@@ -14,10 +15,41 @@ function getNewChecked(optionId, checkedForId, multiple) {
     return [...checkedForId, optionId]
 }
 
-const Quickstart = ({ data, title, description, id, children }) => {
+function getRawContent(ref) {
+    if (ref.current && ref.current.childNodes) {
+        // Select all currently visible nodes (spans and text nodes)
+        const result = [...ref.current.childNodes].filter(el => el.offsetParent !== null)
+        return result.map(el => el.textContent).join('\n')
+    }
+    return ''
+}
+
+const Quickstart = ({
+    data,
+    title,
+    description,
+    copy,
+    download,
+    id,
+    setters = {},
+    hidePrompts,
+    children,
+}) => {
+    const contentRef = useRef()
+    const copyAreaRef = useRef()
+    const isClient = typeof window !== 'undefined'
+    const supportsCopy = isClient && document.queryCommandSupported('copy')
+    const showCopy = supportsCopy && copy
     const [styles, setStyles] = useState({})
     const [checked, setChecked] = useState({})
     const [initialized, setInitialized] = useState(false)
+    const [copySuccess, setCopySuccess] = useState(false)
+    const [otherState, setOtherState] = useState({})
+    const setOther = (id, value) => setOtherState({ ...otherState, [id]: value })
+    const onClickCopy = () => {
+        copyAreaRef.current.value = getRawContent(contentRef)
+        copyToClipboard(copyAreaRef, setCopySuccess)
+    }
 
     const getCss = (id, checkedOptions) => {
         const checkedForId = checkedOptions[id] || []
@@ -32,7 +64,7 @@ const Quickstart = ({ data, title, description, id, children }) => {
         if (!initialized) {
             const initialChecked = Object.assign(
                 {},
-                ...data.map(({ id, options }) => ({
+                ...data.map(({ id, options = [] }) => ({
                     [id]: options.filter(option => option.checked).map(({ id }) => id),
                 }))
             )
@@ -48,7 +80,7 @@ const Quickstart = ({ data, title, description, id, children }) => {
 
     return !data.length ? null : (
         <Section id={id}>
-            <div className={classes.root}>
+            <div className={classNames(classes.root, { [classes.hidePrompts]: !!hidePrompts })}>
                 {title && (
                     <H2 className={classes.title} name={id}>
                         <a href={`#${id}`}>{title}</a>
@@ -57,82 +89,154 @@ const Quickstart = ({ data, title, description, id, children }) => {
 
                 {description && <p className={classes.description}>{description}</p>}
 
-                {data.map(({ id, title, options = [], multiple, help }) => (
-                    <div key={id} data-quickstart-group={id} className={classes.group}>
-                        <style data-quickstart-style={id}>
-                            {styles[id] ||
-                                `[data-quickstart-results]>[data-quickstart-${id}] { display: none }`}
-                        </style>
-                        <div className={classes.legend}>
-                            {title}
-                            {help && (
-                                <span data-tooltip={help} className={classes.help}>
-                                    {' '}
-                                    <Icon name="help" width={16} spaced />
-                                </span>
-                            )}
-                        </div>
-                        <div className={classes.fields}>
-                            {options.map(option => {
-                                const optionType = multiple ? 'checkbox' : 'radio'
-                                const checkedForId = checked[id] || []
-                                return (
-                                    <Fragment key={option.id}>
-                                        <input
-                                            onChange={() => {
-                                                const newChecked = {
-                                                    ...checked,
-                                                    [id]: getNewChecked(
-                                                        option.id,
-                                                        checkedForId,
-                                                        multiple
-                                                    ),
+                {data.map(
+                    ({
+                        id,
+                        title,
+                        options = [],
+                        dropdown = [],
+                        defaultValue,
+                        multiple,
+                        other,
+                        help,
+                    }) => {
+                        // Optional function that's called with the value
+                        const setterFunc = setters[id] || (() => {})
+                        return (
+                            <div key={id} data-quickstart-group={id} className={classes.group}>
+                                <style data-quickstart-style={id} scoped>
+                                    {styles[id] ||
+                                        `[data-quickstart-results]>[data-quickstart-${id}] { display: none }`}
+                                </style>
+                                <div className={classes.legend}>
+                                    {title}
+                                    {help && (
+                                        <span data-tooltip={help} className={classes.help}>
+                                            {' '}
+                                            <Icon name="help" width={16} spaced />
+                                        </span>
+                                    )}
+                                </div>
+                                <div className={classes.fields}>
+                                    {!!dropdown.length && (
+                                        <select
+                                            defaultValue={defaultValue}
+                                            className={classes.select}
+                                            onChange={({ target }) => {
+                                                const value = target.value
+                                                if (value != other) {
+                                                    setterFunc(value)
+                                                    setOther(id, false)
+                                                } else {
+                                                    setterFunc('')
+                                                    setOther(id, true)
                                                 }
-                                                setChecked(newChecked)
-                                                setStyles({
-                                                    ...styles,
-                                                    [id]: getCss(id, newChecked),
-                                                })
                                             }}
-                                            type={optionType}
-                                            className={classNames(
-                                                classes.input,
-                                                classes[optionType]
-                                            )}
-                                            name={id}
-                                            id={`quickstart-${option.id}`}
-                                            value={option.id}
-                                            checked={checkedForId.includes(option.id)}
-                                        />
-                                        <label
-                                            className={classes.label}
-                                            htmlFor={`quickstart-${option.id}`}
                                         >
-                                            {option.title}
-                                            {option.meta && (
-                                                <span className={classes.meta}>{option.meta}</span>
-                                            )}
-                                            {option.help && (
-                                                <span
-                                                    data-tooltip={option.help}
-                                                    className={classes.help}
+                                            {dropdown.map(({ id, title }) => (
+                                                <option key={id} value={id}>
+                                                    {title}
+                                                </option>
+                                            ))}
+                                            {other && <option value={other}>{other}</option>}
+                                        </select>
+                                    )}
+                                    {other && otherState[id] && (
+                                        <input
+                                            type="text"
+                                            className={classes.textInput}
+                                            placeholder="Type here..."
+                                            onChange={({ target }) => setterFunc(target.value)}
+                                        />
+                                    )}
+                                    {options.map(option => {
+                                        const optionType = multiple ? 'checkbox' : 'radio'
+                                        const checkedForId = checked[id] || []
+                                        return (
+                                            <Fragment key={option.id}>
+                                                <input
+                                                    onChange={() => {
+                                                        const newChecked = {
+                                                            ...checked,
+                                                            [id]: getNewChecked(
+                                                                option.id,
+                                                                checkedForId,
+                                                                multiple
+                                                            ),
+                                                        }
+                                                        setChecked(newChecked)
+                                                        setStyles({
+                                                            ...styles,
+                                                            [id]: getCss(id, newChecked),
+                                                        })
+                                                        setterFunc(newChecked[id])
+                                                    }}
+                                                    type={optionType}
+                                                    className={classNames(
+                                                        classes.input,
+                                                        classes[optionType]
+                                                    )}
+                                                    name={id}
+                                                    id={`quickstart-${option.id}`}
+                                                    value={option.id}
+                                                    checked={checkedForId.includes(option.id)}
+                                                />
+                                                <label
+                                                    className={classes.label}
+                                                    htmlFor={`quickstart-${option.id}`}
                                                 >
-                                                    {' '}
-                                                    <Icon name="help" width={16} spaced />
-                                                </span>
-                                            )}
-                                        </label>
-                                    </Fragment>
-                                )
-                            })}
-                        </div>
-                    </div>
-                ))}
+                                                    {option.title}
+                                                    {option.meta && (
+                                                        <span className={classes.meta}>
+                                                            {option.meta}
+                                                        </span>
+                                                    )}
+                                                    {option.help && (
+                                                        <span
+                                                            data-tooltip={option.help}
+                                                            className={classes.help}
+                                                        >
+                                                            {' '}
+                                                            <Icon name="help" width={16} spaced />
+                                                        </span>
+                                                    )}
+                                                </label>
+                                            </Fragment>
+                                        )
+                                    })}
+                                </div>
+                            </div>
+                        )
+                    }
+                )}
                 <pre className={classes.code}>
-                    <code className={classes.results} data-quickstart-results="">
+                    <code className={classes.results} data-quickstart-results="" ref={contentRef}>
                         {children}
                     </code>
+
+                    <menu className={classes.menu}>
+                        {showCopy && (
+                            <button
+                                title="Copy to clipboard"
+                                onClick={onClickCopy}
+                                className={classes.iconButton}
+                            >
+                                <Icon width={18} name={copySuccess ? 'accept' : 'clipboard'} />
+                            </button>
+                        )}
+                        {download && (
+                            <a
+                                href={`data:application/octet-stream,${getRawContent(contentRef)}`}
+                                title="Download file"
+                                download={download}
+                                className={classes.iconButton}
+                            >
+                                <Icon width={18} name="download" />
+                            </a>
+                        )}
+                    </menu>
                 </pre>
+                {showCopy && <textarea ref={copyAreaRef} className={classes.copyArea} rows={1} />}
             </div>
         </Section>
     )
@@ -141,6 +245,7 @@ const Quickstart = ({ data, title, description, id, children }) => {
 Quickstart.defaultProps = {
     data: [],
     id: 'quickstart',
+    copy: true,
 }
 
 Quickstart.propTypes = {
@@ -164,12 +269,13 @@ Quickstart.propTypes = {
     ),
 }
 
-const QS = ({ children, prompt = 'bash', divider = false, ...props }) => {
+const QS = ({ children, prompt = 'bash', divider = false, comment = false, ...props }) => {
     const qsClassNames = classNames({
         [classes.prompt]: !!prompt && !divider,
         [classes.bash]: prompt === 'bash' && !divider,
         [classes.python]: prompt === 'python' && !divider,
         [classes.divider]: !!divider,
+        [classes.comment]: !!comment,
     })
     const attrs = Object.assign(
         {},
diff --git a/website/src/images/icons/download.svg b/website/src/images/icons/download.svg
new file mode 100644
index 000000000..109fb73ec
--- /dev/null
+++ b/website/src/images/icons/download.svg
@@ -0,0 +1,4 @@
+<svg  xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24">
+<path d="M16.707 7.404c-0.189-0.188-0.448-0.283-0.707-0.283s-0.518 0.095-0.707 0.283l-2.293 2.293v-6.697c0-0.552-0.448-1-1-1s-1 0.448-1 1v6.697l-2.293-2.293c-0.189-0.188-0.44-0.293-0.707-0.293s-0.518 0.105-0.707 0.293c-0.39 0.39-0.39 1.024 0 1.414l4.707 4.682 4.709-4.684c0.388-0.387 0.388-1.022-0.002-1.412z"></path>
+<path d="M20.987 16c0-0.105-0.004-0.211-0.039-0.316l-2-6c-0.136-0.409-0.517-0.684-0.948-0.684h-0.219c-0.094 0.188-0.21 0.368-0.367 0.525l-1.482 1.475h1.348l1.667 5h-13.893l1.667-5h1.348l-1.483-1.475c-0.157-0.157-0.274-0.337-0.367-0.525h-0.219c-0.431 0-0.812 0.275-0.948 0.684l-2 6c-0.035 0.105-0.039 0.211-0.039 0.316-0.013 0-0.013 5-0.013 5 0 0.553 0.447 1 1 1h16c0.553 0 1-0.447 1-1 0 0 0-5-0.013-5z"></path>
+</svg>
diff --git a/website/src/styles/aside.module.sass b/website/src/styles/aside.module.sass
index 7746451b4..0e73cc61a 100644
--- a/website/src/styles/aside.module.sass
+++ b/website/src/styles/aside.module.sass
@@ -24,10 +24,16 @@ $border-radius: 6px
         &:last-child
             margin: 0
 
+        &:first-child h4
+            margin-top: 0 !important
+
         code
             padding: 0
             margin: 0
 
+        h4
+            margin-left: 0
+
     p, ul, ol
         font: inherit
         margin-bottom: var(--spacing-sm)
diff --git a/website/src/styles/code.module.sass b/website/src/styles/code.module.sass
index 5dffb09c5..6a91bf8aa 100644
--- a/website/src/styles/code.module.sass
+++ b/website/src/styles/code.module.sass
@@ -24,7 +24,7 @@
 .code,
 .juniper-input pre
     display: block
-    padding: 1.75em 2em
+    padding: 1.75em 1.5em
 
 .code
     &[data-prompt]:before,
diff --git a/website/src/styles/layout.sass b/website/src/styles/layout.sass
index f3d78625a..9660363dd 100644
--- a/website/src/styles/layout.sass
+++ b/website/src/styles/layout.sass
@@ -370,10 +370,10 @@ body [id]:target
     background-color: var(--color-dark-secondary)
     border-left: 0.35em solid var(--color-theme)
     display: block
-    margin-right: -2em
-    margin-left: -2em
-    padding-right: 2em
-    padding-left: 1.65em
+    margin-right: -1.5em
+    margin-left: -1.5em
+    padding-right: 1.5em
+    padding-left: 1.1em
 
     &:empty:before
         // Fix issue where empty lines would disappear
diff --git a/website/src/styles/quickstart.module.sass b/website/src/styles/quickstart.module.sass
index 51e94fa4d..a10bacca1 100644
--- a/website/src/styles/quickstart.module.sass
+++ b/website/src/styles/quickstart.module.sass
@@ -83,6 +83,24 @@
 .fields
     flex: 100%
 
+.select
+    cursor: pointer
+    border: 1px solid var(--color-subtle)
+    border-radius: var(--border-radius)
+    display: inline-block
+    padding: 0.35rem 1.25rem
+    margin: 0 1rem 0.75rem 0
+    font-size: var(--font-size-sm)
+    background: var(--color-back)
+
+.text-input
+    border: 1px solid var(--color-subtle)
+    border-radius: var(--border-radius)
+    display: inline-block
+    padding: 0.35rem 0.75rem
+    font-size: var(--font-size-sm)
+    background: var(--color-back)
+
 .code
     background: var(--color-front)
     color: var(--color-back)
@@ -95,6 +113,7 @@
     border-bottom-right-radius: var(--border-radius)
     -webkit-font-smoothing: subpixel-antialiased
     -moz-osx-font-smoothing: auto
+    position: relative
 
 .results
     display: block
@@ -105,6 +124,9 @@
     & > span
         display: block
 
+.hide-prompts .prompt:before
+    content: initial !important
+
 .prompt:before
     color: var(--color-theme)
     margin-right: 1em
@@ -115,6 +137,9 @@
 .python:before
     content: ">>>"
 
+.comment
+    color: var(--syntax-comment)
+
 .divider
     padding: 1.5rem 0
 
@@ -123,3 +148,29 @@
 
     .input:checked + .label &
         color: inherit
+
+.copy-area
+    width: 1px
+    height: 1px
+    opacity: 0
+    position: absolute
+
+.menu
+    color: var(--color-subtle)
+    padding-right: 1.5rem
+    display: inline-block
+    position: absolute
+    bottom: var(--spacing-xs)
+    right: 0
+
+.icon-button
+    display: inline-block
+    color: inherit
+    cursor: pointer
+    transition: transform 0.05s ease
+
+    &:not(:last-child)
+        margin-right: 1.5rem
+
+    &:hover
+        transform: scale(1.1)
diff --git a/website/src/widgets/quickstart-install.js b/website/src/widgets/quickstart-install.js
index b2e72752a..9d993c969 100644
--- a/website/src/widgets/quickstart-install.js
+++ b/website/src/widgets/quickstart-install.js
@@ -92,7 +92,7 @@ const QuickstartInstall = ({ id, title }) => (
                     </QS>
                     <QS package="source">pip install -r requirements.txt</QS>
                     <QS addition="transformers" package="pip">
-                        pip install -U spacy-lookups-transformers
+                        pip install -U spacy-transformers
                     </QS>
                     <QS addition="transformers" package="source">
                         pip install -U spacy-transformers
diff --git a/website/src/widgets/quickstart-training.js b/website/src/widgets/quickstart-training.js
new file mode 100644
index 000000000..53c3a0efb
--- /dev/null
+++ b/website/src/widgets/quickstart-training.js
@@ -0,0 +1,128 @@
+import React, { useState } from 'react'
+import { StaticQuery, graphql } from 'gatsby'
+
+import { Quickstart, QS } from '../components/quickstart'
+
+const DEFAULT_LANG = 'en' // default value of the 'lang' field and initial lang state
+const MODELS_SMALL = { en: 'roberta-base-small' } // NOTE(review): not referenced anywhere in this file yet
+const MODELS_LARGE = { en: 'roberta-base' } // NOTE(review): not referenced anywhere in this file yet
+
+const COMPONENTS = ['tagger', 'parser', 'ner', 'textcat'] // ids offered by the 'components' field
+const COMMENT = `# This is an auto-generated partial config for training a model.
+# TODO: instructions for how to fill and use it`
+const DATA = [ // field definitions handed to <Quickstart data={...}> by QuickstartTraining
+    {
+        id: 'lang', // must stay at index 0: QuickstartTraining assigns DATA[0].dropdown
+        title: 'Language',
+        defaultValue: DEFAULT_LANG,
+    },
+    {
+        id: 'components', // same key as the 'components' entry of the setters object
+        title: 'Components',
+        help: 'Pipeline components to train. Requires training data for those annotations.',
+        options: COMPONENTS.map(id => ({ id, title: id })), // title is the raw component id
+        multiple: true,
+    },
+    {
+        id: 'hardware',
+        title: 'Hardware',
+        options: [
+            { id: 'cpu-only', title: 'CPU only' },
+            { id: 'cpu', title: 'CPU preferred' },
+            { id: 'gpu', title: 'GPU', checked: true },
+        ],
+    },
+    {
+        id: 'optimize', // matched by the <QS optimize="..."> lines in the widget
+        title: 'Optimize for',
+        help: '...', // NOTE(review): looks like placeholder help text - confirm
+        options: [
+            { id: 'efficiency', title: 'efficiency', checked: true },
+            { id: 'accuracy', title: 'accuracy' },
+        ],
+    },
+    {
+        id: 'config', // matched by the <QS config="independent"> block in the widget
+        title: 'Configuration',
+        options: [
+            {
+                id: 'independent',
+                title: 'independent components',
+                help: "Make components independent and don't share weights",
+            },
+        ],
+        multiple: true,
+    },
+]
+
+const QuickstartTraining = ({ id, title, download = 'config.cfg' }) => { // renders a partial training config
+    const [lang, setLang] = useState(DEFAULT_LANG)
+    const [pipeline, setPipeline] = useState([])
+    const setters = { lang: setLang, components: setPipeline } // keyed by DATA field id; presumably invoked by Quickstart - confirm
+    return (
+        <StaticQuery
+            query={query}
+            render={({ site }) => {
+                const langs = site.siteMetadata.languages
+                DATA[0].dropdown = langs.map(({ name, code }) => ({ // NOTE: writes to module-level DATA on every render
+                    id: code,
+                    title: name,
+                }))
+                const recommendedTrf = Object.assign( // placeholder entry per language code
+                    {},
+                    ...langs.map(({ code }) => ({ [code]: { sm: 'TODO', lg: 'TODO' } }))
+                )
+                return (
+                    <Quickstart
+                        download={download}
+                        data={DATA}
+                        title={title}
+                        id={id}
+                        setters={setters}
+                        hidePrompts
+                    >
+                        <QS comment>{COMMENT}</QS>
+                        <span>[nlp]</span>
+                        <span>lang = "{lang}"</span>
+                        <span>pipeline = {JSON.stringify(pipeline).replace(/,/g, ', ')}</span>
+                        <br />
+                        <span>[components]</span>
+                        <br />
+                        <span>[components.transformer]</span>
+                        <QS optimize="efficiency">name = "{recommendedTrf[lang].sm}"</QS>
+                        <QS optimize="accuracy">name = "{recommendedTrf[lang].lg}"</QS>
+                        {!!pipeline.length && <br />}
+                        {pipeline.map((pipe, i) => (
+                            <React.Fragment key={pipe}>
+                                {i !== 0 && <br />}
+                                <span>[components.{pipe}]</span>
+                                <span>factory = "{pipe}"</span>
+                                <QS config="independent">
+                                    <br />
+                                    [components.{pipe}.model.tok2vec]
+                                    <br />
+                                    @architectures = "spacy.Tok2Vec.v1"
+                                </QS>
+                            </React.Fragment>
+                        ))}
+                    </Quickstart>
+                )
+            }}
+        />
+    )
+}
+
+const query = graphql`
+    query QuickstartTrainingQuery {
+        site {
+            siteMetadata {
+                languages {
+                    code
+                    name
+                }
+            }
+        }
+    }
+` // passed to StaticQuery in QuickstartTraining; each language's code/name becomes a 'lang' dropdown entry
+
+export default QuickstartTraining