diff --git a/Makefile b/Makefile index c4e77d101..46a7b22ba 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ SHELL := /bin/bash ifndef SPACY_EXTRAS -override SPACY_EXTRAS = spacy-lookups-data jieba pkuseg==0.0.25 sudachipy sudachidict_core +override SPACY_EXTRAS = spacy-lookups-data==0.4.0.dev0 jieba pkuseg==0.0.25 sudachipy sudachidict_core endif ifndef PYVER diff --git a/extra/example_data/textcat_example_data/textcatjsonl_to_trainjson.py b/extra/example_data/textcat_example_data/textcatjsonl_to_trainjson.py index 66d96ff68..41b6a70da 100644 --- a/extra/example_data/textcat_example_data/textcatjsonl_to_trainjson.py +++ b/extra/example_data/textcat_example_data/textcatjsonl_to_trainjson.py @@ -1,7 +1,7 @@ from pathlib import Path import plac import spacy -from spacy.gold import docs_to_json +from spacy.training import docs_to_json import srsly import sys diff --git a/extra/experiments/tok2vec-ner/multihashembed_tok2vec.cfg b/extra/experiments/tok2vec-ner/multihashembed_tok2vec.cfg index a5fa32b18..e2ab148c6 100644 --- a/extra/experiments/tok2vec-ner/multihashembed_tok2vec.cfg +++ b/extra/experiments/tok2vec-ner/multihashembed_tok2vec.cfg @@ -31,10 +31,13 @@ lang = "en" vectors = null [nlp.pipeline.ner] -factory = "simple_ner" +factory = "ner" [nlp.pipeline.ner.model] -@architectures = "spacy.BiluoTagger.v1" +@architectures = "spacy.TransitionBasedParser.v1" +nr_feature_tokens = 6 +hidden_width = 64 +maxout_pieces = 2 [nlp.pipeline.ner.model.tok2vec] @architectures = "spacy.HashEmbedCNN.v1" diff --git a/pyproject.toml b/pyproject.toml index d23730b00..e610e603e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ requires = [ "cymem>=2.0.2,<2.1.0", "preshed>=3.0.2,<3.1.0", "murmurhash>=0.28.0,<1.1.0", - "thinc>=8.0.0a30,<8.0.0a40", + "thinc>=8.0.0a31,<8.0.0a40", "blis>=0.4.0,<0.5.0", "pytokenizations", "pathy" diff --git a/requirements.txt b/requirements.txt index 9b108de8d..db6eae2ef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ # Our libraries cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 -thinc>=8.0.0a30,<8.0.0a40 +thinc>=8.0.0a31,<8.0.0a40 blis>=0.4.0,<0.5.0 ml_datasets>=0.1.1 murmurhash>=0.28.0,<1.1.0 diff --git a/setup.cfg b/setup.cfg index fc33abedb..10a8972b0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,13 +34,13 @@ setup_requires = cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 murmurhash>=0.28.0,<1.1.0 - thinc>=8.0.0a30,<8.0.0a40 + thinc>=8.0.0a31,<8.0.0a40 install_requires = # Our libraries murmurhash>=0.28.0,<1.1.0 cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 - thinc>=8.0.0a30,<8.0.0a40 + thinc>=8.0.0a31,<8.0.0a40 blis>=0.4.0,<0.5.0 wasabi>=0.8.0,<1.1.0 srsly>=2.1.0,<3.0.0 @@ -64,7 +64,7 @@ console_scripts = [options.extras_require] lookups = - spacy_lookups_data>=0.3.2,<0.4.0 + spacy_lookups_data==0.4.0.dev0 cuda = cupy>=5.0.0b4,<9.0.0 cuda80 = diff --git a/setup.py b/setup.py index d448a262c..4a4b99f22 100755 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ Options.docstrings = True PACKAGES = find_packages() MOD_NAMES = [ - "spacy.gold.example", + "spacy.training.example", "spacy.parts_of_speech", "spacy.strings", "spacy.lexeme", @@ -48,7 +48,7 @@ MOD_NAMES = [ "spacy.pipeline._parser_internals.stateclass", "spacy.pipeline._parser_internals.transition_system", "spacy.tokenizer", - "spacy.gold.gold_io", + "spacy.training.gold_io", "spacy.tokens.doc", "spacy.tokens.span", "spacy.tokens.token", diff --git a/spacy/about.py b/spacy/about.py index 7d0e85a17..b8dc65455 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -1,7 +1,8 @@ # fmt: off __title__ = 
"spacy-nightly" -__version__ = "3.0.0a14" +__version__ = "3.0.0a16" __release__ = True __download_url__ = "https://github.com/explosion/spacy-models/releases/download" __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" -__projects__ = "https://github.com/explosion/spacy-boilerplates" +__projects__ = "https://github.com/explosion/projects" +__projects_branch__ = "v3" diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index 0ecb5ad8f..360d2439a 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -1,4 +1,4 @@ -from typing import Dict, Any, Union, List, Optional, TYPE_CHECKING +from typing import Dict, Any, Union, List, Optional, Tuple, TYPE_CHECKING import sys import shutil from pathlib import Path @@ -6,6 +6,7 @@ from wasabi import msg import srsly import hashlib import typer +import subprocess from click import NoSuchOption from typer.main import get_command from contextlib import contextmanager @@ -13,7 +14,7 @@ from thinc.config import Config, ConfigValidationError from configparser import InterpolationError from ..schemas import ProjectConfigSchema, validate -from ..util import import_file, run_command, make_tempdir +from ..util import import_file, run_command, make_tempdir, registry if TYPE_CHECKING: from pathy import Pathy # noqa: F401 @@ -54,6 +55,8 @@ app.add_typer(init_cli) def setup_cli() -> None: + # Make sure the entry-point for CLI runs, so that they get imported. + registry.cli.get_all() # Ensure that the help messages always display the correct prompt command = get_command(app) command(prog_name=COMMAND) @@ -318,33 +321,87 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m # *that* we can do by path. # We're using Git and sparse checkout to only clone the files we need with make_tempdir() as tmp_dir: + git_version = get_git_version() + supports_sparse = git_version >= (2, 22) # This is the "clone, but don't download anything" part. - cmd = ( - f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " - f"--filter=blob:none " # <-- The key bit - f"-b {branch}" - ) - run_command(cmd, capture=True) + cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} " + if supports_sparse: + cmd += f"--filter=blob:none" # <-- The key bit + else: + msg.warn( + f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) " + f"that doesn't fully support sparse checkout yet. This means that " + f"more files than necessary may be downloaded temporarily. To " + f"only download the files needed, upgrade to Git v2.22 or above." + ) + _attempt_run_command(cmd) # Now we need to find the missing filenames for the subpath we want. # Looking for this 'rev-list' command in the git --help? Hah. - cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}" - ret = run_command(cmd, capture=True) - repo = _from_http_to_git(repo) + cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if supports_sparse else ''} -- {subpath}" + ret = _attempt_run_command(cmd) + git_repo = _from_http_to_git(repo) # Now pass those missings into another bit of git internals missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")]) - cmd = f"git -C {tmp_dir} fetch-pack {repo} {missings}" - run_command(cmd, capture=True) + if supports_sparse and not missings: + err = ( + f"Could not find any relevant files for '{subpath}'. " + f"Did you specify a correct and complete path within repo '{repo}' " + f"and branch {branch}?" 
+ ) + msg.fail(err, exits=1) + if supports_sparse: + cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}" + _attempt_run_command(cmd) # And finally, we can checkout our subpath cmd = f"git -C {tmp_dir} checkout {branch} {subpath}" - run_command(cmd) + _attempt_run_command(cmd) # We need Path(name) to make sure we also support subdirectories shutil.move(str(tmp_dir / Path(subpath)), str(dest)) -def _from_http_to_git(repo): +def get_git_version() -> Tuple[int, int]: + ret = _attempt_run_command(["git", "--version"]) + # TODO: this seems kinda brittle? + version = ret.stdout[11:].strip().split(".") + return (int(version[0]), int(version[1])) + + +def _attempt_run_command(cmd: Union[str, List[str]]): + try: + return run_command(cmd, capture=True) + except subprocess.CalledProcessError as e: + err = f"Could not run command" + msg.fail(err) + print(cmd) + sys.exit(1) + + +def _from_http_to_git(repo: str) -> str: if repo.startswith("http://"): repo = repo.replace(r"http://", r"https://") if repo.startswith(r"https://"): repo = repo.replace("https://", "git@").replace("/", ":", 1) + if repo.endswith("/"): + repo = repo[:-1] repo = f"{repo}.git" return repo + + +def string_to_list(value, intify=False): + """Parse a comma-separated string to a list""" + if not value: + return [] + if value.startswith("[") and value.endswith("]"): + value = value[1:-1] + result = [] + for p in value.split(","): + p = p.strip() + if p.startswith("'") and p.endswith("'"): + p = p[1:-1] + if p.startswith('"') and p.endswith('"'): + p = p[1:-1] + p = p.strip() + if intify: + p = int(p) + result.append(p) + return result diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py index ade5a3ad4..ad89b9976 100644 --- a/spacy/cli/convert.py +++ b/spacy/cli/convert.py @@ -7,9 +7,9 @@ import re import sys from ._util import app, Arg, Opt -from ..gold import docs_to_json +from ..training import docs_to_json from ..tokens import DocBin -from ..gold.converters import iob2docs, conll_ner2docs, json2docs, conllu2docs +from ..training.converters import iob2docs, conll_ner2docs, json2docs, conllu2docs # Converters are matched by file extension except for ner/iob, which are diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index 75a81e6f5..d52f30b82 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -8,7 +8,7 @@ import typer from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides from ._util import import_code, debug_cli, get_sourced_components -from ..gold import Corpus, Example +from ..training import Corpus, Example from ..pipeline._parser_internals import nonproj from ..language import Language from .. import util diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py index 5bd4e008f..1a250e43e 100644 --- a/spacy/cli/debug_model.py +++ b/spacy/cli/debug_model.py @@ -5,7 +5,7 @@ from thinc.api import require_gpu, fix_random_seed, set_dropout_rate, Adam from thinc.api import Model, data_validation import typer -from ._util import Arg, Opt, debug_cli, show_validation_error, parse_config_overrides +from ._util import Arg, Opt, debug_cli, show_validation_error, parse_config_overrides, string_to_list from .. 
import util @@ -38,12 +38,13 @@ def debug_model_cli( require_gpu(use_gpu) else: msg.info("Using CPU") + layers = string_to_list(layers, intify=True) print_settings = { "dimensions": dimensions, "parameters": parameters, "gradients": gradients, "attributes": attributes, - "layers": [int(x.strip()) for x in layers.split(",")] if layers else [], + "layers": layers, "print_before_training": P0, "print_after_init": P1, "print_after_training": P2, @@ -84,11 +85,11 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None _print_model(model, print_settings) # STEP 1: Initializing the model and printing again + X = _get_docs() Y = _get_output(model.ops.xp) - _set_output_dim(nO=Y.shape[-1], model=model) # The output vector might differ from the official type of the output layer with data_validation(False): - model.initialize(X=_get_docs(), Y=Y) + model.initialize(X=X, Y=Y) if print_settings.get("print_after_init"): msg.divider(f"STEP 1 - after initialization") _print_model(model, print_settings) @@ -135,15 +136,6 @@ def _get_output(xp): return xp.asarray([i + 10 for i, _ in enumerate(_get_docs())], dtype="float32") -def _set_output_dim(model, nO): - # the dim inference doesn't always work 100%, we need this hack like we have it in pipe.pyx - if model.has_dim("nO") is None: - model.set_dim("nO", nO) - if model.has_ref("output_layer"): - if model.get_ref("output_layer").has_dim("nO") is None: - model.get_ref("output_layer").set_dim("nO", nO) - - def _print_model(model, print_settings): layers = print_settings.get("layers", "") parameters = print_settings.get("parameters", False) diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py index c5cbab09a..f9954d9ad 100644 --- a/spacy/cli/evaluate.py +++ b/spacy/cli/evaluate.py @@ -5,7 +5,7 @@ import re import srsly from thinc.api import require_gpu, fix_random_seed -from ..gold import Corpus +from ..training import Corpus from ..tokens import Doc from ._util import app, Arg, Opt from ..scorer import Scorer diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py index 584ca7f64..ec65b0e0a 100644 --- a/spacy/cli/init_config.py +++ b/spacy/cli/init_config.py @@ -9,7 +9,7 @@ import re from .. import util from ..language import DEFAULT_CONFIG_PRETRAIN_PATH from ..schemas import RecommendationSchema -from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND +from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND, string_to_list ROOT = Path(__file__).parent / "templates" @@ -42,7 +42,7 @@ def init_config_cli( """ if isinstance(optimize, Optimizations): # instance of enum from the CLI optimize = optimize.value - pipeline = [p.strip() for p in pipeline.split(",")] + pipeline = string_to_list(pipeline) init_config(output_file, lang=lang, pipeline=pipeline, optimize=optimize, cpu=cpu) diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py index 5f06fd895..9eab7b54d 100644 --- a/spacy/cli/init_model.py +++ b/spacy/cli/init_model.py @@ -256,6 +256,7 @@ def add_vectors( def read_vectors(msg: Printer, vectors_loc: Path, truncate_vectors: int): f = open_file(vectors_loc) + f = ensure_shape(f) shape = tuple(int(size) for size in next(f).split()) if truncate_vectors >= 1: shape = (truncate_vectors, shape[1]) @@ -274,6 +275,31 @@ def read_vectors(msg: Printer, vectors_loc: Path, truncate_vectors: int): return vectors_data, vectors_keys +def ensure_shape(lines): + """Ensure that the first line of the data is the vectors shape. 
+ + If it's not, we read in the data and output the shape as the first result, + so that the reader doesn't have to deal with the problem. + """ + first_line = next(lines) + try: + shape = tuple(int(size) for size in first_line.split()) + except ValueError: + shape = None + if shape is not None: + # All good, give the data + yield first_line + yield from lines + else: + # Figure out the shape, make it the first value, and then give the + # rest of the data. + width = len(first_line.split()) - 1 + captured = [first_line] + list(lines) + length = len(captured) + yield f"{length} {width}" + yield from captured + + def read_freqs( freqs_loc: Path, max_length: int = 100, min_doc_freq: int = 5, min_freq: int = 50 ): diff --git a/spacy/cli/package.py b/spacy/cli/package.py index c457b3e17..8d6cd84c1 100644 --- a/spacy/cli/package.py +++ b/spacy/cli/package.py @@ -18,6 +18,7 @@ def package_cli( output_dir: Path = Arg(..., help="Output parent directory", exists=True, file_okay=False), meta_path: Optional[Path] = Opt(None, "--meta-path", "--meta", "-m", help="Path to meta.json", exists=True, dir_okay=False), create_meta: bool = Opt(False, "--create-meta", "-c", "-C", help="Create meta.json, even if one exists"), + name: Optional[str] = Opt(None, "--name", "-n", help="Package name to override meta"), version: Optional[str] = Opt(None, "--version", "-v", help="Package version to override meta"), no_sdist: bool = Opt(False, "--no-sdist", "-NS", help="Don't build .tar.gz sdist, can be set if you want to run this step manually"), force: bool = Opt(False, "--force", "-f", "-F", help="Force overwriting existing data in output directory"), @@ -38,6 +39,7 @@ def package_cli( input_dir, output_dir, meta_path=meta_path, + name=name, version=version, create_meta=create_meta, create_sdist=not no_sdist, @@ -50,6 +52,7 @@ def package( input_dir: Path, output_dir: Path, meta_path: Optional[Path] = None, + name: Optional[str] = None, version: Optional[str] = None, create_meta: bool = False, create_sdist: bool = True, @@ -71,6 +74,8 @@ def package( msg.fail("Can't load pipeline meta.json", meta_path, exits=1) meta = srsly.read_json(meta_path) meta = get_meta(input_dir, meta) + if name is not None: + meta["name"] = name if version is not None: meta["version"] = version if not create_meta: # only print if user doesn't want to overwrite diff --git a/spacy/cli/project/assets.py b/spacy/cli/project/assets.py index 2b623675d..7326b2e5c 100644 --- a/spacy/cli/project/assets.py +++ b/spacy/cli/project/assets.py @@ -38,16 +38,21 @@ def project_assets(project_dir: Path) -> None: msg.warn(f"No assets specified in {PROJECT_FILE}", exits=0) msg.info(f"Fetching {len(assets)} asset(s)") for asset in assets: - dest = Path(asset["dest"]) + dest = (project_dir / asset["dest"]).resolve() checksum = asset.get("checksum") if "git" in asset: if dest.exists(): # If there's already a file, check for checksum if checksum and checksum == get_checksum(dest): - msg.good(f"Skipping download with matching checksum: {dest}") + msg.good( + f"Skipping download with matching checksum: {asset['dest']}" + ) continue else: - shutil.rmtree(dest) + if dest.is_dir(): + shutil.rmtree(dest) + else: + dest.unlink() git_sparse_checkout( asset["git"]["repo"], asset["git"]["path"], @@ -67,14 +72,16 @@ def check_private_asset(dest: Path, checksum: Optional[str] = None) -> None: """Check and validate assets without a URL (private assets that the user has to provide themselves) and give feedback about the checksum. - dest (Path): Desintation path of the asset. 
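A quick sketch of how the ensure_shape generator added to spacy/cli/init_model.py above behaves when a vectors file has no leading "rows dims" header line; the sample vectors and values are illustrative only:

    from spacy.cli.init_model import ensure_shape

    raw_lines = iter(["king 0.1 0.2 0.3", "queen 0.2 0.1 0.4"])  # no shape header
    fixed = ensure_shape(raw_lines)
    print(next(fixed))  # "2 3" -- a shape line is synthesized and yielded first
    print(next(fixed))  # "king 0.1 0.2 0.3" -- the original data then follows unchanged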
+ dest (Path): Destination path of the asset. checksum (Optional[str]): Optional checksum of the expected file. """ if not Path(dest).exists(): err = f"No URL provided for asset. You need to add this file yourself: {dest}" msg.warn(err) else: - if checksum and checksum == get_checksum(dest): + if not checksum: + msg.good(f"Asset already exists: {dest}") + elif checksum == get_checksum(dest): msg.good(f"Asset exists with matching checksum: {dest}") else: msg.fail(f"Asset available but with incorrect checksum: {dest}") diff --git a/spacy/cli/project/clone.py b/spacy/cli/project/clone.py index a419feb0f..ab617e4ba 100644 --- a/spacy/cli/project/clone.py +++ b/spacy/cli/project/clone.py @@ -16,6 +16,7 @@ def project_clone_cli( name: str = Arg(..., help="The name of the template to clone"), dest: Optional[Path] = Arg(None, help="Where to clone the project. Defaults to current working directory", exists=False), repo: str = Opt(about.__projects__, "--repo", "-r", help="The repository to clone from"), + branch: str = Opt(about.__projects_branch__, "--branch", "-b", help="The branch to clone from") # fmt: on ): """Clone a project template from a repository. Calls into "git" and will @@ -26,23 +27,30 @@ def project_clone_cli( DOCS: https://nightly.spacy.io/api/cli#project-clone """ if dest is None: - dest = Path.cwd() / name - project_clone(name, dest, repo=repo) + dest = Path.cwd() / Path(name).parts[-1] + project_clone(name, dest, repo=repo, branch=branch) -def project_clone(name: str, dest: Path, *, repo: str = about.__projects__) -> None: +def project_clone( + name: str, + dest: Path, + *, + repo: str = about.__projects__, + branch: str = about.__projects_branch__, +) -> None: """Clone a project template from a repository. name (str): Name of subdirectory to clone. dest (Path): Destination path of cloned project. repo (str): URL of Git repo containing project templates. + branch (str): The branch to clone from """ dest = ensure_path(dest) check_clone(name, dest, repo) project_dir = dest.resolve() repo_name = re.sub(r"(http(s?)):\/\/github.com/", "", repo) try: - git_sparse_checkout(repo, name, dest) + git_sparse_checkout(repo, name, dest, branch=branch) except subprocess.CalledProcessError: err = f"Could not clone '{name}' from repo '{repo_name}'" msg.fail(err, exits=1) diff --git a/spacy/cli/train.py b/spacy/cli/train.py index 6be47fa39..0bc493e56 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -1,4 +1,5 @@ from typing import Optional, Dict, Any, Tuple, Union, Callable, List +from timeit import default_timer as timer import srsly import tqdm from pathlib import Path @@ -15,7 +16,7 @@ from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error from ._util import import_code, get_sourced_components from ..language import Language from .. 
import util -from ..gold.example import Example +from ..training.example import Example from ..errors import Errors @@ -286,9 +287,12 @@ def train_while_improving( ] raw_batches = util.minibatch(raw_examples, size=8) + words_seen = 0 + start_time = timer() for step, (epoch, batch) in enumerate(train_data): dropout = next(dropouts) for subbatch in subdivide_batch(batch, accumulate_gradient): + nlp.update( subbatch, drop=dropout, losses=losses, sgd=False, exclude=exclude ) @@ -317,6 +321,7 @@ def train_while_improving( else: score, other_scores = (None, None) is_best_checkpoint = None + words_seen += sum(len(eg) for eg in batch) info = { "epoch": epoch, "step": step, @@ -324,6 +329,8 @@ def train_while_improving( "other_scores": other_scores, "losses": losses, "checkpoints": results, + "seconds": int(timer() - start_time), + "words": words_seen, } yield batch, info, is_best_checkpoint if is_best_checkpoint is not None: diff --git a/spacy/default_config.cfg b/spacy/default_config.cfg index 9507f0f0a..7cd71453f 100644 --- a/spacy/default_config.cfg +++ b/spacy/default_config.cfg @@ -52,7 +52,7 @@ path = ${paths.train} # data is passed in sentence-by-sentence via some prior preprocessing. gold_preproc = false # Limitations on training document length -max_length = 2000 +max_length = 0 # Limitation on number of training examples limit = 0 @@ -64,7 +64,7 @@ path = ${paths.dev} # data is passed in sentence-by-sentence via some prior preprocessing. gold_preproc = false # Limitations on training document length -max_length = 2000 +max_length = 0 # Limitation on number of training examples limit = 0 @@ -88,9 +88,4 @@ L2 = 0.01 grad_clip = 1.0 use_averages = false eps = 1e-8 - -[training.optimizer.learn_rate] -@schedules = "warmup_linear.v1" -warmup_steps = 250 -total_steps = 20000 -initial_rate = 0.001 +learn_rate = 0.001 diff --git a/spacy/errors.py b/spacy/errors.py index bad3e83e4..7164598b6 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -66,7 +66,7 @@ class Warnings: "in problems with the vocab further on in the pipeline.") W030 = ("Some entities could not be aligned in the text \"{text}\" with " "entities \"{entities}\". Use " - "`spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)`" + "`spacy.training.biluo_tags_from_offsets(nlp.make_doc(text), entities)`" " to check the alignment. Misaligned entities ('-') will be " "ignored during training.") W033 = ("Training a new {model} using a model with no lexeme normalization " @@ -247,8 +247,8 @@ class Errors: "Query string: {string}\nOrth cached: {orth}\nOrth ID: {orth_id}") E065 = ("Only one of the vector table's width and shape can be specified. " "Got width {width} and shape {shape}.") - E067 = ("Invalid BILUO tag sequence: Got a tag starting with 'I' (inside " - "an entity) without a preceding 'B' (beginning of an entity). " + E067 = ("Invalid BILUO tag sequence: Got a tag starting with {start} " + "without a preceding 'B' (beginning of an entity). " "Tag sequence:\n{tags}") E068 = ("Invalid BILUO tag: '{tag}'.") E071 = ("Error creating lexeme: specified orth ID ({orth}) does not " @@ -320,10 +320,6 @@ class Errors: "So instead of pickling the span, pickle the Doc it belongs to or " "use Span.as_doc to convert the span to a standalone Doc object.") E115 = ("All subtokens must have associated heads.") - E116 = ("Cannot currently add labels to pretrained text classifier. Add " - "labels before training begins. 
This functionality was available " - "in previous versions, but had significant bugs that led to poor " - "performance.") E117 = ("The newly split tokens must match the text of the original token. " "New orths: {new}. Old text: {old}.") E118 = ("The custom extension attribute '{attr}' is not registered on the " @@ -378,8 +374,9 @@ class Errors: "should be of equal length.") E141 = ("Entity vectors should be of length {required} instead of the " "provided {found}.") - E143 = ("Labels for component '{name}' not initialized. Did you forget to " - "call add_label()?") + E143 = ("Labels for component '{name}' not initialized. This can be fixed " + "by calling add_label, or by providing a representative batch of " + "examples to the component's begin_training method.") E145 = ("Error reading `{param}` from input file.") E146 = ("Could not access `{path}`.") E147 = ("Unexpected error in the {method} functionality of the " @@ -483,6 +480,16 @@ class Errors: E201 = ("Span index out of range.") # TODO: fix numbering after merging develop into master + E921 = ("The method 'set_output' can only be called on components that have " + "a Model with a 'resize_output' attribute. Otherwise, the output " + "layer can not be dynamically changed.") + E922 = ("Component '{name}' has been initialized with an output dimension of " + "{nO} - cannot add any more labels.") + E923 = ("It looks like there is no proper sample data to initialize the " + "Model of component '{name}'. " + "This is likely a bug in spaCy, so feel free to open an issue.") + E924 = ("The '{name}' component does not seem to be initialized properly. " + "This is likely a bug in spaCy, so feel free to open an issue.") E925 = ("Invalid color values for displaCy visualizer: expected dictionary " "mapping label names to colors but got: {obj}") E926 = ("It looks like you're trying to modify nlp.{attr} directly. This " diff --git a/spacy/language.py b/spacy/language.py index cd84e30a4..70dad59f3 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -17,7 +17,7 @@ from timeit import default_timer as timer from .tokens.underscore import Underscore from .vocab import Vocab, create_vocab from .pipe_analysis import validate_attrs, analyze_pipes, print_pipe_analysis -from .gold import Example, validate_examples +from .training import Example, validate_examples from .scorer import Scorer from .util import create_default_optimizer, registry, SimpleFrozenList from .util import SimpleFrozenDict, combine_score_weights, CONFIG_SECTION_ORDER @@ -243,7 +243,8 @@ class Language: self._config["nlp"]["pipeline"] = list(self.component_names) self._config["nlp"]["disabled"] = list(self.disabled) self._config["components"] = pipeline - self._config["training"]["score_weights"] = combine_score_weights(score_weights) + if not self._config["training"].get("score_weights"): + self._config["training"]["score_weights"] = combine_score_weights(score_weights) if not srsly.is_json_serializable(self._config): raise ValueError(Errors.E961.format(config=self._config)) return self._config @@ -656,7 +657,7 @@ class Language: return resolved[factory_name] def create_pipe_from_source( - self, source_name: str, source: "Language", *, name: str, + self, source_name: str, source: "Language", *, name: str ) -> Tuple[Callable[[Doc], Doc], str]: """Create a pipeline component by copying it from an existing model. 
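In practice this code path is reached through nlp.add_pipe with a source pipeline; a minimal sketch, assuming a v3-compatible trained pipeline such as en_core_web_sm is installed (the package name is only an example):

    import spacy

    source_nlp = spacy.load("en_core_web_sm")
    nlp = spacy.blank("en")
    # Copy the trained "ner" component and its config from the source pipeline.
    nlp.add_pipe("ner", source=source_nlp)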
@@ -1155,21 +1156,24 @@ class Language: DOCS: https://nightly.spacy.io/api/language#begin_training """ - # TODO: throw warning when get_gold_tuples is provided instead of get_examples if get_examples is None: - get_examples = lambda: [] - else: # Populate vocab - if not hasattr(get_examples, "__call__"): - err = Errors.E930.format(name="Language", obj=type(get_examples)) + util.logger.debug( + "No 'get_examples' callback provided to 'Language.begin_training', creating dummy examples" + ) + doc = Doc(self.vocab, words=["x", "y", "z"]) + get_examples = lambda: [Example.from_dict(doc, {})] + # Populate vocab + if not hasattr(get_examples, "__call__"): + err = Errors.E930.format(name="Language", obj=type(get_examples)) + raise ValueError(err) + for example in get_examples(): + if not isinstance(example, Example): + err = Errors.E978.format( + name="Language.begin_training", types=type(example) + ) raise ValueError(err) - for example in get_examples(): - if not isinstance(example, Example): - err = Errors.E978.format( - name="Language.begin_training", types=type(example) - ) - raise ValueError(err) - for word in [t.text for t in example.reference]: - _ = self.vocab[word] # noqa: F841 + for word in [t.text for t in example.reference]: + _ = self.vocab[word] # noqa: F841 if device >= 0: # TODO: do we need this here? require_gpu(device) if self.vocab.vectors.data.shape[1] >= 1: @@ -1187,7 +1191,7 @@ class Language: return self._optimizer def resume_training( - self, *, sgd: Optional[Optimizer] = None, device: int = -1, + self, *, sgd: Optional[Optimizer] = None, device: int = -1 ) -> Optimizer: """Continue training a pretrained model. diff --git a/spacy/ml/_biluo.py b/spacy/ml/_biluo.py deleted file mode 100644 index 5a66a35bd..000000000 --- a/spacy/ml/_biluo.py +++ /dev/null @@ -1,105 +0,0 @@ -"""Thinc layer to do simpler transition-based parsing, NER, etc.""" -from typing import Dict, Optional -import numpy -from thinc.api import Model -from thinc.types import Padded, Floats3d - - -def BILUO() -> Model[Padded, Padded]: - return Model( - "biluo", - forward, - init=init, - dims={"nO": None}, - attrs={"get_num_actions": get_num_actions}, - ) - - -def init(model, X: Optional[Padded] = None, Y: Optional[Padded] = None): - if X is not None and Y is not None: - if X.data.shape != Y.data.shape: - # TODO: Fix error - raise ValueError("Mismatched shapes (TODO: Fix message)") - model.set_dim("nO", X.data.shape[2]) - elif X is not None: - model.set_dim("nO", X.data.shape[2]) - elif Y is not None: - model.set_dim("nO", Y.data.shape[2]) - elif model.get_dim("nO") is None: - raise ValueError("Dimension unset for BILUO: nO") - - -def forward(model: Model[Padded, Padded], Xp: Padded, is_train: bool): - n_labels = (model.get_dim("nO") - 1) // 4 - n_tokens, n_docs, n_actions = Xp.data.shape - # At each timestep, we make a validity mask of shape (n_docs, n_actions) - # to indicate which actions are valid next for each sequence. To construct - # the mask, we have a state of shape (2, n_actions) and a validity table of - # shape (2, n_actions+1, n_actions). The first dimension of the state indicates - # whether it's the last token, the second dimension indicates the previous - # action, plus a special 'null action' for the first entry. 
- valid_transitions = model.ops.asarray(_get_transition_table(n_labels)) - prev_actions = model.ops.alloc1i(n_docs) - # Initialize as though prev action was O - prev_actions.fill(n_actions - 1) - Y = model.ops.alloc3f(*Xp.data.shape) - masks = model.ops.alloc3f(*Y.shape) - max_value = Xp.data.max() - for t in range(Xp.data.shape[0]): - is_last = (Xp.lengths < (t + 2)).astype("i") - masks[t] = valid_transitions[is_last, prev_actions] - # Don't train the out-of-bounds sequences. - masks[t, Xp.size_at_t[t] :] = 0 - # Valid actions get 0*10e8, invalid get large negative value - Y[t] = Xp.data[t] + ((masks[t] - 1) * max_value * 10) - prev_actions = Y[t].argmax(axis=-1) - - def backprop_biluo(dY: Padded) -> Padded: - dY.data *= masks - return dY - - return Padded(Y, Xp.size_at_t, Xp.lengths, Xp.indices), backprop_biluo - - -def get_num_actions(n_labels: int) -> int: - # One BEGIN action per label - # One IN action per label - # One LAST action per label - # One UNIT action per label - # One OUT action - return n_labels + n_labels + n_labels + n_labels + 1 - - -def _get_transition_table( - n_labels: int, *, _cache: Dict[int, Floats3d] = {} -) -> Floats3d: - n_actions = get_num_actions(n_labels) - if n_actions in _cache: - return _cache[n_actions] - table = numpy.zeros((2, n_actions, n_actions), dtype="f") - B_start, B_end = (0, n_labels) - I_start, I_end = (B_end, B_end + n_labels) - L_start, L_end = (I_end, I_end + n_labels) - U_start, _ = (L_end, L_end + n_labels) # noqa: F841 - # Using ranges allows us to set specific cells, which is necessary to express - # that only actions of the same label are valid continuations. - B_range = numpy.arange(B_start, B_end) - I_range = numpy.arange(I_start, I_end) - L_range = numpy.arange(L_start, L_end) - # If this is the last token and the previous action was B or I, only L - # of that label is valid - table[1, B_range, L_range] = 1 - table[1, I_range, L_range] = 1 - # If this isn't the last token and the previous action was B or I, only I or - # L of that label are valid. - table[0, B_range, I_range] = 1 - table[0, B_range, L_range] = 1 - table[0, I_range, I_range] = 1 - table[0, I_range, L_range] = 1 - # If this isn't the last token and the previous was L, U or O, B is valid - table[0, L_start:, :B_end] = 1 - # Regardless of whether this is the last token, if the previous action was - # {L, U, O}, U and O are valid. 
- table[:, L_start:, U_start:] = 1 - _cache[n_actions] = table - return table diff --git a/spacy/ml/_iob.py b/spacy/ml/_iob.py deleted file mode 100644 index 4dbc79f52..000000000 --- a/spacy/ml/_iob.py +++ /dev/null @@ -1,90 +0,0 @@ -"""Thinc layer to do simpler transition-based parsing, NER, etc.""" -from typing import Dict, Optional -from thinc.api import Ops, Model -from thinc.types import Padded, Floats3d - - -def IOB() -> Model[Padded, Padded]: - return Model( - "biluo", - forward, - init=init, - dims={"nO": None}, - attrs={"get_num_actions": get_num_actions}, - ) - - -def init(model: Model, X: Optional[Padded] = None, Y: Optional[Padded] = None) -> None: - if X is not None and Y is not None: - if X.data.shape != Y.data.shape: - # TODO: Fix error - raise ValueError("Mismatched shapes (TODO: Fix message)") - model.set_dim("nO", X.data.shape[2]) - elif X is not None: - model.set_dim("nO", X.data.shape[2]) - elif Y is not None: - model.set_dim("nO", Y.data.shape[2]) - elif model.get_dim("nO") is None: - raise ValueError("Dimension unset for BILUO: nO") - - -def forward(model: Model[Padded, Padded], Xp: Padded, is_train: bool): - n_labels = (model.get_dim("nO") - 1) // 2 - n_tokens, n_docs, n_actions = Xp.data.shape - # At each timestep, we make a validity mask of shape (n_docs, n_actions) - # to indicate which actions are valid next for each sequence. To construct - # the mask, we have a state of shape (2, n_actions) and a validity table of - # shape (2, n_actions+1, n_actions). The first dimension of the state indicates - # whether it's the last token, the second dimension indicates the previous - # action, plus a special 'null action' for the first entry. - valid_transitions = _get_transition_table(model.ops, n_labels) - prev_actions = model.ops.alloc1i(n_docs) - # Initialize as though prev action was O - prev_actions.fill(n_actions - 1) - Y = model.ops.alloc3f(*Xp.data.shape) - masks = model.ops.alloc3f(*Y.shape) - for t in range(Xp.data.shape[0]): - masks[t] = valid_transitions[prev_actions] - # Don't train the out-of-bounds sequences. - masks[t, Xp.size_at_t[t] :] = 0 - # Valid actions get 0*10e8, invalid get -1*10e8 - Y[t] = Xp.data[t] + ((masks[t] - 1) * 10e8) - prev_actions = Y[t].argmax(axis=-1) - - def backprop_biluo(dY: Padded) -> Padded: - # Masking the gradient seems to do poorly here. But why? 
- # dY.data *= masks - return dY - - return Padded(Y, Xp.size_at_t, Xp.lengths, Xp.indices), backprop_biluo - - -def get_num_actions(n_labels: int) -> int: - # One BEGIN action per label - # One IN action per label - # One LAST action per label - # One UNIT action per label - # One OUT action - return n_labels * 2 + 1 - - -def _get_transition_table( - ops: Ops, n_labels: int, _cache: Dict[int, Floats3d] = {} -) -> Floats3d: - n_actions = get_num_actions(n_labels) - if n_actions in _cache: - return ops.asarray(_cache[n_actions]) - table = ops.alloc2f(n_actions, n_actions) - B_start, B_end = (0, n_labels) - I_start, I_end = (B_end, B_end + n_labels) - O_action = I_end - B_range = ops.xp.arange(B_start, B_end) - I_range = ops.xp.arange(I_start, I_end) - # B and O are always valid - table[:, B_start:B_end] = 1 - table[:, O_action] = 1 - # I can only follow a matching B - table[B_range, I_range] = 1 - - _cache[n_actions] = table - return table diff --git a/spacy/ml/models/__init__.py b/spacy/ml/models/__init__.py index dd58dab00..67e70421f 100644 --- a/spacy/ml/models/__init__.py +++ b/spacy/ml/models/__init__.py @@ -1,6 +1,5 @@ from .entity_linker import * # noqa from .parser import * # noqa -from .simple_ner import * # noqa from .tagger import * # noqa from .textcat import * # noqa from .tok2vec import * # noqa diff --git a/spacy/ml/models/simple_ner.py b/spacy/ml/models/simple_ner.py deleted file mode 100644 index aca58c937..000000000 --- a/spacy/ml/models/simple_ner.py +++ /dev/null @@ -1,104 +0,0 @@ -from typing import List -from thinc.api import Model, Linear, with_array, softmax_activation, padded2list -from thinc.api import chain, list2padded, configure_normal_init -from thinc.api import Dropout -from thinc.types import Floats2d - -from ...tokens import Doc -from .._biluo import BILUO -from .._iob import IOB -from ...util import registry - - -@registry.architectures.register("spacy.BILUOTagger.v1") -def BiluoTagger( - tok2vec: Model[List[Doc], List[Floats2d]] -) -> Model[List[Doc], List[Floats2d]]: - """Construct a simple NER tagger, that predicts BILUO tag scores for each - token and uses greedy decoding with transition-constraints to return a valid - BILUO tag sequence. - - A BILUO tag sequence encodes a sequence of non-overlapping labelled spans - into tags assigned to each token. The first token of a span is given the - tag B-LABEL, the last token of the span is given the tag L-LABEL, and tokens - within the span are given the tag U-LABEL. Single-token spans are given - the tag U-LABEL. All other tokens are assigned the tag O. - - The BILUO tag scheme generally results in better linear separation between - classes, especially for non-CRF models, because there are more distinct classes - for the different situations (Ratinov et al., 2009). 
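To make the BILUO scheme described above concrete, a short sketch using the biluo_tags_from_offsets helper referenced in W030 earlier in this patch (the sentence and character offsets are illustrative):

    import spacy
    from spacy.training import biluo_tags_from_offsets

    nlp = spacy.blank("en")
    doc = nlp.make_doc("I like New York City")
    # A single three-token entity covering characters 7-20.
    print(biluo_tags_from_offsets(doc, [(7, 20, "GPE")]))
    # ['O', 'O', 'B-GPE', 'I-GPE', 'L-GPE']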
- """ - biluo = BILUO() - linear = Linear( - nO=None, nI=tok2vec.get_dim("nO"), init_W=configure_normal_init(mean=0.02) - ) - model = chain( - tok2vec, - list2padded(), - with_array(chain(Dropout(0.1), linear)), - biluo, - with_array(softmax_activation()), - padded2list(), - ) - return Model( - "biluo-tagger", - forward, - init=init, - layers=[model, linear], - refs={"tok2vec": tok2vec, "linear": linear, "biluo": biluo}, - dims={"nO": None}, - attrs={"get_num_actions": biluo.attrs["get_num_actions"]}, - ) - - -@registry.architectures.register("spacy.IOBTagger.v1") -def IOBTagger( - tok2vec: Model[List[Doc], List[Floats2d]] -) -> Model[List[Doc], List[Floats2d]]: - """Construct a simple NER tagger, that predicts IOB tag scores for each - token and uses greedy decoding with transition-constraints to return a valid - IOB tag sequence. - - An IOB tag sequence encodes a sequence of non-overlapping labelled spans - into tags assigned to each token. The first token of a span is given the - tag B-LABEL, and subsequent tokens are given the tag I-LABEL. - All other tokens are assigned the tag O. - """ - biluo = IOB() - linear = Linear(nO=None, nI=tok2vec.get_dim("nO")) - model = chain( - tok2vec, - list2padded(), - with_array(linear), - biluo, - with_array(softmax_activation()), - padded2list(), - ) - return Model( - "iob-tagger", - forward, - init=init, - layers=[model], - refs={"tok2vec": tok2vec, "linear": linear, "biluo": biluo}, - dims={"nO": None}, - attrs={"get_num_actions": biluo.attrs["get_num_actions"]}, - ) - - -def init(model: Model[List[Doc], List[Floats2d]], X=None, Y=None) -> None: - if model.get_dim("nO") is None and Y: - model.set_dim("nO", Y[0].shape[1]) - nO = model.get_dim("nO") - biluo = model.get_ref("biluo") - linear = model.get_ref("linear") - biluo.set_dim("nO", nO) - if linear.has_dim("nO") is None: - linear.set_dim("nO", nO) - model.layers[0].initialize(X=X, Y=Y) - - -def forward(model: Model, X: List[Doc], is_train: bool): - return model.layers[0](X, is_train) - - -__all__ = ["BiluoTagger"] diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py index faa5350d4..2e5f8a802 100644 --- a/spacy/ml/models/tok2vec.py +++ b/spacy/ml/models/tok2vec.py @@ -165,7 +165,7 @@ def MultiHashEmbed( @registry.architectures.register("spacy.CharacterEmbed.v1") def CharacterEmbed(width: int, rows: int, nM: int, nC: int): - """Construct an embedded representations based on character embeddings, using + """Construct an embedded representation based on character embeddings, using a feed-forward network. A fixed number of UTF-8 byte characters are used for each word, taken from the beginning and end of the word equally. Padding is used in the centre for words that are too short. @@ -176,8 +176,8 @@ def CharacterEmbed(width: int, rows: int, nM: int, nC: int): ensures that the final character is always in the last position, instead of being in an arbitrary position depending on the word length. - The characters are embedded in a embedding table with 256 rows, and the - vectors concatenated. A hash-embedded vector of the NORM of the word is + The characters are embedded in a embedding table with a given number of rows, + and the vectors concatenated. A hash-embedded vector of the NORM of the word is also concatenated on, and the result is then passed through a feed-forward network to construct a single vector to represent the information. 
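A hedged construction sketch for the CharacterEmbed architecture documented above; the hyperparameter values are illustrative, not necessarily spaCy's defaults:

    from spacy.ml.models.tok2vec import CharacterEmbed

    # width: output width, rows: rows of the NORM hash-embedding table,
    # nM: per-character embedding width, nC: characters taken per word.
    embed = CharacterEmbed(width=128, rows=7000, nM=64, nC=8)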
diff --git a/spacy/pipeline/__init__.py b/spacy/pipeline/__init__.py index 793aa83c3..656182088 100644 --- a/spacy/pipeline/__init__.py +++ b/spacy/pipeline/__init__.py @@ -8,7 +8,6 @@ from .morphologizer import Morphologizer from .pipe import Pipe from .senter import SentenceRecognizer from .sentencizer import Sentencizer -from .simple_ner import SimpleNER from .tagger import Tagger from .textcat import TextCategorizer from .tok2vec import Tok2Vec @@ -25,7 +24,6 @@ __all__ = [ "Pipe", "SentenceRecognizer", "Sentencizer", - "SimpleNER", "Tagger", "TextCategorizer", "Tok2Vec", diff --git a/spacy/pipeline/_parser_internals/arc_eager.pyx b/spacy/pipeline/_parser_internals/arc_eager.pyx index 7db8aae0f..bb0bf35b8 100644 --- a/spacy/pipeline/_parser_internals/arc_eager.pyx +++ b/spacy/pipeline/_parser_internals/arc_eager.pyx @@ -8,7 +8,7 @@ from ...typedefs cimport hash_t, attr_t from ...strings cimport hash_string from ...structs cimport TokenC from ...tokens.doc cimport Doc, set_children_from_heads -from ...gold.example cimport Example +from ...training.example cimport Example from ...errors import Errors from .stateclass cimport StateClass from ._state cimport StateC diff --git a/spacy/pipeline/_parser_internals/ner.pyx b/spacy/pipeline/_parser_internals/ner.pyx index 2570ccdee..0351bcaf7 100644 --- a/spacy/pipeline/_parser_internals/ner.pyx +++ b/spacy/pipeline/_parser_internals/ner.pyx @@ -5,7 +5,7 @@ from cymem.cymem cimport Pool from ...typedefs cimport weight_t, attr_t from ...lexeme cimport Lexeme from ...attrs cimport IS_SPACE -from ...gold.example cimport Example +from ...training.example cimport Example from ...errors import Errors from .stateclass cimport StateClass from ._state cimport StateC diff --git a/spacy/pipeline/_parser_internals/transition_system.pxd b/spacy/pipeline/_parser_internals/transition_system.pxd index ba4c33814..458f1d5f9 100644 --- a/spacy/pipeline/_parser_internals/transition_system.pxd +++ b/spacy/pipeline/_parser_internals/transition_system.pxd @@ -3,7 +3,7 @@ from cymem.cymem cimport Pool from ...typedefs cimport attr_t, weight_t from ...structs cimport TokenC from ...strings cimport StringStore -from ...gold.example cimport Example +from ...training.example cimport Example from .stateclass cimport StateClass from ._state cimport StateC diff --git a/spacy/pipeline/attributeruler.py b/spacy/pipeline/attributeruler.py index 406112681..f64fcbc54 100644 --- a/spacy/pipeline/attributeruler.py +++ b/spacy/pipeline/attributeruler.py @@ -4,7 +4,7 @@ from pathlib import Path from .pipe import Pipe from ..errors import Errors -from ..gold import validate_examples +from ..training import validate_examples from ..language import Language from ..matcher import Matcher from ..scorer import Scorer diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx index eee4ed535..edd791e40 100644 --- a/spacy/pipeline/dep_parser.pyx +++ b/spacy/pipeline/dep_parser.pyx @@ -9,7 +9,7 @@ from .functions import merge_subtokens from ..language import Language from ._parser_internals import nonproj from ..scorer import Scorer -from ..gold import validate_examples +from ..training import validate_examples default_model_config = """ diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py index d4f1e6b56..1debadd82 100644 --- a/spacy/pipeline/entity_linker.py +++ b/spacy/pipeline/entity_linker.py @@ -1,3 +1,4 @@ +from itertools import islice from typing import Optional, Iterable, Callable, Dict, Iterator, Union, List, Tuple from pathlib import Path 
import srsly @@ -11,7 +12,7 @@ from ..tokens import Doc from .pipe import Pipe, deserialize_config from ..language import Language from ..vocab import Vocab -from ..gold import Example, validate_examples +from ..training import Example, validate_examples from ..errors import Errors, Warnings from ..util import SimpleFrozenList from .. import util @@ -128,7 +129,7 @@ class EntityLinker(Pipe): # how many neightbour sentences to take into account self.n_sents = cfg.get("n_sents", 0) - def require_kb(self) -> None: + def _require_kb(self) -> None: # Raise an error if the knowledge base is not initialized. if len(self.kb) == 0: raise ValueError(Errors.E139.format(name=self.name)) @@ -140,10 +141,11 @@ class EntityLinker(Pipe): pipeline: Optional[List[Tuple[str, Callable[[Doc], Doc]]]] = None, sgd: Optional[Optimizer] = None, ) -> Optimizer: - """Initialize the pipe for training, using data examples if available. + """Initialize the pipe for training, using a representative set + of data examples. - get_examples (Callable[[], Iterable[Example]]): Optional function that - returns gold-standard Example objects. + get_examples (Callable[[], Iterable[Example]]): Function that + returns a representative sample of gold-standard Example objects. pipeline (List[Tuple[str, Callable]]): Optional list of pipeline components that this component is part of. Corresponds to nlp.pipeline. @@ -153,10 +155,19 @@ class EntityLinker(Pipe): DOCS: https://nightly.spacy.io/api/entitylinker#begin_training """ - self.require_kb() + self._ensure_examples(get_examples) + self._require_kb() nO = self.kb.entity_vector_length - self.set_output(nO) - self.model.initialize() + doc_sample = [] + vector_sample = [] + for example in islice(get_examples(), 10): + doc_sample.append(example.x) + vector_sample.append(self.model.ops.alloc1f(nO)) + assert len(doc_sample) > 0, Errors.E923.format(name=self.name) + assert len(vector_sample) > 0, Errors.E923.format(name=self.name) + self.model.initialize( + X=doc_sample, Y=self.model.ops.asarray(vector_sample, dtype="float32") + ) if sgd is None: sgd = self.create_optimizer() return sgd @@ -184,7 +195,7 @@ class EntityLinker(Pipe): DOCS: https://nightly.spacy.io/api/entitylinker#update """ - self.require_kb() + self._require_kb() if losses is None: losses = {} losses.setdefault(self.name, 0.0) @@ -296,7 +307,7 @@ class EntityLinker(Pipe): DOCS: https://nightly.spacy.io/api/entitylinker#predict """ - self.require_kb() + self._require_kb() entity_count = 0 final_kb_ids = [] if not docs: @@ -405,7 +416,7 @@ class EntityLinker(Pipe): token.ent_kb_id_ = kb_id def to_disk( - self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList(), + self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList() ) -> None: """Serialize the pipe to disk. @@ -422,7 +433,7 @@ class EntityLinker(Pipe): util.to_disk(path, serialize, exclude) def from_disk( - self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList(), + self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList() ) -> "EntityLinker": """Load the pipe from disk. Modifies the object in place and returns it. 
diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py index 4f4ff230e..24bbb067f 100644 --- a/spacy/pipeline/entityruler.py +++ b/spacy/pipeline/entityruler.py @@ -9,7 +9,7 @@ from ..util import ensure_path, to_disk, from_disk, SimpleFrozenList from ..tokens import Doc, Span from ..matcher import Matcher, PhraseMatcher from ..scorer import Scorer -from ..gold import validate_examples +from ..training import validate_examples DEFAULT_ENT_ID_SEP = "||" diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py index 3f3e387b7..0fd3482c4 100644 --- a/spacy/pipeline/lemmatizer.py +++ b/spacy/pipeline/lemmatizer.py @@ -8,7 +8,7 @@ from ..lookups import Lookups, load_lookups from ..scorer import Scorer from ..tokens import Doc, Token from ..vocab import Vocab -from ..gold import validate_examples +from ..training import validate_examples from .. import util diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index bcb555b90..57bdb28d7 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -2,6 +2,7 @@ from typing import Optional import srsly from thinc.api import SequenceCategoricalCrossentropy, Model, Config +from itertools import islice from ..tokens.doc cimport Doc from ..vocab cimport Vocab @@ -15,7 +16,7 @@ from .pipe import deserialize_config from .tagger import Tagger from .. import util from ..scorer import Scorer -from ..gold import validate_examples +from ..training import validate_examples default_model_config = """ @@ -112,6 +113,7 @@ class Morphologizer(Tagger): raise ValueError(Errors.E187) if label in self.labels: return 0 + self._allow_extra_label() # normalize label norm_label = self.vocab.morphology.normalize_features(label) # extract separate POS and morph tags @@ -128,10 +130,11 @@ class Morphologizer(Tagger): return 1 def begin_training(self, get_examples, *, pipeline=None, sgd=None): - """Initialize the pipe for training, using data examples if available. + """Initialize the pipe for training, using a representative set + of data examples. - get_examples (Callable[[], Iterable[Example]]): Optional function that - returns gold-standard Example objects. + get_examples (Callable[[], Iterable[Example]]): Function that + returns a representative sample of gold-standard Example objects. pipeline (List[Tuple[str, Callable]]): Optional list of pipeline components that this component is part of. Corresponds to nlp.pipeline. 
@@ -141,9 +144,8 @@ class Morphologizer(Tagger): DOCS: https://nightly.spacy.io/api/morphologizer#begin_training """ - if not hasattr(get_examples, "__call__"): - err = Errors.E930.format(name="Morphologizer", obj=type(get_examples)) - raise ValueError(err) + self._ensure_examples(get_examples) + # First, fetch all labels from the data for example in get_examples(): for i, token in enumerate(example.reference): pos = token.pos_ @@ -157,8 +159,25 @@ class Morphologizer(Tagger): if norm_label not in self.cfg["labels_morph"]: self.cfg["labels_morph"][norm_label] = morph self.cfg["labels_pos"][norm_label] = POS_IDS[pos] - self.set_output(len(self.labels)) - self.model.initialize() + if len(self.labels) <= 1: + raise ValueError(Errors.E143.format(name=self.name)) + doc_sample = [] + label_sample = [] + for example in islice(get_examples(), 10): + gold_array = [] + for i, token in enumerate(example.reference): + pos = token.pos_ + morph = token.morph_ + morph_dict = Morphology.feats_to_dict(morph) + if pos: + morph_dict[self.POS_FEAT] = pos + norm_label = self.vocab.strings[self.vocab.morphology.add(morph_dict)] + gold_array.append([1.0 if label == norm_label else 0.0 for label in self.labels]) + doc_sample.append(example.x) + label_sample.append(self.model.ops.asarray(gold_array, dtype="float32")) + assert len(doc_sample) > 0, Errors.E923.format(name=self.name) + assert len(label_sample) > 0, Errors.E923.format(name=self.name) + self.model.initialize(X=doc_sample, Y=label_sample) if sgd is None: sgd = self.create_optimizer() return sgd diff --git a/spacy/pipeline/multitask.pyx b/spacy/pipeline/multitask.pyx index 3ef85c821..2f8940124 100644 --- a/spacy/pipeline/multitask.pyx +++ b/spacy/pipeline/multitask.pyx @@ -8,7 +8,7 @@ from ..tokens.doc cimport Doc from .pipe import Pipe from .tagger import Tagger -from ..gold import validate_examples +from ..training import validate_examples from ..language import Language from ._parser_internals import nonproj from ..attrs import POS, ID @@ -90,7 +90,7 @@ class MultitaskObjective(Tagger): label = self.make_label(token) if label is not None and label not in self.labels: self.labels[label] = len(self.labels) - self.model.initialize() + self.model.initialize() # TODO: fix initialization by defining X and Y if sgd is None: sgd = self.create_optimizer() return sgd @@ -178,7 +178,7 @@ class ClozeMultitask(Pipe): pass def begin_training(self, get_examples, pipeline=None, sgd=None): - self.model.initialize() + self.model.initialize() # TODO: fix initialization by defining X and Y X = self.model.ops.alloc((5, self.model.get_ref("tok2vec").get_dim("nO"))) self.model.output_layer.begin_training(X) if sgd is None: diff --git a/spacy/pipeline/ner.pyx b/spacy/pipeline/ner.pyx index d9f33ccb4..2fa5c6392 100644 --- a/spacy/pipeline/ner.pyx +++ b/spacy/pipeline/ner.pyx @@ -7,7 +7,7 @@ from ._parser_internals.ner cimport BiluoPushDown from ..language import Language from ..scorer import Scorer -from ..gold import validate_examples +from ..training import validate_examples default_model_config = """ diff --git a/spacy/pipeline/pipe.pyx b/spacy/pipeline/pipe.pyx index 2518ebad3..324c8e19c 100644 --- a/spacy/pipeline/pipe.pyx +++ b/spacy/pipeline/pipe.pyx @@ -4,7 +4,7 @@ from thinc.api import set_dropout_rate, Model from ..tokens.doc cimport Doc -from ..gold import validate_examples +from ..training import validate_examples from ..errors import Errors from .. 
import util @@ -160,6 +160,20 @@ cdef class Pipe: """ raise NotImplementedError(Errors.E931.format(method="add_label", name=self.name)) + + def _require_labels(self) -> None: + """Raise an error if the component's model has no labels defined.""" + if not self.labels or list(self.labels) == [""]: + raise ValueError(Errors.E143.format(name=self.name)) + + + def _allow_extra_label(self) -> None: + """Raise an error if the component can not add any more labels.""" + if self.model.has_dim("nO") and self.model.get_dim("nO") == len(self.labels): + if not self.is_resizable(): + raise ValueError(Errors.E922.format(name=self.name, nO=self.model.get_dim("nO"))) + + def create_optimizer(self): """Create an optimizer for the pipeline component. @@ -171,9 +185,12 @@ cdef class Pipe: def begin_training(self, get_examples, *, pipeline=None, sgd=None): """Initialize the pipe for training, using data examples if available. + This method needs to be implemented by each Pipe component, + ensuring the internal model (if available) is initialized properly + using the provided sample of Example objects. - get_examples (Callable[[], Iterable[Example]]): Optional function that - returns gold-standard Example objects. + get_examples (Callable[[], Iterable[Example]]): Function that + returns a representative sample of gold-standard Example objects. pipeline (List[Tuple[str, Callable]]): Optional list of pipeline components that this component is part of. Corresponds to nlp.pipeline. @@ -183,16 +200,24 @@ cdef class Pipe: DOCS: https://nightly.spacy.io/api/pipe#begin_training """ - self.model.initialize() - if sgd is None: - sgd = self.create_optimizer() - return sgd + raise NotImplementedError(Errors.E931.format(method="add_label", name=self.name)) + + def _ensure_examples(self, get_examples): + if get_examples is None or not hasattr(get_examples, "__call__"): + err = Errors.E930.format(name=self.name, obj=type(get_examples)) + raise ValueError(err) + if not get_examples(): + err = Errors.E930.format(name=self.name, obj=get_examples()) + raise ValueError(err) + + def is_resizable(self): + return hasattr(self, "model") and "resize_output" in self.model.attrs def set_output(self, nO): - if self.model.has_dim("nO") is not False: - self.model.set_dim("nO", nO) - if self.model.has_ref("output_layer"): - self.model.get_ref("output_layer").set_dim("nO", nO) + if self.is_resizable(): + self.model.attrs["resize_output"](self.model, nO) + else: + raise NotImplementedError(Errors.E921) def use_params(self, params): """Modify the pipe's model, to use the given parameter values. At the diff --git a/spacy/pipeline/sentencizer.pyx b/spacy/pipeline/sentencizer.pyx index aaf08d594..5700c2b98 100644 --- a/spacy/pipeline/sentencizer.pyx +++ b/spacy/pipeline/sentencizer.pyx @@ -7,7 +7,7 @@ from ..tokens.doc cimport Doc from .pipe import Pipe from ..language import Language from ..scorer import Scorer -from ..gold import validate_examples +from ..training import validate_examples from .. 
import util diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx index b78be44f8..00664131b 100644 --- a/spacy/pipeline/senter.pyx +++ b/spacy/pipeline/senter.pyx @@ -1,4 +1,6 @@ # cython: infer_types=True, profile=True, binding=True +from itertools import islice + import srsly from thinc.api import Model, SequenceCategoricalCrossentropy, Config @@ -9,7 +11,7 @@ from .tagger import Tagger from ..language import Language from ..errors import Errors from ..scorer import Scorer -from ..gold import validate_examples +from ..training import validate_examples from .. import util @@ -124,10 +126,11 @@ class SentenceRecognizer(Tagger): return float(loss), d_scores def begin_training(self, get_examples, *, pipeline=None, sgd=None): - """Initialize the pipe for training, using data examples if available. + """Initialize the pipe for training, using a representative set + of data examples. - get_examples (Callable[[], Iterable[Example]]): Optional function that - returns gold-standard Example objects. + get_examples (Callable[[], Iterable[Example]]): Function that + returns a representative sample of gold-standard Example objects. pipeline (List[Tuple[str, Callable]]): Optional list of pipeline components that this component is part of. Corresponds to nlp.pipeline. @@ -137,8 +140,18 @@ class SentenceRecognizer(Tagger): DOCS: https://nightly.spacy.io/api/sentencerecognizer#begin_training """ - self.set_output(len(self.labels)) - self.model.initialize() + self._ensure_examples(get_examples) + doc_sample = [] + label_sample = [] + assert self.labels, Errors.E924.format(name=self.name) + for example in islice(get_examples(), 10): + doc_sample.append(example.x) + gold_tags = example.get_aligned("SENT_START") + gold_array = [[1.0 if tag == gold_tag else 0.0 for tag in self.labels] for gold_tag in gold_tags] + label_sample.append(self.model.ops.asarray(gold_array, dtype="float32")) + assert len(doc_sample) > 0, Errors.E923.format(name=self.name) + assert len(label_sample) > 0, Errors.E923.format(name=self.name) + self.model.initialize(X=doc_sample, Y=label_sample) if sgd is None: sgd = self.create_optimizer() return sgd diff --git a/spacy/pipeline/simple_ner.py b/spacy/pipeline/simple_ner.py deleted file mode 100644 index c55edb067..000000000 --- a/spacy/pipeline/simple_ner.py +++ /dev/null @@ -1,211 +0,0 @@ -from typing import List, Iterable, Optional, Dict, Tuple, Callable, Set -from thinc.types import Floats2d -from thinc.api import SequenceCategoricalCrossentropy, set_dropout_rate, Model -from thinc.api import Optimizer, Config -from thinc.util import to_numpy - -from ..errors import Errors -from ..gold import Example, spans_from_biluo_tags, iob_to_biluo, biluo_to_iob -from ..gold import validate_examples -from ..tokens import Doc -from ..language import Language -from ..vocab import Vocab -from ..scorer import Scorer -from .pipe import Pipe - - -default_model_config = """ -[model] -@architectures = "spacy.BILUOTagger.v1" - -[model.tok2vec] -@architectures = "spacy.HashEmbedCNN.v1" -pretrained_vectors = null -width = 128 -depth = 4 -embed_size = 7000 -window_size = 1 -maxout_pieces = 3 -subword_features = true -""" -DEFAULT_SIMPLE_NER_MODEL = Config().from_str(default_model_config)["model"] - - -@Language.factory( - "simple_ner", - assigns=["doc.ents"], - default_config={"labels": [], "model": DEFAULT_SIMPLE_NER_MODEL}, - scores=["ents_p", "ents_r", "ents_f", "ents_per_type"], - default_score_weights={"ents_f": 1.0, "ents_p": 0.0, "ents_r": 0.0}, -) -def make_simple_ner( - nlp: Language, 
name: str, model: Model, labels: Iterable[str] -) -> "SimpleNER": - return SimpleNER(nlp.vocab, model, name, labels=labels) - - -class SimpleNER(Pipe): - """Named entity recognition with a tagging model. The model should include - validity constraints to ensure that only valid tag sequences are returned.""" - - def __init__( - self, - vocab: Vocab, - model: Model, - name: str = "simple_ner", - *, - labels: Iterable[str], - ) -> None: - self.vocab = vocab - self.model = model - self.name = name - self.cfg = {"labels": []} - for label in labels: - self.add_label(label) - self.loss_func = SequenceCategoricalCrossentropy( - names=self.get_tag_names(), normalize=True, missing_value=None - ) - assert self.model is not None - - @property - def is_biluo(self) -> bool: - return self.model.name.startswith("biluo") - - @property - def labels(self) -> Tuple[str]: - return tuple(self.cfg["labels"]) - - def add_label(self, label: str) -> None: - """Add a new label to the pipe. - label (str): The label to add. - DOCS: https://nightly.spacy.io/api/simplener#add_label - """ - if not isinstance(label, str): - raise ValueError(Errors.E187) - if label not in self.labels: - self.cfg["labels"].append(label) - self.vocab.strings.add(label) - - def get_tag_names(self) -> List[str]: - if self.is_biluo: - return ( - [f"B-{label}" for label in self.labels] - + [f"I-{label}" for label in self.labels] - + [f"L-{label}" for label in self.labels] - + [f"U-{label}" for label in self.labels] - + ["O"] - ) - else: - return ( - [f"B-{label}" for label in self.labels] - + [f"I-{label}" for label in self.labels] - + ["O"] - ) - - def predict(self, docs: List[Doc]) -> List[Floats2d]: - scores = self.model.predict(docs) - return scores - - def set_annotations(self, docs: List[Doc], scores: List[Floats2d]) -> None: - """Set entities on a batch of documents from a batch of scores.""" - tag_names = self.get_tag_names() - for i, doc in enumerate(docs): - actions = to_numpy(scores[i].argmax(axis=1)) - tags = [tag_names[actions[j]] for j in range(len(doc))] - if not self.is_biluo: - tags = iob_to_biluo(tags) - doc.ents = spans_from_biluo_tags(doc, tags) - - def update( - self, - examples: List[Example], - *, - set_annotations: bool = False, - drop: float = 0.0, - sgd: Optional[Optimizer] = None, - losses: Optional[Dict[str, float]] = None, - ) -> Dict[str, float]: - if losses is None: - losses = {} - losses.setdefault("ner", 0.0) - validate_examples(examples, "SimpleNER.update") - if not any(_has_ner(eg) for eg in examples): - return losses - docs = [eg.predicted for eg in examples] - set_dropout_rate(self.model, drop) - scores, bp_scores = self.model.begin_update(docs) - loss, d_scores = self.get_loss(examples, scores) - bp_scores(d_scores) - if set_annotations: - self.set_annotations(docs, scores) - if sgd is not None: - self.model.finish_update(sgd) - losses["ner"] += loss - return losses - - def get_loss(self, examples: List[Example], scores) -> Tuple[List[Floats2d], float]: - validate_examples(examples, "SimpleNER.get_loss") - truths = [] - for eg in examples: - tags = eg.get_aligned_ner() - gold_tags = [(tag if tag != "-" else None) for tag in tags] - if not self.is_biluo: - gold_tags = biluo_to_iob(gold_tags) - truths.append(gold_tags) - for i in range(len(scores)): - if len(scores[i]) != len(truths[i]): - raise ValueError( - f"Mismatched output and gold sizes.\n" - f"Output: {len(scores[i])}, gold: {len(truths[i])}." 
- f"Input: {len(examples[i].doc)}" ) - d_scores, loss = self.loss_func(scores, truths) - return loss, d_scores - - def begin_training( - self, - get_examples: Callable[[], Iterable[Example]], - pipeline: Optional[List[Tuple[str, Callable[[Doc], Doc]]]] = None, - sgd: Optional[Optimizer] = None, - ): - all_labels = set() - if not hasattr(get_examples, "__call__"): - err = Errors.E930.format(name="SimpleNER", obj=type(get_examples)) - raise ValueError(err) - for example in get_examples(): - all_labels.update(_get_labels(example)) - for label in sorted(all_labels): - self.add_label(label) - labels = self.labels - n_actions = self.model.attrs["get_num_actions"](len(labels)) - self.model.set_dim("nO", n_actions) - self.model.initialize() - if pipeline is not None: - self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg) - self.loss_func = SequenceCategoricalCrossentropy( - names=self.get_tag_names(), normalize=True, missing_value=None - ) - return sgd - - def init_multitask_objectives(self, *args, **kwargs): - pass - - def score(self, examples, **kwargs): - validate_examples(examples, "SimpleNER.score") - return Scorer.score_spans(examples, "ents", **kwargs) - - -def _has_ner(example: Example) -> bool: - for ner_tag in example.get_aligned_ner(): - if ner_tag != "-" and ner_tag is not None: - return True - else: - return False - - -def _get_labels(example: Example) -> Set[str]: - labels = set() - for ner_tag in example.get_aligned("ENT_TYPE", as_string=True): - if ner_tag != "O" and ner_tag != "-": - labels.add(ner_tag) - return labels diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx index 2b760c878..1f8b4eb7a 100644 --- a/spacy/pipeline/tagger.pyx +++ b/spacy/pipeline/tagger.pyx @@ -5,6 +5,7 @@ import srsly from thinc.api import Model, set_dropout_rate, SequenceCategoricalCrossentropy, Config from thinc.types import Floats2d import warnings +from itertools import islice from ..tokens.doc cimport Doc from ..morphology cimport Morphology @@ -16,7 +17,7 @@ from ..attrs import POS, ID from ..parts_of_speech import X from ..errors import Errors, TempErrors, Warnings from ..scorer import Scorer -from ..gold import validate_examples +from ..training import validate_examples from .. import util @@ -258,10 +259,11 @@ class Tagger(Pipe): return float(loss), d_scores def begin_training(self, get_examples, *, pipeline=None, sgd=None): - """Initialize the pipe for training, using data examples if available. + """Initialize the pipe for training, using a representative set + of data examples. - get_examples (Callable[[], Iterable[Example]]): Optional function that - returns gold-standard Example objects. + get_examples (Callable[[], Iterable[Example]]): Function that + returns a representative sample of gold-standard Example objects. pipeline (List[Tuple[str, Callable]]): Optional list of pipeline components that this component is part of. Corresponds to nlp.pipeline.
@@ -271,32 +273,24 @@ class Tagger(Pipe): DOCS: https://nightly.spacy.io/api/tagger#begin_training """ - if not hasattr(get_examples, "__call__"): - err = Errors.E930.format(name="Tagger", obj=type(get_examples)) - raise ValueError(err) - tags = set() + self._ensure_examples(get_examples) doc_sample = [] + label_sample = [] + tags = set() for example in get_examples(): for token in example.y: - tags.add(token.tag_) - if len(doc_sample) < 10: - doc_sample.append(example.x) - if not doc_sample: - doc_sample.append(Doc(self.vocab, words=["hello"])) + if token.tag_: + tags.add(token.tag_) for tag in sorted(tags): self.add_label(tag) - if len(self.labels) == 0: - err = Errors.E1006.format(name="Tagger") - raise ValueError(err) - self.set_output(len(self.labels)) - if doc_sample: - label_sample = [ - self.model.ops.alloc2f(len(doc), len(self.labels)) - for doc in doc_sample - ] - self.model.initialize(X=doc_sample, Y=label_sample) - else: - self.model.initialize() + for example in islice(get_examples(), 10): + doc_sample.append(example.x) + gold_tags = example.get_aligned("TAG", as_string=True) + gold_array = [[1.0 if tag == gold_tag else 0.0 for tag in self.labels] for gold_tag in gold_tags] + label_sample.append(self.model.ops.asarray(gold_array, dtype="float32")) + assert len(doc_sample) > 0, Errors.E923.format(name=self.name) + assert len(label_sample) > 0, Errors.E923.format(name=self.name) + self.model.initialize(X=doc_sample, Y=label_sample) if sgd is None: sgd = self.create_optimizer() return sgd @@ -313,6 +307,7 @@ class Tagger(Pipe): raise ValueError(Errors.E187) if label in self.labels: return 0 + self._allow_extra_label() self.cfg["labels"].append(label) self.vocab.strings.add(label) return 1 diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py index d6efb4348..4be6f580d 100644 --- a/spacy/pipeline/textcat.py +++ b/spacy/pipeline/textcat.py @@ -1,3 +1,4 @@ +from itertools import islice from typing import Iterable, Tuple, Optional, Dict, List, Callable, Iterator, Any from thinc.api import get_array_module, Model, Optimizer, set_dropout_rate, Config from thinc.types import Floats2d @@ -5,7 +6,7 @@ import numpy from .pipe import Pipe from ..language import Language -from ..gold import Example, validate_examples +from ..training import Example, validate_examples from ..errors import Errors from ..scorer import Scorer from .. import util @@ -128,11 +129,6 @@ class TextCategorizer(Pipe): """ return tuple(self.cfg.setdefault("labels", [])) - def require_labels(self) -> None: - """Raise an error if the component's model has no labels defined.""" - if not self.labels: - raise ValueError(Errors.E143.format(name=self.name)) - @labels.setter def labels(self, value: Iterable[str]) -> None: self.cfg["labels"] = tuple(value) @@ -311,17 +307,7 @@ class TextCategorizer(Pipe): raise ValueError(Errors.E187) if label in self.labels: return 0 - if self.model.has_dim("nO"): - # This functionality was available previously, but was broken. - # The problem is that we resize the last layer, but the last layer - # is actually just an ensemble. We're not resizing the child layers - # - a huge problem. 
- raise ValueError(Errors.E116) - # smaller = self.model._layers[-1] - # larger = Linear(len(self.labels)+1, smaller.nI) - # copy_array(larger.W[:smaller.nO], smaller.W) - # copy_array(larger.b[:smaller.nO], smaller.b) - # self.model._layers[-1] = larger + self._allow_extra_label() self.labels = tuple(list(self.labels) + [label]) return 1 @@ -332,10 +318,11 @@ class TextCategorizer(Pipe): pipeline: Optional[List[Tuple[str, Callable[[Doc], Doc]]]] = None, sgd: Optional[Optimizer] = None, ) -> Optimizer: - """Initialize the pipe for training, using data examples if available. + """Initialize the pipe for training, using a representative set + of data examples. - get_examples (Callable[[], Iterable[Example]]): Optional function that - returns gold-standard Example objects. + get_examples (Callable[[], Iterable[Example]]): Function that + returns a representative sample of gold-standard Example objects. pipeline (List[Tuple[str, Callable]]): Optional list of pipeline components that this component is part of. Corresponds to nlp.pipeline. @@ -345,22 +332,19 @@ class TextCategorizer(Pipe): DOCS: https://nightly.spacy.io/api/textcategorizer#begin_training """ - if not hasattr(get_examples, "__call__"): - err = Errors.E930.format(name="TextCategorizer", obj=type(get_examples)) - raise ValueError(err) + self._ensure_examples(get_examples) subbatch = [] # Select a subbatch of examples to initialize the model - for example in get_examples(): + for example in islice(get_examples(), 10): if len(subbatch) < 2: subbatch.append(example) for cat in example.y.cats: self.add_label(cat) - self.require_labels() - docs = [eg.reference for eg in subbatch] - if not docs: # need at least one doc - docs = [Doc(self.vocab, words=["hello"])] - truths, _ = self._examples_to_truth(subbatch) - self.set_output(len(self.labels)) - self.model.initialize(X=docs, Y=truths) + doc_sample = [eg.reference for eg in subbatch] + label_sample, _ = self._examples_to_truth(subbatch) + self._require_labels() + assert len(doc_sample) > 0, Errors.E923.format(name=self.name) + assert len(label_sample) > 0, Errors.E923.format(name=self.name) + self.model.initialize(X=doc_sample, Y=label_sample) if sgd is None: sgd = self.create_optimizer() return sgd diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py index 5657d687d..721c67a19 100644 --- a/spacy/pipeline/tok2vec.py +++ b/spacy/pipeline/tok2vec.py @@ -1,8 +1,9 @@ from typing import Iterator, Sequence, Iterable, Optional, Dict, Callable, List, Tuple from thinc.api import Model, set_dropout_rate, Optimizer, Config +from itertools import islice from .pipe import Pipe -from ..gold import Example, validate_examples +from ..training import Example, validate_examples from ..tokens import Doc from ..vocab import Vocab from ..language import Language @@ -209,10 +210,11 @@ class Tok2Vec(Pipe): pipeline: Optional[List[Tuple[str, Callable[[Doc], Doc]]]] = None, sgd: Optional[Optimizer] = None, ): - """Initialize the pipe for training, using data examples if available. + """Initialize the pipe for training, using a representative set + of data examples. - get_examples (Callable[[], Iterable[Example]]): Optional function that - returns gold-standard Example objects. + get_examples (Callable[[], Iterable[Example]]): Function that + returns a representative sample of gold-standard Example objects. pipeline (List[Tuple[str, Callable]]): Optional list of pipeline components that this component is part of. Corresponds to nlp.pipeline. 
@@ -222,8 +224,12 @@ class Tok2Vec(Pipe): DOCS: https://nightly.spacy.io/api/tok2vec#begin_training """ - docs = [Doc(self.vocab, words=["hello"])] - self.model.initialize(X=docs) + self._ensure_examples(get_examples) + doc_sample = [] + for example in islice(get_examples(), 10): + doc_sample.append(example.x) + assert doc_sample, Errors.E923.format(name=self.name) + self.model.initialize(X=doc_sample) def add_label(self, label): raise NotImplementedError diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx index 5a6b491e0..1350e1f12 100644 --- a/spacy/pipeline/transition_parser.pyx +++ b/spacy/pipeline/transition_parser.pyx @@ -21,7 +21,7 @@ from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss from ..ml.parser_model cimport get_c_weights, get_c_sizes from ..tokens.doc cimport Doc -from ..gold import validate_examples +from ..training import validate_examples from ..errors import Errors, Warnings from .. import util @@ -244,7 +244,7 @@ cdef class Parser(Pipe): int nr_class, int batch_size) nogil: # n_moves should not be zero at this point, but make sure to avoid zero-length mem alloc with gil: - assert self.moves.n_moves > 0 + assert self.moves.n_moves > 0, Errors.E924.format(name=self.name) is_valid = calloc(self.moves.n_moves, sizeof(int)) cdef int i, guess cdef Transition action @@ -378,7 +378,7 @@ cdef class Parser(Pipe): cdef int i # n_moves should not be zero at this point, but make sure to avoid zero-length mem alloc - assert self.moves.n_moves > 0 + assert self.moves.n_moves > 0, Errors.E924.format(name=self.name) is_valid = mem.alloc(self.moves.n_moves, sizeof(int)) costs = mem.alloc(self.moves.n_moves, sizeof(float)) @@ -406,9 +406,7 @@ cdef class Parser(Pipe): self.model.attrs["resize_output"](self.model, nO) def begin_training(self, get_examples, pipeline=None, sgd=None, **kwargs): - if not hasattr(get_examples, "__call__"): - err = Errors.E930.format(name="DependencyParser/EntityRecognizer", obj=type(get_examples)) - raise ValueError(err) + self._ensure_examples(get_examples) self.cfg.update(kwargs) lexeme_norms = self.vocab.lookups.get_table("lexeme_norm", {}) if len(lexeme_norms) == 0 and self.vocab.lang in util.LEXEME_NORM_LANGS: @@ -430,9 +428,6 @@ cdef class Parser(Pipe): if sgd is None: sgd = self.create_optimizer() doc_sample = [] - for example in islice(get_examples(), 10): - doc_sample.append(example.predicted) - if pipeline is not None: for name, component in pipeline: if component is self: @@ -441,10 +436,11 @@ cdef class Parser(Pipe): doc_sample = list(component.pipe(doc_sample, batch_size=8)) else: doc_sample = [component(doc) for doc in doc_sample] - if doc_sample: - self.model.initialize(doc_sample) - else: - self.model.initialize() + if not doc_sample: + for example in islice(get_examples(), 10): + doc_sample.append(example.predicted) + assert len(doc_sample) > 0, Errors.E923.format(name=self.name) + self.model.initialize(doc_sample) if pipeline is not None: self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg) return sgd diff --git a/spacy/schemas.py b/spacy/schemas.py index 59af53301..38f47c668 100644 --- a/spacy/schemas.py +++ b/spacy/schemas.py @@ -12,7 +12,7 @@ from .attrs import NAMES if TYPE_CHECKING: # This lets us add type hints for mypy etc. 
without causing circular imports from .language import Language # noqa: F401 - from .gold import Example # noqa: F401 + from .training import Example # noqa: F401 ItemT = TypeVar("ItemT") @@ -180,7 +180,7 @@ class ModelMetaSchema(BaseModel): url: StrictStr = Field("", title="Model author URL") sources: Optional[Union[List[StrictStr], List[Dict[str, str]]]] = Field(None, title="Training data sources") vectors: Dict[str, Any] = Field({}, title="Included word vectors") - labels: Dict[str, Dict[str, List[str]]] = Field({}, title="Component labels, keyed by component name") + labels: Dict[str, List[str]] = Field({}, title="Component labels, keyed by component name") accuracy: Dict[str, Union[float, Dict[str, float]]] = Field({}, title="Accuracy numbers") speed: Dict[str, Union[float, int]] = Field({}, title="Speed evaluation numbers") spacy_git_version: StrictStr = Field("", title="Commit of spaCy version used") diff --git a/spacy/scorer.py b/spacy/scorer.py index 9b1831a91..7f7418237 100644 --- a/spacy/scorer.py +++ b/spacy/scorer.py @@ -1,7 +1,7 @@ from typing import Optional, Iterable, Dict, Any, Callable, TYPE_CHECKING import numpy as np -from .gold import Example +from .training import Example from .tokens import Token, Doc, Span from .errors import Errors from .util import get_lang_class, SimpleFrozenList diff --git a/spacy/tests/doc/test_add_entities.py b/spacy/tests/doc/test_add_entities.py index d6e345336..751bd36d4 100644 --- a/spacy/tests/doc/test_add_entities.py +++ b/spacy/tests/doc/test_add_entities.py @@ -1,5 +1,6 @@ +from spacy.training import Example from spacy.pipeline import EntityRecognizer -from spacy.tokens import Span +from spacy.tokens import Span, Doc from spacy import registry import pytest @@ -7,6 +8,12 @@ from ..util import get_doc from spacy.pipeline.ner import DEFAULT_NER_MODEL +def _ner_example(ner): + doc = Doc(ner.vocab, words=["Joe", "loves", "visiting", "London", "during", "the", "weekend"]) + gold = {"entities": [(0, 3, "PERSON"), (19, 25, "LOC")]} + return Example.from_dict(doc, gold) + + def test_doc_add_entities_set_ents_iob(en_vocab): text = ["This", "is", "a", "lion"] doc = get_doc(en_vocab, text) @@ -18,10 +25,8 @@ def test_doc_add_entities_set_ents_iob(en_vocab): cfg = {"model": DEFAULT_NER_MODEL} model = registry.make_from_config(cfg, validate=True)["model"] ner = EntityRecognizer(en_vocab, model, **config) - ner.begin_training(lambda: []) + ner.begin_training(lambda: [_ner_example(ner)]) ner(doc) - assert len(list(doc.ents)) == 0 - assert [w.ent_iob_ for w in doc] == (["O"] * len(doc)) doc.ents = [(doc.vocab.strings["ANIMAL"], 3, 4)] assert [w.ent_iob_ for w in doc] == ["O", "O", "O", "B"] @@ -31,6 +36,7 @@ def test_doc_add_entities_set_ents_iob(en_vocab): def test_ents_reset(en_vocab): + """Ensure that resetting doc.ents does not change anything""" text = ["This", "is", "a", "lion"] doc = get_doc(en_vocab, text) config = { @@ -41,11 +47,11 @@ def test_ents_reset(en_vocab): cfg = {"model": DEFAULT_NER_MODEL} model = registry.make_from_config(cfg, validate=True)["model"] ner = EntityRecognizer(en_vocab, model, **config) - ner.begin_training(lambda: []) + ner.begin_training(lambda: [_ner_example(ner)]) ner(doc) - assert [t.ent_iob_ for t in doc] == (["O"] * len(doc)) + orig_iobs = [t.ent_iob_ for t in doc] doc.ents = list(doc.ents) - assert [t.ent_iob_ for t in doc] == (["O"] * len(doc)) + assert [t.ent_iob_ for t in doc] == orig_iobs def test_add_overlapping_entities(en_vocab): diff --git a/spacy/tests/parser/test_add_label.py 
b/spacy/tests/parser/test_add_label.py index fce5f679f..0da42daa2 100644 --- a/spacy/tests/parser/test_add_label.py +++ b/spacy/tests/parser/test_add_label.py @@ -3,7 +3,7 @@ from thinc.api import Adam, fix_random_seed from spacy import registry from spacy.attrs import NORM from spacy.vocab import Vocab -from spacy.gold import Example +from spacy.training import Example from spacy.tokens import Doc from spacy.pipeline import DependencyParser, EntityRecognizer from spacy.pipeline.ner import DEFAULT_NER_MODEL @@ -35,7 +35,7 @@ def test_init_parser(parser): def _train_parser(parser): fix_random_seed(1) parser.add_label("left") - parser.begin_training(lambda: [], **parser.cfg) + parser.begin_training(lambda: [_parser_example(parser)], **parser.cfg) sgd = Adam(0.001) for i in range(5): @@ -47,16 +47,25 @@ def _train_parser(parser): return parser +def _parser_example(parser): + doc = Doc(parser.vocab, words=["a", "b", "c", "d"]) + gold = {"heads": [1, 1, 3, 3], "deps": ["right", "ROOT", "left", "ROOT"]} + return Example.from_dict(doc, gold) + + +def _ner_example(ner): + doc = Doc(ner.vocab, words=["Joe", "loves", "visiting", "London", "during", "the", "weekend"]) + gold = {"entities": [(0, 3, "PERSON"), (19, 25, "LOC")]} + return Example.from_dict(doc, gold) + + def test_add_label(parser): parser = _train_parser(parser) parser.add_label("right") sgd = Adam(0.001) for i in range(100): losses = {} - doc = Doc(parser.vocab, words=["a", "b", "c", "d"]) - gold = {"heads": [1, 1, 3, 3], "deps": ["right", "ROOT", "left", "ROOT"]} - example = Example.from_dict(doc, gold) - parser.update([example], sgd=sgd, losses=losses) + parser.update([_parser_example(parser)], sgd=sgd, losses=losses) doc = Doc(parser.vocab, words=["a", "b", "c", "d"]) doc = parser(doc) assert doc[0].dep_ == "right" @@ -75,7 +84,7 @@ def test_add_label_deserializes_correctly(): ner1.add_label("C") ner1.add_label("B") ner1.add_label("A") - ner1.begin_training(lambda: []) + ner1.begin_training(lambda: [_ner_example(ner1)]) ner2 = EntityRecognizer(Vocab(), model, **config) # the second model needs to be resized before we can call from_bytes diff --git a/spacy/tests/parser/test_arc_eager_oracle.py b/spacy/tests/parser/test_arc_eager_oracle.py index fd1880030..826fc1d87 100644 --- a/spacy/tests/parser/test_arc_eager_oracle.py +++ b/spacy/tests/parser/test_arc_eager_oracle.py @@ -1,7 +1,7 @@ import pytest from spacy.vocab import Vocab from spacy import registry -from spacy.gold import Example +from spacy.training import Example from spacy.pipeline import DependencyParser from spacy.tokens import Doc from spacy.pipeline._parser_internals.nonproj import projectivize diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py index c7a1ed0d2..548cd2697 100644 --- a/spacy/tests/parser/test_ner.py +++ b/spacy/tests/parser/test_ner.py @@ -4,7 +4,7 @@ from spacy.lang.en import English from spacy.language import Language from spacy.lookups import Lookups from spacy.pipeline._parser_internals.ner import BiluoPushDown -from spacy.gold import Example +from spacy.training import Example from spacy.tokens import Doc from spacy.vocab import Vocab import logging diff --git a/spacy/tests/parser/test_neural_parser.py b/spacy/tests/parser/test_neural_parser.py index 6594c7e78..0747241d8 100644 --- a/spacy/tests/parser/test_neural_parser.py +++ b/spacy/tests/parser/test_neural_parser.py @@ -1,7 +1,7 @@ import pytest from spacy import registry -from spacy.gold import Example +from spacy.training import Example from spacy.vocab import Vocab 
from spacy.pipeline._parser_internals.arc_eager import ArcEager from spacy.pipeline.transition_parser import Parser diff --git a/spacy/tests/parser/test_parse.py b/spacy/tests/parser/test_parse.py index 8265a8a45..8d45e2132 100644 --- a/spacy/tests/parser/test_parse.py +++ b/spacy/tests/parser/test_parse.py @@ -3,7 +3,7 @@ import pytest from spacy.lang.en import English from ..util import get_doc, apply_transition_sequence, make_tempdir from ... import util -from ...gold import Example +from ...training import Example TRAIN_DATA = [ ( @@ -85,7 +85,7 @@ def test_parser_merge_pp(en_tokenizer): pos = ["DET", "NOUN", "ADP", "DET", "NOUN", "VERB"] tokens = en_tokenizer(text) doc = get_doc( - tokens.vocab, words=[t.text for t in tokens], deps=deps, heads=heads, pos=pos, + tokens.vocab, words=[t.text for t in tokens], deps=deps, heads=heads, pos=pos ) with doc.retokenize() as retokenizer: for np in doc.noun_chunks: diff --git a/spacy/tests/parser/test_preset_sbd.py b/spacy/tests/parser/test_preset_sbd.py index 594498b0b..1de05be1b 100644 --- a/spacy/tests/parser/test_preset_sbd.py +++ b/spacy/tests/parser/test_preset_sbd.py @@ -3,7 +3,7 @@ from thinc.api import Adam from spacy.attrs import NORM from spacy.vocab import Vocab from spacy import registry -from spacy.gold import Example +from spacy.training import Example from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL from spacy.tokens import Doc from spacy.pipeline import DependencyParser @@ -14,6 +14,12 @@ def vocab(): return Vocab(lex_attr_getters={NORM: lambda s: s}) +def _parser_example(parser): + doc = Doc(parser.vocab, words=["a", "b", "c", "d"]) + gold = {"heads": [1, 1, 3, 3], "deps": ["right", "ROOT", "left", "ROOT"]} + return Example.from_dict(doc, gold) + + @pytest.fixture def parser(vocab): config = { @@ -28,7 +34,7 @@ def parser(vocab): parser.cfg["hidden_width"] = 32 # parser.add_label('right') parser.add_label("left") - parser.begin_training(lambda: [], **parser.cfg) + parser.begin_training(lambda: [_parser_example(parser)], **parser.cfg) sgd = Adam(0.001) for i in range(10): diff --git a/spacy/tests/pipeline/test_attributeruler.py b/spacy/tests/pipeline/test_attributeruler.py index c12a2b650..9254688cc 100644 --- a/spacy/tests/pipeline/test_attributeruler.py +++ b/spacy/tests/pipeline/test_attributeruler.py @@ -1,6 +1,6 @@ import pytest import numpy -from spacy.gold import Example +from spacy.training import Example from spacy.lang.en import English from spacy.pipeline import AttributeRuler from spacy import util, registry diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py index 4eaa71272..c43d2c58e 100644 --- a/spacy/tests/pipeline/test_entity_linker.py +++ b/spacy/tests/pipeline/test_entity_linker.py @@ -4,7 +4,7 @@ import pytest from spacy.kb import KnowledgeBase, get_candidates, Candidate from spacy import util, registry -from spacy.gold import Example +from spacy.training import Example from spacy.lang.en import English from spacy.tests.util import make_tempdir from spacy.tokens import Span @@ -281,11 +281,12 @@ def test_append_invalid_alias(nlp): def test_preserving_links_asdoc(nlp): """Test that Span.as_doc preserves the existing entity links""" + vector_length = 1 @registry.misc.register("myLocationsKB.v1") def dummy_kb() -> Callable[["Vocab"], KnowledgeBase]: def create_kb(vocab): - mykb = KnowledgeBase(vocab, entity_vector_length=1) + mykb = KnowledgeBase(vocab, entity_vector_length=vector_length) # adding entities mykb.add_entity(entity="Q1", freq=19, 
entity_vector=[1]) mykb.add_entity(entity="Q2", freq=8, entity_vector=[1]) @@ -305,10 +306,9 @@ def test_preserving_links_asdoc(nlp): ruler = nlp.add_pipe("entity_ruler") ruler.add_patterns(patterns) el_config = {"kb_loader": {"@misc": "myLocationsKB.v1"}, "incl_prior": False} - el_pipe = nlp.add_pipe("entity_linker", config=el_config, last=True) - el_pipe.begin_training(lambda: []) - el_pipe.incl_context = False - el_pipe.incl_prior = True + entity_linker = nlp.add_pipe("entity_linker", config=el_config, last=True) + nlp.begin_training() + assert entity_linker.model.get_dim("nO") == vector_length # test whether the entity links are preserved by the `as_doc()` function text = "She lives in Boston. He lives in Denver." @@ -373,6 +373,7 @@ def test_overfitting_IO(): # Simple test to try and quickly overfit the NEL component - ensuring the ML models work correctly nlp = English() nlp.add_pipe("sentencizer") + vector_length = 3 # Add a custom component to recognize "Russ Cochran" as an entity for the example training data patterns = [ @@ -393,7 +394,7 @@ def test_overfitting_IO(): # create artificial KB - assign same prior weight to the two russ cochran's # Q2146908 (Russ Cochran): American golfer # Q7381115 (Russ Cochran): publisher - mykb = KnowledgeBase(vocab, entity_vector_length=3) + mykb = KnowledgeBase(vocab, entity_vector_length=vector_length) mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3]) mykb.add_entity(entity="Q7381115", freq=12, entity_vector=[9, 1, -7]) mykb.add_alias( @@ -406,14 +407,17 @@ def test_overfitting_IO(): return create_kb # Create the Entity Linker component and add it to the pipeline - nlp.add_pipe( + entity_linker = nlp.add_pipe( "entity_linker", config={"kb_loader": {"@misc": "myOverfittingKB.v1"}}, last=True, ) # train the NEL pipe - optimizer = nlp.begin_training() + optimizer = nlp.begin_training(get_examples=lambda: train_examples) + assert entity_linker.model.get_dim("nO") == vector_length + assert entity_linker.model.get_dim("nO") == entity_linker.kb.entity_vector_length + for i in range(50): losses = {} nlp.update(train_examples, sgd=optimizer, losses=losses) diff --git a/spacy/tests/pipeline/test_morphologizer.py b/spacy/tests/pipeline/test_morphologizer.py index 501c00f84..864c7332e 100644 --- a/spacy/tests/pipeline/test_morphologizer.py +++ b/spacy/tests/pipeline/test_morphologizer.py @@ -1,7 +1,7 @@ import pytest from spacy import util -from spacy.gold import Example +from spacy.training import Example from spacy.lang.en import English from spacy.language import Language from spacy.tests.util import make_tempdir @@ -25,27 +25,61 @@ TRAIN_DATA = [ }, ), # test combinations of morph+POS - ("Eat blue ham", {"morphs": ["Feat=V", "", ""], "pos": ["", "ADJ", ""]},), + ("Eat blue ham", {"morphs": ["Feat=V", "", ""], "pos": ["", "ADJ", ""]}), ] +def test_no_label(): + nlp = Language() + nlp.add_pipe("morphologizer") + with pytest.raises(ValueError): + nlp.begin_training() + + +def test_implicit_label(): + nlp = Language() + nlp.add_pipe("morphologizer") + train_examples = [] + for t in TRAIN_DATA: + train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1])) + nlp.begin_training(get_examples=lambda: train_examples) + + +def test_no_resize(): + nlp = Language() + morphologizer = nlp.add_pipe("morphologizer") + morphologizer.add_label("POS" + Morphology.FIELD_SEP + "NOUN") + morphologizer.add_label("POS" + Morphology.FIELD_SEP + "VERB") + nlp.begin_training() + # this throws an error because the morphologizer can't be resized after 
initialization + with pytest.raises(ValueError): + morphologizer.add_label("POS" + Morphology.FIELD_SEP + "ADJ") + + +def test_begin_training_examples(): + nlp = Language() + morphologizer = nlp.add_pipe("morphologizer") + morphologizer.add_label("POS" + Morphology.FIELD_SEP + "NOUN") + train_examples = [] + for t in TRAIN_DATA: + train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1])) + # you shouldn't really call this more than once, but for testing it should be fine + nlp.begin_training() + nlp.begin_training(get_examples=lambda: train_examples) + with pytest.raises(TypeError): + nlp.begin_training(get_examples=lambda: None) + with pytest.raises(ValueError): + nlp.begin_training(get_examples=train_examples) + + def test_overfitting_IO(): # Simple test to try and quickly overfit the morphologizer - ensuring the ML models work correctly nlp = English() - morphologizer = nlp.add_pipe("morphologizer") + nlp.add_pipe("morphologizer") train_examples = [] for inst in TRAIN_DATA: train_examples.append(Example.from_dict(nlp.make_doc(inst[0]), inst[1])) - for morph, pos in zip(inst[1]["morphs"], inst[1]["pos"]): - if morph and pos: - morphologizer.add_label( - morph + Morphology.FEATURE_SEP + "POS" + Morphology.FIELD_SEP + pos - ) - elif pos: - morphologizer.add_label("POS" + Morphology.FIELD_SEP + pos) - elif morph: - morphologizer.add_label(morph) - optimizer = nlp.begin_training() + optimizer = nlp.begin_training(get_examples=lambda: train_examples) for i in range(50): losses = {} @@ -55,18 +89,8 @@ def test_overfitting_IO(): # test the trained model test_text = "I like blue ham" doc = nlp(test_text) - gold_morphs = [ - "Feat=N", - "Feat=V", - "", - "", - ] - gold_pos_tags = [ - "NOUN", - "VERB", - "ADJ", - "", - ] + gold_morphs = ["Feat=N", "Feat=V", "", ""] + gold_pos_tags = ["NOUN", "VERB", "ADJ", ""] assert [t.morph_ for t in doc] == gold_morphs assert [t.pos_ for t in doc] == gold_pos_tags diff --git a/spacy/tests/pipeline/test_senter.py b/spacy/tests/pipeline/test_senter.py index b64fa8581..1752df5d0 100644 --- a/spacy/tests/pipeline/test_senter.py +++ b/spacy/tests/pipeline/test_senter.py @@ -1,7 +1,7 @@ import pytest from spacy import util -from spacy.gold import Example +from spacy.training import Example from spacy.lang.en import English from spacy.language import Language from spacy.tests.util import make_tempdir @@ -30,6 +30,20 @@ TRAIN_DATA = [ ), ] +def test_begin_training_examples(): + nlp = Language() + senter = nlp.add_pipe("senter") + train_examples = [] + for t in TRAIN_DATA: + train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1])) + # you shouldn't really call this more than once, but for testing it should be fine + nlp.begin_training() + nlp.begin_training(get_examples=lambda: train_examples) + with pytest.raises(TypeError): + nlp.begin_training(get_examples=lambda: None) + with pytest.raises(ValueError): + nlp.begin_training(get_examples=train_examples) + def test_overfitting_IO(): # Simple test to try and quickly overfit the senter - ensuring the ML models work correctly diff --git a/spacy/tests/pipeline/test_simple_ner.py b/spacy/tests/pipeline/test_simple_ner.py deleted file mode 100644 index b012a2cd6..000000000 --- a/spacy/tests/pipeline/test_simple_ner.py +++ /dev/null @@ -1,45 +0,0 @@ -from spacy.lang.en import English -from spacy.gold import Example -from spacy import util -from ..util import make_tempdir - - -TRAIN_DATA = [ - ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}), - ("I like London and Berlin.", {"entities": [(7, 13, 
"LOC"), (18, 24, "LOC")]}), -] - - -def test_overfitting_IO(): - # Simple test to try and quickly overfit the SimpleNER component - ensuring the ML models work correctly - nlp = English() - ner = nlp.add_pipe("simple_ner") - train_examples = [] - for text, annotations in TRAIN_DATA: - train_examples.append(Example.from_dict(nlp.make_doc(text), annotations)) - for ent in annotations.get("entities"): - ner.add_label(ent[2]) - optimizer = nlp.begin_training() - - for i in range(50): - losses = {} - nlp.update(train_examples, sgd=optimizer, losses=losses) - assert losses["ner"] < 0.0001 - - # test the trained model - test_text = "I like London." - doc = nlp(test_text) - ents = doc.ents - assert len(ents) == 1 - assert ents[0].text == "London" - assert ents[0].label_ == "LOC" - - # Also test the results are still the same after IO - with make_tempdir() as tmp_dir: - nlp.to_disk(tmp_dir) - nlp2 = util.load_model_from_path(tmp_dir) - doc2 = nlp2(test_text) - ents2 = doc2.ents - assert len(ents2) == 1 - assert ents2[0].text == "London" - assert ents2[0].label_ == "LOC" diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py index 540301eac..cd5927675 100644 --- a/spacy/tests/pipeline/test_tagger.py +++ b/spacy/tests/pipeline/test_tagger.py @@ -1,6 +1,6 @@ import pytest from spacy import util -from spacy.gold import Example +from spacy.training import Example from spacy.lang.en import English from spacy.language import Language @@ -34,6 +34,56 @@ TRAIN_DATA = [ ] +def test_no_label(): + nlp = Language() + nlp.add_pipe("tagger") + with pytest.raises(ValueError): + nlp.begin_training() + + +def test_no_resize(): + nlp = Language() + tagger = nlp.add_pipe("tagger") + tagger.add_label("N") + tagger.add_label("V") + assert tagger.labels == ("N", "V") + nlp.begin_training() + assert tagger.model.get_dim("nO") == 2 + # this throws an error because the tagger can't be resized after initialization + with pytest.raises(ValueError): + tagger.add_label("J") + + +def test_implicit_label(): + nlp = Language() + nlp.add_pipe("tagger") + train_examples = [] + for t in TRAIN_DATA: + train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1])) + nlp.begin_training(get_examples=lambda: train_examples) + + +def test_begin_training_examples(): + nlp = Language() + tagger = nlp.add_pipe("tagger") + train_examples = [] + for tag in TAGS: + tagger.add_label(tag) + for t in TRAIN_DATA: + train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1])) + # you shouldn't really call this more than once, but for testing it should be fine + nlp.begin_training() + nlp.begin_training(get_examples=lambda: train_examples) + with pytest.raises(TypeError): + nlp.begin_training(get_examples=lambda: None) + with pytest.raises(TypeError): + nlp.begin_training(get_examples=lambda: train_examples[0]) + with pytest.raises(ValueError): + nlp.begin_training(get_examples=lambda: []) + with pytest.raises(ValueError): + nlp.begin_training(get_examples=train_examples) + + def test_overfitting_IO(): # Simple test to try and quickly overfit the tagger - ensuring the ML models work correctly nlp = English() @@ -41,9 +91,8 @@ def test_overfitting_IO(): train_examples = [] for t in TRAIN_DATA: train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1])) - for tag in TAGS: - tagger.add_label(tag) - optimizer = nlp.begin_training() + optimizer = nlp.begin_training(get_examples=lambda: train_examples) + assert tagger.model.get_dim("nO") == len(TAGS) for i in range(50): losses = {} diff --git 
a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py index 12ead90cb..3f9506bb1 100644 --- a/spacy/tests/pipeline/test_textcat.py +++ b/spacy/tests/pipeline/test_textcat.py @@ -10,7 +10,7 @@ from spacy.tokens import Doc from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL from ..util import make_tempdir -from ...gold import Example +from ...training import Example TRAIN_DATA = [ @@ -80,6 +80,51 @@ def test_label_types(): textcat.add_label(9) +def test_no_label(): + nlp = Language() + nlp.add_pipe("textcat") + with pytest.raises(ValueError): + nlp.begin_training() + + +def test_implicit_label(): + nlp = Language() + textcat = nlp.add_pipe("textcat") + train_examples = [] + for t in TRAIN_DATA: + train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1])) + nlp.begin_training(get_examples=lambda: train_examples) + + +def test_no_resize(): + nlp = Language() + textcat = nlp.add_pipe("textcat") + textcat.add_label("POSITIVE") + textcat.add_label("NEGATIVE") + nlp.begin_training() + assert textcat.model.get_dim("nO") == 2 + # this throws an error because the textcat can't be resized after initialization + with pytest.raises(ValueError): + textcat.add_label("NEUTRAL") + + +def test_begin_training_examples(): + nlp = Language() + textcat = nlp.add_pipe("textcat") + train_examples = [] + for text, annotations in TRAIN_DATA: + train_examples.append(Example.from_dict(nlp.make_doc(text), annotations)) + for label, value in annotations.get("cats").items(): + textcat.add_label(label) + # you shouldn't really call this more than once, but for testing it should be fine + nlp.begin_training() + nlp.begin_training(get_examples=lambda: train_examples) + with pytest.raises(TypeError): + nlp.begin_training(get_examples=lambda: None) + with pytest.raises(ValueError): + nlp.begin_training(get_examples=train_examples) + + def test_overfitting_IO(): # Simple test to try and quickly overfit the textcat component - ensuring the ML models work correctly fix_random_seed(0) @@ -89,9 +134,8 @@ def test_overfitting_IO(): train_examples = [] for text, annotations in TRAIN_DATA: train_examples.append(Example.from_dict(nlp.make_doc(text), annotations)) - for label, value in annotations.get("cats").items(): - textcat.add_label(label) - optimizer = nlp.begin_training() + optimizer = nlp.begin_training(get_examples=lambda: train_examples) + assert textcat.model.get_dim("nO") == 2 for i in range(50): losses = {} diff --git a/spacy/tests/regression/test_issue1-1000.py b/spacy/tests/regression/test_issue1-1000.py index 5c93ea3c8..ed5bcc1a5 100644 --- a/spacy/tests/regression/test_issue1-1000.py +++ b/spacy/tests/regression/test_issue1-1000.py @@ -1,7 +1,7 @@ import pytest import random from spacy import util -from spacy.gold import Example +from spacy.training import Example from spacy.matcher import Matcher from spacy.attrs import IS_PUNCT, ORTH, LOWER from spacy.vocab import Vocab diff --git a/spacy/tests/regression/test_issue1501-2000.py b/spacy/tests/regression/test_issue1501-2000.py index 4988575ea..c1d726db6 100644 --- a/spacy/tests/regression/test_issue1501-2000.py +++ b/spacy/tests/regression/test_issue1501-2000.py @@ -3,7 +3,7 @@ import gc import numpy import copy -from spacy.gold import Example +from spacy.training import Example from spacy.lang.en import English from spacy.lang.en.stop_words import STOP_WORDS from spacy.lang.lex_attrs import is_stop diff --git a/spacy/tests/regression/test_issue2001-2500.py b/spacy/tests/regression/test_issue2001-2500.py index 259ca9b0c..357fbb84e 
100644 --- a/spacy/tests/regression/test_issue2001-2500.py +++ b/spacy/tests/regression/test_issue2001-2500.py @@ -3,7 +3,7 @@ import numpy from spacy.tokens import Doc from spacy.matcher import Matcher from spacy.displacy import render -from spacy.gold import iob_to_biluo +from spacy.training import iob_to_biluo from spacy.lang.it import Italian from spacy.lang.en import English diff --git a/spacy/tests/regression/test_issue2501-3000.py b/spacy/tests/regression/test_issue2501-3000.py index 3882df0a6..beb8faca1 100644 --- a/spacy/tests/regression/test_issue2501-3000.py +++ b/spacy/tests/regression/test_issue2501-3000.py @@ -1,6 +1,6 @@ import pytest from spacy import displacy -from spacy.gold import Example +from spacy.training import Example from spacy.lang.en import English from spacy.lang.ja import Japanese from spacy.lang.xx import MultiLanguage @@ -20,7 +20,7 @@ def test_issue2564(): nlp = Language() tagger = nlp.add_pipe("tagger") tagger.add_label("A") - tagger.begin_training(lambda: []) + nlp.begin_training() doc = nlp("hello world") assert doc.is_tagged docs = nlp.pipe(["hello", "world"]) diff --git a/spacy/tests/regression/test_issue3501-4000.py b/spacy/tests/regression/test_issue3501-4000.py index fc2a3ed7c..d36e693c7 100644 --- a/spacy/tests/regression/test_issue3501-4000.py +++ b/spacy/tests/regression/test_issue3501-4000.py @@ -9,7 +9,7 @@ from spacy.tokens import Doc, Token from spacy.matcher import Matcher, PhraseMatcher from spacy.errors import MatchPatternError from spacy.util import minibatch -from spacy.gold import Example +from spacy.training import Example from spacy.lang.hi import Hindi from spacy.lang.es import Spanish from spacy.lang.en import English @@ -251,6 +251,12 @@ def test_issue3803(): assert [t.like_num for t in doc] == [True, True, True, True, True, True] +def _parser_example(parser): + doc = Doc(parser.vocab, words=["a", "b", "c", "d"]) + gold = {"heads": [1, 1, 3, 3], "deps": ["right", "ROOT", "left", "ROOT"]} + return Example.from_dict(doc, gold) + + def test_issue3830_no_subtok(): """Test that the parser doesn't have subtok label if not learn_tokens""" config = { @@ -264,7 +270,7 @@ def test_issue3830_no_subtok(): parser = DependencyParser(Vocab(), model, **config) parser.add_label("nsubj") assert "subtok" not in parser.labels - parser.begin_training(lambda: []) + parser.begin_training(lambda: [_parser_example(parser)]) assert "subtok" not in parser.labels @@ -281,7 +287,7 @@ def test_issue3830_with_subtok(): parser = DependencyParser(Vocab(), model, **config) parser.add_label("nsubj") assert "subtok" not in parser.labels - parser.begin_training(lambda: []) + parser.begin_training(lambda: [_parser_example(parser)]) assert "subtok" in parser.labels diff --git a/spacy/tests/regression/test_issue4001-4500.py b/spacy/tests/regression/test_issue4001-4500.py index e846841d4..2beccedcf 100644 --- a/spacy/tests/regression/test_issue4001-4500.py +++ b/spacy/tests/regression/test_issue4001-4500.py @@ -2,8 +2,8 @@ import pytest from spacy.pipeline import Pipe from spacy.matcher import PhraseMatcher, Matcher from spacy.tokens import Doc, Span, DocBin -from spacy.gold import Example, Corpus -from spacy.gold.converters import json2docs +from spacy.training import Example, Corpus +from spacy.training.converters import json2docs from spacy.vocab import Vocab from spacy.lang.en import English from spacy.util import minibatch, ensure_path, load_model diff --git a/spacy/tests/regression/test_issue4501-5000.py b/spacy/tests/regression/test_issue4501-5000.py index 
d83a2c718..9454d7f0c 100644 --- a/spacy/tests/regression/test_issue4501-5000.py +++ b/spacy/tests/regression/test_issue4501-5000.py @@ -1,9 +1,7 @@ import pytest -from mock import Mock -from spacy.matcher import DependencyMatcher from spacy.tokens import Doc, Span, DocBin -from spacy.gold import Example -from spacy.gold.converters.conllu2docs import conllu2docs +from spacy.training import Example +from spacy.training.converters.conllu2docs import conllu2docs from spacy.lang.en import English from spacy.kb import KnowledgeBase from spacy.vocab import Vocab @@ -12,7 +10,7 @@ from spacy.util import ensure_path, load_model_from_path import numpy import pickle -from ..util import get_doc, make_tempdir +from ..util import make_tempdir def test_issue4528(en_vocab): diff --git a/spacy/tests/regression/test_issue5230.py b/spacy/tests/regression/test_issue5230.py index af643aadc..531e48ec3 100644 --- a/spacy/tests/regression/test_issue5230.py +++ b/spacy/tests/regression/test_issue5230.py @@ -64,7 +64,7 @@ def tagger(): # 1. no model leads to error in serialization, # 2. the affected line is the one for model serialization tagger.add_label("A") - tagger.begin_training(lambda: [], pipeline=nlp.pipeline) + nlp.begin_training() return tagger @@ -85,7 +85,7 @@ def entity_linker(): # need to add model for two reasons: # 1. no model leads to error in serialization, # 2. the affected line is the one for model serialization - entity_linker.begin_training(lambda: [], pipeline=nlp.pipeline) + nlp.begin_training() return entity_linker diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py index aa8ea6051..0df707dc0 100644 --- a/spacy/tests/test_cli.py +++ b/spacy/tests/test_cli.py @@ -1,14 +1,15 @@ import pytest from click import NoSuchOption -from spacy.gold import docs_to_json, biluo_tags_from_offsets -from spacy.gold.converters import iob2docs, conll_ner2docs, conllu2docs +from spacy.training import docs_to_json, biluo_tags_from_offsets +from spacy.training.converters import iob2docs, conll_ner2docs, conllu2docs from spacy.lang.en import English from spacy.schemas import ProjectConfigSchema, RecommendationSchema, validate from spacy.cli.pretrain import make_docs from spacy.cli.init_config import init_config, RECOMMENDATIONS from spacy.cli._util import validate_project_commands, parse_config_overrides from spacy.cli._util import load_project_config, substitute_project_variables +from spacy.cli._util import string_to_list from thinc.config import ConfigValidationError import srsly @@ -372,17 +373,13 @@ def test_parse_config_overrides(args, expected): assert parse_config_overrides(args) == expected -@pytest.mark.parametrize( - "args", [["--foo"], ["--x.foo", "bar", "--baz"]], -) +@pytest.mark.parametrize("args", [["--foo"], ["--x.foo", "bar", "--baz"]]) def test_parse_config_overrides_invalid(args): with pytest.raises(NoSuchOption): parse_config_overrides(args) -@pytest.mark.parametrize( - "args", [["--x.foo", "bar", "baz"], ["x.foo"]], -) +@pytest.mark.parametrize("args", [["--x.foo", "bar", "baz"], ["x.foo"]]) def test_parse_config_overrides_invalid_2(args): with pytest.raises(SystemExit): parse_config_overrides(args) @@ -401,3 +398,44 @@ def test_init_config(lang, pipeline, optimize): def test_model_recommendations(): for lang, data in RECOMMENDATIONS.items(): assert RecommendationSchema(**data) + + +@pytest.mark.parametrize( + "value", + [ + # fmt: off + "parser,textcat,tagger", + " parser, textcat ,tagger ", + 'parser,textcat,tagger', + ' parser, textcat ,tagger ', + ' "parser"," textcat " 
,"tagger "', + " 'parser',' textcat ' ,'tagger '", + '[parser,textcat,tagger]', + '["parser","textcat","tagger"]', + '[" parser" ,"textcat ", " tagger " ]', + "[parser,textcat,tagger]", + "[ parser, textcat , tagger]", + "['parser','textcat','tagger']", + "[' parser' , 'textcat', ' tagger ' ]", + # fmt: on + ], +) +def test_string_to_list(value): + assert string_to_list(value, intify=False) == ["parser", "textcat", "tagger"] + + +@pytest.mark.parametrize( + "value", + [ + # fmt: off + "1,2,3", + '[1,2,3]', + '["1","2","3"]', + '[" 1" ,"2 ", " 3 " ]', + "[' 1' , '2', ' 3 ' ]", + # fmt: on + ], +) +def test_string_to_list_intify(value): + assert string_to_list(value, intify=False) == ["1", "2", "3"] + assert string_to_list(value, intify=True) == [1, 2, 3] diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py index ebc804235..840d878c2 100644 --- a/spacy/tests/test_language.py +++ b/spacy/tests/test_language.py @@ -3,7 +3,7 @@ import pytest from spacy.language import Language from spacy.tokens import Doc, Span from spacy.vocab import Vocab -from spacy.gold import Example +from spacy.training import Example from spacy.lang.en import English from spacy.util import registry diff --git a/spacy/tests/test_new_example.py b/spacy/tests/test_new_example.py index 321eaae95..597809286 100644 --- a/spacy/tests/test_new_example.py +++ b/spacy/tests/test_new_example.py @@ -1,5 +1,5 @@ import pytest -from spacy.gold.example import Example +from spacy.training.example import Example from spacy.tokens import Doc from spacy.vocab import Vocab diff --git a/spacy/tests/test_scorer.py b/spacy/tests/test_scorer.py index 6dae14210..fb96c0361 100644 --- a/spacy/tests/test_scorer.py +++ b/spacy/tests/test_scorer.py @@ -1,8 +1,8 @@ from numpy.testing import assert_almost_equal, assert_array_almost_equal import pytest from pytest import approx -from spacy.gold import Example -from spacy.gold.iob_utils import biluo_tags_from_offsets +from spacy.training import Example +from spacy.training.iob_utils import biluo_tags_from_offsets from spacy.scorer import Scorer, ROCAUCScore from spacy.scorer import _roc_auc_score, _roc_curve from .util import get_doc diff --git a/spacy/tests/test_tok2vec.py b/spacy/tests/test_tok2vec.py index 9f0f4b74a..fb30c6ae5 100644 --- a/spacy/tests/test_tok2vec.py +++ b/spacy/tests/test_tok2vec.py @@ -6,7 +6,7 @@ from spacy.ml.models.tok2vec import MishWindowEncoder, MaxoutWindowEncoder from spacy.pipeline.tok2vec import Tok2Vec, Tok2VecListener from spacy.vocab import Vocab from spacy.tokens import Doc -from spacy.gold import Example +from spacy.training import Example from spacy import util from spacy.lang.en import English from .util import get_batch @@ -89,6 +89,7 @@ def test_init_tok2vec(): tok2vec = nlp.add_pipe("tok2vec") assert tok2vec.listeners == [] nlp.begin_training() + assert tok2vec.model.get_dim("nO") cfg_string = """ diff --git a/spacy/tests/test_gold.py b/spacy/tests/test_training.py similarity index 98% rename from spacy/tests/test_gold.py rename to spacy/tests/test_training.py index 334d9fc24..1926aca1f 100644 --- a/spacy/tests/test_gold.py +++ b/spacy/tests/test_training.py @@ -1,9 +1,10 @@ import numpy -from spacy.gold import biluo_tags_from_offsets, offsets_from_biluo_tags, Alignment -from spacy.gold import spans_from_biluo_tags, iob_to_biluo -from spacy.gold import Corpus, docs_to_json -from spacy.gold.example import Example -from spacy.gold.converters import json2docs +from spacy.training import biluo_tags_from_offsets, offsets_from_biluo_tags, Alignment 
+from spacy.training import spans_from_biluo_tags, iob_to_biluo +from spacy.training import Corpus, docs_to_json +from spacy.training.example import Example +from spacy.training.converters import json2docs +from spacy.training.augment import make_orth_variants_example from spacy.lang.en import English from spacy.tokens import Doc, DocBin from spacy.util import get_words_and_spaces, minibatch @@ -12,7 +13,6 @@ import pytest import srsly from .util import make_tempdir -from ..gold.augment import make_orth_variants_example @pytest.fixture diff --git a/spacy/tests/test_util.py b/spacy/tests/test_util.py index 40cd71eb5..1f073ab32 100644 --- a/spacy/tests/test_util.py +++ b/spacy/tests/test_util.py @@ -5,7 +5,7 @@ from .util import get_random_doc from spacy import util from spacy.util import dot_to_object, SimpleFrozenList from thinc.api import Config, Optimizer -from spacy.gold.batchers import minibatch_by_words +from spacy.training.batchers import minibatch_by_words from ..lang.en import English from ..lang.nl import Dutch from ..language import DEFAULT_CONFIG_PATH diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index 5e7222d40..787cca652 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -24,7 +24,7 @@ from .util import registry from .attrs import intify_attrs from .symbols import ORTH from .scorer import Scorer -from .gold import validate_examples +from .training import validate_examples cdef class Tokenizer: diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 3f8c735fb..93520aeda 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -576,7 +576,7 @@ cdef class Doc: entity_type = 0 kb_id = 0 - # Set ent_iob to Missing (0) bij default unless this token was nered before + # Set ent_iob to Missing (0) by default unless this token was nered before ent_iob = 0 if self.c[i].ent_iob != 0: ent_iob = 2 diff --git a/spacy/gold/__init__.pxd b/spacy/training/__init__.pxd similarity index 100% rename from spacy/gold/__init__.pxd rename to spacy/training/__init__.pxd diff --git a/spacy/gold/__init__.py b/spacy/training/__init__.py similarity index 100% rename from spacy/gold/__init__.py rename to spacy/training/__init__.py diff --git a/spacy/gold/align.py b/spacy/training/align.py similarity index 100% rename from spacy/gold/align.py rename to spacy/training/align.py diff --git a/spacy/gold/augment.py b/spacy/training/augment.py similarity index 100% rename from spacy/gold/augment.py rename to spacy/training/augment.py diff --git a/spacy/gold/batchers.py b/spacy/training/batchers.py similarity index 100% rename from spacy/gold/batchers.py rename to spacy/training/batchers.py diff --git a/spacy/gold/converters/__init__.py b/spacy/training/converters/__init__.py similarity index 100% rename from spacy/gold/converters/__init__.py rename to spacy/training/converters/__init__.py diff --git a/spacy/gold/converters/conll_ner2docs.py b/spacy/training/converters/conll_ner2docs.py similarity index 99% rename from spacy/gold/converters/conll_ner2docs.py rename to spacy/training/converters/conll_ner2docs.py index c04a77f07..8dcaf2599 100644 --- a/spacy/gold/converters/conll_ner2docs.py +++ b/spacy/training/converters/conll_ner2docs.py @@ -1,7 +1,7 @@ from wasabi import Printer from .. 
import tags_to_entities -from ...gold import iob_to_biluo +from ...training import iob_to_biluo from ...lang.xx import MultiLanguage from ...tokens import Doc, Span from ...util import load_model diff --git a/spacy/gold/converters/conllu2docs.py b/spacy/training/converters/conllu2docs.py similarity index 99% rename from spacy/gold/converters/conllu2docs.py rename to spacy/training/converters/conllu2docs.py index 11ee86182..85afdeef3 100644 --- a/spacy/gold/converters/conllu2docs.py +++ b/spacy/training/converters/conllu2docs.py @@ -1,7 +1,7 @@ import re from .conll_ner2docs import n_sents_info -from ...gold import iob_to_biluo, spans_from_biluo_tags +from ...training import iob_to_biluo, spans_from_biluo_tags from ...tokens import Doc, Token, Span from ...vocab import Vocab from wasabi import Printer diff --git a/spacy/gold/converters/iob2docs.py b/spacy/training/converters/iob2docs.py similarity index 97% rename from spacy/gold/converters/iob2docs.py rename to spacy/training/converters/iob2docs.py index eebf1266b..f8076c5ab 100644 --- a/spacy/gold/converters/iob2docs.py +++ b/spacy/training/converters/iob2docs.py @@ -1,7 +1,7 @@ from wasabi import Printer from .conll_ner2docs import n_sents_info -from ...gold import iob_to_biluo, tags_to_entities +from ...training import iob_to_biluo, tags_to_entities from ...tokens import Doc, Span from ...util import minibatch diff --git a/spacy/gold/converters/json2docs.py b/spacy/training/converters/json2docs.py similarity index 100% rename from spacy/gold/converters/json2docs.py rename to spacy/training/converters/json2docs.py diff --git a/spacy/gold/corpus.py b/spacy/training/corpus.py similarity index 100% rename from spacy/gold/corpus.py rename to spacy/training/corpus.py diff --git a/spacy/gold/example.pxd b/spacy/training/example.pxd similarity index 100% rename from spacy/gold/example.pxd rename to spacy/training/example.pxd diff --git a/spacy/gold/example.pyx b/spacy/training/example.pyx similarity index 100% rename from spacy/gold/example.pyx rename to spacy/training/example.pyx diff --git a/spacy/gold/gold_io.pyx b/spacy/training/gold_io.pyx similarity index 100% rename from spacy/gold/gold_io.pyx rename to spacy/training/gold_io.pyx diff --git a/spacy/gold/iob_utils.py b/spacy/training/iob_utils.py similarity index 97% rename from spacy/gold/iob_utils.py rename to spacy/training/iob_utils.py index 08751cfd4..ceb5e16b8 100644 --- a/spacy/gold/iob_utils.py +++ b/spacy/training/iob_utils.py @@ -195,13 +195,15 @@ def tags_to_entities(tags): continue elif tag.startswith("I"): if start is None: - raise ValueError(Errors.E067.format(tags=tags[: i + 1])) + raise ValueError(Errors.E067.format(start="I", tags=tags[: i + 1])) continue if tag.startswith("U"): entities.append((tag[2:], i, i)) elif tag.startswith("B"): start = i elif tag.startswith("L"): + if start is None: + raise ValueError(Errors.E067.format(start="L", tags=tags[: i + 1])) entities.append((tag[2:], start, i)) start = None else: diff --git a/spacy/gold/loggers.py b/spacy/training/loggers.py similarity index 100% rename from spacy/gold/loggers.py rename to spacy/training/loggers.py diff --git a/spacy/util.py b/spacy/util.py index fa4815df8..d8df04554 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -93,6 +93,7 @@ class registry(thinc.registry): # environment. spaCy models packaged with `spacy package` will "advertise" # themselves via entry points. 
models = catalogue.create("spacy", "models", entry_points=True) + cli = catalogue.create("spacy", "cli", entry_points=True) class SimpleFrozenDict(dict): @@ -647,7 +648,7 @@ def join_command(command: List[str]) -> str: return " ".join(shlex.quote(cmd) for cmd in command) -def run_command(command: Union[str, List[str]], *, capture=False, stdin=None) -> None: +def run_command(command: Union[str, List[str]], *, capture=False, stdin=None): """Run a command on the command line as a subprocess. If the subprocess returns a non-zero exit code, a system exit is performed. diff --git a/website/README.md b/website/README.md index f3a64d1cb..825d13c65 100644 --- a/website/README.md +++ b/website/README.md @@ -289,11 +289,11 @@ always be the **last element** in the row. > | Column 1 | Column 2 ~~List[Doc]~~ | > ``` -| Name | Description | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `vocab` | The shared vocabulary. ~~Vocab~~ | -| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. ~~Model[List[Doc], FullTransformerBatch]~~ | -| `annotation_setter` | Function that takes a batch of `Doc` objects and transformer outputs can set additional annotations on the `Doc`. ~~Callable[[List[Doc], FullTransformerBatch], None]~~ | +| Name | Description | +| ----------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `vocab` | The shared vocabulary. ~~Vocab~~ | +| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. ~~Model[List[Doc], FullTransformerBatch]~~ | +| `set_extra_annotations` | Function that takes a batch of `Doc` objects and transformer outputs and can set additional annotations on the `Doc`. ~~Callable[[List[Doc], FullTransformerBatch], None]~~ | ### List {#list} @@ -609,7 +609,6 @@ In addition to the native markdown elements, you can use the components ├── docs # the actual markdown content ├── meta # JSON-formatted site metadata | ├── languages.json # supported languages and statistical models -| ├── logos.json # logos and links for landing page | ├── sidebars.json # sidebar navigations for different sections | ├── site.json # general site metadata | └── universe.json # data for the spaCy universe section diff --git a/website/docs/api/architectures.md b/website/docs/api/architectures.md index ee844d961..30d863b17 100644 --- a/website/docs/api/architectures.md +++ b/website/docs/api/architectures.md @@ -181,10 +181,10 @@ characters would be `"jumpping"`: 4 from the start, 4 from the end. This ensures that the final character is always in the last position, instead of being in an arbitrary position depending on the word length. -The characters are embedded in a embedding table with 256 rows, and the vectors -concatenated. A hash-embedded vector of the `NORM` of the word is also -concatenated on, and the result is then passed through a feed-forward network to -construct a single vector to represent the information. +The characters are embedded in a embedding table with a given number of rows, +and the vectors concatenated. A hash-embedded vector of the `NORM` of the word +is also concatenated on, and the result is then passed through a feed-forward +network to construct a single vector to represent the information. 
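To make the fixed character window described above concrete, here is a rough standalone sketch (not spaCy's actual implementation; padding short words with a blank symbol is an assumption for illustration):

```python
def char_window(word: str, n: int = 4, pad: str = " ") -> str:
    # Take n characters from the start and n from the end, so the final
    # character always lands in the last position -- e.g. "jumpping" for
    # "jumping" with n=4.
    prefix = (word[:n] + pad * n)[:n]
    suffix = (pad * n + word[-n:])[-n:]
    return prefix + suffix

assert char_window("jumping") == "jumpping"
```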
| Name | Description | | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | @@ -456,62 +456,6 @@ consists of either two or three subnetworks: | `nO` | The number of actions the model will predict between. Usually inferred from data at the beginning of training, or loaded from disk. ~~int~~ | | **CREATES** | The model using the architecture. ~~Model[List[Docs], List[List[Floats2d]]]~~ | -### spacy.BILUOTagger.v1 {#BILUOTagger source="spacy/ml/models/simple_ner.py"} - -> #### Example Config -> -> ```ini -> [model] -> @architectures = "spacy.BILUOTagger.v1 " -> -> [model.tok2vec] -> @architectures = "spacy.HashEmbedCNN.v1" -> # etc. -> ``` - -Construct a simple NER tagger that predicts -[BILUO](/usage/linguistic-features#accessing-ner) tag scores for each token and -uses greedy decoding with transition-constraints to return a valid BILUO tag -sequence. A BILUO tag sequence encodes a sequence of non-overlapping labelled -spans into tags assigned to each token. The first token of a span is given the -tag `B-LABEL`, the last token of the span is given the tag `L-LABEL`, and tokens -within the span are given the tag `U-LABEL`. Single-token spans are given the -tag `U-LABEL`. All other tokens are assigned the tag `O`. The BILUO tag scheme -generally results in better linear separation between classes, especially for -non-CRF models, because there are more distinct classes for the different -situations ([Ratinov et al., 2009](https://www.aclweb.org/anthology/W09-1119/)). - -| Name | Description | -| ----------- | ------------------------------------------------------------------------------------------ | -| `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ | -| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ | - -### spacy.IOBTagger.v1 {#IOBTagger source="spacy/ml/models/simple_ner.py"} - -> #### Example Config -> -> ```ini -> [model] -> @architectures = "spacy.IOBTagger.v1 " -> -> [model.tok2vec] -> @architectures = "spacy.HashEmbedCNN.v1" -> # etc. -> ``` - -Construct a simple NER tagger, that predicts -[IOB](/usage/linguistic-features#accessing-ner) tag scores for each token and -uses greedy decoding with transition-constraints to return a valid IOB tag -sequence. An IOB tag sequence encodes a sequence of non-overlapping labeled -spans into tags assigned to each token. The first token of a span is given the -tag B-LABEL, and subsequent tokens are given the tag I-LABEL. All other tokens -are assigned the tag O. - -| Name | Description | -| ----------- | ------------------------------------------------------------------------------------------ | -| `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ | -| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ | - ## Tagging architectures {#tagger source="spacy/ml/models/tagger.py"} ### spacy.Tagger.v1 {#Tagger} diff --git a/website/docs/api/attributeruler.md b/website/docs/api/attributeruler.md index fc72eda98..53c8c46cf 100644 --- a/website/docs/api/attributeruler.md +++ b/website/docs/api/attributeruler.md @@ -38,7 +38,7 @@ how the component should be configured. You can override its settings via the | `validate` | Whether patterns should be validated (passed to the `Matcher`). Defaults to `False`. 
~~bool~~ | ```python -https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/attributeruler.py +%%GITHUB_SPACY/spacy/pipeline/attributeruler.py ``` ## AttributeRuler.\_\_init\_\_ {#init tag="method"} diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md index 7852d0482..55e552e72 100644 --- a/website/docs/api/cli.md +++ b/website/docs/api/cli.md @@ -229,13 +229,13 @@ $ python -m spacy convert [input_file] [output_dir] [--converter] [--file-type] ### Converters {#converters} -| ID | Description | -| ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `auto` | Automatically pick converter based on file extension and file content (default). | -| `json` | JSON-formatted training data used in spaCy v2.x. | -| `conll` | Universal Dependencies `.conllu` or `.conll` format. | -| `ner` | NER with IOB/IOB2 tags, one token per line with columns separated by whitespace. The first column is the token and the final column is the IOB tag. Sentences are separated by blank lines and documents are separated by the line `-DOCSTART- -X- O O`. Supports CoNLL 2003 NER format. See [sample data](https://github.com/explosion/spaCy/tree/master/examples/training/ner_example_data). | -| `iob` | NER with IOB/IOB2 tags, one sentence per line with tokens separated by whitespace and annotation separated by `|`, either `word|B-ENT` or `word|POS|B-ENT`. See [sample data](https://github.com/explosion/spaCy/tree/master/examples/training/ner_example_data). | +| ID | Description | +| ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `auto` | Automatically pick converter based on file extension and file content (default). | +| `json` | JSON-formatted training data used in spaCy v2.x. | +| `conll` | Universal Dependencies `.conllu` or `.conll` format. | +| `ner` | NER with IOB/IOB2 tags, one token per line with columns separated by whitespace. The first column is the token and the final column is the IOB tag. Sentences are separated by blank lines and documents are separated by the line `-DOCSTART- -X- O O`. Supports CoNLL 2003 NER format. See [sample data](%%GITHUB_SPACY/extra/example_data/ner_example_data). | +| `iob` | NER with IOB/IOB2 tags, one sentence per line with tokens separated by whitespace and annotation separated by `|`, either `word|B-ENT` or `word|POS|B-ENT`. See [sample data](%%GITHUB_SPACY/extra/example_data/ner_example_data). 
| ## debug {#debug new="3"} @@ -357,38 +357,38 @@ File /path/to/spacy/ml/models/tok2vec.py (line 207) ℹ [training.logger] Registry @loggers Name spacy.ConsoleLogger.v1 -Module spacy.gold.loggers -File /path/to/spacy/gold/loggers.py (line 8) +Module spacy.training.loggers +File /path/to/spacy/training/loggers.py (line 8) ℹ [training.batcher] Registry @batchers Name spacy.batch_by_words.v1 -Module spacy.gold.batchers -File /path/to/spacy/gold/batchers.py (line 49) +Module spacy.training.batchers +File /path/to/spacy/training/batchers.py (line 49) ℹ [training.batcher.size] Registry @schedules Name compounding.v1 Module thinc.schedules -File /Users/ines/Repos/explosion/thinc/thinc/schedules.py (line 43) +File /path/to/thinc/thinc/schedules.py (line 43) ℹ [training.dev_corpus] Registry @readers Name spacy.Corpus.v1 -Module spacy.gold.corpus -File /path/to/spacy/gold/corpus.py (line 18) +Module spacy.training.corpus +File /path/to/spacy/training/corpus.py (line 18) ℹ [training.optimizer] Registry @optimizers Name Adam.v1 Module thinc.optimizers -File /Users/ines/Repos/explosion/thinc/thinc/optimizers.py (line 58) +File /path/to/thinc/thinc/optimizers.py (line 58) ℹ [training.optimizer.learn_rate] Registry @schedules Name warmup_linear.v1 Module thinc.schedules -File /Users/ines/Repos/explosion/thinc/thinc/schedules.py (line 91) +File /path/to/thinc/thinc/schedules.py (line 91) ℹ [training.train_corpus] Registry @readers Name spacy.Corpus.v1 -Module spacy.gold.corpus -File /path/to/spacy/gold/corpus.py (line 18) +Module spacy.training.corpus +File /path/to/spacy/training/corpus.py (line 18) ``` @@ -852,7 +852,7 @@ this, you can set the `--no-sdist` flag. ```cli -$ python -m spacy package [input_dir] [output_dir] [--meta-path] [--create-meta] [--no-sdist] [--version] [--force] +$ python -m spacy package [input_dir] [output_dir] [--meta-path] [--create-meta] [--no-sdist] [--name] [--version] [--force] ``` > #### Example @@ -870,6 +870,7 @@ $ python -m spacy package [input_dir] [output_dir] [--meta-path] [--create-meta] | `--meta-path`, `-m` 2 | Path to [`meta.json`](/api/data-formats#meta) file (optional). ~~Optional[Path] \(option)~~ | | `--create-meta`, `-C` 2 | Create a `meta.json` file on the command line, even if one already exists in the directory. If an existing file is found, its entries will be shown as the defaults in the command line prompt. ~~bool (flag)~~ | | `--no-sdist`, `-NS`, | Don't build the `.tar.gz` sdist automatically. Can be set if you want to run this step manually. ~~bool (flag)~~ | +| `--name`, `-n` 3 | Package name to override in meta. ~~Optional[str] \(option)~~ | | `--version`, `-v` 3 | Package version to override in meta. Useful when training new versions, as it doesn't require editing the meta template. ~~Optional[str] \(option)~~ | | `--force`, `-f` | Force overwriting of existing folder in output directory. ~~bool (flag)~~ | | `--help`, `-h` | Show help message and available arguments. 
~~bool (flag)~~ | @@ -893,7 +894,7 @@ can provide any other repo (public or private) that you have access to using the ```cli -$ python -m spacy project clone [name] [dest] [--repo] +$ python -m spacy project clone [name] [dest] [--repo] [--branch] ``` > #### Example @@ -908,13 +909,14 @@ $ python -m spacy project clone [name] [dest] [--repo] > $ python -m spacy project clone template --repo https://github.com/your_org/your_repo > ``` -| Name | Description | -| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | -| `name` | The name of the template to clone, relative to the repo. Can be a top-level directory or a subdirectory like `dir/template`. ~~str (positional)~~ | -| `dest` | Where to clone the project. Defaults to current working directory. ~~Path (positional)~~ | -| `--repo`, `-r` | The repository to clone from. Can be any public or private Git repo you have access to. ~~str (option)~~ | -| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ | -| **CREATES** | The cloned [project directory](/usage/projects#project-files). | +| Name | Description | +| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | +| `name` | The name of the template to clone, relative to the repo. Can be a top-level directory or a subdirectory like `dir/template`. ~~str (positional)~~ | +| `dest` | Where to clone the project. Defaults to current working directory. ~~Path (positional)~~ | +| `--repo`, `-r` | The repository to clone from. Can be any public or private Git repo you have access to. ~~str (option)~~ | +| `--branch`, `-b` | The branch to clone from. Defaults to `master`. ~~str (option)~~ | +| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ | +| **CREATES** | The cloned [project directory](/usage/projects#project-files). | ### project assets {#project-assets tag="command"} diff --git a/website/docs/api/corpus.md b/website/docs/api/corpus.md index 86cfa9121..f6f6bbf68 100644 --- a/website/docs/api/corpus.md +++ b/website/docs/api/corpus.md @@ -2,7 +2,7 @@ title: Corpus teaser: An annotated corpus tag: class -source: spacy/gold/corpus.py +source: spacy/training/corpus.py new: 3 --- @@ -42,7 +42,7 @@ streaming. | `limit` | Limit corpus to a subset of examples, e.g. for debugging. Defaults to `0` for no limit. ~~int~~ | ```python -https://github.com/explosion/spaCy/blob/develop/spacy/gold/corpus.py +%%GITHUB_SPACY/spacy/training/corpus.py ``` ## Corpus.\_\_init\_\_ {#init tag="method"} @@ -58,7 +58,7 @@ train/test skew. > #### Example > > ```python -> from spacy.gold import Corpus +> from spacy.training import Corpus > > # With a single file > corpus = Corpus("./data/train.spacy") @@ -82,7 +82,7 @@ Yield examples from the data. > #### Example > > ```python -> from spacy.gold import Corpus +> from spacy.training import Corpus > import spacy > > corpus = Corpus("./train.spacy") diff --git a/website/docs/api/cython.md b/website/docs/api/cython.md index d7c03cf41..16b11cead 100644 --- a/website/docs/api/cython.md +++ b/website/docs/api/cython.md @@ -23,12 +23,12 @@ abruptly. With Cython there are four ways of declaring complex data types. 
Unfortunately we use all four in different places, as they all have different utility: -| Declaration | Description | Example | -| --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------- | -| `class` | A normal Python class. | [`Language`](/api/language) | -| `cdef class` | A Python extension type. Differs from a normal Python class in that its attributes can be defined on the underlying struct. Can have C-level objects as attributes (notably structs and pointers), and can have methods which have C-level objects as arguments or return types. | [`Lexeme`](/api/cython-classes#lexeme) | -| `cdef struct` | A struct is just a collection of variables, sort of like a named tuple, except the memory is contiguous. Structs can't have methods, only attributes. | [`LexemeC`](/api/cython-structs#lexemec) | -| `cdef cppclass` | A C++ class. Like a struct, this can be allocated on the stack, but can have methods, a constructor and a destructor. Differs from `cdef class` in that it can be created and destroyed without acquiring the Python global interpreter lock. This style is the most obscure. | [`StateC`](https://github.com/explosion/spaCy/tree/master/spacy/syntax/_state.pxd) | +| Declaration | Description | Example | +| --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------- | +| `class` | A normal Python class. | [`Language`](/api/language) | +| `cdef class` | A Python extension type. Differs from a normal Python class in that its attributes can be defined on the underlying struct. Can have C-level objects as attributes (notably structs and pointers), and can have methods which have C-level objects as arguments or return types. | [`Lexeme`](/api/cython-classes#lexeme) | +| `cdef struct` | A struct is just a collection of variables, sort of like a named tuple, except the memory is contiguous. Structs can't have methods, only attributes. | [`LexemeC`](/api/cython-structs#lexemec) | +| `cdef cppclass` | A C++ class. Like a struct, this can be allocated on the stack, but can have methods, a constructor and a destructor. Differs from `cdef class` in that it can be created and destroyed without acquiring the Python global interpreter lock. This style is the most obscure. | [`StateC`](%%GITHUB_SPACY/spacy/pipeline/_parser_internals/_state.pxd) | The most important classes in spaCy are defined as `cdef class` objects. The underlying data for these objects is usually gathered into a struct, which is diff --git a/website/docs/api/data-formats.md b/website/docs/api/data-formats.md index 3fd2818f4..3d78df39d 100644 --- a/website/docs/api/data-formats.md +++ b/website/docs/api/data-formats.md @@ -37,7 +37,7 @@ recommended settings for your use case, check out the > guide on [registered functions](/usage/training#config-functions) for details. 
```ini -https://github.com/explosion/spaCy/blob/develop/spacy/default_config.cfg +%%GITHUB_SPACY/spacy/default_config.cfg ``` @@ -45,8 +45,7 @@ https://github.com/explosion/spaCy/blob/develop/spacy/default_config.cfg Under the hood, spaCy's configs are powered by our machine learning library [Thinc's config system](https://thinc.ai/docs/usage-config), which uses [`pydantic`](https://github.com/samuelcolvin/pydantic/) for data validation -based on type hints. See -[`spacy/schemas.py`](https://github.com/explosion/spaCy/blob/develop/spacy/schemas.py) +based on type hints. See [`spacy/schemas.py`](%%GITHUB_SPACY/spacy/schemas.py) for the schemas used to validate the default config. Arguments of registered functions are validated against their type annotations, if available. To debug your config and check that it's valid, you can run the @@ -175,7 +174,7 @@ run [`spacy pretrain`](/api/cli#pretrain). > > ```python > from spacy.tokens import DocBin -> from spacy.gold import Corpus +> from spacy.training import Corpus > > doc_bin = DocBin(docs=docs) > doc_bin.to_disk("./data.spacy") @@ -456,7 +455,7 @@ lexical data. Here's an example of the 20 most frequent lexemes in the English training data: ```json -https://github.com/explosion/spaCy/tree/master/examples/training/vocab-data.jsonl +%%GITHUB_SPACY / extra / example_data / vocab - data.jsonl ``` ## Pipeline meta {#meta} diff --git a/website/docs/api/dependencyparser.md b/website/docs/api/dependencyparser.md index 7a09a840a..674812567 100644 --- a/website/docs/api/dependencyparser.md +++ b/website/docs/api/dependencyparser.md @@ -57,7 +57,7 @@ architectures and their arguments and hyperparameters. | `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [TransitionBasedParser](/api/architectures#TransitionBasedParser). ~~Model[List[Doc], List[Floats2d]]~~ | ```python -https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/dep_parser.pyx +%%GITHUB_SPACY/spacy/pipeline/dep_parser.pyx ``` ## DependencyParser.\_\_init\_\_ {#init tag="method"} @@ -293,7 +293,12 @@ context, the original parameters are restored. ## DependencyParser.add_label {#add_label tag="method"} -Add a new label to the pipe. +Add a new label to the pipe. Note that you don't have to call this method if you +provide a **representative data sample** to the +[`begin_training`](#begin_training) method. In this case, all labels found in +the sample will be automatically added to the model, and the output dimension +will be [inferred](/usage/layers-architectures#thinc-shape-inference) +automatically. > #### Example > @@ -307,6 +312,25 @@ Add a new label to the pipe. | `label` | The label to add. ~~str~~ | | **RETURNS** | `0` if the label is already present, otherwise `1`. ~~int~~ | +## DependencyParser.set_output {#set_output tag="method"} + +Change the output dimension of the component's model by calling the model's +attribute `resize_output`. This is a function that takes the original model and +the new output dimension `nO`, and changes the model in place. When resizing an +already trained model, care should be taken to avoid the "catastrophic +forgetting" problem. + +> #### Example +> +> ```python +> parser = nlp.add_pipe("parser") +> parser.set_output(512) +> ``` + +| Name | Description | +| ---- | --------------------------------- | +| `nO` | The new output dimension. ~~int~~ | + ## DependencyParser.to_disk {#to_disk tag="method"} Serialize the pipe to disk. 
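To illustrate the label-inference behaviour described for `add_label` above, here is a minimal hedged sketch (the sentence and dependency labels are made up for illustration):

```python
import spacy
from spacy.training import Example

nlp = spacy.blank("en")
parser = nlp.add_pipe("parser")

# Either declare labels explicitly before initialization ...
parser.add_label("nsubj")

# ... or provide a representative sample to begin_training, which adds all
# labels found in the data and infers the model's output dimension.
doc = nlp.make_doc("She ate the pizza")
example = Example.from_dict(
    doc, {"heads": [1, 1, 3, 1], "deps": ["nsubj", "ROOT", "det", "dobj"]}
)
optimizer = nlp.begin_training(lambda: [example])
```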
diff --git a/website/docs/api/entitylinker.md b/website/docs/api/entitylinker.md index 8cde6c490..a9d45d68e 100644 --- a/website/docs/api/entitylinker.md +++ b/website/docs/api/entitylinker.md @@ -50,7 +50,7 @@ architectures and their arguments and hyperparameters. | `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ | ```python -https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/entity_linker.py +%%GITHUB_SPACY/spacy/pipeline/entity_linker.py ``` ## EntityLinker.\_\_init\_\_ {#init tag="method"} diff --git a/website/docs/api/entityrecognizer.md b/website/docs/api/entityrecognizer.md index b6b9caa84..1420aa1a7 100644 --- a/website/docs/api/entityrecognizer.md +++ b/website/docs/api/entityrecognizer.md @@ -48,7 +48,7 @@ architectures and their arguments and hyperparameters. | `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [TransitionBasedParser](/api/architectures#TransitionBasedParser). ~~Model[List[Doc], List[Floats2d]]~~ | ```python -https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/ner.pyx +%%GITHUB_SPACY/spacy/pipeline/ner.pyx ``` ## EntityRecognizer.\_\_init\_\_ {#init tag="method"} @@ -281,7 +281,12 @@ context, the original parameters are restored. ## EntityRecognizer.add_label {#add_label tag="method"} -Add a new label to the pipe. +Add a new label to the pipe. Note that you don't have to call this method if you +provide a **representative data sample** to the +[`begin_training`](#begin_training) method. In this case, all labels found in +the sample will be automatically added to the model, and the output dimension +will be [inferred](/usage/layers-architectures#thinc-shape-inference) +automatically. > #### Example > @@ -295,6 +300,25 @@ Add a new label to the pipe. | `label` | The label to add. ~~str~~ | | **RETURNS** | `0` if the label is already present, otherwise `1`. ~~int~~ | +## EntityRecognizer.set_output {#set_output tag="method"} + +Change the output dimension of the component's model by calling the model's +attribute `resize_output`. This is a function that takes the original model and +the new output dimension `nO`, and changes the model in place. When resizing an +already trained model, care should be taken to avoid the "catastrophic +forgetting" problem. + +> #### Example +> +> ```python +> ner = nlp.add_pipe("ner") +> ner.set_output(512) +> ``` + +| Name | Description | +| ---- | --------------------------------- | +| `nO` | The new output dimension. ~~int~~ | + ## EntityRecognizer.to_disk {#to_disk tag="method"} Serialize the pipe to disk. diff --git a/website/docs/api/entityruler.md b/website/docs/api/entityruler.md index 454b2a04b..a6934eeef 100644 --- a/website/docs/api/entityruler.md +++ b/website/docs/api/entityruler.md @@ -42,7 +42,7 @@ how the component should be configured. You can override its settings via the | `ent_id_sep` | Separator used internally for entity IDs. Defaults to `"||"`. 
~~str~~ | ```python -https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/entityruler.py +%%GITHUB_SPACY/spacy/pipeline/entityruler.py ``` ## EntityRuler.\_\_init\_\_ {#init tag="method"} diff --git a/website/docs/api/example.md b/website/docs/api/example.md index 2434cce43..668c8028f 100644 --- a/website/docs/api/example.md +++ b/website/docs/api/example.md @@ -2,7 +2,7 @@ title: Example teaser: A training instance tag: class -source: spacy/gold/example.pyx +source: spacy/training/example.pyx new: 3.0 --- @@ -22,7 +22,7 @@ both documents. > > ```python > from spacy.tokens import Doc -> from spacy.gold import Example +> from spacy.training import Example > > words = ["hello", "world", "!"] > spaces = [True, False, False] @@ -48,7 +48,7 @@ see the [training format documentation](/api/data-formats#dict-input). > > ```python > from spacy.tokens import Doc -> from spacy.gold import Example +> from spacy.training import Example > > predicted = Doc(vocab, words=["Apply", "some", "sunscreen"]) > token_ref = ["Apply", "some", "sun", "screen"] @@ -301,7 +301,7 @@ tokenizations add up to the same string. For example, you'll be able to align > #### Example > > ```python -> from spacy.gold import Alignment +> from spacy.training import Alignment > > bert_tokens = ["obama", "'", "s", "podcast"] > spacy_tokens = ["obama", "'s", "podcast"] diff --git a/website/docs/api/language.md b/website/docs/api/language.md index 7799f103b..c24023177 100644 --- a/website/docs/api/language.md +++ b/website/docs/api/language.md @@ -205,8 +205,15 @@ examples can either be the full training data or a representative sample. They are used to **initialize the models** of trainable pipeline components and are passed each component's [`begin_training`](/api/pipe#begin_training) method, if available. Initialization includes validating the network, -[inferring missing shapes](https://thinc.ai/docs/usage-models#validation) and -setting up the label scheme based on the data. +[inferring missing shapes](/usage/layers-architectures#thinc-shape-inference) +and setting up the label scheme based on the data. + +If no `get_examples` function is provided when calling `nlp.begin_training`, the +pipeline components will be initialized with generic data. In this case, it is +crucial that the output dimension of each component has already been defined +either in the [config](/usage/training#config), or by calling +[`pipe.add_label`](/api/pipe#add_label) for each possible output label (e.g. for +the tagger or textcat). @@ -937,11 +944,11 @@ available to the loaded object. ## Class attributes {#class-attributes} -| Name | Description | -| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `Defaults` | Settings, data and factory methods for creating the `nlp` object and processing pipeline. ~~Defaults~~ | -| `lang` | Two-letter language ID, i.e. [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). ~~str~~ | -| `default_config` | Base [config](/usage/training#config) to use for [Language.config](/api/language#config). Defaults to [`default_config.cfg`](https://github.com/explosion/spaCy/tree/develop/spacy/default_config.cfg). 
~~Config~~ | +| Name | Description | +| ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `Defaults` | Settings, data and factory methods for creating the `nlp` object and processing pipeline. ~~Defaults~~ | +| `lang` | Two-letter language ID, i.e. [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). ~~str~~ | +| `default_config` | Base [config](/usage/training#config) to use for [Language.config](/api/language#config). Defaults to [`default_config.cfg`](%%GITHUB_SPACY/spacy/default_config.cfg). ~~Config~~ | ## Defaults {#defaults} @@ -974,34 +981,17 @@ customize the default language data: > config = Config().from_str(DEFAULT_CONFIG) > ``` -| Name | Description | -| --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `stop_words` | List of stop words, used for `Token.is_stop`.
**Example:** [`stop_words.py`][stop_words.py] ~~Set[str]~~ | -| `tokenizer_exceptions` | Tokenizer exception rules, string mapped to list of token attributes.
**Example:** [`de/tokenizer_exceptions.py`][de/tokenizer_exceptions.py] ~~Dict[str, List[dict]]~~ | -| `prefixes`, `suffixes`, `infixes` | Prefix, suffix and infix rules for the default tokenizer.
**Example:** [`puncutation.py`][punctuation.py] ~~Optional[List[Union[str, Pattern]]]~~ | -| `token_match` | Optional regex for matching strings that should never be split, overriding the infix rules.
**Example:** [`fr/tokenizer_exceptions.py`][fr/tokenizer_exceptions.py] ~~Optional[Pattern]~~ | -| `url_match` | Regular expression for matching URLs. Prefixes and suffixes are removed before applying the match.
**Example:** [`tokenizer_exceptions.py`][tokenizer_exceptions.py] ~~Optional[Pattern]~~ | -| `lex_attr_getters` | Custom functions for setting lexical attributes on tokens, e.g. `like_num`.
**Example:** [`lex_attrs.py`][lex_attrs.py] ~~Dict[int, Callable[[str], Any]]~~ | -| `syntax_iterators` | Functions that compute views of a `Doc` object based on its syntax. At the moment, only used for [noun chunks](/usage/linguistic-features#noun-chunks).
**Example:** [`syntax_iterators.py`][syntax_iterators.py]. ~~Dict[str, Callable[[Union[Doc, Span]], Iterator[Span]]]~~ | -| `writing_system` | Information about the language's writing system, available via `Vocab.writing_system`. Defaults to: `{"direction": "ltr", "has_case": True, "has_letters": True}.`.
**Example:** [`zh/__init__.py`][zh/__init__.py] ~~Dict[str, Any]~~ | -| `config` | Default [config](/usage/training#config) added to `nlp.config`. This can include references to custom tokenizers or lemmatizers.
**Example:** [`zh/__init__.py`][zh/__init__.py] ~~Config~~ | - -[stop_words.py]: - https://github.com/explosion/spaCy/tree/master/spacy/lang/en/stop_words.py -[tokenizer_exceptions.py]: - https://github.com/explosion/spaCy/tree/master/spacy/lang/tokenizer_exceptions.py -[de/tokenizer_exceptions.py]: - https://github.com/explosion/spaCy/tree/master/spacy/lang/de/tokenizer_exceptions.py -[fr/tokenizer_exceptions.py]: - https://github.com/explosion/spaCy/tree/master/spacy/lang/fr/tokenizer_exceptions.py -[punctuation.py]: - https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py -[lex_attrs.py]: - https://github.com/explosion/spaCy/tree/master/spacy/lang/en/lex_attrs.py -[syntax_iterators.py]: - https://github.com/explosion/spaCy/tree/master/spacy/lang/en/syntax_iterators.py -[zh/__init__.py]: - https://github.com/explosion/spaCy/tree/master/spacy/lang/zh/__init__.py +| Name | Description | +| --------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `stop_words` | List of stop words, used for `Token.is_stop`.
**Example:** [`stop_words.py`](%%GITHUB_SPACY/spacy/lang/en/stop_words.py) ~~Set[str]~~ | +| `tokenizer_exceptions` | Tokenizer exception rules, string mapped to list of token attributes.
**Example:** [`de/tokenizer_exceptions.py`](%%GITHUB_SPACY/spacy/lang/de/tokenizer_exceptions.py) ~~Dict[str, List[dict]]~~ | +| `prefixes`, `suffixes`, `infixes` | Prefix, suffix and infix rules for the default tokenizer.
**Example:** [`punctuation.py`](%%GITHUB_SPACY/spacy/lang/punctuation.py) ~~Optional[List[Union[str, Pattern]]]~~ |
+| `token_match` | Optional regex for matching strings that should never be split, overriding the infix rules.
**Example:** [`fr/tokenizer_exceptions.py`](%%GITHUB_SPACY/spacy/lang/fr/tokenizer_exceptions.py) ~~Optional[Pattern]~~ | +| `url_match` | Regular expression for matching URLs. Prefixes and suffixes are removed before applying the match.
**Example:** [`tokenizer_exceptions.py`](%%GITHUB_SPACY/spacy/lang/tokenizer_exceptions.py) ~~Optional[Pattern]~~ | +| `lex_attr_getters` | Custom functions for setting lexical attributes on tokens, e.g. `like_num`.
**Example:** [`lex_attrs.py`](%%GITHUB_SPACY/spacy/lang/en/lex_attrs.py) ~~Dict[int, Callable[[str], Any]]~~ | +| `syntax_iterators` | Functions that compute views of a `Doc` object based on its syntax. At the moment, only used for [noun chunks](/usage/linguistic-features#noun-chunks).
**Example:** [`syntax_iterators.py`](%%GITHUB_SPACY/spacy/lang/en/syntax_iterators.py) ~~Dict[str, Callable[[Union[Doc, Span]], Iterator[Span]]]~~ |
+| `writing_system` | Information about the language's writing system, available via `Vocab.writing_system`. Defaults to `{"direction": "ltr", "has_case": True, "has_letters": True}`.
**Example:** [`zh/__init__.py`](%%GITHUB_SPACY/spacy/lang/zh/__init__.py) ~~Dict[str, Any]~~ | +| `config` | Default [config](/usage/training#config) added to `nlp.config`. This can include references to custom tokenizers or lemmatizers.
**Example:** [`zh/__init__.py`](%%GITHUB_SPACY/spacy/lang/zh/__init__.py) ~~Config~~ | ## Serialization fields {#serialization-fields} diff --git a/website/docs/api/lemmatizer.md b/website/docs/api/lemmatizer.md index 45a8736db..486410907 100644 --- a/website/docs/api/lemmatizer.md +++ b/website/docs/api/lemmatizer.md @@ -56,7 +56,7 @@ data formats used by the lookup and rule-based lemmatizers, see | `model` | **Not yet implemented:** the model to use. ~~Model~~ | ```python -https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/lemmatizer.py +%%GITHUB_SPACY/spacy/pipeline/lemmatizer.py ``` ## Lemmatizer.\_\_init\_\_ {#init tag="method"} diff --git a/website/docs/api/morphologizer.md b/website/docs/api/morphologizer.md index 069856ea3..f2b2f9cc0 100644 --- a/website/docs/api/morphologizer.md +++ b/website/docs/api/morphologizer.md @@ -37,7 +37,7 @@ architectures and their arguments and hyperparameters. | `model` | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ | ```python -https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/morphologizer.pyx +%%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx ``` ## Morphologizer.\_\_init\_\_ {#init tag="method"} @@ -258,6 +258,13 @@ context, the original parameters are restored. Add a new label to the pipe. If the `Morphologizer` should set annotations for both `pos` and `morph`, the label should include the UPOS as the feature `POS`. +Raises an error if the output dimension is already set, or if the model has +already been fully [initialized](#begin_training). Note that you don't have to +call this method if you provide a **representative data sample** to the +[`begin_training`](#begin_training) method. In this case, all labels found in +the sample will be automatically added to the model, and the output dimension +will be [inferred](/usage/layers-architectures#thinc-shape-inference) +automatically. > #### Example > diff --git a/website/docs/api/pipe.md b/website/docs/api/pipe.md index 57b2af44d..c8d61a5a9 100644 --- a/website/docs/api/pipe.md +++ b/website/docs/api/pipe.md @@ -22,7 +22,7 @@ for how to use the `Pipe` base class to implement custom components. > inherit from `Pipe`. ```python -https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/pipe.pyx +%%GITHUB_SPACY/spacy/pipeline/pipe.pyx ``` ## Pipe.\_\_init\_\_ {#init tag="method"} @@ -286,9 +286,6 @@ context, the original parameters are restored. ## Pipe.add_label {#add_label tag="method"} -Add a new label to the pipe. It's possible to extend trained models with new -labels, but care should be taken to avoid the "catastrophic forgetting" problem. - > #### Example > > ```python @@ -296,10 +293,82 @@ labels, but care should be taken to avoid the "catastrophic forgetting" problem. > pipe.add_label("MY_LABEL") > ``` -| Name | Description | -| ----------- | ----------------------------------------------------------- | -| `label` | The label to add. ~~str~~ | -| **RETURNS** | `0` if the label is already present, otherwise `1`. ~~int~~ | +Add a new label to the pipe, to be predicted by the model. The actual +implementation depends on the specific component, but in general `add_label` +shouldn't be called if the output dimension is already set, or if the model has +already been fully [initialized](#begin_training). If these conditions are +violated, the function will raise an Error. 
The exception to this rule is when +the component is [resizable](#is_resizable), in which case +[`set_output`](#set_output) should be called to ensure that the model is +properly resized. + + + +This method needs to be overwritten with your own custom `add_label` method. + + + +| Name | Description | +| ----------- | ------------------------------------------------------- | +| `label` | The label to add. ~~str~~ | +| **RETURNS** | 0 if the label is already present, otherwise 1. ~~int~~ | + +Note that in general, you don't have to call `pipe.add_label` if you provide a +representative data sample to the [`begin_training`](#begin_training) method. In +this case, all labels found in the sample will be automatically added to the +model, and the output dimension will be +[inferred](/usage/layers-architectures#thinc-shape-inference) automatically. + +## Pipe.is_resizable {#is_resizable tag="method"} + +> #### Example +> +> ```python +> can_resize = pipe.is_resizable() +> ``` +> +> With custom resizing implemented by a component: +> +> ```python +> def custom_resize(model, new_nO): +> # adjust model +> return model +> +> custom_model.attrs["resize_output"] = custom_resize +> ``` + +Check whether or not the output dimension of the component's model can be +resized. If this method returns `True`, [`set_output`](#set_output) can be +called to change the model's output dimension. + +For built-in components that are not resizable, you have to create and train a +new model from scratch with the appropriate architecture and output dimension. +For custom components, you can implement a `resize_output` function and add it +as an attribute to the component's model. + +| Name | Description | +| ----------- | ---------------------------------------------------------------------------------------------- | +| **RETURNS** | Whether or not the output dimension of the model can be changed after initialization. ~~bool~~ | + +## Pipe.set_output {#set_output tag="method"} + +Change the output dimension of the component's model. If the component is not +[resizable](#is_resizable), this method will raise a `NotImplementedError`. If a +component is resizable, the model's attribute `resize_output` will be called. +This is a function that takes the original model and the new output dimension +`nO`, and changes the model in place. When resizing an already trained model, +care should be taken to avoid the "catastrophic forgetting" problem. + +> #### Example +> +> ```python +> if pipe.is_resizable(): +> pipe.set_output(512) +> ``` + +| Name | Description | +| ---- | --------------------------------- | +| `nO` | The new output dimension. ~~int~~ | ## Pipe.to_disk {#to_disk tag="method"} diff --git a/website/docs/api/sentencerecognizer.md b/website/docs/api/sentencerecognizer.md index 3d9f61e8d..ca19327bb 100644 --- a/website/docs/api/sentencerecognizer.md +++ b/website/docs/api/sentencerecognizer.md @@ -34,7 +34,7 @@ architectures and their arguments and hyperparameters. | `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [Tagger](/api/architectures#Tagger). 
~~Model[List[Doc], List[Floats2d]]~~ | ```python -https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/senter.pyx +%%GITHUB_SPACY/spacy/pipeline/senter.pyx ``` ## SentenceRecognizer.\_\_init\_\_ {#init tag="method"} diff --git a/website/docs/api/sentencizer.md b/website/docs/api/sentencizer.md index 8104b1151..c435acdcb 100644 --- a/website/docs/api/sentencizer.md +++ b/website/docs/api/sentencizer.md @@ -33,7 +33,7 @@ how the component should be configured. You can override its settings via the | `punct_chars` | Optional custom list of punctuation characters that mark sentence ends. See below for defaults if not set. Defaults to `None`. ~~Optional[List[str]]~~ | `None` | ```python -https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/sentencizer.pyx +%%GITHUB_SPACY/spacy/pipeline/sentencizer.pyx ``` ## Sentencizer.\_\_init\_\_ {#init tag="method"} diff --git a/website/docs/api/tagger.md b/website/docs/api/tagger.md index af0e3af3c..d83a77357 100644 --- a/website/docs/api/tagger.md +++ b/website/docs/api/tagger.md @@ -34,7 +34,7 @@ architectures and their arguments and hyperparameters. | `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ | ```python -https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/tagger.pyx +%%GITHUB_SPACY/spacy/pipeline/tagger.pyx ``` ## Tagger.\_\_init\_\_ {#init tag="method"} @@ -249,9 +249,9 @@ Score a batch of examples. > scores = tagger.score(examples) > ``` -| Name | Description | -| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `examples` | The examples to score. ~~Iterable[Example]~~ | +| Name | Description | +| ----------- | --------------------------------------------------------------------------------------------------------------------------------- | +| `examples` | The examples to score. ~~Iterable[Example]~~ | | **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Dict[str, float]~~ | ## Tagger.create_optimizer {#create_optimizer tag="method"} @@ -288,7 +288,13 @@ context, the original parameters are restored. ## Tagger.add_label {#add_label tag="method"} -Add a new label to the pipe. +Add a new label to the pipe. Raises an error if the output dimension is already +set, or if the model has already been fully [initialized](#begin_training). Note +that you don't have to call this method if you provide a **representative data +sample** to the [`begin_training`](#begin_training) method. In this case, all +labels found in the sample will be automatically added to the model, and the +output dimension will be +[inferred](/usage/layers-architectures#thinc-shape-inference) automatically. > #### Example > diff --git a/website/docs/api/textcategorizer.md b/website/docs/api/textcategorizer.md index 927ac5411..cc20d6fd2 100644 --- a/website/docs/api/textcategorizer.md +++ b/website/docs/api/textcategorizer.md @@ -41,7 +41,7 @@ architectures and their arguments and hyperparameters. | `model` | A model instance that predicts scores for each category. Defaults to [TextCatEnsemble](/api/architectures#TextCatEnsemble). 
~~Model[List[Doc], List[Floats2d]]~~ | ```python -https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/textcat.py +%%GITHUB_SPACY/spacy/pipeline/textcat.py ``` ## TextCategorizer.\_\_init\_\_ {#init tag="method"} @@ -297,7 +297,13 @@ Modify the pipe's model, to use the given parameter values. ## TextCategorizer.add_label {#add_label tag="method"} -Add a new label to the pipe. +Add a new label to the pipe. Raises an error if the output dimension is already +set, or if the model has already been fully [initialized](#begin_training). Note +that you don't have to call this method if you provide a **representative data +sample** to the [`begin_training`](#begin_training) method. In this case, all +labels found in the sample will be automatically added to the model, and the +output dimension will be +[inferred](/usage/layers-architectures#thinc-shape-inference) automatically. > #### Example > diff --git a/website/docs/api/tok2vec.md b/website/docs/api/tok2vec.md index deb8369ab..6f13a17a5 100644 --- a/website/docs/api/tok2vec.md +++ b/website/docs/api/tok2vec.md @@ -45,7 +45,7 @@ architectures and their arguments and hyperparameters. | `model` | The model to use. Defaults to [HashEmbedCNN](/api/architectures#HashEmbedCNN). ~~Model[List[Doc], List[Floats2d]~~ | ```python -https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/tok2vec.py +%%GITHUB_SPACY/spacy/pipeline/tok2vec.py ``` ## Tok2Vec.\_\_init\_\_ {#init tag="method"} diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md index 7f2eb2e66..38e2299fa 100644 --- a/website/docs/api/top-level.md +++ b/website/docs/api/top-level.md @@ -105,8 +105,7 @@ your installation, installed pipelines and local setup from within spaCy. ### spacy.explain {#spacy.explain tag="function"} Get a description for a given POS tag, dependency label or entity type. For a -list of available terms, see -[`glossary.py`](https://github.com/explosion/spaCy/tree/master/spacy/glossary.py). +list of available terms, see [`glossary.py`](%%GITHUB_SPACY/spacy/glossary.py). > #### Example > @@ -262,11 +261,11 @@ If a setting is not present in the options, the default value will be used. > displacy.serve(doc, style="ent", options=options) > ``` -| Name | Description | -| --------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `ents` | Entity types to highlight or `None` for all types (default). ~~Optional[List[str]]~~ | -| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ | -| `template` 2.2 | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](https://github.com/explosion/spaCy/blob/master/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ | +| Name | Description | +| --------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `ents` | Entity types to highlight or `None` for all types (default). ~~Optional[List[str]]~~ | +| `colors` | Color overrides. 
Entity types should be mapped to color names or values. ~~Dict[str, str]~~ | +| `template` 2.2 | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](GITHUB_SPACY/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ | By default, displaCy comes with colors for all entity types used by [spaCy's trained pipelines](/models). If you're using custom entity types, you @@ -348,7 +347,7 @@ See the [`Transformer`](/api/transformer) API reference and | [`span_getters`](/api/transformer#span_getters) | Registry for functions that take a batch of `Doc` objects and return a list of `Span` objects to process by the transformer, e.g. sentences. | | [`annotation_setters`](/api/transformer#annotation_setters) | Registry for functions that create annotation setters. Annotation setters are functions that take a batch of `Doc` objects and a [`FullTransformerBatch`](/api/transformer#fulltransformerbatch) and can set additional annotations on the `Doc`. | -## Loggers {#loggers source="spacy/gold/loggers.py" new="3"} +## Loggers {#loggers source="spacy/training/loggers.py" new="3"} A logger records the training results. When a logger is created, two functions are returned: one for logging the information for each training step, and a @@ -452,7 +451,7 @@ remain in the config file stored on your local system. | `project_name` | The name of the project in the Weights & Biases interface. The project will be created automatically if it doesn't exist yet. ~~str~~ | | `remove_config_values` | A list of values to include from the config before it is uploaded to W&B (default: empty). ~~List[str]~~ | -## Batchers {#batchers source="spacy/gold/batchers.py" new="3"} +## Batchers {#batchers source="spacy/training/batchers.py" new="3"} A data batcher implements a batching strategy that essentially turns a stream of items into a stream of batches, with each batch consisting of one item or a list @@ -536,9 +535,9 @@ sequences in the batch. | `discard_oversize` | Whether to discard sequences that are by themselves longer than the largest padded batch size. ~~bool~~ | | `get_length` | Optional function that receives a sequence item and returns its length. Defaults to the built-in `len()` if not set. ~~Optional[Callable[[Any], int]]~~ | -## Training data and alignment {#gold source="spacy/gold"} +## Training data and alignment {#gold source="spacy/training"} -### gold.biluo_tags_from_offsets {#biluo_tags_from_offsets tag="function"} +### training.biluo_tags_from_offsets {#biluo_tags_from_offsets tag="function"} Encode labelled spans into per-token tags, using the [BILUO scheme](/usage/linguistic-features#accessing-ner) (Begin, In, Last, Unit, @@ -554,7 +553,7 @@ single-token entity. > #### Example > > ```python -> from spacy.gold import biluo_tags_from_offsets +> from spacy.training import biluo_tags_from_offsets > > doc = nlp("I like London.") > entities = [(7, 13, "LOC")] @@ -568,7 +567,7 @@ single-token entity. | `entities` | A sequence of `(start, end, label)` triples. `start` and `end` should be character-offset integers denoting the slice into the original string. ~~List[Tuple[int, int, Union[str, int]]]~~ | | **RETURNS** | A list of strings, describing the [BILUO](/usage/linguistic-features#accessing-ner) tags. 
~~List[str]~~ | -### gold.offsets_from_biluo_tags {#offsets_from_biluo_tags tag="function"} +### training.offsets_from_biluo_tags {#offsets_from_biluo_tags tag="function"} Encode per-token tags following the [BILUO scheme](/usage/linguistic-features#accessing-ner) into entity offsets. @@ -576,7 +575,7 @@ Encode per-token tags following the > #### Example > > ```python -> from spacy.gold import offsets_from_biluo_tags +> from spacy.training import offsets_from_biluo_tags > > doc = nlp("I like London.") > tags = ["O", "O", "U-LOC", "O"] @@ -590,7 +589,7 @@ Encode per-token tags following the | `entities` | A sequence of [BILUO](/usage/linguistic-features#accessing-ner) tags with each tag describing one token. Each tag string will be of the form of either `""`, `"O"` or `"{action}-{label}"`, where action is one of `"B"`, `"I"`, `"L"`, `"U"`. ~~List[str]~~ | | **RETURNS** | A sequence of `(start, end, label)` triples. `start` and `end` will be character-offset integers denoting the slice into the original string. ~~List[Tuple[int, int, str]]~~ | -### gold.spans_from_biluo_tags {#spans_from_biluo_tags tag="function" new="2.1"} +### training.spans_from_biluo_tags {#spans_from_biluo_tags tag="function" new="2.1"} Encode per-token tags following the [BILUO scheme](/usage/linguistic-features#accessing-ner) into @@ -600,7 +599,7 @@ token-based tags, e.g. to overwrite the `doc.ents`. > #### Example > > ```python -> from spacy.gold import spans_from_biluo_tags +> from spacy.training import spans_from_biluo_tags > > doc = nlp("I like London.") > tags = ["O", "O", "U-LOC", "O"] @@ -616,12 +615,12 @@ token-based tags, e.g. to overwrite the `doc.ents`. ## Utility functions {#util source="spacy/util.py"} spaCy comes with a small collection of utility functions located in -[`spacy/util.py`](https://github.com/explosion/spaCy/tree/master/spacy/util.py). -Because utility functions are mostly intended for **internal use within spaCy**, -their behavior may change with future releases. The functions documented on this -page should be safe to use and we'll try to ensure backwards compatibility. -However, we recommend having additional tests in place if your application -depends on any of spaCy's utilities. +[`spacy/util.py`](%%GITHUB_SPACY/spacy/util.py). Because utility functions are +mostly intended for **internal use within spaCy**, their behavior may change +with future releases. The functions documented on this page should be safe to +use and we'll try to ensure backwards compatibility. However, we recommend +having additional tests in place if your application depends on any of spaCy's +utilities. ### util.get_lang_class {#util.get_lang_class tag="function"} @@ -832,10 +831,10 @@ Compile a sequence of prefix rules into a regex object. > nlp.tokenizer.prefix_search = prefix_regex.search > ``` -| Name | Description | -| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `entries` | The prefix rules, e.g. [`lang.punctuation.TOKENIZER_PREFIXES`](https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ | -| **RETURNS** | The regex object. to be used for [`Tokenizer.prefix_search`](/api/tokenizer#attributes). 
~~Pattern~~ | +| Name | Description | +| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------- | +| `entries` | The prefix rules, e.g. [`lang.punctuation.TOKENIZER_PREFIXES`](%%GITHUB_SPACY/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ | +| **RETURNS** | The regex object. to be used for [`Tokenizer.prefix_search`](/api/tokenizer#attributes). ~~Pattern~~ | ### util.compile_suffix_regex {#util.compile_suffix_regex tag="function"} @@ -849,10 +848,10 @@ Compile a sequence of suffix rules into a regex object. > nlp.tokenizer.suffix_search = suffix_regex.search > ``` -| Name | Description | -| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `entries` | The suffix rules, e.g. [`lang.punctuation.TOKENIZER_SUFFIXES`](https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ | -| **RETURNS** | The regex object. to be used for [`Tokenizer.suffix_search`](/api/tokenizer#attributes). ~~Pattern~~ | +| Name | Description | +| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------- | +| `entries` | The suffix rules, e.g. [`lang.punctuation.TOKENIZER_SUFFIXES`](%%GITHUB_SPACY/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ | +| **RETURNS** | The regex object. to be used for [`Tokenizer.suffix_search`](/api/tokenizer#attributes). ~~Pattern~~ | ### util.compile_infix_regex {#util.compile_infix_regex tag="function"} @@ -866,10 +865,10 @@ Compile a sequence of infix rules into a regex object. > nlp.tokenizer.infix_finditer = infix_regex.finditer > ``` -| Name | Description | -| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `entries` | The infix rules, e.g. [`lang.punctuation.TOKENIZER_INFIXES`](https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ | -| **RETURNS** | The regex object. to be used for [`Tokenizer.infix_finditer`](/api/tokenizer#attributes). ~~Pattern~~ | +| Name | Description | +| ----------- | ----------------------------------------------------------------------------------------------------------------------------------------- | +| `entries` | The infix rules, e.g. [`lang.punctuation.TOKENIZER_INFIXES`](%%GITHUB_SPACY/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ | +| **RETURNS** | The regex object. to be used for [`Tokenizer.infix_finditer`](/api/tokenizer#attributes). ~~Pattern~~ | ### util.minibatch {#util.minibatch tag="function" new="2"} diff --git a/website/docs/api/transformer.md b/website/docs/api/transformer.md index b41a18890..d5bcef229 100644 --- a/website/docs/api/transformer.md +++ b/website/docs/api/transformer.md @@ -31,7 +31,7 @@ supports all models that are available via the Usually you will connect subsequent components to the shared transformer using the [TransformerListener](/api/architectures#TransformerListener) layer. This works similarly to spaCy's [Tok2Vec](/api/tok2vec) component and -[Tok2VecListener](/api/architectures/Tok2VecListener) sublayer. +[Tok2VecListener](/api/architectures/#Tok2VecListener) sublayer. 
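As a minimal, hedged sketch of the `set_extra_annotations` callback documented in the tables below (the extension name `my_trf_output` is made up for illustration):

```python
from typing import List
from spacy.tokens import Doc

Doc.set_extension("my_trf_output", default=None)

def set_my_annotations(docs: List[Doc], trf_batch) -> None:
    # Matches the documented signature
    # Callable[[List[Doc], FullTransformerBatch], None]: store the raw
    # batch output on each Doc under a custom extension attribute.
    for doc in docs:
        doc._.my_trf_output = trf_batch
```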
The component assigns the output of the transformer to the `Doc`'s extension attributes. We also calculate an alignment between the word-piece tokens and the @@ -61,11 +61,11 @@ on the transformer architectures and their arguments and hyperparameters. > nlp.add_pipe("transformer", config=DEFAULT_CONFIG) > ``` -| Setting | Description | -| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `max_batch_items` | Maximum size of a padded batch. Defaults to `4096`. ~~int~~ | -| `annotation_setter` | Function that takes a batch of `Doc` objects and transformer outputs to set additional annotations on the `Doc`. The `Doc._.transformer_data` attribute is set prior to calling the callback. Defaults to `null_annotation_setter` (no additional annotations). ~~Callable[[List[Doc], FullTransformerBatch], None]~~ | -| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. Defaults to [TransformerModel](/api/architectures#TransformerModel). ~~Model[List[Doc], FullTransformerBatch]~~ | +| Setting | Description | +| ----------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `max_batch_items` | Maximum size of a padded batch. Defaults to `4096`. ~~int~~ | +| `set_extra_annotations` | Function that takes a batch of `Doc` objects and transformer outputs to set additional annotations on the `Doc`. The `Doc._.transformer_data` attribute is set prior to calling the callback. Defaults to `null_annotation_setter` (no additional annotations). ~~Callable[[List[Doc], FullTransformerBatch], None]~~ | +| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. Defaults to [TransformerModel](/api/architectures#TransformerModel). ~~Model[List[Doc], FullTransformerBatch]~~ | ```python https://github.com/explosion/spacy-transformers/blob/master/spacy_transformers/pipeline_component.py @@ -102,14 +102,14 @@ attribute. You can also provide a callback to set additional annotations. In your application, you would normally use a shortcut for this and instantiate the component using its string name and [`nlp.add_pipe`](/api/language#create_pipe). -| Name | Description | -| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `vocab` | The shared vocabulary. ~~Vocab~~ | -| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. Usually you will want to use the [TransformerModel](/api/architectures#TransformerModel) layer for this. ~~Model[List[Doc], FullTransformerBatch]~~ | -| `annotation_setter` | Function that takes a batch of `Doc` objects and transformer outputs and stores the annotations on the `Doc`. The `Doc._.trf_data` attribute is set prior to calling the callback. By default, no additional annotations are set. 
~~Callable[[List[Doc], FullTransformerBatch], None]~~ | -| _keyword-only_ | | -| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ | -| `max_batch_items` | Maximum size of a padded batch. Defaults to `128*32`. ~~int~~ | +| Name | Description | +| ----------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `vocab` | The shared vocabulary. ~~Vocab~~ | +| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. Usually you will want to use the [TransformerModel](/api/architectures#TransformerModel) layer for this. ~~Model[List[Doc], FullTransformerBatch]~~ | +| `set_extra_annotations` | Function that takes a batch of `Doc` objects and transformer outputs and stores the annotations on the `Doc`. The `Doc._.trf_data` attribute is set prior to calling the callback. By default, no additional annotations are set. ~~Callable[[List[Doc], FullTransformerBatch], None]~~ | +| _keyword-only_ | | +| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ | +| `max_batch_items` | Maximum size of a padded batch. Defaults to `128*32`. ~~int~~ | ## Transformer.\_\_call\_\_ {#call tag="method"} @@ -205,7 +205,7 @@ modifying them. Assign the extracted features to the Doc objects. By default, the [`TransformerData`](/api/transformer#transformerdata) object is written to the -[`Doc._.trf_data`](#custom-attributes) attribute. Your annotation_setter +[`Doc._.trf_data`](#custom-attributes) attribute. Your `set_extra_annotations` callback is then called, if provided. > #### Example diff --git a/website/docs/images/prodigy_overview.jpg b/website/docs/images/prodigy_overview.jpg new file mode 100644 index 000000000..84326ccea Binary files /dev/null and b/website/docs/images/prodigy_overview.jpg differ diff --git a/website/docs/images/projects.png b/website/docs/images/projects.png new file mode 100644 index 000000000..934e98e0a Binary files /dev/null and b/website/docs/images/projects.png differ diff --git a/website/docs/images/wandb1.jpg b/website/docs/images/wandb1.jpg new file mode 100644 index 000000000..3baf4aba0 Binary files /dev/null and b/website/docs/images/wandb1.jpg differ diff --git a/website/docs/images/wandb2.jpg b/website/docs/images/wandb2.jpg new file mode 100644 index 000000000..cd67c9aa4 Binary files /dev/null and b/website/docs/images/wandb2.jpg differ diff --git a/website/docs/usage/101/_language-data.md b/website/docs/usage/101/_language-data.md index f1fa1f3a2..239cec9d1 100644 --- a/website/docs/usage/101/_language-data.md +++ b/website/docs/usage/101/_language-data.md @@ -2,9 +2,8 @@ Every language is different – and usually full of **exceptions and special cases**, especially amongst the most common words. Some of these exceptions are shared across languages, while others are **entirely specific** – usually so specific that they need to be hard-coded. The -[`lang`](https://github.com/explosion/spaCy/tree/master/spacy/lang) module -contains all language-specific data, organized in simple Python files. This -makes the data easy to update and extend. +[`lang`](%%GITHUB_SPACY/spacy/lang) module contains all language-specific data, +organized in simple Python files. 
This makes the data easy to update and extend. The **shared language data** in the directory root includes rules that can be generalized across languages – for example, rules for basic punctuation, emoji, @@ -22,28 +21,12 @@ values are defined in the [`Language.Defaults`](/api/language#defaults). > nlp_de = German() # Includes German data > ``` -| Name | Description | -| ----------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **Stop words**
[`stop_words.py`][stop_words.py] | List of most common words of a language that are often useful to filter out, for example "and" or "I". Matching tokens will return `True` for `is_stop`. | -| **Tokenizer exceptions**
[`tokenizer_exceptions.py`][tokenizer_exceptions.py] | Special-case rules for the tokenizer, for example, contractions like "can't" and abbreviations with punctuation, like "U.K.". | -| **Punctuation rules**
[`punctuation.py`][punctuation.py] | Regular expressions for splitting tokens, e.g. on punctuation or special characters like emoji. Includes rules for prefixes, suffixes and infixes. | -| **Character classes**
[`char_classes.py`][char_classes.py] | Character classes to be used in regular expressions, for example, Latin characters, quotes, hyphens or icons. | -| **Lexical attributes**
[`lex_attrs.py`][lex_attrs.py] | Custom functions for setting lexical attributes on tokens, e.g. `like_num`, which includes language-specific words like "ten" or "hundred". | -| **Syntax iterators**
[`syntax_iterators.py`][syntax_iterators.py] | Functions that compute views of a `Doc` object based on its syntax. At the moment, only used for [noun chunks](/usage/linguistic-features#noun-chunks). | -| **Lemmatizer**
[`lemmatizer.py`][lemmatizer.py] [`spacy-lookups-data`][spacy-lookups-data] | Custom lemmatizer implementation and lemmatization tables. | - -[stop_words.py]: - https://github.com/explosion/spaCy/tree/master/spacy/lang/en/stop_words.py -[tokenizer_exceptions.py]: - https://github.com/explosion/spaCy/tree/master/spacy/lang/de/tokenizer_exceptions.py -[punctuation.py]: - https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py -[char_classes.py]: - https://github.com/explosion/spaCy/tree/master/spacy/lang/char_classes.py -[lex_attrs.py]: - https://github.com/explosion/spaCy/tree/master/spacy/lang/en/lex_attrs.py -[syntax_iterators.py]: - https://github.com/explosion/spaCy/tree/master/spacy/lang/en/syntax_iterators.py -[lemmatizer.py]: - https://github.com/explosion/spaCy/tree/master/spacy/lang/fr/lemmatizer.py -[spacy-lookups-data]: https://github.com/explosion/spacy-lookups-data +| Name | Description | +| ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Stop words**
[`stop_words.py`](%%GITHUB_SPACY/spacy/lang/en/stop_words.py) | List of most common words of a language that are often useful to filter out, for example "and" or "I". Matching tokens will return `True` for `is_stop`. | +| **Tokenizer exceptions**
[`tokenizer_exceptions.py`](%%GITHUB_SPACY/spacy/lang/de/tokenizer_exceptions.py) | Special-case rules for the tokenizer, for example, contractions like "can't" and abbreviations with punctuation, like "U.K.". | +| **Punctuation rules**
[`punctuation.py`](%%GITHUB_SPACY/spacy/lang/punctuation.py) | Regular expressions for splitting tokens, e.g. on punctuation or special characters like emoji. Includes rules for prefixes, suffixes and infixes. | +| **Character classes**
[`char_classes.py`](%%GITHUB_SPACY/spacy/lang/char_classes.py) | Character classes to be used in regular expressions, for example, Latin characters, quotes, hyphens or icons. | +| **Lexical attributes**
[`lex_attrs.py`](%%GITHUB_SPACY/spacy/lang/en/lex_attrs.py) | Custom functions for setting lexical attributes on tokens, e.g. `like_num`, which includes language-specific words like "ten" or "hundred". | +| **Syntax iterators**
[`syntax_iterators.py`](%%GITHUB_SPACY/spacy/lang/en/syntax_iterators.py) | Functions that compute views of a `Doc` object based on its syntax. At the moment, only used for [noun chunks](/usage/linguistic-features#noun-chunks). | +| **Lemmatizer**
[`lemmatizer.py`](%%GITHUB_SPACY/master/spacy/lang/fr/lemmatizer.py) [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) | Custom lemmatizer implementation and lemmatization tables. | diff --git a/website/docs/usage/_benchmarks-choi.md b/website/docs/usage/_benchmarks-choi.md deleted file mode 100644 index 47d6f479f..000000000 --- a/website/docs/usage/_benchmarks-choi.md +++ /dev/null @@ -1,10 +0,0 @@ -import { Help } from 'components/typography' - -| System | Year | Language | Accuracy | Speed (wps) | -| -------------- | ---- | --------------- | -------: | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| **spaCy v2.x** | 2017 | Python / Cython | **92.6** | _n/a_ This table shows speed as benchmarked by Choi et al. We therefore can't provide comparable figures, as we'd be running the benchmark on different hardware. | -| **spaCy v1.x** | 2015 | Python / Cython | 91.8 | 13,963 | -| ClearNLP | 2015 | Java | 91.7 | 10,271 | -| CoreNLP | 2015 | Java | 89.6 | 8,602 | -| MATE | 2015 | Java | 92.5 | 550 | -| Turbo | 2015 | C++ | 92.4 | 349 | diff --git a/website/docs/usage/_benchmarks-models.md b/website/docs/usage/_benchmarks-models.md new file mode 100644 index 000000000..0c04dd8d5 --- /dev/null +++ b/website/docs/usage/_benchmarks-models.md @@ -0,0 +1,44 @@ +import { Help } from 'components/typography'; import Link from 'components/link' + + + +
+
+| System | Parser | Tagger | NER | WPS CPU (words per second on CPU, higher is better) | WPS GPU (words per second on GPU, higher is better) |
+| --------------------------------------------------------------- | -----: | -----: | ---: | ---: | ---: |
+| [`en_core_web_trf`](/models/en#en_core_web_trf) (spaCy v3) | | | | | 6k |
+| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | | | | | |
+| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | 85.9 | 10k | |
+| [Stanza](https://stanfordnlp.github.io/stanza/) (StanfordNLP)¹ | _n/a_² | _n/a_² | 88.8 | 234 | 2k |
+| Flair | - | 97.9 | 89.3 | | |
+
+ +**Accuracy and speed on the +[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) corpus.**
**1. ** +[Qi et al. (2020)](https://arxiv.org/pdf/2003.07082.pdf). **2. ** _Coming soon_: +Qi et al. don't report parsing and tagging results on OntoNotes. We're working +on training Stanza on this corpus to allow direct comparison. + +
+ +
+ +
+
+| System | POS | UAS | LAS |
+| ------------------------------------------------------------------------------ | ---: | ---: | ---: |
+| spaCy RoBERTa (2020) | | | |
+| spaCy CNN (2020) | | | |
+| [Mrini et al.](https://khalilmrini.github.io/Label_Attention_Layer.pdf) (2019) | 97.3 | 97.4 | 96.3 |
+| [Zhou and Zhao](https://www.aclweb.org/anthology/P19-1230/) (2019) | 97.3 | 97.2 | 95.7 |
+
+ +**Accuracy on the Penn Treebank.** See +[NLP-progress](http://nlpprogress.com/english/dependency_parsing.html) for more +results. + +
+ +
diff --git a/website/docs/usage/embeddings-transformers.md b/website/docs/usage/embeddings-transformers.md index abd92a8ac..8dd104ead 100644 --- a/website/docs/usage/embeddings-transformers.md +++ b/website/docs/usage/embeddings-transformers.md @@ -253,10 +253,10 @@ for doc in nlp.pipe(["some text", "some other text"]): You can also customize how the [`Transformer`](/api/transformer) component sets annotations onto the [`Doc`](/api/doc), by specifying a custom -`annotation_setter`. This callback will be called with the raw input and output -data for the whole batch, along with the batch of `Doc` objects, allowing you to -implement whatever you need. The annotation setter is called with a batch of -[`Doc`](/api/doc) objects and a +`set_extra_annotations` function. This callback will be called with the raw +input and output data for the whole batch, along with the batch of `Doc` +objects, allowing you to implement whatever you need. The annotation setter is +called with a batch of [`Doc`](/api/doc) objects and a [`FullTransformerBatch`](/api/transformer#fulltransformerbatch) containing the transformers data for the batch. @@ -267,7 +267,7 @@ def custom_annotation_setter(docs, trf_data): doc._.custom_attr = data nlp = spacy.load("en_core_trf_lg") -nlp.get_pipe("transformer").annotation_setter = custom_annotation_setter +nlp.get_pipe("transformer").set_extra_annotations = custom_annotation_setter doc = nlp("This is a text") assert isinstance(doc._.custom_attr, TransformerData) print(doc._.custom_attr.tensors) @@ -314,7 +314,7 @@ component: > get_spans=get_doc_spans, > tokenizer_config={"use_fast": True}, > ), -> annotation_setter=null_annotation_setter, +> set_extra_annotations=null_annotation_setter, > max_batch_items=4096, > ) > ``` @@ -333,7 +333,7 @@ tokenizer_config = {"use_fast": true} [components.transformer.model.get_spans] @span_getters = "spacy-transformers.doc_spans.v1" -[components.transformer.annotation_setter] +[components.transformer.set_extra_annotations] @annotation_setters = "spacy-transformers.null_annotation_setter.v1" ``` @@ -579,12 +579,17 @@ def MyCustomVectors( ## Pretraining {#pretraining} + + + + > #### Raw text format > diff --git a/website/docs/usage/facts-figures.md b/website/docs/usage/facts-figures.md index e2549ecfc..36f86dd51 100644 --- a/website/docs/usage/facts-figures.md +++ b/website/docs/usage/facts-figures.md @@ -5,254 +5,73 @@ next: /usage/spacy-101 menu: - ['Feature Comparison', 'comparison'] - ['Benchmarks', 'benchmarks'] + # TODO: - ['Citing spaCy', 'citation'] --- -## Feature comparison {#comparison} +## Comparison {#comparison hidden="true"} -Here's a quick comparison of the functionalities offered by spaCy, -[NLTK](http://www.nltk.org/py-modindex.html) and -[CoreNLP](http://stanfordnlp.github.io/CoreNLP/). +### When should I use spaCy? {#comparison-usage} -| | spaCy | NLTK | CoreNLP | -| ----------------------- | :----: | :----: | :-----------: | -| Programming language | Python | Python | Java / Python | -| Neural network models | ✅ | ❌ | ✅ | -| Integrated word vectors | ✅ | ❌ | ❌ | -| Multi-language support | ✅ | ✅ | ✅ | -| Tokenization | ✅ | ✅ | ✅ | -| Part-of-speech tagging | ✅ | ✅ | ✅ | -| Sentence segmentation | ✅ | ✅ | ✅ | -| Dependency parsing | ✅ | ❌ | ✅ | -| Entity recognition | ✅ | ✅ | ✅ | -| Entity linking | ✅ | ❌ | ❌ | -| Coreference resolution | ❌ | ❌ | ✅ | - -### When should I use what? 
{#comparison-usage} - -Natural Language Understanding is an active area of research and development, so -there are many different tools or technologies catering to different use-cases. -The table below summarizes a few libraries (spaCy, -[NLTK](http://www.nltk.org/py-modindex.html), [AllenNLP](https://allennlp.org/), -[StanfordNLP](https://stanfordnlp.github.io/stanfordnlp/) and -[TensorFlow](https://www.tensorflow.org/)) to help you get a feel for things fit -together. - -| | spaCy | NLTK | Allen-
NLP | Stanford-
NLP | Tensor-
Flow | -| ----------------------------------------------------------------- | :---: | :--: | :-------------: | :----------------: | :---------------: | -| I'm a beginner and just getting started with NLP. | ✅ | ✅ | ❌ | ✅ | ❌ | -| I want to build an end-to-end production application. | ✅ | ❌ | ❌ | ❌ | ✅ | -| I want to try out different neural network architectures for NLP. | ❌ | ❌ | ✅ | ❌ | ✅ | -| I want to try the latest models with state-of-the-art accuracy. | ❌ | ❌ | ✅ | ✅ | ✅ | -| I want to train models from my own data. | ✅ | ✅ | ✅ | ✅ | ✅ | -| I want my application to be efficient on CPU. | ✅ | ✅ | ❌ | ❌ | ❌ | +- ✅ **I'm a beginner and just getting started with NLP.** – spaCy makes it easy + to get started and comes with extensive documentation, including a + beginner-friendly [101 guide](/usage/spacy-101), a free interactive + [online course](https://course.spacy.io) and a range of + [video tutorials](https://www.youtube.com/c/ExplosionAI). +- ✅ **I want to build an end-to-end production application.** – spaCy is + specifically designed for production use and lets you build and train powerful + NLP pipelines and package them for easy deployment. +- ✅ **I want my application to be efficient on GPU _and_ CPU.** – While spaCy + lets you train modern NLP models that are best run on GPU, it also offers + CPU-optimized pipelines, which are less accurate but much cheaper to run. +- ✅ **I want to try out different neural network architectures for NLP.** – + spaCy lets you customize and swap out the model architectures powering its + components, and implement your own using a framework like PyTorch or + TensorFlow. The declarative configuration system makes it easy to mix and + match functions and keep track of your hyperparameters to make sure your + experiments are reproducible. +- ❌ **I want to build a language generation application.** – spaCy's focus is + natural language _processing_ and extracting information from large volumes of + text. While you can use it to help you re-write existing text, it doesn't + include any specific functionality for language generation tasks. +- ❌ **I want to research machine learning algorithms.** spaCy is built on the + latest research, but it's not a research library. If your goal is to write + papers and run benchmarks, spaCy is probably not a good choice. However, you + can use it to make the results of your research easily available for others to + use, e.g. via a custom spaCy component. ## Benchmarks {#benchmarks} -Two peer-reviewed papers in 2015 confirmed that spaCy offers the **fastest -syntactic parser in the world** and that **its accuracy is within 1% of the -best** available. The few systems that are more accurate are 20× slower or more. +spaCy v3.0 introduces transformer-based pipelines that bring spaCy's accuracy +right up to **current state-of-the-art**. You can also use a CPU-optimized +pipeline, which is less accurate but much cheaper to run. -> #### About the evaluation + + +> #### Evaluation details > -> The first of the evaluations was published by **Yahoo! Labs** and **Emory -> University**, as part of a survey of current parsing technologies -> ([Choi et al., 2015](https://aclweb.org/anthology/P/P15/P15-1038.pdf)). Their -> results and subsequent discussions helped us develop a novel -> psychologically-motivated technique to improve spaCy's accuracy, which we -> published in joint work with Macquarie University -> ([Honnibal and Johnson, 2015](https://www.aclweb.org/anthology/D/D15/D15-1162.pdf)). 
+> - **OntoNotes 5.0:** spaCy's English models are trained on this corpus, as +> it's several times larger than other English treebanks. However, most +> systems do not report accuracies on it. +> - **Penn Treebank:** The "classic" parsing evaluation for research. However, +> it's quite far removed from actual usage: it uses sentences with +> gold-standard segmentation and tokenization, from a pretty specific type of +> text (articles from a single newspaper, 1984-1989). -import BenchmarksChoi from 'usage/\_benchmarks-choi.md' +import Benchmarks from 'usage/\_benchmarks-models.md' - + -### Algorithm comparison {#algorithm} + -| System | Year | Type | Accuracy | -| ------------------------------------------------------------ | ---- | ------ | --------: | -| spaCy v2.0.0 | 2017 | neural | 94.48 | -| spaCy v1.1.0 | 2016 | linear | 92.80 | -| [Dozat and Manning][dozat and manning] | 2017 | neural | **95.75** | -| [Andor et al.][andor et al.] | 2016 | neural | 94.44 | -| [SyntaxNet Parsey McParseface][syntaxnet parsey mcparseface] | 2016 | neural | 94.15 | -| [Weiss et al.][weiss et al.] | 2015 | neural | 93.91 | -| [Zhang and McDonald][zhang and mcdonald] | 2014 | linear | 93.32 | -| [Martins et al.][martins et al.] | 2013 | linear | 93.10 | + diff --git a/website/docs/usage/index.md b/website/docs/usage/index.md index ee5fd0a3b..170e16591 100644 --- a/website/docs/usage/index.md +++ b/website/docs/usage/index.md @@ -166,10 +166,9 @@ $ python setup.py build_ext --inplace # compile spaCy ``` Compared to regular install via pip, the -[`requirements.txt`](https://github.com/explosion/spaCy/tree/master/requirements.txt) -additionally installs developer dependencies such as Cython. See the -[quickstart widget](#quickstart) to get the right commands for your platform and -Python version. +[`requirements.txt`](%%GITHUB_SPACY/requirements.txt) additionally installs +developer dependencies such as Cython. See the [quickstart widget](#quickstart) +to get the right commands for your platform and Python version. #### Ubuntu {#source-ubuntu} @@ -195,16 +194,14 @@ that matches the version that was used to compile your Python interpreter. ### Run tests {#run-tests} -spaCy comes with an -[extensive test suite](https://github.com/explosion/spaCy/tree/master/spacy/tests). -In order to run the tests, you'll usually want to clone the -[repository](https://github.com/explosion/spaCy/tree/master/) and -[build spaCy from source](#source). This will also install the required +spaCy comes with an [extensive test suite](%%GITHUB_SPACY/spacy/tests). In order +to run the tests, you'll usually want to clone the [repository](%%GITHUB_SPACY) +and [build spaCy from source](#source). This will also install the required development dependencies and test utilities defined in the `requirements.txt`. Alternatively, you can find out where spaCy is installed and run `pytest` on that directory. 
Don't forget to also install the test utilities via spaCy's -[`requirements.txt`](https://github.com/explosion/spaCy/tree/master/requirements.txt): +[`requirements.txt`](%%GITHUB_SPACY/requirements.txt): ```bash $ python -c "import os; import spacy; print(os.path.dirname(spacy.__file__))" diff --git a/website/docs/usage/layers-architectures.md b/website/docs/usage/layers-architectures.md index e24b776c8..aefc64ece 100644 --- a/website/docs/usage/layers-architectures.md +++ b/website/docs/usage/layers-architectures.md @@ -5,7 +5,7 @@ menu: - ['Type Signatures', 'type-sigs'] - ['Swapping Architectures', 'swap-architectures'] - ['PyTorch & TensorFlow', 'frameworks'] - - ['Thinc Models', 'thinc'] + - ['Custom Thinc Models', 'thinc'] - ['Trainable Components', 'components'] next: /usage/projects --- @@ -28,9 +28,9 @@ A **model architecture** is a function that wires up a neural network that is run internally as part of a component in a spaCy pipeline. To define the actual architecture, you can implement your logic in Thinc directly, or you can use Thinc as a thin wrapper around frameworks such as -PyTorch, TensorFlow and MXNet. Each Model can also be used as a sublayer of a +PyTorch, TensorFlow and MXNet. Each `Model` can also be used as a sublayer of a larger network, allowing you to freely combine implementations from different -frameworks into one `Thinc` Model. +frameworks into a single model. spaCy's built-in components require a `Model` instance to be passed to them via the config system. To change the model architecture of an existing component, @@ -118,7 +118,7 @@ code. If no model is specified for the [`TextCategorizer`](/api/textcategorizer), the [TextCatEnsemble](/api/architectures#TextCatEnsemble) architecture is used by -default. This architecture combines a simpel bag-of-words model with a neural +default. This architecture combines a simple bag-of-words model with a neural network, usually resulting in the most accurate results, but at the cost of speed. The config file for this model would look something like this: @@ -225,31 +225,266 @@ you'll be able to try it out in any of the spaCy components. ​ Thinc allows you to [wrap models](https://thinc.ai/docs/usage-frameworks) written in other machine learning frameworks like PyTorch, TensorFlow and MXNet -using a unified [`Model`](https://thinc.ai/docs/api-model) API. As well as -**wrapping whole models**, Thinc lets you call into an external framework for -just **part of your model**: you can have a model where you use PyTorch just for -the transformer layers, using "native" Thinc layers to do fiddly input and -output transformations and add on task-specific "heads", as efficiency is less -of a consideration for those parts of the network. +using a unified [`Model`](https://thinc.ai/docs/api-model) API. This makes it +easy to use a model implemented in a different framework to power a component in +your spaCy pipeline. 
For example, to wrap a PyTorch model as a Thinc `Model`, +you can use Thinc's +[`PyTorchWrapper`](https://thinc.ai/docs/api-layers#pytorchwrapper): - +```python +from thinc.api import PyTorchWrapper -## Implementing models in Thinc {#thinc} +wrapped_pt_model = PyTorchWrapper(torch_model) +``` - +Let's use PyTorch to define a very simple neural network consisting of two +hidden `Linear` layers with `ReLU` activation and dropout, and a +softmax-activated output layer: -## Models for trainable components {#components} +```python +### PyTorch model +from torch import nn + +torch_model = nn.Sequential( + nn.Linear(width, hidden_width), + nn.ReLU(), + nn.Dropout2d(dropout), + nn.Linear(hidden_width, nO), + nn.ReLU(), + nn.Dropout2d(dropout), + nn.Softmax(dim=1) +) +``` + +The resulting wrapped `Model` can be used as a **custom architecture** as such, +or can be a **subcomponent of a larger model**. For instance, we can use Thinc's +[`chain`](https://thinc.ai/docs/api-layers#chain) combinator, which works like +`Sequential` in PyTorch, to combine the wrapped model with other components in a +larger network. This effectively means that you can easily wrap different +components from different frameworks, and "glue" them together with Thinc: + +```python +from thinc.api import chain, with_array, PyTorchWrapper +from spacy.ml import CharacterEmbed + +wrapped_pt_model = PyTorchWrapper(torch_model) +char_embed = CharacterEmbed(width, embed_size, nM, nC) +model = chain(char_embed, with_array(wrapped_pt_model)) +``` + +In the above example, we have combined our custom PyTorch model with a character +embedding layer defined by spaCy. +[CharacterEmbed](/api/architectures#CharacterEmbed) returns a `Model` that takes +a ~~List[Doc]~~ as input, and outputs a ~~List[Floats2d]~~. To make sure that +the wrapped PyTorch model receives valid inputs, we use Thinc's +[`with_array`](https://thinc.ai/docs/api-layers#with_array) helper. + +You could also implement a model that only uses PyTorch for the transformer +layers, and "native" Thinc layers to do fiddly input and output transformations +and add on task-specific "heads", as efficiency is less of a consideration for +those parts of the network. + +### Using wrapped models {#frameworks-usage} + +To use our custom model including the PyTorch subnetwork, all we need to do is +register the architecture using the +[`architectures` registry](/api/top-level#registry). This will assign the +architecture a name so spaCy knows how to find it, and allows passing in +arguments like hyperparameters via the [config](/usage/training#config). 
The +full example then becomes: + +```python +### Registering the architecture {highlight="9"} +from typing import List +from thinc.types import Floats2d +from thinc.api import Model, PyTorchWrapper, chain, with_array +import spacy +from spacy.tokens.doc import Doc +from spacy.ml import CharacterEmbed +from torch import nn + +@spacy.registry.architectures("CustomTorchModel.v1") +def create_torch_model( + nO: int, + width: int, + hidden_width: int, + embed_size: int, + nM: int, + nC: int, + dropout: float, +) -> Model[List[Doc], List[Floats2d]]: + char_embed = CharacterEmbed(width, embed_size, nM, nC) + torch_model = nn.Sequential( + nn.Linear(width, hidden_width), + nn.ReLU(), + nn.Dropout2d(dropout), + nn.Linear(hidden_width, nO), + nn.ReLU(), + nn.Dropout2d(dropout), + nn.Softmax(dim=1) + ) + wrapped_pt_model = PyTorchWrapper(torch_model) + model = chain(char_embed, with_array(wrapped_pt_model)) + return model +``` + +The model definition can now be used in any existing trainable spaCy component, +by specifying it in the config file. In this configuration, all required +parameters for the various subcomponents of the custom architecture are passed +in as settings via the config. + +```ini +### config.cfg (excerpt) {highlight="5-5"} +[components.tagger] +factory = "tagger" + +[components.tagger.model] +@architectures = "CustomTorchModel.v1" +nO = 50 +width = 96 +hidden_width = 48 +embed_size = 2000 +nM = 64 +nC = 8 +dropout = 0.2 +``` + + + +Remember that it is best not to rely on any (hidden) default values, to ensure +that training configs are complete and experiments fully reproducible. + + + +## Custom models with Thinc {#thinc} + +Of course it's also possible to define the `Model` from the previous section +entirely in Thinc. The Thinc documentation provides details on the +[various layers](https://thinc.ai/docs/api-layers) and helper functions +available. Combinators can also be used to +[overload operators](https://thinc.ai/docs/usage-models#operators) and a common +usage pattern is to bind `chain` to `>>`. The "native" Thinc version of our +simple neural network would then become: + +```python +from thinc.api import chain, with_array, Model, Relu, Dropout, Softmax +from spacy.ml import CharacterEmbed + +char_embed = CharacterEmbed(width, embed_size, nM, nC) +with Model.define_operators({">>": chain}): + layers = ( + Relu(hidden_width, width) + >> Dropout(dropout) + >> Relu(hidden_width, hidden_width) + >> Dropout(dropout) + >> Softmax(nO, hidden_width) + ) + model = char_embed >> with_array(layers) +``` + + + +Note that Thinc layers define the output dimension (`nO`) as the first argument, +followed (optionally) by the input dimension (`nI`). This is in contrast to how +the PyTorch layers are defined, where `in_features` precedes `out_features`. + + + +### Shape inference in Thinc {#thinc-shape-inference} + +It is **not** strictly necessary to define all the input and output dimensions +for each layer, as Thinc can perform +[shape inference](https://thinc.ai/docs/usage-models#validation) between +sequential layers by matching up the output dimensionality of one layer to the +input dimensionality of the next. 
This means that we can simplify the `layers` +definition: + +> #### Diff +> +> ```diff +> layers = ( +> Relu(hidden_width, width) +> >> Dropout(dropout) +> - >> Relu(hidden_width, hidden_width) +> + >> Relu(hidden_width) +> >> Dropout(dropout) +> - >> Softmax(nO, hidden_width) +> + >> Softmax(nO) +> ) +> ``` + +```python +with Model.define_operators({">>": chain}): + layers = ( + Relu(hidden_width, width) + >> Dropout(dropout) + >> Relu(hidden_width) + >> Dropout(dropout) + >> Softmax(nO) + ) +``` + +Thinc can even go one step further and **deduce the correct input dimension** of +the first layer, and output dimension of the last. To enable this functionality, +you have to call +[`Model.initialize`](https://thinc.ai/docs/api-model#initialize) with an **input +sample** `X` and an **output sample** `Y` with the correct dimensions: + +```python +### Shape inference with initialization {highlight="3,7,10"} +with Model.define_operators({">>": chain}): + layers = ( + Relu(hidden_width) + >> Dropout(dropout) + >> Relu(hidden_width) + >> Dropout(dropout) + >> Softmax() + ) + model = char_embed >> with_array(layers) + model.initialize(X=input_sample, Y=output_sample) +``` + +The built-in [pipeline components](/usage/processing-pipelines) in spaCy ensure +that their internal models are **always initialized** with appropriate sample +data. In this case, `X` is typically a ~~List[Doc]~~, while `Y` is typically a +~~List[Array1d]~~ or ~~List[Array2d]~~, depending on the specific task. This +functionality is triggered when +[`nlp.begin_training`](/api/language#begin_training) is called. + +### Dropout and normalization in Thinc {#thinc-dropout-norm} + +Many of the available Thinc [layers](https://thinc.ai/docs/api-layers) allow you +to define a `dropout` argument that will result in "chaining" an additional +[`Dropout`](https://thinc.ai/docs/api-layers#dropout) layer. Optionally, you can +often specify whether or not you want to add layer normalization, which would +result in an additional +[`LayerNorm`](https://thinc.ai/docs/api-layers#layernorm) layer. That means that +the following `layers` definition is equivalent to the previous: + +```python +with Model.define_operators({">>": chain}): + layers = ( + Relu(hidden_width, dropout=dropout, normalize=False) + >> Relu(hidden_width, dropout=dropout, normalize=False) + >> Softmax() + ) + model = char_embed >> with_array(layers) + model.initialize(X=input_sample, Y=output_sample) +``` + +## Create new trainable components {#components} + + + -![Diagram of a pipeline component with its model](../images/layers-architectures.svg) + diff --git a/website/docs/usage/linguistic-features.md b/website/docs/usage/linguistic-features.md index b36e9b71f..a229c18e9 100644 --- a/website/docs/usage/linguistic-features.md +++ b/website/docs/usage/linguistic-features.md @@ -854,24 +854,22 @@ The algorithm can be summarized as follows: **Global** and **language-specific** tokenizer data is supplied via the language -data in -[`spacy/lang`](https://github.com/explosion/spaCy/tree/master/spacy/lang). The -tokenizer exceptions define special cases like "don't" in English, which needs -to be split into two tokens: `{ORTH: "do"}` and `{ORTH: "n't", NORM: "not"}`. -The prefixes, suffixes and infixes mostly define punctuation rules – for -example, when to split off periods (at the end of a sentence), and when to leave -tokens containing periods intact (abbreviations like "U.S."). +data in [`spacy/lang`](%%GITHUB_SPACY/spacy/lang). 
The tokenizer exceptions +define special cases like "don't" in English, which needs to be split into two +tokens: `{ORTH: "do"}` and `{ORTH: "n't", NORM: "not"}`. The prefixes, suffixes +and infixes mostly define punctuation rules – for example, when to split off +periods (at the end of a sentence), and when to leave tokens containing periods +intact (abbreviations like "U.S."). Tokenization rules that are specific to one language, but can be **generalized across that language** should ideally live in the language data in -[`spacy/lang`](https://github.com/explosion/spaCy/tree/master/spacy/lang) – we -always appreciate pull requests! Anything that's specific to a domain or text -type – like financial trading abbreviations, or Bavarian youth slang – should be -added as a special case rule to your tokenizer instance. If you're dealing with -a lot of customizations, it might make sense to create an entirely custom -subclass. +[`spacy/lang`](%%GITHUB_SPACY/spacy/lang) – we always appreciate pull requests! +Anything that's specific to a domain or text type – like financial trading +abbreviations, or Bavarian youth slang – should be added as a special case rule +to your tokenizer instance. If you're dealing with a lot of customizations, it +might make sense to create an entirely custom subclass. @@ -1059,7 +1057,7 @@ but also detailed regular expressions that take the surrounding context into account. For example, there is a regular expression that treats a hyphen between letters as an infix. If you do not want the tokenizer to split on hyphens between letters, you can modify the existing infix definition from -[`lang/punctuation.py`](https://github.com/explosion/spaCy/blob/master/spacy/lang/punctuation.py): +[`lang/punctuation.py`](%%GITHUB_SPACY/spacy/lang/punctuation.py): ```python ### {executable="true"} @@ -1096,10 +1094,10 @@ print([t.text for t in doc]) # ['mother-in-law'] ``` For an overview of the default regular expressions, see -[`lang/punctuation.py`](https://github.com/explosion/spaCy/blob/master/spacy/lang/punctuation.py) -and language-specific definitions such as -[`lang/de/punctuation.py`](https://github.com/explosion/spaCy/blob/master/spacy/lang/de/punctuation.py) -for German. +[`lang/punctuation.py`](%%GITHUB_SPACY/spacy/lang/punctuation.py) and +language-specific definitions such as +[`lang/de/punctuation.py`](%%GITHUB_SPACY/spacy/lang/de/punctuation.py) for +German. ### Hooking a custom tokenizer into the pipeline {#custom-tokenizer} @@ -1366,7 +1364,7 @@ token. ```python ### {executable="true"} -from spacy.gold import Alignment +from spacy.training import Alignment other_tokens = ["i", "listened", "to", "obama", "'", "s", "podcasts", "."] spacy_tokens = ["i", "listened", "to", "obama", "'s", "podcasts", "."] diff --git a/website/docs/usage/models.md b/website/docs/usage/models.md index 9b1e96e4e..e94cdfe9e 100644 --- a/website/docs/usage/models.md +++ b/website/docs/usage/models.md @@ -76,7 +76,7 @@ spaCy also supports pipelines trained on more than one language. This is especially useful for named entity recognition. The language ID used for multi-language or language-neutral pipelines is `xx`. The language class, a generic subclass containing only the base language data, can be found in -[`lang/xx`](https://github.com/explosion/spaCy/tree/master/spacy/lang/xx). +[`lang/xx`](%%GITHUB_SPACY/spacy/lang/xx). To train a pipeline using the neutral multi-language class, you can set `lang = "xx"` in your [training config](/usage/training#config). 
You can also diff --git a/website/docs/usage/processing-pipelines.md b/website/docs/usage/processing-pipelines.md index 0da350f27..3d756215f 100644 --- a/website/docs/usage/processing-pipelines.md +++ b/website/docs/usage/processing-pipelines.md @@ -1028,11 +1028,11 @@ plug fully custom machine learning components into your pipeline. You'll need the following: 1. **Model:** A Thinc [`Model`](https://thinc.ai/docs/api-model) instance. This - can be a model using [layers](https://thinc.ai/docs/api-layers) implemented - in Thinc, or a [wrapped model](https://thinc.ai/docs/usage-frameworks) - implemented in PyTorch, TensorFlow, MXNet or a fully custom solution. The - model must take a list of [`Doc`](/api/doc) objects as input and can have any - type of output. + can be a model using implemented in + [Thinc](/usage/layers-architectures#thinc), or a + [wrapped model](/usage/layers-architectures#frameworks) implemented in + PyTorch, TensorFlow, MXNet or a fully custom solution. The model must take a + list of [`Doc`](/api/doc) objects as input and can have any type of output. 2. **Pipe subclass:** A subclass of [`Pipe`](/api/pipe) that implements at least two methods: [`Pipe.predict`](/api/pipe#predict) and [`Pipe.set_annotations`](/api/pipe#set_annotations). @@ -1078,8 +1078,9 @@ _first_ create a `Model` from a [registered architecture](/api/architectures), validate its arguments and _then_ pass the object forward to the component. This means that the config can express very complex, nested trees of objects – but the objects don't have to pass the model settings all the way down to the -components. It also makes the components more **modular** and lets you swap -different architectures in your config, and re-use model definitions. +components. It also makes the components more **modular** and lets you +[swap](/usage/layers-architectures#swap-architectures) different architectures +in your config, and re-use model definitions. ```ini ### config.cfg (excerpt) @@ -1134,7 +1135,7 @@ loss is calculated and to add evaluation scores to the training output. For more details on how to implement your own trainable components and model architectures, and plug existing models implemented in PyTorch or TensorFlow into your spaCy pipeline, see the usage guide on -[layers and model architectures](/usage/layers-architectures#components). +[layers and model architectures](/usage/layers-architectures).
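To make the division of labor concrete, here is a minimal sketch of such a component. The names `MyCustomPipe` and `my_scores` are hypothetical, and a real trainable component would also implement methods like `get_loss` – see the guide linked above for the full picture:

```python
from spacy.pipeline import Pipe
from spacy.tokens import Doc

# Hypothetical custom attribute used to store the model's output
Doc.set_extension("my_scores", default=None)

class MyCustomPipe(Pipe):
    """Sketch of a custom pipe. Assumes self.model is a Thinc Model that
    takes a List[Doc] as input."""

    def predict(self, docs):
        # Apply the model to a batch of Doc objects, without modifying them
        return self.model.predict(docs)

    def set_annotations(self, docs, scores):
        # Write the predictions back onto the Doc objects
        for doc, doc_scores in zip(docs, scores):
            doc._.my_scores = doc_scores
```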
@@ -1500,7 +1501,7 @@ add those entities to the `doc.ents`, you can wrap it in a custom pipeline component function and pass it the token texts from the `Doc` object received by the component. -The [`gold.spans_from_biluo_tags`](/api/top-level#spans_from_biluo_tags) is very +The [`training.spans_from_biluo_tags`](/api/top-level#spans_from_biluo_tags) is very helpful here, because it takes a `Doc` object and token-based BILUO tags and returns a sequence of `Span` objects in the `Doc` with added labels. So all your wrapper has to do is compute the entity spans and overwrite the `doc.ents`. @@ -1515,7 +1516,7 @@ wrapper has to do is compute the entity spans and overwrite the `doc.ents`. ```python ### {highlight="1,8-9"} import your_custom_entity_recognizer -from spacy.gold import offsets_from_biluo_tags +from spacy.training import offsets_from_biluo_tags from spacy.language import Language @Language.component("custom_ner_wrapper") diff --git a/website/docs/usage/projects.md b/website/docs/usage/projects.md index b6688cd5d..81ddf40fb 100644 --- a/website/docs/usage/projects.md +++ b/website/docs/usage/projects.md @@ -65,6 +65,8 @@ project template and copies the files to a local directory. You can then run the project, e.g. to train a pipeline and edit the commands and scripts to build fully custom workflows. + + ```cli python -m spacy project clone some_example_project ``` @@ -217,7 +219,7 @@ pipelines. ```yaml -https://github.com/explosion/spacy-boilerplates/blob/master/ner_fashion/project.yml +https://github.com/explosion/projects/tree/v3/tutorials/ner_fashion_brands/project.yml ``` | Section | Description | @@ -726,18 +728,21 @@ workflows, but only one can be tracked by DVC.
- + --- ### Prodigy {#prodigy} + + +The Prodigy integration will require a nightly version of Prodigy that supports +spaCy v3+. + + + [Prodigy](https://prodi.gy) is a modern annotation tool for creating training data for machine learning models, developed by us. It integrates with spaCy out-of-the-box and provides many different @@ -793,9 +798,7 @@ results. -Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus interdum -sodales lectus, ut sodales orci ullamcorper id. Sed condimentum neque ut erat -mattis pretium. + @@ -803,43 +806,34 @@ mattis pretium. ### Streamlit {#streamlit} - - -
- [Streamlit](https://streamlit.io) is a Python framework for building interactive data apps. The [`spacy-streamlit`](https://github.com/explosion/spacy-streamlit) package helps you integrate spaCy visualizations into your Streamlit apps and quickly spin up demos to explore your pipelines interactively. It includes a full embedded visualizer, as well as individual components. -```bash -$ pip install spacy_streamlit -``` + -
+> #### Installation +> +> ```bash +> $ pip install "spacy_streamlit>=1.0.0a0" +> ``` ![](../images/spacy-streamlit.png) -
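For reference, the script that `streamlit run` points to can be as small as the following sketch – the pipeline name and default text are placeholders, and the full version lives in the project template linked below:

```python
### visualize.py (sketch)
import spacy_streamlit

# Placeholders – point this at your own trained pipeline(s) and example text
spacy_streamlit.visualize(
    ["en_core_web_sm"],
    "Sundar Pichai is the CEO of Google.",
    visualizers=["ner"],
)
```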
- Using [`spacy-streamlit`](https://github.com/explosion/spacy-streamlit), your projects can easily define their own scripts that spin up an interactive visualizer, using the latest pipeline you trained, or a selection of pipelines -so you can compare their results. The following script starts an -[NER visualizer](/usage/visualizers#ent) and takes two positional command-line -argument you can pass in from your `config.yml`: a comma-separated list of paths -to load the pipelines from and an example text to use as the default text. +so you can compare their results. -```python -### scripts/visualize.py -import spacy_streamlit -import sys + -DEFAULT_TEXT = sys.argv[2] if len(sys.argv) >= 3 else "" -PIPELINES = [name.strip() for name in sys.argv[1].split(",")] -spacy_streamlit.visualize(PIPELINES, DEFAULT_TEXT, visualizers=["ner"]) -``` +Get started with spaCy and Streamlit using our project template. It includes a +script to spin up a custom visualizer and commands you can adjust to showcase +and explore your own custom trained pipelines. + + > #### Example usage > @@ -856,16 +850,16 @@ commands: script: - 'streamlit run ./scripts/visualize.py ./training/model-best "I like Adidas shoes."' deps: - - 'training/model-best' + - "training/model-best" ``` - +The following script is called from the `project.yml` and takes two positional +command-line argument: a comma-separated list of paths or packages to load the +pipelines from and an example text to use as the default text. -Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus interdum -sodales lectus, ut sodales orci ullamcorper id. Sed condimentum neque ut erat -mattis pretium. - - +```python +https://github.com/explosion/projects/blob/v3/integrations/streamlit/scripts/visualize.py +``` --- @@ -878,9 +872,13 @@ library for serving machine learning models and you can use it in your spaCy projects to quickly serve up a trained pipeline and make it available behind a REST API. -```python -# TODO: show an example that addresses some of the main concerns for serving ML (workers etc.) -``` + + +Get started with spaCy and FastAPI using our project template. It includes a +simple REST API for processing batches of text, and usage examples for how to +query your API from Python and JavaScript (Vanilla JS and React). + + > #### Example usage > @@ -891,32 +889,66 @@ REST API. ```yaml ### project.yml -commands: - - name: serve - help: "Serve the trained pipeline with FastAPI" + - name: "serve" + help: "Serve the models via a FastAPI REST API using the given host and port" script: - - 'python ./scripts/serve.py ./training/model-best' + - "uvicorn scripts.main:app --reload --host 127.0.0.1 --port 5000" deps: - - 'training/model-best' + - "scripts/main.py" no_skip: true ``` - +The script included in the template shows a simple REST API with a `POST` +endpoint that accepts batches of texts and returns batches of predictions, e.g. +named entities found in the documents. Type hints and +[`pydantic`](https://github.com/samuelcolvin/pydantic) are used to define the +expected data types. -Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus interdum -sodales lectus, ut sodales orci ullamcorper id. Sed condimentum neque ut erat -mattis pretium. - - +```python +https://github.com/explosion/projects/blob/v3/integrations/fastapi/scripts/main.py +``` --- ### Ray {#ray} + + + --- ### Weights & Biases {#wandb} - +[Weights & Biases](https://www.wandb.com/) is a popular platform for experiment +tracking. 
spaCy integrates with it out-of-the-box via the +[`WandbLogger`](/api/top-level#WandbLogger), which you can add as the +`[training.logger]` block of your training [config](/usage/training#config). The +results of each step are then logged in your project, together with the full +**training config**. This means that _every_ hyperparameter, registered function +name and argument will be tracked and you'll be able to see the impact it has on +your results. + +> #### Example config +> +> ```ini +> [training.logger] +> @loggers = "spacy.WandbLogger.v1" +> project_name = "monitor_spacy_training" +> remove_config_values = ["paths.train", "paths.dev", "training.dev_corpus.path", "training.train_corpus.path"] +> ``` + +![Screenshot: Visualized training results](../images/wandb1.jpg) + +![Screenshot: Parameter importance using config values](../images/wandb2.jpg 'Parameter importance using config values') + + + +Get started with tracking your spaCy training runs in Weights & Biases using our +project template. It includes a simple config using the `WandbLogger`, as well +as a custom logger implementation you can adjust for your specific use case. + + + + diff --git a/website/docs/usage/rule-based-matching.md b/website/docs/usage/rule-based-matching.md index 01d60ddb8..2d6159f3d 100644 --- a/website/docs/usage/rule-based-matching.md +++ b/website/docs/usage/rule-based-matching.md @@ -192,12 +192,11 @@ of [`Token`](/api/token). This means that all of the attributes that refer to computed properties can't be accessed. The uppercase attribute names like `LOWER` or `IS_PUNCT` refer to symbols from -the -[`spacy.attrs`](https://github.com/explosion/spaCy/tree/master/spacy/attrs.pyx) -enum table. They're passed into a function that essentially is a big case/switch -statement, to figure out which struct field to return. The same attribute -identifiers are used in [`Doc.to_array`](/api/doc#to_array), and a few other -places in the code where you need to describe fields like this. +the [`spacy.attrs`](%%GITHUB_SPACY/spacy/attrs.pyx) enum table. They're passed +into a function that essentially is a big case/switch statement, to figure out +which struct field to return. The same attribute identifiers are used in +[`Doc.to_array`](/api/doc#to_array), and a few other places in the code where +you need to describe fields like this. diff --git a/website/docs/usage/saving-loading.md b/website/docs/usage/saving-loading.md index 9955e7d84..c0fe1323c 100644 --- a/website/docs/usage/saving-loading.md +++ b/website/docs/usage/saving-loading.md @@ -187,11 +187,11 @@ add to that data and saves and loads the data to and from a JSON file. > > To see custom serialization methods in action, check out the new > [`EntityRuler`](/api/entityruler) component and its -> [source](https://github.com/explosion/spaCy/tree/master/spacy/pipeline/entityruler.py). -> Patterns added to the component will be saved to a `.jsonl` file if the -> pipeline is serialized to disk, and to a bytestring if the pipeline is -> serialized to bytes. This allows saving out a pipeline with a rule-based -> entity recognizer and including all rules _with_ the component data. +> [source](%%GITHUB_SPACY/spacy/pipeline/entityruler.py). Patterns added to the +> component will be saved to a `.jsonl` file if the pipeline is serialized to +> disk, and to a bytestring if the pipeline is serialized to bytes. This allows +> saving out a pipeline with a rule-based entity recognizer and including all +> rules _with_ the component data. 
```python ### {highlight="14-18,20-25"} diff --git a/website/docs/usage/spacy-101.md b/website/docs/usage/spacy-101.md index 82fec4b6a..cd1b2cb0c 100644 --- a/website/docs/usage/spacy-101.md +++ b/website/docs/usage/spacy-101.md @@ -84,15 +84,13 @@ systems, or to pre-process text for **deep learning**. ### What spaCy isn't {#what-spacy-isnt} -- **spaCy is not a platform or "an API"**. Unlike a platform, spaCy does not +- ❌ **spaCy is not a platform or "an API"**. Unlike a platform, spaCy does not provide a software as a service, or a web application. It's an open-source library designed to help you build NLP applications, not a consumable service. - -- **spaCy is not an out-of-the-box chat bot engine**. While spaCy can be used to - power conversational applications, it's not designed specifically for chat +- ❌ **spaCy is not an out-of-the-box chat bot engine**. While spaCy can be used + to power conversational applications, it's not designed specifically for chat bots, and only provides the underlying text processing capabilities. - -- **spaCy is not research software**. It's built on the latest research, but +- ❌**spaCy is not research software**. It's built on the latest research, but it's designed to get things done. This leads to fairly different design decisions than [NLTK](https://github.com/nltk/nltk) or [CoreNLP](https://stanfordnlp.github.io/CoreNLP/), which were created as @@ -101,8 +99,7 @@ systems, or to pre-process text for **deep learning**. between multiple algorithms that deliver equivalent functionality. Keeping the menu small lets spaCy deliver generally better performance and developer experience. - -- **spaCy is not a company**. It's an open-source library. Our company +- ❌ **spaCy is not a company**. It's an open-source library. Our company publishing spaCy and other software is called [Explosion](https://explosion.ai). @@ -494,7 +491,7 @@ regressions to the parts of the library that you care about the most. **For more details on the types of contributions we're looking for, the code conventions and other useful tips, make sure to check out the -[contributing guidelines](https://github.com/explosion/spaCy/tree/master/CONTRIBUTING.md).** +[contributing guidelines](%%GITHUB_SPACY/CONTRIBUTING.md).** diff --git a/website/docs/usage/training.md b/website/docs/usage/training.md index 9c18e4606..4b25d1c21 100644 --- a/website/docs/usage/training.md +++ b/website/docs/usage/training.md @@ -59,7 +59,7 @@ specific use case. It's also available in spaCy as the import QuickstartTraining from 'widgets/quickstart-training.js' - + After you've saved the starter config to a file `base_config.cfg`, you can use the [`init fill-config`](/api/cli#init-fill-config) command to fill in the @@ -127,7 +127,7 @@ Some of the main advantages and features of spaCy's training config are: config which types of data to expect. ```ini -https://github.com/explosion/spaCy/blob/develop/spacy/default_config.cfg +%%GITHUB_SPACY/spacy/default_config.cfg ``` Under the hood, the config is parsed into a dictionary. It's divided into @@ -683,7 +683,7 @@ You can also implement your own batch size schedule to use during training. The import spacy @spacy.registry.schedules("my_custom_schedule.v1") -def my_custom_schedule(start: int = 1, factor: int = 1.001): +def my_custom_schedule(start: int = 1, factor: float = 1.001): while True: yield start start = start * factor @@ -735,7 +735,7 @@ as **config settings** – in this case, `source`. 
### functions.py {highlight="7-8"} from typing import Callable, Iterator, List import spacy -from spacy.gold import Example +from spacy.training import Example from spacy.language import Language import random @@ -783,7 +783,7 @@ annotations are the same. ### functions.py from typing import Callable, Iterable, Iterator, List import spacy -from spacy.gold import Example +from spacy.training import Example @spacy.registry.batchers("filtering_batch.v1") def filter_batch(size: int) -> Callable[[Iterable[Example]], Iterator[List[Example]]]: diff --git a/website/docs/usage/v2.md b/website/docs/usage/v2.md index f7bcc17d3..aee3c24a6 100644 --- a/website/docs/usage/v2.md +++ b/website/docs/usage/v2.md @@ -76,9 +76,7 @@ noise contrastive estimation or reinforcement learning. ## New features {#features} This section contains an overview of the most important **new features and -improvements**. The [API docs](/api) include additional deprecation notes. New -methods and functions that were introduced in this version are marked with the -tag 2. +improvements**. The [API docs](/api) include additional deprecation notes. ### Convolutional neural network models {#features-models} diff --git a/website/docs/usage/v3.md b/website/docs/usage/v3.md index 3cbccc352..791b641df 100644 --- a/website/docs/usage/v3.md +++ b/website/docs/usage/v3.md @@ -8,20 +8,30 @@ menu: - ['Migrating from v2.x', 'migrating'] --- -## Summary {#summary} +## Summary {#summary hidden="true"} - +
+spaCy v3.0 features all new **transformer-based pipelines** that bring spaCy's +accuracy right up to the current **state-of-the-art**. You can use any +pretrained transformer to train your own pipelines, and even share one +transformer between multiple components with **multi-task learning**. Training +is now fully configurable and extensible, and you can define your own custom +models using **PyTorch**, **TensorFlow** and other frameworks. The new spaCy +projects system lets you describe whole **end-to-end workflows** in a single +file, giving you an easy path from prototype to production, and making it easy +to clone and adapt best-practice projects for your own use cases. +
- [Summary](#summary) - [New features](#features) -- [Training & config system](#features-training) - [Transformer-based pipelines](#features-transformers) +- [Training & config system](#features-training) - [Custom models](#features-custom-models) - [End-to-end project workflows](#features-projects) - [New built-in components](#features-pipeline-components) @@ -39,47 +49,126 @@ menu: ## New Features {#features} -### New training workflow and config system {#features-training} - - - -- **Usage:** [Training pipelines and models](/usage/training) -- **Thinc:** [Thinc's config system](https://thinc.ai/docs/usage-config), - [`Config`](https://thinc.ai/docs/api-config#config) -- **CLI:** [`train`](/api/cli#train), [`pretrain`](/api/cli#pretrain), - [`evaluate`](/api/cli#evaluate) -- **API:** [Config format](/api/data-formats#config), - [`registry`](/api/top-level#registry) - - +This section contains an overview of the most important **new features and +improvements**. The [API docs](/api) include additional deprecation notes. New +methods and functions that were introduced in this version are marked with the +tag 3. ### Transformer-based pipelines {#features-transformers} +> #### Example +> +> ```cli +> $ python -m spacy download en_core_web_trf +> ``` + +spaCy v3.0 features all new transformer-based pipelines that bring spaCy's +accuracy right up to the current **state-of-the-art**. You can use any +pretrained transformer to train your own pipelines, and even share one +transformer between multiple components with **multi-task learning**. spaCy's +transformer support interoperates with [PyTorch](https://pytorch.org) and the +[HuggingFace `transformers`](https://huggingface.co/transformers/) library, +giving you access to thousands of pretrained models for your pipelines. + ![Pipeline components listening to shared embedding component](../images/tok2vec-listener.svg) +import Benchmarks from 'usage/\_benchmarks-models.md' + + + - **Usage:** [Embeddings & Transformers](/usage/embeddings-transformers), - [Training pipelines and models](/usage/training) + [Training pipelines and models](/usage/training), + [Benchmarks](/usage/facts-figures#benchmarks) - **API:** [`Transformer`](/api/transformer), [`TransformerData`](/api/transformer#transformerdata), [`FullTransformerBatch`](/api/transformer#fulltransformerbatch) - **Architectures: ** [TransformerModel](/api/architectures#TransformerModel), [TransformerListener](/api/architectures#TransformerListener), [Tok2VecTransformer](/api/architectures#Tok2VecTransformer) -- **Trained Pipelines:** [`en_core_trf_lg_sm`](/models/en) +- **Trained Pipelines:** [`en_core_web_trf`](/models/en#en_core_web_trf) - **Implementation:** [`spacy-transformers`](https://github.com/explosion/spacy-transformers) +### New training workflow and config system {#features-training} + +> #### Example +> +> ```ini +> [training] +> vectors = null +> accumulate_gradient = 3 +> +> [training.optimizer] +> @optimizers = "Adam.v1" +> +> [training.optimizer.learn_rate] +> @schedules = "warmup_linear.v1" +> warmup_steps = 250 +> total_steps = 20000 +> initial_rate = 0.01 +> ``` + +spaCy v3.0 introduces a comprehensive and extensible system for **configuring +your training runs**. A single configuration file describes every detail of your +training run, with no hidden defaults, making it easy to rerun your experiments +and track changes. You can use the +[quickstart widget](/usage/training#quickstart) or the `init config` command to +get started. 
Instead of providing lots of arguments on the command line, you +only need to pass your `config.cfg` file to `spacy train`. + +Training config files include all **settings and hyperparameters** for training +your pipeline. Some settings can also be registered **functions** that you can +swap out and customize, making it easy to implement your own custom models and +architectures. + + + +- **Usage:** [Training pipelines and models](/usage/training) +- **Thinc:** [Thinc's config system](https://thinc.ai/docs/usage-config), + [`Config`](https://thinc.ai/docs/api-config#config) +- **CLI:** [`init config`](/api/cli#init-config), + [`init fill-config`](/api/cli#init-fill-config), [`train`](/api/cli#train), + [`pretrain`](/api/cli#pretrain), [`evaluate`](/api/cli#evaluate) +- **API:** [Config format](/api/data-formats#config), + [`registry`](/api/top-level#registry) + + + ### Custom models using any framework {#features-custom-models} +> #### Example +> +> ```python +> from torch import nn +> from thinc.api import PyTorchWrapper +> +> torch_model = nn.Sequential( +> nn.Linear(32, 32), +> nn.ReLU(), +> nn.Softmax(dim=1) +> ) +> model = PyTorchWrapper(torch_model) +> ``` + +spaCy's new configuration system makes it easy to customize the neural network +models used by the different pipeline components. You can also implement your +own architectures via spaCy's machine learning library [Thinc](https://thinc.ai) +that provides various layers and utilities, as well as thin wrappers around +frameworks like **PyTorch**, **TensorFlow** and **MXNet**. Component models all +follow the same unified [`Model`](https://thinc.ai/docs/api-model) API and each +`Model` can also be used as a sublayer of a larger network, allowing you to +freely combine implementations from different frameworks into a single model. + - **Usage: ** [Layers and architectures](/usage/layers-architectures) - **Thinc: ** - [Wrapping PyTorch, TensorFlow & MXNet](https://thinc.ai/docs/usage-frameworks) + [Wrapping PyTorch, TensorFlow & MXNet](https://thinc.ai/docs/usage-frameworks), + [`Model` API](https://thinc.ai/docs/api-model) - **API:** [Model architectures](/api/architectures), [`Pipe`](/api/pipe) @@ -159,8 +248,7 @@ add to your pipeline and customize for your use case: - **Usage:** [Processing pipelines](/usage/processing-pipelines) - **API:** [Built-in pipeline components](/api#architecture-pipeline) -- **Implementation:** - [`spacy/pipeline`](https://github.com/explosion/spaCy/tree/develop/spacy/pipeline) +- **Implementation:** [`spacy/pipeline`](%%GITHUB_SPACY/spacy/pipeline) @@ -197,15 +285,12 @@ aren't set. [`@Language.factory`](/api/language#factory), [`Language.add_pipe`](/api/language#add_pipe), [`Language.analyze_pipes`](/api/language#analyze_pipes) -- **Implementation:** - [`spacy/language.py`](https://github.com/explosion/spaCy/tree/develop/spacy/language.py) +- **Implementation:** [`spacy/language.py`](%%GITHUB_SPACY/spacy/language.py)
### Dependency matching {#features-dep-matcher} - - > #### Example > > ```python @@ -233,7 +318,7 @@ dictionaries**, with each dictionary describing a **token to match** and its [Dependency matching](/usage/rule-based-matching#dependencymatcher), - **API:** [`DependencyMatcher`](/api/dependencymatcher), - **Implementation:** - [`spacy/matcher/dependencymatcher.pyx`](https://github.com/explosion/spaCy/tree/develop/spacy/matcher/dependencymatcher.pyx) + [`spacy/matcher/dependencymatcher.pyx`](%%GITHUB_SPACY/spacy/matcher/dependencymatcher.pyx) @@ -404,11 +489,12 @@ Note that spaCy v3.0 now requires **Python 3.6+**. [`Pipe.begin_training`](/api/pipe#begin_training) now take a function that returns a sequence of `Example` objects to initialize the model instead of a list of tuples. -- [`Matcher.add`](/api/matcher#add), - [`PhraseMatcher.add`](/api/phrasematcher#add) and - [`DependencyMatcher.add`](/api/dependencymatcher#add) now only accept a list - of patterns as the second argument (instead of a variable number of - arguments). The `on_match` callback becomes an optional keyword argument. +- [`Matcher.add`](/api/matcher#add) and + [`PhraseMatcher.add`](/api/phrasematcher#add) now only accept a list of + patterns as the second argument (instead of a variable number of arguments). + The `on_match` callback becomes an optional keyword argument. +- The `spacy.gold` module has been renamed to + [`spacy.training`](%%GITHUB_SPACY/spacy/training). - The `PRON_LEMMA` symbol and `-PRON-` as an indicator for pronoun lemmas has been removed. - The `TAG_MAP` and `MORPH_RULES` in the language data have been replaced by the @@ -779,6 +865,20 @@ python -m spacy package ./output ./packages - python setup.py sdist ``` +#### Data utilities and gold module {#migrating-gold} + +The `spacy.gold` module has been renamed to `spacy.training`. This mostly +affects internals, but if you've been using the span offset conversion utilities +[`biluo_tags_from_offsets`](/api/top-level#biluo_tags_from_offsets), +[`offsets_from_biluo_tags`](/api/top-level#offsets_from_biluo_tags) or +[`spans_from_biluo_tags`](/api/top-level#spans_from_biluo_tags), you'll have to +change your imports: + +```diff +- from spacy.gold import biluo_tags_from_offsets, spans_from_biluo_tags ++ from spacy.training import biluo_tags_from_offsets, spans_from_biluo_tags +``` + #### Migration notes for plugin maintainers {#migrating-plugins} Thanks to everyone who's been contributing to the spaCy ecosystem by developing diff --git a/website/gatsby-config.js b/website/gatsby-config.js index 6c67de6ea..78fdc336f 100644 --- a/website/gatsby-config.js +++ b/website/gatsby-config.js @@ -8,7 +8,6 @@ const codeBlocksPlugin = require('./src/plugins/remark-code-blocks.js') // Import metadata const site = require('./meta/site.json') -const logos = require('./meta/logos.json') const sidebars = require('./meta/sidebars.json') const models = require('./meta/languages.json') const universe = require('./meta/universe.json') @@ -20,11 +19,16 @@ const favicon = isNightly ? `src/images/icon_nightly.png` : `src/images/icon.png const binderBranch = isNightly ? 'nightly' : site.binderBranch const siteUrl = isNightly ? site.siteUrlNightly : site.siteUrl const domain = isNightly ? site.domainNightly : site.domain +const branch = isNightly ? 'develop' : 'master' + +// Those variables are going to be replaced in the Markdown, e.g. 
%%GITHUB_SPACY +const replacements = { + GITHUB_SPACY: `https://github.com/explosion/spaCy/tree/${branch}`, +} module.exports = { siteMetadata: { ...site, - ...logos, sidebars, ...models, universe, @@ -121,6 +125,13 @@ module.exports = { { resolve: `gatsby-remark-copy-linked-files`, }, + { + resolve: 'gatsby-remark-find-replace', + options: { + replacements, + prefix: '%%', + }, + }, ], }, }, diff --git a/website/meta/logos.json b/website/meta/logos.json deleted file mode 100644 index 783995026..000000000 --- a/website/meta/logos.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "logosUsers": [ - { "id": "airbnb", "url": "https://www.airbnb.com" }, - { "id": "uber", "url": "https://www.uber.com" }, - { "id": "quora", "url": "https://www.quora.com" }, - { "id": "retriever", "url": "https://www.retriever.no" }, - { "id": "stitchfix", "url": "https://www.stitchfix.com" }, - { "id": "chartbeat", "url": "https://chartbeat.com" }, - { "id": "allenai", "url": "https://allenai.org" } - ], - "logosPublications": [ - { - "id": "recode", - "url": "https://www.recode.net/2017/6/22/15855492/ai-artificial-intelligence-nonprofit-good-human-chatbots-machine-learning" - }, - { - "id": "wapo", - "url": "https://www.washingtonpost.com/news/wonk/wp/2016/05/18/googles-new-artificial-intelligence-cant-understand-these-sentences-can-you/" - }, - { - "id": "bbc", - "url": "http://www.bbc.co.uk/rd/blog/2017-08-irfs-weeknotes-number-250" - }, - { - "id": "microsoft", - "url": "https://www.microsoft.com/developerblog/2016/09/13/training-a-classifier-for-relation-extraction-from-medical-literature/" - }, - { - "id": "venturebeat", - "url": "https://venturebeat.com/2017/01/27/4-ai-startups-that-analyze-customer-reviews/" - }, - { - "id": "thoughtworks", - "url": "https://www.thoughtworks.com/radar/tools" - } - ] -} diff --git a/website/meta/site.json b/website/meta/site.json index d1162edf9..1955932b9 100644 --- a/website/meta/site.json +++ b/website/meta/site.json @@ -28,7 +28,7 @@ }, "binderUrl": "explosion/spacy-io-binder", "binderBranch": "live", - "binderVersion": "2.3.0", + "binderVersion": "3.0.0", "sections": [ { "id": "usage", "title": "Usage Documentation", "theme": "blue" }, { "id": "models", "title": "Models Documentation", "theme": "blue" }, @@ -47,20 +47,19 @@ "items": [ { "text": "Usage", "url": "/usage" }, { "text": "Models", "url": "/models" }, - { "text": "API", "url": "/api" }, - { "text": "Universe", "url": "/universe" } + { "text": "API Reference", "url": "/api" }, + { "text": "Online Course", "url": "https://course.spacy.io" } ] }, { - "label": "Support", + "label": "Community", "items": [ + { "text": "Universe", "url": "/universe" }, { "text": "Issue Tracker", "url": "https://github.com/explosion/spaCy/issues" }, { "text": "Stack Overflow", "url": "http://stackoverflow.com/questions/tagged/spacy" - }, - { "text": "Reddit User Group", "url": "https://www.reddit.com/r/spacynlp/" }, - { "text": "Gitter Chat", "url": "https://gitter.im/explosion/spaCy" } + } ] }, { diff --git a/website/meta/type-annotations.json b/website/meta/type-annotations.json index b1d94403d..79d4d357d 100644 --- a/website/meta/type-annotations.json +++ b/website/meta/type-annotations.json @@ -34,6 +34,8 @@ "Floats2d": "https://thinc.ai/docs/api-types#types", "Floats3d": "https://thinc.ai/docs/api-types#types", "FloatsXd": "https://thinc.ai/docs/api-types#types", + "Array1d": "https://thinc.ai/docs/api-types#types", + "Array2d": "https://thinc.ai/docs/api-types#types", "Ops": "https://thinc.ai/docs/api-backends#ops", "cymem.Pool": 
"https://github.com/explosion/cymem", "preshed.BloomFilter": "https://github.com/explosion/preshed", diff --git a/website/package-lock.json b/website/package-lock.json index d995f910e..d8444c2b2 100644 --- a/website/package-lock.json +++ b/website/package-lock.json @@ -14238,6 +14238,46 @@ } } }, + "gatsby-remark-find-replace": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/gatsby-remark-find-replace/-/gatsby-remark-find-replace-0.3.0.tgz", + "integrity": "sha512-tTXt+ZxD+7hEVtZVbZVrifcQUk2mt4uJNUHhc9cje+93sDa4PrrFBbny9IWgXLj9QH9xDxWOZrI768ApMtbPUQ==", + "requires": { + "escape-string-regexp": "^2.0.0", + "unist-util-visit": "^2.0.1" + }, + "dependencies": { + "escape-string-regexp": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-2.0.0.tgz", + "integrity": "sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==" + }, + "unist-util-is": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-4.0.2.tgz", + "integrity": "sha512-Ofx8uf6haexJwI1gxWMGg6I/dLnF2yE+KibhD3/diOqY2TinLcqHXCV6OI5gFVn3xQqDH+u0M625pfKwIwgBKQ==" + }, + "unist-util-visit": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-2.0.3.tgz", + "integrity": "sha512-iJ4/RczbJMkD0712mGktuGpm/U4By4FfDonL7N/9tATGIF4imikjOuagyMY53tnZq3NP6BcmlrHhEKAfGWjh7Q==", + "requires": { + "@types/unist": "^2.0.0", + "unist-util-is": "^4.0.0", + "unist-util-visit-parents": "^3.0.0" + } + }, + "unist-util-visit-parents": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-3.1.0.tgz", + "integrity": "sha512-0g4wbluTF93npyPrp/ymd3tCDTMnP0yo2akFD2FIBAYXq/Sga3lwaU1D8OYKbtpioaI6CkDcQ6fsMnmtzt7htw==", + "requires": { + "@types/unist": "^2.0.0", + "unist-util-is": "^4.0.0" + } + } + } + }, "gatsby-remark-images": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/gatsby-remark-images/-/gatsby-remark-images-3.0.4.tgz", @@ -22152,6 +22192,14 @@ "clipboard": "^2.0.0" } }, + "prismjs-bibtex": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/prismjs-bibtex/-/prismjs-bibtex-1.1.0.tgz", + "integrity": "sha512-IjZUJP3iTkV1DZ8qcjUF7p7Ji/LPns56jw+INUBPtnBaX4Q/VhtzlRGHM0lxSvdfqUvqgTGF3oM8aogWqzZz2g==", + "requires": { + "prismjs": "^1.15" + } + }, "private": { "version": "0.1.8", "resolved": "https://registry.npmjs.org/private/-/private-0.1.8.tgz", diff --git a/website/package.json b/website/package.json index 40018f532..def94a9c2 100644 --- a/website/package.json +++ b/website/package.json @@ -31,6 +31,7 @@ "gatsby-plugin-sitemap": "^2.0.5", "gatsby-plugin-svgr": "^2.0.1", "gatsby-remark-copy-linked-files": "^2.0.9", + "gatsby-remark-find-replace": "^0.3.0", "gatsby-remark-images": "^3.0.4", "gatsby-remark-prismjs": "^3.2.4", "gatsby-remark-smartypants": "^2.0.8", @@ -44,6 +45,7 @@ "node-sass": "^4.11.0", "parse-numeric-range": "0.0.2", "prismjs": "^1.15.0", + "prismjs-bibtex": "^1.1.0", "prop-types": "^15.7.2", "react": "^16.8.2", "react-dom": "^16.8.2", diff --git a/website/src/components/code.js b/website/src/components/code.js index f075539ea..5a7828a33 100644 --- a/website/src/components/code.js +++ b/website/src/components/code.js @@ -2,6 +2,7 @@ import React, { Fragment } from 'react' import PropTypes from 'prop-types' import classNames from 'classnames' import highlightCode from 'gatsby-remark-prismjs/highlight-code.js' +import 'prismjs-bibtex' import 
rangeParser from 'parse-numeric-range' import { StaticQuery, graphql } from 'gatsby' import { window } from 'browser-monads' diff --git a/website/src/components/grid.js index 1d11a748f..299fcf931 100644 --- a/website/src/components/grid.js +++ b/website/src/components/grid.js @@ -9,6 +9,7 @@ export default function Grid({ narrow = false, gutterBottom = true, className, + style, children, }) { const gridClassNames = classNames(classes.root, className, { @@ -18,7 +19,11 @@ export default function Grid({ [classes.third]: cols === 3, [classes.quarter]: cols === 4, }) - return <div className={gridClassNames}>{children}</div> + return ( + <div className={gridClassNames} style={style}> + {children} + </div>
+ ) } Grid.propTypes = { diff --git a/website/src/components/icon.js index 8dfba7426..799b20eda 100644 --- a/website/src/components/icon.js +++ b/website/src/components/icon.js @@ -1,4 +1,4 @@ -import React from 'react' +import React, { Fragment } from 'react' import PropTypes from 'prop-types' import classNames from 'classnames' @@ -25,6 +25,7 @@ import { ReactComponent as NetworkIcon } from '../images/icons/network.svg' import { ReactComponent as DownloadIcon } from '../images/icons/download.svg' import { ReactComponent as PackageIcon } from '../images/icons/package.svg' +import { isString } from './util' import classes from '../styles/icon.module.sass' const icons = { @@ -88,3 +89,44 @@ Icon.propTypes = { variant: PropTypes.oneOf(['success', 'error', 'subtle']), className: PropTypes.string, } + +export function replaceEmoji(cellChildren) { + const icons = { + '✅': { name: 'yes', variant: 'success', 'aria-label': 'positive' }, + '❌': { name: 'no', variant: 'error', 'aria-label': 'negative' }, + } + const iconRe = new RegExp(`^(${Object.keys(icons).join('|')})`, 'g') + let children = isString(cellChildren) ? [cellChildren] : cellChildren + let hasIcon = false + if (Array.isArray(children)) { + children = children.map((child, i) => { + if (isString(child)) { + const icon = icons[child.trim()] + if (icon) { + hasIcon = true + return ( + <Icon {...icon} key={i} /> + ) + } else if (iconRe.test(child)) { + hasIcon = true + const [, iconName, text] = child.split(iconRe) + return ( + <Fragment key={i}> + <Icon {...icons[iconName]} /> + {text.replace(/^\s+/g, '')} + </Fragment> + ) + } + // Work around prettier auto-escape + if (child.startsWith('\\')) return child.slice(1) + } + return child + }) + } + return { content: children, hasIcon } +}
diff --git a/website/src/components/infobox.js index 363638bf2..968b6cea8 100644 --- a/website/src/components/infobox.js +++ b/website/src/components/infobox.js @@ -23,7 +23,7 @@ export default function Infobox({