diff --git a/spacy/tests/parser/test_parse.py b/spacy/tests/parser/test_parse.py
index 7bbb30d8e..aaf31ed56 100644
--- a/spacy/tests/parser/test_parse.py
+++ b/spacy/tests/parser/test_parse.py
@@ -12,6 +12,7 @@ from spacy.vocab import Vocab
 from ...pipeline import DependencyParser
 from ...pipeline.dep_parser import DEFAULT_PARSER_MODEL
 from ..util import apply_transition_sequence, make_tempdir
+from ...pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
 
 TRAIN_DATA = [
     (
@@ -395,6 +396,34 @@ def test_overfitting_IO(pipe_name):
     assert_equal(batch_deps_1, no_batch_deps)
 
 
+# fmt: off
+@pytest.mark.slow
+@pytest.mark.parametrize("pipe_name", ["parser", "beam_parser"])
+@pytest.mark.parametrize(
+    "parser_config",
+    [
+        # TransitionBasedParser V1
+        ({"@architectures": "spacy.TransitionBasedParser.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "state_type": "parser", "extra_state_tokens": False, "hidden_width": 64, "maxout_pieces": 2, "use_upper": True}),
+        # TransitionBasedParser V2
+        ({"@architectures": "spacy.TransitionBasedParser.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "state_type": "parser", "extra_state_tokens": False, "hidden_width": 64, "maxout_pieces": 2, "use_upper": True}),
+    ],
+)
+# fmt: on
+def test_parser_configs(pipe_name, parser_config):
+    pipe_config = {"model": parser_config}
+    nlp = English()
+    parser = nlp.add_pipe(pipe_name, config=pipe_config)
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for dep in annotations.get("deps", []):
+            parser.add_label(dep)
+    optimizer = nlp.initialize()
+    for i in range(5):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+
+
 def test_beam_parser_scores():
     # Test that we can get confidence values out of the beam_parser pipe
     beam_width = 16
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 798dd165e..0bb036a33 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -382,6 +382,7 @@ def test_implicit_label(name, get_examples):
 
 
 # fmt: off
+@pytest.mark.slow
 @pytest.mark.parametrize(
     "name,textcat_config",
     [
@@ -390,7 +391,10 @@ def test_implicit_label(name, get_examples):
         ("textcat", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
         ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
         ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
-        # ENSEMBLE
+        # ENSEMBLE V1
+        ("textcat", {"@architectures": "spacy.TextCatEnsemble.v1", "exclusive_classes": False, "pretrained_vectors": None, "width": 64, "embed_size": 2000, "conv_depth": 2, "window_size": 1, "ngram_size": 1, "dropout": None}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatEnsemble.v1", "exclusive_classes": False, "pretrained_vectors": None, "width": 64, "embed_size": 2000, "conv_depth": 2, "window_size": 1, "ngram_size": 1, "dropout": None}),
+        # ENSEMBLE V2
         ("textcat", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}}),
         ("textcat", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}}),
         ("textcat_multilabel", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}}),
@@ -643,15 +647,28 @@ def test_overfitting_IO_multi():
 
 
 # fmt: off
+@pytest.mark.slow
 @pytest.mark.parametrize(
     "name,train_data,textcat_config",
     [
+        # BOW V1
+        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}),
+        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "ngram_size": 4, "no_output_layer": False}),
+        # ENSEMBLE V1
+        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v1", "exclusive_classes": False, "pretrained_vectors": None, "width": 64, "embed_size": 2000, "conv_depth": 2, "window_size": 1, "ngram_size": 1, "dropout": None}),
+        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v1", "exclusive_classes": False, "pretrained_vectors": None, "width": 64, "embed_size": 2000, "conv_depth": 2, "window_size": 1, "ngram_size": 1, "dropout": None}),
+        # CNN V1
+        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
+        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
+        # BOW V2
         ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}),
         ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 4, "no_output_layer": False}),
         ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 3, "no_output_layer": True}),
         ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 2, "no_output_layer": True}),
+        # ENSEMBLE V2
         ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}}),
         ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 5, "no_output_layer": False}}),
+        # CNN V2
         ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
         ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
     ],
diff --git a/spacy/tests/pipeline/test_tok2vec.py b/spacy/tests/pipeline/test_tok2vec.py
index 37104c78a..64faf133d 100644
--- a/spacy/tests/pipeline/test_tok2vec.py
+++ b/spacy/tests/pipeline/test_tok2vec.py
@@ -1,13 +1,13 @@
 import pytest
 from spacy.ml.models.tok2vec import build_Tok2Vec_model
-from spacy.ml.models.tok2vec import MultiHashEmbed, CharacterEmbed
-from spacy.ml.models.tok2vec import MishWindowEncoder, MaxoutWindowEncoder
+from spacy.ml.models.tok2vec import MultiHashEmbed, MaxoutWindowEncoder
 from spacy.pipeline.tok2vec import Tok2Vec, Tok2VecListener
 from spacy.vocab import Vocab
 from spacy.tokens import Doc
 from spacy.training import Example
 from spacy import util
 from spacy.lang.en import English
+from spacy.util import registry
 from thinc.api import Config, get_current_ops
 from numpy.testing import assert_array_equal
 
@@ -55,24 +55,41 @@ def test_tok2vec_batch_sizes(batch_size, width, embed_size):
         assert doc_vec.shape == (len(doc), width)
 
 
+@pytest.mark.slow
+@pytest.mark.parametrize("width", [8])
 @pytest.mark.parametrize(
-    "width,embed_arch,embed_config,encode_arch,encode_config",
+    "embed_arch,embed_config",
     # fmt: off
     [
-        (8, MultiHashEmbed, {"rows": [100, 100], "attrs": ["SHAPE", "LOWER"], "include_static_vectors": False}, MaxoutWindowEncoder, {"window_size": 1, "maxout_pieces": 3, "depth": 2}),
-        (8, MultiHashEmbed, {"rows": [100, 20], "attrs": ["ORTH", "PREFIX"], "include_static_vectors": False}, MishWindowEncoder, {"window_size": 1, "depth": 6}),
-        (8, CharacterEmbed, {"rows": 100, "nM": 64, "nC": 8, "include_static_vectors": False}, MaxoutWindowEncoder, {"window_size": 1, "maxout_pieces": 3, "depth": 3}),
-        (8, CharacterEmbed, {"rows": 100, "nM": 16, "nC": 2, "include_static_vectors": False}, MishWindowEncoder, {"window_size": 1, "depth": 3}),
+        ("spacy.MultiHashEmbed.v1", {"rows": [100, 100], "attrs": ["SHAPE", "LOWER"], "include_static_vectors": False}),
+        ("spacy.MultiHashEmbed.v1", {"rows": [100, 20], "attrs": ["ORTH", "PREFIX"], "include_static_vectors": False}),
+        ("spacy.CharacterEmbed.v1", {"rows": 100, "nM": 64, "nC": 8, "include_static_vectors": False}),
+        ("spacy.CharacterEmbed.v1", {"rows": 100, "nM": 16, "nC": 2, "include_static_vectors": False}),
     ],
     # fmt: on
 )
-def test_tok2vec_configs(width, embed_arch, embed_config, encode_arch, encode_config):
+@pytest.mark.parametrize(
+    "tok2vec_arch,encode_arch,encode_config",
+    # fmt: off
+    [
+        ("spacy.Tok2Vec.v1", "spacy.MaxoutWindowEncoder.v1", {"window_size": 1, "maxout_pieces": 3, "depth": 2}),
+        ("spacy.Tok2Vec.v2", "spacy.MaxoutWindowEncoder.v2", {"window_size": 1, "maxout_pieces": 3, "depth": 2}),
+        ("spacy.Tok2Vec.v1", "spacy.MishWindowEncoder.v1", {"window_size": 1, "depth": 6}),
+        ("spacy.Tok2Vec.v2", "spacy.MishWindowEncoder.v2", {"window_size": 1, "depth": 6}),
+    ],
+    # fmt: on
+)
+def test_tok2vec_configs(
+    width, tok2vec_arch, embed_arch, embed_config, encode_arch, encode_config
+):
+    embed = registry.get("architectures", embed_arch)
+    encode = registry.get("architectures", encode_arch)
+    tok2vec_model = registry.get("architectures", tok2vec_arch)
+
     embed_config["width"] = width
     encode_config["width"] = width
     docs = get_batch(3)
-    tok2vec = build_Tok2Vec_model(
-        embed_arch(**embed_config), encode_arch(**encode_config)
-    )
+    tok2vec = tok2vec_model(embed(**embed_config), encode(**encode_config))
     tok2vec.initialize(docs)
     vectors, backprop = tok2vec.begin_update(docs)
     assert len(vectors) == len(docs)
diff --git a/website/docs/api/legacy.md b/website/docs/api/legacy.md
index e24c37d77..31d178b67 100644
--- a/website/docs/api/legacy.md
+++ b/website/docs/api/legacy.md
@@ -103,11 +103,22 @@ and residual connections.
 | `depth`       | The number of convolutional layers. Recommended value is `4`. ~~int~~                                                                                                                                          |
 | **CREATES**   | The model using the architecture. ~~Model[Floats2d, Floats2d]~~                                                                                                                                                |
 
-### spacy.TransitionBasedParser.v1 {#TransitionBasedParser_v1}
+### spacy.HashEmbedCNN.v1 {#HashEmbedCNN_v1}
 
-Identical to
-[`spacy.TransitionBasedParser.v2`](/api/architectures#TransitionBasedParser)
-except the `use_upper` was set to `True` by default.
+Identical to [`spacy.HashEmbedCNN.v2`](/api/architectures#HashEmbedCNN) except
+using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are included.
+
+### spacy.MultiHashEmbed.v1 {#MultiHashEmbed_v1}
+
+Identical to [`spacy.MultiHashEmbed.v2`](/api/architectures#MultiHashEmbed)
+except with [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
+included.
+
+### spacy.CharacterEmbed.v1 {#CharacterEmbed_v1}
+
+Identical to [`spacy.CharacterEmbed.v2`](/api/architectures#CharacterEmbed)
+except using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
+included.
 
 ### spacy.TextCatEnsemble.v1 {#TextCatEnsemble_v1}
 
@@ -147,41 +158,6 @@ network has an internal CNN Tok2Vec layer and uses attention.
 | `nO`                 | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
 | **CREATES**          | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
 
-### spacy.HashEmbedCNN.v1 {#HashEmbedCNN_v1}
-
-Identical to [`spacy.HashEmbedCNN.v2`](/api/architectures#HashEmbedCNN) except
-using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are included.
-
-### spacy.MultiHashEmbed.v1 {#MultiHashEmbed_v1}
-
-Identical to [`spacy.MultiHashEmbed.v2`](/api/architectures#MultiHashEmbed)
-except with [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
-included.
-
-### spacy.CharacterEmbed.v1 {#CharacterEmbed_v1}
-
-Identical to [`spacy.CharacterEmbed.v2`](/api/architectures#CharacterEmbed)
-except using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
-included.
-
-## Layers {#layers}
-
-These functions are available from `@spacy.registry.layers`.
-
-### spacy.StaticVectors.v1 {#StaticVectors_v1}
-
-Identical to [`spacy.StaticVectors.v2`](/api/architectures#StaticVectors) except
-for the handling of tokens without vectors.
-
-<Infobox title="Bugs for tokens without vectors" variant="warning">
-
-`spacy.StaticVectors.v1` maps tokens without vectors to the final row in the
-vectors table, which causes the model predictions to change if new vectors are
-added to an existing vectors table. See more details in
-[issue #7662](https://github.com/explosion/spaCy/issues/7662#issuecomment-813925655).
-
-</Infobox>
-
 ### spacy.TextCatCNN.v1 {#TextCatCNN_v1}
 
 Since `spacy.TextCatCNN.v2`, this architecture has become resizable, which means
@@ -246,8 +222,35 @@ the others, but may not be as accurate, especially if texts are short.
 | `nO`                | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
 | **CREATES**         | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
 
+### spacy.TransitionBasedParser.v1 {#TransitionBasedParser_v1}
+
+Identical to
+[`spacy.TransitionBasedParser.v2`](/api/architectures#TransitionBasedParser)
+except the `use_upper` was set to `True` by default.
+
+## Layers {#layers}
+
+These functions are available from `@spacy.registry.layers`.
+
+### spacy.StaticVectors.v1 {#StaticVectors_v1}
+
+Identical to [`spacy.StaticVectors.v2`](/api/architectures#StaticVectors) except
+for the handling of tokens without vectors.
+
+<Infobox title="Bugs for tokens without vectors" variant="warning">
+
+`spacy.StaticVectors.v1` maps tokens without vectors to the final row in the
+vectors table, which causes the model predictions to change if new vectors are
+added to an existing vectors table. See more details in
+[issue #7662](https://github.com/explosion/spaCy/issues/7662#issuecomment-813925655).
+
+</Infobox>
+
 ## Loggers {#loggers}
 
-Logging utilities for spaCy are implemented in the [`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the functions are typically available from `@spacy.registry.loggers`.
+Logging utilities for spaCy are implemented in the
+[`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the
+functions are typically available from `@spacy.registry.loggers`.
 
-More documentation can be found in that repo's [readme](https://github.com/explosion/spacy-loggers/blob/main/README.md) file.
+More documentation can be found in that repo's
+[readme](https://github.com/explosion/spacy-loggers/blob/main/README.md) file.