diff --git a/pyproject.toml b/pyproject.toml
index 7a0e34376..882b31162 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ requires = [
     "cymem>=2.0.2,<2.1.0",
     "preshed>=3.0.2,<3.1.0",
     "murmurhash>=0.28.0,<1.1.0",
-    "thinc>=8.0.3,<8.1.0",
+    "thinc>=8.0.4,<8.1.0",
     "blis>=0.4.0,<0.8.0",
     "pathy",
     "numpy>=1.15.0",
diff --git a/requirements.txt b/requirements.txt
index 46337389c..9837933ab 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
 # Our libraries
-spacy-legacy>=3.0.5,<3.1.0
+spacy-legacy>=3.0.6,<3.1.0
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
-thinc>=8.0.3,<8.1.0
+thinc>=8.0.4,<8.1.0
 blis>=0.4.0,<0.8.0
 ml_datasets>=0.2.0,<0.3.0
 murmurhash>=0.28.0,<1.1.0
diff --git a/setup.cfg b/setup.cfg
index 99bae6ac8..37c432205 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -37,14 +37,14 @@ setup_requires =
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
     murmurhash>=0.28.0,<1.1.0
-    thinc>=8.0.3,<8.1.0
+    thinc>=8.0.4,<8.1.0
 install_requires =
     # Our libraries
-    spacy-legacy>=3.0.5,<3.1.0
+    spacy-legacy>=3.0.6,<3.1.0
     murmurhash>=0.28.0,<1.1.0
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
-    thinc>=8.0.3,<8.1.0
+    thinc>=8.0.4,<8.1.0
     blis>=0.4.0,<0.8.0
     wasabi>=0.8.1,<1.1.0
     srsly>=2.4.1,<3.0.0
diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index 0d422318b..3f8b3e1cc 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -151,14 +151,14 @@ grad_factor = 1.0
 @layers = "reduce_mean.v1"
 
 [components.textcat.model.linear_model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
 exclusive_classes = true
 ngram_size = 1
 no_output_layer = false
 
 {% else -%}
 [components.textcat.model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
 exclusive_classes = true
 ngram_size = 1
 no_output_layer = false
@@ -182,14 +182,14 @@ grad_factor = 1.0
 @layers = "reduce_mean.v1"
 
 [components.textcat_multilabel.model.linear_model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
 exclusive_classes = false
 ngram_size = 1
 no_output_layer = false
 
 {% else -%}
 [components.textcat_multilabel.model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
 exclusive_classes = false
 ngram_size = 1
 no_output_layer = false
@@ -316,14 +316,14 @@ nO = null
 width = ${components.tok2vec.model.encode.width}
 
 [components.textcat.model.linear_model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
 exclusive_classes = true
 ngram_size = 1
 no_output_layer = false
 
 {% else -%}
 [components.textcat.model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
 exclusive_classes = true
 ngram_size = 1
 no_output_layer = false
@@ -344,14 +344,14 @@ nO = null
 width = ${components.tok2vec.model.encode.width}
 
 [components.textcat_multilabel.model.linear_model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
 exclusive_classes = false
 ngram_size = 1
 no_output_layer = false
 
 {% else -%}
 [components.textcat_multilabel.model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
 exclusive_classes = false
 ngram_size = 1
 no_output_layer = false
diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py
index a1855c5a0..e3f6e944a 100644
--- a/spacy/ml/models/textcat.py
+++ b/spacy/ml/models/textcat.py
@@ -1,11 +1,13 @@
+from functools import partial
 from typing import Optional, List
 
 from thinc.types import Floats2d
 from thinc.api import Model, reduce_mean, Linear, list2ragged, Logistic
 from thinc.api import chain, concatenate, clone, Dropout, ParametricAttention
 from thinc.api import SparseLinear, Softmax, softmax_activation, Maxout, reduce_sum
-from thinc.api import with_cpu, Relu, residual, LayerNorm
+from thinc.api import with_cpu, Relu, residual, LayerNorm, resizable
 from thinc.layers.chain import init as init_chain
+from thinc.layers.resizable import resize_model, resize_linear_weighted
 
 from ...attrs import ORTH
 from ...util import registry
@@ -15,7 +17,10 @@ from ...tokens import Doc
 from .tok2vec import get_tok2vec_width
 
 
-@registry.architectures("spacy.TextCatCNN.v1")
+NEG_VALUE = -5000
+
+
+@registry.architectures("spacy.TextCatCNN.v2")
 def build_simple_cnn_text_classifier(
     tok2vec: Model, exclusive_classes: bool, nO: Optional[int] = None
 ) -> Model[List[Doc], Floats2d]:
@@ -25,38 +30,93 @@ def build_simple_cnn_text_classifier(
     outputs sum to 1. If exclusive_classes=False, a logistic non-linearity
     is applied instead, so that outputs are in the range [0, 1].
     """
+    # Fill values handed to resize_linear_weighted when the output is resized.
+    fill_defaults = {"b": 0, "W": 0}
     with Model.define_operators({">>": chain}):
         cnn = tok2vec >> list2ragged() >> reduce_mean()
+        nI = tok2vec.maybe_get_dim("nO")
         if exclusive_classes:
-            output_layer = Softmax(nO=nO, nI=tok2vec.maybe_get_dim("nO"))
-            model = cnn >> output_layer
-            model.set_ref("output_layer", output_layer)
+            output_layer = Softmax(nO=nO, nI=nI)
+            fill_defaults["b"] = NEG_VALUE
+            resizable_layer = resizable(
+                output_layer,
+                resize_layer=partial(
+                    resize_linear_weighted, fill_defaults=fill_defaults
+                ),
+            )
+            model = cnn >> resizable_layer
         else:
-            linear_layer = Linear(nO=nO, nI=tok2vec.maybe_get_dim("nO"))
-            model = cnn >> linear_layer >> Logistic()
-            model.set_ref("output_layer", linear_layer)
+            output_layer = Linear(nO=nO, nI=nI)
+            resizable_layer = resizable(
+                output_layer,
+                resize_layer=partial(
+                    resize_linear_weighted, fill_defaults=fill_defaults
+                ),
+            )
+            model = cnn >> resizable_layer >> Logistic()
+        model.set_ref("output_layer", output_layer)
+        model.attrs["resize_output"] = partial(
+            resize_and_set_ref,
+            resizable_layer=resizable_layer,
+        )
     model.set_ref("tok2vec", tok2vec)
     model.set_dim("nO", nO)
     model.attrs["multi_label"] = not exclusive_classes
     return model
 
 
-@registry.architectures("spacy.TextCatBOW.v1")
+def resize_and_set_ref(model, new_nO, resizable_layer):
+    """Resize the wrapped output layer and update the model's ref and nO dim.
+
+    model (Model): The model whose "output_layer" ref and nO dim are updated.
+    new_nO (int): The new output dimension.
+    resizable_layer (Model): The `resizable` wrapper passed to `resize_model`.
+    RETURNS (Model): The same `model` object that was passed in.
+    """
+    resizable_layer = resize_model(resizable_layer, new_nO)
+    # Point the ref at the wrapper's first child layer after the resize.
+    model.set_ref("output_layer", resizable_layer.layers[0])
+    # force=True: nO is being overwritten on purpose.
+    model.set_dim("nO", new_nO, force=True)
+    return model
+
+
+@registry.architectures("spacy.TextCatBOW.v2")
 def build_bow_text_classifier(
     exclusive_classes: bool,
     ngram_size: int,
     no_output_layer: bool,
     nO: Optional[int] = None,
 ) -> Model[List[Doc], Floats2d]:
+    """Build a resizable bag-of-words classifier over ORTH n-grams.
+
+    The SparseLinear layer is wrapped in `resizable`, and a resize callback is
+    stored in model.attrs["resize_output"]. Unless no_output_layer is set, a
+    softmax (exclusive_classes=True) or logistic activation is appended.
+    """
+    fill_defaults = {"b": 0, "W": 0}
     with Model.define_operators({">>": chain}):
-        sparse_linear = SparseLinear(nO)
-        model = extract_ngrams(ngram_size, attr=ORTH) >> sparse_linear
-        model = with_cpu(model, model.ops)
+        sparse_linear = SparseLinear(nO=nO)
+        output_layer = None
         if not no_output_layer:
+            # NOTE(review): presumably the large negative bias keeps scores of
+            # freshly added labels close to zero after the activation - confirm.
+            fill_defaults["b"] = NEG_VALUE
             output_layer = softmax_activation() if exclusive_classes else Logistic()
+        resizable_layer = resizable(
+            sparse_linear,
+            resize_layer=partial(resize_linear_weighted, fill_defaults=fill_defaults),
+        )
+        model = extract_ngrams(ngram_size, attr=ORTH) >> resizable_layer
+        model = with_cpu(model, model.ops)
+        if output_layer is not None:
             model = model >> with_cpu(output_layer, output_layer.ops)
+    model.set_dim("nO", nO)
     model.set_ref("output_layer", sparse_linear)
     model.attrs["multi_label"] = not exclusive_classes
+    model.attrs["resize_output"] = partial(
+        resize_and_set_ref, resizable_layer=resizable_layer
+    )
     return model
 
 
@@ -69,9 +129,7 @@ def build_text_classifier_v2(
     exclusive_classes = not linear_model.attrs["multi_label"]
     with Model.define_operators({">>": chain, "|": concatenate}):
         width = tok2vec.maybe_get_dim("nO")
-        attention_layer = ParametricAttention(
-            width
-        )  # TODO: benchmark performance difference of this layer
+        attention_layer = ParametricAttention(width)
         maxout_layer = Maxout(nO=width, nI=width)
         norm_layer = LayerNorm(nI=width)
         cnn_model = (
diff --git a/spacy/ml/tb_framework.py b/spacy/ml/tb_framework.py
index 4ab5830cd..e7e5561af 100644
--- a/spacy/ml/tb_framework.py
+++ b/spacy/ml/tb_framework.py
@@ -15,7 +15,7 @@ def TransitionModel(
     return Model(
         name="parser_model",
         forward=forward,
-        dims={"nI": tok2vec.get_dim("nI") if tok2vec.has_dim("nI") else None},
+        dims={"nI": tok2vec.maybe_get_dim("nI")},
         layers=[tok2vec, lower, upper],
         refs={"tok2vec": tok2vec, "lower": lower, "upper": upper},
         init=init,
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index 1d652a483..0d3bbdf35 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -35,7 +35,7 @@ maxout_pieces = 3
 depth = 2
 
 [model.linear_model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
 exclusive_classes = true
 ngram_size = 1
 no_output_layer = false
@@ -44,7 +44,7 @@ DEFAULT_SINGLE_TEXTCAT_MODEL = Config().from_str(single_label_default_config)["m
 
 single_label_bow_config = """
 [model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
 exclusive_classes = true
 ngram_size = 1
 no_output_layer = false
@@ -52,7 +52,7 @@ no_output_layer = false
 
 single_label_cnn_config = """
 [model]
-@architectures = "spacy.TextCatCNN.v1"
+@architectures = "spacy.TextCatCNN.v2"
 exclusive_classes = true
 
 [model.tok2vec]
@@ -298,6 +298,8 @@ class TextCategorizer(TrainablePipe):
             return 0
         self._allow_extra_label()
         self.cfg["labels"].append(label)
+        if self.model and "resize_output" in self.model.attrs:
+            self.model = self.model.attrs["resize_output"](self.model, len(self.cfg["labels"]))
         self.vocab.strings.add(label)
         return 1
 
diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py
index 7267735b4..ba36881af 100644
--- a/spacy/pipeline/textcat_multilabel.py
+++ b/spacy/pipeline/textcat_multilabel.py
@@ -35,7 +35,7 @@ maxout_pieces = 3
 depth = 2
 
 [model.linear_model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
 exclusive_classes = false
 ngram_size = 1
 no_output_layer = false
@@ -44,7 +44,7 @@ DEFAULT_MULTI_TEXTCAT_MODEL = Config().from_str(multi_label_default_config)["mod
 
 multi_label_bow_config = """
 [model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
 exclusive_classes = false
 ngram_size = 1
 no_output_layer = false
@@ -52,7 +52,7 @@ no_output_layer = false
 
 multi_label_cnn_config = """
 [model]
-@architectures = "spacy.TextCatCNN.v1"
+@architectures = "spacy.TextCatCNN.v2"
 exclusive_classes = false
 
 [model.tok2vec]
diff --git a/spacy/pipeline/trainable_pipe.pyx b/spacy/pipeline/trainable_pipe.pyx
index fe51f38e5..926e92e91 100644
--- a/spacy/pipeline/trainable_pipe.pyx
+++ b/spacy/pipeline/trainable_pipe.pyx
@@ -213,7 +213,16 @@ cdef class TrainablePipe(Pipe):
 
     def _allow_extra_label(self) -> None:
         """Raise an error if the component can not add any more labels."""
-        if self.model.has_dim("nO") and self.model.get_dim("nO") == len(self.labels):
+        # Read the output size from the model itself, falling back to its
+        # "output_layer" ref when the model's own nO is not set.
+        nO = None
+        if self.model.has_dim("nO"):
+            nO = self.model.get_dim("nO")
+        elif self.model.has_ref("output_layer") and self.model.get_ref("output_layer").has_dim("nO"):
+            nO = self.model.get_ref("output_layer").get_dim("nO")
+        if nO is not None and nO == len(self.labels):
             if not self.is_resizable:
-                raise ValueError(Errors.E922.format(name=self.name, nO=self.model.get_dim("nO")))
+                # Report the nO resolved above: it may have come from the
+                # output layer, in which case the model's own dim is unset.
+                raise ValueError(Errors.E922.format(name=self.name, nO=nO))
 
diff --git a/spacy/tests/pipeline/test_pipe_factories.py b/spacy/tests/pipeline/test_pipe_factories.py
index b99e9a863..5a5ca140c 100644
--- a/spacy/tests/pipeline/test_pipe_factories.py
+++ b/spacy/tests/pipeline/test_pipe_factories.py
@@ -160,7 +160,7 @@ def test_pipe_class_component_model():
             "@architectures": "spacy.TextCatEnsemble.v2",
             "tok2vec": DEFAULT_TOK2VEC_MODEL,
             "linear_model": {
-                "@architectures": "spacy.TextCatBOW.v1",
+                "@architectures": "spacy.TextCatBOW.v2",
                 "exclusive_classes": False,
                 "ngram_size": 1,
                 "no_output_layer": False,
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 43dfff147..6f1d22eba 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -131,19 +131,129 @@ def test_implicit_label(name, get_examples):
     nlp.initialize(get_examples=get_examples(nlp))
 
 
-@pytest.mark.parametrize("name", ["textcat", "textcat_multilabel"])
-def test_no_resize(name):
+# fmt: off
+@pytest.mark.parametrize(
+    "name,textcat_config",
+    [
+        # BOW
+        ("textcat", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}),
+        ("textcat", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
+        # ENSEMBLE
+        ("textcat", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}}),
+        ("textcat", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}}),
+        # CNN
+        ("textcat", {"@architectures": "spacy.TextCatCNN.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatCNN.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
+    ],
+)
+# fmt: on
+def test_no_resize(name, textcat_config):
+    """The old textcat architectures weren't resizable"""
     nlp = Language()
-    textcat = nlp.add_pipe(name)
+    pipe_config = {"model": textcat_config}
+    textcat = nlp.add_pipe(name, config=pipe_config)
     textcat.add_label("POSITIVE")
     textcat.add_label("NEGATIVE")
     nlp.initialize()
-    assert textcat.model.get_dim("nO") >= 2
+    assert textcat.model.maybe_get_dim("nO") in [2, None]
     # this throws an error because the textcat can't be resized after initialization
     with pytest.raises(ValueError):
         textcat.add_label("NEUTRAL")
 
 
+# fmt: off
+@pytest.mark.parametrize(
+    "name,textcat_config",
+    [
+        # BOW
+        ("textcat", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}),
+        ("textcat", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
+        # CNN
+        ("textcat", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
+    ],
+)
+# fmt: on
+def test_resize(name, textcat_config):
+    """The new textcat architectures are resizable"""
+    nlp = Language()
+    pipe_config = {"model": textcat_config}
+    textcat = nlp.add_pipe(name, config=pipe_config)
+    textcat.add_label("POSITIVE")
+    textcat.add_label("NEGATIVE")
+    assert textcat.model.maybe_get_dim("nO") in [2, None]
+    nlp.initialize()
+    assert textcat.model.maybe_get_dim("nO") in [2, None]
+    textcat.add_label("NEUTRAL")
+    assert textcat.model.maybe_get_dim("nO") in [3, None]
+
+
+# fmt: off
+@pytest.mark.parametrize(
+    "name,textcat_config",
+    [
+        # BOW
+        ("textcat", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}),
+        ("textcat", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
+        # CNN
+        ("textcat", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
+    ],
+)
+# fmt: on
+def test_resize_same_results(name, textcat_config):
+    # Ensure that the resized textcat classifiers still produce the same results for old labels
+    fix_random_seed(0)
+    nlp = English()
+    pipe_config = {"model": textcat_config}
+    textcat = nlp.add_pipe(name, config=pipe_config)
+
+    train_examples = []
+    for text, annotations in TRAIN_DATA_SINGLE_LABEL:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+    optimizer = nlp.initialize(get_examples=lambda: train_examples)
+    assert textcat.model.maybe_get_dim("nO") in [2, None]
+
+    for i in range(5):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+
+    # test the trained model before resizing
+    test_text = "I am happy."
+    doc = nlp(test_text)
+    assert len(doc.cats) == 2
+    pos_pred = doc.cats["POSITIVE"]
+    neg_pred = doc.cats["NEGATIVE"]
+
+    # test the trained model again after resizing
+    textcat.add_label("NEUTRAL")
+    doc = nlp(test_text)
+    assert len(doc.cats) == 3
+    assert doc.cats["POSITIVE"] == pos_pred
+    assert doc.cats["NEGATIVE"] == neg_pred
+    assert doc.cats["NEUTRAL"] <= 1
+
+    for i in range(5):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+
+    # test the trained model again after training further with new label
+    doc = nlp(test_text)
+    assert len(doc.cats) == 3
+    assert doc.cats["POSITIVE"] != pos_pred
+    assert doc.cats["NEGATIVE"] != neg_pred
+    for cat in doc.cats:
+        assert doc.cats[cat] <= 1
+
+
 def test_error_with_multi_labels():
     nlp = Language()
     nlp.add_pipe("textcat")
@@ -286,14 +396,14 @@ def test_overfitting_IO_multi():
 @pytest.mark.parametrize(
     "name,train_data,textcat_config",
     [
-        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}),
-        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "ngram_size": 4, "no_output_layer": False}),
-        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "ngram_size": 3, "no_output_layer": True}),
-        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "ngram_size": 2, "no_output_layer": True}),
-        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}}),
-        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "ngram_size": 5, "no_output_layer": False}}),
-        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
-        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
+        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}),
+        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 4, "no_output_layer": False}),
+        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 3, "no_output_layer": True}),
+        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 2, "no_output_layer": True}),
+        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}}),
+        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 5, "no_output_layer": False}}),
+        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
+        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
     ],
 )
 # fmt: on
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index b38a50f71..b851641d9 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -297,7 +297,7 @@ def test_util_dot_section():
     factory = "textcat"
 
     [components.textcat.model]
-    @architectures = "spacy.TextCatBOW.v1"
+    @architectures = "spacy.TextCatBOW.v2"
     exclusive_classes = true
     ngram_size = 1
     no_output_layer = false
diff --git a/website/docs/api/architectures.md b/website/docs/api/architectures.md
index e09352ec9..4923ce18f 100644
--- a/website/docs/api/architectures.md
+++ b/website/docs/api/architectures.md
@@ -611,7 +611,7 @@ single-label use-cases where `exclusive_classes = true`, while the
 > nO = null
 >
 > [model.linear_model]
-> @architectures = "spacy.TextCatBOW.v1"
+> @architectures = "spacy.TextCatBOW.v2"
 > exclusive_classes = true
 > ngram_size = 1
 > no_output_layer = false
@@ -666,13 +666,13 @@ taking it as argument:
 
 </Accordion>
 
-### spacy.TextCatCNN.v1 {#TextCatCNN}
+### spacy.TextCatCNN.v2 {#TextCatCNN}
 
 > #### Example Config
 >
 > ```ini
 > [model]
-> @architectures = "spacy.TextCatCNN.v1"
+> @architectures = "spacy.TextCatCNN.v2"
 > exclusive_classes = false
 > nO = null
 >
@@ -698,13 +698,20 @@ architecture is usually less accurate than the ensemble, but runs faster.
 | `nO`                | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
 | **CREATES**         | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
 
-### spacy.TextCatBOW.v1 {#TextCatBOW}
+<Accordion title="spacy.TextCatCNN.v1 definition" spaced>
+
+[TextCatCNN.v1](/api/legacy#TextCatCNN_v1) had the exact same signature, but was not yet resizable.
+Since v2, new labels can be added to this component, even after training.
+
+</Accordion>
+
+### spacy.TextCatBOW.v2 {#TextCatBOW}
 
 > #### Example Config
 >
 > ```ini
 > [model]
-> @architectures = "spacy.TextCatBOW.v1"
+> @architectures = "spacy.TextCatBOW.v2"
 > exclusive_classes = false
 > ngram_size = 1
 > no_output_layer = false
@@ -722,6 +729,13 @@ the others, but may not be as accurate, especially if texts are short.
 | `nO`                | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
 | **CREATES**         | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
 
+<Accordion title="spacy.TextCatBOW.v1 definition" spaced>
+
+[TextCatBOW.v1](/api/legacy#TextCatBOW_v1) had the exact same signature, but was not yet resizable.
+Since v2, new labels can be added to this component, even after training.
+
+</Accordion>
+
 ## Entity linking architectures {#entitylinker source="spacy/ml/models/entity_linker.py"}
 
 An [`EntityLinker`](/api/entitylinker) component disambiguates textual mentions
diff --git a/website/docs/api/data-formats.md b/website/docs/api/data-formats.md
index 2b1c3480c..4ca5fb24d 100644
--- a/website/docs/api/data-formats.md
+++ b/website/docs/api/data-formats.md
@@ -93,7 +93,7 @@ Defines the `nlp` object, its tokenizer and
 > labels = ["POSITIVE", "NEGATIVE"]
 >
 > [components.textcat.model]
-> @architectures = "spacy.TextCatBOW.v1"
+> @architectures = "spacy.TextCatBOW.v2"
 > exclusive_classes = true
 > ngram_size = 1
 > no_output_layer = false
diff --git a/website/docs/api/legacy.md b/website/docs/api/legacy.md
index 96bc199bf..563d5aea8 100644
--- a/website/docs/api/legacy.md
+++ b/website/docs/api/legacy.md
@@ -176,6 +176,68 @@ added to an existing vectors table. See more details in
 
 </Infobox>
 
+### spacy.TextCatCNN.v1 {#TextCatCNN_v1}
+
+Since `spacy.TextCatCNN.v2`, this architecture has become resizable, which means that you can add
+labels to a previously trained textcat. `TextCatCNN` v1 did not yet support that.
+
+> #### Example Config
+>
+> ```ini
+> [model]
+> @architectures = "spacy.TextCatCNN.v1"
+> exclusive_classes = false
+> nO = null
+>
+> [model.tok2vec]
+> @architectures = "spacy.HashEmbedCNN.v1"
+> pretrained_vectors = null
+> width = 96
+> depth = 4
+> embed_size = 2000
+> window_size = 1
+> maxout_pieces = 3
+> subword_features = true
+> ```
+
+A neural network model where token vectors are calculated using a CNN. The
+vectors are mean pooled and used as features in a feed-forward network. This
+architecture is usually less accurate than the ensemble, but runs faster.
+
+| Name                | Description                                                                                                                                                                                    |
+| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `exclusive_classes` | Whether or not categories are mutually exclusive. ~~bool~~                                                                                                                                     |
+| `tok2vec`           | The [`tok2vec`](#tok2vec) layer of the model. ~~Model~~                                                                                                                                        |
+| `nO`                | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
+| **CREATES**         | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
+
+### spacy.TextCatBOW.v1 {#TextCatBOW_v1}
+
+Since `spacy.TextCatBOW.v2`, this architecture has become resizable, which means that you can add
+labels to a previously trained textcat. `TextCatBOW` v1 did not yet support that.
+
+> #### Example Config
+>
+> ```ini
+> [model]
+> @architectures = "spacy.TextCatBOW.v1"
+> exclusive_classes = false
+> ngram_size = 1
+> no_output_layer = false
+> nO = null
+> ```
+
+An n-gram "bag-of-words" model. This architecture should run much faster than
+the others, but may not be as accurate, especially if texts are short.
+
+| Name                | Description                                                                                                                                                                                    |
+| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `exclusive_classes` | Whether or not categories are mutually exclusive. ~~bool~~                                                                                                                                     |
+| `ngram_size`        | Determines the maximum length of the n-grams in the BOW model. For instance, `ngram_size=3` would give unigram, bigram and trigram features. ~~int~~                                           |
+| `no_output_layer`   | Whether or not to add an output layer to the model (`Softmax` activation if `exclusive_classes` is `True`, else `Logistic`). ~~bool~~                                                          |
+| `nO`                | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
+| **CREATES**         | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
+
 ## Loggers {#loggers}
 
 These functions are available from `@spacy.registry.loggers`.
diff --git a/website/docs/usage/layers-architectures.md b/website/docs/usage/layers-architectures.md
index 8fe2cf489..17043d599 100644
--- a/website/docs/usage/layers-architectures.md
+++ b/website/docs/usage/layers-architectures.md
@@ -151,7 +151,7 @@ maxout_pieces = 3
 depth = 2
 
 [components.textcat.model.linear_model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
 exclusive_classes = true
 ngram_size = 1
 no_output_layer = false
@@ -169,7 +169,7 @@ factory = "textcat"
 labels = []
 
 [components.textcat.model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
 exclusive_classes = true
 ngram_size = 1
 no_output_layer = false
diff --git a/website/docs/usage/processing-pipelines.md b/website/docs/usage/processing-pipelines.md
index bde3ab84f..87feee54a 100644
--- a/website/docs/usage/processing-pipelines.md
+++ b/website/docs/usage/processing-pipelines.md
@@ -1324,7 +1324,7 @@ labels = []
 # This function is created and then passed to the "textcat" component as
 # the argument "model"
 [components.textcat.model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
 exclusive_classes = true
 ngram_size = 1
 no_output_layer = false