diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index 1937ea935..2817147f3 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -271,8 +271,9 @@ grad_factor = 1.0
 @layers = "reduce_mean.v1"
 
 [components.textcat.model.linear_model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatBOW.v3"
 exclusive_classes = true
+length = 262144
 ngram_size = 1
 no_output_layer = false
 
@@ -308,8 +309,9 @@ grad_factor = 1.0
 @layers = "reduce_mean.v1"
 
 [components.textcat_multilabel.model.linear_model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatBOW.v3"
 exclusive_classes = false
+length = 262144
 ngram_size = 1
 no_output_layer = false
 
@@ -542,14 +544,15 @@ nO = null
 width = ${components.tok2vec.model.encode.width}
 
 [components.textcat.model.linear_model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatBOW.v3"
 exclusive_classes = true
+length = 262144
 ngram_size = 1
 no_output_layer = false
 
 {% else -%}
 [components.textcat.model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatBOW.v3"
 exclusive_classes = true
 ngram_size = 1
 no_output_layer = false
@@ -570,15 +573,17 @@ nO = null
 width = ${components.tok2vec.model.encode.width}
 
 [components.textcat_multilabel.model.linear_model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatBOW.v3"
 exclusive_classes = false
+length = 262144
 ngram_size = 1
 no_output_layer = false
 
 {% else -%}
 [components.textcat_multilabel.model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatBOW.v3"
 exclusive_classes = false
+length = 262144
 ngram_size = 1
 no_output_layer = false
 {%- endif %}
diff --git a/spacy/errors.py b/spacy/errors.py
index 8b290da6d..093c65f3d 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -983,6 +983,7 @@ class Errors(metaclass=ErrorsWithCodes):
              "predicted docs when training {component}.")
     E1055 = ("The 'replace_listener' callback expects {num_params} parameters, "
              "but only callbacks with one or three parameters are supported")
+    E1056 = ("The `TextCatBOW` architecture expects a length of at least 1, was {length}.")
 
 
 # Deprecated model shortcuts, only used in errors and warnings
diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py
index ab14110d2..e6d1f030f 100644
--- a/spacy/ml/models/textcat.py
+++ b/spacy/ml/models/textcat.py
@@ -1,5 +1,5 @@
 from functools import partial
-from typing import List, Optional, cast
+from typing import List, Optional, Tuple, cast
 
 from thinc.api import (
     Dropout,
@@ -12,6 +12,7 @@ from thinc.api import (
     Relu,
     Softmax,
     SparseLinear,
+    SparseLinear_v2,
     chain,
     clone,
     concatenate,
@@ -25,9 +26,10 @@ from thinc.api import (
 )
 from thinc.layers.chain import init as init_chain
 from thinc.layers.resizable import resize_linear_weighted, resize_model
-from thinc.types import Floats2d
+from thinc.types import ArrayXd, Floats2d
 
 from ...attrs import ORTH
+from ...errors import Errors
 from ...tokens import Doc
 from ...util import registry
 from ..extract_ngrams import extract_ngrams
@@ -95,10 +97,48 @@ def build_bow_text_classifier(
     ngram_size: int,
     no_output_layer: bool,
     nO: Optional[int] = None,
+) -> Model[List[Doc], Floats2d]:
+    return _build_bow_text_classifier(
+        exclusive_classes=exclusive_classes,
+        ngram_size=ngram_size,
+        no_output_layer=no_output_layer,
+        nO=nO,
+        sparse_linear=SparseLinear(nO=nO),
+    )
+
+
+@registry.architectures("spacy.TextCatBOW.v3")
+def build_bow_text_classifier_v3(
+    exclusive_classes: bool,
+    ngram_size: int,
+    no_output_layer: bool,
+    length: int = 262144,
+    nO: Optional[int] = None,
+) -> Model[List[Doc], Floats2d]:
+    if length < 1:
+        raise ValueError(Errors.E1056.format(length=length))
+
+    # Round length up to a power of two: find k with 2**(k-1) < length <= 2**k.
+    length = 2 ** (length - 1).bit_length()
+
+    return _build_bow_text_classifier(
+        exclusive_classes=exclusive_classes,
+        ngram_size=ngram_size,
+        no_output_layer=no_output_layer,
+        nO=nO,
+        sparse_linear=SparseLinear_v2(nO=nO, length=length),
+    )
+
+
+def _build_bow_text_classifier(
+    exclusive_classes: bool,
+    ngram_size: int,
+    no_output_layer: bool,
+    sparse_linear: Model[Tuple[ArrayXd, ArrayXd, ArrayXd], ArrayXd],
+    nO: Optional[int] = None,
 ) -> Model[List[Doc], Floats2d]:
     fill_defaults = {"b": 0, "W": 0}
     with Model.define_operators({">>": chain}):
-        sparse_linear = SparseLinear(nO=nO)
         output_layer = None
         if not no_output_layer:
             fill_defaults["b"] = NEG_VALUE
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index 610ed99b6..43a335c4a 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -36,8 +36,9 @@ maxout_pieces = 3
 depth = 2
 
 [model.linear_model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatBOW.v3"
 exclusive_classes = true
+length = 262144
 ngram_size = 1
 no_output_layer = false
 """
@@ -45,8 +46,9 @@ DEFAULT_SINGLE_TEXTCAT_MODEL = Config().from_str(single_label_default_config)["m
 
 single_label_bow_config = """
 [model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatBOW.v3"
 exclusive_classes = true
+length = 262144
 ngram_size = 1
 no_output_layer = false
 """
diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py
index 364e6f436..c917cc610 100644
--- a/spacy/pipeline/textcat_multilabel.py
+++ b/spacy/pipeline/textcat_multilabel.py
@@ -35,8 +35,9 @@ maxout_pieces = 3
 depth = 2
 
 [model.linear_model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatBOW.v3"
 exclusive_classes = false
+length = 262144
 ngram_size = 1
 no_output_layer = false
 """
@@ -44,7 +45,7 @@ DEFAULT_MULTI_TEXTCAT_MODEL = Config().from_str(multi_label_default_config)["mod
 
 multi_label_bow_config = """
 [model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatBOW.v3"
 exclusive_classes = false
 ngram_size = 1
 no_output_layer = false
diff --git a/spacy/tests/pipeline/test_pipe_factories.py b/spacy/tests/pipeline/test_pipe_factories.py
index 83b986784..c45dccb06 100644
--- a/spacy/tests/pipeline/test_pipe_factories.py
+++ b/spacy/tests/pipeline/test_pipe_factories.py
@@ -203,7 +203,7 @@ def test_pipe_class_component_model():
             "@architectures": "spacy.TextCatEnsemble.v2",
             "tok2vec": DEFAULT_TOK2VEC_MODEL,
             "linear_model": {
-                "@architectures": "spacy.TextCatBOW.v2",
+                "@architectures": "spacy.TextCatBOW.v3",
                 "exclusive_classes": False,
                 "ngram_size": 1,
                 "no_output_layer": False,
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 9ce5909f1..147ea4900 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -414,7 +414,7 @@ def test_implicit_label(name, get_examples):
 @pytest.mark.parametrize(
     "name,textcat_config",
     [
-        # BOW
+        # BOW V1
         ("textcat", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}),
         ("textcat", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
         ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
@@ -451,11 +451,11 @@ def test_no_resize(name, textcat_config):
 @pytest.mark.parametrize(
     "name,textcat_config",
     [
-        # BOW
-        ("textcat", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}),
-        ("textcat", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
-        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
-        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
+        # BOW V3
+        ("textcat", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}),
+        ("textcat", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
         # CNN
         ("textcat", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
         ("textcat_multilabel", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
@@ -480,11 +480,11 @@ def test_resize(name, textcat_config):
 @pytest.mark.parametrize(
     "name,textcat_config",
     [
-        # BOW
-        ("textcat", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}),
-        ("textcat", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
-        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
-        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
+        # BOW V3
+        ("textcat", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}),
+        ("textcat", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
         # CNN
         ("textcat", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
         ("textcat_multilabel", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
@@ -693,9 +693,14 @@ def test_overfitting_IO_multi():
         ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 4, "no_output_layer": False}),
         ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 3, "no_output_layer": True}),
         ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 2, "no_output_layer": True}),
+        # BOW V3
+        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}),
+        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": True, "ngram_size": 4, "no_output_layer": False}),
+        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "ngram_size": 3, "no_output_layer": True}),
+        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": True, "ngram_size": 2, "no_output_layer": True}),
         # ENSEMBLE V2
-        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}}),
-        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 5, "no_output_layer": False}}),
+        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}}),
+        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": True, "ngram_size": 5, "no_output_layer": False}}),
         # CNN V2
         ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
         ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
diff --git a/spacy/tests/test_cli_app.py b/spacy/tests/test_cli_app.py
index 2d1dd053a..1789d60ea 100644
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@@ -238,7 +238,7 @@ def test_project_push_pull(project_dir):
 
 def test_find_function_valid():
     # example of architecture in main code base
-    function = "spacy.TextCatBOW.v2"
+    function = "spacy.TextCatBOW.v3"
     result = CliRunner().invoke(app, ["find-function", function, "-r", "architectures"])
     assert f"Found registered function '{function}'" in result.stdout
     assert "textcat.py" in result.stdout
@@ -257,7 +257,7 @@ def test_find_function_valid():
 
 def test_find_function_invalid():
     # invalid registry
-    function = "spacy.TextCatBOW.v2"
+    function = "spacy.TextCatBOW.v3"
     registry = "foobar"
     result = CliRunner().invoke(
         app, ["find-function", function, "--registry", registry]
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index 704a40485..b1b4faa88 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -376,8 +376,9 @@ def test_util_dot_section():
     factory = "textcat"
 
     [components.textcat.model]
-    @architectures = "spacy.TextCatBOW.v2"
+    @architectures = "spacy.TextCatBOW.v3"
     exclusive_classes = true
+    length = 262144
     ngram_size = 1
     no_output_layer = false
     """
diff --git a/website/docs/api/architectures.mdx b/website/docs/api/architectures.mdx
index 0ec915bd3..9d8b3ddfa 100644
--- a/website/docs/api/architectures.mdx
+++ b/website/docs/api/architectures.mdx
@@ -78,16 +78,16 @@ subword features, and a
 [MaxoutWindowEncoder](/api/architectures#MaxoutWindowEncoder) encoding layer
 consisting of a CNN and a layer-normalized maxout activation function.
 
-| Name                 | Description                                                                                                                                                                                                                                                                   |
-| -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `width`              | The width of the input and output. These are required to be the same, so that residual connections can be used. Recommended values are `96`, `128` or `300`. ~~int~~                                                                                                          |
-| `depth`              | The number of convolutional layers to use. Recommended values are between `2` and `8`. ~~int~~                                                                                                                                                                                |
-| `embed_size`         | The number of rows in the hash embedding tables. This can be surprisingly small, due to the use of the hash embeddings. Recommended values are between `2000` and `10000`. ~~int~~                                                                                            |
+| Name                 | Description                                                                                                                                                                                                                                                                 |
+| -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `width`              | The width of the input and output. These are required to be the same, so that residual connections can be used. Recommended values are `96`, `128` or `300`. ~~int~~                                                                                                        |
+| `depth`              | The number of convolutional layers to use. Recommended values are between `2` and `8`. ~~int~~                                                                                                                                                                              |
+| `embed_size`         | The number of rows in the hash embedding tables. This can be surprisingly small, due to the use of the hash embeddings. Recommended values are between `2000` and `10000`. ~~int~~                                                                                          |
 | `window_size`        | The number of tokens on either side to concatenate during the convolutions. The receptive field of the CNN will be `depth * window_size * 2 + 1`, so a 4-layer network with a window size of `2` will be sensitive to 17 words at a time. Recommended value is `1`. ~~int~~ |
-| `maxout_pieces`      | The number of pieces to use in the maxout non-linearity. If `1`, the [`Mish`](https://thinc.ai/docs/api-layers#mish) non-linearity is used instead. Recommended values are `1`-`3`. ~~int~~                                                                                   |
-| `subword_features`   | Whether to also embed subword features, specifically the prefix, suffix and word shape. This is recommended for alphabetic languages like English, but not if single-character tokens are used for a language such as Chinese. ~~bool~~                                       |
-| `pretrained_vectors` | Whether to also use static vectors. ~~bool~~                                                                                                                                                                                                                                  |
-| **CREATES**          | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                                                        |
+| `maxout_pieces`      | The number of pieces to use in the maxout non-linearity. If `1`, the [`Mish`](https://thinc.ai/docs/api-layers#mish) non-linearity is used instead. Recommended values are `1`-`3`. ~~int~~                                                                                 |
+| `subword_features`   | Whether to also embed subword features, specifically the prefix, suffix and word shape. This is recommended for alphabetic languages like English, but not if single-character tokens are used for a language such as Chinese. ~~bool~~                                     |
+| `pretrained_vectors` | Whether to also use static vectors. ~~bool~~                                                                                                                                                                                                                                |
+| **CREATES**          | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                                                      |
 
 ### spacy.Tok2VecListener.v1 {id="Tok2VecListener"}
 
@@ -962,8 +962,9 @@ single-label use-cases where `exclusive_classes = true`, while the
 > nO = null
 >
 > [model.linear_model]
-> @architectures = "spacy.TextCatBOW.v2"
+> @architectures = "spacy.TextCatBOW.v3"
 > exclusive_classes = true
+> length = 262144
 > ngram_size = 1
 > no_output_layer = false
 >
@@ -1057,14 +1058,15 @@ after training.
 
 </Accordion>
 
-### spacy.TextCatBOW.v2 {id="TextCatBOW"}
+### spacy.TextCatBOW.v3 {id="TextCatBOW"}
 
 > #### Example Config
 >
 > ```ini
 > [model]
-> @architectures = "spacy.TextCatBOW.v2"
+> @architectures = "spacy.TextCatBOW.v3"
 > exclusive_classes = false
+> length = 262144
 > ngram_size = 1
 > no_output_layer = false
 > nO = null
@@ -1078,14 +1080,19 @@ the others, but may not be as accurate, especially if texts are short.
 | `exclusive_classes` | Whether or not categories are mutually exclusive. ~~bool~~                                                                                                                                     |
 | `ngram_size`        | Determines the maximum length of the n-grams in the BOW model. For instance, `ngram_size=3` would give unigram, trigram and bigram features. ~~int~~                                           |
 | `no_output_layer`   | Whether or not to add an output layer to the model (`Softmax` activation if `exclusive_classes` is `True`, else `Logistic`). ~~bool~~                                                          |
+| `length`            | The size of the weights vector. The length will be rounded up to the next power of two if it is not a power of two. Defaults to `262144`. ~~int~~                                              |
 | `nO`                | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
 | **CREATES**         | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
 
-<Accordion title="spacy.TextCatBOW.v1 definition" spaced>
+<Accordion title="Previous versions of spacy.TextCatBOW" spaced>
 
-[TextCatBOW.v1](/api/legacy#TextCatBOW_v1) had the exact same signature, but was
-not yet resizable. Since v2, new labels can be added to this component, even
-after training.
+- [TextCatBOW.v1](/api/legacy#TextCatBOW_v1) was not yet resizable. Since v2,
+  new labels can be added to this component, even after training.
+- [TextCatBOW.v1](/api/legacy#TextCatBOW_v1) and
+  [TextCatBOW.v2](/api/legacy#TextCatBOW_v2) used an erroneous sparse linear
+  layer that only used a small number of the allocated parameters.
+- [TextCatBOW.v1](/api/legacy#TextCatBOW_v1) and
+  [TextCatBOW.v2](/api/legacy#TextCatBOW_v2) did not have the `length` argument.
 
 </Accordion>
 
diff --git a/website/docs/api/legacy.mdx b/website/docs/api/legacy.mdx
index ea6d3a899..32111ce92 100644
--- a/website/docs/api/legacy.mdx
+++ b/website/docs/api/legacy.mdx
@@ -198,7 +198,9 @@ architecture is usually less accurate than the ensemble, but runs faster.
 
 Since `spacy.TextCatBOW.v2`, this architecture has become resizable, which means
 that you can add labels to a previously trained textcat. `TextCatBOW` v1 did not
-yet support that.
+yet support that. Versions of this model before `spacy.TextCatBOW.v3` used an
+erroneous sparse linear layer that only used a small number of the allocated
+parameters.
 
 > #### Example Config
 >
@@ -222,6 +224,33 @@ the others, but may not be as accurate, especially if texts are short.
 | `nO`                | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
 | **CREATES**         | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
 
+### spacy.TextCatBOW.v2 {id="TextCatBOW_v2"}
+
+Versions of this model before `spacy.TextCatBOW.v3` used an erroneous sparse
+linear layer that only used a small number of the allocated parameters.
+
+> #### Example Config
+>
+> ```ini
+> [model]
+> @architectures = "spacy.TextCatBOW.v2"
+> exclusive_classes = false
+> ngram_size = 1
+> no_output_layer = false
+> nO = null
+> ```
+
+An n-gram "bag-of-words" model. This architecture should run much faster than
+the others, but may not be as accurate, especially if texts are short.
+
+| Name                | Description                                                                                                                                                                                    |
+| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `exclusive_classes` | Whether or not categories are mutually exclusive. ~~bool~~                                                                                                                                     |
+| `ngram_size`        | Determines the maximum length of the n-grams in the BOW model. For instance, `ngram_size=3` would give unigram, bigram and trigram features. ~~int~~                                           |
+| `no_output_layer`   | Whether or not to add an output layer to the model (`Softmax` activation if `exclusive_classes` is `True`, else `Logistic`). ~~bool~~                                                          |
+| `nO`                | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
+| **CREATES**         | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
+
 ### spacy.TransitionBasedParser.v1 {id="TransitionBasedParser_v1"}
 
 Identical to
diff --git a/website/docs/usage/layers-architectures.mdx b/website/docs/usage/layers-architectures.mdx
index 8f6bf3a20..03b85f5af 100644
--- a/website/docs/usage/layers-architectures.mdx
+++ b/website/docs/usage/layers-architectures.mdx
@@ -153,8 +153,9 @@ maxout_pieces = 3
 depth = 2
 
 [components.textcat.model.linear_model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatBOW.v3"
 exclusive_classes = true
+length = 262144
 ngram_size = 1
 no_output_layer = false
 ```
@@ -170,8 +171,9 @@ factory = "textcat"
 labels = []
 
 [components.textcat.model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatBOW.v3"
 exclusive_classes = true
+length = 262144
 ngram_size = 1
 no_output_layer = false
 nO = null
diff --git a/website/docs/usage/processing-pipelines.mdx b/website/docs/usage/processing-pipelines.mdx
index 6ec8a0513..3e58b251d 100644
--- a/website/docs/usage/processing-pipelines.mdx
+++ b/website/docs/usage/processing-pipelines.mdx
@@ -1328,8 +1328,9 @@ labels = []
 # This function is created and then passed to the "textcat" component as
 # the argument "model"
 [components.textcat.model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatBOW.v3"
 exclusive_classes = true
+length = 262144
 ngram_size = 1
 no_output_layer = false