diff --git a/pyproject.toml b/pyproject.toml
index 7a0e34376..882b31162 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ requires = [
"cymem>=2.0.2,<2.1.0",
"preshed>=3.0.2,<3.1.0",
"murmurhash>=0.28.0,<1.1.0",
- "thinc>=8.0.3,<8.1.0",
+ "thinc>=8.0.4,<8.1.0",
"blis>=0.4.0,<0.8.0",
"pathy",
"numpy>=1.15.0",
diff --git a/requirements.txt b/requirements.txt
index 46337389c..9837933ab 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
# Our libraries
-spacy-legacy>=3.0.5,<3.1.0
+spacy-legacy>=3.0.6,<3.1.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
-thinc>=8.0.3,<8.1.0
+thinc>=8.0.4,<8.1.0
blis>=0.4.0,<0.8.0
ml_datasets>=0.2.0,<0.3.0
murmurhash>=0.28.0,<1.1.0
diff --git a/setup.cfg b/setup.cfg
index 99bae6ac8..37c432205 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -37,14 +37,14 @@ setup_requires =
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
murmurhash>=0.28.0,<1.1.0
- thinc>=8.0.3,<8.1.0
+ thinc>=8.0.4,<8.1.0
install_requires =
# Our libraries
- spacy-legacy>=3.0.5,<3.1.0
+ spacy-legacy>=3.0.6,<3.1.0
murmurhash>=0.28.0,<1.1.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
- thinc>=8.0.3,<8.1.0
+ thinc>=8.0.4,<8.1.0
blis>=0.4.0,<0.8.0
wasabi>=0.8.1,<1.1.0
srsly>=2.4.1,<3.0.0
diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index 0d422318b..3f8b3e1cc 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -151,14 +151,14 @@ grad_factor = 1.0
@layers = "reduce_mean.v1"
[components.textcat.model.linear_model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = true
ngram_size = 1
no_output_layer = false
{% else -%}
[components.textcat.model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = true
ngram_size = 1
no_output_layer = false
@@ -182,14 +182,14 @@ grad_factor = 1.0
@layers = "reduce_mean.v1"
[components.textcat_multilabel.model.linear_model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = false
ngram_size = 1
no_output_layer = false
{% else -%}
[components.textcat_multilabel.model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = false
ngram_size = 1
no_output_layer = false
@@ -316,14 +316,14 @@ nO = null
width = ${components.tok2vec.model.encode.width}
[components.textcat.model.linear_model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = true
ngram_size = 1
no_output_layer = false
{% else -%}
[components.textcat.model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = true
ngram_size = 1
no_output_layer = false
@@ -344,14 +344,14 @@ nO = null
width = ${components.tok2vec.model.encode.width}
[components.textcat_multilabel.model.linear_model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = false
ngram_size = 1
no_output_layer = false
{% else -%}
[components.textcat_multilabel.model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = false
ngram_size = 1
no_output_layer = false
diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py
index a1855c5a0..e3f6e944a 100644
--- a/spacy/ml/models/textcat.py
+++ b/spacy/ml/models/textcat.py
@@ -1,11 +1,13 @@
+from functools import partial
from typing import Optional, List
from thinc.types import Floats2d
from thinc.api import Model, reduce_mean, Linear, list2ragged, Logistic
from thinc.api import chain, concatenate, clone, Dropout, ParametricAttention
from thinc.api import SparseLinear, Softmax, softmax_activation, Maxout, reduce_sum
-from thinc.api import with_cpu, Relu, residual, LayerNorm
+from thinc.api import with_cpu, Relu, residual, LayerNorm, resizable
from thinc.layers.chain import init as init_chain
+from thinc.layers.resizable import resize_model, resize_linear_weighted
from ...attrs import ORTH
from ...util import registry
@@ -15,7 +17,10 @@ from ...tokens import Doc
from .tok2vec import get_tok2vec_width
-@registry.architectures("spacy.TextCatCNN.v1")
+NEG_VALUE = -5000
+
+
+@registry.architectures("spacy.TextCatCNN.v2")
def build_simple_cnn_text_classifier(
tok2vec: Model, exclusive_classes: bool, nO: Optional[int] = None
) -> Model[List[Doc], Floats2d]:
@@ -25,38 +30,75 @@ def build_simple_cnn_text_classifier(
outputs sum to 1. If exclusive_classes=False, a logistic non-linearity
is applied instead, so that outputs are in the range [0, 1].
"""
+ fill_defaults = {"b": 0, "W": 0}
with Model.define_operators({">>": chain}):
cnn = tok2vec >> list2ragged() >> reduce_mean()
+ nI = tok2vec.maybe_get_dim("nO")
if exclusive_classes:
- output_layer = Softmax(nO=nO, nI=tok2vec.maybe_get_dim("nO"))
- model = cnn >> output_layer
- model.set_ref("output_layer", output_layer)
+ output_layer = Softmax(nO=nO, nI=nI)
+ fill_defaults["b"] = NEG_VALUE
+ resizable_layer = resizable(
+ output_layer,
+ resize_layer=partial(
+ resize_linear_weighted, fill_defaults=fill_defaults
+ ),
+ )
+ model = cnn >> resizable_layer
else:
- linear_layer = Linear(nO=nO, nI=tok2vec.maybe_get_dim("nO"))
- model = cnn >> linear_layer >> Logistic()
- model.set_ref("output_layer", linear_layer)
+ output_layer = Linear(nO=nO, nI=nI)
+ resizable_layer = resizable(
+ output_layer,
+ resize_layer=partial(
+ resize_linear_weighted, fill_defaults=fill_defaults
+ ),
+ )
+ model = cnn >> resizable_layer >> Logistic()
+ model.set_ref("output_layer", output_layer)
+ model.attrs["resize_output"] = partial(
+ resize_and_set_ref,
+ resizable_layer=resizable_layer,
+ )
model.set_ref("tok2vec", tok2vec)
model.set_dim("nO", nO)
model.attrs["multi_label"] = not exclusive_classes
return model
-@registry.architectures("spacy.TextCatBOW.v1")
+def resize_and_set_ref(model, new_nO, resizable_layer):
+ resizable_layer = resize_model(resizable_layer, new_nO)
+ model.set_ref("output_layer", resizable_layer.layers[0])
+ model.set_dim("nO", new_nO, force=True)
+ return model
+
+
+@registry.architectures("spacy.TextCatBOW.v2")
def build_bow_text_classifier(
exclusive_classes: bool,
ngram_size: int,
no_output_layer: bool,
nO: Optional[int] = None,
) -> Model[List[Doc], Floats2d]:
+ fill_defaults = {"b": 0, "W": 0}
with Model.define_operators({">>": chain}):
- sparse_linear = SparseLinear(nO)
- model = extract_ngrams(ngram_size, attr=ORTH) >> sparse_linear
- model = with_cpu(model, model.ops)
+ sparse_linear = SparseLinear(nO=nO)
+ output_layer = None
if not no_output_layer:
+ fill_defaults["b"] = NEG_VALUE
output_layer = softmax_activation() if exclusive_classes else Logistic()
+ resizable_layer = resizable(
+ sparse_linear,
+ resize_layer=partial(resize_linear_weighted, fill_defaults=fill_defaults),
+ )
+ model = extract_ngrams(ngram_size, attr=ORTH) >> resizable_layer
+ model = with_cpu(model, model.ops)
+ if output_layer:
model = model >> with_cpu(output_layer, output_layer.ops)
+ model.set_dim("nO", nO)
model.set_ref("output_layer", sparse_linear)
model.attrs["multi_label"] = not exclusive_classes
+ model.attrs["resize_output"] = partial(
+ resize_and_set_ref, resizable_layer=resizable_layer
+ )
return model
@@ -69,9 +111,7 @@ def build_text_classifier_v2(
exclusive_classes = not linear_model.attrs["multi_label"]
with Model.define_operators({">>": chain, "|": concatenate}):
width = tok2vec.maybe_get_dim("nO")
- attention_layer = ParametricAttention(
- width
- ) # TODO: benchmark performance difference of this layer
+ attention_layer = ParametricAttention(width)
maxout_layer = Maxout(nO=width, nI=width)
norm_layer = LayerNorm(nI=width)
cnn_model = (
diff --git a/spacy/ml/tb_framework.py b/spacy/ml/tb_framework.py
index 4ab5830cd..e7e5561af 100644
--- a/spacy/ml/tb_framework.py
+++ b/spacy/ml/tb_framework.py
@@ -15,7 +15,7 @@ def TransitionModel(
return Model(
name="parser_model",
forward=forward,
- dims={"nI": tok2vec.get_dim("nI") if tok2vec.has_dim("nI") else None},
+ dims={"nI": tok2vec.maybe_get_dim("nI")},
layers=[tok2vec, lower, upper],
refs={"tok2vec": tok2vec, "lower": lower, "upper": upper},
init=init,
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index 1d652a483..0d3bbdf35 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -35,7 +35,7 @@ maxout_pieces = 3
depth = 2
[model.linear_model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = true
ngram_size = 1
no_output_layer = false
@@ -44,7 +44,7 @@ DEFAULT_SINGLE_TEXTCAT_MODEL = Config().from_str(single_label_default_config)["m
single_label_bow_config = """
[model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = true
ngram_size = 1
no_output_layer = false
@@ -52,7 +52,7 @@ no_output_layer = false
single_label_cnn_config = """
[model]
-@architectures = "spacy.TextCatCNN.v1"
+@architectures = "spacy.TextCatCNN.v2"
exclusive_classes = true
[model.tok2vec]
@@ -298,6 +298,8 @@ class TextCategorizer(TrainablePipe):
return 0
self._allow_extra_label()
self.cfg["labels"].append(label)
+ if self.model and "resize_output" in self.model.attrs:
+ self.model = self.model.attrs["resize_output"](self.model, len(self.cfg["labels"]))
self.vocab.strings.add(label)
return 1
diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py
index 7267735b4..ba36881af 100644
--- a/spacy/pipeline/textcat_multilabel.py
+++ b/spacy/pipeline/textcat_multilabel.py
@@ -35,7 +35,7 @@ maxout_pieces = 3
depth = 2
[model.linear_model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = false
ngram_size = 1
no_output_layer = false
@@ -44,7 +44,7 @@ DEFAULT_MULTI_TEXTCAT_MODEL = Config().from_str(multi_label_default_config)["mod
multi_label_bow_config = """
[model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = false
ngram_size = 1
no_output_layer = false
@@ -52,7 +52,7 @@ no_output_layer = false
multi_label_cnn_config = """
[model]
-@architectures = "spacy.TextCatCNN.v1"
+@architectures = "spacy.TextCatCNN.v2"
exclusive_classes = false
[model.tok2vec]
diff --git a/spacy/pipeline/trainable_pipe.pyx b/spacy/pipeline/trainable_pipe.pyx
index fe51f38e5..926e92e91 100644
--- a/spacy/pipeline/trainable_pipe.pyx
+++ b/spacy/pipeline/trainable_pipe.pyx
@@ -213,7 +213,12 @@ cdef class TrainablePipe(Pipe):
def _allow_extra_label(self) -> None:
"""Raise an error if the component can not add any more labels."""
- if self.model.has_dim("nO") and self.model.get_dim("nO") == len(self.labels):
+ nO = None
+ if self.model.has_dim("nO"):
+ nO = self.model.get_dim("nO")
+ elif self.model.has_ref("output_layer") and self.model.get_ref("output_layer").has_dim("nO"):
+ nO = self.model.get_ref("output_layer").get_dim("nO")
+ if nO is not None and nO == len(self.labels):
if not self.is_resizable:
raise ValueError(Errors.E922.format(name=self.name, nO=self.model.get_dim("nO")))
diff --git a/spacy/tests/pipeline/test_pipe_factories.py b/spacy/tests/pipeline/test_pipe_factories.py
index b99e9a863..5a5ca140c 100644
--- a/spacy/tests/pipeline/test_pipe_factories.py
+++ b/spacy/tests/pipeline/test_pipe_factories.py
@@ -160,7 +160,7 @@ def test_pipe_class_component_model():
"@architectures": "spacy.TextCatEnsemble.v2",
"tok2vec": DEFAULT_TOK2VEC_MODEL,
"linear_model": {
- "@architectures": "spacy.TextCatBOW.v1",
+ "@architectures": "spacy.TextCatBOW.v2",
"exclusive_classes": False,
"ngram_size": 1,
"no_output_layer": False,
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 43dfff147..6f1d22eba 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -131,19 +131,129 @@ def test_implicit_label(name, get_examples):
nlp.initialize(get_examples=get_examples(nlp))
-@pytest.mark.parametrize("name", ["textcat", "textcat_multilabel"])
-def test_no_resize(name):
+#fmt: off
+@pytest.mark.parametrize(
+ "name,textcat_config",
+ [
+ # BOW
+ ("textcat", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}),
+ ("textcat", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
+ ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
+ ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
+ # ENSEMBLE
+ ("textcat", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}}),
+ ("textcat", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}}),
+ ("textcat_multilabel", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}}),
+ ("textcat_multilabel", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}}),
+ # CNN
+ ("textcat", {"@architectures": "spacy.TextCatCNN.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
+ ("textcat_multilabel", {"@architectures": "spacy.TextCatCNN.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
+ ],
+)
+#fmt: on
+def test_no_resize(name, textcat_config):
+ """The old textcat architectures weren't resizable"""
nlp = Language()
- textcat = nlp.add_pipe(name)
+ pipe_config = {"model": textcat_config}
+ textcat = nlp.add_pipe(name, config=pipe_config)
textcat.add_label("POSITIVE")
textcat.add_label("NEGATIVE")
nlp.initialize()
- assert textcat.model.get_dim("nO") >= 2
+ assert textcat.model.maybe_get_dim("nO") in [2, None]
# this throws an error because the textcat can't be resized after initialization
with pytest.raises(ValueError):
textcat.add_label("NEUTRAL")
+#fmt: off
+@pytest.mark.parametrize(
+ "name,textcat_config",
+ [
+ # BOW
+ ("textcat", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}),
+ ("textcat", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
+ ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
+ ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
+ # CNN
+ ("textcat", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
+ ("textcat_multilabel", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
+ ],
+)
+#fmt: on
+def test_resize(name, textcat_config):
+ """The new textcat architectures are resizable"""
+ nlp = Language()
+ pipe_config = {"model": textcat_config}
+ textcat = nlp.add_pipe(name, config=pipe_config)
+ textcat.add_label("POSITIVE")
+ textcat.add_label("NEGATIVE")
+ assert textcat.model.maybe_get_dim("nO") in [2, None]
+ nlp.initialize()
+ assert textcat.model.maybe_get_dim("nO") in [2, None]
+ textcat.add_label("NEUTRAL")
+ assert textcat.model.maybe_get_dim("nO") in [3, None]
+
+
+#fmt: off
+@pytest.mark.parametrize(
+ "name,textcat_config",
+ [
+ # BOW
+ ("textcat", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}),
+ ("textcat", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
+ ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
+ ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
+ # CNN
+ ("textcat", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
+ ("textcat_multilabel", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
+ ],
+)
+#fmt: on
+def test_resize_same_results(name, textcat_config):
+ # Ensure that the resized textcat classifiers still produce the same results for old labels
+ fix_random_seed(0)
+ nlp = English()
+ pipe_config = {"model": textcat_config}
+ textcat = nlp.add_pipe(name, config=pipe_config)
+
+ train_examples = []
+ for text, annotations in TRAIN_DATA_SINGLE_LABEL:
+ train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+ optimizer = nlp.initialize(get_examples=lambda: train_examples)
+ assert textcat.model.maybe_get_dim("nO") in [2, None]
+
+ for i in range(5):
+ losses = {}
+ nlp.update(train_examples, sgd=optimizer, losses=losses)
+
+ # test the trained model before resizing
+ test_text = "I am happy."
+ doc = nlp(test_text)
+ assert len(doc.cats) == 2
+ pos_pred = doc.cats["POSITIVE"]
+ neg_pred = doc.cats["NEGATIVE"]
+
+ # test the trained model again after resizing
+ textcat.add_label("NEUTRAL")
+ doc = nlp(test_text)
+ assert len(doc.cats) == 3
+ assert doc.cats["POSITIVE"] == pos_pred
+ assert doc.cats["NEGATIVE"] == neg_pred
+ assert doc.cats["NEUTRAL"] <= 1
+
+ for i in range(5):
+ losses = {}
+ nlp.update(train_examples, sgd=optimizer, losses=losses)
+
+ # test the trained model again after training further with new label
+ doc = nlp(test_text)
+ assert len(doc.cats) == 3
+ assert doc.cats["POSITIVE"] != pos_pred
+ assert doc.cats["NEGATIVE"] != neg_pred
+ for cat in doc.cats:
+ assert doc.cats[cat] <= 1
+
+
def test_error_with_multi_labels():
nlp = Language()
nlp.add_pipe("textcat")
@@ -286,14 +396,14 @@ def test_overfitting_IO_multi():
@pytest.mark.parametrize(
"name,train_data,textcat_config",
[
- ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}),
- ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "ngram_size": 4, "no_output_layer": False}),
- ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "ngram_size": 3, "no_output_layer": True}),
- ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "ngram_size": 2, "no_output_layer": True}),
- ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}}),
- ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "ngram_size": 5, "no_output_layer": False}}),
- ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
- ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
+ ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}),
+ ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 4, "no_output_layer": False}),
+ ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 3, "no_output_layer": True}),
+ ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 2, "no_output_layer": True}),
+ ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}}),
+ ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 5, "no_output_layer": False}}),
+ ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
+ ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
],
)
# fmt: on
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index b38a50f71..b851641d9 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -297,7 +297,7 @@ def test_util_dot_section():
factory = "textcat"
[components.textcat.model]
- @architectures = "spacy.TextCatBOW.v1"
+ @architectures = "spacy.TextCatBOW.v2"
exclusive_classes = true
ngram_size = 1
no_output_layer = false
diff --git a/website/docs/api/architectures.md b/website/docs/api/architectures.md
index e09352ec9..4923ce18f 100644
--- a/website/docs/api/architectures.md
+++ b/website/docs/api/architectures.md
@@ -611,7 +611,7 @@ single-label use-cases where `exclusive_classes = true`, while the
> nO = null
>
> [model.linear_model]
-> @architectures = "spacy.TextCatBOW.v1"
+> @architectures = "spacy.TextCatBOW.v2"
> exclusive_classes = true
> ngram_size = 1
> no_output_layer = false
@@ -666,13 +666,13 @@ taking it as argument:
-### spacy.TextCatCNN.v1 {#TextCatCNN}
+### spacy.TextCatCNN.v2 {#TextCatCNN}
> #### Example Config
>
> ```ini
> [model]
-> @architectures = "spacy.TextCatCNN.v1"
+> @architectures = "spacy.TextCatCNN.v2"
> exclusive_classes = false
> nO = null
>
@@ -698,13 +698,20 @@ architecture is usually less accurate than the ensemble, but runs faster.
| `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ |
-### spacy.TextCatBOW.v1 {#TextCatBOW}
+
+
+[TextCatCNN.v1](/api/legacy#TextCatCNN_v1) had the exact same signature, but was not yet resizable.
+Since v2, new labels can be added to this component, even after training.
+
+
+
+### spacy.TextCatBOW.v2 {#TextCatBOW}
> #### Example Config
>
> ```ini
> [model]
-> @architectures = "spacy.TextCatBOW.v1"
+> @architectures = "spacy.TextCatBOW.v2"
> exclusive_classes = false
> ngram_size = 1
> no_output_layer = false
@@ -722,6 +729,13 @@ the others, but may not be as accurate, especially if texts are short.
| `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ |
+
+
+[TextCatBOW.v1](/api/legacy#TextCatBOW_v1) had the exact same signature, but was not yet resizable.
+Since v2, new labels can be added to this component, even after training.
+
+
+
## Entity linking architectures {#entitylinker source="spacy/ml/models/entity_linker.py"}
An [`EntityLinker`](/api/entitylinker) component disambiguates textual mentions
diff --git a/website/docs/api/data-formats.md b/website/docs/api/data-formats.md
index 2b1c3480c..4ca5fb24d 100644
--- a/website/docs/api/data-formats.md
+++ b/website/docs/api/data-formats.md
@@ -93,7 +93,7 @@ Defines the `nlp` object, its tokenizer and
> labels = ["POSITIVE", "NEGATIVE"]
>
> [components.textcat.model]
-> @architectures = "spacy.TextCatBOW.v1"
+> @architectures = "spacy.TextCatBOW.v2"
> exclusive_classes = true
> ngram_size = 1
> no_output_layer = false
diff --git a/website/docs/api/legacy.md b/website/docs/api/legacy.md
index 96bc199bf..563d5aea8 100644
--- a/website/docs/api/legacy.md
+++ b/website/docs/api/legacy.md
@@ -176,6 +176,68 @@ added to an existing vectors table. See more details in
+### spacy.TextCatCNN.v1 {#TextCatCNN_v1}
+
+Since `spacy.TextCatCNN.v2`, this architecture has become resizable, which means that you can add
+labels to a previously trained textcat. `TextCatCNN` v1 did not yet support that.
+
+> #### Example Config
+>
+> ```ini
+> [model]
+> @architectures = "spacy.TextCatCNN.v1"
+> exclusive_classes = false
+> nO = null
+>
+> [model.tok2vec]
+> @architectures = "spacy.HashEmbedCNN.v1"
+> pretrained_vectors = null
+> width = 96
+> depth = 4
+> embed_size = 2000
+> window_size = 1
+> maxout_pieces = 3
+> subword_features = true
+> ```
+
+A neural network model where token vectors are calculated using a CNN. The
+vectors are mean pooled and used as features in a feed-forward network. This
+architecture is usually less accurate than the ensemble, but runs faster.
+
+| Name | Description |
+| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `exclusive_classes` | Whether or not categories are mutually exclusive. ~~bool~~ |
+| `tok2vec` | The [`tok2vec`](#tok2vec) layer of the model. ~~Model~~ |
+| `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
+| **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ |
+
+### spacy.TextCatBOW.v1 {#TextCatBOW_v1}
+
+Since `spacy.TextCatBOW.v2`, this architecture has become resizable, which means that you can add
+labels to a previously trained textcat. `TextCatBOW` v1 did not yet support that.
+
+> #### Example Config
+>
+> ```ini
+> [model]
+> @architectures = "spacy.TextCatBOW.v1"
+> exclusive_classes = false
+> ngram_size = 1
+> no_output_layer = false
+> nO = null
+> ```
+
+An n-gram "bag-of-words" model. This architecture should run much faster than
+the others, but may not be as accurate, especially if texts are short.
+
+| Name | Description |
+| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `exclusive_classes` | Whether or not categories are mutually exclusive. ~~bool~~ |
+| `ngram_size` | Determines the maximum length of the n-grams in the BOW model. For instance, `ngram_size=3` would give unigram, trigram and bigram features. ~~int~~ |
+| `no_output_layer` | Whether or not to add an output layer to the model (`Softmax` activation if `exclusive_classes` is `True`, else `Logistic`). ~~bool~~ |
+| `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
+| **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ |
+
## Loggers {#loggers}
These functions are available from `@spacy.registry.loggers`.
diff --git a/website/docs/usage/layers-architectures.md b/website/docs/usage/layers-architectures.md
index 8fe2cf489..17043d599 100644
--- a/website/docs/usage/layers-architectures.md
+++ b/website/docs/usage/layers-architectures.md
@@ -151,7 +151,7 @@ maxout_pieces = 3
depth = 2
[components.textcat.model.linear_model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = true
ngram_size = 1
no_output_layer = false
@@ -169,7 +169,7 @@ factory = "textcat"
labels = []
[components.textcat.model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = true
ngram_size = 1
no_output_layer = false
diff --git a/website/docs/usage/processing-pipelines.md b/website/docs/usage/processing-pipelines.md
index bde3ab84f..87feee54a 100644
--- a/website/docs/usage/processing-pipelines.md
+++ b/website/docs/usage/processing-pipelines.md
@@ -1324,7 +1324,7 @@ labels = []
# This function is created and then passed to the "textcat" component as
# the argument "model"
[components.textcat.model]
-@architectures = "spacy.TextCatBOW.v1"
+@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = true
ngram_size = 1
no_output_layer = false