diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index a9faa3029..c9f30d1d3 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1 +1 @@ -custom: https://explosion.ai/merch +custom: [https://explosion.ai/merch, https://explosion.ai/tailored-solutions] diff --git a/README.md b/README.md index 92f12fe81..afa96363b 100644 --- a/README.md +++ b/README.md @@ -39,26 +39,31 @@ open-source software, released under the | 🚀 **[New in v3.0]** | New features, backwards incompatibilities and migration guide. | | 🪐 **[Project Templates]** | End-to-end workflows you can clone, modify and run. | | 🎛 **[API Reference]** | The detailed reference for spaCy's API. | +| ⏩ **[GPU Processing]** | Use spaCy with CUDA-compatible GPU processing. | | 📦 **[Models]** | Download trained pipelines for spaCy. | +| 🦙 **[Large Language Models]** | Integrate LLMs into spaCy pipelines. | | 🌌 **[Universe]** | Plugins, extensions, demos and books from the spaCy ecosystem. | | ⚙️ **[spaCy VS Code Extension]** | Additional tooling and features for working with spaCy's config files. | | 👩‍🏫 **[Online Course]** | Learn spaCy in this free and interactive online course. | +| 📰 **[Blog]** | Read about current spaCy and Prodigy development, releases, talks and more from Explosion. | | 📺 **[Videos]** | Our YouTube channel with video tutorials, talks and more. | | 🛠 **[Changelog]** | Changes and version history. | | 💝 **[Contribute]** | How to contribute to the spaCy project and code base. | | 👕 **[Swag]** | Support us and our work with unique, custom-designed swag! | -| spaCy Tailored Pipelines | Get a custom spaCy pipeline, tailor-made for your NLP problem by spaCy's core developers. Streamlined, production-ready, predictable and maintainable. Start by completing our 5-minute questionnaire to tell us what you need and we'll be in touch! 
**[Learn more →](https://explosion.ai/spacy-tailored-pipelines)** | -| spaCy Tailored Pipelines | Bespoke advice for problem solving, strategy and analysis for applied NLP projects. Services include data strategy, code reviews, pipeline design and annotation coaching. Curious? Fill in our 5-minute questionnaire to tell us what you need and we'll be in touch! **[Learn more →](https://explosion.ai/spacy-tailored-analysis)** | +| Tailored Solutions | Custom NLP consulting, implementation and strategic advice by spaCy’s core development team. Streamlined, production-ready, predictable and maintainable. Send us an email or take our 5-minute questionnaire, and we'll be in touch! **[Learn more →](https://explosion.ai/tailored-solutions)** | [spacy 101]: https://spacy.io/usage/spacy-101 [new in v3.0]: https://spacy.io/usage/v3 [usage guides]: https://spacy.io/usage/ [api reference]: https://spacy.io/api/ +[gpu processing]: https://spacy.io/usage#gpu [models]: https://spacy.io/models +[large language models]: https://spacy.io/usage/large-language-models [universe]: https://spacy.io/universe [spacy vs code extension]: https://github.com/explosion/spacy-vscode [videos]: https://www.youtube.com/c/ExplosionAI [online course]: https://course.spacy.io +[blog]: https://explosion.ai [project templates]: https://github.com/explosion/projects [changelog]: https://spacy.io/usage#changelog [contribute]: https://github.com/explosion/spaCy/blob/master/CONTRIBUTING.md diff --git a/spacy/errors.py b/spacy/errors.py index 093c65f3d..b6108dd0f 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -984,6 +984,9 @@ class Errors(metaclass=ErrorsWithCodes): E1055 = ("The 'replace_listener' callback expects {num_params} parameters, " "but only callbacks with one or three parameters are supported") E1056 = ("The `TextCatBOW` architecture expects a length of at least 1, was {length}.") + E1057 = ("The `TextCatReduce` architecture must be used with at least one " + "reduction. 
Please enable one of `use_reduce_first`, " + "`use_reduce_last`, `use_reduce_max` or `use_reduce_mean`.") # Deprecated model shortcuts, only used in errors and warnings diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py index 992b9e49e..61cd30f35 100644 --- a/spacy/ml/models/textcat.py +++ b/spacy/ml/models/textcat.py @@ -19,6 +19,9 @@ from thinc.api import ( clone, concatenate, list2ragged, + reduce_first, + reduce_last, + reduce_max, reduce_mean, reduce_sum, residual, @@ -51,39 +54,15 @@ def build_simple_cnn_text_classifier( outputs sum to 1. If exclusive_classes=False, a logistic non-linearity is applied instead, so that outputs are in the range [0, 1]. """ - fill_defaults = {"b": 0, "W": 0} - with Model.define_operators({">>": chain}): - cnn = tok2vec >> list2ragged() >> reduce_mean() - nI = tok2vec.maybe_get_dim("nO") - if exclusive_classes: - output_layer = Softmax(nO=nO, nI=nI) - fill_defaults["b"] = NEG_VALUE - resizable_layer: Model = resizable( - output_layer, - resize_layer=partial( - resize_linear_weighted, fill_defaults=fill_defaults - ), - ) - model = cnn >> resizable_layer - else: - output_layer = Linear(nO=nO, nI=nI) - resizable_layer = resizable( - output_layer, - resize_layer=partial( - resize_linear_weighted, fill_defaults=fill_defaults - ), - ) - model = cnn >> resizable_layer >> Logistic() - model.set_ref("output_layer", output_layer) - model.attrs["resize_output"] = partial( - resize_and_set_ref, - resizable_layer=resizable_layer, - ) - model.set_ref("tok2vec", tok2vec) - if nO is not None: - model.set_dim("nO", cast(int, nO)) - model.attrs["multi_label"] = not exclusive_classes - return model + return build_reduce_text_classifier( + tok2vec=tok2vec, + exclusive_classes=exclusive_classes, + use_reduce_first=False, + use_reduce_last=False, + use_reduce_max=False, + use_reduce_mean=True, + nO=nO, + ) def resize_and_set_ref(model, new_nO, resizable_layer): @@ -299,4 +278,80 @@ def 
_init_parametric_attention_with_residual_nonlinear(model, X, Y) -> Model: model.get_ref("norm_layer").set_dim("nI", tok2vec_width) model.get_ref("norm_layer").set_dim("nO", tok2vec_width) init_chain(model, X, Y) + + +@registry.architectures("spacy.TextCatReduce.v1") +def build_reduce_text_classifier( + tok2vec: Model, + exclusive_classes: bool, + use_reduce_first: bool, + use_reduce_last: bool, + use_reduce_max: bool, + use_reduce_mean: bool, + nO: Optional[int] = None, +) -> Model[List[Doc], Floats2d]: + """Build a model that classifies pooled `Doc` representations. + + Pooling is performed using reductions. Reductions are concatenated when + multiple reductions are used. + + tok2vec (Model): the tok2vec layer to pool over. + exclusive_classes (bool): Whether or not classes are mutually exclusive. + use_reduce_first (bool): Pool by using the hidden representation of the + first token of a `Doc`. + use_reduce_last (bool): Pool by using the hidden representation of the + last token of a `Doc`. + use_reduce_max (bool): Pool by taking the maximum values of the hidden + representations of a `Doc`. + use_reduce_mean (bool): Pool by taking the mean of all hidden + representations of a `Doc`. + nO (Optional[int]): Number of classes. 
+ """ + + fill_defaults = {"b": 0, "W": 0} + reductions = [] + if use_reduce_first: + reductions.append(reduce_first()) + if use_reduce_last: + reductions.append(reduce_last()) + if use_reduce_max: + reductions.append(reduce_max()) + if use_reduce_mean: + reductions.append(reduce_mean()) + + if not len(reductions): + raise ValueError(Errors.E1057) + + with Model.define_operators({">>": chain}): + cnn = tok2vec >> list2ragged() >> concatenate(*reductions) + nO_tok2vec = tok2vec.maybe_get_dim("nO") + nI = nO_tok2vec * len(reductions) if nO_tok2vec is not None else None + if exclusive_classes: + output_layer = Softmax(nO=nO, nI=nI) + fill_defaults["b"] = NEG_VALUE + resizable_layer: Model = resizable( + output_layer, + resize_layer=partial( + resize_linear_weighted, fill_defaults=fill_defaults + ), + ) + model = cnn >> resizable_layer + else: + output_layer = Linear(nO=nO, nI=nI) + resizable_layer = resizable( + output_layer, + resize_layer=partial( + resize_linear_weighted, fill_defaults=fill_defaults + ), + ) + model = cnn >> resizable_layer >> Logistic() + model.set_ref("output_layer", output_layer) + model.attrs["resize_output"] = partial( + resize_and_set_ref, + resizable_layer=resizable_layer, + ) + model.set_ref("tok2vec", tok2vec) + if nO is not None: + model.set_dim("nO", cast(int, nO)) + model.attrs["multi_label"] = not exclusive_classes return model diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py index 43a335c4a..ae227017a 100644 --- a/spacy/pipeline/textcat.py +++ b/spacy/pipeline/textcat.py @@ -55,8 +55,12 @@ no_output_layer = false single_label_cnn_config = """ [model] -@architectures = "spacy.TextCatCNN.v2" +@architectures = "spacy.TextCatReduce.v1" exclusive_classes = true +use_reduce_first = false +use_reduce_last = false +use_reduce_max = false +use_reduce_mean = true [model.tok2vec] @architectures = "spacy.HashEmbedCNN.v2" diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py index 
c917cc610..2f8d5e604 100644 --- a/spacy/pipeline/textcat_multilabel.py +++ b/spacy/pipeline/textcat_multilabel.py @@ -53,8 +53,12 @@ no_output_layer = false multi_label_cnn_config = """ [model] -@architectures = "spacy.TextCatCNN.v2" +@architectures = "spacy.TextCatReduce.v1" exclusive_classes = false +use_reduce_first = false +use_reduce_last = false +use_reduce_max = false +use_reduce_mean = true [model.tok2vec] @architectures = "spacy.HashEmbedCNN.v2" diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py index e9b5a9f90..7a78c3dac 100644 --- a/spacy/tests/pipeline/test_textcat.py +++ b/spacy/tests/pipeline/test_textcat.py @@ -457,8 +457,8 @@ def test_no_resize(name, textcat_config): ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}), ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}), # CNN - ("textcat", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}), - ("textcat_multilabel", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}), + ("textcat", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True, "use_reduce_first": True, "use_reduce_last": True, "use_reduce_max": True, "use_reduce_mean": True}), + ("textcat_multilabel", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False, "use_reduce_first": True, "use_reduce_last": True, "use_reduce_max": True, "use_reduce_mean": True}), ], ) # fmt: on @@ -485,9 +485,9 @@ def test_resize(name, textcat_config): ("textcat", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}), ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v3", 
"exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}), ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}), - # CNN - ("textcat", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}), - ("textcat_multilabel", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}), + # REDUCE + ("textcat", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True, "use_reduce_first": True, "use_reduce_last": True, "use_reduce_max": True, "use_reduce_mean": True}), + ("textcat_multilabel", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False, "use_reduce_first": True, "use_reduce_last": True, "use_reduce_max": True, "use_reduce_mean": True}), ], ) # fmt: on @@ -701,12 +701,15 @@ def test_overfitting_IO_multi(): # ENSEMBLE V2 ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}}), ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": True, "ngram_size": 5, "no_output_layer": False}}), - # CNN V2 + # CNN V2 (legacy) ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}), ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}), # PARAMETRIC ATTENTION V1 ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatParametricAttention.v1", "tok2vec": 
DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}), ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatParametricAttention.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}), + # REDUCE V1 + ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True, "use_reduce_first": True, "use_reduce_last": True, "use_reduce_max": True, "use_reduce_mean": True}), + ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False, "use_reduce_first": True, "use_reduce_last": True, "use_reduce_max": True, "use_reduce_mean": True}), ], ) # fmt: on diff --git a/spacy/tests/test_models.py b/spacy/tests/test_models.py index e6692ad92..5228b4544 100644 --- a/spacy/tests/test_models.py +++ b/spacy/tests/test_models.py @@ -26,6 +26,7 @@ from spacy.ml.models import ( build_Tok2Vec_model, ) from spacy.ml.staticvectors import StaticVectors +from spacy.util import registry def get_textcat_bow_kwargs(): @@ -284,3 +285,17 @@ def test_spancat_model_forward_backward(nO=5): Y, backprop = model((docs, spans), is_train=True) assert Y.shape == (spans.dataXd.shape[0], nO) backprop(Y) + + +def test_textcat_reduce_invalid_args(): + textcat_reduce = registry.architectures.get("spacy.TextCatReduce.v1") + tok2vec = make_test_tok2vec() + with pytest.raises(ValueError, match=r"must be used with at least one reduction"): + textcat_reduce( + tok2vec=tok2vec, + exclusive_classes=False, + use_reduce_first=False, + use_reduce_last=False, + use_reduce_max=False, + use_reduce_mean=False, + ) diff --git a/spacy/tokens/doc.pyi b/spacy/tokens/doc.pyi index 365859d89..f0b68862c 100644 --- a/spacy/tokens/doc.pyi +++ b/spacy/tokens/doc.pyi @@ -125,7 +125,7 @@ class Doc: vector: Optional[Floats1d] = ..., alignment_mode: str = ..., span_id: Union[int, str] = ..., - ) -> Span: ... + ) -> Optional[Span]: ... 
def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ... @property def has_vector(self) -> bool: ... @@ -179,15 +179,13 @@ class Doc: self, path: Union[str, Path], *, exclude: Iterable[str] = ... ) -> None: ... def from_disk( - self, path: Union[str, Path], *, exclude: Union[List[str], Tuple[str]] = ... + self, path: Union[str, Path], *, exclude: Iterable[str] = ... ) -> Doc: ... - def to_bytes(self, *, exclude: Union[List[str], Tuple[str]] = ...) -> bytes: ... - def from_bytes( - self, bytes_data: bytes, *, exclude: Union[List[str], Tuple[str]] = ... - ) -> Doc: ... - def to_dict(self, *, exclude: Union[List[str], Tuple[str]] = ...) -> bytes: ... + def to_bytes(self, *, exclude: Iterable[str] = ...) -> bytes: ... + def from_bytes(self, bytes_data: bytes, *, exclude: Iterable[str] = ...) -> Doc: ... + def to_dict(self, *, exclude: Iterable[str] = ...) -> Dict[str, Any]: ... def from_dict( - self, msg: bytes, *, exclude: Union[List[str], Tuple[str]] = ... + self, msg: Dict[str, Any], *, exclude: Iterable[str] = ... ) -> Doc: ... def extend_tensor(self, tensor: Floats2d) -> None: ... def retokenize(self) -> Retokenizer: ... diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 745eb5ff3..181c0ce0f 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -1326,7 +1326,7 @@ cdef class Doc: path (str / Path): A path to a directory. Paths may be either strings or `Path`-like objects. - exclude (list): String names of serialization fields to exclude. + exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (Doc): The modified `Doc` object. DOCS: https://spacy.io/api/doc#from_disk @@ -1339,7 +1339,7 @@ cdef class Doc: def to_bytes(self, *, exclude=tuple()): """Serialize, i.e. export the document contents to a binary string. - exclude (list): String names of serialization fields to exclude. + exclude (Iterable[str]): String names of serialization fields to exclude. 
RETURNS (bytes): A losslessly serialized copy of the `Doc`, including all annotations. @@ -1351,7 +1351,7 @@ cdef class Doc: """Deserialize, i.e. import the document contents from a binary string. data (bytes): The string to load from. - exclude (list): String names of serialization fields to exclude. + exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (Doc): Itself. DOCS: https://spacy.io/api/doc#from_bytes @@ -1361,11 +1361,8 @@ cdef class Doc: def to_dict(self, *, exclude=tuple()): """Export the document contents to a dictionary for serialization. - exclude (list): String names of serialization fields to exclude. - RETURNS (bytes): A losslessly serialized copy of the `Doc`, including - all annotations. - - DOCS: https://spacy.io/api/doc#to_bytes + exclude (Iterable[str]): String names of serialization fields to exclude. + RETURNS (Dict[str, Any]): A dictionary representation of the `Doc` """ array_head = Doc._get_array_attrs() strings = set() @@ -1411,13 +1408,11 @@ cdef class Doc: return util.to_dict(serializers, exclude) def from_dict(self, msg, *, exclude=tuple()): - """Deserialize, i.e. import the document contents from a binary string. + """Deserialize the document contents from a dictionary representation. - data (bytes): The string to load from. - exclude (list): String names of serialization fields to exclude. + msg (Dict[str, Any]): The dictionary to load from. + exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (Doc): Itself. 
- - DOCS: https://spacy.io/api/doc#from_dict """ if self.length != 0: raise ValueError(Errors.E033.format(length=self.length)) diff --git a/website/docs/api/architectures.mdx b/website/docs/api/architectures.mdx index b6723c6e5..956234ac0 100644 --- a/website/docs/api/architectures.mdx +++ b/website/docs/api/architectures.mdx @@ -1018,46 +1018,6 @@ but used an internal `tok2vec` instead of taking it as argument: -### spacy.TextCatCNN.v2 {id="TextCatCNN"} - -> #### Example Config -> -> ```ini -> [model] -> @architectures = "spacy.TextCatCNN.v2" -> exclusive_classes = false -> nO = null -> -> [model.tok2vec] -> @architectures = "spacy.HashEmbedCNN.v2" -> pretrained_vectors = null -> width = 96 -> depth = 4 -> embed_size = 2000 -> window_size = 1 -> maxout_pieces = 3 -> subword_features = true -> ``` - -A neural network model where token vectors are calculated using a CNN. The -vectors are mean pooled and used as features in a feed-forward network. This -architecture is usually less accurate than the ensemble, but runs faster. - -| Name | Description | -| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `exclusive_classes` | Whether or not categories are mutually exclusive. ~~bool~~ | -| `tok2vec` | The [`tok2vec`](#tok2vec) layer of the model. ~~Model~~ | -| `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ | -| **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ | - - - -[TextCatCNN.v1](/api/legacy#TextCatCNN_v1) had the exact same signature, but was -not yet resizable. Since v2, new labels can be added to this component, even -after training. 
- - - ### spacy.TextCatBOW.v3 {id="TextCatBOW"} > #### Example Config @@ -1134,6 +1094,54 @@ to attend to tokens that are relevant to text classification. | `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ | | **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ | +### spacy.TextCatReduce.v1 {id="TextCatReduce"} + +> #### Example Config +> +> ```ini +> [model] +> @architectures = "spacy.TextCatReduce.v1" +> exclusive_classes = false +> use_reduce_first = false +> use_reduce_last = false +> use_reduce_max = false +> use_reduce_mean = true +> nO = null +> +> [model.tok2vec] +> @architectures = "spacy.HashEmbedCNN.v2" +> pretrained_vectors = null +> width = 96 +> depth = 4 +> embed_size = 2000 +> window_size = 1 +> maxout_pieces = 3 +> subword_features = true +> ``` + +A classifier that pools token hidden representations of each `Doc` using first, +last, max or mean reduction and then applies a classification layer. Reductions are +concatenated when multiple reductions are used. + + + +`TextCatReduce` is a generalization of the older +[`TextCatCNN`](/api/legacy#TextCatCNN_v2) model. `TextCatCNN` always uses a mean +reduction, whereas `TextCatReduce` also supports first/last/max reductions. + + + +| Name | Description | +| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `exclusive_classes` | Whether or not categories are mutually exclusive. ~~bool~~ | +| `tok2vec` | The [`tok2vec`](#tok2vec) layer of the model. ~~Model~~ | +| `use_reduce_first` | Pool by using the hidden representation of the first token of a `Doc`. ~~bool~~ | +| `use_reduce_last` | Pool by using the hidden representation of the last token of a `Doc`. 
~~bool~~ | +| `use_reduce_max` | Pool by taking the maximum values of the hidden representations of a `Doc`. ~~bool~~ | +| `use_reduce_mean` | Pool by taking the mean of all hidden representations of a `Doc`. ~~bool~~ | +| `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ | +| **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ | + ## Span classification architectures {id="spancat",source="spacy/ml/models/spancat.py"} ### spacy.SpanCategorizer.v1 {id="SpanCategorizer"} diff --git a/website/docs/api/legacy.mdx b/website/docs/api/legacy.mdx index 32111ce92..b44df5387 100644 --- a/website/docs/api/legacy.mdx +++ b/website/docs/api/legacy.mdx @@ -162,7 +162,10 @@ network has an internal CNN Tok2Vec layer and uses attention. Since `spacy.TextCatCNN.v2`, this architecture has become resizable, which means that you can add labels to a previously trained textcat. `TextCatCNN` v1 did not -yet support that. +yet support that. `TextCatCNN` has been replaced by the more general +[`TextCatReduce`](/api/architectures#TextCatReduce) layer. `TextCatCNN` is +identical to `TextCatReduce` with `use_reduce_mean=true`, +`use_reduce_first=false`, `use_reduce_last=false` and `use_reduce_max=false`. > #### Example Config > @@ -194,6 +197,51 @@ architecture is usually less accurate than the ensemble, but runs faster. | `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ | | **CREATES** | The model using the architecture. 
~~Model[List[Doc], Floats2d]~~ | +### spacy.TextCatCNN.v2 {id="TextCatCNN_v2"} + +> #### Example Config +> +> ```ini +> [model] +> @architectures = "spacy.TextCatCNN.v2" +> exclusive_classes = false +> nO = null +> +> [model.tok2vec] +> @architectures = "spacy.HashEmbedCNN.v2" +> pretrained_vectors = null +> width = 96 +> depth = 4 +> embed_size = 2000 +> window_size = 1 +> maxout_pieces = 3 +> subword_features = true +> ``` + +A neural network model where token vectors are calculated using a CNN. The +vectors are mean pooled and used as features in a feed-forward network. This +architecture is usually less accurate than the ensemble, but runs faster. + +`TextCatCNN` has been replaced by the more general +[`TextCatReduce`](/api/architectures#TextCatReduce) layer. `TextCatCNN` is +identical to `TextCatReduce` with `use_reduce_mean=true`, +`use_reduce_first=false`, `use_reduce_last=false` and `use_reduce_max=false`. + +| Name | Description | +| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `exclusive_classes` | Whether or not categories are mutually exclusive. ~~bool~~ | +| `tok2vec` | The [`tok2vec`](#tok2vec) layer of the model. ~~Model~~ | +| `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ | +| **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ | + + + +[TextCatCNN.v1](/api/legacy#TextCatCNN_v1) had the exact same signature, but was +not yet resizable. Since v2, new labels can be added to this component, even +after training. + + + ### spacy.TextCatBOW.v1 {id="TextCatBOW_v1"} Since `spacy.TextCatBOW.v2`, this architecture has become resizable, which means