From 8e85444955a888920acc2f7f4712d0767157a877 Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Wed, 30 Nov 2022 18:35:15 +0900 Subject: [PATCH] Change GPU efficient textcat to use CNN, not BOW If you generate a config with a textcat component using GPU (transformers), the default option (efficiency) uses a BOW architecture, which does not use tok2vec features. While that can make sense as part of a larger pipeline, in the case of just a transformer and a textcat, that means the transformer is doing a lot of work for no purpose. This changes it so that the CNN architecture is used instead. It could also be changed to be the same as the accuracy config, which uses the ensemble architecture. --- spacy/cli/templates/quickstart_training.jinja | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja index 58864883a..f15eeaac3 100644 --- a/spacy/cli/templates/quickstart_training.jinja +++ b/spacy/cli/templates/quickstart_training.jinja @@ -221,10 +221,16 @@ no_output_layer = false {% else -%} [components.textcat.model] -@architectures = "spacy.TextCatBOW.v2" +@architectures = "spacy.TextCatCNN.v2" exclusive_classes = true -ngram_size = 1 -no_output_layer = false +nO = null + +[components.textcat.model.tok2vec] +@architectures = "spacy-transformers.TransformerListener.v1" +grad_factor = 1.0 + +[components.textcat.model.tok2vec.pooling] +@layers = "reduce_mean.v1" {%- endif %} {%- endif %} @@ -252,10 +258,16 @@ no_output_layer = false {% else -%} [components.textcat_multilabel.model] -@architectures = "spacy.TextCatBOW.v2" +@architectures = "spacy.TextCatCNN.v2" exclusive_classes = false -ngram_size = 1 -no_output_layer = false +nO = null + +[components.textcat_multilabel.model.tok2vec] +@architectures = "spacy-transformers.TransformerListener.v1" +grad_factor = 1.0 + +[components.textcat_multilabel.model.tok2vec.pooling] +@layers = 
"reduce_mean.v1" {%- endif %} {%- endif %}