Replace TextCatBOW length_exponent parameter by length

We now round up the length to the next power of two if it isn't
a power of two.
This commit is contained in:
Daniël de Kok 2023-11-27 16:15:33 +01:00
parent 0f4920dc5a
commit d865f9b223
9 changed files with 31 additions and 25 deletions

View File

@ -273,7 +273,7 @@ grad_factor = 1.0
[components.textcat.model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = true
length_exponent = 18
length = 262144
ngram_size = 1
no_output_layer = false
@ -311,7 +311,7 @@ grad_factor = 1.0
[components.textcat_multilabel.model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = false
length_exponent = 18
length = 262144
ngram_size = 1
no_output_layer = false
@ -546,7 +546,7 @@ width = ${components.tok2vec.model.encode.width}
[components.textcat.model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = true
length_exponent = 18
length = 262144
ngram_size = 1
no_output_layer = false
@ -575,7 +575,7 @@ width = ${components.tok2vec.model.encode.width}
[components.textcat_multilabel.model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = false
length_exponent = 18
length = 262144
ngram_size = 1
no_output_layer = false
@ -583,7 +583,7 @@ no_output_layer = false
[components.textcat_multilabel.model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = false
length_exponent = 18
length = 262144
ngram_size = 1
no_output_layer = false
{%- endif %}

View File

@ -983,6 +983,7 @@ class Errors(metaclass=ErrorsWithCodes):
"predicted docs when training {component}.")
E1055 = ("The 'replace_listener' callback expects {num_params} parameters, "
"but only callbacks with one or three parameters are supported")
E1056 = ("The `TextCatBOW` architecture expects a length of at least 1, was {length}.")
# Deprecated model shortcuts, only used in errors and warnings

View File

@ -111,15 +111,21 @@ def build_bow_text_classifier_v3(
exclusive_classes: bool,
ngram_size: int,
no_output_layer: bool,
length_exponent: int = 18,
length: int = 262144,
nO: Optional[int] = None,
) -> Model[List[Doc], Floats2d]:
if length < 1:
raise ValueError(Errors.E1056.format(length=length))
# Find k such that 2**(k-1) < length <= 2**k.
length = 2 ** (length - 1).bit_length()
return _build_bow_text_classifier(
exclusive_classes=exclusive_classes,
ngram_size=ngram_size,
no_output_layer=no_output_layer,
nO=nO,
sparse_linear=SparseLinear_v2(nO=nO, length=2**length_exponent),
sparse_linear=SparseLinear_v2(nO=nO, length=length),
)

View File

@ -38,7 +38,7 @@ depth = 2
[model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = true
length_exponent = 18
length = 262144
ngram_size = 1
no_output_layer = false
"""
@ -48,7 +48,7 @@ single_label_bow_config = """
[model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = true
length_exponent = 18
length = 262144
ngram_size = 1
no_output_layer = false
"""

View File

@ -37,7 +37,7 @@ depth = 2
[model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = false
length_exponent = 18
length = 262144
ngram_size = 1
no_output_layer = false
"""

View File

@ -378,7 +378,7 @@ def test_util_dot_section():
[components.textcat.model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = true
length_exponent = 18
length = 262144
ngram_size = 1
no_output_layer = false
"""

View File

@ -964,7 +964,7 @@ single-label use-cases where `exclusive_classes = true`, while the
> [model.linear_model]
> @architectures = "spacy.TextCatBOW.v3"
> exclusive_classes = true
> length_exponent = 18
> length = 262144
> ngram_size = 1
> no_output_layer = false
>
@ -1066,7 +1066,7 @@ after training.
> [model]
> @architectures = "spacy.TextCatBOW.v3"
> exclusive_classes = false
> length_exponent = 18
> length = 262144
> ngram_size = 1
> no_output_layer = false
> nO = null
@ -1080,20 +1080,19 @@ the others, but may not be as accurate, especially if texts are short.
| `exclusive_classes` | Whether or not categories are mutually exclusive. ~~bool~~ |
| `ngram_size` | Determines the maximum length of the n-grams in the BOW model. For instance, `ngram_size=3` would give unigram, bigram and trigram features. ~~int~~ |
| `no_output_layer` | Whether or not to add an output layer to the model (`Softmax` activation if `exclusive_classes` is `True`, else `Logistic`). ~~bool~~ |
| `length_exponent` | The size of the weights vector. The sizes is set to `2**length_exponent`. Defaults to `18`. ~~int~~ |
| `length` | The size of the weights vector. The length will be rounded up to the next power of two if it is not a power of two. Defaults to `262144`. ~~int~~ |
| `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ |
<Accordion title="Previous versions of spacy.TextCatBOW" spaced>
* [TextCatBOW.v1](/api/legacy#TextCatBOW_v1) was not yet resizable. Since v2, new
labels can be added to this component, even after training.
* [TextCatBOW.v1](/api/legacy#TextCatBOW_v1) and
[TextCatBOW.v2](/api/legacy#TextCatBOW_v2) used an erroneous sparse linear layer
that only used a small number of the allocated parameters.
* [TextCatBOW.v1](/api/legacy#TextCatBOW_v1) and
[TextCatBOW.v2](/api/legacy#TextCatBOW_v2) did not have the `length_exponent`
argument.
- [TextCatBOW.v1](/api/legacy#TextCatBOW_v1) was not yet resizable. Since v2,
new labels can be added to this component, even after training.
- [TextCatBOW.v1](/api/legacy#TextCatBOW_v1) and
[TextCatBOW.v2](/api/legacy#TextCatBOW_v2) used an erroneous sparse linear
layer that only used a small number of the allocated parameters.
- [TextCatBOW.v1](/api/legacy#TextCatBOW_v1) and
[TextCatBOW.v2](/api/legacy#TextCatBOW_v2) did not have the `length` argument.
</Accordion>

View File

@ -155,7 +155,7 @@ depth = 2
[components.textcat.model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = true
length_exponent = 18
length = 262144
ngram_size = 1
no_output_layer = false
```
@ -173,7 +173,7 @@ labels = []
[components.textcat.model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = true
length_exponent = 18
length = 262144
ngram_size = 1
no_output_layer = false
nO = null

View File

@ -1330,7 +1330,7 @@ labels = []
[components.textcat.model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = true
length_exponent = 18
length = 262144
ngram_size = 1
no_output_layer = false