mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Tagger: use unnormalized probabilities for inference (#10197)
* Tagger: use unnormalized probabilities for inference

  Using unnormalized softmax avoids use of the relatively expensive exp function, which can significantly speed up non-transformer models (e.g. I got a speedup of 27% on a German tagging + parsing pipeline).

* Add spacy.Tagger.v2 with configurable normalization

  Normalization of probabilities is disabled by default to improve performance.

* Update documentation, models, and tests to spacy.Tagger.v2
* Move Tagger.v1 to spacy-legacy
* docs/architectures: run prettier
* Unnormalized softmax is now a Softmax_v2 option
* Require thinc 8.0.14 and spacy-legacy 3.0.9
This commit is contained in:
parent
e8357923ec
commit
e5debc68e4
|
@ -5,7 +5,7 @@ requires = [
|
||||||
"cymem>=2.0.2,<2.1.0",
|
"cymem>=2.0.2,<2.1.0",
|
||||||
"preshed>=3.0.2,<3.1.0",
|
"preshed>=3.0.2,<3.1.0",
|
||||||
"murmurhash>=0.28.0,<1.1.0",
|
"murmurhash>=0.28.0,<1.1.0",
|
||||||
"thinc>=8.0.12,<8.1.0",
|
"thinc>=8.0.14,<8.1.0",
|
||||||
"blis>=0.4.0,<0.8.0",
|
"blis>=0.4.0,<0.8.0",
|
||||||
"pathy",
|
"pathy",
|
||||||
"numpy>=1.15.0",
|
"numpy>=1.15.0",
|
||||||
|
|
|
@ -3,7 +3,7 @@ spacy-legacy>=3.0.9,<3.1.0
|
||||||
spacy-loggers>=1.0.0,<2.0.0
|
spacy-loggers>=1.0.0,<2.0.0
|
||||||
cymem>=2.0.2,<2.1.0
|
cymem>=2.0.2,<2.1.0
|
||||||
preshed>=3.0.2,<3.1.0
|
preshed>=3.0.2,<3.1.0
|
||||||
thinc>=8.0.12,<8.1.0
|
thinc>=8.0.14,<8.1.0
|
||||||
blis>=0.4.0,<0.8.0
|
blis>=0.4.0,<0.8.0
|
||||||
ml_datasets>=0.2.0,<0.3.0
|
ml_datasets>=0.2.0,<0.3.0
|
||||||
murmurhash>=0.28.0,<1.1.0
|
murmurhash>=0.28.0,<1.1.0
|
||||||
|
|
|
@ -38,7 +38,7 @@ setup_requires =
|
||||||
cymem>=2.0.2,<2.1.0
|
cymem>=2.0.2,<2.1.0
|
||||||
preshed>=3.0.2,<3.1.0
|
preshed>=3.0.2,<3.1.0
|
||||||
murmurhash>=0.28.0,<1.1.0
|
murmurhash>=0.28.0,<1.1.0
|
||||||
thinc>=8.0.12,<8.1.0
|
thinc>=8.0.14,<8.1.0
|
||||||
install_requires =
|
install_requires =
|
||||||
# Our libraries
|
# Our libraries
|
||||||
spacy-legacy>=3.0.9,<3.1.0
|
spacy-legacy>=3.0.9,<3.1.0
|
||||||
|
@ -46,7 +46,7 @@ install_requires =
|
||||||
murmurhash>=0.28.0,<1.1.0
|
murmurhash>=0.28.0,<1.1.0
|
||||||
cymem>=2.0.2,<2.1.0
|
cymem>=2.0.2,<2.1.0
|
||||||
preshed>=3.0.2,<3.1.0
|
preshed>=3.0.2,<3.1.0
|
||||||
thinc>=8.0.12,<8.1.0
|
thinc>=8.0.14,<8.1.0
|
||||||
blis>=0.4.0,<0.8.0
|
blis>=0.4.0,<0.8.0
|
||||||
wasabi>=0.8.1,<1.1.0
|
wasabi>=0.8.1,<1.1.0
|
||||||
srsly>=2.4.1,<3.0.0
|
srsly>=2.4.1,<3.0.0
|
||||||
|
|
|
@ -54,7 +54,7 @@ stride = 96
|
||||||
factory = "morphologizer"
|
factory = "morphologizer"
|
||||||
|
|
||||||
[components.morphologizer.model]
|
[components.morphologizer.model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
nO = null
|
nO = null
|
||||||
|
|
||||||
[components.morphologizer.model.tok2vec]
|
[components.morphologizer.model.tok2vec]
|
||||||
|
@ -70,7 +70,7 @@ grad_factor = 1.0
|
||||||
factory = "tagger"
|
factory = "tagger"
|
||||||
|
|
||||||
[components.tagger.model]
|
[components.tagger.model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
nO = null
|
nO = null
|
||||||
|
|
||||||
[components.tagger.model.tok2vec]
|
[components.tagger.model.tok2vec]
|
||||||
|
@ -238,7 +238,7 @@ maxout_pieces = 3
|
||||||
factory = "morphologizer"
|
factory = "morphologizer"
|
||||||
|
|
||||||
[components.morphologizer.model]
|
[components.morphologizer.model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
nO = null
|
nO = null
|
||||||
|
|
||||||
[components.morphologizer.model.tok2vec]
|
[components.morphologizer.model.tok2vec]
|
||||||
|
@ -251,7 +251,7 @@ width = ${components.tok2vec.model.encode.width}
|
||||||
factory = "tagger"
|
factory = "tagger"
|
||||||
|
|
||||||
[components.tagger.model]
|
[components.tagger.model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
nO = null
|
nO = null
|
||||||
|
|
||||||
[components.tagger.model.tok2vec]
|
[components.tagger.model.tok2vec]
|
||||||
|
|
|
@ -1,14 +1,14 @@
|
||||||
from typing import Optional, List
|
from typing import Optional, List
|
||||||
from thinc.api import zero_init, with_array, Softmax, chain, Model
|
from thinc.api import zero_init, with_array, Softmax_v2, chain, Model
|
||||||
from thinc.types import Floats2d
|
from thinc.types import Floats2d
|
||||||
|
|
||||||
from ...util import registry
|
from ...util import registry
|
||||||
from ...tokens import Doc
|
from ...tokens import Doc
|
||||||
|
|
||||||
|
|
||||||
@registry.architectures("spacy.Tagger.v1")
|
@registry.architectures("spacy.Tagger.v2")
|
||||||
def build_tagger_model(
|
def build_tagger_model(
|
||||||
tok2vec: Model[List[Doc], List[Floats2d]], nO: Optional[int] = None
|
tok2vec: Model[List[Doc], List[Floats2d]], nO: Optional[int] = None, normalize=False
|
||||||
) -> Model[List[Doc], List[Floats2d]]:
|
) -> Model[List[Doc], List[Floats2d]]:
|
||||||
"""Build a tagger model, using a provided token-to-vector component. The tagger
|
"""Build a tagger model, using a provided token-to-vector component. The tagger
|
||||||
model simply adds a linear layer with softmax activation to predict scores
|
model simply adds a linear layer with softmax activation to predict scores
|
||||||
|
@ -19,7 +19,9 @@ def build_tagger_model(
|
||||||
"""
|
"""
|
||||||
# TODO: glorot_uniform_init seems to work a bit better than zero_init here?!
|
# TODO: glorot_uniform_init seems to work a bit better than zero_init here?!
|
||||||
t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
|
t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
|
||||||
output_layer = Softmax(nO, t2v_width, init_W=zero_init)
|
output_layer = Softmax_v2(
|
||||||
|
nO, t2v_width, init_W=zero_init, normalize_outputs=normalize
|
||||||
|
)
|
||||||
softmax = with_array(output_layer) # type: ignore
|
softmax = with_array(output_layer) # type: ignore
|
||||||
model = chain(tok2vec, softmax)
|
model = chain(tok2vec, softmax)
|
||||||
model.set_ref("tok2vec", tok2vec)
|
model.set_ref("tok2vec", tok2vec)
|
||||||
|
|
|
@ -25,7 +25,7 @@ BACKWARD_EXTEND = False
|
||||||
|
|
||||||
default_model_config = """
|
default_model_config = """
|
||||||
[model]
|
[model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
|
|
||||||
[model.tok2vec]
|
[model.tok2vec]
|
||||||
@architectures = "spacy.Tok2Vec.v2"
|
@architectures = "spacy.Tok2Vec.v2"
|
||||||
|
|
|
@ -20,7 +20,7 @@ BACKWARD_OVERWRITE = False
|
||||||
|
|
||||||
default_model_config = """
|
default_model_config = """
|
||||||
[model]
|
[model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
|
|
||||||
[model.tok2vec]
|
[model.tok2vec]
|
||||||
@architectures = "spacy.HashEmbedCNN.v2"
|
@architectures = "spacy.HashEmbedCNN.v2"
|
||||||
|
|
|
@ -27,7 +27,7 @@ BACKWARD_OVERWRITE = False
|
||||||
|
|
||||||
default_model_config = """
|
default_model_config = """
|
||||||
[model]
|
[model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
|
|
||||||
[model.tok2vec]
|
[model.tok2vec]
|
||||||
@architectures = "spacy.HashEmbedCNN.v2"
|
@architectures = "spacy.HashEmbedCNN.v2"
|
||||||
|
|
|
@ -100,7 +100,7 @@ cfg_string = """
|
||||||
factory = "tagger"
|
factory = "tagger"
|
||||||
|
|
||||||
[components.tagger.model]
|
[components.tagger.model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
nO = null
|
nO = null
|
||||||
|
|
||||||
[components.tagger.model.tok2vec]
|
[components.tagger.model.tok2vec]
|
||||||
|
@ -263,7 +263,7 @@ cfg_string_multi = """
|
||||||
factory = "tagger"
|
factory = "tagger"
|
||||||
|
|
||||||
[components.tagger.model]
|
[components.tagger.model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
nO = null
|
nO = null
|
||||||
|
|
||||||
[components.tagger.model.tok2vec]
|
[components.tagger.model.tok2vec]
|
||||||
|
@ -373,7 +373,7 @@ cfg_string_multi_textcat = """
|
||||||
factory = "tagger"
|
factory = "tagger"
|
||||||
|
|
||||||
[components.tagger.model]
|
[components.tagger.model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
nO = null
|
nO = null
|
||||||
|
|
||||||
[components.tagger.model.tok2vec]
|
[components.tagger.model.tok2vec]
|
||||||
|
|
|
@ -59,7 +59,7 @@ subword_features = true
|
||||||
factory = "tagger"
|
factory = "tagger"
|
||||||
|
|
||||||
[components.tagger.model]
|
[components.tagger.model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
|
|
||||||
[components.tagger.model.tok2vec]
|
[components.tagger.model.tok2vec]
|
||||||
@architectures = "spacy.Tok2VecListener.v1"
|
@architectures = "spacy.Tok2VecListener.v1"
|
||||||
|
@ -110,7 +110,7 @@ subword_features = true
|
||||||
factory = "tagger"
|
factory = "tagger"
|
||||||
|
|
||||||
[components.tagger.model]
|
[components.tagger.model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
|
|
||||||
[components.tagger.model.tok2vec]
|
[components.tagger.model.tok2vec]
|
||||||
@architectures = "spacy.Tok2VecListener.v1"
|
@architectures = "spacy.Tok2VecListener.v1"
|
||||||
|
|
|
@ -70,7 +70,7 @@ factory = "ner"
|
||||||
factory = "tagger"
|
factory = "tagger"
|
||||||
|
|
||||||
[components.tagger.model]
|
[components.tagger.model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
nO = null
|
nO = null
|
||||||
|
|
||||||
[components.tagger.model.tok2vec]
|
[components.tagger.model.tok2vec]
|
||||||
|
|
|
@ -38,7 +38,7 @@ subword_features = true
|
||||||
factory = "tagger"
|
factory = "tagger"
|
||||||
|
|
||||||
[components.tagger.model]
|
[components.tagger.model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
|
|
||||||
[components.tagger.model.tok2vec]
|
[components.tagger.model.tok2vec]
|
||||||
@architectures = "spacy.Tok2VecListener.v1"
|
@architectures = "spacy.Tok2VecListener.v1"
|
||||||
|
@ -62,7 +62,7 @@ pipeline = ["tagger"]
|
||||||
factory = "tagger"
|
factory = "tagger"
|
||||||
|
|
||||||
[components.tagger.model]
|
[components.tagger.model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
|
|
||||||
[components.tagger.model.tok2vec]
|
[components.tagger.model.tok2vec]
|
||||||
@architectures = "spacy.HashEmbedCNN.v1"
|
@architectures = "spacy.HashEmbedCNN.v1"
|
||||||
|
@ -106,7 +106,7 @@ subword_features = true
|
||||||
factory = "tagger"
|
factory = "tagger"
|
||||||
|
|
||||||
[components.tagger.model]
|
[components.tagger.model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
|
|
||||||
[components.tagger.model.tok2vec]
|
[components.tagger.model.tok2vec]
|
||||||
@architectures = "spacy.Tok2VecListener.v1"
|
@architectures = "spacy.Tok2VecListener.v1"
|
||||||
|
|
|
@ -241,7 +241,7 @@ maxout_pieces = 3
|
||||||
factory = "tagger"
|
factory = "tagger"
|
||||||
|
|
||||||
[components.tagger.model]
|
[components.tagger.model]
|
||||||
@architectures = "spacy.Tagger.v1"
|
@architectures = "spacy.Tagger.v2"
|
||||||
nO = null
|
nO = null
|
||||||
|
|
||||||
[components.tagger.model.tok2vec]
|
[components.tagger.model.tok2vec]
|
||||||
|
|
|
@ -104,7 +104,7 @@ consisting of a CNN and a layer-normalized maxout activation function.
|
||||||
> factory = "tagger"
|
> factory = "tagger"
|
||||||
>
|
>
|
||||||
> [components.tagger.model]
|
> [components.tagger.model]
|
||||||
> @architectures = "spacy.Tagger.v1"
|
> @architectures = "spacy.Tagger.v2"
|
||||||
>
|
>
|
||||||
> [components.tagger.model.tok2vec]
|
> [components.tagger.model.tok2vec]
|
||||||
> @architectures = "spacy.Tok2VecListener.v1"
|
> @architectures = "spacy.Tok2VecListener.v1"
|
||||||
|
@ -158,8 +158,8 @@ be configured with the `attrs` argument. The suggested attributes are `NORM`,
|
||||||
`PREFIX`, `SUFFIX` and `SHAPE`. This lets the model take into account some
|
`PREFIX`, `SUFFIX` and `SHAPE`. This lets the model take into account some
|
||||||
subword information, without constructing a fully character-based
|
subword information, without constructing a fully character-based
|
||||||
representation. If pretrained vectors are available, they can be included in the
|
representation. If pretrained vectors are available, they can be included in the
|
||||||
representation as well, with the vectors table kept static (i.e. it's
|
representation as well, with the vectors table kept static (i.e. it's not
|
||||||
not updated).
|
updated).
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
|
@ -613,14 +613,15 @@ same signature, but the `use_upper` argument was `True` by default.
|
||||||
|
|
||||||
## Tagging architectures {#tagger source="spacy/ml/models/tagger.py"}
|
## Tagging architectures {#tagger source="spacy/ml/models/tagger.py"}
|
||||||
|
|
||||||
### spacy.Tagger.v1 {#Tagger}
|
### spacy.Tagger.v2 {#Tagger}
|
||||||
|
|
||||||
> #### Example Config
|
> #### Example Config
|
||||||
>
|
>
|
||||||
> ```ini
|
> ```ini
|
||||||
> [model]
|
> [model]
|
||||||
> @architectures = "spacy.Tagger.v1"
|
> @architectures = "spacy.Tagger.v2"
|
||||||
> nO = null
|
> nO = null
|
||||||
|
> normalize = false
|
||||||
>
|
>
|
||||||
> [model.tok2vec]
|
> [model.tok2vec]
|
||||||
> # ...
|
> # ...
|
||||||
|
@ -634,8 +635,18 @@ the token vectors.
|
||||||
| ----------- | ------------------------------------------------------------------------------------------ |
|
| ----------- | ------------------------------------------------------------------------------------------ |
|
||||||
| `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
|
| `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||||
| `nO` | The number of tags to output. Inferred from the data if `None`. ~~Optional[int]~~ |
|
| `nO` | The number of tags to output. Inferred from the data if `None`. ~~Optional[int]~~ |
|
||||||
|
| `normalize` | Normalize probabilities during inference. Defaults to `False`. ~~bool~~ |
|
||||||
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
|
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||||
|
|
||||||
|
<Accordion title="Previous versions of spacy.Tagger" spaced>
|
||||||
|
|
||||||
|
- The `normalize` argument was added in `spacy.Tagger.v2`. `spacy.Tagger.v1`
|
||||||
|
always normalizes probabilities during inference.
|
||||||
|
|
||||||
|
The other arguments are shared between all versions.
|
||||||
|
|
||||||
|
</Accordion>
|
||||||
|
|
||||||
## Text classification architectures {#textcat source="spacy/ml/models/textcat.py"}
|
## Text classification architectures {#textcat source="spacy/ml/models/textcat.py"}
|
||||||
|
|
||||||
A text classification architecture needs to take a [`Doc`](/api/doc) as input,
|
A text classification architecture needs to take a [`Doc`](/api/doc) as input,
|
||||||
|
|
Loading…
Reference in New Issue
Block a user