Fix types of Tok2Vec encoding architectures (#6442)

* fix TorchBiLSTMEncoder documentation

* ensure the types of the encoding Tok2Vec layers are correct

* update references from v1 to v2 for the new architectures
This commit is contained in:
Sofie Van Landeghem 2021-01-07 06:39:27 +01:00 committed by GitHub
parent 8c1a23209f
commit 75d9019343
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 116 additions and 37 deletions

View File

@ -205,7 +205,7 @@ no_output_layer = false
factory = "tok2vec" factory = "tok2vec"
[components.tok2vec.model] [components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v1" @architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed] [components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1" @architectures = "spacy.MultiHashEmbed.v1"
@ -220,7 +220,7 @@ rows = [5000, 2500]
include_static_vectors = {{ "true" if optimize == "accuracy" else "false" }} include_static_vectors = {{ "true" if optimize == "accuracy" else "false" }}
[components.tok2vec.model.encode] [components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1" @architectures = "spacy.MaxoutWindowEncoder.v2"
width = {{ 96 if optimize == "efficiency" else 256 }} width = {{ 96 if optimize == "efficiency" else 256 }}
depth = {{ 4 if optimize == "efficiency" else 8 }} depth = {{ 4 if optimize == "efficiency" else 8 }}
window_size = 1 window_size = 1

View File

@ -87,6 +87,7 @@ def build_hash_embed_cnn_tok2vec(
) )
# TODO: archive
@registry.architectures.register("spacy.Tok2Vec.v1") @registry.architectures.register("spacy.Tok2Vec.v1")
def build_Tok2Vec_model( def build_Tok2Vec_model(
embed: Model[List[Doc], List[Floats2d]], embed: Model[List[Doc], List[Floats2d]],
@ -108,6 +109,28 @@ def build_Tok2Vec_model(
return tok2vec return tok2vec
@registry.architectures.register("spacy.Tok2Vec.v2")
def build_Tok2Vec_model(
    embed: Model[List[Doc], List[Floats2d]],
    encode: Model[List[Floats2d], List[Floats2d]],
) -> Model[List[Doc], List[Floats2d]]:
    """Build a tok2vec network by piping an embedding layer into an encoder.
    Background reading: https://explosion.ai/blog/deep-learning-formula-nlp

    embed (Model[List[Doc], List[Floats2d]]): Embeds tokens into
        context-independent word vector representations.
    encode (Model[List[Floats2d], List[Floats2d]]): Encodes context into the
        embeddings, using an architecture such as a CNN, BiLSTM or transformer.
    RETURNS (Model[List[Doc], List[Floats2d]]): The chained model. Its "nO"
        dimension is copied from `encode`, and both sublayers are retrievable
        through the "embed" and "encode" refs.
    """
    # NOTE(review): this function reuses the Python name of the builder
    # registered as "spacy.Tok2Vec.v1"; the two are told apart by their
    # registry strings, not by their module-level name.
    pipeline = chain(embed, encode)
    # The output width of the whole network is whatever the encoder emits.
    output_width = encode.get_dim("nO")
    pipeline.set_dim("nO", output_width)
    # Register the sublayers under stable names so they can be looked up later.
    for ref_name, sublayer in (("embed", embed), ("encode", encode)):
        pipeline.set_ref(ref_name, sublayer)
    return pipeline
@registry.architectures.register("spacy.MultiHashEmbed.v1") @registry.architectures.register("spacy.MultiHashEmbed.v1")
def MultiHashEmbed( def MultiHashEmbed(
width: int, width: int,
@ -255,6 +278,7 @@ def CharacterEmbed(
return model return model
# TODO: archive
@registry.architectures.register("spacy.MaxoutWindowEncoder.v1") @registry.architectures.register("spacy.MaxoutWindowEncoder.v1")
def MaxoutWindowEncoder( def MaxoutWindowEncoder(
width: int, window_size: int, maxout_pieces: int, depth: int width: int, window_size: int, maxout_pieces: int, depth: int
@ -286,7 +310,39 @@ def MaxoutWindowEncoder(
model.attrs["receptive_field"] = window_size * depth model.attrs["receptive_field"] = window_size * depth
return model return model
@registry.architectures.register("spacy.MaxoutWindowEncoder.v2")
def MaxoutWindowEncoder(
    width: int, window_size: int, maxout_pieces: int, depth: int
) -> Model[List[Floats2d], List[Floats2d]]:
    """Encode context with a stack of residual convolution blocks that use a
    maxout activation and layer normalization.

    width (int): The input and output width. These are required to be the same,
        to allow residual connections. This value will be determined by the
        width of the inputs. Recommended values are between 64 and 300.
    window_size (int): The number of words to concatenate around each token
        to construct the convolution. Recommended value is 1.
    maxout_pieces (int): The number of maxout pieces to use. Recommended
        values are 2 or 3.
    depth (int): The number of convolutional layers. Recommended value is 4.
    RETURNS (Model[List[Floats2d], List[Floats2d]]): The residual stack wrapped
        in `with_array`, with its "nO" dimension set to `width`.
    """
    # Each position is concatenated with `window_size` neighbours on both
    # sides, so the activation layer receives (2 * window_size + 1) * width
    # features per token.
    windowed_width = width * ((window_size * 2) + 1)
    window_layer = expand_window(window_size=window_size)
    maxout_layer = Maxout(
        nO=width,
        nI=windowed_width,
        nP=maxout_pieces,
        dropout=0.0,
        normalize=True,
    )
    conv_block = chain(window_layer, maxout_layer)
    # `depth` residual copies of the convolution block, applied in sequence.
    stacked = clone(residual(conv_block), depth)
    stacked.set_dim("nO", width)
    # Every layer widens the context by `window_size` tokens on each side.
    # NOTE(review): `pad` is presumably the number of padding rows with_array
    # inserts between sequences so windows do not reach into a neighbouring
    # sequence -- confirm against the thinc `with_array` documentation.
    return with_array(stacked, pad=window_size * depth)
# TODO: archive
@registry.architectures.register("spacy.MishWindowEncoder.v1") @registry.architectures.register("spacy.MishWindowEncoder.v1")
def MishWindowEncoder( def MishWindowEncoder(
width: int, window_size: int, depth: int width: int, window_size: int, depth: int
@ -310,6 +366,29 @@ def MishWindowEncoder(
return model return model
@registry.architectures.register("spacy.MishWindowEncoder.v2")
def MishWindowEncoder(
    width: int, window_size: int, depth: int
) -> Model[List[Floats2d], List[Floats2d]]:
    """Encode context with a stack of residual convolution blocks that use a
    mish activation and layer normalization.

    width (int): The input and output width. These are required to be the same,
        to allow residual connections. This value will be determined by the
        width of the inputs. Recommended values are between 64 and 300.
    window_size (int): The number of words to concatenate around each token
        to construct the convolution. Recommended value is 1.
    depth (int): The number of convolutional layers. Recommended value is 4.
    RETURNS (Model[List[Floats2d], List[Floats2d]]): The residual stack wrapped
        in `with_array`, with its "nO" dimension set to `width`.
    """
    # Each position is concatenated with `window_size` neighbours on both
    # sides, so the activation layer receives (2 * window_size + 1) * width
    # features per token.
    windowed_width = width * ((window_size * 2) + 1)
    window_layer = expand_window(window_size=window_size)
    mish_layer = Mish(nO=width, nI=windowed_width, dropout=0.0, normalize=True)
    conv_block = chain(window_layer, mish_layer)
    # `depth` residual copies of the convolution block, applied in sequence.
    stacked = clone(residual(conv_block), depth)
    stacked.set_dim("nO", width)
    # NOTE(review): unlike "spacy.MaxoutWindowEncoder.v2", no `pad` argument is
    # passed to with_array here. Left as-is to preserve existing behaviour;
    # confirm whether padding by window_size * depth is wanted for this encoder.
    return with_array(stacked)
@registry.architectures.register("spacy.TorchBiLSTMEncoder.v1") @registry.architectures.register("spacy.TorchBiLSTMEncoder.v1")
def BiLSTMEncoder( def BiLSTMEncoder(
width: int, depth: int, dropout: float width: int, depth: int, dropout: float
@ -319,9 +398,9 @@ def BiLSTMEncoder(
width (int): The input and output width. These are required to be the same, width (int): The input and output width. These are required to be the same,
to allow residual connections. This value will be determined by the to allow residual connections. This value will be determined by the
width of the inputs. Recommended values are between 64 and 300. width of the inputs. Recommended values are between 64 and 300.
window_size (int): The number of words to concatenate around each token depth (int): The number of recurrent layers.
to construct the convolution. Recommended value is 1. dropout (float): Creates a Dropout layer on the outputs of each LSTM layer
depth (int): The number of convolutional layers. Recommended value is 4. except the last layer. Set to 0 to disable this functionality.
""" """
if depth == 0: if depth == 0:
return noop() return noop()

View File

@ -24,7 +24,7 @@ default_model_config = """
@architectures = "spacy.Tagger.v1" @architectures = "spacy.Tagger.v1"
[model.tok2vec] [model.tok2vec]
@architectures = "spacy.Tok2Vec.v1" @architectures = "spacy.Tok2Vec.v2"
[model.tok2vec.embed] [model.tok2vec.embed]
@architectures = "spacy.CharacterEmbed.v1" @architectures = "spacy.CharacterEmbed.v1"
@ -35,7 +35,7 @@ nC = 8
include_static_vectors = false include_static_vectors = false
[model.tok2vec.encode] [model.tok2vec.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1" @architectures = "spacy.MaxoutWindowEncoder.v2"
width = 128 width = 128
depth = 4 depth = 4
window_size = 1 window_size = 1

View File

@ -19,7 +19,7 @@ single_label_default_config = """
@architectures = "spacy.TextCatEnsemble.v2" @architectures = "spacy.TextCatEnsemble.v2"
[model.tok2vec] [model.tok2vec]
@architectures = "spacy.Tok2Vec.v1" @architectures = "spacy.Tok2Vec.v2"
[model.tok2vec.embed] [model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v1" @architectures = "spacy.MultiHashEmbed.v1"
@ -29,7 +29,7 @@ attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
include_static_vectors = false include_static_vectors = false
[model.tok2vec.encode] [model.tok2vec.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1" @architectures = "spacy.MaxoutWindowEncoder.v2"
width = ${model.tok2vec.embed.width} width = ${model.tok2vec.embed.width}
window_size = 1 window_size = 1
maxout_pieces = 3 maxout_pieces = 3

View File

@ -113,7 +113,7 @@ cfg_string = """
factory = "tok2vec" factory = "tok2vec"
[components.tok2vec.model] [components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v1" @architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed] [components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1" @architectures = "spacy.MultiHashEmbed.v1"
@ -123,7 +123,7 @@ cfg_string = """
include_static_vectors = false include_static_vectors = false
[components.tok2vec.model.encode] [components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1" @architectures = "spacy.MaxoutWindowEncoder.v2"
width = 96 width = 96
depth = 4 depth = 4
window_size = 1 window_size = 1

View File

@ -26,20 +26,20 @@ usage documentation on
## Tok2Vec architectures {#tok2vec-arch source="spacy/ml/models/tok2vec.py"} ## Tok2Vec architectures {#tok2vec-arch source="spacy/ml/models/tok2vec.py"}
### spacy.Tok2Vec.v1 {#Tok2Vec} ### spacy.Tok2Vec.v2 {#Tok2Vec}
> #### Example config > #### Example config
> >
> ```ini > ```ini
> [model] > [model]
> @architectures = "spacy.Tok2Vec.v1" > @architectures = "spacy.Tok2Vec.v2"
> >
> [model.embed] > [model.embed]
> @architectures = "spacy.CharacterEmbed.v1" > @architectures = "spacy.CharacterEmbed.v1"
> # ... > # ...
> >
> [model.encode] > [model.encode]
> @architectures = "spacy.MaxoutWindowEncoder.v1" > @architectures = "spacy.MaxoutWindowEncoder.v2"
> # ... > # ...
> ``` > ```
@ -197,13 +197,13 @@ network to construct a single vector to represent the information.
| `nC` | The number of UTF-8 bytes to embed per word. Recommended values are between `3` and `8`, although it may depend on the length of words in the language. ~~int~~ | | `nC` | The number of UTF-8 bytes to embed per word. Recommended values are between `3` and `8`, although it may depend on the length of words in the language. ~~int~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ | | **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
### spacy.MaxoutWindowEncoder.v1 {#MaxoutWindowEncoder} ### spacy.MaxoutWindowEncoder.v2 {#MaxoutWindowEncoder}
> #### Example config > #### Example config
> >
> ```ini > ```ini
> [model] > [model]
> @architectures = "spacy.MaxoutWindowEncoder.v1" > @architectures = "spacy.MaxoutWindowEncoder.v2"
> width = 128 > width = 128
> window_size = 1 > window_size = 1
> maxout_pieces = 3 > maxout_pieces = 3
@ -221,13 +221,13 @@ and residual connections.
| `depth` | The number of convolutional layers. Recommended value is `4`. ~~int~~ | | `depth` | The number of convolutional layers. Recommended value is `4`. ~~int~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Floats2d], List[Floats2d]]~~ | | **CREATES** | The model using the architecture. ~~Model[List[Floats2d], List[Floats2d]]~~ |
### spacy.MishWindowEncoder.v1 {#MishWindowEncoder} ### spacy.MishWindowEncoder.v2 {#MishWindowEncoder}
> #### Example config > #### Example config
> >
> ```ini > ```ini
> [model] > [model]
> @architectures = "spacy.MishWindowEncoder.v1" > @architectures = "spacy.MishWindowEncoder.v2"
> width = 64 > width = 64
> window_size = 1 > window_size = 1
> depth = 4 > depth = 4
@ -252,19 +252,19 @@ and residual connections.
> [model] > [model]
> @architectures = "spacy.TorchBiLSTMEncoder.v1" > @architectures = "spacy.TorchBiLSTMEncoder.v1"
> width = 64 > width = 64
> window_size = 1 > depth = 2
> depth = 4 > dropout = 0.0
> ``` > ```
Encode context using bidirectional LSTM layers. Requires Encode context using bidirectional LSTM layers. Requires
[PyTorch](https://pytorch.org). [PyTorch](https://pytorch.org).
| Name | Description | | Name | Description |
| ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `width` | The input and output width. These are required to be the same, to allow residual connections. This value will be determined by the width of the inputs. Recommended values are between `64` and `300`. ~~int~~ | | `width` | The input and output width. These are required to be the same, to allow residual connections. This value will be determined by the width of the inputs. Recommended values are between `64` and `300`. ~~int~~ |
| `window_size` | The number of words to concatenate around each token to construct the convolution. Recommended value is `1`. ~~int~~ | | `depth` | The number of recurrent layers, for instance `depth=2` results in stacking two LSTMs together. ~~int~~ |
| `depth` | The number of convolutional layers. Recommended value is `4`. ~~int~~ | | `dropout` | Creates a Dropout layer on the outputs of each LSTM layer except the last layer. Set to 0.0 to disable this functionality. ~~float~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Floats2d], List[Floats2d]]~~ | | **CREATES** | The model using the architecture. ~~Model[List[Floats2d], List[Floats2d]]~~ |
### spacy.StaticVectors.v1 {#StaticVectors} ### spacy.StaticVectors.v1 {#StaticVectors}
@ -600,7 +600,7 @@ specific data and challenge.
> no_output_layer = false > no_output_layer = false
> >
> [model.tok2vec] > [model.tok2vec]
> @architectures = "spacy.Tok2Vec.v1" > @architectures = "spacy.Tok2Vec.v2"
> >
> [model.tok2vec.embed] > [model.tok2vec.embed]
> @architectures = "spacy.MultiHashEmbed.v1" > @architectures = "spacy.MultiHashEmbed.v1"
@ -610,7 +610,7 @@ specific data and challenge.
> include_static_vectors = false > include_static_vectors = false
> >
> [model.tok2vec.encode] > [model.tok2vec.encode]
> @architectures = "spacy.MaxoutWindowEncoder.v1" > @architectures = "spacy.MaxoutWindowEncoder.v2"
> width = ${model.tok2vec.embed.width} > width = ${model.tok2vec.embed.width}
> window_size = 1 > window_size = 1
> maxout_pieces = 3 > maxout_pieces = 3

View File

@ -129,13 +129,13 @@ the entity recognizer, use a
factory = "tok2vec" factory = "tok2vec"
[components.tok2vec.model] [components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v1" @architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed] [components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1" @architectures = "spacy.MultiHashEmbed.v1"
[components.tok2vec.model.encode] [components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1" @architectures = "spacy.MaxoutWindowEncoder.v2"
[components.ner] [components.ner]
factory = "ner" factory = "ner"
@ -161,13 +161,13 @@ factory = "ner"
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
[components.ner.model.tok2vec] [components.ner.model.tok2vec]
@architectures = "spacy.Tok2Vec.v1" @architectures = "spacy.Tok2Vec.v2"
[components.ner.model.tok2vec.embed] [components.ner.model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v1" @architectures = "spacy.MultiHashEmbed.v1"
[components.ner.model.tok2vec.encode] [components.ner.model.tok2vec.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1" @architectures = "spacy.MaxoutWindowEncoder.v2"
``` ```
<!-- TODO: Once rehearsal is tested, mention it here. --> <!-- TODO: Once rehearsal is tested, mention it here. -->

View File

@ -134,7 +134,7 @@ labels = []
nO = null nO = null
[components.textcat.model.tok2vec] [components.textcat.model.tok2vec]
@architectures = "spacy.Tok2Vec.v1" @architectures = "spacy.Tok2Vec.v2"
[components.textcat.model.tok2vec.embed] [components.textcat.model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v1" @architectures = "spacy.MultiHashEmbed.v1"
@ -144,7 +144,7 @@ attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
include_static_vectors = false include_static_vectors = false
[components.textcat.model.tok2vec.encode] [components.textcat.model.tok2vec.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1" @architectures = "spacy.MaxoutWindowEncoder.v2"
width = ${components.textcat.model.tok2vec.embed.width} width = ${components.textcat.model.tok2vec.embed.width}
window_size = 1 window_size = 1
maxout_pieces = 3 maxout_pieces = 3
@ -201,14 +201,14 @@ tokens, and their combination forms a typical
factory = "tok2vec" factory = "tok2vec"
[components.tok2vec.model] [components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v1" @architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed] [components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1" @architectures = "spacy.MultiHashEmbed.v1"
# ... # ...
[components.tok2vec.model.encode] [components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1" @architectures = "spacy.MaxoutWindowEncoder.v2"
# ... # ...
``` ```
@ -224,7 +224,7 @@ architecture:
# ... # ...
[components.tok2vec.model.encode] [components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1" @architectures = "spacy.MaxoutWindowEncoder.v2"
# ... # ...
``` ```