Fix types of Tok2Vec encoding architectures (#6442)

* fix TorchBiLSTMEncoder documentation

* ensure the types of the encoding Tok2vec layers are correct

* update references from v1 to v2 for the new architectures
This commit is contained in:
Sofie Van Landeghem 2021-01-07 06:39:27 +01:00 committed by GitHub
parent 8c1a23209f
commit 75d9019343
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 116 additions and 37 deletions

View File

@ -205,7 +205,7 @@ no_output_layer = false
factory = "tok2vec"
[components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v1"
@architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1"
@ -220,7 +220,7 @@ rows = [5000, 2500]
include_static_vectors = {{ "true" if optimize == "accuracy" else "false" }}
[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1"
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = {{ 96 if optimize == "efficiency" else 256 }}
depth = {{ 4 if optimize == "efficiency" else 8 }}
window_size = 1

View File

@ -87,6 +87,7 @@ def build_hash_embed_cnn_tok2vec(
)
# TODO: archive
@registry.architectures.register("spacy.Tok2Vec.v1")
def build_Tok2Vec_model(
embed: Model[List[Doc], List[Floats2d]],
@ -108,6 +109,28 @@ def build_Tok2Vec_model(
return tok2vec
@registry.architectures.register("spacy.Tok2Vec.v2")
def build_Tok2Vec_model(
    embed: Model[List[Doc], List[Floats2d]],
    encode: Model[List[Floats2d], List[Floats2d]],
) -> Model[List[Doc], List[Floats2d]]:
    """Build a tok2vec model by chaining an embedding and an encoding subnetwork.
    See https://explosion.ai/blog/deep-learning-formula-nlp

    embed (Model[List[Doc], List[Floats2d]]): Maps tokens to
        context-independent word vector representations.
    encode (Model[List[Floats2d], List[Floats2d]]): Mixes context into the
        embeddings, e.g. via a CNN, BiLSTM or transformer.
    """
    model = chain(embed, encode)
    # The combined model's output width is determined by the encoder.
    model.set_dim("nO", encode.get_dim("nO"))
    # Keep named references so downstream code can access the sublayers.
    model.set_ref("embed", embed)
    model.set_ref("encode", encode)
    return model
@registry.architectures.register("spacy.MultiHashEmbed.v1")
def MultiHashEmbed(
width: int,
@ -255,6 +278,7 @@ def CharacterEmbed(
return model
# TODO: archive
@registry.architectures.register("spacy.MaxoutWindowEncoder.v1")
def MaxoutWindowEncoder(
width: int, window_size: int, maxout_pieces: int, depth: int
@ -286,7 +310,39 @@ def MaxoutWindowEncoder(
model.attrs["receptive_field"] = window_size * depth
return model
@registry.architectures.register("spacy.MaxoutWindowEncoder.v2")
def MaxoutWindowEncoder(
    width: int, window_size: int, maxout_pieces: int, depth: int
) -> Model[List[Floats2d], List[Floats2d]]:
    """Encode context using convolutions with maxout activation, layer
    normalization and residual connections.

    width (int): The input and output width. These are required to be the same,
        to allow residual connections. This value will be determined by the
        width of the inputs. Recommended values are between 64 and 300.
    window_size (int): The number of words to concatenate around each token
        to construct the convolution. Recommended value is 1.
    maxout_pieces (int): The number of maxout pieces to use. Recommended
        values are 2 or 3.
    depth (int): The number of convolutional layers. Recommended value is 4.
    """
    # Each token is concatenated with window_size neighbours on both sides,
    # so the maxout layer's input width is (2 * window_size + 1) * width.
    concatenated_width = width * ((window_size * 2) + 1)
    convolution = chain(
        expand_window(window_size=window_size),
        Maxout(
            nO=width,
            nI=concatenated_width,
            nP=maxout_pieces,
            dropout=0.0,
            normalize=True,
        ),
    )
    stacked = clone(residual(convolution), depth)
    stacked.set_dim("nO", width)
    # With depth layers each seeing window_size tokens per side, the stack's
    # receptive field is window_size * depth; pad sequences accordingly.
    return with_array(stacked, pad=window_size * depth)
# TODO: archive
@registry.architectures.register("spacy.MishWindowEncoder.v1")
def MishWindowEncoder(
width: int, window_size: int, depth: int
@ -310,6 +366,29 @@ def MishWindowEncoder(
return model
@registry.architectures.register("spacy.MishWindowEncoder.v2")
def MishWindowEncoder(
    width: int, window_size: int, depth: int
) -> Model[List[Floats2d], List[Floats2d]]:
    """Encode context using convolutions with mish activation, layer
    normalization and residual connections.

    width (int): The input and output width. These are required to be the same,
        to allow residual connections. This value will be determined by the
        width of the inputs. Recommended values are between 64 and 300.
    window_size (int): The number of words to concatenate around each token
        to construct the convolution. Recommended value is 1.
    depth (int): The number of convolutional layers. Recommended value is 4.
    """
    # The Mish layer consumes the window-concatenated representation:
    # (2 * window_size + 1) tokens of `width` dimensions each.
    concatenated_width = width * ((window_size * 2) + 1)
    mish_layer = Mish(nO=width, nI=concatenated_width, dropout=0.0, normalize=True)
    convolution = chain(expand_window(window_size=window_size), mish_layer)
    stacked = clone(residual(convolution), depth)
    stacked.set_dim("nO", width)
    return with_array(stacked)
@registry.architectures.register("spacy.TorchBiLSTMEncoder.v1")
def BiLSTMEncoder(
width: int, depth: int, dropout: float
@ -319,9 +398,9 @@ def BiLSTMEncoder(
width (int): The input and output width. These are required to be the same,
to allow residual connections. This value will be determined by the
width of the inputs. Recommended values are between 64 and 300.
window_size (int): The number of words to concatenate around each token
to construct the convolution. Recommended value is 1.
depth (int): The number of convolutional layers. Recommended value is 4.
depth (int): The number of recurrent layers.
dropout (float): Creates a Dropout layer on the outputs of each LSTM layer
except the last layer. Set to 0 to disable this functionality.
"""
if depth == 0:
return noop()

View File

@ -24,7 +24,7 @@ default_model_config = """
@architectures = "spacy.Tagger.v1"
[model.tok2vec]
@architectures = "spacy.Tok2Vec.v1"
@architectures = "spacy.Tok2Vec.v2"
[model.tok2vec.embed]
@architectures = "spacy.CharacterEmbed.v1"
@ -35,7 +35,7 @@ nC = 8
include_static_vectors = false
[model.tok2vec.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1"
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = 128
depth = 4
window_size = 1

View File

@ -19,7 +19,7 @@ single_label_default_config = """
@architectures = "spacy.TextCatEnsemble.v2"
[model.tok2vec]
@architectures = "spacy.Tok2Vec.v1"
@architectures = "spacy.Tok2Vec.v2"
[model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v1"
@ -29,7 +29,7 @@ attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
include_static_vectors = false
[model.tok2vec.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1"
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = ${model.tok2vec.embed.width}
window_size = 1
maxout_pieces = 3

View File

@ -113,7 +113,7 @@ cfg_string = """
factory = "tok2vec"
[components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v1"
@architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1"
@ -123,7 +123,7 @@ cfg_string = """
include_static_vectors = false
[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1"
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = 96
depth = 4
window_size = 1

View File

@ -26,20 +26,20 @@ usage documentation on
## Tok2Vec architectures {#tok2vec-arch source="spacy/ml/models/tok2vec.py"}
### spacy.Tok2Vec.v1 {#Tok2Vec}
### spacy.Tok2Vec.v2 {#Tok2Vec}
> #### Example config
>
> ```ini
> [model]
> @architectures = "spacy.Tok2Vec.v1"
> @architectures = "spacy.Tok2Vec.v2"
>
> [model.embed]
> @architectures = "spacy.CharacterEmbed.v1"
> # ...
>
> [model.encode]
> @architectures = "spacy.MaxoutWindowEncoder.v1"
> @architectures = "spacy.MaxoutWindowEncoder.v2"
> # ...
> ```
@ -197,13 +197,13 @@ network to construct a single vector to represent the information.
| `nC` | The number of UTF-8 bytes to embed per word. Recommended values are between `3` and `8`, although it may depend on the length of words in the language. ~~int~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
### spacy.MaxoutWindowEncoder.v1 {#MaxoutWindowEncoder}
### spacy.MaxoutWindowEncoder.v2 {#MaxoutWindowEncoder}
> #### Example config
>
> ```ini
> [model]
> @architectures = "spacy.MaxoutWindowEncoder.v1"
> @architectures = "spacy.MaxoutWindowEncoder.v2"
> width = 128
> window_size = 1
> maxout_pieces = 3
@ -221,13 +221,13 @@ and residual connections.
| `depth` | The number of convolutional layers. Recommended value is `4`. ~~int~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Floats2d], List[Floats2d]]~~ |
### spacy.MishWindowEncoder.v1 {#MishWindowEncoder}
### spacy.MishWindowEncoder.v2 {#MishWindowEncoder}
> #### Example config
>
> ```ini
> [model]
> @architectures = "spacy.MishWindowEncoder.v1"
> @architectures = "spacy.MishWindowEncoder.v2"
> width = 64
> window_size = 1
> depth = 4
@ -252,19 +252,19 @@ and residual connections.
> [model]
> @architectures = "spacy.TorchBiLSTMEncoder.v1"
> width = 64
> window_size = 1
> depth = 4
> depth = 2
> dropout = 0.0
> ```
Encode context using bidirectional LSTM layers. Requires
[PyTorch](https://pytorch.org).
| Name | Description |
| ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `width` | The input and output width. These are required to be the same, to allow residual connections. This value will be determined by the width of the inputs. Recommended values are between `64` and `300`. ~~int~~ |
| `window_size` | The number of words to concatenate around each token to construct the convolution. Recommended value is `1`. ~~int~~ |
| `depth` | The number of convolutional layers. Recommended value is `4`. ~~int~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Floats2d], List[Floats2d]]~~ |
| Name | Description |
| ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `width` | The input and output width. These are required to be the same, to allow residual connections. This value will be determined by the width of the inputs. Recommended values are between `64` and `300`. ~~int~~ |
| `depth` | The number of recurrent layers, for instance `depth=2` results in stacking two LSTMs together. ~~int~~ |
| `dropout` | Creates a Dropout layer on the outputs of each LSTM layer except the last layer. Set to 0.0 to disable this functionality. ~~float~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Floats2d], List[Floats2d]]~~ |
### spacy.StaticVectors.v1 {#StaticVectors}
@ -600,7 +600,7 @@ specific data and challenge.
> no_output_layer = false
>
> [model.tok2vec]
> @architectures = "spacy.Tok2Vec.v1"
> @architectures = "spacy.Tok2Vec.v2"
>
> [model.tok2vec.embed]
> @architectures = "spacy.MultiHashEmbed.v1"
@ -610,7 +610,7 @@ specific data and challenge.
> include_static_vectors = false
>
> [model.tok2vec.encode]
> @architectures = "spacy.MaxoutWindowEncoder.v1"
> @architectures = "spacy.MaxoutWindowEncoder.v2"
> width = ${model.tok2vec.embed.width}
> window_size = 1
> maxout_pieces = 3

View File

@ -129,13 +129,13 @@ the entity recognizer, use a
factory = "tok2vec"
[components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v1"
@architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1"
[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1"
@architectures = "spacy.MaxoutWindowEncoder.v2"
[components.ner]
factory = "ner"
@ -161,13 +161,13 @@ factory = "ner"
@architectures = "spacy.TransitionBasedParser.v1"
[components.ner.model.tok2vec]
@architectures = "spacy.Tok2Vec.v1"
@architectures = "spacy.Tok2Vec.v2"
[components.ner.model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v1"
[components.ner.model.tok2vec.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1"
@architectures = "spacy.MaxoutWindowEncoder.v2"
```
<!-- TODO: Once rehearsal is tested, mention it here. -->

View File

@ -134,7 +134,7 @@ labels = []
nO = null
[components.textcat.model.tok2vec]
@architectures = "spacy.Tok2Vec.v1"
@architectures = "spacy.Tok2Vec.v2"
[components.textcat.model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v1"
@ -144,7 +144,7 @@ attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
include_static_vectors = false
[components.textcat.model.tok2vec.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1"
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = ${components.textcat.model.tok2vec.embed.width}
window_size = 1
maxout_pieces = 3
@ -201,14 +201,14 @@ tokens, and their combination forms a typical
factory = "tok2vec"
[components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v1"
@architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1"
# ...
[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1"
@architectures = "spacy.MaxoutWindowEncoder.v2"
# ...
```
@ -224,7 +224,7 @@ architecture:
# ...
[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1"
@architectures = "spacy.MaxoutWindowEncoder.v2"
# ...
```