diff --git a/requirements.txt b/requirements.txt
index 86ffa9945..91fac7894 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
# Our libraries
-spacy-legacy>=3.0.2,<3.1.0
+spacy-legacy>=3.0.3,<3.1.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
thinc>=8.0.2,<8.1.0
diff --git a/setup.cfg b/setup.cfg
index 6c65277c6..c60d78fc4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -37,7 +37,7 @@ setup_requires =
thinc>=8.0.2,<8.1.0
install_requires =
# Our libraries
- spacy-legacy>=3.0.2,<3.1.0
+ spacy-legacy>=3.0.3,<3.1.0
murmurhash>=0.28.0,<1.1.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index 38fc23272..e43c21bbd 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -206,7 +206,7 @@ factory = "tok2vec"
@architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed]
-@architectures = "spacy.MultiHashEmbed.v1"
+@architectures = "spacy.MultiHashEmbed.v2"
width = ${components.tok2vec.model.encode.width}
{% if has_letters -%}
attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"]
diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py
index 5790af631..76ec87054 100644
--- a/spacy/ml/models/tok2vec.py
+++ b/spacy/ml/models/tok2vec.py
@@ -31,7 +31,7 @@ def get_tok2vec_width(model: Model):
return nO
-@registry.architectures("spacy.HashEmbedCNN.v1")
+@registry.architectures("spacy.HashEmbedCNN.v2")
def build_hash_embed_cnn_tok2vec(
*,
width: int,
@@ -108,7 +108,7 @@ def build_Tok2Vec_model(
return tok2vec
-@registry.architectures("spacy.MultiHashEmbed.v1")
+@registry.architectures("spacy.MultiHashEmbed.v2")
def MultiHashEmbed(
width: int,
attrs: List[Union[str, int]],
@@ -182,7 +182,7 @@ def MultiHashEmbed(
return model
-@registry.architectures("spacy.CharacterEmbed.v1")
+@registry.architectures("spacy.CharacterEmbed.v2")
def CharacterEmbed(
width: int,
rows: int,
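The four hunks above re-register unchanged function bodies under bumped version strings; `spacy-legacy` then keeps the archived v1 functions registered so existing configs continue to resolve. A minimal sketch of that pattern using `catalogue` (the registry library spaCy builds on) — the `demo.*` names are illustrative, not real spaCy registry entries:

```python
import catalogue

# A stand-alone registry, mirroring how spacy.registry.architectures works.
architectures = catalogue.create("demo", "architectures")

@architectures.register("demo.HashEmbed.v2")
def hash_embed_v2(width: int):
    # New behavior lives under the bumped version string.
    return {"version": 2, "width": width}

@architectures.register("demo.HashEmbed.v1")
def hash_embed_v1(width: int):
    # spacy-legacy does the analogous thing: it re-registers the archived
    # v1 function so old configs keep loading.
    return {"version": 1, "width": width}

assert architectures.get("demo.HashEmbed.v2")(64)["version"] == 2
```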
diff --git a/spacy/ml/staticvectors.py b/spacy/ml/staticvectors.py
index ea4c7fb77..cfd25c24b 100644
--- a/spacy/ml/staticvectors.py
+++ b/spacy/ml/staticvectors.py
@@ -8,7 +8,7 @@ from ..tokens import Doc
from ..errors import Errors
-@registry.layers("spacy.StaticVectors.v1")
+@registry.layers("spacy.StaticVectors.v2")
def StaticVectors(
nO: Optional[int] = None,
nM: Optional[int] = None,
@@ -46,6 +46,8 @@ def forward(
vectors_data = model.ops.gemm(model.ops.as_contig(V[rows]), W, trans2=True)
except ValueError:
raise RuntimeError(Errors.E896)
+ # Convert negative indices to 0-vectors (TODO: more options for UNK tokens)
+ vectors_data[rows < 0] = 0
output = Ragged(
vectors_data, model.ops.asarray([len(doc) for doc in docs], dtype="i")
)
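The two added lines are the functional core of `spacy.StaticVectors.v2`: lookup rows that come back negative (tokens without a vector) are masked to zeros. A minimal numpy sketch of the difference, assuming a toy 3-row table — illustrative only, not spaCy's actual code path:

```python
import numpy as np

table = np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]])  # toy vectors table
rows = np.array([0, -1, 2])  # -1 marks a token with no vector

v1_like = table[rows]          # numpy wraps -1 to the *last* row -> [3., 3.]
v2_like = table[rows].copy()
v2_like[rows < 0] = 0          # v2 behavior: unknown tokens get zero vectors
print(v1_like[1], v2_like[1])  # [3. 3.] [0. 0.]
```

Under the v1 behavior, appending rows to a vectors table silently changed which vector unknown tokens aliased to; see the note on issue #7662 added to legacy.md below.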
diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx
index 7290c4637..37f09ce3a 100644
--- a/spacy/pipeline/dep_parser.pyx
+++ b/spacy/pipeline/dep_parser.pyx
@@ -24,7 +24,7 @@ maxout_pieces = 2
use_upper = true
[model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 96
depth = 4
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index 6ab52fb35..66070916e 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -26,7 +26,7 @@ default_model_config = """
@architectures = "spacy.EntityLinker.v1"
[model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 96
depth = 2
diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx
index cd0081346..3ba05e616 100644
--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@@ -27,7 +27,7 @@ default_model_config = """
@architectures = "spacy.Tok2Vec.v2"
[model.tok2vec.embed]
-@architectures = "spacy.CharacterEmbed.v1"
+@architectures = "spacy.CharacterEmbed.v2"
width = 128
rows = 7000
nM = 64
diff --git a/spacy/pipeline/multitask.pyx b/spacy/pipeline/multitask.pyx
index 990b6a1de..8c44061e2 100644
--- a/spacy/pipeline/multitask.pyx
+++ b/spacy/pipeline/multitask.pyx
@@ -22,7 +22,7 @@ maxout_pieces = 3
token_vector_width = 96
[model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 96
depth = 4
diff --git a/spacy/pipeline/ner.pyx b/spacy/pipeline/ner.pyx
index 3a2151b01..0b9b0d324 100644
--- a/spacy/pipeline/ner.pyx
+++ b/spacy/pipeline/ner.pyx
@@ -21,7 +21,7 @@ maxout_pieces = 2
use_upper = true
[model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 96
depth = 4
diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx
index 83cd06739..f9472abf5 100644
--- a/spacy/pipeline/senter.pyx
+++ b/spacy/pipeline/senter.pyx
@@ -19,7 +19,7 @@ default_model_config = """
@architectures = "spacy.Tagger.v1"
[model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 12
depth = 1
diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx
index 9af5245c1..938131f6f 100644
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -26,7 +26,7 @@ default_model_config = """
@architectures = "spacy.Tagger.v1"
[model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 96
depth = 4
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index 174ffd273..1d652a483 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -21,7 +21,7 @@ single_label_default_config = """
@architectures = "spacy.Tok2Vec.v2"
[model.tok2vec.embed]
-@architectures = "spacy.MultiHashEmbed.v1"
+@architectures = "spacy.MultiHashEmbed.v2"
width = 64
rows = [2000, 2000, 1000, 1000, 1000, 1000]
attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
@@ -56,7 +56,7 @@ single_label_cnn_config = """
exclusive_classes = true
[model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 96
depth = 4
diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py
index 036bc8dc5..7267735b4 100644
--- a/spacy/pipeline/textcat_multilabel.py
+++ b/spacy/pipeline/textcat_multilabel.py
@@ -21,7 +21,7 @@ multi_label_default_config = """
@architectures = "spacy.Tok2Vec.v1"
[model.tok2vec.embed]
-@architectures = "spacy.MultiHashEmbed.v1"
+@architectures = "spacy.MultiHashEmbed.v2"
width = 64
rows = [2000, 2000, 1000, 1000, 1000, 1000]
attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
@@ -56,7 +56,7 @@ multi_label_cnn_config = """
exclusive_classes = false
[model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 96
depth = 4
diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py
index 26a4c998c..3ee324d50 100644
--- a/spacy/pipeline/tok2vec.py
+++ b/spacy/pipeline/tok2vec.py
@@ -11,7 +11,7 @@ from ..errors import Errors
default_model_config = """
[model]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 96
depth = 4
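Every pipeline default above is the same one-line version bump. A quick way to confirm that a freshly constructed component picks up the new default — a sketch assuming a spaCy build with this patch applied:

```python
import spacy

nlp = spacy.blank("en")
nlp.add_pipe("tok2vec")  # fills in default_model_config from tok2vec.py above
print(nlp.config["components"]["tok2vec"]["model"]["@architectures"])
# Expected with this patch: "spacy.HashEmbedCNN.v2"
```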
diff --git a/website/docs/api/architectures.md b/website/docs/api/architectures.md
index 4c4bf73f4..e09352ec9 100644
--- a/website/docs/api/architectures.md
+++ b/website/docs/api/architectures.md
@@ -35,7 +35,7 @@ usage documentation on
> @architectures = "spacy.Tok2Vec.v2"
>
> [model.embed]
-> @architectures = "spacy.CharacterEmbed.v1"
+> @architectures = "spacy.CharacterEmbed.v2"
> # ...
>
> [model.encode]
@@ -54,13 +54,13 @@ blog post for background.
| `encode` | Encode context into the embeddings, using an architecture such as a CNN, BiLSTM or transformer. For example, [MaxoutWindowEncoder](/api/architectures#MaxoutWindowEncoder). ~~Model[List[Floats2d], List[Floats2d]]~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
-### spacy.HashEmbedCNN.v1 {#HashEmbedCNN}
+### spacy.HashEmbedCNN.v2 {#HashEmbedCNN}
> #### Example Config
>
> ```ini
> [model]
-> @architectures = "spacy.HashEmbedCNN.v1"
+> @architectures = "spacy.HashEmbedCNN.v2"
> pretrained_vectors = null
> width = 96
> depth = 4
@@ -96,7 +96,7 @@ consisting of a CNN and a layer-normalized maxout activation function.
> factory = "tok2vec"
>
> [components.tok2vec.model]
-> @architectures = "spacy.HashEmbedCNN.v1"
+> @architectures = "spacy.HashEmbedCNN.v2"
> width = 342
>
> [components.tagger]
@@ -129,13 +129,13 @@ argument that connects to the shared `tok2vec` component in the pipeline.
| `upstream` | A string to identify the "upstream" `Tok2Vec` component to communicate with. By default, the upstream name is the wildcard string `"*"`, but you could also specify the name of the `Tok2Vec` component. You'll almost never have multiple upstream `Tok2Vec` components, so the wildcard string will almost always be fine. ~~str~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
-### spacy.MultiHashEmbed.v1 {#MultiHashEmbed}
+### spacy.MultiHashEmbed.v2 {#MultiHashEmbed}
> #### Example config
>
> ```ini
> [model]
-> @architectures = "spacy.MultiHashEmbed.v1"
+> @architectures = "spacy.MultiHashEmbed.v2"
> width = 64
> attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"]
> rows = [2000, 1000, 1000, 1000]
@@ -160,13 +160,13 @@ not updated).
| `include_static_vectors` | Whether to also use static word vectors. Requires a vectors table to be loaded in the [`Doc`](/api/doc) objects' vocab. ~~bool~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
-### spacy.CharacterEmbed.v1 {#CharacterEmbed}
+### spacy.CharacterEmbed.v2 {#CharacterEmbed}
> #### Example config
>
> ```ini
> [model]
-> @architectures = "spacy.CharacterEmbed.v1"
+> @architectures = "spacy.CharacterEmbed.v2"
> width = 128
> rows = 7000
> nM = 64
@@ -266,13 +266,13 @@ Encode context using bidirectional LSTM layers. Requires
| `dropout` | Creates a Dropout layer on the outputs of each LSTM layer except the last layer. Set to 0.0 to disable this functionality. ~~float~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Floats2d], List[Floats2d]]~~ |
-### spacy.StaticVectors.v1 {#StaticVectors}
+### spacy.StaticVectors.v2 {#StaticVectors}
> #### Example config
>
> ```ini
> [model]
-> @architectures = "spacy.StaticVectors.v1"
+> @architectures = "spacy.StaticVectors.v2"
> nO = null
> nM = null
> dropout = 0.2
@@ -283,8 +283,9 @@ Encode context using bidirectional LSTM layers. Requires
> ```
Embed [`Doc`](/api/doc) objects with their vocab's vectors table, applying a
-learned linear projection to control the dimensionality. See the documentation
-on [static vectors](/usage/embeddings-transformers#static-vectors) for details.
+learned linear projection to control the dimensionality. Unknown tokens are
+mapped to a zero vector. See the documentation on [static
+vectors](/usage/embeddings-transformers#static-vectors) for details.
| Name | Description |
| ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
@@ -513,7 +514,7 @@ for a Tok2Vec layer.
> use_upper = true
>
> [model.tok2vec]
-> @architectures = "spacy.HashEmbedCNN.v1"
+> @architectures = "spacy.HashEmbedCNN.v2"
> pretrained_vectors = null
> width = 96
> depth = 4
@@ -619,7 +620,7 @@ single-label use-cases where `exclusive_classes = true`, while the
> @architectures = "spacy.Tok2Vec.v2"
>
> [model.tok2vec.embed]
-> @architectures = "spacy.MultiHashEmbed.v1"
+> @architectures = "spacy.MultiHashEmbed.v2"
> width = 64
> rows = [2000, 2000, 1000, 1000, 1000, 1000]
> attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
@@ -676,7 +677,7 @@ taking it as argument:
> nO = null
>
> [model.tok2vec]
-> @architectures = "spacy.HashEmbedCNN.v1"
+> @architectures = "spacy.HashEmbedCNN.v2"
> pretrained_vectors = null
> width = 96
> depth = 4
@@ -744,7 +745,7 @@ into the "real world". This requires 3 main components:
> nO = null
>
> [model.tok2vec]
-> @architectures = "spacy.HashEmbedCNN.v1"
+> @architectures = "spacy.HashEmbedCNN.v2"
> pretrained_vectors = null
> width = 96
> depth = 2
diff --git a/website/docs/api/data-formats.md b/website/docs/api/data-formats.md
index 1196d20af..69bdae446 100644
--- a/website/docs/api/data-formats.md
+++ b/website/docs/api/data-formats.md
@@ -29,8 +29,8 @@ recommended settings for your use case, check out the
>
> The `@` syntax lets you refer to function names registered in the
> [function registry](/api/top-level#registry). For example,
-> `@architectures = "spacy.HashEmbedCNN.v1"` refers to a registered function of
-> the name [spacy.HashEmbedCNN.v1](/api/architectures#HashEmbedCNN) and all
+> `@architectures = "spacy.HashEmbedCNN.v2"` refers to a registered function of
+> the name [spacy.HashEmbedCNN.v2](/api/architectures#HashEmbedCNN) and all
> other values defined in its block will be passed into that function as
> arguments. Those arguments depend on the registered function. See the usage
> guide on [registered functions](/usage/training#config-functions) for details.
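To make the `@` resolution described above concrete: the block's remaining values are passed as arguments to the registered function. A hedged sketch using spaCy's registry directly — the parameter set matches `build_hash_embed_cnn_tok2vec` from `spacy/ml/models/tok2vec.py`:

```python
from thinc.api import Config
from spacy import registry

CFG = """
[model]
@architectures = "spacy.HashEmbedCNN.v2"
width = 96
depth = 4
embed_size = 2000
window_size = 1
maxout_pieces = 3
subword_features = true
pretrained_vectors = null
"""

config = Config().from_str(CFG)
model = registry.resolve(config)["model"]  # calls the registered function
print(model.name)  # a thinc Model built by build_hash_embed_cnn_tok2vec
```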
diff --git a/website/docs/api/legacy.md b/website/docs/api/legacy.md
index 3e5c7f75f..96bc199bf 100644
--- a/website/docs/api/legacy.md
+++ b/website/docs/api/legacy.md
@@ -4,12 +4,13 @@ teaser: Archived implementations available through spacy-legacy
source: spacy/legacy
---
-The [`spacy-legacy`](https://github.com/explosion/spacy-legacy) package includes
-outdated registered functions and architectures. It is installed automatically as
-a dependency of spaCy, and provides backwards compatibility for archived functions
-that may still be used in projects.
+The [`spacy-legacy`](https://github.com/explosion/spacy-legacy) package includes
+outdated registered functions and architectures. It is installed automatically
+as a dependency of spaCy, and provides backwards compatibility for archived
+functions that may still be used in projects.
-You can find the detailed documentation of each such legacy function on this page.
+You can find the detailed documentation of each such legacy function on this
+page.
## Architectures {#architectures}
@@ -17,8 +18,8 @@ These functions are available from `@spacy.registry.architectures`.
### spacy.Tok2Vec.v1 {#Tok2Vec_v1}
-The `spacy.Tok2Vec.v1` architecture was expecting an `encode` model of type
-`Model[Floats2D, Floats2D]` such as `spacy.MaxoutWindowEncoder.v1` or
+The `spacy.Tok2Vec.v1` architecture expected an `encode` model of type
+`Model[Floats2d, Floats2d]` such as `spacy.MaxoutWindowEncoder.v1` or
`spacy.MishWindowEncoder.v1`.
> #### Example config
@@ -44,15 +45,14 @@ blog post for background.
| Name | Description |
| ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `embed` | Embed tokens into context-independent word vector representations. For example, [CharacterEmbed](/api/architectures#CharacterEmbed) or [MultiHashEmbed](/api/architectures#MultiHashEmbed). ~~Model[List[Doc], List[Floats2d]]~~ |
-| `encode` | Encode context into the embeddings, using an architecture such as a CNN, BiLSTM or transformer. For example, [MaxoutWindowEncoder.v1](/api/legacy#MaxoutWindowEncoder_v1). ~~Model[Floats2d, Floats2d]~~ |
+| `encode` | Encode context into the embeddings, using an architecture such as a CNN, BiLSTM or transformer. For example, [MaxoutWindowEncoder.v1](/api/legacy#MaxoutWindowEncoder_v1). ~~Model[Floats2d, Floats2d]~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
### spacy.MaxoutWindowEncoder.v1 {#MaxoutWindowEncoder_v1}
-The `spacy.MaxoutWindowEncoder.v1` architecture was producing a model of type
-`Model[Floats2D, Floats2D]`. Since `spacy.MaxoutWindowEncoder.v2`, this has been changed to output
-type `Model[List[Floats2d], List[Floats2d]]`.
-
+The `spacy.MaxoutWindowEncoder.v1` architecture produced a model of type
+`Model[Floats2d, Floats2d]`. Since `spacy.MaxoutWindowEncoder.v2`, the output
+type has been changed to `Model[List[Floats2d], List[Floats2d]]`.
> #### Example config
>
@@ -78,9 +78,9 @@ and residual connections.
### spacy.MishWindowEncoder.v1 {#MishWindowEncoder_v1}
-The `spacy.MishWindowEncoder.v1` architecture was producing a model of type
-`Model[Floats2D, Floats2D]`. Since `spacy.MishWindowEncoder.v2`, this has been changed to output
-type `Model[List[Floats2d], List[Floats2d]]`.
+The `spacy.MishWindowEncoder.v1` architecture produced a model of type
+`Model[Floats2d, Floats2d]`. Since `spacy.MishWindowEncoder.v2`, the output
+type has been changed to `Model[List[Floats2d], List[Floats2d]]`.
> #### Example config
>
@@ -103,12 +103,11 @@ and residual connections.
| `depth` | The number of convolutional layers. Recommended value is `4`. ~~int~~ |
| **CREATES** | The model using the architecture. ~~Model[Floats2d, Floats2d]~~ |
-
### spacy.TextCatEnsemble.v1 {#TextCatEnsemble_v1}
-The `spacy.TextCatEnsemble.v1` architecture built an internal `tok2vec` and `linear_model`.
-Since `spacy.TextCatEnsemble.v2`, this has been refactored so that the `TextCatEnsemble` takes these
-two sublayers as input.
+The `spacy.TextCatEnsemble.v1` architecture built an internal `tok2vec` and
+`linear_model`. Since `spacy.TextCatEnsemble.v2`, this has been refactored so
+that the `TextCatEnsemble` takes these two sublayers as input.
> #### Example Config
>
@@ -142,6 +141,40 @@ network has an internal CNN Tok2Vec layer and uses attention.
| `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ |
+### spacy.HashEmbedCNN.v1 {#HashEmbedCNN_v1}
+
+Identical to [`spacy.HashEmbedCNN.v2`](/api/architectures#HashEmbedCNN) except
+using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are included.
+
+### spacy.MultiHashEmbed.v1 {#MultiHashEmbed_v1}
+
+Identical to [`spacy.MultiHashEmbed.v2`](/api/architectures#MultiHashEmbed)
+except using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
+included.
+
+### spacy.CharacterEmbed.v1 {#CharacterEmbed_v1}
+
+Identical to [`spacy.CharacterEmbed.v2`](/api/architectures#CharacterEmbed)
+except using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
+included.
+
+## Layers {#layers}
+
+These functions are available from `@spacy.registry.layers`.
+
+### spacy.StaticVectors.v1 {#StaticVectors_v1}
+
+Identical to [`spacy.StaticVectors.v2`](/api/architectures#StaticVectors) except
+for the handling of tokens without vectors.
+
+<Infobox variant="warning">
+
+`spacy.StaticVectors.v1` maps tokens without vectors to the final row in the
+vectors table, which causes the model predictions to change if new vectors are
+added to an existing vectors table. See more details in
+[issue #7662](https://github.com/explosion/spaCy/issues/7662#issuecomment-813925655).
+
+</Infobox>
## Loggers {#loggers}
@@ -149,7 +182,7 @@ These functions are available from `@spacy.registry.loggers`.
### spacy.WandbLogger.v1 {#WandbLogger_v1}
-The first version of the [`WandbLogger`](/api/top-level#WandbLogger) did not yet
+The first version of the [`WandbLogger`](/api/top-level#WandbLogger) did not yet
support the `log_dataset_dir` and `model_log_interval` arguments.
> #### Example config
@@ -160,7 +193,8 @@ support the `log_dataset_dir` and `model_log_interval` arguments.
> project_name = "monitor_spacy_training"
> remove_config_values = ["paths.train", "paths.dev", "corpora.train.path", "corpora.dev.path"]
> ```
-| Name | Description |
-| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
-| `project_name` | The name of the project in the Weights & Biases interface. The project will be created automatically if it doesn't exist yet. ~~str~~ |
-| `remove_config_values` | A list of values to include from the config before it is uploaded to W&B (default: empty). ~~List[str]~~ |
+
+| Name                   | Description                                                                                                                            |
+| ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------- |
+| `project_name`         | The name of the project in the Weights & Biases interface. The project will be created automatically if it doesn't exist yet. ~~str~~   |
+| `remove_config_values` | A list of values to exclude from the config before it is uploaded to W&B (default: empty). ~~List[str]~~                                |
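A quick check that the archived layer really is available through `spacy-legacy` (hence the `>=3.0.3` pin at the top of this diff) — a hedged sketch, assuming both packages are installed:

```python
from spacy import registry

v1 = registry.layers.get("spacy.StaticVectors.v1")  # provided by spacy-legacy
v2 = registry.layers.get("spacy.StaticVectors.v2")  # provided by spaCy itself
assert v1 is not v2  # same signature, different handling of unknown tokens
```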
diff --git a/website/docs/usage/embeddings-transformers.md b/website/docs/usage/embeddings-transformers.md
index e71336e84..4113e9394 100644
--- a/website/docs/usage/embeddings-transformers.md
+++ b/website/docs/usage/embeddings-transformers.md
@@ -132,7 +132,7 @@ factory = "tok2vec"
@architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed]
-@architectures = "spacy.MultiHashEmbed.v1"
+@architectures = "spacy.MultiHashEmbed.v2"
[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v2"
@@ -164,7 +164,7 @@ factory = "ner"
@architectures = "spacy.Tok2Vec.v2"
[components.ner.model.tok2vec.embed]
-@architectures = "spacy.MultiHashEmbed.v1"
+@architectures = "spacy.MultiHashEmbed.v2"
[components.ner.model.tok2vec.encode]
@architectures = "spacy.MaxoutWindowEncoder.v2"
@@ -541,7 +541,7 @@ word vector tables using the `include_static_vectors` flag.
```ini
[tagger.model.tok2vec.embed]
-@architectures = "spacy.MultiHashEmbed.v1"
+@architectures = "spacy.MultiHashEmbed.v2"
width = 128
attrs = ["LOWER","PREFIX","SUFFIX","SHAPE"]
rows = [5000,2500,2500,2500]
@@ -550,7 +550,7 @@ include_static_vectors = true
-The configuration system will look up the string `"spacy.MultiHashEmbed.v1"` in
+The configuration system will look up the string `"spacy.MultiHashEmbed.v2"` in
the `architectures` [registry](/api/top-level#registry), and call the returned
object with the rest of the arguments from the block. This will result in a call
to the
diff --git a/website/docs/usage/layers-architectures.md b/website/docs/usage/layers-architectures.md
index 0bc935d51..8fe2cf489 100644
--- a/website/docs/usage/layers-architectures.md
+++ b/website/docs/usage/layers-architectures.md
@@ -137,7 +137,7 @@ nO = null
@architectures = "spacy.Tok2Vec.v2"
[components.textcat.model.tok2vec.embed]
-@architectures = "spacy.MultiHashEmbed.v1"
+@architectures = "spacy.MultiHashEmbed.v2"
width = 64
rows = [2000, 2000, 1000, 1000, 1000, 1000]
attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
@@ -204,7 +204,7 @@ factory = "tok2vec"
@architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed]
-@architectures = "spacy.MultiHashEmbed.v1"
+@architectures = "spacy.MultiHashEmbed.v2"
# ...
[components.tok2vec.model.encode]
@@ -220,7 +220,7 @@ architecture:
```ini
### config.cfg (excerpt)
[components.tok2vec.model.embed]
-@architectures = "spacy.CharacterEmbed.v1"
+@architectures = "spacy.CharacterEmbed.v2"
# ...
[components.tok2vec.model.encode]
@@ -638,7 +638,7 @@ that has the full implementation.
> @architectures = "rel_instance_tensor.v1"
>
> [model.create_instance_tensor.tok2vec]
-> @architectures = "spacy.HashEmbedCNN.v1"
+> @architectures = "spacy.HashEmbedCNN.v2"
> # ...
>
> [model.create_instance_tensor.pooling]