Replace negative rows with 0 in StaticVectors (#7674)

* Replace negative rows with 0 in StaticVectors

Replace negative row indices with 0-vectors in `StaticVectors`.

* Increase versions related to StaticVectors

* Increase versions of all architectures and layers related to
`StaticVectors`
* Improve efficiency of 0-vector operations

Parallel `spacy-legacy` PR: https://github.com/explosion/spacy-legacy/pull/5

* Update config defaults to new versions

* Update docs
This commit is contained in:
Adriane Boyd 2021-04-22 10:04:15 +02:00 committed by GitHub
parent 6f565cf39d
commit d2bdaa7823
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 106 additions and 69 deletions

View File

@ -1,5 +1,5 @@
# Our libraries # Our libraries
spacy-legacy>=3.0.2,<3.1.0 spacy-legacy>=3.0.3,<3.1.0
cymem>=2.0.2,<2.1.0 cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0 preshed>=3.0.2,<3.1.0
thinc>=8.0.2,<8.1.0 thinc>=8.0.2,<8.1.0

View File

@ -37,7 +37,7 @@ setup_requires =
thinc>=8.0.2,<8.1.0 thinc>=8.0.2,<8.1.0
install_requires = install_requires =
# Our libraries # Our libraries
spacy-legacy>=3.0.2,<3.1.0 spacy-legacy>=3.0.3,<3.1.0
murmurhash>=0.28.0,<1.1.0 murmurhash>=0.28.0,<1.1.0
cymem>=2.0.2,<2.1.0 cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0 preshed>=3.0.2,<3.1.0

View File

@ -206,7 +206,7 @@ factory = "tok2vec"
@architectures = "spacy.Tok2Vec.v2" @architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed] [components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1" @architectures = "spacy.MultiHashEmbed.v2"
width = ${components.tok2vec.model.encode.width} width = ${components.tok2vec.model.encode.width}
{% if has_letters -%} {% if has_letters -%}
attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"] attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"]

View File

@ -31,7 +31,7 @@ def get_tok2vec_width(model: Model):
return nO return nO
@registry.architectures("spacy.HashEmbedCNN.v1") @registry.architectures("spacy.HashEmbedCNN.v2")
def build_hash_embed_cnn_tok2vec( def build_hash_embed_cnn_tok2vec(
*, *,
width: int, width: int,
@ -108,7 +108,7 @@ def build_Tok2Vec_model(
return tok2vec return tok2vec
@registry.architectures("spacy.MultiHashEmbed.v1") @registry.architectures("spacy.MultiHashEmbed.v2")
def MultiHashEmbed( def MultiHashEmbed(
width: int, width: int,
attrs: List[Union[str, int]], attrs: List[Union[str, int]],
@ -182,7 +182,7 @@ def MultiHashEmbed(
return model return model
@registry.architectures("spacy.CharacterEmbed.v1") @registry.architectures("spacy.CharacterEmbed.v2")
def CharacterEmbed( def CharacterEmbed(
width: int, width: int,
rows: int, rows: int,

View File

@ -8,7 +8,7 @@ from ..tokens import Doc
from ..errors import Errors from ..errors import Errors
@registry.layers("spacy.StaticVectors.v1") @registry.layers("spacy.StaticVectors.v2")
def StaticVectors( def StaticVectors(
nO: Optional[int] = None, nO: Optional[int] = None,
nM: Optional[int] = None, nM: Optional[int] = None,
@ -46,6 +46,8 @@ def forward(
vectors_data = model.ops.gemm(model.ops.as_contig(V[rows]), W, trans2=True) vectors_data = model.ops.gemm(model.ops.as_contig(V[rows]), W, trans2=True)
except ValueError: except ValueError:
raise RuntimeError(Errors.E896) raise RuntimeError(Errors.E896)
# Convert negative indices to 0-vectors (TODO: more options for UNK tokens)
vectors_data[rows < 0] = 0
output = Ragged( output = Ragged(
vectors_data, model.ops.asarray([len(doc) for doc in docs], dtype="i") vectors_data, model.ops.asarray([len(doc) for doc in docs], dtype="i")
) )

View File

@ -24,7 +24,7 @@ maxout_pieces = 2
use_upper = true use_upper = true
[model.tok2vec] [model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1" @architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null pretrained_vectors = null
width = 96 width = 96
depth = 4 depth = 4

View File

@ -26,7 +26,7 @@ default_model_config = """
@architectures = "spacy.EntityLinker.v1" @architectures = "spacy.EntityLinker.v1"
[model.tok2vec] [model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1" @architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null pretrained_vectors = null
width = 96 width = 96
depth = 2 depth = 2

View File

@ -27,7 +27,7 @@ default_model_config = """
@architectures = "spacy.Tok2Vec.v2" @architectures = "spacy.Tok2Vec.v2"
[model.tok2vec.embed] [model.tok2vec.embed]
@architectures = "spacy.CharacterEmbed.v1" @architectures = "spacy.CharacterEmbed.v2"
width = 128 width = 128
rows = 7000 rows = 7000
nM = 64 nM = 64

View File

@ -22,7 +22,7 @@ maxout_pieces = 3
token_vector_width = 96 token_vector_width = 96
[model.tok2vec] [model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1" @architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null pretrained_vectors = null
width = 96 width = 96
depth = 4 depth = 4

View File

@ -21,7 +21,7 @@ maxout_pieces = 2
use_upper = true use_upper = true
[model.tok2vec] [model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1" @architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null pretrained_vectors = null
width = 96 width = 96
depth = 4 depth = 4

View File

@ -19,7 +19,7 @@ default_model_config = """
@architectures = "spacy.Tagger.v1" @architectures = "spacy.Tagger.v1"
[model.tok2vec] [model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1" @architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null pretrained_vectors = null
width = 12 width = 12
depth = 1 depth = 1

View File

@ -26,7 +26,7 @@ default_model_config = """
@architectures = "spacy.Tagger.v1" @architectures = "spacy.Tagger.v1"
[model.tok2vec] [model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1" @architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null pretrained_vectors = null
width = 96 width = 96
depth = 4 depth = 4

View File

@ -21,7 +21,7 @@ single_label_default_config = """
@architectures = "spacy.Tok2Vec.v2" @architectures = "spacy.Tok2Vec.v2"
[model.tok2vec.embed] [model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v1" @architectures = "spacy.MultiHashEmbed.v2"
width = 64 width = 64
rows = [2000, 2000, 1000, 1000, 1000, 1000] rows = [2000, 2000, 1000, 1000, 1000, 1000]
attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"] attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
@ -56,7 +56,7 @@ single_label_cnn_config = """
exclusive_classes = true exclusive_classes = true
[model.tok2vec] [model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1" @architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null pretrained_vectors = null
width = 96 width = 96
depth = 4 depth = 4

View File

@ -21,7 +21,7 @@ multi_label_default_config = """
@architectures = "spacy.Tok2Vec.v1" @architectures = "spacy.Tok2Vec.v1"
[model.tok2vec.embed] [model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v1" @architectures = "spacy.MultiHashEmbed.v2"
width = 64 width = 64
rows = [2000, 2000, 1000, 1000, 1000, 1000] rows = [2000, 2000, 1000, 1000, 1000, 1000]
attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"] attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
@ -56,7 +56,7 @@ multi_label_cnn_config = """
exclusive_classes = false exclusive_classes = false
[model.tok2vec] [model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1" @architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null pretrained_vectors = null
width = 96 width = 96
depth = 4 depth = 4

View File

@ -11,7 +11,7 @@ from ..errors import Errors
default_model_config = """ default_model_config = """
[model] [model]
@architectures = "spacy.HashEmbedCNN.v1" @architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null pretrained_vectors = null
width = 96 width = 96
depth = 4 depth = 4

View File

@ -35,7 +35,7 @@ usage documentation on
> @architectures = "spacy.Tok2Vec.v2" > @architectures = "spacy.Tok2Vec.v2"
> >
> [model.embed] > [model.embed]
> @architectures = "spacy.CharacterEmbed.v1" > @architectures = "spacy.CharacterEmbed.v2"
> # ... > # ...
> >
> [model.encode] > [model.encode]
@ -54,13 +54,13 @@ blog post for background.
| `encode` | Encode context into the embeddings, using an architecture such as a CNN, BiLSTM or transformer. For example, [MaxoutWindowEncoder](/api/architectures#MaxoutWindowEncoder). ~~Model[List[Floats2d], List[Floats2d]]~~ | | `encode` | Encode context into the embeddings, using an architecture such as a CNN, BiLSTM or transformer. For example, [MaxoutWindowEncoder](/api/architectures#MaxoutWindowEncoder). ~~Model[List[Floats2d], List[Floats2d]]~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ | | **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
### spacy.HashEmbedCNN.v1 {#HashEmbedCNN} ### spacy.HashEmbedCNN.v2 {#HashEmbedCNN}
> #### Example Config > #### Example Config
> >
> ```ini > ```ini
> [model] > [model]
> @architectures = "spacy.HashEmbedCNN.v1" > @architectures = "spacy.HashEmbedCNN.v2"
> pretrained_vectors = null > pretrained_vectors = null
> width = 96 > width = 96
> depth = 4 > depth = 4
@ -96,7 +96,7 @@ consisting of a CNN and a layer-normalized maxout activation function.
> factory = "tok2vec" > factory = "tok2vec"
> >
> [components.tok2vec.model] > [components.tok2vec.model]
> @architectures = "spacy.HashEmbedCNN.v1" > @architectures = "spacy.HashEmbedCNN.v2"
> width = 342 > width = 342
> >
> [components.tagger] > [components.tagger]
@ -129,13 +129,13 @@ argument that connects to the shared `tok2vec` component in the pipeline.
| `upstream` | A string to identify the "upstream" `Tok2Vec` component to communicate with. By default, the upstream name is the wildcard string `"*"`, but you could also specify the name of the `Tok2Vec` component. You'll almost never have multiple upstream `Tok2Vec` components, so the wildcard string will almost always be fine. ~~str~~ | | `upstream` | A string to identify the "upstream" `Tok2Vec` component to communicate with. By default, the upstream name is the wildcard string `"*"`, but you could also specify the name of the `Tok2Vec` component. You'll almost never have multiple upstream `Tok2Vec` components, so the wildcard string will almost always be fine. ~~str~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ | | **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
### spacy.MultiHashEmbed.v1 {#MultiHashEmbed} ### spacy.MultiHashEmbed.v2 {#MultiHashEmbed}
> #### Example config > #### Example config
> >
> ```ini > ```ini
> [model] > [model]
> @architectures = "spacy.MultiHashEmbed.v1" > @architectures = "spacy.MultiHashEmbed.v2"
> width = 64 > width = 64
> attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"] > attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"]
> rows = [2000, 1000, 1000, 1000] > rows = [2000, 1000, 1000, 1000]
@ -160,13 +160,13 @@ not updated).
| `include_static_vectors` | Whether to also use static word vectors. Requires a vectors table to be loaded in the [`Doc`](/api/doc) objects' vocab. ~~bool~~ | | `include_static_vectors` | Whether to also use static word vectors. Requires a vectors table to be loaded in the [`Doc`](/api/doc) objects' vocab. ~~bool~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ | | **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
### spacy.CharacterEmbed.v1 {#CharacterEmbed} ### spacy.CharacterEmbed.v2 {#CharacterEmbed}
> #### Example config > #### Example config
> >
> ```ini > ```ini
> [model] > [model]
> @architectures = "spacy.CharacterEmbed.v1" > @architectures = "spacy.CharacterEmbed.v2"
> width = 128 > width = 128
> rows = 7000 > rows = 7000
> nM = 64 > nM = 64
@ -266,13 +266,13 @@ Encode context using bidirectional LSTM layers. Requires
| `dropout` | Creates a Dropout layer on the outputs of each LSTM layer except the last layer. Set to 0.0 to disable this functionality. ~~float~~ | | `dropout` | Creates a Dropout layer on the outputs of each LSTM layer except the last layer. Set to 0.0 to disable this functionality. ~~float~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Floats2d], List[Floats2d]]~~ | | **CREATES** | The model using the architecture. ~~Model[List[Floats2d], List[Floats2d]]~~ |
### spacy.StaticVectors.v1 {#StaticVectors} ### spacy.StaticVectors.v2 {#StaticVectors}
> #### Example config > #### Example config
> >
> ```ini > ```ini
> [model] > [model]
> @architectures = "spacy.StaticVectors.v1" > @architectures = "spacy.StaticVectors.v2"
> nO = null > nO = null
> nM = null > nM = null
> dropout = 0.2 > dropout = 0.2
@ -283,8 +283,9 @@ Encode context using bidirectional LSTM layers. Requires
> ``` > ```
Embed [`Doc`](/api/doc) objects with their vocab's vectors table, applying a Embed [`Doc`](/api/doc) objects with their vocab's vectors table, applying a
learned linear projection to control the dimensionality. See the documentation learned linear projection to control the dimensionality. Unknown tokens are
on [static vectors](/usage/embeddings-transformers#static-vectors) for details. mapped to a zero vector. See the documentation on [static
vectors](/usage/embeddings-transformers#static-vectors) for details.
| Name |  Description | | Name |  Description |
| ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
@ -513,7 +514,7 @@ for a Tok2Vec layer.
> use_upper = true > use_upper = true
> >
> [model.tok2vec] > [model.tok2vec]
> @architectures = "spacy.HashEmbedCNN.v1" > @architectures = "spacy.HashEmbedCNN.v2"
> pretrained_vectors = null > pretrained_vectors = null
> width = 96 > width = 96
> depth = 4 > depth = 4
@ -619,7 +620,7 @@ single-label use-cases where `exclusive_classes = true`, while the
> @architectures = "spacy.Tok2Vec.v2" > @architectures = "spacy.Tok2Vec.v2"
> >
> [model.tok2vec.embed] > [model.tok2vec.embed]
> @architectures = "spacy.MultiHashEmbed.v1" > @architectures = "spacy.MultiHashEmbed.v2"
> width = 64 > width = 64
> rows = [2000, 2000, 1000, 1000, 1000, 1000] > rows = [2000, 2000, 1000, 1000, 1000, 1000]
> attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"] > attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
@ -676,7 +677,7 @@ taking it as argument:
> nO = null > nO = null
> >
> [model.tok2vec] > [model.tok2vec]
> @architectures = "spacy.HashEmbedCNN.v1" > @architectures = "spacy.HashEmbedCNN.v2"
> pretrained_vectors = null > pretrained_vectors = null
> width = 96 > width = 96
> depth = 4 > depth = 4
@ -744,7 +745,7 @@ into the "real world". This requires 3 main components:
> nO = null > nO = null
> >
> [model.tok2vec] > [model.tok2vec]
> @architectures = "spacy.HashEmbedCNN.v1" > @architectures = "spacy.HashEmbedCNN.v2"
> pretrained_vectors = null > pretrained_vectors = null
> width = 96 > width = 96
> depth = 2 > depth = 2

View File

@ -29,8 +29,8 @@ recommended settings for your use case, check out the
> >
> The `@` syntax lets you refer to function names registered in the > The `@` syntax lets you refer to function names registered in the
> [function registry](/api/top-level#registry). For example, > [function registry](/api/top-level#registry). For example,
> `@architectures = "spacy.HashEmbedCNN.v1"` refers to a registered function of > `@architectures = "spacy.HashEmbedCNN.v2"` refers to a registered function of
> the name [spacy.HashEmbedCNN.v1](/api/architectures#HashEmbedCNN) and all > the name [spacy.HashEmbedCNN.v2](/api/architectures#HashEmbedCNN) and all
> other values defined in its block will be passed into that function as > other values defined in its block will be passed into that function as
> arguments. Those arguments depend on the registered function. See the usage > arguments. Those arguments depend on the registered function. See the usage
> guide on [registered functions](/usage/training#config-functions) for details. > guide on [registered functions](/usage/training#config-functions) for details.

View File

@ -5,11 +5,12 @@ source: spacy/legacy
--- ---
The [`spacy-legacy`](https://github.com/explosion/spacy-legacy) package includes The [`spacy-legacy`](https://github.com/explosion/spacy-legacy) package includes
outdated registered functions and architectures. It is installed automatically as outdated registered functions and architectures. It is installed automatically
a dependency of spaCy, and provides backwards compatibility for archived functions as a dependency of spaCy, and provides backwards compatibility for archived
that may still be used in projects. functions that may still be used in projects.
You can find the detailed documentation of each such legacy function on this page. You can find the detailed documentation of each such legacy function on this
page.
## Architectures {#architectures} ## Architectures {#architectures}
@ -44,15 +45,14 @@ blog post for background.
| Name | Description | | Name | Description |
| ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `embed` | Embed tokens into context-independent word vector representations. For example, [CharacterEmbed](/api/architectures#CharacterEmbed) or [MultiHashEmbed](/api/architectures#MultiHashEmbed). ~~Model[List[Doc], List[Floats2d]]~~ | | `embed` | Embed tokens into context-independent word vector representations. For example, [CharacterEmbed](/api/architectures#CharacterEmbed) or [MultiHashEmbed](/api/architectures#MultiHashEmbed). ~~Model[List[Doc], List[Floats2d]]~~ |
| `encode` | Encode context into the embeddings, using an architecture such as a CNN, BiLSTM or transformer. For example, [MaxoutWindowEncoder.v1](/api/legacy#MaxoutWindowEncoder_v1). ~~Model[Floats2d, Floats2d]~~ | | `encode` | Encode context into the embeddings, using an architecture such as a CNN, BiLSTM or transformer. For example, [MaxoutWindowEncoder.v1](/api/legacy#MaxoutWindowEncoder_v1). ~~Model[Floats2d, Floats2d]~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ | | **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
### spacy.MaxoutWindowEncoder.v1 {#MaxoutWindowEncoder_v1} ### spacy.MaxoutWindowEncoder.v1 {#MaxoutWindowEncoder_v1}
The `spacy.MaxoutWindowEncoder.v1` architecture was producing a model of type The `spacy.MaxoutWindowEncoder.v1` architecture was producing a model of type
`Model[Floats2D, Floats2D]`. Since `spacy.MaxoutWindowEncoder.v2`, this has been changed to output `Model[Floats2D, Floats2D]`. Since `spacy.MaxoutWindowEncoder.v2`, this has been
type `Model[List[Floats2d], List[Floats2d]]`. changed to output type `Model[List[Floats2d], List[Floats2d]]`.
> #### Example config > #### Example config
> >
@ -79,8 +79,8 @@ and residual connections.
### spacy.MishWindowEncoder.v1 {#MishWindowEncoder_v1} ### spacy.MishWindowEncoder.v1 {#MishWindowEncoder_v1}
The `spacy.MishWindowEncoder.v1` architecture was producing a model of type The `spacy.MishWindowEncoder.v1` architecture was producing a model of type
`Model[Floats2D, Floats2D]`. Since `spacy.MishWindowEncoder.v2`, this has been changed to output `Model[Floats2D, Floats2D]`. Since `spacy.MishWindowEncoder.v2`, this has been
type `Model[List[Floats2d], List[Floats2d]]`. changed to output type `Model[List[Floats2d], List[Floats2d]]`.
> #### Example config > #### Example config
> >
@ -103,12 +103,11 @@ and residual connections.
| `depth` | The number of convolutional layers. Recommended value is `4`. ~~int~~ | | `depth` | The number of convolutional layers. Recommended value is `4`. ~~int~~ |
| **CREATES** | The model using the architecture. ~~Model[Floats2d, Floats2d]~~ | | **CREATES** | The model using the architecture. ~~Model[Floats2d, Floats2d]~~ |
### spacy.TextCatEnsemble.v1 {#TextCatEnsemble_v1} ### spacy.TextCatEnsemble.v1 {#TextCatEnsemble_v1}
The `spacy.TextCatEnsemble.v1` architecture built an internal `tok2vec` and `linear_model`. The `spacy.TextCatEnsemble.v1` architecture built an internal `tok2vec` and
Since `spacy.TextCatEnsemble.v2`, this has been refactored so that the `TextCatEnsemble` takes these `linear_model`. Since `spacy.TextCatEnsemble.v2`, this has been refactored so
two sublayers as input. that the `TextCatEnsemble` takes these two sublayers as input.
> #### Example Config > #### Example Config
> >
@ -142,6 +141,40 @@ network has an internal CNN Tok2Vec layer and uses attention.
| `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ | | `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ | | **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ |
### spacy.HashEmbedCNN.v1 {#HashEmbedCNN_v1}
Identical to [`spacy.HashEmbedCNN.v2`](/api/architectures#HashEmbedCNN) except
using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are included.
### spacy.MultiHashEmbed.v1 {#MultiHashEmbed_v1}
Identical to [`spacy.MultiHashEmbed.v2`](/api/architectures#MultiHashEmbed)
except with [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
included.
### spacy.CharacterEmbed.v1 {#CharacterEmbed_v1}
Identical to [`spacy.CharacterEmbed.v2`](/api/architectures#CharacterEmbed)
except using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
included.
## Layers {#layers}
These functions are available from `@spacy.registry.layers`.
### spacy.StaticVectors.v1 {#StaticVectors_v1}
Identical to [`spacy.StaticVectors.v2`](/api/architectures#StaticVectors) except
for the handling of tokens without vectors.
<Infobox title="Bugs for tokens without vectors" variant="warning">
`spacy.StaticVectors.v1` maps tokens without vectors to the final row in the
vectors table, which causes the model predictions to change if new vectors are
added to an existing vectors table. See more details in
[issue #7662](https://github.com/explosion/spaCy/issues/7662#issuecomment-813925655).
</Infobox>
## Loggers {#loggers} ## Loggers {#loggers}
@ -160,7 +193,8 @@ support the `log_dataset_dir` and `model_log_interval` arguments.
> project_name = "monitor_spacy_training" > project_name = "monitor_spacy_training"
> remove_config_values = ["paths.train", "paths.dev", "corpora.train.path", "corpora.dev.path"] > remove_config_values = ["paths.train", "paths.dev", "corpora.train.path", "corpora.dev.path"]
> ``` > ```
| Name | Description | >
| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------- | > | Name | Description |
| `project_name` | The name of the project in the Weights & Biases interface. The project will be created automatically if it doesn't exist yet. ~~str~~ | > | ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
| `remove_config_values` | A list of values to exclude from the config before it is uploaded to W&B (default: empty). ~~List[str]~~ | > | `remove_config_values` | A list of values to exclude from the config before it is uploaded to W&B (default: empty). ~~List[str]~~ |
> | `remove_config_values` | A list of values to include from the config before it is uploaded to W&B (default: empty). ~~List[str]~~ |

View File

@ -132,7 +132,7 @@ factory = "tok2vec"
@architectures = "spacy.Tok2Vec.v2" @architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed] [components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1" @architectures = "spacy.MultiHashEmbed.v2"
[components.tok2vec.model.encode] [components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v2" @architectures = "spacy.MaxoutWindowEncoder.v2"
@ -164,7 +164,7 @@ factory = "ner"
@architectures = "spacy.Tok2Vec.v2" @architectures = "spacy.Tok2Vec.v2"
[components.ner.model.tok2vec.embed] [components.ner.model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v1" @architectures = "spacy.MultiHashEmbed.v2"
[components.ner.model.tok2vec.encode] [components.ner.model.tok2vec.encode]
@architectures = "spacy.MaxoutWindowEncoder.v2" @architectures = "spacy.MaxoutWindowEncoder.v2"
@ -541,7 +541,7 @@ word vector tables using the `include_static_vectors` flag.
```ini ```ini
[tagger.model.tok2vec.embed] [tagger.model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v1" @architectures = "spacy.MultiHashEmbed.v2"
width = 128 width = 128
attrs = ["LOWER","PREFIX","SUFFIX","SHAPE"] attrs = ["LOWER","PREFIX","SUFFIX","SHAPE"]
rows = [5000,2500,2500,2500] rows = [5000,2500,2500,2500]
@ -550,7 +550,7 @@ include_static_vectors = true
<Infobox title="How it works" emoji="💡"> <Infobox title="How it works" emoji="💡">
The configuration system will look up the string `"spacy.MultiHashEmbed.v1"` in The configuration system will look up the string `"spacy.MultiHashEmbed.v2"` in
the `architectures` [registry](/api/top-level#registry), and call the returned the `architectures` [registry](/api/top-level#registry), and call the returned
object with the rest of the arguments from the block. This will result in a call object with the rest of the arguments from the block. This will result in a call
to the to the

View File

@ -137,7 +137,7 @@ nO = null
@architectures = "spacy.Tok2Vec.v2" @architectures = "spacy.Tok2Vec.v2"
[components.textcat.model.tok2vec.embed] [components.textcat.model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v1" @architectures = "spacy.MultiHashEmbed.v2"
width = 64 width = 64
rows = [2000, 2000, 1000, 1000, 1000, 1000] rows = [2000, 2000, 1000, 1000, 1000, 1000]
attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"] attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
@ -204,7 +204,7 @@ factory = "tok2vec"
@architectures = "spacy.Tok2Vec.v2" @architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed] [components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1" @architectures = "spacy.MultiHashEmbed.v2"
# ... # ...
[components.tok2vec.model.encode] [components.tok2vec.model.encode]
@ -220,7 +220,7 @@ architecture:
```ini ```ini
### config.cfg (excerpt) ### config.cfg (excerpt)
[components.tok2vec.model.embed] [components.tok2vec.model.embed]
@architectures = "spacy.CharacterEmbed.v1" @architectures = "spacy.CharacterEmbed.v2"
# ... # ...
[components.tok2vec.model.encode] [components.tok2vec.model.encode]
@ -638,7 +638,7 @@ that has the full implementation.
> @architectures = "rel_instance_tensor.v1" > @architectures = "rel_instance_tensor.v1"
> >
> [model.create_instance_tensor.tok2vec] > [model.create_instance_tensor.tok2vec]
> @architectures = "spacy.HashEmbedCNN.v1" > @architectures = "spacy.HashEmbedCNN.v2"
> # ... > # ...
> >
> [model.create_instance_tensor.pooling] > [model.create_instance_tensor.pooling]