diff --git a/requirements.txt b/requirements.txt
index 86ffa9945..91fac7894 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 # Our libraries
-spacy-legacy>=3.0.2,<3.1.0
+spacy-legacy>=3.0.3,<3.1.0
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
 thinc>=8.0.2,<8.1.0
diff --git a/setup.cfg b/setup.cfg
index 6c65277c6..c60d78fc4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -37,7 +37,7 @@ setup_requires =
     thinc>=8.0.2,<8.1.0
 install_requires =
     # Our libraries
-    spacy-legacy>=3.0.2,<3.1.0
+    spacy-legacy>=3.0.3,<3.1.0
     murmurhash>=0.28.0,<1.1.0
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index 38fc23272..e43c21bbd 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -206,7 +206,7 @@ factory = "tok2vec"
 @architectures = "spacy.Tok2Vec.v2"
 
 [components.tok2vec.model.embed]
-@architectures = "spacy.MultiHashEmbed.v1"
+@architectures = "spacy.MultiHashEmbed.v2"
 width = ${components.tok2vec.model.encode.width}
 {% if has_letters -%}
 attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"]
diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py
index 5790af631..76ec87054 100644
--- a/spacy/ml/models/tok2vec.py
+++ b/spacy/ml/models/tok2vec.py
@@ -31,7 +31,7 @@ def get_tok2vec_width(model: Model):
     return nO
 
 
-@registry.architectures("spacy.HashEmbedCNN.v1")
+@registry.architectures("spacy.HashEmbedCNN.v2")
 def build_hash_embed_cnn_tok2vec(
     *,
     width: int,
@@ -108,7 +108,7 @@ def build_Tok2Vec_model(
     return tok2vec
 
 
-@registry.architectures("spacy.MultiHashEmbed.v1")
+@registry.architectures("spacy.MultiHashEmbed.v2")
 def MultiHashEmbed(
     width: int,
     attrs: List[Union[str, int]],
@@ -182,7 +182,7 @@ def MultiHashEmbed(
     return model
 
 
-@registry.architectures("spacy.CharacterEmbed.v1")
+@registry.architectures("spacy.CharacterEmbed.v2")
 def CharacterEmbed(
     width: int,
     rows: int,
diff --git a/spacy/ml/staticvectors.py b/spacy/ml/staticvectors.py
index ea4c7fb77..cfd25c24b 100644
--- a/spacy/ml/staticvectors.py
+++ b/spacy/ml/staticvectors.py
@@ -8,7 +8,7 @@ from ..tokens import Doc
 from ..errors import Errors
 
 
-@registry.layers("spacy.StaticVectors.v1")
+@registry.layers("spacy.StaticVectors.v2")
 def StaticVectors(
     nO: Optional[int] = None,
     nM: Optional[int] = None,
@@ -46,6 +46,8 @@ def forward(
         vectors_data = model.ops.gemm(model.ops.as_contig(V[rows]), W, trans2=True)
     except ValueError:
         raise RuntimeError(Errors.E896)
+    # Convert negative indices to 0-vectors (TODO: more options for UNK tokens)
+    vectors_data[rows < 0] = 0
     output = Ragged(
         vectors_data, model.ops.asarray([len(doc) for doc in docs], dtype="i")
     )
diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx
index 7290c4637..37f09ce3a 100644
--- a/spacy/pipeline/dep_parser.pyx
+++ b/spacy/pipeline/dep_parser.pyx
@@ -24,7 +24,7 @@ maxout_pieces = 2
 use_upper = true
 
 [model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
 pretrained_vectors = null
 width = 96
 depth = 4
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index 6ab52fb35..66070916e 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -26,7 +26,7 @@ default_model_config = """
 @architectures = "spacy.EntityLinker.v1"
 
 [model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
 pretrained_vectors = null
 width = 96
 depth = 2
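The two added lines in `spacy/ml/staticvectors.py` above are the behavioral change that motivates all of the v1-to-v2 renames: `StaticVectors.v2` zeroes the output rows for tokens whose keys are missing from the vectors table, where v1 let the negative index wrap around to the table's final row. A minimal standalone sketch of the masking step, using plain numpy in place of `model.ops` (illustrative only, not spaCy's actual code):

```python
import numpy as np

# Vectors table V, learned projection W, and one row index per token;
# spaCy uses a negative index (-1) for tokens missing from the table.
V = np.random.rand(4, 3).astype("f")
W = np.random.rand(2, 3).astype("f")
rows = np.array([0, 3, -1])

vectors_data = V[rows] @ W.T  # like ops.gemm(as_contig(V[rows]), W, trans2=True)
# Without the fix, V[-1] silently picks the *last* table row for unknown tokens.
vectors_data[rows < 0] = 0    # the new line: unknown tokens -> zero vector
```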
diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx
index cd0081346..3ba05e616 100644
--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@@ -27,7 +27,7 @@ default_model_config = """
 @architectures = "spacy.Tok2Vec.v2"
 
 [model.tok2vec.embed]
-@architectures = "spacy.CharacterEmbed.v1"
+@architectures = "spacy.CharacterEmbed.v2"
 width = 128
 rows = 7000
 nM = 64
diff --git a/spacy/pipeline/multitask.pyx b/spacy/pipeline/multitask.pyx
index 990b6a1de..8c44061e2 100644
--- a/spacy/pipeline/multitask.pyx
+++ b/spacy/pipeline/multitask.pyx
@@ -22,7 +22,7 @@ maxout_pieces = 3
 token_vector_width = 96
 
 [model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
 pretrained_vectors = null
 width = 96
 depth = 4
diff --git a/spacy/pipeline/ner.pyx b/spacy/pipeline/ner.pyx
index 3a2151b01..0b9b0d324 100644
--- a/spacy/pipeline/ner.pyx
+++ b/spacy/pipeline/ner.pyx
@@ -21,7 +21,7 @@ maxout_pieces = 2
 use_upper = true
 
 [model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
 pretrained_vectors = null
 width = 96
 depth = 4
diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx
index 83cd06739..f9472abf5 100644
--- a/spacy/pipeline/senter.pyx
+++ b/spacy/pipeline/senter.pyx
@@ -19,7 +19,7 @@ default_model_config = """
 @architectures = "spacy.Tagger.v1"
 
 [model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
 pretrained_vectors = null
 width = 12
 depth = 1
diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx
index 9af5245c1..938131f6f 100644
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -26,7 +26,7 @@ default_model_config = """
 @architectures = "spacy.Tagger.v1"
 
 [model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
 pretrained_vectors = null
 width = 96
 depth = 4
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index 174ffd273..1d652a483 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -21,7 +21,7 @@ single_label_default_config = """
 @architectures = "spacy.Tok2Vec.v2"
 
 [model.tok2vec.embed]
-@architectures = "spacy.MultiHashEmbed.v1"
+@architectures = "spacy.MultiHashEmbed.v2"
 width = 64
 rows = [2000, 2000, 1000, 1000, 1000, 1000]
 attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
@@ -56,7 +56,7 @@ single_label_cnn_config = """
 exclusive_classes = true
 
 [model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
 pretrained_vectors = null
 width = 96
 depth = 4
diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py
index 036bc8dc5..7267735b4 100644
--- a/spacy/pipeline/textcat_multilabel.py
+++ b/spacy/pipeline/textcat_multilabel.py
@@ -21,7 +21,7 @@ multi_label_default_config = """
 @architectures = "spacy.Tok2Vec.v1"
 
 [model.tok2vec.embed]
-@architectures = "spacy.MultiHashEmbed.v1"
+@architectures = "spacy.MultiHashEmbed.v2"
 width = 64
 rows = [2000, 2000, 1000, 1000, 1000, 1000]
 attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
@@ -56,7 +56,7 @@ multi_label_cnn_config = """
 exclusive_classes = false
 
 [model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
 pretrained_vectors = null
 width = 96
 depth = 4
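All of the pipeline defaults above change in lockstep, so components created after this patch should pick up the v2 architectures without any user action. A quick way to verify this is to inspect the filled config of a freshly added component (a sketch; assumes this branch is installed):

```python
import spacy

# add_pipe fills in the component's default model config
nlp = spacy.blank("en")
nlp.add_pipe("ner")
tok2vec_cfg = nlp.config["components"]["ner"]["model"]["tok2vec"]
print(tok2vec_cfg["@architectures"])  # expected under this patch: "spacy.HashEmbedCNN.v2"
```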
diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py
index 26a4c998c..3ee324d50 100644
--- a/spacy/pipeline/tok2vec.py
+++ b/spacy/pipeline/tok2vec.py
@@ -11,7 +11,7 @@ from ..errors import Errors
 
 default_model_config = """
 [model]
-@architectures = "spacy.HashEmbedCNN.v1"
+@architectures = "spacy.HashEmbedCNN.v2"
 pretrained_vectors = null
 width = 96
 depth = 4
diff --git a/website/docs/api/architectures.md b/website/docs/api/architectures.md
index 4c4bf73f4..e09352ec9 100644
--- a/website/docs/api/architectures.md
+++ b/website/docs/api/architectures.md
@@ -35,7 +35,7 @@ usage documentation on
 > @architectures = "spacy.Tok2Vec.v2"
 >
 > [model.embed]
-> @architectures = "spacy.CharacterEmbed.v1"
+> @architectures = "spacy.CharacterEmbed.v2"
 > # ...
 >
 > [model.encode]
@@ -54,13 +54,13 @@ blog post for background.
 | `encode`    | Encode context into the embeddings, using an architecture such as a CNN, BiLSTM or transformer. For example, [MaxoutWindowEncoder](/api/architectures#MaxoutWindowEncoder). ~~Model[List[Floats2d], List[Floats2d]]~~ |
 | **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
 
-### spacy.HashEmbedCNN.v1 {#HashEmbedCNN}
+### spacy.HashEmbedCNN.v2 {#HashEmbedCNN}
 
 > #### Example Config
 >
 > ```ini
 > [model]
-> @architectures = "spacy.HashEmbedCNN.v1"
+> @architectures = "spacy.HashEmbedCNN.v2"
 > pretrained_vectors = null
 > width = 96
 > depth = 4
@@ -96,7 +96,7 @@ consisting of a CNN and a layer-normalized maxout activation function.
 > factory = "tok2vec"
 >
 > [components.tok2vec.model]
-> @architectures = "spacy.HashEmbedCNN.v1"
+> @architectures = "spacy.HashEmbedCNN.v2"
 > width = 342
 >
 > [components.tagger]
@@ -129,13 +129,13 @@ argument that connects to the shared `tok2vec` component in the pipeline.
 | `upstream`  | A string to identify the "upstream" `Tok2Vec` component to communicate with. By default, the upstream name is the wildcard string `"*"`, but you could also specify the name of the `Tok2Vec` component. You'll almost never have multiple upstream `Tok2Vec` components, so the wildcard string will almost always be fine. ~~str~~ |
 | **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
 
-### spacy.MultiHashEmbed.v1 {#MultiHashEmbed}
+### spacy.MultiHashEmbed.v2 {#MultiHashEmbed}
 
 > #### Example config
 >
 > ```ini
 > [model]
-> @architectures = "spacy.MultiHashEmbed.v1"
+> @architectures = "spacy.MultiHashEmbed.v2"
 > width = 64
 > attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"]
 > rows = [2000, 1000, 1000, 1000]
@@ -160,13 +160,13 @@ not updated).
 | `include_static_vectors` | Whether to also use static word vectors. Requires a vectors table to be loaded in the [`Doc`](/api/doc) objects' vocab. ~~bool~~ |
 | **CREATES**              | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
 
-### spacy.CharacterEmbed.v1 {#CharacterEmbed}
+### spacy.CharacterEmbed.v2 {#CharacterEmbed}
 
 > #### Example config
 >
 > ```ini
 > [model]
-> @architectures = "spacy.CharacterEmbed.v1"
+> @architectures = "spacy.CharacterEmbed.v2"
 > width = 128
 > rows = 7000
 > nM = 64
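Since only the registered names change here (the functions keep their signatures), a v2 embedding layer can be built directly from the registry exactly as before. A sketch using the arguments from the `MultiHashEmbed` example config above (illustrative usage, not part of this diff):

```python
import spacy

# Look up the newly registered name and build the layer.
make_embed = spacy.registry.architectures.get("spacy.MultiHashEmbed.v2")
embed = make_embed(
    width=64,
    attrs=["NORM", "PREFIX", "SUFFIX", "SHAPE"],
    rows=[2000, 1000, 1000, 1000],
    include_static_vectors=False,
)
embed.initialize()

docs = [spacy.blank("en")("hello world")]
vectors = embed.predict(docs)  # List[Floats2d]: one (n_tokens, 64) array per doc
```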
@@ -266,13 +266,13 @@ Encode context using bidirectional LSTM layers. Requires
 | `dropout`   | Creates a Dropout layer on the outputs of each LSTM layer except the last layer. Set to 0.0 to disable this functionality. ~~float~~ |
 | **CREATES** | The model using the architecture. ~~Model[List[Floats2d], List[Floats2d]]~~ |
 
-### spacy.StaticVectors.v1 {#StaticVectors}
+### spacy.StaticVectors.v2 {#StaticVectors}
 
 > #### Example config
 >
 > ```ini
 > [model]
-> @architectures = "spacy.StaticVectors.v1"
+> @architectures = "spacy.StaticVectors.v2"
 > nO = null
 > nM = null
 > dropout = 0.2
@@ -283,8 +283,9 @@ Encode context using bidirectional LSTM layers. Requires
 > ```
 
 Embed [`Doc`](/api/doc) objects with their vocab's vectors table, applying a
-learned linear projection to control the dimensionality. See the documentation
-on [static vectors](/usage/embeddings-transformers#static-vectors) for details.
+learned linear projection to control the dimensionality. Unknown tokens are
+mapped to a zero vector. See the documentation on [static
+vectors](/usage/embeddings-transformers#static-vectors) for details.
 
 | Name        | Description |
 | ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
@@ -513,7 +514,7 @@ for a Tok2Vec layer.
 > use_upper = true
 >
 > [model.tok2vec]
-> @architectures = "spacy.HashEmbedCNN.v1"
+> @architectures = "spacy.HashEmbedCNN.v2"
 > pretrained_vectors = null
 > width = 96
 > depth = 4
@@ -619,7 +620,7 @@ single-label use-cases where `exclusive_classes = true`, while the
 > @architectures = "spacy.Tok2Vec.v2"
 >
 > [model.tok2vec.embed]
-> @architectures = "spacy.MultiHashEmbed.v1"
+> @architectures = "spacy.MultiHashEmbed.v2"
 > width = 64
 > rows = [2000, 2000, 1000, 1000, 1000, 1000]
 > attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
@@ -676,7 +677,7 @@ taking it as argument:
 > nO = null
 >
 > [model.tok2vec]
-> @architectures = "spacy.HashEmbedCNN.v1"
+> @architectures = "spacy.HashEmbedCNN.v2"
 > pretrained_vectors = null
 > width = 96
 > depth = 4
@@ -744,7 +745,7 @@ into the "real world". This requires 3 main components:
 > nO = null
 >
 > [model.tok2vec]
-> @architectures = "spacy.HashEmbedCNN.v1"
+> @architectures = "spacy.HashEmbedCNN.v2"
 > pretrained_vectors = null
 > width = 96
 > depth = 2
diff --git a/website/docs/api/data-formats.md b/website/docs/api/data-formats.md
index 1196d20af..69bdae446 100644
--- a/website/docs/api/data-formats.md
+++ b/website/docs/api/data-formats.md
@@ -29,8 +29,8 @@ recommended settings for your use case, check out the
 >
 > The `@` syntax lets you refer to function names registered in the
 > [function registry](/api/top-level#registry). For example,
-> `@architectures = "spacy.HashEmbedCNN.v1"` refers to a registered function of
-> the name [spacy.HashEmbedCNN.v1](/api/architectures#HashEmbedCNN) and all
+> `@architectures = "spacy.HashEmbedCNN.v2"` refers to a registered function of
+> the name [spacy.HashEmbedCNN.v2](/api/architectures#HashEmbedCNN) and all
 > other values defined in its block will be passed into that function as
 > arguments. Those arguments depend on the registered function. See the usage
 > guide on [registered functions](/usage/training#config-functions) for details.
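The `data-formats.md` note above describes how `@architectures` strings resolve to registered functions; the mechanics are unchanged by this patch, only the default names move to v2. A sketch of resolving the updated default block by hand (hypothetical usage, not part of this diff; the values mirror the default tok2vec config):

```python
from thinc.api import Config
import spacy

cfg = Config().from_str("""
[model]
@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 96
depth = 4
embed_size = 2000
window_size = 1
maxout_pieces = 3
subword_features = true
""")
# Resolving the block calls the registered function with the other values
# in the block passed as keyword arguments.
model = spacy.registry.resolve(cfg)["model"]
```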
diff --git a/website/docs/api/legacy.md b/website/docs/api/legacy.md
index 3e5c7f75f..96bc199bf 100644
--- a/website/docs/api/legacy.md
+++ b/website/docs/api/legacy.md
@@ -4,12 +4,13 @@ teaser: Archived implementations available through spacy-legacy
 source: spacy/legacy
 ---
 
-The [`spacy-legacy`](https://github.com/explosion/spacy-legacy) package includes
-outdated registered functions and architectures. It is installed automatically as
-a dependency of spaCy, and provides backwards compatibility for archived functions
-that may still be used in projects.
+The [`spacy-legacy`](https://github.com/explosion/spacy-legacy) package includes
+outdated registered functions and architectures. It is installed automatically
+as a dependency of spaCy, and provides backwards compatibility for archived
+functions that may still be used in projects.
 
-You can find the detailed documentation of each such legacy function on this page.
+You can find the detailed documentation of each such legacy function on this
+page.
 
 ## Architectures {#architectures}
 
@@ -17,8 +18,8 @@ These functions are available from `@spacy.registry.architectures`.
 
 ### spacy.Tok2Vec.v1 {#Tok2Vec_v1}
 
-The `spacy.Tok2Vec.v1` architecture was expecting an `encode` model of type
-`Model[Floats2D, Floats2D]` such as `spacy.MaxoutWindowEncoder.v1` or
+The `spacy.Tok2Vec.v1` architecture was expecting an `encode` model of type
+`Model[Floats2D, Floats2D]` such as `spacy.MaxoutWindowEncoder.v1` or
 `spacy.MishWindowEncoder.v1`.
 
 > #### Example config
 >
@@ -44,15 +45,14 @@ blog post for background.
 | Name        | Description |
 | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `embed`     | Embed tokens into context-independent word vector representations. For example, [CharacterEmbed](/api/architectures#CharacterEmbed) or [MultiHashEmbed](/api/architectures#MultiHashEmbed). ~~Model[List[Doc], List[Floats2d]]~~ |
-| `encode`    | Encode context into the embeddings, using an architecture such as a CNN, BiLSTM or transformer. For example, [MaxoutWindowEncoder.v1](/api/legacy#MaxoutWindowEncoder_v1). ~~Model[Floats2d, Floats2d]~~ |
+| `encode`    | Encode context into the embeddings, using an architecture such as a CNN, BiLSTM or transformer. For example, [MaxoutWindowEncoder.v1](/api/legacy#MaxoutWindowEncoder_v1). ~~Model[Floats2d, Floats2d]~~ |
 | **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
 
 ### spacy.MaxoutWindowEncoder.v1 {#MaxoutWindowEncoder_v1}
 
-The `spacy.MaxoutWindowEncoder.v1` architecture was producing a model of type
-`Model[Floats2D, Floats2D]`. Since `spacy.MaxoutWindowEncoder.v2`, this has been changed to output
-type `Model[List[Floats2d], List[Floats2d]]`.
-
+The `spacy.MaxoutWindowEncoder.v1` architecture was producing a model of type
+`Model[Floats2D, Floats2D]`. Since `spacy.MaxoutWindowEncoder.v2`, this has been
+changed to output type `Model[List[Floats2d], List[Floats2d]]`.
 
 > #### Example config
 >
@@ -78,9 +78,9 @@ and residual connections.
 
 ### spacy.MishWindowEncoder.v1 {#MishWindowEncoder_v1}
 
-The `spacy.MishWindowEncoder.v1` architecture was producing a model of type
-`Model[Floats2D, Floats2D]`. Since `spacy.MishWindowEncoder.v2`, this has been changed to output
-type `Model[List[Floats2d], List[Floats2d]]`.
+The `spacy.MishWindowEncoder.v1` architecture was producing a model of type
+`Model[Floats2D, Floats2D]`. Since `spacy.MishWindowEncoder.v2`, this has been
+changed to output type `Model[List[Floats2d], List[Floats2d]]`.
 
 > #### Example config
 >
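Because `spacy-legacy` ships these v1 implementations (hence the `>=3.0.3` pin at the top of this diff), configs that still reference the old names keep resolving. A sketch of looking one up directly (assumes spacy-legacy is installed, which it is as a spaCy dependency; argument names per the legacy docs):

```python
import spacy

# The v1 name resolves via spacy-legacy rather than spaCy itself.
legacy_encoder = spacy.registry.architectures.get("spacy.MaxoutWindowEncoder.v1")
encode = legacy_encoder(width=96, window_size=1, maxout_pieces=3, depth=4)
# encode is a Model[Floats2d, Floats2d], unlike the list-typed v2 output.
```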
@@ -103,12 +103,11 @@ and residual connections.
 | `depth`     | The number of convolutional layers. Recommended value is `4`. ~~int~~ |
 | **CREATES** | The model using the architecture. ~~Model[Floats2d, Floats2d]~~ |
 
-
 ### spacy.TextCatEnsemble.v1 {#TextCatEnsemble_v1}
 
-The `spacy.TextCatEnsemble.v1` architecture built an internal `tok2vec` and `linear_model`.
-Since `spacy.TextCatEnsemble.v2`, this has been refactored so that the `TextCatEnsemble` takes these
-two sublayers as input.
+The `spacy.TextCatEnsemble.v1` architecture built an internal `tok2vec` and
+`linear_model`. Since `spacy.TextCatEnsemble.v2`, this has been refactored so
+that the `TextCatEnsemble` takes these two sublayers as input.
 
 > #### Example Config
 >
@@ -142,6 +141,40 @@ network has an internal CNN Tok2Vec layer and uses attention.
 | `nO`        | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
 | **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ |
 
+### spacy.HashEmbedCNN.v1 {#HashEmbedCNN_v1}
+
+Identical to [`spacy.HashEmbedCNN.v2`](/api/architectures#HashEmbedCNN) except
+using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are included.
+
+### spacy.MultiHashEmbed.v1 {#MultiHashEmbed_v1}
+
+Identical to [`spacy.MultiHashEmbed.v2`](/api/architectures#MultiHashEmbed)
+except with [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
+included.
+
+### spacy.CharacterEmbed.v1 {#CharacterEmbed_v1}
+
+Identical to [`spacy.CharacterEmbed.v2`](/api/architectures#CharacterEmbed)
+except using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
+included.
+
+## Layers {#layers}
+
+These functions are available from `@spacy.registry.layers`.
+
+### spacy.StaticVectors.v1 {#StaticVectors_v1}
+
+Identical to [`spacy.StaticVectors.v2`](/api/architectures#StaticVectors) except
+for the handling of tokens without vectors.
+
+<Infobox variant="warning">
+
+`spacy.StaticVectors.v1` maps tokens without vectors to the final row in the
+vectors table, which causes the model predictions to change if new vectors are
+added to an existing vectors table. See more details in
+[issue #7662](https://github.com/explosion/spaCy/issues/7662#issuecomment-813925655).
+
+</Infobox>
+
 ## Loggers {#loggers}
@@ -149,7 +182,7 @@ These functions are available from `@spacy.registry.loggers`.
 
 ### spacy.WandbLogger.v1 {#WandbLogger_v1}
 
-The first version of the [`WandbLogger`](/api/top-level#WandbLogger) did not yet 
+The first version of the [`WandbLogger`](/api/top-level#WandbLogger) did not yet
 support the `log_dataset_dir` and `model_log_interval` arguments.
 
 > #### Example config
@@ -160,7 +193,8 @@ support the `log_dataset_dir` and `model_log_interval` arguments.
 > project_name = "monitor_spacy_training"
 > remove_config_values = ["paths.train", "paths.dev", "corpora.train.path", "corpora.dev.path"]
 > ```
-| Name                   | Description |
-| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
-| `project_name`         | The name of the project in the Weights & Biases interface. The project will be created automatically if it doesn't exist yet. ~~str~~ |
-| `remove_config_values` | A list of values to include from the config before it is uploaded to W&B (default: empty). ~~List[str]~~ |
+>
+> | Name                   | Description |
+> | ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
+> | `project_name`         | The name of the project in the Weights & Biases interface. The project will be created automatically if it doesn't exist yet. ~~str~~ |
+> | `remove_config_values` | A list of values to exclude from the config before it is uploaded to W&B (default: empty). ~~List[str]~~ |
diff --git a/website/docs/usage/embeddings-transformers.md b/website/docs/usage/embeddings-transformers.md
index e71336e84..4113e9394 100644
--- a/website/docs/usage/embeddings-transformers.md
+++ b/website/docs/usage/embeddings-transformers.md
@@ -132,7 +132,7 @@ factory = "tok2vec"
 @architectures = "spacy.Tok2Vec.v2"
 
 [components.tok2vec.model.embed]
-@architectures = "spacy.MultiHashEmbed.v1"
+@architectures = "spacy.MultiHashEmbed.v2"
 
 [components.tok2vec.model.encode]
 @architectures = "spacy.MaxoutWindowEncoder.v2"
@@ -164,7 +164,7 @@ factory = "ner"
 @architectures = "spacy.Tok2Vec.v2"
 
 [components.ner.model.tok2vec.embed]
-@architectures = "spacy.MultiHashEmbed.v1"
+@architectures = "spacy.MultiHashEmbed.v2"
 
 [components.ner.model.tok2vec.encode]
 @architectures = "spacy.MaxoutWindowEncoder.v2"
@@ -541,7 +541,7 @@ word vector tables using the `include_static_vectors` flag.
 
 ```ini
 [tagger.model.tok2vec.embed]
-@architectures = "spacy.MultiHashEmbed.v1"
+@architectures = "spacy.MultiHashEmbed.v2"
 width = 128
 attrs = ["LOWER","PREFIX","SUFFIX","SHAPE"]
 rows = [5000,2500,2500,2500]
@@ -550,7 +550,7 @@ include_static_vectors = true
 
-The configuration system will look up the string `"spacy.MultiHashEmbed.v1"` in
+The configuration system will look up the string `"spacy.MultiHashEmbed.v2"` in
 the `architectures` [registry](/api/top-level#registry), and call the returned
 object with the rest of the arguments from the block. This will result in a
 call to the
diff --git a/website/docs/usage/layers-architectures.md b/website/docs/usage/layers-architectures.md
index 0bc935d51..8fe2cf489 100644
--- a/website/docs/usage/layers-architectures.md
+++ b/website/docs/usage/layers-architectures.md
@@ -137,7 +137,7 @@ nO = null
 @architectures = "spacy.Tok2Vec.v2"
 
 [components.textcat.model.tok2vec.embed]
-@architectures = "spacy.MultiHashEmbed.v1"
+@architectures = "spacy.MultiHashEmbed.v2"
 width = 64
 rows = [2000, 2000, 1000, 1000, 1000, 1000]
 attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
@@ -204,7 +204,7 @@ factory = "tok2vec"
 @architectures = "spacy.Tok2Vec.v2"
 
 [components.tok2vec.model.embed]
-@architectures = "spacy.MultiHashEmbed.v1"
+@architectures = "spacy.MultiHashEmbed.v2"
 # ...
 
 [components.tok2vec.model.encode]
@@ -220,7 +220,7 @@ architecture:
 ```ini
 ### config.cfg (excerpt)
 [components.tok2vec.model.embed]
-@architectures = "spacy.CharacterEmbed.v1"
+@architectures = "spacy.CharacterEmbed.v2"
 # ...
 
 [components.tok2vec.model.encode]
@@ -638,7 +638,7 @@ that has the full implementation.
 > @architectures = "rel_instance_tensor.v1"
 >
 > [model.create_instance_tensor.tok2vec]
-> @architectures = "spacy.HashEmbedCNN.v1"
+> @architectures = "spacy.HashEmbedCNN.v2"
 > # ...
 >
 > [model.create_instance_tensor.pooling]
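Taken together, a pipeline wired against the renamed architectures can be assembled straight from a config dict. A sketch combining the bumped embedder and the v2 encoder (values are illustrative, loosely following the morphologizer defaults above, and not taken from this diff):

```python
import spacy

# Build a tok2vec component from the v2 architectures changed in this patch.
config = {
    "model": {
        "@architectures": "spacy.Tok2Vec.v2",
        "embed": {
            "@architectures": "spacy.CharacterEmbed.v2",
            "width": 128,
            "rows": 7000,
            "nM": 64,
            "nC": 8,
            "include_static_vectors": False,
        },
        "encode": {
            "@architectures": "spacy.MaxoutWindowEncoder.v2",
            "width": 128,
            "depth": 4,
            "window_size": 1,
            "maxout_pieces": 3,
        },
    }
}
nlp = spacy.blank("en")
nlp.add_pipe("tok2vec", config=config)
nlp.initialize()
doc = nlp("An example sentence.")
print(doc.tensor.shape)  # should be (4, 128): one 128d row per token
```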