mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Add test for old architectures (#10751)
* add v1 and v2 tests for tok2vec architectures * textcat architectures are not "layers" * test older textcat architectures * test older parser architecture
This commit is contained in:
		
							parent
							
								
									733114bdd9
								
							
						
					
					
						commit
						1543558d08
					
				| 
						 | 
					@ -12,6 +12,7 @@ from spacy.vocab import Vocab
 | 
				
			||||||
from ...pipeline import DependencyParser
 | 
					from ...pipeline import DependencyParser
 | 
				
			||||||
from ...pipeline.dep_parser import DEFAULT_PARSER_MODEL
 | 
					from ...pipeline.dep_parser import DEFAULT_PARSER_MODEL
 | 
				
			||||||
from ..util import apply_transition_sequence, make_tempdir
 | 
					from ..util import apply_transition_sequence, make_tempdir
 | 
				
			||||||
 | 
					from ...pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
 | 
				
			||||||
 | 
					
 | 
				
			||||||
TRAIN_DATA = [
 | 
					TRAIN_DATA = [
 | 
				
			||||||
    (
 | 
					    (
 | 
				
			||||||
| 
						 | 
					@ -395,6 +396,34 @@ def test_overfitting_IO(pipe_name):
 | 
				
			||||||
    assert_equal(batch_deps_1, no_batch_deps)
 | 
					    assert_equal(batch_deps_1, no_batch_deps)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# fmt: off
 | 
				
			||||||
 | 
					@pytest.mark.slow
 | 
				
			||||||
 | 
					@pytest.mark.parametrize("pipe_name", ["parser", "beam_parser"])
 | 
				
			||||||
 | 
					@pytest.mark.parametrize(
 | 
				
			||||||
 | 
					    "parser_config",
 | 
				
			||||||
 | 
					    [
 | 
				
			||||||
 | 
					        # TransitionBasedParser V1
 | 
				
			||||||
 | 
					        ({"@architectures": "spacy.TransitionBasedParser.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "state_type": "parser", "extra_state_tokens": False, "hidden_width": 64, "maxout_pieces": 2, "use_upper": True}),
 | 
				
			||||||
 | 
					        # TransitionBasedParser V2
 | 
				
			||||||
 | 
					        ({"@architectures": "spacy.TransitionBasedParser.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "state_type": "parser", "extra_state_tokens": False, "hidden_width": 64, "maxout_pieces": 2, "use_upper": True}),
 | 
				
			||||||
 | 
					    ],
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					# fmt: on
 | 
				
			||||||
 | 
					def test_parser_configs(pipe_name, parser_config):
 | 
				
			||||||
 | 
					    pipe_config = {"model": parser_config}
 | 
				
			||||||
 | 
					    nlp = English()
 | 
				
			||||||
 | 
					    parser = nlp.add_pipe(pipe_name, config=pipe_config)
 | 
				
			||||||
 | 
					    train_examples = []
 | 
				
			||||||
 | 
					    for text, annotations in TRAIN_DATA:
 | 
				
			||||||
 | 
					        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
 | 
				
			||||||
 | 
					        for dep in annotations.get("deps", []):
 | 
				
			||||||
 | 
					            parser.add_label(dep)
 | 
				
			||||||
 | 
					    optimizer = nlp.initialize()
 | 
				
			||||||
 | 
					    for i in range(5):
 | 
				
			||||||
 | 
					        losses = {}
 | 
				
			||||||
 | 
					        nlp.update(train_examples, sgd=optimizer, losses=losses)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def test_beam_parser_scores():
 | 
					def test_beam_parser_scores():
 | 
				
			||||||
    # Test that we can get confidence values out of the beam_parser pipe
 | 
					    # Test that we can get confidence values out of the beam_parser pipe
 | 
				
			||||||
    beam_width = 16
 | 
					    beam_width = 16
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -382,6 +382,7 @@ def test_implicit_label(name, get_examples):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# fmt: off
 | 
					# fmt: off
 | 
				
			||||||
 | 
					@pytest.mark.slow
 | 
				
			||||||
@pytest.mark.parametrize(
 | 
					@pytest.mark.parametrize(
 | 
				
			||||||
    "name,textcat_config",
 | 
					    "name,textcat_config",
 | 
				
			||||||
    [
 | 
					    [
 | 
				
			||||||
| 
						 | 
					@ -390,7 +391,10 @@ def test_implicit_label(name, get_examples):
 | 
				
			||||||
        ("textcat", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
 | 
					        ("textcat", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
 | 
				
			||||||
        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
 | 
					        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
 | 
				
			||||||
        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
 | 
					        ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
 | 
				
			||||||
        # ENSEMBLE
 | 
					        # ENSEMBLE V1
 | 
				
			||||||
 | 
					        ("textcat", {"@architectures": "spacy.TextCatEnsemble.v1", "exclusive_classes": False, "pretrained_vectors": None, "width": 64, "embed_size": 2000, "conv_depth": 2, "window_size": 1, "ngram_size": 1, "dropout": None}),
 | 
				
			||||||
 | 
					        ("textcat_multilabel", {"@architectures": "spacy.TextCatEnsemble.v1", "exclusive_classes": False, "pretrained_vectors": None, "width": 64, "embed_size": 2000, "conv_depth": 2, "window_size": 1, "ngram_size": 1, "dropout": None}),
 | 
				
			||||||
 | 
					        # ENSEMBLE V2
 | 
				
			||||||
        ("textcat", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}}),
 | 
					        ("textcat", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}}),
 | 
				
			||||||
        ("textcat", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}}),
 | 
					        ("textcat", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}}),
 | 
				
			||||||
        ("textcat_multilabel", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}}),
 | 
					        ("textcat_multilabel", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}}),
 | 
				
			||||||
| 
						 | 
					@ -643,15 +647,28 @@ def test_overfitting_IO_multi():
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# fmt: off
 | 
					# fmt: off
 | 
				
			||||||
 | 
					@pytest.mark.slow
 | 
				
			||||||
@pytest.mark.parametrize(
 | 
					@pytest.mark.parametrize(
 | 
				
			||||||
    "name,train_data,textcat_config",
 | 
					    "name,train_data,textcat_config",
 | 
				
			||||||
    [
 | 
					    [
 | 
				
			||||||
 | 
					        # BOW V1
 | 
				
			||||||
 | 
					        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}),
 | 
				
			||||||
 | 
					        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "ngram_size": 4, "no_output_layer": False}),
 | 
				
			||||||
 | 
					        # ENSEMBLE V1
 | 
				
			||||||
 | 
					        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v1", "exclusive_classes": False, "pretrained_vectors": None, "width": 64, "embed_size": 2000, "conv_depth": 2, "window_size": 1, "ngram_size": 1, "dropout": None}),
 | 
				
			||||||
 | 
					        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v1", "exclusive_classes": False, "pretrained_vectors": None, "width": 64, "embed_size": 2000, "conv_depth": 2, "window_size": 1, "ngram_size": 1, "dropout": None}),
 | 
				
			||||||
 | 
					        # CNN V1
 | 
				
			||||||
 | 
					        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
 | 
				
			||||||
 | 
					        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
 | 
				
			||||||
 | 
					        # BOW V2
 | 
				
			||||||
        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}),
 | 
					        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}),
 | 
				
			||||||
        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 4, "no_output_layer": False}),
 | 
					        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 4, "no_output_layer": False}),
 | 
				
			||||||
        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 3, "no_output_layer": True}),
 | 
					        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 3, "no_output_layer": True}),
 | 
				
			||||||
        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 2, "no_output_layer": True}),
 | 
					        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 2, "no_output_layer": True}),
 | 
				
			||||||
 | 
					        # ENSEMBLE V2
 | 
				
			||||||
        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}}),
 | 
					        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}}),
 | 
				
			||||||
        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 5, "no_output_layer": False}}),
 | 
					        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 5, "no_output_layer": False}}),
 | 
				
			||||||
 | 
					        # CNN V2
 | 
				
			||||||
        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
 | 
					        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
 | 
				
			||||||
        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
 | 
					        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
 | 
				
			||||||
    ],
 | 
					    ],
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,13 +1,13 @@
 | 
				
			||||||
import pytest
 | 
					import pytest
 | 
				
			||||||
from spacy.ml.models.tok2vec import build_Tok2Vec_model
 | 
					from spacy.ml.models.tok2vec import build_Tok2Vec_model
 | 
				
			||||||
from spacy.ml.models.tok2vec import MultiHashEmbed, CharacterEmbed
 | 
					from spacy.ml.models.tok2vec import MultiHashEmbed, MaxoutWindowEncoder
 | 
				
			||||||
from spacy.ml.models.tok2vec import MishWindowEncoder, MaxoutWindowEncoder
 | 
					 | 
				
			||||||
from spacy.pipeline.tok2vec import Tok2Vec, Tok2VecListener
 | 
					from spacy.pipeline.tok2vec import Tok2Vec, Tok2VecListener
 | 
				
			||||||
from spacy.vocab import Vocab
 | 
					from spacy.vocab import Vocab
 | 
				
			||||||
from spacy.tokens import Doc
 | 
					from spacy.tokens import Doc
 | 
				
			||||||
from spacy.training import Example
 | 
					from spacy.training import Example
 | 
				
			||||||
from spacy import util
 | 
					from spacy import util
 | 
				
			||||||
from spacy.lang.en import English
 | 
					from spacy.lang.en import English
 | 
				
			||||||
 | 
					from spacy.util import registry
 | 
				
			||||||
from thinc.api import Config, get_current_ops
 | 
					from thinc.api import Config, get_current_ops
 | 
				
			||||||
from numpy.testing import assert_array_equal
 | 
					from numpy.testing import assert_array_equal
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -55,24 +55,41 @@ def test_tok2vec_batch_sizes(batch_size, width, embed_size):
 | 
				
			||||||
        assert doc_vec.shape == (len(doc), width)
 | 
					        assert doc_vec.shape == (len(doc), width)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@pytest.mark.slow
 | 
				
			||||||
 | 
					@pytest.mark.parametrize("width", [8])
 | 
				
			||||||
@pytest.mark.parametrize(
 | 
					@pytest.mark.parametrize(
 | 
				
			||||||
    "width,embed_arch,embed_config,encode_arch,encode_config",
 | 
					    "embed_arch,embed_config",
 | 
				
			||||||
    # fmt: off
 | 
					    # fmt: off
 | 
				
			||||||
    [
 | 
					    [
 | 
				
			||||||
        (8, MultiHashEmbed, {"rows": [100, 100], "attrs": ["SHAPE", "LOWER"], "include_static_vectors": False}, MaxoutWindowEncoder, {"window_size": 1, "maxout_pieces": 3, "depth": 2}),
 | 
					        ("spacy.MultiHashEmbed.v1", {"rows": [100, 100], "attrs": ["SHAPE", "LOWER"], "include_static_vectors": False}),
 | 
				
			||||||
        (8, MultiHashEmbed, {"rows": [100, 20], "attrs": ["ORTH", "PREFIX"], "include_static_vectors": False}, MishWindowEncoder, {"window_size": 1, "depth": 6}),
 | 
					        ("spacy.MultiHashEmbed.v1", {"rows": [100, 20], "attrs": ["ORTH", "PREFIX"], "include_static_vectors": False}),
 | 
				
			||||||
        (8, CharacterEmbed, {"rows": 100, "nM": 64, "nC": 8, "include_static_vectors": False}, MaxoutWindowEncoder, {"window_size": 1, "maxout_pieces": 3, "depth": 3}),
 | 
					        ("spacy.CharacterEmbed.v1", {"rows": 100, "nM": 64, "nC": 8, "include_static_vectors": False}),
 | 
				
			||||||
        (8, CharacterEmbed, {"rows": 100, "nM": 16, "nC": 2, "include_static_vectors": False}, MishWindowEncoder, {"window_size": 1, "depth": 3}),
 | 
					        ("spacy.CharacterEmbed.v1", {"rows": 100, "nM": 16, "nC": 2, "include_static_vectors": False}),
 | 
				
			||||||
    ],
 | 
					    ],
 | 
				
			||||||
    # fmt: on
 | 
					    # fmt: on
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
def test_tok2vec_configs(width, embed_arch, embed_config, encode_arch, encode_config):
 | 
					@pytest.mark.parametrize(
 | 
				
			||||||
 | 
					    "tok2vec_arch,encode_arch,encode_config",
 | 
				
			||||||
 | 
					    # fmt: off
 | 
				
			||||||
 | 
					    [
 | 
				
			||||||
 | 
					        ("spacy.Tok2Vec.v1", "spacy.MaxoutWindowEncoder.v1", {"window_size": 1, "maxout_pieces": 3, "depth": 2}),
 | 
				
			||||||
 | 
					        ("spacy.Tok2Vec.v2", "spacy.MaxoutWindowEncoder.v2", {"window_size": 1, "maxout_pieces": 3, "depth": 2}),
 | 
				
			||||||
 | 
					        ("spacy.Tok2Vec.v1", "spacy.MishWindowEncoder.v1", {"window_size": 1, "depth": 6}),
 | 
				
			||||||
 | 
					        ("spacy.Tok2Vec.v2", "spacy.MishWindowEncoder.v2", {"window_size": 1, "depth": 6}),
 | 
				
			||||||
 | 
					    ],
 | 
				
			||||||
 | 
					    # fmt: on
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					def test_tok2vec_configs(
 | 
				
			||||||
 | 
					    width, tok2vec_arch, embed_arch, embed_config, encode_arch, encode_config
 | 
				
			||||||
 | 
					):
 | 
				
			||||||
 | 
					    embed = registry.get("architectures", embed_arch)
 | 
				
			||||||
 | 
					    encode = registry.get("architectures", encode_arch)
 | 
				
			||||||
 | 
					    tok2vec_model = registry.get("architectures", tok2vec_arch)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    embed_config["width"] = width
 | 
					    embed_config["width"] = width
 | 
				
			||||||
    encode_config["width"] = width
 | 
					    encode_config["width"] = width
 | 
				
			||||||
    docs = get_batch(3)
 | 
					    docs = get_batch(3)
 | 
				
			||||||
    tok2vec = build_Tok2Vec_model(
 | 
					    tok2vec = tok2vec_model(embed(**embed_config), encode(**encode_config))
 | 
				
			||||||
        embed_arch(**embed_config), encode_arch(**encode_config)
 | 
					 | 
				
			||||||
    )
 | 
					 | 
				
			||||||
    tok2vec.initialize(docs)
 | 
					    tok2vec.initialize(docs)
 | 
				
			||||||
    vectors, backprop = tok2vec.begin_update(docs)
 | 
					    vectors, backprop = tok2vec.begin_update(docs)
 | 
				
			||||||
    assert len(vectors) == len(docs)
 | 
					    assert len(vectors) == len(docs)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -103,11 +103,22 @@ and residual connections.
 | 
				
			||||||
| `depth`       | The number of convolutional layers. Recommended value is `4`. ~~int~~                                                                                                                                          |
 | 
					| `depth`       | The number of convolutional layers. Recommended value is `4`. ~~int~~                                                                                                                                          |
 | 
				
			||||||
| **CREATES**   | The model using the architecture. ~~Model[Floats2d, Floats2d]~~                                                                                                                                                |
 | 
					| **CREATES**   | The model using the architecture. ~~Model[Floats2d, Floats2d]~~                                                                                                                                                |
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### spacy.TransitionBasedParser.v1 {#TransitionBasedParser_v1}
 | 
					### spacy.HashEmbedCNN.v1 {#HashEmbedCNN_v1}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Identical to
 | 
					Identical to [`spacy.HashEmbedCNN.v2`](/api/architectures#HashEmbedCNN) except
 | 
				
			||||||
[`spacy.TransitionBasedParser.v2`](/api/architectures#TransitionBasedParser)
 | 
					using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are included.
 | 
				
			||||||
except the `use_upper` was set to `True` by default.
 | 
					
 | 
				
			||||||
 | 
					### spacy.MultiHashEmbed.v1 {#MultiHashEmbed_v1}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Identical to [`spacy.MultiHashEmbed.v2`](/api/architectures#MultiHashEmbed)
 | 
				
			||||||
 | 
					except with [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
 | 
				
			||||||
 | 
					included.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### spacy.CharacterEmbed.v1 {#CharacterEmbed_v1}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Identical to [`spacy.CharacterEmbed.v2`](/api/architectures#CharacterEmbed)
 | 
				
			||||||
 | 
					except using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
 | 
				
			||||||
 | 
					included.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### spacy.TextCatEnsemble.v1 {#TextCatEnsemble_v1}
 | 
					### spacy.TextCatEnsemble.v1 {#TextCatEnsemble_v1}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -147,41 +158,6 @@ network has an internal CNN Tok2Vec layer and uses attention.
 | 
				
			||||||
| `nO`                 | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
 | 
					| `nO`                 | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
 | 
				
			||||||
| **CREATES**          | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
 | 
					| **CREATES**          | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### spacy.HashEmbedCNN.v1 {#HashEmbedCNN_v1}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Identical to [`spacy.HashEmbedCNN.v2`](/api/architectures#HashEmbedCNN) except
 | 
					 | 
				
			||||||
using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are included.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
### spacy.MultiHashEmbed.v1 {#MultiHashEmbed_v1}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Identical to [`spacy.MultiHashEmbed.v2`](/api/architectures#MultiHashEmbed)
 | 
					 | 
				
			||||||
except with [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
 | 
					 | 
				
			||||||
included.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
### spacy.CharacterEmbed.v1 {#CharacterEmbed_v1}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Identical to [`spacy.CharacterEmbed.v2`](/api/architectures#CharacterEmbed)
 | 
					 | 
				
			||||||
except using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
 | 
					 | 
				
			||||||
included.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Layers {#layers}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
These functions are available from `@spacy.registry.layers`.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
### spacy.StaticVectors.v1 {#StaticVectors_v1}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Identical to [`spacy.StaticVectors.v2`](/api/architectures#StaticVectors) except
 | 
					 | 
				
			||||||
for the handling of tokens without vectors.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
<Infobox title="Bugs for tokens without vectors" variant="warning">
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
`spacy.StaticVectors.v1` maps tokens without vectors to the final row in the
 | 
					 | 
				
			||||||
vectors table, which causes the model predictions to change if new vectors are
 | 
					 | 
				
			||||||
added to an existing vectors table. See more details in
 | 
					 | 
				
			||||||
[issue #7662](https://github.com/explosion/spaCy/issues/7662#issuecomment-813925655).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
</Infobox>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
### spacy.TextCatCNN.v1 {#TextCatCNN_v1}
 | 
					### spacy.TextCatCNN.v1 {#TextCatCNN_v1}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Since `spacy.TextCatCNN.v2`, this architecture has become resizable, which means
 | 
					Since `spacy.TextCatCNN.v2`, this architecture has become resizable, which means
 | 
				
			||||||
| 
						 | 
					@ -246,8 +222,35 @@ the others, but may not be as accurate, especially if texts are short.
 | 
				
			||||||
| `nO`                | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
 | 
					| `nO`                | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
 | 
				
			||||||
| **CREATES**         | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
 | 
					| **CREATES**         | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### spacy.TransitionBasedParser.v1 {#TransitionBasedParser_v1}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Identical to
 | 
				
			||||||
 | 
					[`spacy.TransitionBasedParser.v2`](/api/architectures#TransitionBasedParser)
 | 
				
			||||||
 | 
					except the `use_upper` was set to `True` by default.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Layers {#layers}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					These functions are available from `@spacy.registry.layers`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### spacy.StaticVectors.v1 {#StaticVectors_v1}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Identical to [`spacy.StaticVectors.v2`](/api/architectures#StaticVectors) except
 | 
				
			||||||
 | 
					for the handling of tokens without vectors.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					<Infobox title="Bugs for tokens without vectors" variant="warning">
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					`spacy.StaticVectors.v1` maps tokens without vectors to the final row in the
 | 
				
			||||||
 | 
					vectors table, which causes the model predictions to change if new vectors are
 | 
				
			||||||
 | 
					added to an existing vectors table. See more details in
 | 
				
			||||||
 | 
					[issue #7662](https://github.com/explosion/spaCy/issues/7662#issuecomment-813925655).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					</Infobox>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Loggers {#loggers}
 | 
					## Loggers {#loggers}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Logging utilities for spaCy are implemented in the [`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the functions are typically available from `@spacy.registry.loggers`.
 | 
					Logging utilities for spaCy are implemented in the
 | 
				
			||||||
 | 
					[`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the
 | 
				
			||||||
 | 
					functions are typically available from `@spacy.registry.loggers`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
More documentation can be found in that repo's [readme](https://github.com/explosion/spacy-loggers/blob/main/README.md) file.
 | 
					More documentation can be found in that repo's
 | 
				
			||||||
 | 
					[readme](https://github.com/explosion/spacy-loggers/blob/main/README.md) file.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user