mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 02:36:32 +03:00
Add test for old architectures (#10751)
* add v1 and v2 tests for tok2vec architectures * textcat architectures are not "layers" * test older textcat architectures * test older parser architecture
This commit is contained in:
parent
733114bdd9
commit
1543558d08
|
@ -12,6 +12,7 @@ from spacy.vocab import Vocab
|
||||||
from ...pipeline import DependencyParser
|
from ...pipeline import DependencyParser
|
||||||
from ...pipeline.dep_parser import DEFAULT_PARSER_MODEL
|
from ...pipeline.dep_parser import DEFAULT_PARSER_MODEL
|
||||||
from ..util import apply_transition_sequence, make_tempdir
|
from ..util import apply_transition_sequence, make_tempdir
|
||||||
|
from ...pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
|
||||||
|
|
||||||
TRAIN_DATA = [
|
TRAIN_DATA = [
|
||||||
(
|
(
|
||||||
|
@ -395,6 +396,34 @@ def test_overfitting_IO(pipe_name):
|
||||||
assert_equal(batch_deps_1, no_batch_deps)
|
assert_equal(batch_deps_1, no_batch_deps)
|
||||||
|
|
||||||
|
|
||||||
|
# fmt: off
@pytest.mark.slow
@pytest.mark.parametrize("pipe_name", ["parser", "beam_parser"])
@pytest.mark.parametrize(
    "parser_config",
    [
        # TransitionBasedParser V1
        ({"@architectures": "spacy.TransitionBasedParser.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "state_type": "parser", "extra_state_tokens": False, "hidden_width": 64, "maxout_pieces": 2, "use_upper": True}),
        # TransitionBasedParser V2
        ({"@architectures": "spacy.TransitionBasedParser.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "state_type": "parser", "extra_state_tokens": False, "hidden_width": 64, "maxout_pieces": 2, "use_upper": True}),
    ],
)
# fmt: on
def test_parser_configs(pipe_name, parser_config):
    """Check that the older parser architectures can still be built and trained.

    Each parametrized ``parser_config`` is used as the ``model`` of a freshly
    added ``pipe_name`` component, which is then initialized and updated a few
    times on ``TRAIN_DATA``.
    """
    pipe_config = {"model": parser_config}
    nlp = English()
    parser = nlp.add_pipe(pipe_name, config=pipe_config)
    train_examples = []
    for text, annotations in TRAIN_DATA:
        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
        # Register every dependency label before initialization.
        for dep in annotations.get("deps", []):
            parser.add_label(dep)
    optimizer = nlp.initialize()
    for _ in range(5):
        losses = {}
        nlp.update(train_examples, sgd=optimizer, losses=losses)
        # The component records its loss under its own name; checking this
        # makes sure the update actually reached the parser.
        assert pipe_name in losses
|
||||||
|
|
||||||
|
|
||||||
def test_beam_parser_scores():
|
def test_beam_parser_scores():
|
||||||
# Test that we can get confidence values out of the beam_parser pipe
|
# Test that we can get confidence values out of the beam_parser pipe
|
||||||
beam_width = 16
|
beam_width = 16
|
||||||
|
|
|
@ -382,6 +382,7 @@ def test_implicit_label(name, get_examples):
|
||||||
|
|
||||||
|
|
||||||
# fmt: off
|
# fmt: off
|
||||||
|
@pytest.mark.slow
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"name,textcat_config",
|
"name,textcat_config",
|
||||||
[
|
[
|
||||||
|
@ -390,7 +391,10 @@ def test_implicit_label(name, get_examples):
|
||||||
("textcat", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
|
("textcat", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
|
||||||
("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
|
("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
|
||||||
("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
|
("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
|
||||||
# ENSEMBLE
|
# ENSEMBLE V1
|
||||||
|
("textcat", {"@architectures": "spacy.TextCatEnsemble.v1", "exclusive_classes": False, "pretrained_vectors": None, "width": 64, "embed_size": 2000, "conv_depth": 2, "window_size": 1, "ngram_size": 1, "dropout": None}),
|
||||||
|
("textcat_multilabel", {"@architectures": "spacy.TextCatEnsemble.v1", "exclusive_classes": False, "pretrained_vectors": None, "width": 64, "embed_size": 2000, "conv_depth": 2, "window_size": 1, "ngram_size": 1, "dropout": None}),
|
||||||
|
# ENSEMBLE V2
|
||||||
("textcat", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}}),
|
("textcat", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": False, "ngram_size": 3}}),
|
||||||
("textcat", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}}),
|
("textcat", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}}),
|
||||||
("textcat_multilabel", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}}),
|
("textcat_multilabel", {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}}),
|
||||||
|
@ -643,15 +647,28 @@ def test_overfitting_IO_multi():
|
||||||
|
|
||||||
|
|
||||||
# fmt: off
|
# fmt: off
|
||||||
|
@pytest.mark.slow
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"name,train_data,textcat_config",
|
"name,train_data,textcat_config",
|
||||||
[
|
[
|
||||||
|
# BOW V1
|
||||||
|
("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}),
|
||||||
|
("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v1", "exclusive_classes": True, "ngram_size": 4, "no_output_layer": False}),
|
||||||
|
# ENSEMBLE V1
|
||||||
|
("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v1", "exclusive_classes": False, "pretrained_vectors": None, "width": 64, "embed_size": 2000, "conv_depth": 2, "window_size": 1, "ngram_size": 1, "dropout": None}),
|
||||||
|
("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v1", "exclusive_classes": False, "pretrained_vectors": None, "width": 64, "embed_size": 2000, "conv_depth": 2, "window_size": 1, "ngram_size": 1, "dropout": None}),
|
||||||
|
# CNN V1
|
||||||
|
("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
|
||||||
|
("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
|
||||||
|
# BOW V2
|
||||||
("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}),
|
("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}),
|
||||||
("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 4, "no_output_layer": False}),
|
("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 4, "no_output_layer": False}),
|
||||||
("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 3, "no_output_layer": True}),
|
("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 3, "no_output_layer": True}),
|
||||||
("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 2, "no_output_layer": True}),
|
("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 2, "no_output_layer": True}),
|
||||||
|
# ENSEMBLE V2
|
||||||
("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}}),
|
("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}}),
|
||||||
("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 5, "no_output_layer": False}}),
|
("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v2", "exclusive_classes": True, "ngram_size": 5, "no_output_layer": False}}),
|
||||||
|
# CNN V2
|
||||||
("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
|
("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
|
||||||
("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
|
("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
|
||||||
],
|
],
|
||||||
|
|
|
@ -1,13 +1,13 @@
|
||||||
import pytest
|
import pytest
|
||||||
from spacy.ml.models.tok2vec import build_Tok2Vec_model
|
from spacy.ml.models.tok2vec import build_Tok2Vec_model
|
||||||
from spacy.ml.models.tok2vec import MultiHashEmbed, CharacterEmbed
|
from spacy.ml.models.tok2vec import MultiHashEmbed, MaxoutWindowEncoder
|
||||||
from spacy.ml.models.tok2vec import MishWindowEncoder, MaxoutWindowEncoder
|
|
||||||
from spacy.pipeline.tok2vec import Tok2Vec, Tok2VecListener
|
from spacy.pipeline.tok2vec import Tok2Vec, Tok2VecListener
|
||||||
from spacy.vocab import Vocab
|
from spacy.vocab import Vocab
|
||||||
from spacy.tokens import Doc
|
from spacy.tokens import Doc
|
||||||
from spacy.training import Example
|
from spacy.training import Example
|
||||||
from spacy import util
|
from spacy import util
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
|
from spacy.util import registry
|
||||||
from thinc.api import Config, get_current_ops
|
from thinc.api import Config, get_current_ops
|
||||||
from numpy.testing import assert_array_equal
|
from numpy.testing import assert_array_equal
|
||||||
|
|
||||||
|
@ -55,24 +55,41 @@ def test_tok2vec_batch_sizes(batch_size, width, embed_size):
|
||||||
assert doc_vec.shape == (len(doc), width)
|
assert doc_vec.shape == (len(doc), width)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
|
@pytest.mark.parametrize("width", [8])
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"width,embed_arch,embed_config,encode_arch,encode_config",
|
"embed_arch,embed_config",
|
||||||
# fmt: off
|
# fmt: off
|
||||||
[
|
[
|
||||||
(8, MultiHashEmbed, {"rows": [100, 100], "attrs": ["SHAPE", "LOWER"], "include_static_vectors": False}, MaxoutWindowEncoder, {"window_size": 1, "maxout_pieces": 3, "depth": 2}),
|
("spacy.MultiHashEmbed.v1", {"rows": [100, 100], "attrs": ["SHAPE", "LOWER"], "include_static_vectors": False}),
|
||||||
(8, MultiHashEmbed, {"rows": [100, 20], "attrs": ["ORTH", "PREFIX"], "include_static_vectors": False}, MishWindowEncoder, {"window_size": 1, "depth": 6}),
|
("spacy.MultiHashEmbed.v1", {"rows": [100, 20], "attrs": ["ORTH", "PREFIX"], "include_static_vectors": False}),
|
||||||
(8, CharacterEmbed, {"rows": 100, "nM": 64, "nC": 8, "include_static_vectors": False}, MaxoutWindowEncoder, {"window_size": 1, "maxout_pieces": 3, "depth": 3}),
|
("spacy.CharacterEmbed.v1", {"rows": 100, "nM": 64, "nC": 8, "include_static_vectors": False}),
|
||||||
(8, CharacterEmbed, {"rows": 100, "nM": 16, "nC": 2, "include_static_vectors": False}, MishWindowEncoder, {"window_size": 1, "depth": 3}),
|
("spacy.CharacterEmbed.v1", {"rows": 100, "nM": 16, "nC": 2, "include_static_vectors": False}),
|
||||||
],
|
],
|
||||||
# fmt: on
|
# fmt: on
|
||||||
)
|
)
|
||||||
def test_tok2vec_configs(width, embed_arch, embed_config, encode_arch, encode_config):
|
@pytest.mark.parametrize(
|
||||||
|
"tok2vec_arch,encode_arch,encode_config",
|
||||||
|
# fmt: off
|
||||||
|
[
|
||||||
|
("spacy.Tok2Vec.v1", "spacy.MaxoutWindowEncoder.v1", {"window_size": 1, "maxout_pieces": 3, "depth": 2}),
|
||||||
|
("spacy.Tok2Vec.v2", "spacy.MaxoutWindowEncoder.v2", {"window_size": 1, "maxout_pieces": 3, "depth": 2}),
|
||||||
|
("spacy.Tok2Vec.v1", "spacy.MishWindowEncoder.v1", {"window_size": 1, "depth": 6}),
|
||||||
|
("spacy.Tok2Vec.v2", "spacy.MishWindowEncoder.v2", {"window_size": 1, "depth": 6}),
|
||||||
|
],
|
||||||
|
# fmt: on
|
||||||
|
)
|
||||||
|
def test_tok2vec_configs(
|
||||||
|
width, tok2vec_arch, embed_arch, embed_config, encode_arch, encode_config
|
||||||
|
):
|
||||||
|
embed = registry.get("architectures", embed_arch)
|
||||||
|
encode = registry.get("architectures", encode_arch)
|
||||||
|
tok2vec_model = registry.get("architectures", tok2vec_arch)
|
||||||
|
|
||||||
embed_config["width"] = width
|
embed_config["width"] = width
|
||||||
encode_config["width"] = width
|
encode_config["width"] = width
|
||||||
docs = get_batch(3)
|
docs = get_batch(3)
|
||||||
tok2vec = build_Tok2Vec_model(
|
tok2vec = tok2vec_model(embed(**embed_config), encode(**encode_config))
|
||||||
embed_arch(**embed_config), encode_arch(**encode_config)
|
|
||||||
)
|
|
||||||
tok2vec.initialize(docs)
|
tok2vec.initialize(docs)
|
||||||
vectors, backprop = tok2vec.begin_update(docs)
|
vectors, backprop = tok2vec.begin_update(docs)
|
||||||
assert len(vectors) == len(docs)
|
assert len(vectors) == len(docs)
|
||||||
|
|
|
@ -103,11 +103,22 @@ and residual connections.
|
||||||
| `depth` | The number of convolutional layers. Recommended value is `4`. ~~int~~ |
|
| `depth` | The number of convolutional layers. Recommended value is `4`. ~~int~~ |
|
||||||
| **CREATES** | The model using the architecture. ~~Model[Floats2d, Floats2d]~~ |
|
| **CREATES** | The model using the architecture. ~~Model[Floats2d, Floats2d]~~ |
|
||||||
|
|
||||||
### spacy.TransitionBasedParser.v1 {#TransitionBasedParser_v1}
|
### spacy.HashEmbedCNN.v1 {#HashEmbedCNN_v1}
|
||||||
|
|
||||||
Identical to
|
Identical to [`spacy.HashEmbedCNN.v2`](/api/architectures#HashEmbedCNN) except
|
||||||
[`spacy.TransitionBasedParser.v2`](/api/architectures#TransitionBasedParser)
|
using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are included.
|
||||||
except the `use_upper` was set to `True` by default.
|
|
||||||
|
### spacy.MultiHashEmbed.v1 {#MultiHashEmbed_v1}
|
||||||
|
|
||||||
|
Identical to [`spacy.MultiHashEmbed.v2`](/api/architectures#MultiHashEmbed)
|
||||||
|
except using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
|
||||||
|
included.
|
||||||
|
|
||||||
|
### spacy.CharacterEmbed.v1 {#CharacterEmbed_v1}
|
||||||
|
|
||||||
|
Identical to [`spacy.CharacterEmbed.v2`](/api/architectures#CharacterEmbed)
|
||||||
|
except using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
|
||||||
|
included.
|
||||||
|
|
||||||
### spacy.TextCatEnsemble.v1 {#TextCatEnsemble_v1}
|
### spacy.TextCatEnsemble.v1 {#TextCatEnsemble_v1}
|
||||||
|
|
||||||
|
@ -147,41 +158,6 @@ network has an internal CNN Tok2Vec layer and uses attention.
|
||||||
| `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
|
| `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
|
||||||
| **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ |
|
| **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ |
|
||||||
|
|
||||||
### spacy.HashEmbedCNN.v1 {#HashEmbedCNN_v1}
|
|
||||||
|
|
||||||
Identical to [`spacy.HashEmbedCNN.v2`](/api/architectures#HashEmbedCNN) except
|
|
||||||
using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are included.
|
|
||||||
|
|
||||||
### spacy.MultiHashEmbed.v1 {#MultiHashEmbed_v1}
|
|
||||||
|
|
||||||
Identical to [`spacy.MultiHashEmbed.v2`](/api/architectures#MultiHashEmbed)
|
|
||||||
except with [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
|
|
||||||
included.
|
|
||||||
|
|
||||||
### spacy.CharacterEmbed.v1 {#CharacterEmbed_v1}
|
|
||||||
|
|
||||||
Identical to [`spacy.CharacterEmbed.v2`](/api/architectures#CharacterEmbed)
|
|
||||||
except using [`spacy.StaticVectors.v1`](#StaticVectors_v1) if vectors are
|
|
||||||
included.
|
|
||||||
|
|
||||||
## Layers {#layers}
|
|
||||||
|
|
||||||
These functions are available from `@spacy.registry.layers`.
|
|
||||||
|
|
||||||
### spacy.StaticVectors.v1 {#StaticVectors_v1}
|
|
||||||
|
|
||||||
Identical to [`spacy.StaticVectors.v2`](/api/architectures#StaticVectors) except
|
|
||||||
for the handling of tokens without vectors.
|
|
||||||
|
|
||||||
<Infobox title="Bugs for tokens without vectors" variant="warning">
|
|
||||||
|
|
||||||
`spacy.StaticVectors.v1` maps tokens without vectors to the final row in the
|
|
||||||
vectors table, which causes the model predictions to change if new vectors are
|
|
||||||
added to an existing vectors table. See more details in
|
|
||||||
[issue #7662](https://github.com/explosion/spaCy/issues/7662#issuecomment-813925655).
|
|
||||||
|
|
||||||
</Infobox>
|
|
||||||
|
|
||||||
### spacy.TextCatCNN.v1 {#TextCatCNN_v1}
|
### spacy.TextCatCNN.v1 {#TextCatCNN_v1}
|
||||||
|
|
||||||
Since `spacy.TextCatCNN.v2`, this architecture has become resizable, which means
|
Since `spacy.TextCatCNN.v2`, this architecture has become resizable, which means
|
||||||
|
@ -246,8 +222,35 @@ the others, but may not be as accurate, especially if texts are short.
|
||||||
| `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
|
| `nO` | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
|
||||||
| **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ |
|
| **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ |
|
||||||
|
|
||||||
|
### spacy.TransitionBasedParser.v1 {#TransitionBasedParser_v1}
|
||||||
|
|
||||||
|
Identical to
|
||||||
|
[`spacy.TransitionBasedParser.v2`](/api/architectures#TransitionBasedParser)
|
||||||
|
except that `use_upper` was set to `True` by default.
|
||||||
|
|
||||||
|
## Layers {#layers}
|
||||||
|
|
||||||
|
These functions are available from `@spacy.registry.layers`.
|
||||||
|
|
||||||
|
### spacy.StaticVectors.v1 {#StaticVectors_v1}
|
||||||
|
|
||||||
|
Identical to [`spacy.StaticVectors.v2`](/api/architectures#StaticVectors) except
|
||||||
|
for the handling of tokens without vectors.
|
||||||
|
|
||||||
|
<Infobox title="Bugs for tokens without vectors" variant="warning">
|
||||||
|
|
||||||
|
`spacy.StaticVectors.v1` maps tokens without vectors to the final row in the
|
||||||
|
vectors table, which causes the model predictions to change if new vectors are
|
||||||
|
added to an existing vectors table. See more details in
|
||||||
|
[issue #7662](https://github.com/explosion/spaCy/issues/7662#issuecomment-813925655).
|
||||||
|
|
||||||
|
</Infobox>
|
||||||
|
|
||||||
## Loggers {#loggers}
|
## Loggers {#loggers}
|
||||||
|
|
||||||
Logging utilities for spaCy are implemented in the [`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the functions are typically available from `@spacy.registry.loggers`.
|
Logging utilities for spaCy are implemented in the
|
||||||
|
[`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the
|
||||||
|
functions are typically available from `@spacy.registry.loggers`.
|
||||||
|
|
||||||
More documentation can be found in that repo's [readme](https://github.com/explosion/spacy-loggers/blob/main/README.md) file.
|
More documentation can be found in that repo's
|
||||||
|
[readme](https://github.com/explosion/spacy-loggers/blob/main/README.md) file.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user