Merge pull request #6127 from explosion/feature/literal-nr_feature_tokens

Ines Montani 2020-09-23 16:56:08 +02:00 committed by GitHub
commit 916050bf2f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 24 additions and 6 deletions

View File

@@ -20,6 +20,7 @@ pytokenizations
 setuptools
 packaging
 importlib_metadata>=0.20; python_version < "3.8"
+typing_extensions>=3.7.4; python_version < "3.8"
 # Development dependencies
 cython>=0.25
 pytest>=4.6.5
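The `python_version < "3.8"` environment marker limits the new dependency to Python 3.7, since `typing.Literal` ships in the standard library from 3.8 onwards. A minimal sketch of the equivalent runtime check:

```python
import sys

# Mirrors the environment marker above: the typing_extensions backport is
# only needed on interpreters older than 3.8.
if sys.version_info >= (3, 8):
    from typing import Literal
else:
    from typing_extensions import Literal
```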

View File

@@ -57,6 +57,7 @@ install_requires =
 setuptools
 packaging
 importlib_metadata>=0.20; python_version < "3.8"
+typing_extensions>=3.7.4; python_version < "3.8"
 
 [options.entry_points]
 console_scripts =

View File

@@ -22,6 +22,11 @@ try:
 except ImportError:
     cupy = None
 
+try:  # Python 3.8+
+    from typing import Literal
+except ImportError:
+    from typing_extensions import Literal  # noqa: F401
+
 from thinc.api import Optimizer  # noqa: F401
 
 pickle = pickle
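With the shim in place, the rest of the codebase imports `Literal` from one location on any supported Python version. A short sketch of the pattern (the function below is hypothetical, not spaCy code):

```python
from spacy.compat import Literal  # typing.Literal on 3.8+, backport on 3.7

# Hypothetical helper: the annotation documents the accepted values, and
# pydantic-based config validation can enforce them at load time.
def set_feature_set(nr_feature_tokens: Literal[3, 6, 8, 13]) -> None:
    print(f"using {nr_feature_tokens} feature tokens")
```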

View File

@@ -2,6 +2,7 @@ from typing import Optional, List
 from thinc.api import Model, chain, list2array, Linear, zero_init, use_ops
 from thinc.types import Floats2d
+from ...compat import Literal
 from ...util import registry
 from .._precomputable_affine import PrecomputableAffine
 from ..tb_framework import TransitionModel
@@ -11,7 +12,7 @@ from ...tokens import Doc
 @registry.architectures.register("spacy.TransitionBasedParser.v1")
 def build_tb_parser_model(
     tok2vec: Model[List[Doc], List[Floats2d]],
-    nr_feature_tokens: int,
+    nr_feature_tokens: Literal[3, 6, 8, 13],
     hidden_width: int,
     maxout_pieces: int,
     use_upper: bool = True,
@@ -41,8 +42,8 @@ def build_tb_parser_model(
     tok2vec (Model[List[Doc], List[Floats2d]]):
         Subnetwork to map tokens into vector representations.
     nr_feature_tokens (int): The number of tokens in the context to use to
-        construct the state vector. Valid choices are 1, 2, 3, 6, 8 and 13. The
-        2, 8 and 13 feature sets are designed for the parser, while the 3 and 6
+        construct the state vector. Valid choices are 3, 6, 8 and 13. The
+        8 and 13 feature sets are designed for the parser, while the 3 and 6
         feature sets are designed for the NER. The recommended feature sets are
         3 for NER, and 8 for the dependency parser.
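Thinc's config system validates registered function arguments with pydantic, so the `Literal` annotation turns an out-of-range `nr_feature_tokens` into an error at config-resolution time instead of a silent misconfiguration. A standalone sketch of the same mechanism (the `ParserSettings` model is illustrative, not spaCy code):

```python
from pydantic import BaseModel, ValidationError

try:  # Python 3.8+
    from typing import Literal
except ImportError:
    from typing_extensions import Literal

class ParserSettings(BaseModel):
    # Only the four supported feature-set sizes are accepted.
    nr_feature_tokens: Literal[3, 6, 8, 13]

ParserSettings(nr_feature_tokens=8)  # passes validation
try:
    ParserSettings(nr_feature_tokens=99)  # 99 is not a permitted value
except ValidationError as err:
    print(err)
```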

View File

@@ -67,7 +67,7 @@ width = ${components.tok2vec.model.width}
 parser_config_string = """
 [model]
 @architectures = "spacy.TransitionBasedParser.v1"
-nr_feature_tokens = 99
+nr_feature_tokens = 3
 hidden_width = 66
 maxout_pieces = 2
@@ -95,7 +95,7 @@ def my_parser():
         MaxoutWindowEncoder(width=321, window_size=3, maxout_pieces=4, depth=2),
     )
     parser = build_tb_parser_model(
-        tok2vec=tok2vec, nr_feature_tokens=7, hidden_width=65, maxout_pieces=5
+        tok2vec=tok2vec, nr_feature_tokens=8, hidden_width=65, maxout_pieces=5
     )
     return parser
@@ -340,3 +340,13 @@ def test_config_auto_fill_extra_fields():
     assert "extra" not in nlp.config["training"]
     # Make sure the config generated is valid
     load_model_from_config(nlp.config)
+
+
+def test_config_validate_literal():
+    nlp = English()
+    config = Config().from_str(parser_config_string)
+    config["model"]["nr_feature_tokens"] = 666
+    with pytest.raises(ConfigValidationError):
+        nlp.add_pipe("parser", config=config)
+    config["model"]["nr_feature_tokens"] = 13
+    nlp.add_pipe("parser", config=config)
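The same rejection can be reproduced outside the test suite with Thinc's registry directly; a self-contained sketch (assuming a recent Thinc; `demo_literal.v1` is a hypothetical registered function, not part of spaCy or Thinc):

```python
from thinc.api import Config, ConfigValidationError, registry

try:  # Python 3.8+
    from typing import Literal
except ImportError:
    from typing_extensions import Literal

@registry.layers("demo_literal.v1")  # hypothetical demo entry
def make_demo(nr_feature_tokens: Literal[3, 6, 8, 13]) -> int:
    return nr_feature_tokens

cfg = Config().from_str("""
[demo]
@layers = "demo_literal.v1"
nr_feature_tokens = 666
""")

try:
    registry.resolve(cfg)  # validation fails before make_demo is called
except ConfigValidationError:
    print("666 rejected; only 3, 6, 8 and 13 validate")
```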

View File

@@ -449,7 +449,7 @@ consists of either two or three subnetworks:
 | Name                | Description |
 | ------------------- | ----------- |
 | `tok2vec`           | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
-| `nr_feature_tokens` | The number of tokens in the context to use to construct the state vector. Valid choices are `1`, `2`, `3`, `6`, `8` and `13`. The `2`, `8` and `13` feature sets are designed for the parser, while the `3` and `6` feature sets are designed for the entity recognizer. The recommended feature sets are `3` for NER, and `8` for the dependency parser. ~~int~~ |
+| `nr_feature_tokens` | The number of tokens in the context to use to construct the state vector. Valid choices are `3`, `6`, `8` and `13`. The `8` and `13` feature sets are designed for the parser, while the `3` and `6` feature sets are designed for the entity recognizer. The recommended feature sets are `3` for NER, and `8` for the dependency parser. ~~int~~ |
 | `hidden_width`      | The width of the hidden layer. ~~int~~ |
 | `maxout_pieces`     | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. If `1`, the maxout non-linearity is replaced with a [`Relu`](https://thinc.ai/docs/api-layers#relu) non-linearity if `use_upper` is `True`, and no non-linearity if `False`. ~~int~~ |
 | `use_upper`         | Whether to use an additional hidden layer after the state vector in order to predict the action scores. It is recommended to set this to `False` for large pretrained models such as transformers, and `True` for smaller networks. The upper layer is computed on CPU, which becomes a bottleneck on larger GPU-based models, where it's also less necessary. ~~bool~~ |
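For reference, a minimal `[model]` block using one of the permitted values can be parsed as follows (a sketch; the required `[model.tok2vec]` sub-block and remaining settings are omitted for brevity):

```python
from thinc.api import Config

# Parsing alone does not resolve the architecture; nr_feature_tokens is
# validated against Literal[3, 6, 8, 13] when the config is resolved
# into a model.
cfg = Config().from_str("""
[model]
@architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 8
hidden_width = 64
maxout_pieces = 2
use_upper = true
""")
assert cfg["model"]["nr_feature_tokens"] in (3, 6, 8, 13)
```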