From 76bbed3466519d384834715f48f240140c43e02e Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 23 Sep 2020 16:00:03 +0200 Subject: [PATCH 1/2] Use Literal type for nr_feature_tokens --- requirements.txt | 1 + setup.cfg | 1 + spacy/compat.py | 5 +++++ spacy/ml/models/parser.py | 3 ++- spacy/tests/serialize/test_serialize_config.py | 14 ++++++++++++-- 5 files changed, 21 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 4d6c1dfd0..a8b237aa1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,6 +20,7 @@ pytokenizations setuptools packaging importlib_metadata>=0.20; python_version < "3.8" +typing_extensions>=3.7.4; python_version < "3.8" # Development dependencies cython>=0.25 pytest>=4.6.5 diff --git a/setup.cfg b/setup.cfg index dd0975800..9831402d1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -57,6 +57,7 @@ install_requires = setuptools packaging importlib_metadata>=0.20; python_version < "3.8" + typing_extensions>=3.7.4; python_version < "3.8" [options.entry_points] console_scripts = diff --git a/spacy/compat.py b/spacy/compat.py index 2d51ff0ae..6eca18b80 100644 --- a/spacy/compat.py +++ b/spacy/compat.py @@ -22,6 +22,11 @@ try: except ImportError: cupy = None +try: # Python 3.8+ + from typing import Literal +except ImportError: + from typing_extensions import Literal # noqa: F401 + from thinc.api import Optimizer # noqa: F401 pickle = pickle diff --git a/spacy/ml/models/parser.py b/spacy/ml/models/parser.py index 868f9d6d2..68cc20e9b 100644 --- a/spacy/ml/models/parser.py +++ b/spacy/ml/models/parser.py @@ -2,6 +2,7 @@ from typing import Optional, List from thinc.api import Model, chain, list2array, Linear, zero_init, use_ops from thinc.types import Floats2d +from ...compat import Literal from ...util import registry from .._precomputable_affine import PrecomputableAffine from ..tb_framework import TransitionModel @@ -11,7 +12,7 @@ from ...tokens import Doc 
@registry.architectures.register("spacy.TransitionBasedParser.v1") def build_tb_parser_model( tok2vec: Model[List[Doc], List[Floats2d]], - nr_feature_tokens: int, + nr_feature_tokens: Literal[3, 6, 8, 13], hidden_width: int, maxout_pieces: int, use_upper: bool = True, diff --git a/spacy/tests/serialize/test_serialize_config.py b/spacy/tests/serialize/test_serialize_config.py index 1e17b3212..5f25cbfe1 100644 --- a/spacy/tests/serialize/test_serialize_config.py +++ b/spacy/tests/serialize/test_serialize_config.py @@ -67,7 +67,7 @@ width = ${components.tok2vec.model.width} parser_config_string = """ [model] @architectures = "spacy.TransitionBasedParser.v1" -nr_feature_tokens = 99 +nr_feature_tokens = 3 hidden_width = 66 maxout_pieces = 2 @@ -95,7 +95,7 @@ def my_parser(): MaxoutWindowEncoder(width=321, window_size=3, maxout_pieces=4, depth=2), ) parser = build_tb_parser_model( - tok2vec=tok2vec, nr_feature_tokens=7, hidden_width=65, maxout_pieces=5 + tok2vec=tok2vec, nr_feature_tokens=8, hidden_width=65, maxout_pieces=5 ) return parser @@ -340,3 +340,13 @@ def test_config_auto_fill_extra_fields(): assert "extra" not in nlp.config["training"] # Make sure the config generated is valid load_model_from_config(nlp.config) + + +def test_config_validate_literal(): + nlp = English() + config = Config().from_str(parser_config_string) + config["model"]["nr_feature_tokens"] = 666 + with pytest.raises(ConfigValidationError): + nlp.add_pipe("parser", config=config) + config["model"]["nr_feature_tokens"] = 13 + nlp.add_pipe("parser", config=config) From 50a4425cdaed350653368c9c350f95717e9414d9 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 23 Sep 2020 16:03:32 +0200 Subject: [PATCH 2/2] Adjust docs --- spacy/ml/models/parser.py | 4 ++-- website/docs/api/architectures.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spacy/ml/models/parser.py b/spacy/ml/models/parser.py index 68cc20e9b..5d091c590 100644 --- a/spacy/ml/models/parser.py +++ 
b/spacy/ml/models/parser.py @@ -42,8 +42,8 @@ def build_tb_parser_model( tok2vec (Model[List[Doc], List[Floats2d]]): Subnetwork to map tokens into vector representations. nr_feature_tokens (int): The number of tokens in the context to use to - construct the state vector. Valid choices are 1, 2, 3, 6, 8 and 13. The - 2, 8 and 13 feature sets are designed for the parser, while the 3 and 6 + construct the state vector. Valid choices are 3, 6, 8 and 13. The + 8 and 13 feature sets are designed for the parser, while the 3 and 6 feature sets are designed for the NER. The recommended feature sets are 3 for NER, and 8 for the dependency parser. diff --git a/website/docs/api/architectures.md b/website/docs/api/architectures.md index 30d863b17..8797b2f31 100644 --- a/website/docs/api/architectures.md +++ b/website/docs/api/architectures.md @@ -449,7 +449,7 @@ consists of either two or three subnetworks: | Name | Description | | ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ | -| `nr_feature_tokens` | The number of tokens in the context to use to construct the state vector. Valid choices are `1`, `2`, `3`, `6`, `8` and `13`. The `2`, `8` and `13` feature sets are designed for the parser, while the `3` and `6` feature sets are designed for the entity recognizer. The recommended feature sets are `3` for NER, and `8` for the dependency parser. ~~int~~ | +| `nr_feature_tokens` | The number of tokens in the context to use to construct the state vector. Valid choices are `3`, `6`, `8` and `13`. 
The `8` and `13` feature sets are designed for the parser, while the `3` and `6` feature sets are designed for the entity recognizer. The recommended feature sets are `3` for NER, and `8` for the dependency parser. ~~Literal[3, 6, 8, 13]~~ | | `hidden_width` | The width of the hidden layer. ~~int~~ | | `maxout_pieces` | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. If `1`, the maxout non-linearity is replaced with a [`Relu`](https://thinc.ai/docs/api-layers#relu) non-linearity if `use_upper` is `True`, and no non-linearity if `False`. ~~int~~ | | `use_upper` | Whether to use an additional hidden layer after the state vector in order to predict the action scores. It is recommended to set this to `False` for large pretrained models such as transformers, and `True` for smaller networks. The upper layer is computed on CPU, which becomes a bottleneck on larger GPU-based models, where it's also less necessary. ~~bool~~ |