mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-07 21:33:13 +03:00
Merge pull request #6127 from explosion/feature/literal-nr_feature_tokens
This commit is contained in:
commit
916050bf2f
|
@ -20,6 +20,7 @@ pytokenizations
|
||||||
setuptools
|
setuptools
|
||||||
packaging
|
packaging
|
||||||
importlib_metadata>=0.20; python_version < "3.8"
|
importlib_metadata>=0.20; python_version < "3.8"
|
||||||
|
typing_extensions>=3.7.4; python_version < "3.8"
|
||||||
# Development dependencies
|
# Development dependencies
|
||||||
cython>=0.25
|
cython>=0.25
|
||||||
pytest>=4.6.5
|
pytest>=4.6.5
|
||||||
|
|
|
@ -57,6 +57,7 @@ install_requires =
|
||||||
setuptools
|
setuptools
|
||||||
packaging
|
packaging
|
||||||
importlib_metadata>=0.20; python_version < "3.8"
|
importlib_metadata>=0.20; python_version < "3.8"
|
||||||
|
typing_extensions>=3.7.4; python_version < "3.8"
|
||||||
|
|
||||||
[options.entry_points]
|
[options.entry_points]
|
||||||
console_scripts =
|
console_scripts =
|
||||||
|
|
|
@ -22,6 +22,11 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
cupy = None
|
cupy = None
|
||||||
|
|
||||||
|
try: # Python 3.8+
|
||||||
|
from typing import Literal
|
||||||
|
except ImportError:
|
||||||
|
from typing_extensions import Literal # noqa: F401
|
||||||
|
|
||||||
from thinc.api import Optimizer # noqa: F401
|
from thinc.api import Optimizer # noqa: F401
|
||||||
|
|
||||||
pickle = pickle
|
pickle = pickle
|
||||||
|
|
|
@ -2,6 +2,7 @@ from typing import Optional, List
|
||||||
from thinc.api import Model, chain, list2array, Linear, zero_init, use_ops
|
from thinc.api import Model, chain, list2array, Linear, zero_init, use_ops
|
||||||
from thinc.types import Floats2d
|
from thinc.types import Floats2d
|
||||||
|
|
||||||
|
from ...compat import Literal
|
||||||
from ...util import registry
|
from ...util import registry
|
||||||
from .._precomputable_affine import PrecomputableAffine
|
from .._precomputable_affine import PrecomputableAffine
|
||||||
from ..tb_framework import TransitionModel
|
from ..tb_framework import TransitionModel
|
||||||
|
@ -11,7 +12,7 @@ from ...tokens import Doc
|
||||||
@registry.architectures.register("spacy.TransitionBasedParser.v1")
|
@registry.architectures.register("spacy.TransitionBasedParser.v1")
|
||||||
def build_tb_parser_model(
|
def build_tb_parser_model(
|
||||||
tok2vec: Model[List[Doc], List[Floats2d]],
|
tok2vec: Model[List[Doc], List[Floats2d]],
|
||||||
nr_feature_tokens: int,
|
nr_feature_tokens: Literal[3, 6, 8, 13],
|
||||||
hidden_width: int,
|
hidden_width: int,
|
||||||
maxout_pieces: int,
|
maxout_pieces: int,
|
||||||
use_upper: bool = True,
|
use_upper: bool = True,
|
||||||
|
@ -41,8 +42,8 @@ def build_tb_parser_model(
|
||||||
tok2vec (Model[List[Doc], List[Floats2d]]):
|
tok2vec (Model[List[Doc], List[Floats2d]]):
|
||||||
Subnetwork to map tokens into vector representations.
|
Subnetwork to map tokens into vector representations.
|
||||||
nr_feature_tokens (int): The number of tokens in the context to use to
|
nr_feature_tokens (int): The number of tokens in the context to use to
|
||||||
construct the state vector. Valid choices are 1, 2, 3, 6, 8 and 13. The
|
construct the state vector. Valid choices are 3, 6, 8 and 13. The
|
||||||
2, 8 and 13 feature sets are designed for the parser, while the 3 and 6
|
8 and 13 feature sets are designed for the parser, while the 3 and 6
|
||||||
feature sets are designed for the NER. The recommended feature sets are
|
feature sets are designed for the NER. The recommended feature sets are
|
||||||
3 for NER, and 8 for the dependency parser.
|
3 for NER, and 8 for the dependency parser.
|
||||||
|
|
||||||
|
|
|
@ -67,7 +67,7 @@ width = ${components.tok2vec.model.width}
|
||||||
parser_config_string = """
|
parser_config_string = """
|
||||||
[model]
|
[model]
|
||||||
@architectures = "spacy.TransitionBasedParser.v1"
|
@architectures = "spacy.TransitionBasedParser.v1"
|
||||||
nr_feature_tokens = 99
|
nr_feature_tokens = 3
|
||||||
hidden_width = 66
|
hidden_width = 66
|
||||||
maxout_pieces = 2
|
maxout_pieces = 2
|
||||||
|
|
||||||
|
@ -95,7 +95,7 @@ def my_parser():
|
||||||
MaxoutWindowEncoder(width=321, window_size=3, maxout_pieces=4, depth=2),
|
MaxoutWindowEncoder(width=321, window_size=3, maxout_pieces=4, depth=2),
|
||||||
)
|
)
|
||||||
parser = build_tb_parser_model(
|
parser = build_tb_parser_model(
|
||||||
tok2vec=tok2vec, nr_feature_tokens=7, hidden_width=65, maxout_pieces=5
|
tok2vec=tok2vec, nr_feature_tokens=8, hidden_width=65, maxout_pieces=5
|
||||||
)
|
)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
@ -340,3 +340,13 @@ def test_config_auto_fill_extra_fields():
|
||||||
assert "extra" not in nlp.config["training"]
|
assert "extra" not in nlp.config["training"]
|
||||||
# Make sure the config generated is valid
|
# Make sure the config generated is valid
|
||||||
load_model_from_config(nlp.config)
|
load_model_from_config(nlp.config)
|
||||||
|
|
||||||
|
|
||||||
|
def test_config_validate_literal():
|
||||||
|
nlp = English()
|
||||||
|
config = Config().from_str(parser_config_string)
|
||||||
|
config["model"]["nr_feature_tokens"] = 666
|
||||||
|
with pytest.raises(ConfigValidationError):
|
||||||
|
nlp.add_pipe("parser", config=config)
|
||||||
|
config["model"]["nr_feature_tokens"] = 13
|
||||||
|
nlp.add_pipe("parser", config=config)
|
||||||
|
|
|
@ -449,7 +449,7 @@ consists of either two or three subnetworks:
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
|
| `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||||
| `nr_feature_tokens` | The number of tokens in the context to use to construct the state vector. Valid choices are `1`, `2`, `3`, `6`, `8` and `13`. The `2`, `8` and `13` feature sets are designed for the parser, while the `3` and `6` feature sets are designed for the entity recognizer. The recommended feature sets are `3` for NER, and `8` for the dependency parser. ~~int~~ |
|
| `nr_feature_tokens` | The number of tokens in the context to use to construct the state vector. Valid choices are `3`, `6`, `8` and `13`. The `8` and `13` feature sets are designed for the parser, while the `3` and `6` feature sets are designed for the entity recognizer. The recommended feature sets are `3` for NER, and `8` for the dependency parser. ~~int~~ |
|
||||||
| `hidden_width` | The width of the hidden layer. ~~int~~ |
|
| `hidden_width` | The width of the hidden layer. ~~int~~ |
|
||||||
| `maxout_pieces` | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. If `1`, the maxout non-linearity is replaced with a [`Relu`](https://thinc.ai/docs/api-layers#relu) non-linearity if `use_upper` is `True`, and no non-linearity if `False`. ~~int~~ |
|
| `maxout_pieces` | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. If `1`, the maxout non-linearity is replaced with a [`Relu`](https://thinc.ai/docs/api-layers#relu) non-linearity if `use_upper` is `True`, and no non-linearity if `False`. ~~int~~ |
|
||||||
| `use_upper` | Whether to use an additional hidden layer after the state vector in order to predict the action scores. It is recommended to set this to `False` for large pretrained models such as transformers, and `True` for smaller networks. The upper layer is computed on CPU, which becomes a bottleneck on larger GPU-based models, where it's also less necessary. ~~bool~~ |
|
| `use_upper` | Whether to use an additional hidden layer after the state vector in order to predict the action scores. It is recommended to set this to `False` for large pretrained models such as transformers, and `True` for smaller networks. The upper layer is computed on CPU, which becomes a bottleneck on larger GPU-based models, where it's also less necessary. ~~bool~~ |
|
||||||
|
|
Loading…
Reference in New Issue
Block a user