mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
Merge pull request #6128 from svlandeg/fix/nr_features
This commit is contained in:
commit
cea9431a04
|
@ -59,7 +59,8 @@ factory = "parser"
|
|||
|
||||
[components.parser.model]
|
||||
@architectures = "spacy.TransitionBasedParser.v1"
|
||||
nr_feature_tokens = 8
|
||||
state_type = "parser"
|
||||
extra_state_tokens = false
|
||||
hidden_width = 128
|
||||
maxout_pieces = 3
|
||||
use_upper = false
|
||||
|
@ -79,7 +80,8 @@ factory = "ner"
|
|||
|
||||
[components.ner.model]
|
||||
@architectures = "spacy.TransitionBasedParser.v1"
|
||||
nr_feature_tokens = 3
|
||||
state_type = "ner"
|
||||
extra_state_tokens = false
|
||||
hidden_width = 64
|
||||
maxout_pieces = 2
|
||||
use_upper = false
|
||||
|
@ -183,7 +185,8 @@ factory = "parser"
|
|||
|
||||
[components.parser.model]
|
||||
@architectures = "spacy.TransitionBasedParser.v1"
|
||||
nr_feature_tokens = 8
|
||||
state_type = "parser"
|
||||
extra_state_tokens = false
|
||||
hidden_width = 128
|
||||
maxout_pieces = 3
|
||||
use_upper = true
|
||||
|
@ -200,7 +203,8 @@ factory = "ner"
|
|||
|
||||
[components.ner.model]
|
||||
@architectures = "spacy.TransitionBasedParser.v1"
|
||||
nr_feature_tokens = 6
|
||||
state_type = "ner"
|
||||
extra_state_tokens = false
|
||||
hidden_width = 64
|
||||
maxout_pieces = 2
|
||||
use_upper = true
|
||||
|
|
|
@ -480,6 +480,8 @@ class Errors:
|
|||
E201 = ("Span index out of range.")
|
||||
|
||||
# TODO: fix numbering after merging develop into master
|
||||
E917 = ("Received invalid value {value} for 'state_type' in "
|
||||
"TransitionBasedParser: only 'parser' or 'ner' are valid options.")
|
||||
E918 = ("Received invalid value for vocab: {vocab} ({vocab_type}). Valid "
|
||||
"values are an instance of spacy.vocab.Vocab or True to create one"
|
||||
" (default).")
|
||||
|
|
|
@ -2,6 +2,7 @@ from typing import Optional, List
|
|||
from thinc.api import Model, chain, list2array, Linear, zero_init, use_ops
|
||||
from thinc.types import Floats2d
|
||||
|
||||
from ...errors import Errors
|
||||
from ...compat import Literal
|
||||
from ...util import registry
|
||||
from .._precomputable_affine import PrecomputableAffine
|
||||
|
@ -12,7 +13,8 @@ from ...tokens import Doc
|
|||
@registry.architectures.register("spacy.TransitionBasedParser.v1")
|
||||
def build_tb_parser_model(
|
||||
tok2vec: Model[List[Doc], List[Floats2d]],
|
||||
nr_feature_tokens: Literal[3, 6, 8, 13],
|
||||
state_type: Literal["parser", "ner"],
|
||||
extra_state_tokens: bool,
|
||||
hidden_width: int,
|
||||
maxout_pieces: int,
|
||||
use_upper: bool = True,
|
||||
|
@ -41,20 +43,12 @@ def build_tb_parser_model(
|
|||
|
||||
tok2vec (Model[List[Doc], List[Floats2d]]):
|
||||
Subnetwork to map tokens into vector representations.
|
||||
nr_feature_tokens (int): The number of tokens in the context to use to
|
||||
construct the state vector. Valid choices are 3, 6, 8 and 13. The
|
||||
8 and 13 feature sets are designed for the parser, while the 3 and 6
|
||||
feature sets are designed for the NER. The recommended feature sets are
|
||||
3 for NER, and 8 for the dependency parser.
|
||||
|
||||
TODO: This feature should be split into two, state_type: ["deps", "ner"]
|
||||
and extra_state_features: [True, False]. This would map into:
|
||||
|
||||
(deps, False): 8
|
||||
(deps, True): 13
|
||||
(ner, False): 3
|
||||
(ner, True): 6
|
||||
|
||||
state_type (str):
|
||||
String value denoting the type of parser model: "parser" or "ner"
|
||||
extra_state_tokens (bool): Whether or not to use additional tokens in the context
|
||||
to construct the state vector. Defaults to `False`, which means 3 and 8
|
||||
for the NER and parser respectively. When set to `True`, this would become 6
|
||||
feature tokens (for the NER) or 13 (for the parser).
|
||||
hidden_width (int): The width of the hidden layer.
|
||||
maxout_pieces (int): How many pieces to use in the state prediction layer.
|
||||
Recommended values are 1, 2 or 3. If 1, the maxout non-linearity
|
||||
|
@ -69,8 +63,14 @@ def build_tb_parser_model(
|
|||
Usually inferred from data at the beginning of training, or loaded from
|
||||
disk.
|
||||
"""
|
||||
if state_type == "parser":
|
||||
nr_feature_tokens = 13 if extra_state_tokens else 8
|
||||
elif state_type == "ner":
|
||||
nr_feature_tokens = 6 if extra_state_tokens else 3
|
||||
else:
|
||||
raise ValueError(Errors.E917.format(value=state_type))
|
||||
t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
|
||||
tok2vec = chain(tok2vec, list2array(), Linear(hidden_width, t2v_width),)
|
||||
tok2vec = chain(tok2vec, list2array(), Linear(hidden_width, t2v_width))
|
||||
tok2vec.set_dim("nO", hidden_width)
|
||||
lower = PrecomputableAffine(
|
||||
nO=hidden_width if use_upper else nO,
|
||||
|
|
|
@ -15,7 +15,8 @@ from ..training import validate_examples
|
|||
default_model_config = """
|
||||
[model]
|
||||
@architectures = "spacy.TransitionBasedParser.v1"
|
||||
nr_feature_tokens = 8
|
||||
state_type = "parser"
|
||||
extra_state_tokens = false
|
||||
hidden_width = 64
|
||||
maxout_pieces = 2
|
||||
|
||||
|
|
|
@ -13,7 +13,8 @@ from ..training import validate_examples
|
|||
default_model_config = """
|
||||
[model]
|
||||
@architectures = "spacy.TransitionBasedParser.v1"
|
||||
nr_feature_tokens = 6
|
||||
state_type = "ner"
|
||||
extra_state_tokens = false
|
||||
hidden_width = 64
|
||||
maxout_pieces = 2
|
||||
|
||||
|
|
|
@ -67,7 +67,8 @@ width = ${components.tok2vec.model.width}
|
|||
parser_config_string = """
|
||||
[model]
|
||||
@architectures = "spacy.TransitionBasedParser.v1"
|
||||
nr_feature_tokens = 3
|
||||
state_type = "parser"
|
||||
extra_state_tokens = false
|
||||
hidden_width = 66
|
||||
maxout_pieces = 2
|
||||
|
||||
|
@ -95,7 +96,11 @@ def my_parser():
|
|||
MaxoutWindowEncoder(width=321, window_size=3, maxout_pieces=4, depth=2),
|
||||
)
|
||||
parser = build_tb_parser_model(
|
||||
tok2vec=tok2vec, nr_feature_tokens=8, hidden_width=65, maxout_pieces=5
|
||||
tok2vec=tok2vec,
|
||||
state_type="parser",
|
||||
extra_state_tokens=True,
|
||||
hidden_width=65,
|
||||
maxout_pieces=5,
|
||||
)
|
||||
return parser
|
||||
|
||||
|
@ -345,8 +350,8 @@ def test_config_auto_fill_extra_fields():
|
|||
def test_config_validate_literal():
|
||||
nlp = English()
|
||||
config = Config().from_str(parser_config_string)
|
||||
config["model"]["nr_feature_tokens"] = 666
|
||||
config["model"]["state_type"] = "nonsense"
|
||||
with pytest.raises(ConfigValidationError):
|
||||
nlp.add_pipe("parser", config=config)
|
||||
config["model"]["nr_feature_tokens"] = 13
|
||||
config["model"]["state_type"] = "ner"
|
||||
nlp.add_pipe("parser", config=config)
|
||||
|
|
|
@ -414,7 +414,8 @@ one component.
|
|||
> ```ini
|
||||
> [model]
|
||||
> @architectures = "spacy.TransitionBasedParser.v1"
|
||||
> nr_feature_tokens = 6
|
||||
> state_type = "ner"
|
||||
> extra_state_tokens = false
|
||||
> hidden_width = 64
|
||||
> maxout_pieces = 2
|
||||
>
|
||||
|
@ -446,15 +447,16 @@ consists of either two or three subnetworks:
|
|||
state representation. If not present, the output from the lower model is used
|
||||
as action scores directly.
|
||||
|
||||
| Name | Description |
|
||||
| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
| `nr_feature_tokens` | The number of tokens in the context to use to construct the state vector. Valid choices are `3`, `6`, `8` and `13`. The `8` and `13` feature sets are designed for the parser, while the `3` and `6` feature sets are designed for the entity recognizer. The recommended feature sets are `3` for NER, and `8` for the dependency parser. ~~int~~ |
|
||||
| `hidden_width` | The width of the hidden layer. ~~int~~ |
|
||||
| `maxout_pieces` | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. If `1`, the maxout non-linearity is replaced with a [`Relu`](https://thinc.ai/docs/api-layers#relu) non-linearity if `use_upper` is `True`, and no non-linearity if `False`. ~~int~~ |
|
||||
| `use_upper` | Whether to use an additional hidden layer after the state vector in order to predict the action scores. It is recommended to set this to `False` for large pretrained models such as transformers, and `True` for smaller networks. The upper layer is computed on CPU, which becomes a bottleneck on larger GPU-based models, where it's also less necessary. ~~bool~~ |
|
||||
| `nO` | The number of actions the model will predict between. Usually inferred from data at the beginning of training, or loaded from disk. ~~int~~ |
|
||||
| **CREATES** | The model using the architecture. ~~Model[List[Docs], List[List[Floats2d]]]~~ |
|
||||
| Name | Description |
|
||||
| -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
| `state_type` | Which task to extract features for. Possible values are `"ner"` and `"parser"`. ~~str~~ |
|
||||
| `extra_state_tokens` | Whether to use an expanded feature set when extracting the state tokens. Slightly slower, but sometimes improves accuracy. Defaults to `False`. ~~bool~~ |
|
||||
| `hidden_width` | The width of the hidden layer. ~~int~~ |
|
||||
| `maxout_pieces` | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. If `1`, the maxout non-linearity is replaced with a [`Relu`](https://thinc.ai/docs/api-layers#relu) non-linearity if `use_upper` is `True`, and no non-linearity if `False`. ~~int~~ |
|
||||
| `use_upper` | Whether to use an additional hidden layer after the state vector in order to predict the action scores. It is recommended to set this to `False` for large pretrained models such as transformers, and `True` for smaller networks. The upper layer is computed on CPU, which becomes a bottleneck on larger GPU-based models, where it's also less necessary. ~~bool~~ |
|
||||
| `nO` | The number of actions the model will predict between. Usually inferred from data at the beginning of training, or loaded from disk. ~~int~~ |
|
||||
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[List[Floats2d]]]~~ |
|
||||
|
||||
## Tagging architectures {#tagger source="spacy/ml/models/tagger.py"}
|
||||
|
||||
|
|
|
@ -448,7 +448,8 @@ factory = "ner"
|
|||
|
||||
[nlp.pipeline.ner.model]
|
||||
@architectures = "spacy.TransitionBasedParser.v1"
|
||||
nr_feature_tokens = 3
|
||||
state_type = "ner"
|
||||
extra_state_tokens = false
|
||||
hidden_width = 128
|
||||
maxout_pieces = 3
|
||||
use_upper = false
|
||||
|
|
Loading…
Reference in New Issue
Block a user