state_type and extra_state_tokens instead of nr_feature_tokens

This commit is contained in:
svlandeg 2020-09-23 13:35:09 +02:00
parent ae5dacf75f
commit 6c85fab316
7 changed files with 48 additions and 35 deletions

View File

@ -59,7 +59,8 @@ factory = "parser"
[components.parser.model] [components.parser.model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 8 state_type = "deps"
extra_state_tokens = false
hidden_width = 128 hidden_width = 128
maxout_pieces = 3 maxout_pieces = 3
use_upper = false use_upper = false
@ -79,7 +80,8 @@ factory = "ner"
[components.ner.model] [components.ner.model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 3 state_type = "ner"
extra_state_tokens = false
hidden_width = 64 hidden_width = 64
maxout_pieces = 2 maxout_pieces = 2
use_upper = false use_upper = false
@ -183,7 +185,8 @@ factory = "parser"
[components.parser.model] [components.parser.model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 8 state_type = "deps"
extra_state_tokens = false
hidden_width = 128 hidden_width = 128
maxout_pieces = 3 maxout_pieces = 3
use_upper = true use_upper = true
@ -200,7 +203,8 @@ factory = "ner"
[components.ner.model] [components.ner.model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 6 state_type = "ner"
extra_state_tokens = false
hidden_width = 64 hidden_width = 64
maxout_pieces = 2 maxout_pieces = 2
use_upper = true use_upper = true

View File

@ -11,7 +11,8 @@ from ...tokens import Doc
@registry.architectures.register("spacy.TransitionBasedParser.v1") @registry.architectures.register("spacy.TransitionBasedParser.v1")
def build_tb_parser_model( def build_tb_parser_model(
tok2vec: Model[List[Doc], List[Floats2d]], tok2vec: Model[List[Doc], List[Floats2d]],
nr_feature_tokens: int, state_type: str,
extra_state_tokens: bool,
hidden_width: int, hidden_width: int,
maxout_pieces: int, maxout_pieces: int,
use_upper: bool = True, use_upper: bool = True,
@ -40,20 +41,12 @@ def build_tb_parser_model(
tok2vec (Model[List[Doc], List[Floats2d]]): tok2vec (Model[List[Doc], List[Floats2d]]):
Subnetwork to map tokens into vector representations. Subnetwork to map tokens into vector representations.
nr_feature_tokens (int): The number of tokens in the context to use to state_type (str):
construct the state vector. Valid choices are 1, 2, 3, 6, 8 and 13. The String value denoting the type of parser model: "deps" or "ner"
2, 8 and 13 feature sets are designed for the parser, while the 3 and 6 extra_state_tokens (bool): Whether or not to use additional tokens in the context
feature sets are designed for the NER. The recommended feature sets are to construct the state vector. Defaults to `False`, which means 3 and 8
3 for NER, and 8 for the dependency parser. for the NER and parser respectively. When set to `True`, this would become 6
feature sets (for the NER) or 13 (for the parser).
TODO: This feature should be split into two, state_type: ["deps", "ner"]
and extra_state_features: [True, False]. This would map into:
(deps, False): 8
(deps, True): 13
(ner, False): 3
(ner, True): 6
hidden_width (int): The width of the hidden layer. hidden_width (int): The width of the hidden layer.
maxout_pieces (int): How many pieces to use in the state prediction layer. maxout_pieces (int): How many pieces to use in the state prediction layer.
Recommended values are 1, 2 or 3. If 1, the maxout non-linearity Recommended values are 1, 2 or 3. If 1, the maxout non-linearity
@ -68,8 +61,14 @@ def build_tb_parser_model(
Usually inferred from data at the beginning of training, or loaded from Usually inferred from data at the beginning of training, or loaded from
disk. disk.
""" """
if state_type == "deps":
nr_feature_tokens = 13 if extra_state_tokens else 8
elif state_type == "ner":
nr_feature_tokens = 6 if extra_state_tokens else 3
else:
raise ValueError(f"unknown state type {state_type}") # TODO error
t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
tok2vec = chain(tok2vec, list2array(), Linear(hidden_width, t2v_width),) tok2vec = chain(tok2vec, list2array(), Linear(hidden_width, t2v_width))
tok2vec.set_dim("nO", hidden_width) tok2vec.set_dim("nO", hidden_width)
lower = PrecomputableAffine( lower = PrecomputableAffine(
nO=hidden_width if use_upper else nO, nO=hidden_width if use_upper else nO,

View File

@ -15,7 +15,8 @@ from ..training import validate_examples
default_model_config = """ default_model_config = """
[model] [model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 8 state_type = "deps"
extra_state_tokens = false
hidden_width = 64 hidden_width = 64
maxout_pieces = 2 maxout_pieces = 2

View File

@ -13,7 +13,8 @@ from ..training import validate_examples
default_model_config = """ default_model_config = """
[model] [model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 6 state_type = "ner"
extra_state_tokens = false
hidden_width = 64 hidden_width = 64
maxout_pieces = 2 maxout_pieces = 2

View File

@ -67,7 +67,8 @@ width = ${components.tok2vec.model.width}
parser_config_string = """ parser_config_string = """
[model] [model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 99 state_type = "deps"
extra_state_tokens = false
hidden_width = 66 hidden_width = 66
maxout_pieces = 2 maxout_pieces = 2
@ -95,7 +96,11 @@ def my_parser():
MaxoutWindowEncoder(width=321, window_size=3, maxout_pieces=4, depth=2), MaxoutWindowEncoder(width=321, window_size=3, maxout_pieces=4, depth=2),
) )
parser = build_tb_parser_model( parser = build_tb_parser_model(
tok2vec=tok2vec, nr_feature_tokens=7, hidden_width=65, maxout_pieces=5 tok2vec=tok2vec,
state_type="deps",
extra_state_tokens=True,
hidden_width=65,
maxout_pieces=5,
) )
return parser return parser

View File

@ -414,7 +414,8 @@ one component.
> ```ini > ```ini
> [model] > [model]
> @architectures = "spacy.TransitionBasedParser.v1" > @architectures = "spacy.TransitionBasedParser.v1"
> nr_feature_tokens = 6 > state_type = "ner"
> extra_state_tokens = false
> hidden_width = 64 > hidden_width = 64
> maxout_pieces = 2 > maxout_pieces = 2
> >
@ -446,15 +447,16 @@ consists of either two or three subnetworks:
state representation. If not present, the output from the lower model is used state representation. If not present, the output from the lower model is used
as action scores directly. as action scores directly.
| Name | Description | | Name | Description |
| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ | | `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
| `nr_feature_tokens` | The number of tokens in the context to use to construct the state vector. Valid choices are `1`, `2`, `3`, `6`, `8` and `13`. The `2`, `8` and `13` feature sets are designed for the parser, while the `3` and `6` feature sets are designed for the entity recognizer. The recommended feature sets are `3` for NER, and `8` for the dependency parser. ~~int~~ | | `state_type` | Which task to extract features for. Possible values are "ner" and "dependencies". ~~str~~ |
| `hidden_width` | The width of the hidden layer. ~~int~~ | | `extra_state_tokens` | Whether to use an expanded feature set when extracting the state tokens. Slightly slower, but sometimes improves accuracy slightly. Defaults to `False`. ~~bool~~ |
| `maxout_pieces` | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. If `1`, the maxout non-linearity is replaced with a [`Relu`](https://thinc.ai/docs/api-layers#relu) non-linearity if `use_upper` is `True`, and no non-linearity if `False`. ~~int~~ | | `hidden_width` | The width of the hidden layer. ~~int~~ |
| `use_upper` | Whether to use an additional hidden layer after the state vector in order to predict the action scores. It is recommended to set this to `False` for large pretrained models such as transformers, and `True` for smaller networks. The upper layer is computed on CPU, which becomes a bottleneck on larger GPU-based models, where it's also less necessary. ~~bool~~ | | `maxout_pieces` | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. If `1`, the maxout non-linearity is replaced with a [`Relu`](https://thinc.ai/docs/api-layers#relu) non-linearity if `use_upper` is `True`, and no non-linearity if `False`. ~~int~~ |
| `nO` | The number of actions the model will predict between. Usually inferred from data at the beginning of training, or loaded from disk. ~~int~~ | | `use_upper` | Whether to use an additional hidden layer after the state vector in order to predict the action scores. It is recommended to set this to `False` for large pretrained models such as transformers, and `True` for smaller networks. The upper layer is computed on CPU, which becomes a bottleneck on larger GPU-based models, where it's also less necessary. ~~bool~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Docs], List[List[Floats2d]]]~~ | | `nO` | The number of actions the model will predict between. Usually inferred from data at the beginning of training, or loaded from disk. ~~int~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Docs], List[List[Floats2d]]]~~ |
## Tagging architectures {#tagger source="spacy/ml/models/tagger.py"} ## Tagging architectures {#tagger source="spacy/ml/models/tagger.py"}

View File

@ -448,7 +448,8 @@ factory = "ner"
[nlp.pipeline.ner.model] [nlp.pipeline.ner.model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 3 state_type = "ner"
extra_state_tokens = false
hidden_width = 128 hidden_width = 128
maxout_pieces = 3 maxout_pieces = 3
use_upper = false use_upper = false