Merge pull request #6128 from svlandeg/fix/nr_features

This commit is contained in:
Ines Montani 2020-09-23 19:38:19 +02:00 committed by GitHub
commit cea9431a04
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 53 additions and 37 deletions

View File

@ -59,7 +59,8 @@ factory = "parser"
[components.parser.model] [components.parser.model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 8 state_type = "parser"
extra_state_tokens = false
hidden_width = 128 hidden_width = 128
maxout_pieces = 3 maxout_pieces = 3
use_upper = false use_upper = false
@ -79,7 +80,8 @@ factory = "ner"
[components.ner.model] [components.ner.model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 3 state_type = "ner"
extra_state_tokens = false
hidden_width = 64 hidden_width = 64
maxout_pieces = 2 maxout_pieces = 2
use_upper = false use_upper = false
@ -183,7 +185,8 @@ factory = "parser"
[components.parser.model] [components.parser.model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 8 state_type = "parser"
extra_state_tokens = false
hidden_width = 128 hidden_width = 128
maxout_pieces = 3 maxout_pieces = 3
use_upper = true use_upper = true
@ -200,7 +203,8 @@ factory = "ner"
[components.ner.model] [components.ner.model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 6 state_type = "ner"
extra_state_tokens = false
hidden_width = 64 hidden_width = 64
maxout_pieces = 2 maxout_pieces = 2
use_upper = true use_upper = true

View File

@ -480,6 +480,8 @@ class Errors:
E201 = ("Span index out of range.") E201 = ("Span index out of range.")
# TODO: fix numbering after merging develop into master # TODO: fix numbering after merging develop into master
E917 = ("Received invalid value {value} for 'state_type' in "
"TransitionBasedParser: only 'parser' or 'ner' are valid options.")
E918 = ("Received invalid value for vocab: {vocab} ({vocab_type}). Valid " E918 = ("Received invalid value for vocab: {vocab} ({vocab_type}). Valid "
"values are an instance of spacy.vocab.Vocab or True to create one" "values are an instance of spacy.vocab.Vocab or True to create one"
" (default).") " (default).")

View File

@ -2,6 +2,7 @@ from typing import Optional, List
from thinc.api import Model, chain, list2array, Linear, zero_init, use_ops from thinc.api import Model, chain, list2array, Linear, zero_init, use_ops
from thinc.types import Floats2d from thinc.types import Floats2d
from ...errors import Errors
from ...compat import Literal from ...compat import Literal
from ...util import registry from ...util import registry
from .._precomputable_affine import PrecomputableAffine from .._precomputable_affine import PrecomputableAffine
@ -12,7 +13,8 @@ from ...tokens import Doc
@registry.architectures.register("spacy.TransitionBasedParser.v1") @registry.architectures.register("spacy.TransitionBasedParser.v1")
def build_tb_parser_model( def build_tb_parser_model(
tok2vec: Model[List[Doc], List[Floats2d]], tok2vec: Model[List[Doc], List[Floats2d]],
nr_feature_tokens: Literal[3, 6, 8, 13], state_type: Literal["parser", "ner"],
extra_state_tokens: bool,
hidden_width: int, hidden_width: int,
maxout_pieces: int, maxout_pieces: int,
use_upper: bool = True, use_upper: bool = True,
@ -41,20 +43,12 @@ def build_tb_parser_model(
tok2vec (Model[List[Doc], List[Floats2d]]): tok2vec (Model[List[Doc], List[Floats2d]]):
Subnetwork to map tokens into vector representations. Subnetwork to map tokens into vector representations.
nr_feature_tokens (int): The number of tokens in the context to use to state_type (str):
construct the state vector. Valid choices are 3, 6, 8 and 13. The String value denoting the type of parser model: "parser" or "ner"
8 and 13 feature sets are designed for the parser, while the 3 and 6 extra_state_tokens (bool): Whether or not to use additional tokens in the context
feature sets are designed for the NER. The recommended feature sets are to construct the state vector. Defaults to `False`, which means 3 and 8
3 for NER, and 8 for the dependency parser. for the NER and parser respectively. When set to `True`, this would become 6
tokens (for the NER) or 13 tokens (for the parser).
TODO: This feature should be split into two, state_type: ["deps", "ner"]
and extra_state_features: [True, False]. This would map into:
(deps, False): 8
(deps, True): 13
(ner, False): 3
(ner, True): 6
hidden_width (int): The width of the hidden layer. hidden_width (int): The width of the hidden layer.
maxout_pieces (int): How many pieces to use in the state prediction layer. maxout_pieces (int): How many pieces to use in the state prediction layer.
Recommended values are 1, 2 or 3. If 1, the maxout non-linearity Recommended values are 1, 2 or 3. If 1, the maxout non-linearity
@ -69,8 +63,14 @@ def build_tb_parser_model(
Usually inferred from data at the beginning of training, or loaded from Usually inferred from data at the beginning of training, or loaded from
disk. disk.
""" """
if state_type == "parser":
nr_feature_tokens = 13 if extra_state_tokens else 8
elif state_type == "ner":
nr_feature_tokens = 6 if extra_state_tokens else 3
else:
raise ValueError(Errors.E917.format(value=state_type))
t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
tok2vec = chain(tok2vec, list2array(), Linear(hidden_width, t2v_width),) tok2vec = chain(tok2vec, list2array(), Linear(hidden_width, t2v_width))
tok2vec.set_dim("nO", hidden_width) tok2vec.set_dim("nO", hidden_width)
lower = PrecomputableAffine( lower = PrecomputableAffine(
nO=hidden_width if use_upper else nO, nO=hidden_width if use_upper else nO,

View File

@ -15,7 +15,8 @@ from ..training import validate_examples
default_model_config = """ default_model_config = """
[model] [model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 8 state_type = "parser"
extra_state_tokens = false
hidden_width = 64 hidden_width = 64
maxout_pieces = 2 maxout_pieces = 2

View File

@ -13,7 +13,8 @@ from ..training import validate_examples
default_model_config = """ default_model_config = """
[model] [model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 6 state_type = "ner"
extra_state_tokens = false
hidden_width = 64 hidden_width = 64
maxout_pieces = 2 maxout_pieces = 2

View File

@ -67,7 +67,8 @@ width = ${components.tok2vec.model.width}
parser_config_string = """ parser_config_string = """
[model] [model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 3 state_type = "parser"
extra_state_tokens = false
hidden_width = 66 hidden_width = 66
maxout_pieces = 2 maxout_pieces = 2
@ -95,7 +96,11 @@ def my_parser():
MaxoutWindowEncoder(width=321, window_size=3, maxout_pieces=4, depth=2), MaxoutWindowEncoder(width=321, window_size=3, maxout_pieces=4, depth=2),
) )
parser = build_tb_parser_model( parser = build_tb_parser_model(
tok2vec=tok2vec, nr_feature_tokens=8, hidden_width=65, maxout_pieces=5 tok2vec=tok2vec,
state_type="parser",
extra_state_tokens=True,
hidden_width=65,
maxout_pieces=5,
) )
return parser return parser
@ -345,8 +350,8 @@ def test_config_auto_fill_extra_fields():
def test_config_validate_literal(): def test_config_validate_literal():
nlp = English() nlp = English()
config = Config().from_str(parser_config_string) config = Config().from_str(parser_config_string)
config["model"]["nr_feature_tokens"] = 666 config["model"]["state_type"] = "nonsense"
with pytest.raises(ConfigValidationError): with pytest.raises(ConfigValidationError):
nlp.add_pipe("parser", config=config) nlp.add_pipe("parser", config=config)
config["model"]["nr_feature_tokens"] = 13 config["model"]["state_type"] = "ner"
nlp.add_pipe("parser", config=config) nlp.add_pipe("parser", config=config)

View File

@ -414,7 +414,8 @@ one component.
> ```ini > ```ini
> [model] > [model]
> @architectures = "spacy.TransitionBasedParser.v1" > @architectures = "spacy.TransitionBasedParser.v1"
> nr_feature_tokens = 6 > state_type = "ner"
> extra_state_tokens = false
> hidden_width = 64 > hidden_width = 64
> maxout_pieces = 2 > maxout_pieces = 2
> >
@ -446,15 +447,16 @@ consists of either two or three subnetworks:
state representation. If not present, the output from the lower model is used state representation. If not present, the output from the lower model is used
as action scores directly. as action scores directly.
| Name | Description | | Name | Description |
| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ | | `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
| `nr_feature_tokens` | The number of tokens in the context to use to construct the state vector. Valid choices are `3`, `6`, `8` and `13`. The `8` and `13` feature sets are designed for the parser, while the `3` and `6` feature sets are designed for the entity recognizer. The recommended feature sets are `3` for NER, and `8` for the dependency parser. ~~int~~ | | `state_type` | Which task to extract features for. Possible values are "ner" and "parser". ~~str~~ |
| `hidden_width` | The width of the hidden layer. ~~int~~ | | `extra_state_tokens` | Whether to use an expanded feature set when extracting the state tokens. Slightly slower, but can sometimes improve accuracy. Defaults to `False`. ~~bool~~ |
| `maxout_pieces` | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. If `1`, the maxout non-linearity is replaced with a [`Relu`](https://thinc.ai/docs/api-layers#relu) non-linearity if `use_upper` is `True`, and no non-linearity if `False`. ~~int~~ | | `hidden_width` | The width of the hidden layer. ~~int~~ |
| `use_upper` | Whether to use an additional hidden layer after the state vector in order to predict the action scores. It is recommended to set this to `False` for large pretrained models such as transformers, and `True` for smaller networks. The upper layer is computed on CPU, which becomes a bottleneck on larger GPU-based models, where it's also less necessary. ~~bool~~ | | `maxout_pieces` | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. If `1`, the maxout non-linearity is replaced with a [`Relu`](https://thinc.ai/docs/api-layers#relu) non-linearity if `use_upper` is `True`, and no non-linearity if `False`. ~~int~~ |
| `nO` | The number of actions the model will predict between. Usually inferred from data at the beginning of training, or loaded from disk. ~~int~~ | | `use_upper` | Whether to use an additional hidden layer after the state vector in order to predict the action scores. It is recommended to set this to `False` for large pretrained models such as transformers, and `True` for smaller networks. The upper layer is computed on CPU, which becomes a bottleneck on larger GPU-based models, where it's also less necessary. ~~bool~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Docs], List[List[Floats2d]]]~~ | | `nO` | The number of actions the model will predict between. Usually inferred from data at the beginning of training, or loaded from disk. ~~int~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[List[Floats2d]]]~~ |
## Tagging architectures {#tagger source="spacy/ml/models/tagger.py"} ## Tagging architectures {#tagger source="spacy/ml/models/tagger.py"}

View File

@ -448,7 +448,8 @@ factory = "ner"
[nlp.pipeline.ner.model] [nlp.pipeline.ner.model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 3 state_type = "ner"
extra_state_tokens = false
hidden_width = 128 hidden_width = 128
maxout_pieces = 3 maxout_pieces = 3
use_upper = false use_upper = false