mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	state_type and extra_state_tokens instead of nr_feature_tokens
This commit is contained in:
		
							parent
							
								
									ae5dacf75f
								
							
						
					
					
						commit
						6c85fab316
					
				| 
						 | 
					@ -59,7 +59,8 @@ factory = "parser"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[components.parser.model]
 | 
					[components.parser.model]
 | 
				
			||||||
@architectures = "spacy.TransitionBasedParser.v1"
 | 
					@architectures = "spacy.TransitionBasedParser.v1"
 | 
				
			||||||
nr_feature_tokens = 8
 | 
					state_type = "deps"
 | 
				
			||||||
 | 
					extra_state_tokens = false
 | 
				
			||||||
hidden_width = 128
 | 
					hidden_width = 128
 | 
				
			||||||
maxout_pieces = 3
 | 
					maxout_pieces = 3
 | 
				
			||||||
use_upper = false
 | 
					use_upper = false
 | 
				
			||||||
| 
						 | 
					@ -79,7 +80,8 @@ factory = "ner"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[components.ner.model]
 | 
					[components.ner.model]
 | 
				
			||||||
@architectures = "spacy.TransitionBasedParser.v1"
 | 
					@architectures = "spacy.TransitionBasedParser.v1"
 | 
				
			||||||
nr_feature_tokens = 3
 | 
					state_type = "ner"
 | 
				
			||||||
 | 
					extra_state_tokens = false
 | 
				
			||||||
hidden_width = 64
 | 
					hidden_width = 64
 | 
				
			||||||
maxout_pieces = 2
 | 
					maxout_pieces = 2
 | 
				
			||||||
use_upper = false
 | 
					use_upper = false
 | 
				
			||||||
| 
						 | 
					@ -183,7 +185,8 @@ factory = "parser"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[components.parser.model]
 | 
					[components.parser.model]
 | 
				
			||||||
@architectures = "spacy.TransitionBasedParser.v1"
 | 
					@architectures = "spacy.TransitionBasedParser.v1"
 | 
				
			||||||
nr_feature_tokens = 8
 | 
					state_type = "deps"
 | 
				
			||||||
 | 
					extra_state_tokens = false
 | 
				
			||||||
hidden_width = 128
 | 
					hidden_width = 128
 | 
				
			||||||
maxout_pieces = 3
 | 
					maxout_pieces = 3
 | 
				
			||||||
use_upper = true
 | 
					use_upper = true
 | 
				
			||||||
| 
						 | 
					@ -200,7 +203,8 @@ factory = "ner"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[components.ner.model]
 | 
					[components.ner.model]
 | 
				
			||||||
@architectures = "spacy.TransitionBasedParser.v1"
 | 
					@architectures = "spacy.TransitionBasedParser.v1"
 | 
				
			||||||
nr_feature_tokens = 6
 | 
					state_type = "ner"
 | 
				
			||||||
 | 
					extra_state_tokens = false
 | 
				
			||||||
hidden_width = 64
 | 
					hidden_width = 64
 | 
				
			||||||
maxout_pieces = 2
 | 
					maxout_pieces = 2
 | 
				
			||||||
use_upper = true
 | 
					use_upper = true
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -11,7 +11,8 @@ from ...tokens import Doc
 | 
				
			||||||
@registry.architectures.register("spacy.TransitionBasedParser.v1")
 | 
					@registry.architectures.register("spacy.TransitionBasedParser.v1")
 | 
				
			||||||
def build_tb_parser_model(
 | 
					def build_tb_parser_model(
 | 
				
			||||||
    tok2vec: Model[List[Doc], List[Floats2d]],
 | 
					    tok2vec: Model[List[Doc], List[Floats2d]],
 | 
				
			||||||
    nr_feature_tokens: int,
 | 
					    state_type: str,
 | 
				
			||||||
 | 
					    extra_state_tokens: bool,
 | 
				
			||||||
    hidden_width: int,
 | 
					    hidden_width: int,
 | 
				
			||||||
    maxout_pieces: int,
 | 
					    maxout_pieces: int,
 | 
				
			||||||
    use_upper: bool = True,
 | 
					    use_upper: bool = True,
 | 
				
			||||||
| 
						 | 
					@ -40,20 +41,12 @@ def build_tb_parser_model(
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    tok2vec (Model[List[Doc], List[Floats2d]]):
 | 
					    tok2vec (Model[List[Doc], List[Floats2d]]):
 | 
				
			||||||
        Subnetwork to map tokens into vector representations.
 | 
					        Subnetwork to map tokens into vector representations.
 | 
				
			||||||
    nr_feature_tokens (int): The number of tokens in the context to use to
 | 
					    state_type (str):
 | 
				
			||||||
        construct the state vector. Valid choices are 1, 2, 3, 6, 8 and 13. The
 | 
					        String value denoting the type of parser model: "deps" or "ner"
 | 
				
			||||||
        2, 8 and 13 feature sets are designed for the parser, while the 3 and 6
 | 
					    extra_state_tokens (bool): Whether or not to use additional tokens in the context
 | 
				
			||||||
        feature sets are designed for the NER. The recommended feature sets are
 | 
					        to construct the state vector. Defaults to `False`, which means 3 and 8
 | 
				
			||||||
        3 for NER, and 8 for the dependency parser.
 | 
					        for the NER and parser respectively. When set to `True`, this would become 6
 | 
				
			||||||
 | 
					        feature sets (for the NER) or 13 (for the parser).
 | 
				
			||||||
        TODO: This feature should be split into two, state_type: ["deps", "ner"]
 | 
					 | 
				
			||||||
        and extra_state_features: [True, False]. This would map into:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        (deps, False): 8
 | 
					 | 
				
			||||||
        (deps, True): 13
 | 
					 | 
				
			||||||
        (ner, False): 3
 | 
					 | 
				
			||||||
        (ner, True): 6
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    hidden_width (int): The width of the hidden layer.
 | 
					    hidden_width (int): The width of the hidden layer.
 | 
				
			||||||
    maxout_pieces (int): How many pieces to use in the state prediction layer.
 | 
					    maxout_pieces (int): How many pieces to use in the state prediction layer.
 | 
				
			||||||
        Recommended values are 1, 2 or 3. If 1, the maxout non-linearity
 | 
					        Recommended values are 1, 2 or 3. If 1, the maxout non-linearity
 | 
				
			||||||
| 
						 | 
					@ -68,8 +61,14 @@ def build_tb_parser_model(
 | 
				
			||||||
        Usually inferred from data at the beginning of training, or loaded from
 | 
					        Usually inferred from data at the beginning of training, or loaded from
 | 
				
			||||||
        disk.
 | 
					        disk.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 | 
					    if state_type == "deps":
 | 
				
			||||||
 | 
					        nr_feature_tokens = 13 if extra_state_tokens else 8
 | 
				
			||||||
 | 
					    elif state_type == "ner":
 | 
				
			||||||
 | 
					        nr_feature_tokens = 6 if extra_state_tokens else 3
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        raise ValueError(f"unknown state type {state_type}")  # TODO error
 | 
				
			||||||
    t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
 | 
					    t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
 | 
				
			||||||
    tok2vec = chain(tok2vec, list2array(), Linear(hidden_width, t2v_width),)
 | 
					    tok2vec = chain(tok2vec, list2array(), Linear(hidden_width, t2v_width))
 | 
				
			||||||
    tok2vec.set_dim("nO", hidden_width)
 | 
					    tok2vec.set_dim("nO", hidden_width)
 | 
				
			||||||
    lower = PrecomputableAffine(
 | 
					    lower = PrecomputableAffine(
 | 
				
			||||||
        nO=hidden_width if use_upper else nO,
 | 
					        nO=hidden_width if use_upper else nO,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -15,7 +15,8 @@ from ..training import validate_examples
 | 
				
			||||||
default_model_config = """
 | 
					default_model_config = """
 | 
				
			||||||
[model]
 | 
					[model]
 | 
				
			||||||
@architectures = "spacy.TransitionBasedParser.v1"
 | 
					@architectures = "spacy.TransitionBasedParser.v1"
 | 
				
			||||||
nr_feature_tokens = 8
 | 
					state_type = "deps"
 | 
				
			||||||
 | 
					extra_state_tokens = false
 | 
				
			||||||
hidden_width = 64
 | 
					hidden_width = 64
 | 
				
			||||||
maxout_pieces = 2
 | 
					maxout_pieces = 2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -13,7 +13,8 @@ from ..training import validate_examples
 | 
				
			||||||
default_model_config = """
 | 
					default_model_config = """
 | 
				
			||||||
[model]
 | 
					[model]
 | 
				
			||||||
@architectures = "spacy.TransitionBasedParser.v1"
 | 
					@architectures = "spacy.TransitionBasedParser.v1"
 | 
				
			||||||
nr_feature_tokens = 6
 | 
					state_type = "ner"
 | 
				
			||||||
 | 
					extra_state_tokens = false
 | 
				
			||||||
hidden_width = 64
 | 
					hidden_width = 64
 | 
				
			||||||
maxout_pieces = 2
 | 
					maxout_pieces = 2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -67,7 +67,8 @@ width = ${components.tok2vec.model.width}
 | 
				
			||||||
parser_config_string = """
 | 
					parser_config_string = """
 | 
				
			||||||
[model]
 | 
					[model]
 | 
				
			||||||
@architectures = "spacy.TransitionBasedParser.v1"
 | 
					@architectures = "spacy.TransitionBasedParser.v1"
 | 
				
			||||||
nr_feature_tokens = 99
 | 
					state_type = "deps"
 | 
				
			||||||
 | 
					extra_state_tokens = false
 | 
				
			||||||
hidden_width = 66
 | 
					hidden_width = 66
 | 
				
			||||||
maxout_pieces = 2
 | 
					maxout_pieces = 2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -95,7 +96,11 @@ def my_parser():
 | 
				
			||||||
        MaxoutWindowEncoder(width=321, window_size=3, maxout_pieces=4, depth=2),
 | 
					        MaxoutWindowEncoder(width=321, window_size=3, maxout_pieces=4, depth=2),
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
    parser = build_tb_parser_model(
 | 
					    parser = build_tb_parser_model(
 | 
				
			||||||
        tok2vec=tok2vec, nr_feature_tokens=7, hidden_width=65, maxout_pieces=5
 | 
					        tok2vec=tok2vec,
 | 
				
			||||||
 | 
					        state_type="deps",
 | 
				
			||||||
 | 
					        extra_state_tokens=True,
 | 
				
			||||||
 | 
					        hidden_width=65,
 | 
				
			||||||
 | 
					        maxout_pieces=5,
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
    return parser
 | 
					    return parser
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -414,7 +414,8 @@ one component.
 | 
				
			||||||
> ```ini
 | 
					> ```ini
 | 
				
			||||||
> [model]
 | 
					> [model]
 | 
				
			||||||
> @architectures = "spacy.TransitionBasedParser.v1"
 | 
					> @architectures = "spacy.TransitionBasedParser.v1"
 | 
				
			||||||
> nr_feature_tokens = 6
 | 
					> state_type = "ner"
 | 
				
			||||||
 | 
					> extra_state_tokens = false
 | 
				
			||||||
> hidden_width = 64
 | 
					> hidden_width = 64
 | 
				
			||||||
> maxout_pieces = 2
 | 
					> maxout_pieces = 2
 | 
				
			||||||
>
 | 
					>
 | 
				
			||||||
| 
						 | 
					@ -447,9 +448,10 @@ consists of either two or three subnetworks:
 | 
				
			||||||
  as action scores directly.
 | 
					  as action scores directly.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| Name                 | Description                                                                                                                                                                                                                                                                                                                                                             |
 | 
					| Name                 | Description                                                                                                                                                                                                                                                                                                                                                             |
 | 
				
			||||||
| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | 
					| -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | 
				
			||||||
| `tok2vec`            | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                                                                                                                              |
 | 
					| `tok2vec`            | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                                                                                                                              |
 | 
				
			||||||
| `nr_feature_tokens` | The number of tokens in the context to use to construct the state vector. Valid choices are `1`, `2`, `3`, `6`, `8` and `13`. The `2`, `8` and `13` feature sets are designed for the parser, while the `3` and `6` feature sets are designed for the entity recognizer. The recommended feature sets are `3` for NER, and `8` for the dependency parser. ~~int~~       |
 | 
					| `state_type`         | Which task to extract features for. Possible values are "ner" and "dependencies". ~~str~~                                                                                                                                                                                                                                                                               |
 | 
				
			||||||
 | 
					| `extra_state_tokens` | Whether to use an expanded feature set when extracting the state tokens. Slightly slower, but sometimes improves accuracy slightly. Defaults to `False`. ~~bool~~                                                                                                                                                                                                       |
 | 
				
			||||||
| `hidden_width`       | The width of the hidden layer. ~~int~~                                                                                                                                                                                                                                                                                                                                  |
 | 
					| `hidden_width`       | The width of the hidden layer. ~~int~~                                                                                                                                                                                                                                                                                                                                  |
 | 
				
			||||||
| `maxout_pieces`      | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. If `1`, the maxout non-linearity is replaced with a [`Relu`](https://thinc.ai/docs/api-layers#relu) non-linearity if `use_upper` is `True`, and no non-linearity if `False`. ~~int~~                                                                                      |
 | 
					| `maxout_pieces`      | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. If `1`, the maxout non-linearity is replaced with a [`Relu`](https://thinc.ai/docs/api-layers#relu) non-linearity if `use_upper` is `True`, and no non-linearity if `False`. ~~int~~                                                                                      |
 | 
				
			||||||
| `use_upper`          | Whether to use an additional hidden layer after the state vector in order to predict the action scores. It is recommended to set this to `False` for large pretrained models such as transformers, and `True` for smaller networks. The upper layer is computed on CPU, which becomes a bottleneck on larger GPU-based models, where it's also less necessary. ~~bool~~ |
 | 
					| `use_upper`          | Whether to use an additional hidden layer after the state vector in order to predict the action scores. It is recommended to set this to `False` for large pretrained models such as transformers, and `True` for smaller networks. The upper layer is computed on CPU, which becomes a bottleneck on larger GPU-based models, where it's also less necessary. ~~bool~~ |
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -448,7 +448,8 @@ factory = "ner"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[nlp.pipeline.ner.model]
 | 
					[nlp.pipeline.ner.model]
 | 
				
			||||||
@architectures = "spacy.TransitionBasedParser.v1"
 | 
					@architectures = "spacy.TransitionBasedParser.v1"
 | 
				
			||||||
nr_feature_tokens = 3
 | 
					state_type = "ner"
 | 
				
			||||||
 | 
					extra_state_tokens = false
 | 
				
			||||||
hidden_width = 128
 | 
					hidden_width = 128
 | 
				
			||||||
maxout_pieces = 3
 | 
					maxout_pieces = 3
 | 
				
			||||||
use_upper = false
 | 
					use_upper = false
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user