spaCy/spacy/ml/models/parser.py
Adriane Boyd ec45f704b1
Drop python 3.6/3.7, remove unneeded compat (#12187)
* Drop python 3.6/3.7, remove unneeded compat

* Remove unused import

* Minimal python 3.8+ docs updates
2023-01-27 15:48:20 +01:00

115 lines
3.9 KiB
Python

from typing import Optional, List, Tuple, Any, Literal
from thinc.types import Floats2d
from thinc.api import Model
import warnings
from ...errors import Errors, Warnings
from ...util import registry
from ..tb_framework import TransitionModel
from ...tokens.doc import Doc
TransitionSystem = Any # TODO
State = Any # TODO
@registry.architectures.register("spacy.TransitionBasedParser.v2")
def transition_parser_v2(
tok2vec: Model[List[Doc], List[Floats2d]],
state_type: Literal["parser", "ner"],
extra_state_tokens: bool,
hidden_width: int,
maxout_pieces: int,
use_upper: bool,
nO: Optional[int] = None,
) -> Model:
if not use_upper:
warnings.warn(Warnings.W400)
return build_tb_parser_model(
tok2vec,
state_type,
extra_state_tokens,
hidden_width,
maxout_pieces,
nO=nO,
)
@registry.architectures.register("spacy.TransitionBasedParser.v3")
def transition_parser_v3(
tok2vec: Model[List[Doc], List[Floats2d]],
state_type: Literal["parser", "ner"],
extra_state_tokens: bool,
hidden_width: int,
maxout_pieces: int,
nO: Optional[int] = None,
) -> Model:
return build_tb_parser_model(
tok2vec,
state_type,
extra_state_tokens,
hidden_width,
maxout_pieces,
nO=nO,
)
def build_tb_parser_model(
tok2vec: Model[List[Doc], List[Floats2d]],
state_type: Literal["parser", "ner"],
extra_state_tokens: bool,
hidden_width: int,
maxout_pieces: int,
nO: Optional[int] = None,
) -> Model:
"""
Build a transition-based parser model. Can apply to NER or dependency-parsing.
Transition-based parsing is an approach to structured prediction where the
task of predicting the structure is mapped to a series of state transitions.
You might find this tutorial helpful as background:
https://explosion.ai/blog/parsing-english-in-python
The neural network state prediction model consists of either two or three
subnetworks:
* tok2vec: Map each token into a vector representations. This subnetwork
is run once for each batch.
* lower: Construct a feature-specific vector for each (token, feature) pair.
This is also run once for each batch. Constructing the state
representation is then simply a matter of summing the component features
and applying the non-linearity.
* upper (optional): A feed-forward network that predicts scores from the
state representation. If not present, the output from the lower model is
used as action scores directly.
tok2vec (Model[List[Doc], List[Floats2d]]):
Subnetwork to map tokens into vector representations.
state_type (str):
String value denoting the type of parser model: "parser" or "ner"
extra_state_tokens (bool): Whether or not to use additional tokens in the context
to construct the state vector. Defaults to `False`, which means 3 and 8
for the NER and parser respectively. When set to `True`, this would become 6
feature sets (for the NER) or 13 (for the parser).
hidden_width (int): The width of the hidden layer.
maxout_pieces (int): How many pieces to use in the state prediction layer.
Recommended values are 1, 2 or 3.
nO (int or None): The number of actions the model will predict between.
Usually inferred from data at the beginning of training, or loaded from
disk.
"""
if state_type == "parser":
nr_feature_tokens = 13 if extra_state_tokens else 8
elif state_type == "ner":
nr_feature_tokens = 6 if extra_state_tokens else 3
else:
raise ValueError(Errors.E917.format(value=state_type))
return TransitionModel(
tok2vec=tok2vec,
state_tokens=nr_feature_tokens,
hidden_width=hidden_width,
maxout_pieces=maxout_pieces,
nO=nO,
unseen_classes=set(),
)