Wire up parser model

Matthew Honnibal 2021-10-25 12:50:33 +02:00
parent 71abe2e42d
commit 45ca12f07a

@@ -1,13 +1,15 @@
-from typing import Optional, List
-from thinc.api import Model, chain, list2array, Linear, zero_init, use_ops
+from typing import Optional, List, Tuple, Any
 from thinc.types import Floats2d
+from thinc.api import Model
 from ...errors import Errors
 from ...compat import Literal
 from ...util import registry
-from .._precomputable_affine import PrecomputableAffine
 from ..tb_framework import TransitionModel
-from ...tokens import Doc
+from ...tokens.doc import Doc
+
+TransitionSystem = Any  # TODO
+State = Any  # TODO
 
 @registry.architectures.register("spacy.TransitionBasedParser.v1")
@@ -19,7 +21,7 @@ def transition_parser_v1(
     maxout_pieces: int,
     use_upper: bool = True,
     nO: Optional[int] = None,
-) -> Model:
+) -> Model[Tuple[List[Doc], TransitionSystem], List[Tuple[State, List[Floats2d]]]]:
     return build_tb_parser_model(
         tok2vec,
         state_type,
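The v1 factory's return annotation now spells out the forward signature: the model consumes the docs together with a transition system and produces (state, scores) pairs, with TransitionSystem and State stubbed as Any for now. A minimal standalone sketch of how that parameterized thinc type reads (the spacy.tokens import path is assumed for the snippet, not taken from the commit):

from typing import Any, List, Tuple

from thinc.api import Model
from thinc.types import Floats2d
from spacy.tokens import Doc

TransitionSystem = Any  # placeholder, mirroring the commit's TODO
State = Any  # placeholder, mirroring the commit's TODO

# First type parameter: forward input, the docs plus the transition system.
# Second: forward output, a list of (state, per-step score matrices) pairs.
ParserModel = Model[
    Tuple[List[Doc], TransitionSystem],
    List[Tuple[State, List[Floats2d]]],
]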
@@ -47,8 +49,26 @@ def transition_parser_v2(
         extra_state_tokens,
         hidden_width,
         maxout_pieces,
-        use_upper,
-        nO,
+        nO=nO,
     )
+
+
+@registry.architectures.register("spacy.TransitionBasedParser.v3")
+def transition_parser_v2(
+    tok2vec: Model[List[Doc], List[Floats2d]],
+    state_type: Literal["parser", "ner"],
+    extra_state_tokens: bool,
+    hidden_width: int,
+    maxout_pieces: int,
+    nO: Optional[int] = None,
+) -> Model:
+    return build_tb_parser_model(
+        tok2vec,
+        state_type,
+        extra_state_tokens,
+        hidden_width,
+        maxout_pieces,
+        nO=nO,
+    )
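Note that the new v3 registration decorates a function still named transition_parser_v2. The registry keys entries on the string passed to register() at decoration time, so both versions stay resolvable; only the module-level Python name is shadowed. A quick check, assuming spaCy with this commit installed:

from spacy.util import registry

# Each registered name maps to the function object that was decorated,
# regardless of the reused Python identifier.
make_v2 = registry.architectures.get("spacy.TransitionBasedParser.v2")
make_v3 = registry.architectures.get("spacy.TransitionBasedParser.v3")
assert make_v2 is not make_v3  # two distinct factories in the registry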
@@ -58,7 +78,6 @@ def build_tb_parser_model(
     extra_state_tokens: bool,
     hidden_width: int,
     maxout_pieces: int,
-    use_upper: bool,
     nO: Optional[int] = None,
 ) -> Model:
     """
@@ -110,102 +129,11 @@ def build_tb_parser_model(
         nr_feature_tokens = 6 if extra_state_tokens else 3
     else:
         raise ValueError(Errors.E917.format(value=state_type))
-    t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
-    tok2vec = chain(tok2vec, list2array(), Linear(hidden_width, t2v_width))
-    tok2vec.set_dim("nO", hidden_width)
-    lower = _define_lower(
-        nO=hidden_width if use_upper else nO,
-        nF=nr_feature_tokens,
-        nI=tok2vec.get_dim("nO"),
-        nP=maxout_pieces,
-    )
-    upper = None
-    if use_upper:
-        with use_ops("numpy"):
-            # Initialize weights at zero, as it's a classification layer.
-            upper = _define_upper(nO=nO, nI=None)
-    return TransitionModel(tok2vec, lower, upper, resize_output)
-
-
-def _define_upper(nO, nI):
-    return Linear(nO=nO, nI=nI, init_W=zero_init)
-
-
-def _define_lower(nO, nF, nI, nP):
-    return PrecomputableAffine(nO=nO, nF=nF, nI=nI, nP=nP)
-
-
-def resize_output(model, new_nO):
-    if model.attrs["has_upper"]:
-        return _resize_upper(model, new_nO)
-    return _resize_lower(model, new_nO)
-
-
-def _resize_upper(model, new_nO):
-    upper = model.get_ref("upper")
-    if upper.has_dim("nO") is None:
-        upper.set_dim("nO", new_nO)
-        return model
-    elif new_nO == upper.get_dim("nO"):
-        return model
-
-    smaller = upper
-    nI = smaller.maybe_get_dim("nI")
-    with use_ops("numpy"):
-        larger = _define_upper(nO=new_nO, nI=nI)
-    # it could be that the model is not initialized yet, then skip this bit
-    if smaller.has_param("W"):
-        larger_W = larger.ops.alloc2f(new_nO, nI)
-        larger_b = larger.ops.alloc1f(new_nO)
-        smaller_W = smaller.get_param("W")
-        smaller_b = smaller.get_param("b")
-        # Weights are stored in (nr_out, nr_in) format, so we're basically
-        # just adding rows here.
-        if smaller.has_dim("nO"):
-            old_nO = smaller.get_dim("nO")
-            larger_W[:old_nO] = smaller_W
-            larger_b[:old_nO] = smaller_b
-            for i in range(old_nO, new_nO):
-                model.attrs["unseen_classes"].add(i)
-        larger.set_param("W", larger_W)
-        larger.set_param("b", larger_b)
-    model._layers[-1] = larger
-    model.set_ref("upper", larger)
-    return model
-
-
-def _resize_lower(model, new_nO):
-    lower = model.get_ref("lower")
-    if lower.has_dim("nO") is None:
-        lower.set_dim("nO", new_nO)
-        return model
-
-    smaller = lower
-    nI = smaller.maybe_get_dim("nI")
-    nF = smaller.maybe_get_dim("nF")
-    nP = smaller.maybe_get_dim("nP")
-    larger = _define_lower(nO=new_nO, nI=nI, nF=nF, nP=nP)
-    # it could be that the model is not initialized yet, then skip this bit
-    if smaller.has_param("W"):
-        larger_W = larger.ops.alloc4f(nF, new_nO, nP, nI)
-        larger_b = larger.ops.alloc2f(new_nO, nP)
-        larger_pad = larger.ops.alloc4f(1, nF, new_nO, nP)
-        smaller_W = smaller.get_param("W")
-        smaller_b = smaller.get_param("b")
-        smaller_pad = smaller.get_param("pad")
-        # Copy the old weights and padding into the new layer
-        if smaller.has_dim("nO"):
-            old_nO = smaller.get_dim("nO")
-            larger_W[:, 0:old_nO, :, :] = smaller_W
-            larger_pad[:, :, 0:old_nO, :] = smaller_pad
-            larger_b[0:old_nO, :] = smaller_b
-            for i in range(old_nO, new_nO):
-                model.attrs["unseen_classes"].add(i)
-        larger.set_param("W", larger_W)
-        larger.set_param("b", larger_b)
-        larger.set_param("pad", larger_pad)
-    model._layers[1] = larger
-    model.set_ref("lower", larger)
-    return model
+    return TransitionModel(
+        tok2vec=tok2vec,
+        state_tokens=nr_feature_tokens,
+        hidden_width=hidden_width,
+        maxout_pieces=maxout_pieces,
+        nO=nO,
+        unseen_classes=set(),
+    )
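Most of the deletion is the output-resizing machinery, which evidently moves behind the TransitionModel interface (note the new unseen_classes=set() argument above). The pattern being removed is worth keeping in mind: because weights are stored in (nr_out, nr_in) format, growing the class set means allocating a larger zeroed array, copying the old rows in, and recording the fresh row indices as unseen classes. A standalone numpy sketch of that row-copy pattern (illustrative, not the spaCy implementation):

import numpy


def resize_rows(W: numpy.ndarray, b: numpy.ndarray, new_nO: int):
    """Grow a (nO, nI) weight matrix and (nO,) bias to new_nO outputs,
    zero-filling the added rows, as the deleted _resize_upper did."""
    old_nO, nI = W.shape
    larger_W = numpy.zeros((new_nO, nI), dtype=W.dtype)
    larger_b = numpy.zeros((new_nO,), dtype=b.dtype)
    larger_W[:old_nO] = W  # weights are (nr_out, nr_in): new classes add rows
    larger_b[:old_nO] = b
    unseen = set(range(old_nO, new_nO))  # classes with no examples seen yet
    return larger_W, larger_b, unseen


W = numpy.ones((3, 4), dtype="f")
b = numpy.ones((3,), dtype="f")
W2, b2, unseen = resize_rows(W, b, new_nO=5)
assert W2.shape == (5, 4) and unseen == {3, 4}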