Mirror of https://github.com/explosion/spaCy.git (synced 2025-07-10 16:22:29 +03:00)

Wire up parser model

commit 45ca12f07a
parent 71abe2e42d
@@ -1,13 +1,15 @@
-from typing import Optional, List
-from thinc.api import Model, chain, list2array, Linear, zero_init, use_ops
+from typing import Optional, List, Tuple, Any
 from thinc.types import Floats2d
+from thinc.api import Model
 from ...errors import Errors
 from ...compat import Literal
 from ...util import registry
-from .._precomputable_affine import PrecomputableAffine
 from ..tb_framework import TransitionModel
-from ...tokens import Doc
+from ...tokens.doc import Doc
 
+TransitionSystem = Any  # TODO
+State = Any  # TODO
+
 
 
 @registry.architectures.register("spacy.TransitionBasedParser.v1")
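The two `Any` aliases added above are deliberate placeholders: the return annotation introduced in the next hunk can already name `TransitionSystem` and `State`, and the aliases can later be narrowed to the real classes without touching every signature. A minimal sketch of the pattern, with a hypothetical function that is not part of this commit:

    from typing import Any, List

    State = Any  # TODO: narrow to the real state class once it is importable

    def apply_transitions(states: List[State]) -> List[State]:
        # Annotations that name the alias keep working unchanged when the
        # alias is later replaced by a concrete type.
        return states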
@@ -19,7 +21,7 @@ def transition_parser_v1(
     maxout_pieces: int,
     use_upper: bool = True,
     nO: Optional[int] = None,
-) -> Model:
+) -> Model[Tuple[List[Doc], TransitionSystem], List[Tuple[State, List[Floats2d]]]]:
     return build_tb_parser_model(
         tok2vec,
         state_type,
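The only change to `transition_parser_v1` is its return annotation, which now uses Thinc's parameterized `Model[InT, OutT]` type: the model is declared to take a `(docs, moves)` tuple and to produce a list of `(state, scores)` pairs. These parameters exist purely for type checkers and are erased at runtime. A minimal sketch of the convention, using a stock Thinc layer rather than anything from this commit:

    from thinc.api import Linear, Model
    from thinc.types import Floats2d

    # Linear() is typed as Model[Floats2d, Floats2d]: a 2d float array in,
    # a 2d float array out. A type checker can then flag compositions whose
    # input and output types don't line up.
    layer: Model[Floats2d, Floats2d] = Linear(nO=4, nI=8)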
@@ -47,8 +49,26 @@ def transition_parser_v2(
         extra_state_tokens,
         hidden_width,
         maxout_pieces,
-        use_upper,
-        nO,
+        nO=nO,
     )
 
 
+@registry.architectures.register("spacy.TransitionBasedParser.v3")
+def transition_parser_v2(
+    tok2vec: Model[List[Doc], List[Floats2d]],
+    state_type: Literal["parser", "ner"],
+    extra_state_tokens: bool,
+    hidden_width: int,
+    maxout_pieces: int,
+    nO: Optional[int] = None,
+) -> Model:
+    return build_tb_parser_model(
+        tok2vec,
+        state_type,
+        extra_state_tokens,
+        hidden_width,
+        maxout_pieces,
+        nO=nO,
+    )
+
+
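Note that `registry.architectures.register` captures the function object at decoration time, so registering the new entry as `spacy.TransitionBasedParser.v3` works even though the added function reuses the name `transition_parser_v2` (the second def shadows the first in the module namespace, but each registry entry keeps its own callable). Consumers never use the Python name; they resolve the string name through the registry. A short sketch of that lookup, assuming an environment with spaCy installed:

    from spacy.util import registry

    # Returns the decorated function that was registered under this name;
    # a training config refers to it the same way, via "@architectures".
    make_parser_model = registry.architectures.get("spacy.TransitionBasedParser.v1")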
@@ -58,7 +78,6 @@ def build_tb_parser_model(
     extra_state_tokens: bool,
     hidden_width: int,
     maxout_pieces: int,
-    use_upper: bool,
     nO: Optional[int] = None,
 ) -> Model:
     """
@@ -110,102 +129,11 @@ def build_tb_parser_model(
         nr_feature_tokens = 6 if extra_state_tokens else 3
     else:
         raise ValueError(Errors.E917.format(value=state_type))
-    t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
-    tok2vec = chain(tok2vec, list2array(), Linear(hidden_width, t2v_width))
-    tok2vec.set_dim("nO", hidden_width)
-    lower = _define_lower(
-        nO=hidden_width if use_upper else nO,
-        nF=nr_feature_tokens,
-        nI=tok2vec.get_dim("nO"),
-        nP=maxout_pieces,
-    )
-    upper = None
-    if use_upper:
-        with use_ops("numpy"):
-            # Initialize weights at zero, as it's a classification layer.
-            upper = _define_upper(nO=nO, nI=None)
-    return TransitionModel(tok2vec, lower, upper, resize_output)
-
-
-def _define_upper(nO, nI):
-    return Linear(nO=nO, nI=nI, init_W=zero_init)
-
-
-def _define_lower(nO, nF, nI, nP):
-    return PrecomputableAffine(nO=nO, nF=nF, nI=nI, nP=nP)
-
-
-def resize_output(model, new_nO):
-    if model.attrs["has_upper"]:
-        return _resize_upper(model, new_nO)
-    return _resize_lower(model, new_nO)
-
-
-def _resize_upper(model, new_nO):
-    upper = model.get_ref("upper")
-    if upper.has_dim("nO") is None:
-        upper.set_dim("nO", new_nO)
-        return model
-    elif new_nO == upper.get_dim("nO"):
-        return model
-
-    smaller = upper
-    nI = smaller.maybe_get_dim("nI")
-    with use_ops("numpy"):
-        larger = _define_upper(nO=new_nO, nI=nI)
-    # it could be that the model is not initialized yet, then skip this bit
-    if smaller.has_param("W"):
-        larger_W = larger.ops.alloc2f(new_nO, nI)
-        larger_b = larger.ops.alloc1f(new_nO)
-        smaller_W = smaller.get_param("W")
-        smaller_b = smaller.get_param("b")
-        # Weights are stored in (nr_out, nr_in) format, so we're basically
-        # just adding rows here.
-        if smaller.has_dim("nO"):
-            old_nO = smaller.get_dim("nO")
-            larger_W[:old_nO] = smaller_W
-            larger_b[:old_nO] = smaller_b
-            for i in range(old_nO, new_nO):
-                model.attrs["unseen_classes"].add(i)
-
-        larger.set_param("W", larger_W)
-        larger.set_param("b", larger_b)
-        model._layers[-1] = larger
-        model.set_ref("upper", larger)
-    return model
-
-
-def _resize_lower(model, new_nO):
-    lower = model.get_ref("lower")
-    if lower.has_dim("nO") is None:
-        lower.set_dim("nO", new_nO)
-        return model
-
-    smaller = lower
-    nI = smaller.maybe_get_dim("nI")
-    nF = smaller.maybe_get_dim("nF")
-    nP = smaller.maybe_get_dim("nP")
-    larger = _define_lower(nO=new_nO, nI=nI, nF=nF, nP=nP)
-    # it could be that the model is not initialized yet, then skip this bit
-    if smaller.has_param("W"):
-        larger_W = larger.ops.alloc4f(nF, new_nO, nP, nI)
-        larger_b = larger.ops.alloc2f(new_nO, nP)
-        larger_pad = larger.ops.alloc4f(1, nF, new_nO, nP)
-        smaller_W = smaller.get_param("W")
-        smaller_b = smaller.get_param("b")
-        smaller_pad = smaller.get_param("pad")
-        # Copy the old weights and padding into the new layer
-        if smaller.has_dim("nO"):
-            old_nO = smaller.get_dim("nO")
-            larger_W[:, 0:old_nO, :, :] = smaller_W
-            larger_pad[:, :, 0:old_nO, :] = smaller_pad
-            larger_b[0:old_nO, :] = smaller_b
-            for i in range(old_nO, new_nO):
-                model.attrs["unseen_classes"].add(i)
-
-        larger.set_param("W", larger_W)
-        larger.set_param("b", larger_b)
-        larger.set_param("pad", larger_pad)
-        model._layers[1] = larger
-        model.set_ref("lower", larger)
-    return model
+    return TransitionModel(
+        tok2vec=tok2vec,
+        state_tokens=nr_feature_tokens,
+        hidden_width=hidden_width,
+        maxout_pieces=maxout_pieces,
+        nO=nO,
+        unseen_classes=set(),
+    )
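The deleted helpers built the parser's lower (`PrecomputableAffine`) and optional upper (`Linear`) layers by hand and implemented output resizing: allocate a larger zero-initialized parameter block, copy the learned rows in, and record the fresh rows as unseen classes. After this commit that responsibility moves into `TransitionModel` (note the `unseen_classes=set()` argument above). A standalone numpy sketch of the row-copy idea behind the removed `_resize_upper`, with hypothetical names:

    import numpy

    def grow_output_rows(W, b, new_nO):
        # Resize a (nO, nI) weight matrix and (nO,) bias to new_nO outputs.
        # Learned rows are copied; new rows stay zero, matching the zero-init
        # of a classification layer.
        old_nO, nI = W.shape
        larger_W = numpy.zeros((new_nO, nI), dtype=W.dtype)
        larger_b = numpy.zeros((new_nO,), dtype=b.dtype)
        larger_W[:old_nO] = W  # weights are (nr_out, nr_in): adding rows
        larger_b[:old_nO] = b
        # Rows past old_nO have never been trained: the caller tracks them
        # as "unseen classes" until they receive updates.
        unseen = set(range(old_nO, new_nO))
        return larger_W, larger_b, unseen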