mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Add dropout to parser hidden layer
This commit is contained in:
parent
1eb1654941
commit
d1fd3438c3
|
@ -1,13 +1,14 @@
|
|||
from thinc.api import Model, normal_init
|
||||
|
||||
|
||||
def PrecomputableAffine(nO, nI, nF, nP):
|
||||
def PrecomputableAffine(nO, nI, nF, nP, dropout=0.1):
|
||||
model = Model(
|
||||
"precomputable_affine",
|
||||
forward,
|
||||
init=init,
|
||||
dims={"nO": nO, "nI": nI, "nF": nF, "nP": nP},
|
||||
params={"W": None, "b": None, "pad": None},
|
||||
attrs={"dropout_rate": dropout}
|
||||
)
|
||||
return model
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ from thinc.api import Model, noop, use_ops, Linear
|
|||
from ..syntax._parser_model import ParserStepModel
|
||||
|
||||
|
||||
def TransitionModel(tok2vec, lower, upper, unseen_classes=set()):
|
||||
def TransitionModel(tok2vec, lower, upper, dropout=0.2, unseen_classes=set()):
|
||||
"""Set up a stepwise transition-based model"""
|
||||
if upper is None:
|
||||
has_upper = False
|
||||
|
|
|
@ -219,9 +219,11 @@ cdef int arg_max_if_valid(const weight_t* scores, const int* is_valid, int n) no
|
|||
|
||||
|
||||
class ParserStepModel(Model):
|
||||
def __init__(self, docs, layers, *, has_upper, unseen_classes=None, train=True):
|
||||
def __init__(self, docs, layers, *, has_upper, unseen_classes=None, train=True,
|
||||
dropout=0.1):
|
||||
Model.__init__(self, name="parser_step_model", forward=step_forward)
|
||||
self.attrs["has_upper"] = has_upper
|
||||
self.attrs["dropout_rate"] = dropout
|
||||
self.tokvecs, self.bp_tokvecs = layers[0](docs, is_train=train)
|
||||
if layers[1].get_dim("nP") >= 2:
|
||||
activation = "maxout"
|
||||
|
@ -289,11 +291,17 @@ class ParserStepModel(Model):
|
|||
self.bp_tokvecs(d_tokvecs[:-1])
|
||||
return d_tokvecs
|
||||
|
||||
NUMPY_OPS = NumpyOps()
|
||||
|
||||
def step_forward(model: ParserStepModel, states, is_train):
|
||||
token_ids = model.get_token_ids(states)
|
||||
vector, get_d_tokvecs = model.state2vec(token_ids, is_train)
|
||||
mask = None
|
||||
if model.attrs["has_upper"]:
|
||||
dropout_rate = model.attrs["dropout_rate"]
|
||||
if is_train and dropout_rate > 0:
|
||||
mask = NUMPY_OPS.get_dropout_mask(vector.shape, 0.1)
|
||||
vector *= mask
|
||||
scores, get_d_vector = model.vec2scores(vector, is_train)
|
||||
else:
|
||||
scores = NumpyOps().asarray(vector)
|
||||
|
@ -305,6 +313,8 @@ def step_forward(model: ParserStepModel, states, is_train):
|
|||
# Zero vectors for unseen classes
|
||||
d_scores *= model._class_mask
|
||||
d_vector = get_d_vector(d_scores)
|
||||
if mask is not None:
|
||||
d_vector *= mask
|
||||
if isinstance(model.state2vec.ops, CupyOps) \
|
||||
and not isinstance(token_ids, model.state2vec.ops.xp.ndarray):
|
||||
# Move token_ids and d_vector to GPU, asynchronously
|
||||
|
@ -437,7 +447,7 @@ cdef class precompute_hiddens:
|
|||
sum_state_features(<float*>state_vector.data,
|
||||
feat_weights, &ids[0,0],
|
||||
token_ids.shape[0], self.nF, self.nO*self.nP)
|
||||
state_vector = state_vector + self.bias
|
||||
state_vector += self.bias
|
||||
state_vector, bp_nonlinearity = self._nonlinearity(state_vector)
|
||||
|
||||
def backward(d_state_vector_ids):
|
||||
|
|
Loading…
Reference in New Issue
Block a user