Add dropout to parser hidden layer

This commit is contained in:
Matthw Honnibal 2020-07-07 01:38:15 +02:00
parent 1eb1654941
commit d1fd3438c3
3 changed files with 15 additions and 4 deletions

View File

@ -1,13 +1,14 @@
from thinc.api import Model, normal_init
def PrecomputableAffine(nO, nI, nF, nP):
def PrecomputableAffine(nO, nI, nF, nP, dropout=0.1):
model = Model(
"precomputable_affine",
forward,
init=init,
dims={"nO": nO, "nI": nI, "nF": nF, "nP": nP},
params={"W": None, "b": None, "pad": None},
attrs={"dropout_rate": dropout}
)
return model

View File

@ -2,7 +2,7 @@ from thinc.api import Model, noop, use_ops, Linear
from ..syntax._parser_model import ParserStepModel
def TransitionModel(tok2vec, lower, upper, unseen_classes=set()):
def TransitionModel(tok2vec, lower, upper, dropout=0.2, unseen_classes=set()):
"""Set up a stepwise transition-based model"""
if upper is None:
has_upper = False

View File

@ -219,9 +219,11 @@ cdef int arg_max_if_valid(const weight_t* scores, const int* is_valid, int n) no
class ParserStepModel(Model):
def __init__(self, docs, layers, *, has_upper, unseen_classes=None, train=True):
def __init__(self, docs, layers, *, has_upper, unseen_classes=None, train=True,
dropout=0.1):
Model.__init__(self, name="parser_step_model", forward=step_forward)
self.attrs["has_upper"] = has_upper
self.attrs["dropout_rate"] = dropout
self.tokvecs, self.bp_tokvecs = layers[0](docs, is_train=train)
if layers[1].get_dim("nP") >= 2:
activation = "maxout"
@ -289,11 +291,17 @@ class ParserStepModel(Model):
self.bp_tokvecs(d_tokvecs[:-1])
return d_tokvecs
NUMPY_OPS = NumpyOps()
def step_forward(model: ParserStepModel, states, is_train):
token_ids = model.get_token_ids(states)
vector, get_d_tokvecs = model.state2vec(token_ids, is_train)
mask = None
if model.attrs["has_upper"]:
dropout_rate = model.attrs["dropout_rate"]
if is_train and dropout_rate > 0:
mask = NUMPY_OPS.get_dropout_mask(vector.shape, 0.1)
vector *= mask
scores, get_d_vector = model.vec2scores(vector, is_train)
else:
scores = NumpyOps().asarray(vector)
@ -305,6 +313,8 @@ def step_forward(model: ParserStepModel, states, is_train):
# Zero vectors for unseen classes
d_scores *= model._class_mask
d_vector = get_d_vector(d_scores)
if mask is not None:
d_vector *= mask
if isinstance(model.state2vec.ops, CupyOps) \
and not isinstance(token_ids, model.state2vec.ops.xp.ndarray):
# Move token_ids and d_vector to GPU, asynchronously
@ -437,7 +447,7 @@ cdef class precompute_hiddens:
sum_state_features(<float*>state_vector.data,
feat_weights, &ids[0,0],
token_ids.shape[0], self.nF, self.nO*self.nP)
state_vector = state_vector + self.bias
state_vector += self.bias
state_vector, bp_nonlinearity = self._nonlinearity(state_vector)
def backward(d_state_vector_ids):