Fix dropout and learn rate in parser

This commit is contained in:
Matthew Honnibal 2017-08-12 05:44:39 -05:00
parent b40bc20b12
commit 1a59db1c86

View File

@@ -39,6 +39,7 @@ from preshed.maps cimport map_get
from thinc.api import layerize, chain, noop, clone from thinc.api import layerize, chain, noop, clone
from thinc.neural import Model, Affine, ReLu, Maxout from thinc.neural import Model, Affine, ReLu, Maxout
from thinc.neural._classes.selu import SELU from thinc.neural._classes.selu import SELU
+ from thinc.neural._classes.layernorm import LayerNorm
from thinc.neural.ops import NumpyOps, CupyOps from thinc.neural.ops import NumpyOps, CupyOps
from thinc.neural.util import get_array_module from thinc.neural.util import get_array_module
@@ -467,7 +468,7 @@ cdef class Parser:
docs = [docs] docs = [docs]
golds = [golds] golds = [golds]
if USE_FINE_TUNE: if USE_FINE_TUNE:
- my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=0.)
+ my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=drop)
my_tokvecs = self.model[0].ops.flatten(my_tokvecs) my_tokvecs = self.model[0].ops.flatten(my_tokvecs)
tokvecs += my_tokvecs tokvecs += my_tokvecs
@@ -496,13 +497,13 @@ cdef class Parser:
scores, bp_scores = vec2scores.begin_update(vector, drop=drop) scores, bp_scores = vec2scores.begin_update(vector, drop=drop)
d_scores = self.get_batch_loss(states, golds, scores) d_scores = self.get_batch_loss(states, golds, scores)
- d_vector = bp_scores(d_scores / d_scores.shape[0], sgd=sgd)
+ d_vector = bp_scores(d_scores, sgd=sgd)
if drop != 0: if drop != 0:
d_vector *= mask d_vector *= mask
if isinstance(self.model[0].ops, CupyOps) \ if isinstance(self.model[0].ops, CupyOps) \
and not isinstance(token_ids, state2vec.ops.xp.ndarray): and not isinstance(token_ids, state2vec.ops.xp.ndarray):
- # Move token_ids and d_vector to CPU, asynchronously
+ # Move token_ids and d_vector to GPU, asynchronously
backprops.append(( backprops.append((
get_async(cuda_stream, token_ids), get_async(cuda_stream, token_ids),
get_async(cuda_stream, d_vector), get_async(cuda_stream, d_vector),