Tweak efficiency of precomputable affine

This commit is contained in:
Matthew Honnibal 2020-06-24 00:00:40 +02:00
parent d85d063231
commit 306a591e1f

View File

@@ -48,8 +48,7 @@ def forward(model, X, is_train):
model.inc_grad("b", dY.sum(axis=0))
dY = dY.reshape((dY.shape[0], nO * nP))
Wopfi = W.transpose((1, 2, 0, 3))
Wopfi = model.ops.xp.ascontiguousarray(Wopfi)
Wopfi = model.ops.as_contig(W.transpose((1, 2, 0, 3)))
Wopfi = Wopfi.reshape((nO * nP, nF * nI))
dXf = model.ops.gemm(dY.reshape((dY.shape[0], nO * nP)), Wopfi)
@@ -59,7 +58,8 @@ def forward(model, X, is_train):
model.ops.gemm(dY, Xf, out=dWopfi, trans1=True)
dWopfi = dWopfi.reshape((nO, nP, nF, nI))
# (o, p, f, i) --> (f, o, p, i)
model.inc_grad("W", dWopfi.transpose((2, 0, 1, 3)))
dWopfi = model.ops.as_contig(dWopfi.transpose((2, 0, 1, 3)))
model.inc_grad("W", dWopfi)
return dXf.reshape((dXf.shape[0], nF, nI))
return Yf, backward