From d84607f6bb7fa561d65734b1d2d15770c5de05b9 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 14 Sep 2017 20:34:40 +0200 Subject: [PATCH] Vectorize update in AddHistory --- spacy/_ml.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/spacy/_ml.py b/spacy/_ml.py index d3c82897f..1f3d50cbd 100644 --- a/spacy/_ml.py +++ b/spacy/_ml.py @@ -81,31 +81,28 @@ def add_tuples(X, drop=0.): def AddHistory(layer, decay=0.0001): ops = layer.ops nonlocals = [] - if layer.nI: - average_inputs = ops.allocate((layer.nO, layer.nI-layer.nO)) - nonlocals = [] def history_fwd(X, drop=0.): if not nonlocals: - nonlocals.append(ops.allocate((layer.nO, X.shape[1]))) + if hasattr(layer, 'nO'): + nO = layer.nO + else: + nO = layer._layers[-1].nO + nonlocals.append(ops.allocate((nO, X.shape[1]))) model.history = nonlocals[0] average_inputs = nonlocals[0] hist = ops.xp.tensordot(X, average_inputs, axes=[[1], [1]]) X_hist = ops.xp.hstack((X, hist)) Y, bp_Y = layer.begin_update(X_hist, drop=drop) - for i in range(Y.shape[0]): - amax = Y[i].argmax() - average_inputs[amax] *= 1-decay - average_inputs[amax] += decay * X[i] + amax = Y.argmax(axis=1) + average_inputs *= 1-decay + ops.scatter_add(average_inputs, amax, X * decay) def history_bwd(dY, sgd=None): dX_hist = bp_Y(dY, sgd=sgd) dX = dX_hist[:, :X.shape[1]] - return dX + return ops.xp.ascontiguousarray(dX) return Y, history_bwd model = wrap(history_fwd, layer) - if layer.nI: - model.history = average_inputs - else: - model.history = None + model.history = None return model