Mirror of https://github.com/explosion/spaCy.git
Fix efficiency of parser backprop_nonlinearity
commit d525552979
parent 57e09747dc
@@ -339,6 +339,7 @@ cdef class precompute_hiddens:
     cdef readonly int nF, nO, nP
     cdef bint _is_synchronized
     cdef public object ops
+    cdef public object numpy_ops
     cdef np.ndarray _features
     cdef np.ndarray _cached
     cdef np.ndarray bias
@@ -368,6 +369,7 @@ cdef class precompute_hiddens:
         self.nP = 1
         self.nO = cached.shape[2]
         self.ops = lower_model.ops
+        self.numpy_ops = NumpyOps()
         assert activation in (None, "relu", "maxout")
         self.activation = activation
         self._is_synchronized = False
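For context, a rough sketch (not part of the commit) of the maxout forward/backward round trip that this cached NumpyOps instance is used for below. It assumes thinc's NumpyOps is importable from thinc.api; the batch size and piece counts are illustrative only.

import numpy
from thinc.api import NumpyOps

ops = NumpyOps()  # created once, as self.numpy_ops now is in __init__

n, nO, nP = 4, 8, 3  # hypothetical batch size, outputs, maxout pieces
X = numpy.random.uniform(-1., 1., (n, nO, nP)).astype("f")

# Forward: keep the best value per (row, output) and record which
# piece won, so the backward pass can route gradients.
best, which = ops.maxout(X)

# Backward: send the gradient only to the winning pieces.
d_best = numpy.ones_like(best)
dX = ops.backprop_maxout(d_best, which, nP)
assert dX.shape == (n, nO, nP)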
@@ -446,44 +448,32 @@ cdef class precompute_hiddens:
         return state_vector, backward
 
     def _nonlinearity(self, state_vector):
-        if isinstance(state_vector, numpy.ndarray):
-            ops = NumpyOps()
-        else:
-            ops = CupyOps()
-
         if self.activation == "maxout":
-            state_vector, mask = ops.maxout(state_vector)
+            return self._maxout_nonlinearity(state_vector)
         else:
-            state_vector = state_vector.reshape(state_vector.shape[:-1])
-            if self.activation == "relu":
-                mask = state_vector >= 0.
-                state_vector *= mask
-            else:
-                mask = None
-
-        def backprop_nonlinearity(d_best):
-            if isinstance(d_best, numpy.ndarray):
-                ops = NumpyOps()
-            else:
-                ops = CupyOps()
-            if mask is not None:
-                mask_ = ops.asarray(mask)
-            # This will usually be on GPU
-            d_best = ops.asarray(d_best)
-            # Fix nans (which can occur from unseen classes.)
-            try:
-                d_best[ops.xp.isnan(d_best)] = 0.
-            except:
-                print(ops.xp.isnan(d_best))
-                raise
-            if self.activation == "maxout":
-                mask_ = ops.asarray(mask)
-                return ops.backprop_maxout(d_best, mask_, self.nP)
-            elif self.activation == "relu":
-                mask_ = ops.asarray(mask)
-                d_best *= mask_
-                d_best = d_best.reshape((d_best.shape + (1,)))
-                return d_best
-            else:
-                return d_best.reshape((d_best.shape + (1,)))
-        return state_vector, backprop_nonlinearity
+            return self._relu_nonlinearity(state_vector)
+
+    def _maxout_nonlinearity(self, state_vector):
+        state_vector, mask = self.numpy_ops.maxout(state_vector)
+        # We're outputting to CPU, but we need this variable on GPU for the
+        # backward pass.
+        mask = self.ops.asarray(mask)
+
+        def backprop_maxout(d_best):
+            return self.ops.backprop_maxout(d_best, mask, self.nP)
+
+        return state_vector, backprop_maxout
+
+    def _relu_nonlinearity(self, state_vector):
+        mask = state_vector >= 0.
+        state_vector *= mask
+        # We're outputting to CPU, but we need this variable on GPU for the
+        # backward pass.
+        mask = self.ops.asarray(mask)
+
+        def backprop_relu(d_best):
+            d_best *= mask
+            d_best = d_best.reshape((d_best.shape + (1,)))
+            return d_best
+
+        return state_vector, backprop_relu
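To make the closure pattern above concrete, here is a standalone sketch of the relu path in plain numpy (standing in for thinc's ops): the mask is computed once in the forward pass and captured by the backward closure, instead of being rebuilt and re-transferred on every backward call as the removed backprop_nonlinearity did.

import numpy

def relu_nonlinearity(state_vector):
    # Forward: build the mask once and close over it.
    mask = state_vector >= 0.
    state_vector = state_vector * mask

    def backprop_relu(d_best):
        # Backward: a single masked multiply, then restore the
        # trailing pieces axis expected downstream.
        d_best = d_best * mask
        return d_best.reshape(d_best.shape + (1,))

    return state_vector, backprop_relu

X = numpy.random.uniform(-1., 1., (4, 8)).astype("f")
Y, backprop = relu_nonlinearity(X)
dX = backprop(numpy.ones_like(Y))
assert dX.shape == (4, 8, 1)

In the committed version the mask is additionally moved to the model's device with self.ops.asarray(mask), since the forward output stays on CPU while the backward pass usually runs on GPU.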