From 76989036173aa7c89501fcd32c0e251675debacd Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Tue, 31 Oct 2017 02:33:16 +0100
Subject: [PATCH] Fix GPU usage

---
 spacy/syntax/nn_parser.pyx | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index e480bd1dc..68301238d 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -89,11 +89,14 @@ cdef class precompute_hiddens:
             cached = gpu_cached.get(stream=cuda_stream)
         else:
             cached = gpu_cached
+        if not isinstance(lower_model.b, numpy.ndarray):
+            self.bias = lower_model.b.get()
+        else:
+            self.bias = lower_model.b
         self.nF = cached.shape[1]
         self.nP = getattr(lower_model, 'nP', 1)
         self.nO = cached.shape[2]
         self.ops = lower_model.ops
-        self.bias = lower_model.b
         self._is_synchronized = False
         self._cuda_stream = cuda_stream
         self._cached = cached
@@ -126,7 +129,8 @@ cdef class precompute_hiddens:
         state_vector += self.bias
         state_vector, bp_nonlinearity = self._nonlinearity(state_vector)
 
-        def backward(d_state_vector, sgd=None):
+        def backward(d_state_vector_ids, sgd=None):
+            d_state_vector, token_ids = d_state_vector_ids
             d_state_vector = bp_nonlinearity(d_state_vector, sgd)
             # This will usually be on GPU
             if not isinstance(d_state_vector, self.ops.xp.ndarray):
@@ -157,7 +161,8 @@ cdef void sum_state_features(float* output,
         const float* cached, const int* token_ids, int B, int F, int O) nogil:
     cdef int idx, b, f, i
     cdef const float* feature
-    padding = cached - (F * O)
+    padding = cached
+    cached += F * O
     for b in range(B):
         for f in range(F):
             if token_ids[f] < 0:
@@ -657,7 +662,7 @@ cdef class Parser:
             cuda_stream.synchronize()
         xp = get_array_module(d_tokvecs)
         for ids, d_vector, bp_vector in backprops:
-            d_state_features = bp_vector(d_vector, sgd=sgd)
+            d_state_features = bp_vector((d_vector, ids), sgd=sgd)
            ids = ids.flatten()
             d_state_features = d_state_features.reshape(
                 (ids.size, d_state_features.shape[2]))
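
For context: the first hunk copies the lower model's bias to host memory before it is reused on the CPU, relying on the fact that cupy arrays expose ndarray.get() while plain numpy arrays do not need copying. Below is a minimal standalone sketch of that pattern, assuming cupy as the GPU array library; the helper name to_host and the toy bias array are illustrative only, not part of the patch.

    import numpy

    try:
        import cupy
    except ImportError:
        cupy = None


    def to_host(array):
        # cupy arrays provide .get(), which copies device memory into a
        # numpy.ndarray; arrays that are already numpy are returned as-is.
        if not isinstance(array, numpy.ndarray):
            return array.get()
        return array


    # Toy bias: allocated on the GPU when cupy is available, on the host otherwise.
    xp = cupy if cupy is not None else numpy
    bias = to_host(xp.zeros((8,), dtype='f'))
    assert isinstance(bias, numpy.ndarray)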