From a1d4c97fb7ada8b655292409014d92ab7a6fd9f7 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 27 May 2017 17:59:00 -0500
Subject: [PATCH] Improve correctness of minibatching

---
 spacy/syntax/nn_parser.pyx | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index b7aca26b8..ffd7c8da6 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -427,7 +427,7 @@ cdef class Parser:
 
         cuda_stream = get_cuda_stream()
 
-        states, golds, max_length = self._init_gold_batch(docs, golds)
+        states, golds, max_steps = self._init_gold_batch(docs, golds)
         state2vec, vec2scores = self.get_batch_model(len(states), tokvecs, cuda_stream,
                                                       0.0)
         todo = [(s, g) for (s, g) in zip(states, golds)
@@ -438,6 +438,7 @@ cdef class Parser:
         backprops = []
         d_tokvecs = state2vec.ops.allocate(tokvecs.shape)
         cdef float loss = 0.
+        n_steps = 0
         while todo:
             states, golds = zip(*todo)
 
@@ -467,7 +468,8 @@ cdef class Parser:
             todo = [st for st in todo if not st[0].is_final()]
             if losses is not None:
                 losses[self.name] += (d_scores**2).sum()
-            if len(backprops) >= (max_length * 2):
+            n_steps += 1
+            if n_steps >= max_steps:
                 break
         self._make_updates(d_tokvecs,
             backprops, sgd, cuda_stream)
@@ -482,7 +484,8 @@ cdef class Parser:
             StateClass state
             Transition action
         whole_states = self.moves.init_batch(whole_docs)
-        max_length = max(5, min(20, min([len(doc) for doc in whole_docs])))
+        max_length = max(5, min(50, min([len(doc) for doc in whole_docs])))
+        max_moves = 0
         states = []
         golds = []
         for doc, state, gold in zip(whole_docs, whole_states, whole_golds):
@@ -493,16 +496,20 @@ cdef class Parser:
             start = 0
             while start < len(doc):
                 state = state.copy()
+                n_moves = 0
                 while state.B(0) < start and not state.is_final():
                     action = self.moves.c[oracle_actions.pop(0)]
                     action.do(state.c, action.label)
+                    n_moves += 1
                 has_gold = self.moves.has_gold(gold, start=start,
                                                end=start+max_length)
                 if not state.is_final() and has_gold:
                     states.append(state)
                     golds.append(gold)
+                    max_moves = max(max_moves, n_moves)
                 start += min(max_length, len(doc)-start)
-        return states, golds, max_length
+            max_moves = max(max_moves, len(oracle_actions))
+        return states, golds, max_moves
 
     def _make_updates(self, d_tokvecs, backprops, sgd, cuda_stream=None):
         # Tells CUDA to block, so our async copies complete.