From 180e5afede4dfd4803e3c0f94d82fef3885a316d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sun, 21 May 2017 09:05:34 -0500
Subject: [PATCH] Fix tokvecs flattening in pipeline

---
 spacy/pipeline.pyx | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index 09e79d67d..b6c85009d 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -105,16 +105,19 @@ class NeuralTagger(object):
 
     def pipe(self, stream, batch_size=128, n_threads=-1):
         for docs in cytoolz.partition_all(batch_size, stream):
-            tokvecs = self.model.ops.flatten([d.tensor for d in docs])
+            tokvecs = [d.tensor for d in docs]
             tag_ids = self.predict(tokvecs)
             self.set_annotations(docs, tag_ids)
             yield from docs
 
     def predict(self, tokvecs):
         scores = self.model(tokvecs)
+        scores = self.model.ops.flatten(scores)
         guesses = scores.argmax(axis=1)
         if not isinstance(guesses, numpy.ndarray):
             guesses = guesses.get()
+        guesses = self.model.ops.unflatten(guesses,
+                    [tv.shape[0] for tv in tokvecs])
         return guesses
 
     def set_annotations(self, docs, batch_tag_ids):
@@ -122,10 +125,9 @@ class NeuralTagger(object):
             docs = [docs]
         cdef Doc doc
         cdef int idx = 0
-        cdef int i, j, tag_id
         cdef Vocab vocab = self.vocab
         for i, doc in enumerate(docs):
-            doc_tag_ids = batch_tag_ids[idx:idx+len(doc)]
+            doc_tag_ids = batch_tag_ids[i]
             for j, tag_id in enumerate(doc_tag_ids):
                 vocab.morphology.assign_tag_id(&doc.c[j], tag_id)
                 idx += 1