From cdc10e9a1ca68e29c6c97e57325962d60c86e103 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 May 2016 10:14:06 +0200
Subject: [PATCH] * Fix Issue #375: noun phrase iteration results in index
 error if noun phrases are merged during the loop. Fix by accumulating the
 spans inside the noun_chunks property, allowing the Span index tricks to
 work.

---
 spacy/tokens/doc.pyx | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index eaec68675..e432c83be 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -247,8 +247,15 @@ cdef class Doc:
                 "requires data to be installed. If you haven't done so, run: "
                 "\npython -m spacy.%s.download all\n"
                 "to install the data" % self.vocab.lang)
+        # Accumulate all the spans before yielding any of them, so the tokenisation
+        # cannot be changed out from under the chunk iterator if the caller merges
+        # noun phrases inside its loop. Span objects accept their tokenisation
+        # changing, so it's okay once we have the Span objects. See Issue #375.
+        spans = []
         for start, end, label in self.noun_chunks_iterator(self):
-            yield Span(self, start, end, label=label)
+            spans.append(Span(self, start, end, label=label))
+        for span in spans:
+            yield span
 
     @property
     def sents(self):