From deab391cbf64f405151818ca9779e65565d99618 Mon Sep 17 00:00:00 2001
From: Thomas Opsomer <thomasopsomer.enpc@gmail.com>
Date: Thu, 15 Feb 2018 16:58:30 +0100
Subject: [PATCH] correct check on sent_start & raise if no boundaries

---
 spacy/tokens/span.pyx | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index da2bc800f..cc4b0a26a 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -291,6 +291,7 @@ cdef class Span:
             # if doc is parsed we can use the deps to find the sentence
             # otherwise we use the `sent_start` token attribute
             cdef int n = 0
+            cdef int i
             if self.doc.is_parsed:
                 root = &self.doc.c[self.start]
                 while root.head != 0:
@@ -300,19 +301,30 @@ cdef class Span:
                         raise RuntimeError
                 return self.doc[root.l_edge:root.r_edge + 1]
             else:
+                # Check whether the document has sentence boundaries,
+                # i.e. at least one token has sent_start == 1
+                for i in range(self.doc.length):
+                    if self.doc.c[i].sent_start == 1:
+                        break
+                else:
+                    raise ValueError(
+                        "Access to sentence requires either the dependency parse "
+                        "or sentence boundaries to be set by setting " +
+                        "doc[i].is_sent_start = True")
                 # find start of the sentence
                 start = self.start
-                while not self.doc.c[start].sent_start and start > 0:
+                while self.doc.c[start].sent_start != 1 and start > 0:
                     start += -1
                 # find end of the sentence
                 end = self.end
-                while not self.doc.c[end].sent_start:
+                while self.doc.c[end].sent_start != 1:
                     end += 1
                     if n >= self.doc.length:
                         break
                 #
                 return self.doc[start:end]
 
+
     property has_vector:
         """RETURNS (bool): Whether a word vector is associated with the object.
         """