From f292f7f0e6756bb54eda2a011a7fa4094fff7c10 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Wed, 2 Nov 2016 23:47:46 +0100
Subject: [PATCH] Fix Issue #599, by considering empty documents to be parsed
 and tagged. Implementation is a bit dodgy.

---
 spacy/tokens/doc.pyx | 9 +++++++++
 1 file changed, 9 insertions(+)
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 32106da99..1200a0517 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -151,6 +151,11 @@ cdef class Doc:
                 # must be created.
                 self.push_back(
                     <const LexemeC*>self.vocab.get(self.mem, orth), has_space)
+        # Debatable policy: treat an empty doc as tagged and parsed, as there
+        # is no annotation we could add to it. push_back() resets both flags.
+        if self.length == 0:
+            self.is_tagged = True
+            self.is_parsed = True
     
     def __getitem__(self, object i):
         '''
@@ -430,6 +435,10 @@ cdef class Doc:
                 yield Span(self, start, self.length)
 
     cdef int push_back(self, LexemeOrToken lex_or_tok, bint has_space) except -1:
+        if self.length == 0:
+            # Adding the first token: clear the flags set for the empty doc.
+            self.is_tagged = False
+            self.is_parsed = False
         if self.length == self.max_length:
             self._realloc(self.length * 2)
         cdef TokenC* t = &self.c[self.length]