From 4b3c96d76d7fee39216dfc7fa6d406b43c54cd97 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 7 Nov 2015 17:05:16 +1100 Subject: [PATCH] * Fix zero-length spans --- spacy/tokens/span.pyx | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 3e872139d..53653de5a 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -23,9 +23,12 @@ cdef class Span: self.doc = tokens self.start = start - self.start_char = self.doc[start].idx + self.start_char = self.doc[start].idx if start < self.doc.length else 0 self.end = end - self.end_char = self.doc[end - 1].idx + len(self.doc[end - 1]) + if end >= 1: + self.end_char = self.doc[end - 1].idx + len(self.doc[end - 1]) + else: + self.end_char = 0 self.label = label self._vector = vector self._vector_norm = vector_norm @@ -81,7 +84,7 @@ cdef class Span: return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm) cpdef int _recalculate_indices(self) except -1: - if self.end >= self.doc.length \ + if self.end > self.doc.length \ or self.doc.c[self.start].idx != self.start_char \ or (self.doc.c[self.end-1].idx + self.doc.c[self.end-1].lex.length) != self.end_char: start = token_by_start(self.doc.c, self.doc.length, self.start_char)