From 8f8a5c33861bff2d7c3f19914e289139ab3a2c28 Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Fri, 31 Jul 2020 14:18:55 +0200
Subject: [PATCH] Fix index boundaries in Span

---
 spacy/errors.py       |  1 +
 spacy/tokens/span.pyx | 12 ++++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index e90e6bf08..91e179f8f 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -606,6 +606,7 @@ class Errors:
              'nlp = Chinese(config=cfg)')
     E1001 = ("Target token outside of matched span for match with tokens "
              "'{span}' and offset '{index}' matched by patterns '{patterns}'.")
+    E1002 = ("Span index out of range.")
 
 
 @add_codes
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index 203308749..07ca4bcad 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -177,9 +177,17 @@ cdef class Span:
             return Span(self.doc, start + self.start, end + self.start)
         else:
             if i < 0:
-                return self.doc[self.end + i]
+                token = self.doc[self.end + i]
+                if self.start <= token.i < self.end:
+                    return token
+                else:
+                    raise IndexError(Errors.E1002)
             else:
-                return self.doc[self.start + i]
+                token = self.doc[self.start + i]
+                if self.start <= token.i < self.end:
+                    return token
+                else:
+                    raise IndexError(Errors.E1002)
 
     def __iter__(self):
         """Iterate over `Token` objects.