diff --git a/spacy/errors.py b/spacy/errors.py
index 79ed5ecdb..fe59453c0 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -588,6 +588,7 @@ class Errors(object):
     E199 = ("Unable to merge 0-length span at doc[{start}:{end}].")
     E200 = ("Specifying a base model with a pretrained component '{component}' "
             "can not be combined with adding a pretrained Tok2Vec layer.")
+    E201 = ("Span index out of range.")
 
 
 @add_codes
diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py
index e76ca4697..25fa421b7 100644
--- a/spacy/tests/doc/test_span.py
+++ b/spacy/tests/doc/test_span.py
@@ -287,3 +287,16 @@ def test_span_eq_hash(doc, doc_not_parsed):
     assert hash(doc[0:2]) == hash(doc[0:2])
     assert hash(doc[0:2]) != hash(doc[1:3])
     assert hash(doc[0:2]) != hash(doc_not_parsed[0:2])
+
+
+def test_span_boundaries(doc):
+    start = 1
+    end = 5
+    span = doc[start:end]
+    for i in range(start, end):
+        assert span[i - start] == doc[i]
+    # The span holds 4 tokens, so -5 and 4 are the first out-of-range indices.
+    with pytest.raises(IndexError):
+        _ = span[-5]
+    with pytest.raises(IndexError):
+        _ = span[4]
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index 2f1418a5b..29b87fa8d 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -181,9 +181,14 @@ cdef class Span:
             return Span(self.doc, start + self.start, end + self.start)
         else:
             if i < 0:
-                return self.doc[self.end + i]
+                token_i = self.end + i
             else:
-                return self.doc[self.start + i]
+                token_i = self.start + i
+            # Reject indices that resolve to a doc token outside this span.
+            if self.start <= token_i < self.end:
+                return self.doc[token_i]
+            else:
+                raise IndexError(Errors.E201)
 
     def __iter__(self):
         """Iterate over `Token` objects.