mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Add Span index boundary checks (#5861)
* Add Span index boundary checks
* Return Span-specific IndexError in all cases
* Simplify and fix if/else
This commit is contained in:
parent
cd59979ab4
commit
b841248589
|
@ -588,6 +588,7 @@ class Errors(object):
|
||||||
E199 = ("Unable to merge 0-length span at doc[{start}:{end}].")
|
E199 = ("Unable to merge 0-length span at doc[{start}:{end}].")
|
||||||
E200 = ("Specifying a base model with a pretrained component '{component}' "
|
E200 = ("Specifying a base model with a pretrained component '{component}' "
|
||||||
"can not be combined with adding a pretrained Tok2Vec layer.")
|
"can not be combined with adding a pretrained Tok2Vec layer.")
|
||||||
|
E201 = ("Span index out of range.")
|
||||||
|
|
||||||
|
|
||||||
@add_codes
|
@add_codes
|
||||||
|
|
|
@ -287,3 +287,15 @@ def test_span_eq_hash(doc, doc_not_parsed):
|
||||||
assert hash(doc[0:2]) == hash(doc[0:2])
|
assert hash(doc[0:2]) == hash(doc[0:2])
|
||||||
assert hash(doc[0:2]) != hash(doc[1:3])
|
assert hash(doc[0:2]) != hash(doc[1:3])
|
||||||
assert hash(doc[0:2]) != hash(doc_not_parsed[0:2])
|
assert hash(doc[0:2]) != hash(doc_not_parsed[0:2])
|
||||||
|
|
||||||
|
|
||||||
|
def test_span_boundaries(doc):
    """Check that indexing a Span never reaches outside the span itself.

    A span built as ``doc[start:end]`` has ``end - start`` tokens. Every
    index inside that range (counted from either end) must resolve to the
    matching doc token, and the first index beyond each edge must raise
    ``IndexError``.

    doc: the `doc` test fixture; presumably it holds more tokens than
        the span so that out-of-range span indices would still be valid doc
        indices -- confirm against the fixture definition.
    """
    start = 1
    end = 5
    span = doc[start:end]
    length = end - start
    # Non-negative indices are offsets from the span start.
    for i in range(start, end):
        assert span[i - start] == doc[i]
    # Negative indices are offsets from the span end.
    for back in range(1, length + 1):
        assert span[-back] == doc[end - back]
    # -length - 1 and length are the first invalid indices on each side;
    # 5 is kept from the original test as a further out-of-range probe.
    for out_of_range in (-length - 1, length, 5):
        with pytest.raises(IndexError):
            _ = span[out_of_range]
|
||||||
|
|
|
@ -181,9 +181,13 @@ cdef class Span:
|
||||||
return Span(self.doc, start + self.start, end + self.start)
|
return Span(self.doc, start + self.start, end + self.start)
|
||||||
else:
|
else:
|
||||||
if i < 0:
|
if i < 0:
|
||||||
return self.doc[self.end + i]
|
token_i = self.end + i
|
||||||
else:
|
else:
|
||||||
return self.doc[self.start + i]
|
token_i = self.start + i
|
||||||
|
if self.start <= token_i < self.end:
|
||||||
|
return self.doc[token_i]
|
||||||
|
else:
|
||||||
|
raise IndexError(Errors.E201)
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
"""Iterate over `Token` objects.
|
"""Iterate over `Token` objects.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user