mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-13 09:42:26 +03:00
Test and fix issue13769
This commit is contained in:
parent
bec546cec0
commit
75a9d9b9ad
|
@ -49,7 +49,7 @@ def doc_not_parsed(en_tokenizer):
|
||||||
def test_issue1537():
|
def test_issue1537():
|
||||||
"""Test that Span.as_doc() doesn't segfault."""
|
"""Test that Span.as_doc() doesn't segfault."""
|
||||||
string = "The sky is blue . The man is pink . The dog is purple ."
|
string = "The sky is blue . The man is pink . The dog is purple ."
|
||||||
doc = Doc(Vocab(), words=string.split())
|
doc = Doc(Vocab(), words=list(string.split()))
|
||||||
doc[0].sent_start = True
|
doc[0].sent_start = True
|
||||||
for word in doc[1:]:
|
for word in doc[1:]:
|
||||||
if word.nbor(-1).text == ".":
|
if word.nbor(-1).text == ".":
|
||||||
|
@ -225,6 +225,18 @@ def test_spans_span_sent(doc, doc_not_parsed):
|
||||||
assert doc_not_parsed[10:14].sent == doc_not_parsed[5:]
|
assert doc_not_parsed[10:14].sent == doc_not_parsed[5:]
|
||||||
|
|
||||||
|
|
||||||
|
def test_issue13769():
|
||||||
|
# Test issue 13769: Incorrect output of span.sents when the final token of the doc is a sentence of its own, outside of the span.
|
||||||
|
doc = Doc(Vocab(), words=list("This is a sentence . This is another sentence . Third".split()))
|
||||||
|
doc[0].is_sent_start = True
|
||||||
|
doc[5].is_sent_start = True
|
||||||
|
doc[10].is_sent_start = True
|
||||||
|
doc.ents = [('ENTITY', 7, 9)] # "another sentence" phrase in the second sentence
|
||||||
|
entity = doc.ents[0]
|
||||||
|
ent_sents = list(entity.sents)
|
||||||
|
assert len(ent_sents) == 1
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"start,end,expected_sentence",
|
"start,end,expected_sentence",
|
||||||
[
|
[
|
||||||
|
|
|
@ -479,8 +479,9 @@ cdef class Span:
|
||||||
break
|
break
|
||||||
elif i == self.doc.length - 1:
|
elif i == self.doc.length - 1:
|
||||||
yield Span(self.doc, start, self.doc.length)
|
yield Span(self.doc, start, self.doc.length)
|
||||||
|
else:
|
||||||
# Ensure that trailing parts of the Span instance are included in last element of .sents.
|
# Ensure that trailing parts of the Span instance are included in last element of .sents.
|
||||||
|
# We only want to do this if we didn't break above
|
||||||
if start == self.doc.length - 1:
|
if start == self.doc.length - 1:
|
||||||
yield Span(self.doc, start, self.doc.length)
|
yield Span(self.doc, start, self.doc.length)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user