Fix Span.sents for edge case of Span being the only Span in the last sentence of a Doc. (#12484)

This commit is contained in:
Raphael Mitsch 2023-03-29 18:54:47 +02:00 committed by GitHub
parent 372a90885e
commit d85df9d577
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 19 additions and 0 deletions

View File

@ -716,3 +716,18 @@ def test_for_partial_ent_sents():
# equal to the sentences referenced in ent.sents.
for doc_sent, ent_sent in zip(doc.sents, doc.ents[0].sents):
assert doc_sent == ent_sent
def test_for_no_ent_sents():
    """Span.sents should yield exactly one sentence for a trailing entity.

    The Doc built here has two sentences; the second consists solely of the
    final token, which is also the only entity. Both Span.sents and Span.sent
    are expected to resolve to that single-token sentence.
    """
    words = ["This", "is", "a", "test.", "ENTITY"]
    # The last token opens its own sentence, so the entity *is* that sentence.
    sent_starts = [1, 0, 0, 0, 1]
    doc = Doc(English().vocab, words=words, sent_starts=sent_starts)
    doc.set_ents([Span(doc, 4, 5, "WORK")])

    entity = doc.ents[0]
    entity_sents = list(entity.sents)

    assert len(entity_sents) == 1
    assert str(entity_sents[0]) == "ENTITY"
    assert str(entity.sent) == "ENTITY"

View File

@ -463,6 +463,10 @@ cdef class Span:
elif i == self.doc.length - 1:
yield Span(self.doc, start, self.doc.length)
# Ensure that trailing parts of the Span instance are included in last element of .sents.
if start == self.doc.length - 1:
yield Span(self.doc, start, self.doc.length)
@property
def ents(self):
"""The named entities that fall completely within the span. Returns