mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 00:46:28 +03:00
Fix Span.sents for edge case of Span being the only Span in the last sentence of a Doc. (#12484)
This commit is contained in:
parent
372a90885e
commit
d85df9d577
|
@ -716,3 +716,18 @@ def test_for_partial_ent_sents():
|
|||
# equal to the sentences referenced in ent.sents.
|
||||
for doc_sent, ent_sent in zip(doc.sents, doc.ents[0].sents):
|
||||
assert doc_sent == ent_sent
|
||||
|
||||
|
||||
def test_for_no_ent_sents():
    """Check Span.sents for an entity that sits alone in the Doc's last sentence.

    The entity covers only the final token, and that token is also marked as a
    sentence start, so the entity is the sole content of the trailing sentence.
    Span.sents must yield exactly that one sentence, and it must agree with
    Span.sent.
    """
    words = ["This", "is", "a", "test.", "ENTITY"]
    sent_starts = [1, 0, 0, 0, 1]
    doc = Doc(English().vocab, words=words, sent_starts=sent_starts)
    doc.set_ents([Span(doc, 4, 5, "WORK")])

    entity = doc.ents[0]
    entity_sents = list(entity.sents)

    assert len(entity_sents) == 1
    assert str(entity_sents[0]) == str(entity.sent) == "ENTITY"
|
||||
|
|
|
@ -463,6 +463,10 @@ cdef class Span:
|
|||
elif i == self.doc.length - 1:
|
||||
yield Span(self.doc, start, self.doc.length)
|
||||
|
||||
# Ensure that trailing parts of the Span instance are included in last element of .sents.
|
||||
if start == self.doc.length - 1:
|
||||
yield Span(self.doc, start, self.doc.length)
|
||||
|
||||
@property
|
||||
def ents(self):
|
||||
"""The named entities that fall completely within the span. Returns
|
||||
|
|
Loading…
Reference in New Issue
Block a user