diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py
index b4631037a..a70a71de1 100644
--- a/spacy/tests/doc/test_span.py
+++ b/spacy/tests/doc/test_span.py
@@ -7,6 +7,7 @@ from spacy.lang.en import English
 from spacy.tokens import Doc, Span, Token
 from spacy.vocab import Vocab
 from spacy.util import filter_spans
+from spacy.training import Example
 from thinc.api import get_current_ops
 
 from ..util import add_vecs_to_vocab
@@ -700,3 +701,24 @@ def test_span_group_copy(doc):
     assert len(doc.spans["test"]) == 3
     # check that the copy spans were not modified and this is an isolated doc
     assert len(doc_copy.spans["test"]) == 2
+
+
+def test_for_partial_ent_sents():
+    """Check that Span.sents yields complete sentences, never partial ones.
+
+    A span may overlap several sentences; every sentence it touches must be
+    returned in full, including the one that runs to the end of the doc.
+    """
+    nlp = English()
+    words = ["Mahler's", "Symphony", "No.", "8", "was", "beautiful."]
+    doc = Doc(nlp.vocab, words=words, sent_starts=[1, 0, 0, 1, 0, 0])
+    doc.set_ents([Span(doc, 1, 4, "WORK")])
+    # The entity (tokens 1-3) overlaps both sentences of the doc, so ent.sents
+    # must equal doc.sents exactly.
+    doc_sents = list(doc.sents)
+    ent_sents = list(doc.ents[0].sents)
+    # zip() stops at the shorter input, so compare the counts first; otherwise
+    # a dropped trailing sentence would go unnoticed.
+    assert len(ent_sents) == len(doc_sents) == 2
+    for doc_sent, ent_sent in zip(doc_sents, ent_sents):
+        assert doc_sent == ent_sent
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index cfe1236df..72a216e05 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -460,9 +460,8 @@ cdef class Span:
                     start = i
                     if start >= self.end:
                         break
-            if start < self.end:
-                yield Span(self.doc, start, self.end)
-
+                elif i == self.doc.length - 1:
+                    yield Span(self.doc, start, i + 1)
     @property
     def ents(self):