diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py
index 8cd4347c2..926624633 100644
--- a/spacy/tests/doc/test_span.py
+++ b/spacy/tests/doc/test_span.py
@@ -133,3 +133,33 @@ def test_span_as_doc(doc):
     span = doc[4:10]
     span_doc = span.as_doc()
     assert span.text == span_doc.text.strip()
+
+
+def test_span_ents_property(doc):
+    """Test that span.ents returns the entities inside each sentence span."""
+    doc.ents = [
+        (doc.vocab.strings['PRODUCT'], 0, 1),
+        (doc.vocab.strings['PRODUCT'], 7, 8),
+        (doc.vocab.strings['PRODUCT'], 11, 14)
+    ]
+    assert len(list(doc.ents)) == 3
+    sentences = list(doc.sents)
+    assert len(sentences) == 3
+    assert len(sentences[0].ents) == 1
+    # First sentence, also tests start of sentence
+    assert sentences[0].ents[0].text == "This"
+    assert sentences[0].ents[0].label_ == "PRODUCT"
+    assert sentences[0].ents[0].start == 0
+    assert sentences[0].ents[0].end == 1
+    # Second sentence
+    assert len(sentences[1].ents) == 1
+    assert sentences[1].ents[0].text == "another"
+    assert sentences[1].ents[0].label_ == "PRODUCT"
+    assert sentences[1].ents[0].start == 7
+    assert sentences[1].ents[0].end == 8
+    # Third sentence ents, also tests end of sentence
+    assert len(sentences[2].ents) == 1
+    assert sentences[2].ents[0].text == "a third ."
+    assert sentences[2].ents[0].label_ == "PRODUCT"
+    assert sentences[2].ents[0].start == 11
+    assert sentences[2].ents[0].end == 14
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index ae57bc721..4bb60e2b9 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -302,6 +302,16 @@ cdef class Span:
                     raise RuntimeError(Errors.E038)
             return self.doc[root.l_edge:root.r_edge + 1]
 
+    property ents:
+        """RETURNS (list): The entities in `self.doc.ents` whose start and end
+        both lie within the boundaries of the current span."""
+        def __get__(self):
+            ents = []
+            for ent in self.doc.ents:
+                if ent.start >= self.start and ent.end <= self.end:
+                    ents.append(ent)
+            return ents
+
     property has_vector:
         """RETURNS (bool): Whether a word vector is associated with the object.
         """