Feature/span ents (#2599)

* Created Span.ents property

* Add tests for span.ents

* Add tests for start and end of sentence
This commit is contained in:
Ole Henrik Skogstrøm 2018-08-07 13:52:32 +02:00 committed by Ines Montani
parent 87fa847e6e
commit 0473add369
2 changed files with 37 additions and 0 deletions

View File

@ -133,3 +133,31 @@ def test_span_as_doc(doc):
span = doc[4:10] span = doc[4:10]
span_doc = span.as_doc() span_doc = span.as_doc()
assert span.text == span_doc.text.strip() assert span.text == span_doc.text.strip()
def test_span_ents_property(doc):
    """Test that span.ents only returns the entities inside the span.

    Three PRODUCT entities are set on the doc and every sentence span is
    expected to report exactly one of them, including an entity that starts
    on the first token of its sentence and one that ends on the last token
    of its sentence.
    """
    product = doc.vocab.strings['PRODUCT']
    doc.ents = [
        (product, 0, 1),
        (product, 7, 8),
        (product, 11, 14),
    ]
    assert len(list(doc.ents)) == 3
    sentences = list(doc.sents)
    assert len(sentences) == 3
    # First sentence, also tests an entity at the start of a sentence
    assert len(sentences[0].ents) == 1
    assert sentences[0].ents[0].text == "This"
    assert sentences[0].ents[0].label_ == "PRODUCT"
    assert sentences[0].ents[0].start == 0
    assert sentences[0].ents[0].end == 1
    # Second sentence
    assert len(sentences[1].ents) == 1
    assert sentences[1].ents[0].text == "another"
    assert sentences[1].ents[0].label_ == "PRODUCT"
    assert sentences[1].ents[0].start == 7
    assert sentences[1].ents[0].end == 8
    # Third sentence, also tests an entity at the end of a sentence
    assert len(sentences[2].ents) == 1
    assert sentences[2].ents[0].text == "a third ."
    assert sentences[2].ents[0].label_ == "PRODUCT"
    assert sentences[2].ents[0].start == 11
    assert sentences[2].ents[0].end == 14

View File

@ -302,6 +302,15 @@ cdef class Span:
raise RuntimeError(Errors.E038) raise RuntimeError(Errors.E038)
return self.doc[root.l_edge:root.r_edge + 1] return self.doc[root.l_edge:root.r_edge + 1]
property ents:
    """The entities of the parent doc that lie entirely within this span.

    RETURNS (list): The entries of `self.doc.ents` whose start and end
        both fall inside the span, in the order `self.doc.ents` yields
        them. Empty if no entity is fully contained.
    """
    def __get__(self):
        # Only fully contained entities qualify: one that merely overlaps
        # a boundary of the span is left out.
        return [ent for ent in self.doc.ents
                if ent.start >= self.start and ent.end <= self.end]
property has_vector: property has_vector:
"""RETURNS (bool): Whether a word vector is associated with the object. """RETURNS (bool): Whether a word vector is associated with the object.
""" """