mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Feature/span ents (#2599)
* Created Span.ents property * Add tests for span.ents * Add tests for start and end of sentence
This commit is contained in:
parent
87fa847e6e
commit
0473add369
|
@ -133,3 +133,31 @@ def test_span_as_doc(doc):
|
|||
span = doc[4:10]
|
||||
span_doc = span.as_doc()
|
||||
assert span.text == span_doc.text.strip()
|
||||
|
||||
def test_span_ents_property(doc):
    """Test that span.ents only returns the entities inside each sentence.

    Three PRODUCT entities are set on the doc, one per sentence: one at the
    very start of a sentence, one in the middle of a sentence and one that
    runs up to the very end of a sentence (and of the doc).
    """
    product = doc.vocab.strings['PRODUCT']
    doc.ents = [
        (product, 0, 1),
        (product, 7, 8),
        (product, 11, 14),
    ]
    assert len(list(doc.ents)) == 3
    sentences = list(doc.sents)
    assert len(sentences) == 3
    # First sentence, also tests an entity at the start of a sentence
    assert len(sentences[0].ents) == 1
    assert sentences[0].ents[0].text == "This"
    assert sentences[0].ents[0].label_ == "PRODUCT"
    assert sentences[0].ents[0].start == 0
    assert sentences[0].ents[0].end == 1
    # Second sentence
    assert len(sentences[1].ents) == 1
    assert sentences[1].ents[0].text == "another"
    assert sentences[1].ents[0].label_ == "PRODUCT"
    assert sentences[1].ents[0].start == 7
    assert sentences[1].ents[0].end == 8
    # Third sentence, also tests an entity at the end of a sentence.
    # The count is checked here too, so an entity leaking in from another
    # sentence cannot go unnoticed.
    assert len(sentences[2].ents) == 1
    assert sentences[2].ents[0].text == "a third ."
    assert sentences[2].ents[0].label_ == "PRODUCT"
    assert sentences[2].ents[0].start == 11
    assert sentences[2].ents[0].end == 14
|
||||
|
|
|
@ -302,6 +302,15 @@ cdef class Span:
|
|||
raise RuntimeError(Errors.E038)
|
||||
return self.doc[root.l_edge:root.r_edge + 1]
|
||||
|
||||
property ents:
    """The entities of the parent doc that lie entirely within this span.

    An entity is included only if it starts at or after the span's start
    and ends at or before the span's end, so entities that merely overlap
    a span boundary are left out. Token offsets on the returned entities
    are the ones they carry in `self.doc.ents`; they are not shifted to be
    relative to the span.

    RETURNS (list): The entities from `self.doc.ents` contained in the span.
    """
    def __get__(self):
        return [ent for ent in self.doc.ents
                if ent.start >= self.start and ent.end <= self.end]
|
||||
|
||||
property has_vector:
|
||||
"""RETURNS (bool): Whether a word vector is associated with the object.
|
||||
"""
|
||||
|
|
Loading…
Reference in New Issue
Block a user