mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Feature/span ents (#2599)
* Created Span.ents property * Add tests for span.ents * Add tests for start and end of sentence
This commit is contained in:
parent
87fa847e6e
commit
0473add369
|
@ -133,3 +133,31 @@ def test_span_as_doc(doc):
|
||||||
span = doc[4:10]
|
span = doc[4:10]
|
||||||
span_doc = span.as_doc()
|
span_doc = span.as_doc()
|
||||||
assert span.text == span_doc.text.strip()
|
assert span.text == span_doc.text.strip()
|
||||||
|
|
||||||
|
def test_span_ents_property(doc):
|
||||||
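|
"""Test that span.ents returns only the entities inside each sentence span, including entities at the start and end of a sentence."""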
|
||||||
|
doc.ents = [
|
||||||
|
(doc.vocab.strings['PRODUCT'], 0, 1),
|
||||||
|
(doc.vocab.strings['PRODUCT'], 7, 8),
|
||||||
|
(doc.vocab.strings['PRODUCT'], 11, 14)
|
||||||
|
]
|
||||||
|
assert len(list(doc.ents)) == 3
|
||||||
|
sentences = list(doc.sents)
|
||||||
|
assert len(sentences) == 3
|
||||||
|
assert len(sentences[0].ents) == 1
|
||||||
|
# First sentence, also tests start of sentence
|
||||||
|
assert sentences[0].ents[0].text == "This"
|
||||||
|
assert sentences[0].ents[0].label_ == "PRODUCT"
|
||||||
|
assert sentences[0].ents[0].start == 0
|
||||||
|
assert sentences[0].ents[0].end == 1
|
||||||
|
# Second sentence
|
||||||
|
assert len(sentences[1].ents) == 1
|
||||||
|
assert sentences[1].ents[0].text == "another"
|
||||||
|
assert sentences[1].ents[0].label_ == "PRODUCT"
|
||||||
|
assert sentences[1].ents[0].start == 7
|
||||||
|
assert sentences[1].ents[0].end == 8
|
||||||
|
# Third sentence ents, also tests end of sentence
|
||||||
|
assert sentences[2].ents[0].text == "a third ."
|
||||||
|
assert sentences[2].ents[0].label_ == "PRODUCT"
|
||||||
|
assert sentences[2].ents[0].start == 11
|
||||||
|
assert sentences[2].ents[0].end == 14
|
||||||
|
|
|
@ -302,6 +302,15 @@ cdef class Span:
|
||||||
raise RuntimeError(Errors.E038)
|
raise RuntimeError(Errors.E038)
|
||||||
return self.doc[root.l_edge:root.r_edge + 1]
|
return self.doc[root.l_edge:root.r_edge + 1]
|
||||||
|
|
||||||
|
property ents:
|
||||||
|
"""RETURNS (list): A list of the entities from `doc.ents` that lie entirely within the current span."""
|
||||||
|
def __get__(self):
|
||||||
|
ents = []
|
||||||
|
for ent in self.doc.ents:
|
||||||
|
if ent.start >= self.start and ent.end <= self.end:
|
||||||
|
ents.append(ent)
|
||||||
|
return ents
|
||||||
|
|
||||||
property has_vector:
|
property has_vector:
|
||||||
"""RETURNS (bool): Whether a word vector is associated with the object.
|
"""RETURNS (bool): Whether a word vector is associated with the object.
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Reference in New Issue
Block a user