* Add span.sent property, re Issue #366

This commit is contained in:
Matthew Honnibal 2016-05-06 00:17:38 +02:00
parent 472f576b82
commit 26095f9722
2 changed files with 27 additions and 0 deletions

View File

@ -37,3 +37,16 @@ def test_root2():
doc.from_array([HEAD], heads.T) doc.from_array([HEAD], heads.T)
south_carolina = doc[-2:] south_carolina = doc[-2:]
assert south_carolina.root.text == 'Carolina' assert south_carolina.root.text == 'Carolina'
def test_sent(doc):
    '''Check that Span.sent yields the sentence enclosing a given span.'''
    # NOTE(review): the `doc` fixture is presumably built from
    #   'This is a sentence. This is another sentence. And a third.'
    # -- confirm against the fixture definition.
    # One relative head offset per token; the zeros at positions 1, 6 and 12
    # are the tokens that act as their own head.
    head_offsets = [1, 0, -1, -1, -1, 1, 0, -1, -1, -1, 2, 1, 0, -1]
    heads = np.asarray([head_offsets], dtype='int32')
    doc.from_array([HEAD], heads.T)

    # The document must expose at least one sentence.
    assert len(list(doc.sents))

    # A span inside the first sentence resolves to that whole sentence.
    leading = doc[:2]
    assert leading.sent.root.text == 'is'
    assert leading.sent.text == 'This is a sentence.'

    # A single-token span taken at index 6 (a zero-offset token).
    single = doc[6:7]
    assert single.sent.root.left_edge.text == 'This'

View File

@ -98,6 +98,20 @@ cdef class Span:
self.start = start self.start = start
self.end = end + 1 self.end = end + 1
property sent:
    '''Get the sentence span that this span is a part of.

    Walks from the span's first token up the chain of relative head
    offsets until it reaches a token whose head offset is 0, then returns
    the slice of the document from that token's left edge to its right
    edge (inclusive).

    Raises RuntimeError if no such token is found within `self.doc.length`
    steps.
    '''
    def __get__(self):
        cdef int n = 0
        # Intended to raise if the document has not been parsed.
        # NOTE(review): if `Doc.sents` is implemented as a generator, merely
        # accessing it will not run its checks -- confirm.
        self.doc.sents
        root = &self.doc.c[self.start]
        # `head` is a relative offset; 0 means the token is its own head.
        while root.head != 0:
            root += root.head
            n += 1
            # Bound the walk so malformed (cyclic) heads cannot loop forever.
            if n >= self.doc.length:
                raise RuntimeError(
                    "Could not find the sentence root: followed more head "
                    "links than there are tokens in the document.")
        return self.doc[root.l_edge : root.r_edge + 1]
property vector: property vector:
def __get__(self): def __get__(self):