* Add parse navigation to Span objects

This commit is contained in:
Matthew Honnibal 2015-05-13 21:45:19 +02:00
parent ca320afe86
commit badf030b6c
2 changed files with 31 additions and 7 deletions

View File

@ -12,6 +12,3 @@ cdef class Span:
cdef public int start
cdef public int end
cdef readonly int label
cdef public Span head
cdef public list rights
cdef public list lefts

View File

@ -9,10 +9,6 @@ cdef class Span:
self.start = start
self.end = end
self.label = label
self.head = self
self.rights = []
self.lefts = []
def __richcmp__(self, Span other, int op):
# Eq
@ -41,6 +37,37 @@ cdef class Span:
for i in range(self.start, self.end):
yield self._seq[i]
property head:
    """The single token in the Span whose own head lies outside the Span
    (or which is its own head), i.e. the top of the Span's subtree.

    Returns None when the Span does not contain exactly one such token,
    meaning the Span is not internally connected (or is empty).
    """
    def __get__(self):
        roots = []
        for tok in self:
            parent_index = tok.head.i
            # A token that is its own head is treated as a root.
            is_root = tok.head is tok
            # The parent sits outside the half-open range [start, end).
            outside = not (self.start <= parent_index < self.end)
            if is_root or outside:
                roots.append(tok)
        return roots[0] if len(roots) == 1 else None
property lefts:
    """Yield the tokens positioned before the Span's start that are listed
    among the `lefts` of a token inside the Span.
    """
    def __get__(self):
        # Walk the Span back-to-front; per the original author's note this
        # is what makes the yielded tokens come out in order.
        for member in reversed(self):
            for child in member.lefts:
                if child.i >= self.start:
                    # Child is inside the Span itself; not an outside left.
                    continue
                yield child
property rights:
    """Yield the tokens positioned at or after the Span's end that are
    listed among the `rights` of a token inside the Span.

    Named `rights` to pair with `lefts` and with the `rights` attribute
    of the tokens it reads from.
    """
    def __get__(self):
        for token in self:
            for right in token.rights:
                # Skip children that still fall inside the Span.
                if right.i >= self.end:
                    yield right

property right:
    """Backward-compatible alias for `rights` (the property was originally
    published under this singular name). Prefer `rights` in new code.
    """
    def __get__(self):
        return self.rights
property orth_:
    """The `string` of every token in the Span concatenated together, with
    leading and trailing whitespace stripped.
    """
    def __get__(self):
        pieces = [tok.string for tok in self]
        joined = ''.join(pieces)
        return joined.strip()