* Add parse navigation to Span objects

This commit is contained in:
Matthew Honnibal 2015-05-13 21:45:19 +02:00
parent ca320afe86
commit badf030b6c
2 changed files with 31 additions and 7 deletions

View File

@ -12,6 +12,3 @@ cdef class Span:
cdef public int start cdef public int start
cdef public int end cdef public int end
cdef readonly int label cdef readonly int label
cdef public Span head
cdef public list rights
cdef public list lefts

View File

@ -9,10 +9,6 @@ cdef class Span:
self.start = start self.start = start
self.end = end self.end = end
self.label = label self.label = label
self.head = self
self.rights = []
self.lefts = []
def __richcmp__(self, Span other, int op): def __richcmp__(self, Span other, int op):
# Eq # Eq
@ -41,6 +37,37 @@ cdef class Span:
for i in range(self.start, self.end): for i in range(self.start, self.end):
yield self._seq[i] yield self._seq[i]
property head:
    """The single Token of the Span that attaches outside of it.

    A token counts as a candidate when it is its own head, or when its
    head's index ``i`` falls outside ``[self.start, self.end)``. If exactly
    one token of the Span qualifies, it is returned; otherwise (zero or
    several candidates, i.e. the Span is not internally connected) the
    result is None.
    """
    def __get__(self):
        candidates = []
        for tok in self:
            parent_index = tok.head.i
            inside_span = self.start <= parent_index < self.end
            if tok.head is tok or not inside_span:
                candidates.append(tok)
        # Only a unique top-most token is meaningful as "the" head.
        return candidates[0] if len(candidates) == 1 else None
property lefts:
    """Tokens that are to the left of the Span, whose head is within the Span.

    Generator: for each token of the Span, yields those entries of
    ``token.lefts`` whose index ``i`` is smaller than ``self.start``.
    """
    def __get__(self):
        # The Span is walked right-to-left. NOTE(review): the original
        # author states this is done so the tokens come out in order;
        # confirm against the ordering of Token.lefts.
        for member in reversed(self):
            for child in member.lefts:
                if child.i >= self.start:
                    # Child lies inside the Span (or right of its start): skip.
                    continue
                yield child
property rights:
    """Tokens that are to the right of the Span, whose head is within the Span.

    Generator: for each token of the Span, in order, yields those entries
    of ``token.rights`` whose index ``i`` is at or beyond ``self.end``.

    Named ``rights`` to mirror ``lefts`` and to keep the attribute name
    that Span previously exposed.
    """
    def __get__(self):
        for member in self:
            for child in member.rights:
                if child.i >= self.end:
                    yield child

property right:
    """Alias of ``rights``, kept so existing ``span.right`` callers keep working."""
    def __get__(self):
        return self.rights
property orth_: property orth_:
def __get__(self): def __get__(self):
return ''.join([t.string for t in self]).strip() return ''.join([t.string for t in self]).strip()