diff --git a/spacy/spans.pxd b/spacy/spans.pxd
index 180a991ee..ffbac9c36 100644
--- a/spacy/spans.pxd
+++ b/spacy/spans.pxd
@@ -12,6 +12,3 @@ cdef class Span:
     cdef public int start
     cdef public int end
     cdef readonly int label
-    cdef public Span head
-    cdef public list rights
-    cdef public list lefts
diff --git a/spacy/spans.pyx b/spacy/spans.pyx
index 76f281b7c..50ccf2bfa 100644
--- a/spacy/spans.pyx
+++ b/spacy/spans.pyx
@@ -9,10 +9,6 @@ cdef class Span:
         self.start = start
         self.end = end
         self.label = label
-        self.head = self
-
-        self.rights = []
-        self.lefts = []
 
     def __richcmp__(self, Span other, int op):
         # Eq
@@ -41,6 +37,39 @@ cdef class Span:
         for i in range(self.start, self.end):
             yield self._seq[i]
 
+    property head:
+        """The highest Token in the dependency tree in the Span, or None if
+        the Span is not internally connected (i.e. there are multiple heads).
+        """
+        def __get__(self):
+            # A token is a candidate when it is its own head, or when its
+            # head lies outside the [start, end) range of this Span.
+            heads = []
+            for token in self:
+                head_i = token.head.i
+                if token.head is token or head_i >= self.end or head_i < self.start:
+                    heads.append(token)
+            if len(heads) != 1:
+                return None
+            else:
+                return heads[0]
+
+    property lefts:
+        """Tokens that are to the left of the Span, whose head is within the Span."""
+        def __get__(self):
+            for token in reversed(self): # Reverse, so we get the tokens in order
+                for left in token.lefts:
+                    if left.i < self.start:
+                        yield left
+
+    property rights:
+        """Tokens that are to the right of the Span, whose head is within the Span."""
+        def __get__(self):
+            for token in self:
+                for right in token.rights:
+                    if right.i >= self.end:
+                        yield right
+
     property orth_:
         def __get__(self):
             return ''.join([t.string for t in self]).strip()