* Use the parse tree for sentence boundary detection (SBD) in Tokens.sents

This commit is contained in:
Matthew Honnibal 2015-02-02 12:17:32 +11:00
parent ba1e91189b
commit de772088e6

View File

@ -212,20 +212,22 @@ cdef class Tokens:
def sents(self):
    """Return the sentence spans of this document.

    A sentence ends at every token whose ``sent_end`` flag is set
    (presumably filled in by the parser -- confirm against the code that
    writes ``sent_end``).  Any tokens left over after the last flagged
    token form one final span.

    Returns:
        A list of ``(start, end)`` token-index pairs, with ``end``
        exclusive.  The list is empty when the document has no tokens.
    """
    cdef int i
    # Index of the first token of the sentence currently being collected.
    cdef int start = 0
    spans = []
    for i in range(self.length):
        if self.data[i].sent_end:
            spans.append((start, i + 1))
            start = i + 1
    # Tokens after the last sentence-final token still belong to a span.
    if start < self.length:
        spans.append((start, self.length))
    return spans