Mirror of https://github.com/explosion/spaCy.git
(synced 2025-01-13 10:46:29 +03:00)
* Use the parse tree for sentence boundary detection (SBD) in Tokens.sents
This commit is contained in:
Parent: ba1e91189b
Commit: de772088e6
|
@ -212,20 +212,22 @@ cdef class Tokens:
|
||||||
def sents(self):
|
def sents(self):
|
||||||
"""This is really only a place-holder for a proper solution."""
|
"""This is really only a place-holder for a proper solution."""
|
||||||
cdef int i
|
cdef int i
|
||||||
sentences = []
|
|
||||||
cdef Tokens sent = Tokens(self.vocab, self._string[self.data[0].idx:])
|
cdef Tokens sent = Tokens(self.vocab, self._string[self.data[0].idx:])
|
||||||
cdef attr_t period = self.vocab.strings['.']
|
#cdef attr_t period = self.vocab.strings['.']
|
||||||
cdef attr_t question = self.vocab.strings['?']
|
#cdef attr_t question = self.vocab.strings['?']
|
||||||
cdef attr_t exclamation = self.vocab.strings['!']
|
#cdef attr_t exclamation = self.vocab.strings['!']
|
||||||
spans = []
|
spans = []
|
||||||
start = None
|
start = None
|
||||||
for i in range(self.length):
|
for i in range(self.length):
|
||||||
if start is None:
|
if start is None:
|
||||||
start = i
|
start = i
|
||||||
if self.data[i].lex.orth == period or self.data[i].lex.orth == exclamation or \
|
if self.data[i].sent_end:
|
||||||
self.data[i].lex.orth == question:
|
|
||||||
spans.append((start, i+1))
|
spans.append((start, i+1))
|
||||||
start = None
|
start = None
|
||||||
|
#if self.data[i].lex.orth == period or self.data[i].lex.orth == exclamation or \
|
||||||
|
# self.data[i].lex.orth == question:
|
||||||
|
# spans.append((start, i+1))
|
||||||
|
# start = None
|
||||||
if start is not None:
|
if start is not None:
|
||||||
spans.append((start, self.length))
|
spans.append((start, self.length))
|
||||||
return spans
|
return spans
|
||||||
|
|
Loading…
Reference in New Issue
Block a user