mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 18:56:36 +03:00
* Have SBD return start/end indices
This commit is contained in:
parent
b183dff72d
commit
a27b23cc8f
|
@@ -92,15 +92,18 @@ cdef class Tokens:
|
||||||
cdef attr_t period = self.vocab.strings['.']
|
cdef attr_t period = self.vocab.strings['.']
|
||||||
cdef attr_t question = self.vocab.strings['?']
|
cdef attr_t question = self.vocab.strings['?']
|
||||||
cdef attr_t exclamation = self.vocab.strings['!']
|
cdef attr_t exclamation = self.vocab.strings['!']
|
||||||
|
spans = []
|
||||||
|
start = None
|
||||||
for i in range(self.length):
|
for i in range(self.length):
|
||||||
sent.push_back(self.data[i].idx, &self.data[i])
|
if start is None:
|
||||||
|
start = i
|
||||||
if self.data[i].lex.sic == period or self.data[i].lex.sic == exclamation or \
|
if self.data[i].lex.sic == period or self.data[i].lex.sic == exclamation or \
|
||||||
self.data[i].lex.sic == question:
|
self.data[i].lex.sic == question:
|
||||||
sentences.append(sent)
|
spans.append((start, i+1))
|
||||||
sent = Tokens(self.vocab, self._string[self.data[i].idx:])
|
start = None
|
||||||
if sent.length:
|
if start is not None:
|
||||||
sentences.append(sent)
|
spans.append((start, self.length))
|
||||||
return sentences
|
return spans
|
||||||
|
|
||||||
def __getitem__(self, i):
|
def __getitem__(self, i):
|
||||||
"""Retrieve a token.
|
"""Retrieve a token.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user