mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-25 00:34:20 +03:00
* Add Span class to Python API
This commit is contained in:
parent
b9b695fb1b
commit
64db61bff1
|
@ -54,6 +54,12 @@ cdef class Tokens:
|
|||
cdef int set_parse(self, const TokenC* parsed, dict label_ids) except -1
|
||||
|
||||
|
||||
# Declaration of the Span extension type: a (start, end) window onto a Tokens object.
cdef class Span:
|
||||
# The Tokens object this span indexes into (set from the constructor's `tokens` argument).
cdef Tokens _seq
|
||||
# Index into _seq of the first token of the span; `public` exposes it to Python code.
cdef public int start
|
||||
# Exclusive upper bound: iteration covers range(start, end); `public` exposes it to Python code.
cdef public int end
|
||||
|
||||
|
||||
cdef class Token:
|
||||
cdef Vocab vocab
|
||||
cdef unicode _string
|
||||
|
|
|
@ -237,24 +237,15 @@ cdef class Tokens:
|
|||
"""This is really only a place-holder for a proper solution."""
|
||||
cdef int i
|
||||
cdef Tokens sent = Tokens(self.vocab, self._string[self.data[0].idx:])
|
||||
#cdef attr_t period = self.vocab.strings['.']
|
||||
#cdef attr_t question = self.vocab.strings['?']
|
||||
#cdef attr_t exclamation = self.vocab.strings['!']
|
||||
spans = []
|
||||
start = None
|
||||
for i in range(self.length):
|
||||
if start is None:
|
||||
start = i
|
||||
if self.data[i].sent_end:
|
||||
spans.append((start, i+1))
|
||||
yield Span(self, start, i+1)
|
||||
start = None
|
||||
#if self.data[i].lex.orth == period or self.data[i].lex.orth == exclamation or \
|
||||
# self.data[i].lex.orth == question:
|
||||
# spans.append((start, i+1))
|
||||
# start = None
|
||||
if start is not None:
|
||||
spans.append((start, self.length))
|
||||
return spans
|
||||
yield Span(self, start, self.length)
|
||||
|
||||
cdef int set_parse(self, const TokenC* parsed, dict label_ids) except -1:
|
||||
self._py_tokens = [None] * self.length
|
||||
|
@ -267,6 +258,26 @@ cdef class Tokens:
|
|||
self._dep_strings = tuple(dep_strings)
|
||||
|
||||
|
||||
cdef class Span:
    """A slice from a Tokens object.

    A Span does not copy anything: it keeps a reference to the parent
    Tokens object together with a half-open ``[start, end)`` range of
    token indices, and resolves every lookup through the parent.
    """
    def __cinit__(self, Tokens tokens, int start, int end):
        """Create a view over ``tokens[start:end]``.

        tokens: The parent Tokens object the span refers into.
        start: Index of the first token in the span.
        end: Index one past the last token in the span.
        """
        self._seq = tokens
        self.start = start
        self.end = end

    def __len__(self):
        """Return the number of tokens in the span (0 if end < start)."""
        if self.end < self.start:
            return 0
        return self.end - self.start

    def __getitem__(self, int i):
        """Return the i-th token of the span.

        Negative indices count back from the end of the span, as with
        built-in sequences.

        Raises:
            IndexError: if ``i`` falls outside the span.
        """
        cdef int length = self.end - self.start
        if length < 0:
            length = 0
        # Resolve negative indices relative to the span itself; without
        # this, span[-1] would address the parent token *before* the span.
        if i < 0:
            i += length
        # Refuse to reach past the span's own boundaries into the parent.
        if i < 0 or i >= length:
            raise IndexError("Span index out of range")
        return self._seq[self.start + i]

    def __iter__(self):
        """Yield the tokens of the span in order, looked up in the parent."""
        for i in range(self.start, self.end):
            yield self._seq[i]
|
||||
|
||||
|
||||
cdef class Token:
|
||||
"""An individual token --- i.e. a word, a punctuation symbol, etc. Created
|
||||
via Tokens.__getitem__ and Tokens.__iter__.
|
||||
|
|
Loading…
Reference in New Issue
Block a user