mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 02:16:32 +03:00
* Have 'string' refer to the whitespace-padded string
This commit is contained in:
parent
706305ee26
commit
5fd72bc220
|
@@ -65,3 +65,4 @@ cdef class Token:
|
||||||
cdef readonly attr_t dep
|
cdef readonly attr_t dep
|
||||||
|
|
||||||
cdef readonly ndarray repvec
|
cdef readonly ndarray repvec
|
||||||
|
cdef readonly unicode string
|
||||||
|
|
|
@@ -235,16 +235,10 @@ cdef class Token:
|
||||||
self.tag = t.tag
|
self.tag = t.tag
|
||||||
self.dep = t.dep
|
self.dep = t.dep
|
||||||
self.repvec = numpy.asarray(<float[:300,]> t.lex.repvec)
|
self.repvec = numpy.asarray(<float[:300,]> t.lex.repvec)
|
||||||
|
cdef int next_idx = (t+1).idx
|
||||||
def __unicode__(self):
|
if next_idx <= self.idx:
|
||||||
cdef const TokenC* t = &self._seq.data[self.i]
|
next_idx = self.idx + self.length
|
||||||
cdef int end_idx = t.idx + t.lex.length
|
self.string = tokens._string[self.idx:next_idx]
|
||||||
if self.i + 1 == self._seq.length:
|
|
||||||
return self.string
|
|
||||||
if end_idx == t[1].idx:
|
|
||||||
return self.string
|
|
||||||
else:
|
|
||||||
return self.string + ' '
|
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
"""The number of unicode code-points in the original string.
|
"""The number of unicode code-points in the original string.
|
||||||
|
@@ -260,13 +254,10 @@ cdef class Token:
|
||||||
cdef const TokenC* t = &self._seq.data[self.i]
|
cdef const TokenC* t = &self._seq.data[self.i]
|
||||||
return Token(self._seq, self.i + t.head)
|
return Token(self._seq, self.i + t.head)
|
||||||
|
|
||||||
property string:
|
property whitespace:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
cdef const TokenC* t = &self._seq.data[self.i]
|
cdef int end_idx = self.idx + self.length
|
||||||
if t.lex.orth == 0:
|
|
||||||
return ''
|
|
||||||
cdef unicode py_ustr = self._seq.vocab.strings[t.lex.orth]
|
|
||||||
return py_ustr
|
|
||||||
|
|
||||||
property orth_:
|
property orth_:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user