mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-28 02:04:07 +03:00
* Tweak documentation for Tokens, and hide constructor as __cinit__
This commit is contained in:
parent
e10b712d29
commit
e6c3d3471f
|
@ -63,9 +63,11 @@ cdef attr_t get_lex_attr(const LexemeC* lex, attr_id_t feat_name) nogil:
|
||||||
|
|
||||||
|
|
||||||
cdef class Tokens:
|
cdef class Tokens:
|
||||||
"""Access and set annotations onto some text.
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, Vocab vocab, unicode string):
|
Container class for annotated text. Constructed via English.__call__ or
|
||||||
|
Tokenizer.__call__.
|
||||||
|
"""
|
||||||
|
def __cinit__(self, Vocab vocab, unicode string):
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self._string = string
|
self._string = string
|
||||||
string_length = len(string)
|
string_length = len(string)
|
||||||
|
@ -222,7 +224,7 @@ cdef class Tokens:
|
||||||
@cython.freelist(64)
|
@cython.freelist(64)
|
||||||
cdef class Token:
|
cdef class Token:
|
||||||
"""An individual token."""
|
"""An individual token."""
|
||||||
def __init__(self, Tokens tokens, int i):
|
def __cinit__(self, Tokens tokens, int i):
|
||||||
self._seq = tokens
|
self._seq = tokens
|
||||||
self.i = i
|
self.i = i
|
||||||
cdef const TokenC* t = &tokens.data[i]
|
cdef const TokenC* t = &tokens.data[i]
|
||||||
|
@ -249,11 +251,6 @@ cdef class Token:
|
||||||
self.string = tokens._string[self.idx:next_idx]
|
self.string = tokens._string[self.idx:next_idx]
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
"""The number of unicode code-points in the original string.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
length (int):
|
|
||||||
"""
|
|
||||||
return self._seq.data[self.i].lex.length
|
return self._seq.data[self.i].lex.length
|
||||||
|
|
||||||
def nbor(self, int i=1):
|
def nbor(self, int i=1):
|
||||||
|
@ -287,7 +284,7 @@ cdef class Token:
|
||||||
cdef const TokenC* t = &self._seq.data[self.i]
|
cdef const TokenC* t = &self._seq.data[self.i]
|
||||||
return Token(self._seq, self.i + t.head)
|
return Token(self._seq, self.i + t.head)
|
||||||
|
|
||||||
property whitespace:
|
property whitespace_:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.string[self.length:]
|
return self.string[self.length:]
|
||||||
|
|
||||||
|
@ -337,7 +334,6 @@ cdef class Token:
|
||||||
return self._seq._dep_strings[self.dep]
|
return self._seq._dep_strings[self.dep]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
cdef inline uint32_t _nth_significant_bit(uint32_t bits, int n) nogil:
|
cdef inline uint32_t _nth_significant_bit(uint32_t bits, int n) nogil:
|
||||||
cdef int i
|
cdef int i
|
||||||
for i in range(32):
|
for i in range(32):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user