* Tweak documentation for Tokens, and hide constructor as __cinit__

This commit is contained in:
Matthew Honnibal 2015-01-27 18:57:52 +11:00
parent e10b712d29
commit e6c3d3471f

View File

@@ -63,9 +63,11 @@ cdef attr_t get_lex_attr(const LexemeC* lex, attr_id_t feat_name) nogil:
cdef class Tokens:
"""Access and set annotations onto some text.
"""
def __init__(self, Vocab vocab, unicode string):
Container class for annotated text. Constructed via English.__call__ or
Tokenizer.__call__.
"""
def __cinit__(self, Vocab vocab, unicode string):
self.vocab = vocab
self._string = string
string_length = len(string)
@@ -222,7 +224,7 @@ cdef class Tokens:
@cython.freelist(64)
cdef class Token:
"""An individual token."""
def __init__(self, Tokens tokens, int i):
def __cinit__(self, Tokens tokens, int i):
self._seq = tokens
self.i = i
cdef const TokenC* t = &tokens.data[i]
@@ -249,11 +251,6 @@ cdef class Token:
self.string = tokens._string[self.idx:next_idx]
def __len__(self):
"""The number of unicode code-points in the original string.
Returns:
length (int):
"""
return self._seq.data[self.i].lex.length
def nbor(self, int i=1):
@@ -287,7 +284,7 @@ cdef class Token:
cdef const TokenC* t = &self._seq.data[self.i]
return Token(self._seq, self.i + t.head)
property whitespace:
property whitespace_:
def __get__(self):
return self.string[self.length:]
@@ -337,7 +334,6 @@ cdef class Token:
return self._seq._dep_strings[self.dep]
cdef inline uint32_t _nth_significant_bit(uint32_t bits, int n) nogil:
cdef int i
for i in range(32):