* Tweak documentation for Tokens, and hide constructor as __cinit__

This commit is contained in:
Matthew Honnibal 2015-01-27 18:57:52 +11:00
parent e10b712d29
commit e6c3d3471f

View File

@ -63,9 +63,11 @@ cdef attr_t get_lex_attr(const LexemeC* lex, attr_id_t feat_name) nogil:
cdef class Tokens: cdef class Tokens:
"""Access and set annotations onto some text.
""" """
def __init__(self, Vocab vocab, unicode string): Container class for annotated text. Constructed via English.__call__ or
Tokenizer.__call__.
"""
def __cinit__(self, Vocab vocab, unicode string):
self.vocab = vocab self.vocab = vocab
self._string = string self._string = string
string_length = len(string) string_length = len(string)
@ -222,7 +224,7 @@ cdef class Tokens:
@cython.freelist(64) @cython.freelist(64)
cdef class Token: cdef class Token:
"""An individual token.""" """An individual token."""
def __init__(self, Tokens tokens, int i): def __cinit__(self, Tokens tokens, int i):
self._seq = tokens self._seq = tokens
self.i = i self.i = i
cdef const TokenC* t = &tokens.data[i] cdef const TokenC* t = &tokens.data[i]
@ -249,11 +251,6 @@ cdef class Token:
self.string = tokens._string[self.idx:next_idx] self.string = tokens._string[self.idx:next_idx]
def __len__(self): def __len__(self):
"""The number of unicode code-points in the original string.
Returns:
length (int):
"""
return self._seq.data[self.i].lex.length return self._seq.data[self.i].lex.length
def nbor(self, int i=1): def nbor(self, int i=1):
@ -287,7 +284,7 @@ cdef class Token:
cdef const TokenC* t = &self._seq.data[self.i] cdef const TokenC* t = &self._seq.data[self.i]
return Token(self._seq, self.i + t.head) return Token(self._seq, self.i + t.head)
property whitespace: property whitespace_:
def __get__(self): def __get__(self):
return self.string[self.length:] return self.string[self.length:]
@ -337,7 +334,6 @@ cdef class Token:
return self._seq._dep_strings[self.dep] return self._seq._dep_strings[self.dep]
cdef inline uint32_t _nth_significant_bit(uint32_t bits, int n) nogil: cdef inline uint32_t _nth_significant_bit(uint32_t bits, int n) nogil:
cdef int i cdef int i
for i in range(32): for i in range(32):