2014-08-27 19:15:39 +04:00
|
|
|
from spacy.lang cimport Language
|
2014-08-25 18:42:22 +04:00
|
|
|
from spacy.word cimport Lexeme
|
2014-09-10 20:11:13 +04:00
|
|
|
from spacy.tokens cimport Tokens
|
|
|
|
|
|
|
|
|
|
|
|
cdef class EnglishTokens(Tokens):
    # Declarations only; the implementations live outside this file.
    # Every method takes a single ``size_t i`` argument (presumably the
    # position of a token in the sequence -- confirm against the
    # implementation). The order of declarations is unchanged.

    # --- Methods returning a unicode string ---
    cpdef unicode canon_string(self, size_t i)
    cpdef unicode shape_string(self, size_t i)
    cpdef unicode non_sparse_string(self, size_t i)
    cpdef unicode asciied(self, size_t i)

    # --- ``is_*`` methods returning a C boolean (bint) ---
    cpdef bint is_alpha(self, size_t i)
    cpdef bint is_ascii(self, size_t i)
    cpdef bint is_digit(self, size_t i)
    cpdef bint is_lower(self, size_t i)
    cpdef bint is_punct(self, size_t i)
    cpdef bint is_space(self, size_t i)
    cpdef bint is_title(self, size_t i)
    cpdef bint is_upper(self, size_t i)

    # --- ``can_*`` methods returning a C boolean (bint) ---
    cpdef bint can_adj(self, size_t i)
    cpdef bint can_adp(self, size_t i)
    cpdef bint can_adv(self, size_t i)
    cpdef bint can_conj(self, size_t i)
    cpdef bint can_det(self, size_t i)
    cpdef bint can_noun(self, size_t i)
    cpdef bint can_num(self, size_t i)
    cpdef bint can_pdt(self, size_t i)
    cpdef bint can_pos(self, size_t i)
    cpdef bint can_pron(self, size_t i)
    cpdef bint can_prt(self, size_t i)
    cpdef bint can_punct(self, size_t i)
    cpdef bint can_verb(self, size_t i)

    # --- ``oft_*`` methods returning a C boolean (bint) ---
    cpdef bint oft_lower(self, size_t i)
    cpdef bint oft_title(self, size_t i)
    cpdef bint oft_upper(self, size_t i)
|
2014-08-22 01:49:14 +04:00
|
|
|
|
|
|
|
|
2014-08-27 19:15:39 +04:00
|
|
|
cdef class English(Language):
    # Declaration only; the implementation lives outside this file.
    # ``_split_one`` takes a unicode ``word`` and returns a C int. What the
    # returned integer means is not visible here -- see the implementation.
    cdef int _split_one(self, unicode word)
|