2014-09-11 18:57:08 +04:00
|
|
|
from spacy.lexeme cimport LexemeC
|
2014-09-15 05:22:40 +04:00
|
|
|
from libcpp.vector cimport vector
|
2014-10-22 05:55:42 +04:00
|
|
|
from thinc.typedefs cimport atom_t
|
2014-09-11 18:57:08 +04:00
|
|
|
|
|
|
|
|
2014-09-15 05:22:40 +04:00
|
|
|
cdef class Tokens:
    # C-level declaration of the Tokens extension type. Only the attribute
    # layout and method signatures are declared here; the bodies live in the
    # matching implementation file, and every signature below must stay in
    # sync with it.

    # Parallel C++ vectors held by pointer.
    # NOTE(review): allocation and release of these vectors is not visible
    # from this declaration -- confirm it in the implementation file.
    cdef vector[LexemeC*] *lex
    cdef vector[int] *idx
    cdef vector[int] *pos

    # --- C-only API (not callable from Python) -----------------------------
    # `except -1`: a return value of -1 signals that an exception was raised.
    cdef int extend(self, int i, LexemeC** lexemes, int n) except -1
    cdef int push_back(self, int i, LexemeC* lexeme) except -1

    # The three *_array methods share one signature: an output buffer
    # `atoms`, a position `i`, `n_idx` entries in `indices` and `n_feat`
    # entries in `features`.
    # NOTE(review): presumably they fill `atoms` with feature values for the
    # tokens at the given offsets around `i` -- confirm against the bodies.
    cdef int int_array(self, atom_t* atoms, int i, int* indices, int n_idx,
                       int* features, int n_feat) except -1
    cdef int string_array(self, atom_t* atoms, int i, int* indices, int n_idx,
                          int* features, int n_feat) except -1
    cdef int bool_array(self, atom_t* atoms, int i, int* indices, int n_idx,
                        int* features, int n_feat) except -1

    # --- Per-token accessors (cpdef: callable from both C and Python) ------
    # Every accessor takes the token position `i`.
    cpdef int id(self, size_t i) except -1
    # `except 1`: a returned value of exactly 1 is treated as the error
    # signal, so the implementation must never return 1.0 as a real result.
    # NOTE(review): presumably values are log-probabilities (<= 0) -- confirm.
    cpdef float prob(self, size_t i) except 1
    # `except *`: no value is reserved for errors; the caller checks for a
    # pending exception after every call instead.
    cpdef int cluster(self, size_t i) except *
    cpdef bint check_orth_flag(self, size_t i, size_t flag_id) except *
    cpdef bint check_dist_flag(self, size_t i, size_t flag_id) except *

    # String accessors return Python `unicode` objects, so exceptions
    # propagate without an explicit `except` clause.
    cpdef unicode string_view(self, size_t i, size_t view_id)

    cpdef unicode string(self, size_t i)
    cpdef unicode orig(self, size_t i)
    cpdef unicode norm(self, size_t i)
    cpdef unicode shape(self, size_t i)
    cpdef unicode unsparse(self, size_t i)
    cpdef unicode asciied(self, size_t i)

    # Boolean predicates on token `i`.
    # NOTE(review): presumably thin wrappers over check_orth_flag /
    # check_dist_flag -- confirm in the implementation.
    cpdef bint is_alpha(self, size_t i) except *
    cpdef bint is_ascii(self, size_t i) except *
    cpdef bint is_digit(self, size_t i) except *
    cpdef bint is_lower(self, size_t i) except *
    cpdef bint is_punct(self, size_t i) except *
    cpdef bint is_space(self, size_t i) except *
    cpdef bint is_title(self, size_t i) except *
    cpdef bint is_upper(self, size_t i) except *

    # NOTE(review): the can_* names look like "token may carry this
    # part-of-speech tag" -- confirm.
    cpdef bint can_adj(self, size_t i) except *
    cpdef bint can_adp(self, size_t i) except *
    cpdef bint can_adv(self, size_t i) except *
    cpdef bint can_conj(self, size_t i) except *
    cpdef bint can_det(self, size_t i) except *
    cpdef bint can_noun(self, size_t i) except *
    cpdef bint can_num(self, size_t i) except *
    cpdef bint can_pdt(self, size_t i) except *
    cpdef bint can_pos(self, size_t i) except *
    cpdef bint can_pron(self, size_t i) except *
    cpdef bint can_prt(self, size_t i) except *
    cpdef bint can_punct(self, size_t i) except *
    cpdef bint can_verb(self, size_t i) except *

    # NOTE(review): the oft_* names look like "word often appears in this
    # casing" -- confirm.
    cpdef bint oft_lower(self, size_t i) except *
    cpdef bint oft_title(self, size_t i) except *
    cpdef bint oft_upper(self, size_t i) except *
|