2014-09-10 22:41:37 +04:00
|
|
|
from .typedefs cimport hash_t, utf8_t, flag_t, id_t
|
2014-09-18 01:09:24 +04:00
|
|
|
from cymem.cymem cimport Pool
|
2014-09-10 22:41:37 +04:00
|
|
|
|
|
|
|
|
2014-10-09 12:53:30 +04:00
|
|
|
# Module-level flag_t value shared with cimporting modules.
# NOTE(review): the name suggests this is the dist_flags value given to
# out-of-vocabulary lexemes — confirm where it is assigned.
# NOTE(review): newer Cython releases warn about `cpdef` on variables and
# treat it like `cdef`; confirm before upgrading the compiler.
cpdef flag_t OOV_DIST_FLAGS
|
|
|
|
|
|
|
|
|
|
|
|
# Slot indices for the integer-valued lexeme attributes.
#
# Members are numbered consecutively from 0, so the trailing LexInt_N equals
# the number of real slots; LexemeC.ints is sized with it.
# NOTE(review): presumably these are the `i` values accepted by
# lexeme_get_int — confirm in the implementation.
cpdef enum LexInts:
    LexInt_id
    LexInt_length
    LexInt_cluster
    LexInt_pos
    LexInt_supersense
    LexInt_N  # count sentinel: must stay last
|
|
|
|
|
|
|
|
|
|
|
|
# Slot indices for the float-valued lexeme attributes.
#
# Members are numbered consecutively from 0, so the trailing LexFloat_N
# equals the number of real slots; LexemeC.floats is sized with it.
# NOTE(review): presumably these are the `i` values accepted by
# lexeme_get_float — confirm in the implementation.
cpdef enum LexFloats:
    LexFloat_prob
    LexFloat_sentiment
    LexFloat_N  # count sentinel: must stay last
|
|
|
|
|
|
|
|
|
|
|
|
# Slot indices for the string-valued lexeme attributes.
#
# Members are numbered consecutively from 0, so the trailing LexStr_N equals
# the number of real slots; LexemeC.strings is sized with it.
# NOTE(review): presumably these are the `i` values accepted by
# lexeme_get_string — confirm in the implementation.
cpdef enum LexStrs:
    LexStr_orig
    LexStr_norm
    LexStr_shape
    LexStr_unsparse
    LexStr_asciied
    LexStr_N  # count sentinel: must stay last
|
|
|
|
|
|
|
|
|
|
|
|
# Identifiers for the orthographic boolean features of a lexeme.
#
# Members are numbered consecutively from 0; LexOrth_N is the number of
# flags defined.
# NOTE(review): presumably each value is a bit position inside
# LexemeC.orth_flags and the `flag_id` passed to lexeme_check_orth_flag —
# confirm in the implementation.
cpdef enum LexOrthFlags:
    LexOrth_alpha
    LexOrth_ascii
    LexOrth_digit
    LexOrth_lower
    LexOrth_punct
    LexOrth_space
    LexOrth_title
    LexOrth_upper
    LexOrth_N  # count sentinel: must stay last
|
|
|
|
|
|
|
|
|
|
|
|
# Identifiers for the distributional boolean features of a lexeme.
#
# Members are numbered consecutively from 0; LexDist_N is the number of
# flags defined. The blank lines keep the grouping of the original layout.
# NOTE(review): presumably each value is a bit position inside
# LexemeC.dist_flags and the `flag_id` passed to lexeme_check_dist_flag —
# confirm in the implementation.
cpdef enum LexDistFlags:
    LexDist_adj
    LexDist_adp
    LexDist_adv
    LexDist_conj
    LexDist_det
    LexDist_noun
    LexDist_num
    LexDist_pdt
    LexDist_pos
    LexDist_pron
    LexDist_prt
    LexDist_punct
    LexDist_verb

    LexDist_lower
    LexDist_title
    LexDist_upper

    LexDist_N  # count sentinel: must stay last
|
|
|
|
|
|
|
|
|
2014-09-10 22:41:37 +04:00
|
|
|
# C-level record holding every stored attribute of one lexeme.
#
# The three arrays are sized by the *_N sentinel of the matching enum
# (LexInts, LexFloats, LexStrs), so an enum member doubles as the index of
# its slot.
cdef struct LexemeC:
    # One entry per LexInts member.
    int[<int>LexInt_N] ints
    # One entry per LexFloats member.
    float[<int>LexFloat_N] floats
    # One entry per LexStrs member.
    utf8_t[<int>LexStr_N] strings
    # NOTE(review): presumably a bit set indexed by LexOrthFlags — confirm.
    flag_t orth_flags
    # NOTE(review): presumably a bit set indexed by LexDistFlags — confirm.
    flag_t dist_flags
|
|
|
|
|
2014-10-10 12:17:22 +04:00
|
|
|
|
2014-10-10 01:11:31 +04:00
|
|
|
# Prototype only; the body is defined elsewhere.
# Takes an unsigned index `i` and a unicode `string` and returns a dict.
# Declared cpdef, so it is callable from both Python and C code.
# NOTE(review): presumably the dict is the attribute mapping later consumed
# by lexeme_unpack — confirm in the implementation.
cpdef dict get_lexeme_dict(size_t i, unicode string)
|
2014-10-09 12:53:30 +04:00
|
|
|
|
|
|
|
# Prototype only; the body is defined elsewhere.
# Returns a char* for the given unicode `string`. A NULL return signals that
# a Python exception was raised (`except NULL`).
# NOTE(review): `length` is a pointer, presumably an out-parameter that
# receives the encoded byte length — confirm in the implementation.
cdef char* intern_and_encode(unicode string, size_t* length) except NULL
|
2014-09-10 22:41:37 +04:00
|
|
|
|
2014-10-09 12:53:30 +04:00
|
|
|
# Prototype only; the body is defined elsewhere.
# Returns an int for slot `i` of `lexeme`. `except *` makes callers check
# for a pending Python exception after every call, because no int value is
# reserved as an error marker.
# NOTE(review): `i` is presumably a LexInts member — confirm.
cdef int lexeme_get_int(LexemeC* lexeme, size_t i) except *
|
2014-09-10 22:41:37 +04:00
|
|
|
|
2014-10-09 12:53:30 +04:00
|
|
|
# Prototype only; the body is defined elsewhere.
# Returns a float for slot `i` of `lexeme`. `except *` makes callers check
# for a pending Python exception after every call, because no float value
# is reserved as an error marker.
# NOTE(review): `i` is presumably a LexFloats member — confirm.
cdef float lexeme_get_float(LexemeC* lexeme, size_t i) except *
|
2014-09-10 22:41:37 +04:00
|
|
|
|
2014-10-09 12:53:30 +04:00
|
|
|
# Prototype only; the body is defined elsewhere.
# Returns a Python unicode object for slot `i` of `lexeme`. Because the
# return type is a Python object, exceptions propagate without an `except`
# clause.
# NOTE(review): `i` is presumably a LexStrs member — confirm.
cdef unicode lexeme_get_string(LexemeC* lexeme, size_t i)
|
2014-09-10 22:41:37 +04:00
|
|
|
|
2014-10-09 12:53:30 +04:00
|
|
|
# Prototype only; the body is defined elsewhere.
# Returns a C boolean for `flag_id` on `lexeme`. `except *` makes callers
# check for a pending Python exception after every call.
# NOTE(review): presumably tests LexemeC.orth_flags with a LexOrthFlags
# member as `flag_id` — confirm.
cdef bint lexeme_check_orth_flag(LexemeC* lexeme, size_t flag_id) except *
|
2014-10-09 07:10:46 +04:00
|
|
|
|
2014-10-09 12:53:30 +04:00
|
|
|
# Prototype only; the body is defined elsewhere.
# Returns a C boolean for `flag_id` on `lexeme`. `except *` makes callers
# check for a pending Python exception after every call.
# NOTE(review): presumably tests LexemeC.dist_flags with a LexDistFlags
# member as `flag_id` — confirm.
cdef bint lexeme_check_dist_flag(LexemeC* lexeme, size_t flag_id) except *
|
2014-10-09 07:10:46 +04:00
|
|
|
|
|
|
|
# Prototype only; the body is defined elsewhere.
# Takes a pointer to a LexemeC and returns a Python dict.
# NOTE(review): presumably the inverse of lexeme_unpack — confirm the dict
# layout in the implementation.
cdef dict lexeme_pack(LexemeC* lexeme)
|
2014-10-09 07:51:35 +04:00
|
|
|
# Prototype only; the body is defined elsewhere.
# Takes a pointer to a LexemeC and a dict `p`, and returns an int. A return
# of -1 signals that a Python exception was raised (`except -1`).
# NOTE(review): presumably fills `lexeme` from a dict produced by
# lexeme_pack — confirm in the implementation.
cdef int lexeme_unpack(LexemeC* lexeme, dict p) except -1
|