2014-08-03 00:51:52 +04:00
|
|
|
from libc.stdint cimport uint32_t
|
2014-07-05 22:51:42 +04:00
|
|
|
from libc.stdint cimport uint64_t
|
2014-08-22 01:49:14 +04:00
|
|
|
cimport cython
|
2014-08-20 15:39:39 +04:00
|
|
|
|
2014-07-05 22:51:42 +04:00
|
|
|
# Alias of C `int`. Used in this file as the type of `Lexeme.cluster` and as
# the return type of `cluster_of`.
ctypedef int ClusterID
|
2014-08-03 00:51:52 +04:00
|
|
|
# Alias of the 32-bit unsigned `uint32_t`. Used in this file for `Lexeme.lex`,
# the elements of `Lexeme.string_views`, and the results of `lex_of`/`view_of`.
# Presumably holds a hash of a string (inferred from the name) -- TODO confirm.
ctypedef uint32_t StringHash
|
2014-08-20 15:39:39 +04:00
|
|
|
# Alias of `size_t`. It is the handle type most of the accessor functions
# declared in this file take as their first argument.
# NOTE(review): how a LexID maps to a `Lexeme` (index vs. address) is not
# visible in this file -- confirm against the implementation.
ctypedef size_t LexID
|
|
|
|
# Alias of plain C `char`. Used for `Lexeme.orth_flags` and for the `flag`
# argument of `check_orth_flag`.
# NOTE(review): the signedness of plain `char` is implementation-defined in C;
# if this is used as a bit set (inferred from the name), confirm the top bit
# is never relied upon.
ctypedef char OrthFlags
|
|
|
|
# Alias of plain C `char`. Used for `Lexeme.dist_flags` and for the `flag`
# argument of `check_dist_flag`.
# NOTE(review): the signedness of plain `char` is implementation-defined in C;
# if this is used as a bit set (inferred from the name), confirm the top bit
# is never relied upon.
ctypedef char DistFlags
|
|
|
|
# Alias of the 64-bit unsigned `uint64_t`. Used for `Lexeme.possible_tags` and
# for the `flag` argument of `can_tag`.
ctypedef uint64_t TagFlags
|
2014-07-07 18:58:48 +04:00
|
|
|
|
|
|
|
|
2014-07-05 22:51:42 +04:00
|
|
|
# C struct describing one lexeme. Only the field layout is declared here; how
# instances are allocated, owned and freed is not visible in this file.
cdef struct Lexeme:
|
2014-08-20 15:39:39 +04:00
|
|
|
# 32-bit StringHash value; presumably the hash of `string` -- TODO confirm.
StringHash lex
|
2014-08-19 06:21:20 +04:00
|
|
|
# Raw C character pointer. Encoding and ownership are not visible here.
char* string
|
|
|
|
# NOTE(review): presumably the length of `string`; whether it counts bytes or
# characters is not visible here -- confirm.
size_t length
|
2014-08-20 15:39:39 +04:00
|
|
|
# Double-precision value; presumably a (log-)probability for the lexeme --
# TODO confirm scale against the code that fills it.
double prob
|
|
|
|
# ClusterID (C int) value associated with the lexeme.
ClusterID cluster
|
|
|
|
# 64-bit TagFlags value; presumably one bit per tag -- TODO confirm.
TagFlags possible_tags
|
|
|
|
# DistFlags (plain char) value.
DistFlags dist_flags
|
|
|
|
# OrthFlags (plain char) value.
OrthFlags orth_flags
|
|
|
|
# Pointer to StringHash values. The number of elements is not evident from
# this declaration; presumably indexed by the `view` argument of `view_of`.
StringHash* string_views
|
2014-07-07 22:27:02 +04:00
|
|
|
|
2014-08-19 04:40:37 +04:00
|
|
|
|
2014-08-21 18:37:13 +04:00
|
|
|
# Prototype only (implementation not in this file): takes a LexID and returns
# a StringHash. `except 0` reserves the return value 0 as the error signal, so
# 0 can never be returned as a legitimate result.
cpdef StringHash lex_of(LexID lex_id) except 0
|
2014-08-20 15:39:39 +04:00
|
|
|
# Prototype only: takes a LexID and returns a single `char`.
# `except 0` reserves the return value 0 as the error signal.
# NOTE(review): this means a 0 (NUL) byte cannot be returned legitimately --
# confirm the implementation never needs to.
cpdef char first_of(LexID lex_id) except 0
|
|
|
|
# Prototype only: takes a LexID and returns a `size_t`.
# `except 0` reserves the return value 0 as the error signal.
# NOTE(review): this means a length of 0 cannot be returned legitimately --
# confirm that zero-length entries cannot occur.
cpdef size_t length_of(LexID lex_id) except 0
|
2014-08-22 18:35:48 +04:00
|
|
|
# Prototype only: takes a LexID and returns a `double`.
# `except 1` reserves the return value 1 as the error signal.
# NOTE(review): presumably safe because the value is a log-probability and
# therefore never positive -- confirm against the implementation.
cpdef double prob_of(LexID lex_id) except 1
|
2014-08-20 15:39:39 +04:00
|
|
|
# Prototype only: takes a LexID and returns a ClusterID (C int).
# `except 0` reserves the return value 0 as the error signal.
# NOTE(review): this means cluster id 0 cannot be returned legitimately --
# confirm that 0 is never a valid cluster.
cpdef ClusterID cluster_of(LexID lex_id) except 0
|
2014-08-19 04:40:37 +04:00
|
|
|
|
2014-08-22 18:35:48 +04:00
|
|
|
|
|
|
|
# Prototype only: takes a lexeme id and returns a C boolean (`bint`).
# NOTE(review): the parameter is typed `size_t` rather than `LexID` like the
# other accessors (LexID is a ctypedef of size_t, so the C type is the same),
# and no exception clause is declared, unlike the `except *` used by the other
# `bint` functions in this file -- confirm both are intentional.
cpdef bint is_often_titled(size_t lex_id)
|
|
|
|
# Prototype only: takes a lexeme id and returns a C boolean (`bint`).
# NOTE(review): the parameter is typed `size_t` rather than `LexID` like the
# other accessors (LexID is a ctypedef of size_t, so the C type is the same),
# and no exception clause is declared, unlike the `except *` used by the other
# `bint` functions in this file -- confirm both are intentional.
cpdef bint is_often_uppered(size_t lex_id)
|
|
|
|
|
|
|
|
|
|
|
|
# Prototype only: takes a LexID and a 64-bit TagFlags value, returns `bint`.
# `except *` means every return value is legal and the caller checks for a
# pending Python exception after each call.
# Presumably tests `flag` against the lexeme's `possible_tags` -- TODO confirm.
cpdef bint can_tag(LexID lex, TagFlags flag) except *
|
2014-08-20 15:39:39 +04:00
|
|
|
# Prototype only: takes a LexID and a DistFlags value, returns `bint`.
# `except *` means every return value is legal and the caller checks for a
# pending Python exception after each call.
# Presumably tests `flag` against the lexeme's `dist_flags` -- TODO confirm.
cpdef bint check_dist_flag(LexID lex, DistFlags flag) except *
|
|
|
|
# Prototype only: takes a LexID and an OrthFlags value, returns `bint`.
# `except *` means every return value is legal and the caller checks for a
# pending Python exception after each call.
# Presumably tests `flag` against the lexeme's `orth_flags` -- TODO confirm.
cpdef bint check_orth_flag(LexID lex, OrthFlags flag) except *
|
2014-08-19 04:40:37 +04:00
|
|
|
|
2014-08-20 15:39:39 +04:00
|
|
|
# Prototype only: takes a LexID and a `size_t` view number, returns a
# StringHash. `except 0` reserves the return value 0 as the error signal, so
# 0 can never be returned as a legitimate result.
# Presumably indexes the lexeme's `string_views` array by `view`; no bounds
# information is visible in this file -- TODO confirm.
cpdef StringHash view_of(LexID lex_id, size_t view) except 0
|