spaCy/spacy/lexeme.pxd

84 lines
1.6 KiB
Cython
Raw Normal View History

2014-08-03 00:51:52 +04:00
from libc.stdint cimport uint32_t
from libc.stdint cimport uint64_t
# Put these above import to avoid circular import problem
# Integer type of the `cluster` field of Distribution and of the value
# returned by cluster_of().
ctypedef int ClusterID
2014-08-03 00:51:52 +04:00
# 32-bit unsigned type used for the hash-valued fields of Lexeme and
# Orthography and as the return type of the *_of() string accessors.
ctypedef uint32_t StringHash
# Pointer-sized unsigned integer. Not referenced elsewhere in this file;
# presumably holds the address of a Lexeme struct -- TODO confirm.
ctypedef size_t Lexeme_addr
# Storage type of the `flags` fields in Orthography and Distribution.
# NOTE(review): the signedness of plain `char` is implementation-defined, and
# OrthFlag/DistFlag each declare 8 members (0..7). If those values are bit
# indices, the highest one lands on the sign bit -- confirm whether
# `unsigned char` was intended.
ctypedef char Bits8
# 64-bit unsigned type of Distribution.tagdict.
ctypedef uint64_t Bits64
2014-08-19 04:40:37 +04:00
# Orthographic flag identifiers, numbered consecutively from 0 to 7.
# check_orth_flag() takes one of these as its `flag` argument.
# NOTE(review): presumably each value is a bit index into Orthography.flags
# -- confirm against the implementation file.
cdef enum OrthFlag:
    IS_ALPHA = 0
    IS_DIGIT = 1
    IS_PUNCT = 2
    IS_WHITE = 3
    IS_LOWER = 4
    IS_UPPER = 5
    IS_TITLE = 6
    IS_ASCII = 7
# Distributional flag identifiers, numbered consecutively from 0 to 7.
# check_dist_flag() takes one of these as its `flag` argument.
# Only the first two carry descriptive names; DIST_FLAG3..DIST_FLAG8 are
# generically named slots.
# NOTE(review): presumably each value is a bit index into Distribution.flags
# -- confirm against the implementation file.
cdef enum DistFlag:
    OFT_UPPER = 0
    OFT_TITLE = 1
    DIST_FLAG3 = 2
    DIST_FLAG4 = 3
    DIST_FLAG5 = 4
    DIST_FLAG6 = 5
    DIST_FLAG7 = 6
    DIST_FLAG8 = 7
# Orthographic views of a lexeme; embedded in Lexeme as its `orth` field.
# Field order matters: BLANK_WORD initialises this struct positionally.
cdef struct Orthography:
    StringHash shape  # hash-typed; presumably what shape_of() returns -- confirm
    StringHash norm  # hash-typed; presumably what norm_of() returns -- confirm
    StringHash last3  # hash-typed; presumably what last3_of() returns -- confirm
    Bits8 flags  # 8-bit flag field; presumably indexed by OrthFlag -- confirm
# Distributional information about a lexeme; embedded in Lexeme as its
# `dist` field.
# Field order matters: BLANK_WORD initialises this struct positionally.
cdef struct Distribution:
    double prob  # NOTE(review): scale (raw vs. log probability) is not visible here
    ClusterID cluster  # integer cluster id
    Bits64 tagdict  # 64-bit field; encoding is not visible in this file
    Bits8 flags  # 8-bit flag field; presumably indexed by DistFlag -- confirm
# Record describing a single lexeme: its string, length and hash, plus the
# nested orthographic and distributional sub-structs.
# Field order matters: BLANK_WORD initialises this struct positionally.
cdef struct Lexeme:
    char* string  # C string pointer (NULL in BLANK_WORD)
    size_t length  # unit (bytes vs. characters) is not visible in this file
    StringHash lex  # hash-typed; presumably what lex_of() returns -- confirm
    Orthography orth # Extra orthographic views
    Distribution dist # Distribution info
# All-zero Lexeme value: NULL string, zero length and hash, and zeroed
# Orthography and Distribution sub-structs.
# The arguments are positional, so they must stay in the same order as the
# fields declared in Lexeme, Orthography and Distribution.
cdef Lexeme BLANK_WORD = Lexeme(NULL, 0, 0,
    Orthography(0, 0, 0, 0),
    Distribution(0.0, 0, 0, 0)
)
2014-07-07 22:27:02 +04:00
# Selector passed as the `attr` argument of attr_of().
# Members are numbered consecutively from 0 (LEX) to 4 (LENGTH).
# NOTE(review): the first four names mirror lex_of/norm_of/shape_of/last3_of;
# LENGTH mirrors length_of, which returns size_t whereas attr_of is declared
# to return StringHash -- confirm how attr_of handles LENGTH.
cdef enum StringAttr:
    LEX
    NORM
    SHAPE
    LAST3
    LENGTH
2014-07-07 22:27:02 +04:00
# Accessors keyed by `lex_id`, a size_t identifying a lexeme.
# NOTE(review): `lex_id` is presumably the address of a Lexeme struct (compare
# the Lexeme_addr typedef) -- confirm against the implementation file.
#
# `except 0` declares 0 as the error return value: C-level callers treat a
# returned 0 as "an exception was raised".
# NOTE(review): BLANK_WORD stores 0 in its hash fields, so a legitimate 0
# result would be misread as an error -- confirm 0 can never be returned
# normally, or consider `except? 0` (this must match the .pyx definition).

# Returns the attribute of the lexeme selected by `attr` (a StringAttr member).
cpdef StringHash attr_of(size_t lex_id, StringAttr attr) except 0
# Presumably returns the lexeme's `lex` hash -- confirm.
cpdef StringHash lex_of(size_t lex_id) except 0
# Presumably returns the lexeme's `orth.norm` hash -- confirm.
cpdef StringHash norm_of(size_t lex_id) except 0
2014-07-07 21:12:19 +04:00
# Presumably returns the `orth.shape` hash of the lexeme identified by
# `lex_id` -- confirm. `except 0` makes 0 the error return value.
cpdef StringHash shape_of(size_t lex_id) except 0
2014-07-07 22:27:02 +04:00
# Presumably returns the `orth.last3` hash of the lexeme identified by
# `lex_id` -- confirm. `except 0` makes 0 the error return value.
cpdef StringHash last3_of(size_t lex_id) except 0
2014-08-19 04:40:37 +04:00
# Presumably returns the lexeme's `length` field -- confirm.
# `except *` has callers check for a raised exception after every call, so
# any size_t value, including 0, is a valid result.
cpdef size_t length_of(size_t lex_id) except *
# Presumably returns the lexeme's `dist.prob` -- confirm.
# NOTE(review): `except 0` makes 0.0 the error value, yet BLANK_WORD has
# prob == 0.0 -- confirm 0.0 is never a legitimate result.
cpdef double prob_of(size_t lex_id) except 0
# Presumably returns the lexeme's `dist.cluster` -- confirm.
# NOTE(review): `except 0` makes cluster id 0 the error value, yet BLANK_WORD
# has cluster == 0 -- confirm 0 is never a legitimate cluster id.
cpdef ClusterID cluster_of(size_t lex_id) except 0
# Flag tests returning a C boolean; `except *` keeps both True and False
# valid as results. Note the first parameter is named `lex` here, not
# `lex_id` as in the accessors above.
cpdef bint check_orth_flag(size_t lex, OrthFlag flag) except *
cpdef bint check_dist_flag(size_t lex, DistFlag flag) except *