2014-12-21 23:25:43 +03:00
|
|
|
from .typedefs cimport hash_t, flags_t, id_t, len_t, tag_t, attr_t, attr_id_t
|
2015-01-12 03:23:44 +03:00
|
|
|
from .typedefs cimport ID, SIC, NORM1, NORM2, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
|
2015-01-12 02:26:22 +03:00
|
|
|
from .structs cimport LexemeC
|
2014-12-19 22:51:03 +03:00
|
|
|
from .strings cimport StringStore
|
2014-10-09 12:53:30 +04:00
|
|
|
|
|
|
|
|
2015-01-12 02:26:22 +03:00
|
|
|
# Module-level LexemeC struct value shared with cimporting modules.
# NOTE(review): presumably a blank/sentinel lexeme, judging by the name only;
# its initialisation is not visible in this file -- confirm in the .pyx.
cdef LexemeC EMPTY_LEXEME
|
2014-09-10 22:41:37 +04:00
|
|
|
|
2014-12-03 03:04:00 +03:00
|
|
|
|
2015-01-12 02:26:22 +03:00
|
|
|
# Declaration only: builds and returns a LexemeC struct by value from an id,
# a unicode string, a hash value, a StringStore and a dict of properties.
# `except *` makes every caller check for a pending Python exception after
# the call, because a struct return value has no spare value that could be
# used as an error sentinel.
# NOTE(review): how `props` is interpreted is not visible here -- see the
# implementation.
cdef LexemeC init(id_t i, unicode string, hash_t hashed,
                  StringStore store, dict props) except *
|
2014-10-29 15:19:38 +03:00
|
|
|
|
2014-10-09 07:10:46 +04:00
|
|
|
|
2015-01-12 03:23:44 +03:00
|
|
|
cdef class Lexeme:
    # Extension-type attribute layout. Declaration order is kept exactly as
    # before; fields of the same type that were adjacent are merely declared
    # on one line.

    # Raw pointer to const float data. Declared without `readonly`/`public`,
    # so it is reachable from Cython code only, not from Python.
    cdef const float* vec

    # Flag bit-field (tested bit by bit via check_flag on the C struct).
    cdef readonly flags_t flags

    # Integer attributes, read-only from Python.
    cdef readonly attr_t id, length

    # String-valued attributes, read-only from Python.
    cdef readonly unicode sic, norm1, norm2, shape, prefix, suffix

    # Integer counterparts of the string attributes above.
    # NOTE(review): presumably StringStore ids of those strings -- confirm.
    cdef readonly attr_t sic_id, norm1_id, norm2_id
    cdef readonly attr_t shape_id, prefix_id, suffix_id

    # Remaining numeric attributes, read-only from Python.
    cdef readonly attr_t cluster
    cdef readonly float prob, sentiment
|
|
|
|
|
|
|
|
|
|
|
|
# Declaration only: returns a Python-level Lexeme object given a pointer to
# a const LexemeC struct and a StringStore.
# NOTE(review): presumably the StringStore is used to fill the unicode
# attributes of Lexeme from the ids stored in the struct -- confirm in the
# implementation.
cdef Lexeme Lexeme_cinit(const LexemeC* c, StringStore strings)
|
|
|
|
|
|
|
|
|
2015-01-12 02:26:22 +03:00
|
|
|
cdef inline bint check_flag(const LexemeC* lexeme, attr_id_t flag_id) nogil:
    # Return True iff bit number `flag_id` is set in `lexeme.flags`.
    #
    # The mask is built from a flags_t-typed 1, so the shift is carried out at
    # the full width of the flag field. A bare literal `1` is a C int, and
    # get_attr() forwards flag ids up to sizeof(flags_t) * 8 - 1; if flags_t
    # is wider than int, shifting an int that far is undefined behaviour and
    # yields a wrong mask for the high flags.
    # NOTE(review): assumes lexeme.flags is a flags_t -- confirm in structs.
    cdef flags_t one = 1
    # Compare explicitly against zero so the result is exactly 0 or 1 and does
    # not depend on how a wide integer would be narrowed to bint.
    return (lexeme.flags & (one << flag_id)) != 0
|
2014-12-03 03:04:00 +03:00
|
|
|
|
|
|
|
|
2015-01-12 02:26:22 +03:00
|
|
|
cdef inline attr_t get_attr(const LexemeC* lex, attr_id_t feat_name) nogil:
    # Look up one attribute of `lex`, selected by the attribute id `feat_name`.
    #
    # Ids below the number of bits in flags_t are treated as boolean flags and
    # answered via check_flag(); the named ids map to the matching struct
    # field; any other id yields 0.

    # Flag range is tested first, exactly as in the original chain.
    if feat_name < (sizeof(flags_t) * 8):
        return check_flag(lex, feat_name)

    if feat_name == ID:
        return lex.id
    if feat_name == SIC:
        return lex.sic
    if feat_name == NORM1:
        return lex.norm1
    if feat_name == NORM2:
        return lex.norm2
    if feat_name == SHAPE:
        return lex.shape
    if feat_name == PREFIX:
        return lex.prefix
    if feat_name == SUFFIX:
        return lex.suffix
    if feat_name == LENGTH:
        return lex.length
    if feat_name == CLUSTER:
        return lex.cluster

    # Unknown attribute id.
    return 0
|