2015-07-18 23:39:57 +03:00
|
|
|
from .typedefs cimport attr_t, hash_t, flags_t, len_t, tag_t
|
2015-07-16 01:58:51 +03:00
|
|
|
from .attrs cimport attr_id_t
|
2015-07-16 12:20:08 +03:00
|
|
|
from .attrs cimport ID, ORTH, LOWER, NORM, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
|
2015-07-16 01:58:51 +03:00
|
|
|
|
2015-01-12 02:26:22 +03:00
|
|
|
from .structs cimport LexemeC
|
2014-12-19 22:51:03 +03:00
|
|
|
from .strings cimport StringStore
|
2014-10-09 12:53:30 +04:00
|
|
|
|
2015-01-17 08:21:17 +03:00
|
|
|
from numpy cimport ndarray
|
|
|
|
|
|
|
|
|
2015-01-12 02:26:22 +03:00
|
|
|
# Module-level LexemeC value declared for use by the rest of the module.
# NOTE(review): presumably a zero-initialised placeholder lexeme; confirm
# against wherever it is initialised.
cdef LexemeC EMPTY_LEXEME
|
2014-09-10 22:41:37 +04:00
|
|
|
|
2015-01-12 03:23:44 +03:00
|
|
|
cdef class Lexeme:
    # Extension type wrapping a pointer to a LexemeC struct, plus static
    # helpers that read from and write to such structs directly.
    #
    # NOTE(review): `Vocab` is not cimported in the visible part of this
    # file; confirm it is brought into scope elsewhere.

    # Pointer to the underlying C struct.
    cdef LexemeC* c
    # Vocabulary object exposed read-only to Python.
    cdef readonly Vocab vocab
    # `orth` attribute value exposed read-only to Python.
    cdef readonly attr_t orth

    @staticmethod
    cdef int set_struct_props(Vocab vocab, LexemeC* lex, dict props) except -1:
        # Fill `lex` from the `props` dict.
        #
        # String-valued properties ('orth', 'lower', 'norm', 'shape',
        # 'prefix', 'suffix') are looked up through `vocab.strings[...]` and
        # the result is stored; the remaining properties are copied as-is.
        # A missing key raises KeyError, which is propagated to the caller
        # through the `except -1` error return.
        #
        # Declared @staticmethod because the function takes no `self`.
        lex.length = props['length']

        lex.orth = vocab.strings[props['orth']]
        lex.lower = vocab.strings[props['lower']]
        lex.norm = vocab.strings[props['norm']]
        lex.shape = vocab.strings[props['shape']]
        lex.prefix = vocab.strings[props['prefix']]
        lex.suffix = vocab.strings[props['suffix']]

        lex.cluster = props['cluster']
        lex.prob = props['prob']
        lex.sentiment = props['sentiment']

        lex.flags = props['flags']
        # NOTE(review): `empty_vec` is not defined in the visible part of
        # this file; confirm where it is declared.
        lex.repvec = empty_vec

    @staticmethod
    cdef inline attr_t get_struct_attr(const LexemeC* lex, attr_id_t feat_name) nogil:
        # Return the value of attribute `feat_name` read from `lex`.
        #
        # Ids smaller than the number of bits in flags_t are treated as
        # boolean flags and answered by check_flag (0 or 1). The named ids
        # map to the matching struct field. Any other id yields 0.
        if feat_name < (sizeof(flags_t) * 8):
            return Lexeme.check_flag(lex, feat_name)
        elif feat_name == ID:
            return lex.id
        elif feat_name == ORTH:
            return lex.orth
        elif feat_name == LOWER:
            return lex.lower
        elif feat_name == NORM:
            return lex.norm
        elif feat_name == SHAPE:
            return lex.shape
        elif feat_name == PREFIX:
            return lex.prefix
        elif feat_name == SUFFIX:
            return lex.suffix
        elif feat_name == LENGTH:
            return lex.length
        elif feat_name == CLUSTER:
            return lex.cluster
        else:
            return 0

    @staticmethod
    cdef inline bint check_flag(const LexemeC* lexeme, attr_id_t flag_id) nogil:
        # Return True when bit `flag_id` of `lexeme.flags` is set.
        #
        # The shifted one is typed as flags_t so that the shift is performed
        # at the full width of the flag field; a bare `1` is a C int and
        # would overflow for flag ids at or beyond the width of int.
        cdef flags_t one = 1
        # Compare against zero explicitly so a set high bit cannot be lost
        # when the masked value is narrowed to the bint return type.
        return (lexeme.flags & (one << flag_id)) != 0
|