2014-10-22 18:57:59 +04:00
|
|
|
from cymem.cymem cimport Pool
|
|
|
|
|
2014-10-23 17:59:17 +04:00
|
|
|
from .lexeme cimport Lexeme
|
|
|
|
from .typedefs cimport flag_t
|
|
|
|
from .utf8string cimport StringStore
|
2014-11-04 19:42:14 +03:00
|
|
|
from .tagger cimport TagType
|
2014-10-23 17:59:17 +04:00
|
|
|
|
2014-10-22 05:55:42 +04:00
|
|
|
from thinc.typedefs cimport atom_t
|
2014-09-11 18:57:08 +04:00
|
|
|
|
|
|
|
|
2014-09-15 05:22:40 +04:00
|
|
|
# Declaration of the Tokens extension type: its C-level attributes and the
# signatures of its C-level methods. Only declarations appear here; the
# method bodies are defined elsewhere.
cdef class Tokens:
    # Memory pool and string store held by the instance.
    cdef Pool mem
    cdef StringStore _string_store

    # Four C arrays (Lexeme pointers, and ints named idx / pos / ner), each
    # declared twice: once as `_<name>_ptr` and once as `<name>`.
    # NOTE(review): presumably `_<name>_ptr` is the allocated base and
    # `<name>` the pointer used for indexing -- confirm in the implementation.
    cdef Lexeme** _lex_ptr
    cdef int* _idx_ptr
    cdef int* _pos_ptr
    cdef int* _ner_ptr
    cdef Lexeme** lex
    cdef int* idx
    cdef int* pos
    cdef int* ner

    # NOTE(review): presumably the number of stored tokens and the allocated
    # capacity of the arrays above -- confirm in the implementation.
    cdef int length
    cdef int max_length

    # Method signatures. `except -1` declares -1 as the error return value:
    # a return of -1 tells callers that a Python exception has been raised.
    # `extend` and `push_back` are C-only (cdef); `set_tag` is cpdef and is
    # therefore also callable from Python.
    cdef int extend(self, int i, Lexeme** lexemes, int n) except -1
    cdef int push_back(self, int i, Lexeme* lexeme) except -1
    cpdef int set_tag(self, int i, TagType tag_type, int tag) except -1
|
2014-10-23 17:59:17 +04:00
|
|
|
|
|
|
|
|
|
|
|
# Declaration of the Token extension type. Apart from the string store, every
# attribute is declared `public`, so it is readable and writable from Python.
# Attribute order is kept exactly as declared, since it defines the layout of
# the underlying object struct.
cdef class Token:
    # Not `public`: only accessible from Cython code.
    cdef StringStore _string_store

    # Plain C int attributes.
    # NOTE(review): `pos` and `ner` share their names with the per-token int
    # arrays declared on Tokens; presumably they hold the values for one
    # token -- confirm in the implementation.
    cdef public int i
    cdef public int idx
    cdef public int pos
    cdef public int ner

    # Attributes typed as thinc's atom_t.
    # NOTE(review): `id` shadows the Python builtin of the same name inside
    # the class namespace; it is kept because it is part of the public
    # attribute interface.
    cdef public atom_t id
    cdef public atom_t cluster
    cdef public atom_t length
    cdef public atom_t postype
    cdef public atom_t sensetype

    cdef public atom_t sic
    cdef public atom_t norm
    cdef public atom_t shape
    cdef public atom_t asciied
    cdef public atom_t prefix
    cdef public atom_t suffix

    cdef public float prob

    cdef public flag_t flags
|