2014-12-19 22:54:03 +03:00
|
|
|
from libcpp.vector cimport vector
|
|
|
|
|
|
|
|
from preshed.maps cimport PreshMap
|
|
|
|
from cymem.cymem cimport Pool
|
|
|
|
from murmurhash.mrmr cimport hash64
|
|
|
|
|
2015-07-22 05:49:39 +03:00
|
|
|
from .structs cimport LexemeC, TokenC
|
2015-07-18 23:39:57 +03:00
|
|
|
from .typedefs cimport utf8_t, hash_t
|
2014-12-19 22:54:03 +03:00
|
|
|
from .strings cimport StringStore
|
|
|
|
|
|
|
|
|
2015-01-12 02:26:22 +03:00
|
|
|
# Module-level LexemeC instance, declared here so other modules can cimport it.
# NOTE(review): presumably a zero-initialised sentinel for "no lexeme";
# confirm against the matching .pyx, which is not visible from this file.
cdef LexemeC EMPTY_LEXEME
|
2014-12-24 09:42:00 +03:00
|
|
|
|
|
|
|
|
2014-12-19 22:54:03 +03:00
|
|
|
# Untagged union holding one of two pointer kinds. The union itself does not
# record which member is live; the owner must track that separately.
cdef union LexemesOrTokens:
    # Pointer to const pointer(s) to immutable LexemeC records.
    const LexemeC* const* lexemes
    # Pointer to mutable TokenC record(s).
    TokenC* tokens
|
|
|
|
|
|
|
|
|
|
|
|
# Cache record pairing a LexemesOrTokens payload with bookkeeping fields.
cdef struct _Cached:
    # The stored pointer (either the `lexemes` or the `tokens` member).
    LexemesOrTokens data
    # NOTE(review): presumably true when `data.lexemes` is the live member and
    # false when `data.tokens` is; confirm against the code that fills this in.
    bint is_lex
    # NOTE(review): presumably the number of entries reachable through `data`.
    int length
|
|
|
|
|
|
|
|
|
|
|
|
# C-level declaration of the Vocab extension type.
#
# Attribute and method order is kept exactly as originally declared: the order
# determines the generated object struct / vtable layout that modules
# cimporting this file are compiled against.
#
# Attributes are declared with `cdef` (not `cpdef`): `cpdef` is only meaningful
# for functions, and on an attribute it was treated as plain `cdef` by older
# Cython and is rejected by newer releases. Python-level visibility is
# controlled solely by the `public` / `readonly` modifiers.
cdef class Vocab:
    # Python-readable and writable; untyped, so stored as a generic object.
    cdef public object lexeme_props_getter
    # cymem Pool; C-level only (not visible from Python).
    # NOTE(review): presumably owns the memory of the LexemeC records.
    cdef Pool mem
    # String store, readable (but not rebindable) from Python.
    cdef readonly StringStore strings
    cdef readonly object pos_tags
    cdef readonly int length
    cdef public object packer

    # Look up the lexeme for `string`. A NULL return signals that a Python
    # exception has been raised (`except NULL`).
    cdef const LexemeC* get(self, Pool mem, unicode string) except NULL
    # Register `lex` under `key`. A return of -1 signals that a Python
    # exception has been raised (`except -1`).
    cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1

    # C-level hash maps (preshed PreshMap).
    # NOTE(review): names suggest lookup by string hash and by orth id
    # respectively; confirm against the .pyx.
    cdef PreshMap _by_hash
    cdef PreshMap _by_orth
    cdef readonly int repvec_length
|