# spaCy/spacy/vocab.pxd
#
# 43 lines
# 1014 B
# Cython
# Raw Normal View History

from libcpp.vector cimport vector
from preshed.maps cimport PreshMap
from cymem.cymem cimport Pool
from murmurhash.mrmr cimport hash64
from .structs cimport LexemeC, TokenC
from .typedefs cimport utf8_t, attr_t, hash_t
from .strings cimport StringStore
# Module-level C variable of type LexemeC. Declaring it in the .pxd makes
# the name visible to modules that cimport this file.
# NOTE(review): presumably a blank/sentinel lexeme -- confirm how it is
# initialised and used in the matching .pyx.
cdef LexemeC EMPTY_LEXEME
# 2014-12-24 09:42:00 +03:00
# Union holding one of two pointer kinds in the same storage.
# Nothing in the union itself records which member is valid; the user of
# the union has to track that separately.
cdef union LexemesOrTokens:
    # Pointer to const pointers to const LexemeC (the lexemes are not
    # modifiable through this member).
    const LexemeC* const* lexemes
    # Pointer to mutable TokenC.
    TokenC* tokens
# Record pairing a LexemesOrTokens union with a flag and a length.
cdef struct _Cached:
    # The stored pointer (either lexemes or tokens).
    LexemesOrTokens data
    # NOTE(review): presumably true when `data.lexemes` is the active
    # member and false when `data.tokens` is -- confirm against the code
    # that fills this struct.
    bint is_lex
    # NOTE(review): presumably the number of elements reachable through
    # `data` -- confirm against the code that fills this struct.
    int length
# C-level declaration of the Vocab extension type: its attributes and the
# signatures of its cdef methods. Method bodies are not part of this file.
#
# Attribute visibility follows the Cython keywords used below:
#   public   -> readable and writable from Python
#   readonly -> readable from Python only
#   (none)   -> accessible from Cython code only
cdef class Vocab:
    # Declared with `cdef` rather than `cpdef`: `cpdef` on a variable is
    # deprecated by Cython and behaves no differently from `cdef`. The
    # original declaration was untyped, i.e. a generic Python object.
    cdef public object lexeme_props_getter
    cdef Pool mem
    cdef readonly StringStore strings
    cdef readonly object pos_tags
    cdef readonly int length
    cdef public object _serializer
    cdef public object data_dir

    # Both lookups return a pointer to a const LexemeC; `except NULL` means
    # a NULL return signals that a Python exception has been raised.
    cdef const LexemeC* get(self, Pool mem, unicode string) except NULL
    cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL
    # `except -1` means a return value of -1 signals a raised exception.
    cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1

    # NOTE(review): presumably the lexeme tables keyed by string hash and
    # by orth id respectively -- confirm in the matching .pyx.
    cdef PreshMap _by_hash
    cdef PreshMap _by_orth
    cdef readonly int repvec_length