spaCy/spacy/vocab.pxd
2021-04-26 16:54:02 +02:00

48 lines
1.3 KiB
Cython

from libcpp.vector cimport vector
from preshed.maps cimport PreshMap
from cymem.cymem cimport Pool
from murmurhash.mrmr cimport hash64
from .structs cimport LexemeC, TokenC
from .typedefs cimport attr_t, hash_t
from .strings cimport StringStore
from .morphology cimport Morphology
# Module-level LexemeC value, declared in this .pxd so that modules which
# cimport it see the same C-level declaration.
# NOTE(review): presumably a blank placeholder lexeme — confirm how it is
# initialised and used in the matching .pyx.
cdef LexemeC EMPTY_LEXEME
# Untagged union holding one of two pointer kinds. The union itself does not
# record which member is valid; the owner must track that separately.
cdef union LexemesOrTokens:
    # Pointer to const pointer(s) to const LexemeC.
    const LexemeC* const* lexemes
    # Pointer to const TokenC value(s).
    const TokenC* tokens
# Record pairing a LexemesOrTokens union with a flag and a length.
cdef struct _Cached:
    # The stored pointer (either `lexemes` or `tokens`).
    LexemesOrTokens data
    # NOTE(review): presumably true when `data.lexemes` is the active member
    # and false when `data.tokens` is — confirm against the code that fills it.
    bint is_lex
    # NOTE(review): presumably the number of entries `data` points at — confirm.
    int length
# C-level declaration of the Vocab extension type: its attributes and the
# signatures of its cdef methods. Implementations live in the matching .pyx.
cdef class Vocab:
    # --- Attributes -------------------------------------------------------
    # C-only attribute (no `public`/`readonly`), so not visible from Python.
    cdef Pool mem
    # `readonly`: readable from Python, assignable only from Cython code.
    cdef readonly StringStore strings
    # `public`: readable and writable from Python.
    cdef public Morphology morphology
    cdef public object vectors
    cdef public object _lookups
    cdef public object writing_system
    cdef public object get_noun_chunks
    cdef readonly int length
    cdef public object data_dir
    cdef public object lex_attr_getters
    cdef public object cfg

    # --- Lookup methods ---------------------------------------------------
    # Each returns a C pointer; `except NULL` means a NULL return signals that
    # a Python exception has been raised and must be propagated by the caller.
    cdef const LexemeC* get(self, Pool mem, unicode string) except NULL
    cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL
    cdef const TokenC* make_fused_token(self, substrings) except NULL

    # --- Internal helpers -------------------------------------------------
    # `_new_lexeme` is declared exactly once; a second, identical declaration
    # that used to follow `_add_lex_to_vocab` was redundant and was removed.
    cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL
    # `except -1`: a return value of -1 signals a raised Python exception.
    cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1

    # C-only PreshMap attribute.
    # NOTE(review): presumably keyed by orth ID, going by the name — confirm.
    cdef PreshMap _by_orth