spaCy/spacy/vocab.pxd

from libcpp.vector cimport vector
from preshed.maps cimport PreshMap
from cymem.cymem cimport Pool
from murmurhash.mrmr cimport hash64

from .structs cimport LexemeC, TokenC
from .typedefs cimport attr_t, hash_t
from .strings cimport StringStore
from .morphology cimport Morphology


cdef LexemeC EMPTY_LEXEME


cdef union LexemesOrTokens:
    const LexemeC* const* lexemes
    const TokenC* tokens


cdef struct _Cached:
    LexemesOrTokens data
    bint is_lex
    int length


cdef class Vocab:
    cdef Pool mem
    cdef readonly StringStore strings
    cdef public Morphology morphology
    cdef public object _vectors
    cdef public object _lookups
    cdef public object writing_system
    cdef public object get_noun_chunks
    cdef readonly int length
    cdef public object _unused_object # TODO remove in v4, see #9150
    cdef public object lex_attr_getters
    cdef public object cfg

    cdef const LexemeC* get(self, Pool mem, str string) except NULL
    cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL
    cdef const TokenC* make_fused_token(self, substrings) except NULL

    cdef const LexemeC* _new_lexeme(self, Pool mem, str string) except NULL
    cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1
    cdef const LexemeC* _new_lexeme(self, Pool mem, str string) except NULL

    cdef PreshMap _by_orth