1
1
mirror of https://github.com/explosion/spaCy.git synced 2025-01-20 22:34:32 +03:00
spaCy/spacy/vocab.pxd

48 lines
1.3 KiB
Cython
Raw Normal View History

from libcpp.vector cimport vector
from preshed.maps cimport PreshMap
from cymem.cymem cimport Pool
from murmurhash.mrmr cimport hash64
from .structs cimport LexemeC, TokenC
from .typedefs cimport utf8_t, attr_t, hash_t
from .strings cimport StringStore
2015-08-26 20:21:03 +03:00
from .morphology cimport Morphology
# Module-level LexemeC value declared in the .pxd so that modules which
# cimport this file can refer to it at the C level.
# NOTE(review): presumably a blank/sentinel lexeme; its initialisation is not
# visible in this declaration file -- confirm in vocab.pyx.
cdef LexemeC EMPTY_LEXEME
2014-12-24 09:42:00 +03:00
# Union holding exactly one of two pointer views over the same storage:
# an array of pointers to immutable lexemes, or an array of immutable tokens.
# The union itself does not record which member is active.
cdef union LexemesOrTokens:
    # Read-only array of read-only pointers to LexemeC entries.
    const LexemeC* const* lexemes
    # Pointer to read-only TokenC entries.
    const TokenC* tokens
# Record pairing a LexemesOrTokens payload with a flag and a length.
cdef struct _Cached:
    # The payload, stored as either lexeme pointers or tokens.
    LexemesOrTokens data
    # NOTE(review): presumably true when `data.lexemes` is the active union
    # member and false when `data.tokens` is -- confirm against the users
    # of this struct.
    bint is_lex
    # NOTE(review): presumably the number of entries reachable through
    # `data` -- confirm against the users of this struct.
    int length
# C-level declaration of the Vocab extension type: its attributes and the
# signatures of its cdef methods. The implementations live in the matching
# .pyx file; the method signatures there must agree with the ones below.
cdef class Vocab:
    # Memory pool attribute (cymem Pool); C-level access only.
    cdef Pool mem

    # Attributes readable (but not assignable) from Python.
    # These are declared with `cdef readonly` rather than `cpdef readonly`:
    # `cpdef` is meant for functions, and on attributes it adds nothing over
    # `cdef` while being rejected/deprecated by newer Cython releases.
    cdef readonly StringStore strings
    cdef readonly Morphology morphology
    cdef readonly int length

    # Attributes that are both readable and writable from Python.
    cdef public object _serializer
    cdef public object data_dir
    cdef public object lex_attr_getters
    cdef public object serializer_freqs

    # Lookup methods. Each returns a pointer to an immutable struct; a NULL
    # return value signals that a Python exception has been raised
    # (`except NULL`).
    cdef const LexemeC* get(self, Pool mem, unicode string) except NULL
    cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL
    cdef const TokenC* make_fused_token(self, substrings) except NULL

    # Internal helpers. `_new_lexeme` was previously declared twice with an
    # identical signature; it is declared exactly once here.
    cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL
    # Returns an int; -1 signals that a Python exception has been raised
    # (`except -1`).
    cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1

    # Hash maps (preshed PreshMap); C-level access only.
    # NOTE(review): presumably keyed by string hash and by orth ID
    # respectively, mapping to LexemeC pointers -- confirm in vocab.pyx.
    cdef PreshMap _by_hash
    cdef PreshMap _by_orth

    # Readable (but not assignable) from Python.
    # NOTE(review): presumably the dimensionality of the word vectors --
    # confirm in vocab.pyx.
    cdef readonly int vectors_length