mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-01 04:46:38 +03:00
b0228d8ea6
* chore: add cython-linter dev dependency * fix: lexeme.pyx * fix: morphology.pxd * fix: tokenizer.pxd * fix: vocab.pxd * fix: morphology.pxd (line length) * ci: add cython-lint * ci: fix cython-lint call * Fix kb/candidate.pyx. * Fix kb/kb.pyx. * Fix kb/kb_in_memory.pyx. * Fix kb. * Fix training/ partially. * Fix training/. Ignore trailing whitespaces and too long lines. * Fix ml/. * Fix matcher/. * Fix pipeline/. * Fix tokens/. * Fix build errors. Fix vocab.pyx. * Fix cython-lint install and run. * Fix lexeme.pyx, parts_of_speech.pxd, vectors.pyx. Temporarily disable cython-lint execution. * Fix attrs.pyx, lexeme.pyx, symbols.pxd, isort issues. * Make cython-lint install conditional. Fix tokenizer.pyx. * Fix remaining files. Reenable cython-lint check. * Readded parentheses. * Fix test_build_dependencies(). * Add explanatory comment to cython-lint execution. --------- Co-authored-by: Raphael Mitsch <r.mitsch@outlook.com>
46 lines
1.2 KiB
Cython
46 lines
1.2 KiB
Cython
from cymem.cymem cimport Pool
|
|
from libcpp.vector cimport vector
|
|
from murmurhash.mrmr cimport hash64
|
|
from preshed.maps cimport PreshMap
|
|
|
|
from .morphology cimport Morphology
|
|
from .strings cimport StringStore
|
|
from .structs cimport LexemeC, TokenC
|
|
from .typedefs cimport attr_t, hash_t
|
|
|
|
|
|
# Module-level LexemeC value shared with every module that cimports this file.
# NOTE(review): presumably an all-default "empty" lexeme used as a sentinel;
# its initialisation is not visible in this declaration file -- confirm in the
# matching .pyx implementation.
cdef LexemeC EMPTY_LEXEME
|
|
|
|
|
|
# Untagged union holding one of two pointer kinds: a pointer to (an array of)
# const LexemeC pointers, or a pointer to (an array of) const TokenC structs.
# Being a C union, both members share the same storage, so only one of them
# is meaningful at a time; the union itself does not record which.
# NOTE(review): the discriminator appears to be `_Cached.is_lex` -- confirm.
cdef union LexemesOrTokens:
    const LexemeC* const* lexemes
    const TokenC* tokens
|
|
|
|
|
|
# Cache record pairing a LexemesOrTokens payload with the metadata needed to
# interpret it.
cdef struct _Cached:
    # Pointer payload (lexeme-pointer array or token array).
    LexemesOrTokens data
    # NOTE(review): presumably true when `data.lexemes` is the valid member
    # and false when `data.tokens` is -- confirm against the code that
    # fills this struct.
    bint is_lex
    # NOTE(review): presumably the number of entries reachable through
    # `data` -- confirm.
    int length
|
|
|
|
|
|
# C-level declaration of the Vocab extension type. Only attribute and method
# signatures live here; the implementations are in the matching .pyx file.
#
# Visibility keywords follow Cython's rules: attributes without a modifier are
# reachable from Cython code only, `readonly` attributes can be read (but not
# assigned) from Python, and `public` attributes can be read and assigned
# from Python.
#
# The declaration order below is kept as-is on purpose: it determines the
# extension type's struct layout and must agree with the implementation.
cdef class Vocab:
    # cymem memory pool (C-only attribute).
    # NOTE(review): presumably owns the LexemeC allocations -- confirm.
    cdef Pool mem
    # String store, read-only from Python.
    cdef readonly StringStore strings
    cdef public Morphology morphology
    # Untyped Python objects, readable and writable from Python.
    cdef public object _vectors
    cdef public object _lookups
    cdef public object writing_system
    cdef public object get_noun_chunks
    # Integer counter, read-only from Python.
    # NOTE(review): presumably the number of stored lexemes -- confirm.
    cdef readonly int length
    cdef public object lex_attr_getters
    cdef public object cfg

    # Lexeme accessors, keyed by a Python string or by an attr_t value.
    # `except NULL` means a NULL return signals that a Python exception is
    # pending, so a successful call never returns NULL.
    cdef const LexemeC* get(self, str string) except NULL
    cdef const LexemeC* get_by_orth(self, attr_t orth) except NULL
    # Returns a pointer to const TokenC data derived from `substrings`
    # (untyped Python object); NULL signals an exception.
    cdef const TokenC* make_fused_token(self, substrings) except NULL

    # Private helpers. `_new_lexeme` returns NULL on error;
    # `_add_lex_to_vocab` returns -1 on error.
    cdef const LexemeC* _new_lexeme(self, str string) except NULL
    cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1

    # preshed hash map (C-only attribute).
    # NOTE(review): presumably maps orth hashes to LexemeC pointers, as the
    # name and `_add_lex_to_vocab(key, lex)` suggest -- confirm.
    cdef PreshMap _by_orth
|