fix: tokenizer.pxd

Basile Dura 2023-06-02 09:44:21 +02:00
parent c2f0256606
commit 25ce9f48c6

tokenizer.pxd

@@ -31,24 +31,58 @@ cdef class Tokenizer:
     cdef Doc _tokenize_affixes(self, str string, bint with_special_cases)
     cdef int _apply_special_cases(self, Doc doc) except -1
-    cdef void _filter_special_spans(self, vector[SpanC] &original,
-                                    vector[SpanC] &filtered, int doc_len) nogil
-    cdef object _prepare_special_spans(self, Doc doc, vector[SpanC] &original,
-                                       vector[SpanC] &filtered)
-    cdef int _retokenize_special_spans(self, Doc doc, TokenC* tokens,
-                                       object span_data)
-    cdef int _try_specials_and_cache(self, hash_t key, Doc tokens,
-                                     int* has_special,
-                                     bint with_special_cases) except -1
-    cdef int _tokenize(self, Doc tokens, str span, hash_t key,
-                       int* has_special, bint with_special_cases) except -1
-    cdef str _split_affixes(self, Pool mem, str string,
-                            vector[LexemeC*] *prefixes,
-                            vector[LexemeC*] *suffixes, int* has_special,
-                            bint with_special_cases)
-    cdef int _attach_tokens(self, Doc tokens, str string,
-                            vector[LexemeC*] *prefixes,
-                            vector[LexemeC*] *suffixes, int* has_special,
-                            bint with_special_cases) except -1
-    cdef int _save_cached(self, const TokenC* tokens, hash_t key,
-                          int* has_special, int n) except -1
+    cdef void _filter_special_spans(
+        self,
+        vector[SpanC] &original,
+        vector[SpanC] &filtered,
+        int doc_len,
+    ) nogil
+    cdef object _prepare_special_spans(
+        self,
+        Doc doc,
+        vector[SpanC] &original,
+        vector[SpanC] &filtered,
+    )
+    cdef int _retokenize_special_spans(
+        self,
+        Doc doc,
+        TokenC* tokens,
+        object span_data,
+    )
+    cdef int _try_specials_and_cache(
+        self,
+        hash_t key,
+        Doc tokens,
+        int* has_special,
+        bint with_special_cases,
+    ) except -1
+    cdef int _tokenize(
+        self,
+        Doc tokens,
+        str span,
+        hash_t key,
+        int* has_special,
+        bint with_special_cases,
+    ) except -1
+    cdef str _split_affixes(
+        self,
+        Pool mem,
+        str string,
+        vector[LexemeC*] *prefixes,
+        vector[LexemeC*] *suffixes, int* has_special,
+        bint with_special_cases,
+    )
+    cdef int _attach_tokens(
+        self,
+        Doc tokens,
+        str string,
+        vector[LexemeC*] *prefixes,
+        vector[LexemeC*] *suffixes, int* has_special,
+        bint with_special_cases,
+    ) except -1
+    cdef int _save_cached(
+        self,
+        const TokenC* tokens,
+        hash_t key,
+        int* has_special,
+        int n,
+    ) except -1
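
The change is purely mechanical: each multi-parameter signature is split to one parameter per line with a trailing comma, and modifiers such as `nogil` and `except -1` stay attached to the closing parenthesis. Below is a minimal sketch of how a declaration in this style pairs with its definition; `demo.pxd`, `demo.pyx`, and `_count_byte` are hypothetical stand-ins, not part of spaCy.

# demo.pxd -- hypothetical declaration, mirroring the signature style above
cdef class Demo:
    cdef int _count_byte(
        self,
        const char* data,
        int n,
        char target,
    ) except -1


# demo.pyx -- the definition repeats the signature verbatim; `except -1`
# tells Cython that a return value of -1 signals a raised Python exception,
# so cdef callers get error propagation without a separate status channel.
cdef class Demo:
    cdef int _count_byte(
        self,
        const char* data,
        int n,
        char target,
    ) except -1:
        cdef int i
        cdef int count = 0
        if data is NULL:
            raise ValueError("data must not be NULL")  # reaches callers as -1
        for i in range(n):
            if data[i] == target:
                count += 1
        return count

One practical upside of the new layout: adding or removing a parameter later touches a single line, so future diffs to these signatures stay short.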