fix: tokenizer.pxd

This commit is contained in:
Basile Dura 2023-06-02 09:44:21 +02:00
parent c2f0256606
commit 25ce9f48c6
No known key found for this signature in database

View File

@@ -31,24 +31,58 @@ cdef class Tokenizer:
# C-level method declaration: takes a Python `str` and a C boolean flag
# (`bint with_special_cases`) and is typed to return a `Doc`.
cdef Doc _tokenize_affixes(self, str string, bint with_special_cases)
# C-level method declaration returning a C int.
# `except -1`: a return value of -1 signals to callers that a Python
# exception was raised and must be propagated.
cdef int _apply_special_cases(self, Doc doc) except -1
# C-level method declaration with no return value.
# `original` and `filtered` are C++ vector references (`&`), so the caller's
# vectors are passed without copying; `doc_len` is a plain C int.
# `nogil`: declared as callable without holding the GIL.
# NOTE(review): presumably `original` is read and `filtered` is written -
# confirm against the implementation.
cdef void _filter_special_spans(self, vector[SpanC] &original,
vector[SpanC] &filtered, int doc_len) nogil
# C-level method declaration returning a generic Python object.
# `filtered` is passed as a C++ vector reference (`&`), i.e. without copying.
cdef object _prepare_special_spans(self, Doc doc,
vector[SpanC] &filtered)
# C-level method declaration returning a C int; `tokens` is a raw pointer to
# TokenC and `span_data` is an untyped Python object.
# NOTE(review): unlike the other int-returning methods declared here, this
# one has no `except -1` clause, so how exceptions propagate depends on the
# Cython version's default - confirm this is intended.
cdef int _retokenize_special_spans(self, Doc doc, TokenC* tokens,
object span_data)
# C-level method declaration returning a C int.
# `has_special` is a pointer to a C int (presumably an in/out flag - confirm
# against the implementation); `with_special_cases` is a C boolean.
# `except -1`: a return value of -1 signals a raised Python exception.
cdef int _try_specials_and_cache(self, hash_t key, Doc tokens,
int* has_special,
bint with_special_cases) except -1
# C-level method declaration returning a C int.
# Takes a `Doc`, a Python `str` span, a `hash_t` key, a pointer to a C int
# (`has_special`) and a C boolean (`with_special_cases`).
# `except -1`: a return value of -1 signals a raised Python exception.
cdef int _tokenize(self, Doc tokens, str span, hash_t key,
int* has_special, bint with_special_cases) except -1
# C-level method declaration typed to return a Python `str`.
# `prefixes` and `suffixes` are pointers to C++ vectors of `LexemeC*`;
# `has_special` is a pointer to a C int.
# NOTE(review): the pointer arguments are presumably filled in by the
# callee - confirm against the implementation.
cdef str _split_affixes(self, Pool mem, str string,
vector[LexemeC*] *prefixes,
vector[LexemeC*] *suffixes, int* has_special,
bint with_special_cases)
# C-level method declaration returning a C int.
# `prefixes` and `suffixes` are pointers to C++ vectors of `LexemeC*`;
# `has_special` is a pointer to a C int.
# `except -1`: a return value of -1 signals a raised Python exception.
cdef int _attach_tokens(self, Doc tokens, str string,
vector[LexemeC*] *prefixes,
vector[LexemeC*] *suffixes, int* has_special,
bint with_special_cases) except -1
# C-level method declaration returning a C int.
# `tokens` is a pointer to const TokenC, so the callee cannot modify the
# tokens through it; `n` is a plain C int (presumably the number of
# tokens - confirm against the implementation).
# `except -1`: a return value of -1 signals a raised Python exception.
cdef int _save_cached(self, const TokenC* tokens, hash_t key,
int* has_special, int n) except -1
# C-level method declaration with no return value.
# `original` and `filtered` are C++ vector references (`&`), so the caller's
# vectors are passed without copying; `doc_len` is a plain C int.
# `nogil`: declared as callable without holding the GIL.
# NOTE(review): presumably `original` is read and `filtered` is written -
# confirm against the implementation.
cdef void _filter_special_spans(
self,
vector[SpanC] &original,
vector[SpanC] &filtered,
int doc_len,
) nogil
# C-level method declaration returning a generic Python object.
# `filtered` is passed as a C++ vector reference (`&`), i.e. without copying.
cdef object _prepare_special_spans(
self,
Doc doc,
vector[SpanC] &filtered,
)
# C-level method declaration returning a C int; `tokens` is a raw pointer to
# TokenC and `span_data` is an untyped Python object.
# NOTE(review): unlike the other int-returning methods declared here, this
# one has no `except -1` clause, so how exceptions propagate depends on the
# Cython version's default - confirm this is intended.
cdef int _retokenize_special_spans(
self,
Doc doc,
TokenC* tokens,
object span_data,
)
# C-level method declaration returning a C int.
# `has_special` is a pointer to a C int (presumably an in/out flag - confirm
# against the implementation); `with_special_cases` is a C boolean.
# `except -1`: a return value of -1 signals a raised Python exception.
cdef int _try_specials_and_cache(
self,
hash_t key,
Doc tokens,
int* has_special,
bint with_special_cases,
) except -1
# C-level method declaration returning a C int.
# Takes a `Doc`, a Python `str` span, a `hash_t` key, a pointer to a C int
# (`has_special`) and a C boolean (`with_special_cases`).
# `except -1`: a return value of -1 signals a raised Python exception.
cdef int _tokenize(
self,
Doc tokens,
str span,
hash_t key,
int* has_special,
bint with_special_cases,
) except -1
# C-level method declaration typed to return a Python `str`.
# `prefixes` and `suffixes` are pointers to C++ vectors of `LexemeC*`;
# `has_special` is a pointer to a C int.
# NOTE(review): the pointer arguments are presumably filled in by the
# callee - confirm against the implementation.
cdef str _split_affixes(
self,
Pool mem,
str string,
vector[LexemeC*] *prefixes,
vector[LexemeC*] *suffixes,
int* has_special,
bint with_special_cases,
)
# C-level method declaration returning a C int.
# `prefixes` and `suffixes` are pointers to C++ vectors of `LexemeC*`;
# `has_special` is a pointer to a C int.
# `except -1`: a return value of -1 signals a raised Python exception.
cdef int _attach_tokens(
self,
Doc tokens,
str string,
vector[LexemeC*] *prefixes,
vector[LexemeC*] *suffixes,
int* has_special,
bint with_special_cases,
) except -1
# C-level method declaration returning a C int.
# `tokens` is a pointer to const TokenC, so the callee cannot modify the
# tokens through it; `n` is a plain C int (presumably the number of
# tokens - confirm against the implementation).
# `except -1`: a return value of -1 signals a raised Python exception.
cdef int _save_cached(
self,
const TokenC* tokens,
hash_t key,
int* has_special,
int n,
) except -1