2023-06-14 18:48:41 +03:00
|
|
|
from cymem.cymem cimport Pool
|
2019-02-07 11:42:25 +03:00
|
|
|
from libc.stdint cimport int32_t
|
|
|
|
from libcpp.vector cimport vector
|
|
|
|
|
|
|
|
from ..lexeme cimport attr_id_t
|
2023-06-14 18:48:41 +03:00
|
|
|
from ..structs cimport TokenC
|
|
|
|
from ..typedefs cimport attr_t, hash_t
|
|
|
|
from ..vocab cimport Vocab
|
2019-02-07 11:42:25 +03:00
|
|
|
|
|
|
|
|
|
|
|
# Action codes for the matcher. How each code is interpreted is defined by
# the implementation that consumes this enum, not by this declaration.
#
# NOTE(review): several literals carry leading zeros (0100, 0010, 0011,
# 0110). A C compiler reads such literals as octal, so the numeric values
# may be 64/8/9/72 rather than 100/10/11/110 -- confirm against the
# generated C before relying on digit positions. The nine constants are
# pairwise distinct under either reading, so equality tests are unaffected.
cdef enum action_t:
    REJECT = 0000
    MATCH = 1000
    ADVANCE = 0100
    RETRY = 0010
    RETRY_EXTEND = 0011
    RETRY_ADVANCE = 0110
    MATCH_EXTEND = 1001
    MATCH_REJECT = 2000
    MATCH_DOUBLE = 3000
|
2019-02-07 11:42:25 +03:00
|
|
|
|
|
|
|
|
|
|
|
# Quantifier tag stored on each TokenPatternC (see its `quantifier` field).
# No explicit values are given, so members are numbered implicitly from 0
# in declaration order: ZERO=0 ... FINAL_ID=5. Do not reorder.
# NOTE(review): the names suggest regex-like repetition counts, with
# FINAL_ID as a special marker -- confirm against the matcher implementation.
cdef enum quantifier_t:
    ZERO, ZERO_ONE, ZERO_PLUS
    ONE, ONE_PLUS
    FINAL_ID
|
2019-02-07 11:42:25 +03:00
|
|
|
|
|
|
|
|
|
|
|
# An (attribute id, value) pair. TokenPatternC.attrs points at these.
cdef struct AttrValueC:
    # Attribute identifier (attr_id_t is cimported from ..lexeme).
    attr_id_t attr
    # Value associated with `attr`.
    # NOTE(review): presumably the value a token must carry -- confirm.
    attr_t value
|
|
|
|
|
|
|
|
# An (index, value) pair. TokenPatternC.extra_attrs points at these.
cdef struct IndexValueC:
    # 32-bit signed index.
    # NOTE(review): what it indexes into is not visible here -- confirm
    # against the code that fills `extra_attrs`.
    int32_t index
    # Value associated with `index`.
    attr_t value
|
|
|
|
|
|
|
|
# C-level description of one token slot within a pattern.
# The Matcher class keeps a vector of TokenPatternC* (its `patterns` member),
# and PatternStateC points at one of these.
cdef struct TokenPatternC:
    # Raw pointers to three arrays; ownership/lifetime is not expressed here.
    AttrValueC* attrs
    int32_t* py_predicates
    IndexValueC* extra_attrs
    # NOTE(review): presumably the element counts of `attrs`, `extra_attrs`
    # and `py_predicates` respectively -- confirm against the allocator.
    int32_t nr_attr, nr_extra_attr, nr_py
    # One of the quantifier_t members.
    quantifier_t quantifier
    # Hash-typed key.
    # NOTE(review): likely identifies the owning pattern -- confirm.
    hash_t key
    # NOTE(review): presumably the position of this slot in the
    # user-supplied pattern -- confirm.
    int32_t token_idx
|
|
|
|
|
|
|
|
|
|
|
|
# Pair of 32-bit integers recording a pattern-token index and a length.
# NOTE(review): presumably used to align matched tokens with the pattern
# slots that produced them -- confirm against the matcher implementation.
cdef struct MatchAlignmentC:
    int32_t token_idx, length
|
2019-02-07 11:42:25 +03:00
|
|
|
|
|
|
|
|
|
|
|
# A pointer into a pattern plus a (start, length) pair.
# NOTE(review): presumably one in-progress partial match -- confirm.
cdef struct PatternStateC:
    # The TokenPatternC this state refers to.
    TokenPatternC* pattern
    # NOTE(review): presumably the start token offset and the number of
    # tokens covered so far -- confirm.
    int32_t start, length
|
|
|
|
|
|
|
|
|
|
|
|
# A pattern identifier together with a (start, length) pair.
# NOTE(review): presumably one completed match -- confirm.
cdef struct MatchC:
    # Identifier of the pattern, stored as attr_t.
    attr_t pattern_id
    # NOTE(review): presumably the start token offset and span length in
    # tokens -- confirm.
    int32_t start, length
|
|
|
|
|
|
|
|
|
|
|
|
# Attribute layout of the Matcher extension type. Order and visibility
# must stay in sync with the class implementation.
cdef class Matcher:
    cdef:
        # C-only members (no Python-level access).
        # `mem` is a cymem Pool; `patterns` holds raw TokenPatternC pointers.
        Pool mem
        vector[TokenPatternC*] patterns

        # Readable from Python, not assignable.
        readonly Vocab vocab

        # Readable and writable from Python; all typed as generic objects,
        # so their concrete types are decided by the implementation.
        public object validate
        public object _patterns
        public object _callbacks
        public object _filter
        public object _extensions
        public object _extra_predicates
        public object _seen_attrs
        public object _fuzzy_compare
|