mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
70 lines
1.3 KiB
Cython
70 lines
1.3 KiB
Cython
|
from libc.stdint cimport int32_t
|
||
|
from libcpp.vector cimport vector
|
||
|
from cymem.cymem cimport Pool
|
||
|
|
||
|
from ..vocab cimport Vocab
|
||
|
from ..typedefs cimport attr_t, hash_t
|
||
|
from ..structs cimport TokenC
|
||
|
from ..lexeme cimport attr_id_t
|
||
|
|
||
|
|
||
|
cdef enum action_t:
|
||
|
REJECT = 0000
|
||
|
MATCH = 1000
|
||
|
ADVANCE = 0100
|
||
|
RETRY = 0010
|
||
|
RETRY_EXTEND = 0011
|
||
|
RETRY_ADVANCE = 0110
|
||
|
MATCH_EXTEND = 1001
|
||
|
MATCH_REJECT = 2000
|
||
|
|
||
|
|
||
|
cdef enum quantifier_t:
|
||
|
ZERO
|
||
|
ZERO_ONE
|
||
|
ZERO_PLUS
|
||
|
ONE
|
||
|
ONE_PLUS
|
||
|
|
||
|
|
||
|
cdef struct AttrValueC:
|
||
|
attr_id_t attr
|
||
|
attr_t value
|
||
|
|
||
|
cdef struct IndexValueC:
|
||
|
int32_t index
|
||
|
attr_t value
|
||
|
|
||
|
cdef struct TokenPatternC:
|
||
|
AttrValueC* attrs
|
||
|
int32_t* py_predicates
|
||
|
IndexValueC* extra_attrs
|
||
|
int32_t nr_attr
|
||
|
int32_t nr_extra_attr
|
||
|
int32_t nr_py
|
||
|
quantifier_t quantifier
|
||
|
hash_t key
|
||
|
|
||
|
|
||
|
cdef struct PatternStateC:
|
||
|
TokenPatternC* pattern
|
||
|
int32_t start
|
||
|
int32_t length
|
||
|
|
||
|
|
||
|
cdef struct MatchC:
|
||
|
attr_t pattern_id
|
||
|
int32_t start
|
||
|
int32_t length
|
||
|
|
||
|
|
||
|
cdef class Matcher:
|
||
|
cdef Pool mem
|
||
|
cdef vector[TokenPatternC*] patterns
|
||
|
cdef readonly Vocab vocab
|
||
|
cdef public object _patterns
|
||
|
cdef public object _entities
|
||
|
cdef public object _callbacks
|
||
|
cdef public object _extensions
|
||
|
cdef public object _extra_predicates
|