spaCy/spacy/matcher/matcher.pxd
Ines Montani 483dddc9bc 💫 Add token match pattern validation via JSON schemas (#3244)
* Add custom MatchPatternError

* Improve validators and add validation option to Matcher

* Adjust formatting

* Never validate in Matcher within PhraseMatcher

If we do decide to make validate default to True, the PhraseMatcher's Matcher shouldn't ever validate. Here, we create the patterns automatically anyways (and it's currently unclear whether the validation has performance impacts at a very large scale).
2019-02-13 01:47:26 +11:00

70 lines
1.3 KiB
Cython

from libc.stdint cimport int32_t
from libcpp.vector cimport vector
from cymem.cymem cimport Pool
from ..vocab cimport Vocab
from ..typedefs cimport attr_t, hash_t
from ..structs cimport TokenC
from ..lexeme cimport attr_id_t
# Action codes for the matcher. The code that produces and consumes these
# values is not part of this declaration file.
#
# NOTE(review): the literals written with a leading zero (0100, 0010, 0011,
# 0110) are likely parsed as octal (e.g. 0100 -> 64) rather than as decimal
# numbers -- confirm against the Cython version in use. They are kept exactly
# as written so that the compiled values do not change.
cdef enum action_t:
    REJECT = 0000
    MATCH = 1000
    ADVANCE = 0100
    RETRY = 0010
    RETRY_EXTEND = 0011
    RETRY_ADVANCE = 0110
    MATCH_EXTEND = 1001
    MATCH_REJECT = 2000
# Quantifier attached to a single token pattern (see TokenPatternC.quantifier).
# No explicit values are given, so the members are numbered in declaration
# order starting at 0.
# NOTE(review): the names presumably mirror regex-style operators
# (ZERO_ONE ~ "?", ZERO_PLUS ~ "*", ONE_PLUS ~ "+") -- confirm against the
# implementation file.
cdef enum quantifier_t:
    ZERO
    ZERO_ONE
    ZERO_PLUS
    ONE
    ONE_PLUS
# Pairs an attribute ID with a value for that attribute.
cdef struct AttrValueC:
    attr_id_t attr  # which attribute this entry refers to
    attr_t value    # value associated with that attribute
# Pairs an integer index with an attribute value.
# NOTE(review): presumably used for the `extra_attrs` entries of
# TokenPatternC, with `index` identifying the extra attribute -- confirm.
cdef struct IndexValueC:
    int32_t index
    attr_t value
# C-level description of one token pattern.
# NOTE(review): the three pointers presumably address arrays whose lengths are
# stored in the matching counters (attrs/nr_attr, extra_attrs/nr_extra_attr,
# py_predicates/nr_py) -- confirm against the code that allocates them.
cdef struct TokenPatternC:
    AttrValueC* attrs          # (attribute, value) entries
    int32_t* py_predicates     # int32 entries; meaning defined by the implementation
    IndexValueC* extra_attrs   # (index, value) entries
    int32_t nr_attr
    int32_t nr_extra_attr
    int32_t nr_py
    quantifier_t quantifier    # quantifier applied to this token pattern
    hash_t key
# State record that ties a pointer into a token pattern to a start position
# and a length.
# NOTE(review): presumably one partial match in progress -- confirm against
# the implementation file.
cdef struct PatternStateC:
    TokenPatternC* pattern
    int32_t start
    int32_t length
# Match record: a pattern ID together with a start position and a length.
cdef struct MatchC:
    attr_t pattern_id
    int32_t start
    int32_t length
# Attribute declarations for the Matcher extension type. Methods are defined
# in the implementation file, which is not part of this declaration file.
cdef class Matcher:
    # C-level attributes (not exposed to Python).
    cdef Pool mem
    cdef vector[TokenPatternC*] patterns

    # Readable from Python, but not assignable from Python.
    cdef readonly Vocab vocab

    # Readable and writable from Python.
    cdef public object validator
    cdef public object _patterns
    cdef public object _callbacks
    cdef public object _extensions
    cdef public object _extra_predicates