spaCy/spacy/matcher/matcher.pxd
Sofie Van Landeghem 40c995b1be
Option for returning only greedy matches (#5771)
* add "greedy" option for match pattern

* distinction between greedy FIRST or LONGEST

* check for proper values, throw custom warning otherwise

* unxfail one more test

* add comment in docstring

* add test that LONGEST also prefers first match if equal length

* use c arrays for more efficient processing

* rename 'greediness' to 'greedy'
2020-07-29 11:04:43 +02:00

73 lines
1.4 KiB
Cython

from libc.stdint cimport int32_t
from libcpp.vector cimport vector
from cymem.cymem cimport Pool
from ..vocab cimport Vocab
from ..typedefs cimport attr_t, hash_t
from ..structs cimport TokenC
from ..lexeme cimport attr_id_t
cdef enum action_t:
REJECT = 0000
MATCH = 1000
ADVANCE = 0100
RETRY = 0010
RETRY_EXTEND = 0011
RETRY_ADVANCE = 0110
MATCH_EXTEND = 1001
MATCH_REJECT = 2000
MATCH_DOUBLE = 3000
cdef enum quantifier_t:
ZERO
ZERO_ONE
ZERO_PLUS
ONE
ONE_PLUS
cdef struct AttrValueC:
attr_id_t attr
attr_t value
cdef struct IndexValueC:
int32_t index
attr_t value
cdef struct TokenPatternC:
AttrValueC* attrs
int32_t* py_predicates
IndexValueC* extra_attrs
int32_t nr_attr
int32_t nr_extra_attr
int32_t nr_py
quantifier_t quantifier
hash_t key
cdef struct PatternStateC:
TokenPatternC* pattern
int32_t start
int32_t length
cdef struct MatchC:
attr_t pattern_id
int32_t start
int32_t length
cdef class Matcher:
cdef Pool mem
cdef vector[TokenPatternC*] patterns
cdef readonly Vocab vocab
cdef public object validate
cdef public object _patterns
cdef public object _callbacks
cdef public object _filter
cdef public object _extensions
cdef public object _extra_predicates
cdef public object _seen_attrs