spaCy/spacy/matcher/matcher.pxd
broaddeep ee159b8543
Support match alignments (#7321)
* Support match alignments

* change naming from match_alignments to with_alignments, add conditional flow if with_alignments is given, validate with_alignments, add related test case

* remove added errors, utilize bint type, cleanup whitespace

* fix no new line in end of file

* Minor formatting

* Skip alignments processing if as_spans is set

* Add with_alignments to Matcher API docs

* Update website/docs/api/matcher.md

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
2021-04-08 18:10:14 +10:00

80 lines
1.5 KiB
Cython

from libc.stdint cimport int32_t
from libcpp.vector cimport vector
from cymem.cymem cimport Pool
from ..vocab cimport Vocab
from ..typedefs cimport attr_t, hash_t
from ..structs cimport TokenC
from ..lexeme cimport attr_id_t
cdef enum action_t:
REJECT = 0000
MATCH = 1000
ADVANCE = 0100
RETRY = 0010
RETRY_EXTEND = 0011
RETRY_ADVANCE = 0110
MATCH_EXTEND = 1001
MATCH_REJECT = 2000
MATCH_DOUBLE = 3000
cdef enum quantifier_t:
ZERO
ZERO_ONE
ZERO_PLUS
ONE
ONE_PLUS
FINAL_ID
cdef struct AttrValueC:
attr_id_t attr
attr_t value
cdef struct IndexValueC:
int32_t index
attr_t value
cdef struct TokenPatternC:
AttrValueC* attrs
int32_t* py_predicates
IndexValueC* extra_attrs
int32_t nr_attr
int32_t nr_extra_attr
int32_t nr_py
quantifier_t quantifier
hash_t key
int32_t token_idx
cdef struct MatchAlignmentC:
int32_t token_idx
int32_t length
cdef struct PatternStateC:
TokenPatternC* pattern
int32_t start
int32_t length
cdef struct MatchC:
attr_t pattern_id
int32_t start
int32_t length
cdef class Matcher:
cdef Pool mem
cdef vector[TokenPatternC*] patterns
cdef readonly Vocab vocab
cdef public object validate
cdef public object _patterns
cdef public object _callbacks
cdef public object _filter
cdef public object _extensions
cdef public object _extra_predicates
cdef public object _seen_attrs