mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
ee159b8543
* Support match alignments * change naming from match_alignments to with_alignments, add conditional flow if with_alignments is given, validate with_alignments, add related test case * remove added errors, utilize bint type, cleanup whitespace * fix no new line in end of file * Minor formatting * Skip alignments processing if as_spans is set * Add with_alignments to Matcher API docs * Update website/docs/api/matcher.md Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
80 lines
1.5 KiB
Cython
80 lines
1.5 KiB
Cython
from libc.stdint cimport int32_t
|
|
from libcpp.vector cimport vector
|
|
from cymem.cymem cimport Pool
|
|
|
|
from ..vocab cimport Vocab
|
|
from ..typedefs cimport attr_t, hash_t
|
|
from ..structs cimport TokenC
|
|
from ..lexeme cimport attr_id_t
|
|
|
|
|
|
cdef enum action_t:
|
|
REJECT = 0000
|
|
MATCH = 1000
|
|
ADVANCE = 0100
|
|
RETRY = 0010
|
|
RETRY_EXTEND = 0011
|
|
RETRY_ADVANCE = 0110
|
|
MATCH_EXTEND = 1001
|
|
MATCH_REJECT = 2000
|
|
MATCH_DOUBLE = 3000
|
|
|
|
|
|
cdef enum quantifier_t:
|
|
ZERO
|
|
ZERO_ONE
|
|
ZERO_PLUS
|
|
ONE
|
|
ONE_PLUS
|
|
FINAL_ID
|
|
|
|
|
|
cdef struct AttrValueC:
|
|
attr_id_t attr
|
|
attr_t value
|
|
|
|
cdef struct IndexValueC:
|
|
int32_t index
|
|
attr_t value
|
|
|
|
cdef struct TokenPatternC:
|
|
AttrValueC* attrs
|
|
int32_t* py_predicates
|
|
IndexValueC* extra_attrs
|
|
int32_t nr_attr
|
|
int32_t nr_extra_attr
|
|
int32_t nr_py
|
|
quantifier_t quantifier
|
|
hash_t key
|
|
int32_t token_idx
|
|
|
|
|
|
cdef struct MatchAlignmentC:
|
|
int32_t token_idx
|
|
int32_t length
|
|
|
|
|
|
cdef struct PatternStateC:
|
|
TokenPatternC* pattern
|
|
int32_t start
|
|
int32_t length
|
|
|
|
|
|
cdef struct MatchC:
|
|
attr_t pattern_id
|
|
int32_t start
|
|
int32_t length
|
|
|
|
|
|
cdef class Matcher:
|
|
cdef Pool mem
|
|
cdef vector[TokenPatternC*] patterns
|
|
cdef readonly Vocab vocab
|
|
cdef public object validate
|
|
cdef public object _patterns
|
|
cdef public object _callbacks
|
|
cdef public object _filter
|
|
cdef public object _extensions
|
|
cdef public object _extra_predicates
|
|
cdef public object _seen_attrs
|