mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
fix attrs field in the matcher (#4423)
* raise specific error when removing a matcher rule that doesn't exist
* rephrasing
* ensure attrs is NULL when nr_attr == 0, plus several fixes to prevent out-of-bounds (OOB) access
This commit is contained in:
parent
5efae495f1
commit
da6e0de34f
|
@ -523,6 +523,7 @@ cdef char get_is_match(PatternStateC state,
|
||||||
if predicate_matches[state.pattern.py_predicates[i]] == -1:
|
if predicate_matches[state.pattern.py_predicates[i]] == -1:
|
||||||
return 0
|
return 0
|
||||||
spec = state.pattern
|
spec = state.pattern
|
||||||
|
if spec.nr_attr > 0:
|
||||||
for attr in spec.attrs[:spec.nr_attr]:
|
for attr in spec.attrs[:spec.nr_attr]:
|
||||||
if get_token_attr(token, attr.attr) != attr.value:
|
if get_token_attr(token, attr.attr) != attr.value:
|
||||||
return 0
|
return 0
|
||||||
|
@ -533,7 +534,11 @@ cdef char get_is_match(PatternStateC state,
|
||||||
|
|
||||||
|
|
||||||
cdef char get_is_final(PatternStateC state) nogil:
|
cdef char get_is_final(PatternStateC state) nogil:
|
||||||
if state.pattern[1].attrs[0].attr == ID and state.pattern[1].nr_attr == 0:
|
if state.pattern[1].nr_attr == 0 and state.pattern[1].attrs != NULL:
|
||||||
|
id_attr = state.pattern[1].attrs[0]
|
||||||
|
if id_attr.attr != ID:
|
||||||
|
with gil:
|
||||||
|
raise ValueError(Errors.E074.format(attr=ID, bad_attr=id_attr.attr))
|
||||||
return 1
|
return 1
|
||||||
else:
|
else:
|
||||||
return 0
|
return 0
|
||||||
|
@ -548,6 +553,8 @@ cdef TokenPatternC* init_pattern(Pool mem, attr_t entity_id, object token_specs)
|
||||||
cdef int i, index
|
cdef int i, index
|
||||||
for i, (quantifier, spec, extensions, predicates) in enumerate(token_specs):
|
for i, (quantifier, spec, extensions, predicates) in enumerate(token_specs):
|
||||||
pattern[i].quantifier = quantifier
|
pattern[i].quantifier = quantifier
|
||||||
|
# Ensure attrs refers to a null pointer if nr_attr == 0
|
||||||
|
if len(spec) > 0:
|
||||||
pattern[i].attrs = <AttrValueC*>mem.alloc(len(spec), sizeof(AttrValueC))
|
pattern[i].attrs = <AttrValueC*>mem.alloc(len(spec), sizeof(AttrValueC))
|
||||||
pattern[i].nr_attr = len(spec)
|
pattern[i].nr_attr = len(spec)
|
||||||
for j, (attr, value) in enumerate(spec):
|
for j, (attr, value) in enumerate(spec):
|
||||||
|
@ -564,6 +571,7 @@ cdef TokenPatternC* init_pattern(Pool mem, attr_t entity_id, object token_specs)
|
||||||
pattern[i].nr_py = len(predicates)
|
pattern[i].nr_py = len(predicates)
|
||||||
pattern[i].key = hash64(pattern[i].attrs, pattern[i].nr_attr * sizeof(AttrValueC), 0)
|
pattern[i].key = hash64(pattern[i].attrs, pattern[i].nr_attr * sizeof(AttrValueC), 0)
|
||||||
i = len(token_specs)
|
i = len(token_specs)
|
||||||
|
# Even though nr_attr == 0 here, we're storing the ID value in attrs[0] (bug-prone, tread carefully!)
|
||||||
pattern[i].attrs = <AttrValueC*>mem.alloc(2, sizeof(AttrValueC))
|
pattern[i].attrs = <AttrValueC*>mem.alloc(2, sizeof(AttrValueC))
|
||||||
pattern[i].attrs[0].attr = ID
|
pattern[i].attrs[0].attr = ID
|
||||||
pattern[i].attrs[0].value = entity_id
|
pattern[i].attrs[0].value = entity_id
|
||||||
|
|
Loading…
Reference in New Issue
Block a user