Add warning when Matcher subpattern is discarded (#5873)

* Add a warning when a subpattern is not processed and discarded

* Normalize subpattern attribute/operator keys to upper case like
top-level attributes
This commit is contained in:
Adriane Boyd 2020-08-05 14:56:14 +02:00 committed by GitHub
parent 9e45d064bb
commit 4193402c47
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 18 additions and 2 deletions

View File

@ -133,6 +133,8 @@ class Warnings(object):
"normalization table, please ignore this warning.")
W034 = ("Please install the package spacy-lookups-data in order to include "
"the default lexeme normalization table for the language '{lang}'.")
W035 = ('Discarding subpattern "{pattern}" due to an unrecognized '
"attribute or operator.")
@add_codes

View File

@ -811,9 +811,11 @@ def _get_extra_predicates(spec, extra_predicates):
attr = "ORTH"
attr = IDS.get(attr.upper())
if isinstance(value, dict):
processed = False
value_with_upper_keys = {k.upper(): v for k, v in value.items()}
for type_, cls in predicate_types.items():
if type_ in value:
predicate = cls(len(extra_predicates), attr, value[type_], type_)
if type_ in value_with_upper_keys:
predicate = cls(len(extra_predicates), attr, value_with_upper_keys[type_], type_)
# Don't create redundant predicates.
# This helps with efficiency, as we're caching the results.
if predicate.key in seen_predicates:
@ -822,6 +824,9 @@ def _get_extra_predicates(spec, extra_predicates):
extra_predicates.append(predicate)
output.append(predicate.i)
seen_predicates[predicate.key] = predicate.i
processed = True
if not processed:
warnings.warn(Warnings.W035.format(pattern=value))
return output

View File

@ -76,3 +76,12 @@ def test_minimal_pattern_validation(en_vocab, pattern, n_errors, n_min_errors):
matcher.add("TEST", [pattern])
elif n_errors == 0:
matcher.add("TEST", [pattern])
def test_pattern_warnings(en_vocab):
    """Check when the Matcher warns about a discarded subpattern.

    A lower-case subpattern key such as "regex" must be accepted without the
    "Discarding subpattern" warning, while an unrecognized key such as "XX"
    must trigger exactly that warning.
    """
    # Local import so the test does not rely on a module-level import.
    import warnings

    matcher = Matcher(en_vocab)
    # normalize "regex" to upper like "text": if the key were not normalized,
    # the subpattern would be silently dropped with a warning, so make sure
    # no such warning is emitted here.
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, even ones already reported from this location.
        warnings.simplefilter("always")
        matcher.add("TEST1", [[{"text": {"regex": "regex"}}]])
    assert not any("Discarding subpattern" in str(w.message) for w in caught)
    # warn if subpattern attribute isn't recognized and processed; match on
    # the message so an unrelated UserWarning cannot satisfy the check.
    with pytest.warns(UserWarning, match="Discarding subpattern"):
        matcher.add("TEST2", [[{"TEXT": {"XX": "xx"}}]])