mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Add warning when Matcher subpattern is discarded (#5873)
* Add a warning when a subpattern is not processed and discarded * Normalize subpattern attribute/operator keys to upper case like top-level attributes
This commit is contained in:
parent
9e45d064bb
commit
4193402c47
|
@ -133,6 +133,8 @@ class Warnings(object):
|
|||
"normalization table, please ignore this warning.")
|
||||
W034 = ("Please install the package spacy-lookups-data in order to include "
|
||||
"the default lexeme normalization table for the language '{lang}'.")
|
||||
W035 = ('Discarding subpattern "{pattern}" due to an unrecognized '
|
||||
"attribute or operator.")
|
||||
|
||||
|
||||
@add_codes
|
||||
|
|
|
@ -811,9 +811,11 @@ def _get_extra_predicates(spec, extra_predicates):
|
|||
attr = "ORTH"
|
||||
attr = IDS.get(attr.upper())
|
||||
if isinstance(value, dict):
|
||||
processed = False
|
||||
value_with_upper_keys = {k.upper(): v for k, v in value.items()}
|
||||
for type_, cls in predicate_types.items():
|
||||
if type_ in value:
|
||||
predicate = cls(len(extra_predicates), attr, value[type_], type_)
|
||||
if type_ in value_with_upper_keys:
|
||||
predicate = cls(len(extra_predicates), attr, value_with_upper_keys[type_], type_)
|
||||
# Don't create redundant predicates.
|
||||
# This helps with efficiency, as we're caching the results.
|
||||
if predicate.key in seen_predicates:
|
||||
|
@ -822,6 +824,9 @@ def _get_extra_predicates(spec, extra_predicates):
|
|||
extra_predicates.append(predicate)
|
||||
output.append(predicate.i)
|
||||
seen_predicates[predicate.key] = predicate.i
|
||||
processed = True
|
||||
if not processed:
|
||||
warnings.warn(Warnings.W035.format(pattern=value))
|
||||
return output
|
||||
|
||||
|
||||
|
|
|
@ -76,3 +76,12 @@ def test_minimal_pattern_validation(en_vocab, pattern, n_errors, n_min_errors):
|
|||
matcher.add("TEST", [pattern])
|
||||
elif n_errors == 0:
|
||||
matcher.add("TEST", [pattern])
|
||||
|
||||
|
||||
def test_pattern_warnings(en_vocab):
    """Check subpattern key normalization and the discarded-subpattern warning.

    Two behaviors are covered:

    * a lower-case subpattern key ("regex") is accepted like its upper-case
      form, so adding the pattern must not emit a warning;
    * a subpattern whose key is not recognized emits a ``UserWarning`` when
      the pattern is added.
    """
    # Imported locally so this test does not depend on a module-level import.
    import warnings

    matcher = Matcher(en_vocab)
    # normalize "regex" to upper like "text"
    # Escalate UserWarning to an error for this call only: without this, a
    # regression that discards the lower-case subpattern would just emit a
    # warning and the test would still pass.
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        matcher.add("TEST1", [[{"text": {"regex": "regex"}}]])
    # warn if subpattern attribute isn't recognized and processed
    with pytest.warns(UserWarning):
        matcher.add("TEST2", [[{"TEXT": {"XX": "xx"}}]])
|
||||
|
|
Loading…
Reference in New Issue
Block a user