mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
Add warning when Matcher subpattern is discarded (#5873)
* Add a warning when a subpattern is not processed and discarded
* Normalize subpattern attribute/operator keys to upper case like top-level attributes
This commit is contained in:
parent
9e45d064bb
commit
4193402c47
|
@ -133,6 +133,8 @@ class Warnings(object):
|
||||||
"normalization table, please ignore this warning.")
|
"normalization table, please ignore this warning.")
|
||||||
W034 = ("Please install the package spacy-lookups-data in order to include "
|
W034 = ("Please install the package spacy-lookups-data in order to include "
|
||||||
"the default lexeme normalization table for the language '{lang}'.")
|
"the default lexeme normalization table for the language '{lang}'.")
|
||||||
|
W035 = ('Discarding subpattern "{pattern}" due to an unrecognized '
|
||||||
|
"attribute or operator.")
|
||||||
|
|
||||||
|
|
||||||
@add_codes
|
@add_codes
|
||||||
|
|
|
@ -811,9 +811,11 @@ def _get_extra_predicates(spec, extra_predicates):
|
||||||
attr = "ORTH"
|
attr = "ORTH"
|
||||||
attr = IDS.get(attr.upper())
|
attr = IDS.get(attr.upper())
|
||||||
if isinstance(value, dict):
|
if isinstance(value, dict):
|
||||||
|
processed = False
|
||||||
|
value_with_upper_keys = {k.upper(): v for k, v in value.items()}
|
||||||
for type_, cls in predicate_types.items():
|
for type_, cls in predicate_types.items():
|
||||||
if type_ in value:
|
if type_ in value_with_upper_keys:
|
||||||
predicate = cls(len(extra_predicates), attr, value[type_], type_)
|
predicate = cls(len(extra_predicates), attr, value_with_upper_keys[type_], type_)
|
||||||
# Don't create a redundant predicates.
|
# Don't create a redundant predicates.
|
||||||
# This helps with efficiency, as we're caching the results.
|
# This helps with efficiency, as we're caching the results.
|
||||||
if predicate.key in seen_predicates:
|
if predicate.key in seen_predicates:
|
||||||
|
@ -822,6 +824,9 @@ def _get_extra_predicates(spec, extra_predicates):
|
||||||
extra_predicates.append(predicate)
|
extra_predicates.append(predicate)
|
||||||
output.append(predicate.i)
|
output.append(predicate.i)
|
||||||
seen_predicates[predicate.key] = predicate.i
|
seen_predicates[predicate.key] = predicate.i
|
||||||
|
processed = True
|
||||||
|
if not processed:
|
||||||
|
warnings.warn(Warnings.W035.format(pattern=value))
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -76,3 +76,12 @@ def test_minimal_pattern_validation(en_vocab, pattern, n_errors, n_min_errors):
|
||||||
matcher.add("TEST", [pattern])
|
matcher.add("TEST", [pattern])
|
||||||
elif n_errors == 0:
|
elif n_errors == 0:
|
||||||
matcher.add("TEST", [pattern])
|
matcher.add("TEST", [pattern])
|
||||||
|
|
||||||
|
|
||||||
|
def test_pattern_warnings(en_vocab):
    """Exercise subpattern key handling when adding patterns to a Matcher.

    First adds a pattern whose attribute and operator keys are lower-case
    ("text" / "regex"); then adds a pattern with an unknown subpattern key
    ("XX") and requires that a UserWarning is emitted for it.
    """
    matcher = Matcher(en_vocab)

    # normalize "regex" to upper like "text"
    lowercase_keys_pattern = [{"text": {"regex": "regex"}}]
    matcher.add("TEST1", [lowercase_keys_pattern])

    # warn if subpattern attribute isn't recognized and processed
    unrecognized_key_pattern = [{"TEXT": {"XX": "xx"}}]
    with pytest.warns(UserWarning):
        matcher.add("TEST2", [unrecognized_key_pattern])
|
||||||
|
|
Loading…
Reference in New Issue
Block a user