simplify fuzzy sets

This commit is contained in:
Kevin Humphreys 2022-08-29 12:49:14 +02:00
parent 9c0f9368a9
commit e636f4941b

View File

@ -880,7 +880,7 @@ class _FuzzyPredicate:
value = token._.get(self.attr) value = token._.get(self.attr)
else: else:
value = token.vocab.strings[get_token_attr_for_matcher(token.c, self.attr)] value = token.vocab.strings[get_token_attr_for_matcher(token.c, self.attr)]
return bool(fuzz_cpp.ratio(self.value, value) >= self.fuzzy) return bool(self.fuzzy and fuzz_cpp.ratio(self.value, value) >= self.fuzzy)
class _RegexPredicate: class _RegexPredicate:
@ -1006,7 +1006,6 @@ class _ComparisonPredicate:
def _get_extra_predicates(spec, extra_predicates, vocab, fuzzy, fuzzy_attrs): def _get_extra_predicates(spec, extra_predicates, vocab, fuzzy, fuzzy_attrs):
predicate_types = { predicate_types = {
"FUZZY": _FuzzyPredicate,
"REGEX": _RegexPredicate, "REGEX": _RegexPredicate,
"IN": _SetPredicate, "IN": _SetPredicate,
"NOT_IN": _SetPredicate, "NOT_IN": _SetPredicate,
@ -1019,6 +1018,7 @@ def _get_extra_predicates(spec, extra_predicates, vocab, fuzzy, fuzzy_attrs):
"<=": _ComparisonPredicate, "<=": _ComparisonPredicate,
">": _ComparisonPredicate, ">": _ComparisonPredicate,
"<": _ComparisonPredicate, "<": _ComparisonPredicate,
"FUZZY": _FuzzyPredicate,
} }
seen_predicates = {pred.key: pred.i for pred in extra_predicates} seen_predicates = {pred.key: pred.i for pred in extra_predicates}
output = [] output = []
@ -1037,28 +1037,32 @@ def _get_extra_predicates(spec, extra_predicates, vocab, fuzzy, fuzzy_attrs):
attr = IDS.get(attr.upper()) attr = IDS.get(attr.upper())
if isinstance(value, dict): if isinstance(value, dict):
output.extend(_get_extra_predicates_helper(attr, value, vocab, fuzzy, fuzzy_attrs, fuzzy_match = attr in fuzzy_attrs # fuzzy match enabled for this attr
output.extend(_get_extra_predicates_dict(attr, value, vocab, fuzzy, fuzzy_match,
predicate_types, predicate_types,
extra_predicates, seen_predicates)) extra_predicates, seen_predicates))
return output return output
def _get_extra_predicates_helper(attr, value, vocab, fuzzy, fuzzy_attrs, def _get_extra_predicates_dict(attr, value_dict, vocab, fuzzy, fuzzy_match,
predicate_types, extra_predicates, seen_predicates): predicate_types, extra_predicates, seen_predicates):
output = [] output = []
processed = False #TODO: not working as intended for type_, value in value_dict.items():
value_with_upper_keys = {k.upper(): v for k, v in value.items()} if type_ == 'FUZZY':
for type_, cls in predicate_types.items(): #TODO: switch this loop fuzzy_match = True # explicit fuzzy match
if type_ in value_with_upper_keys: if isinstance(value, dict):
if type_ == 'FUZZY' and isinstance(value_with_upper_keys[type_], dict):
# add predicates inside fuzzy operator # add predicates inside fuzzy operator
output.extend(_get_extra_predicates_helper(attr, value_with_upper_keys[type_], output.extend(_get_extra_predicates_dict(attr, value, vocab, fuzzy, fuzzy_match,
vocab, fuzzy, fuzzy_attrs,
predicate_types, predicate_types,
extra_predicates, seen_predicates)) extra_predicates, seen_predicates))
else: continue
predicate = cls(len(extra_predicates), attr, value_with_upper_keys[type_], type_, cls = predicate_types.get(type_.upper())
vocab=vocab, fuzzy=fuzzy)###??? if attr in fuzzy_attrs else 0) if cls is None:
warnings.warn(Warnings.W035.format(pattern=value_dict))
# ignore unrecognized predicate type
continue
predicate = cls(len(extra_predicates), attr, value, type_, vocab=vocab,
fuzzy=fuzzy if fuzzy_match else 0)
# Don't create redundant predicates. # Don't create redundant predicates.
# This helps with efficiency, as we're caching the results. # This helps with efficiency, as we're caching the results.
if predicate.key in seen_predicates: if predicate.key in seen_predicates:
@ -1067,9 +1071,6 @@ def _get_extra_predicates_helper(attr, value, vocab, fuzzy, fuzzy_attrs,
extra_predicates.append(predicate) extra_predicates.append(predicate)
output.append(predicate.i) output.append(predicate.i)
seen_predicates[predicate.key] = predicate.i seen_predicates[predicate.key] = predicate.i
processed = True
if not processed:
warnings.warn(Warnings.W035.format(pattern=value))
return output return output