mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-06 05:10:21 +03:00
simplify fuzzy sets
This commit is contained in:
parent
9c0f9368a9
commit
e636f4941b
|
@ -880,7 +880,7 @@ class _FuzzyPredicate:
|
||||||
value = token._.get(self.attr)
|
value = token._.get(self.attr)
|
||||||
else:
|
else:
|
||||||
value = token.vocab.strings[get_token_attr_for_matcher(token.c, self.attr)]
|
value = token.vocab.strings[get_token_attr_for_matcher(token.c, self.attr)]
|
||||||
return bool(fuzz_cpp.ratio(self.value, value) >= self.fuzzy)
|
return bool(self.fuzzy and fuzz_cpp.ratio(self.value, value) >= self.fuzzy)
|
||||||
|
|
||||||
|
|
||||||
class _RegexPredicate:
|
class _RegexPredicate:
|
||||||
|
@ -1006,7 +1006,6 @@ class _ComparisonPredicate:
|
||||||
|
|
||||||
def _get_extra_predicates(spec, extra_predicates, vocab, fuzzy, fuzzy_attrs):
|
def _get_extra_predicates(spec, extra_predicates, vocab, fuzzy, fuzzy_attrs):
|
||||||
predicate_types = {
|
predicate_types = {
|
||||||
"FUZZY": _FuzzyPredicate,
|
|
||||||
"REGEX": _RegexPredicate,
|
"REGEX": _RegexPredicate,
|
||||||
"IN": _SetPredicate,
|
"IN": _SetPredicate,
|
||||||
"NOT_IN": _SetPredicate,
|
"NOT_IN": _SetPredicate,
|
||||||
|
@ -1019,6 +1018,7 @@ def _get_extra_predicates(spec, extra_predicates, vocab, fuzzy, fuzzy_attrs):
|
||||||
"<=": _ComparisonPredicate,
|
"<=": _ComparisonPredicate,
|
||||||
">": _ComparisonPredicate,
|
">": _ComparisonPredicate,
|
||||||
"<": _ComparisonPredicate,
|
"<": _ComparisonPredicate,
|
||||||
|
"FUZZY": _FuzzyPredicate,
|
||||||
}
|
}
|
||||||
seen_predicates = {pred.key: pred.i for pred in extra_predicates}
|
seen_predicates = {pred.key: pred.i for pred in extra_predicates}
|
||||||
output = []
|
output = []
|
||||||
|
@ -1037,28 +1037,32 @@ def _get_extra_predicates(spec, extra_predicates, vocab, fuzzy, fuzzy_attrs):
|
||||||
attr = IDS.get(attr.upper())
|
attr = IDS.get(attr.upper())
|
||||||
|
|
||||||
if isinstance(value, dict):
|
if isinstance(value, dict):
|
||||||
output.extend(_get_extra_predicates_helper(attr, value, vocab, fuzzy, fuzzy_attrs,
|
fuzzy_match = attr in fuzzy_attrs # fuzzy match enabled for this attr
|
||||||
|
output.extend(_get_extra_predicates_dict(attr, value, vocab, fuzzy, fuzzy_match,
|
||||||
predicate_types,
|
predicate_types,
|
||||||
extra_predicates, seen_predicates))
|
extra_predicates, seen_predicates))
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
def _get_extra_predicates_helper(attr, value, vocab, fuzzy, fuzzy_attrs,
|
def _get_extra_predicates_dict(attr, value_dict, vocab, fuzzy, fuzzy_match,
|
||||||
predicate_types, extra_predicates, seen_predicates):
|
predicate_types, extra_predicates, seen_predicates):
|
||||||
output = []
|
output = []
|
||||||
processed = False #TODO: not working as intended
|
for type_, value in value_dict.items():
|
||||||
value_with_upper_keys = {k.upper(): v for k, v in value.items()}
|
if type_ == 'FUZZY':
|
||||||
for type_, cls in predicate_types.items(): #TODO: switch this loop
|
fuzzy_match = True # explicit fuzzy match
|
||||||
if type_ in value_with_upper_keys:
|
if isinstance(value, dict):
|
||||||
if type_ == 'FUZZY' and isinstance(value_with_upper_keys[type_], dict):
|
|
||||||
# add predicates inside fuzzy operator
|
# add predicates inside fuzzy operator
|
||||||
output.extend(_get_extra_predicates_helper(attr, value_with_upper_keys[type_],
|
output.extend(_get_extra_predicates_dict(attr, value, vocab, fuzzy, fuzzy_match,
|
||||||
vocab, fuzzy, fuzzy_attrs,
|
|
||||||
predicate_types,
|
predicate_types,
|
||||||
extra_predicates, seen_predicates))
|
extra_predicates, seen_predicates))
|
||||||
else:
|
continue
|
||||||
predicate = cls(len(extra_predicates), attr, value_with_upper_keys[type_], type_,
|
cls = predicate_types.get(type_.upper())
|
||||||
vocab=vocab, fuzzy=fuzzy)###??? if attr in fuzzy_attrs else 0)
|
if cls is None:
|
||||||
|
warnings.warn(Warnings.W035.format(pattern=value_dict))
|
||||||
|
# ignore unrecognized predicate type
|
||||||
|
continue
|
||||||
|
predicate = cls(len(extra_predicates), attr, value, type_, vocab=vocab,
|
||||||
|
fuzzy=fuzzy if fuzzy_match else 0)
|
||||||
# Don't create a redundant predicates.
|
# Don't create a redundant predicates.
|
||||||
# This helps with efficiency, as we're caching the results.
|
# This helps with efficiency, as we're caching the results.
|
||||||
if predicate.key in seen_predicates:
|
if predicate.key in seen_predicates:
|
||||||
|
@ -1067,9 +1071,6 @@ def _get_extra_predicates_helper(attr, value, vocab, fuzzy, fuzzy_attrs,
|
||||||
extra_predicates.append(predicate)
|
extra_predicates.append(predicate)
|
||||||
output.append(predicate.i)
|
output.append(predicate.i)
|
||||||
seen_predicates[predicate.key] = predicate.i
|
seen_predicates[predicate.key] = predicate.i
|
||||||
processed = True
|
|
||||||
if not processed:
|
|
||||||
warnings.warn(Warnings.W035.format(pattern=value))
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user