From 252e9ab3af320ac1f3672eab2e906c1ec7972d3c Mon Sep 17 00:00:00 2001 From: Kevin Humphreys Date: Thu, 15 Sep 2022 15:50:07 -0700 Subject: [PATCH] exclude whitespace tokens --- spacy/matcher/matcher.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index b0c02aa07..7d111ca7d 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -849,7 +849,7 @@ class _FuzzyPredicate: value = token.vocab.strings[get_token_attr_for_matcher(token.c, self.attr)] if self.value == value: return True - elif self.distance and token.is_oov: + elif self.distance and token.is_oov and not token.is_space: return bool(levenshtein(self.value, value) <= self.distance) return False @@ -924,7 +924,7 @@ class _SetPredicate: if self.predicate == "IN": if value in self.value: return True - elif self.distance and token.is_oov: + elif self.distance and token.is_oov and not token.is_space: for v in self.value: if levenshtein(self.vocab.strings[value], self.vocab.strings[v]) <= self.distance: @@ -933,7 +933,7 @@ class _SetPredicate: elif self.predicate == "NOT_IN": if value in self.value: return False - elif self.distance and token.is_oov: + elif self.distance and token.is_oov and not token.is_space: for v in self.value: if levenshtein(self.vocab.strings[value], self.vocab.strings[v]) <= self.distance: