From 252e9ab3af320ac1f3672eab2e906c1ec7972d3c Mon Sep 17 00:00:00 2001
From: Kevin Humphreys <kevin.humphreys@dialpad.com>
Date: Thu, 15 Sep 2022 15:50:07 -0700
Subject: [PATCH] Matcher: exclude whitespace tokens from fuzzy (Levenshtein) matching

---
 spacy/matcher/matcher.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx
index b0c02aa07..7d111ca7d 100644
--- a/spacy/matcher/matcher.pyx
+++ b/spacy/matcher/matcher.pyx
@@ -849,7 +849,7 @@ class _FuzzyPredicate:
             value = token.vocab.strings[get_token_attr_for_matcher(token.c, self.attr)]
         if self.value == value:
             return True
-        elif self.distance and token.is_oov:
+        elif self.distance and token.is_oov and not token.is_space:
             return bool(levenshtein(self.value, value) <= self.distance)
         return False
 
@@ -924,7 +924,7 @@ class _SetPredicate:
         if self.predicate == "IN":
             if value in self.value:
                 return True
-            elif self.distance and token.is_oov:
+            elif self.distance and token.is_oov and not token.is_space:
                 for v in self.value:
                     if levenshtein(self.vocab.strings[value],
                                    self.vocab.strings[v]) <= self.distance:
@@ -933,7 +933,7 @@ class _SetPredicate:
         elif self.predicate == "NOT_IN":
             if value in self.value:
                 return False
-            elif self.distance and token.is_oov:
+            elif self.distance and token.is_oov and not token.is_space:
                 for v in self.value:
                     if levenshtein(self.vocab.strings[value],
                                    self.vocab.strings[v]) <= self.distance: