From 1d8168d1fd8220ecd27dd6fbc8d604572d0b040b Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Wed, 3 Jun 2020 14:15:58 +0200
Subject: [PATCH] Fix problems with lower and whitespace in variants

Port relevant changes from #5361:

* Initialize lower flag explicitly

* Handle whitespace words from GoldParse correctly when creating raw
text with orth variants
---
 spacy/gold.pyx | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/spacy/gold.pyx b/spacy/gold.pyx
index 5aa7da456..4d564d8f6 100644
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -362,6 +362,7 @@ def make_orth_variants(nlp, example, orth_variant_level=0.0):
     if not example.token_annotation:
         return example
     raw = example.text
+    lower = False
     if random.random() >= 0.5:
         lower = True
         if raw is not None:
@@ -429,8 +430,11 @@ def make_orth_variants(nlp, example, orth_variant_level=0.0):
             raw_idx += 1
         for word in variant_example.token_annotation.words:
             match_found = False
+            # skip whitespace words
+            if word.isspace():
+                match_found = True
             # add identical word
-            if word not in variants and raw[raw_idx:].startswith(word):
+            elif word not in variants and raw[raw_idx:].startswith(word):
                 variant_raw += word
                 raw_idx += len(word)
                 match_found = True
@@ -445,6 +449,7 @@ def make_orth_variants(nlp, example, orth_variant_level=0.0):
             # something went wrong, abort
             # (add a warning message?)
             if not match_found:
+                print("aborting")
                 return example
             # add following whitespace
             while raw_idx < len(raw) and re.match("\s", raw[raw_idx]):