From 1c35b8efcdeee8b701077c57b80c1ae0efcda749 Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Tue, 16 Jun 2020 12:08:25 +0200
Subject: [PATCH] Fix default spaces in Example.from_dict

---
 spacy/gold/example.pyx   |  3 ++-
 spacy/tests/test_gold.py | 13 ++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/spacy/gold/example.pyx b/spacy/gold/example.pyx
index ee81e0481..adae9335b 100644
--- a/spacy/gold/example.pyx
+++ b/spacy/gold/example.pyx
@@ -55,8 +55,9 @@ cdef class Example:
         tok_dict, doc_dict = _parse_example_dict_data(example_dict)
         if "ORTH" not in tok_dict:
             tok_dict["ORTH"] = [tok.text for tok in predicted]
-        if "SPACY" not in tok_dict:
             tok_dict["SPACY"] = [tok.whitespace_ for tok in predicted]
+        if "SPACY" not in tok_dict:
+            tok_dict["SPACY"] = None
         return Example(
             predicted,
             annotations2doc(predicted.vocab, tok_dict, doc_dict)
diff --git a/spacy/tests/test_gold.py b/spacy/tests/test_gold.py
index f7d7d70bb..5f92a476c 100644
--- a/spacy/tests/test_gold.py
+++ b/spacy/tests/test_gold.py
@@ -166,7 +166,7 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
     spaces = [True, True, True, True, True, False, False]
     doc = Doc(en_vocab, words=words, spaces=spaces)
     entities = [(len("I flew to "), len("I flew to San Francisco Valley"), "LOC")]
-    gold_words =["I", "flew to", "San Francisco Valley", "."]
+    gold_words = ["I", "flew to", "San Francisco Valley", "."]
     example = Example.from_dict(doc, {"words": gold_words, "entities": entities})
     assert example.get_aligned("ENT_IOB") == [2, 2, 2, 3, 1, 1, 2]
     assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "", "LOC", "LOC", "LOC", ""]
@@ -188,12 +188,11 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
     )
     doc = Doc(en_vocab, words=words, spaces=spaces)
     entities = [(len("I flew  to "), len("I flew  to San Francisco Valley"), "LOC")]
-    gp = GoldParse(
-        doc,
-        words=["I", "flew", " ", "to", "San Francisco Valley", "."],
-        entities=entities,
-    )
-    assert gp.ner == ["O", "O", "O", "O", "B-LOC", "L-LOC", "O"]
+    gold_words = ["I", "flew", " ", "to", "San Francisco Valley", "."]
+    gold_spaces = [True, True, False, True, False, False]
+    example = Example.from_dict(doc, {"words": gold_words, "spaces": gold_spaces, "entities": entities})
+    assert example.get_aligned("ENT_IOB") == [2, 2, 2, 2, 3, 1, 2]
+    assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "", "", "LOC", "LOC", ""]
 
     # from issue #4791
     data = (