diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py
index d75aca966..c911b8d81 100644
--- a/spacy/tests/test_language.py
+++ b/spacy/tests/test_language.py
@@ -421,6 +421,37 @@ def test_language_from_config_before_after_init_invalid():
         English.from_config(config)
 
 
+def test_language_whitespace_tokenizer():
+    """Test the custom whitespace tokenizer from the docs."""
+
+    class WhitespaceTokenizer:
+        def __init__(self, vocab):
+            self.vocab = vocab
+
+        def __call__(self, text):
+            words = text.split(" ")
+            spaces = [True] * len(words)
+            # Avoid zero-length tokens
+            for i, word in enumerate(words):
+                if word == "":
+                    words[i] = " "
+                    spaces[i] = False
+            # Remove the final trailing space
+            if words[-1] == " ":
+                words = words[0:-1]
+                spaces = spaces[0:-1]
+            else:
+                spaces[-1] = False
+
+            return Doc(self.vocab, words=words, spaces=spaces)
+
+    nlp = spacy.blank("en")
+    nlp.tokenizer = WhitespaceTokenizer(nlp.vocab)
+    text = " What's happened to me? he thought. It wasn't a dream. "
+    doc = nlp(text)
+    assert doc.text == text
+
+
 def test_language_custom_tokenizer():
     """Test that a fully custom tokenizer can be plugged in via the registry."""
     name = "test_language_custom_tokenizer"
diff --git a/website/docs/usage/linguistic-features.md b/website/docs/usage/linguistic-features.md
index b05d16da3..42476cd98 100644
--- a/website/docs/usage/linguistic-features.md
+++ b/website/docs/usage/linguistic-features.md
@@ -1169,7 +1169,20 @@ class WhitespaceTokenizer:
 
     def __call__(self, text):
         words = text.split(" ")
-        return Doc(self.vocab, words=words)
+        spaces = [True] * len(words)
+        # Avoid zero-length tokens
+        for i, word in enumerate(words):
+            if word == "":
+                words[i] = " "
+                spaces[i] = False
+        # Remove the final trailing space
+        if words[-1] == " ":
+            words = words[0:-1]
+            spaces = spaces[0:-1]
+        else:
+            spaces[-1] = False
+
+        return Doc(self.vocab, words=words, spaces=spaces)
 
 nlp = spacy.blank("en")
 nlp.tokenizer = WhitespaceTokenizer(nlp.vocab)
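
For quick manual verification outside the test suite, here is a standalone sketch (assuming spaCy v3; the class is copied verbatim from the patch above, and the sample strings are illustrative). It shows that doc.text now round-trips leading, doubled, and trailing spaces, which the previous one-liner `return Doc(self.vocab, words=words)` did not preserve: with default spaces all set to True, it appended a trailing space after the last token, and runs of spaces produced the zero-length tokens the new loop avoids.

    # Standalone check of the tokenizer logic added in this diff.
    # Assumes spaCy v3; the class below is copied verbatim from the patch.
    import spacy
    from spacy.tokens import Doc

    class WhitespaceTokenizer:
        def __init__(self, vocab):
            self.vocab = vocab

        def __call__(self, text):
            words = text.split(" ")
            spaces = [True] * len(words)
            # Avoid zero-length tokens
            for i, word in enumerate(words):
                if word == "":
                    words[i] = " "
                    spaces[i] = False
            # Remove the final trailing space
            if words[-1] == " ":
                words = words[0:-1]
                spaces = spaces[0:-1]
            else:
                spaces[-1] = False
            return Doc(self.vocab, words=words, spaces=spaces)

    nlp = spacy.blank("en")
    nlp.tokenizer = WhitespaceTokenizer(nlp.vocab)

    # Leading, doubled, and trailing spaces all survive the round trip;
    # extra spaces become single-space tokens with spaces[i] = False.
    for sample in ["hello world", " hello  world "]:
        doc = nlp(sample)
        assert doc.text == sample
        print([token.text for token in doc])
    # ['hello', 'world']
    # [' ', 'hello', ' ', 'world']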