diff --git a/spacy/pipeline/coref.py b/spacy/pipeline/coref.py
index 1e11a0417..af40d9b06 100644
--- a/spacy/pipeline/coref.py
+++ b/spacy/pipeline/coref.py
@@ -218,6 +218,13 @@ class CoreferenceResolver(TrainablePipe):
 
         total_loss = 0
         for eg in examples:
+            if eg.x.text != eg.y.text:
+                # TODO assign error number
+                raise ValueError(
+                    """Text, including whitespace, must match between reference and
+                    predicted docs in coref training.
+                    """
+                )
             # TODO check this causes no issues (in practice it runs)
             preds, backprop = self.model.begin_update([eg.predicted])
             score_matrix, mention_idx = preds
@@ -277,7 +284,7 @@ class CoreferenceResolver(TrainablePipe):
                 if span is None:
                     # TODO log more details
                     raise IndexError(Errors.E1043)
-                cc.append( (span.start, span.end) )
+                cc.append((span.start, span.end))
             clusters.append(cc)
 
         span_idxs = create_head_span_idxs(ops, len(example.predicted))
diff --git a/spacy/pipeline/span_predictor.py b/spacy/pipeline/span_predictor.py
index c9343a97e..aee11ba8e 100644
--- a/spacy/pipeline/span_predictor.py
+++ b/spacy/pipeline/span_predictor.py
@@ -178,6 +178,13 @@ class SpanPredictor(TrainablePipe):
 
         total_loss = 0
         for eg in examples:
+            if eg.x.text != eg.y.text:
+                # TODO assign error number
+                raise ValueError(
+                    """Text, including whitespace, must match between reference and
+                    predicted docs in span predictor training.
+                    """
+                )
             span_scores, backprop = self.model.begin_update([eg.predicted])
             # FIXME, this only happens once in the first 1000 docs of OntoNotes
             # and I'm not sure yet why.
diff --git a/spacy/tests/pipeline/test_coref.py b/spacy/tests/pipeline/test_coref.py
index 9a969acdd..7fc4864a3 100644
--- a/spacy/tests/pipeline/test_coref.py
+++ b/spacy/tests/pipeline/test_coref.py
@@ -218,3 +218,20 @@ def test_sentence_map(snlp):
     doc = snlp("I like text. This is text.")
     sm = get_sentence_ids(doc)
     assert sm == [0, 0, 0, 0, 1, 1, 1, 1]
+
+
+@pytest.mark.skipif(not has_torch, reason="Torch not available")
+def test_whitespace_mismatch(nlp):
+    train_examples = []
+    for text, annot in TRAIN_DATA:
+        eg = Example.from_dict(nlp.make_doc(text), annot)
+        eg.predicted = nlp.make_doc(" " + text)
+        train_examples.append(eg)
+
+    nlp.add_pipe("coref", config=CONFIG)
+    optimizer = nlp.initialize()
+    test_text = TRAIN_DATA[0][0]
+    doc = nlp(test_text)
+
+    with pytest.raises(ValueError, match="whitespace"):
+        nlp.update(train_examples, sgd=optimizer)
diff --git a/spacy/tests/pipeline/test_span_predictor.py b/spacy/tests/pipeline/test_span_predictor.py
index 3a3111bd4..a79756d88 100644
--- a/spacy/tests/pipeline/test_span_predictor.py
+++ b/spacy/tests/pipeline/test_span_predictor.py
@@ -106,7 +106,7 @@ def test_overfitting_IO(nlp):
         pred = eg.predicted
         for key, spans in ref.spans.items():
             if key.startswith("coref_head_clusters"):
-                pred.spans[key] = [pred[span.start:span.end] for span in spans]
+                pred.spans[key] = [pred[span.start : span.end] for span in spans]
         train_examples.append(eg)
 
     nlp.add_pipe("span_predictor", config=CONFIG)
@@ -209,3 +209,19 @@
 
     assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs2[0])
     assert _spans_to_offsets(docs1[0]) == _spans_to_offsets(docs3[0])
+
+@pytest.mark.skipif(not has_torch, reason="Torch not available")
+def test_whitespace_mismatch(nlp):
+    train_examples = []
+    for text, annot in TRAIN_DATA:
+        eg = Example.from_dict(nlp.make_doc(text), annot)
+        eg.predicted = nlp.make_doc(" " + text)
+        train_examples.append(eg)
+
+    nlp.add_pipe("span_predictor", config=CONFIG)
+    optimizer = nlp.initialize()
+    test_text = TRAIN_DATA[0][0]
+    doc = nlp(test_text)
+
+    with pytest.raises(ValueError, match="whitespace"):
+        nlp.update(train_examples, sgd=optimizer)