diff --git a/spacy/pipeline/_parser_internals/arc_eager.pyx b/spacy/pipeline/_parser_internals/arc_eager.pyx
index f50f91f21..7c4bb0184 100644
--- a/spacy/pipeline/_parser_internals/arc_eager.pyx
+++ b/spacy/pipeline/_parser_internals/arc_eager.pyx
@@ -204,6 +204,32 @@ cdef class ArcEagerGold:
     def update(self, StateClass stcls):
         update_gold_state(&self.c, stcls.c)
 
+def _get_aligned_sent_starts(example):
+    """Get list of SENT_START attributes aligned to the predicted tokenization.
+    If the reference has no sentence starts, return a list of None values.
+
+    Unlike the method on Example, this also returns a list of None values
+    when a predicted token aligns to more than one reference sentence, or
+    when a reference sentence aligns to no predicted token at all, because
+    the sentence starts cannot be placed reliably in either case.
+
+    example (Example): The example whose reference sentences are aligned.
+    RETURNS (list): True/False per predicted token, or None for each token.
+    """
+    if not example.y.has_annotation("SENT_START"):
+        return [None] * len(example.x)
+    align = example.alignment.y2x
+    sent_starts = [False] * len(example.x)
+    seen_words = set()
+    for y_sent in example.y.sents:
+        x_indices = list(align[y_sent.start : y_sent.end].dataXd)
+        # An empty alignment would make x_indices[0] raise an IndexError.
+        if not x_indices or not seen_words.isdisjoint(x_indices):
+            return [None] * len(example.x)
+        seen_words.update(x_indices)
+        sent_starts[x_indices[0]] = True
+    return sent_starts
+
 
 cdef int check_state_gold(char state_bits, char flag) nogil:
     cdef char one = 1
diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx
index fe4ee6ff4..bbe59e9f4 100644
--- a/spacy/training/example.pyx
+++ b/spacy/training/example.pyx
@@ -200,10 +200,6 @@ cdef class Example:
     def get_aligned_sent_starts(self):
         """Get list of SENT_START attributes aligned to the predicted tokenization.
         If the reference has not sentence starts, return a list of None values.
-
-        The aligned sentence starts use the get_aligned_spans method, rather
-        than aligning the list of tags, so that it handles cases where a mistaken
-        tokenization starts the sentence.
         """
         if self.y.has_annotation("SENT_START"):
             align = self.alignment.y2x