diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index b649e6666..fe6cccf81 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -360,6 +360,16 @@ def debug_data(
             )
         )
 
+        # warn if the average number of sentences per training document is close to 1
+        sents_per_doc = gold_train_data["n_sents"] / len(gold_train_data["texts"])
+        if sents_per_doc < 1.1:
+            msg.warn(
+                "The training data contains {:.2f} sentences per "
+                "document. When there are very few documents containing more "
+                "than one sentence, the parser will not learn how to segment "
+                "longer texts into sentences.".format(sents_per_doc)
+            )
+
         # profile labels
         labels_train = [label for label in gold_train_data["deps"]]
         labels_train_unpreprocessed = [