diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index f2d743e10..9a9ad9ae1 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -693,6 +693,18 @@ def debug_data( # and we might actually want a warning? msg.info("All trees in dev data present in training data.") + if gold_train_data["n_low_cardinality_lemmas"] > 0: + n = gold_train_data["n_low_cardinality_lemmas"] + msg.warn(f"{n} training docs with 1 or 0 unique lemmas.") + else: + msg.good("All training docs meet lemma uniqueness requirements.") + + if gold_dev_data["n_low_cardinality_lemmas"] > 0: + n = gold_dev_data["n_low_cardinality_lemmas"] + msg.warn(f"{n} dev docs with 1 or 0 unique lemmas.") + else: + msg.good("All dev docs meet lemma uniqueness requirements.") + if gold_train_data["no_lemma_annotations"] > 0: n = gold_train_data["no_lemma_annotations"] msg.warn(f"{n} docs with no lemma annotations.") @@ -781,6 +793,7 @@ def _compile_gold( "lemmatizer_trees": set(), "no_lemma_annotations": 0, "partial_lemma_annotations": 0, + "n_low_cardinality_lemmas": 0, } if "trainable_lemmatizer" in factory_names: trees = EditTrees(nlp.vocab.strings) @@ -920,11 +933,18 @@ def _compile_gold( continue if any(token.lemma == 0 for token in gold): data["partial_lemma_annotations"] += 1 + lemma_set = set() for token in gold: if token.lemma != 0: + lemma_set.add(token.lemma) tree_id = trees.add(token.text, token.lemma_) tree_str = trees.tree_to_str(tree_id) data["lemmatizer_trees"].add(tree_str) + # We want to identify cases where lemmas aren't assigned + # or are all assigned the same value, as this would indicate + # an issue since we're expecting a large set of lemmas + if len(lemma_set) < 2 and len(gold) > 1: + data["n_low_cardinality_lemmas"] += 1 return data diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py index bc5d6b826..ba7efc704 100644 --- a/spacy/tests/test_cli.py +++ b/spacy/tests/test_cli.py @@ -1244,6 +1244,19 @@ def test_debug_data_trainable_lemmatizer_partial(): data =
_compile_gold(train_examples, ["trainable_lemmatizer"], nlp, True) assert data["partial_lemma_annotations"] == 2 +def test_debug_data_trainable_lemmatizer_low_cardinality(): + low_cardinality_examples = [ + ("She likes green eggs", {"lemmas": ["no", "no", "no", "no"]}), + ("Eat blue ham", {"lemmas": ["no", "no", "no"]}), + ] + nlp = Language() + train_examples = [] + for t in low_cardinality_examples: + train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1])) + + data = _compile_gold(train_examples, ["trainable_lemmatizer"], nlp, True) + assert data["n_low_cardinality_lemmas"] == 2 + def test_debug_data_trainable_lemmatizer_not_annotated(): unannotated_examples = [ ("She likes green eggs", {}),