diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py
index 5cdbee065..83281543a 100644
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@@ -82,8 +82,7 @@ def evaluate(
         "NER P": "ents_p",
         "NER R": "ents_r",
         "NER F": "ents_f",
-        "Textcat AUC": "textcat_macro_auc",
-        "Textcat F": "textcat_macro_f",
+        "Textcat": "cats_score",
         "Sent P": "sents_p",
         "Sent R": "sents_r",
         "Sent F": "sents_f",
@@ -91,6 +90,8 @@ def evaluate(
     results = {}
     for metric, key in metrics.items():
         if key in scores:
+            if key == "cats_score":
+                metric = metric + " (" + scores.get("cats_score_desc", "unk") + ")"
             results[metric] = f"{scores[key]*100:.2f}"
     data = {re.sub(r"[\s/]", "_", k.lower()): v for k, v in results.items()}
 
@@ -99,12 +100,12 @@ def evaluate(
     if "ents_per_type" in scores:
         if scores["ents_per_type"]:
             print_ents_per_type(msg, scores["ents_per_type"])
-    if "textcat_f_per_cat" in scores:
-        if scores["textcat_f_per_cat"]:
-            print_textcats_f_per_cat(msg, scores["textcat_f_per_cat"])
-    if "textcat_auc_per_cat" in scores:
-        if scores["textcat_auc_per_cat"]:
-            print_textcats_auc_per_cat(msg, scores["textcat_auc_per_cat"])
+    if "cats_f_per_type" in scores:
+        if scores["cats_f_per_type"]:
+            print_textcats_f_per_cat(msg, scores["cats_f_per_type"])
+    if "cats_auc_per_type" in scores:
+        if scores["cats_auc_per_type"]:
+            print_textcats_auc_per_cat(msg, scores["cats_auc_per_type"])
 
     if displacy_path:
         factory_names = [nlp.get_pipe_meta(pipe).factory for pipe in nlp.pipe_names]
@@ -170,7 +171,7 @@ def print_textcats_f_per_cat(msg: Printer, scores: Dict[str, Dict[str, float]])
         data,
         header=("", "P", "R", "F"),
         aligns=("l", "r", "r", "r"),
-        title="Textcat F (per type)",
+        title="Textcat F (per label)",
     )
 
 
diff --git a/spacy/scorer.py b/spacy/scorer.py
index a95fe70cf..2bbf453e7 100644
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@@ -298,7 +298,8 @@ class Scorer:
         **cfg
     ):
         """Returns PRF and ROC AUC scores for a doc-level attribute with a
-        dict with scores for each label like Doc.cats.
+        dict with scores for each label like Doc.cats. The reported overall
+        score depends on the scorer settings.
 
         examples (Iterable[Example]): Examples to score
         attr (str): The attribute to score.
@@ -309,11 +310,16 @@ class Scorer:
             Defaults to True.
         positive_label (str): The positive label for a binary task with
             exclusive classes. Defaults to None.
-        RETURNS (dict): A dictionary containing the scores:
-            for binary exclusive with positive label: attr_p/r/f,
-            for 3+ exclusive classes, macro-averaged fscore: attr_macro_f,
-            for multilabel, macro-averaged AUC: attr_macro_auc,
-            for all: attr_f_per_type, attr_auc_per_type
+        RETURNS (dict): A dictionary containing the scores, with inapplicable
+            scores as None:
+            for all:
+                attr_score (one of attr_f / attr_macro_f / attr_macro_auc),
+                attr_score_desc (text description of the overall score),
+                attr_f_per_type,
+                attr_auc_per_type
+            for binary exclusive with positive label: attr_p/r/f
+            for 3+ exclusive classes, macro-averaged fscore: attr_macro_f
+            for multilabel, macro-averaged AUC: attr_macro_auc
        """
         score = PRFScore()
         f_per_type = dict()
@@ -362,6 +368,13 @@ class Scorer:
                     )
                 )
         results = {
+            attr + "_score": None,
+            attr + "_score_desc": None,
+            attr + "_p": None,
+            attr + "_r": None,
+            attr + "_f": None,
+            attr + "_macro_f": None,
+            attr + "_macro_auc": None,
             attr + "_f_per_type": {k: v.to_dict() for k, v in f_per_type.items()},
             attr + "_auc_per_type": {k: v.score for k, v in auc_per_type.items()},
         }
@@ -369,16 +382,22 @@ class Scorer:
             results[attr + "_p"] = score.precision
             results[attr + "_r"] = score.recall
             results[attr + "_f"] = score.fscore
+            results[attr + "_score"] = results[attr + "_f"]
+            results[attr + "_score_desc"] = "F (" + positive_label + ")"
         elif not multi_label:
             results[attr + "_macro_f"] = sum(
                 [score.fscore for label, score in f_per_type.items()]
             ) / (len(f_per_type) + 1e-100)
+            results[attr + "_score"] = results[attr + "_macro_f"]
+            results[attr + "_score_desc"] = "macro F"
         else:
             results[attr + "_macro_auc"] = max(
                 sum([score.score for label, score in auc_per_type.items()])
                 / (len(auc_per_type) + 1e-100),
                 -1,
             )
+            results[attr + "_score"] = results[attr + "_macro_auc"]
+            results[attr + "_score_desc"] = "macro AUC"
         return results
 
     @staticmethod
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 5e8dab0bd..15832d4bd 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -121,6 +121,8 @@ def test_overfitting_IO():
         train_examples, component_cfg={"scorer": {"positive_label": "POSITIVE"}}
     )
     assert scores["cats_f"] == 1.0
+    assert scores["cats_score"] == 1.0
+    assert "cats_score_desc" in scores
 
     # fmt: off
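
For reference, a minimal sketch of how a caller might read the new combined textcat score introduced in this diff, mirroring the CLI change in spacy/cli/evaluate.py. The `scores` dict below is hypothetical example data shaped like the Scorer.score_cats output; only the key names ("cats_score", "cats_score_desc", "cats_f_per_type", "cats_auc_per_type") come from the change itself.

# Hypothetical example data; only the key names are taken from the diff above.
scores = {
    "cats_score": 0.91,            # overall score selected by the scorer
    "cats_score_desc": "macro F",  # which overall score was reported
    "cats_f_per_type": {"POSITIVE": {"p": 0.90, "r": 0.92, "f": 0.91}},
    "cats_auc_per_type": {"POSITIVE": 0.95},
}

metric = "Textcat"
if "cats_score" in scores:
    # Annotate the metric name with the score description, as evaluate() now does.
    metric = metric + " (" + scores.get("cats_score_desc", "unk") + ")"
print(f"{metric}: {scores['cats_score'] * 100:.2f}")  # -> Textcat (macro F): 91.00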