diff --git a/spacy/errors.py b/spacy/errors.py index 1ce7e9b93..1a1e9ea10 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -955,7 +955,7 @@ class Errors(metaclass=ErrorsWithCodes): E4000 = ("Expected a Doc as input, but got: '{type}'") E4001 = ("Expected input to be one of the following types: ({expected_types}), " "but got '{received_type}'") - E4002 = ("Pipe '{name}' requires teacher pipe for distillation.") + E4002 = ("Pipe '{name}' requires a teacher pipe for distillation.") # fmt: on diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py index c2027f054..20f83fffc 100644 --- a/spacy/pipeline/edit_tree_lemmatizer.py +++ b/spacy/pipeline/edit_tree_lemmatizer.py @@ -164,6 +164,8 @@ class EditTreeLemmatizer(TrainablePipe): teacher_scores: Scores representing the teacher model's predictions. student_scores: Scores representing the student model's predictions. + RETURNS (Tuple[float, float]): The loss and the gradient. + DOCS: https://spacy.io/api/edittreelemmatizer#get_teacher_student_loss """ loss_func = LegacySequenceCategoricalCrossentropy(normalize=False) diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx index 41e6634f9..a6be51c3c 100644 --- a/spacy/pipeline/tagger.pyx +++ b/spacy/pipeline/tagger.pyx @@ -275,6 +275,8 @@ class Tagger(TrainablePipe): teacher_scores: Scores representing the teacher model's predictions. student_scores: Scores representing the student model's predictions. + RETURNS (Tuple[float, float]): The loss and the gradient. + DOCS: https://spacy.io/api/tagger#get_teacher_student_loss """ loss_func = LegacySequenceCategoricalCrossentropy(normalize=False) diff --git a/spacy/pipeline/trainable_pipe.pyx b/spacy/pipeline/trainable_pipe.pyx index 42e612c8e..5a28204cf 100644 --- a/spacy/pipeline/trainable_pipe.pyx +++ b/spacy/pipeline/trainable_pipe.pyx @@ -78,6 +78,8 @@ cdef class TrainablePipe(Pipe): losses (Optional[Dict[str, float]]): Optional record of loss during distillation. 
RETURNS: The updated losses dictionary. + + DOCS: https://spacy.io/api/pipe#distill """ # By default we require a teacher pipe, but there are downstream # implementations that don't require a pipe. @@ -220,6 +222,8 @@ cdef class TrainablePipe(Pipe): teacher_scores: Scores representing the teacher model's predictions. student_scores: Scores representing the student model's predictions. + RETURNS (Tuple[float, float]): The loss and the gradient. + DOCS: https://spacy.io/api/pipe#get_teacher_student_loss """ raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="get_teacher_student_loss", name=self.name)) diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx index 02696739e..44decbf5f 100644 --- a/spacy/pipeline/transition_parser.pyx +++ b/spacy/pipeline/transition_parser.pyx @@ -227,6 +227,8 @@ cdef class Parser(TrainablePipe): losses (Optional[Dict[str, float]]): Optional record of loss during distillation. RETURNS: The updated losses dictionary. + + DOCS: https://spacy.io/api/dependencyparser#distill """ if teacher_pipe is None: raise ValueError(Errors.E4002.format(name=self.name)) @@ -308,6 +310,8 @@ cdef class Parser(TrainablePipe): teacher_scores: Scores representing the teacher model's predictions. student_scores: Scores representing the student model's predictions. + RETURNS (Tuple[float, float]): The loss and the gradient. + DOCS: https://spacy.io/api/dependencyparser#get_teacher_student_loss """ loss_func = LegacySequenceCategoricalCrossentropy(normalize=False) diff --git a/website/docs/api/pipe.mdx b/website/docs/api/pipe.mdx index 0c916dd42..9813da197 100644 --- a/website/docs/api/pipe.mdx +++ b/website/docs/api/pipe.mdx @@ -234,7 +234,7 @@ predictions and gold-standard annotations, and update the component's model. | `losses` | Optional record of the loss during training. Updated using the component name as the key. 
~~Optional[Dict[str, float]]~~ | | **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ | -## TrainablePipe.distill {id="rehearse", tag="method,experimental", version="4"} +## TrainablePipe.distill {id="distill", tag="method,experimental", version="4"} Train a pipe (the student) on the predictions of another pipe (the teacher). The student is typically trained on the probability distribution of the teacher, but