diff --git a/spacy/language.py b/spacy/language.py
index 01931b2c9..c3fa717b6 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1033,7 +1033,9 @@ class Language:
     ):
         """Update the models in the pipeline.
         teacher (Language): Teacher to distill from.
-        examples (Iterable[Example]): A batch of examples
+        examples (Iterable[Example]): Distillation examples. The reference
+            (teacher) and predicted (student) docs must have the same number of
+            tokens and the same orthography.
         drop (float): The dropout rate.
         sgd (Optional[Optimizer]): An optimizer.
         losses (Optional(Dict[str, float])): Dictionary to update with the loss,
diff --git a/spacy/pipeline/trainable_pipe.pyx b/spacy/pipeline/trainable_pipe.pyx
index 77259fc0b..fcffd11ee 100644
--- a/spacy/pipeline/trainable_pipe.pyx
+++ b/spacy/pipeline/trainable_pipe.pyx
@@ -71,8 +71,8 @@ cdef class TrainablePipe(Pipe):
         teacher_pipe (Optional[TrainablePipe]): The teacher pipe to learn
             from.
         examples (Iterable[Example]): Distillation examples. The reference
-            and predicted docs must have the same number of tokens and the
-            same orthography.
+            (teacher) and predicted (student) docs must have the same number of
+            tokens and the same orthography.
         drop (float): dropout rate.
         sgd (Optional[Optimizer]): An optimizer. Will be created via
             create_optimizer if not set.
diff --git a/website/docs/api/dependencyparser.mdx b/website/docs/api/dependencyparser.mdx
index 5179ce48b..296d6d87d 100644
--- a/website/docs/api/dependencyparser.mdx
+++ b/website/docs/api/dependencyparser.mdx
@@ -154,15 +154,15 @@ This feature is experimental.
 > losses = student.distill(teacher_pipe, examples, sgd=optimizer)
 > ```

-| Name           | Description |
-| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
-| `examples`     | Distillation examples. The reference and predicted docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
-| _keyword-only_ | |
-| `drop`         | Dropout rate. ~~float~~ |
-| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
-| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
-| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |
+| Name           | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
+| `examples`     | A batch of [`Example`](/api/example) distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and orthography. ~~Iterable[Example]~~ |
+| _keyword-only_ | |
+| `drop`         | Dropout rate. ~~float~~ |
+| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
+| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |

 ## DependencyParser.pipe {id="pipe",tag="method"}

diff --git a/website/docs/api/edittreelemmatizer.mdx b/website/docs/api/edittreelemmatizer.mdx
index 2e0993657..c8b5c7180 100644
--- a/website/docs/api/edittreelemmatizer.mdx
+++ b/website/docs/api/edittreelemmatizer.mdx
@@ -138,15 +138,15 @@ This feature is experimental.
 > losses = student.distill(teacher_pipe, examples, sgd=optimizer)
 > ```

-| Name           | Description |
-| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
-| `examples`     | Distillation examples. The reference and predicted docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
-| _keyword-only_ | |
-| `drop`         | Dropout rate. ~~float~~ |
-| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
-| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
-| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |
+| Name           | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
+| `examples`     | A batch of [`Example`](/api/example) distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and orthography. ~~Iterable[Example]~~ |
+| _keyword-only_ | |
+| `drop`         | Dropout rate. ~~float~~ |
+| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
+| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |

 ## EditTreeLemmatizer.pipe {id="pipe",tag="method"}

diff --git a/website/docs/api/entityrecognizer.mdx b/website/docs/api/entityrecognizer.mdx
index 005d5d11d..f503cc998 100644
--- a/website/docs/api/entityrecognizer.mdx
+++ b/website/docs/api/entityrecognizer.mdx
@@ -150,15 +150,15 @@ This feature is experimental.
 > losses = student.distill(teacher_pipe, examples, sgd=optimizer)
 > ```

-| Name           | Description |
-| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
-| `examples`     | Distillation examples. The reference and predicted docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
-| _keyword-only_ | |
-| `drop`         | Dropout rate. ~~float~~ |
-| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
-| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
-| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |
+| Name           | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
+| `examples`     | A batch of [`Example`](/api/example) distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and orthography. ~~Iterable[Example]~~ |
+| _keyword-only_ | |
+| `drop`         | Dropout rate. ~~float~~ |
+| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
+| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |

 ## EntityRecognizer.pipe {id="pipe",tag="method"}

diff --git a/website/docs/api/language.mdx b/website/docs/api/language.mdx
index 22f3bbba1..8262d6e68 100644
--- a/website/docs/api/language.mdx
+++ b/website/docs/api/language.mdx
@@ -347,19 +347,19 @@ Distill the models in a student pipeline from a teacher pipeline.
 > student.distill(teacher, examples, sgd=optimizer)
 > ```

-| Name            | Description |
-| --------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher`       | The teacher pipeline to distill from. ~~Language~~ |
-| `examples`      | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
-| _keyword-only_  | |
-| `drop`          | The dropout rate. ~~float~~ |
-| `sgd`           | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
-| `losses`        | Dictionary to update with the loss, keyed by pipeline component. ~~Optional[Dict[str, float]]~~ |
-| `component_cfg` | Optional dictionary of keyword arguments for components, keyed by component names. Defaults to `None`. ~~Optional[Dict[str, Dict[str, Any]]]~~ |
-| `exclude`       | Names of components that shouldn't be updated. Defaults to `[]`. ~~Iterable[str]~~ |
-| `annotates`     | Names of components that should set annotations on the prediced examples after updating. Defaults to `[]`. ~~Iterable[str]~~ |
-| `component_map` | Map student component names to teacher component names, only necessary when the names differ. Defaults to `None`. ~~Iterable[str]~~ |
-| **RETURNS**     | The updated `losses` dictionary. ~~Dict[str, float]~~ |
+| Name            | Description |
+| --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `teacher`       | The teacher pipeline to distill from. ~~Language~~ |
+| `examples`      | A batch of [`Example`](/api/example) distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and orthography. ~~Iterable[Example]~~ |
+| _keyword-only_  | |
+| `drop`          | The dropout rate. ~~float~~ |
+| `sgd`           | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`        | Dictionary to update with the loss, keyed by pipeline component. ~~Optional[Dict[str, float]]~~ |
+| `component_cfg` | Optional dictionary of keyword arguments for components, keyed by component names. Defaults to `None`. ~~Optional[Dict[str, Dict[str, Any]]]~~ |
+| `exclude`       | Names of components that shouldn't be updated. Defaults to `[]`. ~~Iterable[str]~~ |
+| `annotates`     | Names of components that should set annotations on the predicted examples after updating. Defaults to `[]`. ~~Iterable[str]~~ |
+| `component_map` | Map student component names to teacher component names, only necessary when the names differ. Defaults to `None`. ~~Iterable[str]~~ |
+| **RETURNS**     | The updated `losses` dictionary. ~~Dict[str, float]~~ |

 ## Language.rehearse {id="rehearse",tag="method,experimental",version="3"}

diff --git a/website/docs/api/morphologizer.mdx b/website/docs/api/morphologizer.mdx
index 4f79458d3..4660ec312 100644
--- a/website/docs/api/morphologizer.mdx
+++ b/website/docs/api/morphologizer.mdx
@@ -144,15 +144,15 @@ This feature is experimental.
 > losses = student.distill(teacher_pipe, examples, sgd=optimizer)
 > ```

-| Name           | Description |
-| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
-| `examples`     | Distillation examples. The reference and predicted docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
-| _keyword-only_ | |
-| `drop`         | Dropout rate. ~~float~~ |
-| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
-| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
-| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |
+| Name           | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
+| `examples`     | A batch of [`Example`](/api/example) distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and orthography. ~~Iterable[Example]~~ |
+| _keyword-only_ | |
+| `drop`         | Dropout rate. ~~float~~ |
+| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
+| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |

 ## Morphologizer.pipe {id="pipe",tag="method"}

diff --git a/website/docs/api/pipe.mdx b/website/docs/api/pipe.mdx
index 120c8f690..e1e7f5d70 100644
--- a/website/docs/api/pipe.mdx
+++ b/website/docs/api/pipe.mdx
@@ -257,15 +257,15 @@ This feature is experimental.
 > losses = student.distill(teacher_pipe, examples, sgd=optimizer)
 > ```

-| Name           | Description |
-| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
-| `examples`     | Distillation examples. The reference and predicted docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
-| _keyword-only_ | |
-| `drop`         | Dropout rate. ~~float~~ |
-| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
-| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
-| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |
+| Name           | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
+| `examples`     | A batch of [`Example`](/api/example) distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and orthography. ~~Iterable[Example]~~ |
+| _keyword-only_ | |
+| `drop`         | Dropout rate. ~~float~~ |
+| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
+| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |

 ## TrainablePipe.rehearse {id="rehearse",tag="method,experimental",version="3"}

diff --git a/website/docs/api/sentencerecognizer.mdx b/website/docs/api/sentencerecognizer.mdx
index 02fd57102..dfb7ed308 100644
--- a/website/docs/api/sentencerecognizer.mdx
+++ b/website/docs/api/sentencerecognizer.mdx
@@ -129,15 +129,15 @@ This feature is experimental.
 > losses = student.distill(teacher_pipe, examples, sgd=optimizer)
 > ```

-| Name           | Description |
-| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
-| `examples`     | Distillation examples. The reference and predicted docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
-| _keyword-only_ | |
-| `drop`         | Dropout rate. ~~float~~ |
-| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
-| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
-| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |
+| Name           | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
+| `examples`     | A batch of [`Example`](/api/example) distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and orthography. ~~Iterable[Example]~~ |
+| _keyword-only_ | |
+| `drop`         | Dropout rate. ~~float~~ |
+| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
+| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |

 ## SentenceRecognizer.pipe {id="pipe",tag="method"}

diff --git a/website/docs/api/tagger.mdx b/website/docs/api/tagger.mdx
index 664fd7940..35e7a23b1 100644
--- a/website/docs/api/tagger.mdx
+++ b/website/docs/api/tagger.mdx
@@ -128,15 +128,15 @@ This feature is experimental.
 > losses = student.distill(teacher_pipe, examples, sgd=optimizer)
 > ```

-| Name           | Description |
-| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
-| `examples`     | Distillation examples. The reference and predicted docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
-| _keyword-only_ | |
-| `drop`         | Dropout rate. ~~float~~ |
-| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
-| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
-| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |
+| Name           | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
+| `examples`     | A batch of [`Example`](/api/example) distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and orthography. ~~Iterable[Example]~~ |
+| _keyword-only_ | |
+| `drop`         | Dropout rate. ~~float~~ |
+| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
+| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |

 ## Tagger.pipe {id="pipe",tag="method"}
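
The doc changes above all describe the same `distill` signature, so a single end-to-end usage sketch may help reviewers. It follows the example blocks in the updated docs and assumes the teacher and student share a tokenizer (so the reference/teacher and predicted/student docs match token for token, as the docs require) and that the student pipe has already been initialized; the model name, texts, and batch size are placeholder choices, not a recommended recipe.

```python
import spacy
from spacy.lang.en import English
from spacy.training import Example
from spacy.util import minibatch

# Hypothetical teacher/student pair, mirroring the example blocks in the docs.
teacher = spacy.load("en_core_web_lg")
student = English()
student.add_pipe("tagger")
# Assumes the student has already been initialized (labels, model shapes)
# through the usual initialization workflow before distillation starts.

texts = ["Distillation only needs raw text.", "No gold annotations are required."]

# Example(predicted, reference): the student doc is the predicted side and the
# teacher doc the reference side. Both pipelines tokenize the same string, so
# token counts and orthography line up as required.
examples = [Example(student.make_doc(t), teacher.make_doc(t)) for t in texts]

optimizer = student.create_optimizer()
losses = {}
for batch in minibatch(examples, size=8):
    student.distill(teacher, batch, sgd=optimizer, losses=losses)
print(losses)
```

Since `sgd` is created via `create_optimizer` when not set, the explicit optimizer is optional and is passed here only to mirror the documented examples.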