From 72c0f7c798e074b37d982fc3d8bb54a7061cc132 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?=
Date: Thu, 11 Apr 2024 15:27:32 +0200
Subject: [PATCH] Fixes from Sofie

Co-authored-by: Sofie Van Landeghem
---
 spacy/cli/distill.py     | 8 ++++----
 website/docs/api/cli.mdx | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/spacy/cli/distill.py b/spacy/cli/distill.py
index 0dbb2129e..fb3563045 100644
--- a/spacy/cli/distill.py
+++ b/spacy/cli/distill.py
@@ -64,7 +64,7 @@ def distill(
 ):
     student_config_path = util.ensure_path(student_config_path)
     output_path = util.ensure_path(output_path)
-    # Make sure all files and paths exists if they are needed
+    # Make sure all files and paths exist if they are needed
     if not student_config_path or (
         str(student_config_path) != "-" and not student_config_path.exists()
     ):
@@ -82,12 +82,12 @@ def distill(
     config = util.load_config(
         student_config_path, overrides=overrides, interpolate=False
     )
-    msg.divider("Initializing pipeline")
+    msg.divider("Initializing student pipeline")
     with show_validation_error(student_config_path, hint_fill=False):
         student = init_nlp_student(config, teacher, use_gpu=use_gpu)
-    msg.good("Initialized pipeline")
-    msg.divider("Distilling pipeline")
+    msg.good("Initialized student pipeline")
+    msg.divider("Distilling student pipeline from teacher")
     distill_nlp(
         teacher,
         student,
diff --git a/website/docs/api/cli.mdx b/website/docs/api/cli.mdx
index d016b58a5..2191fd83c 100644
--- a/website/docs/api/cli.mdx
+++ b/website/docs/api/cli.mdx
@@ -1707,11 +1707,11 @@ $ python -m spacy project dvc [project_dir] [workflow] [--force] [--verbose] [--
 Distill a _student_ pipeline from a _teacher_ pipeline. Distillation trains the
 models in the student pipeline on the activations of the teacher's models. A
 typical use case for distillation is to extract a smaller, more performant model
-from large high-accuracy model. Since distillation uses the activations of the
-teacher, distillation can be performed on a corpus without (gold standard)
+from a larger high-accuracy model. Since distillation uses the activations of the
+teacher, distillation can be performed on a corpus of raw text without (gold standard)
 annotations.
 
-`distill` will save out the best model from all epochs, as well as the final
+`distill` will save out the best performing pipeline across all epochs, as well as the final
 pipeline. The `--code` argument can be used to provide a Python file that's
 imported before the training process starts. This lets you register
 [custom functions](/usage/training#custom-functions) and architectures and refer
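
The documentation hunk above describes distillation as training the student on the teacher's activations over raw text, without gold-standard annotations. For context, the following is a minimal, self-contained sketch of that idea in plain Python/NumPy. It is not spaCy's implementation or API; the models are toy linear classifiers and every name in it is illustrative only.

# Conceptual sketch of distillation (NOT spaCy's implementation): a "student"
# model is trained to match the probability outputs of a fixed "teacher" on
# unlabeled inputs, so no gold annotations are needed.
import numpy as np

rng = np.random.default_rng(0)

def softmax(z):
    z = z - z.max(axis=-1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)

# Fixed "teacher": a linear classifier over 3 classes.
W_teacher = rng.normal(size=(10, 3))

# "Student": starts from scratch and learns only from the teacher's outputs.
W_student = np.zeros((10, 3))

X = rng.normal(size=(200, 10))        # unlabeled "corpus"
P_teacher = softmax(X @ W_teacher)    # teacher activations used as soft targets

lr = 0.5
for step in range(300):
    P_student = softmax(X @ W_student)
    # Gradient of the cross-entropy between teacher and student distributions.
    grad = X.T @ (P_student - P_teacher) / len(X)
    W_student -= lr * grad

# The student now largely agrees with the teacher despite never seeing labels.
agreement = (P_teacher.argmax(1) == softmax(X @ W_student).argmax(1)).mean()
print(f"teacher/student agreement: {agreement:.2%}")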