From c07941d14a4639933e7e4c2697b7d426078bf1d1 Mon Sep 17 00:00:00 2001
From: shadeMe
Date: Mon, 6 Mar 2023 10:31:18 +0530
Subject: [PATCH] Update `Tok2Vec.distill` docstring

---
 spacy/pipeline/tok2vec.py    | 14 +++++++-------
 website/docs/api/tok2vec.mdx | 16 ++++++++++------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py
index 72761abf7..d9639f8d5 100644
--- a/spacy/pipeline/tok2vec.py
+++ b/spacy/pipeline/tok2vec.py
@@ -199,14 +199,14 @@ class Tok2Vec(TrainablePipe):
         sgd: Optional[Optimizer] = None,
         losses: Optional[Dict[str, float]] = None,
     ) -> Dict[str, float]:
-        """Train a pipe (the student) on the predictions of another pipe
-        (the teacher). The student is typically trained on the probability
-        distribution of the teacher, but details may differ per pipe.
+        """Performs an update of the student pipe's model using the
+        student's distillation examples and sets the annotations
+        of the teacher's distillation examples using the teacher pipe.
 
-        teacher_pipe (Optional[TrainablePipe]): The teacher pipe to learn
-            from.
-        examples (Iterable[Example]): Distillation examples. The reference
-            and predicted docs must have the same number of tokens and the
+        teacher_pipe (Optional[TrainablePipe]): The teacher pipe to use
+            for prediction.
+        examples (Iterable[Example]): Distillation examples. The reference (teacher)
+            and predicted (student) docs must have the same number of tokens and the
             same orthography.
         drop (float): dropout rate.
         sgd (Optional[Optimizer]): An optimizer. Will be created via
diff --git a/website/docs/api/tok2vec.mdx b/website/docs/api/tok2vec.mdx
index 6b410d724..8b6d2380b 100644
--- a/website/docs/api/tok2vec.mdx
+++ b/website/docs/api/tok2vec.mdx
@@ -102,10 +102,14 @@ pipeline components are applied to the `Doc` in order. Both
 
 ## Tok2Vec.distill {id="distill", tag="method,experimental", version="4"}
 
-Train a pipe (the student) on the predictions of another pipe (the teacher). The
-student is typically trained on the probability distribution of the teacher, but
-details may differ per pipe. The goal of distillation is to transfer knowledge
-from the teacher to the student.
+Performs an update of the student pipe's model using the student's distillation
+examples and sets the annotations of the teacher's distillation examples using
+the teacher pipe.
+
+Unlike other trainable pipes, the student pipe doesn't directly learn its
+representations from the teacher. However, since downstream pipes that do
+perform distillation expect the tok2vec annotations to be present on the
+correct distillation examples, we need to ensure that they are set beforehand.
 
 The distillation is performed on ~~Example~~ objects. The `Example.reference`
 and `Example.predicted` ~~Doc~~s must have the same number of tokens and the
@@ -125,8 +129,8 @@ This feature is experimental.
 
 | Name           | Description |
 | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
-| `examples`     | Distillation examples. The reference and predicted docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
+| `teacher_pipe` | The teacher pipe to use for prediction. ~~Optional[TrainablePipe]~~ |
+| `examples`     | Distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
 | _keyword-only_ | |
 | `drop`         | Dropout rate. ~~float~~ |
 | `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
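
For anyone trying the documented method out, a minimal sketch of how it is
driven (assuming a spaCy v4 build where the experimental distillation API is
available; the "en_core_web_lg" teacher, the blank-English student, and the
sample texts are illustrative assumptions, not part of this patch):

import spacy
from spacy.training import Example

# Hypothetical setup: any pipeline with a "tok2vec" component can act as
# the teacher; "en_core_web_lg" is only an illustrative choice.
teacher = spacy.load("en_core_web_lg")
student = spacy.blank("en")
student.add_pipe("tok2vec")
student.initialize()

texts = [
    "Distillation transfers knowledge.",
    "The student mimics the teacher.",
]

# Example(predicted, reference): the predicted doc comes from the student,
# the reference doc from the teacher. Both sides must have the same number
# of tokens and the same orthography, as the docstring requires.
examples = [Example(student.make_doc(text), teacher(text)) for text in texts]

losses = student.get_pipe("tok2vec").distill(
    teacher.get_pipe("tok2vec"),
    examples,
    losses={},
)
print(losses)

As the updated docstring notes, this call doesn't make the student tok2vec
learn its representations from the teacher directly; it mainly sets the
teacher's tok2vec annotations so that downstream distillable pipes find them
on the correct distillation examples.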