Merge branch 'feature/pipe-distill-mainline' of github.com:danieldk/spaCy into feature/pipe-distill-mainline

This commit is contained in:
Daniël de Kok 2023-01-12 17:26:59 +01:00
commit dd83157594
6 changed files with 16 additions and 2 deletions

View File

@ -955,7 +955,7 @@ class Errors(metaclass=ErrorsWithCodes):
E4000 = ("Expected a Doc as input, but got: '{type}'") E4000 = ("Expected a Doc as input, but got: '{type}'")
E4001 = ("Expected input to be one of the following types: ({expected_types}), " E4001 = ("Expected input to be one of the following types: ({expected_types}), "
"but got '{received_type}'") "but got '{received_type}'")
E4002 = ("Pipe '{name}' requires teacher pipe for distillation.") E4002 = ("Pipe '{name}' requires a teacher pipe for distillation.")
# fmt: on # fmt: on

View File

@ -164,6 +164,8 @@ class EditTreeLemmatizer(TrainablePipe):
teacher_scores: Scores representing the teacher model's predictions. teacher_scores: Scores representing the teacher model's predictions.
student_scores: Scores representing the student model's predictions. student_scores: Scores representing the student model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://spacy.io/api/edittreelemmatizer#get_teacher_student_loss DOCS: https://spacy.io/api/edittreelemmatizer#get_teacher_student_loss
""" """
loss_func = LegacySequenceCategoricalCrossentropy(normalize=False) loss_func = LegacySequenceCategoricalCrossentropy(normalize=False)

View File

@ -275,6 +275,8 @@ class Tagger(TrainablePipe):
teacher_scores: Scores representing the teacher model's predictions. teacher_scores: Scores representing the teacher model's predictions.
student_scores: Scores representing the student model's predictions. student_scores: Scores representing the student model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://spacy.io/api/tagger#get_teacher_student_loss DOCS: https://spacy.io/api/tagger#get_teacher_student_loss
""" """
loss_func = LegacySequenceCategoricalCrossentropy(normalize=False) loss_func = LegacySequenceCategoricalCrossentropy(normalize=False)

View File

@ -78,6 +78,8 @@ cdef class TrainablePipe(Pipe):
losses (Optional[Dict[str, float]]): Optional record of loss during losses (Optional[Dict[str, float]]): Optional record of loss during
distillation. distillation.
RETURNS: The updated losses dictionary. RETURNS: The updated losses dictionary.
DOCS: https://spacy.io/api/pipe#distill
""" """
# By default we require a teacher pipe, but there are downstream # By default we require a teacher pipe, but there are downstream
# implementations that don't require a pipe. # implementations that don't require a pipe.
@ -220,6 +222,8 @@ cdef class TrainablePipe(Pipe):
teacher_scores: Scores representing the teacher model's predictions. teacher_scores: Scores representing the teacher model's predictions.
student_scores: Scores representing the student model's predictions. student_scores: Scores representing the student model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://spacy.io/api/pipe#get_teacher_student_loss DOCS: https://spacy.io/api/pipe#get_teacher_student_loss
""" """
raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="get_teacher_student_loss", name=self.name)) raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="get_teacher_student_loss", name=self.name))

View File

@ -227,6 +227,8 @@ cdef class Parser(TrainablePipe):
losses (Optional[Dict[str, float]]): Optional record of loss during losses (Optional[Dict[str, float]]): Optional record of loss during
distillation. distillation.
RETURNS: The updated losses dictionary. RETURNS: The updated losses dictionary.
DOCS: https://spacy.io/api/dependencyparser#distill
""" """
if teacher_pipe is None: if teacher_pipe is None:
raise ValueError(Errors.E4002.format(name=self.name)) raise ValueError(Errors.E4002.format(name=self.name))
@ -308,6 +310,10 @@ cdef class Parser(TrainablePipe):
teacher_scores: Scores representing the teacher model's predictions. teacher_scores: Scores representing the teacher model's predictions.
student_scores: Scores representing the student model's predictions. student_scores: Scores representing the student model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://spacy.io/api/dependencyparser#get_teacher_student_loss DOCS: https://spacy.io/api/dependencyparser#get_teacher_student_loss
""" """
loss_func = LegacySequenceCategoricalCrossentropy(normalize=False) loss_func = LegacySequenceCategoricalCrossentropy(normalize=False)

View File

@ -234,7 +234,7 @@ predictions and gold-standard annotations, and update the component's model.
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ | | `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ | | **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
## TrainablePipe.distill {id="rehearse", tag="method,experimental", version="4"} ## TrainablePipe.distill {id="distill", tag="method,experimental", version="4"}
Train a pipe (the student) on the predictions of another pipe (the teacher). The Train a pipe (the student) on the predictions of another pipe (the teacher). The
student is typically trained on the probability distribution of the teacher, but student is typically trained on the probability distribution of the teacher, but