diff --git a/spacy/language.py b/spacy/language.py
index 01931b2c9..c3fa717b6 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1033,7 +1033,9 @@ class Language:
     ):
         """Update the models in the pipeline.
         teacher (Language): Teacher to distill from.
-        examples (Iterable[Example]): A batch of examples
+        examples (Iterable[Example]): Distillation examples. The reference
+            (teacher) and predicted (student) docs must have the same number of
+            tokens and the same orthography.
         drop (float): The dropout rate.
         sgd (Optional[Optimizer]): An optimizer.
         losses (Optional(Dict[str, float])): Dictionary to update with the loss,
diff --git a/spacy/pipeline/trainable_pipe.pyx b/spacy/pipeline/trainable_pipe.pyx
index 77259fc0b..fcffd11ee 100644
--- a/spacy/pipeline/trainable_pipe.pyx
+++ b/spacy/pipeline/trainable_pipe.pyx
@@ -71,8 +71,8 @@ cdef class TrainablePipe(Pipe):
         teacher_pipe (Optional[TrainablePipe]): The teacher pipe to learn
             from.
         examples (Iterable[Example]): Distillation examples. The reference
-            and predicted docs must have the same number of tokens and the
-            same orthography.
+            (teacher) and predicted (student) docs must have the same number of
+            tokens and the same orthography.
         drop (float): dropout rate.
         sgd (Optional[Optimizer]): An optimizer. Will be created via
             create_optimizer if not set.
diff --git a/website/docs/api/dependencyparser.mdx b/website/docs/api/dependencyparser.mdx
index 5179ce48b..296d6d87d 100644
--- a/website/docs/api/dependencyparser.mdx
+++ b/website/docs/api/dependencyparser.mdx
@@ -154,15 +154,15 @@ This feature is experimental.
 > losses = student.distill(teacher_pipe, examples, sgd=optimizer)
 > ```

-| Name           | Description |
-| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
-| `examples`     | Distillation examples. The reference and predicted docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
-| _keyword-only_ | |
-| `drop`         | Dropout rate. ~~float~~ |
-| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
-| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
-| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |
+| Name           | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
+| `examples`     | A batch of [`Example`](/api/example) distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and orthography. ~~Iterable[Example]~~ |
+| _keyword-only_ | |
+| `drop`         | Dropout rate. ~~float~~ |
+| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
+| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |

 ## DependencyParser.pipe {id="pipe",tag="method"}

diff --git a/website/docs/api/edittreelemmatizer.mdx b/website/docs/api/edittreelemmatizer.mdx
index 2e0993657..c8b5c7180 100644
--- a/website/docs/api/edittreelemmatizer.mdx
+++ b/website/docs/api/edittreelemmatizer.mdx
@@ -138,15 +138,15 @@ This feature is experimental.
 > losses = student.distill(teacher_pipe, examples, sgd=optimizer)
 > ```

-| Name           | Description |
-| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
-| `examples`     | Distillation examples. The reference and predicted docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
-| _keyword-only_ | |
-| `drop`         | Dropout rate. ~~float~~ |
-| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
-| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
-| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |
+| Name           | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
+| `examples`     | A batch of [`Example`](/api/example) distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and orthography. ~~Iterable[Example]~~ |
+| _keyword-only_ | |
+| `drop`         | Dropout rate. ~~float~~ |
+| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
+| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |

 ## EditTreeLemmatizer.pipe {id="pipe",tag="method"}

diff --git a/website/docs/api/entityrecognizer.mdx b/website/docs/api/entityrecognizer.mdx
index 005d5d11d..f503cc998 100644
--- a/website/docs/api/entityrecognizer.mdx
+++ b/website/docs/api/entityrecognizer.mdx
@@ -150,15 +150,15 @@ This feature is experimental.
 > losses = student.distill(teacher_pipe, examples, sgd=optimizer)
 > ```

-| Name           | Description |
-| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
-| `examples`     | Distillation examples. The reference and predicted docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
-| _keyword-only_ | |
-| `drop`         | Dropout rate. ~~float~~ |
-| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
-| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
-| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |
+| Name           | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
+| `examples`     | A batch of [`Example`](/api/example) distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and orthography. ~~Iterable[Example]~~ |
+| _keyword-only_ | |
+| `drop`         | Dropout rate. ~~float~~ |
+| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
+| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |

 ## EntityRecognizer.pipe {id="pipe",tag="method"}

diff --git a/website/docs/api/language.mdx b/website/docs/api/language.mdx
index 22f3bbba1..8262d6e68 100644
--- a/website/docs/api/language.mdx
+++ b/website/docs/api/language.mdx
@@ -347,19 +347,19 @@ Distill the models in a student pipeline from a teacher pipeline.
 > student.distill(teacher, examples, sgd=optimizer)
 > ```

-| Name            | Description |
-| --------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher`       | The teacher pipeline to distill from. ~~Language~~ |
-| `examples`      | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ |
-| _keyword-only_  | |
-| `drop`          | The dropout rate. ~~float~~ |
-| `sgd`           | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
-| `losses`        | Dictionary to update with the loss, keyed by pipeline component. ~~Optional[Dict[str, float]]~~ |
-| `component_cfg` | Optional dictionary of keyword arguments for components, keyed by component names. Defaults to `None`. ~~Optional[Dict[str, Dict[str, Any]]]~~ |
-| `exclude`       | Names of components that shouldn't be updated. Defaults to `[]`. ~~Iterable[str]~~ |
-| `annotates`     | Names of components that should set annotations on the prediced examples after updating. Defaults to `[]`. ~~Iterable[str]~~ |
-| `component_map` | Map student component names to teacher component names, only necessary when the names differ. Defaults to `None`. ~~Iterable[str]~~ |
-| **RETURNS**     | The updated `losses` dictionary. ~~Dict[str, float]~~ |
+| Name            | Description |
+| --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `teacher`       | The teacher pipeline to distill from. ~~Language~~ |
+| `examples`      | A batch of [`Example`](/api/example) distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and orthography. ~~Iterable[Example]~~ |
+| _keyword-only_  | |
+| `drop`          | The dropout rate. ~~float~~ |
+| `sgd`           | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`        | Dictionary to update with the loss, keyed by pipeline component. ~~Optional[Dict[str, float]]~~ |
+| `component_cfg` | Optional dictionary of keyword arguments for components, keyed by component names. Defaults to `None`. ~~Optional[Dict[str, Dict[str, Any]]]~~ |
+| `exclude`       | Names of components that shouldn't be updated. Defaults to `[]`. ~~Iterable[str]~~ |
+| `annotates`     | Names of components that should set annotations on the predicted examples after updating. Defaults to `[]`. ~~Iterable[str]~~ |
+| `component_map` | Map student component names to teacher component names, only necessary when the names differ. Defaults to `None`. ~~Iterable[str]~~ |
+| **RETURNS**     | The updated `losses` dictionary. ~~Dict[str, float]~~ |

 ## Language.rehearse {id="rehearse",tag="method,experimental",version="3"}

diff --git a/website/docs/api/morphologizer.mdx b/website/docs/api/morphologizer.mdx
index 4f79458d3..4660ec312 100644
--- a/website/docs/api/morphologizer.mdx
+++ b/website/docs/api/morphologizer.mdx
@@ -144,15 +144,15 @@ This feature is experimental.
 > losses = student.distill(teacher_pipe, examples, sgd=optimizer)
 > ```

-| Name           | Description |
-| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
-| `examples`     | Distillation examples. The reference and predicted docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
-| _keyword-only_ | |
-| `drop`         | Dropout rate. ~~float~~ |
-| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
-| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
-| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |
+| Name           | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
+| `examples`     | A batch of [`Example`](/api/example) distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and orthography. ~~Iterable[Example]~~ |
+| _keyword-only_ | |
+| `drop`         | Dropout rate. ~~float~~ |
+| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
+| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |

 ## Morphologizer.pipe {id="pipe",tag="method"}

diff --git a/website/docs/api/pipe.mdx b/website/docs/api/pipe.mdx
index 120c8f690..e1e7f5d70 100644
--- a/website/docs/api/pipe.mdx
+++ b/website/docs/api/pipe.mdx
@@ -257,15 +257,15 @@ This feature is experimental.
 > losses = student.distill(teacher_pipe, examples, sgd=optimizer)
 > ```

-| Name           | Description |
-| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
-| `examples`     | Distillation examples. The reference and predicted docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
-| _keyword-only_ | |
-| `drop`         | Dropout rate. ~~float~~ |
-| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
-| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
-| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |
+| Name           | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
+| `examples`     | A batch of [`Example`](/api/example) distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and orthography. ~~Iterable[Example]~~ |
+| _keyword-only_ | |
+| `drop`         | Dropout rate. ~~float~~ |
+| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
+| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |

 ## TrainablePipe.rehearse {id="rehearse",tag="method,experimental",version="3"}

diff --git a/website/docs/api/sentencerecognizer.mdx b/website/docs/api/sentencerecognizer.mdx
index 02fd57102..dfb7ed308 100644
--- a/website/docs/api/sentencerecognizer.mdx
+++ b/website/docs/api/sentencerecognizer.mdx
@@ -129,15 +129,15 @@ This feature is experimental.
 > losses = student.distill(teacher_pipe, examples, sgd=optimizer)
 > ```

-| Name           | Description |
-| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
-| `examples`     | Distillation examples. The reference and predicted docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
-| _keyword-only_ | |
-| `drop`         | Dropout rate. ~~float~~ |
-| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
-| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
-| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |
+| Name           | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
+| `examples`     | A batch of [`Example`](/api/example) distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and orthography. ~~Iterable[Example]~~ |
+| _keyword-only_ | |
+| `drop`         | Dropout rate. ~~float~~ |
+| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
+| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |

 ## SentenceRecognizer.pipe {id="pipe",tag="method"}

diff --git a/website/docs/api/tagger.mdx b/website/docs/api/tagger.mdx
index 664fd7940..35e7a23b1 100644
--- a/website/docs/api/tagger.mdx
+++ b/website/docs/api/tagger.mdx
@@ -128,15 +128,15 @@ This feature is experimental.
 > losses = student.distill(teacher_pipe, examples, sgd=optimizer)
 > ```

-| Name           | Description |
-| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
-| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
-| `examples`     | Distillation examples. The reference and predicted docs must have the same number of tokens and the same orthography. ~~Iterable[Example]~~ |
-| _keyword-only_ | |
-| `drop`         | Dropout rate. ~~float~~ |
-| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
-| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
-| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |
+| Name           | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `teacher_pipe` | The teacher pipe to learn from. ~~Optional[TrainablePipe]~~ |
+| `examples`     | A batch of [`Example`](/api/example) distillation examples. The reference (teacher) and predicted (student) docs must have the same number of tokens and orthography. ~~Iterable[Example]~~ |
+| _keyword-only_ | |
+| `drop`         | Dropout rate. ~~float~~ |
+| `sgd`          | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ |
+| `losses`       | Optional record of the loss during distillation. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
+| **RETURNS**    | The updated `losses` dictionary. ~~Dict[str, float]~~ |

 ## Tagger.pipe {id="pipe",tag="method"}
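
The doc changes above all describe the same `distill` signature, so a single end-to-end usage sketch may help reviewers. It follows the example blocks in the updated docs and assumes the teacher and student share a tokenizer (so the reference/teacher and predicted/student docs match token for token, as the docs require) and that the student pipe has already been initialized; the model name, texts, and batch size are placeholder choices, not a recommended recipe.

```python
import spacy
from spacy.lang.en import English
from spacy.training import Example
from spacy.util import minibatch

# Hypothetical teacher/student pair, mirroring the example blocks in the docs.
teacher = spacy.load("en_core_web_lg")
student = English()
student.add_pipe("tagger")
# Assumes the student has already been initialized (labels, model shapes)
# through the usual initialization workflow before distillation starts.

texts = ["Distillation only needs raw text.", "No gold annotations are required."]

# Example(predicted, reference): the student doc is the predicted side and the
# teacher doc the reference side. Both pipelines tokenize the same string, so
# token counts and orthography line up as required.
examples = [Example(student.make_doc(t), teacher.make_doc(t)) for t in texts]

optimizer = student.create_optimizer()
losses = {}
for batch in minibatch(examples, size=8):
    student.distill(teacher, batch, sgd=optimizer, losses=losses)
print(losses)
```

Since `sgd` is created via `create_optimizer` when not set, the explicit optimizer is optional and is passed here only to mirror the documented examples.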