Fix naming and add additional rehearse loss

This commit is contained in:
thomashacker 2023-03-31 11:54:19 +02:00
parent d65c00df9c
commit 3fccb969f8
13 changed files with 69 additions and 63 deletions

View File

@ -85,7 +85,7 @@ frozen_components = []
# Names of pipeline components that should set annotations during training # Names of pipeline components that should set annotations during training
annotating_components = [] annotating_components = []
# Names of pipeline components that should get rehearsed during training # Names of pipeline components that should get rehearsed during training
rehearse_components = [] rehearsal_components = []
# Location in the config where the dev corpus is defined # Location in the config where the dev corpus is defined
dev_corpus = "corpora.dev" dev_corpus = "corpora.dev"
# Location in the config where the train corpus is defined # Location in the config where the train corpus is defined

View File

@ -1183,7 +1183,7 @@ class Language:
losses: Optional[Dict[str, float]] = None, losses: Optional[Dict[str, float]] = None,
component_cfg: Optional[Dict[str, Dict[str, Any]]] = None, component_cfg: Optional[Dict[str, Dict[str, Any]]] = None,
exclude: Iterable[str] = SimpleFrozenList(), exclude: Iterable[str] = SimpleFrozenList(),
rehearse_components: List[str] = [], rehearsal_components: List[str] = [],
) -> Dict[str, float]: ) -> Dict[str, float]:
"""Make a "rehearsal" update to the models in the pipeline, to prevent """Make a "rehearsal" update to the models in the pipeline, to prevent
forgetting. Rehearsal updates run an initial copy of the model over some forgetting. Rehearsal updates run an initial copy of the model over some
@ -1196,7 +1196,7 @@ class Language:
component_cfg (Dict[str, Dict]): Config parameters for specific pipeline component_cfg (Dict[str, Dict]): Config parameters for specific pipeline
components, keyed by component name. components, keyed by component name.
exclude (Iterable[str]): Names of components that shouldn't be updated. exclude (Iterable[str]): Names of components that shouldn't be updated.
rehearse_components (List[str]): Names of components that should be rehearsed rehearsal_components (List[str]): Names of components that should be rehearsed
RETURNS (dict): Results from the update. RETURNS (dict): Results from the update.
EXAMPLE: EXAMPLE:
@ -1221,20 +1221,12 @@ class Language:
if ( if (
name in exclude name in exclude
or not hasattr(proc, "rehearse") or not hasattr(proc, "rehearse")
or name not in rehearse_components or name not in rehearsal_components
): ):
continue continue
proc.rehearse( # type: ignore[attr-defined] proc.rehearse(
examples, sgd=None, losses=losses, **component_cfg.get(name, {}) examples, sgd=sgd, losses=losses, **component_cfg.get(name, {})
) )
if isinstance(sgd, Optimizer):
if (
name not in exclude
and isinstance(proc, ty.TrainableComponent)
and proc.is_trainable
and proc.model not in (True, False, None)
):
proc.finish_update(sgd)
return losses return losses

View File

@ -228,7 +228,7 @@ class Tagger(TrainablePipe):
loss_func = SequenceCategoricalCrossentropy() loss_func = SequenceCategoricalCrossentropy()
if losses is None: if losses is None:
losses = {} losses = {}
losses.setdefault(self.name, 0.0) losses.setdefault(self.name+"_rehearse", 0.0)
validate_examples(examples, "Tagger.rehearse") validate_examples(examples, "Tagger.rehearse")
docs = [eg.predicted for eg in examples] docs = [eg.predicted for eg in examples]
if self._rehearsal_model is None: if self._rehearsal_model is None:
@ -243,7 +243,7 @@ class Tagger(TrainablePipe):
bp_tag_scores(grads) bp_tag_scores(grads)
if sgd is not None: if sgd is not None:
self.finish_update(sgd) self.finish_update(sgd)
losses[self.name] += loss losses[self.name+"_rehearse"] += loss
return losses return losses
def get_loss(self, examples, scores): def get_loss(self, examples, scores):

View File

@ -276,7 +276,7 @@ class TextCategorizer(TrainablePipe):
""" """
if losses is None: if losses is None:
losses = {} losses = {}
losses.setdefault(self.name, 0.0) losses.setdefault(self.name+"_rehearse", 0.0)
if self._rehearsal_model is None: if self._rehearsal_model is None:
return losses return losses
validate_examples(examples, "TextCategorizer.rehearse") validate_examples(examples, "TextCategorizer.rehearse")
@ -292,7 +292,7 @@ class TextCategorizer(TrainablePipe):
bp_scores(gradient) bp_scores(gradient)
if sgd is not None: if sgd is not None:
self.finish_update(sgd) self.finish_update(sgd)
losses[self.name] += (gradient**2).sum() losses[self.name+"_rehearse"] += (gradient**2).sum()
return losses return losses
def _examples_to_truth( def _examples_to_truth(

View File

@ -444,7 +444,7 @@ cdef class Parser(TrainablePipe):
multitask.rehearse(examples, losses=losses, sgd=sgd) multitask.rehearse(examples, losses=losses, sgd=sgd)
if self._rehearsal_model is None: if self._rehearsal_model is None:
return None return None
losses.setdefault(self.name, 0.) losses.setdefault(self.name+"_rehearse", 0.)
validate_examples(examples, "Parser.rehearse") validate_examples(examples, "Parser.rehearse")
docs = [eg.predicted for eg in examples] docs = [eg.predicted for eg in examples]
states = self.moves.init_batch(docs) states = self.moves.init_batch(docs)
@ -475,7 +475,7 @@ cdef class Parser(TrainablePipe):
backprop_tok2vec(docs) backprop_tok2vec(docs)
if sgd is not None: if sgd is not None:
self.finish_update(sgd) self.finish_update(sgd)
losses[self.name] += loss / n_scores losses[self.name+"_rehearse"] += loss / n_scores
del backprop del backprop
del backprop_tok2vec del backprop_tok2vec
model.clear_memory() model.clear_memory()

View File

@ -356,7 +356,7 @@ class ConfigSchemaTraining(BaseModel):
logger: Logger = Field(..., title="The logger to track training progress") logger: Logger = Field(..., title="The logger to track training progress")
frozen_components: List[str] = Field(..., title="Pipeline components that shouldn't be updated during training") frozen_components: List[str] = Field(..., title="Pipeline components that shouldn't be updated during training")
annotating_components: List[str] = Field(..., title="Pipeline components that should set annotations during training") annotating_components: List[str] = Field(..., title="Pipeline components that should set annotations during training")
rehearse_components: List[str] = Field(..., title="Pipeline components that should be rehearsed during training") rehearsal_components: List[str] = Field(..., title="Pipeline components that should be rehearsed during training")
before_to_disk: Optional[Callable[["Language"], "Language"]] = Field(..., title="Optional callback to modify nlp object after training, before it's saved to disk") before_to_disk: Optional[Callable[["Language"], "Language"]] = Field(..., title="Optional callback to modify nlp object after training, before it's saved to disk")
before_update: Optional[Callable[["Language", Dict[str, Any]], None]] = Field(..., title="Optional callback that is invoked at the start of each training step") before_update: Optional[Callable[["Language", Dict[str, Any]], None]] = Field(..., title="Optional callback that is invoked at the start of each training step")
# fmt: on # fmt: on

View File

@ -1144,7 +1144,7 @@ def test_training_before_update(doc):
max_steps=100, max_steps=100,
exclude=[], exclude=[],
annotating_components=[], annotating_components=[],
rehearse_components=[], rehearsal_components=[],
before_update=before_update, before_update=before_update,
) )

View File

@ -67,10 +67,10 @@ def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
with nlp.select_pipes(enable=resume_components): with nlp.select_pipes(enable=resume_components):
logger.info(f"Resuming training for: {resume_components}") logger.info(f"Resuming training for: {resume_components}")
nlp.resume_training(sgd=optimizer) nlp.resume_training(sgd=optimizer)
# Components that shouldn't be updated during training # Components that should be rehearsed during training
rehearse_components = T["rehearse_components"] rehearsal_components = T["rehearsal_components"]
if rehearse_components: if rehearsal_components:
logger.info(f"Rehearsing components: {rehearse_components}") logger.info(f"Rehearsing components: {rehearsal_components}")
# Make sure that listeners are defined before initializing further # Make sure that listeners are defined before initializing further
nlp._link_components() nlp._link_components()
with nlp.select_pipes(disable=[*frozen_components, *resume_components]): with nlp.select_pipes(disable=[*frozen_components, *resume_components]):

View File

@ -78,7 +78,7 @@ def train(
# Components that should set annotations on update # Components that should set annotations on update
annotating_components = T["annotating_components"] annotating_components = T["annotating_components"]
# Components that should be rehearsed after update # Components that should be rehearsed after update
rehearse_components = T["rehearse_components"] rehearsal_components = T["rehearsal_components"]
# Create iterator, which yields out info after each optimization step. # Create iterator, which yields out info after each optimization step.
training_step_iterator = train_while_improving( training_step_iterator = train_while_improving(
nlp, nlp,
@ -92,7 +92,7 @@ def train(
eval_frequency=T["eval_frequency"], eval_frequency=T["eval_frequency"],
exclude=frozen_components, exclude=frozen_components,
annotating_components=annotating_components, annotating_components=annotating_components,
rehearse_components=rehearse_components, rehearsal_components=rehearsal_components,
before_update=before_update, before_update=before_update,
) )
clean_output_dir(output_path) clean_output_dir(output_path)
@ -155,7 +155,7 @@ def train_while_improving(
max_steps: int, max_steps: int,
exclude: List[str], exclude: List[str],
annotating_components: List[str], annotating_components: List[str],
rehearse_components: List[str], rehearsal_components: List[str],
before_update: Optional[Callable[["Language", Dict[str, Any]], None]], before_update: Optional[Callable[["Language", Dict[str, Any]], None]],
): ):
"""Train until an evaluation stops improving. Works as a generator, """Train until an evaluation stops improving. Works as a generator,
@ -223,7 +223,7 @@ def train_while_improving(
losses=losses, losses=losses,
sgd=None, sgd=None,
exclude=exclude, exclude=exclude,
rehearse_components=rehearse_components, rehearsal_components=rehearsal_components,
) )
# TODO: refactor this so we don't have to run it separately in here # TODO: refactor this so we don't have to run it separately in here
for name, proc in nlp.pipeline: for name, proc in nlp.pipeline:

View File

@ -182,7 +182,7 @@ This section defines settings and controls for the training and evaluation
process that are used when you run [`spacy train`](/api/cli#train). process that are used when you run [`spacy train`](/api/cli#train).
| Name | Description | | Name | Description |
| ---------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ----------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `accumulate_gradient` | Whether to divide the batch up into substeps. Defaults to `1`. ~~int~~ | | `accumulate_gradient` | Whether to divide the batch up into substeps. Defaults to `1`. ~~int~~ |
| `batcher` | Callable that takes an iterator of [`Doc`](/api/doc) objects and yields batches of `Doc`s. Defaults to [`batch_by_words`](/api/top-level#batch_by_words). ~~Callable[[Iterator[Doc], Iterator[List[Doc]]]]~~ | | `batcher` | Callable that takes an iterator of [`Doc`](/api/doc) objects and yields batches of `Doc`s. Defaults to [`batch_by_words`](/api/top-level#batch_by_words). ~~Callable[[Iterator[Doc], Iterator[List[Doc]]]]~~ |
| `before_to_disk` | Optional callback to modify `nlp` object right before it is saved to disk during and after training. Can be used to remove or reset config values or disable components. Defaults to `null`. ~~Optional[Callable[[Language], Language]]~~ | | `before_to_disk` | Optional callback to modify `nlp` object right before it is saved to disk during and after training. Can be used to remove or reset config values or disable components. Defaults to `null`. ~~Optional[Callable[[Language], Language]]~~ |
@ -192,7 +192,7 @@ process that are used when you run [`spacy train`](/api/cli#train).
| `eval_frequency` | How often to evaluate during training (steps). Defaults to `200`. ~~int~~ | | `eval_frequency` | How often to evaluate during training (steps). Defaults to `200`. ~~int~~ |
| `frozen_components` | Pipeline component names that are "frozen" and shouldn't be initialized or updated during training. See [here](/usage/training#config-components) for details. Defaults to `[]`. ~~List[str]~~ | | `frozen_components` | Pipeline component names that are "frozen" and shouldn't be initialized or updated during training. See [here](/usage/training#config-components) for details. Defaults to `[]`. ~~List[str]~~ |
| `annotating_components` <Tag variant="new">3.1</Tag> | Pipeline component names that should set annotations on the predicted docs during training. See [here](/usage/training#annotating-components) for details. Defaults to `[]`. ~~List[str]~~ | | `annotating_components` <Tag variant="new">3.1</Tag> | Pipeline component names that should set annotations on the predicted docs during training. See [here](/usage/training#annotating-components) for details. Defaults to `[]`. ~~List[str]~~ |
| `rehearse_components` <Tag variant="new">3.5.1</Tag> | Pipeline component names that should get rehearsed during training. See [here](/usage/training#rehearse-components) for details. Defaults to `[]`. ~~List[str]~~ | | `rehearsal_components` <Tag variant="new">3.5.1</Tag> | Pipeline component names that should get rehearsed during training. See [here](/usage/training#rehearse-components) for details. Defaults to `[]`. ~~List[str]~~ |
| `gpu_allocator` | Library for cupy to route GPU memory allocation to. Can be `"pytorch"` or `"tensorflow"`. Defaults to variable `${system.gpu_allocator}`. ~~str~~ | | `gpu_allocator` | Library for cupy to route GPU memory allocation to. Can be `"pytorch"` or `"tensorflow"`. Defaults to variable `${system.gpu_allocator}`. ~~str~~ |
| `logger` | Callable that takes the `nlp` and stdout and stderr `IO` objects, sets up the logger, and returns two new callables to log a training step and to finalize the logger. Defaults to [`ConsoleLogger`](/api/top-level#ConsoleLogger). ~~Callable[[Language, IO, IO], [Tuple[Callable[[Dict[str, Any]], None], Callable[[], None]]]]~~ | | `logger` | Callable that takes the `nlp` and stdout and stderr `IO` objects, sets up the logger, and returns two new callables to log a training step and to finalize the logger. Defaults to [`ConsoleLogger`](/api/top-level#ConsoleLogger). ~~Callable[[Language, IO, IO], [Tuple[Callable[[Dict[str, Any]], None], Callable[[], None]]]]~~ |
| `max_epochs` | Maximum number of epochs to train for. `0` means an unlimited number of epochs. `-1` means that the train corpus should be streamed rather than loaded into memory with no shuffling within the training loop. Defaults to `0`. ~~int~~ | | `max_epochs` | Maximum number of epochs to train for. `0` means an unlimited number of epochs. `-1` means that the train corpus should be streamed rather than loaded into memory with no shuffling within the training loop. Defaults to `0`. ~~int~~ |

View File

@ -346,7 +346,14 @@ and custom registered functions if needed. See the
Perform a "rehearsal" update from a batch of data. Rehearsal updates teach the Perform a "rehearsal" update from a batch of data. Rehearsal updates teach the
current model to make predictions similar to an initial model, to try to address current model to make predictions similar to an initial model, to try to address
the "catastrophic forgetting" problem. Please note that `Language.rehearse` needs to be used together with `Language.update`. This feature is experimental. the "catastrophic forgetting" problem.
<Infobox variant="warning">
Note that `Language.rehearse` needs to be used together with `Language.update`.
This feature is experimental.
</Infobox>
> #### Example > #### Example
> >

View File

@ -244,7 +244,14 @@ predictions and gold-standard annotations, and update the component's model.
Perform a "rehearsal" update from a batch of data. Rehearsal updates teach the Perform a "rehearsal" update from a batch of data. Rehearsal updates teach the
current model to make predictions similar to an initial model, to try to address current model to make predictions similar to an initial model, to try to address
the "catastrophic forgetting" problem. Please note that `TrainablePipe.update` needs to be used together with `TrainablePipe.update`. This feature is experimental. the "catastrophic forgetting" problem.
<Infobox variant="warning">
Note that `TrainablePipe.rehearse` needs to be used together with
`TrainablePipe.update`. This feature is experimental.
</Infobox>
> #### Example > #### Example
> >

View File

@ -577,7 +577,15 @@ now-updated model to the predicted docs.
### Using rehearsing to address catastrophic forgetting {id="rehearse-components", tag="experimental", version="3.5.1"} ### Using rehearsing to address catastrophic forgetting {id="rehearse-components", tag="experimental", version="3.5.1"}
Perform “rehearsal” updates to pre-trained components. Rehearsal updates teach the current component to make predictions similar to an initial model, to try to address the “catastrophic forgetting” problem. This feature is experimental. When fine-tuning pre-trained components, we can perform an additional
`rehearsal` update after every regular update to address the problem of
[`catastrophic forgetting`](https://explosion.ai/blog/pseudo-rehearsal-catastrophic-forgetting).
These updates teach the fine-tuned component to make predictions similar to its
initial, pre-trained version.
Perform “rehearsal” updates to pre-trained components. Rehearsal updates teach
the current component to make predictions similar to an initial model, to try to
address the “catastrophic forgetting” problem. This feature is experimental.
```ini {title="config.cfg (excerpt)"} ```ini {title="config.cfg (excerpt)"}
[nlp] [nlp]
@ -587,17 +595,9 @@ pipeline = ["sentencizer", "ner", "entity_linker"]
source = "en_core_web_sm" source = "en_core_web_sm"
[training] [training]
rehearse_components = ["ner"] rehearsal_components = ["ner"]
``` ```
<Infobox variant="warning" title="Loss calculation" id="rehearse-components-loss">
Be aware that the loss is calculated as the sum of the losses of both the `update` and `rehearse` functions.
If both the loss and the accuracy of the component increase over time, this can be caused by the trained component making predictions that differ more and more from those of the initial model,
indicating `catastrophic forgetting`.
</Infobox>
### Using registered functions {id="config-functions"} ### Using registered functions {id="config-functions"}
The training configuration defined in the config file doesn't have to only The training configuration defined in the config file doesn't have to only