From 8c1d86ea9262adb6ed4cd73da8e7baa5748eddc2 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem
Date: Thu, 26 Aug 2021 09:50:35 +0200
Subject: [PATCH] Document use-case of freezing tok2vec (#8992)

* update error msg

* add sentence to docs

* expand note on frozen components
---
 spacy/errors.py                | 12 +++++-------
 spacy/training/initialize.py   |  3 ++-
 website/docs/usage/training.md | 25 ++++++++++++++++---------
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 36331fe15..a206826ff 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -116,13 +116,11 @@ class Warnings:
 
     # New warnings added in v3.x
     W086 = ("Component '{listener}' will be (re)trained, but it needs the component "
-            "'{name}' which is frozen. You can either freeze both, or neither "
-            "of the two. If you're sourcing the component from "
-            "an existing pipeline, you can use the `replace_listeners` setting in "
-            "the config block to replace its token-to-vector listener with a copy "
-            "and make it independent. For example, `replace_listeners = "
-            "[\"model.tok2vec\"]` See the documentation for details: "
-            "https://spacy.io/usage/training#config-components-listeners")
+            "'{name}' which is frozen. If you want to prevent retraining '{name}' "
+            "but want to train '{listener}' on top of it, you should add '{name}' to the "
+            "list of 'annotating_components' in the 'training' block in the config. "
+            "See the documentation for details: "
+            "https://spacy.io/usage/training#annotating-components")
     W087 = ("Component '{name}' will be (re)trained, but the component '{listener}' "
             "depends on it via a listener and is frozen. This means that the "
             "performance of '{listener}' will be degraded. You can either freeze "
diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py
index 04d030964..bd014f75f 100644
--- a/spacy/training/initialize.py
+++ b/spacy/training/initialize.py
@@ -95,7 +95,8 @@ def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
             logger.warning(Warnings.W087.format(name=name, listener=listener))
         # We always check this regardless, in case user freezes tok2vec
         if listener not in frozen_components and name in frozen_components:
-            logger.warning(Warnings.W086.format(name=name, listener=listener))
+            if name not in T["annotating_components"]:
+                logger.warning(Warnings.W086.format(name=name, listener=listener))
     return nlp
 
 
diff --git a/website/docs/usage/training.md b/website/docs/usage/training.md
index 0fe34f2a2..94fdad209 100644
--- a/website/docs/usage/training.md
+++ b/website/docs/usage/training.md
@@ -480,7 +480,10 @@ as-is. They are also excluded when calling
 > still impact your model's performance – for instance, a sentence boundary
 > detector can impact what the parser or entity recognizer considers a valid
 > parse. So the evaluation results should always reflect what your pipeline will
-> produce at runtime.
+> produce at runtime. If you want a frozen component to run (without updating)
+> during training as well, so that downstream components can use its
+> **predictions**, you can add it to the list of
+> [`annotating_components`](/usage/training#annotating-components).
 
 ```ini
 [nlp]
@@ -567,6 +570,10 @@ frozen_components = ["ner"]
 annotating_components = ["sentencizer", "ner"]
 ```
 
+Similarly, a pretrained `tok2vec` layer can be frozen and specified in the list
+of `annotating_components` to ensure that a downstream component can use the
+embedding layer without updating it.
+
 <Infobox variant="warning">
 
 Be aware that non-frozen annotating components with statistical models will
@@ -699,14 +706,14 @@ excluded from the logs and the score won't be weighted.
 
 <Accordion title="Understanding the training output and score types" id="score-types">
 
-| Name                       | Description                                                                                                             |
-| -------------------------- | ----------------------------------------------------------------------------------------------------------------------- |
-| **Loss**                   | The training loss representing the amount of work left for the optimizer. Should decrease, but usually not to `0`.       |
-| **Precision** (P)          | Percentage of predicted annotations that were correct. Should increase.                                                  |
-| **Recall** (R)             | Percentage of reference annotations recovered. Should increase.                                                          |
-| **F-Score** (F)            | Harmonic mean of precision and recall. Should increase.                                                                  |
-| **UAS** / **LAS**          | Unlabeled and labeled attachment score for the dependency parser, i.e. the percentage of correct arcs. Should increase.  |
-| **Speed**                  | Prediction speed in words per second (WPS). Should stay stable.                                                          |
+| Name              | Description                                                                                                              |
+| ----------------- | ------------------------------------------------------------------------------------------------------------------------ |
+| **Loss**          | The training loss representing the amount of work left for the optimizer. Should decrease, but usually not to `0`.        |
+| **Precision** (P) | Percentage of predicted annotations that were correct. Should increase.                                                   |
+| **Recall** (R)    | Percentage of reference annotations recovered. Should increase.                                                           |
+| **F-Score** (F)   | Harmonic mean of precision and recall. Should increase.                                                                   |
+| **UAS** / **LAS** | Unlabeled and labeled attachment score for the dependency parser, i.e. the percentage of correct arcs. Should increase.   |
+| **Speed**         | Prediction speed in words per second (WPS). Should stay stable.                                                           |
 
 Note that if the development data has raw text, some of the gold-standard
 entities might not align to the predicted tokenization. These tokenization
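
For reference, the use-case this patch documents can be expressed in a config along the following lines. This is a minimal sketch, not part of the diff above: the `en_core_web_sm` source, the explicit `Tok2VecListener` wiring for `ner` and the `width` value are illustrative assumptions.

```ini
# Hypothetical config.cfg excerpt: freeze a sourced tok2vec, but keep it
# running during training so a downstream listener can use its predictions.
[nlp]
lang = "en"
pipeline = ["tok2vec", "ner"]

# Reuse the embedding layer from an existing pipeline (illustrative source).
[components.tok2vec]
source = "en_core_web_sm"

[components.ner]
factory = "ner"

# Make the ner model listen to the shared tok2vec component.
[components.ner.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
# Assumed to match the sourced layer's output width.
width = 96
upstream = "tok2vec"

[training]
# Never update the tok2vec weights during training ...
frozen_components = ["tok2vec"]
# ... but still run the component on each batch, so 'ner' trains on top of
# the frozen embeddings and the guarded check added to init_nlp above
# no longer emits W086.
annotating_components = ["tok2vec"]
```

With `tok2vec` listed in both `frozen_components` and `annotating_components`, the embedding layer runs (without updating) during training and its listener receives real predictions, which is exactly the condition under which the new `if name not in T["annotating_components"]` check suppresses the warning.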