mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Document use-case of freezing tok2vec (#8992)
* update error msg
* add sentence to docs
* expand note on frozen components
This commit is contained in:
		
							parent
							
								
									31c0a75e6d
								
							
						
					
					
						commit
						8c1d86ea92
					
				| 
						 | 
					@ -116,13 +116,11 @@ class Warnings:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # New warnings added in v3.x
 | 
					    # New warnings added in v3.x
 | 
				
			||||||
    W086 = ("Component '{listener}' will be (re)trained, but it needs the component "
 | 
					    W086 = ("Component '{listener}' will be (re)trained, but it needs the component "
 | 
				
			||||||
            "'{name}' which is frozen. You can either freeze both, or neither "
 | 
					            "'{name}' which is frozen. If you want to prevent retraining '{name}' "
 | 
				
			||||||
            "of the two. If you're sourcing the component from "
 | 
					            "but want to train '{listener}' on top of it, you should add '{name}' to the "
 | 
				
			||||||
            "an existing pipeline, you can use the `replace_listeners` setting in "
 | 
					            "list of 'annotating_components' in the 'training' block in the config. "
 | 
				
			||||||
            "the config block to replace its token-to-vector listener with a copy "
 | 
					            "See the documentation for details: "
 | 
				
			||||||
            "and make it independent. For example, `replace_listeners = "
 | 
					            "https://spacy.io/usage/training#annotating-components")
 | 
				
			||||||
            "[\"model.tok2vec\"]` See the documentation for details: "
 | 
					 | 
				
			||||||
            "https://spacy.io/usage/training#config-components-listeners")
 | 
					 | 
				
			||||||
    W087 = ("Component '{name}' will be (re)trained, but the component '{listener}' "
 | 
					    W087 = ("Component '{name}' will be (re)trained, but the component '{listener}' "
 | 
				
			||||||
            "depends on it via a listener and is frozen. This means that the "
 | 
					            "depends on it via a listener and is frozen. This means that the "
 | 
				
			||||||
            "performance of '{listener}' will be degraded. You can either freeze "
 | 
					            "performance of '{listener}' will be degraded. You can either freeze "
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -95,6 +95,7 @@ def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
 | 
				
			||||||
                logger.warning(Warnings.W087.format(name=name, listener=listener))
 | 
					                logger.warning(Warnings.W087.format(name=name, listener=listener))
 | 
				
			||||||
            # We always check this regardless, in case user freezes tok2vec
 | 
					            # We always check this regardless, in case user freezes tok2vec
 | 
				
			||||||
            if listener not in frozen_components and name in frozen_components:
 | 
					            if listener not in frozen_components and name in frozen_components:
 | 
				
			||||||
 | 
					                if name not in T["annotating_components"]:
 | 
				
			||||||
                    logger.warning(Warnings.W086.format(name=name, listener=listener))
 | 
					                    logger.warning(Warnings.W086.format(name=name, listener=listener))
 | 
				
			||||||
    return nlp
 | 
					    return nlp
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -480,7 +480,10 @@ as-is. They are also excluded when calling
 | 
				
			||||||
> still impact your model's performance – for instance, a sentence boundary
 | 
					> still impact your model's performance – for instance, a sentence boundary
 | 
				
			||||||
> detector can impact what the parser or entity recognizer considers a valid
 | 
					> detector can impact what the parser or entity recognizer considers a valid
 | 
				
			||||||
> parse. So the evaluation results should always reflect what your pipeline will
 | 
					> parse. So the evaluation results should always reflect what your pipeline will
 | 
				
			||||||
> produce at runtime.
 | 
					> produce at runtime. If you want a frozen component to run (without updating)
 | 
				
			||||||
 | 
					> during training as well, so that downstream components can use its
 | 
				
			||||||
 | 
					> **predictions**, you can add it to the list of
 | 
				
			||||||
 | 
					> [`annotating_components`](/usage/training#annotating-components).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```ini
 | 
					```ini
 | 
				
			||||||
[nlp]
 | 
					[nlp]
 | 
				
			||||||
| 
						 | 
					@ -567,6 +570,10 @@ frozen_components = ["ner"]
 | 
				
			||||||
annotating_components = ["sentencizer", "ner"]
 | 
					annotating_components = ["sentencizer", "ner"]
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Similarly, a pretrained `tok2vec` layer can be frozen and specified in the list
 | 
				
			||||||
 | 
					of `annotating_components` to ensure that a downstream component can use the
 | 
				
			||||||
 | 
					embedding layer without updating it.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
<Infobox variant="warning" title="Training speed with annotating components" id="annotating-components-speed">
 | 
					<Infobox variant="warning" title="Training speed with annotating components" id="annotating-components-speed">
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Be aware that non-frozen annotating components with statistical models will
 | 
					Be aware that non-frozen annotating components with statistical models will
 | 
				
			||||||
| 
						 | 
					@ -700,7 +707,7 @@ excluded from the logs and the score won't be weighted.
 | 
				
			||||||
<Accordion title="Understanding the training output and score types" spaced id="score-types">
 | 
					<Accordion title="Understanding the training output and score types" spaced id="score-types">
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| Name              | Description                                                                                                             |
 | 
					| Name              | Description                                                                                                             |
 | 
				
			||||||
| -------------------------- | ----------------------------------------------------------------------------------------------------------------------- |
 | 
					| ----------------- | ----------------------------------------------------------------------------------------------------------------------- |
 | 
				
			||||||
| **Loss**          | The training loss representing the amount of work left for the optimizer. Should decrease, but usually not to `0`.      |
 | 
					| **Loss**          | The training loss representing the amount of work left for the optimizer. Should decrease, but usually not to `0`.      |
 | 
				
			||||||
| **Precision** (P) | Percentage of predicted annotations that were correct. Should increase.                                                 |
 | 
					| **Precision** (P) | Percentage of predicted annotations that were correct. Should increase.                                                 |
 | 
				
			||||||
| **Recall** (R)    | Percentage of reference annotations recovered. Should increase.                                                         |
 | 
					| **Recall** (R)    | Percentage of reference annotations recovered. Should increase.                                                         |
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user