	Rename activations
- "probs" -> "probabilities" - "guesses" -> "label_ids", except in the edit tree lemmatizer, where "guesses" -> "tree_ids".
parent 6f80e80305
commit cd6e4fa8f4
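For orientation, here is how the renamed keys surface in user code once a component's `save_activations` flag is switched on. This is an illustrative sketch, not part of the commit: `path/to/pipeline` is a placeholder for any trained pipeline that contains a `"tagger"` component, mirroring the updated tests further down.

```python
import spacy

# Placeholder path: any trained pipeline that includes a "tagger" component.
nlp = spacy.load("path/to/pipeline")
tagger = nlp.get_pipe("tagger")
tagger.save_activations = True  # same switch the updated tests flip

doc = nlp("This is a test.")
acts = doc.activations["tagger"]

# After this commit the keys are "probabilities" and "label_ids"
# (previously "probs" and "guesses"); the edit tree lemmatizer uses
# "probabilities" and "tree_ids" instead.
print(sorted(acts.keys()))          # ['label_ids', 'probabilities']
print(acts["probabilities"].shape)  # (n_tokens, n_labels)
print(acts["label_ids"].shape)      # (n_tokens,)
```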
@@ -165,12 +165,12 @@ class EditTreeLemmatizer(TrainablePipe):
                 self.model.ops.alloc((0, n_labels), dtype="i") for doc in docs
             ]
             assert len(guesses) == n_docs
-            return {"probs": scores, "guesses": guesses}
+            return {"probabilities": scores, "tree_ids": guesses}
         scores = self.model.predict(docs)
         assert len(scores) == n_docs
         guesses = self._scores2guesses(docs, scores)
         assert len(guesses) == n_docs
-        return {"probs": scores, "guesses": guesses}
+        return {"probabilities": scores, "tree_ids": guesses}

     def _scores2guesses(self, docs, scores):
         guesses = []
@@ -199,7 +199,7 @@ class EditTreeLemmatizer(TrainablePipe):
         return guesses

     def set_annotations(self, docs: Iterable[Doc], activations: ActivationsT):
-        batch_tree_ids = activations["guesses"]
+        batch_tree_ids = activations["tree_ids"]
         for i, doc in enumerate(docs):
             if self.save_activations:
                 doc.activations[self.name] = {}

@@ -237,7 +237,7 @@ class Morphologizer(Tagger):

         DOCS: https://spacy.io/api/morphologizer#set_annotations
         """
-        batch_tag_ids = activations["guesses"]
+        batch_tag_ids = activations["label_ids"]
         if isinstance(docs, Doc):
             docs = [docs]
         cdef Doc doc

@@ -129,7 +129,7 @@ class SentenceRecognizer(Tagger):

         DOCS: https://spacy.io/api/sentencerecognizer#set_annotations
         """
-        batch_tag_ids = activations["guesses"]
+        batch_tag_ids = activations["label_ids"]
         if isinstance(docs, Doc):
             docs = [docs]
         cdef Doc doc

@@ -153,12 +153,12 @@ class Tagger(TrainablePipe):
             n_labels = len(self.labels)
             guesses = [self.model.ops.alloc((0, n_labels)) for doc in docs]
             assert len(guesses) == len(docs)
-            return {"probs": guesses, "guesses": guesses}
+            return {"probabilities": guesses, "label_ids": guesses}
         scores = self.model.predict(docs)
         assert len(scores) == len(docs), (len(scores), len(docs))
         guesses = self._scores2guesses(scores)
         assert len(guesses) == len(docs)
-        return {"probs": scores, "guesses": guesses}
+        return {"probabilities": scores, "label_ids": guesses}

     def _scores2guesses(self, scores):
         guesses = []
@@ -177,7 +177,7 @@ class Tagger(TrainablePipe):

         DOCS: https://spacy.io/api/tagger#set_annotations
         """
-        batch_tag_ids = activations["guesses"]
+        batch_tag_ids = activations["label_ids"]
         if isinstance(docs, Doc):
             docs = [docs]
         cdef Doc doc

@@ -209,10 +209,10 @@ class TextCategorizer(TrainablePipe):
             tensors = [doc.tensor for doc in docs]
             xp = self.model.ops.xp
             scores = xp.zeros((len(list(docs)), len(self.labels)))
-            return {"probs": scores}
+            return {"probabilities": scores}
         scores = self.model.predict(docs)
         scores = self.model.ops.asarray(scores)
-        return {"probs": scores}
+        return {"probabilities": scores}

     def set_annotations(self, docs: Iterable[Doc], activations: ActivationsT) -> None:
         """Modify a batch of Doc objects, using pre-computed scores.
@@ -222,11 +222,11 @@ class TextCategorizer(TrainablePipe):

         DOCS: https://spacy.io/api/textcategorizer#set_annotations
         """
-        probs = activations["probs"]
+        probs = activations["probabilities"]
         for i, doc in enumerate(docs):
             if self.save_activations:
                 doc.activations[self.name] = {}
-                doc.activations[self.name]["probs"] = probs[i]
+                doc.activations[self.name]["probabilities"] = probs[i]
             for j, label in enumerate(self.labels):
                 doc.cats[label] = float(probs[i, j])

@@ -297,6 +297,9 @@ def test_save_activations():

     lemmatizer.save_activations = True
     doc = nlp("This is a test.")
-    assert list(doc.activations["trainable_lemmatizer"].keys()) == ["probs", "guesses"]
-    assert doc.activations["trainable_lemmatizer"]["probs"].shape == (5, nO)
-    assert doc.activations["trainable_lemmatizer"]["guesses"].shape == (5,)
+    assert list(doc.activations["trainable_lemmatizer"].keys()) == [
+        "probabilities",
+        "tree_ids",
+    ]
+    assert doc.activations["trainable_lemmatizer"]["probabilities"].shape == (5, nO)
+    assert doc.activations["trainable_lemmatizer"]["tree_ids"].shape == (5,)

@@ -215,6 +215,9 @@ def test_save_activations():
     morphologizer.save_activations = True
     doc = nlp("This is a test.")
     assert "morphologizer" in doc.activations
-    assert set(doc.activations["morphologizer"].keys()) == {"guesses", "probs"}
-    assert doc.activations["morphologizer"]["probs"].shape == (5, 6)
-    assert doc.activations["morphologizer"]["guesses"].shape == (5,)
+    assert set(doc.activations["morphologizer"].keys()) == {
+        "label_ids",
+        "probabilities",
+    }
+    assert doc.activations["morphologizer"]["probabilities"].shape == (5, 6)
+    assert doc.activations["morphologizer"]["label_ids"].shape == (5,)

@@ -123,6 +123,6 @@ def test_save_activations():
     senter.save_activations = True
     doc = nlp("This is a test.")
     assert "senter" in doc.activations
-    assert set(doc.activations["senter"].keys()) == {"guesses", "probs"}
-    assert doc.activations["senter"]["probs"].shape == (5, nO)
-    assert doc.activations["senter"]["guesses"].shape == (5,)
+    assert set(doc.activations["senter"].keys()) == {"label_ids", "probabilities"}
+    assert doc.activations["senter"]["probabilities"].shape == (5, nO)
+    assert doc.activations["senter"]["label_ids"].shape == (5,)

@@ -228,9 +228,9 @@ def test_save_activations():
     tagger.save_activations = True
     doc = nlp("This is a test.")
     assert "tagger" in doc.activations
-    assert set(doc.activations["tagger"].keys()) == {"guesses", "probs"}
-    assert doc.activations["tagger"]["probs"].shape == (5, len(TAGS))
-    assert doc.activations["tagger"]["guesses"].shape == (5,)
+    assert set(doc.activations["tagger"].keys()) == {"label_ids", "probabilities"}
+    assert doc.activations["tagger"]["probabilities"].shape == (5, len(TAGS))
+    assert doc.activations["tagger"]["label_ids"].shape == (5,)


 def test_tagger_requires_labels():

@@ -286,7 +286,7 @@ def test_issue9904():
     nlp.initialize(get_examples)

     examples = get_examples()
-    scores = textcat.predict([eg.predicted for eg in examples])["probs"]
+    scores = textcat.predict([eg.predicted for eg in examples])["probabilities"]

     loss = textcat.get_loss(examples, scores)[0]
     loss_double_bs = textcat.get_loss(examples * 2, scores.repeat(2, axis=0))[0]
@@ -890,8 +890,8 @@ def test_save_activations():

     textcat.save_activations = True
     doc = nlp("This is a test.")
-    assert list(doc.activations["textcat"].keys()) == ["probs"]
-    assert doc.activations["textcat"]["probs"].shape == (nO,)
+    assert list(doc.activations["textcat"].keys()) == ["probabilities"]
+    assert doc.activations["textcat"]["probabilities"].shape == (nO,)


 def test_save_activations_multi():
@@ -910,5 +910,5 @@ def test_save_activations_multi():

     textcat.save_activations = True
     doc = nlp("This is a test.")
-    assert list(doc.activations["textcat_multilabel"].keys()) == ["probs"]
-    assert doc.activations["textcat_multilabel"]["probs"].shape == (nO,)
+    assert list(doc.activations["textcat_multilabel"].keys()) == ["probabilities"]
+    assert doc.activations["textcat_multilabel"]["probabilities"].shape == (nO,)

@@ -52,7 +52,7 @@ architectures and their arguments and hyperparameters.
 | `overwrite` | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
 | `top_k` | The number of most probable edit trees to try before resorting to `backoff`. Defaults to `1`. ~~int~~ |
 | `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"lemma"`. ~~Optional[Callable]~~ |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
+| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"tree_ids"`. ~~Union[bool, list[str]]~~ |

 ```python
 %%GITHUB_SPACY/spacy/pipeline/edit_tree_lemmatizer.py
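For the edit tree lemmatizer the identifier key is `"tree_ids"` rather than `"label_ids"`, since its guesses index edit trees. A minimal usage sketch, not part of the commit: the load path is a placeholder and the pipeline is assumed to contain a trained `"trainable_lemmatizer"`.

```python
import spacy

# Placeholder path: must contain a trained "trainable_lemmatizer" component.
nlp = spacy.load("path/to/pipeline")
lemmatizer = nlp.get_pipe("trainable_lemmatizer")
lemmatizer.save_activations = True

doc = nlp("This is a test.")
acts = doc.activations["trainable_lemmatizer"]
probs = acts["probabilities"]  # per-token scores over candidate edit trees
tree_ids = acts["tree_ids"]    # chosen edit-tree id per token
```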
@@ -48,7 +48,7 @@ architectures and their arguments and hyperparameters.
 | `overwrite` <Tag variant="new">3.2</Tag> | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~ |
 | `extend` <Tag variant="new">3.2</Tag> | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~ |
 | `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
+| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~ |

 ```python
 %%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx
@@ -44,7 +44,7 @@ architectures and their arguments and hyperparameters.
 | `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
 | `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
 | `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
+| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~ |

 ```python
 %%GITHUB_SPACY/spacy/pipeline/senter.pyx
@@ -46,7 +46,7 @@ architectures and their arguments and hyperparameters.
 | `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
 | `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~ |
 | `neg_prefix` <Tag variant="new">3.2.1</Tag> | The prefix used to specify incorrect tags while training. The tagger will learn not to predict exactly this tag. Defaults to `!`. ~~str~~ |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
+| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~ |

 ```python
 %%GITHUB_SPACY/spacy/pipeline/tagger.pyx
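Because `save_activations` is typed `Union[bool, list[str]]`, a subset of activations can also be requested by name, and those names are now the renamed ones. A hedged sketch of that, not part of the commit; treat the exact config wiring as an assumption based on the constructor tables above.

```python
import spacy

nlp = spacy.blank("en")

# Keep only the predicted label ids; note the new activation name.
# Passing a list instead of a bool follows the Union[bool, list[str]] type
# in the table above (assumption: the factory accepts it via config).
nlp.add_pipe("tagger", config={"save_activations": ["label_ids"]})

# Attribute assignment also works, as in the updated tests (there with True).
nlp.get_pipe("tagger").save_activations = ["label_ids"]
```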
@@ -125,7 +125,7 @@ shortcut for this and instantiate the component using its string name and
 | _keyword-only_ | |
 | `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ |
 | `scorer` | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. The supported activations is `"probs"`. ~~Union[bool, list[str]]~~ |
+| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. The supported activations is `"probabilities"`. ~~Union[bool, list[str]]~~ |

 ## TextCategorizer.\_\_call\_\_ {#call tag="method"}

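The text categorizer exposes a single activation, now `"probabilities"`, stored per document rather than per token. A closing sketch mirroring the updated tests, not part of the commit: the load path is a placeholder, and the component name would be `"textcat_multilabel"` for the multilabel variant.

```python
import spacy

# Placeholder path: must contain a trained "textcat" component.
nlp = spacy.load("path/to/pipeline")
textcat = nlp.get_pipe("textcat")
textcat.save_activations = True

doc = nlp("This is a test.")
probs = doc.activations["textcat"]["probabilities"]  # shape: (n_labels,)

# set_annotations writes the same scores to doc.cats, keyed by label name.
for label, score in zip(textcat.labels, probs):
    print(label, float(score))
```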