From cd6e4fa8f45d52a9bf37e326f3f5d9e612ff2009 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?= <me@danieldk.eu>
Date: Wed, 31 Aug 2022 11:18:40 +0200
Subject: [PATCH] Rename activations

- "probs" -> "probabilities"
- "guesses" -> "label_ids", except in the edit tree lemmatizer, where
  "guesses" -> "tree_ids".
---
 spacy/pipeline/edit_tree_lemmatizer.py            |  6 +++---
 spacy/pipeline/morphologizer.pyx                  |  2 +-
 spacy/pipeline/senter.pyx                         |  2 +-
 spacy/pipeline/tagger.pyx                         |  6 +++---
 spacy/pipeline/textcat.py                         |  8 ++++----
 spacy/tests/pipeline/test_edit_tree_lemmatizer.py |  9 ++++++---
 spacy/tests/pipeline/test_morphologizer.py        |  9 ++++++---
 spacy/tests/pipeline/test_senter.py               |  6 +++---
 spacy/tests/pipeline/test_tagger.py               |  6 +++---
 spacy/tests/pipeline/test_textcat.py              | 10 +++++-----
 website/docs/api/edittreelemmatizer.md            |  2 +-
 website/docs/api/morphologizer.md                 |  2 +-
 website/docs/api/sentencerecognizer.md            |  2 +-
 website/docs/api/tagger.md                        |  2 +-
 website/docs/api/textcategorizer.md               |  2 +-
 15 files changed, 40 insertions(+), 34 deletions(-)

diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py
index 2d14a5285..37aa9663b 100644
--- a/spacy/pipeline/edit_tree_lemmatizer.py
+++ b/spacy/pipeline/edit_tree_lemmatizer.py
@@ -165,12 +165,12 @@ class EditTreeLemmatizer(TrainablePipe):
                 self.model.ops.alloc((0, n_labels), dtype="i") for doc in docs
             ]
             assert len(guesses) == n_docs
-            return {"probs": scores, "guesses": guesses}
+            return {"probabilities": scores, "tree_ids": guesses}
         scores = self.model.predict(docs)
         assert len(scores) == n_docs
         guesses = self._scores2guesses(docs, scores)
         assert len(guesses) == n_docs
-        return {"probs": scores, "guesses": guesses}
+        return {"probabilities": scores, "tree_ids": guesses}
 
     def _scores2guesses(self, docs, scores):
         guesses = []
@@ -199,7 +199,7 @@ class EditTreeLemmatizer(TrainablePipe):
         return guesses
 
     def set_annotations(self, docs: Iterable[Doc], activations: ActivationsT):
-        batch_tree_ids = activations["guesses"]
+        batch_tree_ids = activations["tree_ids"]
         for i, doc in enumerate(docs):
             if self.save_activations:
                 doc.activations[self.name] = {}
diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx
index d256cbf0b..6786f4539 100644
--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@@ -237,7 +237,7 @@ class Morphologizer(Tagger):
 
         DOCS: https://spacy.io/api/morphologizer#set_annotations
         """
-        batch_tag_ids = activations["guesses"]
+        batch_tag_ids = activations["label_ids"]
         if isinstance(docs, Doc):
             docs = [docs]
         cdef Doc doc
diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx
index ee6ee9ff9..cb1979b11 100644
--- a/spacy/pipeline/senter.pyx
+++ b/spacy/pipeline/senter.pyx
@@ -129,7 +129,7 @@ class SentenceRecognizer(Tagger):
 
         DOCS: https://spacy.io/api/sentencerecognizer#set_annotations
         """
-        batch_tag_ids = activations["guesses"]
+        batch_tag_ids = activations["label_ids"]
         if isinstance(docs, Doc):
             docs = [docs]
         cdef Doc doc
diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx
index 23c7828fd..ab369c58b 100644
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -153,12 +153,12 @@ class Tagger(TrainablePipe):
             n_labels = len(self.labels)
             guesses = [self.model.ops.alloc((0, n_labels)) for doc in docs]
             assert len(guesses) == len(docs)
-            return {"probs": guesses, "guesses": guesses}
+            return {"probabilities": guesses, "label_ids": guesses}
         scores = self.model.predict(docs)
         assert len(scores) == len(docs), (len(scores), len(docs))
         guesses = self._scores2guesses(scores)
         assert len(guesses) == len(docs)
-        return {"probs": scores, "guesses": guesses}
+        return {"probabilities": scores, "label_ids": guesses}
 
     def _scores2guesses(self, scores):
         guesses = []
@@ -177,7 +177,7 @@ class Tagger(TrainablePipe):
 
         DOCS: https://spacy.io/api/tagger#set_annotations
         """
-        batch_tag_ids = activations["guesses"]
+        batch_tag_ids = activations["label_ids"]
         if isinstance(docs, Doc):
             docs = [docs]
         cdef Doc doc
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index afce29e6f..506cdb61c 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -209,10 +209,10 @@ class TextCategorizer(TrainablePipe):
             tensors = [doc.tensor for doc in docs]
             xp = self.model.ops.xp
             scores = xp.zeros((len(list(docs)), len(self.labels)))
-            return {"probs": scores}
+            return {"probabilities": scores}
         scores = self.model.predict(docs)
         scores = self.model.ops.asarray(scores)
-        return {"probs": scores}
+        return {"probabilities": scores}
 
     def set_annotations(self, docs: Iterable[Doc], activations: ActivationsT) -> None:
         """Modify a batch of Doc objects, using pre-computed scores.
@@ -222,11 +222,11 @@ class TextCategorizer(TrainablePipe):
 
         DOCS: https://spacy.io/api/textcategorizer#set_annotations
         """
-        probs = activations["probs"]
+        probs = activations["probabilities"]
         for i, doc in enumerate(docs):
             if self.save_activations:
                 doc.activations[self.name] = {}
-                doc.activations[self.name]["probs"] = probs[i]
+                doc.activations[self.name]["probabilities"] = probs[i]
             for j, label in enumerate(self.labels):
                 doc.cats[label] = float(probs[i, j])
 
diff --git a/spacy/tests/pipeline/test_edit_tree_lemmatizer.py b/spacy/tests/pipeline/test_edit_tree_lemmatizer.py
index d00f9e622..ad2e56729 100644
--- a/spacy/tests/pipeline/test_edit_tree_lemmatizer.py
+++ b/spacy/tests/pipeline/test_edit_tree_lemmatizer.py
@@ -297,6 +297,9 @@ def test_save_activations():
 
     lemmatizer.save_activations = True
     doc = nlp("This is a test.")
-    assert list(doc.activations["trainable_lemmatizer"].keys()) == ["probs", "guesses"]
-    assert doc.activations["trainable_lemmatizer"]["probs"].shape == (5, nO)
-    assert doc.activations["trainable_lemmatizer"]["guesses"].shape == (5,)
+    assert list(doc.activations["trainable_lemmatizer"].keys()) == [
+        "probabilities",
+        "tree_ids",
+    ]
+    assert doc.activations["trainable_lemmatizer"]["probabilities"].shape == (5, nO)
+    assert doc.activations["trainable_lemmatizer"]["tree_ids"].shape == (5,)
diff --git a/spacy/tests/pipeline/test_morphologizer.py b/spacy/tests/pipeline/test_morphologizer.py
index cad558a7d..70fc77304 100644
--- a/spacy/tests/pipeline/test_morphologizer.py
+++ b/spacy/tests/pipeline/test_morphologizer.py
@@ -215,6 +215,9 @@ def test_save_activations():
     morphologizer.save_activations = True
     doc = nlp("This is a test.")
     assert "morphologizer" in doc.activations
-    assert set(doc.activations["morphologizer"].keys()) == {"guesses", "probs"}
-    assert doc.activations["morphologizer"]["probs"].shape == (5, 6)
-    assert doc.activations["morphologizer"]["guesses"].shape == (5,)
+    assert set(doc.activations["morphologizer"].keys()) == {
+        "label_ids",
+        "probabilities",
+    }
+    assert doc.activations["morphologizer"]["probabilities"].shape == (5, 6)
+    assert doc.activations["morphologizer"]["label_ids"].shape == (5,)
diff --git a/spacy/tests/pipeline/test_senter.py b/spacy/tests/pipeline/test_senter.py
index fca4ce821..f83228510 100644
--- a/spacy/tests/pipeline/test_senter.py
+++ b/spacy/tests/pipeline/test_senter.py
@@ -123,6 +123,6 @@ def test_save_activations():
     senter.save_activations = True
     doc = nlp("This is a test.")
     assert "senter" in doc.activations
-    assert set(doc.activations["senter"].keys()) == {"guesses", "probs"}
-    assert doc.activations["senter"]["probs"].shape == (5, nO)
-    assert doc.activations["senter"]["guesses"].shape == (5,)
+    assert set(doc.activations["senter"].keys()) == {"label_ids", "probabilities"}
+    assert doc.activations["senter"]["probabilities"].shape == (5, nO)
+    assert doc.activations["senter"]["label_ids"].shape == (5,)
diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py
index 360985def..754e9f6be 100644
--- a/spacy/tests/pipeline/test_tagger.py
+++ b/spacy/tests/pipeline/test_tagger.py
@@ -228,9 +228,9 @@ def test_save_activations():
     tagger.save_activations = True
     doc = nlp("This is a test.")
     assert "tagger" in doc.activations
-    assert set(doc.activations["tagger"].keys()) == {"guesses", "probs"}
-    assert doc.activations["tagger"]["probs"].shape == (5, len(TAGS))
-    assert doc.activations["tagger"]["guesses"].shape == (5,)
+    assert set(doc.activations["tagger"].keys()) == {"label_ids", "probabilities"}
+    assert doc.activations["tagger"]["probabilities"].shape == (5, len(TAGS))
+    assert doc.activations["tagger"]["label_ids"].shape == (5,)
 
 
 def test_tagger_requires_labels():
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 2e427868b..c2b990923 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -286,7 +286,7 @@ def test_issue9904():
     nlp.initialize(get_examples)
 
     examples = get_examples()
-    scores = textcat.predict([eg.predicted for eg in examples])["probs"]
+    scores = textcat.predict([eg.predicted for eg in examples])["probabilities"]
 
     loss = textcat.get_loss(examples, scores)[0]
     loss_double_bs = textcat.get_loss(examples * 2, scores.repeat(2, axis=0))[0]
@@ -890,8 +890,8 @@ def test_save_activations():
 
     textcat.save_activations = True
     doc = nlp("This is a test.")
-    assert list(doc.activations["textcat"].keys()) == ["probs"]
-    assert doc.activations["textcat"]["probs"].shape == (nO,)
+    assert list(doc.activations["textcat"].keys()) == ["probabilities"]
+    assert doc.activations["textcat"]["probabilities"].shape == (nO,)
 
 
 def test_save_activations_multi():
@@ -910,5 +910,5 @@ def test_save_activations_multi():
 
     textcat.save_activations = True
     doc = nlp("This is a test.")
-    assert list(doc.activations["textcat_multilabel"].keys()) == ["probs"]
-    assert doc.activations["textcat_multilabel"]["probs"].shape == (nO,)
+    assert list(doc.activations["textcat_multilabel"].keys()) == ["probabilities"]
+    assert doc.activations["textcat_multilabel"]["probabilities"].shape == (nO,)
diff --git a/website/docs/api/edittreelemmatizer.md b/website/docs/api/edittreelemmatizer.md
index f6abe2bcd..8bee74316 100644
--- a/website/docs/api/edittreelemmatizer.md
+++ b/website/docs/api/edittreelemmatizer.md
@@ -52,7 +52,7 @@ architectures and their arguments and hyperparameters.
 | `overwrite`                                     | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~                                                                                                                                                                                                                                          |
 | `top_k`                                         | The number of most probable edit trees to try before resorting to `backoff`. Defaults to `1`. ~~int~~                                                                                                                                                                                                              |
 | `scorer`                                        | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"lemma"`. ~~Optional[Callable]~~                                                                                                                                                                      |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~                                                                                                                                                                                             |
+| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"tree_ids"`. ~~Union[bool, list[str]]~~                                                                                                                                                                                    |
 
 ```python
 %%GITHUB_SPACY/spacy/pipeline/edit_tree_lemmatizer.py
diff --git a/website/docs/api/morphologizer.md b/website/docs/api/morphologizer.md
index 475c48ee7..97444b157 100644
--- a/website/docs/api/morphologizer.md
+++ b/website/docs/api/morphologizer.md
@@ -48,7 +48,7 @@ architectures and their arguments and hyperparameters.
 | `overwrite` <Tag variant="new">3.2</Tag>        | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~                                                                                                                                                                                  |
 | `extend` <Tag variant="new">3.2</Tag>           | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~                                                                                                                      |
 | `scorer` <Tag variant="new">3.2</Tag>           | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~                                                                                                                                                 |
+| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~                                                                                                                                       |
 
 ```python
 %%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx
diff --git a/website/docs/api/sentencerecognizer.md b/website/docs/api/sentencerecognizer.md
index aa73a78d5..03744e1b5 100644
--- a/website/docs/api/sentencerecognizer.md
+++ b/website/docs/api/sentencerecognizer.md
@@ -44,7 +44,7 @@ architectures and their arguments and hyperparameters.
 | `model`                                         | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
 | `overwrite` <Tag variant="new">3.2</Tag>        | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~                                                                                             |
 | `scorer` <Tag variant="new">3.2</Tag>           | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~                                   |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~                                                |
+| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~                                      |
 
 ```python
 %%GITHUB_SPACY/spacy/pipeline/senter.pyx
diff --git a/website/docs/api/tagger.md b/website/docs/api/tagger.md
index 3dfc0dbf1..0d77d9bf4 100644
--- a/website/docs/api/tagger.md
+++ b/website/docs/api/tagger.md
@@ -46,7 +46,7 @@ architectures and their arguments and hyperparameters.
 | `overwrite` <Tag variant="new">3.2</Tag>        | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~                                                                                                                                                                                                                              |
 | `scorer` <Tag variant="new">3.2</Tag>           | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~                                                                                                                                                            |
 | `neg_prefix` <Tag variant="new">3.2.1</Tag>     | The prefix used to specify incorrect tags while training. The tagger will learn not to predict exactly this tag. Defaults to `!`. ~~str~~                                                                                                                                                              |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~                                                                                                                                                                                 |
+| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~                                                                                                                                                                       |
 
 ```python
 %%GITHUB_SPACY/spacy/pipeline/tagger.pyx
diff --git a/website/docs/api/textcategorizer.md b/website/docs/api/textcategorizer.md
index 0077b936c..d8a609693 100644
--- a/website/docs/api/textcategorizer.md
+++ b/website/docs/api/textcategorizer.md
@@ -125,7 +125,7 @@ shortcut for this and instantiate the component using its string name and
 | _keyword-only_                                  |                                                                                                                                  |
 | `threshold`                                     | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~                                   |
 | `scorer`                                        | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. The supported activations is `"probs"`. ~~Union[bool, list[str]]~~                    |
+| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. The supported activation is `"probabilities"`. ~~Union[bool, list[str]]~~             |
 
 ## TextCategorizer.\_\_call\_\_ {#call tag="method"}