diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py index 2d14a5285..37aa9663b 100644 --- a/spacy/pipeline/edit_tree_lemmatizer.py +++ b/spacy/pipeline/edit_tree_lemmatizer.py @@ -165,12 +165,12 @@ class EditTreeLemmatizer(TrainablePipe): self.model.ops.alloc((0, n_labels), dtype="i") for doc in docs ] assert len(guesses) == n_docs - return {"probs": scores, "guesses": guesses} + return {"probabilities": scores, "tree_ids": guesses} scores = self.model.predict(docs) assert len(scores) == n_docs guesses = self._scores2guesses(docs, scores) assert len(guesses) == n_docs - return {"probs": scores, "guesses": guesses} + return {"probabilities": scores, "tree_ids": guesses} def _scores2guesses(self, docs, scores): guesses = [] @@ -199,7 +199,7 @@ class EditTreeLemmatizer(TrainablePipe): return guesses def set_annotations(self, docs: Iterable[Doc], activations: ActivationsT): - batch_tree_ids = activations["guesses"] + batch_tree_ids = activations["tree_ids"] for i, doc in enumerate(docs): if self.save_activations: doc.activations[self.name] = {} diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index d256cbf0b..6786f4539 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -237,7 +237,7 @@ class Morphologizer(Tagger): DOCS: https://spacy.io/api/morphologizer#set_annotations """ - batch_tag_ids = activations["guesses"] + batch_tag_ids = activations["label_ids"] if isinstance(docs, Doc): docs = [docs] cdef Doc doc diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx index ee6ee9ff9..cb1979b11 100644 --- a/spacy/pipeline/senter.pyx +++ b/spacy/pipeline/senter.pyx @@ -129,7 +129,7 @@ class SentenceRecognizer(Tagger): DOCS: https://spacy.io/api/sentencerecognizer#set_annotations """ - batch_tag_ids = activations["guesses"] + batch_tag_ids = activations["label_ids"] if isinstance(docs, Doc): docs = [docs] cdef Doc doc diff --git a/spacy/pipeline/tagger.pyx 
b/spacy/pipeline/tagger.pyx index 23c7828fd..ab369c58b 100644 --- a/spacy/pipeline/tagger.pyx +++ b/spacy/pipeline/tagger.pyx @@ -153,12 +153,12 @@ class Tagger(TrainablePipe): n_labels = len(self.labels) guesses = [self.model.ops.alloc((0, n_labels)) for doc in docs] assert len(guesses) == len(docs) - return {"probs": guesses, "guesses": guesses} + return {"probabilities": guesses, "label_ids": guesses} scores = self.model.predict(docs) assert len(scores) == len(docs), (len(scores), len(docs)) guesses = self._scores2guesses(scores) assert len(guesses) == len(docs) - return {"probs": scores, "guesses": guesses} + return {"probabilities": scores, "label_ids": guesses} def _scores2guesses(self, scores): guesses = [] @@ -177,7 +177,7 @@ class Tagger(TrainablePipe): DOCS: https://spacy.io/api/tagger#set_annotations """ - batch_tag_ids = activations["guesses"] + batch_tag_ids = activations["label_ids"] if isinstance(docs, Doc): docs = [docs] cdef Doc doc diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py index afce29e6f..506cdb61c 100644 --- a/spacy/pipeline/textcat.py +++ b/spacy/pipeline/textcat.py @@ -209,10 +209,10 @@ class TextCategorizer(TrainablePipe): tensors = [doc.tensor for doc in docs] xp = self.model.ops.xp scores = xp.zeros((len(list(docs)), len(self.labels))) - return {"probs": scores} + return {"probabilities": scores} scores = self.model.predict(docs) scores = self.model.ops.asarray(scores) - return {"probs": scores} + return {"probabilities": scores} def set_annotations(self, docs: Iterable[Doc], activations: ActivationsT) -> None: """Modify a batch of Doc objects, using pre-computed scores. 
@@ -222,11 +222,11 @@ class TextCategorizer(TrainablePipe): DOCS: https://spacy.io/api/textcategorizer#set_annotations """ - probs = activations["probs"] + probs = activations["probabilities"] for i, doc in enumerate(docs): if self.save_activations: doc.activations[self.name] = {} - doc.activations[self.name]["probs"] = probs[i] + doc.activations[self.name]["probabilities"] = probs[i] for j, label in enumerate(self.labels): doc.cats[label] = float(probs[i, j]) diff --git a/spacy/tests/pipeline/test_edit_tree_lemmatizer.py b/spacy/tests/pipeline/test_edit_tree_lemmatizer.py index d00f9e622..ad2e56729 100644 --- a/spacy/tests/pipeline/test_edit_tree_lemmatizer.py +++ b/spacy/tests/pipeline/test_edit_tree_lemmatizer.py @@ -297,6 +297,9 @@ def test_save_activations(): lemmatizer.save_activations = True doc = nlp("This is a test.") - assert list(doc.activations["trainable_lemmatizer"].keys()) == ["probs", "guesses"] - assert doc.activations["trainable_lemmatizer"]["probs"].shape == (5, nO) - assert doc.activations["trainable_lemmatizer"]["guesses"].shape == (5,) + assert list(doc.activations["trainable_lemmatizer"].keys()) == [ + "probabilities", + "tree_ids", + ] + assert doc.activations["trainable_lemmatizer"]["probabilities"].shape == (5, nO) + assert doc.activations["trainable_lemmatizer"]["tree_ids"].shape == (5,) diff --git a/spacy/tests/pipeline/test_morphologizer.py b/spacy/tests/pipeline/test_morphologizer.py index cad558a7d..70fc77304 100644 --- a/spacy/tests/pipeline/test_morphologizer.py +++ b/spacy/tests/pipeline/test_morphologizer.py @@ -215,6 +215,9 @@ def test_save_activations(): morphologizer.save_activations = True doc = nlp("This is a test.") assert "morphologizer" in doc.activations - assert set(doc.activations["morphologizer"].keys()) == {"guesses", "probs"} - assert doc.activations["morphologizer"]["probs"].shape == (5, 6) - assert doc.activations["morphologizer"]["guesses"].shape == (5,) + assert set(doc.activations["morphologizer"].keys()) == { + 
"label_ids", + "probabilities", + } + assert doc.activations["morphologizer"]["probabilities"].shape == (5, 6) + assert doc.activations["morphologizer"]["label_ids"].shape == (5,) diff --git a/spacy/tests/pipeline/test_senter.py b/spacy/tests/pipeline/test_senter.py index fca4ce821..f83228510 100644 --- a/spacy/tests/pipeline/test_senter.py +++ b/spacy/tests/pipeline/test_senter.py @@ -123,6 +123,6 @@ def test_save_activations(): senter.save_activations = True doc = nlp("This is a test.") assert "senter" in doc.activations - assert set(doc.activations["senter"].keys()) == {"guesses", "probs"} - assert doc.activations["senter"]["probs"].shape == (5, nO) - assert doc.activations["senter"]["guesses"].shape == (5,) + assert set(doc.activations["senter"].keys()) == {"label_ids", "probabilities"} + assert doc.activations["senter"]["probabilities"].shape == (5, nO) + assert doc.activations["senter"]["label_ids"].shape == (5,) diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py index 360985def..754e9f6be 100644 --- a/spacy/tests/pipeline/test_tagger.py +++ b/spacy/tests/pipeline/test_tagger.py @@ -228,9 +228,9 @@ def test_save_activations(): tagger.save_activations = True doc = nlp("This is a test.") assert "tagger" in doc.activations - assert set(doc.activations["tagger"].keys()) == {"guesses", "probs"} - assert doc.activations["tagger"]["probs"].shape == (5, len(TAGS)) - assert doc.activations["tagger"]["guesses"].shape == (5,) + assert set(doc.activations["tagger"].keys()) == {"label_ids", "probabilities"} + assert doc.activations["tagger"]["probabilities"].shape == (5, len(TAGS)) + assert doc.activations["tagger"]["label_ids"].shape == (5,) def test_tagger_requires_labels(): diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py index 2e427868b..c2b990923 100644 --- a/spacy/tests/pipeline/test_textcat.py +++ b/spacy/tests/pipeline/test_textcat.py @@ -286,7 +286,7 @@ def test_issue9904(): 
nlp.initialize(get_examples) examples = get_examples() - scores = textcat.predict([eg.predicted for eg in examples])["probs"] + scores = textcat.predict([eg.predicted for eg in examples])["probabilities"] loss = textcat.get_loss(examples, scores)[0] loss_double_bs = textcat.get_loss(examples * 2, scores.repeat(2, axis=0))[0] @@ -890,8 +890,8 @@ def test_save_activations(): textcat.save_activations = True doc = nlp("This is a test.") - assert list(doc.activations["textcat"].keys()) == ["probs"] - assert doc.activations["textcat"]["probs"].shape == (nO,) + assert list(doc.activations["textcat"].keys()) == ["probabilities"] + assert doc.activations["textcat"]["probabilities"].shape == (nO,) def test_save_activations_multi(): @@ -910,5 +910,5 @@ def test_save_activations_multi(): textcat.save_activations = True doc = nlp("This is a test.") - assert list(doc.activations["textcat_multilabel"].keys()) == ["probs"] - assert doc.activations["textcat_multilabel"]["probs"].shape == (nO,) + assert list(doc.activations["textcat_multilabel"].keys()) == ["probabilities"] + assert doc.activations["textcat_multilabel"]["probabilities"].shape == (nO,) diff --git a/website/docs/api/edittreelemmatizer.md b/website/docs/api/edittreelemmatizer.md index f6abe2bcd..8bee74316 100644 --- a/website/docs/api/edittreelemmatizer.md +++ b/website/docs/api/edittreelemmatizer.md @@ -52,7 +52,7 @@ architectures and their arguments and hyperparameters. | `overwrite` | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ | | `top_k` | The number of most probable edit trees to try before resorting to `backoff`. Defaults to `1`. ~~int~~ | | `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"lemma"`. ~~Optional[Callable]~~ | -| `save_activations` 4.0 | Save activations in `Doc` when annotating. Saved activations are `"probs"` and `"guesses"`. 
~~Union[bool, list[str]]~~ | +| `save_activations` 4.0 | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"tree_ids"`. ~~Union[bool, list[str]]~~ | ```python %%GITHUB_SPACY/spacy/pipeline/edit_tree_lemmatizer.py diff --git a/website/docs/api/morphologizer.md b/website/docs/api/morphologizer.md index 475c48ee7..97444b157 100644 --- a/website/docs/api/morphologizer.md +++ b/website/docs/api/morphologizer.md @@ -48,7 +48,7 @@ architectures and their arguments and hyperparameters. | `overwrite` 3.2 | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~ | | `extend` 3.2 | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~ | | `scorer` 3.2 | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ | -| `save_activations` 4.0 | Save activations in `Doc` when annotating. Saved activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ | +| `save_activations` 4.0 | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~ | ```python %%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx diff --git a/website/docs/api/sentencerecognizer.md b/website/docs/api/sentencerecognizer.md index aa73a78d5..03744e1b5 100644 --- a/website/docs/api/sentencerecognizer.md +++ b/website/docs/api/sentencerecognizer.md @@ -44,7 +44,7 @@ architectures and their arguments and hyperparameters. | `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ | | `overwrite` 3.2 | Whether existing annotation is overwritten. Defaults to `False`. 
~~bool~~ | | `scorer` 3.2 | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ | -| `save_activations` 4.0 | Save activations in `Doc` when annotating. Saved activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ | +| `save_activations` 4.0 | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~ | ```python %%GITHUB_SPACY/spacy/pipeline/senter.pyx diff --git a/website/docs/api/tagger.md b/website/docs/api/tagger.md index 3dfc0dbf1..0d77d9bf4 100644 --- a/website/docs/api/tagger.md +++ b/website/docs/api/tagger.md @@ -46,7 +46,7 @@ architectures and their arguments and hyperparameters. | `overwrite` 3.2 | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ | | `scorer` 3.2 | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~ | | `neg_prefix` 3.2.1 | The prefix used to specify incorrect tags while training. The tagger will learn not to predict exactly this tag. Defaults to `!`. ~~str~~ | -| `save_activations` 4.0 | Save activations in `Doc` when annotating. Saved activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ | +| `save_activations` 4.0 | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~ | ```python %%GITHUB_SPACY/spacy/pipeline/tagger.pyx diff --git a/website/docs/api/textcategorizer.md b/website/docs/api/textcategorizer.md index 0077b936c..d8a609693 100644 --- a/website/docs/api/textcategorizer.md +++ b/website/docs/api/textcategorizer.md @@ -125,7 +125,7 @@ shortcut for this and instantiate the component using its string name and | _keyword-only_ | | | `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. 
~~float~~ | | `scorer` | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ | -| `save_activations` 4.0 | Save activations in `Doc` when annotating. The supported activations is `"probs"`. ~~Union[bool, list[str]]~~ | +| `save_activations` 4.0 | Save activations in `Doc` when annotating. The supported activation is `"probabilities"`. ~~Union[bool, list[str]]~~ | ## TextCategorizer.\_\_call\_\_ {#call tag="method"}