Rename activations

- "probs" -> "probabilities"
- "guesses" -> "label_ids", except in the edit tree lemmatizer, where
  "guesses" -> "tree_ids".
Daniël de Kok 2022-08-31 11:18:40 +02:00
parent 6f80e80305
commit cd6e4fa8f4
15 changed files with 40 additions and 34 deletions
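
Note: a minimal sketch of what the rename means for code that reads saved activations. It assumes a hypothetical trained pipeline (loaded here as `my_pipeline`, a placeholder name) containing the `trainable_lemmatizer` and `tagger` components used in the tests below; the component names and activation keys come from this commit.

```python
# Hedged sketch: accessing activations under the new key names.
# "my_pipeline" is a placeholder for any trained pipeline with these components.
import spacy

nlp = spacy.load("my_pipeline")
nlp.get_pipe("trainable_lemmatizer").save_activations = True
nlp.get_pipe("tagger").save_activations = True

doc = nlp("This is a test.")

# Previously stored under "probs" / "guesses":
lemma_scores = doc.activations["trainable_lemmatizer"]["probabilities"]
lemma_tree_ids = doc.activations["trainable_lemmatizer"]["tree_ids"]
tag_scores = doc.activations["tagger"]["probabilities"]
tag_ids = doc.activations["tagger"]["label_ids"]
```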

View File

@@ -165,12 +165,12 @@ class EditTreeLemmatizer(TrainablePipe):
                 self.model.ops.alloc((0, n_labels), dtype="i") for doc in docs
             ]
             assert len(guesses) == n_docs
-            return {"probs": scores, "guesses": guesses}
+            return {"probabilities": scores, "tree_ids": guesses}
         scores = self.model.predict(docs)
         assert len(scores) == n_docs
         guesses = self._scores2guesses(docs, scores)
         assert len(guesses) == n_docs
-        return {"probs": scores, "guesses": guesses}
+        return {"probabilities": scores, "tree_ids": guesses}
 
     def _scores2guesses(self, docs, scores):
         guesses = []
@@ -199,7 +199,7 @@ class EditTreeLemmatizer(TrainablePipe):
         return guesses
 
     def set_annotations(self, docs: Iterable[Doc], activations: ActivationsT):
-        batch_tree_ids = activations["guesses"]
+        batch_tree_ids = activations["tree_ids"]
         for i, doc in enumerate(docs):
             if self.save_activations:
                 doc.activations[self.name] = {}

View File

@@ -237,7 +237,7 @@ class Morphologizer(Tagger):
 
         DOCS: https://spacy.io/api/morphologizer#set_annotations
         """
-        batch_tag_ids = activations["guesses"]
+        batch_tag_ids = activations["label_ids"]
         if isinstance(docs, Doc):
             docs = [docs]
         cdef Doc doc

View File

@@ -129,7 +129,7 @@ class SentenceRecognizer(Tagger):
 
         DOCS: https://spacy.io/api/sentencerecognizer#set_annotations
         """
-        batch_tag_ids = activations["guesses"]
+        batch_tag_ids = activations["label_ids"]
         if isinstance(docs, Doc):
             docs = [docs]
         cdef Doc doc

View File

@@ -153,12 +153,12 @@ class Tagger(TrainablePipe):
             n_labels = len(self.labels)
             guesses = [self.model.ops.alloc((0, n_labels)) for doc in docs]
             assert len(guesses) == len(docs)
-            return {"probs": guesses, "guesses": guesses}
+            return {"probabilities": guesses, "label_ids": guesses}
         scores = self.model.predict(docs)
         assert len(scores) == len(docs), (len(scores), len(docs))
         guesses = self._scores2guesses(scores)
         assert len(guesses) == len(docs)
-        return {"probs": scores, "guesses": guesses}
+        return {"probabilities": scores, "label_ids": guesses}
 
     def _scores2guesses(self, scores):
         guesses = []
@@ -177,7 +177,7 @@ class Tagger(TrainablePipe):
 
         DOCS: https://spacy.io/api/tagger#set_annotations
         """
-        batch_tag_ids = activations["guesses"]
+        batch_tag_ids = activations["label_ids"]
         if isinstance(docs, Doc):
             docs = [docs]
         cdef Doc doc
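
Note: a hedged sketch of the `Tagger.predict` / `set_annotations` contract after the rename, assuming `tagger` is an initialized spaCy `Tagger` and `docs` is a list of non-empty `Doc` objects; the key names and per-doc shapes follow the diff and tests in this commit.

```python
# Sketch only: consuming the renamed activation dict returned by Tagger.predict.
activations = tagger.predict(docs)          # {"probabilities": ..., "label_ids": ...}
scores = activations["probabilities"]       # per Doc: float array, roughly (n_tokens, n_labels)
label_ids = activations["label_ids"]        # per Doc: int array of shape (n_tokens,)
tagger.set_annotations(docs, activations)   # reads activations["label_ids"] to assign tags
```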

View File

@@ -209,10 +209,10 @@ class TextCategorizer(TrainablePipe):
             tensors = [doc.tensor for doc in docs]
             xp = self.model.ops.xp
             scores = xp.zeros((len(list(docs)), len(self.labels)))
-            return {"probs": scores}
+            return {"probabilities": scores}
         scores = self.model.predict(docs)
         scores = self.model.ops.asarray(scores)
-        return {"probs": scores}
+        return {"probabilities": scores}
 
     def set_annotations(self, docs: Iterable[Doc], activations: ActivationsT) -> None:
         """Modify a batch of Doc objects, using pre-computed scores.
@@ -222,11 +222,11 @@ class TextCategorizer(TrainablePipe):
 
         DOCS: https://spacy.io/api/textcategorizer#set_annotations
        """
-        probs = activations["probs"]
+        probs = activations["probabilities"]
         for i, doc in enumerate(docs):
             if self.save_activations:
                 doc.activations[self.name] = {}
-                doc.activations[self.name]["probs"] = probs[i]
+                doc.activations[self.name]["probabilities"] = probs[i]
             for j, label in enumerate(self.labels):
                 doc.cats[label] = float(probs[i, j])
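
Note: after this change the text categorizer saves a single `"probabilities"` activation per `Doc`, and `set_annotations` copies the same values into `doc.cats`. A hedged sketch, assuming `nlp` is a pipeline with a trained `"textcat"` component and `save_activations` enabled:

```python
# Sketch only: the saved "probabilities" row and doc.cats hold the same scores.
doc = nlp("This is a test.")
probs = doc.activations["textcat"]["probabilities"]   # shape (n_labels,) for this doc
for j, label in enumerate(nlp.get_pipe("textcat").labels):
    assert doc.cats[label] == float(probs[j])
```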

View File

@@ -297,6 +297,9 @@ def test_save_activations():
 
     lemmatizer.save_activations = True
     doc = nlp("This is a test.")
-    assert list(doc.activations["trainable_lemmatizer"].keys()) == ["probs", "guesses"]
-    assert doc.activations["trainable_lemmatizer"]["probs"].shape == (5, nO)
-    assert doc.activations["trainable_lemmatizer"]["guesses"].shape == (5,)
+    assert list(doc.activations["trainable_lemmatizer"].keys()) == [
+        "probabilities",
+        "tree_ids",
+    ]
+    assert doc.activations["trainable_lemmatizer"]["probabilities"].shape == (5, nO)
+    assert doc.activations["trainable_lemmatizer"]["tree_ids"].shape == (5,)

View File

@@ -215,6 +215,9 @@ def test_save_activations():
     morphologizer.save_activations = True
     doc = nlp("This is a test.")
     assert "morphologizer" in doc.activations
-    assert set(doc.activations["morphologizer"].keys()) == {"guesses", "probs"}
-    assert doc.activations["morphologizer"]["probs"].shape == (5, 6)
-    assert doc.activations["morphologizer"]["guesses"].shape == (5,)
+    assert set(doc.activations["morphologizer"].keys()) == {
+        "label_ids",
+        "probabilities",
+    }
+    assert doc.activations["morphologizer"]["probabilities"].shape == (5, 6)
+    assert doc.activations["morphologizer"]["label_ids"].shape == (5,)

View File

@@ -123,6 +123,6 @@ def test_save_activations():
     senter.save_activations = True
     doc = nlp("This is a test.")
     assert "senter" in doc.activations
-    assert set(doc.activations["senter"].keys()) == {"guesses", "probs"}
-    assert doc.activations["senter"]["probs"].shape == (5, nO)
-    assert doc.activations["senter"]["guesses"].shape == (5,)
+    assert set(doc.activations["senter"].keys()) == {"label_ids", "probabilities"}
+    assert doc.activations["senter"]["probabilities"].shape == (5, nO)
+    assert doc.activations["senter"]["label_ids"].shape == (5,)

View File

@@ -228,9 +228,9 @@ def test_save_activations():
     tagger.save_activations = True
     doc = nlp("This is a test.")
     assert "tagger" in doc.activations
-    assert set(doc.activations["tagger"].keys()) == {"guesses", "probs"}
-    assert doc.activations["tagger"]["probs"].shape == (5, len(TAGS))
-    assert doc.activations["tagger"]["guesses"].shape == (5,)
+    assert set(doc.activations["tagger"].keys()) == {"label_ids", "probabilities"}
+    assert doc.activations["tagger"]["probabilities"].shape == (5, len(TAGS))
+    assert doc.activations["tagger"]["label_ids"].shape == (5,)
 
 
 def test_tagger_requires_labels():

View File

@@ -286,7 +286,7 @@ def test_issue9904():
     nlp.initialize(get_examples)
     examples = get_examples()
-    scores = textcat.predict([eg.predicted for eg in examples])["probs"]
+    scores = textcat.predict([eg.predicted for eg in examples])["probabilities"]
     loss = textcat.get_loss(examples, scores)[0]
     loss_double_bs = textcat.get_loss(examples * 2, scores.repeat(2, axis=0))[0]
@@ -890,8 +890,8 @@ def test_save_activations():
 
     textcat.save_activations = True
     doc = nlp("This is a test.")
-    assert list(doc.activations["textcat"].keys()) == ["probs"]
-    assert doc.activations["textcat"]["probs"].shape == (nO,)
+    assert list(doc.activations["textcat"].keys()) == ["probabilities"]
+    assert doc.activations["textcat"]["probabilities"].shape == (nO,)
 
 
 def test_save_activations_multi():
@@ -910,5 +910,5 @@ def test_save_activations_multi():
 
     textcat.save_activations = True
    doc = nlp("This is a test.")
-    assert list(doc.activations["textcat_multilabel"].keys()) == ["probs"]
-    assert doc.activations["textcat_multilabel"]["probs"].shape == (nO,)
+    assert list(doc.activations["textcat_multilabel"].keys()) == ["probabilities"]
+    assert doc.activations["textcat_multilabel"]["probabilities"].shape == (nO,)

View File

@@ -52,7 +52,7 @@ architectures and their arguments and hyperparameters.
 | `overwrite` | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
 | `top_k` | The number of most probable edit trees to try before resorting to `backoff`. Defaults to `1`. ~~int~~ |
 | `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"lemma"`. ~~Optional[Callable]~~ |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
+| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"tree_ids"`. ~~Union[bool, list[str]]~~ |
 
 ```python
 %%GITHUB_SPACY/spacy/pipeline/edit_tree_lemmatizer.py

View File

@@ -48,7 +48,7 @@ architectures and their arguments and hyperparameters.
 | `overwrite` <Tag variant="new">3.2</Tag> | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~ |
 | `extend` <Tag variant="new">3.2</Tag> | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~ |
 | `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
+| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~ |
 
 ```python
 %%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx

View File

@@ -44,7 +44,7 @@ architectures and their arguments and hyperparameters.
 | `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
 | `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
 | `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
+| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~ |
 
 ```python
 %%GITHUB_SPACY/spacy/pipeline/senter.pyx

View File

@@ -46,7 +46,7 @@ architectures and their arguments and hyperparameters.
 | `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
 | `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~ |
 | `neg_prefix` <Tag variant="new">3.2.1</Tag> | The prefix used to specify incorrect tags while training. The tagger will learn not to predict exactly this tag. Defaults to `!`. ~~str~~ |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
+| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. Saved activations are `"probabilities"` and `"label_ids"`. ~~Union[bool, list[str]]~~ |
 
 ```python
 %%GITHUB_SPACY/spacy/pipeline/tagger.pyx

View File

@@ -125,7 +125,7 @@ shortcut for this and instantiate the component using its string name and
 | _keyword-only_ | |
 | `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ |
 | `scorer` | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ |
-| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. The supported activations is `"probs"`. ~~Union[bool, list[str]]~~ |
+| `save_activations` <Tag variant="new">4.0</Tag> | Save activations in `Doc` when annotating. The supported activations is `"probabilities"`. ~~Union[bool, list[str]]~~ |
 
 ## TextCategorizer.\_\_call\_\_ {#call tag="method"}