diff --git a/spacy/errors.py b/spacy/errors.py index 5e51b78b1..0d4b08af1 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -941,7 +941,7 @@ class Errors(metaclass=ErrorsWithCodes): "`{arg2}`={arg2_values} but these arguments are conflicting.") E1043 = ("Expected None or a value in range [{range_start}, {range_end}] for entity linker threshold, but got " "{value}.") - E1400 = ("store_activations attribute must be set to List[str] or bool") + E1400 = ("save_activations attribute must be set to List[str] or bool") # Deprecated model shortcuts, only used in errors and warnings diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py index d94e8981b..2d14a5285 100644 --- a/spacy/pipeline/edit_tree_lemmatizer.py +++ b/spacy/pipeline/edit_tree_lemmatizer.py @@ -52,7 +52,7 @@ DEFAULT_EDIT_TREE_LEMMATIZER_MODEL = Config().from_str(default_model_config)["mo "overwrite": False, "top_k": 1, "scorer": {"@scorers": "spacy.lemmatizer_scorer.v1"}, - "store_activations": False, + "save_activations": False, }, default_score_weights={"lemma_acc": 1.0}, ) @@ -65,7 +65,7 @@ def make_edit_tree_lemmatizer( overwrite: bool, top_k: int, scorer: Optional[Callable], - store_activations: bool, + save_activations: bool, ): """Construct an EditTreeLemmatizer component.""" return EditTreeLemmatizer( @@ -77,7 +77,7 @@ def make_edit_tree_lemmatizer( overwrite=overwrite, top_k=top_k, scorer=scorer, - store_activations=store_activations, + save_activations=save_activations, ) @@ -97,7 +97,7 @@ class EditTreeLemmatizer(TrainablePipe): overwrite: bool = False, top_k: int = 1, scorer: Optional[Callable] = lemmatizer_score, - store_activations: bool = False, + save_activations: bool = False, ): """ Construct an edit tree lemmatizer. @@ -109,7 +109,7 @@ class EditTreeLemmatizer(TrainablePipe): frequency in the training data. overwrite (bool): overwrite existing lemma annotations. top_k (int): try to apply at most the k most probable edit trees. 
- store_activations (bool): store model activations in Doc when annotating. + save_activations (bool): save model activations in Doc when annotating. """ self.vocab = vocab self.model = model @@ -124,7 +124,7 @@ class EditTreeLemmatizer(TrainablePipe): self.cfg: Dict[str, Any] = {"labels": []} self.scorer = scorer - self.store_activations = store_activations + self.save_activations = save_activations def get_loss( self, examples: Iterable[Example], scores: List[Floats2d] @@ -201,7 +201,7 @@ class EditTreeLemmatizer(TrainablePipe): def set_annotations(self, docs: Iterable[Doc], activations: ActivationsT): batch_tree_ids = activations["guesses"] for i, doc in enumerate(docs): - if self.store_activations: + if self.save_activations: doc.activations[self.name] = {} for act_name, acts in activations.items(): doc.activations[self.name][act_name] = acts[i] diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py index 17680b95d..e69fa618b 100644 --- a/spacy/pipeline/entity_linker.py +++ b/spacy/pipeline/entity_linker.py @@ -62,7 +62,7 @@ DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"] "scorer": {"@scorers": "spacy.entity_linker_scorer.v1"}, "use_gold_ents": True, "threshold": None, - "store_activations": False, + "save_activations": False, }, default_score_weights={ "nel_micro_f": 1.0, @@ -85,7 +85,7 @@ def make_entity_linker( scorer: Optional[Callable], use_gold_ents: bool, threshold: Optional[float] = None, - store_activations: bool, + save_activations: bool, ): """Construct an EntityLinker component. @@ -104,7 +104,7 @@ def make_entity_linker( component must provide entity annotations. threshold (Optional[float]): Confidence threshold for entity predictions. If confidence is below the threshold, prediction is discarded. If None, predictions are not filtered by any threshold. - store_activations (bool): store model activations in Doc when annotating. + save_activations (bool): save model activations in Doc when annotating. 
""" if not model.attrs.get("include_span_maker", False): @@ -136,7 +136,7 @@ def make_entity_linker( scorer=scorer, use_gold_ents=use_gold_ents, threshold=threshold, - store_activations=store_activations, + save_activations=save_activations, ) @@ -173,7 +173,7 @@ class EntityLinker(TrainablePipe): scorer: Optional[Callable] = entity_linker_score, use_gold_ents: bool, threshold: Optional[float] = None, - store_activations: bool = False, + save_activations: bool = False, ) -> None: """Initialize an entity linker. @@ -222,7 +222,7 @@ class EntityLinker(TrainablePipe): self.scorer = scorer self.use_gold_ents = use_gold_ents self.threshold = threshold - self.store_activations = store_activations + self.save_activations = save_activations def set_kb(self, kb_loader: Callable[[Vocab], KnowledgeBase]): """Define the KB of this pipe by providing a function that will @@ -550,7 +550,7 @@ class EntityLinker(TrainablePipe): i = 0 overwrite = self.cfg["overwrite"] for j, doc in enumerate(docs): - if self.store_activations: + if self.save_activations: doc.activations[self.name] = {} for act_name, acts in activations.items(): if act_name != "kb_ids": @@ -664,7 +664,7 @@ class EntityLinker(TrainablePipe): doc_scores: List[Floats1d], doc_ents: List[Ints1d], ): - if not self.store_activations: + if not self.save_activations: return ops = self.model.ops lengths = ops.asarray1i([s.shape[0] for s in doc_scores]) @@ -679,7 +679,7 @@ class EntityLinker(TrainablePipe): scores: Sequence[float], ents: Sequence[int], ): - if not self.store_activations: + if not self.save_activations: return ops = self.model.ops doc_scores.append(ops.asarray1f(scores)) diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index 2d60a841a..d256cbf0b 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -58,7 +58,7 @@ DEFAULT_MORPH_MODEL = Config().from_str(default_model_config)["model"] "overwrite": True, "extend": False, "scorer": {"@scorers": 
"spacy.morphologizer_scorer.v1"}, - "store_activations": False + "save_activations": False }, default_score_weights={"pos_acc": 0.5, "morph_acc": 0.5, "morph_per_feat": None}, ) @@ -69,10 +69,10 @@ def make_morphologizer( overwrite: bool, extend: bool, scorer: Optional[Callable], - store_activations: bool, + save_activations: bool, ): return Morphologizer(nlp.vocab, model, name, overwrite=overwrite, extend=extend, scorer=scorer, - store_activations=store_activations) + save_activations=save_activations) def morphologizer_score(examples, **kwargs): @@ -104,7 +104,7 @@ class Morphologizer(Tagger): overwrite: bool = BACKWARD_OVERWRITE, extend: bool = BACKWARD_EXTEND, scorer: Optional[Callable] = morphologizer_score, - store_activations: bool = False, + save_activations: bool = False, ): """Initialize a morphologizer. @@ -115,7 +115,7 @@ class Morphologizer(Tagger): scorer (Optional[Callable]): The scoring method. Defaults to Scorer.score_token_attr for the attributes "pos" and "morph" and Scorer.score_token_attr_per_feat for the attribute "morph". - store_activations (bool): store model activations in Doc when annotating. + save_activations (bool): save model activations in Doc when annotating. DOCS: https://spacy.io/api/morphologizer#init """ @@ -135,7 +135,7 @@ class Morphologizer(Tagger): } self.cfg = dict(sorted(cfg.items())) self.scorer = scorer - self.store_activations = store_activations + self.save_activations = save_activations @property def labels(self): @@ -249,7 +249,7 @@ class Morphologizer(Tagger): # to allocate a compatible container out of the iterable. 
labels = tuple(self.labels) for i, doc in enumerate(docs): - if self.store_activations: + if self.save_activations: doc.activations[self.name] = {} for act_name, acts in activations.items(): doc.activations[self.name][act_name] = acts[i] diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx index ff2b9f384..ee6ee9ff9 100644 --- a/spacy/pipeline/senter.pyx +++ b/spacy/pipeline/senter.pyx @@ -43,7 +43,7 @@ DEFAULT_SENTER_MODEL = Config().from_str(default_model_config)["model"] "model": DEFAULT_SENTER_MODEL, "overwrite": False, "scorer": {"@scorers": "spacy.senter_scorer.v1"}, - "store_activations": False + "save_activations": False }, default_score_weights={"sents_f": 1.0, "sents_p": 0.0, "sents_r": 0.0}, ) @@ -52,8 +52,8 @@ def make_senter(nlp: Language, model: Model, overwrite: bool, scorer: Optional[Callable], - store_activations: bool): - return SentenceRecognizer(nlp.vocab, model, name, overwrite=overwrite, scorer=scorer, store_activations=store_activations) + save_activations: bool): + return SentenceRecognizer(nlp.vocab, model, name, overwrite=overwrite, scorer=scorer, save_activations=save_activations) def senter_score(examples, **kwargs): @@ -83,7 +83,7 @@ class SentenceRecognizer(Tagger): *, overwrite=BACKWARD_OVERWRITE, scorer=senter_score, - store_activations: bool = False, + save_activations: bool = False, ): """Initialize a sentence recognizer. @@ -93,7 +93,7 @@ class SentenceRecognizer(Tagger): losses during training. scorer (Optional[Callable]): The scoring method. Defaults to Scorer.score_spans for the attribute "sents". - store_activations (bool): store model activations in Doc when annotating. + save_activations (bool): save model activations in Doc when annotating. 
DOCS: https://spacy.io/api/sentencerecognizer#init """ @@ -103,7 +103,7 @@ class SentenceRecognizer(Tagger): self._rehearsal_model = None self.cfg = {"overwrite": overwrite} self.scorer = scorer - self.store_activations = store_activations + self.save_activations = save_activations @property def labels(self): @@ -135,7 +135,7 @@ class SentenceRecognizer(Tagger): cdef Doc doc cdef bint overwrite = self.cfg["overwrite"] for i, doc in enumerate(docs): - if self.store_activations: + if self.save_activations: doc.activations[self.name] = {} for act_name, acts in activations.items(): doc.activations[self.name][act_name] = acts[i] diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py index ae00690df..c517991f5 100644 --- a/spacy/pipeline/spancat.py +++ b/spacy/pipeline/spancat.py @@ -110,7 +110,7 @@ def build_ngram_range_suggester(min_size: int, max_size: int) -> Suggester: "model": DEFAULT_SPANCAT_MODEL, "suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]}, "scorer": {"@scorers": "spacy.spancat_scorer.v1"}, - "store_activations": False, + "save_activations": False, }, default_score_weights={"spans_sc_f": 1.0, "spans_sc_p": 0.0, "spans_sc_r": 0.0}, ) @@ -123,7 +123,7 @@ def make_spancat( scorer: Optional[Callable], threshold: float, max_positive: Optional[int], - store_activations: bool, + save_activations: bool, ) -> "SpanCategorizer": """Create a SpanCategorizer component. The span categorizer consists of two parts: a suggester function that proposes candidate spans, and a labeller @@ -144,7 +144,7 @@ def make_spancat( 0.5. max_positive (Optional[int]): Maximum number of labels to consider positive per span. Defaults to None, indicating no limit. - store_activations (bool): store model activations in Doc when annotating. + save_activations (bool): save model activations in Doc when annotating. 
""" return SpanCategorizer( nlp.vocab, @@ -155,7 +155,7 @@ def make_spancat( max_positive=max_positive, name=name, scorer=scorer, - store_activations=store_activations, + save_activations=save_activations, ) @@ -194,7 +194,7 @@ class SpanCategorizer(TrainablePipe): threshold: float = 0.5, max_positive: Optional[int] = None, scorer: Optional[Callable] = spancat_score, - store_activations: bool = False, + save_activations: bool = False, ) -> None: """Initialize the span categorizer. vocab (Vocab): The shared vocabulary. @@ -227,7 +227,7 @@ class SpanCategorizer(TrainablePipe): self.model = model self.name = name self.scorer = scorer - self.store_activations = store_activations + self.save_activations = save_activations @property def key(self) -> str: @@ -317,7 +317,7 @@ class SpanCategorizer(TrainablePipe): offset = 0 for i, doc in enumerate(docs): indices_i = indices[i].dataXd - if self.store_activations: + if self.save_activations: doc.activations[self.name] = {} doc.activations[self.name]["indices"] = indices_i doc.activations[self.name]["scores"] = scores[ diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx index 12bdf209d..23c7828fd 100644 --- a/spacy/pipeline/tagger.pyx +++ b/spacy/pipeline/tagger.pyx @@ -53,7 +53,7 @@ DEFAULT_TAGGER_MODEL = Config().from_str(default_model_config)["model"] "overwrite": False, "scorer": {"@scorers": "spacy.tagger_scorer.v1"}, "neg_prefix": "!", - "store_activations": False, + "save_activations": False, }, default_score_weights={"tag_acc": 1.0}, ) @@ -64,7 +64,7 @@ def make_tagger( overwrite: bool, scorer: Optional[Callable], neg_prefix: str, - store_activations: bool, + save_activations: bool, ): """Construct a part-of-speech tagger component. @@ -74,7 +74,7 @@ def make_tagger( with the rows summing to 1). 
""" return Tagger(nlp.vocab, model, name, overwrite=overwrite, scorer=scorer, neg_prefix=neg_prefix, - store_activations=store_activations) + save_activations=save_activations) def tagger_score(examples, **kwargs): @@ -100,7 +100,7 @@ class Tagger(TrainablePipe): overwrite=BACKWARD_OVERWRITE, scorer=tagger_score, neg_prefix="!", - store_activations: bool = False, + save_activations: bool = False, ): """Initialize a part-of-speech tagger. @@ -110,7 +110,7 @@ class Tagger(TrainablePipe): losses during training. scorer (Optional[Callable]): The scoring method. Defaults to Scorer.score_token_attr for the attribute "tag". - store_activations (bool): store model activations in Doc when annotating. + save_activations (bool): save model activations in Doc when annotating. DOCS: https://spacy.io/api/tagger#init """ @@ -121,7 +121,7 @@ class Tagger(TrainablePipe): cfg = {"labels": [], "overwrite": overwrite, "neg_prefix": neg_prefix} self.cfg = dict(sorted(cfg.items())) self.scorer = scorer - self.store_activations = store_activations + self.save_activations = save_activations @property def labels(self): @@ -185,7 +185,7 @@ class Tagger(TrainablePipe): cdef bint overwrite = self.cfg["overwrite"] labels = self.labels for i, doc in enumerate(docs): - if self.store_activations: + if self.save_activations: doc.activations[self.name] = {} for act_name, acts in activations.items(): doc.activations[self.name][act_name] = acts[i] diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py index 761c42f4f..afce29e6f 100644 --- a/spacy/pipeline/textcat.py +++ b/spacy/pipeline/textcat.py @@ -78,7 +78,7 @@ subword_features = true "threshold": 0.5, "model": DEFAULT_SINGLE_TEXTCAT_MODEL, "scorer": {"@scorers": "spacy.textcat_scorer.v1"}, - "store_activations": False, + "save_activations": False, }, default_score_weights={ "cats_score": 1.0, @@ -100,7 +100,7 @@ def make_textcat( model: Model[List[Doc], List[Floats2d]], threshold: float, scorer: Optional[Callable], - 
store_activations: bool, + save_activations: bool, ) -> "TextCategorizer": """Create a TextCategorizer component. The text categorizer predicts categories over a whole document. It can learn one or more labels, and the labels are considered @@ -110,7 +110,7 @@ def make_textcat( scores for each category. threshold (float): Cutoff to consider a prediction "positive". scorer (Optional[Callable]): The scoring method. - store_activations (bool): store model activations in Doc when annotating. + save_activations (bool): save model activations in Doc when annotating. """ return TextCategorizer( nlp.vocab, @@ -118,7 +118,7 @@ def make_textcat( name, threshold=threshold, scorer=scorer, - store_activations=store_activations, + save_activations=save_activations, ) @@ -150,7 +150,7 @@ class TextCategorizer(TrainablePipe): *, threshold: float, scorer: Optional[Callable] = textcat_score, - store_activations: bool = False, + save_activations: bool = False, ) -> None: """Initialize a text categorizer for single-label classification. 
@@ -171,7 +171,7 @@ class TextCategorizer(TrainablePipe): cfg = {"labels": [], "threshold": threshold, "positive_label": None} self.cfg = dict(cfg) self.scorer = scorer - self.store_activations = store_activations + self.save_activations = save_activations @property def support_missing_values(self): @@ -224,7 +224,7 @@ class TextCategorizer(TrainablePipe): """ probs = activations["probs"] for i, doc in enumerate(docs): - if self.store_activations: + if self.save_activations: doc.activations[self.name] = {} doc.activations[self.name]["probs"] = probs[i] for j, label in enumerate(self.labels): diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py index 7ac56fba3..3a6dd0b0c 100644 --- a/spacy/pipeline/textcat_multilabel.py +++ b/spacy/pipeline/textcat_multilabel.py @@ -75,7 +75,7 @@ subword_features = true "threshold": 0.5, "model": DEFAULT_MULTI_TEXTCAT_MODEL, "scorer": {"@scorers": "spacy.textcat_multilabel_scorer.v1"}, - "store_activations": False, + "save_activations": False, }, default_score_weights={ "cats_score": 1.0, @@ -97,7 +97,7 @@ def make_multilabel_textcat( model: Model[List[Doc], List[Floats2d]], threshold: float, scorer: Optional[Callable], - store_activations: bool, + save_activations: bool, ) -> "TextCategorizer": """Create a TextCategorizer component. The text categorizer predicts categories over a whole document. It can learn one or more labels, and the labels are considered @@ -114,7 +114,7 @@ def make_multilabel_textcat( name, threshold=threshold, scorer=scorer, - store_activations=store_activations, + save_activations=save_activations, ) @@ -146,7 +146,7 @@ class MultiLabel_TextCategorizer(TextCategorizer): *, threshold: float, scorer: Optional[Callable] = textcat_multilabel_score, - store_activations: bool = False, + save_activations: bool = False, ) -> None: """Initialize a text categorizer for multi-label classification. 
@@ -155,7 +155,7 @@ class MultiLabel_TextCategorizer(TextCategorizer): name (str): The component instance name, used to add entries to the losses during training. threshold (float): Cutoff to consider a prediction "positive". - store_activations (bool): store model activations in Doc when annotating. + save_activations (bool): save model activations in Doc when annotating. DOCS: https://spacy.io/api/textcategorizer#init """ @@ -166,7 +166,7 @@ class MultiLabel_TextCategorizer(TextCategorizer): cfg = {"labels": [], "threshold": threshold} self.cfg = dict(cfg) self.scorer = scorer - self.store_activations = store_activations + self.save_activations = save_activations @property def support_missing_values(self): diff --git a/spacy/pipeline/trainable_pipe.pxd b/spacy/pipeline/trainable_pipe.pxd index 6ca4a8c89..180f86f45 100644 --- a/spacy/pipeline/trainable_pipe.pxd +++ b/spacy/pipeline/trainable_pipe.pxd @@ -6,4 +6,4 @@ cdef class TrainablePipe(Pipe): cdef public object model cdef public object cfg cdef public object scorer - cdef bint _store_activations + cdef bint _save_activations diff --git a/spacy/pipeline/trainable_pipe.pyx b/spacy/pipeline/trainable_pipe.pyx index a21cea33b..c82f2830c 100644 --- a/spacy/pipeline/trainable_pipe.pyx +++ b/spacy/pipeline/trainable_pipe.pyx @@ -345,9 +345,9 @@ cdef class TrainablePipe(Pipe): return self @property - def store_activations(self): - return self._store_activations + def save_activations(self): + return self._save_activations - @store_activations.setter - def store_activations(self, store_activations: bool): - self._store_activations = store_activations + @save_activations.setter + def save_activations(self, save_activations: bool): + self._save_activations = save_activations diff --git a/spacy/tests/pipeline/test_edit_tree_lemmatizer.py b/spacy/tests/pipeline/test_edit_tree_lemmatizer.py index 9c49e6bcf..d00f9e622 100644 --- a/spacy/tests/pipeline/test_edit_tree_lemmatizer.py +++ 
b/spacy/tests/pipeline/test_edit_tree_lemmatizer.py @@ -282,7 +282,7 @@ def test_empty_strings(): assert no_change == empty -def test_store_activations(): +def test_save_activations(): nlp = English() lemmatizer = cast(TrainablePipe, nlp.add_pipe("trainable_lemmatizer")) lemmatizer.min_tree_freq = 1 @@ -295,7 +295,7 @@ def test_store_activations(): doc = nlp("This is a test.") assert "trainable_lemmatizer" not in doc.activations - lemmatizer.store_activations = True + lemmatizer.save_activations = True doc = nlp("This is a test.") assert list(doc.activations["trainable_lemmatizer"].keys()) == ["probs", "guesses"] assert doc.activations["trainable_lemmatizer"]["probs"].shape == (5, nO) diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py index d3901d7d9..50e187f61 100644 --- a/spacy/tests/pipeline/test_entity_linker.py +++ b/spacy/tests/pipeline/test_entity_linker.py @@ -1179,7 +1179,7 @@ def test_threshold(meet_threshold: bool, config: Dict[str, Any]): assert doc.ents[0].kb_id_ == entity_id if meet_threshold else EntityLinker.NIL -def test_store_activations(): +def test_save_activations(): nlp = English() vector_length = 3 assert "Q2146908" not in nlp.vocab.strings @@ -1231,7 +1231,7 @@ def test_store_activations(): doc = nlp("Russ Cochran was a publisher") assert "entity_linker" not in doc.activations - entity_linker.store_activations = True + entity_linker.save_activations = True doc = nlp("Russ Cochran was a publisher") assert set(doc.activations["entity_linker"].keys()) == {"ents", "scores"} ents = doc.activations["entity_linker"]["ents"] diff --git a/spacy/tests/pipeline/test_morphologizer.py b/spacy/tests/pipeline/test_morphologizer.py index a92db4fdf..17aeeaadf 100644 --- a/spacy/tests/pipeline/test_morphologizer.py +++ b/spacy/tests/pipeline/test_morphologizer.py @@ -201,7 +201,7 @@ def test_overfitting_IO(): assert [t.pos_ for t in doc] == gold_pos_tags -def test_store_activations(): +def 
test_save_activations(): # Simple test to try and quickly overfit the morphologizer - ensuring the ML models work correctly nlp = English() morphologizer = cast(TrainablePipe, nlp.add_pipe("morphologizer")) @@ -213,7 +213,7 @@ def test_store_activations(): doc = nlp("This is a test.") assert "morphologizer" not in doc.activations - morphologizer.store_activations = True + morphologizer.save_activations = True doc = nlp("This is a test.") assert "morphologizer" in doc.activations assert set(doc.activations["morphologizer"].keys()) == {"guesses", "probs"} diff --git a/spacy/tests/pipeline/test_senter.py b/spacy/tests/pipeline/test_senter.py index 34b4e60f9..fca4ce821 100644 --- a/spacy/tests/pipeline/test_senter.py +++ b/spacy/tests/pipeline/test_senter.py @@ -105,7 +105,7 @@ def test_overfitting_IO(): assert "senter" not in nlp.pipe_labels -def test_store_activations(): +def test_save_activations(): # Simple test to try and quickly overfit the senter - ensuring the ML models work correctly nlp = English() senter = cast(TrainablePipe, nlp.add_pipe("senter")) @@ -120,7 +120,7 @@ def test_store_activations(): doc = nlp("This is a test.") assert "senter" not in doc.activations - senter.store_activations = True + senter.save_activations = True doc = nlp("This is a test.") assert "senter" in doc.activations assert set(doc.activations["senter"].keys()) == {"guesses", "probs"} diff --git a/spacy/tests/pipeline/test_spancat.py b/spacy/tests/pipeline/test_spancat.py index 0fab5a9c4..f6bc29ab8 100644 --- a/spacy/tests/pipeline/test_spancat.py +++ b/spacy/tests/pipeline/test_spancat.py @@ -421,7 +421,7 @@ def test_set_candidates(): assert docs[0].spans["candidates"][4].text == "Just a" -def test_store_activations(): +def test_save_activations(): # Simple test to try and quickly overfit the spancat component - ensuring the ML models work correctly nlp = English() spancat = nlp.add_pipe("spancat", config={"spans_key": SPAN_KEY}) @@ -434,7 +434,7 @@ def test_store_activations(): 
doc = nlp("This is a test.") assert "spancat" not in doc.activations - spancat.store_activations = True + spancat.save_activations = True doc = nlp("This is a test.") assert set(doc.activations["spancat"].keys()) == {"indices", "scores"} assert doc.activations["spancat"]["indices"].shape == (12, 2) diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py index fa698eac6..360985def 100644 --- a/spacy/tests/pipeline/test_tagger.py +++ b/spacy/tests/pipeline/test_tagger.py @@ -213,7 +213,7 @@ def test_overfitting_IO(): assert doc3[0].tag_ != "N" -def test_store_activations(): +def test_save_activations(): # Simple test to try and quickly overfit the tagger - ensuring the ML models work correctly nlp = English() tagger = cast(TrainablePipe, nlp.add_pipe("tagger")) @@ -225,7 +225,7 @@ def test_store_activations(): doc = nlp("This is a test.") assert "tagger" not in doc.activations - tagger.store_activations = True + tagger.save_activations = True doc = nlp("This is a test.") assert "tagger" in doc.activations assert set(doc.activations["tagger"].keys()) == {"guesses", "probs"} diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py index 97edd7a6c..2e427868b 100644 --- a/spacy/tests/pipeline/test_textcat.py +++ b/spacy/tests/pipeline/test_textcat.py @@ -874,7 +874,7 @@ def test_textcat_multi_threshold(): assert scores["cats_f_per_type"]["POSITIVE"]["r"] == 1.0 -def test_store_activations(): +def test_save_activations(): fix_random_seed(0) nlp = English() textcat = cast(TrainablePipe, nlp.add_pipe("textcat")) @@ -888,13 +888,13 @@ def test_store_activations(): doc = nlp("This is a test.") assert "textcat" not in doc.activations - textcat.store_activations = True + textcat.save_activations = True doc = nlp("This is a test.") assert list(doc.activations["textcat"].keys()) == ["probs"] assert doc.activations["textcat"]["probs"].shape == (nO,) -def test_store_activations_multi(): +def 
test_save_activations_multi(): fix_random_seed(0) nlp = English() textcat = cast(TrainablePipe, nlp.add_pipe("textcat_multilabel")) @@ -908,7 +908,7 @@ def test_store_activations_multi(): doc = nlp("This is a test.") assert "textcat_multilabel" not in doc.activations - textcat.store_activations = True + textcat.save_activations = True doc = nlp("This is a test.") assert list(doc.activations["textcat_multilabel"].keys()) == ["probs"] assert doc.activations["textcat_multilabel"]["probs"].shape == (nO,) diff --git a/website/docs/api/doc.md b/website/docs/api/doc.md index 4e29140aa..e98fe19ed 100644 --- a/website/docs/api/doc.md +++ b/website/docs/api/doc.md @@ -751,23 +751,23 @@ The L2 norm of the document's vector representation. ## Attributes {#attributes} -| Name | Description | -| ------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------- | -| `text` | A string representation of the document text. ~~str~~ | -| `text_with_ws` | An alias of `Doc.text`, provided for duck-type compatibility with `Span` and `Token`. ~~str~~ | -| `mem` | The document's local memory heap, for all C data it owns. ~~cymem.Pool~~ | -| `vocab` | The store of lexical types. ~~Vocab~~ | -| `tensor` 2 | Container for dense vector representations. ~~numpy.ndarray~~ | -| `user_data` | A generic storage area, for user custom data. ~~Dict[str, Any]~~ | -| `lang` 2.1 | Language of the document's vocabulary. ~~int~~ | -| `lang_` 2.1 | Language of the document's vocabulary. ~~str~~ | -| `sentiment` | The document's positivity/negativity score, if available. ~~float~~ | -| `user_hooks` | A dictionary that allows customization of the `Doc`'s properties. ~~Dict[str, Callable]~~ | -| `user_token_hooks` | A dictionary that allows customization of properties of `Token` children. 
~~Dict[str, Callable]~~ | -| `user_span_hooks` | A dictionary that allows customization of properties of `Span` children. ~~Dict[str, Callable]~~ | -| `has_unknown_spaces` | Whether the document was constructed without known spacing between tokens (typically when created from gold tokenization). ~~bool~~ | -| `_` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). ~~Underscore~~ | -| `activations` | A dictionary of activations per trainable pipe (available when the `store_activations` option of a pipe is enabled). ~~Dict[str, Option[Any]]~~ | +| Name | Description | +| ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------- | +| `text` | A string representation of the document text. ~~str~~ | +| `text_with_ws` | An alias of `Doc.text`, provided for duck-type compatibility with `Span` and `Token`. ~~str~~ | +| `mem` | The document's local memory heap, for all C data it owns. ~~cymem.Pool~~ | +| `vocab` | The store of lexical types. ~~Vocab~~ | +| `tensor` 2 | Container for dense vector representations. ~~numpy.ndarray~~ | +| `user_data` | A generic storage area, for user custom data. ~~Dict[str, Any]~~ | +| `lang` 2.1 | Language of the document's vocabulary. ~~int~~ | +| `lang_` 2.1 | Language of the document's vocabulary. ~~str~~ | +| `sentiment` | The document's positivity/negativity score, if available. ~~float~~ | +| `user_hooks` | A dictionary that allows customization of the `Doc`'s properties. ~~Dict[str, Callable]~~ | +| `user_token_hooks` | A dictionary that allows customization of properties of `Token` children. ~~Dict[str, Callable]~~ | +| `user_span_hooks` | A dictionary that allows customization of properties of `Span` children. 
~~Dict[str, Callable]~~ | +| `has_unknown_spaces` | Whether the document was constructed without known spacing between tokens (typically when created from gold tokenization). ~~bool~~ | +| `_` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). ~~Underscore~~ | +| `activations` | A dictionary of activations per trainable pipe (available when the `save_activations` option of a pipe is enabled). ~~Dict[str, Option[Any]]~~ | ## Serialization fields {#serialization-fields} diff --git a/website/docs/api/edittreelemmatizer.md b/website/docs/api/edittreelemmatizer.md index 969a7b3ac..582a83fd9 100644 --- a/website/docs/api/edittreelemmatizer.md +++ b/website/docs/api/edittreelemmatizer.md @@ -44,15 +44,15 @@ architectures and their arguments and hyperparameters. > nlp.add_pipe("trainable_lemmatizer", config=config, name="lemmatizer") > ``` -| Setting | Description | -| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `model` | A model instance that predicts the edit tree probabilities. The output vectors should match the number of edit trees in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ | -| `backoff` | ~~Token~~ attribute to use when no applicable edit tree is found. Defaults to `orth`. ~~str~~ | -| `min_tree_freq` | Minimum frequency of an edit tree in the training set to be used. Defaults to `3`. ~~int~~ | -| `overwrite` | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ | -| `top_k` | The number of most probable edit trees to try before resorting to `backoff`. 
Defaults to `1`. ~~int~~ | -| `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"lemma"`. ~~Optional[Callable]~~ | -| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ | +| Setting | Description | +| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `model` | A model instance that predicts the edit tree probabilities. The output vectors should match the number of edit trees in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ | +| `backoff` | ~~Token~~ attribute to use when no applicable edit tree is found. Defaults to `orth`. ~~str~~ | +| `min_tree_freq` | Minimum frequency of an edit tree in the training set to be used. Defaults to `3`. ~~int~~ | +| `overwrite` | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ | +| `top_k` | The number of most probable edit trees to try before resorting to `backoff`. Defaults to `1`. ~~int~~ | +| `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"lemma"`. ~~Optional[Callable]~~ | +| `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. 
~~Union[bool, list[str]]~~ | ```python %%GITHUB_SPACY/spacy/pipeline/edit_tree_lemmatizer.py diff --git a/website/docs/api/entitylinker.md b/website/docs/api/entitylinker.md index 2cdbab396..cea26a1a1 100644 --- a/website/docs/api/entitylinker.md +++ b/website/docs/api/entitylinker.md @@ -64,7 +64,7 @@ architectures and their arguments and hyperparameters. | `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ | | `overwrite` 3.2 | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~ | | `scorer` 3.2 | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ | -| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"ents"` and `"scores"`. ~~Union[bool, list[str]]~~ | +| `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"ents"` and `"scores"`. ~~Union[bool, list[str]]~~ | | `threshold` 3.4 | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the treshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ | ```python diff --git a/website/docs/api/morphologizer.md b/website/docs/api/morphologizer.md index 2bc39e2fe..ebd358bad 100644 --- a/website/docs/api/morphologizer.md +++ b/website/docs/api/morphologizer.md @@ -48,7 +48,7 @@ architectures and their arguments and hyperparameters. | `overwrite` 3.2 | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~ | | `extend` 3.2 | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. 
Defaults to `False`. ~~bool~~ | | `scorer` 3.2 | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ | -| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ | +| `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ | ```python %%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx @@ -400,8 +400,8 @@ coarse-grained POS as the feature `POS`. > assert "Mood=Ind|POS=VERB|Tense=Past|VerbForm=Fin" in morphologizer.labels > ``` -| Name | Description | -| ----------- | ------------------------------------------------------ | +| Name | Description | +| ----------- | --------------------------------------------------------- | | **RETURNS** | The labels added to the component. ~~Iterable[str, ...]~~ | ## Morphologizer.label_data {#label_data tag="property" new="3"} diff --git a/website/docs/api/sentencerecognizer.md b/website/docs/api/sentencerecognizer.md index bc71cf6fd..c53f31e0c 100644 --- a/website/docs/api/sentencerecognizer.md +++ b/website/docs/api/sentencerecognizer.md @@ -44,7 +44,7 @@ architectures and their arguments and hyperparameters. | `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ | | `overwrite` 3.2 | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ | | `scorer` 3.2 | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ | -| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. 
~~Union[bool, list[str]]~~ | +| `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ | ```python %%GITHUB_SPACY/spacy/pipeline/senter.pyx diff --git a/website/docs/api/spancategorizer.md b/website/docs/api/spancategorizer.md index 11c2b7f68..e76f1f589 100644 --- a/website/docs/api/spancategorizer.md +++ b/website/docs/api/spancategorizer.md @@ -52,15 +52,15 @@ architectures and their arguments and hyperparameters. > nlp.add_pipe("spancat", config=config) > ``` -| Setting | Description | -| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `suggester` | A function that [suggests spans](#suggesters). Spans are returned as a ragged array with two integer columns, for the start and end positions. Defaults to [`ngram_suggester`](#ngram_suggester). ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~ | -| `model` | A model instance that is given a a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. Defaults to [SpanCategorizer](/api/architectures#SpanCategorizer). ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~ | -| `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ | -| `threshold` | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~ | -| `max_positive` | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. 
~~Optional[int]~~ | -| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ | -| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"indices"` and `"scores"`. ~~Union[bool, list[str]]~~ | +| Setting | Description | +| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `suggester` | A function that [suggests spans](#suggesters). Spans are returned as a ragged array with two integer columns, for the start and end positions. Defaults to [`ngram_suggester`](#ngram_suggester). ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~ | +| `model` | A model instance that is given a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. Defaults to [SpanCategorizer](/api/architectures#SpanCategorizer). ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~ | +| `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ | +| `threshold` | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~ | +| `max_positive` | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. ~~Optional[int]~~ | +| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. 
~~Optional[Callable]~~ | +| `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"indices"` and `"scores"`. ~~Union[bool, list[str]]~~ | ```python %%GITHUB_SPACY/spacy/pipeline/spancat.py diff --git a/website/docs/api/tagger.md b/website/docs/api/tagger.md index 20b3ca0bd..6c54d30dd 100644 --- a/website/docs/api/tagger.md +++ b/website/docs/api/tagger.md @@ -46,7 +46,7 @@ architectures and their arguments and hyperparameters. | `overwrite` 3.2 | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ | | `scorer` 3.2 | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~ | | `neg_prefix` 3.2.1 | The prefix used to specify incorrect tags while training. The tagger will learn not to predict exactly this tag. Defaults to `!`. ~~str~~ | -| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ | +| `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ | ```python %%GITHUB_SPACY/spacy/pipeline/tagger.pyx diff --git a/website/docs/api/textcategorizer.md b/website/docs/api/textcategorizer.md index 84ca3575c..684bed127 100644 --- a/website/docs/api/textcategorizer.md +++ b/website/docs/api/textcategorizer.md @@ -117,15 +117,15 @@ Create a new pipeline instance. In your application, you would normally use a shortcut for this and instantiate the component using its string name and [`nlp.add_pipe`](/api/language#create_pipe). -| Name | Description | -| ------------------- | -------------------------------------------------------------------------------------------------------------------------------- | -| `vocab` | The shared vocabulary. ~~Vocab~~ | -| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. 
~~Model[List[Doc], List[Floats2d]]~~ | -| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ | -| _keyword-only_ | | -| `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ | -| `scorer` | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ | -| `store_activations` | Store activations in `Doc` when annotating. The supported activations is `"probs"`. ~~Union[bool, list[str]]~~ | +| Name | Description | +| ------------------ | -------------------------------------------------------------------------------------------------------------------------------- | +| `vocab` | The shared vocabulary. ~~Vocab~~ | +| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ | +| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ | +| _keyword-only_ | | +| `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ | +| `scorer` | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ | +| `save_activations` | Save activations in `Doc` when annotating. The supported activation is `"probs"`. ~~Union[bool, list[str]]~~ | ## TextCategorizer.\_\_call\_\_ {#call tag="method"}