Rename TrainablePipe.store_activations to save_activations

This commit is contained in:
Daniël de Kok 2022-08-30 10:20:59 +02:00
parent 3937abd2e7
commit 2290a04d55
26 changed files with 130 additions and 130 deletions

View File

@ -941,7 +941,7 @@ class Errors(metaclass=ErrorsWithCodes):
"`{arg2}`={arg2_values} but these arguments are conflicting.") "`{arg2}`={arg2_values} but these arguments are conflicting.")
E1043 = ("Expected None or a value in range [{range_start}, {range_end}] for entity linker threshold, but got " E1043 = ("Expected None or a value in range [{range_start}, {range_end}] for entity linker threshold, but got "
"{value}.") "{value}.")
E1400 = ("store_activations attribute must be set to List[str] or bool") E1400 = ("save_activations attribute must be set to List[str] or bool")
# Deprecated model shortcuts, only used in errors and warnings # Deprecated model shortcuts, only used in errors and warnings

View File

@ -52,7 +52,7 @@ DEFAULT_EDIT_TREE_LEMMATIZER_MODEL = Config().from_str(default_model_config)["mo
"overwrite": False, "overwrite": False,
"top_k": 1, "top_k": 1,
"scorer": {"@scorers": "spacy.lemmatizer_scorer.v1"}, "scorer": {"@scorers": "spacy.lemmatizer_scorer.v1"},
"store_activations": False, "save_activations": False,
}, },
default_score_weights={"lemma_acc": 1.0}, default_score_weights={"lemma_acc": 1.0},
) )
@ -65,7 +65,7 @@ def make_edit_tree_lemmatizer(
overwrite: bool, overwrite: bool,
top_k: int, top_k: int,
scorer: Optional[Callable], scorer: Optional[Callable],
store_activations: bool, save_activations: bool,
): ):
"""Construct an EditTreeLemmatizer component.""" """Construct an EditTreeLemmatizer component."""
return EditTreeLemmatizer( return EditTreeLemmatizer(
@ -77,7 +77,7 @@ def make_edit_tree_lemmatizer(
overwrite=overwrite, overwrite=overwrite,
top_k=top_k, top_k=top_k,
scorer=scorer, scorer=scorer,
store_activations=store_activations, save_activations=save_activations,
) )
@ -97,7 +97,7 @@ class EditTreeLemmatizer(TrainablePipe):
overwrite: bool = False, overwrite: bool = False,
top_k: int = 1, top_k: int = 1,
scorer: Optional[Callable] = lemmatizer_score, scorer: Optional[Callable] = lemmatizer_score,
store_activations: bool = False, save_activations: bool = False,
): ):
""" """
Construct an edit tree lemmatizer. Construct an edit tree lemmatizer.
@ -109,7 +109,7 @@ class EditTreeLemmatizer(TrainablePipe):
frequency in the training data. frequency in the training data.
overwrite (bool): overwrite existing lemma annotations. overwrite (bool): overwrite existing lemma annotations.
top_k (int): try to apply at most the k most probable edit trees. top_k (int): try to apply at most the k most probable edit trees.
store_activations (bool): store model activations in Doc when annotating. save_activations (bool): save model activations in Doc when annotating.
""" """
self.vocab = vocab self.vocab = vocab
self.model = model self.model = model
@ -124,7 +124,7 @@ class EditTreeLemmatizer(TrainablePipe):
self.cfg: Dict[str, Any] = {"labels": []} self.cfg: Dict[str, Any] = {"labels": []}
self.scorer = scorer self.scorer = scorer
self.store_activations = store_activations self.save_activations = save_activations
def get_loss( def get_loss(
self, examples: Iterable[Example], scores: List[Floats2d] self, examples: Iterable[Example], scores: List[Floats2d]
@ -201,7 +201,7 @@ class EditTreeLemmatizer(TrainablePipe):
def set_annotations(self, docs: Iterable[Doc], activations: ActivationsT): def set_annotations(self, docs: Iterable[Doc], activations: ActivationsT):
batch_tree_ids = activations["guesses"] batch_tree_ids = activations["guesses"]
for i, doc in enumerate(docs): for i, doc in enumerate(docs):
if self.store_activations: if self.save_activations:
doc.activations[self.name] = {} doc.activations[self.name] = {}
for act_name, acts in activations.items(): for act_name, acts in activations.items():
doc.activations[self.name][act_name] = acts[i] doc.activations[self.name][act_name] = acts[i]

View File

@ -62,7 +62,7 @@ DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"]
"scorer": {"@scorers": "spacy.entity_linker_scorer.v1"}, "scorer": {"@scorers": "spacy.entity_linker_scorer.v1"},
"use_gold_ents": True, "use_gold_ents": True,
"threshold": None, "threshold": None,
"store_activations": False, "save_activations": False,
}, },
default_score_weights={ default_score_weights={
"nel_micro_f": 1.0, "nel_micro_f": 1.0,
@ -85,7 +85,7 @@ def make_entity_linker(
scorer: Optional[Callable], scorer: Optional[Callable],
use_gold_ents: bool, use_gold_ents: bool,
threshold: Optional[float] = None, threshold: Optional[float] = None,
store_activations: bool, save_activations: bool,
): ):
"""Construct an EntityLinker component. """Construct an EntityLinker component.
@ -104,7 +104,7 @@ def make_entity_linker(
component must provide entity annotations. component must provide entity annotations.
threshold (Optional[float]): Confidence threshold for entity predictions. If confidence is below the threshold, threshold (Optional[float]): Confidence threshold for entity predictions. If confidence is below the threshold,
prediction is discarded. If None, predictions are not filtered by any threshold. prediction is discarded. If None, predictions are not filtered by any threshold.
store_activations (bool): store model activations in Doc when annotating. save_activations (bool): save model activations in Doc when annotating.
""" """
if not model.attrs.get("include_span_maker", False): if not model.attrs.get("include_span_maker", False):
@ -136,7 +136,7 @@ def make_entity_linker(
scorer=scorer, scorer=scorer,
use_gold_ents=use_gold_ents, use_gold_ents=use_gold_ents,
threshold=threshold, threshold=threshold,
store_activations=store_activations, save_activations=save_activations,
) )
@ -173,7 +173,7 @@ class EntityLinker(TrainablePipe):
scorer: Optional[Callable] = entity_linker_score, scorer: Optional[Callable] = entity_linker_score,
use_gold_ents: bool, use_gold_ents: bool,
threshold: Optional[float] = None, threshold: Optional[float] = None,
store_activations: bool = False, save_activations: bool = False,
) -> None: ) -> None:
"""Initialize an entity linker. """Initialize an entity linker.
@ -222,7 +222,7 @@ class EntityLinker(TrainablePipe):
self.scorer = scorer self.scorer = scorer
self.use_gold_ents = use_gold_ents self.use_gold_ents = use_gold_ents
self.threshold = threshold self.threshold = threshold
self.store_activations = store_activations self.save_activations = save_activations
def set_kb(self, kb_loader: Callable[[Vocab], KnowledgeBase]): def set_kb(self, kb_loader: Callable[[Vocab], KnowledgeBase]):
"""Define the KB of this pipe by providing a function that will """Define the KB of this pipe by providing a function that will
@ -550,7 +550,7 @@ class EntityLinker(TrainablePipe):
i = 0 i = 0
overwrite = self.cfg["overwrite"] overwrite = self.cfg["overwrite"]
for j, doc in enumerate(docs): for j, doc in enumerate(docs):
if self.store_activations: if self.save_activations:
doc.activations[self.name] = {} doc.activations[self.name] = {}
for act_name, acts in activations.items(): for act_name, acts in activations.items():
if act_name != "kb_ids": if act_name != "kb_ids":
@ -664,7 +664,7 @@ class EntityLinker(TrainablePipe):
doc_scores: List[Floats1d], doc_scores: List[Floats1d],
doc_ents: List[Ints1d], doc_ents: List[Ints1d],
): ):
if not self.store_activations: if not self.save_activations:
return return
ops = self.model.ops ops = self.model.ops
lengths = ops.asarray1i([s.shape[0] for s in doc_scores]) lengths = ops.asarray1i([s.shape[0] for s in doc_scores])
@ -679,7 +679,7 @@ class EntityLinker(TrainablePipe):
scores: Sequence[float], scores: Sequence[float],
ents: Sequence[int], ents: Sequence[int],
): ):
if not self.store_activations: if not self.save_activations:
return return
ops = self.model.ops ops = self.model.ops
doc_scores.append(ops.asarray1f(scores)) doc_scores.append(ops.asarray1f(scores))

View File

@ -58,7 +58,7 @@ DEFAULT_MORPH_MODEL = Config().from_str(default_model_config)["model"]
"overwrite": True, "overwrite": True,
"extend": False, "extend": False,
"scorer": {"@scorers": "spacy.morphologizer_scorer.v1"}, "scorer": {"@scorers": "spacy.morphologizer_scorer.v1"},
"store_activations": False "save_activations": False
}, },
default_score_weights={"pos_acc": 0.5, "morph_acc": 0.5, "morph_per_feat": None}, default_score_weights={"pos_acc": 0.5, "morph_acc": 0.5, "morph_per_feat": None},
) )
@ -69,10 +69,10 @@ def make_morphologizer(
overwrite: bool, overwrite: bool,
extend: bool, extend: bool,
scorer: Optional[Callable], scorer: Optional[Callable],
store_activations: bool, save_activations: bool,
): ):
return Morphologizer(nlp.vocab, model, name, overwrite=overwrite, extend=extend, scorer=scorer, return Morphologizer(nlp.vocab, model, name, overwrite=overwrite, extend=extend, scorer=scorer,
store_activations=store_activations) save_activations=save_activations)
def morphologizer_score(examples, **kwargs): def morphologizer_score(examples, **kwargs):
@ -104,7 +104,7 @@ class Morphologizer(Tagger):
overwrite: bool = BACKWARD_OVERWRITE, overwrite: bool = BACKWARD_OVERWRITE,
extend: bool = BACKWARD_EXTEND, extend: bool = BACKWARD_EXTEND,
scorer: Optional[Callable] = morphologizer_score, scorer: Optional[Callable] = morphologizer_score,
store_activations: bool = False, save_activations: bool = False,
): ):
"""Initialize a morphologizer. """Initialize a morphologizer.
@ -115,7 +115,7 @@ class Morphologizer(Tagger):
scorer (Optional[Callable]): The scoring method. Defaults to scorer (Optional[Callable]): The scoring method. Defaults to
Scorer.score_token_attr for the attributes "pos" and "morph" and Scorer.score_token_attr for the attributes "pos" and "morph" and
Scorer.score_token_attr_per_feat for the attribute "morph". Scorer.score_token_attr_per_feat for the attribute "morph".
store_activations (bool): store model activations in Doc when annotating. save_activations (bool): save model activations in Doc when annotating.
DOCS: https://spacy.io/api/morphologizer#init DOCS: https://spacy.io/api/morphologizer#init
""" """
@ -135,7 +135,7 @@ class Morphologizer(Tagger):
} }
self.cfg = dict(sorted(cfg.items())) self.cfg = dict(sorted(cfg.items()))
self.scorer = scorer self.scorer = scorer
self.store_activations = store_activations self.save_activations = save_activations
@property @property
def labels(self): def labels(self):
@ -249,7 +249,7 @@ class Morphologizer(Tagger):
# to allocate a compatible container out of the iterable. # to allocate a compatible container out of the iterable.
labels = tuple(self.labels) labels = tuple(self.labels)
for i, doc in enumerate(docs): for i, doc in enumerate(docs):
if self.store_activations: if self.save_activations:
doc.activations[self.name] = {} doc.activations[self.name] = {}
for act_name, acts in activations.items(): for act_name, acts in activations.items():
doc.activations[self.name][act_name] = acts[i] doc.activations[self.name][act_name] = acts[i]

View File

@ -43,7 +43,7 @@ DEFAULT_SENTER_MODEL = Config().from_str(default_model_config)["model"]
"model": DEFAULT_SENTER_MODEL, "model": DEFAULT_SENTER_MODEL,
"overwrite": False, "overwrite": False,
"scorer": {"@scorers": "spacy.senter_scorer.v1"}, "scorer": {"@scorers": "spacy.senter_scorer.v1"},
"store_activations": False "save_activations": False
}, },
default_score_weights={"sents_f": 1.0, "sents_p": 0.0, "sents_r": 0.0}, default_score_weights={"sents_f": 1.0, "sents_p": 0.0, "sents_r": 0.0},
) )
@ -52,8 +52,8 @@ def make_senter(nlp: Language,
model: Model, model: Model,
overwrite: bool, overwrite: bool,
scorer: Optional[Callable], scorer: Optional[Callable],
store_activations: bool): save_activations: bool):
return SentenceRecognizer(nlp.vocab, model, name, overwrite=overwrite, scorer=scorer, store_activations=store_activations) return SentenceRecognizer(nlp.vocab, model, name, overwrite=overwrite, scorer=scorer, save_activations=save_activations)
def senter_score(examples, **kwargs): def senter_score(examples, **kwargs):
@ -83,7 +83,7 @@ class SentenceRecognizer(Tagger):
*, *,
overwrite=BACKWARD_OVERWRITE, overwrite=BACKWARD_OVERWRITE,
scorer=senter_score, scorer=senter_score,
store_activations: bool = False, save_activations: bool = False,
): ):
"""Initialize a sentence recognizer. """Initialize a sentence recognizer.
@ -93,7 +93,7 @@ class SentenceRecognizer(Tagger):
losses during training. losses during training.
scorer (Optional[Callable]): The scoring method. Defaults to scorer (Optional[Callable]): The scoring method. Defaults to
Scorer.score_spans for the attribute "sents". Scorer.score_spans for the attribute "sents".
store_activations (bool): store model activations in Doc when annotating. save_activations (bool): save model activations in Doc when annotating.
DOCS: https://spacy.io/api/sentencerecognizer#init DOCS: https://spacy.io/api/sentencerecognizer#init
""" """
@ -103,7 +103,7 @@ class SentenceRecognizer(Tagger):
self._rehearsal_model = None self._rehearsal_model = None
self.cfg = {"overwrite": overwrite} self.cfg = {"overwrite": overwrite}
self.scorer = scorer self.scorer = scorer
self.store_activations = store_activations self.save_activations = save_activations
@property @property
def labels(self): def labels(self):
@ -135,7 +135,7 @@ class SentenceRecognizer(Tagger):
cdef Doc doc cdef Doc doc
cdef bint overwrite = self.cfg["overwrite"] cdef bint overwrite = self.cfg["overwrite"]
for i, doc in enumerate(docs): for i, doc in enumerate(docs):
if self.store_activations: if self.save_activations:
doc.activations[self.name] = {} doc.activations[self.name] = {}
for act_name, acts in activations.items(): for act_name, acts in activations.items():
doc.activations[self.name][act_name] = acts[i] doc.activations[self.name][act_name] = acts[i]

View File

@ -110,7 +110,7 @@ def build_ngram_range_suggester(min_size: int, max_size: int) -> Suggester:
"model": DEFAULT_SPANCAT_MODEL, "model": DEFAULT_SPANCAT_MODEL,
"suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]}, "suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]},
"scorer": {"@scorers": "spacy.spancat_scorer.v1"}, "scorer": {"@scorers": "spacy.spancat_scorer.v1"},
"store_activations": False, "save_activations": False,
}, },
default_score_weights={"spans_sc_f": 1.0, "spans_sc_p": 0.0, "spans_sc_r": 0.0}, default_score_weights={"spans_sc_f": 1.0, "spans_sc_p": 0.0, "spans_sc_r": 0.0},
) )
@ -123,7 +123,7 @@ def make_spancat(
scorer: Optional[Callable], scorer: Optional[Callable],
threshold: float, threshold: float,
max_positive: Optional[int], max_positive: Optional[int],
store_activations: bool, save_activations: bool,
) -> "SpanCategorizer": ) -> "SpanCategorizer":
"""Create a SpanCategorizer component. The span categorizer consists of two """Create a SpanCategorizer component. The span categorizer consists of two
parts: a suggester function that proposes candidate spans, and a labeller parts: a suggester function that proposes candidate spans, and a labeller
@ -144,7 +144,7 @@ def make_spancat(
0.5. 0.5.
max_positive (Optional[int]): Maximum number of labels to consider positive max_positive (Optional[int]): Maximum number of labels to consider positive
per span. Defaults to None, indicating no limit. per span. Defaults to None, indicating no limit.
store_activations (bool): store model activations in Doc when annotating. save_activations (bool): save model activations in Doc when annotating.
""" """
return SpanCategorizer( return SpanCategorizer(
nlp.vocab, nlp.vocab,
@ -155,7 +155,7 @@ def make_spancat(
max_positive=max_positive, max_positive=max_positive,
name=name, name=name,
scorer=scorer, scorer=scorer,
store_activations=store_activations, save_activations=save_activations,
) )
@ -194,7 +194,7 @@ class SpanCategorizer(TrainablePipe):
threshold: float = 0.5, threshold: float = 0.5,
max_positive: Optional[int] = None, max_positive: Optional[int] = None,
scorer: Optional[Callable] = spancat_score, scorer: Optional[Callable] = spancat_score,
store_activations: bool = False, save_activations: bool = False,
) -> None: ) -> None:
"""Initialize the span categorizer. """Initialize the span categorizer.
vocab (Vocab): The shared vocabulary. vocab (Vocab): The shared vocabulary.
@ -227,7 +227,7 @@ class SpanCategorizer(TrainablePipe):
self.model = model self.model = model
self.name = name self.name = name
self.scorer = scorer self.scorer = scorer
self.store_activations = store_activations self.save_activations = save_activations
@property @property
def key(self) -> str: def key(self) -> str:
@ -317,7 +317,7 @@ class SpanCategorizer(TrainablePipe):
offset = 0 offset = 0
for i, doc in enumerate(docs): for i, doc in enumerate(docs):
indices_i = indices[i].dataXd indices_i = indices[i].dataXd
if self.store_activations: if self.save_activations:
doc.activations[self.name] = {} doc.activations[self.name] = {}
doc.activations[self.name]["indices"] = indices_i doc.activations[self.name]["indices"] = indices_i
doc.activations[self.name]["scores"] = scores[ doc.activations[self.name]["scores"] = scores[

View File

@ -53,7 +53,7 @@ DEFAULT_TAGGER_MODEL = Config().from_str(default_model_config)["model"]
"overwrite": False, "overwrite": False,
"scorer": {"@scorers": "spacy.tagger_scorer.v1"}, "scorer": {"@scorers": "spacy.tagger_scorer.v1"},
"neg_prefix": "!", "neg_prefix": "!",
"store_activations": False, "save_activations": False,
}, },
default_score_weights={"tag_acc": 1.0}, default_score_weights={"tag_acc": 1.0},
) )
@ -64,7 +64,7 @@ def make_tagger(
overwrite: bool, overwrite: bool,
scorer: Optional[Callable], scorer: Optional[Callable],
neg_prefix: str, neg_prefix: str,
store_activations: bool, save_activations: bool,
): ):
"""Construct a part-of-speech tagger component. """Construct a part-of-speech tagger component.
@ -74,7 +74,7 @@ def make_tagger(
with the rows summing to 1). with the rows summing to 1).
""" """
return Tagger(nlp.vocab, model, name, overwrite=overwrite, scorer=scorer, neg_prefix=neg_prefix, return Tagger(nlp.vocab, model, name, overwrite=overwrite, scorer=scorer, neg_prefix=neg_prefix,
store_activations=store_activations) save_activations=save_activations)
def tagger_score(examples, **kwargs): def tagger_score(examples, **kwargs):
@ -100,7 +100,7 @@ class Tagger(TrainablePipe):
overwrite=BACKWARD_OVERWRITE, overwrite=BACKWARD_OVERWRITE,
scorer=tagger_score, scorer=tagger_score,
neg_prefix="!", neg_prefix="!",
store_activations: bool = False, save_activations: bool = False,
): ):
"""Initialize a part-of-speech tagger. """Initialize a part-of-speech tagger.
@ -110,7 +110,7 @@ class Tagger(TrainablePipe):
losses during training. losses during training.
scorer (Optional[Callable]): The scoring method. Defaults to scorer (Optional[Callable]): The scoring method. Defaults to
Scorer.score_token_attr for the attribute "tag". Scorer.score_token_attr for the attribute "tag".
store_activations (bool): store model activations in Doc when annotating. save_activations (bool): save model activations in Doc when annotating.
DOCS: https://spacy.io/api/tagger#init DOCS: https://spacy.io/api/tagger#init
""" """
@ -121,7 +121,7 @@ class Tagger(TrainablePipe):
cfg = {"labels": [], "overwrite": overwrite, "neg_prefix": neg_prefix} cfg = {"labels": [], "overwrite": overwrite, "neg_prefix": neg_prefix}
self.cfg = dict(sorted(cfg.items())) self.cfg = dict(sorted(cfg.items()))
self.scorer = scorer self.scorer = scorer
self.store_activations = store_activations self.save_activations = save_activations
@property @property
def labels(self): def labels(self):
@ -185,7 +185,7 @@ class Tagger(TrainablePipe):
cdef bint overwrite = self.cfg["overwrite"] cdef bint overwrite = self.cfg["overwrite"]
labels = self.labels labels = self.labels
for i, doc in enumerate(docs): for i, doc in enumerate(docs):
if self.store_activations: if self.save_activations:
doc.activations[self.name] = {} doc.activations[self.name] = {}
for act_name, acts in activations.items(): for act_name, acts in activations.items():
doc.activations[self.name][act_name] = acts[i] doc.activations[self.name][act_name] = acts[i]

View File

@ -78,7 +78,7 @@ subword_features = true
"threshold": 0.5, "threshold": 0.5,
"model": DEFAULT_SINGLE_TEXTCAT_MODEL, "model": DEFAULT_SINGLE_TEXTCAT_MODEL,
"scorer": {"@scorers": "spacy.textcat_scorer.v1"}, "scorer": {"@scorers": "spacy.textcat_scorer.v1"},
"store_activations": False, "save_activations": False,
}, },
default_score_weights={ default_score_weights={
"cats_score": 1.0, "cats_score": 1.0,
@ -100,7 +100,7 @@ def make_textcat(
model: Model[List[Doc], List[Floats2d]], model: Model[List[Doc], List[Floats2d]],
threshold: float, threshold: float,
scorer: Optional[Callable], scorer: Optional[Callable],
store_activations: bool, save_activations: bool,
) -> "TextCategorizer": ) -> "TextCategorizer":
"""Create a TextCategorizer component. The text categorizer predicts categories """Create a TextCategorizer component. The text categorizer predicts categories
over a whole document. It can learn one or more labels, and the labels are considered over a whole document. It can learn one or more labels, and the labels are considered
@ -110,7 +110,7 @@ def make_textcat(
scores for each category. scores for each category.
threshold (float): Cutoff to consider a prediction "positive". threshold (float): Cutoff to consider a prediction "positive".
scorer (Optional[Callable]): The scoring method. scorer (Optional[Callable]): The scoring method.
store_activations (bool): store model activations in Doc when annotating. save_activations (bool): save model activations in Doc when annotating.
""" """
return TextCategorizer( return TextCategorizer(
nlp.vocab, nlp.vocab,
@ -118,7 +118,7 @@ def make_textcat(
name, name,
threshold=threshold, threshold=threshold,
scorer=scorer, scorer=scorer,
store_activations=store_activations, save_activations=save_activations,
) )
@ -150,7 +150,7 @@ class TextCategorizer(TrainablePipe):
*, *,
threshold: float, threshold: float,
scorer: Optional[Callable] = textcat_score, scorer: Optional[Callable] = textcat_score,
store_activations: bool = False, save_activations: bool = False,
) -> None: ) -> None:
"""Initialize a text categorizer for single-label classification. """Initialize a text categorizer for single-label classification.
@ -171,7 +171,7 @@ class TextCategorizer(TrainablePipe):
cfg = {"labels": [], "threshold": threshold, "positive_label": None} cfg = {"labels": [], "threshold": threshold, "positive_label": None}
self.cfg = dict(cfg) self.cfg = dict(cfg)
self.scorer = scorer self.scorer = scorer
self.store_activations = store_activations self.save_activations = save_activations
@property @property
def support_missing_values(self): def support_missing_values(self):
@ -224,7 +224,7 @@ class TextCategorizer(TrainablePipe):
""" """
probs = activations["probs"] probs = activations["probs"]
for i, doc in enumerate(docs): for i, doc in enumerate(docs):
if self.store_activations: if self.save_activations:
doc.activations[self.name] = {} doc.activations[self.name] = {}
doc.activations[self.name]["probs"] = probs[i] doc.activations[self.name]["probs"] = probs[i]
for j, label in enumerate(self.labels): for j, label in enumerate(self.labels):

View File

@ -75,7 +75,7 @@ subword_features = true
"threshold": 0.5, "threshold": 0.5,
"model": DEFAULT_MULTI_TEXTCAT_MODEL, "model": DEFAULT_MULTI_TEXTCAT_MODEL,
"scorer": {"@scorers": "spacy.textcat_multilabel_scorer.v1"}, "scorer": {"@scorers": "spacy.textcat_multilabel_scorer.v1"},
"store_activations": False, "save_activations": False,
}, },
default_score_weights={ default_score_weights={
"cats_score": 1.0, "cats_score": 1.0,
@ -97,7 +97,7 @@ def make_multilabel_textcat(
model: Model[List[Doc], List[Floats2d]], model: Model[List[Doc], List[Floats2d]],
threshold: float, threshold: float,
scorer: Optional[Callable], scorer: Optional[Callable],
store_activations: bool, save_activations: bool,
) -> "TextCategorizer": ) -> "TextCategorizer":
"""Create a TextCategorizer component. The text categorizer predicts categories """Create a TextCategorizer component. The text categorizer predicts categories
over a whole document. It can learn one or more labels, and the labels are considered over a whole document. It can learn one or more labels, and the labels are considered
@ -114,7 +114,7 @@ def make_multilabel_textcat(
name, name,
threshold=threshold, threshold=threshold,
scorer=scorer, scorer=scorer,
store_activations=store_activations, save_activations=save_activations,
) )
@ -146,7 +146,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
*, *,
threshold: float, threshold: float,
scorer: Optional[Callable] = textcat_multilabel_score, scorer: Optional[Callable] = textcat_multilabel_score,
store_activations: bool = False, save_activations: bool = False,
) -> None: ) -> None:
"""Initialize a text categorizer for multi-label classification. """Initialize a text categorizer for multi-label classification.
@ -155,7 +155,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
name (str): The component instance name, used to add entries to the name (str): The component instance name, used to add entries to the
losses during training. losses during training.
threshold (float): Cutoff to consider a prediction "positive". threshold (float): Cutoff to consider a prediction "positive".
store_activations (bool): store model activations in Doc when annotating. save_activations (bool): save model activations in Doc when annotating.
DOCS: https://spacy.io/api/textcategorizer#init DOCS: https://spacy.io/api/textcategorizer#init
""" """
@ -166,7 +166,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
cfg = {"labels": [], "threshold": threshold} cfg = {"labels": [], "threshold": threshold}
self.cfg = dict(cfg) self.cfg = dict(cfg)
self.scorer = scorer self.scorer = scorer
self.store_activations = store_activations self.save_activations = save_activations
@property @property
def support_missing_values(self): def support_missing_values(self):

View File

@ -6,4 +6,4 @@ cdef class TrainablePipe(Pipe):
cdef public object model cdef public object model
cdef public object cfg cdef public object cfg
cdef public object scorer cdef public object scorer
cdef bint _store_activations cdef bint _save_activations

View File

@ -345,9 +345,9 @@ cdef class TrainablePipe(Pipe):
return self return self
@property @property
def store_activations(self): def save_activations(self):
return self._store_activations return self._save_activations
@store_activations.setter @save_activations.setter
def store_activations(self, store_activations: bool): def save_activations(self, save_activations: bool):
self._store_activations = store_activations self._save_activations = save_activations

View File

@ -282,7 +282,7 @@ def test_empty_strings():
assert no_change == empty assert no_change == empty
def test_store_activations(): def test_save_activations():
nlp = English() nlp = English()
lemmatizer = cast(TrainablePipe, nlp.add_pipe("trainable_lemmatizer")) lemmatizer = cast(TrainablePipe, nlp.add_pipe("trainable_lemmatizer"))
lemmatizer.min_tree_freq = 1 lemmatizer.min_tree_freq = 1
@ -295,7 +295,7 @@ def test_store_activations():
doc = nlp("This is a test.") doc = nlp("This is a test.")
assert "trainable_lemmatizer" not in doc.activations assert "trainable_lemmatizer" not in doc.activations
lemmatizer.store_activations = True lemmatizer.save_activations = True
doc = nlp("This is a test.") doc = nlp("This is a test.")
assert list(doc.activations["trainable_lemmatizer"].keys()) == ["probs", "guesses"] assert list(doc.activations["trainable_lemmatizer"].keys()) == ["probs", "guesses"]
assert doc.activations["trainable_lemmatizer"]["probs"].shape == (5, nO) assert doc.activations["trainable_lemmatizer"]["probs"].shape == (5, nO)

View File

@ -1179,7 +1179,7 @@ def test_threshold(meet_threshold: bool, config: Dict[str, Any]):
assert doc.ents[0].kb_id_ == entity_id if meet_threshold else EntityLinker.NIL assert doc.ents[0].kb_id_ == entity_id if meet_threshold else EntityLinker.NIL
def test_store_activations(): def test_save_activations():
nlp = English() nlp = English()
vector_length = 3 vector_length = 3
assert "Q2146908" not in nlp.vocab.strings assert "Q2146908" not in nlp.vocab.strings
@ -1231,7 +1231,7 @@ def test_store_activations():
doc = nlp("Russ Cochran was a publisher") doc = nlp("Russ Cochran was a publisher")
assert "entity_linker" not in doc.activations assert "entity_linker" not in doc.activations
entity_linker.store_activations = True entity_linker.save_activations = True
doc = nlp("Russ Cochran was a publisher") doc = nlp("Russ Cochran was a publisher")
assert set(doc.activations["entity_linker"].keys()) == {"ents", "scores"} assert set(doc.activations["entity_linker"].keys()) == {"ents", "scores"}
ents = doc.activations["entity_linker"]["ents"] ents = doc.activations["entity_linker"]["ents"]

View File

@ -201,7 +201,7 @@ def test_overfitting_IO():
assert [t.pos_ for t in doc] == gold_pos_tags assert [t.pos_ for t in doc] == gold_pos_tags
def test_store_activations(): def test_save_activations():
# Simple test to try and quickly overfit the morphologizer - ensuring the ML models work correctly # Simple test to try and quickly overfit the morphologizer - ensuring the ML models work correctly
nlp = English() nlp = English()
morphologizer = cast(TrainablePipe, nlp.add_pipe("morphologizer")) morphologizer = cast(TrainablePipe, nlp.add_pipe("morphologizer"))
@ -213,7 +213,7 @@ def test_store_activations():
doc = nlp("This is a test.") doc = nlp("This is a test.")
assert "morphologizer" not in doc.activations assert "morphologizer" not in doc.activations
morphologizer.store_activations = True morphologizer.save_activations = True
doc = nlp("This is a test.") doc = nlp("This is a test.")
assert "morphologizer" in doc.activations assert "morphologizer" in doc.activations
assert set(doc.activations["morphologizer"].keys()) == {"guesses", "probs"} assert set(doc.activations["morphologizer"].keys()) == {"guesses", "probs"}

View File

@ -105,7 +105,7 @@ def test_overfitting_IO():
assert "senter" not in nlp.pipe_labels assert "senter" not in nlp.pipe_labels
def test_store_activations(): def test_save_activations():
# Simple test to try and quickly overfit the senter - ensuring the ML models work correctly # Simple test to try and quickly overfit the senter - ensuring the ML models work correctly
nlp = English() nlp = English()
senter = cast(TrainablePipe, nlp.add_pipe("senter")) senter = cast(TrainablePipe, nlp.add_pipe("senter"))
@ -120,7 +120,7 @@ def test_store_activations():
doc = nlp("This is a test.") doc = nlp("This is a test.")
assert "senter" not in doc.activations assert "senter" not in doc.activations
senter.store_activations = True senter.save_activations = True
doc = nlp("This is a test.") doc = nlp("This is a test.")
assert "senter" in doc.activations assert "senter" in doc.activations
assert set(doc.activations["senter"].keys()) == {"guesses", "probs"} assert set(doc.activations["senter"].keys()) == {"guesses", "probs"}

View File

@ -421,7 +421,7 @@ def test_set_candidates():
assert docs[0].spans["candidates"][4].text == "Just a" assert docs[0].spans["candidates"][4].text == "Just a"
def test_store_activations(): def test_save_activations():
# Simple test to try and quickly overfit the spancat component - ensuring the ML models work correctly # Simple test to try and quickly overfit the spancat component - ensuring the ML models work correctly
nlp = English() nlp = English()
spancat = nlp.add_pipe("spancat", config={"spans_key": SPAN_KEY}) spancat = nlp.add_pipe("spancat", config={"spans_key": SPAN_KEY})
@ -434,7 +434,7 @@ def test_store_activations():
doc = nlp("This is a test.") doc = nlp("This is a test.")
assert "spancat" not in doc.activations assert "spancat" not in doc.activations
spancat.store_activations = True spancat.save_activations = True
doc = nlp("This is a test.") doc = nlp("This is a test.")
assert set(doc.activations["spancat"].keys()) == {"indices", "scores"} assert set(doc.activations["spancat"].keys()) == {"indices", "scores"}
assert doc.activations["spancat"]["indices"].shape == (12, 2) assert doc.activations["spancat"]["indices"].shape == (12, 2)

View File

@ -213,7 +213,7 @@ def test_overfitting_IO():
assert doc3[0].tag_ != "N" assert doc3[0].tag_ != "N"
def test_store_activations(): def test_save_activations():
# Simple test to try and quickly overfit the tagger - ensuring the ML models work correctly # Simple test to try and quickly overfit the tagger - ensuring the ML models work correctly
nlp = English() nlp = English()
tagger = cast(TrainablePipe, nlp.add_pipe("tagger")) tagger = cast(TrainablePipe, nlp.add_pipe("tagger"))
@ -225,7 +225,7 @@ def test_store_activations():
doc = nlp("This is a test.") doc = nlp("This is a test.")
assert "tagger" not in doc.activations assert "tagger" not in doc.activations
tagger.store_activations = True tagger.save_activations = True
doc = nlp("This is a test.") doc = nlp("This is a test.")
assert "tagger" in doc.activations assert "tagger" in doc.activations
assert set(doc.activations["tagger"].keys()) == {"guesses", "probs"} assert set(doc.activations["tagger"].keys()) == {"guesses", "probs"}

View File

@ -874,7 +874,7 @@ def test_textcat_multi_threshold():
assert scores["cats_f_per_type"]["POSITIVE"]["r"] == 1.0 assert scores["cats_f_per_type"]["POSITIVE"]["r"] == 1.0
def test_store_activations(): def test_save_activations():
fix_random_seed(0) fix_random_seed(0)
nlp = English() nlp = English()
textcat = cast(TrainablePipe, nlp.add_pipe("textcat")) textcat = cast(TrainablePipe, nlp.add_pipe("textcat"))
@ -888,13 +888,13 @@ def test_store_activations():
doc = nlp("This is a test.") doc = nlp("This is a test.")
assert "textcat" not in doc.activations assert "textcat" not in doc.activations
textcat.store_activations = True textcat.save_activations = True
doc = nlp("This is a test.") doc = nlp("This is a test.")
assert list(doc.activations["textcat"].keys()) == ["probs"] assert list(doc.activations["textcat"].keys()) == ["probs"]
assert doc.activations["textcat"]["probs"].shape == (nO,) assert doc.activations["textcat"]["probs"].shape == (nO,)
def test_store_activations_multi(): def test_save_activations_multi():
fix_random_seed(0) fix_random_seed(0)
nlp = English() nlp = English()
textcat = cast(TrainablePipe, nlp.add_pipe("textcat_multilabel")) textcat = cast(TrainablePipe, nlp.add_pipe("textcat_multilabel"))
@ -908,7 +908,7 @@ def test_store_activations_multi():
doc = nlp("This is a test.") doc = nlp("This is a test.")
assert "textcat_multilabel" not in doc.activations assert "textcat_multilabel" not in doc.activations
textcat.store_activations = True textcat.save_activations = True
doc = nlp("This is a test.") doc = nlp("This is a test.")
assert list(doc.activations["textcat_multilabel"].keys()) == ["probs"] assert list(doc.activations["textcat_multilabel"].keys()) == ["probs"]
assert doc.activations["textcat_multilabel"]["probs"].shape == (nO,) assert doc.activations["textcat_multilabel"]["probs"].shape == (nO,)

View File

@ -751,23 +751,23 @@ The L2 norm of the document's vector representation.
## Attributes {#attributes} ## Attributes {#attributes}
| Name | Description | | Name | Description |
| ------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------- | | ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------- |
| `text` | A string representation of the document text. ~~str~~ | | `text` | A string representation of the document text. ~~str~~ |
| `text_with_ws` | An alias of `Doc.text`, provided for duck-type compatibility with `Span` and `Token`. ~~str~~ | | `text_with_ws` | An alias of `Doc.text`, provided for duck-type compatibility with `Span` and `Token`. ~~str~~ |
| `mem` | The document's local memory heap, for all C data it owns. ~~cymem.Pool~~ | | `mem` | The document's local memory heap, for all C data it owns. ~~cymem.Pool~~ |
| `vocab` | The store of lexical types. ~~Vocab~~ | | `vocab` | The store of lexical types. ~~Vocab~~ |
| `tensor` <Tag variant="new">2</Tag> | Container for dense vector representations. ~~numpy.ndarray~~ | | `tensor` <Tag variant="new">2</Tag> | Container for dense vector representations. ~~numpy.ndarray~~ |
| `user_data` | A generic storage area, for user custom data. ~~Dict[str, Any]~~ | | `user_data` | A generic storage area, for user custom data. ~~Dict[str, Any]~~ |
| `lang` <Tag variant="new">2.1</Tag> | Language of the document's vocabulary. ~~int~~ | | `lang` <Tag variant="new">2.1</Tag> | Language of the document's vocabulary. ~~int~~ |
| `lang_` <Tag variant="new">2.1</Tag> | Language of the document's vocabulary. ~~str~~ | | `lang_` <Tag variant="new">2.1</Tag> | Language of the document's vocabulary. ~~str~~ |
| `sentiment` | The document's positivity/negativity score, if available. ~~float~~ | | `sentiment` | The document's positivity/negativity score, if available. ~~float~~ |
| `user_hooks` | A dictionary that allows customization of the `Doc`'s properties. ~~Dict[str, Callable]~~ | | `user_hooks` | A dictionary that allows customization of the `Doc`'s properties. ~~Dict[str, Callable]~~ |
| `user_token_hooks` | A dictionary that allows customization of properties of `Token` children. ~~Dict[str, Callable]~~ | | `user_token_hooks` | A dictionary that allows customization of properties of `Token` children. ~~Dict[str, Callable]~~ |
| `user_span_hooks` | A dictionary that allows customization of properties of `Span` children. ~~Dict[str, Callable]~~ | | `user_span_hooks` | A dictionary that allows customization of properties of `Span` children. ~~Dict[str, Callable]~~ |
| `has_unknown_spaces` | Whether the document was constructed without known spacing between tokens (typically when created from gold tokenization). ~~bool~~ | | `has_unknown_spaces` | Whether the document was constructed without known spacing between tokens (typically when created from gold tokenization). ~~bool~~ |
| `_` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). ~~Underscore~~ | | `_` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). ~~Underscore~~ |
| `activations` | A dictionary of activations per trainable pipe (available when the `store_activations` option of a pipe is enabled). ~~Dict[str, Optional[Any]]~~ | | `activations` | A dictionary of activations per trainable pipe (available when the `save_activations` option of a pipe is enabled). ~~Dict[str, Optional[Any]]~~ |
## Serialization fields {#serialization-fields} ## Serialization fields {#serialization-fields}

View File

@ -44,15 +44,15 @@ architectures and their arguments and hyperparameters.
> nlp.add_pipe("trainable_lemmatizer", config=config, name="lemmatizer") > nlp.add_pipe("trainable_lemmatizer", config=config, name="lemmatizer")
> ``` > ```
| Setting | Description | | Setting | Description |
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `model` | A model instance that predicts the edit tree probabilities. The output vectors should match the number of edit trees in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ | | `model` | A model instance that predicts the edit tree probabilities. The output vectors should match the number of edit trees in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
| `backoff` | ~~Token~~ attribute to use when no applicable edit tree is found. Defaults to `orth`. ~~str~~ | | `backoff` | ~~Token~~ attribute to use when no applicable edit tree is found. Defaults to `orth`. ~~str~~ |
| `min_tree_freq` | Minimum frequency of an edit tree in the training set to be used. Defaults to `3`. ~~int~~ | | `min_tree_freq` | Minimum frequency of an edit tree in the training set to be used. Defaults to `3`. ~~int~~ |
| `overwrite` | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ | | `overwrite` | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
| `top_k` | The number of most probable edit trees to try before resorting to `backoff`. Defaults to `1`. ~~int~~ | | `top_k` | The number of most probable edit trees to try before resorting to `backoff`. Defaults to `1`. ~~int~~ |
| `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"lemma"`. ~~Optional[Callable]~~ | | `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"lemma"`. ~~Optional[Callable]~~ |
| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ | | `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
```python ```python
%%GITHUB_SPACY/spacy/pipeline/edit_tree_lemmatizer.py %%GITHUB_SPACY/spacy/pipeline/edit_tree_lemmatizer.py

View File

@ -64,7 +64,7 @@ architectures and their arguments and hyperparameters.
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ | | `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~ | | `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~ |
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ | | `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ |
| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"ents"` and `"scores"`. ~~Union[bool, list[str]]~~ | | `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"ents"` and `"scores"`. ~~Union[bool, list[str]]~~ |
| `threshold` <Tag variant="new">3.4</Tag> | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the threshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ | | `threshold` <Tag variant="new">3.4</Tag> | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the threshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ |
```python ```python

View File

@ -48,7 +48,7 @@ architectures and their arguments and hyperparameters.
| `overwrite` <Tag variant="new">3.2</Tag> | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~ | | `overwrite` <Tag variant="new">3.2</Tag> | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~ |
| `extend` <Tag variant="new">3.2</Tag> | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~ | | `extend` <Tag variant="new">3.2</Tag> | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~ |
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ | | `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ | | `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
```python ```python
%%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx %%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx
@ -400,8 +400,8 @@ coarse-grained POS as the feature `POS`.
> assert "Mood=Ind|POS=VERB|Tense=Past|VerbForm=Fin" in morphologizer.labels > assert "Mood=Ind|POS=VERB|Tense=Past|VerbForm=Fin" in morphologizer.labels
> ``` > ```
| Name | Description | | Name | Description |
| ----------- | ------------------------------------------------------ | | ----------- | --------------------------------------------------------- |
| **RETURNS** | The labels added to the component. ~~Iterable[str, ...]~~ | | **RETURNS** | The labels added to the component. ~~Iterable[str, ...]~~ |
## Morphologizer.label_data {#label_data tag="property" new="3"} ## Morphologizer.label_data {#label_data tag="property" new="3"}

View File

@ -44,7 +44,7 @@ architectures and their arguments and hyperparameters.
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ | | `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ | | `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ | | `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ |
| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ | | `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
```python ```python
%%GITHUB_SPACY/spacy/pipeline/senter.pyx %%GITHUB_SPACY/spacy/pipeline/senter.pyx

View File

@ -52,15 +52,15 @@ architectures and their arguments and hyperparameters.
> nlp.add_pipe("spancat", config=config) > nlp.add_pipe("spancat", config=config)
> ``` > ```
| Setting | Description | | Setting | Description |
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `suggester` | A function that [suggests spans](#suggesters). Spans are returned as a ragged array with two integer columns, for the start and end positions. Defaults to [`ngram_suggester`](#ngram_suggester). ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~ | | `suggester` | A function that [suggests spans](#suggesters). Spans are returned as a ragged array with two integer columns, for the start and end positions. Defaults to [`ngram_suggester`](#ngram_suggester). ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~ |
| `model` | A model instance that is given a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. Defaults to [SpanCategorizer](/api/architectures#SpanCategorizer). ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~ | | `model` | A model instance that is given a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. Defaults to [SpanCategorizer](/api/architectures#SpanCategorizer). ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~ |
| `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ | | `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ |
| `threshold` | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~ | | `threshold` | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~ |
| `max_positive` | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. ~~Optional[int]~~ | | `max_positive` | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. ~~Optional[int]~~ |
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ | | `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ |
| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"indices"` and `"scores"`. ~~Union[bool, list[str]]~~ | | `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"indices"` and `"scores"`. ~~Union[bool, list[str]]~~ |
```python ```python
%%GITHUB_SPACY/spacy/pipeline/spancat.py %%GITHUB_SPACY/spacy/pipeline/spancat.py

View File

@ -46,7 +46,7 @@ architectures and their arguments and hyperparameters.
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ | | `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~ | | `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~ |
| `neg_prefix` <Tag variant="new">3.2.1</Tag> | The prefix used to specify incorrect tags while training. The tagger will learn not to predict exactly this tag. Defaults to `!`. ~~str~~ | | `neg_prefix` <Tag variant="new">3.2.1</Tag> | The prefix used to specify incorrect tags while training. The tagger will learn not to predict exactly this tag. Defaults to `!`. ~~str~~ |
| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ | | `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
```python ```python
%%GITHUB_SPACY/spacy/pipeline/tagger.pyx %%GITHUB_SPACY/spacy/pipeline/tagger.pyx

View File

@ -117,15 +117,15 @@ Create a new pipeline instance. In your application, you would normally use a
shortcut for this and instantiate the component using its string name and shortcut for this and instantiate the component using its string name and
[`nlp.add_pipe`](/api/language#create_pipe). [`nlp.add_pipe`](/api/language#create_pipe).
| Name | Description | | Name | Description |
| ------------------- | -------------------------------------------------------------------------------------------------------------------------------- | | ------------------ | -------------------------------------------------------------------------------------------------------------------------------- |
| `vocab` | The shared vocabulary. ~~Vocab~~ | | `vocab` | The shared vocabulary. ~~Vocab~~ |
| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ | | `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ | | `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
| _keyword-only_ | | | _keyword-only_ | |
| `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ | | `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ |
| `scorer` | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ | | `scorer` | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ |
| `store_activations` | Store activations in `Doc` when annotating. The supported activation is `"probs"`. ~~Union[bool, list[str]]~~ | | `save_activations` | Save activations in `Doc` when annotating. The supported activation is `"probs"`. ~~Union[bool, list[str]]~~ |
## TextCategorizer.\_\_call\_\_ {#call tag="method"} ## TextCategorizer.\_\_call\_\_ {#call tag="method"}