mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-28 02:46:35 +03:00
Rename TrainablePipe.store_activations to save_activations
This commit is contained in:
parent
3937abd2e7
commit
2290a04d55
|
@ -941,7 +941,7 @@ class Errors(metaclass=ErrorsWithCodes):
|
||||||
"`{arg2}`={arg2_values} but these arguments are conflicting.")
|
"`{arg2}`={arg2_values} but these arguments are conflicting.")
|
||||||
E1043 = ("Expected None or a value in range [{range_start}, {range_end}] for entity linker threshold, but got "
|
E1043 = ("Expected None or a value in range [{range_start}, {range_end}] for entity linker threshold, but got "
|
||||||
"{value}.")
|
"{value}.")
|
||||||
E1400 = ("store_activations attribute must be set to List[str] or bool")
|
E1400 = ("save_activations attribute must be set to List[str] or bool")
|
||||||
|
|
||||||
|
|
||||||
# Deprecated model shortcuts, only used in errors and warnings
|
# Deprecated model shortcuts, only used in errors and warnings
|
||||||
|
|
|
@ -52,7 +52,7 @@ DEFAULT_EDIT_TREE_LEMMATIZER_MODEL = Config().from_str(default_model_config)["mo
|
||||||
"overwrite": False,
|
"overwrite": False,
|
||||||
"top_k": 1,
|
"top_k": 1,
|
||||||
"scorer": {"@scorers": "spacy.lemmatizer_scorer.v1"},
|
"scorer": {"@scorers": "spacy.lemmatizer_scorer.v1"},
|
||||||
"store_activations": False,
|
"save_activations": False,
|
||||||
},
|
},
|
||||||
default_score_weights={"lemma_acc": 1.0},
|
default_score_weights={"lemma_acc": 1.0},
|
||||||
)
|
)
|
||||||
|
@ -65,7 +65,7 @@ def make_edit_tree_lemmatizer(
|
||||||
overwrite: bool,
|
overwrite: bool,
|
||||||
top_k: int,
|
top_k: int,
|
||||||
scorer: Optional[Callable],
|
scorer: Optional[Callable],
|
||||||
store_activations: bool,
|
save_activations: bool,
|
||||||
):
|
):
|
||||||
"""Construct an EditTreeLemmatizer component."""
|
"""Construct an EditTreeLemmatizer component."""
|
||||||
return EditTreeLemmatizer(
|
return EditTreeLemmatizer(
|
||||||
|
@ -77,7 +77,7 @@ def make_edit_tree_lemmatizer(
|
||||||
overwrite=overwrite,
|
overwrite=overwrite,
|
||||||
top_k=top_k,
|
top_k=top_k,
|
||||||
scorer=scorer,
|
scorer=scorer,
|
||||||
store_activations=store_activations,
|
save_activations=save_activations,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -97,7 +97,7 @@ class EditTreeLemmatizer(TrainablePipe):
|
||||||
overwrite: bool = False,
|
overwrite: bool = False,
|
||||||
top_k: int = 1,
|
top_k: int = 1,
|
||||||
scorer: Optional[Callable] = lemmatizer_score,
|
scorer: Optional[Callable] = lemmatizer_score,
|
||||||
store_activations: bool = False,
|
save_activations: bool = False,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Construct an edit tree lemmatizer.
|
Construct an edit tree lemmatizer.
|
||||||
|
@ -109,7 +109,7 @@ class EditTreeLemmatizer(TrainablePipe):
|
||||||
frequency in the training data.
|
frequency in the training data.
|
||||||
overwrite (bool): overwrite existing lemma annotations.
|
overwrite (bool): overwrite existing lemma annotations.
|
||||||
top_k (int): try to apply at most the k most probable edit trees.
|
top_k (int): try to apply at most the k most probable edit trees.
|
||||||
store_activations (bool): store model activations in Doc when annotating.
|
save_activations (bool): save model activations in Doc when annotating.
|
||||||
"""
|
"""
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.model = model
|
self.model = model
|
||||||
|
@ -124,7 +124,7 @@ class EditTreeLemmatizer(TrainablePipe):
|
||||||
|
|
||||||
self.cfg: Dict[str, Any] = {"labels": []}
|
self.cfg: Dict[str, Any] = {"labels": []}
|
||||||
self.scorer = scorer
|
self.scorer = scorer
|
||||||
self.store_activations = store_activations
|
self.save_activations = save_activations
|
||||||
|
|
||||||
def get_loss(
|
def get_loss(
|
||||||
self, examples: Iterable[Example], scores: List[Floats2d]
|
self, examples: Iterable[Example], scores: List[Floats2d]
|
||||||
|
@ -201,7 +201,7 @@ class EditTreeLemmatizer(TrainablePipe):
|
||||||
def set_annotations(self, docs: Iterable[Doc], activations: ActivationsT):
|
def set_annotations(self, docs: Iterable[Doc], activations: ActivationsT):
|
||||||
batch_tree_ids = activations["guesses"]
|
batch_tree_ids = activations["guesses"]
|
||||||
for i, doc in enumerate(docs):
|
for i, doc in enumerate(docs):
|
||||||
if self.store_activations:
|
if self.save_activations:
|
||||||
doc.activations[self.name] = {}
|
doc.activations[self.name] = {}
|
||||||
for act_name, acts in activations.items():
|
for act_name, acts in activations.items():
|
||||||
doc.activations[self.name][act_name] = acts[i]
|
doc.activations[self.name][act_name] = acts[i]
|
||||||
|
|
|
@ -62,7 +62,7 @@ DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"]
|
||||||
"scorer": {"@scorers": "spacy.entity_linker_scorer.v1"},
|
"scorer": {"@scorers": "spacy.entity_linker_scorer.v1"},
|
||||||
"use_gold_ents": True,
|
"use_gold_ents": True,
|
||||||
"threshold": None,
|
"threshold": None,
|
||||||
"store_activations": False,
|
"save_activations": False,
|
||||||
},
|
},
|
||||||
default_score_weights={
|
default_score_weights={
|
||||||
"nel_micro_f": 1.0,
|
"nel_micro_f": 1.0,
|
||||||
|
@ -85,7 +85,7 @@ def make_entity_linker(
|
||||||
scorer: Optional[Callable],
|
scorer: Optional[Callable],
|
||||||
use_gold_ents: bool,
|
use_gold_ents: bool,
|
||||||
threshold: Optional[float] = None,
|
threshold: Optional[float] = None,
|
||||||
store_activations: bool,
|
save_activations: bool,
|
||||||
):
|
):
|
||||||
"""Construct an EntityLinker component.
|
"""Construct an EntityLinker component.
|
||||||
|
|
||||||
|
@ -104,7 +104,7 @@ def make_entity_linker(
|
||||||
component must provide entity annotations.
|
component must provide entity annotations.
|
||||||
threshold (Optional[float]): Confidence threshold for entity predictions. If confidence is below the threshold,
|
threshold (Optional[float]): Confidence threshold for entity predictions. If confidence is below the threshold,
|
||||||
prediction is discarded. If None, predictions are not filtered by any threshold.
|
prediction is discarded. If None, predictions are not filtered by any threshold.
|
||||||
store_activations (bool): store model activations in Doc when annotating.
|
save_activations (bool): save model activations in Doc when annotating.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if not model.attrs.get("include_span_maker", False):
|
if not model.attrs.get("include_span_maker", False):
|
||||||
|
@ -136,7 +136,7 @@ def make_entity_linker(
|
||||||
scorer=scorer,
|
scorer=scorer,
|
||||||
use_gold_ents=use_gold_ents,
|
use_gold_ents=use_gold_ents,
|
||||||
threshold=threshold,
|
threshold=threshold,
|
||||||
store_activations=store_activations,
|
save_activations=save_activations,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -173,7 +173,7 @@ class EntityLinker(TrainablePipe):
|
||||||
scorer: Optional[Callable] = entity_linker_score,
|
scorer: Optional[Callable] = entity_linker_score,
|
||||||
use_gold_ents: bool,
|
use_gold_ents: bool,
|
||||||
threshold: Optional[float] = None,
|
threshold: Optional[float] = None,
|
||||||
store_activations: bool = False,
|
save_activations: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize an entity linker.
|
"""Initialize an entity linker.
|
||||||
|
|
||||||
|
@ -222,7 +222,7 @@ class EntityLinker(TrainablePipe):
|
||||||
self.scorer = scorer
|
self.scorer = scorer
|
||||||
self.use_gold_ents = use_gold_ents
|
self.use_gold_ents = use_gold_ents
|
||||||
self.threshold = threshold
|
self.threshold = threshold
|
||||||
self.store_activations = store_activations
|
self.save_activations = save_activations
|
||||||
|
|
||||||
def set_kb(self, kb_loader: Callable[[Vocab], KnowledgeBase]):
|
def set_kb(self, kb_loader: Callable[[Vocab], KnowledgeBase]):
|
||||||
"""Define the KB of this pipe by providing a function that will
|
"""Define the KB of this pipe by providing a function that will
|
||||||
|
@ -550,7 +550,7 @@ class EntityLinker(TrainablePipe):
|
||||||
i = 0
|
i = 0
|
||||||
overwrite = self.cfg["overwrite"]
|
overwrite = self.cfg["overwrite"]
|
||||||
for j, doc in enumerate(docs):
|
for j, doc in enumerate(docs):
|
||||||
if self.store_activations:
|
if self.save_activations:
|
||||||
doc.activations[self.name] = {}
|
doc.activations[self.name] = {}
|
||||||
for act_name, acts in activations.items():
|
for act_name, acts in activations.items():
|
||||||
if act_name != "kb_ids":
|
if act_name != "kb_ids":
|
||||||
|
@ -664,7 +664,7 @@ class EntityLinker(TrainablePipe):
|
||||||
doc_scores: List[Floats1d],
|
doc_scores: List[Floats1d],
|
||||||
doc_ents: List[Ints1d],
|
doc_ents: List[Ints1d],
|
||||||
):
|
):
|
||||||
if not self.store_activations:
|
if not self.save_activations:
|
||||||
return
|
return
|
||||||
ops = self.model.ops
|
ops = self.model.ops
|
||||||
lengths = ops.asarray1i([s.shape[0] for s in doc_scores])
|
lengths = ops.asarray1i([s.shape[0] for s in doc_scores])
|
||||||
|
@ -679,7 +679,7 @@ class EntityLinker(TrainablePipe):
|
||||||
scores: Sequence[float],
|
scores: Sequence[float],
|
||||||
ents: Sequence[int],
|
ents: Sequence[int],
|
||||||
):
|
):
|
||||||
if not self.store_activations:
|
if not self.save_activations:
|
||||||
return
|
return
|
||||||
ops = self.model.ops
|
ops = self.model.ops
|
||||||
doc_scores.append(ops.asarray1f(scores))
|
doc_scores.append(ops.asarray1f(scores))
|
||||||
|
|
|
@ -58,7 +58,7 @@ DEFAULT_MORPH_MODEL = Config().from_str(default_model_config)["model"]
|
||||||
"overwrite": True,
|
"overwrite": True,
|
||||||
"extend": False,
|
"extend": False,
|
||||||
"scorer": {"@scorers": "spacy.morphologizer_scorer.v1"},
|
"scorer": {"@scorers": "spacy.morphologizer_scorer.v1"},
|
||||||
"store_activations": False
|
"save_activations": False
|
||||||
},
|
},
|
||||||
default_score_weights={"pos_acc": 0.5, "morph_acc": 0.5, "morph_per_feat": None},
|
default_score_weights={"pos_acc": 0.5, "morph_acc": 0.5, "morph_per_feat": None},
|
||||||
)
|
)
|
||||||
|
@ -69,10 +69,10 @@ def make_morphologizer(
|
||||||
overwrite: bool,
|
overwrite: bool,
|
||||||
extend: bool,
|
extend: bool,
|
||||||
scorer: Optional[Callable],
|
scorer: Optional[Callable],
|
||||||
store_activations: bool,
|
save_activations: bool,
|
||||||
):
|
):
|
||||||
return Morphologizer(nlp.vocab, model, name, overwrite=overwrite, extend=extend, scorer=scorer,
|
return Morphologizer(nlp.vocab, model, name, overwrite=overwrite, extend=extend, scorer=scorer,
|
||||||
store_activations=store_activations)
|
save_activations=save_activations)
|
||||||
|
|
||||||
|
|
||||||
def morphologizer_score(examples, **kwargs):
|
def morphologizer_score(examples, **kwargs):
|
||||||
|
@ -104,7 +104,7 @@ class Morphologizer(Tagger):
|
||||||
overwrite: bool = BACKWARD_OVERWRITE,
|
overwrite: bool = BACKWARD_OVERWRITE,
|
||||||
extend: bool = BACKWARD_EXTEND,
|
extend: bool = BACKWARD_EXTEND,
|
||||||
scorer: Optional[Callable] = morphologizer_score,
|
scorer: Optional[Callable] = morphologizer_score,
|
||||||
store_activations: bool = False,
|
save_activations: bool = False,
|
||||||
):
|
):
|
||||||
"""Initialize a morphologizer.
|
"""Initialize a morphologizer.
|
||||||
|
|
||||||
|
@ -115,7 +115,7 @@ class Morphologizer(Tagger):
|
||||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||||
Scorer.score_token_attr for the attributes "pos" and "morph" and
|
Scorer.score_token_attr for the attributes "pos" and "morph" and
|
||||||
Scorer.score_token_attr_per_feat for the attribute "morph".
|
Scorer.score_token_attr_per_feat for the attribute "morph".
|
||||||
store_activations (bool): store model activations in Doc when annotating.
|
save_activations (bool): save model activations in Doc when annotating.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/morphologizer#init
|
DOCS: https://spacy.io/api/morphologizer#init
|
||||||
"""
|
"""
|
||||||
|
@ -135,7 +135,7 @@ class Morphologizer(Tagger):
|
||||||
}
|
}
|
||||||
self.cfg = dict(sorted(cfg.items()))
|
self.cfg = dict(sorted(cfg.items()))
|
||||||
self.scorer = scorer
|
self.scorer = scorer
|
||||||
self.store_activations = store_activations
|
self.save_activations = save_activations
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def labels(self):
|
def labels(self):
|
||||||
|
@ -249,7 +249,7 @@ class Morphologizer(Tagger):
|
||||||
# to allocate a compatible container out of the iterable.
|
# to allocate a compatible container out of the iterable.
|
||||||
labels = tuple(self.labels)
|
labels = tuple(self.labels)
|
||||||
for i, doc in enumerate(docs):
|
for i, doc in enumerate(docs):
|
||||||
if self.store_activations:
|
if self.save_activations:
|
||||||
doc.activations[self.name] = {}
|
doc.activations[self.name] = {}
|
||||||
for act_name, acts in activations.items():
|
for act_name, acts in activations.items():
|
||||||
doc.activations[self.name][act_name] = acts[i]
|
doc.activations[self.name][act_name] = acts[i]
|
||||||
|
|
|
@ -43,7 +43,7 @@ DEFAULT_SENTER_MODEL = Config().from_str(default_model_config)["model"]
|
||||||
"model": DEFAULT_SENTER_MODEL,
|
"model": DEFAULT_SENTER_MODEL,
|
||||||
"overwrite": False,
|
"overwrite": False,
|
||||||
"scorer": {"@scorers": "spacy.senter_scorer.v1"},
|
"scorer": {"@scorers": "spacy.senter_scorer.v1"},
|
||||||
"store_activations": False
|
"save_activations": False
|
||||||
},
|
},
|
||||||
default_score_weights={"sents_f": 1.0, "sents_p": 0.0, "sents_r": 0.0},
|
default_score_weights={"sents_f": 1.0, "sents_p": 0.0, "sents_r": 0.0},
|
||||||
)
|
)
|
||||||
|
@ -52,8 +52,8 @@ def make_senter(nlp: Language,
|
||||||
model: Model,
|
model: Model,
|
||||||
overwrite: bool,
|
overwrite: bool,
|
||||||
scorer: Optional[Callable],
|
scorer: Optional[Callable],
|
||||||
store_activations: bool):
|
save_activations: bool):
|
||||||
return SentenceRecognizer(nlp.vocab, model, name, overwrite=overwrite, scorer=scorer, store_activations=store_activations)
|
return SentenceRecognizer(nlp.vocab, model, name, overwrite=overwrite, scorer=scorer, save_activations=save_activations)
|
||||||
|
|
||||||
|
|
||||||
def senter_score(examples, **kwargs):
|
def senter_score(examples, **kwargs):
|
||||||
|
@ -83,7 +83,7 @@ class SentenceRecognizer(Tagger):
|
||||||
*,
|
*,
|
||||||
overwrite=BACKWARD_OVERWRITE,
|
overwrite=BACKWARD_OVERWRITE,
|
||||||
scorer=senter_score,
|
scorer=senter_score,
|
||||||
store_activations: bool = False,
|
save_activations: bool = False,
|
||||||
):
|
):
|
||||||
"""Initialize a sentence recognizer.
|
"""Initialize a sentence recognizer.
|
||||||
|
|
||||||
|
@ -93,7 +93,7 @@ class SentenceRecognizer(Tagger):
|
||||||
losses during training.
|
losses during training.
|
||||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||||
Scorer.score_spans for the attribute "sents".
|
Scorer.score_spans for the attribute "sents".
|
||||||
store_activations (bool): store model activations in Doc when annotating.
|
save_activations (bool): save model activations in Doc when annotating.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencerecognizer#init
|
DOCS: https://spacy.io/api/sentencerecognizer#init
|
||||||
"""
|
"""
|
||||||
|
@ -103,7 +103,7 @@ class SentenceRecognizer(Tagger):
|
||||||
self._rehearsal_model = None
|
self._rehearsal_model = None
|
||||||
self.cfg = {"overwrite": overwrite}
|
self.cfg = {"overwrite": overwrite}
|
||||||
self.scorer = scorer
|
self.scorer = scorer
|
||||||
self.store_activations = store_activations
|
self.save_activations = save_activations
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def labels(self):
|
def labels(self):
|
||||||
|
@ -135,7 +135,7 @@ class SentenceRecognizer(Tagger):
|
||||||
cdef Doc doc
|
cdef Doc doc
|
||||||
cdef bint overwrite = self.cfg["overwrite"]
|
cdef bint overwrite = self.cfg["overwrite"]
|
||||||
for i, doc in enumerate(docs):
|
for i, doc in enumerate(docs):
|
||||||
if self.store_activations:
|
if self.save_activations:
|
||||||
doc.activations[self.name] = {}
|
doc.activations[self.name] = {}
|
||||||
for act_name, acts in activations.items():
|
for act_name, acts in activations.items():
|
||||||
doc.activations[self.name][act_name] = acts[i]
|
doc.activations[self.name][act_name] = acts[i]
|
||||||
|
|
|
@ -110,7 +110,7 @@ def build_ngram_range_suggester(min_size: int, max_size: int) -> Suggester:
|
||||||
"model": DEFAULT_SPANCAT_MODEL,
|
"model": DEFAULT_SPANCAT_MODEL,
|
||||||
"suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]},
|
"suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]},
|
||||||
"scorer": {"@scorers": "spacy.spancat_scorer.v1"},
|
"scorer": {"@scorers": "spacy.spancat_scorer.v1"},
|
||||||
"store_activations": False,
|
"save_activations": False,
|
||||||
},
|
},
|
||||||
default_score_weights={"spans_sc_f": 1.0, "spans_sc_p": 0.0, "spans_sc_r": 0.0},
|
default_score_weights={"spans_sc_f": 1.0, "spans_sc_p": 0.0, "spans_sc_r": 0.0},
|
||||||
)
|
)
|
||||||
|
@ -123,7 +123,7 @@ def make_spancat(
|
||||||
scorer: Optional[Callable],
|
scorer: Optional[Callable],
|
||||||
threshold: float,
|
threshold: float,
|
||||||
max_positive: Optional[int],
|
max_positive: Optional[int],
|
||||||
store_activations: bool,
|
save_activations: bool,
|
||||||
) -> "SpanCategorizer":
|
) -> "SpanCategorizer":
|
||||||
"""Create a SpanCategorizer component. The span categorizer consists of two
|
"""Create a SpanCategorizer component. The span categorizer consists of two
|
||||||
parts: a suggester function that proposes candidate spans, and a labeller
|
parts: a suggester function that proposes candidate spans, and a labeller
|
||||||
|
@ -144,7 +144,7 @@ def make_spancat(
|
||||||
0.5.
|
0.5.
|
||||||
max_positive (Optional[int]): Maximum number of labels to consider positive
|
max_positive (Optional[int]): Maximum number of labels to consider positive
|
||||||
per span. Defaults to None, indicating no limit.
|
per span. Defaults to None, indicating no limit.
|
||||||
store_activations (bool): store model activations in Doc when annotating.
|
save_activations (bool): save model activations in Doc when annotating.
|
||||||
"""
|
"""
|
||||||
return SpanCategorizer(
|
return SpanCategorizer(
|
||||||
nlp.vocab,
|
nlp.vocab,
|
||||||
|
@ -155,7 +155,7 @@ def make_spancat(
|
||||||
max_positive=max_positive,
|
max_positive=max_positive,
|
||||||
name=name,
|
name=name,
|
||||||
scorer=scorer,
|
scorer=scorer,
|
||||||
store_activations=store_activations,
|
save_activations=save_activations,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -194,7 +194,7 @@ class SpanCategorizer(TrainablePipe):
|
||||||
threshold: float = 0.5,
|
threshold: float = 0.5,
|
||||||
max_positive: Optional[int] = None,
|
max_positive: Optional[int] = None,
|
||||||
scorer: Optional[Callable] = spancat_score,
|
scorer: Optional[Callable] = spancat_score,
|
||||||
store_activations: bool = False,
|
save_activations: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize the span categorizer.
|
"""Initialize the span categorizer.
|
||||||
vocab (Vocab): The shared vocabulary.
|
vocab (Vocab): The shared vocabulary.
|
||||||
|
@ -227,7 +227,7 @@ class SpanCategorizer(TrainablePipe):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.name = name
|
self.name = name
|
||||||
self.scorer = scorer
|
self.scorer = scorer
|
||||||
self.store_activations = store_activations
|
self.save_activations = save_activations
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def key(self) -> str:
|
def key(self) -> str:
|
||||||
|
@ -317,7 +317,7 @@ class SpanCategorizer(TrainablePipe):
|
||||||
offset = 0
|
offset = 0
|
||||||
for i, doc in enumerate(docs):
|
for i, doc in enumerate(docs):
|
||||||
indices_i = indices[i].dataXd
|
indices_i = indices[i].dataXd
|
||||||
if self.store_activations:
|
if self.save_activations:
|
||||||
doc.activations[self.name] = {}
|
doc.activations[self.name] = {}
|
||||||
doc.activations[self.name]["indices"] = indices_i
|
doc.activations[self.name]["indices"] = indices_i
|
||||||
doc.activations[self.name]["scores"] = scores[
|
doc.activations[self.name]["scores"] = scores[
|
||||||
|
|
|
@ -53,7 +53,7 @@ DEFAULT_TAGGER_MODEL = Config().from_str(default_model_config)["model"]
|
||||||
"overwrite": False,
|
"overwrite": False,
|
||||||
"scorer": {"@scorers": "spacy.tagger_scorer.v1"},
|
"scorer": {"@scorers": "spacy.tagger_scorer.v1"},
|
||||||
"neg_prefix": "!",
|
"neg_prefix": "!",
|
||||||
"store_activations": False,
|
"save_activations": False,
|
||||||
},
|
},
|
||||||
default_score_weights={"tag_acc": 1.0},
|
default_score_weights={"tag_acc": 1.0},
|
||||||
)
|
)
|
||||||
|
@ -64,7 +64,7 @@ def make_tagger(
|
||||||
overwrite: bool,
|
overwrite: bool,
|
||||||
scorer: Optional[Callable],
|
scorer: Optional[Callable],
|
||||||
neg_prefix: str,
|
neg_prefix: str,
|
||||||
store_activations: bool,
|
save_activations: bool,
|
||||||
):
|
):
|
||||||
"""Construct a part-of-speech tagger component.
|
"""Construct a part-of-speech tagger component.
|
||||||
|
|
||||||
|
@ -74,7 +74,7 @@ def make_tagger(
|
||||||
with the rows summing to 1).
|
with the rows summing to 1).
|
||||||
"""
|
"""
|
||||||
return Tagger(nlp.vocab, model, name, overwrite=overwrite, scorer=scorer, neg_prefix=neg_prefix,
|
return Tagger(nlp.vocab, model, name, overwrite=overwrite, scorer=scorer, neg_prefix=neg_prefix,
|
||||||
store_activations=store_activations)
|
save_activations=save_activations)
|
||||||
|
|
||||||
|
|
||||||
def tagger_score(examples, **kwargs):
|
def tagger_score(examples, **kwargs):
|
||||||
|
@ -100,7 +100,7 @@ class Tagger(TrainablePipe):
|
||||||
overwrite=BACKWARD_OVERWRITE,
|
overwrite=BACKWARD_OVERWRITE,
|
||||||
scorer=tagger_score,
|
scorer=tagger_score,
|
||||||
neg_prefix="!",
|
neg_prefix="!",
|
||||||
store_activations: bool = False,
|
save_activations: bool = False,
|
||||||
):
|
):
|
||||||
"""Initialize a part-of-speech tagger.
|
"""Initialize a part-of-speech tagger.
|
||||||
|
|
||||||
|
@ -110,7 +110,7 @@ class Tagger(TrainablePipe):
|
||||||
losses during training.
|
losses during training.
|
||||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||||
Scorer.score_token_attr for the attribute "tag".
|
Scorer.score_token_attr for the attribute "tag".
|
||||||
store_activations (bool): store model activations in Doc when annotating.
|
save_activations (bool): save model activations in Doc when annotating.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#init
|
DOCS: https://spacy.io/api/tagger#init
|
||||||
"""
|
"""
|
||||||
|
@ -121,7 +121,7 @@ class Tagger(TrainablePipe):
|
||||||
cfg = {"labels": [], "overwrite": overwrite, "neg_prefix": neg_prefix}
|
cfg = {"labels": [], "overwrite": overwrite, "neg_prefix": neg_prefix}
|
||||||
self.cfg = dict(sorted(cfg.items()))
|
self.cfg = dict(sorted(cfg.items()))
|
||||||
self.scorer = scorer
|
self.scorer = scorer
|
||||||
self.store_activations = store_activations
|
self.save_activations = save_activations
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def labels(self):
|
def labels(self):
|
||||||
|
@ -185,7 +185,7 @@ class Tagger(TrainablePipe):
|
||||||
cdef bint overwrite = self.cfg["overwrite"]
|
cdef bint overwrite = self.cfg["overwrite"]
|
||||||
labels = self.labels
|
labels = self.labels
|
||||||
for i, doc in enumerate(docs):
|
for i, doc in enumerate(docs):
|
||||||
if self.store_activations:
|
if self.save_activations:
|
||||||
doc.activations[self.name] = {}
|
doc.activations[self.name] = {}
|
||||||
for act_name, acts in activations.items():
|
for act_name, acts in activations.items():
|
||||||
doc.activations[self.name][act_name] = acts[i]
|
doc.activations[self.name][act_name] = acts[i]
|
||||||
|
|
|
@ -78,7 +78,7 @@ subword_features = true
|
||||||
"threshold": 0.5,
|
"threshold": 0.5,
|
||||||
"model": DEFAULT_SINGLE_TEXTCAT_MODEL,
|
"model": DEFAULT_SINGLE_TEXTCAT_MODEL,
|
||||||
"scorer": {"@scorers": "spacy.textcat_scorer.v1"},
|
"scorer": {"@scorers": "spacy.textcat_scorer.v1"},
|
||||||
"store_activations": False,
|
"save_activations": False,
|
||||||
},
|
},
|
||||||
default_score_weights={
|
default_score_weights={
|
||||||
"cats_score": 1.0,
|
"cats_score": 1.0,
|
||||||
|
@ -100,7 +100,7 @@ def make_textcat(
|
||||||
model: Model[List[Doc], List[Floats2d]],
|
model: Model[List[Doc], List[Floats2d]],
|
||||||
threshold: float,
|
threshold: float,
|
||||||
scorer: Optional[Callable],
|
scorer: Optional[Callable],
|
||||||
store_activations: bool,
|
save_activations: bool,
|
||||||
) -> "TextCategorizer":
|
) -> "TextCategorizer":
|
||||||
"""Create a TextCategorizer component. The text categorizer predicts categories
|
"""Create a TextCategorizer component. The text categorizer predicts categories
|
||||||
over a whole document. It can learn one or more labels, and the labels are considered
|
over a whole document. It can learn one or more labels, and the labels are considered
|
||||||
|
@ -110,7 +110,7 @@ def make_textcat(
|
||||||
scores for each category.
|
scores for each category.
|
||||||
threshold (float): Cutoff to consider a prediction "positive".
|
threshold (float): Cutoff to consider a prediction "positive".
|
||||||
scorer (Optional[Callable]): The scoring method.
|
scorer (Optional[Callable]): The scoring method.
|
||||||
store_activations (bool): store model activations in Doc when annotating.
|
save_activations (bool): save model activations in Doc when annotating.
|
||||||
"""
|
"""
|
||||||
return TextCategorizer(
|
return TextCategorizer(
|
||||||
nlp.vocab,
|
nlp.vocab,
|
||||||
|
@ -118,7 +118,7 @@ def make_textcat(
|
||||||
name,
|
name,
|
||||||
threshold=threshold,
|
threshold=threshold,
|
||||||
scorer=scorer,
|
scorer=scorer,
|
||||||
store_activations=store_activations,
|
save_activations=save_activations,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -150,7 +150,7 @@ class TextCategorizer(TrainablePipe):
|
||||||
*,
|
*,
|
||||||
threshold: float,
|
threshold: float,
|
||||||
scorer: Optional[Callable] = textcat_score,
|
scorer: Optional[Callable] = textcat_score,
|
||||||
store_activations: bool = False,
|
save_activations: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize a text categorizer for single-label classification.
|
"""Initialize a text categorizer for single-label classification.
|
||||||
|
|
||||||
|
@ -171,7 +171,7 @@ class TextCategorizer(TrainablePipe):
|
||||||
cfg = {"labels": [], "threshold": threshold, "positive_label": None}
|
cfg = {"labels": [], "threshold": threshold, "positive_label": None}
|
||||||
self.cfg = dict(cfg)
|
self.cfg = dict(cfg)
|
||||||
self.scorer = scorer
|
self.scorer = scorer
|
||||||
self.store_activations = store_activations
|
self.save_activations = save_activations
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def support_missing_values(self):
|
def support_missing_values(self):
|
||||||
|
@ -224,7 +224,7 @@ class TextCategorizer(TrainablePipe):
|
||||||
"""
|
"""
|
||||||
probs = activations["probs"]
|
probs = activations["probs"]
|
||||||
for i, doc in enumerate(docs):
|
for i, doc in enumerate(docs):
|
||||||
if self.store_activations:
|
if self.save_activations:
|
||||||
doc.activations[self.name] = {}
|
doc.activations[self.name] = {}
|
||||||
doc.activations[self.name]["probs"] = probs[i]
|
doc.activations[self.name]["probs"] = probs[i]
|
||||||
for j, label in enumerate(self.labels):
|
for j, label in enumerate(self.labels):
|
||||||
|
|
|
@ -75,7 +75,7 @@ subword_features = true
|
||||||
"threshold": 0.5,
|
"threshold": 0.5,
|
||||||
"model": DEFAULT_MULTI_TEXTCAT_MODEL,
|
"model": DEFAULT_MULTI_TEXTCAT_MODEL,
|
||||||
"scorer": {"@scorers": "spacy.textcat_multilabel_scorer.v1"},
|
"scorer": {"@scorers": "spacy.textcat_multilabel_scorer.v1"},
|
||||||
"store_activations": False,
|
"save_activations": False,
|
||||||
},
|
},
|
||||||
default_score_weights={
|
default_score_weights={
|
||||||
"cats_score": 1.0,
|
"cats_score": 1.0,
|
||||||
|
@ -97,7 +97,7 @@ def make_multilabel_textcat(
|
||||||
model: Model[List[Doc], List[Floats2d]],
|
model: Model[List[Doc], List[Floats2d]],
|
||||||
threshold: float,
|
threshold: float,
|
||||||
scorer: Optional[Callable],
|
scorer: Optional[Callable],
|
||||||
store_activations: bool,
|
save_activations: bool,
|
||||||
) -> "TextCategorizer":
|
) -> "TextCategorizer":
|
||||||
"""Create a TextCategorizer component. The text categorizer predicts categories
|
"""Create a TextCategorizer component. The text categorizer predicts categories
|
||||||
over a whole document. It can learn one or more labels, and the labels are considered
|
over a whole document. It can learn one or more labels, and the labels are considered
|
||||||
|
@ -114,7 +114,7 @@ def make_multilabel_textcat(
|
||||||
name,
|
name,
|
||||||
threshold=threshold,
|
threshold=threshold,
|
||||||
scorer=scorer,
|
scorer=scorer,
|
||||||
store_activations=store_activations,
|
save_activations=save_activations,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -146,7 +146,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
|
||||||
*,
|
*,
|
||||||
threshold: float,
|
threshold: float,
|
||||||
scorer: Optional[Callable] = textcat_multilabel_score,
|
scorer: Optional[Callable] = textcat_multilabel_score,
|
||||||
store_activations: bool = False,
|
save_activations: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize a text categorizer for multi-label classification.
|
"""Initialize a text categorizer for multi-label classification.
|
||||||
|
|
||||||
|
@ -155,7 +155,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
|
||||||
name (str): The component instance name, used to add entries to the
|
name (str): The component instance name, used to add entries to the
|
||||||
losses during training.
|
losses during training.
|
||||||
threshold (float): Cutoff to consider a prediction "positive".
|
threshold (float): Cutoff to consider a prediction "positive".
|
||||||
store_activations (bool): store model activations in Doc when annotating.
|
save_activations (bool): save model activations in Doc when annotating.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/textcategorizer#init
|
DOCS: https://spacy.io/api/textcategorizer#init
|
||||||
"""
|
"""
|
||||||
|
@ -166,7 +166,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
|
||||||
cfg = {"labels": [], "threshold": threshold}
|
cfg = {"labels": [], "threshold": threshold}
|
||||||
self.cfg = dict(cfg)
|
self.cfg = dict(cfg)
|
||||||
self.scorer = scorer
|
self.scorer = scorer
|
||||||
self.store_activations = store_activations
|
self.save_activations = save_activations
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def support_missing_values(self):
|
def support_missing_values(self):
|
||||||
|
|
|
@ -6,4 +6,4 @@ cdef class TrainablePipe(Pipe):
|
||||||
cdef public object model
|
cdef public object model
|
||||||
cdef public object cfg
|
cdef public object cfg
|
||||||
cdef public object scorer
|
cdef public object scorer
|
||||||
cdef bint _store_activations
|
cdef bint _save_activations
|
||||||
|
|
|
@ -345,9 +345,9 @@ cdef class TrainablePipe(Pipe):
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def store_activations(self):
|
def save_activations(self):
|
||||||
return self._store_activations
|
return self._save_activations
|
||||||
|
|
||||||
@store_activations.setter
|
@save_activations.setter
|
||||||
def store_activations(self, store_activations: bool):
|
def save_activations(self, save_activations: bool):
|
||||||
self._store_activations = store_activations
|
self._save_activations = save_activations
|
||||||
|
|
|
@ -282,7 +282,7 @@ def test_empty_strings():
|
||||||
assert no_change == empty
|
assert no_change == empty
|
||||||
|
|
||||||
|
|
||||||
def test_store_activations():
|
def test_save_activations():
|
||||||
nlp = English()
|
nlp = English()
|
||||||
lemmatizer = cast(TrainablePipe, nlp.add_pipe("trainable_lemmatizer"))
|
lemmatizer = cast(TrainablePipe, nlp.add_pipe("trainable_lemmatizer"))
|
||||||
lemmatizer.min_tree_freq = 1
|
lemmatizer.min_tree_freq = 1
|
||||||
|
@ -295,7 +295,7 @@ def test_store_activations():
|
||||||
doc = nlp("This is a test.")
|
doc = nlp("This is a test.")
|
||||||
assert "trainable_lemmatizer" not in doc.activations
|
assert "trainable_lemmatizer" not in doc.activations
|
||||||
|
|
||||||
lemmatizer.store_activations = True
|
lemmatizer.save_activations = True
|
||||||
doc = nlp("This is a test.")
|
doc = nlp("This is a test.")
|
||||||
assert list(doc.activations["trainable_lemmatizer"].keys()) == ["probs", "guesses"]
|
assert list(doc.activations["trainable_lemmatizer"].keys()) == ["probs", "guesses"]
|
||||||
assert doc.activations["trainable_lemmatizer"]["probs"].shape == (5, nO)
|
assert doc.activations["trainable_lemmatizer"]["probs"].shape == (5, nO)
|
||||||
|
|
|
@ -1179,7 +1179,7 @@ def test_threshold(meet_threshold: bool, config: Dict[str, Any]):
|
||||||
assert doc.ents[0].kb_id_ == entity_id if meet_threshold else EntityLinker.NIL
|
assert doc.ents[0].kb_id_ == entity_id if meet_threshold else EntityLinker.NIL
|
||||||
|
|
||||||
|
|
||||||
def test_store_activations():
|
def test_save_activations():
|
||||||
nlp = English()
|
nlp = English()
|
||||||
vector_length = 3
|
vector_length = 3
|
||||||
assert "Q2146908" not in nlp.vocab.strings
|
assert "Q2146908" not in nlp.vocab.strings
|
||||||
|
@ -1231,7 +1231,7 @@ def test_store_activations():
|
||||||
doc = nlp("Russ Cochran was a publisher")
|
doc = nlp("Russ Cochran was a publisher")
|
||||||
assert "entity_linker" not in doc.activations
|
assert "entity_linker" not in doc.activations
|
||||||
|
|
||||||
entity_linker.store_activations = True
|
entity_linker.save_activations = True
|
||||||
doc = nlp("Russ Cochran was a publisher")
|
doc = nlp("Russ Cochran was a publisher")
|
||||||
assert set(doc.activations["entity_linker"].keys()) == {"ents", "scores"}
|
assert set(doc.activations["entity_linker"].keys()) == {"ents", "scores"}
|
||||||
ents = doc.activations["entity_linker"]["ents"]
|
ents = doc.activations["entity_linker"]["ents"]
|
||||||
|
|
|
@ -201,7 +201,7 @@ def test_overfitting_IO():
|
||||||
assert [t.pos_ for t in doc] == gold_pos_tags
|
assert [t.pos_ for t in doc] == gold_pos_tags
|
||||||
|
|
||||||
|
|
||||||
def test_store_activations():
|
def test_save_activations():
|
||||||
# Simple test to try and quickly overfit the morphologizer - ensuring the ML models work correctly
|
# Simple test to try and quickly overfit the morphologizer - ensuring the ML models work correctly
|
||||||
nlp = English()
|
nlp = English()
|
||||||
morphologizer = cast(TrainablePipe, nlp.add_pipe("morphologizer"))
|
morphologizer = cast(TrainablePipe, nlp.add_pipe("morphologizer"))
|
||||||
|
@ -213,7 +213,7 @@ def test_store_activations():
|
||||||
doc = nlp("This is a test.")
|
doc = nlp("This is a test.")
|
||||||
assert "morphologizer" not in doc.activations
|
assert "morphologizer" not in doc.activations
|
||||||
|
|
||||||
morphologizer.store_activations = True
|
morphologizer.save_activations = True
|
||||||
doc = nlp("This is a test.")
|
doc = nlp("This is a test.")
|
||||||
assert "morphologizer" in doc.activations
|
assert "morphologizer" in doc.activations
|
||||||
assert set(doc.activations["morphologizer"].keys()) == {"guesses", "probs"}
|
assert set(doc.activations["morphologizer"].keys()) == {"guesses", "probs"}
|
||||||
|
|
|
@ -105,7 +105,7 @@ def test_overfitting_IO():
|
||||||
assert "senter" not in nlp.pipe_labels
|
assert "senter" not in nlp.pipe_labels
|
||||||
|
|
||||||
|
|
||||||
def test_store_activations():
|
def test_save_activations():
|
||||||
# Simple test to try and quickly overfit the senter - ensuring the ML models work correctly
|
# Simple test to try and quickly overfit the senter - ensuring the ML models work correctly
|
||||||
nlp = English()
|
nlp = English()
|
||||||
senter = cast(TrainablePipe, nlp.add_pipe("senter"))
|
senter = cast(TrainablePipe, nlp.add_pipe("senter"))
|
||||||
|
@ -120,7 +120,7 @@ def test_store_activations():
|
||||||
doc = nlp("This is a test.")
|
doc = nlp("This is a test.")
|
||||||
assert "senter" not in doc.activations
|
assert "senter" not in doc.activations
|
||||||
|
|
||||||
senter.store_activations = True
|
senter.save_activations = True
|
||||||
doc = nlp("This is a test.")
|
doc = nlp("This is a test.")
|
||||||
assert "senter" in doc.activations
|
assert "senter" in doc.activations
|
||||||
assert set(doc.activations["senter"].keys()) == {"guesses", "probs"}
|
assert set(doc.activations["senter"].keys()) == {"guesses", "probs"}
|
||||||
|
|
|
@ -421,7 +421,7 @@ def test_set_candidates():
|
||||||
assert docs[0].spans["candidates"][4].text == "Just a"
|
assert docs[0].spans["candidates"][4].text == "Just a"
|
||||||
|
|
||||||
|
|
||||||
def test_store_activations():
|
def test_save_activations():
|
||||||
# Simple test to try and quickly overfit the spancat component - ensuring the ML models work correctly
|
# Simple test to try and quickly overfit the spancat component - ensuring the ML models work correctly
|
||||||
nlp = English()
|
nlp = English()
|
||||||
spancat = nlp.add_pipe("spancat", config={"spans_key": SPAN_KEY})
|
spancat = nlp.add_pipe("spancat", config={"spans_key": SPAN_KEY})
|
||||||
|
@ -434,7 +434,7 @@ def test_store_activations():
|
||||||
doc = nlp("This is a test.")
|
doc = nlp("This is a test.")
|
||||||
assert "spancat" not in doc.activations
|
assert "spancat" not in doc.activations
|
||||||
|
|
||||||
spancat.store_activations = True
|
spancat.save_activations = True
|
||||||
doc = nlp("This is a test.")
|
doc = nlp("This is a test.")
|
||||||
assert set(doc.activations["spancat"].keys()) == {"indices", "scores"}
|
assert set(doc.activations["spancat"].keys()) == {"indices", "scores"}
|
||||||
assert doc.activations["spancat"]["indices"].shape == (12, 2)
|
assert doc.activations["spancat"]["indices"].shape == (12, 2)
|
||||||
|
|
|
@ -213,7 +213,7 @@ def test_overfitting_IO():
|
||||||
assert doc3[0].tag_ != "N"
|
assert doc3[0].tag_ != "N"
|
||||||
|
|
||||||
|
|
||||||
def test_store_activations():
|
def test_save_activations():
|
||||||
# Simple test to try and quickly overfit the tagger - ensuring the ML models work correctly
|
# Simple test to try and quickly overfit the tagger - ensuring the ML models work correctly
|
||||||
nlp = English()
|
nlp = English()
|
||||||
tagger = cast(TrainablePipe, nlp.add_pipe("tagger"))
|
tagger = cast(TrainablePipe, nlp.add_pipe("tagger"))
|
||||||
|
@ -225,7 +225,7 @@ def test_store_activations():
|
||||||
doc = nlp("This is a test.")
|
doc = nlp("This is a test.")
|
||||||
assert "tagger" not in doc.activations
|
assert "tagger" not in doc.activations
|
||||||
|
|
||||||
tagger.store_activations = True
|
tagger.save_activations = True
|
||||||
doc = nlp("This is a test.")
|
doc = nlp("This is a test.")
|
||||||
assert "tagger" in doc.activations
|
assert "tagger" in doc.activations
|
||||||
assert set(doc.activations["tagger"].keys()) == {"guesses", "probs"}
|
assert set(doc.activations["tagger"].keys()) == {"guesses", "probs"}
|
||||||
|
|
|
@ -874,7 +874,7 @@ def test_textcat_multi_threshold():
|
||||||
assert scores["cats_f_per_type"]["POSITIVE"]["r"] == 1.0
|
assert scores["cats_f_per_type"]["POSITIVE"]["r"] == 1.0
|
||||||
|
|
||||||
|
|
||||||
def test_store_activations():
|
def test_save_activations():
|
||||||
fix_random_seed(0)
|
fix_random_seed(0)
|
||||||
nlp = English()
|
nlp = English()
|
||||||
textcat = cast(TrainablePipe, nlp.add_pipe("textcat"))
|
textcat = cast(TrainablePipe, nlp.add_pipe("textcat"))
|
||||||
|
@ -888,13 +888,13 @@ def test_store_activations():
|
||||||
doc = nlp("This is a test.")
|
doc = nlp("This is a test.")
|
||||||
assert "textcat" not in doc.activations
|
assert "textcat" not in doc.activations
|
||||||
|
|
||||||
textcat.store_activations = True
|
textcat.save_activations = True
|
||||||
doc = nlp("This is a test.")
|
doc = nlp("This is a test.")
|
||||||
assert list(doc.activations["textcat"].keys()) == ["probs"]
|
assert list(doc.activations["textcat"].keys()) == ["probs"]
|
||||||
assert doc.activations["textcat"]["probs"].shape == (nO,)
|
assert doc.activations["textcat"]["probs"].shape == (nO,)
|
||||||
|
|
||||||
|
|
||||||
def test_store_activations_multi():
|
def test_save_activations_multi():
|
||||||
fix_random_seed(0)
|
fix_random_seed(0)
|
||||||
nlp = English()
|
nlp = English()
|
||||||
textcat = cast(TrainablePipe, nlp.add_pipe("textcat_multilabel"))
|
textcat = cast(TrainablePipe, nlp.add_pipe("textcat_multilabel"))
|
||||||
|
@ -908,7 +908,7 @@ def test_store_activations_multi():
|
||||||
doc = nlp("This is a test.")
|
doc = nlp("This is a test.")
|
||||||
assert "textcat_multilabel" not in doc.activations
|
assert "textcat_multilabel" not in doc.activations
|
||||||
|
|
||||||
textcat.store_activations = True
|
textcat.save_activations = True
|
||||||
doc = nlp("This is a test.")
|
doc = nlp("This is a test.")
|
||||||
assert list(doc.activations["textcat_multilabel"].keys()) == ["probs"]
|
assert list(doc.activations["textcat_multilabel"].keys()) == ["probs"]
|
||||||
assert doc.activations["textcat_multilabel"]["probs"].shape == (nO,)
|
assert doc.activations["textcat_multilabel"]["probs"].shape == (nO,)
|
||||||
|
|
|
@ -751,23 +751,23 @@ The L2 norm of the document's vector representation.
|
||||||
|
|
||||||
## Attributes {#attributes}
|
## Attributes {#attributes}
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `text` | A string representation of the document text. ~~str~~ |
|
| `text` | A string representation of the document text. ~~str~~ |
|
||||||
| `text_with_ws` | An alias of `Doc.text`, provided for duck-type compatibility with `Span` and `Token`. ~~str~~ |
|
| `text_with_ws` | An alias of `Doc.text`, provided for duck-type compatibility with `Span` and `Token`. ~~str~~ |
|
||||||
| `mem` | The document's local memory heap, for all C data it owns. ~~cymem.Pool~~ |
|
| `mem` | The document's local memory heap, for all C data it owns. ~~cymem.Pool~~ |
|
||||||
| `vocab` | The store of lexical types. ~~Vocab~~ |
|
| `vocab` | The store of lexical types. ~~Vocab~~ |
|
||||||
| `tensor` <Tag variant="new">2</Tag> | Container for dense vector representations. ~~numpy.ndarray~~ |
|
| `tensor` <Tag variant="new">2</Tag> | Container for dense vector representations. ~~numpy.ndarray~~ |
|
||||||
| `user_data` | A generic storage area, for user custom data. ~~Dict[str, Any]~~ |
|
| `user_data` | A generic storage area, for user custom data. ~~Dict[str, Any]~~ |
|
||||||
| `lang` <Tag variant="new">2.1</Tag> | Language of the document's vocabulary. ~~int~~ |
|
| `lang` <Tag variant="new">2.1</Tag> | Language of the document's vocabulary. ~~int~~ |
|
||||||
| `lang_` <Tag variant="new">2.1</Tag> | Language of the document's vocabulary. ~~str~~ |
|
| `lang_` <Tag variant="new">2.1</Tag> | Language of the document's vocabulary. ~~str~~ |
|
||||||
| `sentiment` | The document's positivity/negativity score, if available. ~~float~~ |
|
| `sentiment` | The document's positivity/negativity score, if available. ~~float~~ |
|
||||||
| `user_hooks` | A dictionary that allows customization of the `Doc`'s properties. ~~Dict[str, Callable]~~ |
|
| `user_hooks` | A dictionary that allows customization of the `Doc`'s properties. ~~Dict[str, Callable]~~ |
|
||||||
| `user_token_hooks` | A dictionary that allows customization of properties of `Token` children. ~~Dict[str, Callable]~~ |
|
| `user_token_hooks` | A dictionary that allows customization of properties of `Token` children. ~~Dict[str, Callable]~~ |
|
||||||
| `user_span_hooks` | A dictionary that allows customization of properties of `Span` children. ~~Dict[str, Callable]~~ |
|
| `user_span_hooks` | A dictionary that allows customization of properties of `Span` children. ~~Dict[str, Callable]~~ |
|
||||||
| `has_unknown_spaces` | Whether the document was constructed without known spacing between tokens (typically when created from gold tokenization). ~~bool~~ |
|
| `has_unknown_spaces` | Whether the document was constructed without known spacing between tokens (typically when created from gold tokenization). ~~bool~~ |
|
||||||
| `_` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). ~~Underscore~~ |
|
| `_` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). ~~Underscore~~ |
|
||||||
| `activations` | A dictionary of activations per trainable pipe (available when the `store_activations` option of a pipe is enabled). ~~Dict[str, Option[Any]]~~ |
|
| `activations` | A dictionary of activations per trainable pipe (available when the `save_activations` option of a pipe is enabled). ~~Dict[str, Optional[Any]]~~ |
|
||||||
|
|
||||||
## Serialization fields {#serialization-fields}
|
## Serialization fields {#serialization-fields}
|
||||||
|
|
||||||
|
|
|
@ -44,15 +44,15 @@ architectures and their arguments and hyperparameters.
|
||||||
> nlp.add_pipe("trainable_lemmatizer", config=config, name="lemmatizer")
|
> nlp.add_pipe("trainable_lemmatizer", config=config, name="lemmatizer")
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Setting | Description |
|
| Setting | Description |
|
||||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||||
| `model` | A model instance that predicts the edit tree probabilities. The output vectors should match the number of edit trees in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
|
| `model` | A model instance that predicts the edit tree probabilities. The output vectors should match the number of edit trees in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||||
| `backoff` | ~~Token~~ attribute to use when no applicable edit tree is found. Defaults to `orth`. ~~str~~ |
|
| `backoff` | ~~Token~~ attribute to use when no applicable edit tree is found. Defaults to `orth`. ~~str~~ |
|
||||||
| `min_tree_freq` | Minimum frequency of an edit tree in the training set to be used. Defaults to `3`. ~~int~~ |
|
| `min_tree_freq` | Minimum frequency of an edit tree in the training set to be used. Defaults to `3`. ~~int~~ |
|
||||||
| `overwrite` | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
|
| `overwrite` | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
|
||||||
| `top_k` | The number of most probable edit trees to try before resorting to `backoff`. Defaults to `1`. ~~int~~ |
|
| `top_k` | The number of most probable edit trees to try before resorting to `backoff`. Defaults to `1`. ~~int~~ |
|
||||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"lemma"`. ~~Optional[Callable]~~ |
|
| `scorer` | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"lemma"`. ~~Optional[Callable]~~ |
|
||||||
| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
|
| `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
|
||||||
|
|
||||||
```python
|
```python
|
||||||
%%GITHUB_SPACY/spacy/pipeline/edit_tree_lemmatizer.py
|
%%GITHUB_SPACY/spacy/pipeline/edit_tree_lemmatizer.py
|
||||||
|
|
|
@ -64,7 +64,7 @@ architectures and their arguments and hyperparameters.
|
||||||
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
|
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
|
||||||
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~ |
|
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~ |
|
||||||
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ |
|
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ |
|
||||||
| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"ents"` and `"scores"`. ~~Union[bool, list[str]]~~ |
|
| `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"ents"` and `"scores"`. ~~Union[bool, list[str]]~~ |
|
||||||
| `threshold` <Tag variant="new">3.4</Tag> | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the threshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ |
|
| `threshold` <Tag variant="new">3.4</Tag> | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the threshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ |
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
|
|
@ -48,7 +48,7 @@ architectures and their arguments and hyperparameters.
|
||||||
| `overwrite` <Tag variant="new">3.2</Tag> | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~ |
|
| `overwrite` <Tag variant="new">3.2</Tag> | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~ |
|
||||||
| `extend` <Tag variant="new">3.2</Tag> | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~ |
|
| `extend` <Tag variant="new">3.2</Tag> | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~ |
|
||||||
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
|
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
|
||||||
| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
|
| `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
|
||||||
|
|
||||||
```python
|
```python
|
||||||
%%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx
|
%%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx
|
||||||
|
@ -400,8 +400,8 @@ coarse-grained POS as the feature `POS`.
|
||||||
> assert "Mood=Ind|POS=VERB|Tense=Past|VerbForm=Fin" in morphologizer.labels
|
> assert "Mood=Ind|POS=VERB|Tense=Past|VerbForm=Fin" in morphologizer.labels
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ----------- | ------------------------------------------------------ |
|
| ----------- | --------------------------------------------------------- |
|
||||||
| **RETURNS** | The labels added to the component. ~~Iterable[str, ...]~~ |
|
| **RETURNS** | The labels added to the component. ~~Iterable[str, ...]~~ |
|
||||||
|
|
||||||
## Morphologizer.label_data {#label_data tag="property" new="3"}
|
## Morphologizer.label_data {#label_data tag="property" new="3"}
|
||||||
|
|
|
@ -44,7 +44,7 @@ architectures and their arguments and hyperparameters.
|
||||||
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
|
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||||
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
|
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
|
||||||
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ |
|
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"`. ~~Optional[Callable]~~ |
|
||||||
| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
|
| `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
|
||||||
|
|
||||||
```python
|
```python
|
||||||
%%GITHUB_SPACY/spacy/pipeline/senter.pyx
|
%%GITHUB_SPACY/spacy/pipeline/senter.pyx
|
||||||
|
|
|
@ -52,15 +52,15 @@ architectures and their arguments and hyperparameters.
|
||||||
> nlp.add_pipe("spancat", config=config)
|
> nlp.add_pipe("spancat", config=config)
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Setting | Description |
|
| Setting | Description |
|
||||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `suggester` | A function that [suggests spans](#suggesters). Spans are returned as a ragged array with two integer columns, for the start and end positions. Defaults to [`ngram_suggester`](#ngram_suggester). ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~ |
|
| `suggester` | A function that [suggests spans](#suggesters). Spans are returned as a ragged array with two integer columns, for the start and end positions. Defaults to [`ngram_suggester`](#ngram_suggester). ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~ |
|
||||||
| `model` | A model instance that is given a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. Defaults to [SpanCategorizer](/api/architectures#SpanCategorizer). ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~ |
|
| `model` | A model instance that is given a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. Defaults to [SpanCategorizer](/api/architectures#SpanCategorizer). ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~ |
|
||||||
| `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ |
|
| `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ |
|
||||||
| `threshold` | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~ |
|
| `threshold` | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~ |
|
||||||
| `max_positive` | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. ~~Optional[int]~~ |
|
| `max_positive` | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. ~~Optional[int]~~ |
|
||||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ |
|
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ |
|
||||||
| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"indices"` and `"scores"`. ~~Union[bool, list[str]]~~ |
|
| `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"indices"` and `"scores"`. ~~Union[bool, list[str]]~~ |
|
||||||
|
|
||||||
```python
|
```python
|
||||||
%%GITHUB_SPACY/spacy/pipeline/spancat.py
|
%%GITHUB_SPACY/spacy/pipeline/spancat.py
|
||||||
|
|
|
@ -46,7 +46,7 @@ architectures and their arguments and hyperparameters.
|
||||||
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
|
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
|
||||||
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~ |
|
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~ |
|
||||||
| `neg_prefix` <Tag variant="new">3.2.1</Tag> | The prefix used to specify incorrect tags while training. The tagger will learn not to predict exactly this tag. Defaults to `!`. ~~str~~ |
|
| `neg_prefix` <Tag variant="new">3.2.1</Tag> | The prefix used to specify incorrect tags while training. The tagger will learn not to predict exactly this tag. Defaults to `!`. ~~str~~ |
|
||||||
| `store_activations` | Store activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
|
| `save_activations` | Save activations in `Doc` when annotating. Supported activations are `"probs"` and `"guesses"`. ~~Union[bool, list[str]]~~ |
|
||||||
|
|
||||||
```python
|
```python
|
||||||
%%GITHUB_SPACY/spacy/pipeline/tagger.pyx
|
%%GITHUB_SPACY/spacy/pipeline/tagger.pyx
|
||||||
|
|
|
@ -117,15 +117,15 @@ Create a new pipeline instance. In your application, you would normally use a
|
||||||
shortcut for this and instantiate the component using its string name and
|
shortcut for this and instantiate the component using its string name and
|
||||||
[`nlp.add_pipe`](/api/language#create_pipe).
|
[`nlp.add_pipe`](/api/language#create_pipe).
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ------------------- | -------------------------------------------------------------------------------------------------------------------------------- |
|
| ------------------ | -------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
||||||
| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
|
| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||||
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ |
|
| `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ |
|
||||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ |
|
| `scorer` | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ |
|
||||||
| `store_activations` | Store activations in `Doc` when annotating. The supported activations is `"probs"`. ~~Union[bool, list[str]]~~ |
|
| `save_activations` | Save activations in `Doc` when annotating. The supported activation is `"probs"`. ~~Union[bool, list[str]]~~ |
|
||||||
|
|
||||||
## TextCategorizer.\_\_call\_\_ {#call tag="method"}
|
## TextCategorizer.\_\_call\_\_ {#call tag="method"}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user