Inherit from SpanCat instead of TrainablePipe

This commit changes the inheritance structure of Exclusive_Spancat: it now inherits from SpanCategorizer rather than TrainablePipe. This allows me to remove duplicate methods that are already present in the parent class.
2025-07-09 06:13:08 +03:00 · 2022-11-02 12:30:41 +08:00 · 2022-11-02 12:30:41 +08:00 · 8548e2c311
commit 8548e2c311
parent bdf2a1d1fe
1 changed files with 5 additions and 118 deletions
--- a/spacy/pipeline/spancat_exclusive.py
+++ b/spacy/pipeline/spancat_exclusive.py
@ -13,7 +13,7 @@ from ..tokens import Doc, Span, SpanGroup
 from ..training import Example, validate_examples
 from ..vocab import Vocab
 from .spancat import spancat_score, build_ngram_suggester
-from .trainable_pipe import TrainablePipe
+from .spancat import SpanCategorizer
 spancat_exclusive_default_config = """
@ -71,7 +71,7 @@ def make_spancat(
    scorer: Optional[Callable],
    negative_weight: float = 1.0,
    allow_overlap: bool = True,
-) -> "SpanCategorizerExclusive":
+) -> "Exclusive_SpanCategorizer":
    """Create a SpanCategorizerExclusive component. The span categorizer consists of two
    parts: a suggester function that proposes candidate spans, and a labeller
    model that predicts a single label for each span.
@ -94,7 +94,7 @@ def make_spancat(
    allow_overlap (bool): If True the data is assumed to
        contain overlapping spans.
    """
-    return SpanCategorizerExclusive(
+    return Exclusive_SpanCategorizer(
        nlp.vocab,
        suggester=suggester,
        model=model,
@ -127,8 +127,8 @@ class Ranges:
        return False
-class SpanCategorizerExclusive(TrainablePipe):
+class Exclusive_SpanCategorizer(SpanCategorizer):
-    """Pipeline component to label spans of text.
+    """Pipeline component to label non-overlapping spans of text.
    DOCS: https://spacy.io/api/spancategorizerexclusive
    """
@ -176,47 +176,6 @@ class SpanCategorizerExclusive(TrainablePipe):
        self.name = name
        self.scorer = scorer
    @property
    def key(self) -> str:
        """Name of the doc.spans entry the predicted spans are stored under.

        The same key is used to look up spans on the reference document
        during initialization and training. The value is read from the
        "spans_key" entry of the component config and converted to a string.
        """
        spans_key = self.cfg["spans_key"]
        return str(spans_key)
    def add_label(self, label: str) -> int:
        """Register a new label with the pipe.

        label (str): The label to add.
        RETURNS (int): 1 if the label was added, 0 if it was already present.
        RAISES (ValueError): If the label is not a string.

        DOCS: https://spacy.io/api/spancategorizerexclusive#add_label
        """
        if not isinstance(label, str):
            raise ValueError(Errors.E187)
        if label not in self.labels:
            # Confirm an extra label is allowed before touching any state.
            self._allow_extra_label()
            self.cfg["labels"].append(label)  # type: ignore
            self.vocab.strings.add(label)
            return 1
        return 0
    @property
    def labels(self) -> Tuple[str]:
        """RETURNS (Tuple[str]): The labels stored in the component config,
        as an immutable tuple.

        DOCS: https://spacy.io/api/spancategorizerexclusive#labels
        """
        configured = self.cfg["labels"]  # type: ignore
        return tuple(configured)
    @property
    def label_data(self) -> List[str]:
        """RETURNS (List[str]): The component's labels as a new list.

        DOCS: https://spacy.io/api/spancategorizerexclusive#label_data
        """
        return [*self.labels]
    @property
    def label_map(self) -> Dict[str, int]:
        """RETURNS (Dict[str, int]): The label map."""
@ -232,37 +191,6 @@ class SpanCategorizerExclusive(TrainablePipe):
        """RETURNS (int): Number of labels including the negative label."""
        return len(self.label_data) + 1
    def predict(self, docs: Iterable[Doc]):
        """Run the suggester and the model on a batch of docs without
        modifying the docs.

        docs (Iterable[Doc]): The documents to predict.
        RETURNS: A pair of the suggester output and the model's scores for it.

        DOCS: https://spacy.io/api/spancategorizerexclusive#predict
        """
        model = self.model
        span_indices = self.suggester(docs, ops=model.ops)
        span_scores = model.predict((docs, span_indices))  # type: ignore
        return span_indices, span_scores
    def set_candidates(
        self, docs: Iterable[Doc], *, candidates_key: str = "candidates"
    ) -> None:
        """Store the suggester's candidate spans on each doc. Intended to be
        used for debugging purposes.

        docs (Iterable[Doc]): The documents to modify.
        candidates_key (str): Key of the Doc.spans dict to save the
            candidate spans under.

        DOCS: https://spacy.io/api/spancategorizerexclusive#set_candidates
        """
        proposed = self.suggester(docs, ops=self.model.ops)
        for doc_candidates, doc in zip(proposed, docs):  # type: ignore
            # Start from an empty entry, replacing anything stored under this key.
            doc.spans[candidates_key] = []
            for bounds in doc_candidates.dataXd:
                start, end = bounds[0], bounds[1]
                doc.spans[candidates_key].append(doc[start:end])
    def set_annotations(self, docs: Iterable[Doc], indices_scores) -> None:
        """Modify a batch of Doc objects, using pre-computed scores.
@ -286,47 +214,6 @@ class SpanCategorizerExclusive(TrainablePipe):
            )
            offset += indices.lengths[i]
    def update(
        self,
        examples: Iterable[Example],
        *,
        drop: float = 0.0,
        sgd: Optional[Optimizer] = None,
        losses: Optional[Dict[str, float]] = None,
    ) -> Dict[str, float]:
        """Learn from a batch of documents and gold-standard information,
        updating the pipe's model.

        Runs the suggester on the predicted docs, starts a model update,
        computes the loss with get_loss and backpropagates the gradient.
        Returns early, leaving the model untouched, when no doc has tokens
        or the suggester proposes no spans.

        examples (Iterable[Example]): A batch of Example objects.
        drop (float): The dropout rate.
        sgd (thinc.api.Optimizer): The optimizer. If None, the optimizer
            step is skipped.
        losses (Dict[str, float]): Optional record of the loss during training.
            Updated using the component name as the key.
        RETURNS (Dict[str, float]): The updated losses dictionary.

        DOCS: https://spacy.io/api/spancategorizerexclusive#update
        """
        losses = {} if losses is None else losses
        # Register the component's entry up front so it exists on early return.
        losses.setdefault(self.name, 0.0)
        validate_examples(examples, "SpanCategorizer.update")
        self._validate_categories(examples)
        has_tokens = any(
            len(eg.predicted) if eg.predicted else 0 for eg in examples
        )
        if not has_tokens:
            # Handle cases where there are no tokens in any docs.
            return losses
        predicted_docs = [eg.predicted for eg in examples]
        candidates = self.suggester(predicted_docs, ops=self.model.ops)
        if candidates.lengths.sum() == 0:
            # Nothing was suggested, so there is nothing to score.
            return losses
        set_dropout_rate(self.model, drop)
        scores, backprop = self.model.begin_update((predicted_docs, candidates))
        batch_loss, d_scores = self.get_loss(examples, (candidates, scores))
        backprop(d_scores)  # type: ignore
        if sgd is not None:
            self.finish_update(sgd)
        losses[self.name] += batch_loss
        return losses
    def get_loss(
        self, examples: Iterable[Example], spans_scores: Tuple[Ragged, Floats2d]
    ) -> Tuple[float, float]: