Add _n_labels property to SpanCategorizer

Instead of using len(self.labels) in initialize() I am using a private property self._n_labels. This achieves implementation parity and allows me to delete the whole initialize() method for spancat_exclusive (since it's now the same with spancat).
2025-10-27 22:21:08 +03:00 · 2022-11-02 12:27:54 +08:00 · 2022-11-02 12:27:54 +08:00 · bdf2a1d1fe
commit bdf2a1d1fe
parent 023a1a6c04
2 changed files with 6 additions and 52 deletions
--- a/spacy/pipeline/spancat.py
+++ b/spacy/pipeline/spancat.py
@ -260,6 +260,11 @@ class SpanCategorizer(TrainablePipe):
        """
        return list(self.labels)

+    @property
+    def _n_labels(self) -> int:
+        """RETURNS (int): Number of labels."""
+        return len(self.labels)
+
    def predict(self, docs: Iterable[Doc]):
        """Apply the pipeline's model to a batch of docs, without modifying them.

@ -432,7 +437,7 @@ class SpanCategorizer(TrainablePipe):
        if subbatch:
            docs = [eg.x for eg in subbatch]
            spans = build_ngram_suggester(sizes=[1])(docs)
-            Y = self.model.ops.alloc2f(spans.dataXd.shape[0], len(self.labels))
+            Y = self.model.ops.alloc2f(spans.dataXd.shape[0], self._n_labels)
            self.model.initialize(X=(docs, spans), Y=Y)
        else:
            self.model.initialize()
--- a/spacy/pipeline/spancat_exclusive.py
+++ b/spacy/pipeline/spancat_exclusive.py
@ -378,57 +378,6 @@ class SpanCategorizerExclusive(TrainablePipe):
        loss = float((d_scores**2).sum())
        return loss, d_scores

-    def initialize(
-        self,
-        get_examples: Callable[[], Iterable[Example]],
-        *,
-        nlp: Optional[Language] = None,
-        labels: Optional[List[str]] = None,
-    ) -> None:
-        """Initialize the pipe for training, using a representative set
-        of data examples.
-
-        get_examples (Callable[[], Iterable[Example]]): Function that
-            returns a representative sample of gold-standard Example objects.
-        nlp (Optional[Language]): The current nlp object the component is part of.
-        labels (Optional[List[str]]): The labels to add to the component, typically generated by the
-            `init labels` command. If no labels are provided, the get_examples
-            callback is used to extract the labels from the data.
-
-        DOCS: https://spacy.io/api/spancategorizerexclusive#initialize
-        """
-        subbatch: List[Example] = []
-        if labels is not None:
-            for label in labels:
-                self.add_label(label)
-        for eg in get_examples():
-            if labels is None:
-                for span in eg.reference.spans.get(self.key, []):
-                    self.add_label(span.label_)
-            if len(subbatch) < 10:
-                subbatch.append(eg)
-        self._require_labels()
-        if subbatch:
-            docs = [eg.x for eg in subbatch]
-            spans = build_ngram_suggester(sizes=[1])(docs)
-            # + 1 for the "no-label" category
-            Y = self.model.ops.alloc2f(spans.dataXd.shape[0], self._n_labels)
-            self.model.initialize(X=(docs, spans), Y=Y)
-        else:
-            # FIXME: Ideally we want to raise an error to avoid implicitly
-            # raising it when initializing without examples. For now, we'll just
-            # copy over what `spancat` did.
-            self.model.initialize()
-
-    def _validate_categories(self, examples: Iterable[Example]):
-        # TODO
-        pass
-
-    def _get_aligned_spans(self, eg: Example):
-        return eg.get_aligned_spans_y2x(
-            eg.reference.spans.get(self.key, []), allow_overlap=True
-        )
-
    def _make_span_group(
        self,
        doc: Doc,