Update docstrings and docs

2025-08-04 04:10:20 +03:00 · 2023-06-05 12:55:41 +02:00 · 2023-06-05 12:55:41 +02:00 · b2c56a089e
commit b2c56a089e
parent d52d7d9c87
2 changed files with 40 additions and 18 deletions
--- a/spacy/pipeline/span_finder.py
+++ b/spacy/pipeline/span_finder.py
@ -78,8 +78,10 @@ def make_span_finder(
        initialization and training, the component will look for spans on the
        reference document under the same key.
    threshold (float): Minimum probability to consider a prediction positive.
-    max_length (Optional[int]): Max length of the produced spans, defaults to None meaning unlimited length.
-    min_length (Optional[int]): Min length of the produced spans, defaults to None meaining shortest span is length 1.
+    max_length (Optional[int]): Maximum length of the produced spans, defaults
+        to None meaning unlimited length.
+    min_length (Optional[int]): Minimum length of the produced spans, defaults
+        to None meaning shortest span length is 1.
    scorer (Optional[Callable]): The scoring method. Defaults to
        Scorer.score_spans for the Doc.spans[spans_key] with overlapping
        spans allowed.
@ -124,7 +126,10 @@ def _char_indices(span: Span) -> Tuple[int, int]:


 class SpanFinder(TrainablePipe):
-    """Pipeline that learns span boundaries"""
+    """Pipeline that learns span boundaries.
+
+    DOCS: https://spacy.io/api/spanfinder
+    """

    def __init__(
        self,
@ -153,6 +158,8 @@ class SpanFinder(TrainablePipe):
            defaults to None meaning unlimited length.
        min_length (Optional[int]): Minimum length of the produced spans,
            defaults to None meaning shortest span length is 1.
+
+        DOCS: https://spacy.io/api/spanfinder#init
        """
        self.vocab = nlp.vocab
        if (max_length is not None and max_length < 1) or (
@ -172,9 +179,13 @@ class SpanFinder(TrainablePipe):
        }

    def predict(self, docs: Iterable[Doc]):
-        """Apply the pipeline's model to a batch of docs, without modifying them.
+        """Apply the pipeline's model to a batch of docs, without modifying
+        them.
+
        docs (Iterable[Doc]): The documents to predict.
        RETURNS: The models prediction for each document.
+
+        DOCS: https://spacy.io/api/spanfinder#predict
        """
        scores = self.model.predict(docs)
        return scores
@ -183,6 +194,8 @@ class SpanFinder(TrainablePipe):
        """Modify a batch of Doc objects, using pre-computed scores.
        docs (Iterable[Doc]): The documents to modify.
        scores: The scores to set, produced by SpanFinder predict method.
+
+        DOCS: https://spacy.io/api/spanfinder#set_annotations
        """
        offset = 0
        for i, doc in enumerate(docs):
@ -225,9 +238,11 @@ class SpanFinder(TrainablePipe):
        examples (Iterable[Example]): A batch of Example objects.
        drop (float): The dropout rate.
        sgd (Optional[thinc.api.Optimizer]): The optimizer.
-        losses (Optional[Dict[str, float]]): Optional record of the loss during training.
-            Updated using the component name as the key.
+        losses (Optional[Dict[str, float]]): Optional record of the loss during
+            training. Updated using the component name as the key.
        RETURNS (Dict[str, float]): The updated losses dictionary.
+
+        DOCS: https://spacy.io/api/spanfinder#update
        """
        if losses is None:
            losses = {}
@ -247,7 +262,9 @@ class SpanFinder(TrainablePipe):
        their predicted scores.
        examples (Iterable[Examples]): The batch of examples.
        scores: Scores representing the model's predictions.
-        RETURNS (Tuple[float, float]): The loss and the gradient.
+        RETURNS (Tuple[float, Floats2d]): The loss and the gradient.
+
+        DOCS: https://spacy.io/api/spanfinder#get_loss
        """
        truths, masks = self._get_aligned_truth_scores(examples, self.model.ops)
        d_scores = scores - self.model.ops.asarray2f(truths)
@ -256,7 +273,9 @@ class SpanFinder(TrainablePipe):
        return loss, d_scores

    def _get_aligned_truth_scores(self, examples, ops) -> Tuple[Floats2d, Floats2d]:
-        """Align scores of the predictions to the references for calculating the loss"""
+        """Align scores of the predictions to the references for calculating
+        the loss.
+        """
        truths = []
        masks = []
        for eg in examples:
@ -298,7 +317,10 @@ class SpanFinder(TrainablePipe):
        of data examples.
        get_examples (Callable[[], Iterable[Example]]): Function that
            returns a representative sample of gold-standard Example objects.
-        nlp (Optional[Language]): The current nlp object the component is part of.
+        nlp (Optional[Language]): The current nlp object the component is part
+            of.
+
+        DOCS: https://spacy.io/api/spanfinder#initialize
        """
        subbatch: List[Example] = []

--- a/website/docs/api/spanfinder.mdx
+++ b/website/docs/api/spanfinder.mdx
@ -60,8 +60,8 @@ architectures and their arguments and hyperparameters.
 | `model`      | A model instance that is given a list of documents and predicts a probability for each token. ~~Model[List[Doc], Floats2d]~~                                                                                           |
 | `spans_key`  | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ |
 | `threshold`  | Minimum probability to consider a prediction positive. Defaults to `0.5`. ~~float~~                                                                                                                                    |
-| `max_length` | Max length of the produced spans, defaults to `None` meaning unlimited length. ~~Optional[int]~~                                                                                                                       |
-| `min_length` | Min length of the produced spans, defaults to `None` meaning shortest span is length 1. ~~Optional[int]~~                                                                                                              |
+| `max_length` | Maximum length of the produced spans, defaults to `None` meaning unlimited length. ~~Optional[int]~~                                                                                                                   |
+| `min_length` | Minimum length of the produced spans, defaults to `None` meaning shortest span length is 1. ~~Optional[int]~~                                                                                                          |
 | `scorer`     | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~                                                      |

 ```python
@ -97,8 +97,8 @@ shortcut for this and instantiate the component using its string name and
 | _keyword-only_ |                                                                                                                                                                                                                        |
 | `spans_key`    | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ |
 | `threshold`    | Minimum probability to consider a prediction positive. Defaults to `0.5`. ~~float~~                                                                                                                                    |
-| `max_length`   | Max length of the produced spans, defaults to `None` meaning unlimited length. ~~Optional[int]~~                                                                                                                       |
-| `min_length`   | Min length of the produced spans, defaults to `None` meaning shortest span is length 1. ~~Optional[int]~~                                                                                                              |
+| `max_length`   | Maximum length of the produced spans, defaults to `None` meaning unlimited length. ~~Optional[int]~~                                                                                                                   |
+| `min_length`   | Minimum length of the produced spans, defaults to `None` meaning shortest span length is 1. ~~Optional[int]~~                                                                                                          |
 | `scorer`       | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~                                                      |

 ## SpanFinder.\_\_call\_\_ {id="call",tag="method"}
@ -246,10 +246,10 @@ predicted scores.
 > ```

 | Name           | Description                                                                    |
-| -------------- | --------------------------------------------------------------------------- |
+| -------------- | ------------------------------------------------------------------------------ |
 | `examples`     | The batch of examples. ~~Iterable[Example]~~                                   |
 | `spans_scores` | Scores representing the model's predictions. ~~Tuple[Ragged, Floats2d]~~       |
-| **RETURNS**    | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ |
+| **RETURNS**    | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, Floats2d]~~ |

 ## SpanFinder.create_optimizer {id="create_optimizer",tag="method"}