Update docstrings and docs

This commit is contained in:
Adriane Boyd 2023-06-05 12:55:41 +02:00
parent d52d7d9c87
commit b2c56a089e
2 changed files with 40 additions and 18 deletions

View File

@ -78,8 +78,10 @@ def make_span_finder(
initialization and training, the component will look for spans on the initialization and training, the component will look for spans on the
reference document under the same key. reference document under the same key.
threshold (float): Minimum probability to consider a prediction positive. threshold (float): Minimum probability to consider a prediction positive.
max_length (Optional[int]): Max length of the produced spans, defaults to None meaning unlimited length. max_length (Optional[int]): Maximum length of the produced spans, defaults
min_length (Optional[int]): Min length of the produced spans, defaults to None meaining shortest span is length 1. to None meaning unlimited length.
min_length (Optional[int]): Minimum length of the produced spans, defaults
to None meaning shortest span length is 1.
scorer (Optional[Callable]): The scoring method. Defaults to scorer (Optional[Callable]): The scoring method. Defaults to
Scorer.score_spans for the Doc.spans[spans_key] with overlapping Scorer.score_spans for the Doc.spans[spans_key] with overlapping
spans allowed. spans allowed.
@ -124,7 +126,10 @@ def _char_indices(span: Span) -> Tuple[int, int]:
class SpanFinder(TrainablePipe): class SpanFinder(TrainablePipe):
"""Pipeline that learns span boundaries""" """Pipeline that learns span boundaries.
DOCS: https://spacy.io/api/spanfinder
"""
def __init__( def __init__(
self, self,
@ -153,6 +158,8 @@ class SpanFinder(TrainablePipe):
defaults to None meaning unlimited length. defaults to None meaning unlimited length.
min_length (Optional[int]): Minimum length of the produced spans, min_length (Optional[int]): Minimum length of the produced spans,
defaults to None meaning shortest span length is 1. defaults to None meaning shortest span length is 1.
DOCS: https://spacy.io/api/spanfinder#init
""" """
self.vocab = nlp.vocab self.vocab = nlp.vocab
if (max_length is not None and max_length < 1) or ( if (max_length is not None and max_length < 1) or (
@ -172,9 +179,13 @@ class SpanFinder(TrainablePipe):
} }
def predict(self, docs: Iterable[Doc]): def predict(self, docs: Iterable[Doc]):
"""Apply the pipeline's model to a batch of docs, without modifying them. """Apply the pipeline's model to a batch of docs, without modifying
them.
docs (Iterable[Doc]): The documents to predict. docs (Iterable[Doc]): The documents to predict.
RETURNS: The model's prediction for each document. RETURNS: The model's prediction for each document.
DOCS: https://spacy.io/api/spanfinder#predict
""" """
scores = self.model.predict(docs) scores = self.model.predict(docs)
return scores return scores
@ -183,6 +194,8 @@ class SpanFinder(TrainablePipe):
"""Modify a batch of Doc objects, using pre-computed scores. """Modify a batch of Doc objects, using pre-computed scores.
docs (Iterable[Doc]): The documents to modify. docs (Iterable[Doc]): The documents to modify.
scores: The scores to set, produced by SpanFinder predict method. scores: The scores to set, produced by SpanFinder predict method.
DOCS: https://spacy.io/api/spanfinder#set_annotations
""" """
offset = 0 offset = 0
for i, doc in enumerate(docs): for i, doc in enumerate(docs):
@ -225,9 +238,11 @@ class SpanFinder(TrainablePipe):
examples (Iterable[Example]): A batch of Example objects. examples (Iterable[Example]): A batch of Example objects.
drop (float): The dropout rate. drop (float): The dropout rate.
sgd (Optional[thinc.api.Optimizer]): The optimizer. sgd (Optional[thinc.api.Optimizer]): The optimizer.
losses (Optional[Dict[str, float]]): Optional record of the loss during training. losses (Optional[Dict[str, float]]): Optional record of the loss during
Updated using the component name as the key. training. Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary. RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://spacy.io/api/spanfinder#update
""" """
if losses is None: if losses is None:
losses = {} losses = {}
@ -247,7 +262,9 @@ class SpanFinder(TrainablePipe):
their predicted scores. their predicted scores.
examples (Iterable[Example]): The batch of examples. examples (Iterable[Example]): The batch of examples.
scores: Scores representing the model's predictions. scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient. RETURNS (Tuple[float, Floats2d]): The loss and the gradient.
DOCS: https://spacy.io/api/spanfinder#get_loss
""" """
truths, masks = self._get_aligned_truth_scores(examples, self.model.ops) truths, masks = self._get_aligned_truth_scores(examples, self.model.ops)
d_scores = scores - self.model.ops.asarray2f(truths) d_scores = scores - self.model.ops.asarray2f(truths)
@ -256,7 +273,9 @@ class SpanFinder(TrainablePipe):
return loss, d_scores return loss, d_scores
def _get_aligned_truth_scores(self, examples, ops) -> Tuple[Floats2d, Floats2d]: def _get_aligned_truth_scores(self, examples, ops) -> Tuple[Floats2d, Floats2d]:
"""Align scores of the predictions to the references for calculating the loss""" """Align scores of the predictions to the references for calculating
the loss.
"""
truths = [] truths = []
masks = [] masks = []
for eg in examples: for eg in examples:
@ -298,7 +317,10 @@ class SpanFinder(TrainablePipe):
of data examples. of data examples.
get_examples (Callable[[], Iterable[Example]]): Function that get_examples (Callable[[], Iterable[Example]]): Function that
returns a representative sample of gold-standard Example objects. returns a representative sample of gold-standard Example objects.
nlp (Optional[Language]): The current nlp object the component is part of. nlp (Optional[Language]): The current nlp object the component is part
of.
DOCS: https://spacy.io/api/spanfinder#initialize
""" """
subbatch: List[Example] = [] subbatch: List[Example] = []

View File

@ -60,8 +60,8 @@ architectures and their arguments and hyperparameters.
| `model` | A model instance that is given a list of documents and predicts a probability for each token. ~~Model[List[Doc], Floats2d]~~ | | `model` | A model instance that is given a list of documents and predicts a probability for each token. ~~Model[List[Doc], Floats2d]~~ |
| `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ | | `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ |
| `threshold` | Minimum probability to consider a prediction positive. Defaults to `0.5`. ~~float~~ | | `threshold` | Minimum probability to consider a prediction positive. Defaults to `0.5`. ~~float~~ |
| `max_length` | Max length of the produced spans, defaults to `None` meaning unlimited length. ~~Optional[int]~~ | | `max_length` | Maximum length of the produced spans, defaults to `None` meaning unlimited length. ~~Optional[int]~~ |
| `min_length` | Min length of the produced spans, defaults to `None` meaning shortest span is length 1. ~~Optional[int]~~ | | `min_length` | Minimum length of the produced spans, defaults to `None` meaning shortest span length is 1. ~~Optional[int]~~ |
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ | | `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ |
```python ```python
@ -97,8 +97,8 @@ shortcut for this and instantiate the component using its string name and
| _keyword-only_ | | | _keyword-only_ | |
| `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ | | `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ |
| `threshold` | Minimum probability to consider a prediction positive. Defaults to `0.5`. ~~float~~ | | `threshold` | Minimum probability to consider a prediction positive. Defaults to `0.5`. ~~float~~ |
| `max_length` | Max length of the produced spans, defaults to `None` meaning unlimited length. ~~Optional[int]~~ | | `max_length` | Maximum length of the produced spans, defaults to `None` meaning unlimited length. ~~Optional[int]~~ |
| `min_length` | Min length of the produced spans, defaults to `None` meaning shortest span is length 1. ~~Optional[int]~~ | | `min_length` | Minimum length of the produced spans, defaults to `None` meaning shortest span length is 1. ~~Optional[int]~~ |
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ | | `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ |
## SpanFinder.\_\_call\_\_ {id="call",tag="method"} ## SpanFinder.\_\_call\_\_ {id="call",tag="method"}
@ -246,10 +246,10 @@ predicted scores.
> ``` > ```
| Name | Description | | Name | Description |
| -------------- | --------------------------------------------------------------------------- | | -------------- | ------------------------------------------------------------------------------ |
| `examples` | The batch of examples. ~~Iterable[Example]~~ | | `examples` | The batch of examples. ~~Iterable[Example]~~ |
| `spans_scores` | Scores representing the model's predictions. ~~Tuple[Ragged, Floats2d]~~ | | `spans_scores` | Scores representing the model's predictions. ~~Tuple[Ragged, Floats2d]~~ |
| **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ | | **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, Floats2d]~~ |
## SpanFinder.create_optimizer {id="create_optimizer",tag="method"} ## SpanFinder.create_optimizer {id="create_optimizer",tag="method"}