mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-04 04:10:20 +03:00
Update docstrings and docs
This commit is contained in:
parent
d52d7d9c87
commit
b2c56a089e
|
@ -78,8 +78,10 @@ def make_span_finder(
|
||||||
initialization and training, the component will look for spans on the
|
initialization and training, the component will look for spans on the
|
||||||
reference document under the same key.
|
reference document under the same key.
|
||||||
threshold (float): Minimum probability to consider a prediction positive.
|
threshold (float): Minimum probability to consider a prediction positive.
|
||||||
max_length (Optional[int]): Max length of the produced spans, defaults to None meaning unlimited length.
|
max_length (Optional[int]): Maximum length of the produced spans, defaults
|
||||||
min_length (Optional[int]): Min length of the produced spans, defaults to None meaining shortest span is length 1.
|
to None meaning unlimited length.
|
||||||
|
min_length (Optional[int]): Minimum length of the produced spans, defaults
|
||||||
|
to None meaning shortest span length is 1.
|
||||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||||
Scorer.score_spans for the Doc.spans[spans_key] with overlapping
|
Scorer.score_spans for the Doc.spans[spans_key] with overlapping
|
||||||
spans allowed.
|
spans allowed.
|
||||||
|
@ -124,7 +126,10 @@ def _char_indices(span: Span) -> Tuple[int, int]:
|
||||||
|
|
||||||
|
|
||||||
class SpanFinder(TrainablePipe):
|
class SpanFinder(TrainablePipe):
|
||||||
"""Pipeline that learns span boundaries"""
|
"""Pipeline that learns span boundaries.
|
||||||
|
|
||||||
|
DOCS: https://spacy.io/api/spanfinder
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
@ -153,6 +158,8 @@ class SpanFinder(TrainablePipe):
|
||||||
defaults to None meaning unlimited length.
|
defaults to None meaning unlimited length.
|
||||||
min_length (Optional[int]): Minimum length of the produced spans,
|
min_length (Optional[int]): Minimum length of the produced spans,
|
||||||
defaults to None meaning shortest span length is 1.
|
defaults to None meaning shortest span length is 1.
|
||||||
|
|
||||||
|
DOCS: https://spacy.io/api/spanfinder#init
|
||||||
"""
|
"""
|
||||||
self.vocab = nlp.vocab
|
self.vocab = nlp.vocab
|
||||||
if (max_length is not None and max_length < 1) or (
|
if (max_length is not None and max_length < 1) or (
|
||||||
|
@ -172,9 +179,13 @@ class SpanFinder(TrainablePipe):
|
||||||
}
|
}
|
||||||
|
|
||||||
def predict(self, docs: Iterable[Doc]):
|
def predict(self, docs: Iterable[Doc]):
|
||||||
"""Apply the pipeline's model to a batch of docs, without modifying them.
|
"""Apply the pipeline's model to a batch of docs, without modifying
|
||||||
|
them.
|
||||||
|
|
||||||
docs (Iterable[Doc]): The documents to predict.
|
docs (Iterable[Doc]): The documents to predict.
|
||||||
RETURNS: The model's prediction for each document.
|
RETURNS: The model's prediction for each document.
|
||||||
|
|
||||||
|
DOCS: https://spacy.io/api/spanfinder#predict
|
||||||
"""
|
"""
|
||||||
scores = self.model.predict(docs)
|
scores = self.model.predict(docs)
|
||||||
return scores
|
return scores
|
||||||
|
@ -183,6 +194,8 @@ class SpanFinder(TrainablePipe):
|
||||||
"""Modify a batch of Doc objects, using pre-computed scores.
|
"""Modify a batch of Doc objects, using pre-computed scores.
|
||||||
docs (Iterable[Doc]): The documents to modify.
|
docs (Iterable[Doc]): The documents to modify.
|
||||||
scores: The scores to set, produced by the SpanFinder.predict method.
|
scores: The scores to set, produced by the SpanFinder.predict method.
|
||||||
|
|
||||||
|
DOCS: https://spacy.io/api/spanfinder#set_annotations
|
||||||
"""
|
"""
|
||||||
offset = 0
|
offset = 0
|
||||||
for i, doc in enumerate(docs):
|
for i, doc in enumerate(docs):
|
||||||
|
@ -225,9 +238,11 @@ class SpanFinder(TrainablePipe):
|
||||||
examples (Iterable[Example]): A batch of Example objects.
|
examples (Iterable[Example]): A batch of Example objects.
|
||||||
drop (float): The dropout rate.
|
drop (float): The dropout rate.
|
||||||
sgd (Optional[thinc.api.Optimizer]): The optimizer.
|
sgd (Optional[thinc.api.Optimizer]): The optimizer.
|
||||||
losses (Optional[Dict[str, float]]): Optional record of the loss during training.
|
losses (Optional[Dict[str, float]]): Optional record of the loss during
|
||||||
Updated using the component name as the key.
|
training. Updated using the component name as the key.
|
||||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||||
|
|
||||||
|
DOCS: https://spacy.io/api/spanfinder#update
|
||||||
"""
|
"""
|
||||||
if losses is None:
|
if losses is None:
|
||||||
losses = {}
|
losses = {}
|
||||||
|
@ -247,7 +262,9 @@ class SpanFinder(TrainablePipe):
|
||||||
their predicted scores.
|
their predicted scores.
|
||||||
examples (Iterable[Example]): The batch of examples.
|
examples (Iterable[Example]): The batch of examples.
|
||||||
scores: Scores representing the model's predictions.
|
scores: Scores representing the model's predictions.
|
||||||
RETURNS (Tuple[float, float]): The loss and the gradient.
|
RETURNS (Tuple[float, Floats2d]): The loss and the gradient.
|
||||||
|
|
||||||
|
DOCS: https://spacy.io/api/spanfinder#get_loss
|
||||||
"""
|
"""
|
||||||
truths, masks = self._get_aligned_truth_scores(examples, self.model.ops)
|
truths, masks = self._get_aligned_truth_scores(examples, self.model.ops)
|
||||||
d_scores = scores - self.model.ops.asarray2f(truths)
|
d_scores = scores - self.model.ops.asarray2f(truths)
|
||||||
|
@ -256,7 +273,9 @@ class SpanFinder(TrainablePipe):
|
||||||
return loss, d_scores
|
return loss, d_scores
|
||||||
|
|
||||||
def _get_aligned_truth_scores(self, examples, ops) -> Tuple[Floats2d, Floats2d]:
|
def _get_aligned_truth_scores(self, examples, ops) -> Tuple[Floats2d, Floats2d]:
|
||||||
"""Align scores of the predictions to the references for calculating the loss"""
|
"""Align scores of the predictions to the references for calculating
|
||||||
|
the loss.
|
||||||
|
"""
|
||||||
truths = []
|
truths = []
|
||||||
masks = []
|
masks = []
|
||||||
for eg in examples:
|
for eg in examples:
|
||||||
|
@ -298,7 +317,10 @@ class SpanFinder(TrainablePipe):
|
||||||
of data examples.
|
of data examples.
|
||||||
get_examples (Callable[[], Iterable[Example]]): Function that
|
get_examples (Callable[[], Iterable[Example]]): Function that
|
||||||
returns a representative sample of gold-standard Example objects.
|
returns a representative sample of gold-standard Example objects.
|
||||||
nlp (Optional[Language]): The current nlp object the component is part of.
|
nlp (Optional[Language]): The current nlp object the component is part
|
||||||
|
of.
|
||||||
|
|
||||||
|
DOCS: https://spacy.io/api/spanfinder#initialize
|
||||||
"""
|
"""
|
||||||
subbatch: List[Example] = []
|
subbatch: List[Example] = []
|
||||||
|
|
||||||
|
|
|
@ -60,8 +60,8 @@ architectures and their arguments and hyperparameters.
|
||||||
| `model` | A model instance that is given a list of documents and predicts a probability for each token. ~~Model[List[Doc], Floats2d]~~ |
|
| `model` | A model instance that is given a list of documents and predicts a probability for each token. ~~Model[List[Doc], Floats2d]~~ |
|
||||||
| `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ |
|
| `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ |
|
||||||
| `threshold` | Minimum probability to consider a prediction positive. Defaults to `0.5`. ~~float~~ |
|
| `threshold` | Minimum probability to consider a prediction positive. Defaults to `0.5`. ~~float~~ |
|
||||||
| `max_length` | Max length of the produced spans, defaults to `None` meaning unlimited length. ~~Optional[int]~~ |
|
| `max_length` | Maximum length of the produced spans, defaults to `None` meaning unlimited length. ~~Optional[int]~~ |
|
||||||
| `min_length` | Min length of the produced spans, defaults to `None` meaning shortest span is length 1. ~~Optional[int]~~ |
|
| `min_length` | Minimum length of the produced spans, defaults to `None` meaning shortest span length is 1. ~~Optional[int]~~ |
|
||||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ |
|
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ |
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
@ -97,8 +97,8 @@ shortcut for this and instantiate the component using its string name and
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ |
|
| `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ |
|
||||||
| `threshold` | Minimum probability to consider a prediction positive. Defaults to `0.5`. ~~float~~ |
|
| `threshold` | Minimum probability to consider a prediction positive. Defaults to `0.5`. ~~float~~ |
|
||||||
| `max_length` | Max length of the produced spans, defaults to `None` meaning unlimited length. ~~Optional[int]~~ |
|
| `max_length` | Maximum length of the produced spans, defaults to `None` meaning unlimited length. ~~Optional[int]~~ |
|
||||||
| `min_length` | Min length of the produced spans, defaults to `None` meaning shortest span is length 1. ~~Optional[int]~~ |
|
| `min_length` | Minimum length of the produced spans, defaults to `None` meaning shortest span length is 1. ~~Optional[int]~~ |
|
||||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ |
|
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ |
|
||||||
|
|
||||||
## SpanFinder.\_\_call\_\_ {id="call",tag="method"}
|
## SpanFinder.\_\_call\_\_ {id="call",tag="method"}
|
||||||
|
@ -245,11 +245,11 @@ predicted scores.
|
||||||
> loss, d_loss = span_finder.get_loss(examples, scores)
|
> loss, d_loss = span_finder.get_loss(examples, scores)
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| -------------- | --------------------------------------------------------------------------- |
|
| -------------- | ------------------------------------------------------------------------------ |
|
||||||
| `examples` | The batch of examples. ~~Iterable[Example]~~ |
|
| `examples` | The batch of examples. ~~Iterable[Example]~~ |
|
||||||
| `scores` | Scores representing the model's predictions. ~~Floats2d~~ |
|
| `scores` | Scores representing the model's predictions. ~~Floats2d~~ |
|
||||||
| **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, float]~~ |
|
| **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, Floats2d]~~ |
|
||||||
|
|
||||||
## SpanFinder.create_optimizer {id="create_optimizer",tag="method"}
|
## SpanFinder.create_optimizer {id="create_optimizer",tag="method"}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user