add docstrings

This commit is contained in:
kadarakos 2023-01-31 17:06:20 +00:00
parent 079f09b97c
commit edf9134e45

View File

@ -149,7 +149,9 @@ def make_spancat(
threshold: float, threshold: float,
max_positive: Optional[int], max_positive: Optional[int],
) -> "SpanCategorizer": ) -> "SpanCategorizer":
"""Create a SpanCategorizer component. The span categorizer consists of two """Create a SpanCategorizer component and configure it for multilabel
classification, so that multiple labels can be assigned to each span.
The span categorizer consists of two
parts: a suggester function that proposes candidate spans, and a labeller parts: a suggester function that proposes candidate spans, and a labeller
model that predicts one or more labels for each span. model that predicts one or more labels for each span.
@ -207,7 +209,9 @@ def make_spancat_singlelabel(
allow_overlap: bool, allow_overlap: bool,
scorer: Optional[Callable], scorer: Optional[Callable],
) -> "SpanCategorizer": ) -> "SpanCategorizer":
"""Create a SpanCategorizer component. The span categorizer consists of two """Create a SpanCategorizer component and configure it for multiclass
classification. With this configuration each span can get at most one
label. The span categorizer consists of two
parts: a suggester function that proposes candidate spans, and a labeller parts: a suggester function that proposes candidate spans, and a labeller
model that predicts one or more labels for each span. model that predicts one or more labels for each span.
@ -224,11 +228,11 @@ def make_spancat_singlelabel(
scorer (Optional[Callable]): The scoring method. Defaults to scorer (Optional[Callable]): The scoring method. Defaults to
Scorer.score_spans for the Doc.spans[spans_key] with overlapping Scorer.score_spans for the Doc.spans[spans_key] with overlapping
spans allowed. spans allowed.
threshold (float): Minimum probability to consider a prediction positive. negative_weight (float): Multiplier for the loss terms.
Spans with a positive prediction will be saved on the Doc. Defaults to Can be used to downweight the negative samples if there are too many.
0.5. allow_overlap (bool): If True, the data is assumed to contain overlapping spans.
max_positive (Optional[int]): Maximum number of labels to consider positive Otherwise, non-overlapping spans are produced greedily, prioritizing
per span. Defaults to None, indicating no limit. higher assigned label scores.
""" """
return SpanCategorizer( return SpanCategorizer(
nlp.vocab, nlp.vocab,
@ -317,11 +321,16 @@ class SpanCategorizer(TrainablePipe):
During initialization and training, the component will look for During initialization and training, the component will look for
spans on the reference document under the same key. Defaults to spans on the reference document under the same key. Defaults to
`"spans"`. `"spans"`.
threshold (float): Minimum probability to consider a prediction threshold (Optional[float]): Minimum probability to consider a prediction
positive. Spans with a positive prediction will be saved on the Doc. positive. Spans with a positive prediction will be saved on the Doc.
Defaults to 0.5. Defaults to 0.5.
max_positive (Optional[int]): Maximum number of labels to consider max_positive (Optional[int]): Maximum number of labels to consider
positive per span. Defaults to None, indicating no limit. positive per span. Defaults to None, indicating no limit.
negative_weight (float): Multiplier for the loss terms.
Can be used to downweight the negative samples if there are too many.
allow_overlap (bool): If True, the data is assumed to contain overlapping spans.
Otherwise, non-overlapping spans are produced greedily, prioritizing
higher assigned label scores.
scorer (Optional[Callable]): The scoring method. Defaults to scorer (Optional[Callable]): The scoring method. Defaults to
Scorer.score_spans for the Doc.spans[spans_key] with overlapping Scorer.score_spans for the Doc.spans[spans_key] with overlapping
spans allowed. spans allowed.
@ -640,6 +649,7 @@ class SpanCategorizer(TrainablePipe):
indices: Ints2d, indices: Ints2d,
scores: Floats2d, scores: Floats2d,
labels: List[str], labels: List[str],
# XXX: the parameter below is unused; does it make sense to keep it?
allow_overlap: bool = True, allow_overlap: bool = True,
) -> SpanGroup: ) -> SpanGroup:
spans = SpanGroup(doc, name=self.key) spans = SpanGroup(doc, name=self.key)