[wip] Update

This commit is contained in:
Lj Miranda 2022-08-24 17:54:34 +08:00
parent 6f08d83731
commit 1db65b8e78

View File

@ -95,7 +95,7 @@ def make_spancat(
spans_key (str): Key of the doc.spans dict to save the spans under. During spans_key (str): Key of the doc.spans dict to save the spans under. During
initialization and training, the component will look for spans on the initialization and training, the component will look for spans on the
reference document under the same key. reference document under the same key.
negative_weight (optional[float]): Multiplier for the loss terms. negative_weight (Optional[float]): Multiplier for the loss terms.
Can be used to downweight the negative samples if there are too many. Can be used to downweight the negative samples if there are too many.
allow_overlap (Optional[bool]): If True the data is assumed to allow_overlap (Optional[bool]): If True the data is assumed to
contain overlapping spans. contain overlapping spans.
@ -133,9 +133,11 @@ class Ranges:
return False return False
# TODO: Documentation
class SpanCategorizerExclusive(TrainablePipe): class SpanCategorizerExclusive(TrainablePipe):
"""Pipeline component to label spans of text. """Pipeline component to label spans of text.
DOCS: https://spacy.io/api/spancategorizer
DOCS: https://spacy.io/api/spancategorizerexclusive
""" """
def __init__( def __init__(
@ -159,14 +161,15 @@ class SpanCategorizerExclusive(TrainablePipe):
During initialization and training, the component will look for During initialization and training, the component will look for
spans on the reference document under the same key. Defaults to spans on the reference document under the same key. Defaults to
`"spans"`. `"spans"`.
negative_weight (optional[float]): Multiplier for the loss terms. negative_weight (Optional[float]): Multiplier for the loss terms.
Can be used to downweight the negative samples if there are too many. Can be used to downweight the negative samples if there are too many.
scorer (Optional[Callable]): The scoring method. Defaults to scorer (Optional[Callable]): The scoring method. Defaults to
Scorer.score_spans for the Doc.spans[spans_key] with overlapping
spans allowed.
allow_overlap (Optional[bool]): If True the data is assumed to allow_overlap (Optional[bool]): If True the data is assumed to
contains overlapping spans. contain overlapping spans.
Scorer.score_spans for the Doc.spans[spans_key] with overlapping
spans allowed. DOCS: https://spacy.io/api/spancategorizerexclusive#init
DOCS: https://spacy.io/api/spancategorizer#init
""" """
self.cfg = { self.cfg = {
"labels": [], "labels": [],
@ -190,9 +193,11 @@ class SpanCategorizerExclusive(TrainablePipe):
def add_label(self, label: str) -> int: def add_label(self, label: str) -> int:
"""Add a new label to the pipe. """Add a new label to the pipe.
label (str): The label to add. label (str): The label to add.
RETURNS (int): 0 if label is already present, otherwise 1. RETURNS (int): 0 if label is already present, otherwise 1.
DOCS: https://spacy.io/api/spancategorizer#add_label
DOCS: https://spacy.io/api/spancategorizerexclusive#add_label
""" """
if not isinstance(label, str): if not isinstance(label, str):
raise ValueError(Errors.E187) raise ValueError(Errors.E187)
@ -206,14 +211,16 @@ class SpanCategorizerExclusive(TrainablePipe):
@property @property
def labels(self) -> Tuple[str]: def labels(self) -> Tuple[str]:
"""RETURNS (Tuple[str]): The labels currently added to the component. """RETURNS (Tuple[str]): The labels currently added to the component.
DOCS: https://spacy.io/api/spancategorizer#labels
DOCS: https://spacy.io/api/spancategorizerexclusive#labels
""" """
return tuple(self.cfg["labels"]) # type: ignore return tuple(self.cfg["labels"]) # type: ignore
@property @property
def label_data(self) -> List[str]: def label_data(self) -> List[str]:
"""RETURNS (List[str]): Information about the component's labels. """RETURNS (List[str]): Information about the component's labels.
DOCS: https://spacy.io/api/spancategorizer#label_data
DOCS: https://spacy.io/api/spancategorizerexclusive#label_data
""" """
return list(self.labels) return list(self.labels)
@ -233,9 +240,11 @@ class SpanCategorizerExclusive(TrainablePipe):
def predict(self, docs: Iterable[Doc]): def predict(self, docs: Iterable[Doc]):
"""Apply the pipeline's model to a batch of docs, without modifying them. """Apply the pipeline's model to a batch of docs, without modifying them.
docs (Iterable[Doc]): The documents to predict. docs (Iterable[Doc]): The documents to predict.
RETURNS: The model's prediction for each document. RETURNS: The model's prediction for each document.
DOCS: https://spacy.io/api/spancategorizer#predict
DOCS: https://spacy.io/api/spancategorizerexclusive#predict
""" """
indices = self.suggester(docs, ops=self.model.ops) indices = self.suggester(docs, ops=self.model.ops)
scores = self.model.predict((docs, indices)) # type: ignore scores = self.model.predict((docs, indices)) # type: ignore
@ -246,10 +255,12 @@ class SpanCategorizerExclusive(TrainablePipe):
) -> None: ) -> None:
"""Use the spancat suggester to add a list of span candidates to a """Use the spancat suggester to add a list of span candidates to a
list of docs. Intended to be used for debugging purposes. list of docs. Intended to be used for debugging purposes.
docs (Iterable[Doc]): The documents to modify. docs (Iterable[Doc]): The documents to modify.
candidates_key (str): Key of the Doc.spans dict to save the candidates_key (str): Key of the Doc.spans dict to save the
candidate spans under. candidate spans under.
DOCS: https://spacy.io/api/spancategorizer#set_candidates
DOCS: https://spacy.io/api/spancategorizerexclusive#set_candidates
""" """
suggester_output = self.suggester(docs, ops=self.model.ops) suggester_output = self.suggester(docs, ops=self.model.ops)
@ -260,9 +271,11 @@ class SpanCategorizerExclusive(TrainablePipe):
def set_annotations(self, docs: Iterable[Doc], indices_scores) -> None: def set_annotations(self, docs: Iterable[Doc], indices_scores) -> None:
"""Modify a batch of Doc objects, using pre-computed scores. """Modify a batch of Doc objects, using pre-computed scores.
docs (Iterable[Doc]): The documents to modify. docs (Iterable[Doc]): The documents to modify.
indices_scores: The scores to set, produced by SpanCategorizerExclusive.predict. indices_scores: The scores to set, produced by SpanCategorizerExclusive.predict.
DOCS: https://spacy.io/api/spancategorizer#set_annotations
DOCS: https://spacy.io/api/spancategorizerexclusive#set_annotations
""" """
allow_overlap = self.cfg["allow_overlap"] allow_overlap = self.cfg["allow_overlap"]
labels = self.labels labels = self.labels
@ -290,12 +303,14 @@ class SpanCategorizerExclusive(TrainablePipe):
"""Learn from a batch of documents and gold-standard information, """Learn from a batch of documents and gold-standard information,
updating the pipe's model. Delegates to predict and get_loss. updating the pipe's model. Delegates to predict and get_loss.
examples (Iterable[Example]): A batch of Example objects. examples (Iterable[Example]): A batch of Example objects.
drop (float): The dropout rate. drop (float): The dropout rate.
sgd (thinc.api.Optimizer): The optimizer. sgd (thinc.api.Optimizer): The optimizer.
losses (Dict[str, float]): Optional record of the loss during training. losses (Dict[str, float]): Optional record of the loss during training.
Updated using the component name as the key. Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary. RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://spacy.io/api/spancategorizer#update
DOCS: https://spacy.io/api/spancategorizerexclusive#update
""" """
if losses is None: if losses is None:
losses = {} losses = {}
@ -323,10 +338,12 @@ class SpanCategorizerExclusive(TrainablePipe):
) -> Tuple[float, float]: ) -> Tuple[float, float]:
"""Find the loss and gradient of loss for the batch of documents and """Find the loss and gradient of loss for the batch of documents and
their predicted scores. their predicted scores.
examples (Iterable[Example]): The batch of examples. examples (Iterable[Example]): The batch of examples.
spans_scores: Scores representing the model's predictions. spans_scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient. RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://spacy.io/api/spancategorizer#get_loss
DOCS: https://spacy.io/api/spancategorizerexclusive#get_loss
""" """
spans, scores = spans_scores spans, scores = spans_scores
spans = Ragged( spans = Ragged(