mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-10 08:12:24 +03:00
Add spancat_exclusive to pipeline
This commit is contained in:
parent
527a1818e5
commit
3d07c05cba
|
@ -13,6 +13,7 @@ from .sentencizer import Sentencizer
|
||||||
from .tagger import Tagger
|
from .tagger import Tagger
|
||||||
from .textcat import TextCategorizer
|
from .textcat import TextCategorizer
|
||||||
from .spancat import SpanCategorizer
|
from .spancat import SpanCategorizer
|
||||||
|
from .spancat_exclusive import SpanCategorizerExclusive
|
||||||
from .span_ruler import SpanRuler
|
from .span_ruler import SpanRuler
|
||||||
from .textcat_multilabel import MultiLabel_TextCategorizer
|
from .textcat_multilabel import MultiLabel_TextCategorizer
|
||||||
from .tok2vec import Tok2Vec
|
from .tok2vec import Tok2Vec
|
||||||
|
@ -31,6 +32,7 @@ __all__ = [
|
||||||
"SentenceRecognizer",
|
"SentenceRecognizer",
|
||||||
"Sentencizer",
|
"Sentencizer",
|
||||||
"SpanCategorizer",
|
"SpanCategorizer",
|
||||||
|
"SpanCategorizerExclusive",
|
||||||
"SpanRuler",
|
"SpanRuler",
|
||||||
"Tagger",
|
"Tagger",
|
||||||
"TextCategorizer",
|
"TextCategorizer",
|
||||||
|
|
|
@ -78,7 +78,7 @@ def make_spancat(
|
||||||
model: Model[Tuple[List[Doc], Ragged], Floats2d],
|
model: Model[Tuple[List[Doc], Ragged], Floats2d],
|
||||||
spans_key: str,
|
spans_key: str,
|
||||||
scorer: Optional[Callable],
|
scorer: Optional[Callable],
|
||||||
negative_weight: Optional[float] = 1.0,
|
negative_weight: float = 1.0,
|
||||||
allow_overlap: Optional[bool] = True,
|
allow_overlap: Optional[bool] = True,
|
||||||
) -> "SpanCategorizerExclusive":
|
) -> "SpanCategorizerExclusive":
|
||||||
"""Create a SpanCategorizer component. The span categorizer consists of two
|
"""Create a SpanCategorizer component. The span categorizer consists of two
|
||||||
|
@ -95,7 +95,7 @@ def make_spancat(
|
||||||
spans_key (str): Key of the doc.spans dict to save the spans under. During
|
spans_key (str): Key of the doc.spans dict to save the spans under. During
|
||||||
initialization and training, the component will look for spans on the
|
initialization and training, the component will look for spans on the
|
||||||
reference document under the same key.
|
reference document under the same key.
|
||||||
negative_weight (Optional[float]): Multiplier for the loss terms.
|
negative_weight (float): Multiplier for the loss terms.
|
||||||
Can be used to down-weight the negative samples if there are too many.
|
Can be used to down-weight the negative samples if there are too many.
|
||||||
allow_overlap (Optional[bool]): If True the data is assumed to
|
allow_overlap (Optional[bool]): If True the data is assumed to
|
||||||
contain overlapping spans.
|
contain overlapping spans.
|
||||||
|
@ -133,7 +133,6 @@ class Ranges:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
# TODO: Documentation
|
|
||||||
class SpanCategorizerExclusive(TrainablePipe):
|
class SpanCategorizerExclusive(TrainablePipe):
|
||||||
"""Pipeline component to label spans of text.
|
"""Pipeline component to label spans of text.
|
||||||
|
|
||||||
|
@ -148,7 +147,7 @@ class SpanCategorizerExclusive(TrainablePipe):
|
||||||
name: str = "spancat_exclusive",
|
name: str = "spancat_exclusive",
|
||||||
*,
|
*,
|
||||||
spans_key: str = "spans",
|
spans_key: str = "spans",
|
||||||
negative_weight: Optional[float],
|
negative_weight: float = 1.0,
|
||||||
scorer: Optional[Callable] = spancat_score,
|
scorer: Optional[Callable] = spancat_score,
|
||||||
allow_overlap: Optional[bool] = True,
|
allow_overlap: Optional[bool] = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
@ -161,7 +160,7 @@ class SpanCategorizerExclusive(TrainablePipe):
|
||||||
During initialization and training, the component will look for
|
During initialization and training, the component will look for
|
||||||
spans on the reference document under the same key. Defaults to
|
spans on the reference document under the same key. Defaults to
|
||||||
`"spans"`.
|
`"spans"`.
|
||||||
negative_weight (Optional[float]): Multiplier for the loss terms.
|
negative_weight (float): Multiplier for the loss terms.
|
||||||
Can be used to down-weight the negative samples if there are too many.
|
Can be used to down-weight the negative samples if there are too many.
|
||||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||||
Scorer.score_spans for the Doc.spans[spans_key] with overlapping
|
Scorer.score_spans for the Doc.spans[spans_key] with overlapping
|
||||||
|
@ -394,13 +393,15 @@ class SpanCategorizerExclusive(TrainablePipe):
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize the pipe for training, using a representative set
|
"""Initialize the pipe for training, using a representative set
|
||||||
of data examples.
|
of data examples.
|
||||||
|
|
||||||
get_examples (Callable[[], Iterable[Example]]): Function that
|
get_examples (Callable[[], Iterable[Example]]): Function that
|
||||||
returns a representative sample of gold-standard Example objects.
|
returns a representative sample of gold-standard Example objects.
|
||||||
nlp (Optional[Language]): The current nlp object the component is part of.
|
nlp (Optional[Language]): The current nlp object the component is part of.
|
||||||
labels (Optional[List[str]]): The labels to add to the component, typically generated by the
|
labels (Optional[List[str]]): The labels to add to the component, typically generated by the
|
||||||
`init labels` command. If no labels are provided, the get_examples
|
`init labels` command. If no labels are provided, the get_examples
|
||||||
callback is used to extract the labels from the data.
|
callback is used to extract the labels from the data.
|
||||||
DOCS: https://spacy.io/api/spancategorizer#initialize
|
|
||||||
|
DOCS: https://spacy.io/api/spancategorizerexclusive#initialize
|
||||||
"""
|
"""
|
||||||
subbatch: List[Example] = []
|
subbatch: List[Example] = []
|
||||||
if labels is not None:
|
if labels is not None:
|
||||||
|
@ -419,9 +420,11 @@ class SpanCategorizerExclusive(TrainablePipe):
|
||||||
# + 1 for the "no-label" category
|
# + 1 for the "no-label" category
|
||||||
Y = self.model.ops.alloc2f(spans.dataXd.shape[0], self._n_labels)
|
Y = self.model.ops.alloc2f(spans.dataXd.shape[0], self._n_labels)
|
||||||
self.model.initialize(X=(docs, spans), Y=Y)
|
self.model.initialize(X=(docs, spans), Y=Y)
|
||||||
# FIXME I think this branch is broken
|
|
||||||
else:
|
else:
|
||||||
raise ValueError("Cannot initialize without examples.")
|
# FIXME: Ideally we would raise an explicit error here instead of letting
|
||||||
|
# one surface implicitly when initializing without examples. For now,
|
||||||
|
# we just copy what `spancat` does.
|
||||||
|
self.model.initialize()
|
||||||
|
|
||||||
def _validate_categories(self, examples: Iterable[Example]):
|
def _validate_categories(self, examples: Iterable[Example]):
|
||||||
# TODO
|
# TODO
|
||||||
|
|
Loading…
Reference in New Issue
Block a user