diff --git a/spacy/pipeline/span_finder.py b/spacy/pipeline/span_finder.py index 3c412c07c..bd69a4c2a 100644 --- a/spacy/pipeline/span_finder.py +++ b/spacy/pipeline/span_finder.py @@ -1,7 +1,8 @@ from functools import partial from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, cast -from thinc.api import Config, Model, Ops, Optimizer, get_current_ops, set_dropout_rate +from thinc.api import (Config, Model, Ops, Optimizer, get_current_ops, + set_dropout_rate) from thinc.types import Floats2d, Ints1d, Ragged from spacy.language import Language @@ -11,7 +12,7 @@ from spacy.tokens import Doc from spacy.training import Example from ..util import registry -from .spancat import Suggester +from .spancat import DEFAULT_SPAN_KEY, Suggester span_finder_default_config = """ [model] @@ -41,8 +42,6 @@ depth = 4 DEFAULT_SPAN_FINDER_MODEL = Config().from_str(span_finder_default_config)["model"] DEFAULT_PREDICTED_KEY = "span_candidates" -# XXX What was this TODO for? -DEFAULT_TRAINING_KEY = "sc" # TODO: define in spancat @Language.factory( @@ -52,14 +51,14 @@ DEFAULT_TRAINING_KEY = "sc" # TODO: define in spancat "threshold": 0.5, "model": DEFAULT_SPAN_FINDER_MODEL, "predicted_key": DEFAULT_PREDICTED_KEY, - "training_key": DEFAULT_TRAINING_KEY, + "training_key": DEFAULT_SPAN_KEY, # XXX Doesn't 0 seem bad compared to None instead? "max_length": 0, "min_length": 0, "scorer": { "@scorers": "spacy.span_finder_scorer.v1", "predicted_key": DEFAULT_PREDICTED_KEY, - "training_key": DEFAULT_TRAINING_KEY, + "training_key": DEFAULT_SPAN_KEY, }, }, default_score_weights={ @@ -77,7 +76,7 @@ def make_span_finder( max_length: int, min_length: int, predicted_key: str = DEFAULT_PREDICTED_KEY, - training_key: str = DEFAULT_TRAINING_KEY, + training_key: str = DEFAULT_SPAN_KEY, ) -> "SpanFinder": """Create a SpanFinder component. The component predicts whether a token is the start or the end of a potential span. @@ -110,7 +109,7 @@ def make_span_finder( @registry.scorers("spacy.span_finder_scorer.v1") def make_span_finder_scorer( predicted_key: str = DEFAULT_PREDICTED_KEY, - training_key: str = DEFAULT_TRAINING_KEY, + training_key: str = DEFAULT_SPAN_KEY, ): return partial( span_finder_score, predicted_key=predicted_key, training_key=training_key @@ -121,7 +120,7 @@ def span_finder_score( examples: Iterable[Example], *, predicted_key: str = DEFAULT_PREDICTED_KEY, - training_key: str = DEFAULT_TRAINING_KEY, + training_key: str = DEFAULT_SPAN_KEY, **kwargs, ) -> Dict[str, Any]: kwargs = dict(kwargs) @@ -165,10 +164,10 @@ class SpanFinder(TrainablePipe): scorer: Optional[Callable] = partial( span_finder_score, predicted_key=DEFAULT_PREDICTED_KEY, - training_key=DEFAULT_TRAINING_KEY, + training_key=DEFAULT_SPAN_KEY, ), predicted_key: str = DEFAULT_PREDICTED_KEY, - training_key: str = DEFAULT_TRAINING_KEY, + training_key: str = DEFAULT_SPAN_KEY, ) -> None: """Initialize the span boundary detector. model (thinc.api.Model): The Thinc Model powering the pipeline component. diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py index 5a087e42a..c83b03d38 100644 --- a/spacy/pipeline/spancat.py +++ b/spacy/pipeline/spancat.py @@ -1,22 +1,22 @@ -from typing import List, Dict, Callable, Tuple, Optional, Iterable, Any, cast, Union from dataclasses import dataclass from functools import partial -from thinc.api import Config, Model, get_current_ops, set_dropout_rate, Ops -from thinc.api import Optimizer -from thinc.types import Ragged, Ints2d, Floats2d +from typing import (Any, Callable, Dict, Iterable, List, Optional, Tuple, + Union, cast) import numpy +from thinc.api import (Config, Model, Ops, Optimizer, get_current_ops, + set_dropout_rate) +from thinc.types import Floats2d, Ints2d, Ragged from ..compat import Protocol, runtime_checkable -from ..scorer import Scorer -from ..language import Language -from .trainable_pipe import TrainablePipe -from ..tokens import Doc, SpanGroup, Span -from ..vocab import Vocab -from ..training import Example, validate_examples from ..errors import Errors +from ..language import Language +from ..scorer import Scorer +from ..tokens import Doc, Span, SpanGroup +from ..training import Example, validate_examples from ..util import registry - +from ..vocab import Vocab +from .trainable_pipe import TrainablePipe spancat_default_config = """ [model] @@ -71,6 +71,7 @@ maxout_pieces = 3 depth = 4 """ +DEFAULT_SPAN_KEY = "sc" DEFAULT_SPANCAT_MODEL = Config().from_str(spancat_default_config)["model"] DEFAULT_SPANCAT_SINGLELABEL_MODEL = Config().from_str( spancat_singlelabel_default_config @@ -135,7 +136,7 @@ def build_ngram_range_suggester(min_size: int, max_size: int) -> Suggester: assigns=["doc.spans"], default_config={ "threshold": 0.5, - "spans_key": "sc", + "spans_key": DEFAULT_SPAN_KEY, "max_positive": None, "model": DEFAULT_SPANCAT_MODEL, "suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]}, @@ -199,7 +200,7 @@ def make_spancat( "spancat_singlelabel", assigns=["doc.spans"], default_config={ - "spans_key": "sc", + "spans_key": DEFAULT_SPAN_KEY, "model": DEFAULT_SPANCAT_SINGLELABEL_MODEL, "negative_weight": 1.0, "suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]},