Add default span key constant (DEFAULT_SPAN_KEY)

This commit is contained in:
kadarakos 2023-04-13 10:01:12 +00:00
parent 4d88616c4f
commit 85dd4d4c3b
2 changed files with 24 additions and 24 deletions

View File

@ -1,7 +1,8 @@
from functools import partial
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, cast
from thinc.api import Config, Model, Ops, Optimizer, get_current_ops, set_dropout_rate
from thinc.api import (Config, Model, Ops, Optimizer, get_current_ops,
set_dropout_rate)
from thinc.types import Floats2d, Ints1d, Ragged
from spacy.language import Language
@ -11,7 +12,7 @@ from spacy.tokens import Doc
from spacy.training import Example
from ..util import registry
from .spancat import Suggester
from .spancat import DEFAULT_SPAN_KEY, Suggester
span_finder_default_config = """
[model]
@ -41,8 +42,6 @@ depth = 4
DEFAULT_SPAN_FINDER_MODEL = Config().from_str(span_finder_default_config)["model"]
DEFAULT_PREDICTED_KEY = "span_candidates"
# XXX What was this TODO for?
DEFAULT_TRAINING_KEY = "sc" # TODO: define in spancat
@Language.factory(
@ -52,14 +51,14 @@ DEFAULT_TRAINING_KEY = "sc" # TODO: define in spancat
"threshold": 0.5,
"model": DEFAULT_SPAN_FINDER_MODEL,
"predicted_key": DEFAULT_PREDICTED_KEY,
"training_key": DEFAULT_TRAINING_KEY,
"training_key": DEFAULT_SPAN_KEY,
# XXX Wouldn't None be a better default than 0 here?
"max_length": 0,
"min_length": 0,
"scorer": {
"@scorers": "spacy.span_finder_scorer.v1",
"predicted_key": DEFAULT_PREDICTED_KEY,
"training_key": DEFAULT_TRAINING_KEY,
"training_key": DEFAULT_SPAN_KEY,
},
},
default_score_weights={
@ -77,7 +76,7 @@ def make_span_finder(
max_length: int,
min_length: int,
predicted_key: str = DEFAULT_PREDICTED_KEY,
training_key: str = DEFAULT_TRAINING_KEY,
training_key: str = DEFAULT_SPAN_KEY,
) -> "SpanFinder":
"""Create a SpanFinder component. The component predicts whether a token is
the start or the end of a potential span.
@ -110,7 +109,7 @@ def make_span_finder(
@registry.scorers("spacy.span_finder_scorer.v1")
def make_span_finder_scorer(
predicted_key: str = DEFAULT_PREDICTED_KEY,
training_key: str = DEFAULT_TRAINING_KEY,
training_key: str = DEFAULT_SPAN_KEY,
):
return partial(
span_finder_score, predicted_key=predicted_key, training_key=training_key
@ -121,7 +120,7 @@ def span_finder_score(
examples: Iterable[Example],
*,
predicted_key: str = DEFAULT_PREDICTED_KEY,
training_key: str = DEFAULT_TRAINING_KEY,
training_key: str = DEFAULT_SPAN_KEY,
**kwargs,
) -> Dict[str, Any]:
kwargs = dict(kwargs)
@ -165,10 +164,10 @@ class SpanFinder(TrainablePipe):
scorer: Optional[Callable] = partial(
span_finder_score,
predicted_key=DEFAULT_PREDICTED_KEY,
training_key=DEFAULT_TRAINING_KEY,
training_key=DEFAULT_SPAN_KEY,
),
predicted_key: str = DEFAULT_PREDICTED_KEY,
training_key: str = DEFAULT_TRAINING_KEY,
training_key: str = DEFAULT_SPAN_KEY,
) -> None:
"""Initialize the span boundary detector.
model (thinc.api.Model): The Thinc Model powering the pipeline component.

View File

@ -1,22 +1,22 @@
from typing import List, Dict, Callable, Tuple, Optional, Iterable, Any, cast, Union
from dataclasses import dataclass
from functools import partial
from thinc.api import Config, Model, get_current_ops, set_dropout_rate, Ops
from thinc.api import Optimizer
from thinc.types import Ragged, Ints2d, Floats2d
from typing import (Any, Callable, Dict, Iterable, List, Optional, Tuple,
Union, cast)
import numpy
from thinc.api import (Config, Model, Ops, Optimizer, get_current_ops,
set_dropout_rate)
from thinc.types import Floats2d, Ints2d, Ragged
from ..compat import Protocol, runtime_checkable
from ..scorer import Scorer
from ..language import Language
from .trainable_pipe import TrainablePipe
from ..tokens import Doc, SpanGroup, Span
from ..vocab import Vocab
from ..training import Example, validate_examples
from ..errors import Errors
from ..language import Language
from ..scorer import Scorer
from ..tokens import Doc, Span, SpanGroup
from ..training import Example, validate_examples
from ..util import registry
from ..vocab import Vocab
from .trainable_pipe import TrainablePipe
spancat_default_config = """
[model]
@ -71,6 +71,7 @@ maxout_pieces = 3
depth = 4
"""
DEFAULT_SPAN_KEY = "sc"
DEFAULT_SPANCAT_MODEL = Config().from_str(spancat_default_config)["model"]
DEFAULT_SPANCAT_SINGLELABEL_MODEL = Config().from_str(
spancat_singlelabel_default_config
@ -135,7 +136,7 @@ def build_ngram_range_suggester(min_size: int, max_size: int) -> Suggester:
assigns=["doc.spans"],
default_config={
"threshold": 0.5,
"spans_key": "sc",
"spans_key": DEFAULT_SPAN_KEY,
"max_positive": None,
"model": DEFAULT_SPANCAT_MODEL,
"suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]},
@ -199,7 +200,7 @@ def make_spancat(
"spancat_singlelabel",
assigns=["doc.spans"],
default_config={
"spans_key": "sc",
"spans_key": DEFAULT_SPAN_KEY,
"model": DEFAULT_SPANCAT_SINGLELABEL_MODEL,
"negative_weight": 1.0,
"suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]},