Use True/False instead of floats for the tagger's label_smoothing setting

This commit is contained in:
vinit 2023-02-17 00:53:17 +05:30
parent 8be0e39a75
commit 38e5a75014
2 changed files with 12 additions and 7 deletions

View File

@@ -45,7 +45,7 @@ DEFAULT_TAGGER_MODEL = Config().from_str(default_model_config)["model"]
@Language.factory(
"tagger",
assigns=["token.tag"],
default_config={"model": DEFAULT_TAGGER_MODEL, "overwrite": False, "scorer": {"@scorers": "spacy.tagger_scorer.v1"}, "neg_prefix": "!", "label_smoothing": 0.0},
default_config={"model": DEFAULT_TAGGER_MODEL, "overwrite": False, "scorer": {"@scorers": "spacy.tagger_scorer.v1"}, "neg_prefix": "!", "label_smoothing": False},
default_score_weights={"tag_acc": 1.0},
)
def make_tagger(
@@ -55,7 +55,7 @@ def make_tagger(
overwrite: bool,
scorer: Optional[Callable],
neg_prefix: str,
label_smoothing: float,
label_smoothing: bool,
):
"""Construct a part-of-speech tagger component.
@@ -90,7 +90,7 @@ class Tagger(TrainablePipe):
overwrite=BACKWARD_OVERWRITE,
scorer=tagger_score,
neg_prefix="!",
label_smoothing=0.0,
label_smoothing=False,
):
"""Initialize a part-of-speech tagger.
@@ -258,7 +258,7 @@ class Tagger(TrainablePipe):
DOCS: https://spacy.io/api/tagger#get_loss
"""
validate_examples(examples, "Tagger.get_loss")
# label_smoothing = 0.1 if self.cfg["label_smoothing"] else 0.0
self.cfg["label_smoothing"] = 0.05 if self.cfg["label_smoothing"] else 0.0
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False, neg_prefix=self.cfg["neg_prefix"], label_smoothing=self.cfg["label_smoothing"])
# Convert empty tag "" to missing value None so that both misaligned
# tokens and tokens with missing annotation have the default missing

View File

@@ -68,17 +68,22 @@ PARTIAL_DATA = [
def test_label_smoothing():
util.fix_random_seed()
nlp = Language()
tagger = nlp.add_pipe("tagger", config=dict(label_smoothing=True))
tagger_no_ls = nlp.add_pipe("tagger", "no_label_smoothing", config=dict(label_smoothing=False))
tagger_ls = nlp.add_pipe("tagger", "label_smoothing", config=dict(label_smoothing=True))
train_examples = []
losses = {}
for tag in TAGS:
tagger.add_label(tag)
tagger_no_ls.add_label(tag)
tagger_ls.add_label(tag)
for t in TRAIN_DATA:
train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
optimizer = nlp.initialize(get_examples=lambda: train_examples)
for i in range(1):
for i in range(5):
losses = {}
nlp.update(train_examples, sgd=optimizer, losses=losses)
assert losses == {'no_label_smoothing': 1.4892945885658264, 'label_smoothing': 1.1432453989982605}
def test_no_label():