From 38e5a750145ae58e8e8a1997c815d5fc4f55b90d Mon Sep 17 00:00:00 2001 From: vinit Date: Fri, 17 Feb 2023 00:53:17 +0530 Subject: [PATCH] use True/False instead of floats --- spacy/pipeline/tagger.pyx | 8 ++++---- spacy/tests/pipeline/test_tagger.py | 11 ++++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx index a877062fa..ae59d0983 100644 --- a/spacy/pipeline/tagger.pyx +++ b/spacy/pipeline/tagger.pyx @@ -45,7 +45,7 @@ DEFAULT_TAGGER_MODEL = Config().from_str(default_model_config)["model"] @Language.factory( "tagger", assigns=["token.tag"], - default_config={"model": DEFAULT_TAGGER_MODEL, "overwrite": False, "scorer": {"@scorers": "spacy.tagger_scorer.v1"}, "neg_prefix": "!", "label_smoothing": 0.0}, + default_config={"model": DEFAULT_TAGGER_MODEL, "overwrite": False, "scorer": {"@scorers": "spacy.tagger_scorer.v1"}, "neg_prefix": "!", "label_smoothing": False}, default_score_weights={"tag_acc": 1.0}, ) def make_tagger( @@ -55,7 +55,7 @@ def make_tagger( overwrite: bool, scorer: Optional[Callable], neg_prefix: str, - label_smoothing: float, + label_smoothing: bool, ): """Construct a part-of-speech tagger component. @@ -90,7 +90,7 @@ class Tagger(TrainablePipe): overwrite=BACKWARD_OVERWRITE, scorer=tagger_score, neg_prefix="!", - label_smoothing=0.0, + label_smoothing=False, ): """Initialize a part-of-speech tagger. @@ -258,7 +258,7 @@ class Tagger(TrainablePipe): DOCS: https://spacy.io/api/tagger#get_loss """ validate_examples(examples, "Tagger.get_loss") - # label_smoothing = 0.1 if self.cfg["label_smoothing"] else 0.0 + self.cfg["label_smoothing"] = 0.05 if self.cfg["label_smoothing"] else 0.0 loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False, neg_prefix=self.cfg["neg_prefix"], label_smoothing=self.cfg["label_smoothing"]) # Convert empty tag "" to missing value None so that both misaligned # tokens and tokens with missing annotation have the default missing diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py index 04284988f..32cdd196b 100644 --- a/spacy/tests/pipeline/test_tagger.py +++ b/spacy/tests/pipeline/test_tagger.py @@ -68,17 +68,22 @@ PARTIAL_DATA = [ def test_label_smoothing(): + util.fix_random_seed() nlp = Language() - tagger = nlp.add_pipe("tagger", config=dict(label_smoothing=True)) + tagger_no_ls = nlp.add_pipe("tagger", "no_label_smoothing", config=dict(label_smoothing=False)) + tagger_ls = nlp.add_pipe("tagger", "label_smoothing", config=dict(label_smoothing=True)) train_examples = [] + losses = {} for tag in TAGS: - tagger.add_label(tag) + tagger_no_ls.add_label(tag) + tagger_ls.add_label(tag) for t in TRAIN_DATA: train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1])) optimizer = nlp.initialize(get_examples=lambda: train_examples) - for i in range(1): + for i in range(5): losses = {} nlp.update(train_examples, sgd=optimizer, losses=losses) + assert losses == {'no_label_smoothing': 1.4892945885658264, 'label_smoothing': 1.1432453989982605} def test_no_label():