diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja index b961ac892..ab61396d9 100644 --- a/spacy/cli/templates/quickstart_training.jinja +++ b/spacy/cli/templates/quickstart_training.jinja @@ -69,6 +69,7 @@ grad_factor = 1.0 {% if "tagger" in components %} [components.tagger] factory = "tagger" +label_smoothing = 0.05 [components.tagger.model] @architectures = "spacy.Tagger.v2" diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx index 86fbff911..5e4a5a5b7 100644 --- a/spacy/pipeline/tagger.pyx +++ b/spacy/pipeline/tagger.pyx @@ -45,7 +45,7 @@ DEFAULT_TAGGER_MODEL = Config().from_str(default_model_config)["model"] @Language.factory( "tagger", assigns=["token.tag"], - default_config={"model": DEFAULT_TAGGER_MODEL, "overwrite": False, "scorer": {"@scorers": "spacy.tagger_scorer.v1"}, "neg_prefix": "!", "label_smoothing": 0.05}, + default_config={"model": DEFAULT_TAGGER_MODEL, "overwrite": False, "scorer": {"@scorers": "spacy.tagger_scorer.v1"}, "neg_prefix": "!", "label_smoothing": 0.0}, default_score_weights={"tag_acc": 1.0}, ) def make_tagger( diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py index de3602603..65c4cbe0a 100644 --- a/spacy/tests/pipeline/test_tagger.py +++ b/spacy/tests/pipeline/test_tagger.py @@ -70,10 +70,10 @@ PARTIAL_DATA = [ def test_label_smoothing(): util.fix_random_seed() nlp = Language() - tagger_no_ls = nlp.add_pipe( - "tagger", "no_label_smoothing", config=dict(label_smoothing=0.0) + tagger_no_ls = nlp.add_pipe("tagger", "no_label_smoothing") + tagger_ls = nlp.add_pipe( + "tagger", "label_smoothing", config=dict(label_smoothing=0.05) ) - tagger_ls = nlp.add_pipe("tagger", "label_smoothing") train_examples = [] losses = {} for tag in TAGS: @@ -160,7 +160,7 @@ def test_no_data(): def test_incomplete_data(): # Test that the tagger works with incomplete information nlp = English() - nlp.add_pipe("tagger", config=dict(label_smoothing=0.0)) + nlp.add_pipe("tagger") train_examples = [] for t in PARTIAL_DATA: train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1])) @@ -180,7 +180,7 @@ def test_incomplete_data(): def test_overfitting_IO(): # Simple test to try and quickly overfit the tagger - ensuring the ML models work correctly nlp = English() - tagger = nlp.add_pipe("tagger", config=dict(label_smoothing=0.0)) + tagger = nlp.add_pipe("tagger") train_examples = [] for t in TRAIN_DATA: train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1])) diff --git a/spacy/tests/pipeline/test_tok2vec.py b/spacy/tests/pipeline/test_tok2vec.py index 8fae0ef66..e423d9a19 100644 --- a/spacy/tests/pipeline/test_tok2vec.py +++ b/spacy/tests/pipeline/test_tok2vec.py @@ -115,7 +115,6 @@ cfg_string = """ [components.tagger] factory = "tagger" - label_smoothing = 0.0 [components.tagger.model] @architectures = "spacy.Tagger.v2" diff --git a/website/docs/api/tagger.mdx b/website/docs/api/tagger.mdx index e425a95d3..fdd234335 100644 --- a/website/docs/api/tagger.mdx +++ b/website/docs/api/tagger.mdx @@ -46,7 +46,7 @@ architectures and their arguments and hyperparameters. | `overwrite` 3.2 | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ | | `scorer` 3.2 | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~ | | `neg_prefix` 3.2.1 | The prefix used to specify incorrect tags while training. The tagger will learn not to predict exactly this tag. Defaults to `!`. ~~str~~ | -| `label_smoothing` 3.6 | Label smoothing factor. Defaults to `0.05`. ~~float~~ | +| `label_smoothing` 3.6 | Label smoothing factor. Defaults to `0.0`. ~~float~~ | ```python %%GITHUB_SPACY/spacy/pipeline/tagger.pyx