From 36634d2adfe270a8f240230cf652c10c1c7c6ba3 Mon Sep 17 00:00:00 2001
From: vinit <vinit.ravishankar@gmail.com>
Date: Mon, 6 Mar 2023 14:54:37 +0530
Subject: [PATCH] update morphologizer, tagger test

---
 spacy/cli/templates/quickstart_training.jinja |  3 ++-
 spacy/pipeline/morphologizer.pyx              | 11 +++++---
 spacy/tests/pipeline/test_morphologizer.py    | 27 ++++++++++++++++++-
 spacy/tests/pipeline/test_tagger.py           |  5 ++--
 website/docs/api/morphologizer.mdx            | 13 ++++-----
 5 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index ab61396d9..046ceb81e 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -69,7 +69,6 @@ grad_factor = 1.0
 {% if "tagger" in components %}
 [components.tagger]
 factory = "tagger"
-label_smoothing = 0.05
 
 [components.tagger.model]
 @architectures = "spacy.Tagger.v2"
@@ -287,6 +286,7 @@ maxout_pieces = 3
 {% if "morphologizer" in components %}
 [components.morphologizer]
 factory = "morphologizer"
+label_smoothing = 0.05
 
 [components.morphologizer.model]
 @architectures = "spacy.Tagger.v2"
@@ -300,6 +300,7 @@ width = ${components.tok2vec.model.encode.width}
 {% if "tagger" in components %}
 [components.tagger]
 factory = "tagger"
+label_smoothing = 0.05
 
 [components.tagger.model]
 @architectures = "spacy.Tagger.v2"
diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx
index 24f98508f..be8f82212 100644
--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@@ -52,7 +52,8 @@ DEFAULT_MORPH_MODEL = Config().from_str(default_model_config)["model"]
 @Language.factory(
     "morphologizer",
     assigns=["token.morph", "token.pos"],
-    default_config={"model": DEFAULT_MORPH_MODEL, "overwrite": True, "extend": False, "scorer": {"@scorers": "spacy.morphologizer_scorer.v1"}},
+    default_config={"model": DEFAULT_MORPH_MODEL, "overwrite": True, "extend": False,
+                    "scorer": {"@scorers": "spacy.morphologizer_scorer.v1"}, "label_smoothing": 0.0},
     default_score_weights={"pos_acc": 0.5, "morph_acc": 0.5, "morph_per_feat": None},
 )
 def make_morphologizer(
@@ -61,9 +62,10 @@ def make_morphologizer(
     name: str,
     overwrite: bool,
     extend: bool,
+    label_smoothing: float,
     scorer: Optional[Callable],
 ):
-    return Morphologizer(nlp.vocab, model, name, overwrite=overwrite, extend=extend, scorer=scorer)
+    return Morphologizer(nlp.vocab, model, name, overwrite=overwrite, extend=extend, label_smoothing=label_smoothing, scorer=scorer)
 
 
 def morphologizer_score(examples, **kwargs):
@@ -94,6 +96,7 @@ class Morphologizer(Tagger):
         *,
         overwrite: bool = BACKWARD_OVERWRITE,
         extend: bool = BACKWARD_EXTEND,
+        label_smoothing: float = 0.0,
         scorer: Optional[Callable] = morphologizer_score,
     ):
         """Initialize a morphologizer.
@@ -121,6 +124,7 @@ class Morphologizer(Tagger):
             "labels_pos": {},
             "overwrite": overwrite,
             "extend": extend,
+            "label_smoothing": label_smoothing,
         }
         self.cfg = dict(sorted(cfg.items()))
         self.scorer = scorer
@@ -270,7 +274,8 @@ class Morphologizer(Tagger):
         DOCS: https://spacy.io/api/morphologizer#get_loss
         """
         validate_examples(examples, "Morphologizer.get_loss")
-        loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
+        loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False,
+                                                    label_smoothing=self.cfg["label_smoothing"])
         truths = []
         for eg in examples:
             eg_truths = []
diff --git a/spacy/tests/pipeline/test_morphologizer.py b/spacy/tests/pipeline/test_morphologizer.py
index 33696bfd8..8ce74ccfa 100644
--- a/spacy/tests/pipeline/test_morphologizer.py
+++ b/spacy/tests/pipeline/test_morphologizer.py
@@ -1,5 +1,5 @@
 import pytest
-from numpy.testing import assert_equal
+from numpy.testing import assert_equal, assert_almost_equal
 
 from spacy import util
 from spacy.training import Example
@@ -19,6 +19,8 @@ def test_label_types():
         morphologizer.add_label(9)
 
 
+TAGS = ["Feat=N", "Feat=V", "Feat=J"]
+
 TRAIN_DATA = [
     (
         "I like green eggs",
@@ -32,6 +34,29 @@ TRAIN_DATA = [
 ]
 
 
+def test_label_smoothing():
+    """Check that label smoothing rescales the morphologizer loss gradient."""
+    nlp = Language()
+    morph_no_ls = nlp.add_pipe("morphologizer", "no_label_smoothing")
+    morph_ls = nlp.add_pipe(
+        "morphologizer", "label_smoothing", config=dict(label_smoothing=0.05)
+    )
+    for tag in TAGS:
+        morph_no_ls.add_label(tag)
+        morph_ls.add_label(tag)
+    train_examples = [
+        Example.from_dict(nlp.make_doc(t[0]), t[1]) for t in TRAIN_DATA
+    ]
+    nlp.initialize(get_examples=lambda: train_examples)
+    # Score once so both components compute gradients from identical predictions.
+    tag_scores, _ = morph_ls.model.begin_update(
+        [eg.predicted for eg in train_examples]
+    )
+    no_ls_grads = morph_no_ls.get_loss(train_examples, tag_scores)[1][0]
+    ls_grads = morph_ls.get_loss(train_examples, tag_scores)[1][0]
+    assert_almost_equal(ls_grads / no_ls_grads, 0.94285715)
+
+
 def test_no_label():
     nlp = Language()
     nlp.add_pipe("morphologizer")
diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py
index 65c4cbe0a..0cc25a64b 100644
--- a/spacy/tests/pipeline/test_tagger.py
+++ b/spacy/tests/pipeline/test_tagger.py
@@ -1,5 +1,5 @@
 import pytest
-from numpy.testing import assert_equal, assert_array_almost_equal
+from numpy.testing import assert_equal, assert_almost_equal
 from spacy.attrs import TAG
 
 from spacy import util
@@ -68,7 +68,6 @@ PARTIAL_DATA = [
 
 
 def test_label_smoothing():
-    util.fix_random_seed()
     nlp = Language()
     tagger_no_ls = nlp.add_pipe("tagger", "no_label_smoothing")
     tagger_ls = nlp.add_pipe(
@@ -88,7 +87,7 @@ def test_label_smoothing():
     )
     no_ls_grads = tagger_no_ls.get_loss(train_examples, tag_scores)[1][0]
     ls_grads = tagger_ls.get_loss(train_examples, tag_scores)[1][0]
-    assert_array_almost_equal((ls_grads - no_ls_grads)[0], [0.05, -0.025, -0.025])
+    assert_almost_equal(ls_grads / no_ls_grads, 0.925)
 
 
 def test_no_label():
diff --git a/website/docs/api/morphologizer.mdx b/website/docs/api/morphologizer.mdx
index f097f2ae3..440061dc5 100644
--- a/website/docs/api/morphologizer.mdx
+++ b/website/docs/api/morphologizer.mdx
@@ -42,12 +42,13 @@ architectures and their arguments and hyperparameters.
 > nlp.add_pipe("morphologizer", config=config)
 > ```
 
-| Setting                                  | Description                                                                                                                                                                                                                                                            |
-| ---------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `model`                                  | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                |
-| `overwrite` <Tag variant="new">3.2</Tag> | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~                                                                                                                                                                                  |
-| `extend` <Tag variant="new">3.2</Tag>    | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~                                                                                                                      |
-| `scorer` <Tag variant="new">3.2</Tag>    | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
+| Setting                                        | Description                                                                                                                                                                                                                                                            |
+| ---------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `model`                                        | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                |
+| `overwrite` <Tag variant="new">3.2</Tag>       | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~                                                                                                                                                                                  |
+| `extend` <Tag variant="new">3.2</Tag>          | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~                                                                                                                      |
+| `scorer` <Tag variant="new">3.2</Tag>          | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
+| `label_smoothing` <Tag variant="new">3.6</Tag> | Whether or not to use label smoothing. Defaults to `False`. ~~bool~~                                                                                                                                                                                                   |
 
 ```python
 %%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx