diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index e3e952d1d..97b4db285 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -7,7 +7,7 @@ import srsly
 from wasabi import Printer, MESSAGES, msg
 import typer
 import math
-import numpy as np
+import numpy
 
 from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides
 from ._util import import_code, debug_cli, _format_number
@@ -524,9 +524,9 @@ def debug_data(
         msg.divider("Part-of-speech Tagging")
         label_list, counts = zip(*gold_train_data["tags"].items())
         msg.info(f"{len(label_list)} label(s) in train data")
-        p = np.array(counts)
+        p = numpy.array(counts)
         p = p / p.sum()
-        norm_entropy = (-p * np.log2(p)).sum() / np.log2(len(label_list))
+        norm_entropy = (-p * numpy.log2(p)).sum() / numpy.log2(len(label_list))
         msg.info(f"{norm_entropy} is the normalised label entropy")
         model_labels = _get_labels_from_model(nlp, "tagger")
         labels = set(label_list)
diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx
index ae59d0983..86fbff911 100644
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -45,7 +45,7 @@ DEFAULT_TAGGER_MODEL = Config().from_str(default_model_config)["model"]
 @Language.factory(
     "tagger",
     assigns=["token.tag"],
-    default_config={"model": DEFAULT_TAGGER_MODEL, "overwrite": False, "scorer": {"@scorers": "spacy.tagger_scorer.v1"}, "neg_prefix": "!", "label_smoothing": False},
+    default_config={"model": DEFAULT_TAGGER_MODEL, "overwrite": False, "scorer": {"@scorers": "spacy.tagger_scorer.v1"}, "neg_prefix": "!", "label_smoothing": 0.05},
     default_score_weights={"tag_acc": 1.0},
 )
 def make_tagger(
@@ -55,7 +55,7 @@ def make_tagger(
     overwrite: bool,
     scorer: Optional[Callable],
     neg_prefix: str,
-    label_smoothing: bool,
+    label_smoothing: float,
 ):
     """Construct a part-of-speech tagger component.
 
@@ -90,7 +90,7 @@ class Tagger(TrainablePipe):
         overwrite=BACKWARD_OVERWRITE,
         scorer=tagger_score,
         neg_prefix="!",
-        label_smoothing=False,
+        label_smoothing=0.05,
     ):
         """Initialize a part-of-speech tagger.
 
@@ -258,7 +258,6 @@ class Tagger(TrainablePipe):
         DOCS: https://spacy.io/api/tagger#get_loss
         """
         validate_examples(examples, "Tagger.get_loss")
-        self.cfg["label_smoothing"] = 0.05 if self.cfg["label_smoothing"] else 0.0
         loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False, neg_prefix=self.cfg["neg_prefix"], label_smoothing=self.cfg["label_smoothing"])
         # Convert empty tag "" to missing value None so that both misaligned
         # tokens and tokens with missing annotation have the default missing
diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py
index c717794d8..f5de582b1 100644
--- a/spacy/tests/pipeline/test_tagger.py
+++ b/spacy/tests/pipeline/test_tagger.py
@@ -70,10 +70,10 @@ PARTIAL_DATA = [
 def test_label_smoothing():
     nlp = Language()
     tagger_no_ls = nlp.add_pipe(
-        "tagger", "no_label_smoothing", config=dict(label_smoothing=False)
+        "tagger", "no_label_smoothing", config=dict(label_smoothing=0.0)
     )
     tagger_ls = nlp.add_pipe(
-        "tagger", "label_smoothing", config=dict(label_smoothing=True)
+        "tagger", "label_smoothing"
     )
     train_examples = []
     losses = {}
@@ -87,9 +87,9 @@ def test_label_smoothing():
     tag_scores, bp_tag_scores = tagger_ls.model.begin_update(
         [eg.predicted for eg in train_examples]
     )
-    no_ls_probs = tagger_no_ls.get_loss(train_examples, tag_scores)[1][0]
-    ls_probs = tagger_ls.get_loss(train_examples, tag_scores)[1][0]
-    assert_array_almost_equal((ls_probs - no_ls_probs)[0], [0.05, -0.025, -0.025])
+    no_ls_grads = tagger_no_ls.get_loss(train_examples, tag_scores)[1][0]
+    ls_grads = tagger_ls.get_loss(train_examples, tag_scores)[1][0]
+    assert_array_almost_equal((ls_grads - no_ls_grads)[0], [0.05, -0.025, -0.025])
 
 
 def test_no_label():
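
Note on the `debug data` change above: the normalised label entropy is the Shannon entropy of the observed tag distribution divided by `log2` of the number of labels, so it ranges from 0 (one tag dominates completely) to 1 (all tags equally frequent). A minimal sketch of the same arithmetic, using made-up tag counts in place of `gold_train_data["tags"]`:

```python
import numpy

# Hypothetical tag counts; in debug_data they come from gold_train_data["tags"].
counts = numpy.array([90, 8, 2])

p = counts / counts.sum()
norm_entropy = (-p * numpy.log2(p)).sum() / numpy.log2(len(counts))
print(norm_entropy)  # ~0.34, i.e. a heavily skewed tag distribution
```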
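
Note on the expected values in `test_label_smoothing`: with smoothing factor 0.05 and three labels, a one-hot target `[1, 0, 0]` becomes `[0.95, 0.025, 0.025]` if the smoothing mass is spread evenly over the other two labels (which is what the asserted numbers imply). Since the cross-entropy gradient is `probs - target`, the probabilities cancel out of the difference between the two taggers' gradients, leaving `one_hot - smoothed`. A back-of-the-envelope check:

```python
import numpy

eps, n_labels = 0.05, 3  # default label_smoothing and number of tags in the test

one_hot = numpy.array([1.0, 0.0, 0.0])
smoothed = numpy.full(n_labels, eps / (n_labels - 1))  # assumes even redistribution
smoothed[0] = 1.0 - eps

# (probs - smoothed) - (probs - one_hot) == one_hot - smoothed
print(one_hot - smoothed)  # [ 0.05  -0.025 -0.025], the values asserted in the test
```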