formatting

2025-07-15 10:42:34 +03:00 · 2023-02-17 10:26:29 +05:30 · 2023-02-17 10:26:29 +05:30 · e48a662e46
commit e48a662e46
parent d7406fffb0
2 changed files with 12 additions and 5 deletions
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -526,8 +526,8 @@ def debug_data(
        msg.info(f"{len(label_list)} label(s) in train data")
        p = np.array(counts)
        p = p / p.sum()
-        entropy = np.round((-p*np.log2(p)).sum(), 2)
-        msg.info(f"{entropy} is the train data label entropy")
+        norm_entropy = (-p * np.log2(p)).sum() / np.log2(len(label_list))
+        msg.info(f"{norm_entropy} is the normalised label entropy")
        model_labels = _get_labels_from_model(nlp, "tagger")
        labels = set(label_list)
        missing_labels = model_labels - labels
--- a/spacy/tests/pipeline/test_tagger.py
+++ b/spacy/tests/pipeline/test_tagger.py
@@ -70,8 +70,12 @@ PARTIAL_DATA = [
 def test_label_smoothing():
    util.fix_random_seed()
    nlp = Language()
-    tagger_no_ls = nlp.add_pipe("tagger", "no_label_smoothing", config=dict(label_smoothing=False))
-    tagger_ls = nlp.add_pipe("tagger", "label_smoothing", config=dict(label_smoothing=True))
+    tagger_no_ls = nlp.add_pipe(
+        "tagger", "no_label_smoothing", config=dict(label_smoothing=False)
+    )
+    tagger_ls = nlp.add_pipe(
+        "tagger", "label_smoothing", config=dict(label_smoothing=True)
+    )
    train_examples = []
    losses = {}
    for tag in TAGS:
@@ -83,7 +87,10 @@ def test_label_smoothing():
    for i in range(5):
        losses = {}
        nlp.update(train_examples, sgd=optimizer, losses=losses)
-    assert losses == {'no_label_smoothing': 1.4892945885658264, 'label_smoothing': 1.1432453989982605}
+    assert losses == {
+        "no_label_smoothing": 1.4892945885658264,
+        "label_smoothing": 1.1432453989982605,
+    }


 def test_no_label():