From 113e8d082b87f9a394a76dc8faced2a94023dc7a Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem
Date: Mon, 22 Feb 2021 01:06:50 +0100
Subject: [PATCH] only evaluate named entities for NEL if there is a
 corresponding gold span (#7074)

---
 spacy/scorer.py                          | 43 ++++++++++---------
 spacy/tests/regression/test_issue7062.py | 54 ++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 21 deletions(-)
 create mode 100644 spacy/tests/regression/test_issue7062.py

diff --git a/spacy/scorer.py b/spacy/scorer.py
index f20a0d786..f10824fd6 100644
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@@ -531,27 +531,28 @@ class Scorer:
                 gold_span = gold_ent_by_offset.get(
                     (pred_ent.start_char, pred_ent.end_char), None
                 )
-                label = gold_span.label_
-                if label not in f_per_type:
-                    f_per_type[label] = PRFScore()
-                gold = gold_span.kb_id_
-                # only evaluating entities that overlap between gold and pred,
-                # to disentangle the performance of the NEL from the NER
-                if gold is not None:
-                    pred = pred_ent.kb_id_
-                    if gold in negative_labels and pred in negative_labels:
-                        # ignore true negatives
-                        pass
-                    elif gold == pred:
-                        f_per_type[label].tp += 1
-                    elif gold in negative_labels:
-                        f_per_type[label].fp += 1
-                    elif pred in negative_labels:
-                        f_per_type[label].fn += 1
-                    else:
-                        # a wrong prediction (e.g. Q42 != Q3) counts as both a FP as well as a FN
-                        f_per_type[label].fp += 1
-                        f_per_type[label].fn += 1
+                if gold_span is not None:
+                    label = gold_span.label_
+                    if label not in f_per_type:
+                        f_per_type[label] = PRFScore()
+                    gold = gold_span.kb_id_
+                    # only evaluating entities that overlap between gold and pred,
+                    # to disentangle the performance of the NEL from the NER
+                    if gold is not None:
+                        pred = pred_ent.kb_id_
+                        if gold in negative_labels and pred in negative_labels:
+                            # ignore true negatives
+                            pass
+                        elif gold == pred:
+                            f_per_type[label].tp += 1
+                        elif gold in negative_labels:
+                            f_per_type[label].fp += 1
+                        elif pred in negative_labels:
+                            f_per_type[label].fn += 1
+                        else:
+                            # a wrong prediction (e.g. Q42 != Q3) counts as both a FP as well as a FN
+                            f_per_type[label].fp += 1
+                            f_per_type[label].fn += 1
         micro_prf = PRFScore()
         for label_prf in f_per_type.values():
             micro_prf.tp += label_prf.tp
diff --git a/spacy/tests/regression/test_issue7062.py b/spacy/tests/regression/test_issue7062.py
new file mode 100644
index 000000000..88e5d2520
--- /dev/null
+++ b/spacy/tests/regression/test_issue7062.py
@@ -0,0 +1,54 @@
+from spacy.kb import KnowledgeBase
+from spacy.training import Example
+from spacy.lang.en import English
+
+
+# fmt: off
+TRAIN_DATA = [
+    ("Russ Cochran his reprints include EC Comics.",
+        {"links": {(0, 12): {"Q2146908": 1.0}},
+         "entities": [(0, 12, "PERSON")],
+         "sent_starts": [1, -1, 0, 0, 0, 0, 0, 0]})
+]
+# fmt: on
+
+
+def test_partial_links():
+    # Test that having some entities on the doc without gold links, doesn't crash
+    nlp = English()
+    vector_length = 3
+    train_examples = []
+    for text, annotation in TRAIN_DATA:
+        doc = nlp(text)
+        train_examples.append(Example.from_dict(doc, annotation))
+
+    def create_kb(vocab):
+        # create artificial KB
+        mykb = KnowledgeBase(vocab, entity_vector_length=vector_length)
+        mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
+        mykb.add_alias("Russ Cochran", ["Q2146908"], [0.9])
+        return mykb
+
+    # Create and train the Entity Linker
+    entity_linker = nlp.add_pipe("entity_linker", last=True)
+    entity_linker.set_kb(create_kb)
+    optimizer = nlp.initialize(get_examples=lambda: train_examples)
+    for i in range(2):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+
+    # adding additional components that are required for the entity_linker
+    nlp.add_pipe("sentencizer", first=True)
+    patterns = [
+        {"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]},
+        {"label": "ORG", "pattern": [{"LOWER": "ec"}, {"LOWER": "comics"}]}
+    ]
+    ruler = nlp.add_pipe("entity_ruler", before="entity_linker")
+    ruler.add_patterns(patterns)
+
+    # this will run the pipeline on the examples and shouldn't crash
+    results = nlp.evaluate(train_examples)
+    assert "PERSON" in results["ents_per_type"]
+    assert "PERSON" in results["nel_f_per_type"]
+    assert "ORG" in results["ents_per_type"]
+    assert "ORG" not in results["nel_f_per_type"]
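
The sketch below (not part of the patch) isolates the per-entity decision the new scorer code makes: predicted entities without an overlapping gold span are skipped, so NER misses no longer distort the NEL score. "Counts" and "GoldSpan" are stand-ins invented here for spaCy's PRFScore and Span; a minimal illustration only, not the library's API.

    from dataclasses import dataclass
    from typing import Dict, Optional


    @dataclass
    class Counts:
        # stand-in for spaCy's PRFScore
        tp: int = 0
        fp: int = 0
        fn: int = 0


    @dataclass
    class GoldSpan:
        # stand-in for a gold Span with a label and a KB identifier
        label_: str
        kb_id_: str


    def score_pred_entity(
        gold_span: Optional[GoldSpan],
        pred_kb_id: str,
        negative_labels: set,
        f_per_type: Dict[str, Counts],
    ) -> None:
        if gold_span is None:
            # No overlapping gold span: the miss is an NER error, so it is
            # left out of the NEL score (the behaviour this patch introduces).
            return
        counts = f_per_type.setdefault(gold_span.label_, Counts())
        gold = gold_span.kb_id_
        if gold in negative_labels and pred_kb_id in negative_labels:
            pass  # true negative, ignored
        elif gold == pred_kb_id:
            counts.tp += 1
        elif gold in negative_labels:
            counts.fp += 1
        elif pred_kb_id in negative_labels:
            counts.fn += 1
        else:
            # a wrong KB id (e.g. Q42 != Q3) counts as both a FP and a FN
            counts.fp += 1
            counts.fn += 1


    f_per_type: Dict[str, Counts] = {}
    score_pred_entity(GoldSpan("PERSON", "Q2146908"), "Q2146908", {"NIL"}, f_per_type)
    score_pred_entity(None, "Q42", {"NIL"}, f_per_type)  # skipped: no gold span
    print(f_per_type)  # {'PERSON': Counts(tp=1, fp=0, fn=0)}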