From 402dbc5baecb36db1c5cbe898626b1913facf547 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem
Date: Wed, 6 Jan 2021 12:02:32 +0100
Subject: [PATCH] Getting scores out of beam_ner (#6575)

* small fixes and formatting

* bring test_issue4313 up-to-date, currently fails

* formatting

* add get_beam_parses method back

* add scored_ents function

* delete tag map
---
 .../_parser_internals/_beam_utils.pyx          |  2 +-
 .../pipeline/_parser_internals/arc_eager.pyx   |  2 +-
 spacy/pipeline/_parser_internals/ner.pyx       | 15 ++++
 spacy/pipeline/ner.pyx                         | 14 ++++
 spacy/tests/parser/test_ner.py                 | 82 +++++++++++++++++++
 spacy/tests/regression/test_issue4001-4500.py  | 22 +++--
 6 files changed, 123 insertions(+), 14 deletions(-)

diff --git a/spacy/pipeline/_parser_internals/_beam_utils.pyx b/spacy/pipeline/_parser_internals/_beam_utils.pyx
index a7f34daaf..fa7df2056 100644
--- a/spacy/pipeline/_parser_internals/_beam_utils.pyx
+++ b/spacy/pipeline/_parser_internals/_beam_utils.pyx
@@ -255,7 +255,7 @@ def get_gradient(nr_class, beam_maps, histories, losses):
     for a beam state -- so we have "the gradient of loss for taking action i given
     history H."
 
-    Histories: Each hitory is a list of actions
+    Histories: Each history is a list of actions
        Each candidate has a history
        Each beam has multiple candidates
        Each batch has multiple beams
diff --git a/spacy/pipeline/_parser_internals/arc_eager.pyx b/spacy/pipeline/_parser_internals/arc_eager.pyx
index 6f63e6943..1d92efd7b 100644
--- a/spacy/pipeline/_parser_internals/arc_eager.pyx
+++ b/spacy/pipeline/_parser_internals/arc_eager.pyx
@@ -611,7 +611,7 @@ cdef class ArcEager(TransitionSystem):
         return gold
 
     def init_gold_batch(self, examples):
-        # TODO: Projectivitity?
+        # TODO: Projectivity?
         all_states = self.init_batch([eg.predicted for eg in examples])
         golds = []
         states = []
diff --git a/spacy/pipeline/_parser_internals/ner.pyx b/spacy/pipeline/_parser_internals/ner.pyx
index f2bd9d134..d3e58e245 100644
--- a/spacy/pipeline/_parser_internals/ner.pyx
+++ b/spacy/pipeline/_parser_internals/ner.pyx
@@ -2,6 +2,7 @@ from libc.stdint cimport int32_t
 from cymem.cymem cimport Pool
 
 from collections import Counter
+from thinc.extra.search cimport Beam
 
 from ...tokens.doc cimport Doc
 from ...tokens.span import Span
@@ -246,6 +247,20 @@ cdef class BiluoPushDown(TransitionSystem):
             if doc.c[i].ent_iob == 0:
                 doc.c[i].ent_iob = 2
 
+    def get_beam_parses(self, Beam beam):
+        parses = []
+        probs = beam.probs
+        for i in range(beam.size):
+            state = <StateC*>beam.at(i)
+            if state.is_final():
+                prob = probs[i]
+                parse = []
+                for j in range(state._ents.size()):
+                    ent = state._ents.at(j)
+                    parse.append((ent.start, ent.end, self.strings[ent.label]))
+                parses.append((prob, parse))
+        return parses
+
     def init_gold(self, StateClass state, Example example):
         return BiluoGold(self, state, example)
 
diff --git a/spacy/pipeline/ner.pyx b/spacy/pipeline/ner.pyx
index e89f5b3dd..0dfb055d3 100644
--- a/spacy/pipeline/ner.pyx
+++ b/spacy/pipeline/ner.pyx
@@ -1,4 +1,5 @@
 # cython: infer_types=True, profile=True, binding=True
+from collections import defaultdict
 from typing import Optional, Iterable
 from thinc.api import Model, Config
 
@@ -197,3 +198,16 @@ cdef class EntityRecognizer(Parser):
         """
         validate_examples(examples, "EntityRecognizer.score")
         return get_ner_prf(examples)
+
+    def scored_ents(self, beams):
+        """Return a dictionary of (start, end, label) tuples with corresponding scores
+        for each beam/doc that was processed.
+        """
+        entity_scores = []
+        for beam in beams:
+            score_dict = defaultdict(float)
+            for score, ents in self.moves.get_beam_parses(beam):
+                for start, end, label in ents:
+                    score_dict[(start, end, label)] += score
+            entity_scores.append(score_dict)
+        return entity_scores
diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py
index 8b771cafa..fec69bf66 100644
--- a/spacy/tests/parser/test_ner.py
+++ b/spacy/tests/parser/test_ner.py
@@ -1,3 +1,5 @@
+from collections import defaultdict
+
 import pytest
 from numpy.testing import assert_equal
 from spacy.attrs import ENT_IOB
@@ -359,6 +361,86 @@ def test_overfitting_IO(use_upper):
     assert_equal(batch_deps_1, no_batch_deps)
 
 
+def test_beam_ner_scores():
+    # Test that we can get confidence values out of the beam_ner pipe
+    beam_width = 16
+    beam_density = 0.0001
+    nlp = English()
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for ent in annotations.get("entities"):
+            ner.add_label(ent[2])
+    optimizer = nlp.initialize()
+
+    # update once
+    losses = {}
+    nlp.update(train_examples, sgd=optimizer, losses=losses)
+
+    # test the scores from the beam
+    test_text = "I like London."
+    doc = nlp.make_doc(test_text)
+    docs = [doc]
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.predict(docs)
+    entity_scores = ner.scored_ents(beams)[0]
+
+    for j in range(len(doc)):
+        for label in ner.labels:
+            score = entity_scores[(j, j+1, label)]
+            eps = 0.00001
+            assert 0 - eps <= score <= 1 + eps
+
+
+def test_beam_overfitting_IO():
+    # Simple test to try and quickly overfit the Beam NER component
+    nlp = English()
+    beam_width = 16
+    beam_density = 0.0001
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for ent in annotations.get("entities"):
+            ner.add_label(ent[2])
+    optimizer = nlp.initialize()
+
+    # run overfitting
+    for i in range(50):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+    assert losses["beam_ner"] < 0.0001
+
+    # test the scores from the beam
+    test_text = "I like London."
+    docs = [nlp.make_doc(test_text)]
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.predict(docs)
+    entity_scores = ner.scored_ents(beams)[0]
+    assert entity_scores[(2, 3, "LOC")] == 1.0
+    assert entity_scores[(2, 3, "PERSON")] == 0.0
+
+    # Also test the results are still the same after IO
+    with make_tempdir() as tmp_dir:
+        nlp.to_disk(tmp_dir)
+        nlp2 = util.load_model_from_path(tmp_dir)
+        docs2 = [nlp2(test_text)]
+        ner2 = nlp2.get_pipe("beam_ner")
+        beams2 = ner2.predict(docs2)
+        entity_scores2 = ner2.scored_ents(beams2)[0]
+        assert entity_scores2[(2, 3, "LOC")] == 1.0
+        assert entity_scores2[(2, 3, "PERSON")] == 0.0
+
+
 def test_ner_warns_no_lookups(caplog):
     nlp = English()
     assert nlp.lang in util.LEXEME_NORM_LANGS
diff --git a/spacy/tests/regression/test_issue4001-4500.py b/spacy/tests/regression/test_issue4001-4500.py
index 873ef9c1d..521fa0d73 100644
--- a/spacy/tests/regression/test_issue4001-4500.py
+++ b/spacy/tests/regression/test_issue4001-4500.py
@@ -288,35 +288,33 @@ def test_multiple_predictions():
     dummy_pipe(doc)
 
 
-@pytest.mark.skip(reason="removed Beam stuff during the Example/GoldParse refactor")
 def test_issue4313():
     """ This should not crash or exit with some strange error code """
     beam_width = 16
     beam_density = 0.0001
     nlp = English()
-    config = {}
-    ner = nlp.create_pipe("ner", config=config)
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
     ner.add_label("SOME_LABEL")
-    ner.initialize(lambda: [])
+    nlp.initialize()
     # add a new label to the doc
    doc = nlp("What do you think about Apple ?")
     assert len(ner.labels) == 1
     assert "SOME_LABEL" in ner.labels
+    ner.add_label("MY_ORG")  # TODO: not sure if we want this to be necessary...
     apple_ent = Span(doc, 5, 6, label="MY_ORG")
     doc.ents = list(doc.ents) + [apple_ent]
     # ensure the beam_parse still works with the new label
     docs = [doc]
-    beams = nlp.entity.beam_parse(
-        docs, beam_width=beam_width, beam_density=beam_density
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.beam_parse(
+        docs, drop=0.0, beam_width=beam_width, beam_density=beam_density
     )
-    for doc, beam in zip(docs, beams):
-        entity_scores = defaultdict(float)
-        for score, ents in nlp.entity.moves.get_beam_parses(beam):
-            for start, end, label in ents:
-                entity_scores[(start, end, label)] += score
-
 
 
 def test_issue4348():
     """Test that training the tagger with empty data, doesn't throw errors"""
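
For reference, a minimal usage sketch of the API this patch adds, mirroring the new tests in spacy/tests/parser/test_ner.py. It assumes a TRAIN_DATA list of (text, annotations) pairs like the one in that test file, and the beam_ner pipe must be trained before the scores are meaningful:

    # Sketch: get per-entity confidence scores from a trained "beam_ner" pipe.
    # TRAIN_DATA is assumed to be a list of (text, annotations) pairs as in the tests.
    from spacy.lang.en import English
    from spacy.training import Example

    nlp = English()
    config = {"beam_width": 16, "beam_density": 0.0001}
    ner = nlp.add_pipe("beam_ner", config=config)
    train_examples = []
    for text, annotations in TRAIN_DATA:
        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
        for ent in annotations.get("entities"):
            ner.add_label(ent[2])
    optimizer = nlp.initialize()
    for _ in range(50):
        nlp.update(train_examples, sgd=optimizer, losses={})

    # predict() returns one beam per doc; scored_ents() folds the beam parses into
    # dicts mapping (start_token, end_token, label) to an aggregated score.
    docs = [nlp.make_doc("I like London.")]
    beams = ner.predict(docs)
    entity_scores = ner.scored_ents(beams)[0]
    for (start, end, label), score in entity_scores.items():
        print(start, end, label, score)

Because scored_ents sums the probability mass of every beam parse that contains a given (start, end, label) span, spans present in all surviving parses approach a score of 1.0, while spans proposed by only a few low-probability parses stay close to 0.0.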