Getting scores out of beam_ner (#6575)

* small fixes and formatting

* bring test_issue4313 up-to-date, currently fails

* formatting

* add get_beam_parses method back

* add scored_ents function

* delete tag map
Sofie Van Landeghem, 2021-01-06 12:02:32 +01:00, committed by GitHub
commit 402dbc5bae (parent 82ae95267a)
6 changed files with 123 additions and 14 deletions


@@ -255,7 +255,7 @@ def get_gradient(nr_class, beam_maps, histories, losses):
     for a beam state -- so we have "the gradient of loss for taking
     action i given history H."
 
-    Histories: Each hitory is a list of actions
+    Histories: Each history is a list of actions
         Each candidate has a history
         Each beam has multiple candidates
         Each batch has multiple beams

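To make that docstring concrete, here is a rough sketch of the nesting it describes; the two-beam shape and the literal action IDs are illustrative assumptions, not part of this diff:

# Hypothetical shape of `histories`, inferred from the docstring above:
# the batch is a list of beams, each beam holds one history per candidate,
# and each history is a list of transition-action IDs.
histories = [
    [  # beam 0 of the batch
        [0, 2, 2, 1],  # candidate 0: the actions it took
        [0, 2, 1, 1],  # candidate 1
    ],
    # ... one entry per beam in the batch
]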

@@ -611,7 +611,7 @@ cdef class ArcEager(TransitionSystem):
         return gold
 
     def init_gold_batch(self, examples):
-        # TODO: Projectivitity?
+        # TODO: Projectivity?
        all_states = self.init_batch([eg.predicted for eg in examples])
        golds = []
        states = []


@@ -2,6 +2,7 @@ from libc.stdint cimport int32_t
 from cymem.cymem cimport Pool
 
 from collections import Counter
+from thinc.extra.search cimport Beam
 
 from ...tokens.doc cimport Doc
 from ...tokens.span import Span
@@ -246,6 +247,20 @@ cdef class BiluoPushDown(TransitionSystem):
             if doc.c[i].ent_iob == 0:
                 doc.c[i].ent_iob = 2
 
+    def get_beam_parses(self, Beam beam):
+        parses = []
+        probs = beam.probs
+        for i in range(beam.size):
+            state = <StateC*>beam.at(i)
+            if state.is_final():
+                prob = probs[i]
+                parse = []
+                for j in range(state._ents.size()):
+                    ent = state._ents.at(j)
+                    parse.append((ent.start, ent.end, self.strings[ent.label]))
+                parses.append((prob, parse))
+        return parses
+
     def init_gold(self, StateClass state, Example example):
         return BiluoGold(self, state, example)

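For orientation (not part of the diff): get_beam_parses walks the candidate states in a beam, keeps the finished ones, and pairs each candidate's entity parse with its probability. A minimal consumer sketch, assuming an nlp pipeline with a trained "beam_ner" pipe:

# Hypothetical usage sketch for get_beam_parses:
ner = nlp.get_pipe("beam_ner")
beams = ner.predict([nlp.make_doc("I like London.")])
for prob, parse in ner.moves.get_beam_parses(beams[0]):
    # `parse` is a list of (start, end, label) tuples for one candidate
    print(prob, parse)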

@@ -1,4 +1,5 @@
 # cython: infer_types=True, profile=True, binding=True
+from collections import defaultdict
 from typing import Optional, Iterable
 from thinc.api import Model, Config
@@ -197,3 +198,16 @@ cdef class EntityRecognizer(Parser):
         """
         validate_examples(examples, "EntityRecognizer.score")
         return get_ner_prf(examples)
+
+    def scored_ents(self, beams):
+        """Return a dictionary of (start, end, label) tuples with corresponding scores
+        for each beam/doc that was processed.
+        """
+        entity_scores = []
+        for beam in beams:
+            score_dict = defaultdict(float)
+            for score, ents in self.moves.get_beam_parses(beam):
+                for start, end, label in ents:
+                    score_dict[(start, end, label)] += score
+            entity_scores.append(score_dict)
+        return entity_scores

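scored_ents then sums those candidate probabilities per (start, end, label) key, giving one score dictionary per doc. A minimal calling sketch, under the same assumption of a trained "beam_ner" pipe (the new tests below do this for real):

# Hypothetical usage sketch for scored_ents:
docs = [nlp.make_doc("I like London.")]
beams = ner.predict(docs)
for score_dict in ner.scored_ents(beams):
    for (start, end, label), score in score_dict.items():
        # `score` is the summed probability mass of beam candidates
        # that proposed exactly this entity span and label
        print(start, end, label, score)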

@@ -1,3 +1,5 @@
+from collections import defaultdict
+
 import pytest
 from numpy.testing import assert_equal
 from spacy.attrs import ENT_IOB
@@ -359,6 +361,86 @@ def test_overfitting_IO(use_upper):
     assert_equal(batch_deps_1, no_batch_deps)
 
 
+def test_beam_ner_scores():
+    # Test that we can get confidence values out of the beam_ner pipe
+    beam_width = 16
+    beam_density = 0.0001
+    nlp = English()
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for ent in annotations.get("entities"):
+            ner.add_label(ent[2])
+    optimizer = nlp.initialize()
+
+    # update once
+    losses = {}
+    nlp.update(train_examples, sgd=optimizer, losses=losses)
+
+    # test the scores from the beam
+    test_text = "I like London."
+    doc = nlp.make_doc(test_text)
+    docs = [doc]
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.predict(docs)
+    entity_scores = ner.scored_ents(beams)[0]
+
+    for j in range(len(doc)):
+        for label in ner.labels:
+            score = entity_scores[(j, j+1, label)]
+            eps = 0.00001
+            assert 0 - eps <= score <= 1 + eps
+
+
+def test_beam_overfitting_IO():
+    # Simple test to try and quickly overfit the Beam NER component
+    nlp = English()
+    beam_width = 16
+    beam_density = 0.0001
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for ent in annotations.get("entities"):
+            ner.add_label(ent[2])
+    optimizer = nlp.initialize()
+
+    # run overfitting
+    for i in range(50):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+    assert losses["beam_ner"] < 0.0001
+
+    # test the scores from the beam
+    test_text = "I like London."
+    docs = [nlp.make_doc(test_text)]
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.predict(docs)
+    entity_scores = ner.scored_ents(beams)[0]
+    assert entity_scores[(2, 3, "LOC")] == 1.0
+    assert entity_scores[(2, 3, "PERSON")] == 0.0
+
+    # Also test the results are still the same after IO
+    with make_tempdir() as tmp_dir:
+        nlp.to_disk(tmp_dir)
+        nlp2 = util.load_model_from_path(tmp_dir)
+        docs2 = [nlp2(test_text)]
+        ner2 = nlp2.get_pipe("beam_ner")
+        beams2 = ner2.predict(docs2)
+        entity_scores2 = ner2.scored_ents(beams2)[0]
+        assert entity_scores2[(2, 3, "LOC")] == 1.0
+        assert entity_scores2[(2, 3, "PERSON")] == 0.0
+
+
 def test_ner_warns_no_lookups(caplog):
     nlp = English()
     assert nlp.lang in util.LEXEME_NORM_LANGS


@@ -288,35 +288,33 @@ def test_multiple_predictions():
     dummy_pipe(doc)
 
 
-@pytest.mark.skip(reason="removed Beam stuff during the Example/GoldParse refactor")
 def test_issue4313():
     """ This should not crash or exit with some strange error code """
     beam_width = 16
     beam_density = 0.0001
     nlp = English()
-    config = {}
-    ner = nlp.create_pipe("ner", config=config)
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
     ner.add_label("SOME_LABEL")
-    ner.initialize(lambda: [])
+    nlp.initialize()
     # add a new label to the doc
     doc = nlp("What do you think about Apple ?")
     assert len(ner.labels) == 1
     assert "SOME_LABEL" in ner.labels
+    ner.add_label("MY_ORG")  # TODO: not sure if we want this to be necessary...
     apple_ent = Span(doc, 5, 6, label="MY_ORG")
     doc.ents = list(doc.ents) + [apple_ent]
     # ensure the beam_parse still works with the new label
     docs = [doc]
-    beams = nlp.entity.beam_parse(
-        docs, beam_width=beam_width, beam_density=beam_density
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.beam_parse(
+        docs, drop=0.0, beam_width=beam_width, beam_density=beam_density
     )
-
-    for doc, beam in zip(docs, beams):
-        entity_scores = defaultdict(float)
-        for score, ents in nlp.entity.moves.get_beam_parses(beam):
-            for start, end, label in ents:
-                entity_scores[(start, end, label)] += score
 
 
 def test_issue4348():
     """Test that training the tagger with empty data, doesn't throw errors"""