Getting scores out of beam_ner (#6575)

* small fixes and formatting

* bring test_issue4313 up-to-date, currently fails

* formatting

* add get_beam_parses method back

* add scored_ents function

* delete tag map
Sofie Van Landeghem 2021-01-06 12:02:32 +01:00 committed by GitHub
parent 82ae95267a
commit 402dbc5bae
6 changed files with 123 additions and 14 deletions
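
For context, this is roughly how the new API fits together once a pipeline with a trained "beam_ner" component is available. The snippet below is a sketch distilled from the tests added in this commit; the pipeline object `nlp` and the example text are assumptions, not part of the commit itself.

    # Sketch: reading entity confidences out of a trained "beam_ner" pipe.
    # Assumes `nlp` is a spaCy Language object whose "beam_ner" component
    # has already been trained (as in the tests added below).
    ner = nlp.get_pipe("beam_ner")
    docs = [nlp.make_doc("I like London.")]
    beams = ner.predict(docs)               # one beam object per doc
    entity_scores = ner.scored_ents(beams)  # one dict per doc
    for (start, end, label), score in entity_scores[0].items():
        # each score should fall in [0, 1] (within epsilon), as asserted
        # in test_beam_ner_scores below
        print(start, end, label, score)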


@@ -255,7 +255,7 @@ def get_gradient(nr_class, beam_maps, histories, losses):
     for a beam state -- so we have "the gradient of loss for taking
     action i given history H."
-    Histories: Each hitory is a list of actions
+    Histories: Each history is a list of actions
     Each candidate has a history
     Each beam has multiple candidates
     Each batch has multiple beams
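
As an aside, the nesting that this docstring describes can be pictured with plain type aliases. The sketch below only illustrates the shape; the names and example values are invented, not spaCy's actual types.

    # Illustrative only: the nesting of the `histories` argument as described
    # in the docstring above.
    from typing import List

    ActionId = int
    History = List[ActionId]               # each history is a list of actions
    BeamCandidates = List[History]          # each beam has multiple candidates
    BatchHistories = List[BeamCandidates]   # each batch has multiple beams

    histories: BatchHistories = [
        [[0, 2, 1], [0, 1, 1]],  # beam for doc 0: two candidate histories
        [[3, 0]],                # beam for doc 1: one candidate history
    ]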


@@ -611,7 +611,7 @@ cdef class ArcEager(TransitionSystem):
         return gold

     def init_gold_batch(self, examples):
-        # TODO: Projectivitity?
+        # TODO: Projectivity?
         all_states = self.init_batch([eg.predicted for eg in examples])
         golds = []
         states = []


@@ -2,6 +2,7 @@ from libc.stdint cimport int32_t
 from cymem.cymem cimport Pool

 from collections import Counter
+from thinc.extra.search cimport Beam

 from ...tokens.doc cimport Doc
 from ...tokens.span import Span
@@ -246,6 +247,20 @@ cdef class BiluoPushDown(TransitionSystem):
             if doc.c[i].ent_iob == 0:
                 doc.c[i].ent_iob = 2

+    def get_beam_parses(self, Beam beam):
+        parses = []
+        probs = beam.probs
+        for i in range(beam.size):
+            state = <StateC*>beam.at(i)
+            if state.is_final():
+                prob = probs[i]
+                parse = []
+                for j in range(state._ents.size()):
+                    ent = state._ents.at(j)
+                    parse.append((ent.start, ent.end, self.strings[ent.label]))
+                parses.append((prob, parse))
+        return parses
+
     def init_gold(self, StateClass state, Example example):
         return BiluoGold(self, state, example)
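
For reference, each item that get_beam_parses returns pairs a finished state's probability with the entities that state predicts, as (start_token, end_token, label) tuples. The values below are invented purely to illustrate the shape:

    # Invented example of the structure returned by get_beam_parses:
    # one (probability, [(start, end, label), ...]) pair per finished
    # candidate on the beam.
    parses = [
        (0.71, [(2, 3, "LOC")]),     # e.g. "London" analysed as LOC
        (0.29, [(2, 3, "PERSON")]),  # an alternative candidate on the beam
    ]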


@@ -1,4 +1,5 @@
 # cython: infer_types=True, profile=True, binding=True
+from collections import defaultdict
 from typing import Optional, Iterable
 from thinc.api import Model, Config
@@ -197,3 +198,16 @@ cdef class EntityRecognizer(Parser):
         """
         validate_examples(examples, "EntityRecognizer.score")
         return get_ner_prf(examples)
+
+    def scored_ents(self, beams):
+        """Return a dictionary of (start, end, label) tuples with corresponding scores
+        for each beam/doc that was processed.
+        """
+        entity_scores = []
+        for beam in beams:
+            score_dict = defaultdict(float)
+            for score, ents in self.moves.get_beam_parses(beam):
+                for start, end, label in ents:
+                    score_dict[(start, end, label)] += score
+            entity_scores.append(score_dict)
+        return entity_scores
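
Relating the two additions: scored_ents sums the per-parse probabilities from get_beam_parses for every (start, end, label) key, giving one dictionary per doc. Below is a sketch of that aggregation using the invented beam output from above; the real method operates on Beam objects, not plain lists.

    from collections import defaultdict

    # Invented beam output, as sketched under get_beam_parses above.
    parses = [(0.71, [(2, 3, "LOC")]), (0.29, [(2, 3, "PERSON")])]

    score_dict = defaultdict(float)
    for prob, ents in parses:
        for start, end, label in ents:
            score_dict[(start, end, label)] += prob

    # score_dict -> {(2, 3, "LOC"): 0.71, (2, 3, "PERSON"): 0.29}
    # Once the model is overfit (see test_beam_overfitting_IO below), nearly
    # all of the beam's probability mass sits on one analysis, which is why
    # those tests can assert scores of 1.0 and 0.0.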


@@ -1,3 +1,5 @@
+from collections import defaultdict
+
 import pytest
 from numpy.testing import assert_equal
 from spacy.attrs import ENT_IOB
@@ -359,6 +361,86 @@ def test_overfitting_IO(use_upper):
     assert_equal(batch_deps_1, no_batch_deps)


+def test_beam_ner_scores():
+    # Test that we can get confidence values out of the beam_ner pipe
+    beam_width = 16
+    beam_density = 0.0001
+    nlp = English()
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for ent in annotations.get("entities"):
+            ner.add_label(ent[2])
+    optimizer = nlp.initialize()
+
+    # update once
+    losses = {}
+    nlp.update(train_examples, sgd=optimizer, losses=losses)
+
+    # test the scores from the beam
+    test_text = "I like London."
+    doc = nlp.make_doc(test_text)
+    docs = [doc]
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.predict(docs)
+    entity_scores = ner.scored_ents(beams)[0]
+
+    for j in range(len(doc)):
+        for label in ner.labels:
+            score = entity_scores[(j, j+1, label)]
+            eps = 0.00001
+            assert 0 - eps <= score <= 1 + eps
+
+
+def test_beam_overfitting_IO():
+    # Simple test to try and quickly overfit the Beam NER component
+    nlp = English()
+    beam_width = 16
+    beam_density = 0.0001
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for ent in annotations.get("entities"):
+            ner.add_label(ent[2])
+    optimizer = nlp.initialize()
+
+    # run overfitting
+    for i in range(50):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+    assert losses["beam_ner"] < 0.0001
+
+    # test the scores from the beam
+    test_text = "I like London."
+    docs = [nlp.make_doc(test_text)]
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.predict(docs)
+    entity_scores = ner.scored_ents(beams)[0]
+    assert entity_scores[(2, 3, "LOC")] == 1.0
+    assert entity_scores[(2, 3, "PERSON")] == 0.0
+
+    # Also test the results are still the same after IO
+    with make_tempdir() as tmp_dir:
+        nlp.to_disk(tmp_dir)
+        nlp2 = util.load_model_from_path(tmp_dir)
+        docs2 = [nlp2(test_text)]
+        ner2 = nlp2.get_pipe("beam_ner")
+        beams2 = ner2.predict(docs2)
+        entity_scores2 = ner2.scored_ents(beams2)[0]
+        assert entity_scores2[(2, 3, "LOC")] == 1.0
+        assert entity_scores2[(2, 3, "PERSON")] == 0.0
+
+
 def test_ner_warns_no_lookups(caplog):
     nlp = English()
     assert nlp.lang in util.LEXEME_NORM_LANGS


@@ -288,35 +288,33 @@ def test_multiple_predictions():
     dummy_pipe(doc)


-@pytest.mark.skip(reason="removed Beam stuff during the Example/GoldParse refactor")
 def test_issue4313():
     """ This should not crash or exit with some strange error code """
     beam_width = 16
     beam_density = 0.0001
     nlp = English()
-    config = {}
-    ner = nlp.create_pipe("ner", config=config)
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
     ner.add_label("SOME_LABEL")
-    ner.initialize(lambda: [])
+    nlp.initialize()
     # add a new label to the doc
     doc = nlp("What do you think about Apple ?")
     assert len(ner.labels) == 1
     assert "SOME_LABEL" in ner.labels
-    ner.add_label("MY_ORG")
+    # TODO: not sure if we want this to be necessary...
     apple_ent = Span(doc, 5, 6, label="MY_ORG")
     doc.ents = list(doc.ents) + [apple_ent]
     # ensure the beam_parse still works with the new label
     docs = [doc]
-    beams = nlp.entity.beam_parse(
-        docs, beam_width=beam_width, beam_density=beam_density
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.beam_parse(
+        docs, drop=0.0, beam_width=beam_width, beam_density=beam_density
     )
-    for doc, beam in zip(docs, beams):
-        entity_scores = defaultdict(float)
-        for score, ents in nlp.entity.moves.get_beam_parses(beam):
-            for start, end, label in ents:
-                entity_scores[(start, end, label)] += score


 def test_issue4348():
     """Test that training the tagger with empty data, doesn't throw errors"""