From 402dbc5baecb36db1c5cbe898626b1913facf547 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem
Date: Wed, 6 Jan 2021 12:02:32 +0100
Subject: [PATCH] Getting scores out of beam_ner (#6575)

* small fixes and formatting

* bring test_issue4313 up-to-date, currently fails

* formatting

* add get_beam_parses method back

* add scored_ents function

* delete tag map
---
 .../_parser_internals/_beam_utils.pyx          |  2 +-
 .../pipeline/_parser_internals/arc_eager.pyx   |  2 +-
 spacy/pipeline/_parser_internals/ner.pyx       | 15 ++++
 spacy/pipeline/ner.pyx                         | 14 ++++
 spacy/tests/parser/test_ner.py                 | 82 +++++++++++++++++++
 spacy/tests/regression/test_issue4001-4500.py  | 22 +++--
 6 files changed, 123 insertions(+), 14 deletions(-)

diff --git a/spacy/pipeline/_parser_internals/_beam_utils.pyx b/spacy/pipeline/_parser_internals/_beam_utils.pyx
index a7f34daaf..fa7df2056 100644
--- a/spacy/pipeline/_parser_internals/_beam_utils.pyx
+++ b/spacy/pipeline/_parser_internals/_beam_utils.pyx
@@ -255,7 +255,7 @@ def get_gradient(nr_class, beam_maps, histories, losses):
     for a beam state -- so we have "the gradient of loss for taking action i given
     history H."
 
-    Histories: Each hitory is a list of actions
+    Histories: Each history is a list of actions
        Each candidate has a history
        Each beam has multiple candidates
        Each batch has multiple beams
diff --git a/spacy/pipeline/_parser_internals/arc_eager.pyx b/spacy/pipeline/_parser_internals/arc_eager.pyx
index 6f63e6943..1d92efd7b 100644
--- a/spacy/pipeline/_parser_internals/arc_eager.pyx
+++ b/spacy/pipeline/_parser_internals/arc_eager.pyx
@@ -611,7 +611,7 @@ cdef class ArcEager(TransitionSystem):
         return gold
 
     def init_gold_batch(self, examples):
-        # TODO: Projectivitity?
+        # TODO: Projectivity?
         all_states = self.init_batch([eg.predicted for eg in examples])
         golds = []
         states = []
diff --git a/spacy/pipeline/_parser_internals/ner.pyx b/spacy/pipeline/_parser_internals/ner.pyx
index f2bd9d134..d3e58e245 100644
--- a/spacy/pipeline/_parser_internals/ner.pyx
+++ b/spacy/pipeline/_parser_internals/ner.pyx
@@ -2,6 +2,7 @@ from libc.stdint cimport int32_t
 from cymem.cymem cimport Pool
 
 from collections import Counter
+from thinc.extra.search cimport Beam
 
 from ...tokens.doc cimport Doc
 from ...tokens.span import Span
@@ -246,6 +247,20 @@ cdef class BiluoPushDown(TransitionSystem):
             if doc.c[i].ent_iob == 0:
                 doc.c[i].ent_iob = 2
 
+    def get_beam_parses(self, Beam beam):
+        parses = []
+        probs = beam.probs
+        for i in range(beam.size):
+            state = <StateC*>beam.at(i)
+            if state.is_final():
+                prob = probs[i]
+                parse = []
+                for j in range(state._ents.size()):
+                    ent = state._ents.at(j)
+                    parse.append((ent.start, ent.end, self.strings[ent.label]))
+                parses.append((prob, parse))
+        return parses
+
     def init_gold(self, StateClass state, Example example):
         return BiluoGold(self, state, example)
 
diff --git a/spacy/pipeline/ner.pyx b/spacy/pipeline/ner.pyx
index e89f5b3dd..0dfb055d3 100644
--- a/spacy/pipeline/ner.pyx
+++ b/spacy/pipeline/ner.pyx
@@ -1,4 +1,5 @@
 # cython: infer_types=True, profile=True, binding=True
+from collections import defaultdict
 from typing import Optional, Iterable
 from thinc.api import Model, Config
 
@@ -197,3 +198,16 @@ cdef class EntityRecognizer(Parser):
         """
         validate_examples(examples, "EntityRecognizer.score")
         return get_ner_prf(examples)
+
+    def scored_ents(self, beams):
+        """Return a dictionary of (start, end, label) tuples with corresponding scores
+        for each beam/doc that was processed.
+        """
+        entity_scores = []
+        for beam in beams:
+            score_dict = defaultdict(float)
+            for score, ents in self.moves.get_beam_parses(beam):
+                for start, end, label in ents:
+                    score_dict[(start, end, label)] += score
+            entity_scores.append(score_dict)
+        return entity_scores
diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py
index 8b771cafa..fec69bf66 100644
--- a/spacy/tests/parser/test_ner.py
+++ b/spacy/tests/parser/test_ner.py
@@ -1,3 +1,5 @@
+from collections import defaultdict
+
 import pytest
 from numpy.testing import assert_equal
 from spacy.attrs import ENT_IOB
@@ -359,6 +361,86 @@ def test_overfitting_IO(use_upper):
     assert_equal(batch_deps_1, no_batch_deps)
 
 
+def test_beam_ner_scores():
+    # Test that we can get confidence values out of the beam_ner pipe
+    beam_width = 16
+    beam_density = 0.0001
+    nlp = English()
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for ent in annotations.get("entities"):
+            ner.add_label(ent[2])
+    optimizer = nlp.initialize()
+
+    # update once
+    losses = {}
+    nlp.update(train_examples, sgd=optimizer, losses=losses)
+
+    # test the scores from the beam
+    test_text = "I like London."
+    doc = nlp.make_doc(test_text)
+    docs = [doc]
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.predict(docs)
+    entity_scores = ner.scored_ents(beams)[0]
+
+    for j in range(len(doc)):
+        for label in ner.labels:
+            score = entity_scores[(j, j+1, label)]
+            eps = 0.00001
+            assert 0 - eps <= score <= 1 + eps
+
+
+def test_beam_overfitting_IO():
+    # Simple test to try and quickly overfit the Beam NER component
+    nlp = English()
+    beam_width = 16
+    beam_density = 0.0001
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for ent in annotations.get("entities"):
+            ner.add_label(ent[2])
+    optimizer = nlp.initialize()
+
+    # run overfitting
+    for i in range(50):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+    assert losses["beam_ner"] < 0.0001
+
+    # test the scores from the beam
+    test_text = "I like London."
+    docs = [nlp.make_doc(test_text)]
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.predict(docs)
+    entity_scores = ner.scored_ents(beams)[0]
+    assert entity_scores[(2, 3, "LOC")] == 1.0
+    assert entity_scores[(2, 3, "PERSON")] == 0.0
+
+    # Also test the results are still the same after IO
+    with make_tempdir() as tmp_dir:
+        nlp.to_disk(tmp_dir)
+        nlp2 = util.load_model_from_path(tmp_dir)
+        docs2 = [nlp2(test_text)]
+        ner2 = nlp2.get_pipe("beam_ner")
+        beams2 = ner2.predict(docs2)
+        entity_scores2 = ner2.scored_ents(beams2)[0]
+        assert entity_scores2[(2, 3, "LOC")] == 1.0
+        assert entity_scores2[(2, 3, "PERSON")] == 0.0
+
+
 def test_ner_warns_no_lookups(caplog):
     nlp = English()
     assert nlp.lang in util.LEXEME_NORM_LANGS
diff --git a/spacy/tests/regression/test_issue4001-4500.py b/spacy/tests/regression/test_issue4001-4500.py
index 873ef9c1d..521fa0d73 100644
--- a/spacy/tests/regression/test_issue4001-4500.py
+++ b/spacy/tests/regression/test_issue4001-4500.py
@@ -288,35 +288,33 @@ def test_multiple_predictions():
     dummy_pipe(doc)
 
 
-@pytest.mark.skip(reason="removed Beam stuff during the Example/GoldParse refactor")
 def test_issue4313():
     """ This should not crash or exit with some strange error code """
     beam_width = 16
     beam_density = 0.0001
     nlp = English()
-    config = {}
-    ner = nlp.create_pipe("ner", config=config)
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
     ner.add_label("SOME_LABEL")
-    ner.initialize(lambda: [])
+    nlp.initialize()
     # add a new label to the doc
    doc = nlp("What do you think about Apple ?")
     assert len(ner.labels) == 1
     assert "SOME_LABEL" in ner.labels
+    ner.add_label("MY_ORG")  # TODO: not sure if we want this to be necessary...
     apple_ent = Span(doc, 5, 6, label="MY_ORG")
     doc.ents = list(doc.ents) + [apple_ent]
     # ensure the beam_parse still works with the new label
     docs = [doc]
-    beams = nlp.entity.beam_parse(
-        docs, beam_width=beam_width, beam_density=beam_density
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.beam_parse(
+        docs, drop=0.0, beam_width=beam_width, beam_density=beam_density
     )
-    for doc, beam in zip(docs, beams):
-        entity_scores = defaultdict(float)
-        for score, ents in nlp.entity.moves.get_beam_parses(beam):
-            for start, end, label in ents:
-                entity_scores[(start, end, label)] += score
-
 
 
 def test_issue4348():
     """Test that training the tagger with empty data, doesn't throw errors"""
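
For reference, a minimal usage sketch of the API this patch adds, mirroring the new tests in spacy/tests/parser/test_ner.py. It assumes a TRAIN_DATA list of (text, annotations) pairs like the one in that test file, and the beam_ner pipe must be trained before the scores are meaningful:

    # Sketch: get per-entity confidence scores from a trained "beam_ner" pipe.
    # TRAIN_DATA is assumed to be a list of (text, annotations) pairs as in the tests.
    from spacy.lang.en import English
    from spacy.training import Example

    nlp = English()
    config = {"beam_width": 16, "beam_density": 0.0001}
    ner = nlp.add_pipe("beam_ner", config=config)
    train_examples = []
    for text, annotations in TRAIN_DATA:
        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
        for ent in annotations.get("entities"):
            ner.add_label(ent[2])
    optimizer = nlp.initialize()
    for _ in range(50):
        nlp.update(train_examples, sgd=optimizer, losses={})

    # predict() returns one beam per doc; scored_ents() folds the beam parses into
    # dicts mapping (start_token, end_token, label) to an aggregated score.
    docs = [nlp.make_doc("I like London.")]
    beams = ner.predict(docs)
    entity_scores = ner.scored_ents(beams)[0]
    for (start, end, label), score in entity_scores.items():
        print(start, end, label, score)

Because scored_ents sums the probability mass of every beam parse that contains a given (start, end, label) span, spans present in all surviving parses approach a score of 1.0, while spans proposed by only a few low-probability parses stay close to 0.0.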