Getting scores out of beam_ner (#6575)

* small fixes and formatting

* bring test_issue4313 up-to-date, currently fails

* formatting

* add get_beam_parses method back

* add scored_ents function

* delete tag map
Sofie Van Landeghem, 2021-01-06 12:02:32 +01:00, committed by GitHub
commit 402dbc5bae (parent 82ae95267a)
6 changed files with 123 additions and 14 deletions


@@ -255,7 +255,7 @@ def get_gradient(nr_class, beam_maps, histories, losses):
     for a beam state -- so we have "the gradient of loss for taking
     action i given history H."
 
-    Histories: Each hitory is a list of actions
+    Histories: Each history is a list of actions
         Each candidate has a history
         Each beam has multiple candidates
         Each batch has multiple beams

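To make that docstring concrete, here is a rough sketch of the nesting it describes; the two-beam shape and the literal action IDs are illustrative assumptions, not part of this diff:

# Hypothetical shape of `histories`, inferred from the docstring above:
# the batch is a list of beams, each beam holds one history per candidate,
# and each history is a list of transition-action IDs.
histories = [
    [  # beam 0 of the batch
        [0, 2, 2, 1],  # candidate 0: the actions it took
        [0, 2, 1, 1],  # candidate 1
    ],
    # ... one entry per beam in the batch
]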

@@ -611,7 +611,7 @@ cdef class ArcEager(TransitionSystem):
         return gold
 
     def init_gold_batch(self, examples):
-        # TODO: Projectivitity?
+        # TODO: Projectivity?
        all_states = self.init_batch([eg.predicted for eg in examples])
        golds = []
        states = []


@@ -2,6 +2,7 @@ from libc.stdint cimport int32_t
 from cymem.cymem cimport Pool
 
 from collections import Counter
+from thinc.extra.search cimport Beam
 
 from ...tokens.doc cimport Doc
 from ...tokens.span import Span
@@ -246,6 +247,20 @@ cdef class BiluoPushDown(TransitionSystem):
             if doc.c[i].ent_iob == 0:
                 doc.c[i].ent_iob = 2
 
+    def get_beam_parses(self, Beam beam):
+        parses = []
+        probs = beam.probs
+        for i in range(beam.size):
+            state = <StateC*>beam.at(i)
+            if state.is_final():
+                prob = probs[i]
+                parse = []
+                for j in range(state._ents.size()):
+                    ent = state._ents.at(j)
+                    parse.append((ent.start, ent.end, self.strings[ent.label]))
+                parses.append((prob, parse))
+        return parses
+
     def init_gold(self, StateClass state, Example example):
         return BiluoGold(self, state, example)

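For orientation (not part of the diff): get_beam_parses walks the candidate states in a beam, keeps the finished ones, and pairs each candidate's entity parse with its probability. A minimal consumer sketch, assuming an nlp pipeline with a trained "beam_ner" pipe:

# Hypothetical usage sketch for get_beam_parses:
ner = nlp.get_pipe("beam_ner")
beams = ner.predict([nlp.make_doc("I like London.")])
for prob, parse in ner.moves.get_beam_parses(beams[0]):
    # `parse` is a list of (start, end, label) tuples for one candidate
    print(prob, parse)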

@@ -1,4 +1,5 @@
 # cython: infer_types=True, profile=True, binding=True
+from collections import defaultdict
 from typing import Optional, Iterable
 from thinc.api import Model, Config
@@ -197,3 +198,16 @@ cdef class EntityRecognizer(Parser):
         """
         validate_examples(examples, "EntityRecognizer.score")
         return get_ner_prf(examples)
+
+    def scored_ents(self, beams):
+        """Return a dictionary of (start, end, label) tuples with corresponding scores
+        for each beam/doc that was processed.
+        """
+        entity_scores = []
+        for beam in beams:
+            score_dict = defaultdict(float)
+            for score, ents in self.moves.get_beam_parses(beam):
+                for start, end, label in ents:
+                    score_dict[(start, end, label)] += score
+            entity_scores.append(score_dict)
+        return entity_scores

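scored_ents then sums those candidate probabilities per (start, end, label) key, giving one score dictionary per doc. A minimal calling sketch, under the same assumption of a trained "beam_ner" pipe (the new tests below do this for real):

# Hypothetical usage sketch for scored_ents:
docs = [nlp.make_doc("I like London.")]
beams = ner.predict(docs)
for score_dict in ner.scored_ents(beams):
    for (start, end, label), score in score_dict.items():
        # `score` is the summed probability mass of beam candidates
        # that proposed exactly this entity span and label
        print(start, end, label, score)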

@@ -1,3 +1,5 @@
+from collections import defaultdict
+
 import pytest
 from numpy.testing import assert_equal
 from spacy.attrs import ENT_IOB
@@ -359,6 +361,86 @@ def test_overfitting_IO(use_upper):
     assert_equal(batch_deps_1, no_batch_deps)
 
 
+def test_beam_ner_scores():
+    # Test that we can get confidence values out of the beam_ner pipe
+    beam_width = 16
+    beam_density = 0.0001
+    nlp = English()
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for ent in annotations.get("entities"):
+            ner.add_label(ent[2])
+    optimizer = nlp.initialize()
+
+    # update once
+    losses = {}
+    nlp.update(train_examples, sgd=optimizer, losses=losses)
+
+    # test the scores from the beam
+    test_text = "I like London."
+    doc = nlp.make_doc(test_text)
+    docs = [doc]
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.predict(docs)
+    entity_scores = ner.scored_ents(beams)[0]
+
+    for j in range(len(doc)):
+        for label in ner.labels:
+            score = entity_scores[(j, j+1, label)]
+            eps = 0.00001
+            assert 0 - eps <= score <= 1 + eps
+
+
+def test_beam_overfitting_IO():
+    # Simple test to try and quickly overfit the Beam NER component
+    nlp = English()
+    beam_width = 16
+    beam_density = 0.0001
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for ent in annotations.get("entities"):
+            ner.add_label(ent[2])
+    optimizer = nlp.initialize()
+
+    # run overfitting
+    for i in range(50):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+    assert losses["beam_ner"] < 0.0001
+
+    # test the scores from the beam
+    test_text = "I like London."
+    docs = [nlp.make_doc(test_text)]
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.predict(docs)
+    entity_scores = ner.scored_ents(beams)[0]
+    assert entity_scores[(2, 3, "LOC")] == 1.0
+    assert entity_scores[(2, 3, "PERSON")] == 0.0
+
+    # Also test the results are still the same after IO
+    with make_tempdir() as tmp_dir:
+        nlp.to_disk(tmp_dir)
+        nlp2 = util.load_model_from_path(tmp_dir)
+        docs2 = [nlp2(test_text)]
+        ner2 = nlp2.get_pipe("beam_ner")
+        beams2 = ner2.predict(docs2)
+        entity_scores2 = ner2.scored_ents(beams2)[0]
+        assert entity_scores2[(2, 3, "LOC")] == 1.0
+        assert entity_scores2[(2, 3, "PERSON")] == 0.0
+
+
 def test_ner_warns_no_lookups(caplog):
     nlp = English()
     assert nlp.lang in util.LEXEME_NORM_LANGS


@@ -288,35 +288,33 @@ def test_multiple_predictions():
     dummy_pipe(doc)
 
 
-@pytest.mark.skip(reason="removed Beam stuff during the Example/GoldParse refactor")
 def test_issue4313():
     """ This should not crash or exit with some strange error code """
     beam_width = 16
     beam_density = 0.0001
     nlp = English()
-    config = {}
-    ner = nlp.create_pipe("ner", config=config)
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
     ner.add_label("SOME_LABEL")
-    ner.initialize(lambda: [])
+    nlp.initialize()
     # add a new label to the doc
     doc = nlp("What do you think about Apple ?")
     assert len(ner.labels) == 1
     assert "SOME_LABEL" in ner.labels
+    ner.add_label("MY_ORG")  # TODO: not sure if we want this to be necessary...
     apple_ent = Span(doc, 5, 6, label="MY_ORG")
     doc.ents = list(doc.ents) + [apple_ent]
     # ensure the beam_parse still works with the new label
     docs = [doc]
-    beams = nlp.entity.beam_parse(
-        docs, beam_width=beam_width, beam_density=beam_density
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.beam_parse(
+        docs, drop=0.0, beam_width=beam_width, beam_density=beam_density
     )
-
-    for doc, beam in zip(docs, beams):
-        entity_scores = defaultdict(float)
-        for score, ents in nlp.entity.moves.get_beam_parses(beam):
-            for start, end, label in ents:
-                entity_scores[(start, end, label)] += score
 
 
 def test_issue4348():
     """Test that training the tagger with empty data, doesn't throw errors"""