Getting scores out of beam_ner (#6575)

* small fixes and formatting

* bring test_issue4313 up-to-date, currently fails

* formatting

* add get_beam_parses method back

* add scored_ents function

* delete tag map
Sofie Van Landeghem 2021-01-06 12:02:32 +01:00 committed by GitHub
parent 82ae95267a
commit 402dbc5bae
6 changed files with 123 additions and 14 deletions
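
For context, this is roughly how the new API fits together once a pipeline with a trained "beam_ner" component is available. The snippet below is a sketch distilled from the tests added in this commit; the pipeline object `nlp` and the example text are assumptions, not part of the commit itself.

    # Sketch: reading entity confidences out of a trained "beam_ner" pipe.
    # Assumes `nlp` is a spaCy Language object whose "beam_ner" component
    # has already been trained (as in the tests added below).
    ner = nlp.get_pipe("beam_ner")
    docs = [nlp.make_doc("I like London.")]
    beams = ner.predict(docs)               # one beam object per doc
    entity_scores = ner.scored_ents(beams)  # one dict per doc
    for (start, end, label), score in entity_scores[0].items():
        # each score should fall in [0, 1] (within epsilon), as asserted
        # in test_beam_ner_scores below
        print(start, end, label, score)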


@@ -255,7 +255,7 @@ def get_gradient(nr_class, beam_maps, histories, losses):
     for a beam state -- so we have "the gradient of loss for taking
     action i given history H."
-    Histories: Each hitory is a list of actions
+    Histories: Each history is a list of actions
     Each candidate has a history
     Each beam has multiple candidates
     Each batch has multiple beams
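
As an aside, the nesting that this docstring describes can be pictured with plain type aliases. The sketch below only illustrates the shape; the names and example values are invented, not spaCy's actual types.

    # Illustrative only: the nesting of the `histories` argument as described
    # in the docstring above.
    from typing import List

    ActionId = int
    History = List[ActionId]               # each history is a list of actions
    BeamCandidates = List[History]          # each beam has multiple candidates
    BatchHistories = List[BeamCandidates]   # each batch has multiple beams

    histories: BatchHistories = [
        [[0, 2, 1], [0, 1, 1]],  # beam for doc 0: two candidate histories
        [[3, 0]],                # beam for doc 1: one candidate history
    ]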


@@ -611,7 +611,7 @@ cdef class ArcEager(TransitionSystem):
         return gold

     def init_gold_batch(self, examples):
-        # TODO: Projectivitity?
+        # TODO: Projectivity?
         all_states = self.init_batch([eg.predicted for eg in examples])
         golds = []
         states = []


@@ -2,6 +2,7 @@ from libc.stdint cimport int32_t
 from cymem.cymem cimport Pool

 from collections import Counter
+from thinc.extra.search cimport Beam

 from ...tokens.doc cimport Doc
 from ...tokens.span import Span
@@ -246,6 +247,20 @@ cdef class BiluoPushDown(TransitionSystem):
             if doc.c[i].ent_iob == 0:
                 doc.c[i].ent_iob = 2

+    def get_beam_parses(self, Beam beam):
+        parses = []
+        probs = beam.probs
+        for i in range(beam.size):
+            state = <StateC*>beam.at(i)
+            if state.is_final():
+                prob = probs[i]
+                parse = []
+                for j in range(state._ents.size()):
+                    ent = state._ents.at(j)
+                    parse.append((ent.start, ent.end, self.strings[ent.label]))
+                parses.append((prob, parse))
+        return parses
+
     def init_gold(self, StateClass state, Example example):
         return BiluoGold(self, state, example)
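
For reference, each item that get_beam_parses returns pairs a finished state's probability with the entities that state predicts, as (start_token, end_token, label) tuples. The values below are invented purely to illustrate the shape:

    # Invented example of the structure returned by get_beam_parses:
    # one (probability, [(start, end, label), ...]) pair per finished
    # candidate on the beam.
    parses = [
        (0.71, [(2, 3, "LOC")]),     # e.g. "London" analysed as LOC
        (0.29, [(2, 3, "PERSON")]),  # an alternative candidate on the beam
    ]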


@@ -1,4 +1,5 @@
 # cython: infer_types=True, profile=True, binding=True
+from collections import defaultdict
 from typing import Optional, Iterable
 from thinc.api import Model, Config
@@ -197,3 +198,16 @@ cdef class EntityRecognizer(Parser):
         """
         validate_examples(examples, "EntityRecognizer.score")
         return get_ner_prf(examples)
+
+    def scored_ents(self, beams):
+        """Return a dictionary of (start, end, label) tuples with corresponding scores
+        for each beam/doc that was processed.
+        """
+        entity_scores = []
+        for beam in beams:
+            score_dict = defaultdict(float)
+            for score, ents in self.moves.get_beam_parses(beam):
+                for start, end, label in ents:
+                    score_dict[(start, end, label)] += score
+            entity_scores.append(score_dict)
+        return entity_scores
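
Relating the two additions: scored_ents sums the per-parse probabilities from get_beam_parses for every (start, end, label) key, giving one dictionary per doc. Below is a sketch of that aggregation using the invented beam output from above; the real method operates on Beam objects, not plain lists.

    from collections import defaultdict

    # Invented beam output, as sketched under get_beam_parses above.
    parses = [(0.71, [(2, 3, "LOC")]), (0.29, [(2, 3, "PERSON")])]

    score_dict = defaultdict(float)
    for prob, ents in parses:
        for start, end, label in ents:
            score_dict[(start, end, label)] += prob

    # score_dict -> {(2, 3, "LOC"): 0.71, (2, 3, "PERSON"): 0.29}
    # Once the model is overfit (see test_beam_overfitting_IO below), nearly
    # all of the beam's probability mass sits on one analysis, which is why
    # those tests can assert scores of 1.0 and 0.0.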


@@ -1,3 +1,5 @@
+from collections import defaultdict
+
 import pytest
 from numpy.testing import assert_equal
 from spacy.attrs import ENT_IOB
@@ -359,6 +361,86 @@ def test_overfitting_IO(use_upper):
     assert_equal(batch_deps_1, no_batch_deps)


+def test_beam_ner_scores():
+    # Test that we can get confidence values out of the beam_ner pipe
+    beam_width = 16
+    beam_density = 0.0001
+    nlp = English()
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for ent in annotations.get("entities"):
+            ner.add_label(ent[2])
+    optimizer = nlp.initialize()
+
+    # update once
+    losses = {}
+    nlp.update(train_examples, sgd=optimizer, losses=losses)
+
+    # test the scores from the beam
+    test_text = "I like London."
+    doc = nlp.make_doc(test_text)
+    docs = [doc]
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.predict(docs)
+    entity_scores = ner.scored_ents(beams)[0]
+
+    for j in range(len(doc)):
+        for label in ner.labels:
+            score = entity_scores[(j, j+1, label)]
+            eps = 0.00001
+            assert 0 - eps <= score <= 1 + eps
+
+
+def test_beam_overfitting_IO():
+    # Simple test to try and quickly overfit the Beam NER component
+    nlp = English()
+    beam_width = 16
+    beam_density = 0.0001
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for ent in annotations.get("entities"):
+            ner.add_label(ent[2])
+    optimizer = nlp.initialize()
+
+    # run overfitting
+    for i in range(50):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+    assert losses["beam_ner"] < 0.0001
+
+    # test the scores from the beam
+    test_text = "I like London."
+    docs = [nlp.make_doc(test_text)]
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.predict(docs)
+    entity_scores = ner.scored_ents(beams)[0]
+    assert entity_scores[(2, 3, "LOC")] == 1.0
+    assert entity_scores[(2, 3, "PERSON")] == 0.0
+
+    # Also test the results are still the same after IO
+    with make_tempdir() as tmp_dir:
+        nlp.to_disk(tmp_dir)
+        nlp2 = util.load_model_from_path(tmp_dir)
+        docs2 = [nlp2(test_text)]
+        ner2 = nlp2.get_pipe("beam_ner")
+        beams2 = ner2.predict(docs2)
+        entity_scores2 = ner2.scored_ents(beams2)[0]
+        assert entity_scores2[(2, 3, "LOC")] == 1.0
+        assert entity_scores2[(2, 3, "PERSON")] == 0.0
+
+
 def test_ner_warns_no_lookups(caplog):
     nlp = English()
     assert nlp.lang in util.LEXEME_NORM_LANGS


@@ -288,35 +288,33 @@ def test_multiple_predictions():
     dummy_pipe(doc)


-@pytest.mark.skip(reason="removed Beam stuff during the Example/GoldParse refactor")
 def test_issue4313():
     """ This should not crash or exit with some strange error code """
     beam_width = 16
     beam_density = 0.0001
     nlp = English()
-    config = {}
-    ner = nlp.create_pipe("ner", config=config)
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    ner = nlp.add_pipe("beam_ner", config=config)
     ner.add_label("SOME_LABEL")
-    ner.initialize(lambda: [])
+    nlp.initialize()
     # add a new label to the doc
     doc = nlp("What do you think about Apple ?")
     assert len(ner.labels) == 1
     assert "SOME_LABEL" in ner.labels
-    ner.add_label("MY_ORG")
+    # TODO: not sure if we want this to be necessary...
     apple_ent = Span(doc, 5, 6, label="MY_ORG")
     doc.ents = list(doc.ents) + [apple_ent]
     # ensure the beam_parse still works with the new label
     docs = [doc]
-    beams = nlp.entity.beam_parse(
-        docs, beam_width=beam_width, beam_density=beam_density
+    ner = nlp.get_pipe("beam_ner")
+    beams = ner.beam_parse(
+        docs, drop=0.0, beam_width=beam_width, beam_density=beam_density
     )
-    for doc, beam in zip(docs, beams):
-        entity_scores = defaultdict(float)
-        for score, ents in nlp.entity.moves.get_beam_parses(beam):
-            for start, end, label in ents:
-                entity_scores[(start, end, label)] += score


 def test_issue4348():
     """Test that training the tagger with empty data, doesn't throw errors"""