mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-24 16:24:16 +03:00
Getting scores out of beam_parser (#6684)
* clean up of ner tests
* beam_parser tests
* implement get_beam_parses and scored_parses for the dep parser
* we don't have to add the parse if there are no arcs
This commit is contained in:
parent
3983bc6b1e
commit
8c1a23209f
|
@ -4,4 +4,4 @@ from .transition_system cimport Transition, TransitionSystem
|
|||
|
||||
|
||||
cdef class ArcEager(TransitionSystem):
|
||||
pass
|
||||
cdef get_arcs(self, StateC* state)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# cython: profile=True, cdivision=True, infer_types=True
|
||||
from cymem.cymem cimport Pool, Address
|
||||
from libc.stdint cimport int32_t
|
||||
from libcpp.vector cimport vector
|
||||
|
||||
from collections import defaultdict, Counter
|
||||
|
||||
|
@ -10,9 +11,9 @@ from ...structs cimport TokenC
|
|||
from ...tokens.doc cimport Doc, set_children_from_heads
|
||||
from ...training.example cimport Example
|
||||
from .stateclass cimport StateClass
|
||||
from ._state cimport StateC
|
||||
|
||||
from ._state cimport StateC, ArcC
|
||||
from ...errors import Errors
|
||||
from thinc.extra.search cimport Beam
|
||||
|
||||
cdef weight_t MIN_SCORE = -90000
|
||||
cdef attr_t SUBTOK_LABEL = hash_string(u'subtok')
|
||||
|
@ -707,6 +708,28 @@ cdef class ArcEager(TransitionSystem):
|
|||
doc.c[i].dep = self.root_label
|
||||
set_children_from_heads(doc.c, 0, doc.length)
|
||||
|
||||
def get_beam_parses(self, Beam beam):
    """Collect the dependency arcs of every finished state on the beam.

    beam (Beam): The beam to read states and probabilities from.
    RETURNS (list): One (prob, parse) tuple per final state that has arcs,
        where prob is the beam probability of that state and parse is a
        list of (head, child, label) tuples with the label as a string.
    """
    parses = []
    probs = beam.probs
    for i in range(beam.size):
        state = <StateC*>beam.at(i)
        if not state.is_final():
            continue
        arcs = self.get_arcs(state)
        if not arcs:
            # A state without arcs contributes no parse.
            continue
        parse = [
            (arc["head"], arc["child"], self.strings[arc["label"]])
            for arc in arcs
        ]
        parses.append((probs[i], parse))
    return parses
|
||||
|
||||
cdef get_arcs(self, StateC* state):
    # Return the arcs of the given state as a Python list.
    cdef vector[ArcC] collected
    # The state writes its arcs into the vector handed over by address.
    state.get_arcs(&collected)
    return list(collected)
|
||||
|
||||
def has_gold(self, Example eg, start=0, end=None):
|
||||
for word in eg.y[start:end]:
|
||||
if word.dep != 0:
|
||||
|
|
|
@ -257,7 +257,8 @@ cdef class BiluoPushDown(TransitionSystem):
|
|||
parse = []
|
||||
for j in range(state._ents.size()):
|
||||
ent = state._ents.at(j)
|
||||
parse.append((ent.start, ent.end, self.strings[ent.label]))
|
||||
if ent.start != -1 and ent.end != -1:
|
||||
parse.append((ent.start, ent.end, self.strings[ent.label]))
|
||||
parses.append((prob, parse))
|
||||
return parses
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# cython: infer_types=True, profile=True, binding=True
|
||||
from collections import defaultdict
|
||||
from typing import Optional, Iterable
|
||||
from thinc.api import Model, Config
|
||||
|
||||
|
@ -258,3 +259,20 @@ cdef class DependencyParser(Parser):
|
|||
results.update(Scorer.score_deps(examples, "dep", **kwargs))
|
||||
del results["sents_per_type"]
|
||||
return results
|
||||
|
||||
def scored_parses(self, beams):
    """Sum the beam scores per (token, head) and per (token, label) pair.

    beams (Iterable): The beams to score, one per processed doc.
    RETURNS (Tuple[List[dict], List[dict]]): Two lists, head_scores and
        label_scores, each holding one defaultdict(float) per beam, in the
        order of the input. head_scores[k] is keyed by (i, head) and
        label_scores[k] by (i, label), where i is the index of the child
        token of an arc. Each value is the sum of the scores of all parses
        on beam k that contain that arc; keys never seen read as 0.0.
    """
    head_scores = []
    label_scores = []
    for beam in beams:
        score_head_dict = defaultdict(float)
        score_label_dict = defaultdict(float)
        for score, parse in self.moves.get_beam_parses(beam):
            # Each arc is reported as (head, child, label).
            for head, i, label in parse:
                score_head_dict[(i, head)] += score
                score_label_dict[(i, label)] += score
        head_scores.append(score_head_dict)
        label_scores.append(score_label_dict)
    return head_scores, label_scores
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
from collections import defaultdict
|
||||
|
||||
import pytest
|
||||
from numpy.testing import assert_equal
|
||||
from spacy.attrs import ENT_IOB
|
||||
|
@ -305,7 +303,7 @@ def test_block_ner():
|
|||
|
||||
@pytest.mark.parametrize("use_upper", [True, False])
|
||||
def test_overfitting_IO(use_upper):
|
||||
# Simple test to try and quickly overfit the NER component - ensuring the ML models work correctly
|
||||
# Simple test to try and quickly overfit the NER component
|
||||
nlp = English()
|
||||
ner = nlp.add_pipe("ner", config={"model": {"use_upper": use_upper}})
|
||||
train_examples = []
|
||||
|
@ -386,7 +384,6 @@ def test_beam_ner_scores():
|
|||
test_text = "I like London."
|
||||
doc = nlp.make_doc(test_text)
|
||||
docs = [doc]
|
||||
ner = nlp.get_pipe("beam_ner")
|
||||
beams = ner.predict(docs)
|
||||
entity_scores = ner.scored_ents(beams)[0]
|
||||
|
||||
|
@ -423,7 +420,6 @@ def test_beam_overfitting_IO():
|
|||
# test the scores from the beam
|
||||
test_text = "I like London."
|
||||
docs = [nlp.make_doc(test_text)]
|
||||
ner = nlp.get_pipe("beam_ner")
|
||||
beams = ner.predict(docs)
|
||||
entity_scores = ner.scored_ents(beams)[0]
|
||||
assert entity_scores[(2, 3, "LOC")] == 1.0
|
||||
|
@ -433,7 +429,7 @@ def test_beam_overfitting_IO():
|
|||
with make_tempdir() as tmp_dir:
|
||||
nlp.to_disk(tmp_dir)
|
||||
nlp2 = util.load_model_from_path(tmp_dir)
|
||||
docs2 = [nlp2(test_text)]
|
||||
docs2 = [nlp2.make_doc(test_text)]
|
||||
ner2 = nlp2.get_pipe("beam_ner")
|
||||
beams2 = ner2.predict(docs2)
|
||||
entity_scores2 = ner2.scored_ents(beams2)[0]
|
||||
|
|
|
@ -28,6 +28,26 @@ TRAIN_DATA = [
|
|||
]
|
||||
|
||||
|
||||
# Two contradictory annotations of the same sentence, used to train the
# beam parser in test_beam_parser_scores.
CONFLICTING_DATA = [
    (
        "I like London and Berlin.",
        {
            "heads": [1, 1, 1, 2, 2, 1],
            "deps": ["nsubj", "ROOT", "dobj", "cc", "conj", "punct"],
        },
    ),
    (
        "I like London and Berlin.",
        {
            "heads": [0, 0, 0, 0, 0, 0],
            "deps": ["ROOT", "nsubj", "nsubj", "cc", "conj", "punct"],
        },
    ),
]

# Tolerance used when comparing beam scores in the tests below.
eps = 0.01
|
||||
|
||||
|
||||
def test_parser_root(en_vocab):
|
||||
words = ["i", "do", "n't", "have", "other", "assistance"]
|
||||
heads = [3, 3, 3, 3, 5, 3]
|
||||
|
@ -185,26 +205,31 @@ def test_parser_set_sent_starts(en_vocab):
|
|||
assert token.head in sent
|
||||
|
||||
|
||||
def test_overfitting_IO():
|
||||
# Simple test to try and quickly overfit the dependency parser - ensuring the ML models work correctly
|
||||
@pytest.mark.parametrize("pipe_name", ["parser", "beam_parser"])
|
||||
def test_overfitting_IO(pipe_name):
|
||||
# Simple test to try and quickly overfit the dependency parser (normal or beam)
|
||||
nlp = English()
|
||||
parser = nlp.add_pipe("parser")
|
||||
parser = nlp.add_pipe(pipe_name)
|
||||
train_examples = []
|
||||
for text, annotations in TRAIN_DATA:
|
||||
train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
|
||||
for dep in annotations.get("deps", []):
|
||||
parser.add_label(dep)
|
||||
optimizer = nlp.initialize()
|
||||
for i in range(100):
|
||||
# run overfitting
|
||||
for i in range(150):
|
||||
losses = {}
|
||||
nlp.update(train_examples, sgd=optimizer, losses=losses)
|
||||
assert losses["parser"] < 0.0001
|
||||
assert losses[pipe_name] < 0.0001
|
||||
# test the trained model
|
||||
test_text = "I like securities."
|
||||
doc = nlp(test_text)
|
||||
assert doc[0].dep_ == "nsubj"
|
||||
assert doc[2].dep_ == "dobj"
|
||||
assert doc[3].dep_ == "punct"
|
||||
assert doc[0].head.i == 1
|
||||
assert doc[2].head.i == 1
|
||||
assert doc[3].head.i == 1
|
||||
# Also test the results are still the same after IO
|
||||
with make_tempdir() as tmp_dir:
|
||||
nlp.to_disk(tmp_dir)
|
||||
|
@ -213,6 +238,9 @@ def test_overfitting_IO():
|
|||
assert doc2[0].dep_ == "nsubj"
|
||||
assert doc2[2].dep_ == "dobj"
|
||||
assert doc2[3].dep_ == "punct"
|
||||
assert doc2[0].head.i == 1
|
||||
assert doc2[2].head.i == 1
|
||||
assert doc2[3].head.i == 1
|
||||
|
||||
# Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
|
||||
texts = [
|
||||
|
@ -226,3 +254,123 @@ def test_overfitting_IO():
|
|||
no_batch_deps = [doc.to_array([DEP]) for doc in [nlp(text) for text in texts]]
|
||||
assert_equal(batch_deps_1, batch_deps_2)
|
||||
assert_equal(batch_deps_1, no_batch_deps)
|
||||
|
||||
|
||||
def test_beam_parser_scores():
    """Confidence values read from the beam_parser pipe lie within [0, 1]."""
    nlp = English()
    parser = nlp.add_pipe(
        "beam_parser",
        config={"beam_width": 16, "beam_density": 0.0001},
    )
    train_examples = [
        Example.from_dict(nlp.make_doc(text), annotations)
        for text, annotations in CONFLICTING_DATA
    ]
    for _, annotations in CONFLICTING_DATA:
        for dep in annotations.get("deps", []):
            parser.add_label(dep)
    optimizer = nlp.initialize()

    # A handful of updates on the conflicting annotations.
    for _ in range(10):
        nlp.update(train_examples, sgd=optimizer, losses={})

    # Score an unseen text straight from the beam.
    doc = nlp.make_doc("I like securities.")
    beams = parser.predict([doc])
    head_scores, label_scores = parser.scored_parses(beams)
    doc_head_scores = head_scores[0]
    doc_label_scores = label_scores[0]

    lower, upper = 0 - eps, 1 + eps
    token_indices = range(len(doc))
    for child in token_indices:
        for label in parser.labels:
            assert lower <= doc_label_scores[(child, label)] <= upper
        for head in token_indices:
            assert lower <= doc_head_scores[(child, head)] <= upper
|
||||
|
||||
|
||||
def _assert_overfit_beam_scores(head_scores, label_scores):
    """Check one doc's beam scores against the overfit parse of the test text.

    head_scores: Mapping keyed by (token index, head index).
    label_scores: Mapping keyed by (token index, label).

    Expected parse: 0=nsubj, 2=dobj, 3=punct, all attached to the root at
    index 1. The matching key must score 1.0 and the alternatives 0.0.
    """
    gold_labels = {0: "nsubj", 2: "dobj", 3: "punct"}
    for i, gold_label in gold_labels.items():
        for label in ("nsubj", "dobj", "punct"):
            expected = 1.0 if label == gold_label else 0.0
            # The tolerance must be passed as abs=: positionally it would be
            # taken as the relative tolerance, which is zero-width around an
            # expected value of 0.0.
            assert label_scores[(i, label)] == pytest.approx(expected, abs=eps)
        for head in (0, 1, 2):
            expected = 1.0 if head == 1 else 0.0
            assert head_scores[(i, head)] == pytest.approx(expected, abs=eps)


def test_beam_overfitting_IO():
    """Overfit the beam dependency parser and check its beam scores.

    The scores are checked on the trained pipeline and again on a copy
    that was saved to disk and loaded back.
    """
    nlp = English()
    beam_width = 16
    beam_density = 0.0001
    config = {
        "beam_width": beam_width,
        "beam_density": beam_density,
    }
    parser = nlp.add_pipe("beam_parser", config=config)
    train_examples = []
    for text, annotations in TRAIN_DATA:
        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
        for dep in annotations.get("deps", []):
            parser.add_label(dep)
    optimizer = nlp.initialize()
    # run overfitting
    for i in range(150):
        losses = {}
        nlp.update(train_examples, sgd=optimizer, losses=losses)
    assert losses["beam_parser"] < 0.0001

    # test the scores from the beam
    test_text = "I like securities."
    docs = [nlp.make_doc(test_text)]
    beams = parser.predict(docs)
    head_scores, label_scores = parser.scored_parses(beams)
    # we only processed one document
    _assert_overfit_beam_scores(head_scores[0], label_scores[0])

    # Also test the results are still the same after IO
    with make_tempdir() as tmp_dir:
        nlp.to_disk(tmp_dir)
        nlp2 = util.load_model_from_path(tmp_dir)
        docs2 = [nlp2.make_doc(test_text)]
        parser2 = nlp2.get_pipe("beam_parser")
        beams2 = parser2.predict(docs2)
        head_scores2, label_scores2 = parser2.scored_parses(beams2)
        # we only processed one document
        _assert_overfit_beam_scores(head_scores2[0], label_scores2[0])
|
||||
|
|
Loading…
Reference in New Issue
Block a user