mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-03 13:14:11 +03:00
Getting scores out of beam_parser (#6684)
* clean up of ner tests * beam_parser tests * implement get_beam_parses and scored_parses for the dep parser * we don't have to add the parse if there are no arcs
This commit is contained in:
parent
3983bc6b1e
commit
8c1a23209f
|
@ -4,4 +4,4 @@ from .transition_system cimport Transition, TransitionSystem
|
||||||
|
|
||||||
|
|
||||||
cdef class ArcEager(TransitionSystem):
|
cdef class ArcEager(TransitionSystem):
|
||||||
pass
|
cdef get_arcs(self, StateC* state)
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# cython: profile=True, cdivision=True, infer_types=True
|
# cython: profile=True, cdivision=True, infer_types=True
|
||||||
from cymem.cymem cimport Pool, Address
|
from cymem.cymem cimport Pool, Address
|
||||||
from libc.stdint cimport int32_t
|
from libc.stdint cimport int32_t
|
||||||
|
from libcpp.vector cimport vector
|
||||||
|
|
||||||
from collections import defaultdict, Counter
|
from collections import defaultdict, Counter
|
||||||
|
|
||||||
|
@ -10,9 +11,9 @@ from ...structs cimport TokenC
|
||||||
from ...tokens.doc cimport Doc, set_children_from_heads
|
from ...tokens.doc cimport Doc, set_children_from_heads
|
||||||
from ...training.example cimport Example
|
from ...training.example cimport Example
|
||||||
from .stateclass cimport StateClass
|
from .stateclass cimport StateClass
|
||||||
from ._state cimport StateC
|
from ._state cimport StateC, ArcC
|
||||||
|
|
||||||
from ...errors import Errors
|
from ...errors import Errors
|
||||||
|
from thinc.extra.search cimport Beam
|
||||||
|
|
||||||
cdef weight_t MIN_SCORE = -90000
|
cdef weight_t MIN_SCORE = -90000
|
||||||
cdef attr_t SUBTOK_LABEL = hash_string(u'subtok')
|
cdef attr_t SUBTOK_LABEL = hash_string(u'subtok')
|
||||||
|
@ -707,6 +708,28 @@ cdef class ArcEager(TransitionSystem):
|
||||||
doc.c[i].dep = self.root_label
|
doc.c[i].dep = self.root_label
|
||||||
set_children_from_heads(doc.c, 0, doc.length)
|
set_children_from_heads(doc.c, 0, doc.length)
|
||||||
|
|
||||||
|
def get_beam_parses(self, Beam beam):
    """Collect the scored dependency parses held by a beam.

    Only beam entries whose state is final and which hold at least one arc
    contribute a parse.

    beam (Beam): The beam to read the candidate states from.
    RETURNS (list): One (prob, parse) tuple per contributing state, where
        parse is a list of (head, child, label) tuples and label is the
        string looked up in self.strings for the arc's label ID.
    """
    scored = []
    beam_probs = beam.probs
    for idx in range(beam.size):
        candidate = <StateC*>beam.at(idx)
        # Unfinished states do not contribute a parse
        if not candidate.is_final():
            continue
        arcs = self.get_arcs(candidate)
        # A parse without arcs is not added to the result
        if not arcs:
            continue
        triples = [
            (arc["head"], arc["child"], self.strings[arc["label"]])
            for arc in arcs
        ]
        scored.append((beam_probs[idx], triples))
    return scored
|
||||||
|
|
||||||
|
cdef get_arcs(self, StateC* state):
    """Return the arcs stored in `state` as a Python list.

    state (StateC*): The parser state to read the arcs from.
    RETURNS (list): The arcs that state.get_arcs() wrote into the buffer.
        get_beam_parses indexes each entry with the keys "head", "child"
        and "label".
    """
    cdef vector[ArcC] arc_buffer
    # The state fills the C++ vector in place through the pointer
    state.get_arcs(&arc_buffer)
    return list(arc_buffer)
|
||||||
|
|
||||||
def has_gold(self, Example eg, start=0, end=None):
|
def has_gold(self, Example eg, start=0, end=None):
|
||||||
for word in eg.y[start:end]:
|
for word in eg.y[start:end]:
|
||||||
if word.dep != 0:
|
if word.dep != 0:
|
||||||
|
|
|
@ -257,7 +257,8 @@ cdef class BiluoPushDown(TransitionSystem):
|
||||||
parse = []
|
parse = []
|
||||||
for j in range(state._ents.size()):
|
for j in range(state._ents.size()):
|
||||||
ent = state._ents.at(j)
|
ent = state._ents.at(j)
|
||||||
parse.append((ent.start, ent.end, self.strings[ent.label]))
|
if ent.start != -1 and ent.end != -1:
|
||||||
|
parse.append((ent.start, ent.end, self.strings[ent.label]))
|
||||||
parses.append((prob, parse))
|
parses.append((prob, parse))
|
||||||
return parses
|
return parses
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
# cython: infer_types=True, profile=True, binding=True
|
# cython: infer_types=True, profile=True, binding=True
|
||||||
|
from collections import defaultdict
|
||||||
from typing import Optional, Iterable
|
from typing import Optional, Iterable
|
||||||
from thinc.api import Model, Config
|
from thinc.api import Model, Config
|
||||||
|
|
||||||
|
@ -258,3 +259,20 @@ cdef class DependencyParser(Parser):
|
||||||
results.update(Scorer.score_deps(examples, "dep", **kwargs))
|
results.update(Scorer.score_deps(examples, "dep", **kwargs))
|
||||||
del results["sents_per_type"]
|
del results["sents_per_type"]
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
def scored_parses(self, beams):
    """Aggregate head and label scores for every beam that was processed.

    beams (iterable): The beams to score, one per processed doc.
    RETURNS (tuple): Two lists with one dictionary per beam. The first
        maps (i, head) keys and the second maps (i, label) keys to the
        summed score of all beam parses containing that arc, where i is
        the index of the arc's child token.
    """
    head_scores, label_scores = [], []
    for beam in beams:
        by_head = defaultdict(float)
        by_label = defaultdict(float)
        head_scores.append(by_head)
        label_scores.append(by_label)
        for prob, arcs in self.moves.get_beam_parses(beam):
            for head, child, label in arcs:
                # Every parse containing the arc adds its own score
                by_head[(child, head)] += prob
                by_label[(child, label)] += prob
    return head_scores, label_scores
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
from collections import defaultdict
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from numpy.testing import assert_equal
|
from numpy.testing import assert_equal
|
||||||
from spacy.attrs import ENT_IOB
|
from spacy.attrs import ENT_IOB
|
||||||
|
@ -305,7 +303,7 @@ def test_block_ner():
|
||||||
|
|
||||||
@pytest.mark.parametrize("use_upper", [True, False])
|
@pytest.mark.parametrize("use_upper", [True, False])
|
||||||
def test_overfitting_IO(use_upper):
|
def test_overfitting_IO(use_upper):
|
||||||
# Simple test to try and quickly overfit the NER component - ensuring the ML models work correctly
|
# Simple test to try and quickly overfit the NER component
|
||||||
nlp = English()
|
nlp = English()
|
||||||
ner = nlp.add_pipe("ner", config={"model": {"use_upper": use_upper}})
|
ner = nlp.add_pipe("ner", config={"model": {"use_upper": use_upper}})
|
||||||
train_examples = []
|
train_examples = []
|
||||||
|
@ -386,7 +384,6 @@ def test_beam_ner_scores():
|
||||||
test_text = "I like London."
|
test_text = "I like London."
|
||||||
doc = nlp.make_doc(test_text)
|
doc = nlp.make_doc(test_text)
|
||||||
docs = [doc]
|
docs = [doc]
|
||||||
ner = nlp.get_pipe("beam_ner")
|
|
||||||
beams = ner.predict(docs)
|
beams = ner.predict(docs)
|
||||||
entity_scores = ner.scored_ents(beams)[0]
|
entity_scores = ner.scored_ents(beams)[0]
|
||||||
|
|
||||||
|
@ -423,7 +420,6 @@ def test_beam_overfitting_IO():
|
||||||
# test the scores from the beam
|
# test the scores from the beam
|
||||||
test_text = "I like London."
|
test_text = "I like London."
|
||||||
docs = [nlp.make_doc(test_text)]
|
docs = [nlp.make_doc(test_text)]
|
||||||
ner = nlp.get_pipe("beam_ner")
|
|
||||||
beams = ner.predict(docs)
|
beams = ner.predict(docs)
|
||||||
entity_scores = ner.scored_ents(beams)[0]
|
entity_scores = ner.scored_ents(beams)[0]
|
||||||
assert entity_scores[(2, 3, "LOC")] == 1.0
|
assert entity_scores[(2, 3, "LOC")] == 1.0
|
||||||
|
@ -433,7 +429,7 @@ def test_beam_overfitting_IO():
|
||||||
with make_tempdir() as tmp_dir:
|
with make_tempdir() as tmp_dir:
|
||||||
nlp.to_disk(tmp_dir)
|
nlp.to_disk(tmp_dir)
|
||||||
nlp2 = util.load_model_from_path(tmp_dir)
|
nlp2 = util.load_model_from_path(tmp_dir)
|
||||||
docs2 = [nlp2(test_text)]
|
docs2 = [nlp2.make_doc(test_text)]
|
||||||
ner2 = nlp2.get_pipe("beam_ner")
|
ner2 = nlp2.get_pipe("beam_ner")
|
||||||
beams2 = ner2.predict(docs2)
|
beams2 = ner2.predict(docs2)
|
||||||
entity_scores2 = ner2.scored_ents(beams2)[0]
|
entity_scores2 = ner2.scored_ents(beams2)[0]
|
||||||
|
|
|
@ -28,6 +28,26 @@ TRAIN_DATA = [
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# The same sentence annotated with two different, incompatible parses.
CONFLICTING_DATA = [
    (
        "I like London and Berlin.",
        {"heads": [1, 1, 1, 2, 2, 1], "deps": ["nsubj", "ROOT", "dobj", "cc", "conj", "punct"]},
    ),
    (
        "I like London and Berlin.",
        {"heads": [0, 0, 0, 0, 0, 0], "deps": ["ROOT", "nsubj", "nsubj", "cc", "conj", "punct"]},
    ),
]
|
||||||
|
|
||||||
|
eps = 0.01
|
||||||
|
|
||||||
|
|
||||||
def test_parser_root(en_vocab):
|
def test_parser_root(en_vocab):
|
||||||
words = ["i", "do", "n't", "have", "other", "assistance"]
|
words = ["i", "do", "n't", "have", "other", "assistance"]
|
||||||
heads = [3, 3, 3, 3, 5, 3]
|
heads = [3, 3, 3, 3, 5, 3]
|
||||||
|
@ -185,26 +205,31 @@ def test_parser_set_sent_starts(en_vocab):
|
||||||
assert token.head in sent
|
assert token.head in sent
|
||||||
|
|
||||||
|
|
||||||
def test_overfitting_IO():
|
@pytest.mark.parametrize("pipe_name", ["parser", "beam_parser"])
|
||||||
# Simple test to try and quickly overfit the dependency parser - ensuring the ML models work correctly
|
def test_overfitting_IO(pipe_name):
|
||||||
|
# Simple test to try and quickly overfit the dependency parser (normal or beam)
|
||||||
nlp = English()
|
nlp = English()
|
||||||
parser = nlp.add_pipe("parser")
|
parser = nlp.add_pipe(pipe_name)
|
||||||
train_examples = []
|
train_examples = []
|
||||||
for text, annotations in TRAIN_DATA:
|
for text, annotations in TRAIN_DATA:
|
||||||
train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
|
train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
|
||||||
for dep in annotations.get("deps", []):
|
for dep in annotations.get("deps", []):
|
||||||
parser.add_label(dep)
|
parser.add_label(dep)
|
||||||
optimizer = nlp.initialize()
|
optimizer = nlp.initialize()
|
||||||
for i in range(100):
|
# run overfitting
|
||||||
|
for i in range(150):
|
||||||
losses = {}
|
losses = {}
|
||||||
nlp.update(train_examples, sgd=optimizer, losses=losses)
|
nlp.update(train_examples, sgd=optimizer, losses=losses)
|
||||||
assert losses["parser"] < 0.0001
|
assert losses[pipe_name] < 0.0001
|
||||||
# test the trained model
|
# test the trained model
|
||||||
test_text = "I like securities."
|
test_text = "I like securities."
|
||||||
doc = nlp(test_text)
|
doc = nlp(test_text)
|
||||||
assert doc[0].dep_ == "nsubj"
|
assert doc[0].dep_ == "nsubj"
|
||||||
assert doc[2].dep_ == "dobj"
|
assert doc[2].dep_ == "dobj"
|
||||||
assert doc[3].dep_ == "punct"
|
assert doc[3].dep_ == "punct"
|
||||||
|
assert doc[0].head.i == 1
|
||||||
|
assert doc[2].head.i == 1
|
||||||
|
assert doc[3].head.i == 1
|
||||||
# Also test the results are still the same after IO
|
# Also test the results are still the same after IO
|
||||||
with make_tempdir() as tmp_dir:
|
with make_tempdir() as tmp_dir:
|
||||||
nlp.to_disk(tmp_dir)
|
nlp.to_disk(tmp_dir)
|
||||||
|
@ -213,6 +238,9 @@ def test_overfitting_IO():
|
||||||
assert doc2[0].dep_ == "nsubj"
|
assert doc2[0].dep_ == "nsubj"
|
||||||
assert doc2[2].dep_ == "dobj"
|
assert doc2[2].dep_ == "dobj"
|
||||||
assert doc2[3].dep_ == "punct"
|
assert doc2[3].dep_ == "punct"
|
||||||
|
assert doc2[0].head.i == 1
|
||||||
|
assert doc2[2].head.i == 1
|
||||||
|
assert doc2[3].head.i == 1
|
||||||
|
|
||||||
# Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
|
# Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
|
||||||
texts = [
|
texts = [
|
||||||
|
@ -226,3 +254,123 @@ def test_overfitting_IO():
|
||||||
no_batch_deps = [doc.to_array([DEP]) for doc in [nlp(text) for text in texts]]
|
no_batch_deps = [doc.to_array([DEP]) for doc in [nlp(text) for text in texts]]
|
||||||
assert_equal(batch_deps_1, batch_deps_2)
|
assert_equal(batch_deps_1, batch_deps_2)
|
||||||
assert_equal(batch_deps_1, no_batch_deps)
|
assert_equal(batch_deps_1, no_batch_deps)
|
||||||
|
|
||||||
|
|
||||||
|
def test_beam_parser_scores():
    """Confidence values read from the beam_parser pipe stay within [0, 1]."""
    nlp = English()
    parser = nlp.add_pipe(
        "beam_parser",
        config={"beam_width": 16, "beam_density": 0.0001},
    )
    train_examples = []
    for text, annotations in CONFLICTING_DATA:
        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
        for dep in annotations.get("deps", []):
            parser.add_label(dep)
    optimizer = nlp.initialize()

    # A few updates on the conflicting annotations
    for _ in range(10):
        nlp.update(train_examples, sgd=optimizer, losses={})

    # Score an unseen sentence straight from the beams
    doc = nlp.make_doc("I like securities.")
    beams = parser.predict([doc])
    head_scores, label_scores = parser.scored_parses(beams)

    n_tokens = len(doc)
    for child in range(n_tokens):
        for label in parser.labels:
            assert 0 - eps <= label_scores[0][(child, label)] <= 1 + eps
        for head in range(n_tokens):
            assert 0 - eps <= head_scores[0][(child, head)] <= 1 + eps
|
||||||
|
|
||||||
|
|
||||||
|
def _assert_overfit_beam_scores(head_scores, label_scores):
    """Check the beam scores of "I like securities." against the overfit parse.

    head_scores (dict): Scores keyed by (token index, head index).
    label_scores (dict): Scores keyed by (token index, dependency label).

    The expected annotations are 0=nsubj, 2=dobj, 3=punct, with the token at
    index 1 as the root and therefore the head of each of these tokens.
    """
    gold_labels = {0: "nsubj", 2: "dobj", 3: "punct"}
    for child, gold_label in gold_labels.items():
        for label in ("nsubj", "dobj", "punct"):
            expected = 1.0 if label == gold_label else 0.0
            # abs= is required: a positional tolerance is relative, which
            # collapses to (almost) exact equality when expecting 0.0
            assert label_scores[(child, label)] == pytest.approx(expected, abs=eps)
        for head in (0, 1, 2):
            expected = 1.0 if head == 1 else 0.0
            assert head_scores[(child, head)] == pytest.approx(expected, abs=eps)


def test_beam_overfitting_IO():
    """Overfit the beam dependency parser and check its scores before and after IO."""
    nlp = English()
    beam_width = 16
    beam_density = 0.0001
    config = {
        "beam_width": beam_width,
        "beam_density": beam_density,
    }
    parser = nlp.add_pipe("beam_parser", config=config)
    train_examples = []
    for text, annotations in TRAIN_DATA:
        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
        for dep in annotations.get("deps", []):
            parser.add_label(dep)
    optimizer = nlp.initialize()
    # run overfitting
    for i in range(150):
        losses = {}
        nlp.update(train_examples, sgd=optimizer, losses=losses)
    assert losses["beam_parser"] < 0.0001

    # test the scores from the beam
    test_text = "I like securities."
    docs = [nlp.make_doc(test_text)]
    beams = parser.predict(docs)
    head_scores, label_scores = parser.scored_parses(beams)
    # we only processed one document
    _assert_overfit_beam_scores(head_scores[0], label_scores[0])

    # Also test the results are still the same after IO
    with make_tempdir() as tmp_dir:
        nlp.to_disk(tmp_dir)
        nlp2 = util.load_model_from_path(tmp_dir)
        docs2 = [nlp2.make_doc(test_text)]
        parser2 = nlp2.get_pipe("beam_parser")
        beams2 = parser2.predict(docs2)
        head_scores2, label_scores2 = parser2.scored_parses(beams2)
        # we only processed one document; check the results again
        _assert_overfit_beam_scores(head_scores2[0], label_scores2[0])
|
||||||
|
|
Loading…
Reference in New Issue
Block a user