diff --git a/spacy/pipeline/_parser_internals/arc_eager.pxd b/spacy/pipeline/_parser_internals/arc_eager.pxd
index 3732dd1b7..b618bc587 100644
--- a/spacy/pipeline/_parser_internals/arc_eager.pxd
+++ b/spacy/pipeline/_parser_internals/arc_eager.pxd
@@ -4,4 +4,4 @@ from .transition_system cimport Transition, TransitionSystem
 
 
 cdef class ArcEager(TransitionSystem):
-    pass
+    cdef get_arcs(self, StateC* state)
diff --git a/spacy/pipeline/_parser_internals/arc_eager.pyx b/spacy/pipeline/_parser_internals/arc_eager.pyx
index 1d92efd7b..90a70b17b 100644
--- a/spacy/pipeline/_parser_internals/arc_eager.pyx
+++ b/spacy/pipeline/_parser_internals/arc_eager.pyx
@@ -1,6 +1,7 @@
 # cython: profile=True, cdivision=True, infer_types=True
 from cymem.cymem cimport Pool, Address
 from libc.stdint cimport int32_t
+from libcpp.vector cimport vector
 
 from collections import defaultdict, Counter
 
@@ -10,9 +11,9 @@ from ...structs cimport TokenC
 from ...tokens.doc cimport Doc, set_children_from_heads
 from ...training.example cimport Example
 from .stateclass cimport StateClass
-from ._state cimport StateC
-
+from ._state cimport StateC, ArcC
 from ...errors import Errors
+from thinc.extra.search cimport Beam
 
 cdef weight_t MIN_SCORE = -90000
 cdef attr_t SUBTOK_LABEL = hash_string(u'subtok')
@@ -707,6 +708,28 @@ cdef class ArcEager(TransitionSystem):
                 doc.c[i].dep = self.root_label
         set_children_from_heads(doc.c, 0, doc.length)
 
+    def get_beam_parses(self, Beam beam):
+        parses = []
+        probs = beam.probs
+        for i in range(beam.size):
+            state = <StateC*>beam.at(i)
+            if state.is_final():
+                prob = probs[i]
+                parse = []
+                arcs = self.get_arcs(state)
+                if arcs:
+                    for arc in arcs:
+                        dep = arc["label"]
+                        label = self.strings[dep]
+                        parse.append((arc["head"], arc["child"], label))
+                parses.append((prob, parse))
+        return parses
+
+    cdef get_arcs(self, StateC* state):
+        cdef vector[ArcC] arcs
+        state.get_arcs(&arcs)
+        return list(arcs)
+
     def has_gold(self, Example eg, start=0, end=None):
         for word in eg.y[start:end]:
             if word.dep != 0:
diff --git a/spacy/pipeline/_parser_internals/ner.pyx b/spacy/pipeline/_parser_internals/ner.pyx
index d3e58e245..d0da6ff70 100644
--- a/spacy/pipeline/_parser_internals/ner.pyx
+++ b/spacy/pipeline/_parser_internals/ner.pyx
@@ -257,7 +257,8 @@ cdef class BiluoPushDown(TransitionSystem):
                 parse = []
                 for j in range(state._ents.size()):
                     ent = state._ents.at(j)
-                    parse.append((ent.start, ent.end, self.strings[ent.label]))
+                    if ent.start != -1 and ent.end != -1:
+                        parse.append((ent.start, ent.end, self.strings[ent.label]))
                 parses.append((prob, parse))
         return parses
 
diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx
index 3399ef677..1fe29eb9b 100644
--- a/spacy/pipeline/dep_parser.pyx
+++ b/spacy/pipeline/dep_parser.pyx
@@ -1,4 +1,5 @@
 # cython: infer_types=True, profile=True, binding=True
+from collections import defaultdict
 from typing import Optional, Iterable
 from thinc.api import Model, Config
 
@@ -258,3 +259,20 @@
         results.update(Scorer.score_deps(examples, "dep", **kwargs))
         del results["sents_per_type"]
         return results
+
+    def scored_parses(self, beams):
+        """Return two dictionaries with scores for each beam/doc that was processed:
+        one containing (i, head) keys, and another containing (i, label) keys.
+        """
+        head_scores = []
+        label_scores = []
+        for beam in beams:
+            score_head_dict = defaultdict(float)
+            score_label_dict = defaultdict(float)
+            for score, parses in self.moves.get_beam_parses(beam):
+                for head, i, label in parses:
+                    score_head_dict[(i, head)] += score
+                    score_label_dict[(i, label)] += score
+            head_scores.append(score_head_dict)
+            label_scores.append(score_label_dict)
+        return head_scores, label_scores
diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py
index fec69bf66..d110eb11c 100644
--- a/spacy/tests/parser/test_ner.py
+++ b/spacy/tests/parser/test_ner.py
@@ -1,5 +1,3 @@
-from collections import defaultdict
-
 import pytest
 from numpy.testing import assert_equal
 from spacy.attrs import ENT_IOB
@@ -305,7 +303,7 @@ def test_block_ner():
 
 @pytest.mark.parametrize("use_upper", [True, False])
 def test_overfitting_IO(use_upper):
-    # Simple test to try and quickly overfit the NER component - ensuring the ML models work correctly
+    # Simple test to try and quickly overfit the NER component
     nlp = English()
     ner = nlp.add_pipe("ner", config={"model": {"use_upper": use_upper}})
     train_examples = []
@@ -386,7 +384,6 @@ def test_beam_ner_scores():
     test_text = "I like London."
     doc = nlp.make_doc(test_text)
     docs = [doc]
-    ner = nlp.get_pipe("beam_ner")
     beams = ner.predict(docs)
     entity_scores = ner.scored_ents(beams)[0]
 
@@ -423,7 +420,6 @@ def test_beam_overfitting_IO():
     # test the scores from the beam
     test_text = "I like London."
    docs = [nlp.make_doc(test_text)]
-    ner = nlp.get_pipe("beam_ner")
     beams = ner.predict(docs)
     entity_scores = ner.scored_ents(beams)[0]
     assert entity_scores[(2, 3, "LOC")] == 1.0
@@ -433,7 +429,7 @@
     with make_tempdir() as tmp_dir:
         nlp.to_disk(tmp_dir)
         nlp2 = util.load_model_from_path(tmp_dir)
-        docs2 = [nlp2(test_text)]
+        docs2 = [nlp2.make_doc(test_text)]
         ner2 = nlp2.get_pipe("beam_ner")
         beams2 = ner2.predict(docs2)
         entity_scores2 = ner2.scored_ents(beams2)[0]
diff --git a/spacy/tests/parser/test_parse.py b/spacy/tests/parser/test_parse.py
index a914eb17a..e7728baaf 100644
--- a/spacy/tests/parser/test_parse.py
+++ b/spacy/tests/parser/test_parse.py
@@ -28,6 +28,26 @@ TRAIN_DATA = [
 ]
 
 
+CONFLICTING_DATA = [
+    (
+        "I like London and Berlin.",
+        {
+            "heads": [1, 1, 1, 2, 2, 1],
+            "deps": ["nsubj", "ROOT", "dobj", "cc", "conj", "punct"],
+        },
+    ),
+    (
+        "I like London and Berlin.",
+        {
+            "heads": [0, 0, 0, 0, 0, 0],
+            "deps": ["ROOT", "nsubj", "nsubj", "cc", "conj", "punct"],
+        },
+    ),
+]
+
+eps = 0.01
+
+
 def test_parser_root(en_vocab):
     words = ["i", "do", "n't", "have", "other", "assistance"]
     heads = [3, 3, 3, 3, 5, 3]
@@ -185,26 +205,31 @@ def test_parser_set_sent_starts(en_vocab):
             assert token.head in sent
 
 
-def test_overfitting_IO():
-    # Simple test to try and quickly overfit the dependency parser - ensuring the ML models work correctly
+@pytest.mark.parametrize("pipe_name", ["parser", "beam_parser"])
+def test_overfitting_IO(pipe_name):
+    # Simple test to try and quickly overfit the dependency parser (normal or beam)
     nlp = English()
-    parser = nlp.add_pipe("parser")
+    parser = nlp.add_pipe(pipe_name)
     train_examples = []
     for text, annotations in TRAIN_DATA:
         train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
         for dep in annotations.get("deps", []):
             parser.add_label(dep)
     optimizer = nlp.initialize()
-    for i in range(100):
+    # run overfitting
+    for i in range(150):
         losses = {}
         nlp.update(train_examples, sgd=optimizer, losses=losses)
-    assert losses["parser"] < 0.0001
+    assert losses[pipe_name] < 0.0001
     # test the trained model
     test_text = "I like securities."
     doc = nlp(test_text)
     assert doc[0].dep_ == "nsubj"
     assert doc[2].dep_ == "dobj"
     assert doc[3].dep_ == "punct"
+    assert doc[0].head.i == 1
+    assert doc[2].head.i == 1
+    assert doc[3].head.i == 1
     # Also test the results are still the same after IO
     with make_tempdir() as tmp_dir:
         nlp.to_disk(tmp_dir)
@@ -213,6 +238,9 @@
     assert doc2[0].dep_ == "nsubj"
     assert doc2[2].dep_ == "dobj"
     assert doc2[3].dep_ == "punct"
+    assert doc2[0].head.i == 1
+    assert doc2[2].head.i == 1
+    assert doc2[3].head.i == 1
 
     # Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
     texts = [
@@ -226,3 +254,123 @@
     no_batch_deps = [doc.to_array([DEP]) for doc in [nlp(text) for text in texts]]
     assert_equal(batch_deps_1, batch_deps_2)
     assert_equal(batch_deps_1, no_batch_deps)
+
+
+def test_beam_parser_scores():
+    # Test that we can get confidence values out of the beam_parser pipe
+    beam_width = 16
+    beam_density = 0.0001
+    nlp = English()
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    parser = nlp.add_pipe("beam_parser", config=config)
+    train_examples = []
+    for text, annotations in CONFLICTING_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for dep in annotations.get("deps", []):
+            parser.add_label(dep)
+    optimizer = nlp.initialize()
+
+    # update a bit with conflicting data
+    for i in range(10):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+
+    # test the scores from the beam
+    test_text = "I like securities."
+    doc = nlp.make_doc(test_text)
+    docs = [doc]
+    beams = parser.predict(docs)
+    head_scores, label_scores = parser.scored_parses(beams)
+
+    for j in range(len(doc)):
+        for label in parser.labels:
+            label_score = label_scores[0][(j, label)]
+            assert 0 - eps <= label_score <= 1 + eps
+        for i in range(len(doc)):
+            head_score = head_scores[0][(j, i)]
+            assert 0 - eps <= head_score <= 1 + eps
+
+
+def test_beam_overfitting_IO():
+    # Simple test to try and quickly overfit the Beam dependency parser
+    nlp = English()
+    beam_width = 16
+    beam_density = 0.0001
+    config = {
+        "beam_width": beam_width,
+        "beam_density": beam_density,
+    }
+    parser = nlp.add_pipe("beam_parser", config=config)
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+        for dep in annotations.get("deps", []):
+            parser.add_label(dep)
+    optimizer = nlp.initialize()
+    # run overfitting
+    for i in range(150):
+        losses = {}
+        nlp.update(train_examples, sgd=optimizer, losses=losses)
+    assert losses["beam_parser"] < 0.0001
+    # test the scores from the beam
+    test_text = "I like securities."
+    docs = [nlp.make_doc(test_text)]
+    beams = parser.predict(docs)
+    head_scores, label_scores = parser.scored_parses(beams)
+    # we only processed one document
+    head_scores = head_scores[0]
+    label_scores = label_scores[0]
+    # test label annotations: 0=nsubj, 2=dobj, 3=punct
+    assert label_scores[(0, "nsubj")] == pytest.approx(1.0, eps)
+    assert label_scores[(0, "dobj")] == pytest.approx(0.0, eps)
+    assert label_scores[(0, "punct")] == pytest.approx(0.0, eps)
+    assert label_scores[(2, "nsubj")] == pytest.approx(0.0, eps)
+    assert label_scores[(2, "dobj")] == pytest.approx(1.0, eps)
+    assert label_scores[(2, "punct")] == pytest.approx(0.0, eps)
+    assert label_scores[(3, "nsubj")] == pytest.approx(0.0, eps)
+    assert label_scores[(3, "dobj")] == pytest.approx(0.0, eps)
+    assert label_scores[(3, "punct")] == pytest.approx(1.0, eps)
+    # test head annotations: the root is token at index 1
+    assert head_scores[(0, 0)] == pytest.approx(0.0, eps)
+    assert head_scores[(0, 1)] == pytest.approx(1.0, eps)
+    assert head_scores[(0, 2)] == pytest.approx(0.0, eps)
+    assert head_scores[(2, 0)] == pytest.approx(0.0, eps)
+    assert head_scores[(2, 1)] == pytest.approx(1.0, eps)
+    assert head_scores[(2, 2)] == pytest.approx(0.0, eps)
+    assert head_scores[(3, 0)] == pytest.approx(0.0, eps)
+    assert head_scores[(3, 1)] == pytest.approx(1.0, eps)
+    assert head_scores[(3, 2)] == pytest.approx(0.0, eps)
+
+    # Also test the results are still the same after IO
+    with make_tempdir() as tmp_dir:
+        nlp.to_disk(tmp_dir)
+        nlp2 = util.load_model_from_path(tmp_dir)
+        docs2 = [nlp2.make_doc(test_text)]
+        parser2 = nlp2.get_pipe("beam_parser")
+        beams2 = parser2.predict(docs2)
+        head_scores2, label_scores2 = parser2.scored_parses(beams2)
+        # we only processed one document
+        head_scores2 = head_scores2[0]
+        label_scores2 = label_scores2[0]
+        # check the results again
+        assert label_scores2[(0, "nsubj")] == pytest.approx(1.0, eps)
+        assert label_scores2[(0, "dobj")] == pytest.approx(0.0, eps)
+        assert label_scores2[(0, "punct")] == pytest.approx(0.0, eps)
+        assert label_scores2[(2, "nsubj")] == pytest.approx(0.0, eps)
+        assert label_scores2[(2, "dobj")] == pytest.approx(1.0, eps)
+        assert label_scores2[(2, "punct")] == pytest.approx(0.0, eps)
+        assert label_scores2[(3, "nsubj")] == pytest.approx(0.0, eps)
+        assert label_scores2[(3, "dobj")] == pytest.approx(0.0, eps)
+        assert label_scores2[(3, "punct")] == pytest.approx(1.0, eps)
+        assert head_scores2[(0, 0)] == pytest.approx(0.0, eps)
+        assert head_scores2[(0, 1)] == pytest.approx(1.0, eps)
+        assert head_scores2[(0, 2)] == pytest.approx(0.0, eps)
+        assert head_scores2[(2, 0)] == pytest.approx(0.0, eps)
+        assert head_scores2[(2, 1)] == pytest.approx(1.0, eps)
+        assert head_scores2[(2, 2)] == pytest.approx(0.0, eps)
+        assert head_scores2[(3, 0)] == pytest.approx(0.0, eps)
+        assert head_scores2[(3, 1)] == pytest.approx(1.0, eps)
+        assert head_scores2[(3, 2)] == pytest.approx(0.0, eps)
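
Usage note (not part of the patch): the sketch below shows how the scored_parses API added in dep_parser.pyx can be exercised outside the test suite. It mirrors test_beam_overfitting_IO above; the training example is the well-formed entry from CONFLICTING_DATA, while the number of update steps and the printing loop at the end are illustrative assumptions rather than anything specified by this diff.

# Minimal sketch of the beam-parser scoring workflow introduced above.
# Assumes spaCy with the "beam_parser" factory from this patch; the
# training loop length and the final report are illustrative only.
from spacy.lang.en import English
from spacy.training import Example

TRAIN_DATA = [
    (
        "I like London and Berlin.",
        {
            "heads": [1, 1, 1, 2, 2, 1],
            "deps": ["nsubj", "ROOT", "dobj", "cc", "conj", "punct"],
        },
    ),
]

nlp = English()
parser = nlp.add_pipe("beam_parser", config={"beam_width": 16, "beam_density": 0.0001})
train_examples = []
for text, annotations in TRAIN_DATA:
    train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
    for dep in annotations["deps"]:
        parser.add_label(dep)
optimizer = nlp.initialize()
for _ in range(50):
    nlp.update(train_examples, sgd=optimizer)

# predict() returns one beam per doc; scored_parses() sums the beam
# probabilities into (token, head) and (token, label) confidence dicts.
doc = nlp.make_doc("I like London and Berlin.")
beams = parser.predict([doc])
head_scores, label_scores = parser.scored_parses(beams)
for (i, head), score in sorted(head_scores[0].items()):
    print(f"token {i} -> head {head}: {score:.2f}")
for (i, label), score in sorted(label_scores[0].items()):
    print(f"token {i} -> label {label!r}: {score:.2f}")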