From 880182afdbaf9c85e33238d31ab862656c9cf00f Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Wed, 27 Oct 2021 23:02:29 +0200
Subject: [PATCH] Work on parser. 15 tests failing

---
 spacy/ml/tb_framework.py                      | 30 ++++++-----
 spacy/pipeline/transition_parser.pyx          |  1 +
 .../tests/serialize/test_serialize_config.py  | 52 +++++--------------
 3 files changed, 30 insertions(+), 53 deletions(-)

diff --git a/spacy/ml/tb_framework.py b/spacy/ml/tb_framework.py
index 906884e87..207f4bd5d 100644
--- a/spacy/ml/tb_framework.py
+++ b/spacy/ml/tb_framework.py
@@ -42,7 +42,7 @@ def TransitionModel(
             "nO": None,  # Output size
             "nP": maxout_pieces,
             "nH": hidden_width,
-            "nI": tok2vec.maybe_get_dim("nO"),
+            "nI": tok2vec_projected.maybe_get_dim("nO"),
             "nF": state_tokens,
         },
         attrs={
@@ -69,6 +69,9 @@ def resize_output(model: Model, new_nO: int) -> Model:
         new_b[:old_nO] = old_b  # type: ignore
         for i in range(old_nO, new_nO):
             model.attrs["unseen_classes"].add(i)
+        model.set_param("upper_W", new_W)
+        model.set_param("upper_b", new_b)
+    model.set_dim("nO", new_nO, force=True)
     return model
 
 
@@ -167,9 +170,8 @@ def forward(model, docs_moves: Tuple[List[Doc], TransitionSystem], is_train: boo
                 if (d_scores[:, clas] < 0).any():
                     model.attrs["unseen_classes"].remove(clas)
         d_scores *= unseen_mask
-        ids = ops.xp.concatenate(all_ids)
-        statevecs = ops.xp.concatenate(all_statevecs)
-        which = ops.xp.concatenate(all_which)
+        statevecs = ops.xp.vstack(all_statevecs)
+        which = ops.xp.vstack(all_which)
         # Calculate the gradients for the parameters of the upper layer.
         model.inc_grad("upper_b", d_scores.sum(axis=0))
         model.inc_grad("upper_W", model.ops.gemm(d_scores, statevecs, trans1=True))
@@ -178,8 +180,12 @@ def forward(model, docs_moves: Tuple[List[Doc], TransitionSystem], is_train: boo
         # Backprop through the maxout activation
         d_preacts = model.ops.backprop_maxout(d_statevecs, which, model.get_dim("nP"))
         # We don't need to backprop the summation, because we pass back the IDs instead
-        d_tokvecs = backprop_feats((d_preacts, ids))
-        return (backprop_tok2vec(d_tokvecs), None)
+        d_state_features = backprop_feats((d_preacts, all_ids))
+        ids1d = model.ops.xp.vstack(all_ids).flatten()
+        d_state_features = d_state_features.reshape((ids1d.size, -1))
+        d_tokvecs = model.ops.alloc((tokvecs.shape[0] + 1, tokvecs.shape[1]))
+        model.ops.scatter_add(d_tokvecs, ids1d, d_state_features)
+        return (backprop_tok2vec(d_tokvecs[:-1]), None)
 
     return (states, all_scores), backprop_parser
 
@@ -200,6 +206,7 @@ def _forward_precomputable_affine(model, X: Floats2d, is_train: bool):
     nH = model.get_dim("nH")
     nP = model.get_dim("nP")
     nI = model.get_dim("nI")
+    assert X.shape == (X.shape[0], nI), X.shape
     Yf_ = model.ops.gemm(X, model.ops.reshape2f(W, nF * nH * nP, nI), trans2=True)
     Yf = model.ops.reshape4f(Yf_, Yf_.shape[0], nF, nH, nP)
     Yf = model.ops.xp.vstack((Yf, pad))
@@ -226,19 +233,13 @@ def _forward_precomputable_affine(model, X: Floats2d, is_train: bool):
         model.inc_grad(
             "lower_pad", _backprop_precomputable_affine_padding(model, dY, ids)
         )
-        print("X", X.shape)
-        print("ids", ids.shape)
-        print("dims", "nF", "nI")
-        print("X[ids]", X[ids].shape)
-        Xf = model.ops.reshape2f(X[ids], ids.shape[0], nF * nI)
-
         model.inc_grad("lower_b", dY.sum(axis=0))  # type: ignore
         dY = model.ops.reshape2f(dY, dY.shape[0], nH * nP)
-
         Wopfi = W.transpose((1, 2, 0, 3))
         Wopfi = Wopfi.reshape((nH * nP, nF * nI))
         dXf = model.ops.gemm(dY.reshape((dY.shape[0], nH * nP)), Wopfi)
-
+        ids1d = model.ops.xp.vstack(ids).flatten()
+        Xf = model.ops.reshape2f(X[ids1d], -1, nF * nI)
         dWopfi = model.ops.gemm(dY, Xf, trans1=True)
         dWopfi = dWopfi.reshape((nH, nP, nF, nI))
         # (o, p, f, i) --> (f, o, p, i)
@@ -250,6 +251,7 @@ def _forward_precomputable_affine(model, X: Floats2d, is_train: bool):
 
 
 def _backprop_precomputable_affine_padding(model, dY, ids):
+    ids = model.ops.xp.vstack(ids)
     nB = dY.shape[0]
     nF = model.get_dim("nF")
     nP = model.get_dim("nP")
diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx
index d9135b5d4..047805239 100644
--- a/spacy/pipeline/transition_parser.pyx
+++ b/spacy/pipeline/transition_parser.pyx
@@ -263,6 +263,7 @@ class Parser(TrainablePipe):
         best_costs = costs.min(axis=1, keepdims=True)
         gscores = scores.copy()
         min_score = scores.min()
+        assert costs.shape == scores.shape, (costs.shape, scores.shape)
         gscores[costs > best_costs] = min_score
         max_ = scores.max(axis=1, keepdims=True)
         gmax = gscores.max(axis=1, keepdims=True)
diff --git a/spacy/tests/serialize/test_serialize_config.py b/spacy/tests/serialize/test_serialize_config.py
index 6709defb8..ef650d7cd 100644
--- a/spacy/tests/serialize/test_serialize_config.py
+++ b/spacy/tests/serialize/test_serialize_config.py
@@ -120,33 +120,11 @@ width = ${components.tok2vec.model.width}
 
 parser_config_string_upper = """
 [model]
-@architectures = "spacy.TransitionBasedParser.v2"
+@architectures = "spacy.TransitionBasedParser.v3"
 state_type = "parser"
 extra_state_tokens = false
 hidden_width = 66
 maxout_pieces = 2
-use_upper = true
-
-[model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
-pretrained_vectors = null
-width = 333
-depth = 4
-embed_size = 5555
-window_size = 1
-maxout_pieces = 7
-subword_features = false
-"""
-
-
-parser_config_string_no_upper = """
-[model]
-@architectures = "spacy.TransitionBasedParser.v2"
-state_type = "parser"
-extra_state_tokens = false
-hidden_width = 66
-maxout_pieces = 2
-use_upper = false
 
 [model.tok2vec]
 @architectures = "spacy.HashEmbedCNN.v1"
@@ -177,7 +155,6 @@ def my_parser():
         extra_state_tokens=True,
         hidden_width=65,
         maxout_pieces=5,
-        use_upper=True,
     )
     return parser
 
@@ -264,15 +241,14 @@ def test_serialize_custom_nlp():
         nlp.to_disk(d)
         nlp2 = spacy.load(d)
         model = nlp2.get_pipe("parser").model
-        model.get_ref("tok2vec")
-        # check that we have the correct settings, not the default ones
-        assert model.get_ref("upper").get_dim("nI") == 65
-        assert model.get_ref("lower").get_dim("nI") == 65
+        assert model.get_ref("tok2vec") is not None
+        assert model.has_param("lower_W")
+        assert model.has_param("upper_W")
+        assert model.has_param("lower_b")
+        assert model.has_param("upper_b")
 
 
-@pytest.mark.parametrize(
-    "parser_config_string", [parser_config_string_upper, parser_config_string_no_upper]
-)
+@pytest.mark.parametrize("parser_config_string", [parser_config_string_upper])
 def test_serialize_parser(parser_config_string):
     """ Create a non-default parser config to check nlp serializes it correctly """
     nlp = English()
@@ -285,11 +261,11 @@ def test_serialize_parser(parser_config_string):
         nlp.to_disk(d)
         nlp2 = spacy.load(d)
         model = nlp2.get_pipe("parser").model
-        model.get_ref("tok2vec")
-        # check that we have the correct settings, not the default ones
-        if model.attrs["has_upper"]:
-            assert model.get_ref("upper").get_dim("nI") == 66
-        assert model.get_ref("lower").get_dim("nI") == 66
+        assert model.get_ref("tok2vec") is not None
+        assert model.has_param("lower_W")
+        assert model.has_param("upper_W")
+        assert model.has_param("lower_b")
+        assert model.has_param("upper_b")
 
 
 def test_config_nlp_roundtrip():
@@ -436,9 +412,7 @@ def test_config_auto_fill_extra_fields():
     load_model_from_config(nlp.config)
 
 
-@pytest.mark.parametrize(
-    "parser_config_string", [parser_config_string_upper, parser_config_string_no_upper]
-)
+@pytest.mark.parametrize("parser_config_string", [parser_config_string_upper])
 def test_config_validate_literal(parser_config_string):
     nlp = English()
     config = Config().from_str(parser_config_string)
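
Note on the backward pass in tb_framework.py above: rather than concatenating per-batch gradients and indexing, the patch flattens the feature IDs and does a single scatter-add into a gradient buffer with one extra row, so that padding indices (-1) accumulate into a row that is sliced off afterwards. Below is a minimal NumPy sketch of that pattern, illustrative only: the sizes are invented, and spaCy's real code routes through model.ops so it also runs on GPU.

import numpy as np

# Hypothetical sizes for illustration.
n_tokens, width, n_feats = 5, 4, 3

# ids: one row of feature token indices per parser state; -1 marks padding.
ids = np.array([[0, 1, -1],
                [2, 3, 4]])
# One gradient row per gathered state feature (ids.size rows in total).
d_state_features = np.ones((ids.size, width))

# Allocate one extra row so that index -1 (padding) lands in the final row.
d_tokvecs = np.zeros((n_tokens + 1, width))
np.add.at(d_tokvecs, ids.flatten(), d_state_features)  # scatter-add; duplicates accumulate

d_tokvecs = d_tokvecs[:-1]  # drop the padding row, as in d_tokvecs[:-1] above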