Work on parser. 15 tests failing

2025-08-02 19:30:19 +03:00 · 2021-10-27 23:02:29 +02:00 · 2021-10-27 23:02:29 +02:00 · 880182afdb
commit 880182afdb
parent af9a30b192
3 changed files with 30 additions and 53 deletions
--- a/spacy/ml/tb_framework.py
+++ b/spacy/ml/tb_framework.py
@@ -42,7 +42,7 @@ def TransitionModel(
            "nO": None,  # Output size
            "nP": maxout_pieces,
            "nH": hidden_width,
-            "nI": tok2vec.maybe_get_dim("nO"),
+            "nI": tok2vec_projected.maybe_get_dim("nO"),
            "nF": state_tokens,
        },
        attrs={
@@ -69,6 +69,9 @@ def resize_output(model: Model, new_nO: int) -> Model:
        new_b[:old_nO] = old_b  # type: ignore
        for i in range(old_nO, new_nO):
            model.attrs["unseen_classes"].add(i)
+        model.set_param("upper_W", new_W)
+        model.set_param("upper_b", new_b)
+    model.set_dim("nO", new_nO, force=True)
    return model


@@ -167,9 +170,8 @@ def forward(model, docs_moves: Tuple[List[Doc], TransitionSystem], is_train: boo
                if (d_scores[:, clas] < 0).any():
                    model.attrs["unseen_classes"].remove(clas)
        d_scores *= unseen_mask
-        ids = ops.xp.concatenate(all_ids)
-        statevecs = ops.xp.concatenate(all_statevecs)
-        which = ops.xp.concatenate(all_which)
+        statevecs = ops.xp.vstack(all_statevecs)
+        which = ops.xp.vstack(all_which)
        # Calculate the gradients for the parameters of the upper layer.
        model.inc_grad("upper_b", d_scores.sum(axis=0))
        model.inc_grad("upper_W", model.ops.gemm(d_scores, statevecs, trans1=True))
@@ -178,8 +180,12 @@ def forward(model, docs_moves: Tuple[List[Doc], TransitionSystem], is_train: boo
        # Backprop through the maxout activation
        d_preacts = model.ops.backprop_maxout(d_statevecs, which, model.get_dim("nP"))
        # We don't need to backprop the summation, because we pass back the IDs instead
-        d_tokvecs = backprop_feats((d_preacts, ids))
-        return (backprop_tok2vec(d_tokvecs), None)
+        d_state_features = backprop_feats((d_preacts, all_ids))
+        ids1d = model.ops.xp.vstack(all_ids).flatten()
+        d_state_features = d_state_features.reshape((ids1d.size, -1))
+        d_tokvecs = model.ops.alloc((tokvecs.shape[0] + 1, tokvecs.shape[1]))
+        model.ops.scatter_add(d_tokvecs, ids1d, d_state_features)
+        return (backprop_tok2vec(d_tokvecs[:-1]), None)

    return (states, all_scores), backprop_parser

@@ -200,6 +206,7 @@ def _forward_precomputable_affine(model, X: Floats2d, is_train: bool):
    nH = model.get_dim("nH")
    nP = model.get_dim("nP")
    nI = model.get_dim("nI")
+    assert X.shape == (X.shape[0], nI), X.shape
    Yf_ = model.ops.gemm(X, model.ops.reshape2f(W, nF * nH * nP, nI), trans2=True)
    Yf = model.ops.reshape4f(Yf_, Yf_.shape[0], nF, nH, nP)
    Yf = model.ops.xp.vstack((Yf, pad))
@@ -226,19 +233,13 @@ def _forward_precomputable_affine(model, X: Floats2d, is_train: bool):
        model.inc_grad(
            "lower_pad", _backprop_precomputable_affine_padding(model, dY, ids)
        )
-        print("X", X.shape)
-        print("ids", ids.shape)
-        print("dims", "nF", "nI")
-        print("X[ids]", X[ids].shape)
-        Xf = model.ops.reshape2f(X[ids], ids.shape[0], nF * nI)
-
        model.inc_grad("lower_b", dY.sum(axis=0))  # type: ignore
        dY = model.ops.reshape2f(dY, dY.shape[0], nH * nP)
-
        Wopfi = W.transpose((1, 2, 0, 3))
        Wopfi = Wopfi.reshape((nH * nP, nF * nI))
        dXf = model.ops.gemm(dY.reshape((dY.shape[0], nH * nP)), Wopfi)
-
+        ids1d = model.ops.xp.vstack(ids).flatten()
+        Xf = model.ops.reshape2f(X[ids1d], -1, nF * nI)
        dWopfi = model.ops.gemm(dY, Xf, trans1=True)
        dWopfi = dWopfi.reshape((nH, nP, nF, nI))
        # (o, p, f, i) --> (f, o, p, i)
@@ -250,6 +251,7 @@ def _forward_precomputable_affine(model, X: Floats2d, is_train: bool):


 def _backprop_precomputable_affine_padding(model, dY, ids):
+    ids = model.ops.xp.vstack(ids)
    nB = dY.shape[0]
    nF = model.get_dim("nF")
    nP = model.get_dim("nP")
--- a/spacy/pipeline/transition_parser.pyx
+++ b/spacy/pipeline/transition_parser.pyx
@@ -263,6 +263,7 @@ class Parser(TrainablePipe):
        best_costs = costs.min(axis=1, keepdims=True)
        gscores = scores.copy()
        min_score = scores.min()
+        assert costs.shape == scores.shape, (costs.shape, scores.shape)
        gscores[costs > best_costs] = min_score
        max_ = scores.max(axis=1, keepdims=True)
        gmax = gscores.max(axis=1, keepdims=True)
--- a/spacy/tests/serialize/test_serialize_config.py
+++ b/spacy/tests/serialize/test_serialize_config.py
@@ -120,33 +120,11 @@ width = ${components.tok2vec.model.width}

 parser_config_string_upper = """
 [model]
-@architectures = "spacy.TransitionBasedParser.v2"
+@architectures = "spacy.TransitionBasedParser.v3"
 state_type = "parser"
 extra_state_tokens = false
 hidden_width = 66
 maxout_pieces = 2
-use_upper = true
-
-[model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
-pretrained_vectors = null
-width = 333
-depth = 4
-embed_size = 5555
-window_size = 1
-maxout_pieces = 7
-subword_features = false
-"""
-
-
-parser_config_string_no_upper = """
-[model]
-@architectures = "spacy.TransitionBasedParser.v2"
-state_type = "parser"
-extra_state_tokens = false
-hidden_width = 66
-maxout_pieces = 2
-use_upper = false

 [model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1"
@@ -177,7 +155,6 @@ def my_parser():
        extra_state_tokens=True,
        hidden_width=65,
        maxout_pieces=5,
-        use_upper=True,
    )
    return parser

@@ -264,15 +241,14 @@ def test_serialize_custom_nlp():
        nlp.to_disk(d)
        nlp2 = spacy.load(d)
        model = nlp2.get_pipe("parser").model
-        model.get_ref("tok2vec")
-        # check that we have the correct settings, not the default ones
-        assert model.get_ref("upper").get_dim("nI") == 65
-        assert model.get_ref("lower").get_dim("nI") == 65
+        assert model.get_ref("tok2vec") is not None
+        assert model.has_param("lower_W")
+        assert model.has_param("upper_W")
+        assert model.has_param("lower_b")
+        assert model.has_param("upper_b")


-@pytest.mark.parametrize(
-    "parser_config_string", [parser_config_string_upper, parser_config_string_no_upper]
-)
+@pytest.mark.parametrize("parser_config_string", [parser_config_string_upper])
 def test_serialize_parser(parser_config_string):
    """ Create a non-default parser config to check nlp serializes it correctly """
    nlp = English()
@@ -285,11 +261,11 @@ def test_serialize_parser(parser_config_string):
        nlp.to_disk(d)
        nlp2 = spacy.load(d)
        model = nlp2.get_pipe("parser").model
-        model.get_ref("tok2vec")
-        # check that we have the correct settings, not the default ones
-        if model.attrs["has_upper"]:
-            assert model.get_ref("upper").get_dim("nI") == 66
-        assert model.get_ref("lower").get_dim("nI") == 66
+        assert model.get_ref("tok2vec") is not None
+        assert model.has_param("lower_W")
+        assert model.has_param("upper_W")
+        assert model.has_param("lower_b")
+        assert model.has_param("upper_b")


 def test_config_nlp_roundtrip():
@@ -436,9 +412,7 @@ def test_config_auto_fill_extra_fields():
    load_model_from_config(nlp.config)


-@pytest.mark.parametrize(
-    "parser_config_string", [parser_config_string_upper, parser_config_string_no_upper]
-)
+@pytest.mark.parametrize("parser_config_string", [parser_config_string_upper])
 def test_config_validate_literal(parser_config_string):
    nlp = English()
    config = Config().from_str(parser_config_string)