Add width prior feature

Not necessary for convergence, but in coref-hoi this seems to add a few
F1 points.

Note that there are two width-related features in coref-hoi. This one is a
"prior" that is added to the mention scores. The other width-related feature
is appended to the span embedding representation for later layers to
reference.
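To make the distinction concrete, here is a minimal numpy sketch of the two
uses, with illustrative shapes and random stand-in weights rather than the
actual coref-hoi layers:

import numpy as np

rng = np.random.default_rng(0)
n_spans, dim, n_feat, max_width = 4, 8, 3, 20

span_vecs = rng.normal(size=(n_spans, dim))         # one vector per candidate span
widths = np.array([0, 1, 1, 4])                     # span widths, 0-indexed
width_table = rng.normal(size=(max_width, n_feat))  # learned width embedding table

# This commit: the width feeds a small FFNN that emits one scalar per span,
# and that scalar is added to the mention score.
mention_scores = span_vecs @ rng.normal(size=dim)            # stand-in mention scorer
width_prior = width_table[widths] @ rng.normal(size=n_feat)  # stand-in prior FFNN
scores = mention_scores + width_prior                        # shape: (n_spans,)

# The other feature: the width embedding is concatenated onto the span
# representation so later layers can read it directly.
span_repr = np.concatenate([span_vecs, width_table[widths]], axis=1)
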
Paul O'Leary McCann 2021-07-05 21:06:28 +09:00
parent 8f66176b2d
commit 13bef2ddb6

@@ -1,7 +1,7 @@
 from dataclasses import dataclass
 import warnings

-from thinc.api import Model, Linear, Relu, Dropout, chain, noop
+from thinc.api import Model, Linear, Relu, Dropout, chain, noop, Embed, add
 from thinc.types import Floats2d, Floats1d, Ints2d, Ragged
 from typing import List, Callable, Tuple, Any
 from ...tokens import Doc
@@ -27,7 +27,7 @@ def build_coref(

     span_embedder = build_span_embedder(get_mentions, max_span_width)

-    with Model.define_operators({">>": chain, "&": tuplify}):
+    with Model.define_operators({">>": chain, "&": tuplify, "+": add}):

         mention_scorer = (
             Linear(nI=dim, nO=hidden)
@@ -37,10 +37,14 @@ def build_coref(
         )
         mention_scorer.initialize()

+        #TODO make feature_embed_size a param
+        feature_embed_size = 20
+        width_scorer = build_width_scorer(max_span_width, hidden, feature_embed_size)
+
         bilinear = Linear(nI=dim, nO=dim) >> Dropout(dropout)
         bilinear.initialize()

-        ms = build_take_vecs() >> mention_scorer
+        ms = (build_take_vecs() >> mention_scorer) + width_scorer

         model = (
             (tok2vec & noop())
@@ -129,6 +133,38 @@ class SpanEmbeddings:
         return self


+def build_width_scorer(max_span_width, hidden_size, feature_embed_size=20):
+    span_width_prior = (
+        Embed(nV=max_span_width, nO=feature_embed_size)
+        >> Linear(nI=feature_embed_size, nO=hidden_size)
+        >> Relu(nI=hidden_size, nO=hidden_size)
+        >> Dropout()
+        >> Linear(nI=hidden_size, nO=1)
+    )
+    span_width_prior.initialize()
+    return Model(
+        "WidthScorer",
+        forward=width_score_forward,
+        layers=[span_width_prior])
+
+
+def width_score_forward(model, embeds: SpanEmbeddings, is_train) -> Tuple[Floats1d, Callable]:
+    # calculate widths, subtracting 1 so it's 0-indexed
+    w_ffnn = model.layers[0]
+    idxs = embeds.indices
+    widths = idxs[:,1] - idxs[:,0] - 1
+    wscores, width_b = w_ffnn(widths, is_train)
+
+    lens = embeds.vectors.lengths
+
+    def width_score_backward(d_score: Floats1d) -> SpanEmbeddings:
+        dX = width_b(d_score)
+        vecs = Ragged(dX, lens)
+        return SpanEmbeddings(idxs, vecs)
+
+    return wscores, width_score_backward
+
+
 # model converting a Doc/Mention to span embeddings
 # get_mentions: Callable[Doc, Pairs[int]]
 def build_span_embedder(
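
For reference, the "+" operator bound in Model.define_operators above is
thinc's add combinator: each operand receives the same input, and their
outputs are summed, which is how the width prior ends up added elementwise to
the FFNN mention scores. Note that width_score_forward returns a backprop
callback that repackages the score gradient as SpanEmbeddings, so the width
branch passes gradients back to the span embedder just like the main scorer
branch. A standalone sketch of the summing pattern, with assumed toy
dimensions rather than the coref model itself:

import numpy
from thinc.api import Linear, add, chain

# Two models over the same input, both mapping 5d vectors to a single score.
scorer = chain(Linear(nI=5, nO=2), Linear(nI=2, nO=1))  # stand-in mention scorer
prior = Linear(nI=5, nO=1)                              # stand-in width prior
combined = add(scorer, prior)  # what the bound "+" operator builds

X = numpy.zeros((4, 5), dtype="f")
combined.initialize(X=X)
Y, backprop = combined(X, is_train=False)  # Y == scorer(X) + prior(X)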