mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-19 12:42:20 +03:00
Add width prior feature
Not necessary for convergence, but in coref-hoi this seems to add a few F1 points. Note that there are two width-related features in coref-hoi: this one is a "prior" that is added to the mention scores, while the other width-related feature is appended to the span embedding representation for other layers to reference.
This commit is contained in:
parent
8f66176b2d
commit
13bef2ddb6
|
@ -1,7 +1,7 @@
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from thinc.api import Model, Linear, Relu, Dropout, chain, noop
|
from thinc.api import Model, Linear, Relu, Dropout, chain, noop, Embed, add
|
||||||
from thinc.types import Floats2d, Floats1d, Ints2d, Ragged
|
from thinc.types import Floats2d, Floats1d, Ints2d, Ragged
|
||||||
from typing import List, Callable, Tuple, Any
|
from typing import List, Callable, Tuple, Any
|
||||||
from ...tokens import Doc
|
from ...tokens import Doc
|
||||||
|
@ -27,7 +27,7 @@ def build_coref(
|
||||||
|
|
||||||
span_embedder = build_span_embedder(get_mentions, max_span_width)
|
span_embedder = build_span_embedder(get_mentions, max_span_width)
|
||||||
|
|
||||||
with Model.define_operators({">>": chain, "&": tuplify}):
|
with Model.define_operators({">>": chain, "&": tuplify, "+": add}):
|
||||||
|
|
||||||
mention_scorer = (
|
mention_scorer = (
|
||||||
Linear(nI=dim, nO=hidden)
|
Linear(nI=dim, nO=hidden)
|
||||||
|
@ -37,10 +37,14 @@ def build_coref(
|
||||||
)
|
)
|
||||||
mention_scorer.initialize()
|
mention_scorer.initialize()
|
||||||
|
|
||||||
|
#TODO make feature_embed_size a param
|
||||||
|
feature_embed_size = 20
|
||||||
|
width_scorer = build_width_scorer(max_span_width, hidden, feature_embed_size)
|
||||||
|
|
||||||
bilinear = Linear(nI=dim, nO=dim) >> Dropout(dropout)
|
bilinear = Linear(nI=dim, nO=dim) >> Dropout(dropout)
|
||||||
bilinear.initialize()
|
bilinear.initialize()
|
||||||
|
|
||||||
ms = build_take_vecs() >> mention_scorer
|
ms = (build_take_vecs() >> mention_scorer) + width_scorer
|
||||||
|
|
||||||
model = (
|
model = (
|
||||||
(tok2vec & noop())
|
(tok2vec & noop())
|
||||||
|
@ -129,6 +133,38 @@ class SpanEmbeddings:
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
||||||
|
def build_width_scorer(max_span_width, hidden_size, feature_embed_size=20):
    """Build a thinc Model that scores spans by a learned width prior.

    Each span width (0-indexed, up to max_span_width) is embedded, passed
    through a small FFNN, and projected to a single scalar score that is
    later added to the mention scores.

    max_span_width: number of distinct width buckets for the Embed table.
    hidden_size: width of the hidden Linear/Relu layers.
    feature_embed_size: dimensionality of the width embedding.
    """
    width_prior_ffnn = chain(
        Embed(nV=max_span_width, nO=feature_embed_size),
        Linear(nI=feature_embed_size, nO=hidden_size),
        Relu(nI=hidden_size, nO=hidden_size),
        Dropout(),
        Linear(nI=hidden_size, nO=1),
    )
    width_prior_ffnn.initialize()
    # width_score_forward reaches the FFNN via model.layers[0].
    return Model(
        "WidthScorer",
        forward=width_score_forward,
        layers=[width_prior_ffnn],
    )
|
||||||
|
|
||||||
|
|
||||||
|
def width_score_forward(model, embeds: SpanEmbeddings, is_train) -> Tuple[Floats1d, Callable]:
    """Forward pass for the width-prior scorer.

    model.layers[0] is the width FFNN built by build_width_scorer.
    embeds supplies .indices (per-span start/end offsets) and .vectors
    (a Ragged whose lengths are reused to shape the backward output).
    Returns the per-span width scores and a backprop callback.
    """
    # calculate widths, subtracting 1 so it's 0-index
    w_ffnn = model.layers[0]
    idxs = embeds.indices
    widths = idxs[:,1] - idxs[:,0] - 1
    wscores, width_b = w_ffnn(widths, is_train)

    # Capture the per-doc lengths now so the backward closure can rebuild
    # a Ragged of matching structure.
    lens = embeds.vectors.lengths

    def width_score_backward(d_score: Floats1d) -> SpanEmbeddings:
        # NOTE(review): width_b yields the gradient w.r.t. the integer
        # width features fed into the Embed layer, but it is wrapped here
        # as if it were the gradient for the span vectors — confirm this
        # is the intended gradient to propagate through SpanEmbeddings.
        dX = width_b(d_score)
        vecs = Ragged(dX, lens)
        return SpanEmbeddings(idxs, vecs)

    return wscores, width_score_backward
|
||||||
|
|
||||||
# model converting a Doc/Mention to span embeddings
|
# model converting a Doc/Mention to span embeddings
|
||||||
# get_mentions: Callable[Doc, Pairs[int]]
|
# get_mentions: Callable[Doc, Pairs[int]]
|
||||||
def build_span_embedder(
|
def build_span_embedder(
|
||||||
|
|
Loading…
Reference in New Issue
Block a user