diff --git a/spacy/ml/models/coref.py b/spacy/ml/models/coref.py
index 5d2dc9ffb..52a549509 100644
--- a/spacy/ml/models/coref.py
+++ b/spacy/ml/models/coref.py
@@ -89,15 +89,17 @@ def build_width_scorer(max_span_width, hidden_size, feature_embed_size=20):
         >> Linear(nI=hidden_size, nO=1)
     )
     span_width_prior.initialize()
-    return Model(
+    model = Model(
         "WidthScorer",
         forward=width_score_forward,
         layers=[span_width_prior])
+    model.set_ref("width_prior", span_width_prior)
+    return model
 
 
 def width_score_forward(model, embeds: SpanEmbeddings, is_train) -> Tuple[Floats1d, Callable]:
     # calculate widths, subtracting 1 so it's 0-index
-    w_ffnn = model.layers[0]
+    w_ffnn = model.get_ref("width_prior")
     idxs = embeds.indices
     widths = idxs[:,1] - idxs[:,0] - 1
     wscores, width_b = w_ffnn(widths, is_train)
@@ -227,6 +229,7 @@ def coarse_prune(
         cscores = scores[offset:hi]
 
         # negate it so highest numbers come first
+        # The argsort is relatively slow, but it can't be skipped.
         tops = (model.ops.xp.argsort(-1 * cscores)).tolist()
         starts = spanembeds.indices[offset:hi, 0].tolist()
         ends = spanembeds.indices[offset:hi:, 1].tolist()
@@ -298,7 +301,7 @@ def take_vecs_forward(model, inputs: SpanEmbeddings, is_train) -> Floats2d:
 def build_ant_scorer(
     bilinear, dropout, ant_limit=50
 ) -> Model[Tuple[Floats1d, SpanEmbeddings], List[Floats2d]]:
-    return Model(
+    model = Model(
         "AntScorer",
         forward=ant_scorer_forward,
         layers=[bilinear, dropout],
@@ -306,6 +309,9 @@ def build_ant_scorer(
             "ant_limit": ant_limit,
         },
     )
+    model.set_ref("bilinear", bilinear)
+    model.set_ref("dropout", dropout)
+    return model
 
 
 def ant_scorer_forward(
@@ -318,13 +324,7 @@ def ant_scorer_forward(
     # this contains the coarse bilinear in coref-hoi
     # coarse bilinear is a single layer linear network
-    # TODO make these proper refs
-    bilinear = model.layers[0]
-    dropout = model.layers[1]
-
-    # XXX Note on dimensions: This won't work as a ragged because the floats2ds
-    # are not all the same dimensions. Each floats2d is a square in the size of
-    # the number of antecedents in the document. Actually, that will have the
-    # same size if antecedents are padded... Needs checking.
+    bilinear = model.get_ref("bilinear")
+    dropout = model.get_ref("dropout")
 
     mscores, sembeds = inputs
     vecs = sembeds.vectors  # ragged
@@ -362,7 +362,6 @@ def ant_scorer_forward(
         # now add the placeholder
         placeholder = ops.alloc2f(scores.shape[0], 1)
         top_scores = xp.concatenate( (placeholder, top_scores), 1)
-        #top_scores = ops.softmax(top_scores, axis=1)
 
         out.append((top_scores, top_scores_idx))
 
@@ -389,6 +388,7 @@ def ant_scorer_forward(
 
     offset = 0
     for dy, (prod_back, pw_sum_back), ll in zip(dYscores, backprops, veclens):
+        hi = offset + ll
         dyscore, dyidx = dy
         # remove the placeholder
         dyscore = dyscore[:, 1:]
@@ -398,10 +398,10 @@ def ant_scorer_forward(
         for ii, (ridx, rscores) in enumerate(zip(dyidx, dyscore)):
             fullscore[ii][ridx] = rscores
-        dXembeds.data[offset : offset + ll] = prod_back(fullscore)
-        dXscores[offset : offset + ll] = pw_sum_back(fullscore)
+        dXembeds.data[offset:hi] = prod_back(fullscore)
+        dXscores[offset:hi] = pw_sum_back(fullscore)
 
-        offset += ll
+        offset = hi
 
     # make it fit back into the linear
     dXscores = xp.expand_dims(dXscores, 1)
     return (dXscores, SpanEmbeddings(idxes, dXembeds))
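
A note on the ref changes above: the patch swaps positional sublayer access (model.layers[0]) for Thinc's named refs, so the forward functions no longer depend on layer order. A minimal sketch of the set_ref/get_ref pattern, assuming only Thinc's public Model API; the "prior" layer and the toy_forward/build_toy_scorer names are illustrative, not part of the patch:

    from thinc.api import Linear, Model

    def toy_forward(model: Model, X, is_train: bool):
        # Fetch the sublayer by its registered name instead of by position.
        prior = model.get_ref("prior")
        Y, backprop = prior(X, is_train)
        return Y, backprop

    def build_toy_scorer() -> Model:
        prior = Linear(nO=1, nI=20)
        model = Model("ToyScorer", forward=toy_forward, layers=[prior])
        # Register the sublayer under a stable name; get_ref("prior")
        # keeps working even if model.layers is later reordered.
        model.set_ref("prior", prior)
        return model

This mirrors the width_prior, bilinear, and dropout refs introduced above.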
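
On the placeholder concatenation kept in ant_scorer_forward (next to the deleted commented-out softmax): coarse-to-fine coreference scoring typically prepends a fixed zero-score column so each mention can select "no antecedent". A self-contained numpy sketch of that step; the shapes and values are made up for illustration:

    import numpy

    scores = numpy.array([[2.0, -1.0], [0.5, 1.5]])  # (mentions, candidate antecedents)
    placeholder = numpy.zeros((scores.shape[0], 1))  # one zero score per mention
    top_scores = numpy.concatenate((placeholder, scores), axis=1)
    # Column 0 is now the "no antecedent" option, which is why the
    # backward pass strips it again with dyscore[:, 1:].
    print(top_scores)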
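
The last two hunks precompute hi = offset + ll once per document, so the same bound drives the dXembeds/dXscores slices and the cursor update, replacing the repeated offset + ll arithmetic and the separate offset += ll step. The bookkeeping in isolation, with illustrative names and data:

    import numpy

    def walk_segments(data, lengths):
        # Slice a concatenated array into per-document segments; hi serves
        # as both the slice bound and the next offset.
        segments = []
        offset = 0
        for ll in lengths:
            hi = offset + ll
            segments.append(data[offset:hi])
            offset = hi
        return segments

    print([s.tolist() for s in walk_segments(numpy.arange(6), [2, 1, 3])])
    # [[0, 1], [2], [3, 4, 5]]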