diff --git a/spacy/ml/models/coref.py b/spacy/ml/models/coref.py
index 2155d489c..e77797d4a 100644
--- a/spacy/ml/models/coref.py
+++ b/spacy/ml/models/coref.py
@@ -387,7 +387,13 @@ def ant_scorer_forward(
 
         scores = pw_prod + pw_sum + mask
 
-        top_scores, top_scores_idx = topk(xp, scores, min(ant_limit, len(scores)))
+        top_limit = min(ant_limit, len(scores))
+        top_scores, top_scores_idx = topk(xp, scores, top_limit)
+        # now add the placeholder
+        placeholder = ops.alloc2f(scores.shape[0], 1)
+        top_scores = xp.concatenate((placeholder, top_scores), 1)
+        top_scores = ops.softmax(top_scores, axis=1)
+
         out.append((top_scores, top_scores_idx))
 
         # In the full model these scores can be further refined. In the current
@@ -414,6 +420,8 @@ def ant_scorer_forward(
 
         offset = 0
         for dy, (prod_back, pw_sum_back), ll in zip(dYscores, backprops, veclens):
             dyscore, dyidx = dy
+            # remove the placeholder
+            dyscore = dyscore[:, 1:]
             # the full score grid is square
             fullscore = ops.alloc2f(ll, ll)
diff --git a/spacy/pipeline/coref.py b/spacy/pipeline/coref.py
index 4caf02359..f0ae62fa9 100644
--- a/spacy/pipeline/coref.py
+++ b/spacy/pipeline/coref.py
@@ -142,10 +142,6 @@ class CoreferenceResolver(TrainablePipe):
             starts = idxs[offset:hi, 0]
             ends = idxs[offset:hi, 1]
 
-            # need to add the placeholder
-            placeholder = self.model.ops.alloc2f(cscores.shape[0], 1)
-            cscores = xp.concatenate((placeholder, cscores), 1)
-
             predicted = get_predicted_clusters(xp, starts, ends, ant_idxs, cscores)
             clusters_by_doc.append(predicted)
         return clusters_by_doc
@@ -291,9 +287,8 @@ class CoreferenceResolver(TrainablePipe):
 
         offset = 0
         gradients = []
-        loss = 0
+        total_loss = 0
         for example, (cscores, cidx) in zip(examples, score_matrix):
-            # assume cids has absolute mention ids
             ll = cscores.shape[0]
             hi = offset + ll
 
@@ -310,20 +305,14 @@ class CoreferenceResolver(TrainablePipe):
             # boolean to float
             top_gscores = ops.asarray2f(top_gscores)
 
-            # add the placeholder to cscores
-            placeholder = self.model.ops.alloc2f(ll, 1)
-            cscores = xp.concatenate((placeholder, cscores), 1)
+            grad, loss = self.loss(cscores.T, top_gscores.T)
 
-            # do softmax to cscores
-            cscores = ops.softmax(cscores, axis=1)
+            gradients.append((grad.T, cidx))
+            total_loss += float(loss)
 
-            diff = self.loss.get_grad(cscores.T, top_gscores.T).T
-            diff = diff[:, 1:]
-            gradients.append((diff, cidx))
+            offset = hi
 
-            loss += float(self.loss.get_loss(cscores.T, top_gscores.T))
-            offset += ll
-        return loss, gradients
+        return total_loss, gradients
 
     def initialize(
         self,
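
For context, here is a minimal sketch of the placeholder trick this diff moves from the pipeline into the model. It uses plain NumPy with a hand-rolled softmax and made-up scores (not thinc's `ops.softmax` or the real `top_scores`): a zero-score column is prepended before the softmax, so each mention can select "no antecedent" as an option, since exp(0) = 1 contributes a fixed unnormalized mass to the normalizer.

```python
import numpy as np

def softmax(x, axis=-1):
    # numerically stable row-wise softmax
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

# hypothetical antecedent scores: one row per mention, one column per
# top-k candidate antecedent (a stand-in for the real top_scores)
scores = np.array([[2.0, 0.5],
                   [0.1, -1.0]])

# the placeholder: a fixed score of 0 for the "no antecedent" option
placeholder = np.zeros((scores.shape[0], 1), dtype=scores.dtype)
probs = softmax(np.concatenate((placeholder, scores), axis=1), axis=1)

# probs[:, 0] is now P(no antecedent) for each mention, and probs[:, 1:]
# stays aligned with top_scores_idx -- which is why the backward pass in
# the first file drops the first column (dyscore[:, 1:]) before scattering
# gradients back into the full score grid
```

Handling the placeholder and softmax inside `ant_scorer_forward` means both `predict` and `get_loss` now receive already-normalized scores with the placeholder column in place, which is what allows the pipeline-side concatenate/softmax/slice bookkeeping to be deleted. It also lets the two `get_grad`/`get_loss` calls collapse into a single `self.loss(...)` call, since thinc loss objects return a `(gradient, loss)` tuple when called directly.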