From 1eaf8fb0cf01dec6d6a01f20e109eb21fd5f530d Mon Sep 17 00:00:00 2001
From: Kádár Ákos
Date: Wed, 23 Mar 2022 11:24:27 +0100
Subject: [PATCH] span predictor debug start

---
 spacy/ml/models/coref.py |  9 ++++-----
 spacy/pipeline/coref.py  | 14 +++++++-------
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/spacy/ml/models/coref.py b/spacy/ml/models/coref.py
index 382d7a98b..29f3ad819 100644
--- a/spacy/ml/models/coref.py
+++ b/spacy/ml/models/coref.py
@@ -91,7 +91,7 @@ def build_span_predictor(
     # TODO fix device - should be automatic
     device = "cuda:0"
     span_predictor = PyTorchWrapper(
-        SpanPredictor(hidden_size, dist_emb_size, device),
+        SpanPredictor(dim, dist_emb_size, device),
         convert_inputs=convert_span_predictor_inputs
     )
     # TODO use proper parameter for prefix
@@ -148,7 +148,6 @@ def convert_span_predictor_inputs(
     # Normally we should use the input is_train, but for these two it's not relevant
     sent_ids = xp2torch(sent_ids[0], requires_grad=False)
     head_ids = xp2torch(head_ids[0], requires_grad=False)
-    word_features = xp2torch(tok2vec[0], requires_grad=is_train)

     argskwargs = ArgsKwargs(args=(sent_ids, word_features, head_ids), kwargs={})
@@ -557,7 +556,6 @@ class SpanPredictor(torch.nn.Module):
         sent_id = torch.tensor(sent_id, device=words.device)
         heads_ids = heads_ids.long()
         same_sent = (sent_id[heads_ids].unsqueeze(1) == sent_id.unsqueeze(0))
-
         # To save memory, only pass candidates from one sentence for each head
         # pair_matrix contains concatenated span_head_emb + candidate_emb + distance_emb
         # for each candidate among the words in the same sentence as span_head
@@ -568,11 +566,11 @@ class SpanPredictor(torch.nn.Module):
                 words[cols],
                 self.emb(emb_ids[rows, cols]),
             ), dim=1)
-
+        input(len(heads_ids))
         lengths = same_sent.sum(dim=1)
         padding_mask = torch.arange(0, lengths.max().item(), device=words.device).unsqueeze(0)
         padding_mask = (padding_mask < lengths.unsqueeze(1))  # [n_heads, max_sent_len]
-
+        input(padding_mask.shape)
         # [n_heads, max_sent_len, input_size * 2 + distance_emb_size]
         # This is necessary to allow the convolution layer to look at several
         # word scores
@@ -592,6 +590,7 @@
             valid_positions = torch.stack((valid_starts, valid_ends), dim=2)
             return scores + valid_positions
         return scores
+
 class DistancePairwiseEncoder(torch.nn.Module):

     def __init__(self, embedding_size, dropout_rate):
diff --git a/spacy/pipeline/coref.py b/spacy/pipeline/coref.py
index 54e9d8cfd..b3ced454c 100644
--- a/spacy/pipeline/coref.py
+++ b/spacy/pipeline/coref.py
@@ -3,7 +3,7 @@ import warnings

 from thinc.types import Floats2d, Floats3d, Ints2d
 from thinc.api import Model, Config, Optimizer, CategoricalCrossentropy
-from thinc.api import set_dropout_rate
+from thinc.api import set_dropout_rate, to_categorical
 from itertools import islice
 from statistics import mean
@@ -513,10 +513,8 @@ class SpanPredictor(TrainablePipe):
         total_loss = 0

         for eg in examples:
-            preds, backprop = self.model.begin_update([eg.predicted])
-            score_matrix, mention_idx = preds
-
-            loss, d_scores = self.get_loss([eg], score_matrix, mention_idx)
+            span_scores, backprop = self.model.begin_update([eg.predicted])
+            loss, d_scores = self.get_loss([eg], span_scores)
             total_loss += loss
             # TODO check shape here
             backprop((d_scores, mention_idx))
@@ -573,8 +571,10 @@ class SpanPredictor(TrainablePipe):
         for cluster in gold:
             for mention in cluster:
                 starts.append(mention[0])
-                ends.append(mention[1])
-
+                # XXX I think this was missing here
+                ends.append(mention[1] - 1)
+        starts = self.model.ops.xp.asarray(starts)
+        ends = self.model.ops.xp.asarray(ends)
         start_scores = span_scores[:, :, 0]
         end_scores = span_scores[:, :, 1]
         n_classes = start_scores.shape[1]
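
As context for the SpanPredictor.forward hunks: per the comments in the diff, pair_matrix concatenates, for each head, the head embedding, each same-sentence candidate's embedding, and a distance embedding. A minimal standalone sketch of that assembly; the toy sizes are invented and the random dist_emb tensor stands in for self.emb(emb_ids):

import torch

# Toy setup: 6 words in two sentences of 3; every word is treated as a head,
# so heads_ids is just arange here.
sent_id = torch.tensor([0, 0, 0, 1, 1, 1])
heads_ids = torch.arange(6).long()
same_sent = sent_id[heads_ids].unsqueeze(1) == sent_id.unsqueeze(0)

dim, dist_emb_size = 4, 2
words = torch.randn(6, dim)
dist_emb = torch.randn(6, 6, dist_emb_size)  # stand-in for self.emb(emb_ids)

# Keep only (head, candidate) pairs from the same sentence, then concatenate
# head embedding + candidate embedding + distance embedding per pair.
rows, cols = torch.nonzero(same_sent, as_tuple=True)
pair_matrix = torch.cat(
    (words[heads_ids[rows]], words[cols], dist_emb[rows, cols]), dim=1
)
print(pair_matrix.shape)  # torch.Size([18, 10]) = [n_pairs, dim * 2 + dist_emb_size]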
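The two added input() calls are temporary debug probes for len(heads_ids) and padding_mask.shape. What the mask computation they inspect actually produces, as a self-contained sketch with made-up lengths:

import torch

# lengths[i] = number of same-sentence candidates for head i (same_sent.sum(dim=1)).
lengths = torch.tensor([3, 1, 2])
max_len = int(lengths.max().item())
# Row i is True for its first lengths[i] slots and False for the padding.
padding_mask = torch.arange(0, max_len).unsqueeze(0) < lengths.unsqueeze(1)
print(padding_mask.shape)  # torch.Size([3, 3]) -> [n_heads, max_sent_len]
# tensor([[ True,  True,  True],
#         [ True, False, False],
#         [ True,  True, False]])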
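On the XXX comment in the last hunk: spaCy-style spans carry an exclusive end index, while span_scores scores actual token positions, so the gold end target needs the index of the mention's last token, hence mention[1] - 1. The new to_categorical import suggests the gold indices are then one-hot encoded against the score matrix; a sketch under those assumptions, with invented cluster data and a hypothetical n_classes:

import numpy as np
from thinc.api import to_categorical

# One cluster with two mentions, (start, end) pairs with exclusive ends,
# as in spaCy's Span.start / Span.end token indices.
gold = [[(0, 2), (5, 8)]]
starts, ends = [], []
for cluster in gold:
    for mention in cluster:
        starts.append(mention[0])
        ends.append(mention[1] - 1)  # exclusive end -> index of the last token

n_classes = 10  # number of token positions the scores range over
start_targets = to_categorical(np.asarray(starts), n_classes=n_classes)
end_targets = to_categorical(np.asarray(ends), n_classes=n_classes)
print(start_targets.shape, end_targets.shape)  # (2, 10) (2, 10)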