From b7ac4b33e2503d73dccd12984eb917953c9325a7 Mon Sep 17 00:00:00 2001
From: kadarakos
Date: Wed, 11 May 2022 14:59:59 +0000
Subject: [PATCH 1/2] fixing arguments

---
 spacy/ml/models/coref.py          | 8 ++++----
 spacy/ml/models/span_predictor.py | 3 ++-
 spacy/pipeline/coref.py           | 4 ++--
 spacy/pipeline/span_predictor.py  | 1 +
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/spacy/ml/models/coref.py b/spacy/ml/models/coref.py
index 24b5500a2..5042c10da 100644
--- a/spacy/ml/models/coref.py
+++ b/spacy/ml/models/coref.py
@@ -16,10 +16,10 @@ def build_wl_coref_model(
     tok2vec: Model[List[Doc], List[Floats2d]],
     embedding_size: int = 20,
     hidden_size: int = 1024,
-    n_hidden_layers: int = 1,  # TODO rename to "depth"?
+    depth: int = 1,
     dropout: float = 0.3,
     # pairs to keep per mention after rough scoring
-    rough_k: int = 50,
+    rough_candidates: int = 50,
     # TODO is this not a training loop setting?
     a_scoring_batch_size: int = 512,
     # span predictor embeddings
@@ -38,9 +38,9 @@ def build_wl_coref_model(
                 dim,
                 embedding_size,
                 hidden_size,
-                n_hidden_layers,
+                depth,
                 dropout,
-                rough_k,
+                rough_candidates,
                 a_scoring_batch_size,
             ),
             convert_inputs=convert_coref_scorer_inputs,
diff --git a/spacy/ml/models/span_predictor.py b/spacy/ml/models/span_predictor.py
index b990b4019..ea445913b 100644
--- a/spacy/ml/models/span_predictor.py
+++ b/spacy/ml/models/span_predictor.py
@@ -16,6 +16,7 @@ def build_span_predictor(
     tok2vec: Model[List[Doc], List[Floats2d]],
     hidden_size: int = 1024,
     dist_emb_size: int = 64,
+    prefix: str = "coref_head_clusters"
 ):
     # TODO fix this
     try:
@@ -30,7 +31,7 @@ def build_span_predictor(
             convert_inputs=convert_span_predictor_inputs,
         )
         # TODO use proper parameter for prefix
-        head_info = build_get_head_metadata("coref_head_clusters")
+        head_info = build_get_head_metadata(prefix)
         model = (tok2vec & head_info) >> span_predictor
 
     return model
diff --git a/spacy/pipeline/coref.py b/spacy/pipeline/coref.py
index 5237788cc..a8813b7a3 100644
--- a/spacy/pipeline/coref.py
+++ b/spacy/pipeline/coref.py
@@ -33,9 +33,9 @@ default_config = """
 @architectures = "spacy.Coref.v1"
 embedding_size = 20
 hidden_size = 1024
-n_hidden_layers = 1
+depth = 1
 dropout = 0.3
-rough_k = 50
+rough_candidates = 50
 a_scoring_batch_size = 512
 sp_embedding_size = 64
 
diff --git a/spacy/pipeline/span_predictor.py b/spacy/pipeline/span_predictor.py
index 50c2e4ec6..d0561054d 100644
--- a/spacy/pipeline/span_predictor.py
+++ b/spacy/pipeline/span_predictor.py
@@ -26,6 +26,7 @@ default_span_predictor_config = """
 @architectures = "spacy.SpanPredictor.v1"
 hidden_size = 1024
 dist_emb_size = 64
+prefix = coref_head_clusters
 
 [model.tok2vec]
 @architectures = "spacy.Tok2Vec.v2"

From 1dc38944472d3506f0a9427f8d71163eb7550da1 Mon Sep 17 00:00:00 2001
From: kadarakos
Date: Tue, 17 May 2022 15:36:32 +0000
Subject: [PATCH 2/2] new parameters

---
 spacy/ml/models/span_predictor.py | 43 +++++++++++++++++++++++++------
 spacy/pipeline/span_predictor.py  |  5 +++-
 2 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/spacy/ml/models/span_predictor.py b/spacy/ml/models/span_predictor.py
index da9c78c98..7375c2153 100644
--- a/spacy/ml/models/span_predictor.py
+++ b/spacy/ml/models/span_predictor.py
@@ -15,7 +15,10 @@ from .coref_util import get_sentence_ids
 def build_span_predictor(
     tok2vec: Model[List[Doc], List[Floats2d]],
     hidden_size: int = 1024,
-    dist_emb_size: int = 64,
+    distance_embedding_size: int = 64,
+    conv_channels: int = 4,
+    window_size: int = 1,
+    max_distance: int = 128,
     prefix: str = "coref_head_clusters"
 ):
     # TODO add model return types
@@ -28,7 +31,14 @@ def build_span_predictor(
 
     with Model.define_operators({">>": chain, "&": tuplify}):
         span_predictor = PyTorchWrapper(
-            SpanPredictor(dim, hidden_size, dist_emb_size),
+            SpanPredictor(
+                dim,
+                hidden_size,
+                distance_embedding_size,
+                conv_channels,
+                window_size,
+                max_distance
+            ),
             convert_inputs=convert_span_predictor_inputs,
         )
         # TODO use proper parameter for prefix
@@ -123,8 +133,21 @@ def head_data_forward(model, docs, is_train):
 
 # TODO this should maybe have a different name from the component
 class SpanPredictor(torch.nn.Module):
-    def __init__(self, input_size: int, hidden_size: int, dist_emb_size: int):
+    def __init__(
+        self,
+        input_size: int,
+        hidden_size: int,
+        dist_emb_size: int,
+        conv_channels: int,
+        window_size: int,
+        max_distance: int
+
+    ):
         super().__init__()
+        if max_distance % 2 != 0:
+            raise ValueError(
+                "max_distance has to be an even number"
+            )
         # input size = single token size
         # 64 = probably distance emb size
         # TODO check that dist_emb_size use is correct
@@ -139,12 +162,15 @@ class SpanPredictor(torch.nn.Module):
             # this use of dist_emb_size looks wrong but it was 64...?
             torch.nn.Linear(256, dist_emb_size),
         )
-        # TODO make the Convs also parametrizeable
+        kernel_size = window_size * 2 + 1
         self.conv = torch.nn.Sequential(
-            torch.nn.Conv1d(64, 4, 3, 1, 1), torch.nn.Conv1d(4, 2, 3, 1, 1)
+            torch.nn.Conv1d(dist_emb_size, conv_channels, kernel_size, 1, 1),
+            torch.nn.Conv1d(conv_channels, 2, kernel_size, 1, 1)
         )
-        # TODO make embeddings size a parameter
-        self.emb = torch.nn.Embedding(128, dist_emb_size)  # [-63, 63] + too_far
+        self.max_distance = max_distance
+        # handle distances between +-(max_distance - 2 / 2)
+        self.emb = torch.nn.Embedding(max_distance, dist_emb_size)
 
     def forward(
         self,
@@ -170,10 +196,11 @@ class SpanPredictor(torch.nn.Module):
         relative_positions = heads_ids.unsqueeze(1) - torch.arange(
             words.shape[0]
         ).unsqueeze(0)
+        md = self.max_distance
         # make all valid distances positive
-        emb_ids = relative_positions + 63
+        emb_ids = relative_positions + (md - 2) // 2
         # "too_far"
-        emb_ids[(emb_ids < 0) + (emb_ids > 126)] = 127
+        emb_ids[(emb_ids < 0) + (emb_ids > md - 2)] = md - 1
         # Obtain "same sentence" boolean mask: (n_heads x n_words)
         heads_ids = heads_ids.long()
         same_sent = sent_id[heads_ids].unsqueeze(1) == sent_id.unsqueeze(0)
diff --git a/spacy/pipeline/span_predictor.py b/spacy/pipeline/span_predictor.py
index d0561054d..12ea6611c 100644
--- a/spacy/pipeline/span_predictor.py
+++ b/spacy/pipeline/span_predictor.py
@@ -25,7 +25,10 @@ default_span_predictor_config = """
 [model]
 @architectures = "spacy.SpanPredictor.v1"
 hidden_size = 1024
-dist_emb_size = 64
+distance_embedding_size = 64
+conv_channels = 4
+window_size = 1
+max_distance = 128
 prefix = coref_head_clusters
 
 [model.tok2vec]
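
The sketch below is editor commentary, not part of either commit: it illustrates how the `max_distance` parameter introduced in PATCH 2/2 generalises the previously hard-coded distance buckets (`+ 63`, `> 126`, `= 127`) in `SpanPredictor.forward`. The `distance_bucket` helper is hypothetical, but its arithmetic mirrors the added lines: signed head-to-token offsets within roughly ±(max_distance − 2) / 2 each get their own embedding row, and everything further away is clamped to a shared "too far" row.

```python
# Editor's sketch (not part of the patch): the distance-bucket mapping
# that PATCH 2/2 parametrizes via max_distance. With the default
# max_distance = 128 this reproduces the old hard-coded behaviour:
# offsets in [-63, 63] keep their own embedding row, anything further
# away is clamped to the shared "too far" row (index 127).
import torch


def distance_bucket(relative_positions: torch.Tensor, max_distance: int = 128) -> torch.Tensor:
    if max_distance % 2 != 0:
        raise ValueError("max_distance has to be an even number")
    md = max_distance
    # shift signed offsets so that valid distances become non-negative indices
    emb_ids = relative_positions + (md - 2) // 2
    # out-of-range offsets all share the last embedding row
    emb_ids[(emb_ids < 0) | (emb_ids > md - 2)] = md - 1
    return emb_ids


print(distance_bucket(torch.tensor([-200, -63, 0, 63, 200])))
# tensor([127,   0,  63, 126, 127])
```

One design note worth flagging: the patch derives `kernel_size = window_size * 2 + 1` but leaves both `Conv1d` layers with the original padding of 1, so the convolution output length only matches the input for the default `window_size = 1`; larger windows would need `padding = window_size` to stay length-preserving.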