From b7ac4b33e2503d73dccd12984eb917953c9325a7 Mon Sep 17 00:00:00 2001
From: kadarakos
Date: Wed, 11 May 2022 14:59:59 +0000
Subject: [PATCH 1/2] fixing arguments

---
 spacy/ml/models/coref.py          | 8 ++++----
 spacy/ml/models/span_predictor.py | 3 ++-
 spacy/pipeline/coref.py           | 4 ++--
 spacy/pipeline/span_predictor.py  | 1 +
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/spacy/ml/models/coref.py b/spacy/ml/models/coref.py
index 24b5500a2..5042c10da 100644
--- a/spacy/ml/models/coref.py
+++ b/spacy/ml/models/coref.py
@@ -16,10 +16,10 @@ def build_wl_coref_model(
     tok2vec: Model[List[Doc], List[Floats2d]],
     embedding_size: int = 20,
     hidden_size: int = 1024,
-    n_hidden_layers: int = 1,  # TODO rename to "depth"?
+    depth: int = 1,
     dropout: float = 0.3,
     # pairs to keep per mention after rough scoring
-    rough_k: int = 50,
+    rough_candidates: int = 50,
     # TODO is this not a training loop setting?
     a_scoring_batch_size: int = 512,
     # span predictor embeddings
@@ -38,9 +38,9 @@ def build_wl_coref_model(
                 dim,
                 embedding_size,
                 hidden_size,
-                n_hidden_layers,
+                depth,
                 dropout,
-                rough_k,
+                rough_candidates,
                 a_scoring_batch_size,
             ),
             convert_inputs=convert_coref_scorer_inputs,
diff --git a/spacy/ml/models/span_predictor.py b/spacy/ml/models/span_predictor.py
index b990b4019..ea445913b 100644
--- a/spacy/ml/models/span_predictor.py
+++ b/spacy/ml/models/span_predictor.py
@@ -16,6 +16,7 @@ def build_span_predictor(
     tok2vec: Model[List[Doc], List[Floats2d]],
     hidden_size: int = 1024,
     dist_emb_size: int = 64,
+    prefix: str = "coref_head_clusters"
 ):
     # TODO fix this
     try:
@@ -30,7 +31,7 @@ def build_span_predictor(
             convert_inputs=convert_span_predictor_inputs,
         )
         # TODO use proper parameter for prefix
-        head_info = build_get_head_metadata("coref_head_clusters")
+        head_info = build_get_head_metadata(prefix)
         model = (tok2vec & head_info) >> span_predictor
 
     return model
diff --git a/spacy/pipeline/coref.py b/spacy/pipeline/coref.py
index 5237788cc..a8813b7a3 100644
--- a/spacy/pipeline/coref.py
+++ b/spacy/pipeline/coref.py
@@ -33,9 +33,9 @@ default_config = """
 @architectures = "spacy.Coref.v1"
 embedding_size = 20
 hidden_size = 1024
-n_hidden_layers = 1
+depth = 1
 dropout = 0.3
-rough_k = 50
+rough_candidates = 50
 a_scoring_batch_size = 512
 sp_embedding_size = 64
 
diff --git a/spacy/pipeline/span_predictor.py b/spacy/pipeline/span_predictor.py
index 50c2e4ec6..d0561054d 100644
--- a/spacy/pipeline/span_predictor.py
+++ b/spacy/pipeline/span_predictor.py
@@ -26,6 +26,7 @@ default_span_predictor_config = """
 @architectures = "spacy.SpanPredictor.v1"
 hidden_size = 1024
 dist_emb_size = 64
+prefix = coref_head_clusters
 
 [model.tok2vec]
 @architectures = "spacy.Tok2Vec.v2"

From 1dc38944472d3506f0a9427f8d71163eb7550da1 Mon Sep 17 00:00:00 2001
From: kadarakos
Date: Tue, 17 May 2022 15:36:32 +0000
Subject: [PATCH 2/2] new parameters

---
 spacy/ml/models/span_predictor.py | 43 +++++++++++++++++++++++++------
 spacy/pipeline/span_predictor.py  |  5 +++-
 2 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/spacy/ml/models/span_predictor.py b/spacy/ml/models/span_predictor.py
index da9c78c98..7375c2153 100644
--- a/spacy/ml/models/span_predictor.py
+++ b/spacy/ml/models/span_predictor.py
@@ -15,7 +15,10 @@ from .coref_util import get_sentence_ids
 def build_span_predictor(
     tok2vec: Model[List[Doc], List[Floats2d]],
     hidden_size: int = 1024,
-    dist_emb_size: int = 64,
+    distance_embedding_size: int = 64,
+    conv_channels: int = 4,
+    window_size: int = 1,
+    max_distance: int = 128,
     prefix: str = "coref_head_clusters"
 ):
     # TODO add model return types
@@ -28,7 +31,14 @@ def build_span_predictor(
 
     with Model.define_operators({">>": chain, "&": tuplify}):
         span_predictor = PyTorchWrapper(
-            SpanPredictor(dim, hidden_size, dist_emb_size),
+            SpanPredictor(
+                dim,
+                hidden_size,
+                distance_embedding_size,
+                conv_channels,
+                window_size,
+                max_distance
+            ),
             convert_inputs=convert_span_predictor_inputs,
         )
         # TODO use proper parameter for prefix
@@ -123,8 +133,21 @@ def head_data_forward(model, docs, is_train):
 
 # TODO this should maybe have a different name from the component
 class SpanPredictor(torch.nn.Module):
-    def __init__(self, input_size: int, hidden_size: int, dist_emb_size: int):
+    def __init__(
+        self,
+        input_size: int,
+        hidden_size: int,
+        dist_emb_size: int,
+        conv_channels: int,
+        window_size: int,
+        max_distance: int
+
+    ):
         super().__init__()
+        if max_distance % 2 != 0:
+            raise ValueError(
+                "max_distance has to be an even number"
+            )
         # input size = single token size
         # 64 = probably distance emb size
         # TODO check that dist_emb_size use is correct
@@ -139,12 +162,15 @@ class SpanPredictor(torch.nn.Module):
             # this use of dist_emb_size looks wrong but it was 64...?
             torch.nn.Linear(256, dist_emb_size),
         )
-        # TODO make the Convs also parametrizeable
+        kernel_size = window_size * 2 + 1
         self.conv = torch.nn.Sequential(
-            torch.nn.Conv1d(64, 4, 3, 1, 1), torch.nn.Conv1d(4, 2, 3, 1, 1)
+            torch.nn.Conv1d(dist_emb_size, conv_channels, kernel_size, 1, 1),
+            torch.nn.Conv1d(conv_channels, 2, kernel_size, 1, 1)
         )
-        # TODO make embeddings size a parameter
-        self.emb = torch.nn.Embedding(128, dist_emb_size)  # [-63, 63] + too_far
+        self.max_distance = max_distance
+        # handle distances between +-(max_distance - 2 / 2)
+        self.emb = torch.nn.Embedding(max_distance, dist_emb_size)
 
     def forward(
         self,
@@ -170,10 +196,11 @@ class SpanPredictor(torch.nn.Module):
         relative_positions = heads_ids.unsqueeze(1) - torch.arange(
             words.shape[0]
         ).unsqueeze(0)
+        md = self.max_distance
         # make all valid distances positive
-        emb_ids = relative_positions + 63
+        emb_ids = relative_positions + (md - 2) // 2
         # "too_far"
-        emb_ids[(emb_ids < 0) + (emb_ids > 126)] = 127
+        emb_ids[(emb_ids < 0) + (emb_ids > md - 2)] = md - 1
         # Obtain "same sentence" boolean mask: (n_heads x n_words)
         heads_ids = heads_ids.long()
         same_sent = sent_id[heads_ids].unsqueeze(1) == sent_id.unsqueeze(0)
diff --git a/spacy/pipeline/span_predictor.py b/spacy/pipeline/span_predictor.py
index d0561054d..12ea6611c 100644
--- a/spacy/pipeline/span_predictor.py
+++ b/spacy/pipeline/span_predictor.py
@@ -25,7 +25,10 @@ default_span_predictor_config = """
 [model]
 @architectures = "spacy.SpanPredictor.v1"
 hidden_size = 1024
-dist_emb_size = 64
+distance_embedding_size = 64
+conv_channels = 4
+window_size = 1
+max_distance = 128
 prefix = coref_head_clusters
 
 [model.tok2vec]
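
The sketch below is editor commentary, not part of either commit: it illustrates how the `max_distance` parameter introduced in PATCH 2/2 generalises the previously hard-coded distance buckets (`+ 63`, `> 126`, `= 127`) in `SpanPredictor.forward`. The `distance_bucket` helper is hypothetical, but its arithmetic mirrors the added lines: signed head-to-token offsets within roughly ±(max_distance − 2) / 2 each get their own embedding row, and everything further away is clamped to a shared "too far" row.

```python
# Editor's sketch (not part of the patch): the distance-bucket mapping
# that PATCH 2/2 parametrizes via max_distance. With the default
# max_distance = 128 this reproduces the old hard-coded behaviour:
# offsets in [-63, 63] keep their own embedding row, anything further
# away is clamped to the shared "too far" row (index 127).
import torch


def distance_bucket(relative_positions: torch.Tensor, max_distance: int = 128) -> torch.Tensor:
    if max_distance % 2 != 0:
        raise ValueError("max_distance has to be an even number")
    md = max_distance
    # shift signed offsets so that valid distances become non-negative indices
    emb_ids = relative_positions + (md - 2) // 2
    # out-of-range offsets all share the last embedding row
    emb_ids[(emb_ids < 0) | (emb_ids > md - 2)] = md - 1
    return emb_ids


print(distance_bucket(torch.tensor([-200, -63, 0, 63, 200])))
# tensor([127,   0,  63, 126, 127])
```

One design note worth flagging: the patch derives `kernel_size = window_size * 2 + 1` but leaves both `Conv1d` layers with the original padding of 1, so the convolution output length only matches the input for the default `window_size = 1`; larger windows would need `padding = window_size` to stay length-preserving.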