From e00bd422d9bb2cd4dbf1db04d048348b62e8eceb Mon Sep 17 00:00:00 2001
From: Paul O'Leary McCann
Date: Sat, 10 Jul 2021 20:44:20 +0900
Subject: [PATCH] Fix span embeds

Some of the lengths and backprop weren't right. Also various cleanup.
---
 spacy/ml/models/coref.py   | 23 +++++++++++------------
 spacy/ml/models/spancat.py |  2 +-
 spacy/pipeline/coref.py    |  5 +----
 3 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/spacy/ml/models/coref.py b/spacy/ml/models/coref.py
index 37f6ff0ff..5d2dc9ffb 100644
--- a/spacy/ml/models/coref.py
+++ b/spacy/ml/models/coref.py
@@ -2,7 +2,8 @@ from dataclasses import dataclass
 import warnings
 
 from thinc.api import Model, Linear, Relu, Dropout
-from thinc.api import chain, noop, Embed, add, tuplify
+from thinc.api import chain, noop, Embed, add, tuplify, concatenate
+from thinc.api import reduce_first, reduce_last, reduce_mean
 from thinc.types import Floats2d, Floats1d, Ints2d, Ragged
 from typing import List, Callable, Tuple, Any
 from ...tokens import Doc
@@ -163,7 +164,8 @@ def span_embeddings_forward(
 
     # TODO support attention here
     tokvecs = xp.concatenate(tokvecs)
-    tokvecs_r = Ragged(tokvecs, docmenlens)
+    doclens = [len(doc) for doc in docs]
+    tokvecs_r = Ragged(tokvecs, doclens)
     mentions_r = Ragged(mentions, docmenlens)
 
     span_reduce = model.get_ref("span_reducer")
@@ -172,16 +174,15 @@ def span_embeddings_forward(
     embeds = Ragged(spanvecs, docmenlens)
 
     def backprop_span_embed(dY: SpanEmbeddings) -> Tuple[List[Floats2d], List[Doc]]:
+        grad, idxes = span_reduce_back(dY.vectors.data)
         oweights = []
         offset = 0
-        for mlen in dY.vectors.lengths:
-            hi = offset + mlen
-            vecs = dY.vectors.data[offset:hi]
-            out, out_idx = span_reduce_back(vecs)
-            oweights.append(out.data)
-
+        for doclen in doclens:
+            hi = offset + doclen
+            oweights.append(grad.data[offset:hi])
             offset = hi
+
         return oweights, docs
 
     return SpanEmbeddings(mentions, embeds), backprop_span_embed
 
@@ -420,10 +421,8 @@ def pairwise_sum(ops, mention_scores: Floats1d) -> Tuple[Floats2d, Callable]:
 
     def backward(d_pwsum: Floats2d) -> Floats1d:
         # For the backward pass, the gradient is distributed over the whole row and
         # column, so pull it all in.
-        dim = d_pwsum.shape[0]
-        out = ops.alloc1f(dim)
-        for ii in range(dim):
-            out[ii] = d_pwsum[:, ii].sum() + d_pwsum[ii, :].sum()
+
+        out = d_pwsum.sum(axis=0) + d_pwsum.sum(axis=1)
 
         return out

diff --git a/spacy/ml/models/spancat.py b/spacy/ml/models/spancat.py
index 5c49fef40..b3fd7bd98 100644
--- a/spacy/ml/models/spancat.py
+++ b/spacy/ml/models/spancat.py
@@ -25,7 +25,7 @@ def build_mean_max_reducer(hidden_size: int) -> Model[Ragged, Floats2d]:
     return chain(
         concatenate(reduce_last(), reduce_first(), reduce_mean(), reduce_max()),
         Maxout(nO=hidden_size, normalize=True, dropout=0.0),
-        )
+    )
 
 
 @registry.architectures.register("spacy.SpanCategorizer.v1")
diff --git a/spacy/pipeline/coref.py b/spacy/pipeline/coref.py
index f040e6637..3fa59ab72 100644
--- a/spacy/pipeline/coref.py
+++ b/spacy/pipeline/coref.py
@@ -296,7 +296,7 @@ class CoreferenceResolver(TrainablePipe):
 
             clusters = get_clusters_from_doc(example.reference)
             gscores = create_gold_scores(mention_idx[offset:hi], clusters)
-            gscores = xp.asarray(gscores)
+            gscores = ops.asarray2f(gscores)
             top_gscores = xp.take_along_axis(gscores, cidx, axis=1)
             # now add the placeholder
             gold_placeholder = ~top_gscores.any(axis=1).T
@@ -311,9 +311,6 @@ class CoreferenceResolver(TrainablePipe):
 
             log_marg = ops.softmax(cscores + ops.xp.log(top_gscores), axis=1)
             log_norm = ops.softmax(cscores, axis=1)
             grad = log_norm - log_marg
-            # XXX might be better to not square this
-            loss = (grad ** 2).sum()
-
            gradients.append((grad, cidx))
            total_loss += float(loss)
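
A note on the lengths fix in span_embeddings_forward: a Ragged pairs a
concatenated data array with per-group lengths, so the lengths attached to
tokvecs must be token counts per doc (doclens), not mention counts per doc
(docmenlens). The same token counts then govern the backward pass, which now
calls span_reduce_back once on the whole concatenated gradient and slices the
result into per-doc blocks of doclens rows. A minimal sketch of the invariant,
using plain NumPy arrays and made-up sizes rather than real Doc objects
(tokvecs, doclens, docmenlens here are stand-ins for the patch's variables):

    import numpy as np
    from thinc.types import Ragged

    # Two hypothetical docs with 3 and 2 tokens, 4-dim token vectors,
    # concatenated row-wise just like tokvecs in the patch.
    tokvecs = np.random.rand(5, 4).astype("float32")
    doclens = np.asarray([3, 2], dtype="int32")     # token counts: sum to the 5 rows
    docmenlens = np.asarray([1, 3], dtype="int32")  # mention counts: sum to 4, wrong here

    # The lengths must partition the data rows, which only doclens does.
    tokvecs_r = Ragged(tokvecs, doclens)
    assert tokvecs_r.lengths.sum() == tokvecs_r.data.shape[0]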
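
The pairwise_sum backward rewrite is a pure vectorization: with
S[i, j] = s[i] + s[j], the gradient for s[i] is the sum of row i and column i
of d_pwsum, whether collected one cell at a time or with two axis sums. A
quick standalone equivalence check, with plain NumPy standing in for the
ops.alloc1f call in the patch (the names out_loop and out_vec are only for
this sketch):

    import numpy as np

    rng = np.random.default_rng(0)
    d_pwsum = rng.standard_normal((6, 6)).astype("float32")

    # Old version: fill one output cell at a time.
    dim = d_pwsum.shape[0]
    out_loop = np.zeros(dim, dtype="float32")
    for ii in range(dim):
        out_loop[ii] = d_pwsum[:, ii].sum() + d_pwsum[ii, :].sum()

    # New version: the same row and column sums, vectorized.
    out_vec = d_pwsum.sum(axis=0) + d_pwsum.sum(axis=1)

    assert np.allclose(out_loop, out_vec)

Beyond being shorter, the vectorized form avoids a Python-level loop over what
may be a GPU array, which is the main practical win.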