From 8f66176b2dd1196d90ab7c72b7cca5080ad98314 Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Mon, 5 Jul 2021 18:17:10 +0900 Subject: [PATCH] Fix loss? This rewrites the loss so that it does not use the Thinc cross-entropy code at all. The main difference here is that the negative predictions are masked out (= marginalized over), but the negative gradient is still reflected. I'm still not sure this is exactly right, but models seem to train reliably now. --- spacy/ml/models/coref.py | 2 +- spacy/pipeline/coref.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/spacy/ml/models/coref.py b/spacy/ml/models/coref.py index 2545f7325..33c278b3d 100644 --- a/spacy/ml/models/coref.py +++ b/spacy/ml/models/coref.py @@ -394,7 +394,7 @@ def ant_scorer_forward( # now add the placeholder placeholder = ops.alloc2f(scores.shape[0], 1) top_scores = xp.concatenate( (placeholder, top_scores), 1) - top_scores = ops.softmax(top_scores, axis=1) + #top_scores = ops.softmax(top_scores, axis=1) out.append((top_scores, top_scores_idx)) diff --git a/spacy/pipeline/coref.py b/spacy/pipeline/coref.py index 2f9baaeb4..f040e6637 100644 --- a/spacy/pipeline/coref.py +++ b/spacy/pipeline/coref.py @@ -1,4 +1,5 @@ from typing import Iterable, Tuple, Optional, Dict, Callable, Any, List +import warnings from thinc.types import Floats2d, Ints2d from thinc.api import Model, Config, Optimizer, CategoricalCrossentropy @@ -305,9 +306,15 @@ class CoreferenceResolver(TrainablePipe): # boolean to float top_gscores = ops.asarray2f(top_gscores) - grad, loss = self.loss(cscores.T, top_gscores.T) + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', category=RuntimeWarning) + log_marg = ops.softmax(cscores + ops.xp.log(top_gscores), axis=1) + log_norm = ops.softmax(cscores, axis=1) + grad = log_norm - log_marg + # XXX might be better to not square this + loss = (grad ** 2).sum() - gradients.append((grad.T, cidx)) + gradients.append((grad, cidx)) total_loss += 
float(loss) offset = hi