Fix loss?

This rewrites the loss so that it no longer uses the Thinc crossentropy
code at all. The main difference is that negative predictions are masked
out (i.e. marginalized over) when building the target distribution, but
they still receive a gradient.

I'm still not sure this is exactly right, but models seem to train
reliably now.
Commit: 8f66176b2d
Parent: 5db28ec2fd
Author: Paul O'Leary McCann
Date:   2021-07-05 18:17:10 +09:00

2 changed files with 10 additions and 3 deletions
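
To make the commit message concrete: for a marginal (masked) loss over
antecedents, the gradient of the negative log marginal likelihood with
respect to the raw scores is the full softmax minus the softmax
renormalized over the gold antecedents only, which is exactly the
log_norm - log_marg line in the diff below. A minimal numpy sketch, not
from the commit, with scores and gold as hypothetical toy stand-ins for
cscores and top_gscores:

    import numpy as np

    def softmax(x, axis=-1):
        # Numerically stable softmax.
        e = np.exp(x - x.max(axis=axis, keepdims=True))
        return e / e.sum(axis=axis, keepdims=True)

    # Hypothetical toy inputs: one mention, three candidate antecedents.
    scores = np.array([[2.0, 0.5, -1.0]])  # stand-in for cscores
    gold = np.array([[1.0, 0.0, 1.0]])     # stand-in for top_gscores

    probs = softmax(scores, axis=1)  # the model's distribution (log_norm)
    # Adding log(gold) sends non-gold scores to -inf, so this softmax
    # renormalizes over the gold antecedents only; the log(0) warnings
    # are expected, hence the warnings filter in the diff.
    with np.errstate(divide="ignore"):
        masked = softmax(scores + np.log(gold), axis=1)  # (log_marg)
    grad = probs - masked  # gradient of the negative log marginal likelihood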

@@ -394,7 +394,7 @@ def ant_scorer_forward(
     # now add the placeholder
     placeholder = ops.alloc2f(scores.shape[0], 1)
     top_scores = xp.concatenate( (placeholder, top_scores), 1)
-    top_scores = ops.softmax(top_scores, axis=1)
+    #top_scores = ops.softmax(top_scores, axis=1)
     out.append((top_scores, top_scores_idx))
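
Note that the softmax here is commented out rather than deleted: the
rewritten loss in the next file normalizes the raw scores itself, so
softmaxing in the forward pass as well would hand the loss an
already-flattened distribution. A quick check with made-up values:

    import numpy as np

    def softmax(x):
        e = np.exp(x - x.max())
        return e / e.sum()

    x = np.array([3.0, 1.0, 0.0])
    once = softmax(x)      # ~[0.84, 0.11, 0.04]
    twice = softmax(once)  # ~[0.52, 0.25, 0.23] -- much flatter than intended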

@@ -1,4 +1,5 @@
 from typing import Iterable, Tuple, Optional, Dict, Callable, Any, List
+import warnings
 from thinc.types import Floats2d, Ints2d
 from thinc.api import Model, Config, Optimizer, CategoricalCrossentropy
@@ -305,9 +306,15 @@ class CoreferenceResolver(TrainablePipe):
     # boolean to float
     top_gscores = ops.asarray2f(top_gscores)
-    grad, loss = self.loss(cscores.T, top_gscores.T)
+    with warnings.catch_warnings():
+        warnings.filterwarnings('ignore', category=RuntimeWarning)
+        log_marg = ops.softmax(cscores + ops.xp.log(top_gscores), axis=1)
+    log_norm = ops.softmax(cscores, axis=1)
+    grad = log_norm - log_marg
+    # XXX might be better to not square this
+    loss = (grad ** 2).sum()
-    gradients.append((grad.T, cidx))
+    gradients.append((grad, cidx))
     total_loss += float(loss)
     offset = hi
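
On the XXX comment: the squared gradient is zero exactly when all
probability mass sits on the gold antecedents, so it works as a progress
signal, but it is not the negative log marginal likelihood usually
reported for this objective. Continuing the toy sketch above (same
hypothetical scores, gold, and softmax):

    probs = softmax(scores, axis=1)
    marginal = (probs * gold).sum(axis=1)  # probability mass on gold antecedents
    nll = -np.log(marginal).sum()          # the conventional loss for this objective

    # What the commit reports instead: the summed squared gradient.
    with np.errstate(divide="ignore"):
        masked = softmax(scores + np.log(gold), axis=1)
    sq_loss = ((probs - masked) ** 2).sum()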