Fix loss?
This rewrites the loss so it doesn't use the Thinc crossentropy code at all. The main difference is that the negative predictions are masked out (i.e. marginalized over), but the negative gradient is still reflected. I'm still not sure this is exactly right, but models seem to train reliably now.
parent 5db28ec2fd
commit 8f66176b2d
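As a reference for the changes below, here is a minimal NumPy sketch of the loss described in the commit message. The toy scores and gold mask are invented for illustration; the real code works on the antecedent score matrices inside the coref pipe, and the variable names mirror the diff even though they hold probabilities rather than logs.

    import numpy as np

    def softmax(x, axis=1):
        x = x - x.max(axis=axis, keepdims=True)
        e = np.exp(x)
        return e / e.sum(axis=axis, keepdims=True)

    # toy data: 2 mentions x 3 candidate antecedents, raw (un-softmaxed) scores
    cscores = np.array([[2.0, 0.5, -1.0],
                        [0.1, 1.5, 0.3]])
    # 1.0 where a candidate is a gold antecedent, 0.0 otherwise
    top_gscores = np.array([[1.0, 0.0, 0.0],
                            [0.0, 1.0, 1.0]])

    log_norm = softmax(cscores, axis=1)              # distribution over all candidates
    with np.errstate(divide="ignore"):               # log(0) -> -inf masks out non-gold candidates
        log_marg = softmax(cscores + np.log(top_gscores), axis=1)

    grad = log_norm - log_marg       # gradient of the negative log marginal likelihood
    loss = (grad ** 2).sum()         # the commit reports the squared gradient as the loss

The gradient is zero only when all probability mass already sits on the gold antecedents; non-gold candidates still get a nonzero gradient pushing their scores down, which is the "negative gradient" the message refers to.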
@@ -394,7 +394,7 @@ def ant_scorer_forward(
         # now add the placeholder
         placeholder = ops.alloc2f(scores.shape[0], 1)
         top_scores = xp.concatenate( (placeholder, top_scores), 1)
-        top_scores = ops.softmax(top_scores, axis=1)
+        #top_scores = ops.softmax(top_scores, axis=1)

         out.append((top_scores, top_scores_idx))

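For orientation, a rough sketch of what this part of ant_scorer_forward does, under my assumption that top_scores holds one row of raw antecedent scores per mention: a zero column is prepended as the "no antecedent" placeholder, and with this commit the scores are left as raw logits (the softmax is commented out) so the rewritten loss in the pipe can normalize them itself.

    from thinc.api import get_current_ops

    ops = get_current_ops()
    xp = ops.xp

    # hypothetical shapes: 4 mentions, 5 candidate antecedents each (all-zero toy scores)
    top_scores = ops.alloc2f(4, 5)

    # prepend a zero column as the "no antecedent" option
    placeholder = ops.alloc2f(top_scores.shape[0], 1)
    top_scores = xp.concatenate((placeholder, top_scores), 1)

    # previously: top_scores = ops.softmax(top_scores, axis=1)
    # now the raw scores are returned and the loss applies its own softmax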
@@ -1,4 +1,5 @@
 from typing import Iterable, Tuple, Optional, Dict, Callable, Any, List
+import warnings

 from thinc.types import Floats2d, Ints2d
 from thinc.api import Model, Config, Optimizer, CategoricalCrossentropy
@@ -305,9 +306,15 @@ class CoreferenceResolver(TrainablePipe):
             # boolean to float
             top_gscores = ops.asarray2f(top_gscores)

-            grad, loss = self.loss(cscores.T, top_gscores.T)
+            with warnings.catch_warnings():
+                warnings.filterwarnings('ignore', category=RuntimeWarning)
+                log_marg = ops.softmax(cscores + ops.xp.log(top_gscores), axis=1)
+            log_norm = ops.softmax(cscores, axis=1)
+            grad = log_norm - log_marg
+            # XXX might be better to not square this
+            loss = (grad ** 2).sum()

-            gradients.append((grad.T, cidx))
+            gradients.append((grad, cidx))
             total_loss += float(loss)

             offset = hi
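One detail in the hunk above that is easy to miss: ops.xp.log(top_gscores) evaluates log(0) for every non-gold candidate. That emits a RuntimeWarning, which is why the call is wrapped in warnings.catch_warnings, but the resulting -inf entries are exactly what masks those candidates out of the softmax. A small NumPy demonstration with made-up values:

    import warnings
    import numpy as np

    def softmax(x):
        x = x - x.max(axis=1, keepdims=True)
        e = np.exp(x)
        return e / e.sum(axis=1, keepdims=True)

    scores = np.array([[1.0, 2.0, 0.5]])
    gold = np.array([[0.0, 1.0, 1.0]])     # first candidate is not a gold antecedent

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        masked = scores + np.log(gold)     # [[-inf, 2.0, 0.5]]

    print(softmax(masked))                 # roughly [[0.0, 0.82, 0.18]]: mass only on gold candidates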