mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-28 12:53:41 +03:00
Fix get_loss for values outside of labels in senter (#5730)
* Fix get_loss for None alignments in senter

  When converting the `sent_start` values back to `SentenceRecognizer` labels, handle `None` alignments.

* Handle SENT_START as -1

  Handle SENT_START as -1 (or -1 converted to uint64) by treating any values other than 1 the same as 0 in `SentenceRecognizer.get_loss`.
This commit is contained in:
parent
9b49787f35
commit
ad15499b3b
|
@ -523,7 +523,18 @@ class SentenceRecognizer(Tagger):
|
||||||
def get_loss(self, examples, scores):
|
def get_loss(self, examples, scores):
|
||||||
labels = self.labels
|
labels = self.labels
|
||||||
loss_func = SequenceCategoricalCrossentropy(names=labels, normalize=False)
|
loss_func = SequenceCategoricalCrossentropy(names=labels, normalize=False)
|
||||||
truths = [[labels[x] for x in eg.get_aligned("sent_start")] for eg in examples]
|
truths = []
|
||||||
|
for eg in examples:
|
||||||
|
eg_truth = []
|
||||||
|
for x in eg.get_aligned("sent_start"):
|
||||||
|
if x == None:
|
||||||
|
eg_truth.append(None)
|
||||||
|
elif x == 1:
|
||||||
|
eg_truth.append(labels[1])
|
||||||
|
else:
|
||||||
|
# anything other than 1: 0, -1, -1 as uint64
|
||||||
|
eg_truth.append(labels[0])
|
||||||
|
truths.append(eg_truth)
|
||||||
d_scores, loss = loss_func(scores, truths)
|
d_scores, loss = loss_func(scores, truths)
|
||||||
if self.model.ops.xp.isnan(loss):
|
if self.model.ops.xp.isnan(loss):
|
||||||
raise ValueError("nan value when computing loss")
|
raise ValueError("nan value when computing loss")
|
||||||
|
|
|
@ -38,6 +38,11 @@ def test_overfitting_IO():
|
||||||
train_examples = []
|
train_examples = []
|
||||||
for t in TRAIN_DATA:
|
for t in TRAIN_DATA:
|
||||||
train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
|
train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
|
||||||
|
# add some cases where SENT_START == -1
|
||||||
|
train_examples[0].reference[10].is_sent_start = False
|
||||||
|
train_examples[1].reference[1].is_sent_start = False
|
||||||
|
train_examples[1].reference[11].is_sent_start = False
|
||||||
|
|
||||||
nlp.add_pipe(senter)
|
nlp.add_pipe(senter)
|
||||||
optimizer = nlp.begin_training()
|
optimizer = nlp.begin_training()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user