mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Fix get_loss for values outside of labels in senter (#5730)
* Fix get_loss for None alignments in senter: when converting the `sent_start` values back to `SentenceRecognizer` labels, handle `None` alignments.
* Handle SENT_START as -1: handle SENT_START as -1 (or -1 converted to uint64) by treating any value other than 1 the same as 0 in `SentenceRecognizer.get_loss`.
This commit is contained in:
parent
9b49787f35
commit
ad15499b3b
|
@ -523,7 +523,18 @@ class SentenceRecognizer(Tagger):
|
|||
def get_loss(self, examples, scores):
|
||||
labels = self.labels
|
||||
loss_func = SequenceCategoricalCrossentropy(names=labels, normalize=False)
|
||||
truths = [[labels[x] for x in eg.get_aligned("sent_start")] for eg in examples]
|
||||
truths = []
|
||||
for eg in examples:
|
||||
eg_truth = []
|
||||
for x in eg.get_aligned("sent_start"):
|
||||
if x == None:
|
||||
eg_truth.append(None)
|
||||
elif x == 1:
|
||||
eg_truth.append(labels[1])
|
||||
else:
|
||||
# anything other than 1: 0, -1, -1 as uint64
|
||||
eg_truth.append(labels[0])
|
||||
truths.append(eg_truth)
|
||||
d_scores, loss = loss_func(scores, truths)
|
||||
if self.model.ops.xp.isnan(loss):
|
||||
raise ValueError("nan value when computing loss")
|
||||
|
|
|
@ -38,6 +38,11 @@ def test_overfitting_IO():
|
|||
train_examples = []
|
||||
for t in TRAIN_DATA:
|
||||
train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
|
||||
# add some cases where SENT_START == -1
|
||||
train_examples[0].reference[10].is_sent_start = False
|
||||
train_examples[1].reference[1].is_sent_start = False
|
||||
train_examples[1].reference[11].is_sent_start = False
|
||||
|
||||
nlp.add_pipe(senter)
|
||||
optimizer = nlp.begin_training()
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user