mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Fix get_loss for values outside of labels in senter (#5730)
* Fix get_loss for None alignments in senter: when converting the `sent_start` values back to `SentenceRecognizer` labels, handle `None` alignments.
* Handle SENT_START as -1: handle SENT_START as -1 (or -1 converted to uint64) by treating any values other than 1 the same as 0 in `SentenceRecognizer.get_loss`.
This commit is contained in:
		
							parent
							
								
									9b49787f35
								
							
						
					
					
						commit
						ad15499b3b
					
				|  | @ -523,7 +523,18 @@ class SentenceRecognizer(Tagger): | |||
|     def get_loss(self, examples, scores): | ||||
|         labels = self.labels | ||||
|         loss_func = SequenceCategoricalCrossentropy(names=labels, normalize=False) | ||||
|         truths = [[labels[x] for x in eg.get_aligned("sent_start")] for eg in examples] | ||||
|         truths = [] | ||||
|         for eg in examples: | ||||
|             eg_truth = [] | ||||
|             for x in eg.get_aligned("sent_start"): | ||||
|                 if x == None: | ||||
|                     eg_truth.append(None) | ||||
|                 elif x == 1: | ||||
|                     eg_truth.append(labels[1]) | ||||
|                 else: | ||||
|                     # anything other than 1: 0, -1, -1 as uint64 | ||||
|                     eg_truth.append(labels[0]) | ||||
|             truths.append(eg_truth) | ||||
|         d_scores, loss = loss_func(scores, truths) | ||||
|         if self.model.ops.xp.isnan(loss): | ||||
|             raise ValueError("nan value when computing loss") | ||||
|  |  | |||
|  | @ -38,6 +38,11 @@ def test_overfitting_IO(): | |||
|     train_examples = [] | ||||
|     for t in TRAIN_DATA: | ||||
|         train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1])) | ||||
|     # add some cases where SENT_START == -1 | ||||
|     train_examples[0].reference[10].is_sent_start = False | ||||
|     train_examples[1].reference[1].is_sent_start = False | ||||
|     train_examples[1].reference[11].is_sent_start = False | ||||
| 
 | ||||
|     nlp.add_pipe(senter) | ||||
|     optimizer = nlp.begin_training() | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user