mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	An example is only oversized if its word count exceeds the batch size plus the tolerance
This commit is contained in:
		
							parent
							
								
									ef834b4cd7
								
							
						
					
					
						commit
						f2e162fc60
					
				| 
						 | 
					@ -678,7 +678,7 @@ def minibatch_by_words(examples, size, count_words=len, tolerance=0.2, discard_o
 | 
				
			||||||
        n_words = count_words(example.doc)
 | 
					        n_words = count_words(example.doc)
 | 
				
			||||||
        # if the current example exceeds the batch size, it is returned separately
 | 
					        # if the current example exceeds the batch size, it is returned separately
 | 
				
			||||||
        # but only if discard_oversize=False.
 | 
					        # but only if discard_oversize=False.
 | 
				
			||||||
        if n_words > target_size:
 | 
					        if n_words > target_size + tol_size:
 | 
				
			||||||
            if not discard_oversize:
 | 
					            if not discard_oversize:
 | 
				
			||||||
                yield [example]
 | 
					                yield [example]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user