mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
An example is only oversized if it exceeds the batch size plus the tolerance level
This commit is contained in:
parent
ef834b4cd7
commit
f2e162fc60
|
@@ -678,7 +678,7 @@ def minibatch_by_words(examples, size, count_words=len, tolerance=0.2, discard_o
|
||||||
n_words = count_words(example.doc)
|
n_words = count_words(example.doc)
|
||||||
# if the current example exceeds the batch size, it is returned separately
|
# if the current example exceeds the batch size, it is returned separately
|
||||||
# but only if discard_oversize=False.
|
# but only if discard_oversize=False.
|
||||||
if n_words > target_size:
|
if n_words > target_size + tol_size:
|
||||||
if not discard_oversize:
|
if not discard_oversize:
|
||||||
yield [example]
|
yield [example]
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user