Test with update_with_oracle_cut_size={0, 1, 5, 100}

And fix a bug that occurs with a cut size of 1.
2025-09-21 03:22:37 +03:00 · 2023-01-13 16:04:07 +01:00 · 2023-01-13 16:04:07 +01:00 · c20572a82a
commit c20572a82a
parent 850ce0583d
2 changed files with 7 additions and 3 deletions
--- a/spacy/pipeline/transition_parser.pyx
+++ b/spacy/pipeline/transition_parser.pyx
@@ -306,7 +306,7 @@ class Parser(TrainablePipe):
        if max_moves >= 1:
            # Chop sequences into lengths of this many words, to make the
            # batch uniform length.
-            max_moves = int(random.uniform(max_moves // 2, max_moves * 2))
+            max_moves = int(random.uniform(max(max_moves // 2, 1), max_moves * 2))
            init_states, gold_states, _ = self._init_gold_batch(
                examples,
                max_length=max_moves
--- a/spacy/tests/parser/test_parse.py
+++ b/spacy/tests/parser/test_parse.py
@@ -1,3 +1,4 @@
+import itertools
 import pytest
 import numpy
 from numpy.testing import assert_equal
@@ -401,12 +402,15 @@ def test_incomplete_data(pipe_name):
    assert doc[2].head.i == 1
-@pytest.mark.parametrize("pipe_name", PARSERS)
-def test_overfitting_IO(pipe_name):
+@pytest.mark.parametrize(
+    "pipe_name,max_moves", itertools.product(PARSERS, [0, 1, 5, 100])
+)
+def test_overfitting_IO(pipe_name, max_moves):
    fix_random_seed(0)
    # Simple test to try and quickly overfit the dependency parser (normal or beam)
    nlp = English()
    parser = nlp.add_pipe(pipe_name)
+    parser.cfg["update_with_oracle_cut_size"] = max_moves
    train_examples = []
    for text, annotations in TRAIN_DATA:
        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))