From ae58d003279dd24b6260fcd4e173c82d4d59e396 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 25 Jun 2020 21:18:29 +0200
Subject: [PATCH] Restore random cuts in parser/ner training

---
 spacy/syntax/nn_parser.pyx | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 1f28130fb..23dca79e3 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -272,7 +272,13 @@ cdef class Parser:
         # Prepare the stepwise model, and get the callback for finishing the batch
         model, backprop_tok2vec = self.model.begin_update(
             [eg.predicted for eg in examples])
-        states, golds, max_steps = self._init_gold_batch(examples)
+        # Chop sequences into lengths of this many transitions, to make the
+        # batch uniform length. We randomize this to overfit less.
+        cut_gold = numpy.random.choice(range(20, 100))
+        states, golds, max_steps = self._init_gold_batch(
+            examples,
+            max_length=cut_gold
+        )
         all_states = list(states)
         states_golds = zip(states, golds)
         for _ in range(max_steps):