From cd6bd91c3a17d99674b5ed8c3b1092696ee59373 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Thu, 20 May 2021 14:48:09 +0200 Subject: [PATCH] Switch default train corpus max_length to 0 in quickstart (#8142) The behavior of `spacy.Corpus.v1` is unexpected enough for `max_length != 0` that `0` is a better default for users creating a new config with the quickstart. Otherwise (with a nonzero `max_length`), documents are skipped, sometimes the entire corpus is skipped, and sometimes documents are (quite unexpectedly for your average user) split into sentences. --- spacy/cli/templates/quickstart_training.jinja | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja index e43c21bbd..0d422318b 100644 --- a/spacy/cli/templates/quickstart_training.jinja +++ b/spacy/cli/templates/quickstart_training.jinja @@ -372,7 +372,7 @@ factory = "{{ pipe }}" [corpora.train] @readers = "spacy.Corpus.v1" path = ${paths.train} -max_length = {{ 500 if hardware == "gpu" else 2000 }} +max_length = 0 [corpora.dev] @readers = "spacy.Corpus.v1"