From 5e84e5b41c9404dc1846a324c059988459182c3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?=
Date: Thu, 19 Jan 2023 11:24:11 +0100
Subject: [PATCH] Only strip newline/carriage return

---
 spacy/training/corpus.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/training/corpus.py b/spacy/training/corpus.py
index b43b31c99..d626ad0e0 100644
--- a/spacy/training/corpus.py
+++ b/spacy/training/corpus.py
@@ -318,7 +318,7 @@ class PlainTextCorpus:
         for loc in walk_corpus(self.path, ".txt"):
             with open(loc, encoding="utf-8") as f:
                 for text in f:
-                    text = text.strip()
+                    text = text.rstrip("\r\n")
                     if len(text):
                         doc = nlp.make_doc(text)
                         if self.min_length >= 1 and len(doc) < self.min_length: