Only strip newline/carriage return

This commit is contained in:
Daniël de Kok 2023-01-19 11:24:11 +01:00
parent 2c5a36ac28
commit 5e84e5b41c

View File

@@ -318,7 +318,7 @@ class PlainTextCorpus:
for loc in walk_corpus(self.path, ".txt"): for loc in walk_corpus(self.path, ".txt"):
with open(loc, encoding="utf-8") as f: with open(loc, encoding="utf-8") as f:
for text in f: for text in f:
text = text.strip() text = text.rstrip("\r\n")
if len(text): if len(text):
doc = nlp.make_doc(text) doc = nlp.make_doc(text)
if self.min_length >= 1 and len(doc) < self.min_length: if self.min_length >= 1 and len(doc) < self.min_length: