mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-04 04:10:20 +03:00
Only strip newline/carriage return
This commit is contained in:
parent
2c5a36ac28
commit
5e84e5b41c
|
@@ -318,7 +318,7 @@ class PlainTextCorpus:
|
|||
for loc in walk_corpus(self.path, ".txt"):
|
||||
with open(loc, encoding="utf-8") as f:
|
||||
for text in f:
|
||||
- text = text.strip()
|
||||
+ text = text.rstrip("\r\n")
|
||||
if len(text):
|
||||
doc = nlp.make_doc(text)
|
||||
if self.min_length >= 1 and len(doc) < self.min_length:
|
||||
|
|
Loading…
Reference in New Issue
Block a user