Add links to Reddit data (see #2401)

This commit is contained in:
Ines Montani 2018-05-31 16:22:43 +02:00 committed by GitHub
parent b8ef9c1000
commit 3f2e3cbd27
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -34,6 +34,10 @@ formatted in jsonl as a sequence of entries like this:
{"text":"Appalachia"} {"text":"Appalachia"}
{"text":"Argentina"} {"text":"Argentina"}
Reddit comments corpus:
* https://files.pushshift.io/reddit/
* https://archive.org/details/2015_reddit_comments_corpus
Compatible with: spaCy v2.0.0+ Compatible with: spaCy v2.0.0+
""" """
from __future__ import print_function, unicode_literals, division from __future__ import print_function, unicode_literals, division