mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
Add links to Reddit data (see #2401)
This commit is contained in:
parent
b8ef9c1000
commit
3f2e3cbd27
|
@@ -34,6 +34,10 @@ formatted in jsonl as a sequence of entries like this:
|
||||||
{"text":"Appalachia"}
|
{"text":"Appalachia"}
|
||||||
{"text":"Argentina"}
|
{"text":"Argentina"}
|
||||||
|
|
||||||
|
Reddit comments corpus:
|
||||||
|
* https://files.pushshift.io/reddit/
|
||||||
|
* https://archive.org/details/2015_reddit_comments_corpus
|
||||||
|
|
||||||
Compatible with: spaCy v2.0.0+
|
Compatible with: spaCy v2.0.0+
|
||||||
"""
|
"""
|
||||||
from __future__ import print_function, unicode_literals, division
|
from __future__ import print_function, unicode_literals, division
|
||||||
|
|
Loading…
Reference in New Issue
Block a user