Draft Corpus class for DocBin

This commit is contained in:
Matthew Honnibal 2020-06-20 18:31:07 +02:00
parent 7360d3db72
commit 0de361cd00

View File

@@ -5,7 +5,7 @@ from .example import Example
from ..tokens import DocBin from ..tokens import DocBin
class GoldCorpus(object): class Corpus:
"""An annotated corpus, using the JSON file format. Manages """An annotated corpus, using the JSON file format. Manages
annotations for tagging, dependency parsing and NER. annotations for tagging, dependency parsing and NER.
@@ -38,7 +38,7 @@ class GoldCorpus(object):
continue continue
elif path.is_dir(): elif path.is_dir():
paths.extend(path.iterdir()) paths.extend(path.iterdir())
elif path.parts[-1].endswith(".spacy") elif path.parts[-1].endswith(".spacy"):
locs.append(path) locs.append(path)
return locs return locs