From 549164c31cf273339487e97aae4f6d4e84ee7779 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Tue, 9 Jun 2020 12:33:14 +0200
Subject: [PATCH] Fix corpus when no raw text supplied

---
 spacy/gold/corpus.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/spacy/gold/corpus.py b/spacy/gold/corpus.py
index df13ab505..e8bb91359 100644
--- a/spacy/gold/corpus.py
+++ b/spacy/gold/corpus.py
@@ -201,7 +201,16 @@ class GoldCorpus(object):
     ):
         """ Setting gold_preproc will result in creating a doc per sentence """
         for eg_dict in annotations:
-            example = Example.from_dict(eg_dict, doc=nlp.make_doc(eg_dict["text"]))
+            if eg_dict["text"]:
+                example = Example.from_dict(
+                    eg_dict,
+                    doc=nlp.make_doc(eg_dict["text"])
+                )
+            else:
+                example = Example.from_dict(
+                    eg_dict,
+                    doc=Doc(nlp.vocab, words=eg_dict["words"])
+                )
             example_docs = []
             if gold_preproc:
                 split_examples = example.split_sents()