From a93d42861d683081793f43a40eb42122cc247cca Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Thu, 15 Oct 2020 09:44:21 +0200 Subject: [PATCH] Use null raw for has_unknown_spaces in docs_to_json --- spacy/training/gold_io.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spacy/training/gold_io.pyx b/spacy/training/gold_io.pyx index 8fb6b8565..327748d01 100644 --- a/spacy/training/gold_io.pyx +++ b/spacy/training/gold_io.pyx @@ -20,7 +20,8 @@ def docs_to_json(docs, doc_id=0, ner_missing_tag="O"): docs = [docs] json_doc = {"id": doc_id, "paragraphs": []} for i, doc in enumerate(docs): - json_para = {'raw': doc.text, "sentences": [], "cats": [], "entities": [], "links": []} + raw = None if doc.has_unknown_spaces else doc.text + json_para = {'raw': raw, "sentences": [], "cats": [], "entities": [], "links": []} for cat, val in doc.cats.items(): json_cat = {"label": cat, "value": val} json_para["cats"].append(json_cat)