From 0750d59e5a3e4f7e021a588523c1e1d24f4538f7 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 21 Dec 2019 13:47:21 +0100 Subject: [PATCH] Allow setting ner_missing_tag on docs_to_json --- spacy/gold.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/gold.pyx b/spacy/gold.pyx index 5aecc2584..1a74d2206 100644 --- a/spacy/gold.pyx +++ b/spacy/gold.pyx @@ -852,7 +852,7 @@ cdef class GoldParse: self.c.sent_start[i] = 0 -def docs_to_json(docs, id=0): +def docs_to_json(docs, id=0, ner_missing_tag="O"): """Convert a list of Doc objects into the JSON-serializable format used by the spacy train command. @@ -870,7 +870,7 @@ def docs_to_json(docs, id=0): json_cat = {"label": cat, "value": val} json_para["cats"].append(json_cat) ent_offsets = [(e.start_char, e.end_char, e.label_) for e in doc.ents] - biluo_tags = biluo_tags_from_offsets(doc, ent_offsets) + biluo_tags = biluo_tags_from_offsets(doc, ent_offsets, missing=ner_missing_tag) for j, sent in enumerate(doc.sents): json_sent = {"tokens": [], "brackets": []} for token in sent: