Allow setting ner_missing_tag on docs_to_json

This commit is contained in:
Ines Montani 2019-12-21 13:47:21 +01:00
parent 8ebbb85117
commit 0750d59e5a

View File

@@ -852,7 +852,7 @@ cdef class GoldParse:
self.c.sent_start[i] = 0 self.c.sent_start[i] = 0
-def docs_to_json(docs, id=0):
+def docs_to_json(docs, id=0, ner_missing_tag="O"):
"""Convert a list of Doc objects into the JSON-serializable format used by """Convert a list of Doc objects into the JSON-serializable format used by
the spacy train command. the spacy train command.
@@ -870,7 +870,7 @@ def docs_to_json(docs, id=0):
json_cat = {"label": cat, "value": val} json_cat = {"label": cat, "value": val}
json_para["cats"].append(json_cat) json_para["cats"].append(json_cat)
ent_offsets = [(e.start_char, e.end_char, e.label_) for e in doc.ents] ent_offsets = [(e.start_char, e.end_char, e.label_) for e in doc.ents]
-biluo_tags = biluo_tags_from_offsets(doc, ent_offsets)
+biluo_tags = biluo_tags_from_offsets(doc, ent_offsets, missing=ner_missing_tag)
for j, sent in enumerate(doc.sents): for j, sent in enumerate(doc.sents):
json_sent = {"tokens": [], "brackets": []} json_sent = {"tokens": [], "brackets": []}
for token in sent: for token in sent: