mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Allow setting ner_missing_tag on docs_to_json
This commit is contained in:
parent
8ebbb85117
commit
0750d59e5a
|
@@ -852,7 +852,7 @@ cdef class GoldParse:
|
||||||
self.c.sent_start[i] = 0
|
self.c.sent_start[i] = 0
|
||||||
|
|
||||||
|
|
||||||
def docs_to_json(docs, id=0):
|
def docs_to_json(docs, id=0, ner_missing_tag="O"):
|
||||||
"""Convert a list of Doc objects into the JSON-serializable format used by
|
"""Convert a list of Doc objects into the JSON-serializable format used by
|
||||||
the spacy train command.
|
the spacy train command.
|
||||||
|
|
||||||
|
@@ -870,7 +870,7 @@ def docs_to_json(docs, id=0):
|
||||||
json_cat = {"label": cat, "value": val}
|
json_cat = {"label": cat, "value": val}
|
||||||
json_para["cats"].append(json_cat)
|
json_para["cats"].append(json_cat)
|
||||||
ent_offsets = [(e.start_char, e.end_char, e.label_) for e in doc.ents]
|
ent_offsets = [(e.start_char, e.end_char, e.label_) for e in doc.ents]
|
||||||
biluo_tags = biluo_tags_from_offsets(doc, ent_offsets)
|
biluo_tags = biluo_tags_from_offsets(doc, ent_offsets, missing=ner_missing_tag)
|
||||||
for j, sent in enumerate(doc.sents):
|
for j, sent in enumerate(doc.sents):
|
||||||
json_sent = {"tokens": [], "brackets": []}
|
json_sent = {"tokens": [], "brackets": []}
|
||||||
for token in sent:
|
for token in sent:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user