Merge pull request #6256 from adrianeboyd/bugfix/docs-to-json-raw

This commit is contained in:
Ines Montani 2020-10-15 10:35:01 +02:00 committed by GitHub
commit 4e17ddf75e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -20,7 +20,8 @@ def docs_to_json(docs, doc_id=0, ner_missing_tag="O"):
docs = [docs] docs = [docs]
json_doc = {"id": doc_id, "paragraphs": []} json_doc = {"id": doc_id, "paragraphs": []}
for i, doc in enumerate(docs): for i, doc in enumerate(docs):
json_para = {'raw': doc.text, "sentences": [], "cats": [], "entities": [], "links": []} raw = None if doc.has_unknown_spaces else doc.text
json_para = {'raw': raw, "sentences": [], "cats": [], "entities": [], "links": []}
for cat, val in doc.cats.items(): for cat, val in doc.cats.items():
json_cat = {"label": cat, "value": val} json_cat = {"label": cat, "value": val}
json_para["cats"].append(json_cat) json_para["cats"].append(json_cat)