spaCy/spacy/gold/converters/json2docs.py
Matthew Honnibal a5ebfb20f5 Serialize all attrs by default
Move converters under spacy.gold

Move things around

Fix naming

Fix name

Update converter to produce DocBin

Update converters

Make spacy convert output docbin

Fix import

Fix docbin

Fix import

Update converter

Remove jsonl converter

Add json2docs converter
2020-06-22 00:46:08 +02:00

39 lines
1.1 KiB
Python

import tempfile
import contextlib
import shutil
from pathlib import Path
from ..gold_io import read_json_file
from ..example import annotations2doc
from ..example import _fix_legacy_dict_data, _parse_example_dict_data
from ...util import load_model
from ...lang.xx import MultiLanguage
@contextlib.contextmanager
def make_tempdir():
d = Path(tempfile.mkdtemp())
yield d
shutil.rmtree(str(d))
def json2docs(
input_data,
model=None,
**kwargs
):
nlp = load_model(model) if model is not None else MultiLanguage()
docs = []
with make_tempdir() as tmp_dir:
json_path = Path(tmp_dir) / "data.json"
with (json_path).open("w") as file_:
file_.write(input_data)
for json_annot in read_json_file(json_path):
example_dict = _fix_legacy_dict_data(json_annot)
tok_dict, doc_dict = _parse_example_dict_data(example_dict)
doc = annotations2doc(
nlp.vocab,
tok_dict,
doc_dict
)
docs.append(doc)
return docs