diff --git a/spacy/gold/converters/json2docs.py b/spacy/gold/converters/json2docs.py new file mode 100644 index 000000000..98219bb04 --- /dev/null +++ b/spacy/gold/converters/json2docs.py @@ -0,0 +1,38 @@ +import tempfile +import contextlib +import shutil +from pathlib import Path +from ..gold_io import read_json_file +from ..example import annotations2doc +from ..example import _fix_legacy_dict_data, _parse_example_dict_data +from ...util import load_model +from ...lang.xx import MultiLanguage + +@contextlib.contextmanager +def make_tempdir(): + d = Path(tempfile.mkdtemp()) + yield d + shutil.rmtree(str(d)) + + +def json2docs( + input_data, + model=None, + **kwargs +): + nlp = load_model(model) if model is not None else MultiLanguage() + docs = [] + with make_tempdir() as tmp_dir: + json_path = Path(tmp_dir) / "data.json" + with (json_path).open("w") as file_: + file_.write(input_data) + for json_annot in read_json_file(json_path): + example_dict = _fix_legacy_dict_data(json_annot) + tok_dict, doc_dict = _parse_example_dict_data(example_dict) + doc = annotations2doc( + nlp.vocab, + tok_dict, + doc_dict + ) + docs.append(doc) + return docs