spaCy/spacy/gold/converters/json2docs.py
Matthew Honnibal 6a75992af6 Format
2020-06-22 01:11:43 +02:00

32 lines
1002 B
Python

import tempfile
import contextlib
import shutil
from pathlib import Path
from ..gold_io import read_json_file
from ..example import annotations2doc
from ..example import _fix_legacy_dict_data, _parse_example_dict_data
from ...util import load_model
from ...lang.xx import MultiLanguage
@contextlib.contextmanager
def make_tempdir():
    """Create a temporary directory and delete it when the context exits.

    Yields:
        Path: location of the freshly created temporary directory.

    The directory and everything inside it is removed on exit, including
    when the body of the ``with`` statement raises an exception.
    """
    d = Path(tempfile.mkdtemp())
    try:
        yield d
    finally:
        # Without the finally, an exception raised inside the ``with`` body
        # propagates out of the ``yield`` and the directory is never removed.
        shutil.rmtree(str(d))
def json2docs(input_data, model=None, **kwargs):
    """Convert a string of JSON training data into a list of docs.

    The JSON text is written to a temporary file, read back with
    ``read_json_file``, and each annotation dict that it yields is turned
    into a doc via ``annotations2doc``.

    Args:
        input_data: JSON content as text (it is written to a text-mode file,
            so it must be a ``str``).
        model: Optional value passed to ``load_model``. When ``None``, a
            blank ``MultiLanguage`` pipeline is used instead; only its
            ``vocab`` is needed here.
        **kwargs: Accepted for call-signature compatibility; not used.

    Returns:
        list: one doc per annotation dict yielded by ``read_json_file``,
        in the order they were read.
    """
    nlp = load_model(model) if model is not None else MultiLanguage()
    docs = []
    with make_tempdir() as tmp_dir:
        json_path = Path(tmp_dir) / "data.json"
        # JSON is UTF-8 by specification; write it explicitly as UTF-8 rather
        # than in the platform's locale encoding, which would corrupt
        # non-ASCII text on systems whose default is not UTF-8.
        # NOTE(review): assumes read_json_file decodes the file as UTF-8 —
        # confirm against gold_io.
        with json_path.open("w", encoding="utf8") as file_:
            file_.write(input_data)
        # The file has to be consumed while the temporary directory still
        # exists, so the loop stays inside the ``with`` block.
        for json_annot in read_json_file(json_path):
            example_dict = _fix_legacy_dict_data(json_annot)
            tok_dict, doc_dict = _parse_example_dict_data(example_dict)
            doc = annotations2doc(nlp.vocab, tok_dict, doc_dict)
            docs.append(doc)
    return docs