# spaCy/spacy/gold/converters/json2docs.py
# (32 lines, 1002 B, Python)

import tempfile
import contextlib
import shutil
from pathlib import Path
from ..gold_io import read_json_file
from ..example import annotations2doc
from ..example import _fix_legacy_dict_data, _parse_example_dict_data
from ...util import load_model
from ...lang.xx import MultiLanguage
# 2020-06-22 02:11:43 +03:00
@contextlib.contextmanager
def make_tempdir():
    """Create a temporary directory and remove it when the context exits.

    YIELDS (Path): The path of the newly created temporary directory.

    The directory and everything inside it is deleted on exit, including
    when the body of the ``with`` statement raises an exception.
    """
    tmp_path = Path(tempfile.mkdtemp())
    try:
        yield tmp_path
    finally:
        # Run the cleanup even if the caller's block raised, so that failed
        # conversions do not leave stray directories behind.
        shutil.rmtree(str(tmp_path))
# 2020-06-22 02:11:43 +03:00
def json2docs(input_data, model=None, **kwargs):
    """Convert JSON-formatted annotation text into a list of docs.

    input_data (str): The JSON text. It is written to a temporary file so it
        can be read back through `read_json_file`.
    model: Passed to `load_model` to obtain the pipeline whose vocab is used.
        If None, a blank `MultiLanguage` instance is used instead.
    **kwargs: Accepted for interface compatibility; not used here.
    RETURNS (list): One object per annotation yielded by `read_json_file`,
        each built by `annotations2doc`.
    """
    nlp = load_model(model) if model is not None else MultiLanguage()
    docs = []
    with make_tempdir() as tmp_dir:
        json_path = tmp_dir / "data.json"
        # Write with an explicit encoding: JSON is exchanged as UTF-8, and the
        # platform default encoding may differ (e.g. on Windows), which would
        # corrupt non-ASCII text.
        with json_path.open("w", encoding="utf8") as file_:
            file_.write(input_data)
        # Consume the reader while the temporary file still exists;
        # NOTE(review): read_json_file may be lazy — keep this loop inside
        # the `with` block.
        for json_annot in read_json_file(json_path):
            example_dict = _fix_legacy_dict_data(json_annot)
            tok_dict, doc_dict = _parse_example_dict_data(example_dict)
            docs.append(annotations2doc(nlp.vocab, tok_dict, doc_dict))
    return docs