mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-06 22:50:33 +03:00
Move converters under spacy.gold Move things around Fix naming Fix name Update converter to produce DocBin Update converters Make spacy convert output docbin Fix import Fix docbin Fix import Update converter Remove jsonl converter Add json2docs converter
39 lines
1.1 KiB
Python
39 lines
1.1 KiB
Python
import tempfile
|
|
import contextlib
|
|
import shutil
|
|
from pathlib import Path
|
|
from ..gold_io import read_json_file
|
|
from ..example import annotations2doc
|
|
from ..example import _fix_legacy_dict_data, _parse_example_dict_data
|
|
from ...util import load_model
|
|
from ...lang.xx import MultiLanguage
|
|
|
|
@contextlib.contextmanager
def make_tempdir():
    """Create a temporary directory and delete it when the context exits.

    The directory is removed both on normal exit and when the body of the
    ``with`` block raises, so no temporary files are left behind.

    Yields:
        Path: Location of the newly created temporary directory.
    """
    d = Path(tempfile.mkdtemp())
    try:
        yield d
    finally:
        # Without the ``finally`` an exception thrown into the generator at
        # the ``yield`` would skip the cleanup and leak the directory.
        shutil.rmtree(str(d))
|
|
|
|
|
|
def json2docs(input_data, model=None, **kwargs):
    """Convert JSON-formatted training data into a list of docs.

    The text is written to a temporary file, read back with
    ``read_json_file`` and each resulting annotation is turned into a doc
    via ``annotations2doc``.

    Args:
        input_data (str): The JSON text to convert. It is written verbatim
            to a temporary file.
        model: Passed to ``load_model`` to obtain the vocab. When ``None``,
            a fresh ``MultiLanguage`` instance is used instead.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        list: One entry per annotation yielded by ``read_json_file``, each
        being the value returned by ``annotations2doc``.
    """
    nlp = load_model(model) if model is not None else MultiLanguage()
    docs = []
    with make_tempdir() as tmp_dir:
        json_path = Path(tmp_dir) / "data.json"
        # Pin the encoding: the platform default (e.g. cp1252 on Windows)
        # would corrupt non-ASCII text. NOTE(review): assumes
        # read_json_file reads UTF-8 - confirm against its implementation.
        with json_path.open("w", encoding="utf8") as file_:
            file_.write(input_data)
        # Consume the reader inside the ``with`` block so the temporary
        # file still exists while annotations are being read.
        for json_annot in read_json_file(json_path):
            example_dict = _fix_legacy_dict_data(json_annot)
            tok_dict, doc_dict = _parse_example_dict_data(example_dict)
            doc = annotations2doc(nlp.vocab, tok_dict, doc_dict)
            docs.append(doc)
    return docs
|