mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-11 09:00:36 +03:00
Add json2docs converter
This commit is contained in:
parent
f1756a6a22
commit
7360d3db72
38
spacy/gold/converters/json2docs.py
Normal file
38
spacy/gold/converters/json2docs.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
import tempfile
|
||||
import contextlib
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from ..gold_io import read_json_file
|
||||
from ..example import annotations2doc
|
||||
from ..example import _fix_legacy_dict_data, _parse_example_dict_data
|
||||
from ...util import load_model
|
||||
from ...lang.xx import MultiLanguage
|
||||
|
||||
@contextlib.contextmanager
def make_tempdir():
    """Create a temporary directory and remove it when the context exits.

    Yields:
        Path: path of a freshly created directory (via ``tempfile.mkdtemp``).

    The directory and its contents are deleted with ``shutil.rmtree`` when
    the ``with`` block is left, whether it finishes normally or raises.
    """
    d = Path(tempfile.mkdtemp())
    try:
        yield d
    finally:
        # Without try/finally an exception raised inside the ``with`` body
        # would skip the cleanup and leak the directory on disk.
        shutil.rmtree(str(d))
|
||||
|
||||
|
||||
def json2docs(
    input_data,
    model=None,
    **kwargs
):
    """Convert JSON training data into a list of docs.

    The input is written to a temporary ``data.json`` file, read back with
    ``read_json_file``, and each annotation it yields is turned into a doc
    via ``annotations2doc``.

    Args:
        input_data: the JSON content to convert. It is written with a
            text-mode ``write``, so it is presumably a ``str`` -- TODO confirm
            against callers.
        model: optional model name/path passed to ``load_model``. When
            ``None``, a blank ``MultiLanguage`` pipeline is used instead.
        **kwargs: accepted and ignored by this function.

    Returns:
        list: one doc per annotation yielded by ``read_json_file``.
    """
    nlp = load_model(model) if model is not None else MultiLanguage()
    docs = []
    with make_tempdir() as tmp_dir:
        json_path = Path(tmp_dir) / "data.json"
        # Write UTF-8 explicitly: the platform default encoding (e.g. cp1252
        # on Windows) can fail on, or corrupt, non-ASCII JSON text.
        with json_path.open("w", encoding="utf8") as file_:
            file_.write(input_data)
        # Consume the reader while the temporary directory still exists
        # (read_json_file may be lazy -- not visible from here).
        for json_annot in read_json_file(json_path):
            example_dict = _fix_legacy_dict_data(json_annot)
            tok_dict, doc_dict = _parse_example_dict_data(example_dict)
            doc = annotations2doc(
                nlp.vocab,
                tok_dict,
                doc_dict
            )
            docs.append(doc)
    return docs
|
Loading…
Reference in New Issue
Block a user