spaCy/spacy/cli/converters/jsonl2json.py
Ines Montani ffdd5e964f
Small CLI improvements (#3030)
* Add todo

* Auto-format

* Update wasabi pin

* Format training results with wasabi

* Remove loading animation from model saving

Currently behaves weirdly

* Inline messages

* Remove unnecessary path2str

Already taken care of by printer

* Inline messages in CLI

* Remove unused function

* Move loading indicator into loading function

* Check for invalid whitespace entities
2018-12-08 11:49:43 +01:00

21 lines
671 B
Python

# coding: utf8
from __future__ import unicode_literals
import srsly
from ...util import get_lang_class
def ner_jsonl2json(input_data, lang=None, n_sents=10, use_morphology=False):
if lang is None:
raise ValueError("No --lang specified, but tokenization required")
json_docs = []
input_tuples = [srsly.json_loads(line) for line in input_data]
nlp = get_lang_class(lang)()
for i, (raw_text, ents) in enumerate(input_tuples):
doc = nlp.make_doc(raw_text)
doc[0].is_sent_start = True
doc.ents = [doc.char_span(s, e, label=L) for s, e, L in ents["entities"]]
json_docs.append(doc.to_json())
return json_docs