mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	* Add todo
	* Auto-format
	* Update wasabi pin
	* Format training results with wasabi
	* Remove loading animation from model saving (currently behaves weirdly)
	* Inline messages
	* Remove unnecessary path2str (already taken care of by printer)
	* Inline messages in CLI
	* Remove unused function
	* Move loading indicator into loading function
	* Check for invalid whitespace entities
		
			
				
	
	
		
			21 lines
		
	
	
		
			671 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			21 lines
		
	
	
		
			671 B
		
	
	
	
		
			Python
		
	
	
	
	
	
# coding: utf8
 | 
						|
from __future__ import unicode_literals
 | 
						|
 | 
						|
import srsly
 | 
						|
 | 
						|
from ...util import get_lang_class
 | 
						|
 | 
						|
 | 
						|
def ner_jsonl2json(input_data, lang=None, n_sents=10, use_morphology=False):
    """Convert JSONL named-entity annotations into a list of JSON docs.

    Every non-blank line of the input must decode to a two-item sequence
    ``(raw_text, annotations)`` where ``annotations["entities"]`` is an
    iterable of ``(start_char, end_char, label)`` triples. Each text is
    tokenized with a blank pipeline for ``lang``, its first token is marked
    as a sentence start, the entities are attached, and the result of
    ``doc.to_json()`` is collected.

    input_data: Iterable of JSONL lines (e.g. an open file or a list), or a
        single string holding the whole JSONL payload.
    lang (unicode): Language code handed to ``get_lang_class``. Required,
        because the raw text has to be tokenized.
    n_sents (int): Not used by this function.
        NOTE(review): presumably kept so converters share one signature.
    use_morphology (bool): Not used by this function.
    RETURNS (list): One ``doc.to_json()`` result per input record.
    RAISES (ValueError): If ``lang`` is None, or if an entity's character
        offsets do not line up with token boundaries.
    """
    if lang is None:
        raise ValueError("No --lang specified, but tokenization required")
    # A whole-file string would otherwise be iterated one character at a
    # time; duck-typed so it covers both str and unicode on Python 2.
    if hasattr(input_data, "splitlines"):
        input_data = input_data.splitlines()
    # Blank lines (e.g. a trailing newline) are not valid JSON records.
    input_tuples = [srsly.json_loads(line) for line in input_data if line.strip()]
    nlp = get_lang_class(lang)()
    json_docs = []
    for raw_text, annotations in input_tuples:
        doc = nlp.make_doc(raw_text)
        # An empty text produces no tokens, so there is nothing to mark.
        if len(doc):
            doc[0].is_sent_start = True
        ents = []
        for start, end, label in annotations["entities"]:
            span = doc.char_span(start, end, label=label)
            # char_span returns None when the offsets don't map onto token
            # boundaries; fail with a message that points at the bad record
            # instead of letting the doc.ents setter choke on None.
            if span is None:
                raise ValueError(
                    "Entity offsets ({}, {}) with label '{}' do not align "
                    "with token boundaries in text: {}".format(
                        start, end, label, raw_text
                    )
                )
            ents.append(span)
        doc.ents = ents
        json_docs.append(doc.to_json())
    return json_docs
 |