mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	Make docs_to_json backwards-compatible with v2 (#5714)
				
					
				
			* In `spacy convert -t json`, output the JSON docs wrapped in a list
			* Add back token-level `ner` alongside the doc-level `entities`
This commit is contained in:
		
							parent
							
								
									412dbb1f38
								
							
						
					
					
						commit
						c67fc6aa5b
					
				|  | @ -135,7 +135,7 @@ def convert( | ||||||
| 
 | 
 | ||||||
| def _print_docs_to_stdout(docs, output_type): | def _print_docs_to_stdout(docs, output_type): | ||||||
|     if output_type == "json": |     if output_type == "json": | ||||||
|         srsly.write_json("-", docs_to_json(docs)) |         srsly.write_json("-", [docs_to_json(docs)]) | ||||||
|     else: |     else: | ||||||
|         sys.stdout.buffer.write(DocBin(docs=docs, store_user_data=True).to_bytes()) |         sys.stdout.buffer.write(DocBin(docs=docs, store_user_data=True).to_bytes()) | ||||||
| 
 | 
 | ||||||
|  | @ -144,7 +144,7 @@ def _write_docs_to_file(docs, output_file, output_type): | ||||||
|     if not output_file.parent.exists(): |     if not output_file.parent.exists(): | ||||||
|         output_file.parent.mkdir(parents=True) |         output_file.parent.mkdir(parents=True) | ||||||
|     if output_type == "json": |     if output_type == "json": | ||||||
|         srsly.write_json(output_file, docs_to_json(docs)) |         srsly.write_json(output_file, [docs_to_json(docs)]) | ||||||
|     else: |     else: | ||||||
|         data = DocBin(docs=docs, store_user_data=True).to_bytes() |         data = DocBin(docs=docs, store_user_data=True).to_bytes() | ||||||
|         with output_file.open("wb") as file_: |         with output_file.open("wb") as file_: | ||||||
|  |  | ||||||
|  | @ -24,14 +24,15 @@ def docs_to_json(docs, doc_id=0, ner_missing_tag="O"): | ||||||
|         for cat, val in doc.cats.items(): |         for cat, val in doc.cats.items(): | ||||||
|             json_cat = {"label": cat, "value": val} |             json_cat = {"label": cat, "value": val} | ||||||
|             json_para["cats"].append(json_cat) |             json_para["cats"].append(json_cat) | ||||||
|  |         # warning: entities information is currently duplicated as | ||||||
|  |         # doc-level "entities" and token-level "ner" | ||||||
|         for ent in doc.ents: |         for ent in doc.ents: | ||||||
|             ent_tuple = (ent.start_char, ent.end_char, ent.label_) |             ent_tuple = (ent.start_char, ent.end_char, ent.label_) | ||||||
|             json_para["entities"].append(ent_tuple) |             json_para["entities"].append(ent_tuple) | ||||||
|             if ent.kb_id_: |             if ent.kb_id_: | ||||||
|                 link_dict = {(ent.start_char, ent.end_char): {ent.kb_id_: 1.0}} |                 link_dict = {(ent.start_char, ent.end_char): {ent.kb_id_: 1.0}} | ||||||
|                 json_para["links"].append(link_dict) |                 json_para["links"].append(link_dict) | ||||||
|         ent_offsets = [(e.start_char, e.end_char, e.label_) for e in doc.ents] |         biluo_tags = biluo_tags_from_offsets(doc, json_para["entities"], missing=ner_missing_tag) | ||||||
|         biluo_tags = biluo_tags_from_offsets(doc, ent_offsets, missing=ner_missing_tag) |  | ||||||
|         for j, sent in enumerate(doc.sents): |         for j, sent in enumerate(doc.sents): | ||||||
|             json_sent = {"tokens": [], "brackets": []} |             json_sent = {"tokens": [], "brackets": []} | ||||||
|             for token in sent: |             for token in sent: | ||||||
|  | @ -44,6 +45,7 @@ def docs_to_json(docs, doc_id=0, ner_missing_tag="O"): | ||||||
|                 if doc.is_parsed: |                 if doc.is_parsed: | ||||||
|                     json_token["head"] = token.head.i-token.i |                     json_token["head"] = token.head.i-token.i | ||||||
|                     json_token["dep"] = token.dep_ |                     json_token["dep"] = token.dep_ | ||||||
|  |                 json_token["ner"] = biluo_tags[token.i] | ||||||
|                 json_sent["tokens"].append(json_token) |                 json_sent["tokens"].append(json_token) | ||||||
|             json_para["sentences"].append(json_sent) |             json_para["sentences"].append(json_sent) | ||||||
|         json_doc["paragraphs"].append(json_para) |         json_doc["paragraphs"].append(json_para) | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user