mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	Fix convert command
This commit is contained in:
		
							parent
							
								
									8722b65bce
								
							
						
					
					
						commit
						1682a60a20
					
				|  | @@ -60,11 +60,13 @@ def convert_cli( | ||||||
|     if isinstance(file_type, FileTypes): |     if isinstance(file_type, FileTypes): | ||||||
|         # We get an instance of the FileTypes from the CLI so we need its string value |         # We get an instance of the FileTypes from the CLI so we need its string value | ||||||
|         file_type = file_type.value |         file_type = file_type.value | ||||||
|  |     input_path = Path(input_path) | ||||||
|  |     output_dir = Path(output_dir) if output_dir != "-" else "-" | ||||||
|     cli_args = locals() |     cli_args = locals() | ||||||
|     silent = output_dir == "-" |     silent = output_dir == "-" | ||||||
|     output_dir = Path(output_dir) if output_dir != "-" else "-" |  | ||||||
|     msg = Printer(no_print=silent) |     msg = Printer(no_print=silent) | ||||||
|     verify_cli_args(msg, **cli_args) |     verify_cli_args(msg, **cli_args) | ||||||
|  |     converter = _get_converter(msg, converter, input_path) | ||||||
|     convert( |     convert( | ||||||
|         input_path, |         input_path, | ||||||
|         output_dir, |         output_dir, | ||||||
|  | @@ -117,24 +119,31 @@ def convert( | ||||||
|             no_print=silent, |             no_print=silent, | ||||||
|             ner_map=ner_map, |             ner_map=ner_map, | ||||||
|         ) |         ) | ||||||
|     if output_dir != "-": |         if output_dir == "-": | ||||||
|         # Export data to a file |             _print_docs_to_stdout(docs, file_type) | ||||||
|         suffix = f".{file_type}" |  | ||||||
|         subpath = input_loc.relative_to(input_path) |  | ||||||
|         output_file = Path(output_dir) / subpath.with_suffix(suffix) |  | ||||||
|         if not output_file.parent.exists(): |  | ||||||
|             output_file.parent.mkdir(parents=True) |  | ||||||
|         if file_type == "json": |  | ||||||
|             srsly.write_json(output_file, docs_to_json(docs)) |  | ||||||
|         else: |         else: | ||||||
|             data = DocBin(docs=docs).to_bytes() |             subpath = input_loc.relative_to(input_path) | ||||||
|             with output_file.open("wb") as file_: |             output_file = Path(output_dir) / subpath.with_suffix(f".{file_type}") | ||||||
|                 file_.write(data) |             _write_docs_to_file(docs, output_file, file_type) | ||||||
|         msg.good(f"Generated output file ({len(docs)} documents): {output_file}") |             msg.good(f"Generated output file ({len(docs)} documents): {output_file}") | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def _print_docs_to_stdout(docs, output_type): | ||||||
|  |     if output_type == "json": | ||||||
|  |         srsly.write_json("-", docs_to_json(docs)) | ||||||
|     else: |     else: | ||||||
|         # Print to stdout |         sys.stdout.buffer.write(DocBin(docs=docs).to_bytes()) | ||||||
|         if file_type == "json": |  | ||||||
|             srsly.write_json("-", docs) |  | ||||||
|  | def _write_docs_to_file(docs, output_file, output_type): | ||||||
|  |     if not output_file.parent.exists(): | ||||||
|  |         output_file.parent.mkdir(parents=True) | ||||||
|  |     if output_type == "json": | ||||||
|  |         srsly.write_json(output_file, docs_to_json(docs)) | ||||||
|  |     else: | ||||||
|  |         data = DocBin(docs=docs).to_bytes() | ||||||
|  |         with output_file.open("wb") as file_: | ||||||
|  |             file_.write(data) | ||||||
|   |   | ||||||
|  |  | ||||||
| def autodetect_ner_format(input_data: str) -> str: | def autodetect_ner_format(input_data: str) -> str: | ||||||
|  | @@ -189,20 +198,7 @@ def verify_cli_args( | ||||||
|     ner_map, |     ner_map, | ||||||
|     lang, |     lang, | ||||||
| ): | ): | ||||||
|     if converter == "ner" or converter == "iob": |     input_path = Path(input_path) | ||||||
|         input_data = input_path.open("r", encoding="utf-8").read() |  | ||||||
|         converter_autodetect = autodetect_ner_format(input_data) |  | ||||||
|         if converter_autodetect == "ner": |  | ||||||
|             msg.info("Auto-detected token-per-line NER format") |  | ||||||
|             converter = converter_autodetect |  | ||||||
|         elif converter_autodetect == "iob": |  | ||||||
|             msg.info("Auto-detected sentence-per-line NER format") |  | ||||||
|             converter = converter_autodetect |  | ||||||
|         else: |  | ||||||
|             msg.warn( |  | ||||||
|                 "Can't automatically detect NER format. Conversion may not", |  | ||||||
|                 "succeed. See https://spacy.io/api/cli#convert", |  | ||||||
|             ) |  | ||||||
|     if file_type not in FILE_TYPES_STDOUT and output_dir == "-": |     if file_type not in FILE_TYPES_STDOUT and output_dir == "-": | ||||||
|         # TODO: support msgpack via stdout in srsly? |         # TODO: support msgpack via stdout in srsly? | ||||||
|         msg.fail( |         msg.fail( | ||||||
|  | @@ -222,10 +218,7 @@ def verify_cli_args( | ||||||
|         if len(file_types) >= 2: |         if len(file_types) >= 2: | ||||||
|             file_types = ",".join(file_types) |             file_types = ",".join(file_types) | ||||||
|             msg.fail("All input files must be same type", file_types, exits=1) |             msg.fail("All input files must be same type", file_types, exits=1) | ||||||
|         if converter == "auto": |     converter = _get_converter(msg, converter, input_path) | ||||||
|             converter = file_types[0] |  | ||||||
|     else: |  | ||||||
|         converter = input_path.suffix[1:] |  | ||||||
|     if converter not in CONVERTERS: |     if converter not in CONVERTERS: | ||||||
|         msg.fail(f"Can't find converter for {converter}", exits=1) |         msg.fail(f"Can't find converter for {converter}", exits=1) | ||||||
|     return converter |     return converter | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user