From c294261a040f69c0a9efb1bb96e4c6ad98206214 Mon Sep 17 00:00:00 2001
From: Alexander-D-Karpov
Date: Mon, 27 Nov 2023 14:39:28 +0300
Subject: [PATCH] added extract_folder, updated podcasts

---
 extract_folder.py            | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 podcasts/yandex/compose.yaml |  3 ++
 2 files changed, 72 insertions(+)
 create mode 100644 extract_folder.py

diff --git a/extract_folder.py b/extract_folder.py
new file mode 100644
index 0000000..dbf32a8
--- /dev/null
+++ b/extract_folder.py
@@ -0,0 +1,69 @@
+import os
+import sys
+
+
+def structure_directory_content(input_dir, output_file=None, extensions=None):
+    """
+    Concatenate the contents of all matching files under a directory into one file.
+
+    The directory is walked recursively. Each included file is written as a
+    "# <path relative to input_dir>" header line, followed by the file's text
+    and a blank-line separator.
+
+    :param input_dir: The input directory to search for files.
+    :param output_file: The output file where the content will be written.
+                        If None, 'data.<ext>' is used when exactly one extension
+                        is given, otherwise 'data.txt'.
+    :param extensions: Comma-separated file extensions without the leading dot
+                       (e.g. "py,md"). If None or empty, all files are included.
+    """
+    suffixes = None
+    if extensions:
+        names = [ext.strip() for ext in extensions.split(",") if ext.strip()]
+        # If only one extension is given and no output file is specified,
+        # use 'data.<ext>'.
+        if not output_file and len(names) == 1:
+            output_file = f"data.{names[0]}"
+        # Input such as " , " yields no names; treat that as "no filter"
+        # rather than silently matching nothing.
+        if names:
+            suffixes = tuple(f".{name}" for name in names)
+
+    # If no output file is specified, default to 'data.txt'.
+    if not output_file:
+        output_file = "data.txt"
+
+    # The output file may live inside input_dir; remember its path so it is
+    # not read back into itself while it is being written.
+    output_path = os.path.abspath(output_file)
+
+    with open(output_file, "w", encoding="utf-8") as outfile:
+        for root, _dirs, files in os.walk(input_dir):
+            for file in files:
+                if suffixes is not None and not file.endswith(suffixes):
+                    continue
+                file_path = os.path.join(root, file)
+                if os.path.abspath(file_path) == output_path:
+                    continue
+                outfile.write(f"# {os.path.relpath(file_path, input_dir)}\n")
+                # errors="replace" keeps one undecodable (e.g. binary) file
+                # from aborting the whole run.
+                with open(file_path, encoding="utf-8", errors="replace") as infile:
+                    outfile.write(infile.read())
+                outfile.write("\n\n")
+
+
+if __name__ == "__main__":
+    if len(sys.argv) == 1:
+        # No arguments: prompt interactively; an empty answer means "default".
+        input_directory = input("directory path: ") or "."
+        output_filename = input("output file name (optional): ") or None
+        file_extensions = (
+            input("file extensions separated by commas (optional): ") or None
+        )
+    else:
+        # Usage: extract_folder.py <directory> [output_file] [extensions]
+        input_directory = sys.argv[1]
+        output_filename = sys.argv[2] if len(sys.argv) > 2 else None
+        file_extensions = sys.argv[3] if len(sys.argv) > 3 else None
+    structure_directory_content(input_directory, output_filename, file_extensions)
diff --git a/podcasts/yandex/compose.yaml b/podcasts/yandex/compose.yaml
index 81b27d2..cf66bcc 100644
--- a/podcasts/yandex/compose.yaml
+++ b/podcasts/yandex/compose.yaml
@@ -5,3 +5,6 @@ services:
     build: .
     env_file:
       - .env
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+