2023-11-27 14:39:28 +03:00
|
|
|
import os
|
|
|
|
import sys
|
2024-03-15 12:28:45 +03:00
|
|
|
import pathspec
|
|
|
|
|
|
|
|
|
|
|
|
def read_gitignore(input_dir):
    """
    Read the .gitignore file in the input directory and return a pathspec object.

    :param input_dir: Directory expected to contain the ``.gitignore`` file.
    :return: A ``pathspec.PathSpec`` built from the file's lines with the
             "gitwildmatch" pattern style. When ``input_dir`` has no regular
             ``.gitignore`` file, an empty spec (built from no lines) is returned.
    """
    gitignore_path = os.path.join(input_dir, ".gitignore")

    if not os.path.isfile(gitignore_path):
        return pathspec.PathSpec.from_lines("gitwildmatch", [])

    # Decode explicitly as UTF-8 so patterns containing non-ASCII characters
    # are read the same way on every platform instead of depending on the
    # locale's default encoding.
    with open(gitignore_path, "r", encoding="utf-8") as gitignore_file:
        # from_lines is called while the file is still open, since it is
        # handed the file object itself to iterate over.
        return pathspec.PathSpec.from_lines("gitwildmatch", gitignore_file)
|
2023-11-27 14:39:28 +03:00
|
|
|
|
|
|
|
|
|
|
|
def structure_directory_content(input_dir, output_file=None, extensions=None):
    """
    Walk the input directory recursively and concatenate the contents of the
    selected files into one output file.

    Each included file is written as a ``# <path relative to input_dir>``
    header line, followed by the file's content and a blank-line separator.
    Paths matched by the ``.gitignore`` in ``input_dir`` are skipped, as are
    files that cannot be decoded as UTF-8 and the output file itself.

    :param input_dir: The input directory to search for files.
    :param output_file: The output file where the content will be structured.
                        If None, 'data.<extension>' is used when exactly one
                        extension is given, otherwise 'data.txt'.
    :param extensions: File extensions to include, given either as a
                       comma-separated string (e.g. "py,md") or as an iterable
                       of strings. A leading dot is optional. If None or
                       empty, all files are included.
    """
    gitignore_spec = read_gitignore(input_dir)

    suffixes = None
    if extensions:
        # Accept both the comma-separated string used by the CLI and a real
        # list/tuple of extensions, as the parameter description promises.
        if isinstance(extensions, str):
            raw_extensions = extensions.split(",")
        else:
            raw_extensions = extensions
        cleaned = [ext.strip().lstrip(".") for ext in raw_extensions]
        cleaned = [ext for ext in cleaned if ext]

        if not output_file and len(cleaned) == 1:
            output_file = f"data.{cleaned[0]}"

        # An input such as " , " cleans down to nothing; treat it like
        # "no filter" rather than silently matching no file at all.
        suffixes = tuple(f".{ext}" for ext in cleaned) or None

    if not output_file:
        output_file = "data.txt"

    # Remembered so the walk never feeds the output file back into itself
    # when it lives under input_dir (e.g. the default 'data.txt' with '.').
    output_abspath = os.path.abspath(output_file)

    def is_ignored(path, is_dir=False):
        # .gitignore patterns are relative to the directory holding the
        # .gitignore, so match on the path relative to input_dir; otherwise
        # anchored patterns such as "/build" can never match.
        relative = os.path.relpath(path, input_dir)
        if is_dir:
            # The trailing slash lets directory-only patterns ("build/")
            # match the directory itself so the walk can be pruned.
            relative += "/"
        return gitignore_spec.match_file(relative)

    with open(output_file, "w", encoding="utf-8") as outfile:
        for root, dirs, files in os.walk(input_dir):
            # Assign in place so os.walk does not descend into ignored dirs.
            dirs[:] = [
                d for d in dirs if not is_ignored(os.path.join(root, d), is_dir=True)
            ]

            for file in files:
                if suffixes is not None and not file.endswith(suffixes):
                    continue

                file_path = os.path.join(root, file)
                if is_ignored(file_path):
                    continue
                if os.path.abspath(file_path) == output_abspath:
                    continue

                # Only the read can hit a decoding error; keep the try narrow
                # so problems while writing are not mistaken for it.
                try:
                    with open(file_path, "r", encoding="utf-8") as infile:
                        data = infile.read()
                except UnicodeDecodeError:
                    # Best effort: binary or non-UTF-8 files are skipped.
                    continue

                outfile.write(f"# {os.path.relpath(file_path, input_dir)}\n")
                outfile.write(data)
                outfile.write("\n\n")
|
2023-11-27 14:39:28 +03:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: with no command-line arguments the three settings
    # are asked for interactively; otherwise they are taken positionally
    # (directory, output file name, comma-separated extensions).
    if len(sys.argv) == 1:
        input_directory = input("directory path: ")
        # Empty answers to the optional prompts are passed on as None.
        output_filename = input("output file name (optional): ") or None
        file_extensions = (
            input("file extensions separated by commas (optional): ") or None
        )
    else:
        # Take up to three positional arguments and pad the missing ones
        # with None so they can be unpacked in one step.
        positional = sys.argv[1:4]
        positional += [None] * (3 - len(positional))
        input_directory, output_filename, file_extensions = positional
        if input_directory is None:
            input_directory = "."

    structure_directory_content(input_directory, output_filename, file_extensions)
|