mirror of
https://github.com/Alexander-D-Karpov/scripts.git
synced 2025-06-30 09:33:04 +03:00
updated extract_folder script
This commit is contained in:
parent
cb6e37274c
commit
06cd5b7bea
|
@ -2,24 +2,38 @@ import os
|
||||||
import sys
|
import sys
|
||||||
import pathspec
|
import pathspec
|
||||||
|
|
||||||
|
|
||||||
def read_gitignore(input_dir):
    """
    Reads the .gitignore file in the input directory and returns a pathspec object.

    A small set of default patterns is always part of the returned spec, whether
    or not a .gitignore file exists; patterns read from the file are appended
    after the defaults.

    :param input_dir: Directory that may contain a ``.gitignore`` file.
    :return: A ``pathspec.PathSpec`` built with the "gitwildmatch" syntax.
    """
    # Default patterns to always ignore.
    # NOTE: a gitignore pattern with a slash in the middle is anchored to the
    # root, so "migrations/*.py" would only match a top-level migrations
    # directory. The leading "**/" makes it match at any depth.
    patterns = [
        "**/migrations/*.py",  # Ignore Django migration files
        "node_modules/",  # Ignore node_modules directory
    ]

    gitignore_path = os.path.join(input_dir, ".gitignore")
    if os.path.isfile(gitignore_path):
        # Explicit encoding: the platform default (e.g. cp1252 on Windows)
        # can fail on a UTF-8 .gitignore.
        with open(gitignore_path, "r", encoding="utf-8") as gitignore_file:
            patterns.extend(gitignore_file.read().splitlines())

    return pathspec.PathSpec.from_lines("gitwildmatch", patterns)
|
||||||
|
|
||||||
|
def should_ignore_path(path, gitignore_spec):
    """
    Additional check for paths that should be ignored.

    A path is ignored when any of its components is ``node_modules``, when it
    ends with ``.py`` and has a ``migrations`` component, or when
    ``gitignore_spec`` matches it.

    :param path: File or directory path to test.
    :param gitignore_spec: Object exposing ``match_file(path)``; its result is
        returned unchanged when neither hard-coded rule applies.
    :return: True for the hard-coded rules, otherwise whatever
        ``gitignore_spec.match_file(path)`` returns.
    """
    # Where an alternate separator exists ("/" on Windows), fold it into
    # os.sep so a segment such as "proj/node_modules" is still recognised.
    normalized = path.replace(os.altsep, os.sep) if os.altsep else path
    # Split once and reuse the components for both checks.
    parts = normalized.split(os.sep)

    if "node_modules" in parts:
        return True
    if "migrations" in parts and path.endswith(".py"):
        return True
    return gitignore_spec.match_file(path)
|
||||||
|
|
||||||
def structure_directory_content(input_dir, output_file=None, extensions=None):
|
def structure_directory_content(input_dir, output_file=None, extensions=None):
|
||||||
"""
|
"""
|
||||||
This function goes through the input directory recursively and structures
|
This function goes through the input directory recursively and structures
|
||||||
all the file contents into one output file based on the given extensions.
|
all the file contents into one output file based on the given extensions.
|
||||||
|
|
||||||
:param input_dir: The input directory to search for files.
|
:param input_dir: The input directory to search for files.
|
||||||
:param output_file: The output file where the content will be structured.
|
:param output_file: The output file where the content will be structured.
|
||||||
If None, 'data.txt' or 'data.<extension>' will be used.
|
If None, 'data.txt' or 'data.<extension>' will be used.
|
||||||
|
@ -39,16 +53,18 @@ def structure_directory_content(input_dir, output_file=None, extensions=None):
|
||||||
|
|
||||||
with open(output_file, "w") as outfile:
|
with open(output_file, "w") as outfile:
|
||||||
for root, dirs, files in os.walk(input_dir):
|
for root, dirs, files in os.walk(input_dir):
|
||||||
|
# Filter files and directories using the enhanced ignore check
|
||||||
files = [
|
files = [
|
||||||
f
|
f for f in files
|
||||||
for f in files
|
if not should_ignore_path(os.path.join(root, str(f)), gitignore_spec)
|
||||||
if not gitignore_spec.match_file(os.path.join(root, str(f)))
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Filter directories in-place
|
||||||
dirs[:] = [
|
dirs[:] = [
|
||||||
d
|
d for d in dirs
|
||||||
for d in dirs
|
if not should_ignore_path(os.path.join(root, str(d)), gitignore_spec)
|
||||||
if not gitignore_spec.match_file(os.path.join(root, str(d)))
|
|
||||||
]
|
]
|
||||||
|
|
||||||
for file in files:
|
for file in files:
|
||||||
if extensions is None or any(
|
if extensions is None or any(
|
||||||
file.endswith(f".{ext}") for ext in extensions
|
file.endswith(f".{ext}") for ext in extensions
|
||||||
|
@ -65,7 +81,6 @@ def structure_directory_content(input_dir, output_file=None, extensions=None):
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if len(sys.argv) == 1:
|
if len(sys.argv) == 1:
|
||||||
input_directory = input("directory path: ")
|
input_directory = input("directory path: ")
|
||||||
|
|
Loading…
Reference in New Issue
Block a user