Files
dumpy/dumpy.py
2026-01-24 18:11:23 -06:00

108 lines
3.1 KiB
Python

from pathlib import Path
import pathspec
import argparse
import mimetypes
import pyperclip
def get_file_description(path: Path):
    """Return the MIME type guessed for *path*, or a placeholder string.

    The guess is made by ``mimetypes.guess_type``; the file is never opened
    here. When no type is guessed, the literal ``"Unknown mimetype"`` is
    returned instead.
    """
    guessed = mimetypes.guess_type(path)[0]
    if guessed:
        return guessed
    return "Unknown mimetype"
def generate_metadata_string(path: Path, root: Path) -> list[str]:
    """Build the banner lines that introduce one file in the dump.

    Args:
        path: The file being described; must be located under *root*.
        root: Directory the displayed filename is made relative to.

    Returns:
        Four lines: a ``#`` rule, the root-relative filename, the MIME type
        reported by ``get_file_description``, and a closing ``#`` rule.

    Raises:
        ValueError: If *path* is not relative to *root* (raised by
            ``Path.relative_to``).
    """
    rule = "#################"
    mimetype = get_file_description(path)
    shown_name = path.relative_to(root)
    return [
        rule,
        f"## Filename: {shown_name}",
        f"## Mimetype: {mimetype}",
        rule,
    ]
def get_ignore_spec(root: Path) -> "pathspec.PathSpec | None":
    """Load the ignore patterns from ``<root>/.gitignore``, if there is one.

    Args:
        root: Directory expected to contain the ``.gitignore`` file.

    Returns:
        A ``pathspec.PathSpec`` compiled from the file's lines using the
        ``gitwildmatch`` pattern style, or ``None`` when ``.gitignore`` is
        missing or is not a regular file.
    """
    gitignore_path = root / ".gitignore"
    # is_file() rather than exists(): a directory that happens to be named
    # ".gitignore" cannot be opened and should be treated as "no ignore file".
    if not gitignore_path.is_file():
        return None
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding (matches how file contents are read).
    with gitignore_path.open("r", encoding="utf-8") as f:
        return pathspec.PathSpec.from_lines("gitwildmatch", f)
def should_include(path: Path, spec: pathspec.PathSpec | None, root: Path, include_git_dir: bool = False) -> bool:
    """Decide whether *path* belongs in the dump.

    Args:
        path: Candidate path; must be located under *root*.
        spec: Compiled ignore patterns, or ``None`` to skip pattern matching.
        root: Directory that patterns and the ``.git`` check are relative to.
        include_git_dir: When true, paths inside ``.git`` are not rejected
            by the ``.git`` check (they are still subject to *spec*).

    Returns:
        ``False`` if the path lies under a ``.git`` component (and
        *include_git_dir* is false) or is matched by *spec*; ``True``
        otherwise.

    Raises:
        ValueError: If *path* is not relative to *root* (raised by
            ``Path.relative_to``).
    """
    relative_path = path.relative_to(root)
    # Look only at components below root: a root that itself sits somewhere
    # under a ".git" directory must not cause every file to be excluded.
    if not include_git_dir and ".git" in relative_path.parts:
        return False
    if spec is None:
        return True
    return not spec.match_file(str(relative_path))
def walk_filesystem(ignore_gitignore: bool, include_git_dir: bool = False) -> list[str]:
    """Collect the dump lines for every included file under the current directory.

    Args:
        ignore_gitignore: When true, the ``.gitignore`` file is not consulted
            and no pattern-based filtering is applied.
        include_git_dir: Forwarded to ``should_include``; when true, files
            inside ``.git`` are not rejected by the ``.git`` check.

    Returns:
        A flat list of output lines. Each included file contributes its
        metadata banner; files whose text could be read additionally
        contribute a fenced "File Contents" section. Files that could not
        be read still get their banner, but no contents section.
    """
    root = Path(".")
    spec = None if ignore_gitignore else get_ignore_spec(root)

    final_content: list[str] = []
    # rglob yields entries in whatever order the filesystem reports them;
    # sorting makes the dump reproducible across runs and machines.
    for path in sorted(root.rglob("*")):
        if not path.is_file():
            continue
        if not should_include(path, spec, root, include_git_dir):
            continue
        final_content += generate_metadata_string(path, root)
        success, content = get_file_contents(path)
        if success:
            final_content.append("## File Contents:\n```")
            final_content.append(content)
            final_content.append("```\n")
    return final_content
def get_file_contents(path: Path) -> tuple[bool, str]:
    """Read *path* as UTF-8 text on a best-effort basis.

    Args:
        path: File to read.

    Returns:
        ``(True, text)`` when the file was read and decoded successfully;
        ``(False, "")`` when it is not valid UTF-8 (for example binary data)
        or could not be opened or read at all.
    """
    try:
        with path.open("r", encoding="utf-8") as f:
            return (True, f.read())
    except (UnicodeDecodeError, OSError):
        # OSError covers PermissionError as well as files that vanish or
        # become unreadable between discovery and reading, so one bad file
        # cannot abort the whole dump.
        return (False, "")
def main():
    """Command-line entry point: dump the current directory as text.

    Parses the command-line flags, builds the dump (a "Root Directory"
    header followed by the per-file sections from ``walk_filesystem``), and
    either copies it to the clipboard via ``pyperclip`` or, with
    ``--no-clipboard``, prints it to standard output.

    Flags:
        --no-gitignore: do not filter files through ``.gitignore``.
        --include-git-dir: do not skip files inside ``.git``.
        --no-clipboard: print the dump instead of copying it.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Dumpy: A tool for providing a text representation of a project "
            "formatted in a way that LLMs will understand."
        )
    )
    parser.add_argument(
        "--no-gitignore",
        action="store_true",
        help="Ignore the .gitignore file and include all files",
    )
    parser.add_argument(
        "--include-git-dir",
        action="store_true",
        help="Include the .git directory in the output",
    )
    parser.add_argument(
        "--no-clipboard",
        action="store_true",
        help="Skip putting content into the clipboard and output directly to the console",
    )
    args = parser.parse_args()

    # Single quotes inside the f-string keep this valid on interpreters
    # older than Python 3.12.
    content: list[str] = [f"Root Directory: {Path('.').absolute()}\n"]
    # Extend rather than reassign so the header line is kept, and forward
    # --include-git-dir so the flag actually takes effect.
    content += walk_filesystem(
        ignore_gitignore=args.no_gitignore,
        include_git_dir=args.include_git_dir,
    )

    output = "\n".join(content)
    if args.no_clipboard:
        print(output)
    else:
        pyperclip.copy(output)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()