Files
dumpy/dumpy.py
Daniel Henry fb0548b7d5 Initial Commit
Signed-off-by: Daniel Henry <iamdanhenry@gmail.com>
2026-01-24 15:26:41 -06:00

84 lines
2.4 KiB
Python

from pathlib import Path
import pathspec
import argparse
import mimetypes
def get_file_description(path: Path):
    """Return the MIME type guessed for *path*, or a placeholder string.

    The guess comes from ``mimetypes.guess_type``; the encoding half of its
    result is discarded. When no type can be guessed the literal
    ``"Unknown mimetype"`` is returned instead.
    """
    guessed = mimetypes.guess_type(path)[0]
    if guessed:
        return guessed
    return "Unknown mimetype"
def output_metadata(path: Path, root: Path):
    """Print a four-line header describing *path* to stdout.

    The header shows the path relative to *root* and the MIME type reported
    by ``get_file_description``, framed by two rows of ``#`` characters.
    """
    mimetype = get_file_description(path)
    shown_name = path.relative_to(root)
    border = "#################"
    header_lines = (
        border,
        f"## Filename: {shown_name}",
        f"## Mimetype: {mimetype}",
        border,
    )
    for line in header_lines:
        print(line)
def get_ignore_spec(root: Path) -> "pathspec.PathSpec | None":
    """Load the ``.gitignore`` directly under *root* as a ``PathSpec``.

    Args:
        root: Directory expected to contain the ``.gitignore`` file.

    Returns:
        A ``pathspec.PathSpec`` built from the file's lines with the
        ``'gitwildmatch'`` pattern style, or ``None`` when *root* has no
        regular ``.gitignore`` file.
    """
    gitignore_path = root / ".gitignore"
    # is_file() rather than exists(): a directory that happens to be named
    # ".gitignore" cannot be opened as text and would raise.
    if not gitignore_path.is_file():
        return None
    # Explicit UTF-8 so patterns decode the same way on every platform,
    # matching how get_file_contents reads project files.
    with gitignore_path.open("r", encoding="utf-8") as f:
        return pathspec.PathSpec.from_lines('gitwildmatch', f)
def should_include(path: Path, spec: "pathspec.PathSpec | None", root: Path, include_git_dir: bool = False) -> bool:
    """Decide whether *path* belongs in the dump.

    Args:
        path: File being considered; must be located under *root*.
        spec: Ignore patterns to honour, or ``None`` to skip pattern checks.
        root: Directory the walk started from; patterns and the ``.git``
            check are evaluated against the path relative to it.
        include_git_dir: When false, anything with a ``.git`` component
            below *root* is rejected.

    Returns:
        ``True`` if the file should be emitted, ``False`` otherwise.

    Raises:
        ValueError: If *path* is not relative to *root*.
    """
    relative_path = path.relative_to(root)
    # Inspect only the components below root: a ".git" directory somewhere
    # above root must not cause every file to be excluded.
    if not include_git_dir and ".git" in relative_path.parts:
        return False
    if spec is None:
        return True
    return not spec.match_file(str(relative_path))
def walk_filesystem(ignore_gitignore: bool, include_git_dir: bool = False):
    """Dump every included file under the current directory to stdout.

    Recursively visits ``Path(".")``. For each regular file accepted by
    ``should_include`` it prints the metadata header, then the file's text
    when ``get_file_contents`` succeeds, then a blank separator.

    Args:
        ignore_gitignore: When true, no ``.gitignore`` patterns are loaded
            and files are filtered only by the ``.git`` rule.
        include_git_dir: Forwarded to ``should_include``.
    """
    root = Path(".")
    spec = None if ignore_gitignore else get_ignore_spec(root)

    for candidate in root.rglob("*"):
        if not candidate.is_file():
            continue
        if not should_include(candidate, spec, root, include_git_dir):
            continue

        output_metadata(candidate, root)
        readable, text = get_file_contents(candidate)
        if readable:
            print("## File Contents:")
            print(text)
        print("\n")
def get_file_contents(path: Path) -> tuple[bool, str]:
    """Read *path* as UTF-8 text without raising on unreadable files.

    Args:
        path: File to read.

    Returns:
        ``(True, text)`` when the file was read and decoded, otherwise
        ``(False, "")``.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except (UnicodeDecodeError, OSError):
        # UnicodeDecodeError: the bytes are not valid UTF-8 (typically
        # binary data). OSError: covers PermissionError as well as files
        # that vanish or fail mid-read, so a single bad entry cannot abort
        # the caller's whole walk.
        return (False, "")
    else:
        return (True, text)
def main():
    """Parse the command-line flags and dump the current directory.

    Flags:
        --no-gitignore: do not apply ``.gitignore`` patterns.
        --include-git-dir: also emit files located under ``.git``.
    """
    parser = argparse.ArgumentParser(description="Dumpy: A tool for providing a text representation of a project formatted in a way that LLMs will understand.")
    parser.add_argument(
        "--no-gitignore",
        action="store_true",
        help="Ignore the .gitignore file and include all files"
    )
    parser.add_argument(
        "--include-git-dir",
        action="store_true",
        help="Include the .git directory in the output"
    )
    args = parser.parse_args()
    # Forward both flags; without include_git_dir the --include-git-dir
    # option would be accepted but have no effect.
    walk_filesystem(
        ignore_gitignore=args.no_gitignore,
        include_git_dir=args.include_git_dir,
    )
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()