"""Dumpy: print a text dump of the files under the current directory.

Each included file is emitted as a small metadata header (relative path and
guessed MIME type) followed by its UTF-8 contents when they can be read.
"""

import argparse
import mimetypes
from pathlib import Path

import pathspec


def get_file_description(path: Path):
    """Return the MIME type guessed from *path*'s name.

    Falls back to the string ``"Unknown mimetype"`` when ``mimetypes`` cannot
    guess a type.
    """
    mime_type, _ = mimetypes.guess_type(path)
    return mime_type or "Unknown mimetype"


def output_metadata(path: Path, root: Path):
    """Print the header block (relative filename and MIME type) for *path*.

    Args:
        path: File being dumped; must be located under *root*.
        root: Directory the printed filename is made relative to.
    """
    desc = get_file_description(path)
    relative_path = path.relative_to(root)
    print("#################")
    print(f"## Filename: {relative_path}")
    print(f"## Mimetype: {desc}")
    print("#################")


def get_ignore_spec(root: Path):
    """Build a ``PathSpec`` from ``root/.gitignore``.

    Returns:
        The compiled spec, or ``None`` when *root* has no ``.gitignore``.
    """
    gitignore_path = root / ".gitignore"
    if not gitignore_path.exists():
        return None
    # Explicit encoding so parsing does not depend on the platform default.
    with gitignore_path.open("r", encoding="utf-8") as f:
        return pathspec.PathSpec.from_lines('gitwildmatch', f)


def should_include(
    path: Path,
    spec: pathspec.PathSpec | None,
    root: Path,
    include_git_dir: bool = False,
):
    """Decide whether *path* belongs in the dump.

    Args:
        path: Candidate file; must be located under *root*.
        spec: Compiled ignore patterns, or ``None`` to apply no patterns.
        root: Directory that ignore patterns are evaluated relative to.
        include_git_dir: When false, anything with a ``.git`` path component
            below *root* is excluded.

    Returns:
        ``True`` if the file should be emitted, ``False`` otherwise.
    """
    relative_path = path.relative_to(root)
    # Inspect only the components below root: a root that itself lives under
    # a directory named ".git" must not cause every file to be excluded.
    if not include_git_dir and ".git" in relative_path.parts:
        return False
    if spec is None:
        return True
    return not spec.match_file(str(relative_path))


def walk_filesystem(ignore_gitignore: bool, include_git_dir: bool = False):
    """Walk the current directory and print every included file.

    Args:
        ignore_gitignore: When true, ``.gitignore`` patterns are not applied.
        include_git_dir: When true, files under ``.git`` are not excluded.
    """
    root = Path(".")
    spec = None if ignore_gitignore else get_ignore_spec(root)

    for path in root.rglob("*"):
        if not path.is_file():
            continue
        if not should_include(path, spec, root, include_git_dir):
            continue

        # The header is printed even when the contents turn out to be
        # unreadable, so such files are still listed in the dump.
        output_metadata(path, root)
        success, content = get_file_contents(path)
        if success:
            print("## File Contents:")
            print(content)
        print("\n")


def get_file_contents(path: Path) -> tuple[bool, str]:
    """Read *path* as UTF-8 text.

    Returns:
        ``(True, text)`` on success, or ``(False, "")`` when the file is not
        valid UTF-8 or cannot be read.
    """
    try:
        with path.open("r", encoding="utf-8") as f:
            return (True, f.read())
    except (UnicodeDecodeError, OSError):
        # OSError covers PermissionError as well as files that vanish or
        # become unreadable mid-walk; one bad file should not abort the dump.
        return (False, "")


def main():
    """Parse command-line flags and run the dump."""
    parser = argparse.ArgumentParser(
        description="Dumpy: A tool for providing a text representation of a project formatted in a way that LLMs will understand."
    )
    parser.add_argument(
        "--no-gitignore",
        action="store_true",
        help="Ignore the .gitignore file and include all files",
    )
    parser.add_argument(
        "--include-git-dir",
        action="store_true",
        help="Include the .git directory in the output",
    )
    args = parser.parse_args()

    # Forward both flags; --include-git-dir was previously parsed but never
    # passed on, so it had no effect.
    walk_filesystem(
        ignore_gitignore=args.no_gitignore,
        include_git_dir=args.include_git_dir,
    )


if __name__ == "__main__":
    main()