"""Dumpy: dump a project's text files into one LLM-friendly text blob."""

from pathlib import Path
import pathspec
import argparse
import mimetypes
import pyperclip


def get_file_description(path: Path) -> str:
    """Return the MIME type guessed from *path*'s name, or a placeholder."""
    mime_type, _ = mimetypes.guess_type(path)
    return mime_type or "Unknown mimetype"


def generate_metadata_string(path: Path, root: Path) -> list[str]:
    """Build the banner lines that introduce one file in the dump.

    Args:
        path: File being described; must be located under *root*.
        root: Directory the displayed filename is made relative to.

    Returns:
        Four lines: a separator, the relative filename, the MIME type and a
        closing separator.
    """
    desc = get_file_description(path)
    relative_path = path.relative_to(root)
    return [
        "#################",
        f"## Filename: {relative_path}",
        f"## Mimetype: {desc}",
        "#################",
    ]


def get_ignore_spec(root: Path) -> pathspec.PathSpec | None:
    """Load ``root/.gitignore`` as a PathSpec, or return None if it is absent."""
    gitignore_path = root / ".gitignore"
    if not gitignore_path.exists():
        return None
    # Explicit encoding so pattern parsing does not depend on the locale.
    with gitignore_path.open("r", encoding="utf-8") as f:
        return pathspec.PathSpec.from_lines('gitwildmatch', f)


def should_include(
    path: Path,
    spec: pathspec.PathSpec | None,
    root: Path,
    include_git_dir: bool = False,
) -> bool:
    """Decide whether *path* belongs in the dump.

    Args:
        path: Candidate file; must be located under *root*.
        spec: Ignore patterns to honour, or None to apply no patterns.
        root: Directory that the ignore patterns are relative to.
        include_git_dir: When False, anything below a ``.git`` component is
            rejected regardless of *spec*.

    Returns:
        True if the file should be emitted.
    """
    relative_path = path.relative_to(root)
    # Check only the components below root, so that a root which itself lives
    # under some ".git" directory does not exclude every file.
    if not include_git_dir and ".git" in relative_path.parts:
        return False
    if spec is None:
        return True
    # gitignore patterns use forward slashes on every platform.
    return not spec.match_file(relative_path.as_posix())


def walk_filesystem(ignore_gitignore: bool, include_git_dir: bool = False) -> list[str]:
    """Collect the dump lines for every included file under the current directory.

    Args:
        ignore_gitignore: When True, ``.gitignore`` is not consulted.
        include_git_dir: When True, files under ``.git`` are eligible too.

    Returns:
        Output lines: a metadata banner per included file, followed by a
        fenced copy of its contents when the file could be read as UTF-8.
    """
    root = Path(".")
    spec = None if ignore_gitignore else get_ignore_spec(root)

    final_content: list[str] = []
    for path in root.rglob("*"):
        if not path.is_file():
            continue
        if not should_include(path, spec, root, include_git_dir):
            continue

        final_content += generate_metadata_string(path, root)
        success, content = get_file_contents(path)
        # Unreadable (e.g. binary) files keep their banner but get no body.
        if success:
            final_content.append("## File Contents:\n```")
            final_content.append(content)
            final_content.append("```\n")
    return final_content


def get_file_contents(path: Path) -> tuple[bool, str]:
    """Read *path* as UTF-8 text on a best-effort basis.

    Returns:
        ``(True, text)`` on success, or ``(False, "")`` when the file is not
        valid UTF-8 or cannot be opened/read.
    """
    try:
        with path.open("r", encoding="utf-8") as f:
            return (True, f.read())
    except (UnicodeDecodeError, OSError):
        # OSError covers PermissionError as well as files that vanish or
        # become unreadable mid-walk; one bad file must not abort the dump.
        return (False, "")


def main() -> None:
    """Parse the command line, build the dump and print or copy it."""
    parser = argparse.ArgumentParser(description="Dumpy: A tool for providing a text representation of a project formatted in a way that LLMs will understand.")
    parser.add_argument(
        "--no-gitignore",
        action="store_true",
        help="Ignore the .gitignore file and include all files"
    )
    parser.add_argument(
        "--include-git-dir",
        action="store_true",
        help="Include the .git directory in the output"
    )
    parser.add_argument(
        "--no-clipboard",
        action="store_true",
        help="Skip putting content into the clipboard and output directly to the console"
    )
    args = parser.parse_args()

    # Single quotes inside the f-string keep this valid before Python 3.12.
    content: list[str] = [f"Root Directory: {Path('.').absolute()}\n"]
    # Extend rather than rebind so the root-directory header is kept, and
    # forward --include-git-dir so the flag actually takes effect.
    content += walk_filesystem(
        ignore_gitignore=args.no_gitignore,
        include_git_dir=args.include_git_dir,
    )

    output = "\n".join(content)
    if args.no_clipboard:
        print(output)
    else:
        pyperclip.copy(output)


if __name__ == "__main__":
    main()