"""Dumpy: dump a project's files as one text blob formatted for LLMs."""

import argparse
import mimetypes
from pathlib import Path

import pathspec
import pyperclip
from rich import print as rprint
from rich.console import Console
from rich.text import Text

# Token-estimate thresholds used to colour the stats output.
_TOKEN_WARN_THRESHOLD = 3000
_TOKEN_DANGER_THRESHOLD = 4000


def get_file_description(path: Path) -> str:
    """Return the MIME type guessed from *path*'s name, or a placeholder."""
    mime_type, _ = mimetypes.guess_type(path)
    return mime_type or "Unknown mimetype"


def generate_metadata_string(path: Path, root: Path) -> list[str]:
    """Return the header lines describing *path*.

    The header shows the path relative to *root* and the guessed MIME type,
    framed by two separator lines.

    Raises:
        ValueError: If *path* is not located under *root*.
    """
    desc = get_file_description(path)
    relative_path = path.relative_to(root)
    return [
        "#################",
        f"## Filename: {relative_path}",
        f"## Mimetype: {desc}",
        "#################",
    ]


def get_ignore_spec(root: Path) -> pathspec.PathSpec | None:
    """Load ``root/.gitignore`` as a path spec, or return ``None`` if absent."""
    gitignore_path = root / ".gitignore"
    if not gitignore_path.is_file():
        return None
    # Explicit encoding so parsing does not depend on the platform locale.
    with gitignore_path.open("r", encoding="utf-8") as f:
        return pathspec.PathSpec.from_lines("gitwildmatch", f)


def should_include(
    path: Path,
    spec: pathspec.PathSpec | None,
    root: Path,
    include_git_dir: bool = False,
    manual_ignores: list[str] | None = None,
) -> bool:
    """Decide whether *path* belongs in the dump.

    Args:
        path: Candidate file; must be located under *root*.
        spec: Parsed ignore patterns, or ``None`` to skip pattern matching.
        root: Directory that ignore rules are evaluated relative to.
        include_git_dir: When false, anything under a ``.git`` component is
            excluded.
        manual_ignores: Path component names that exclude a file when any of
            them appears in its root-relative path. ``None`` means no manual
            ignores.

    Returns:
        ``True`` if the file should be included, ``False`` otherwise.

    Raises:
        ValueError: If *path* is not located under *root*.
    """
    relative_path = path.relative_to(root)
    # Match on the root-relative components so that directory names above the
    # root can never trigger an exclusion.
    parts = relative_path.parts

    if manual_ignores and any(part in manual_ignores for part in parts):
        return False
    if not include_git_dir and ".git" in parts:
        return False
    if spec is None:
        return True
    return not spec.match_file(str(relative_path))


def walk_filesystem(
    ignore_gitignore: bool,
    include_git_dir: bool = False,
    manual_ignores: list[str] | None = None,
) -> tuple[int, list[str]]:
    """Collect metadata and contents for every included file under ``.``.

    Args:
        ignore_gitignore: When true, ``.gitignore`` is not consulted.
        include_git_dir: When true, files under ``.git`` are eligible.
        manual_ignores: Path component names to exclude (see
            ``should_include``).

    Returns:
        ``(file_count, lines)``. ``file_count`` is the number of included
        files; a file whose contents could not be read is still counted and
        still gets its metadata header, but no contents section.
    """
    root = Path(".")
    spec = None if ignore_gitignore else get_ignore_spec(root)

    final_content: list[str] = []
    file_count = 0

    # Sorted so repeated runs over the same tree give identical output.
    for path in sorted(root.rglob("*")):
        if not path.is_file():
            continue
        if not should_include(path, spec, root, include_git_dir, manual_ignores):
            continue

        final_content += generate_metadata_string(path, root)
        file_count += 1

        success, content = get_file_contents(path)
        if success:
            final_content.append("## File Contents:\n```")
            final_content.append(content)
            final_content.append("```\n")

    return (file_count, final_content)


def get_file_contents(path: Path) -> tuple[bool, str]:
    """Read *path* as UTF-8 text.

    Returns:
        ``(True, text)`` on success, or ``(False, "")`` when the file is not
        valid UTF-8 or cannot be opened/read.
    """
    try:
        with path.open("r", encoding="utf-8") as f:
            return (True, f.read())
    except (UnicodeDecodeError, OSError):
        # Best effort: undecodable or unreadable files are skipped.
        return (False, "")


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line argument parser."""
    parser = argparse.ArgumentParser(
        description=(
            "Dumpy: A tool for providing a text representation of a project "
            "formatted in a way that LLMs will understand."
        )
    )
    parser.add_argument(
        "--no-gitignore",
        action="store_true",
        help="Ignore the .gitignore file and include all files",
    )
    parser.add_argument(
        "--include-git-dir",
        action="store_true",
        help="Include the .git directory in the output",
    )
    parser.add_argument(
        "--no-clipboard",
        action="store_true",
        help="Skip putting content into the clipboard and output directly to the console",
    )
    parser.add_argument(
        "--no-stats",
        action="store_true",
        help="Skip printing stats at the end of the output",
    )
    parser.add_argument(
        "--ignore",
        type=str,
        help="Comma separated list of patterns to ignore.",
    )
    return parser


def _token_color(token_estimate: int) -> str:
    """Return the rich colour name used to display *token_estimate*."""
    # Check the higher threshold first so the "red" band is reachable.
    if token_estimate > _TOKEN_DANGER_THRESHOLD:
        return "red"
    if token_estimate > _TOKEN_WARN_THRESHOLD:
        return "orange3"
    return "green"


def main() -> None:
    """Parse arguments, build the dump, then print or copy it and show stats."""
    args = _build_parser().parse_args()

    manual_ignores: list[str] = []
    if args.ignore:
        manual_ignores = [
            item.strip() for item in args.ignore.split(",") if item.strip()
        ]

    count, file_content = walk_filesystem(
        ignore_gitignore=args.no_gitignore,
        include_git_dir=args.include_git_dir,
        manual_ignores=manual_ignores,
    )

    # Keep the root header in front of the walked content instead of
    # overwriting it with the walk result.
    content: list[str] = [f"Root Directory: {Path('.').absolute()}\n"]
    content += file_content
    string_content = "\n".join(content)

    if args.no_clipboard:
        print(string_content)
    else:
        pyperclip.copy(string_content)
        if not args.no_stats:
            rprint("\n[bold green]Directory contents copied to clipboard...[/]")
        print()  # Fix styling from the "contents copied" message

    if not args.no_stats:
        # Rough estimate: about four characters per token.
        token_estimate = round(len(string_content) / 4)
        tokens_color = _token_color(token_estimate)

        console = Console()

        # Output stats to the command line
        rprint(f"[bold blue]Total Files:[/] [white]{count}[/white]")
        rprint(f"[bold blue]Total Characters:[/] [white]{len(string_content)}[/white]")

        label = Text("Estimated Tokens: ", style="bold blue")
        label.append(Text(f"~{token_estimate}", style=tokens_color))
        console.print(label + "\n")


if __name__ == "__main__":
    main()