147 lines
4.3 KiB
Python
147 lines
4.3 KiB
Python
from pathlib import Path
|
|
from rich import print as rprint
|
|
from rich.console import Console
|
|
from rich.text import Text
|
|
|
|
|
|
import pathspec
|
|
import argparse
|
|
import mimetypes
|
|
import pyperclip
|
|
|
|
def get_file_description(path: Path):
    """Return the MIME type that ``mimetypes`` guesses for *path*.

    When no type can be guessed, the literal string ``"Unknown mimetype"``
    is returned instead of ``None``.
    """
    guessed = mimetypes.guess_type(path)[0]
    if guessed:
        return guessed
    return "Unknown mimetype"
|
|
|
|
def generate_metadata_string(path: Path, root: Path) -> list[str]:
    """Build the header lines that introduce one file in the dump.

    The result is four lines: a ``#`` rule, the file's path relative to
    *root*, the description from ``get_file_description``, and a closing
    rule.
    """
    mime = get_file_description(path)
    shown_path = path.relative_to(root)
    rule = "#################"
    return [
        rule,
        f"## Filename: {shown_path}",
        f"## Mimetype: {mime}",
        rule,
    ]
|
|
|
|
def get_ignore_spec(root: Path) -> "pathspec.PathSpec | None":
    """Load the ignore patterns from ``root/.gitignore``.

    Args:
        root: Directory expected to contain the ``.gitignore`` file.

    Returns:
        A ``PathSpec`` built from the file's lines with the
        ``gitwildmatch`` pattern style, or ``None`` when *root* has no
        regular ``.gitignore`` file.
    """
    gitignore_path = root / ".gitignore"

    # is_file() rather than exists(): a directory that happens to be named
    # ".gitignore" cannot be opened for reading, so treat it as "no rules".
    if not gitignore_path.is_file():
        return None

    # Decode explicitly as UTF-8 (as get_file_contents does) so parsing does
    # not depend on the locale; undecodable bytes are replaced rather than
    # aborting the whole run.
    with gitignore_path.open("r", encoding="utf-8", errors="replace") as f:
        return pathspec.PathSpec.from_lines("gitwildmatch", f)
|
|
|
|
def should_include(path: Path, spec: "pathspec.PathSpec | None", root: Path, include_git_dir: bool = False):
    """Decide whether *path* belongs in the dump.

    Args:
        path: File being considered; must be located under *root*.
        spec: Compiled ignore patterns, or ``None`` to apply no patterns.
        root: Directory the walk started from; patterns and the ``.git``
            check are evaluated against the path relative to it.
        include_git_dir: When true, files inside a ``.git`` directory are
            not excluded by the ``.git`` check.

    Returns:
        ``True`` if the file should be included, ``False`` otherwise.

    Raises:
        ValueError: If *path* is not under *root* (from ``relative_to``).
    """
    relative_path = path.relative_to(root)

    # Inspect only the components below root: if root itself sits beneath a
    # directory named ".git", the full path would otherwise exclude every file.
    if not include_git_dir and ".git" in relative_path.parts:
        return False

    if spec is None:
        return True

    return not spec.match_file(str(relative_path))
|
|
|
|
def walk_filesystem(ignore_gitignore: bool, include_git_dir: bool = False) -> tuple[int, list[str]]:
    """Collect a text dump of every included file under the current directory.

    Args:
        ignore_gitignore: When true, ``.gitignore`` is not consulted and no
            ignore patterns are applied.
        include_git_dir: Forwarded to ``should_include``; when true, files
            inside ``.git`` are not excluded.

    Returns:
        ``(file_count, lines)``. ``file_count`` is the number of included
        files, whether or not their contents could be read. ``lines`` holds,
        for each included file, its metadata header followed by a fenced
        contents section when the file could be read.
    """
    root = Path(".")
    spec = None if ignore_gitignore else get_ignore_spec(root)

    final_content: list[str] = []
    file_count: int = 0

    # rglob yields paths in no guaranteed order; sort so repeated runs over
    # the same tree produce identical output.
    for path in sorted(root.rglob("*")):
        if not path.is_file():
            continue
        if not should_include(path, spec, root, include_git_dir):
            continue

        final_content += generate_metadata_string(path, root)
        file_count += 1

        success, content = get_file_contents(path)
        # Unreadable files keep their metadata header but get no contents.
        if success:
            final_content.append("## File Contents:\n```")
            final_content.append(content)
            final_content.append("```\n")

    return (file_count, final_content)
|
|
|
|
def get_file_contents(path: Path) -> tuple[bool, str]:
    """Read *path* as UTF-8 text without raising on unreadable files.

    Args:
        path: File to read.

    Returns:
        ``(True, text)`` when the file was read and decoded, otherwise
        ``(False, "")``.
    """
    try:
        with path.open("r", encoding="utf-8") as f:
            return (True, f.read())
    except (UnicodeDecodeError, OSError):
        # OSError covers PermissionError as well as files that vanish or
        # become unreadable mid-walk; one bad file must not abort the dump.
        return (False, "")
|
|
|
|
def main():
    """Command-line entry point: dump the current directory for an LLM.

    Parses the flags, gathers the dump via ``walk_filesystem``, then either
    prints it (``--no-clipboard``) or copies it to the clipboard. Unless
    ``--no-stats`` is given, a summary follows with the file count, the
    character count and a rough token estimate (characters / 4).
    """
    parser = argparse.ArgumentParser(description="Dumpy: A tool for providing a text representation of a project formatted in a way that LLMs will understand.")

    parser.add_argument(
        "--no-gitignore",
        action="store_true",
        help="Ignore the .gitignore file and include all files"
    )

    parser.add_argument(
        "--include-git-dir",
        action="store_true",
        help="Include the .git directory in the output"
    )

    parser.add_argument(
        "--no-clipboard",
        action="store_true",
        help="Skip putting content into the clipboard and output directly to the console"
    )

    parser.add_argument(
        "--no-stats",
        action="store_true",
        help="Skip printing stats at the end of the output"
    )

    args = parser.parse_args()

    # Single quotes inside the f-string keep this valid before Python 3.12.
    content: list[str] = [f"Root Directory: {Path('.').absolute()}\n"]

    # Forward both flags; --include-git-dir has no effect unless it is passed.
    count, file_lines = walk_filesystem(
        ignore_gitignore=args.no_gitignore,
        include_git_dir=args.include_git_dir,
    )
    # Extend rather than rebind so the root-directory header is kept.
    content.extend(file_lines)

    string_content = "\n".join(content)
    if args.no_clipboard:
        print(string_content)
    else:
        pyperclip.copy(string_content)

        if not args.no_stats:
            rprint("\n[bold green]Directory contents copied to clipboard...[/]")

        print()  # Fix styling from the "contents copied" message

    if not args.no_stats:
        token_estimate = round(len(string_content) / 4)

        # Test the larger threshold first; otherwise the red branch can
        # never be reached.
        if token_estimate > 4000:
            tokens_color = "red"
        elif token_estimate > 3000:
            tokens_color = "orange3"
        else:
            tokens_color = "green"

        console = Console()

        # Output stats to the command line
        rprint(f"[bold blue]Total Files:[/] [white]{count}[/white]")
        rprint(f"[bold blue]Total Characters:[/] [white]{len(string_content)}[/white]")

        label = Text("Estimated Tokens: ", style="bold blue")
        label.append(Text(f"~{token_estimate}", style=tokens_color))

        console.print(label + "\n")
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
|