Fix duplicated "Sources" sections in replies for chats with multiple messages

Signed-off-by: Daniel Henry <iamdanhenry@gmail.com>
2026-01-20 23:37:51 -06:00
parent 9f8c654019
commit 3795a441f6
1 changed files with 25 additions and 6 deletions
--- a/ObsidianRAGPipe.py
+++ b/ObsidianRAGPipe.py
@@ -1,8 +1,7 @@
 """
-title: Obsidian RAG
+title: Obsidian RAG Pipeline
-author: Daniel
+author: Daniel Henry
-version: 6.0
+version: 0.15
 required_open_webui_version: 0.3.9
 """
 import asyncio
@@ -45,7 +44,7 @@ class Pipe:
        )
        # LLM
-        embedding_model: str = Field(default="nomic-embed-text")
+        embedding_model: str = Field(default="nomic-embed-text:latest")
        llm_model: str = Field(default="llama3.2:3b")
        llm_context_size: int = Field(default=8192)
        llm_timeout: float = Field(default=300.0)
@@ -177,7 +176,11 @@ Rewrite the question to be standalone (respond with ONLY the rewritten question,
        try:
            async with session.post(
                f"{self.valves.ollama_url}/api/embeddings",
-                json={"model": self.valves.embedding_model, "prompt": search_query},
+                json={
                    "model": self.valves.embedding_model,
                    "prompt": search_query,
                    "options": {"num_ctx": 8192},
                },
                timeout=aiohttp.ClientTimeout(total=15),
            ) as resp:
                if resp.status != 200:
@@ -374,6 +377,22 @@ Rewrite the question to be standalone (respond with ONLY the rewritten question,
        # Only keep user/assistant messages
        conversation = [m for m in messages if m.get("role") in ("user", "assistant")]
        # UPDATED: Strip the "Sources" section from earlier assistant messages so the LLM does not pattern-match on it and emit its own
        conversation = []
        for m in messages:
            if m.get("role") not in ("user", "assistant"):
                continue
            msg = m.copy()
            if msg["role"] == "assistant":
                content = msg.get("content", "")
                # Split on "**Sources:**" which is the visible header.
                # This catches it even if the newlines/separators are slightly different.
                if "**Sources:**" in content:
                    msg["content"] = content.split("**Sources:**")[0].strip()
            conversation.append(msg)
        llm_payload = {
            "model": self.valves.llm_model,
            "messages": [