Fix bug with multiple "source" outputs in chats with multiple messages

Signed-off-by: Daniel Henry <iamdanhenry@gmail.com>
2026-01-20 23:37:51 -06:00
parent 9f8c654019
commit 3795a441f6
1 changed files with 25 additions and 6 deletions
--- a/ObsidianRAGPipe.py
+++ b/ObsidianRAGPipe.py
@@ -1,8 +1,7 @@
 """
-title: Obsidian RAG
-author: Daniel
-version: 6.0
-required_open_webui_version: 0.3.9
+title: Obsidian RAG Pipeline
+author: Daniel Henry
+version: 0.15
 """

 import asyncio
@@ -45,7 +44,7 @@ class Pipe:
        )

        # LLM
-        embedding_model: str = Field(default="nomic-embed-text")
+        embedding_model: str = Field(default="nomic-embed-text:latest")
        llm_model: str = Field(default="llama3.2:3b")
        llm_context_size: int = Field(default=8192)
        llm_timeout: float = Field(default=300.0)
@@ -177,7 +176,11 @@ Rewrite the question to be standalone (respond with ONLY the rewritten question,
        try:
            async with session.post(
                f"{self.valves.ollama_url}/api/embeddings",
-                json={"model": self.valves.embedding_model, "prompt": search_query},
+                json={
+                    "model": self.valves.embedding_model,
+                    "prompt": search_query,
+                    "options": {"num_ctx": 8192},
+                },
                timeout=aiohttp.ClientTimeout(total=15),
            ) as resp:
                if resp.status != 200:
@@ -374,6 +377,22 @@ Rewrite the question to be standalone (respond with ONLY the rewritten question,
        # Only keep user/assistant messages
        conversation = [m for m in messages if m.get("role") in ("user", "assistant")]

+        # Rebuild the history, dropping the "Sources" footer from earlier assistant replies so the LLM does not repeat it
+        conversation = []
+        for m in messages:
+            if m.get("role") not in ("user", "assistant"):
+                continue
+
+            msg = m.copy()
+            if msg["role"] == "assistant":
+                content = msg.get("content", "")
+                # Cut at "**Sources:**", the visible header of the footer, and keep only the text before it.
+                # Matching the header alone still works if the surrounding newlines/separators differ slightly.
+                if "**Sources:**" in content:
+                    msg["content"] = content.split("**Sources:**")[0].strip()
+
+            conversation.append(msg)
+
        llm_payload = {
            "model": self.valves.llm_model,
            "messages": [