Fix duplicate "Sources" output in chats with multiple messages

Signed-off-by: Daniel Henry <iamdanhenry@gmail.com>
2026-01-20 23:37:51 -06:00
parent 9f8c654019
commit 3795a441f6


@@ -1,8 +1,7 @@
 """
-title: Obsidian RAG
-author: Daniel
-version: 6.0
-required_open_webui_version: 0.3.9
+title: Obsidian RAG Pipeline
+author: Daniel Henry
+version: 0.15
 """
 import asyncio
@@ -45,7 +44,7 @@ class Pipe:
         )
         # LLM
-        embedding_model: str = Field(default="nomic-embed-text")
+        embedding_model: str = Field(default="nomic-embed-text:latest")
         llm_model: str = Field(default="llama3.2:3b")
         llm_context_size: int = Field(default=8192)
         llm_timeout: float = Field(default=300.0)
@@ -177,7 +176,11 @@ Rewrite the question to be standalone (respond with ONLY the rewritten question,
         try:
             async with session.post(
                 f"{self.valves.ollama_url}/api/embeddings",
-                json={"model": self.valves.embedding_model, "prompt": search_query},
+                json={
+                    "model": self.valves.embedding_model,
+                    "prompt": search_query,
+                    "options": {"num_ctx": 8192},
+                },
                 timeout=aiohttp.ClientTimeout(total=15),
             ) as resp:
                 if resp.status != 200:
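
For reference, the reshaped request body can be exercised on its own. The sketch below is a minimal standalone version, assuming a local Ollama server at http://localhost:11434 with the nomic-embed-text:latest model already pulled; the URL, model name, and the embed() helper are illustrative, not part of this commit.

```python
# Minimal sketch of the new /api/embeddings request shape (assumed local Ollama setup).
import asyncio
import aiohttp


async def embed(query: str) -> list[float]:
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "http://localhost:11434/api/embeddings",  # assumed Ollama URL
            json={
                "model": "nomic-embed-text:latest",   # assumed model name
                "prompt": query,
                # Same option the hunk above adds: widen the context window
                # so longer search queries are not truncated during embedding.
                "options": {"num_ctx": 8192},
            },
            timeout=aiohttp.ClientTimeout(total=15),
        ) as resp:
            resp.raise_for_status()
            data = await resp.json()
            return data["embedding"]


if __name__ == "__main__":
    print(len(asyncio.run(embed("test query"))))
```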
@@ -374,6 +377,22 @@ Rewrite the question to be standalone (respond with ONLY the rewritten question,
         # Only keep user/assistant messages
         conversation = [m for m in messages if m.get("role") in ("user", "assistant")]
+        # UPDATED: Robustly strip previous "Sources" to prevent pattern matching
+        conversation = []
+        for m in messages:
+            if m.get("role") not in ("user", "assistant"):
+                continue
+            msg = m.copy()
+            if msg["role"] == "assistant":
+                content = msg.get("content", "")
+                # Split on "**Sources:**" which is the visible header.
+                # This catches it even if the newlines/separators are slightly different.
+                if "**Sources:**" in content:
+                    msg["content"] = content.split("**Sources:**")[0].strip()
+            conversation.append(msg)
         llm_payload = {
             "model": self.valves.llm_model,
             "messages": [