from __future__ import annotations import base64 import json import respx from httpx import Response from notebook_tools.llama_client import LlamaClient @respx.mock async def test_llama_client_parses_openai_style_response() -> None: # Arrange: mock llama endpoint route = respx.post("http://llama.local/v1/chat/completions").mock( return_value=Response(200, json={"choices": [{"message": {"content": "Hello\nWorld"}}]}) ) client = LlamaClient(base_url="http://llama.local", model="m") # Act out = await client.ocr_jpeg(jpeg_bytes=b"\xff\xd8\xff\xe0fakejpeg") # Assert assert out == "Hello\nWorld" assert route.called # Optional: sanity-check that we really sent a base64 data URL sent = json.loads(route.calls[0].request.content.decode("utf-8")) url = sent["messages"][0]["content"][1]["image_url"]["url"] assert url.startswith("data:image/jpeg;base64,") b64 = url.split(",", 1)[1] # If this fails, our payload construction changed. base64.b64decode(b64)