from __future__ import annotations import os from functools import lru_cache from typing import Literal from pydantic import BaseModel, Field Provider = Literal["openai", "anthropic"] class LLMSettings(BaseModel): provider: Provider = Field(default=os.getenv("LLM_PROVIDER", "openai")) api_key: str = Field(default=os.getenv("LLM_API_KEY", "none")) model: str = Field(default=os.getenv("LLM_MODEL", "qwen2.5-7b-instruct.q4_k_m")) base_url: str = Field(default=os.getenv("LLM_BASE_URL", "http://ollama.internal.henryhosted.com:9292/v1")) temperature: float = Field(default=float(os.getenv("LLM_TEMPERATURE", "0.1"))) timeout_seconds: float = Field(default=float(os.getenv("LLM_TIMEOUT_SECONDS", "60"))) max_retries: int = Field(default=int(os.getenv("LLM_MAX_RETRIES", "3"))) @lru_cache(maxsize=1) def get_settings() -> LLMSettings: return LLMSettings() def get_request_settings( provider: str | None = None, model: str | None = None, base_url: str | None = None, api_key: str | None = None, temperature: float | None = None, ) -> LLMSettings: base = get_settings() data = base.model_dump() if provider is not None: data["provider"] = provider if model is not None: data["model"] = model if base_url is not None: data["base_url"] = base_url if api_key is not None: data["api_key"] = api_key if temperature is not None: data["temperature"] = temperature return LLMSettings(**data)