From 3e9904576fa968e59e1ae043c332379c1ee092db Mon Sep 17 00:00:00 2001
From: Steve W
Date: Thu, 9 Apr 2026 21:06:46 +0000
Subject: [PATCH] Add YAML config support and Compose deployment example

---
 app/config.py      | 62 +++++++++++++++++++++++++++++------
 docs/deployment.md | 88 ++++++++++++++++++++++++++++++++++++++++------
 pyproject.toml     |  1 +
 3 files changed, 131 insertions(+), 20 deletions(-)

diff --git a/app/config.py b/app/config.py
index b2d1d87..82b2951 100644
--- a/app/config.py
+++ b/app/config.py
@@ -2,26 +2,68 @@ from __future__ import annotations
 
 import os
 from functools import lru_cache
-from typing import Literal
+from pathlib import Path
+from typing import Any, Literal
 
-from pydantic import BaseModel, Field
+import yaml
+from pydantic import BaseModel
 
 Provider = Literal["openai", "anthropic"]
 
+DEFAULT_CONFIG_PATHS = ["config.yml", "config.yaml", "/config/config.yml", "/config/config.yaml"]
+
 
 class LLMSettings(BaseModel):
-    provider: Provider = Field(default=os.getenv("LLM_PROVIDER", "openai"))
-    api_key: str = Field(default=os.getenv("LLM_API_KEY", "none"))
-    model: str = Field(default=os.getenv("LLM_MODEL", "qwen2.5-7b-instruct.q4_k_m"))
-    base_url: str = Field(default=os.getenv("LLM_BASE_URL", "http://ollama.internal.henryhosted.com:9292/v1"))
-    temperature: float = Field(default=float(os.getenv("LLM_TEMPERATURE", "0.1")))
-    timeout_seconds: float = Field(default=float(os.getenv("LLM_TIMEOUT_SECONDS", "60")))
-    max_retries: int = Field(default=int(os.getenv("LLM_MAX_RETRIES", "3")))
+    provider: Provider = "openai"
+    api_key: str = "none"
+    model: str = "qwen2.5-7b-instruct.q4_k_m"
+    base_url: str = "http://ollama.internal.henryhosted.com:9292/v1"
+    temperature: float = 0.1
+    timeout_seconds: float = 60
+    max_retries: int = 3
+
+
+def _load_yaml_config() -> dict[str, Any]:
+    """Return the `llm` mapping from the first config file found, or {}."""
+    explicit = os.getenv("EMAIL_CLASSIFIER_CONFIG") or os.getenv("APP_CONFIG_FILE")
+    candidates = [explicit] if explicit else DEFAULT_CONFIG_PATHS
+    for candidate in candidates:
+        if not candidate:
+            continue
+        path = Path(candidate)
+        if not path.exists() or not path.is_file():
+            continue
+        data = yaml.safe_load(path.read_text()) or {}
+        if not isinstance(data, dict):
+            raise ValueError(f"Config file must contain a mapping/object: {path}")
+        llm = data.get("llm", data)
+        if not isinstance(llm, dict):
+            raise ValueError(f"LLM config must be a mapping/object: {path}")
+        return llm
+    return {}
+
+
+def _env_or_yaml(env_name: str, yaml_data: dict[str, Any], yaml_key: str, default: Any) -> Any:
+    """Resolve one setting: environment variable first, then YAML, then default."""
+    value = os.getenv(env_name)
+    if value is not None:
+        return value
+    if yaml_key in yaml_data and yaml_data[yaml_key] is not None:
+        return yaml_data[yaml_key]
+    return default
 
 
 @lru_cache(maxsize=1)
 def get_settings() -> LLMSettings:
-    return LLMSettings()
+    yaml_data = _load_yaml_config()
+    return LLMSettings(
+        provider=_env_or_yaml("LLM_PROVIDER", yaml_data, "provider", "openai"),
+        api_key=_env_or_yaml("LLM_API_KEY", yaml_data, "api_key", "none"),
+        model=_env_or_yaml("LLM_MODEL", yaml_data, "model", "qwen2.5-7b-instruct.q4_k_m"),
+        base_url=_env_or_yaml("LLM_BASE_URL", yaml_data, "base_url", "http://ollama.internal.henryhosted.com:9292/v1"),
+        temperature=float(_env_or_yaml("LLM_TEMPERATURE", yaml_data, "temperature", 0.1)),
+        timeout_seconds=float(_env_or_yaml("LLM_TIMEOUT_SECONDS", yaml_data, "timeout_seconds", 60)),
+        max_retries=int(_env_or_yaml("LLM_MAX_RETRIES", yaml_data, "max_retries", 3)),
+    )
 
 
 def get_request_settings(
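A minimal sketch of how the precedence above can be sanity-checked, assuming the module is importable as `app.config` and that a config file exists at `/config/config.yml`; the paths and values here are illustrative only, not part of the patch:

```python
# Sketch: environment variables win over YAML, which wins over built-in defaults.
import os

from app.config import get_settings

os.environ["EMAIL_CLASSIFIER_CONFIG"] = "/config/config.yml"  # assumed to define model, temperature, etc.
os.environ["LLM_TEMPERATURE"] = "0.0"  # env var should beat the YAML value

get_settings.cache_clear()  # get_settings() is wrapped in lru_cache; clear it before re-reading
settings = get_settings()

print(settings.model)        # whatever the YAML file set
print(settings.temperature)  # 0.0, from the environment
```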
diff --git a/docs/deployment.md b/docs/deployment.md
index dcfa49a..19984bc 100644
--- a/docs/deployment.md
+++ b/docs/deployment.md
@@ -4,6 +4,43 @@ The service ships with a `Dockerfile` based on `python:3.12-slim-bookworm` using
 [uv](https://astral.sh/uv/) for fast dependency installation.
 
+### Configuration sources
+
+The application now supports two configuration sources:
+- environment variables
+- a YAML config file
+
+Load order (highest precedence first):
+1. per-request overrides
+2. environment variables
+3. YAML config file
+4. built-in defaults
+
+Default config file locations, checked in this order:
+- `config.yml`
+- `config.yaml`
+- `/config/config.yml`
+- `/config/config.yaml`
+
+You can also set an explicit config path (`APP_CONFIG_FILE` is accepted as an alias):
+
+```bash
+export EMAIL_CLASSIFIER_CONFIG=/path/to/config.yml
+```
+
+Example `config.yml`:
+
+```yaml
+llm:
+  provider: anthropic
+  base_url: https://api.minimax.io/anthropic
+  api_key: your_api_key_here
+  model: MiniMax-M2.7
+  temperature: 0.1
+  timeout_seconds: 60
+  max_retries: 3
+```
+
 ### Building
 
 ```bash
 docker build -t email-classifier .
@@ -15,19 +52,50 @@
 ```bash
 docker run -d --name email-classifier \
   -p 7999:7999 \
-  -e LLM_PROVIDER=openai \
-  -e LLM_BASE_URL=http://your-ollama:11434/v1 \
-  -e LLM_API_KEY=none \
-  -e LLM_MODEL=qwen2.5-7b-instruct.q4_k_m \
-  -e LLM_TEMPERATURE=0.1 \
+  -e EMAIL_CLASSIFIER_CONFIG=/config/config.yml \
   -e EMAIL_CLASSIFIER_DB_PATH=/data/email_classifier.db \
+  -v /path/to/config.yml:/config/config.yml:ro \
   -v /path/to/local/data:/data \
   email-classifier
 ```
 
 Mount a persistent volume for `/data` (or wherever `EMAIL_CLASSIFIER_DB_PATH` points) to preserve the dedupe database across container restarts.
 
-### Building for a Remote Registry
+Environment variables still override file-based config, so you can keep most settings in YAML and override just one or two values at deploy time.
+
+## Docker Compose example
+
+```yaml
+services:
+  email-classifier:
+    image: your-registry.example.com/your-org/email-classifier:latest
+    container_name: email-classifier
+    ports:
+      - "7999:7999"
+    environment:
+      EMAIL_CLASSIFIER_CONFIG: /config/config.yml
+      EMAIL_CLASSIFIER_DB_PATH: /data/email_classifier.db
+      # Optional overrides. Env vars win over YAML values.
+      # LLM_MODEL: MiniMax-M2.7
+      # LLM_TIMEOUT_SECONDS: "90"
+    volumes:
+      - ./config.yml:/config/config.yml:ro
+      - ./data:/data
+    restart: unless-stopped
+    # If your LLM backend runs on the Docker host, one option is:
+    # extra_hosts:
+    #   - "host.docker.internal:host-gateway"
+```
+
+### Compose notes
+
+- Mount the YAML config read-only into the container, typically at `/config/config.yml`.
+- Mount a writable volume for `/data` so dedupe state survives restarts.
+- Override specific values with environment variables when needed.
+- If the LLM backend is another container on the same Compose network, use its service name in `base_url` (e.g. `http://ollama:11434/v1`); a sketch follows this diff.
+- If the LLM backend runs on the host, use `host.docker.internal` or a host-gateway mapping where appropriate.
+
+## Building for a Remote Registry
 
 ```bash
 docker build -t \
@@ -57,16 +125,16 @@ The workflow tags the image as:
 
 ### Deployment Considerations
 
-- **Network access** — The container needs to reach your LLM backend. If using Ollama on the host, use `host.docker.internal` (Linux) or `docker.for.mac.localhost` (macOS) as the base URL.
+- **Network access** — The container needs to reach your LLM backend. If using Ollama or another service on the host, use `host.docker.internal` or an explicit host-gateway mapping.
 - **Dedupe persistence** — Mount a volume for the SQLite database to persist dedupe state across deploys.
 - **Port** — The container exposes port `7999`. Map it to any host port you prefer.
 - **Health check** — The service does not currently expose a dedicated `/health` endpoint. Use `GET /docs` as a liveness probe.
 
 ## Production Checklist
 
-- [ ] Set `LLM_API_KEY` to a real key (not `none`) in production
-- [ ] Use HTTPS for `LLM_BASE_URL` in production
+- [ ] Provide either a YAML config file or the required `LLM_*` environment variables
+- [ ] Use HTTPS for remote `LLM_BASE_URL` values in production
 - [ ] Mount a persistent volume for `EMAIL_CLASSIFIER_DB_PATH`
 - [ ] Set appropriate resource limits (CPU/memory) on the container
 - [ ] Configure `LLM_MAX_RETRIES` and `LLM_TIMEOUT_SECONDS` to suit your LLM backend's reliability
-- [ ] Set `LLM_TEMPERATURE=0.1` (or similar low value) for consistent classification results
+- [ ] Keep `LLM_TEMPERATURE` low (e.g. `0.1`) for consistent classification results
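Following up on the Compose note about reaching a containerized backend by service name: one plausible shape for running Ollama as a sibling service. This is a sketch, not part of the patch; the `ollama/ollama` image, its `/root/.ollama` model directory, and port `11434` are the upstream defaults, while the registry path and volume names are placeholders.

```yaml
# Sketch only: Ollama as a sibling Compose service, addressed by service name.
services:
  ollama:
    image: ollama/ollama:latest
    volumes:
      - ollama-models:/root/.ollama   # persist pulled models across restarts
  email-classifier:
    image: your-registry.example.com/your-org/email-classifier:latest
    ports:
      - "7999:7999"
    environment:
      # "ollama" resolves on the default Compose network; 11434 is Ollama's API port.
      LLM_BASE_URL: http://ollama:11434/v1
      EMAIL_CLASSIFIER_DB_PATH: /data/email_classifier.db
    volumes:
      - ./data:/data
    depends_on:
      - ollama
    restart: unless-stopped

volumes:
  ollama-models:
```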
diff --git a/pyproject.toml b/pyproject.toml
index 17c2c0b..8ac5fc9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,5 +9,6 @@ dependencies = [
     "beautifulsoup4>=4.14.3",
     "fastapi>=0.128.0",
     "openai>=2.16.0",
+    "PyYAML>=6.0.2",
     "uvicorn>=0.40.0",
 ]
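As a companion to the Production Checklist, a hedged example of what a production `config.yml` could look like. The hostname and values are placeholders, and the API key is deliberately left to the `LLM_API_KEY` environment variable, which overrides YAML and keeps the secret out of the file:

```yaml
# Hypothetical production config.yml; all values are placeholders.
llm:
  provider: openai
  base_url: https://llm.example.com/v1  # HTTPS for remote backends
  model: qwen2.5-7b-instruct.q4_k_m
  temperature: 0.1      # low temperature for consistent classification
  timeout_seconds: 90   # tune to your backend's latency
  max_retries: 3
# Set LLM_API_KEY in the environment rather than here.
```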