Add YAML config support and Compose deployment example

Steve W committed 2026-04-09 21:06:46 +00:00
parent 8d1109c309
commit 3e9904576f
3 changed files with 129 additions and 20 deletions

View File

@@ -2,26 +2,66 @@ from __future__ import annotations
 import os
 from functools import lru_cache
-from typing import Literal
-
-from pydantic import BaseModel, Field
+from pathlib import Path
+from typing import Any, Literal
+
+import yaml
+from pydantic import BaseModel
 
 Provider = Literal["openai", "anthropic"]
 
+DEFAULT_CONFIG_PATHS = ["config.yml", "config.yaml", "/config/config.yml", "/config/config.yaml"]
+
 
 class LLMSettings(BaseModel):
-    provider: Provider = Field(default=os.getenv("LLM_PROVIDER", "openai"))
-    api_key: str = Field(default=os.getenv("LLM_API_KEY", "none"))
-    model: str = Field(default=os.getenv("LLM_MODEL", "qwen2.5-7b-instruct.q4_k_m"))
-    base_url: str = Field(default=os.getenv("LLM_BASE_URL", "http://ollama.internal.henryhosted.com:9292/v1"))
-    temperature: float = Field(default=float(os.getenv("LLM_TEMPERATURE", "0.1")))
-    timeout_seconds: float = Field(default=float(os.getenv("LLM_TIMEOUT_SECONDS", "60")))
-    max_retries: int = Field(default=int(os.getenv("LLM_MAX_RETRIES", "3")))
+    provider: Provider = "openai"
+    api_key: str = "none"
+    model: str = "qwen2.5-7b-instruct.q4_k_m"
+    base_url: str = "http://ollama.internal.henryhosted.com:9292/v1"
+    temperature: float = 0.1
+    timeout_seconds: float = 60
+    max_retries: int = 3
+
+
+def _load_yaml_config() -> dict[str, Any]:
+    explicit = os.getenv("EMAIL_CLASSIFIER_CONFIG") or os.getenv("APP_CONFIG_FILE")
+    candidates = [explicit] if explicit else DEFAULT_CONFIG_PATHS
+    for candidate in candidates:
+        if not candidate:
+            continue
+        path = Path(candidate)
+        if not path.exists() or not path.is_file():
+            continue
+        data = yaml.safe_load(path.read_text()) or {}
+        if not isinstance(data, dict):
+            raise ValueError(f"Config file must contain a mapping/object: {path}")
+        llm = data.get("llm", data)
+        if not isinstance(llm, dict):
+            raise ValueError(f"LLM config must be a mapping/object: {path}")
+        return llm
+    return {}
+
+
+def _env_or_yaml(env_name: str, yaml_data: dict[str, Any], yaml_key: str, default: Any) -> Any:
+    value = os.getenv(env_name)
+    if value is not None:
+        return value
+    if yaml_key in yaml_data and yaml_data[yaml_key] is not None:
+        return yaml_data[yaml_key]
+    return default
 
 
 @lru_cache(maxsize=1)
 def get_settings() -> LLMSettings:
-    return LLMSettings()
+    yaml_data = _load_yaml_config()
+    return LLMSettings(
+        provider=_env_or_yaml("LLM_PROVIDER", yaml_data, "provider", "openai"),
+        api_key=_env_or_yaml("LLM_API_KEY", yaml_data, "api_key", "none"),
+        model=_env_or_yaml("LLM_MODEL", yaml_data, "model", "qwen2.5-7b-instruct.q4_k_m"),
+        base_url=_env_or_yaml("LLM_BASE_URL", yaml_data, "base_url", "http://ollama.internal.henryhosted.com:9292/v1"),
+        temperature=float(_env_or_yaml("LLM_TEMPERATURE", yaml_data, "temperature", 0.1)),
+        timeout_seconds=float(_env_or_yaml("LLM_TIMEOUT_SECONDS", yaml_data, "timeout_seconds", 60)),
+        max_retries=int(_env_or_yaml("LLM_MAX_RETRIES", yaml_data, "max_retries", 3)),
+    )
 
 
 def get_request_settings(
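A quick sketch of how the new precedence behaves at runtime may help here. This is not part of the commit: the import path `config` is a placeholder for wherever the module above actually lives, and the override model name is arbitrary.

```python
# Illustrative sketch only (not part of the commit).
import os

import config  # hypothetical import path for the settings module changed above

# Point the loader at an explicit YAML file; YAML supplies the baseline values.
os.environ["EMAIL_CLASSIFIER_CONFIG"] = "/config/config.yml"
# An environment variable set alongside the file wins over the YAML value.
os.environ["LLM_MODEL"] = "qwen2.5-14b-instruct"  # arbitrary example override

settings = config.get_settings()
print(settings.model)     # "qwen2.5-14b-instruct": the env var beats the YAML value
print(settings.base_url)  # value from /config/config.yml, else the built-in default

# get_settings() is cached via lru_cache(maxsize=1); clear the cache if the
# environment or config file changes within the same process.
config.get_settings.cache_clear()
```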

View File

@@ -4,6 +4,43 @@
 The service ships with a `Dockerfile` based on `python:3.12-slim-bookworm` using [uv](https://astral.sh/uv/) for fast dependency installation.
 
+### Configuration sources
+
+The application now supports two configuration sources:
+
+- environment variables
+- a YAML config file
+
+Load order:
+
+1. per-request overrides
+2. environment variables
+3. YAML config file
+4. built-in defaults
+
+Supported config file locations:
+
+- `config.yml`
+- `config.yaml`
+- `/config/config.yml`
+- `/config/config.yaml`
+
+You can also set an explicit config path with:
+
+```bash
+export EMAIL_CLASSIFIER_CONFIG=/path/to/config.yml
+```
+
+Example `config.yml`:
+
+```yaml
+llm:
+  provider: anthropic
+  base_url: https://api.minimax.io/anthropic
+  api_key: your_api_key_here
+  model: MiniMax-M2.7
+  temperature: 0.1
+  timeout_seconds: 60
+  max_retries: 3
+```
+
 ### Building
 
 ```bash
@@ -15,19 +52,50 @@ docker build -t email-classifier .
 ```bash
 docker run -d --name email-classifier \
   -p 7999:7999 \
-  -e LLM_PROVIDER=openai \
-  -e LLM_BASE_URL=http://your-ollama:11434/v1 \
-  -e LLM_API_KEY=none \
-  -e LLM_MODEL=qwen2.5-7b-instruct.q4_k_m \
-  -e LLM_TEMPERATURE=0.1 \
+  -e EMAIL_CLASSIFIER_CONFIG=/config/config.yml \
   -e EMAIL_CLASSIFIER_DB_PATH=/data/email_classifier.db \
+  -v /path/to/config.yml:/config/config.yml:ro \
   -v /path/to/local/data:/data \
   email-classifier
 ```
 
 Mount a persistent volume for `/data` (or wherever `EMAIL_CLASSIFIER_DB_PATH` points) to preserve the dedupe database across container restarts.
 
-### Building for a Remote Registry
+Environment variables still override file-based config, so you can keep most settings in YAML and override just one or two values at deploy time.
+
+## Docker Compose example
+
+```yaml
+services:
+  email-classifier:
+    image: your-registry.example.com/your-org/email-classifier:latest
+    container_name: email-classifier
+    ports:
+      - "7999:7999"
+    environment:
+      EMAIL_CLASSIFIER_CONFIG: /config/config.yml
+      EMAIL_CLASSIFIER_DB_PATH: /data/email_classifier.db
+      # Optional overrides. Env vars win over YAML values.
+      # LLM_MODEL: MiniMax-M2.7
+      # LLM_TIMEOUT_SECONDS: "90"
+    volumes:
+      - ./config.yml:/config/config.yml:ro
+      - ./data:/data
+    restart: unless-stopped
+    # If your LLM backend runs on the Docker host, one option is:
+    # extra_hosts:
+    #   - "host.docker.internal:host-gateway"
+```
+
+### Compose notes
+
+- Mount the YAML config read-only into the container, typically at `/config/config.yml`
+- Mount a writable volume for `/data` so dedupe state survives restarts
+- Override specific values with environment variables when needed
+- If the LLM backend is another container on the same Compose network, use its service name in `base_url`
+- If the LLM backend runs on the host, use `host.docker.internal` or a host-gateway mapping where appropriate
+
+## Building for a Remote Registry
 
 ```bash
 docker build -t \
@@ -57,16 +125,16 @@ The workflow tags the image as:
 ### Deployment Considerations
 
-- **Network access** — The container needs to reach your LLM backend. If using Ollama on the host, use `host.docker.internal` (Linux) or `docker.for.mac.localhost` (macOS) as the base URL.
+- **Network access** — The container needs to reach your LLM backend. If using Ollama or another service on the host, use `host.docker.internal` or an explicit host-gateway mapping.
 - **Dedupe persistence** — Mount a volume for the SQLite database to persist dedupe state across deploys.
 - **Port** — The container exposes port `7999`. Map it to any host port you prefer.
 - **Health check** — The service does not currently expose a dedicated `/health` endpoint. Use `GET /docs` as a liveness probe.
 
 ## Production Checklist
 
-- [ ] Set `LLM_API_KEY` to a real key (not `none`) in production
-- [ ] Use HTTPS for `LLM_BASE_URL` in production
+- [ ] Provide either a YAML config file or the required `LLM_*` environment variables
+- [ ] Use HTTPS for remote `LLM_BASE_URL` values in production
 - [ ] Mount a persistent volume for `EMAIL_CLASSIFIER_DB_PATH`
 - [ ] Set appropriate resource limits (CPU/memory) on the container
 - [ ] Configure `LLM_MAX_RETRIES` and `LLM_TIMEOUT_SECONDS` to suit your LLM backend's reliability
-- [ ] Set `LLM_TEMPERATURE=0.1` (or similar low value) for consistent classification results
+- [ ] Keep `LLM_TEMPERATURE` low for consistent classification results
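Since the doc suggests `GET /docs` as a liveness probe, a Compose `healthcheck` stanza is one way to wire that up. This is a sketch, not part of the commit; it assumes `python` stays on `PATH` in the final image (the `python:3.12-slim-bookworm` base provides it) and deliberately avoids `curl`/`wget`, which slim images often lack.

```yaml
# Sketch only: a liveness probe for the service defined in the Compose example
# above, using GET /docs in place of a dedicated health endpoint.
services:
  email-classifier:
    image: your-registry.example.com/your-org/email-classifier:latest
    # ... ports, environment, and volumes as in the example above ...
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:7999/docs')"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s
```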

View File

@@ -9,5 +9,6 @@ dependencies = [
"beautifulsoup4>=4.14.3", "beautifulsoup4>=4.14.3",
"fastapi>=0.128.0", "fastapi>=0.128.0",
"openai>=2.16.0", "openai>=2.16.0",
"PyYAML>=6.0.2",
"uvicorn>=0.40.0", "uvicorn>=0.40.0",
] ]
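The new `PyYAML` dependency backs the `yaml.safe_load` call in the config loader. A small sketch, separate from the commit, of why the `llm:` wrapper in the example `config.yml` is optional: `_load_yaml_config` falls back to the top-level mapping via `data.get("llm", data)`.

```python
# Sketch (not part of the commit): both accepted YAML shapes yield the same
# mapping of LLM settings once the loader applies data.get("llm", data).
import yaml

nested = yaml.safe_load("""
llm:
  provider: anthropic
  model: MiniMax-M2.7
""")
flat = yaml.safe_load("""
provider: anthropic
model: MiniMax-M2.7
""")

assert nested.get("llm", nested) == flat.get("llm", flat) == {
    "provider": "anthropic",
    "model": "MiniMax-M2.7",
}
```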