Compare commits
1 Commits
ab14d55824
...
ci/update-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
97abc74297 |
78
.github/workflows/build-publish.yaml
vendored
78
.github/workflows/build-publish.yaml
vendored
@@ -3,11 +3,38 @@ name: Build and Publish Docker Image
|
|||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- main # Trigger on pushes to main
|
- '**'
|
||||||
|
pull_request:
|
||||||
|
types: [opened, synchronize, reopened]
|
||||||
|
create:
|
||||||
|
refs/tags/v*
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build-and-push:
|
build-only:
|
||||||
runs-on: ubuntu-latest # Ensure your Gitea runner has this label
|
runs-on: ubuntu-latest
|
||||||
|
# All branches, all PRs, and anything that's not a push to main or a version tag
|
||||||
|
if: github.event_name != 'push' || (github.event_name == 'push' && !startsWith(gitea.ref, 'refs/tags/v') && gitea.ref != 'refs/heads/main')
|
||||||
|
steps:
|
||||||
|
- name: Checkout code
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Build (no push)
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
file: Dockerfile
|
||||||
|
push: false
|
||||||
|
tags: |
|
||||||
|
${{ secrets.DOCKER_REGISTRY }}/${{ secrets.DOCKER_USERNAME }}/email-classifier:build-test
|
||||||
|
cache-from: type=gha
|
||||||
|
cache-to: type=gha,mode=max
|
||||||
|
|
||||||
|
build-and-push-main:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
if: github.event_name == 'push' && gitea.ref == 'refs/heads/main'
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
@@ -15,27 +42,58 @@ jobs:
|
|||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
# Login to your registry (Docker Hub, Gitea Package Registry, or Harbor)
|
|
||||||
- name: Login to Docker Registry
|
- name: Login to Docker Registry
|
||||||
uses: docker/login-action@v3
|
uses: docker/login-action@v3
|
||||||
with:
|
with:
|
||||||
registry: ${{ secrets.DOCKER_REGISTRY }} # Remove if using Docker Hub
|
registry: ${{ secrets.DOCKER_REGISTRY }}
|
||||||
username: ${{ secrets.DOCKER_USERNAME }}
|
username: ${{ secrets.DOCKER_USERNAME }}
|
||||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||||
|
|
||||||
- name: Build and push
|
- name: Build and push (main branch)
|
||||||
uses: docker/build-push-action@v5
|
uses: docker/build-push-action@v5
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
file: Dockerfile
|
file: Dockerfile
|
||||||
push: true
|
push: true
|
||||||
# Tags the image as 'latest' and also uses the git SHA for versioning
|
|
||||||
tags: |
|
tags: |
|
||||||
${{ secrets.DOCKER_REGISTRY }}/${{ secrets.DOCKER_USERNAME }}/email-classifier:${{ gitea.sha }}
|
${{ secrets.DOCKER_REGISTRY }}/${{ secrets.DOCKER_USERNAME }}/email-classifier:main
|
||||||
${{ secrets.DOCKER_REGISTRY }}/${{ secrets.DOCKER_USERNAME }}/email-classifier:latest
|
${{ secrets.DOCKER_REGISTRY }}/${{ secrets.DOCKER_USERNAME }}/email-classifier:latest
|
||||||
# Caching speeds up builds by reusing layers (crucial for 'uv' installs)
|
${{ secrets.DOCKER_REGISTRY }}/${{ secrets.DOCKER_USERNAME }}/email-classifier:${{ gitea.sha }}
|
||||||
labels: |
|
labels: |
|
||||||
org.opencontainers.image.source=${{ gitea.server_url }}/${{ gitea.repository }}
|
org.opencontainers.image.source=${{ gitea.server_url }}/${{ gitea.repository }}
|
||||||
org.opencontainers.image.description=Email Classifier Service
|
org.opencontainers.image.description=Email Classifier Service
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
cache-to: type=gha,mode=max
|
cache-to: type=gha,mode=max
|
||||||
|
|
||||||
|
build-and-push-tag:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
if: github.event_name == 'push' && startsWith(gitea.ref, 'refs/tags/v')
|
||||||
|
steps:
|
||||||
|
- name: Checkout code
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Login to Docker Registry
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: ${{ secrets.DOCKER_REGISTRY }}
|
||||||
|
username: ${{ secrets.DOCKER_USERNAME }}
|
||||||
|
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||||
|
|
||||||
|
- name: Build and push (tagged release)
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
file: Dockerfile
|
||||||
|
push: true
|
||||||
|
tags: |
|
||||||
|
${{ secrets.DOCKER_REGISTRY }}/${{ secrets.DOCKER_USERNAME }}/email-classifier:${{ gitea.ref_name }}
|
||||||
|
${{ secrets.DOCKER_REGISTRY }}/${{ secrets.DOCKER_USERNAME }}/email-classifier:latest
|
||||||
|
${{ secrets.DOCKER_REGISTRY }}/${{ secrets.DOCKER_USERNAME }}/email-classifier:${{ gitea.sha }}
|
||||||
|
labels: |
|
||||||
|
org.opencontainers.image.source=${{ gitea.server_url }}/${{ gitea.repository }}
|
||||||
|
org.opencontainers.image.description=Email Classifier Service
|
||||||
|
cache-from: type=gha
|
||||||
|
cache-to: type=gha,mode=max
|
||||||
|
|||||||
@@ -2,66 +2,26 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from pathlib import Path
|
from typing import Literal
|
||||||
from typing import Any, Literal
|
|
||||||
|
|
||||||
import yaml
|
from pydantic import BaseModel, Field
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
Provider = Literal["openai", "anthropic"]
|
Provider = Literal["openai", "anthropic"]
|
||||||
DEFAULT_CONFIG_PATHS = ["config.yml", "config.yaml", "/config/config.yml", "/config/config.yaml"]
|
|
||||||
|
|
||||||
|
|
||||||
class LLMSettings(BaseModel):
|
class LLMSettings(BaseModel):
|
||||||
provider: Provider = "openai"
|
provider: Provider = Field(default=os.getenv("LLM_PROVIDER", "openai"))
|
||||||
api_key: str = "none"
|
api_key: str = Field(default=os.getenv("LLM_API_KEY", "none"))
|
||||||
model: str = "qwen2.5-7b-instruct.q4_k_m"
|
model: str = Field(default=os.getenv("LLM_MODEL", "qwen2.5-7b-instruct.q4_k_m"))
|
||||||
base_url: str = "http://ollama.internal.henryhosted.com:9292/v1"
|
base_url: str = Field(default=os.getenv("LLM_BASE_URL", "http://ollama.internal.henryhosted.com:9292/v1"))
|
||||||
temperature: float = 0.1
|
temperature: float = Field(default=float(os.getenv("LLM_TEMPERATURE", "0.1")))
|
||||||
timeout_seconds: float = 60
|
timeout_seconds: float = Field(default=float(os.getenv("LLM_TIMEOUT_SECONDS", "60")))
|
||||||
max_retries: int = 3
|
max_retries: int = Field(default=int(os.getenv("LLM_MAX_RETRIES", "3")))
|
||||||
|
|
||||||
|
|
||||||
def _load_yaml_config() -> dict[str, Any]:
|
|
||||||
explicit = os.getenv("EMAIL_CLASSIFIER_CONFIG") or os.getenv("APP_CONFIG_FILE")
|
|
||||||
candidates = [explicit] if explicit else DEFAULT_CONFIG_PATHS
|
|
||||||
for candidate in candidates:
|
|
||||||
if not candidate:
|
|
||||||
continue
|
|
||||||
path = Path(candidate)
|
|
||||||
if not path.exists() or not path.is_file():
|
|
||||||
continue
|
|
||||||
data = yaml.safe_load(path.read_text()) or {}
|
|
||||||
if not isinstance(data, dict):
|
|
||||||
raise ValueError(f"Config file must contain a mapping/object: {path}")
|
|
||||||
llm = data.get("llm", data)
|
|
||||||
if not isinstance(llm, dict):
|
|
||||||
raise ValueError(f"LLM config must be a mapping/object: {path}")
|
|
||||||
return llm
|
|
||||||
return {}
|
|
||||||
|
|
||||||
|
|
||||||
def _env_or_yaml(env_name: str, yaml_data: dict[str, Any], yaml_key: str, default: Any) -> Any:
|
|
||||||
value = os.getenv(env_name)
|
|
||||||
if value is not None:
|
|
||||||
return value
|
|
||||||
if yaml_key in yaml_data and yaml_data[yaml_key] is not None:
|
|
||||||
return yaml_data[yaml_key]
|
|
||||||
return default
|
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=1)
|
@lru_cache(maxsize=1)
|
||||||
def get_settings() -> LLMSettings:
|
def get_settings() -> LLMSettings:
|
||||||
yaml_data = _load_yaml_config()
|
return LLMSettings()
|
||||||
return LLMSettings(
|
|
||||||
provider=_env_or_yaml("LLM_PROVIDER", yaml_data, "provider", "openai"),
|
|
||||||
api_key=_env_or_yaml("LLM_API_KEY", yaml_data, "api_key", "none"),
|
|
||||||
model=_env_or_yaml("LLM_MODEL", yaml_data, "model", "qwen2.5-7b-instruct.q4_k_m"),
|
|
||||||
base_url=_env_or_yaml("LLM_BASE_URL", yaml_data, "base_url", "http://ollama.internal.henryhosted.com:9292/v1"),
|
|
||||||
temperature=float(_env_or_yaml("LLM_TEMPERATURE", yaml_data, "temperature", 0.1)),
|
|
||||||
timeout_seconds=float(_env_or_yaml("LLM_TIMEOUT_SECONDS", yaml_data, "timeout_seconds", 60)),
|
|
||||||
max_retries=int(_env_or_yaml("LLM_MAX_RETRIES", yaml_data, "max_retries", 3)),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def get_request_settings(
|
def get_request_settings(
|
||||||
|
|||||||
@@ -4,43 +4,6 @@
|
|||||||
|
|
||||||
The service ships with a `Dockerfile` based on `python:3.12-slim-bookworm` using [uv](https://astral.sh/uv/) for fast dependency installation.
|
The service ships with a `Dockerfile` based on `python:3.12-slim-bookworm` using [uv](https://astral.sh/uv/) for fast dependency installation.
|
||||||
|
|
||||||
### Configuration sources
|
|
||||||
|
|
||||||
The application now supports two configuration sources:
|
|
||||||
- environment variables
|
|
||||||
- a YAML config file
|
|
||||||
|
|
||||||
Load order:
|
|
||||||
1. per-request overrides
|
|
||||||
2. environment variables
|
|
||||||
3. YAML config file
|
|
||||||
4. built-in defaults
|
|
||||||
|
|
||||||
Supported config file locations:
|
|
||||||
- `config.yml`
|
|
||||||
- `config.yaml`
|
|
||||||
- `/config/config.yml`
|
|
||||||
- `/config/config.yaml`
|
|
||||||
|
|
||||||
You can also set an explicit config path with:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
export EMAIL_CLASSIFIER_CONFIG=/path/to/config.yml
|
|
||||||
```
|
|
||||||
|
|
||||||
Example `config.yml`:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
llm:
|
|
||||||
provider: anthropic
|
|
||||||
base_url: https://api.minimax.io/anthropic
|
|
||||||
api_key: your_api_key_here
|
|
||||||
model: MiniMax-M2.7
|
|
||||||
temperature: 0.1
|
|
||||||
timeout_seconds: 60
|
|
||||||
max_retries: 3
|
|
||||||
```
|
|
||||||
|
|
||||||
### Building
|
### Building
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -52,50 +15,19 @@ docker build -t email-classifier .
|
|||||||
```bash
|
```bash
|
||||||
docker run -d --name email-classifier \
|
docker run -d --name email-classifier \
|
||||||
-p 7999:7999 \
|
-p 7999:7999 \
|
||||||
-e EMAIL_CLASSIFIER_CONFIG=/config/config.yml \
|
-e LLM_PROVIDER=openai \
|
||||||
|
-e LLM_BASE_URL=http://your-ollama:11434/v1 \
|
||||||
|
-e LLM_API_KEY=none \
|
||||||
|
-e LLM_MODEL=qwen2.5-7b-instruct.q4_k_m \
|
||||||
|
-e LLM_TEMPERATURE=0.1 \
|
||||||
-e EMAIL_CLASSIFIER_DB_PATH=/data/email_classifier.db \
|
-e EMAIL_CLASSIFIER_DB_PATH=/data/email_classifier.db \
|
||||||
-v /path/to/config.yml:/config/config.yml:ro \
|
|
||||||
-v /path/to/local/data:/data \
|
-v /path/to/local/data:/data \
|
||||||
email-classifier
|
email-classifier
|
||||||
```
|
```
|
||||||
|
|
||||||
Mount a persistent volume for `/data` (or wherever `EMAIL_CLASSIFIER_DB_PATH` points) to preserve the dedupe database across container restarts.
|
Mount a persistent volume for `/data` (or wherever `EMAIL_CLASSIFIER_DB_PATH` points) to preserve the dedupe database across container restarts.
|
||||||
|
|
||||||
Environment variables still override file-based config, so you can keep most settings in YAML and override just one or two values at deploy time.
|
### Building for a Remote Registry
|
||||||
|
|
||||||
## Docker Compose example
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
services:
|
|
||||||
email-classifier:
|
|
||||||
image: your-registry.example.com/your-org/email-classifier:latest
|
|
||||||
container_name: email-classifier
|
|
||||||
ports:
|
|
||||||
- "7999:7999"
|
|
||||||
environment:
|
|
||||||
EMAIL_CLASSIFIER_CONFIG: /config/config.yml
|
|
||||||
EMAIL_CLASSIFIER_DB_PATH: /data/email_classifier.db
|
|
||||||
# Optional overrides. Env vars win over YAML values.
|
|
||||||
# LLM_MODEL: MiniMax-M2.7
|
|
||||||
# LLM_TIMEOUT_SECONDS: "90"
|
|
||||||
volumes:
|
|
||||||
- ./config.yml:/config/config.yml:ro
|
|
||||||
- ./data:/data
|
|
||||||
restart: unless-stopped
|
|
||||||
# If your LLM backend runs on the Docker host, one option is:
|
|
||||||
# extra_hosts:
|
|
||||||
# - "host.docker.internal:host-gateway"
|
|
||||||
```
|
|
||||||
|
|
||||||
### Compose notes
|
|
||||||
|
|
||||||
- Mount the YAML config read-only into the container, typically at `/config/config.yml`
|
|
||||||
- Mount a writable volume for `/data` so dedupe state survives restarts
|
|
||||||
- Override specific values with environment variables when needed
|
|
||||||
- If the LLM backend is another container on the same Compose network, use its service name in `base_url`
|
|
||||||
- If the LLM backend runs on the host, use `host.docker.internal` or a host-gateway mapping where appropriate
|
|
||||||
|
|
||||||
## Building for a Remote Registry
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker build -t \
|
docker build -t \
|
||||||
@@ -125,16 +57,16 @@ The workflow tags the image as:
|
|||||||
|
|
||||||
### Deployment Considerations
|
### Deployment Considerations
|
||||||
|
|
||||||
- **Network access** — The container needs to reach your LLM backend. If using Ollama or another service on the host, use `host.docker.internal` or an explicit host-gateway mapping.
|
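- **Network access** — The container needs to reach your LLM backend. If using Ollama on the host, use `host.docker.internal` as the host in the base URL: Docker Desktop (macOS/Windows) resolves it automatically, while on Linux you must add `--add-host=host.docker.internal:host-gateway` to `docker run` (or the equivalent `extra_hosts` entry in Compose).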
|
||||||
- **Dedupe persistence** — Mount a volume for the SQLite database to persist dedupe state across deploys.
|
- **Dedupe persistence** — Mount a volume for the SQLite database to persist dedupe state across deploys.
|
||||||
- **Port** — The container exposes port `7999`. Map it to any host port you prefer.
|
- **Port** — The container exposes port `7999`. Map it to any host port you prefer.
|
||||||
- **Health check** — The service does not currently expose a dedicated `/health` endpoint. Use `GET /docs` as a liveness probe.
|
- **Health check** — The service does not currently expose a dedicated `/health` endpoint. Use `GET /docs` as a liveness probe.
|
||||||
|
|
||||||
## Production Checklist
|
## Production Checklist
|
||||||
|
|
||||||
- [ ] Provide either a YAML config file or the required `LLM_*` environment variables
|
- [ ] Set `LLM_API_KEY` to a real key (not `none`) in production
|
||||||
- [ ] Use HTTPS for remote `LLM_BASE_URL` values in production
|
- [ ] Use HTTPS for `LLM_BASE_URL` in production
|
||||||
- [ ] Mount a persistent volume for `EMAIL_CLASSIFIER_DB_PATH`
|
- [ ] Mount a persistent volume for `EMAIL_CLASSIFIER_DB_PATH`
|
||||||
- [ ] Set appropriate resource limits (CPU/memory) on the container
|
- [ ] Set appropriate resource limits (CPU/memory) on the container
|
||||||
- [ ] Configure `LLM_MAX_RETRIES` and `LLM_TIMEOUT_SECONDS` to suit your LLM backend's reliability
|
- [ ] Configure `LLM_MAX_RETRIES` and `LLM_TIMEOUT_SECONDS` to suit your LLM backend's reliability
|
||||||
- [ ] Keep `LLM_TEMPERATURE` low for consistent classification results
|
- [ ] Set `LLM_TEMPERATURE=0.1` (or a similarly low value) for consistent classification results
|
||||||
|
|||||||
@@ -9,6 +9,5 @@ dependencies = [
|
|||||||
"beautifulsoup4>=4.14.3",
|
"beautifulsoup4>=4.14.3",
|
||||||
"fastapi>=0.128.0",
|
"fastapi>=0.128.0",
|
||||||
"openai>=2.16.0",
|
"openai>=2.16.0",
|
||||||
"PyYAML>=6.0.2",
|
|
||||||
"uvicorn>=0.40.0",
|
"uvicorn>=0.40.0",
|
||||||
]
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user