from __future__ import annotations

import json
import sqlite3
from collections.abc import Iterator
from contextlib import contextmanager
from pathlib import Path
from typing import Any


class DedupeStore:
    """SQLite-backed record of which emails were already synced to Todoist.

    Each row of the ``todoist_sync`` table links an email (identified by
    message id, thread id and/or a content fingerprint) to the Todoist task
    created for it, together with the JSON-encoded source payload and the
    last sync result.
    """

    _SCHEMA_STATEMENTS = (
        """
        CREATE TABLE IF NOT EXISTS todoist_sync (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            message_id TEXT,
            thread_id TEXT,
            fingerprint TEXT NOT NULL,
            todoist_task_id TEXT NOT NULL,
            classification_hash TEXT NOT NULL,
            source_payload TEXT NOT NULL,
            last_result TEXT NOT NULL,
            created_at TEXT DEFAULT CURRENT_TIMESTAMP,
            updated_at TEXT DEFAULT CURRENT_TIMESTAMP
        )
        """,
        "CREATE INDEX IF NOT EXISTS idx_sync_message_id ON todoist_sync(message_id)",
        "CREATE INDEX IF NOT EXISTS idx_sync_thread_id ON todoist_sync(thread_id)",
        "CREATE INDEX IF NOT EXISTS idx_sync_fingerprint ON todoist_sync(fingerprint)",
    )

    def __init__(self, db_path: str = ".data/email_classifier.db"):
        """Open (creating if necessary) the database at *db_path*.

        The parent directory is created when missing and the schema is
        initialised idempotently.
        """
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_db()

    def _connect(self) -> sqlite3.Connection:
        """Return a new connection whose rows support access by column name.

        The caller owns the returned connection and must close it; prefer
        :meth:`_transaction`, which does so automatically.
        """
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        return conn

    @contextmanager
    def _transaction(self) -> Iterator[sqlite3.Connection]:
        """Yield a connection that is committed/rolled back and then closed.

        ``sqlite3.Connection`` used as a context manager only ends the
        transaction; it does not close the connection. Wrapping it here
        guarantees the file handle is released after every operation.
        """
        conn = self._connect()
        try:
            with conn:  # commit on success, roll back on exception
                yield conn
        finally:
            conn.close()

    def _init_db(self) -> None:
        """Create the ``todoist_sync`` table and its lookup indexes if absent."""
        with self._transaction() as conn:
            for statement in self._SCHEMA_STATEMENTS:
                conn.execute(statement)

    @staticmethod
    def _decode_row(row: sqlite3.Row) -> dict[str, Any]:
        """Convert a row to a dict, decoding the two JSON-encoded columns."""
        data = dict(row)
        data["source_payload"] = json.loads(data["source_payload"])
        data["last_result"] = json.loads(data["last_result"])
        return data

    def find_existing(
        self,
        *,
        message_id: str | None,
        thread_id: str | None,
        fingerprint: str,
    ) -> dict[str, Any] | None:
        """Return the most recent sync record matching the given identifiers.

        Lookups are attempted in priority order: ``message_id`` (if truthy),
        then ``thread_id`` (if truthy), then ``fingerprint``. The first
        lookup that yields a row wins; within a lookup the row with the
        highest ``id`` is chosen.

        Returns:
            The row as a dict with ``source_payload`` and ``last_result``
            decoded from JSON, or ``None`` when nothing matches.
        """
        lookups: list[tuple[str, str]] = []
        if message_id:
            lookups.append(("message_id", message_id))
        if thread_id:
            lookups.append(("thread_id", thread_id))
        lookups.append(("fingerprint", fingerprint))

        with self._transaction() as conn:
            for column, value in lookups:
                # ``column`` comes only from the fixed names above, never
                # from caller input, so interpolating it is safe.
                row = conn.execute(
                    f"SELECT * FROM todoist_sync WHERE {column} = ? "
                    "ORDER BY id DESC LIMIT 1",
                    (value,),
                ).fetchone()
                if row:
                    return self._decode_row(row)
        return None

    def upsert(
        self,
        *,
        existing_id: int | None,
        message_id: str | None,
        thread_id: str | None,
        fingerprint: str,
        todoist_task_id: str,
        classification_hash: str,
        source_payload: dict[str, Any],
        last_result: dict[str, Any],
    ) -> None:
        """Insert a new sync record or overwrite the one with ``existing_id``.

        When ``existing_id`` is ``None`` a new row is inserted; otherwise the
        row with that primary key is updated and its ``updated_at`` column is
        refreshed. ``source_payload`` and ``last_result`` are stored as JSON
        with sorted keys.
        """
        values = (
            message_id,
            thread_id,
            fingerprint,
            todoist_task_id,
            classification_hash,
            json.dumps(source_payload, sort_keys=True),
            json.dumps(last_result, sort_keys=True),
        )
        with self._transaction() as conn:
            if existing_id is None:
                conn.execute(
                    """
                    INSERT INTO todoist_sync
                    (message_id, thread_id, fingerprint, todoist_task_id,
                     classification_hash, source_payload, last_result)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                    """,
                    values,
                )
            else:
                conn.execute(
                    """
                    UPDATE todoist_sync
                    SET message_id = ?, thread_id = ?, fingerprint = ?,
                        todoist_task_id = ?, classification_hash = ?,
                        source_payload = ?, last_result = ?,
                        updated_at = CURRENT_TIMESTAMP
                    WHERE id = ?
                    """,
                    (*values, existing_id),
                )