Files
email-classifier/app/dedupe_store.py

107 lines
4.2 KiB
Python

from __future__ import annotations
import json
import sqlite3
from pathlib import Path
from typing import Any
class DedupeStore:
    """SQLite-backed store of sync records kept in the ``todoist_sync`` table.

    Each row links a message (``message_id`` / ``thread_id`` / ``fingerprint``)
    to a ``todoist_task_id`` and keeps the JSON-encoded ``source_payload`` and
    ``last_result`` alongside a ``classification_hash``.

    Every public operation opens its own short-lived connection and closes it
    before returning, so no SQLite handle outlives a method call.
    """

    _CREATE_TABLE_SQL = """
        CREATE TABLE IF NOT EXISTS todoist_sync (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            message_id TEXT,
            thread_id TEXT,
            fingerprint TEXT NOT NULL,
            todoist_task_id TEXT NOT NULL,
            classification_hash TEXT NOT NULL,
            source_payload TEXT NOT NULL,
            last_result TEXT NOT NULL,
            created_at TEXT DEFAULT CURRENT_TIMESTAMP,
            updated_at TEXT DEFAULT CURRENT_TIMESTAMP
        )
    """

    _CREATE_INDEX_SQL = (
        "CREATE INDEX IF NOT EXISTS idx_sync_message_id ON todoist_sync(message_id)",
        "CREATE INDEX IF NOT EXISTS idx_sync_thread_id ON todoist_sync(thread_id)",
        "CREATE INDEX IF NOT EXISTS idx_sync_fingerprint ON todoist_sync(fingerprint)",
    )

    _INSERT_SQL = """
        INSERT INTO todoist_sync (message_id, thread_id, fingerprint, todoist_task_id, classification_hash, source_payload, last_result)
        VALUES (?, ?, ?, ?, ?, ?, ?)
    """

    _UPDATE_SQL = """
        UPDATE todoist_sync
        SET message_id = ?, thread_id = ?, fingerprint = ?, todoist_task_id = ?, classification_hash = ?,
            source_payload = ?, last_result = ?, updated_at = CURRENT_TIMESTAMP
        WHERE id = ?
    """

    def __init__(self, db_path: str = ".data/email_classifier.db"):
        """Create the database file's parent directory and ensure the schema exists.

        Args:
            db_path: Location of the SQLite database file.
        """
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_db()

    def _connect(self) -> sqlite3.Connection:
        """Open a new connection whose rows are ``sqlite3.Row`` objects.

        The caller owns the returned connection and must close it.
        """
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def _init_db(self) -> None:
        """Create the ``todoist_sync`` table and its lookup indexes if missing."""
        conn = self._connect()
        try:
            # ``with conn`` only commits/rolls back; it does not close the
            # connection, hence the explicit ``finally`` below.
            with conn:
                conn.execute(self._CREATE_TABLE_SQL)
                for statement in self._CREATE_INDEX_SQL:
                    conn.execute(statement)
        finally:
            conn.close()

    def find_existing(self, *, message_id: str | None, thread_id: str | None, fingerprint: str) -> dict[str, Any] | None:
        """Return the newest stored row matching the given identifiers.

        Lookups are attempted in priority order: ``message_id`` (if truthy),
        then ``thread_id`` (if truthy), then ``fingerprint``. The first lookup
        that yields a row wins; within a lookup the row with the highest
        ``id`` is chosen.

        Returns:
            The row as a dict with ``source_payload`` and ``last_result``
            decoded from JSON, or ``None`` when nothing matches.
        """
        lookups: list[tuple[str, tuple[str, ...]]] = []
        if message_id:
            lookups.append(("SELECT * FROM todoist_sync WHERE message_id = ? ORDER BY id DESC LIMIT 1", (message_id,)))
        if thread_id:
            lookups.append(("SELECT * FROM todoist_sync WHERE thread_id = ? ORDER BY id DESC LIMIT 1", (thread_id,)))
        lookups.append(("SELECT * FROM todoist_sync WHERE fingerprint = ? ORDER BY id DESC LIMIT 1", (fingerprint,)))

        conn = self._connect()
        try:
            for sql, params in lookups:
                row = conn.execute(sql, params).fetchone()
                if row is None:
                    continue
                data = dict(row)
                # Both columns are written by ``upsert`` as JSON text.
                data["source_payload"] = json.loads(data["source_payload"])
                data["last_result"] = json.loads(data["last_result"])
                return data
            return None
        finally:
            # Runs on the early return above as well, so the handle never leaks.
            conn.close()

    def upsert(
        self,
        *,
        existing_id: int | None,
        message_id: str | None,
        thread_id: str | None,
        fingerprint: str,
        todoist_task_id: str,
        classification_hash: str,
        source_payload: dict[str, Any],
        last_result: dict[str, Any],
    ) -> None:
        """Insert a new sync row, or update the row identified by ``existing_id``.

        Args:
            existing_id: Primary key of the row to overwrite, or ``None`` to
                insert a new row. If no row has this id, the record is
                inserted instead of being silently dropped.
            source_payload: Stored as JSON text with sorted keys.
            last_result: Stored as JSON text with sorted keys.
        """
        values = (
            message_id,
            thread_id,
            fingerprint,
            todoist_task_id,
            classification_hash,
            json.dumps(source_payload, sort_keys=True),
            json.dumps(last_result, sort_keys=True),
        )

        conn = self._connect()
        try:
            # ``with conn`` commits on success and rolls back on error.
            with conn:
                rows_updated = 0
                if existing_id is not None:
                    rows_updated = conn.execute(self._UPDATE_SQL, (*values, existing_id)).rowcount
                if rows_updated == 0:
                    # Either no id was supplied or the id no longer exists.
                    conn.execute(self._INSERT_SQL, values)
        finally:
            conn.close()