Add enriched classification output and Todoist dedupe sync
This commit is contained in:
@@ -5,7 +5,8 @@ from typing import Any
|
||||
|
||||
from app.config import get_request_settings
|
||||
from app.llm_adapters import build_adapter, coerce_json_text
|
||||
from app.models import ClassificationResult, ClassifyRequest, EmailData
|
||||
from app.models import ClassificationDetails, ClassificationResult, ClassifyRequest, EmailData
|
||||
from app.sync import build_fingerprint, sync_todoist
|
||||
|
||||
VALID_CATEGORIES = {
|
||||
"action_required",
|
||||
@@ -21,7 +22,7 @@ VALID_PRIORITIES = {"high", "medium", "low"}
|
||||
|
||||
|
||||
async def classify_email(request: ClassifyRequest) -> ClassificationResult:
|
||||
clean_email = _clean_email(request.email_data)
|
||||
clean_email = _clean_email(request)
|
||||
settings = get_request_settings(
|
||||
provider=request.provider,
|
||||
model=request.model,
|
||||
@@ -32,40 +33,50 @@ async def classify_email(request: ClassifyRequest) -> ClassificationResult:
|
||||
adapter = build_adapter(settings)
|
||||
|
||||
attempts = 0
|
||||
result: ClassificationResult | None = None
|
||||
while attempts < settings.max_retries:
|
||||
raw_response = await adapter.classify(clean_email)
|
||||
raw_response = await adapter.classify(clean_email.email_data)
|
||||
try:
|
||||
payload = json.loads(coerce_json_text(raw_response))
|
||||
result = _normalize_result(payload)
|
||||
result = _normalize_result(payload, clean_email)
|
||||
if result.needs_action and not result.task_description:
|
||||
attempts += 1
|
||||
continue
|
||||
return result
|
||||
break
|
||||
except (json.JSONDecodeError, ValueError, TypeError):
|
||||
attempts += 1
|
||||
|
||||
return ClassificationResult(
|
||||
needs_action=False,
|
||||
category="uncategorized",
|
||||
priority="low",
|
||||
task_description=None,
|
||||
reasoning="System failed to classify after multiple attempts.",
|
||||
confidence=0.0,
|
||||
)
|
||||
if result is None:
|
||||
result = ClassificationResult(
|
||||
needs_action=False,
|
||||
category="uncategorized",
|
||||
priority="low",
|
||||
task_description=None,
|
||||
reasoning="System failed to classify after multiple attempts.",
|
||||
confidence=0.0,
|
||||
details=ClassificationDetails(dedupe_key=build_fingerprint(clean_email)),
|
||||
)
|
||||
|
||||
result.todoist = await sync_todoist(clean_email, result)
|
||||
return result
|
||||
|
||||
|
||||
def _clean_email(email: EmailData) -> EmailData:
|
||||
def _clean_email(request: ClassifyRequest) -> ClassifyRequest:
|
||||
from app.helpers.clean_email_html import clean_email_html
|
||||
from app.helpers.extract_latest_message import extract_latest_message
|
||||
from app.helpers.remove_disclaimer import remove_disclaimer
|
||||
|
||||
return EmailData(
|
||||
subject=email.subject,
|
||||
body=remove_disclaimer(clean_email_html(extract_latest_message(email.body))),
|
||||
return request.model_copy(
|
||||
update={
|
||||
"email_data": EmailData(
|
||||
subject=request.email_data.subject,
|
||||
body=remove_disclaimer(clean_email_html(extract_latest_message(request.email_data.body))),
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _normalize_result(data: dict[str, Any]) -> ClassificationResult:
|
||||
def _normalize_result(data: dict[str, Any], request: ClassifyRequest) -> ClassificationResult:
|
||||
needs_action = bool(data.get("needs_action", False))
|
||||
category = str(data.get("category", "uncategorized") or "uncategorized").lower()
|
||||
if category not in VALID_CATEGORIES:
|
||||
@@ -81,6 +92,24 @@ def _normalize_result(data: dict[str, Any]) -> ClassificationResult:
|
||||
reasoning = str(data.get("reasoning", "") or "").strip() or "No reasoning provided."
|
||||
confidence_raw = data.get("confidence", 0.0)
|
||||
confidence = max(0.0, min(1.0, float(confidence_raw)))
|
||||
details_payload = data.get("details") or {}
|
||||
details = ClassificationDetails(
|
||||
summary=_clean_text(details_payload.get("summary")),
|
||||
suggested_title=_clean_text(details_payload.get("suggested_title")),
|
||||
suggested_notes=_clean_text(details_payload.get("suggested_notes")),
|
||||
deadline=_clean_text(details_payload.get("deadline")),
|
||||
people=_string_list(details_payload.get("people")),
|
||||
organizations=_string_list(details_payload.get("organizations")),
|
||||
attachments_referenced=_string_list(details_payload.get("attachments_referenced")),
|
||||
next_steps=_string_list(details_payload.get("next_steps")),
|
||||
key_points=_string_list(details_payload.get("key_points")),
|
||||
source_signals=_string_list(details_payload.get("source_signals")),
|
||||
dedupe_key=build_fingerprint(request),
|
||||
)
|
||||
if needs_action and not details.suggested_title:
|
||||
details.suggested_title = task_description
|
||||
if not details.summary:
|
||||
details.summary = reasoning
|
||||
return ClassificationResult(
|
||||
needs_action=needs_action,
|
||||
category=category,
|
||||
@@ -88,4 +117,27 @@ def _normalize_result(data: dict[str, Any]) -> ClassificationResult:
|
||||
task_description=task_description,
|
||||
reasoning=reasoning,
|
||||
confidence=confidence,
|
||||
details=details,
|
||||
)
|
||||
|
||||
|
||||
def _clean_text(value: Any) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
text = str(value).strip()
|
||||
return text or None
|
||||
|
||||
|
||||
def _string_list(value: Any) -> list[str]:
|
||||
if not value:
|
||||
return []
|
||||
if isinstance(value, list):
|
||||
items = value
|
||||
else:
|
||||
items = [value]
|
||||
output = []
|
||||
for item in items:
|
||||
text = str(item).strip()
|
||||
if text and text not in output:
|
||||
output.append(text)
|
||||
return output
|
||||
|
||||
Reference in New Issue
Block a user