"""Email processing with LLM extraction."""

import json
import logging
import re
from typing import Any, Dict

from shared.llm import LLMClient

logger = logging.getLogger(__name__)

# Prompt template sent to the LLM. ``{subject}`` and ``{body}`` are filled in
# by ``str.format``; the doubled braces in the response example are literal
# braces in the rendered prompt.
EXTRACTION_PROMPT = """Extract calendar event details from this email.

Email Subject: {subject}
Email Body: {body}

Extract the following fields:
- summary: Brief title for calendar event (max 60 chars)
- description: Full context including sender and original subject
- start_datetime: ISO 8601 datetime (e.g., 2026-04-23T15:00:00)
- end_datetime: ISO 8601 datetime (estimate if not specified)
- location: Address or "TBD" if not mentioned
- confidence: 0-1 score on extraction certainty

Rules:
- Use nearest logical future date (not distant future)
- If time not specified, assume 9:00 AM
- If duration not specified, assume 1 hour
- Output ONLY valid JSON

Response format:
{{"summary": "...", "description": "...", "start_datetime": "...", "end_datetime": "...", "location": "...", "confidence": 0.85}}"""

# Only the first N characters of the email body are placed in the prompt.
_MAX_BODY_CHARS = 2000

# Keys that must be present in the LLM response for it to count as an event.
_REQUIRED_FIELDS = ("summary", "start_datetime", "end_datetime")

# Reply/forward markers. Matched as standalone tokens so that words which
# merely end in "re:" (e.g. "feature:", "here:") are not mistaken for replies.
_REPLY_FORWARD_RE = re.compile(r"\b(?:re|fw):")

# Substrings that mark a subject as bulk/marketing mail.
_SKIP_KEYWORDS = ("unsubscribe", "newsletter", "promotion", "sale")

# Substrings that suggest the email is about a schedulable event.
_EVENT_KEYWORDS = (
    "appointment",
    "meeting",
    "schedule",
    "confirm",
    "reminder",
    "reservation",
    "booking",
    "interview",
    "call",
    "sync",
)


def _format_confidence(value: Any) -> str:
    """Render a confidence value for logging without ever raising."""
    try:
        return f"{float(value):.2f}"
    except (TypeError, ValueError):
        # The LLM may return null or a non-numeric string; show it verbatim.
        return repr(value)


class EmailProcessor:
    """Extract structured calendar data from unstructured emails."""

    def __init__(self, llm_client: LLMClient):
        """Store the LLM client used for extraction."""
        self.llm = llm_client

    async def extract_event(self, subject: str, body: str) -> Dict[str, Any]:
        """Extract event details from an email using the LLM.

        Args:
            subject: Email subject line.
            body: Email body; only the first 2000 characters are sent.

        Returns:
            The parsed event dict when the LLM response is a JSON object
            containing ``summary``, ``start_datetime`` and ``end_datetime``;
            otherwise an empty dict. Failures are logged, never raised.
        """
        # Tolerate a missing subject/body so the "return {} on failure"
        # contract holds instead of raising before the try block.
        prompt = EXTRACTION_PROMPT.format(
            subject=subject or "",
            body=(body or "")[:_MAX_BODY_CHARS],
        )

        try:
            result = await self.llm.generate(
                prompt=prompt,
                format="json",
                temperature=0.1,  # Low temp for consistent extraction
                prefer_cloud=False,  # Use local first
            )
            parsed = json.loads(result["content"])
        except json.JSONDecodeError as e:
            logger.error("LLM returned invalid JSON: %s", e)
            return {}
        except Exception as e:
            # Boundary catch: any client/transport/shape error means
            # "no event", but keep the traceback for diagnosis.
            logger.exception("Extraction failed: %s", e)
            return {}

        # Valid JSON is not necessarily an object (could be a list or string).
        if not isinstance(parsed, dict):
            logger.warning(
                "LLM returned non-object JSON: %s", type(parsed).__name__
            )
            return {}

        # Validate required fields
        missing = [key for key in _REQUIRED_FIELDS if key not in parsed]
        if missing:
            logger.warning(
                "LLM missing required fields: %s (got: %s)",
                missing,
                sorted(parsed),
            )
            return {}

        # Logging happens outside the try block and formats defensively so a
        # null or non-numeric confidence cannot discard a valid extraction.
        logger.info(
            "Extracted event: %s (%s)",
            parsed["summary"],
            _format_confidence(parsed.get("confidence", 0)),
        )
        return parsed

    def should_process(self, subject: str, sender: str) -> bool:
        """Quick filter to skip obviously non-event emails.

        Args:
            subject: Email subject line; matched case-insensitively.
            sender: Currently unused; kept for interface compatibility.

        Returns:
            False for replies/forwards and bulk-mail subjects, otherwise True
            only when the subject contains an event-related keyword.
        """
        subject_lower = (subject or "").lower()

        # Skip patterns: reply/forward markers and marketing keywords.
        if _REPLY_FORWARD_RE.search(subject_lower):
            return False
        if any(kw in subject_lower for kw in _SKIP_KEYWORDS):
            return False

        # Event-positive patterns
        return any(kw in subject_lower for kw in _EVENT_KEYWORDS)