"""Email processing with LLM extraction."""
import json
import logging
import re
from typing import Dict, Any

from shared.llm import LLMClient
# Module-level logger named after this module (``__name__``), so records can
# be filtered and configured per module through the logging hierarchy.
logger = logging.getLogger(__name__)
# Prompt template for the extraction request. It is filled in with
# ``str.format`` using the ``subject`` and ``body`` placeholders; the braces
# in the JSON example on the last line are doubled so that ``format`` emits
# them as literal single braces instead of treating them as placeholders.
EXTRACTION_PROMPT = """Extract calendar event details from this email.
Email Subject: {subject}
Email Body: {body}
Extract the following fields:
- summary: Brief title for calendar event (max 60 chars)
- description: Full context including sender and original subject
- start_datetime: ISO 8601 datetime (e.g., 2026-04-23T15:00:00)
- end_datetime: ISO 8601 datetime (estimate if not specified)
- location: Address or "TBD" if not mentioned
- confidence: 0-1 score on extraction certainty
Rules:
- Use nearest logical future date (not distant future)
- If time not specified, assume 9:00 AM
- If duration not specified, assume 1 hour
- Output ONLY valid JSON
Response format:
{{"summary": "...", "description": "...", "start_datetime": "...", "end_datetime": "...", "location": "...", "confidence": 0.85}}"""
class EmailProcessor:
    """Extract structured calendar data from unstructured emails."""

    # Keys the LLM response must contain for an extraction to be usable.
    _REQUIRED_FIELDS = ("summary", "start_datetime", "end_datetime")

    # Only this many leading characters of the email body go into the prompt.
    _MAX_BODY_CHARS = 2000

    # Reply / forward markers. The word boundary stops ordinary words that
    # merely end in "re:" ("Healthcare:", "Software:") from being mistaken
    # for a reply marker, while still matching "Re:" / "FW:" anywhere in the
    # (lower-cased) subject.
    _THREAD_MARKER_RE = re.compile(r"\b(?:re|fw):")

    # Substrings that mark a subject as bulk / marketing mail.
    _SKIP_KEYWORDS = ("unsubscribe", "newsletter", "promotion", "sale")

    # Substrings that suggest the email is about a schedulable event.
    _EVENT_KEYWORDS = (
        "appointment", "meeting", "schedule", "confirm", "reminder",
        "reservation", "booking", "interview", "call", "sync",
    )

    def __init__(self, llm_client: "LLMClient"):
        """Store the LLM client used for extraction requests."""
        self.llm = llm_client

    async def extract_event(self, subject: str, body: str) -> Dict[str, Any]:
        """Extract event details from email using LLM.

        Args:
            subject: Email subject line.
            body: Email body; only the first ``_MAX_BODY_CHARS`` characters
                are sent to the model. ``None`` is treated as empty.

        Returns:
            The parsed event dict exactly as returned by the model, or an
            empty dict if the request fails, the response is not a JSON
            object, or a required field is missing.
        """
        try:
            prompt = EXTRACTION_PROMPT.format(
                subject=subject,
                body=(body or "")[:self._MAX_BODY_CHARS],
            )
            result = await self.llm.generate(
                prompt=prompt,
                format="json",
                temperature=0.1,  # Low temp for consistent extraction
                prefer_cloud=False,  # Use local first
            )
            parsed = json.loads(result["content"])
        except json.JSONDecodeError as e:
            logger.error(f"LLM returned invalid JSON: {e}")
            return {}
        except Exception as e:
            # Deliberate best-effort boundary: any client/transport failure
            # is reported as "no event" rather than propagated to the caller.
            logger.error(f"Extraction failed: {e}")
            return {}

        # Valid JSON is not necessarily an object (could be a list or scalar).
        if not isinstance(parsed, dict):
            logger.warning(
                "LLM returned non-object JSON: %s", type(parsed).__name__
            )
            return {}

        # Validate required fields
        if not all(key in parsed for key in self._REQUIRED_FIELDS):
            logger.warning(f"LLM missing required fields: {parsed.keys()}")
            return {}

        logger.info(
            "Extracted event: %s (%.2f)",
            parsed["summary"],
            self._confidence_for_log(parsed),
        )
        return parsed

    @staticmethod
    def _confidence_for_log(parsed: Dict[str, Any]) -> float:
        """Return the confidence as a float for logging; 0.0 if unusable."""
        try:
            return float(parsed.get("confidence", 0))
        except (TypeError, ValueError):
            # A malformed confidence must not discard a valid extraction.
            return 0.0

    def should_process(self, subject: str, sender: str) -> bool:
        """Quick filter to skip obviously non-event emails.

        Args:
            subject: Email subject line; ``None`` is treated as empty.
            sender: Sender address (currently not used by the filter).

        Returns:
            False for replies/forwards and bulk-mail subjects; otherwise
            True only if the subject contains an event-related keyword.
        """
        subject_lower = (subject or "").lower()

        # Skip patterns
        if self._THREAD_MARKER_RE.search(subject_lower):
            return False
        if any(kw in subject_lower for kw in self._SKIP_KEYWORDS):
            return False

        # Event-positive patterns
        return any(kw in subject_lower for kw in self._EVENT_KEYWORDS)