# email.py — 3,128 bytes, Apr 22, 2026

"""Email processing with LLM extraction."""

import logging
import json
from typing import Dict, Any
from shared.llm import LLMClient

# Module-level logger, named after this module so its records can be
# filtered and configured independently of other modules.
logger = logging.getLogger(__name__)

# Prompt template sent to the LLM for calendar-event extraction. It is filled
# in with str.format(), so {subject} and {body} are placeholders, while the
# doubled braces in the "Response format" example render as literal { and }
# in the final prompt.
# NOTE(review): the rules ask for the "nearest logical future date", but the
# template carries no current date — presumably the model or the LLM client
# supplies it; confirm.
EXTRACTION_PROMPT = """Extract calendar event details from this email.

Email Subject: {subject}
Email Body: {body}

Extract the following fields:
- summary: Brief title for calendar event (max 60 chars)
- description: Full context including sender and original subject
- start_datetime: ISO 8601 datetime (e.g., 2026-04-23T15:00:00)
- end_datetime: ISO 8601 datetime (estimate if not specified)
- location: Address or "TBD" if not mentioned
- confidence: 0-1 score on extraction certainty

Rules:
- Use nearest logical future date (not distant future)
- If time not specified, assume 9:00 AM
- If duration not specified, assume 1 hour
- Output ONLY valid JSON

Response format:
{{"summary": "...", "description": "...", "start_datetime": "...", "end_datetime": "...", "location": "...", "confidence": 0.85}}"""

class EmailProcessor:
    """Extract structured calendar data from unstructured emails.

    ``should_process`` is a cheap subject-line filter; ``extract_event`` asks
    the LLM for the event fields and validates the reply.
    """

    # Keys that must be present in the model's JSON reply.
    _REQUIRED_FIELDS = ("summary", "start_datetime", "end_datetime")
    # Upper bound on the number of body characters placed in the prompt.
    _MAX_BODY_CHARS = 2000
    # Reply/forward prefixes; matched only at the start of a word.
    _REPLY_MARKERS = ("re:", "fw:")
    # Substrings that mark a subject as bulk/marketing mail.
    _SKIP_KEYWORDS = ("unsubscribe", "newsletter", "promotion", "sale")
    # Substrings that suggest the email describes an event.
    _EVENT_KEYWORDS = (
        "appointment", "meeting", "schedule", "confirm", "reminder",
        "reservation", "booking", "interview", "call", "sync",
    )

    def __init__(self, llm_client: "LLMClient"):
        """Store the LLM client used by ``extract_event``."""
        self.llm = llm_client

    async def extract_event(self, subject: str, body: str) -> Dict[str, Any]:
        """Extract event details from an email using the LLM.

        Args:
            subject: Email subject line (``None`` is treated as empty).
            body: Email body; only the first 2000 characters are sent to the
                model (``None`` is treated as empty).

        Returns:
            The parsed event dict exactly as returned by the model, or an
            empty dict if the LLM call fails, the reply is not a JSON object,
            or a required field is missing.
        """
        prompt = EXTRACTION_PROMPT.format(
            subject=subject or "",
            body=(body or "")[: self._MAX_BODY_CHARS],
        )

        try:
            result = await self.llm.generate(
                prompt=prompt,
                format="json",
                temperature=0.1,  # Low temp for consistent extraction
                prefer_cloud=False,  # Use local first
            )
            parsed = json.loads(result["content"])
        except json.JSONDecodeError as e:
            logger.error(f"LLM returned invalid JSON: {e}")
            return {}
        except Exception as e:
            # Best-effort boundary: any client/transport failure yields {}.
            logger.error(f"Extraction failed: {e}")
            return {}

        # Valid JSON is not necessarily an object (could be a list or string).
        if not isinstance(parsed, dict):
            logger.warning(
                f"LLM returned non-object JSON: {type(parsed).__name__}"
            )
            return {}

        # Validate required fields
        if not all(k in parsed for k in self._REQUIRED_FIELDS):
            logger.warning(f"LLM missing required fields: {parsed.keys()}")
            return {}

        # The confidence value is only used for logging; a missing or
        # non-numeric value must not discard an otherwise valid extraction.
        try:
            confidence = float(parsed.get("confidence", 0))
        except (TypeError, ValueError):
            confidence = 0.0

        logger.info(f"Extracted event: {parsed['summary']} ({confidence:.2f})")
        return parsed

    def should_process(self, subject: str, sender: str) -> bool:
        """Quick filter to skip obviously non-event emails.

        Args:
            subject: Email subject line; ``None`` or empty is never processed.
            sender: Sender address. Currently unused; kept so callers do not
                need to change.

        Returns:
            ``False`` for replies/forwards and bulk-mail subjects, otherwise
            ``True`` only if the subject contains an event keyword.
        """
        if not subject:
            return False
        subject_lower = subject.lower()

        # "re:" / "fw:" must not match inside words such as "Healthcare:".
        if self._has_marker(subject_lower, self._REPLY_MARKERS):
            return False

        if any(kw in subject_lower for kw in self._SKIP_KEYWORDS):
            return False

        return any(kw in subject_lower for kw in self._EVENT_KEYWORDS)

    @staticmethod
    def _has_marker(text: str, markers) -> bool:
        """Return True if a marker occurs in text not preceded by a letter or digit."""
        for marker in markers:
            start = text.find(marker)
            while start != -1:
                if start == 0 or not text[start - 1].isalnum():
                    return True
                start = text.find(marker, start + 1)
        return False