"""LLM-based email classifier."""

import logging
from typing import Optional, Dict, Any

from shared.llm import LLMClient

logger = logging.getLogger(__name__)

# Prompt template; `{subject}`, `{sender}` and `{body}` are filled in with
# `str.format`. The text is split across adjacent literals purely for line
# length: the resulting string is a single line with single-space separators.
CLASSIFICATION_PROMPT = (
    "Classify this email into exactly one category. "
    "Categories: "
    "- appointment: Personal appointment (doctor, dentist, meeting) "
    "with specific date/time for YOU "
    "- newsletter: Digest, weekly updates, informational broadcast "
    "(not personal) "
    "- family: Kid activities, school events, family gatherings, "
    "household matters "
    "- other: Everything else "
    "IMPORTANT DISTINCTIONS: "
    '- "Soccer practice" or "school event" = FAMILY (even if it has a time) '
    '- "Doctor appointment" or "meeting with client" = APPOINTMENT '
    "- If sender is coach, teacher, school = FAMILY "
    "- If sender is doctor, clinic, business = APPOINTMENT "
    "Email Subject: {subject} "
    "Sender: {sender} "
    "Email Body: {body} "
    "Respond with ONLY the category name, lowercase, no explanation."
)


class EmailClassifier:
    """Classify emails into a fixed set of categories using an LLM client.

    The client is only required to expose an awaitable
    ``generate(prompt=..., model=..., temperature=..., prefer_cloud=...)``
    whose result supports ``result["content"]``.
    """

    # Closed set of labels that `classify` may return.
    CATEGORIES = frozenset({"appointment", "newsletter", "family", "other"})
    # Label returned when the model fails or answers with an unknown label.
    FALLBACK_CATEGORY = "other"

    # Class-level defaults so instances created without `__init__`
    # (e.g. via `__new__`) still behave like the original implementation.
    model = "qwen2.5-coder:7b"
    max_body_chars = 1500

    # Characters models commonly wrap around a one-word answer
    # (quotes, markdown emphasis, trailing punctuation, whitespace).
    _ANSWER_DECORATION = "\"'`*.,:;!()[] \t\r\n"

    def __init__(
        self,
        llm_client: LLMClient,
        *,
        model: Optional[str] = None,
        max_body_chars: Optional[int] = None,
    ):
        """Store the client and optional overrides.

        Args:
            llm_client: Client used to run the classification prompt.
            model: Model name passed to ``generate``; defaults to the
                class-level ``model``.
            max_body_chars: Number of leading body characters included in
                the prompt; defaults to the class-level ``max_body_chars``.
        """
        self.llm = llm_client
        if model is not None:
            self.model = model
        if max_body_chars is not None:
            self.max_body_chars = max_body_chars

    @classmethod
    def _normalize_category(cls, raw: str) -> str:
        """Reduce a raw model answer to a bare lowercase label candidate."""
        return raw.strip().lower().strip(cls._ANSWER_DECORATION)

    async def classify(
        self,
        subject: str,
        body: str,
        sender: str
    ) -> str:
        """Classify one email.

        ``None`` for any field is treated as an empty string.

        Returns:
            One of: appointment, newsletter, family, other. Any failure of
            the LLM call, or an answer outside that set, yields "other".
        """
        # Callers using `dict.get(key, "")` still pass None when the key is
        # present with a None value; coerce so slicing below cannot raise.
        subject = subject or ""
        body = body or ""
        sender = sender or ""

        prompt = CLASSIFICATION_PROMPT.format(
            subject=subject,
            sender=sender,
            body=body[:self.max_body_chars],  # Bound the prompt size
        )

        try:
            result = await self.llm.generate(
                prompt=prompt,
                model=self.model,
                temperature=0.1,  # Low temp for consistent classification
                prefer_cloud=False,  # Original intent: use local first
            )
            category = self._normalize_category(result["content"])
        except Exception as e:
            # Best-effort boundary: classification must never break the
            # caller, so every failure degrades to the fallback label.
            logger.error("Classification failed: %s", e)
            return self.FALLBACK_CATEGORY

        if category not in self.CATEGORIES:
            logger.warning(
                "Invalid classification: '%s', defaulting to 'other'", category
            )
            category = self.FALLBACK_CATEGORY

        # Logged outside the try block so a logging problem can never
        # discard a successfully obtained category.
        logger.info("Classified as '%s': %s...", category, subject[:50])
        return category

    async def classify_batch(
        self,
        emails: list[Dict[str, str]]
    ) -> list[str]:
        """Classify multiple emails.

        Emails are classified one at a time, in order; the returned list is
        positionally aligned with ``emails``. Missing "subject", "body" or
        "sender" keys are treated as empty strings.
        """
        return [
            await self.classify(
                email.get("subject", ""),
                email.get("body", ""),
                email.get("sender", ""),
            )
            for email in emails
        ]