"""LLM-based email classifier."""
import logging
from typing import Optional, Dict, Any
from shared.llm import LLMClient
# Module-level logger, named after this module so its records can be
# filtered and configured per module.
logger = logging.getLogger(__name__)
# Prompt template for single-label email classification. It is filled with
# str.format() and expects exactly three placeholders: {subject}, {sender}
# and {body}. The model is instructed to answer with one lowercase label out
# of: appointment, newsletter, family, other.
CLASSIFICATION_PROMPT = """Classify this email into exactly one category.
Categories:
- appointment: Personal appointment (doctor, dentist, meeting) with specific date/time for YOU
- newsletter: Digest, weekly updates, informational broadcast (not personal)
- family: Kid activities, school events, family gatherings, household matters
- other: Everything else
IMPORTANT DISTINCTIONS:
- "Soccer practice" or "school event" = FAMILY (even if it has a time)
- "Doctor appointment" or "meeting with client" = APPOINTMENT
- If sender is coach, teacher, school = FAMILY
- If sender is doctor, clinic, business = APPOINTMENT
Email Subject: {subject}
Sender: {sender}
Email Body:
{body}
Respond with ONLY the category name, lowercase, no explanation."""
class EmailClassifier:
    """Classify emails into one of a fixed set of categories using an LLM.

    The classifier fills ``CLASSIFICATION_PROMPT`` with the email's subject,
    sender and (truncated) body, sends it to the injected client, and maps
    the reply onto one of ``VALID_CATEGORIES``. An unusable reply or a
    failed LLM call is logged and reported as ``"other"``.
    """

    # Closed set of labels the prompt asks the model to choose from.
    VALID_CATEGORIES = frozenset({"appointment", "newsletter", "family", "other"})
    # Label returned when the reply is not a valid category or the call fails.
    FALLBACK_CATEGORY = "other"
    DEFAULT_MODEL = "qwen2.5-coder:7b"
    # Only this many leading characters of the body are put into the prompt.
    DEFAULT_MAX_BODY_CHARS = 1500
    # Characters models commonly wrap a one-word answer in (whitespace,
    # quotes, markdown emphasis, trailing punctuation); stripped from both
    # ends of the reply before validation.
    _ANSWER_PADDING = " \t\r\n\"'`*.,:;!"

    def __init__(
        self,
        llm_client: "LLMClient",
        *,
        model: str = DEFAULT_MODEL,
        max_body_chars: int = DEFAULT_MAX_BODY_CHARS,
    ):
        """Store the LLM client and classification settings.

        Args:
            llm_client: Client whose awaitable ``generate(prompt=..., model=...,
                temperature=..., prefer_cloud=...)`` returns a mapping with a
                ``"content"`` entry holding the model's text reply.
            model: Model identifier passed through to ``generate``.
            max_body_chars: Maximum number of leading body characters that
                are included in the prompt.
        """
        self.llm = llm_client
        self.model = model
        self.max_body_chars = max_body_chars

    async def classify(
        self,
        subject: str,
        body: str,
        sender: str
    ) -> str:
        """Classify a single email.

        Args:
            subject: Email subject line; ``None`` is treated as empty.
            body: Email body; ``None`` is treated as empty. Only the first
                ``max_body_chars`` characters are sent to the model.
            sender: Sender string; ``None`` is treated as empty.

        Returns:
            One of ``appointment``, ``newsletter``, ``family``, ``other``.
            ``other`` is also returned when the model's reply is not a valid
            category or when the LLM call raises.
        """
        # Missing fields arrive as None from some callers; normalise them so
        # slicing and formatting below cannot raise.
        subject = subject or ""
        sender = sender or ""
        body = body or ""

        prompt = CLASSIFICATION_PROMPT.format(
            subject=subject,
            sender=sender,
            body=body[: self.max_body_chars],
        )

        try:
            result = await self.llm.generate(
                prompt=prompt,
                model=self.model,
                temperature=0.1,  # Low temp for consistent classification
                prefer_cloud=False,  # Use local first
            )
            # Tolerate answers such as "'Family'." or "**family**".
            category = result["content"].strip(self._ANSWER_PADDING).lower()
        except Exception as exc:
            # Deliberate best-effort: a failed or malformed LLM response must
            # not break the caller, so log it and use the fallback label.
            logger.exception("Classification failed: %s", exc)
            return self.FALLBACK_CATEGORY

        if category not in self.VALID_CATEGORIES:
            logger.warning(
                "Invalid classification: '%s', defaulting to 'other'", category
            )
            category = self.FALLBACK_CATEGORY

        logger.info("Classified as '%s': %s...", category, subject[:50])
        return category

    async def classify_batch(
        self,
        emails: list[dict[str, str]]
    ) -> list[str]:
        """Classify multiple emails one after another.

        Args:
            emails: Mappings with optional ``subject``, ``body`` and
                ``sender`` keys; missing keys are treated as empty strings.

        Returns:
            The categories in the same order as ``emails``.
        """
        # Awaited sequentially on purpose: each email is one LLM request.
        return [
            await self.classify(
                email.get("subject", ""),
                email.get("body", ""),
                email.get("sender", ""),
            )
            for email in emails
        ]