# classifier.py (2,780 bytes, Apr 23, 2026)

"""LLM-based email classifier."""

import logging
from typing import Optional, Dict, Any
from shared.llm import LLMClient

# Module-level logger named after this module, so its records can be
# filtered and configured per module by the application's logging setup.
logger = logging.getLogger(__name__)

# Prompt template sent to the LLM for every email.
#
# It carries three named placeholders -- {subject}, {sender} and {body} --
# which EmailClassifier.classify fills in with str.format(). Because the
# template goes through str.format(), any literal brace added to this text
# must be doubled ("{{" / "}}").
#
# The four category names listed here are the same ones classify() accepts
# when validating the model's reply; keep the two in sync when editing.
CLASSIFICATION_PROMPT = """Classify this email into exactly one category.

Categories:
- appointment: Personal appointment (doctor, dentist, meeting) with specific date/time for YOU
- newsletter: Digest, weekly updates, informational broadcast (not personal)
- family: Kid activities, school events, family gatherings, household matters
- other: Everything else

IMPORTANT DISTINCTIONS:
- "Soccer practice" or "school event" = FAMILY (even if it has a time)
- "Doctor appointment" or "meeting with client" = APPOINTMENT
- If sender is coach, teacher, school = FAMILY
- If sender is doctor, clinic, business = APPOINTMENT

Email Subject: {subject}
Sender: {sender}
Email Body:
{body}

Respond with ONLY the category name, lowercase, no explanation."""

class EmailClassifier:
    """Classify emails into a fixed set of categories using an LLM.

    For each email the classifier renders ``CLASSIFICATION_PROMPT``, sends it
    to the injected LLM client and maps the reply onto one of
    ``VALID_CATEGORIES``. Classification is best-effort: a failed model call
    or an unrecognised reply yields ``FALLBACK_CATEGORY`` instead of raising.
    """

    # Closed set of labels the prompt asks the model to choose from.
    VALID_CATEGORIES = frozenset({"appointment", "newsletter", "family", "other"})
    # Label returned when the model call fails or its reply is not recognised.
    FALLBACK_CATEGORY = "other"
    # Model name handed to the LLM client; subclasses may override it.
    MODEL = "qwen2.5-coder:7b"
    # Only this many leading characters of the body are placed in the prompt.
    MAX_BODY_CHARS = 1500
    # Characters stripped from both ends of the reply before validation, so
    # answers such as '"Family."' or '**family**' are still recognised.
    _REPLY_DECORATION = "\"'`*. \t\r\n"

    def __init__(self, llm_client: LLMClient):
        """Store the LLM client used for all classification requests.

        Args:
            llm_client: Client object whose awaitable ``generate`` method is
                called with ``prompt``, ``model``, ``temperature`` and
                ``prefer_cloud`` keyword arguments.
        """
        self.llm = llm_client

    async def classify(
        self,
        subject: str,
        body: str,
        sender: str
    ) -> str:
        """Classify a single email into a category.

        Args:
            subject: Email subject line.
            body: Email body; only the first ``MAX_BODY_CHARS`` characters
                are sent to the model.
            sender: Sender of the email.

        Returns:
            One of: appointment, newsletter, family, other. ``other`` is
            also returned when the model call fails or the reply is not one
            of the known categories.
        """
        # A field may arrive as None (e.g. a dict key present with a None
        # value in classify_batch). Normalise up front so that slicing the
        # body cannot raise, and so that logging the subject after a
        # successful classification cannot discard the result.
        subject = subject or ""
        sender = sender or ""
        body = body or ""

        prompt = CLASSIFICATION_PROMPT.format(
            subject=subject,
            sender=sender,
            body=body[: self.MAX_BODY_CHARS],
        )

        # Keep the guarded region to the model call and reply extraction:
        # these are the steps expected to fail at runtime.
        try:
            result = await self.llm.generate(
                prompt=prompt,
                model=self.MODEL,
                temperature=0.1,  # Low temp for consistent classification
                prefer_cloud=False,  # Use local first
            )
            # NOTE(review): assumes generate() returns a mapping whose
            # "content" entry is the reply text -- confirm against LLMClient.
            category = result["content"].strip(self._REPLY_DECORATION).lower()
        except Exception as exc:
            # Deliberate best-effort boundary: one failed model call must
            # not abort the caller's pipeline. The traceback is kept in the
            # log for diagnosis.
            logger.exception("Classification failed: %s", exc)
            return self.FALLBACK_CATEGORY

        if category not in self.VALID_CATEGORIES:
            logger.warning(
                "Invalid classification: '%s', defaulting to '%s'",
                category,
                self.FALLBACK_CATEGORY,
            )
            category = self.FALLBACK_CATEGORY

        logger.info("Classified as '%s': %s...", category, subject[:50])
        return category

    async def classify_batch(
        self,
        emails: list[Dict[str, str]]
    ) -> list[str]:
        """Classify multiple emails, one after another.

        Args:
            emails: Dicts with optional ``subject``, ``body`` and ``sender``
                keys; a missing key is treated as an empty string.

        Returns:
            One category per input email, in the same order as ``emails``.
        """
        # Awaited sequentially on purpose: requests are issued one at a
        # time, exactly as a plain loop would.
        return [
            await self.classify(
                email.get("subject", ""),
                email.get("body", ""),
                email.get("sender", ""),
            )
            for email in emails
        ]