"""Sanity checks for LLM classification — prevent hallucinations and drops.

Adapts costco_route/pipeline.py validation logic:
- Item name matching → Document hash matching
- Zone defaults → Member keyword inference
- Costco-specific keywords → Family-specific keywords

Sovereign: Zero imports from costco_route.
"""

import difflib
from typing import Optional


# Family-specific keywords for fallback inference.
#
# Maps member id -> list of lowercase keywords. _infer_member_from_content()
# lowercases the document content and tests each keyword with a substring
# check (`kw in content`), walking this dict in insertion order and returning
# the first member with any hit. Things to keep in mind when editing:
# - Order is priority: an earlier member wins when several members match.
# - "work" is listed for both "aundrea" and "matt"; because "aundrea" comes
#   first, "matt" is never selected on "work" alone.
# - Matching is by substring, so short keywords also hit inside longer words
#   (e.g. "art" is contained in "party").
# - Keywords must stay lowercase, since they are compared to lowercased content.
MEMBER_KEYWORDS = {
    "sully": ["first grade", "mrs. smith", "sullivan", "sully", "dinosaur", "space", "lego"],
    "harper": ["pre-k", "preschool", "ms. johnson", "harper", "unicorn", "dance", "art"],
    "aundrea": ["hospital", "work", "night shift", "aundrea", "mom"],
    "matt": ["software", "meeting", "work", "matt", "dad"],
}


def validate_classification(
    classified: dict[str, list[dict]],
    original_documents: list[dict]
) -> dict[str, list[dict]]:
    """Remove hallucinated classifications, recover dropped documents.

    The LLM sometimes:
    1. Assigns documents not in original list (hallucination)
    2. Drops documents from original list
    3. Emits structurally malformed output (a non-list value for a member,
       a non-dict entry, an unhashable ``content_hash``)

    Cases 1 and 3 are filtered out; case 2 is repaired by assigning each
    dropped original document to a member via keyword inference.

    Args:
        classified: {member_id: [document_dicts]} from LLM. Treated as
            untrusted: anything that cannot be matched to an original
            document by ``content_hash`` is discarded.
        original_documents: Original document list. Every entry must carry a
            ``content_hash`` key (this is trusted pipeline input).

    Returns:
        Validated classification dict. Kept entries are the LLM's document
        dicts in their original order; recovered entries are the original
        document dicts, appended after them. Members left with no documents
        are omitted. A hash the LLM assigned more than once is kept each
        time it appears (no de-duplication is performed).

    Raises:
        KeyError: If an original document has no ``content_hash``.
    """
    # Build lookup of original docs by content hash
    originals_by_hash = {doc["content_hash"]: doc for doc in original_documents}
    matched_hashes = set()

    # Filter out hallucinations and malformed entries
    validated: dict[str, list[dict]] = {}
    for member_id, docs in classified.items():
        if not isinstance(docs, (list, tuple)):
            # e.g. the LLM emitted null or a bare string for this member.
            continue

        kept = []
        for doc in docs:
            if not isinstance(doc, dict):
                continue
            doc_hash = doc.get("content_hash")
            try:
                is_original = doc_hash in originals_by_hash
            except TypeError:
                # Unhashable value (list/dict) can never be a real hash.
                is_original = False
            if is_original:
                matched_hashes.add(doc_hash)
                kept.append(doc)
        if kept:
            validated[member_id] = kept

    # Recover dropped documents: assign to a member based on keywords
    for doc_hash, doc in originals_by_hash.items():
        if doc_hash not in matched_hashes:
            member = _infer_member_from_content(doc)
            validated.setdefault(member, []).append(doc)

    return validated


def _infer_member_from_content(doc: dict) -> str:
    """Infer family member from document content (keyword fallback)."""
    content = doc.get("content", "").lower()

    for member, keywords in MEMBER_KEYWORDS.items():
        if any(kw in content for kw in keywords):
            return member

    return "family"  # Default — general family document


def validate_confidence(classified: dict[str, list[dict]], threshold: float = 0.7) -> dict[str, list[dict]]:
    """Flag low-confidence classifications for user review.

    A document is flagged unless its ``confidence`` is a number (or numeric
    string) that is ``>= threshold``. Missing, ``None``, non-numeric and NaN
    confidences are therefore all flagged rather than raising or slipping
    through.

    Flagging is idempotent: calling this repeatedly on the same data does not
    add duplicate ``"low_confidence"`` entries.

    Args:
        classified: {member_id: [document_dicts]}. Mutated in place.
        threshold: Minimum confidence for auto-acceptance

    Returns:
        The same ``classified`` object, with ``_meta.flags`` containing
        ``"low_confidence"`` and ``_meta.suggested_action`` set to
        ``"user_review"`` on each low-confidence doc. Existing flags are
        preserved.
    """
    for docs in classified.values():
        for doc in docs:
            try:
                confidence = float(doc.get("confidence", 0.0))
            except (TypeError, ValueError):
                # None, "high", nested structures, ...: treat as no confidence.
                confidence = 0.0

            # Written as ">=" so NaN (which compares False) is flagged too.
            if confidence >= threshold:
                continue

            meta = doc.get("_meta")
            if not isinstance(meta, dict):
                meta = doc["_meta"] = {}
            flags = meta.get("flags")
            if not isinstance(flags, list):
                flags = meta["flags"] = []

            if "low_confidence" not in flags:
                flags.append("low_confidence")
            meta["suggested_action"] = "user_review"

    return classified


def detect_conflicts(classified: dict[str, list[dict]], calendar_events: Optional[list[dict]] = None) -> list[dict]:
    """Detect calendar conflicts for classified documents.

    A conflict is reported for every (document, event) pair whose dates are
    equal. An event's date is ``start.date`` when present, otherwise the
    first 10 characters of ``start.dateTime``. Events with no resolvable
    date (missing or null ``start`` / ``date`` / ``dateTime``) are ignored,
    as are documents without a ``date``.

    Args:
        classified: {member_id: [document_dicts]}
        calendar_events: List of upcoming calendar events, or None

    Returns:
        List of conflict descriptions, ordered by member, then document,
        then event. Each has the keys ``document``, ``member``,
        ``conflict_with``, ``date`` and ``severity``.
    """
    if not calendar_events:
        return []

    # Resolve each event's date once instead of once per (doc, event) pair.
    dated_events = []
    for event in calendar_events:
        start = event.get("start") or {}  # tolerate "start": None
        event_date = start.get("date") or (start.get("dateTime") or "")[:10]
        if event_date:
            dated_events.append((event_date, event.get("summary", "Unknown event")))

    conflicts = []
    for member_id, docs in classified.items():
        for doc in docs:
            doc_date = doc.get("date")
            if not doc_date:
                continue

            for event_date, summary in dated_events:
                if event_date == doc_date:
                    conflicts.append({
                        "document": doc.get("title", "Untitled"),
                        "member": member_id,
                        "conflict_with": summary,
                        "date": doc_date,
                        "severity": "warning",
                    })

    return conflicts