# generator.py (11,568 bytes, Apr 28, 2026)

"""Generate contextual briefing cards from parsed documents.

Takes parsed document text + calendar context → structured briefing card.
Uses 8B models for complex reasoning and JSON generation.

Briefing cards now include action_buttons metadata when calendar-applicable
events are detected, enabling "Add to Calendar" inline keyboard buttons
in the Telegram handler.

All processed briefings are persisted to the Event Graph for
coordination tracking and Phase 7 HBM planning.
"""

import json
from datetime import datetime, timedelta
from typing import Optional

import httpx

from icarus.core.config.staging import OLLAMA_BASE_URL, OLLAMA_PRIMARY_MODEL
from icarus.core.utils.model_gate import validate_ollama_request, get_model_for_task
from icarus.core.family_loader import get_family_config
from icarus.core.db.event_graph import EventGraphWriter, classify_event_type

# Prompt template for the briefing model. generate_briefing() fills it with
# str.format(), supplying document_text, calendar_context and family_members.
# The braces of the example JSON are doubled ({{ }}) so that format() emits
# them as literal braces instead of treating them as placeholders.
BRIEFING_PROMPT = """You are Icarus, a family context engine. Generate a structured briefing card from this document.

DOCUMENT TEXT:
{document_text}

CALENDAR CONTEXT (upcoming events):
{calendar_context}

FAMILY MEMBERS: {family_members}

Analyze the document and generate a briefing card with these fields:

  1. title: Clear, concise title (e.g., "Field Trip: Museum of Science")
  2. summary: One-paragraph summary of what this is and why it matters
  3. key_details: Object with any known fields:
    - date: YYYY-MM-DD if mentioned
    - time: Start/end time if mentioned
    - location: Where the event takes place
    - cost: Any fees or payment required
    - deadline: RSVP or permission slip deadline
    - contact: Contact person or organization
    - requirements: What to bring (lunch, permission slip, etc.)
  4. conflicts: Array of strings describing calendar conflicts (e.g., ["Harper violin 4:00 PM same day"])
  5. suggested_actions: Array of specific actions to take (e.g., ["Sign permission slip", "Pack lunch", "Set reminder"])
  6. confidence: Number 0-1 indicating certainty of extraction
  7. category: One of ["event", "appointment", "deadline", "info", "urgent"]

Return ONLY valid JSON matching this structure:
{{
"title": "...",
"summary": "...",
"key_details": {{...}},
"conflicts": [...],
"suggested_actions": [...],
"confidence": 0.92,
"category": "event"
}}"""

async def generate_briefing(
    parsed_doc: dict,
    calendar_events: Optional[list] = None,
    family_members: Optional[list] = None,
    urgency: str = "medium",
) -> dict:
    """Generate a briefing card from a parsed document plus calendar context.

    The document text and calendar events are formatted into BRIEFING_PROMPT,
    sent to the Ollama chat endpoint with ``format="json"``, and the reply is
    parsed into a dict. Missing card fields are filled with defaults, then
    ``_meta``, ``action_buttons`` and ``event_type`` are attached and the card
    is written to the Event Graph.

    Args:
        parsed_doc: Output from the vision parser; the keys read here are
            "text" and "method" (and "_meta" via the document-id helper).
        calendar_events: Upcoming calendar events for conflict detection.
            ``None`` is treated as an empty list.
        family_members: Family members this affects. When ``None`` they are
            inferred from the document text via the family config, falling
            back to ``["Family"]`` when nothing is inferred.
        urgency: "low", "medium" or "high"; recorded in ``_meta``.

    Returns:
        The briefing card dict (title, summary, key_details, conflicts,
        suggested_actions, confidence, category, action_buttons, event_type,
        _meta). If the model reply is not a JSON object, a low-confidence
        fallback card is returned instead, with the parse error recorded in
        ``_meta["error"]``; the fallback is not written to the Event Graph.

    Raises:
        httpx.HTTPStatusError: If the Ollama endpoint answers with an error
            status (raised by ``response.raise_for_status()``).
    """
    # Function-scope import: the module's top-level imports do not include
    # ``sys``, and the Event Graph failure handler below writes to stderr.
    # Without it that handler raised NameError and the "non-fatal" write
    # failure aborted the whole briefing.
    import sys

    # Read the document text once; a missing or None text is treated as empty
    # so the slicing below cannot fail.
    doc_text = parsed_doc.get("text") or ""
    parser_method = parsed_doc.get("method", "unknown")

    # Load family config for context
    family_config = get_family_config()

    # Auto-detect family members from document text if not provided
    member_confidence = {}
    if family_members is None:
        inferred = family_config.infer_recipients(doc_text)
        if inferred:
            family_members = [m["member_id"] for m in inferred]
            member_confidence = {m["member_id"]: m["confidence"] for m in inferred}
        else:
            family_members = ["Family"]

    # Determine model based on task complexity
    model = get_model_for_task("complex")  # Uses 8B for briefing generation
    validate_ollama_request(model)

    # Prepare context. Both inputs are truncated to bound the prompt size.
    calendar_context = json.dumps(calendar_events or [], indent=2, default=str)
    family_context = family_config.build_context_prompt()

    prompt = BRIEFING_PROMPT.format(
        document_text=doc_text[:4000],  # Limit context
        calendar_context=calendar_context[:1000],
        family_members=family_context,
    )

    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(
            f"{OLLAMA_BASE_URL}/api/chat",
            json={
                "model": model,
                "messages": [{"role": "user", "content": prompt}],
                "format": "json",
                "stream": False,
                "options": {
                    "temperature": 0.3,  # Lower for consistent JSON
                    "num_predict": 2048,
                },
            },
        )
        response.raise_for_status()
        result = response.json()

    content = result.get("message", {}).get("content", "")
    # Handle markdown code blocks if present
    if "```json" in content:
        content = content.split("```json")[1].split("```")[0].strip()
    elif "```" in content:
        content = content.split("```")[1].split("```")[0].strip()

    # Metadata shared by the normal card and the fallback card. The resolved
    # family members and their inference confidence are recorded here so the
    # auto-detection result is not silently discarded.
    meta = {
        "model_used": model,
        "parser_method": parser_method,
        "generated_at": datetime.now().isoformat(),
        "urgency": urgency,
        "family_members": family_members,
        "member_confidence": member_confidence,
    }

    # Only the parsing step is guarded. json.JSONDecodeError is a subclass of
    # ValueError, so one handler covers both malformed JSON and valid JSON
    # that is not an object (e.g. a list), which the setdefault calls below
    # could not handle.
    try:
        briefing = json.loads(content)
        if not isinstance(briefing, dict):
            raise ValueError(
                f"expected a JSON object, got {type(briefing).__name__}"
            )
    except ValueError as e:
        # Fallback if JSON parsing fails
        return {
            "title": "Document Summary",
            "summary": doc_text[:300] + "...",
            "key_details": {},
            "conflicts": [],
            "suggested_actions": ["Review document manually"],
            "confidence": 0.3,
            "category": "info",
            "action_buttons": {"has_calendar_event": False, "can_add_to_calendar": False},
            "_meta": {**meta, "error": f"JSON parse failed: {e}"},
        }

    # Ensure required fields
    briefing.setdefault("title", "Briefing")
    briefing.setdefault("summary", doc_text[:200] + "...")
    briefing.setdefault("key_details", {})
    briefing.setdefault("conflicts", [])
    briefing.setdefault("suggested_actions", [])
    briefing.setdefault("confidence", 0.5)
    briefing.setdefault("category", "info")

    # Add metadata
    briefing["_meta"] = meta

    # Determine if this briefing has calendar-applicable events
    # (events with detected dates/times that can be added to calendar)
    briefing["action_buttons"] = _detect_calendar_actions(briefing)

    # Classify event type and persist to Event Graph
    briefing["event_type"] = classify_event_type(briefing)
    try:
        doc_id = _generate_document_id(parsed_doc)
        EventGraphWriter().write(doc_id, briefing, briefing)
    except Exception as e:
        # Event Graph write failure is non-fatal — briefing still returned
        print(f"    [Event Graph] Write failed: {e}", file=sys.stderr)

    return briefing

async def generate_quick_summary(text: str, max_words: int = 50) -> str:
    """Ask the "fast" task model for a short plain-text summary of ``text``.

    Only the first 2000 characters of ``text`` are included in the prompt,
    and at most the first 500 characters of the model's reply are returned.
    (The original author notes the fast model is a 3B model, chosen for
    speed.)

    Args:
        text: The text to summarize.
        max_words: Word limit stated in the prompt sent to the model.

    Returns:
        The reply content, truncated to 500 characters; an empty string when
        the response carries no message content.
    """
    fast_model = get_model_for_task("fast")
    validate_ollama_request(fast_model)

    instruction = f"Summarize this in {max_words} words or less:\n\n{text[:2000]}"
    payload = {
        "model": fast_model,
        "messages": [{"role": "user", "content": instruction}],
        "stream": False,
    }

    async with httpx.AsyncClient(timeout=30.0) as http:
        reply = await http.post(f"{OLLAMA_BASE_URL}/api/chat", json=payload)
        reply.raise_for_status()
        body = reply.json()

    # The response is fully parsed above, so the client can already be closed
    # when the reply text is extracted and capped.
    summary = body.get("message", {}).get("content", "")
    return summary[:500]

def _detect_calendar_actions(briefing: dict) -> dict:
"""Analyze a briefing card to determine if calendar action buttons should appear.

Returns a dict with:
  has_calendar_event: bool  whether the briefing contains date/time info
  can_add_to_calendar: bool  whether we have enough info to create a calendar event
  event_summary: str  the event title for the button
  event_start: str  ISO datetime string if available
  event_end: str  ISO datetime string if available
  event_location: str  location if available
  category: str  briefing category (event, appointment, deadline, info, urgent)

Calendar buttons are shown for briefings that:
- Have detected dates/times in key_details
- Are categorized as event, appointment, or deadline
- Are NOT pure info (no actionable date/time)
"""
category = briefing.get("category", "info")
key_details = briefing.get("key_details", {})
if not isinstance(key_details, dict):
    key_details = {}

# Check for date/time fields
date_val = key_details.get("date", "") or key_details.get("Date", "")
time_val = key_details.get("time", "") or key_details.get("Time", "")
location = key_details.get("location", "") or key_details.get("Location", "")
start_time = key_details.get("start", "") or key_details.get("Start", "")
end_time = key_details.get("end", "") or key_details.get("End", "")

# Try to construct start/end from date + time fields
event_start = start_time or ""
event_end = end_time or ""

if date_val and time_val:
    # Combine date and time: "2026-05-03" + "2:00 PM" → "2026-05-03 2:00 PM"
    event_start = f"{date_val} {time_val}" if not start_time else start_time
elif date_val and not event_start:
    event_start = date_val

# Determine if this is calendar-applicable
has_date = bool(date_val or event_start)
calendar_categories = {"event", "appointment", "deadline", "urgent"}
is_calendar_category = category in calendar_categories

# Info-only documents without dates don't get calendar buttons
has_calendar_event = has_date and is_calendar_category
can_add_to_calendar = has_calendar_event and bool(briefing.get("title"))

return {
    "has_calendar_event": has_calendar_event,
    "can_add_to_calendar": can_add_to_calendar,
    "event_summary": briefing.get("title", "Event"),
    "event_start": event_start,
    "event_end": event_end,
    "event_location": location if isinstance(location, str) else str(location),
    "category": category,
}

def _generate_document_id(parsed_doc: dict) -> str:
"""Generate a stable document ID for Event Graph persistence.

Uses filename hash if available, or text hash as fallback.

Args:
    parsed_doc: Parsed document with "text", optional "filename" metadata

Returns:
    Stable string identifier for the document
"""
import hashlib

# Try to get filename from metadata
meta = parsed_doc.get("_meta", {})
if isinstance(meta, dict):
    filename = meta.get("source_filename") or meta.get("filename")
else:
    filename = None

if not filename:
    # Fallback to text hash
    text = parsed_doc.get("text", "")
    content_hash = hashlib.sha256(text.encode()).hexdigest()[:16]
    return f"text_{content_hash}"

# Generate from filename
safe_name = "".join(c if c.isalnum() else "_" for c in filename[:32])
text = parsed_doc.get("text", "")
content_hash = hashlib.sha256(text.encode()).hexdigest()[:12]
return f"{safe_name}_{content_hash}"