"""The Clicker — URL-following concierge for action items.

When the newsletter parser identifies an action_item with a URL (e.g., a
Sign-Up Genius link for conferences), the Clicker:

1. Fetches the URL via Jina Reader API (renders JS, returns Markdown)
2. Passes the page content to the LLM to extract available slots
3. Returns structured slot data for Hermes to post with inline buttons

The UX upgrade: instead of "Sign up for conferences", the bot posts:
  "Sign-ups found for Parent-Teacher Conferences.
   Available slots: 3:00 PM, 3:20 PM, 4:00 PM.
   Tap a time to select."
"""

import json
import os
import re
import sys

import requests

from family_assistant.config import LLM_MODEL, LLM_URL, LLM_TIMEOUT, CHICAGO_TZ
from family_assistant.url_fetcher import fetch_url_as_markdown

# ---------------------------------------------------------------------------
# Slot Extraction
# ---------------------------------------------------------------------------

# System prompt for the slot-extraction LLM call.
#
# ``{today}`` (used twice) is a placeholder for the current date and must be
# substituted before the prompt is sent.
#
# NOTE(review): the braces in the "Example output" JSON are doubled ({{ }}),
# which is str.format escaping. They only collapse to single braces when the
# template is rendered with str.format; any other substitution method sends
# the doubled braces to the model verbatim — confirm the rendering call.
SLOT_EXTRACT_PROMPT = """You are a slot extraction engine. Given a web page's content, extract the available signup slots as structured JSON.

Today's date: {today} (America/Chicago)

## What to extract

Look for:
- Available time slots on signup forms (Sign-Up Genius, VolunteerSpot, Google Forms, etc.)
- Available dates for events, conferences, or appointments
- Pricing tiers or ticket types if relevant
- Any capacity/availability indicators (e.g., "2 spots left", "full")

## Rules

- Return ONLY a JSON array. No markdown, no explanation, no code fences.
- Each slot has: time, date, label, spots_remaining (null if unknown), category
- time: ISO 8601 datetime with timezone (e.g., "2026-04-22T15:00:00-05:00"). If no specific time, use date only.
- date: ISO 8601 date (e.g., "2026-04-22"). Only if no specific time.
- label: human-readable description of what this slot is for (e.g., "Parent-Teacher Conference - Sullivan")
- spots_remaining: integer or null if unknown
- category: grouping label if slots are organized by category (e.g., "Conferences", "Volunteer", "Chaperone")
- If the page is NOT a signup form (just informational), return []
- If no slots are found, return []
- If dates are ambiguous, resolve to nearest future date relative to today ({today})

## Example output

[
  {{"time": "2026-04-22T15:00:00-05:00", "date": null, "label": "Parent-Teacher Conference - Sullivan", "spots_remaining": 3, "category": "Conferences"}},
  {{"time": "2026-04-22T15:20:00-05:00", "date": null, "label": "Parent-Teacher Conference - Sullivan", "spots_remaining": 2, "category": "Conferences"}},
  {{"time": "2026-04-22T16:00:00-05:00", "date": null, "label": "Parent-Teacher Conference - Sullivan", "spots_remaining": 5, "category": "Conferences"}}
]"""


def click_url(url, context_summary="", dry_run=False):
    """Resolve an action-item link into a list of open signup slots.

    The page is fetched with ``fetch_url_as_markdown``, handed to
    ``_extract_slots`` and, when slots come back, rendered with
    ``_format_slots_message``.

    Args:
        url: Link to follow (e.g., a Sign-Up Genius page).
        context_summary: Short description of what the signup is for.
        dry_run: When True, skip fetching/extraction and only report intent.

    Returns:
        Dict with keys ``status`` (``DRY_RUN``, ``FETCH_FAILED``, ``NO_SLOTS``
        or ``SLOTS_FOUND``), ``url``, ``slots`` and ``message``.
    """

    def _outcome(status, message, slots=None):
        # Every exit path shares the same four-key result shape.
        return {
            "status": status,
            "url": url,
            "slots": [] if slots is None else slots,
            "message": message,
        }

    if dry_run:
        return _outcome(
            "DRY_RUN",
            f"🔍 Would fetch and extract slots from: {url[:60]}",
        )

    # Fetch the page content as Markdown.
    page_markdown = fetch_url_as_markdown(url)
    if not page_markdown:
        return _outcome(
            "FETCH_FAILED",
            f"❌ Couldn't load signup page: {url[:60]}",
        )

    # Let the LLM pull the slots out of the page.
    found = _extract_slots(page_markdown, context_summary)
    if not found:
        return _outcome(
            "NO_SLOTS",
            f"ℹ️ No signup slots found at: {url[:60]}",
        )

    # Render the slots as Telegram message text.
    text = _format_slots_message(found, context_summary, url)
    return _outcome("SLOTS_FOUND", text, found)


def _extract_slots(markdown, context_summary=""):
    """Use the LLM to extract signup slots from fetched page content.

    Best-effort: every failure (timeout, HTTP error, unparsable reply) is
    reported on stderr and results in an empty list rather than an exception.

    Args:
        markdown: Page content as Markdown text.
        context_summary: Optional hint about what the signup is for; when
            given it is prepended to the user message.

    Returns:
        A list of slot dicts, or an empty list if none were found or the
        extraction failed.
    """
    from datetime import datetime

    today = datetime.now(CHICAGO_TZ).strftime("%A, %B %d, %Y")

    # The template escapes its literal JSON braces as {{ }}. str.format fills
    # in {today} AND collapses those escapes; str.replace would leave the
    # doubled braces in the example shown to the model.
    prompt = SLOT_EXTRACT_PROMPT.format(today=today)

    # Truncate markdown if too long (keep first 8000 chars — enough for most signup forms)
    if len(markdown) > 8000:
        markdown = markdown[:8000] + "\n\n[Content truncated]"

    page_section = f"Page content:\n{markdown}"
    if context_summary:
        user_message = f"Context: {context_summary}\n\n{page_section}"
    else:
        user_message = page_section

    payload = {
        "model": LLM_MODEL,
        "messages": [
            {"role": "system", "content": prompt},
            {"role": "user", "content": user_message},
        ],
        "temperature": 0.1,
        "stream": False,
    }

    try:
        resp = requests.post(LLM_URL, json=payload, timeout=LLM_TIMEOUT)
        resp.raise_for_status()
        data = resp.json()

        # Handle both OpenAI and Ollama response formats
        choices = data.get("choices", [])
        if choices:
            raw = choices[0].get("message", {}).get("content")
        elif "message" in data:
            raw = data["message"].get("content")
        else:
            raw = data.get("response")
        # A null content field is treated as an empty reply.
        raw = (raw or "").strip()

        # Strip markdown code fences
        raw = re.sub(r"^```(?:json)?\s*", "", raw)
        raw = re.sub(r"\s*```$", "", raw)

        try:
            slots = json.loads(raw)
        except json.JSONDecodeError:
            # The model may have wrapped the array in prose; try the
            # outermost [...] span. A second decode failure falls through to
            # the generic handler below.
            match = re.search(r"\[.*\]", raw, re.DOTALL)
            if not match:
                print("  [Clicker] Could not parse LLM response as JSON", file=sys.stderr)
                return []
            slots = json.loads(match.group())

        if not isinstance(slots, list):
            return []

        # Consumers call .get() on every slot, so drop anything that is not
        # a JSON object.
        return [slot for slot in slots if isinstance(slot, dict)]

    except requests.exceptions.Timeout:
        print("  [Clicker] LLM timeout during slot extraction", file=sys.stderr)
        return []
    except Exception as e:
        # Deliberate catch-all: slot extraction is optional enrichment and
        # must never take the caller down.
        print(f"  [Clicker] Error extracting slots: {e}", file=sys.stderr)
        return []


def _format_slots_message(slots, context_summary, url):
    """Format extracted slots as the Markdown text of a Telegram message.

    Slots are grouped by category (a heading per category is emitted only
    when there is more than one), then by calendar day, with one bullet per
    slot. The message ends with a link back to the signup page. Inline
    buttons are not built here.

    Args:
        slots: List of slot dicts (keys: time, date, label, spots_remaining,
            category). Non-dict entries are ignored.
        context_summary: Optional title line; omitted when empty.
        url: Signup page URL used for the trailing link.

    Returns:
        The message text, lines joined with newlines.
    """
    from datetime import date, datetime

    day_format = "%a %b %d"
    unknown_day = "Unknown date"

    def _clock(moment):
        # "%-I" is a platform extension; strip the leading zero by hand so
        # the result is "3:00 PM" everywhere.
        return moment.strftime("%I:%M %p").lstrip("0")

    def _placement(slot):
        """Return (date_key, time_label) for one slot."""
        time_str = slot.get("time") or ""
        date_str = slot.get("date") or ""

        if time_str:
            # The prompt allows a bare date in "time" when no clock time is
            # known. datetime.fromisoformat would read that as midnight and
            # display "12:00 AM", so check for a date-only value first.
            try:
                return date.fromisoformat(time_str).strftime(day_format), None
            except (ValueError, TypeError):
                pass
            try:
                moment = datetime.fromisoformat(time_str)
            except (ValueError, TypeError):
                # Unparsable: show the raw value rather than dropping it.
                return unknown_day, time_str
            return moment.strftime(day_format), _clock(moment)

        if date_str:
            try:
                return datetime.fromisoformat(date_str).strftime(day_format), None
            except (ValueError, TypeError):
                return unknown_day, None

        return unknown_day, None

    # Group by category. "or" (not a .get default) so that an explicit null
    # category also lands in "General".
    categories = {}
    for slot in slots:
        if not isinstance(slot, dict):
            continue
        categories.setdefault(slot.get("category") or "General", []).append(slot)

    lines = []
    if context_summary:
        lines.append(f"📋 **{context_summary}**\n")

    for cat, cat_slots in categories.items():
        if len(categories) > 1:
            lines.append(f"**{cat}**")

        # Group by date within category
        by_date = {}
        for slot in cat_slots:
            date_key, time_label = _placement(slot)
            by_date.setdefault(date_key, []).append({
                "time_label": time_label,
                "label": slot.get("label") or "",
                "spots": slot.get("spots_remaining"),
            })

        for day, day_slots in by_date.items():
            lines.append(f"  📅 {day}")
            for item in day_slots:
                # Prefer the clock time; fall back to the label for
                # date-only slots.
                shown = item["time_label"] if item["time_label"] else item["label"]
                entry = f"    • {shown}"
                if item["spots"] is not None:
                    entry += f" ({item['spots']} spots)"
                lines.append(entry)

        lines.append("")

    lines.append(f"🔗 [Open signup page]({url})")

    return "\n".join(lines)


def build_slot_buttons(slots, url_hash=None):
    """Build Telegram inline buttons for slot selection.

    One button per row, for at most the first 10 slots. Each button carries
    callback_data ``slot|<url_hash>|<slot_index>``, where ``slot_index`` is
    the slot's position in ``slots``.

    Button text prefers the slot time ("Sat 8:00 AM"), or the day
    ("Sat Apr 25") when ``time`` holds only a date. Otherwise it uses the
    label, then "Slot N". A known ``spots_remaining`` is appended in
    parentheses.

    Args:
        slots: List of slot dicts from _extract_slots(). Non-dict entries
            are skipped.
        url_hash: Short hash of the URL for callback identification;
            defaults to "0" when not given.

    Returns:
        List of button rows for Telegram inline keyboard.
    """
    from datetime import date, datetime

    if not url_hash:
        url_hash = "0"

    rows = []
    for i, slot in enumerate(slots[:10]):  # Max 10 buttons
        if not isinstance(slot, dict):
            # Skip, but keep i aligned with the slot's index in `slots`.
            continue

        time_str = slot.get("time") or ""
        label = slot.get("label") or ""

        # Format button text — prefer time over label
        if time_str:
            try:
                # A bare date would otherwise parse as midnight and be shown
                # as "12:00 AM"; show the day instead.
                button_text = date.fromisoformat(time_str).strftime("%a %b %d")
            except (ValueError, TypeError):
                try:
                    dt = datetime.fromisoformat(time_str)
                except (ValueError, TypeError):
                    # str() so a non-string value cannot break the slice.
                    button_text = str(time_str)[:20]
                else:
                    # "%-I" is a platform extension; strip the leading zero
                    # by hand for a portable "Sat 8:00 AM".
                    clock = dt.strftime("%I:%M %p").lstrip("0")
                    button_text = f"{dt.strftime('%a')} {clock}"
        elif label:
            # Use label but allow longer text for single-slot scenarios
            button_text = label[:30] if len(slots) > 1 else label[:40]
        else:
            button_text = f"Slot {i+1}"

        # Add spots remaining
        spots = slot.get("spots_remaining")
        if spots is not None:
            button_text += f" ({spots})"

        callback = f"slot|{url_hash}|{i}"
        rows.append([{"text": button_text, "callback_data": callback}])

    return rows


def hash_url(url):
    """Return the first 8 hex characters of the URL's MD5 digest.

    Used as a compact identifier for a URL in callback data; it is not a
    security measure.
    """
    from hashlib import md5

    digest = md5(url.encode()).hexdigest()
    return digest[:8]