# clicker.py — 10,468 bytes, Apr 17, 2026 (file-viewer header, not part of the module)

"""The Clicker — URL-following concierge for action items.

When the newsletter parser identifies an action_item with a URL (e.g., a
Sign-Up Genius link for conferences), the Clicker:

1. Fetches the URL via Jina Reader API (renders JS, returns Markdown)
2. Passes the page content to the LLM to extract available slots
3. Returns structured slot data for Hermes to post with inline buttons

The UX upgrade: instead of "Sign up for conferences", the bot posts:
"Sign-ups found for Parent-Teacher Conferences.
Available slots: 3:00 PM, 3:20 PM, 4:00 PM.
Tap a time to select."
"""

import json
import os
import re
import sys

import requests

from family_assistant.config import LLM_MODEL, LLM_URL, LLM_TIMEOUT, CHICAGO_TZ
from family_assistant.url_fetcher import fetch_url_as_markdown

# ---------------------------------------------------------------------------
# Slot Extraction
# ---------------------------------------------------------------------------

# System prompt for slot extraction. `{today}` is a placeholder that
# _extract_slots() fills with the current date. The JSON example at the end
# writes its braces doubled ({{ ... }}), which is str.format-style escaping:
# they collapse to single braces only if the template is rendered with
# str.format().
SLOT_EXTRACT_PROMPT = """You are a slot extraction engine. Given a web page's content, extract the available signup slots as structured JSON.

Today's date: {today} (America/Chicago)

What to extract

Look for:
- Available time slots on signup forms (Sign-Up Genius, VolunteerSpot, Google Forms, etc.)
- Available dates for events, conferences, or appointments
- Pricing tiers or ticket types if relevant
- Any capacity/availability indicators (e.g., "2 spots left", "full")

Rules

Return ONLY a JSON array. No markdown, no explanation, no code fences.
Each slot has: time, date, label, spots_remaining (null if unknown), category
time: ISO 8601 datetime with timezone (e.g., "2026-04-22T15:00:00-05:00"). If no specific time, use date only.
date: ISO 8601 date (e.g., "2026-04-22"). Only if no specific time.
label: human-readable description of what this slot is for (e.g., "Parent-Teacher Conference - Sullivan")
spots_remaining: integer or null if unknown
category: grouping label if slots are organized by category (e.g., "Conferences", "Volunteer", "Chaperone")
If the page is NOT a signup form (just informational), return []
If no slots are found, return []
If dates are ambiguous, resolve to nearest future date relative to today ({today})

Example output

[
{{"time": "2026-04-22T15:00:00-05:00", "date": null, "label": "Parent-Teacher Conference - Sullivan", "spots_remaining": 3, "category": "Conferences"}},
{{"time": "2026-04-22T15:20:00-05:00", "date": null, "label": "Parent-Teacher Conference - Sullivan", "spots_remaining": 2, "category": "Conferences"}},
{{"time": "2026-04-22T16:00:00-05:00", "date": null, "label": "Parent-Teacher Conference - Sullivan", "spots_remaining": 5, "category": "Conferences"}}
]"""

def click_url(url, context_summary="", dry_run=False):
    """Follow an action_item URL and extract available signup slots.

    Args:
        url: The URL to follow (e.g., Sign-Up Genius link).
        context_summary: Brief context about what the signup is for; it is
            passed to the slot extractor and used as the message heading.
        dry_run: If True, don't actually fetch or extract.

    Returns:
        Dict with keys ``status`` (``DRY_RUN``, ``FETCH_FAILED``,
        ``NO_SLOTS`` or ``SLOTS_FOUND``), ``url``, ``slots`` (list of slot
        dicts; empty unless slots were found) and ``message`` (text for
        Telegram).
    """
    # Status messages echo at most the first 60 characters of the URL.
    # Tolerate a missing URL so callers get a status dict, not a TypeError.
    short_url = (url or "")[:60]

    if dry_run:
        return {
            "status": "DRY_RUN",
            "url": url,
            "slots": [],
            "message": f"🔍 Would fetch and extract slots from: {short_url}",
        }

    # Step 1: Fetch the URL via Jina Reader. An empty URL cannot be fetched,
    # so skip the network call and report it as a failed fetch.
    markdown = fetch_url_as_markdown(url) if url else None
    if not markdown:
        return {
            "status": "FETCH_FAILED",
            "url": url,
            "slots": [],
            "message": f"❌ Couldn't load signup page: {short_url}",
        }

    # Step 2: Extract slots via LLM
    slots = _extract_slots(markdown, context_summary)
    if not slots:
        return {
            "status": "NO_SLOTS",
            "url": url,
            "slots": [],
            "message": f"ℹ️ No signup slots found at: {short_url}",
        }

    # Step 3: Format for Telegram
    return {
        "status": "SLOTS_FOUND",
        "url": url,
        "slots": slots,
        "message": _format_slots_message(slots, context_summary, url),
    }

def _extract_slots(markdown, context_summary=""):
    """Use the LLM to extract signup slots from fetched page content.

    Args:
        markdown: Page content as Markdown text. Only the first 8000
            characters are sent to the model.
        context_summary: Optional description of the signup; when given it
            is prepended to the user message.

    Returns:
        A list of slot dicts, or an empty list if none were found, the
        response could not be parsed, or the request failed. Failures are
        reported on stderr rather than raised.
    """
    from datetime import datetime

    today = datetime.now(CHICAGO_TZ).strftime("%A, %B %d, %Y")

    # The template is written for str.format(): "{today}" is the placeholder
    # and the JSON example escapes its braces as "{{ }}". Rendering it with
    # format() fills the date and collapses the escapes; a plain
    # str.replace() would leave doubled braces in the example the model sees.
    prompt = SLOT_EXTRACT_PROMPT.format(today=today)

    # Truncate markdown if too long (keep first 8000 chars — enough for most signup forms)
    if len(markdown) > 8000:
        markdown = markdown[:8000] + "\n\n[Content truncated]"

    if context_summary:
        user_message = f"Context: {context_summary}\n\nPage content:\n{markdown}"
    else:
        user_message = f"Page content:\n{markdown}"

    payload = {
        "model": LLM_MODEL,
        "messages": [
            {"role": "system", "content": prompt},
            {"role": "user", "content": user_message},
        ],
        "temperature": 0.1,
        "stream": False,
    }

    try:
        resp = requests.post(LLM_URL, json=payload, timeout=LLM_TIMEOUT)
        resp.raise_for_status()
        data = resp.json()

        # Handle both OpenAI and Ollama response formats
        choices = data.get("choices", [])
        if choices:
            raw = choices[0].get("message", {}).get("content", "")
        elif "message" in data:
            raw = data["message"].get("content", "")
        else:
            raw = data.get("response", "")
        # The content field may be present but null; treat that as empty.
        raw = (raw or "").strip()

        # Strip markdown code fences
        raw = re.sub(r"^```(?:json)?\s*", "", raw)
        raw = re.sub(r"\s*```$", "", raw)

        try:
            slots = json.loads(raw)
        except json.JSONDecodeError:
            # The model may wrap the array in prose; try the outermost [...]
            match = re.search(r"\[.*\]", raw, re.DOTALL)
            slots = None
            if match:
                try:
                    slots = json.loads(match.group())
                except json.JSONDecodeError:
                    slots = None
            if slots is None:
                print("  [Clicker] Could not parse LLM response as JSON", file=sys.stderr)
                return []

        if not isinstance(slots, list):
            return []

        # Callers read slots with dict methods; drop anything that is not a dict.
        return [slot for slot in slots if isinstance(slot, dict)]

    except requests.exceptions.Timeout:
        print("  [Clicker] LLM timeout during slot extraction", file=sys.stderr)
        return []
    except Exception as e:
        # Deliberate best-effort boundary: any failure means "no slots".
        print(f"  [Clicker] Error extracting slots: {e}", file=sys.stderr)
        return []

def _format_slots_message(slots, context_summary, url):
"""Format extracted slots as a Telegram message with inline buttons.

Groups slots by category, then by date, then lists times.
"""
from datetime import datetime

# Group by category
categories = {}
for slot in slots:
    cat = slot.get("category", "General")
    if cat not in categories:
        categories[cat] = []
    categories[cat].append(slot)

lines = []
if context_summary:
    lines.append(f"📋 **{context_summary}**\n")

for cat, cat_slots in categories.items():
    if len(categories) > 1:
        lines.append(f"**{cat}**")

    # Group by date within category
    by_date = {}
    for slot in cat_slots:
        time_str = slot.get("time", "")
        date_str = slot.get("date", "")
        if time_str:
            try:
                dt = datetime.fromisoformat(time_str)
                date_key = dt.strftime("%a %b %d")
                time_label = dt.strftime("%-I:%M %p")
            except (ValueError, TypeError):
                date_key = "Unknown date"
                time_label = time_str
        elif date_str:
            try:
                dt = datetime.fromisoformat(date_str)
                date_key = dt.strftime("%a %b %d")
                time_label = None
            except (ValueError, TypeError):
                date_key = "Unknown date"
                time_label = None
        else:
            date_key = "Unknown date"
            time_label = None

        if date_key not in by_date:
            by_date[date_key] = []
        by_date[date_key].append({
            "time_label": time_label,
            "label": slot.get("label", ""),
            "spots": slot.get("spots_remaining"),
        })

    for date, date_slots in by_date.items():
        lines.append(f"  📅 {date}")
        for s in date_slots:
            entry = f"    • {s['time_label']}" if s["time_label"] else f"    • {s['label']}"
            if s["spots"] is not None:
                entry += f" ({s['spots']} spots)"
            lines.append(entry)

    lines.append("")

lines.append(f"🔗 [Open signup page]({url})")

return "\n".join(lines)

def build_slot_buttons(slots, url_hash=None, max_buttons=10):
    """Build Telegram inline buttons for slot selection.

    Each button has callback_data: slot|<url_hash>|<slot_index>, where
    slot_index is the slot's position in ``slots``. One button is placed per
    row, and only the first ``max_buttons`` slots get a button.

    Args:
        slots: List of slot dicts from _extract_slots(). Non-dict items are
            skipped (their index is still consumed).
        url_hash: Short hash of the URL for callback identification;
            defaults to "0" when not given.
        max_buttons: Maximum number of slots to turn into buttons.

    Returns:
        List of button rows for Telegram inline keyboard; each row is a
        one-element list holding a ``{"text", "callback_data"}`` dict.
    """
    from datetime import datetime

    if not url_hash:
        url_hash = "0"

    rows = []
    for i, slot in enumerate(slots[:max_buttons]):
        if not isinstance(slot, dict):
            continue

        time_str = slot.get("time") or ""
        label = slot.get("label") or ""

        # Format button text — prefer time over label
        if time_str:
            try:
                dt = datetime.fromisoformat(time_str)
            except (ValueError, TypeError):
                # Unparseable: show the raw value, truncated.
                button_text = str(time_str)[:20]
            else:
                if len(time_str) > 10:
                    # e.g. "Sat 8:00 AM". "%-I" is a platform extension, so
                    # strip the zero padding from "%I" by hand.
                    button_text = dt.strftime("%a ") + dt.strftime("%I:%M %p").lstrip("0")
                else:
                    # Bare date (YYYY-MM-DD): there is no clock time to show.
                    button_text = dt.strftime("%a %b %d")
        elif label:
            # Use label but allow longer text for single-slot scenarios
            label_text = str(label)
            button_text = label_text[:30] if len(slots) > 1 else label_text[:40]
        else:
            button_text = f"Slot {i+1}"

        # Add spots remaining
        spots = slot.get("spots_remaining")
        if spots is not None:
            button_text += f" ({spots})"

        rows.append([{"text": button_text, "callback_data": f"slot|{url_hash}|{i}"}])

    return rows

def hash_url(url):
    """Generate a short hash of a URL for callback identification.

    Args:
        url: The URL string to hash.

    Returns:
        The first 8 hex characters of the MD5 digest of the encoded URL.
    """
    import hashlib

    data = url.encode()
    try:
        # MD5 is used here only as a short identifier, not for security.
        # Declaring that lets it run on builds that restrict MD5.
        digest = hashlib.md5(data, usedforsecurity=False)
    except TypeError:
        # Older Python versions do not accept the usedforsecurity keyword.
        digest = hashlib.md5(data)
    return digest.hexdigest()[:8]

# (page-footer residue from the web view: "← Back")