"""The Clicker â URL-following concierge for action items.
When the newsletter parser identifies an action_item with a URL (e.g., a
Sign-Up Genius link for conferences), the Clicker:
- Fetches the URL via Jina Reader API (renders JS, returns Markdown)
- Passes the page content to the LLM to extract available slots
- Returns structured slot data for Hermes to post with inline buttons
The UX upgrade: instead of "Sign up for conferences", the bot posts:
"Sign-ups found for Parent-Teacher Conferences.
Available slots: 3:00 PM, 3:20 PM, 4:00 PM.
Tap a time to select."
"""
import json
import os
import re
import sys
import requests
from family_assistant.config import LLM_MODEL, LLM_URL, LLM_TIMEOUT, CHICAGO_TZ
from family_assistant.url_fetcher import fetch_url_as_markdown
# ---------------------------------------------------------------------------
# Slot Extraction
# ---------------------------------------------------------------------------
# System prompt for the slot-extraction LLM call made by _extract_slots().
# - "{today}" is a placeholder that _extract_slots() fills in with the current
#   date (computed with datetime.now(CHICAGO_TZ)) before the request is sent.
# - The JSON example writes its literal braces doubled ("{{" / "}}"), i.e. in
#   str.format-style escaping.
# NOTE(review): doubled braces only collapse to single braces when the
# template is rendered with str.format(); confirm the renderer does that,
# otherwise the model is shown "{{...}}" in the example output.
SLOT_EXTRACT_PROMPT = """You are a slot extraction engine. Given a web page's content, extract the available signup slots as structured JSON.
Today's date: {today} (America/Chicago)
What to extract
Look for:
- Available time slots on signup forms (Sign-Up Genius, VolunteerSpot, Google Forms, etc.)
- Available dates for events, conferences, or appointments
- Pricing tiers or ticket types if relevant
- Any capacity/availability indicators (e.g., "2 spots left", "full")
Rules
- Return ONLY a JSON array. No markdown, no explanation, no code fences.
- Each slot has: time, date, label, spots_remaining (null if unknown), category
- time: ISO 8601 datetime with timezone (e.g., "2026-04-22T15:00:00-05:00"). If no specific time, use date only.
- date: ISO 8601 date (e.g., "2026-04-22"). Only if no specific time.
- label: human-readable description of what this slot is for (e.g., "Parent-Teacher Conference - Sullivan")
- spots_remaining: integer or null if unknown
- category: grouping label if slots are organized by category (e.g., "Conferences", "Volunteer", "Chaperone")
- If the page is NOT a signup form (just informational), return []
- If no slots are found, return []
- If dates are ambiguous, resolve to nearest future date relative to today ({today})
Example output
[
{{"time": "2026-04-22T15:00:00-05:00", "date": null, "label": "Parent-Teacher Conference - Sullivan", "spots_remaining": 3, "category": "Conferences"}},
{{"time": "2026-04-22T15:20:00-05:00", "date": null, "label": "Parent-Teacher Conference - Sullivan", "spots_remaining": 2, "category": "Conferences"}},
{{"time": "2026-04-22T16:00:00-05:00", "date": null, "label": "Parent-Teacher Conference - Sullivan", "spots_remaining": 5, "category": "Conferences"}}
]"""
def click_url(url, context_summary="", dry_run=False):
    """Follow an action-item URL and extract the signup slots it offers.

    Args:
        url: The URL to follow (e.g., a Sign-Up Genius link).
        context_summary: Brief context about what the signup is for. It is
            passed to the slot extractor and to the message formatter.
        dry_run: If True, return a preview result without fetching the page
            or extracting slots.

    Returns:
        Dict with keys ``status`` (``"DRY_RUN"``, ``"FETCH_FAILED"``,
        ``"NO_SLOTS"`` or ``"SLOTS_FOUND"``), ``url``, ``slots`` (empty
        unless slots were found) and ``message`` (text for Telegram).
    """
    # Status messages echo only the first 60 characters of the URL.
    # NOTE(review): the leading emoji of these messages was mis-encoded
    # (mojibake) in the source. They are restored here as escapes so the
    # file stays encoding-safe: U+1F50D, U+274C and U+2139 are best-guess
    # restorations - confirm the intended glyphs.
    if dry_run:
        return {
            "status": "DRY_RUN",
            "url": url,
            "slots": [],
            "message": f"\U0001F50D Would fetch and extract slots from: {url[:60]}",
        }

    # Step 1: Fetch the URL via Jina Reader
    markdown = fetch_url_as_markdown(url)
    if not markdown:
        return {
            "status": "FETCH_FAILED",
            "url": url,
            "slots": [],
            "message": f"\u274C Couldn't load signup page: {url[:60]}",
        }

    # Step 2: Extract slots via LLM
    slots = _extract_slots(markdown, context_summary)
    if not slots:
        return {
            "status": "NO_SLOTS",
            "url": url,
            "slots": [],
            "message": f"\u2139\uFE0F No signup slots found at: {url[:60]}",
        }

    # Step 3: Format for Telegram
    message = _format_slots_message(slots, context_summary, url)
    return {
        "status": "SLOTS_FOUND",
        "url": url,
        "slots": slots,
        "message": message,
    }
def _extract_slots(markdown, context_summary=""):
    """Use the LLM to extract signup slots from fetched page content.

    Args:
        markdown: Page content as Markdown; only the first 8000 characters
            are sent to the model.
        context_summary: Optional context about the signup, prepended to the
            user message when non-empty.

    Returns:
        A list of slot dicts. An empty list is returned when the request
        fails or times out, when the reply cannot be parsed as JSON, or when
        the parsed JSON is not a list. Failures are reported on stderr
        rather than raised.
    """
    from datetime import datetime

    today = datetime.now(CHICAGO_TZ).strftime("%A, %B %d, %Y")
    # The template escapes its literal JSON braces as "{{" / "}}", so it must
    # be rendered with str.format(). A plain replace() would leave the
    # doubled braces in the example output shown to the model.
    prompt = SLOT_EXTRACT_PROMPT.format(today=today)

    # Truncate long pages; the first 8000 chars are enough for most signup forms.
    if len(markdown) > 8000:
        markdown = markdown[:8000] + "\n\n[Content truncated]"

    if context_summary:
        user_message = f"Context: {context_summary}\n\nPage content:\n{markdown}"
    else:
        user_message = f"Page content:\n{markdown}"

    payload = {
        "model": LLM_MODEL,
        "messages": [
            {"role": "system", "content": prompt},
            {"role": "user", "content": user_message},
        ],
        "temperature": 0.1,
        "stream": False,
    }

    try:
        resp = requests.post(LLM_URL, json=payload, timeout=LLM_TIMEOUT)
        resp.raise_for_status()
        data = resp.json()

        # Handle both OpenAI and Ollama response formats
        choices = data.get("choices", [])
        if choices:
            raw = choices[0].get("message", {}).get("content", "")
        elif "message" in data:
            raw = data["message"].get("content", "")
        else:
            raw = data.get("response", "")
        # A null content field would otherwise fail on .strip().
        raw = (raw or "").strip()

        # Strip markdown code fences
        raw = re.sub(r"^```(?:json)?\s*", "", raw)
        raw = re.sub(r"\s*```$", "", raw)

        try:
            slots = json.loads(raw)
        except json.JSONDecodeError:
            # The model may wrap the array in prose; try the outermost [...] span.
            match = re.search(r"\[.*\]", raw, re.DOTALL)
            if not match:
                print(" [Clicker] Could not parse LLM response as JSON", file=sys.stderr)
                return []
            slots = json.loads(match.group())

        if not isinstance(slots, list):
            return []
        # Callers use slot.get(...), so drop any array element that is not an object.
        return [slot for slot in slots if isinstance(slot, dict)]
    except requests.exceptions.Timeout:
        print(" [Clicker] LLM timeout during slot extraction", file=sys.stderr)
        return []
    except Exception as e:
        # Deliberate best-effort boundary: any failure means "no slots".
        print(f" [Clicker] Error extracting slots: {e}", file=sys.stderr)
        return []
def _format_slots_message(slots, context_summary, url):
    """Format extracted slots as Markdown text for a Telegram message.

    Slots are grouped by category, then by date (both in first-seen order),
    and listed one per line. Each line shows the slot's time of day, or its
    label when it has none, followed by the remaining-spots count when that
    is known. The message ends with a link back to the signup page. This
    function only builds text; buttons come from build_slot_buttons().

    Args:
        slots: List of slot dicts as returned by _extract_slots(). Entries
            that are not dicts are ignored.
        context_summary: Optional heading shown in bold at the top.
        url: Signup page URL used for the closing link.

    Returns:
        The message text, with lines joined by newlines.
    """
    from datetime import datetime

    # Group by category. A missing or null category falls back to "General"
    # so the heading never renders as "**None**".
    categories = {}
    for slot in slots:
        if not isinstance(slot, dict):
            continue
        categories.setdefault(slot.get("category") or "General", []).append(slot)

    # NOTE(review): the emoji/bullet glyphs were mis-encoded (mojibake) in the
    # source and are restored as escapes. U+1F4C5, U+2022 follow from the
    # surviving bytes; U+1F4CB and U+1F517 are best guesses - confirm.
    lines = []
    if context_summary:
        lines.append(f"\U0001F4CB **{context_summary}**\n")

    for cat, cat_slots in categories.items():
        # Category headings are only useful when there is more than one.
        if len(categories) > 1:
            lines.append(f"**{cat}**")

        # Group by date within category
        by_date = {}
        for slot in cat_slots:
            time_str = slot.get("time") or ""
            date_str = slot.get("date") or ""
            date_key = "Unknown date"
            time_label = None

            if time_str:
                try:
                    dt = datetime.fromisoformat(time_str)
                except (ValueError, TypeError):
                    # Unparseable time: show it verbatim under "Unknown date".
                    time_label = time_str
                else:
                    date_key = dt.strftime("%a %b %d")
                    # A value of at most 10 chars is date-only ("2026-04-22");
                    # it has no time of day, so do not print a bogus "12:00 AM".
                    if len(time_str) > 10:
                        # "%-I" is glibc-only; strip the zero pad portably instead.
                        time_label = dt.strftime("%I:%M %p").lstrip("0")
            elif date_str:
                try:
                    date_key = datetime.fromisoformat(date_str).strftime("%a %b %d")
                except (ValueError, TypeError):
                    date_key = "Unknown date"

            by_date.setdefault(date_key, []).append({
                "time_label": time_label,
                "label": slot.get("label") or "",
                "spots": slot.get("spots_remaining"),
            })

        for date, date_slots in by_date.items():
            lines.append(f" \U0001F4C5 {date}")
            for s in date_slots:
                shown = s["time_label"] if s["time_label"] else s["label"]
                entry = f" \u2022 {shown}"
                if s["spots"] is not None:
                    entry += f" ({s['spots']} spots)"
                lines.append(entry)
        # Blank line after each category block.
        lines.append("")

    lines.append(f"\U0001F517 [Open signup page]({url})")
    return "\n".join(lines)
def build_slot_buttons(slots, url_hash=None):
    """Build Telegram inline-keyboard rows for slot selection.

    One button is produced per slot, each in its own row, for at most the
    first 10 slots. Each button carries
    ``callback_data`` of the form ``slot|<url_hash>|<slot_index>``, where
    ``slot_index`` is the slot's position in ``slots``.

    Args:
        slots: List of slot dicts from _extract_slots(). Entries that are
            not dicts are skipped, and later slots keep their original index.
        url_hash: Short hash of the URL for callback identification;
            defaults to ``"0"`` when empty or None.

    Returns:
        List of button rows, each a one-element list holding a dict with
        ``text`` and ``callback_data``.
    """
    from datetime import datetime

    if not url_hash:
        url_hash = "0"

    rows = []
    for i, slot in enumerate(slots[:10]):  # Max 10 buttons
        if not isinstance(slot, dict):
            continue
        time_str = slot.get("time") or ""
        label = slot.get("label") or ""

        # Format button text, preferring the time over the label.
        if time_str:
            try:
                dt = datetime.fromisoformat(time_str)
            except (ValueError, TypeError):
                # str() guards against a non-string time (e.g. an int),
                # which cannot be sliced.
                button_text = str(time_str)[:20]
            else:
                if len(time_str) > 10:
                    # "%-I" is glibc-only; strip the zero pad portably.
                    clock = dt.strftime("%I:%M %p").lstrip("0")
                    button_text = f"{dt.strftime('%a')} {clock}"  # e.g. "Sat 8:00 AM"
                else:
                    # Date-only value: no time of day to show.
                    button_text = dt.strftime("%a %b %d")
        elif label:
            # Use label but allow longer text for single-slot scenarios
            button_text = label[:30] if len(slots) > 1 else label[:40]
        else:
            button_text = f"Slot {i+1}"

        # Add spots remaining
        spots = slot.get("spots_remaining")
        if spots is not None:
            button_text += f" ({spots})"

        callback = f"slot|{url_hash}|{i}"
        rows.append([{"text": button_text, "callback_data": callback}])

    return rows
def hash_url(url):
    """Return a short identifier for ``url``: the first 8 hex characters of
    the MD5 digest of its encoded bytes.

    Used to tag callbacks with the URL they belong to; not a security hash.
    """
    from hashlib import md5

    hex_digest = md5(url.encode()).hexdigest()
    return hex_digest[:8]