"""The Clicker — URL-following concierge for action items. When the newsletter parser identifies an action_item with a URL (e.g., a Sign-Up Genius link for conferences), the Clicker: 1. Fetches the URL via Jina Reader API (renders JS, returns Markdown) 2. Passes the page content to the LLM to extract available slots 3. Returns structured slot data for Hermes to post with inline buttons The UX upgrade: instead of "Sign up for conferences", the bot posts: "Sign-ups found for Parent-Teacher Conferences. Available slots: 3:00 PM, 3:20 PM, 4:00 PM. Tap a time to select." """ import json import os import re import sys import requests from family_assistant.config import LLM_MODEL, LLM_URL, LLM_TIMEOUT, CHICAGO_TZ from family_assistant.url_fetcher import fetch_url_as_markdown # --------------------------------------------------------------------------- # Slot Extraction # --------------------------------------------------------------------------- SLOT_EXTRACT_PROMPT = """You are a slot extraction engine. Given a web page's content, extract the available signup slots as structured JSON. Today's date: {today} (America/Chicago) ## What to extract Look for: - Available time slots on signup forms (Sign-Up Genius, VolunteerSpot, Google Forms, etc.) - Available dates for events, conferences, or appointments - Pricing tiers or ticket types if relevant - Any capacity/availability indicators (e.g., "2 spots left", "full") ## Rules - Return ONLY a JSON array. No markdown, no explanation, no code fences. - Each slot has: time, date, label, spots_remaining (null if unknown), category - time: ISO 8601 datetime with timezone (e.g., "2026-04-22T15:00:00-05:00"). If no specific time, use date only. - date: ISO 8601 date (e.g., "2026-04-22"). Only if no specific time. - label: human-readable description of what this slot is for (e.g., "Parent-Teacher Conference - Sullivan") - spots_remaining: integer or null if unknown - category: grouping label if slots are organized by category (e.g., "Conferences", "Volunteer", "Chaperone") - If the page is NOT a signup form (just informational), return [] - If no slots are found, return [] - If dates are ambiguous, resolve to nearest future date relative to today ({today}) ## Example output [ {{"time": "2026-04-22T15:00:00-05:00", "date": null, "label": "Parent-Teacher Conference - Sullivan", "spots_remaining": 3, "category": "Conferences"}}, {{"time": "2026-04-22T15:20:00-05:00", "date": null, "label": "Parent-Teacher Conference - Sullivan", "spots_remaining": 2, "category": "Conferences"}}, {{"time": "2026-04-22T16:00:00-05:00", "date": null, "label": "Parent-Teacher Conference - Sullivan", "spots_remaining": 5, "category": "Conferences"}} ]""" def click_url(url, context_summary="", dry_run=False): """Follow an action_item URL and extract available signup slots. Args: url: The URL to follow (e.g., Sign-Up Genius link) context_summary: Brief context about what the signup is for dry_run: If True, don't actually fetch or extract Returns: Dict with status, slots, and formatted message for Telegram. """ if dry_run: return { "status": "DRY_RUN", "url": url, "slots": [], "message": f"🔍 Would fetch and extract slots from: {url[:60]}", } # Step 1: Fetch the URL via Jina Reader markdown = fetch_url_as_markdown(url) if not markdown: return { "status": "FETCH_FAILED", "url": url, "slots": [], "message": f"❌ Couldn't load signup page: {url[:60]}", } # Step 2: Extract slots via LLM slots = _extract_slots(markdown, context_summary) if not slots: return { "status": "NO_SLOTS", "url": url, "slots": [], "message": f"â„šī¸ No signup slots found at: {url[:60]}", } # Step 3: Format for Telegram message = _format_slots_message(slots, context_summary, url) return { "status": "SLOTS_FOUND", "url": url, "slots": slots, "message": message, } def _extract_slots(markdown, context_summary=""): """Use the LLM to extract signup slots from fetched page content. Returns a list of slot dicts, or empty list if none found. """ from datetime import datetime today = datetime.now(CHICAGO_TZ).strftime("%A, %B %d, %Y") prompt = SLOT_EXTRACT_PROMPT.replace("{today}", today) # Truncate markdown if too long (keep first 8000 chars — enough for most signup forms) if len(markdown) > 8000: markdown = markdown[:8000] + "\n\n[Content truncated]" user_message = f"Context: {context_summary}\n\nPage content:\n{markdown}" if context_summary else f"Page content:\n{markdown}" payload = { "model": LLM_MODEL, "messages": [ {"role": "system", "content": prompt}, {"role": "user", "content": user_message}, ], "temperature": 0.1, "stream": False, } try: resp = requests.post(LLM_URL, json=payload, timeout=LLM_TIMEOUT) resp.raise_for_status() data = resp.json() # Handle both OpenAI and Ollama response formats choices = data.get("choices", []) if choices: raw = choices[0].get("message", {}).get("content", "").strip() elif "message" in data: raw = data["message"].get("content", "").strip() else: raw = data.get("response", "").strip() # Strip markdown code fences raw = re.sub(r"^```(?:json)?\s*", "", raw) raw = re.sub(r"\s*```$", "", raw) try: slots = json.loads(raw) except json.JSONDecodeError: # Try to extract JSON array match = re.search(r"\[.*\]", raw, re.DOTALL) if match: slots = json.loads(match.group()) else: print(" [Clicker] Could not parse LLM response as JSON", file=sys.stderr) return [] if not isinstance(slots, list): return [] return slots except requests.exceptions.Timeout: print(" [Clicker] LLM timeout during slot extraction", file=sys.stderr) return [] except Exception as e: print(f" [Clicker] Error extracting slots: {e}", file=sys.stderr) return [] def _format_slots_message(slots, context_summary, url): """Format extracted slots as a Telegram message with inline buttons. Groups slots by category, then by date, then lists times. """ from datetime import datetime # Group by category categories = {} for slot in slots: cat = slot.get("category", "General") if cat not in categories: categories[cat] = [] categories[cat].append(slot) lines = [] if context_summary: lines.append(f"📋 **{context_summary}**\n") for cat, cat_slots in categories.items(): if len(categories) > 1: lines.append(f"**{cat}**") # Group by date within category by_date = {} for slot in cat_slots: time_str = slot.get("time", "") date_str = slot.get("date", "") if time_str: try: dt = datetime.fromisoformat(time_str) date_key = dt.strftime("%a %b %d") time_label = dt.strftime("%-I:%M %p") except (ValueError, TypeError): date_key = "Unknown date" time_label = time_str elif date_str: try: dt = datetime.fromisoformat(date_str) date_key = dt.strftime("%a %b %d") time_label = None except (ValueError, TypeError): date_key = "Unknown date" time_label = None else: date_key = "Unknown date" time_label = None if date_key not in by_date: by_date[date_key] = [] by_date[date_key].append({ "time_label": time_label, "label": slot.get("label", ""), "spots": slot.get("spots_remaining"), }) for date, date_slots in by_date.items(): lines.append(f" 📅 {date}") for s in date_slots: entry = f" â€ĸ {s['time_label']}" if s["time_label"] else f" â€ĸ {s['label']}" if s["spots"] is not None: entry += f" ({s['spots']} spots)" lines.append(entry) lines.append("") lines.append(f"🔗 [Open signup page]({url})") return "\n".join(lines) def build_slot_buttons(slots, url_hash=None): """Build Telegram inline buttons for slot selection. Each button has callback_data: slot|| Max 1 row per slot (Telegram limit), max ~10 buttons. Args: slots: List of slot dicts from _extract_slots() url_hash: Short hash of the URL for callback identification Returns: List of button rows for Telegram inline keyboard. """ if not url_hash: url_hash = "0" rows = [] for i, slot in enumerate(slots[:10]): # Max 10 buttons time_str = slot.get("time", "") label = slot.get("label", "") # Format button text — prefer time over label if time_str: try: from datetime import datetime dt = datetime.fromisoformat(time_str) button_text = dt.strftime("%a %-I:%M %p") # e.g. "Sat 8:00 AM" except (ValueError, TypeError): button_text = time_str[:20] elif label: # Use label but allow longer text for single-slot scenarios button_text = label[:30] if len(slots) > 1 else label[:40] else: button_text = f"Slot {i+1}" # Add spots remaining spots = slot.get("spots_remaining") if spots is not None: button_text += f" ({spots})" callback = f"slot|{url_hash}|{i}" rows.append([{"text": button_text, "callback_data": callback}]) return rows def hash_url(url): """Generate a short hash of a URL for callback identification.""" import hashlib return hashlib.md5(url.encode()).hexdigest()[:8]