#!/usr/bin/env python3
"""
Session Cleanup Script for OpenClaw Agents

Cleans up stale session data across all agent session stores:
1. Cron :run: sessions older than 24 hours (historical heartbeat runs)
2. Completed subagent sessions older than 24 hours
3. Orphan transcript files not tracked in sessions.json

Always backs up before deletion. Safe to run as a daily cron job.

Usage:
    # Dry run (show what would be deleted)
    python3 session_cleanup.py --dry-run

    # Execute cleanup
    python3 session_cleanup.py

    # Verbose output
    python3 session_cleanup.py --dry-run --verbose
"""

import json
import os
import shutil
import sys
from datetime import datetime, timezone
from pathlib import Path

# ── Configuration ──────────────────────────────────────────────────────────

AGENTS_DIR = os.path.expanduser("~/.openclaw/agents")
AGENTS = ["main", "socrates", "daedalus"]
BACKUP_ROOT = os.path.expanduser("~/.openclaw/agents/session-cleanup-backups")

# Age thresholds (hours)
CRON_RUN_MAX_AGE_HOURS = 24     # Delete cron :run: sessions older than this
SUBAGENT_MAX_AGE_HOURS = 24     # Delete subagent sessions older than this
ORPHAN_MAX_AGE_FILE_HOURS = 24  # Delete orphan .jsonl files older than this

# Session types to ALWAYS KEEP
KEEP_PREFIXES = [
    "agent:",  # keep everything with agent prefix...
]
KEEP_TYPES = [
    # Telegram/chat sessions — active conversations
    "telegram:direct",
    "telegram:group",
    "telegram:slash",
    "telegram:default",
    # Cron templates and heartbeats — active infrastructure
    "cron:template",
    "cron:heartbeat",
    # Root sessions
    "main",
    # Dreaming sessions
    "dreaming",
]

# Session types eligible for cleanup, mapped to their max age in hours
CLEANUP_TYPES = {
    "cron:run": CRON_RUN_MAX_AGE_HOURS,
    "subagent": SUBAGENT_MAX_AGE_HOURS,
}


def parse_args():
    """Parse CLI flags from sys.argv.

    Returns:
        (dry_run, verbose) tuple of booleans.
    """
    dry_run = "--dry-run" in sys.argv
    verbose = "--verbose" in sys.argv
    return dry_run, verbose


def ts_to_age_hours(ts_ms):
    """Convert epoch milliseconds to age in hours.

    A falsy timestamp (None/0) means the age is unknown; we return 999 so
    such sessions are treated as old and become eligible for cleanup.
    """
    if not ts_ms:
        return 999  # Unknown age = treat as old
    now_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
    return (now_ms - ts_ms) / (1000 * 3600)


def get_session_type(key):
    """Extract the session type from a session key.

    Keys look like ``agent:<name>:<type>:...``; the third colon-separated
    field carries the type, with cron/telegram keys refined by sub-fields.
    """
    parts = key.split(":")
    if len(parts) < 3:
        return "unknown"
    type_label = parts[2]
    if type_label == "cron":
        if ":run:" in key:
            return "cron:run"
        elif ":heartbeat" in key:
            return "cron:heartbeat"
        else:
            return "cron:template"
    elif type_label == "telegram":
        # telegram:direct, telegram:group, telegram:slash, etc.
        return f"telegram:{parts[3]}" if len(parts) > 3 else "telegram"
    elif type_label == "dreaming":
        return "dreaming"
    return type_label


def should_keep(key):
    """Check if a session should always be kept regardless of age."""
    session_type = get_session_type(key)
    if session_type in KEEP_TYPES:
        return True
    # Keep all telegram subtypes (direct, group, slash, etc.)
    if session_type.startswith("telegram:") or session_type == "telegram":
        return True
    return False


def backup_session(agent, sid, backup_dir):
    """Back up a session transcript file before deletion.

    Returns the size in bytes of the backed-up file, or 0 if no transcript
    exists for this session id.
    """
    sessions_dir = os.path.join(AGENTS_DIR, agent, "sessions")
    src = os.path.join(sessions_dir, f"{sid}.jsonl")
    if os.path.exists(src):
        agent_backup = os.path.join(backup_dir, agent)
        os.makedirs(agent_backup, exist_ok=True)
        dst = os.path.join(agent_backup, f"{sid}.jsonl")
        shutil.copy2(src, dst)
        return os.path.getsize(src)
    return 0


def cleanup_agent(agent, backup_dir, dry_run, verbose):
    """Clean up stale sessions for a single agent.

    Phase 1 removes stale entries from sessions.json (backing up and
    deleting their transcript files), Phase 2 removes orphan transcript
    files not referenced by any store entry.

    Returns:
        (sessions_removed, bytes_freed_sessions, orphan_count,
        bytes_freed_orphans) tuple.
    """
    sessions_dir = os.path.join(AGENTS_DIR, agent, "sessions")
    store_path = os.path.join(sessions_dir, "sessions.json")
    if not os.path.exists(store_path):
        print(f" {agent}: No sessions.json found, skipping")
        return 0, 0, 0, 0

    with open(store_path) as f:
        store = json.load(f)

    # ── Phase 1: Remove stale sessions from store ──────────────────────
    to_remove = []
    bytes_freed_sessions = 0
    sessions_removed = 0
    for key, value in store.items():
        if should_keep(key):
            continue
        session_type = get_session_type(key)
        max_age = CLEANUP_TYPES.get(session_type)
        if max_age is None:
            continue  # Type not eligible for cleanup
        # OpenClaw uses sessionId and updatedAt in session store entries
        last_active = value.get("lastActiveAt", value.get("updatedAt", 0))
        age_hours = ts_to_age_hours(last_active)
        if age_hours > max_age:
            sid = value.get("sessionId", value.get("id", ""))
            # Calculate file size even in dry run for reporting
            fpath = os.path.join(sessions_dir, f"{sid}.jsonl")
            size = os.path.getsize(fpath) if os.path.exists(fpath) else 0
            to_remove.append((key, sid, session_type, age_hours, size))

    for key, sid, session_type, age_hours, size in to_remove:
        if verbose:
            print(f" Remove: {key[:80]} ({session_type}, {age_hours:.1f}h old, {size / 1024:.1f} KB)")
        sessions_removed += 1
        bytes_freed_sessions += size
        if not dry_run:
            # Honor the "always back up before deletion" contract, then
            # actually free the transcript so the byte count is accurate.
            backup_session(agent, sid, backup_dir)
            fpath = os.path.join(sessions_dir, f"{sid}.jsonl")
            if os.path.exists(fpath):
                os.remove(fpath)
            del store[key]

    if to_remove and not dry_run:
        with open(store_path, "w") as f:
            json.dump(store, f, indent=2)

    # ── Phase 2: Remove orphan transcript files ────────────────────────
    store_ids = {v.get("sessionId", v.get("id", "")) for v in store.values()}
    orphan_count = 0
    bytes_freed_orphans = 0
    orphan_files = []
    if os.path.isdir(sessions_dir):
        for fname in os.listdir(sessions_dir):
            if not fname.endswith(".jsonl") or fname == "sessions.json":
                continue
            sid = fname.replace(".jsonl", "")
            if sid not in store_ids:
                fpath = os.path.join(sessions_dir, fname)
                # Check file age via mtime (no store entry to consult)
                file_age_hours = (datetime.now(timezone.utc) - datetime.fromtimestamp(
                    os.path.getmtime(fpath), tz=timezone.utc
                )).total_seconds() / 3600
                if file_age_hours > ORPHAN_MAX_AGE_FILE_HOURS:
                    fsize = os.path.getsize(fpath)
                    orphan_files.append((fname, fsize, file_age_hours))

    for fname, fsize, age_hours in orphan_files:
        if verbose:
            print(f" Orphan: {fname} ({fsize / 1024:.1f} KB, {age_hours:.1f}h old)")
        if not dry_run:
            # Back up orphan before removing
            agent_backup = os.path.join(backup_dir, agent, "orphans")
            os.makedirs(agent_backup, exist_ok=True)
            src = os.path.join(sessions_dir, fname)
            shutil.copy2(src, os.path.join(agent_backup, fname))
            os.remove(src)
        orphan_count += 1
        bytes_freed_orphans += fsize

    return sessions_removed, bytes_freed_sessions, orphan_count, bytes_freed_orphans


def main():
    """Run the cleanup across all configured agents and print a summary."""
    dry_run, verbose = parse_args()
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
    backup_dir = os.path.join(BACKUP_ROOT, timestamp) if not dry_run else None

    if dry_run:
        print("╔══════════════════════════════════════════════╗")
        print("║ DRY RUN — Nothing will be deleted ║")
        print("╚══════════════════════════════════════════════╝")
    else:
        os.makedirs(backup_dir, exist_ok=True)
        # Back up all session stores first
        for agent in AGENTS:
            store_path = os.path.join(AGENTS_DIR, agent, "sessions", "sessions.json")
            if os.path.exists(store_path):
                shutil.copy2(store_path,
                             os.path.join(backup_dir, f"sessions-{agent}.json"))
        print(f"Backups → {backup_dir}")
    print()

    total_sessions = 0
    total_session_bytes = 0
    total_orphans = 0
    total_orphan_bytes = 0
    for agent in AGENTS:
        s_removed, s_bytes, o_count, o_bytes = cleanup_agent(
            agent, backup_dir, dry_run, verbose
        )
        if s_removed > 0 or o_count > 0:
            print(f" {agent}: {s_removed} sessions, {s_bytes / 1024:.1f} KB | "
                  f"{o_count} orphans, {o_bytes / 1024:.1f} KB")
        else:
            print(f" {agent}: Clean — nothing to remove")
        total_sessions += s_removed
        total_session_bytes += s_bytes
        total_orphans += o_count
        total_orphan_bytes += o_bytes

    print()
    print("─" * 50)
    print(f"Total: {total_sessions} sessions ({total_session_bytes / 1024:.1f} KB) | "
          f"{total_orphans} orphans ({total_orphan_bytes / 1024:.1f} KB)")
    if dry_run:
        print()
        print("Run without --dry-run to execute cleanup.")
    else:
        print(f"Backups saved to: {backup_dir}")
    return 0


if __name__ == "__main__":
    sys.exit(main())