#!/usr/bin/env python3
"""
Session Cleanup Script for OpenClaw Agents
Cleans up stale session data across all agent session stores:
1. Cron :run: sessions older than 24 hours (historical heartbeat runs)
2. Completed subagent sessions older than 24 hours
3. Orphan transcript files not tracked in sessions.json
Always backs up before deletion. Safe to run as a daily cron job.
Usage:
# Dry run (show what would be deleted)
python3 session_cleanup.py --dry-run
# Execute cleanup
python3 session_cleanup.py
# Verbose output
python3 session_cleanup.py --dry-run --verbose
"""
import json
import os
import sys
import shutil
from datetime import datetime, timezone
from pathlib import Path
# ── Configuration ──────────────────────────────────────────────────────────
AGENTS_DIR = os.path.expanduser("~/.openclaw/agents")
AGENTS = ["main", "socrates", "daedalus"]
BACKUP_ROOT = os.path.expanduser("~/.openclaw/agents/session-cleanup-backups")

# Age thresholds (hours)
CRON_RUN_MAX_AGE_HOURS = 24     # Delete cron :run: sessions older than this
SUBAGENT_MAX_AGE_HOURS = 24     # Delete subagent sessions older than this
ORPHAN_MAX_AGE_FILE_HOURS = 24  # Delete orphan .jsonl files older than this

# Session types to ALWAYS KEEP
KEEP_PREFIXES = [
    "agent:",  # keep everything with agent prefix...
]
KEEP_TYPES = [
    # Telegram/chat sessions — active conversations
    "telegram:direct",
    "telegram:group",
    "telegram:slash",
    "telegram:default",
    # Cron templates and heartbeats — active infrastructure
    "cron:template",
    "cron:heartbeat",
    # Root sessions
    "main",
    # Dreaming sessions
    "dreaming",
]

# Session types eligible for cleanup, mapped to their max age in hours
CLEANUP_TYPES = {
    "cron:run": CRON_RUN_MAX_AGE_HOURS,
    "subagent": SUBAGENT_MAX_AGE_HOURS,
}
def parse_args():
    """Read the --dry-run and --verbose flags from sys.argv.

    Returns a (dry_run, verbose) tuple of booleans.
    """
    argv = sys.argv
    return ("--dry-run" in argv, "--verbose" in argv)
def ts_to_age_hours(ts_ms):
    """Return the age in hours of an epoch-milliseconds timestamp.

    Falsy / missing timestamps are treated as very old (999 h) so the
    caller's age threshold marks them eligible for cleanup.
    """
    if not ts_ms:
        return 999
    current_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
    return (current_ms - ts_ms) / 3_600_000
def get_session_type(key):
    """Extract a coarse session-type label from a session-store key.

    Keys carry the type in their third colon-separated field; cron and
    telegram keys are further refined by subtype.
    """
    fields = key.split(":")
    if len(fields) < 3:
        return "unknown"
    label = fields[2]
    if label == "cron":
        # Distinguish one-off run records from heartbeats and templates.
        if ":run:" in key:
            return "cron:run"
        if ":heartbeat" in key:
            return "cron:heartbeat"
        return "cron:template"
    if label == "telegram":
        # telegram:direct, telegram:group, telegram:slash, etc.
        return "telegram:" + fields[3] if len(fields) > 3 else "telegram"
    if label == "dreaming":
        return "dreaming"
    return label
def should_keep(key):
    """Return True when a session must be kept regardless of its age."""
    stype = get_session_type(key)
    # All telegram subtypes (direct, group, slash, ...) are live chats.
    if stype == "telegram" or stype.startswith("telegram:"):
        return True
    return stype in KEEP_TYPES
def backup_session(agent, sid, backup_dir):
    """Copy a session transcript into the backup tree before deletion.

    Returns the transcript's size in bytes, or 0 when no transcript
    file exists for *sid*.
    """
    src = os.path.join(AGENTS_DIR, agent, "sessions", f"{sid}.jsonl")
    if not os.path.exists(src):
        return 0
    dest_dir = os.path.join(backup_dir, agent)
    os.makedirs(dest_dir, exist_ok=True)
    shutil.copy2(src, os.path.join(dest_dir, f"{sid}.jsonl"))
    return os.path.getsize(src)
def cleanup_agent(agent, backup_dir, dry_run, verbose):
    """Clean up stale sessions for a single agent.

    Phase 1 removes stale cron:run / subagent entries from sessions.json,
    backing up each transcript before its store entry is deleted.
    Phase 2 deletes orphan transcript files (not referenced by the store)
    older than ORPHAN_MAX_AGE_FILE_HOURS, each backed up first.

    Returns:
        (sessions_removed, bytes_freed_sessions, orphan_count,
         bytes_freed_orphans)
    """
    sessions_dir = os.path.join(AGENTS_DIR, agent, "sessions")
    store_path = os.path.join(sessions_dir, "sessions.json")
    if not os.path.exists(store_path):
        print(f" {agent}: No sessions.json found, skipping")
        return 0, 0, 0, 0
    with open(store_path) as f:
        store = json.load(f)

    # ── Phase 1: Remove stale sessions from store ──────────────────────
    to_remove = []
    bytes_freed_sessions = 0
    sessions_removed = 0
    for key, value in store.items():
        if should_keep(key):
            continue
        session_type = get_session_type(key)
        max_age = CLEANUP_TYPES.get(session_type)
        if max_age is None:
            continue  # Type not eligible for cleanup
        # OpenClaw uses sessionId and updatedAt in session store entries
        last_active = value.get("lastActiveAt", value.get("updatedAt", 0))
        age_hours = ts_to_age_hours(last_active)
        if age_hours > max_age:
            sid = value.get("sessionId", value.get("id", ""))
            # Calculate file size even in dry run for reporting
            fpath = os.path.join(sessions_dir, f"{sid}.jsonl")
            size = os.path.getsize(fpath) if os.path.exists(fpath) else 0
            to_remove.append((key, sid, session_type, age_hours, size))
    for key, sid, session_type, age_hours, size in to_remove:
        if verbose:
            print(f" Remove: {key[:80]} ({session_type}, {age_hours:.1f}h old, {size / 1024:.1f} KB)")
        sessions_removed += 1
        bytes_freed_sessions += size
    if to_remove and not dry_run:
        for key, sid, session_type, age_hours, size in to_remove:
            # FIX: back up the transcript before dropping its store entry —
            # backup_session was previously defined but never called, so
            # Phase 1 broke the "always back up before deletion" promise.
            backup_session(agent, sid, backup_dir)
            del store[key]
        with open(store_path, "w") as f:
            json.dump(store, f, indent=2)

    # ── Phase 2: Remove orphan transcript files ────────────────────────
    store_ids = {v.get("sessionId", v.get("id", "")) for v in store.values()}
    orphan_count = 0
    bytes_freed_orphans = 0
    orphan_files = []
    if os.path.isdir(sessions_dir):
        for fname in os.listdir(sessions_dir):
            if not fname.endswith(".jsonl") or fname == "sessions.json":
                continue
            sid = fname.replace(".jsonl", "")
            if sid not in store_ids:
                fpath = os.path.join(sessions_dir, fname)
                # Only treat a file as a deletable orphan once it is old
                # enough — a transcript may precede its store entry briefly.
                file_age_hours = (datetime.now(timezone.utc) - datetime.fromtimestamp(
                    os.path.getmtime(fpath), tz=timezone.utc
                )).total_seconds() / 3600
                if file_age_hours > ORPHAN_MAX_AGE_FILE_HOURS:
                    fsize = os.path.getsize(fpath)
                    orphan_files.append((fname, fsize, file_age_hours))
    for fname, fsize, age_hours in orphan_files:
        if verbose:
            print(f" Orphan: {fname} ({fsize / 1024:.1f} KB, {age_hours:.1f}h old)")
        if not dry_run:
            # Back up orphan before removing
            agent_backup = os.path.join(backup_dir, agent, "orphans")
            os.makedirs(agent_backup, exist_ok=True)
            src = os.path.join(sessions_dir, fname)
            shutil.copy2(src, os.path.join(agent_backup, fname))
            os.remove(src)
        orphan_count += 1
        bytes_freed_orphans += fsize
    return sessions_removed, bytes_freed_sessions, orphan_count, bytes_freed_orphans
def main():
    """Entry point: clean every configured agent and print a summary.

    Returns 0 so the caller can pass it to sys.exit().
    """
    dry_run, verbose = parse_args()
    stamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
    backup_dir = None if dry_run else os.path.join(BACKUP_ROOT, stamp)

    if dry_run:
        print("╔══════════════════════════════════════════════╗")
        print("║ DRY RUN — Nothing will be deleted ║")
        print("╚══════════════════════════════════════════════╝")
    else:
        os.makedirs(backup_dir, exist_ok=True)
        # Snapshot every agent's session store before touching anything.
        for agent in AGENTS:
            store_path = os.path.join(AGENTS_DIR, agent, "sessions", "sessions.json")
            if os.path.exists(store_path):
                shutil.copy2(store_path, os.path.join(backup_dir, f"sessions-{agent}.json"))
        print(f"Backups → {backup_dir}")
    print()

    # Running totals: sessions, session bytes, orphans, orphan bytes.
    total_sessions = total_session_bytes = 0
    total_orphans = total_orphan_bytes = 0
    for agent in AGENTS:
        s_removed, s_bytes, o_count, o_bytes = cleanup_agent(
            agent, backup_dir, dry_run, verbose
        )
        if s_removed or o_count:
            print(f" {agent}: {s_removed} sessions, {s_bytes / 1024:.1f} KB | "
                  f"{o_count} orphans, {o_bytes / 1024:.1f} KB")
        else:
            print(f" {agent}: Clean — nothing to remove")
        total_sessions += s_removed
        total_session_bytes += s_bytes
        total_orphans += o_count
        total_orphan_bytes += o_bytes

    print()
    print("─" * 50)
    print(f"Total: {total_sessions} sessions ({total_session_bytes / 1024:.1f} KB) | "
          f"{total_orphans} orphans ({total_orphan_bytes / 1024:.1f} KB)")
    if dry_run:
        print()
        print("Run without --dry-run to execute cleanup.")
    else:
        print(f"Backups saved to: {backup_dir}")
    return 0
# FIX: was `if name == "main":`, which raises NameError and never runs —
# the standard dunder guard is required for script execution.
if __name__ == "__main__":
    sys.exit(main())