#!/usr/bin/env python3

"""
Session Cleanup Script for OpenClaw Agents

Cleans up stale session data across all agent session stores:
1. Cron :run: sessions older than 24 hours (historical heartbeat runs)
2. Completed subagent sessions older than 24 hours
3. Orphan transcript files not tracked in sessions.json

Always backs up before deletion. Safe to run as a daily cron job.

Usage:
# Dry run (show what would be deleted)
python3 session_cleanup.py --dry-run

# Execute cleanup
python3 session_cleanup.py

# Verbose output
python3 session_cleanup.py --dry-run --verbose

"""

import json
import os
import sys
import shutil
from datetime import datetime, timezone
from pathlib import Path

# ── Configuration ──────────────────────────────────────────────────────────

AGENTS_DIR = os.path.expanduser("~/.openclaw/agents")
AGENTS = ["main", "socrates", "daedalus"]
BACKUP_ROOT = os.path.expanduser("~/.openclaw/agents/session-cleanup-backups")

# Age thresholds (hours)

CRON_RUN_MAX_AGE_HOURS = 24  # Delete cron :run: sessions older than this
SUBAGENT_MAX_AGE_HOURS = 24  # Delete subagent sessions older than this
ORPHAN_MAX_AGE_FILE_HOURS = 24  # Delete orphan .jsonl files older than this

# Session types to ALWAYS KEEP
# NOTE(review): KEEP_PREFIXES is not referenced anywhere in this file —
# should_keep() only consults KEEP_TYPES. Confirm whether prefix-based
# keeping was intended before relying on it.

KEEP_PREFIXES = [
    "agent:",  # keep everything with agent prefix...
]
KEEP_TYPES = [
    # Telegram/chat sessions — active conversations
    "telegram:direct",
    "telegram:group",
    "telegram:slash",
    "telegram:default",
    # Cron templates and heartbeats — active infrastructure
    "cron:template",
    "cron:heartbeat",
    # Root sessions
    "main",
    # Dreaming sessions
    "dreaming",
]

# Session types eligible for cleanup, mapped to their max age in hours

CLEANUP_TYPES = {
    "cron:run": CRON_RUN_MAX_AGE_HOURS,
    "subagent": SUBAGENT_MAX_AGE_HOURS,
}

def parse_args():
    """Parse command-line flags.

    Returns:
        (dry_run, verbose): booleans, True when the corresponding flag
        (--dry-run / --verbose) appears anywhere in sys.argv.
    """
    dry_run = "--dry-run" in sys.argv
    verbose = "--verbose" in sys.argv
    return dry_run, verbose

def ts_to_age_hours(ts_ms):
    """Convert an epoch-milliseconds timestamp to an age in hours.

    Args:
        ts_ms: Timestamp in milliseconds since the epoch, or a falsy value
            (0/None) when the timestamp is unknown.

    Returns:
        Age in hours as a float. Unknown timestamps return the sentinel 999
        so entries with no timestamp are treated as old enough to clean up.
    """
    if not ts_ms:
        return 999  # Unknown age = treat as old
    now_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
    return (now_ms - ts_ms) / (1000 * 3600)

def get_session_type(key):
    """Extract a coarse session type from a session-store key.

    Keys appear to follow "agent:<name>:<type>[:<subtype>...]"; the third
    colon-separated field is the type label — TODO confirm against the
    session store writer.

    Args:
        key: Session-store key string.

    Returns:
        One of "cron:run", "cron:heartbeat", "cron:template",
        "telegram:<subtype>", "telegram", "dreaming", "unknown" (fewer than
        three fields), or the raw third field for any other type.
    """
    parts = key.split(":")
    if len(parts) < 3:
        return "unknown"

    type_label = parts[2]

    if type_label == "cron":
        # Distinguish historical run records from live heartbeat/template keys.
        if ":run:" in key:
            return "cron:run"
        elif ":heartbeat" in key:
            return "cron:heartbeat"
        else:
            return "cron:template"
    elif type_label == "telegram":
        # telegram:direct, telegram:group, telegram:slash, etc.
        return f"telegram:{parts[3]}" if len(parts) > 3 else "telegram"
    elif type_label == "dreaming":
        return "dreaming"

    return type_label

def should_keep(key):
    """Check if a session should always be kept regardless of age.

    A session is kept when its type is listed in KEEP_TYPES, or when it is
    any telegram session (including subtypes not enumerated in KEEP_TYPES).

    Args:
        key: Session-store key string.

    Returns:
        True if the session must never be cleaned up, False otherwise.
    """
    session_type = get_session_type(key)
    if session_type in KEEP_TYPES:
        return True
    # Keep all telegram subtypes (direct, group, slash, etc.)
    if session_type.startswith("telegram:") or session_type == "telegram":
        return True
    return False

def backup_session(agent, sid, backup_dir):
    """Back up a session transcript file before deletion.

    Copies <AGENTS_DIR>/<agent>/sessions/<sid>.jsonl into
    <backup_dir>/<agent>/, creating the directory if needed.

    Args:
        agent: Agent name (subdirectory under AGENTS_DIR).
        sid: Session id (transcript filename stem).
        backup_dir: Root backup directory for this run.

    Returns:
        Size in bytes of the backed-up transcript, or 0 if no transcript
        file exists for this session.
    """
    sessions_dir = os.path.join(AGENTS_DIR, agent, "sessions")
    src = os.path.join(sessions_dir, f"{sid}.jsonl")

    if os.path.exists(src):
        agent_backup = os.path.join(backup_dir, agent)
        os.makedirs(agent_backup, exist_ok=True)
        dst = os.path.join(agent_backup, f"{sid}.jsonl")
        shutil.copy2(src, dst)
        return os.path.getsize(src)
    return 0

def cleanup_agent(agent, backup_dir, dry_run, verbose):
    """Clean up stale sessions for a single agent.

    Phase 1 removes expired cleanup-eligible entries from sessions.json;
    phase 2 backs up and deletes orphan .jsonl transcripts not referenced
    by the (post-phase-1) store.

    Args:
        agent: Agent name (subdirectory under AGENTS_DIR).
        backup_dir: Root backup directory for this run (None in dry runs;
            only dereferenced when dry_run is False).
        dry_run: When True, report what would be deleted without touching disk.
        verbose: When True, print one line per session/orphan considered.

    Returns:
        (sessions_removed, bytes_freed_sessions, orphan_count,
         bytes_freed_orphans) — in a dry run these report what WOULD be done.
    """
    sessions_dir = os.path.join(AGENTS_DIR, agent, "sessions")
    store_path = os.path.join(sessions_dir, "sessions.json")

    if not os.path.exists(store_path):
        print(f"  {agent}: No sessions.json found, skipping")
        return 0, 0, 0, 0

    with open(store_path) as f:
        store = json.load(f)

    # ── Phase 1: Remove stale sessions from store ──────────────────────
    to_remove = []
    bytes_freed_sessions = 0
    sessions_removed = 0

    for key, value in store.items():
        if should_keep(key):
            continue

        session_type = get_session_type(key)
        max_age = CLEANUP_TYPES.get(session_type)

        if max_age is None:
            continue  # Type not eligible for cleanup

        # OpenClaw uses sessionId and updatedAt in session store entries
        last_active = value.get("lastActiveAt", value.get("updatedAt", 0))
        age_hours = ts_to_age_hours(last_active)

        if age_hours > max_age:
            sid = value.get("sessionId", value.get("id", ""))
            # Calculate file size even in dry run for reporting
            fpath = os.path.join(sessions_dir, f"{sid}.jsonl")
            size = os.path.getsize(fpath) if os.path.exists(fpath) else 0
            to_remove.append((key, sid, session_type, age_hours, size))

    for key, sid, session_type, age_hours, size in to_remove:
        if verbose:
            print(f"    Remove: {key[:80]} ({session_type}, {age_hours:.1f}h old, {size / 1024:.1f} KB)")

        sessions_removed += 1
        bytes_freed_sessions += size

    if to_remove and not dry_run:
        for key, sid, session_type, age_hours, size in to_remove:
            del store[key]
        with open(store_path, "w") as f:
            json.dump(store, f, indent=2)

    # ── Phase 2: Remove orphan transcript files ────────────────────────
    # NOTE: store ids are collected AFTER phase-1 removal, so transcripts of
    # just-removed sessions become orphan candidates in the same (non-dry)
    # run and are backed up below before deletion.
    store_ids = {v.get("sessionId", v.get("id", "")) for v in store.values()}
    orphan_count = 0
    bytes_freed_orphans = 0
    orphan_files = []

    if os.path.isdir(sessions_dir):
        for fname in os.listdir(sessions_dir):
            if not fname.endswith(".jsonl") or fname == "sessions.json":
                continue

            sid = fname.replace(".jsonl", "")
            if sid not in store_ids:
                fpath = os.path.join(sessions_dir, fname)
                # Check file age via mtime so recently-written transcripts survive
                file_age_hours = (datetime.now(timezone.utc) - datetime.fromtimestamp(
                    os.path.getmtime(fpath), tz=timezone.utc
                )).total_seconds() / 3600

                if file_age_hours > ORPHAN_MAX_AGE_FILE_HOURS:
                    fsize = os.path.getsize(fpath)
                    orphan_files.append((fname, fsize, file_age_hours))

    for fname, fsize, age_hours in orphan_files:
        if verbose:
            print(f"    Orphan: {fname} ({fsize / 1024:.1f} KB, {age_hours:.1f}h old)")

        # Tally in dry runs too, so the dry-run report matches a real run.
        # (Previously orphans were only counted when actually deleted, so
        # --dry-run always reported "0 orphans" even while listing them.)
        orphan_count += 1
        bytes_freed_orphans += fsize

        if not dry_run:
            # Back up orphan before removing
            agent_backup = os.path.join(backup_dir, agent, "orphans")
            os.makedirs(agent_backup, exist_ok=True)
            src = os.path.join(sessions_dir, fname)
            shutil.copy2(src, os.path.join(agent_backup, fname))
            os.remove(src)

    return sessions_removed, bytes_freed_sessions, orphan_count, bytes_freed_orphans

def main():
    """Entry point: parse flags, back up session stores, clean each agent.

    In a real run, every agent's sessions.json is copied into a timestamped
    backup directory before any cleanup. In a dry run nothing is written.

    Returns:
        0 always (process exit status).
    """
    dry_run, verbose = parse_args()

    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
    # No backup directory is created (or needed) in a dry run.
    backup_dir = os.path.join(BACKUP_ROOT, timestamp) if not dry_run else None

    if dry_run:
        print("╔══════════════════════════════════════════════╗")
        print("║         DRY RUN — Nothing will be deleted    ║")
        print("╚══════════════════════════════════════════════╝")
    else:
        os.makedirs(backup_dir, exist_ok=True)
        # Back up all session stores first
        for agent in AGENTS:
            store_path = os.path.join(AGENTS_DIR, agent, "sessions", "sessions.json")
            if os.path.exists(store_path):
                shutil.copy2(store_path, os.path.join(backup_dir, f"sessions-{agent}.json"))
        print(f"Backups → {backup_dir}")

    print()

    total_sessions = 0
    total_session_bytes = 0
    total_orphans = 0
    total_orphan_bytes = 0

    for agent in AGENTS:
        s_removed, s_bytes, o_count, o_bytes = cleanup_agent(
            agent, backup_dir, dry_run, verbose
        )

        if s_removed > 0 or o_count > 0:
            print(f"  {agent}: {s_removed} sessions, {s_bytes / 1024:.1f} KB | "
                  f"{o_count} orphans, {o_bytes / 1024:.1f} KB")
        else:
            print(f"  {agent}: Clean — nothing to remove")

        total_sessions += s_removed
        total_session_bytes += s_bytes
        total_orphans += o_count
        total_orphan_bytes += o_bytes

    print()
    print("─" * 50)
    print(f"Total: {total_sessions} sessions ({total_session_bytes / 1024:.1f} KB) | "
          f"{total_orphans} orphans ({total_orphan_bytes / 1024:.1f} KB)")

    if dry_run:
        print()
        print("Run without --dry-run to execute cleanup.")
    else:
        print(f"Backups saved to: {backup_dir}")

    return 0

if __name__ == "__main__":
    sys.exit(main())