"""Grounding Context Generator — extract real struggles from memory files.

Provides the LLM with actual project context to prevent hallucination.
"""

import os
import re
import glob
from datetime import datetime, timedelta
from typing import List, Dict, Optional
from pathlib import Path

MEMORY_DIR = Path("/home/hoffmann_admin/.openclaw/workspace-socrates/memory")

# Known real projects from MEMORY.md
KNOWN_PROJECTS = [
    {
        "name": "OpenClaw Gateway",
        "description": "Multi-agent orchestration framework running on Beelink (titanium-butler)",
        "stack": ["Python", "FastAPI", "systemd", "Tailscale"],
        "struggles": [
            "Token auth across multiple agents (Socrates, Daedalus, Wadsworth)",
            "Session contamination when wrong bot handles messages",
            "Identity confusion between agents",
        ],
    },
    {
        "name": "Family Assistant",
        "description": "Email-to-calendar pipeline for family scheduling",
        "stack": ["Python", "Radicale CalDAV", "ChromaDB", "Telegram Bot API"],
        "struggles": [
            "Google killing IMAP access (forced migration to webhooks)",
            "Radicale CalDAV migration from Google Calendar API",
            "Conflict detection between family members' schedules",
            "Document sorting with OCR on Gaming PC",
        ],
    },
    {
        "name": "HoffDesk Blog",
        "description": "Technical blog with LocalAI content generation",
        "stack": ["FastAPI", "SQLite", "Jinja2", "Ollama on Gaming PC"],
        "struggles": [
            "V1 content generation was too generic/corporate",
            "Magic Wand auth token issues between HTMX and fetch",
            "Struggle-first narrative vs tutorial-first structure",
        ],
    },
    {
        "name": "Sovereign Infrastructure",
        "description": "Self-hosted stack avoiding cloud lock-in",
        "stack": ["Tailscale mesh VPN", "Radicale", "Cloudflare Workers", "Local LLMs"],
        "struggles": [
            "Gmail IMAP deprecation forcing webhook architecture",
            "Gaming PC (Windows) vs Beelink (Linux) coordination",
            "Local LLM inference vs cloud API costs",
        ],
    },
]

# Banned narrative patterns (anti-patterns from V1)
BANNED_NARRATIVES = [
    "team's API",
    "e-commerce platform",
    "startup",
    "enterprise",
    "scaling to millions",
    "VC funding",
    "agile methodology",
    "sprint planning",
]

# A struggle starts right after one of these markers and runs until the next
# blank line, the next "##" heading, or the end of the file. DOTALL lets a
# struggle span several physical lines; IGNORECASE tolerates "problem:" etc.
_STRUGGLE_TAIL = r"\s*(.+?)(?:\n\n|\n##|$)"
_STRUGGLE_PATTERNS = [
    re.compile(marker + _STRUGGLE_TAIL, re.IGNORECASE | re.DOTALL)
    for marker in (
        r"Issue discovered:",
        r"\*\*Issue:\*\*",
        r"Problem:",
        r"Blocker:",
        r"Failed:",
        r"Root cause:",
    )
]

_MAX_STRUGGLE_CHARS = 200  # longer matches are truncated to this many chars
_MIN_STRUGGLE_CHARS = 10   # matches of this length or shorter are dropped
_DEDUP_PREFIX_CHARS = 50   # struggles sharing this lowercase prefix are duplicates
_MAX_STRUGGLES = 10        # size of the returned list


def extract_recent_struggles(
    days: int = 14,
    memory_dir: Optional[Path] = None,
) -> List[Dict[str, str]]:
    """Extract struggle mentions from recent memory files.

    Scans ``*.md`` files whose name starts with a ``YYYY-MM-DD`` date
    (e.g. ``2026-04-22.md``) and looks for patterns like:

    - "Issue discovered: ..."
    - "**Issue:** ..."
    - "Problem: ..."
    - "Blocker: ..."
    - "Failed: ..."
    - "Root cause: ..."

    Args:
        days: Only files dated within this many days of now are considered.
        memory_dir: Directory to scan. Defaults to ``MEMORY_DIR``.

    Returns:
        Up to 10 dicts with keys ``date`` (the filename's date prefix),
        ``struggle`` (the matched text, at most 200 characters) and
        ``source`` (the file name). Newest files come first; entries whose
        first 50 characters match case-insensitively are reported once.
        An empty list is returned when the directory does not exist.
    """
    root = MEMORY_DIR if memory_dir is None else Path(memory_dir)
    if not root.is_dir():
        return []

    cutoff = datetime.now() - timedelta(days=days)

    # Collect the in-range files first so they can be visited newest-first;
    # glob order is filesystem-dependent and would make the 10-item cap
    # keep an arbitrary subset.
    dated_files = []
    for md_file in root.glob("*.md"):
        date_str = md_file.stem[:10]
        try:
            file_date = datetime.strptime(date_str, "%Y-%m-%d")
        except ValueError:
            continue  # not a dated memory file (e.g. MEMORY.md)
        if file_date >= cutoff:
            dated_files.append((file_date, md_file.name, date_str, md_file))
    dated_files.sort(key=lambda entry: entry[:2], reverse=True)

    seen = set()
    unique: List[Dict[str, str]] = []
    for _, _, date_str, md_file in dated_files:
        try:
            # Explicit encoding: the platform default is not UTF-8 everywhere.
            content = md_file.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue  # best-effort scan: one unreadable file must not abort it

        for pattern in _STRUGGLE_PATTERNS:
            for match in pattern.finditer(content):
                struggle = match.group(1).strip()[:_MAX_STRUGGLE_CHARS]
                if len(struggle) <= _MIN_STRUGGLE_CHARS:
                    continue
                key = struggle.lower()[:_DEDUP_PREFIX_CHARS]
                if key in seen:
                    continue
                seen.add(key)
                unique.append({
                    "date": date_str,
                    "struggle": struggle,
                    "source": md_file.name,
                })
                if len(unique) == _MAX_STRUGGLES:
                    return unique
    return unique


def get_grounding_context(topic: Optional[str] = None) -> str:
    """Generate grounding context for the LLM based on real projects.

    This prevents hallucination of fake corporate scenarios.

    Args:
        topic: Currently unused; the generated context does not depend on it.

    Returns:
        A newline-joined text block listing the known projects, the recent
        struggles found in the last 30 days of memory files (section omitted
        when there are none), and the narrative rules.
    """
    # Get recent struggles
    recent = extract_recent_struggles(days=30)

    # Build context
    lines = [
        "=== GROUNDING CONTEXT: REAL PROJECTS ===",
        "",
        "The author (Matt) is building sovereign home infrastructure in Green Bay, WI.",
        "He has a wife and kids. His dog is Maggie. These names MUST NOT appear in output.",
        "Use generic terms like 'my spouse', 'the family', 'our dog' instead.",
        "",
        "ACTIVE PROJECTS:",
    ]

    for proj in KNOWN_PROJECTS:
        lines.append(f"\n• {proj['name']}: {proj['description']}")
        lines.append(f" Stack: {', '.join(proj['stack'])}")

    if recent:
        lines.extend([
            "",
            "RECENT STRUGGLES (from actual logs):",
        ])
        lines.extend(f" [{s['date']}] {s['struggle']}" for s in recent)

    lines.extend([
        "",
        "=== NARRATIVE RULES ===",
        "",
        "FORBIDDEN (hallucinated corporate scenarios):",
    ])
    lines.extend(f" • Never write about: {banned}" for banned in BANNED_NARRATIVES)

    lines.extend([
        "",
        "REQUIRED:",
        " • First person only ('I', 'my', 'our')",
        " • Specific tools from the stack above",
        " • Real locations: 'titanium-butler (Beelink)', 'Gaming PC', 'Tailscale'",
        " • Real failures with specific error messages",
        " • No invented dates or 'Last Tuesday'",
        "",
        "=== END GROUNDING ===",
    ])

    return "\n".join(lines)


def get_struggle_candidates() -> List[Dict[str, str]]:
    """Get the list of struggle topics the user could write about.

    Useful for a UI 'suggested topics' feature.

    Returns:
        Up to 15 dicts with keys ``topic``, ``category`` and ``source``.
        Struggles from ``KNOWN_PROJECTS`` come first, followed by recent
        issues from the last 30 days of memory files; with the 13 built-in
        struggles, at most two recent issues fit under the cap.
    """
    # From known projects
    candidates = [
        {
            "topic": f"{proj['name']}: {struggle}",
            "category": proj["name"],
            "source": "MEMORY.md",
        }
        for proj in KNOWN_PROJECTS
        for struggle in proj.get("struggles", [])
    ]

    # From recent memory
    candidates.extend(
        {
            "topic": s["struggle"],
            "category": "Recent Issue",
            "source": s["source"],
        }
        for s in extract_recent_struggles(days=30)
    )

    return candidates[:15]


if __name__ == "__main__":
    print(get_grounding_context())
    print("\n" + "="*50 + "\n")
    print("SUGGESTED TOPICS:")
    for c in get_struggle_candidates()[:5]:
        print(f" • {c['topic'][:60]}...")