"""Grounding Context Generator — Extract real struggles from memory files.

Provides LLM with actual project context to prevent hallucination.
"""

import os
import re
import glob
from datetime import datetime, timedelta
from typing import List, Dict, Optional
from pathlib import Path

# Directory whose "*.md" files are scanned by extract_recent_struggles().
# Only files whose name starts with a YYYY-MM-DD date are considered there.
MEMORY_DIR: Path = Path("/home/hoffmann_admin/.openclaw/workspace-socrates/memory")

# Known real projects from MEMORY.md.
#
# Each entry is a dict with these keys:
#   "name"        - project title; printed in the grounding context and used
#                   as the "category" of topics in get_struggle_candidates()
#   "description" - one-line summary printed next to the name
#   "stack"       - list of technologies, joined with ", " in the context
#   "struggles"   - list of pain points, offered as suggested topics by
#                   get_struggle_candidates()
KNOWN_PROJECTS = [
    {
        "name": "OpenClaw Gateway",
        "description": "Multi-agent orchestration framework running on Beelink (titanium-butler)",
        "stack": ["Python", "FastAPI", "systemd", "Tailscale"],
        "struggles": [
            "Token auth across multiple agents (Socrates, Daedalus, Wadsworth)",
            "Session contamination when wrong bot handles messages",
            "Identity confusion between agents"
        ]
    },
    {
        "name": "Family Assistant",
        "description": "Email-to-calendar pipeline for family scheduling",
        "stack": ["Python", "Radicale CalDAV", "ChromaDB", "Telegram Bot API"],
        "struggles": [
            "Google killing IMAP access (forced migration to webhooks)",
            "Radicale CalDAV migration from Google Calendar API",
            "Conflict detection between family members' schedules",
            "Document sorting with OCR on Gaming PC"
        ]
    },
    {
        "name": "HoffDesk Blog",
        "description": "Technical blog with LocalAI content generation",
        "stack": ["FastAPI", "SQLite", "Jinja2", "Ollama on Gaming PC"],
        "struggles": [
            "V1 content generation was too generic/corporate",
            "Magic Wand auth token issues between HTMX and fetch",
            "Struggle-first narrative vs tutorial-first structure"
        ]
    },
    {
        "name": "Sovereign Infrastructure",
        "description": "Self-hosted stack avoiding cloud lock-in",
        "stack": ["Tailscale mesh VPN", "Radicale", "Cloudflare Workers", "Local LLMs"],
        "struggles": [
            "Gmail IMAP deprecation forcing webhook architecture",
            "Gaming PC (Windows) vs Beelink (Linux) coordination",
            "Local LLM inference vs cloud API costs"
        ]
    }
]

# Banned narrative patterns (anti-patterns from V1).
# get_grounding_context() emits one "Never write about: ..." rule per entry,
# in the order listed here.
BANNED_NARRATIVES: List[str] = [
    "team's API",
    "e-commerce platform",
    "startup",
    "enterprise",
    "scaling to millions",
    "VC funding",
    "agile methodology",
    "sprint planning"
]


# Markers that introduce a struggle in a memory file. Each capture runs lazily
# up to the first blank line, the next "##" heading, or the end of the file.
# Compiled once at import time instead of once per scanned file.
_STRUGGLE_PATTERNS = tuple(
    re.compile(marker + r"\s*(.+?)(?:\n\n|\n##|$)", re.IGNORECASE | re.DOTALL)
    for marker in (
        r"Issue discovered:",
        r"\*\*Issue:\*\*",
        r"Problem:",
        r"Blocker:",
        r"Failed:",
        r"Root cause:",
    )
)


def extract_recent_struggles(
    days: int = 14, memory_dir: Optional[Path] = None
) -> List[Dict[str, str]]:
    """Extract struggle mentions from recent memory files.

    Scans ``*.md`` files whose name starts with a ``YYYY-MM-DD`` date (e.g.
    ``2026-04-22.md``) and collects the text following markers such as:
    - "Issue discovered: ..."
    - "**Issue:** ..."
    - "Problem: ..."
    - "Blocker: ..."
    - "Failed: ..."
    - "Root cause: ..."

    Args:
        days: Files dated earlier than ``now - days`` are ignored.
        memory_dir: Directory to scan. Defaults to the module-level
            ``MEMORY_DIR`` when omitted.

    Returns:
        At most 10 dicts with the keys ``"date"`` (date prefix of the file
        name), ``"struggle"`` (single-line text, at most 200 characters) and
        ``"source"`` (file name). Entries from the newest files come first.
        Entries whose first 50 characters match case-insensitively are
        reported only once. A missing directory yields an empty list.
    """
    root = MEMORY_DIR if memory_dir is None else Path(memory_dir)
    if not root.is_dir():
        return []

    cutoff = datetime.now() - timedelta(days=days)

    # Collect the in-range files first so they can be ordered; glob() yields
    # entries in filesystem order, which would make the 10-item cap arbitrary.
    dated_files = []
    for md_file in root.glob("*.md"):
        date_str = md_file.stem[:10]
        try:
            file_date = datetime.strptime(date_str, "%Y-%m-%d")
        except ValueError:
            continue  # Not a dated memory file.
        if file_date >= cutoff:
            dated_files.append((file_date, date_str, md_file))

    # Newest first; the name pre-sort makes same-day ordering deterministic
    # (list.sort is stable).
    dated_files.sort(key=lambda item: item[2].name)
    dated_files.sort(key=lambda item: item[0], reverse=True)

    seen = set()
    unique = []
    for _, date_str, md_file in dated_files:
        # Explicit UTF-8 keeps results independent of the process locale;
        # errors="replace" stops one bad byte from aborting the whole scan.
        content = md_file.read_text(encoding="utf-8", errors="replace")

        for pattern in _STRUGGLE_PATTERNS:
            for match in pattern.finditer(content):
                # Collapse newlines/indentation so a multi-line match still
                # renders as a single bullet, then cap the length.
                struggle = " ".join(match.group(1).split())[:200].rstrip()
                if len(struggle) <= 10:
                    continue  # Too short to be a meaningful description.

                # Deduplicate on a lowercase 50-character prefix.
                key = struggle.lower()[:50]
                if key in seen:
                    continue
                seen.add(key)

                unique.append({
                    "date": date_str,
                    "struggle": struggle,
                    "source": md_file.name,
                })
                if len(unique) == 10:  # Return top 10
                    return unique

    return unique


def get_grounding_context(topic: Optional[str] = None) -> str:
    """Assemble the grounding text that anchors the LLM to real projects.

    The text contains, in order: a short author preamble, every entry of
    KNOWN_PROJECTS with its stack, the struggles found in the last 30 days
    of memory files (section omitted when there are none), and the narrative
    rules (one prohibition per BANNED_NARRATIVES entry plus the required
    style points).

    Args:
        topic: Accepted for interface compatibility; it is not consulted
            when building the text.

    Returns:
        The complete context as a single newline-joined string.
    """
    logged = extract_recent_struggles(days=30)

    preamble = [
        "=== GROUNDING CONTEXT: REAL PROJECTS ===",
        "",
        "The author (Matt) is building sovereign home infrastructure in Green Bay, WI.",
        "He has a wife and kids. His dog is Maggie. These names MUST NOT appear in output.",
        "Use generic terms like 'my spouse', 'the family', 'our dog' instead.",
        "",
        "ACTIVE PROJECTS:",
    ]

    # Two lines per project; the leading "\n" leaves a blank line between them.
    project_section = []
    for project in KNOWN_PROJECTS:
        stack = ", ".join(project["stack"])
        project_section += [
            f"\n• {project['name']}: {project['description']}",
            f"  Stack: {stack}",
        ]

    # The whole section is dropped when the memory files yielded nothing.
    struggle_section = []
    if logged:
        struggle_section = [
            "",
            "RECENT STRUGGLES (from actual logs):",
        ] + [f"  [{entry['date']}] {entry['struggle']}" for entry in logged]

    forbidden_section = [
        "",
        "=== NARRATIVE RULES ===",
        "",
        "FORBIDDEN (hallucinated corporate scenarios):",
    ] + [f"  • Never write about: {theme}" for theme in BANNED_NARRATIVES]

    required_section = [
        "",
        "REQUIRED:",
        "  • First person only ('I', 'my', 'our')",
        "  • Specific tools from the stack above",
        "  • Real locations: 'titanium-butler (Beelink)', 'Gaming PC', 'Tailscale'",
        "  • Real failures with specific error messages",
        "  • No invented dates or 'Last Tuesday'",
        "",
        "=== END GROUNDING ===",
    ]

    return "\n".join(
        preamble
        + project_section
        + struggle_section
        + forbidden_section
        + required_section
    )


def get_struggle_candidates() -> List[Dict[str, str]]:
    """Return up to 15 suggested writing topics.

    Topics from KNOWN_PROJECTS come first (one per listed struggle,
    prefixed with the project name), followed by struggles extracted from
    the last 30 days of memory files.

    Returns:
        Dicts with the keys "topic", "category" and "source".
    """
    # Struggles listed in the project table.
    curated = [
        {
            "topic": f"{project['name']}: {pain_point}",
            "category": project["name"],
            "source": "MEMORY.md",
        }
        for project in KNOWN_PROJECTS
        for pain_point in project.get("struggles", [])
    ]

    # Struggles mined from the recent memory files.
    logged = [
        {
            "topic": entry["struggle"],
            "category": "Recent Issue",
            "source": entry["source"],
        }
        for entry in extract_recent_struggles(days=30)
    ]

    return (curated + logged)[:15]


if __name__ == "__main__":
    # Manual smoke run: print the grounding context, then a preview of the
    # first five suggested topics (each cut to 60 characters).
    divider = "\n" + "=" * 50 + "\n"
    print(get_grounding_context())
    print(divider)
    print("SUGGESTED TOPICS:")
    for candidate in get_struggle_candidates()[:5]:
        preview = candidate["topic"][:60]
        print(f"  • {preview}...")