# test_pipeline_v2_real.py (6,258 bytes, Apr 23, 2026)

"""
V2 Pipeline — Real Incident Test (Full Stack)

Tests the complete pipeline with:
1. Real incident data from incidents/ directory
2. Strict compliance filter (blocks on names/dates)
3. Timestamp injection (prevents hallucinated dates)
"""

import sys
sys.path.insert(0, '/home/hoffmann_admin/.openclaw/workspace-socrates/hoffdesk-api')

import asyncio
from datetime import datetime, timezone
from content.pipeline import generate_ollama
from content.compliance_filter import ComplianceFilter
from blog.generation.prompts import build_struggle_first_prompt
from incidents import to_brief

# Timestamp injection — prevents hallucinated dates

def inject_timestamp(prompt: str, incident_date: str) -> str:
    """Add an explicit timestamp section to a prompt to prevent date hallucination.

    Args:
        prompt: Generation prompt. The timestamp section is inserted directly
            before its first "=== INCIDENT DETAILS ===" marker.
        incident_date: ISO 8601 timestamp of the incident. A trailing "Z" is
            accepted. A value carrying a UTC offset is converted to UTC; a
            value without any offset is formatted as given.

    Returns:
        The prompt with a "=== TIMESTAMP ===" section added. When the marker
        is not present, the section is appended to the end of the prompt so
        that it is never silently dropped.

    Raises:
        ValueError: If ``incident_date`` is not a valid ISO 8601 string.
    """
    marker = "=== INCIDENT DETAILS ==="

    # datetime.fromisoformat() rejects a trailing "Z" before Python 3.11,
    # so spell the UTC designator as an explicit offset.
    dt = datetime.fromisoformat(incident_date.replace('Z', '+00:00'))

    # The text below labels the time as UTC, so normalize offset-aware
    # values instead of printing their local wall-clock time.
    if dt.tzinfo is not None:
        dt = dt.astimezone(timezone.utc)

    # Format for prompt
    timestamp_line = f"""

=== TIMESTAMP ===
This incident occurred around {dt.strftime('%B %d, %Y')} at {dt.strftime('%H:%M')} UTC.
Use ONLY this general timeframe. NEVER invent:
- Days of the week (no "Tuesday", "Wednesday")
- Specific times (no "3:45 PM", "7:30")
- Other dates or years
- "Last week", "yesterday", "this morning"

If you need to reference time, use vague terms: "around that time", "a few minutes later", "eventually"
"""

    if marker not in prompt:
        # No anchor to insert before: append rather than lose the timestamp.
        return prompt + timestamp_line

    # Insert before the incident details (first occurrence only, so a marker
    # string repeated inside the incident text does not duplicate the section).
    return prompt.replace(marker, timestamp_line + "\n" + marker, 1)

async def test_pipeline_with_real_incident():
    """Run the full V2 pipeline against one real incident and report the result.

    Steps, in order:
      1. Load the incident brief with ``to_brief``.
      2. Build the generation prompt with ``build_struggle_first_prompt``.
      3. Inject the incident timestamp and append anti-hallucination rules.
      4. Generate a draft with ``generate_ollama`` (model ``phi4:14b``).
      5. Run ``ComplianceFilter`` in strict mode over the draft.
      6. Write the filter's ``clean_text`` to ``/tmp/v2_real_incident_output.md``
         and echo it to stdout.

    Returns:
        A dict with keys ``success``, ``word_count``, ``elapsed``,
        ``compliant`` and ``issues`` (``names``, ``dates``, ``banned``), or
        ``None`` when generation raised an exception.
    """
    print("=" * 70)
    print("V2 PIPELINE — REAL INCIDENT TEST")
    print("=" * 70)
    print()

    # Load real incident
    print("Loading incident from incidents/...")
    brief = to_brief('2026-04-23-cloudflare-error-1033-on-noteshoffdeskcom')
    print(f"✓ Loaded: {brief['struggle_angle']}")
    print(f"  Date: {brief['date'][:10]}")
    print(f"  Systems: {', '.join(brief['systems'])}")
    print(f"  Attempts: {len(brief['attempts'])}")
    print()

    # Build prompt
    print("Building generation prompt...")
    prompt = build_struggle_first_prompt(brief, style_reference=None)

    # Inject timestamp to prevent hallucination
    prompt = inject_timestamp(prompt, brief['date'])

    # Add explicit anti-hallucination rules
    prompt += """

=== ANTI-HALLUCINATION RULES ===
- Use ONLY the specific commands, files, and error messages from the incident details
- NEVER invent: days of the week, times of day, months, dates, years
- NEVER use: "Last Tuesday", "Wednesday afternoon", "March 15th", "7:45 PM"
- NEVER use real names: Aundrea, Sullivan, Harper, Maggie, Hoffmann
- Use generic terms: "my spouse", "the family", "our dog"
- Be specific about config files, commands, error codes
- If you don't know a timestamp, use "around that time" or omit it
"""

    print("✓ Prompt built with timestamp injection")
    print()

    # Generate
    print("Calling phi4:14b (Gaming PC)...")
    print("-" * 70)

    # Inside a coroutine the running loop is the documented source of the
    # loop clock; it is monotonic, which is what elapsed-time needs.
    loop = asyncio.get_running_loop()
    start = loop.time()

    try:
        draft = await generate_ollama(
            model="phi4:14b",
            prompt=prompt,
            temperature=0.6,  # Lower temp for more factual
            max_tokens=3000,
            timeout=180.0,
        )

        elapsed = loop.time() - start
        word_count = len(draft.split())

        print(f"✓ Generated in {elapsed:.1f}s")
        print(f"✓ {word_count} words")
        print()

    except Exception as e:
        # Top-level boundary of a diagnostic script: report the failure and
        # let the caller see None instead of a traceback.
        print(f"✗ Generation failed: {e}")
        return None

    # Compliance check (STRICT MODE)
    print("Running compliance filter (STRICT MODE)...")
    print("-" * 70)

    compliance = ComplianceFilter(strict_mode=True).process(draft)

    print(f"  Banned words: {compliance.banned_found or 'None'}")
    print(f"  Real names: {compliance.names_found or 'None'}")
    print(f"  Hallucinated dates: {compliance.dates_found or 'None'}")
    print(f"  Compliant: {'✓ PASS' if compliance.is_compliant else '✗ FAIL'}")
    print()

    if not compliance.is_compliant:
        print("COMPLIANCE FAILURES:")
        if compliance.names_found:
            print(f"  ✗ Names found: {compliance.names_found}")
        if compliance.dates_found:
            print(f"  ✗ Dates found: {compliance.dates_found}")
        print()
        print("Options:")
        print("  1. Regenerate with stronger anti-hallucination prompt")
        print("  2. Manually edit the output")
        print("  3. Accept with warnings (lenient mode)")
        print()

    # Save output. The encoding is explicit because generated text may contain
    # non-ASCII characters and the platform default encoding is not
    # guaranteed to be UTF-8.
    output = compliance.clean_text
    output_path = "/tmp/v2_real_incident_output.md"
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(output)

    print("=" * 70)
    print("GENERATED CONTENT:")
    print("=" * 70)
    print()
    print(output)
    print()
    print("=" * 70)
    print(f"Saved to: {output_path}")
    print("=" * 70)

    return {
        "success": True,
        "word_count": word_count,
        "elapsed": elapsed,
        "compliant": compliance.is_compliant,
        "issues": {
            "names": compliance.names_found,
            "dates": compliance.dates_found,
            "banned": compliance.banned_found,
        },
    }

# Script entry point: run the pipeline once and print a short summary.
# The guard must compare the module dunder ``__name__`` with "__main__";
# a bare ``name`` is undefined and raises NameError at import time.
if __name__ == "__main__":
    result = asyncio.run(test_pipeline_with_real_incident())

    print()
    print("=" * 70)
    print("SUMMARY")
    print("=" * 70)

    # result is None when generation failed, otherwise a summary dict.
    if result and result.get("success"):
        print("✓ Pipeline complete")
        print(f"  Words: {result['word_count']}")
        print(f"  Time: {result['elapsed']:.1f}s")
        print(f"  Compliant: {result['compliant']}")

        if not result['compliant']:
            issues = result['issues']
            print()
            print("Fixes needed:")
            if issues['names']:
                print(f"  - Remove names: {issues['names']}")
            if issues['dates']:
                print(f"  - Remove dates: {issues['dates']}")
            if issues['banned']:
                print(f"  - Replaced banned words: {issues['banned']}")
    else:
        print("✗ Pipeline failed")