""" V2 Pipeline — Final Integration Test Tests complete pipeline with: 1. Real incident data 2. Anti-hallucination date stripping 3. Word count enforcement 4. Strict compliance filter """ import sys sys.path.insert(0, '/home/hoffmann_admin/.openclaw/workspace-socrates/hoffdesk-api') import asyncio from datetime import datetime from content.pipeline import generate_ollama from content.compliance_filter import ComplianceFilter from blog.generation.prompts import build_struggle_first_prompt from incidents import to_brief from anti_hallucination import strip_dates, clean_generated_content from word_count import add_word_count_to_prompt, enforce_word_count async def test_final_pipeline(): """Test complete pipeline with all fixes.""" print("=" * 70) print("V2 PIPELINE — FINAL INTEGRATION TEST") print("=" * 70) print() # 1. Load real incident print("1. Loading real incident...") brief = to_brief('2026-04-23-cloudflare-error-1033-on-noteshoffdeskcom') print(f" ✓ {brief['struggle_angle']}") print() # 2. Build prompt with word count enforcement print("2. Building prompt with word count enforcement...") prompt = build_struggle_first_prompt(brief, style_reference=None) prompt = add_word_count_to_prompt(prompt, 1200) print(" ✓ Prompt ready") print() # 3. Generate print("3. Generating with phi4:14b...") print(" " + "-" * 66) start = asyncio.get_event_loop().time() try: draft = await generate_ollama( model="phi4:14b", prompt=prompt, temperature=0.6, max_tokens=4000, # Increased for longer content timeout=180.0 ) elapsed = asyncio.get_event_loop().time() - start word_count = len(draft.split()) print(f" ✓ Generated in {elapsed:.1f}s") print(f" ✓ {word_count} words") print() except Exception as e: print(f" ✗ Generation failed: {e}") return # 4. Anti-hallucination: Strip dates AND names print("4. Running anti-hallucination filter...") cleaned = clean_generated_content(draft, strip_names=True) dates_removed = len(draft.split()) - len(cleaned.split()) print(f" ✓ Dates and names stripped (text delta: {dates_removed} words)") print() # 5. Compliance check print("5. Running compliance filter (STRICT)...") compliance = ComplianceFilter(strict_mode=True).process(cleaned) print(f" Banned words: {compliance.banned_found or 'None'}") print(f" Real names: {compliance.names_found or 'None'}") print(f" Hallucinated dates: {compliance.dates_found or 'None'}") print(f" Compliant: {'✓ PASS' if compliance.is_compliant else '✗ FAIL'}") print() # 6. Word count check and expansion print("6. Word count check...") final_words = len(compliance.clean_text.split()) print(f" Target: 1200 words") print(f" Actual: {final_words} words ({final_words/1200*100:.0f}%)") if final_words < 960: # 80% threshold print(f" ⚠ Below 80% threshold — expanding...") expanded = await enforce_word_count(compliance.clean_text, 1200) expanded_words = len(expanded.split()) print(f" ✓ Expanded to {expanded_words} words ({expanded_words/1200*100:.0f}%)") compliance.clean_text = expanded final_words = expanded_words else: print(f" ✓ Acceptable") print() # 7. Save output output_path = "/tmp/v2_final_output.md" with open(output_path, "w") as f: f.write(compliance.clean_text) print("=" * 70) print("FINAL OUTPUT:") print("=" * 70) print() print(compliance.clean_text) print() print("=" * 70) print(f"Saved: {output_path}") print("=" * 70) return { "success": True, "word_count": final_words, "elapsed": elapsed, "compliant": compliance.is_compliant, "dates_removed": dates_removed, } if __name__ == "__main__": result = asyncio.run(test_final_pipeline()) if result: print() print("SUMMARY") print("-" * 70) print(f"✓ Pipeline: {'PASS' if result['compliant'] else 'NEEDS WORK'}") print(f" Words: {result['word_count']}/1200") print(f" Time: {result['elapsed']:.1f}s") print(f" Dates stripped: {result['dates_removed']}")