"""QA test suite for the Family Assistant package.

Runs 14 test cases against the LLM endpoint to validate appointment parsing.
Uses generic names — real family data comes from family.yaml (gitignored).
"""

from datetime import datetime

from icarus.core.config import CHICAGO_TZ
from icarus.core.appointment_parser import parse_email_with_llm


# (subject, body) pairs fed to the parser. The position in this tuple
# (1-based) selects the validation rule in ``_evaluate_case``.
_QA_CASES = (
    # Case 1: Forwarded confirmation with structured date block
    ("Confirmation",
     "Date: April 21, 2026\nTime: 2:00 PM - 3:00 PM\n"
     "Location: Downtown Therapy Center\nPatient: Charlie Smith"),
    # Case 2: 'next Tuesday' relative date
    ("Soccer practice",
     "Sam has soccer practice next Tuesday at 4pm at East High Field"),
    # Case 3: Cancellation
    ("Appointment Cancelled",
     "Your appointment on April 20 at 10am has been cancelled."),
    # Case 4: Date range / multi-day
    ("Camp registration",
     "Sam is registered for summer camp June 15-19, 2026."),
    # Case 5: 24h time format
    ("Vet appointment",
     "Rover has a vet appointment on 4/18/2026 at 14:00."),
    # Case 6: Ambiguous 'at' - time vs location
    ("Grooming",
     "Drop off Rover at PetSmart at 9am on April 19."),
    # Case 7: False positive - not an appointment
    ("Fwd: Your order",
     "Your pizza will be delivered on April 16 at 6 PM."),
    # Case 8: Recurring event
    ("Weekly therapy",
     "Charlie therapy every Thursday at 3pm starting April 17."),
    # Case 9: Structured confirmation
    ("Appointment Confirmation",
     "Dear Patient,\n\nYour upcoming visit details:\nProvider: Dr. Smith\n"
     "Date: 05/03/2026\nTime: 9:30 AM\nDuration: 30 minutes\n"
     "Clinic: Lakewood Pediatrics"),
    # Case 10: 'today' as date
    ("Reminder",
     "Reminder: Sam has a dentist appointment today at 3:30 PM."),
    # Case 11: Time zone specified
    ("Telehealth",
     "Your telehealth session is on April 22 at 11 AM CST."),
    # Case 12: Two appointments in one email
    ("Appointments",
     "Charlie has therapy on 4/18 at 10am. Sam has soccer on 4/19 at 4pm."),
    # Case 13: Nickname resolution
    ("Chuck dentist",
     "Chuck has a dentist appointment on Friday, April 18, 2026 at 2:00 PM."),
    # Case 14: The original test that works
    ("Chuck Well Child Visit",
     "Charlie has a well child visit scheduled on 4/17 at 10:00AM "
     "at Lakewood Pediatrics clinic."),
)

# Display label for each result key printed by ``_print_fields``.
_FIELD_LABELS = {
    "type": "Type",
    "who": "Who",
    "start": "Start",
    "location": "Location",
    "is_multi_day": "Multi-day",
    "is_recurring": "Recurring",
}


def _serialize_result(parsed):
    """Convert datetime objects to ISO strings for JSON serialization.

    Args:
        parsed: Mapping of field name to value for one parsed result.

    Returns:
        A new dict with every ``datetime`` value replaced by its
        ``isoformat()`` string; all other values are passed through.
    """
    return {
        key: value.isoformat() if isinstance(value, datetime) else value
        for key, value in parsed.items()
    }


def _field(result, key, fallback="N/A"):
    """Return ``result.get(key)``, or ``fallback`` when there is no result."""
    if result is None:
        return fallback
    return result.get(key)


def _mentions(result, key, needle):
    """Return True when ``needle`` is found in the value stored under ``key``.

    Strings are searched by substring and lists/tuples/sets by element
    equality, exactly like the ``in`` operator. A missing result, a missing
    key, a ``None`` value or any other type counts as "not found" instead of
    raising ``TypeError``.
    """
    if result is None:
        return False
    value = result.get(key)
    if isinstance(value, (str, list, tuple, set, frozenset)):
        return needle in value
    return False


def _print_fields(result, *keys):
    """Print one `` Label: value`` line per key for the first parsed result.

    When there is no result, ``who`` is shown as ``[]`` and every other
    field as ``N/A``.
    """
    for key in keys:
        fallback = [] if key == "who" else "N/A"
        print(f" {_FIELD_LABELS[key]}: {_field(result, key, fallback)}")


def _evaluate_case(case_no, results):
    """Print the details of one case and decide whether it passed.

    Args:
        case_no: 1-based index of the case in ``_QA_CASES``.
        results: List of serialized result dicts returned by the parser
            (may be empty).

    Returns:
        True when the case-specific expectations hold, False otherwise.
        Only the first result is validated, except for case 12 (result
        count) and the generic fallback.
    """
    first = results[0] if results else None
    is_appointment = _field(first, "type", None) == "appointment"
    has_start = _field(first, "start", None) is not None

    if case_no == 1:  # Structured confirmation
        ok = (is_appointment
              and _mentions(first, "who", "Charlie")
              and _mentions(first, "location", "Therapy Center")
              and has_start)
        duration = first.get("duration_minutes", 0) if first else 0
        _print_fields(first, "type", "who", "start")
        print(f" Duration: {duration} min")
        _print_fields(first, "location")
    elif case_no == 2:  # next Tuesday
        ok = (is_appointment
              and _mentions(first, "who", "Sam")
              and has_start
              and _mentions(first, "location", "East High"))
        _print_fields(first, "type", "who", "start", "location")
    elif case_no == 3:  # Cancellation
        ok = _field(first, "type", None) == "cancellation"
        _print_fields(first, "type", "start")
    elif case_no == 4:  # Multi-day
        # Equality (not identity) is kept so a flag returned as 1 still passes.
        ok = (_field(first, "is_multi_day", None) == True  # noqa: E712
              and _mentions(first, "who", "Sam"))
        _print_fields(first, "type", "who", "is_multi_day", "start")
    elif case_no == 5:  # 24h time
        ok = (is_appointment
              and _mentions(first, "who", "Rover")
              and has_start)
        _print_fields(first, "type", "who", "start")
        if ok:
            start_text = str(first.get("start", ""))
            # "T19:" presumably covers 14:00 Chicago time expressed in UTC
            # — TODO confirm against the parser's output convention.
            if not any(mark in start_text for mark in ("14:", "T19:")):
                print(" ⚠️ Time may not be 14:00 — check start field above")
    elif case_no == 6:  # Ambiguous at
        ok = (is_appointment
              and _mentions(first, "who", "Rover")
              and _mentions(first, "location", "PetSmart")
              and has_start)
        _print_fields(first, "type", "who", "start", "location")
    elif case_no == 7:  # False positive
        ok = not results
        print(f" Result: {'Correctly filtered' if ok else 'FALSE POSITIVE — should be filtered'}")
    elif case_no == 8:  # Recurring
        # Equality (not identity) is kept so a flag returned as 1 still passes.
        ok = (_field(first, "is_recurring", None) == True  # noqa: E712
              and _mentions(first, "who", "Charlie"))
        _print_fields(first, "type", "who", "is_recurring", "start")
    elif case_no == 9:  # Structured confirmation with Clinic field
        loc = first.get("location", "") if first else ""
        ok = (is_appointment
              and (_mentions(first, "location", "Lakewood")
                   or _mentions(first, "location", "Pediatrics"))
              and _field(first, "duration_minutes", None) == 30)
        _print_fields(first, "type", "who", "start")
        print(f" Duration: {_field(first, 'duration_minutes')} min")
        print(f" Location: {loc}")
    elif case_no == 10:  # today
        ok = (is_appointment
              and _mentions(first, "who", "Sam")
              and has_start)
        _print_fields(first, "type", "who", "start")
        if first is not None:
            start_text = str(first.get("start", ""))
            today_str = datetime.now(CHICAGO_TZ).strftime("%Y-%m-%d")
            if start_text.startswith(today_str):
                print(f" ✅ Date resolves to today ({today_str})")
            else:
                print(f" ⚠️ Date may not be today — got {start_text[:10]}")
    elif case_no == 11:  # Time zone
        ok = is_appointment and has_start
        _print_fields(first, "type", "start")
    elif case_no == 12:  # Two appointments
        ok = len(results) == 2
        if results:
            print(f" Found {len(results)} appointments:")
            for entry in results:
                print(f" Who: {entry.get('who')}, Start: {entry.get('start')}")
    elif case_no == 13:  # Nickname resolution
        ok = _mentions(first, "who", "Charlie") and has_start
        _print_fields(first, "type", "who", "start")
    elif case_no == 14:  # Original working test
        ok = (_mentions(first, "who", "Charlie")
              and has_start
              and _mentions(first, "location", "Lakewood"))
        _print_fields(first, "type", "who", "start", "location")
    else:
        # No case-specific rule: dump every result for manual inspection and
        # pass when the parser produced at least one result, so the verdict
        # never leaks over from the previous case.
        for entry in results:
            print(f" Type: {entry.get('type', 'N/A')}")
            print(f" Who: {entry.get('who', [])}")
            print(f" Start: {entry.get('start', 'N/A')}")
            print(f" Duration: {entry.get('duration_minutes', 'N/A')} min")
            print(f" Location: {entry.get('location', 'N/A')}")
        ok = bool(results)

    return bool(ok)


def run_qa_tests():
    """Run the QA test suite against the LLM endpoint and report results.

    Each case in ``_QA_CASES`` is sent through ``parse_email_with_llm``; the
    parsed results are printed and validated by ``_evaluate_case``. A case
    whose parser call raises is reported and counted as failed so that the
    remaining cases and the final summary still run. Output goes to stdout;
    nothing is returned.
    """
    print("=" * 70)
    print("QA TEST RESULTS (Prompt-as-Code via LLM)")
    print("=" * 70)

    passed = 0
    total = len(_QA_CASES)

    for case_no, (subject, body) in enumerate(_QA_CASES, 1):
        print(f"\n--- Case {case_no}: {subject} ---")
        print(f"Body: {body[:80]}{'...' if len(body) > 80 else ''}")

        try:
            parsed = parse_email_with_llm(subject, body)
            # ``or []`` tolerates a parser that returns None for "nothing found".
            results = [_serialize_result(item) for item in parsed or []]
        except Exception as exc:  # suite boundary: report and keep going
            print(f" ⚠️ Parser raised {type(exc).__name__}: {exc}")
            ok = False
        else:
            ok = _evaluate_case(case_no, results)

        status = "✅" if ok else "❌"
        print(f" {status} Case {case_no}")
        if ok:
            passed += 1

    failed = total - passed
    print(f"\n{'=' * 70}")
    print(f"Results: {passed}/{total} passed, {failed}/{total} failed")
    print("=" * 70)