"""QA test suite for the Family Assistant package.
Runs 14 test cases against the LLM endpoint to validate appointment parsing.
Uses generic names — real family data comes from family.yaml (gitignored).
"""
from datetime import datetime
from family_assistant.config import CHICAGO_TZ
from family_assistant.appointment_parser import parse_email_with_llm
def _serialize_result(parsed):
"""Convert datetime objects to ISO strings for JSON serialization."""
result = {}
for key, value in parsed.items():
if isinstance(value, datetime):
result[key] = value.isoformat()
else:
result[key] = value
return result
# Label -> (result key, placeholder printed when nothing was parsed).
_QA_FIELD_KEYS = {
    "Type": ("type", "N/A"),
    "Who": ("who", []),
    "Start": ("start", "N/A"),
    "Location": ("location", "N/A"),
    "Multi-day": ("is_multi_day", "N/A"),
    "Recurring": ("is_recurring", "N/A"),
}


def _print_first_fields(results, *labels):
    """Print the named fields of the first parsed item, one per line.

    When *results* is empty the per-field placeholder from ``_QA_FIELD_KEYS``
    is printed instead; a key missing from a non-empty first item prints as
    ``None``.
    """
    for label in labels:
        key, placeholder = _QA_FIELD_KEYS[label]
        value = results[0].get(key) if results else placeholder
        print(f" {label}: {value}")


def _field_contains(record, key, needle):
    """Return True if *needle* is found in ``record[key]``.

    A missing, ``None`` or otherwise empty value never matches, so a field
    that is present but ``None`` fails the check instead of raising
    ``TypeError``.
    """
    return needle in (record.get(key) or ())


def run_qa_tests():
    """Run the QA test suite against the LLM endpoint and report results.

    Each case is a ``(subject, body)`` pair passed to
    ``parse_email_with_llm``. The returned items are converted with
    ``_serialize_result`` and checked against case-specific rules. Per-case
    details and a final pass/fail summary are printed to stdout; nothing is
    returned. Exceptions raised by the parser are not caught here.
    """
    qa_tests = [
        # Case 1: Forwarded confirmation with structured date block
        (
            "Confirmation",
            "Date: April 21, 2026\nTime: 2:00 PM - 3:00 PM\n"
            "Location: Downtown Therapy Center\nPatient: Charlie Smith",
        ),
        # Case 2: 'next Tuesday' relative date
        ("Soccer practice", "Sam has soccer practice next Tuesday at 4pm at East High Field"),
        # Case 3: Cancellation
        ("Appointment Cancelled", "Your appointment on April 20 at 10am has been cancelled."),
        # Case 4: Date range / multi-day
        ("Camp registration", "Sam is registered for summer camp June 15-19, 2026."),
        # Case 5: 24h time format
        ("Vet appointment", "Rover has a vet appointment on 4/18/2026 at 14:00."),
        # Case 6: Ambiguous 'at' - time vs location
        ("Grooming", "Drop off Rover at PetSmart at 9am on April 19."),
        # Case 7: False positive - not an appointment
        ("Fwd: Your order", "Your pizza will be delivered on April 16 at 6 PM."),
        # Case 8: Recurring event
        ("Weekly therapy", "Charlie therapy every Thursday at 3pm starting April 17."),
        # Case 9: Structured confirmation
        (
            "Appointment Confirmation",
            "Dear Patient,\n\nYour upcoming visit details:\n"
            "Provider: Dr. Smith\nDate: 05/03/2026\nTime: 9:30 AM\n"
            "Duration: 30 minutes\nClinic: Lakewood Pediatrics",
        ),
        # Case 10: 'today' as date
        ("Reminder", "Reminder: Sam has a dentist appointment today at 3:30 PM."),
        # Case 11: Time zone specified
        ("Telehealth", "Your telehealth session is on April 22 at 11 AM CST."),
        # Case 12: Two appointments in one email
        ("Appointments", "Charlie has therapy on 4/18 at 10am. Sam has soccer on 4/19 at 4pm."),
        # Case 13: Nickname resolution
        ("Chuck dentist", "Chuck has a dentist appointment on Friday, April 18, 2026 at 2:00 PM."),
        # Case 14: The original test that works
        (
            "Chuck Well Child Visit",
            "Charlie has a well child visit scheduled on 4/17 at 10:00AM "
            "at Lakewood Pediatrics clinic.",
        ),
    ]
    print("=" * 70)
    print("QA TEST RESULTS (Prompt-as-Code via LLM)")
    print("=" * 70)
    passed = 0
    failed = 0
    total = len(qa_tests)
    for i, (subject, body) in enumerate(qa_tests, 1):
        print(f"\n--- Case {i}: {subject} ---")
        print(f"Body: {body[:80]}{'...' if len(body) > 80 else ''}")
        results = parse_email_with_llm(subject, body)
        results = [_serialize_result(r) for r in results]

        # First parsed item. An empty dict stands in when nothing was parsed,
        # so every check below fails cleanly instead of raising IndexError.
        first = results[0] if results else {}
        is_appointment = first.get("type") == "appointment"
        has_start = first.get("start") is not None

        # Case-specific validation
        if i == 1:  # Structured confirmation
            ok = (is_appointment
                  and _field_contains(first, "who", "Charlie")
                  and _field_contains(first, "location", "Therapy Center")
                  and has_start)
            dur = first.get("duration_minutes", 0)
            _print_first_fields(results, "Type", "Who", "Start")
            print(f" Duration: {dur} min")
            _print_first_fields(results, "Location")
        elif i == 2:  # next Tuesday
            ok = (is_appointment
                  and _field_contains(first, "who", "Sam")
                  and has_start
                  and _field_contains(first, "location", "East High"))
            _print_first_fields(results, "Type", "Who", "Start", "Location")
        elif i == 3:  # Cancellation
            ok = first.get("type") == "cancellation"
            _print_first_fields(results, "Type", "Start")
        elif i == 4:  # Multi-day
            # Compared against True on purpose: a truthy non-boolean such as
            # "yes" must not count as a pass.
            ok = (first.get("is_multi_day") == True  # noqa: E712
                  and _field_contains(first, "who", "Sam"))
            _print_first_fields(results, "Type", "Who", "Multi-day", "Start")
        elif i == 5:  # 24h time
            ok = (is_appointment
                  and _field_contains(first, "who", "Rover")
                  and has_start)
            _print_first_fields(results, "Type", "Who", "Start")
            start_str = str(first.get("start", ""))
            # "14:" is the hour as written in the email; "T19:" is also
            # accepted (presumably the same instant rendered in UTC — confirm).
            has_14 = "14:" in start_str or "T19:" in start_str
            if ok and not has_14:
                print(" ⚠️ Time may not be 14:00 — check start field above")
        elif i == 6:  # Ambiguous at
            ok = (is_appointment
                  and _field_contains(first, "who", "Rover")
                  and _field_contains(first, "location", "PetSmart")
                  and has_start)
            _print_first_fields(results, "Type", "Who", "Start", "Location")
        elif i == 7:  # False positive
            ok = not results
            print(f" Result: {'Correctly filtered' if ok else 'FALSE POSITIVE — should be filtered'}")
        elif i == 8:  # Recurring
            ok = (first.get("is_recurring") == True  # noqa: E712
                  and _field_contains(first, "who", "Charlie"))
            _print_first_fields(results, "Type", "Who", "Recurring", "Start")
        elif i == 9:  # Structured confirmation with Clinic field
            loc = first.get("location", "")
            loc_text = loc or ""  # a None location simply fails the match
            ok = (is_appointment
                  and ("Lakewood" in loc_text or "Pediatrics" in loc_text)
                  and first.get("duration_minutes") == 30)
            _print_first_fields(results, "Type", "Who", "Start")
            print(f" Duration: {results[0].get('duration_minutes') if results else 'N/A'} min")
            print(f" Location: {loc}")
        elif i == 10:  # today
            ok = (is_appointment
                  and _field_contains(first, "who", "Sam")
                  and has_start)
            _print_first_fields(results, "Type", "Who", "Start")
            start_str = str(first.get("start", ""))
            today_str = datetime.now(CHICAGO_TZ).strftime("%Y-%m-%d")
            # Informational only: the date check does not affect pass/fail.
            if results and start_str.startswith(today_str):
                print(f" ✅ Date resolves to today ({today_str})")
            elif results:
                print(f" ⚠️ Date may not be today — got {start_str[:10]}")
        elif i == 11:  # Time zone
            ok = is_appointment and has_start
            _print_first_fields(results, "Type", "Start")
        elif i == 12:  # Two appointments
            ok = len(results) == 2
            if results:
                print(f" Found {len(results)} appointments:")
                for r in results:
                    print(f" Who: {r.get('who')}, Start: {r.get('start')}")
        elif i == 13:  # Nickname resolution
            ok = _field_contains(first, "who", "Charlie") and has_start
            _print_first_fields(results, "Type", "Who", "Start")
        elif i == 14:  # Original working test
            ok = (_field_contains(first, "who", "Charlie")
                  and has_start
                  and _field_contains(first, "location", "Lakewood"))
            _print_first_fields(results, "Type", "Who", "Start", "Location")
        else:
            # No validation rule exists for this case number: dump what was
            # parsed and count it as failed rather than reusing the previous
            # case's verdict.
            ok = False
            for r in results:
                print(f" Type: {r.get('type', 'N/A')}")
                print(f" Who: {r.get('who', [])}")
                print(f" Start: {r.get('start', 'N/A')}")
                print(f" Duration: {r.get('duration_minutes', 'N/A')} min")
                print(f" Location: {r.get('location', 'N/A')}")

        status = "✅" if ok else "❌"
        print(f" {status} Case {i}")
        if ok:
            passed += 1
        else:
            failed += 1
    print(f"\n{'=' * 70}")
    print(f"Results: {passed}/{total} passed, {failed}/{total} failed")
    print("=" * 70)