# test_qa.py

"""QA test suite for the Family Assistant package.

Runs 14 test cases against the LLM endpoint to validate appointment parsing.
Uses generic names — real family data comes from family.yaml (gitignored).
"""

from datetime import datetime

from family_assistant.config import CHICAGO_TZ
from family_assistant.appointment_parser import parse_email_with_llm

def _serialize_result(parsed):
"""Convert datetime objects to ISO strings for JSON serialization."""
result = {}
for key, value in parsed.items():
if isinstance(value, datetime):
result[key] = value.isoformat()
else:
result[key] = value
return result

# Display label and no-result placeholder for each field shown in case details.
_FIELD_DISPLAY = {
    "type": ("Type", "N/A"),
    "who": ("Who", []),
    "start": ("Start", "N/A"),
    "location": ("Location", "N/A"),
    "is_multi_day": ("Multi-day", "N/A"),
    "is_recurring": ("Recurring", "N/A"),
}


def _print_fields(results, *keys):
    """Print one indented detail line per key, taken from the first result.

    With no results the field's placeholder is printed instead; a key that is
    missing from an existing result prints as ``None``.
    """
    for key in keys:
        label, placeholder = _FIELD_DISPLAY[key]
        value = results[0].get(key) if results else placeholder
        print(f"  {label}: {value}")


def run_qa_tests(parser=None):
    """Run the QA test suite against the LLM endpoint and report results.

    Each entry of ``qa_tests`` is a ``(subject, body)`` pair. It is fed to the
    parser, every returned item is passed through ``_serialize_result``, and
    the outcome is checked by a rule selected by the case's 1-based position
    in the list -- so the order of ``qa_tests`` must match the ``i == N``
    branches below. Details, a per-case pass/fail marker and a final summary
    are printed to stdout.

    Args:
        parser: Optional callable ``parser(subject, body)`` returning the
            parsed items for one email. Defaults to ``parse_email_with_llm``.
            A ``None`` or empty return value is treated as "no results".

    Returns:
        None. All reporting is done via ``print``.
    """
    if parser is None:
        parser = parse_email_with_llm

    qa_tests = [
        # Case 1: Forwarded confirmation with structured date block
        ("Confirmation", "Date: April 21, 2026\nTime: 2:00 PM - 3:00 PM\nLocation: Downtown Therapy Center\nPatient: Charlie Smith"),
        # Case 2: 'next Tuesday' relative date
        ("Soccer practice", "Sam has soccer practice next Tuesday at 4pm at East High Field"),
        # Case 3: Cancellation
        ("Appointment Cancelled", "Your appointment on April 20 at 10am has been cancelled."),
        # Case 4: Date range / multi-day
        ("Camp registration", "Sam is registered for summer camp June 15-19, 2026."),
        # Case 5: 24h time format
        ("Vet appointment", "Rover has a vet appointment on 4/18/2026 at 14:00."),
        # Case 6: Ambiguous 'at' - time vs location
        ("Grooming", "Drop off Rover at PetSmart at 9am on April 19."),
        # Case 7: False positive - not an appointment
        ("Fwd: Your order", "Your pizza will be delivered on April 16 at 6 PM."),
        # Case 8: Recurring event
        ("Weekly therapy", "Charlie therapy every Thursday at 3pm starting April 17."),
        # Case 9: Structured confirmation
        ("Appointment Confirmation", "Dear Patient,\n\nYour upcoming visit details:\nProvider: Dr. Smith\nDate: 05/03/2026\nTime: 9:30 AM\nDuration: 30 minutes\nClinic: Lakewood Pediatrics"),
        # Case 10: 'today' as date
        ("Reminder", "Reminder: Sam has a dentist appointment today at 3:30 PM."),
        # Case 11: Time zone specified
        ("Telehealth", "Your telehealth session is on April 22 at 11 AM CST."),
        # Case 12: Two appointments in one email
        ("Appointments", "Charlie has therapy on 4/18 at 10am. Sam has soccer on 4/19 at 4pm."),
        # Case 13: Nickname resolution
        ("Chuck dentist", "Chuck has a dentist appointment on Friday, April 18, 2026 at 2:00 PM."),
        # Case 14: The original test that works
        ("Chuck Well Child Visit", "Charlie has a well child visit scheduled on 4/17 at 10:00AM at Lakewood Pediatrics clinic."),
    ]

    print("=" * 70)
    print("QA TEST RESULTS (Prompt-as-Code via LLM)")
    print("=" * 70)

    passed = 0
    failed = 0
    total = len(qa_tests)

    for i, (subject, body) in enumerate(qa_tests, 1):
        print(f"\n--- Case {i}: {subject} ---")
        print(f"Body: {body[:80]}{'...' if len(body) > 80 else ''}")

        results = [_serialize_result(r) for r in (parser(subject, body) or [])]

        # All checks look at the first result only. An empty dict stands in
        # when nothing was returned so every lookup below is safe, and null
        # "who"/"location" values are normalised so the `in` tests cannot
        # raise TypeError.
        first = results[0] if results else {}
        kind = first.get("type")
        who = first.get("who") or []
        location = first.get("location") or ""
        has_start = first.get("start") is not None

        # Case-specific validation
        if i == 1:  # Structured confirmation
            ok = (kind == "appointment"
                  and "Charlie" in who
                  and "Therapy Center" in location
                  and has_start)
            _print_fields(results, "type", "who", "start")
            print(f"  Duration: {first.get('duration_minutes', 0)} min")
            _print_fields(results, "location")

        elif i == 2:  # next Tuesday
            ok = (kind == "appointment"
                  and "Sam" in who
                  and has_start
                  and "East High" in location)
            _print_fields(results, "type", "who", "start", "location")

        elif i == 3:  # Cancellation
            ok = kind == "cancellation"
            _print_fields(results, "type", "start")

        elif i == 4:  # Multi-day
            # Equality (not identity) is kept from the original check, so a
            # value that compares equal to True, such as 1, still passes.
            ok = first.get("is_multi_day") == True and "Sam" in who  # noqa: E712
            _print_fields(results, "type", "who", "is_multi_day", "start")

        elif i == 5:  # 24h time
            ok = kind == "appointment" and "Rover" in who and has_start
            _print_fields(results, "type", "who", "start")
            start_str = str(first.get("start", ""))
            # "14:" also covers "T14:". "T19:" is presumably 14:00 Central
            # expressed in UTC -- TODO confirm against the parser's output.
            has_14 = "14:" in start_str or "T19:" in start_str
            if ok and not has_14:
                print("  ⚠️  Time may not be 14:00 — check start field above")

        elif i == 6:  # Ambiguous at
            ok = (kind == "appointment"
                  and "Rover" in who
                  and "PetSmart" in location
                  and has_start)
            _print_fields(results, "type", "who", "start", "location")

        elif i == 7:  # False positive
            ok = not results
            print(f"  Result: {'Correctly filtered' if ok else 'FALSE POSITIVE — should be filtered'}")

        elif i == 8:  # Recurring
            # Equality rather than identity, as in the multi-day case.
            ok = first.get("is_recurring") == True and "Charlie" in who  # noqa: E712
            _print_fields(results, "type", "who", "is_recurring", "start")

        elif i == 9:  # Structured confirmation with Clinic field
            ok = (kind == "appointment"
                  and ("Lakewood" in location or "Pediatrics" in location)
                  and first.get("duration_minutes") == 30)
            duration = first.get("duration_minutes") if results else "N/A"
            _print_fields(results, "type", "who", "start")
            print(f"  Duration: {duration} min")
            # This case shows an empty string, not 'N/A', when there is no location.
            print(f"  Location: {first.get('location', '')}")

        elif i == 10:  # today
            ok = kind == "appointment" and "Sam" in who and has_start
            _print_fields(results, "type", "who", "start")
            if results:
                start_str = str(first.get("start", ""))
                today_str = datetime.now(CHICAGO_TZ).strftime("%Y-%m-%d")
                if start_str.startswith(today_str):
                    print(f"  ✅ Date resolves to today ({today_str})")
                else:
                    print(f"  ⚠️  Date may not be today — got {start_str[:10]}")

        elif i == 11:  # Time zone
            ok = kind == "appointment" and has_start
            _print_fields(results, "type", "start")

        elif i == 12:  # Two appointments
            ok = len(results) == 2
            if results:
                print(f"  Found {len(results)} appointments:")
                for r in results:
                    print(f"    Who: {r.get('who')}, Start: {r.get('start')}")

        elif i == 13:  # Nickname resolution
            ok = "Charlie" in who and has_start
            _print_fields(results, "type", "who", "start")

        elif i == 14:  # Original working test
            ok = "Charlie" in who and has_start and "Lakewood" in location
            _print_fields(results, "type", "who", "start", "location")

        else:
            # No validation rule exists for this position: show what was
            # parsed, but never inherit the previous case's verdict.
            ok = False
            for r in results:
                print(f"  Type: {r.get('type', 'N/A')}")
                print(f"  Who: {r.get('who', [])}")
                print(f"  Start: {r.get('start', 'N/A')}")
                print(f"  Duration: {r.get('duration_minutes', 'N/A')} min")
                print(f"  Location: {r.get('location', 'N/A')}")

        status = "✅" if ok else "❌"
        print(f"  {status} Case {i}")
        if ok:
            passed += 1
        else:
            failed += 1

    print(f"\n{'=' * 70}")
    print(f"Results: {passed}/{total} passed, {failed}/{total} failed")
    print("=" * 70)