# Source: seed.py (5,717 bytes, Apr 30, 2026)

"""Seed ChromaDB with Miller family test documents for staging."""

import logging
from datetime import datetime

from config import IS_STAGING
from brain.embeddings import embed_document
from brain.store import store_document, get_stats

# Configure the root logger at INFO level; this runs at import time.
logging.basicConfig(level=logging.INFO)
# Module-level logger keyed to this module's dotted name. The bare identifier
# `name` is undefined and raised NameError on import; `__name__` is intended.
logger = logging.getLogger(__name__)

def seed_brain_documents() -> None:
    """Seed ChromaDB with Miller family test documents.

    When ``IS_STAGING`` is falsy the function logs an error and returns
    without touching the store. Otherwise every test document is passed
    through ``embed_document`` and then ``store_document``. A failure on one
    document is logged (with traceback) and does not stop the remaining
    documents. At the end, the ``total_chunks`` value reported by
    ``get_stats()`` is logged, preceded by a warning naming any documents
    that could not be added.
    """
    # Guard clause: these fixtures are test data and must never be written
    # outside the staging environment.
    if not IS_STAGING:
        logger.error("Refusing to seed non-staging environment!")
        return

    logger.info("Seeding Brain with Miller family test documents...")

    # NOTE: the multi-line "text" bodies are deliberately flush-left. Leading
    # whitespace inside a triple-quoted string is part of the document text,
    # so indenting them would change what gets embedded.
    test_docs = [
        {
            "id": "newsletter_march_2026",
            "text": """March Newsletter — Lincoln Elementary

Mark your calendars:
• March 14: Parent-teacher conferences (half day, dismissal at 11:30 AM)
• March 24-28: Spring break — NO SCHOOL
• April 4: Spring carnival (Saturday 10 AM - 2 PM)

Reminder: Friday, March 21 is a FULL DAY (not half-day as previously printed in the August calendar).

Best,
Principal Henderson""",
            "metadata": {
                "source_date": "2026-03-01",
                "doc_type": "newsletter",
                "family": "Miller",
                "school": "Lincoln Elementary"
            }
        },
        {
            "id": "invoice_hvac_2026",
            "text": """HVAC Service Invoice
Invoice #HVAC-2026-0315

Service Date: March 1, 2026
Technician: Mike Johnson

Services Performed:
- Annual system inspection
- Filter replacement
- Duct cleaning

Filter size: 16x25x4
Next filter change: June 1, 2026

Total: $245.00

Madison Heating & Cooling""",
            "metadata": {
                "source_date": "2026-03-01",
                "doc_type": "invoice",
                "family": "Miller",
                "vendor": "Madison Heating & Cooling"
            }
        },
        {
            "id": "receipt_vet_2026",
            "text": """Westside Veterinary Clinic
Receipt #4582
Date: February 20, 2026

Patient: Buster Miller (Golden Retriever)
Owner: John Miller

Services:
- Annual wellness exam
- Rabies booster vaccination
- Heartworm test (negative)

Next visit recommended: August 20, 2026

Total: $189.00
Thank you for choosing Westside Veterinary Clinic!""",
            "metadata": {
                "source_date": "2026-02-20",
                "doc_type": "receipt",
                "family": "Miller",
                "vendor": "Westside Veterinary Clinic"
            }
        },
        {
            "id": "invoice_dance_2026",
            "text": """Madison Dance Academy
Invoice #MDA-2026-0342
Date: February 28, 2026

Student: Mia Miller
Program: Spring Ballet Session (March-May)
Schedule: Mondays and Wednesdays 4:00 PM - 5:30 PM
Location: Madison Dance Academy, 123 Main St

Tuition: $285.00
Payment due: March 15, 2026

Questions? Contact us at (608) 555-0123""",
            "metadata": {
                "source_date": "2026-02-28",
                "doc_type": "invoice",
                "family": "Miller",
                "vendor": "Madison Dance Academy"
            }
        },
        {
            "id": "roofer_invoice_2024",
            "text": """Madison Roofing Co.
Invoice #MR-2024-0892
Date: August 15, 2024

Customer: John Miller
Address: 456 Oak Avenue, Madison, WI

Services:
- Complete roof replacement
- Gutter installation
- Skylight repair

Materials: Architectural shingles, 30-year warranty
Total: $8,450.00

Contact: Mike Rodriguez (608) 555-0456
Warranty valid through August 15, 2054""",
            "metadata": {
                "source_date": "2024-08-15",
                "doc_type": "invoice",
                "family": "Miller",
                "vendor": "Madison Roofing Co."
            }
        },
        {
            "id": "email_principal_march_2026",
            "text": """Subject: Important Update — Schedule Change
From: principal@lincoln.madison.k12.wi.us
Date: March 18, 2026

Dear Lincoln Families,

Due to the snow day last week, we need to make up instructional time.
Therefore, Friday, March 21 will now be a FULL DAY (not an early release as originally scheduled).

Dismissal will be at 3:00 PM as usual.

Thank you for your understanding.

Principal Henderson
Lincoln Elementary School""",
            "metadata": {
                "source_date": "2026-03-18",
                "doc_type": "email",
                "family": "Miller",
                "sender": "Principal Henderson"
            }
        },
        {
            "id": "calendar_2025_2026",
            "text": """Madison Metropolitan School District
2025-2026 School Calendar

First day: September 2, 2025
Last day: June 6, 2026

Early Release Fridays:
- All Fridays are early release at 1:00 PM
- Except: March 21 (full day due to snow makeup)

Spring Break: March 24-28, 2026
Thanksgiving Break: November 27-28, 2025
Winter Break: December 22, 2025 - January 2, 2026""",
            "metadata": {
                "source_date": "2025-08-01",
                "doc_type": "static_pdf",
                "family": "Miller",
                "school": "Madison Metropolitan School District"
            }
        }
    ]

    # Embed and store each document independently so that one bad document
    # does not prevent the rest from being seeded.
    failed_ids = []
    for doc in test_docs:
        doc_id = doc["id"]
        try:
            chunks = embed_document(doc_id, doc["text"], doc["metadata"])
            store_document(doc_id, chunks)
            # Kept inside the try so an unexpected return shape from
            # embed_document is handled like any other per-document failure.
            logger.info("Added document: %s (%d chunks)", doc_id, len(chunks))
        except Exception as exc:
            # Best-effort by design: record the failure with its traceback
            # and carry on with the next document.
            logger.exception("Failed to add document %s: %s", doc_id, exc)
            failed_ids.append(doc_id)

    # Make partial seeding visible instead of reporting plain success.
    if failed_ids:
        logger.warning(
            "Seeding finished with %d failed document(s): %s",
            len(failed_ids),
            ", ".join(failed_ids),
        )

    # Report the store-wide chunk count as returned by get_stats().
    stats = get_stats()
    logger.info(
        "Brain seeding complete. Total chunks: %s",
        stats.get("total_chunks", 0),
    )

# Run the seeder only when this file is executed directly, not when imported.
# The dunder names are required: bare `name` is undefined and raised NameError.
if __name__ == "__main__":
    seed_brain_documents()