# pipeline.py

"""Pipeline: raw input → extracted items → classified → route-optimized output."""

from costco_route.llm_client import extract_items, classify_items
from costco_route.item_memory import lookup_items
from costco_route.router import generate_route, format_route, format_route_markdown

def optimize(raw_input: str, use_memory: bool = True, markdown: bool = False) -> dict:
    """Turn a free-form grocery list into a zone-ordered shopping route.

    Stages, in order: item extraction, LLM zone classification, validation
    of that classification against the extracted items, optional lookup of
    learned item locations, route generation, and formatting.

    Args:
        raw_input: Raw grocery list text (e.g., "milk eggs chicken paper towels")
        use_memory: Whether to check ChromaDB for learned item locations
        markdown: Use markdown formatting instead of Telegram format

    Returns:
        Dict with the keys ``items``, ``classified``, ``learned_overrides``,
        ``route`` and ``output``. When extraction yields nothing, the
        collections are empty and ``output`` holds a "no items" message.
    """
    extracted = extract_items(raw_input)

    # Nothing to classify or route: short-circuit with an empty result.
    if not extracted:
        return {
            "items": [],
            "classified": {},
            "route": [],
            "output": "No items found in input.",
            "learned_overrides": {},
        }

    # Classify via the LLM, then drop hallucinated items and restore any
    # extracted items the classifier left out.
    zones = _validate_classification(classify_items(extracted), extracted)

    # Learned locations are optional; an empty mapping means "no overrides".
    overrides = lookup_items(extracted) if use_memory else {}

    # The router receives None rather than an empty mapping.
    route = generate_route(zones, overrides or None)

    render = format_route_markdown if markdown else format_route

    return {
        "items": extracted,
        "classified": zones,
        "learned_overrides": overrides,
        "route": route,
        "output": render(route),
    }

def learn_correction(item: str, zone_id: str, notes: str = "") -> dict:
    """Save a user correction to the item memory.

    The zone is validated before the item-memory module is imported, so an
    invalid request is rejected without loading (or depending on) the
    storage backend.

    Args:
        item: The item name
        zone_id: The correct zone ID, a two-digit string from "01" to "10"
        notes: Optional notes about the location

    Returns:
        ``{"status": "learned", "item": item, "zone": zone_id}`` on success,
        or ``{"error": "..."}`` when ``zone_id`` is not a valid zone.
    """
    # A tuple (not a set) so that unhashable input is rejected, not raised on.
    valid_zones = tuple(f"{n:02d}" for n in range(1, 11))
    if zone_id not in valid_zones:
        return {"error": f"Invalid zone: {zone_id}. Must be 01-10."}

    # Imported lazily, and only once the input is known to be valid.
    from costco_route.item_memory import learn_item

    learn_item(item, zone_id, notes)
    return {"status": "learned", "item": item, "zone": zone_id}

def _validate_classification(classified: dict[str, list[str]], original_items: list[str]) -> dict[str, list[str]]:
"""Remove hallucinated items and recover dropped items.

The LLM sometimes:
1. Adds items not in the original list (hallucination)
2. Drops items from the original list

This function:
- Removes any classified item that doesn't fuzzy-match an original item
- Puts recovered dropped items into a sensible default zone
"""
import difflib

# Build a set of original items (lowercased for comparison)
original_lower = {item.lower().strip(): item for item in original_items}

# Track which original items have been matched
matched_originals = set()

# Filter classified: remove hallucinations, track matches
validated = {}
for zone_id, zone_items in classified.items():
    kept = []
    for item in zone_items:
        item_lower = item.lower().strip()
        # Check if this item matches any original item
        matches = difflib.get_close_matches(
            item_lower, original_lower.keys(), n=1, cutoff=0.6
        )
        if matches:
            matched_originals.add(matches[0])
            kept.append(item)
    if kept:
        validated[zone_id] = kept

# Find dropped items (in original but not matched)
dropped = {k: v for k, v in original_lower.items() if k not in matched_originals}

if dropped:
    # Assign dropped items to reasonable zones by keyword
    ZONE_DEFAULTS = {
        # produce keywords → 07
        'fruit': '07', 'vegetable': '07', 'tomato': '07', 'lettuce': '07',
        'onion': '07', 'potato': '07', 'avocado': '07', 'banana': '07',
        'apple': '07', 'berry': '07', 'berry': '07', 'salad': '07',
        'pepper': '07', 'carrot': '07', 'broccoli': '07',
        # dairy keywords → 06
        'milk': '06', 'cheese': '06', 'yogurt': '06', 'butter': '06',
        'egg': '06', 'cream': '06', 'bacon': '06',
        # beverage keywords → 05
        'water': '05', 'soda': '05', 'juice': '05', 'beer': '05', 'wine': '05',
        'coffee': '04', 'tea': '04',
        # household keywords → 08
        'paper': '08', 'towel': '08', 'tissue': '08', 'detergent': '08',
        'trash': '08', 'cleaning': '08', 'soap': '08',
        # freezer keywords → 09
        'frozen': '09', 'ice cream': '09',
    }
    for dropped_lower, dropped_original in dropped.items():
        # Try keyword matching first
        zone = '04'  # default to Pantry
        for kw, kw_zone in ZONE_DEFAULTS.items():
            if kw in dropped_lower:
                zone = kw_zone
                break
        validated.setdefault(zone, []).append(dropped_original)

return validated