"""Pipeline: raw input → extracted items → classified → route-optimized output."""
from costco_route.llm_client import extract_items, classify_items
from costco_route.item_memory import lookup_items
from costco_route.router import generate_route, format_route, format_route_markdown
def optimize(raw_input: str, use_memory: bool = True, markdown: bool = False) -> dict:
    """Turn a free-form grocery list into a zone-ordered shopping route.

    Args:
        raw_input: Raw grocery list text (e.g., "milk eggs chicken paper towels").
        use_memory: When true, learned item locations are looked up via
            ``lookup_items`` (ChromaDB-backed, per the module's notes) and
            handed to the router as overrides.
        markdown: When true, render with ``format_route_markdown``; otherwise
            with ``format_route`` (the Telegram-style format).

    Returns:
        Dict with keys ``items``, ``classified``, ``learned_overrides``,
        ``route`` and ``output``. When extraction yields nothing, the
        collections are empty and ``output`` is a short "no items" message.
    """
    # Stage 1: pull individual items out of the raw text.
    items = extract_items(raw_input)
    if not items:
        # Nothing to route: short-circuit before any further LLM/memory calls.
        empty_result = {
            "items": [],
            "classified": {},
            "route": [],
            "output": "No items found in input.",
            "learned_overrides": {},
        }
        return empty_result

    # Stage 2: LLM zone classification, then sanitise the result (drop
    # hallucinated entries, re-add items the LLM lost).
    classified = _validate_classification(classify_items(items), items)

    # Stage 3: optional learned overrides from item memory.
    learned_overrides = lookup_items(items) if use_memory else {}

    # Stage 4: build the route; an empty override mapping is passed as None.
    route = generate_route(classified, learned_overrides or None)

    # Stage 5: render with the requested formatter.
    if markdown:
        output = format_route_markdown(route)
    else:
        output = format_route(route)

    return {
        "items": items,
        "classified": classified,
        "learned_overrides": learned_overrides,
        "route": route,
        "output": output,
    }
def learn_correction(item: str, zone_id: str, notes: str = "") -> dict:
    """Save a user correction to the item memory.

    Args:
        item: The item name.
        zone_id: The correct zone ID, a two-digit string "01" through "10".
        notes: Optional notes about the location.

    Returns:
        ``{"status": "learned", "item": item, "zone": zone_id}`` after the
        correction has been handed to ``learn_item``, or
        ``{"error": "..."}`` when ``zone_id`` is not one of the ten valid
        zone IDs (in which case nothing is stored).
    """
    # Validate first so a bad zone is rejected without touching the memory
    # backend at all.
    valid_zones = tuple(f"{number:02d}" for number in range(1, 11))
    if zone_id not in valid_zones:
        return {"error": f"Invalid zone: {zone_id}. Must be 01-10."}

    # Function-scope import, performed only once the input is known-good.
    from costco_route.item_memory import learn_item

    learn_item(item, zone_id, notes)
    return {"status": "learned", "item": item, "zone": zone_id}
# Keyword → zone fallbacks for items the LLM dropped. Entries are tried in
# order and the first keyword that occurs as a substring of the item wins, so
# ORDER IS PRIORITY: the freezer keywords must come before the fresh/dairy
# ones, otherwise "ice cream" is captured by "cream" and "frozen vegetables"
# by "vegetable".
# NOTE: plain substring matching has known false positives (e.g. "egg" also
# hits "eggplant"); it is only a best-effort fallback.
_DROPPED_ITEM_ZONE_KEYWORDS: tuple[tuple[str, str], ...] = (
    # freezer keywords → 09 (checked first, see above)
    ("frozen", "09"), ("ice cream", "09"),
    # produce keywords → 07
    ("fruit", "07"), ("vegetable", "07"), ("tomato", "07"), ("lettuce", "07"),
    ("onion", "07"), ("potato", "07"), ("avocado", "07"), ("banana", "07"),
    ("apple", "07"), ("berry", "07"), ("berries", "07"), ("salad", "07"),
    ("pepper", "07"), ("carrot", "07"), ("broccoli", "07"),
    # dairy keywords → 06
    ("milk", "06"), ("cheese", "06"), ("yogurt", "06"), ("butter", "06"),
    ("egg", "06"), ("cream", "06"), ("bacon", "06"),
    # beverage keywords → 05
    ("water", "05"), ("soda", "05"), ("juice", "05"), ("beer", "05"),
    ("wine", "05"),
    # coffee/tea → 04
    ("coffee", "04"), ("tea", "04"),
    # household keywords → 08
    ("paper", "08"), ("towel", "08"), ("tissue", "08"), ("detergent", "08"),
    ("trash", "08"), ("cleaning", "08"), ("soap", "08"),
)

# Zone used for a dropped item that matches no keyword (Pantry).
_DROPPED_ITEM_FALLBACK_ZONE = "04"


def _validate_classification(classified: dict[str, list[str]], original_items: list[str]) -> dict[str, list[str]]:
    """Remove hallucinated items and recover dropped items.

    The LLM sometimes:
      1. Adds items not in the original list (hallucination)
      2. Drops items from the original list

    This function:
      - Keeps a classified item only if it fuzzy-matches (difflib ratio
        >= 0.6, case-insensitive, whitespace-trimmed) one of the original
        items; zones left with no items are omitted.
      - Re-adds every original item that no classified item matched, placing
        it in a zone chosen by keyword (first hit in
        ``_DROPPED_ITEM_ZONE_KEYWORDS``), or in zone "04" when no keyword
        applies.

    Args:
        classified: Mapping of zone ID to the item names the LLM put there.
        original_items: The items that were actually extracted from the input.

    Returns:
        A new zone ID → item names mapping; ``classified`` is not mutated.
        Kept items retain the LLM's spelling; recovered items use the
        original spelling.
    """
    import difflib

    # Normalised key → original spelling. Duplicate originals that normalise
    # to the same key collapse into one entry.
    originals_by_key = {item.lower().strip(): item for item in original_items}
    candidate_keys = list(originals_by_key)

    # Normalised keys of originals that some classified item accounted for.
    matched_keys: set[str] = set()

    # Pass 1: filter the LLM output, zone by zone.
    validated: dict[str, list[str]] = {}
    for zone_id, zone_items in classified.items():
        kept = []
        for item in zone_items:
            best = difflib.get_close_matches(
                item.lower().strip(), candidate_keys, n=1, cutoff=0.6
            )
            if best:
                matched_keys.add(best[0])
                kept.append(item)
        if kept:
            validated[zone_id] = kept

    # Pass 2: recover originals the LLM dropped.
    for key, original in originals_by_key.items():
        if key in matched_keys:
            continue
        zone = next(
            (kw_zone for kw, kw_zone in _DROPPED_ITEM_ZONE_KEYWORDS if kw in key),
            _DROPPED_ITEM_FALLBACK_ZONE,
        )
        validated.setdefault(zone, []).append(original)

    return validated