"""Pipeline: raw input → extracted items → classified → route-optimized output.""" from costco_route.llm_client import extract_items, classify_items from costco_route.item_memory import lookup_items from costco_route.router import generate_route, format_route, format_route_markdown def optimize(raw_input: str, use_memory: bool = True, markdown: bool = False) -> dict: """Full pipeline: stream-of-consciousness → optimized shopping list. Args: raw_input: Raw grocery list text (e.g., "milk eggs chicken paper towels") use_memory: Whether to check ChromaDB for learned item locations markdown: Use markdown formatting instead of Telegram format Returns: Dict with route, formatted output, and metadata. """ # Step 1: Extract individual items from raw input items = extract_items(raw_input) if not items: return { "items": [], "classified": {}, "route": [], "output": "No items found in input.", "learned_overrides": {}, } # Step 2: Classify items into zones via LLM classified = classify_items(items) # Step 2.5: Validate — remove hallucinated items, recover dropped items classified = _validate_classification(classified, items) # Step 3: Check learned overrides from ChromaDB learned_overrides = {} if use_memory: learned_overrides = lookup_items(items) # Step 4: Generate route (sorted by zone traversal order) route = generate_route(classified, learned_overrides if learned_overrides else None) # Step 5: Format output formatter = format_route_markdown if markdown else format_route output = formatter(route) return { "items": items, "classified": classified, "learned_overrides": learned_overrides, "route": route, "output": output, } def learn_correction(item: str, zone_id: str, notes: str = "") -> dict: """Save a user correction to the item memory. Args: item: The item name zone_id: The correct zone ID (01-10) notes: Optional notes about the location Returns: Confirmation dict. """ from costco_route.item_memory import learn_item if zone_id not in ("01","02","03","04","05","06","07","08","09","10"): return {"error": f"Invalid zone: {zone_id}. Must be 01-10."} learn_item(item, zone_id, notes) return {"status": "learned", "item": item, "zone": zone_id} def _validate_classification(classified: dict[str, list[str]], original_items: list[str]) -> dict[str, list[str]]: """Remove hallucinated items and recover dropped items. The LLM sometimes: 1. Adds items not in the original list (hallucination) 2. Drops items from the original list This function: - Removes any classified item that doesn't fuzzy-match an original item - Puts recovered dropped items into a sensible default zone """ import difflib # Build a set of original items (lowercased for comparison) original_lower = {item.lower().strip(): item for item in original_items} # Track which original items have been matched matched_originals = set() # Filter classified: remove hallucinations, track matches validated = {} for zone_id, zone_items in classified.items(): kept = [] for item in zone_items: item_lower = item.lower().strip() # Check if this item matches any original item matches = difflib.get_close_matches( item_lower, original_lower.keys(), n=1, cutoff=0.6 ) if matches: matched_originals.add(matches[0]) kept.append(item) if kept: validated[zone_id] = kept # Find dropped items (in original but not matched) dropped = {k: v for k, v in original_lower.items() if k not in matched_originals} if dropped: # Assign dropped items to reasonable zones by keyword ZONE_DEFAULTS = { # produce keywords → 07 'fruit': '07', 'vegetable': '07', 'tomato': '07', 'lettuce': '07', 'onion': '07', 'potato': '07', 'avocado': '07', 'banana': '07', 'apple': '07', 'berry': '07', 'berry': '07', 'salad': '07', 'pepper': '07', 'carrot': '07', 'broccoli': '07', # dairy keywords → 06 'milk': '06', 'cheese': '06', 'yogurt': '06', 'butter': '06', 'egg': '06', 'cream': '06', 'bacon': '06', # beverage keywords → 05 'water': '05', 'soda': '05', 'juice': '05', 'beer': '05', 'wine': '05', 'coffee': '04', 'tea': '04', # household keywords → 08 'paper': '08', 'towel': '08', 'tissue': '08', 'detergent': '08', 'trash': '08', 'cleaning': '08', 'soap': '08', # freezer keywords → 09 'frozen': '09', 'ice cream': '09', } for dropped_lower, dropped_original in dropped.items(): # Try keyword matching first zone = '04' # default to Pantry for kw, kw_zone in ZONE_DEFAULTS.items(): if kw in dropped_lower: zone = kw_zone break validated.setdefault(zone, []).append(dropped_original) return validated