""" Model Gate — Icarus Staging Model Restriction Updated: Director override allows 8B models for M4 16GB simulation """ import os import sys from typing import List, Optional # Allowed models for Icarus staging (Updated: 8B class for M4 16GB target) ALLOWED_MODELS: List[str] = [ # Primary (8B class — complex extraction, JSON, briefing) "llama3.1:8b", "llama3.1:8b-instruct-q4_K_M", "qwen2.5:7b", "qwen2.5:7b-instruct-q4_0", "qwen2.5-coder:7b", "llama3.1:8b-instruct", # Utility (3B class — spam filtering, fast tasks) "llama3.2:3b", "llama3.2:3b-instruct", "qwen2.5:3b", "qwen2.5:3b-instruct", "phi3:mini", "phi3:3.8b", # Embeddings (always allowed) "nomic-embed-text", "nomic-embed-text-v1.5", ] # Blocked model patterns (anything larger than 8B) BLOCKED_PATTERNS = [":14b", ":32b", ":70b", ":34b", ":65b", ":110b"] def is_model_allowed(model_name: str) -> bool: """Check if model is allowed for Icarus staging.""" model_lower = model_name.lower() # Exact match whitelist if model_lower in [m.lower() for m in ALLOWED_MODELS]: return True # Pattern blacklist (oversized models) for pattern in BLOCKED_PATTERNS: if pattern in model_lower: return False # Allow known size classes up to 8B if any(size in model_lower for size in [":1b", ":2b", ":3b", ":4b", ":7b", ":8b"]): return True # Unknown model — block by default return False def get_allowed_model(preferred: Optional[str] = None) -> str: """ Get model for Icarus staging. If preferred is specified and allowed, use it. Otherwise fall back to first allowed model. """ if preferred and is_model_allowed(preferred): return preferred if preferred and not is_model_allowed(preferred): print(f"⚠️ Model '{preferred}' blocked in staging. Using fallback.", file=sys.stderr) return ALLOWED_MODELS[0] # Default to llama3.1:8b def validate_ollama_request(model: str) -> None: """ Validate and raise if model is not allowed. Call this before any Ollama API call in Icarus staging. Only enforces when ICARUS_ENV=staging """ if os.environ.get("ICARUS_ENV") != "staging": return # Only enforce in staging if not is_model_allowed(model): raise ValueError( f"🚫 Model '{model}' is BLOCKED in Icarus staging.\n" f"Allowed models: {', '.join(ALLOWED_MODELS[:6])}...\n" f"See model_gate.py for full list" ) def get_model_for_task(task: str) -> str: """ Get appropriate model based on task complexity. Task categories: - "complex": JSON extraction, briefing generation → 8B - "fast": Spam filtering, quick checks → 3B - "embed": Embeddings → nomic-embed-text """ task_lower = task.lower() if task_lower in ["embed", "embedding"]: return "nomic-embed-text" elif task_lower in ["complex", "extraction", "json", "briefing"]: return "llama3.1:8b" elif task_lower in ["fast", "spam", "filter"]: return "llama3.2:3b" # Default to 8B for unknown tasks return ALLOWED_MODELS[0] if __name__ == "__main__": # CLI test test_models = sys.argv[1:] if len(sys.argv) > 1 else [ "llama3.1:8b", "qwen2.5-coder:7b", "llama3.2:3b", "nomic-embed-text", "llama3.1:70b", # Should be blocked "qwen2.5:32b", # Should be blocked ] print("Icarus Staging Model Gate (Updated)") print("=" * 40) print(f"Environment: {os.environ.get('ICARUS_ENV', 'not set')}") print(f"Enforcement: {'ACTIVE' if os.environ.get('ICARUS_ENV') == 'staging' else 'OFF'}") print("-" * 40) for model in test_models: allowed = "✅ ALLOWED" if is_model_allowed(model) else "🚫 BLOCKED" print(f"{model:25} {allowed}")