"""
Model Gate — Icarus Staging Model Restriction
Updated: Director override allows 8B models for M4 16GB simulation
"""
import os
import sys
from typing import List, Optional
# Allowed models for Icarus staging (Updated: 8B class for M4 16GB target)
# Order matters: the first entry is what get_allowed_model() and
# get_model_for_task() fall back to when nothing more specific applies.
ALLOWED_MODELS: List[str] = [
    # --- Primary tier: 8B class (complex extraction, JSON, briefing) ---
    "llama3.1:8b",
    "llama3.1:8b-instruct-q4_K_M",
    "qwen2.5:7b",
    "qwen2.5:7b-instruct-q4_0",
    "qwen2.5-coder:7b",
    "llama3.1:8b-instruct",
    # --- Utility tier: 3B class (spam filtering, fast tasks) ---
    "llama3.2:3b",
    "llama3.2:3b-instruct",
    "qwen2.5:3b",
    "qwen2.5:3b-instruct",
    "phi3:mini",
    "phi3:3.8b",
    # --- Embedding models (always allowed) ---
    "nomic-embed-text",
    "nomic-embed-text-v1.5",
]
# Blocked model patterns (anything larger than 8B)
# is_model_allowed() tests each entry as a plain substring of the
# lower-cased model name, so entries must be lower-case.
BLOCKED_PATTERNS: List[str] = [
    ":14b",
    ":32b",
    ":70b",
    ":34b",
    ":65b",
    ":110b",
]
# Size tags accepted for models that are not on the explicit whitelist.
_ALLOWED_SIZE_TAGS = (":1b", ":2b", ":3b", ":4b", ":7b", ":8b")


def _has_size_tag(name: str, tag: str) -> bool:
    """Return True if *tag* occurs in *name* as a complete size token.

    The character following the tag must be absent or non-alphanumeric, so
    ":4b" matches "model:4b" and "model:4b-q4_0" but not "model:4bit".
    """
    start = name.find(tag)
    while start != -1:
        end = start + len(tag)
        if end == len(name) or not name[end].isalnum():
            return True
        start = name.find(tag, start + 1)
    return False


def is_model_allowed(model_name: str) -> bool:
    """Check if a model is allowed for Icarus staging.

    Rules are applied in this order, all case-insensitively:

    1. An exact match against ``ALLOWED_MODELS`` is allowed.
    2. A name containing any ``BLOCKED_PATTERNS`` entry is rejected.
    3. A name carrying one of the known size tags up to 8B
       (``:1b``, ``:2b``, ``:3b``, ``:4b``, ``:7b``, ``:8b``) is allowed.
    4. Anything else is rejected.

    Args:
        model_name: Model identifier, e.g. ``"llama3.1:8b"``.

    Returns:
        True if the model may be used, False otherwise. Empty or non-string
        input is rejected rather than raising.
    """
    # A gate should fail closed on junk input instead of crashing the caller.
    if not isinstance(model_name, str) or not model_name:
        return False

    model_lower = model_name.lower()

    # Exact-match whitelist. Rebuilt per call so runtime changes to
    # ALLOWED_MODELS are honoured.
    if model_lower in {allowed.lower() for allowed in ALLOWED_MODELS}:
        return True

    # Pattern blacklist (oversized models); deliberately a loose substring
    # match so that it errs on the side of blocking.
    if any(pattern in model_lower for pattern in BLOCKED_PATTERNS):
        return False

    # Known size classes up to 8B. The tag must end at a token boundary so a
    # quantisation-style suffix such as ":4bit" is not mistaken for a size.
    # Unknown models fall through to False (blocked by default).
    return any(_has_size_tag(model_lower, tag) for tag in _ALLOWED_SIZE_TAGS)
def get_allowed_model(preferred: Optional[str] = None) -> str:
    """Pick the model to use in Icarus staging.

    Args:
        preferred: Model the caller would like to use, or None/empty for
            no preference.

    Returns:
        ``preferred`` when it is given and passes ``is_model_allowed``;
        otherwise the first entry of ``ALLOWED_MODELS``. When a preferred
        model is rejected, a warning is written to stderr first.
    """
    if preferred:
        # Evaluate the gate once; the outcome decides between the two paths.
        if is_model_allowed(preferred):
            return preferred
        print(f"⚠️ Model '{preferred}' blocked in staging. Using fallback.", file=sys.stderr)

    # Fallback: head of the whitelist.
    return ALLOWED_MODELS[0]
def validate_ollama_request(model: str) -> None:
    """Validate a model before an Ollama API call and raise if it is blocked.

    Enforcement is active only when the ``ICARUS_ENV`` environment variable
    equals ``"staging"``; in any other environment the call is a no-op.

    Args:
        model: Model identifier about to be sent to Ollama.

    Raises:
        ValueError: If enforcement is active and ``is_model_allowed(model)``
            is False. The message lists the first six allowed models.
    """
    # Only enforce in staging.
    if os.environ.get("ICARUS_ENV") != "staging":
        return

    if is_model_allowed(model):
        return

    # Show a short preview rather than the whole whitelist.
    preview = ", ".join(ALLOWED_MODELS[:6])
    raise ValueError(
        f"🚫 Model '{model}' is BLOCKED in Icarus staging.\n"
        f"Allowed models: {preview}...\n"
        f"See model_gate.py for full list"
    )
# Task keyword (lower-case) -> model used for that category of work.
_TASK_MODELS = {
    # Embeddings
    "embed": "nomic-embed-text",
    "embedding": "nomic-embed-text",
    # Complex work: JSON extraction, briefing generation -> 8B
    "complex": "llama3.1:8b",
    "extraction": "llama3.1:8b",
    "json": "llama3.1:8b",
    "briefing": "llama3.1:8b",
    # Fast work: spam filtering, quick checks -> 3B
    "fast": "llama3.2:3b",
    "spam": "llama3.2:3b",
    "filter": "llama3.2:3b",
}


def get_model_for_task(task: str) -> str:
    """Get the appropriate model for a task category.

    Matching is case-insensitive. Recognised categories:

    - ``"embed"``, ``"embedding"`` -> ``nomic-embed-text``
    - ``"complex"``, ``"extraction"``, ``"json"``, ``"briefing"`` -> ``llama3.1:8b``
    - ``"fast"``, ``"spam"``, ``"filter"`` -> ``llama3.2:3b``

    Args:
        task: Task category keyword.

    Returns:
        The model name for the category, or the first entry of
        ``ALLOWED_MODELS`` for an unrecognised task.
    """
    chosen = _TASK_MODELS.get(task.lower())
    if chosen is not None:
        return chosen

    # Unknown tasks default to the head of the whitelist.
    return ALLOWED_MODELS[0]
if __name__ == "__main__":
    # CLI smoke test: report the gate's verdict for the models named on the
    # command line, or for a built-in sample when none are given.
    test_models = sys.argv[1:] if len(sys.argv) > 1 else [
        "llama3.1:8b",
        "qwen2.5-coder:7b",
        "llama3.2:3b",
        "nomic-embed-text",
        "llama3.1:70b",  # Should be blocked
        "qwen2.5:32b",  # Should be blocked
    ]

    print("Icarus Staging Model Gate (Updated)")
    print("=" * 40)
    print(f"Environment: {os.environ.get('ICARUS_ENV', 'not set')}")
    # validate_ollama_request() only enforces when ICARUS_ENV == "staging".
    print(f"Enforcement: {'ACTIVE' if os.environ.get('ICARUS_ENV') == 'staging' else 'OFF'}")
    print("-" * 40)

    for model in test_models:
        allowed = "✅ ALLOWED" if is_model_allowed(model) else "🚫 BLOCKED"
        print(f"{model:25} {allowed}")