# model_gate.py

"""
Model Gate — Icarus Staging Model Restriction
Updated: Director override allows 8B models for M4 16GB simulation
"""

import os
import sys
from typing import List, Optional

# Allowed models for Icarus staging (updated: 8B class for M4 16GB target)

ALLOWED_MODELS: List[str] = [
    # Order matters: index 0 is what get_allowed_model() and
    # get_model_for_task() return as their fallback.

    # Primary tier: 8B-class models for complex extraction, JSON, briefing.
    "llama3.1:8b",
    "llama3.1:8b-instruct-q4_K_M",
    "qwen2.5:7b",
    "qwen2.5:7b-instruct-q4_0",
    "qwen2.5-coder:7b",
    "llama3.1:8b-instruct",

    # Utility tier: 3B-class models for spam filtering and other fast tasks.
    "llama3.2:3b",
    "llama3.2:3b-instruct",
    "qwen2.5:3b",
    "qwen2.5:3b-instruct",
    "phi3:mini",
    "phi3:3.8b",

    # Embedding models (always allowed).
    "nomic-embed-text",
    "nomic-embed-text-v1.5",
]

# Blocked model patterns (anything larger than 8B)

# Size tags that mark a model as too large for staging. They are matched as
# substrings of the lowercased model name by is_model_allowed().
BLOCKED_PATTERNS = [
    ":14b",
    ":32b",
    ":70b",
    ":34b",
    ":65b",
    ":110b",
]

def is_model_allowed(model_name: str) -> bool:
    """Check whether a model may be used in Icarus staging.

    The decision is made in this order:

    1. A case-insensitive exact match against ``ALLOWED_MODELS`` allows it.
    2. Any ``BLOCKED_PATTERNS`` substring in the lowercased name blocks it.
    3. One of the size tags ``:1b``, ``:2b``, ``:3b``, ``:4b``, ``:7b`` or
       ``:8b`` appearing in the name allows it.
    4. Anything else is treated as unknown and blocked.

    Args:
        model_name: Model identifier, e.g. ``"llama3.1:8b"``.

    Returns:
        True if the model is allowed, False otherwise. Non-string or empty
        input is treated as an unknown model and blocked.
    """
    # Fail closed on bad input instead of raising AttributeError on .lower().
    if not isinstance(model_name, str) or not model_name:
        return False

    model_lower = model_name.lower()

    # Exact match whitelist (case-insensitive).
    if any(model_lower == allowed.lower() for allowed in ALLOWED_MODELS):
        return True

    # Pattern blacklist (oversized models) takes priority over size tags.
    if any(pattern in model_lower for pattern in BLOCKED_PATTERNS):
        return False

    # Allow known size classes up to 8B; unknown models are blocked by default.
    small_size_tags = (":1b", ":2b", ":3b", ":4b", ":7b", ":8b")
    return any(tag in model_lower for tag in small_size_tags)

def get_allowed_model(preferred: Optional[str] = None) -> str:
    """Pick the model to use in Icarus staging.

    Args:
        preferred: Model the caller would like to use, or None/empty for
            no preference.

    Returns:
        ``preferred`` when it is given and passes ``is_model_allowed``;
        otherwise the first entry of ``ALLOWED_MODELS``. When a preferred
        model is given but blocked, a warning is written to stderr before
        falling back.
    """
    # No preference: go straight to the default without warning.
    if not preferred:
        return ALLOWED_MODELS[0]  # Default to llama3.1:8b

    # Evaluate the gate once; the result decides both outcomes below.
    if is_model_allowed(preferred):
        return preferred

    print(f"⚠️  Model '{preferred}' blocked in staging. Using fallback.", file=sys.stderr)
    return ALLOWED_MODELS[0]  # Default to llama3.1:8b

def validate_ollama_request(model: str) -> None:
    """Raise if ``model`` is not allowed in Icarus staging.

    Intended to be called before any Ollama API call. The check is only
    enforced when the ``ICARUS_ENV`` environment variable equals
    ``"staging"`` exactly; in every other environment this is a no-op.

    Args:
        model: Model identifier about to be sent to Ollama.

    Raises:
        ValueError: If enforcement is active and ``is_model_allowed(model)``
            is False.
    """
    if os.environ.get("ICARUS_ENV") != "staging":
        return  # Only enforce in staging

    if not is_model_allowed(model):
        # Only the first six allowed models are listed to keep the message short.
        raise ValueError(
            f"🚫 Model '{model}' is BLOCKED in Icarus staging.\n"
            f"Allowed models: {', '.join(ALLOWED_MODELS[:6])}...\n"
            f"See model_gate.py for full list"
        )

def get_model_for_task(task: str) -> str:
    """Return the model to use for a task category.

    Matching is case-insensitive and ignores surrounding whitespace.

    Task categories:
    - "embed", "embedding" -> "nomic-embed-text"
    - "complex", "extraction", "json", "briefing" -> "llama3.1:8b"
    - "fast", "spam", "filter" -> "llama3.2:3b"

    Args:
        task: Task category name.

    Returns:
        The model name for the category, or the first entry of
        ``ALLOWED_MODELS`` when the task is not recognized.
    """
    # Strip so that " spam " is not silently routed to the larger default.
    task_key = task.strip().lower()

    if task_key in ("embed", "embedding"):
        return "nomic-embed-text"
    if task_key in ("complex", "extraction", "json", "briefing"):
        return "llama3.1:8b"
    if task_key in ("fast", "spam", "filter"):
        return "llama3.2:3b"

    # Default to 8B for unknown tasks
    return ALLOWED_MODELS[0]

if __name__ == "__main__":
    # CLI smoke test: check the model names given on the command line, or a
    # built-in sample when none are given, and print the gate's verdict.
    test_models = sys.argv[1:] or [
        "llama3.1:8b",
        "qwen2.5-coder:7b",
        "llama3.2:3b",
        "nomic-embed-text",
        "llama3.1:70b",  # Should be blocked
        "qwen2.5:32b",  # Should be blocked
    ]

    print("Icarus Staging Model Gate (Updated)")
    print("=" * 40)
    print(f"Environment: {os.environ.get('ICARUS_ENV', 'not set')}")
    print(f"Enforcement: {'ACTIVE' if os.environ.get('ICARUS_ENV') == 'staging' else 'OFF'}")
    print("-" * 40)

    # is_model_allowed() is reported regardless of ICARUS_ENV; the
    # "Enforcement" line above shows whether validate_ollama_request() would act.
    for model in test_models:
        allowed = "✅ ALLOWED" if is_model_allowed(model) else "🚫 BLOCKED"
        print(f"{model:25} {allowed}")

# End of model_gate.py