📄 benchmark-reasoning-results.json 1,398 bytes Apr 19, 2026 📋 Raw

{
"timestamp": "2026-04-17T17:41:49Z",
"url": "http://100.104.147.116:11434/v1/chat/completions",
"results": [
{
"model": "qwen2.5-coder:7b",
"test": "conflict_simple",
"latency": 13.0,
"valid": true,
"score": 3,
"error": null
},
{
"model": "qwen2.5-coder:7b",
"test": "conflict_tough",
"latency": 2.0,
"valid": true,
"score": 3,
"error": null
},
{
"model": "qwen2.5-coder:7b",
"test": "rejection_simple",
"latency": 0.3,
"valid": true,
"score": 3,
"error": null
},
{
"model": "qwen2.5-coder:7b",
"test": "rejection_ambiguous",
"latency": 0.4,
"valid": true,
"score": 3,
"error": null
},
{
"model": "phi4:14b",
"test": "conflict_simple",
"latency": 86.9,
"valid": true,
"score": 3,
"error": null
},
{
"model": "phi4:14b",
"test": "conflict_tough",
"latency": 35.7,
"valid": true,
"score": 0,
"error": null
},
{
"model": "phi4:14b",
"test": "rejection_simple",
"latency": 2.7,
"valid": true,
"score": 3,
"error": null
},
{
"model": "phi4:14b",
"test": "rejection_ambiguous",
"latency": 4.2,
"valid": true,
"score": 3,
"error": null
}
]
}