GLM-4.7-Flash-REAP-23B-A3B-ROCmFPX-STRIX_LEAN-GGUF / raw-hermes-loop-glm-reap-23b-strix-lean.json
maczzzzzz's picture
Upload GLM-4.7-Flash-REAP-23B-A3B-ROCmFPX-STRIX_LEAN.gguf + bench data (ROCmFPX STRIX_LEAN)
9d4fac2 verified
Raw
History Blame Contribute Delete
7.37 kB
{
"label": "glm-reap-23b-strix_lean",
"endpoint": "http://node-b:18082",
"timestamp": "2026-06-27T18:12:30.765358+00:00",
"scenarios": [
{
"scenario": "single",
"description": "Single tool call \u2014 model must call get_weather for Tokyo",
"status": "PASS",
"tool_match": true,
"tools_called": [
"get_weather"
],
"expected_tool": "get_weather",
"all_args_valid": true,
"final_answer_correct": true,
"final_answer_preview": "The current weather in Tokyo is:\n\n- **Temperature**: 22\u00b0C (72\u00b0F)\n- **Condition**: Partly cloudy\n- **Humidity**: 65%",
"turns_used": 2,
"max_turns": 3,
"efficiency": "OPTIMAL",
"total_time_s": 1.59,
"avg_tps": 38.5,
"turns": [
{
"turn": 1,
"elapsed_s": 0.92,
"tps": 25.1,
"finish_reason": "tool_calls",
"content_preview": "I'll check the current weather in Tokyo for you.",
"tool_calls": [
{
"name": "get_weather",
"args": {
"location": "Tokyo"
},
"args_valid": true
}
]
},
{
"turn": 2,
"elapsed_s": 0.67,
"tps": 51.9,
"finish_reason": "stop",
"content_preview": "The current weather in Tokyo is:\n\n- **Temperature**: 22\u00b0C (72\u00b0F)\n- **Condition**: Partly cloudy\n- **Humidity**: 65%",
"tool_calls": [],
"final": true
}
]
},
{
"scenario": "chained",
"description": "Chained tool calls \u2014 calculate then use result",
"status": "PASS",
"tool_match": true,
"tools_called": [
"calculate"
],
"expected_tool": "calculate",
"all_args_valid": true,
"final_answer_correct": true,
"final_answer_preview": "15 * 37 = 555",
"turns_used": 2,
"max_turns": 3,
"efficiency": "OPTIMAL",
"total_time_s": 0.61,
"avg_tps": 35.8,
"turns": [
{
"turn": 1,
"elapsed_s": 0.31,
"tps": 42.2,
"finish_reason": "tool_calls",
"content_preview": "",
"tool_calls": [
{
"name": "calculate",
"args": {
"expression": "15 * 37"
},
"args_valid": true
}
]
},
{
"turn": 2,
"elapsed_s": 0.31,
"tps": 29.3,
"finish_reason": "stop",
"content_preview": "15 * 37 = 555",
"tool_calls": [],
"final": true
}
]
},
{
"scenario": "multi_step",
"description": "Multi-step \u2014 compare weather in two cities",
"status": "PASS",
"tool_match": true,
"tools_called": [
"get_weather",
"get_weather"
],
"expected_tool": [
"get_weather",
"get_weather"
],
"all_args_valid": true,
"final_answer_correct": true,
"final_answer_preview": "Based on the current weather data:\n\n**Tokyo:** 22\u00b0C (partly cloudy, 65% humidity)\n**London:** 15\u00b0C (rainy, 80% humidity)\n\n**Tokyo is warmer** - it's 7 degrees hotter than London (22\u00b0C vs 15\u00b0C).",
"turns_used": 2,
"max_turns": 5,
"efficiency": "OPTIMAL",
"total_time_s": 1.94,
"avg_tps": 50.8,
"turns": [
{
"turn": 1,
"elapsed_s": 0.72,
"tps": 50.2,
"finish_reason": "tool_calls",
"content_preview": "I'll get the current weather for both cities and then compare them.",
"tool_calls": [
{
"name": "get_weather",
"args": {
"location": "Tokyo"
},
"args_valid": true
},
{
"name": "get_weather",
"args": {
"location": "London"
},
"args_valid": true
}
]
},
{
"turn": 2,
"elapsed_s": 1.23,
"tps": 51.3,
"finish_reason": "stop",
"content_preview": "Based on the current weather data:\n\n**Tokyo:** 22\u00b0C (partly cloudy, 65% humidity)\n**London:** 15\u00b0C (rainy, 80% humidity)\n\n**Tokyo is warmer** - it's 7 degrees hotter than London (22\u00b0C vs 15\u00b0C).",
"tool_calls": [],
"final": true
}
]
},
{
"scenario": "search",
"description": "Search + extract \u2014 find info and report it",
"status": "PASS",
"tool_match": true,
"tools_called": [
"search_web"
],
"expected_tool": "search_web",
"all_args_valid": true,
"final_answer_correct": true,
"final_answer_preview": "According to the search results, the Eiffel Tower is **330 meters tall**.",
"turns_used": 2,
"max_turns": 3,
"efficiency": "OPTIMAL",
"total_time_s": 1.02,
"avg_tps": 46.8,
"turns": [
{
"turn": 1,
"elapsed_s": 0.61,
"tps": 47.2,
"finish_reason": "tool_calls",
"content_preview": "I'll search for information about the Eiffel Tower's height.",
"tool_calls": [
{
"name": "search_web",
"args": {
"query": "Eiffel Tower height"
},
"args_valid": true
}
]
},
{
"turn": 2,
"elapsed_s": 0.41,
"tps": 46.4,
"finish_reason": "stop",
"content_preview": "According to the search results, the Eiffel Tower is **330 meters tall**.",
"tool_calls": [],
"final": true
}
]
},
{
"scenario": "error_recovery",
"description": "Error recovery \u2014 file doesn't exist, model should report it",
"status": "PASS",
"tool_match": true,
"tools_called": [
"read_file"
],
"expected_tool": "read_file",
"all_args_valid": true,
"final_answer_correct": true,
"final_answer_preview": "The file `/nonexistent/path.txt` does not exist. The system returned an error indicating that the file wasn't found.",
"turns_used": 2,
"max_turns": 3,
"efficiency": "OPTIMAL",
"total_time_s": 1.02,
"avg_tps": 48.9,
"turns": [
{
"turn": 1,
"elapsed_s": 0.51,
"tps": 47.0,
"finish_reason": "tool_calls",
"content_preview": "I'll attempt to read the file for you.",
"tool_calls": [
{
"name": "read_file",
"args": {
"path": "/nonexistent/path.txt"
},
"args_valid": true
}
]
},
{
"turn": 2,
"elapsed_s": 0.51,
"tps": 50.8,
"finish_reason": "stop",
"content_preview": "The file `/nonexistent/path.txt` does not exist. The system returned an error indicating that the file wasn't found.",
"tool_calls": [],
"final": true
}
]
}
],
"overall_status": "PASS",
"pass_count": "5/5",
"framework": "hermes_loop_eval.py v1.0"
}