{"case_id": "ocr_00", "category": "ocr", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/ocr_00.png", "prompt": "What four digit code is shown? Answer only the four digits.", "expected": "8400", "raw_response": "8400", "correct": true, "error": null} {"case_id": "ocr_01", "category": "ocr", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/ocr_01.png", "prompt": "What four digit code is shown? Answer only the four digits.", "expected": "9161", "raw_response": "9161", "correct": true, "error": null} {"case_id": "ocr_02", "category": "ocr", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/ocr_02.png", "prompt": "What four digit code is shown? Answer only the four digits.", "expected": "4705", "raw_response": "4705", "correct": true, "error": null} {"case_id": "ocr_03", "category": "ocr", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/ocr_03.png", "prompt": "What four digit code is shown? Answer only the four digits.", "expected": "5207", "raw_response": "5207", "correct": true, "error": null} {"case_id": "count_00", "category": "counting", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/count_00.png", "prompt": "How many purple circles are visible? Answer only the number.", "expected": "8", "raw_response": "8", "correct": true, "error": null} {"case_id": "count_01", "category": "counting", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/count_01.png", "prompt": "How many purple circles are visible? Answer only the number.", "expected": "8", "raw_response": "8", "correct": true, "error": null} {"case_id": "count_02", "category": "counting", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/count_02.png", "prompt": "How many blue circles are visible? Answer only the number.", "expected": "9", "raw_response": "9", "correct": true, "error": null} {"case_id": "count_03", "category": "counting", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/count_03.png", "prompt": "How many green circles are visible? Answer only the number.", "expected": "7", "raw_response": "7", "correct": true, "error": null} {"case_id": "spatial_00", "category": "spatial", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/spatial_00.png", "prompt": "What color square is directly above the blue square? Answer only the color.", "expected": "green", "raw_response": "green", "correct": true, "error": null} {"case_id": "spatial_01", "category": "spatial", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/spatial_01.png", "prompt": "What color square is directly above the blue square? Answer only the color.", "expected": "red", "raw_response": "red", "correct": true, "error": null} {"case_id": "spatial_02", "category": "spatial", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/spatial_02.png", "prompt": "What color square is directly above the blue square? Answer only the color.", "expected": "green", "raw_response": "green", "correct": true, "error": null} {"case_id": "spatial_03", "category": "spatial", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/spatial_03.png", "prompt": "What color square is directly above the blue square? Answer only the color.", "expected": "yellow", "raw_response": "yellow", "correct": true, "error": null} {"case_id": "chart_00", "category": "chart", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/chart_00.png", "prompt": "Which option has the tallest bar? Answer only A, B, C, or D.", "expected": "A", "raw_response": "A", "correct": true, "error": null} {"case_id": "chart_01", "category": "chart", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/chart_01.png", "prompt": "Which option has the tallest bar? Answer only A, B, C, or D.", "expected": "A", "raw_response": "A", "correct": true, "error": null} {"case_id": "chart_02", "category": "chart", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/chart_02.png", "prompt": "Which option has the tallest bar? Answer only A, B, C, or D.", "expected": "D", "raw_response": "D", "correct": true, "error": null} {"case_id": "chart_03", "category": "chart", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/chart_03.png", "prompt": "Which option has the tallest bar? Answer only A, B, C, or D.", "expected": "B", "raw_response": "B", "correct": true, "error": null} {"case_id": "table_00", "category": "table", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/table_00.png", "prompt": "What is the total for Adapter? Answer only the dollar amount without words.", "expected": "$45", "raw_response": "$45", "correct": true, "error": null} {"case_id": "table_01", "category": "table", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/table_01.png", "prompt": "What is the total for Cable? Answer only the dollar amount without words.", "expected": "$35", "raw_response": "$35", "correct": true, "error": null} {"case_id": "table_02", "category": "table", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/table_02.png", "prompt": "What is the total for Cable? Answer only the dollar amount without words.", "expected": "$49", "raw_response": "$49", "correct": true, "error": null} {"case_id": "table_03", "category": "table", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/table_03.png", "prompt": "What is the total for Sensor? Answer only the dollar amount without words.", "expected": "$72", "raw_response": "$72", "correct": true, "error": null} {"case_id": "ui_00", "category": "ui", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/ui_00.png", "prompt": "What word is written on the red button? Answer only the word.", "expected": "Delete", "raw_response": "Delete", "correct": true, "error": null} {"case_id": "ui_01", "category": "ui", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/ui_01.png", "prompt": "What word is written on the red button? Answer only the word.", "expected": "Cancel", "raw_response": "Cancel", "correct": true, "error": null} {"case_id": "ui_02", "category": "ui", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/ui_02.png", "prompt": "What word is written on the red button? Answer only the word.", "expected": "Cancel", "raw_response": "Cancel", "correct": true, "error": null} {"case_id": "ui_03", "category": "ui", "image_path": "/var/home/deucebucket/games/cerebellum-staging/heretic-qwen36-35b/benchmark_results/vision_images/ui_03.png", "prompt": "What word is written on the red button? Answer only the word.", "expected": "Delete", "raw_response": "Delete", "correct": true, "error": null}