{ "title": "Single-Episode 20-Task Radar", "status": "pass", "generated_at_utc": "2026-06-21T20:35:16+00:00", "description": "Minimal and Neural MLP baselines on the one public sample episode, both scored on all 20 task contracts.", "task_count": 20, "method_count": 2, "method_task_record_count": 40, "scored_method_task_count": 40, "normalization_policy": { "higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]", "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task", "raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table", "radar_visual_radius": "SVG radar panels use sqrt(normalized_score) for radius so polygon area remains closer to the score and low-valued but real differences stay visible; the JSON and matrix retain exact linear normalized_score values", "result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used", "foundation_model_overlay": "Qwen3-Omni and Cosmos3 are grouped in the foundation-model radar panel. All current public model rows have 20 scored task records, with source paths retained for every metric.", "metadata_128_overlay": "128-episode aligned baselines are grouped in the metadata/text radar panel. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists.", "raw_128_overlay": "128-episode raw-feature baselines are grouped in the raw-feature radar panel. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export." }, "chart_design": { "mode": "grouped_small_multiples", "method_count": 2, "reason": "This split view has 2 methods and 40 method-task records; grouped radar panels keep related methods readable while retaining the unified source matrix.", "groups": [ { "id": "single_episode", "title": "Single-episode sample", "series_ids": [ "minimal", "neural_mlp" ] } ], "visual_radius_transform": "sqrt(normalized_score)", "exact_value_source": "docs/data/task_method_20_result_matrix.json" }, "source_unified_radar": "docs/data/unified_task_model_radar.json", "source_result_matrix": "docs/data/task_method_20_result_matrix.json", "series": [ { "id": "minimal", "label": "Minimal", "short_label": "Min", "color": "#ccffa0", "kind": "full_20_task_baseline", "scope": "1 public sample episode", "stroke_dasharray": null, "method_detail": "Single-episode simple heads over the public sample split.", "plotted_as": "grouped small-multiple radar panel with direct legend and coverage badges", "result_record_count": 20, "scored_task_count": 20, "covered_task_count": 20, "proxy_scored_task_count": 0, "scoreless_task_count": 0, "unsupported_task_count": 0, "not_evaluated_task_count": 0, "status_counts": { "scored": 20 }, "coverage_fraction": 1.0, "result_record_fraction": 1.0 }, { "id": "neural_mlp", "label": "Neural MLP", "short_label": "NN", "color": "#67e8d1", "kind": "full_20_task_baseline", "scope": "1 public sample episode", "stroke_dasharray": null, "method_detail": "Single-episode compact PyTorch MLP heads on the same 20 task contracts.", "plotted_as": "grouped small-multiple radar panel with direct legend and coverage badges", "result_record_count": 20, "scored_task_count": 20, "covered_task_count": 20, "proxy_scored_task_count": 0, "scoreless_task_count": 0, "unsupported_task_count": 0, "not_evaluated_task_count": 0, "status_counts": { "scored": 20 }, "coverage_fraction": 1.0, "result_record_fraction": 1.0 } ], "tasks": [ { "task_number": 1, "task_id": "timeline_action", "label": "Action Recognition", "axis_label": "01 Action Recognition", "short_label": "Action", "provenance_source": "walkthrough_backed_task_contract", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 0.05, "metric_key": "macro_f1", "source": "results/episode_task_suite/timeline_action/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.05, "raw_text": "0.0500", "status_label": "scored" }, "neural_mlp": { "raw": 0.014814814814814814, "metric_key": "macro_f1", "source": "results/episode_task_suite/neural_mlp/timeline_action/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.014814814814814814, "raw_text": "0.0148", "status_label": "scored" } } }, { "task_number": 2, "task_id": "timeline_subtask", "label": "Procedure Step Recognition", "axis_label": "02 Procedure Step Recognition", "short_label": "Step", "provenance_source": "walkthrough_backed_task_contract", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 0.05056355513846935, "metric_key": "macro_f1", "source": "results/episode_task_suite/timeline_subtask/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.05056355513846935, "raw_text": "0.0506", "status_label": "scored" }, "neural_mlp": { "raw": 0.02810810810810811, "metric_key": "macro_f1", "source": "results/episode_task_suite/neural_mlp/timeline_subtask/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.02810810810810811, "raw_text": "0.0281", "status_label": "scored" } } }, { "task_number": 3, "task_id": "transition_detection", "label": "Action Boundary Detection", "axis_label": "03 Action Boundary Detection", "short_label": "Boundary", "provenance_source": "walkthrough_backed_task_contract", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 0.6118237590630229, "metric_key": "macro_f1", "source": "results/episode_task_suite/transition_detection/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.6118237590630229, "raw_text": "0.6118", "status_label": "scored" }, "neural_mlp": { "raw": 0.5862068965517241, "metric_key": "macro_f1", "source": "results/episode_task_suite/neural_mlp/transition_detection/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.5862068965517241, "raw_text": "0.5862", "status_label": "scored" } } }, { "task_number": 4, "task_id": "next_action", "label": "Next-Action Prediction", "axis_label": "04 Next-Action Prediction", "short_label": "Next act", "provenance_source": "walkthrough_backed_task_contract", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 0.05925925925925927, "metric_key": "macro_f1", "source": "results/episode_task_suite/next_action/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.05925925925925927, "raw_text": "0.0593", "status_label": "scored" }, "neural_mlp": { "raw": 0.04186046511627907, "metric_key": "macro_f1", "source": "results/episode_task_suite/neural_mlp/next_action/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.04186046511627907, "raw_text": "0.0419", "status_label": "scored" } } }, { "task_number": 5, "task_id": "hand_trajectory_forecast", "label": "Hand Trajectory Forecasting", "axis_label": "05 Hand Trajectory Forecasting", "short_label": "Hand traj", "provenance_source": "walkthrough_backed_task_contract", "metric_key": "mpjpe", "metric_name": "MPJPE", "metric_direction": "lower", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 0.8646570444107056, "metric_key": "mpjpe", "source": "results/episode_task_suite/hand_trajectory_forecast/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.12473175026322614, "raw_text": "0.8647", "status_label": "scored" }, "neural_mlp": { "raw": 0.10785018652677536, "metric_key": "mpjpe", "source": "results/episode_task_suite/neural_mlp/hand_trajectory_forecast/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 1.0, "raw_text": "0.1079", "status_label": "scored" } } }, { "task_number": 6, "task_id": "contact_prediction", "label": "Contact State Prediction", "axis_label": "06 Contact State Prediction", "short_label": "Contact", "provenance_source": "walkthrough_backed_task_contract", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 1.0, "metric_key": "macro_f1", "source": "results/episode_task_suite/contact_prediction/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 1.0, "raw_text": "1.000", "status_label": "scored" }, "neural_mlp": { "raw": 1.0, "metric_key": "macro_f1", "source": "results/episode_task_suite/neural_mlp/contact_prediction/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 1.0, "raw_text": "1.000", "status_label": "scored" } } }, { "task_number": 7, "task_id": "object_relevance", "label": "Object Relevance Prediction", "axis_label": "07 Object Relevance Prediction", "short_label": "Objects", "provenance_source": "walkthrough_backed_task_contract", "metric_key": "micro_f1", "metric_name": "micro-F1", "metric_direction": "higher", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 0.18034382095361662, "metric_key": "micro_f1", "source": "results/episode_task_suite/object_relevance/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.18034382095361662, "raw_text": "0.1803", "status_label": "scored" }, "neural_mlp": { "raw": 0.1679279279279279, "metric_key": "micro_f1", "source": "results/episode_task_suite/neural_mlp/object_relevance/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.1679279279279279, "raw_text": "0.1679", "status_label": "scored" } } }, { "task_number": 8, "task_id": "caption_grounding", "label": "Language Grounding", "axis_label": "08 Language Grounding", "short_label": "Language", "provenance_source": "walkthrough_backed_task_contract", "metric_key": "mrr", "metric_name": "MRR", "metric_direction": "higher", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 0.016023479050338015, "metric_key": "mrr", "source": "results/episode_task_suite/caption_grounding/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.016023479050338015, "raw_text": "0.0160", "status_label": "scored" }, "neural_mlp": { "raw": 0.01684125567132316, "metric_key": "mrr", "source": "results/episode_task_suite/neural_mlp/caption_grounding/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.01684125567132316, "raw_text": "0.0168", "status_label": "scored" } } }, { "task_number": 9, "task_id": "cross_modal_retrieval", "label": "Cross-Modal Retrieval", "axis_label": "09 Cross-Modal Retrieval", "short_label": "X-modal", "provenance_source": "walkthrough_backed_task_contract", "metric_key": "mrr", "metric_name": "MRR", "metric_direction": "higher", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 0.26925966892956127, "metric_key": "mrr", "source": "results/episode_task_suite/cross_modal_retrieval/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.26925966892956127, "raw_text": "0.2693", "status_label": "scored" }, "neural_mlp": { "raw": 0.1299971898648288, "metric_key": "mrr", "source": "results/episode_task_suite/neural_mlp/cross_modal_retrieval/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.1299971898648288, "raw_text": "0.1300", "status_label": "scored" } } }, { "task_number": 10, "task_id": "modality_reconstruction", "label": "Cross-Modal Reconstruction", "axis_label": "10 Cross-Modal Reconstruction", "short_label": "Recon", "provenance_source": "walkthrough_backed_task_contract", "metric_key": "r2", "metric_name": "R2", "metric_direction": "higher", "raw128_proxy_axis": false, "values": { "minimal": { "raw": -0.015271898913936655, "metric_key": "r2", "source": "results/episode_task_suite/modality_reconstruction/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.0, "raw_text": "-0.0153", "status_label": "scored" }, "neural_mlp": { "raw": -0.010171410134180991, "metric_key": "r2", "source": "results/episode_task_suite/neural_mlp/modality_reconstruction/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.0, "raw_text": "-0.0102", "status_label": "scored" } } }, { "task_number": 11, "task_id": "temporal_order", "label": "Temporal Order Verification", "axis_label": "11 Temporal Order Verification", "short_label": "Order", "provenance_source": "walkthrough_backed_task_contract", "metric_key": "f1", "metric_name": "F1", "metric_direction": "higher", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 0.5399515738498789, "metric_key": "f1", "source": "results/episode_task_suite/temporal_order/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.5399515738498789, "raw_text": "0.5400", "status_label": "scored" }, "neural_mlp": { "raw": 0.8520179372197308, "metric_key": "f1", "source": "results/episode_task_suite/neural_mlp/temporal_order/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.8520179372197308, "raw_text": "0.8520", "status_label": "scored" } } }, { "task_number": 12, "task_id": "misalignment_detection", "label": "Multimodal Synchronization Detection", "axis_label": "12 Multimodal Synchronization Detection", "short_label": "Sync", "provenance_source": "walkthrough_backed_task_contract", "metric_key": "f1", "metric_name": "F1", "metric_direction": "higher", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 0.5051698670605613, "metric_key": "f1", "source": "results/episode_task_suite/misalignment_detection/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.5051698670605613, "raw_text": "0.5052", "status_label": "scored" }, "neural_mlp": { "raw": 0.7152682255845944, "metric_key": "f1", "source": "results/episode_task_suite/neural_mlp/misalignment_detection/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.7152682255845944, "raw_text": "0.7153", "status_label": "scored" } } }, { "task_number": 13, "task_id": "long_horizon_next_action", "label": "Long-Horizon Next-Action Forecasting", "axis_label": "13 Long-Horizon Next-Action Forecasting", "short_label": "Long act", "provenance_source": "historical_result_bundle", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 0.07499999999999998, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/long_horizon_next_action/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.07499999999999998, "raw_text": "0.0750", "status_label": "scored" }, "neural_mlp": { "raw": 0.06545454545454546, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/long_horizon_next_action/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.06545454545454546, "raw_text": "0.0655", "status_label": "scored" } } }, { "task_number": 14, "task_id": "next_subtask_forecast", "label": "Long-Horizon Next-Subtask Forecasting", "axis_label": "14 Long-Horizon Next-Subtask Forecasting", "short_label": "Long step", "provenance_source": "historical_result_bundle", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 0.04545454545454545, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/next_subtask_forecast/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.04545454545454545, "raw_text": "0.0455", "status_label": "scored" }, "neural_mlp": { "raw": 0.050724637681159424, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/next_subtask_forecast/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.050724637681159424, "raw_text": "0.0507", "status_label": "scored" } } }, { "task_number": 15, "task_id": "interaction_text_prediction", "label": "Interaction Text Prediction", "axis_label": "15 Interaction Text Prediction", "short_label": "Interact txt", "provenance_source": "historical_result_bundle", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "raw128_proxy_axis": true, "values": { "minimal": { "raw": 0.04444444444444444, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/interaction_text_prediction/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.04444444444444444, "raw_text": "0.0444", "status_label": "scored" }, "neural_mlp": { "raw": 0.0380952380952381, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/interaction_text_prediction/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.0380952380952381, "raw_text": "0.0381", "status_label": "scored" } } }, { "task_number": 16, "task_id": "action_object_relation", "label": "Action-Object Relation Prediction", "axis_label": "16 Action-Object Relation Prediction", "short_label": "Act+obj", "provenance_source": "historical_result_bundle", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 0.0, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/action_object_relation/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.0, "raw_text": "0.0000", "status_label": "scored" }, "neural_mlp": { "raw": 0.0, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/action_object_relation/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.0, "raw_text": "0.0000", "status_label": "scored" } } }, { "task_number": 17, "task_id": "object_set_forecast", "label": "Future Object-Set Forecasting", "axis_label": "17 Future Object-Set Forecasting", "short_label": "Future obj", "provenance_source": "historical_result_bundle", "metric_key": "micro_f1", "metric_name": "micro-F1", "metric_direction": "higher", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 0.16939890710382516, "metric_key": "micro_f1", "source": "results/episode_task_suite/tier2_task_suite/object_set_forecast/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.16939890710382516, "raw_text": "0.1694", "status_label": "scored" }, "neural_mlp": { "raw": 0.19718309859154928, "metric_key": "micro_f1", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/object_set_forecast/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.19718309859154928, "raw_text": "0.1972", "status_label": "scored" } } }, { "task_number": 18, "task_id": "imu_to_hand_pose", "label": "IMU-to-Hand Pose Reconstruction", "axis_label": "18 IMU-to-Hand Pose Reconstruction", "short_label": "IMU->hand", "provenance_source": "historical_result_bundle", "metric_key": "mae", "metric_name": "MAE", "metric_direction": "lower", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 0.042049407958984375, "metric_key": "mae", "source": "results/episode_task_suite/tier2_task_suite/imu_to_hand_pose/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 1.0, "raw_text": "0.0420", "status_label": "scored" }, "neural_mlp": { "raw": 0.042562149465084076, "metric_key": "mae", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/imu_to_hand_pose/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.9879531106266066, "raw_text": "0.0426", "status_label": "scored" } } }, { "task_number": 19, "task_id": "camera_view_sync_retrieval", "label": "Camera-View Synchronization Retrieval", "axis_label": "19 Camera-View Synchronization Retrieval", "short_label": "Cam sync", "provenance_source": "historical_result_bundle", "metric_key": "mrr", "metric_name": "MRR", "metric_direction": "higher", "raw128_proxy_axis": true, "values": { "minimal": { "raw": 0.4943004846572876, "metric_key": "mrr", "source": "results/episode_task_suite/tier2_task_suite/camera_view_sync_retrieval/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.4943004846572876, "raw_text": "0.4943", "status_label": "scored" }, "neural_mlp": { "raw": 0.24086658656597137, "metric_key": "mrr", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/camera_view_sync_retrieval/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.24086658656597137, "raw_text": "0.2409", "status_label": "scored" } } }, { "task_number": 20, "task_id": "time_to_transition", "label": "Time-to-Next-Transition Regression", "axis_label": "20 Time-to-Next-Transition Regression", "short_label": "Time2bdry", "provenance_source": "historical_result_bundle", "metric_key": "mae", "metric_name": "MAE frames", "metric_direction": "lower", "raw128_proxy_axis": false, "values": { "minimal": { "raw": 10.53735637664795, "metric_key": "mae", "source": "results/episode_task_suite/tier2_task_suite/time_to_transition/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 1.0, "raw_text": "10.54", "status_label": "scored" }, "neural_mlp": { "raw": 10.55449390411377, "metric_key": "mae", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/time_to_transition/metrics.json", "scope": "single_episode_public_sample", "status": "scored", "normalized_score": 0.9983762814568361, "raw_text": "10.55", "status_label": "scored" } } } ], "task_method_result_matrix": [ { "task_number": 1, "task_id": "timeline_action", "task_label": "Action Recognition", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.05, "raw_text": "0.0500", "normalized_score": 0.05, "metric_key": "macro_f1", "source": "results/episode_task_suite/timeline_action/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 1, "task_id": "timeline_action", "task_label": "Action Recognition", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.014814814814814814, "raw_text": "0.0148", "normalized_score": 0.014814814814814814, "metric_key": "macro_f1", "source": "results/episode_task_suite/neural_mlp/timeline_action/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 2, "task_id": "timeline_subtask", "task_label": "Procedure Step Recognition", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.05056355513846935, "raw_text": "0.0506", "normalized_score": 0.05056355513846935, "metric_key": "macro_f1", "source": "results/episode_task_suite/timeline_subtask/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 2, "task_id": "timeline_subtask", "task_label": "Procedure Step Recognition", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.02810810810810811, "raw_text": "0.0281", "normalized_score": 0.02810810810810811, "metric_key": "macro_f1", "source": "results/episode_task_suite/neural_mlp/timeline_subtask/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 3, "task_id": "transition_detection", "task_label": "Action Boundary Detection", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.6118237590630229, "raw_text": "0.6118", "normalized_score": 0.6118237590630229, "metric_key": "macro_f1", "source": "results/episode_task_suite/transition_detection/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 3, "task_id": "transition_detection", "task_label": "Action Boundary Detection", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.5862068965517241, "raw_text": "0.5862", "normalized_score": 0.5862068965517241, "metric_key": "macro_f1", "source": "results/episode_task_suite/neural_mlp/transition_detection/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 4, "task_id": "next_action", "task_label": "Next-Action Prediction", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.05925925925925927, "raw_text": "0.0593", "normalized_score": 0.05925925925925927, "metric_key": "macro_f1", "source": "results/episode_task_suite/next_action/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 4, "task_id": "next_action", "task_label": "Next-Action Prediction", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.04186046511627907, "raw_text": "0.0419", "normalized_score": 0.04186046511627907, "metric_key": "macro_f1", "source": "results/episode_task_suite/neural_mlp/next_action/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 5, "task_id": "hand_trajectory_forecast", "task_label": "Hand Trajectory Forecasting", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.8646570444107056, "raw_text": "0.8647", "normalized_score": 0.12473175026322614, "metric_key": "mpjpe", "source": "results/episode_task_suite/hand_trajectory_forecast/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 5, "task_id": "hand_trajectory_forecast", "task_label": "Hand Trajectory Forecasting", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.10785018652677536, "raw_text": "0.1079", "normalized_score": 1.0, "metric_key": "mpjpe", "source": "results/episode_task_suite/neural_mlp/hand_trajectory_forecast/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 6, "task_id": "contact_prediction", "task_label": "Contact State Prediction", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 1.0, "raw_text": "1.000", "normalized_score": 1.0, "metric_key": "macro_f1", "source": "results/episode_task_suite/contact_prediction/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 6, "task_id": "contact_prediction", "task_label": "Contact State Prediction", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 1.0, "raw_text": "1.000", "normalized_score": 1.0, "metric_key": "macro_f1", "source": "results/episode_task_suite/neural_mlp/contact_prediction/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 7, "task_id": "object_relevance", "task_label": "Object Relevance Prediction", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.18034382095361662, "raw_text": "0.1803", "normalized_score": 0.18034382095361662, "metric_key": "micro_f1", "source": "results/episode_task_suite/object_relevance/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 7, "task_id": "object_relevance", "task_label": "Object Relevance Prediction", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.1679279279279279, "raw_text": "0.1679", "normalized_score": 0.1679279279279279, "metric_key": "micro_f1", "source": "results/episode_task_suite/neural_mlp/object_relevance/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 8, "task_id": "caption_grounding", "task_label": "Language Grounding", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.016023479050338015, "raw_text": "0.0160", "normalized_score": 0.016023479050338015, "metric_key": "mrr", "source": "results/episode_task_suite/caption_grounding/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 8, "task_id": "caption_grounding", "task_label": "Language Grounding", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.01684125567132316, "raw_text": "0.0168", "normalized_score": 0.01684125567132316, "metric_key": "mrr", "source": "results/episode_task_suite/neural_mlp/caption_grounding/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 9, "task_id": "cross_modal_retrieval", "task_label": "Cross-Modal Retrieval", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.26925966892956127, "raw_text": "0.2693", "normalized_score": 0.26925966892956127, "metric_key": "mrr", "source": "results/episode_task_suite/cross_modal_retrieval/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 9, "task_id": "cross_modal_retrieval", "task_label": "Cross-Modal Retrieval", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.1299971898648288, "raw_text": "0.1300", "normalized_score": 0.1299971898648288, "metric_key": "mrr", "source": "results/episode_task_suite/neural_mlp/cross_modal_retrieval/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 10, "task_id": "modality_reconstruction", "task_label": "Cross-Modal Reconstruction", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": -0.015271898913936655, "raw_text": "-0.0153", "normalized_score": 0.0, "metric_key": "r2", "source": "results/episode_task_suite/modality_reconstruction/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 10, "task_id": "modality_reconstruction", "task_label": "Cross-Modal Reconstruction", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": -0.010171410134180991, "raw_text": "-0.0102", "normalized_score": 0.0, "metric_key": "r2", "source": "results/episode_task_suite/neural_mlp/modality_reconstruction/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 11, "task_id": "temporal_order", "task_label": "Temporal Order Verification", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.5399515738498789, "raw_text": "0.5400", "normalized_score": 0.5399515738498789, "metric_key": "f1", "source": "results/episode_task_suite/temporal_order/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 11, "task_id": "temporal_order", "task_label": "Temporal Order Verification", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.8520179372197308, "raw_text": "0.8520", "normalized_score": 0.8520179372197308, "metric_key": "f1", "source": "results/episode_task_suite/neural_mlp/temporal_order/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 12, "task_id": "misalignment_detection", "task_label": "Multimodal Synchronization Detection", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.5051698670605613, "raw_text": "0.5052", "normalized_score": 0.5051698670605613, "metric_key": "f1", "source": "results/episode_task_suite/misalignment_detection/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 12, "task_id": "misalignment_detection", "task_label": "Multimodal Synchronization Detection", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.7152682255845944, "raw_text": "0.7153", "normalized_score": 0.7152682255845944, "metric_key": "f1", "source": "results/episode_task_suite/neural_mlp/misalignment_detection/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 13, "task_id": "long_horizon_next_action", "task_label": "Long-Horizon Next-Action Forecasting", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.07499999999999998, "raw_text": "0.0750", "normalized_score": 0.07499999999999998, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/long_horizon_next_action/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 13, "task_id": "long_horizon_next_action", "task_label": "Long-Horizon Next-Action Forecasting", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.06545454545454546, "raw_text": "0.0655", "normalized_score": 0.06545454545454546, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/long_horizon_next_action/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 14, "task_id": "next_subtask_forecast", "task_label": "Long-Horizon Next-Subtask Forecasting", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.04545454545454545, "raw_text": "0.0455", "normalized_score": 0.04545454545454545, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/next_subtask_forecast/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 14, "task_id": "next_subtask_forecast", "task_label": "Long-Horizon Next-Subtask Forecasting", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.050724637681159424, "raw_text": "0.0507", "normalized_score": 0.050724637681159424, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/next_subtask_forecast/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 15, "task_id": "interaction_text_prediction", "task_label": "Interaction Text Prediction", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.04444444444444444, "raw_text": "0.0444", "normalized_score": 0.04444444444444444, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/interaction_text_prediction/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 15, "task_id": "interaction_text_prediction", "task_label": "Interaction Text Prediction", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.0380952380952381, "raw_text": "0.0381", "normalized_score": 0.0380952380952381, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/interaction_text_prediction/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 16, "task_id": "action_object_relation", "task_label": "Action-Object Relation Prediction", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.0, "raw_text": "0.0000", "normalized_score": 0.0, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/action_object_relation/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 16, "task_id": "action_object_relation", "task_label": "Action-Object Relation Prediction", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.0, "raw_text": "0.0000", "normalized_score": 0.0, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/action_object_relation/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 17, "task_id": "object_set_forecast", "task_label": "Future Object-Set Forecasting", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.16939890710382516, "raw_text": "0.1694", "normalized_score": 0.16939890710382516, "metric_key": "micro_f1", "source": "results/episode_task_suite/tier2_task_suite/object_set_forecast/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 17, "task_id": "object_set_forecast", "task_label": "Future Object-Set Forecasting", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.19718309859154928, "raw_text": "0.1972", "normalized_score": 0.19718309859154928, "metric_key": "micro_f1", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/object_set_forecast/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 18, "task_id": "imu_to_hand_pose", "task_label": "IMU-to-Hand Pose Reconstruction", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.042049407958984375, "raw_text": "0.0420", "normalized_score": 1.0, "metric_key": "mae", "source": "results/episode_task_suite/tier2_task_suite/imu_to_hand_pose/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 18, "task_id": "imu_to_hand_pose", "task_label": "IMU-to-Hand Pose Reconstruction", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.042562149465084076, "raw_text": "0.0426", "normalized_score": 0.9879531106266066, "metric_key": "mae", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/imu_to_hand_pose/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 19, "task_id": "camera_view_sync_retrieval", "task_label": "Camera-View Synchronization Retrieval", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.4943004846572876, "raw_text": "0.4943", "normalized_score": 0.4943004846572876, "metric_key": "mrr", "source": "results/episode_task_suite/tier2_task_suite/camera_view_sync_retrieval/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 19, "task_id": "camera_view_sync_retrieval", "task_label": "Camera-View Synchronization Retrieval", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 0.24086658656597137, "raw_text": "0.2409", "normalized_score": 0.24086658656597137, "metric_key": "mrr", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/camera_view_sync_retrieval/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 20, "task_id": "time_to_transition", "task_label": "Time-to-Next-Transition Regression", "series_id": "minimal", "method": "Minimal", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 10.53735637664795, "raw_text": "10.54", "normalized_score": 1.0, "metric_key": "mae", "source": "results/episode_task_suite/tier2_task_suite/time_to_transition/metrics.json", "scope": "single_episode_public_sample", "reason": null }, { "task_number": 20, "task_id": "time_to_transition", "task_label": "Time-to-Next-Transition Regression", "series_id": "neural_mlp", "method": "Neural MLP", "status": "scored", "status_label": "scored", "scored": true, "proxy_scored": false, "raw": 10.55449390411377, "raw_text": "10.55", "normalized_score": 0.9983762814568361, "metric_key": "mae", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/time_to_transition/metrics.json", "scope": "single_episode_public_sample", "reason": null } ] }