ropedia-xperience-10m-task-baselines / docs /data /single_episode_task_model_radar.json
cy0307's picture
Improve radar chart readability
a1d4a4f verified
Raw
History Blame Contribute Delete
52.3 kB
{
"title": "Single-Episode 20-Task Radar",
"status": "pass",
"generated_at_utc": "2026-06-22T13:13:58+00:00",
"description": "Minimal and Neural MLP baselines on the one public sample episode, both scored on all 20 task contracts.",
"task_count": 20,
"method_count": 2,
"method_task_record_count": 40,
"scored_method_task_count": 40,
"normalization_policy": {
"higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]",
"lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
"raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
"radar_visual_radius": "SVG radar panels use sqrt(normalized_score) for radius so polygon area remains closer to the score and low-valued but real differences stay visible; the JSON and matrix retain exact linear normalized_score values",
"result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
"foundation_model_overlay": "Qwen3-Omni and Cosmos3 are grouped in the foundation-model radar panel. All current public model rows have 20 scored task records, with source paths retained for every metric.",
"metadata_128_overlay": "128-episode aligned baselines are grouped in the metadata/text radar panel. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists.",
"raw_128_overlay": "128-episode raw-feature baselines are grouped in the raw-feature radar panel. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
},
"chart_design": {
"mode": "grouped_small_multiples",
"method_count": 2,
"reason": "This split view has 2 methods and 40 method-task records; grouped radar panels keep related methods readable while retaining the unified source matrix.",
"groups": [
{
"id": "single_episode",
"title": "Single-episode sample",
"series_ids": [
"minimal",
"neural_mlp"
]
}
],
"visual_radius_transform": "sqrt(normalized_score)",
"exact_value_source": "docs/data/task_method_20_result_matrix.json"
},
"source_unified_radar": "docs/data/unified_task_model_radar.json",
"source_result_matrix": "docs/data/task_method_20_result_matrix.json",
"series": [
{
"id": "minimal",
"label": "Minimal",
"short_label": "Min",
"color": "#ccffa0",
"kind": "full_20_task_baseline",
"scope": "1 public sample episode",
"stroke_dasharray": null,
"method_detail": "Single-episode simple heads over the public sample split.",
"plotted_as": "grouped small-multiple radar panel with direct legend and coverage badges",
"result_record_count": 20,
"scored_task_count": 20,
"covered_task_count": 20,
"proxy_scored_task_count": 0,
"scoreless_task_count": 0,
"unsupported_task_count": 0,
"not_evaluated_task_count": 0,
"status_counts": {
"scored": 20
},
"coverage_fraction": 1.0,
"result_record_fraction": 1.0
},
{
"id": "neural_mlp",
"label": "Neural MLP",
"short_label": "NN",
"color": "#67e8d1",
"kind": "full_20_task_baseline",
"scope": "1 public sample episode",
"stroke_dasharray": null,
"method_detail": "Single-episode compact PyTorch MLP heads on the same 20 task contracts.",
"plotted_as": "grouped small-multiple radar panel with direct legend and coverage badges",
"result_record_count": 20,
"scored_task_count": 20,
"covered_task_count": 20,
"proxy_scored_task_count": 0,
"scoreless_task_count": 0,
"unsupported_task_count": 0,
"not_evaluated_task_count": 0,
"status_counts": {
"scored": 20
},
"coverage_fraction": 1.0,
"result_record_fraction": 1.0
}
],
"tasks": [
{
"task_number": 1,
"task_id": "timeline_action",
"label": "Action Recognition",
"axis_label": "01 Action Recognition",
"short_label": "Action",
"provenance_source": "walkthrough_backed_task_contract",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 0.05,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/timeline_action/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.05,
"raw_text": "0.0500",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.014814814814814814,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/neural_mlp/timeline_action/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.014814814814814814,
"raw_text": "0.0148",
"status_label": "scored"
}
}
},
{
"task_number": 2,
"task_id": "timeline_subtask",
"label": "Procedure Step Recognition",
"axis_label": "02 Procedure Step Recognition",
"short_label": "Step",
"provenance_source": "walkthrough_backed_task_contract",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 0.05056355513846935,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/timeline_subtask/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.05056355513846935,
"raw_text": "0.0506",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.02810810810810811,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/neural_mlp/timeline_subtask/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.02810810810810811,
"raw_text": "0.0281",
"status_label": "scored"
}
}
},
{
"task_number": 3,
"task_id": "transition_detection",
"label": "Action Boundary Detection",
"axis_label": "03 Action Boundary Detection",
"short_label": "Boundary",
"provenance_source": "walkthrough_backed_task_contract",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 0.6118237590630229,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/transition_detection/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.6118237590630229,
"raw_text": "0.6118",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.5862068965517241,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/neural_mlp/transition_detection/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.5862068965517241,
"raw_text": "0.5862",
"status_label": "scored"
}
}
},
{
"task_number": 4,
"task_id": "next_action",
"label": "Next-Action Prediction",
"axis_label": "04 Next-Action Prediction",
"short_label": "Next act",
"provenance_source": "walkthrough_backed_task_contract",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 0.05925925925925927,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/next_action/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.05925925925925927,
"raw_text": "0.0593",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.04186046511627907,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/neural_mlp/next_action/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.04186046511627907,
"raw_text": "0.0419",
"status_label": "scored"
}
}
},
{
"task_number": 5,
"task_id": "hand_trajectory_forecast",
"label": "Hand Trajectory Forecasting",
"axis_label": "05 Hand Trajectory Forecasting",
"short_label": "Hand traj",
"provenance_source": "walkthrough_backed_task_contract",
"metric_key": "mpjpe",
"metric_name": "MPJPE",
"metric_direction": "lower",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 0.8646570444107056,
"metric_key": "mpjpe",
"source": "results/episode_task_suite/hand_trajectory_forecast/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.12473175026322614,
"raw_text": "0.8647",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.10785018652677536,
"metric_key": "mpjpe",
"source": "results/episode_task_suite/neural_mlp/hand_trajectory_forecast/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 1.0,
"raw_text": "0.1079",
"status_label": "scored"
}
}
},
{
"task_number": 6,
"task_id": "contact_prediction",
"label": "Contact State Prediction",
"axis_label": "06 Contact State Prediction",
"short_label": "Contact",
"provenance_source": "walkthrough_backed_task_contract",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 1.0,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/contact_prediction/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 1.0,
"raw_text": "1.000",
"status_label": "scored"
},
"neural_mlp": {
"raw": 1.0,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/neural_mlp/contact_prediction/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 1.0,
"raw_text": "1.000",
"status_label": "scored"
}
}
},
{
"task_number": 7,
"task_id": "object_relevance",
"label": "Object Relevance Prediction",
"axis_label": "07 Object Relevance Prediction",
"short_label": "Objects",
"provenance_source": "walkthrough_backed_task_contract",
"metric_key": "micro_f1",
"metric_name": "micro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 0.18034382095361662,
"metric_key": "micro_f1",
"source": "results/episode_task_suite/object_relevance/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.18034382095361662,
"raw_text": "0.1803",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.1679279279279279,
"metric_key": "micro_f1",
"source": "results/episode_task_suite/neural_mlp/object_relevance/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.1679279279279279,
"raw_text": "0.1679",
"status_label": "scored"
}
}
},
{
"task_number": 8,
"task_id": "caption_grounding",
"label": "Language Grounding",
"axis_label": "08 Language Grounding",
"short_label": "Language",
"provenance_source": "walkthrough_backed_task_contract",
"metric_key": "mrr",
"metric_name": "MRR",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 0.016023479050338015,
"metric_key": "mrr",
"source": "results/episode_task_suite/caption_grounding/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.016023479050338015,
"raw_text": "0.0160",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.01684125567132316,
"metric_key": "mrr",
"source": "results/episode_task_suite/neural_mlp/caption_grounding/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.01684125567132316,
"raw_text": "0.0168",
"status_label": "scored"
}
}
},
{
"task_number": 9,
"task_id": "cross_modal_retrieval",
"label": "Cross-Modal Retrieval",
"axis_label": "09 Cross-Modal Retrieval",
"short_label": "X-modal",
"provenance_source": "walkthrough_backed_task_contract",
"metric_key": "mrr",
"metric_name": "MRR",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 0.26925966892956127,
"metric_key": "mrr",
"source": "results/episode_task_suite/cross_modal_retrieval/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.26925966892956127,
"raw_text": "0.2693",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.1299971898648288,
"metric_key": "mrr",
"source": "results/episode_task_suite/neural_mlp/cross_modal_retrieval/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.1299971898648288,
"raw_text": "0.1300",
"status_label": "scored"
}
}
},
{
"task_number": 10,
"task_id": "modality_reconstruction",
"label": "Cross-Modal Reconstruction",
"axis_label": "10 Cross-Modal Reconstruction",
"short_label": "Recon",
"provenance_source": "walkthrough_backed_task_contract",
"metric_key": "r2",
"metric_name": "R2",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": -0.015271898913936655,
"metric_key": "r2",
"source": "results/episode_task_suite/modality_reconstruction/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.0,
"raw_text": "-0.0153",
"status_label": "scored"
},
"neural_mlp": {
"raw": -0.010171410134180991,
"metric_key": "r2",
"source": "results/episode_task_suite/neural_mlp/modality_reconstruction/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.0,
"raw_text": "-0.0102",
"status_label": "scored"
}
}
},
{
"task_number": 11,
"task_id": "temporal_order",
"label": "Temporal Order Verification",
"axis_label": "11 Temporal Order Verification",
"short_label": "Order",
"provenance_source": "walkthrough_backed_task_contract",
"metric_key": "f1",
"metric_name": "F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 0.5399515738498789,
"metric_key": "f1",
"source": "results/episode_task_suite/temporal_order/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.5399515738498789,
"raw_text": "0.5400",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.8520179372197308,
"metric_key": "f1",
"source": "results/episode_task_suite/neural_mlp/temporal_order/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.8520179372197308,
"raw_text": "0.8520",
"status_label": "scored"
}
}
},
{
"task_number": 12,
"task_id": "misalignment_detection",
"label": "Multimodal Synchronization Detection",
"axis_label": "12 Multimodal Synchronization Detection",
"short_label": "Sync",
"provenance_source": "walkthrough_backed_task_contract",
"metric_key": "f1",
"metric_name": "F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 0.5051698670605613,
"metric_key": "f1",
"source": "results/episode_task_suite/misalignment_detection/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.5051698670605613,
"raw_text": "0.5052",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.7152682255845944,
"metric_key": "f1",
"source": "results/episode_task_suite/neural_mlp/misalignment_detection/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.7152682255845944,
"raw_text": "0.7153",
"status_label": "scored"
}
}
},
{
"task_number": 13,
"task_id": "long_horizon_next_action",
"label": "Long-Horizon Next-Action Forecasting",
"axis_label": "13 Long-Horizon Next-Action Forecasting",
"short_label": "Long act",
"provenance_source": "historical_result_bundle",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 0.07499999999999998,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/tier2_task_suite/long_horizon_next_action/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.07499999999999998,
"raw_text": "0.0750",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.06545454545454546,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/tier2_task_suite/neural_mlp/long_horizon_next_action/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.06545454545454546,
"raw_text": "0.0655",
"status_label": "scored"
}
}
},
{
"task_number": 14,
"task_id": "next_subtask_forecast",
"label": "Long-Horizon Next-Subtask Forecasting",
"axis_label": "14 Long-Horizon Next-Subtask Forecasting",
"short_label": "Long step",
"provenance_source": "historical_result_bundle",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 0.04545454545454545,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/tier2_task_suite/next_subtask_forecast/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.04545454545454545,
"raw_text": "0.0455",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.050724637681159424,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/tier2_task_suite/neural_mlp/next_subtask_forecast/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.050724637681159424,
"raw_text": "0.0507",
"status_label": "scored"
}
}
},
{
"task_number": 15,
"task_id": "interaction_text_prediction",
"label": "Interaction Text Prediction",
"axis_label": "15 Interaction Text Prediction",
"short_label": "Interact txt",
"provenance_source": "historical_result_bundle",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": true,
"values": {
"minimal": {
"raw": 0.04444444444444444,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/tier2_task_suite/interaction_text_prediction/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.04444444444444444,
"raw_text": "0.0444",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.0380952380952381,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/tier2_task_suite/neural_mlp/interaction_text_prediction/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.0380952380952381,
"raw_text": "0.0381",
"status_label": "scored"
}
}
},
{
"task_number": 16,
"task_id": "action_object_relation",
"label": "Action-Object Relation Prediction",
"axis_label": "16 Action-Object Relation Prediction",
"short_label": "Act+obj",
"provenance_source": "historical_result_bundle",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 0.0,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/tier2_task_suite/action_object_relation/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.0,
"raw_text": "0.0000",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.0,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/tier2_task_suite/neural_mlp/action_object_relation/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.0,
"raw_text": "0.0000",
"status_label": "scored"
}
}
},
{
"task_number": 17,
"task_id": "object_set_forecast",
"label": "Future Object-Set Forecasting",
"axis_label": "17 Future Object-Set Forecasting",
"short_label": "Future obj",
"provenance_source": "historical_result_bundle",
"metric_key": "micro_f1",
"metric_name": "micro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 0.16939890710382516,
"metric_key": "micro_f1",
"source": "results/episode_task_suite/tier2_task_suite/object_set_forecast/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.16939890710382516,
"raw_text": "0.1694",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.19718309859154928,
"metric_key": "micro_f1",
"source": "results/episode_task_suite/tier2_task_suite/neural_mlp/object_set_forecast/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.19718309859154928,
"raw_text": "0.1972",
"status_label": "scored"
}
}
},
{
"task_number": 18,
"task_id": "imu_to_hand_pose",
"label": "IMU-to-Hand Pose Reconstruction",
"axis_label": "18 IMU-to-Hand Pose Reconstruction",
"short_label": "IMU->hand",
"provenance_source": "historical_result_bundle",
"metric_key": "mae",
"metric_name": "MAE",
"metric_direction": "lower",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 0.042049407958984375,
"metric_key": "mae",
"source": "results/episode_task_suite/tier2_task_suite/imu_to_hand_pose/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 1.0,
"raw_text": "0.0420",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.042562149465084076,
"metric_key": "mae",
"source": "results/episode_task_suite/tier2_task_suite/neural_mlp/imu_to_hand_pose/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.9879531106266066,
"raw_text": "0.0426",
"status_label": "scored"
}
}
},
{
"task_number": 19,
"task_id": "camera_view_sync_retrieval",
"label": "Camera-View Synchronization Retrieval",
"axis_label": "19 Camera-View Synchronization Retrieval",
"short_label": "Cam sync",
"provenance_source": "historical_result_bundle",
"metric_key": "mrr",
"metric_name": "MRR",
"metric_direction": "higher",
"raw128_proxy_axis": true,
"values": {
"minimal": {
"raw": 0.4943004846572876,
"metric_key": "mrr",
"source": "results/episode_task_suite/tier2_task_suite/camera_view_sync_retrieval/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.4943004846572876,
"raw_text": "0.4943",
"status_label": "scored"
},
"neural_mlp": {
"raw": 0.24086658656597137,
"metric_key": "mrr",
"source": "results/episode_task_suite/tier2_task_suite/neural_mlp/camera_view_sync_retrieval/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.24086658656597137,
"raw_text": "0.2409",
"status_label": "scored"
}
}
},
{
"task_number": 20,
"task_id": "time_to_transition",
"label": "Time-to-Next-Transition Regression",
"axis_label": "20 Time-to-Next-Transition Regression",
"short_label": "Time2bdry",
"provenance_source": "historical_result_bundle",
"metric_key": "mae",
"metric_name": "MAE frames",
"metric_direction": "lower",
"raw128_proxy_axis": false,
"values": {
"minimal": {
"raw": 10.53735637664795,
"metric_key": "mae",
"source": "results/episode_task_suite/tier2_task_suite/time_to_transition/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 1.0,
"raw_text": "10.54",
"status_label": "scored"
},
"neural_mlp": {
"raw": 10.55449390411377,
"metric_key": "mae",
"source": "results/episode_task_suite/tier2_task_suite/neural_mlp/time_to_transition/metrics.json",
"scope": "single_episode_public_sample",
"status": "scored",
"normalized_score": 0.9983762814568361,
"raw_text": "10.55",
"status_label": "scored"
}
}
}
],
"task_method_result_matrix": [
{
"task_number": 1,
"task_id": "timeline_action",
"task_label": "Action Recognition",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.05,
"raw_text": "0.0500",
"normalized_score": 0.05,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/timeline_action/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 1,
"task_id": "timeline_action",
"task_label": "Action Recognition",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.014814814814814814,
"raw_text": "0.0148",
"normalized_score": 0.014814814814814814,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/neural_mlp/timeline_action/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 2,
"task_id": "timeline_subtask",
"task_label": "Procedure Step Recognition",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.05056355513846935,
"raw_text": "0.0506",
"normalized_score": 0.05056355513846935,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/timeline_subtask/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 2,
"task_id": "timeline_subtask",
"task_label": "Procedure Step Recognition",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.02810810810810811,
"raw_text": "0.0281",
"normalized_score": 0.02810810810810811,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/neural_mlp/timeline_subtask/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 3,
"task_id": "transition_detection",
"task_label": "Action Boundary Detection",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.6118237590630229,
"raw_text": "0.6118",
"normalized_score": 0.6118237590630229,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/transition_detection/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 3,
"task_id": "transition_detection",
"task_label": "Action Boundary Detection",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.5862068965517241,
"raw_text": "0.5862",
"normalized_score": 0.5862068965517241,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/neural_mlp/transition_detection/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 4,
"task_id": "next_action",
"task_label": "Next-Action Prediction",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.05925925925925927,
"raw_text": "0.0593",
"normalized_score": 0.05925925925925927,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/next_action/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 4,
"task_id": "next_action",
"task_label": "Next-Action Prediction",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.04186046511627907,
"raw_text": "0.0419",
"normalized_score": 0.04186046511627907,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/neural_mlp/next_action/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 5,
"task_id": "hand_trajectory_forecast",
"task_label": "Hand Trajectory Forecasting",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.8646570444107056,
"raw_text": "0.8647",
"normalized_score": 0.12473175026322614,
"metric_key": "mpjpe",
"source": "results/episode_task_suite/hand_trajectory_forecast/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 5,
"task_id": "hand_trajectory_forecast",
"task_label": "Hand Trajectory Forecasting",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.10785018652677536,
"raw_text": "0.1079",
"normalized_score": 1.0,
"metric_key": "mpjpe",
"source": "results/episode_task_suite/neural_mlp/hand_trajectory_forecast/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 6,
"task_id": "contact_prediction",
"task_label": "Contact State Prediction",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 1.0,
"raw_text": "1.000",
"normalized_score": 1.0,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/contact_prediction/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 6,
"task_id": "contact_prediction",
"task_label": "Contact State Prediction",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 1.0,
"raw_text": "1.000",
"normalized_score": 1.0,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/neural_mlp/contact_prediction/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 7,
"task_id": "object_relevance",
"task_label": "Object Relevance Prediction",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.18034382095361662,
"raw_text": "0.1803",
"normalized_score": 0.18034382095361662,
"metric_key": "micro_f1",
"source": "results/episode_task_suite/object_relevance/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 7,
"task_id": "object_relevance",
"task_label": "Object Relevance Prediction",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.1679279279279279,
"raw_text": "0.1679",
"normalized_score": 0.1679279279279279,
"metric_key": "micro_f1",
"source": "results/episode_task_suite/neural_mlp/object_relevance/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 8,
"task_id": "caption_grounding",
"task_label": "Language Grounding",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.016023479050338015,
"raw_text": "0.0160",
"normalized_score": 0.016023479050338015,
"metric_key": "mrr",
"source": "results/episode_task_suite/caption_grounding/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 8,
"task_id": "caption_grounding",
"task_label": "Language Grounding",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.01684125567132316,
"raw_text": "0.0168",
"normalized_score": 0.01684125567132316,
"metric_key": "mrr",
"source": "results/episode_task_suite/neural_mlp/caption_grounding/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 9,
"task_id": "cross_modal_retrieval",
"task_label": "Cross-Modal Retrieval",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.26925966892956127,
"raw_text": "0.2693",
"normalized_score": 0.26925966892956127,
"metric_key": "mrr",
"source": "results/episode_task_suite/cross_modal_retrieval/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 9,
"task_id": "cross_modal_retrieval",
"task_label": "Cross-Modal Retrieval",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.1299971898648288,
"raw_text": "0.1300",
"normalized_score": 0.1299971898648288,
"metric_key": "mrr",
"source": "results/episode_task_suite/neural_mlp/cross_modal_retrieval/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 10,
"task_id": "modality_reconstruction",
"task_label": "Cross-Modal Reconstruction",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": -0.015271898913936655,
"raw_text": "-0.0153",
"normalized_score": 0.0,
"metric_key": "r2",
"source": "results/episode_task_suite/modality_reconstruction/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 10,
"task_id": "modality_reconstruction",
"task_label": "Cross-Modal Reconstruction",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": -0.010171410134180991,
"raw_text": "-0.0102",
"normalized_score": 0.0,
"metric_key": "r2",
"source": "results/episode_task_suite/neural_mlp/modality_reconstruction/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 11,
"task_id": "temporal_order",
"task_label": "Temporal Order Verification",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.5399515738498789,
"raw_text": "0.5400",
"normalized_score": 0.5399515738498789,
"metric_key": "f1",
"source": "results/episode_task_suite/temporal_order/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 11,
"task_id": "temporal_order",
"task_label": "Temporal Order Verification",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.8520179372197308,
"raw_text": "0.8520",
"normalized_score": 0.8520179372197308,
"metric_key": "f1",
"source": "results/episode_task_suite/neural_mlp/temporal_order/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 12,
"task_id": "misalignment_detection",
"task_label": "Multimodal Synchronization Detection",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.5051698670605613,
"raw_text": "0.5052",
"normalized_score": 0.5051698670605613,
"metric_key": "f1",
"source": "results/episode_task_suite/misalignment_detection/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 12,
"task_id": "misalignment_detection",
"task_label": "Multimodal Synchronization Detection",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.7152682255845944,
"raw_text": "0.7153",
"normalized_score": 0.7152682255845944,
"metric_key": "f1",
"source": "results/episode_task_suite/neural_mlp/misalignment_detection/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 13,
"task_id": "long_horizon_next_action",
"task_label": "Long-Horizon Next-Action Forecasting",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.07499999999999998,
"raw_text": "0.0750",
"normalized_score": 0.07499999999999998,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/tier2_task_suite/long_horizon_next_action/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 13,
"task_id": "long_horizon_next_action",
"task_label": "Long-Horizon Next-Action Forecasting",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.06545454545454546,
"raw_text": "0.0655",
"normalized_score": 0.06545454545454546,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/tier2_task_suite/neural_mlp/long_horizon_next_action/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 14,
"task_id": "next_subtask_forecast",
"task_label": "Long-Horizon Next-Subtask Forecasting",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.04545454545454545,
"raw_text": "0.0455",
"normalized_score": 0.04545454545454545,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/tier2_task_suite/next_subtask_forecast/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 14,
"task_id": "next_subtask_forecast",
"task_label": "Long-Horizon Next-Subtask Forecasting",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.050724637681159424,
"raw_text": "0.0507",
"normalized_score": 0.050724637681159424,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/tier2_task_suite/neural_mlp/next_subtask_forecast/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 15,
"task_id": "interaction_text_prediction",
"task_label": "Interaction Text Prediction",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.04444444444444444,
"raw_text": "0.0444",
"normalized_score": 0.04444444444444444,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/tier2_task_suite/interaction_text_prediction/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 15,
"task_id": "interaction_text_prediction",
"task_label": "Interaction Text Prediction",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0380952380952381,
"raw_text": "0.0381",
"normalized_score": 0.0380952380952381,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/tier2_task_suite/neural_mlp/interaction_text_prediction/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 16,
"task_id": "action_object_relation",
"task_label": "Action-Object Relation Prediction",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0,
"raw_text": "0.0000",
"normalized_score": 0.0,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/tier2_task_suite/action_object_relation/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 16,
"task_id": "action_object_relation",
"task_label": "Action-Object Relation Prediction",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0,
"raw_text": "0.0000",
"normalized_score": 0.0,
"metric_key": "macro_f1",
"source": "results/episode_task_suite/tier2_task_suite/neural_mlp/action_object_relation/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 17,
"task_id": "object_set_forecast",
"task_label": "Future Object-Set Forecasting",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.16939890710382516,
"raw_text": "0.1694",
"normalized_score": 0.16939890710382516,
"metric_key": "micro_f1",
"source": "results/episode_task_suite/tier2_task_suite/object_set_forecast/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 17,
"task_id": "object_set_forecast",
"task_label": "Future Object-Set Forecasting",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.19718309859154928,
"raw_text": "0.1972",
"normalized_score": 0.19718309859154928,
"metric_key": "micro_f1",
"source": "results/episode_task_suite/tier2_task_suite/neural_mlp/object_set_forecast/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 18,
"task_id": "imu_to_hand_pose",
"task_label": "IMU-to-Hand Pose Reconstruction",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.042049407958984375,
"raw_text": "0.0420",
"normalized_score": 1.0,
"metric_key": "mae",
"source": "results/episode_task_suite/tier2_task_suite/imu_to_hand_pose/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 18,
"task_id": "imu_to_hand_pose",
"task_label": "IMU-to-Hand Pose Reconstruction",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.042562149465084076,
"raw_text": "0.0426",
"normalized_score": 0.9879531106266066,
"metric_key": "mae",
"source": "results/episode_task_suite/tier2_task_suite/neural_mlp/imu_to_hand_pose/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 19,
"task_id": "camera_view_sync_retrieval",
"task_label": "Camera-View Synchronization Retrieval",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.4943004846572876,
"raw_text": "0.4943",
"normalized_score": 0.4943004846572876,
"metric_key": "mrr",
"source": "results/episode_task_suite/tier2_task_suite/camera_view_sync_retrieval/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 19,
"task_id": "camera_view_sync_retrieval",
"task_label": "Camera-View Synchronization Retrieval",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.24086658656597137,
"raw_text": "0.2409",
"normalized_score": 0.24086658656597137,
"metric_key": "mrr",
"source": "results/episode_task_suite/tier2_task_suite/neural_mlp/camera_view_sync_retrieval/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 20,
"task_id": "time_to_transition",
"task_label": "Time-to-Next-Transition Regression",
"series_id": "minimal",
"method": "Minimal",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 10.53735637664795,
"raw_text": "10.54",
"normalized_score": 1.0,
"metric_key": "mae",
"source": "results/episode_task_suite/tier2_task_suite/time_to_transition/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
},
{
"task_number": 20,
"task_id": "time_to_transition",
"task_label": "Time-to-Next-Transition Regression",
"series_id": "neural_mlp",
"method": "Neural MLP",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 10.55449390411377,
"raw_text": "10.55",
"normalized_score": 0.9983762814568361,
"metric_key": "mae",
"source": "results/episode_task_suite/tier2_task_suite/neural_mlp/time_to_transition/metrics.json",
"scope": "single_episode_public_sample",
"reason": null
}
]
}