| { |
| "title": "Single-Episode 20-Task Radar", |
| "status": "pass", |
| "generated_at_utc": "2026-06-22T13:13:58+00:00", |
| "description": "Minimal and Neural MLP baselines on the one public sample episode, both scored on all 20 task contracts.", |
| "task_count": 20, |
| "method_count": 2, |
| "method_task_record_count": 40, |
| "scored_method_task_count": 40, |
| "normalization_policy": { |
| "higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]", |
| "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task", |
| "raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table", |
| "radar_visual_radius": "SVG radar panels use sqrt(normalized_score) for radius so polygon area remains closer to the score and low-valued but real differences stay visible; the JSON and matrix retain exact linear normalized_score values", |
| "result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used", |
| "foundation_model_overlay": "Qwen3-Omni and Cosmos3 are grouped in the foundation-model radar panel. All current public model rows have 20 scored task records, with source paths retained for every metric.", |
| "metadata_128_overlay": "128-episode aligned baselines are grouped in the metadata/text radar panel. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists.", |
| "raw_128_overlay": "128-episode raw-feature baselines are grouped in the raw-feature radar panel. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export." |
| }, |
| "chart_design": { |
| "mode": "grouped_small_multiples", |
| "method_count": 2, |
| "reason": "This split view has 2 methods and 40 method-task records; grouped radar panels keep related methods readable while retaining the unified source matrix.", |
| "groups": [ |
| { |
| "id": "single_episode", |
| "title": "Single-episode sample", |
| "series_ids": [ |
| "minimal", |
| "neural_mlp" |
| ] |
| } |
| ], |
| "visual_radius_transform": "sqrt(normalized_score)", |
| "exact_value_source": "docs/data/task_method_20_result_matrix.json" |
| }, |
| "source_unified_radar": "docs/data/unified_task_model_radar.json", |
| "source_result_matrix": "docs/data/task_method_20_result_matrix.json", |
| "series": [ |
| { |
| "id": "minimal", |
| "label": "Minimal", |
| "short_label": "Min", |
| "color": "#ccffa0", |
| "kind": "full_20_task_baseline", |
| "scope": "1 public sample episode", |
| "stroke_dasharray": null, |
| "method_detail": "Single-episode simple heads over the public sample split.", |
| "plotted_as": "grouped small-multiple radar panel with direct legend and coverage badges", |
| "result_record_count": 20, |
| "scored_task_count": 20, |
| "covered_task_count": 20, |
| "proxy_scored_task_count": 0, |
| "scoreless_task_count": 0, |
| "unsupported_task_count": 0, |
| "not_evaluated_task_count": 0, |
| "status_counts": { |
| "scored": 20 |
| }, |
| "coverage_fraction": 1.0, |
| "result_record_fraction": 1.0 |
| }, |
| { |
| "id": "neural_mlp", |
| "label": "Neural MLP", |
| "short_label": "NN", |
| "color": "#67e8d1", |
| "kind": "full_20_task_baseline", |
| "scope": "1 public sample episode", |
| "stroke_dasharray": null, |
| "method_detail": "Single-episode compact PyTorch MLP heads on the same 20 task contracts.", |
| "plotted_as": "grouped small-multiple radar panel with direct legend and coverage badges", |
| "result_record_count": 20, |
| "scored_task_count": 20, |
| "covered_task_count": 20, |
| "proxy_scored_task_count": 0, |
| "scoreless_task_count": 0, |
| "unsupported_task_count": 0, |
| "not_evaluated_task_count": 0, |
| "status_counts": { |
| "scored": 20 |
| }, |
| "coverage_fraction": 1.0, |
| "result_record_fraction": 1.0 |
| } |
| ], |
| "tasks": [ |
| { |
| "task_number": 1, |
| "task_id": "timeline_action", |
| "label": "Action Recognition", |
| "axis_label": "01 Action Recognition", |
| "short_label": "Action", |
| "provenance_source": "walkthrough_backed_task_contract", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 0.05, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/timeline_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.05, |
| "raw_text": "0.0500", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.014814814814814814, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/neural_mlp/timeline_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.014814814814814814, |
| "raw_text": "0.0148", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 2, |
| "task_id": "timeline_subtask", |
| "label": "Procedure Step Recognition", |
| "axis_label": "02 Procedure Step Recognition", |
| "short_label": "Step", |
| "provenance_source": "walkthrough_backed_task_contract", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 0.05056355513846935, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/timeline_subtask/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.05056355513846935, |
| "raw_text": "0.0506", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.02810810810810811, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/neural_mlp/timeline_subtask/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.02810810810810811, |
| "raw_text": "0.0281", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 3, |
| "task_id": "transition_detection", |
| "label": "Action Boundary Detection", |
| "axis_label": "03 Action Boundary Detection", |
| "short_label": "Boundary", |
| "provenance_source": "walkthrough_backed_task_contract", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 0.6118237590630229, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/transition_detection/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.6118237590630229, |
| "raw_text": "0.6118", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.5862068965517241, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/neural_mlp/transition_detection/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.5862068965517241, |
| "raw_text": "0.5862", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 4, |
| "task_id": "next_action", |
| "label": "Next-Action Prediction", |
| "axis_label": "04 Next-Action Prediction", |
| "short_label": "Next act", |
| "provenance_source": "walkthrough_backed_task_contract", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 0.05925925925925927, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/next_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.05925925925925927, |
| "raw_text": "0.0593", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.04186046511627907, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/neural_mlp/next_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.04186046511627907, |
| "raw_text": "0.0419", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 5, |
| "task_id": "hand_trajectory_forecast", |
| "label": "Hand Trajectory Forecasting", |
| "axis_label": "05 Hand Trajectory Forecasting", |
| "short_label": "Hand traj", |
| "provenance_source": "walkthrough_backed_task_contract", |
| "metric_key": "mpjpe", |
| "metric_name": "MPJPE", |
| "metric_direction": "lower", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 0.8646570444107056, |
| "metric_key": "mpjpe", |
| "source": "results/episode_task_suite/hand_trajectory_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.12473175026322614, |
| "raw_text": "0.8647", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.10785018652677536, |
| "metric_key": "mpjpe", |
| "source": "results/episode_task_suite/neural_mlp/hand_trajectory_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 1.0, |
| "raw_text": "0.1079", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 6, |
| "task_id": "contact_prediction", |
| "label": "Contact State Prediction", |
| "axis_label": "06 Contact State Prediction", |
| "short_label": "Contact", |
| "provenance_source": "walkthrough_backed_task_contract", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 1.0, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/contact_prediction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 1.0, |
| "raw_text": "1.000", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 1.0, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/neural_mlp/contact_prediction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 1.0, |
| "raw_text": "1.000", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 7, |
| "task_id": "object_relevance", |
| "label": "Object Relevance Prediction", |
| "axis_label": "07 Object Relevance Prediction", |
| "short_label": "Objects", |
| "provenance_source": "walkthrough_backed_task_contract", |
| "metric_key": "micro_f1", |
| "metric_name": "micro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 0.18034382095361662, |
| "metric_key": "micro_f1", |
| "source": "results/episode_task_suite/object_relevance/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.18034382095361662, |
| "raw_text": "0.1803", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.1679279279279279, |
| "metric_key": "micro_f1", |
| "source": "results/episode_task_suite/neural_mlp/object_relevance/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.1679279279279279, |
| "raw_text": "0.1679", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 8, |
| "task_id": "caption_grounding", |
| "label": "Language Grounding", |
| "axis_label": "08 Language Grounding", |
| "short_label": "Language", |
| "provenance_source": "walkthrough_backed_task_contract", |
| "metric_key": "mrr", |
| "metric_name": "MRR", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 0.016023479050338015, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/caption_grounding/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.016023479050338015, |
| "raw_text": "0.0160", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.01684125567132316, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/neural_mlp/caption_grounding/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.01684125567132316, |
| "raw_text": "0.0168", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 9, |
| "task_id": "cross_modal_retrieval", |
| "label": "Cross-Modal Retrieval", |
| "axis_label": "09 Cross-Modal Retrieval", |
| "short_label": "X-modal", |
| "provenance_source": "walkthrough_backed_task_contract", |
| "metric_key": "mrr", |
| "metric_name": "MRR", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 0.26925966892956127, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/cross_modal_retrieval/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.26925966892956127, |
| "raw_text": "0.2693", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.1299971898648288, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/neural_mlp/cross_modal_retrieval/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.1299971898648288, |
| "raw_text": "0.1300", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 10, |
| "task_id": "modality_reconstruction", |
| "label": "Cross-Modal Reconstruction", |
| "axis_label": "10 Cross-Modal Reconstruction", |
| "short_label": "Recon", |
| "provenance_source": "walkthrough_backed_task_contract", |
| "metric_key": "r2", |
| "metric_name": "R2", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": -0.015271898913936655, |
| "metric_key": "r2", |
| "source": "results/episode_task_suite/modality_reconstruction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.0, |
| "raw_text": "-0.0153", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": -0.010171410134180991, |
| "metric_key": "r2", |
| "source": "results/episode_task_suite/neural_mlp/modality_reconstruction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.0, |
| "raw_text": "-0.0102", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 11, |
| "task_id": "temporal_order", |
| "label": "Temporal Order Verification", |
| "axis_label": "11 Temporal Order Verification", |
| "short_label": "Order", |
| "provenance_source": "walkthrough_backed_task_contract", |
| "metric_key": "f1", |
| "metric_name": "F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 0.5399515738498789, |
| "metric_key": "f1", |
| "source": "results/episode_task_suite/temporal_order/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.5399515738498789, |
| "raw_text": "0.5400", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.8520179372197308, |
| "metric_key": "f1", |
| "source": "results/episode_task_suite/neural_mlp/temporal_order/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.8520179372197308, |
| "raw_text": "0.8520", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 12, |
| "task_id": "misalignment_detection", |
| "label": "Multimodal Synchronization Detection", |
| "axis_label": "12 Multimodal Synchronization Detection", |
| "short_label": "Sync", |
| "provenance_source": "walkthrough_backed_task_contract", |
| "metric_key": "f1", |
| "metric_name": "F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 0.5051698670605613, |
| "metric_key": "f1", |
| "source": "results/episode_task_suite/misalignment_detection/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.5051698670605613, |
| "raw_text": "0.5052", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.7152682255845944, |
| "metric_key": "f1", |
| "source": "results/episode_task_suite/neural_mlp/misalignment_detection/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.7152682255845944, |
| "raw_text": "0.7153", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 13, |
| "task_id": "long_horizon_next_action", |
| "label": "Long-Horizon Next-Action Forecasting", |
| "axis_label": "13 Long-Horizon Next-Action Forecasting", |
| "short_label": "Long act", |
| "provenance_source": "historical_result_bundle", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 0.07499999999999998, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/long_horizon_next_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.07499999999999998, |
| "raw_text": "0.0750", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.06545454545454546, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/long_horizon_next_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.06545454545454546, |
| "raw_text": "0.0655", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 14, |
| "task_id": "next_subtask_forecast", |
| "label": "Long-Horizon Next-Subtask Forecasting", |
| "axis_label": "14 Long-Horizon Next-Subtask Forecasting", |
| "short_label": "Long step", |
| "provenance_source": "historical_result_bundle", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 0.04545454545454545, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/next_subtask_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.04545454545454545, |
| "raw_text": "0.0455", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.050724637681159424, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/next_subtask_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.050724637681159424, |
| "raw_text": "0.0507", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 15, |
| "task_id": "interaction_text_prediction", |
| "label": "Interaction Text Prediction", |
| "axis_label": "15 Interaction Text Prediction", |
| "short_label": "Interact txt", |
| "provenance_source": "historical_result_bundle", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": true, |
| "values": { |
| "minimal": { |
| "raw": 0.04444444444444444, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/interaction_text_prediction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.04444444444444444, |
| "raw_text": "0.0444", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.0380952380952381, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/interaction_text_prediction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.0380952380952381, |
| "raw_text": "0.0381", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 16, |
| "task_id": "action_object_relation", |
| "label": "Action-Object Relation Prediction", |
| "axis_label": "16 Action-Object Relation Prediction", |
| "short_label": "Act+obj", |
| "provenance_source": "historical_result_bundle", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/action_object_relation/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.0, |
| "raw_text": "0.0000", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/action_object_relation/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.0, |
| "raw_text": "0.0000", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 17, |
| "task_id": "object_set_forecast", |
| "label": "Future Object-Set Forecasting", |
| "axis_label": "17 Future Object-Set Forecasting", |
| "short_label": "Future obj", |
| "provenance_source": "historical_result_bundle", |
| "metric_key": "micro_f1", |
| "metric_name": "micro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 0.16939890710382516, |
| "metric_key": "micro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/object_set_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.16939890710382516, |
| "raw_text": "0.1694", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.19718309859154928, |
| "metric_key": "micro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/object_set_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.19718309859154928, |
| "raw_text": "0.1972", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 18, |
| "task_id": "imu_to_hand_pose", |
| "label": "IMU-to-Hand Pose Reconstruction", |
| "axis_label": "18 IMU-to-Hand Pose Reconstruction", |
| "short_label": "IMU->hand", |
| "provenance_source": "historical_result_bundle", |
| "metric_key": "mae", |
| "metric_name": "MAE", |
| "metric_direction": "lower", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 0.042049407958984375, |
| "metric_key": "mae", |
| "source": "results/episode_task_suite/tier2_task_suite/imu_to_hand_pose/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 1.0, |
| "raw_text": "0.0420", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.042562149465084076, |
| "metric_key": "mae", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/imu_to_hand_pose/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.9879531106266066, |
| "raw_text": "0.0426", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 19, |
| "task_id": "camera_view_sync_retrieval", |
| "label": "Camera-View Synchronization Retrieval", |
| "axis_label": "19 Camera-View Synchronization Retrieval", |
| "short_label": "Cam sync", |
| "provenance_source": "historical_result_bundle", |
| "metric_key": "mrr", |
| "metric_name": "MRR", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": true, |
| "values": { |
| "minimal": { |
| "raw": 0.4943004846572876, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/tier2_task_suite/camera_view_sync_retrieval/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.4943004846572876, |
| "raw_text": "0.4943", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 0.24086658656597137, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/camera_view_sync_retrieval/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.24086658656597137, |
| "raw_text": "0.2409", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 20, |
| "task_id": "time_to_transition", |
| "label": "Time-to-Next-Transition Regression", |
| "axis_label": "20 Time-to-Next-Transition Regression", |
| "short_label": "Time2bdry", |
| "provenance_source": "historical_result_bundle", |
| "metric_key": "mae", |
| "metric_name": "MAE frames", |
| "metric_direction": "lower", |
| "raw128_proxy_axis": false, |
| "values": { |
| "minimal": { |
| "raw": 10.53735637664795, |
| "metric_key": "mae", |
| "source": "results/episode_task_suite/tier2_task_suite/time_to_transition/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 1.0, |
| "raw_text": "10.54", |
| "status_label": "scored" |
| }, |
| "neural_mlp": { |
| "raw": 10.55449390411377, |
| "metric_key": "mae", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/time_to_transition/metrics.json", |
| "scope": "single_episode_public_sample", |
| "status": "scored", |
| "normalized_score": 0.9983762814568361, |
| "raw_text": "10.55", |
| "status_label": "scored" |
| } |
| } |
| } |
| ], |
| "task_method_result_matrix": [ |
| { |
| "task_number": 1, |
| "task_id": "timeline_action", |
| "task_label": "Action Recognition", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.05, |
| "raw_text": "0.0500", |
| "normalized_score": 0.05, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/timeline_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 1, |
| "task_id": "timeline_action", |
| "task_label": "Action Recognition", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.014814814814814814, |
| "raw_text": "0.0148", |
| "normalized_score": 0.014814814814814814, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/neural_mlp/timeline_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 2, |
| "task_id": "timeline_subtask", |
| "task_label": "Procedure Step Recognition", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.05056355513846935, |
| "raw_text": "0.0506", |
| "normalized_score": 0.05056355513846935, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/timeline_subtask/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 2, |
| "task_id": "timeline_subtask", |
| "task_label": "Procedure Step Recognition", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.02810810810810811, |
| "raw_text": "0.0281", |
| "normalized_score": 0.02810810810810811, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/neural_mlp/timeline_subtask/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 3, |
| "task_id": "transition_detection", |
| "task_label": "Action Boundary Detection", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.6118237590630229, |
| "raw_text": "0.6118", |
| "normalized_score": 0.6118237590630229, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/transition_detection/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 3, |
| "task_id": "transition_detection", |
| "task_label": "Action Boundary Detection", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.5862068965517241, |
| "raw_text": "0.5862", |
| "normalized_score": 0.5862068965517241, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/neural_mlp/transition_detection/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 4, |
| "task_id": "next_action", |
| "task_label": "Next-Action Prediction", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.05925925925925927, |
| "raw_text": "0.0593", |
| "normalized_score": 0.05925925925925927, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/next_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 4, |
| "task_id": "next_action", |
| "task_label": "Next-Action Prediction", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.04186046511627907, |
| "raw_text": "0.0419", |
| "normalized_score": 0.04186046511627907, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/neural_mlp/next_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 5, |
| "task_id": "hand_trajectory_forecast", |
| "task_label": "Hand Trajectory Forecasting", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.8646570444107056, |
| "raw_text": "0.8647", |
| "normalized_score": 0.12473175026322614, |
| "metric_key": "mpjpe", |
| "source": "results/episode_task_suite/hand_trajectory_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 5, |
| "task_id": "hand_trajectory_forecast", |
| "task_label": "Hand Trajectory Forecasting", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.10785018652677536, |
| "raw_text": "0.1079", |
| "normalized_score": 1.0, |
| "metric_key": "mpjpe", |
| "source": "results/episode_task_suite/neural_mlp/hand_trajectory_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 6, |
| "task_id": "contact_prediction", |
| "task_label": "Contact State Prediction", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 1.0, |
| "raw_text": "1.000", |
| "normalized_score": 1.0, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/contact_prediction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 6, |
| "task_id": "contact_prediction", |
| "task_label": "Contact State Prediction", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 1.0, |
| "raw_text": "1.000", |
| "normalized_score": 1.0, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/neural_mlp/contact_prediction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 7, |
| "task_id": "object_relevance", |
| "task_label": "Object Relevance Prediction", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.18034382095361662, |
| "raw_text": "0.1803", |
| "normalized_score": 0.18034382095361662, |
| "metric_key": "micro_f1", |
| "source": "results/episode_task_suite/object_relevance/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 7, |
| "task_id": "object_relevance", |
| "task_label": "Object Relevance Prediction", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.1679279279279279, |
| "raw_text": "0.1679", |
| "normalized_score": 0.1679279279279279, |
| "metric_key": "micro_f1", |
| "source": "results/episode_task_suite/neural_mlp/object_relevance/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 8, |
| "task_id": "caption_grounding", |
| "task_label": "Language Grounding", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.016023479050338015, |
| "raw_text": "0.0160", |
| "normalized_score": 0.016023479050338015, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/caption_grounding/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 8, |
| "task_id": "caption_grounding", |
| "task_label": "Language Grounding", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.01684125567132316, |
| "raw_text": "0.0168", |
| "normalized_score": 0.01684125567132316, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/neural_mlp/caption_grounding/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 9, |
| "task_id": "cross_modal_retrieval", |
| "task_label": "Cross-Modal Retrieval", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.26925966892956127, |
| "raw_text": "0.2693", |
| "normalized_score": 0.26925966892956127, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/cross_modal_retrieval/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 9, |
| "task_id": "cross_modal_retrieval", |
| "task_label": "Cross-Modal Retrieval", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.1299971898648288, |
| "raw_text": "0.1300", |
| "normalized_score": 0.1299971898648288, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/neural_mlp/cross_modal_retrieval/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 10, |
| "task_id": "modality_reconstruction", |
| "task_label": "Cross-Modal Reconstruction", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": -0.015271898913936655, |
| "raw_text": "-0.0153", |
| "normalized_score": 0.0, |
| "metric_key": "r2", |
| "source": "results/episode_task_suite/modality_reconstruction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 10, |
| "task_id": "modality_reconstruction", |
| "task_label": "Cross-Modal Reconstruction", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": -0.010171410134180991, |
| "raw_text": "-0.0102", |
| "normalized_score": 0.0, |
| "metric_key": "r2", |
| "source": "results/episode_task_suite/neural_mlp/modality_reconstruction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 11, |
| "task_id": "temporal_order", |
| "task_label": "Temporal Order Verification", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.5399515738498789, |
| "raw_text": "0.5400", |
| "normalized_score": 0.5399515738498789, |
| "metric_key": "f1", |
| "source": "results/episode_task_suite/temporal_order/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 11, |
| "task_id": "temporal_order", |
| "task_label": "Temporal Order Verification", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.8520179372197308, |
| "raw_text": "0.8520", |
| "normalized_score": 0.8520179372197308, |
| "metric_key": "f1", |
| "source": "results/episode_task_suite/neural_mlp/temporal_order/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 12, |
| "task_id": "misalignment_detection", |
| "task_label": "Multimodal Synchronization Detection", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.5051698670605613, |
| "raw_text": "0.5052", |
| "normalized_score": 0.5051698670605613, |
| "metric_key": "f1", |
| "source": "results/episode_task_suite/misalignment_detection/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 12, |
| "task_id": "misalignment_detection", |
| "task_label": "Multimodal Synchronization Detection", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.7152682255845944, |
| "raw_text": "0.7153", |
| "normalized_score": 0.7152682255845944, |
| "metric_key": "f1", |
| "source": "results/episode_task_suite/neural_mlp/misalignment_detection/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 13, |
| "task_id": "long_horizon_next_action", |
| "task_label": "Long-Horizon Next-Action Forecasting", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.07499999999999998, |
| "raw_text": "0.0750", |
| "normalized_score": 0.07499999999999998, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/long_horizon_next_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 13, |
| "task_id": "long_horizon_next_action", |
| "task_label": "Long-Horizon Next-Action Forecasting", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.06545454545454546, |
| "raw_text": "0.0655", |
| "normalized_score": 0.06545454545454546, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/long_horizon_next_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 14, |
| "task_id": "next_subtask_forecast", |
| "task_label": "Long-Horizon Next-Subtask Forecasting", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.04545454545454545, |
| "raw_text": "0.0455", |
| "normalized_score": 0.04545454545454545, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/next_subtask_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 14, |
| "task_id": "next_subtask_forecast", |
| "task_label": "Long-Horizon Next-Subtask Forecasting", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.050724637681159424, |
| "raw_text": "0.0507", |
| "normalized_score": 0.050724637681159424, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/next_subtask_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 15, |
| "task_id": "interaction_text_prediction", |
| "task_label": "Interaction Text Prediction", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.04444444444444444, |
| "raw_text": "0.0444", |
| "normalized_score": 0.04444444444444444, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/interaction_text_prediction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 15, |
| "task_id": "interaction_text_prediction", |
| "task_label": "Interaction Text Prediction", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0380952380952381, |
| "raw_text": "0.0381", |
| "normalized_score": 0.0380952380952381, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/interaction_text_prediction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 16, |
| "task_id": "action_object_relation", |
| "task_label": "Action-Object Relation Prediction", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0, |
| "raw_text": "0.0000", |
| "normalized_score": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/action_object_relation/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 16, |
| "task_id": "action_object_relation", |
| "task_label": "Action-Object Relation Prediction", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0, |
| "raw_text": "0.0000", |
| "normalized_score": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/action_object_relation/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 17, |
| "task_id": "object_set_forecast", |
| "task_label": "Future Object-Set Forecasting", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.16939890710382516, |
| "raw_text": "0.1694", |
| "normalized_score": 0.16939890710382516, |
| "metric_key": "micro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/object_set_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 17, |
| "task_id": "object_set_forecast", |
| "task_label": "Future Object-Set Forecasting", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.19718309859154928, |
| "raw_text": "0.1972", |
| "normalized_score": 0.19718309859154928, |
| "metric_key": "micro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/object_set_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 18, |
| "task_id": "imu_to_hand_pose", |
| "task_label": "IMU-to-Hand Pose Reconstruction", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.042049407958984375, |
| "raw_text": "0.0420", |
| "normalized_score": 1.0, |
| "metric_key": "mae", |
| "source": "results/episode_task_suite/tier2_task_suite/imu_to_hand_pose/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 18, |
| "task_id": "imu_to_hand_pose", |
| "task_label": "IMU-to-Hand Pose Reconstruction", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.042562149465084076, |
| "raw_text": "0.0426", |
| "normalized_score": 0.9879531106266066, |
| "metric_key": "mae", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/imu_to_hand_pose/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 19, |
| "task_id": "camera_view_sync_retrieval", |
| "task_label": "Camera-View Synchronization Retrieval", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.4943004846572876, |
| "raw_text": "0.4943", |
| "normalized_score": 0.4943004846572876, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/tier2_task_suite/camera_view_sync_retrieval/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 19, |
| "task_id": "camera_view_sync_retrieval", |
| "task_label": "Camera-View Synchronization Retrieval", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.24086658656597137, |
| "raw_text": "0.2409", |
| "normalized_score": 0.24086658656597137, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/camera_view_sync_retrieval/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 20, |
| "task_id": "time_to_transition", |
| "task_label": "Time-to-Next-Transition Regression", |
| "series_id": "minimal", |
| "method": "Minimal", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 10.53735637664795, |
| "raw_text": "10.54", |
| "normalized_score": 1.0, |
| "metric_key": "mae", |
| "source": "results/episode_task_suite/tier2_task_suite/time_to_transition/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| }, |
| { |
| "task_number": 20, |
| "task_id": "time_to_transition", |
| "task_label": "Time-to-Next-Transition Regression", |
| "series_id": "neural_mlp", |
| "method": "Neural MLP", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 10.55449390411377, |
| "raw_text": "10.55", |
| "normalized_score": 0.9983762814568361, |
| "metric_key": "mae", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/time_to_transition/metrics.json", |
| "scope": "single_episode_public_sample", |
| "reason": null |
| } |
| ] |
| } |
|
|