ropedia-xperience-10m-task-baselines / data /episode128_task_model_radar.json
cy0307's picture
Add files using upload-large-folder tool
d272538 verified
Raw
History Blame
185 kB
{
"title": "128-Episode 20-Task Radar",
"status": "pass",
"generated_at_utc": "2026-06-20T20:38:21+00:00",
"description": "Selected 128-episode metadata/raw baselines plus verified Qwen3/Cosmos branches. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.",
"task_count": 20,
"method_count": 7,
"method_task_record_count": 140,
"scored_method_task_count": 140,
"normalization_policy": {
"higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]",
"lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
"raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
"result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows (140/140 across the 7 methods in this file) with proxy flags and reasons retained where compact substitute targets are used",
"foundation_model_overlay": "Qwen3/Cosmos points are plotted only on task-aligned axes. Scoreless records mean the public result does not evaluate that task contract.",
"metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
"raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
},
"source_unified_radar": "docs/data/unified_task_model_radar.json",
"source_result_matrix": "docs/data/task_method_20_result_matrix.json",
"series": [
{
"id": "metadata128_simple",
"label": "128ep Aligned Simple",
"short_label": "128-S",
"color": "#ffd166",
"kind": "partial_128_episode_aligned_baseline",
"scope": "128 selected episodes, JSONL metadata/text plus staged sensor-block targets where available",
"stroke_dasharray": "9 6",
"method_detail": "128-episode aligned simple baselines: JSONL metadata/text tasks plus staged sensor-block tasks where the processed target exists.",
"plotted_as": "colored point overlay",
"result_record_count": 20,
"scored_task_count": 20,
"covered_task_count": 20,
"proxy_scored_task_count": 1,
"scoreless_task_count": 0,
"unsupported_task_count": 0,
"not_evaluated_task_count": 0,
"status_counts": {
"proxy_scored": 1,
"scored": 19
},
"coverage_fraction": 1.0,
"result_record_fraction": 1.0
},
{
"id": "metadata128_neural_mlp",
"label": "128ep Aligned NN",
"short_label": "128-NN",
"color": "#f472b6",
"kind": "partial_128_episode_aligned_baseline",
"scope": "128 selected episodes, JSONL metadata/text plus staged sensor-block targets where available",
"stroke_dasharray": "3 6",
"method_detail": "128-episode aligned MLP baselines: JSONL metadata/text tasks plus staged sensor-block tasks where the processed target exists.",
"plotted_as": "colored point overlay",
"result_record_count": 20,
"scored_task_count": 20,
"covered_task_count": 20,
"proxy_scored_task_count": 1,
"scoreless_task_count": 0,
"unsupported_task_count": 0,
"not_evaluated_task_count": 0,
"status_counts": {
"proxy_scored": 1,
"scored": 19
},
"coverage_fraction": 1.0,
"result_record_fraction": 1.0
},
{
"id": "raw128_simple",
"label": "128ep Raw Simple",
"short_label": "128-RS",
"color": "#f59e0b",
"kind": "complete_128_episode_raw_feature_baseline",
"scope": "128 selected episodes, staged 4430-dim sensor NPZ features; 2 compact proxy axes",
"stroke_dasharray": "8 4",
"method_detail": "128-episode 4430-dim sensor NPZ simple heads; tasks 15/19 use compact proxies.",
"plotted_as": "colored point overlay",
"result_record_count": 20,
"scored_task_count": 20,
"covered_task_count": 20,
"proxy_scored_task_count": 2,
"scoreless_task_count": 0,
"unsupported_task_count": 0,
"not_evaluated_task_count": 0,
"status_counts": {
"proxy_scored": 2,
"scored": 18
},
"coverage_fraction": 1.0,
"result_record_fraction": 1.0
},
{
"id": "raw128_neural_mlp",
"label": "128ep Raw NN",
"short_label": "128-RN",
"color": "#22d3ee",
"kind": "complete_128_episode_raw_feature_baseline",
"scope": "128 selected episodes, staged 4430-dim sensor NPZ features; 2 compact proxy axes",
"stroke_dasharray": "2 5",
"method_detail": "128-episode 4430-dim sensor NPZ MLP heads; tasks 15/19 use compact proxies.",
"plotted_as": "colored point overlay",
"result_record_count": 20,
"scored_task_count": 20,
"covered_task_count": 20,
"proxy_scored_task_count": 2,
"scoreless_task_count": 0,
"unsupported_task_count": 0,
"not_evaluated_task_count": 0,
"status_counts": {
"proxy_scored": 2,
"scored": 18
},
"coverage_fraction": 1.0,
"result_record_fraction": 1.0
},
{
"id": "qwen3_omni_v6_lora",
"label": "Qwen3-Omni v6 LoRA",
"short_label": "Qwen3",
"color": "#9bb8ff",
"kind": "partial_128_episode_foundation_model_overlay",
"scope": "128 selected episodes, held-out test",
"stroke_dasharray": "7 7",
"method_detail": "Verified held-out Qwen3-Omni v6 LoRA metrics, plus task 16 and any completed private-GPU future/retrieval/sensor-target probes scored from task-specific JSON.",
"plotted_as": "colored point overlay",
"result_record_count": 20,
"scored_task_count": 20,
"covered_task_count": 20,
"proxy_scored_task_count": 0,
"scoreless_task_count": 0,
"unsupported_task_count": 0,
"not_evaluated_task_count": 0,
"status_counts": {
"scored": 20
},
"coverage_fraction": 1.0,
"result_record_fraction": 1.0
},
{
"id": "cosmos3_super_reasoner",
"label": "Cosmos3-Super Reasoner",
"short_label": "C3-S",
"color": "#ff9c7a",
"kind": "partial_128_episode_foundation_model_overlay",
"scope": "128 selected episodes, held-out test",
"stroke_dasharray": "4 7",
"method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation, plus task 5/8/9/10/11/12/13/14/16/17/18/19/20 probes where public metrics exist.",
"plotted_as": "colored point overlay",
"result_record_count": 20,
"scored_task_count": 20,
"covered_task_count": 20,
"proxy_scored_task_count": 0,
"scoreless_task_count": 0,
"unsupported_task_count": 0,
"not_evaluated_task_count": 0,
"status_counts": {
"scored": 20
},
"coverage_fraction": 1.0,
"result_record_fraction": 1.0
},
{
"id": "cosmos3_nano_future_window",
"label": "Cosmos3-Nano Future Window",
"short_label": "C3-N",
"color": "#d9c7ff",
"kind": "partial_128_episode_world_model_overlay",
"scope": "128 selected episodes, held-out test",
"stroke_dasharray": "2 7",
"method_detail": "Verified Cosmos3-Nano future-window compatibility metrics, plus model-output probes for tasks 2/5/7/8/10/11/12/13/14/15/16/17/18/19 and a derived task-20 boundary timing probe scored from held-out future-window artifacts.",
"plotted_as": "colored point overlay",
"result_record_count": 20,
"scored_task_count": 20,
"covered_task_count": 20,
"proxy_scored_task_count": 0,
"scoreless_task_count": 0,
"unsupported_task_count": 0,
"not_evaluated_task_count": 0,
"status_counts": {
"scored": 20
},
"coverage_fraction": 1.0,
"result_record_fraction": 1.0
}
],
"tasks": [
{
"task_number": 1,
"task_id": "timeline_action",
"label": "Action Recognition",
"axis_label": "01 Action Recognition",
"short_label": "Action",
"origin": "original_public_sample_tasks",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 0.008252821966746326,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/timeline_action/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.008252821966746326,
"raw_text": "0.0083",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 0.004175793689174209,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/timeline_action/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.004175793689174209,
"raw_text": "0.0042",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.002915061325704321,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/timeline_action/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.002915061325704321,
"raw_text": "0.0029",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 0.0014955083181204041,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/timeline_action/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0014955083181204041,
"raw_text": "0.0015",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.0028830723979596335,
"metric_key": "action_macro_f1",
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.0028830723979596335,
"raw_text": "0.0029",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.0008284021201089245,
"metric_key": "action_macro_f1",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.0008284021201089245,
"raw_text": "0.0008",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.007936507936507936,
"metric_key": "action_accuracy_from_retrieved_future",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.007936507936507936,
"raw_text": "0.0079",
"status_label": "scored"
}
}
},
{
"task_number": 2,
"task_id": "timeline_subtask",
"label": "Procedure Step Recognition",
"axis_label": "02 Procedure Step Recognition",
"short_label": "Step",
"origin": "original_public_sample_tasks",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 0.00019512195121951218,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/timeline_subtask/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.00019512195121951218,
"raw_text": "0.0002",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 7.207207207207208e-05,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/timeline_subtask/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 7.207207207207208e-05,
"raw_text": "0.0001",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/timeline_subtask/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "0.0000",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 7.35632183908046e-05,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/timeline_subtask/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 7.35632183908046e-05,
"raw_text": "0.0001",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.0037313432835820895,
"metric_key": "subtask_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.0037313432835820895,
"raw_text": "0.0037",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.0,
"metric_key": "subtask_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "0.0000",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.0,
"metric_key": "timeline_subtask_macro_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_current_subtask_object_relevance_patched_textonly_20260621/timeline_subtask/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "0.0000",
"status_label": "scored"
}
}
},
{
"task_number": 3,
"task_id": "transition_detection",
"label": "Action Boundary Detection",
"axis_label": "03 Action Boundary Detection",
"short_label": "Boundary",
"origin": "original_public_sample_tasks",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 0.29652162550029315,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/transition_detection/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.29652162550029315,
"raw_text": "0.2965",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 0.4841733292368365,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/transition_detection/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.4841733292368365,
"raw_text": "0.4842",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.4203613574238283,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/transition_detection/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.4203613574238283,
"raw_text": "0.4204",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 0.4902206914147213,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/transition_detection/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.4902206914147213,
"raw_text": "0.4902",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.9898313492063492,
"metric_key": "transition_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.9898313492063492,
"raw_text": "0.9898",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.36830357142857145,
"metric_key": "transition_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.36830357142857145,
"raw_text": "0.3683",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.9682539682539683,
"metric_key": "transition_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.9682539682539683,
"raw_text": "0.9683",
"status_label": "scored"
}
}
},
{
"task_number": 4,
"task_id": "next_action",
"label": "Next-Action Prediction",
"axis_label": "04 Next-Action Prediction",
"short_label": "Next act",
"origin": "original_public_sample_tasks",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 0.006514774539765508,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/next_action/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.006514774539765508,
"raw_text": "0.0065",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 0.004910507980164745,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/next_action/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.004910507980164745,
"raw_text": "0.0049",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.003285273363482094,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/next_action/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.003285273363482094,
"raw_text": "0.0033",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 0.0018477984371755407,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/next_action/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0018477984371755407,
"raw_text": "0.0018",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.04305335446381405,
"metric_key": "next_action_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.04305335446381405,
"raw_text": "0.0431",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.013392857142857142,
"metric_key": "next_action_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.013392857142857142,
"raw_text": "0.0134",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.007936507936507936,
"metric_key": "action_accuracy_from_retrieved_future",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.007936507936507936,
"raw_text": "0.0079",
"status_label": "scored"
}
}
},
{
"task_number": 5,
"task_id": "hand_trajectory_forecast",
"label": "Hand Trajectory Forecasting",
"axis_label": "05 Hand Trajectory Forecasting",
"short_label": "Hand traj",
"origin": "original_public_sample_tasks",
"metric_key": "mpjpe",
"metric_name": "MPJPE",
"metric_direction": "lower",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 8.817333221435547,
"metric_key": "mpjpe",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/hand_trajectory_forecast/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.012231610603598841,
"raw_text": "8.817",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 0.429434210062027,
"metric_key": "mpjpe",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/hand_trajectory_forecast/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.25114484128127007,
"raw_text": "0.4294",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.2729249894618988,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/hand_trajectory_forecast/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.39516420515180267,
"raw_text": "0.2729",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 0.18475216627120972,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/hand_trajectory_forecast/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.5837560051580399,
"raw_text": "0.1848",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.7216105627267382,
"metric_key": "hand_trajectory_forecast_mrr",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/hand_trajectory_forecast/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.149457605109387,
"raw_text": "0.7216",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.8915253522315043,
"metric_key": "hand_trajectory_forecast_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/hand_trajectory_forecast/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.12097265238372007,
"raw_text": "0.8915",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.6912806884333101,
"metric_key": "hand_trajectory_forecast_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621/hand_trajectory_forecast/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.15601504328321764,
"raw_text": "0.6913",
"status_label": "scored"
}
}
},
{
"task_number": 6,
"task_id": "contact_prediction",
"label": "Contact State Prediction",
"axis_label": "06 Contact State Prediction",
"short_label": "Contact",
"origin": "original_public_sample_tasks",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 0.4381481308057444,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/contact_prediction/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.4381481308057444,
"raw_text": "0.4381",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 0.5682695682695682,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/contact_prediction/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.5682695682695682,
"raw_text": "0.5683",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.886990707397193,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/contact_prediction/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.886990707397193,
"raw_text": "0.8870",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 1.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/contact_prediction/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 1.0,
"raw_text": "1.000",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.8177083333333334,
"metric_key": "contact_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.8177083333333334,
"raw_text": "0.8177",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.32142857142857145,
"metric_key": "contact_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.32142857142857145,
"raw_text": "0.3214",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.7433862433862434,
"metric_key": "contact_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.7433862433862434,
"raw_text": "0.7434",
"status_label": "scored"
}
}
},
{
"task_number": 7,
"task_id": "object_relevance",
"label": "Object Relevance Prediction",
"axis_label": "07 Object Relevance Prediction",
"short_label": "Objects",
"origin": "original_public_sample_tasks",
"metric_key": "micro_f1",
"metric_name": "micro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 0.17764578833693304,
"metric_key": "micro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/object_relevance/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.17764578833693304,
"raw_text": "0.1776",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 0.18662723837686876,
"metric_key": "micro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/object_relevance/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.18662723837686876,
"raw_text": "0.1866",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.0655376369662084,
"metric_key": "micro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/object_relevance/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0655376369662084,
"raw_text": "0.0655",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 0.1765890386972509,
"metric_key": "micro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/object_relevance/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.1765890386972509,
"raw_text": "0.1766",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.3064982378331287,
"metric_key": "object_micro_f1",
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.3064982378331287,
"raw_text": "0.3065",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.13704276146316333,
"metric_key": "object_micro_f1",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.13704276146316333,
"raw_text": "0.1370",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.00047209895194032665,
"metric_key": "object_relevance_micro_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_current_subtask_object_relevance_patched_textonly_20260621/object_relevance/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.00047209895194032665,
"raw_text": "0.0005",
"status_label": "scored"
}
}
},
{
"task_number": 8,
"task_id": "caption_grounding",
"label": "Language Grounding",
"axis_label": "08 Language Grounding",
"short_label": "Language",
"origin": "original_public_sample_tasks",
"metric_key": "mrr",
"metric_name": "MRR",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 0.002332374220713973,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/caption_grounding/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.002332374220713973,
"raw_text": "0.0023",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 0.008236799389123917,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/caption_grounding/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.008236799389123917,
"raw_text": "0.0082",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.011138836853206158,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/caption_grounding/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.011138836853206158,
"raw_text": "0.0111",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 0.0063402121886610985,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/caption_grounding/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0063402121886610985,
"raw_text": "0.0063",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.8764467592592605,
"metric_key": "caption_grounding_mrr",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_retrieval_task_probes_a100_20260617T175919Z/caption_grounding/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.8764467592592605,
"raw_text": "0.8764",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.30639899644580487,
"metric_key": "caption_grounding_iou",
"source": "results/omni_finetune/model_output_task_probes_20260616/caption_grounding/cosmos3_super_reasoner/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.30639899644580487,
"raw_text": "0.3064",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.5221041086644663,
"metric_key": "caption_grounding_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621/caption_grounding/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.5221041086644663,
"raw_text": "0.5221",
"status_label": "scored"
}
}
},
{
"task_number": 9,
"task_id": "cross_modal_retrieval",
"label": "Cross-Modal Retrieval",
"axis_label": "09 Cross-Modal Retrieval",
"short_label": "X-modal",
"origin": "original_public_sample_tasks",
"metric_key": "mrr",
"metric_name": "MRR",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 0.002587692579254508,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/cross_modal_retrieval/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.002587692579254508,
"raw_text": "0.0026",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 0.0026067993603646755,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/cross_modal_retrieval/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0026067993603646755,
"raw_text": "0.0026",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.003459817497059703,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/cross_modal_retrieval/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.003459817497059703,
"raw_text": "0.0035",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 0.002535284962505102,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/cross_modal_retrieval/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.002535284962505102,
"raw_text": "0.0025",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.5080191798941732,
"metric_key": "cross_modal_retrieval_mrr",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_cross_modal_retrieval_probe_a100_20260618T000000Z/cross_modal_retrieval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.5080191798941732,
"raw_text": "0.5080",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.6628490677465636,
"metric_key": "cross_modal_retrieval_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/cross_modal_retrieval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.6628490677465636,
"raw_text": "0.6628",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.022138720585222767,
"metric_key": "future_retrieval_mrr",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.022138720585222767,
"raw_text": "0.0221",
"status_label": "scored"
}
}
},
{
"task_number": 10,
"task_id": "modality_reconstruction",
"label": "Cross-Modal Reconstruction",
"axis_label": "10 Cross-Modal Reconstruction",
"short_label": "Recon",
"origin": "original_public_sample_tasks",
"metric_key": "r2",
"metric_name": "R2",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": -190.66106203944798,
"metric_key": "r2",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/modality_reconstruction/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "-190.66",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": -0.43481132003942147,
"metric_key": "r2",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/modality_reconstruction/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "-0.4348",
"status_label": "scored"
},
"raw128_simple": {
"raw": -1.3450960391924882,
"metric_key": "r2",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/modality_reconstruction/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "-1.345",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": -1.3974418160502369,
"metric_key": "r2",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/modality_reconstruction/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "-1.397",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.9670547540707002,
"metric_key": "modality_reconstruction_mrr",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.9670547540707002,
"raw_text": "0.9671",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.9939466801653591,
"metric_key": "modality_reconstruction_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/modality_reconstruction/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.9939466801653591,
"raw_text": "0.9939",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.0002873382957286892,
"metric_key": "feature_reconstruction_quality",
"source": "results/omni_finetune/model_output_task_probes_20260616/modality_reconstruction/cosmos3_nano_future_window/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.0002873382957286892,
"raw_text": "0.0003",
"status_label": "scored"
}
}
},
{
"task_number": 11,
"task_id": "temporal_order",
"label": "Temporal Order Verification",
"axis_label": "11 Temporal Order Verification",
"short_label": "Order",
"origin": "original_public_sample_tasks",
"metric_key": "f1",
"metric_name": "F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 0.4198864140782312,
"metric_key": "f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/temporal_order/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.4198864140782312,
"raw_text": "0.4199",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 0.8252408266656923,
"metric_key": "f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/temporal_order/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.8252408266656923,
"raw_text": "0.8252",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.49824413370686593,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/temporal_order/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.49824413370686593,
"raw_text": "0.4982",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 0.8030047098504103,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/temporal_order/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.8030047098504103,
"raw_text": "0.8030",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.40984631701404173,
"metric_key": "temporal_order_f1",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/temporal_order/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.40984631701404173,
"raw_text": "0.4098",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.6286317274823326,
"metric_key": "temporal_order_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/temporal_order/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.6286317274823326,
"raw_text": "0.6286",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.5954109425716205,
"metric_key": "temporal_order_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_future_order_misalignment_patched_textonly_20260621/temporal_order/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.5954109425716205,
"raw_text": "0.5954",
"status_label": "scored"
}
}
},
{
"task_number": 12,
"task_id": "misalignment_detection",
"label": "Multimodal Synchronization Detection",
"axis_label": "12 Multimodal Synchronization Detection",
"short_label": "Sync",
"origin": "original_public_sample_tasks",
"metric_key": "f1",
"metric_name": "F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 0.49980060227663614,
"metric_key": "f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/misalignment_detection/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.49980060227663614,
"raw_text": "0.4998",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 0.7773773780941162,
"metric_key": "f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/misalignment_detection/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.7773773780941162,
"raw_text": "0.7774",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.4958867673901769,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/misalignment_detection/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.4958867673901769,
"raw_text": "0.4959",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 0.8272709077974252,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/misalignment_detection/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.8272709077974252,
"raw_text": "0.8273",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.3344936184319576,
"metric_key": "misalignment_detection_f1",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/misalignment_detection/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.3344936184319576,
"raw_text": "0.3345",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.37271645981034185,
"metric_key": "misalignment_detection_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/misalignment_detection/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.37271645981034185,
"raw_text": "0.3727",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.477201691802725,
"metric_key": "misalignment_detection_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_future_order_misalignment_patched_textonly_20260621/misalignment_detection/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.477201691802725,
"raw_text": "0.4772",
"status_label": "scored"
}
}
},
{
"task_number": 13,
"task_id": "long_horizon_next_action",
"label": "Long-Horizon Next-Action Forecasting",
"axis_label": "13 Long-Horizon Next-Action Forecasting",
"short_label": "Long act",
"origin": "additional_public_sample_tasks",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 0.004579592783699693,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/long_horizon_next_action/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.004579592783699693,
"raw_text": "0.0046",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 0.0029821307969142615,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/long_horizon_next_action/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0029821307969142615,
"raw_text": "0.0030",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.0024280172369056294,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/long_horizon_next_action/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0024280172369056294,
"raw_text": "0.0024",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 0.001063859887389299,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/long_horizon_next_action/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.001063859887389299,
"raw_text": "0.0011",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.0023356666867101906,
"metric_key": "long_horizon_next_action_macro_f1",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/long_horizon_next_action/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.0023356666867101906,
"raw_text": "0.0023",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.008807588075880758,
"metric_key": "long_horizon_next_action_macro_f1",
"source": "results/omni_finetune/model_output_task_probes_20260616/long_horizon_next_action/cosmos3_super_reasoner/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.008807588075880758,
"raw_text": "0.0088",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.0024906600249066007,
"metric_key": "long_horizon_next_action_macro_f1",
"source": "results/omni_finetune/model_output_task_probes_20260616/long_horizon_next_action/cosmos3_nano_future_window/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.0024906600249066007,
"raw_text": "0.0025",
"status_label": "scored"
}
}
},
{
"task_number": 14,
"task_id": "next_subtask_forecast",
"label": "Long-Horizon Next-Subtask Forecasting",
"axis_label": "14 Long-Horizon Next-Subtask Forecasting",
"short_label": "Long step",
"origin": "additional_public_sample_tasks",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 0.0001206030150753769,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/next_subtask_forecast/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0001206030150753769,
"raw_text": "0.0001",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 2.086049543676662e-05,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/next_subtask_forecast/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 2.086049543676662e-05,
"raw_text": "0.0000",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/next_subtask_forecast/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "0.0000",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/next_subtask_forecast/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "0.0000",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.004206715978529301,
"metric_key": "next_subtask_forecast_macro_f1",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/next_subtask_forecast/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.004206715978529301,
"raw_text": "0.0042",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.0,
"metric_key": "next_subtask_forecast_macro_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/next_subtask_forecast/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "0.0000",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.006614876224708678,
"metric_key": "next_subtask_forecast_macro_f1",
"source": "results/omni_finetune/model_output_task_probes_20260616/next_subtask_forecast/cosmos3_nano_future_window/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.006614876224708678,
"raw_text": "0.0066",
"status_label": "scored"
}
}
},
{
"task_number": 15,
"task_id": "interaction_text_prediction",
"label": "Interaction Text Prediction",
"axis_label": "15 Interaction Text Prediction",
"short_label": "Interact txt",
"origin": "additional_public_sample_tasks",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": true,
"values": {
"metadata128_simple": {
"raw": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/interaction_text_prediction/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "0.0000",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/interaction_text_prediction/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "0.0000",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.012611998261547169,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/interaction_text_prediction/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "proxy_scored",
"reason": "documented compact proxy completion for this raw128 task axis",
"normalized_score": 0.012611998261547169,
"raw_text": "0.0126",
"status_label": "proxy scored"
},
"raw128_neural_mlp": {
"raw": 0.009791421280985521,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/interaction_text_prediction/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "proxy_scored",
"reason": "documented compact proxy completion for this raw128 task axis",
"normalized_score": 0.009791421280985521,
"raw_text": "0.0098",
"status_label": "proxy scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.4318674027510605,
"metric_key": "macro_f1",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_interaction_text_task15_a100_20260620T010305Z/interaction_text_prediction/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.4318674027510605,
"raw_text": "0.4319",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.17949512355264183,
"metric_key": "macro_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_super_interaction_text_task15_textonly_v1_20260620T1558Z/interaction_text_prediction/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.17949512355264183,
"raw_text": "0.1795",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.1788367958939587,
"metric_key": "macro_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_interaction_text_task15_patched_textonly_20260621/interaction_text_prediction/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.1788367958939587,
"raw_text": "0.1788",
"status_label": "scored"
}
}
},
{
"task_number": 16,
"task_id": "action_object_relation",
"label": "Action-Object Relation Prediction",
"axis_label": "16 Action-Object Relation Prediction",
"short_label": "Act+obj",
"origin": "additional_public_sample_tasks",
"metric_key": "macro_f1",
"metric_name": "macro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/action_object_relation/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "0.0000",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/action_object_relation/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "0.0000",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/action_object_relation/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "0.0000",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/action_object_relation/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "0.0000",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.0002220083079671497,
"metric_key": "action_object_relation_macro_f1",
"source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.0002220083079671497,
"raw_text": "0.0002",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.0,
"metric_key": "action_object_relation_macro_f1",
"source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.0,
"raw_text": "0.0000",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.002794157670325683,
"metric_key": "action_object_relation_macro_f1",
"source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_nano_future_window/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.002794157670325683,
"raw_text": "0.0028",
"status_label": "scored"
}
}
},
{
"task_number": 17,
"task_id": "object_set_forecast",
"label": "Future Object-Set Forecasting",
"axis_label": "17 Future Object-Set Forecasting",
"short_label": "Future obj",
"origin": "additional_public_sample_tasks",
"metric_key": "micro_f1",
"metric_name": "micro-F1",
"metric_direction": "higher",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 0.17656983343047333,
"metric_key": "micro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/object_set_forecast/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.17656983343047333,
"raw_text": "0.1766",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 0.17418550827844048,
"metric_key": "micro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/object_set_forecast/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.17418550827844048,
"raw_text": "0.1742",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.06469493412657774,
"metric_key": "micro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/object_set_forecast/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.06469493412657774,
"raw_text": "0.0647",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 0.17523098630012288,
"metric_key": "micro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/object_set_forecast/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.17523098630012288,
"raw_text": "0.1752",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.1659483964851402,
"metric_key": "object_set_forecast_micro_f1",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/object_set_forecast/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.1659483964851402,
"raw_text": "0.1659",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.0009279881217520415,
"metric_key": "object_set_forecast_micro_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/object_set_forecast/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.0009279881217520415,
"raw_text": "0.0009",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.01781970649895178,
"metric_key": "object_set_forecast_micro_f1",
"source": "results/omni_finetune/model_output_task_probes_20260616/object_set_forecast/cosmos3_nano_future_window/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.01781970649895178,
"raw_text": "0.0178",
"status_label": "scored"
}
}
},
{
"task_number": 18,
"task_id": "imu_to_hand_pose",
"label": "IMU-to-Hand Pose Reconstruction",
"axis_label": "18 IMU-to-Hand Pose Reconstruction",
"short_label": "IMU->hand",
"origin": "additional_public_sample_tasks",
"metric_key": "mae",
"metric_name": "MAE",
"metric_direction": "lower",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 0.2294670194387436,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/imu_to_hand_pose/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.18324815505876868,
"raw_text": "0.2295",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 0.2555866539478302,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/imu_to_hand_pose/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.16452114110609004,
"raw_text": "0.2556",
"status_label": "scored"
},
"raw128_simple": {
"raw": 0.22941437363624573,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/imu_to_hand_pose/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.1832902066792771,
"raw_text": "0.2294",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 0.252998411655426,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/imu_to_hand_pose/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.1662042369509182,
"raw_text": "0.2530",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.9641651902471952,
"metric_key": "imu_to_hand_pose_mrr",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/imu_to_hand_pose/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.043612244441436056,
"raw_text": "0.9642",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.9896650636969544,
"metric_key": "imu_to_hand_pose_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/imu_to_hand_pose/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.04248852414968175,
"raw_text": "0.9897",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.9920062431451954,
"metric_key": "imu_to_hand_pose_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621/imu_to_hand_pose/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.04238824931752955,
"raw_text": "0.9920",
"status_label": "scored"
}
}
},
{
"task_number": 19,
"task_id": "camera_view_sync_retrieval",
"label": "Camera-View Synchronization Retrieval",
"axis_label": "19 Camera-View Synchronization Retrieval",
"short_label": "Cam sync",
"origin": "additional_public_sample_tasks",
"metric_key": "mrr",
"metric_name": "MRR",
"metric_direction": "higher",
"raw128_proxy_axis": true,
"values": {
"metadata128_simple": {
"raw": 0.0021294241305440664,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/camera_view_sync_retrieval/metrics.json",
"scope": "multi_episode_128_aligned_metadata_baseline",
"status": "proxy_scored",
"reason": "paired camera-view embeddings are absent from the 128 JSONL/feature export; metadata features retrieve the synchronized same-window depth/audio block as a documented compact synchronization proxy",
"normalized_score": 0.0021294241305440664,
"raw_text": "0.0021",
"status_label": "proxy scored"
},
"metadata128_neural_mlp": {
"raw": 0.0027218370232731104,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/camera_view_sync_retrieval/metrics.json",
"scope": "multi_episode_128_aligned_metadata_baseline",
"status": "proxy_scored",
"reason": "paired camera-view embeddings are absent from the 128 JSONL/feature export; metadata features retrieve the synchronized same-window depth/audio block as a documented compact synchronization proxy",
"normalized_score": 0.0027218370232731104,
"raw_text": "0.0027",
"status_label": "proxy scored"
},
"raw128_simple": {
"raw": 0.0026625150348991156,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/camera_view_sync_retrieval/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "proxy_scored",
"reason": "documented compact proxy completion for this raw128 task axis",
"normalized_score": 0.0026625150348991156,
"raw_text": "0.0027",
"status_label": "proxy scored"
},
"raw128_neural_mlp": {
"raw": 0.0025448438245803118,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/camera_view_sync_retrieval/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "proxy_scored",
"reason": "documented compact proxy completion for this raw128 task axis",
"normalized_score": 0.0025448438245803118,
"raw_text": "0.0025",
"status_label": "proxy scored"
},
"qwen3_omni_v6_lora": {
"raw": 0.6587714947089998,
"metric_key": "camera_view_sync_retrieval_mrr",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_camera_view_sync_mosaic_tile_a100_20260619T0305Z/camera_view_sync_retrieval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.6587714947089998,
"raw_text": "0.6588",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 0.9979751961528727,
"metric_key": "camera_view_sync_retrieval_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/camera_view_sync_retrieval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.9979751961528727,
"raw_text": "0.9980",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 0.9989875980764363,
"metric_key": "camera_view_sync_retrieval_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621/camera_view_sync_retrieval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.9989875980764363,
"raw_text": "0.9990",
"status_label": "scored"
}
}
},
{
"task_number": 20,
"task_id": "time_to_transition",
"label": "Time-to-Next-Transition Regression",
"axis_label": "20 Time-to-Next-Transition Regression",
"short_label": "Time2bdry",
"origin": "additional_public_sample_tasks",
"metric_key": "mae",
"metric_name": "MAE frames",
"metric_direction": "lower",
"raw128_proxy_axis": false,
"values": {
"metadata128_simple": {
"raw": 624.8108520507812,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/time_to_transition/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.016864874132806403,
"raw_text": "624.81",
"status_label": "scored"
},
"metadata128_neural_mlp": {
"raw": 41.4664421081543,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/time_to_transition/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.25411768748242325,
"raw_text": "41.47",
"status_label": "scored"
},
"raw128_simple": {
"raw": 52.32759475708008,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/time_to_transition/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.20137284019197565,
"raw_text": "52.33",
"status_label": "scored"
},
"raw128_neural_mlp": {
"raw": 42.374061584472656,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/time_to_transition/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"status": "scored",
"reason": null,
"normalized_score": 0.24867468405504953,
"raw_text": "42.37",
"status_label": "scored"
},
"qwen3_omni_v6_lora": {
"raw": 134.0687422166874,
"metric_key": "time_to_transition_mae",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/time_to_transition/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.07859666766782253,
"raw_text": "134.07",
"status_label": "scored"
},
"cosmos3_super_reasoner": {
"raw": 52.94642857142857,
"metric_key": "time_to_transition_mae",
"source": "results/omni_finetune/model_output_task_probes_20260616/time_to_transition/cosmos3_super_reasoner/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.19901920981190058,
"raw_text": "52.95",
"status_label": "scored"
},
"cosmos3_nano_future_window": {
"raw": 33.80952380952381,
"metric_key": "time_to_transition_mae",
"source": "results/omni_finetune/model_output_task_probes_20260616/time_to_transition/cosmos3_nano_future_window/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"status": "scored",
"reason": null,
"normalized_score": 0.3116682871966295,
"raw_text": "33.81",
"status_label": "scored"
}
}
}
],
"task_method_result_matrix": [
{
"task_number": 1,
"task_id": "timeline_action",
"task_label": "Action Recognition",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.008252821966746326,
"raw_text": "0.0083",
"normalized_score": 0.008252821966746326,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/timeline_action/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 1,
"task_id": "timeline_action",
"task_label": "Action Recognition",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.004175793689174209,
"raw_text": "0.0042",
"normalized_score": 0.004175793689174209,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/timeline_action/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 1,
"task_id": "timeline_action",
"task_label": "Action Recognition",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.002915061325704321,
"raw_text": "0.0029",
"normalized_score": 0.002915061325704321,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/timeline_action/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 1,
"task_id": "timeline_action",
"task_label": "Action Recognition",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0014955083181204041,
"raw_text": "0.0015",
"normalized_score": 0.0014955083181204041,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/timeline_action/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 1,
"task_id": "timeline_action",
"task_label": "Action Recognition",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0028830723979596335,
"raw_text": "0.0029",
"normalized_score": 0.0028830723979596335,
"metric_key": "action_macro_f1",
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 1,
"task_id": "timeline_action",
"task_label": "Action Recognition",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0008284021201089245,
"raw_text": "0.0008",
"normalized_score": 0.0008284021201089245,
"metric_key": "action_macro_f1",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 1,
"task_id": "timeline_action",
"task_label": "Action Recognition",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.007936507936507936,
"raw_text": "0.0079",
"normalized_score": 0.007936507936507936,
"metric_key": "action_accuracy_from_retrieved_future",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 2,
"task_id": "timeline_subtask",
"task_label": "Procedure Step Recognition",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.00019512195121951218,
"raw_text": "0.0002",
"normalized_score": 0.00019512195121951218,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/timeline_subtask/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 2,
"task_id": "timeline_subtask",
"task_label": "Procedure Step Recognition",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 7.207207207207208e-05,
"raw_text": "0.0001",
"normalized_score": 7.207207207207208e-05,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/timeline_subtask/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 2,
"task_id": "timeline_subtask",
"task_label": "Procedure Step Recognition",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0,
"raw_text": "0.0000",
"normalized_score": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/timeline_subtask/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 2,
"task_id": "timeline_subtask",
"task_label": "Procedure Step Recognition",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 7.35632183908046e-05,
"raw_text": "0.0001",
"normalized_score": 7.35632183908046e-05,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/timeline_subtask/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 2,
"task_id": "timeline_subtask",
"task_label": "Procedure Step Recognition",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0037313432835820895,
"raw_text": "0.0037",
"normalized_score": 0.0037313432835820895,
"metric_key": "subtask_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 2,
"task_id": "timeline_subtask",
"task_label": "Procedure Step Recognition",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0,
"raw_text": "0.0000",
"normalized_score": 0.0,
"metric_key": "subtask_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 2,
"task_id": "timeline_subtask",
"task_label": "Procedure Step Recognition",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0,
"raw_text": "0.0000",
"normalized_score": 0.0,
"metric_key": "timeline_subtask_macro_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_current_subtask_object_relevance_patched_textonly_20260621/timeline_subtask/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 3,
"task_id": "transition_detection",
"task_label": "Action Boundary Detection",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.29652162550029315,
"raw_text": "0.2965",
"normalized_score": 0.29652162550029315,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/transition_detection/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 3,
"task_id": "transition_detection",
"task_label": "Action Boundary Detection",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.4841733292368365,
"raw_text": "0.4842",
"normalized_score": 0.4841733292368365,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/transition_detection/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 3,
"task_id": "transition_detection",
"task_label": "Action Boundary Detection",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.4203613574238283,
"raw_text": "0.4204",
"normalized_score": 0.4203613574238283,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/transition_detection/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 3,
"task_id": "transition_detection",
"task_label": "Action Boundary Detection",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.4902206914147213,
"raw_text": "0.4902",
"normalized_score": 0.4902206914147213,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/transition_detection/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 3,
"task_id": "transition_detection",
"task_label": "Action Boundary Detection",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.9898313492063492,
"raw_text": "0.9898",
"normalized_score": 0.9898313492063492,
"metric_key": "transition_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 3,
"task_id": "transition_detection",
"task_label": "Action Boundary Detection",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.36830357142857145,
"raw_text": "0.3683",
"normalized_score": 0.36830357142857145,
"metric_key": "transition_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 3,
"task_id": "transition_detection",
"task_label": "Action Boundary Detection",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.9682539682539683,
"raw_text": "0.9683",
"normalized_score": 0.9682539682539683,
"metric_key": "transition_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 4,
"task_id": "next_action",
"task_label": "Next-Action Prediction",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.006514774539765508,
"raw_text": "0.0065",
"normalized_score": 0.006514774539765508,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/next_action/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 4,
"task_id": "next_action",
"task_label": "Next-Action Prediction",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.004910507980164745,
"raw_text": "0.0049",
"normalized_score": 0.004910507980164745,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/next_action/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 4,
"task_id": "next_action",
"task_label": "Next-Action Prediction",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.003285273363482094,
"raw_text": "0.0033",
"normalized_score": 0.003285273363482094,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/next_action/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 4,
"task_id": "next_action",
"task_label": "Next-Action Prediction",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0018477984371755407,
"raw_text": "0.0018",
"normalized_score": 0.0018477984371755407,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/next_action/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 4,
"task_id": "next_action",
"task_label": "Next-Action Prediction",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.04305335446381405,
"raw_text": "0.0431",
"normalized_score": 0.04305335446381405,
"metric_key": "next_action_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 4,
"task_id": "next_action",
"task_label": "Next-Action Prediction",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.013392857142857142,
"raw_text": "0.0134",
"normalized_score": 0.013392857142857142,
"metric_key": "next_action_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 4,
"task_id": "next_action",
"task_label": "Next-Action Prediction",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.007936507936507936,
"raw_text": "0.0079",
"normalized_score": 0.007936507936507936,
"metric_key": "action_accuracy_from_retrieved_future",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 5,
"task_id": "hand_trajectory_forecast",
"task_label": "Hand Trajectory Forecasting",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 8.817333221435547,
"raw_text": "8.817",
"normalized_score": 0.012231610603598841,
"metric_key": "mpjpe",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/hand_trajectory_forecast/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"reason": null
},
{
"task_number": 5,
"task_id": "hand_trajectory_forecast",
"task_label": "Hand Trajectory Forecasting",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.429434210062027,
"raw_text": "0.4294",
"normalized_score": 0.25114484128127007,
"metric_key": "mpjpe",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/hand_trajectory_forecast/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"reason": null
},
{
"task_number": 5,
"task_id": "hand_trajectory_forecast",
"task_label": "Hand Trajectory Forecasting",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.2729249894618988,
"raw_text": "0.2729",
"normalized_score": 0.39516420515180267,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/hand_trajectory_forecast/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 5,
"task_id": "hand_trajectory_forecast",
"task_label": "Hand Trajectory Forecasting",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.18475216627120972,
"raw_text": "0.1848",
"normalized_score": 0.5837560051580399,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/hand_trajectory_forecast/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 5,
"task_id": "hand_trajectory_forecast",
"task_label": "Hand Trajectory Forecasting",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.7216105627267382,
"raw_text": "0.7216",
"normalized_score": 0.149457605109387,
"metric_key": "hand_trajectory_forecast_mrr",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/hand_trajectory_forecast/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 5,
"task_id": "hand_trajectory_forecast",
"task_label": "Hand Trajectory Forecasting",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.8915253522315043,
"raw_text": "0.8915",
"normalized_score": 0.12097265238372007,
"metric_key": "hand_trajectory_forecast_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/hand_trajectory_forecast/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 5,
"task_id": "hand_trajectory_forecast",
"task_label": "Hand Trajectory Forecasting",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.6912806884333101,
"raw_text": "0.6913",
"normalized_score": 0.15601504328321764,
"metric_key": "hand_trajectory_forecast_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621/hand_trajectory_forecast/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 6,
"task_id": "contact_prediction",
"task_label": "Contact State Prediction",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.4381481308057444,
"raw_text": "0.4381",
"normalized_score": 0.4381481308057444,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/contact_prediction/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 6,
"task_id": "contact_prediction",
"task_label": "Contact State Prediction",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.5682695682695682,
"raw_text": "0.5683",
"normalized_score": 0.5682695682695682,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/contact_prediction/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 6,
"task_id": "contact_prediction",
"task_label": "Contact State Prediction",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.886990707397193,
"raw_text": "0.8870",
"normalized_score": 0.886990707397193,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/contact_prediction/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 6,
"task_id": "contact_prediction",
"task_label": "Contact State Prediction",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 1.0,
"raw_text": "1.000",
"normalized_score": 1.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/contact_prediction/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 6,
"task_id": "contact_prediction",
"task_label": "Contact State Prediction",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.8177083333333334,
"raw_text": "0.8177",
"normalized_score": 0.8177083333333334,
"metric_key": "contact_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 6,
"task_id": "contact_prediction",
"task_label": "Contact State Prediction",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.32142857142857145,
"raw_text": "0.3214",
"normalized_score": 0.32142857142857145,
"metric_key": "contact_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 6,
"task_id": "contact_prediction",
"task_label": "Contact State Prediction",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.7433862433862434,
"raw_text": "0.7434",
"normalized_score": 0.7433862433862434,
"metric_key": "contact_accuracy",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 7,
"task_id": "object_relevance",
"task_label": "Object Relevance Prediction",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.17764578833693304,
"raw_text": "0.1776",
"normalized_score": 0.17764578833693304,
"metric_key": "micro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/object_relevance/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 7,
"task_id": "object_relevance",
"task_label": "Object Relevance Prediction",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.18662723837686876,
"raw_text": "0.1866",
"normalized_score": 0.18662723837686876,
"metric_key": "micro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/object_relevance/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 7,
"task_id": "object_relevance",
"task_label": "Object Relevance Prediction",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0655376369662084,
"raw_text": "0.0655",
"normalized_score": 0.0655376369662084,
"metric_key": "micro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/object_relevance/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 7,
"task_id": "object_relevance",
"task_label": "Object Relevance Prediction",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.1765890386972509,
"raw_text": "0.1766",
"normalized_score": 0.1765890386972509,
"metric_key": "micro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/object_relevance/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 7,
"task_id": "object_relevance",
"task_label": "Object Relevance Prediction",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.3064982378331287,
"raw_text": "0.3065",
"normalized_score": 0.3064982378331287,
"metric_key": "object_micro_f1",
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 7,
"task_id": "object_relevance",
"task_label": "Object Relevance Prediction",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.13704276146316333,
"raw_text": "0.1370",
"normalized_score": 0.13704276146316333,
"metric_key": "object_micro_f1",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 7,
"task_id": "object_relevance",
"task_label": "Object Relevance Prediction",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.00047209895194032665,
"raw_text": "0.0005",
"normalized_score": 0.00047209895194032665,
"metric_key": "object_relevance_micro_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_current_subtask_object_relevance_patched_textonly_20260621/object_relevance/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 8,
"task_id": "caption_grounding",
"task_label": "Language Grounding",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.002332374220713973,
"raw_text": "0.0023",
"normalized_score": 0.002332374220713973,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/caption_grounding/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 8,
"task_id": "caption_grounding",
"task_label": "Language Grounding",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.008236799389123917,
"raw_text": "0.0082",
"normalized_score": 0.008236799389123917,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/caption_grounding/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 8,
"task_id": "caption_grounding",
"task_label": "Language Grounding",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.011138836853206158,
"raw_text": "0.0111",
"normalized_score": 0.011138836853206158,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/caption_grounding/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 8,
"task_id": "caption_grounding",
"task_label": "Language Grounding",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0063402121886610985,
"raw_text": "0.0063",
"normalized_score": 0.0063402121886610985,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/caption_grounding/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 8,
"task_id": "caption_grounding",
"task_label": "Language Grounding",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.8764467592592605,
"raw_text": "0.8764",
"normalized_score": 0.8764467592592605,
"metric_key": "caption_grounding_mrr",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_retrieval_task_probes_a100_20260617T175919Z/caption_grounding/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 8,
"task_id": "caption_grounding",
"task_label": "Language Grounding",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.30639899644580487,
"raw_text": "0.3064",
"normalized_score": 0.30639899644580487,
"metric_key": "caption_grounding_iou",
"source": "results/omni_finetune/model_output_task_probes_20260616/caption_grounding/cosmos3_super_reasoner/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 8,
"task_id": "caption_grounding",
"task_label": "Language Grounding",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.5221041086644663,
"raw_text": "0.5221",
"normalized_score": 0.5221041086644663,
"metric_key": "caption_grounding_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621/caption_grounding/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 9,
"task_id": "cross_modal_retrieval",
"task_label": "Cross-Modal Retrieval",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.002587692579254508,
"raw_text": "0.0026",
"normalized_score": 0.002587692579254508,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/cross_modal_retrieval/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"reason": null
},
{
"task_number": 9,
"task_id": "cross_modal_retrieval",
"task_label": "Cross-Modal Retrieval",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0026067993603646755,
"raw_text": "0.0026",
"normalized_score": 0.0026067993603646755,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/cross_modal_retrieval/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"reason": null
},
{
"task_number": 9,
"task_id": "cross_modal_retrieval",
"task_label": "Cross-Modal Retrieval",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.003459817497059703,
"raw_text": "0.0035",
"normalized_score": 0.003459817497059703,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/cross_modal_retrieval/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 9,
"task_id": "cross_modal_retrieval",
"task_label": "Cross-Modal Retrieval",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.002535284962505102,
"raw_text": "0.0025",
"normalized_score": 0.002535284962505102,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/cross_modal_retrieval/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 9,
"task_id": "cross_modal_retrieval",
"task_label": "Cross-Modal Retrieval",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.5080191798941732,
"raw_text": "0.5080",
"normalized_score": 0.5080191798941732,
"metric_key": "cross_modal_retrieval_mrr",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_cross_modal_retrieval_probe_a100_20260618T000000Z/cross_modal_retrieval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 9,
"task_id": "cross_modal_retrieval",
"task_label": "Cross-Modal Retrieval",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.6628490677465636,
"raw_text": "0.6628",
"normalized_score": 0.6628490677465636,
"metric_key": "cross_modal_retrieval_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/cross_modal_retrieval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 9,
"task_id": "cross_modal_retrieval",
"task_label": "Cross-Modal Retrieval",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.022138720585222767,
"raw_text": "0.0221",
"normalized_score": 0.022138720585222767,
"metric_key": "future_retrieval_mrr",
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 10,
"task_id": "modality_reconstruction",
"task_label": "Cross-Modal Reconstruction",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": -190.66106203944798,
"raw_text": "-190.66",
"normalized_score": 0.0,
"metric_key": "r2",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/modality_reconstruction/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"reason": null
},
{
"task_number": 10,
"task_id": "modality_reconstruction",
"task_label": "Cross-Modal Reconstruction",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": -0.43481132003942147,
"raw_text": "-0.4348",
"normalized_score": 0.0,
"metric_key": "r2",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/modality_reconstruction/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"reason": null
},
{
"task_number": 10,
"task_id": "modality_reconstruction",
"task_label": "Cross-Modal Reconstruction",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": -1.3450960391924882,
"raw_text": "-1.345",
"normalized_score": 0.0,
"metric_key": "r2",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/modality_reconstruction/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 10,
"task_id": "modality_reconstruction",
"task_label": "Cross-Modal Reconstruction",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": -1.3974418160502369,
"raw_text": "-1.397",
"normalized_score": 0.0,
"metric_key": "r2",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/modality_reconstruction/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 10,
"task_id": "modality_reconstruction",
"task_label": "Cross-Modal Reconstruction",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.9670547540707002,
"raw_text": "0.9671",
"normalized_score": 0.9670547540707002,
"metric_key": "modality_reconstruction_mrr",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 10,
"task_id": "modality_reconstruction",
"task_label": "Cross-Modal Reconstruction",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.9939466801653591,
"raw_text": "0.9939",
"normalized_score": 0.9939466801653591,
"metric_key": "modality_reconstruction_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/modality_reconstruction/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 10,
"task_id": "modality_reconstruction",
"task_label": "Cross-Modal Reconstruction",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0002873382957286892,
"raw_text": "0.0003",
"normalized_score": 0.0002873382957286892,
"metric_key": "feature_reconstruction_quality",
"source": "results/omni_finetune/model_output_task_probes_20260616/modality_reconstruction/cosmos3_nano_future_window/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 11,
"task_id": "temporal_order",
"task_label": "Temporal Order Verification",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.4198864140782312,
"raw_text": "0.4199",
"normalized_score": 0.4198864140782312,
"metric_key": "f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/temporal_order/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 11,
"task_id": "temporal_order",
"task_label": "Temporal Order Verification",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.8252408266656923,
"raw_text": "0.8252",
"normalized_score": 0.8252408266656923,
"metric_key": "f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/temporal_order/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 11,
"task_id": "temporal_order",
"task_label": "Temporal Order Verification",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.49824413370686593,
"raw_text": "0.4982",
"normalized_score": 0.49824413370686593,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/temporal_order/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 11,
"task_id": "temporal_order",
"task_label": "Temporal Order Verification",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.8030047098504103,
"raw_text": "0.8030",
"normalized_score": 0.8030047098504103,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/temporal_order/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 11,
"task_id": "temporal_order",
"task_label": "Temporal Order Verification",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.40984631701404173,
"raw_text": "0.4098",
"normalized_score": 0.40984631701404173,
"metric_key": "temporal_order_f1",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/temporal_order/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 11,
"task_id": "temporal_order",
"task_label": "Temporal Order Verification",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.6286317274823326,
"raw_text": "0.6286",
"normalized_score": 0.6286317274823326,
"metric_key": "temporal_order_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/temporal_order/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 11,
"task_id": "temporal_order",
"task_label": "Temporal Order Verification",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.5954109425716205,
"raw_text": "0.5954",
"normalized_score": 0.5954109425716205,
"metric_key": "temporal_order_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_future_order_misalignment_patched_textonly_20260621/temporal_order/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 12,
"task_id": "misalignment_detection",
"task_label": "Multimodal Synchronization Detection",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.49980060227663614,
"raw_text": "0.4998",
"normalized_score": 0.49980060227663614,
"metric_key": "f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/misalignment_detection/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"reason": null
},
{
"task_number": 12,
"task_id": "misalignment_detection",
"task_label": "Multimodal Synchronization Detection",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.7773773780941162,
"raw_text": "0.7774",
"normalized_score": 0.7773773780941162,
"metric_key": "f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/misalignment_detection/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"reason": null
},
{
"task_number": 12,
"task_id": "misalignment_detection",
"task_label": "Multimodal Synchronization Detection",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.4958867673901769,
"raw_text": "0.4959",
"normalized_score": 0.4958867673901769,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/misalignment_detection/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 12,
"task_id": "misalignment_detection",
"task_label": "Multimodal Synchronization Detection",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.8272709077974252,
"raw_text": "0.8273",
"normalized_score": 0.8272709077974252,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/misalignment_detection/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 12,
"task_id": "misalignment_detection",
"task_label": "Multimodal Synchronization Detection",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.3344936184319576,
"raw_text": "0.3345",
"normalized_score": 0.3344936184319576,
"metric_key": "misalignment_detection_f1",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/misalignment_detection/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 12,
"task_id": "misalignment_detection",
"task_label": "Multimodal Synchronization Detection",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.37271645981034185,
"raw_text": "0.3727",
"normalized_score": 0.37271645981034185,
"metric_key": "misalignment_detection_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/misalignment_detection/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 12,
"task_id": "misalignment_detection",
"task_label": "Multimodal Synchronization Detection",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.477201691802725,
"raw_text": "0.4772",
"normalized_score": 0.477201691802725,
"metric_key": "misalignment_detection_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_future_order_misalignment_patched_textonly_20260621/misalignment_detection/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 13,
"task_id": "long_horizon_next_action",
"task_label": "Long-Horizon Next-Action Forecasting",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.004579592783699693,
"raw_text": "0.0046",
"normalized_score": 0.004579592783699693,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/long_horizon_next_action/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 13,
"task_id": "long_horizon_next_action",
"task_label": "Long-Horizon Next-Action Forecasting",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0029821307969142615,
"raw_text": "0.0030",
"normalized_score": 0.0029821307969142615,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/long_horizon_next_action/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 13,
"task_id": "long_horizon_next_action",
"task_label": "Long-Horizon Next-Action Forecasting",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0024280172369056294,
"raw_text": "0.0024",
"normalized_score": 0.0024280172369056294,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/long_horizon_next_action/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 13,
"task_id": "long_horizon_next_action",
"task_label": "Long-Horizon Next-Action Forecasting",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.001063859887389299,
"raw_text": "0.0011",
"normalized_score": 0.001063859887389299,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/long_horizon_next_action/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 13,
"task_id": "long_horizon_next_action",
"task_label": "Long-Horizon Next-Action Forecasting",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0023356666867101906,
"raw_text": "0.0023",
"normalized_score": 0.0023356666867101906,
"metric_key": "long_horizon_next_action_macro_f1",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/long_horizon_next_action/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 13,
"task_id": "long_horizon_next_action",
"task_label": "Long-Horizon Next-Action Forecasting",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.008807588075880758,
"raw_text": "0.0088",
"normalized_score": 0.008807588075880758,
"metric_key": "long_horizon_next_action_macro_f1",
"source": "results/omni_finetune/model_output_task_probes_20260616/long_horizon_next_action/cosmos3_super_reasoner/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 13,
"task_id": "long_horizon_next_action",
"task_label": "Long-Horizon Next-Action Forecasting",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0024906600249066007,
"raw_text": "0.0025",
"normalized_score": 0.0024906600249066007,
"metric_key": "long_horizon_next_action_macro_f1",
"source": "results/omni_finetune/model_output_task_probes_20260616/long_horizon_next_action/cosmos3_nano_future_window/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 14,
"task_id": "next_subtask_forecast",
"task_label": "Long-Horizon Next-Subtask Forecasting",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0001206030150753769,
"raw_text": "0.0001",
"normalized_score": 0.0001206030150753769,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/next_subtask_forecast/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 14,
"task_id": "next_subtask_forecast",
"task_label": "Long-Horizon Next-Subtask Forecasting",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 2.086049543676662e-05,
"raw_text": "0.0000",
"normalized_score": 2.086049543676662e-05,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/next_subtask_forecast/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 14,
"task_id": "next_subtask_forecast",
"task_label": "Long-Horizon Next-Subtask Forecasting",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0,
"raw_text": "0.0000",
"normalized_score": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/next_subtask_forecast/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 14,
"task_id": "next_subtask_forecast",
"task_label": "Long-Horizon Next-Subtask Forecasting",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0,
"raw_text": "0.0000",
"normalized_score": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/next_subtask_forecast/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 14,
"task_id": "next_subtask_forecast",
"task_label": "Long-Horizon Next-Subtask Forecasting",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.004206715978529301,
"raw_text": "0.0042",
"normalized_score": 0.004206715978529301,
"metric_key": "next_subtask_forecast_macro_f1",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/next_subtask_forecast/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 14,
"task_id": "next_subtask_forecast",
"task_label": "Long-Horizon Next-Subtask Forecasting",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0,
"raw_text": "0.0000",
"normalized_score": 0.0,
"metric_key": "next_subtask_forecast_macro_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/next_subtask_forecast/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 14,
"task_id": "next_subtask_forecast",
"task_label": "Long-Horizon Next-Subtask Forecasting",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.006614876224708678,
"raw_text": "0.0066",
"normalized_score": 0.006614876224708678,
"metric_key": "next_subtask_forecast_macro_f1",
"source": "results/omni_finetune/model_output_task_probes_20260616/next_subtask_forecast/cosmos3_nano_future_window/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 15,
"task_id": "interaction_text_prediction",
"task_label": "Interaction Text Prediction",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0,
"raw_text": "0.0000",
"normalized_score": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/interaction_text_prediction/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 15,
"task_id": "interaction_text_prediction",
"task_label": "Interaction Text Prediction",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0,
"raw_text": "0.0000",
"normalized_score": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/interaction_text_prediction/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 15,
"task_id": "interaction_text_prediction",
"task_label": "Interaction Text Prediction",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "proxy_scored",
"status_label": "proxy scored",
"scored": true,
"proxy_scored": true,
"raw": 0.012611998261547169,
"raw_text": "0.0126",
"normalized_score": 0.012611998261547169,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/interaction_text_prediction/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": "documented compact proxy completion for this raw128 task axis"
},
{
"task_number": 15,
"task_id": "interaction_text_prediction",
"task_label": "Interaction Text Prediction",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "proxy_scored",
"status_label": "proxy scored",
"scored": true,
"proxy_scored": true,
"raw": 0.009791421280985521,
"raw_text": "0.0098",
"normalized_score": 0.009791421280985521,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/interaction_text_prediction/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": "documented compact proxy completion for this raw128 task axis"
},
{
"task_number": 15,
"task_id": "interaction_text_prediction",
"task_label": "Interaction Text Prediction",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.4318674027510605,
"raw_text": "0.4319",
"normalized_score": 0.4318674027510605,
"metric_key": "macro_f1",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_interaction_text_task15_a100_20260620T010305Z/interaction_text_prediction/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 15,
"task_id": "interaction_text_prediction",
"task_label": "Interaction Text Prediction",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.17949512355264183,
"raw_text": "0.1795",
"normalized_score": 0.17949512355264183,
"metric_key": "macro_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_super_interaction_text_task15_textonly_v1_20260620T1558Z/interaction_text_prediction/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 15,
"task_id": "interaction_text_prediction",
"task_label": "Interaction Text Prediction",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.1788367958939587,
"raw_text": "0.1788",
"normalized_score": 0.1788367958939587,
"metric_key": "macro_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_interaction_text_task15_patched_textonly_20260621/interaction_text_prediction/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 16,
"task_id": "action_object_relation",
"task_label": "Action-Object Relation Prediction",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0,
"raw_text": "0.0000",
"normalized_score": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/action_object_relation/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 16,
"task_id": "action_object_relation",
"task_label": "Action-Object Relation Prediction",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0,
"raw_text": "0.0000",
"normalized_score": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/action_object_relation/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 16,
"task_id": "action_object_relation",
"task_label": "Action-Object Relation Prediction",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0,
"raw_text": "0.0000",
"normalized_score": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/action_object_relation/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 16,
"task_id": "action_object_relation",
"task_label": "Action-Object Relation Prediction",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0,
"raw_text": "0.0000",
"normalized_score": 0.0,
"metric_key": "macro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/action_object_relation/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 16,
"task_id": "action_object_relation",
"task_label": "Action-Object Relation Prediction",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0002220083079671497,
"raw_text": "0.0002",
"normalized_score": 0.0002220083079671497,
"metric_key": "action_object_relation_macro_f1",
"source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 16,
"task_id": "action_object_relation",
"task_label": "Action-Object Relation Prediction",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0,
"raw_text": "0.0000",
"normalized_score": 0.0,
"metric_key": "action_object_relation_macro_f1",
"source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 16,
"task_id": "action_object_relation",
"task_label": "Action-Object Relation Prediction",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.002794157670325683,
"raw_text": "0.0028",
"normalized_score": 0.002794157670325683,
"metric_key": "action_object_relation_macro_f1",
"source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_nano_future_window/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 17,
"task_id": "object_set_forecast",
"task_label": "Future Object-Set Forecasting",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.17656983343047333,
"raw_text": "0.1766",
"normalized_score": 0.17656983343047333,
"metric_key": "micro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/object_set_forecast/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 17,
"task_id": "object_set_forecast",
"task_label": "Future Object-Set Forecasting",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.17418550827844048,
"raw_text": "0.1742",
"normalized_score": 0.17418550827844048,
"metric_key": "micro_f1",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/object_set_forecast/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 17,
"task_id": "object_set_forecast",
"task_label": "Future Object-Set Forecasting",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.06469493412657774,
"raw_text": "0.0647",
"normalized_score": 0.06469493412657774,
"metric_key": "micro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/object_set_forecast/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 17,
"task_id": "object_set_forecast",
"task_label": "Future Object-Set Forecasting",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.17523098630012288,
"raw_text": "0.1752",
"normalized_score": 0.17523098630012288,
"metric_key": "micro_f1",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/object_set_forecast/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 17,
"task_id": "object_set_forecast",
"task_label": "Future Object-Set Forecasting",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.1659483964851402,
"raw_text": "0.1659",
"normalized_score": 0.1659483964851402,
"metric_key": "object_set_forecast_micro_f1",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/object_set_forecast/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 17,
"task_id": "object_set_forecast",
"task_label": "Future Object-Set Forecasting",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.0009279881217520415,
"raw_text": "0.0009",
"normalized_score": 0.0009279881217520415,
"metric_key": "object_set_forecast_micro_f1",
"source": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/object_set_forecast/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 17,
"task_id": "object_set_forecast",
"task_label": "Future Object-Set Forecasting",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.01781970649895178,
"raw_text": "0.0178",
"normalized_score": 0.01781970649895178,
"metric_key": "object_set_forecast_micro_f1",
"source": "results/omni_finetune/model_output_task_probes_20260616/object_set_forecast/cosmos3_nano_future_window/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 18,
"task_id": "imu_to_hand_pose",
"task_label": "IMU-to-Hand Pose Reconstruction",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.2294670194387436,
"raw_text": "0.2295",
"normalized_score": 0.18324815505876868,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/imu_to_hand_pose/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"reason": null
},
{
"task_number": 18,
"task_id": "imu_to_hand_pose",
"task_label": "IMU-to-Hand Pose Reconstruction",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.2555866539478302,
"raw_text": "0.2556",
"normalized_score": 0.16452114110609004,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/imu_to_hand_pose/metrics.json",
"scope": "multi_episode_128_aligned_sensor_block_baseline",
"reason": null
},
{
"task_number": 18,
"task_id": "imu_to_hand_pose",
"task_label": "IMU-to-Hand Pose Reconstruction",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.22941437363624573,
"raw_text": "0.2294",
"normalized_score": 0.1832902066792771,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/imu_to_hand_pose/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 18,
"task_id": "imu_to_hand_pose",
"task_label": "IMU-to-Hand Pose Reconstruction",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.252998411655426,
"raw_text": "0.2530",
"normalized_score": 0.1662042369509182,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/imu_to_hand_pose/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 18,
"task_id": "imu_to_hand_pose",
"task_label": "IMU-to-Hand Pose Reconstruction",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.9641651902471952,
"raw_text": "0.9642",
"normalized_score": 0.043612244441436056,
"metric_key": "imu_to_hand_pose_mrr",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/imu_to_hand_pose/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 18,
"task_id": "imu_to_hand_pose",
"task_label": "IMU-to-Hand Pose Reconstruction",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.9896650636969544,
"raw_text": "0.9897",
"normalized_score": 0.04248852414968175,
"metric_key": "imu_to_hand_pose_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/imu_to_hand_pose/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 18,
"task_id": "imu_to_hand_pose",
"task_label": "IMU-to-Hand Pose Reconstruction",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.9920062431451954,
"raw_text": "0.9920",
"normalized_score": 0.04238824931752955,
"metric_key": "imu_to_hand_pose_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621/imu_to_hand_pose/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 19,
"task_id": "camera_view_sync_retrieval",
"task_label": "Camera-View Synchronization Retrieval",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "proxy_scored",
"status_label": "proxy scored",
"scored": true,
"proxy_scored": true,
"raw": 0.0021294241305440664,
"raw_text": "0.0021",
"normalized_score": 0.0021294241305440664,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/camera_view_sync_retrieval/metrics.json",
"scope": "multi_episode_128_aligned_metadata_baseline",
"reason": "paired camera-view embeddings are absent from the 128 JSONL/feature export; metadata features retrieve the synchronized same-window depth/audio block as a documented compact synchronization proxy"
},
{
"task_number": 19,
"task_id": "camera_view_sync_retrieval",
"task_label": "Camera-View Synchronization Retrieval",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "proxy_scored",
"status_label": "proxy scored",
"scored": true,
"proxy_scored": true,
"raw": 0.0027218370232731104,
"raw_text": "0.0027",
"normalized_score": 0.0027218370232731104,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/camera_view_sync_retrieval/metrics.json",
"scope": "multi_episode_128_aligned_metadata_baseline",
"reason": "paired camera-view embeddings are absent from the 128 JSONL/feature export; metadata features retrieve the synchronized same-window depth/audio block as a documented compact synchronization proxy"
},
{
"task_number": 19,
"task_id": "camera_view_sync_retrieval",
"task_label": "Camera-View Synchronization Retrieval",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "proxy_scored",
"status_label": "proxy scored",
"scored": true,
"proxy_scored": true,
"raw": 0.0026625150348991156,
"raw_text": "0.0027",
"normalized_score": 0.0026625150348991156,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/camera_view_sync_retrieval/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": "documented compact proxy completion for this raw128 task axis"
},
{
"task_number": 19,
"task_id": "camera_view_sync_retrieval",
"task_label": "Camera-View Synchronization Retrieval",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "proxy_scored",
"status_label": "proxy scored",
"scored": true,
"proxy_scored": true,
"raw": 0.0025448438245803118,
"raw_text": "0.0025",
"normalized_score": 0.0025448438245803118,
"metric_key": "mrr",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/camera_view_sync_retrieval/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": "documented compact proxy completion for this raw128 task axis"
},
{
"task_number": 19,
"task_id": "camera_view_sync_retrieval",
"task_label": "Camera-View Synchronization Retrieval",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.6587714947089998,
"raw_text": "0.6588",
"normalized_score": 0.6587714947089998,
"metric_key": "camera_view_sync_retrieval_mrr",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_camera_view_sync_mosaic_tile_a100_20260619T0305Z/camera_view_sync_retrieval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 19,
"task_id": "camera_view_sync_retrieval",
"task_label": "Camera-View Synchronization Retrieval",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.9979751961528727,
"raw_text": "0.9980",
"normalized_score": 0.9979751961528727,
"metric_key": "camera_view_sync_retrieval_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/camera_view_sync_retrieval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 19,
"task_id": "camera_view_sync_retrieval",
"task_label": "Camera-View Synchronization Retrieval",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 0.9989875980764363,
"raw_text": "0.9990",
"normalized_score": 0.9989875980764363,
"metric_key": "camera_view_sync_retrieval_mrr",
"source": "results/omni_finetune/xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621/camera_view_sync_retrieval/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 20,
"task_id": "time_to_transition",
"task_label": "Time-to-Next-Transition Regression",
"series_id": "metadata128_simple",
"method": "128ep Aligned Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 624.8108520507812,
"raw_text": "624.81",
"normalized_score": 0.016864874132806403,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/time_to_transition/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 20,
"task_id": "time_to_transition",
"task_label": "Time-to-Next-Transition Regression",
"series_id": "metadata128_neural_mlp",
"method": "128ep Aligned NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 41.4664421081543,
"raw_text": "41.47",
"normalized_score": 0.25411768748242325,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/time_to_transition/metrics.json",
"scope": "multi_episode_128_aligned_baseline",
"reason": null
},
{
"task_number": 20,
"task_id": "time_to_transition",
"task_label": "Time-to-Next-Transition Regression",
"series_id": "raw128_simple",
"method": "128ep Raw Simple",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 52.32759475708008,
"raw_text": "52.33",
"normalized_score": 0.20137284019197565,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/time_to_transition/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 20,
"task_id": "time_to_transition",
"task_label": "Time-to-Next-Transition Regression",
"series_id": "raw128_neural_mlp",
"method": "128ep Raw NN",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 42.374061584472656,
"raw_text": "42.37",
"normalized_score": 0.24867468405504953,
"metric_key": "mae",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/time_to_transition/metrics.json",
"scope": "multi_episode_128_raw_sensor_feature_baseline",
"reason": null
},
{
"task_number": 20,
"task_id": "time_to_transition",
"task_label": "Time-to-Next-Transition Regression",
"series_id": "qwen3_omni_v6_lora",
"method": "Qwen3-Omni v6 LoRA",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 134.0687422166874,
"raw_text": "134.07",
"normalized_score": 0.07859666766782253,
"metric_key": "time_to_transition_mae",
"source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/time_to_transition/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 20,
"task_id": "time_to_transition",
"task_label": "Time-to-Next-Transition Regression",
"series_id": "cosmos3_super_reasoner",
"method": "Cosmos3-Super Reasoner",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 52.94642857142857,
"raw_text": "52.95",
"normalized_score": 0.19901920981190058,
"metric_key": "time_to_transition_mae",
"source": "results/omni_finetune/model_output_task_probes_20260616/time_to_transition/cosmos3_super_reasoner/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
},
{
"task_number": 20,
"task_id": "time_to_transition",
"task_label": "Time-to-Next-Transition Regression",
"series_id": "cosmos3_nano_future_window",
"method": "Cosmos3-Nano Future Window",
"status": "scored",
"status_label": "scored",
"scored": true,
"proxy_scored": false,
"raw": 33.80952380952381,
"raw_text": "33.81",
"normalized_score": 0.3116682871966295,
"metric_key": "time_to_transition_mae",
"source": "results/omni_finetune/model_output_task_probes_20260616/time_to_transition/cosmos3_nano_future_window/metrics.json",
"scope": "multi_episode_128_partial_model_overlay",
"reason": null
}
]
}