{
  "title": "128-Episode 20-Task Radar",
  "status": "pass",
  "generated_at_utc": "2026-06-18T06:01:21+00:00",
  "description": "Selected 128-episode metadata/raw baselines plus verified Qwen3/Cosmos branches. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.",
  "task_count": 20,
  "method_count": 7,
  "method_task_record_count": 140,
  "scored_method_task_count": 80,
  "normalization_policy": {
    "higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]",
    "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
    "raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
    "result_record_policy": "every method has 20 task records; records without a numeric score carry explicit unsupported/not-evaluated status and reason fields",
    "foundation_model_overlay": "Qwen3/Cosmos points are plotted only on task-aligned axes. Scoreless records mean the public result does not evaluate that task contract.",
    "metadata_128_overlay": "128-episode metadata baselines have 20 records, but numeric scores only where the public JSONL contains enough task labels without raw feature blocks.",
    "raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
  },
  "source_unified_radar": "docs/data/unified_task_model_radar.json",
  "source_result_matrix": "docs/data/task_method_20_result_matrix.json",
  "series": [
    {
      "id": "metadata128_simple",
      "label": "128ep Metadata Simple",
      "short_label": "128-S",
      "color": "#ffd166",
      "kind": "partial_128_episode_metadata_baseline",
      "scope": "128 selected episodes, JSONL metadata/text only",
      "stroke_dasharray": "9 6",
      "method_detail": "128-episode JSONL metadata/text simple baselines.",
      "plotted_as": "colored point overlay",
      "result_record_count": 20,
      "scored_task_count": 8,
      "covered_task_count": 8,
      "proxy_scored_task_count": 0,
      "scoreless_task_count": 12,
      "unsupported_task_count": 12,
      "not_evaluated_task_count": 0,
      "status_counts": {
        "not_supported_by_metadata_only_package": 8,
        "scored": 8,
        "unsupported_without_required_target": 4
      },
      "coverage_fraction": 0.4,
      "result_record_fraction": 1.0
    },
    {
      "id": "metadata128_neural_mlp",
      "label": "128ep Metadata NN",
      "short_label": "128-NN",
      "color": "#f472b6",
      "kind": "partial_128_episode_metadata_baseline",
      "scope": "128 selected episodes, JSONL metadata/text only",
      "stroke_dasharray": "3 6",
      "method_detail": "128-episode JSONL metadata/text MLP baselines.",
      "plotted_as": "colored point overlay",
      "result_record_count": 20,
      "scored_task_count": 6,
      "covered_task_count": 6,
      "proxy_scored_task_count": 0,
      "scoreless_task_count": 14,
      "unsupported_task_count": 14,
      "not_evaluated_task_count": 0,
      "status_counts": {
        "not_supported_by_metadata_only_package": 14,
        "scored": 6
      },
      "coverage_fraction": 0.3,
      "result_record_fraction": 1.0
    },
    {
      "id": "raw128_simple",
      "label": "128ep Raw Simple",
      "short_label": "128-RS",
      "color": "#f59e0b",
      "kind": "complete_128_episode_raw_feature_baseline",
      "scope": "128 selected episodes, staged 4430-dim sensor NPZ features; 2 compact proxy axes",
      "stroke_dasharray": "8 4",
      "method_detail": "128-episode 4430-dim sensor NPZ simple heads; tasks 15/19 use compact proxies.",
      "plotted_as": "colored point overlay",
      "result_record_count": 20,
      "scored_task_count": 20,
      "covered_task_count": 20,
      "proxy_scored_task_count": 2,
      "scoreless_task_count": 0,
      "unsupported_task_count": 0,
      "not_evaluated_task_count": 0,
      "status_counts": {
        "proxy_scored": 2,
        "scored": 18
      },
      "coverage_fraction": 1.0,
      "result_record_fraction": 1.0
    },
    {
      "id": "raw128_neural_mlp",
      "label": "128ep Raw NN",
      "short_label": "128-RN",
      "color": "#22d3ee",
      "kind": "complete_128_episode_raw_feature_baseline",
      "scope": "128 selected episodes, staged 4430-dim sensor NPZ features; 2 compact proxy axes",
      "stroke_dasharray": "2 5",
      "method_detail": "128-episode 4430-dim sensor NPZ MLP heads; tasks 15/19 use compact proxies.",
      "plotted_as": "colored point overlay",
      "result_record_count": 20,
      "scored_task_count": 20,
      "covered_task_count": 20,
      "proxy_scored_task_count": 2,
      "scoreless_task_count": 0,
      "unsupported_task_count": 0,
      "not_evaluated_task_count": 0,
      "status_counts": {
        "proxy_scored": 2,
        "scored": 18
      },
      "coverage_fraction": 1.0,
      "result_record_fraction": 1.0
    },
    {
      "id": "qwen3_omni_v6_lora",
      "label": "Qwen3-Omni v6 LoRA",
      "short_label": "Qwen3",
      "color": "#9bb8ff",
      "kind": "partial_128_episode_foundation_model_overlay",
      "scope": "128 selected episodes, held-out test",
      "stroke_dasharray": "7 7",
      "method_detail": "Verified held-out Qwen3-Omni v6 LoRA metrics, plus task 16 and any completed private-GPU future-task probes scored from task-specific JSON.",
      "plotted_as": "colored point overlay",
      "result_record_count": 20,
      "scored_task_count": 14,
      "covered_task_count": 14,
      "proxy_scored_task_count": 0,
      "scoreless_task_count": 6,
      "unsupported_task_count": 0,
      "not_evaluated_task_count": 6,
      "status_counts": {
        "not_evaluated_in_verified_package": 6,
        "scored": 14
      },
      "coverage_fraction": 0.7,
      "result_record_fraction": 1.0
    },
    {
      "id": "cosmos3_super_reasoner",
      "label": "Cosmos3-Super Reasoner",
      "short_label": "C3-S",
      "color": "#ff9c7a",
      "kind": "partial_128_episode_foundation_model_overlay",
      "scope": "128 selected episodes, held-out test",
      "stroke_dasharray": "4 7",
      "method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation, plus task 16 scored from existing verified action/object JSON.",
      "plotted_as": "colored point overlay",
      "result_record_count": 20,
      "scored_task_count": 7,
      "covered_task_count": 7,
      "proxy_scored_task_count": 0,
      "scoreless_task_count": 13,
      "unsupported_task_count": 0,
      "not_evaluated_task_count": 13,
      "status_counts": {
        "not_evaluated_in_verified_package": 13,
        "scored": 7
      },
      "coverage_fraction": 0.35,
      "result_record_fraction": 1.0
    },
    {
      "id": "cosmos3_nano_future_window",
      "label": "Cosmos3-Nano Future Window",
      "short_label": "C3-N",
      "color": "#d9c7ff",
      "kind": "partial_128_episode_world_model_overlay",
      "scope": "128 selected episodes, held-out test",
      "stroke_dasharray": "2 7",
      "method_detail": "Verified Cosmos3-Nano future-window compatibility metrics.",
      "plotted_as": "colored point overlay",
      "result_record_count": 20,
      "scored_task_count": 5,
      "covered_task_count": 5,
      "proxy_scored_task_count": 0,
      "scoreless_task_count": 15,
      "unsupported_task_count": 0,
      "not_evaluated_task_count": 15,
      "status_counts": {
        "not_evaluated_in_verified_package": 15,
        "scored": 5
      },
      "coverage_fraction": 0.25,
      "result_record_fraction": 1.0
    }
  ],
  "tasks": [
    {
      "task_number": 1,
      "task_id": "timeline_action",
      "label": "Action Recognition",
      "axis_label": "01 Action Recognition",
      "short_label": "Action",
      "origin": "original_public_sample_tasks",
      "metric_key": "macro_f1",
      "metric_name": "macro-F1",
      "metric_direction": "higher",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": 0.008252821966746326,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/timeline_action/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.008252821966746326,
          "raw_text": "0.0083",
          "status_label": "scored"
        },
        "metadata128_neural_mlp": {
          "raw": 0.004175793689174209,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/timeline_action/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.004175793689174209,
          "raw_text": "0.0042",
          "status_label": "scored"
        },
        "raw128_simple": {
          "raw": 0.002915061325704321,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/timeline_action/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.002915061325704321,
          "raw_text": "0.0029",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 0.0014955083181204041,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/timeline_action/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0014955083181204041,
          "raw_text": "0.0015",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": 0.0028830723979596335,
          "metric_key": "action_macro_f1",
          "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0028830723979596335,
          "raw_text": "0.0029",
          "status_label": "scored"
        },
        "cosmos3_super_reasoner": {
          "raw": 0.0008284021201089245,
          "metric_key": "action_macro_f1",
          "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0008284021201089245,
          "raw_text": "0.0008",
          "status_label": "scored"
        },
        "cosmos3_nano_future_window": {
          "raw": 0.007936507936507936,
          "metric_key": "action_accuracy_from_retrieved_future",
          "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.007936507936507936,
          "raw_text": "0.0079",
          "status_label": "scored"
        }
      }
    },
    {
      "task_number": 2,
      "task_id": "timeline_subtask",
      "label": "Procedure Step Recognition",
      "axis_label": "02 Procedure Step Recognition",
      "short_label": "Step",
      "origin": "original_public_sample_tasks",
      "metric_key": "macro_f1",
      "metric_name": "macro-F1",
      "metric_direction": "higher",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": 0.00019512195121951218,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/timeline_subtask/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.00019512195121951218,
          "raw_text": "0.0002",
          "status_label": "scored"
        },
        "metadata128_neural_mlp": {
          "raw": 7.207207207207208e-05,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/timeline_subtask/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 7.207207207207208e-05,
          "raw_text": "0.0001",
          "status_label": "scored"
        },
        "raw128_simple": {
          "raw": 0.0,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/timeline_subtask/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0,
          "raw_text": "0.0000",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 7.35632183908046e-05,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/timeline_subtask/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 7.35632183908046e-05,
          "raw_text": "0.0001",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": 0.0037313432835820895,
          "metric_key": "subtask_accuracy",
          "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0037313432835820895,
          "raw_text": "0.0037",
          "status_label": "scored"
        },
        "cosmos3_super_reasoner": {
          "raw": 0.0,
          "metric_key": "subtask_accuracy",
          "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0,
          "raw_text": "0.0000",
          "status_label": "scored"
        },
        "cosmos3_nano_future_window": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        }
      }
    },
    {
      "task_number": 3,
      "task_id": "transition_detection",
      "label": "Action Boundary Detection",
      "axis_label": "03 Action Boundary Detection",
      "short_label": "Boundary",
      "origin": "original_public_sample_tasks",
      "metric_key": "macro_f1",
      "metric_name": "macro-F1",
      "metric_direction": "higher",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": 0.29652162550029315,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/transition_detection/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.29652162550029315,
          "raw_text": "0.2965",
          "status_label": "scored"
        },
        "metadata128_neural_mlp": {
          "raw": 0.4841733292368365,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/transition_detection/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.4841733292368365,
          "raw_text": "0.4842",
          "status_label": "scored"
        },
        "raw128_simple": {
          "raw": 0.4203613574238283,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/transition_detection/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.4203613574238283,
          "raw_text": "0.4204",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 0.4902206914147213,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/transition_detection/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.4902206914147213,
          "raw_text": "0.4902",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": 0.9898313492063492,
          "metric_key": "transition_accuracy",
          "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.9898313492063492,
          "raw_text": "0.9898",
          "status_label": "scored"
        },
        "cosmos3_super_reasoner": {
          "raw": 0.36830357142857145,
          "metric_key": "transition_accuracy",
          "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.36830357142857145,
          "raw_text": "0.3683",
          "status_label": "scored"
        },
        "cosmos3_nano_future_window": {
          "raw": 0.9682539682539683,
          "metric_key": "transition_accuracy",
          "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.9682539682539683,
          "raw_text": "0.9683",
          "status_label": "scored"
        }
      }
    },
    {
      "task_number": 4,
      "task_id": "next_action",
      "label": "Next-Action Prediction",
      "axis_label": "04 Next-Action Prediction",
      "short_label": "Next act",
      "origin": "original_public_sample_tasks",
      "metric_key": "macro_f1",
      "metric_name": "macro-F1",
      "metric_direction": "higher",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": 0.006514774539765508,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/next_action/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.006514774539765508,
          "raw_text": "0.0065",
          "status_label": "scored"
        },
        "metadata128_neural_mlp": {
          "raw": 0.004910507980164745,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/next_action/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.004910507980164745,
          "raw_text": "0.0049",
          "status_label": "scored"
        },
        "raw128_simple": {
          "raw": 0.003285273363482094,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/next_action/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.003285273363482094,
          "raw_text": "0.0033",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 0.0018477984371755407,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/next_action/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0018477984371755407,
          "raw_text": "0.0018",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": 0.04305335446381405,
          "metric_key": "next_action_accuracy",
          "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.04305335446381405,
          "raw_text": "0.0431",
          "status_label": "scored"
        },
        "cosmos3_super_reasoner": {
          "raw": 0.013392857142857142,
          "metric_key": "next_action_accuracy",
          "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.013392857142857142,
          "raw_text": "0.0134",
          "status_label": "scored"
        },
        "cosmos3_nano_future_window": {
          "raw": 0.007936507936507936,
          "metric_key": "action_accuracy_from_retrieved_future",
          "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.007936507936507936,
          "raw_text": "0.0079",
          "status_label": "scored"
        }
      }
    },
    {
      "task_number": 5,
      "task_id": "hand_trajectory_forecast",
      "label": "Hand Trajectory Forecasting",
      "axis_label": "05 Hand Trajectory Forecasting",
      "short_label": "Hand traj",
      "origin": "original_public_sample_tasks",
      "metric_key": "mpjpe",
      "metric_name": "MPJPE",
      "metric_direction": "lower",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": null,
          "metric_key": "mpjpe",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/hand_trajectory_forecast/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "unsupported_without_required_target",
          "reason": "requires future hand-joint trajectories from raw sensor feature NPZ blocks, which are not in the public 128 package",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "unsupported"
        },
        "metadata128_neural_mlp": {
          "raw": null,
          "metric_key": "mpjpe",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "raw128_simple": {
          "raw": 0.2729249894618988,
          "metric_key": "mae",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/hand_trajectory_forecast/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.39516420515180267,
          "raw_text": "0.2729",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 0.18475216627120972,
          "metric_key": "mae",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/hand_trajectory_forecast/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.5837560051580399,
          "raw_text": "0.1848",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": null,
          "metric_key": "mpjpe",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_super_reasoner": {
          "raw": null,
          "metric_key": "mpjpe",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_nano_future_window": {
          "raw": null,
          "metric_key": "mpjpe",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        }
      }
    },
    {
      "task_number": 6,
      "task_id": "contact_prediction",
      "label": "Contact State Prediction",
      "axis_label": "06 Contact State Prediction",
      "short_label": "Contact",
      "origin": "original_public_sample_tasks",
      "metric_key": "macro_f1",
      "metric_name": "macro-F1",
      "metric_direction": "higher",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": 0.4381481308057444,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/contact_prediction/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.4381481308057444,
          "raw_text": "0.4381",
          "status_label": "scored"
        },
        "metadata128_neural_mlp": {
          "raw": 0.5682695682695682,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/contact_prediction/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.5682695682695682,
          "raw_text": "0.5683",
          "status_label": "scored"
        },
        "raw128_simple": {
          "raw": 0.886990707397193,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/contact_prediction/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.886990707397193,
          "raw_text": "0.8870",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 1.0,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/contact_prediction/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 1.0,
          "raw_text": "1.000",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": 0.8177083333333334,
          "metric_key": "contact_accuracy",
          "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.8177083333333334,
          "raw_text": "0.8177",
          "status_label": "scored"
        },
        "cosmos3_super_reasoner": {
          "raw": 0.32142857142857145,
          "metric_key": "contact_accuracy",
          "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.32142857142857145,
          "raw_text": "0.3214",
          "status_label": "scored"
        },
        "cosmos3_nano_future_window": {
          "raw": 0.7433862433862434,
          "metric_key": "contact_accuracy",
          "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.7433862433862434,
          "raw_text": "0.7434",
          "status_label": "scored"
        }
      }
    },
    {
      "task_number": 7,
      "task_id": "object_relevance",
      "label": "Object Relevance Prediction",
      "axis_label": "07 Object Relevance Prediction",
      "short_label": "Objects",
      "origin": "original_public_sample_tasks",
      "metric_key": "micro_f1",
      "metric_name": "micro-F1",
      "metric_direction": "higher",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": 0.17764578833693304,
          "metric_key": "micro_f1",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/object_relevance/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.17764578833693304,
          "raw_text": "0.1776",
          "status_label": "scored"
        },
        "metadata128_neural_mlp": {
          "raw": 0.18662723837686876,
          "metric_key": "micro_f1",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/object_relevance/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.18662723837686876,
          "raw_text": "0.1866",
          "status_label": "scored"
        },
        "raw128_simple": {
          "raw": 0.0655376369662084,
          "metric_key": "micro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/object_relevance/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0655376369662084,
          "raw_text": "0.0655",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 0.1765890386972509,
          "metric_key": "micro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/object_relevance/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.1765890386972509,
          "raw_text": "0.1766",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": 0.3064982378331287,
          "metric_key": "object_micro_f1",
          "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.3064982378331287,
          "raw_text": "0.3065",
          "status_label": "scored"
        },
        "cosmos3_super_reasoner": {
          "raw": 0.13704276146316333,
          "metric_key": "object_micro_f1",
          "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.13704276146316333,
          "raw_text": "0.1370",
          "status_label": "scored"
        },
        "cosmos3_nano_future_window": {
          "raw": null,
          "metric_key": "micro_f1",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        }
      }
    },
    {
      "task_number": 8,
      "task_id": "caption_grounding",
      "label": "Language Grounding",
      "axis_label": "08 Language Grounding",
      "short_label": "Language",
      "origin": "original_public_sample_tasks",
      "metric_key": "mrr",
      "metric_name": "MRR",
      "metric_direction": "higher",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": 0.002332374220713973,
          "metric_key": "mrr",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/caption_grounding/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.002332374220713973,
          "raw_text": "0.0023",
          "status_label": "scored"
        },
        "metadata128_neural_mlp": {
          "raw": null,
          "metric_key": "mrr",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "raw128_simple": {
          "raw": 0.011138836853206158,
          "metric_key": "mrr",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/caption_grounding/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.011138836853206158,
          "raw_text": "0.0111",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 0.0063402121886610985,
          "metric_key": "mrr",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/caption_grounding/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0063402121886610985,
          "raw_text": "0.0063",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": 0.8764467592592605,
          "metric_key": "caption_grounding_mrr",
          "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_retrieval_task_probes_a100_20260617T175919Z/caption_grounding/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.8764467592592605,
          "raw_text": "0.8764",
          "status_label": "scored"
        },
        "cosmos3_super_reasoner": {
          "raw": null,
          "metric_key": "mrr",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_nano_future_window": {
          "raw": null,
          "metric_key": "mrr",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        }
      }
    },
    {
      "task_number": 9,
      "task_id": "cross_modal_retrieval",
      "label": "Cross-Modal Retrieval",
      "axis_label": "09 Cross-Modal Retrieval",
      "short_label": "X-modal",
      "origin": "original_public_sample_tasks",
      "metric_key": "mrr",
      "metric_name": "MRR",
      "metric_direction": "higher",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": null,
          "metric_key": "mrr",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/cross_modal_retrieval/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "unsupported_without_required_target",
          "reason": "requires paired motion/IMU/camera/audio/depth feature blocks, which are not in the public 128 package",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "unsupported"
        },
        "metadata128_neural_mlp": {
          "raw": null,
          "metric_key": "mrr",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "raw128_simple": {
          "raw": 0.003459817497059703,
          "metric_key": "mrr",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/cross_modal_retrieval/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.003459817497059703,
          "raw_text": "0.0035",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 0.002535284962505102,
          "metric_key": "mrr",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/cross_modal_retrieval/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.002535284962505102,
          "raw_text": "0.0025",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": null,
          "metric_key": "mrr",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_super_reasoner": {
          "raw": null,
          "metric_key": "mrr",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_nano_future_window": {
          "raw": 0.022138720585222767,
          "metric_key": "future_retrieval_mrr",
          "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.022138720585222767,
          "raw_text": "0.0221",
          "status_label": "scored"
        }
      }
    },
    {
      "task_number": 10,
      "task_id": "modality_reconstruction",
      "label": "Cross-Modal Reconstruction",
      "axis_label": "10 Cross-Modal Reconstruction",
      "short_label": "Recon",
      "origin": "original_public_sample_tasks",
      "metric_key": "r2",
      "metric_name": "R2",
      "metric_direction": "higher",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": null,
          "metric_key": "r2",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/modality_reconstruction/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "unsupported_without_required_target",
          "reason": "requires source and target modality feature blocks such as depth/video vectors, which are not in the public 128 package",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "unsupported"
        },
        "metadata128_neural_mlp": {
          "raw": null,
          "metric_key": "r2",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "raw128_simple": {
          "raw": -1.3450960391924882,
          "metric_key": "r2",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/modality_reconstruction/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0,
          "raw_text": "-1.345",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": -1.3974418160502369,
          "metric_key": "r2",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/modality_reconstruction/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0,
          "raw_text": "-1.397",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": null,
          "metric_key": "r2",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_super_reasoner": {
          "raw": null,
          "metric_key": "r2",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_nano_future_window": {
          "raw": null,
          "metric_key": "r2",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        }
      }
    },
    {
      "task_number": 11,
      "task_id": "temporal_order",
      "label": "Temporal Order Verification",
      "axis_label": "11 Temporal Order Verification",
      "short_label": "Order",
      "origin": "original_public_sample_tasks",
      "metric_key": "f1",
      "metric_name": "F1",
      "metric_direction": "higher",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": 0.4198864140782312,
          "metric_key": "f1",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/temporal_order/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.4198864140782312,
          "raw_text": "0.4199",
          "status_label": "scored"
        },
        "metadata128_neural_mlp": {
          "raw": null,
          "metric_key": "f1",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "raw128_simple": {
          "raw": 0.49824413370686593,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/temporal_order/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.49824413370686593,
          "raw_text": "0.4982",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 0.8030047098504103,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/temporal_order/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.8030047098504103,
          "raw_text": "0.8030",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": 0.40984631701404173,
          "metric_key": "temporal_order_f1",
          "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/temporal_order/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.40984631701404173,
          "raw_text": "0.4098",
          "status_label": "scored"
        },
        "cosmos3_super_reasoner": {
          "raw": null,
          "metric_key": "f1",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_nano_future_window": {
          "raw": null,
          "metric_key": "f1",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        }
      }
    },
    {
      "task_number": 12,
      "task_id": "misalignment_detection",
      "label": "Multimodal Synchronization Detection",
      "axis_label": "12 Multimodal Synchronization Detection",
      "short_label": "Sync",
      "origin": "original_public_sample_tasks",
      "metric_key": "f1",
      "metric_name": "F1",
      "metric_direction": "higher",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": null,
          "metric_key": "f1",
          "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/misalignment_detection/metrics.json",
          "scope": "multi_episode_128_metadata_baseline",
          "status": "unsupported_without_required_target",
          "reason": "requires deliberately shifted cross-modal feature pairs, which cannot be reconstructed from the public JSONL labels alone",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "unsupported"
        },
        "metadata128_neural_mlp": {
          "raw": null,
          "metric_key": "f1",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "raw128_simple": {
          "raw": 0.4958867673901769,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/misalignment_detection/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.4958867673901769,
          "raw_text": "0.4959",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 0.8272709077974252,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/misalignment_detection/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.8272709077974252,
          "raw_text": "0.8273",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": 0.3344936184319576,
          "metric_key": "misalignment_detection_f1",
          "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/misalignment_detection/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.3344936184319576,
          "raw_text": "0.3345",
          "status_label": "scored"
        },
        "cosmos3_super_reasoner": {
          "raw": null,
          "metric_key": "f1",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_nano_future_window": {
          "raw": null,
          "metric_key": "f1",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        }
      }
    },
    {
      "task_number": 13,
      "task_id": "long_horizon_next_action",
      "label": "Long-Horizon Next-Action Forecasting",
      "axis_label": "13 Long-Horizon Next-Action Forecasting",
      "short_label": "Long act",
      "origin": "additional_public_sample_tasks",
      "metric_key": "macro_f1",
      "metric_name": "macro-F1",
      "metric_direction": "higher",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "metadata128_neural_mlp": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "raw128_simple": {
          "raw": 0.0024280172369056294,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/long_horizon_next_action/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0024280172369056294,
          "raw_text": "0.0024",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 0.001063859887389299,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/long_horizon_next_action/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.001063859887389299,
          "raw_text": "0.0011",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": 0.0023356666867101906,
          "metric_key": "long_horizon_next_action_macro_f1",
          "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/long_horizon_next_action/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0023356666867101906,
          "raw_text": "0.0023",
          "status_label": "scored"
        },
        "cosmos3_super_reasoner": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_nano_future_window": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        }
      }
    },
    {
      "task_number": 14,
      "task_id": "next_subtask_forecast",
      "label": "Long-Horizon Next-Subtask Forecasting",
      "axis_label": "14 Long-Horizon Next-Subtask Forecasting",
      "short_label": "Long step",
      "origin": "additional_public_sample_tasks",
      "metric_key": "macro_f1",
      "metric_name": "macro-F1",
      "metric_direction": "higher",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "metadata128_neural_mlp": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "raw128_simple": {
          "raw": 0.0,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/next_subtask_forecast/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0,
          "raw_text": "0.0000",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 0.0,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/next_subtask_forecast/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0,
          "raw_text": "0.0000",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": 0.004206715978529301,
          "metric_key": "next_subtask_forecast_macro_f1",
          "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/next_subtask_forecast/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.004206715978529301,
          "raw_text": "0.0042",
          "status_label": "scored"
        },
        "cosmos3_super_reasoner": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_nano_future_window": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        }
      }
    },
    {
      "task_number": 15,
      "task_id": "interaction_text_prediction",
      "label": "Interaction Text Prediction",
      "axis_label": "15 Interaction Text Prediction",
      "short_label": "Interact txt",
      "origin": "additional_public_sample_tasks",
      "metric_key": "macro_f1",
      "metric_name": "macro-F1",
      "metric_direction": "higher",
      "raw128_proxy_axis": true,
      "values": {
        "metadata128_simple": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "metadata128_neural_mlp": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "raw128_simple": {
          "raw": 0.012611998261547169,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/interaction_text_prediction/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "proxy_scored",
          "reason": "documented compact proxy completion for this raw128 task axis",
          "normalized_score": 0.012611998261547169,
          "raw_text": "0.0126",
          "status_label": "proxy scored"
        },
        "raw128_neural_mlp": {
          "raw": 0.009791421280985521,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/interaction_text_prediction/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "proxy_scored",
          "reason": "documented compact proxy completion for this raw128 task axis",
          "normalized_score": 0.009791421280985521,
          "raw_text": "0.0098",
          "status_label": "proxy scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_super_reasoner": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_nano_future_window": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        }
      }
    },
    {
      "task_number": 16,
      "task_id": "action_object_relation",
      "label": "Action-Object Relation Prediction",
      "axis_label": "16 Action-Object Relation Prediction",
      "short_label": "Act+obj",
      "origin": "additional_public_sample_tasks",
      "metric_key": "macro_f1",
      "metric_name": "macro-F1",
      "metric_direction": "higher",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "metadata128_neural_mlp": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "raw128_simple": {
          "raw": 0.0,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/action_object_relation/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0,
          "raw_text": "0.0000",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 0.0,
          "metric_key": "macro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/action_object_relation/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0,
          "raw_text": "0.0000",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": 0.0002220083079671497,
          "metric_key": "action_object_relation_macro_f1",
          "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0002220083079671497,
          "raw_text": "0.0002",
          "status_label": "scored"
        },
        "cosmos3_super_reasoner": {
          "raw": 0.0,
          "metric_key": "action_object_relation_macro_f1",
          "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.0,
          "raw_text": "0.0000",
          "status_label": "scored"
        },
        "cosmos3_nano_future_window": {
          "raw": null,
          "metric_key": "macro_f1",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        }
      }
    },
    {
      "task_number": 17,
      "task_id": "object_set_forecast",
      "label": "Future Object-Set Forecasting",
      "axis_label": "17 Future Object-Set Forecasting",
      "short_label": "Future obj",
      "origin": "additional_public_sample_tasks",
      "metric_key": "micro_f1",
      "metric_name": "micro-F1",
      "metric_direction": "higher",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": null,
          "metric_key": "micro_f1",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "metadata128_neural_mlp": {
          "raw": null,
          "metric_key": "micro_f1",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "raw128_simple": {
          "raw": 0.06469493412657774,
          "metric_key": "micro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/object_set_forecast/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.06469493412657774,
          "raw_text": "0.0647",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 0.17523098630012288,
          "metric_key": "micro_f1",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/object_set_forecast/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.17523098630012288,
          "raw_text": "0.1752",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": 0.1659483964851402,
          "metric_key": "object_set_forecast_micro_f1",
          "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/object_set_forecast/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.1659483964851402,
          "raw_text": "0.1659",
          "status_label": "scored"
        },
        "cosmos3_super_reasoner": {
          "raw": null,
          "metric_key": "micro_f1",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_nano_future_window": {
          "raw": null,
          "metric_key": "micro_f1",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        }
      }
    },
    {
      "task_number": 18,
      "task_id": "imu_to_hand_pose",
      "label": "IMU-to-Hand Pose Reconstruction",
      "axis_label": "18 IMU-to-Hand Pose Reconstruction",
      "short_label": "IMU->hand",
      "origin": "additional_public_sample_tasks",
      "metric_key": "mae",
      "metric_name": "MAE",
      "metric_direction": "lower",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": null,
          "metric_key": "mae",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "metadata128_neural_mlp": {
          "raw": null,
          "metric_key": "mae",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "raw128_simple": {
          "raw": 0.22941437363624573,
          "metric_key": "mae",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/imu_to_hand_pose/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.1832902066792771,
          "raw_text": "0.2294",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 0.252998411655426,
          "metric_key": "mae",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/imu_to_hand_pose/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.1662042369509182,
          "raw_text": "0.2530",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": null,
          "metric_key": "mae",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_super_reasoner": {
          "raw": null,
          "metric_key": "mae",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_nano_future_window": {
          "raw": null,
          "metric_key": "mae",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        }
      }
    },
    {
      "task_number": 19,
      "task_id": "camera_view_sync_retrieval",
      "label": "Camera-View Synchronization Retrieval",
      "axis_label": "19 Camera-View Synchronization Retrieval",
      "short_label": "Cam sync",
      "origin": "additional_public_sample_tasks",
      "metric_key": "mrr",
      "metric_name": "MRR",
      "metric_direction": "higher",
      "raw128_proxy_axis": true,
      "values": {
        "metadata128_simple": {
          "raw": null,
          "metric_key": "mrr",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "metadata128_neural_mlp": {
          "raw": null,
          "metric_key": "mrr",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "raw128_simple": {
          "raw": 0.0026625150348991156,
          "metric_key": "mrr",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/camera_view_sync_retrieval/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "proxy_scored",
          "reason": "documented compact proxy completion for this raw128 task axis",
          "normalized_score": 0.0026625150348991156,
          "raw_text": "0.0027",
          "status_label": "proxy scored"
        },
        "raw128_neural_mlp": {
          "raw": 0.0025448438245803118,
          "metric_key": "mrr",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/camera_view_sync_retrieval/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "proxy_scored",
          "reason": "documented compact proxy completion for this raw128 task axis",
          "normalized_score": 0.0025448438245803118,
          "raw_text": "0.0025",
          "status_label": "proxy scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": null,
          "metric_key": "mrr",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_super_reasoner": {
          "raw": null,
          "metric_key": "mrr",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_nano_future_window": {
          "raw": null,
          "metric_key": "mrr",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        }
      }
    },
    {
      "task_number": 20,
      "task_id": "time_to_transition",
      "label": "Time-to-Next-Transition Regression",
      "axis_label": "20 Time-to-Next-Transition Regression",
      "short_label": "Time2bdry",
      "origin": "additional_public_sample_tasks",
      "metric_key": "mae",
      "metric_name": "MAE frames",
      "metric_direction": "lower",
      "raw128_proxy_axis": false,
      "values": {
        "metadata128_simple": {
          "raw": null,
          "metric_key": "mae",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "metadata128_neural_mlp": {
          "raw": null,
          "metric_key": "mae",
          "source": null,
          "scope": "multi_episode_128_metadata_baseline",
          "status": "not_supported_by_metadata_only_package",
          "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not supported"
        },
        "raw128_simple": {
          "raw": 52.32759475708008,
          "metric_key": "mae",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/time_to_transition/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.20137284019197565,
          "raw_text": "52.33",
          "status_label": "scored"
        },
        "raw128_neural_mlp": {
          "raw": 42.374061584472656,
          "metric_key": "mae",
          "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/time_to_transition/metrics.json",
          "scope": "multi_episode_128_raw_sensor_feature_baseline",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.24867468405504953,
          "raw_text": "42.37",
          "status_label": "scored"
        },
        "qwen3_omni_v6_lora": {
          "raw": 134.0687422166874,
          "metric_key": "time_to_transition_mae",
          "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/time_to_transition/metrics.json",
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "scored",
          "reason": null,
          "normalized_score": 0.07859666766782253,
          "raw_text": "134.07",
          "status_label": "scored"
        },
        "cosmos3_super_reasoner": {
          "raw": null,
          "metric_key": "mae",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        },
        "cosmos3_nano_future_window": {
          "raw": null,
          "metric_key": "mae",
          "source": null,
          "scope": "multi_episode_128_partial_model_overlay",
          "status": "not_evaluated_in_verified_package",
          "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
          "normalized_score": null,
          "raw_text": "n/a",
          "status_label": "not evaluated"
        }
      }
    }
  ],
  "task_method_result_matrix": [
    {
      "task_number": 1,
      "task_id": "timeline_action",
      "task_label": "Action Recognition",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.008252821966746326,
      "raw_text": "0.0083",
      "normalized_score": 0.008252821966746326,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/timeline_action/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": null
    },
    {
      "task_number": 1,
      "task_id": "timeline_action",
      "task_label": "Action Recognition",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.004175793689174209,
      "raw_text": "0.0042",
      "normalized_score": 0.004175793689174209,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/timeline_action/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": null
    },
    {
      "task_number": 1,
      "task_id": "timeline_action",
      "task_label": "Action Recognition",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.002915061325704321,
      "raw_text": "0.0029",
      "normalized_score": 0.002915061325704321,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/timeline_action/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 1,
      "task_id": "timeline_action",
      "task_label": "Action Recognition",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0014955083181204041,
      "raw_text": "0.0015",
      "normalized_score": 0.0014955083181204041,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/timeline_action/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 1,
      "task_id": "timeline_action",
      "task_label": "Action Recognition",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0028830723979596335,
      "raw_text": "0.0029",
      "normalized_score": 0.0028830723979596335,
      "metric_key": "action_macro_f1",
      "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 1,
      "task_id": "timeline_action",
      "task_label": "Action Recognition",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0008284021201089245,
      "raw_text": "0.0008",
      "normalized_score": 0.0008284021201089245,
      "metric_key": "action_macro_f1",
      "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 1,
      "task_id": "timeline_action",
      "task_label": "Action Recognition",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.007936507936507936,
      "raw_text": "0.0079",
      "normalized_score": 0.007936507936507936,
      "metric_key": "action_accuracy_from_retrieved_future",
      "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 2,
      "task_id": "timeline_subtask",
      "task_label": "Procedure Step Recognition",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.00019512195121951218,
      "raw_text": "0.0002",
      "normalized_score": 0.00019512195121951218,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/timeline_subtask/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": null
    },
    {
      "task_number": 2,
      "task_id": "timeline_subtask",
      "task_label": "Procedure Step Recognition",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 7.207207207207208e-05,
      "raw_text": "0.0001",
      "normalized_score": 7.207207207207208e-05,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/timeline_subtask/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": null
    },
    {
      "task_number": 2,
      "task_id": "timeline_subtask",
      "task_label": "Procedure Step Recognition",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0,
      "raw_text": "0.0000",
      "normalized_score": 0.0,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/timeline_subtask/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 2,
      "task_id": "timeline_subtask",
      "task_label": "Procedure Step Recognition",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 7.35632183908046e-05,
      "raw_text": "0.0001",
      "normalized_score": 7.35632183908046e-05,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/timeline_subtask/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 2,
      "task_id": "timeline_subtask",
      "task_label": "Procedure Step Recognition",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0037313432835820895,
      "raw_text": "0.0037",
      "normalized_score": 0.0037313432835820895,
      "metric_key": "subtask_accuracy",
      "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 2,
      "task_id": "timeline_subtask",
      "task_label": "Procedure Step Recognition",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0,
      "raw_text": "0.0000",
      "normalized_score": 0.0,
      "metric_key": "subtask_accuracy",
      "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 2,
      "task_id": "timeline_subtask",
      "task_label": "Procedure Step Recognition",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 3,
      "task_id": "transition_detection",
      "task_label": "Action Boundary Detection",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.29652162550029315,
      "raw_text": "0.2965",
      "normalized_score": 0.29652162550029315,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/transition_detection/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": null
    },
    {
      "task_number": 3,
      "task_id": "transition_detection",
      "task_label": "Action Boundary Detection",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.4841733292368365,
      "raw_text": "0.4842",
      "normalized_score": 0.4841733292368365,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/transition_detection/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": null
    },
    {
      "task_number": 3,
      "task_id": "transition_detection",
      "task_label": "Action Boundary Detection",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.4203613574238283,
      "raw_text": "0.4204",
      "normalized_score": 0.4203613574238283,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/transition_detection/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 3,
      "task_id": "transition_detection",
      "task_label": "Action Boundary Detection",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.4902206914147213,
      "raw_text": "0.4902",
      "normalized_score": 0.4902206914147213,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/transition_detection/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 3,
      "task_id": "transition_detection",
      "task_label": "Action Boundary Detection",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.9898313492063492,
      "raw_text": "0.9898",
      "normalized_score": 0.9898313492063492,
      "metric_key": "transition_accuracy",
      "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 3,
      "task_id": "transition_detection",
      "task_label": "Action Boundary Detection",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.36830357142857145,
      "raw_text": "0.3683",
      "normalized_score": 0.36830357142857145,
      "metric_key": "transition_accuracy",
      "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 3,
      "task_id": "transition_detection",
      "task_label": "Action Boundary Detection",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.9682539682539683,
      "raw_text": "0.9683",
      "normalized_score": 0.9682539682539683,
      "metric_key": "transition_accuracy",
      "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 4,
      "task_id": "next_action",
      "task_label": "Next-Action Prediction",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.006514774539765508,
      "raw_text": "0.0065",
      "normalized_score": 0.006514774539765508,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/next_action/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": null
    },
    {
      "task_number": 4,
      "task_id": "next_action",
      "task_label": "Next-Action Prediction",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.004910507980164745,
      "raw_text": "0.0049",
      "normalized_score": 0.004910507980164745,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/next_action/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": null
    },
    {
      "task_number": 4,
      "task_id": "next_action",
      "task_label": "Next-Action Prediction",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.003285273363482094,
      "raw_text": "0.0033",
      "normalized_score": 0.003285273363482094,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/next_action/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 4,
      "task_id": "next_action",
      "task_label": "Next-Action Prediction",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0018477984371755407,
      "raw_text": "0.0018",
      "normalized_score": 0.0018477984371755407,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/next_action/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 4,
      "task_id": "next_action",
      "task_label": "Next-Action Prediction",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.04305335446381405,
      "raw_text": "0.0431",
      "normalized_score": 0.04305335446381405,
      "metric_key": "next_action_accuracy",
      "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 4,
      "task_id": "next_action",
      "task_label": "Next-Action Prediction",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.013392857142857142,
      "raw_text": "0.0134",
      "normalized_score": 0.013392857142857142,
      "metric_key": "next_action_accuracy",
      "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 4,
      "task_id": "next_action",
      "task_label": "Next-Action Prediction",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.007936507936507936,
      "raw_text": "0.0079",
      "normalized_score": 0.007936507936507936,
      "metric_key": "action_accuracy_from_retrieved_future",
      "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 5,
      "task_id": "hand_trajectory_forecast",
      "task_label": "Hand Trajectory Forecasting",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "unsupported_without_required_target",
      "status_label": "unsupported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mpjpe",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/hand_trajectory_forecast/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "requires future hand-joint trajectories from raw sensor feature NPZ blocks, which are not in the public 128 package"
    },
    {
      "task_number": 5,
      "task_id": "hand_trajectory_forecast",
      "task_label": "Hand Trajectory Forecasting",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mpjpe",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 5,
      "task_id": "hand_trajectory_forecast",
      "task_label": "Hand Trajectory Forecasting",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.2729249894618988,
      "raw_text": "0.2729",
      "normalized_score": 0.39516420515180267,
      "metric_key": "mae",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/hand_trajectory_forecast/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 5,
      "task_id": "hand_trajectory_forecast",
      "task_label": "Hand Trajectory Forecasting",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.18475216627120972,
      "raw_text": "0.1848",
      "normalized_score": 0.5837560051580399,
      "metric_key": "mae",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/hand_trajectory_forecast/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 5,
      "task_id": "hand_trajectory_forecast",
      "task_label": "Hand Trajectory Forecasting",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mpjpe",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 5,
      "task_id": "hand_trajectory_forecast",
      "task_label": "Hand Trajectory Forecasting",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mpjpe",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 5,
      "task_id": "hand_trajectory_forecast",
      "task_label": "Hand Trajectory Forecasting",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mpjpe",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 6,
      "task_id": "contact_prediction",
      "task_label": "Contact State Prediction",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.4381481308057444,
      "raw_text": "0.4381",
      "normalized_score": 0.4381481308057444,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/contact_prediction/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": null
    },
    {
      "task_number": 6,
      "task_id": "contact_prediction",
      "task_label": "Contact State Prediction",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.5682695682695682,
      "raw_text": "0.5683",
      "normalized_score": 0.5682695682695682,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/contact_prediction/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": null
    },
    {
      "task_number": 6,
      "task_id": "contact_prediction",
      "task_label": "Contact State Prediction",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.886990707397193,
      "raw_text": "0.8870",
      "normalized_score": 0.886990707397193,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/contact_prediction/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 6,
      "task_id": "contact_prediction",
      "task_label": "Contact State Prediction",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 1.0,
      "raw_text": "1.000",
      "normalized_score": 1.0,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/contact_prediction/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 6,
      "task_id": "contact_prediction",
      "task_label": "Contact State Prediction",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.8177083333333334,
      "raw_text": "0.8177",
      "normalized_score": 0.8177083333333334,
      "metric_key": "contact_accuracy",
      "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 6,
      "task_id": "contact_prediction",
      "task_label": "Contact State Prediction",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.32142857142857145,
      "raw_text": "0.3214",
      "normalized_score": 0.32142857142857145,
      "metric_key": "contact_accuracy",
      "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 6,
      "task_id": "contact_prediction",
      "task_label": "Contact State Prediction",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.7433862433862434,
      "raw_text": "0.7434",
      "normalized_score": 0.7433862433862434,
      "metric_key": "contact_accuracy",
      "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 7,
      "task_id": "object_relevance",
      "task_label": "Object Relevance Prediction",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.17764578833693304,
      "raw_text": "0.1776",
      "normalized_score": 0.17764578833693304,
      "metric_key": "micro_f1",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/object_relevance/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": null
    },
    {
      "task_number": 7,
      "task_id": "object_relevance",
      "task_label": "Object Relevance Prediction",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.18662723837686876,
      "raw_text": "0.1866",
      "normalized_score": 0.18662723837686876,
      "metric_key": "micro_f1",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/object_relevance/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": null
    },
    {
      "task_number": 7,
      "task_id": "object_relevance",
      "task_label": "Object Relevance Prediction",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0655376369662084,
      "raw_text": "0.0655",
      "normalized_score": 0.0655376369662084,
      "metric_key": "micro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/object_relevance/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 7,
      "task_id": "object_relevance",
      "task_label": "Object Relevance Prediction",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.1765890386972509,
      "raw_text": "0.1766",
      "normalized_score": 0.1765890386972509,
      "metric_key": "micro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/object_relevance/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 7,
      "task_id": "object_relevance",
      "task_label": "Object Relevance Prediction",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.3064982378331287,
      "raw_text": "0.3065",
      "normalized_score": 0.3064982378331287,
      "metric_key": "object_micro_f1",
      "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 7,
      "task_id": "object_relevance",
      "task_label": "Object Relevance Prediction",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.13704276146316333,
      "raw_text": "0.1370",
      "normalized_score": 0.13704276146316333,
      "metric_key": "object_micro_f1",
      "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 7,
      "task_id": "object_relevance",
      "task_label": "Object Relevance Prediction",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "micro_f1",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 8,
      "task_id": "caption_grounding",
      "task_label": "Language Grounding",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.002332374220713973,
      "raw_text": "0.0023",
      "normalized_score": 0.002332374220713973,
      "metric_key": "mrr",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/caption_grounding/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": null
    },
    {
      "task_number": 8,
      "task_id": "caption_grounding",
      "task_label": "Language Grounding",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mrr",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 8,
      "task_id": "caption_grounding",
      "task_label": "Language Grounding",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.011138836853206158,
      "raw_text": "0.0111",
      "normalized_score": 0.011138836853206158,
      "metric_key": "mrr",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/caption_grounding/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 8,
      "task_id": "caption_grounding",
      "task_label": "Language Grounding",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0063402121886610985,
      "raw_text": "0.0063",
      "normalized_score": 0.0063402121886610985,
      "metric_key": "mrr",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/caption_grounding/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 8,
      "task_id": "caption_grounding",
      "task_label": "Language Grounding",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.8764467592592605,
      "raw_text": "0.8764",
      "normalized_score": 0.8764467592592605,
      "metric_key": "caption_grounding_mrr",
      "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_retrieval_task_probes_a100_20260617T175919Z/caption_grounding/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 8,
      "task_id": "caption_grounding",
      "task_label": "Language Grounding",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mrr",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 8,
      "task_id": "caption_grounding",
      "task_label": "Language Grounding",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mrr",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 9,
      "task_id": "cross_modal_retrieval",
      "task_label": "Cross-Modal Retrieval",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "unsupported_without_required_target",
      "status_label": "unsupported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mrr",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/cross_modal_retrieval/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "requires paired motion/IMU/camera/audio/depth feature blocks, which are not in the public 128 package"
    },
    {
      "task_number": 9,
      "task_id": "cross_modal_retrieval",
      "task_label": "Cross-Modal Retrieval",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mrr",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 9,
      "task_id": "cross_modal_retrieval",
      "task_label": "Cross-Modal Retrieval",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.003459817497059703,
      "raw_text": "0.0035",
      "normalized_score": 0.003459817497059703,
      "metric_key": "mrr",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/cross_modal_retrieval/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 9,
      "task_id": "cross_modal_retrieval",
      "task_label": "Cross-Modal Retrieval",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.002535284962505102,
      "raw_text": "0.0025",
      "normalized_score": 0.002535284962505102,
      "metric_key": "mrr",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/cross_modal_retrieval/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 9,
      "task_id": "cross_modal_retrieval",
      "task_label": "Cross-Modal Retrieval",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mrr",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 9,
      "task_id": "cross_modal_retrieval",
      "task_label": "Cross-Modal Retrieval",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mrr",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 9,
      "task_id": "cross_modal_retrieval",
      "task_label": "Cross-Modal Retrieval",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.022138720585222767,
      "raw_text": "0.0221",
      "normalized_score": 0.022138720585222767,
      "metric_key": "future_retrieval_mrr",
      "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 10,
      "task_id": "modality_reconstruction",
      "task_label": "Cross-Modal Reconstruction",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "unsupported_without_required_target",
      "status_label": "unsupported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "r2",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/modality_reconstruction/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "requires source and target modality feature blocks such as depth/video vectors, which are not in the public 128 package"
    },
    {
      "task_number": 10,
      "task_id": "modality_reconstruction",
      "task_label": "Cross-Modal Reconstruction",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "r2",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 10,
      "task_id": "modality_reconstruction",
      "task_label": "Cross-Modal Reconstruction",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": -1.3450960391924882,
      "raw_text": "-1.345",
      "normalized_score": 0.0,
      "metric_key": "r2",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/modality_reconstruction/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 10,
      "task_id": "modality_reconstruction",
      "task_label": "Cross-Modal Reconstruction",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": -1.3974418160502369,
      "raw_text": "-1.397",
      "normalized_score": 0.0,
      "metric_key": "r2",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/modality_reconstruction/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 10,
      "task_id": "modality_reconstruction",
      "task_label": "Cross-Modal Reconstruction",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "r2",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 10,
      "task_id": "modality_reconstruction",
      "task_label": "Cross-Modal Reconstruction",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "r2",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 10,
      "task_id": "modality_reconstruction",
      "task_label": "Cross-Modal Reconstruction",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "r2",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 11,
      "task_id": "temporal_order",
      "task_label": "Temporal Order Verification",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.4198864140782312,
      "raw_text": "0.4199",
      "normalized_score": 0.4198864140782312,
      "metric_key": "f1",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/temporal_order/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": null
    },
    {
      "task_number": 11,
      "task_id": "temporal_order",
      "task_label": "Temporal Order Verification",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "f1",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 11,
      "task_id": "temporal_order",
      "task_label": "Temporal Order Verification",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.49824413370686593,
      "raw_text": "0.4982",
      "normalized_score": 0.49824413370686593,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/temporal_order/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 11,
      "task_id": "temporal_order",
      "task_label": "Temporal Order Verification",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.8030047098504103,
      "raw_text": "0.8030",
      "normalized_score": 0.8030047098504103,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/temporal_order/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 11,
      "task_id": "temporal_order",
      "task_label": "Temporal Order Verification",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.40984631701404173,
      "raw_text": "0.4098",
      "normalized_score": 0.40984631701404173,
      "metric_key": "temporal_order_f1",
      "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/temporal_order/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 11,
      "task_id": "temporal_order",
      "task_label": "Temporal Order Verification",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "f1",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 11,
      "task_id": "temporal_order",
      "task_label": "Temporal Order Verification",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "f1",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 12,
      "task_id": "misalignment_detection",
      "task_label": "Multimodal Synchronization Detection",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "unsupported_without_required_target",
      "status_label": "unsupported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "f1",
      "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/misalignment_detection/metrics.json",
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "requires deliberately shifted cross-modal feature pairs, which cannot be reconstructed from the public JSONL labels alone"
    },
    {
      "task_number": 12,
      "task_id": "misalignment_detection",
      "task_label": "Multimodal Synchronization Detection",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "f1",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 12,
      "task_id": "misalignment_detection",
      "task_label": "Multimodal Synchronization Detection",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.4958867673901769,
      "raw_text": "0.4959",
      "normalized_score": 0.4958867673901769,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/misalignment_detection/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 12,
      "task_id": "misalignment_detection",
      "task_label": "Multimodal Synchronization Detection",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.8272709077974252,
      "raw_text": "0.8273",
      "normalized_score": 0.8272709077974252,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/misalignment_detection/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 12,
      "task_id": "misalignment_detection",
      "task_label": "Multimodal Synchronization Detection",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.3344936184319576,
      "raw_text": "0.3345",
      "normalized_score": 0.3344936184319576,
      "metric_key": "misalignment_detection_f1",
      "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/misalignment_detection/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 12,
      "task_id": "misalignment_detection",
      "task_label": "Multimodal Synchronization Detection",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "f1",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 12,
      "task_id": "misalignment_detection",
      "task_label": "Multimodal Synchronization Detection",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "f1",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 13,
      "task_id": "long_horizon_next_action",
      "task_label": "Long-Horizon Next-Action Forecasting",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 13,
      "task_id": "long_horizon_next_action",
      "task_label": "Long-Horizon Next-Action Forecasting",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 13,
      "task_id": "long_horizon_next_action",
      "task_label": "Long-Horizon Next-Action Forecasting",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0024280172369056294,
      "raw_text": "0.0024",
      "normalized_score": 0.0024280172369056294,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/long_horizon_next_action/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 13,
      "task_id": "long_horizon_next_action",
      "task_label": "Long-Horizon Next-Action Forecasting",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.001063859887389299,
      "raw_text": "0.0011",
      "normalized_score": 0.001063859887389299,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/long_horizon_next_action/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 13,
      "task_id": "long_horizon_next_action",
      "task_label": "Long-Horizon Next-Action Forecasting",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0023356666867101906,
      "raw_text": "0.0023",
      "normalized_score": 0.0023356666867101906,
      "metric_key": "long_horizon_next_action_macro_f1",
      "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/long_horizon_next_action/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 13,
      "task_id": "long_horizon_next_action",
      "task_label": "Long-Horizon Next-Action Forecasting",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 13,
      "task_id": "long_horizon_next_action",
      "task_label": "Long-Horizon Next-Action Forecasting",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 14,
      "task_id": "next_subtask_forecast",
      "task_label": "Long-Horizon Next-Subtask Forecasting",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 14,
      "task_id": "next_subtask_forecast",
      "task_label": "Long-Horizon Next-Subtask Forecasting",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 14,
      "task_id": "next_subtask_forecast",
      "task_label": "Long-Horizon Next-Subtask Forecasting",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0,
      "raw_text": "0.0000",
      "normalized_score": 0.0,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/next_subtask_forecast/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 14,
      "task_id": "next_subtask_forecast",
      "task_label": "Long-Horizon Next-Subtask Forecasting",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0,
      "raw_text": "0.0000",
      "normalized_score": 0.0,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/next_subtask_forecast/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 14,
      "task_id": "next_subtask_forecast",
      "task_label": "Long-Horizon Next-Subtask Forecasting",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.004206715978529301,
      "raw_text": "0.0042",
      "normalized_score": 0.004206715978529301,
      "metric_key": "next_subtask_forecast_macro_f1",
      "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/next_subtask_forecast/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 14,
      "task_id": "next_subtask_forecast",
      "task_label": "Long-Horizon Next-Subtask Forecasting",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 14,
      "task_id": "next_subtask_forecast",
      "task_label": "Long-Horizon Next-Subtask Forecasting",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 15,
      "task_id": "interaction_text_prediction",
      "task_label": "Interaction Text Prediction",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 15,
      "task_id": "interaction_text_prediction",
      "task_label": "Interaction Text Prediction",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 15,
      "task_id": "interaction_text_prediction",
      "task_label": "Interaction Text Prediction",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "proxy_scored",
      "status_label": "proxy scored",
      "scored": true,
      "proxy_scored": true,
      "raw": 0.012611998261547169,
      "raw_text": "0.0126",
      "normalized_score": 0.012611998261547169,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/interaction_text_prediction/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": "documented compact proxy completion for this raw128 task axis"
    },
    {
      "task_number": 15,
      "task_id": "interaction_text_prediction",
      "task_label": "Interaction Text Prediction",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "proxy_scored",
      "status_label": "proxy scored",
      "scored": true,
      "proxy_scored": true,
      "raw": 0.009791421280985521,
      "raw_text": "0.0098",
      "normalized_score": 0.009791421280985521,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/interaction_text_prediction/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": "documented compact proxy completion for this raw128 task axis"
    },
    {
      "task_number": 15,
      "task_id": "interaction_text_prediction",
      "task_label": "Interaction Text Prediction",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 15,
      "task_id": "interaction_text_prediction",
      "task_label": "Interaction Text Prediction",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 15,
      "task_id": "interaction_text_prediction",
      "task_label": "Interaction Text Prediction",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 16,
      "task_id": "action_object_relation",
      "task_label": "Action-Object Relation Prediction",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 16,
      "task_id": "action_object_relation",
      "task_label": "Action-Object Relation Prediction",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 16,
      "task_id": "action_object_relation",
      "task_label": "Action-Object Relation Prediction",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0,
      "raw_text": "0.0000",
      "normalized_score": 0.0,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/action_object_relation/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 16,
      "task_id": "action_object_relation",
      "task_label": "Action-Object Relation Prediction",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0,
      "raw_text": "0.0000",
      "normalized_score": 0.0,
      "metric_key": "macro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/action_object_relation/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 16,
      "task_id": "action_object_relation",
      "task_label": "Action-Object Relation Prediction",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0002220083079671497,
      "raw_text": "0.0002",
      "normalized_score": 0.0002220083079671497,
      "metric_key": "action_object_relation_macro_f1",
      "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 16,
      "task_id": "action_object_relation",
      "task_label": "Action-Object Relation Prediction",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.0,
      "raw_text": "0.0000",
      "normalized_score": 0.0,
      "metric_key": "action_object_relation_macro_f1",
      "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 16,
      "task_id": "action_object_relation",
      "task_label": "Action-Object Relation Prediction",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "macro_f1",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 17,
      "task_id": "object_set_forecast",
      "task_label": "Future Object-Set Forecasting",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "micro_f1",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 17,
      "task_id": "object_set_forecast",
      "task_label": "Future Object-Set Forecasting",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "micro_f1",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 17,
      "task_id": "object_set_forecast",
      "task_label": "Future Object-Set Forecasting",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.06469493412657774,
      "raw_text": "0.0647",
      "normalized_score": 0.06469493412657774,
      "metric_key": "micro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/object_set_forecast/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 17,
      "task_id": "object_set_forecast",
      "task_label": "Future Object-Set Forecasting",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.17523098630012288,
      "raw_text": "0.1752",
      "normalized_score": 0.17523098630012288,
      "metric_key": "micro_f1",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/object_set_forecast/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 17,
      "task_id": "object_set_forecast",
      "task_label": "Future Object-Set Forecasting",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.1659483964851402,
      "raw_text": "0.1659",
      "normalized_score": 0.1659483964851402,
      "metric_key": "object_set_forecast_micro_f1",
      "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/object_set_forecast/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 17,
      "task_id": "object_set_forecast",
      "task_label": "Future Object-Set Forecasting",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "micro_f1",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 17,
      "task_id": "object_set_forecast",
      "task_label": "Future Object-Set Forecasting",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "micro_f1",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 18,
      "task_id": "imu_to_hand_pose",
      "task_label": "IMU-to-Hand Pose Reconstruction",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mae",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 18,
      "task_id": "imu_to_hand_pose",
      "task_label": "IMU-to-Hand Pose Reconstruction",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mae",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 18,
      "task_id": "imu_to_hand_pose",
      "task_label": "IMU-to-Hand Pose Reconstruction",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.22941437363624573,
      "raw_text": "0.2294",
      "normalized_score": 0.1832902066792771,
      "metric_key": "mae",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/imu_to_hand_pose/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 18,
      "task_id": "imu_to_hand_pose",
      "task_label": "IMU-to-Hand Pose Reconstruction",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 0.252998411655426,
      "raw_text": "0.2530",
      "normalized_score": 0.1662042369509182,
      "metric_key": "mae",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/imu_to_hand_pose/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 18,
      "task_id": "imu_to_hand_pose",
      "task_label": "IMU-to-Hand Pose Reconstruction",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mae",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 18,
      "task_id": "imu_to_hand_pose",
      "task_label": "IMU-to-Hand Pose Reconstruction",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mae",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 18,
      "task_id": "imu_to_hand_pose",
      "task_label": "IMU-to-Hand Pose Reconstruction",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mae",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 19,
      "task_id": "camera_view_sync_retrieval",
      "task_label": "Camera-View Synchronization Retrieval",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mrr",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 19,
      "task_id": "camera_view_sync_retrieval",
      "task_label": "Camera-View Synchronization Retrieval",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mrr",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 19,
      "task_id": "camera_view_sync_retrieval",
      "task_label": "Camera-View Synchronization Retrieval",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "proxy_scored",
      "status_label": "proxy scored",
      "scored": true,
      "proxy_scored": true,
      "raw": 0.0026625150348991156,
      "raw_text": "0.0027",
      "normalized_score": 0.0026625150348991156,
      "metric_key": "mrr",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/camera_view_sync_retrieval/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": "documented compact proxy completion for this raw128 task axis"
    },
    {
      "task_number": 19,
      "task_id": "camera_view_sync_retrieval",
      "task_label": "Camera-View Synchronization Retrieval",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "proxy_scored",
      "status_label": "proxy scored",
      "scored": true,
      "proxy_scored": true,
      "raw": 0.0025448438245803118,
      "raw_text": "0.0025",
      "normalized_score": 0.0025448438245803118,
      "metric_key": "mrr",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/camera_view_sync_retrieval/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": "documented compact proxy completion for this raw128 task axis"
    },
    {
      "task_number": 19,
      "task_id": "camera_view_sync_retrieval",
      "task_label": "Camera-View Synchronization Retrieval",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mrr",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 19,
      "task_id": "camera_view_sync_retrieval",
      "task_label": "Camera-View Synchronization Retrieval",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mrr",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 19,
      "task_id": "camera_view_sync_retrieval",
      "task_label": "Camera-View Synchronization Retrieval",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mrr",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 20,
      "task_id": "time_to_transition",
      "task_label": "Time-to-Next-Transition Regression",
      "series_id": "metadata128_simple",
      "method": "128ep Metadata Simple",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mae",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 20,
      "task_id": "time_to_transition",
      "task_label": "Time-to-Next-Transition Regression",
      "series_id": "metadata128_neural_mlp",
      "method": "128ep Metadata NN",
      "status": "not_supported_by_metadata_only_package",
      "status_label": "not supported",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mae",
      "source": null,
      "scope": "multi_episode_128_metadata_baseline",
      "reason": "the 128-episode metadata/text rerun did not produce this task target; raw sensor blocks or a task-specific metadata target builder are required"
    },
    {
      "task_number": 20,
      "task_id": "time_to_transition",
      "task_label": "Time-to-Next-Transition Regression",
      "series_id": "raw128_simple",
      "method": "128ep Raw Simple",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 52.32759475708008,
      "raw_text": "52.33",
      "normalized_score": 0.20137284019197565,
      "metric_key": "mae",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/time_to_transition/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 20,
      "task_id": "time_to_transition",
      "task_label": "Time-to-Next-Transition Regression",
      "series_id": "raw128_neural_mlp",
      "method": "128ep Raw NN",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 42.374061584472656,
      "raw_text": "42.37",
      "normalized_score": 0.24867468405504953,
      "metric_key": "mae",
      "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/time_to_transition/metrics.json",
      "scope": "multi_episode_128_raw_sensor_feature_baseline",
      "reason": null
    },
    {
      "task_number": 20,
      "task_id": "time_to_transition",
      "task_label": "Time-to-Next-Transition Regression",
      "series_id": "qwen3_omni_v6_lora",
      "method": "Qwen3-Omni v6 LoRA",
      "status": "scored",
      "status_label": "scored",
      "scored": true,
      "proxy_scored": false,
      "raw": 134.0687422166874,
      "raw_text": "134.07",
      "normalized_score": 0.07859666766782253,
      "metric_key": "time_to_transition_mae",
      "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/time_to_transition/metrics.json",
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": null
    },
    {
      "task_number": 20,
      "task_id": "time_to_transition",
      "task_label": "Time-to-Next-Transition Regression",
      "series_id": "cosmos3_super_reasoner",
      "method": "Cosmos3-Super Reasoner",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mae",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    },
    {
      "task_number": 20,
      "task_id": "time_to_transition",
      "task_label": "Time-to-Next-Transition Regression",
      "series_id": "cosmos3_nano_future_window",
      "method": "Cosmos3-Nano Future Window",
      "status": "not_evaluated_in_verified_package",
      "status_label": "not evaluated",
      "scored": false,
      "proxy_scored": false,
      "raw": null,
      "raw_text": "n/a",
      "normalized_score": null,
      "metric_key": "mae",
      "source": null,
      "scope": "multi_episode_128_partial_model_overlay",
      "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
    }
  ]
}