| { |
| "title": "128-Episode 20-Task Radar", |
| "status": "pass", |
| "generated_at_utc": "2026-06-20T20:38:21+00:00", |
| "description": "Selected 128-episode metadata/raw baselines plus verified Qwen3/Cosmos branches. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.", |
| "task_count": 20, |
| "method_count": 7, |
| "method_task_record_count": 140, |
| "scored_method_task_count": 140, |
| "normalization_policy": { |
| "higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]", |
| "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task", |
| "raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table", |
| "result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used", |
| "foundation_model_overlay": "Qwen3/Cosmos points are plotted only on task-aligned axes. Scoreless records mean the public result does not evaluate that task contract.", |
| "metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.", |
| "raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export." |
| }, |
| "source_unified_radar": "docs/data/unified_task_model_radar.json", |
| "source_result_matrix": "docs/data/task_method_20_result_matrix.json", |
| "series": [ |
| { |
| "id": "metadata128_simple", |
| "label": "128ep Aligned Simple", |
| "short_label": "128-S", |
| "color": "#ffd166", |
| "kind": "partial_128_episode_aligned_baseline", |
| "scope": "128 selected episodes, JSONL metadata/text plus staged sensor-block targets where available", |
| "stroke_dasharray": "9 6", |
| "method_detail": "128-episode aligned simple baselines: JSONL metadata/text tasks plus staged sensor-block tasks where the processed target exists.", |
| "plotted_as": "colored point overlay", |
| "result_record_count": 20, |
| "scored_task_count": 20, |
| "covered_task_count": 20, |
| "proxy_scored_task_count": 1, |
| "scoreless_task_count": 0, |
| "unsupported_task_count": 0, |
| "not_evaluated_task_count": 0, |
| "status_counts": { |
| "proxy_scored": 1, |
| "scored": 19 |
| }, |
| "coverage_fraction": 1.0, |
| "result_record_fraction": 1.0 |
| }, |
| { |
| "id": "metadata128_neural_mlp", |
| "label": "128ep Aligned NN", |
| "short_label": "128-NN", |
| "color": "#f472b6", |
| "kind": "partial_128_episode_aligned_baseline", |
| "scope": "128 selected episodes, JSONL metadata/text plus staged sensor-block targets where available", |
| "stroke_dasharray": "3 6", |
| "method_detail": "128-episode aligned MLP baselines: JSONL metadata/text tasks plus staged sensor-block tasks where the processed target exists.", |
| "plotted_as": "colored point overlay", |
| "result_record_count": 20, |
| "scored_task_count": 20, |
| "covered_task_count": 20, |
| "proxy_scored_task_count": 1, |
| "scoreless_task_count": 0, |
| "unsupported_task_count": 0, |
| "not_evaluated_task_count": 0, |
| "status_counts": { |
| "proxy_scored": 1, |
| "scored": 19 |
| }, |
| "coverage_fraction": 1.0, |
| "result_record_fraction": 1.0 |
| }, |
| { |
| "id": "raw128_simple", |
| "label": "128ep Raw Simple", |
| "short_label": "128-RS", |
| "color": "#f59e0b", |
| "kind": "complete_128_episode_raw_feature_baseline", |
| "scope": "128 selected episodes, staged 4430-dim sensor NPZ features; 2 compact proxy axes", |
| "stroke_dasharray": "8 4", |
| "method_detail": "128-episode 4430-dim sensor NPZ simple heads; tasks 15/19 use compact proxies.", |
| "plotted_as": "colored point overlay", |
| "result_record_count": 20, |
| "scored_task_count": 20, |
| "covered_task_count": 20, |
| "proxy_scored_task_count": 2, |
| "scoreless_task_count": 0, |
| "unsupported_task_count": 0, |
| "not_evaluated_task_count": 0, |
| "status_counts": { |
| "proxy_scored": 2, |
| "scored": 18 |
| }, |
| "coverage_fraction": 1.0, |
| "result_record_fraction": 1.0 |
| }, |
| { |
| "id": "raw128_neural_mlp", |
| "label": "128ep Raw NN", |
| "short_label": "128-RN", |
| "color": "#22d3ee", |
| "kind": "complete_128_episode_raw_feature_baseline", |
| "scope": "128 selected episodes, staged 4430-dim sensor NPZ features; 2 compact proxy axes", |
| "stroke_dasharray": "2 5", |
| "method_detail": "128-episode 4430-dim sensor NPZ MLP heads; tasks 15/19 use compact proxies.", |
| "plotted_as": "colored point overlay", |
| "result_record_count": 20, |
| "scored_task_count": 20, |
| "covered_task_count": 20, |
| "proxy_scored_task_count": 2, |
| "scoreless_task_count": 0, |
| "unsupported_task_count": 0, |
| "not_evaluated_task_count": 0, |
| "status_counts": { |
| "proxy_scored": 2, |
| "scored": 18 |
| }, |
| "coverage_fraction": 1.0, |
| "result_record_fraction": 1.0 |
| }, |
| { |
| "id": "qwen3_omni_v6_lora", |
| "label": "Qwen3-Omni v6 LoRA", |
| "short_label": "Qwen3", |
| "color": "#9bb8ff", |
| "kind": "partial_128_episode_foundation_model_overlay", |
| "scope": "128 selected episodes, held-out test", |
| "stroke_dasharray": "7 7", |
| "method_detail": "Verified held-out Qwen3-Omni v6 LoRA metrics, plus task 16 and any completed private-GPU future/retrieval/sensor-target probes scored from task-specific JSON.", |
| "plotted_as": "colored point overlay", |
| "result_record_count": 20, |
| "scored_task_count": 20, |
| "covered_task_count": 20, |
| "proxy_scored_task_count": 0, |
| "scoreless_task_count": 0, |
| "unsupported_task_count": 0, |
| "not_evaluated_task_count": 0, |
| "status_counts": { |
| "scored": 20 |
| }, |
| "coverage_fraction": 1.0, |
| "result_record_fraction": 1.0 |
| }, |
| { |
| "id": "cosmos3_super_reasoner", |
| "label": "Cosmos3-Super Reasoner", |
| "short_label": "C3-S", |
| "color": "#ff9c7a", |
| "kind": "partial_128_episode_foundation_model_overlay", |
| "scope": "128 selected episodes, held-out test", |
| "stroke_dasharray": "4 7", |
| "method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation, plus task 5/8/9/10/11/12/13/14/16/17/18/19/20 probes where public metrics exist.", |
| "plotted_as": "colored point overlay", |
| "result_record_count": 20, |
| "scored_task_count": 20, |
| "covered_task_count": 20, |
| "proxy_scored_task_count": 0, |
| "scoreless_task_count": 0, |
| "unsupported_task_count": 0, |
| "not_evaluated_task_count": 0, |
| "status_counts": { |
| "scored": 20 |
| }, |
| "coverage_fraction": 1.0, |
| "result_record_fraction": 1.0 |
| }, |
| { |
| "id": "cosmos3_nano_future_window", |
| "label": "Cosmos3-Nano Future Window", |
| "short_label": "C3-N", |
| "color": "#d9c7ff", |
| "kind": "partial_128_episode_world_model_overlay", |
| "scope": "128 selected episodes, held-out test", |
| "stroke_dasharray": "2 7", |
| "method_detail": "Verified Cosmos3-Nano future-window compatibility metrics, plus model-output probes for tasks 2/5/7/8/10/11/12/13/14/15/16/17/18/19 and a derived task-20 boundary timing probe scored from held-out future-window artifacts.", |
| "plotted_as": "colored point overlay", |
| "result_record_count": 20, |
| "scored_task_count": 20, |
| "covered_task_count": 20, |
| "proxy_scored_task_count": 0, |
| "scoreless_task_count": 0, |
| "unsupported_task_count": 0, |
| "not_evaluated_task_count": 0, |
| "status_counts": { |
| "scored": 20 |
| }, |
| "coverage_fraction": 1.0, |
| "result_record_fraction": 1.0 |
| } |
| ], |
| "tasks": [ |
| { |
| "task_number": 1, |
| "task_id": "timeline_action", |
| "label": "Action Recognition", |
| "axis_label": "01 Action Recognition", |
| "short_label": "Action", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.008252821966746326, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/timeline_action/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.008252821966746326, |
| "raw_text": "0.0083", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.004175793689174209, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/timeline_action/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.004175793689174209, |
| "raw_text": "0.0042", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.002915061325704321, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/timeline_action/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.002915061325704321, |
| "raw_text": "0.0029", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 0.0014955083181204041, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/timeline_action/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0014955083181204041, |
| "raw_text": "0.0015", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.0028830723979596335, |
| "metric_key": "action_macro_f1", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0028830723979596335, |
| "raw_text": "0.0029", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.0008284021201089245, |
| "metric_key": "action_macro_f1", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0008284021201089245, |
| "raw_text": "0.0008", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.007936507936507936, |
| "metric_key": "action_accuracy_from_retrieved_future", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.007936507936507936, |
| "raw_text": "0.0079", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 2, |
| "task_id": "timeline_subtask", |
| "label": "Procedure Step Recognition", |
| "axis_label": "02 Procedure Step Recognition", |
| "short_label": "Step", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.00019512195121951218, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/timeline_subtask/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.00019512195121951218, |
| "raw_text": "0.0002", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 7.207207207207208e-05, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/timeline_subtask/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 7.207207207207208e-05, |
| "raw_text": "0.0001", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/timeline_subtask/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "0.0000", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 7.35632183908046e-05, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/timeline_subtask/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 7.35632183908046e-05, |
| "raw_text": "0.0001", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.0037313432835820895, |
| "metric_key": "subtask_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0037313432835820895, |
| "raw_text": "0.0037", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.0, |
| "metric_key": "subtask_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "0.0000", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.0, |
| "metric_key": "timeline_subtask_macro_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_current_subtask_object_relevance_patched_textonly_20260621/timeline_subtask/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "0.0000", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 3, |
| "task_id": "transition_detection", |
| "label": "Action Boundary Detection", |
| "axis_label": "03 Action Boundary Detection", |
| "short_label": "Boundary", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.29652162550029315, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/transition_detection/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.29652162550029315, |
| "raw_text": "0.2965", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.4841733292368365, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/transition_detection/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.4841733292368365, |
| "raw_text": "0.4842", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.4203613574238283, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/transition_detection/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.4203613574238283, |
| "raw_text": "0.4204", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 0.4902206914147213, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/transition_detection/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.4902206914147213, |
| "raw_text": "0.4902", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.9898313492063492, |
| "metric_key": "transition_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.9898313492063492, |
| "raw_text": "0.9898", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.36830357142857145, |
| "metric_key": "transition_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.36830357142857145, |
| "raw_text": "0.3683", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.9682539682539683, |
| "metric_key": "transition_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.9682539682539683, |
| "raw_text": "0.9683", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 4, |
| "task_id": "next_action", |
| "label": "Next-Action Prediction", |
| "axis_label": "04 Next-Action Prediction", |
| "short_label": "Next act", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.006514774539765508, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/next_action/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.006514774539765508, |
| "raw_text": "0.0065", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.004910507980164745, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/next_action/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.004910507980164745, |
| "raw_text": "0.0049", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.003285273363482094, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/next_action/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.003285273363482094, |
| "raw_text": "0.0033", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 0.0018477984371755407, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/next_action/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0018477984371755407, |
| "raw_text": "0.0018", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.04305335446381405, |
| "metric_key": "next_action_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.04305335446381405, |
| "raw_text": "0.0431", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.013392857142857142, |
| "metric_key": "next_action_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.013392857142857142, |
| "raw_text": "0.0134", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.007936507936507936, |
| "metric_key": "action_accuracy_from_retrieved_future", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.007936507936507936, |
| "raw_text": "0.0079", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 5, |
| "task_id": "hand_trajectory_forecast", |
| "label": "Hand Trajectory Forecasting", |
| "axis_label": "05 Hand Trajectory Forecasting", |
| "short_label": "Hand traj", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "mpjpe", |
| "metric_name": "MPJPE", |
| "metric_direction": "lower", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 8.817333221435547, |
| "metric_key": "mpjpe", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/hand_trajectory_forecast/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.012231610603598841, |
| "raw_text": "8.817", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.429434210062027, |
| "metric_key": "mpjpe", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/hand_trajectory_forecast/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.25114484128127007, |
| "raw_text": "0.4294", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.2729249894618988, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/hand_trajectory_forecast/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.39516420515180267, |
| "raw_text": "0.2729", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 0.18475216627120972, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/hand_trajectory_forecast/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.5837560051580399, |
| "raw_text": "0.1848", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.7216105627267382, |
| "metric_key": "hand_trajectory_forecast_mrr", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/hand_trajectory_forecast/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.149457605109387, |
| "raw_text": "0.7216", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.8915253522315043, |
| "metric_key": "hand_trajectory_forecast_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/hand_trajectory_forecast/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.12097265238372007, |
| "raw_text": "0.8915", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.6912806884333101, |
| "metric_key": "hand_trajectory_forecast_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621/hand_trajectory_forecast/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.15601504328321764, |
| "raw_text": "0.6913", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 6, |
| "task_id": "contact_prediction", |
| "label": "Contact State Prediction", |
| "axis_label": "06 Contact State Prediction", |
| "short_label": "Contact", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.4381481308057444, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/contact_prediction/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.4381481308057444, |
| "raw_text": "0.4381", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.5682695682695682, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/contact_prediction/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.5682695682695682, |
| "raw_text": "0.5683", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.886990707397193, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/contact_prediction/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.886990707397193, |
| "raw_text": "0.8870", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 1.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/contact_prediction/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 1.0, |
| "raw_text": "1.000", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.8177083333333334, |
| "metric_key": "contact_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.8177083333333334, |
| "raw_text": "0.8177", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.32142857142857145, |
| "metric_key": "contact_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.32142857142857145, |
| "raw_text": "0.3214", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.7433862433862434, |
| "metric_key": "contact_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.7433862433862434, |
| "raw_text": "0.7434", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 7, |
| "task_id": "object_relevance", |
| "label": "Object Relevance Prediction", |
| "axis_label": "07 Object Relevance Prediction", |
| "short_label": "Objects", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "micro_f1", |
| "metric_name": "micro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.17764578833693304, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/object_relevance/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.17764578833693304, |
| "raw_text": "0.1776", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.18662723837686876, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/object_relevance/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.18662723837686876, |
| "raw_text": "0.1866", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.0655376369662084, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/object_relevance/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0655376369662084, |
| "raw_text": "0.0655", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 0.1765890386972509, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/object_relevance/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.1765890386972509, |
| "raw_text": "0.1766", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.3064982378331287, |
| "metric_key": "object_micro_f1", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.3064982378331287, |
| "raw_text": "0.3065", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.13704276146316333, |
| "metric_key": "object_micro_f1", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.13704276146316333, |
| "raw_text": "0.1370", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.00047209895194032665, |
| "metric_key": "object_relevance_micro_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_current_subtask_object_relevance_patched_textonly_20260621/object_relevance/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.00047209895194032665, |
| "raw_text": "0.0005", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 8, |
| "task_id": "caption_grounding", |
| "label": "Language Grounding", |
| "axis_label": "08 Language Grounding", |
| "short_label": "Language", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "mrr", |
| "metric_name": "MRR", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.002332374220713973, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/caption_grounding/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.002332374220713973, |
| "raw_text": "0.0023", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.008236799389123917, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/caption_grounding/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.008236799389123917, |
| "raw_text": "0.0082", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.011138836853206158, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/caption_grounding/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.011138836853206158, |
| "raw_text": "0.0111", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 0.0063402121886610985, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/caption_grounding/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0063402121886610985, |
| "raw_text": "0.0063", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.8764467592592605, |
| "metric_key": "caption_grounding_mrr", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_retrieval_task_probes_a100_20260617T175919Z/caption_grounding/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.8764467592592605, |
| "raw_text": "0.8764", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.30639899644580487, |
| "metric_key": "caption_grounding_iou", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/caption_grounding/cosmos3_super_reasoner/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.30639899644580487, |
| "raw_text": "0.3064", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.5221041086644663, |
| "metric_key": "caption_grounding_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621/caption_grounding/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.5221041086644663, |
| "raw_text": "0.5221", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 9, |
| "task_id": "cross_modal_retrieval", |
| "label": "Cross-Modal Retrieval", |
| "axis_label": "09 Cross-Modal Retrieval", |
| "short_label": "X-modal", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "mrr", |
| "metric_name": "MRR", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.002587692579254508, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/cross_modal_retrieval/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.002587692579254508, |
| "raw_text": "0.0026", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.0026067993603646755, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/cross_modal_retrieval/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0026067993603646755, |
| "raw_text": "0.0026", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.003459817497059703, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/cross_modal_retrieval/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.003459817497059703, |
| "raw_text": "0.0035", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 0.002535284962505102, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/cross_modal_retrieval/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.002535284962505102, |
| "raw_text": "0.0025", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.5080191798941732, |
| "metric_key": "cross_modal_retrieval_mrr", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_cross_modal_retrieval_probe_a100_20260618T000000Z/cross_modal_retrieval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.5080191798941732, |
| "raw_text": "0.5080", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.6628490677465636, |
| "metric_key": "cross_modal_retrieval_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/cross_modal_retrieval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.6628490677465636, |
| "raw_text": "0.6628", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.022138720585222767, |
| "metric_key": "future_retrieval_mrr", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.022138720585222767, |
| "raw_text": "0.0221", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 10, |
| "task_id": "modality_reconstruction", |
| "label": "Cross-Modal Reconstruction", |
| "axis_label": "10 Cross-Modal Reconstruction", |
| "short_label": "Recon", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "r2", |
| "metric_name": "R2", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": -190.66106203944798, |
| "metric_key": "r2", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/modality_reconstruction/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "-190.66", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": -0.43481132003942147, |
| "metric_key": "r2", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/modality_reconstruction/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "-0.4348", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": -1.3450960391924882, |
| "metric_key": "r2", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/modality_reconstruction/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "-1.345", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": -1.3974418160502369, |
| "metric_key": "r2", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/modality_reconstruction/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "-1.397", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.9670547540707002, |
| "metric_key": "modality_reconstruction_mrr", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.9670547540707002, |
| "raw_text": "0.9671", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.9939466801653591, |
| "metric_key": "modality_reconstruction_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/modality_reconstruction/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.9939466801653591, |
| "raw_text": "0.9939", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.0002873382957286892, |
| "metric_key": "feature_reconstruction_quality", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/modality_reconstruction/cosmos3_nano_future_window/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0002873382957286892, |
| "raw_text": "0.0003", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 11, |
| "task_id": "temporal_order", |
| "label": "Temporal Order Verification", |
| "axis_label": "11 Temporal Order Verification", |
| "short_label": "Order", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "f1", |
| "metric_name": "F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.4198864140782312, |
| "metric_key": "f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/temporal_order/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.4198864140782312, |
| "raw_text": "0.4199", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.8252408266656923, |
| "metric_key": "f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/temporal_order/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.8252408266656923, |
| "raw_text": "0.8252", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.49824413370686593, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/temporal_order/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.49824413370686593, |
| "raw_text": "0.4982", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 0.8030047098504103, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/temporal_order/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.8030047098504103, |
| "raw_text": "0.8030", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.40984631701404173, |
| "metric_key": "temporal_order_f1", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/temporal_order/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.40984631701404173, |
| "raw_text": "0.4098", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.6286317274823326, |
| "metric_key": "temporal_order_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/temporal_order/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.6286317274823326, |
| "raw_text": "0.6286", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.5954109425716205, |
| "metric_key": "temporal_order_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_future_order_misalignment_patched_textonly_20260621/temporal_order/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.5954109425716205, |
| "raw_text": "0.5954", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 12, |
| "task_id": "misalignment_detection", |
| "label": "Multimodal Synchronization Detection", |
| "axis_label": "12 Multimodal Synchronization Detection", |
| "short_label": "Sync", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "f1", |
| "metric_name": "F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.49980060227663614, |
| "metric_key": "f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/misalignment_detection/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.49980060227663614, |
| "raw_text": "0.4998", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.7773773780941162, |
| "metric_key": "f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/misalignment_detection/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.7773773780941162, |
| "raw_text": "0.7774", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.4958867673901769, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/misalignment_detection/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.4958867673901769, |
| "raw_text": "0.4959", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 0.8272709077974252, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/misalignment_detection/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.8272709077974252, |
| "raw_text": "0.8273", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.3344936184319576, |
| "metric_key": "misalignment_detection_f1", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/misalignment_detection/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.3344936184319576, |
| "raw_text": "0.3345", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.37271645981034185, |
| "metric_key": "misalignment_detection_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/misalignment_detection/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.37271645981034185, |
| "raw_text": "0.3727", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.477201691802725, |
| "metric_key": "misalignment_detection_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_future_order_misalignment_patched_textonly_20260621/misalignment_detection/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.477201691802725, |
| "raw_text": "0.4772", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 13, |
| "task_id": "long_horizon_next_action", |
| "label": "Long-Horizon Next-Action Forecasting", |
| "axis_label": "13 Long-Horizon Next-Action Forecasting", |
| "short_label": "Long act", |
| "origin": "additional_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.004579592783699693, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/long_horizon_next_action/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.004579592783699693, |
| "raw_text": "0.0046", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.0029821307969142615, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/long_horizon_next_action/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0029821307969142615, |
| "raw_text": "0.0030", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.0024280172369056294, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/long_horizon_next_action/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0024280172369056294, |
| "raw_text": "0.0024", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 0.001063859887389299, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/long_horizon_next_action/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.001063859887389299, |
| "raw_text": "0.0011", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.0023356666867101906, |
| "metric_key": "long_horizon_next_action_macro_f1", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/long_horizon_next_action/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0023356666867101906, |
| "raw_text": "0.0023", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.008807588075880758, |
| "metric_key": "long_horizon_next_action_macro_f1", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/long_horizon_next_action/cosmos3_super_reasoner/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.008807588075880758, |
| "raw_text": "0.0088", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.0024906600249066007, |
| "metric_key": "long_horizon_next_action_macro_f1", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/long_horizon_next_action/cosmos3_nano_future_window/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0024906600249066007, |
| "raw_text": "0.0025", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 14, |
| "task_id": "next_subtask_forecast", |
| "label": "Long-Horizon Next-Subtask Forecasting", |
| "axis_label": "14 Long-Horizon Next-Subtask Forecasting", |
| "short_label": "Long step", |
| "origin": "additional_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.0001206030150753769, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/next_subtask_forecast/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0001206030150753769, |
| "raw_text": "0.0001", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 2.086049543676662e-05, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/next_subtask_forecast/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 2.086049543676662e-05, |
| "raw_text": "0.0000", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/next_subtask_forecast/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "0.0000", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/next_subtask_forecast/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "0.0000", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.004206715978529301, |
| "metric_key": "next_subtask_forecast_macro_f1", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/next_subtask_forecast/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.004206715978529301, |
| "raw_text": "0.0042", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.0, |
| "metric_key": "next_subtask_forecast_macro_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/next_subtask_forecast/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "0.0000", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.006614876224708678, |
| "metric_key": "next_subtask_forecast_macro_f1", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/next_subtask_forecast/cosmos3_nano_future_window/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.006614876224708678, |
| "raw_text": "0.0066", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 15, |
| "task_id": "interaction_text_prediction", |
| "label": "Interaction Text Prediction", |
| "axis_label": "15 Interaction Text Prediction", |
| "short_label": "Interact txt", |
| "origin": "additional_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": true, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/interaction_text_prediction/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "0.0000", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/interaction_text_prediction/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "0.0000", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.012611998261547169, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/interaction_text_prediction/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "proxy_scored", |
| "reason": "documented compact proxy completion for this raw128 task axis", |
| "normalized_score": 0.012611998261547169, |
| "raw_text": "0.0126", |
| "status_label": "proxy scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 0.009791421280985521, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/interaction_text_prediction/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "proxy_scored", |
| "reason": "documented compact proxy completion for this raw128 task axis", |
| "normalized_score": 0.009791421280985521, |
| "raw_text": "0.0098", |
| "status_label": "proxy scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.4318674027510605, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_interaction_text_task15_a100_20260620T010305Z/interaction_text_prediction/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.4318674027510605, |
| "raw_text": "0.4319", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.17949512355264183, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_interaction_text_task15_textonly_v1_20260620T1558Z/interaction_text_prediction/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.17949512355264183, |
| "raw_text": "0.1795", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.1788367958939587, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_interaction_text_task15_patched_textonly_20260621/interaction_text_prediction/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.1788367958939587, |
| "raw_text": "0.1788", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 16, |
| "task_id": "action_object_relation", |
| "label": "Action-Object Relation Prediction", |
| "axis_label": "16 Action-Object Relation Prediction", |
| "short_label": "Act+obj", |
| "origin": "additional_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/action_object_relation/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "0.0000", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/action_object_relation/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "0.0000", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/action_object_relation/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "0.0000", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/action_object_relation/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "0.0000", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.0002220083079671497, |
| "metric_key": "action_object_relation_macro_f1", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0002220083079671497, |
| "raw_text": "0.0002", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.0, |
| "metric_key": "action_object_relation_macro_f1", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0, |
| "raw_text": "0.0000", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.002794157670325683, |
| "metric_key": "action_object_relation_macro_f1", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_nano_future_window/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.002794157670325683, |
| "raw_text": "0.0028", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 17, |
| "task_id": "object_set_forecast", |
| "label": "Future Object-Set Forecasting", |
| "axis_label": "17 Future Object-Set Forecasting", |
| "short_label": "Future obj", |
| "origin": "additional_public_sample_tasks", |
| "metric_key": "micro_f1", |
| "metric_name": "micro-F1", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.17656983343047333, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/object_set_forecast/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.17656983343047333, |
| "raw_text": "0.1766", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.17418550827844048, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/object_set_forecast/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.17418550827844048, |
| "raw_text": "0.1742", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.06469493412657774, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/object_set_forecast/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.06469493412657774, |
| "raw_text": "0.0647", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 0.17523098630012288, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/object_set_forecast/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.17523098630012288, |
| "raw_text": "0.1752", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.1659483964851402, |
| "metric_key": "object_set_forecast_micro_f1", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/object_set_forecast/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.1659483964851402, |
| "raw_text": "0.1659", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.0009279881217520415, |
| "metric_key": "object_set_forecast_micro_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/object_set_forecast/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.0009279881217520415, |
| "raw_text": "0.0009", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.01781970649895178, |
| "metric_key": "object_set_forecast_micro_f1", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/object_set_forecast/cosmos3_nano_future_window/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.01781970649895178, |
| "raw_text": "0.0178", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 18, |
| "task_id": "imu_to_hand_pose", |
| "label": "IMU-to-Hand Pose Reconstruction", |
| "axis_label": "18 IMU-to-Hand Pose Reconstruction", |
| "short_label": "IMU->hand", |
| "origin": "additional_public_sample_tasks", |
| "metric_key": "mae", |
| "metric_name": "MAE", |
| "metric_direction": "lower", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.2294670194387436, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/imu_to_hand_pose/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.18324815505876868, |
| "raw_text": "0.2295", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.2555866539478302, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/imu_to_hand_pose/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.16452114110609004, |
| "raw_text": "0.2556", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.22941437363624573, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/imu_to_hand_pose/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.1832902066792771, |
| "raw_text": "0.2294", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 0.252998411655426, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/imu_to_hand_pose/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.1662042369509182, |
| "raw_text": "0.2530", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.9641651902471952, |
| "metric_key": "imu_to_hand_pose_mrr", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/imu_to_hand_pose/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.043612244441436056, |
| "raw_text": "0.9642", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.9896650636969544, |
| "metric_key": "imu_to_hand_pose_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/imu_to_hand_pose/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.04248852414968175, |
| "raw_text": "0.9897", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.9920062431451954, |
| "metric_key": "imu_to_hand_pose_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621/imu_to_hand_pose/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.04238824931752955, |
| "raw_text": "0.9920", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 19, |
| "task_id": "camera_view_sync_retrieval", |
| "label": "Camera-View Synchronization Retrieval", |
| "axis_label": "19 Camera-View Synchronization Retrieval", |
| "short_label": "Cam sync", |
| "origin": "additional_public_sample_tasks", |
| "metric_key": "mrr", |
| "metric_name": "MRR", |
| "metric_direction": "higher", |
| "raw128_proxy_axis": true, |
| "values": { |
| "metadata128_simple": { |
| "raw": 0.0021294241305440664, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/camera_view_sync_retrieval/metrics.json", |
| "scope": "multi_episode_128_aligned_metadata_baseline", |
| "status": "proxy_scored", |
| "reason": "paired camera-view embeddings are absent from the 128 JSONL/feature export; metadata features retrieve the synchronized same-window depth/audio block as a documented compact synchronization proxy", |
| "normalized_score": 0.0021294241305440664, |
| "raw_text": "0.0021", |
| "status_label": "proxy scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.0027218370232731104, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/camera_view_sync_retrieval/metrics.json", |
| "scope": "multi_episode_128_aligned_metadata_baseline", |
| "status": "proxy_scored", |
| "reason": "paired camera-view embeddings are absent from the 128 JSONL/feature export; metadata features retrieve the synchronized same-window depth/audio block as a documented compact synchronization proxy", |
| "normalized_score": 0.0027218370232731104, |
| "raw_text": "0.0027", |
| "status_label": "proxy scored" |
| }, |
| "raw128_simple": { |
| "raw": 0.0026625150348991156, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/camera_view_sync_retrieval/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "proxy_scored", |
| "reason": "documented compact proxy completion for this raw128 task axis", |
| "normalized_score": 0.0026625150348991156, |
| "raw_text": "0.0027", |
| "status_label": "proxy scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 0.0025448438245803118, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/camera_view_sync_retrieval/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "proxy_scored", |
| "reason": "documented compact proxy completion for this raw128 task axis", |
| "normalized_score": 0.0025448438245803118, |
| "raw_text": "0.0025", |
| "status_label": "proxy scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.6587714947089998, |
| "metric_key": "camera_view_sync_retrieval_mrr", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_camera_view_sync_mosaic_tile_a100_20260619T0305Z/camera_view_sync_retrieval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.6587714947089998, |
| "raw_text": "0.6588", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.9979751961528727, |
| "metric_key": "camera_view_sync_retrieval_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/camera_view_sync_retrieval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.9979751961528727, |
| "raw_text": "0.9980", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.9989875980764363, |
| "metric_key": "camera_view_sync_retrieval_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621/camera_view_sync_retrieval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.9989875980764363, |
| "raw_text": "0.9990", |
| "status_label": "scored" |
| } |
| } |
| }, |
| { |
| "task_number": 20, |
| "task_id": "time_to_transition", |
| "label": "Time-to-Next-Transition Regression", |
| "axis_label": "20 Time-to-Next-Transition Regression", |
| "short_label": "Time2bdry", |
| "origin": "additional_public_sample_tasks", |
| "metric_key": "mae", |
| "metric_name": "MAE frames", |
| "metric_direction": "lower", |
| "raw128_proxy_axis": false, |
| "values": { |
| "metadata128_simple": { |
| "raw": 624.8108520507812, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/time_to_transition/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.016864874132806403, |
| "raw_text": "624.81", |
| "status_label": "scored" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 41.4664421081543, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/time_to_transition/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.25411768748242325, |
| "raw_text": "41.47", |
| "status_label": "scored" |
| }, |
| "raw128_simple": { |
| "raw": 52.32759475708008, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/time_to_transition/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.20137284019197565, |
| "raw_text": "52.33", |
| "status_label": "scored" |
| }, |
| "raw128_neural_mlp": { |
| "raw": 42.374061584472656, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/time_to_transition/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.24867468405504953, |
| "raw_text": "42.37", |
| "status_label": "scored" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 134.0687422166874, |
| "metric_key": "time_to_transition_mae", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/time_to_transition/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.07859666766782253, |
| "raw_text": "134.07", |
| "status_label": "scored" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 52.94642857142857, |
| "metric_key": "time_to_transition_mae", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/time_to_transition/cosmos3_super_reasoner/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.19901920981190058, |
| "raw_text": "52.95", |
| "status_label": "scored" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 33.80952380952381, |
| "metric_key": "time_to_transition_mae", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/time_to_transition/cosmos3_nano_future_window/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "status": "scored", |
| "reason": null, |
| "normalized_score": 0.3116682871966295, |
| "raw_text": "33.81", |
| "status_label": "scored" |
| } |
| } |
| } |
| ], |
| "task_method_result_matrix": [ |
| { |
| "task_number": 1, |
| "task_id": "timeline_action", |
| "task_label": "Action Recognition", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.008252821966746326, |
| "raw_text": "0.0083", |
| "normalized_score": 0.008252821966746326, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/timeline_action/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 1, |
| "task_id": "timeline_action", |
| "task_label": "Action Recognition", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.004175793689174209, |
| "raw_text": "0.0042", |
| "normalized_score": 0.004175793689174209, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/timeline_action/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 1, |
| "task_id": "timeline_action", |
| "task_label": "Action Recognition", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.002915061325704321, |
| "raw_text": "0.0029", |
| "normalized_score": 0.002915061325704321, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/timeline_action/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 1, |
| "task_id": "timeline_action", |
| "task_label": "Action Recognition", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0014955083181204041, |
| "raw_text": "0.0015", |
| "normalized_score": 0.0014955083181204041, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/timeline_action/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 1, |
| "task_id": "timeline_action", |
| "task_label": "Action Recognition", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0028830723979596335, |
| "raw_text": "0.0029", |
| "normalized_score": 0.0028830723979596335, |
| "metric_key": "action_macro_f1", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 1, |
| "task_id": "timeline_action", |
| "task_label": "Action Recognition", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0008284021201089245, |
| "raw_text": "0.0008", |
| "normalized_score": 0.0008284021201089245, |
| "metric_key": "action_macro_f1", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 1, |
| "task_id": "timeline_action", |
| "task_label": "Action Recognition", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.007936507936507936, |
| "raw_text": "0.0079", |
| "normalized_score": 0.007936507936507936, |
| "metric_key": "action_accuracy_from_retrieved_future", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 2, |
| "task_id": "timeline_subtask", |
| "task_label": "Procedure Step Recognition", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.00019512195121951218, |
| "raw_text": "0.0002", |
| "normalized_score": 0.00019512195121951218, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/timeline_subtask/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 2, |
| "task_id": "timeline_subtask", |
| "task_label": "Procedure Step Recognition", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 7.207207207207208e-05, |
| "raw_text": "0.0001", |
| "normalized_score": 7.207207207207208e-05, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/timeline_subtask/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 2, |
| "task_id": "timeline_subtask", |
| "task_label": "Procedure Step Recognition", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0, |
| "raw_text": "0.0000", |
| "normalized_score": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/timeline_subtask/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 2, |
| "task_id": "timeline_subtask", |
| "task_label": "Procedure Step Recognition", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 7.35632183908046e-05, |
| "raw_text": "0.0001", |
| "normalized_score": 7.35632183908046e-05, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/timeline_subtask/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 2, |
| "task_id": "timeline_subtask", |
| "task_label": "Procedure Step Recognition", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0037313432835820895, |
| "raw_text": "0.0037", |
| "normalized_score": 0.0037313432835820895, |
| "metric_key": "subtask_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 2, |
| "task_id": "timeline_subtask", |
| "task_label": "Procedure Step Recognition", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0, |
| "raw_text": "0.0000", |
| "normalized_score": 0.0, |
| "metric_key": "subtask_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 2, |
| "task_id": "timeline_subtask", |
| "task_label": "Procedure Step Recognition", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0, |
| "raw_text": "0.0000", |
| "normalized_score": 0.0, |
| "metric_key": "timeline_subtask_macro_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_current_subtask_object_relevance_patched_textonly_20260621/timeline_subtask/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 3, |
| "task_id": "transition_detection", |
| "task_label": "Action Boundary Detection", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.29652162550029315, |
| "raw_text": "0.2965", |
| "normalized_score": 0.29652162550029315, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/transition_detection/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 3, |
| "task_id": "transition_detection", |
| "task_label": "Action Boundary Detection", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.4841733292368365, |
| "raw_text": "0.4842", |
| "normalized_score": 0.4841733292368365, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/transition_detection/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 3, |
| "task_id": "transition_detection", |
| "task_label": "Action Boundary Detection", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.4203613574238283, |
| "raw_text": "0.4204", |
| "normalized_score": 0.4203613574238283, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/transition_detection/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 3, |
| "task_id": "transition_detection", |
| "task_label": "Action Boundary Detection", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.4902206914147213, |
| "raw_text": "0.4902", |
| "normalized_score": 0.4902206914147213, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/transition_detection/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 3, |
| "task_id": "transition_detection", |
| "task_label": "Action Boundary Detection", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.9898313492063492, |
| "raw_text": "0.9898", |
| "normalized_score": 0.9898313492063492, |
| "metric_key": "transition_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 3, |
| "task_id": "transition_detection", |
| "task_label": "Action Boundary Detection", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.36830357142857145, |
| "raw_text": "0.3683", |
| "normalized_score": 0.36830357142857145, |
| "metric_key": "transition_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 3, |
| "task_id": "transition_detection", |
| "task_label": "Action Boundary Detection", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.9682539682539683, |
| "raw_text": "0.9683", |
| "normalized_score": 0.9682539682539683, |
| "metric_key": "transition_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 4, |
| "task_id": "next_action", |
| "task_label": "Next-Action Prediction", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.006514774539765508, |
| "raw_text": "0.0065", |
| "normalized_score": 0.006514774539765508, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/next_action/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 4, |
| "task_id": "next_action", |
| "task_label": "Next-Action Prediction", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.004910507980164745, |
| "raw_text": "0.0049", |
| "normalized_score": 0.004910507980164745, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/next_action/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 4, |
| "task_id": "next_action", |
| "task_label": "Next-Action Prediction", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.003285273363482094, |
| "raw_text": "0.0033", |
| "normalized_score": 0.003285273363482094, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/next_action/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 4, |
| "task_id": "next_action", |
| "task_label": "Next-Action Prediction", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0018477984371755407, |
| "raw_text": "0.0018", |
| "normalized_score": 0.0018477984371755407, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/next_action/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 4, |
| "task_id": "next_action", |
| "task_label": "Next-Action Prediction", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.04305335446381405, |
| "raw_text": "0.0431", |
| "normalized_score": 0.04305335446381405, |
| "metric_key": "next_action_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 4, |
| "task_id": "next_action", |
| "task_label": "Next-Action Prediction", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.013392857142857142, |
| "raw_text": "0.0134", |
| "normalized_score": 0.013392857142857142, |
| "metric_key": "next_action_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 4, |
| "task_id": "next_action", |
| "task_label": "Next-Action Prediction", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.007936507936507936, |
| "raw_text": "0.0079", |
| "normalized_score": 0.007936507936507936, |
| "metric_key": "action_accuracy_from_retrieved_future", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 5, |
| "task_id": "hand_trajectory_forecast", |
| "task_label": "Hand Trajectory Forecasting", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 8.817333221435547, |
| "raw_text": "8.817", |
| "normalized_score": 0.012231610603598841, |
| "metric_key": "mpjpe", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/hand_trajectory_forecast/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 5, |
| "task_id": "hand_trajectory_forecast", |
| "task_label": "Hand Trajectory Forecasting", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.429434210062027, |
| "raw_text": "0.4294", |
| "normalized_score": 0.25114484128127007, |
| "metric_key": "mpjpe", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/hand_trajectory_forecast/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 5, |
| "task_id": "hand_trajectory_forecast", |
| "task_label": "Hand Trajectory Forecasting", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.2729249894618988, |
| "raw_text": "0.2729", |
| "normalized_score": 0.39516420515180267, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/hand_trajectory_forecast/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 5, |
| "task_id": "hand_trajectory_forecast", |
| "task_label": "Hand Trajectory Forecasting", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.18475216627120972, |
| "raw_text": "0.1848", |
| "normalized_score": 0.5837560051580399, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/hand_trajectory_forecast/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 5, |
| "task_id": "hand_trajectory_forecast", |
| "task_label": "Hand Trajectory Forecasting", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.7216105627267382, |
| "raw_text": "0.7216", |
| "normalized_score": 0.149457605109387, |
| "metric_key": "hand_trajectory_forecast_mrr", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/hand_trajectory_forecast/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 5, |
| "task_id": "hand_trajectory_forecast", |
| "task_label": "Hand Trajectory Forecasting", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.8915253522315043, |
| "raw_text": "0.8915", |
| "normalized_score": 0.12097265238372007, |
| "metric_key": "hand_trajectory_forecast_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/hand_trajectory_forecast/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 5, |
| "task_id": "hand_trajectory_forecast", |
| "task_label": "Hand Trajectory Forecasting", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.6912806884333101, |
| "raw_text": "0.6913", |
| "normalized_score": 0.15601504328321764, |
| "metric_key": "hand_trajectory_forecast_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621/hand_trajectory_forecast/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 6, |
| "task_id": "contact_prediction", |
| "task_label": "Contact State Prediction", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.4381481308057444, |
| "raw_text": "0.4381", |
| "normalized_score": 0.4381481308057444, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/contact_prediction/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 6, |
| "task_id": "contact_prediction", |
| "task_label": "Contact State Prediction", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.5682695682695682, |
| "raw_text": "0.5683", |
| "normalized_score": 0.5682695682695682, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/contact_prediction/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 6, |
| "task_id": "contact_prediction", |
| "task_label": "Contact State Prediction", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.886990707397193, |
| "raw_text": "0.8870", |
| "normalized_score": 0.886990707397193, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/contact_prediction/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 6, |
| "task_id": "contact_prediction", |
| "task_label": "Contact State Prediction", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 1.0, |
| "raw_text": "1.000", |
| "normalized_score": 1.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/contact_prediction/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 6, |
| "task_id": "contact_prediction", |
| "task_label": "Contact State Prediction", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.8177083333333334, |
| "raw_text": "0.8177", |
| "normalized_score": 0.8177083333333334, |
| "metric_key": "contact_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 6, |
| "task_id": "contact_prediction", |
| "task_label": "Contact State Prediction", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.32142857142857145, |
| "raw_text": "0.3214", |
| "normalized_score": 0.32142857142857145, |
| "metric_key": "contact_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 6, |
| "task_id": "contact_prediction", |
| "task_label": "Contact State Prediction", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.7433862433862434, |
| "raw_text": "0.7434", |
| "normalized_score": 0.7433862433862434, |
| "metric_key": "contact_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 7, |
| "task_id": "object_relevance", |
| "task_label": "Object Relevance Prediction", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.17764578833693304, |
| "raw_text": "0.1776", |
| "normalized_score": 0.17764578833693304, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/object_relevance/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 7, |
| "task_id": "object_relevance", |
| "task_label": "Object Relevance Prediction", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.18662723837686876, |
| "raw_text": "0.1866", |
| "normalized_score": 0.18662723837686876, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/object_relevance/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 7, |
| "task_id": "object_relevance", |
| "task_label": "Object Relevance Prediction", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0655376369662084, |
| "raw_text": "0.0655", |
| "normalized_score": 0.0655376369662084, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/object_relevance/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 7, |
| "task_id": "object_relevance", |
| "task_label": "Object Relevance Prediction", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.1765890386972509, |
| "raw_text": "0.1766", |
| "normalized_score": 0.1765890386972509, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/object_relevance/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 7, |
| "task_id": "object_relevance", |
| "task_label": "Object Relevance Prediction", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.3064982378331287, |
| "raw_text": "0.3065", |
| "normalized_score": 0.3064982378331287, |
| "metric_key": "object_micro_f1", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 7, |
| "task_id": "object_relevance", |
| "task_label": "Object Relevance Prediction", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.13704276146316333, |
| "raw_text": "0.1370", |
| "normalized_score": 0.13704276146316333, |
| "metric_key": "object_micro_f1", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 7, |
| "task_id": "object_relevance", |
| "task_label": "Object Relevance Prediction", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.00047209895194032665, |
| "raw_text": "0.0005", |
| "normalized_score": 0.00047209895194032665, |
| "metric_key": "object_relevance_micro_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_current_subtask_object_relevance_patched_textonly_20260621/object_relevance/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 8, |
| "task_id": "caption_grounding", |
| "task_label": "Language Grounding", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.002332374220713973, |
| "raw_text": "0.0023", |
| "normalized_score": 0.002332374220713973, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/caption_grounding/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 8, |
| "task_id": "caption_grounding", |
| "task_label": "Language Grounding", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.008236799389123917, |
| "raw_text": "0.0082", |
| "normalized_score": 0.008236799389123917, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/caption_grounding/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 8, |
| "task_id": "caption_grounding", |
| "task_label": "Language Grounding", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.011138836853206158, |
| "raw_text": "0.0111", |
| "normalized_score": 0.011138836853206158, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/caption_grounding/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 8, |
| "task_id": "caption_grounding", |
| "task_label": "Language Grounding", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0063402121886610985, |
| "raw_text": "0.0063", |
| "normalized_score": 0.0063402121886610985, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/caption_grounding/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 8, |
| "task_id": "caption_grounding", |
| "task_label": "Language Grounding", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.8764467592592605, |
| "raw_text": "0.8764", |
| "normalized_score": 0.8764467592592605, |
| "metric_key": "caption_grounding_mrr", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_retrieval_task_probes_a100_20260617T175919Z/caption_grounding/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 8, |
| "task_id": "caption_grounding", |
| "task_label": "Language Grounding", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.30639899644580487, |
| "raw_text": "0.3064", |
| "normalized_score": 0.30639899644580487, |
| "metric_key": "caption_grounding_iou", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/caption_grounding/cosmos3_super_reasoner/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 8, |
| "task_id": "caption_grounding", |
| "task_label": "Language Grounding", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.5221041086644663, |
| "raw_text": "0.5221", |
| "normalized_score": 0.5221041086644663, |
| "metric_key": "caption_grounding_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621/caption_grounding/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 9, |
| "task_id": "cross_modal_retrieval", |
| "task_label": "Cross-Modal Retrieval", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.002587692579254508, |
| "raw_text": "0.0026", |
| "normalized_score": 0.002587692579254508, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/cross_modal_retrieval/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 9, |
| "task_id": "cross_modal_retrieval", |
| "task_label": "Cross-Modal Retrieval", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0026067993603646755, |
| "raw_text": "0.0026", |
| "normalized_score": 0.0026067993603646755, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/cross_modal_retrieval/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 9, |
| "task_id": "cross_modal_retrieval", |
| "task_label": "Cross-Modal Retrieval", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.003459817497059703, |
| "raw_text": "0.0035", |
| "normalized_score": 0.003459817497059703, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/cross_modal_retrieval/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 9, |
| "task_id": "cross_modal_retrieval", |
| "task_label": "Cross-Modal Retrieval", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.002535284962505102, |
| "raw_text": "0.0025", |
| "normalized_score": 0.002535284962505102, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/cross_modal_retrieval/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 9, |
| "task_id": "cross_modal_retrieval", |
| "task_label": "Cross-Modal Retrieval", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.5080191798941732, |
| "raw_text": "0.5080", |
| "normalized_score": 0.5080191798941732, |
| "metric_key": "cross_modal_retrieval_mrr", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_cross_modal_retrieval_probe_a100_20260618T000000Z/cross_modal_retrieval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 9, |
| "task_id": "cross_modal_retrieval", |
| "task_label": "Cross-Modal Retrieval", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.6628490677465636, |
| "raw_text": "0.6628", |
| "normalized_score": 0.6628490677465636, |
| "metric_key": "cross_modal_retrieval_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/cross_modal_retrieval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 9, |
| "task_id": "cross_modal_retrieval", |
| "task_label": "Cross-Modal Retrieval", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.022138720585222767, |
| "raw_text": "0.0221", |
| "normalized_score": 0.022138720585222767, |
| "metric_key": "future_retrieval_mrr", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 10, |
| "task_id": "modality_reconstruction", |
| "task_label": "Cross-Modal Reconstruction", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": -190.66106203944798, |
| "raw_text": "-190.66", |
| "normalized_score": 0.0, |
| "metric_key": "r2", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/modality_reconstruction/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 10, |
| "task_id": "modality_reconstruction", |
| "task_label": "Cross-Modal Reconstruction", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": -0.43481132003942147, |
| "raw_text": "-0.4348", |
| "normalized_score": 0.0, |
| "metric_key": "r2", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/modality_reconstruction/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 10, |
| "task_id": "modality_reconstruction", |
| "task_label": "Cross-Modal Reconstruction", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": -1.3450960391924882, |
| "raw_text": "-1.345", |
| "normalized_score": 0.0, |
| "metric_key": "r2", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/modality_reconstruction/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 10, |
| "task_id": "modality_reconstruction", |
| "task_label": "Cross-Modal Reconstruction", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": -1.3974418160502369, |
| "raw_text": "-1.397", |
| "normalized_score": 0.0, |
| "metric_key": "r2", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/modality_reconstruction/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 10, |
| "task_id": "modality_reconstruction", |
| "task_label": "Cross-Modal Reconstruction", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.9670547540707002, |
| "raw_text": "0.9671", |
| "normalized_score": 0.9670547540707002, |
| "metric_key": "modality_reconstruction_mrr", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 10, |
| "task_id": "modality_reconstruction", |
| "task_label": "Cross-Modal Reconstruction", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.9939466801653591, |
| "raw_text": "0.9939", |
| "normalized_score": 0.9939466801653591, |
| "metric_key": "modality_reconstruction_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/modality_reconstruction/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 10, |
| "task_id": "modality_reconstruction", |
| "task_label": "Cross-Modal Reconstruction", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0002873382957286892, |
| "raw_text": "0.0003", |
| "normalized_score": 0.0002873382957286892, |
| "metric_key": "feature_reconstruction_quality", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/modality_reconstruction/cosmos3_nano_future_window/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 11, |
| "task_id": "temporal_order", |
| "task_label": "Temporal Order Verification", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.4198864140782312, |
| "raw_text": "0.4199", |
| "normalized_score": 0.4198864140782312, |
| "metric_key": "f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/temporal_order/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 11, |
| "task_id": "temporal_order", |
| "task_label": "Temporal Order Verification", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.8252408266656923, |
| "raw_text": "0.8252", |
| "normalized_score": 0.8252408266656923, |
| "metric_key": "f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/temporal_order/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 11, |
| "task_id": "temporal_order", |
| "task_label": "Temporal Order Verification", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.49824413370686593, |
| "raw_text": "0.4982", |
| "normalized_score": 0.49824413370686593, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/temporal_order/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 11, |
| "task_id": "temporal_order", |
| "task_label": "Temporal Order Verification", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.8030047098504103, |
| "raw_text": "0.8030", |
| "normalized_score": 0.8030047098504103, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/temporal_order/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 11, |
| "task_id": "temporal_order", |
| "task_label": "Temporal Order Verification", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.40984631701404173, |
| "raw_text": "0.4098", |
| "normalized_score": 0.40984631701404173, |
| "metric_key": "temporal_order_f1", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/temporal_order/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 11, |
| "task_id": "temporal_order", |
| "task_label": "Temporal Order Verification", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.6286317274823326, |
| "raw_text": "0.6286", |
| "normalized_score": 0.6286317274823326, |
| "metric_key": "temporal_order_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/temporal_order/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 11, |
| "task_id": "temporal_order", |
| "task_label": "Temporal Order Verification", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.5954109425716205, |
| "raw_text": "0.5954", |
| "normalized_score": 0.5954109425716205, |
| "metric_key": "temporal_order_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_future_order_misalignment_patched_textonly_20260621/temporal_order/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 12, |
| "task_id": "misalignment_detection", |
| "task_label": "Multimodal Synchronization Detection", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.49980060227663614, |
| "raw_text": "0.4998", |
| "normalized_score": 0.49980060227663614, |
| "metric_key": "f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/misalignment_detection/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 12, |
| "task_id": "misalignment_detection", |
| "task_label": "Multimodal Synchronization Detection", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.7773773780941162, |
| "raw_text": "0.7774", |
| "normalized_score": 0.7773773780941162, |
| "metric_key": "f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/misalignment_detection/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 12, |
| "task_id": "misalignment_detection", |
| "task_label": "Multimodal Synchronization Detection", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.4958867673901769, |
| "raw_text": "0.4959", |
| "normalized_score": 0.4958867673901769, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/misalignment_detection/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 12, |
| "task_id": "misalignment_detection", |
| "task_label": "Multimodal Synchronization Detection", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.8272709077974252, |
| "raw_text": "0.8273", |
| "normalized_score": 0.8272709077974252, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/misalignment_detection/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 12, |
| "task_id": "misalignment_detection", |
| "task_label": "Multimodal Synchronization Detection", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.3344936184319576, |
| "raw_text": "0.3345", |
| "normalized_score": 0.3344936184319576, |
| "metric_key": "misalignment_detection_f1", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/misalignment_detection/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 12, |
| "task_id": "misalignment_detection", |
| "task_label": "Multimodal Synchronization Detection", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.37271645981034185, |
| "raw_text": "0.3727", |
| "normalized_score": 0.37271645981034185, |
| "metric_key": "misalignment_detection_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/misalignment_detection/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 12, |
| "task_id": "misalignment_detection", |
| "task_label": "Multimodal Synchronization Detection", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.477201691802725, |
| "raw_text": "0.4772", |
| "normalized_score": 0.477201691802725, |
| "metric_key": "misalignment_detection_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_future_order_misalignment_patched_textonly_20260621/misalignment_detection/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 13, |
| "task_id": "long_horizon_next_action", |
| "task_label": "Long-Horizon Next-Action Forecasting", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.004579592783699693, |
| "raw_text": "0.0046", |
| "normalized_score": 0.004579592783699693, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/long_horizon_next_action/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 13, |
| "task_id": "long_horizon_next_action", |
| "task_label": "Long-Horizon Next-Action Forecasting", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0029821307969142615, |
| "raw_text": "0.0030", |
| "normalized_score": 0.0029821307969142615, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/long_horizon_next_action/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 13, |
| "task_id": "long_horizon_next_action", |
| "task_label": "Long-Horizon Next-Action Forecasting", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0024280172369056294, |
| "raw_text": "0.0024", |
| "normalized_score": 0.0024280172369056294, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/long_horizon_next_action/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 13, |
| "task_id": "long_horizon_next_action", |
| "task_label": "Long-Horizon Next-Action Forecasting", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.001063859887389299, |
| "raw_text": "0.0011", |
| "normalized_score": 0.001063859887389299, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/long_horizon_next_action/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 13, |
| "task_id": "long_horizon_next_action", |
| "task_label": "Long-Horizon Next-Action Forecasting", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0023356666867101906, |
| "raw_text": "0.0023", |
| "normalized_score": 0.0023356666867101906, |
| "metric_key": "long_horizon_next_action_macro_f1", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/long_horizon_next_action/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 13, |
| "task_id": "long_horizon_next_action", |
| "task_label": "Long-Horizon Next-Action Forecasting", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.008807588075880758, |
| "raw_text": "0.0088", |
| "normalized_score": 0.008807588075880758, |
| "metric_key": "long_horizon_next_action_macro_f1", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/long_horizon_next_action/cosmos3_super_reasoner/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 13, |
| "task_id": "long_horizon_next_action", |
| "task_label": "Long-Horizon Next-Action Forecasting", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0024906600249066007, |
| "raw_text": "0.0025", |
| "normalized_score": 0.0024906600249066007, |
| "metric_key": "long_horizon_next_action_macro_f1", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/long_horizon_next_action/cosmos3_nano_future_window/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 14, |
| "task_id": "next_subtask_forecast", |
| "task_label": "Long-Horizon Next-Subtask Forecasting", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0001206030150753769, |
| "raw_text": "0.0001", |
| "normalized_score": 0.0001206030150753769, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/next_subtask_forecast/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 14, |
| "task_id": "next_subtask_forecast", |
| "task_label": "Long-Horizon Next-Subtask Forecasting", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 2.086049543676662e-05, |
| "raw_text": "0.0000", |
| "normalized_score": 2.086049543676662e-05, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/next_subtask_forecast/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 14, |
| "task_id": "next_subtask_forecast", |
| "task_label": "Long-Horizon Next-Subtask Forecasting", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0, |
| "raw_text": "0.0000", |
| "normalized_score": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/next_subtask_forecast/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 14, |
| "task_id": "next_subtask_forecast", |
| "task_label": "Long-Horizon Next-Subtask Forecasting", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0, |
| "raw_text": "0.0000", |
| "normalized_score": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/next_subtask_forecast/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 14, |
| "task_id": "next_subtask_forecast", |
| "task_label": "Long-Horizon Next-Subtask Forecasting", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.004206715978529301, |
| "raw_text": "0.0042", |
| "normalized_score": 0.004206715978529301, |
| "metric_key": "next_subtask_forecast_macro_f1", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/next_subtask_forecast/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 14, |
| "task_id": "next_subtask_forecast", |
| "task_label": "Long-Horizon Next-Subtask Forecasting", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0, |
| "raw_text": "0.0000", |
| "normalized_score": 0.0, |
| "metric_key": "next_subtask_forecast_macro_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/next_subtask_forecast/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 14, |
| "task_id": "next_subtask_forecast", |
| "task_label": "Long-Horizon Next-Subtask Forecasting", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.006614876224708678, |
| "raw_text": "0.0066", |
| "normalized_score": 0.006614876224708678, |
| "metric_key": "next_subtask_forecast_macro_f1", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/next_subtask_forecast/cosmos3_nano_future_window/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 15, |
| "task_id": "interaction_text_prediction", |
| "task_label": "Interaction Text Prediction", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0, |
| "raw_text": "0.0000", |
| "normalized_score": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/interaction_text_prediction/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 15, |
| "task_id": "interaction_text_prediction", |
| "task_label": "Interaction Text Prediction", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0, |
| "raw_text": "0.0000", |
| "normalized_score": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/interaction_text_prediction/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 15, |
| "task_id": "interaction_text_prediction", |
| "task_label": "Interaction Text Prediction", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "proxy_scored", |
| "status_label": "proxy scored", |
| "scored": true, |
| "proxy_scored": true, |
| "raw": 0.012611998261547169, |
| "raw_text": "0.0126", |
| "normalized_score": 0.012611998261547169, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/interaction_text_prediction/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": "documented compact proxy completion for this raw128 task axis" |
| }, |
| { |
| "task_number": 15, |
| "task_id": "interaction_text_prediction", |
| "task_label": "Interaction Text Prediction", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "proxy_scored", |
| "status_label": "proxy scored", |
| "scored": true, |
| "proxy_scored": true, |
| "raw": 0.009791421280985521, |
| "raw_text": "0.0098", |
| "normalized_score": 0.009791421280985521, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/interaction_text_prediction/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": "documented compact proxy completion for this raw128 task axis" |
| }, |
| { |
| "task_number": 15, |
| "task_id": "interaction_text_prediction", |
| "task_label": "Interaction Text Prediction", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.4318674027510605, |
| "raw_text": "0.4319", |
| "normalized_score": 0.4318674027510605, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_interaction_text_task15_a100_20260620T010305Z/interaction_text_prediction/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 15, |
| "task_id": "interaction_text_prediction", |
| "task_label": "Interaction Text Prediction", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.17949512355264183, |
| "raw_text": "0.1795", |
| "normalized_score": 0.17949512355264183, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_interaction_text_task15_textonly_v1_20260620T1558Z/interaction_text_prediction/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 15, |
| "task_id": "interaction_text_prediction", |
| "task_label": "Interaction Text Prediction", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.1788367958939587, |
| "raw_text": "0.1788", |
| "normalized_score": 0.1788367958939587, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_interaction_text_task15_patched_textonly_20260621/interaction_text_prediction/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 16, |
| "task_id": "action_object_relation", |
| "task_label": "Action-Object Relation Prediction", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0, |
| "raw_text": "0.0000", |
| "normalized_score": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/action_object_relation/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 16, |
| "task_id": "action_object_relation", |
| "task_label": "Action-Object Relation Prediction", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0, |
| "raw_text": "0.0000", |
| "normalized_score": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/action_object_relation/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 16, |
| "task_id": "action_object_relation", |
| "task_label": "Action-Object Relation Prediction", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0, |
| "raw_text": "0.0000", |
| "normalized_score": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/action_object_relation/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 16, |
| "task_id": "action_object_relation", |
| "task_label": "Action-Object Relation Prediction", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0, |
| "raw_text": "0.0000", |
| "normalized_score": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/action_object_relation/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 16, |
| "task_id": "action_object_relation", |
| "task_label": "Action-Object Relation Prediction", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0002220083079671497, |
| "raw_text": "0.0002", |
| "normalized_score": 0.0002220083079671497, |
| "metric_key": "action_object_relation_macro_f1", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 16, |
| "task_id": "action_object_relation", |
| "task_label": "Action-Object Relation Prediction", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0, |
| "raw_text": "0.0000", |
| "normalized_score": 0.0, |
| "metric_key": "action_object_relation_macro_f1", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 16, |
| "task_id": "action_object_relation", |
| "task_label": "Action-Object Relation Prediction", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.002794157670325683, |
| "raw_text": "0.0028", |
| "normalized_score": 0.002794157670325683, |
| "metric_key": "action_object_relation_macro_f1", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_nano_future_window/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 17, |
| "task_id": "object_set_forecast", |
| "task_label": "Future Object-Set Forecasting", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.17656983343047333, |
| "raw_text": "0.1766", |
| "normalized_score": 0.17656983343047333, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/object_set_forecast/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 17, |
| "task_id": "object_set_forecast", |
| "task_label": "Future Object-Set Forecasting", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.17418550827844048, |
| "raw_text": "0.1742", |
| "normalized_score": 0.17418550827844048, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/object_set_forecast/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 17, |
| "task_id": "object_set_forecast", |
| "task_label": "Future Object-Set Forecasting", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.06469493412657774, |
| "raw_text": "0.0647", |
| "normalized_score": 0.06469493412657774, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/object_set_forecast/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 17, |
| "task_id": "object_set_forecast", |
| "task_label": "Future Object-Set Forecasting", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.17523098630012288, |
| "raw_text": "0.1752", |
| "normalized_score": 0.17523098630012288, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/object_set_forecast/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 17, |
| "task_id": "object_set_forecast", |
| "task_label": "Future Object-Set Forecasting", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.1659483964851402, |
| "raw_text": "0.1659", |
| "normalized_score": 0.1659483964851402, |
| "metric_key": "object_set_forecast_micro_f1", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z/object_set_forecast/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 17, |
| "task_id": "object_set_forecast", |
| "task_label": "Future Object-Set Forecasting", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.0009279881217520415, |
| "raw_text": "0.0009", |
| "normalized_score": 0.0009279881217520415, |
| "metric_key": "object_set_forecast_micro_f1", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/object_set_forecast/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 17, |
| "task_id": "object_set_forecast", |
| "task_label": "Future Object-Set Forecasting", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.01781970649895178, |
| "raw_text": "0.0178", |
| "normalized_score": 0.01781970649895178, |
| "metric_key": "object_set_forecast_micro_f1", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/object_set_forecast/cosmos3_nano_future_window/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 18, |
| "task_id": "imu_to_hand_pose", |
| "task_label": "IMU-to-Hand Pose Reconstruction", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.2294670194387436, |
| "raw_text": "0.2295", |
| "normalized_score": 0.18324815505876868, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/imu_to_hand_pose/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 18, |
| "task_id": "imu_to_hand_pose", |
| "task_label": "IMU-to-Hand Pose Reconstruction", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.2555866539478302, |
| "raw_text": "0.2556", |
| "normalized_score": 0.16452114110609004, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/imu_to_hand_pose/metrics.json", |
| "scope": "multi_episode_128_aligned_sensor_block_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 18, |
| "task_id": "imu_to_hand_pose", |
| "task_label": "IMU-to-Hand Pose Reconstruction", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.22941437363624573, |
| "raw_text": "0.2294", |
| "normalized_score": 0.1832902066792771, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/imu_to_hand_pose/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 18, |
| "task_id": "imu_to_hand_pose", |
| "task_label": "IMU-to-Hand Pose Reconstruction", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.252998411655426, |
| "raw_text": "0.2530", |
| "normalized_score": 0.1662042369509182, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/imu_to_hand_pose/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 18, |
| "task_id": "imu_to_hand_pose", |
| "task_label": "IMU-to-Hand Pose Reconstruction", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.9641651902471952, |
| "raw_text": "0.9642", |
| "normalized_score": 0.043612244441436056, |
| "metric_key": "imu_to_hand_pose_mrr", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/imu_to_hand_pose/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 18, |
| "task_id": "imu_to_hand_pose", |
| "task_label": "IMU-to-Hand Pose Reconstruction", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.9896650636969544, |
| "raw_text": "0.9897", |
| "normalized_score": 0.04248852414968175, |
| "metric_key": "imu_to_hand_pose_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/imu_to_hand_pose/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 18, |
| "task_id": "imu_to_hand_pose", |
| "task_label": "IMU-to-Hand Pose Reconstruction", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.9920062431451954, |
| "raw_text": "0.9920", |
| "normalized_score": 0.04238824931752955, |
| "metric_key": "imu_to_hand_pose_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621/imu_to_hand_pose/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 19, |
| "task_id": "camera_view_sync_retrieval", |
| "task_label": "Camera-View Synchronization Retrieval", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "proxy_scored", |
| "status_label": "proxy scored", |
| "scored": true, |
| "proxy_scored": true, |
| "raw": 0.0021294241305440664, |
| "raw_text": "0.0021", |
| "normalized_score": 0.0021294241305440664, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/camera_view_sync_retrieval/metrics.json", |
| "scope": "multi_episode_128_aligned_metadata_baseline", |
| "reason": "paired camera-view embeddings are absent from the 128 JSONL/feature export; metadata features retrieve the synchronized same-window depth/audio block as a documented compact synchronization proxy" |
| }, |
| { |
| "task_number": 19, |
| "task_id": "camera_view_sync_retrieval", |
| "task_label": "Camera-View Synchronization Retrieval", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "proxy_scored", |
| "status_label": "proxy scored", |
| "scored": true, |
| "proxy_scored": true, |
| "raw": 0.0027218370232731104, |
| "raw_text": "0.0027", |
| "normalized_score": 0.0027218370232731104, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/camera_view_sync_retrieval/metrics.json", |
| "scope": "multi_episode_128_aligned_metadata_baseline", |
| "reason": "paired camera-view embeddings are absent from the 128 JSONL/feature export; metadata features retrieve the synchronized same-window depth/audio block as a documented compact synchronization proxy" |
| }, |
| { |
| "task_number": 19, |
| "task_id": "camera_view_sync_retrieval", |
| "task_label": "Camera-View Synchronization Retrieval", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "proxy_scored", |
| "status_label": "proxy scored", |
| "scored": true, |
| "proxy_scored": true, |
| "raw": 0.0026625150348991156, |
| "raw_text": "0.0027", |
| "normalized_score": 0.0026625150348991156, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/camera_view_sync_retrieval/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": "documented compact proxy completion for this raw128 task axis" |
| }, |
| { |
| "task_number": 19, |
| "task_id": "camera_view_sync_retrieval", |
| "task_label": "Camera-View Synchronization Retrieval", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "proxy_scored", |
| "status_label": "proxy scored", |
| "scored": true, |
| "proxy_scored": true, |
| "raw": 0.0025448438245803118, |
| "raw_text": "0.0025", |
| "normalized_score": 0.0025448438245803118, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/camera_view_sync_retrieval/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": "documented compact proxy completion for this raw128 task axis" |
| }, |
| { |
| "task_number": 19, |
| "task_id": "camera_view_sync_retrieval", |
| "task_label": "Camera-View Synchronization Retrieval", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.6587714947089998, |
| "raw_text": "0.6588", |
| "normalized_score": 0.6587714947089998, |
| "metric_key": "camera_view_sync_retrieval_mrr", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_camera_view_sync_mosaic_tile_a100_20260619T0305Z/camera_view_sync_retrieval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 19, |
| "task_id": "camera_view_sync_retrieval", |
| "task_label": "Camera-View Synchronization Retrieval", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.9979751961528727, |
| "raw_text": "0.9980", |
| "normalized_score": 0.9979751961528727, |
| "metric_key": "camera_view_sync_retrieval_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/camera_view_sync_retrieval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 19, |
| "task_id": "camera_view_sync_retrieval", |
| "task_label": "Camera-View Synchronization Retrieval", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 0.9989875980764363, |
| "raw_text": "0.9990", |
| "normalized_score": 0.9989875980764363, |
| "metric_key": "camera_view_sync_retrieval_mrr", |
| "source": "results/omni_finetune/xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621/camera_view_sync_retrieval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 20, |
| "task_id": "time_to_transition", |
| "task_label": "Time-to-Next-Transition Regression", |
| "series_id": "metadata128_simple", |
| "method": "128ep Aligned Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 624.8108520507812, |
| "raw_text": "624.81", |
| "normalized_score": 0.016864874132806403, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/time_to_transition/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 20, |
| "task_id": "time_to_transition", |
| "task_label": "Time-to-Next-Transition Regression", |
| "series_id": "metadata128_neural_mlp", |
| "method": "128ep Aligned NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 41.4664421081543, |
| "raw_text": "41.47", |
| "normalized_score": 0.25411768748242325, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/time_to_transition/metrics.json", |
| "scope": "multi_episode_128_aligned_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 20, |
| "task_id": "time_to_transition", |
| "task_label": "Time-to-Next-Transition Regression", |
| "series_id": "raw128_simple", |
| "method": "128ep Raw Simple", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 52.32759475708008, |
| "raw_text": "52.33", |
| "normalized_score": 0.20137284019197565, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/time_to_transition/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 20, |
| "task_id": "time_to_transition", |
| "task_label": "Time-to-Next-Transition Regression", |
| "series_id": "raw128_neural_mlp", |
| "method": "128ep Raw NN", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 42.374061584472656, |
| "raw_text": "42.37", |
| "normalized_score": 0.24867468405504953, |
| "metric_key": "mae", |
| "source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/time_to_transition/metrics.json", |
| "scope": "multi_episode_128_raw_sensor_feature_baseline", |
| "reason": null |
| }, |
| { |
| "task_number": 20, |
| "task_id": "time_to_transition", |
| "task_label": "Time-to-Next-Transition Regression", |
| "series_id": "qwen3_omni_v6_lora", |
| "method": "Qwen3-Omni v6 LoRA", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 134.0687422166874, |
| "raw_text": "134.07", |
| "normalized_score": 0.07859666766782253, |
| "metric_key": "time_to_transition_mae", |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z/time_to_transition/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 20, |
| "task_id": "time_to_transition", |
| "task_label": "Time-to-Next-Transition Regression", |
| "series_id": "cosmos3_super_reasoner", |
| "method": "Cosmos3-Super Reasoner", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 52.94642857142857, |
| "raw_text": "52.95", |
| "normalized_score": 0.19901920981190058, |
| "metric_key": "time_to_transition_mae", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/time_to_transition/cosmos3_super_reasoner/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| }, |
| { |
| "task_number": 20, |
| "task_id": "time_to_transition", |
| "task_label": "Time-to-Next-Transition Regression", |
| "series_id": "cosmos3_nano_future_window", |
| "method": "Cosmos3-Nano Future Window", |
| "status": "scored", |
| "status_label": "scored", |
| "scored": true, |
| "proxy_scored": false, |
| "raw": 33.80952380952381, |
| "raw_text": "33.81", |
| "normalized_score": 0.3116682871966295, |
| "metric_key": "time_to_transition_mae", |
| "source": "results/omni_finetune/model_output_task_probes_20260616/time_to_transition/cosmos3_nano_future_window/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "reason": null |
| } |
| ] |
| } |
|
|