{ "title": "Unified 20-Task Model Radar", "status": "pass", "generated_at_utc": "2026-06-16T06:32:05+00:00", "task_count": 20, "normalization_policy": { "higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]", "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task", "raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table", "foundation_model_overlay": "Qwen3/Cosmos points are plotted only on task-aligned axes. Missing axes mean the public result does not evaluate that task contract.", "metadata_128_overlay": "128-episode metadata baselines are plotted only where the public JSONL contains enough task labels without raw feature blocks." }, "series": [ { "id": "minimal", "label": "Minimal", "short_label": "Min", "color": "#ccffa0", "kind": "full_20_task_baseline", "scope": "1 public sample episode", "stroke_dasharray": null, "covered_task_count": 20, "coverage_fraction": 1.0 }, { "id": "neural_mlp", "label": "Neural MLP", "short_label": "NN", "color": "#67e8d1", "kind": "full_20_task_baseline", "scope": "1 public sample episode", "stroke_dasharray": null, "covered_task_count": 20, "coverage_fraction": 1.0 }, { "id": "metadata128_simple", "label": "128ep Metadata Simple", "short_label": "128-S", "color": "#ffd166", "kind": "partial_128_episode_metadata_baseline", "scope": "128 selected episodes, JSONL metadata/text only", "stroke_dasharray": "9 6", "covered_task_count": 8, "coverage_fraction": 0.4 }, { "id": "metadata128_neural_mlp", "label": "128ep Metadata NN", "short_label": "128-NN", "color": "#f472b6", "kind": "partial_128_episode_metadata_baseline", "scope": "128 selected episodes, JSONL metadata/text only", "stroke_dasharray": "3 6", "covered_task_count": 6, "coverage_fraction": 0.3 }, { "id": "qwen3_omni_v6_lora", "label": "Qwen3-Omni v6 LoRA", "short_label": "Qwen3", "color": "#9bb8ff", "kind": "partial_128_episode_foundation_model_overlay", "scope": "128 selected episodes, held-out test", "stroke_dasharray": "7 7", "covered_task_count": 6, "coverage_fraction": 0.3 }, { "id": "cosmos3_super_reasoner", "label": "Cosmos3-Super Reasoner", "short_label": "C3-S", "color": "#ff9c7a", "kind": "partial_128_episode_foundation_model_overlay", "scope": "128 selected episodes, held-out test", "stroke_dasharray": "4 7", "covered_task_count": 6, "coverage_fraction": 0.3 }, { "id": "cosmos3_nano_future_window", "label": "Cosmos3-Nano Future Window", "short_label": "C3-N", "color": "#d9c7ff", "kind": "partial_128_episode_world_model_overlay", "scope": "128 selected episodes, held-out test", "stroke_dasharray": "2 7", "covered_task_count": 5, "coverage_fraction": 0.25 } ], "tasks": [ { "task_number": 1, "task_id": "timeline_action", "label": "Action Recognition", "short_label": "Action", "origin": "original_public_sample_tasks", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "values": { "minimal": { "raw": 0.05, "metric_key": "macro_f1", "source": "results/episode_task_suite/timeline_action/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.05, "raw_text": "0.0500" }, "neural_mlp": { "raw": 0.014814814814814814, "metric_key": "macro_f1", "source": "results/episode_task_suite/neural_mlp/timeline_action/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.014814814814814814, "raw_text": "0.0148" }, "qwen3_omni_v6_lora": { "raw": 0.0028830723979596335, "metric_key": "action_macro_f1", "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.0028830723979596335, "raw_text": "0.0029" }, "cosmos3_super_reasoner": { "raw": 0.0008284021201089245, "metric_key": "action_macro_f1", "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.0008284021201089245, "raw_text": "0.0008" }, "cosmos3_nano_future_window": { "raw": 0.007936507936507936, "metric_key": "action_accuracy_from_retrieved_future", "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.007936507936507936, "raw_text": "0.0079" }, "metadata128_simple": { "raw": 0.008252821966746326, "metric_key": "macro_f1", "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/timeline_action/metrics.json", "scope": "multi_episode_128_metadata_baseline", "normalized_score": 0.008252821966746326, "raw_text": "0.0083" }, "metadata128_neural_mlp": { "raw": 0.004175793689174209, "metric_key": "macro_f1", "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/timeline_action/metrics.json", "scope": "multi_episode_128_metadata_baseline", "normalized_score": 0.004175793689174209, "raw_text": "0.0042" } } }, { "task_number": 2, "task_id": "timeline_subtask", "label": "Procedure Step Recognition", "short_label": "Step", "origin": "original_public_sample_tasks", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "values": { "minimal": { "raw": 0.05056355513846935, "metric_key": "macro_f1", "source": "results/episode_task_suite/timeline_subtask/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.05056355513846935, "raw_text": "0.0506" }, "neural_mlp": { "raw": 0.02810810810810811, "metric_key": "macro_f1", "source": "results/episode_task_suite/neural_mlp/timeline_subtask/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.02810810810810811, "raw_text": "0.0281" }, "qwen3_omni_v6_lora": { "raw": 0.0037313432835820895, "metric_key": "subtask_accuracy", "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.0037313432835820895, "raw_text": "0.0037" }, "cosmos3_super_reasoner": { "raw": 0.0, "metric_key": "subtask_accuracy", "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.0, "raw_text": "0.0000" }, "metadata128_simple": { "raw": 0.00019512195121951218, "metric_key": "macro_f1", "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/timeline_subtask/metrics.json", "scope": "multi_episode_128_metadata_baseline", "normalized_score": 0.00019512195121951218, "raw_text": "0.0002" }, "metadata128_neural_mlp": { "raw": 7.207207207207208e-05, "metric_key": "macro_f1", "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/timeline_subtask/metrics.json", "scope": "multi_episode_128_metadata_baseline", "normalized_score": 7.207207207207208e-05, "raw_text": "0.0001" } } }, { "task_number": 3, "task_id": "transition_detection", "label": "Action Boundary Detection", "short_label": "Boundary", "origin": "original_public_sample_tasks", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "values": { "minimal": { "raw": 0.6118237590630229, "metric_key": "macro_f1", "source": "results/episode_task_suite/transition_detection/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.6118237590630229, "raw_text": "0.6118" }, "neural_mlp": { "raw": 0.5862068965517241, "metric_key": "macro_f1", "source": "results/episode_task_suite/neural_mlp/transition_detection/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.5862068965517241, "raw_text": "0.5862" }, "qwen3_omni_v6_lora": { "raw": 0.9898313492063492, "metric_key": "transition_accuracy", "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.9898313492063492, "raw_text": "0.9898" }, "cosmos3_super_reasoner": { "raw": 0.36830357142857145, "metric_key": "transition_accuracy", "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.36830357142857145, "raw_text": "0.3683" }, "cosmos3_nano_future_window": { "raw": 0.9682539682539683, "metric_key": "transition_accuracy", "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.9682539682539683, "raw_text": "0.9683" }, "metadata128_simple": { "raw": 0.29652162550029315, "metric_key": "macro_f1", "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/transition_detection/metrics.json", "scope": "multi_episode_128_metadata_baseline", "normalized_score": 0.29652162550029315, "raw_text": "0.2965" }, "metadata128_neural_mlp": { "raw": 0.4841733292368365, "metric_key": "macro_f1", "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/transition_detection/metrics.json", "scope": "multi_episode_128_metadata_baseline", "normalized_score": 0.4841733292368365, "raw_text": "0.4842" } } }, { "task_number": 4, "task_id": "next_action", "label": "Next-Action Prediction", "short_label": "Next act", "origin": "original_public_sample_tasks", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "values": { "minimal": { "raw": 0.05925925925925927, "metric_key": "macro_f1", "source": "results/episode_task_suite/next_action/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.05925925925925927, "raw_text": "0.0593" }, "neural_mlp": { "raw": 0.04186046511627907, "metric_key": "macro_f1", "source": "results/episode_task_suite/neural_mlp/next_action/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.04186046511627907, "raw_text": "0.0419" }, "qwen3_omni_v6_lora": { "raw": 0.04305335446381405, "metric_key": "next_action_accuracy", "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.04305335446381405, "raw_text": "0.0431" }, "cosmos3_super_reasoner": { "raw": 0.013392857142857142, "metric_key": "next_action_accuracy", "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.013392857142857142, "raw_text": "0.0134" }, "cosmos3_nano_future_window": { "raw": 0.007936507936507936, "metric_key": "action_accuracy_from_retrieved_future", "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.007936507936507936, "raw_text": "0.0079" }, "metadata128_simple": { "raw": 0.006514774539765508, "metric_key": "macro_f1", "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/next_action/metrics.json", "scope": "multi_episode_128_metadata_baseline", "normalized_score": 0.006514774539765508, "raw_text": "0.0065" }, "metadata128_neural_mlp": { "raw": 0.004910507980164745, "metric_key": "macro_f1", "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/next_action/metrics.json", "scope": "multi_episode_128_metadata_baseline", "normalized_score": 0.004910507980164745, "raw_text": "0.0049" } } }, { "task_number": 5, "task_id": "hand_trajectory_forecast", "label": "Hand Trajectory Forecasting", "short_label": "Hand traj", "origin": "original_public_sample_tasks", "metric_key": "mpjpe", "metric_name": "MPJPE", "metric_direction": "lower", "values": { "minimal": { "raw": 0.8646570444107056, "metric_key": "mpjpe", "source": "results/episode_task_suite/hand_trajectory_forecast/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.12473175026322614, "raw_text": "0.8647" }, "neural_mlp": { "raw": 0.10785018652677536, "metric_key": "mpjpe", "source": "results/episode_task_suite/neural_mlp/hand_trajectory_forecast/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 1.0, "raw_text": "0.1079" } } }, { "task_number": 6, "task_id": "contact_prediction", "label": "Contact State Prediction", "short_label": "Contact", "origin": "original_public_sample_tasks", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "values": { "minimal": { "raw": 1.0, "metric_key": "macro_f1", "source": "results/episode_task_suite/contact_prediction/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 1.0, "raw_text": "1.000" }, "neural_mlp": { "raw": 1.0, "metric_key": "macro_f1", "source": "results/episode_task_suite/neural_mlp/contact_prediction/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 1.0, "raw_text": "1.000" }, "qwen3_omni_v6_lora": { "raw": 0.8177083333333334, "metric_key": "contact_accuracy", "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.8177083333333334, "raw_text": "0.8177" }, "cosmos3_super_reasoner": { "raw": 0.32142857142857145, "metric_key": "contact_accuracy", "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.32142857142857145, "raw_text": "0.3214" }, "cosmos3_nano_future_window": { "raw": 0.7433862433862434, "metric_key": "contact_accuracy", "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.7433862433862434, "raw_text": "0.7434" }, "metadata128_simple": { "raw": 0.4381481308057444, "metric_key": "macro_f1", "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/contact_prediction/metrics.json", "scope": "multi_episode_128_metadata_baseline", "normalized_score": 0.4381481308057444, "raw_text": "0.4381" }, "metadata128_neural_mlp": { "raw": 0.5682695682695682, "metric_key": "macro_f1", "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/contact_prediction/metrics.json", "scope": "multi_episode_128_metadata_baseline", "normalized_score": 0.5682695682695682, "raw_text": "0.5683" } } }, { "task_number": 7, "task_id": "object_relevance", "label": "Object Relevance Prediction", "short_label": "Objects", "origin": "original_public_sample_tasks", "metric_key": "micro_f1", "metric_name": "micro-F1", "metric_direction": "higher", "values": { "minimal": { "raw": 0.18034382095361662, "metric_key": "micro_f1", "source": "results/episode_task_suite/object_relevance/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.18034382095361662, "raw_text": "0.1803" }, "neural_mlp": { "raw": 0.1679279279279279, "metric_key": "micro_f1", "source": "results/episode_task_suite/neural_mlp/object_relevance/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.1679279279279279, "raw_text": "0.1679" }, "qwen3_omni_v6_lora": { "raw": 0.3064982378331287, "metric_key": "object_micro_f1", "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.3064982378331287, "raw_text": "0.3065" }, "cosmos3_super_reasoner": { "raw": 0.13704276146316333, "metric_key": "object_micro_f1", "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.13704276146316333, "raw_text": "0.1370" }, "metadata128_simple": { "raw": 0.17764578833693304, "metric_key": "micro_f1", "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/object_relevance/metrics.json", "scope": "multi_episode_128_metadata_baseline", "normalized_score": 0.17764578833693304, "raw_text": "0.1776" }, "metadata128_neural_mlp": { "raw": 0.18662723837686876, "metric_key": "micro_f1", "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/object_relevance/metrics.json", "scope": "multi_episode_128_metadata_baseline", "normalized_score": 0.18662723837686876, "raw_text": "0.1866" } } }, { "task_number": 8, "task_id": "caption_grounding", "label": "Language Grounding", "short_label": "Language", "origin": "original_public_sample_tasks", "metric_key": "mrr", "metric_name": "MRR", "metric_direction": "higher", "values": { "minimal": { "raw": 0.016023479050338015, "metric_key": "mrr", "source": "results/episode_task_suite/caption_grounding/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.016023479050338015, "raw_text": "0.0160" }, "neural_mlp": { "raw": 0.01684125567132316, "metric_key": "mrr", "source": "results/episode_task_suite/neural_mlp/caption_grounding/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.01684125567132316, "raw_text": "0.0168" }, "metadata128_simple": { "raw": 0.002332374220713973, "metric_key": "mrr", "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/caption_grounding/metrics.json", "scope": "multi_episode_128_metadata_baseline", "normalized_score": 0.002332374220713973, "raw_text": "0.0023" } } }, { "task_number": 9, "task_id": "cross_modal_retrieval", "label": "Cross-Modal Retrieval", "short_label": "X-modal", "origin": "original_public_sample_tasks", "metric_key": "mrr", "metric_name": "MRR", "metric_direction": "higher", "values": { "minimal": { "raw": 0.26925966892956127, "metric_key": "mrr", "source": "results/episode_task_suite/cross_modal_retrieval/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.26925966892956127, "raw_text": "0.2693" }, "neural_mlp": { "raw": 0.1299971898648288, "metric_key": "mrr", "source": "results/episode_task_suite/neural_mlp/cross_modal_retrieval/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.1299971898648288, "raw_text": "0.1300" }, "cosmos3_nano_future_window": { "raw": 0.022138720585222767, "metric_key": "future_retrieval_mrr", "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", "scope": "multi_episode_128_partial_model_overlay", "normalized_score": 0.022138720585222767, "raw_text": "0.0221" } } }, { "task_number": 10, "task_id": "modality_reconstruction", "label": "Cross-Modal Reconstruction", "short_label": "Recon", "origin": "original_public_sample_tasks", "metric_key": "r2", "metric_name": "R2", "metric_direction": "higher", "values": { "minimal": { "raw": -0.015271898913936655, "metric_key": "r2", "source": "results/episode_task_suite/modality_reconstruction/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.0, "raw_text": "-0.0153" }, "neural_mlp": { "raw": -0.010171410134180991, "metric_key": "r2", "source": "results/episode_task_suite/neural_mlp/modality_reconstruction/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.0, "raw_text": "-0.0102" } } }, { "task_number": 11, "task_id": "temporal_order", "label": "Temporal Order Verification", "short_label": "Order", "origin": "original_public_sample_tasks", "metric_key": "f1", "metric_name": "F1", "metric_direction": "higher", "values": { "minimal": { "raw": 0.5399515738498789, "metric_key": "f1", "source": "results/episode_task_suite/temporal_order/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.5399515738498789, "raw_text": "0.5400" }, "neural_mlp": { "raw": 0.8520179372197308, "metric_key": "f1", "source": "results/episode_task_suite/neural_mlp/temporal_order/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.8520179372197308, "raw_text": "0.8520" }, "metadata128_simple": { "raw": 0.4198864140782312, "metric_key": "f1", "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/temporal_order/metrics.json", "scope": "multi_episode_128_metadata_baseline", "normalized_score": 0.4198864140782312, "raw_text": "0.4199" } } }, { "task_number": 12, "task_id": "misalignment_detection", "label": "Multimodal Synchronization Detection", "short_label": "Sync", "origin": "original_public_sample_tasks", "metric_key": "f1", "metric_name": "F1", "metric_direction": "higher", "values": { "minimal": { "raw": 0.5051698670605613, "metric_key": "f1", "source": "results/episode_task_suite/misalignment_detection/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.5051698670605613, "raw_text": "0.5052" }, "neural_mlp": { "raw": 0.7152682255845944, "metric_key": "f1", "source": "results/episode_task_suite/neural_mlp/misalignment_detection/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.7152682255845944, "raw_text": "0.7153" } } }, { "task_number": 13, "task_id": "long_horizon_next_action", "label": "Long-Horizon Next-Action Forecasting", "short_label": "Long act", "origin": "additional_public_sample_tasks", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "values": { "minimal": { "raw": 0.07499999999999998, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/long_horizon_next_action/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.07499999999999998, "raw_text": "0.0750" }, "neural_mlp": { "raw": 0.06545454545454546, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/long_horizon_next_action/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.06545454545454546, "raw_text": "0.0655" } } }, { "task_number": 14, "task_id": "next_subtask_forecast", "label": "Long-Horizon Next-Subtask Forecasting", "short_label": "Long step", "origin": "additional_public_sample_tasks", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "values": { "minimal": { "raw": 0.04545454545454545, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/next_subtask_forecast/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.04545454545454545, "raw_text": "0.0455" }, "neural_mlp": { "raw": 0.050724637681159424, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/next_subtask_forecast/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.050724637681159424, "raw_text": "0.0507" } } }, { "task_number": 15, "task_id": "interaction_text_prediction", "label": "Interaction Text Prediction", "short_label": "Interact txt", "origin": "additional_public_sample_tasks", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "values": { "minimal": { "raw": 0.04444444444444444, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/interaction_text_prediction/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.04444444444444444, "raw_text": "0.0444" }, "neural_mlp": { "raw": 0.0380952380952381, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/interaction_text_prediction/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.0380952380952381, "raw_text": "0.0381" } } }, { "task_number": 16, "task_id": "action_object_relation", "label": "Action-Object Relation Prediction", "short_label": "Act+obj", "origin": "additional_public_sample_tasks", "metric_key": "macro_f1", "metric_name": "macro-F1", "metric_direction": "higher", "values": { "minimal": { "raw": 0.0, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/action_object_relation/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.0, "raw_text": "0.0000" }, "neural_mlp": { "raw": 0.0, "metric_key": "macro_f1", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/action_object_relation/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.0, "raw_text": "0.0000" } } }, { "task_number": 17, "task_id": "object_set_forecast", "label": "Future Object-Set Forecasting", "short_label": "Future obj", "origin": "additional_public_sample_tasks", "metric_key": "micro_f1", "metric_name": "micro-F1", "metric_direction": "higher", "values": { "minimal": { "raw": 0.16939890710382516, "metric_key": "micro_f1", "source": "results/episode_task_suite/tier2_task_suite/object_set_forecast/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.16939890710382516, "raw_text": "0.1694" }, "neural_mlp": { "raw": 0.19718309859154928, "metric_key": "micro_f1", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/object_set_forecast/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.19718309859154928, "raw_text": "0.1972" } } }, { "task_number": 18, "task_id": "imu_to_hand_pose", "label": "IMU-to-Hand Pose Reconstruction", "short_label": "IMU->hand", "origin": "additional_public_sample_tasks", "metric_key": "mae", "metric_name": "MAE", "metric_direction": "lower", "values": { "minimal": { "raw": 0.042049407958984375, "metric_key": "mae", "source": "results/episode_task_suite/tier2_task_suite/imu_to_hand_pose/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 1.0, "raw_text": "0.0420" }, "neural_mlp": { "raw": 0.042562149465084076, "metric_key": "mae", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/imu_to_hand_pose/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.9879531106266066, "raw_text": "0.0426" } } }, { "task_number": 19, "task_id": "camera_view_sync_retrieval", "label": "Camera-View Synchronization Retrieval", "short_label": "Cam sync", "origin": "additional_public_sample_tasks", "metric_key": "mrr", "metric_name": "MRR", "metric_direction": "higher", "values": { "minimal": { "raw": 0.4943004846572876, "metric_key": "mrr", "source": "results/episode_task_suite/tier2_task_suite/camera_view_sync_retrieval/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.4943004846572876, "raw_text": "0.4943" }, "neural_mlp": { "raw": 0.24086658656597137, "metric_key": "mrr", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/camera_view_sync_retrieval/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.24086658656597137, "raw_text": "0.2409" } } }, { "task_number": 20, "task_id": "time_to_transition", "label": "Time-to-Next-Transition Regression", "short_label": "Time2bdry", "origin": "additional_public_sample_tasks", "metric_key": "mae", "metric_name": "MAE frames", "metric_direction": "lower", "values": { "minimal": { "raw": 10.53735637664795, "metric_key": "mae", "source": "results/episode_task_suite/tier2_task_suite/time_to_transition/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 1.0, "raw_text": "10.54" }, "neural_mlp": { "raw": 10.55449390411377, "metric_key": "mae", "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/time_to_transition/metrics.json", "scope": "single_episode_public_sample", "normalized_score": 0.9983762814568361, "raw_text": "10.55" } } } ], "model_branch_cards": [ { "id": "metadata128_simple", "title": "128ep Metadata Simple", "status": "a100_rerun_pass", "coverage": "8/20 JSONL-supported axes", "headline": "34,269 rows; train/val/test 25,629/4,608/4,032", "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/summary_report.json" }, { "id": "metadata128_neural_mlp", "title": "128ep Metadata NN", "status": "a100_rerun_pass", "coverage": "6/20 JSONL-supported axes", "headline": "compact MLP heads over metadata/text features", "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/summary_report.json" }, { "id": "qwen3_omni_v6_lora", "title": "Qwen3-Omni v6 LoRA", "status": "verified", "task_aligned_axes": "Qwen3", "coverage": "6/20 task-aligned axes", "headline": "JSON validity 0.9990; action macro-F1 0.0029", "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json" }, { "id": "cosmos3_super_reasoner", "title": "Cosmos3-Super Reasoner", "status": "verified_base_weight_eval", "coverage": "6/20 task-aligned axes", "headline": "JSON validity 0.5112; action macro-F1 0.0008", "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json" }, { "id": "cosmos3_nano_future_window", "title": "Cosmos3-Nano Future Window", "status": "verified_compatibility_eval", "coverage": "5/20 task-aligned axes", "headline": "future retrieval MRR 0.0221; transition accuracy 0.9683", "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json" }, { "id": "cosmos3_super_forward_dynamics_lora", "title": "Cosmos3-Super Forward-Dynamics LoRA", "status": "verified_finetuned_adapter", "coverage": "separate camera-pose proxy target, not plotted on the 20 task axes", "headline": "test MSE 3.685 over 448 held-out rows", "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/eval/metrics.json" } ] }