| { |
| "title": "Unified 20-Task Model Radar", |
| "status": "pass", |
| "generated_at_utc": "2026-06-16T06:32:05+00:00", |
| "task_count": 20, |
| "normalization_policy": { |
| "higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]", |
| "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task", |
| "raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table", |
| "foundation_model_overlay": "Qwen3/Cosmos points are plotted only on task-aligned axes. Missing axes mean the public result does not evaluate that task contract.", |
| "metadata_128_overlay": "128-episode metadata baselines are plotted only where the public JSONL contains enough task labels without raw feature blocks." |
| }, |
| "series": [ |
| { |
| "id": "minimal", |
| "label": "Minimal", |
| "short_label": "Min", |
| "color": "#ccffa0", |
| "kind": "full_20_task_baseline", |
| "scope": "1 public sample episode", |
| "stroke_dasharray": null, |
| "covered_task_count": 20, |
| "coverage_fraction": 1.0 |
| }, |
| { |
| "id": "neural_mlp", |
| "label": "Neural MLP", |
| "short_label": "NN", |
| "color": "#67e8d1", |
| "kind": "full_20_task_baseline", |
| "scope": "1 public sample episode", |
| "stroke_dasharray": null, |
| "covered_task_count": 20, |
| "coverage_fraction": 1.0 |
| }, |
| { |
| "id": "metadata128_simple", |
| "label": "128ep Metadata Simple", |
| "short_label": "128-S", |
| "color": "#ffd166", |
| "kind": "partial_128_episode_metadata_baseline", |
| "scope": "128 selected episodes, JSONL metadata/text only", |
| "stroke_dasharray": "9 6", |
| "covered_task_count": 8, |
| "coverage_fraction": 0.4 |
| }, |
| { |
| "id": "metadata128_neural_mlp", |
| "label": "128ep Metadata NN", |
| "short_label": "128-NN", |
| "color": "#f472b6", |
| "kind": "partial_128_episode_metadata_baseline", |
| "scope": "128 selected episodes, JSONL metadata/text only", |
| "stroke_dasharray": "3 6", |
| "covered_task_count": 6, |
| "coverage_fraction": 0.3 |
| }, |
| { |
| "id": "qwen3_omni_v6_lora", |
| "label": "Qwen3-Omni v6 LoRA", |
| "short_label": "Qwen3", |
| "color": "#9bb8ff", |
| "kind": "partial_128_episode_foundation_model_overlay", |
| "scope": "128 selected episodes, held-out test", |
| "stroke_dasharray": "7 7", |
| "covered_task_count": 6, |
| "coverage_fraction": 0.3 |
| }, |
| { |
| "id": "cosmos3_super_reasoner", |
| "label": "Cosmos3-Super Reasoner", |
| "short_label": "C3-S", |
| "color": "#ff9c7a", |
| "kind": "partial_128_episode_foundation_model_overlay", |
| "scope": "128 selected episodes, held-out test", |
| "stroke_dasharray": "4 7", |
| "covered_task_count": 6, |
| "coverage_fraction": 0.3 |
| }, |
| { |
| "id": "cosmos3_nano_future_window", |
| "label": "Cosmos3-Nano Future Window", |
| "short_label": "C3-N", |
| "color": "#d9c7ff", |
| "kind": "partial_128_episode_world_model_overlay", |
| "scope": "128 selected episodes, held-out test", |
| "stroke_dasharray": "2 7", |
| "covered_task_count": 5, |
| "coverage_fraction": 0.25 |
| } |
| ], |
| "tasks": [ |
| { |
| "task_number": 1, |
| "task_id": "timeline_action", |
| "label": "Action Recognition", |
| "short_label": "Action", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": 0.05, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/timeline_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.05, |
| "raw_text": "0.0500" |
| }, |
| "neural_mlp": { |
| "raw": 0.014814814814814814, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/neural_mlp/timeline_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.014814814814814814, |
| "raw_text": "0.0148" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.0028830723979596335, |
| "metric_key": "action_macro_f1", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.0028830723979596335, |
| "raw_text": "0.0029" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.0008284021201089245, |
| "metric_key": "action_macro_f1", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.0008284021201089245, |
| "raw_text": "0.0008" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.007936507936507936, |
| "metric_key": "action_accuracy_from_retrieved_future", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.007936507936507936, |
| "raw_text": "0.0079" |
| }, |
| "metadata128_simple": { |
| "raw": 0.008252821966746326, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/timeline_action/metrics.json", |
| "scope": "multi_episode_128_metadata_baseline", |
| "normalized_score": 0.008252821966746326, |
| "raw_text": "0.0083" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.004175793689174209, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/timeline_action/metrics.json", |
| "scope": "multi_episode_128_metadata_baseline", |
| "normalized_score": 0.004175793689174209, |
| "raw_text": "0.0042" |
| } |
| } |
| }, |
| { |
| "task_number": 2, |
| "task_id": "timeline_subtask", |
| "label": "Procedure Step Recognition", |
| "short_label": "Step", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": 0.05056355513846935, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/timeline_subtask/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.05056355513846935, |
| "raw_text": "0.0506" |
| }, |
| "neural_mlp": { |
| "raw": 0.02810810810810811, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/neural_mlp/timeline_subtask/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.02810810810810811, |
| "raw_text": "0.0281" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.0037313432835820895, |
| "metric_key": "subtask_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.0037313432835820895, |
| "raw_text": "0.0037" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.0, |
| "metric_key": "subtask_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.0, |
| "raw_text": "0.0000" |
| }, |
| "metadata128_simple": { |
| "raw": 0.00019512195121951218, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/timeline_subtask/metrics.json", |
| "scope": "multi_episode_128_metadata_baseline", |
| "normalized_score": 0.00019512195121951218, |
| "raw_text": "0.0002" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 7.207207207207208e-05, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/timeline_subtask/metrics.json", |
| "scope": "multi_episode_128_metadata_baseline", |
| "normalized_score": 7.207207207207208e-05, |
| "raw_text": "0.0001" |
| } |
| } |
| }, |
| { |
| "task_number": 3, |
| "task_id": "transition_detection", |
| "label": "Action Boundary Detection", |
| "short_label": "Boundary", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": 0.6118237590630229, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/transition_detection/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.6118237590630229, |
| "raw_text": "0.6118" |
| }, |
| "neural_mlp": { |
| "raw": 0.5862068965517241, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/neural_mlp/transition_detection/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.5862068965517241, |
| "raw_text": "0.5862" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.9898313492063492, |
| "metric_key": "transition_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.9898313492063492, |
| "raw_text": "0.9898" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.36830357142857145, |
| "metric_key": "transition_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.36830357142857145, |
| "raw_text": "0.3683" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.9682539682539683, |
| "metric_key": "transition_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.9682539682539683, |
| "raw_text": "0.9683" |
| }, |
| "metadata128_simple": { |
| "raw": 0.29652162550029315, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/transition_detection/metrics.json", |
| "scope": "multi_episode_128_metadata_baseline", |
| "normalized_score": 0.29652162550029315, |
| "raw_text": "0.2965" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.4841733292368365, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/transition_detection/metrics.json", |
| "scope": "multi_episode_128_metadata_baseline", |
| "normalized_score": 0.4841733292368365, |
| "raw_text": "0.4842" |
| } |
| } |
| }, |
| { |
| "task_number": 4, |
| "task_id": "next_action", |
| "label": "Next-Action Prediction", |
| "short_label": "Next act", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": 0.05925925925925927, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/next_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.05925925925925927, |
| "raw_text": "0.0593" |
| }, |
| "neural_mlp": { |
| "raw": 0.04186046511627907, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/neural_mlp/next_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.04186046511627907, |
| "raw_text": "0.0419" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.04305335446381405, |
| "metric_key": "next_action_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.04305335446381405, |
| "raw_text": "0.0431" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.013392857142857142, |
| "metric_key": "next_action_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.013392857142857142, |
| "raw_text": "0.0134" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.007936507936507936, |
| "metric_key": "action_accuracy_from_retrieved_future", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.007936507936507936, |
| "raw_text": "0.0079" |
| }, |
| "metadata128_simple": { |
| "raw": 0.006514774539765508, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/next_action/metrics.json", |
| "scope": "multi_episode_128_metadata_baseline", |
| "normalized_score": 0.006514774539765508, |
| "raw_text": "0.0065" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.004910507980164745, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/next_action/metrics.json", |
| "scope": "multi_episode_128_metadata_baseline", |
| "normalized_score": 0.004910507980164745, |
| "raw_text": "0.0049" |
| } |
| } |
| }, |
| { |
| "task_number": 5, |
| "task_id": "hand_trajectory_forecast", |
| "label": "Hand Trajectory Forecasting", |
| "short_label": "Hand traj", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "mpjpe", |
| "metric_name": "MPJPE", |
| "metric_direction": "lower", |
| "values": { |
| "minimal": { |
| "raw": 0.8646570444107056, |
| "metric_key": "mpjpe", |
| "source": "results/episode_task_suite/hand_trajectory_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.12473175026322614, |
| "raw_text": "0.8647" |
| }, |
| "neural_mlp": { |
| "raw": 0.10785018652677536, |
| "metric_key": "mpjpe", |
| "source": "results/episode_task_suite/neural_mlp/hand_trajectory_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 1.0, |
| "raw_text": "0.1079" |
| } |
| } |
| }, |
| { |
| "task_number": 6, |
| "task_id": "contact_prediction", |
| "label": "Contact State Prediction", |
| "short_label": "Contact", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": 1.0, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/contact_prediction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 1.0, |
| "raw_text": "1.000" |
| }, |
| "neural_mlp": { |
| "raw": 1.0, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/neural_mlp/contact_prediction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 1.0, |
| "raw_text": "1.000" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.8177083333333334, |
| "metric_key": "contact_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.8177083333333334, |
| "raw_text": "0.8177" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.32142857142857145, |
| "metric_key": "contact_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.32142857142857145, |
| "raw_text": "0.3214" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.7433862433862434, |
| "metric_key": "contact_accuracy", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.7433862433862434, |
| "raw_text": "0.7434" |
| }, |
| "metadata128_simple": { |
| "raw": 0.4381481308057444, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/contact_prediction/metrics.json", |
| "scope": "multi_episode_128_metadata_baseline", |
| "normalized_score": 0.4381481308057444, |
| "raw_text": "0.4381" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.5682695682695682, |
| "metric_key": "macro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/contact_prediction/metrics.json", |
| "scope": "multi_episode_128_metadata_baseline", |
| "normalized_score": 0.5682695682695682, |
| "raw_text": "0.5683" |
| } |
| } |
| }, |
| { |
| "task_number": 7, |
| "task_id": "object_relevance", |
| "label": "Object Relevance Prediction", |
| "short_label": "Objects", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "micro_f1", |
| "metric_name": "micro-F1", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": 0.18034382095361662, |
| "metric_key": "micro_f1", |
| "source": "results/episode_task_suite/object_relevance/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.18034382095361662, |
| "raw_text": "0.1803" |
| }, |
| "neural_mlp": { |
| "raw": 0.1679279279279279, |
| "metric_key": "micro_f1", |
| "source": "results/episode_task_suite/neural_mlp/object_relevance/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.1679279279279279, |
| "raw_text": "0.1679" |
| }, |
| "qwen3_omni_v6_lora": { |
| "raw": 0.3064982378331287, |
| "metric_key": "object_micro_f1", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.3064982378331287, |
| "raw_text": "0.3065" |
| }, |
| "cosmos3_super_reasoner": { |
| "raw": 0.13704276146316333, |
| "metric_key": "object_micro_f1", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.13704276146316333, |
| "raw_text": "0.1370" |
| }, |
| "metadata128_simple": { |
| "raw": 0.17764578833693304, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/object_relevance/metrics.json", |
| "scope": "multi_episode_128_metadata_baseline", |
| "normalized_score": 0.17764578833693304, |
| "raw_text": "0.1776" |
| }, |
| "metadata128_neural_mlp": { |
| "raw": 0.18662723837686876, |
| "metric_key": "micro_f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/object_relevance/metrics.json", |
| "scope": "multi_episode_128_metadata_baseline", |
| "normalized_score": 0.18662723837686876, |
| "raw_text": "0.1866" |
| } |
| } |
| }, |
| { |
| "task_number": 8, |
| "task_id": "caption_grounding", |
| "label": "Language Grounding", |
| "short_label": "Language", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "mrr", |
| "metric_name": "MRR", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": 0.016023479050338015, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/caption_grounding/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.016023479050338015, |
| "raw_text": "0.0160" |
| }, |
| "neural_mlp": { |
| "raw": 0.01684125567132316, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/neural_mlp/caption_grounding/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.01684125567132316, |
| "raw_text": "0.0168" |
| }, |
| "metadata128_simple": { |
| "raw": 0.002332374220713973, |
| "metric_key": "mrr", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/caption_grounding/metrics.json", |
| "scope": "multi_episode_128_metadata_baseline", |
| "normalized_score": 0.002332374220713973, |
| "raw_text": "0.0023" |
| } |
| } |
| }, |
| { |
| "task_number": 9, |
| "task_id": "cross_modal_retrieval", |
| "label": "Cross-Modal Retrieval", |
| "short_label": "X-modal", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "mrr", |
| "metric_name": "MRR", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": 0.26925966892956127, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/cross_modal_retrieval/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.26925966892956127, |
| "raw_text": "0.2693" |
| }, |
| "neural_mlp": { |
| "raw": 0.1299971898648288, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/neural_mlp/cross_modal_retrieval/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.1299971898648288, |
| "raw_text": "0.1300" |
| }, |
| "cosmos3_nano_future_window": { |
| "raw": 0.022138720585222767, |
| "metric_key": "future_retrieval_mrr", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json", |
| "scope": "multi_episode_128_partial_model_overlay", |
| "normalized_score": 0.022138720585222767, |
| "raw_text": "0.0221" |
| } |
| } |
| }, |
| { |
| "task_number": 10, |
| "task_id": "modality_reconstruction", |
| "label": "Cross-Modal Reconstruction", |
| "short_label": "Recon", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "r2", |
| "metric_name": "R2", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": -0.015271898913936655, |
| "metric_key": "r2", |
| "source": "results/episode_task_suite/modality_reconstruction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.0, |
| "raw_text": "-0.0153" |
| }, |
| "neural_mlp": { |
| "raw": -0.010171410134180991, |
| "metric_key": "r2", |
| "source": "results/episode_task_suite/neural_mlp/modality_reconstruction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.0, |
| "raw_text": "-0.0102" |
| } |
| } |
| }, |
| { |
| "task_number": 11, |
| "task_id": "temporal_order", |
| "label": "Temporal Order Verification", |
| "short_label": "Order", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "f1", |
| "metric_name": "F1", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": 0.5399515738498789, |
| "metric_key": "f1", |
| "source": "results/episode_task_suite/temporal_order/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.5399515738498789, |
| "raw_text": "0.5400" |
| }, |
| "neural_mlp": { |
| "raw": 0.8520179372197308, |
| "metric_key": "f1", |
| "source": "results/episode_task_suite/neural_mlp/temporal_order/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.8520179372197308, |
| "raw_text": "0.8520" |
| }, |
| "metadata128_simple": { |
| "raw": 0.4198864140782312, |
| "metric_key": "f1", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/temporal_order/metrics.json", |
| "scope": "multi_episode_128_metadata_baseline", |
| "normalized_score": 0.4198864140782312, |
| "raw_text": "0.4199" |
| } |
| } |
| }, |
| { |
| "task_number": 12, |
| "task_id": "misalignment_detection", |
| "label": "Multimodal Synchronization Detection", |
| "short_label": "Sync", |
| "origin": "original_public_sample_tasks", |
| "metric_key": "f1", |
| "metric_name": "F1", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": 0.5051698670605613, |
| "metric_key": "f1", |
| "source": "results/episode_task_suite/misalignment_detection/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.5051698670605613, |
| "raw_text": "0.5052" |
| }, |
| "neural_mlp": { |
| "raw": 0.7152682255845944, |
| "metric_key": "f1", |
| "source": "results/episode_task_suite/neural_mlp/misalignment_detection/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.7152682255845944, |
| "raw_text": "0.7153" |
| } |
| } |
| }, |
| { |
| "task_number": 13, |
| "task_id": "long_horizon_next_action", |
| "label": "Long-Horizon Next-Action Forecasting", |
| "short_label": "Long act", |
| "origin": "additional_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": 0.07499999999999998, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/long_horizon_next_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.07499999999999998, |
| "raw_text": "0.0750" |
| }, |
| "neural_mlp": { |
| "raw": 0.06545454545454546, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/long_horizon_next_action/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.06545454545454546, |
| "raw_text": "0.0655" |
| } |
| } |
| }, |
| { |
| "task_number": 14, |
| "task_id": "next_subtask_forecast", |
| "label": "Long-Horizon Next-Subtask Forecasting", |
| "short_label": "Long step", |
| "origin": "additional_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": 0.04545454545454545, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/next_subtask_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.04545454545454545, |
| "raw_text": "0.0455" |
| }, |
| "neural_mlp": { |
| "raw": 0.050724637681159424, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/next_subtask_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.050724637681159424, |
| "raw_text": "0.0507" |
| } |
| } |
| }, |
| { |
| "task_number": 15, |
| "task_id": "interaction_text_prediction", |
| "label": "Interaction Text Prediction", |
| "short_label": "Interact txt", |
| "origin": "additional_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": 0.04444444444444444, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/interaction_text_prediction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.04444444444444444, |
| "raw_text": "0.0444" |
| }, |
| "neural_mlp": { |
| "raw": 0.0380952380952381, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/interaction_text_prediction/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.0380952380952381, |
| "raw_text": "0.0381" |
| } |
| } |
| }, |
| { |
| "task_number": 16, |
| "task_id": "action_object_relation", |
| "label": "Action-Object Relation Prediction", |
| "short_label": "Act+obj", |
| "origin": "additional_public_sample_tasks", |
| "metric_key": "macro_f1", |
| "metric_name": "macro-F1", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/action_object_relation/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.0, |
| "raw_text": "0.0000" |
| }, |
| "neural_mlp": { |
| "raw": 0.0, |
| "metric_key": "macro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/action_object_relation/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.0, |
| "raw_text": "0.0000" |
| } |
| } |
| }, |
| { |
| "task_number": 17, |
| "task_id": "object_set_forecast", |
| "label": "Future Object-Set Forecasting", |
| "short_label": "Future obj", |
| "origin": "additional_public_sample_tasks", |
| "metric_key": "micro_f1", |
| "metric_name": "micro-F1", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": 0.16939890710382516, |
| "metric_key": "micro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/object_set_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.16939890710382516, |
| "raw_text": "0.1694" |
| }, |
| "neural_mlp": { |
| "raw": 0.19718309859154928, |
| "metric_key": "micro_f1", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/object_set_forecast/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.19718309859154928, |
| "raw_text": "0.1972" |
| } |
| } |
| }, |
| { |
| "task_number": 18, |
| "task_id": "imu_to_hand_pose", |
| "label": "IMU-to-Hand Pose Reconstruction", |
| "short_label": "IMU->hand", |
| "origin": "additional_public_sample_tasks", |
| "metric_key": "mae", |
| "metric_name": "MAE", |
| "metric_direction": "lower", |
| "values": { |
| "minimal": { |
| "raw": 0.042049407958984375, |
| "metric_key": "mae", |
| "source": "results/episode_task_suite/tier2_task_suite/imu_to_hand_pose/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 1.0, |
| "raw_text": "0.0420" |
| }, |
| "neural_mlp": { |
| "raw": 0.042562149465084076, |
| "metric_key": "mae", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/imu_to_hand_pose/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.9879531106266066, |
| "raw_text": "0.0426" |
| } |
| } |
| }, |
| { |
| "task_number": 19, |
| "task_id": "camera_view_sync_retrieval", |
| "label": "Camera-View Synchronization Retrieval", |
| "short_label": "Cam sync", |
| "origin": "additional_public_sample_tasks", |
| "metric_key": "mrr", |
| "metric_name": "MRR", |
| "metric_direction": "higher", |
| "values": { |
| "minimal": { |
| "raw": 0.4943004846572876, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/tier2_task_suite/camera_view_sync_retrieval/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.4943004846572876, |
| "raw_text": "0.4943" |
| }, |
| "neural_mlp": { |
| "raw": 0.24086658656597137, |
| "metric_key": "mrr", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/camera_view_sync_retrieval/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.24086658656597137, |
| "raw_text": "0.2409" |
| } |
| } |
| }, |
| { |
| "task_number": 20, |
| "task_id": "time_to_transition", |
| "label": "Time-to-Next-Transition Regression", |
| "short_label": "Time2bdry", |
| "origin": "additional_public_sample_tasks", |
| "metric_key": "mae", |
| "metric_name": "MAE frames", |
| "metric_direction": "lower", |
| "values": { |
| "minimal": { |
| "raw": 10.53735637664795, |
| "metric_key": "mae", |
| "source": "results/episode_task_suite/tier2_task_suite/time_to_transition/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 1.0, |
| "raw_text": "10.54" |
| }, |
| "neural_mlp": { |
| "raw": 10.55449390411377, |
| "metric_key": "mae", |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/time_to_transition/metrics.json", |
| "scope": "single_episode_public_sample", |
| "normalized_score": 0.9983762814568361, |
| "raw_text": "10.55" |
| } |
| } |
| } |
| ], |
| "model_branch_cards": [ |
| { |
| "id": "metadata128_simple", |
| "title": "128ep Metadata Simple", |
| "status": "a100_rerun_pass", |
| "coverage": "8/20 JSONL-supported axes", |
| "headline": "34,269 rows; train/val/test 25,629/4,608/4,032", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/summary_report.json" |
| }, |
| { |
| "id": "metadata128_neural_mlp", |
| "title": "128ep Metadata NN", |
| "status": "a100_rerun_pass", |
| "coverage": "6/20 JSONL-supported axes", |
| "headline": "compact MLP heads over metadata/text features", |
| "source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/summary_report.json" |
| }, |
| { |
| "id": "qwen3_omni_v6_lora", |
| "title": "Qwen3-Omni v6 LoRA", |
| "status": "verified", |
| "task_aligned_axes": "Qwen3", |
| "coverage": "6/20 task-aligned axes", |
| "headline": "JSON validity 0.9990; action macro-F1 0.0029", |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json" |
| }, |
| { |
| "id": "cosmos3_super_reasoner", |
| "title": "Cosmos3-Super Reasoner", |
| "status": "verified_base_weight_eval", |
| "coverage": "6/20 task-aligned axes", |
| "headline": "JSON validity 0.5112; action macro-F1 0.0008", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json" |
| }, |
| { |
| "id": "cosmos3_nano_future_window", |
| "title": "Cosmos3-Nano Future Window", |
| "status": "verified_compatibility_eval", |
| "coverage": "5/20 task-aligned axes", |
| "headline": "future retrieval MRR 0.0221; transition accuracy 0.9683", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json" |
| }, |
| { |
| "id": "cosmos3_super_forward_dynamics_lora", |
| "title": "Cosmos3-Super Forward-Dynamics LoRA", |
| "status": "verified_finetuned_adapter", |
| "coverage": "separate camera-pose proxy target, not plotted on the 20 task axes", |
| "headline": "test MSE 3.685 over 448 held-out rows", |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/eval/metrics.json" |
| } |
| ] |
| } |
|
|