Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library. # To use this model, check the repository files and the library's documentation. # Want to help? PRs adding snippets are welcome at: # https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
File size: 5,951 Bytes
8ca5135 094eb82 8ca5135 942c6d8 8ca5135 942c6d8 094eb82 942c6d8 7606bed c975eb1 8508633 c975eb1 942c6d8 094eb82 8ca5135 942c6d8 8ca5135 942c6d8 417a659 094eb82 a8277a7 417a659 942c6d8 8ca5135 942c6d8 8ca5135 942c6d8 8ca5135 094eb82 8ca5135 942c6d8 094eb82 417a659 7606bed 8508633 417a659 942c6d8 8ca5135 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 | {
"generated_at_utc": "2026-06-18T22:52:18+00:00",
"methods": {
"cosmos3_nano_future_window": {
"label": "Cosmos3-Nano Future Window",
"reason": null,
"source_prediction_jsonl": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/future_predictions.jsonl",
"status": "scored",
"tasks": {
"action_object_relation": {
"action_object_relation_accuracy": 0.013297872340425532,
"action_object_relation_macro_f1": 0.002794157670325683,
"scored_rows": 376,
"source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_nano_future_window/metrics.json"
},
"long_horizon_next_action": {
"horizon_windows": 5,
"long_horizon_next_action_accuracy": 0.007936507936507936,
"long_horizon_next_action_macro_f1": 0.0024906600249066007,
"scored_rows": 378,
"source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/long_horizon_next_action/cosmos3_nano_future_window/metrics.json"
},
"modality_reconstruction": {
"feature_reconstruction_error": 3479.218317102503,
"feature_reconstruction_quality": 0.0002873382957286892,
"num_samples": 378,
"source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/modality_reconstruction/cosmos3_nano_future_window/metrics.json",
"source_verified_metrics_json": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json"
},
"next_subtask_forecast": {
"next_subtask_forecast_accuracy": 0.015873015873015872,
"next_subtask_forecast_macro_f1": 0.006614876224708678,
"scored_rows": 378,
"source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/next_subtask_forecast/cosmos3_nano_future_window/metrics.json"
},
"object_set_forecast": {
"object_set_forecast_micro_f1": 0.01781970649895178,
"object_set_forecast_precision": 0.02225130890052356,
"object_set_forecast_recall": 0.01486013986013986,
"scored_rows": 378,
"source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/object_set_forecast/cosmos3_nano_future_window/metrics.json"
},
"time_to_transition": {
"scored_rows": 378,
"source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/time_to_transition/cosmos3_nano_future_window/metrics.json",
"time_to_transition_mae": 33.80952380952381,
"within_20_frames": 0.6666666666666666
}
},
"unsupported_tasks": {}
},
"cosmos3_super_reasoner": {
"label": "Cosmos3-Super Reasoner",
"reason": null,
"source_prediction_jsonl": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/predictions.jsonl",
"status": "scored",
"tasks": {
"action_object_relation": {
"action_object_relation_accuracy": 0.0,
"action_object_relation_macro_f1": 0.0,
"scored_rows": 446,
"source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
"valid_pred_relation_rate": 0.49327354260089684
},
"caption_grounding": {
"caption_grounding_center_hit_rate": 0.3236607142857143,
"caption_grounding_iou": 0.30639899644580487,
"missing_pred_evidence_window_count": 219,
"scored_rows": 448,
"source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/caption_grounding/cosmos3_super_reasoner/metrics.json"
},
"long_horizon_next_action": {
"long_horizon_next_action_accuracy": 0.03794642857142857,
"long_horizon_next_action_macro_f1": 0.008807588075880758,
"scored_rows": 448,
"source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/long_horizon_next_action/cosmos3_super_reasoner/metrics.json"
},
"time_to_transition": {
"scored_rows": 448,
"source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/time_to_transition/cosmos3_super_reasoner/metrics.json",
"time_to_transition_mae": 52.94642857142857,
"within_20_frames": 0.6473214285714286
}
},
"unsupported_tasks": {}
},
"qwen3_omni_v6_lora": {
"label": "Qwen3-Omni v6 LoRA",
"reason": null,
"source_prediction_jsonl": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/predictions.jsonl",
"status": "scored",
"tasks": {
"action_object_relation": {
"action_object_relation_accuracy": 0.000996512207274539,
"action_object_relation_macro_f1": 0.0002220083079671497,
"scored_rows": 4014,
"source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
"valid_pred_relation_rate": 0.9990034877927254
}
},
"unsupported_tasks": {}
}
},
"scope": "Task-specific scoring from existing verified held-out model outputs. No new model inference, training, or target backfilling is performed.",
"scored_method_task_count_added": 11,
"status": "pass",
"task_ids_added_to_matrix": [
"action_object_relation",
"caption_grounding",
"long_horizon_next_action",
"modality_reconstruction",
"next_subtask_forecast",
"object_set_forecast",
"time_to_transition"
],
"title": "Existing Model-Output Task Probes"
}
|