Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
| { | |
| "generated_at_utc": "2026-06-18T22:52:18+00:00", | |
| "methods": { | |
| "cosmos3_nano_future_window": { | |
| "label": "Cosmos3-Nano Future Window", | |
| "reason": null, | |
| "source_prediction_jsonl": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/future_predictions.jsonl", | |
| "status": "scored", | |
| "tasks": { | |
| "action_object_relation": { | |
| "action_object_relation_accuracy": 0.013297872340425532, | |
| "action_object_relation_macro_f1": 0.002794157670325683, | |
| "scored_rows": 376, | |
| "source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_nano_future_window/metrics.json" | |
| }, | |
| "long_horizon_next_action": { | |
| "horizon_windows": 5, | |
| "long_horizon_next_action_accuracy": 0.007936507936507936, | |
| "long_horizon_next_action_macro_f1": 0.0024906600249066007, | |
| "scored_rows": 378, | |
| "source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/long_horizon_next_action/cosmos3_nano_future_window/metrics.json" | |
| }, | |
| "modality_reconstruction": { | |
| "feature_reconstruction_error": 3479.218317102503, | |
| "feature_reconstruction_quality": 0.0002873382957286892, | |
| "num_samples": 378, | |
| "source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/modality_reconstruction/cosmos3_nano_future_window/metrics.json", | |
| "source_verified_metrics_json": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json" | |
| }, | |
| "next_subtask_forecast": { | |
| "next_subtask_forecast_accuracy": 0.015873015873015872, | |
| "next_subtask_forecast_macro_f1": 0.006614876224708678, | |
| "scored_rows": 378, | |
| "source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/next_subtask_forecast/cosmos3_nano_future_window/metrics.json" | |
| }, | |
| "object_set_forecast": { | |
| "object_set_forecast_micro_f1": 0.01781970649895178, | |
| "object_set_forecast_precision": 0.02225130890052356, | |
| "object_set_forecast_recall": 0.01486013986013986, | |
| "scored_rows": 378, | |
| "source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/object_set_forecast/cosmos3_nano_future_window/metrics.json" | |
| }, | |
| "time_to_transition": { | |
| "scored_rows": 378, | |
| "source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/time_to_transition/cosmos3_nano_future_window/metrics.json", | |
| "time_to_transition_mae": 33.80952380952381, | |
| "within_20_frames": 0.6666666666666666 | |
| } | |
| }, | |
| "unsupported_tasks": {} | |
| }, | |
| "cosmos3_super_reasoner": { | |
| "label": "Cosmos3-Super Reasoner", | |
| "reason": null, | |
| "source_prediction_jsonl": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/predictions.jsonl", | |
| "status": "scored", | |
| "tasks": { | |
| "action_object_relation": { | |
| "action_object_relation_accuracy": 0.0, | |
| "action_object_relation_macro_f1": 0.0, | |
| "scored_rows": 446, | |
| "source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json", | |
| "valid_pred_relation_rate": 0.49327354260089684 | |
| }, | |
| "caption_grounding": { | |
| "caption_grounding_center_hit_rate": 0.3236607142857143, | |
| "caption_grounding_iou": 0.30639899644580487, | |
| "missing_pred_evidence_window_count": 219, | |
| "scored_rows": 448, | |
| "source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/caption_grounding/cosmos3_super_reasoner/metrics.json" | |
| }, | |
| "long_horizon_next_action": { | |
| "long_horizon_next_action_accuracy": 0.03794642857142857, | |
| "long_horizon_next_action_macro_f1": 0.008807588075880758, | |
| "scored_rows": 448, | |
| "source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/long_horizon_next_action/cosmos3_super_reasoner/metrics.json" | |
| }, | |
| "time_to_transition": { | |
| "scored_rows": 448, | |
| "source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/time_to_transition/cosmos3_super_reasoner/metrics.json", | |
| "time_to_transition_mae": 52.94642857142857, | |
| "within_20_frames": 0.6473214285714286 | |
| } | |
| }, | |
| "unsupported_tasks": {} | |
| }, | |
| "qwen3_omni_v6_lora": { | |
| "label": "Qwen3-Omni v6 LoRA", | |
| "reason": null, | |
| "source_prediction_jsonl": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/predictions.jsonl", | |
| "status": "scored", | |
| "tasks": { | |
| "action_object_relation": { | |
| "action_object_relation_accuracy": 0.000996512207274539, | |
| "action_object_relation_macro_f1": 0.0002220083079671497, | |
| "scored_rows": 4014, | |
| "source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json", | |
| "valid_pred_relation_rate": 0.9990034877927254 | |
| } | |
| }, | |
| "unsupported_tasks": {} | |
| } | |
| }, | |
| "scope": "Task-specific scoring from existing verified held-out model outputs. No new model inference, training, or target backfilling is performed.", | |
| "scored_method_task_count_added": 11, | |
| "status": "pass", | |
| "task_ids_added_to_matrix": [ | |
| "action_object_relation", | |
| "caption_grounding", | |
| "long_horizon_next_action", | |
| "modality_reconstruction", | |
| "next_subtask_forecast", | |
| "object_set_forecast", | |
| "time_to_transition" | |
| ], | |
| "title": "Existing Model-Output Task Probes" | |
| } | |