Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
| { | |
| "id": "cosmos_world_model", | |
| "display_name": "Cosmos3-Nano Future-Window World Model", | |
| "status": "implemented", | |
| "model_family": "Cosmos / physical-world foundation models", | |
| "default_model_id": "nvidia/Cosmos3-Nano", | |
| "local_model_env": "COSMOS_MODEL_DIR", | |
| "dataset_contract": "xperience10m_future_window_world_model_v0", | |
| "training_objective": "future_window_and_action_conditioned_world_modeling", | |
| "split_policy": { | |
| "unit": "episode", | |
| "default_counts": { | |
| "train": 96, | |
| "val": 16, | |
| "test": 16 | |
| }, | |
| "leakage_guard": "future windows must remain inside the same episode and test episodes must never condition training" | |
| }, | |
| "modalities": { | |
| "direct_inputs": [ | |
| "camera video streams or rendered mosaics", | |
| "language task context" | |
| ], | |
| "conditioning_inputs": [ | |
| "pose and SLAM trajectory", | |
| "depth and confidence", | |
| "mocap or action labels", | |
| "IMU acceleration and gyro", | |
| "audio event cues" | |
| ], | |
| "targets": [ | |
| "future visual window", | |
| "future latent state", | |
| "future sensor-feature window", | |
| "transition or contact event" | |
| ], | |
| "excluded_inputs": [ | |
| "visualization.rrd" | |
| ] | |
| }, | |
| "entrypoints": { | |
| "selection_manifest": "scripts/omni/build_selection_episode_manifest.py", | |
| "neutral_index": "scripts/omni/export_model_neutral_window_index.py", | |
| "export": "scripts/omni/export_cosmos3_future_window_dataset.py", | |
| "train": "scripts/omni/eval_cosmos3_future_window_retrieval.py", | |
| "eval": "scripts/omni/eval_cosmos3_future_window_retrieval.py", | |
| "launcher": "scripts/omni/run_cosmos3_nano_future_window_compat.sh", | |
| "validate": "scripts/omni/validate_omni_finetune_run.py" | |
| }, | |
| "primary_metrics": [ | |
| "future_retrieval_mrr", | |
| "future_retrieval_recall_at_5", | |
| "temporal_consistency", | |
| "feature_reconstruction_error", | |
| "transition_accuracy", | |
| "contact_accuracy", | |
| "held_out_episode_count" | |
| ], | |
| "artifact_contract": { | |
| "checkpoint_gate": "world_model_checkpoint_and_generation_config", | |
| "required_eval_files": [ | |
| "metrics.json", | |
| "future_predictions.jsonl", | |
| "retrieval_rankings.csv", | |
| "temporal_consistency.csv", | |
| "qualitative_examples.json", | |
| "RUN_REPORT.md" | |
| ], | |
| "required_training_files": [ | |
| "training_metadata.json", | |
| "progress.jsonl", | |
| "model_config.json", | |
| "checkpoint_manifest.json" | |
| ], | |
| "public_package_allowed": [ | |
| "metrics", | |
| "future-window prediction summaries", | |
| "retrieval rankings", | |
| "temporal consistency tables", | |
| "qualitative example metadata", | |
| "episode and dataset manifests", | |
| "validation summaries" | |
| ], | |
| "public_package_forbidden": [ | |
| "raw MP4", | |
| "annotation HDF5", | |
| "Rerun RRD", | |
| "generated raw video unless explicitly licensed and size-bounded", | |
| "base-model weights", | |
| "full checkpoints", | |
| "large archives" | |
| ] | |
| }, | |
| "extension_requirements": [ | |
| "Current implementation starts with Cosmos3-Nano compatibility over same-split future sensor-feature retrieval; it does not fine-tune Cosmos diffusion weights yet.", | |
| "Install a Cosmos3 Diffusers training stack before replacing the compatibility adapter with LoRA or diffusion post-training.", | |
| "Keep target windows inside the same episode and never train on held-out test episodes.", | |
| "Record generated or retrieved qualitative examples separately from task-classification metrics." | |
| ] | |
| } | |