{
"id": "cosmos_world_model",
"display_name": "Cosmos3-Nano Future-Window World Model",
"status": "implemented",
"model_family": "Cosmos / physical-world foundation models",
"default_model_id": "nvidia/Cosmos3-Nano",
"local_model_env": "COSMOS_MODEL_DIR",
"dataset_contract": "xperience10m_future_window_world_model_v0",
"training_objective": "future_window_and_action_conditioned_world_modeling",
"split_policy": {
"unit": "episode",
"default_counts": {
"train": 96,
"val": 16,
"test": 16
},
"leakage_guard": "future windows must remain inside the same episode and test episodes must never condition training"
},
"modalities": {
"direct_inputs": [
"camera video streams or rendered mosaics",
"language task context"
],
"conditioning_inputs": [
"pose and SLAM trajectory",
"depth and confidence",
"mocap or action labels",
"IMU acceleration and gyro",
"audio event cues"
],
"targets": [
"future visual window",
"future latent state",
"future sensor-feature window",
"transition or contact event"
],
"excluded_inputs": [
"visualization.rrd"
]
},
"entrypoints": {
"selection_manifest": "scripts/omni/build_selection_episode_manifest.py",
"neutral_index": "scripts/omni/export_model_neutral_window_index.py",
"export": "scripts/omni/export_cosmos3_future_window_dataset.py",
"train": "scripts/omni/eval_cosmos3_future_window_retrieval.py",
"eval": "scripts/omni/eval_cosmos3_future_window_retrieval.py",
"launcher": "scripts/omni/run_cosmos3_nano_future_window_compat.sh",
"validate": "scripts/omni/validate_omni_finetune_run.py"
},
"primary_metrics": [
"future_retrieval_mrr",
"future_retrieval_recall_at_5",
"temporal_consistency",
"feature_reconstruction_error",
"transition_accuracy",
"contact_accuracy",
"held_out_episode_count"
],
"artifact_contract": {
"checkpoint_gate": "world_model_checkpoint_and_generation_config",
"required_eval_files": [
"metrics.json",
"future_predictions.jsonl",
"retrieval_rankings.csv",
"temporal_consistency.csv",
"qualitative_examples.json",
"RUN_REPORT.md"
],
"required_training_files": [
"training_metadata.json",
"progress.jsonl",
"model_config.json",
"checkpoint_manifest.json"
],
"public_package_allowed": [
"metrics",
"future-window prediction summaries",
"retrieval rankings",
"temporal consistency tables",
"qualitative example metadata",
"episode and dataset manifests",
"validation summaries"
],
"public_package_forbidden": [
"raw MP4",
"annotation HDF5",
"Rerun RRD",
"generated raw video unless explicitly licensed and size-bounded",
"base-model weights",
"full checkpoints",
"large archives"
]
},
"extension_requirements": [
"The current implementation starts with a Cosmos3-Nano compatibility adapter over same-split future sensor-feature retrieval; it does not yet fine-tune Cosmos diffusion weights.",
"Install a Cosmos3 Diffusers training stack before replacing the compatibility adapter with LoRA or diffusion post-training.",
"Keep target windows inside the same episode and never train on held-out test episodes.",
"Record generated or retrieved qualitative examples separately from task-classification metrics."
]
}