{ "id": "cosmos3_super_reasoner", "display_name": "Cosmos3-Super Reasoner", "status": "implemented", "model_family": "Cosmos3 / physical-world foundation models", "default_model_id": "nv-community/Cosmos3-Super", "local_model_env": "COSMOS3_SUPER_MODEL_DIR", "dataset_contract": "xperience10m_episode_json_qa_v1", "training_objective": "zero_shot_structured_episode_understanding_json_qa_via_vllm_reasoner", "split_policy": { "unit": "episode", "default_counts": { "train": 96, "val": 16, "test": 16 }, "leakage_guard": "uses the same 96/16/16 selected episode split as the Qwen3-Omni LoRA branch; no Super weights are updated" }, "modalities": { "direct_inputs": [ "multi-camera rendered mosaic video", "language prompt and label options" ], "conditioning_inputs": [ "prompt-side task schema and episode/window metadata" ], "targets": [ "structured action/subtask/contact/transition/object JSON" ], "excluded_inputs": [ "visualization.rrd", "raw annotation HDF5", "audio in the current vLLM Reasoner path" ] }, "entrypoints": { "selection_manifest": "scripts/omni/build_selection_episode_manifest.py", "export": "scripts/omni/parallel_export_qwen3_omni_action_dataset.py", "neutral_index": "scripts/omni/export_model_neutral_window_index.py", "train": "", "eval": "scripts/omni/eval_cosmos3_super_reasoner.py", "launcher": "scripts/omni/run_cosmos3_super_reasoner_eval.sh", "validate": "scripts/omni/validate_omni_finetune_run.py" }, "primary_metrics": [ "json_validity_rate", "action_macro_f1", "subtask_accuracy", "transition_accuracy", "next_action_accuracy", "contact_accuracy", "object_micro_f1", "held_out_episode_count" ], "artifact_contract": { "checkpoint_gate": "base_weight_vllm_reasoner_setup_metadata", "required_eval_files": [ "metrics.json", "predictions.jsonl", "predictions.csv", "per_class_metrics.csv", "confusion_matrix.csv", "server_info.json", "RUN_REPORT.md" ], "required_training_files": [ "training_metadata.json", "progress.jsonl" ], "public_package_allowed": [ "metrics", "predictions", "confusion matrices", "run reports", "server/model setup metadata", "episode and dataset manifests", "validation summaries" ], "public_package_forbidden": [ "raw MP4", "annotation HDF5", "Rerun RRD", "base-model weights", "fine-tuned weights", "checkpoints", "large archives" ] }, "extension_requirements": [ "This branch evaluates staged Cosmos3-Super Reasoner base weights through vLLM on the 128-episode held-out JSON task; it does not fine-tune or release new Cosmos weights.", "Create a separate Cosmos3-Super adapter/model repository only after a real fine-tuning run produces new adapter or checkpoint weights.", "Keep it separate from the Cosmos3-Nano future-window compatibility branch, which answers a different world-model retrieval target." ] }