File size: 3,166 Bytes
eeac43c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
{
  "id": "cosmos3_super_reasoner",
  "display_name": "Cosmos3-Super Reasoner",
  "status": "implemented",
  "model_family": "Cosmos3 / physical-world foundation models",
  "default_model_id": "nv-community/Cosmos3-Super",
  "local_model_env": "COSMOS3_SUPER_MODEL_DIR",
  "dataset_contract": "xperience10m_episode_json_qa_v1",
  "training_objective": "zero_shot_structured_episode_understanding_json_qa_via_vllm_reasoner",
  "split_policy": {
    "unit": "episode",
    "default_counts": {
      "train": 96,
      "val": 16,
      "test": 16
    },
    "leakage_guard": "uses the same 96/16/16 (train/val/test) selected-episode split as the Qwen3-Omni LoRA branch; no Cosmos3-Super weights are updated"
  },
  "modalities": {
    "direct_inputs": [
      "multi-camera rendered mosaic video",
      "language prompt and label options"
    ],
    "conditioning_inputs": [
      "prompt-side task schema and episode/window metadata"
    ],
    "targets": [
      "structured action/subtask/contact/transition/object JSON"
    ],
    "excluded_inputs": [
      "visualization.rrd",
      "raw annotation HDF5",
      "audio in the current vLLM Reasoner path"
    ]
  },
  "entrypoints": {
    "selection_manifest": "scripts/omni/build_selection_episode_manifest.py",
    "export": "scripts/omni/parallel_export_qwen3_omni_action_dataset.py",
    "neutral_index": "scripts/omni/export_model_neutral_window_index.py",
    "train": "",
    "eval": "scripts/omni/eval_cosmos3_super_reasoner.py",
    "launcher": "scripts/omni/run_cosmos3_super_reasoner_eval.sh",
    "validate": "scripts/omni/validate_omni_finetune_run.py"
  },
  "primary_metrics": [
    "json_validity_rate",
    "action_macro_f1",
    "subtask_accuracy",
    "transition_accuracy",
    "next_action_accuracy",
    "contact_accuracy",
    "object_micro_f1",
    "held_out_episode_count"
  ],
  "artifact_contract": {
    "checkpoint_gate": "base_weight_vllm_reasoner_setup_metadata",
    "required_eval_files": [
      "metrics.json",
      "predictions.jsonl",
      "predictions.csv",
      "per_class_metrics.csv",
      "confusion_matrix.csv",
      "server_info.json",
      "RUN_REPORT.md"
    ],
    "required_training_files": [
      "training_metadata.json",
      "progress.jsonl"
    ],
    "public_package_allowed": [
      "metrics",
      "predictions",
      "confusion matrices",
      "run reports",
      "server/model setup metadata",
      "episode and dataset manifests",
      "validation summaries"
    ],
    "public_package_forbidden": [
      "raw MP4",
      "annotation HDF5",
      "Rerun RRD",
      "base-model weights",
      "fine-tuned weights",
      "checkpoints",
      "large archives"
    ]
  },
  "extension_requirements": [
    "This branch evaluates staged Cosmos3-Super Reasoner base weights through vLLM on the 128-episode held-out JSON task; it does not fine-tune or release new Cosmos weights.",
    "Create a separate Cosmos3-Super adapter/model repository only after a real fine-tuning run produces new adapter or checkpoint weights.",
    "Keep this branch separate from the Cosmos3-Nano future-window compatibility branch, which addresses a different world-model retrieval target."
  ]
}