cy0307 committed on
Commit
389c0f8
·
verified ·
1 Parent(s): 86e7cb2

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. ADDITIONAL_DEVELOPMENT_DIRECTIONS.md +1 -1
  2. FOUNDATION_MODEL_PLAN.md +3 -3
  3. PROJECT_README.md +33 -34
  4. README.ko.md +2 -2
  5. README.pt.md +1 -1
  6. docs/assets/charts/episode128_task_model_radar.svg +1 -1
  7. docs/assets/charts/research_direction_coverage.svg +1 -1
  8. docs/assets/charts/single_episode_task_model_radar.svg +1 -1
  9. docs/data/additional_development_directions.json +1 -1
  10. docs/data/artifact_index.json +73 -73
  11. docs/data/episode128_task_model_radar.json +3 -3
  12. docs/data/figure_index.json +19 -19
  13. docs/data/foundation_model_plan.json +1 -1
  14. docs/data/live_publication_status.json +1 -1
  15. docs/data/mirror_parity.json +0 -0
  16. docs/data/omni_finetune_verified_result.json +1 -1
  17. docs/data/omni_model_comparison.json +5 -5
  18. docs/data/project_status.json +16 -16
  19. docs/data/public_reader_map.json +7 -7
  20. docs/data/public_surface_qa.json +9 -9
  21. docs/data/publication_audit.json +7 -7
  22. docs/data/quality_gates.json +1 -1
  23. docs/data/qwen3_omni_run_lineage.json +20 -2
  24. docs/data/research_roadmap.json +3 -3
  25. docs/data/research_roadmap_interactive.json +1006 -54
  26. docs/data/scope_claims_audit.json +1 -1
  27. docs/data/single_episode_task_model_radar.json +2 -2
  28. docs/data/source_alignment_audit.json +1 -1
  29. docs/data/task_method_20_result_matrix.json +1 -1
  30. docs/data/task_method_20_source_audit.json +1 -1
  31. docs/data/task_suite_enhancement_128.json +1 -1
  32. docs/data/task_surface_integrity.json +1 -1
  33. docs/data/two_evidence_line_result_summary.json +6 -6
  34. docs/data/two_evidence_lines.json +4 -4
  35. docs/data/unified_task_model_radar.json +2 -2
  36. docs/data/website_integrity.json +31 -31
  37. docs/index.html +72 -69
  38. metrics/additional_development_directions.json +1 -1
  39. metrics/artifact_index.json +73 -73
  40. metrics/episode128_task_model_radar.json +3 -3
  41. metrics/figure_index.json +19 -19
  42. metrics/foundation_model_plan.json +1 -1
  43. metrics/live_publication_status.json +1 -1
  44. metrics/mirror_parity.json +0 -0
  45. metrics/omni_finetune_verified_result.json +1 -1
  46. metrics/omni_model_comparison.json +5 -5
  47. metrics/project_status.json +16 -16
  48. metrics/public_reader_map.json +7 -7
  49. metrics/public_surface_qa.json +9 -9
  50. metrics/publication_audit.json +7 -7
ADDITIONAL_DEVELOPMENT_DIRECTIONS.md CHANGED
@@ -1,7 +1,7 @@
1
  # Additional Development Directions
2
 
3
  This note records concrete directions that can grow from Xperience-10M beyond
4
- the current minimal baselines, Qwen3-Omni LoRA plan, Cosmos/world-model branch,
5
  and long-term Xperience-native pretraining goal. These are project directions,
6
  not completed benchmark results.
7
 
 
1
  # Additional Development Directions
2
 
3
  This note records concrete directions that can grow from Xperience-10M beyond
4
+ the current minimal baselines, Qwen3-Omni LoRA plan, Cosmos/world-model track,
5
  and long-term Xperience-native pretraining goal. These are project directions,
6
  not completed benchmark results.
7
 
FOUNDATION_MODEL_PLAN.md CHANGED
@@ -32,7 +32,7 @@ machine-readable copy at
32
  | Priority | Model family | Best role for this project | Why it fits Xperience-10M | Current decision |
33
  | --- | --- | --- | --- | --- |
34
  | 1 | Qwen3-Omni | Multimodal instruction model and JSON task predictor | Accepts video/audio/language directly; depth, pose, mocap, and IMU can enter through the existing sensor bridge | Keep as the first selected-episode LoRA pilot |
35
- | 2 | Cosmos 3 | Embodied world model, action generation, and synthetic future prediction | Designed for physical-world video generation, action-conditioned world modeling, and robot/world simulation style objectives | Add as the first world-model branch after the data gate |
36
  | 3 | NVIDIA GR00T | Humanoid/action-policy foundation model | Xperience-10M mocap, hand motion, contacts, and egocentric interaction can support retargeting and action-understanding probes | Track as a humanoid policy branch, not the first LoRA pilot |
37
  | 4 | OpenVLA / OpenVLA-OFT | Open vision-language-action policy baseline | Useful when windows are converted into visual observation plus action-token targets | Use after action-space design is explicit |
38
  | 5 | openpi pi0/pi0.5 | Open robot policy and action expert baseline | Useful for action chunking, policy fine-tuning, and embodiment transfer experiments | Candidate for policy branch once action labels are retargeted |
@@ -100,7 +100,7 @@ The full plan is documented in
100
  ## Why Cosmos 3 Should Be Added Next
101
 
102
  Cosmos 3 should not replace the Qwen3-Omni pilot. It should become the first
103
- world-model branch after the data gate. The reason is that the Xperience-10M
104
  modalities are unusually aligned with physical-world modeling:
105
 
106
  - video streams for visual state,
@@ -159,7 +159,7 @@ The foundation-model stage should add metrics beyond the current 20-task suite:
159
 
160
  1. Keep the selected 96/16/16 split as the comparison spine.
161
  2. Treat the verified Qwen3-Omni LoRA package as the structured JSON baseline.
162
- 3. Treat Cosmos3-Nano compatibility and Cosmos3-Super Forward-Dynamics LoRA as separate world-model branches with different metrics.
163
  4. Run a model-selection dry run on 3-8 episodes for any next backbone before scaling beyond the selected split.
164
  5. Promote Cosmos 3 to larger world-model experiments if video/sensor
165
  preprocessing, storage, and loss metrics justify the extra cost.
 
32
  | Priority | Model family | Best role for this project | Why it fits Xperience-10M | Current decision |
33
  | --- | --- | --- | --- | --- |
34
  | 1 | Qwen3-Omni | Multimodal instruction model and JSON task predictor | Accepts video/audio/language directly; depth, pose, mocap, and IMU can enter through the existing sensor bridge | Keep as the first selected-episode LoRA pilot |
35
+ | 2 | Cosmos 3 | Embodied world model, action generation, and synthetic future prediction | Designed for physical-world video generation, action-conditioned world modeling, and robot/world simulation style objectives | Add as the first world-model track after the data gate |
36
  | 3 | NVIDIA GR00T | Humanoid/action-policy foundation model | Xperience-10M mocap, hand motion, contacts, and egocentric interaction can support retargeting and action-understanding probes | Track as a humanoid policy branch, not the first LoRA pilot |
37
  | 4 | OpenVLA / OpenVLA-OFT | Open vision-language-action policy baseline | Useful when windows are converted into visual observation plus action-token targets | Use after action-space design is explicit |
38
  | 5 | openpi pi0/pi0.5 | Open robot policy and action expert baseline | Useful for action chunking, policy fine-tuning, and embodiment transfer experiments | Candidate for policy branch once action labels are retargeted |
 
100
  ## Why Cosmos 3 Should Be Added Next
101
 
102
  Cosmos 3 should not replace the Qwen3-Omni pilot. It should become the first
103
+ world-model track after the data gate. The reason is that the Xperience-10M
104
  modalities are unusually aligned with physical-world modeling:
105
 
106
  - video streams for visual state,
 
159
 
160
  1. Keep the selected 96/16/16 split as the comparison spine.
161
  2. Treat the verified Qwen3-Omni LoRA package as the structured JSON baseline.
162
+ 3. Treat Cosmos3-Nano compatibility and Cosmos3-Super Forward-Dynamics LoRA as separate Cosmos3 world-model artifacts with different metrics.
163
  4. Run a model-selection dry run on 3-8 episodes for any next backbone before scaling beyond the selected split.
164
  5. Promote Cosmos 3 to larger world-model experiments if video/sensor
165
  preprocessing, storage, and loss metrics justify the extra cost.
PROJECT_README.md CHANGED
@@ -39,7 +39,7 @@
39
 
40
  **Updated:** 2026-06-21.
41
 
42
- **Scope:** one public sample episode for raw-file inspection and reproducible task construction; selected 128-episode public-safe artifacts for same-split comparison and model-branch diagnostics. Raw Xperience-10M MP4/HDF5/RRD files, full Qwen weights, and gated data are not redistributed here.
43
 
44
  ## Contents
45
 
@@ -73,7 +73,7 @@ The multilingual README files are reader guides. The canonical technical evidenc
73
  <tbody>
74
  <tr>
75
  <td><strong>Two result lines</strong></td>
76
- <td><strong>1 sample episode</strong> for task construction and reproducibility. <strong>128 selected episodes</strong> for same-split baselines and model-branch comparison.</td>
77
  </tr>
78
  <tr>
79
  <td><strong>180 method-task records</strong></td>
@@ -132,7 +132,7 @@ The public suite is organized around two result lines. Keep them separate when r
132
  <td><strong>128 selected episodes</strong></td>
133
  <td>Selected held-out 96/16/16 split: 34,269 exported windows with public-safe processed features linked to official gated episode paths.</td>
134
  <td>140/140 selected-128 scores: 134 direct + 6 compact-proxy.</td>
135
- <td>Same-split comparison, model-branch diagnostics, and scale-up planning.</td>
136
  <td>Reading proxy cells as direct raw-target measurements.</td>
137
  </tr>
138
  </tbody>
@@ -227,26 +227,25 @@ Cosmos3-Super Forward-Dynamics LoRA is published as a separate fine-tuned adapte
227
 
228
  ### Qwen3-Omni Run Versions
229
 
230
- These are Qwen3-Omni run versions, not the three project-level public result layers. The 20-task matrix uses **Qwen3-Omni v6 LoRA**. v5 remains the pinned prior release. v1-v4 are lineage/ablation evidence.
231
 
232
  <table>
233
  <thead>
234
  <tr>
235
- <th width="10%">Run</th>
236
- <th width="26%">What changed</th>
237
- <th width="12%">Eval samples</th>
238
- <th width="12%">JSON validity</th>
239
- <th width="12%">Contact acc.</th>
240
- <th>Public role</th>
241
  </tr>
242
  </thead>
243
  <tbody>
244
- <tr><td><strong>v1</strong></td><td>Selected-128 validation-aware LoRA baseline.</td><td>448</td><td>0.8750</td><td>0.6451</td><td>Superseded lineage evidence.</td></tr>
245
- <tr><td><strong>v2</strong></td><td>Structured-JSON reuse full-8-GPU LoRA.</td><td>448</td><td>0.9978</td><td>0.7188</td><td>Superseded lineage evidence.</td></tr>
246
- <tr><td><strong>v3</strong></td><td>Strict-label prompt/eval over the v2 adapter.</td><td>448</td><td>1.0000</td><td>0.7210</td><td>Prompt/eval lineage evidence.</td></tr>
247
- <tr><td><strong>v4</strong></td><td>Four-epoch structured-JSON LoRA.</td><td>448</td><td>1.0000</td><td>0.7299</td><td>Superseded metric tradeoff run.</td></tr>
248
- <tr><td><strong>v5</strong></td><td>Multiscale cap96 LoRA.</td><td>4,032</td><td>1.0000</td><td>0.7865</td><td>Pinned prior release and comparison baseline.</td></tr>
249
- <tr><td><strong>v6</strong></td><td>Rank64/lr5e-5 multiscale LoRA.</td><td>4,032</td><td>0.9990</td><td>0.8177</td><td>Current public 20-task Qwen row.</td></tr>
250
  </tbody>
251
  </table>
252
 
@@ -338,7 +337,7 @@ embodied-AI research infrastructure:
338
  <tr><td><strong>Multimodal data understanding</strong></td><td>Parses the public sample into synchronized windows across video, audio, depth, pose/SLAM, mocap, IMU, calibration, and language-derived signals.</td></tr>
339
  <tr><td><strong>Task design</strong></td><td>Defines 20 human-readable tasks in one unified public-sample suite, plus four direction-extension probes with inputs, outputs, process modules, metrics, and case-study walkthroughs.</td></tr>
340
  <tr><td><strong>Model and evaluation discipline</strong></td><td>Runs minimal and compact neural baselines, records predictions/metrics, keeps chronological split boundaries explicit, and separates sample evidence from held-out claims.</td></tr>
341
- <tr><td><strong>Scale-up planning</strong></td><td>Connects the public-sample pipeline to 32/128-episode held-out pilots, Qwen3-Omni LoRA, Cosmos-style world-model branches, policy-model branches, and the future Xperience-native foundation-model pretraining goal.</td></tr>
342
  </tbody>
343
  </table>
344
 
@@ -387,7 +386,7 @@ and [`docs/data/project_brief.json`](docs/data/project_brief.json).
387
  <tr><td><strong>Hugging Face Space</strong></td><td>Hub-hosted copy of the dashboard and static app assets.</td></tr>
388
  <tr><td><strong>HF artifact dataset</strong></td><td>Public-safe metrics, reports, website JSON, result packages, and derived evidence files.</td></tr>
389
  <tr><td><strong>HF baseline model repo</strong></td><td>Minimal/neural baseline weights, figures, metrics, and mirrored task artifacts.</td></tr>
390
- <tr><td><strong>Qwen3-Omni and Cosmos3 model repos</strong></td><td>Adapter-specific public weights or package cards when Qwen3-Omni v6, Cosmos3-Super, or Cosmos3-Nano branches are verified and publishable.</td></tr>
391
  </tbody>
392
  </table>
393
 
@@ -449,7 +448,7 @@ Current contributions:
449
  - a generated four-direction research taxonomy matching the Ropedia job tracks,
450
  - four additional direction-extension probes with minimal and neural baselines,
451
  - human-readable research task cards and an interactive scrub/play walkthrough storyboard for every task,
452
- - an interactive research roadmap connecting 20 tasks, four research tracks, current sample evidence, the Qwen3-Omni scale-up path, and foundation-model branch selection,
453
  - a next-milestone track for Qwen3-Omni fine-tuning, Cosmos 3 world modeling, and sensor-bridge evaluation,
454
  - a future pretraining plan for an Xperience Embodied Foundation Model over the full corpus after smaller multi-episode stages prove value,
455
  - metrics, predictions, model weights, manifests, charts, and a two-level
@@ -692,7 +691,7 @@ Hugging Face Space app:
692
  <tr><td><strong>Dataset context</strong></td><td><a href="XPERIENCE10M_DATASET_CARD_ALIGNMENT.md">XPERIENCE10M_DATASET_CARD_ALIGNMENT.md</a><br>official dataset links</td><td>Explains the official dataset, public sample, modalities, access boundary, and what this repo uses.</td></tr>
693
  <tr><td><strong>Visual assets</strong></td><td><a href="FIGURE_INDEX.md">FIGURE_INDEX.md</a><br><a href="docs/assets/">docs/assets/</a></td><td>Shows the task-suite graphic, modality thumbnails, pipeline diagrams, charts, and logo assets.</td></tr>
694
  <tr><td><strong>Evaluation protocol</strong></td><td><a href="EVALUATION_PROTOCOL.md">EVALUATION_PROTOCOL.md</a><br><a href="docs/data/evaluation_protocol.json">evaluation_protocol.json</a></td><td>Defines the task unit, split, metrics, leakage controls, and current limitations.</td></tr>
695
- <tr><td><strong>Research roadmap</strong></td><td><a href="RESEARCH_ROADMAP.md">RESEARCH_ROADMAP.md</a><br><a href="docs/data/research_roadmap.json">research_roadmap.json</a></td><td>Shows the path from sample-level task development to multi-episode work, larger model branches, and the future native-pretraining goal.</td></tr>
696
  <tr><td><strong>Additional development directions</strong></td><td><a href="ADDITIONAL_DEVELOPMENT_DIRECTIONS.md">ADDITIONAL_DEVELOPMENT_DIRECTIONS.md</a><br><a href="docs/data/additional_development_directions.json">additional_development_directions.json</a></td><td>Records concrete non-backbone tracks: taxonomy, benchmark protocol, representation learning, skill graphs, affordances, 3D/4D memory, QA, and policy transfer.</td></tr>
697
  <tr><td><strong>Xperience Embodied Foundation Model plan</strong></td><td><a href="XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md">XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md</a></td><td>Describes the long-term full-corpus pretraining goal, target modules, objectives, staged scale-up, hardware ranges, and evaluation protocol.</td></tr>
698
  <tr><td><strong>Minimal heads</strong></td><td>softmax<br>ridge projection/regression<br>multi-label logistic heads</td><td>Keeps every input/output contract visible and inspectable.</td></tr>
@@ -774,11 +773,11 @@ completions because the 128 export lacks raw interaction strings and paired
774
  video-view embeddings. The verified model-output probe package adds task-16
775
  action/object relation scores for Qwen3-Omni and Cosmos3-Super, plus a task-13
776
  long-horizon next-action score for Cosmos3-Nano derived from its existing
777
- held-out future-window predictions. Metadata-only baselines and model branches
778
  now have scored records on all 20 axes; six compact-proxy scores stay
779
  explicitly marked instead of being blended into direct-target metrics.
780
  Cosmos3-Super forward-dynamics LoRA
781
- remains a branch card because its camera-pose proxy MSE is not one of the 20
782
  task metrics. The machine-readable copies are
783
  [`docs/data/unified_task_model_radar.json`](docs/data/unified_task_model_radar.json)
784
  and
@@ -880,7 +879,7 @@ docs/
880
  data/additional_development_directions.json # concrete non-backbone project directions
881
  data/summary_metrics.json # website-readable metrics bundle
882
  data/task_suite_20.json # unified 20-task suite bundle
883
- data/unified_task_model_radar.json # 20-task radar values and model-branch overlays
884
  data/single_episode_task_model_radar.json # 1-episode split radar values
885
  data/episode128_task_model_radar.json # 128-episode split radar values
886
  data/task_method_20_result_matrix.json # 9-method x 20-task result matrix
@@ -1128,9 +1127,9 @@ Current status in this repo:
1128
  - qwen3_lora_adapter_repo: https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep
1129
  - cosmos3_super_lora_adapter_repo: https://huggingface.co/cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep
1130
  - 128_aligned_baselines: unified 20-task axes for simple and neural baselines, including metadata/text rows and public-safe compact-proxy rows where raw-feature targets are required
1131
- - cosmos3_nano_branch: verified Cosmos3-Nano future-window compatibility package, 378 held-out future-window predictions from 14 test episodes
1132
- - cosmos3_super_branch: verified Cosmos3-Super Reasoner base-weight JSON-task evaluation, 448 held-out predictions from 14 test episodes; JSON validity 51.12%, action macro-F1 0.0008, contact accuracy 32.14%, transition accuracy 36.83%
1133
- - cosmos3_super_forward_dynamics_lora: verified 8-GPU FSDP LoRA branch over camera-pose proxy targets; 2,848 train rows, 512 val rows, 448 test rows, 26.2M adapter parameters, val MSE 4.0082, test MSE 3.6853; public package excludes safetensors
1134
  - gated dataset: available for selected multi-episode data preparation
1135
  - source_discovery: `results/omni_finetune/source_discovery.json`
1136
  - data_status: `results/omni_finetune/DATA_ACCESS_STATUS.md`
@@ -1249,7 +1248,7 @@ The package copies only small derived artifacts such as metrics, predictions,
1249
  confusion matrices, run reports, manifests, validation summaries, and training
1250
  metadata. The exact required eval files and primary metrics come from the
1251
  selected backbone contract in `configs/omni_backbones`, so Qwen3-Omni,
1252
- Cosmos-style world models, and VLA/policy branches can share the same verified
1253
  publication gate once their model-specific evaluators exist. The package
1254
  excludes raw Xperience-10M files, base-model weights, adapter or checkpoint
1255
  weights, full checkpoints, and large archives.
@@ -1277,7 +1276,7 @@ python scripts/omni/export_model_neutral_window_index.py \
1277
  ```
1278
 
1279
  This produces `window_index.jsonl` and `window_index_manifest.json` so Cosmos-
1280
- style world models and VLA/policy branches can reuse the same split-checked
1281
  windows without depending on Qwen chat-message records.
1282
 
1283
  ### Uploading Qwen3-Omni LoRA artifacts
@@ -1308,14 +1307,14 @@ Network availability to `huggingface.co` is required.
1308
 
1309
  ### Foundation Backbone Plan
1310
 
1311
- The next modeling plan tracks several foundation-model branches instead of
1312
  assuming one backbone solves every Xperience-10M objective.
1313
 
1314
  | Branch | Current role | When to use it |
1315
  | --- | --- | --- |
1316
  | Qwen3-Omni | First trainable multimodal LoRA pilot | Use for the selected 128-episode held-out baseline over video/audio/language plus sensor-bridge features. |
1317
- | Cosmos 3 | First world-model/action-generation branch | Use now for future-window compatibility analysis and the verified Cosmos3-Super forward-dynamics LoRA branch; compare its loss metrics separately from Qwen JSON-task accuracy. |
1318
- | GR00T | Humanoid/action-policy branch | Use after mocap/contact retargeting creates well-defined humanoid action targets. |
1319
  | OpenVLA / openpi | Open VLA/policy baselines | Use after the project defines robot-compatible or action-token targets. |
1320
  | Gemini Robotics | External reasoning reference | Use only for qualitative comparison or annotation support unless local trainable access exists. |
1321
  | Xperience Embodied Foundation Model | Future Xperience-native pretraining goal | Use only after multi-episode pilots, full-corpus storage, distributed training infrastructure, and scaling evidence justify a from-scratch domain model. |
@@ -1333,7 +1332,7 @@ so the public claims stay precise:
1333
  | Pipeline track | First concrete pipeline | Claim boundary |
1334
  | --- | --- | --- |
1335
  | Spatial intelligence models | Build scene/object memory targets from multiview RGB, depth, pose, calibration, object cues, and language prompts. | Ready as a geometry/reasoning pipeline; strong claims need raw depth/pose artifacts and held-out spatial metrics. |
1336
- | Human-video world models | Predict next action, next subtask, future object set, contact transition, and future state from observed interaction windows. | Partially evidenced by future-task probes and Cosmos-style branches; visual/latent future quality still needs stronger metrics. |
1337
  | Vision-language-action models | Convert egocentric video, captions, hand/body motion, contacts, and objects into action chunks or policy-compatible targets. | Feasible, but gated by action-token conversion, normalization, retargeting evidence, and held-out policy metrics. |
1338
 
1339
  High-resolution slide diagrams for the three tracks are published in
@@ -1386,7 +1385,7 @@ python scripts/omni/audit_verified_omni_package.py \
1386
  --package-dir results/omni_finetune/verified_public/<eval_run_id>
1387
  ```
1388
 
1389
- Create a new planned backbone branch from an existing contract template with:
1390
 
1391
  ```bash
1392
  python scripts/omni/scaffold_omni_backbone.py \
@@ -1402,7 +1401,7 @@ python scripts/omni/scaffold_omni_backbone.py \
1402
 
1403
  Each backbone config declares the checkpoint gate, required train/eval files,
1404
  allowed public artifacts, and forbidden private or heavyweight artifacts. This
1405
- keeps Qwen3-Omni, Cosmos-style world models, and policy/VLA branches on the same
1406
  split, validation, and publication discipline even though their training targets
1407
  are different.
1408
 
 
39
 
40
  **Updated:** 2026-06-21.
41
 
42
+ **Scope:** one public sample episode for raw-file inspection and reproducible task construction; selected 128-episode public-safe artifacts for same-split metadata/raw baselines, Qwen3-Omni v6 LoRA, Cosmos3-Super Reasoner, and Cosmos3-Nano Future Window. Raw Xperience-10M MP4/HDF5/RRD files, Qwen3 base weights, Cosmos3 base weights, and gated data are not redistributed here.
43
 
44
  ## Contents
45
 
 
73
  <tbody>
74
  <tr>
75
  <td><strong>Two result lines</strong></td>
76
+ <td><strong>1 sample episode</strong> for task construction and reproducibility. <strong>128 selected episodes</strong> for same-split metadata/raw baselines plus Qwen3-Omni v6 and Cosmos3 diagnostics.</td>
77
  </tr>
78
  <tr>
79
  <td><strong>180 method-task records</strong></td>
 
132
  <td><strong>128 selected episodes</strong></td>
133
  <td>Selected held-out 96/16/16 split: 34,269 exported windows with public-safe processed features linked to official gated episode paths.</td>
134
  <td>140/140 selected-128 scores: 134 direct + 6 compact-proxy.</td>
135
+ <td>Same-split metadata/raw baseline comparison, Qwen3-Omni v6 diagnostics, Cosmos3 diagnostics, and scale-up planning.</td>
136
  <td>Reading proxy cells as direct raw-target measurements.</td>
137
  </tr>
138
  </tbody>
 
227
 
228
  ### Qwen3-Omni Run Versions
229
 
230
+ These are Qwen3-Omni run versions inside **Line 2: selected 128 episodes**. They are not the project evidence lines. The 20-task matrix uses **Qwen3-Omni v6 LoRA**; **v5** remains the pinned prior multiscale release; **v1-v4** are lineage and ablation evidence.
231
 
232
  <table>
233
  <thead>
234
  <tr>
235
+ <th width="8%">Run</th>
236
+ <th width="26%">Purpose</th>
237
+ <th width="28%">Main change</th>
238
+ <th width="16%">Eval signal</th>
239
+ <th>Use now</th>
 
240
  </tr>
241
  </thead>
242
  <tbody>
243
+ <tr><td><strong>v1</strong></td><td>Prove the selected-128 LoRA/eval/package loop.</td><td>First verified 96/16/16 selected-episode Qwen3-Omni LoRA run.</td><td>448 eval; JSON 0.8750; contact 0.6451.</td><td>Lineage only.</td></tr>
244
+ <tr><td><strong>v2</strong></td><td>Make answers schema-checked.</td><td>Structured-JSON contract with full-8-GPU LoRA on the same split.</td><td>448 eval; JSON 0.9978; contact 0.7188.</td><td>Structured-output ablation.</td></tr>
245
+ <tr><td><strong>v3</strong></td><td>Separate prompt/eval effects from training.</td><td>Strict-label prompt/eval over the v2 adapter; no new adapter training.</td><td>448 eval; JSON 1.0000; contact 0.7210.</td><td>Prompt/eval ablation.</td></tr>
246
+ <tr><td><strong>v4</strong></td><td>Test longer structured-JSON LoRA training.</td><td>New four-epoch full-8-GPU adapter on the same selected split.</td><td>448 eval; JSON 1.0000; contact 0.7299.</td><td>Overfit/metric-tradeoff evidence.</td></tr>
247
+ <tr><td><strong>v5</strong></td><td>Move to denser multiscale evaluation.</td><td>Multiscale cap96 export with 4,032 held-out predictions.</td><td>4,032 eval; JSON 1.0000; contact 0.7865.</td><td>Pinned prior release; stronger on several non-contact metrics.</td></tr>
248
+ <tr><td><strong>v6</strong></td><td>Publish the current Qwen 20-task row.</td><td>Rank64/lr5e-5 multiscale LoRA plus verified task-specific probes.</td><td>4,032 eval; JSON 0.9990; contact 0.8177.</td><td>Current public 20-task Qwen3-Omni row.</td></tr>
249
  </tbody>
250
  </table>
251
 
 
337
  <tr><td><strong>Multimodal data understanding</strong></td><td>Parses the public sample into synchronized windows across video, audio, depth, pose/SLAM, mocap, IMU, calibration, and language-derived signals.</td></tr>
338
  <tr><td><strong>Task design</strong></td><td>Defines 20 human-readable tasks in one unified public-sample suite, plus four direction-extension probes with inputs, outputs, process modules, metrics, and case-study walkthroughs.</td></tr>
339
  <tr><td><strong>Model and evaluation discipline</strong></td><td>Runs minimal and compact neural baselines, records predictions/metrics, keeps chronological split boundaries explicit, and separates sample evidence from held-out claims.</td></tr>
340
+ <tr><td><strong>Scale-up planning</strong></td><td>Connects the public-sample pipeline to 32/128-episode held-out pilots, Qwen3-Omni LoRA, Cosmos-style world-model tracks, policy/VLA tracks, and the future Xperience-native foundation-model pretraining goal.</td></tr>
341
  </tbody>
342
  </table>
343
 
 
386
  <tr><td><strong>Hugging Face Space</strong></td><td>Hub-hosted copy of the dashboard and static app assets.</td></tr>
387
  <tr><td><strong>HF artifact dataset</strong></td><td>Public-safe metrics, reports, website JSON, result packages, and derived evidence files.</td></tr>
388
  <tr><td><strong>HF baseline model repo</strong></td><td>Minimal/neural baseline weights, figures, metrics, and mirrored task artifacts.</td></tr>
389
+ <tr><td><strong>Qwen3-Omni and Cosmos3 model repos</strong></td><td>Adapter-specific public weights or package cards when Qwen3-Omni v6, Cosmos3-Super, or Cosmos3-Nano runs are verified and publishable.</td></tr>
390
  </tbody>
391
  </table>
392
 
 
448
  - a generated four-direction research taxonomy matching the Ropedia job tracks,
449
  - four additional direction-extension probes with minimal and neural baselines,
450
  - human-readable research task cards and an interactive scrub/play walkthrough storyboard for every task,
451
+ - an interactive research roadmap connecting 20 tasks, four research tracks, current sample evidence, the Qwen3-Omni scale-up path, and foundation-model track selection,
452
  - a next-milestone track for Qwen3-Omni fine-tuning, Cosmos 3 world modeling, and sensor-bridge evaluation,
453
  - a future pretraining plan for an Xperience Embodied Foundation Model over the full corpus after smaller multi-episode stages prove value,
454
  - metrics, predictions, model weights, manifests, charts, and a two-level
 
691
  <tr><td><strong>Dataset context</strong></td><td><a href="XPERIENCE10M_DATASET_CARD_ALIGNMENT.md">XPERIENCE10M_DATASET_CARD_ALIGNMENT.md</a><br>official dataset links</td><td>Explains the official dataset, public sample, modalities, access boundary, and what this repo uses.</td></tr>
692
  <tr><td><strong>Visual assets</strong></td><td><a href="FIGURE_INDEX.md">FIGURE_INDEX.md</a><br><a href="docs/assets/">docs/assets/</a></td><td>Shows the task-suite graphic, modality thumbnails, pipeline diagrams, charts, and logo assets.</td></tr>
693
  <tr><td><strong>Evaluation protocol</strong></td><td><a href="EVALUATION_PROTOCOL.md">EVALUATION_PROTOCOL.md</a><br><a href="docs/data/evaluation_protocol.json">evaluation_protocol.json</a></td><td>Defines the task unit, split, metrics, leakage controls, and current limitations.</td></tr>
694
+ <tr><td><strong>Research roadmap</strong></td><td><a href="RESEARCH_ROADMAP.md">RESEARCH_ROADMAP.md</a><br><a href="docs/data/research_roadmap.json">research_roadmap.json</a></td><td>Shows the path from sample-level task development to multi-episode work, larger model tracks, and the future native-pretraining goal.</td></tr>
695
  <tr><td><strong>Additional development directions</strong></td><td><a href="ADDITIONAL_DEVELOPMENT_DIRECTIONS.md">ADDITIONAL_DEVELOPMENT_DIRECTIONS.md</a><br><a href="docs/data/additional_development_directions.json">additional_development_directions.json</a></td><td>Records concrete non-backbone tracks: taxonomy, benchmark protocol, representation learning, skill graphs, affordances, 3D/4D memory, QA, and policy transfer.</td></tr>
696
  <tr><td><strong>Xperience Embodied Foundation Model plan</strong></td><td><a href="XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md">XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md</a></td><td>Describes the long-term full-corpus pretraining goal, target modules, objectives, staged scale-up, hardware ranges, and evaluation protocol.</td></tr>
697
  <tr><td><strong>Minimal heads</strong></td><td>softmax<br>ridge projection/regression<br>multi-label logistic heads</td><td>Keeps every input/output contract visible and inspectable.</td></tr>
 
773
  video-view embeddings. The verified model-output probe package adds task-16
774
  action/object relation scores for Qwen3-Omni and Cosmos3-Super, plus a task-13
775
  long-horizon next-action score for Cosmos3-Nano derived from its existing
776
+ held-out future-window predictions. Metadata-only baselines and model diagnostics
777
  now have scored records on all 20 axes; six compact-proxy scores stay
778
  explicitly marked instead of being blended into direct-target metrics.
779
  Cosmos3-Super forward-dynamics LoRA
780
+ remains a separate artifact card because its camera-pose proxy MSE is not one of the 20
781
  task metrics. The machine-readable copies are
782
  [`docs/data/unified_task_model_radar.json`](docs/data/unified_task_model_radar.json)
783
  and
 
879
  data/additional_development_directions.json # concrete non-backbone project directions
880
  data/summary_metrics.json # website-readable metrics bundle
881
  data/task_suite_20.json # unified 20-task suite bundle
882
+ data/unified_task_model_radar.json # 20-task radar values and method overlays
883
  data/single_episode_task_model_radar.json # 1-episode split radar values
884
  data/episode128_task_model_radar.json # 128-episode split radar values
885
  data/task_method_20_result_matrix.json # 9-method x 20-task result matrix
 
1127
  - qwen3_lora_adapter_repo: https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep
1128
  - cosmos3_super_lora_adapter_repo: https://huggingface.co/cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep
1129
  - 128_aligned_baselines: unified 20-task axes for simple and neural baselines, including metadata/text rows and public-safe compact-proxy rows where raw-feature targets are required
1130
+ - cosmos3_nano: verified Cosmos3-Nano future-window compatibility package, 378 held-out future-window predictions from 14 test episodes
1131
+ - cosmos3_super_reasoner: verified Cosmos3-Super Reasoner base-weight JSON-task evaluation, 448 held-out predictions from 14 test episodes; JSON validity 51.12%, action macro-F1 0.0008, contact accuracy 32.14%, transition accuracy 36.83%
1132
+ - cosmos3_super_forward_dynamics_lora: verified 8-GPU FSDP LoRA artifact over camera-pose proxy targets; 2,848 train rows, 512 val rows, 448 test rows, 26.2M adapter parameters, val MSE 4.0082, test MSE 3.6853; public package excludes safetensors
1133
  - gated dataset: available for selected multi-episode data preparation
1134
  - source_discovery: `results/omni_finetune/source_discovery.json`
1135
  - data_status: `results/omni_finetune/DATA_ACCESS_STATUS.md`
 
1248
  confusion matrices, run reports, manifests, validation summaries, and training
1249
  metadata. The exact required eval files and primary metrics come from the
1250
  selected backbone contract in `configs/omni_backbones`, so Qwen3-Omni,
1251
+ Cosmos-style world models, and VLA/policy tracks can share the same verified
1252
  publication gate once their model-specific evaluators exist. The package
1253
  excludes raw Xperience-10M files, base-model weights, adapter or checkpoint
1254
  weights, full checkpoints, and large archives.
 
1276
  ```
1277
 
1278
  This produces `window_index.jsonl` and `window_index_manifest.json` so
1279
+ Cosmos-style world models and VLA/policy tracks can reuse the same split-checked
1280
  windows without depending on Qwen chat-message records.
1281
 
1282
  ### Uploading Qwen3-Omni LoRA artifacts
 
1307
 
1308
  ### Foundation Backbone Plan
1309
 
1310
+ The next modeling plan covers several foundation-model tracks instead of
1311
  assuming one backbone solves every Xperience-10M objective.
1312
 
1313
  | Branch | Current role | When to use it |
1314
  | --- | --- | --- |
1315
  | Qwen3-Omni | First trainable multimodal LoRA pilot | Use for the selected 128-episode held-out baseline over video/audio/language plus sensor-bridge features. |
1316
+ | Cosmos 3 | First world-model/action-generation track | Use now for future-window compatibility analysis and the verified Cosmos3-Super forward-dynamics LoRA artifact; compare its loss metrics separately from Qwen JSON-task accuracy. |
1317
+ | GR00T | Humanoid/action-policy track | Use after mocap/contact retargeting creates well-defined humanoid action targets. |
1318
  | OpenVLA / openpi | Open VLA/policy baselines | Use after the project defines robot-compatible or action-token targets. |
1319
  | Gemini Robotics | External reasoning reference | Use only for qualitative comparison or annotation support unless local trainable access exists. |
1320
  | Xperience Embodied Foundation Model | Future Xperience-native pretraining goal | Use only after multi-episode pilots, full-corpus storage, distributed training infrastructure, and scaling evidence justify a from-scratch domain model. |
 
1332
  | Pipeline track | First concrete pipeline | Claim boundary |
1333
  | --- | --- | --- |
1334
  | Spatial intelligence models | Build scene/object memory targets from multiview RGB, depth, pose, calibration, object cues, and language prompts. | Ready as a geometry/reasoning pipeline; strong claims need raw depth/pose artifacts and held-out spatial metrics. |
1335
+ | Human-video world models | Predict next action, next subtask, future object set, contact transition, and future state from observed interaction windows. | Partially evidenced by future-task probes and Cosmos-style artifacts; visual/latent future quality still needs stronger metrics. |
1336
  | Vision-language-action models | Convert egocentric video, captions, hand/body motion, contacts, and objects into action chunks or policy-compatible targets. | Feasible, but gated by action-token conversion, normalization, retargeting evidence, and held-out policy metrics. |
1337
 
1338
  High-resolution slide diagrams for the three tracks are published in
 
1385
  --package-dir results/omni_finetune/verified_public/<eval_run_id>
1386
  ```
1387
 
1388
+ Create a new planned backbone track from an existing contract template with:
1389
 
1390
  ```bash
1391
  python scripts/omni/scaffold_omni_backbone.py \
 
1401
 
1402
  Each backbone config declares the checkpoint gate, required train/eval files,
1403
  allowed public artifacts, and forbidden private or heavyweight artifacts. This
1404
+ keeps Qwen3-Omni, Cosmos-style world models, and policy/VLA tracks on the same
1405
  split, validation, and publication discipline even though their training targets
1406
  are different.
1407
 
README.ko.md CHANGED
@@ -47,11 +47,11 @@
47
  | 라인 | 데이터 단위 | 방법과 결과 | 용도 |
48
  | --- | --- | --- | --- |
49
  | 1 sample episode | 5,821 frames, 1,161 aligned 20-frame windows, 8,546 dimensions. | Minimal + Neural MLP가 20 tasks 전체를 평가; 40/40 scored records; 모두 direct scores. | Raw sample files, task definitions, reproducible baselines, task validity 확인. |
50
- | 128 selected episodes | 96/16/16 split, 34,269 exported windows, public-safe features가 official gated episode paths에 연결됨. | Metadata simple/NN, raw-feature simple/NN, Qwen3-Omni, Cosmos3-Super, Cosmos3-Nano; 140/140 scored records; 134 direct + 6 compact proxy. | 같은 split에서 baselines model branches 비교; proxy targets는 명시 유지. |
51
 
52
  공식: single-episode 방법 2개 x 20 tasks = 40; 128-episode 방법 7개 x 20 tasks = 140; 전체 공개 matrix = 180/180 scored records.
53
 
54
- 방법 블록: Line 1은 task-head baselines(Minimal, Neural MLP)입니다. Line 2는 aligned baseline heads(metadata simple/NN, raw-feature simple/NN), Qwen3-Omni series(Qwen3-Omni v6 LoRA), Cosmos3 series(Cosmos3-Super Reasoner, Cosmos3-Nano Future Window)로 분리됩니다. Qwen3 v1-v6은 LoRA/eval lineage이며 project-level 3개 버전과 다릅니다. 20-task matrix는 v6을 사용하고 v5는 pinned prior release입니다. Cosmos3-Super Forward-Dynamics LoRA는 별도의 adapter/weights/results artifact로 공개되며 20-task matrix method row에는 포함되지 않습니다.
55
 
56
  입구: [`TWO_EVIDENCE_LINES.md`](TWO_EVIDENCE_LINES.md), [`two_evidence_lines.json`](docs/data/two_evidence_lines.json), [`task_method_20_result_matrix.json`](docs/data/task_method_20_result_matrix.json), [`two_evidence_line_result_summary.json`](docs/data/two_evidence_line_result_summary.json).
57
 
 
47
  | 라인 | 데이터 단위 | 방법과 결과 | 용도 |
48
  | --- | --- | --- | --- |
49
  | 1 sample episode | 5,821 frames, 1,161 aligned 20-frame windows, 8,546 dimensions. | Minimal + Neural MLP가 20 tasks 전체를 평가; 40/40 scored records; 모두 direct scores. | Raw sample files, task definitions, reproducible baselines, task validity 확인. |
50
+ | 128 selected episodes | 96/16/16 split, 34,269 exported windows, public-safe features가 official gated episode paths에 연결됨. | Metadata simple/NN, raw-feature simple/NN, Qwen3-Omni v6, Cosmos3-Super, Cosmos3-Nano; 140/140 scored records; 134 direct + 6 compact proxy. | 같은 split에서 metadata/raw baselines, Qwen3-Omni diagnostics, Cosmos3 diagnostics 비교; proxy targets는 명시 유지. |
51
 
52
  공식: single-episode 방법 2개 x 20 tasks = 40; 128-episode 방법 7개 x 20 tasks = 140; 전체 공개 matrix = 180/180 scored records.
53
 
54
+ 방법 블록: Line 1은 task-head baselines(Minimal, Neural MLP)입니다. Line 2는 aligned baseline heads(metadata simple/NN, raw-feature simple/NN), Qwen3-Omni series(Qwen3-Omni v6 LoRA), Cosmos3 series(Cosmos3-Super Reasoner, Cosmos3-Nano Future Window)로 분리됩니다. Qwen3 v1-v6은 Line 2 내부의 LoRA/eval lineage이며 project evidence lines와 다릅니다. 20-task matrix는 v6을 사용하고 v5는 pinned prior release입니다. Cosmos3-Super Forward-Dynamics LoRA는 별도의 adapter/weights/results artifact로 공개되며 20-task matrix method row에는 포함되지 않습니다.
55
 
56
  입구: [`TWO_EVIDENCE_LINES.md`](TWO_EVIDENCE_LINES.md), [`two_evidence_lines.json`](docs/data/two_evidence_lines.json), [`task_method_20_result_matrix.json`](docs/data/task_method_20_result_matrix.json), [`two_evidence_line_result_summary.json`](docs/data/two_evidence_line_result_summary.json).
57
 
README.pt.md CHANGED
@@ -51,7 +51,7 @@ Este repositório transforma o episódio público de amostra do Xperience-10M em
51
 
52
  Fórmula: 2 métodos de um episódio x 20 tarefas = 40; 7 métodos de 128 episódios x 20 tarefas = 140; matriz pública total = 180/180 registros com score.
53
 
54
- Blocos de métodos: a linha 1 contém task-head baselines (Minimal, Neural MLP). A linha 2 separa aligned baseline heads (metadata simple/NN, raw-feature simple/NN), a série Qwen3-Omni (Qwen3-Omni v6 LoRA) e a série Cosmos3 (Cosmos3-Super Reasoner, Cosmos3-Nano Future Window). Qwen3 v1-v6 é uma linhagem LoRA/eval, não as três camadas públicas do projeto; a matriz de 20 tarefas usa v6 e v5 fica como pinned prior release. Cosmos3-Super Forward-Dynamics LoRA é publicado como adapter/pesos/resultados separado e não conta como linha de método na matriz de 20 tarefas.
55
 
56
  Entradas: [`TWO_EVIDENCE_LINES.md`](TWO_EVIDENCE_LINES.md), [`two_evidence_lines.json`](docs/data/two_evidence_lines.json), [`task_method_20_result_matrix.json`](docs/data/task_method_20_result_matrix.json), [`two_evidence_line_result_summary.json`](docs/data/two_evidence_line_result_summary.json).
57
 
 
51
 
52
  Fórmula: 2 métodos de um episódio x 20 tarefas = 40; 7 métodos de 128 episódios x 20 tarefas = 140; matriz pública total = 180/180 registros com score.
53
 
54
+ Blocos de métodos: a linha 1 contém task-head baselines (Minimal, Neural MLP). A linha 2 separa aligned baseline heads (metadata simple/NN, raw-feature simple/NN), a série Qwen3-Omni (Qwen3-Omni v6 LoRA) e a série Cosmos3 (Cosmos3-Super Reasoner, Cosmos3-Nano Future Window). Qwen3 v1-v6 é uma linhagem LoRA/eval interna à linha 2, não as evidence lines do projeto; a matriz de 20 tarefas usa v6 e v5 fica como pinned prior release. Cosmos3-Super Forward-Dynamics LoRA é publicado como adapter/pesos/resultados separado e não conta como linha de método na matriz de 20 tarefas.
55
 
56
  Entradas: [`TWO_EVIDENCE_LINES.md`](TWO_EVIDENCE_LINES.md), [`two_evidence_lines.json`](docs/data/two_evidence_lines.json), [`task_method_20_result_matrix.json`](docs/data/task_method_20_result_matrix.json), [`two_evidence_line_result_summary.json`](docs/data/two_evidence_line_result_summary.json).
57
 
docs/assets/charts/episode128_task_model_radar.svg CHANGED
docs/assets/charts/research_direction_coverage.svg CHANGED
docs/assets/charts/single_episode_task_model_radar.svg CHANGED
docs/data/additional_development_directions.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "title": "Additional Development Directions",
3
- "summary": "Concrete Xperience-10M project directions beyond the current minimal baselines, Qwen3-Omni LoRA plan, Cosmos/world-model branch, and long-term native pretraining goal.",
4
  "status": "planned_research_directions",
5
  "public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
6
  "directions": [
 
1
  {
2
  "title": "Additional Development Directions",
3
+ "summary": "Concrete Xperience-10M project directions beyond the current minimal baselines, Qwen3-Omni LoRA plan, Cosmos/world-model track, and long-term native pretraining goal.",
4
  "status": "planned_research_directions",
5
  "public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
6
  "directions": [
docs/data/artifact_index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Artifact Index",
3
- "generated_at_utc": "2026-06-21T08:55:13+00:00",
4
  "status": "pass",
5
  "artifact_count": 226,
6
  "missing": [],
@@ -81,8 +81,8 @@
81
  "surface": "website_hf",
82
  "shows": "Machine-readable copy of the current project status for website and HF mirrors.",
83
  "exists": true,
84
- "bytes": 23057,
85
- "sha256": "aa24087a4c80390869cbf771571dd04923f8cf1b5a2f773c70586a4bae10bd48"
86
  },
87
  {
88
  "id": "research_roadmap",
@@ -92,8 +92,8 @@
92
  "surface": "repo_hf",
93
  "shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
94
  "exists": true,
95
- "bytes": 15275,
96
- "sha256": "b7774813c9cddb49181d9589cf07aa9496756c09ddede41c7661a41b6e81a3a0"
97
  },
98
  {
99
  "id": "research_roadmap_json",
@@ -103,8 +103,8 @@
103
  "surface": "website_hf",
104
  "shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
105
  "exists": true,
106
- "bytes": 14133,
107
- "sha256": "5d73996f9bf4c3539beb5d428b21423a583d439fcf439faf8ab17f7364d53d88"
108
  },
109
  {
110
  "id": "foundation_model_plan",
@@ -114,8 +114,8 @@
114
  "surface": "repo_hf",
115
  "shows": "Defines the post-data-gate backbone choices: Qwen3-Omni first, Cosmos 3 for world modeling, and VLA/policy models after action-target conversion.",
116
  "exists": true,
117
- "bytes": 10996,
118
- "sha256": "a78e960ae0f0e815c2e26a69ec3b6071099fa7ccfb6ad860144cd7ee94e77e56"
119
  },
120
  {
121
  "id": "foundation_model_plan_json",
@@ -125,8 +125,8 @@
125
  "surface": "website_hf",
126
  "shows": "Machine-readable foundation-model selection matrix with source links, entry conditions, and evaluation additions.",
127
  "exists": true,
128
- "bytes": 13926,
129
- "sha256": "ccc80ed46eb961eb315f3060bdffa4676a05e73ef47ba25b1e5a675e25ce8754"
130
  },
131
  {
132
  "id": "three_foundation_pipelines",
@@ -222,7 +222,7 @@
222
  "path": "OMNI_MODEL_EXTENSION_CONTRACT.md",
223
  "kind": "scaleup_contract",
224
  "surface": "repo_hf",
225
- "shows": "Defines the shared manifest, episode split, held-out evaluation, packaging, and public-safety rules for Qwen3-Omni, Cosmos-style, and VLA/policy model branches.",
226
  "exists": true,
227
  "bytes": 8900,
228
  "sha256": "c4e51d0aa7536045c229418603a67c6b3c5f31c9d756ca7395cb0c9455f0ed6d"
@@ -323,8 +323,8 @@
323
  "surface": "website_hf",
324
  "shows": "Machine-readable enhancement pack for the website and Hugging Face mirrors.",
325
  "exists": true,
326
- "bytes": 20181,
327
- "sha256": "17453f9a949278b1f3038d68124f8f0e2441584d4c1384d482d2ac9ca295e97e"
328
  },
329
  {
330
  "id": "task_suite_enhancement_128_result",
@@ -345,8 +345,8 @@
345
  "surface": "repo_hf",
346
  "shows": "Regenerates the enhancement pack from committed 128-episode windows, baseline summaries, verified Qwen predictions, and Cosmos reference metrics.",
347
  "exists": true,
348
- "bytes": 27210,
349
- "sha256": "0e098d7c1a5c91ec8472d5eb8fc0ebab0305cf647d1f0f4f2ba6bd4c1d531546"
350
  },
351
  {
352
  "id": "xperience10m_128_episode_feature_index",
@@ -510,8 +510,8 @@
510
  "surface": "repo_hf",
511
  "shows": "Records concrete non-backbone Xperience-10M development tracks: taxonomy, benchmark protocol, representation learning, skill graphs, affordances, 3D/4D memory, QA, and policy transfer.",
512
  "exists": true,
513
- "bytes": 3137,
514
- "sha256": "cb4077bcd7e2b33efdd0306c7cb1f28dec547c71739fd52f7233218f90c54941"
515
  },
516
  {
517
  "id": "additional_development_directions_json",
@@ -521,8 +521,8 @@
521
  "surface": "website_hf",
522
  "shows": "Machine-readable additional development directions for the website and Hugging Face mirrors.",
523
  "exists": true,
524
- "bytes": 6121,
525
- "sha256": "4458c5b82062aed8c19bcc914f795ec217114422e9b6d54b624371c4b3c8681f"
526
  },
527
  {
528
  "id": "xperience_embodied_foundation_pretraining",
@@ -610,7 +610,7 @@
610
  "shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
611
  "exists": true,
612
  "bytes": 4432,
613
- "sha256": "7e921c5225389e3481f6133e6c3a1afb7f7f79ea5cdc4638ed3d1bfde48c63cf"
614
  },
615
  {
616
  "id": "source_alignment_validator",
@@ -631,8 +631,8 @@
631
  "surface": "repo_hf",
632
  "shows": "Publishes prepared Space, artifact dataset, and model bundles, including an explicit model-binary upload batch.",
633
  "exists": true,
634
- "bytes": 25097,
635
- "sha256": "7775c5e8767c0dba207fb5fda2d9f0d4a47280d978a1947baa39469fba977a69"
636
  },
637
  {
638
  "id": "github_package_dockerfile",
@@ -728,10 +728,10 @@
728
  "path": "docs/data/unified_task_model_radar.json",
729
  "kind": "website_data",
730
  "surface": "website_hf",
731
- "shows": "Stores normalized 20-axis radar values, raw task metrics, Qwen3/Cosmos overlay mappings, branch-card caveats, proxy flags, and source artifacts.",
732
  "exists": true,
733
- "bytes": 228805,
734
- "sha256": "e947ff0579014d5f2c928f689077958d94304a3ac9d978d8475ce5b799e03df8"
735
  },
736
  {
737
  "id": "single_episode_task_model_radar_json",
@@ -741,8 +741,8 @@
741
  "surface": "website_hf",
742
  "shows": "Machine-readable split radar for the one-episode Minimal and Neural MLP baselines, both scored on all 20 task contracts.",
743
  "exists": true,
744
- "bytes": 51097,
745
- "sha256": "f4975e8a1d02dd3a168660827fc92257257d18107887abc83ca225950fb283d7"
746
  },
747
  {
748
  "id": "episode128_task_model_radar_json",
@@ -750,10 +750,10 @@
750
  "path": "docs/data/episode128_task_model_radar.json",
751
  "kind": "website_data",
752
  "surface": "website_hf",
753
- "shows": "Machine-readable split radar for selected 128-episode metadata/raw baselines and verified Qwen3/Cosmos branches, now complete at 140/140 scored rows with proxy notes retained.",
754
  "exists": true,
755
- "bytes": 184945,
756
- "sha256": "36d500a4f64614a88ed80af88594289adf06753ea85a2273b9bdaf6cb8ca7f44"
757
  },
758
  {
759
  "id": "task_method_20_result_matrix_json",
@@ -764,7 +764,7 @@
764
  "shows": "Machine-readable 9-method by 20-task matrix where every method has 20 records and the current release is complete at 180/180 scored rows.",
765
  "exists": true,
766
  "bytes": 128509,
767
- "sha256": "5b02cd11edffe2e8f6f9d882bf5b895c308b8686cb6672e5df3c88e8e17a9ddd"
768
  },
769
  {
770
  "id": "task_method_20_result_matrix",
@@ -808,7 +808,7 @@
808
  "shows": "Machine-readable check that scored JSON-backed matrix cells match their declared metric source values.",
809
  "exists": true,
810
  "bytes": 561,
811
- "sha256": "a2c684b7c6a60dc5868b796eec41adbc3a1eb37235d357271f3171b20f85c28f"
812
  },
813
  {
814
  "id": "task_method_20_source_audit",
@@ -819,7 +819,7 @@
819
  "shows": "Reader-facing source-value audit for the 180-result matrix.",
820
  "exists": true,
821
  "bytes": 447,
822
- "sha256": "0b1bc352200143957a177947b197cab6ee46602cda338b94bfc81922568ee9ea"
823
  },
824
  {
825
  "id": "two_evidence_line_map_chart",
@@ -838,7 +838,7 @@
838
  "path": "docs/assets/charts/unified_task_model_radar.svg",
839
  "kind": "generated_figure",
840
  "surface": "website_hf",
841
- "shows": "Compares minimal and neural MLP baselines across all 20 tasks, with Qwen3/Cosmos task-aligned model overlays.",
842
  "exists": true,
843
  "bytes": 57938,
844
  "sha256": "bb83b80b47fe679ebdce2c99378a4548120f1c8cc2d725b88e409d8c386dcbf8"
@@ -851,8 +851,8 @@
851
  "surface": "website_hf",
852
  "shows": "Separates the one-episode Minimal and Neural MLP 20/20 scored baselines into a clean two-polygon radar.",
853
  "exists": true,
854
- "bytes": 35230,
855
- "sha256": "e5516268336fd7289f38c0b80937d40080f3b1c804e82a409405d60e6eab03b0"
856
  },
857
  {
858
  "id": "episode128_task_model_radar_chart",
@@ -860,10 +860,10 @@
860
  "path": "docs/assets/charts/episode128_task_model_radar.svg",
861
  "kind": "generated_figure",
862
  "surface": "website_hf",
863
- "shows": "Separates the selected 128-episode methods: raw-feature simple/NN as complete 20/20 scored polygons and metadata/Qwen/Cosmos as task-aligned overlays.",
864
  "exists": true,
865
- "bytes": 51905,
866
- "sha256": "03e78b45fc91bab4c88e54bd0c3dc03afda4d55ef1f96569d7aedb2506d99065"
867
  },
868
  {
869
  "id": "unified_task_model_radar_builder",
@@ -873,8 +873,8 @@
873
  "surface": "repo_hf",
874
  "shows": "Regenerates the direction-aware radar chart and machine-readable metric overlay JSON.",
875
  "exists": true,
876
- "bytes": 68555,
877
- "sha256": "e7cf9d1d3e25117ccd0a2b93c69850d4201cd34bda97bf294fb3b037fc1aa351"
878
  },
879
  {
880
  "id": "task_method_20_gap_audit_builder",
@@ -915,7 +915,7 @@
915
  "path": "results/omni_finetune/model_output_probe_readiness/model_output_probe_readiness.json",
916
  "kind": "scaleup_status",
917
  "surface": "repo_hf",
918
- "shows": "Checks whether Qwen3/Cosmos branches have train, validation, and test prediction files before extending model overlays to all 20 task contracts.",
919
  "exists": true,
920
  "bytes": 4320,
921
  "sha256": "11cff26749bf6ad8b8ee028b18e0b4be5713ed8b5325578caa03be25d894263b"
@@ -928,8 +928,8 @@
928
  "surface": "repo_hf",
929
  "shows": "Audits model-output split availability and writes a readiness report without assigning new numeric task scores.",
930
  "exists": true,
931
- "bytes": 10520,
932
- "sha256": "741ee733068e87c52c8da2bd15987e2b4538b5e705592182d76c42b5cf34fe96"
933
  },
934
  {
935
  "id": "existing_model_output_task_probe",
@@ -937,7 +937,7 @@
937
  "path": "results/omni_finetune/model_output_task_probes_20260616/summary.json",
938
  "kind": "model_result",
939
  "surface": "repo_hf",
940
- "shows": "Scores task-specific Qwen3/Cosmos overlays only where verified held-out prediction JSON or compact target maps already contain the required targets.",
941
  "exists": true,
942
  "bytes": 5951,
943
  "sha256": "910477d2fba648605dda128d0ecd2a2c13cfa460573e350dc850014ac91c6c2b"
@@ -950,8 +950,8 @@
950
  "surface": "repo_hf",
951
  "shows": "Derives task-specific scores from committed verified model outputs without running new inference or backfilling absent targets.",
952
  "exists": true,
953
- "bytes": 69411,
954
- "sha256": "a78889e5225405fcbb0156aa8c63cbbe79022a12283d0b398c5f2bb015175fe7"
955
  },
956
  {
957
  "id": "a100_128_metadata_task_baselines",
@@ -1071,8 +1071,8 @@
1071
  "surface": "repo_hf",
1072
  "shows": "Catalogs public figures, charts, modality thumbnails, dimensions, hashes, roles, and source scripts.",
1073
  "exists": true,
1074
- "bytes": 6983,
1075
- "sha256": "48ea04c063df0745f2a31483d15baa71d420906b2ad7ce15fdb10760f41907e6"
1076
  },
1077
  {
1078
  "id": "figure_index_json",
@@ -1082,8 +1082,8 @@
1082
  "surface": "website_hf",
1083
  "shows": "Machine-readable visual asset index for website and Hugging Face mirrors.",
1084
  "exists": true,
1085
- "bytes": 19441,
1086
- "sha256": "b14b9cb1561db131827a8898fc42629c772eb173108d1fcbf1fbf931389da285"
1087
  },
1088
  {
1089
  "id": "figure_index_builder",
@@ -1093,8 +1093,8 @@
1093
  "surface": "repo_hf",
1094
  "shows": "Regenerates visual-asset hashes, dimensions, and source-script provenance.",
1095
  "exists": true,
1096
- "bytes": 16801,
1097
- "sha256": "b0e060c77a10d509ac471a83a7aa2f0ec9474a5b48fbb56cbc0d62c8ffa6fcd2"
1098
  },
1099
  {
1100
  "id": "brand_assets_json",
@@ -1160,7 +1160,7 @@
1160
  "shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
1161
  "exists": true,
1162
  "bytes": 8640,
1163
- "sha256": "1046f75bd4529244290822c67fde137ae1465a3b65e6308b75acb856e4bee191"
1164
  },
1165
  {
1166
  "id": "public_surface_qa",
@@ -1179,10 +1179,10 @@
1179
  "path": "PUBLIC_READER_MAP.md",
1180
  "kind": "project_path",
1181
  "surface": "repo_hf",
1182
- "shows": "Provides the first-pass navigation layer for GitHub, GitHub Pages, Hugging Face mirrors, model-branch repos, evidence layers, and claim boundaries.",
1183
  "exists": true,
1184
- "bytes": 4892,
1185
- "sha256": "d1121455dbd547a5f6111c8ec0edc3380586d824c853dff521ef640d872ae1fb"
1186
  },
1187
  {
1188
  "id": "public_reader_map_json",
@@ -1192,8 +1192,8 @@
1192
  "surface": "website_hf",
1193
  "shows": "Machine-readable public reader map used by the website and Hugging Face mirrors to keep entry points and surface responsibilities explicit.",
1194
  "exists": true,
1195
- "bytes": 5906,
1196
- "sha256": "4a229fc7f084dbaab14bf11e00e5128d0b73dd074fafbbd732b8b0cbf92c01df"
1197
  },
1198
  {
1199
  "id": "public_surface_qa_json",
@@ -1285,7 +1285,7 @@
1285
  "volatile": true,
1286
  "shows": "Records the last live GitHub/HF URL verification after upload.",
1287
  "exists": true,
1288
- "bytes": 184684,
1289
  "hash_policy": "existence_and_size_only"
1290
  },
1291
  {
@@ -1296,8 +1296,8 @@
1296
  "surface": "repo",
1297
  "shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
1298
  "exists": true,
1299
- "bytes": 67647,
1300
- "sha256": "d2b4af98e6fd8b23fd86cd068f2bbf887e5d69686dd62fe3bfc7e8251a6d75d6"
1301
  },
1302
  {
1303
  "id": "reproducibility_contract",
@@ -1329,8 +1329,8 @@
1329
  "surface": "repo_hf",
1330
  "shows": "Generates the selective artifact catalog from local files.",
1331
  "exists": true,
1332
- "bytes": 67519,
1333
- "sha256": "eae86845582f2551782fa7a81837bc3b30d67a050aced1a5f5158644ea0e6512"
1334
  },
1335
  {
1336
  "id": "publication_audit",
@@ -1365,7 +1365,7 @@
1365
  "volatile": true,
1366
  "shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
1367
  "exists": true,
1368
- "bytes": 1413010,
1369
  "hash_policy": "existence_and_size_only"
1370
  },
1371
  {
@@ -1377,7 +1377,7 @@
1377
  "volatile": true,
1378
  "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
1379
  "exists": true,
1380
- "bytes": 20542,
1381
  "hash_policy": "existence_and_size_only"
1382
  },
1383
  {
@@ -1542,8 +1542,8 @@
1542
  "surface": "website_hf",
1543
  "shows": "Presents the task suite and sample modality thumbnails with metrics generated from committed files.",
1544
  "exists": true,
1545
- "bytes": 1899884,
1546
- "sha256": "7bbd5b3c54ef151d598c827f5cb5416566c3106b198e7ad5c4665a03f2566a35"
1547
  },
1548
  {
1549
  "id": "modality_atlas",
@@ -1674,8 +1674,8 @@
1674
  "surface": "repo_hf",
1675
  "shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
1676
  "exists": true,
1677
- "bytes": 16045,
1678
- "sha256": "130578a51a77e2be0230da1288beee3528cff2c7a39830c91f0509682da4b404"
1679
  },
1680
  {
1681
  "id": "omni_model_comparison_json",
@@ -1685,8 +1685,8 @@
1685
  "surface": "repo_hf",
1686
  "shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
1687
  "exists": true,
1688
- "bytes": 82110,
1689
- "sha256": "ebbb0d0d28a1f4a5c7c9f015d772624eddadc0d382e4917c8dbdcc512a5b276d"
1690
  },
1691
  {
1692
  "id": "cosmos3_nano_verified_summary",
 
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Artifact Index",
3
+ "generated_at_utc": "2026-06-21T10:52:12+00:00",
4
  "status": "pass",
5
  "artifact_count": 226,
6
  "missing": [],
 
81
  "surface": "website_hf",
82
  "shows": "Machine-readable copy of the current project status for website and HF mirrors.",
83
  "exists": true,
84
+ "bytes": 23049,
85
+ "sha256": "9a06cc54d3b43362867a2fde9edc61d09f53df2d9ad761ecf95c862c76af31d2"
86
  },
87
  {
88
  "id": "research_roadmap",
 
92
  "surface": "repo_hf",
93
  "shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
94
  "exists": true,
95
+ "bytes": 15272,
96
+ "sha256": "559fa9e818f2c6fc7b926f880e9183200911317e70a26391f1830f4119ebc6b0"
97
  },
98
  {
99
  "id": "research_roadmap_json",
 
103
  "surface": "website_hf",
104
  "shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
105
  "exists": true,
106
+ "bytes": 14129,
107
+ "sha256": "a06d6525d9532b8608bf7be81eb9387deca3159b7c42bf38e107b4096953f351"
108
  },
109
  {
110
  "id": "foundation_model_plan",
 
114
  "surface": "repo_hf",
115
  "shows": "Defines the post-data-gate backbone choices: Qwen3-Omni first, Cosmos 3 for world modeling, and VLA/policy models after action-target conversion.",
116
  "exists": true,
117
+ "bytes": 11003,
118
+ "sha256": "24047e8692f69927d3fabf3c01058278e85651355f3749886493159971120cc6"
119
  },
120
  {
121
  "id": "foundation_model_plan_json",
 
125
  "surface": "website_hf",
126
  "shows": "Machine-readable foundation-model selection matrix with source links, entry conditions, and evaluation additions.",
127
  "exists": true,
128
+ "bytes": 13925,
129
+ "sha256": "77d4b2d5918ef1f776de6d29d34d523de95ba58df9526e2b255bed567434f932"
130
  },
131
  {
132
  "id": "three_foundation_pipelines",
 
222
  "path": "OMNI_MODEL_EXTENSION_CONTRACT.md",
223
  "kind": "scaleup_contract",
224
  "surface": "repo_hf",
225
+ "shows": "Defines the shared manifest, episode split, held-out evaluation, packaging, and public-safety rules for Qwen3-Omni, Cosmos3, and VLA/policy model tracks.",
226
  "exists": true,
227
  "bytes": 8900,
228
  "sha256": "c4e51d0aa7536045c229418603a67c6b3c5f31c9d756ca7395cb0c9455f0ed6d"
 
323
  "surface": "website_hf",
324
  "shows": "Machine-readable enhancement pack for the website and Hugging Face mirrors.",
325
  "exists": true,
326
+ "bytes": 20196,
327
+ "sha256": "9e1a3339425981dcf7931bf08684860864598bf679d0df86f93c656bacdb71bf"
328
  },
329
  {
330
  "id": "task_suite_enhancement_128_result",
 
345
  "surface": "repo_hf",
346
  "shows": "Regenerates the enhancement pack from committed 128-episode windows, baseline summaries, verified Qwen predictions, and Cosmos reference metrics.",
347
  "exists": true,
348
+ "bytes": 27225,
349
+ "sha256": "86e6098506b365cc92a9658d347645c285c5f61b5113eeaf1d170df0e2d7cc8f"
350
  },
351
  {
352
  "id": "xperience10m_128_episode_feature_index",
 
510
  "surface": "repo_hf",
511
  "shows": "Records concrete non-backbone Xperience-10M development tracks: taxonomy, benchmark protocol, representation learning, skill graphs, affordances, 3D/4D memory, QA, and policy transfer.",
512
  "exists": true,
513
+ "bytes": 3136,
514
+ "sha256": "decdd359d89694fe10873dcce6cee23e991de1b874ade72643314e879ade784e"
515
  },
516
  {
517
  "id": "additional_development_directions_json",
 
521
  "surface": "website_hf",
522
  "shows": "Machine-readable additional development directions for the website and Hugging Face mirrors.",
523
  "exists": true,
524
+ "bytes": 6120,
525
+ "sha256": "669d1523f767a8eda22bbe96ab54af99e102496a3d27f7dd850e08e2724e661f"
526
  },
527
  {
528
  "id": "xperience_embodied_foundation_pretraining",
 
610
  "shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
611
  "exists": true,
612
  "bytes": 4432,
613
+ "sha256": "db279081759eebb09a4ba53c56fb17a14f3546e13d058100494ac7745b901a1c"
614
  },
615
  {
616
  "id": "source_alignment_validator",
 
631
  "surface": "repo_hf",
632
  "shows": "Publishes prepared Space, artifact dataset, and model bundles, including an explicit model-binary upload batch.",
633
  "exists": true,
634
+ "bytes": 25159,
635
+ "sha256": "a74451a7d717661e1499b98631d825f4db8c6b51b1e9bafd73966697eb04258a"
636
  },
637
  {
638
  "id": "github_package_dockerfile",
 
728
  "path": "docs/data/unified_task_model_radar.json",
729
  "kind": "website_data",
730
  "surface": "website_hf",
731
+ "shows": "Stores normalized 20-axis radar values, raw task metrics, Qwen3-Omni/Cosmos3 overlay mappings, method-card caveats, proxy flags, and source artifacts.",
732
  "exists": true,
733
+ "bytes": 228815,
734
+ "sha256": "862376178e8b0d01b536f49a18b7934a373494f8b36080790f616438ec0e035e"
735
  },
736
  {
737
  "id": "single_episode_task_model_radar_json",
 
741
  "surface": "website_hf",
742
  "shows": "Machine-readable split radar for the one-episode Minimal and Neural MLP baselines, both scored on all 20 task contracts.",
743
  "exists": true,
744
+ "bytes": 51107,
745
+ "sha256": "5f2ebb41e8488446ea5c5cd2cb75bbedce688433feffe1412288de56b133bd5c"
746
  },
747
  {
748
  "id": "episode128_task_model_radar_json",
 
750
  "path": "docs/data/episode128_task_model_radar.json",
751
  "kind": "website_data",
752
  "surface": "website_hf",
753
+ "shows": "Machine-readable split radar for selected 128-episode metadata/raw baselines, Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano, now complete at 140/140 scored rows with proxy notes retained.",
754
  "exists": true,
755
+ "bytes": 184992,
756
+ "sha256": "385704db90443d74903f365e90b27538020f5574c96f296bbf63173f488a645d"
757
  },
758
  {
759
  "id": "task_method_20_result_matrix_json",
 
764
  "shows": "Machine-readable 9-method by 20-task matrix where every method has 20 records and the current release is complete at 180/180 scored rows.",
765
  "exists": true,
766
  "bytes": 128509,
767
+ "sha256": "96082daa33771963ac40b7d719df00a76ec443508a3d3101cb6dd82d87965729"
768
  },
769
  {
770
  "id": "task_method_20_result_matrix",
 
808
  "shows": "Machine-readable check that scored JSON-backed matrix cells match their declared metric source values.",
809
  "exists": true,
810
  "bytes": 561,
811
+ "sha256": "cbe9be1ea3d62b253780aade9c51cb7f3a5882df185927186ee6a1d6516ad3a6"
812
  },
813
  {
814
  "id": "task_method_20_source_audit",
 
819
  "shows": "Reader-facing source-value audit for the 180-result matrix.",
820
  "exists": true,
821
  "bytes": 447,
822
+ "sha256": "dfcde22c9350858d0df6d881533f63ba6838fc980b62f0b68770f9b708fcde85"
823
  },
824
  {
825
  "id": "two_evidence_line_map_chart",
 
838
  "path": "docs/assets/charts/unified_task_model_radar.svg",
839
  "kind": "generated_figure",
840
  "surface": "website_hf",
841
+ "shows": "Compares minimal and neural MLP baselines across all 20 tasks, with Qwen3-Omni and Cosmos3 task-aligned overlays.",
842
  "exists": true,
843
  "bytes": 57938,
844
  "sha256": "bb83b80b47fe679ebdce2c99378a4548120f1c8cc2d725b88e409d8c386dcbf8"
 
851
  "surface": "website_hf",
852
  "shows": "Separates the one-episode Minimal and Neural MLP 20/20 scored baselines into a clean two-polygon radar.",
853
  "exists": true,
854
+ "bytes": 35232,
855
+ "sha256": "87b52a7dead40358f1778dda43ade4d2e875ac98e507e01ca007084363e5977e"
856
  },
857
  {
858
  "id": "episode128_task_model_radar_chart",
 
860
  "path": "docs/assets/charts/episode128_task_model_radar.svg",
861
  "kind": "generated_figure",
862
  "surface": "website_hf",
863
+ "shows": "Separates the selected 128-episode methods: raw-feature simple/NN as complete 20/20 scored polygons plus metadata, Qwen3-Omni, Cosmos3-Super, and Cosmos3-Nano task-aligned overlays.",
864
  "exists": true,
865
+ "bytes": 51915,
866
+ "sha256": "047ea4b05a04f6734e2afcf792863559dc8f3091eae88a97ff90e8b038a423f4"
867
  },
868
  {
869
  "id": "unified_task_model_radar_builder",
 
873
  "surface": "repo_hf",
874
  "shows": "Regenerates the direction-aware radar chart and machine-readable metric overlay JSON.",
875
  "exists": true,
876
+ "bytes": 68610,
877
+ "sha256": "96bc2df0de5a9e512d69961ddb13ea87b26ef01f1f943f5a78a6dc373400949d"
878
  },
879
  {
880
  "id": "task_method_20_gap_audit_builder",
 
915
  "path": "results/omni_finetune/model_output_probe_readiness/model_output_probe_readiness.json",
916
  "kind": "scaleup_status",
917
  "surface": "repo_hf",
918
+ "shows": "Checks whether Qwen3-Omni and Cosmos3 runs have train, validation, and test prediction files before extending model overlays to all 20 task contracts.",
919
  "exists": true,
920
  "bytes": 4320,
921
  "sha256": "11cff26749bf6ad8b8ee028b18e0b4be5713ed8b5325578caa03be25d894263b"
 
928
  "surface": "repo_hf",
929
  "shows": "Audits model-output split availability and writes a readiness report without assigning new numeric task scores.",
930
  "exists": true,
931
+ "bytes": 10526,
932
+ "sha256": "2b95834c75b0c90ceefe2c20381b3997a63f283b733186e07dea9e2778c78fad"
933
  },
934
  {
935
  "id": "existing_model_output_task_probe",
 
937
  "path": "results/omni_finetune/model_output_task_probes_20260616/summary.json",
938
  "kind": "model_result",
939
  "surface": "repo_hf",
940
+ "shows": "Scores task-specific Qwen3-Omni and Cosmos3 overlays only where verified held-out prediction JSON or compact target maps already contain the required targets.",
941
  "exists": true,
942
  "bytes": 5951,
943
  "sha256": "910477d2fba648605dda128d0ecd2a2c13cfa460573e350dc850014ac91c6c2b"
 
950
  "surface": "repo_hf",
951
  "shows": "Derives task-specific scores from committed verified model outputs without running new inference or backfilling absent targets.",
952
  "exists": true,
953
+ "bytes": 69423,
954
+ "sha256": "43086745ba53f5a4da1a39b9c223914707ab51b027555c91bea27c0bc152a27f"
955
  },
956
  {
957
  "id": "a100_128_metadata_task_baselines",
 
1071
  "surface": "repo_hf",
1072
  "shows": "Catalogs public figures, charts, modality thumbnails, dimensions, hashes, roles, and source scripts.",
1073
  "exists": true,
1074
+ "bytes": 7014,
1075
+ "sha256": "1087774a85614f12871418bb9fa375b98121596eb11dcdc22d324b943fb9d313"
1076
  },
1077
  {
1078
  "id": "figure_index_json",
 
1082
  "surface": "website_hf",
1083
  "shows": "Machine-readable visual asset index for website and Hugging Face mirrors.",
1084
  "exists": true,
1085
+ "bytes": 19472,
1086
+ "sha256": "e56f76038a56ffc61e882d0201f13912af5cba3e5ade08b1bb912fba0acdcd24"
1087
  },
1088
  {
1089
  "id": "figure_index_builder",
 
1093
  "surface": "repo_hf",
1094
  "shows": "Regenerates visual-asset hashes, dimensions, and source-script provenance.",
1095
  "exists": true,
1096
+ "bytes": 16832,
1097
+ "sha256": "7c526bff01c282d81e4f64bbdb31c059953ea7868b75b0c3104826241280165f"
1098
  },
1099
  {
1100
  "id": "brand_assets_json",
 
1160
  "shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
1161
  "exists": true,
1162
  "bytes": 8640,
1163
+ "sha256": "3cb0aca2dca01448cb9bc5cbb519a91bc6397c08a1eaaa84c031e773221e5a0a"
1164
  },
1165
  {
1166
  "id": "public_surface_qa",
 
1179
  "path": "PUBLIC_READER_MAP.md",
1180
  "kind": "project_path",
1181
  "surface": "repo_hf",
1182
+ "shows": "Provides the first-pass navigation layer for GitHub, GitHub Pages, Hugging Face mirrors, Qwen3-Omni/Cosmos3 repos, evidence lines, and claim boundaries.",
1183
  "exists": true,
1184
+ "bytes": 4948,
1185
+ "sha256": "7a7128fdde08f770338c3fe2d473565918c5633f948dec6a78a6b2a67938e91a"
1186
  },
1187
  {
1188
  "id": "public_reader_map_json",
 
1192
  "surface": "website_hf",
1193
  "shows": "Machine-readable public reader map used by the website and Hugging Face mirrors to keep entry points and surface responsibilities explicit.",
1194
  "exists": true,
1195
+ "bytes": 5971,
1196
+ "sha256": "3474f84ffa53aefabdbf8a75c466c271675162ce0f8a23ea3b6660951048072f"
1197
  },
1198
  {
1199
  "id": "public_surface_qa_json",
 
1285
  "volatile": true,
1286
  "shows": "Records the last live GitHub/HF URL verification after upload.",
1287
  "exists": true,
1288
+ "bytes": 184689,
1289
  "hash_policy": "existence_and_size_only"
1290
  },
1291
  {
 
1296
  "surface": "repo",
1297
  "shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
1298
  "exists": true,
1299
+ "bytes": 67652,
1300
+ "sha256": "47c6e5e0d93a881db045842ef98656d04c74cf7605f33a56b8d4daecf97fb547"
1301
  },
1302
  {
1303
  "id": "reproducibility_contract",
 
1329
  "surface": "repo_hf",
1330
  "shows": "Generates the selective artifact catalog from local files.",
1331
  "exists": true,
1332
+ "bytes": 67587,
1333
+ "sha256": "28a93ec92c91886388f5d42ab8e25af0b218e4644b733bc8f8230bc0f91aab65"
1334
  },
1335
  {
1336
  "id": "publication_audit",
 
1365
  "volatile": true,
1366
  "shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
1367
  "exists": true,
1368
+ "bytes": 1418066,
1369
  "hash_policy": "existence_and_size_only"
1370
  },
1371
  {
 
1377
  "volatile": true,
1378
  "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
1379
  "exists": true,
1380
+ "bytes": 20657,
1381
  "hash_policy": "existence_and_size_only"
1382
  },
1383
  {
 
1542
  "surface": "website_hf",
1543
  "shows": "Presents the task suite and sample modality thumbnails with metrics generated from committed files.",
1544
  "exists": true,
1545
+ "bytes": 1903454,
1546
+ "sha256": "6667eb856cf61ada9f868807b5d5c6ccde06e4f791b2f9dd567d98b71b307415"
1547
  },
1548
  {
1549
  "id": "modality_atlas",
 
1674
  "surface": "repo_hf",
1675
  "shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
1676
  "exists": true,
1677
+ "bytes": 15983,
1678
+ "sha256": "4db248566972e811aac6ca06582f233414821624f00f9d4fc4a1b66b2e00401f"
1679
  },
1680
  {
1681
  "id": "omni_model_comparison_json",
 
1685
  "surface": "repo_hf",
1686
  "shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
1687
  "exists": true,
1688
+ "bytes": 82088,
1689
+ "sha256": "82ccc2932cad63a9ebad85da53e694b18ef626aa3720bda3ed5da30f3dc5e121"
1690
  },
1691
  {
1692
  "id": "cosmos3_nano_verified_summary",
docs/data/episode128_task_model_radar.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "title": "128-Episode 20-Task Radar",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-21T08:37:32+00:00",
5
- "description": "Selected 128-episode metadata/raw baselines plus verified Qwen3/Cosmos branches. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.",
6
  "task_count": 20,
7
  "method_count": 7,
8
  "method_task_record_count": 140,
@@ -12,7 +12,7 @@
12
  "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
13
  "raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
14
  "result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
15
- "foundation_model_overlay": "Qwen3/Cosmos points are plotted only on task-aligned axes. Scoreless records mean the public result does not evaluate that task contract.",
16
  "metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
17
  "raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
18
  },
 
1
  {
2
  "title": "128-Episode 20-Task Radar",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-21T10:47:17+00:00",
5
+ "description": "Selected 128-episode metadata/raw baselines plus verified Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano diagnostics. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.",
6
  "task_count": 20,
7
  "method_count": 7,
8
  "method_task_record_count": 140,
 
12
  "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
13
  "raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
14
  "result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
15
+ "foundation_model_overlay": "Qwen3-Omni and Cosmos3 points are plotted only on task-aligned axes. Scoreless records mean the public result does not evaluate that task contract.",
16
  "metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
17
  "raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
18
  },
docs/data/figure_index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Figure Index",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-18T18:18:13+00:00",
5
  "scope": "Public figures, diagrams, charts, and derived modality thumbnails. Raw Xperience-10M videos, annotations, RRD files, and Qwen weights are excluded.",
6
  "figure_count": 29,
7
  "figures": [
@@ -64,12 +64,12 @@
64
  "source_script": "scripts/render_task_suite_infographic.py",
65
  "surface": "README, website, HF Space, artifact dataset, model card",
66
  "exists": true,
67
- "bytes": 1591194,
68
- "sha256": "95ab73e01cfba86538b63247869fae4091934ddedf9e22523ab4cead9c59086d",
69
  "dimensions": {
70
  "format": "PNG",
71
  "width": 1800,
72
- "height": 6600
73
  },
74
  "source_script_exists": true
75
  },
@@ -81,8 +81,8 @@
81
  "source_script": "scripts/generate_visualizations.py",
82
  "surface": "README, website, HF artifact dataset",
83
  "exists": true,
84
- "bytes": 704575,
85
- "sha256": "c90723cc4b1bf5490269af2df594849030ae8d4cc8176e1d1eab96fabf9412f9",
86
  "dimensions": {
87
  "format": "PNG",
88
  "width": 1800,
@@ -149,8 +149,8 @@
149
  "source_script": "scripts/render_foundation_pipeline_diagrams.py",
150
  "surface": "README, website, HF Space, artifact dataset, model card",
151
  "exists": true,
152
- "bytes": 1553916,
153
- "sha256": "6d502580c9f11b170036843690dff0ef99e146890d9914046b5d4b165bd1f89b",
154
  "dimensions": {
155
  "format": "PNG",
156
  "width": 2560,
@@ -166,8 +166,8 @@
166
  "source_script": "scripts/render_overview_figures.py",
167
  "surface": "README, website, HF artifact dataset, model card",
168
  "exists": true,
169
- "bytes": 774391,
170
- "sha256": "f08b03bc21e194efe382347d74cf89cd6ac65dede51889971dbfc2fb9d1de3c2",
171
  "dimensions": {
172
  "format": "PNG",
173
  "width": 1800,
@@ -356,8 +356,8 @@
356
  "source_script": "scripts/generate_visualizations.py",
357
  "surface": "website directions",
358
  "exists": true,
359
- "bytes": 5078,
360
- "sha256": "fd4ba0a9d6d525bdfa8677c66e1a751efc83936dc032ce229bfca1ea106acb40",
361
  "dimensions": {
362
  "format": "SVG",
363
  "width": 1180,
@@ -410,8 +410,8 @@
410
  "source_script": "scripts/build_unified_task_model_radar.py",
411
  "surface": "website unified task section, README, HF mirrors",
412
  "exists": true,
413
- "bytes": 54276,
414
- "sha256": "66b3f285ecb9a3bf7d1125495fc3b6d4400edacb59700e06e9b504c9767d434e",
415
  "dimensions": {
416
  "format": "SVG",
417
  "width": 2400,
@@ -428,8 +428,8 @@
428
  "source_script": "scripts/build_unified_task_model_radar.py",
429
  "surface": "website unified task section, README, HF mirrors",
430
  "exists": true,
431
- "bytes": 35229,
432
- "sha256": "eae52facf93c6c674a82178e4ec1592f9d77fd5d5fcf8b11845deff7dbca3f6c",
433
  "dimensions": {
434
  "format": "SVG",
435
  "width": 2400,
@@ -442,12 +442,12 @@
442
  "id": "episode128_task_model_radar",
443
  "title": "128-episode 20-task model radar",
444
  "path": "docs/assets/charts/episode128_task_model_radar.svg",
445
- "role": "Twenty-axis split radar for selected 128-episode methods: raw-feature simple/NN as complete scored polygons and metadata/Qwen/Cosmos as task-aligned overlays.",
446
  "source_script": "scripts/build_unified_task_model_radar.py",
447
  "surface": "website unified task section, README, HF mirrors",
448
  "exists": true,
449
- "bytes": 48263,
450
- "sha256": "9538bfb512f16bbd280151923adf8a23377bfaed2a8be5961a25eaf0a11d1404",
451
  "dimensions": {
452
  "format": "SVG",
453
  "width": 2400,
 
1
  {
2
  "title": "Ropedia Xperience-10M Figure Index",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-21T10:52:12+00:00",
5
  "scope": "Public figures, diagrams, charts, and derived modality thumbnails. Raw Xperience-10M videos, annotations, RRD files, and Qwen weights are excluded.",
6
  "figure_count": 29,
7
  "figures": [
 
64
  "source_script": "scripts/render_task_suite_infographic.py",
65
  "surface": "README, website, HF Space, artifact dataset, model card",
66
  "exists": true,
67
+ "bytes": 1903454,
68
+ "sha256": "6667eb856cf61ada9f868807b5d5c6ccde06e4f791b2f9dd567d98b71b307415",
69
  "dimensions": {
70
  "format": "PNG",
71
  "width": 1800,
72
+ "height": 7600
73
  },
74
  "source_script_exists": true
75
  },
 
81
  "source_script": "scripts/generate_visualizations.py",
82
  "surface": "README, website, HF artifact dataset",
83
  "exists": true,
84
+ "bytes": 711222,
85
+ "sha256": "4db6a6353d3f1e49bae12447e1a78a874aa780d60e9817f3052ac0d0acf2f7b2",
86
  "dimensions": {
87
  "format": "PNG",
88
  "width": 1800,
 
149
  "source_script": "scripts/render_foundation_pipeline_diagrams.py",
150
  "surface": "README, website, HF Space, artifact dataset, model card",
151
  "exists": true,
152
+ "bytes": 1853350,
153
+ "sha256": "e8d863cc5104602e464048b4bf48f9acf3a108495298d9ec15b2e9cf346f41f9",
154
  "dimensions": {
155
  "format": "PNG",
156
  "width": 2560,
 
166
  "source_script": "scripts/render_overview_figures.py",
167
  "surface": "README, website, HF artifact dataset, model card",
168
  "exists": true,
169
+ "bytes": 757827,
170
+ "sha256": "d83b75a6778033a716f1086dbe61298662d4b8f80cb8f52193d2cbdb1e8e31f7",
171
  "dimensions": {
172
  "format": "PNG",
173
  "width": 1800,
 
356
  "source_script": "scripts/generate_visualizations.py",
357
  "surface": "website directions",
358
  "exists": true,
359
+ "bytes": 5352,
360
+ "sha256": "506e12aa1b6c4fd50fb0c65714c7f0a92c02c40069cb879503471ba9b63d4afb",
361
  "dimensions": {
362
  "format": "SVG",
363
  "width": 1180,
 
410
  "source_script": "scripts/build_unified_task_model_radar.py",
411
  "surface": "website unified task section, README, HF mirrors",
412
  "exists": true,
413
+ "bytes": 57938,
414
+ "sha256": "bb83b80b47fe679ebdce2c99378a4548120f1c8cc2d725b88e409d8c386dcbf8",
415
  "dimensions": {
416
  "format": "SVG",
417
  "width": 2400,
 
428
  "source_script": "scripts/build_unified_task_model_radar.py",
429
  "surface": "website unified task section, README, HF mirrors",
430
  "exists": true,
431
+ "bytes": 35232,
432
+ "sha256": "87b52a7dead40358f1778dda43ade4d2e875ac98e507e01ca007084363e5977e",
433
  "dimensions": {
434
  "format": "SVG",
435
  "width": 2400,
 
442
  "id": "episode128_task_model_radar",
443
  "title": "128-episode 20-task model radar",
444
  "path": "docs/assets/charts/episode128_task_model_radar.svg",
445
+ "role": "Twenty-axis split radar for selected 128-episode methods: raw-feature simple/NN as complete scored polygons plus metadata, Qwen3-Omni, Cosmos3-Super, and Cosmos3-Nano task-aligned overlays.",
446
  "source_script": "scripts/build_unified_task_model_radar.py",
447
  "surface": "website unified task section, README, HF mirrors",
448
  "exists": true,
449
+ "bytes": 51915,
450
+ "sha256": "047ea4b05a04f6734e2afcf792863559dc8f3091eae88a97ff90e8b038a423f4",
451
  "dimensions": {
452
  "format": "SVG",
453
  "width": 2400,
docs/data/foundation_model_plan.json CHANGED
@@ -230,7 +230,7 @@
230
  },
231
  {
232
  "step": 4,
233
- "name": "World-model branch",
234
  "action": "Promote Cosmos 3 beyond the current Nano compatibility and Super forward-dynamics runs only when loss metrics, preprocessing, and storage justify the added compute."
235
  },
236
  {
 
230
  },
231
  {
232
  "step": 4,
233
+ "name": "World-model track",
234
  "action": "Promote Cosmos 3 beyond the current Nano compatibility and Super forward-dynamics runs only when loss metrics, preprocessing, and storage justify the added compute."
235
  },
236
  {
docs/data/live_publication_status.json CHANGED
@@ -2,7 +2,7 @@
2
  "title": "Ropedia Xperience-10M Live Publication Status",
3
  "status": "pass",
4
  "checked_at_utc": "2026-06-20T21:56:07+00:00",
5
- "scope": "Live GitHub Pages, GitHub raw, Hugging Face Space, artifact dataset, baseline model mirrors, and the Qwen3/Cosmos3 LoRA adapter repos when their upload packages exist locally.",
6
  "hash_groups": [
7
  {
8
  "id": "task_suite_infographic",
 
2
  "title": "Ropedia Xperience-10M Live Publication Status",
3
  "status": "pass",
4
  "checked_at_utc": "2026-06-20T21:56:07+00:00",
5
+ "scope": "Live GitHub Pages, GitHub raw, Hugging Face Space, artifact dataset, baseline model mirrors, and the Qwen3-Omni/Cosmos3 LoRA adapter repos when their upload packages exist locally.",
6
  "hash_groups": [
7
  {
8
  "id": "task_suite_infographic",
docs/data/mirror_parity.json CHANGED
The diff for this file is too large to render. See raw diff
 
docs/data/omni_finetune_verified_result.json CHANGED
@@ -91,6 +91,6 @@
91
  "Use results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before deciding whether v6 should become a formal release tag.",
92
  "Use the v6 predictions for action/contact error analysis, and compare v5 for subtask, next-action, and object regressions.",
93
  "Keep full-parameter Qwen runs as feasibility gates until there is a storage plan for checkpoints or mergeable full-weight deltas.",
94
- "Use the verified Cosmos3-Super Forward-Dynamics LoRA package as a separate world-model branch: it updates adapter weights over camera-pose proxy future-vision-velocity targets, not Qwen-style JSON action labels."
95
  ]
96
  }
 
91
  "Use results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before deciding whether v6 should become a formal release tag.",
92
  "Use the v6 predictions for action/contact error analysis, and compare v5 for subtask, next-action, and object regressions.",
93
  "Keep full-parameter Qwen runs as feasibility gates until there is a storage plan for checkpoints or mergeable full-weight deltas.",
94
+ "Use the verified Cosmos3-Super Forward-Dynamics LoRA package as a separate world-model artifact: it updates adapter weights over camera-pose proxy future-vision-velocity targets, not Qwen-style JSON action labels."
95
  ]
96
  }
docs/data/omni_model_comparison.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
3
- "generated_at_utc": "2026-06-20T21:27:21+00:00",
4
  "status": "pass",
5
  "version_count": 3,
6
  "model_group_count": 5,
@@ -8,7 +8,7 @@
8
  "version_reading_notes": [
9
  "Version 1 is the public-sample 20-task surface: original core heads, tasks 13-20, and the 180-row method-task matrix.",
10
  "Version 2 is the selected 128-episode same-split simple/NN baseline alignment.",
11
- "Version 3 is the verified model-branch layer: the current final Qwen3-Omni LoRA package is the JSON-task diagnostic result, Cosmos3-Nano is a future-window compatibility result, Cosmos3-Super Reasoner is a base-weight JSON-task evaluation, and Cosmos3-Super Forward-Dynamics LoRA is the first Super fine-tuned adapter branch."
12
  ],
13
  "versions": [
14
  {
@@ -305,7 +305,7 @@
305
  "neural_primary_score": null
306
  }
307
  ],
308
- "interpretation": "This layer aligns the previous simple and neural baseline framing to the same selected 96/16/16 split used by the model branches. It uses public-safe JSONL metadata/text features, so raw-feature-only tasks remain explicitly unsupported until 128-run sensor feature blocks exist."
309
  },
310
  {
311
  "id": "v3_multi_episode_foundation_model_branches",
@@ -870,7 +870,7 @@
870
  "neural_supported_task_count": 6
871
  },
872
  "weights": "metadata/text baseline artifacts; raw 128 sensor-feature model weights not yet complete",
873
- "interpretation": "Same selected 96/16/16 split and task ids as the model branches, but metadata/text features only."
874
  }
875
  ],
876
  "comparison_note": "This is the cleanest 1-episode versus 128-episode grouping for the same simple/NN task-head family, but the feature surface changes from raw public-sample features to public-safe 128-episode metadata/text features."
@@ -1683,7 +1683,7 @@
1683
  "weights_repository": "none for this run: staged base nv-community/Cosmos3-Super weights were evaluated through vLLM; create a separate repo only after new adapter or fine-tuned weights exist"
1684
  }
1685
  ],
1686
- "comparison_note": "Cosmos3-Super is now represented by a verified 448-window held-out Reasoner evaluation on the same JSON task as Qwen3. It uses staged base weights through vLLM, so it is a model-branch diagnostic, not a weight release. A camera-pose proxy forward-dynamics target export now passes the contract audit and schema-only packer smoke; the separate Forward-Dynamics LoRA group records the trainable adapter run and loss-based held-out evaluation."
1687
  },
1688
  {
1689
  "id": "cosmos3_super_forward_dynamics",
 
1
  {
2
  "title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
3
+ "generated_at_utc": "2026-06-21T10:47:04+00:00",
4
  "status": "pass",
5
  "version_count": 3,
6
  "model_group_count": 5,
 
8
  "version_reading_notes": [
9
  "Version 1 is the public-sample 20-task surface: original core heads, tasks 13-20, and the 180-row method-task matrix.",
10
  "Version 2 is the selected 128-episode same-split simple/NN baseline alignment.",
11
+ "The selected-128 model-diagnostic group contains the current Qwen3-Omni LoRA JSON-task row, Cosmos3-Nano future-window compatibility result, Cosmos3-Super Reasoner base-weight JSON-task evaluation, and the separate Cosmos3-Super Forward-Dynamics LoRA adapter artifact."
12
  ],
13
  "versions": [
14
  {
 
305
  "neural_primary_score": null
306
  }
307
  ],
308
+ "interpretation": "This layer aligns the previous simple and neural baseline framing to the same selected 96/16/16 split used by the Qwen3-Omni and Cosmos3 diagnostics. It uses public-safe JSONL metadata/text features, so raw-feature-only tasks remain explicitly unsupported until 128-run sensor feature blocks exist."
309
  },
310
  {
311
  "id": "v3_multi_episode_foundation_model_branches",
 
870
  "neural_supported_task_count": 6
871
  },
872
  "weights": "metadata/text baseline artifacts; raw 128 sensor-feature model weights not yet complete",
873
+ "interpretation": "Same selected 96/16/16 split and task ids as the Qwen3-Omni and Cosmos3 diagnostics, but metadata/text features only."
874
  }
875
  ],
876
  "comparison_note": "This is the cleanest 1-episode versus 128-episode grouping for the same simple/NN task-head family, but the feature surface changes from raw public-sample features to public-safe 128-episode metadata/text features."
 
1683
  "weights_repository": "none for this run: staged base nv-community/Cosmos3-Super weights were evaluated through vLLM; create a separate repo only after new adapter or fine-tuned weights exist"
1684
  }
1685
  ],
1686
+ "comparison_note": "Cosmos3-Super is now represented by a verified 448-window held-out Reasoner evaluation on the same JSON task as Qwen3. It uses staged base weights through vLLM, so it is a Cosmos3 diagnostic, not a weight release. A camera-pose proxy forward-dynamics target export now passes the contract audit and schema-only packer smoke; the separate Forward-Dynamics LoRA group records the trainable adapter run and loss-based held-out evaluation."
1687
  },
1688
  {
1689
  "id": "cosmos3_super_forward_dynamics",
docs/data/project_status.json CHANGED
@@ -2,7 +2,7 @@
2
  "title": "Ropedia Xperience-10M Task Suite Project Status",
3
  "version": "2026-06-20",
4
  "decision": "public_sample_pipeline_verified_128_enhancement_qwen3_v6_cosmos_comparison",
5
- "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, compares verified Qwen3-Omni and Cosmos3 branch packages as early cross-episode diagnostics, and now records a no-new-episode enhancement pack for pushing the current 128-episode suite harder.",
6
  "scope_boundary": {
7
  "validated_episode_count": 1,
8
  "aligned_frames": 5821,
@@ -145,7 +145,7 @@
145
  "RESEARCH_ROADMAP.md",
146
  "docs/data/research_roadmap.json"
147
  ],
148
- "readout": "The roadmap connects public-sample task development to the final verified Qwen3-Omni diagnostic result, same-split baseline alignment, the no-new-episode 128-suite enhancement pack, action/subtask error analysis, robustness runs, world/policy branches, and the future Xperience-native pretraining goal."
149
  },
150
  {
151
  "area": "128-episode task-suite enhancement pack",
@@ -156,7 +156,7 @@
156
  "results/omni_finetune/task_suite_enhancement_128_v1_20260608/enhancement_plan.json",
157
  "scripts/omni/build_task_suite_enhancement_128.py"
158
  ],
159
- "readout": "The current 3,808-window selected split can be stressed without more episodes by exporting denser and multiscale windows. The recommended next export is multiscale_20s10_40s20_80s40, estimated at 106,095 windows from observed frame spans; the pack also defines hierarchical action/subtask targets, raw-feature shard priorities for unsupported tasks, and Qwen/Cosmos follow-up run cards."
160
  },
161
  {
162
  "area": "Foundation-model plan",
@@ -176,7 +176,7 @@
176
  "scripts/omni/backbone_registry.py",
177
  "scripts/omni/smoke_test_backbone_packaging.py"
178
  ],
179
- "readout": "Future Qwen, Cosmos-style, and VLA/policy branches must keep the same episode split discipline, held-out metrics, validation gate, public-safe package contract, and explicit forbidden-artifact policy before reporting results."
180
  },
181
  {
182
  "area": "Xperience Embodied Foundation Model",
@@ -253,7 +253,7 @@
253
  "results/omni_finetune/OMNI_MODEL_COMPARISON.md",
254
  "scripts/omni/build_omni_model_comparison.py"
255
  ],
256
- "readout": "The public comparison now has two views: the three result layers and a model-family grouping. The model grouping pairs 1-episode and 128-episode entries for task-head baselines, separates Qwen3-Omni sensor-adapter smoke from 128-episode LoRA diagnostics, separates Cosmos3-Nano future-window compatibility from Cosmos3-Super base-weight Reasoner evaluation, and adds Cosmos3-Super Forward-Dynamics LoRA as a loss-based fine-tuned adapter branch."
257
  },
258
  {
259
  "area": "Qwen3-Omni fine-tuning",
@@ -271,7 +271,7 @@
271
  "readout": "The selected 96/16/16 episode split now has a current v6 rank64/lr5e-5 public-safe held-out package with 34,269 exported windows, 4,032 test predictions, validation/audit summaries, and a public LoRA adapter repo. JSON validity is 99.90%, meeting the 98% target; transition accuracy is 98.98%, contact accuracy is 81.77%, object micro-F1 is 30.65%, next-action accuracy is 4.31%, and action/subtask metrics remain weak. v6 improves action macro-F1 and contact accuracy versus v5, but v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics."
272
  },
273
  {
274
- "area": "Cosmos3-Nano future-window branch",
275
  "status": "verified_compatibility_result",
276
  "evidence": [
277
  "configs/omni_backbones/cosmos_world_model.json",
@@ -279,10 +279,10 @@
279
  "scripts/omni/eval_cosmos3_future_window_retrieval.py",
280
  "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json"
281
  ],
282
- "readout": "The Cosmos3-Nano branch now has a public-safe verified future-window compatibility package with 3,213 future-window samples, 378 held-out test predictions, future retrieval MRR 0.0221, temporal consistency 0.0952, transition accuracy 0.9683, and contact accuracy 0.7434. It is a compatibility adapter result, not a full Cosmos diffusion-weight fine-tune."
283
  },
284
  {
285
- "area": "Cosmos3-Super Reasoner branch",
286
  "status": "verified_base_weight_result",
287
  "evidence": [
288
  "configs/omni_backbones/cosmos3_super_reasoner.json",
@@ -314,7 +314,7 @@
314
  "results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/verified_result_summary.json",
315
  "results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/package_audit.json"
316
  ],
317
- "readout": "The first fine-tuned Cosmos3-Super adapter branch is verified as a public-safe package: 8-GPU FSDP LoRA, 26.2M adapter parameters, 2,848 train rows, 512 validation rows, 448 held-out test rows, validation MSE 4.0082, and test MSE 3.6853. The package excludes adapter safetensors; weights are published separately at cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep."
318
  },
319
  {
320
  "area": "Raw Xperience-10M redistribution",
@@ -331,8 +331,8 @@
331
  "Open docs/data/project_packet.json for the machine-readable project path.",
332
  "Inspect RESEARCH_TAKEAWAYS.md and docs/data/research_takeaways.json before interpreting model scores.",
333
  "Inspect RESEARCH_ROADMAP.md and docs/data/research_roadmap.json for the path from public-sample task work to multi-episode modeling.",
334
- "Inspect FOUNDATION_MODEL_PLAN.md and docs/data/foundation_model_plan.json before choosing a backbone branch.",
335
- "Inspect OMNI_MODEL_EXTENSION_CONTRACT.md and run python scripts/omni/backbone_registry.py --validate --json before adding a new Qwen, Cosmos-style, or VLA/policy branch.",
336
  "Inspect XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md for the long-term full-corpus pretraining goal.",
337
  "Inspect TASK_SUITE_20.md, docs/data/task_suite_20.json, docs/data/summary_metrics.json, and results/episode_task_suite/neural_mlp/ to check the unified 20-task outputs.",
338
  "Inspect results/audio_ablation/AUDIO_ABLATION_SUMMARY.md before judging whether audio helps the current task suite.",
@@ -346,16 +346,16 @@
346
  "Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
347
  ],
348
  "current_reading_notes": [
349
- "The latest Qwen3-Omni v6 diagnostic branch is verified and meets the strict-JSON target, but action/subtask held-out quality is still weak: JSON validity is 99.90%, action macro-F1 is 0.0029, and subtask accuracy is 0.0037. v5 remains the pinned prior release row because it is still stronger on several metrics.",
350
  "Use TASK_SUITE_ENHANCEMENT_128.md and docs/data/task_suite_enhancement_128.json to push the current 128-episode suite without more raw episodes through multiscale_20s10_40s20_80s40, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
351
- "Use docs/data/omni_model_comparison.json to compare both views: the single-episode/128-baseline/model-branch result layers and the model-family grouping for task heads, Qwen3-Omni LoRA, Cosmos3-Nano, and Cosmos3-Super.",
352
  "The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
353
- "The Cosmos3-Nano future-window branch is verified as a compatibility adapter result, Cosmos3-Super Reasoner is verified as a base-weight evaluation, and Cosmos3-Super Forward-Dynamics LoRA is verified as the first fine-tuned Super adapter branch. Cosmos3-Super adapter weights belong in cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep; verified_public packages exclude safetensors.",
354
  "The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
355
  "Audio is one of the synchronized source modalities in the current task representation.",
356
  "The audio ablation report compares audio/no-audio variants across the original task contracts in results/audio_ablation/.",
357
- "Foundation-model selection is explicit: Qwen3-Omni is the structured JSON baseline, Cosmos 3 is the world-model branch with Nano compatibility and Super forward-dynamics LoRA results, and policy models such as OpenVLA/openpi/GR00T wait for robot-compatible action-target conversion.",
358
- "Future model branches should be added through the backbone registry and verified package contract, not as one-off result folders with incompatible metrics or publication rules.",
359
  "The Xperience Embodied Foundation Model is a future native-pretraining goal, not a completed model or current benchmark."
360
  ]
361
  }
 
2
  "title": "Ropedia Xperience-10M Task Suite Project Status",
3
  "version": "2026-06-20",
4
  "decision": "public_sample_pipeline_verified_128_enhancement_qwen3_v6_cosmos_comparison",
5
+ "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, compares verified Qwen3-Omni and Cosmos3 packages as early cross-episode diagnostics, and now records a no-new-episode enhancement pack for pushing the current 128-episode suite harder.",
6
  "scope_boundary": {
7
  "validated_episode_count": 1,
8
  "aligned_frames": 5821,
 
145
  "RESEARCH_ROADMAP.md",
146
  "docs/data/research_roadmap.json"
147
  ],
148
+ "readout": "The roadmap connects public-sample task development to the final verified Qwen3-Omni diagnostic result, same-split baseline alignment, the no-new-episode 128-suite enhancement pack, action/subtask error analysis, robustness runs, world/policy tracks, and the future Xperience-native pretraining goal."
149
  },
150
  {
151
  "area": "128-episode task-suite enhancement pack",
 
156
  "results/omni_finetune/task_suite_enhancement_128_v1_20260608/enhancement_plan.json",
157
  "scripts/omni/build_task_suite_enhancement_128.py"
158
  ],
159
+ "readout": "The current 3,808-window selected split can be stressed without more episodes by exporting denser and multiscale windows. The recommended next export is multiscale_20s10_40s20_80s40, estimated at 106,095 windows from observed frame spans; the pack also defines hierarchical action/subtask targets, raw-feature shard priorities for unsupported tasks, and Qwen3-Omni/Cosmos3 follow-up run cards."
160
  },
161
  {
162
  "area": "Foundation-model plan",
 
176
  "scripts/omni/backbone_registry.py",
177
  "scripts/omni/smoke_test_backbone_packaging.py"
178
  ],
179
+ "readout": "Future Qwen3-Omni, Cosmos3-style, and VLA/policy tracks must keep the same episode split discipline, held-out metrics, validation gate, public-safe package contract, and explicit forbidden-artifact policy before reporting results."
180
  },
181
  {
182
  "area": "Xperience Embodied Foundation Model",
 
253
  "results/omni_finetune/OMNI_MODEL_COMPARISON.md",
254
  "scripts/omni/build_omni_model_comparison.py"
255
  ],
256
+ "readout": "The public comparison now has two evidence lines plus a model-family grouping. The model grouping pairs 1-episode and 128-episode entries for task-head baselines, separates Qwen3-Omni sensor-adapter smoke from 128-episode LoRA diagnostics, separates Cosmos3-Nano future-window compatibility from Cosmos3-Super base-weight Reasoner evaluation, and adds Cosmos3-Super Forward-Dynamics LoRA as a loss-based fine-tuned adapter artifact."
257
  },
258
  {
259
  "area": "Qwen3-Omni fine-tuning",
 
271
  "readout": "The selected 96/16/16 episode split now has a current v6 rank64/lr5e-5 public-safe held-out package with 34,269 exported windows, 4,032 test predictions, validation/audit summaries, and a public LoRA adapter repo. JSON validity is 99.90%, meeting the 98% target; transition accuracy is 98.98%, contact accuracy is 81.77%, object micro-F1 is 30.65%, next-action accuracy is 4.31%, and action/subtask metrics remain weak. v6 improves action macro-F1 and contact accuracy versus v5, but v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics."
272
  },
273
  {
274
+ "area": "Cosmos3-Nano future-window package",
275
  "status": "verified_compatibility_result",
276
  "evidence": [
277
  "configs/omni_backbones/cosmos_world_model.json",
 
279
  "scripts/omni/eval_cosmos3_future_window_retrieval.py",
280
  "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json"
281
  ],
282
+ "readout": "The Cosmos3-Nano package now has a public-safe verified future-window compatibility result with 3,213 future-window samples, 378 held-out test predictions, future retrieval MRR 0.0221, temporal consistency 0.0952, transition accuracy 0.9683, and contact accuracy 0.7434. It is a compatibility adapter result, not a full Cosmos diffusion-weight fine-tune."
283
  },
284
  {
285
+ "area": "Cosmos3-Super Reasoner package",
286
  "status": "verified_base_weight_result",
287
  "evidence": [
288
  "configs/omni_backbones/cosmos3_super_reasoner.json",
 
314
  "results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/verified_result_summary.json",
315
  "results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/package_audit.json"
316
  ],
317
+ "readout": "The first fine-tuned Cosmos3-Super adapter artifact is verified as a public-safe package: 8-GPU FSDP LoRA, 26.2M adapter parameters, 2,848 train rows, 512 validation rows, 448 held-out test rows, validation MSE 4.0082, and test MSE 3.6853. The package excludes adapter safetensors; weights are published separately at cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep."
318
  },
319
  {
320
  "area": "Raw Xperience-10M redistribution",
 
331
  "Open docs/data/project_packet.json for the machine-readable project path.",
332
  "Inspect RESEARCH_TAKEAWAYS.md and docs/data/research_takeaways.json before interpreting model scores.",
333
  "Inspect RESEARCH_ROADMAP.md and docs/data/research_roadmap.json for the path from public-sample task work to multi-episode modeling.",
334
+ "Inspect FOUNDATION_MODEL_PLAN.md and docs/data/foundation_model_plan.json before choosing a backbone track.",
335
+ "Inspect OMNI_MODEL_EXTENSION_CONTRACT.md and run python scripts/omni/backbone_registry.py --validate --json before adding a new Qwen3-Omni, Cosmos3-style, or VLA/policy track.",
336
  "Inspect XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md for the long-term full-corpus pretraining goal.",
337
  "Inspect TASK_SUITE_20.md, docs/data/task_suite_20.json, docs/data/summary_metrics.json, and results/episode_task_suite/neural_mlp/ to check the unified 20-task outputs.",
338
  "Inspect results/audio_ablation/AUDIO_ABLATION_SUMMARY.md before judging whether audio helps the current task suite.",
 
346
  "Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
347
  ],
348
  "current_reading_notes": [
349
+ "The latest Qwen3-Omni v6 diagnostic run is verified and meets the strict-JSON target, but action/subtask held-out quality is still weak: JSON validity is 99.90%, action macro-F1 is 0.0029, and subtask accuracy is 0.0037. v5 remains the pinned prior release row because it is still stronger on several metrics.",
350
  "Use TASK_SUITE_ENHANCEMENT_128.md and docs/data/task_suite_enhancement_128.json to push the current 128-episode suite without more raw episodes through multiscale_20s10_40s20_80s40, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
351
+ "Use docs/data/omni_model_comparison.json to compare the two evidence lines (the 1-sample line and the selected-128 line) alongside the model-family grouping for task heads, Qwen3-Omni LoRA, Cosmos3-Nano, and Cosmos3-Super.",
352
  "The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
353
+ "The Cosmos3-Nano future-window package is verified as a compatibility adapter result, Cosmos3-Super Reasoner is verified as a base-weight evaluation, and Cosmos3-Super Forward-Dynamics LoRA is verified as the first fine-tuned Super adapter artifact. Cosmos3-Super adapter weights belong in cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep; verified_public packages exclude safetensors.",
354
  "The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
355
  "Audio is one of the synchronized source modalities in the current task representation.",
356
  "The audio ablation report compares audio/no-audio variants across the original task contracts in results/audio_ablation/.",
357
+ "Foundation-model selection is explicit: Qwen3-Omni is the structured JSON baseline, Cosmos3 is the world-model track with Nano compatibility and Super forward-dynamics LoRA results, and policy models such as OpenVLA/openpi/GR00T wait for robot-compatible action-target conversion.",
358
+ "Future model tracks should be added through the backbone registry and verified package contract, not as one-off result folders with incompatible metrics or publication rules.",
359
  "The Xperience Embodied Foundation Model is a future native-pretraining goal, not a completed model or current benchmark."
360
  ]
361
  }
docs/data/public_reader_map.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Public Reader Map",
3
  "status": "published",
4
- "purpose": "Organize the GitHub repo, GitHub Pages dashboard, Hugging Face Space, artifact dataset, baseline model repo, consolidated weights/results repo, and model-branch repos without removing evidence.",
5
  "fast_paths": [
6
  {
7
  "reader_goal": "Understand the project in one pass",
@@ -92,13 +92,13 @@
92
  },
93
  {
94
  "surface": "HF weights/results repo",
95
- "responsibility": "Consolidated baseline weights, Qwen3/Cosmos adapter weights, verified results, analysis files, and file-level manifest.",
96
  "best_use": "Auditing all public-safe weight-bearing artifacts from one repo."
97
  },
98
  {
99
- "surface": "Qwen3/Cosmos model repos",
100
- "responsibility": "Adapter-specific public weights or package cards when a branch is verified and publishable.",
101
- "best_use": "Inspecting model-branch artifacts."
102
  }
103
  ],
104
  "evidence_layers": [
@@ -121,8 +121,8 @@
121
  "boundary": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
122
  },
123
  {
124
- "claim_type": "Foundation-model branch quality",
125
- "public_evidence": ["Verified Qwen3/Cosmos result packages", "model cards"],
126
  "boundary": "Numeric task scores appear only when a task-specific eval or probe exists."
127
  },
128
  {
 
1
  {
2
  "title": "Ropedia Xperience-10M Public Reader Map",
3
  "status": "published",
4
+ "purpose": "Organize the GitHub repo, GitHub Pages dashboard, Hugging Face Space, artifact dataset, baseline model repo, consolidated weights/results repo, and Qwen3-Omni/Cosmos3 repos without removing evidence.",
5
  "fast_paths": [
6
  {
7
  "reader_goal": "Understand the project in one pass",
 
92
  },
93
  {
94
  "surface": "HF weights/results repo",
95
+ "responsibility": "Consolidated baseline weights, Qwen3-Omni v6 LoRA, Cosmos3-Super adapter/result artifacts, verified results, analysis files, and file-level manifest.",
96
  "best_use": "Auditing all public-safe weight-bearing artifacts from one repo."
97
  },
98
  {
99
+ "surface": "Qwen3-Omni and Cosmos3 model repos",
100
+ "responsibility": "Adapter-specific public weights or package cards when a Qwen3-Omni or Cosmos3 run is verified and publishable.",
101
+ "best_use": "Inspecting Qwen3-Omni and Cosmos3 artifacts."
102
  }
103
  ],
104
  "evidence_layers": [
 
121
  "boundary": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
122
  },
123
  {
124
+ "claim_type": "Foundation-model track quality",
125
+ "public_evidence": ["Verified Qwen3-Omni and Cosmos3 result packages", "model cards"],
126
  "boundary": "Numeric task scores appear only when a task-specific eval or probe exists."
127
  },
128
  {
docs/data/public_surface_qa.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Public Project Surface",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-21T10:02:48+00:00",
5
  "scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
6
  "checks": [
7
  {
@@ -18,7 +18,7 @@
18
  "website_integrity": {
19
  "exists": true,
20
  "status": "pass",
21
- "generated_at_utc": "2026-06-21T10:02:48+00:00"
22
  },
23
  "rendered_site_check": {
24
  "exists": true,
@@ -28,27 +28,27 @@
28
  "task_surface_integrity": {
29
  "exists": true,
30
  "status": "pass",
31
- "generated_at_utc": "2026-06-21T09:35:37+00:00"
32
  },
33
  "source_alignment": {
34
  "exists": true,
35
  "status": "pass",
36
- "generated_at_utc": "2026-06-21T09:35:37+00:00"
37
  },
38
  "scale_up_status": {
39
  "exists": true,
40
  "status": "pass",
41
- "generated_at_utc": "2026-06-21T09:35:40+00:00"
42
  },
43
  "publication_package": {
44
  "exists": true,
45
  "status": "pass",
46
- "generated_at_utc": "2026-06-21T09:35:32+00:00"
47
  },
48
  "mirror_parity": {
49
  "exists": true,
50
  "status": "pass",
51
- "generated_at_utc": "2026-06-21T09:36:59+00:00"
52
  }
53
  },
54
  "failures": {}
@@ -97,8 +97,8 @@
97
  "marker_counts": {
98
  "Ropedia Xperience-10M Task Suite": 20,
99
  "Xperience-10M": 166,
100
- "20-task": 78,
101
- "Qwen3-Omni": 191,
102
  "128-episode pilot": 1
103
  }
104
  },
 
1
  {
2
  "title": "Ropedia Xperience-10M Public Project Surface",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-21T11:08:07+00:00",
5
  "scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
6
  "checks": [
7
  {
 
18
  "website_integrity": {
19
  "exists": true,
20
  "status": "pass",
21
+ "generated_at_utc": "2026-06-21T11:07:26+00:00"
22
  },
23
  "rendered_site_check": {
24
  "exists": true,
 
28
  "task_surface_integrity": {
29
  "exists": true,
30
  "status": "pass",
31
+ "generated_at_utc": "2026-06-21T11:04:16+00:00"
32
  },
33
  "source_alignment": {
34
  "exists": true,
35
  "status": "pass",
36
+ "generated_at_utc": "2026-06-21T11:04:16+00:00"
37
  },
38
  "scale_up_status": {
39
  "exists": true,
40
  "status": "pass",
41
+ "generated_at_utc": "2026-06-21T11:03:20+00:00"
42
  },
43
  "publication_package": {
44
  "exists": true,
45
  "status": "pass",
46
+ "generated_at_utc": "2026-06-21T11:07:41+00:00"
47
  },
48
  "mirror_parity": {
49
  "exists": true,
50
  "status": "pass",
51
+ "generated_at_utc": "2026-06-21T11:05:04+00:00"
52
  }
53
  },
54
  "failures": {}
 
97
  "marker_counts": {
98
  "Ropedia Xperience-10M Task Suite": 20,
99
  "Xperience-10M": 166,
100
+ "20-task": 89,
101
+ "Qwen3-Omni": 241,
102
  "128-episode pilot": 1
103
  }
104
  },
docs/data/publication_audit.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-21T10:08:29+00:00",
4
  "checks": [
5
  {
6
  "name": "required_publication_assets_present",
@@ -244,8 +244,8 @@
244
  "hf_space_bundle": {
245
  "root": "hf_publish/space",
246
  "exists": true,
247
- "file_count": 569,
248
- "text_file_count": 422,
249
  "largest_file": {
250
  "path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
251
  "bytes": 10221085
@@ -255,8 +255,8 @@
255
  "hf_artifact_bundle": {
256
  "root": "hf_publish/artifacts",
257
  "exists": true,
258
- "file_count": 3041,
259
- "text_file_count": 1279,
260
  "largest_file": {
261
  "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
262
  "bytes": 135591061
@@ -266,8 +266,8 @@
266
  "hf_model_bundle": {
267
  "root": "hf_publish/model",
268
  "exists": true,
269
- "file_count": 3523,
270
- "text_file_count": 1450,
271
  "largest_file": {
272
  "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
273
  "bytes": 135591061
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-21T11:07:41+00:00",
4
  "checks": [
5
  {
6
  "name": "required_publication_assets_present",
 
244
  "hf_space_bundle": {
245
  "root": "hf_publish/space",
246
  "exists": true,
247
+ "file_count": 572,
248
+ "text_file_count": 425,
249
  "largest_file": {
250
  "path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
251
  "bytes": 10221085
 
255
  "hf_artifact_bundle": {
256
  "root": "hf_publish/artifacts",
257
  "exists": true,
258
+ "file_count": 3049,
259
+ "text_file_count": 1283,
260
  "largest_file": {
261
  "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
262
  "bytes": 135591061
 
266
  "hf_model_bundle": {
267
  "root": "hf_publish/model",
268
  "exists": true,
269
+ "file_count": 3533,
270
+ "text_file_count": 1455,
271
  "largest_file": {
272
  "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
273
  "bytes": 135591061
docs/data/quality_gates.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Release Checks",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-21T10:25:40+00:00",
5
  "rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
6
  "automated_gates": [
7
  {
 
1
  {
2
  "title": "Ropedia Xperience-10M Release Checks",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-21T11:09:13+00:00",
5
  "rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
6
  "automated_gates": [
7
  {
docs/data/qwen3_omni_run_lineage.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "current_public_matrix_row": "qwen3_omni_v6_lora",
3
- "generated_at_utc": "2026-06-21T09:58:19+00:00",
4
- "interpretation_rule": "Do not confuse the Qwen run versions with the project-level public result layers. The 20-task matrix uses Qwen3-Omni v6 LoRA; v5 remains the pinned prior release; v1-v4 are lineage and ablation evidence.",
5
  "pinned_prior_release": "v5",
6
  "related_engineering_artifacts": [
7
  {
@@ -17,6 +17,7 @@
17
  ],
18
  "runs": [
19
  {
 
20
  "dataset_contract": "xperience10m_episode_json_qa_v1",
21
  "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
22
  "eval_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
@@ -33,6 +34,8 @@
33
  "package": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
34
  "package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
35
  "public_matrix_role": "superseded lineage evidence, not the current 20-task Qwen row",
 
 
36
  "role": "First verified 96/16/16 selected-episode Qwen3-Omni LoRA package; establishes dataset, training, eval, and packaging plumbing.",
37
  "status": "verified",
38
  "title": "Selected-128 validation-aware LoRA baseline",
@@ -40,6 +43,7 @@
40
  "version": "v1"
41
  },
42
  {
 
43
  "dataset_contract": "xperience10m_episode_json_qa_v1",
44
  "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
45
  "eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
@@ -56,6 +60,8 @@
56
  "package": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
57
  "package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
58
  "public_matrix_role": "superseded lineage evidence, not the current 20-task Qwen row",
 
 
59
  "role": "Reuses the selected-128 split with a stricter structured JSON answer contract and full 8-GPU LoRA training.",
60
  "status": "verified",
61
  "title": "Structured-JSON reuse full-8-GPU LoRA",
@@ -63,6 +69,7 @@
63
  "version": "v2"
64
  },
65
  {
 
66
  "dataset_contract": "xperience10m_episode_json_qa_v1",
67
  "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
68
  "eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full",
@@ -79,6 +86,8 @@
79
  "package": "xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full",
80
  "package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full",
81
  "public_matrix_role": "superseded prompt/eval lineage evidence",
 
 
82
  "role": "Strict-label prompt/eval pass over the v2 adapter; improves JSON validity without introducing a new adapter training run.",
83
  "status": "verified",
84
  "title": "Strict-label prompt evaluation",
@@ -86,6 +95,7 @@
86
  "version": "v3"
87
  },
88
  {
 
89
  "dataset_contract": "xperience10m_episode_json_qa_v1",
90
  "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
91
  "eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
@@ -102,6 +112,8 @@
102
  "package": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
103
  "package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
104
  "public_matrix_role": "superseded lineage evidence, not the current 20-task Qwen row",
 
 
105
  "role": "Four-epoch full-8-GPU LoRA run on the same selected split; useful for overfit/metric tradeoff analysis.",
106
  "status": "verified",
107
  "title": "Four-epoch structured-JSON LoRA",
@@ -109,6 +121,7 @@
109
  "version": "v4"
110
  },
111
  {
 
112
  "dataset_contract": "xperience10m_episode_json_qa_v1",
113
  "dataset_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora",
114
  "eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
@@ -125,6 +138,8 @@
125
  "package": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
126
  "package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
127
  "public_matrix_role": "pinned prior release row and comparison baseline",
 
 
128
  "role": "Dense/multiscale selected-128 run with 4,032 held-out predictions; kept as the pinned prior release because several metrics remain stronger than v6.",
129
  "status": "verified",
130
  "title": "Multiscale cap96 LoRA",
@@ -132,6 +147,7 @@
132
  "version": "v5"
133
  },
134
  {
 
135
  "dataset_contract": "xperience10m_episode_json_qa_v1",
136
  "dataset_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora",
137
  "eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
@@ -148,6 +164,8 @@
148
  "package": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
149
  "package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
150
  "public_matrix_role": "current public 20-task Qwen3-Omni v6 LoRA row",
 
 
151
  "role": "Current verified Qwen3-Omni row: rank64/lr5e-5 multiscale LoRA plus task-specific probe artifacts used for the 20/20 Qwen matrix coverage.",
152
  "status": "verified",
153
  "title": "Rank64 lr5e-5 multiscale LoRA",
 
1
  {
2
  "current_public_matrix_row": "qwen3_omni_v6_lora",
3
+ "generated_at_utc": "2026-06-21T10:54:46+00:00",
4
+ "interpretation_rule": "Do not confuse the Qwen run versions with the project evidence lines. The project evidence lines are one public sample episode and selected 128-episode artifacts. Qwen v1-v6 are only the Qwen3-Omni run lineage inside the selected-128 line. The 20-task matrix uses Qwen3-Omni v6 LoRA; v5 remains the pinned prior release; v1-v4 are lineage and ablation evidence.",
5
  "pinned_prior_release": "v5",
6
  "related_engineering_artifacts": [
7
  {
 
17
  ],
18
  "runs": [
19
  {
20
+ "change_from_previous": "First verified Qwen3-Omni selected-128 LoRA run.",
21
  "dataset_contract": "xperience10m_episode_json_qa_v1",
22
  "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
23
  "eval_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
 
34
  "package": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
35
  "package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
36
  "public_matrix_role": "superseded lineage evidence, not the current 20-task Qwen row",
37
+ "purpose": "Prove that the selected-128 split, LoRA training, held-out eval, validation, and public packaging loop works end to end.",
38
+ "reader_use": "Use only as lineage evidence for the first working pipeline.",
39
  "role": "First verified 96/16/16 selected-episode Qwen3-Omni LoRA package; establishes dataset, training, eval, and packaging plumbing.",
40
  "status": "verified",
41
  "title": "Selected-128 validation-aware LoRA baseline",
 
43
  "version": "v1"
44
  },
45
  {
46
+ "change_from_previous": "Reused the selected-128 split with a stricter structured-JSON answer contract and full 8-GPU LoRA training.",
47
  "dataset_contract": "xperience10m_episode_json_qa_v1",
48
  "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
49
  "eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
 
60
  "package": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
61
  "package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
62
  "public_matrix_role": "superseded lineage evidence, not the current 20-task Qwen row",
63
+ "purpose": "Make the answer format schema-checked and reduce invalid JSON before expanding scale.",
64
+ "reader_use": "Use as evidence that schema-constrained evaluation improved validity and contact accuracy over v1.",
65
  "role": "Reuses the selected-128 split with a stricter structured JSON answer contract and full 8-GPU LoRA training.",
66
  "status": "verified",
67
  "title": "Structured-JSON reuse full-8-GPU LoRA",
 
69
  "version": "v2"
70
  },
71
  {
72
+ "change_from_previous": "Evaluated the v2 adapter with stricter labels and prompts; no new adapter training.",
73
  "dataset_contract": "xperience10m_episode_json_qa_v1",
74
  "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
75
  "eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full",
 
86
  "package": "xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full",
87
  "package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full",
88
  "public_matrix_role": "superseded prompt/eval lineage evidence",
89
+ "purpose": "Separate prompt/eval formatting effects from adapter-training effects.",
90
+ "reader_use": "Use as prompt/eval ablation evidence, not as a separate trained model.",
91
  "role": "Strict-label prompt/eval pass over the v2 adapter; improves JSON validity without introducing a new adapter training run.",
92
  "status": "verified",
93
  "title": "Strict-label prompt evaluation",
 
95
  "version": "v3"
96
  },
97
  {
98
+ "change_from_previous": "Trained a new four-epoch full-8-GPU LoRA adapter on the structured-JSON setup.",
99
  "dataset_contract": "xperience10m_episode_json_qa_v1",
100
  "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
101
  "eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
 
112
  "package": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
113
  "package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
114
  "public_matrix_role": "superseded lineage evidence, not the current 20-task Qwen row",
115
+ "purpose": "Test whether longer structured-JSON LoRA training improves the same selected split.",
116
+ "reader_use": "Use as overfit and metric-tradeoff evidence before the multiscale export.",
117
  "role": "Four-epoch full-8-GPU LoRA run on the same selected split; useful for overfit/metric tradeoff analysis.",
118
  "status": "verified",
119
  "title": "Four-epoch structured-JSON LoRA",
 
121
  "version": "v4"
122
  },
123
  {
124
+ "change_from_previous": "Introduced the multiscale cap96 export and larger held-out evaluation surface.",
125
  "dataset_contract": "xperience10m_episode_json_qa_v1",
126
  "dataset_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora",
127
  "eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
 
138
  "package": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
139
  "package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
140
  "public_matrix_role": "pinned prior release row and comparison baseline",
141
+ "purpose": "Move from the 448-sample compact eval to a denser multiscale 4,032-sample held-out eval.",
142
+ "reader_use": "Use as the pinned prior release; it remains stronger on JSON validity, subtask, next-action, object, and transition metrics.",
143
  "role": "Dense/multiscale selected-128 run with 4,032 held-out predictions; kept as the pinned prior release because several metrics remain stronger than v6.",
144
  "status": "verified",
145
  "title": "Multiscale cap96 LoRA",
 
147
  "version": "v5"
148
  },
149
  {
150
+ "change_from_previous": "Kept the multiscale setup, changed LoRA rank/lr to rank64/lr5e-5, and added verified task-specific probes for full 20-task coverage.",
151
  "dataset_contract": "xperience10m_episode_json_qa_v1",
152
  "dataset_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora",
153
  "eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
 
164
  "package": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
165
  "package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
166
  "public_matrix_role": "current public 20-task Qwen3-Omni v6 LoRA row",
167
+ "purpose": "Promote the current public Qwen3-Omni 20-task row with multiscale LoRA plus task-specific probes.",
168
+ "reader_use": "Use as the current public 20-task Qwen row; it improves action macro-F1 and contact accuracy while v5 remains the prior comparator.",
169
  "role": "Current verified Qwen3-Omni row: rank64/lr5e-5 multiscale LoRA plus task-specific probe artifacts used for the 20/20 Qwen matrix coverage.",
170
  "status": "verified",
171
  "title": "Rank64 lr5e-5 multiscale LoRA",
docs/data/research_roadmap.json CHANGED
@@ -151,7 +151,7 @@
151
  "entry_condition": "The selected episodes are prepared or a 3-8 episode dry run is available for preprocessing checks.",
152
  "deliverables": [
153
  "backbone registry",
154
- "Cosmos 3 world-model branch plan",
155
  "Cosmos3-Super Forward-Dynamics LoRA verified package",
156
  "Qwen3-Omni LoRA baseline plan",
157
  "OpenVLA/openpi/GR00T policy-branch candidates",
@@ -162,7 +162,7 @@
162
  "docs/data/foundation_model_plan.json",
163
  "research_roadmap_interactive.json"
164
  ],
165
- "reader_takeaway": "Qwen3-Omni remains the structured JSON held-out pilot; Cosmos 3 is the first world-model branch. Cosmos3-Super now has a verified forward-dynamics LoRA over camera-pose proxy targets, while VLA/policy models wait for robot-compatible action targets."
166
  },
167
  {
168
  "id": "robustness_run_64_128_episode",
@@ -202,7 +202,7 @@
202
  "qualitative inspection",
203
  "updated model cards"
204
  ],
205
- "reader_takeaway": "The Cosmos branch now includes Nano future-window compatibility and Super forward-dynamics LoRA; the long-term direction remains richer multimodal representation learning with model branches chosen by task fit rather than by a single default backbone."
206
  },
207
  {
208
  "id": "xperience_embodied_foundation_pretraining",
 
151
  "entry_condition": "The selected episodes are prepared or a 3-8 episode dry run is available for preprocessing checks.",
152
  "deliverables": [
153
  "backbone registry",
154
+ "Cosmos 3 world-model track plan",
155
  "Cosmos3-Super Forward-Dynamics LoRA verified package",
156
  "Qwen3-Omni LoRA baseline plan",
157
  "OpenVLA/openpi/GR00T policy-branch candidates",
 
162
  "docs/data/foundation_model_plan.json",
163
  "research_roadmap_interactive.json"
164
  ],
165
+ "reader_takeaway": "Qwen3-Omni remains the structured JSON held-out pilot; Cosmos 3 is the first world-model track. Cosmos3-Super now has a verified forward-dynamics LoRA over camera-pose proxy targets, while VLA/policy models wait for robot-compatible action targets."
166
  },
167
  {
168
  "id": "robustness_run_64_128_episode",
 
202
  "qualitative inspection",
203
  "updated model cards"
204
  ],
205
+ "reader_takeaway": "The Cosmos3 track now includes Nano future-window compatibility and Super forward-dynamics LoRA; the long-term direction remains richer multimodal representation learning with model tracks chosen by task fit rather than by a single default backbone."
206
  },
207
  {
208
  "id": "xperience_embodied_foundation_pretraining",
docs/data/research_roadmap_interactive.json CHANGED
@@ -132,23 +132,23 @@
132
  "public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
133
  "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
134
  "status": "planned_research_directions",
135
- "summary": "Concrete Xperience-10M project directions beyond the current minimal baselines, Qwen3-Omni LoRA plan, Cosmos/world-model branch, and long-term native pretraining goal.",
136
  "title": "Additional Development Directions"
137
  },
138
  "baseline_summary": {
139
  "baseline_heads": "minimal and neural MLP heads",
140
  "current_use": "task design, data-contract validation, case studies, and baseline comparison",
141
  "split": "chronological single-episode split for public-sample diagnostics",
142
- "task_count": 12
143
  },
144
  "directions": [
145
  {
146
  "code": "A",
147
  "counts": {
148
  "diagnostic": 0,
149
- "direct": 2,
150
- "proxy": 2,
151
- "total_links": 4
152
  },
153
  "current_readout": "The sample supports hand trajectory forecasting and contact/object probes, but it does not yet include a full body/shape model or multi-person priors.",
154
  "current_status": "partially implemented",
@@ -174,7 +174,9 @@
174
  "timeline_action",
175
  "hand_trajectory_forecast",
176
  "contact_prediction",
177
- "object_relevance"
 
 
178
  ],
179
  "tasks": [
180
  {
@@ -429,6 +431,84 @@
429
  "process_short": "object vocabulary -> multi-hot labels -> sigmoid heads",
430
  "research_name": "Object-Centric Interaction Recognition",
431
  "why": "Connects egocentric activity to manipulated objects and early object-centric state."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
432
  }
433
  ]
434
  },
@@ -436,9 +516,9 @@
436
  "code": "B",
437
  "counts": {
438
  "diagnostic": 1,
439
- "direct": 0,
440
- "proxy": 2,
441
- "total_links": 3
442
  },
443
  "current_readout": "The current suite checks cross-modal alignment and depth/video reconstruction proxies; it does not yet train a renderer or reconstruct geometry.",
444
  "current_status": "proxy tasks only",
@@ -463,7 +543,9 @@
463
  "task_ids": [
464
  "cross_modal_retrieval",
465
  "modality_reconstruction",
466
- "misalignment_detection"
 
 
467
  ],
468
  "tasks": [
469
  {
@@ -634,18 +716,96 @@
634
  "process_short": "aligned/shifted pairs -> feature combiner -> binary classifier",
635
  "research_name": "Cross-Modal Misalignment Detection",
636
  "why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
637
  }
638
  ]
639
  },
640
  {
641
  "code": "C",
642
  "counts": {
643
- "diagnostic": 3,
644
- "direct": 6,
645
- "proxy": 2,
646
- "total_links": 11
647
  },
648
- "current_readout": "Most of the 12 tasks directly target egocentric action, task state, interaction, grounding, and alignment.",
649
  "current_status": "strongest implemented track",
650
  "extension_tasks": [
651
  {
@@ -676,7 +836,13 @@
676
  "caption_grounding",
677
  "cross_modal_retrieval",
678
  "temporal_order",
679
- "misalignment_detection"
 
 
 
 
 
 
680
  ],
681
  "tasks": [
682
  {
@@ -1367,16 +1533,250 @@
1367
  "process_short": "aligned/shifted pairs -> feature combiner -> binary classifier",
1368
  "research_name": "Cross-Modal Misalignment Detection",
1369
  "why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1370
  }
1371
  ]
1372
  },
1373
  {
1374
  "code": "D",
1375
  "counts": {
1376
- "diagnostic": 3,
1377
- "direct": 0,
1378
- "proxy": 6,
1379
- "total_links": 9
1380
  },
1381
  "current_readout": "The current tasks probe temporal structure, object relevance, cross-modal retrieval, and modality prediction, but they do not yet build persistent maps or scene graphs.",
1382
  "current_status": "early proxy tasks",
@@ -1407,7 +1807,13 @@
1407
  "cross_modal_retrieval",
1408
  "modality_reconstruction",
1409
  "temporal_order",
1410
- "misalignment_detection"
 
 
 
 
 
 
1411
  ],
1412
  "tasks": [
1413
  {
@@ -1960,34 +2366,268 @@
1960
  "process_short": "aligned/shifted pairs -> feature combiner -> binary classifier",
1961
  "research_name": "Cross-Modal Misalignment Detection",
1962
  "why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models."
1963
- }
1964
- ]
1965
- }
1966
- ],
1967
- "foundation_model_plan": {
1968
- "decision": {
1969
- "external_reasoning_reference": "Gemini Robotics",
1970
- "first_policy_branch_candidates": [
1971
- "OpenVLA / OpenVLA-OFT",
1972
- "openpi pi0/pi0.5",
1973
- "NVIDIA GR00T"
1974
- ],
1975
- "first_world_model_branch": "Cosmos 3",
1976
- "immediate_trainable_backbone": "Qwen3-Omni",
1977
- "long_term_native_pretraining_goal": "Xperience Embodied Foundation Model"
1978
- },
1979
- "evaluation_additions": [
1980
- {
1981
- "metrics": [
1982
- "JSON validity",
1983
- "macro-F1",
1984
- "accuracy",
1985
- "micro-F1"
1986
- ],
1987
- "model_families": [
1988
- "Qwen3-Omni",
1989
- "Gemini Robotics reference"
1990
- ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1991
  "target": "structured_task_prediction"
1992
  },
1993
  {
@@ -2046,7 +2686,7 @@
2046
  },
2047
  {
2048
  "action": "Promote Cosmos 3 beyond the current Nano compatibility and Super forward-dynamics runs only when loss metrics, preprocessing, and storage justify the added compute.",
2049
- "name": "World-model branch",
2050
  "step": 4
2051
  },
2052
  {
@@ -2222,7 +2862,7 @@
2222
  ],
2223
  "status": "planning_artifact"
2224
  },
2225
- "generated_at_utc": "2026-06-18T16:36:15+00:00",
2226
  "omni_plan": {
2227
  "adapter": "LoRA rank 16, alpha 32, dropout 0.05",
2228
  "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
@@ -2385,7 +3025,7 @@
2385
  ],
2386
  "deliverables": [
2387
  "backbone registry",
2388
- "Cosmos 3 world-model branch plan",
2389
  "Cosmos3-Super Forward-Dynamics LoRA verified package",
2390
  "Qwen3-Omni LoRA baseline plan",
2391
  "OpenVLA/openpi/GR00T policy-branch candidates",
@@ -2394,7 +3034,7 @@
2394
  "entry_condition": "The selected episodes are prepared or a 3-8 episode dry run is available for preprocessing checks.",
2395
  "id": "foundation_model_selection_matrix",
2396
  "name": "Foundation-Model Selection Matrix",
2397
- "reader_takeaway": "Qwen3-Omni remains the structured JSON held-out pilot; Cosmos 3 is the first world-model branch. Cosmos3-Super now has a verified forward-dynamics LoRA over camera-pose proxy targets, while VLA/policy models wait for robot-compatible action targets.",
2398
  "stage": "future",
2399
  "status": "current"
2400
  },
@@ -2436,7 +3076,7 @@
2436
  "entry_condition": "Enough multi-episode data, compute budget, and model-specific action/world-state targets.",
2437
  "id": "foundation_world_model_extensions",
2438
  "name": "Cosmos 3 and Policy-Model Extensions",
2439
- "reader_takeaway": "The Cosmos branch now includes Nano future-window compatibility and Super forward-dynamics LoRA; the long-term direction remains richer multimodal representation learning with model branches chosen by task fit rather than by a single default backbone.",
2440
  "stage": "future",
2441
  "status": "planned"
2442
  },
@@ -3242,6 +3882,318 @@
3242
  "process_short": "aligned/shifted pairs -> feature combiner -> binary classifier",
3243
  "research_name": "Cross-Modal Misalignment Detection",
3244
  "why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3245
  }
3246
  ],
3247
  "three_foundation_pipelines": {
 
132
  "public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
133
  "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
134
  "status": "planned_research_directions",
135
+ "summary": "Concrete Xperience-10M project directions beyond the current minimal baselines, Qwen3-Omni LoRA plan, Cosmos/world-model track, and long-term native pretraining goal.",
136
  "title": "Additional Development Directions"
137
  },
138
  "baseline_summary": {
139
  "baseline_heads": "minimal and neural MLP heads",
140
  "current_use": "task design, data-contract validation, case studies, and baseline comparison",
141
  "split": "chronological single-episode split for public-sample diagnostics",
142
+ "task_count": 20
143
  },
144
  "directions": [
145
  {
146
  "code": "A",
147
  "counts": {
148
  "diagnostic": 0,
149
+ "direct": 3,
150
+ "proxy": 3,
151
+ "total_links": 6
152
  },
153
  "current_readout": "The sample supports hand trajectory forecasting and contact/object probes, but it does not yet include a full body/shape model or multi-person priors.",
154
  "current_status": "partially implemented",
 
174
  "timeline_action",
175
  "hand_trajectory_forecast",
176
  "contact_prediction",
177
+ "object_relevance",
178
+ "interaction_text_prediction",
179
+ "imu_to_hand_pose"
180
  ],
181
  "tasks": [
182
  {
 
431
  "process_short": "object vocabulary -> multi-hot labels -> sigmoid heads",
432
  "research_name": "Object-Centric Interaction Recognition",
433
  "why": "Connects egocentric activity to manipulated objects and early object-centric state."
434
+ },
435
+ {
436
+ "architecture_family": null,
437
+ "case_study": null,
438
+ "current_limit": "Public derived features retain hashed text targets; raw full text requires the official annotation source.",
439
+ "direction_roles": {
440
+ "A": "proxy",
441
+ "C": "direct"
442
+ },
443
+ "display_name": "Interaction text prediction",
444
+ "evidence_links": [
445
+ {
446
+ "href": "data/task_walkthroughs.json",
447
+ "label": "Task walkthrough"
448
+ },
449
+ {
450
+ "href": "single_episode_explorer.html",
451
+ "label": "Single-episode explorer"
452
+ }
453
+ ],
454
+ "family": "classification",
455
+ "id": "interaction_text_prediction",
456
+ "input": null,
457
+ "input_short": null,
458
+ "metric": {
459
+ "better_baseline": "minimal",
460
+ "direction": "higher",
461
+ "key": "macro_f1",
462
+ "minimal": 0.0444,
463
+ "name": "macro-F1",
464
+ "neural_mlp": 0.0381
465
+ },
466
+ "modalities": [],
467
+ "module_summary": null,
468
+ "output_short": null,
469
+ "primary_direction": "C",
470
+ "process_short": null,
471
+ "research_name": "Interaction text prediction",
472
+ "why": "Connects egocentric observations to the natural-language interaction semantics carried by the annotation."
473
+ },
474
+ {
475
+ "architecture_family": null,
476
+ "case_study": null,
477
+ "current_limit": "Pose reconstruction is window-level and does not yet fit a full parametric hand/body model.",
478
+ "direction_roles": {
479
+ "A": "direct",
480
+ "B": "proxy"
481
+ },
482
+ "display_name": "IMU-to-hand pose reconstruction",
483
+ "evidence_links": [
484
+ {
485
+ "href": "data/task_walkthroughs.json",
486
+ "label": "Task walkthrough"
487
+ },
488
+ {
489
+ "href": "single_episode_explorer.html",
490
+ "label": "Single-episode explorer"
491
+ }
492
+ ],
493
+ "family": "regression",
494
+ "id": "imu_to_hand_pose",
495
+ "input": null,
496
+ "input_short": null,
497
+ "metric": {
498
+ "better_baseline": "minimal",
499
+ "direction": "lower",
500
+ "key": "mae",
501
+ "minimal": 0.042,
502
+ "name": "MAE",
503
+ "neural_mlp": 0.0426
504
+ },
505
+ "modalities": [],
506
+ "module_summary": null,
507
+ "output_short": null,
508
+ "primary_direction": "A",
509
+ "process_short": null,
510
+ "research_name": "IMU-to-hand pose reconstruction",
511
+ "why": "Measures human-motion reconstruction from wearable and motion cues."
512
  }
513
  ]
514
  },
 
516
  "code": "B",
517
  "counts": {
518
  "diagnostic": 1,
519
+ "direct": 1,
520
+ "proxy": 3,
521
+ "total_links": 5
522
  },
523
  "current_readout": "The current suite checks cross-modal alignment and depth/video reconstruction proxies; it does not yet train a renderer or reconstruct geometry.",
524
  "current_status": "proxy tasks only",
 
543
  "task_ids": [
544
  "cross_modal_retrieval",
545
  "modality_reconstruction",
546
+ "misalignment_detection",
547
+ "imu_to_hand_pose",
548
+ "camera_view_sync_retrieval"
549
  ],
550
  "tasks": [
551
  {
 
716
  "process_short": "aligned/shifted pairs -> feature combiner -> binary classifier",
717
  "research_name": "Cross-Modal Misalignment Detection",
718
  "why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models."
719
+ },
720
+ {
721
+ "architecture_family": null,
722
+ "case_study": null,
723
+ "current_limit": "Pose reconstruction is window-level and does not yet fit a full parametric hand/body model.",
724
+ "direction_roles": {
725
+ "A": "direct",
726
+ "B": "proxy"
727
+ },
728
+ "display_name": "IMU-to-hand pose reconstruction",
729
+ "evidence_links": [
730
+ {
731
+ "href": "data/task_walkthroughs.json",
732
+ "label": "Task walkthrough"
733
+ },
734
+ {
735
+ "href": "single_episode_explorer.html",
736
+ "label": "Single-episode explorer"
737
+ }
738
+ ],
739
+ "family": "regression",
740
+ "id": "imu_to_hand_pose",
741
+ "input": null,
742
+ "input_short": null,
743
+ "metric": {
744
+ "better_baseline": "minimal",
745
+ "direction": "lower",
746
+ "key": "mae",
747
+ "minimal": 0.042,
748
+ "name": "MAE",
749
+ "neural_mlp": 0.0426
750
+ },
751
+ "modalities": [],
752
+ "module_summary": null,
753
+ "output_short": null,
754
+ "primary_direction": "A",
755
+ "process_short": null,
756
+ "research_name": "IMU-to-hand pose reconstruction",
757
+ "why": "Measures human-motion reconstruction from wearable and motion cues."
758
+ },
759
+ {
760
+ "architecture_family": null,
761
+ "case_study": null,
762
+ "current_limit": "Retrieval checks view consistency but does not reconstruct geometry by itself.",
763
+ "direction_roles": {
764
+ "B": "direct",
765
+ "D": "proxy"
766
+ },
767
+ "display_name": "Camera-view synchronization retrieval",
768
+ "evidence_links": [
769
+ {
770
+ "href": "data/task_walkthroughs.json",
771
+ "label": "Task walkthrough"
772
+ },
773
+ {
774
+ "href": "single_episode_explorer.html",
775
+ "label": "Single-episode explorer"
776
+ }
777
+ ],
778
+ "family": "retrieval",
779
+ "id": "camera_view_sync_retrieval",
780
+ "input": null,
781
+ "input_short": null,
782
+ "metric": {
783
+ "better_baseline": "minimal",
784
+ "direction": "higher",
785
+ "key": "mrr",
786
+ "minimal": 0.4943,
787
+ "name": "MRR",
788
+ "neural_mlp": 0.2409
789
+ },
790
+ "modalities": [],
791
+ "module_summary": null,
792
+ "output_short": null,
793
+ "primary_direction": "B",
794
+ "process_short": null,
795
+ "research_name": "Camera-view synchronization retrieval",
796
+ "why": "Tests whether synchronized multi-view structure is recoverable across camera streams."
797
  }
798
  ]
799
  },
800
  {
801
  "code": "C",
802
  "counts": {
803
+ "diagnostic": 4,
804
+ "direct": 10,
805
+ "proxy": 3,
806
+ "total_links": 17
807
  },
808
+ "current_readout": "The unified 20-task suite directly targets egocentric action, task state, interaction, grounding, forecasting, and alignment.",
809
  "current_status": "strongest implemented track",
810
  "extension_tasks": [
811
  {
 
836
  "caption_grounding",
837
  "cross_modal_retrieval",
838
  "temporal_order",
839
+ "misalignment_detection",
840
+ "long_horizon_next_action",
841
+ "next_subtask_forecast",
842
+ "interaction_text_prediction",
843
+ "action_object_relation",
844
+ "object_set_forecast",
845
+ "time_to_transition"
846
  ],
847
  "tasks": [
848
  {
 
1533
  "process_short": "aligned/shifted pairs -> feature combiner -> binary classifier",
1534
  "research_name": "Cross-Modal Misalignment Detection",
1535
  "why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models."
1536
+ },
1537
+ {
1538
+ "architecture_family": null,
1539
+ "case_study": null,
1540
+ "current_limit": "Evaluated from sample-supported future labels, not full open-world action generation.",
1541
+ "direction_roles": {
1542
+ "C": "direct",
1543
+ "D": "proxy"
1544
+ },
1545
+ "display_name": "Long-horizon next-action forecasting",
1546
+ "evidence_links": [
1547
+ {
1548
+ "href": "data/task_walkthroughs.json",
1549
+ "label": "Task walkthrough"
1550
+ },
1551
+ {
1552
+ "href": "single_episode_explorer.html",
1553
+ "label": "Single-episode explorer"
1554
+ }
1555
+ ],
1556
+ "family": "classification",
1557
+ "id": "long_horizon_next_action",
1558
+ "input": null,
1559
+ "input_short": null,
1560
+ "metric": {
1561
+ "better_baseline": "minimal",
1562
+ "direction": "higher",
1563
+ "key": "macro_f1",
1564
+ "minimal": 0.075,
1565
+ "name": "macro-F1",
1566
+ "neural_mlp": 0.0655
1567
+ },
1568
+ "modalities": [],
1569
+ "module_summary": null,
1570
+ "output_short": null,
1571
+ "primary_direction": "C",
1572
+ "process_short": null,
1573
+ "research_name": "Long-horizon next-action forecasting",
1574
+ "why": "Extends short-horizon intention prediction into longer activity futures, a key egocentric and world-model signal."
1575
+ },
1576
+ {
1577
+ "architecture_family": null,
1578
+ "case_study": null,
1579
+ "current_limit": "Subtask labels are constrained to the available annotation vocabulary.",
1580
+ "direction_roles": {
1581
+ "C": "direct",
1582
+ "D": "proxy"
1583
+ },
1584
+ "display_name": "Long-horizon next-subtask forecasting",
1585
+ "evidence_links": [
1586
+ {
1587
+ "href": "data/task_walkthroughs.json",
1588
+ "label": "Task walkthrough"
1589
+ },
1590
+ {
1591
+ "href": "single_episode_explorer.html",
1592
+ "label": "Single-episode explorer"
1593
+ }
1594
+ ],
1595
+ "family": "classification",
1596
+ "id": "next_subtask_forecast",
1597
+ "input": null,
1598
+ "input_short": null,
1599
+ "metric": {
1600
+ "better_baseline": "neural_mlp",
1601
+ "direction": "higher",
1602
+ "key": "macro_f1",
1603
+ "minimal": 0.0455,
1604
+ "name": "macro-F1",
1605
+ "neural_mlp": 0.0507
1606
+ },
1607
+ "modalities": [],
1608
+ "module_summary": null,
1609
+ "output_short": null,
1610
+ "primary_direction": "C",
1611
+ "process_short": null,
1612
+ "research_name": "Long-horizon next-subtask forecasting",
1613
+ "why": "Measures whether the model can anticipate the next procedural phase rather than only the current frame state."
1614
+ },
1615
+ {
1616
+ "architecture_family": null,
1617
+ "case_study": null,
1618
+ "current_limit": "Public derived features retain hashed text targets; raw full text requires the official annotation source.",
1619
+ "direction_roles": {
1620
+ "A": "proxy",
1621
+ "C": "direct"
1622
+ },
1623
+ "display_name": "Interaction text prediction",
1624
+ "evidence_links": [
1625
+ {
1626
+ "href": "data/task_walkthroughs.json",
1627
+ "label": "Task walkthrough"
1628
+ },
1629
+ {
1630
+ "href": "single_episode_explorer.html",
1631
+ "label": "Single-episode explorer"
1632
+ }
1633
+ ],
1634
+ "family": "classification",
1635
+ "id": "interaction_text_prediction",
1636
+ "input": null,
1637
+ "input_short": null,
1638
+ "metric": {
1639
+ "better_baseline": "minimal",
1640
+ "direction": "higher",
1641
+ "key": "macro_f1",
1642
+ "minimal": 0.0444,
1643
+ "name": "macro-F1",
1644
+ "neural_mlp": 0.0381
1645
+ },
1646
+ "modalities": [],
1647
+ "module_summary": null,
1648
+ "output_short": null,
1649
+ "primary_direction": "C",
1650
+ "process_short": null,
1651
+ "research_name": "Interaction text prediction",
1652
+ "why": "Connects egocentric observations to the natural-language interaction semantics carried by the annotation."
1653
+ },
1654
+ {
1655
+ "architecture_family": null,
1656
+ "case_study": null,
1657
+ "current_limit": "Relation labels are derived from the public-sample annotation scope.",
1658
+ "direction_roles": {
1659
+ "C": "direct",
1660
+ "D": "proxy"
1661
+ },
1662
+ "display_name": "Action-object relation prediction",
1663
+ "evidence_links": [
1664
+ {
1665
+ "href": "data/task_walkthroughs.json",
1666
+ "label": "Task walkthrough"
1667
+ },
1668
+ {
1669
+ "href": "single_episode_explorer.html",
1670
+ "label": "Single-episode explorer"
1671
+ }
1672
+ ],
1673
+ "family": "classification",
1674
+ "id": "action_object_relation",
1675
+ "input": null,
1676
+ "input_short": null,
1677
+ "metric": {
1678
+ "better_baseline": "tie",
1679
+ "direction": "higher",
1680
+ "key": "macro_f1",
1681
+ "minimal": 0.0,
1682
+ "name": "macro-F1",
1683
+ "neural_mlp": 0.0
1684
+ },
1685
+ "modalities": [],
1686
+ "module_summary": null,
1687
+ "output_short": null,
1688
+ "primary_direction": "C",
1689
+ "process_short": null,
1690
+ "research_name": "Action-object relation prediction",
1691
+ "why": "Tests whether action recognition and object state are connected as a relational interaction representation."
1692
+ },
1693
+ {
1694
+ "architecture_family": null,
1695
+ "case_study": null,
1696
+ "current_limit": "This is a set-level proxy, not a persistent 3D scene graph.",
1697
+ "direction_roles": {
1698
+ "C": "proxy",
1699
+ "D": "direct"
1700
+ },
1701
+ "display_name": "Future object-set forecasting",
1702
+ "evidence_links": [
1703
+ {
1704
+ "href": "data/task_walkthroughs.json",
1705
+ "label": "Task walkthrough"
1706
+ },
1707
+ {
1708
+ "href": "single_episode_explorer.html",
1709
+ "label": "Single-episode explorer"
1710
+ }
1711
+ ],
1712
+ "family": "multi-label",
1713
+ "id": "object_set_forecast",
1714
+ "input": null,
1715
+ "input_short": null,
1716
+ "metric": {
1717
+ "better_baseline": "neural_mlp",
1718
+ "direction": "higher",
1719
+ "key": "micro_f1",
1720
+ "minimal": 0.1694,
1721
+ "name": "micro-F1",
1722
+ "neural_mlp": 0.1972
1723
+ },
1724
+ "modalities": [],
1725
+ "module_summary": null,
1726
+ "output_short": null,
1727
+ "primary_direction": "D",
1728
+ "process_short": null,
1729
+ "research_name": "Future object-set forecasting",
1730
+ "why": "Asks whether the current scene state supports predicting which objects will matter later."
1731
+ },
1732
+ {
1733
+ "architecture_family": null,
1734
+ "case_study": null,
1735
+ "current_limit": "Regression is local to the annotated public sample timeline.",
1736
+ "direction_roles": {
1737
+ "C": "diagnostic",
1738
+ "D": "diagnostic"
1739
+ },
1740
+ "display_name": "Time-to-next-transition regression",
1741
+ "evidence_links": [
1742
+ {
1743
+ "href": "data/task_walkthroughs.json",
1744
+ "label": "Task walkthrough"
1745
+ },
1746
+ {
1747
+ "href": "single_episode_explorer.html",
1748
+ "label": "Single-episode explorer"
1749
+ }
1750
+ ],
1751
+ "family": "regression",
1752
+ "id": "time_to_transition",
1753
+ "input": null,
1754
+ "input_short": null,
1755
+ "metric": {
1756
+ "better_baseline": "minimal",
1757
+ "direction": "lower",
1758
+ "key": "mae",
1759
+ "minimal": 10.5374,
1760
+ "name": "MAE frames",
1761
+ "neural_mlp": 10.5545
1762
+ },
1763
+ "modalities": [],
1764
+ "module_summary": null,
1765
+ "output_short": null,
1766
+ "primary_direction": "C",
1767
+ "process_short": null,
1768
+ "research_name": "Time-to-next-transition regression",
1769
+ "why": "Measures temporal boundary awareness as a continuous timing target."
1770
  }
1771
  ]
1772
  },
1773
  {
1774
  "code": "D",
1775
  "counts": {
1776
+ "diagnostic": 4,
1777
+ "direct": 1,
1778
+ "proxy": 10,
1779
+ "total_links": 15
1780
  },
1781
  "current_readout": "The current tasks probe temporal structure, object relevance, cross-modal retrieval, and modality prediction, but they do not yet build persistent maps or scene graphs.",
1782
  "current_status": "early proxy tasks",
 
1807
  "cross_modal_retrieval",
1808
  "modality_reconstruction",
1809
  "temporal_order",
1810
+ "misalignment_detection",
1811
+ "long_horizon_next_action",
1812
+ "next_subtask_forecast",
1813
+ "action_object_relation",
1814
+ "object_set_forecast",
1815
+ "camera_view_sync_retrieval",
1816
+ "time_to_transition"
1817
  ],
1818
  "tasks": [
1819
  {
 
2366
  "process_short": "aligned/shifted pairs -> feature combiner -> binary classifier",
2367
  "research_name": "Cross-Modal Misalignment Detection",
2368
  "why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models."
2369
+ },
2370
+ {
2371
+ "architecture_family": null,
2372
+ "case_study": null,
2373
+ "current_limit": "Evaluated from sample-supported future labels, not full open-world action generation.",
2374
+ "direction_roles": {
2375
+ "C": "direct",
2376
+ "D": "proxy"
2377
+ },
2378
+ "display_name": "Long-horizon next-action forecasting",
2379
+ "evidence_links": [
2380
+ {
2381
+ "href": "data/task_walkthroughs.json",
2382
+ "label": "Task walkthrough"
2383
+ },
2384
+ {
2385
+ "href": "single_episode_explorer.html",
2386
+ "label": "Single-episode explorer"
2387
+ }
2388
+ ],
2389
+ "family": "classification",
2390
+ "id": "long_horizon_next_action",
2391
+ "input": null,
2392
+ "input_short": null,
2393
+ "metric": {
2394
+ "better_baseline": "minimal",
2395
+ "direction": "higher",
2396
+ "key": "macro_f1",
2397
+ "minimal": 0.075,
2398
+ "name": "macro-F1",
2399
+ "neural_mlp": 0.0655
2400
+ },
2401
+ "modalities": [],
2402
+ "module_summary": null,
2403
+ "output_short": null,
2404
+ "primary_direction": "C",
2405
+ "process_short": null,
2406
+ "research_name": "Long-horizon next-action forecasting",
2407
+ "why": "Extends short-horizon intention prediction into longer activity futures, a key egocentric and world-model signal."
2408
+ },
2409
+ {
2410
+ "architecture_family": null,
2411
+ "case_study": null,
2412
+ "current_limit": "Subtask labels are constrained to the available annotation vocabulary.",
2413
+ "direction_roles": {
2414
+ "C": "direct",
2415
+ "D": "proxy"
2416
+ },
2417
+ "display_name": "Long-horizon next-subtask forecasting",
2418
+ "evidence_links": [
2419
+ {
2420
+ "href": "data/task_walkthroughs.json",
2421
+ "label": "Task walkthrough"
2422
+ },
2423
+ {
2424
+ "href": "single_episode_explorer.html",
2425
+ "label": "Single-episode explorer"
2426
+ }
2427
+ ],
2428
+ "family": "classification",
2429
+ "id": "next_subtask_forecast",
2430
+ "input": null,
2431
+ "input_short": null,
2432
+ "metric": {
2433
+ "better_baseline": "neural_mlp",
2434
+ "direction": "higher",
2435
+ "key": "macro_f1",
2436
+ "minimal": 0.0455,
2437
+ "name": "macro-F1",
2438
+ "neural_mlp": 0.0507
2439
+ },
2440
+ "modalities": [],
2441
+ "module_summary": null,
2442
+ "output_short": null,
2443
+ "primary_direction": "C",
2444
+ "process_short": null,
2445
+ "research_name": "Long-horizon next-subtask forecasting",
2446
+ "why": "Measures whether the model can anticipate the next procedural phase rather than only the current frame state."
2447
+ },
2448
+ {
2449
+ "architecture_family": null,
2450
+ "case_study": null,
2451
+ "current_limit": "Relation labels are derived from the public-sample annotation scope.",
2452
+ "direction_roles": {
2453
+ "C": "direct",
2454
+ "D": "proxy"
2455
+ },
2456
+ "display_name": "Action-object relation prediction",
2457
+ "evidence_links": [
2458
+ {
2459
+ "href": "data/task_walkthroughs.json",
2460
+ "label": "Task walkthrough"
2461
+ },
2462
+ {
2463
+ "href": "single_episode_explorer.html",
2464
+ "label": "Single-episode explorer"
2465
+ }
2466
+ ],
2467
+ "family": "classification",
2468
+ "id": "action_object_relation",
2469
+ "input": null,
2470
+ "input_short": null,
2471
+ "metric": {
2472
+ "better_baseline": "tie",
2473
+ "direction": "higher",
2474
+ "key": "macro_f1",
2475
+ "minimal": 0.0,
2476
+ "name": "macro-F1",
2477
+ "neural_mlp": 0.0
2478
+ },
2479
+ "modalities": [],
2480
+ "module_summary": null,
2481
+ "output_short": null,
2482
+ "primary_direction": "C",
2483
+ "process_short": null,
2484
+ "research_name": "Action-object relation prediction",
2485
+ "why": "Tests whether action recognition and object state are connected as a relational interaction representation."
2486
+ },
2487
+ {
2488
+ "architecture_family": null,
2489
+ "case_study": null,
2490
+ "current_limit": "This is a set-level proxy, not a persistent 3D scene graph.",
2491
+ "direction_roles": {
2492
+ "C": "proxy",
2493
+ "D": "direct"
2494
+ },
2495
+ "display_name": "Future object-set forecasting",
2496
+ "evidence_links": [
2497
+ {
2498
+ "href": "data/task_walkthroughs.json",
2499
+ "label": "Task walkthrough"
2500
+ },
2501
+ {
2502
+ "href": "single_episode_explorer.html",
2503
+ "label": "Single-episode explorer"
2504
+ }
2505
+ ],
2506
+ "family": "multi-label",
2507
+ "id": "object_set_forecast",
2508
+ "input": null,
2509
+ "input_short": null,
2510
+ "metric": {
2511
+ "better_baseline": "neural_mlp",
2512
+ "direction": "higher",
2513
+ "key": "micro_f1",
2514
+ "minimal": 0.1694,
2515
+ "name": "micro-F1",
2516
+ "neural_mlp": 0.1972
2517
+ },
2518
+ "modalities": [],
2519
+ "module_summary": null,
2520
+ "output_short": null,
2521
+ "primary_direction": "D",
2522
+ "process_short": null,
2523
+ "research_name": "Future object-set forecasting",
2524
+ "why": "Asks whether the current scene state supports predicting which objects will matter later."
2525
+ },
2526
+ {
2527
+ "architecture_family": null,
2528
+ "case_study": null,
2529
+ "current_limit": "Retrieval checks view consistency but does not reconstruct geometry by itself.",
2530
+ "direction_roles": {
2531
+ "B": "direct",
2532
+ "D": "proxy"
2533
+ },
2534
+ "display_name": "Camera-view synchronization retrieval",
2535
+ "evidence_links": [
2536
+ {
2537
+ "href": "data/task_walkthroughs.json",
2538
+ "label": "Task walkthrough"
2539
+ },
2540
+ {
2541
+ "href": "single_episode_explorer.html",
2542
+ "label": "Single-episode explorer"
2543
+ }
2544
+ ],
2545
+ "family": "retrieval",
2546
+ "id": "camera_view_sync_retrieval",
2547
+ "input": null,
2548
+ "input_short": null,
2549
+ "metric": {
2550
+ "better_baseline": "minimal",
2551
+ "direction": "higher",
2552
+ "key": "mrr",
2553
+ "minimal": 0.4943,
2554
+ "name": "MRR",
2555
+ "neural_mlp": 0.2409
2556
+ },
2557
+ "modalities": [],
2558
+ "module_summary": null,
2559
+ "output_short": null,
2560
+ "primary_direction": "B",
2561
+ "process_short": null,
2562
+ "research_name": "Camera-view synchronization retrieval",
2563
+ "why": "Tests whether synchronized multi-view structure is recoverable across camera streams."
2564
+ },
2565
+ {
2566
+ "architecture_family": null,
2567
+ "case_study": null,
2568
+ "current_limit": "Regression is local to the annotated public sample timeline.",
2569
+ "direction_roles": {
2570
+ "C": "diagnostic",
2571
+ "D": "diagnostic"
2572
+ },
2573
+ "display_name": "Time-to-next-transition regression",
2574
+ "evidence_links": [
2575
+ {
2576
+ "href": "data/task_walkthroughs.json",
2577
+ "label": "Task walkthrough"
2578
+ },
2579
+ {
2580
+ "href": "single_episode_explorer.html",
2581
+ "label": "Single-episode explorer"
2582
+ }
2583
+ ],
2584
+ "family": "regression",
2585
+ "id": "time_to_transition",
2586
+ "input": null,
2587
+ "input_short": null,
2588
+ "metric": {
2589
+ "better_baseline": "minimal",
2590
+ "direction": "lower",
2591
+ "key": "mae",
2592
+ "minimal": 10.5374,
2593
+ "name": "MAE frames",
2594
+ "neural_mlp": 10.5545
2595
+ },
2596
+ "modalities": [],
2597
+ "module_summary": null,
2598
+ "output_short": null,
2599
+ "primary_direction": "C",
2600
+ "process_short": null,
2601
+ "research_name": "Time-to-next-transition regression",
2602
+ "why": "Measures temporal boundary awareness as a continuous timing target."
2603
+ }
2604
+ ]
2605
+ }
2606
+ ],
2607
+ "foundation_model_plan": {
2608
+ "decision": {
2609
+ "external_reasoning_reference": "Gemini Robotics",
2610
+ "first_policy_branch_candidates": [
2611
+ "OpenVLA / OpenVLA-OFT",
2612
+ "openpi pi0/pi0.5",
2613
+ "NVIDIA GR00T"
2614
+ ],
2615
+ "first_world_model_branch": "Cosmos 3",
2616
+ "immediate_trainable_backbone": "Qwen3-Omni",
2617
+ "long_term_native_pretraining_goal": "Xperience Embodied Foundation Model"
2618
+ },
2619
+ "evaluation_additions": [
2620
+ {
2621
+ "metrics": [
2622
+ "JSON validity",
2623
+ "macro-F1",
2624
+ "accuracy",
2625
+ "micro-F1"
2626
+ ],
2627
+ "model_families": [
2628
+ "Qwen3-Omni",
2629
+ "Gemini Robotics reference"
2630
+ ],
2631
  "target": "structured_task_prediction"
2632
  },
2633
  {
 
2686
  },
2687
  {
2688
  "action": "Promote Cosmos 3 beyond the current Nano compatibility and Super forward-dynamics runs only when loss metrics, preprocessing, and storage justify the added compute.",
2689
+ "name": "World-model track",
2690
  "step": 4
2691
  },
2692
  {
 
2862
  ],
2863
  "status": "planning_artifact"
2864
  },
2865
+ "generated_at_utc": "2026-06-21T10:51:52+00:00",
2866
  "omni_plan": {
2867
  "adapter": "LoRA rank 16, alpha 32, dropout 0.05",
2868
  "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
 
3025
  ],
3026
  "deliverables": [
3027
  "backbone registry",
3028
+ "Cosmos 3 world-model track plan",
3029
  "Cosmos3-Super Forward-Dynamics LoRA verified package",
3030
  "Qwen3-Omni LoRA baseline plan",
3031
  "OpenVLA/openpi/GR00T policy-branch candidates",
 
3034
  "entry_condition": "The selected episodes are prepared or a 3-8 episode dry run is available for preprocessing checks.",
3035
  "id": "foundation_model_selection_matrix",
3036
  "name": "Foundation-Model Selection Matrix",
3037
+ "reader_takeaway": "Qwen3-Omni remains the structured JSON held-out pilot; Cosmos 3 is the first world-model track. Cosmos3-Super now has a verified forward-dynamics LoRA over camera-pose proxy targets, while VLA/policy models wait for robot-compatible action targets.",
3038
  "stage": "future",
3039
  "status": "current"
3040
  },
 
3076
  "entry_condition": "Enough multi-episode data, compute budget, and model-specific action/world-state targets.",
3077
  "id": "foundation_world_model_extensions",
3078
  "name": "Cosmos 3 and Policy-Model Extensions",
3079
+ "reader_takeaway": "The Cosmos3 track now includes Nano future-window compatibility and Super forward-dynamics LoRA; the long-term direction remains richer multimodal representation learning with model tracks chosen by task fit rather than by a single default backbone.",
3080
  "stage": "future",
3081
  "status": "planned"
3082
  },
 
3882
  "process_short": "aligned/shifted pairs -> feature combiner -> binary classifier",
3883
  "research_name": "Cross-Modal Misalignment Detection",
3884
  "why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models."
3885
+ },
3886
+ {
3887
+ "architecture_family": null,
3888
+ "case_study": null,
3889
+ "current_limit": "Evaluated from sample-supported future labels, not full open-world action generation.",
3890
+ "direction_roles": {
3891
+ "C": "direct",
3892
+ "D": "proxy"
3893
+ },
3894
+ "display_name": "Long-horizon next-action forecasting",
3895
+ "evidence_links": [
3896
+ {
3897
+ "href": "data/task_walkthroughs.json",
3898
+ "label": "Task walkthrough"
3899
+ },
3900
+ {
3901
+ "href": "single_episode_explorer.html",
3902
+ "label": "Single-episode explorer"
3903
+ }
3904
+ ],
3905
+ "family": "classification",
3906
+ "id": "long_horizon_next_action",
3907
+ "input": null,
3908
+ "input_short": null,
3909
+ "metric": {
3910
+ "better_baseline": "minimal",
3911
+ "direction": "higher",
3912
+ "key": "macro_f1",
3913
+ "minimal": 0.075,
3914
+ "name": "macro-F1",
3915
+ "neural_mlp": 0.0655
3916
+ },
3917
+ "modalities": [],
3918
+ "module_summary": null,
3919
+ "output_short": null,
3920
+ "primary_direction": "C",
3921
+ "process_short": null,
3922
+ "research_name": "Long-horizon next-action forecasting",
3923
+ "why": "Extends short-horizon intention prediction into longer activity futures, a key egocentric and world-model signal."
3924
+ },
3925
+ {
3926
+ "architecture_family": null,
3927
+ "case_study": null,
3928
+ "current_limit": "Subtask labels are constrained to the available annotation vocabulary.",
3929
+ "direction_roles": {
3930
+ "C": "direct",
3931
+ "D": "proxy"
3932
+ },
3933
+ "display_name": "Long-horizon next-subtask forecasting",
3934
+ "evidence_links": [
3935
+ {
3936
+ "href": "data/task_walkthroughs.json",
3937
+ "label": "Task walkthrough"
3938
+ },
3939
+ {
3940
+ "href": "single_episode_explorer.html",
3941
+ "label": "Single-episode explorer"
3942
+ }
3943
+ ],
3944
+ "family": "classification",
3945
+ "id": "next_subtask_forecast",
3946
+ "input": null,
3947
+ "input_short": null,
3948
+ "metric": {
3949
+ "better_baseline": "neural_mlp",
3950
+ "direction": "higher",
3951
+ "key": "macro_f1",
3952
+ "minimal": 0.0455,
3953
+ "name": "macro-F1",
3954
+ "neural_mlp": 0.0507
3955
+ },
3956
+ "modalities": [],
3957
+ "module_summary": null,
3958
+ "output_short": null,
3959
+ "primary_direction": "C",
3960
+ "process_short": null,
3961
+ "research_name": "Long-horizon next-subtask forecasting",
3962
+ "why": "Measures whether the model can anticipate the next procedural phase rather than only the current frame state."
3963
+ },
3964
+ {
3965
+ "architecture_family": null,
3966
+ "case_study": null,
3967
+ "current_limit": "Public derived features retain hashed text targets; raw full text requires the official annotation source.",
3968
+ "direction_roles": {
3969
+ "A": "proxy",
3970
+ "C": "direct"
3971
+ },
3972
+ "display_name": "Interaction text prediction",
3973
+ "evidence_links": [
3974
+ {
3975
+ "href": "data/task_walkthroughs.json",
3976
+ "label": "Task walkthrough"
3977
+ },
3978
+ {
3979
+ "href": "single_episode_explorer.html",
3980
+ "label": "Single-episode explorer"
3981
+ }
3982
+ ],
3983
+ "family": "classification",
3984
+ "id": "interaction_text_prediction",
3985
+ "input": null,
3986
+ "input_short": null,
3987
+ "metric": {
3988
+ "better_baseline": "minimal",
3989
+ "direction": "higher",
3990
+ "key": "macro_f1",
3991
+ "minimal": 0.0444,
3992
+ "name": "macro-F1",
3993
+ "neural_mlp": 0.0381
3994
+ },
3995
+ "modalities": [],
3996
+ "module_summary": null,
3997
+ "output_short": null,
3998
+ "primary_direction": "C",
3999
+ "process_short": null,
4000
+ "research_name": "Interaction text prediction",
4001
+ "why": "Connects egocentric observations to the natural-language interaction semantics carried by the annotation."
4002
+ },
4003
+ {
4004
+ "architecture_family": null,
4005
+ "case_study": null,
4006
+ "current_limit": "Relation labels are derived from the public-sample annotation scope.",
4007
+ "direction_roles": {
4008
+ "C": "direct",
4009
+ "D": "proxy"
4010
+ },
4011
+ "display_name": "Action-object relation prediction",
4012
+ "evidence_links": [
4013
+ {
4014
+ "href": "data/task_walkthroughs.json",
4015
+ "label": "Task walkthrough"
4016
+ },
4017
+ {
4018
+ "href": "single_episode_explorer.html",
4019
+ "label": "Single-episode explorer"
4020
+ }
4021
+ ],
4022
+ "family": "classification",
4023
+ "id": "action_object_relation",
4024
+ "input": null,
4025
+ "input_short": null,
4026
+ "metric": {
4027
+ "better_baseline": "tie",
4028
+ "direction": "higher",
4029
+ "key": "macro_f1",
4030
+ "minimal": 0.0,
4031
+ "name": "macro-F1",
4032
+ "neural_mlp": 0.0
4033
+ },
4034
+ "modalities": [],
4035
+ "module_summary": null,
4036
+ "output_short": null,
4037
+ "primary_direction": "C",
4038
+ "process_short": null,
4039
+ "research_name": "Action-object relation prediction",
4040
+ "why": "Tests whether action recognition and object state are connected as a relational interaction representation."
4041
+ },
4042
+ {
4043
+ "architecture_family": null,
4044
+ "case_study": null,
4045
+ "current_limit": "This is a set-level proxy, not a persistent 3D scene graph.",
4046
+ "direction_roles": {
4047
+ "C": "proxy",
4048
+ "D": "direct"
4049
+ },
4050
+ "display_name": "Future object-set forecasting",
4051
+ "evidence_links": [
4052
+ {
4053
+ "href": "data/task_walkthroughs.json",
4054
+ "label": "Task walkthrough"
4055
+ },
4056
+ {
4057
+ "href": "single_episode_explorer.html",
4058
+ "label": "Single-episode explorer"
4059
+ }
4060
+ ],
4061
+ "family": "multi-label",
4062
+ "id": "object_set_forecast",
4063
+ "input": null,
4064
+ "input_short": null,
4065
+ "metric": {
4066
+ "better_baseline": "neural_mlp",
4067
+ "direction": "higher",
4068
+ "key": "micro_f1",
4069
+ "minimal": 0.1694,
4070
+ "name": "micro-F1",
4071
+ "neural_mlp": 0.1972
4072
+ },
4073
+ "modalities": [],
4074
+ "module_summary": null,
4075
+ "output_short": null,
4076
+ "primary_direction": "D",
4077
+ "process_short": null,
4078
+ "research_name": "Future object-set forecasting",
4079
+ "why": "Asks whether the current scene state supports predicting which objects will matter later."
4080
+ },
4081
+ {
4082
+ "architecture_family": null,
4083
+ "case_study": null,
4084
+ "current_limit": "Pose reconstruction is window-level and does not yet fit a full parametric hand/body model.",
4085
+ "direction_roles": {
4086
+ "A": "direct",
4087
+ "B": "proxy"
4088
+ },
4089
+ "display_name": "IMU-to-hand pose reconstruction",
4090
+ "evidence_links": [
4091
+ {
4092
+ "href": "data/task_walkthroughs.json",
4093
+ "label": "Task walkthrough"
4094
+ },
4095
+ {
4096
+ "href": "single_episode_explorer.html",
4097
+ "label": "Single-episode explorer"
4098
+ }
4099
+ ],
4100
+ "family": "regression",
4101
+ "id": "imu_to_hand_pose",
4102
+ "input": null,
4103
+ "input_short": null,
4104
+ "metric": {
4105
+ "better_baseline": "minimal",
4106
+ "direction": "lower",
4107
+ "key": "mae",
4108
+ "minimal": 0.042,
4109
+ "name": "MAE",
4110
+ "neural_mlp": 0.0426
4111
+ },
4112
+ "modalities": [],
4113
+ "module_summary": null,
4114
+ "output_short": null,
4115
+ "primary_direction": "A",
4116
+ "process_short": null,
4117
+ "research_name": "IMU-to-hand pose reconstruction",
4118
+ "why": "Measures human-motion reconstruction from wearable and motion cues."
4119
+ },
4120
+ {
4121
+ "architecture_family": null,
4122
+ "case_study": null,
4123
+ "current_limit": "Retrieval checks view consistency but does not reconstruct geometry by itself.",
4124
+ "direction_roles": {
4125
+ "B": "direct",
4126
+ "D": "proxy"
4127
+ },
4128
+ "display_name": "Camera-view synchronization retrieval",
4129
+ "evidence_links": [
4130
+ {
4131
+ "href": "data/task_walkthroughs.json",
4132
+ "label": "Task walkthrough"
4133
+ },
4134
+ {
4135
+ "href": "single_episode_explorer.html",
4136
+ "label": "Single-episode explorer"
4137
+ }
4138
+ ],
4139
+ "family": "retrieval",
4140
+ "id": "camera_view_sync_retrieval",
4141
+ "input": null,
4142
+ "input_short": null,
4143
+ "metric": {
4144
+ "better_baseline": "minimal",
4145
+ "direction": "higher",
4146
+ "key": "mrr",
4147
+ "minimal": 0.4943,
4148
+ "name": "MRR",
4149
+ "neural_mlp": 0.2409
4150
+ },
4151
+ "modalities": [],
4152
+ "module_summary": null,
4153
+ "output_short": null,
4154
+ "primary_direction": "B",
4155
+ "process_short": null,
4156
+ "research_name": "Camera-view synchronization retrieval",
4157
+ "why": "Tests whether synchronized multi-view structure is recoverable across camera streams."
4158
+ },
4159
+ {
4160
+ "architecture_family": null,
4161
+ "case_study": null,
4162
+ "current_limit": "Regression is local to the annotated public sample timeline.",
4163
+ "direction_roles": {
4164
+ "C": "diagnostic",
4165
+ "D": "diagnostic"
4166
+ },
4167
+ "display_name": "Time-to-next-transition regression",
4168
+ "evidence_links": [
4169
+ {
4170
+ "href": "data/task_walkthroughs.json",
4171
+ "label": "Task walkthrough"
4172
+ },
4173
+ {
4174
+ "href": "single_episode_explorer.html",
4175
+ "label": "Single-episode explorer"
4176
+ }
4177
+ ],
4178
+ "family": "regression",
4179
+ "id": "time_to_transition",
4180
+ "input": null,
4181
+ "input_short": null,
4182
+ "metric": {
4183
+ "better_baseline": "minimal",
4184
+ "direction": "lower",
4185
+ "key": "mae",
4186
+ "minimal": 10.5374,
4187
+ "name": "MAE frames",
4188
+ "neural_mlp": 10.5545
4189
+ },
4190
+ "modalities": [],
4191
+ "module_summary": null,
4192
+ "output_short": null,
4193
+ "primary_direction": "C",
4194
+ "process_short": null,
4195
+ "research_name": "Time-to-next-transition regression",
4196
+ "why": "Measures temporal boundary awareness as a continuous timing target."
4197
  }
4198
  ],
4199
  "three_foundation_pipelines": {
docs/data/scope_claims_audit.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-21T10:03:08+00:00",
4
  "summary": {
5
  "qwen3_omni_verified_diagnostic_pilot": true,
6
  "dataset_manifest_num_episodes": 119,
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-21T11:08:09+00:00",
4
  "summary": {
5
  "qwen3_omni_verified_diagnostic_pilot": true,
6
  "dataset_manifest_num_episodes": 119,
docs/data/single_episode_task_model_radar.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Single-Episode 20-Task Radar",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-21T08:37:32+00:00",
5
  "description": "Minimal and Neural MLP baselines on the one public sample episode, both scored on all 20 task contracts.",
6
  "task_count": 20,
7
  "method_count": 2,
@@ -12,7 +12,7 @@
12
  "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
13
  "raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
14
  "result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
15
- "foundation_model_overlay": "Qwen3/Cosmos points are plotted only on task-aligned axes. Scoreless records mean the public result does not evaluate that task contract.",
16
  "metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
17
  "raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
18
  },
 
1
  {
2
  "title": "Single-Episode 20-Task Radar",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-21T10:47:17+00:00",
5
  "description": "Minimal and Neural MLP baselines on the one public sample episode, both scored on all 20 task contracts.",
6
  "task_count": 20,
7
  "method_count": 2,
 
12
  "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
13
  "raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
14
  "result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
15
+ "foundation_model_overlay": "Qwen3-Omni and Cosmos3 points are plotted only on task-aligned axes. Scoreless records mean the public result does not evaluate that task contract.",
16
  "metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
17
  "raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
18
  },
docs/data/source_alignment_audit.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Source Alignment Note",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-21T10:03:08+00:00",
5
  "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
6
  "alignment_summary": {
7
  "full_dataset_repo": "ropedia-ai/xperience-10m",
 
1
  {
2
  "title": "Ropedia Xperience-10M Source Alignment Note",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-21T11:08:07+00:00",
5
  "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
6
  "alignment_summary": {
7
  "full_dataset_repo": "ropedia-ai/xperience-10m",
docs/data/task_method_20_result_matrix.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Task Method 20-Result Matrix",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-21T08:37:32+00:00",
5
  "task_count": 20,
6
  "method_count": 9,
7
  "method_task_record_count": 180,
 
1
  {
2
  "title": "Task Method 20-Result Matrix",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-21T10:47:17+00:00",
5
  "task_count": 20,
6
  "method_count": 9,
7
  "method_task_record_count": 180,
docs/data/task_method_20_source_audit.json CHANGED
@@ -2,7 +2,7 @@
2
  "checked_json_metric_count": 180,
3
  "failure_count": 0,
4
  "failures": [],
5
- "generated_at_utc": "2026-06-21T10:03:08+00:00",
6
  "method_task_record_count": 180,
7
  "rule": "Every scored row that declares a JSON metric source must have the same numeric value under that row's metric_key.",
8
  "scored_method_task_count": 180,
 
2
  "checked_json_metric_count": 180,
3
  "failure_count": 0,
4
  "failures": [],
5
+ "generated_at_utc": "2026-06-21T11:07:42+00:00",
6
  "method_task_record_count": 180,
7
  "rule": "Every scored row that declares a JSON metric source must have the same numeric value under that row's metric_key.",
8
  "scored_method_task_count": 180,
docs/data/task_suite_enhancement_128.json CHANGED
@@ -181,7 +181,7 @@
181
  ],
182
  "public_safety": [
183
  "No raw MP4/HDF5/RRD files are written.",
184
- "No full Qwen/Cosmos weights are mirrored.",
185
  "Generated labels and aggregate metrics remain public-safe derived metadata."
186
  ]
187
  },
 
181
  ],
182
  "public_safety": [
183
  "No raw MP4/HDF5/RRD files are written.",
184
+ "No full Qwen3-Omni or Cosmos3 base weights are mirrored.",
185
  "Generated labels and aggregate metrics remain public-safe derived metadata."
186
  ]
187
  },
docs/data/task_surface_integrity.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-21T10:03:08+00:00",
4
  "summary": {
5
  "original_walkthrough_task_count": 12,
6
  "expected_original_walkthrough_task_count": 12,
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-21T11:08:07+00:00",
4
  "summary": {
5
  "original_walkthrough_task_count": 12,
6
  "expected_original_walkthrough_task_count": 12,
docs/data/two_evidence_line_result_summary.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "generated_at_utc": "2026-06-21T10:00:36+00:00",
3
- "interpretation_rule": "Use the 1-episode line for task construction and reproducibility claims. Use the 128-episode line for held-out comparison and model-branch claims.",
4
  "lines": [
5
  {
6
  "artifact_entry_points": [
@@ -66,7 +66,7 @@
66
  "docs/data/qwen3_omni_run_lineage.json",
67
  "docs/data/task_method_20_gap_audit.json"
68
  ],
69
- "claim_boundary": "Supports same-split comparison, model-branch diagnostics, and scale-up planning on public-safe processed artifacts.",
70
  "data_unit": "Selected held-out 96/16/16 split with public-safe processed features linked to official gated episode paths",
71
  "direct_scored_method_task_count": 134,
72
  "id": "selected_128_episode_surface",
@@ -171,14 +171,14 @@
171
  }
172
  ],
173
  "not_for": "Do not read compact-proxy cells as direct raw-target measurements.",
174
- "primary_use": "Compare same-split baselines and model branches while keeping evidence type explicit.",
175
  "primary_visuals": [
176
  "docs/assets/charts/two_evidence_line_map.svg",
177
  "docs/assets/charts/episode128_task_model_radar.svg",
178
  "docs/assets/charts/unified_task_model_radar.svg"
179
  ],
180
  "proxy_scored_method_task_count": 6,
181
- "result_statement": "140/140 selected-128 scores across seven method branches: 134 direct scores plus 6 documented compact-proxy scores.",
182
  "scored_method_task_count": 140,
183
  "short_label": "Line 2",
184
  "task_count": 20
@@ -344,7 +344,7 @@
344
  "step": "Choose the evidence line"
345
  },
346
  {
347
- "reason": "Use the 1-episode radar for Minimal-vs-Neural behavior and the 128-episode radar for baseline/model-branch comparison.",
348
  "step": "Open the matching radar"
349
  },
350
  {
 
1
  {
2
+ "generated_at_utc": "2026-06-21T10:47:04+00:00",
3
+ "interpretation_rule": "Use the 1-episode line for task construction and reproducibility claims. Use the 128-episode line for same-split metadata/raw baselines, Qwen3-Omni v6 LoRA diagnostics, and Cosmos3 diagnostics.",
4
  "lines": [
5
  {
6
  "artifact_entry_points": [
 
66
  "docs/data/qwen3_omni_run_lineage.json",
67
  "docs/data/task_method_20_gap_audit.json"
68
  ],
69
+ "claim_boundary": "Supports same-split metadata/raw baseline comparison, Qwen3-Omni v6 diagnostics, Cosmos3 diagnostics, and scale-up planning on public-safe processed artifacts.",
70
  "data_unit": "Selected held-out 96/16/16 split with public-safe processed features linked to official gated episode paths",
71
  "direct_scored_method_task_count": 134,
72
  "id": "selected_128_episode_surface",
 
171
  }
172
  ],
173
  "not_for": "Do not read compact-proxy cells as direct raw-target measurements.",
174
+ "primary_use": "Compare same-split metadata/raw baselines, Qwen3-Omni v6 LoRA, Cosmos3-Super Reasoner, and Cosmos3-Nano Future Window while keeping evidence type explicit.",
175
  "primary_visuals": [
176
  "docs/assets/charts/two_evidence_line_map.svg",
177
  "docs/assets/charts/episode128_task_model_radar.svg",
178
  "docs/assets/charts/unified_task_model_radar.svg"
179
  ],
180
  "proxy_scored_method_task_count": 6,
181
+ "result_statement": "140/140 selected-128 scores across seven methods: 134 direct scores plus 6 documented compact-proxy scores.",
182
  "scored_method_task_count": 140,
183
  "short_label": "Line 2",
184
  "task_count": 20
 
344
  "step": "Choose the evidence line"
345
  },
346
  {
347
+ "reason": "Use the 1-episode radar for Minimal-vs-Neural behavior and the 128-episode radar for metadata/raw baselines, Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano.",
348
  "step": "Open the matching radar"
349
  },
350
  {
docs/data/two_evidence_lines.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "status": "current",
3
  "updated_utc": "2026-06-21T00:00:00Z",
4
- "interpretation_rule": "Use the 1-episode line for task construction and reproducibility claims. Use the 128-episode line for held-out comparison and model-branch claims.",
5
  "reader_summary": "The suite has two public result lines. Line 1 is the fully inspectable one-episode task lab. Line 2 is the 128-episode comparison surface for aligned baselines, the Qwen3-Omni series, and the Cosmos3 series. Do not mix the two when reading scores.",
6
  "score_formula": "2 single-episode methods x 20 tasks = 40 records; 7 selected-128 methods x 20 tasks = 140 records; total public matrix = 180/180 scored records.",
7
  "lines": [
@@ -44,8 +44,8 @@
44
  "label": "128 selected episodes",
45
  "short_label": "Line 2",
46
  "data_unit": "Selected held-out 96/16/16 split with public-safe processed features linked to official gated episode paths",
47
- "result_statement": "140/140 selected-128 scores across seven method branches: 134 direct scores plus 6 documented compact-proxy scores.",
48
- "claim_boundary": "Supports same-split comparison, model-branch diagnostics, and scale-up planning on public-safe processed artifacts.",
49
  "not_for": "Do not read compact-proxy cells as direct raw-target measurements.",
50
  "episodes": 128,
51
  "split": {
@@ -69,7 +69,7 @@
69
  "direct_scored_records": 134,
70
  "proxy_scored_records": 6,
71
  "proxy_policy": "Proxy flags remain visible where the public export lacks a direct raw target.",
72
- "best_use": "Compare same-split baselines and model branches while keeping evidence type explicit.",
73
  "primary_visuals": [
74
  "docs/assets/charts/two_evidence_line_map.svg",
75
  "docs/assets/charts/episode128_task_model_radar.svg",
 
1
  {
2
  "status": "current",
3
  "updated_utc": "2026-06-21T00:00:00Z",
4
+ "interpretation_rule": "Use the 1-episode line for task construction and reproducibility claims. Use the 128-episode line for same-split metadata/raw baselines, Qwen3-Omni v6 LoRA diagnostics, and Cosmos3 diagnostics.",
5
  "reader_summary": "The suite has two public result lines. Line 1 is the fully inspectable one-episode task lab. Line 2 is the 128-episode comparison surface for aligned baselines, the Qwen3-Omni series, and the Cosmos3 series. Do not mix the two when reading scores.",
6
  "score_formula": "2 single-episode methods x 20 tasks = 40 records; 7 selected-128 methods x 20 tasks = 140 records; total public matrix = 180/180 scored records.",
7
  "lines": [
 
44
  "label": "128 selected episodes",
45
  "short_label": "Line 2",
46
  "data_unit": "Selected held-out 96/16/16 split with public-safe processed features linked to official gated episode paths",
47
+ "result_statement": "140/140 selected-128 scores across seven methods: 134 direct scores plus 6 documented compact-proxy scores.",
48
+ "claim_boundary": "Supports same-split metadata/raw baseline comparison, Qwen3-Omni v6 diagnostics, Cosmos3 diagnostics, and scale-up planning on public-safe processed artifacts.",
49
  "not_for": "Do not read compact-proxy cells as direct raw-target measurements.",
50
  "episodes": 128,
51
  "split": {
 
69
  "direct_scored_records": 134,
70
  "proxy_scored_records": 6,
71
  "proxy_policy": "Proxy flags remain visible where the public export lacks a direct raw target.",
72
+ "best_use": "Compare same-split metadata/raw baselines, Qwen3-Omni v6 LoRA, Cosmos3-Super Reasoner, and Cosmos3-Nano Future Window while keeping evidence type explicit.",
73
  "primary_visuals": [
74
  "docs/assets/charts/two_evidence_line_map.svg",
75
  "docs/assets/charts/episode128_task_model_radar.svg",
docs/data/unified_task_model_radar.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Unified 20-Task Model Radar",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-21T08:37:32+00:00",
5
  "task_count": 20,
6
  "method_count": 9,
7
  "method_task_record_count": 180,
@@ -11,7 +11,7 @@
11
  "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
12
  "raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
13
  "result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
14
- "foundation_model_overlay": "Qwen3/Cosmos points are plotted only on task-aligned axes. Scoreless records mean the public result does not evaluate that task contract.",
15
  "metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
16
  "raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
17
  },
 
1
  {
2
  "title": "Unified 20-Task Model Radar",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-21T10:47:17+00:00",
5
  "task_count": 20,
6
  "method_count": 9,
7
  "method_task_record_count": 180,
 
11
  "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
12
  "raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
13
  "result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
14
+ "foundation_model_overlay": "Qwen3-Omni and Cosmos3 points are plotted only on task-aligned axes. Scoreless records mean the public result does not evaluate that task contract.",
15
  "metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
16
  "raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
17
  },
docs/data/website_integrity.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-21T10:08:12+00:00",
4
  "docs_root": "docs",
5
  "site_base": "/ropedia-xperience-10m-task-suite/",
6
  "summary": {
@@ -80,8 +80,8 @@
80
  "name": "project_overview_precedes_progress_ledger",
81
  "status": "pass",
82
  "reason": "The project overview should appear before the deeper progress ledger.",
83
- "overview_index": 118264,
84
- "evidence_index": 163038
85
  },
86
  {
87
  "name": "project_status_links_json",
@@ -159,9 +159,9 @@
159
  "name": "evaluation_protocol_between_overview_and_progress",
160
  "status": "pass",
161
  "reason": "The evaluation protocol should appear before the deeper evidence ledger.",
162
- "overview_index": 118264,
163
- "protocol_index": 159219,
164
- "evidence_index": 163038
165
  },
166
  {
167
  "name": "evaluation_protocol_links_json",
@@ -180,7 +180,7 @@
180
  "status": "pass",
181
  "reason": "The Suite anchor should show the task-suite map before the modality atlas.",
182
  "first_marker_index": 471,
183
- "second_marker_index": 3794
184
  },
185
  {
186
  "name": "suite_modality_atlas_contains_seven_cards",
@@ -296,12 +296,12 @@
296
  "json_files": [
297
  {
298
  "path": "data/additional_development_directions.json",
299
- "bytes": 6121,
300
  "top_level_type": "dict"
301
  },
302
  {
303
  "path": "data/artifact_index.json",
304
- "bytes": 123351,
305
  "top_level_type": "dict"
306
  },
307
  {
@@ -316,7 +316,7 @@
316
  },
317
  {
318
  "path": "data/episode128_task_model_radar.json",
319
- "bytes": 184945,
320
  "top_level_type": "dict"
321
  },
322
  {
@@ -331,12 +331,12 @@
331
  },
332
  {
333
  "path": "data/figure_index.json",
334
- "bytes": 19441,
335
  "top_level_type": "dict"
336
  },
337
  {
338
  "path": "data/foundation_model_plan.json",
339
- "bytes": 13926,
340
  "top_level_type": "dict"
341
  },
342
  {
@@ -346,12 +346,12 @@
346
  },
347
  {
348
  "path": "data/live_publication_status.json",
349
- "bytes": 184684,
350
  "top_level_type": "dict"
351
  },
352
  {
353
  "path": "data/mirror_parity.json",
354
- "bytes": 1414374,
355
  "top_level_type": "dict"
356
  },
357
  {
@@ -361,12 +361,12 @@
361
  },
362
  {
363
  "path": "data/omni_finetune_verified_result.json",
364
- "bytes": 4325,
365
  "top_level_type": "dict"
366
  },
367
  {
368
  "path": "data/omni_model_comparison.json",
369
- "bytes": 82110,
370
  "top_level_type": "dict"
371
  },
372
  {
@@ -386,12 +386,12 @@
386
  },
387
  {
388
  "path": "data/project_status.json",
389
- "bytes": 23057,
390
  "top_level_type": "dict"
391
  },
392
  {
393
  "path": "data/public_reader_map.json",
394
- "bytes": 5906,
395
  "top_level_type": "dict"
396
  },
397
  {
@@ -416,7 +416,7 @@
416
  },
417
  {
418
  "path": "data/qwen3_omni_run_lineage.json",
419
- "bytes": 9211,
420
  "top_level_type": "dict"
421
  },
422
  {
@@ -451,12 +451,12 @@
451
  },
452
  {
453
  "path": "data/research_roadmap.json",
454
- "bytes": 14133,
455
  "top_level_type": "dict"
456
  },
457
  {
458
  "path": "data/research_roadmap_interactive.json",
459
- "bytes": 154689,
460
  "top_level_type": "dict"
461
  },
462
  {
@@ -476,7 +476,7 @@
476
  },
477
  {
478
  "path": "data/single_episode_task_model_radar.json",
479
- "bytes": 51097,
480
  "top_level_type": "dict"
481
  },
482
  {
@@ -511,7 +511,7 @@
511
  },
512
  {
513
  "path": "data/task_suite_enhancement_128.json",
514
- "bytes": 20181,
515
  "top_level_type": "dict"
516
  },
517
  {
@@ -536,22 +536,22 @@
536
  },
537
  {
538
  "path": "data/two_evidence_line_result_summary.json",
539
- "bytes": 17223,
540
  "top_level_type": "dict"
541
  },
542
  {
543
  "path": "data/two_evidence_lines.json",
544
- "bytes": 7196,
545
  "top_level_type": "dict"
546
  },
547
  {
548
  "path": "data/unified_task_model_radar.json",
549
- "bytes": 228805,
550
  "top_level_type": "dict"
551
  },
552
  {
553
  "path": "data/website_integrity.json",
554
- "bytes": 20657,
555
  "top_level_type": "dict"
556
  },
557
  {
@@ -591,7 +591,7 @@
591
  {
592
  "path": "assets/charts/episode128_task_model_radar.svg",
593
  "exists": true,
594
- "bytes": 51905,
595
  "format": "SVG",
596
  "has_viewbox": true
597
  },
@@ -633,7 +633,7 @@
633
  {
634
  "path": "assets/charts/research_direction_coverage.svg",
635
  "exists": true,
636
- "bytes": 5347,
637
  "format": "SVG",
638
  "has_viewbox": true
639
  },
@@ -647,7 +647,7 @@
647
  {
648
  "path": "assets/charts/single_episode_task_model_radar.svg",
649
  "exists": true,
650
- "bytes": 35230,
651
  "format": "SVG",
652
  "has_viewbox": true
653
  },
@@ -779,7 +779,7 @@
779
  {
780
  "path": "assets/task_suite_infographic.png",
781
  "exists": true,
782
- "bytes": 1899884,
783
  "width": 1800,
784
  "height": 7600,
785
  "format": "PNG"
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-21T11:07:26+00:00",
4
  "docs_root": "docs",
5
  "site_base": "/ropedia-xperience-10m-task-suite/",
6
  "summary": {
 
80
  "name": "project_overview_precedes_progress_ledger",
81
  "status": "pass",
82
  "reason": "The project overview should appear before the deeper progress ledger.",
83
+ "overview_index": 118524,
84
+ "evidence_index": 163802
85
  },
86
  {
87
  "name": "project_status_links_json",
 
159
  "name": "evaluation_protocol_between_overview_and_progress",
160
  "status": "pass",
161
  "reason": "The evaluation protocol should appear before the deeper evidence ledger.",
162
+ "overview_index": 118524,
163
+ "protocol_index": 159990,
164
+ "evidence_index": 163802
165
  },
166
  {
167
  "name": "evaluation_protocol_links_json",
 
180
  "status": "pass",
181
  "reason": "The Suite anchor should show the task-suite map before the modality atlas.",
182
  "first_marker_index": 471,
183
+ "second_marker_index": 3792
184
  },
185
  {
186
  "name": "suite_modality_atlas_contains_seven_cards",
 
296
  "json_files": [
297
  {
298
  "path": "data/additional_development_directions.json",
299
+ "bytes": 6120,
300
  "top_level_type": "dict"
301
  },
302
  {
303
  "path": "data/artifact_index.json",
304
+ "bytes": 123419,
305
  "top_level_type": "dict"
306
  },
307
  {
 
316
  },
317
  {
318
  "path": "data/episode128_task_model_radar.json",
319
+ "bytes": 184992,
320
  "top_level_type": "dict"
321
  },
322
  {
 
331
  },
332
  {
333
  "path": "data/figure_index.json",
334
+ "bytes": 19472,
335
  "top_level_type": "dict"
336
  },
337
  {
338
  "path": "data/foundation_model_plan.json",
339
+ "bytes": 13925,
340
  "top_level_type": "dict"
341
  },
342
  {
 
346
  },
347
  {
348
  "path": "data/live_publication_status.json",
349
+ "bytes": 184689,
350
  "top_level_type": "dict"
351
  },
352
  {
353
  "path": "data/mirror_parity.json",
354
+ "bytes": 1418076,
355
  "top_level_type": "dict"
356
  },
357
  {
 
361
  },
362
  {
363
  "path": "data/omni_finetune_verified_result.json",
364
+ "bytes": 4327,
365
  "top_level_type": "dict"
366
  },
367
  {
368
  "path": "data/omni_model_comparison.json",
369
+ "bytes": 82088,
370
  "top_level_type": "dict"
371
  },
372
  {
 
386
  },
387
  {
388
  "path": "data/project_status.json",
389
+ "bytes": 23054,
390
  "top_level_type": "dict"
391
  },
392
  {
393
  "path": "data/public_reader_map.json",
394
+ "bytes": 5990,
395
  "top_level_type": "dict"
396
  },
397
  {
 
416
  },
417
  {
418
  "path": "data/qwen3_omni_run_lineage.json",
419
+ "bytes": 11468,
420
  "top_level_type": "dict"
421
  },
422
  {
 
451
  },
452
  {
453
  "path": "data/research_roadmap.json",
454
+ "bytes": 14129,
455
  "top_level_type": "dict"
456
  },
457
  {
458
  "path": "data/research_roadmap_interactive.json",
459
+ "bytes": 186755,
460
  "top_level_type": "dict"
461
  },
462
  {
 
476
  },
477
  {
478
  "path": "data/single_episode_task_model_radar.json",
479
+ "bytes": 51107,
480
  "top_level_type": "dict"
481
  },
482
  {
 
511
  },
512
  {
513
  "path": "data/task_suite_enhancement_128.json",
514
+ "bytes": 20196,
515
  "top_level_type": "dict"
516
  },
517
  {
 
536
  },
537
  {
538
  "path": "data/two_evidence_line_result_summary.json",
539
+ "bytes": 17414,
540
  "top_level_type": "dict"
541
  },
542
  {
543
  "path": "data/two_evidence_lines.json",
544
+ "bytes": 7349,
545
  "top_level_type": "dict"
546
  },
547
  {
548
  "path": "data/unified_task_model_radar.json",
549
+ "bytes": 228815,
550
  "top_level_type": "dict"
551
  },
552
  {
553
  "path": "data/website_integrity.json",
554
+ "bytes": 20658,
555
  "top_level_type": "dict"
556
  },
557
  {
 
591
  {
592
  "path": "assets/charts/episode128_task_model_radar.svg",
593
  "exists": true,
594
+ "bytes": 51915,
595
  "format": "SVG",
596
  "has_viewbox": true
597
  },
 
633
  {
634
  "path": "assets/charts/research_direction_coverage.svg",
635
  "exists": true,
636
+ "bytes": 5352,
637
  "format": "SVG",
638
  "has_viewbox": true
639
  },
 
647
  {
648
  "path": "assets/charts/single_episode_task_model_radar.svg",
649
  "exists": true,
650
+ "bytes": 35232,
651
  "format": "SVG",
652
  "has_viewbox": true
653
  },
 
779
  {
780
  "path": "assets/task_suite_infographic.png",
781
  "exists": true,
782
+ "bytes": 1903454,
783
  "width": 1800,
784
  "height": 7600,
785
  "format": "PNG"
docs/index.html CHANGED
@@ -1147,6 +1147,16 @@
1147
  font-weight: 760;
1148
  width: 18%;
1149
  }
 
 
 
 
 
 
 
 
 
 
1150
  .line-table a {
1151
  color: var(--cyan);
1152
  font-weight: 760;
@@ -3913,9 +3923,9 @@
3913
  <article class="suite-line-card">
3914
  <small>line 2 / 128 selected episodes</small>
3915
  <h3>128 selected episodes: comparison layer</h3>
3916
- <p>Seven method branches share the selected-episode surface and the same 20 task axes.</p>
3917
  <div class="line-claim">
3918
- <div><span>valid claim</span><p>Same-split baseline/model comparison and scale-up planning.</p></div>
3919
  <div><span>do not claim</span><p>Proxy cells as direct raw-target measurements.</p></div>
3920
  </div>
3921
  <div class="suite-line-facts">
@@ -3944,7 +3954,7 @@
3944
  </a>
3945
  <a class="hero-path" href="#directions">
3946
  <small>Extend</small>
3947
- <strong>Train next model branches</strong>
3948
  <span>Spatial intelligence, human-video world models, VLA, and scale-up plans.</span>
3949
  </a>
3950
  </div>
@@ -4044,7 +4054,7 @@
4044
  <div class="wrap">
4045
  <div class="section-head">
4046
  <h2>Two evidence lines: 1 episode and 128 episodes.</h2>
4047
- <p>Read the suite as two lines. Line 1 proves the task lab is inspectable and reproducible. Line 2 compares selected-128 baselines and model branches. Keep the lines separate when interpreting scores.</p>
4048
  </div>
4049
  <figure class="line-map-figure">
4050
  <img src="assets/charts/two_evidence_line_map.svg?v=two-line-map-v1" alt="Two evidence-line map showing 1 sample episode, 128 selected episodes, and the combined 180 scored method-task records">
@@ -4121,69 +4131,62 @@
4121
  </tbody>
4122
  </table>
4123
  <p class="table-note">Cosmos3-Super Forward-Dynamics LoRA is published as a separate fine-tuned adapter with weights/results; it is not counted as a 20-task matrix method row.</p>
4124
- <table class="line-table" aria-label="Qwen3-Omni run version ladder">
4125
  <thead>
4126
  <tr>
4127
  <th>Qwen run</th>
4128
- <th>What changed</th>
4129
- <th>Eval samples</th>
4130
- <th>JSON validity</th>
4131
- <th>Contact acc.</th>
4132
- <th>Public role</th>
4133
  </tr>
4134
  </thead>
4135
  <tbody>
4136
  <tr>
4137
  <td>v1</td>
4138
- <td>Selected-128 validation-aware LoRA baseline.</td>
4139
- <td>448</td>
4140
- <td>0.8750</td>
4141
- <td>0.6451</td>
4142
- <td>Superseded lineage evidence.</td>
4143
  </tr>
4144
  <tr>
4145
  <td>v2</td>
4146
- <td>Structured-JSON reuse full-8-GPU LoRA.</td>
4147
- <td>448</td>
4148
- <td>0.9978</td>
4149
- <td>0.7188</td>
4150
- <td>Superseded lineage evidence.</td>
4151
  </tr>
4152
  <tr>
4153
  <td>v3</td>
4154
- <td>Strict-label prompt/eval over the v2 adapter.</td>
4155
- <td>448</td>
4156
- <td>1.0000</td>
4157
- <td>0.7210</td>
4158
- <td>Prompt/eval lineage evidence.</td>
4159
  </tr>
4160
  <tr>
4161
  <td>v4</td>
4162
- <td>Four-epoch structured-JSON LoRA.</td>
4163
- <td>448</td>
4164
- <td>1.0000</td>
4165
- <td>0.7299</td>
4166
- <td>Superseded metric-tradeoff run.</td>
4167
  </tr>
4168
  <tr>
4169
  <td>v5</td>
4170
- <td>Multiscale cap96 LoRA.</td>
4171
- <td>4,032</td>
4172
- <td>1.0000</td>
4173
- <td>0.7865</td>
4174
- <td>Pinned prior release and comparison baseline.</td>
4175
  </tr>
4176
  <tr>
4177
  <td>v6</td>
4178
- <td>Rank64/lr5e-5 multiscale LoRA plus task-specific probes.</td>
4179
- <td>4,032</td>
4180
- <td>0.9990</td>
4181
- <td>0.8177</td>
4182
  <td>Current public 20-task Qwen3-Omni row.</td>
4183
  </tr>
4184
  </tbody>
4185
  </table>
4186
- <p class="table-note">Qwen v1-v6 are run-lineage labels, not the project-level result layers. The public matrix row is Qwen3-Omni v6 LoRA; v5 stays pinned as the prior release. Full details: <a href="data/qwen3_omni_run_lineage.json">qwen3_omni_run_lineage.json</a> and <a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/QWEN3_OMNI_RUN_LINEAGE.md">QWEN3_OMNI_RUN_LINEAGE.md</a>.</p>
4187
  <div class="reader-journey" aria-label="Recommended reader journeys">
4188
  <article class="reader-step">
4189
  <small>01 understand</small>
@@ -4205,7 +4208,7 @@
4205
  </article>
4206
  <article class="reader-step">
4207
  <small>04 extend</small>
4208
- <strong>Choose the next model branch</strong>
4209
  <p>Use directions and scale-up resources for spatial, world-model, VLA, Qwen3-Omni, and Cosmos3 follow-up work.</p>
4210
  <a href="#directions">Open directions</a>
4211
  </article>
@@ -4251,7 +4254,7 @@
4251
  <article class="brief-card">
4252
  <small>results</small>
4253
  <strong>Compare methods cleanly</strong>
4254
- <p>Single-episode baselines, 128-episode aligned baselines, Qwen3-Omni v6 LoRA, and Cosmos3-Super/Nano branches stay separated by evidence type.</p>
4255
  <div class="reading-links">
4256
  <a href="#takeaways">takeaways</a>
4257
  <a href="data/unified_task_model_radar.json">radar data</a>
@@ -4285,7 +4288,7 @@
4285
  <a href="https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts"><strong>HF artifacts</strong><span>Public-safe derived reports, metrics, website JSON, and result packages.</span></a>
4286
  <a href="https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines"><strong>HF baselines</strong><span>Compact baseline weights, figures, metrics, and mirrored task artifacts.</span></a>
4287
  <a href="https://huggingface.co/cy0307/ropedia-xperience-10m-weights-results"><strong>HF weights + results</strong><span>Consolidated baseline weights, adapters, result summaries, analysis, and manifest.</span></a>
4288
- <a href="https://huggingface.co/collections/cy0307/ropedia-xperience-10m-task-suite"><strong>HF collection</strong><span>Grouped project surfaces, baseline repos, and verified model-branch repos.</span></a>
4289
  </div>
4290
  <div class="brief-actions">
4291
  <a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/PUBLIC_READER_MAP.md">Open full reader map</a>
@@ -4335,7 +4338,7 @@
4335
  </article>
4336
  <article class="brief-card">
4337
  <strong>Scale-up readiness</strong>
4338
- <p>Connects the same data contract to 128-episode baselines, a no-new-episode enhancement pack, Qwen3-Omni LoRA, Cosmos-style world modeling, policy-model branches, and the later Xperience-native pretraining goal.</p>
4339
  </article>
4340
  </div>
4341
  <div class="brief-actions">
@@ -4359,8 +4362,8 @@
4359
  </article>
4360
  <article class="split-radar-card">
4361
  <h3>128-Episode 20-Task Radar</h3>
4362
- <p>Metadata, raw-feature, Qwen3-Omni, and Cosmos3 branches on the aligned 128-episode surface, with all 140 rows scored and proxy/evidence notes kept explicit.</p>
4363
- <img src="assets/charts/episode128_task_model_radar.svg?v=xperience10m-split-radar-v1" alt="128-episode 20-task radar comparing raw-feature baselines, metadata baselines, Qwen3-Omni, and Cosmos3 branches with explicit score counts">
4364
  <div class="split-radar-links">
4365
  <a href="assets/charts/episode128_task_model_radar.svg">Open SVG</a>
4366
  <a href="data/episode128_task_model_radar.json">Open JSON</a>
@@ -4470,7 +4473,7 @@
4470
  <div class="wrap">
4471
  <div class="section-head">
4472
  <h2>Research roadmap.</h2>
4473
- <p>The project path moves from the current public-sample task lab to the latest verified Qwen3-Omni diagnostic branch, same-split 128-episode baseline alignment, a no-new-episode enhancement pack, action/subtask error analysis, robustness runs, world/policy branches, and the future Xperience Embodied Foundation Model pretraining goal.</p>
4474
  </div>
4475
  <div class="roadmap-grid" aria-label="Research roadmap stages">
4476
  <article class="roadmap-card" data-status="implemented">
@@ -4492,7 +4495,7 @@
4492
  </div>
4493
  </article>
4494
  <article class="roadmap-card" data-status="verified_latest_branch">
4495
- <span class="roadmap-status">verified latest branch</span>
4496
  <h3>Qwen3-Omni LoRA Latest Diagnostic Branch</h3>
4497
  <p>Train lightweight adapters on selected prepared episodes and evaluate on held-out episodes with committed predictions, metrics, and run reports.</p>
4498
  <div class="roadmap-meta">
@@ -4585,7 +4588,7 @@
4585
  <div class="wrap">
4586
  <div class="section-head">
4587
  <h2>Additional development directions.</h2>
4588
- <p>Beyond the current task heads, Qwen3-Omni fine-tuning path, Cosmos/world-model branch, and future native pretraining goal, Xperience-10M can support three foundation pipeline tracks plus several concrete research-development tracks.</p>
4589
  </div>
4590
  <div class="foundation-pipeline-grid" aria-label="Three high-resolution foundation direction slide diagrams">
4591
  <article class="foundation-pipeline-card">
@@ -4631,7 +4634,7 @@
4631
  <article class="artifact"><h3>Multimodal representation learning</h3><p>Train contrastive and masked-prediction encoders over synchronized video, audio, depth, pose, mocap, IMU, and language windows.</p><a href="data/additional_development_directions.json">JSON plan</a></article>
4632
  <article class="artifact"><h3>Skill and procedure graphs</h3><p>Mine action steps, transitions, preconditions, effects, and temporal graphs that connect egocentric perception to planning.</p><a href="data/research_directions.json">current task map</a></article>
4633
  <article class="artifact"><h3>Human-object affordances</h3><p>Add contact, reachable-object, tool-use, and next-affordance tasks using hands, mocap, objects, contacts, video, and language.</p><a href="data/task_walkthroughs.json">task walkthroughs</a></article>
4634
- <article class="artifact"><h3>3D/4D scene and object memory</h3><p>Fuse depth, pose/SLAM, multiview video, and object cues into persistent scene/object maps for spatial reasoning and object permanence.</p><a href="data/foundation_model_plan.json">model branches</a></article>
4635
  <article class="artifact"><h3>Quality and sync diagnostics</h3><p>Track timestamp drift, missing streams, calibration consistency, corrupted files, and degraded-mode manifests before large training runs.</p><a href="data/evidence_contract.json">evidence contract</a></article>
4636
  <article class="artifact"><h3>Policy and simulation transfer</h3><p>Convert mocap, hand trajectories, contacts, and object states into action tokens, robot-compatible targets, and imitation-learning examples.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/FOUNDATION_MODEL_PLAN.md">foundation plan</a></article>
4637
  </div>
@@ -4650,10 +4653,10 @@
4650
  <article class="artifact"><h3>Metric contract</h3><p>All 20 tasks list input, target, primary metric, baseline score, and source artifact path in the unified suite file.</p><a href="data/task_suite_20.json">task_suite_20.json</a></article>
4651
  <article class="artifact"><h3>Leakage controls</h3><p>Scalers fit on train windows only; future labels, target-side signals, caption/object labels, and contact labels stay on the target side unless explicitly queried.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/scripts/build_evaluation_protocol.py">builder script</a></article>
4652
  <article class="artifact"><h3>Audio ablation</h3><p>Audio and no-audio variants are evaluated across the original task contracts under the same chronological split.</p><a href="data/audio_ablation_summary.json">audio summary</a></article>
4653
- <article class="artifact"><h3>Foundation branch selection</h3><p>Qwen3-Omni is the first trainable baseline, Cosmos 3 becomes the world-model branch with a camera-pose proxy forward-dynamics contract ready for trainer work, policy models wait for robot-compatible action targets, and Xperience-native pretraining remains a later full-corpus goal.</p><a href="data/foundation_model_plan.json">backbone plan</a></article>
4654
- <article class="artifact"><h3>Next evaluation stage</h3><p>This public-sample run covers single-episode task development. The selected multi-episode Qwen3-Omni final diagnostic result is verified and meets the JSON-validity target; Cosmos3-Nano has a verified future-window compatibility package; and Cosmos3-Super has a verified base-weight JSON-task evaluation plus a fine-tuned forward-dynamics LoRA branch. The next stage is action/subtask error analysis, stronger model-quality runs, and policy-target conversion.</p><a href="data/omni_model_comparison.json">result comparison</a></article>
4655
  <article class="artifact"><h3>128-Episode Task Suite Enhancement Pack</h3><p>Before adding episodes, the suite should try `multiscale_20s10_40s20_80s40`, hierarchical action/subtask targets, label-normalized scoring, and compact raw-feature shards for unsupported tasks.</p><a href="data/task_suite_enhancement_128.json">task_suite_enhancement_128.json</a></article>
4656
- <article class="artifact"><h3>Scale-up requirement</h3><p>Future Omni, Cosmos, and policy branches use the same episode split discipline, training metadata, held-out predictions, metrics, run report, and public-safe package gate.</p><a href="data/foundation_model_plan.json">scale-up status</a></article>
4657
  </div>
4658
  </div>
4659
  </section>
@@ -4705,7 +4708,7 @@
4705
  <article class="evidence-card">
4706
  <span class="status-pill">current plan</span>
4707
  <h3>Foundation backbones are separated by role</h3>
4708
- <p>Qwen3-Omni stays first for held-out LoRA; Cosmos 3 is the world-model branch with camera-pose proxy forward-dynamics targets ready for trainer work; OpenVLA/openpi/GR00T are policy candidates after robot-compatible action conversion; Xperience-native pretraining is the later full-corpus goal.</p>
4709
  <div class="evidence-links">
4710
  <a href="data/foundation_model_plan.json">foundation model plan</a>
4711
  <a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/FOUNDATION_MODEL_PLAN.md">plan doc</a>
@@ -4714,7 +4717,7 @@
4714
  </article>
4715
  <article class="evidence-card">
4716
  <span class="status-pill">verified diagnostic</span>
4717
- <h3>Qwen3-Omni and Cosmos3 branches</h3>
4718
  <p>The selected 96/16/16 episode split now has a verified Qwen3-Omni v6 package with 4,032 held-out test predictions and 99.90% JSON validity. Cosmos3-Nano has 378 held-out future-window predictions, Cosmos3-Super Reasoner has 448 held-out base-weight JSON-task predictions, and Cosmos3-Super Forward-Dynamics LoRA has 448 held-out loss records.</p>
4719
  <div class="evidence-links">
4720
  <a href="data/omni_model_comparison.json">result comparison</a>
@@ -4891,9 +4894,9 @@
4891
  </div>
4892
  <div class="artifact-grid">
4893
  <article class="artifact primary-artifact"><div><h3>Official dataset</h3><p>Xperience-10M is a gated large-scale egocentric multimodal dataset for embodied AI, robotics, spatial intelligence, and world modeling.</p></div><a href="https://huggingface.co/datasets/ropedia-ai/xperience-10m">official HF dataset</a></article>
4894
- <article class="artifact"><h3>Public sample</h3><p>The current unified 20-task suite is built from one public sample episode, not from the entire gated dataset.</p><a href="https://huggingface.co/datasets/ropedia-ai/xperience-10m-sample">sample dataset</a></article>
4895
  <article class="artifact"><h3>Modalities</h3><p>The sample exposes synchronized video, audio, depth, pose/SLAM, motion capture, inertial signals, calibration, and language annotations.</p><a href="data/modality_atlas.json">modality atlas</a></article>
4896
- <article class="artifact"><h3>Multi-episode pilot</h3><p>The selected 128-episode Qwen3-Omni LoRA v6 diagnostic branch is verified with 4,032 held-out test predictions and 99.90% JSON validity. Action/subtask metrics are still weak, so this remains a baseline for error analysis.</p><a href="https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep">LoRA adapter</a><a href="data/qwen3_v5_v6_comparison.json">v5/v6 comparison</a></article>
4897
  <article class="artifact"><h3>Raw sample browser</h3><p>The Data tab now exposes the official public sample files directly, including playable MP4 video streams and the audio track embedded in fisheye_cam0.mp4.</p><a href="#raw-sample">open raw browser</a><a href="data/raw_sample_files.json">raw manifest</a></article>
4898
  <article class="artifact"><h3>Data boundary</h3><p>Raw MP4, HDF5, and RRD files are streamed from the official public sample source when opened here; private gated data and full Qwen weights are not redistributed in this project.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/DATA_NOTICE.md">data notice</a></article>
4899
  <article class="artifact"><h3>Current project subset</h3><p>One public sample episode, 5,821 frames, 1,161 aligned windows, 8,546-dimensional task inputs, plus direct links to the official raw sample files.</p><a href="data/modality_atlas.json">modality atlas</a></article>
@@ -5020,7 +5023,7 @@
5020
  <article class="split-radar-card">
5021
  <h3>128-Episode 20-Task Radar</h3>
5022
  <p>Seven aligned 128-episode methods cover all 20 axes: metadata simple/NN, raw-feature simple/NN, Qwen3-Omni, Cosmos3-Super, and Cosmos3-Nano. Proxy axes stay labeled in the JSON.</p>
5023
- <img src="assets/charts/episode128_task_model_radar.svg?v=xperience10m-split-radar-v1" alt="128-episode 20-task radar comparing raw-feature baselines, metadata baselines, Qwen3-Omni, and Cosmos3 branches with explicit score counts">
5024
  <div class="split-radar-links">
5025
  <a href="assets/charts/episode128_task_model_radar.svg">Open SVG</a>
5026
  <a href="data/episode128_task_model_radar.json">Open JSON</a>
@@ -5124,7 +5127,7 @@
5124
  <article class="result-reading-step">
5125
  <span>02</span>
5126
  <strong>Open the radar</strong>
5127
- <p>Single-episode radar shows Minimal vs Neural MLP. 128 radar shows baseline and model branches.</p>
5128
  </article>
5129
  <article class="result-reading-step">
5130
  <span>03</span>
@@ -5676,7 +5679,7 @@
5676
  <p>Use these files to navigate the whole project, open the published mirrors, or reproduce the public-sample pipeline.</p>
5677
  </div>
5678
  <div class="artifact-grid">
5679
- <article class="artifact primary-artifact"><div><h3>Public reader map</h3><p>Single navigation layer for GitHub, GitHub Pages, HF Space, artifact dataset, baseline model repo, model-branch repos, and public claim boundaries.</p></div><a href="data/public_reader_map.json">reader map</a></article>
5680
  <article class="artifact primary-artifact"><div><h3>Artifact guide</h3><p>Human-readable map from project scope to data contract, task evidence, platform mirrors, and scale-up status.</p></div><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/ARTIFACT_GUIDE.md">artifact guide</a></article>
5681
  <article class="artifact"><h3>Reproduction scripts</h3><p>Training, visualization, taxonomy, walkthrough, validator, and omni-readiness scripts.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/tree/main/scripts">scripts/</a></article>
5682
  <article class="artifact"><h3>Hugging Face Space</h3><p>The dashboard packaged as a public static Space.</p><a href="https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite">HF Space</a></article>
@@ -5696,13 +5699,13 @@
5696
  <p>The multi-episode Qwen3-Omni path is documented, scripted, and verified as a validation-monitored diagnostic held-out pilot. Stronger model-quality metrics require structured-output and error-analysis improvements.</p>
5697
  </div>
5698
  <div class="artifact-grid">
5699
- <article class="artifact primary-artifact"><div><h3>Model-family comparison</h3><p>Compares the three result layers and also groups 1-episode and 128-episode entries by model family: task heads, Qwen3-Omni LoRA, Cosmos3-Nano, and Cosmos3-Super.</p></div><a href="data/omni_model_comparison.json">result comparison</a></article>
5700
  <article class="artifact primary-artifact"><div><h3>128-episode source + features</h3><p>Maps every selected official Xperience-10M episode id to its gated source tree and the public-safe processed features: Qwen v6 multiscale windows, dense multiscale rows, and metadata matrices.</p></div><a href="data/xperience10m_128_episode_feature_index.json">source/feature index</a></article>
5701
  <article class="artifact"><h3>128-Episode Task Suite Enhancement Pack</h3><p>No-new-episode plan for denser supervision: <code>multiscale_20s10_40s20_80s40</code>, hierarchical action/subtask labels, stronger scoring slices, and raw-feature shard priorities.</p><a href="data/task_suite_enhancement_128.json">task_suite_enhancement_128.json</a></article>
5702
  <article class="artifact"><h3>Foundation-model plan</h3><p>Backbone selection matrix covering Qwen3-Omni, Cosmos 3, GR00T, OpenVLA/openpi, Gemini Robotics, Octo, SmolVLA-style policy candidates, and the future Xperience-native pretraining goal.</p><a href="data/foundation_model_plan.json">foundation model plan</a></article>
5703
  <article class="artifact"><h3>Multi-episode data access</h3><p>Public data-access path, selected 128-episode pilot plan, and preparation requirements.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md">data access</a></article>
5704
  <article class="artifact"><h3>Qwen3-Omni LoRA group</h3><p>Separates the 1-episode sensor-adapter smoke test from Qwen run v1-v6. v6 is the current 20-task matrix row, while v5 remains the pinned prior release.</p><a href="data/qwen3_omni_run_lineage.json">Qwen v1-v6 lineage</a><a href="data/omni_model_comparison.json">Qwen group</a></article>
5705
- <article class="artifact"><h3>Cosmos3 groups</h3><p>Shows the verified Nano future-window compatibility package, the Super base-weight Reasoner JSON-task evaluation, and the Super fine-tuned forward-dynamics LoRA branch with separate loss metrics.</p><a href="data/omni_model_comparison.json">Cosmos groups</a></article>
5706
  <article class="artifact"><h3>Scale-up requirement</h3><p>Future runs need validation tracking, held-out predictions, quality-target reporting, and the same public-safe package gate.</p><a href="data/foundation_model_plan.json">training requirements</a></article>
5707
  <article class="artifact"><h3>Xperience-native pretraining</h3><p>Future plan for a domain-specific embodied foundation model trained from scratch over full-corpus video, audio, geometry, motion, inertial, and language streams.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md">pretraining plan</a></article>
5708
  </div>
@@ -5721,7 +5724,7 @@
5721
  <article class="artifact"><h3>Dataset notes</h3><p>Official dataset links, public sample source, modalities, access boundary, and current project subset.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/XPERIENCE10M_DATASET_CARD_ALIGNMENT.md">dataset notes</a></article>
5722
  <article class="artifact"><h3>Reproducibility</h3><p>Commands and expected outputs for rebuilding the public-sample task suite and visual artifacts.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/REPRODUCIBILITY.md">reproduce</a></article>
5723
  <article class="artifact"><h3>Qwen3-Omni status</h3><p>Data requirements and evaluation boundary for the selected multi-episode LoRA pilot.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/results/omni_finetune/DATA_ACCESS_STATUS.md">training status</a></article>
5724
- <article class="artifact"><h3>Foundation-model plan</h3><p>Qwen3-Omni, Cosmos 3, GR00T, OpenVLA/openpi, Gemini Robotics, Octo, SmolVLA-style branches, and the Xperience-native pretraining goal by role.</p><a href="data/foundation_model_plan.json">model plan</a></article>
5725
  <article class="artifact"><h3>Hub artifacts</h3><p>Derived CSV/JSON/Markdown/figure artifacts without redistributing raw Xperience-10M data.</p><a href="https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts">artifact dataset</a></article>
5726
  <article class="artifact"><h3>Baseline models</h3><p>Lightweight minimal and neural task-head model files for the task contracts.</p><a href="https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines">model repo</a></article>
5727
  </div>
@@ -5733,7 +5736,7 @@
5733
  <section id="omni-scale-up" data-project-tab="resources" role="tabpanel" aria-labelledby="tab-resources" tabindex="-1">
5734
  <div class="wrap">
5735
  <div class="section-head">
5736
- <h2>Qwen3-Omni diagnostic branch is verified.</h2>
5737
  <p>The selected pilot uses 128 source-balanced episodes across 128 different session UUIDs. The latest v6 held-out package is verified, and its weak metrics define the next structured-output and error-analysis pass.</p>
5738
  </div>
5739
  <div class="artifact-grid">
@@ -5741,7 +5744,7 @@
5741
  <article class="artifact"><h3>Transfer</h3><p>Download raw episodes only from official gated sources, exclude visualization.rrd, validate files, then stage them for training.</p></article>
5742
  <article class="artifact"><h3>Current LoRA artifact</h3><p>The current Qwen3-Omni LoRA artifact is the verified v6 selected 128-episode diagnostic adapter. The v5 row remains pinned as the prior release, and the 1-episode Qwen entry is only a sensor-adapter smoke test.</p><a href="data/omni_model_comparison.json">model groups</a></article>
5743
  <article class="artifact"><h3>128-Episode Task Suite Enhancement Pack</h3><p>The next suite push does not need more episodes first: use <code>multiscale_20s10_40s20_80s40</code>, hierarchical action/subtask targets, and raw-feature shards while keeping the held-out split fixed.</p><a href="data/task_suite_enhancement_128.json">task_suite_enhancement_128.json</a></article>
5744
- <article class="artifact"><h3>Backbone branches</h3><p>Qwen3-Omni uses a separate LoRA model repo; Cosmos3-Nano remains a compatibility package; Cosmos3-Super now has a verified forward-dynamics LoRA branch with weights in a dedicated model repo.</p><a href="https://huggingface.co/cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep">Cosmos3-Super weights</a></article>
5745
  <article class="artifact"><h3>Native foundation model</h3><p>The long-term goal is a full-corpus Xperience Embodied Foundation Model trained on synchronized perception, geometry, motion, inertial, audio, and language streams after smaller scaling stages validate the approach.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md">pretraining plan</a></article>
5746
  </div>
5747
  </div>
@@ -5758,7 +5761,7 @@
5758
  <article class="artifact"><h3>Reproducibility matrix</h3><p>Machine-readable command matrix covering sample download, baselines, the unified 20-task suite, figures, and validation.</p><a href="data/reproducibility_matrix.json">reproducibility matrix</a></article>
5759
  <article class="artifact"><h3>Exact-match reproduction record</h3><p>The last metric rebuild reproduced the public-sample outputs from a fresh cache and matched the committed metrics.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/notes/reproducibility_audit.md">reproduction audit</a></article>
5760
  <article class="artifact"><h3>Project dashboard</h3><p>The website organizes the dataset sample, tasks, methods, results, directions, and scale-up path in one tabbed reader flow.</p><a href="#artifacts">project materials</a></article>
5761
- <article class="artifact"><h3>Multi-episode pilot status</h3><p>The comparison JSON now supports both the three-version reading and model-family grouping, with Qwen3 v5/v6 detail kept as a separate machine-readable audit.</p><a href="data/omni_model_comparison.json">comparison</a><a href="data/qwen3_v5_v6_comparison.json">Qwen v5/v6</a></article>
5762
  </div>
5763
  <p class="repro-note">Minimal path: install the toolkit dependencies, download the official sample, run the task suite with neural heads, regenerate tasks 13-20, build the unified 20-task index, regenerate visualizations, then rebuild the supporting project reports.</p>
5764
  <pre class="code-panel"><button type="button" data-copy="setup">Copy</button><code id="setup">git clone https://github.com/Ropedia/HOMIE-toolkit.git
@@ -5796,7 +5799,7 @@ python scripts/validate_publication_package.py</code></pre>
5796
 
5797
  <footer>
5798
  <div class="wrap">
5799
- Built as an embodied-AI learning lab with verified held-out diagnostic branches and a next stage focused on stronger action/subtask quality.
5800
  <span class="footer-meta">README translation metadata remains available at <a href="data/language_versions.json">language_versions.json</a>; use the header selector to translate this website in place.</span>
5801
  </div>
5802
  </footer>
@@ -6087,7 +6090,7 @@ python scripts/validate_publication_package.py</code></pre>
6087
  tasks: "Best for task-by-task input, output, and metric cards.",
6088
  pipeline: "Best for understanding how raw episode data becomes features and results.",
6089
  protocol: "Best for splits, leakage controls, metrics, and evaluation rules.",
6090
- architectures: "Best for how task heads and model branches are organized.",
6091
  features: "Best for modality and feature provenance.",
6092
  takeaways: "Best for the fastest read on what the current metrics mean.",
6093
  models: "Best for minimal baseline evidence.",
@@ -6097,7 +6100,7 @@ python scripts/validate_publication_package.py</code></pre>
6097
  diagnostics: "Best for charts and error-analysis evidence.",
6098
  artifacts: "Best for finding files, mirrors, weights, scripts, and checks.",
6099
  evidence: "Best for current experiment status and milestones.",
6100
- "omni-scale-up": "Best for Qwen3-Omni and Cosmos3 model-branch status.",
6101
  run: "Best for reproduction commands."
6102
  };
6103
  const sectionTabMap = Object.fromEntries(tabSections.map((section) => [section.id, section.dataset.projectTab]));
 
1147
  font-weight: 760;
1148
  width: 18%;
1149
  }
1150
+ .qwen-lineage-table td:first-child {
1151
+ width: 8%;
1152
+ min-width: 58px;
1153
+ }
1154
+ .qwen-lineage-table th:nth-child(2),
1155
+ .qwen-lineage-table td:nth-child(2),
1156
+ .qwen-lineage-table th:nth-child(3),
1157
+ .qwen-lineage-table td:nth-child(3) {
1158
+ width: 28%;
1159
+ }
1160
  .line-table a {
1161
  color: var(--cyan);
1162
  font-weight: 760;
 
3923
  <article class="suite-line-card">
3924
  <small>line 2 / 128 selected episodes</small>
3925
  <h3>128 selected episodes: comparison layer</h3>
3926
+ <p>Seven methods share the selected-episode surface and the same 20 task axes.</p>
3927
  <div class="line-claim">
3928
+ <div><span>valid claim</span><p>Same-split method comparison and scale-up planning.</p></div>
3929
  <div><span>do not claim</span><p>Proxy cells as direct raw-target measurements.</p></div>
3930
  </div>
3931
  <div class="suite-line-facts">
 
3954
  </a>
3955
  <a class="hero-path" href="#directions">
3956
  <small>Extend</small>
3957
+ <strong>Plan next training tracks</strong>
3958
  <span>Spatial intelligence, human-video world models, VLA, and scale-up plans.</span>
3959
  </a>
3960
  </div>
 
4054
  <div class="wrap">
4055
  <div class="section-head">
4056
  <h2>Two evidence lines: 1 episode and 128 episodes.</h2>
4057
+ <p>Read the suite as two lines. Line 1 proves the task lab is inspectable and reproducible. Line 2 compares selected-128 metadata/raw baselines, Qwen3-Omni v6 LoRA, Cosmos3-Super Reasoner, and Cosmos3-Nano Future Window. Keep the lines separate when interpreting scores.</p>
4058
  </div>
4059
  <figure class="line-map-figure">
4060
  <img src="assets/charts/two_evidence_line_map.svg?v=two-line-map-v1" alt="Two evidence-line map showing 1 sample episode, 128 selected episodes, and the combined 180 scored method-task records">
 
4131
  </tbody>
4132
  </table>
4133
  <p class="table-note">Cosmos3-Super Forward-Dynamics LoRA is published as a separate fine-tuned adapter with weights/results; it is not counted as a 20-task matrix method row.</p>
4134
+ <table class="line-table qwen-lineage-table" aria-label="Qwen3-Omni run version ladder">
4135
  <thead>
4136
  <tr>
4137
  <th>Qwen run</th>
4138
+ <th>Purpose</th>
4139
+ <th>Main change</th>
4140
+ <th>Eval signal</th>
4141
+ <th>Use now</th>
 
4142
  </tr>
4143
  </thead>
4144
  <tbody>
4145
  <tr>
4146
  <td>v1</td>
4147
+ <td>Prove the selected-128 LoRA/eval/package loop.</td>
4148
+ <td>First verified 96/16/16 selected-episode Qwen3-Omni LoRA run.</td>
4149
+ <td>448 eval; JSON 0.8750; contact 0.6451.</td>
4150
+ <td>Lineage only.</td>
 
4151
  </tr>
4152
  <tr>
4153
  <td>v2</td>
4154
+ <td>Make answers schema-checked.</td>
4155
+ <td>Structured-JSON contract with full-8-GPU LoRA on the same split.</td>
4156
+ <td>448 eval; JSON 0.9978; contact 0.7188.</td>
4157
+ <td>Structured-output ablation.</td>
 
4158
  </tr>
4159
  <tr>
4160
  <td>v3</td>
4161
+ <td>Separate prompt/eval effects from training.</td>
4162
+ <td>Strict-label prompt/eval over the v2 adapter; no new adapter training.</td>
4163
+ <td>448 eval; JSON 1.0000; contact 0.7210.</td>
4164
+ <td>Prompt/eval ablation.</td>
 
4165
  </tr>
4166
  <tr>
4167
  <td>v4</td>
4168
+ <td>Test longer structured-JSON LoRA training.</td>
4169
+ <td>New four-epoch full-8-GPU adapter on the same selected split.</td>
4170
+ <td>448 eval; JSON 1.0000; contact 0.7299.</td>
4171
+ <td>Overfit/metric-tradeoff evidence.</td>
 
4172
  </tr>
4173
  <tr>
4174
  <td>v5</td>
4175
+ <td>Move to denser multiscale evaluation.</td>
4176
+ <td>Multiscale cap96 export with 4,032 held-out predictions.</td>
4177
+ <td>4,032 eval; JSON 1.0000; contact 0.7865.</td>
4178
+ <td>Pinned prior release; stronger on several non-contact metrics.</td>
 
4179
  </tr>
4180
  <tr>
4181
  <td>v6</td>
4182
+ <td>Publish the current Qwen 20-task row.</td>
4183
+ <td>Rank64/lr5e-5 multiscale LoRA plus verified task-specific probes.</td>
4184
+ <td>4,032 eval; JSON 0.9990; contact 0.8177.</td>
 
4185
  <td>Current public 20-task Qwen3-Omni row.</td>
4186
  </tr>
4187
  </tbody>
4188
  </table>
4189
+ <p class="table-note">Qwen v1-v6 are run-lineage labels inside the selected-128 evidence line, not project-level result lines. Use v6 for the public 20-task Qwen3-Omni row; keep v5 as the pinned prior multiscale comparator; read v1-v4 as pipeline-hardening and ablation evidence. Full details: <a href="data/qwen3_omni_run_lineage.json">qwen3_omni_run_lineage.json</a> and <a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/QWEN3_OMNI_RUN_LINEAGE.md">QWEN3_OMNI_RUN_LINEAGE.md</a>.</p>
4190
  <div class="reader-journey" aria-label="Recommended reader journeys">
4191
  <article class="reader-step">
4192
  <small>01 understand</small>
 
4208
  </article>
4209
  <article class="reader-step">
4210
  <small>04 extend</small>
4211
+ <strong>Choose the next model track</strong>
4212
  <p>Use directions and scale-up resources for spatial, world-model, VLA, Qwen3-Omni, and Cosmos3 follow-up work.</p>
4213
  <a href="#directions">Open directions</a>
4214
  </article>
 
4254
  <article class="brief-card">
4255
  <small>results</small>
4256
  <strong>Compare methods cleanly</strong>
4257
+ <p>Single-episode baselines, 128-episode aligned baselines, Qwen3-Omni v6 LoRA, and Cosmos3-Super/Nano diagnostics stay separated by evidence type.</p>
4258
  <div class="reading-links">
4259
  <a href="#takeaways">takeaways</a>
4260
  <a href="data/unified_task_model_radar.json">radar data</a>
 
4288
  <a href="https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts"><strong>HF artifacts</strong><span>Public-safe derived reports, metrics, website JSON, and result packages.</span></a>
4289
  <a href="https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines"><strong>HF baselines</strong><span>Compact baseline weights, figures, metrics, and mirrored task artifacts.</span></a>
4290
  <a href="https://huggingface.co/cy0307/ropedia-xperience-10m-weights-results"><strong>HF weights + results</strong><span>Consolidated baseline weights, adapters, result summaries, analysis, and manifest.</span></a>
4291
+ <a href="https://huggingface.co/collections/cy0307/ropedia-xperience-10m-task-suite"><strong>HF collection</strong><span>Grouped project surfaces, baseline repos, Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano repos.</span></a>
4292
  </div>
4293
  <div class="brief-actions">
4294
  <a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/PUBLIC_READER_MAP.md">Open full reader map</a>
 
4338
  </article>
4339
  <article class="brief-card">
4340
  <strong>Scale-up readiness</strong>
4341
+ <p>Connects the same data contract to 128-episode baselines, a no-new-episode enhancement pack, Qwen3-Omni LoRA, Cosmos-style world modeling, policy/VLA tracks, and the later Xperience-native pretraining goal.</p>
4342
  </article>
4343
  </div>
4344
  <div class="brief-actions">
 
4362
  </article>
4363
  <article class="split-radar-card">
4364
  <h3>128-Episode 20-Task Radar</h3>
4365
+ <p>Metadata, raw-feature, Qwen3-Omni, and Cosmos3 methods on the aligned 128-episode surface, with all 140 rows scored and proxy/evidence notes kept explicit.</p>
4366
+ <img src="assets/charts/episode128_task_model_radar.svg?v=xperience10m-split-radar-v1" alt="128-episode 20-task radar comparing raw-feature baselines, metadata baselines, Qwen3-Omni, and Cosmos3 series with explicit score counts">
4367
  <div class="split-radar-links">
4368
  <a href="assets/charts/episode128_task_model_radar.svg">Open SVG</a>
4369
  <a href="data/episode128_task_model_radar.json">Open JSON</a>
 
4473
  <div class="wrap">
4474
  <div class="section-head">
4475
  <h2>Research roadmap.</h2>
4476
+ <p>The project path moves from the current public-sample task lab to the latest verified Qwen3-Omni diagnostic run, same-split 128-episode baseline alignment, a no-new-episode enhancement pack, action/subtask error analysis, robustness runs, world/policy tracks, and the future Xperience Embodied Foundation Model pretraining goal.</p>
4477
  </div>
4478
  <div class="roadmap-grid" aria-label="Research roadmap stages">
4479
  <article class="roadmap-card" data-status="implemented">
 
4495
  </div>
4496
  </article>
4497
  <article class="roadmap-card" data-status="verified_latest_branch">
4498
+ <span class="roadmap-status">verified latest run</span>
4499
  <h3>Qwen3-Omni LoRA Latest Diagnostic Branch</h3>
4500
  <p>Train lightweight adapters on selected prepared episodes and evaluate on held-out episodes with committed predictions, metrics, and run reports.</p>
4501
  <div class="roadmap-meta">
 
4588
  <div class="wrap">
4589
  <div class="section-head">
4590
  <h2>Additional development directions.</h2>
4591
+ <p>Beyond the current task heads, Qwen3-Omni fine-tuning path, Cosmos/world-model track, and future native pretraining goal, Xperience-10M can support three foundation pipeline tracks plus several concrete research-development tracks.</p>
4592
  </div>
4593
  <div class="foundation-pipeline-grid" aria-label="Three high-resolution foundation direction slide diagrams">
4594
  <article class="foundation-pipeline-card">
 
4634
  <article class="artifact"><h3>Multimodal representation learning</h3><p>Train contrastive and masked-prediction encoders over synchronized video, audio, depth, pose, mocap, IMU, and language windows.</p><a href="data/additional_development_directions.json">JSON plan</a></article>
4635
  <article class="artifact"><h3>Skill and procedure graphs</h3><p>Mine action steps, transitions, preconditions, effects, and temporal graphs that connect egocentric perception to planning.</p><a href="data/research_directions.json">current task map</a></article>
4636
  <article class="artifact"><h3>Human-object affordances</h3><p>Add contact, reachable-object, tool-use, and next-affordance tasks using hands, mocap, objects, contacts, video, and language.</p><a href="data/task_walkthroughs.json">task walkthroughs</a></article>
4637
+ <article class="artifact"><h3>3D/4D scene and object memory</h3><p>Fuse depth, pose/SLAM, multiview video, and object cues into persistent scene/object maps for spatial reasoning and object permanence.</p><a href="data/foundation_model_plan.json">model tracks</a></article>
4638
  <article class="artifact"><h3>Quality and sync diagnostics</h3><p>Track timestamp drift, missing streams, calibration consistency, corrupted files, and degraded-mode manifests before large training runs.</p><a href="data/evidence_contract.json">evidence contract</a></article>
4639
  <article class="artifact"><h3>Policy and simulation transfer</h3><p>Convert mocap, hand trajectories, contacts, and object states into action tokens, robot-compatible targets, and imitation-learning examples.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/FOUNDATION_MODEL_PLAN.md">foundation plan</a></article>
4640
  </div>
 
4653
  <article class="artifact"><h3>Metric contract</h3><p>All 20 tasks list input, target, primary metric, baseline score, and source artifact path in the unified suite file.</p><a href="data/task_suite_20.json">task_suite_20.json</a></article>
4654
  <article class="artifact"><h3>Leakage controls</h3><p>Scalers fit on train windows only; future labels, target-side signals, caption/object labels, and contact labels stay on the target side unless explicitly queried.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/scripts/build_evaluation_protocol.py">builder script</a></article>
4655
  <article class="artifact"><h3>Audio ablation</h3><p>Audio and no-audio variants are evaluated across the original task contracts under the same chronological split.</p><a href="data/audio_ablation_summary.json">audio summary</a></article>
4656
+ <article class="artifact"><h3>Foundation track selection</h3><p>Qwen3-Omni is the first trainable baseline, Cosmos 3 is the world-model track with a camera-pose proxy forward-dynamics contract ready for trainer work, policy models wait for robot-compatible action targets, and Xperience-native pretraining remains a later full-corpus goal.</p><a href="data/foundation_model_plan.json">backbone plan</a></article>
4657
+ <article class="artifact"><h3>Next evaluation stage</h3><p>This public-sample run covers single-episode task development. The selected multi-episode Qwen3-Omni final diagnostic result is verified and meets the JSON-validity target; Cosmos3-Nano has a verified future-window compatibility package; and Cosmos3-Super has a verified base-weight JSON-task evaluation plus a fine-tuned forward-dynamics LoRA artifact. The next stage is action/subtask error analysis, stronger model-quality runs, and policy-target conversion.</p><a href="data/omni_model_comparison.json">result comparison</a></article>
4658
  <article class="artifact"><h3>128-Episode Task Suite Enhancement Pack</h3><p>Before adding episodes, the suite should try <code>multiscale_20s10_40s20_80s40</code>, hierarchical action/subtask targets, label-normalized scoring, and compact raw-feature shards for unsupported tasks.</p><a href="data/task_suite_enhancement_128.json">task_suite_enhancement_128.json</a></article>
4659
+ <article class="artifact"><h3>Scale-up requirement</h3><p>Future Omni, Cosmos, and policy tracks use the same episode split discipline, training metadata, held-out predictions, metrics, run report, and public-safe package gate.</p><a href="data/foundation_model_plan.json">scale-up status</a></article>
4660
  </div>
4661
  </div>
4662
  </section>
 
4708
  <article class="evidence-card">
4709
  <span class="status-pill">current plan</span>
4710
  <h3>Foundation backbones are separated by role</h3>
4711
+ <p>Qwen3-Omni stays first for held-out LoRA; Cosmos 3 is the world-model track with camera-pose proxy forward-dynamics targets ready for trainer work; OpenVLA/openpi/GR00T are policy candidates after robot-compatible action conversion; Xperience-native pretraining is the later full-corpus goal.</p>
4712
  <div class="evidence-links">
4713
  <a href="data/foundation_model_plan.json">foundation model plan</a>
4714
  <a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/FOUNDATION_MODEL_PLAN.md">plan doc</a>
 
4717
  </article>
4718
  <article class="evidence-card">
4719
  <span class="status-pill">verified diagnostic</span>
4720
+ <h3>Qwen3-Omni and Cosmos3 series</h3>
4721
  <p>The selected 96/16/16 episode split now has a verified Qwen3-Omni v6 package with 4,032 held-out test predictions and 99.90% JSON validity. Cosmos3-Nano has 378 held-out future-window predictions, Cosmos3-Super Reasoner has 448 held-out base-weight JSON-task predictions, and Cosmos3-Super Forward-Dynamics LoRA has 448 held-out loss records.</p>
4722
  <div class="evidence-links">
4723
  <a href="data/omni_model_comparison.json">result comparison</a>
 
4894
  </div>
4895
  <div class="artifact-grid">
4896
  <article class="artifact primary-artifact"><div><h3>Official dataset</h3><p>Xperience-10M is a gated large-scale egocentric multimodal dataset for embodied AI, robotics, spatial intelligence, and world modeling.</p></div><a href="https://huggingface.co/datasets/ropedia-ai/xperience-10m">official HF dataset</a></article>
4897
+ <article class="artifact"><h3>Line 1 public sample</h3><p>The one-episode line builds the inspectable 20-task lab. It is not evidence of multi-episode generalization.</p><a href="https://huggingface.co/datasets/ropedia-ai/xperience-10m-sample">sample dataset</a></article>
4898
  <article class="artifact"><h3>Modalities</h3><p>The sample exposes synchronized video, audio, depth, pose/SLAM, motion capture, inertial signals, calibration, and language annotations.</p><a href="data/modality_atlas.json">modality atlas</a></article>
4899
+ <article class="artifact"><h3>Multi-episode pilot</h3><p>The selected 128-episode Qwen3-Omni LoRA v6 diagnostic run is verified with 4,032 held-out test predictions and 99.90% JSON validity. Action/subtask metrics are still weak, so this remains a baseline for error analysis.</p><a href="https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep">LoRA adapter</a><a href="data/qwen3_v5_v6_comparison.json">v5/v6 comparison</a></article>
4900
  <article class="artifact"><h3>Raw sample browser</h3><p>The Data tab now exposes the official public sample files directly, including playable MP4 video streams and the audio track embedded in fisheye_cam0.mp4.</p><a href="#raw-sample">open raw browser</a><a href="data/raw_sample_files.json">raw manifest</a></article>
4901
  <article class="artifact"><h3>Data boundary</h3><p>Raw MP4, HDF5, RRD files are streamed from the official public sample source when opened here; private gated data and full Qwen weights are not redistributed in this project.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/DATA_NOTICE.md">data notice</a></article>
4902
  <article class="artifact"><h3>Current project subset</h3><p>One public sample episode, 5,821 frames, 1,161 aligned windows, 8,546-dimensional task inputs, plus direct links to the official raw sample files.</p><a href="data/modality_atlas.json">modality atlas</a></article>
 
5023
  <article class="split-radar-card">
5024
  <h3>128-Episode 20-Task Radar</h3>
5025
  <p>Seven aligned 128-episode methods cover all 20 axes: metadata simple/NN, raw-feature simple/NN, Qwen3-Omni, Cosmos3-Super, and Cosmos3-Nano. Proxy axes stay labeled in the JSON.</p>
5026
+ <img src="assets/charts/episode128_task_model_radar.svg?v=xperience10m-split-radar-v1" alt="128-episode 20-task radar comparing raw-feature baselines, metadata baselines, Qwen3-Omni, and Cosmos3 series with explicit score counts">
5027
  <div class="split-radar-links">
5028
  <a href="assets/charts/episode128_task_model_radar.svg">Open SVG</a>
5029
  <a href="data/episode128_task_model_radar.json">Open JSON</a>
 
5127
  <article class="result-reading-step">
5128
  <span>02</span>
5129
  <strong>Open the radar</strong>
5130
+ <p>Single-episode radar shows Minimal vs Neural MLP. The 128-episode radar shows metadata/raw baselines, Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano.</p>
5131
  </article>
5132
  <article class="result-reading-step">
5133
  <span>03</span>
 
5679
  <p>Use these files to navigate the whole project, open the published mirrors, or reproduce the public-sample pipeline.</p>
5680
  </div>
5681
  <div class="artifact-grid">
5682
+ <article class="artifact primary-artifact"><div><h3>Public reader map</h3><p>Single navigation layer for GitHub, GitHub Pages, HF Space, artifact dataset, baseline model repo, Qwen3-Omni/Cosmos3 repos, and public claim boundaries.</p></div><a href="data/public_reader_map.json">reader map</a></article>
5683
  <article class="artifact primary-artifact"><div><h3>Artifact guide</h3><p>Human-readable map from project scope to data contract, task evidence, platform mirrors, and scale-up status.</p></div><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/ARTIFACT_GUIDE.md">artifact guide</a></article>
5684
  <article class="artifact"><h3>Reproduction scripts</h3><p>Training, visualization, taxonomy, walkthrough, validator, and omni-readiness scripts.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/tree/main/scripts">scripts/</a></article>
5685
  <article class="artifact"><h3>Hugging Face Space</h3><p>The dashboard packaged as a public static Space.</p><a href="https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite">HF Space</a></article>
 
5699
  <p>The multi-episode Qwen3-Omni path is documented, scripted, and verified as a validation-monitored diagnostic held-out pilot. Stronger model-quality metrics require structured-output and error-analysis improvements.</p>
5700
  </div>
5701
  <div class="artifact-grid">
5702
+ <article class="artifact primary-artifact"><div><h3>Two-line model comparison</h3><p>Groups Line 1 task-head baselines and Line 2 selected-128 methods: metadata/raw baselines, Qwen3-Omni v6 LoRA, Cosmos3-Nano Future Window, and Cosmos3-Super Reasoner.</p></div><a href="data/omni_model_comparison.json">result comparison</a></article>
5703
  <article class="artifact primary-artifact"><div><h3>128-episode source + features</h3><p>Maps every selected official Xperience-10M episode id to its gated source tree and the public-safe processed features: Qwen v6 multiscale windows, dense multiscale rows, and metadata matrices.</p></div><a href="data/xperience10m_128_episode_feature_index.json">source/feature index</a></article>
5704
  <article class="artifact"><h3>128-Episode Task Suite Enhancement Pack</h3><p>No-new-episode plan for denser supervision: `multiscale_20s10_40s20_80s40`, hierarchical action/subtask labels, stronger scoring slices, and raw-feature shard priorities.</p><a href="data/task_suite_enhancement_128.json">task_suite_enhancement_128.json</a></article>
5705
  <article class="artifact"><h3>Foundation-model plan</h3><p>Backbone selection matrix covering Qwen3-Omni, Cosmos 3, GR00T, OpenVLA/openpi, Gemini Robotics, Octo, SmolVLA-style policy candidates, and the future Xperience-native pretraining goal.</p><a href="data/foundation_model_plan.json">foundation model plan</a></article>
5706
  <article class="artifact"><h3>Multi-episode data access</h3><p>Public data-access path, selected 128-episode pilot plan, and preparation requirements.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md">data access</a></article>
5707
  <article class="artifact"><h3>Qwen3-Omni LoRA group</h3><p>Separates the 1-episode sensor-adapter smoke test from Qwen run v1-v6. v6 is the current 20-task matrix row, while v5 remains the pinned prior release.</p><a href="data/qwen3_omni_run_lineage.json">Qwen v1-v6 lineage</a><a href="data/omni_model_comparison.json">Qwen group</a></article>
5708
+ <article class="artifact"><h3>Cosmos3 groups</h3><p>Shows the verified Nano future-window compatibility package, the Super base-weight Reasoner JSON-task evaluation, and the Super fine-tuned forward-dynamics LoRA artifact with separate loss metrics.</p><a href="data/omni_model_comparison.json">Cosmos groups</a></article>
5709
  <article class="artifact"><h3>Scale-up requirement</h3><p>Future runs need validation tracking, held-out predictions, quality-target reporting, and the same public-safe package gate.</p><a href="data/foundation_model_plan.json">training requirements</a></article>
5710
  <article class="artifact"><h3>Xperience-native pretraining</h3><p>Future plan for a domain-specific embodied foundation model trained from scratch over full-corpus video, audio, geometry, motion, inertial, and language streams.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md">pretraining plan</a></article>
5711
  </div>
 
5724
  <article class="artifact"><h3>Dataset notes</h3><p>Official dataset links, public sample source, modalities, access boundary, and current project subset.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/XPERIENCE10M_DATASET_CARD_ALIGNMENT.md">dataset notes</a></article>
5725
  <article class="artifact"><h3>Reproducibility</h3><p>Commands and expected outputs for rebuilding the public-sample task suite and visual artifacts.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/REPRODUCIBILITY.md">reproduce</a></article>
5726
  <article class="artifact"><h3>Qwen3-Omni status</h3><p>Data requirements and evaluation boundary for the selected multi-episode LoRA pilot.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/results/omni_finetune/DATA_ACCESS_STATUS.md">training status</a></article>
5727
+ <article class="artifact"><h3>Foundation-model plan</h3><p>Qwen3-Omni, Cosmos 3, GR00T, OpenVLA/openpi, Gemini Robotics, Octo, SmolVLA-style tracks, and the Xperience-native pretraining goal by role.</p><a href="data/foundation_model_plan.json">model plan</a></article>
5728
  <article class="artifact"><h3>Hub artifacts</h3><p>Derived CSV/JSON/Markdown/figure artifacts without redistributing raw Xperience-10M data.</p><a href="https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts">artifact dataset</a></article>
5729
  <article class="artifact"><h3>Baseline models</h3><p>Lightweight minimal and neural task-head model files for the task contracts.</p><a href="https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines">model repo</a></article>
5730
  </div>
 
5736
  <section id="omni-scale-up" data-project-tab="resources" role="tabpanel" aria-labelledby="tab-resources" tabindex="-1">
5737
  <div class="wrap">
5738
  <div class="section-head">
5739
+ <h2>Qwen3-Omni diagnostic run is verified.</h2>
5740
  <p>The selected pilot uses 128 source-balanced episodes across 128 different session UUIDs. The latest v6 held-out package is verified, and its weak metrics define the next structured-output and error-analysis pass.</p>
5741
  </div>
5742
  <div class="artifact-grid">
 
5744
  <article class="artifact"><h3>Transfer</h3><p>Download raw episodes only from official gated sources, exclude visualization.rrd, validate files, then stage them for training.</p></article>
5745
  <article class="artifact"><h3>Current LoRA artifact</h3><p>The current Qwen3-Omni LoRA artifact is the verified v6 selected 128-episode diagnostic adapter. The v5 row remains pinned as the prior release, and the 1-episode Qwen entry is only a sensor-adapter smoke test.</p><a href="data/omni_model_comparison.json">model groups</a></article>
5746
  <article class="artifact"><h3>128-Episode Task Suite Enhancement Pack</h3><p>The next suite push does not need more episodes first: use `multiscale_20s10_40s20_80s40`, hierarchical action/subtask targets, and raw-feature shards while keeping the held-out split fixed.</p><a href="data/task_suite_enhancement_128.json">task_suite_enhancement_128.json</a></article>
5747
+ <article class="artifact"><h3>Backbone tracks</h3><p>Qwen3-Omni uses a separate LoRA model repo; Cosmos3-Nano remains a compatibility package; Cosmos3-Super now has a verified forward-dynamics LoRA artifact with weights in a dedicated model repo.</p><a href="https://huggingface.co/cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep">Cosmos3-Super weights</a></article>
5748
  <article class="artifact"><h3>Native foundation model</h3><p>The long-term goal is a full-corpus Xperience Embodied Foundation Model trained on synchronized perception, geometry, motion, inertial, audio, and language streams after smaller scaling stages validate the approach.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md">pretraining plan</a></article>
5749
  </div>
5750
  </div>
 
5761
  <article class="artifact"><h3>Reproducibility matrix</h3><p>Machine-readable command matrix covering sample download, baselines, the unified 20-task suite, figures, and validation.</p><a href="data/reproducibility_matrix.json">reproducibility matrix</a></article>
5762
  <article class="artifact"><h3>Exact-match reproduction record</h3><p>The last metric rebuild reproduced the public-sample outputs from a fresh cache and matched the committed metrics.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/notes/reproducibility_audit.md">reproduction audit</a></article>
5763
  <article class="artifact"><h3>Project dashboard</h3><p>The website organizes the dataset sample, tasks, methods, results, directions, and scale-up path in one tabbed reader flow.</p><a href="#artifacts">project materials</a></article>
5764
+ <article class="artifact"><h3>Line 2 model status</h3><p>The comparison JSON groups selected-128 baselines, Qwen3-Omni v6 LoRA, Cosmos3-Nano Future Window, and Cosmos3-Super Reasoner. Qwen v5/v6 detail stays in a separate lineage audit.</p><a href="data/omni_model_comparison.json">comparison</a><a href="data/qwen3_v5_v6_comparison.json">Qwen v5/v6</a></article>
5765
  </div>
5766
  <p class="repro-note">Minimal path: install the toolkit dependencies, download the official sample, run the task suite with neural heads, regenerate tasks 13-20, build the unified 20-task index, regenerate visualizations, then rebuild the supporting project reports.</p>
5767
  <pre class="code-panel"><button type="button" data-copy="setup">Copy</button><code id="setup">git clone https://github.com/Ropedia/HOMIE-toolkit.git
 
5799
 
5800
  <footer>
5801
  <div class="wrap">
5802
+ Built as an embodied-AI learning lab with verified held-out diagnostic runs and a next stage focused on stronger action/subtask quality.
5803
  <span class="footer-meta">README translation metadata remains available at <a href="data/language_versions.json">language_versions.json</a>; use the header selector to translate this website in place.</span>
5804
  </div>
5805
  </footer>
 
6090
  tasks: "Best for task-by-task input, output, and metric cards.",
6091
  pipeline: "Best for understanding how raw episode data becomes features and results.",
6092
  protocol: "Best for splits, leakage controls, metrics, and evaluation rules.",
6093
+ architectures: "Best for how task heads and model tracks are organized.",
6094
  features: "Best for modality and feature provenance.",
6095
  takeaways: "Best for the fastest read on what the current metrics mean.",
6096
  models: "Best for minimal baseline evidence.",
 
6100
  diagnostics: "Best for charts and error-analysis evidence.",
6101
  artifacts: "Best for finding files, mirrors, weights, scripts, and checks.",
6102
  evidence: "Best for current experiment status and milestones.",
6103
+ "omni-scale-up": "Best for Qwen3-Omni and Cosmos3 status.",
6104
  run: "Best for reproduction commands."
6105
  };
6106
  const sectionTabMap = Object.fromEntries(tabSections.map((section) => [section.id, section.dataset.projectTab]));
metrics/additional_development_directions.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "title": "Additional Development Directions",
3
- "summary": "Concrete Xperience-10M project directions beyond the current minimal baselines, Qwen3-Omni LoRA plan, Cosmos/world-model branch, and long-term native pretraining goal.",
4
  "status": "planned_research_directions",
5
  "public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
6
  "directions": [
 
1
  {
2
  "title": "Additional Development Directions",
3
+ "summary": "Concrete Xperience-10M project directions beyond the current minimal baselines, Qwen3-Omni LoRA plan, Cosmos/world-model track, and long-term native pretraining goal.",
4
  "status": "planned_research_directions",
5
  "public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
6
  "directions": [
metrics/artifact_index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Artifact Index",
3
- "generated_at_utc": "2026-06-21T08:55:13+00:00",
4
  "status": "pass",
5
  "artifact_count": 226,
6
  "missing": [],
@@ -81,8 +81,8 @@
81
  "surface": "website_hf",
82
  "shows": "Machine-readable copy of the current project status for website and HF mirrors.",
83
  "exists": true,
84
- "bytes": 23057,
85
- "sha256": "aa24087a4c80390869cbf771571dd04923f8cf1b5a2f773c70586a4bae10bd48"
86
  },
87
  {
88
  "id": "research_roadmap",
@@ -92,8 +92,8 @@
92
  "surface": "repo_hf",
93
  "shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
94
  "exists": true,
95
- "bytes": 15275,
96
- "sha256": "b7774813c9cddb49181d9589cf07aa9496756c09ddede41c7661a41b6e81a3a0"
97
  },
98
  {
99
  "id": "research_roadmap_json",
@@ -103,8 +103,8 @@
103
  "surface": "website_hf",
104
  "shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
105
  "exists": true,
106
- "bytes": 14133,
107
- "sha256": "5d73996f9bf4c3539beb5d428b21423a583d439fcf439faf8ab17f7364d53d88"
108
  },
109
  {
110
  "id": "foundation_model_plan",
@@ -114,8 +114,8 @@
114
  "surface": "repo_hf",
115
  "shows": "Defines the post-data-gate backbone choices: Qwen3-Omni first, Cosmos 3 for world modeling, and VLA/policy models after action-target conversion.",
116
  "exists": true,
117
- "bytes": 10996,
118
- "sha256": "a78e960ae0f0e815c2e26a69ec3b6071099fa7ccfb6ad860144cd7ee94e77e56"
119
  },
120
  {
121
  "id": "foundation_model_plan_json",
@@ -125,8 +125,8 @@
125
  "surface": "website_hf",
126
  "shows": "Machine-readable foundation-model selection matrix with source links, entry conditions, and evaluation additions.",
127
  "exists": true,
128
- "bytes": 13926,
129
- "sha256": "ccc80ed46eb961eb315f3060bdffa4676a05e73ef47ba25b1e5a675e25ce8754"
130
  },
131
  {
132
  "id": "three_foundation_pipelines",
@@ -222,7 +222,7 @@
222
  "path": "OMNI_MODEL_EXTENSION_CONTRACT.md",
223
  "kind": "scaleup_contract",
224
  "surface": "repo_hf",
225
- "shows": "Defines the shared manifest, episode split, held-out evaluation, packaging, and public-safety rules for Qwen3-Omni, Cosmos-style, and VLA/policy model branches.",
226
  "exists": true,
227
  "bytes": 8900,
228
  "sha256": "c4e51d0aa7536045c229418603a67c6b3c5f31c9d756ca7395cb0c9455f0ed6d"
@@ -323,8 +323,8 @@
323
  "surface": "website_hf",
324
  "shows": "Machine-readable enhancement pack for the website and Hugging Face mirrors.",
325
  "exists": true,
326
- "bytes": 20181,
327
- "sha256": "17453f9a949278b1f3038d68124f8f0e2441584d4c1384d482d2ac9ca295e97e"
328
  },
329
  {
330
  "id": "task_suite_enhancement_128_result",
@@ -345,8 +345,8 @@
345
  "surface": "repo_hf",
346
  "shows": "Regenerates the enhancement pack from committed 128-episode windows, baseline summaries, verified Qwen predictions, and Cosmos reference metrics.",
347
  "exists": true,
348
- "bytes": 27210,
349
- "sha256": "0e098d7c1a5c91ec8472d5eb8fc0ebab0305cf647d1f0f4f2ba6bd4c1d531546"
350
  },
351
  {
352
  "id": "xperience10m_128_episode_feature_index",
@@ -510,8 +510,8 @@
510
  "surface": "repo_hf",
511
  "shows": "Records concrete non-backbone Xperience-10M development tracks: taxonomy, benchmark protocol, representation learning, skill graphs, affordances, 3D/4D memory, QA, and policy transfer.",
512
  "exists": true,
513
- "bytes": 3137,
514
- "sha256": "cb4077bcd7e2b33efdd0306c7cb1f28dec547c71739fd52f7233218f90c54941"
515
  },
516
  {
517
  "id": "additional_development_directions_json",
@@ -521,8 +521,8 @@
521
  "surface": "website_hf",
522
  "shows": "Machine-readable additional development directions for the website and Hugging Face mirrors.",
523
  "exists": true,
524
- "bytes": 6121,
525
- "sha256": "4458c5b82062aed8c19bcc914f795ec217114422e9b6d54b624371c4b3c8681f"
526
  },
527
  {
528
  "id": "xperience_embodied_foundation_pretraining",
@@ -610,7 +610,7 @@
610
  "shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
611
  "exists": true,
612
  "bytes": 4432,
613
- "sha256": "7e921c5225389e3481f6133e6c3a1afb7f7f79ea5cdc4638ed3d1bfde48c63cf"
614
  },
615
  {
616
  "id": "source_alignment_validator",
@@ -631,8 +631,8 @@
631
  "surface": "repo_hf",
632
  "shows": "Publishes prepared Space, artifact dataset, and model bundles, including an explicit model-binary upload batch.",
633
  "exists": true,
634
- "bytes": 25097,
635
- "sha256": "7775c5e8767c0dba207fb5fda2d9f0d4a47280d978a1947baa39469fba977a69"
636
  },
637
  {
638
  "id": "github_package_dockerfile",
@@ -728,10 +728,10 @@
728
  "path": "docs/data/unified_task_model_radar.json",
729
  "kind": "website_data",
730
  "surface": "website_hf",
731
- "shows": "Stores normalized 20-axis radar values, raw task metrics, Qwen3/Cosmos overlay mappings, branch-card caveats, proxy flags, and source artifacts.",
732
  "exists": true,
733
- "bytes": 228805,
734
- "sha256": "e947ff0579014d5f2c928f689077958d94304a3ac9d978d8475ce5b799e03df8"
735
  },
736
  {
737
  "id": "single_episode_task_model_radar_json",
@@ -741,8 +741,8 @@
741
  "surface": "website_hf",
742
  "shows": "Machine-readable split radar for the one-episode Minimal and Neural MLP baselines, both scored on all 20 task contracts.",
743
  "exists": true,
744
- "bytes": 51097,
745
- "sha256": "f4975e8a1d02dd3a168660827fc92257257d18107887abc83ca225950fb283d7"
746
  },
747
  {
748
  "id": "episode128_task_model_radar_json",
@@ -750,10 +750,10 @@
750
  "path": "docs/data/episode128_task_model_radar.json",
751
  "kind": "website_data",
752
  "surface": "website_hf",
753
- "shows": "Machine-readable split radar for selected 128-episode metadata/raw baselines and verified Qwen3/Cosmos branches, now complete at 140/140 scored rows with proxy notes retained.",
754
  "exists": true,
755
- "bytes": 184945,
756
- "sha256": "36d500a4f64614a88ed80af88594289adf06753ea85a2273b9bdaf6cb8ca7f44"
757
  },
758
  {
759
  "id": "task_method_20_result_matrix_json",
@@ -764,7 +764,7 @@
764
  "shows": "Machine-readable 9-method by 20-task matrix where every method has 20 records and the current release is complete at 180/180 scored rows.",
765
  "exists": true,
766
  "bytes": 128509,
767
- "sha256": "5b02cd11edffe2e8f6f9d882bf5b895c308b8686cb6672e5df3c88e8e17a9ddd"
768
  },
769
  {
770
  "id": "task_method_20_result_matrix",
@@ -808,7 +808,7 @@
808
  "shows": "Machine-readable check that scored JSON-backed matrix cells match their declared metric source values.",
809
  "exists": true,
810
  "bytes": 561,
811
- "sha256": "a2c684b7c6a60dc5868b796eec41adbc3a1eb37235d357271f3171b20f85c28f"
812
  },
813
  {
814
  "id": "task_method_20_source_audit",
@@ -819,7 +819,7 @@
819
  "shows": "Reader-facing source-value audit for the 180-result matrix.",
820
  "exists": true,
821
  "bytes": 447,
822
- "sha256": "0b1bc352200143957a177947b197cab6ee46602cda338b94bfc81922568ee9ea"
823
  },
824
  {
825
  "id": "two_evidence_line_map_chart",
@@ -838,7 +838,7 @@
838
  "path": "docs/assets/charts/unified_task_model_radar.svg",
839
  "kind": "generated_figure",
840
  "surface": "website_hf",
841
- "shows": "Compares minimal and neural MLP baselines across all 20 tasks, with Qwen3/Cosmos task-aligned model overlays.",
842
  "exists": true,
843
  "bytes": 57938,
844
  "sha256": "bb83b80b47fe679ebdce2c99378a4548120f1c8cc2d725b88e409d8c386dcbf8"
@@ -851,8 +851,8 @@
851
  "surface": "website_hf",
852
  "shows": "Separates the one-episode Minimal and Neural MLP 20/20 scored baselines into a clean two-polygon radar.",
853
  "exists": true,
854
- "bytes": 35230,
855
- "sha256": "e5516268336fd7289f38c0b80937d40080f3b1c804e82a409405d60e6eab03b0"
856
  },
857
  {
858
  "id": "episode128_task_model_radar_chart",
@@ -860,10 +860,10 @@
860
  "path": "docs/assets/charts/episode128_task_model_radar.svg",
861
  "kind": "generated_figure",
862
  "surface": "website_hf",
863
- "shows": "Separates the selected 128-episode methods: raw-feature simple/NN as complete 20/20 scored polygons and metadata/Qwen/Cosmos as task-aligned overlays.",
864
  "exists": true,
865
- "bytes": 51905,
866
- "sha256": "03e78b45fc91bab4c88e54bd0c3dc03afda4d55ef1f96569d7aedb2506d99065"
867
  },
868
  {
869
  "id": "unified_task_model_radar_builder",
@@ -873,8 +873,8 @@
873
  "surface": "repo_hf",
874
  "shows": "Regenerates the direction-aware radar chart and machine-readable metric overlay JSON.",
875
  "exists": true,
876
- "bytes": 68555,
877
- "sha256": "e7cf9d1d3e25117ccd0a2b93c69850d4201cd34bda97bf294fb3b037fc1aa351"
878
  },
879
  {
880
  "id": "task_method_20_gap_audit_builder",
@@ -915,7 +915,7 @@
915
  "path": "results/omni_finetune/model_output_probe_readiness/model_output_probe_readiness.json",
916
  "kind": "scaleup_status",
917
  "surface": "repo_hf",
918
- "shows": "Checks whether Qwen3/Cosmos branches have train, validation, and test prediction files before extending model overlays to all 20 task contracts.",
919
  "exists": true,
920
  "bytes": 4320,
921
  "sha256": "11cff26749bf6ad8b8ee028b18e0b4be5713ed8b5325578caa03be25d894263b"
@@ -928,8 +928,8 @@
928
  "surface": "repo_hf",
929
  "shows": "Audits model-output split availability and writes a readiness report without assigning new numeric task scores.",
930
  "exists": true,
931
- "bytes": 10520,
932
- "sha256": "741ee733068e87c52c8da2bd15987e2b4538b5e705592182d76c42b5cf34fe96"
933
  },
934
  {
935
  "id": "existing_model_output_task_probe",
@@ -937,7 +937,7 @@
937
  "path": "results/omni_finetune/model_output_task_probes_20260616/summary.json",
938
  "kind": "model_result",
939
  "surface": "repo_hf",
940
- "shows": "Scores task-specific Qwen3/Cosmos overlays only where verified held-out prediction JSON or compact target maps already contain the required targets.",
941
  "exists": true,
942
  "bytes": 5951,
943
  "sha256": "910477d2fba648605dda128d0ecd2a2c13cfa460573e350dc850014ac91c6c2b"
@@ -950,8 +950,8 @@
950
  "surface": "repo_hf",
951
  "shows": "Derives task-specific scores from committed verified model outputs without running new inference or backfilling absent targets.",
952
  "exists": true,
953
- "bytes": 69411,
954
- "sha256": "a78889e5225405fcbb0156aa8c63cbbe79022a12283d0b398c5f2bb015175fe7"
955
  },
956
  {
957
  "id": "a100_128_metadata_task_baselines",
@@ -1071,8 +1071,8 @@
1071
  "surface": "repo_hf",
1072
  "shows": "Catalogs public figures, charts, modality thumbnails, dimensions, hashes, roles, and source scripts.",
1073
  "exists": true,
1074
- "bytes": 6983,
1075
- "sha256": "48ea04c063df0745f2a31483d15baa71d420906b2ad7ce15fdb10760f41907e6"
1076
  },
1077
  {
1078
  "id": "figure_index_json",
@@ -1082,8 +1082,8 @@
1082
  "surface": "website_hf",
1083
  "shows": "Machine-readable visual asset index for website and Hugging Face mirrors.",
1084
  "exists": true,
1085
- "bytes": 19441,
1086
- "sha256": "b14b9cb1561db131827a8898fc42629c772eb173108d1fcbf1fbf931389da285"
1087
  },
1088
  {
1089
  "id": "figure_index_builder",
@@ -1093,8 +1093,8 @@
1093
  "surface": "repo_hf",
1094
  "shows": "Regenerates visual-asset hashes, dimensions, and source-script provenance.",
1095
  "exists": true,
1096
- "bytes": 16801,
1097
- "sha256": "b0e060c77a10d509ac471a83a7aa2f0ec9474a5b48fbb56cbc0d62c8ffa6fcd2"
1098
  },
1099
  {
1100
  "id": "brand_assets_json",
@@ -1160,7 +1160,7 @@
1160
  "shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
1161
  "exists": true,
1162
  "bytes": 8640,
1163
- "sha256": "1046f75bd4529244290822c67fde137ae1465a3b65e6308b75acb856e4bee191"
1164
  },
1165
  {
1166
  "id": "public_surface_qa",
@@ -1179,10 +1179,10 @@
1179
  "path": "PUBLIC_READER_MAP.md",
1180
  "kind": "project_path",
1181
  "surface": "repo_hf",
1182
- "shows": "Provides the first-pass navigation layer for GitHub, GitHub Pages, Hugging Face mirrors, model-branch repos, evidence layers, and claim boundaries.",
1183
  "exists": true,
1184
- "bytes": 4892,
1185
- "sha256": "d1121455dbd547a5f6111c8ec0edc3380586d824c853dff521ef640d872ae1fb"
1186
  },
1187
  {
1188
  "id": "public_reader_map_json",
@@ -1192,8 +1192,8 @@
1192
  "surface": "website_hf",
1193
  "shows": "Machine-readable public reader map used by the website and Hugging Face mirrors to keep entry points and surface responsibilities explicit.",
1194
  "exists": true,
1195
- "bytes": 5906,
1196
- "sha256": "4a229fc7f084dbaab14bf11e00e5128d0b73dd074fafbbd732b8b0cbf92c01df"
1197
  },
1198
  {
1199
  "id": "public_surface_qa_json",
@@ -1285,7 +1285,7 @@
1285
  "volatile": true,
1286
  "shows": "Records the last live GitHub/HF URL verification after upload.",
1287
  "exists": true,
1288
- "bytes": 184684,
1289
  "hash_policy": "existence_and_size_only"
1290
  },
1291
  {
@@ -1296,8 +1296,8 @@
1296
  "surface": "repo",
1297
  "shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
1298
  "exists": true,
1299
- "bytes": 67647,
1300
- "sha256": "d2b4af98e6fd8b23fd86cd068f2bbf887e5d69686dd62fe3bfc7e8251a6d75d6"
1301
  },
1302
  {
1303
  "id": "reproducibility_contract",
@@ -1329,8 +1329,8 @@
1329
  "surface": "repo_hf",
1330
  "shows": "Generates the selective artifact catalog from local files.",
1331
  "exists": true,
1332
- "bytes": 67519,
1333
- "sha256": "eae86845582f2551782fa7a81837bc3b30d67a050aced1a5f5158644ea0e6512"
1334
  },
1335
  {
1336
  "id": "publication_audit",
@@ -1365,7 +1365,7 @@
1365
  "volatile": true,
1366
  "shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
1367
  "exists": true,
1368
- "bytes": 1413010,
1369
  "hash_policy": "existence_and_size_only"
1370
  },
1371
  {
@@ -1377,7 +1377,7 @@
1377
  "volatile": true,
1378
  "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
1379
  "exists": true,
1380
- "bytes": 20542,
1381
  "hash_policy": "existence_and_size_only"
1382
  },
1383
  {
@@ -1542,8 +1542,8 @@
1542
  "surface": "website_hf",
1543
  "shows": "Presents the task suite and sample modality thumbnails with metrics generated from committed files.",
1544
  "exists": true,
1545
- "bytes": 1899884,
1546
- "sha256": "7bbd5b3c54ef151d598c827f5cb5416566c3106b198e7ad5c4665a03f2566a35"
1547
  },
1548
  {
1549
  "id": "modality_atlas",
@@ -1674,8 +1674,8 @@
1674
  "surface": "repo_hf",
1675
  "shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
1676
  "exists": true,
1677
- "bytes": 16045,
1678
- "sha256": "130578a51a77e2be0230da1288beee3528cff2c7a39830c91f0509682da4b404"
1679
  },
1680
  {
1681
  "id": "omni_model_comparison_json",
@@ -1685,8 +1685,8 @@
1685
  "surface": "repo_hf",
1686
  "shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
1687
  "exists": true,
1688
- "bytes": 82110,
1689
- "sha256": "ebbb0d0d28a1f4a5c7c9f015d772624eddadc0d382e4917c8dbdcc512a5b276d"
1690
  },
1691
  {
1692
  "id": "cosmos3_nano_verified_summary",
 
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Artifact Index",
3
+ "generated_at_utc": "2026-06-21T10:52:12+00:00",
4
  "status": "pass",
5
  "artifact_count": 226,
6
  "missing": [],
 
81
  "surface": "website_hf",
82
  "shows": "Machine-readable copy of the current project status for website and HF mirrors.",
83
  "exists": true,
84
+ "bytes": 23049,
85
+ "sha256": "9a06cc54d3b43362867a2fde9edc61d09f53df2d9ad761ecf95c862c76af31d2"
86
  },
87
  {
88
  "id": "research_roadmap",
 
92
  "surface": "repo_hf",
93
  "shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
94
  "exists": true,
95
+ "bytes": 15272,
96
+ "sha256": "559fa9e818f2c6fc7b926f880e9183200911317e70a26391f1830f4119ebc6b0"
97
  },
98
  {
99
  "id": "research_roadmap_json",
 
103
  "surface": "website_hf",
104
  "shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
105
  "exists": true,
106
+ "bytes": 14129,
107
+ "sha256": "a06d6525d9532b8608bf7be81eb9387deca3159b7c42bf38e107b4096953f351"
108
  },
109
  {
110
  "id": "foundation_model_plan",
 
114
  "surface": "repo_hf",
115
  "shows": "Defines the post-data-gate backbone choices: Qwen3-Omni first, Cosmos 3 for world modeling, and VLA/policy models after action-target conversion.",
116
  "exists": true,
117
+ "bytes": 11003,
118
+ "sha256": "24047e8692f69927d3fabf3c01058278e85651355f3749886493159971120cc6"
119
  },
120
  {
121
  "id": "foundation_model_plan_json",
 
125
  "surface": "website_hf",
126
  "shows": "Machine-readable foundation-model selection matrix with source links, entry conditions, and evaluation additions.",
127
  "exists": true,
128
+ "bytes": 13925,
129
+ "sha256": "77d4b2d5918ef1f776de6d29d34d523de95ba58df9526e2b255bed567434f932"
130
  },
131
  {
132
  "id": "three_foundation_pipelines",
 
222
  "path": "OMNI_MODEL_EXTENSION_CONTRACT.md",
223
  "kind": "scaleup_contract",
224
  "surface": "repo_hf",
225
+ "shows": "Defines the shared manifest, episode split, held-out evaluation, packaging, and public-safety rules for Qwen3-Omni, Cosmos3, and VLA/policy model tracks.",
226
  "exists": true,
227
  "bytes": 8900,
228
  "sha256": "c4e51d0aa7536045c229418603a67c6b3c5f31c9d756ca7395cb0c9455f0ed6d"
 
323
  "surface": "website_hf",
324
  "shows": "Machine-readable enhancement pack for the website and Hugging Face mirrors.",
325
  "exists": true,
326
+ "bytes": 20196,
327
+ "sha256": "9e1a3339425981dcf7931bf08684860864598bf679d0df86f93c656bacdb71bf"
328
  },
329
  {
330
  "id": "task_suite_enhancement_128_result",
 
345
  "surface": "repo_hf",
346
  "shows": "Regenerates the enhancement pack from committed 128-episode windows, baseline summaries, verified Qwen predictions, and Cosmos reference metrics.",
347
  "exists": true,
348
+ "bytes": 27225,
349
+ "sha256": "86e6098506b365cc92a9658d347645c285c5f61b5113eeaf1d170df0e2d7cc8f"
350
  },
351
  {
352
  "id": "xperience10m_128_episode_feature_index",
 
510
  "surface": "repo_hf",
511
  "shows": "Records concrete non-backbone Xperience-10M development tracks: taxonomy, benchmark protocol, representation learning, skill graphs, affordances, 3D/4D memory, QA, and policy transfer.",
512
  "exists": true,
513
+ "bytes": 3136,
514
+ "sha256": "decdd359d89694fe10873dcce6cee23e991de1b874ade72643314e879ade784e"
515
  },
516
  {
517
  "id": "additional_development_directions_json",
 
521
  "surface": "website_hf",
522
  "shows": "Machine-readable additional development directions for the website and Hugging Face mirrors.",
523
  "exists": true,
524
+ "bytes": 6120,
525
+ "sha256": "669d1523f767a8eda22bbe96ab54af99e102496a3d27f7dd850e08e2724e661f"
526
  },
527
  {
528
  "id": "xperience_embodied_foundation_pretraining",
 
610
  "shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
611
  "exists": true,
612
  "bytes": 4432,
613
+ "sha256": "db279081759eebb09a4ba53c56fb17a14f3546e13d058100494ac7745b901a1c"
614
  },
615
  {
616
  "id": "source_alignment_validator",
 
631
  "surface": "repo_hf",
632
  "shows": "Publishes prepared Space, artifact dataset, and model bundles, including an explicit model-binary upload batch.",
633
  "exists": true,
634
+ "bytes": 25159,
635
+ "sha256": "a74451a7d717661e1499b98631d825f4db8c6b51b1e9bafd73966697eb04258a"
636
  },
637
  {
638
  "id": "github_package_dockerfile",
 
728
  "path": "docs/data/unified_task_model_radar.json",
729
  "kind": "website_data",
730
  "surface": "website_hf",
731
+ "shows": "Stores normalized 20-axis radar values, raw task metrics, Qwen3-Omni/Cosmos3 overlay mappings, method-card caveats, proxy flags, and source artifacts.",
732
  "exists": true,
733
+ "bytes": 228815,
734
+ "sha256": "862376178e8b0d01b536f49a18b7934a373494f8b36080790f616438ec0e035e"
735
  },
736
  {
737
  "id": "single_episode_task_model_radar_json",
 
741
  "surface": "website_hf",
742
  "shows": "Machine-readable split radar for the one-episode Minimal and Neural MLP baselines, both scored on all 20 task contracts.",
743
  "exists": true,
744
+ "bytes": 51107,
745
+ "sha256": "5f2ebb41e8488446ea5c5cd2cb75bbedce688433feffe1412288de56b133bd5c"
746
  },
747
  {
748
  "id": "episode128_task_model_radar_json",
 
750
  "path": "docs/data/episode128_task_model_radar.json",
751
  "kind": "website_data",
752
  "surface": "website_hf",
753
+ "shows": "Machine-readable split radar for selected 128-episode metadata/raw baselines, Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano, now complete at 140/140 scored rows with proxy notes retained.",
754
  "exists": true,
755
+ "bytes": 184992,
756
+ "sha256": "385704db90443d74903f365e90b27538020f5574c96f296bbf63173f488a645d"
757
  },
758
  {
759
  "id": "task_method_20_result_matrix_json",
 
764
  "shows": "Machine-readable 9-method by 20-task matrix where every method has 20 records and the current release is complete at 180/180 scored rows.",
765
  "exists": true,
766
  "bytes": 128509,
767
+ "sha256": "96082daa33771963ac40b7d719df00a76ec443508a3d3101cb6dd82d87965729"
768
  },
769
  {
770
  "id": "task_method_20_result_matrix",
 
808
  "shows": "Machine-readable check that scored JSON-backed matrix cells match their declared metric source values.",
809
  "exists": true,
810
  "bytes": 561,
811
+ "sha256": "cbe9be1ea3d62b253780aade9c51cb7f3a5882df185927186ee6a1d6516ad3a6"
812
  },
813
  {
814
  "id": "task_method_20_source_audit",
 
819
  "shows": "Reader-facing source-value audit for the 180-result matrix.",
820
  "exists": true,
821
  "bytes": 447,
822
+ "sha256": "dfcde22c9350858d0df6d881533f63ba6838fc980b62f0b68770f9b708fcde85"
823
  },
824
  {
825
  "id": "two_evidence_line_map_chart",
 
838
  "path": "docs/assets/charts/unified_task_model_radar.svg",
839
  "kind": "generated_figure",
840
  "surface": "website_hf",
841
+ "shows": "Compares minimal and neural MLP baselines across all 20 tasks, with Qwen3-Omni and Cosmos3 task-aligned overlays.",
842
  "exists": true,
843
  "bytes": 57938,
844
  "sha256": "bb83b80b47fe679ebdce2c99378a4548120f1c8cc2d725b88e409d8c386dcbf8"
 
851
  "surface": "website_hf",
852
  "shows": "Separates the one-episode Minimal and Neural MLP 20/20 scored baselines into a clean two-polygon radar.",
853
  "exists": true,
854
+ "bytes": 35232,
855
+ "sha256": "87b52a7dead40358f1778dda43ade4d2e875ac98e507e01ca007084363e5977e"
856
  },
857
  {
858
  "id": "episode128_task_model_radar_chart",
 
860
  "path": "docs/assets/charts/episode128_task_model_radar.svg",
861
  "kind": "generated_figure",
862
  "surface": "website_hf",
863
+ "shows": "Separates the selected 128-episode methods: raw-feature simple/NN as complete 20/20 scored polygons plus metadata, Qwen3-Omni, Cosmos3-Super, and Cosmos3-Nano task-aligned overlays.",
864
  "exists": true,
865
+ "bytes": 51915,
866
+ "sha256": "047ea4b05a04f6734e2afcf792863559dc8f3091eae88a97ff90e8b038a423f4"
867
  },
868
  {
869
  "id": "unified_task_model_radar_builder",
 
873
  "surface": "repo_hf",
874
  "shows": "Regenerates the direction-aware radar chart and machine-readable metric overlay JSON.",
875
  "exists": true,
876
+ "bytes": 68610,
877
+ "sha256": "96bc2df0de5a9e512d69961ddb13ea87b26ef01f1f943f5a78a6dc373400949d"
878
  },
879
  {
880
  "id": "task_method_20_gap_audit_builder",
 
915
  "path": "results/omni_finetune/model_output_probe_readiness/model_output_probe_readiness.json",
916
  "kind": "scaleup_status",
917
  "surface": "repo_hf",
918
+ "shows": "Checks whether Qwen3-Omni and Cosmos3 runs have train, validation, and test prediction files before extending model overlays to all 20 task contracts.",
919
  "exists": true,
920
  "bytes": 4320,
921
  "sha256": "11cff26749bf6ad8b8ee028b18e0b4be5713ed8b5325578caa03be25d894263b"
 
928
  "surface": "repo_hf",
929
  "shows": "Audits model-output split availability and writes a readiness report without assigning new numeric task scores.",
930
  "exists": true,
931
+ "bytes": 10526,
932
+ "sha256": "2b95834c75b0c90ceefe2c20381b3997a63f283b733186e07dea9e2778c78fad"
933
  },
934
  {
935
  "id": "existing_model_output_task_probe",
 
937
  "path": "results/omni_finetune/model_output_task_probes_20260616/summary.json",
938
  "kind": "model_result",
939
  "surface": "repo_hf",
940
+ "shows": "Scores task-specific Qwen3-Omni and Cosmos3 overlays only where verified held-out prediction JSON or compact target maps already contain the required targets.",
941
  "exists": true,
942
  "bytes": 5951,
943
  "sha256": "910477d2fba648605dda128d0ecd2a2c13cfa460573e350dc850014ac91c6c2b"
 
950
  "surface": "repo_hf",
951
  "shows": "Derives task-specific scores from committed verified model outputs without running new inference or backfilling absent targets.",
952
  "exists": true,
953
+ "bytes": 69423,
954
+ "sha256": "43086745ba53f5a4da1a39b9c223914707ab51b027555c91bea27c0bc152a27f"
955
  },
956
  {
957
  "id": "a100_128_metadata_task_baselines",
 
1071
  "surface": "repo_hf",
1072
  "shows": "Catalogs public figures, charts, modality thumbnails, dimensions, hashes, roles, and source scripts.",
1073
  "exists": true,
1074
+ "bytes": 7014,
1075
+ "sha256": "1087774a85614f12871418bb9fa375b98121596eb11dcdc22d324b943fb9d313"
1076
  },
1077
  {
1078
  "id": "figure_index_json",
 
1082
  "surface": "website_hf",
1083
  "shows": "Machine-readable visual asset index for website and Hugging Face mirrors.",
1084
  "exists": true,
1085
+ "bytes": 19472,
1086
+ "sha256": "e56f76038a56ffc61e882d0201f13912af5cba3e5ade08b1bb912fba0acdcd24"
1087
  },
1088
  {
1089
  "id": "figure_index_builder",
 
1093
  "surface": "repo_hf",
1094
  "shows": "Regenerates visual-asset hashes, dimensions, and source-script provenance.",
1095
  "exists": true,
1096
+ "bytes": 16832,
1097
+ "sha256": "7c526bff01c282d81e4f64bbdb31c059953ea7868b75b0c3104826241280165f"
1098
  },
1099
  {
1100
  "id": "brand_assets_json",
 
1160
  "shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
1161
  "exists": true,
1162
  "bytes": 8640,
1163
+ "sha256": "3cb0aca2dca01448cb9bc5cbb519a91bc6397c08a1eaaa84c031e773221e5a0a"
1164
  },
1165
  {
1166
  "id": "public_surface_qa",
 
1179
  "path": "PUBLIC_READER_MAP.md",
1180
  "kind": "project_path",
1181
  "surface": "repo_hf",
1182
+ "shows": "Provides the first-pass navigation layer for GitHub, GitHub Pages, Hugging Face mirrors, Qwen3-Omni/Cosmos3 repos, evidence lines, and claim boundaries.",
1183
  "exists": true,
1184
+ "bytes": 4948,
1185
+ "sha256": "7a7128fdde08f770338c3fe2d473565918c5633f948dec6a78a6b2a67938e91a"
1186
  },
1187
  {
1188
  "id": "public_reader_map_json",
 
1192
  "surface": "website_hf",
1193
  "shows": "Machine-readable public reader map used by the website and Hugging Face mirrors to keep entry points and surface responsibilities explicit.",
1194
  "exists": true,
1195
+ "bytes": 5971,
1196
+ "sha256": "3474f84ffa53aefabdbf8a75c466c271675162ce0f8a23ea3b6660951048072f"
1197
  },
1198
  {
1199
  "id": "public_surface_qa_json",
 
1285
  "volatile": true,
1286
  "shows": "Records the last live GitHub/HF URL verification after upload.",
1287
  "exists": true,
1288
+ "bytes": 184689,
1289
  "hash_policy": "existence_and_size_only"
1290
  },
1291
  {
 
1296
  "surface": "repo",
1297
  "shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
1298
  "exists": true,
1299
+ "bytes": 67652,
1300
+ "sha256": "47c6e5e0d93a881db045842ef98656d04c74cf7605f33a56b8d4daecf97fb547"
1301
  },
1302
  {
1303
  "id": "reproducibility_contract",
 
1329
  "surface": "repo_hf",
1330
  "shows": "Generates the selective artifact catalog from local files.",
1331
  "exists": true,
1332
+ "bytes": 67587,
1333
+ "sha256": "28a93ec92c91886388f5d42ab8e25af0b218e4644b733bc8f8230bc0f91aab65"
1334
  },
1335
  {
1336
  "id": "publication_audit",
 
1365
  "volatile": true,
1366
  "shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
1367
  "exists": true,
1368
+ "bytes": 1418066,
1369
  "hash_policy": "existence_and_size_only"
1370
  },
1371
  {
 
1377
  "volatile": true,
1378
  "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
1379
  "exists": true,
1380
+ "bytes": 20657,
1381
  "hash_policy": "existence_and_size_only"
1382
  },
1383
  {
 
1542
  "surface": "website_hf",
1543
  "shows": "Presents the task suite and sample modality thumbnails with metrics generated from committed files.",
1544
  "exists": true,
1545
+ "bytes": 1903454,
1546
+ "sha256": "6667eb856cf61ada9f868807b5d5c6ccde06e4f791b2f9dd567d98b71b307415"
1547
  },
1548
  {
1549
  "id": "modality_atlas",
 
1674
  "surface": "repo_hf",
1675
  "shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
1676
  "exists": true,
1677
+ "bytes": 15983,
1678
+ "sha256": "4db248566972e811aac6ca06582f233414821624f00f9d4fc4a1b66b2e00401f"
1679
  },
1680
  {
1681
  "id": "omni_model_comparison_json",
 
1685
  "surface": "repo_hf",
1686
  "shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
1687
  "exists": true,
1688
+ "bytes": 82088,
1689
+ "sha256": "82ccc2932cad63a9ebad85da53e694b18ef626aa3720bda3ed5da30f3dc5e121"
1690
  },
1691
  {
1692
  "id": "cosmos3_nano_verified_summary",
metrics/episode128_task_model_radar.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "title": "128-Episode 20-Task Radar",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-21T08:37:32+00:00",
5
- "description": "Selected 128-episode metadata/raw baselines plus verified Qwen3/Cosmos branches. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.",
6
  "task_count": 20,
7
  "method_count": 7,
8
  "method_task_record_count": 140,
@@ -12,7 +12,7 @@
12
  "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
13
  "raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
14
  "result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
15
- "foundation_model_overlay": "Qwen3/Cosmos points are plotted only on task-aligned axes. Scoreless records mean the public result does not evaluate that task contract.",
16
  "metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
17
  "raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
18
  },
 
1
  {
2
  "title": "128-Episode 20-Task Radar",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-21T10:47:17+00:00",
5
+ "description": "Selected 128-episode metadata/raw baselines plus verified Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano diagnostics. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.",
6
  "task_count": 20,
7
  "method_count": 7,
8
  "method_task_record_count": 140,
 
12
  "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
13
  "raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
14
  "result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
15
+ "foundation_model_overlay": "Qwen3-Omni and Cosmos3 points are plotted only on task-aligned axes. Scoreless records mean the public result does not evaluate that task contract.",
16
  "metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
17
  "raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
18
  },
metrics/figure_index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Figure Index",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-18T18:18:13+00:00",
5
  "scope": "Public figures, diagrams, charts, and derived modality thumbnails. Raw Xperience-10M videos, annotations, RRD files, and Qwen weights are excluded.",
6
  "figure_count": 29,
7
  "figures": [
@@ -64,12 +64,12 @@
64
  "source_script": "scripts/render_task_suite_infographic.py",
65
  "surface": "README, website, HF Space, artifact dataset, model card",
66
  "exists": true,
67
- "bytes": 1591194,
68
- "sha256": "95ab73e01cfba86538b63247869fae4091934ddedf9e22523ab4cead9c59086d",
69
  "dimensions": {
70
  "format": "PNG",
71
  "width": 1800,
72
- "height": 6600
73
  },
74
  "source_script_exists": true
75
  },
@@ -81,8 +81,8 @@
81
  "source_script": "scripts/generate_visualizations.py",
82
  "surface": "README, website, HF artifact dataset",
83
  "exists": true,
84
- "bytes": 704575,
85
- "sha256": "c90723cc4b1bf5490269af2df594849030ae8d4cc8176e1d1eab96fabf9412f9",
86
  "dimensions": {
87
  "format": "PNG",
88
  "width": 1800,
@@ -149,8 +149,8 @@
149
  "source_script": "scripts/render_foundation_pipeline_diagrams.py",
150
  "surface": "README, website, HF Space, artifact dataset, model card",
151
  "exists": true,
152
- "bytes": 1553916,
153
- "sha256": "6d502580c9f11b170036843690dff0ef99e146890d9914046b5d4b165bd1f89b",
154
  "dimensions": {
155
  "format": "PNG",
156
  "width": 2560,
@@ -166,8 +166,8 @@
166
  "source_script": "scripts/render_overview_figures.py",
167
  "surface": "README, website, HF artifact dataset, model card",
168
  "exists": true,
169
- "bytes": 774391,
170
- "sha256": "f08b03bc21e194efe382347d74cf89cd6ac65dede51889971dbfc2fb9d1de3c2",
171
  "dimensions": {
172
  "format": "PNG",
173
  "width": 1800,
@@ -356,8 +356,8 @@
356
  "source_script": "scripts/generate_visualizations.py",
357
  "surface": "website directions",
358
  "exists": true,
359
- "bytes": 5078,
360
- "sha256": "fd4ba0a9d6d525bdfa8677c66e1a751efc83936dc032ce229bfca1ea106acb40",
361
  "dimensions": {
362
  "format": "SVG",
363
  "width": 1180,
@@ -410,8 +410,8 @@
410
  "source_script": "scripts/build_unified_task_model_radar.py",
411
  "surface": "website unified task section, README, HF mirrors",
412
  "exists": true,
413
- "bytes": 54276,
414
- "sha256": "66b3f285ecb9a3bf7d1125495fc3b6d4400edacb59700e06e9b504c9767d434e",
415
  "dimensions": {
416
  "format": "SVG",
417
  "width": 2400,
@@ -428,8 +428,8 @@
428
  "source_script": "scripts/build_unified_task_model_radar.py",
429
  "surface": "website unified task section, README, HF mirrors",
430
  "exists": true,
431
- "bytes": 35229,
432
- "sha256": "eae52facf93c6c674a82178e4ec1592f9d77fd5d5fcf8b11845deff7dbca3f6c",
433
  "dimensions": {
434
  "format": "SVG",
435
  "width": 2400,
@@ -442,12 +442,12 @@
442
  "id": "episode128_task_model_radar",
443
  "title": "128-episode 20-task model radar",
444
  "path": "docs/assets/charts/episode128_task_model_radar.svg",
445
- "role": "Twenty-axis split radar for selected 128-episode methods: raw-feature simple/NN as complete scored polygons and metadata/Qwen/Cosmos as task-aligned overlays.",
446
  "source_script": "scripts/build_unified_task_model_radar.py",
447
  "surface": "website unified task section, README, HF mirrors",
448
  "exists": true,
449
- "bytes": 48263,
450
- "sha256": "9538bfb512f16bbd280151923adf8a23377bfaed2a8be5961a25eaf0a11d1404",
451
  "dimensions": {
452
  "format": "SVG",
453
  "width": 2400,
 
1
  {
2
  "title": "Ropedia Xperience-10M Figure Index",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-21T10:52:12+00:00",
5
  "scope": "Public figures, diagrams, charts, and derived modality thumbnails. Raw Xperience-10M videos, annotations, RRD files, and Qwen weights are excluded.",
6
  "figure_count": 29,
7
  "figures": [
 
64
  "source_script": "scripts/render_task_suite_infographic.py",
65
  "surface": "README, website, HF Space, artifact dataset, model card",
66
  "exists": true,
67
+ "bytes": 1903454,
68
+ "sha256": "6667eb856cf61ada9f868807b5d5c6ccde06e4f791b2f9dd567d98b71b307415",
69
  "dimensions": {
70
  "format": "PNG",
71
  "width": 1800,
72
+ "height": 7600
73
  },
74
  "source_script_exists": true
75
  },
 
81
  "source_script": "scripts/generate_visualizations.py",
82
  "surface": "README, website, HF artifact dataset",
83
  "exists": true,
84
+ "bytes": 711222,
85
+ "sha256": "4db6a6353d3f1e49bae12447e1a78a874aa780d60e9817f3052ac0d0acf2f7b2",
86
  "dimensions": {
87
  "format": "PNG",
88
  "width": 1800,
 
149
  "source_script": "scripts/render_foundation_pipeline_diagrams.py",
150
  "surface": "README, website, HF Space, artifact dataset, model card",
151
  "exists": true,
152
+ "bytes": 1853350,
153
+ "sha256": "e8d863cc5104602e464048b4bf48f9acf3a108495298d9ec15b2e9cf346f41f9",
154
  "dimensions": {
155
  "format": "PNG",
156
  "width": 2560,
 
166
  "source_script": "scripts/render_overview_figures.py",
167
  "surface": "README, website, HF artifact dataset, model card",
168
  "exists": true,
169
+ "bytes": 757827,
170
+ "sha256": "d83b75a6778033a716f1086dbe61298662d4b8f80cb8f52193d2cbdb1e8e31f7",
171
  "dimensions": {
172
  "format": "PNG",
173
  "width": 1800,
 
356
  "source_script": "scripts/generate_visualizations.py",
357
  "surface": "website directions",
358
  "exists": true,
359
+ "bytes": 5352,
360
+ "sha256": "506e12aa1b6c4fd50fb0c65714c7f0a92c02c40069cb879503471ba9b63d4afb",
361
  "dimensions": {
362
  "format": "SVG",
363
  "width": 1180,
 
410
  "source_script": "scripts/build_unified_task_model_radar.py",
411
  "surface": "website unified task section, README, HF mirrors",
412
  "exists": true,
413
+ "bytes": 57938,
414
+ "sha256": "bb83b80b47fe679ebdce2c99378a4548120f1c8cc2d725b88e409d8c386dcbf8",
415
  "dimensions": {
416
  "format": "SVG",
417
  "width": 2400,
 
428
  "source_script": "scripts/build_unified_task_model_radar.py",
429
  "surface": "website unified task section, README, HF mirrors",
430
  "exists": true,
431
+ "bytes": 35232,
432
+ "sha256": "87b52a7dead40358f1778dda43ade4d2e875ac98e507e01ca007084363e5977e",
433
  "dimensions": {
434
  "format": "SVG",
435
  "width": 2400,
 
442
  "id": "episode128_task_model_radar",
443
  "title": "128-episode 20-task model radar",
444
  "path": "docs/assets/charts/episode128_task_model_radar.svg",
445
+ "role": "Twenty-axis split radar for selected 128-episode methods: raw-feature simple/NN as complete scored polygons plus metadata, Qwen3-Omni, Cosmos3-Super, and Cosmos3-Nano task-aligned overlays.",
446
  "source_script": "scripts/build_unified_task_model_radar.py",
447
  "surface": "website unified task section, README, HF mirrors",
448
  "exists": true,
449
+ "bytes": 51915,
450
+ "sha256": "047ea4b05a04f6734e2afcf792863559dc8f3091eae88a97ff90e8b038a423f4",
451
  "dimensions": {
452
  "format": "SVG",
453
  "width": 2400,
metrics/foundation_model_plan.json CHANGED
@@ -230,7 +230,7 @@
230
  },
231
  {
232
  "step": 4,
233
- "name": "World-model branch",
234
  "action": "Promote Cosmos 3 beyond the current Nano compatibility and Super forward-dynamics runs only when loss metrics, preprocessing, and storage justify the added compute."
235
  },
236
  {
 
230
  },
231
  {
232
  "step": 4,
233
+ "name": "World-model track",
234
  "action": "Promote Cosmos 3 beyond the current Nano compatibility and Super forward-dynamics runs only when loss metrics, preprocessing, and storage justify the added compute."
235
  },
236
  {
metrics/live_publication_status.json CHANGED
@@ -2,7 +2,7 @@
2
  "title": "Ropedia Xperience-10M Live Publication Status",
3
  "status": "pass",
4
  "checked_at_utc": "2026-06-20T21:56:07+00:00",
5
- "scope": "Live GitHub Pages, GitHub raw, Hugging Face Space, artifact dataset, baseline model mirrors, and the Qwen3/Cosmos3 LoRA adapter repos when their upload packages exist locally.",
6
  "hash_groups": [
7
  {
8
  "id": "task_suite_infographic",
 
2
  "title": "Ropedia Xperience-10M Live Publication Status",
3
  "status": "pass",
4
  "checked_at_utc": "2026-06-20T21:56:07+00:00",
5
+ "scope": "Live GitHub Pages, GitHub raw, Hugging Face Space, artifact dataset, baseline model mirrors, and the Qwen3-Omni/Cosmos3 LoRA adapter repos when their upload packages exist locally.",
6
  "hash_groups": [
7
  {
8
  "id": "task_suite_infographic",
metrics/mirror_parity.json CHANGED
The diff for this file is too large to render. See raw diff
 
metrics/omni_finetune_verified_result.json CHANGED
@@ -91,6 +91,6 @@
91
  "Use results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before deciding whether v6 should become a formal release tag.",
92
  "Use the v6 predictions for action/contact error analysis, and compare v5 for subtask, next-action, and object regressions.",
93
  "Keep full-parameter Qwen runs as feasibility gates until there is a storage plan for checkpoints or mergeable full-weight deltas.",
94
- "Use the verified Cosmos3-Super Forward-Dynamics LoRA package as a separate world-model branch: it updates adapter weights over camera-pose proxy future-vision-velocity targets, not Qwen-style JSON action labels."
95
  ]
96
  }
 
91
  "Use results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before deciding whether v6 should become a formal release tag.",
92
  "Use the v6 predictions for action/contact error analysis, and compare v5 for subtask, next-action, and object regressions.",
93
  "Keep full-parameter Qwen runs as feasibility gates until there is a storage plan for checkpoints or mergeable full-weight deltas.",
94
+ "Use the verified Cosmos3-Super Forward-Dynamics LoRA package as a separate world-model artifact: it updates adapter weights over camera-pose proxy future-vision-velocity targets, not Qwen-style JSON action labels."
95
  ]
96
  }
metrics/omni_model_comparison.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
3
- "generated_at_utc": "2026-06-20T21:27:21+00:00",
4
  "status": "pass",
5
  "version_count": 3,
6
  "model_group_count": 5,
@@ -8,7 +8,7 @@
8
  "version_reading_notes": [
9
  "Version 1 is the public-sample 20-task surface: original core heads, tasks 13-20, and the 180-row method-task matrix.",
10
  "Version 2 is the selected 128-episode same-split simple/NN baseline alignment.",
11
- "Version 3 is the verified model-branch layer: the current final Qwen3-Omni LoRA package is the JSON-task diagnostic result, Cosmos3-Nano is a future-window compatibility result, Cosmos3-Super Reasoner is a base-weight JSON-task evaluation, and Cosmos3-Super Forward-Dynamics LoRA is the first Super fine-tuned adapter branch."
12
  ],
13
  "versions": [
14
  {
@@ -305,7 +305,7 @@
305
  "neural_primary_score": null
306
  }
307
  ],
308
- "interpretation": "This layer aligns the previous simple and neural baseline framing to the same selected 96/16/16 split used by the model branches. It uses public-safe JSONL metadata/text features, so raw-feature-only tasks remain explicitly unsupported until 128-run sensor feature blocks exist."
309
  },
310
  {
311
  "id": "v3_multi_episode_foundation_model_branches",
@@ -870,7 +870,7 @@
870
  "neural_supported_task_count": 6
871
  },
872
  "weights": "metadata/text baseline artifacts; raw 128 sensor-feature model weights not yet complete",
873
- "interpretation": "Same selected 96/16/16 split and task ids as the model branches, but metadata/text features only."
874
  }
875
  ],
876
  "comparison_note": "This is the cleanest 1-episode versus 128-episode grouping for the same simple/NN task-head family, but the feature surface changes from raw public-sample features to public-safe 128-episode metadata/text features."
@@ -1683,7 +1683,7 @@
1683
  "weights_repository": "none for this run: staged base nv-community/Cosmos3-Super weights were evaluated through vLLM; create a separate repo only after new adapter or fine-tuned weights exist"
1684
  }
1685
  ],
1686
- "comparison_note": "Cosmos3-Super is now represented by a verified 448-window held-out Reasoner evaluation on the same JSON task as Qwen3. It uses staged base weights through vLLM, so it is a model-branch diagnostic, not a weight release. A camera-pose proxy forward-dynamics target export now passes the contract audit and schema-only packer smoke; the separate Forward-Dynamics LoRA group records the trainable adapter run and loss-based held-out evaluation."
1687
  },
1688
  {
1689
  "id": "cosmos3_super_forward_dynamics",
 
1
  {
2
  "title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
3
+ "generated_at_utc": "2026-06-21T10:47:04+00:00",
4
  "status": "pass",
5
  "version_count": 3,
6
  "model_group_count": 5,
 
8
  "version_reading_notes": [
9
  "Version 1 is the public-sample 20-task surface: original core heads, tasks 13-20, and the 180-row method-task matrix.",
10
  "Version 2 is the selected 128-episode same-split simple/NN baseline alignment.",
11
+ "The selected-128 model-diagnostic group contains the current Qwen3-Omni LoRA JSON-task row, Cosmos3-Nano future-window compatibility result, Cosmos3-Super Reasoner base-weight JSON-task evaluation, and the separate Cosmos3-Super Forward-Dynamics LoRA adapter artifact."
12
  ],
13
  "versions": [
14
  {
 
305
  "neural_primary_score": null
306
  }
307
  ],
308
+ "interpretation": "This layer aligns the previous simple and neural baseline framing to the same selected 96/16/16 split used by the Qwen3-Omni and Cosmos3 diagnostics. It uses public-safe JSONL metadata/text features, so raw-feature-only tasks remain explicitly unsupported until 128-run sensor feature blocks exist."
309
  },
310
  {
311
  "id": "v3_multi_episode_foundation_model_branches",
 
870
  "neural_supported_task_count": 6
871
  },
872
  "weights": "metadata/text baseline artifacts; raw 128 sensor-feature model weights not yet complete",
873
+ "interpretation": "Same selected 96/16/16 split and task ids as the Qwen3-Omni and Cosmos3 diagnostics, but metadata/text features only."
874
  }
875
  ],
876
  "comparison_note": "This is the cleanest 1-episode versus 128-episode grouping for the same simple/NN task-head family, but the feature surface changes from raw public-sample features to public-safe 128-episode metadata/text features."
 
1683
  "weights_repository": "none for this run: staged base nv-community/Cosmos3-Super weights were evaluated through vLLM; create a separate repo only after new adapter or fine-tuned weights exist"
1684
  }
1685
  ],
1686
+ "comparison_note": "Cosmos3-Super is now represented by a verified 448-window held-out Reasoner evaluation on the same JSON task as Qwen3. It uses staged base weights through vLLM, so it is a Cosmos3 diagnostic, not a weight release. A camera-pose proxy forward-dynamics target export now passes the contract audit and schema-only packer smoke; the separate Forward-Dynamics LoRA group records the trainable adapter run and loss-based held-out evaluation."
1687
  },
1688
  {
1689
  "id": "cosmos3_super_forward_dynamics",
metrics/project_status.json CHANGED
@@ -2,7 +2,7 @@
2
  "title": "Ropedia Xperience-10M Task Suite Project Status",
3
  "version": "2026-06-20",
4
  "decision": "public_sample_pipeline_verified_128_enhancement_qwen3_v6_cosmos_comparison",
5
- "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, compares verified Qwen3-Omni and Cosmos3 branch packages as early cross-episode diagnostics, and now records a no-new-episode enhancement pack for pushing the current 128-episode suite harder.",
6
  "scope_boundary": {
7
  "validated_episode_count": 1,
8
  "aligned_frames": 5821,
@@ -145,7 +145,7 @@
145
  "RESEARCH_ROADMAP.md",
146
  "docs/data/research_roadmap.json"
147
  ],
148
- "readout": "The roadmap connects public-sample task development to the final verified Qwen3-Omni diagnostic result, same-split baseline alignment, the no-new-episode 128-suite enhancement pack, action/subtask error analysis, robustness runs, world/policy branches, and the future Xperience-native pretraining goal."
149
  },
150
  {
151
  "area": "128-episode task-suite enhancement pack",
@@ -156,7 +156,7 @@
156
  "results/omni_finetune/task_suite_enhancement_128_v1_20260608/enhancement_plan.json",
157
  "scripts/omni/build_task_suite_enhancement_128.py"
158
  ],
159
- "readout": "The current 3,808-window selected split can be stressed without more episodes by exporting denser and multiscale windows. The recommended next export is multiscale_20s10_40s20_80s40, estimated at 106,095 windows from observed frame spans; the pack also defines hierarchical action/subtask targets, raw-feature shard priorities for unsupported tasks, and Qwen/Cosmos follow-up run cards."
160
  },
161
  {
162
  "area": "Foundation-model plan",
@@ -176,7 +176,7 @@
176
  "scripts/omni/backbone_registry.py",
177
  "scripts/omni/smoke_test_backbone_packaging.py"
178
  ],
179
- "readout": "Future Qwen, Cosmos-style, and VLA/policy branches must keep the same episode split discipline, held-out metrics, validation gate, public-safe package contract, and explicit forbidden-artifact policy before reporting results."
180
  },
181
  {
182
  "area": "Xperience Embodied Foundation Model",
@@ -253,7 +253,7 @@
253
  "results/omni_finetune/OMNI_MODEL_COMPARISON.md",
254
  "scripts/omni/build_omni_model_comparison.py"
255
  ],
256
- "readout": "The public comparison now has two views: the three result layers and a model-family grouping. The model grouping pairs 1-episode and 128-episode entries for task-head baselines, separates Qwen3-Omni sensor-adapter smoke from 128-episode LoRA diagnostics, separates Cosmos3-Nano future-window compatibility from Cosmos3-Super base-weight Reasoner evaluation, and adds Cosmos3-Super Forward-Dynamics LoRA as a loss-based fine-tuned adapter branch."
257
  },
258
  {
259
  "area": "Qwen3-Omni fine-tuning",
@@ -271,7 +271,7 @@
271
  "readout": "The selected 96/16/16 episode split now has a current v6 rank64/lr5e-5 public-safe held-out package with 34,269 exported windows, 4,032 test predictions, validation/audit summaries, and a public LoRA adapter repo. JSON validity is 99.90%, meeting the 98% target; transition accuracy is 98.98%, contact accuracy is 81.77%, object micro-F1 is 30.65%, next-action accuracy is 4.31%, and action/subtask metrics remain weak. v6 improves action macro-F1 and contact accuracy versus v5, but v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics."
272
  },
273
  {
274
- "area": "Cosmos3-Nano future-window branch",
275
  "status": "verified_compatibility_result",
276
  "evidence": [
277
  "configs/omni_backbones/cosmos_world_model.json",
@@ -279,10 +279,10 @@
279
  "scripts/omni/eval_cosmos3_future_window_retrieval.py",
280
  "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json"
281
  ],
282
- "readout": "The Cosmos3-Nano branch now has a public-safe verified future-window compatibility package with 3,213 future-window samples, 378 held-out test predictions, future retrieval MRR 0.0221, temporal consistency 0.0952, transition accuracy 0.9683, and contact accuracy 0.7434. It is a compatibility adapter result, not a full Cosmos diffusion-weight fine-tune."
283
  },
284
  {
285
- "area": "Cosmos3-Super Reasoner branch",
286
  "status": "verified_base_weight_result",
287
  "evidence": [
288
  "configs/omni_backbones/cosmos3_super_reasoner.json",
@@ -314,7 +314,7 @@
314
  "results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/verified_result_summary.json",
315
  "results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/package_audit.json"
316
  ],
317
- "readout": "The first fine-tuned Cosmos3-Super adapter branch is verified as a public-safe package: 8-GPU FSDP LoRA, 26.2M adapter parameters, 2,848 train rows, 512 validation rows, 448 held-out test rows, validation MSE 4.0082, and test MSE 3.6853. The package excludes adapter safetensors; weights are published separately at cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep."
318
  },
319
  {
320
  "area": "Raw Xperience-10M redistribution",
@@ -331,8 +331,8 @@
331
  "Open docs/data/project_packet.json for the machine-readable project path.",
332
  "Inspect RESEARCH_TAKEAWAYS.md and docs/data/research_takeaways.json before interpreting model scores.",
333
  "Inspect RESEARCH_ROADMAP.md and docs/data/research_roadmap.json for the path from public-sample task work to multi-episode modeling.",
334
- "Inspect FOUNDATION_MODEL_PLAN.md and docs/data/foundation_model_plan.json before choosing a backbone branch.",
335
- "Inspect OMNI_MODEL_EXTENSION_CONTRACT.md and run python scripts/omni/backbone_registry.py --validate --json before adding a new Qwen, Cosmos-style, or VLA/policy branch.",
336
  "Inspect XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md for the long-term full-corpus pretraining goal.",
337
  "Inspect TASK_SUITE_20.md, docs/data/task_suite_20.json, docs/data/summary_metrics.json, and results/episode_task_suite/neural_mlp/ to check the unified 20-task outputs.",
338
  "Inspect results/audio_ablation/AUDIO_ABLATION_SUMMARY.md before judging whether audio helps the current task suite.",
@@ -346,16 +346,16 @@
346
  "Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
347
  ],
348
  "current_reading_notes": [
349
- "The latest Qwen3-Omni v6 diagnostic branch is verified and meets the strict-JSON target, but action/subtask held-out quality is still weak: JSON validity is 99.90%, action macro-F1 is 0.0029, and subtask accuracy is 0.0037. v5 remains the pinned prior release row because it is still stronger on several metrics.",
350
  "Use TASK_SUITE_ENHANCEMENT_128.md and docs/data/task_suite_enhancement_128.json to push the current 128-episode suite without more raw episodes through multiscale_20s10_40s20_80s40, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
351
- "Use docs/data/omni_model_comparison.json to compare both views: the single-episode/128-baseline/model-branch result layers and the model-family grouping for task heads, Qwen3-Omni LoRA, Cosmos3-Nano, and Cosmos3-Super.",
352
  "The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
353
- "The Cosmos3-Nano future-window branch is verified as a compatibility adapter result, Cosmos3-Super Reasoner is verified as a base-weight evaluation, and Cosmos3-Super Forward-Dynamics LoRA is verified as the first fine-tuned Super adapter branch. Cosmos3-Super adapter weights belong in cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep; verified_public packages exclude safetensors.",
354
  "The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
355
  "Audio is one of the synchronized source modalities in the current task representation.",
356
  "The audio ablation report compares audio/no-audio variants across the original task contracts in results/audio_ablation/.",
357
- "Foundation-model selection is explicit: Qwen3-Omni is the structured JSON baseline, Cosmos 3 is the world-model branch with Nano compatibility and Super forward-dynamics LoRA results, and policy models such as OpenVLA/openpi/GR00T wait for robot-compatible action-target conversion.",
358
- "Future model branches should be added through the backbone registry and verified package contract, not as one-off result folders with incompatible metrics or publication rules.",
359
  "The Xperience Embodied Foundation Model is a future native-pretraining goal, not a completed model or current benchmark."
360
  ]
361
  }
 
2
  "title": "Ropedia Xperience-10M Task Suite Project Status",
3
  "version": "2026-06-20",
4
  "decision": "public_sample_pipeline_verified_128_enhancement_qwen3_v6_cosmos_comparison",
5
+ "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, compares verified Qwen3-Omni and Cosmos3 packages as early cross-episode diagnostics, and now records a no-new-episode enhancement pack for pushing the current 128-episode suite harder.",
6
  "scope_boundary": {
7
  "validated_episode_count": 1,
8
  "aligned_frames": 5821,
 
145
  "RESEARCH_ROADMAP.md",
146
  "docs/data/research_roadmap.json"
147
  ],
148
+ "readout": "The roadmap connects public-sample task development to the final verified Qwen3-Omni diagnostic result, same-split baseline alignment, the no-new-episode 128-suite enhancement pack, action/subtask error analysis, robustness runs, world/policy tracks, and the future Xperience-native pretraining goal."
149
  },
150
  {
151
  "area": "128-episode task-suite enhancement pack",
 
156
  "results/omni_finetune/task_suite_enhancement_128_v1_20260608/enhancement_plan.json",
157
  "scripts/omni/build_task_suite_enhancement_128.py"
158
  ],
159
+ "readout": "The current 3,808-window selected split can be stressed without more episodes by exporting denser and multiscale windows. The recommended next export is multiscale_20s10_40s20_80s40, estimated at 106,095 windows from observed frame spans; the pack also defines hierarchical action/subtask targets, raw-feature shard priorities for unsupported tasks, and Qwen3-Omni/Cosmos3 follow-up run cards."
160
  },
161
  {
162
  "area": "Foundation-model plan",
 
176
  "scripts/omni/backbone_registry.py",
177
  "scripts/omni/smoke_test_backbone_packaging.py"
178
  ],
179
+ "readout": "Future Qwen3-Omni, Cosmos3-style, and VLA/policy tracks must keep the same episode split discipline, held-out metrics, validation gate, public-safe package contract, and explicit forbidden-artifact policy before reporting results."
180
  },
181
  {
182
  "area": "Xperience Embodied Foundation Model",
 
253
  "results/omni_finetune/OMNI_MODEL_COMPARISON.md",
254
  "scripts/omni/build_omni_model_comparison.py"
255
  ],
256
+ "readout": "The public comparison now has two evidence lines plus a model-family grouping. The model grouping pairs 1-episode and 128-episode entries for task-head baselines, separates Qwen3-Omni sensor-adapter smoke from 128-episode LoRA diagnostics, separates Cosmos3-Nano future-window compatibility from Cosmos3-Super base-weight Reasoner evaluation, and adds Cosmos3-Super Forward-Dynamics LoRA as a loss-based fine-tuned adapter artifact."
257
  },
258
  {
259
  "area": "Qwen3-Omni fine-tuning",
 
271
  "readout": "The selected 96/16/16 episode split now has a current v6 rank64/lr5e-5 public-safe held-out package with 34,269 exported windows, 4,032 test predictions, validation/audit summaries, and a public LoRA adapter repo. JSON validity is 99.90%, meeting the 98% target; transition accuracy is 98.98%, contact accuracy is 81.77%, object micro-F1 is 30.65%, next-action accuracy is 4.31%, and action/subtask metrics remain weak. v6 improves action macro-F1 and contact accuracy versus v5, but v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics."
272
  },
273
  {
274
+ "area": "Cosmos3-Nano future-window package",
275
  "status": "verified_compatibility_result",
276
  "evidence": [
277
  "configs/omni_backbones/cosmos_world_model.json",
 
279
  "scripts/omni/eval_cosmos3_future_window_retrieval.py",
280
  "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json"
281
  ],
282
+ "readout": "The Cosmos3-Nano package now has a public-safe verified future-window compatibility result with 3,213 future-window samples, 378 held-out test predictions, future retrieval MRR 0.0221, temporal consistency 0.0952, transition accuracy 0.9683, and contact accuracy 0.7434. It is a compatibility adapter result, not a full Cosmos diffusion-weight fine-tune."
283
  },
284
  {
285
+ "area": "Cosmos3-Super Reasoner package",
286
  "status": "verified_base_weight_result",
287
  "evidence": [
288
  "configs/omni_backbones/cosmos3_super_reasoner.json",
 
314
  "results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/verified_result_summary.json",
315
  "results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/package_audit.json"
316
  ],
317
+ "readout": "The first fine-tuned Cosmos3-Super adapter artifact is verified as a public-safe package: 8-GPU FSDP LoRA, 26.2M adapter parameters, 2,848 train rows, 512 validation rows, 448 held-out test rows, validation MSE 4.0082, and test MSE 3.6853. The package excludes adapter safetensors; weights are published separately at cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep."
318
  },
319
  {
320
  "area": "Raw Xperience-10M redistribution",
 
331
  "Open docs/data/project_packet.json for the machine-readable project path.",
332
  "Inspect RESEARCH_TAKEAWAYS.md and docs/data/research_takeaways.json before interpreting model scores.",
333
  "Inspect RESEARCH_ROADMAP.md and docs/data/research_roadmap.json for the path from public-sample task work to multi-episode modeling.",
334
+ "Inspect FOUNDATION_MODEL_PLAN.md and docs/data/foundation_model_plan.json before choosing a backbone track.",
335
+ "Inspect OMNI_MODEL_EXTENSION_CONTRACT.md and run python scripts/omni/backbone_registry.py --validate --json before adding a new Qwen3-Omni, Cosmos3-style, or VLA/policy track.",
336
  "Inspect XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md for the long-term full-corpus pretraining goal.",
337
  "Inspect TASK_SUITE_20.md, docs/data/task_suite_20.json, docs/data/summary_metrics.json, and results/episode_task_suite/neural_mlp/ to check the unified 20-task outputs.",
338
  "Inspect results/audio_ablation/AUDIO_ABLATION_SUMMARY.md before judging whether audio helps the current task suite.",
 
346
  "Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
347
  ],
348
  "current_reading_notes": [
349
+ "The latest Qwen3-Omni v6 diagnostic run is verified and meets the strict-JSON target, but action/subtask held-out quality is still weak: JSON validity is 99.90%, action macro-F1 is 0.0029, and subtask accuracy is 0.0037. v5 remains the pinned prior release row because it is still stronger on several metrics.",
350
  "Use TASK_SUITE_ENHANCEMENT_128.md and docs/data/task_suite_enhancement_128.json to push the current 128-episode suite without more raw episodes through multiscale_20s10_40s20_80s40, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
351
+ "Use docs/data/omni_model_comparison.json to compare the two evidence lines (the 1-sample evidence line and the selected-128 evidence line) alongside the model-family grouping for task heads, Qwen3-Omni LoRA, Cosmos3-Nano, and Cosmos3-Super.",
352
  "The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
353
+ "The Cosmos3-Nano future-window package is verified as a compatibility adapter result, Cosmos3-Super Reasoner is verified as a base-weight evaluation, and Cosmos3-Super Forward-Dynamics LoRA is verified as the first fine-tuned Super adapter artifact. Cosmos3-Super adapter weights belong in cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep; verified_public packages exclude safetensors.",
354
  "The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
355
  "Audio is one of the synchronized source modalities in the current task representation.",
356
  "The audio ablation report compares audio/no-audio variants across the original task contracts in results/audio_ablation/.",
357
+ "Foundation-model selection is explicit: Qwen3-Omni is the structured JSON baseline, Cosmos3 is the world-model track with Nano compatibility and Super forward-dynamics LoRA results, and policy models such as OpenVLA/openpi/GR00T wait for robot-compatible action-target conversion.",
358
+ "Future model tracks should be added through the backbone registry and verified package contract, not as one-off result folders with incompatible metrics or publication rules.",
359
  "The Xperience Embodied Foundation Model is a future native-pretraining goal, not a completed model or current benchmark."
360
  ]
361
  }
metrics/public_reader_map.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Public Reader Map",
3
  "status": "published",
4
- "purpose": "Organize the GitHub repo, GitHub Pages dashboard, Hugging Face Space, artifact dataset, baseline model repo, consolidated weights/results repo, and model-branch repos without removing evidence.",
5
  "fast_paths": [
6
  {
7
  "reader_goal": "Understand the project in one pass",
@@ -92,13 +92,13 @@
92
  },
93
  {
94
  "surface": "HF weights/results repo",
95
- "responsibility": "Consolidated baseline weights, Qwen3/Cosmos adapter weights, verified results, analysis files, and file-level manifest.",
96
  "best_use": "Auditing all public-safe weight-bearing artifacts from one repo."
97
  },
98
  {
99
- "surface": "Qwen3/Cosmos model repos",
100
- "responsibility": "Adapter-specific public weights or package cards when a branch is verified and publishable.",
101
- "best_use": "Inspecting model-branch artifacts."
102
  }
103
  ],
104
  "evidence_layers": [
@@ -121,8 +121,8 @@
121
  "boundary": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
122
  },
123
  {
124
- "claim_type": "Foundation-model branch quality",
125
- "public_evidence": ["Verified Qwen3/Cosmos result packages", "model cards"],
126
  "boundary": "Numeric task scores appear only when a task-specific eval or probe exists."
127
  },
128
  {
 
1
  {
2
  "title": "Ropedia Xperience-10M Public Reader Map",
3
  "status": "published",
4
+ "purpose": "Organize the GitHub repo, GitHub Pages dashboard, Hugging Face Space, artifact dataset, baseline model repo, consolidated weights/results repo, and Qwen3-Omni/Cosmos3 repos without removing evidence.",
5
  "fast_paths": [
6
  {
7
  "reader_goal": "Understand the project in one pass",
 
92
  },
93
  {
94
  "surface": "HF weights/results repo",
95
+ "responsibility": "Consolidated baseline weights, Qwen3-Omni v6 LoRA, Cosmos3-Super adapter/result artifacts, verified results, analysis files, and file-level manifest.",
96
  "best_use": "Auditing all public-safe weight-bearing artifacts from one repo."
97
  },
98
  {
99
+ "surface": "Qwen3-Omni and Cosmos3 model repos",
100
+ "responsibility": "Adapter-specific public weights or package cards when a Qwen3-Omni or Cosmos3 run is verified and publishable.",
101
+ "best_use": "Inspecting Qwen3-Omni and Cosmos3 artifacts."
102
  }
103
  ],
104
  "evidence_layers": [
 
121
  "boundary": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
122
  },
123
  {
124
+ "claim_type": "Foundation-model track quality",
125
+ "public_evidence": ["Verified Qwen3-Omni and Cosmos3 result packages", "model cards"],
126
  "boundary": "Numeric task scores appear only when a task-specific eval or probe exists."
127
  },
128
  {
metrics/public_surface_qa.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Public Project Surface",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-21T10:02:48+00:00",
5
  "scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
6
  "checks": [
7
  {
@@ -18,7 +18,7 @@
18
  "website_integrity": {
19
  "exists": true,
20
  "status": "pass",
21
- "generated_at_utc": "2026-06-21T10:02:48+00:00"
22
  },
23
  "rendered_site_check": {
24
  "exists": true,
@@ -28,27 +28,27 @@
28
  "task_surface_integrity": {
29
  "exists": true,
30
  "status": "pass",
31
- "generated_at_utc": "2026-06-21T09:35:37+00:00"
32
  },
33
  "source_alignment": {
34
  "exists": true,
35
  "status": "pass",
36
- "generated_at_utc": "2026-06-21T09:35:37+00:00"
37
  },
38
  "scale_up_status": {
39
  "exists": true,
40
  "status": "pass",
41
- "generated_at_utc": "2026-06-21T09:35:40+00:00"
42
  },
43
  "publication_package": {
44
  "exists": true,
45
  "status": "pass",
46
- "generated_at_utc": "2026-06-21T09:35:32+00:00"
47
  },
48
  "mirror_parity": {
49
  "exists": true,
50
  "status": "pass",
51
- "generated_at_utc": "2026-06-21T09:36:59+00:00"
52
  }
53
  },
54
  "failures": {}
@@ -97,8 +97,8 @@
97
  "marker_counts": {
98
  "Ropedia Xperience-10M Task Suite": 20,
99
  "Xperience-10M": 166,
100
- "20-task": 78,
101
- "Qwen3-Omni": 191,
102
  "128-episode pilot": 1
103
  }
104
  },
 
1
  {
2
  "title": "Ropedia Xperience-10M Public Project Surface",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-21T11:08:07+00:00",
5
  "scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
6
  "checks": [
7
  {
 
18
  "website_integrity": {
19
  "exists": true,
20
  "status": "pass",
21
+ "generated_at_utc": "2026-06-21T11:07:26+00:00"
22
  },
23
  "rendered_site_check": {
24
  "exists": true,
 
28
  "task_surface_integrity": {
29
  "exists": true,
30
  "status": "pass",
31
+ "generated_at_utc": "2026-06-21T11:04:16+00:00"
32
  },
33
  "source_alignment": {
34
  "exists": true,
35
  "status": "pass",
36
+ "generated_at_utc": "2026-06-21T11:04:16+00:00"
37
  },
38
  "scale_up_status": {
39
  "exists": true,
40
  "status": "pass",
41
+ "generated_at_utc": "2026-06-21T11:03:20+00:00"
42
  },
43
  "publication_package": {
44
  "exists": true,
45
  "status": "pass",
46
+ "generated_at_utc": "2026-06-21T11:07:41+00:00"
47
  },
48
  "mirror_parity": {
49
  "exists": true,
50
  "status": "pass",
51
+ "generated_at_utc": "2026-06-21T11:05:04+00:00"
52
  }
53
  },
54
  "failures": {}
 
97
  "marker_counts": {
98
  "Ropedia Xperience-10M Task Suite": 20,
99
  "Xperience-10M": 166,
100
+ "20-task": 89,
101
+ "Qwen3-Omni": 241,
102
  "128-episode pilot": 1
103
  }
104
  },
metrics/publication_audit.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-21T10:08:29+00:00",
4
  "checks": [
5
  {
6
  "name": "required_publication_assets_present",
@@ -244,8 +244,8 @@
244
  "hf_space_bundle": {
245
  "root": "hf_publish/space",
246
  "exists": true,
247
- "file_count": 569,
248
- "text_file_count": 422,
249
  "largest_file": {
250
  "path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
251
  "bytes": 10221085
@@ -255,8 +255,8 @@
255
  "hf_artifact_bundle": {
256
  "root": "hf_publish/artifacts",
257
  "exists": true,
258
- "file_count": 3041,
259
- "text_file_count": 1279,
260
  "largest_file": {
261
  "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
262
  "bytes": 135591061
@@ -266,8 +266,8 @@
266
  "hf_model_bundle": {
267
  "root": "hf_publish/model",
268
  "exists": true,
269
- "file_count": 3523,
270
- "text_file_count": 1450,
271
  "largest_file": {
272
  "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
273
  "bytes": 135591061
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-21T11:07:41+00:00",
4
  "checks": [
5
  {
6
  "name": "required_publication_assets_present",
 
244
  "hf_space_bundle": {
245
  "root": "hf_publish/space",
246
  "exists": true,
247
+ "file_count": 572,
248
+ "text_file_count": 425,
249
  "largest_file": {
250
  "path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
251
  "bytes": 10221085
 
255
  "hf_artifact_bundle": {
256
  "root": "hf_publish/artifacts",
257
  "exists": true,
258
+ "file_count": 3049,
259
+ "text_file_count": 1283,
260
  "largest_file": {
261
  "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
262
  "bytes": 135591061
 
266
  "hf_model_bundle": {
267
  "root": "hf_publish/model",
268
  "exists": true,
269
+ "file_count": 3533,
270
+ "text_file_count": 1455,
271
  "largest_file": {
272
  "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
273
  "bytes": 135591061