Add files using upload-large-folder tool
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- ADDITIONAL_DEVELOPMENT_DIRECTIONS.md +1 -1
- FOUNDATION_MODEL_PLAN.md +3 -3
- PROJECT_README.md +33 -34
- README.ko.md +2 -2
- README.pt.md +1 -1
- docs/assets/charts/episode128_task_model_radar.svg +1 -1
- docs/assets/charts/research_direction_coverage.svg +1 -1
- docs/assets/charts/single_episode_task_model_radar.svg +1 -1
- docs/data/additional_development_directions.json +1 -1
- docs/data/artifact_index.json +73 -73
- docs/data/episode128_task_model_radar.json +3 -3
- docs/data/figure_index.json +19 -19
- docs/data/foundation_model_plan.json +1 -1
- docs/data/live_publication_status.json +1 -1
- docs/data/mirror_parity.json +0 -0
- docs/data/omni_finetune_verified_result.json +1 -1
- docs/data/omni_model_comparison.json +5 -5
- docs/data/project_status.json +16 -16
- docs/data/public_reader_map.json +7 -7
- docs/data/public_surface_qa.json +9 -9
- docs/data/publication_audit.json +7 -7
- docs/data/quality_gates.json +1 -1
- docs/data/qwen3_omni_run_lineage.json +20 -2
- docs/data/research_roadmap.json +3 -3
- docs/data/research_roadmap_interactive.json +1006 -54
- docs/data/scope_claims_audit.json +1 -1
- docs/data/single_episode_task_model_radar.json +2 -2
- docs/data/source_alignment_audit.json +1 -1
- docs/data/task_method_20_result_matrix.json +1 -1
- docs/data/task_method_20_source_audit.json +1 -1
- docs/data/task_suite_enhancement_128.json +1 -1
- docs/data/task_surface_integrity.json +1 -1
- docs/data/two_evidence_line_result_summary.json +6 -6
- docs/data/two_evidence_lines.json +4 -4
- docs/data/unified_task_model_radar.json +2 -2
- docs/data/website_integrity.json +31 -31
- docs/index.html +72 -69
- metrics/additional_development_directions.json +1 -1
- metrics/artifact_index.json +73 -73
- metrics/episode128_task_model_radar.json +3 -3
- metrics/figure_index.json +19 -19
- metrics/foundation_model_plan.json +1 -1
- metrics/live_publication_status.json +1 -1
- metrics/mirror_parity.json +0 -0
- metrics/omni_finetune_verified_result.json +1 -1
- metrics/omni_model_comparison.json +5 -5
- metrics/project_status.json +16 -16
- metrics/public_reader_map.json +7 -7
- metrics/public_surface_qa.json +9 -9
- metrics/publication_audit.json +7 -7
ADDITIONAL_DEVELOPMENT_DIRECTIONS.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
# Additional Development Directions
|
| 2 |
|
| 3 |
This note records concrete directions that can grow from Xperience-10M beyond
|
| 4 |
-
the current minimal baselines, Qwen3-Omni LoRA plan, Cosmos/world-model
|
| 5 |
and long-term Xperience-native pretraining goal. These are project directions,
|
| 6 |
not completed benchmark results.
|
| 7 |
|
|
|
|
| 1 |
# Additional Development Directions
|
| 2 |
|
| 3 |
This note records concrete directions that can grow from Xperience-10M beyond
|
| 4 |
+
the current minimal baselines, Qwen3-Omni LoRA plan, Cosmos/world-model track,
|
| 5 |
and long-term Xperience-native pretraining goal. These are project directions,
|
| 6 |
not completed benchmark results.
|
| 7 |
|
FOUNDATION_MODEL_PLAN.md
CHANGED
|
@@ -32,7 +32,7 @@ machine-readable copy at
|
|
| 32 |
| Priority | Model family | Best role for this project | Why it fits Xperience-10M | Current decision |
|
| 33 |
| --- | --- | --- | --- | --- |
|
| 34 |
| 1 | Qwen3-Omni | Multimodal instruction model and JSON task predictor | Accepts video/audio/language directly; depth, pose, mocap, and IMU can enter through the existing sensor bridge | Keep as the first selected-episode LoRA pilot |
|
| 35 |
-
| 2 | Cosmos 3 | Embodied world model, action generation, and synthetic future prediction | Designed for physical-world video generation, action-conditioned world modeling, and robot/world simulation style objectives | Add as the first world-model
|
| 36 |
| 3 | NVIDIA GR00T | Humanoid/action-policy foundation model | Xperience-10M mocap, hand motion, contacts, and egocentric interaction can support retargeting and action-understanding probes | Track as a humanoid policy branch, not the first LoRA pilot |
|
| 37 |
| 4 | OpenVLA / OpenVLA-OFT | Open vision-language-action policy baseline | Useful when windows are converted into visual observation plus action-token targets | Use after action-space design is explicit |
|
| 38 |
| 5 | openpi pi0/pi0.5 | Open robot policy and action expert baseline | Useful for action chunking, policy fine-tuning, and embodiment transfer experiments | Candidate for policy branch once action labels are retargeted |
|
|
@@ -100,7 +100,7 @@ The full plan is documented in
|
|
| 100 |
## Why Cosmos 3 Should Be Added Next
|
| 101 |
|
| 102 |
Cosmos 3 should not replace the Qwen3-Omni pilot. It should become the first
|
| 103 |
-
world-model
|
| 104 |
modalities are unusually aligned with physical-world modeling:
|
| 105 |
|
| 106 |
- video streams for visual state,
|
|
@@ -159,7 +159,7 @@ The foundation-model stage should add metrics beyond the current 20-task suite:
|
|
| 159 |
|
| 160 |
1. Keep the selected 96/16/16 split as the comparison spine.
|
| 161 |
2. Treat the verified Qwen3-Omni LoRA package as the structured JSON baseline.
|
| 162 |
-
3. Treat Cosmos3-Nano compatibility and Cosmos3-Super Forward-Dynamics LoRA as separate world-model
|
| 163 |
4. Run a model-selection dry run on 3-8 episodes for any next backbone before scaling beyond the selected split.
|
| 164 |
5. Promote Cosmos 3 to larger world-model experiments if video/sensor
|
| 165 |
preprocessing, storage, and loss metrics justify the extra cost.
|
|
|
|
| 32 |
| Priority | Model family | Best role for this project | Why it fits Xperience-10M | Current decision |
|
| 33 |
| --- | --- | --- | --- | --- |
|
| 34 |
| 1 | Qwen3-Omni | Multimodal instruction model and JSON task predictor | Accepts video/audio/language directly; depth, pose, mocap, and IMU can enter through the existing sensor bridge | Keep as the first selected-episode LoRA pilot |
|
| 35 |
+
| 2 | Cosmos 3 | Embodied world model, action generation, and synthetic future prediction | Designed for physical-world video generation, action-conditioned world modeling, and robot/world simulation style objectives | Add as the first world-model track after the data gate |
|
| 36 |
| 3 | NVIDIA GR00T | Humanoid/action-policy foundation model | Xperience-10M mocap, hand motion, contacts, and egocentric interaction can support retargeting and action-understanding probes | Track as a humanoid policy branch, not the first LoRA pilot |
|
| 37 |
| 4 | OpenVLA / OpenVLA-OFT | Open vision-language-action policy baseline | Useful when windows are converted into visual observation plus action-token targets | Use after action-space design is explicit |
|
| 38 |
| 5 | openpi pi0/pi0.5 | Open robot policy and action expert baseline | Useful for action chunking, policy fine-tuning, and embodiment transfer experiments | Candidate for policy branch once action labels are retargeted |
|
|
|
|
| 100 |
## Why Cosmos 3 Should Be Added Next
|
| 101 |
|
| 102 |
Cosmos 3 should not replace the Qwen3-Omni pilot. It should become the first
|
| 103 |
+
world-model track after the data gate. The reason is that the Xperience-10M
|
| 104 |
modalities are unusually aligned with physical-world modeling:
|
| 105 |
|
| 106 |
- video streams for visual state,
|
|
|
|
| 159 |
|
| 160 |
1. Keep the selected 96/16/16 split as the comparison spine.
|
| 161 |
2. Treat the verified Qwen3-Omni LoRA package as the structured JSON baseline.
|
| 162 |
+
3. Treat Cosmos3-Nano compatibility and Cosmos3-Super Forward-Dynamics LoRA as separate Cosmos3 world-model artifacts with different metrics.
|
| 163 |
4. Run a model-selection dry run on 3-8 episodes for any next backbone before scaling beyond the selected split.
|
| 164 |
5. Promote Cosmos 3 to larger world-model experiments if video/sensor
|
| 165 |
preprocessing, storage, and loss metrics justify the extra cost.
|
PROJECT_README.md
CHANGED
|
@@ -39,7 +39,7 @@
|
|
| 39 |
|
| 40 |
**Updated:** 2026-06-21.
|
| 41 |
|
| 42 |
-
**Scope:** one public sample episode for raw-file inspection and reproducible task construction; selected 128-episode public-safe artifacts for same-split
|
| 43 |
|
| 44 |
## Contents
|
| 45 |
|
|
@@ -73,7 +73,7 @@ The multilingual README files are reader guides. The canonical technical evidenc
|
|
| 73 |
<tbody>
|
| 74 |
<tr>
|
| 75 |
<td><strong>Two result lines</strong></td>
|
| 76 |
-
<td><strong>1 sample episode</strong> for task construction and reproducibility. <strong>128 selected episodes</strong> for same-split baselines
|
| 77 |
</tr>
|
| 78 |
<tr>
|
| 79 |
<td><strong>180 method-task records</strong></td>
|
|
@@ -132,7 +132,7 @@ The public suite is organized around two result lines. Keep them separate when r
|
|
| 132 |
<td><strong>128 selected episodes</strong></td>
|
| 133 |
<td>Selected held-out 96/16/16 split: 34,269 exported windows with public-safe processed features linked to official gated episode paths.</td>
|
| 134 |
<td>140/140 selected-128 scores: 134 direct + 6 compact-proxy.</td>
|
| 135 |
-
<td>Same-split comparison,
|
| 136 |
<td>Reading proxy cells as direct raw-target measurements.</td>
|
| 137 |
</tr>
|
| 138 |
</tbody>
|
|
@@ -227,26 +227,25 @@ Cosmos3-Super Forward-Dynamics LoRA is published as a separate fine-tuned adapte
|
|
| 227 |
|
| 228 |
### Qwen3-Omni Run Versions
|
| 229 |
|
| 230 |
-
These are Qwen3-Omni run versions
|
| 231 |
|
| 232 |
<table>
|
| 233 |
<thead>
|
| 234 |
<tr>
|
| 235 |
-
<th width="
|
| 236 |
-
<th width="26%">
|
| 237 |
-
<th width="
|
| 238 |
-
<th width="
|
| 239 |
-
<th
|
| 240 |
-
<th>Public role</th>
|
| 241 |
</tr>
|
| 242 |
</thead>
|
| 243 |
<tbody>
|
| 244 |
-
<tr><td><strong>v1</strong></td><td>
|
| 245 |
-
<tr><td><strong>v2</strong></td><td>Structured-JSON
|
| 246 |
-
<tr><td><strong>v3</strong></td><td>Strict-label prompt/eval over the v2 adapter.</td><td>448
|
| 247 |
-
<tr><td><strong>v4</strong></td><td>
|
| 248 |
-
<tr><td><strong>v5</strong></td><td>
|
| 249 |
-
<tr><td><strong>v6</strong></td><td>Rank64/lr5e-5 multiscale LoRA.</td><td>4,032
|
| 250 |
</tbody>
|
| 251 |
</table>
|
| 252 |
|
|
@@ -338,7 +337,7 @@ embodied-AI research infrastructure:
|
|
| 338 |
<tr><td><strong>Multimodal data understanding</strong></td><td>Parses the public sample into synchronized windows across video, audio, depth, pose/SLAM, mocap, IMU, calibration, and language-derived signals.</td></tr>
|
| 339 |
<tr><td><strong>Task design</strong></td><td>Defines 20 human-readable tasks in one unified public-sample suite, plus four direction-extension probes with inputs, outputs, process modules, metrics, and case-study walkthroughs.</td></tr>
|
| 340 |
<tr><td><strong>Model and evaluation discipline</strong></td><td>Runs minimal and compact neural baselines, records predictions/metrics, keeps chronological split boundaries explicit, and separates sample evidence from held-out claims.</td></tr>
|
| 341 |
-
<tr><td><strong>Scale-up planning</strong></td><td>Connects the public-sample pipeline to 32/128-episode held-out pilots, Qwen3-Omni LoRA, Cosmos-style world-model
|
| 342 |
</tbody>
|
| 343 |
</table>
|
| 344 |
|
|
@@ -387,7 +386,7 @@ and [`docs/data/project_brief.json`](docs/data/project_brief.json).
|
|
| 387 |
<tr><td><strong>Hugging Face Space</strong></td><td>Hub-hosted copy of the dashboard and static app assets.</td></tr>
|
| 388 |
<tr><td><strong>HF artifact dataset</strong></td><td>Public-safe metrics, reports, website JSON, result packages, and derived evidence files.</td></tr>
|
| 389 |
<tr><td><strong>HF baseline model repo</strong></td><td>Minimal/neural baseline weights, figures, metrics, and mirrored task artifacts.</td></tr>
|
| 390 |
-
<tr><td><strong>Qwen3-Omni and Cosmos3 model repos</strong></td><td>Adapter-specific public weights or package cards when Qwen3-Omni v6, Cosmos3-Super, or Cosmos3-Nano
|
| 391 |
</tbody>
|
| 392 |
</table>
|
| 393 |
|
|
@@ -449,7 +448,7 @@ Current contributions:
|
|
| 449 |
- a generated four-direction research taxonomy matching the Ropedia job tracks,
|
| 450 |
- four additional direction-extension probes with minimal and neural baselines,
|
| 451 |
- human-readable research task cards and an interactive scrub/play walkthrough storyboard for every task,
|
| 452 |
-
- an interactive research roadmap connecting 20 tasks, four research tracks, current sample evidence, the Qwen3-Omni scale-up path, and foundation-model
|
| 453 |
- a next-milestone track for Qwen3-Omni fine-tuning, Cosmos 3 world modeling, and sensor-bridge evaluation,
|
| 454 |
- a future pretraining plan for an Xperience Embodied Foundation Model over the full corpus after smaller multi-episode stages prove value,
|
| 455 |
- metrics, predictions, model weights, manifests, charts, and a two-level
|
|
@@ -692,7 +691,7 @@ Hugging Face Space app:
|
|
| 692 |
<tr><td><strong>Dataset context</strong></td><td><a href="XPERIENCE10M_DATASET_CARD_ALIGNMENT.md">XPERIENCE10M_DATASET_CARD_ALIGNMENT.md</a><br>official dataset links</td><td>Explains the official dataset, public sample, modalities, access boundary, and what this repo uses.</td></tr>
|
| 693 |
<tr><td><strong>Visual assets</strong></td><td><a href="FIGURE_INDEX.md">FIGURE_INDEX.md</a><br><a href="docs/assets/">docs/assets/</a></td><td>Shows the task-suite graphic, modality thumbnails, pipeline diagrams, charts, and logo assets.</td></tr>
|
| 694 |
<tr><td><strong>Evaluation protocol</strong></td><td><a href="EVALUATION_PROTOCOL.md">EVALUATION_PROTOCOL.md</a><br><a href="docs/data/evaluation_protocol.json">evaluation_protocol.json</a></td><td>Defines the task unit, split, metrics, leakage controls, and current limitations.</td></tr>
|
| 695 |
-
<tr><td><strong>Research roadmap</strong></td><td><a href="RESEARCH_ROADMAP.md">RESEARCH_ROADMAP.md</a><br><a href="docs/data/research_roadmap.json">research_roadmap.json</a></td><td>Shows the path from sample-level task development to multi-episode work, larger model
|
| 696 |
<tr><td><strong>Additional development directions</strong></td><td><a href="ADDITIONAL_DEVELOPMENT_DIRECTIONS.md">ADDITIONAL_DEVELOPMENT_DIRECTIONS.md</a><br><a href="docs/data/additional_development_directions.json">additional_development_directions.json</a></td><td>Records concrete non-backbone tracks: taxonomy, benchmark protocol, representation learning, skill graphs, affordances, 3D/4D memory, QA, and policy transfer.</td></tr>
|
| 697 |
<tr><td><strong>Xperience Embodied Foundation Model plan</strong></td><td><a href="XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md">XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md</a></td><td>Describes the long-term full-corpus pretraining goal, target modules, objectives, staged scale-up, hardware ranges, and evaluation protocol.</td></tr>
|
| 698 |
<tr><td><strong>Minimal heads</strong></td><td>softmax<br>ridge projection/regression<br>multi-label logistic heads</td><td>Keeps every input/output contract visible and inspectable.</td></tr>
|
|
@@ -774,11 +773,11 @@ completions because the 128 export lacks raw interaction strings and paired
|
|
| 774 |
video-view embeddings. The verified model-output probe package adds task-16
|
| 775 |
action/object relation scores for Qwen3-Omni and Cosmos3-Super, plus a task-13
|
| 776 |
long-horizon next-action score for Cosmos3-Nano derived from its existing
|
| 777 |
-
held-out future-window predictions. Metadata-only baselines and model
|
| 778 |
now have scored records on all 20 axes; six compact-proxy scores stay
|
| 779 |
explicitly marked instead of being blended into direct-target metrics.
|
| 780 |
Cosmos3-Super forward-dynamics LoRA
|
| 781 |
-
remains a
|
| 782 |
task metrics. The machine-readable copies are
|
| 783 |
[`docs/data/unified_task_model_radar.json`](docs/data/unified_task_model_radar.json)
|
| 784 |
and
|
|
@@ -880,7 +879,7 @@ docs/
|
|
| 880 |
data/additional_development_directions.json # concrete non-backbone project directions
|
| 881 |
data/summary_metrics.json # website-readable metrics bundle
|
| 882 |
data/task_suite_20.json # unified 20-task suite bundle
|
| 883 |
-
data/unified_task_model_radar.json # 20-task radar values and
|
| 884 |
data/single_episode_task_model_radar.json # 1-episode split radar values
|
| 885 |
data/episode128_task_model_radar.json # 128-episode split radar values
|
| 886 |
data/task_method_20_result_matrix.json # 9-method x 20-task result matrix
|
|
@@ -1128,9 +1127,9 @@ Current status in this repo:
|
|
| 1128 |
- qwen3_lora_adapter_repo: https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep
|
| 1129 |
- cosmos3_super_lora_adapter_repo: https://huggingface.co/cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep
|
| 1130 |
- 128_aligned_baselines: unified 20-task axes for simple and neural baselines, including metadata/text rows and public-safe compact-proxy rows where raw-feature targets are required
|
| 1131 |
-
-
|
| 1132 |
-
-
|
| 1133 |
-
- cosmos3_super_forward_dynamics_lora: verified 8-GPU FSDP LoRA
|
| 1134 |
- gated dataset: available for selected multi-episode data preparation
|
| 1135 |
- source_discovery: `results/omni_finetune/source_discovery.json`
|
| 1136 |
- data_status: `results/omni_finetune/DATA_ACCESS_STATUS.md`
|
|
@@ -1249,7 +1248,7 @@ The package copies only small derived artifacts such as metrics, predictions,
|
|
| 1249 |
confusion matrices, run reports, manifests, validation summaries, and training
|
| 1250 |
metadata. The exact required eval files and primary metrics come from the
|
| 1251 |
selected backbone contract in `configs/omni_backbones`, so Qwen3-Omni,
|
| 1252 |
-
Cosmos-style world models, and VLA/policy
|
| 1253 |
publication gate once their model-specific evaluators exist. The package
|
| 1254 |
excludes raw Xperience-10M files, base-model weights, adapter or checkpoint
|
| 1255 |
weights, full checkpoints, and large archives.
|
|
@@ -1277,7 +1276,7 @@ python scripts/omni/export_model_neutral_window_index.py \
|
|
| 1277 |
```
|
| 1278 |
|
| 1279 |
This produces `window_index.jsonl` and `window_index_manifest.json` so Cosmos-
|
| 1280 |
-
style world models and VLA/policy
|
| 1281 |
windows without depending on Qwen chat-message records.
|
| 1282 |
|
| 1283 |
### Uploading Qwen3-Omni LoRA artifacts
|
|
@@ -1308,14 +1307,14 @@ Network availability to `huggingface.co` is required.
|
|
| 1308 |
|
| 1309 |
### Foundation Backbone Plan
|
| 1310 |
|
| 1311 |
-
The next modeling plan tracks several foundation-model
|
| 1312 |
assuming one backbone solves every Xperience-10M objective.
|
| 1313 |
|
| 1314 |
| Branch | Current role | When to use it |
|
| 1315 |
| --- | --- | --- |
|
| 1316 |
| Qwen3-Omni | First trainable multimodal LoRA pilot | Use for the selected 128-episode held-out baseline over video/audio/language plus sensor-bridge features. |
|
| 1317 |
-
| Cosmos 3 | First world-model/action-generation
|
| 1318 |
-
| GR00T | Humanoid/action-policy
|
| 1319 |
| OpenVLA / openpi | Open VLA/policy baselines | Use after the project defines robot-compatible or action-token targets. |
|
| 1320 |
| Gemini Robotics | External reasoning reference | Use only for qualitative comparison or annotation support unless local trainable access exists. |
|
| 1321 |
| Xperience Embodied Foundation Model | Future Xperience-native pretraining goal | Use only after multi-episode pilots, full-corpus storage, distributed training infrastructure, and scaling evidence justify a from-scratch domain model. |
|
|
@@ -1333,7 +1332,7 @@ so the public claims stay precise:
|
|
| 1333 |
| Pipeline track | First concrete pipeline | Claim boundary |
|
| 1334 |
| --- | --- | --- |
|
| 1335 |
| Spatial intelligence models | Build scene/object memory targets from multiview RGB, depth, pose, calibration, object cues, and language prompts. | Ready as a geometry/reasoning pipeline; strong claims need raw depth/pose artifacts and held-out spatial metrics. |
|
| 1336 |
-
| Human-video world models | Predict next action, next subtask, future object set, contact transition, and future state from observed interaction windows. | Partially evidenced by future-task probes and Cosmos-style
|
| 1337 |
| Vision-language-action models | Convert egocentric video, captions, hand/body motion, contacts, and objects into action chunks or policy-compatible targets. | Feasible, but gated by action-token conversion, normalization, retargeting evidence, and held-out policy metrics. |
|
| 1338 |
|
| 1339 |
High-resolution slide diagrams for the three tracks are published in
|
|
@@ -1386,7 +1385,7 @@ python scripts/omni/audit_verified_omni_package.py \
|
|
| 1386 |
--package-dir results/omni_finetune/verified_public/<eval_run_id>
|
| 1387 |
```
|
| 1388 |
|
| 1389 |
-
Create a new planned backbone
|
| 1390 |
|
| 1391 |
```bash
|
| 1392 |
python scripts/omni/scaffold_omni_backbone.py \
|
|
@@ -1402,7 +1401,7 @@ python scripts/omni/scaffold_omni_backbone.py \
|
|
| 1402 |
|
| 1403 |
Each backbone config declares the checkpoint gate, required train/eval files,
|
| 1404 |
allowed public artifacts, and forbidden private or heavyweight artifacts. This
|
| 1405 |
-
keeps Qwen3-Omni, Cosmos-style world models, and policy/VLA
|
| 1406 |
split, validation, and publication discipline even though their training targets
|
| 1407 |
are different.
|
| 1408 |
|
|
|
|
| 39 |
|
| 40 |
**Updated:** 2026-06-21.
|
| 41 |
|
| 42 |
+
**Scope:** one public sample episode for raw-file inspection and reproducible task construction; selected 128-episode public-safe artifacts for same-split metadata/raw baselines, Qwen3-Omni v6 LoRA, Cosmos3-Super Reasoner, and Cosmos3-Nano Future Window. Raw Xperience-10M MP4/HDF5/RRD files, Qwen3 base weights, Cosmos3 base weights, and gated data are not redistributed here.
|
| 43 |
|
| 44 |
## Contents
|
| 45 |
|
|
|
|
| 73 |
<tbody>
|
| 74 |
<tr>
|
| 75 |
<td><strong>Two result lines</strong></td>
|
| 76 |
+
<td><strong>1 sample episode</strong> for task construction and reproducibility. <strong>128 selected episodes</strong> for same-split metadata/raw baselines plus Qwen3-Omni v6 and Cosmos3 diagnostics.</td>
|
| 77 |
</tr>
|
| 78 |
<tr>
|
| 79 |
<td><strong>180 method-task records</strong></td>
|
|
|
|
| 132 |
<td><strong>128 selected episodes</strong></td>
|
| 133 |
<td>Selected held-out 96/16/16 split: 34,269 exported windows with public-safe processed features linked to official gated episode paths.</td>
|
| 134 |
<td>140/140 selected-128 scores: 134 direct + 6 compact-proxy.</td>
|
| 135 |
+
<td>Same-split metadata/raw baseline comparison, Qwen3-Omni v6 diagnostics, Cosmos3 diagnostics, and scale-up planning.</td>
|
| 136 |
<td>Reading proxy cells as direct raw-target measurements.</td>
|
| 137 |
</tr>
|
| 138 |
</tbody>
|
|
|
|
| 227 |
|
| 228 |
### Qwen3-Omni Run Versions
|
| 229 |
|
| 230 |
+
These are Qwen3-Omni run versions inside **Line 2: selected 128 episodes**. They are not the project evidence lines. The 20-task matrix uses **Qwen3-Omni v6 LoRA**; **v5** remains the pinned prior multiscale release; **v1-v4** are lineage and ablation evidence.
|
| 231 |
|
| 232 |
<table>
|
| 233 |
<thead>
|
| 234 |
<tr>
|
| 235 |
+
<th width="8%">Run</th>
|
| 236 |
+
<th width="26%">Purpose</th>
|
| 237 |
+
<th width="28%">Main change</th>
|
| 238 |
+
<th width="16%">Eval signal</th>
|
| 239 |
+
<th>Use now</th>
|
|
|
|
| 240 |
</tr>
|
| 241 |
</thead>
|
| 242 |
<tbody>
|
| 243 |
+
<tr><td><strong>v1</strong></td><td>Prove the selected-128 LoRA/eval/package loop.</td><td>First verified 96/16/16 selected-episode Qwen3-Omni LoRA run.</td><td>448 eval; JSON 0.8750; contact 0.6451.</td><td>Lineage only.</td></tr>
|
| 244 |
+
<tr><td><strong>v2</strong></td><td>Make answers schema-checked.</td><td>Structured-JSON contract with full-8-GPU LoRA on the same split.</td><td>448 eval; JSON 0.9978; contact 0.7188.</td><td>Structured-output ablation.</td></tr>
|
| 245 |
+
<tr><td><strong>v3</strong></td><td>Separate prompt/eval effects from training.</td><td>Strict-label prompt/eval over the v2 adapter; no new adapter training.</td><td>448 eval; JSON 1.0000; contact 0.7210.</td><td>Prompt/eval ablation.</td></tr>
|
| 246 |
+
<tr><td><strong>v4</strong></td><td>Test longer structured-JSON LoRA training.</td><td>New four-epoch full-8-GPU adapter on the same selected split.</td><td>448 eval; JSON 1.0000; contact 0.7299.</td><td>Overfit/metric-tradeoff evidence.</td></tr>
|
| 247 |
+
<tr><td><strong>v5</strong></td><td>Move to denser multiscale evaluation.</td><td>Multiscale cap96 export with 4,032 held-out predictions.</td><td>4,032 eval; JSON 1.0000; contact 0.7865.</td><td>Pinned prior release; stronger on several non-contact metrics.</td></tr>
|
| 248 |
+
<tr><td><strong>v6</strong></td><td>Publish the current Qwen 20-task row.</td><td>Rank64/lr5e-5 multiscale LoRA plus verified task-specific probes.</td><td>4,032 eval; JSON 0.9990; contact 0.8177.</td><td>Current public 20-task Qwen3-Omni row.</td></tr>
|
| 249 |
</tbody>
|
| 250 |
</table>
|
| 251 |
|
|
|
|
| 337 |
<tr><td><strong>Multimodal data understanding</strong></td><td>Parses the public sample into synchronized windows across video, audio, depth, pose/SLAM, mocap, IMU, calibration, and language-derived signals.</td></tr>
|
| 338 |
<tr><td><strong>Task design</strong></td><td>Defines 20 human-readable tasks in one unified public-sample suite, plus four direction-extension probes with inputs, outputs, process modules, metrics, and case-study walkthroughs.</td></tr>
|
| 339 |
<tr><td><strong>Model and evaluation discipline</strong></td><td>Runs minimal and compact neural baselines, records predictions/metrics, keeps chronological split boundaries explicit, and separates sample evidence from held-out claims.</td></tr>
|
| 340 |
+
<tr><td><strong>Scale-up planning</strong></td><td>Connects the public-sample pipeline to 32/128-episode held-out pilots, Qwen3-Omni LoRA, Cosmos-style world-model tracks, policy/VLA tracks, and the future Xperience-native foundation-model pretraining goal.</td></tr>
|
| 341 |
</tbody>
|
| 342 |
</table>
|
| 343 |
|
|
|
|
| 386 |
<tr><td><strong>Hugging Face Space</strong></td><td>Hub-hosted copy of the dashboard and static app assets.</td></tr>
|
| 387 |
<tr><td><strong>HF artifact dataset</strong></td><td>Public-safe metrics, reports, website JSON, result packages, and derived evidence files.</td></tr>
|
| 388 |
<tr><td><strong>HF baseline model repo</strong></td><td>Minimal/neural baseline weights, figures, metrics, and mirrored task artifacts.</td></tr>
|
| 389 |
+
<tr><td><strong>Qwen3-Omni and Cosmos3 model repos</strong></td><td>Adapter-specific public weights or package cards when Qwen3-Omni v6, Cosmos3-Super, or Cosmos3-Nano runs are verified and publishable.</td></tr>
|
| 390 |
</tbody>
|
| 391 |
</table>
|
| 392 |
|
|
|
|
| 448 |
- a generated four-direction research taxonomy matching the Ropedia job tracks,
|
| 449 |
- four additional direction-extension probes with minimal and neural baselines,
|
| 450 |
- human-readable research task cards and an interactive scrub/play walkthrough storyboard for every task,
|
| 451 |
+
- an interactive research roadmap connecting 20 tasks, four research tracks, current sample evidence, the Qwen3-Omni scale-up path, and foundation-model track selection,
|
| 452 |
- a next-milestone track for Qwen3-Omni fine-tuning, Cosmos 3 world modeling, and sensor-bridge evaluation,
|
| 453 |
- a future pretraining plan for an Xperience Embodied Foundation Model over the full corpus after smaller multi-episode stages prove value,
|
| 454 |
- metrics, predictions, model weights, manifests, charts, and a two-level
|
|
|
|
| 691 |
<tr><td><strong>Dataset context</strong></td><td><a href="XPERIENCE10M_DATASET_CARD_ALIGNMENT.md">XPERIENCE10M_DATASET_CARD_ALIGNMENT.md</a><br>official dataset links</td><td>Explains the official dataset, public sample, modalities, access boundary, and what this repo uses.</td></tr>
|
| 692 |
<tr><td><strong>Visual assets</strong></td><td><a href="FIGURE_INDEX.md">FIGURE_INDEX.md</a><br><a href="docs/assets/">docs/assets/</a></td><td>Shows the task-suite graphic, modality thumbnails, pipeline diagrams, charts, and logo assets.</td></tr>
|
| 693 |
<tr><td><strong>Evaluation protocol</strong></td><td><a href="EVALUATION_PROTOCOL.md">EVALUATION_PROTOCOL.md</a><br><a href="docs/data/evaluation_protocol.json">evaluation_protocol.json</a></td><td>Defines the task unit, split, metrics, leakage controls, and current limitations.</td></tr>
|
| 694 |
+
<tr><td><strong>Research roadmap</strong></td><td><a href="RESEARCH_ROADMAP.md">RESEARCH_ROADMAP.md</a><br><a href="docs/data/research_roadmap.json">research_roadmap.json</a></td><td>Shows the path from sample-level task development to multi-episode work, larger model tracks, and the future native-pretraining goal.</td></tr>
|
| 695 |
<tr><td><strong>Additional development directions</strong></td><td><a href="ADDITIONAL_DEVELOPMENT_DIRECTIONS.md">ADDITIONAL_DEVELOPMENT_DIRECTIONS.md</a><br><a href="docs/data/additional_development_directions.json">additional_development_directions.json</a></td><td>Records concrete non-backbone tracks: taxonomy, benchmark protocol, representation learning, skill graphs, affordances, 3D/4D memory, QA, and policy transfer.</td></tr>
|
| 696 |
<tr><td><strong>Xperience Embodied Foundation Model plan</strong></td><td><a href="XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md">XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md</a></td><td>Describes the long-term full-corpus pretraining goal, target modules, objectives, staged scale-up, hardware ranges, and evaluation protocol.</td></tr>
|
| 697 |
<tr><td><strong>Minimal heads</strong></td><td>softmax<br>ridge projection/regression<br>multi-label logistic heads</td><td>Keeps every input/output contract visible and inspectable.</td></tr>
|
|
|
|
| 773 |
video-view embeddings. The verified model-output probe package adds task-16
|
| 774 |
action/object relation scores for Qwen3-Omni and Cosmos3-Super, plus a task-13
|
| 775 |
long-horizon next-action score for Cosmos3-Nano derived from its existing
|
| 776 |
+
held-out future-window predictions. Metadata-only baselines and model diagnostics
|
| 777 |
now have scored records on all 20 axes; six compact-proxy scores stay
|
| 778 |
explicitly marked instead of being blended into direct-target metrics.
|
| 779 |
Cosmos3-Super forward-dynamics LoRA
|
| 780 |
+
remains a separate artifact card because its camera-pose proxy MSE is not one of the 20
|
| 781 |
task metrics. The machine-readable copies are
|
| 782 |
[`docs/data/unified_task_model_radar.json`](docs/data/unified_task_model_radar.json)
|
| 783 |
and
|
|
|
|
| 879 |
data/additional_development_directions.json # concrete non-backbone project directions
|
| 880 |
data/summary_metrics.json # website-readable metrics bundle
|
| 881 |
data/task_suite_20.json # unified 20-task suite bundle
|
| 882 |
+
data/unified_task_model_radar.json # 20-task radar values and method overlays
|
| 883 |
data/single_episode_task_model_radar.json # 1-episode split radar values
|
| 884 |
data/episode128_task_model_radar.json # 128-episode split radar values
|
| 885 |
data/task_method_20_result_matrix.json # 9-method x 20-task result matrix
|
|
|
|
| 1127 |
- qwen3_lora_adapter_repo: https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep
|
| 1128 |
- cosmos3_super_lora_adapter_repo: https://huggingface.co/cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep
|
| 1129 |
- 128_aligned_baselines: unified 20-task axes for simple and neural baselines, including metadata/text rows and public-safe compact-proxy rows where raw-feature targets are required
|
| 1130 |
+
- cosmos3_nano: verified Cosmos3-Nano future-window compatibility package, 378 held-out future-window predictions from 14 test episodes
|
| 1131 |
+
- cosmos3_super_reasoner: verified Cosmos3-Super Reasoner base-weight JSON-task evaluation, 448 held-out predictions from 14 test episodes; JSON validity 51.12%, action macro-F1 0.0008, contact accuracy 32.14%, transition accuracy 36.83%
|
| 1132 |
+
- cosmos3_super_forward_dynamics_lora: verified 8-GPU FSDP LoRA artifact over camera-pose proxy targets; 2,848 train rows, 512 val rows, 448 test rows, 26.2M adapter parameters, val MSE 4.0082, test MSE 3.6853; public package excludes safetensors
|
| 1133 |
- gated_dataset: available for selected multi-episode data preparation
|
| 1134 |
- source_discovery: `results/omni_finetune/source_discovery.json`
|
| 1135 |
- data_status: `results/omni_finetune/DATA_ACCESS_STATUS.md`
|
|
|
|
| 1248 |
confusion matrices, run reports, manifests, validation summaries, and training
|
| 1249 |
metadata. The exact required eval files and primary metrics come from the
|
| 1250 |
selected backbone contract in `configs/omni_backbones`, so Qwen3-Omni,
|
| 1251 |
+
Cosmos-style world models, and VLA/policy tracks can share the same verified
|
| 1252 |
publication gate once their model-specific evaluators exist. The package
|
| 1253 |
excludes raw Xperience-10M files, base-model weights, adapter or checkpoint
|
| 1254 |
weights, full checkpoints, and large archives.
|
|
|
|
| 1276 |
```
|
| 1277 |
|
| 1278 |
This produces `window_index.jsonl` and `window_index_manifest.json` so Cosmos-
|
| 1279 |
+
style world models and VLA/policy tracks can reuse the same split-checked
|
| 1280 |
windows without depending on Qwen chat-message records.
|
| 1281 |
|
| 1282 |
### Uploading Qwen3-Omni LoRA artifacts
|
|
|
|
| 1307 |
|
| 1308 |
### Foundation Backbone Plan
|
| 1309 |
|
| 1310 |
+
The next modeling plan follows several foundation-model tracks instead of
|
| 1311 |
assuming one backbone solves every Xperience-10M objective.
|
| 1312 |
|
| 1313 |
| Branch | Current role | When to use it |
|
| 1314 |
| --- | --- | --- |
|
| 1315 |
| Qwen3-Omni | First trainable multimodal LoRA pilot | Use for the selected 128-episode held-out baseline over video/audio/language plus sensor-bridge features. |
|
| 1316 |
+
| Cosmos 3 | First world-model/action-generation track | Use now for future-window compatibility analysis and the verified Cosmos3-Super forward-dynamics LoRA artifact; compare its loss metrics separately from Qwen JSON-task accuracy. |
|
| 1317 |
+
| GR00T | Humanoid/action-policy track | Use after mocap/contact retargeting creates well-defined humanoid action targets. |
|
| 1318 |
| OpenVLA / openpi | Open VLA/policy baselines | Use after the project defines robot-compatible or action-token targets. |
|
| 1319 |
| Gemini Robotics | External reasoning reference | Use only for qualitative comparison or annotation support unless local trainable access exists. |
|
| 1320 |
| Xperience Embodied Foundation Model | Future Xperience-native pretraining goal | Use only after multi-episode pilots, full-corpus storage, distributed training infrastructure, and scaling evidence justify a from-scratch domain model. |
|
|
|
|
| 1332 |
| Pipeline track | First concrete pipeline | Claim boundary |
|
| 1333 |
| --- | --- | --- |
|
| 1334 |
| Spatial intelligence models | Build scene/object memory targets from multiview RGB, depth, pose, calibration, object cues, and language prompts. | Ready as a geometry/reasoning pipeline; strong claims need raw depth/pose artifacts and held-out spatial metrics. |
|
| 1335 |
+
| Human-video world models | Predict next action, next subtask, future object set, contact transition, and future state from observed interaction windows. | Partially evidenced by future-task probes and Cosmos-style artifacts; visual/latent future quality still needs stronger metrics. |
|
| 1336 |
| Vision-language-action models | Convert egocentric video, captions, hand/body motion, contacts, and objects into action chunks or policy-compatible targets. | Feasible, but gated by action-token conversion, normalization, retargeting evidence, and held-out policy metrics. |
|
| 1337 |
|
| 1338 |
High-resolution slide diagrams for the three tracks are published in
|
|
|
|
| 1385 |
--package-dir results/omni_finetune/verified_public/<eval_run_id>
|
| 1386 |
```
|
| 1387 |
|
| 1388 |
+
Create a new planned backbone track from an existing contract template with:
|
| 1389 |
|
| 1390 |
```bash
|
| 1391 |
python scripts/omni/scaffold_omni_backbone.py \
|
|
|
|
| 1401 |
|
| 1402 |
Each backbone config declares the checkpoint gate, required train/eval files,
|
| 1403 |
allowed public artifacts, and forbidden private or heavyweight artifacts. This
|
| 1404 |
+
keeps Qwen3-Omni, Cosmos-style world models, and policy/VLA tracks on the same
|
| 1405 |
split, validation, and publication discipline even though their training targets
|
| 1406 |
are different.
|
| 1407 |
|
README.ko.md
CHANGED
|
@@ -47,11 +47,11 @@
|
|
| 47 |
| 라인 | 데이터 단위 | 방법과 결과 | 용도 |
|
| 48 |
| --- | --- | --- | --- |
|
| 49 |
| 1 sample episode | 5,821 frames, 1,161 aligned 20-frame windows, 8,546 dimensions. | Minimal + Neural MLP가 20 tasks 전체를 평가; 40/40 scored records; 모두 direct scores. | Raw sample files, task definitions, reproducible baselines, task validity 확인. |
|
| 50 |
-
| 128 selected episodes | 96/16/16 split, 34,269 exported windows, public-safe features가 official gated episode paths에 연결됨. | Metadata simple/NN, raw-feature simple/NN, Qwen3-Omni, Cosmos3-Super, Cosmos3-Nano; 140/140 scored records; 134 direct + 6 compact proxy. | 같은 split에서 baselines
|
| 51 |
|
| 52 |
공식: single-episode 방법 2개 x 20 tasks = 40; 128-episode 방법 7개 x 20 tasks = 140; 전체 공개 matrix = 180/180 scored records.
|
| 53 |
|
| 54 |
-
방법 블록: Line 1은 task-head baselines(Minimal, Neural MLP)입니다. Line 2는 aligned baseline heads(metadata simple/NN, raw-feature simple/NN), Qwen3-Omni series(Qwen3-Omni v6 LoRA), Cosmos3 series(Cosmos3-Super Reasoner, Cosmos3-Nano Future Window)로 분리됩니다. Qwen3 v1-v6은 LoRA/eval lineage이며 project
|
| 55 |
|
| 56 |
입구: [`TWO_EVIDENCE_LINES.md`](TWO_EVIDENCE_LINES.md), [`two_evidence_lines.json`](docs/data/two_evidence_lines.json), [`task_method_20_result_matrix.json`](docs/data/task_method_20_result_matrix.json), [`two_evidence_line_result_summary.json`](docs/data/two_evidence_line_result_summary.json).
|
| 57 |
|
|
|
|
| 47 |
| 라인 | 데이터 단위 | 방법과 결과 | 용도 |
|
| 48 |
| --- | --- | --- | --- |
|
| 49 |
| 1 sample episode | 5,821 frames, 1,161 aligned 20-frame windows, 8,546 dimensions. | Minimal + Neural MLP가 20 tasks 전체를 평가; 40/40 scored records; 모두 direct scores. | Raw sample files, task definitions, reproducible baselines, task validity 확인. |
|
| 50 |
+
| 128 selected episodes | 96/16/16 split, 34,269 exported windows, public-safe features가 official gated episode paths에 연결됨. | Metadata simple/NN, raw-feature simple/NN, Qwen3-Omni v6, Cosmos3-Super, Cosmos3-Nano; 140/140 scored records; 134 direct + 6 compact proxy. | 같은 split에서 metadata/raw baselines, Qwen3-Omni diagnostics, Cosmos3 diagnostics 비교; proxy targets는 명시 유지. |
|
| 51 |
|
| 52 |
공식: single-episode 방법 2개 x 20 tasks = 40; 128-episode 방법 7개 x 20 tasks = 140; 전체 공개 matrix = 180/180 scored records.
|
| 53 |
|
| 54 |
+
방법 블록: Line 1은 task-head baselines(Minimal, Neural MLP)입니다. Line 2는 aligned baseline heads(metadata simple/NN, raw-feature simple/NN), Qwen3-Omni series(Qwen3-Omni v6 LoRA), Cosmos3 series(Cosmos3-Super Reasoner, Cosmos3-Nano Future Window)로 분리됩니다. Qwen3 v1-v6은 Line 2 내부의 LoRA/eval lineage이며 project evidence lines와 다릅니다. 20-task matrix는 v6을 사용하고 v5는 pinned prior release입니다. Cosmos3-Super Forward-Dynamics LoRA는 별도의 adapter/weights/results artifact로 공개되며 20-task matrix method row에는 포함되지 않습니다.
|
| 55 |
|
| 56 |
입구: [`TWO_EVIDENCE_LINES.md`](TWO_EVIDENCE_LINES.md), [`two_evidence_lines.json`](docs/data/two_evidence_lines.json), [`task_method_20_result_matrix.json`](docs/data/task_method_20_result_matrix.json), [`two_evidence_line_result_summary.json`](docs/data/two_evidence_line_result_summary.json).
|
| 57 |
|
README.pt.md
CHANGED
|
@@ -51,7 +51,7 @@ Este repositório transforma o episódio público de amostra do Xperience-10M em
|
|
| 51 |
|
| 52 |
Fórmula: 2 métodos de um episódio x 20 tarefas = 40; 7 métodos de 128 episódios x 20 tarefas = 140; matriz pública total = 180/180 registros com score.
|
| 53 |
|
| 54 |
-
Blocos de métodos: a linha 1 contém task-head baselines (Minimal, Neural MLP). A linha 2 separa aligned baseline heads (metadata simple/NN, raw-feature simple/NN), a série Qwen3-Omni (Qwen3-Omni v6 LoRA) e a série Cosmos3 (Cosmos3-Super Reasoner, Cosmos3-Nano Future Window). Qwen3 v1-v6 é uma linhagem LoRA/eval, não as
|
| 55 |
|
| 56 |
Entradas: [`TWO_EVIDENCE_LINES.md`](TWO_EVIDENCE_LINES.md), [`two_evidence_lines.json`](docs/data/two_evidence_lines.json), [`task_method_20_result_matrix.json`](docs/data/task_method_20_result_matrix.json), [`two_evidence_line_result_summary.json`](docs/data/two_evidence_line_result_summary.json).
|
| 57 |
|
|
|
|
| 51 |
|
| 52 |
Fórmula: 2 métodos de um episódio x 20 tarefas = 40; 7 métodos de 128 episódios x 20 tarefas = 140; matriz pública total = 180/180 registros com score.
|
| 53 |
|
| 54 |
+
Blocos de métodos: a linha 1 contém task-head baselines (Minimal, Neural MLP). A linha 2 separa aligned baseline heads (metadata simple/NN, raw-feature simple/NN), a série Qwen3-Omni (Qwen3-Omni v6 LoRA) e a série Cosmos3 (Cosmos3-Super Reasoner, Cosmos3-Nano Future Window). Qwen3 v1-v6 é uma linhagem LoRA/eval interna à linha 2, não as evidence lines do projeto; a matriz de 20 tarefas usa v6 e v5 fica como pinned prior release. Cosmos3-Super Forward-Dynamics LoRA é publicado como um artefato separado de adapter/pesos/resultados e não conta como linha de método na matriz de 20 tarefas.
|
| 55 |
|
| 56 |
Entradas: [`TWO_EVIDENCE_LINES.md`](TWO_EVIDENCE_LINES.md), [`two_evidence_lines.json`](docs/data/two_evidence_lines.json), [`task_method_20_result_matrix.json`](docs/data/task_method_20_result_matrix.json), [`two_evidence_line_result_summary.json`](docs/data/two_evidence_line_result_summary.json).
|
| 57 |
|
docs/assets/charts/episode128_task_model_radar.svg
CHANGED
|
|
|
|
docs/assets/charts/research_direction_coverage.svg
CHANGED
|
|
|
|
docs/assets/charts/single_episode_task_model_radar.svg
CHANGED
|
|
|
|
docs/data/additional_development_directions.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"title": "Additional Development Directions",
|
| 3 |
-
"summary": "Concrete Xperience-10M project directions beyond the current minimal baselines, Qwen3-Omni LoRA plan, Cosmos/world-model
|
| 4 |
"status": "planned_research_directions",
|
| 5 |
"public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
|
| 6 |
"directions": [
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Additional Development Directions",
|
| 3 |
+
"summary": "Concrete Xperience-10M project directions beyond the current minimal baselines, Qwen3-Omni LoRA plan, Cosmos/world-model track, and long-term native pretraining goal.",
|
| 4 |
"status": "planned_research_directions",
|
| 5 |
"public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
|
| 6 |
"directions": [
|
docs/data/artifact_index.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Artifact Index",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"status": "pass",
|
| 5 |
"artifact_count": 226,
|
| 6 |
"missing": [],
|
|
@@ -81,8 +81,8 @@
|
|
| 81 |
"surface": "website_hf",
|
| 82 |
"shows": "Machine-readable copy of the current project status for website and HF mirrors.",
|
| 83 |
"exists": true,
|
| 84 |
-
"bytes":
|
| 85 |
-
"sha256": "
|
| 86 |
},
|
| 87 |
{
|
| 88 |
"id": "research_roadmap",
|
|
@@ -92,8 +92,8 @@
|
|
| 92 |
"surface": "repo_hf",
|
| 93 |
"shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
|
| 94 |
"exists": true,
|
| 95 |
-
"bytes":
|
| 96 |
-
"sha256": "
|
| 97 |
},
|
| 98 |
{
|
| 99 |
"id": "research_roadmap_json",
|
|
@@ -103,8 +103,8 @@
|
|
| 103 |
"surface": "website_hf",
|
| 104 |
"shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
|
| 105 |
"exists": true,
|
| 106 |
-
"bytes":
|
| 107 |
-
"sha256": "
|
| 108 |
},
|
| 109 |
{
|
| 110 |
"id": "foundation_model_plan",
|
|
@@ -114,8 +114,8 @@
|
|
| 114 |
"surface": "repo_hf",
|
| 115 |
"shows": "Defines the post-data-gate backbone choices: Qwen3-Omni first, Cosmos 3 for world modeling, and VLA/policy models after action-target conversion.",
|
| 116 |
"exists": true,
|
| 117 |
-
"bytes":
|
| 118 |
-
"sha256": "
|
| 119 |
},
|
| 120 |
{
|
| 121 |
"id": "foundation_model_plan_json",
|
|
@@ -125,8 +125,8 @@
|
|
| 125 |
"surface": "website_hf",
|
| 126 |
"shows": "Machine-readable foundation-model selection matrix with source links, entry conditions, and evaluation additions.",
|
| 127 |
"exists": true,
|
| 128 |
-
"bytes":
|
| 129 |
-
"sha256": "
|
| 130 |
},
|
| 131 |
{
|
| 132 |
"id": "three_foundation_pipelines",
|
|
@@ -222,7 +222,7 @@
|
|
| 222 |
"path": "OMNI_MODEL_EXTENSION_CONTRACT.md",
|
| 223 |
"kind": "scaleup_contract",
|
| 224 |
"surface": "repo_hf",
|
| 225 |
-
"shows": "Defines the shared manifest, episode split, held-out evaluation, packaging, and public-safety rules for Qwen3-Omni,
|
| 226 |
"exists": true,
|
| 227 |
"bytes": 8900,
|
| 228 |
"sha256": "c4e51d0aa7536045c229418603a67c6b3c5f31c9d756ca7395cb0c9455f0ed6d"
|
|
@@ -323,8 +323,8 @@
|
|
| 323 |
"surface": "website_hf",
|
| 324 |
"shows": "Machine-readable enhancement pack for the website and Hugging Face mirrors.",
|
| 325 |
"exists": true,
|
| 326 |
-
"bytes":
|
| 327 |
-
"sha256": "
|
| 328 |
},
|
| 329 |
{
|
| 330 |
"id": "task_suite_enhancement_128_result",
|
|
@@ -345,8 +345,8 @@
|
|
| 345 |
"surface": "repo_hf",
|
| 346 |
"shows": "Regenerates the enhancement pack from committed 128-episode windows, baseline summaries, verified Qwen predictions, and Cosmos reference metrics.",
|
| 347 |
"exists": true,
|
| 348 |
-
"bytes":
|
| 349 |
-
"sha256": "
|
| 350 |
},
|
| 351 |
{
|
| 352 |
"id": "xperience10m_128_episode_feature_index",
|
|
@@ -510,8 +510,8 @@
|
|
| 510 |
"surface": "repo_hf",
|
| 511 |
"shows": "Records concrete non-backbone Xperience-10M development tracks: taxonomy, benchmark protocol, representation learning, skill graphs, affordances, 3D/4D memory, QA, and policy transfer.",
|
| 512 |
"exists": true,
|
| 513 |
-
"bytes":
|
| 514 |
-
"sha256": "
|
| 515 |
},
|
| 516 |
{
|
| 517 |
"id": "additional_development_directions_json",
|
|
@@ -521,8 +521,8 @@
|
|
| 521 |
"surface": "website_hf",
|
| 522 |
"shows": "Machine-readable additional development directions for the website and Hugging Face mirrors.",
|
| 523 |
"exists": true,
|
| 524 |
-
"bytes":
|
| 525 |
-
"sha256": "
|
| 526 |
},
|
| 527 |
{
|
| 528 |
"id": "xperience_embodied_foundation_pretraining",
|
|
@@ -610,7 +610,7 @@
|
|
| 610 |
"shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
|
| 611 |
"exists": true,
|
| 612 |
"bytes": 4432,
|
| 613 |
-
"sha256": "
|
| 614 |
},
|
| 615 |
{
|
| 616 |
"id": "source_alignment_validator",
|
|
@@ -631,8 +631,8 @@
|
|
| 631 |
"surface": "repo_hf",
|
| 632 |
"shows": "Publishes prepared Space, artifact dataset, and model bundles, including an explicit model-binary upload batch.",
|
| 633 |
"exists": true,
|
| 634 |
-
"bytes":
|
| 635 |
-
"sha256": "
|
| 636 |
},
|
| 637 |
{
|
| 638 |
"id": "github_package_dockerfile",
|
|
@@ -728,10 +728,10 @@
|
|
| 728 |
"path": "docs/data/unified_task_model_radar.json",
|
| 729 |
"kind": "website_data",
|
| 730 |
"surface": "website_hf",
|
| 731 |
-
"shows": "Stores normalized 20-axis radar values, raw task metrics, Qwen3/
|
| 732 |
"exists": true,
|
| 733 |
-
"bytes":
|
| 734 |
-
"sha256": "
|
| 735 |
},
|
| 736 |
{
|
| 737 |
"id": "single_episode_task_model_radar_json",
|
|
@@ -741,8 +741,8 @@
|
|
| 741 |
"surface": "website_hf",
|
| 742 |
"shows": "Machine-readable split radar for the one-episode Minimal and Neural MLP baselines, both scored on all 20 task contracts.",
|
| 743 |
"exists": true,
|
| 744 |
-
"bytes":
|
| 745 |
-
"sha256": "
|
| 746 |
},
|
| 747 |
{
|
| 748 |
"id": "episode128_task_model_radar_json",
|
|
@@ -750,10 +750,10 @@
|
|
| 750 |
"path": "docs/data/episode128_task_model_radar.json",
|
| 751 |
"kind": "website_data",
|
| 752 |
"surface": "website_hf",
|
| 753 |
-
"shows": "Machine-readable split radar for selected 128-episode metadata/raw baselines
|
| 754 |
"exists": true,
|
| 755 |
-
"bytes":
|
| 756 |
-
"sha256": "
|
| 757 |
},
|
| 758 |
{
|
| 759 |
"id": "task_method_20_result_matrix_json",
|
|
@@ -764,7 +764,7 @@
|
|
| 764 |
"shows": "Machine-readable 9-method by 20-task matrix where every method has 20 records and the current release is complete at 180/180 scored rows.",
|
| 765 |
"exists": true,
|
| 766 |
"bytes": 128509,
|
| 767 |
-
"sha256": "
|
| 768 |
},
|
| 769 |
{
|
| 770 |
"id": "task_method_20_result_matrix",
|
|
@@ -808,7 +808,7 @@
|
|
| 808 |
"shows": "Machine-readable check that scored JSON-backed matrix cells match their declared metric source values.",
|
| 809 |
"exists": true,
|
| 810 |
"bytes": 561,
|
| 811 |
-
"sha256": "
|
| 812 |
},
|
| 813 |
{
|
| 814 |
"id": "task_method_20_source_audit",
|
|
@@ -819,7 +819,7 @@
|
|
| 819 |
"shows": "Reader-facing source-value audit for the 180-result matrix.",
|
| 820 |
"exists": true,
|
| 821 |
"bytes": 447,
|
| 822 |
-
"sha256": "
|
| 823 |
},
|
| 824 |
{
|
| 825 |
"id": "two_evidence_line_map_chart",
|
|
@@ -838,7 +838,7 @@
|
|
| 838 |
"path": "docs/assets/charts/unified_task_model_radar.svg",
|
| 839 |
"kind": "generated_figure",
|
| 840 |
"surface": "website_hf",
|
| 841 |
-
"shows": "Compares minimal and neural MLP baselines across all 20 tasks, with Qwen3
|
| 842 |
"exists": true,
|
| 843 |
"bytes": 57938,
|
| 844 |
"sha256": "bb83b80b47fe679ebdce2c99378a4548120f1c8cc2d725b88e409d8c386dcbf8"
|
|
@@ -851,8 +851,8 @@
|
|
| 851 |
"surface": "website_hf",
|
| 852 |
"shows": "Separates the one-episode Minimal and Neural MLP 20/20 scored baselines into a clean two-polygon radar.",
|
| 853 |
"exists": true,
|
| 854 |
-
"bytes":
|
| 855 |
-
"sha256": "
|
| 856 |
},
|
| 857 |
{
|
| 858 |
"id": "episode128_task_model_radar_chart",
|
|
@@ -860,10 +860,10 @@
|
|
| 860 |
"path": "docs/assets/charts/episode128_task_model_radar.svg",
|
| 861 |
"kind": "generated_figure",
|
| 862 |
"surface": "website_hf",
|
| 863 |
-
"shows": "Separates the selected 128-episode methods: raw-feature simple/NN as complete 20/20 scored polygons
|
| 864 |
"exists": true,
|
| 865 |
-
"bytes":
|
| 866 |
-
"sha256": "
|
| 867 |
},
|
| 868 |
{
|
| 869 |
"id": "unified_task_model_radar_builder",
|
|
@@ -873,8 +873,8 @@
|
|
| 873 |
"surface": "repo_hf",
|
| 874 |
"shows": "Regenerates the direction-aware radar chart and machine-readable metric overlay JSON.",
|
| 875 |
"exists": true,
|
| 876 |
-
"bytes":
|
| 877 |
-
"sha256": "
|
| 878 |
},
|
| 879 |
{
|
| 880 |
"id": "task_method_20_gap_audit_builder",
|
|
@@ -915,7 +915,7 @@
|
|
| 915 |
"path": "results/omni_finetune/model_output_probe_readiness/model_output_probe_readiness.json",
|
| 916 |
"kind": "scaleup_status",
|
| 917 |
"surface": "repo_hf",
|
| 918 |
-
"shows": "Checks whether Qwen3
|
| 919 |
"exists": true,
|
| 920 |
"bytes": 4320,
|
| 921 |
"sha256": "11cff26749bf6ad8b8ee028b18e0b4be5713ed8b5325578caa03be25d894263b"
|
|
@@ -928,8 +928,8 @@
|
|
| 928 |
"surface": "repo_hf",
|
| 929 |
"shows": "Audits model-output split availability and writes a readiness report without assigning new numeric task scores.",
|
| 930 |
"exists": true,
|
| 931 |
-
"bytes":
|
| 932 |
-
"sha256": "
|
| 933 |
},
|
| 934 |
{
|
| 935 |
"id": "existing_model_output_task_probe",
|
|
@@ -937,7 +937,7 @@
|
|
| 937 |
"path": "results/omni_finetune/model_output_task_probes_20260616/summary.json",
|
| 938 |
"kind": "model_result",
|
| 939 |
"surface": "repo_hf",
|
| 940 |
-
"shows": "Scores task-specific Qwen3
|
| 941 |
"exists": true,
|
| 942 |
"bytes": 5951,
|
| 943 |
"sha256": "910477d2fba648605dda128d0ecd2a2c13cfa460573e350dc850014ac91c6c2b"
|
|
@@ -950,8 +950,8 @@
|
|
| 950 |
"surface": "repo_hf",
|
| 951 |
"shows": "Derives task-specific scores from committed verified model outputs without running new inference or backfilling absent targets.",
|
| 952 |
"exists": true,
|
| 953 |
-
"bytes":
|
| 954 |
-
"sha256": "
|
| 955 |
},
|
| 956 |
{
|
| 957 |
"id": "a100_128_metadata_task_baselines",
|
|
@@ -1071,8 +1071,8 @@
|
|
| 1071 |
"surface": "repo_hf",
|
| 1072 |
"shows": "Catalogs public figures, charts, modality thumbnails, dimensions, hashes, roles, and source scripts.",
|
| 1073 |
"exists": true,
|
| 1074 |
-
"bytes":
|
| 1075 |
-
"sha256": "
|
| 1076 |
},
|
| 1077 |
{
|
| 1078 |
"id": "figure_index_json",
|
|
@@ -1082,8 +1082,8 @@
|
|
| 1082 |
"surface": "website_hf",
|
| 1083 |
"shows": "Machine-readable visual asset index for website and Hugging Face mirrors.",
|
| 1084 |
"exists": true,
|
| 1085 |
-
"bytes":
|
| 1086 |
-
"sha256": "
|
| 1087 |
},
|
| 1088 |
{
|
| 1089 |
"id": "figure_index_builder",
|
|
@@ -1093,8 +1093,8 @@
|
|
| 1093 |
"surface": "repo_hf",
|
| 1094 |
"shows": "Regenerates visual-asset hashes, dimensions, and source-script provenance.",
|
| 1095 |
"exists": true,
|
| 1096 |
-
"bytes":
|
| 1097 |
-
"sha256": "
|
| 1098 |
},
|
| 1099 |
{
|
| 1100 |
"id": "brand_assets_json",
|
|
@@ -1160,7 +1160,7 @@
|
|
| 1160 |
"shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
|
| 1161 |
"exists": true,
|
| 1162 |
"bytes": 8640,
|
| 1163 |
-
"sha256": "
|
| 1164 |
},
|
| 1165 |
{
|
| 1166 |
"id": "public_surface_qa",
|
|
@@ -1179,10 +1179,10 @@
|
|
| 1179 |
"path": "PUBLIC_READER_MAP.md",
|
| 1180 |
"kind": "project_path",
|
| 1181 |
"surface": "repo_hf",
|
| 1182 |
-
"shows": "Provides the first-pass navigation layer for GitHub, GitHub Pages, Hugging Face mirrors,
|
| 1183 |
"exists": true,
|
| 1184 |
-
"bytes":
|
| 1185 |
-
"sha256": "
|
| 1186 |
},
|
| 1187 |
{
|
| 1188 |
"id": "public_reader_map_json",
|
|
@@ -1192,8 +1192,8 @@
|
|
| 1192 |
"surface": "website_hf",
|
| 1193 |
"shows": "Machine-readable public reader map used by the website and Hugging Face mirrors to keep entry points and surface responsibilities explicit.",
|
| 1194 |
"exists": true,
|
| 1195 |
-
"bytes":
|
| 1196 |
-
"sha256": "
|
| 1197 |
},
|
| 1198 |
{
|
| 1199 |
"id": "public_surface_qa_json",
|
|
@@ -1285,7 +1285,7 @@
|
|
| 1285 |
"volatile": true,
|
| 1286 |
"shows": "Records the last live GitHub/HF URL verification after upload.",
|
| 1287 |
"exists": true,
|
| 1288 |
-
"bytes":
|
| 1289 |
"hash_policy": "existence_and_size_only"
|
| 1290 |
},
|
| 1291 |
{
|
|
@@ -1296,8 +1296,8 @@
|
|
| 1296 |
"surface": "repo",
|
| 1297 |
"shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
|
| 1298 |
"exists": true,
|
| 1299 |
-
"bytes":
|
| 1300 |
-
"sha256": "
|
| 1301 |
},
|
| 1302 |
{
|
| 1303 |
"id": "reproducibility_contract",
|
|
@@ -1329,8 +1329,8 @@
|
|
| 1329 |
"surface": "repo_hf",
|
| 1330 |
"shows": "Generates the selective artifact catalog from local files.",
|
| 1331 |
"exists": true,
|
| 1332 |
-
"bytes":
|
| 1333 |
-
"sha256": "
|
| 1334 |
},
|
| 1335 |
{
|
| 1336 |
"id": "publication_audit",
|
|
@@ -1365,7 +1365,7 @@
|
|
| 1365 |
"volatile": true,
|
| 1366 |
"shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
|
| 1367 |
"exists": true,
|
| 1368 |
-
"bytes":
|
| 1369 |
"hash_policy": "existence_and_size_only"
|
| 1370 |
},
|
| 1371 |
{
|
|
@@ -1377,7 +1377,7 @@
|
|
| 1377 |
"volatile": true,
|
| 1378 |
"shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
|
| 1379 |
"exists": true,
|
| 1380 |
-
"bytes":
|
| 1381 |
"hash_policy": "existence_and_size_only"
|
| 1382 |
},
|
| 1383 |
{
|
|
@@ -1542,8 +1542,8 @@
|
|
| 1542 |
"surface": "website_hf",
|
| 1543 |
"shows": "Presents the task suite and sample modality thumbnails with metrics generated from committed files.",
|
| 1544 |
"exists": true,
|
| 1545 |
-
"bytes":
|
| 1546 |
-
"sha256": "
|
| 1547 |
},
|
| 1548 |
{
|
| 1549 |
"id": "modality_atlas",
|
|
@@ -1674,8 +1674,8 @@
|
|
| 1674 |
"surface": "repo_hf",
|
| 1675 |
"shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
|
| 1676 |
"exists": true,
|
| 1677 |
-
"bytes":
|
| 1678 |
-
"sha256": "
|
| 1679 |
},
|
| 1680 |
{
|
| 1681 |
"id": "omni_model_comparison_json",
|
|
@@ -1685,8 +1685,8 @@
|
|
| 1685 |
"surface": "repo_hf",
|
| 1686 |
"shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
|
| 1687 |
"exists": true,
|
| 1688 |
-
"bytes":
|
| 1689 |
-
"sha256": "
|
| 1690 |
},
|
| 1691 |
{
|
| 1692 |
"id": "cosmos3_nano_verified_summary",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Artifact Index",
|
| 3 |
+
"generated_at_utc": "2026-06-21T10:52:12+00:00",
|
| 4 |
"status": "pass",
|
| 5 |
"artifact_count": 226,
|
| 6 |
"missing": [],
|
|
|
|
| 81 |
"surface": "website_hf",
|
| 82 |
"shows": "Machine-readable copy of the current project status for website and HF mirrors.",
|
| 83 |
"exists": true,
|
| 84 |
+
"bytes": 23049,
|
| 85 |
+
"sha256": "9a06cc54d3b43362867a2fde9edc61d09f53df2d9ad761ecf95c862c76af31d2"
|
| 86 |
},
|
| 87 |
{
|
| 88 |
"id": "research_roadmap",
|
|
|
|
| 92 |
"surface": "repo_hf",
|
| 93 |
"shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
|
| 94 |
"exists": true,
|
| 95 |
+
"bytes": 15272,
|
| 96 |
+
"sha256": "559fa9e818f2c6fc7b926f880e9183200911317e70a26391f1830f4119ebc6b0"
|
| 97 |
},
|
| 98 |
{
|
| 99 |
"id": "research_roadmap_json",
|
|
|
|
| 103 |
"surface": "website_hf",
|
| 104 |
"shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
|
| 105 |
"exists": true,
|
| 106 |
+
"bytes": 14129,
|
| 107 |
+
"sha256": "a06d6525d9532b8608bf7be81eb9387deca3159b7c42bf38e107b4096953f351"
|
| 108 |
},
|
| 109 |
{
|
| 110 |
"id": "foundation_model_plan",
|
|
|
|
| 114 |
"surface": "repo_hf",
|
| 115 |
"shows": "Defines the post-data-gate backbone choices: Qwen3-Omni first, Cosmos 3 for world modeling, and VLA/policy models after action-target conversion.",
|
| 116 |
"exists": true,
|
| 117 |
+
"bytes": 11003,
|
| 118 |
+
"sha256": "24047e8692f69927d3fabf3c01058278e85651355f3749886493159971120cc6"
|
| 119 |
},
|
| 120 |
{
|
| 121 |
"id": "foundation_model_plan_json",
|
|
|
|
| 125 |
"surface": "website_hf",
|
| 126 |
"shows": "Machine-readable foundation-model selection matrix with source links, entry conditions, and evaluation additions.",
|
| 127 |
"exists": true,
|
| 128 |
+
"bytes": 13925,
|
| 129 |
+
"sha256": "77d4b2d5918ef1f776de6d29d34d523de95ba58df9526e2b255bed567434f932"
|
| 130 |
},
|
| 131 |
{
|
| 132 |
"id": "three_foundation_pipelines",
|
|
|
|
| 222 |
"path": "OMNI_MODEL_EXTENSION_CONTRACT.md",
|
| 223 |
"kind": "scaleup_contract",
|
| 224 |
"surface": "repo_hf",
|
| 225 |
+
"shows": "Defines the shared manifest, episode split, held-out evaluation, packaging, and public-safety rules for Qwen3-Omni, Cosmos3, and VLA/policy model tracks.",
|
| 226 |
"exists": true,
|
| 227 |
"bytes": 8900,
|
| 228 |
"sha256": "c4e51d0aa7536045c229418603a67c6b3c5f31c9d756ca7395cb0c9455f0ed6d"
|
|
|
|
| 323 |
"surface": "website_hf",
|
| 324 |
"shows": "Machine-readable enhancement pack for the website and Hugging Face mirrors.",
|
| 325 |
"exists": true,
|
| 326 |
+
"bytes": 20196,
|
| 327 |
+
"sha256": "9e1a3339425981dcf7931bf08684860864598bf679d0df86f93c656bacdb71bf"
|
| 328 |
},
|
| 329 |
{
|
| 330 |
"id": "task_suite_enhancement_128_result",
|
|
|
|
| 345 |
"surface": "repo_hf",
|
| 346 |
"shows": "Regenerates the enhancement pack from committed 128-episode windows, baseline summaries, verified Qwen predictions, and Cosmos reference metrics.",
|
| 347 |
"exists": true,
|
| 348 |
+
"bytes": 27225,
|
| 349 |
+
"sha256": "86e6098506b365cc92a9658d347645c285c5f61b5113eeaf1d170df0e2d7cc8f"
|
| 350 |
},
|
| 351 |
{
|
| 352 |
"id": "xperience10m_128_episode_feature_index",
|
|
|
|
| 510 |
"surface": "repo_hf",
|
| 511 |
"shows": "Records concrete non-backbone Xperience-10M development tracks: taxonomy, benchmark protocol, representation learning, skill graphs, affordances, 3D/4D memory, QA, and policy transfer.",
|
| 512 |
"exists": true,
|
| 513 |
+
"bytes": 3136,
|
| 514 |
+
"sha256": "decdd359d89694fe10873dcce6cee23e991de1b874ade72643314e879ade784e"
|
| 515 |
},
|
| 516 |
{
|
| 517 |
"id": "additional_development_directions_json",
|
|
|
|
| 521 |
"surface": "website_hf",
|
| 522 |
"shows": "Machine-readable additional development directions for the website and Hugging Face mirrors.",
|
| 523 |
"exists": true,
|
| 524 |
+
"bytes": 6120,
|
| 525 |
+
"sha256": "669d1523f767a8eda22bbe96ab54af99e102496a3d27f7dd850e08e2724e661f"
|
| 526 |
},
|
| 527 |
{
|
| 528 |
"id": "xperience_embodied_foundation_pretraining",
|
|
|
|
| 610 |
"shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
|
| 611 |
"exists": true,
|
| 612 |
"bytes": 4432,
|
| 613 |
+
"sha256": "db279081759eebb09a4ba53c56fb17a14f3546e13d058100494ac7745b901a1c"
|
| 614 |
},
|
| 615 |
{
|
| 616 |
"id": "source_alignment_validator",
|
|
|
|
| 631 |
"surface": "repo_hf",
|
| 632 |
"shows": "Publishes prepared Space, artifact dataset, and model bundles, including an explicit model-binary upload batch.",
|
| 633 |
"exists": true,
|
| 634 |
+
"bytes": 25159,
|
| 635 |
+
"sha256": "a74451a7d717661e1499b98631d825f4db8c6b51b1e9bafd73966697eb04258a"
|
| 636 |
},
|
| 637 |
{
|
| 638 |
"id": "github_package_dockerfile",
|
|
|
|
| 728 |
"path": "docs/data/unified_task_model_radar.json",
|
| 729 |
"kind": "website_data",
|
| 730 |
"surface": "website_hf",
|
| 731 |
+
"shows": "Stores normalized 20-axis radar values, raw task metrics, Qwen3-Omni/Cosmos3 overlay mappings, method-card caveats, proxy flags, and source artifacts.",
|
| 732 |
"exists": true,
|
| 733 |
+
"bytes": 228815,
|
| 734 |
+
"sha256": "862376178e8b0d01b536f49a18b7934a373494f8b36080790f616438ec0e035e"
|
| 735 |
},
|
| 736 |
{
|
| 737 |
"id": "single_episode_task_model_radar_json",
|
|
|
|
| 741 |
"surface": "website_hf",
|
| 742 |
"shows": "Machine-readable split radar for the one-episode Minimal and Neural MLP baselines, both scored on all 20 task contracts.",
|
| 743 |
"exists": true,
|
| 744 |
+
"bytes": 51107,
|
| 745 |
+
"sha256": "5f2ebb41e8488446ea5c5cd2cb75bbedce688433feffe1412288de56b133bd5c"
|
| 746 |
},
|
| 747 |
{
|
| 748 |
"id": "episode128_task_model_radar_json",
|
|
|
|
| 750 |
"path": "docs/data/episode128_task_model_radar.json",
|
| 751 |
"kind": "website_data",
|
| 752 |
"surface": "website_hf",
|
| 753 |
+
"shows": "Machine-readable split radar for selected 128-episode metadata/raw baselines, Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano, now complete at 140/140 scored rows with proxy notes retained.",
|
| 754 |
"exists": true,
|
| 755 |
+
"bytes": 184992,
|
| 756 |
+
"sha256": "385704db90443d74903f365e90b27538020f5574c96f296bbf63173f488a645d"
|
| 757 |
},
|
| 758 |
{
|
| 759 |
"id": "task_method_20_result_matrix_json",
|
|
|
|
| 764 |
"shows": "Machine-readable 9-method by 20-task matrix where every method has 20 records and the current release is complete at 180/180 scored rows.",
|
| 765 |
"exists": true,
|
| 766 |
"bytes": 128509,
|
| 767 |
+
"sha256": "96082daa33771963ac40b7d719df00a76ec443508a3d3101cb6dd82d87965729"
|
| 768 |
},
|
| 769 |
{
|
| 770 |
"id": "task_method_20_result_matrix",
|
|
|
|
| 808 |
"shows": "Machine-readable check that scored JSON-backed matrix cells match their declared metric source values.",
|
| 809 |
"exists": true,
|
| 810 |
"bytes": 561,
|
| 811 |
+
"sha256": "cbe9be1ea3d62b253780aade9c51cb7f3a5882df185927186ee6a1d6516ad3a6"
|
| 812 |
},
|
| 813 |
{
|
| 814 |
"id": "task_method_20_source_audit",
|
|
|
|
| 819 |
"shows": "Reader-facing source-value audit for the 180-result matrix.",
|
| 820 |
"exists": true,
|
| 821 |
"bytes": 447,
|
| 822 |
+
"sha256": "dfcde22c9350858d0df6d881533f63ba6838fc980b62f0b68770f9b708fcde85"
|
| 823 |
},
|
| 824 |
{
|
| 825 |
"id": "two_evidence_line_map_chart",
|
|
|
|
| 838 |
"path": "docs/assets/charts/unified_task_model_radar.svg",
|
| 839 |
"kind": "generated_figure",
|
| 840 |
"surface": "website_hf",
|
| 841 |
+
"shows": "Compares minimal and neural MLP baselines across all 20 tasks, with Qwen3-Omni and Cosmos3 task-aligned overlays.",
|
| 842 |
"exists": true,
|
| 843 |
"bytes": 57938,
|
| 844 |
"sha256": "bb83b80b47fe679ebdce2c99378a4548120f1c8cc2d725b88e409d8c386dcbf8"
|
|
|
|
| 851 |
"surface": "website_hf",
|
| 852 |
"shows": "Separates the one-episode Minimal and Neural MLP 20/20 scored baselines into a clean two-polygon radar.",
|
| 853 |
"exists": true,
|
| 854 |
+
"bytes": 35232,
|
| 855 |
+
"sha256": "87b52a7dead40358f1778dda43ade4d2e875ac98e507e01ca007084363e5977e"
|
| 856 |
},
|
| 857 |
{
|
| 858 |
"id": "episode128_task_model_radar_chart",
|
|
|
|
| 860 |
"path": "docs/assets/charts/episode128_task_model_radar.svg",
|
| 861 |
"kind": "generated_figure",
|
| 862 |
"surface": "website_hf",
|
| 863 |
+
"shows": "Separates the selected 128-episode methods: raw-feature simple/NN as complete 20/20 scored polygons plus metadata, Qwen3-Omni, Cosmos3-Super, and Cosmos3-Nano task-aligned overlays.",
|
| 864 |
"exists": true,
|
| 865 |
+
"bytes": 51915,
|
| 866 |
+
"sha256": "047ea4b05a04f6734e2afcf792863559dc8f3091eae88a97ff90e8b038a423f4"
|
| 867 |
},
|
| 868 |
{
|
| 869 |
"id": "unified_task_model_radar_builder",
|
|
|
|
| 873 |
"surface": "repo_hf",
|
| 874 |
"shows": "Regenerates the direction-aware radar chart and machine-readable metric overlay JSON.",
|
| 875 |
"exists": true,
|
| 876 |
+
"bytes": 68610,
|
| 877 |
+
"sha256": "96bc2df0de5a9e512d69961ddb13ea87b26ef01f1f943f5a78a6dc373400949d"
|
| 878 |
},
|
| 879 |
{
|
| 880 |
"id": "task_method_20_gap_audit_builder",
|
|
|
|
| 915 |
"path": "results/omni_finetune/model_output_probe_readiness/model_output_probe_readiness.json",
|
| 916 |
"kind": "scaleup_status",
|
| 917 |
"surface": "repo_hf",
|
| 918 |
+
"shows": "Checks whether Qwen3-Omni and Cosmos3 runs have train, validation, and test prediction files before extending model overlays to all 20 task contracts.",
|
| 919 |
"exists": true,
|
| 920 |
"bytes": 4320,
|
| 921 |
"sha256": "11cff26749bf6ad8b8ee028b18e0b4be5713ed8b5325578caa03be25d894263b"
|
|
|
|
| 928 |
"surface": "repo_hf",
|
| 929 |
"shows": "Audits model-output split availability and writes a readiness report without assigning new numeric task scores.",
|
| 930 |
"exists": true,
|
| 931 |
+
"bytes": 10526,
|
| 932 |
+
"sha256": "2b95834c75b0c90ceefe2c20381b3997a63f283b733186e07dea9e2778c78fad"
|
| 933 |
},
|
| 934 |
{
|
| 935 |
"id": "existing_model_output_task_probe",
|
|
|
|
| 937 |
"path": "results/omni_finetune/model_output_task_probes_20260616/summary.json",
|
| 938 |
"kind": "model_result",
|
| 939 |
"surface": "repo_hf",
|
| 940 |
+
"shows": "Scores task-specific Qwen3-Omni and Cosmos3 overlays only where verified held-out prediction JSON or compact target maps already contain the required targets.",
|
| 941 |
"exists": true,
|
| 942 |
"bytes": 5951,
|
| 943 |
"sha256": "910477d2fba648605dda128d0ecd2a2c13cfa460573e350dc850014ac91c6c2b"
|
|
|
|
| 950 |
"surface": "repo_hf",
|
| 951 |
"shows": "Derives task-specific scores from committed verified model outputs without running new inference or backfilling absent targets.",
|
| 952 |
"exists": true,
|
| 953 |
+
"bytes": 69423,
|
| 954 |
+
"sha256": "43086745ba53f5a4da1a39b9c223914707ab51b027555c91bea27c0bc152a27f"
|
| 955 |
},
|
| 956 |
{
|
| 957 |
"id": "a100_128_metadata_task_baselines",
|
|
|
|
| 1071 |
"surface": "repo_hf",
|
| 1072 |
"shows": "Catalogs public figures, charts, modality thumbnails, dimensions, hashes, roles, and source scripts.",
|
| 1073 |
"exists": true,
|
| 1074 |
+
"bytes": 7014,
|
| 1075 |
+
"sha256": "1087774a85614f12871418bb9fa375b98121596eb11dcdc22d324b943fb9d313"
|
| 1076 |
},
|
| 1077 |
{
|
| 1078 |
"id": "figure_index_json",
|
|
|
|
| 1082 |
"surface": "website_hf",
|
| 1083 |
"shows": "Machine-readable visual asset index for website and Hugging Face mirrors.",
|
| 1084 |
"exists": true,
|
| 1085 |
+
"bytes": 19472,
|
| 1086 |
+
"sha256": "e56f76038a56ffc61e882d0201f13912af5cba3e5ade08b1bb912fba0acdcd24"
|
| 1087 |
},
|
| 1088 |
{
|
| 1089 |
"id": "figure_index_builder",
|
|
|
|
| 1093 |
"surface": "repo_hf",
|
| 1094 |
"shows": "Regenerates visual-asset hashes, dimensions, and source-script provenance.",
|
| 1095 |
"exists": true,
|
| 1096 |
+
"bytes": 16832,
|
| 1097 |
+
"sha256": "7c526bff01c282d81e4f64bbdb31c059953ea7868b75b0c3104826241280165f"
|
| 1098 |
},
|
| 1099 |
{
|
| 1100 |
"id": "brand_assets_json",
|
|
|
|
| 1160 |
"shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
|
| 1161 |
"exists": true,
|
| 1162 |
"bytes": 8640,
|
| 1163 |
+
"sha256": "3cb0aca2dca01448cb9bc5cbb519a91bc6397c08a1eaaa84c031e773221e5a0a"
|
| 1164 |
},
|
| 1165 |
{
|
| 1166 |
"id": "public_surface_qa",
|
|
|
|
| 1179 |
"path": "PUBLIC_READER_MAP.md",
|
| 1180 |
"kind": "project_path",
|
| 1181 |
"surface": "repo_hf",
|
| 1182 |
+
"shows": "Provides the first-pass navigation layer for GitHub, GitHub Pages, Hugging Face mirrors, Qwen3-Omni/Cosmos3 repos, evidence lines, and claim boundaries.",
|
| 1183 |
"exists": true,
|
| 1184 |
+
"bytes": 4948,
|
| 1185 |
+
"sha256": "7a7128fdde08f770338c3fe2d473565918c5633f948dec6a78a6b2a67938e91a"
|
| 1186 |
},
|
| 1187 |
{
|
| 1188 |
"id": "public_reader_map_json",
|
|
|
|
| 1192 |
"surface": "website_hf",
|
| 1193 |
"shows": "Machine-readable public reader map used by the website and Hugging Face mirrors to keep entry points and surface responsibilities explicit.",
|
| 1194 |
"exists": true,
|
| 1195 |
+
"bytes": 5971,
|
| 1196 |
+
"sha256": "3474f84ffa53aefabdbf8a75c466c271675162ce0f8a23ea3b6660951048072f"
|
| 1197 |
},
|
| 1198 |
{
|
| 1199 |
"id": "public_surface_qa_json",
|
|
|
|
| 1285 |
"volatile": true,
|
| 1286 |
"shows": "Records the last live GitHub/HF URL verification after upload.",
|
| 1287 |
"exists": true,
|
| 1288 |
+
"bytes": 184689,
|
| 1289 |
"hash_policy": "existence_and_size_only"
|
| 1290 |
},
|
| 1291 |
{
|
|
|
|
| 1296 |
"surface": "repo",
|
| 1297 |
"shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
|
| 1298 |
"exists": true,
|
| 1299 |
+
"bytes": 67652,
|
| 1300 |
+
"sha256": "47c6e5e0d93a881db045842ef98656d04c74cf7605f33a56b8d4daecf97fb547"
|
| 1301 |
},
|
| 1302 |
{
|
| 1303 |
"id": "reproducibility_contract",
|
|
|
|
| 1329 |
"surface": "repo_hf",
|
| 1330 |
"shows": "Generates the selective artifact catalog from local files.",
|
| 1331 |
"exists": true,
|
| 1332 |
+
"bytes": 67587,
|
| 1333 |
+
"sha256": "28a93ec92c91886388f5d42ab8e25af0b218e4644b733bc8f8230bc0f91aab65"
|
| 1334 |
},
|
| 1335 |
{
|
| 1336 |
"id": "publication_audit",
|
|
|
|
| 1365 |
"volatile": true,
|
| 1366 |
"shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
|
| 1367 |
"exists": true,
|
| 1368 |
+
"bytes": 1418066,
|
| 1369 |
"hash_policy": "existence_and_size_only"
|
| 1370 |
},
|
| 1371 |
{
|
|
|
|
| 1377 |
"volatile": true,
|
| 1378 |
"shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
|
| 1379 |
"exists": true,
|
| 1380 |
+
"bytes": 20657,
|
| 1381 |
"hash_policy": "existence_and_size_only"
|
| 1382 |
},
|
| 1383 |
{
|
|
|
|
| 1542 |
"surface": "website_hf",
|
| 1543 |
"shows": "Presents the task suite and sample modality thumbnails with metrics generated from committed files.",
|
| 1544 |
"exists": true,
|
| 1545 |
+
"bytes": 1903454,
|
| 1546 |
+
"sha256": "6667eb856cf61ada9f868807b5d5c6ccde06e4f791b2f9dd567d98b71b307415"
|
| 1547 |
},
|
| 1548 |
{
|
| 1549 |
"id": "modality_atlas",
|
|
|
|
| 1674 |
"surface": "repo_hf",
|
| 1675 |
"shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
|
| 1676 |
"exists": true,
|
| 1677 |
+
"bytes": 15983,
|
| 1678 |
+
"sha256": "4db248566972e811aac6ca06582f233414821624f00f9d4fc4a1b66b2e00401f"
|
| 1679 |
},
|
| 1680 |
{
|
| 1681 |
"id": "omni_model_comparison_json",
|
|
|
|
| 1685 |
"surface": "repo_hf",
|
| 1686 |
"shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
|
| 1687 |
"exists": true,
|
| 1688 |
+
"bytes": 82088,
|
| 1689 |
+
"sha256": "82ccc2932cad63a9ebad85da53e694b18ef626aa3720bda3ed5da30f3dc5e121"
|
| 1690 |
},
|
| 1691 |
{
|
| 1692 |
"id": "cosmos3_nano_verified_summary",
|
docs/data/episode128_task_model_radar.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"title": "128-Episode 20-Task Radar",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
-
"description": "Selected 128-episode metadata/raw baselines plus verified Qwen3
|
| 6 |
"task_count": 20,
|
| 7 |
"method_count": 7,
|
| 8 |
"method_task_record_count": 140,
|
|
@@ -12,7 +12,7 @@
|
|
| 12 |
"lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
|
| 13 |
"raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
|
| 14 |
"result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
|
| 15 |
-
"foundation_model_overlay": "Qwen3
|
| 16 |
"metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
|
| 17 |
"raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
|
| 18 |
},
|
|
|
|
| 1 |
{
|
| 2 |
"title": "128-Episode 20-Task Radar",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-21T10:47:17+00:00",
|
| 5 |
+
"description": "Selected 128-episode metadata/raw baselines plus verified Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano diagnostics. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.",
|
| 6 |
"task_count": 20,
|
| 7 |
"method_count": 7,
|
| 8 |
"method_task_record_count": 140,
|
|
|
|
| 12 |
"lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
|
| 13 |
"raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
|
| 14 |
"result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
|
| 15 |
+
"foundation_model_overlay": "Qwen3-Omni and Cosmos3 points are plotted only on task-aligned axes. Scoreless records mean the public result does not evaluate that task contract.",
|
| 16 |
"metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
|
| 17 |
"raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
|
| 18 |
},
|
docs/data/figure_index.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Figure Index",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"scope": "Public figures, diagrams, charts, and derived modality thumbnails. Raw Xperience-10M videos, annotations, RRD files, and Qwen weights are excluded.",
|
| 6 |
"figure_count": 29,
|
| 7 |
"figures": [
|
|
@@ -64,12 +64,12 @@
|
|
| 64 |
"source_script": "scripts/render_task_suite_infographic.py",
|
| 65 |
"surface": "README, website, HF Space, artifact dataset, model card",
|
| 66 |
"exists": true,
|
| 67 |
-
"bytes":
|
| 68 |
-
"sha256": "
|
| 69 |
"dimensions": {
|
| 70 |
"format": "PNG",
|
| 71 |
"width": 1800,
|
| 72 |
-
"height":
|
| 73 |
},
|
| 74 |
"source_script_exists": true
|
| 75 |
},
|
|
@@ -81,8 +81,8 @@
|
|
| 81 |
"source_script": "scripts/generate_visualizations.py",
|
| 82 |
"surface": "README, website, HF artifact dataset",
|
| 83 |
"exists": true,
|
| 84 |
-
"bytes":
|
| 85 |
-
"sha256": "
|
| 86 |
"dimensions": {
|
| 87 |
"format": "PNG",
|
| 88 |
"width": 1800,
|
|
@@ -149,8 +149,8 @@
|
|
| 149 |
"source_script": "scripts/render_foundation_pipeline_diagrams.py",
|
| 150 |
"surface": "README, website, HF Space, artifact dataset, model card",
|
| 151 |
"exists": true,
|
| 152 |
-
"bytes":
|
| 153 |
-
"sha256": "
|
| 154 |
"dimensions": {
|
| 155 |
"format": "PNG",
|
| 156 |
"width": 2560,
|
|
@@ -166,8 +166,8 @@
|
|
| 166 |
"source_script": "scripts/render_overview_figures.py",
|
| 167 |
"surface": "README, website, HF artifact dataset, model card",
|
| 168 |
"exists": true,
|
| 169 |
-
"bytes":
|
| 170 |
-
"sha256": "
|
| 171 |
"dimensions": {
|
| 172 |
"format": "PNG",
|
| 173 |
"width": 1800,
|
|
@@ -356,8 +356,8 @@
|
|
| 356 |
"source_script": "scripts/generate_visualizations.py",
|
| 357 |
"surface": "website directions",
|
| 358 |
"exists": true,
|
| 359 |
-
"bytes":
|
| 360 |
-
"sha256": "
|
| 361 |
"dimensions": {
|
| 362 |
"format": "SVG",
|
| 363 |
"width": 1180,
|
|
@@ -410,8 +410,8 @@
|
|
| 410 |
"source_script": "scripts/build_unified_task_model_radar.py",
|
| 411 |
"surface": "website unified task section, README, HF mirrors",
|
| 412 |
"exists": true,
|
| 413 |
-
"bytes":
|
| 414 |
-
"sha256": "
|
| 415 |
"dimensions": {
|
| 416 |
"format": "SVG",
|
| 417 |
"width": 2400,
|
|
@@ -428,8 +428,8 @@
|
|
| 428 |
"source_script": "scripts/build_unified_task_model_radar.py",
|
| 429 |
"surface": "website unified task section, README, HF mirrors",
|
| 430 |
"exists": true,
|
| 431 |
-
"bytes":
|
| 432 |
-
"sha256": "
|
| 433 |
"dimensions": {
|
| 434 |
"format": "SVG",
|
| 435 |
"width": 2400,
|
|
@@ -442,12 +442,12 @@
|
|
| 442 |
"id": "episode128_task_model_radar",
|
| 443 |
"title": "128-episode 20-task model radar",
|
| 444 |
"path": "docs/assets/charts/episode128_task_model_radar.svg",
|
| 445 |
-
"role": "Twenty-axis split radar for selected 128-episode methods: raw-feature simple/NN as complete scored polygons
|
| 446 |
"source_script": "scripts/build_unified_task_model_radar.py",
|
| 447 |
"surface": "website unified task section, README, HF mirrors",
|
| 448 |
"exists": true,
|
| 449 |
-
"bytes":
|
| 450 |
-
"sha256": "
|
| 451 |
"dimensions": {
|
| 452 |
"format": "SVG",
|
| 453 |
"width": 2400,
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Figure Index",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-21T10:52:12+00:00",
|
| 5 |
"scope": "Public figures, diagrams, charts, and derived modality thumbnails. Raw Xperience-10M videos, annotations, RRD files, and Qwen weights are excluded.",
|
| 6 |
"figure_count": 29,
|
| 7 |
"figures": [
|
|
|
|
| 64 |
"source_script": "scripts/render_task_suite_infographic.py",
|
| 65 |
"surface": "README, website, HF Space, artifact dataset, model card",
|
| 66 |
"exists": true,
|
| 67 |
+
"bytes": 1903454,
|
| 68 |
+
"sha256": "6667eb856cf61ada9f868807b5d5c6ccde06e4f791b2f9dd567d98b71b307415",
|
| 69 |
"dimensions": {
|
| 70 |
"format": "PNG",
|
| 71 |
"width": 1800,
|
| 72 |
+
"height": 7600
|
| 73 |
},
|
| 74 |
"source_script_exists": true
|
| 75 |
},
|
|
|
|
| 81 |
"source_script": "scripts/generate_visualizations.py",
|
| 82 |
"surface": "README, website, HF artifact dataset",
|
| 83 |
"exists": true,
|
| 84 |
+
"bytes": 711222,
|
| 85 |
+
"sha256": "4db6a6353d3f1e49bae12447e1a78a874aa780d60e9817f3052ac0d0acf2f7b2",
|
| 86 |
"dimensions": {
|
| 87 |
"format": "PNG",
|
| 88 |
"width": 1800,
|
|
|
|
| 149 |
"source_script": "scripts/render_foundation_pipeline_diagrams.py",
|
| 150 |
"surface": "README, website, HF Space, artifact dataset, model card",
|
| 151 |
"exists": true,
|
| 152 |
+
"bytes": 1853350,
|
| 153 |
+
"sha256": "e8d863cc5104602e464048b4bf48f9acf3a108495298d9ec15b2e9cf346f41f9",
|
| 154 |
"dimensions": {
|
| 155 |
"format": "PNG",
|
| 156 |
"width": 2560,
|
|
|
|
| 166 |
"source_script": "scripts/render_overview_figures.py",
|
| 167 |
"surface": "README, website, HF artifact dataset, model card",
|
| 168 |
"exists": true,
|
| 169 |
+
"bytes": 757827,
|
| 170 |
+
"sha256": "d83b75a6778033a716f1086dbe61298662d4b8f80cb8f52193d2cbdb1e8e31f7",
|
| 171 |
"dimensions": {
|
| 172 |
"format": "PNG",
|
| 173 |
"width": 1800,
|
|
|
|
| 356 |
"source_script": "scripts/generate_visualizations.py",
|
| 357 |
"surface": "website directions",
|
| 358 |
"exists": true,
|
| 359 |
+
"bytes": 5352,
|
| 360 |
+
"sha256": "506e12aa1b6c4fd50fb0c65714c7f0a92c02c40069cb879503471ba9b63d4afb",
|
| 361 |
"dimensions": {
|
| 362 |
"format": "SVG",
|
| 363 |
"width": 1180,
|
|
|
|
| 410 |
"source_script": "scripts/build_unified_task_model_radar.py",
|
| 411 |
"surface": "website unified task section, README, HF mirrors",
|
| 412 |
"exists": true,
|
| 413 |
+
"bytes": 57938,
|
| 414 |
+
"sha256": "bb83b80b47fe679ebdce2c99378a4548120f1c8cc2d725b88e409d8c386dcbf8",
|
| 415 |
"dimensions": {
|
| 416 |
"format": "SVG",
|
| 417 |
"width": 2400,
|
|
|
|
| 428 |
"source_script": "scripts/build_unified_task_model_radar.py",
|
| 429 |
"surface": "website unified task section, README, HF mirrors",
|
| 430 |
"exists": true,
|
| 431 |
+
"bytes": 35232,
|
| 432 |
+
"sha256": "87b52a7dead40358f1778dda43ade4d2e875ac98e507e01ca007084363e5977e",
|
| 433 |
"dimensions": {
|
| 434 |
"format": "SVG",
|
| 435 |
"width": 2400,
|
|
|
|
| 442 |
"id": "episode128_task_model_radar",
|
| 443 |
"title": "128-episode 20-task model radar",
|
| 444 |
"path": "docs/assets/charts/episode128_task_model_radar.svg",
|
| 445 |
+
"role": "Twenty-axis split radar for selected 128-episode methods: raw-feature simple/NN as complete scored polygons plus metadata, Qwen3-Omni, Cosmos3-Super, and Cosmos3-Nano task-aligned overlays.",
|
| 446 |
"source_script": "scripts/build_unified_task_model_radar.py",
|
| 447 |
"surface": "website unified task section, README, HF mirrors",
|
| 448 |
"exists": true,
|
| 449 |
+
"bytes": 51915,
|
| 450 |
+
"sha256": "047ea4b05a04f6734e2afcf792863559dc8f3091eae88a97ff90e8b038a423f4",
|
| 451 |
"dimensions": {
|
| 452 |
"format": "SVG",
|
| 453 |
"width": 2400,
|
docs/data/foundation_model_plan.json
CHANGED
|
@@ -230,7 +230,7 @@
|
|
| 230 |
},
|
| 231 |
{
|
| 232 |
"step": 4,
|
| 233 |
-
"name": "World-model
|
| 234 |
"action": "Promote Cosmos 3 beyond the current Nano compatibility and Super forward-dynamics runs only when loss metrics, preprocessing, and storage justify the added compute."
|
| 235 |
},
|
| 236 |
{
|
|
|
|
| 230 |
},
|
| 231 |
{
|
| 232 |
"step": 4,
|
| 233 |
+
"name": "World-model track",
|
| 234 |
"action": "Promote Cosmos 3 beyond the current Nano compatibility and Super forward-dynamics runs only when loss metrics, preprocessing, and storage justify the added compute."
|
| 235 |
},
|
| 236 |
{
|
docs/data/live_publication_status.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
"title": "Ropedia Xperience-10M Live Publication Status",
|
| 3 |
"status": "pass",
|
| 4 |
"checked_at_utc": "2026-06-20T21:56:07+00:00",
|
| 5 |
-
"scope": "Live GitHub Pages, GitHub raw, Hugging Face Space, artifact dataset, baseline model mirrors, and the Qwen3/Cosmos3 LoRA adapter repos when their upload packages exist locally.",
|
| 6 |
"hash_groups": [
|
| 7 |
{
|
| 8 |
"id": "task_suite_infographic",
|
|
|
|
| 2 |
"title": "Ropedia Xperience-10M Live Publication Status",
|
| 3 |
"status": "pass",
|
| 4 |
"checked_at_utc": "2026-06-20T21:56:07+00:00",
|
| 5 |
+
"scope": "Live GitHub Pages, GitHub raw, Hugging Face Space, artifact dataset, baseline model mirrors, and the Qwen3-Omni/Cosmos3 LoRA adapter repos when their upload packages exist locally.",
|
| 6 |
"hash_groups": [
|
| 7 |
{
|
| 8 |
"id": "task_suite_infographic",
|
docs/data/mirror_parity.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
docs/data/omni_finetune_verified_result.json
CHANGED
|
@@ -91,6 +91,6 @@
|
|
| 91 |
"Use results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before deciding whether v6 should become a formal release tag.",
|
| 92 |
"Use the v6 predictions for action/contact error analysis, and compare v5 for subtask, next-action, and object regressions.",
|
| 93 |
"Keep full-parameter Qwen runs as feasibility gates until there is a storage plan for checkpoints or mergeable full-weight deltas.",
|
| 94 |
-
"Use the verified Cosmos3-Super Forward-Dynamics LoRA package as a separate world-model
|
| 95 |
]
|
| 96 |
}
|
|
|
|
| 91 |
"Use results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before deciding whether v6 should become a formal release tag.",
|
| 92 |
"Use the v6 predictions for action/contact error analysis, and compare v5 for subtask, next-action, and object regressions.",
|
| 93 |
"Keep full-parameter Qwen runs as feasibility gates until there is a storage plan for checkpoints or mergeable full-weight deltas.",
|
| 94 |
+
"Use the verified Cosmos3-Super Forward-Dynamics LoRA package as a separate world-model artifact: it updates adapter weights over camera-pose proxy future-vision-velocity targets, not Qwen-style JSON action labels."
|
| 95 |
]
|
| 96 |
}
|
docs/data/omni_model_comparison.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"status": "pass",
|
| 5 |
"version_count": 3,
|
| 6 |
"model_group_count": 5,
|
|
@@ -8,7 +8,7 @@
|
|
| 8 |
"version_reading_notes": [
|
| 9 |
"Version 1 is the public-sample 20-task surface: original core heads, tasks 13-20, and the 180-row method-task matrix.",
|
| 10 |
"Version 2 is the selected 128-episode same-split simple/NN baseline alignment.",
|
| 11 |
-
"
|
| 12 |
],
|
| 13 |
"versions": [
|
| 14 |
{
|
|
@@ -305,7 +305,7 @@
|
|
| 305 |
"neural_primary_score": null
|
| 306 |
}
|
| 307 |
],
|
| 308 |
-
"interpretation": "This layer aligns the previous simple and neural baseline framing to the same selected 96/16/16 split used by the
|
| 309 |
},
|
| 310 |
{
|
| 311 |
"id": "v3_multi_episode_foundation_model_branches",
|
|
@@ -870,7 +870,7 @@
|
|
| 870 |
"neural_supported_task_count": 6
|
| 871 |
},
|
| 872 |
"weights": "metadata/text baseline artifacts; raw 128 sensor-feature model weights not yet complete",
|
| 873 |
-
"interpretation": "Same selected 96/16/16 split and task ids as the
|
| 874 |
}
|
| 875 |
],
|
| 876 |
"comparison_note": "This is the cleanest 1-episode versus 128-episode grouping for the same simple/NN task-head family, but the feature surface changes from raw public-sample features to public-safe 128-episode metadata/text features."
|
|
@@ -1683,7 +1683,7 @@
|
|
| 1683 |
"weights_repository": "none for this run: staged base nv-community/Cosmos3-Super weights were evaluated through vLLM; create a separate repo only after new adapter or fine-tuned weights exist"
|
| 1684 |
}
|
| 1685 |
],
|
| 1686 |
-
"comparison_note": "Cosmos3-Super is now represented by a verified 448-window held-out Reasoner evaluation on the same JSON task as Qwen3. It uses staged base weights through vLLM, so it is a
|
| 1687 |
},
|
| 1688 |
{
|
| 1689 |
"id": "cosmos3_super_forward_dynamics",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
|
| 3 |
+
"generated_at_utc": "2026-06-21T10:47:04+00:00",
|
| 4 |
"status": "pass",
|
| 5 |
"version_count": 3,
|
| 6 |
"model_group_count": 5,
|
|
|
|
| 8 |
"version_reading_notes": [
|
| 9 |
"Version 1 is the public-sample 20-task surface: original core heads, tasks 13-20, and the 180-row method-task matrix.",
|
| 10 |
"Version 2 is the selected 128-episode same-split simple/NN baseline alignment.",
|
| 11 |
+
"The selected-128 model-diagnostic group contains the current Qwen3-Omni LoRA JSON-task row, Cosmos3-Nano future-window compatibility result, Cosmos3-Super Reasoner base-weight JSON-task evaluation, and the separate Cosmos3-Super Forward-Dynamics LoRA adapter artifact."
|
| 12 |
],
|
| 13 |
"versions": [
|
| 14 |
{
|
|
|
|
| 305 |
"neural_primary_score": null
|
| 306 |
}
|
| 307 |
],
|
| 308 |
+
"interpretation": "This layer aligns the previous simple and neural baseline framing to the same selected 96/16/16 split used by the Qwen3-Omni and Cosmos3 diagnostics. It uses public-safe JSONL metadata/text features, so raw-feature-only tasks remain explicitly unsupported until 128-run sensor feature blocks exist."
|
| 309 |
},
|
| 310 |
{
|
| 311 |
"id": "v3_multi_episode_foundation_model_branches",
|
|
|
|
| 870 |
"neural_supported_task_count": 6
|
| 871 |
},
|
| 872 |
"weights": "metadata/text baseline artifacts; raw 128 sensor-feature model weights not yet complete",
|
| 873 |
+
"interpretation": "Same selected 96/16/16 split and task ids as the Qwen3-Omni and Cosmos3 diagnostics, but metadata/text features only."
|
| 874 |
}
|
| 875 |
],
|
| 876 |
"comparison_note": "This is the cleanest 1-episode versus 128-episode grouping for the same simple/NN task-head family, but the feature surface changes from raw public-sample features to public-safe 128-episode metadata/text features."
|
|
|
|
| 1683 |
"weights_repository": "none for this run: staged base nv-community/Cosmos3-Super weights were evaluated through vLLM; create a separate repo only after new adapter or fine-tuned weights exist"
|
| 1684 |
}
|
| 1685 |
],
|
| 1686 |
+
"comparison_note": "Cosmos3-Super is now represented by a verified 448-window held-out Reasoner evaluation on the same JSON task as Qwen3. It uses staged base weights through vLLM, so it is a Cosmos3 diagnostic, not a weight release. A camera-pose proxy forward-dynamics target export now passes the contract audit and schema-only packer smoke; the separate Forward-Dynamics LoRA group records the trainable adapter run and loss-based held-out evaluation."
|
| 1687 |
},
|
| 1688 |
{
|
| 1689 |
"id": "cosmos3_super_forward_dynamics",
|
docs/data/project_status.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Project Status",
|
| 3 |
"version": "2026-06-20",
|
| 4 |
"decision": "public_sample_pipeline_verified_128_enhancement_qwen3_v6_cosmos_comparison",
|
| 5 |
-
"research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, compares verified Qwen3-Omni and Cosmos3
|
| 6 |
"scope_boundary": {
|
| 7 |
"validated_episode_count": 1,
|
| 8 |
"aligned_frames": 5821,
|
|
@@ -145,7 +145,7 @@
|
|
| 145 |
"RESEARCH_ROADMAP.md",
|
| 146 |
"docs/data/research_roadmap.json"
|
| 147 |
],
|
| 148 |
-
"readout": "The roadmap connects public-sample task development to the final verified Qwen3-Omni diagnostic result, same-split baseline alignment, the no-new-episode 128-suite enhancement pack, action/subtask error analysis, robustness runs, world/policy
|
| 149 |
},
|
| 150 |
{
|
| 151 |
"area": "128-episode task-suite enhancement pack",
|
|
@@ -156,7 +156,7 @@
|
|
| 156 |
"results/omni_finetune/task_suite_enhancement_128_v1_20260608/enhancement_plan.json",
|
| 157 |
"scripts/omni/build_task_suite_enhancement_128.py"
|
| 158 |
],
|
| 159 |
-
"readout": "The current 3,808-window selected split can be stressed without more episodes by exporting denser and multiscale windows. The recommended next export is multiscale_20s10_40s20_80s40, estimated at 106,095 windows from observed frame spans; the pack also defines hierarchical action/subtask targets, raw-feature shard priorities for unsupported tasks, and
|
| 160 |
},
|
| 161 |
{
|
| 162 |
"area": "Foundation-model plan",
|
|
@@ -176,7 +176,7 @@
|
|
| 176 |
"scripts/omni/backbone_registry.py",
|
| 177 |
"scripts/omni/smoke_test_backbone_packaging.py"
|
| 178 |
],
|
| 179 |
-
"readout": "Future
|
| 180 |
},
|
| 181 |
{
|
| 182 |
"area": "Xperience Embodied Foundation Model",
|
|
@@ -253,7 +253,7 @@
|
|
| 253 |
"results/omni_finetune/OMNI_MODEL_COMPARISON.md",
|
| 254 |
"scripts/omni/build_omni_model_comparison.py"
|
| 255 |
],
|
| 256 |
-
"readout": "The public comparison now has two
|
| 257 |
},
|
| 258 |
{
|
| 259 |
"area": "Qwen3-Omni fine-tuning",
|
|
@@ -271,7 +271,7 @@
|
|
| 271 |
"readout": "The selected 96/16/16 episode split now has a current v6 rank64/lr5e-5 public-safe held-out package with 34,269 exported windows, 4,032 test predictions, validation/audit summaries, and a public LoRA adapter repo. JSON validity is 99.90%, meeting the 98% target; transition accuracy is 98.98%, contact accuracy is 81.77%, object micro-F1 is 30.65%, next-action accuracy is 4.31%, and action/subtask metrics remain weak. v6 improves action macro-F1 and contact accuracy versus v5, but v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics."
|
| 272 |
},
|
| 273 |
{
|
| 274 |
-
"area": "Cosmos3-Nano future-window
|
| 275 |
"status": "verified_compatibility_result",
|
| 276 |
"evidence": [
|
| 277 |
"configs/omni_backbones/cosmos_world_model.json",
|
|
@@ -279,10 +279,10 @@
|
|
| 279 |
"scripts/omni/eval_cosmos3_future_window_retrieval.py",
|
| 280 |
"results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json"
|
| 281 |
],
|
| 282 |
-
"readout": "The Cosmos3-Nano
|
| 283 |
},
|
| 284 |
{
|
| 285 |
-
"area": "Cosmos3-Super Reasoner
|
| 286 |
"status": "verified_base_weight_result",
|
| 287 |
"evidence": [
|
| 288 |
"configs/omni_backbones/cosmos3_super_reasoner.json",
|
|
@@ -314,7 +314,7 @@
|
|
| 314 |
"results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/verified_result_summary.json",
|
| 315 |
"results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/package_audit.json"
|
| 316 |
],
|
| 317 |
-
"readout": "The first fine-tuned Cosmos3-Super adapter
|
| 318 |
},
|
| 319 |
{
|
| 320 |
"area": "Raw Xperience-10M redistribution",
|
|
@@ -331,8 +331,8 @@
|
|
| 331 |
"Open docs/data/project_packet.json for the machine-readable project path.",
|
| 332 |
"Inspect RESEARCH_TAKEAWAYS.md and docs/data/research_takeaways.json before interpreting model scores.",
|
| 333 |
"Inspect RESEARCH_ROADMAP.md and docs/data/research_roadmap.json for the path from public-sample task work to multi-episode modeling.",
|
| 334 |
-
"Inspect FOUNDATION_MODEL_PLAN.md and docs/data/foundation_model_plan.json before choosing a backbone
|
| 335 |
-
"Inspect OMNI_MODEL_EXTENSION_CONTRACT.md and run python scripts/omni/backbone_registry.py --validate --json before adding a new
|
| 336 |
"Inspect XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md for the long-term full-corpus pretraining goal.",
|
| 337 |
"Inspect TASK_SUITE_20.md, docs/data/task_suite_20.json, docs/data/summary_metrics.json, and results/episode_task_suite/neural_mlp/ to check the unified 20-task outputs.",
|
| 338 |
"Inspect results/audio_ablation/AUDIO_ABLATION_SUMMARY.md before judging whether audio helps the current task suite.",
|
|
@@ -346,16 +346,16 @@
|
|
| 346 |
"Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
|
| 347 |
],
|
| 348 |
"current_reading_notes": [
|
| 349 |
-
"The latest Qwen3-Omni v6 diagnostic
|
| 350 |
"Use TASK_SUITE_ENHANCEMENT_128.md and docs/data/task_suite_enhancement_128.json to push the current 128-episode suite without more raw episodes through multiscale_20s10_40s20_80s40, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
|
| 351 |
-
"Use docs/data/omni_model_comparison.json to compare both views: the
|
| 352 |
"The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
|
| 353 |
-
"The Cosmos3-Nano future-window
|
| 354 |
"The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
|
| 355 |
"Audio is one of the synchronized source modalities in the current task representation.",
|
| 356 |
"The audio ablation report compares audio/no-audio variants across the original task contracts in results/audio_ablation/.",
|
| 357 |
-
"Foundation-model selection is explicit: Qwen3-Omni is the structured JSON baseline, Cosmos 3 is the world-model
|
| 358 |
-
"Future model
|
| 359 |
"The Xperience Embodied Foundation Model is a future native-pretraining goal, not a completed model or current benchmark."
|
| 360 |
]
|
| 361 |
}
|
|
|
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Project Status",
|
| 3 |
"version": "2026-06-20",
|
| 4 |
"decision": "public_sample_pipeline_verified_128_enhancement_qwen3_v6_cosmos_comparison",
|
| 5 |
+
"research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, compares verified Qwen3-Omni and Cosmos3 packages as early cross-episode diagnostics, and now records a no-new-episode enhancement pack for pushing the current 128-episode suite harder.",
|
| 6 |
"scope_boundary": {
|
| 7 |
"validated_episode_count": 1,
|
| 8 |
"aligned_frames": 5821,
|
|
|
|
| 145 |
"RESEARCH_ROADMAP.md",
|
| 146 |
"docs/data/research_roadmap.json"
|
| 147 |
],
|
| 148 |
+
"readout": "The roadmap connects public-sample task development to the final verified Qwen3-Omni diagnostic result, same-split baseline alignment, the no-new-episode 128-suite enhancement pack, action/subtask error analysis, robustness runs, world/policy tracks, and the future Xperience-native pretraining goal."
|
| 149 |
},
|
| 150 |
{
|
| 151 |
"area": "128-episode task-suite enhancement pack",
|
|
|
|
| 156 |
"results/omni_finetune/task_suite_enhancement_128_v1_20260608/enhancement_plan.json",
|
| 157 |
"scripts/omni/build_task_suite_enhancement_128.py"
|
| 158 |
],
|
| 159 |
+
"readout": "The current 3,808-window selected split can be stressed without more episodes by exporting denser and multiscale windows. The recommended next export is multiscale_20s10_40s20_80s40, estimated at 106,095 windows from observed frame spans; the pack also defines hierarchical action/subtask targets, raw-feature shard priorities for unsupported tasks, and Qwen3-Omni/Cosmos3 follow-up run cards."
|
| 160 |
},
|
| 161 |
{
|
| 162 |
"area": "Foundation-model plan",
|
|
|
|
| 176 |
"scripts/omni/backbone_registry.py",
|
| 177 |
"scripts/omni/smoke_test_backbone_packaging.py"
|
| 178 |
],
|
| 179 |
+
"readout": "Future Qwen3-Omni, Cosmos3-style, and VLA/policy tracks must keep the same episode split discipline, held-out metrics, validation gate, public-safe package contract, and explicit forbidden-artifact policy before reporting results."
|
| 180 |
},
|
| 181 |
{
|
| 182 |
"area": "Xperience Embodied Foundation Model",
|
|
|
|
| 253 |
"results/omni_finetune/OMNI_MODEL_COMPARISON.md",
|
| 254 |
"scripts/omni/build_omni_model_comparison.py"
|
| 255 |
],
|
| 256 |
+
"readout": "The public comparison now has two evidence lines plus a model-family grouping. The model grouping pairs 1-episode and 128-episode entries for task-head baselines, separates Qwen3-Omni sensor-adapter smoke from 128-episode LoRA diagnostics, separates Cosmos3-Nano future-window compatibility from Cosmos3-Super base-weight Reasoner evaluation, and adds Cosmos3-Super Forward-Dynamics LoRA as a loss-based fine-tuned adapter artifact."
|
| 257 |
},
|
| 258 |
{
|
| 259 |
"area": "Qwen3-Omni fine-tuning",
|
|
|
|
| 271 |
"readout": "The selected 96/16/16 episode split now has a current v6 rank64/lr5e-5 public-safe held-out package with 34,269 exported windows, 4,032 test predictions, validation/audit summaries, and a public LoRA adapter repo. JSON validity is 99.90%, meeting the 98% target; transition accuracy is 98.98%, contact accuracy is 81.77%, object micro-F1 is 30.65%, next-action accuracy is 4.31%, and action/subtask metrics remain weak. v6 improves action macro-F1 and contact accuracy versus v5, but v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics."
|
| 272 |
},
|
| 273 |
{
|
| 274 |
+
"area": "Cosmos3-Nano future-window package",
|
| 275 |
"status": "verified_compatibility_result",
|
| 276 |
"evidence": [
|
| 277 |
"configs/omni_backbones/cosmos_world_model.json",
|
|
|
|
| 279 |
"scripts/omni/eval_cosmos3_future_window_retrieval.py",
|
| 280 |
"results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json"
|
| 281 |
],
|
| 282 |
+
"readout": "The Cosmos3-Nano package now has a public-safe verified future-window compatibility result with 3,213 future-window samples, 378 held-out test predictions, future retrieval MRR 0.0221, temporal consistency 0.0952, transition accuracy 0.9683, and contact accuracy 0.7434. It is a compatibility adapter result, not a full Cosmos diffusion-weight fine-tune."
|
| 283 |
},
|
| 284 |
{
|
| 285 |
+
"area": "Cosmos3-Super Reasoner package",
|
| 286 |
"status": "verified_base_weight_result",
|
| 287 |
"evidence": [
|
| 288 |
"configs/omni_backbones/cosmos3_super_reasoner.json",
|
|
|
|
| 314 |
"results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/verified_result_summary.json",
|
| 315 |
"results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/package_audit.json"
|
| 316 |
],
|
| 317 |
+
"readout": "The first fine-tuned Cosmos3-Super adapter artifact is verified as a public-safe package: 8-GPU FSDP LoRA, 26.2M adapter parameters, 2,848 train rows, 512 validation rows, 448 held-out test rows, validation MSE 4.0082, and test MSE 3.6853. The package excludes adapter safetensors; weights are published separately at cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep."
|
| 318 |
},
|
| 319 |
{
|
| 320 |
"area": "Raw Xperience-10M redistribution",
|
|
|
|
| 331 |
"Open docs/data/project_packet.json for the machine-readable project path.",
|
| 332 |
"Inspect RESEARCH_TAKEAWAYS.md and docs/data/research_takeaways.json before interpreting model scores.",
|
| 333 |
"Inspect RESEARCH_ROADMAP.md and docs/data/research_roadmap.json for the path from public-sample task work to multi-episode modeling.",
|
| 334 |
+
"Inspect FOUNDATION_MODEL_PLAN.md and docs/data/foundation_model_plan.json before choosing a backbone track.",
|
| 335 |
+
"Inspect OMNI_MODEL_EXTENSION_CONTRACT.md and run python scripts/omni/backbone_registry.py --validate --json before adding a new Qwen3-Omni, Cosmos3-style, or VLA/policy track.",
|
| 336 |
"Inspect XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md for the long-term full-corpus pretraining goal.",
|
| 337 |
"Inspect TASK_SUITE_20.md, docs/data/task_suite_20.json, docs/data/summary_metrics.json, and results/episode_task_suite/neural_mlp/ to check the unified 20-task outputs.",
|
| 338 |
"Inspect results/audio_ablation/AUDIO_ABLATION_SUMMARY.md before judging whether audio helps the current task suite.",
|
|
|
|
| 346 |
"Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
|
| 347 |
],
|
| 348 |
"current_reading_notes": [
|
| 349 |
+
"The latest Qwen3-Omni v6 diagnostic run is verified and meets the strict-JSON target, but action/subtask held-out quality is still weak: JSON validity is 99.90%, action macro-F1 is 0.0029, and subtask accuracy is 0.0037. v5 remains the pinned prior release row because it is still stronger on several metrics.",
|
| 350 |
"Use TASK_SUITE_ENHANCEMENT_128.md and docs/data/task_suite_enhancement_128.json to push the current 128-episode suite without more raw episodes through multiscale_20s10_40s20_80s40, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
|
| 351 |
+
"Use docs/data/omni_model_comparison.json to compare three views: the 1-sample evidence line, the selected-128 evidence line, and the model-family grouping for task heads, Qwen3-Omni LoRA, Cosmos3-Nano, and Cosmos3-Super.",
|
| 352 |
"The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
|
| 353 |
+
"The Cosmos3-Nano future-window package is verified as a compatibility adapter result, Cosmos3-Super Reasoner is verified as a base-weight evaluation, and Cosmos3-Super Forward-Dynamics LoRA is verified as the first fine-tuned Super adapter artifact. Cosmos3-Super adapter weights belong in cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep; verified_public packages exclude safetensors.",
|
| 354 |
"The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
|
| 355 |
"Audio is one of the synchronized source modalities in the current task representation.",
|
| 356 |
"The audio ablation report compares audio/no-audio variants across the original task contracts in results/audio_ablation/.",
|
| 357 |
+
"Foundation-model selection is explicit: Qwen3-Omni is the structured JSON baseline, Cosmos 3 is the world-model track with Nano compatibility and Super forward-dynamics LoRA results, and policy models such as OpenVLA/openpi/GR00T wait for robot-compatible action-target conversion.",
|
| 358 |
+
"Future model tracks should be added through the backbone registry and verified package contract, not as one-off result folders with incompatible metrics or publication rules.",
|
| 359 |
"The Xperience Embodied Foundation Model is a future native-pretraining goal, not a completed model or current benchmark."
|
| 360 |
]
|
| 361 |
}
|
docs/data/public_reader_map.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Reader Map",
|
| 3 |
"status": "published",
|
| 4 |
-
"purpose": "Organize the GitHub repo, GitHub Pages dashboard, Hugging Face Space, artifact dataset, baseline model repo, consolidated weights/results repo, and
|
| 5 |
"fast_paths": [
|
| 6 |
{
|
| 7 |
"reader_goal": "Understand the project in one pass",
|
|
@@ -92,13 +92,13 @@
|
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"surface": "HF weights/results repo",
|
| 95 |
-
"responsibility": "Consolidated baseline weights, Qwen3
|
| 96 |
"best_use": "Auditing all public-safe weight-bearing artifacts from one repo."
|
| 97 |
},
|
| 98 |
{
|
| 99 |
-
"surface": "Qwen3
|
| 100 |
-
"responsibility": "Adapter-specific public weights or package cards when a
|
| 101 |
-
"best_use": "Inspecting
|
| 102 |
}
|
| 103 |
],
|
| 104 |
"evidence_layers": [
|
|
@@ -121,8 +121,8 @@
|
|
| 121 |
"boundary": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
|
| 122 |
},
|
| 123 |
{
|
| 124 |
-
"claim_type": "Foundation-model
|
| 125 |
-
"public_evidence": ["Verified Qwen3
|
| 126 |
"boundary": "Numeric task scores appear only when a task-specific eval or probe exists."
|
| 127 |
},
|
| 128 |
{
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Reader Map",
|
| 3 |
"status": "published",
|
| 4 |
+
"purpose": "Organize the GitHub repo, GitHub Pages dashboard, Hugging Face Space, artifact dataset, baseline model repo, consolidated weights/results repo, and Qwen3-Omni/Cosmos3 repos without removing evidence.",
|
| 5 |
"fast_paths": [
|
| 6 |
{
|
| 7 |
"reader_goal": "Understand the project in one pass",
|
|
|
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"surface": "HF weights/results repo",
|
| 95 |
+
"responsibility": "Consolidated baseline weights, Qwen3-Omni v6 LoRA, Cosmos3-Super adapter/result artifacts, verified results, analysis files, and file-level manifest.",
|
| 96 |
"best_use": "Auditing all public-safe weight-bearing artifacts from one repo."
|
| 97 |
},
|
| 98 |
{
|
| 99 |
+
"surface": "Qwen3-Omni and Cosmos3 model repos",
|
| 100 |
+
"responsibility": "Adapter-specific public weights or package cards when a Qwen3-Omni or Cosmos3 run is verified and publishable.",
|
| 101 |
+
"best_use": "Inspecting Qwen3-Omni and Cosmos3 artifacts."
|
| 102 |
}
|
| 103 |
],
|
| 104 |
"evidence_layers": [
|
|
|
|
| 121 |
"boundary": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
|
| 122 |
},
|
| 123 |
{
|
| 124 |
+
"claim_type": "Foundation-model track quality",
|
| 125 |
+
"public_evidence": ["Verified Qwen3-Omni and Cosmos3 result packages", "model cards"],
|
| 126 |
"boundary": "Numeric task scores appear only when a task-specific eval or probe exists."
|
| 127 |
},
|
| 128 |
{
|
docs/data/public_surface_qa.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Project Surface",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
|
| 6 |
"checks": [
|
| 7 |
{
|
|
@@ -18,7 +18,7 @@
|
|
| 18 |
"website_integrity": {
|
| 19 |
"exists": true,
|
| 20 |
"status": "pass",
|
| 21 |
-
"generated_at_utc": "2026-06-
|
| 22 |
},
|
| 23 |
"rendered_site_check": {
|
| 24 |
"exists": true,
|
|
@@ -28,27 +28,27 @@
|
|
| 28 |
"task_surface_integrity": {
|
| 29 |
"exists": true,
|
| 30 |
"status": "pass",
|
| 31 |
-
"generated_at_utc": "2026-06-
|
| 32 |
},
|
| 33 |
"source_alignment": {
|
| 34 |
"exists": true,
|
| 35 |
"status": "pass",
|
| 36 |
-
"generated_at_utc": "2026-06-
|
| 37 |
},
|
| 38 |
"scale_up_status": {
|
| 39 |
"exists": true,
|
| 40 |
"status": "pass",
|
| 41 |
-
"generated_at_utc": "2026-06-
|
| 42 |
},
|
| 43 |
"publication_package": {
|
| 44 |
"exists": true,
|
| 45 |
"status": "pass",
|
| 46 |
-
"generated_at_utc": "2026-06-
|
| 47 |
},
|
| 48 |
"mirror_parity": {
|
| 49 |
"exists": true,
|
| 50 |
"status": "pass",
|
| 51 |
-
"generated_at_utc": "2026-06-
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"failures": {}
|
|
@@ -97,8 +97,8 @@
|
|
| 97 |
"marker_counts": {
|
| 98 |
"Ropedia Xperience-10M Task Suite": 20,
|
| 99 |
"Xperience-10M": 166,
|
| 100 |
-
"20-task":
|
| 101 |
-
"Qwen3-Omni":
|
| 102 |
"128-episode pilot": 1
|
| 103 |
}
|
| 104 |
},
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Project Surface",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-21T11:08:07+00:00",
|
| 5 |
"scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
|
| 6 |
"checks": [
|
| 7 |
{
|
|
|
|
| 18 |
"website_integrity": {
|
| 19 |
"exists": true,
|
| 20 |
"status": "pass",
|
| 21 |
+
"generated_at_utc": "2026-06-21T11:07:26+00:00"
|
| 22 |
},
|
| 23 |
"rendered_site_check": {
|
| 24 |
"exists": true,
|
|
|
|
| 28 |
"task_surface_integrity": {
|
| 29 |
"exists": true,
|
| 30 |
"status": "pass",
|
| 31 |
+
"generated_at_utc": "2026-06-21T11:04:16+00:00"
|
| 32 |
},
|
| 33 |
"source_alignment": {
|
| 34 |
"exists": true,
|
| 35 |
"status": "pass",
|
| 36 |
+
"generated_at_utc": "2026-06-21T11:04:16+00:00"
|
| 37 |
},
|
| 38 |
"scale_up_status": {
|
| 39 |
"exists": true,
|
| 40 |
"status": "pass",
|
| 41 |
+
"generated_at_utc": "2026-06-21T11:03:20+00:00"
|
| 42 |
},
|
| 43 |
"publication_package": {
|
| 44 |
"exists": true,
|
| 45 |
"status": "pass",
|
| 46 |
+
"generated_at_utc": "2026-06-21T11:07:41+00:00"
|
| 47 |
},
|
| 48 |
"mirror_parity": {
|
| 49 |
"exists": true,
|
| 50 |
"status": "pass",
|
| 51 |
+
"generated_at_utc": "2026-06-21T11:05:04+00:00"
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"failures": {}
|
|
|
|
| 97 |
"marker_counts": {
|
| 98 |
"Ropedia Xperience-10M Task Suite": 20,
|
| 99 |
"Xperience-10M": 166,
|
| 100 |
+
"20-task": 89,
|
| 101 |
+
"Qwen3-Omni": 241,
|
| 102 |
"128-episode pilot": 1
|
| 103 |
}
|
| 104 |
},
|
docs/data/publication_audit.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
@@ -244,8 +244,8 @@
|
|
| 244 |
"hf_space_bundle": {
|
| 245 |
"root": "hf_publish/space",
|
| 246 |
"exists": true,
|
| 247 |
-
"file_count":
|
| 248 |
-
"text_file_count":
|
| 249 |
"largest_file": {
|
| 250 |
"path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
|
| 251 |
"bytes": 10221085
|
|
@@ -255,8 +255,8 @@
|
|
| 255 |
"hf_artifact_bundle": {
|
| 256 |
"root": "hf_publish/artifacts",
|
| 257 |
"exists": true,
|
| 258 |
-
"file_count":
|
| 259 |
-
"text_file_count":
|
| 260 |
"largest_file": {
|
| 261 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 262 |
"bytes": 135591061
|
|
@@ -266,8 +266,8 @@
|
|
| 266 |
"hf_model_bundle": {
|
| 267 |
"root": "hf_publish/model",
|
| 268 |
"exists": true,
|
| 269 |
-
"file_count":
|
| 270 |
-
"text_file_count":
|
| 271 |
"largest_file": {
|
| 272 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 273 |
"bytes": 135591061
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-21T11:07:41+00:00",
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
|
|
| 244 |
"hf_space_bundle": {
|
| 245 |
"root": "hf_publish/space",
|
| 246 |
"exists": true,
|
| 247 |
+
"file_count": 572,
|
| 248 |
+
"text_file_count": 425,
|
| 249 |
"largest_file": {
|
| 250 |
"path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
|
| 251 |
"bytes": 10221085
|
|
|
|
| 255 |
"hf_artifact_bundle": {
|
| 256 |
"root": "hf_publish/artifacts",
|
| 257 |
"exists": true,
|
| 258 |
+
"file_count": 3049,
|
| 259 |
+
"text_file_count": 1283,
|
| 260 |
"largest_file": {
|
| 261 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 262 |
"bytes": 135591061
|
|
|
|
| 266 |
"hf_model_bundle": {
|
| 267 |
"root": "hf_publish/model",
|
| 268 |
"exists": true,
|
| 269 |
+
"file_count": 3533,
|
| 270 |
+
"text_file_count": 1455,
|
| 271 |
"largest_file": {
|
| 272 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 273 |
"bytes": 135591061
|
docs/data/quality_gates.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Release Checks",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
|
| 6 |
"automated_gates": [
|
| 7 |
{
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Release Checks",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-21T11:09:13+00:00",
|
| 5 |
"rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
|
| 6 |
"automated_gates": [
|
| 7 |
{
|
docs/data/qwen3_omni_run_lineage.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"current_public_matrix_row": "qwen3_omni_v6_lora",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
-
"interpretation_rule": "Do not confuse the Qwen run versions with the project
|
| 5 |
"pinned_prior_release": "v5",
|
| 6 |
"related_engineering_artifacts": [
|
| 7 |
{
|
|
@@ -17,6 +17,7 @@
|
|
| 17 |
],
|
| 18 |
"runs": [
|
| 19 |
{
|
|
|
|
| 20 |
"dataset_contract": "xperience10m_episode_json_qa_v1",
|
| 21 |
"dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
|
| 22 |
"eval_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
|
|
@@ -33,6 +34,8 @@
|
|
| 33 |
"package": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
|
| 34 |
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
|
| 35 |
"public_matrix_role": "superseded lineage evidence, not the current 20-task Qwen row",
|
|
|
|
|
|
|
| 36 |
"role": "First verified 96/16/16 selected-episode Qwen3-Omni LoRA package; establishes dataset, training, eval, and packaging plumbing.",
|
| 37 |
"status": "verified",
|
| 38 |
"title": "Selected-128 validation-aware LoRA baseline",
|
|
@@ -40,6 +43,7 @@
|
|
| 40 |
"version": "v1"
|
| 41 |
},
|
| 42 |
{
|
|
|
|
| 43 |
"dataset_contract": "xperience10m_episode_json_qa_v1",
|
| 44 |
"dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
|
| 45 |
"eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
|
@@ -56,6 +60,8 @@
|
|
| 56 |
"package": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
| 57 |
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
| 58 |
"public_matrix_role": "superseded lineage evidence, not the current 20-task Qwen row",
|
|
|
|
|
|
|
| 59 |
"role": "Reuses the selected-128 split with a stricter structured JSON answer contract and full 8-GPU LoRA training.",
|
| 60 |
"status": "verified",
|
| 61 |
"title": "Structured-JSON reuse full-8-GPU LoRA",
|
|
@@ -63,6 +69,7 @@
|
|
| 63 |
"version": "v2"
|
| 64 |
},
|
| 65 |
{
|
|
|
|
| 66 |
"dataset_contract": "xperience10m_episode_json_qa_v1",
|
| 67 |
"dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
|
| 68 |
"eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full",
|
|
@@ -79,6 +86,8 @@
|
|
| 79 |
"package": "xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full",
|
| 80 |
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full",
|
| 81 |
"public_matrix_role": "superseded prompt/eval lineage evidence",
|
|
|
|
|
|
|
| 82 |
"role": "Strict-label prompt/eval pass over the v2 adapter; improves JSON validity without introducing a new adapter training run.",
|
| 83 |
"status": "verified",
|
| 84 |
"title": "Strict-label prompt evaluation",
|
|
@@ -86,6 +95,7 @@
|
|
| 86 |
"version": "v3"
|
| 87 |
},
|
| 88 |
{
|
|
|
|
| 89 |
"dataset_contract": "xperience10m_episode_json_qa_v1",
|
| 90 |
"dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
|
| 91 |
"eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
|
|
@@ -102,6 +112,8 @@
|
|
| 102 |
"package": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
|
| 103 |
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
|
| 104 |
"public_matrix_role": "superseded lineage evidence, not the current 20-task Qwen row",
|
|
|
|
|
|
|
| 105 |
"role": "Four-epoch full-8-GPU LoRA run on the same selected split; useful for overfit/metric tradeoff analysis.",
|
| 106 |
"status": "verified",
|
| 107 |
"title": "Four-epoch structured-JSON LoRA",
|
|
@@ -109,6 +121,7 @@
|
|
| 109 |
"version": "v4"
|
| 110 |
},
|
| 111 |
{
|
|
|
|
| 112 |
"dataset_contract": "xperience10m_episode_json_qa_v1",
|
| 113 |
"dataset_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora",
|
| 114 |
"eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
|
|
@@ -125,6 +138,8 @@
|
|
| 125 |
"package": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
|
| 126 |
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
|
| 127 |
"public_matrix_role": "pinned prior release row and comparison baseline",
|
|
|
|
|
|
|
| 128 |
"role": "Dense/multiscale selected-128 run with 4,032 held-out predictions; kept as the pinned prior release because several metrics remain stronger than v6.",
|
| 129 |
"status": "verified",
|
| 130 |
"title": "Multiscale cap96 LoRA",
|
|
@@ -132,6 +147,7 @@
|
|
| 132 |
"version": "v5"
|
| 133 |
},
|
| 134 |
{
|
|
|
|
| 135 |
"dataset_contract": "xperience10m_episode_json_qa_v1",
|
| 136 |
"dataset_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora",
|
| 137 |
"eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
|
|
@@ -148,6 +164,8 @@
|
|
| 148 |
"package": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
|
| 149 |
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
|
| 150 |
"public_matrix_role": "current public 20-task Qwen3-Omni v6 LoRA row",
|
|
|
|
|
|
|
| 151 |
"role": "Current verified Qwen3-Omni row: rank64/lr5e-5 multiscale LoRA plus task-specific probe artifacts used for the 20/20 Qwen matrix coverage.",
|
| 152 |
"status": "verified",
|
| 153 |
"title": "Rank64 lr5e-5 multiscale LoRA",
|
|
|
|
| 1 |
{
|
| 2 |
"current_public_matrix_row": "qwen3_omni_v6_lora",
|
| 3 |
+
"generated_at_utc": "2026-06-21T10:54:46+00:00",
|
| 4 |
+
"interpretation_rule": "Do not confuse the Qwen run versions with the project evidence lines. The project evidence lines are one public sample episode and selected 128-episode artifacts. Qwen v1-v6 are only the Qwen3-Omni run lineage inside the selected-128 line. The 20-task matrix uses Qwen3-Omni v6 LoRA; v5 remains the pinned prior release; v1-v4 are lineage and ablation evidence.",
|
| 5 |
"pinned_prior_release": "v5",
|
| 6 |
"related_engineering_artifacts": [
|
| 7 |
{
|
|
|
|
| 17 |
],
|
| 18 |
"runs": [
|
| 19 |
{
|
| 20 |
+
"change_from_previous": "First verified Qwen3-Omni selected-128 LoRA run.",
|
| 21 |
"dataset_contract": "xperience10m_episode_json_qa_v1",
|
| 22 |
"dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
|
| 23 |
"eval_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
|
|
|
|
| 34 |
"package": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
|
| 35 |
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
|
| 36 |
"public_matrix_role": "superseded lineage evidence, not the current 20-task Qwen row",
|
| 37 |
+
"purpose": "Prove that the selected-128 split, LoRA training, held-out eval, validation, and public packaging loop works end to end.",
|
| 38 |
+
"reader_use": "Use only as lineage evidence for the first working pipeline.",
|
| 39 |
"role": "First verified 96/16/16 selected-episode Qwen3-Omni LoRA package; establishes dataset, training, eval, and packaging plumbing.",
|
| 40 |
"status": "verified",
|
| 41 |
"title": "Selected-128 validation-aware LoRA baseline",
|
|
|
|
| 43 |
"version": "v1"
|
| 44 |
},
|
| 45 |
{
|
| 46 |
+
"change_from_previous": "Reused the selected-128 split with a stricter structured-JSON answer contract and full 8-GPU LoRA training.",
|
| 47 |
"dataset_contract": "xperience10m_episode_json_qa_v1",
|
| 48 |
"dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
|
| 49 |
"eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
|
|
|
| 60 |
"package": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
| 61 |
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
| 62 |
"public_matrix_role": "superseded lineage evidence, not the current 20-task Qwen row",
|
| 63 |
+
"purpose": "Make the answer format schema-checked and reduce invalid JSON before expanding scale.",
|
| 64 |
+
"reader_use": "Use as evidence that schema-constrained evaluation improved validity and contact accuracy over v1.",
|
| 65 |
"role": "Reuses the selected-128 split with a stricter structured JSON answer contract and full 8-GPU LoRA training.",
|
| 66 |
"status": "verified",
|
| 67 |
"title": "Structured-JSON reuse full-8-GPU LoRA",
|
|
|
|
| 69 |
"version": "v2"
|
| 70 |
},
|
| 71 |
{
|
| 72 |
+
"change_from_previous": "Evaluated the v2 adapter with stricter labels and prompts; no new adapter training.",
|
| 73 |
"dataset_contract": "xperience10m_episode_json_qa_v1",
|
| 74 |
"dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
|
| 75 |
"eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full",
|
|
|
|
| 86 |
"package": "xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full",
|
| 87 |
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full",
|
| 88 |
"public_matrix_role": "superseded prompt/eval lineage evidence",
|
| 89 |
+
"purpose": "Separate prompt/eval formatting effects from adapter-training effects.",
|
| 90 |
+
"reader_use": "Use as prompt/eval ablation evidence, not as a separate trained model.",
|
| 91 |
"role": "Strict-label prompt/eval pass over the v2 adapter; improves JSON validity without introducing a new adapter training run.",
|
| 92 |
"status": "verified",
|
| 93 |
"title": "Strict-label prompt evaluation",
|
|
|
|
| 95 |
"version": "v3"
|
| 96 |
},
|
| 97 |
{
|
| 98 |
+
"change_from_previous": "Trained a new four-epoch full-8-GPU LoRA adapter on the structured-JSON setup.",
|
| 99 |
"dataset_contract": "xperience10m_episode_json_qa_v1",
|
| 100 |
"dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
|
| 101 |
"eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
|
|
|
|
| 112 |
"package": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
|
| 113 |
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
|
| 114 |
"public_matrix_role": "superseded lineage evidence, not the current 20-task Qwen row",
|
| 115 |
+
"purpose": "Test whether longer structured-JSON LoRA training improves the same selected split.",
|
| 116 |
+
"reader_use": "Use as overfit and metric-tradeoff evidence before the multiscale export.",
|
| 117 |
"role": "Four-epoch full-8-GPU LoRA run on the same selected split; useful for overfit/metric tradeoff analysis.",
|
| 118 |
"status": "verified",
|
| 119 |
"title": "Four-epoch structured-JSON LoRA",
|
|
|
|
| 121 |
"version": "v4"
|
| 122 |
},
|
| 123 |
{
|
| 124 |
+
"change_from_previous": "Introduced the multiscale cap96 export and larger held-out evaluation surface.",
|
| 125 |
"dataset_contract": "xperience10m_episode_json_qa_v1",
|
| 126 |
"dataset_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora",
|
| 127 |
"eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
|
|
|
|
| 138 |
"package": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
|
| 139 |
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
|
| 140 |
"public_matrix_role": "pinned prior release row and comparison baseline",
|
| 141 |
+
"purpose": "Move from the 448-sample compact eval to a denser multiscale 4,032-sample held-out eval.",
|
| 142 |
+
"reader_use": "Use as the pinned prior release; it remains stronger on JSON validity, subtask, next-action, object, and transition metrics.",
|
| 143 |
"role": "Dense/multiscale selected-128 run with 4,032 held-out predictions; kept as the pinned prior release because several metrics remain stronger than v6.",
|
| 144 |
"status": "verified",
|
| 145 |
"title": "Multiscale cap96 LoRA",
|
|
|
|
| 147 |
"version": "v5"
|
| 148 |
},
|
| 149 |
{
|
| 150 |
+
"change_from_previous": "Kept the multiscale setup, changed LoRA rank/lr to rank64/lr5e-5, and added verified task-specific probes for full 20-task coverage.",
|
| 151 |
"dataset_contract": "xperience10m_episode_json_qa_v1",
|
| 152 |
"dataset_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora",
|
| 153 |
"eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
|
|
|
|
| 164 |
"package": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
|
| 165 |
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
|
| 166 |
"public_matrix_role": "current public 20-task Qwen3-Omni v6 LoRA row",
|
| 167 |
+
"purpose": "Promote the current public Qwen3-Omni 20-task row with multiscale LoRA plus task-specific probes.",
|
| 168 |
+
"reader_use": "Use as the current public 20-task Qwen row; it improves action macro-F1 and contact accuracy while v5 remains the prior comparator.",
|
| 169 |
"role": "Current verified Qwen3-Omni row: rank64/lr5e-5 multiscale LoRA plus task-specific probe artifacts used for the 20/20 Qwen matrix coverage.",
|
| 170 |
"status": "verified",
|
| 171 |
"title": "Rank64 lr5e-5 multiscale LoRA",
|
docs/data/research_roadmap.json
CHANGED
|
@@ -151,7 +151,7 @@
|
|
| 151 |
"entry_condition": "The selected episodes are prepared or a 3-8 episode dry run is available for preprocessing checks.",
|
| 152 |
"deliverables": [
|
| 153 |
"backbone registry",
|
| 154 |
-
"Cosmos 3 world-model
|
| 155 |
"Cosmos3-Super Forward-Dynamics LoRA verified package",
|
| 156 |
"Qwen3-Omni LoRA baseline plan",
|
| 157 |
"OpenVLA/openpi/GR00T policy-branch candidates",
|
|
@@ -162,7 +162,7 @@
|
|
| 162 |
"docs/data/foundation_model_plan.json",
|
| 163 |
"research_roadmap_interactive.json"
|
| 164 |
],
|
| 165 |
-
"reader_takeaway": "Qwen3-Omni remains the structured JSON held-out pilot; Cosmos 3 is the first world-model
|
| 166 |
},
|
| 167 |
{
|
| 168 |
"id": "robustness_run_64_128_episode",
|
|
@@ -202,7 +202,7 @@
|
|
| 202 |
"qualitative inspection",
|
| 203 |
"updated model cards"
|
| 204 |
],
|
| 205 |
-
"reader_takeaway": "The
|
| 206 |
},
|
| 207 |
{
|
| 208 |
"id": "xperience_embodied_foundation_pretraining",
|
|
|
|
| 151 |
"entry_condition": "The selected episodes are prepared or a 3-8 episode dry run is available for preprocessing checks.",
|
| 152 |
"deliverables": [
|
| 153 |
"backbone registry",
|
| 154 |
+
"Cosmos 3 world-model track plan",
|
| 155 |
"Cosmos3-Super Forward-Dynamics LoRA verified package",
|
| 156 |
"Qwen3-Omni LoRA baseline plan",
|
| 157 |
"OpenVLA/openpi/GR00T policy-branch candidates",
|
|
|
|
| 162 |
"docs/data/foundation_model_plan.json",
|
| 163 |
"research_roadmap_interactive.json"
|
| 164 |
],
|
| 165 |
+
"reader_takeaway": "Qwen3-Omni remains the structured JSON held-out pilot; Cosmos 3 is the first world-model track. Cosmos3-Super now has a verified forward-dynamics LoRA over camera-pose proxy targets, while VLA/policy models wait for robot-compatible action targets."
|
| 166 |
},
|
| 167 |
{
|
| 168 |
"id": "robustness_run_64_128_episode",
|
|
|
|
| 202 |
"qualitative inspection",
|
| 203 |
"updated model cards"
|
| 204 |
],
|
| 205 |
+
"reader_takeaway": "The Cosmos3 track now includes Nano future-window compatibility and Super forward-dynamics LoRA; the long-term direction remains richer multimodal representation learning with model tracks chosen by task fit rather than by a single default backbone."
|
| 206 |
},
|
| 207 |
{
|
| 208 |
"id": "xperience_embodied_foundation_pretraining",
|
docs/data/research_roadmap_interactive.json
CHANGED
|
@@ -132,23 +132,23 @@
|
|
| 132 |
"public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
|
| 133 |
"source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
|
| 134 |
"status": "planned_research_directions",
|
| 135 |
-
"summary": "Concrete Xperience-10M project directions beyond the current minimal baselines, Qwen3-Omni LoRA plan, Cosmos/world-model
|
| 136 |
"title": "Additional Development Directions"
|
| 137 |
},
|
| 138 |
"baseline_summary": {
|
| 139 |
"baseline_heads": "minimal and neural MLP heads",
|
| 140 |
"current_use": "task design, data-contract validation, case studies, and baseline comparison",
|
| 141 |
"split": "chronological single-episode split for public-sample diagnostics",
|
| 142 |
-
"task_count":
|
| 143 |
},
|
| 144 |
"directions": [
|
| 145 |
{
|
| 146 |
"code": "A",
|
| 147 |
"counts": {
|
| 148 |
"diagnostic": 0,
|
| 149 |
-
"direct":
|
| 150 |
-
"proxy":
|
| 151 |
-
"total_links":
|
| 152 |
},
|
| 153 |
"current_readout": "The sample supports hand trajectory forecasting and contact/object probes, but it does not yet include a full body/shape model or multi-person priors.",
|
| 154 |
"current_status": "partially implemented",
|
|
@@ -174,7 +174,9 @@
|
|
| 174 |
"timeline_action",
|
| 175 |
"hand_trajectory_forecast",
|
| 176 |
"contact_prediction",
|
| 177 |
-
"object_relevance"
|
|
|
|
|
|
|
| 178 |
],
|
| 179 |
"tasks": [
|
| 180 |
{
|
|
@@ -429,6 +431,84 @@
|
|
| 429 |
"process_short": "object vocabulary -> multi-hot labels -> sigmoid heads",
|
| 430 |
"research_name": "Object-Centric Interaction Recognition",
|
| 431 |
"why": "Connects egocentric activity to manipulated objects and early object-centric state."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 432 |
}
|
| 433 |
]
|
| 434 |
},
|
|
@@ -436,9 +516,9 @@
|
|
| 436 |
"code": "B",
|
| 437 |
"counts": {
|
| 438 |
"diagnostic": 1,
|
| 439 |
-
"direct":
|
| 440 |
-
"proxy":
|
| 441 |
-
"total_links":
|
| 442 |
},
|
| 443 |
"current_readout": "The current suite checks cross-modal alignment and depth/video reconstruction proxies; it does not yet train a renderer or reconstruct geometry.",
|
| 444 |
"current_status": "proxy tasks only",
|
|
@@ -463,7 +543,9 @@
|
|
| 463 |
"task_ids": [
|
| 464 |
"cross_modal_retrieval",
|
| 465 |
"modality_reconstruction",
|
| 466 |
-
"misalignment_detection"
|
|
|
|
|
|
|
| 467 |
],
|
| 468 |
"tasks": [
|
| 469 |
{
|
|
@@ -634,18 +716,96 @@
|
|
| 634 |
"process_short": "aligned/shifted pairs -> feature combiner -> binary classifier",
|
| 635 |
"research_name": "Cross-Modal Misalignment Detection",
|
| 636 |
"why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 637 |
}
|
| 638 |
]
|
| 639 |
},
|
| 640 |
{
|
| 641 |
"code": "C",
|
| 642 |
"counts": {
|
| 643 |
-
"diagnostic":
|
| 644 |
-
"direct":
|
| 645 |
-
"proxy":
|
| 646 |
-
"total_links":
|
| 647 |
},
|
| 648 |
-
"current_readout": "
|
| 649 |
"current_status": "strongest implemented track",
|
| 650 |
"extension_tasks": [
|
| 651 |
{
|
|
@@ -676,7 +836,13 @@
|
|
| 676 |
"caption_grounding",
|
| 677 |
"cross_modal_retrieval",
|
| 678 |
"temporal_order",
|
| 679 |
-
"misalignment_detection"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 680 |
],
|
| 681 |
"tasks": [
|
| 682 |
{
|
|
@@ -1367,16 +1533,250 @@
|
|
| 1367 |
"process_short": "aligned/shifted pairs -> feature combiner -> binary classifier",
|
| 1368 |
"research_name": "Cross-Modal Misalignment Detection",
|
| 1369 |
"why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1370 |
}
|
| 1371 |
]
|
| 1372 |
},
|
| 1373 |
{
|
| 1374 |
"code": "D",
|
| 1375 |
"counts": {
|
| 1376 |
-
"diagnostic":
|
| 1377 |
-
"direct":
|
| 1378 |
-
"proxy":
|
| 1379 |
-
"total_links":
|
| 1380 |
},
|
| 1381 |
"current_readout": "The current tasks probe temporal structure, object relevance, cross-modal retrieval, and modality prediction, but they do not yet build persistent maps or scene graphs.",
|
| 1382 |
"current_status": "early proxy tasks",
|
|
@@ -1407,7 +1807,13 @@
|
|
| 1407 |
"cross_modal_retrieval",
|
| 1408 |
"modality_reconstruction",
|
| 1409 |
"temporal_order",
|
| 1410 |
-
"misalignment_detection"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1411 |
],
|
| 1412 |
"tasks": [
|
| 1413 |
{
|
|
@@ -1960,34 +2366,268 @@
|
|
| 1960 |
"process_short": "aligned/shifted pairs -> feature combiner -> binary classifier",
|
| 1961 |
"research_name": "Cross-Modal Misalignment Detection",
|
| 1962 |
"why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models."
|
| 1963 |
-
}
|
| 1964 |
-
|
| 1965 |
-
|
| 1966 |
-
|
| 1967 |
-
|
| 1968 |
-
|
| 1969 |
-
|
| 1970 |
-
|
| 1971 |
-
|
| 1972 |
-
|
| 1973 |
-
|
| 1974 |
-
|
| 1975 |
-
|
| 1976 |
-
|
| 1977 |
-
|
| 1978 |
-
|
| 1979 |
-
|
| 1980 |
-
|
| 1981 |
-
|
| 1982 |
-
|
| 1983 |
-
"
|
| 1984 |
-
"
|
| 1985 |
-
"
|
| 1986 |
-
|
| 1987 |
-
|
| 1988 |
-
|
| 1989 |
-
|
| 1990 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1991 |
"target": "structured_task_prediction"
|
| 1992 |
},
|
| 1993 |
{
|
|
@@ -2046,7 +2686,7 @@
|
|
| 2046 |
},
|
| 2047 |
{
|
| 2048 |
"action": "Promote Cosmos 3 beyond the current Nano compatibility and Super forward-dynamics runs only when loss metrics, preprocessing, and storage justify the added compute.",
|
| 2049 |
-
"name": "World-model
|
| 2050 |
"step": 4
|
| 2051 |
},
|
| 2052 |
{
|
|
@@ -2222,7 +2862,7 @@
|
|
| 2222 |
],
|
| 2223 |
"status": "planning_artifact"
|
| 2224 |
},
|
| 2225 |
-
"generated_at_utc": "2026-06-
|
| 2226 |
"omni_plan": {
|
| 2227 |
"adapter": "LoRA rank 16, alpha 32, dropout 0.05",
|
| 2228 |
"backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
|
|
@@ -2385,7 +3025,7 @@
|
|
| 2385 |
],
|
| 2386 |
"deliverables": [
|
| 2387 |
"backbone registry",
|
| 2388 |
-
"Cosmos 3 world-model
|
| 2389 |
"Cosmos3-Super Forward-Dynamics LoRA verified package",
|
| 2390 |
"Qwen3-Omni LoRA baseline plan",
|
| 2391 |
"OpenVLA/openpi/GR00T policy-branch candidates",
|
|
@@ -2394,7 +3034,7 @@
|
|
| 2394 |
"entry_condition": "The selected episodes are prepared or a 3-8 episode dry run is available for preprocessing checks.",
|
| 2395 |
"id": "foundation_model_selection_matrix",
|
| 2396 |
"name": "Foundation-Model Selection Matrix",
|
| 2397 |
-
"reader_takeaway": "Qwen3-Omni remains the structured JSON held-out pilot; Cosmos 3 is the first world-model
|
| 2398 |
"stage": "future",
|
| 2399 |
"status": "current"
|
| 2400 |
},
|
|
@@ -2436,7 +3076,7 @@
|
|
| 2436 |
"entry_condition": "Enough multi-episode data, compute budget, and model-specific action/world-state targets.",
|
| 2437 |
"id": "foundation_world_model_extensions",
|
| 2438 |
"name": "Cosmos 3 and Policy-Model Extensions",
|
| 2439 |
-
"reader_takeaway": "The
|
| 2440 |
"stage": "future",
|
| 2441 |
"status": "planned"
|
| 2442 |
},
|
|
@@ -3242,6 +3882,318 @@
|
|
| 3242 |
"process_short": "aligned/shifted pairs -> feature combiner -> binary classifier",
|
| 3243 |
"research_name": "Cross-Modal Misalignment Detection",
|
| 3244 |
"why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3245 |
}
|
| 3246 |
],
|
| 3247 |
"three_foundation_pipelines": {
|
|
|
|
| 132 |
"public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
|
| 133 |
"source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
|
| 134 |
"status": "planned_research_directions",
|
| 135 |
+
"summary": "Concrete Xperience-10M project directions beyond the current minimal baselines, Qwen3-Omni LoRA plan, Cosmos/world-model track, and long-term native pretraining goal.",
|
| 136 |
"title": "Additional Development Directions"
|
| 137 |
},
|
| 138 |
"baseline_summary": {
|
| 139 |
"baseline_heads": "minimal and neural MLP heads",
|
| 140 |
"current_use": "task design, data-contract validation, case studies, and baseline comparison",
|
| 141 |
"split": "chronological single-episode split for public-sample diagnostics",
|
| 142 |
+
"task_count": 20
|
| 143 |
},
|
| 144 |
"directions": [
|
| 145 |
{
|
| 146 |
"code": "A",
|
| 147 |
"counts": {
|
| 148 |
"diagnostic": 0,
|
| 149 |
+
"direct": 3,
|
| 150 |
+
"proxy": 3,
|
| 151 |
+
"total_links": 6
|
| 152 |
},
|
| 153 |
"current_readout": "The sample supports hand trajectory forecasting and contact/object probes, but it does not yet include a full body/shape model or multi-person priors.",
|
| 154 |
"current_status": "partially implemented",
|
|
|
|
| 174 |
"timeline_action",
|
| 175 |
"hand_trajectory_forecast",
|
| 176 |
"contact_prediction",
|
| 177 |
+
"object_relevance",
|
| 178 |
+
"interaction_text_prediction",
|
| 179 |
+
"imu_to_hand_pose"
|
| 180 |
],
|
| 181 |
"tasks": [
|
| 182 |
{
|
|
|
|
| 431 |
"process_short": "object vocabulary -> multi-hot labels -> sigmoid heads",
|
| 432 |
"research_name": "Object-Centric Interaction Recognition",
|
| 433 |
"why": "Connects egocentric activity to manipulated objects and early object-centric state."
|
| 434 |
+
},
|
| 435 |
+
{
|
| 436 |
+
"architecture_family": null,
|
| 437 |
+
"case_study": null,
|
| 438 |
+
"current_limit": "Public derived features retain hashed text targets; raw full text requires the official annotation source.",
|
| 439 |
+
"direction_roles": {
|
| 440 |
+
"A": "proxy",
|
| 441 |
+
"C": "direct"
|
| 442 |
+
},
|
| 443 |
+
"display_name": "Interaction text prediction",
|
| 444 |
+
"evidence_links": [
|
| 445 |
+
{
|
| 446 |
+
"href": "data/task_walkthroughs.json",
|
| 447 |
+
"label": "Task walkthrough"
|
| 448 |
+
},
|
| 449 |
+
{
|
| 450 |
+
"href": "single_episode_explorer.html",
|
| 451 |
+
"label": "Single-episode explorer"
|
| 452 |
+
}
|
| 453 |
+
],
|
| 454 |
+
"family": "classification",
|
| 455 |
+
"id": "interaction_text_prediction",
|
| 456 |
+
"input": null,
|
| 457 |
+
"input_short": null,
|
| 458 |
+
"metric": {
|
| 459 |
+
"better_baseline": "minimal",
|
| 460 |
+
"direction": "higher",
|
| 461 |
+
"key": "macro_f1",
|
| 462 |
+
"minimal": 0.0444,
|
| 463 |
+
"name": "macro-F1",
|
| 464 |
+
"neural_mlp": 0.0381
|
| 465 |
+
},
|
| 466 |
+
"modalities": [],
|
| 467 |
+
"module_summary": null,
|
| 468 |
+
"output_short": null,
|
| 469 |
+
"primary_direction": "C",
|
| 470 |
+
"process_short": null,
|
| 471 |
+
"research_name": "Interaction text prediction",
|
| 472 |
+
"why": "Connects egocentric observations to the natural-language interaction semantics carried by the annotation."
|
| 473 |
+
},
|
| 474 |
+
{
|
| 475 |
+
"architecture_family": null,
|
| 476 |
+
"case_study": null,
|
| 477 |
+
"current_limit": "Pose reconstruction is window-level and does not yet fit a full parametric hand/body model.",
|
| 478 |
+
"direction_roles": {
|
| 479 |
+
"A": "direct",
|
| 480 |
+
"B": "proxy"
|
| 481 |
+
},
|
| 482 |
+
"display_name": "IMU-to-hand pose reconstruction",
|
| 483 |
+
"evidence_links": [
|
| 484 |
+
{
|
| 485 |
+
"href": "data/task_walkthroughs.json",
|
| 486 |
+
"label": "Task walkthrough"
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"href": "single_episode_explorer.html",
|
| 490 |
+
"label": "Single-episode explorer"
|
| 491 |
+
}
|
| 492 |
+
],
|
| 493 |
+
"family": "regression",
|
| 494 |
+
"id": "imu_to_hand_pose",
|
| 495 |
+
"input": null,
|
| 496 |
+
"input_short": null,
|
| 497 |
+
"metric": {
|
| 498 |
+
"better_baseline": "minimal",
|
| 499 |
+
"direction": "lower",
|
| 500 |
+
"key": "mae",
|
| 501 |
+
"minimal": 0.042,
|
| 502 |
+
"name": "MAE",
|
| 503 |
+
"neural_mlp": 0.0426
|
| 504 |
+
},
|
| 505 |
+
"modalities": [],
|
| 506 |
+
"module_summary": null,
|
| 507 |
+
"output_short": null,
|
| 508 |
+
"primary_direction": "A",
|
| 509 |
+
"process_short": null,
|
| 510 |
+
"research_name": "IMU-to-hand pose reconstruction",
|
| 511 |
+
"why": "Measures human-motion reconstruction from wearable and motion cues."
|
| 512 |
}
|
| 513 |
]
|
| 514 |
},
|
|
|
|
| 516 |
"code": "B",
|
| 517 |
"counts": {
|
| 518 |
"diagnostic": 1,
|
| 519 |
+
"direct": 1,
|
| 520 |
+
"proxy": 3,
|
| 521 |
+
"total_links": 5
|
| 522 |
},
|
| 523 |
"current_readout": "The current suite checks cross-modal alignment and depth/video reconstruction proxies; it does not yet train a renderer or reconstruct geometry.",
|
| 524 |
"current_status": "proxy tasks only",
|
|
|
|
| 543 |
"task_ids": [
|
| 544 |
"cross_modal_retrieval",
|
| 545 |
"modality_reconstruction",
|
| 546 |
+
"misalignment_detection",
|
| 547 |
+
"imu_to_hand_pose",
|
| 548 |
+
"camera_view_sync_retrieval"
|
| 549 |
],
|
| 550 |
"tasks": [
|
| 551 |
{
|
|
|
|
| 716 |
"process_short": "aligned/shifted pairs -> feature combiner -> binary classifier",
|
| 717 |
"research_name": "Cross-Modal Misalignment Detection",
|
| 718 |
"why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models."
|
| 719 |
+
},
|
| 720 |
+
{
|
| 721 |
+
"architecture_family": null,
|
| 722 |
+
"case_study": null,
|
| 723 |
+
"current_limit": "Pose reconstruction is window-level and does not yet fit a full parametric hand/body model.",
|
| 724 |
+
"direction_roles": {
|
| 725 |
+
"A": "direct",
|
| 726 |
+
"B": "proxy"
|
| 727 |
+
},
|
| 728 |
+
"display_name": "IMU-to-hand pose reconstruction",
|
| 729 |
+
"evidence_links": [
|
| 730 |
+
{
|
| 731 |
+
"href": "data/task_walkthroughs.json",
|
| 732 |
+
"label": "Task walkthrough"
|
| 733 |
+
},
|
| 734 |
+
{
|
| 735 |
+
"href": "single_episode_explorer.html",
|
| 736 |
+
"label": "Single-episode explorer"
|
| 737 |
+
}
|
| 738 |
+
],
|
| 739 |
+
"family": "regression",
|
| 740 |
+
"id": "imu_to_hand_pose",
|
| 741 |
+
"input": null,
|
| 742 |
+
"input_short": null,
|
| 743 |
+
"metric": {
|
| 744 |
+
"better_baseline": "minimal",
|
| 745 |
+
"direction": "lower",
|
| 746 |
+
"key": "mae",
|
| 747 |
+
"minimal": 0.042,
|
| 748 |
+
"name": "MAE",
|
| 749 |
+
"neural_mlp": 0.0426
|
| 750 |
+
},
|
| 751 |
+
"modalities": [],
|
| 752 |
+
"module_summary": null,
|
| 753 |
+
"output_short": null,
|
| 754 |
+
"primary_direction": "A",
|
| 755 |
+
"process_short": null,
|
| 756 |
+
"research_name": "IMU-to-hand pose reconstruction",
|
| 757 |
+
"why": "Measures human-motion reconstruction from wearable and motion cues."
|
| 758 |
+
},
|
| 759 |
+
{
|
| 760 |
+
"architecture_family": null,
|
| 761 |
+
"case_study": null,
|
| 762 |
+
"current_limit": "Retrieval checks view consistency but does not reconstruct geometry by itself.",
|
| 763 |
+
"direction_roles": {
|
| 764 |
+
"B": "direct",
|
| 765 |
+
"D": "proxy"
|
| 766 |
+
},
|
| 767 |
+
"display_name": "Camera-view synchronization retrieval",
|
| 768 |
+
"evidence_links": [
|
| 769 |
+
{
|
| 770 |
+
"href": "data/task_walkthroughs.json",
|
| 771 |
+
"label": "Task walkthrough"
|
| 772 |
+
},
|
| 773 |
+
{
|
| 774 |
+
"href": "single_episode_explorer.html",
|
| 775 |
+
"label": "Single-episode explorer"
|
| 776 |
+
}
|
| 777 |
+
],
|
| 778 |
+
"family": "retrieval",
|
| 779 |
+
"id": "camera_view_sync_retrieval",
|
| 780 |
+
"input": null,
|
| 781 |
+
"input_short": null,
|
| 782 |
+
"metric": {
|
| 783 |
+
"better_baseline": "minimal",
|
| 784 |
+
"direction": "higher",
|
| 785 |
+
"key": "mrr",
|
| 786 |
+
"minimal": 0.4943,
|
| 787 |
+
"name": "MRR",
|
| 788 |
+
"neural_mlp": 0.2409
|
| 789 |
+
},
|
| 790 |
+
"modalities": [],
|
| 791 |
+
"module_summary": null,
|
| 792 |
+
"output_short": null,
|
| 793 |
+
"primary_direction": "B",
|
| 794 |
+
"process_short": null,
|
| 795 |
+
"research_name": "Camera-view synchronization retrieval",
|
| 796 |
+
"why": "Tests whether synchronized multi-view structure is recoverable across camera streams."
|
| 797 |
}
|
| 798 |
]
|
| 799 |
},
|
| 800 |
{
|
| 801 |
"code": "C",
|
| 802 |
"counts": {
|
| 803 |
+
"diagnostic": 4,
|
| 804 |
+
"direct": 10,
|
| 805 |
+
"proxy": 3,
|
| 806 |
+
"total_links": 17
|
| 807 |
},
|
| 808 |
+
"current_readout": "The unified 20-task suite directly targets egocentric action, task state, interaction, grounding, forecasting, and alignment.",
|
| 809 |
"current_status": "strongest implemented track",
|
| 810 |
"extension_tasks": [
|
| 811 |
{
|
|
|
|
| 836 |
"caption_grounding",
|
| 837 |
"cross_modal_retrieval",
|
| 838 |
"temporal_order",
|
| 839 |
+
"misalignment_detection",
|
| 840 |
+
"long_horizon_next_action",
|
| 841 |
+
"next_subtask_forecast",
|
| 842 |
+
"interaction_text_prediction",
|
| 843 |
+
"action_object_relation",
|
| 844 |
+
"object_set_forecast",
|
| 845 |
+
"time_to_transition"
|
| 846 |
],
|
| 847 |
"tasks": [
|
| 848 |
{
|
|
|
|
| 1533 |
"process_short": "aligned/shifted pairs -> feature combiner -> binary classifier",
|
| 1534 |
"research_name": "Cross-Modal Misalignment Detection",
|
| 1535 |
"why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models."
|
| 1536 |
+
},
|
| 1537 |
+
{
|
| 1538 |
+
"architecture_family": null,
|
| 1539 |
+
"case_study": null,
|
| 1540 |
+
"current_limit": "Evaluated from sample-supported future labels, not full open-world action generation.",
|
| 1541 |
+
"direction_roles": {
|
| 1542 |
+
"C": "direct",
|
| 1543 |
+
"D": "proxy"
|
| 1544 |
+
},
|
| 1545 |
+
"display_name": "Long-horizon next-action forecasting",
|
| 1546 |
+
"evidence_links": [
|
| 1547 |
+
{
|
| 1548 |
+
"href": "data/task_walkthroughs.json",
|
| 1549 |
+
"label": "Task walkthrough"
|
| 1550 |
+
},
|
| 1551 |
+
{
|
| 1552 |
+
"href": "single_episode_explorer.html",
|
| 1553 |
+
"label": "Single-episode explorer"
|
| 1554 |
+
}
|
| 1555 |
+
],
|
| 1556 |
+
"family": "classification",
|
| 1557 |
+
"id": "long_horizon_next_action",
|
| 1558 |
+
"input": null,
|
| 1559 |
+
"input_short": null,
|
| 1560 |
+
"metric": {
|
| 1561 |
+
"better_baseline": "minimal",
|
| 1562 |
+
"direction": "higher",
|
| 1563 |
+
"key": "macro_f1",
|
| 1564 |
+
"minimal": 0.075,
|
| 1565 |
+
"name": "macro-F1",
|
| 1566 |
+
"neural_mlp": 0.0655
|
| 1567 |
+
},
|
| 1568 |
+
"modalities": [],
|
| 1569 |
+
"module_summary": null,
|
| 1570 |
+
"output_short": null,
|
| 1571 |
+
"primary_direction": "C",
|
| 1572 |
+
"process_short": null,
|
| 1573 |
+
"research_name": "Long-horizon next-action forecasting",
|
| 1574 |
+
"why": "Extends short-horizon intention prediction into longer activity futures, a key egocentric and world-model signal."
|
| 1575 |
+
},
|
| 1576 |
+
{
|
| 1577 |
+
"architecture_family": null,
|
| 1578 |
+
"case_study": null,
|
| 1579 |
+
"current_limit": "Subtask labels are constrained to the available annotation vocabulary.",
|
| 1580 |
+
"direction_roles": {
|
| 1581 |
+
"C": "direct",
|
| 1582 |
+
"D": "proxy"
|
| 1583 |
+
},
|
| 1584 |
+
"display_name": "Long-horizon next-subtask forecasting",
|
| 1585 |
+
"evidence_links": [
|
| 1586 |
+
{
|
| 1587 |
+
"href": "data/task_walkthroughs.json",
|
| 1588 |
+
"label": "Task walkthrough"
|
| 1589 |
+
},
|
| 1590 |
+
{
|
| 1591 |
+
"href": "single_episode_explorer.html",
|
| 1592 |
+
"label": "Single-episode explorer"
|
| 1593 |
+
}
|
| 1594 |
+
],
|
| 1595 |
+
"family": "classification",
|
| 1596 |
+
"id": "next_subtask_forecast",
|
| 1597 |
+
"input": null,
|
| 1598 |
+
"input_short": null,
|
| 1599 |
+
"metric": {
|
| 1600 |
+
"better_baseline": "neural_mlp",
|
| 1601 |
+
"direction": "higher",
|
| 1602 |
+
"key": "macro_f1",
|
| 1603 |
+
"minimal": 0.0455,
|
| 1604 |
+
"name": "macro-F1",
|
| 1605 |
+
"neural_mlp": 0.0507
|
| 1606 |
+
},
|
| 1607 |
+
"modalities": [],
|
| 1608 |
+
"module_summary": null,
|
| 1609 |
+
"output_short": null,
|
| 1610 |
+
"primary_direction": "C",
|
| 1611 |
+
"process_short": null,
|
| 1612 |
+
"research_name": "Long-horizon next-subtask forecasting",
|
| 1613 |
+
"why": "Measures whether the model can anticipate the next procedural phase rather than only the current frame state."
|
| 1614 |
+
},
|
| 1615 |
+
{
|
| 1616 |
+
"architecture_family": null,
|
| 1617 |
+
"case_study": null,
|
| 1618 |
+
"current_limit": "Public derived features retain hashed text targets; raw full text requires the official annotation source.",
|
| 1619 |
+
"direction_roles": {
|
| 1620 |
+
"A": "proxy",
|
| 1621 |
+
"C": "direct"
|
| 1622 |
+
},
|
| 1623 |
+
"display_name": "Interaction text prediction",
|
| 1624 |
+
"evidence_links": [
|
| 1625 |
+
{
|
| 1626 |
+
"href": "data/task_walkthroughs.json",
|
| 1627 |
+
"label": "Task walkthrough"
|
| 1628 |
+
},
|
| 1629 |
+
{
|
| 1630 |
+
"href": "single_episode_explorer.html",
|
| 1631 |
+
"label": "Single-episode explorer"
|
| 1632 |
+
}
|
| 1633 |
+
],
|
| 1634 |
+
"family": "classification",
|
| 1635 |
+
"id": "interaction_text_prediction",
|
| 1636 |
+
"input": null,
|
| 1637 |
+
"input_short": null,
|
| 1638 |
+
"metric": {
|
| 1639 |
+
"better_baseline": "minimal",
|
| 1640 |
+
"direction": "higher",
|
| 1641 |
+
"key": "macro_f1",
|
| 1642 |
+
"minimal": 0.0444,
|
| 1643 |
+
"name": "macro-F1",
|
| 1644 |
+
"neural_mlp": 0.0381
|
| 1645 |
+
},
|
| 1646 |
+
"modalities": [],
|
| 1647 |
+
"module_summary": null,
|
| 1648 |
+
"output_short": null,
|
| 1649 |
+
"primary_direction": "C",
|
| 1650 |
+
"process_short": null,
|
| 1651 |
+
"research_name": "Interaction text prediction",
|
| 1652 |
+
"why": "Connects egocentric observations to the natural-language interaction semantics carried by the annotation."
|
| 1653 |
+
},
|
| 1654 |
+
{
|
| 1655 |
+
"architecture_family": null,
|
| 1656 |
+
"case_study": null,
|
| 1657 |
+
"current_limit": "Relation labels are derived from the public-sample annotation scope.",
|
| 1658 |
+
"direction_roles": {
|
| 1659 |
+
"C": "direct",
|
| 1660 |
+
"D": "proxy"
|
| 1661 |
+
},
|
| 1662 |
+
"display_name": "Action-object relation prediction",
|
| 1663 |
+
"evidence_links": [
|
| 1664 |
+
{
|
| 1665 |
+
"href": "data/task_walkthroughs.json",
|
| 1666 |
+
"label": "Task walkthrough"
|
| 1667 |
+
},
|
| 1668 |
+
{
|
| 1669 |
+
"href": "single_episode_explorer.html",
|
| 1670 |
+
"label": "Single-episode explorer"
|
| 1671 |
+
}
|
| 1672 |
+
],
|
| 1673 |
+
"family": "classification",
|
| 1674 |
+
"id": "action_object_relation",
|
| 1675 |
+
"input": null,
|
| 1676 |
+
"input_short": null,
|
| 1677 |
+
"metric": {
|
| 1678 |
+
"better_baseline": "tie",
|
| 1679 |
+
"direction": "higher",
|
| 1680 |
+
"key": "macro_f1",
|
| 1681 |
+
"minimal": 0.0,
|
| 1682 |
+
"name": "macro-F1",
|
| 1683 |
+
"neural_mlp": 0.0
|
| 1684 |
+
},
|
| 1685 |
+
"modalities": [],
|
| 1686 |
+
"module_summary": null,
|
| 1687 |
+
"output_short": null,
|
| 1688 |
+
"primary_direction": "C",
|
| 1689 |
+
"process_short": null,
|
| 1690 |
+
"research_name": "Action-object relation prediction",
|
| 1691 |
+
"why": "Tests whether action recognition and object state are connected as a relational interaction representation."
|
| 1692 |
+
},
|
| 1693 |
+
{
|
| 1694 |
+
"architecture_family": null,
|
| 1695 |
+
"case_study": null,
|
| 1696 |
+
"current_limit": "This is a set-level proxy, not a persistent 3D scene graph.",
|
| 1697 |
+
"direction_roles": {
|
| 1698 |
+
"C": "proxy",
|
| 1699 |
+
"D": "direct"
|
| 1700 |
+
},
|
| 1701 |
+
"display_name": "Future object-set forecasting",
|
| 1702 |
+
"evidence_links": [
|
| 1703 |
+
{
|
| 1704 |
+
"href": "data/task_walkthroughs.json",
|
| 1705 |
+
"label": "Task walkthrough"
|
| 1706 |
+
},
|
| 1707 |
+
{
|
| 1708 |
+
"href": "single_episode_explorer.html",
|
| 1709 |
+
"label": "Single-episode explorer"
|
| 1710 |
+
}
|
| 1711 |
+
],
|
| 1712 |
+
"family": "multi-label",
|
| 1713 |
+
"id": "object_set_forecast",
|
| 1714 |
+
"input": null,
|
| 1715 |
+
"input_short": null,
|
| 1716 |
+
"metric": {
|
| 1717 |
+
"better_baseline": "neural_mlp",
|
| 1718 |
+
"direction": "higher",
|
| 1719 |
+
"key": "micro_f1",
|
| 1720 |
+
"minimal": 0.1694,
|
| 1721 |
+
"name": "micro-F1",
|
| 1722 |
+
"neural_mlp": 0.1972
|
| 1723 |
+
},
|
| 1724 |
+
"modalities": [],
|
| 1725 |
+
"module_summary": null,
|
| 1726 |
+
"output_short": null,
|
| 1727 |
+
"primary_direction": "D",
|
| 1728 |
+
"process_short": null,
|
| 1729 |
+
"research_name": "Future object-set forecasting",
|
| 1730 |
+
"why": "Asks whether the current scene state supports predicting which objects will matter later."
|
| 1731 |
+
},
|
| 1732 |
+
{
|
| 1733 |
+
"architecture_family": null,
|
| 1734 |
+
"case_study": null,
|
| 1735 |
+
"current_limit": "Regression is local to the annotated public sample timeline.",
|
| 1736 |
+
"direction_roles": {
|
| 1737 |
+
"C": "diagnostic",
|
| 1738 |
+
"D": "diagnostic"
|
| 1739 |
+
},
|
| 1740 |
+
"display_name": "Time-to-next-transition regression",
|
| 1741 |
+
"evidence_links": [
|
| 1742 |
+
{
|
| 1743 |
+
"href": "data/task_walkthroughs.json",
|
| 1744 |
+
"label": "Task walkthrough"
|
| 1745 |
+
},
|
| 1746 |
+
{
|
| 1747 |
+
"href": "single_episode_explorer.html",
|
| 1748 |
+
"label": "Single-episode explorer"
|
| 1749 |
+
}
|
| 1750 |
+
],
|
| 1751 |
+
"family": "regression",
|
| 1752 |
+
"id": "time_to_transition",
|
| 1753 |
+
"input": null,
|
| 1754 |
+
"input_short": null,
|
| 1755 |
+
"metric": {
|
| 1756 |
+
"better_baseline": "minimal",
|
| 1757 |
+
"direction": "lower",
|
| 1758 |
+
"key": "mae",
|
| 1759 |
+
"minimal": 10.5374,
|
| 1760 |
+
"name": "MAE frames",
|
| 1761 |
+
"neural_mlp": 10.5545
|
| 1762 |
+
},
|
| 1763 |
+
"modalities": [],
|
| 1764 |
+
"module_summary": null,
|
| 1765 |
+
"output_short": null,
|
| 1766 |
+
"primary_direction": "C",
|
| 1767 |
+
"process_short": null,
|
| 1768 |
+
"research_name": "Time-to-next-transition regression",
|
| 1769 |
+
"why": "Measures temporal boundary awareness as a continuous timing target."
|
| 1770 |
}
|
| 1771 |
]
|
| 1772 |
},
|
| 1773 |
{
|
| 1774 |
"code": "D",
|
| 1775 |
"counts": {
|
| 1776 |
+
"diagnostic": 4,
|
| 1777 |
+
"direct": 1,
|
| 1778 |
+
"proxy": 10,
|
| 1779 |
+
"total_links": 15
|
| 1780 |
},
|
| 1781 |
"current_readout": "The current tasks probe temporal structure, object relevance, cross-modal retrieval, and modality prediction, but they do not yet build persistent maps or scene graphs.",
|
| 1782 |
"current_status": "early proxy tasks",
|
|
|
|
| 1807 |
"cross_modal_retrieval",
|
| 1808 |
"modality_reconstruction",
|
| 1809 |
"temporal_order",
|
| 1810 |
+
"misalignment_detection",
|
| 1811 |
+
"long_horizon_next_action",
|
| 1812 |
+
"next_subtask_forecast",
|
| 1813 |
+
"action_object_relation",
|
| 1814 |
+
"object_set_forecast",
|
| 1815 |
+
"camera_view_sync_retrieval",
|
| 1816 |
+
"time_to_transition"
|
| 1817 |
],
|
| 1818 |
"tasks": [
|
| 1819 |
{
|
|
|
|
| 2366 |
"process_short": "aligned/shifted pairs -> feature combiner -> binary classifier",
|
| 2367 |
"research_name": "Cross-Modal Misalignment Detection",
|
| 2368 |
"why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models."
|
| 2369 |
+
},
|
| 2370 |
+
{
|
| 2371 |
+
"architecture_family": null,
|
| 2372 |
+
"case_study": null,
|
| 2373 |
+
"current_limit": "Evaluated from sample-supported future labels, not full open-world action generation.",
|
| 2374 |
+
"direction_roles": {
|
| 2375 |
+
"C": "direct",
|
| 2376 |
+
"D": "proxy"
|
| 2377 |
+
},
|
| 2378 |
+
"display_name": "Long-horizon next-action forecasting",
|
| 2379 |
+
"evidence_links": [
|
| 2380 |
+
{
|
| 2381 |
+
"href": "data/task_walkthroughs.json",
|
| 2382 |
+
"label": "Task walkthrough"
|
| 2383 |
+
},
|
| 2384 |
+
{
|
| 2385 |
+
"href": "single_episode_explorer.html",
|
| 2386 |
+
"label": "Single-episode explorer"
|
| 2387 |
+
}
|
| 2388 |
+
],
|
| 2389 |
+
"family": "classification",
|
| 2390 |
+
"id": "long_horizon_next_action",
|
| 2391 |
+
"input": null,
|
| 2392 |
+
"input_short": null,
|
| 2393 |
+
"metric": {
|
| 2394 |
+
"better_baseline": "minimal",
|
| 2395 |
+
"direction": "higher",
|
| 2396 |
+
"key": "macro_f1",
|
| 2397 |
+
"minimal": 0.075,
|
| 2398 |
+
"name": "macro-F1",
|
| 2399 |
+
"neural_mlp": 0.0655
|
| 2400 |
+
},
|
| 2401 |
+
"modalities": [],
|
| 2402 |
+
"module_summary": null,
|
| 2403 |
+
"output_short": null,
|
| 2404 |
+
"primary_direction": "C",
|
| 2405 |
+
"process_short": null,
|
| 2406 |
+
"research_name": "Long-horizon next-action forecasting",
|
| 2407 |
+
"why": "Extends short-horizon intention prediction into longer activity futures, a key egocentric and world-model signal."
|
| 2408 |
+
},
|
| 2409 |
+
{
|
| 2410 |
+
"architecture_family": null,
|
| 2411 |
+
"case_study": null,
|
| 2412 |
+
"current_limit": "Subtask labels are constrained to the available annotation vocabulary.",
|
| 2413 |
+
"direction_roles": {
|
| 2414 |
+
"C": "direct",
|
| 2415 |
+
"D": "proxy"
|
| 2416 |
+
},
|
| 2417 |
+
"display_name": "Long-horizon next-subtask forecasting",
|
| 2418 |
+
"evidence_links": [
|
| 2419 |
+
{
|
| 2420 |
+
"href": "data/task_walkthroughs.json",
|
| 2421 |
+
"label": "Task walkthrough"
|
| 2422 |
+
},
|
| 2423 |
+
{
|
| 2424 |
+
"href": "single_episode_explorer.html",
|
| 2425 |
+
"label": "Single-episode explorer"
|
| 2426 |
+
}
|
| 2427 |
+
],
|
| 2428 |
+
"family": "classification",
|
| 2429 |
+
"id": "next_subtask_forecast",
|
| 2430 |
+
"input": null,
|
| 2431 |
+
"input_short": null,
|
| 2432 |
+
"metric": {
|
| 2433 |
+
"better_baseline": "neural_mlp",
|
| 2434 |
+
"direction": "higher",
|
| 2435 |
+
"key": "macro_f1",
|
| 2436 |
+
"minimal": 0.0455,
|
| 2437 |
+
"name": "macro-F1",
|
| 2438 |
+
"neural_mlp": 0.0507
|
| 2439 |
+
},
|
| 2440 |
+
"modalities": [],
|
| 2441 |
+
"module_summary": null,
|
| 2442 |
+
"output_short": null,
|
| 2443 |
+
"primary_direction": "C",
|
| 2444 |
+
"process_short": null,
|
| 2445 |
+
"research_name": "Long-horizon next-subtask forecasting",
|
| 2446 |
+
"why": "Measures whether the model can anticipate the next procedural phase rather than only the current frame state."
|
| 2447 |
+
},
|
| 2448 |
+
{
|
| 2449 |
+
"architecture_family": null,
|
| 2450 |
+
"case_study": null,
|
| 2451 |
+
"current_limit": "Relation labels are derived from the public-sample annotation scope.",
|
| 2452 |
+
"direction_roles": {
|
| 2453 |
+
"C": "direct",
|
| 2454 |
+
"D": "proxy"
|
| 2455 |
+
},
|
| 2456 |
+
"display_name": "Action-object relation prediction",
|
| 2457 |
+
"evidence_links": [
|
| 2458 |
+
{
|
| 2459 |
+
"href": "data/task_walkthroughs.json",
|
| 2460 |
+
"label": "Task walkthrough"
|
| 2461 |
+
},
|
| 2462 |
+
{
|
| 2463 |
+
"href": "single_episode_explorer.html",
|
| 2464 |
+
"label": "Single-episode explorer"
|
| 2465 |
+
}
|
| 2466 |
+
],
|
| 2467 |
+
"family": "classification",
|
| 2468 |
+
"id": "action_object_relation",
|
| 2469 |
+
"input": null,
|
| 2470 |
+
"input_short": null,
|
| 2471 |
+
"metric": {
|
| 2472 |
+
"better_baseline": "tie",
|
| 2473 |
+
"direction": "higher",
|
| 2474 |
+
"key": "macro_f1",
|
| 2475 |
+
"minimal": 0.0,
|
| 2476 |
+
"name": "macro-F1",
|
| 2477 |
+
"neural_mlp": 0.0
|
| 2478 |
+
},
|
| 2479 |
+
"modalities": [],
|
| 2480 |
+
"module_summary": null,
|
| 2481 |
+
"output_short": null,
|
| 2482 |
+
"primary_direction": "C",
|
| 2483 |
+
"process_short": null,
|
| 2484 |
+
"research_name": "Action-object relation prediction",
|
| 2485 |
+
"why": "Tests whether action recognition and object state are connected as a relational interaction representation."
|
| 2486 |
+
},
|
| 2487 |
+
{
|
| 2488 |
+
"architecture_family": null,
|
| 2489 |
+
"case_study": null,
|
| 2490 |
+
"current_limit": "This is a set-level proxy, not a persistent 3D scene graph.",
|
| 2491 |
+
"direction_roles": {
|
| 2492 |
+
"C": "proxy",
|
| 2493 |
+
"D": "direct"
|
| 2494 |
+
},
|
| 2495 |
+
"display_name": "Future object-set forecasting",
|
| 2496 |
+
"evidence_links": [
|
| 2497 |
+
{
|
| 2498 |
+
"href": "data/task_walkthroughs.json",
|
| 2499 |
+
"label": "Task walkthrough"
|
| 2500 |
+
},
|
| 2501 |
+
{
|
| 2502 |
+
"href": "single_episode_explorer.html",
|
| 2503 |
+
"label": "Single-episode explorer"
|
| 2504 |
+
}
|
| 2505 |
+
],
|
| 2506 |
+
"family": "multi-label",
|
| 2507 |
+
"id": "object_set_forecast",
|
| 2508 |
+
"input": null,
|
| 2509 |
+
"input_short": null,
|
| 2510 |
+
"metric": {
|
| 2511 |
+
"better_baseline": "neural_mlp",
|
| 2512 |
+
"direction": "higher",
|
| 2513 |
+
"key": "micro_f1",
|
| 2514 |
+
"minimal": 0.1694,
|
| 2515 |
+
"name": "micro-F1",
|
| 2516 |
+
"neural_mlp": 0.1972
|
| 2517 |
+
},
|
| 2518 |
+
"modalities": [],
|
| 2519 |
+
"module_summary": null,
|
| 2520 |
+
"output_short": null,
|
| 2521 |
+
"primary_direction": "D",
|
| 2522 |
+
"process_short": null,
|
| 2523 |
+
"research_name": "Future object-set forecasting",
|
| 2524 |
+
"why": "Asks whether the current scene state supports predicting which objects will matter later."
|
| 2525 |
+
},
|
| 2526 |
+
{
|
| 2527 |
+
"architecture_family": null,
|
| 2528 |
+
"case_study": null,
|
| 2529 |
+
"current_limit": "Retrieval checks view consistency but does not reconstruct geometry by itself.",
|
| 2530 |
+
"direction_roles": {
|
| 2531 |
+
"B": "direct",
|
| 2532 |
+
"D": "proxy"
|
| 2533 |
+
},
|
| 2534 |
+
"display_name": "Camera-view synchronization retrieval",
|
| 2535 |
+
"evidence_links": [
|
| 2536 |
+
{
|
| 2537 |
+
"href": "data/task_walkthroughs.json",
|
| 2538 |
+
"label": "Task walkthrough"
|
| 2539 |
+
},
|
| 2540 |
+
{
|
| 2541 |
+
"href": "single_episode_explorer.html",
|
| 2542 |
+
"label": "Single-episode explorer"
|
| 2543 |
+
}
|
| 2544 |
+
],
|
| 2545 |
+
"family": "retrieval",
|
| 2546 |
+
"id": "camera_view_sync_retrieval",
|
| 2547 |
+
"input": null,
|
| 2548 |
+
"input_short": null,
|
| 2549 |
+
"metric": {
|
| 2550 |
+
"better_baseline": "minimal",
|
| 2551 |
+
"direction": "higher",
|
| 2552 |
+
"key": "mrr",
|
| 2553 |
+
"minimal": 0.4943,
|
| 2554 |
+
"name": "MRR",
|
| 2555 |
+
"neural_mlp": 0.2409
|
| 2556 |
+
},
|
| 2557 |
+
"modalities": [],
|
| 2558 |
+
"module_summary": null,
|
| 2559 |
+
"output_short": null,
|
| 2560 |
+
"primary_direction": "B",
|
| 2561 |
+
"process_short": null,
|
| 2562 |
+
"research_name": "Camera-view synchronization retrieval",
|
| 2563 |
+
"why": "Tests whether synchronized multi-view structure is recoverable across camera streams."
|
| 2564 |
+
},
|
| 2565 |
+
{
|
| 2566 |
+
"architecture_family": null,
|
| 2567 |
+
"case_study": null,
|
| 2568 |
+
"current_limit": "Regression is local to the annotated public sample timeline.",
|
| 2569 |
+
"direction_roles": {
|
| 2570 |
+
"C": "diagnostic",
|
| 2571 |
+
"D": "diagnostic"
|
| 2572 |
+
},
|
| 2573 |
+
"display_name": "Time-to-next-transition regression",
|
| 2574 |
+
"evidence_links": [
|
| 2575 |
+
{
|
| 2576 |
+
"href": "data/task_walkthroughs.json",
|
| 2577 |
+
"label": "Task walkthrough"
|
| 2578 |
+
},
|
| 2579 |
+
{
|
| 2580 |
+
"href": "single_episode_explorer.html",
|
| 2581 |
+
"label": "Single-episode explorer"
|
| 2582 |
+
}
|
| 2583 |
+
],
|
| 2584 |
+
"family": "regression",
|
| 2585 |
+
"id": "time_to_transition",
|
| 2586 |
+
"input": null,
|
| 2587 |
+
"input_short": null,
|
| 2588 |
+
"metric": {
|
| 2589 |
+
"better_baseline": "minimal",
|
| 2590 |
+
"direction": "lower",
|
| 2591 |
+
"key": "mae",
|
| 2592 |
+
"minimal": 10.5374,
|
| 2593 |
+
"name": "MAE frames",
|
| 2594 |
+
"neural_mlp": 10.5545
|
| 2595 |
+
},
|
| 2596 |
+
"modalities": [],
|
| 2597 |
+
"module_summary": null,
|
| 2598 |
+
"output_short": null,
|
| 2599 |
+
"primary_direction": "C",
|
| 2600 |
+
"process_short": null,
|
| 2601 |
+
"research_name": "Time-to-next-transition regression",
|
| 2602 |
+
"why": "Measures temporal boundary awareness as a continuous timing target."
|
| 2603 |
+
}
|
| 2604 |
+
]
|
| 2605 |
+
}
|
| 2606 |
+
],
|
| 2607 |
+
"foundation_model_plan": {
|
| 2608 |
+
"decision": {
|
| 2609 |
+
"external_reasoning_reference": "Gemini Robotics",
|
| 2610 |
+
"first_policy_branch_candidates": [
|
| 2611 |
+
"OpenVLA / OpenVLA-OFT",
|
| 2612 |
+
"openpi pi0/pi0.5",
|
| 2613 |
+
"NVIDIA GR00T"
|
| 2614 |
+
],
|
| 2615 |
+
"first_world_model_branch": "Cosmos 3",
|
| 2616 |
+
"immediate_trainable_backbone": "Qwen3-Omni",
|
| 2617 |
+
"long_term_native_pretraining_goal": "Xperience Embodied Foundation Model"
|
| 2618 |
+
},
|
| 2619 |
+
"evaluation_additions": [
|
| 2620 |
+
{
|
| 2621 |
+
"metrics": [
|
| 2622 |
+
"JSON validity",
|
| 2623 |
+
"macro-F1",
|
| 2624 |
+
"accuracy",
|
| 2625 |
+
"micro-F1"
|
| 2626 |
+
],
|
| 2627 |
+
"model_families": [
|
| 2628 |
+
"Qwen3-Omni",
|
| 2629 |
+
"Gemini Robotics reference"
|
| 2630 |
+
],
|
| 2631 |
"target": "structured_task_prediction"
|
| 2632 |
},
|
| 2633 |
{
|
|
|
|
| 2686 |
},
|
| 2687 |
{
|
| 2688 |
"action": "Promote Cosmos 3 beyond the current Nano compatibility and Super forward-dynamics runs only when loss metrics, preprocessing, and storage justify the added compute.",
|
| 2689 |
+
"name": "World-model track",
|
| 2690 |
"step": 4
|
| 2691 |
},
|
| 2692 |
{
|
|
|
|
| 2862 |
],
|
| 2863 |
"status": "planning_artifact"
|
| 2864 |
},
|
| 2865 |
+
"generated_at_utc": "2026-06-21T10:51:52+00:00",
|
| 2866 |
"omni_plan": {
|
| 2867 |
"adapter": "LoRA rank 16, alpha 32, dropout 0.05",
|
| 2868 |
"backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
|
|
|
|
| 3025 |
],
|
| 3026 |
"deliverables": [
|
| 3027 |
"backbone registry",
|
| 3028 |
+
"Cosmos 3 world-model track plan",
|
| 3029 |
"Cosmos3-Super Forward-Dynamics LoRA verified package",
|
| 3030 |
"Qwen3-Omni LoRA baseline plan",
|
| 3031 |
"OpenVLA/openpi/GR00T policy-branch candidates",
|
|
|
|
| 3034 |
"entry_condition": "The selected episodes are prepared or a 3-8 episode dry run is available for preprocessing checks.",
|
| 3035 |
"id": "foundation_model_selection_matrix",
|
| 3036 |
"name": "Foundation-Model Selection Matrix",
|
| 3037 |
+
"reader_takeaway": "Qwen3-Omni remains the structured JSON held-out pilot; Cosmos 3 is the first world-model track. Cosmos3-Super now has a verified forward-dynamics LoRA over camera-pose proxy targets, while VLA/policy models wait for robot-compatible action targets.",
|
| 3038 |
"stage": "future",
|
| 3039 |
"status": "current"
|
| 3040 |
},
|
|
|
|
| 3076 |
"entry_condition": "Enough multi-episode data, compute budget, and model-specific action/world-state targets.",
|
| 3077 |
"id": "foundation_world_model_extensions",
|
| 3078 |
"name": "Cosmos 3 and Policy-Model Extensions",
|
| 3079 |
+
"reader_takeaway": "The Cosmos3 track now includes Nano future-window compatibility and Super forward-dynamics LoRA; the long-term direction remains richer multimodal representation learning with model tracks chosen by task fit rather than by a single default backbone.",
|
| 3080 |
"stage": "future",
|
| 3081 |
"status": "planned"
|
| 3082 |
},
|
|
|
|
| 3882 |
"process_short": "aligned/shifted pairs -> feature combiner -> binary classifier",
|
| 3883 |
"research_name": "Cross-Modal Misalignment Detection",
|
| 3884 |
"why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models."
|
| 3885 |
+
},
|
| 3886 |
+
{
|
| 3887 |
+
"architecture_family": null,
|
| 3888 |
+
"case_study": null,
|
| 3889 |
+
"current_limit": "Evaluated from sample-supported future labels, not full open-world action generation.",
|
| 3890 |
+
"direction_roles": {
|
| 3891 |
+
"C": "direct",
|
| 3892 |
+
"D": "proxy"
|
| 3893 |
+
},
|
| 3894 |
+
"display_name": "Long-horizon next-action forecasting",
|
| 3895 |
+
"evidence_links": [
|
| 3896 |
+
{
|
| 3897 |
+
"href": "data/task_walkthroughs.json",
|
| 3898 |
+
"label": "Task walkthrough"
|
| 3899 |
+
},
|
| 3900 |
+
{
|
| 3901 |
+
"href": "single_episode_explorer.html",
|
| 3902 |
+
"label": "Single-episode explorer"
|
| 3903 |
+
}
|
| 3904 |
+
],
|
| 3905 |
+
"family": "classification",
|
| 3906 |
+
"id": "long_horizon_next_action",
|
| 3907 |
+
"input": null,
|
| 3908 |
+
"input_short": null,
|
| 3909 |
+
"metric": {
|
| 3910 |
+
"better_baseline": "minimal",
|
| 3911 |
+
"direction": "higher",
|
| 3912 |
+
"key": "macro_f1",
|
| 3913 |
+
"minimal": 0.075,
|
| 3914 |
+
"name": "macro-F1",
|
| 3915 |
+
"neural_mlp": 0.0655
|
| 3916 |
+
},
|
| 3917 |
+
"modalities": [],
|
| 3918 |
+
"module_summary": null,
|
| 3919 |
+
"output_short": null,
|
| 3920 |
+
"primary_direction": "C",
|
| 3921 |
+
"process_short": null,
|
| 3922 |
+
"research_name": "Long-horizon next-action forecasting",
|
| 3923 |
+
"why": "Extends short-horizon intention prediction into longer activity futures, a key egocentric and world-model signal."
|
| 3924 |
+
},
|
| 3925 |
+
{
|
| 3926 |
+
"architecture_family": null,
|
| 3927 |
+
"case_study": null,
|
| 3928 |
+
"current_limit": "Subtask labels are constrained to the available annotation vocabulary.",
|
| 3929 |
+
"direction_roles": {
|
| 3930 |
+
"C": "direct",
|
| 3931 |
+
"D": "proxy"
|
| 3932 |
+
},
|
| 3933 |
+
"display_name": "Long-horizon next-subtask forecasting",
|
| 3934 |
+
"evidence_links": [
|
| 3935 |
+
{
|
| 3936 |
+
"href": "data/task_walkthroughs.json",
|
| 3937 |
+
"label": "Task walkthrough"
|
| 3938 |
+
},
|
| 3939 |
+
{
|
| 3940 |
+
"href": "single_episode_explorer.html",
|
| 3941 |
+
"label": "Single-episode explorer"
|
| 3942 |
+
}
|
| 3943 |
+
],
|
| 3944 |
+
"family": "classification",
|
| 3945 |
+
"id": "next_subtask_forecast",
|
| 3946 |
+
"input": null,
|
| 3947 |
+
"input_short": null,
|
| 3948 |
+
"metric": {
|
| 3949 |
+
"better_baseline": "neural_mlp",
|
| 3950 |
+
"direction": "higher",
|
| 3951 |
+
"key": "macro_f1",
|
| 3952 |
+
"minimal": 0.0455,
|
| 3953 |
+
"name": "macro-F1",
|
| 3954 |
+
"neural_mlp": 0.0507
|
| 3955 |
+
},
|
| 3956 |
+
"modalities": [],
|
| 3957 |
+
"module_summary": null,
|
| 3958 |
+
"output_short": null,
|
| 3959 |
+
"primary_direction": "C",
|
| 3960 |
+
"process_short": null,
|
| 3961 |
+
"research_name": "Long-horizon next-subtask forecasting",
|
| 3962 |
+
"why": "Measures whether the model can anticipate the next procedural phase rather than only the current frame state."
|
| 3963 |
+
},
|
| 3964 |
+
{
|
| 3965 |
+
"architecture_family": null,
|
| 3966 |
+
"case_study": null,
|
| 3967 |
+
"current_limit": "Public derived features retain hashed text targets; raw full text requires the official annotation source.",
|
| 3968 |
+
"direction_roles": {
|
| 3969 |
+
"A": "proxy",
|
| 3970 |
+
"C": "direct"
|
| 3971 |
+
},
|
| 3972 |
+
"display_name": "Interaction text prediction",
|
| 3973 |
+
"evidence_links": [
|
| 3974 |
+
{
|
| 3975 |
+
"href": "data/task_walkthroughs.json",
|
| 3976 |
+
"label": "Task walkthrough"
|
| 3977 |
+
},
|
| 3978 |
+
{
|
| 3979 |
+
"href": "single_episode_explorer.html",
|
| 3980 |
+
"label": "Single-episode explorer"
|
| 3981 |
+
}
|
| 3982 |
+
],
|
| 3983 |
+
"family": "classification",
|
| 3984 |
+
"id": "interaction_text_prediction",
|
| 3985 |
+
"input": null,
|
| 3986 |
+
"input_short": null,
|
| 3987 |
+
"metric": {
|
| 3988 |
+
"better_baseline": "minimal",
|
| 3989 |
+
"direction": "higher",
|
| 3990 |
+
"key": "macro_f1",
|
| 3991 |
+
"minimal": 0.0444,
|
| 3992 |
+
"name": "macro-F1",
|
| 3993 |
+
"neural_mlp": 0.0381
|
| 3994 |
+
},
|
| 3995 |
+
"modalities": [],
|
| 3996 |
+
"module_summary": null,
|
| 3997 |
+
"output_short": null,
|
| 3998 |
+
"primary_direction": "C",
|
| 3999 |
+
"process_short": null,
|
| 4000 |
+
"research_name": "Interaction text prediction",
|
| 4001 |
+
"why": "Connects egocentric observations to the natural-language interaction semantics carried by the annotation."
|
| 4002 |
+
},
|
| 4003 |
+
{
|
| 4004 |
+
"architecture_family": null,
|
| 4005 |
+
"case_study": null,
|
| 4006 |
+
"current_limit": "Relation labels are derived from the public-sample annotation scope.",
|
| 4007 |
+
"direction_roles": {
|
| 4008 |
+
"C": "direct",
|
| 4009 |
+
"D": "proxy"
|
| 4010 |
+
},
|
| 4011 |
+
"display_name": "Action-object relation prediction",
|
| 4012 |
+
"evidence_links": [
|
| 4013 |
+
{
|
| 4014 |
+
"href": "data/task_walkthroughs.json",
|
| 4015 |
+
"label": "Task walkthrough"
|
| 4016 |
+
},
|
| 4017 |
+
{
|
| 4018 |
+
"href": "single_episode_explorer.html",
|
| 4019 |
+
"label": "Single-episode explorer"
|
| 4020 |
+
}
|
| 4021 |
+
],
|
| 4022 |
+
"family": "classification",
|
| 4023 |
+
"id": "action_object_relation",
|
| 4024 |
+
"input": null,
|
| 4025 |
+
"input_short": null,
|
| 4026 |
+
"metric": {
|
| 4027 |
+
"better_baseline": "tie",
|
| 4028 |
+
"direction": "higher",
|
| 4029 |
+
"key": "macro_f1",
|
| 4030 |
+
"minimal": 0.0,
|
| 4031 |
+
"name": "macro-F1",
|
| 4032 |
+
"neural_mlp": 0.0
|
| 4033 |
+
},
|
| 4034 |
+
"modalities": [],
|
| 4035 |
+
"module_summary": null,
|
| 4036 |
+
"output_short": null,
|
| 4037 |
+
"primary_direction": "C",
|
| 4038 |
+
"process_short": null,
|
| 4039 |
+
"research_name": "Action-object relation prediction",
|
| 4040 |
+
"why": "Tests whether action recognition and object state are connected as a relational interaction representation."
|
| 4041 |
+
},
|
| 4042 |
+
{
|
| 4043 |
+
"architecture_family": null,
|
| 4044 |
+
"case_study": null,
|
| 4045 |
+
"current_limit": "This is a set-level proxy, not a persistent 3D scene graph.",
|
| 4046 |
+
"direction_roles": {
|
| 4047 |
+
"C": "proxy",
|
| 4048 |
+
"D": "direct"
|
| 4049 |
+
},
|
| 4050 |
+
"display_name": "Future object-set forecasting",
|
| 4051 |
+
"evidence_links": [
|
| 4052 |
+
{
|
| 4053 |
+
"href": "data/task_walkthroughs.json",
|
| 4054 |
+
"label": "Task walkthrough"
|
| 4055 |
+
},
|
| 4056 |
+
{
|
| 4057 |
+
"href": "single_episode_explorer.html",
|
| 4058 |
+
"label": "Single-episode explorer"
|
| 4059 |
+
}
|
| 4060 |
+
],
|
| 4061 |
+
"family": "multi-label",
|
| 4062 |
+
"id": "object_set_forecast",
|
| 4063 |
+
"input": null,
|
| 4064 |
+
"input_short": null,
|
| 4065 |
+
"metric": {
|
| 4066 |
+
"better_baseline": "neural_mlp",
|
| 4067 |
+
"direction": "higher",
|
| 4068 |
+
"key": "micro_f1",
|
| 4069 |
+
"minimal": 0.1694,
|
| 4070 |
+
"name": "micro-F1",
|
| 4071 |
+
"neural_mlp": 0.1972
|
| 4072 |
+
},
|
| 4073 |
+
"modalities": [],
|
| 4074 |
+
"module_summary": null,
|
| 4075 |
+
"output_short": null,
|
| 4076 |
+
"primary_direction": "D",
|
| 4077 |
+
"process_short": null,
|
| 4078 |
+
"research_name": "Future object-set forecasting",
|
| 4079 |
+
"why": "Asks whether the current scene state supports predicting which objects will matter later."
|
| 4080 |
+
},
|
| 4081 |
+
{
|
| 4082 |
+
"architecture_family": null,
|
| 4083 |
+
"case_study": null,
|
| 4084 |
+
"current_limit": "Pose reconstruction is window-level and does not yet fit a full parametric hand/body model.",
|
| 4085 |
+
"direction_roles": {
|
| 4086 |
+
"A": "direct",
|
| 4087 |
+
"B": "proxy"
|
| 4088 |
+
},
|
| 4089 |
+
"display_name": "IMU-to-hand pose reconstruction",
|
| 4090 |
+
"evidence_links": [
|
| 4091 |
+
{
|
| 4092 |
+
"href": "data/task_walkthroughs.json",
|
| 4093 |
+
"label": "Task walkthrough"
|
| 4094 |
+
},
|
| 4095 |
+
{
|
| 4096 |
+
"href": "single_episode_explorer.html",
|
| 4097 |
+
"label": "Single-episode explorer"
|
| 4098 |
+
}
|
| 4099 |
+
],
|
| 4100 |
+
"family": "regression",
|
| 4101 |
+
"id": "imu_to_hand_pose",
|
| 4102 |
+
"input": null,
|
| 4103 |
+
"input_short": null,
|
| 4104 |
+
"metric": {
|
| 4105 |
+
"better_baseline": "minimal",
|
| 4106 |
+
"direction": "lower",
|
| 4107 |
+
"key": "mae",
|
| 4108 |
+
"minimal": 0.042,
|
| 4109 |
+
"name": "MAE",
|
| 4110 |
+
"neural_mlp": 0.0426
|
| 4111 |
+
},
|
| 4112 |
+
"modalities": [],
|
| 4113 |
+
"module_summary": null,
|
| 4114 |
+
"output_short": null,
|
| 4115 |
+
"primary_direction": "A",
|
| 4116 |
+
"process_short": null,
|
| 4117 |
+
"research_name": "IMU-to-hand pose reconstruction",
|
| 4118 |
+
"why": "Measures human-motion reconstruction from wearable and motion cues."
|
| 4119 |
+
},
|
| 4120 |
+
{
|
| 4121 |
+
"architecture_family": null,
|
| 4122 |
+
"case_study": null,
|
| 4123 |
+
"current_limit": "Retrieval checks view consistency but does not reconstruct geometry by itself.",
|
| 4124 |
+
"direction_roles": {
|
| 4125 |
+
"B": "direct",
|
| 4126 |
+
"D": "proxy"
|
| 4127 |
+
},
|
| 4128 |
+
"display_name": "Camera-view synchronization retrieval",
|
| 4129 |
+
"evidence_links": [
|
| 4130 |
+
{
|
| 4131 |
+
"href": "data/task_walkthroughs.json",
|
| 4132 |
+
"label": "Task walkthrough"
|
| 4133 |
+
},
|
| 4134 |
+
{
|
| 4135 |
+
"href": "single_episode_explorer.html",
|
| 4136 |
+
"label": "Single-episode explorer"
|
| 4137 |
+
}
|
| 4138 |
+
],
|
| 4139 |
+
"family": "retrieval",
|
| 4140 |
+
"id": "camera_view_sync_retrieval",
|
| 4141 |
+
"input": null,
|
| 4142 |
+
"input_short": null,
|
| 4143 |
+
"metric": {
|
| 4144 |
+
"better_baseline": "minimal",
|
| 4145 |
+
"direction": "higher",
|
| 4146 |
+
"key": "mrr",
|
| 4147 |
+
"minimal": 0.4943,
|
| 4148 |
+
"name": "MRR",
|
| 4149 |
+
"neural_mlp": 0.2409
|
| 4150 |
+
},
|
| 4151 |
+
"modalities": [],
|
| 4152 |
+
"module_summary": null,
|
| 4153 |
+
"output_short": null,
|
| 4154 |
+
"primary_direction": "B",
|
| 4155 |
+
"process_short": null,
|
| 4156 |
+
"research_name": "Camera-view synchronization retrieval",
|
| 4157 |
+
"why": "Tests whether synchronized multi-view structure is recoverable across camera streams."
|
| 4158 |
+
},
|
| 4159 |
+
{
|
| 4160 |
+
"architecture_family": null,
|
| 4161 |
+
"case_study": null,
|
| 4162 |
+
"current_limit": "Regression is local to the annotated public sample timeline.",
|
| 4163 |
+
"direction_roles": {
|
| 4164 |
+
"C": "diagnostic",
|
| 4165 |
+
"D": "diagnostic"
|
| 4166 |
+
},
|
| 4167 |
+
"display_name": "Time-to-next-transition regression",
|
| 4168 |
+
"evidence_links": [
|
| 4169 |
+
{
|
| 4170 |
+
"href": "data/task_walkthroughs.json",
|
| 4171 |
+
"label": "Task walkthrough"
|
| 4172 |
+
},
|
| 4173 |
+
{
|
| 4174 |
+
"href": "single_episode_explorer.html",
|
| 4175 |
+
"label": "Single-episode explorer"
|
| 4176 |
+
}
|
| 4177 |
+
],
|
| 4178 |
+
"family": "regression",
|
| 4179 |
+
"id": "time_to_transition",
|
| 4180 |
+
"input": null,
|
| 4181 |
+
"input_short": null,
|
| 4182 |
+
"metric": {
|
| 4183 |
+
"better_baseline": "minimal",
|
| 4184 |
+
"direction": "lower",
|
| 4185 |
+
"key": "mae",
|
| 4186 |
+
"minimal": 10.5374,
|
| 4187 |
+
"name": "MAE frames",
|
| 4188 |
+
"neural_mlp": 10.5545
|
| 4189 |
+
},
|
| 4190 |
+
"modalities": [],
|
| 4191 |
+
"module_summary": null,
|
| 4192 |
+
"output_short": null,
|
| 4193 |
+
"primary_direction": "C",
|
| 4194 |
+
"process_short": null,
|
| 4195 |
+
"research_name": "Time-to-next-transition regression",
|
| 4196 |
+
"why": "Measures temporal boundary awareness as a continuous timing target."
|
| 4197 |
}
|
| 4198 |
],
|
| 4199 |
"three_foundation_pipelines": {
|
docs/data/scope_claims_audit.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"summary": {
|
| 5 |
"qwen3_omni_verified_diagnostic_pilot": true,
|
| 6 |
"dataset_manifest_num_episodes": 119,
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-21T11:08:09+00:00",
|
| 4 |
"summary": {
|
| 5 |
"qwen3_omni_verified_diagnostic_pilot": true,
|
| 6 |
"dataset_manifest_num_episodes": 119,
|
docs/data/single_episode_task_model_radar.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Single-Episode 20-Task Radar",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"description": "Minimal and Neural MLP baselines on the one public sample episode, both scored on all 20 task contracts.",
|
| 6 |
"task_count": 20,
|
| 7 |
"method_count": 2,
|
|
@@ -12,7 +12,7 @@
|
|
| 12 |
"lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
|
| 13 |
"raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
|
| 14 |
"result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
|
| 15 |
-
"foundation_model_overlay": "Qwen3
|
| 16 |
"metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
|
| 17 |
"raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
|
| 18 |
},
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Single-Episode 20-Task Radar",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-21T10:47:17+00:00",
|
| 5 |
"description": "Minimal and Neural MLP baselines on the one public sample episode, both scored on all 20 task contracts.",
|
| 6 |
"task_count": 20,
|
| 7 |
"method_count": 2,
|
|
|
|
| 12 |
"lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
|
| 13 |
"raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
|
| 14 |
"result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
|
| 15 |
+
"foundation_model_overlay": "Qwen3-Omni and Cosmos3 points are plotted only on task-aligned axes. Scoreless records mean the public result does not evaluate that task contract.",
|
| 16 |
"metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
|
| 17 |
"raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
|
| 18 |
},
|
docs/data/source_alignment_audit.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Source Alignment Note",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
|
| 6 |
"alignment_summary": {
|
| 7 |
"full_dataset_repo": "ropedia-ai/xperience-10m",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Source Alignment Note",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-21T11:08:07+00:00",
|
| 5 |
"alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
|
| 6 |
"alignment_summary": {
|
| 7 |
"full_dataset_repo": "ropedia-ai/xperience-10m",
|
docs/data/task_method_20_result_matrix.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Task Method 20-Result Matrix",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"task_count": 20,
|
| 6 |
"method_count": 9,
|
| 7 |
"method_task_record_count": 180,
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Task Method 20-Result Matrix",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-21T10:47:17+00:00",
|
| 5 |
"task_count": 20,
|
| 6 |
"method_count": 9,
|
| 7 |
"method_task_record_count": 180,
|
docs/data/task_method_20_source_audit.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
"checked_json_metric_count": 180,
|
| 3 |
"failure_count": 0,
|
| 4 |
"failures": [],
|
| 5 |
-
"generated_at_utc": "2026-06-
|
| 6 |
"method_task_record_count": 180,
|
| 7 |
"rule": "Every scored row that declares a JSON metric source must have the same numeric value under that row's metric_key.",
|
| 8 |
"scored_method_task_count": 180,
|
|
|
|
| 2 |
"checked_json_metric_count": 180,
|
| 3 |
"failure_count": 0,
|
| 4 |
"failures": [],
|
| 5 |
+
"generated_at_utc": "2026-06-21T11:07:42+00:00",
|
| 6 |
"method_task_record_count": 180,
|
| 7 |
"rule": "Every scored row that declares a JSON metric source must have the same numeric value under that row's metric_key.",
|
| 8 |
"scored_method_task_count": 180,
|
docs/data/task_suite_enhancement_128.json
CHANGED
|
@@ -181,7 +181,7 @@
|
|
| 181 |
],
|
| 182 |
"public_safety": [
|
| 183 |
"No raw MP4/HDF5/RRD files are written.",
|
| 184 |
-
"No full
|
| 185 |
"Generated labels and aggregate metrics remain public-safe derived metadata."
|
| 186 |
]
|
| 187 |
},
|
|
|
|
| 181 |
],
|
| 182 |
"public_safety": [
|
| 183 |
"No raw MP4/HDF5/RRD files are written.",
|
| 184 |
+
"No full Qwen3-Omni or Cosmos3 base weights are mirrored.",
|
| 185 |
"Generated labels and aggregate metrics remain public-safe derived metadata."
|
| 186 |
]
|
| 187 |
},
|
docs/data/task_surface_integrity.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"summary": {
|
| 5 |
"original_walkthrough_task_count": 12,
|
| 6 |
"expected_original_walkthrough_task_count": 12,
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-21T11:08:07+00:00",
|
| 4 |
"summary": {
|
| 5 |
"original_walkthrough_task_count": 12,
|
| 6 |
"expected_original_walkthrough_task_count": 12,
|
docs/data/two_evidence_line_result_summary.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
-
"generated_at_utc": "2026-06-21T10:
|
| 3 |
-
"interpretation_rule": "Use the 1-episode line for task construction and reproducibility claims. Use the 128-episode line for
|
| 4 |
"lines": [
|
| 5 |
{
|
| 6 |
"artifact_entry_points": [
|
|
@@ -66,7 +66,7 @@
|
|
| 66 |
"docs/data/qwen3_omni_run_lineage.json",
|
| 67 |
"docs/data/task_method_20_gap_audit.json"
|
| 68 |
],
|
| 69 |
-
"claim_boundary": "Supports same-split comparison,
|
| 70 |
"data_unit": "Selected held-out 96/16/16 split with public-safe processed features linked to official gated episode paths",
|
| 71 |
"direct_scored_method_task_count": 134,
|
| 72 |
"id": "selected_128_episode_surface",
|
|
@@ -171,14 +171,14 @@
|
|
| 171 |
}
|
| 172 |
],
|
| 173 |
"not_for": "Do not read compact-proxy cells as direct raw-target measurements.",
|
| 174 |
-
"primary_use": "Compare same-split baselines and
|
| 175 |
"primary_visuals": [
|
| 176 |
"docs/assets/charts/two_evidence_line_map.svg",
|
| 177 |
"docs/assets/charts/episode128_task_model_radar.svg",
|
| 178 |
"docs/assets/charts/unified_task_model_radar.svg"
|
| 179 |
],
|
| 180 |
"proxy_scored_method_task_count": 6,
|
| 181 |
-
"result_statement": "140/140 selected-128 scores across seven
|
| 182 |
"scored_method_task_count": 140,
|
| 183 |
"short_label": "Line 2",
|
| 184 |
"task_count": 20
|
|
@@ -344,7 +344,7 @@
|
|
| 344 |
"step": "Choose the evidence line"
|
| 345 |
},
|
| 346 |
{
|
| 347 |
-
"reason": "Use the 1-episode radar for Minimal-vs-Neural behavior and the 128-episode radar for
|
| 348 |
"step": "Open the matching radar"
|
| 349 |
},
|
| 350 |
{
|
|
|
|
| 1 |
{
|
| 2 |
+
"generated_at_utc": "2026-06-21T10:47:04+00:00",
|
| 3 |
+
"interpretation_rule": "Use the 1-episode line for task construction and reproducibility claims. Use the 128-episode line for same-split metadata/raw baselines, Qwen3-Omni v6 LoRA diagnostics, and Cosmos3 diagnostics.",
|
| 4 |
"lines": [
|
| 5 |
{
|
| 6 |
"artifact_entry_points": [
|
|
|
|
| 66 |
"docs/data/qwen3_omni_run_lineage.json",
|
| 67 |
"docs/data/task_method_20_gap_audit.json"
|
| 68 |
],
|
| 69 |
+
"claim_boundary": "Supports same-split metadata/raw baseline comparison, Qwen3-Omni v6 diagnostics, Cosmos3 diagnostics, and scale-up planning on public-safe processed artifacts.",
|
| 70 |
"data_unit": "Selected held-out 96/16/16 split with public-safe processed features linked to official gated episode paths",
|
| 71 |
"direct_scored_method_task_count": 134,
|
| 72 |
"id": "selected_128_episode_surface",
|
|
|
|
| 171 |
}
|
| 172 |
],
|
| 173 |
"not_for": "Do not read compact-proxy cells as direct raw-target measurements.",
|
| 174 |
+
"primary_use": "Compare same-split metadata/raw baselines, Qwen3-Omni v6 LoRA, Cosmos3-Super Reasoner, and Cosmos3-Nano Future Window while keeping evidence type explicit.",
|
| 175 |
"primary_visuals": [
|
| 176 |
"docs/assets/charts/two_evidence_line_map.svg",
|
| 177 |
"docs/assets/charts/episode128_task_model_radar.svg",
|
| 178 |
"docs/assets/charts/unified_task_model_radar.svg"
|
| 179 |
],
|
| 180 |
"proxy_scored_method_task_count": 6,
|
| 181 |
+
"result_statement": "140/140 selected-128 scores across seven methods: 134 direct scores plus 6 documented compact-proxy scores.",
|
| 182 |
"scored_method_task_count": 140,
|
| 183 |
"short_label": "Line 2",
|
| 184 |
"task_count": 20
|
|
|
|
| 344 |
"step": "Choose the evidence line"
|
| 345 |
},
|
| 346 |
{
|
| 347 |
+
"reason": "Use the 1-episode radar for Minimal-vs-Neural behavior and the 128-episode radar for metadata/raw baselines, Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano.",
|
| 348 |
"step": "Open the matching radar"
|
| 349 |
},
|
| 350 |
{
|
docs/data/two_evidence_lines.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"status": "current",
|
| 3 |
"updated_utc": "2026-06-21T00:00:00Z",
|
| 4 |
-
"interpretation_rule": "Use the 1-episode line for task construction and reproducibility claims. Use the 128-episode line for
|
| 5 |
"reader_summary": "The suite has two public result lines. Line 1 is the fully inspectable one-episode task lab. Line 2 is the 128-episode comparison surface for aligned baselines, the Qwen3-Omni series, and the Cosmos3 series. Do not mix the two when reading scores.",
|
| 6 |
"score_formula": "2 single-episode methods x 20 tasks = 40 records; 7 selected-128 methods x 20 tasks = 140 records; total public matrix = 180/180 scored records.",
|
| 7 |
"lines": [
|
|
@@ -44,8 +44,8 @@
|
|
| 44 |
"label": "128 selected episodes",
|
| 45 |
"short_label": "Line 2",
|
| 46 |
"data_unit": "Selected held-out 96/16/16 split with public-safe processed features linked to official gated episode paths",
|
| 47 |
-
"result_statement": "140/140 selected-128 scores across seven
|
| 48 |
-
"claim_boundary": "Supports same-split comparison,
|
| 49 |
"not_for": "Do not read compact-proxy cells as direct raw-target measurements.",
|
| 50 |
"episodes": 128,
|
| 51 |
"split": {
|
|
@@ -69,7 +69,7 @@
|
|
| 69 |
"direct_scored_records": 134,
|
| 70 |
"proxy_scored_records": 6,
|
| 71 |
"proxy_policy": "Proxy flags remain visible where the public export lacks a direct raw target.",
|
| 72 |
-
"best_use": "Compare same-split baselines and
|
| 73 |
"primary_visuals": [
|
| 74 |
"docs/assets/charts/two_evidence_line_map.svg",
|
| 75 |
"docs/assets/charts/episode128_task_model_radar.svg",
|
|
|
|
| 1 |
{
|
| 2 |
"status": "current",
|
| 3 |
"updated_utc": "2026-06-21T00:00:00Z",
|
| 4 |
+
"interpretation_rule": "Use the 1-episode line for task construction and reproducibility claims. Use the 128-episode line for same-split metadata/raw baselines, Qwen3-Omni v6 LoRA diagnostics, and Cosmos3 diagnostics.",
|
| 5 |
"reader_summary": "The suite has two public result lines. Line 1 is the fully inspectable one-episode task lab. Line 2 is the 128-episode comparison surface for aligned baselines, the Qwen3-Omni series, and the Cosmos3 series. Do not mix the two when reading scores.",
|
| 6 |
"score_formula": "2 single-episode methods x 20 tasks = 40 records; 7 selected-128 methods x 20 tasks = 140 records; total public matrix = 180/180 scored records.",
|
| 7 |
"lines": [
|
|
|
|
| 44 |
"label": "128 selected episodes",
|
| 45 |
"short_label": "Line 2",
|
| 46 |
"data_unit": "Selected held-out 96/16/16 split with public-safe processed features linked to official gated episode paths",
|
| 47 |
+
"result_statement": "140/140 selected-128 scores across seven methods: 134 direct scores plus 6 documented compact-proxy scores.",
|
| 48 |
+
"claim_boundary": "Supports same-split metadata/raw baseline comparison, Qwen3-Omni v6 diagnostics, Cosmos3 diagnostics, and scale-up planning on public-safe processed artifacts.",
|
| 49 |
"not_for": "Do not read compact-proxy cells as direct raw-target measurements.",
|
| 50 |
"episodes": 128,
|
| 51 |
"split": {
|
|
|
|
| 69 |
"direct_scored_records": 134,
|
| 70 |
"proxy_scored_records": 6,
|
| 71 |
"proxy_policy": "Proxy flags remain visible where the public export lacks a direct raw target.",
|
| 72 |
+
"best_use": "Compare same-split metadata/raw baselines, Qwen3-Omni v6 LoRA, Cosmos3-Super Reasoner, and Cosmos3-Nano Future Window while keeping evidence type explicit.",
|
| 73 |
"primary_visuals": [
|
| 74 |
"docs/assets/charts/two_evidence_line_map.svg",
|
| 75 |
"docs/assets/charts/episode128_task_model_radar.svg",
|
docs/data/unified_task_model_radar.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Unified 20-Task Model Radar",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"task_count": 20,
|
| 6 |
"method_count": 9,
|
| 7 |
"method_task_record_count": 180,
|
|
@@ -11,7 +11,7 @@
|
|
| 11 |
"lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
|
| 12 |
"raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
|
| 13 |
"result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
|
| 14 |
-
"foundation_model_overlay": "Qwen3
|
| 15 |
"metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
|
| 16 |
"raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
|
| 17 |
},
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Unified 20-Task Model Radar",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-21T10:47:17+00:00",
|
| 5 |
"task_count": 20,
|
| 6 |
"method_count": 9,
|
| 7 |
"method_task_record_count": 180,
|
|
|
|
| 11 |
"lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
|
| 12 |
"raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
|
| 13 |
"result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
|
| 14 |
+
"foundation_model_overlay": "Qwen3-Omni and Cosmos3 points are plotted only on task-aligned axes. Scoreless records mean the public result does not evaluate that task contract.",
|
| 15 |
"metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
|
| 16 |
"raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
|
| 17 |
},
|
docs/data/website_integrity.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"docs_root": "docs",
|
| 5 |
"site_base": "/ropedia-xperience-10m-task-suite/",
|
| 6 |
"summary": {
|
|
@@ -80,8 +80,8 @@
|
|
| 80 |
"name": "project_overview_precedes_progress_ledger",
|
| 81 |
"status": "pass",
|
| 82 |
"reason": "The project overview should appear before the deeper progress ledger.",
|
| 83 |
-
"overview_index":
|
| 84 |
-
"evidence_index":
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"name": "project_status_links_json",
|
|
@@ -159,9 +159,9 @@
|
|
| 159 |
"name": "evaluation_protocol_between_overview_and_progress",
|
| 160 |
"status": "pass",
|
| 161 |
"reason": "The evaluation protocol should appear before the deeper evidence ledger.",
|
| 162 |
-
"overview_index":
|
| 163 |
-
"protocol_index":
|
| 164 |
-
"evidence_index":
|
| 165 |
},
|
| 166 |
{
|
| 167 |
"name": "evaluation_protocol_links_json",
|
|
@@ -180,7 +180,7 @@
|
|
| 180 |
"status": "pass",
|
| 181 |
"reason": "The Suite anchor should show the task-suite map before the modality atlas.",
|
| 182 |
"first_marker_index": 471,
|
| 183 |
-
"second_marker_index":
|
| 184 |
},
|
| 185 |
{
|
| 186 |
"name": "suite_modality_atlas_contains_seven_cards",
|
|
@@ -296,12 +296,12 @@
|
|
| 296 |
"json_files": [
|
| 297 |
{
|
| 298 |
"path": "data/additional_development_directions.json",
|
| 299 |
-
"bytes":
|
| 300 |
"top_level_type": "dict"
|
| 301 |
},
|
| 302 |
{
|
| 303 |
"path": "data/artifact_index.json",
|
| 304 |
-
"bytes":
|
| 305 |
"top_level_type": "dict"
|
| 306 |
},
|
| 307 |
{
|
|
@@ -316,7 +316,7 @@
|
|
| 316 |
},
|
| 317 |
{
|
| 318 |
"path": "data/episode128_task_model_radar.json",
|
| 319 |
-
"bytes":
|
| 320 |
"top_level_type": "dict"
|
| 321 |
},
|
| 322 |
{
|
|
@@ -331,12 +331,12 @@
|
|
| 331 |
},
|
| 332 |
{
|
| 333 |
"path": "data/figure_index.json",
|
| 334 |
-
"bytes":
|
| 335 |
"top_level_type": "dict"
|
| 336 |
},
|
| 337 |
{
|
| 338 |
"path": "data/foundation_model_plan.json",
|
| 339 |
-
"bytes":
|
| 340 |
"top_level_type": "dict"
|
| 341 |
},
|
| 342 |
{
|
|
@@ -346,12 +346,12 @@
|
|
| 346 |
},
|
| 347 |
{
|
| 348 |
"path": "data/live_publication_status.json",
|
| 349 |
-
"bytes":
|
| 350 |
"top_level_type": "dict"
|
| 351 |
},
|
| 352 |
{
|
| 353 |
"path": "data/mirror_parity.json",
|
| 354 |
-
"bytes":
|
| 355 |
"top_level_type": "dict"
|
| 356 |
},
|
| 357 |
{
|
|
@@ -361,12 +361,12 @@
|
|
| 361 |
},
|
| 362 |
{
|
| 363 |
"path": "data/omni_finetune_verified_result.json",
|
| 364 |
-
"bytes":
|
| 365 |
"top_level_type": "dict"
|
| 366 |
},
|
| 367 |
{
|
| 368 |
"path": "data/omni_model_comparison.json",
|
| 369 |
-
"bytes":
|
| 370 |
"top_level_type": "dict"
|
| 371 |
},
|
| 372 |
{
|
|
@@ -386,12 +386,12 @@
|
|
| 386 |
},
|
| 387 |
{
|
| 388 |
"path": "data/project_status.json",
|
| 389 |
-
"bytes":
|
| 390 |
"top_level_type": "dict"
|
| 391 |
},
|
| 392 |
{
|
| 393 |
"path": "data/public_reader_map.json",
|
| 394 |
-
"bytes":
|
| 395 |
"top_level_type": "dict"
|
| 396 |
},
|
| 397 |
{
|
|
@@ -416,7 +416,7 @@
|
|
| 416 |
},
|
| 417 |
{
|
| 418 |
"path": "data/qwen3_omni_run_lineage.json",
|
| 419 |
-
"bytes":
|
| 420 |
"top_level_type": "dict"
|
| 421 |
},
|
| 422 |
{
|
|
@@ -451,12 +451,12 @@
|
|
| 451 |
},
|
| 452 |
{
|
| 453 |
"path": "data/research_roadmap.json",
|
| 454 |
-
"bytes":
|
| 455 |
"top_level_type": "dict"
|
| 456 |
},
|
| 457 |
{
|
| 458 |
"path": "data/research_roadmap_interactive.json",
|
| 459 |
-
"bytes":
|
| 460 |
"top_level_type": "dict"
|
| 461 |
},
|
| 462 |
{
|
|
@@ -476,7 +476,7 @@
|
|
| 476 |
},
|
| 477 |
{
|
| 478 |
"path": "data/single_episode_task_model_radar.json",
|
| 479 |
-
"bytes":
|
| 480 |
"top_level_type": "dict"
|
| 481 |
},
|
| 482 |
{
|
|
@@ -511,7 +511,7 @@
|
|
| 511 |
},
|
| 512 |
{
|
| 513 |
"path": "data/task_suite_enhancement_128.json",
|
| 514 |
-
"bytes":
|
| 515 |
"top_level_type": "dict"
|
| 516 |
},
|
| 517 |
{
|
|
@@ -536,22 +536,22 @@
|
|
| 536 |
},
|
| 537 |
{
|
| 538 |
"path": "data/two_evidence_line_result_summary.json",
|
| 539 |
-
"bytes":
|
| 540 |
"top_level_type": "dict"
|
| 541 |
},
|
| 542 |
{
|
| 543 |
"path": "data/two_evidence_lines.json",
|
| 544 |
-
"bytes":
|
| 545 |
"top_level_type": "dict"
|
| 546 |
},
|
| 547 |
{
|
| 548 |
"path": "data/unified_task_model_radar.json",
|
| 549 |
-
"bytes":
|
| 550 |
"top_level_type": "dict"
|
| 551 |
},
|
| 552 |
{
|
| 553 |
"path": "data/website_integrity.json",
|
| 554 |
-
"bytes":
|
| 555 |
"top_level_type": "dict"
|
| 556 |
},
|
| 557 |
{
|
|
@@ -591,7 +591,7 @@
|
|
| 591 |
{
|
| 592 |
"path": "assets/charts/episode128_task_model_radar.svg",
|
| 593 |
"exists": true,
|
| 594 |
-
"bytes":
|
| 595 |
"format": "SVG",
|
| 596 |
"has_viewbox": true
|
| 597 |
},
|
|
@@ -633,7 +633,7 @@
|
|
| 633 |
{
|
| 634 |
"path": "assets/charts/research_direction_coverage.svg",
|
| 635 |
"exists": true,
|
| 636 |
-
"bytes":
|
| 637 |
"format": "SVG",
|
| 638 |
"has_viewbox": true
|
| 639 |
},
|
|
@@ -647,7 +647,7 @@
|
|
| 647 |
{
|
| 648 |
"path": "assets/charts/single_episode_task_model_radar.svg",
|
| 649 |
"exists": true,
|
| 650 |
-
"bytes":
|
| 651 |
"format": "SVG",
|
| 652 |
"has_viewbox": true
|
| 653 |
},
|
|
@@ -779,7 +779,7 @@
|
|
| 779 |
{
|
| 780 |
"path": "assets/task_suite_infographic.png",
|
| 781 |
"exists": true,
|
| 782 |
-
"bytes":
|
| 783 |
"width": 1800,
|
| 784 |
"height": 7600,
|
| 785 |
"format": "PNG"
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-21T11:07:26+00:00",
|
| 4 |
"docs_root": "docs",
|
| 5 |
"site_base": "/ropedia-xperience-10m-task-suite/",
|
| 6 |
"summary": {
|
|
|
|
| 80 |
"name": "project_overview_precedes_progress_ledger",
|
| 81 |
"status": "pass",
|
| 82 |
"reason": "The project overview should appear before the deeper progress ledger.",
|
| 83 |
+
"overview_index": 118524,
|
| 84 |
+
"evidence_index": 163802
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"name": "project_status_links_json",
|
|
|
|
| 159 |
"name": "evaluation_protocol_between_overview_and_progress",
|
| 160 |
"status": "pass",
|
| 161 |
"reason": "The evaluation protocol should appear before the deeper evidence ledger.",
|
| 162 |
+
"overview_index": 118524,
|
| 163 |
+
"protocol_index": 159990,
|
| 164 |
+
"evidence_index": 163802
|
| 165 |
},
|
| 166 |
{
|
| 167 |
"name": "evaluation_protocol_links_json",
|
|
|
|
| 180 |
"status": "pass",
|
| 181 |
"reason": "The Suite anchor should show the task-suite map before the modality atlas.",
|
| 182 |
"first_marker_index": 471,
|
| 183 |
+
"second_marker_index": 3792
|
| 184 |
},
|
| 185 |
{
|
| 186 |
"name": "suite_modality_atlas_contains_seven_cards",
|
|
|
|
| 296 |
"json_files": [
|
| 297 |
{
|
| 298 |
"path": "data/additional_development_directions.json",
|
| 299 |
+
"bytes": 6120,
|
| 300 |
"top_level_type": "dict"
|
| 301 |
},
|
| 302 |
{
|
| 303 |
"path": "data/artifact_index.json",
|
| 304 |
+
"bytes": 123419,
|
| 305 |
"top_level_type": "dict"
|
| 306 |
},
|
| 307 |
{
|
|
|
|
| 316 |
},
|
| 317 |
{
|
| 318 |
"path": "data/episode128_task_model_radar.json",
|
| 319 |
+
"bytes": 184992,
|
| 320 |
"top_level_type": "dict"
|
| 321 |
},
|
| 322 |
{
|
|
|
|
| 331 |
},
|
| 332 |
{
|
| 333 |
"path": "data/figure_index.json",
|
| 334 |
+
"bytes": 19472,
|
| 335 |
"top_level_type": "dict"
|
| 336 |
},
|
| 337 |
{
|
| 338 |
"path": "data/foundation_model_plan.json",
|
| 339 |
+
"bytes": 13925,
|
| 340 |
"top_level_type": "dict"
|
| 341 |
},
|
| 342 |
{
|
|
|
|
| 346 |
},
|
| 347 |
{
|
| 348 |
"path": "data/live_publication_status.json",
|
| 349 |
+
"bytes": 184689,
|
| 350 |
"top_level_type": "dict"
|
| 351 |
},
|
| 352 |
{
|
| 353 |
"path": "data/mirror_parity.json",
|
| 354 |
+
"bytes": 1418076,
|
| 355 |
"top_level_type": "dict"
|
| 356 |
},
|
| 357 |
{
|
|
|
|
| 361 |
},
|
| 362 |
{
|
| 363 |
"path": "data/omni_finetune_verified_result.json",
|
| 364 |
+
"bytes": 4327,
|
| 365 |
"top_level_type": "dict"
|
| 366 |
},
|
| 367 |
{
|
| 368 |
"path": "data/omni_model_comparison.json",
|
| 369 |
+
"bytes": 82088,
|
| 370 |
"top_level_type": "dict"
|
| 371 |
},
|
| 372 |
{
|
|
|
|
| 386 |
},
|
| 387 |
{
|
| 388 |
"path": "data/project_status.json",
|
| 389 |
+
"bytes": 23054,
|
| 390 |
"top_level_type": "dict"
|
| 391 |
},
|
| 392 |
{
|
| 393 |
"path": "data/public_reader_map.json",
|
| 394 |
+
"bytes": 5990,
|
| 395 |
"top_level_type": "dict"
|
| 396 |
},
|
| 397 |
{
|
|
|
|
| 416 |
},
|
| 417 |
{
|
| 418 |
"path": "data/qwen3_omni_run_lineage.json",
|
| 419 |
+
"bytes": 11468,
|
| 420 |
"top_level_type": "dict"
|
| 421 |
},
|
| 422 |
{
|
|
|
|
| 451 |
},
|
| 452 |
{
|
| 453 |
"path": "data/research_roadmap.json",
|
| 454 |
+
"bytes": 14129,
|
| 455 |
"top_level_type": "dict"
|
| 456 |
},
|
| 457 |
{
|
| 458 |
"path": "data/research_roadmap_interactive.json",
|
| 459 |
+
"bytes": 186755,
|
| 460 |
"top_level_type": "dict"
|
| 461 |
},
|
| 462 |
{
|
|
|
|
| 476 |
},
|
| 477 |
{
|
| 478 |
"path": "data/single_episode_task_model_radar.json",
|
| 479 |
+
"bytes": 51107,
|
| 480 |
"top_level_type": "dict"
|
| 481 |
},
|
| 482 |
{
|
|
|
|
| 511 |
},
|
| 512 |
{
|
| 513 |
"path": "data/task_suite_enhancement_128.json",
|
| 514 |
+
"bytes": 20196,
|
| 515 |
"top_level_type": "dict"
|
| 516 |
},
|
| 517 |
{
|
|
|
|
| 536 |
},
|
| 537 |
{
|
| 538 |
"path": "data/two_evidence_line_result_summary.json",
|
| 539 |
+
"bytes": 17414,
|
| 540 |
"top_level_type": "dict"
|
| 541 |
},
|
| 542 |
{
|
| 543 |
"path": "data/two_evidence_lines.json",
|
| 544 |
+
"bytes": 7349,
|
| 545 |
"top_level_type": "dict"
|
| 546 |
},
|
| 547 |
{
|
| 548 |
"path": "data/unified_task_model_radar.json",
|
| 549 |
+
"bytes": 228815,
|
| 550 |
"top_level_type": "dict"
|
| 551 |
},
|
| 552 |
{
|
| 553 |
"path": "data/website_integrity.json",
|
| 554 |
+
"bytes": 20658,
|
| 555 |
"top_level_type": "dict"
|
| 556 |
},
|
| 557 |
{
|
|
|
|
| 591 |
{
|
| 592 |
"path": "assets/charts/episode128_task_model_radar.svg",
|
| 593 |
"exists": true,
|
| 594 |
+
"bytes": 51915,
|
| 595 |
"format": "SVG",
|
| 596 |
"has_viewbox": true
|
| 597 |
},
|
|
|
|
| 633 |
{
|
| 634 |
"path": "assets/charts/research_direction_coverage.svg",
|
| 635 |
"exists": true,
|
| 636 |
+
"bytes": 5352,
|
| 637 |
"format": "SVG",
|
| 638 |
"has_viewbox": true
|
| 639 |
},
|
|
|
|
| 647 |
{
|
| 648 |
"path": "assets/charts/single_episode_task_model_radar.svg",
|
| 649 |
"exists": true,
|
| 650 |
+
"bytes": 35232,
|
| 651 |
"format": "SVG",
|
| 652 |
"has_viewbox": true
|
| 653 |
},
|
|
|
|
| 779 |
{
|
| 780 |
"path": "assets/task_suite_infographic.png",
|
| 781 |
"exists": true,
|
| 782 |
+
"bytes": 1903454,
|
| 783 |
"width": 1800,
|
| 784 |
"height": 7600,
|
| 785 |
"format": "PNG"
|
docs/index.html
CHANGED
|
@@ -1147,6 +1147,16 @@
|
|
| 1147 |
font-weight: 760;
|
| 1148 |
width: 18%;
|
| 1149 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1150 |
.line-table a {
|
| 1151 |
color: var(--cyan);
|
| 1152 |
font-weight: 760;
|
|
@@ -3913,9 +3923,9 @@
|
|
| 3913 |
<article class="suite-line-card">
|
| 3914 |
<small>line 2 / 128 selected episodes</small>
|
| 3915 |
<h3>128 selected episodes: comparison layer</h3>
|
| 3916 |
-
<p>Seven
|
| 3917 |
<div class="line-claim">
|
| 3918 |
-
<div><span>valid claim</span><p>Same-split
|
| 3919 |
<div><span>do not claim</span><p>Proxy cells as direct raw-target measurements.</p></div>
|
| 3920 |
</div>
|
| 3921 |
<div class="suite-line-facts">
|
|
@@ -3944,7 +3954,7 @@
|
|
| 3944 |
</a>
|
| 3945 |
<a class="hero-path" href="#directions">
|
| 3946 |
<small>Extend</small>
|
| 3947 |
-
<strong>
|
| 3948 |
<span>Spatial intelligence, human-video world models, VLA, and scale-up plans.</span>
|
| 3949 |
</a>
|
| 3950 |
</div>
|
|
@@ -4044,7 +4054,7 @@
|
|
| 4044 |
<div class="wrap">
|
| 4045 |
<div class="section-head">
|
| 4046 |
<h2>Two evidence lines: 1 episode and 128 episodes.</h2>
|
| 4047 |
-
<p>Read the suite as two lines. Line 1 proves the task lab is inspectable and reproducible. Line 2 compares selected-128 baselines and
|
| 4048 |
</div>
|
| 4049 |
<figure class="line-map-figure">
|
| 4050 |
<img src="assets/charts/two_evidence_line_map.svg?v=two-line-map-v1" alt="Two evidence-line map showing 1 sample episode, 128 selected episodes, and the combined 180 scored method-task records">
|
|
@@ -4121,69 +4131,62 @@
|
|
| 4121 |
</tbody>
|
| 4122 |
</table>
|
| 4123 |
<p class="table-note">Cosmos3-Super Forward-Dynamics LoRA is published as a separate fine-tuned adapter with weights/results; it is not counted as a 20-task matrix method row.</p>
|
| 4124 |
-
<table class="line-table" aria-label="Qwen3-Omni run version ladder">
|
| 4125 |
<thead>
|
| 4126 |
<tr>
|
| 4127 |
<th>Qwen run</th>
|
| 4128 |
-
<th>
|
| 4129 |
-
<th>
|
| 4130 |
-
<th>
|
| 4131 |
-
<th>
|
| 4132 |
-
<th>Public role</th>
|
| 4133 |
</tr>
|
| 4134 |
</thead>
|
| 4135 |
<tbody>
|
| 4136 |
<tr>
|
| 4137 |
<td>v1</td>
|
| 4138 |
-
<td>
|
| 4139 |
-
<td>
|
| 4140 |
-
<td>0.8750</td>
|
| 4141 |
-
<td>
|
| 4142 |
-
<td>Superseded lineage evidence.</td>
|
| 4143 |
</tr>
|
| 4144 |
<tr>
|
| 4145 |
<td>v2</td>
|
| 4146 |
-
<td>
|
| 4147 |
-
<td>
|
| 4148 |
-
<td>0.9978</td>
|
| 4149 |
-
<td>
|
| 4150 |
-
<td>Superseded lineage evidence.</td>
|
| 4151 |
</tr>
|
| 4152 |
<tr>
|
| 4153 |
<td>v3</td>
|
| 4154 |
-
<td>
|
| 4155 |
-
<td>
|
| 4156 |
-
<td>1.0000</td>
|
| 4157 |
-
<td>
|
| 4158 |
-
<td>Prompt/eval lineage evidence.</td>
|
| 4159 |
</tr>
|
| 4160 |
<tr>
|
| 4161 |
<td>v4</td>
|
| 4162 |
-
<td>
|
| 4163 |
-
<td>
|
| 4164 |
-
<td>1.0000</td>
|
| 4165 |
-
<td>
|
| 4166 |
-
<td>Superseded metric-tradeoff run.</td>
|
| 4167 |
</tr>
|
| 4168 |
<tr>
|
| 4169 |
<td>v5</td>
|
| 4170 |
-
<td>
|
| 4171 |
-
<td>4,032</td>
|
| 4172 |
-
<td>1.0000</td>
|
| 4173 |
-
<td>
|
| 4174 |
-
<td>Pinned prior release and comparison baseline.</td>
|
| 4175 |
</tr>
|
| 4176 |
<tr>
|
| 4177 |
<td>v6</td>
|
| 4178 |
-
<td>
|
| 4179 |
-
<td>
|
| 4180 |
-
<td>0.9990</td>
|
| 4181 |
-
<td>0.8177</td>
|
| 4182 |
<td>Current public 20-task Qwen3-Omni row.</td>
|
| 4183 |
</tr>
|
| 4184 |
</tbody>
|
| 4185 |
</table>
|
| 4186 |
-
<p class="table-note">Qwen v1-v6 are run-lineage labels
|
| 4187 |
<div class="reader-journey" aria-label="Recommended reader journeys">
|
| 4188 |
<article class="reader-step">
|
| 4189 |
<small>01 understand</small>
|
|
@@ -4205,7 +4208,7 @@
|
|
| 4205 |
</article>
|
| 4206 |
<article class="reader-step">
|
| 4207 |
<small>04 extend</small>
|
| 4208 |
-
<strong>Choose the next model
|
| 4209 |
<p>Use directions and scale-up resources for spatial, world-model, VLA, Qwen3-Omni, and Cosmos3 follow-up work.</p>
|
| 4210 |
<a href="#directions">Open directions</a>
|
| 4211 |
</article>
|
|
@@ -4251,7 +4254,7 @@
|
|
| 4251 |
<article class="brief-card">
|
| 4252 |
<small>results</small>
|
| 4253 |
<strong>Compare methods cleanly</strong>
|
| 4254 |
-
<p>Single-episode baselines, 128-episode aligned baselines, Qwen3-Omni v6 LoRA, and Cosmos3-Super/Nano
|
| 4255 |
<div class="reading-links">
|
| 4256 |
<a href="#takeaways">takeaways</a>
|
| 4257 |
<a href="data/unified_task_model_radar.json">radar data</a>
|
|
@@ -4285,7 +4288,7 @@
|
|
| 4285 |
<a href="https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts"><strong>HF artifacts</strong><span>Public-safe derived reports, metrics, website JSON, and result packages.</span></a>
|
| 4286 |
<a href="https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines"><strong>HF baselines</strong><span>Compact baseline weights, figures, metrics, and mirrored task artifacts.</span></a>
|
| 4287 |
<a href="https://huggingface.co/cy0307/ropedia-xperience-10m-weights-results"><strong>HF weights + results</strong><span>Consolidated baseline weights, adapters, result summaries, analysis, and manifest.</span></a>
|
| 4288 |
-
<a href="https://huggingface.co/collections/cy0307/ropedia-xperience-10m-task-suite"><strong>HF collection</strong><span>Grouped project surfaces, baseline repos,
|
| 4289 |
</div>
|
| 4290 |
<div class="brief-actions">
|
| 4291 |
<a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/PUBLIC_READER_MAP.md">Open full reader map</a>
|
|
@@ -4335,7 +4338,7 @@
|
|
| 4335 |
</article>
|
| 4336 |
<article class="brief-card">
|
| 4337 |
<strong>Scale-up readiness</strong>
|
| 4338 |
-
<p>Connects the same data contract to 128-episode baselines, a no-new-episode enhancement pack, Qwen3-Omni LoRA, Cosmos-style world modeling, policy
|
| 4339 |
</article>
|
| 4340 |
</div>
|
| 4341 |
<div class="brief-actions">
|
|
@@ -4359,8 +4362,8 @@
|
|
| 4359 |
</article>
|
| 4360 |
<article class="split-radar-card">
|
| 4361 |
<h3>128-Episode 20-Task Radar</h3>
|
| 4362 |
-
<p>Metadata, raw-feature, Qwen3-Omni, and Cosmos3
|
| 4363 |
-
<img src="assets/charts/episode128_task_model_radar.svg?v=xperience10m-split-radar-v1" alt="128-episode 20-task radar comparing raw-feature baselines, metadata baselines, Qwen3-Omni, and Cosmos3
|
| 4364 |
<div class="split-radar-links">
|
| 4365 |
<a href="assets/charts/episode128_task_model_radar.svg">Open SVG</a>
|
| 4366 |
<a href="data/episode128_task_model_radar.json">Open JSON</a>
|
|
@@ -4470,7 +4473,7 @@
|
|
| 4470 |
<div class="wrap">
|
| 4471 |
<div class="section-head">
|
| 4472 |
<h2>Research roadmap.</h2>
|
| 4473 |
-
<p>The project path moves from the current public-sample task lab to the latest verified Qwen3-Omni diagnostic
|
| 4474 |
</div>
|
| 4475 |
<div class="roadmap-grid" aria-label="Research roadmap stages">
|
| 4476 |
<article class="roadmap-card" data-status="implemented">
|
|
@@ -4492,7 +4495,7 @@
|
|
| 4492 |
</div>
|
| 4493 |
</article>
|
| 4494 |
<article class="roadmap-card" data-status="verified_latest_branch">
|
| 4495 |
-
<span class="roadmap-status">verified latest
|
| 4496 |
<h3>Qwen3-Omni LoRA Latest Diagnostic Branch</h3>
|
| 4497 |
<p>Train lightweight adapters on selected prepared episodes and evaluate on held-out episodes with committed predictions, metrics, and run reports.</p>
|
| 4498 |
<div class="roadmap-meta">
|
|
@@ -4585,7 +4588,7 @@
|
|
| 4585 |
<div class="wrap">
|
| 4586 |
<div class="section-head">
|
| 4587 |
<h2>Additional development directions.</h2>
|
| 4588 |
-
<p>Beyond the current task heads, Qwen3-Omni fine-tuning path, Cosmos/world-model
|
| 4589 |
</div>
|
| 4590 |
<div class="foundation-pipeline-grid" aria-label="Three high-resolution foundation direction slide diagrams">
|
| 4591 |
<article class="foundation-pipeline-card">
|
|
@@ -4631,7 +4634,7 @@
|
|
| 4631 |
<article class="artifact"><h3>Multimodal representation learning</h3><p>Train contrastive and masked-prediction encoders over synchronized video, audio, depth, pose, mocap, IMU, and language windows.</p><a href="data/additional_development_directions.json">JSON plan</a></article>
|
| 4632 |
<article class="artifact"><h3>Skill and procedure graphs</h3><p>Mine action steps, transitions, preconditions, effects, and temporal graphs that connect egocentric perception to planning.</p><a href="data/research_directions.json">current task map</a></article>
|
| 4633 |
<article class="artifact"><h3>Human-object affordances</h3><p>Add contact, reachable-object, tool-use, and next-affordance tasks using hands, mocap, objects, contacts, video, and language.</p><a href="data/task_walkthroughs.json">task walkthroughs</a></article>
|
| 4634 |
-
<article class="artifact"><h3>3D/4D scene and object memory</h3><p>Fuse depth, pose/SLAM, multiview video, and object cues into persistent scene/object maps for spatial reasoning and object permanence.</p><a href="data/foundation_model_plan.json">model
|
| 4635 |
<article class="artifact"><h3>Quality and sync diagnostics</h3><p>Track timestamp drift, missing streams, calibration consistency, corrupted files, and degraded-mode manifests before large training runs.</p><a href="data/evidence_contract.json">evidence contract</a></article>
|
| 4636 |
<article class="artifact"><h3>Policy and simulation transfer</h3><p>Convert mocap, hand trajectories, contacts, and object states into action tokens, robot-compatible targets, and imitation-learning examples.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/FOUNDATION_MODEL_PLAN.md">foundation plan</a></article>
|
| 4637 |
</div>
|
|
@@ -4650,10 +4653,10 @@
|
|
| 4650 |
<article class="artifact"><h3>Metric contract</h3><p>All 20 tasks list input, target, primary metric, baseline score, and source artifact path in the unified suite file.</p><a href="data/task_suite_20.json">task_suite_20.json</a></article>
|
| 4651 |
<article class="artifact"><h3>Leakage controls</h3><p>Scalers fit on train windows only; future labels, target-side signals, caption/object labels, and contact labels stay on the target side unless explicitly queried.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/scripts/build_evaluation_protocol.py">builder script</a></article>
|
| 4652 |
<article class="artifact"><h3>Audio ablation</h3><p>Audio and no-audio variants are evaluated across the original task contracts under the same chronological split.</p><a href="data/audio_ablation_summary.json">audio summary</a></article>
|
| 4653 |
-
<article class="artifact"><h3>Foundation
|
| 4654 |
-
<article class="artifact"><h3>Next evaluation stage</h3><p>This public-sample run covers single-episode task development. The selected multi-episode Qwen3-Omni final diagnostic result is verified and meets the JSON-validity target; Cosmos3-Nano has a verified future-window compatibility package; and Cosmos3-Super has a verified base-weight JSON-task evaluation plus a fine-tuned forward-dynamics LoRA
|
| 4655 |
<article class="artifact"><h3>128-Episode Task Suite Enhancement Pack</h3><p>Before adding episodes, the suite should try `multiscale_20s10_40s20_80s40`, hierarchical action/subtask targets, label-normalized scoring, and compact raw-feature shards for unsupported tasks.</p><a href="data/task_suite_enhancement_128.json">task_suite_enhancement_128.json</a></article>
|
| 4656 |
-
<article class="artifact"><h3>Scale-up requirement</h3><p>Future Omni, Cosmos, and policy
|
| 4657 |
</div>
|
| 4658 |
</div>
|
| 4659 |
</section>
|
|
@@ -4705,7 +4708,7 @@
|
|
| 4705 |
<article class="evidence-card">
|
| 4706 |
<span class="status-pill">current plan</span>
|
| 4707 |
<h3>Foundation backbones are separated by role</h3>
|
| 4708 |
-
<p>Qwen3-Omni stays first for held-out LoRA; Cosmos 3 is the world-model
|
| 4709 |
<div class="evidence-links">
|
| 4710 |
<a href="data/foundation_model_plan.json">foundation model plan</a>
|
| 4711 |
<a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/FOUNDATION_MODEL_PLAN.md">plan doc</a>
|
|
@@ -4714,7 +4717,7 @@
|
|
| 4714 |
</article>
|
| 4715 |
<article class="evidence-card">
|
| 4716 |
<span class="status-pill">verified diagnostic</span>
|
| 4717 |
-
<h3>Qwen3-Omni and Cosmos3
|
| 4718 |
<p>The selected 96/16/16 episode split now has a verified Qwen3-Omni v6 package with 4,032 held-out test predictions and 99.90% JSON validity. Cosmos3-Nano has 378 held-out future-window predictions, Cosmos3-Super Reasoner has 448 held-out base-weight JSON-task predictions, and Cosmos3-Super Forward-Dynamics LoRA has 448 held-out loss records.</p>
|
| 4719 |
<div class="evidence-links">
|
| 4720 |
<a href="data/omni_model_comparison.json">result comparison</a>
|
|
@@ -4891,9 +4894,9 @@
|
|
| 4891 |
</div>
|
| 4892 |
<div class="artifact-grid">
|
| 4893 |
<article class="artifact primary-artifact"><div><h3>Official dataset</h3><p>Xperience-10M is a gated large-scale egocentric multimodal dataset for embodied AI, robotics, spatial intelligence, and world modeling.</p></div><a href="https://huggingface.co/datasets/ropedia-ai/xperience-10m">official HF dataset</a></article>
|
| 4894 |
-
<article class="artifact"><h3>
|
| 4895 |
<article class="artifact"><h3>Modalities</h3><p>The sample exposes synchronized video, audio, depth, pose/SLAM, motion capture, inertial signals, calibration, and language annotations.</p><a href="data/modality_atlas.json">modality atlas</a></article>
|
| 4896 |
-
<article class="artifact"><h3>Multi-episode pilot</h3><p>The selected 128-episode Qwen3-Omni LoRA v6 diagnostic
|
| 4897 |
<article class="artifact"><h3>Raw sample browser</h3><p>The Data tab now exposes the official public sample files directly, including playable MP4 video streams and the audio track embedded in fisheye_cam0.mp4.</p><a href="#raw-sample">open raw browser</a><a href="data/raw_sample_files.json">raw manifest</a></article>
|
| 4898 |
<article class="artifact"><h3>Data boundary</h3><p>Raw MP4, HDF5, and RRD files are streamed from the official public sample source when opened here; private gated data and full Qwen weights are not redistributed in this project.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/DATA_NOTICE.md">data notice</a></article>
|
| 4899 |
<article class="artifact"><h3>Current project subset</h3><p>One public sample episode, 5,821 frames, 1,161 aligned windows, 8,546-dimensional task inputs, plus direct links to the official raw sample files.</p><a href="data/modality_atlas.json">modality atlas</a></article>
|
|
@@ -5020,7 +5023,7 @@
|
|
| 5020 |
<article class="split-radar-card">
|
| 5021 |
<h3>128-Episode 20-Task Radar</h3>
|
| 5022 |
<p>Seven aligned 128-episode methods cover all 20 axes: metadata simple/NN, raw-feature simple/NN, Qwen3-Omni, Cosmos3-Super, and Cosmos3-Nano. Proxy axes stay labeled in the JSON.</p>
|
| 5023 |
-
<img src="assets/charts/episode128_task_model_radar.svg?v=xperience10m-split-radar-v1" alt="128-episode 20-task radar comparing raw-feature baselines, metadata baselines, Qwen3-Omni, and Cosmos3
|
| 5024 |
<div class="split-radar-links">
|
| 5025 |
<a href="assets/charts/episode128_task_model_radar.svg">Open SVG</a>
|
| 5026 |
<a href="data/episode128_task_model_radar.json">Open JSON</a>
|
|
@@ -5124,7 +5127,7 @@
|
|
| 5124 |
<article class="result-reading-step">
|
| 5125 |
<span>02</span>
|
| 5126 |
<strong>Open the radar</strong>
|
| 5127 |
-
<p>Single-episode radar shows Minimal vs Neural MLP. 128 radar shows
|
| 5128 |
</article>
|
| 5129 |
<article class="result-reading-step">
|
| 5130 |
<span>03</span>
|
|
@@ -5676,7 +5679,7 @@
|
|
| 5676 |
<p>Use these files to navigate the whole project, open the published mirrors, or reproduce the public-sample pipeline.</p>
|
| 5677 |
</div>
|
| 5678 |
<div class="artifact-grid">
|
| 5679 |
-
<article class="artifact primary-artifact"><div><h3>Public reader map</h3><p>Single navigation layer for GitHub, GitHub Pages, HF Space, artifact dataset, baseline model repo,
|
| 5680 |
<article class="artifact primary-artifact"><div><h3>Artifact guide</h3><p>Human-readable map from project scope to data contract, task evidence, platform mirrors, and scale-up status.</p></div><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/ARTIFACT_GUIDE.md">artifact guide</a></article>
|
| 5681 |
<article class="artifact"><h3>Reproduction scripts</h3><p>Training, visualization, taxonomy, walkthrough, validator, and omni-readiness scripts.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/tree/main/scripts">scripts/</a></article>
|
| 5682 |
<article class="artifact"><h3>Hugging Face Space</h3><p>The dashboard packaged as a public static Space.</p><a href="https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite">HF Space</a></article>
|
|
@@ -5696,13 +5699,13 @@
|
|
| 5696 |
<p>The multi-episode Qwen3-Omni path is documented, scripted, and verified as a validation-monitored diagnostic held-out pilot. Stronger model-quality metrics require structured-output and error-analysis improvements.</p>
|
| 5697 |
</div>
|
| 5698 |
<div class="artifact-grid">
|
| 5699 |
-
<article class="artifact primary-artifact"><div><h3>
|
| 5700 |
<article class="artifact primary-artifact"><div><h3>128-episode source + features</h3><p>Maps every selected official Xperience-10M episode id to its gated source tree and the public-safe processed features: Qwen v6 multiscale windows, dense multiscale rows, and metadata matrices.</p></div><a href="data/xperience10m_128_episode_feature_index.json">source/feature index</a></article>
|
| 5701 |
<article class="artifact"><h3>128-Episode Task Suite Enhancement Pack</h3><p>No-new-episode plan for denser supervision: `multiscale_20s10_40s20_80s40`, hierarchical action/subtask labels, stronger scoring slices, and raw-feature shard priorities.</p><a href="data/task_suite_enhancement_128.json">task_suite_enhancement_128.json</a></article>
|
| 5702 |
<article class="artifact"><h3>Foundation-model plan</h3><p>Backbone selection matrix covering Qwen3-Omni, Cosmos 3, GR00T, OpenVLA/openpi, Gemini Robotics, Octo, SmolVLA-style policy candidates, and the future Xperience-native pretraining goal.</p><a href="data/foundation_model_plan.json">foundation model plan</a></article>
|
| 5703 |
<article class="artifact"><h3>Multi-episode data access</h3><p>Public data-access path, selected 128-episode pilot plan, and preparation requirements.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md">data access</a></article>
|
| 5704 |
<article class="artifact"><h3>Qwen3-Omni LoRA group</h3><p>Separates the 1-episode sensor-adapter smoke test from Qwen runs v1-v6. v6 is the current 20-task matrix row, while v5 remains the pinned prior release.</p><a href="data/qwen3_omni_run_lineage.json">Qwen v1-v6 lineage</a><a href="data/omni_model_comparison.json">Qwen group</a></article>
|
| 5705 |
-
<article class="artifact"><h3>Cosmos3 groups</h3><p>Shows the verified Nano future-window compatibility package, the Super base-weight Reasoner JSON-task evaluation, and the Super fine-tuned forward-dynamics LoRA
|
| 5706 |
<article class="artifact"><h3>Scale-up requirement</h3><p>Future runs need validation tracking, held-out predictions, quality-target reporting, and the same public-safe package gate.</p><a href="data/foundation_model_plan.json">training requirements</a></article>
|
| 5707 |
<article class="artifact"><h3>Xperience-native pretraining</h3><p>Future plan for a domain-specific embodied foundation model trained from scratch over full-corpus video, audio, geometry, motion, inertial, and language streams.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md">pretraining plan</a></article>
|
| 5708 |
</div>
|
|
@@ -5721,7 +5724,7 @@
|
|
| 5721 |
<article class="artifact"><h3>Dataset notes</h3><p>Official dataset links, public sample source, modalities, access boundary, and current project subset.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/XPERIENCE10M_DATASET_CARD_ALIGNMENT.md">dataset notes</a></article>
|
| 5722 |
<article class="artifact"><h3>Reproducibility</h3><p>Commands and expected outputs for rebuilding the public-sample task suite and visual artifacts.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/REPRODUCIBILITY.md">reproduce</a></article>
|
| 5723 |
<article class="artifact"><h3>Qwen3-Omni status</h3><p>Data requirements and evaluation boundary for the selected multi-episode LoRA pilot.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/results/omni_finetune/DATA_ACCESS_STATUS.md">training status</a></article>
|
| 5724 |
-
<article class="artifact"><h3>Foundation-model plan</h3><p>Qwen3-Omni, Cosmos 3, GR00T, OpenVLA/openpi, Gemini Robotics, Octo, SmolVLA-style
|
| 5725 |
<article class="artifact"><h3>Hub artifacts</h3><p>Derived CSV/JSON/Markdown/figure artifacts without redistributing raw Xperience-10M data.</p><a href="https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts">artifact dataset</a></article>
|
| 5726 |
<article class="artifact"><h3>Baseline models</h3><p>Lightweight minimal and neural task-head model files for the task contracts.</p><a href="https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines">model repo</a></article>
|
| 5727 |
</div>
|
|
@@ -5733,7 +5736,7 @@
|
|
| 5733 |
<section id="omni-scale-up" data-project-tab="resources" role="tabpanel" aria-labelledby="tab-resources" tabindex="-1">
|
| 5734 |
<div class="wrap">
|
| 5735 |
<div class="section-head">
|
| 5736 |
-
<h2>Qwen3-Omni diagnostic
|
| 5737 |
<p>The selected pilot uses 128 source-balanced episodes across 128 different session UUIDs. The latest v6 held-out package is verified, and its weak metrics define the next structured-output and error-analysis pass.</p>
|
| 5738 |
</div>
|
| 5739 |
<div class="artifact-grid">
|
|
@@ -5741,7 +5744,7 @@
|
|
| 5741 |
<article class="artifact"><h3>Transfer</h3><p>Download raw episodes only from official gated sources, exclude visualization.rrd, validate files, then stage them for training.</p></article>
|
| 5742 |
<article class="artifact"><h3>Current LoRA artifact</h3><p>The current Qwen3-Omni LoRA artifact is the verified v6 selected 128-episode diagnostic adapter. The v5 row remains pinned as the prior release, and the 1-episode Qwen entry is only a sensor-adapter smoke test.</p><a href="data/omni_model_comparison.json">model groups</a></article>
|
| 5743 |
<article class="artifact"><h3>128-Episode Task Suite Enhancement Pack</h3><p>The next suite push does not need more episodes first: use `multiscale_20s10_40s20_80s40`, hierarchical action/subtask targets, and raw-feature shards while keeping the held-out split fixed.</p><a href="data/task_suite_enhancement_128.json">task_suite_enhancement_128.json</a></article>
|
| 5744 |
-
<article class="artifact"><h3>Backbone
|
| 5745 |
<article class="artifact"><h3>Native foundation model</h3><p>The long-term goal is a full-corpus Xperience Embodied Foundation Model trained on synchronized perception, geometry, motion, inertial, audio, and language streams after smaller scaling stages validate the approach.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md">pretraining plan</a></article>
|
| 5746 |
</div>
|
| 5747 |
</div>
|
|
@@ -5758,7 +5761,7 @@
|
|
| 5758 |
<article class="artifact"><h3>Reproducibility matrix</h3><p>Machine-readable command matrix covering sample download, baselines, the unified 20-task suite, figures, and validation.</p><a href="data/reproducibility_matrix.json">reproducibility matrix</a></article>
|
| 5759 |
<article class="artifact"><h3>Exact-match reproduction record</h3><p>The last metric rebuild reproduced the public-sample outputs from a fresh cache and matched the committed metrics.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/notes/reproducibility_audit.md">reproduction audit</a></article>
|
| 5760 |
<article class="artifact"><h3>Project dashboard</h3><p>The website organizes the dataset sample, tasks, methods, results, directions, and scale-up path in one tabbed reader flow.</p><a href="#artifacts">project materials</a></article>
|
| 5761 |
-
<article class="artifact"><h3>
|
| 5762 |
</div>
|
| 5763 |
<p class="repro-note">Minimal path: install the toolkit dependencies, download the official sample, run the task suite with neural heads, regenerate tasks 13-20, build the unified 20-task index, regenerate visualizations, then rebuild the supporting project reports.</p>
|
| 5764 |
<pre class="code-panel"><button type="button" data-copy="setup">Copy</button><code id="setup">git clone https://github.com/Ropedia/HOMIE-toolkit.git
|
|
@@ -5796,7 +5799,7 @@ python scripts/validate_publication_package.py</code></pre>
|
|
| 5796 |
|
| 5797 |
<footer>
|
| 5798 |
<div class="wrap">
|
| 5799 |
-
Built as an embodied-AI learning lab with verified held-out diagnostic
|
| 5800 |
<span class="footer-meta">README translation metadata remains available at <a href="data/language_versions.json">language_versions.json</a>; use the header selector to translate this website in place.</span>
|
| 5801 |
</div>
|
| 5802 |
</footer>
|
|
@@ -6087,7 +6090,7 @@ python scripts/validate_publication_package.py</code></pre>
|
|
| 6087 |
tasks: "Best for task-by-task input, output, and metric cards.",
|
| 6088 |
pipeline: "Best for understanding how raw episode data becomes features and results.",
|
| 6089 |
protocol: "Best for splits, leakage controls, metrics, and evaluation rules.",
|
| 6090 |
-
architectures: "Best for how task heads and model
|
| 6091 |
features: "Best for modality and feature provenance.",
|
| 6092 |
takeaways: "Best for the fastest read on what the current metrics mean.",
|
| 6093 |
models: "Best for minimal baseline evidence.",
|
|
@@ -6097,7 +6100,7 @@ python scripts/validate_publication_package.py</code></pre>
|
|
| 6097 |
diagnostics: "Best for charts and error-analysis evidence.",
|
| 6098 |
artifacts: "Best for finding files, mirrors, weights, scripts, and checks.",
|
| 6099 |
evidence: "Best for current experiment status and milestones.",
|
| 6100 |
-
"omni-scale-up": "Best for Qwen3-Omni and Cosmos3
|
| 6101 |
run: "Best for reproduction commands."
|
| 6102 |
};
|
| 6103 |
const sectionTabMap = Object.fromEntries(tabSections.map((section) => [section.id, section.dataset.projectTab]));
|
|
|
|
| 1147 |
font-weight: 760;
|
| 1148 |
width: 18%;
|
| 1149 |
}
|
| 1150 |
+
.qwen-lineage-table td:first-child {
|
| 1151 |
+
width: 8%;
|
| 1152 |
+
min-width: 58px;
|
| 1153 |
+
}
|
| 1154 |
+
.qwen-lineage-table th:nth-child(2),
|
| 1155 |
+
.qwen-lineage-table td:nth-child(2),
|
| 1156 |
+
.qwen-lineage-table th:nth-child(3),
|
| 1157 |
+
.qwen-lineage-table td:nth-child(3) {
|
| 1158 |
+
width: 28%;
|
| 1159 |
+
}
|
| 1160 |
.line-table a {
|
| 1161 |
color: var(--cyan);
|
| 1162 |
font-weight: 760;
|
|
|
|
| 3923 |
<article class="suite-line-card">
|
| 3924 |
<small>line 2 / 128 selected episodes</small>
|
| 3925 |
<h3>128 selected episodes: comparison layer</h3>
|
| 3926 |
+
<p>Seven methods share the selected-episode surface and the same 20 task axes.</p>
|
| 3927 |
<div class="line-claim">
|
| 3928 |
+
<div><span>valid claim</span><p>Same-split method comparison and scale-up planning.</p></div>
|
| 3929 |
<div><span>do not claim</span><p>Proxy cells as direct raw-target measurements.</p></div>
|
| 3930 |
</div>
|
| 3931 |
<div class="suite-line-facts">
|
|
|
|
| 3954 |
</a>
|
| 3955 |
<a class="hero-path" href="#directions">
|
| 3956 |
<small>Extend</small>
|
| 3957 |
+
<strong>Plan next training tracks</strong>
|
| 3958 |
<span>Spatial intelligence, human-video world models, VLA, and scale-up plans.</span>
|
| 3959 |
</a>
|
| 3960 |
</div>
|
|
|
|
| 4054 |
<div class="wrap">
|
| 4055 |
<div class="section-head">
|
| 4056 |
<h2>Two evidence lines: 1 episode and 128 episodes.</h2>
|
| 4057 |
+
<p>Read the suite as two lines. Line 1 proves the task lab is inspectable and reproducible. Line 2 compares selected-128 metadata/raw baselines, Qwen3-Omni v6 LoRA, Cosmos3-Super Reasoner, and Cosmos3-Nano Future Window. Keep the lines separate when interpreting scores.</p>
|
| 4058 |
</div>
|
| 4059 |
<figure class="line-map-figure">
|
| 4060 |
<img src="assets/charts/two_evidence_line_map.svg?v=two-line-map-v1" alt="Two evidence-line map showing 1 sample episode, 128 selected episodes, and the combined 180 scored method-task records">
|
|
|
|
| 4131 |
</tbody>
|
| 4132 |
</table>
|
| 4133 |
<p class="table-note">Cosmos3-Super Forward-Dynamics LoRA is published as a separate fine-tuned adapter with weights/results; it is not counted as a 20-task matrix method row.</p>
|
| 4134 |
+
<table class="line-table qwen-lineage-table" aria-label="Qwen3-Omni run version ladder">
|
| 4135 |
<thead>
|
| 4136 |
<tr>
|
| 4137 |
<th>Qwen run</th>
|
| 4138 |
+
<th>Purpose</th>
|
| 4139 |
+
<th>Main change</th>
|
| 4140 |
+
<th>Eval signal</th>
|
| 4141 |
+
<th>Use now</th>
|
|
|
|
| 4142 |
</tr>
|
| 4143 |
</thead>
|
| 4144 |
<tbody>
|
| 4145 |
<tr>
|
| 4146 |
<td>v1</td>
|
| 4147 |
+
<td>Prove the selected-128 LoRA/eval/package loop.</td>
|
| 4148 |
+
<td>First verified 96/16/16 selected-episode Qwen3-Omni LoRA run.</td>
|
| 4149 |
+
<td>448 eval; JSON 0.8750; contact 0.6451.</td>
|
| 4150 |
+
<td>Lineage only.</td>
|
|
|
|
| 4151 |
</tr>
|
| 4152 |
<tr>
|
| 4153 |
<td>v2</td>
|
| 4154 |
+
<td>Make answers schema-checked.</td>
|
| 4155 |
+
<td>Structured-JSON contract with full-8-GPU LoRA on the same split.</td>
|
| 4156 |
+
<td>448 eval; JSON 0.9978; contact 0.7188.</td>
|
| 4157 |
+
<td>Structured-output ablation.</td>
|
|
|
|
| 4158 |
</tr>
|
| 4159 |
<tr>
|
| 4160 |
<td>v3</td>
|
| 4161 |
+
<td>Separate prompt/eval effects from training.</td>
|
| 4162 |
+
<td>Strict-label prompt/eval over the v2 adapter; no new adapter training.</td>
|
| 4163 |
+
<td>448 eval; JSON 1.0000; contact 0.7210.</td>
|
| 4164 |
+
<td>Prompt/eval ablation.</td>
|
|
|
|
| 4165 |
</tr>
|
| 4166 |
<tr>
|
| 4167 |
<td>v4</td>
|
| 4168 |
+
<td>Test longer structured-JSON LoRA training.</td>
|
| 4169 |
+
<td>New four-epoch full-8-GPU adapter on the same selected split.</td>
|
| 4170 |
+
<td>448 eval; JSON 1.0000; contact 0.7299.</td>
|
| 4171 |
+
<td>Overfit/metric-tradeoff evidence.</td>
|
|
|
|
| 4172 |
</tr>
|
| 4173 |
<tr>
|
| 4174 |
<td>v5</td>
|
| 4175 |
+
<td>Move to denser multiscale evaluation.</td>
|
| 4176 |
+
<td>Multiscale cap96 export with 4,032 held-out predictions.</td>
|
| 4177 |
+
<td>4,032 eval; JSON 1.0000; contact 0.7865.</td>
|
| 4178 |
+
<td>Pinned prior release; stronger on several non-contact metrics.</td>
|
|
|
|
| 4179 |
</tr>
|
| 4180 |
<tr>
|
| 4181 |
<td>v6</td>
|
| 4182 |
+
<td>Publish the current Qwen 20-task row.</td>
|
| 4183 |
+
<td>Rank64/lr5e-5 multiscale LoRA plus verified task-specific probes.</td>
|
| 4184 |
+
<td>4,032 eval; JSON 0.9990; contact 0.8177.</td>
|
|
|
|
| 4185 |
<td>Current public 20-task Qwen3-Omni row.</td>
|
| 4186 |
</tr>
|
| 4187 |
</tbody>
|
| 4188 |
</table>
|
| 4189 |
+
<p class="table-note">Qwen v1-v6 are run-lineage labels inside the selected-128 evidence line, not project-level result lines. Use v6 for the public 20-task Qwen3-Omni row; keep v5 as the pinned prior multiscale comparator; read v1-v4 as pipeline-hardening and ablation evidence. Full details: <a href="data/qwen3_omni_run_lineage.json">qwen3_omni_run_lineage.json</a> and <a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/QWEN3_OMNI_RUN_LINEAGE.md">QWEN3_OMNI_RUN_LINEAGE.md</a>.</p>
|
| 4190 |
<div class="reader-journey" aria-label="Recommended reader journeys">
|
| 4191 |
<article class="reader-step">
|
| 4192 |
<small>01 understand</small>
|
|
|
|
| 4208 |
</article>
|
| 4209 |
<article class="reader-step">
|
| 4210 |
<small>04 extend</small>
|
| 4211 |
+
<strong>Choose the next model track</strong>
|
| 4212 |
<p>Use directions and scale-up resources for spatial, world-model, VLA, Qwen3-Omni, and Cosmos3 follow-up work.</p>
|
| 4213 |
<a href="#directions">Open directions</a>
|
| 4214 |
</article>
|
|
|
|
| 4254 |
<article class="brief-card">
|
| 4255 |
<small>results</small>
|
| 4256 |
<strong>Compare methods cleanly</strong>
|
| 4257 |
+
<p>Single-episode baselines, 128-episode aligned baselines, Qwen3-Omni v6 LoRA, and Cosmos3-Super/Nano diagnostics stay separated by evidence type.</p>
|
| 4258 |
<div class="reading-links">
|
| 4259 |
<a href="#takeaways">takeaways</a>
|
| 4260 |
<a href="data/unified_task_model_radar.json">radar data</a>
|
|
|
|
| 4288 |
<a href="https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts"><strong>HF artifacts</strong><span>Public-safe derived reports, metrics, website JSON, and result packages.</span></a>
|
| 4289 |
<a href="https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines"><strong>HF baselines</strong><span>Compact baseline weights, figures, metrics, and mirrored task artifacts.</span></a>
|
| 4290 |
<a href="https://huggingface.co/cy0307/ropedia-xperience-10m-weights-results"><strong>HF weights + results</strong><span>Consolidated baseline weights, adapters, result summaries, analysis, and manifest.</span></a>
|
| 4291 |
+
<a href="https://huggingface.co/collections/cy0307/ropedia-xperience-10m-task-suite"><strong>HF collection</strong><span>Grouped project surfaces, baseline repos, Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano repos.</span></a>
|
| 4292 |
</div>
|
| 4293 |
<div class="brief-actions">
|
| 4294 |
<a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/PUBLIC_READER_MAP.md">Open full reader map</a>
|
|
|
|
| 4338 |
</article>
|
| 4339 |
<article class="brief-card">
|
| 4340 |
<strong>Scale-up readiness</strong>
|
| 4341 |
+
<p>Connects the same data contract to 128-episode baselines, a no-new-episode enhancement pack, Qwen3-Omni LoRA, Cosmos-style world modeling, policy/VLA tracks, and the later Xperience-native pretraining goal.</p>
|
| 4342 |
</article>
|
| 4343 |
</div>
|
| 4344 |
<div class="brief-actions">
|
|
|
|
| 4362 |
</article>
|
| 4363 |
<article class="split-radar-card">
|
| 4364 |
<h3>128-Episode 20-Task Radar</h3>
|
| 4365 |
+
<p>Metadata, raw-feature, Qwen3-Omni, and Cosmos3 methods on the aligned 128-episode surface, with all 140 rows scored and proxy/evidence notes kept explicit.</p>
|
| 4366 |
+
<img src="assets/charts/episode128_task_model_radar.svg?v=xperience10m-split-radar-v1" alt="128-episode 20-task radar comparing raw-feature baselines, metadata baselines, Qwen3-Omni, and Cosmos3 series with explicit score counts">
|
| 4367 |
<div class="split-radar-links">
|
| 4368 |
<a href="assets/charts/episode128_task_model_radar.svg">Open SVG</a>
|
| 4369 |
<a href="data/episode128_task_model_radar.json">Open JSON</a>
|
|
|
|
| 4473 |
<div class="wrap">
|
| 4474 |
<div class="section-head">
|
| 4475 |
<h2>Research roadmap.</h2>
|
| 4476 |
+
<p>The project path moves from the current public-sample task lab to the latest verified Qwen3-Omni diagnostic run, same-split 128-episode baseline alignment, a no-new-episode enhancement pack, action/subtask error analysis, robustness runs, world/policy tracks, and the future Xperience Embodied Foundation Model pretraining goal.</p>
|
| 4477 |
</div>
|
| 4478 |
<div class="roadmap-grid" aria-label="Research roadmap stages">
|
| 4479 |
<article class="roadmap-card" data-status="implemented">
|
|
|
|
| 4495 |
</div>
|
| 4496 |
</article>
|
| 4497 |
<article class="roadmap-card" data-status="verified_latest_branch">
|
| 4498 |
+
<span class="roadmap-status">verified latest run</span>
|
| 4499 |
<h3>Qwen3-Omni LoRA Latest Diagnostic Branch</h3>
|
| 4500 |
<p>Train lightweight adapters on selected prepared episodes and evaluate on held-out episodes with committed predictions, metrics, and run reports.</p>
|
| 4501 |
<div class="roadmap-meta">
|
|
|
|
| 4588 |
<div class="wrap">
|
| 4589 |
<div class="section-head">
|
| 4590 |
<h2>Additional development directions.</h2>
|
| 4591 |
+
<p>Beyond the current task heads, Qwen3-Omni fine-tuning path, Cosmos/world-model track, and future native pretraining goal, Xperience-10M can support three foundation pipeline tracks plus several concrete research-development tracks.</p>
|
| 4592 |
</div>
|
| 4593 |
<div class="foundation-pipeline-grid" aria-label="Three high-resolution foundation direction slide diagrams">
|
| 4594 |
<article class="foundation-pipeline-card">
|
|
|
|
| 4634 |
<article class="artifact"><h3>Multimodal representation learning</h3><p>Train contrastive and masked-prediction encoders over synchronized video, audio, depth, pose, mocap, IMU, and language windows.</p><a href="data/additional_development_directions.json">JSON plan</a></article>
|
| 4635 |
<article class="artifact"><h3>Skill and procedure graphs</h3><p>Mine action steps, transitions, preconditions, effects, and temporal graphs that connect egocentric perception to planning.</p><a href="data/research_directions.json">current task map</a></article>
|
| 4636 |
<article class="artifact"><h3>Human-object affordances</h3><p>Add contact, reachable-object, tool-use, and next-affordance tasks using hands, mocap, objects, contacts, video, and language.</p><a href="data/task_walkthroughs.json">task walkthroughs</a></article>
|
| 4637 |
+
<article class="artifact"><h3>3D/4D scene and object memory</h3><p>Fuse depth, pose/SLAM, multiview video, and object cues into persistent scene/object maps for spatial reasoning and object permanence.</p><a href="data/foundation_model_plan.json">model tracks</a></article>
|
| 4638 |
<article class="artifact"><h3>Quality and sync diagnostics</h3><p>Track timestamp drift, missing streams, calibration consistency, corrupted files, and degraded-mode manifests before large training runs.</p><a href="data/evidence_contract.json">evidence contract</a></article>
|
| 4639 |
<article class="artifact"><h3>Policy and simulation transfer</h3><p>Convert mocap, hand trajectories, contacts, and object states into action tokens, robot-compatible targets, and imitation-learning examples.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/FOUNDATION_MODEL_PLAN.md">foundation plan</a></article>
|
| 4640 |
</div>
|
|
|
|
| 4653 |
<article class="artifact"><h3>Metric contract</h3><p>All 20 tasks list input, target, primary metric, baseline score, and source artifact path in the unified suite file.</p><a href="data/task_suite_20.json">task_suite_20.json</a></article>
|
| 4654 |
<article class="artifact"><h3>Leakage controls</h3><p>Scalers fit on train windows only; future labels, target-side signals, caption/object labels, and contact labels stay on the target side unless explicitly queried.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/scripts/build_evaluation_protocol.py">builder script</a></article>
|
| 4655 |
<article class="artifact"><h3>Audio ablation</h3><p>Audio and no-audio variants are evaluated across the original task contracts under the same chronological split.</p><a href="data/audio_ablation_summary.json">audio summary</a></article>
|
| 4656 |
+
<article class="artifact"><h3>Foundation track selection</h3><p>Qwen3-Omni is the first trainable baseline, Cosmos 3 is the world-model track with a camera-pose proxy forward-dynamics contract ready for trainer work, policy models wait for robot-compatible action targets, and Xperience-native pretraining remains a later full-corpus goal.</p><a href="data/foundation_model_plan.json">backbone plan</a></article>
|
| 4657 |
+
<article class="artifact"><h3>Next evaluation stage</h3><p>This public-sample run covers single-episode task development. The selected multi-episode Qwen3-Omni final diagnostic result is verified and meets the JSON-validity target; Cosmos3-Nano has a verified future-window compatibility package; and Cosmos3-Super has a verified base-weight JSON-task evaluation plus a fine-tuned forward-dynamics LoRA artifact. The next stage is action/subtask error analysis, stronger model-quality runs, and policy-target conversion.</p><a href="data/omni_model_comparison.json">result comparison</a></article>
|
| 4658 |
<article class="artifact"><h3>128-Episode Task Suite Enhancement Pack</h3><p>Before adding episodes, the suite should try `multiscale_20s10_40s20_80s40`, hierarchical action/subtask targets, label-normalized scoring, and compact raw-feature shards for unsupported tasks.</p><a href="data/task_suite_enhancement_128.json">task_suite_enhancement_128.json</a></article>
|
| 4659 |
+
<article class="artifact"><h3>Scale-up requirement</h3><p>Future Omni, Cosmos, and policy tracks use the same episode split discipline, training metadata, held-out predictions, metrics, run report, and public-safe package gate.</p><a href="data/foundation_model_plan.json">scale-up status</a></article>
|
| 4660 |
</div>
|
| 4661 |
</div>
|
| 4662 |
</section>
|
|
|
|
| 4708 |
<article class="evidence-card">
|
| 4709 |
<span class="status-pill">current plan</span>
|
| 4710 |
<h3>Foundation backbones are separated by role</h3>
|
| 4711 |
+
<p>Qwen3-Omni stays first for held-out LoRA; Cosmos 3 is the world-model track with camera-pose proxy forward-dynamics targets ready for trainer work; OpenVLA/openpi/GR00T are policy candidates after robot-compatible action conversion; Xperience-native pretraining is the later full-corpus goal.</p>
|
| 4712 |
<div class="evidence-links">
|
| 4713 |
<a href="data/foundation_model_plan.json">foundation model plan</a>
|
| 4714 |
<a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/FOUNDATION_MODEL_PLAN.md">plan doc</a>
|
|
|
|
| 4717 |
</article>
|
| 4718 |
<article class="evidence-card">
|
| 4719 |
<span class="status-pill">verified diagnostic</span>
|
| 4720 |
+
<h3>Qwen3-Omni and Cosmos3 series</h3>
|
| 4721 |
<p>The selected 96/16/16 episode split now has a verified Qwen3-Omni v6 package with 4,032 held-out test predictions and 99.90% JSON validity. Cosmos3-Nano has 378 held-out future-window predictions, Cosmos3-Super Reasoner has 448 held-out base-weight JSON-task predictions, and Cosmos3-Super Forward-Dynamics LoRA has 448 held-out loss records.</p>
|
| 4722 |
<div class="evidence-links">
|
| 4723 |
<a href="data/omni_model_comparison.json">result comparison</a>
|
|
|
|
| 4894 |
</div>
|
| 4895 |
<div class="artifact-grid">
|
| 4896 |
<article class="artifact primary-artifact"><div><h3>Official dataset</h3><p>Xperience-10M is a gated large-scale egocentric multimodal dataset for embodied AI, robotics, spatial intelligence, and world modeling.</p></div><a href="https://huggingface.co/datasets/ropedia-ai/xperience-10m">official HF dataset</a></article>
|
| 4897 |
+
<article class="artifact"><h3>Line 1 public sample</h3><p>The one-episode line builds the inspectable 20-task lab. It is not evidence of multi-episode generalization.</p><a href="https://huggingface.co/datasets/ropedia-ai/xperience-10m-sample">sample dataset</a></article>
|
| 4898 |
<article class="artifact"><h3>Modalities</h3><p>The sample exposes synchronized video, audio, depth, pose/SLAM, motion capture, inertial signals, calibration, and language annotations.</p><a href="data/modality_atlas.json">modality atlas</a></article>
|
| 4899 |
+
<article class="artifact"><h3>Multi-episode pilot</h3><p>The selected 128-episode Qwen3-Omni LoRA v6 diagnostic run is verified with 4,032 held-out test predictions and 99.90% JSON validity. Action/subtask metrics are still weak, so this remains a baseline for error analysis.</p><a href="https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep">LoRA adapter</a><a href="data/qwen3_v5_v6_comparison.json">v5/v6 comparison</a></article>
|
| 4900 |
<article class="artifact"><h3>Raw sample browser</h3><p>The Data tab now exposes the official public sample files directly, including playable MP4 video streams and the audio track embedded in fisheye_cam0.mp4.</p><a href="#raw-sample">open raw browser</a><a href="data/raw_sample_files.json">raw manifest</a></article>
|
| 4901 |
<article class="artifact"><h3>Data boundary</h3><p>Raw MP4, HDF5, and RRD files are streamed from the official public sample source when opened here; private gated data and full Qwen weights are not redistributed in this project.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/DATA_NOTICE.md">data notice</a></article>
|
| 4902 |
<article class="artifact"><h3>Current project subset</h3><p>One public sample episode, 5,821 frames, 1,161 aligned windows, 8,546-dimensional task inputs, plus direct links to the official raw sample files.</p><a href="data/modality_atlas.json">modality atlas</a></article>
|
|
|
|
| 5023 |
<article class="split-radar-card">
|
| 5024 |
<h3>128-Episode 20-Task Radar</h3>
|
| 5025 |
<p>Seven aligned 128-episode methods cover all 20 axes: metadata simple/NN, raw-feature simple/NN, Qwen3-Omni, Cosmos3-Super, and Cosmos3-Nano. Proxy axes stay labeled in the JSON.</p>
|
| 5026 |
+
<img src="assets/charts/episode128_task_model_radar.svg?v=xperience10m-split-radar-v1" alt="128-episode 20-task radar comparing raw-feature baselines, metadata baselines, Qwen3-Omni, and Cosmos3 series with explicit score counts">
|
| 5027 |
<div class="split-radar-links">
|
| 5028 |
<a href="assets/charts/episode128_task_model_radar.svg">Open SVG</a>
|
| 5029 |
<a href="data/episode128_task_model_radar.json">Open JSON</a>
|
|
|
|
| 5127 |
<article class="result-reading-step">
|
| 5128 |
<span>02</span>
|
| 5129 |
<strong>Open the radar</strong>
|
| 5130 |
+
<p>The single-episode radar shows Minimal vs Neural MLP. The 128-episode radar shows metadata/raw baselines, Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano.</p>
|
| 5131 |
</article>
|
| 5132 |
<article class="result-reading-step">
|
| 5133 |
<span>03</span>
|
|
|
|
| 5679 |
<p>Use these files to navigate the whole project, open the published mirrors, or reproduce the public-sample pipeline.</p>
|
| 5680 |
</div>
|
| 5681 |
<div class="artifact-grid">
|
| 5682 |
+
<article class="artifact primary-artifact"><div><h3>Public reader map</h3><p>Single navigation layer for GitHub, GitHub Pages, HF Space, artifact dataset, baseline model repo, Qwen3-Omni/Cosmos3 repos, and public claim boundaries.</p></div><a href="data/public_reader_map.json">reader map</a></article>
|
| 5683 |
<article class="artifact primary-artifact"><div><h3>Artifact guide</h3><p>Human-readable map from project scope to data contract, task evidence, platform mirrors, and scale-up status.</p></div><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/ARTIFACT_GUIDE.md">artifact guide</a></article>
|
| 5684 |
<article class="artifact"><h3>Reproduction scripts</h3><p>Training, visualization, taxonomy, walkthrough, validator, and omni-readiness scripts.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/tree/main/scripts">scripts/</a></article>
|
| 5685 |
<article class="artifact"><h3>Hugging Face Space</h3><p>The dashboard packaged as a public static Space.</p><a href="https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite">HF Space</a></article>
|
|
|
|
| 5699 |
<p>The multi-episode Qwen3-Omni path is documented, scripted, and verified as a validation-monitored diagnostic held-out pilot. Stronger model-quality metrics require structured-output and error-analysis improvements.</p>
|
| 5700 |
</div>
|
| 5701 |
<div class="artifact-grid">
|
| 5702 |
+
<article class="artifact primary-artifact"><div><h3>Two-line model comparison</h3><p>Groups Line 1 task-head baselines and Line 2 selected-128 methods: metadata/raw baselines, Qwen3-Omni v6 LoRA, Cosmos3-Nano Future Window, and Cosmos3-Super Reasoner.</p></div><a href="data/omni_model_comparison.json">result comparison</a></article>
|
| 5703 |
<article class="artifact primary-artifact"><div><h3>128-episode source + features</h3><p>Maps every selected official Xperience-10M episode id to its gated source tree and the public-safe processed features: Qwen v6 multiscale windows, dense multiscale rows, and metadata matrices.</p></div><a href="data/xperience10m_128_episode_feature_index.json">source/feature index</a></article>
|
| 5704 |
<article class="artifact"><h3>128-Episode Task Suite Enhancement Pack</h3><p>No-new-episode plan for denser supervision: <code>multiscale_20s10_40s20_80s40</code>, hierarchical action/subtask labels, stronger scoring slices, and raw-feature shard priorities.</p><a href="data/task_suite_enhancement_128.json">task_suite_enhancement_128.json</a></article>
|
| 5705 |
<article class="artifact"><h3>Foundation-model plan</h3><p>Backbone selection matrix covering Qwen3-Omni, Cosmos 3, GR00T, OpenVLA/openpi, Gemini Robotics, Octo, SmolVLA-style policy candidates, and the future Xperience-native pretraining goal.</p><a href="data/foundation_model_plan.json">foundation model plan</a></article>
|
| 5706 |
<article class="artifact"><h3>Multi-episode data access</h3><p>Public data-access path, selected 128-episode pilot plan, and preparation requirements.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md">data access</a></article>
|
| 5707 |
<article class="artifact"><h3>Qwen3-Omni LoRA group</h3><p>Separates the 1-episode sensor-adapter smoke test from Qwen run v1-v6. v6 is the current 20-task matrix row, while v5 remains the pinned prior release.</p><a href="data/qwen3_omni_run_lineage.json">Qwen v1-v6 lineage</a><a href="data/omni_model_comparison.json">Qwen group</a></article>
|
| 5708 |
+
<article class="artifact"><h3>Cosmos3 groups</h3><p>Shows the verified Nano future-window compatibility package, the Super base-weight Reasoner JSON-task evaluation, and the Super fine-tuned forward-dynamics LoRA artifact with separate loss metrics.</p><a href="data/omni_model_comparison.json">Cosmos groups</a></article>
|
| 5709 |
<article class="artifact"><h3>Scale-up requirement</h3><p>Future runs need validation tracking, held-out predictions, quality-target reporting, and the same public-safe package gate.</p><a href="data/foundation_model_plan.json">training requirements</a></article>
|
| 5710 |
<article class="artifact"><h3>Xperience-native pretraining</h3><p>Future plan for a domain-specific embodied foundation model trained from scratch over full-corpus video, audio, geometry, motion, inertial, and language streams.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md">pretraining plan</a></article>
|
| 5711 |
</div>
|
|
|
|
| 5724 |
<article class="artifact"><h3>Dataset notes</h3><p>Official dataset links, public sample source, modalities, access boundary, and current project subset.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/XPERIENCE10M_DATASET_CARD_ALIGNMENT.md">dataset notes</a></article>
|
| 5725 |
<article class="artifact"><h3>Reproducibility</h3><p>Commands and expected outputs for rebuilding the public-sample task suite and visual artifacts.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/REPRODUCIBILITY.md">reproduce</a></article>
|
| 5726 |
<article class="artifact"><h3>Qwen3-Omni status</h3><p>Data requirements and evaluation boundary for the selected multi-episode LoRA pilot.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/results/omni_finetune/DATA_ACCESS_STATUS.md">training status</a></article>
|
| 5727 |
+
<article class="artifact"><h3>Foundation-model plan</h3><p>Qwen3-Omni, Cosmos 3, GR00T, OpenVLA/openpi, Gemini Robotics, Octo, SmolVLA-style tracks, and the Xperience-native pretraining goal by role.</p><a href="data/foundation_model_plan.json">model plan</a></article>
|
| 5728 |
<article class="artifact"><h3>Hub artifacts</h3><p>Derived CSV/JSON/Markdown/figure artifacts without redistributing raw Xperience-10M data.</p><a href="https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts">artifact dataset</a></article>
|
| 5729 |
<article class="artifact"><h3>Baseline models</h3><p>Lightweight minimal and neural task-head model files for the task contracts.</p><a href="https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines">model repo</a></article>
|
| 5730 |
</div>
|
|
|
|
| 5736 |
<section id="omni-scale-up" data-project-tab="resources" role="tabpanel" aria-labelledby="tab-resources" tabindex="-1">
|
| 5737 |
<div class="wrap">
|
| 5738 |
<div class="section-head">
|
| 5739 |
+
<h2>Qwen3-Omni diagnostic run is verified.</h2>
|
| 5740 |
<p>The selected pilot uses 128 source-balanced episodes across 128 different session UUIDs. The latest v6 held-out package is verified, and its weak metrics define the next structured-output and error-analysis pass.</p>
|
| 5741 |
</div>
|
| 5742 |
<div class="artifact-grid">
|
|
|
|
| 5744 |
<article class="artifact"><h3>Transfer</h3><p>Download raw episodes only from official gated sources, exclude visualization.rrd, validate files, then stage them for training.</p></article>
|
| 5745 |
<article class="artifact"><h3>Current LoRA artifact</h3><p>The current Qwen3-Omni LoRA artifact is the verified v6 selected 128-episode diagnostic adapter. The v5 row remains pinned as the prior release, and the 1-episode Qwen entry is only a sensor-adapter smoke test.</p><a href="data/omni_model_comparison.json">model groups</a></article>
|
| 5746 |
<article class="artifact"><h3>128-Episode Task Suite Enhancement Pack</h3><p>The next suite push does not need more episodes first: use <code>multiscale_20s10_40s20_80s40</code>, hierarchical action/subtask targets, and raw-feature shards while keeping the held-out split fixed.</p><a href="data/task_suite_enhancement_128.json">task_suite_enhancement_128.json</a></article>
|
| 5747 |
+
<article class="artifact"><h3>Backbone tracks</h3><p>Qwen3-Omni uses a separate LoRA model repo; Cosmos3-Nano remains a compatibility package; Cosmos3-Super now has a verified forward-dynamics LoRA artifact with weights in a dedicated model repo.</p><a href="https://huggingface.co/cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep">Cosmos3-Super weights</a></article>
|
| 5748 |
<article class="artifact"><h3>Native foundation model</h3><p>The long-term goal is a full-corpus Xperience Embodied Foundation Model trained on synchronized perception, geometry, motion, inertial, audio, and language streams after smaller scaling stages validate the approach.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md">pretraining plan</a></article>
|
| 5749 |
</div>
|
| 5750 |
</div>
|
|
|
|
| 5761 |
<article class="artifact"><h3>Reproducibility matrix</h3><p>Machine-readable command matrix covering sample download, baselines, the unified 20-task suite, figures, and validation.</p><a href="data/reproducibility_matrix.json">reproducibility matrix</a></article>
|
| 5762 |
<article class="artifact"><h3>Exact-match reproduction record</h3><p>The last metric rebuild reproduced the public-sample outputs from a fresh cache and matched the committed metrics.</p><a href="https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite/blob/main/notes/reproducibility_audit.md">reproduction audit</a></article>
|
| 5763 |
<article class="artifact"><h3>Project dashboard</h3><p>The website organizes the dataset sample, tasks, methods, results, directions, and scale-up path in one tabbed reader flow.</p><a href="#artifacts">project materials</a></article>
|
| 5764 |
+
<article class="artifact"><h3>Line 2 model status</h3><p>The comparison JSON groups selected-128 baselines, Qwen3-Omni v6 LoRA, Cosmos3-Nano Future Window, and Cosmos3-Super Reasoner. Qwen v5/v6 detail stays in a separate lineage audit.</p><a href="data/omni_model_comparison.json">comparison</a><a href="data/qwen3_v5_v6_comparison.json">Qwen v5/v6</a></article>
|
| 5765 |
</div>
|
| 5766 |
<p class="repro-note">Minimal path: install the toolkit dependencies, download the official sample, run the task suite with neural heads, regenerate tasks 13-20, build the unified 20-task index, regenerate visualizations, then rebuild the supporting project reports.</p>
|
| 5767 |
<pre class="code-panel"><button type="button" data-copy="setup">Copy</button><code id="setup">git clone https://github.com/Ropedia/HOMIE-toolkit.git
|
|
|
|
| 5799 |
|
| 5800 |
<footer>
|
| 5801 |
<div class="wrap">
|
| 5802 |
+
Built as an embodied-AI learning lab with verified held-out diagnostic runs and a next stage focused on stronger action/subtask quality.
|
| 5803 |
<span class="footer-meta">README translation metadata remains available at <a href="data/language_versions.json">language_versions.json</a>; use the header selector to translate this website in place.</span>
|
| 5804 |
</div>
|
| 5805 |
</footer>
|
|
|
|
| 6090 |
tasks: "Best for task-by-task input, output, and metric cards.",
|
| 6091 |
pipeline: "Best for understanding how raw episode data becomes features and results.",
|
| 6092 |
protocol: "Best for splits, leakage controls, metrics, and evaluation rules.",
|
| 6093 |
+
architectures: "Best for how task heads and model tracks are organized.",
|
| 6094 |
features: "Best for modality and feature provenance.",
|
| 6095 |
takeaways: "Best for the fastest read on what the current metrics mean.",
|
| 6096 |
models: "Best for minimal baseline evidence.",
|
|
|
|
| 6100 |
diagnostics: "Best for charts and error-analysis evidence.",
|
| 6101 |
artifacts: "Best for finding files, mirrors, weights, scripts, and checks.",
|
| 6102 |
evidence: "Best for current experiment status and milestones.",
|
| 6103 |
+
"omni-scale-up": "Best for Qwen3-Omni and Cosmos3 status.",
|
| 6104 |
run: "Best for reproduction commands."
|
| 6105 |
};
|
| 6106 |
const sectionTabMap = Object.fromEntries(tabSections.map((section) => [section.id, section.dataset.projectTab]));
|
metrics/additional_development_directions.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"title": "Additional Development Directions",
|
| 3 |
-
"summary": "Concrete Xperience-10M project directions beyond the current minimal baselines, Qwen3-Omni LoRA plan, Cosmos/world-model
|
| 4 |
"status": "planned_research_directions",
|
| 5 |
"public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
|
| 6 |
"directions": [
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Additional Development Directions",
|
| 3 |
+
"summary": "Concrete Xperience-10M project directions beyond the current minimal baselines, Qwen3-Omni LoRA plan, Cosmos/world-model track, and long-term native pretraining goal.",
|
| 4 |
"status": "planned_research_directions",
|
| 5 |
"public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
|
| 6 |
"directions": [
|
metrics/artifact_index.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Artifact Index",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"status": "pass",
|
| 5 |
"artifact_count": 226,
|
| 6 |
"missing": [],
|
|
@@ -81,8 +81,8 @@
|
|
| 81 |
"surface": "website_hf",
|
| 82 |
"shows": "Machine-readable copy of the current project status for website and HF mirrors.",
|
| 83 |
"exists": true,
|
| 84 |
-
"bytes":
|
| 85 |
-
"sha256": "
|
| 86 |
},
|
| 87 |
{
|
| 88 |
"id": "research_roadmap",
|
|
@@ -92,8 +92,8 @@
|
|
| 92 |
"surface": "repo_hf",
|
| 93 |
"shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
|
| 94 |
"exists": true,
|
| 95 |
-
"bytes":
|
| 96 |
-
"sha256": "
|
| 97 |
},
|
| 98 |
{
|
| 99 |
"id": "research_roadmap_json",
|
|
@@ -103,8 +103,8 @@
|
|
| 103 |
"surface": "website_hf",
|
| 104 |
"shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
|
| 105 |
"exists": true,
|
| 106 |
-
"bytes":
|
| 107 |
-
"sha256": "
|
| 108 |
},
|
| 109 |
{
|
| 110 |
"id": "foundation_model_plan",
|
|
@@ -114,8 +114,8 @@
|
|
| 114 |
"surface": "repo_hf",
|
| 115 |
"shows": "Defines the post-data-gate backbone choices: Qwen3-Omni first, Cosmos 3 for world modeling, and VLA/policy models after action-target conversion.",
|
| 116 |
"exists": true,
|
| 117 |
-
"bytes":
|
| 118 |
-
"sha256": "
|
| 119 |
},
|
| 120 |
{
|
| 121 |
"id": "foundation_model_plan_json",
|
|
@@ -125,8 +125,8 @@
|
|
| 125 |
"surface": "website_hf",
|
| 126 |
"shows": "Machine-readable foundation-model selection matrix with source links, entry conditions, and evaluation additions.",
|
| 127 |
"exists": true,
|
| 128 |
-
"bytes":
|
| 129 |
-
"sha256": "
|
| 130 |
},
|
| 131 |
{
|
| 132 |
"id": "three_foundation_pipelines",
|
|
@@ -222,7 +222,7 @@
|
|
| 222 |
"path": "OMNI_MODEL_EXTENSION_CONTRACT.md",
|
| 223 |
"kind": "scaleup_contract",
|
| 224 |
"surface": "repo_hf",
|
| 225 |
-
"shows": "Defines the shared manifest, episode split, held-out evaluation, packaging, and public-safety rules for Qwen3-Omni,
|
| 226 |
"exists": true,
|
| 227 |
"bytes": 8900,
|
| 228 |
"sha256": "c4e51d0aa7536045c229418603a67c6b3c5f31c9d756ca7395cb0c9455f0ed6d"
|
|
@@ -323,8 +323,8 @@
|
|
| 323 |
"surface": "website_hf",
|
| 324 |
"shows": "Machine-readable enhancement pack for the website and Hugging Face mirrors.",
|
| 325 |
"exists": true,
|
| 326 |
-
"bytes":
|
| 327 |
-
"sha256": "
|
| 328 |
},
|
| 329 |
{
|
| 330 |
"id": "task_suite_enhancement_128_result",
|
|
@@ -345,8 +345,8 @@
|
|
| 345 |
"surface": "repo_hf",
|
| 346 |
"shows": "Regenerates the enhancement pack from committed 128-episode windows, baseline summaries, verified Qwen predictions, and Cosmos reference metrics.",
|
| 347 |
"exists": true,
|
| 348 |
-
"bytes":
|
| 349 |
-
"sha256": "
|
| 350 |
},
|
| 351 |
{
|
| 352 |
"id": "xperience10m_128_episode_feature_index",
|
|
@@ -510,8 +510,8 @@
|
|
| 510 |
"surface": "repo_hf",
|
| 511 |
"shows": "Records concrete non-backbone Xperience-10M development tracks: taxonomy, benchmark protocol, representation learning, skill graphs, affordances, 3D/4D memory, QA, and policy transfer.",
|
| 512 |
"exists": true,
|
| 513 |
-
"bytes":
|
| 514 |
-
"sha256": "
|
| 515 |
},
|
| 516 |
{
|
| 517 |
"id": "additional_development_directions_json",
|
|
@@ -521,8 +521,8 @@
|
|
| 521 |
"surface": "website_hf",
|
| 522 |
"shows": "Machine-readable additional development directions for the website and Hugging Face mirrors.",
|
| 523 |
"exists": true,
|
| 524 |
-
"bytes":
|
| 525 |
-
"sha256": "
|
| 526 |
},
|
| 527 |
{
|
| 528 |
"id": "xperience_embodied_foundation_pretraining",
|
|
@@ -610,7 +610,7 @@
|
|
| 610 |
"shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
|
| 611 |
"exists": true,
|
| 612 |
"bytes": 4432,
|
| 613 |
-
"sha256": "
|
| 614 |
},
|
| 615 |
{
|
| 616 |
"id": "source_alignment_validator",
|
|
@@ -631,8 +631,8 @@
|
|
| 631 |
"surface": "repo_hf",
|
| 632 |
"shows": "Publishes prepared Space, artifact dataset, and model bundles, including an explicit model-binary upload batch.",
|
| 633 |
"exists": true,
|
| 634 |
-
"bytes":
|
| 635 |
-
"sha256": "
|
| 636 |
},
|
| 637 |
{
|
| 638 |
"id": "github_package_dockerfile",
|
|
@@ -728,10 +728,10 @@
|
|
| 728 |
"path": "docs/data/unified_task_model_radar.json",
|
| 729 |
"kind": "website_data",
|
| 730 |
"surface": "website_hf",
|
| 731 |
-
"shows": "Stores normalized 20-axis radar values, raw task metrics, Qwen3/
|
| 732 |
"exists": true,
|
| 733 |
-
"bytes":
|
| 734 |
-
"sha256": "
|
| 735 |
},
|
| 736 |
{
|
| 737 |
"id": "single_episode_task_model_radar_json",
|
|
@@ -741,8 +741,8 @@
|
|
| 741 |
"surface": "website_hf",
|
| 742 |
"shows": "Machine-readable split radar for the one-episode Minimal and Neural MLP baselines, both scored on all 20 task contracts.",
|
| 743 |
"exists": true,
|
| 744 |
-
"bytes":
|
| 745 |
-
"sha256": "
|
| 746 |
},
|
| 747 |
{
|
| 748 |
"id": "episode128_task_model_radar_json",
|
|
@@ -750,10 +750,10 @@
|
|
| 750 |
"path": "docs/data/episode128_task_model_radar.json",
|
| 751 |
"kind": "website_data",
|
| 752 |
"surface": "website_hf",
|
| 753 |
-
"shows": "Machine-readable split radar for selected 128-episode metadata/raw baselines
|
| 754 |
"exists": true,
|
| 755 |
-
"bytes":
|
| 756 |
-
"sha256": "
|
| 757 |
},
|
| 758 |
{
|
| 759 |
"id": "task_method_20_result_matrix_json",
|
|
@@ -764,7 +764,7 @@
|
|
| 764 |
"shows": "Machine-readable 9-method by 20-task matrix where every method has 20 records and the current release is complete at 180/180 scored rows.",
|
| 765 |
"exists": true,
|
| 766 |
"bytes": 128509,
|
| 767 |
-
"sha256": "
|
| 768 |
},
|
| 769 |
{
|
| 770 |
"id": "task_method_20_result_matrix",
|
|
@@ -808,7 +808,7 @@
|
|
| 808 |
"shows": "Machine-readable check that scored JSON-backed matrix cells match their declared metric source values.",
|
| 809 |
"exists": true,
|
| 810 |
"bytes": 561,
|
| 811 |
-
"sha256": "
|
| 812 |
},
|
| 813 |
{
|
| 814 |
"id": "task_method_20_source_audit",
|
|
@@ -819,7 +819,7 @@
|
|
| 819 |
"shows": "Reader-facing source-value audit for the 180-result matrix.",
|
| 820 |
"exists": true,
|
| 821 |
"bytes": 447,
|
| 822 |
-
"sha256": "
|
| 823 |
},
|
| 824 |
{
|
| 825 |
"id": "two_evidence_line_map_chart",
|
|
@@ -838,7 +838,7 @@
|
|
| 838 |
"path": "docs/assets/charts/unified_task_model_radar.svg",
|
| 839 |
"kind": "generated_figure",
|
| 840 |
"surface": "website_hf",
|
| 841 |
-
"shows": "Compares minimal and neural MLP baselines across all 20 tasks, with Qwen3
|
| 842 |
"exists": true,
|
| 843 |
"bytes": 57938,
|
| 844 |
"sha256": "bb83b80b47fe679ebdce2c99378a4548120f1c8cc2d725b88e409d8c386dcbf8"
|
|
@@ -851,8 +851,8 @@
|
|
| 851 |
"surface": "website_hf",
|
| 852 |
"shows": "Separates the one-episode Minimal and Neural MLP 20/20 scored baselines into a clean two-polygon radar.",
|
| 853 |
"exists": true,
|
| 854 |
-
"bytes":
|
| 855 |
-
"sha256": "
|
| 856 |
},
|
| 857 |
{
|
| 858 |
"id": "episode128_task_model_radar_chart",
|
|
@@ -860,10 +860,10 @@
|
|
| 860 |
"path": "docs/assets/charts/episode128_task_model_radar.svg",
|
| 861 |
"kind": "generated_figure",
|
| 862 |
"surface": "website_hf",
|
| 863 |
-
"shows": "Separates the selected 128-episode methods: raw-feature simple/NN as complete 20/20 scored polygons
|
| 864 |
"exists": true,
|
| 865 |
-
"bytes":
|
| 866 |
-
"sha256": "
|
| 867 |
},
|
| 868 |
{
|
| 869 |
"id": "unified_task_model_radar_builder",
|
|
@@ -873,8 +873,8 @@
|
|
| 873 |
"surface": "repo_hf",
|
| 874 |
"shows": "Regenerates the direction-aware radar chart and machine-readable metric overlay JSON.",
|
| 875 |
"exists": true,
|
| 876 |
-
"bytes":
|
| 877 |
-
"sha256": "
|
| 878 |
},
|
| 879 |
{
|
| 880 |
"id": "task_method_20_gap_audit_builder",
|
|
@@ -915,7 +915,7 @@
|
|
| 915 |
"path": "results/omni_finetune/model_output_probe_readiness/model_output_probe_readiness.json",
|
| 916 |
"kind": "scaleup_status",
|
| 917 |
"surface": "repo_hf",
|
| 918 |
-
"shows": "Checks whether Qwen3
|
| 919 |
"exists": true,
|
| 920 |
"bytes": 4320,
|
| 921 |
"sha256": "11cff26749bf6ad8b8ee028b18e0b4be5713ed8b5325578caa03be25d894263b"
|
|
@@ -928,8 +928,8 @@
|
|
| 928 |
"surface": "repo_hf",
|
| 929 |
"shows": "Audits model-output split availability and writes a readiness report without assigning new numeric task scores.",
|
| 930 |
"exists": true,
|
| 931 |
-
"bytes":
|
| 932 |
-
"sha256": "
|
| 933 |
},
|
| 934 |
{
|
| 935 |
"id": "existing_model_output_task_probe",
|
|
@@ -937,7 +937,7 @@
|
|
| 937 |
"path": "results/omni_finetune/model_output_task_probes_20260616/summary.json",
|
| 938 |
"kind": "model_result",
|
| 939 |
"surface": "repo_hf",
|
| 940 |
-
"shows": "Scores task-specific Qwen3
|
| 941 |
"exists": true,
|
| 942 |
"bytes": 5951,
|
| 943 |
"sha256": "910477d2fba648605dda128d0ecd2a2c13cfa460573e350dc850014ac91c6c2b"
|
|
@@ -950,8 +950,8 @@
|
|
| 950 |
"surface": "repo_hf",
|
| 951 |
"shows": "Derives task-specific scores from committed verified model outputs without running new inference or backfilling absent targets.",
|
| 952 |
"exists": true,
|
| 953 |
-
"bytes":
|
| 954 |
-
"sha256": "
|
| 955 |
},
|
| 956 |
{
|
| 957 |
"id": "a100_128_metadata_task_baselines",
|
|
@@ -1071,8 +1071,8 @@
|
|
| 1071 |
"surface": "repo_hf",
|
| 1072 |
"shows": "Catalogs public figures, charts, modality thumbnails, dimensions, hashes, roles, and source scripts.",
|
| 1073 |
"exists": true,
|
| 1074 |
-
"bytes":
|
| 1075 |
-
"sha256": "
|
| 1076 |
},
|
| 1077 |
{
|
| 1078 |
"id": "figure_index_json",
|
|
@@ -1082,8 +1082,8 @@
|
|
| 1082 |
"surface": "website_hf",
|
| 1083 |
"shows": "Machine-readable visual asset index for website and Hugging Face mirrors.",
|
| 1084 |
"exists": true,
|
| 1085 |
-
"bytes":
|
| 1086 |
-
"sha256": "
|
| 1087 |
},
|
| 1088 |
{
|
| 1089 |
"id": "figure_index_builder",
|
|
@@ -1093,8 +1093,8 @@
|
|
| 1093 |
"surface": "repo_hf",
|
| 1094 |
"shows": "Regenerates visual-asset hashes, dimensions, and source-script provenance.",
|
| 1095 |
"exists": true,
|
| 1096 |
-
"bytes":
|
| 1097 |
-
"sha256": "
|
| 1098 |
},
|
| 1099 |
{
|
| 1100 |
"id": "brand_assets_json",
|
|
@@ -1160,7 +1160,7 @@
|
|
| 1160 |
"shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
|
| 1161 |
"exists": true,
|
| 1162 |
"bytes": 8640,
|
| 1163 |
-
"sha256": "
|
| 1164 |
},
|
| 1165 |
{
|
| 1166 |
"id": "public_surface_qa",
|
|
@@ -1179,10 +1179,10 @@
|
|
| 1179 |
"path": "PUBLIC_READER_MAP.md",
|
| 1180 |
"kind": "project_path",
|
| 1181 |
"surface": "repo_hf",
|
| 1182 |
-
"shows": "Provides the first-pass navigation layer for GitHub, GitHub Pages, Hugging Face mirrors,
|
| 1183 |
"exists": true,
|
| 1184 |
-
"bytes":
|
| 1185 |
-
"sha256": "
|
| 1186 |
},
|
| 1187 |
{
|
| 1188 |
"id": "public_reader_map_json",
|
|
@@ -1192,8 +1192,8 @@
|
|
| 1192 |
"surface": "website_hf",
|
| 1193 |
"shows": "Machine-readable public reader map used by the website and Hugging Face mirrors to keep entry points and surface responsibilities explicit.",
|
| 1194 |
"exists": true,
|
| 1195 |
-
"bytes":
|
| 1196 |
-
"sha256": "
|
| 1197 |
},
|
| 1198 |
{
|
| 1199 |
"id": "public_surface_qa_json",
|
|
@@ -1285,7 +1285,7 @@
|
|
| 1285 |
"volatile": true,
|
| 1286 |
"shows": "Records the last live GitHub/HF URL verification after upload.",
|
| 1287 |
"exists": true,
|
| 1288 |
-
"bytes":
|
| 1289 |
"hash_policy": "existence_and_size_only"
|
| 1290 |
},
|
| 1291 |
{
|
|
@@ -1296,8 +1296,8 @@
|
|
| 1296 |
"surface": "repo",
|
| 1297 |
"shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
|
| 1298 |
"exists": true,
|
| 1299 |
-
"bytes":
|
| 1300 |
-
"sha256": "
|
| 1301 |
},
|
| 1302 |
{
|
| 1303 |
"id": "reproducibility_contract",
|
|
@@ -1329,8 +1329,8 @@
|
|
| 1329 |
"surface": "repo_hf",
|
| 1330 |
"shows": "Generates the selective artifact catalog from local files.",
|
| 1331 |
"exists": true,
|
| 1332 |
-
"bytes":
|
| 1333 |
-
"sha256": "
|
| 1334 |
},
|
| 1335 |
{
|
| 1336 |
"id": "publication_audit",
|
|
@@ -1365,7 +1365,7 @@
|
|
| 1365 |
"volatile": true,
|
| 1366 |
"shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
|
| 1367 |
"exists": true,
|
| 1368 |
-
"bytes":
|
| 1369 |
"hash_policy": "existence_and_size_only"
|
| 1370 |
},
|
| 1371 |
{
|
|
@@ -1377,7 +1377,7 @@
|
|
| 1377 |
"volatile": true,
|
| 1378 |
"shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
|
| 1379 |
"exists": true,
|
| 1380 |
-
"bytes":
|
| 1381 |
"hash_policy": "existence_and_size_only"
|
| 1382 |
},
|
| 1383 |
{
|
|
@@ -1542,8 +1542,8 @@
|
|
| 1542 |
"surface": "website_hf",
|
| 1543 |
"shows": "Presents the task suite and sample modality thumbnails with metrics generated from committed files.",
|
| 1544 |
"exists": true,
|
| 1545 |
-
"bytes":
|
| 1546 |
-
"sha256": "
|
| 1547 |
},
|
| 1548 |
{
|
| 1549 |
"id": "modality_atlas",
|
|
@@ -1674,8 +1674,8 @@
|
|
| 1674 |
"surface": "repo_hf",
|
| 1675 |
"shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
|
| 1676 |
"exists": true,
|
| 1677 |
-
"bytes":
|
| 1678 |
-
"sha256": "
|
| 1679 |
},
|
| 1680 |
{
|
| 1681 |
"id": "omni_model_comparison_json",
|
|
@@ -1685,8 +1685,8 @@
|
|
| 1685 |
"surface": "repo_hf",
|
| 1686 |
"shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
|
| 1687 |
"exists": true,
|
| 1688 |
-
"bytes":
|
| 1689 |
-
"sha256": "
|
| 1690 |
},
|
| 1691 |
{
|
| 1692 |
"id": "cosmos3_nano_verified_summary",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Artifact Index",
|
| 3 |
+
"generated_at_utc": "2026-06-21T10:52:12+00:00",
|
| 4 |
"status": "pass",
|
| 5 |
"artifact_count": 226,
|
| 6 |
"missing": [],
|
|
|
|
| 81 |
"surface": "website_hf",
|
| 82 |
"shows": "Machine-readable copy of the current project status for website and HF mirrors.",
|
| 83 |
"exists": true,
|
| 84 |
+
"bytes": 23049,
|
| 85 |
+
"sha256": "9a06cc54d3b43362867a2fde9edc61d09f53df2d9ad761ecf95c862c76af31d2"
|
| 86 |
},
|
| 87 |
{
|
| 88 |
"id": "research_roadmap",
|
|
|
|
| 92 |
"surface": "repo_hf",
|
| 93 |
"shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
|
| 94 |
"exists": true,
|
| 95 |
+
"bytes": 15272,
|
| 96 |
+
"sha256": "559fa9e818f2c6fc7b926f880e9183200911317e70a26391f1830f4119ebc6b0"
|
| 97 |
},
|
| 98 |
{
|
| 99 |
"id": "research_roadmap_json",
|
|
|
|
| 103 |
"surface": "website_hf",
|
| 104 |
"shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
|
| 105 |
"exists": true,
|
| 106 |
+
"bytes": 14129,
|
| 107 |
+
"sha256": "a06d6525d9532b8608bf7be81eb9387deca3159b7c42bf38e107b4096953f351"
|
| 108 |
},
|
| 109 |
{
|
| 110 |
"id": "foundation_model_plan",
|
|
|
|
| 114 |
"surface": "repo_hf",
|
| 115 |
"shows": "Defines the post-data-gate backbone choices: Qwen3-Omni first, Cosmos 3 for world modeling, and VLA/policy models after action-target conversion.",
|
| 116 |
"exists": true,
|
| 117 |
+
"bytes": 11003,
|
| 118 |
+
"sha256": "24047e8692f69927d3fabf3c01058278e85651355f3749886493159971120cc6"
|
| 119 |
},
|
| 120 |
{
|
| 121 |
"id": "foundation_model_plan_json",
|
|
|
|
| 125 |
"surface": "website_hf",
|
| 126 |
"shows": "Machine-readable foundation-model selection matrix with source links, entry conditions, and evaluation additions.",
|
| 127 |
"exists": true,
|
| 128 |
+
"bytes": 13925,
|
| 129 |
+
"sha256": "77d4b2d5918ef1f776de6d29d34d523de95ba58df9526e2b255bed567434f932"
|
| 130 |
},
|
| 131 |
{
|
| 132 |
"id": "three_foundation_pipelines",
|
|
|
|
| 222 |
"path": "OMNI_MODEL_EXTENSION_CONTRACT.md",
|
| 223 |
"kind": "scaleup_contract",
|
| 224 |
"surface": "repo_hf",
|
| 225 |
+
"shows": "Defines the shared manifest, episode split, held-out evaluation, packaging, and public-safety rules for Qwen3-Omni, Cosmos3, and VLA/policy model tracks.",
|
| 226 |
"exists": true,
|
| 227 |
"bytes": 8900,
|
| 228 |
"sha256": "c4e51d0aa7536045c229418603a67c6b3c5f31c9d756ca7395cb0c9455f0ed6d"
|
|
|
|
| 323 |
"surface": "website_hf",
|
| 324 |
"shows": "Machine-readable enhancement pack for the website and Hugging Face mirrors.",
|
| 325 |
"exists": true,
|
| 326 |
+
"bytes": 20196,
|
| 327 |
+
"sha256": "9e1a3339425981dcf7931bf08684860864598bf679d0df86f93c656bacdb71bf"
|
| 328 |
},
|
| 329 |
{
|
| 330 |
"id": "task_suite_enhancement_128_result",
|
|
|
|
| 345 |
"surface": "repo_hf",
|
| 346 |
"shows": "Regenerates the enhancement pack from committed 128-episode windows, baseline summaries, verified Qwen predictions, and Cosmos reference metrics.",
|
| 347 |
"exists": true,
|
| 348 |
+
"bytes": 27225,
|
| 349 |
+
"sha256": "86e6098506b365cc92a9658d347645c285c5f61b5113eeaf1d170df0e2d7cc8f"
|
| 350 |
},
|
| 351 |
{
|
| 352 |
"id": "xperience10m_128_episode_feature_index",
|
|
|
|
| 510 |
"surface": "repo_hf",
|
| 511 |
"shows": "Records concrete non-backbone Xperience-10M development tracks: taxonomy, benchmark protocol, representation learning, skill graphs, affordances, 3D/4D memory, QA, and policy transfer.",
|
| 512 |
"exists": true,
|
| 513 |
+
"bytes": 3136,
|
| 514 |
+
"sha256": "decdd359d89694fe10873dcce6cee23e991de1b874ade72643314e879ade784e"
|
| 515 |
},
|
| 516 |
{
|
| 517 |
"id": "additional_development_directions_json",
|
|
|
|
| 521 |
"surface": "website_hf",
|
| 522 |
"shows": "Machine-readable additional development directions for the website and Hugging Face mirrors.",
|
| 523 |
"exists": true,
|
| 524 |
+
"bytes": 6120,
|
| 525 |
+
"sha256": "669d1523f767a8eda22bbe96ab54af99e102496a3d27f7dd850e08e2724e661f"
|
| 526 |
},
|
| 527 |
{
|
| 528 |
"id": "xperience_embodied_foundation_pretraining",
|
|
|
|
| 610 |
"shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
|
| 611 |
"exists": true,
|
| 612 |
"bytes": 4432,
|
| 613 |
+
"sha256": "db279081759eebb09a4ba53c56fb17a14f3546e13d058100494ac7745b901a1c"
|
| 614 |
},
|
| 615 |
{
|
| 616 |
"id": "source_alignment_validator",
|
|
|
|
| 631 |
"surface": "repo_hf",
|
| 632 |
"shows": "Publishes prepared Space, artifact dataset, and model bundles, including an explicit model-binary upload batch.",
|
| 633 |
"exists": true,
|
| 634 |
+
"bytes": 25159,
|
| 635 |
+
"sha256": "a74451a7d717661e1499b98631d825f4db8c6b51b1e9bafd73966697eb04258a"
|
| 636 |
},
|
| 637 |
{
|
| 638 |
"id": "github_package_dockerfile",
|
|
|
|
| 728 |
"path": "docs/data/unified_task_model_radar.json",
|
| 729 |
"kind": "website_data",
|
| 730 |
"surface": "website_hf",
|
| 731 |
+
"shows": "Stores normalized 20-axis radar values, raw task metrics, Qwen3-Omni/Cosmos3 overlay mappings, method-card caveats, proxy flags, and source artifacts.",
|
| 732 |
"exists": true,
|
| 733 |
+
"bytes": 228815,
|
| 734 |
+
"sha256": "862376178e8b0d01b536f49a18b7934a373494f8b36080790f616438ec0e035e"
|
| 735 |
},
|
| 736 |
{
|
| 737 |
"id": "single_episode_task_model_radar_json",
|
|
|
|
| 741 |
"surface": "website_hf",
|
| 742 |
"shows": "Machine-readable split radar for the one-episode Minimal and Neural MLP baselines, both scored on all 20 task contracts.",
|
| 743 |
"exists": true,
|
| 744 |
+
"bytes": 51107,
|
| 745 |
+
"sha256": "5f2ebb41e8488446ea5c5cd2cb75bbedce688433feffe1412288de56b133bd5c"
|
| 746 |
},
|
| 747 |
{
|
| 748 |
"id": "episode128_task_model_radar_json",
|
|
|
|
| 750 |
"path": "docs/data/episode128_task_model_radar.json",
|
| 751 |
"kind": "website_data",
|
| 752 |
"surface": "website_hf",
|
| 753 |
+
"shows": "Machine-readable split radar for selected 128-episode metadata/raw baselines, Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano, now complete at 140/140 scored rows with proxy notes retained.",
|
| 754 |
"exists": true,
|
| 755 |
+
"bytes": 184992,
|
| 756 |
+
"sha256": "385704db90443d74903f365e90b27538020f5574c96f296bbf63173f488a645d"
|
| 757 |
},
|
| 758 |
{
|
| 759 |
"id": "task_method_20_result_matrix_json",
|
|
|
|
| 764 |
"shows": "Machine-readable 9-method by 20-task matrix where every method has 20 records and the current release is complete at 180/180 scored rows.",
|
| 765 |
"exists": true,
|
| 766 |
"bytes": 128509,
|
| 767 |
+
"sha256": "96082daa33771963ac40b7d719df00a76ec443508a3d3101cb6dd82d87965729"
|
| 768 |
},
|
| 769 |
{
|
| 770 |
"id": "task_method_20_result_matrix",
|
|
|
|
| 808 |
"shows": "Machine-readable check that scored JSON-backed matrix cells match their declared metric source values.",
|
| 809 |
"exists": true,
|
| 810 |
"bytes": 561,
|
| 811 |
+
"sha256": "cbe9be1ea3d62b253780aade9c51cb7f3a5882df185927186ee6a1d6516ad3a6"
|
| 812 |
},
|
| 813 |
{
|
| 814 |
"id": "task_method_20_source_audit",
|
|
|
|
| 819 |
"shows": "Reader-facing source-value audit for the 180-result matrix.",
|
| 820 |
"exists": true,
|
| 821 |
"bytes": 447,
|
| 822 |
+
"sha256": "dfcde22c9350858d0df6d881533f63ba6838fc980b62f0b68770f9b708fcde85"
|
| 823 |
},
|
| 824 |
{
|
| 825 |
"id": "two_evidence_line_map_chart",
|
|
|
|
| 838 |
"path": "docs/assets/charts/unified_task_model_radar.svg",
|
| 839 |
"kind": "generated_figure",
|
| 840 |
"surface": "website_hf",
|
| 841 |
+
"shows": "Compares minimal and neural MLP baselines across all 20 tasks, with Qwen3-Omni and Cosmos3 task-aligned overlays.",
|
| 842 |
"exists": true,
|
| 843 |
"bytes": 57938,
|
| 844 |
"sha256": "bb83b80b47fe679ebdce2c99378a4548120f1c8cc2d725b88e409d8c386dcbf8"
|
|
|
|
| 851 |
"surface": "website_hf",
|
| 852 |
"shows": "Separates the one-episode Minimal and Neural MLP 20/20 scored baselines into a clean two-polygon radar.",
|
| 853 |
"exists": true,
|
| 854 |
+
"bytes": 35232,
|
| 855 |
+
"sha256": "87b52a7dead40358f1778dda43ade4d2e875ac98e507e01ca007084363e5977e"
|
| 856 |
},
|
| 857 |
{
|
| 858 |
"id": "episode128_task_model_radar_chart",
|
|
|
|
| 860 |
"path": "docs/assets/charts/episode128_task_model_radar.svg",
|
| 861 |
"kind": "generated_figure",
|
| 862 |
"surface": "website_hf",
|
| 863 |
+
"shows": "Separates the selected 128-episode methods: raw-feature simple/NN as complete 20/20 scored polygons plus metadata, Qwen3-Omni, Cosmos3-Super, and Cosmos3-Nano task-aligned overlays.",
|
| 864 |
"exists": true,
|
| 865 |
+
"bytes": 51915,
|
| 866 |
+
"sha256": "047ea4b05a04f6734e2afcf792863559dc8f3091eae88a97ff90e8b038a423f4"
|
| 867 |
},
|
| 868 |
{
|
| 869 |
"id": "unified_task_model_radar_builder",
|
|
|
|
| 873 |
"surface": "repo_hf",
|
| 874 |
"shows": "Regenerates the direction-aware radar chart and machine-readable metric overlay JSON.",
|
| 875 |
"exists": true,
|
| 876 |
+
"bytes": 68610,
|
| 877 |
+
"sha256": "96bc2df0de5a9e512d69961ddb13ea87b26ef01f1f943f5a78a6dc373400949d"
|
| 878 |
},
|
| 879 |
{
|
| 880 |
"id": "task_method_20_gap_audit_builder",
|
|
|
|
| 915 |
"path": "results/omni_finetune/model_output_probe_readiness/model_output_probe_readiness.json",
|
| 916 |
"kind": "scaleup_status",
|
| 917 |
"surface": "repo_hf",
|
| 918 |
+
"shows": "Checks whether Qwen3-Omni and Cosmos3 runs have train, validation, and test prediction files before extending model overlays to all 20 task contracts.",
|
| 919 |
"exists": true,
|
| 920 |
"bytes": 4320,
|
| 921 |
"sha256": "11cff26749bf6ad8b8ee028b18e0b4be5713ed8b5325578caa03be25d894263b"
|
|
|
|
| 928 |
"surface": "repo_hf",
|
| 929 |
"shows": "Audits model-output split availability and writes a readiness report without assigning new numeric task scores.",
|
| 930 |
"exists": true,
|
| 931 |
+
"bytes": 10526,
|
| 932 |
+
"sha256": "2b95834c75b0c90ceefe2c20381b3997a63f283b733186e07dea9e2778c78fad"
|
| 933 |
},
|
| 934 |
{
|
| 935 |
"id": "existing_model_output_task_probe",
|
|
|
|
| 937 |
"path": "results/omni_finetune/model_output_task_probes_20260616/summary.json",
|
| 938 |
"kind": "model_result",
|
| 939 |
"surface": "repo_hf",
|
| 940 |
+
"shows": "Scores task-specific Qwen3-Omni and Cosmos3 overlays only where verified held-out prediction JSON or compact target maps already contain the required targets.",
|
| 941 |
"exists": true,
|
| 942 |
"bytes": 5951,
|
| 943 |
"sha256": "910477d2fba648605dda128d0ecd2a2c13cfa460573e350dc850014ac91c6c2b"
|
|
|
|
| 950 |
"surface": "repo_hf",
|
| 951 |
"shows": "Derives task-specific scores from committed verified model outputs without running new inference or backfilling absent targets.",
|
| 952 |
"exists": true,
|
| 953 |
+
"bytes": 69423,
|
| 954 |
+
"sha256": "43086745ba53f5a4da1a39b9c223914707ab51b027555c91bea27c0bc152a27f"
|
| 955 |
},
|
| 956 |
{
|
| 957 |
"id": "a100_128_metadata_task_baselines",
|
|
|
|
| 1071 |
"surface": "repo_hf",
|
| 1072 |
"shows": "Catalogs public figures, charts, modality thumbnails, dimensions, hashes, roles, and source scripts.",
|
| 1073 |
"exists": true,
|
| 1074 |
+
"bytes": 7014,
|
| 1075 |
+
"sha256": "1087774a85614f12871418bb9fa375b98121596eb11dcdc22d324b943fb9d313"
|
| 1076 |
},
|
| 1077 |
{
|
| 1078 |
"id": "figure_index_json",
|
|
|
|
| 1082 |
"surface": "website_hf",
|
| 1083 |
"shows": "Machine-readable visual asset index for website and Hugging Face mirrors.",
|
| 1084 |
"exists": true,
|
| 1085 |
+
"bytes": 19472,
|
| 1086 |
+
"sha256": "e56f76038a56ffc61e882d0201f13912af5cba3e5ade08b1bb912fba0acdcd24"
|
| 1087 |
},
|
| 1088 |
{
|
| 1089 |
"id": "figure_index_builder",
|
|
|
|
| 1093 |
"surface": "repo_hf",
|
| 1094 |
"shows": "Regenerates visual-asset hashes, dimensions, and source-script provenance.",
|
| 1095 |
"exists": true,
|
| 1096 |
+
"bytes": 16832,
|
| 1097 |
+
"sha256": "7c526bff01c282d81e4f64bbdb31c059953ea7868b75b0c3104826241280165f"
|
| 1098 |
},
|
| 1099 |
{
|
| 1100 |
"id": "brand_assets_json",
|
|
|
|
| 1160 |
"shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
|
| 1161 |
"exists": true,
|
| 1162 |
"bytes": 8640,
|
| 1163 |
+
"sha256": "3cb0aca2dca01448cb9bc5cbb519a91bc6397c08a1eaaa84c031e773221e5a0a"
|
| 1164 |
},
|
| 1165 |
{
|
| 1166 |
"id": "public_surface_qa",
|
|
|
|
| 1179 |
"path": "PUBLIC_READER_MAP.md",
|
| 1180 |
"kind": "project_path",
|
| 1181 |
"surface": "repo_hf",
|
| 1182 |
+
"shows": "Provides the first-pass navigation layer for GitHub, GitHub Pages, Hugging Face mirrors, Qwen3-Omni/Cosmos3 repos, evidence lines, and claim boundaries.",
|
| 1183 |
"exists": true,
|
| 1184 |
+
"bytes": 4948,
|
| 1185 |
+
"sha256": "7a7128fdde08f770338c3fe2d473565918c5633f948dec6a78a6b2a67938e91a"
|
| 1186 |
},
|
| 1187 |
{
|
| 1188 |
"id": "public_reader_map_json",
|
|
|
|
| 1192 |
"surface": "website_hf",
|
| 1193 |
"shows": "Machine-readable public reader map used by the website and Hugging Face mirrors to keep entry points and surface responsibilities explicit.",
|
| 1194 |
"exists": true,
|
| 1195 |
+
"bytes": 5971,
|
| 1196 |
+
"sha256": "3474f84ffa53aefabdbf8a75c466c271675162ce0f8a23ea3b6660951048072f"
|
| 1197 |
},
|
| 1198 |
{
|
| 1199 |
"id": "public_surface_qa_json",
|
|
|
|
| 1285 |
"volatile": true,
|
| 1286 |
"shows": "Records the last live GitHub/HF URL verification after upload.",
|
| 1287 |
"exists": true,
|
| 1288 |
+
"bytes": 184689,
|
| 1289 |
"hash_policy": "existence_and_size_only"
|
| 1290 |
},
|
| 1291 |
{
|
|
|
|
| 1296 |
"surface": "repo",
|
| 1297 |
"shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
|
| 1298 |
"exists": true,
|
| 1299 |
+
"bytes": 67652,
|
| 1300 |
+
"sha256": "47c6e5e0d93a881db045842ef98656d04c74cf7605f33a56b8d4daecf97fb547"
|
| 1301 |
},
|
| 1302 |
{
|
| 1303 |
"id": "reproducibility_contract",
|
|
|
|
| 1329 |
"surface": "repo_hf",
|
| 1330 |
"shows": "Generates the selective artifact catalog from local files.",
|
| 1331 |
"exists": true,
|
| 1332 |
+
"bytes": 67587,
|
| 1333 |
+
"sha256": "28a93ec92c91886388f5d42ab8e25af0b218e4644b733bc8f8230bc0f91aab65"
|
| 1334 |
},
|
| 1335 |
{
|
| 1336 |
"id": "publication_audit",
|
|
|
|
| 1365 |
"volatile": true,
|
| 1366 |
"shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
|
| 1367 |
"exists": true,
|
| 1368 |
+
"bytes": 1418066,
|
| 1369 |
"hash_policy": "existence_and_size_only"
|
| 1370 |
},
|
| 1371 |
{
|
|
|
|
| 1377 |
"volatile": true,
|
| 1378 |
"shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
|
| 1379 |
"exists": true,
|
| 1380 |
+
"bytes": 20657,
|
| 1381 |
"hash_policy": "existence_and_size_only"
|
| 1382 |
},
|
| 1383 |
{
|
|
|
|
| 1542 |
"surface": "website_hf",
|
| 1543 |
"shows": "Presents the task suite and sample modality thumbnails with metrics generated from committed files.",
|
| 1544 |
"exists": true,
|
| 1545 |
+
"bytes": 1903454,
|
| 1546 |
+
"sha256": "6667eb856cf61ada9f868807b5d5c6ccde06e4f791b2f9dd567d98b71b307415"
|
| 1547 |
},
|
| 1548 |
{
|
| 1549 |
"id": "modality_atlas",
|
|
|
|
| 1674 |
"surface": "repo_hf",
|
| 1675 |
"shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
|
| 1676 |
"exists": true,
|
| 1677 |
+
"bytes": 15983,
|
| 1678 |
+
"sha256": "4db248566972e811aac6ca06582f233414821624f00f9d4fc4a1b66b2e00401f"
|
| 1679 |
},
|
| 1680 |
{
|
| 1681 |
"id": "omni_model_comparison_json",
|
|
|
|
| 1685 |
"surface": "repo_hf",
|
| 1686 |
"shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
|
| 1687 |
"exists": true,
|
| 1688 |
+
"bytes": 82088,
|
| 1689 |
+
"sha256": "82ccc2932cad63a9ebad85da53e694b18ef626aa3720bda3ed5da30f3dc5e121"
|
| 1690 |
},
|
| 1691 |
{
|
| 1692 |
"id": "cosmos3_nano_verified_summary",
|
metrics/episode128_task_model_radar.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"title": "128-Episode 20-Task Radar",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
-
"description": "Selected 128-episode metadata/raw baselines plus verified Qwen3
|
| 6 |
"task_count": 20,
|
| 7 |
"method_count": 7,
|
| 8 |
"method_task_record_count": 140,
|
|
@@ -12,7 +12,7 @@
|
|
| 12 |
"lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
|
| 13 |
"raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
|
| 14 |
"result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
|
| 15 |
-
"foundation_model_overlay": "Qwen3
|
| 16 |
"metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
|
| 17 |
"raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
|
| 18 |
},
|
|
|
|
| 1 |
{
|
| 2 |
"title": "128-Episode 20-Task Radar",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-21T10:47:17+00:00",
|
| 5 |
+
"description": "Selected 128-episode metadata/raw baselines plus verified Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano diagnostics. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.",
|
| 6 |
"task_count": 20,
|
| 7 |
"method_count": 7,
|
| 8 |
"method_task_record_count": 140,
|
|
|
|
| 12 |
"lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
|
| 13 |
"raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table",
|
| 14 |
"result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used",
|
| 15 |
+
"foundation_model_overlay": "Qwen3-Omni and Cosmos3 points are plotted only on task-aligned axes. Scoreless records mean the public result does not evaluate that task contract.",
|
| 16 |
"metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.",
|
| 17 |
"raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export."
|
| 18 |
},
|
metrics/figure_index.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Figure Index",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"scope": "Public figures, diagrams, charts, and derived modality thumbnails. Raw Xperience-10M videos, annotations, RRD files, and Qwen weights are excluded.",
|
| 6 |
"figure_count": 29,
|
| 7 |
"figures": [
|
|
@@ -64,12 +64,12 @@
|
|
| 64 |
"source_script": "scripts/render_task_suite_infographic.py",
|
| 65 |
"surface": "README, website, HF Space, artifact dataset, model card",
|
| 66 |
"exists": true,
|
| 67 |
-
"bytes":
|
| 68 |
-
"sha256": "
|
| 69 |
"dimensions": {
|
| 70 |
"format": "PNG",
|
| 71 |
"width": 1800,
|
| 72 |
-
"height":
|
| 73 |
},
|
| 74 |
"source_script_exists": true
|
| 75 |
},
|
|
@@ -81,8 +81,8 @@
|
|
| 81 |
"source_script": "scripts/generate_visualizations.py",
|
| 82 |
"surface": "README, website, HF artifact dataset",
|
| 83 |
"exists": true,
|
| 84 |
-
"bytes":
|
| 85 |
-
"sha256": "
|
| 86 |
"dimensions": {
|
| 87 |
"format": "PNG",
|
| 88 |
"width": 1800,
|
|
@@ -149,8 +149,8 @@
|
|
| 149 |
"source_script": "scripts/render_foundation_pipeline_diagrams.py",
|
| 150 |
"surface": "README, website, HF Space, artifact dataset, model card",
|
| 151 |
"exists": true,
|
| 152 |
-
"bytes":
|
| 153 |
-
"sha256": "
|
| 154 |
"dimensions": {
|
| 155 |
"format": "PNG",
|
| 156 |
"width": 2560,
|
|
@@ -166,8 +166,8 @@
|
|
| 166 |
"source_script": "scripts/render_overview_figures.py",
|
| 167 |
"surface": "README, website, HF artifact dataset, model card",
|
| 168 |
"exists": true,
|
| 169 |
-
"bytes":
|
| 170 |
-
"sha256": "
|
| 171 |
"dimensions": {
|
| 172 |
"format": "PNG",
|
| 173 |
"width": 1800,
|
|
@@ -356,8 +356,8 @@
|
|
| 356 |
"source_script": "scripts/generate_visualizations.py",
|
| 357 |
"surface": "website directions",
|
| 358 |
"exists": true,
|
| 359 |
-
"bytes":
|
| 360 |
-
"sha256": "
|
| 361 |
"dimensions": {
|
| 362 |
"format": "SVG",
|
| 363 |
"width": 1180,
|
|
@@ -410,8 +410,8 @@
|
|
| 410 |
"source_script": "scripts/build_unified_task_model_radar.py",
|
| 411 |
"surface": "website unified task section, README, HF mirrors",
|
| 412 |
"exists": true,
|
| 413 |
-
"bytes":
|
| 414 |
-
"sha256": "
|
| 415 |
"dimensions": {
|
| 416 |
"format": "SVG",
|
| 417 |
"width": 2400,
|
|
@@ -428,8 +428,8 @@
|
|
| 428 |
"source_script": "scripts/build_unified_task_model_radar.py",
|
| 429 |
"surface": "website unified task section, README, HF mirrors",
|
| 430 |
"exists": true,
|
| 431 |
-
"bytes":
|
| 432 |
-
"sha256": "
|
| 433 |
"dimensions": {
|
| 434 |
"format": "SVG",
|
| 435 |
"width": 2400,
|
|
@@ -442,12 +442,12 @@
|
|
| 442 |
"id": "episode128_task_model_radar",
|
| 443 |
"title": "128-episode 20-task model radar",
|
| 444 |
"path": "docs/assets/charts/episode128_task_model_radar.svg",
|
| 445 |
-
"role": "Twenty-axis split radar for selected 128-episode methods: raw-feature simple/NN as complete scored polygons
|
| 446 |
"source_script": "scripts/build_unified_task_model_radar.py",
|
| 447 |
"surface": "website unified task section, README, HF mirrors",
|
| 448 |
"exists": true,
|
| 449 |
-
"bytes":
|
| 450 |
-
"sha256": "
|
| 451 |
"dimensions": {
|
| 452 |
"format": "SVG",
|
| 453 |
"width": 2400,
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Figure Index",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-21T10:52:12+00:00",
|
| 5 |
"scope": "Public figures, diagrams, charts, and derived modality thumbnails. Raw Xperience-10M videos, annotations, RRD files, and Qwen weights are excluded.",
|
| 6 |
"figure_count": 29,
|
| 7 |
"figures": [
|
|
|
|
| 64 |
"source_script": "scripts/render_task_suite_infographic.py",
|
| 65 |
"surface": "README, website, HF Space, artifact dataset, model card",
|
| 66 |
"exists": true,
|
| 67 |
+
"bytes": 1903454,
|
| 68 |
+
"sha256": "6667eb856cf61ada9f868807b5d5c6ccde06e4f791b2f9dd567d98b71b307415",
|
| 69 |
"dimensions": {
|
| 70 |
"format": "PNG",
|
| 71 |
"width": 1800,
|
| 72 |
+
"height": 7600
|
| 73 |
},
|
| 74 |
"source_script_exists": true
|
| 75 |
},
|
|
|
|
| 81 |
"source_script": "scripts/generate_visualizations.py",
|
| 82 |
"surface": "README, website, HF artifact dataset",
|
| 83 |
"exists": true,
|
| 84 |
+
"bytes": 711222,
|
| 85 |
+
"sha256": "4db6a6353d3f1e49bae12447e1a78a874aa780d60e9817f3052ac0d0acf2f7b2",
|
| 86 |
"dimensions": {
|
| 87 |
"format": "PNG",
|
| 88 |
"width": 1800,
|
|
|
|
| 149 |
"source_script": "scripts/render_foundation_pipeline_diagrams.py",
|
| 150 |
"surface": "README, website, HF Space, artifact dataset, model card",
|
| 151 |
"exists": true,
|
| 152 |
+
"bytes": 1853350,
|
| 153 |
+
"sha256": "e8d863cc5104602e464048b4bf48f9acf3a108495298d9ec15b2e9cf346f41f9",
|
| 154 |
"dimensions": {
|
| 155 |
"format": "PNG",
|
| 156 |
"width": 2560,
|
|
|
|
| 166 |
"source_script": "scripts/render_overview_figures.py",
|
| 167 |
"surface": "README, website, HF artifact dataset, model card",
|
| 168 |
"exists": true,
|
| 169 |
+
"bytes": 757827,
|
| 170 |
+
"sha256": "d83b75a6778033a716f1086dbe61298662d4b8f80cb8f52193d2cbdb1e8e31f7",
|
| 171 |
"dimensions": {
|
| 172 |
"format": "PNG",
|
| 173 |
"width": 1800,
|
|
|
|
| 356 |
"source_script": "scripts/generate_visualizations.py",
|
| 357 |
"surface": "website directions",
|
| 358 |
"exists": true,
|
| 359 |
+
"bytes": 5352,
|
| 360 |
+
"sha256": "506e12aa1b6c4fd50fb0c65714c7f0a92c02c40069cb879503471ba9b63d4afb",
|
| 361 |
"dimensions": {
|
| 362 |
"format": "SVG",
|
| 363 |
"width": 1180,
|
|
|
|
| 410 |
"source_script": "scripts/build_unified_task_model_radar.py",
|
| 411 |
"surface": "website unified task section, README, HF mirrors",
|
| 412 |
"exists": true,
|
| 413 |
+
"bytes": 57938,
|
| 414 |
+
"sha256": "bb83b80b47fe679ebdce2c99378a4548120f1c8cc2d725b88e409d8c386dcbf8",
|
| 415 |
"dimensions": {
|
| 416 |
"format": "SVG",
|
| 417 |
"width": 2400,
|
|
|
|
| 428 |
"source_script": "scripts/build_unified_task_model_radar.py",
|
| 429 |
"surface": "website unified task section, README, HF mirrors",
|
| 430 |
"exists": true,
|
| 431 |
+
"bytes": 35232,
|
| 432 |
+
"sha256": "87b52a7dead40358f1778dda43ade4d2e875ac98e507e01ca007084363e5977e",
|
| 433 |
"dimensions": {
|
| 434 |
"format": "SVG",
|
| 435 |
"width": 2400,
|
|
|
|
| 442 |
"id": "episode128_task_model_radar",
|
| 443 |
"title": "128-episode 20-task model radar",
|
| 444 |
"path": "docs/assets/charts/episode128_task_model_radar.svg",
|
| 445 |
+
"role": "Twenty-axis split radar for selected 128-episode methods: raw-feature simple/NN as complete scored polygons plus metadata, Qwen3-Omni, Cosmos3-Super, and Cosmos3-Nano task-aligned overlays.",
|
| 446 |
"source_script": "scripts/build_unified_task_model_radar.py",
|
| 447 |
"surface": "website unified task section, README, HF mirrors",
|
| 448 |
"exists": true,
|
| 449 |
+
"bytes": 51915,
|
| 450 |
+
"sha256": "047ea4b05a04f6734e2afcf792863559dc8f3091eae88a97ff90e8b038a423f4",
|
| 451 |
"dimensions": {
|
| 452 |
"format": "SVG",
|
| 453 |
"width": 2400,
|
metrics/foundation_model_plan.json
CHANGED
|
@@ -230,7 +230,7 @@
|
|
| 230 |
},
|
| 231 |
{
|
| 232 |
"step": 4,
|
| 233 |
-
"name": "World-model
|
| 234 |
"action": "Promote Cosmos 3 beyond the current Nano compatibility and Super forward-dynamics runs only when loss metrics, preprocessing, and storage justify the added compute."
|
| 235 |
},
|
| 236 |
{
|
|
|
|
| 230 |
},
|
| 231 |
{
|
| 232 |
"step": 4,
|
| 233 |
+
"name": "World-model track",
|
| 234 |
"action": "Promote Cosmos 3 beyond the current Nano compatibility and Super forward-dynamics runs only when loss metrics, preprocessing, and storage justify the added compute."
|
| 235 |
},
|
| 236 |
{
|
metrics/live_publication_status.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
"title": "Ropedia Xperience-10M Live Publication Status",
|
| 3 |
"status": "pass",
|
| 4 |
"checked_at_utc": "2026-06-20T21:56:07+00:00",
|
| 5 |
-
"scope": "Live GitHub Pages, GitHub raw, Hugging Face Space, artifact dataset, baseline model mirrors, and the Qwen3/Cosmos3 LoRA adapter repos when their upload packages exist locally.",
|
| 6 |
"hash_groups": [
|
| 7 |
{
|
| 8 |
"id": "task_suite_infographic",
|
|
|
|
| 2 |
"title": "Ropedia Xperience-10M Live Publication Status",
|
| 3 |
"status": "pass",
|
| 4 |
"checked_at_utc": "2026-06-20T21:56:07+00:00",
|
| 5 |
+
"scope": "Live GitHub Pages, GitHub raw, Hugging Face Space, artifact dataset, baseline model mirrors, and the Qwen3-Omni/Cosmos3 LoRA adapter repos when their upload packages exist locally.",
|
| 6 |
"hash_groups": [
|
| 7 |
{
|
| 8 |
"id": "task_suite_infographic",
|
metrics/mirror_parity.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
metrics/omni_finetune_verified_result.json
CHANGED
|
@@ -91,6 +91,6 @@
|
|
| 91 |
"Use results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before deciding whether v6 should become a formal release tag.",
|
| 92 |
"Use the v6 predictions for action/contact error analysis, and compare v5 for subtask, next-action, and object regressions.",
|
| 93 |
"Keep full-parameter Qwen runs as feasibility gates until there is a storage plan for checkpoints or mergeable full-weight deltas.",
|
| 94 |
-
"Use the verified Cosmos3-Super Forward-Dynamics LoRA package as a separate world-model
|
| 95 |
]
|
| 96 |
}
|
|
|
|
| 91 |
"Use results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before deciding whether v6 should become a formal release tag.",
|
| 92 |
"Use the v6 predictions for action/contact error analysis, and compare v5 for subtask, next-action, and object regressions.",
|
| 93 |
"Keep full-parameter Qwen runs as feasibility gates until there is a storage plan for checkpoints or mergeable full-weight deltas.",
|
| 94 |
+
"Use the verified Cosmos3-Super Forward-Dynamics LoRA package as a separate world-model artifact: it updates adapter weights over camera-pose proxy future-vision-velocity targets, not Qwen-style JSON action labels."
|
| 95 |
]
|
| 96 |
}
|
metrics/omni_model_comparison.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"status": "pass",
|
| 5 |
"version_count": 3,
|
| 6 |
"model_group_count": 5,
|
|
@@ -8,7 +8,7 @@
|
|
| 8 |
"version_reading_notes": [
|
| 9 |
"Version 1 is the public-sample 20-task surface: original core heads, tasks 13-20, and the 180-row method-task matrix.",
|
| 10 |
"Version 2 is the selected 128-episode same-split simple/NN baseline alignment.",
|
| 11 |
-
"
|
| 12 |
],
|
| 13 |
"versions": [
|
| 14 |
{
|
|
@@ -305,7 +305,7 @@
|
|
| 305 |
"neural_primary_score": null
|
| 306 |
}
|
| 307 |
],
|
| 308 |
-
"interpretation": "This layer aligns the previous simple and neural baseline framing to the same selected 96/16/16 split used by the
|
| 309 |
},
|
| 310 |
{
|
| 311 |
"id": "v3_multi_episode_foundation_model_branches",
|
|
@@ -870,7 +870,7 @@
|
|
| 870 |
"neural_supported_task_count": 6
|
| 871 |
},
|
| 872 |
"weights": "metadata/text baseline artifacts; raw 128 sensor-feature model weights not yet complete",
|
| 873 |
-
"interpretation": "Same selected 96/16/16 split and task ids as the
|
| 874 |
}
|
| 875 |
],
|
| 876 |
"comparison_note": "This is the cleanest 1-episode versus 128-episode grouping for the same simple/NN task-head family, but the feature surface changes from raw public-sample features to public-safe 128-episode metadata/text features."
|
|
@@ -1683,7 +1683,7 @@
|
|
| 1683 |
"weights_repository": "none for this run: staged base nv-community/Cosmos3-Super weights were evaluated through vLLM; create a separate repo only after new adapter or fine-tuned weights exist"
|
| 1684 |
}
|
| 1685 |
],
|
| 1686 |
-
"comparison_note": "Cosmos3-Super is now represented by a verified 448-window held-out Reasoner evaluation on the same JSON task as Qwen3. It uses staged base weights through vLLM, so it is a
|
| 1687 |
},
|
| 1688 |
{
|
| 1689 |
"id": "cosmos3_super_forward_dynamics",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
|
| 3 |
+
"generated_at_utc": "2026-06-21T10:47:04+00:00",
|
| 4 |
"status": "pass",
|
| 5 |
"version_count": 3,
|
| 6 |
"model_group_count": 5,
|
|
|
|
| 8 |
"version_reading_notes": [
|
| 9 |
"Version 1 is the public-sample 20-task surface: original core heads, tasks 13-20, and the 180-row method-task matrix.",
|
| 10 |
"Version 2 is the selected 128-episode same-split simple/NN baseline alignment.",
|
| 11 |
+
"The selected-128 model-diagnostic group contains the current Qwen3-Omni LoRA JSON-task row, Cosmos3-Nano future-window compatibility result, Cosmos3-Super Reasoner base-weight JSON-task evaluation, and the separate Cosmos3-Super Forward-Dynamics LoRA adapter artifact."
|
| 12 |
],
|
| 13 |
"versions": [
|
| 14 |
{
|
|
|
|
| 305 |
"neural_primary_score": null
|
| 306 |
}
|
| 307 |
],
|
| 308 |
+
"interpretation": "This layer aligns the previous simple and neural baseline framing to the same selected 96/16/16 split used by the Qwen3-Omni and Cosmos3 diagnostics. It uses public-safe JSONL metadata/text features, so raw-feature-only tasks remain explicitly unsupported until 128-run sensor feature blocks exist."
|
| 309 |
},
|
| 310 |
{
|
| 311 |
"id": "v3_multi_episode_foundation_model_branches",
|
|
|
|
| 870 |
"neural_supported_task_count": 6
|
| 871 |
},
|
| 872 |
"weights": "metadata/text baseline artifacts; raw 128 sensor-feature model weights not yet complete",
|
| 873 |
+
"interpretation": "Same selected 96/16/16 split and task ids as the Qwen3-Omni and Cosmos3 diagnostics, but metadata/text features only."
|
| 874 |
}
|
| 875 |
],
|
| 876 |
"comparison_note": "This is the cleanest 1-episode versus 128-episode grouping for the same simple/NN task-head family, but the feature surface changes from raw public-sample features to public-safe 128-episode metadata/text features."
|
|
|
|
| 1683 |
"weights_repository": "none for this run: staged base nv-community/Cosmos3-Super weights were evaluated through vLLM; create a separate repo only after new adapter or fine-tuned weights exist"
|
| 1684 |
}
|
| 1685 |
],
|
| 1686 |
+
"comparison_note": "Cosmos3-Super is now represented by a verified 448-window held-out Reasoner evaluation on the same JSON task as Qwen3. It uses staged base weights through vLLM, so it is a Cosmos3 diagnostic, not a weight release. A camera-pose proxy forward-dynamics target export now passes the contract audit and schema-only packer smoke; the separate Forward-Dynamics LoRA group records the trainable adapter run and loss-based held-out evaluation."
|
| 1687 |
},
|
| 1688 |
{
|
| 1689 |
"id": "cosmos3_super_forward_dynamics",
|
metrics/project_status.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Project Status",
|
| 3 |
"version": "2026-06-20",
|
| 4 |
"decision": "public_sample_pipeline_verified_128_enhancement_qwen3_v6_cosmos_comparison",
|
| 5 |
-
"research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, compares verified Qwen3-Omni and Cosmos3
|
| 6 |
"scope_boundary": {
|
| 7 |
"validated_episode_count": 1,
|
| 8 |
"aligned_frames": 5821,
|
|
@@ -145,7 +145,7 @@
|
|
| 145 |
"RESEARCH_ROADMAP.md",
|
| 146 |
"docs/data/research_roadmap.json"
|
| 147 |
],
|
| 148 |
-
"readout": "The roadmap connects public-sample task development to the final verified Qwen3-Omni diagnostic result, same-split baseline alignment, the no-new-episode 128-suite enhancement pack, action/subtask error analysis, robustness runs, world/policy
|
| 149 |
},
|
| 150 |
{
|
| 151 |
"area": "128-episode task-suite enhancement pack",
|
|
@@ -156,7 +156,7 @@
|
|
| 156 |
"results/omni_finetune/task_suite_enhancement_128_v1_20260608/enhancement_plan.json",
|
| 157 |
"scripts/omni/build_task_suite_enhancement_128.py"
|
| 158 |
],
|
| 159 |
-
"readout": "The current 3,808-window selected split can be stressed without more episodes by exporting denser and multiscale windows. The recommended next export is multiscale_20s10_40s20_80s40, estimated at 106,095 windows from observed frame spans; the pack also defines hierarchical action/subtask targets, raw-feature shard priorities for unsupported tasks, and
|
| 160 |
},
|
| 161 |
{
|
| 162 |
"area": "Foundation-model plan",
|
|
@@ -176,7 +176,7 @@
|
|
| 176 |
"scripts/omni/backbone_registry.py",
|
| 177 |
"scripts/omni/smoke_test_backbone_packaging.py"
|
| 178 |
],
|
| 179 |
-
"readout": "Future
|
| 180 |
},
|
| 181 |
{
|
| 182 |
"area": "Xperience Embodied Foundation Model",
|
|
@@ -253,7 +253,7 @@
|
|
| 253 |
"results/omni_finetune/OMNI_MODEL_COMPARISON.md",
|
| 254 |
"scripts/omni/build_omni_model_comparison.py"
|
| 255 |
],
|
| 256 |
-
"readout": "The public comparison now has two
|
| 257 |
},
|
| 258 |
{
|
| 259 |
"area": "Qwen3-Omni fine-tuning",
|
|
@@ -271,7 +271,7 @@
|
|
| 271 |
"readout": "The selected 96/16/16 episode split now has a current v6 rank64/lr5e-5 public-safe held-out package with 34,269 exported windows, 4,032 test predictions, validation/audit summaries, and a public LoRA adapter repo. JSON validity is 99.90%, meeting the 98% target; transition accuracy is 98.98%, contact accuracy is 81.77%, object micro-F1 is 30.65%, next-action accuracy is 4.31%, and action/subtask metrics remain weak. v6 improves action macro-F1 and contact accuracy versus v5, but v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics."
|
| 272 |
},
|
| 273 |
{
|
| 274 |
-
"area": "Cosmos3-Nano future-window
|
| 275 |
"status": "verified_compatibility_result",
|
| 276 |
"evidence": [
|
| 277 |
"configs/omni_backbones/cosmos_world_model.json",
|
|
@@ -279,10 +279,10 @@
|
|
| 279 |
"scripts/omni/eval_cosmos3_future_window_retrieval.py",
|
| 280 |
"results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json"
|
| 281 |
],
|
| 282 |
-
"readout": "The Cosmos3-Nano
|
| 283 |
},
|
| 284 |
{
|
| 285 |
-
"area": "Cosmos3-Super Reasoner
|
| 286 |
"status": "verified_base_weight_result",
|
| 287 |
"evidence": [
|
| 288 |
"configs/omni_backbones/cosmos3_super_reasoner.json",
|
|
@@ -314,7 +314,7 @@
|
|
| 314 |
"results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/verified_result_summary.json",
|
| 315 |
"results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/package_audit.json"
|
| 316 |
],
|
| 317 |
-
"readout": "The first fine-tuned Cosmos3-Super adapter
|
| 318 |
},
|
| 319 |
{
|
| 320 |
"area": "Raw Xperience-10M redistribution",
|
|
@@ -331,8 +331,8 @@
|
|
| 331 |
"Open docs/data/project_packet.json for the machine-readable project path.",
|
| 332 |
"Inspect RESEARCH_TAKEAWAYS.md and docs/data/research_takeaways.json before interpreting model scores.",
|
| 333 |
"Inspect RESEARCH_ROADMAP.md and docs/data/research_roadmap.json for the path from public-sample task work to multi-episode modeling.",
|
| 334 |
-
"Inspect FOUNDATION_MODEL_PLAN.md and docs/data/foundation_model_plan.json before choosing a backbone
|
| 335 |
-
"Inspect OMNI_MODEL_EXTENSION_CONTRACT.md and run python scripts/omni/backbone_registry.py --validate --json before adding a new
|
| 336 |
"Inspect XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md for the long-term full-corpus pretraining goal.",
|
| 337 |
"Inspect TASK_SUITE_20.md, docs/data/task_suite_20.json, docs/data/summary_metrics.json, and results/episode_task_suite/neural_mlp/ to check the unified 20-task outputs.",
|
| 338 |
"Inspect results/audio_ablation/AUDIO_ABLATION_SUMMARY.md before judging whether audio helps the current task suite.",
|
|
@@ -346,16 +346,16 @@
|
|
| 346 |
"Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
|
| 347 |
],
|
| 348 |
"current_reading_notes": [
|
| 349 |
-
"The latest Qwen3-Omni v6 diagnostic
|
| 350 |
"Use TASK_SUITE_ENHANCEMENT_128.md and docs/data/task_suite_enhancement_128.json to push the current 128-episode suite without more raw episodes through multiscale_20s10_40s20_80s40, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
|
| 351 |
-
"Use docs/data/omni_model_comparison.json to compare both views: the
|
| 352 |
"The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
|
| 353 |
-
"The Cosmos3-Nano future-window
|
| 354 |
"The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
|
| 355 |
"Audio is one of the synchronized source modalities in the current task representation.",
|
| 356 |
"The audio ablation report compares audio/no-audio variants across the original task contracts in results/audio_ablation/.",
|
| 357 |
-
"Foundation-model selection is explicit: Qwen3-Omni is the structured JSON baseline, Cosmos 3 is the world-model
|
| 358 |
-
"Future model
|
| 359 |
"The Xperience Embodied Foundation Model is a future native-pretraining goal, not a completed model or current benchmark."
|
| 360 |
]
|
| 361 |
}
|
|
|
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Project Status",
|
| 3 |
"version": "2026-06-20",
|
| 4 |
"decision": "public_sample_pipeline_verified_128_enhancement_qwen3_v6_cosmos_comparison",
|
| 5 |
+
"research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, compares verified Qwen3-Omni and Cosmos3 packages as early cross-episode diagnostics, and now records a no-new-episode enhancement pack for pushing the current 128-episode suite harder.",
|
| 6 |
"scope_boundary": {
|
| 7 |
"validated_episode_count": 1,
|
| 8 |
"aligned_frames": 5821,
|
|
|
|
| 145 |
"RESEARCH_ROADMAP.md",
|
| 146 |
"docs/data/research_roadmap.json"
|
| 147 |
],
|
| 148 |
+
"readout": "The roadmap connects public-sample task development to the final verified Qwen3-Omni diagnostic result, same-split baseline alignment, the no-new-episode 128-suite enhancement pack, action/subtask error analysis, robustness runs, world/policy tracks, and the future Xperience-native pretraining goal."
|
| 149 |
},
|
| 150 |
{
|
| 151 |
"area": "128-episode task-suite enhancement pack",
|
|
|
|
| 156 |
"results/omni_finetune/task_suite_enhancement_128_v1_20260608/enhancement_plan.json",
|
| 157 |
"scripts/omni/build_task_suite_enhancement_128.py"
|
| 158 |
],
|
| 159 |
+
"readout": "The current 3,808-window selected split can be stressed without more episodes by exporting denser and multiscale windows. The recommended next export is multiscale_20s10_40s20_80s40, estimated at 106,095 windows from observed frame spans; the pack also defines hierarchical action/subtask targets, raw-feature shard priorities for unsupported tasks, and Qwen3-Omni/Cosmos3 follow-up run cards."
|
| 160 |
},
|
| 161 |
{
|
| 162 |
"area": "Foundation-model plan",
|
|
|
|
| 176 |
"scripts/omni/backbone_registry.py",
|
| 177 |
"scripts/omni/smoke_test_backbone_packaging.py"
|
| 178 |
],
|
| 179 |
+
"readout": "Future Qwen3-Omni, Cosmos3-style, and VLA/policy tracks must keep the same episode split discipline, held-out metrics, validation gate, public-safe package contract, and explicit forbidden-artifact policy before reporting results."
|
| 180 |
},
|
| 181 |
{
|
| 182 |
"area": "Xperience Embodied Foundation Model",
|
|
|
|
| 253 |
"results/omni_finetune/OMNI_MODEL_COMPARISON.md",
|
| 254 |
"scripts/omni/build_omni_model_comparison.py"
|
| 255 |
],
|
| 256 |
+
"readout": "The public comparison now has two evidence lines plus a model-family grouping. The model grouping pairs 1-episode and 128-episode entries for task-head baselines, separates Qwen3-Omni sensor-adapter smoke from 128-episode LoRA diagnostics, separates Cosmos3-Nano future-window compatibility from Cosmos3-Super base-weight Reasoner evaluation, and adds Cosmos3-Super Forward-Dynamics LoRA as a loss-based fine-tuned adapter artifact."
|
| 257 |
},
|
| 258 |
{
|
| 259 |
"area": "Qwen3-Omni fine-tuning",
|
|
|
|
| 271 |
"readout": "The selected 96/16/16 episode split now has a current v6 rank64/lr5e-5 public-safe held-out package with 34,269 exported windows, 4,032 test predictions, validation/audit summaries, and a public LoRA adapter repo. JSON validity is 99.90%, meeting the 98% target; transition accuracy is 98.98%, contact accuracy is 81.77%, object micro-F1 is 30.65%, next-action accuracy is 4.31%, and action/subtask metrics remain weak. v6 improves action macro-F1 and contact accuracy versus v5, but v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics."
|
| 272 |
},
|
| 273 |
{
|
| 274 |
+
"area": "Cosmos3-Nano future-window package",
|
| 275 |
"status": "verified_compatibility_result",
|
| 276 |
"evidence": [
|
| 277 |
"configs/omni_backbones/cosmos_world_model.json",
|
|
|
|
| 279 |
"scripts/omni/eval_cosmos3_future_window_retrieval.py",
|
| 280 |
"results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json"
|
| 281 |
],
|
| 282 |
+
"readout": "The Cosmos3-Nano package now has a public-safe verified future-window compatibility result with 3,213 future-window samples, 378 held-out test predictions, future retrieval MRR 0.0221, temporal consistency 0.0952, transition accuracy 0.9683, and contact accuracy 0.7434. It is a compatibility adapter result, not a full Cosmos diffusion-weight fine-tune."
|
| 283 |
},
|
| 284 |
{
|
| 285 |
+
"area": "Cosmos3-Super Reasoner package",
|
| 286 |
"status": "verified_base_weight_result",
|
| 287 |
"evidence": [
|
| 288 |
"configs/omni_backbones/cosmos3_super_reasoner.json",
|
|
|
|
| 314 |
"results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/verified_result_summary.json",
|
| 315 |
"results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/package_audit.json"
|
| 316 |
],
|
| 317 |
+
"readout": "The first fine-tuned Cosmos3-Super adapter artifact is verified as a public-safe package: 8-GPU FSDP LoRA, 26.2M adapter parameters, 2,848 train rows, 512 validation rows, 448 held-out test rows, validation MSE 4.0082, and test MSE 3.6853. The package excludes adapter safetensors; weights are published separately at cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep."
|
| 318 |
},
|
| 319 |
{
|
| 320 |
"area": "Raw Xperience-10M redistribution",
|
|
|
|
| 331 |
"Open docs/data/project_packet.json for the machine-readable project path.",
|
| 332 |
"Inspect RESEARCH_TAKEAWAYS.md and docs/data/research_takeaways.json before interpreting model scores.",
|
| 333 |
"Inspect RESEARCH_ROADMAP.md and docs/data/research_roadmap.json for the path from public-sample task work to multi-episode modeling.",
|
| 334 |
+
"Inspect FOUNDATION_MODEL_PLAN.md and docs/data/foundation_model_plan.json before choosing a backbone track.",
|
| 335 |
+
"Inspect OMNI_MODEL_EXTENSION_CONTRACT.md and run python scripts/omni/backbone_registry.py --validate --json before adding a new Qwen3-Omni, Cosmos3-style, or VLA/policy track.",
|
| 336 |
"Inspect XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md for the long-term full-corpus pretraining goal.",
|
| 337 |
"Inspect TASK_SUITE_20.md, docs/data/task_suite_20.json, docs/data/summary_metrics.json, and results/episode_task_suite/neural_mlp/ to check the unified 20-task outputs.",
|
| 338 |
"Inspect results/audio_ablation/AUDIO_ABLATION_SUMMARY.md before judging whether audio helps the current task suite.",
|
|
|
|
| 346 |
"Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
|
| 347 |
],
|
| 348 |
"current_reading_notes": [
|
| 349 |
+
"The latest Qwen3-Omni v6 diagnostic run is verified and meets the strict-JSON target, but action/subtask held-out quality is still weak: JSON validity is 99.90%, action macro-F1 is 0.0029, and subtask accuracy is 0.0037. v5 remains the pinned prior release row because it is still stronger on several metrics.",
|
| 350 |
"Use TASK_SUITE_ENHANCEMENT_128.md and docs/data/task_suite_enhancement_128.json to push the current 128-episode suite without more raw episodes through multiscale_20s10_40s20_80s40, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
|
| 351 |
+
"Use docs/data/omni_model_comparison.json to compare both views: the 1-sample evidence line, the selected-128 evidence line, and the model-family grouping for task heads, Qwen3-Omni LoRA, Cosmos3-Nano, and Cosmos3-Super.",
|
| 352 |
"The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
|
| 353 |
+
"The Cosmos3-Nano future-window package is verified as a compatibility adapter result, Cosmos3-Super Reasoner is verified as a base-weight evaluation, and Cosmos3-Super Forward-Dynamics LoRA is verified as the first fine-tuned Super adapter artifact. Cosmos3-Super adapter weights belong in cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep; verified_public packages exclude safetensors.",
|
| 354 |
"The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
|
| 355 |
"Audio is one of the synchronized source modalities in the current task representation.",
|
| 356 |
"The audio ablation report compares audio/no-audio variants across the original task contracts in results/audio_ablation/.",
|
| 357 |
+
"Foundation-model selection is explicit: Qwen3-Omni is the structured JSON baseline, Cosmos 3 is the world-model track with Nano compatibility and Super forward-dynamics LoRA results, and policy models such as OpenVLA/openpi/GR00T wait for robot-compatible action-target conversion.",
|
| 358 |
+
"Future model tracks should be added through the backbone registry and verified package contract, not as one-off result folders with incompatible metrics or publication rules.",
|
| 359 |
"The Xperience Embodied Foundation Model is a future native-pretraining goal, not a completed model or current benchmark."
|
| 360 |
]
|
| 361 |
}
|
metrics/public_reader_map.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Reader Map",
|
| 3 |
"status": "published",
|
| 4 |
-
"purpose": "Organize the GitHub repo, GitHub Pages dashboard, Hugging Face Space, artifact dataset, baseline model repo, consolidated weights/results repo, and
|
| 5 |
"fast_paths": [
|
| 6 |
{
|
| 7 |
"reader_goal": "Understand the project in one pass",
|
|
@@ -92,13 +92,13 @@
|
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"surface": "HF weights/results repo",
|
| 95 |
-
"responsibility": "Consolidated baseline weights, Qwen3
|
| 96 |
"best_use": "Auditing all public-safe weight-bearing artifacts from one repo."
|
| 97 |
},
|
| 98 |
{
|
| 99 |
-
"surface": "Qwen3
|
| 100 |
-
"responsibility": "Adapter-specific public weights or package cards when a
|
| 101 |
-
"best_use": "Inspecting
|
| 102 |
}
|
| 103 |
],
|
| 104 |
"evidence_layers": [
|
|
@@ -121,8 +121,8 @@
|
|
| 121 |
"boundary": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
|
| 122 |
},
|
| 123 |
{
|
| 124 |
-
"claim_type": "Foundation-model
|
| 125 |
-
"public_evidence": ["Verified Qwen3
|
| 126 |
"boundary": "Numeric task scores appear only when a task-specific eval or probe exists."
|
| 127 |
},
|
| 128 |
{
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Reader Map",
|
| 3 |
"status": "published",
|
| 4 |
+
"purpose": "Organize the GitHub repo, GitHub Pages dashboard, Hugging Face Space, artifact dataset, baseline model repo, consolidated weights/results repo, and Qwen3-Omni/Cosmos3 repos without removing evidence.",
|
| 5 |
"fast_paths": [
|
| 6 |
{
|
| 7 |
"reader_goal": "Understand the project in one pass",
|
|
|
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"surface": "HF weights/results repo",
|
| 95 |
+
"responsibility": "Consolidated baseline weights, Qwen3-Omni v6 LoRA, Cosmos3-Super adapter/result artifacts, verified results, analysis files, and file-level manifest.",
|
| 96 |
"best_use": "Auditing all public-safe weight-bearing artifacts from one repo."
|
| 97 |
},
|
| 98 |
{
|
| 99 |
+
"surface": "Qwen3-Omni and Cosmos3 model repos",
|
| 100 |
+
"responsibility": "Adapter-specific public weights or package cards when a Qwen3-Omni or Cosmos3 run is verified and publishable.",
|
| 101 |
+
"best_use": "Inspecting Qwen3-Omni and Cosmos3 artifacts."
|
| 102 |
}
|
| 103 |
],
|
| 104 |
"evidence_layers": [
|
|
|
|
| 121 |
"boundary": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
|
| 122 |
},
|
| 123 |
{
|
| 124 |
+
"claim_type": "Foundation-model track quality",
|
| 125 |
+
"public_evidence": ["Verified Qwen3-Omni and Cosmos3 result packages", "model cards"],
|
| 126 |
"boundary": "Numeric task scores appear only when a task-specific eval or probe exists."
|
| 127 |
},
|
| 128 |
{
|
metrics/public_surface_qa.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Project Surface",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
|
| 6 |
"checks": [
|
| 7 |
{
|
|
@@ -18,7 +18,7 @@
|
|
| 18 |
"website_integrity": {
|
| 19 |
"exists": true,
|
| 20 |
"status": "pass",
|
| 21 |
-
"generated_at_utc": "2026-06-
|
| 22 |
},
|
| 23 |
"rendered_site_check": {
|
| 24 |
"exists": true,
|
|
@@ -28,27 +28,27 @@
|
|
| 28 |
"task_surface_integrity": {
|
| 29 |
"exists": true,
|
| 30 |
"status": "pass",
|
| 31 |
-
"generated_at_utc": "2026-06-
|
| 32 |
},
|
| 33 |
"source_alignment": {
|
| 34 |
"exists": true,
|
| 35 |
"status": "pass",
|
| 36 |
-
"generated_at_utc": "2026-06-
|
| 37 |
},
|
| 38 |
"scale_up_status": {
|
| 39 |
"exists": true,
|
| 40 |
"status": "pass",
|
| 41 |
-
"generated_at_utc": "2026-06-
|
| 42 |
},
|
| 43 |
"publication_package": {
|
| 44 |
"exists": true,
|
| 45 |
"status": "pass",
|
| 46 |
-
"generated_at_utc": "2026-06-
|
| 47 |
},
|
| 48 |
"mirror_parity": {
|
| 49 |
"exists": true,
|
| 50 |
"status": "pass",
|
| 51 |
-
"generated_at_utc": "2026-06-
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"failures": {}
|
|
@@ -97,8 +97,8 @@
|
|
| 97 |
"marker_counts": {
|
| 98 |
"Ropedia Xperience-10M Task Suite": 20,
|
| 99 |
"Xperience-10M": 166,
|
| 100 |
-
"20-task":
|
| 101 |
-
"Qwen3-Omni":
|
| 102 |
"128-episode pilot": 1
|
| 103 |
}
|
| 104 |
},
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Project Surface",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-21T11:08:07+00:00",
|
| 5 |
"scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
|
| 6 |
"checks": [
|
| 7 |
{
|
|
|
|
| 18 |
"website_integrity": {
|
| 19 |
"exists": true,
|
| 20 |
"status": "pass",
|
| 21 |
+
"generated_at_utc": "2026-06-21T11:07:26+00:00"
|
| 22 |
},
|
| 23 |
"rendered_site_check": {
|
| 24 |
"exists": true,
|
|
|
|
| 28 |
"task_surface_integrity": {
|
| 29 |
"exists": true,
|
| 30 |
"status": "pass",
|
| 31 |
+
"generated_at_utc": "2026-06-21T11:04:16+00:00"
|
| 32 |
},
|
| 33 |
"source_alignment": {
|
| 34 |
"exists": true,
|
| 35 |
"status": "pass",
|
| 36 |
+
"generated_at_utc": "2026-06-21T11:04:16+00:00"
|
| 37 |
},
|
| 38 |
"scale_up_status": {
|
| 39 |
"exists": true,
|
| 40 |
"status": "pass",
|
| 41 |
+
"generated_at_utc": "2026-06-21T11:03:20+00:00"
|
| 42 |
},
|
| 43 |
"publication_package": {
|
| 44 |
"exists": true,
|
| 45 |
"status": "pass",
|
| 46 |
+
"generated_at_utc": "2026-06-21T11:07:41+00:00"
|
| 47 |
},
|
| 48 |
"mirror_parity": {
|
| 49 |
"exists": true,
|
| 50 |
"status": "pass",
|
| 51 |
+
"generated_at_utc": "2026-06-21T11:05:04+00:00"
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"failures": {}
|
|
|
|
| 97 |
"marker_counts": {
|
| 98 |
"Ropedia Xperience-10M Task Suite": 20,
|
| 99 |
"Xperience-10M": 166,
|
| 100 |
+
"20-task": 89,
|
| 101 |
+
"Qwen3-Omni": 241,
|
| 102 |
"128-episode pilot": 1
|
| 103 |
}
|
| 104 |
},
|
metrics/publication_audit.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
@@ -244,8 +244,8 @@
|
|
| 244 |
"hf_space_bundle": {
|
| 245 |
"root": "hf_publish/space",
|
| 246 |
"exists": true,
|
| 247 |
-
"file_count":
|
| 248 |
-
"text_file_count":
|
| 249 |
"largest_file": {
|
| 250 |
"path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
|
| 251 |
"bytes": 10221085
|
|
@@ -255,8 +255,8 @@
|
|
| 255 |
"hf_artifact_bundle": {
|
| 256 |
"root": "hf_publish/artifacts",
|
| 257 |
"exists": true,
|
| 258 |
-
"file_count":
|
| 259 |
-
"text_file_count":
|
| 260 |
"largest_file": {
|
| 261 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 262 |
"bytes": 135591061
|
|
@@ -266,8 +266,8 @@
|
|
| 266 |
"hf_model_bundle": {
|
| 267 |
"root": "hf_publish/model",
|
| 268 |
"exists": true,
|
| 269 |
-
"file_count":
|
| 270 |
-
"text_file_count":
|
| 271 |
"largest_file": {
|
| 272 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 273 |
"bytes": 135591061
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-21T11:07:41+00:00",
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
|
|
| 244 |
"hf_space_bundle": {
|
| 245 |
"root": "hf_publish/space",
|
| 246 |
"exists": true,
|
| 247 |
+
"file_count": 572,
|
| 248 |
+
"text_file_count": 425,
|
| 249 |
"largest_file": {
|
| 250 |
"path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
|
| 251 |
"bytes": 10221085
|
|
|
|
| 255 |
"hf_artifact_bundle": {
|
| 256 |
"root": "hf_publish/artifacts",
|
| 257 |
"exists": true,
|
| 258 |
+
"file_count": 3049,
|
| 259 |
+
"text_file_count": 1283,
|
| 260 |
"largest_file": {
|
| 261 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 262 |
"bytes": 135591061
|
|
|
|
| 266 |
"hf_model_bundle": {
|
| 267 |
"root": "hf_publish/model",
|
| 268 |
"exists": true,
|
| 269 |
+
"file_count": 3533,
|
| 270 |
+
"text_file_count": 1455,
|
| 271 |
"largest_file": {
|
| 272 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 273 |
"bytes": 135591061
|