cy0307 committed on
Commit
8b4c4fe
·
verified ·
1 Parent(s): c433b73

Add files using upload-large-folder tool

Browse files
docs/data/artifact_index.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Artifact Index",
3
- "generated_at_utc": "2026-06-12T18:14:49+00:00",
4
  "status": "pass",
5
- "artifact_count": 159,
6
  "missing": [],
7
  "by_kind": {
8
  "project_path": 14,
9
  "scaleup_contract": 7,
10
- "scaleup_status": 36,
11
  "publication_workflow": 5,
12
  "project_scope": 1,
13
  "source_alignment": 5,
@@ -32,7 +32,7 @@
32
  "citation": 1,
33
  "license": 1,
34
  "verified_public_package": 10,
35
- "publication_audit": 6
36
  },
37
  "artifacts": [
38
  {
@@ -65,8 +65,8 @@
65
  "surface": "repo_hf",
66
  "shows": "Gives a compact current-state table for first-pass readers.",
67
  "exists": true,
68
- "bytes": 13535,
69
- "sha256": "595f90e26d75baba46a0827eb2ec38d0c6badb137437f642dff4e88e22b7ca80"
70
  },
71
  {
72
  "id": "project_status_json",
@@ -76,8 +76,8 @@
76
  "surface": "website_hf",
77
  "shows": "Machine-readable copy of the current project status for website and HF mirrors.",
78
  "exists": true,
79
- "bytes": 23198,
80
- "sha256": "07678354fde90a7e15134f5deff036eb200f7bf422ab2350ff64825b902843d5"
81
  },
82
  {
83
  "id": "research_roadmap",
@@ -87,8 +87,8 @@
87
  "surface": "repo_hf",
88
  "shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
89
  "exists": true,
90
- "bytes": 14354,
91
- "sha256": "7d385916e6d07cfa5f95e80b501fe58237f671711691cc8c792479d233991ef5"
92
  },
93
  {
94
  "id": "research_roadmap_json",
@@ -98,8 +98,8 @@
98
  "surface": "website_hf",
99
  "shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
100
  "exists": true,
101
- "bytes": 13313,
102
- "sha256": "719a8b5f45810d6b09788391d8549eb5226e480454c9f2a4ed7380ceacfd834b"
103
  },
104
  {
105
  "id": "foundation_model_plan",
@@ -264,7 +264,7 @@
264
  "shows": "Summarizes the 2026-06-09 full-parameter FSDP feasibility gates: 1/8/32/64-step guarded runs passed, the 128-step opportunistic pilot was preempted for Qwen v5 handoff, and no full checkpoints or weights are published.",
265
  "exists": true,
266
  "bytes": 3253,
267
- "sha256": "5c0e49adad861e00bede7f9245808e6e68346e7bc51c0b0142d2b99f944f2408"
268
  },
269
  {
270
  "id": "qwen3_full_parameter_gates_json",
@@ -275,7 +275,29 @@
275
  "shows": "Machine-readable summary of full-parameter feasibility evidence and publication policy for website and Hugging Face mirrors.",
276
  "exists": true,
277
  "bytes": 12183,
278
- "sha256": "50b8b93f0bae34c7fa269ae52d7c2073d6122fdafc9297f380c052d94253bf3f"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
  },
280
  {
281
  "id": "qwen3_full_parameter_gates_builder",
@@ -362,8 +384,8 @@
362
  "surface": "website_hf",
363
  "shows": "Gives a short project path with scope status and public surfaces.",
364
  "exists": true,
365
- "bytes": 10496,
366
- "sha256": "fc21325035c3b8d4892bfad50dd1a7f5f562a662b7da7be73f7349cb515544ef"
367
  },
368
  {
369
  "id": "artifact_guide",
@@ -418,7 +440,7 @@
418
  "shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
419
  "exists": true,
420
  "bytes": 4432,
421
- "sha256": "d5ada9ec76cace484a779672d636222f1dfa281135508ff1b5d5eac98908bf60"
422
  },
423
  {
424
  "id": "source_alignment_validator",
@@ -671,7 +693,7 @@
671
  "shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
672
  "exists": true,
673
  "bytes": 8097,
674
- "sha256": "9b4783b42d0a6f58f2bc36bb7990c86dc85af9d346cb59c90dca402e2c4cf0de"
675
  },
676
  {
677
  "id": "public_surface_qa",
@@ -785,8 +807,8 @@
785
  "surface": "repo",
786
  "shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
787
  "exists": true,
788
- "bytes": 42224,
789
- "sha256": "069b05229b72b9fa0c0917afd18655f58e5258499b43889dd96a87f29f577890"
790
  },
791
  {
792
  "id": "reproducibility_contract",
@@ -818,8 +840,8 @@
818
  "surface": "repo_hf",
819
  "shows": "Generates the selective artifact catalog from local files.",
820
  "exists": true,
821
- "bytes": 42045,
822
- "sha256": "0954119cda59bb9c77cba362a206ac237eae73c4a1dc44cfa56599f170fc94de"
823
  },
824
  {
825
  "id": "publication_audit",
@@ -842,7 +864,7 @@
842
  "volatile": true,
843
  "shows": "Separates setup paths from completed held-out-episode results.",
844
  "exists": true,
845
- "bytes": 21325,
846
  "hash_policy": "existence_and_size_only"
847
  },
848
  {
@@ -866,7 +888,7 @@
866
  "volatile": true,
867
  "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
868
  "exists": true,
869
- "bytes": 15654,
870
  "hash_policy": "existence_and_size_only"
871
  },
872
  {
@@ -899,8 +921,8 @@
899
  "surface": "website_hf",
900
  "shows": "Mirrors task metrics for the static dashboard.",
901
  "exists": true,
902
- "bytes": 27604,
903
- "sha256": "ebaf9d598b4cd91118f149cafa01fe4d17629499565b7be0c7ce4a0ffcd70f6b"
904
  },
905
  {
906
  "id": "feature_manifest",
@@ -1119,8 +1141,8 @@
1119
  "surface": "repo_hf",
1120
  "shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
1121
  "exists": true,
1122
- "bytes": 15731,
1123
- "sha256": "1ee5621a825219c1125a9c3f3ac779ac8bfa2fc45668531dbee3a728be13bfb5"
1124
  },
1125
  {
1126
  "id": "omni_model_comparison_json",
@@ -1130,8 +1152,8 @@
1130
  "surface": "repo_hf",
1131
  "shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
1132
  "exists": true,
1133
- "bytes": 81593,
1134
- "sha256": "c570f0810dec46f9a14969089245051c2b3a255f074bd9d06f529615fa6fbd73"
1135
  },
1136
  {
1137
  "id": "cosmos3_nano_verified_summary",
@@ -1548,8 +1570,8 @@
1548
  "surface": "repo_hf",
1549
  "shows": "Public-safe verified package for xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full (qwen3_omni_lora, status=verified).",
1550
  "exists": true,
1551
- "file_count": 13,
1552
- "bytes": 12189099
1553
  },
1554
  {
1555
  "id": "verified_public_summary_xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
@@ -1595,6 +1617,17 @@
1595
  "bytes": 49205,
1596
  "sha256": "fc198c3e443877bca42cc33bec6e2a194d6cb20e97c28e931a90736c45538bba"
1597
  },
 
 
 
 
 
 
 
 
 
 
 
1598
  {
1599
  "id": "verified_public_package_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
1600
  "title": "Verified public package: Qwen3-Omni LoRA",
 
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Artifact Index",
3
+ "generated_at_utc": "2026-06-13T17:46:38+00:00",
4
  "status": "pass",
5
+ "artifact_count": 162,
6
  "missing": [],
7
  "by_kind": {
8
  "project_path": 14,
9
  "scaleup_contract": 7,
10
+ "scaleup_status": 38,
11
  "publication_workflow": 5,
12
  "project_scope": 1,
13
  "source_alignment": 5,
 
32
  "citation": 1,
33
  "license": 1,
34
  "verified_public_package": 10,
35
+ "publication_audit": 7
36
  },
37
  "artifacts": [
38
  {
 
65
  "surface": "repo_hf",
66
  "shows": "Gives a compact current-state table for first-pass readers.",
67
  "exists": true,
68
+ "bytes": 13755,
69
+ "sha256": "342897ae05ceab83d626765c0052c140e414ba25ebda4fce9fb07bb37a2decef"
70
  },
71
  {
72
  "id": "project_status_json",
 
76
  "surface": "website_hf",
77
  "shows": "Machine-readable copy of the current project status for website and HF mirrors.",
78
  "exists": true,
79
+ "bytes": 23535,
80
+ "sha256": "9ffae32ff0b3750f89179d2ce92205f95a5b53069d0aa344d6342c23b1efebbd"
81
  },
82
  {
83
  "id": "research_roadmap",
 
87
  "surface": "repo_hf",
88
  "shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
89
  "exists": true,
90
+ "bytes": 14503,
91
+ "sha256": "bb06fb0ccf336cafb0305883b7f93c2e1af547c9ff04b1fa6fc87481d54bcf61"
92
  },
93
  {
94
  "id": "research_roadmap_json",
 
98
  "surface": "website_hf",
99
  "shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
100
  "exists": true,
101
+ "bytes": 13450,
102
+ "sha256": "668220626950f8e55410b8f829dba6306d0b2feedafdf0198800ad9814992d84"
103
  },
104
  {
105
  "id": "foundation_model_plan",
 
264
  "shows": "Summarizes the 2026-06-09 full-parameter FSDP feasibility gates: 1/8/32/64-step guarded runs passed, the 128-step opportunistic pilot was preempted for Qwen v5 handoff, and no full checkpoints or weights are published.",
265
  "exists": true,
266
  "bytes": 3253,
267
+ "sha256": "b25f1d8cde814207b4c3234bf07140cf99a0ede29af3f53dbc146aab464e8a9b"
268
  },
269
  {
270
  "id": "qwen3_full_parameter_gates_json",
 
275
  "shows": "Machine-readable summary of full-parameter feasibility evidence and publication policy for website and Hugging Face mirrors.",
276
  "exists": true,
277
  "bytes": 12183,
278
+ "sha256": "d051608d3428645778f721e538af93566ab772871a825ac12825dd5f18e94a95"
279
+ },
280
+ {
281
+ "id": "qwen3_v5_v6_comparison",
282
+ "title": "Qwen3-Omni v5/v6 comparison",
283
+ "path": "results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md",
284
+ "kind": "scaleup_status",
285
+ "surface": "repo_hf",
286
+ "shows": "Reader-facing comparison of the verified Qwen3 v5 release row and the latest verified v6 row, including metric deltas and release-tag policy.",
287
+ "exists": true,
288
+ "bytes": 1793,
289
+ "sha256": "890430b05ace20375fc9534f923d170c0509037272ba4ef523e3ca2f3c9ac746"
290
+ },
291
+ {
292
+ "id": "qwen3_v5_v6_comparison_json",
293
+ "title": "Qwen3-Omni v5/v6 comparison JSON",
294
+ "path": "docs/data/qwen3_v5_v6_comparison.json",
295
+ "kind": "scaleup_status",
296
+ "surface": "website_hf",
297
+ "shows": "Machine-readable v5/v6 metric deltas and publication recommendation for website and Hugging Face mirrors.",
298
+ "exists": true,
299
+ "bytes": 2814,
300
+ "sha256": "f5d16e279a82cdc6266a1318584bf38cbc0b105296d437f9b8bf0398403aace5"
301
  },
302
  {
303
  "id": "qwen3_full_parameter_gates_builder",
 
384
  "surface": "website_hf",
385
  "shows": "Gives a short project path with scope status and public surfaces.",
386
  "exists": true,
387
+ "bytes": 10597,
388
+ "sha256": "a64b7c033c54879e0183e7ec794d3197fb483024c25947759287fcd4b7e0fec1"
389
  },
390
  {
391
  "id": "artifact_guide",
 
440
  "shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
441
  "exists": true,
442
  "bytes": 4432,
443
+ "sha256": "0d0d381f726c1e3787fb3fb15b6fb8879512c26fa0dc06fb943e1a239b0063dd"
444
  },
445
  {
446
  "id": "source_alignment_validator",
 
693
  "shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
694
  "exists": true,
695
  "bytes": 8097,
696
+ "sha256": "1cdc8b4767b3ca88eada654a3117aa2de253fea7af62573b080088e8f1b311bd"
697
  },
698
  {
699
  "id": "public_surface_qa",
 
807
  "surface": "repo",
808
  "shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
809
  "exists": true,
810
+ "bytes": 42394,
811
+ "sha256": "43a70436108eea3fa6692096ebf318fb755ff040d925094ef9de018f212fde18"
812
  },
813
  {
814
  "id": "reproducibility_contract",
 
840
  "surface": "repo_hf",
841
  "shows": "Generates the selective artifact catalog from local files.",
842
  "exists": true,
843
+ "bytes": 42809,
844
+ "sha256": "c03d1b1367ad191fea0be3c634fddf8ee6fdc2118bf17396920c16cc288c4ef0"
845
  },
846
  {
847
  "id": "publication_audit",
 
864
  "volatile": true,
865
  "shows": "Separates setup paths from completed held-out-episode results.",
866
  "exists": true,
867
+ "bytes": 21795,
868
  "hash_policy": "existence_and_size_only"
869
  },
870
  {
 
888
  "volatile": true,
889
  "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
890
  "exists": true,
891
+ "bytes": 15774,
892
  "hash_policy": "existence_and_size_only"
893
  },
894
  {
 
921
  "surface": "website_hf",
922
  "shows": "Mirrors task metrics for the static dashboard.",
923
  "exists": true,
924
+ "bytes": 27807,
925
+ "sha256": "3a6a5ee59562ae189844cb4ba26d6e261c2f73a8e54bb6e2fbc3e307c2d123fa"
926
  },
927
  {
928
  "id": "feature_manifest",
 
1141
  "surface": "repo_hf",
1142
  "shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
1143
  "exists": true,
1144
+ "bytes": 15999,
1145
+ "sha256": "d5a7118a878b202adbc50e3436bbe134e5de139f2a9e97176efe9ecc0f446088"
1146
  },
1147
  {
1148
  "id": "omni_model_comparison_json",
 
1152
  "surface": "repo_hf",
1153
  "shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
1154
  "exists": true,
1155
+ "bytes": 81866,
1156
+ "sha256": "191125098a66ecccfa27395c0f9776616f74b4bf8fb19f16b75cda7ed06cb4b2"
1157
  },
1158
  {
1159
  "id": "cosmos3_nano_verified_summary",
 
1570
  "surface": "repo_hf",
1571
  "shows": "Public-safe verified package for xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full (qwen3_omni_lora, status=verified).",
1572
  "exists": true,
1573
+ "file_count": 14,
1574
+ "bytes": 12189730
1575
  },
1576
  {
1577
  "id": "verified_public_summary_xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
 
1617
  "bytes": 49205,
1618
  "sha256": "fc198c3e443877bca42cc33bec6e2a194d6cb20e97c28e931a90736c45538bba"
1619
  },
1620
+ {
1621
+ "id": "verified_public_package_audit_xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
1622
+ "title": "Verified package audit: Qwen3-Omni LoRA",
1623
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/package_audit.json",
1624
+ "kind": "publication_audit",
1625
+ "surface": "repo_hf",
1626
+ "shows": "Package audit for xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full.",
1627
+ "exists": true,
1628
+ "bytes": 631,
1629
+ "sha256": "7cf478ae33c52bae0ba742e81da8e482e06d0853eecd85f895f447a708f81718"
1630
+ },
1631
  {
1632
  "id": "verified_public_package_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
1633
  "title": "Verified public package: Qwen3-Omni LoRA",
docs/data/omni_model_comparison.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
3
- "generated_at_utc": "2026-06-12T18:14:48+00:00",
4
  "status": "pass",
5
  "version_count": 3,
6
  "model_group_count": 5,
@@ -588,8 +588,8 @@
588
  "global_step": 3204
589
  }
590
  ],
591
- "is_current": true,
592
- "weights_repository": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep"
593
  },
594
  {
595
  "id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
@@ -640,8 +640,8 @@
640
  "global_step": 6408
641
  }
642
  ],
643
- "is_current": false,
644
- "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo"
645
  },
646
  {
647
  "id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
@@ -1202,8 +1202,8 @@
1202
  "global_step": 3204
1203
  }
1204
  ],
1205
- "is_current": true,
1206
- "weights_repository": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep"
1207
  },
1208
  {
1209
  "id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
@@ -1254,8 +1254,8 @@
1254
  "global_step": 6408
1255
  }
1256
  ],
1257
- "is_current": false,
1258
- "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo"
1259
  },
1260
  {
1261
  "id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
@@ -1753,6 +1753,7 @@
1753
  "Cosmos3-Super now has both a 128-episode base-weight Reasoner evaluation on the JSON task and a fine-tuned forward-dynamics LoRA branch over camera-pose proxy targets."
1754
  ],
1755
  "pending": [
1756
- "Use the verified Qwen3 v5 dense multiscale full-eval package as the current Qwen row; older Qwen package rows remain historical diagnostics for comparison."
 
1757
  ]
1758
  }
 
1
  {
2
  "title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
3
+ "generated_at_utc": "2026-06-13T17:41:35+00:00",
4
  "status": "pass",
5
  "version_count": 3,
6
  "model_group_count": 5,
 
588
  "global_step": 3204
589
  }
590
  ],
591
+ "is_current": false,
592
+ "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo"
593
  },
594
  {
595
  "id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
 
640
  "global_step": 6408
641
  }
642
  ],
643
+ "is_current": true,
644
+ "weights_repository": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep"
645
  },
646
  {
647
  "id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
 
1202
  "global_step": 3204
1203
  }
1204
  ],
1205
+ "is_current": false,
1206
+ "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo"
1207
  },
1208
  {
1209
  "id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
 
1254
  "global_step": 6408
1255
  }
1256
  ],
1257
+ "is_current": true,
1258
+ "weights_repository": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep"
1259
  },
1260
  {
1261
  "id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
 
1753
  "Cosmos3-Super now has both a 128-episode base-weight Reasoner evaluation on the JSON task and a fine-tuned forward-dynamics LoRA branch over camera-pose proxy targets."
1754
  ],
1755
  "pending": [
1756
+ "Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.",
1757
+ "Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before claiming v6 is globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses subtask, next-action, object micro-F1, and JSON validity slightly."
1758
  ]
1759
  }
docs/data/project_packet.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Project Packet",
3
- "version": "2026-06-08",
4
  "scope_status": {
5
  "validated_data": "one public Xperience-10M sample episode",
6
  "aligned_frames": 5821,
@@ -12,7 +12,7 @@
12
  "raw_xperience10m_data_in_repo": false,
13
  "audio_feature_status": "Audio is one of the synchronized source modalities in the current task representation.",
14
  "qwen3_omni_32_episode_claim": false,
15
- "qwen3_omni_status": "The selected 96/16/16 Qwen3-Omni v4 final diagnostic result is verified, meets the strict-JSON target, and still has weak action/subtask metrics that guide the next error-analysis pass.",
16
  "cosmos3_super_forward_dynamics_lora_status": "The first Cosmos3-Super fine-tuned adapter branch is verified as a forward-dynamics LoRA over camera-pose proxy targets; it reports loss metrics, not JSON action-label accuracy.",
17
  "task_suite_enhancement_128_status": "Current no-new-episode enhancement pack recommends multiscale_20s10_40s20_80s40, hierarchical action/subtask targets, label-normalized scoring, and raw-feature shards before adding more episodes."
18
  },
@@ -118,7 +118,7 @@
118
  "scripts/omni/discover_xperience10m_sources.py",
119
  "docs/data/omni_finetune_verified_result.json"
120
  ],
121
- "readout": "The selected-episode held-out Qwen3-Omni final diagnostic result is verified and JSON-format reliability meets the 98% target. The same public comparison also includes the verified 128-episode baselines, Cosmos3-Nano compatibility result, Cosmos3-Super Reasoner evaluation, and Cosmos3-Super Forward-Dynamics LoRA package. The next milestone is action/subtask error analysis and stronger model-quality runs on the same split."
122
  },
123
  {
124
  "step": 9,
@@ -155,7 +155,7 @@
155
  "hf_model_baselines": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines"
156
  },
157
  "current_reading_notes": [
158
- "The first cross-episode Qwen3-Omni v4 diagnostic pilot is verified, but strong model quality is not yet shown; action/subtask metrics remain weak.",
159
  "The current 128-episode suite has a no-new-episode enhancement plan: multiscale_20s10_40s20_80s40 windows, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
160
  "Cosmos3-Super Forward-Dynamics LoRA is verified as a loss-based world-model adapter branch, not as JSON action-token prediction.",
161
  "Older Qwen3-Omni setup artifacts are separate from the verified selected-episode diagnostic package.",
 
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Project Packet",
3
+ "version": "2026-06-14",
4
  "scope_status": {
5
  "validated_data": "one public Xperience-10M sample episode",
6
  "aligned_frames": 5821,
 
12
  "raw_xperience10m_data_in_repo": false,
13
  "audio_feature_status": "Audio is one of the synchronized source modalities in the current task representation.",
14
  "qwen3_omni_32_episode_claim": false,
15
+ "qwen3_omni_status": "The selected 96/16/16 Qwen3-Omni v6 diagnostic branch is verified, meets the strict-JSON target, improves action macro-F1/contact accuracy versus v5, and still has weak action/subtask metrics that guide the next error-analysis pass.",
16
  "cosmos3_super_forward_dynamics_lora_status": "The first Cosmos3-Super fine-tuned adapter branch is verified as a forward-dynamics LoRA over camera-pose proxy targets; it reports loss metrics, not JSON action-label accuracy.",
17
  "task_suite_enhancement_128_status": "Current no-new-episode enhancement pack recommends multiscale_20s10_40s20_80s40, hierarchical action/subtask targets, label-normalized scoring, and raw-feature shards before adding more episodes."
18
  },
 
118
  "scripts/omni/discover_xperience10m_sources.py",
119
  "docs/data/omni_finetune_verified_result.json"
120
  ],
121
+ "readout": "The selected-episode held-out Qwen3-Omni v6 diagnostic branch is verified and JSON-format reliability meets the 98% target. The same public comparison also includes the verified 128-episode baselines, Cosmos3-Nano compatibility result, Cosmos3-Super Reasoner evaluation, and Cosmos3-Super Forward-Dynamics LoRA package. The next milestone is action/subtask error analysis and stronger model-quality runs on the same split."
122
  },
123
  {
124
  "step": 9,
 
155
  "hf_model_baselines": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines"
156
  },
157
  "current_reading_notes": [
158
+ "The latest cross-episode Qwen3-Omni v6 diagnostic branch is verified, but strong model quality is not yet shown; action/subtask metrics remain weak and v5 remains stronger on several non-contact metrics.",
159
  "The current 128-episode suite has a no-new-episode enhancement plan: multiscale_20s10_40s20_80s40 windows, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
160
  "Cosmos3-Super Forward-Dynamics LoRA is verified as a loss-based world-model adapter branch, not as JSON action-token prediction.",
161
  "Older Qwen3-Omni setup artifacts are separate from the verified selected-episode diagnostic package.",
docs/data/project_status.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Project Status",
3
- "version": "2026-06-08",
4
- "decision": "public_sample_pipeline_verified_128_enhancement_qwen3_v4_cosmos_comparison",
5
  "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, compares verified Qwen3-Omni and Cosmos3 branch packages as early cross-episode diagnostics, and now records a no-new-episode enhancement pack for pushing the current 128-episode suite harder.",
6
  "scope_boundary": {
7
  "validated_episode_count": 1,
@@ -21,11 +21,11 @@
21
  "test": 16
22
  },
23
  "qwen3_omni_exported_window_counts": {
24
- "train": 2848,
25
- "val": 512,
26
- "test": 448
27
  },
28
- "qwen3_omni_json_validity_rate": 1.0,
29
  "qwen3_omni_validation_aware": true,
30
  "qwen3_omni_json_quality_target_met": true,
31
  "qwen3_omni_lora_adapter_repo": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
@@ -48,12 +48,12 @@
48
  "test": 448
49
  },
50
  "multi_episode_128_baseline_task_count": 12,
51
- "qwen3_omni_current_eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
52
- "qwen3_omni_current_train_epochs": 4,
53
- "qwen3_omni_action_macro_f1": 0.0018678269676001454,
54
- "qwen3_omni_subtask_accuracy": 0.0,
55
- "qwen3_omni_contact_accuracy": 0.7299107142857143,
56
- "qwen3_omni_object_micro_f1": 0.31099781500364165,
57
  "task_suite_enhancement_128_available": true,
58
  "task_suite_enhancement_128_current_windows": 3808,
59
  "task_suite_enhancement_128_recommended_export": "multiscale_20s10_40s20_80s40",
@@ -241,13 +241,15 @@
241
  "status": "final_verified_diagnostic_result_json_target_met",
242
  "evidence": [
243
  "docs/data/omni_finetune_verified_result.json",
244
- "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/",
 
 
245
  "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
246
  "scripts/omni/package_verified_omni_result.py",
247
  "scripts/omni/audit_verified_omni_package.py",
248
  "scripts/omni/analyze_qwen3_omni_errors.py"
249
  ],
250
- "readout": "The selected 96/16/16 episode split now has a v4 four-epoch public-safe held-out package with 3,808 exported windows, 512 validation windows, 448 test predictions, validation/audit summaries, and a public LoRA adapter repo. JSON validity is 100.00%, meeting the 98% target; transition accuracy is 97.32%, contact accuracy is 72.99%, object micro-F1 is 31.10%, next-action accuracy is 3.35%, and action/subtask metrics remain weak, so it is still a diagnostic baseline rather than a strong model-quality claim."
251
  },
252
  {
253
  "area": "Cosmos3-Nano future-window branch",
@@ -324,7 +326,7 @@
324
  "Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
325
  ],
326
  "current_reading_notes": [
327
- "The final Qwen3-Omni v4 diagnostic result is verified and meets the strict-JSON target, but action/subtask held-out quality is still weak: JSON validity is 100.00%, action macro-F1 is 0.0019, and subtask accuracy is 0.0000.",
328
  "Use TASK_SUITE_ENHANCEMENT_128.md and docs/data/task_suite_enhancement_128.json to push the current 128-episode suite without more raw episodes through multiscale_20s10_40s20_80s40, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
329
  "Use docs/data/omni_model_comparison.json to compare both views: the single-episode/128-baseline/model-branch result layers and the model-family grouping for task heads, Qwen3-Omni LoRA, Cosmos3-Nano, and Cosmos3-Super.",
330
  "The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
 
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Project Status",
3
+ "version": "2026-06-14",
4
+ "decision": "public_sample_pipeline_verified_128_enhancement_qwen3_v6_cosmos_comparison",
5
  "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, compares verified Qwen3-Omni and Cosmos3 branch packages as early cross-episode diagnostics, and now records a no-new-episode enhancement pack for pushing the current 128-episode suite harder.",
6
  "scope_boundary": {
7
  "validated_episode_count": 1,
 
21
  "test": 16
22
  },
23
  "qwen3_omni_exported_window_counts": {
24
+ "train": 25629,
25
+ "val": 4608,
26
+ "test": 4032
27
  },
28
+ "qwen3_omni_json_validity_rate": 0.9990079365079365,
29
  "qwen3_omni_validation_aware": true,
30
  "qwen3_omni_json_quality_target_met": true,
31
  "qwen3_omni_lora_adapter_repo": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
 
48
  "test": 448
49
  },
50
  "multi_episode_128_baseline_task_count": 12,
51
+ "qwen3_omni_current_eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
52
+ "qwen3_omni_current_train_epochs": 2,
53
+ "qwen3_omni_action_macro_f1": 0.0028830723979596335,
54
+ "qwen3_omni_subtask_accuracy": 0.0037313432835820895,
55
+ "qwen3_omni_contact_accuracy": 0.8177083333333334,
56
+ "qwen3_omni_object_micro_f1": 0.3064982378331287,
57
  "task_suite_enhancement_128_available": true,
58
  "task_suite_enhancement_128_current_windows": 3808,
59
  "task_suite_enhancement_128_recommended_export": "multiscale_20s10_40s20_80s40",
 
241
  "status": "final_verified_diagnostic_result_json_target_met",
242
  "evidence": [
243
  "docs/data/omni_finetune_verified_result.json",
244
+ "docs/data/qwen3_v5_v6_comparison.json",
245
+ "results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md",
246
+ "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/",
247
  "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
248
  "scripts/omni/package_verified_omni_result.py",
249
  "scripts/omni/audit_verified_omni_package.py",
250
  "scripts/omni/analyze_qwen3_omni_errors.py"
251
  ],
252
+ "readout": "The selected 96/16/16 episode split now has a current v6 rank64/lr5e-5 public-safe held-out package with 34,269 exported windows, 4,032 test predictions, validation/audit summaries, and a public LoRA adapter repo. JSON validity is 99.90%, meeting the 98% target; transition accuracy is 98.98%, contact accuracy is 81.77%, object micro-F1 is 30.65%, next-action accuracy is 4.31%, and action/subtask metrics remain weak. v6 improves action macro-F1 and contact accuracy versus v5, but v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics."
253
  },
254
  {
255
  "area": "Cosmos3-Nano future-window branch",
 
326
  "Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
327
  ],
328
  "current_reading_notes": [
329
+ "The latest Qwen3-Omni v6 diagnostic branch is verified and meets the strict-JSON target, but action/subtask held-out quality is still weak: JSON validity is 99.90%, action macro-F1 is 0.0029, and subtask accuracy is 0.0037. v5 remains the pinned prior release row because it is still stronger on several metrics.",
330
  "Use TASK_SUITE_ENHANCEMENT_128.md and docs/data/task_suite_enhancement_128.json to push the current 128-episode suite without more raw episodes through multiscale_20s10_40s20_80s40, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
331
  "Use docs/data/omni_model_comparison.json to compare both views: the single-episode/128-baseline/model-branch result layers and the model-family grouping for task heads, Qwen3-Omni LoRA, Cosmos3-Nano, and Cosmos3-Super.",
332
  "The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
docs/data/publication_audit.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-12T18:14:57+00:00",
4
  "checks": [
5
  {
6
  "name": "required_publication_assets_present",
@@ -187,8 +187,8 @@
187
  "github_repo": {
188
  "root": "repo",
189
  "exists": true,
190
- "file_count": 906,
191
- "text_file_count": 740,
192
  "largest_file": {
193
  "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
194
  "bytes": 55702978
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-13T17:46:58+00:00",
4
  "checks": [
5
  {
6
  "name": "required_publication_assets_present",
 
187
  "github_repo": {
188
  "root": "repo",
189
  "exists": true,
190
+ "file_count": 914,
191
+ "text_file_count": 746,
192
  "largest_file": {
193
  "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
194
  "bytes": 55702978
docs/data/quality_gates.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Release Checks",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-12T18:14:49+00:00",
5
  "rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
6
  "automated_gates": [
7
  {
 
1
  {
2
  "title": "Ropedia Xperience-10M Release Checks",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-13T17:46:37+00:00",
5
  "rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
6
  "automated_gates": [
7
  {
docs/data/qwen3_full_parameter_gates.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "title": "Qwen3-Omni Full-Parameter Feasibility Gates",
3
- "generated_at_utc": "2026-06-12T18:14:48+00:00",
4
  "status": "pass",
5
  "decision": "full_parameter_feasible_for_guarded_short_runs_not_promoted",
6
  "interpretation": "The 2026-06-09 gates prove that Qwen3-Omni full-parameter FSDP can load, prepare, run backward/optimizer steps, and complete guarded pilots up to 128 optimizer steps on an 8-GPU remote worker. They do not prove a production full-parameter fine-tune, and they intentionally save no full checkpoints or public weights.",
 
1
  {
2
  "title": "Qwen3-Omni Full-Parameter Feasibility Gates",
3
+ "generated_at_utc": "2026-06-13T17:41:13+00:00",
4
  "status": "pass",
5
  "decision": "full_parameter_feasible_for_guarded_short_runs_not_promoted",
6
  "interpretation": "The 2026-06-09 gates prove that Qwen3-Omni full-parameter FSDP can load, prepare, run backward/optimizer steps, and complete guarded pilots up to 128 optimizer steps on an 8-GPU remote worker. They do not prove a production full-parameter fine-tune, and they intentionally save no full checkpoints or public weights.",
docs/data/research_roadmap.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Research Roadmap",
3
  "summary": "Staged path from the public-sample task lab to verified Qwen3-Omni, Cosmos3-Nano, and Cosmos3-Super diagnostics, same-split 128-episode baseline alignment, a no-new-episode 128-suite enhancement pack, action/subtask error analysis, world/policy branches, and a future Xperience-native embodied foundation model.",
4
- "current_decision_point": "Push the current selected 128 episodes harder before requesting more storage: keep the public-sample task suite as the development harness, use the final verified selected-episode Qwen3-Omni v4 diagnostic result and same-split 128-episode simple/NN metadata baselines as structured-task references, read Cosmos3-Nano and Cosmos3-Super Forward-Dynamics LoRA as separate world-model results, export multiscale_20s10_40s20_80s40 windows plus hierarchical action/subtask targets, and defer policy-model experiments until robot-compatible targets are implemented. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
5
  "additional_development_directions": {
6
  "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
7
  "source_json": "docs/data/additional_development_directions.json",
@@ -52,8 +52,8 @@
52
  },
53
  {
54
  "id": "qwen3_omni_lora_diagnostic_pilot",
55
- "name": "Qwen3-Omni LoRA Final Diagnostic Result",
56
- "status": "verified_baseline",
57
  "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
58
  "deliverables": [
59
  "dataset JSONL/media manifests",
@@ -64,10 +64,13 @@
64
  "metrics",
65
  "confusion matrices",
66
  "run report",
 
67
  "public LoRA adapter repo"
68
  ],
69
  "completion_evidence": [
70
  "docs/data/omni_finetune_verified_result.json",
 
 
71
  "results/omni_finetune/verified_public/",
72
  "dataset_manifest.json",
73
  "training_metadata.json",
 
1
  {
2
  "title": "Ropedia Xperience-10M Research Roadmap",
3
  "summary": "Staged path from the public-sample task lab to verified Qwen3-Omni, Cosmos3-Nano, and Cosmos3-Super diagnostics, same-split 128-episode baseline alignment, a no-new-episode 128-suite enhancement pack, action/subtask error analysis, world/policy branches, and a future Xperience-native embodied foundation model.",
4
+ "current_decision_point": "Push the current selected 128 episodes harder before requesting more storage: keep the public-sample task suite as the development harness, use the latest verified selected-episode Qwen3-Omni v6 diagnostic branch plus the pinned v5 row as structured-task references, read Cosmos3-Nano and Cosmos3-Super Forward-Dynamics LoRA as separate world-model results, continue with hierarchical action/subtask targets and label-normalized scoring, and defer policy-model experiments until robot-compatible targets are implemented. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
5
  "additional_development_directions": {
6
  "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
7
  "source_json": "docs/data/additional_development_directions.json",
 
52
  },
53
  {
54
  "id": "qwen3_omni_lora_diagnostic_pilot",
55
+ "name": "Qwen3-Omni LoRA Latest Diagnostic Branch",
56
+ "status": "verified_latest_branch",
57
  "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
58
  "deliverables": [
59
  "dataset JSONL/media manifests",
 
64
  "metrics",
65
  "confusion matrices",
66
  "run report",
67
+ "v5/v6 comparison",
68
  "public LoRA adapter repo"
69
  ],
70
  "completion_evidence": [
71
  "docs/data/omni_finetune_verified_result.json",
72
+ "docs/data/qwen3_v5_v6_comparison.json",
73
+ "results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md",
74
  "results/omni_finetune/verified_public/",
75
  "dataset_manifest.json",
76
  "training_metadata.json",
docs/data/research_roadmap_interactive.json CHANGED
@@ -2222,7 +2222,7 @@
2222
  ],
2223
  "status": "planning_artifact"
2224
  },
2225
- "generated_at_utc": "2026-06-08T12:22:13+00:00",
2226
  "omni_plan": {
2227
  "adapter": "LoRA rank 16, alpha 32, dropout 0.05",
2228
  "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
@@ -2287,6 +2287,8 @@
2287
  {
2288
  "completion_evidence": [
2289
  "docs/data/omni_finetune_verified_result.json",
 
 
2290
  "results/omni_finetune/verified_public/",
2291
  "dataset_manifest.json",
2292
  "training_metadata.json",
@@ -2304,14 +2306,15 @@
2304
  "metrics",
2305
  "confusion matrices",
2306
  "run report",
 
2307
  "public LoRA adapter repo"
2308
  ],
2309
  "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
2310
  "id": "qwen3_omni_lora_diagnostic_pilot",
2311
- "name": "Qwen3-Omni LoRA Final Diagnostic Result",
2312
  "reader_takeaway": "The final omni-model diagnostic result establishes the full held-out training/validation/evaluation loop and meets the strict-JSON target, but weak action/subtask metrics make it a diagnostic baseline.",
2313
  "stage": "future",
2314
- "status": "verified_baseline"
2315
  },
2316
  {
2317
  "completion_evidence": [
 
2222
  ],
2223
  "status": "planning_artifact"
2224
  },
2225
+ "generated_at_utc": "2026-06-13T17:41:13+00:00",
2226
  "omni_plan": {
2227
  "adapter": "LoRA rank 16, alpha 32, dropout 0.05",
2228
  "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
 
2287
  {
2288
  "completion_evidence": [
2289
  "docs/data/omni_finetune_verified_result.json",
2290
+ "docs/data/qwen3_v5_v6_comparison.json",
2291
+ "results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md",
2292
  "results/omni_finetune/verified_public/",
2293
  "dataset_manifest.json",
2294
  "training_metadata.json",
 
2306
  "metrics",
2307
  "confusion matrices",
2308
  "run report",
2309
+ "v5/v6 comparison",
2310
  "public LoRA adapter repo"
2311
  ],
2312
  "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
2313
  "id": "qwen3_omni_lora_diagnostic_pilot",
2314
+ "name": "Qwen3-Omni LoRA Latest Diagnostic Branch",
2315
  "reader_takeaway": "The final omni-model diagnostic result establishes the full held-out training/validation/evaluation loop and meets the strict-JSON target, but weak action/subtask metrics make it a diagnostic baseline.",
2316
  "stage": "future",
2317
+ "status": "verified_latest_branch"
2318
  },
2319
  {
2320
  "completion_evidence": [
docs/data/scope_claims_audit.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-12T18:14:51+00:00",
4
  "summary": {
5
  "qwen3_omni_verified_diagnostic_pilot": true,
6
  "dataset_manifest_num_episodes": 119,
7
- "dataset_manifest_num_samples": 3808,
8
- "training_metadata_num_train_samples": 2848,
9
- "eval_num_samples": 448,
10
- "eval_json_validity_rate": 1.0,
11
  "quality_target_met": true,
12
- "historical_identifier_count": 1799,
13
  "public_32_episode_status_file_count": 1,
14
  "failure_count": 0
15
  },
@@ -25,7 +25,7 @@
25
  {
26
  "name": "summary_metrics_preserves_verified_diagnostic_status",
27
  "status": "pass",
28
- "detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and meets the 98% target for JSON validity; action/subtask quality remains weak, so it is a structured-task baseline. Cosmos3-Nano future-window compatibility and Cosmos3-Super Forward-Dynamics LoRA are also verified as separate world-model diagnostics with different metrics.",
29
  "evidence": [
30
  "docs/data/summary_metrics.json"
31
  ]
@@ -33,25 +33,25 @@
33
  {
34
  "name": "verified_package_dataset_has_expected_windows",
35
  "status": "pass",
36
- "detail": "episodes=119, samples=3808, split_counts={'train': 2848, 'val': 512, 'test': 448}",
37
  "evidence": [
38
- "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/dataset/dataset_manifest.json"
39
  ]
40
  },
41
  {
42
  "name": "verified_package_training_records_8_processes",
43
  "status": "pass",
44
- "detail": "train=2848, val=512, processes=8",
45
  "evidence": [
46
- "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/training/training_metadata.json"
47
  ]
48
  },
49
  {
50
  "name": "verified_package_eval_records_real_held_out_metrics",
51
  "status": "pass",
52
- "detail": "samples=448, split=test, held_out=14, json_validity=1.0",
53
  "evidence": [
54
- "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/eval/metrics.json"
55
  ]
56
  },
57
  {
@@ -59,7 +59,7 @@
59
  "status": "pass",
60
  "detail": "audit_status=pass, issues=0",
61
  "evidence": [
62
- "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/package_audit.json"
63
  ]
64
  },
65
  {
@@ -84,7 +84,7 @@
84
  {
85
  "name": "historical_32ep_identifiers_are_confined_to_readiness_artifacts",
86
  "status": "pass",
87
- "detail": "historical identifiers found in result provenance files=1799",
88
  "evidence": [
89
  "results/omni_finetune/"
90
  ]
@@ -424,6 +424,6 @@
424
  "example": "{\"id\": \"xperience-10m-sample:qa:53\", \"episode_id\": \"xperience-10m-sample\", \"split\": \"train\", \"target\": \"episode_qa\", \"prompt_type\": \"json_episode_understanding\", \"center_window\": {\"start_frame\": 1060, \"end_frame\": 1079, \"num_frames\": 20}, \"media\": {\"video_path"
425
  }
426
  ],
427
- "historical_identifier_total_count": 1799,
428
  "failures": []
429
  }
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-13T17:46:50+00:00",
4
  "summary": {
5
  "qwen3_omni_verified_diagnostic_pilot": true,
6
  "dataset_manifest_num_episodes": 119,
7
+ "dataset_manifest_num_samples": 34269,
8
+ "training_metadata_num_train_samples": 25629,
9
+ "eval_num_samples": 4032,
10
+ "eval_json_validity_rate": 0.9990079365079365,
11
  "quality_target_met": true,
12
+ "historical_identifier_count": 1800,
13
  "public_32_episode_status_file_count": 1,
14
  "failure_count": 0
15
  },
 
25
  {
26
  "name": "summary_metrics_preserves_verified_diagnostic_status",
27
  "status": "pass",
28
+ "detail": "The selected-episode Qwen3-Omni v6 diagnostic branch is verified on the 96/16/16 split and meets the 98% target for JSON validity; action/subtask quality remains weak, so it is a structured-task baseline rather than a strong model-quality claim. v6 improves action macro-F1 and contact accuracy versus v5, while v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics. Cosmos3-Nano future-window compatibility and Cosmos3-Super Forward-Dynamics LoRA are also verified as separate world-model diagnostics with different metrics.",
29
  "evidence": [
30
  "docs/data/summary_metrics.json"
31
  ]
 
33
  {
34
  "name": "verified_package_dataset_has_expected_windows",
35
  "status": "pass",
36
+ "detail": "episodes=119, samples=34269, split_counts={'test': 4032, 'train': 25629, 'val': 4608}, expected_samples=34269, expected_split_counts={'train': 25629, 'val': 4608, 'test': 4032}",
37
  "evidence": [
38
+ "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/dataset/dataset_manifest.json"
39
  ]
40
  },
41
  {
42
  "name": "verified_package_training_records_8_processes",
43
  "status": "pass",
44
+ "detail": "train=25629, val=2048, processes=8, expected_train=25629, expected_val=2048, expected_processes=8",
45
  "evidence": [
46
+ "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/training/training_metadata.json"
47
  ]
48
  },
49
  {
50
  "name": "verified_package_eval_records_real_held_out_metrics",
51
  "status": "pass",
52
+ "detail": "samples=4032, split=test, held_out=14, json_validity=0.9990079365079365, expected_samples=4032, expected_held_out=14",
53
  "evidence": [
54
+ "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json"
55
  ]
56
  },
57
  {
 
59
  "status": "pass",
60
  "detail": "audit_status=pass, issues=0",
61
  "evidence": [
62
+ "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/package_audit.json"
63
  ]
64
  },
65
  {
 
84
  {
85
  "name": "historical_32ep_identifiers_are_confined_to_readiness_artifacts",
86
  "status": "pass",
87
+ "detail": "historical identifiers found in result provenance files=1800",
88
  "evidence": [
89
  "results/omni_finetune/"
90
  ]
 
424
  "example": "{\"id\": \"xperience-10m-sample:qa:53\", \"episode_id\": \"xperience-10m-sample\", \"split\": \"train\", \"target\": \"episode_qa\", \"prompt_type\": \"json_episode_understanding\", \"center_window\": {\"start_frame\": 1060, \"end_frame\": 1079, \"num_frames\": 20}, \"media\": {\"video_path"
425
  }
426
  ],
427
+ "historical_identifier_total_count": 1800,
428
  "failures": []
429
  }
docs/data/source_alignment_audit.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Source Alignment Note",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-12T18:14:51+00:00",
5
  "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
6
  "alignment_summary": {
7
  "full_dataset_repo": "ropedia-ai/xperience-10m",
 
1
  {
2
  "title": "Ropedia Xperience-10M Source Alignment Note",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-13T17:46:47+00:00",
5
  "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
6
  "alignment_summary": {
7
  "full_dataset_repo": "ropedia-ai/xperience-10m",
docs/data/summary_metrics.json CHANGED
@@ -14,7 +14,7 @@
14
  "visualization.rrd"
15
  ],
16
  "access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
17
- "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and meets the 98% target for JSON validity; action/subtask quality remains weak, so it is a structured-task baseline. Cosmos3-Nano future-window compatibility and Cosmos3-Super Forward-Dynamics LoRA are also verified as separate world-model diagnostics with different metrics."
18
  },
19
  "models": {
20
  "motion_action": {
 
14
  "visualization.rrd"
15
  ],
16
  "access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
17
+ "current_scope": "The selected-episode Qwen3-Omni v6 diagnostic branch is verified on the 96/16/16 split and meets the 98% target for JSON validity; action/subtask quality remains weak, so it is a structured-task baseline rather than a strong model-quality claim. v6 improves action macro-F1 and contact accuracy versus v5, while v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics. Cosmos3-Nano future-window compatibility and Cosmos3-Super Forward-Dynamics LoRA are also verified as separate world-model diagnostics with different metrics."
18
  },
19
  "models": {
20
  "motion_action": {
docs/data/task_surface_integrity.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-12T18:14:51+00:00",
4
  "summary": {
5
  "task_count": 12,
6
  "expected_task_count": 12,
@@ -64,45 +64,45 @@
64
  "observed": "timeline_action"
65
  },
66
  {
67
- "name": "timeline_action: public_field_output_short_is_human_readable",
68
  "status": "pass",
69
- "value": "current action class",
70
  "raw_hits": []
71
  },
72
  {
73
- "name": "timeline_action: public_field_plain_goal_is_human_readable",
74
  "status": "pass",
75
- "value": "Look at one short multimodal window and name what action is happening now.",
76
  "raw_hits": []
77
  },
78
  {
79
- "name": "timeline_action: public_field_display_name_is_human_readable",
80
  "status": "pass",
81
- "value": "Action Recognition",
82
  "raw_hits": []
83
  },
84
  {
85
- "name": "timeline_action: public_field_input_short_is_human_readable",
86
  "status": "pass",
87
- "value": "20-frame multimodal window",
88
  "raw_hits": []
89
  },
90
  {
91
- "name": "timeline_action: public_field_card_blurb_is_human_readable",
92
  "status": "pass",
93
- "value": "Recognize the current manipulation action from synchronized visual, motion, inertial, pose, and annotation context.",
94
  "raw_hits": []
95
  },
96
  {
97
- "name": "timeline_action: public_field_process_short_is_human_readable",
98
  "status": "pass",
99
- "value": "window features -> action label builder -> classifier",
100
  "raw_hits": []
101
  },
102
  {
103
- "name": "timeline_action: public_field_research_name_is_human_readable",
104
  "status": "pass",
105
- "value": "Egocentric Action Recognition",
106
  "raw_hits": []
107
  },
108
  {
@@ -184,45 +184,45 @@
184
  "observed": "timeline_subtask"
185
  },
186
  {
187
- "name": "timeline_subtask: public_field_output_short_is_human_readable",
188
  "status": "pass",
189
- "value": "current procedure step",
190
  "raw_hits": []
191
  },
192
  {
193
- "name": "timeline_subtask: public_field_plain_goal_is_human_readable",
194
  "status": "pass",
195
- "value": "Predict the higher-level task stage for the current window.",
196
  "raw_hits": []
197
  },
198
  {
199
- "name": "timeline_subtask: public_field_display_name_is_human_readable",
200
  "status": "pass",
201
- "value": "Procedure Step Recognition",
202
  "raw_hits": []
203
  },
204
  {
205
- "name": "timeline_subtask: public_field_input_short_is_human_readable",
206
  "status": "pass",
207
- "value": "20-frame multimodal window",
208
  "raw_hits": []
209
  },
210
  {
211
- "name": "timeline_subtask: public_field_card_blurb_is_human_readable",
212
  "status": "pass",
213
- "value": "Recognize the broader activity stage so fine actions become a readable procedure timeline.",
214
  "raw_hits": []
215
  },
216
  {
217
- "name": "timeline_subtask: public_field_process_short_is_human_readable",
218
  "status": "pass",
219
- "value": "window features -> subtask label builder -> classifier",
220
  "raw_hits": []
221
  },
222
  {
223
- "name": "timeline_subtask: public_field_research_name_is_human_readable",
224
  "status": "pass",
225
- "value": "Temporal Subtask Recognition",
226
  "raw_hits": []
227
  },
228
  {
@@ -304,45 +304,45 @@
304
  "observed": "transition_detection"
305
  },
306
  {
307
- "name": "transition_detection: public_field_output_short_is_human_readable",
308
  "status": "pass",
309
- "value": "boundary or steady",
310
  "raw_hits": []
311
  },
312
  {
313
- "name": "transition_detection: public_field_plain_goal_is_human_readable",
314
  "status": "pass",
315
- "value": "Detect whether the current window is near a boundary between actions.",
316
  "raw_hits": []
317
  },
318
  {
319
- "name": "transition_detection: public_field_display_name_is_human_readable",
320
  "status": "pass",
321
- "value": "Action Boundary Detection",
322
  "raw_hits": []
323
  },
324
  {
325
- "name": "transition_detection: public_field_input_short_is_human_readable",
326
  "status": "pass",
327
- "value": "current window with boundary target",
328
  "raw_hits": []
329
  },
330
  {
331
- "name": "transition_detection: public_field_card_blurb_is_human_readable",
332
  "status": "pass",
333
- "value": "Detect the local moment where the episode changes from one action segment to the next.",
334
  "raw_hits": []
335
  },
336
  {
337
- "name": "transition_detection: public_field_process_short_is_human_readable",
338
  "status": "pass",
339
- "value": "action changes -> boundary labels -> binary classifier",
340
  "raw_hits": []
341
  },
342
  {
343
- "name": "transition_detection: public_field_research_name_is_human_readable",
344
  "status": "pass",
345
- "value": "Temporal Action Segmentation",
346
  "raw_hits": []
347
  },
348
  {
@@ -422,45 +422,45 @@
422
  "observed": "next_action"
423
  },
424
  {
425
- "name": "next_action: public_field_output_short_is_human_readable",
426
  "status": "pass",
427
- "value": "action at t+20 frames",
428
  "raw_hits": []
429
  },
430
  {
431
- "name": "next_action: public_field_plain_goal_is_human_readable",
432
  "status": "pass",
433
- "value": "Use the current window to guess the action that will happen shortly after it.",
434
  "raw_hits": []
435
  },
436
  {
437
- "name": "next_action: public_field_display_name_is_human_readable",
438
  "status": "pass",
439
- "value": "Next-Action Prediction",
440
  "raw_hits": []
441
  },
442
  {
443
- "name": "next_action: public_field_input_short_is_human_readable",
444
  "status": "pass",
445
- "value": "current window at time t",
446
  "raw_hits": []
447
  },
448
  {
449
- "name": "next_action: public_field_card_blurb_is_human_readable",
450
  "status": "pass",
451
- "value": "Forecast the near-future action from the current observations only.",
452
  "raw_hits": []
453
  },
454
  {
455
- "name": "next_action: public_field_process_short_is_human_readable",
456
  "status": "pass",
457
- "value": "current features -> future label shift -> classifier",
458
  "raw_hits": []
459
  },
460
  {
461
- "name": "next_action: public_field_research_name_is_human_readable",
462
  "status": "pass",
463
- "value": "Short-Horizon Intention Prediction",
464
  "raw_hits": []
465
  },
466
  {
@@ -540,45 +540,45 @@
540
  "observed": "hand_trajectory_forecast"
541
  },
542
  {
543
- "name": "hand_trajectory_forecast: public_field_output_short_is_human_readable",
544
  "status": "pass",
545
- "value": "future hand-joint trajectory",
546
  "raw_hits": []
547
  },
548
  {
549
- "name": "hand_trajectory_forecast: public_field_plain_goal_is_human_readable",
550
  "status": "pass",
551
- "value": "Predict where the hands will move over the next few frames.",
552
  "raw_hits": []
553
  },
554
  {
555
- "name": "hand_trajectory_forecast: public_field_display_name_is_human_readable",
556
  "status": "pass",
557
- "value": "Hand Trajectory Forecasting",
558
  "raw_hits": []
559
  },
560
  {
561
- "name": "hand_trajectory_forecast: public_field_input_short_is_human_readable",
562
  "status": "pass",
563
- "value": "current multimodal window",
564
  "raw_hits": []
565
  },
566
  {
567
- "name": "hand_trajectory_forecast: public_field_card_blurb_is_human_readable",
568
  "status": "pass",
569
- "value": "Predict the future 3D left/right hand path from the current multimodal state.",
570
  "raw_hits": []
571
  },
572
  {
573
- "name": "hand_trajectory_forecast: public_field_process_short_is_human_readable",
574
  "status": "pass",
575
- "value": "current features -> future mocap target -> regression head",
576
  "raw_hits": []
577
  },
578
  {
579
- "name": "hand_trajectory_forecast: public_field_research_name_is_human_readable",
580
  "status": "pass",
581
- "value": "3D Hand Motion Forecasting",
582
  "raw_hits": []
583
  },
584
  {
@@ -658,45 +658,45 @@
658
  "observed": "contact_prediction"
659
  },
660
  {
661
- "name": "contact_prediction: public_field_output_short_is_human_readable",
662
  "status": "pass",
663
- "value": "contact or no contact",
664
  "raw_hits": []
665
  },
666
  {
667
- "name": "contact_prediction: public_field_plain_goal_is_human_readable",
668
  "status": "pass",
669
- "value": "Predict whether the body or hand is in contact with something.",
670
  "raw_hits": []
671
  },
672
  {
673
- "name": "contact_prediction: public_field_display_name_is_human_readable",
674
  "status": "pass",
675
- "value": "Contact State Prediction",
676
  "raw_hits": []
677
  },
678
  {
679
- "name": "contact_prediction: public_field_input_short_is_human_readable",
680
  "status": "pass",
681
- "value": "non-contact, non-caption features",
682
  "raw_hits": []
683
  },
684
  {
685
- "name": "contact_prediction: public_field_card_blurb_is_human_readable",
686
  "status": "pass",
687
- "value": "Predict whether body or hand contact with the scene is occurring without leaking contact labels.",
688
  "raw_hits": []
689
  },
690
  {
691
- "name": "contact_prediction: public_field_process_short_is_human_readable",
692
  "status": "pass",
693
- "value": "feature filter -> contact target -> binary classifier",
694
  "raw_hits": []
695
  },
696
  {
697
- "name": "contact_prediction: public_field_research_name_is_human_readable",
698
  "status": "pass",
699
- "value": "Human-Object Contact Prediction",
700
  "raw_hits": []
701
  },
702
  {
@@ -774,45 +774,45 @@
774
  "observed": "object_relevance"
775
  },
776
  {
777
- "name": "object_relevance: public_field_output_short_is_human_readable",
778
  "status": "pass",
779
- "value": "relevant object set",
780
  "raw_hits": []
781
  },
782
  {
783
- "name": "object_relevance: public_field_plain_goal_is_human_readable",
784
  "status": "pass",
785
- "value": "Predict which objects matter in the current window.",
786
  "raw_hits": []
787
  },
788
  {
789
- "name": "object_relevance: public_field_display_name_is_human_readable",
790
  "status": "pass",
791
- "value": "Object Relevance Prediction",
792
  "raw_hits": []
793
  },
794
  {
795
- "name": "object_relevance: public_field_input_short_is_human_readable",
796
  "status": "pass",
797
- "value": "non-caption multimodal features",
798
  "raw_hits": []
799
  },
800
  {
801
- "name": "object_relevance: public_field_card_blurb_is_human_readable",
802
  "status": "pass",
803
- "value": "Infer which objects are relevant to the current manipulation window from non-caption features.",
804
  "raw_hits": []
805
  },
806
  {
807
- "name": "object_relevance: public_field_process_short_is_human_readable",
808
  "status": "pass",
809
- "value": "object vocabulary -> multi-hot labels -> sigmoid heads",
810
  "raw_hits": []
811
  },
812
  {
813
- "name": "object_relevance: public_field_research_name_is_human_readable",
814
  "status": "pass",
815
- "value": "Object-Centric Interaction Recognition",
816
  "raw_hits": []
817
  },
818
  {
@@ -892,45 +892,45 @@
892
  "observed": "caption_grounding"
893
  },
894
  {
895
- "name": "caption_grounding: public_field_output_short_is_human_readable",
896
  "status": "pass",
897
- "value": "ranked matching moments",
898
  "raw_hits": []
899
  },
900
  {
901
- "name": "caption_grounding: public_field_plain_goal_is_human_readable",
902
  "status": "pass",
903
- "value": "Given a text-like query from annotation, find the matching time window.",
904
  "raw_hits": []
905
  },
906
  {
907
- "name": "caption_grounding: public_field_display_name_is_human_readable",
908
  "status": "pass",
909
- "value": "Language Grounding",
910
  "raw_hits": []
911
  },
912
  {
913
- "name": "caption_grounding: public_field_input_short_is_human_readable",
914
  "status": "pass",
915
- "value": "text-like query and candidate windows",
916
  "raw_hits": []
917
  },
918
  {
919
- "name": "caption_grounding: public_field_card_blurb_is_human_readable",
920
  "status": "pass",
921
- "value": "Retrieve the matching time window for an annotation-derived text query.",
922
  "raw_hits": []
923
  },
924
  {
925
- "name": "caption_grounding: public_field_process_short_is_human_readable",
926
  "status": "pass",
927
- "value": "query features -> candidate index -> cosine ranker",
928
  "raw_hits": []
929
  },
930
  {
931
- "name": "caption_grounding: public_field_research_name_is_human_readable",
932
  "status": "pass",
933
- "value": "Language-to-Moment Grounding",
934
  "raw_hits": []
935
  },
936
  {
@@ -1008,45 +1008,45 @@
1008
  "observed": "cross_modal_retrieval"
1009
  },
1010
  {
1011
- "name": "cross_modal_retrieval: public_field_output_short_is_human_readable",
1012
  "status": "pass",
1013
- "value": "ranked visual windows",
1014
  "raw_hits": []
1015
  },
1016
  {
1017
- "name": "cross_modal_retrieval: public_field_plain_goal_is_human_readable",
1018
  "status": "pass",
1019
- "value": "Use one group of modalities to retrieve the matching window from another group.",
1020
  "raw_hits": []
1021
  },
1022
  {
1023
- "name": "cross_modal_retrieval: public_field_display_name_is_human_readable",
1024
  "status": "pass",
1025
- "value": "Cross-Modal Retrieval",
1026
  "raw_hits": []
1027
  },
1028
  {
1029
- "name": "cross_modal_retrieval: public_field_input_short_is_human_readable",
1030
  "status": "pass",
1031
- "value": "motion/IMU/pose query; depth/video candidates",
1032
  "raw_hits": []
1033
  },
1034
  {
1035
- "name": "cross_modal_retrieval: public_field_card_blurb_is_human_readable",
1036
  "status": "pass",
1037
- "value": "Use motion, IMU, and camera-pose signals to retrieve the matching depth/video window.",
1038
  "raw_hits": []
1039
  },
1040
  {
1041
- "name": "cross_modal_retrieval: public_field_process_short_is_human_readable",
1042
  "status": "pass",
1043
- "value": "modality split -> projection -> nearest-neighbor ranker",
1044
  "raw_hits": []
1045
  },
1046
  {
1047
- "name": "cross_modal_retrieval: public_field_research_name_is_human_readable",
1048
  "status": "pass",
1049
- "value": "Multimodal Representation Retrieval",
1050
  "raw_hits": []
1051
  },
1052
  {
@@ -1126,45 +1126,45 @@
1126
  "observed": "modality_reconstruction"
1127
  },
1128
  {
1129
- "name": "modality_reconstruction: public_field_output_short_is_human_readable",
1130
  "status": "pass",
1131
- "value": "reconstructed depth/video vector",
1132
  "raw_hits": []
1133
  },
1134
  {
1135
- "name": "modality_reconstruction: public_field_plain_goal_is_human_readable",
1136
  "status": "pass",
1137
- "value": "Predict one modality feature block from other modality blocks.",
1138
  "raw_hits": []
1139
  },
1140
  {
1141
- "name": "modality_reconstruction: public_field_display_name_is_human_readable",
1142
  "status": "pass",
1143
- "value": "Cross-Modal Reconstruction",
1144
  "raw_hits": []
1145
  },
1146
  {
1147
- "name": "modality_reconstruction: public_field_input_short_is_human_readable",
1148
  "status": "pass",
1149
- "value": "motion, IMU, and camera/pose features",
1150
  "raw_hits": []
1151
  },
1152
  {
1153
- "name": "modality_reconstruction: public_field_card_blurb_is_human_readable",
1154
  "status": "pass",
1155
- "value": "Predict compressed depth/video feature vectors from motion, IMU, and camera-pose features.",
1156
  "raw_hits": []
1157
  },
1158
  {
1159
- "name": "modality_reconstruction: public_field_process_short_is_human_readable",
1160
  "status": "pass",
1161
- "value": "source-target split -> scaler -> regression head",
1162
  "raw_hits": []
1163
  },
1164
  {
1165
- "name": "modality_reconstruction: public_field_research_name_is_human_readable",
1166
  "status": "pass",
1167
- "value": "Modality Feature Reconstruction",
1168
  "raw_hits": []
1169
  },
1170
  {
@@ -1244,43 +1244,43 @@
1244
  "observed": "temporal_order"
1245
  },
1246
  {
1247
- "name": "temporal_order: public_field_output_short_is_human_readable",
1248
  "status": "pass",
1249
- "value": "correct or reversed",
1250
  "raw_hits": []
1251
  },
1252
  {
1253
- "name": "temporal_order: public_field_plain_goal_is_human_readable",
1254
  "status": "pass",
1255
- "value": "Tell whether two nearby windows are in the correct time order.",
1256
  "raw_hits": []
1257
  },
1258
  {
1259
- "name": "temporal_order: public_field_display_name_is_human_readable",
1260
  "status": "pass",
1261
  "value": "Temporal Order Verification",
1262
  "raw_hits": []
1263
  },
1264
  {
1265
- "name": "temporal_order: public_field_input_short_is_human_readable",
1266
  "status": "pass",
1267
- "value": "two adjacent windows plus difference vector",
1268
  "raw_hits": []
1269
  },
1270
  {
1271
- "name": "temporal_order: public_field_card_blurb_is_human_readable",
1272
  "status": "pass",
1273
- "value": "Tell whether two neighboring windows are in chronological order or reversed.",
1274
  "raw_hits": []
1275
  },
1276
  {
1277
- "name": "temporal_order: public_field_process_short_is_human_readable",
1278
  "status": "pass",
1279
- "value": "pair builder -> feature combiner -> binary classifier",
1280
  "raw_hits": []
1281
  },
1282
  {
1283
- "name": "temporal_order: public_field_research_name_is_human_readable",
1284
  "status": "pass",
1285
  "value": "Temporal Order Verification",
1286
  "raw_hits": []
@@ -1360,45 +1360,45 @@
1360
  "observed": "misalignment_detection"
1361
  },
1362
  {
1363
- "name": "misalignment_detection: public_field_output_short_is_human_readable",
1364
  "status": "pass",
1365
- "value": "aligned or shifted",
1366
  "raw_hits": []
1367
  },
1368
  {
1369
- "name": "misalignment_detection: public_field_plain_goal_is_human_readable",
1370
  "status": "pass",
1371
- "value": "Detect when modalities that should match are shifted out of sync.",
1372
  "raw_hits": []
1373
  },
1374
  {
1375
- "name": "misalignment_detection: public_field_display_name_is_human_readable",
1376
  "status": "pass",
1377
- "value": "Multimodal Synchronization Detection",
1378
  "raw_hits": []
1379
  },
1380
  {
1381
- "name": "misalignment_detection: public_field_input_short_is_human_readable",
1382
  "status": "pass",
1383
- "value": "motion-side and visual/depth-side feature groups",
1384
  "raw_hits": []
1385
  },
1386
  {
1387
- "name": "misalignment_detection: public_field_card_blurb_is_human_readable",
1388
  "status": "pass",
1389
- "value": "Detect whether motion and visual/depth streams have been artificially shifted out of sync.",
1390
  "raw_hits": []
1391
  },
1392
  {
1393
- "name": "misalignment_detection: public_field_process_short_is_human_readable",
1394
  "status": "pass",
1395
- "value": "aligned/shifted pairs -> feature combiner -> binary classifier",
1396
  "raw_hits": []
1397
  },
1398
  {
1399
- "name": "misalignment_detection: public_field_research_name_is_human_readable",
1400
  "status": "pass",
1401
- "value": "Cross-Modal Misalignment Detection",
1402
  "raw_hits": []
1403
  },
1404
  {
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-13T17:46:47+00:00",
4
  "summary": {
5
  "task_count": 12,
6
  "expected_task_count": 12,
 
64
  "observed": "timeline_action"
65
  },
66
  {
67
+ "name": "timeline_action: public_field_input_short_is_human_readable",
68
  "status": "pass",
69
+ "value": "20-frame multimodal window",
70
  "raw_hits": []
71
  },
72
  {
73
+ "name": "timeline_action: public_field_process_short_is_human_readable",
74
  "status": "pass",
75
+ "value": "window features -> action label builder -> classifier",
76
  "raw_hits": []
77
  },
78
  {
79
+ "name": "timeline_action: public_field_research_name_is_human_readable",
80
  "status": "pass",
81
+ "value": "Egocentric Action Recognition",
82
  "raw_hits": []
83
  },
84
  {
85
+ "name": "timeline_action: public_field_card_blurb_is_human_readable",
86
  "status": "pass",
87
+ "value": "Recognize the current manipulation action from synchronized visual, motion, inertial, pose, and annotation context.",
88
  "raw_hits": []
89
  },
90
  {
91
+ "name": "timeline_action: public_field_output_short_is_human_readable",
92
  "status": "pass",
93
+ "value": "current action class",
94
  "raw_hits": []
95
  },
96
  {
97
+ "name": "timeline_action: public_field_plain_goal_is_human_readable",
98
  "status": "pass",
99
+ "value": "Look at one short multimodal window and name what action is happening now.",
100
  "raw_hits": []
101
  },
102
  {
103
+ "name": "timeline_action: public_field_display_name_is_human_readable",
104
  "status": "pass",
105
+ "value": "Action Recognition",
106
  "raw_hits": []
107
  },
108
  {
 
184
  "observed": "timeline_subtask"
185
  },
186
  {
187
+ "name": "timeline_subtask: public_field_input_short_is_human_readable",
188
  "status": "pass",
189
+ "value": "20-frame multimodal window",
190
  "raw_hits": []
191
  },
192
  {
193
+ "name": "timeline_subtask: public_field_process_short_is_human_readable",
194
  "status": "pass",
195
+ "value": "window features -> subtask label builder -> classifier",
196
  "raw_hits": []
197
  },
198
  {
199
+ "name": "timeline_subtask: public_field_research_name_is_human_readable",
200
  "status": "pass",
201
+ "value": "Temporal Subtask Recognition",
202
  "raw_hits": []
203
  },
204
  {
205
+ "name": "timeline_subtask: public_field_card_blurb_is_human_readable",
206
  "status": "pass",
207
+ "value": "Recognize the broader activity stage so fine actions become a readable procedure timeline.",
208
  "raw_hits": []
209
  },
210
  {
211
+ "name": "timeline_subtask: public_field_output_short_is_human_readable",
212
  "status": "pass",
213
+ "value": "current procedure step",
214
  "raw_hits": []
215
  },
216
  {
217
+ "name": "timeline_subtask: public_field_plain_goal_is_human_readable",
218
  "status": "pass",
219
+ "value": "Predict the higher-level task stage for the current window.",
220
  "raw_hits": []
221
  },
222
  {
223
+ "name": "timeline_subtask: public_field_display_name_is_human_readable",
224
  "status": "pass",
225
+ "value": "Procedure Step Recognition",
226
  "raw_hits": []
227
  },
228
  {
 
304
  "observed": "transition_detection"
305
  },
306
  {
307
+ "name": "transition_detection: public_field_input_short_is_human_readable",
308
  "status": "pass",
309
+ "value": "current window with boundary target",
310
  "raw_hits": []
311
  },
312
  {
313
+ "name": "transition_detection: public_field_process_short_is_human_readable",
314
  "status": "pass",
315
+ "value": "action changes -> boundary labels -> binary classifier",
316
  "raw_hits": []
317
  },
318
  {
319
+ "name": "transition_detection: public_field_research_name_is_human_readable",
320
  "status": "pass",
321
+ "value": "Temporal Action Segmentation",
322
  "raw_hits": []
323
  },
324
  {
325
+ "name": "transition_detection: public_field_card_blurb_is_human_readable",
326
  "status": "pass",
327
+ "value": "Detect the local moment where the episode changes from one action segment to the next.",
328
  "raw_hits": []
329
  },
330
  {
331
+ "name": "transition_detection: public_field_output_short_is_human_readable",
332
  "status": "pass",
333
+ "value": "boundary or steady",
334
  "raw_hits": []
335
  },
336
  {
337
+ "name": "transition_detection: public_field_plain_goal_is_human_readable",
338
  "status": "pass",
339
+ "value": "Detect whether the current window is near a boundary between actions.",
340
  "raw_hits": []
341
  },
342
  {
343
+ "name": "transition_detection: public_field_display_name_is_human_readable",
344
  "status": "pass",
345
+ "value": "Action Boundary Detection",
346
  "raw_hits": []
347
  },
348
  {
 
422
  "observed": "next_action"
423
  },
424
  {
425
+ "name": "next_action: public_field_input_short_is_human_readable",
426
  "status": "pass",
427
+ "value": "current window at time t",
428
  "raw_hits": []
429
  },
430
  {
431
+ "name": "next_action: public_field_process_short_is_human_readable",
432
  "status": "pass",
433
+ "value": "current features -> future label shift -> classifier",
434
  "raw_hits": []
435
  },
436
  {
437
+ "name": "next_action: public_field_research_name_is_human_readable",
438
  "status": "pass",
439
+ "value": "Short-Horizon Intention Prediction",
440
  "raw_hits": []
441
  },
442
  {
443
+ "name": "next_action: public_field_card_blurb_is_human_readable",
444
  "status": "pass",
445
+ "value": "Forecast the near-future action from the current observations only.",
446
  "raw_hits": []
447
  },
448
  {
449
+ "name": "next_action: public_field_output_short_is_human_readable",
450
  "status": "pass",
451
+ "value": "action at t+20 frames",
452
  "raw_hits": []
453
  },
454
  {
455
+ "name": "next_action: public_field_plain_goal_is_human_readable",
456
  "status": "pass",
457
+ "value": "Use the current window to guess the action that will happen shortly after it.",
458
  "raw_hits": []
459
  },
460
  {
461
+ "name": "next_action: public_field_display_name_is_human_readable",
462
  "status": "pass",
463
+ "value": "Next-Action Prediction",
464
  "raw_hits": []
465
  },
466
  {
 
540
  "observed": "hand_trajectory_forecast"
541
  },
542
  {
543
+ "name": "hand_trajectory_forecast: public_field_input_short_is_human_readable",
544
  "status": "pass",
545
+ "value": "current multimodal window",
546
  "raw_hits": []
547
  },
548
  {
549
+ "name": "hand_trajectory_forecast: public_field_process_short_is_human_readable",
550
  "status": "pass",
551
+ "value": "current features -> future mocap target -> regression head",
552
  "raw_hits": []
553
  },
554
  {
555
+ "name": "hand_trajectory_forecast: public_field_research_name_is_human_readable",
556
  "status": "pass",
557
+ "value": "3D Hand Motion Forecasting",
558
  "raw_hits": []
559
  },
560
  {
561
+ "name": "hand_trajectory_forecast: public_field_card_blurb_is_human_readable",
562
  "status": "pass",
563
+ "value": "Predict the future 3D left/right hand path from the current multimodal state.",
564
  "raw_hits": []
565
  },
566
  {
567
+ "name": "hand_trajectory_forecast: public_field_output_short_is_human_readable",
568
  "status": "pass",
569
+ "value": "future hand-joint trajectory",
570
  "raw_hits": []
571
  },
572
  {
573
+ "name": "hand_trajectory_forecast: public_field_plain_goal_is_human_readable",
574
  "status": "pass",
575
+ "value": "Predict where the hands will move over the next few frames.",
576
  "raw_hits": []
577
  },
578
  {
579
+ "name": "hand_trajectory_forecast: public_field_display_name_is_human_readable",
580
  "status": "pass",
581
+ "value": "Hand Trajectory Forecasting",
582
  "raw_hits": []
583
  },
584
  {
 
658
  "observed": "contact_prediction"
659
  },
660
  {
661
+ "name": "contact_prediction: public_field_input_short_is_human_readable",
662
  "status": "pass",
663
+ "value": "non-contact, non-caption features",
664
  "raw_hits": []
665
  },
666
  {
667
+ "name": "contact_prediction: public_field_process_short_is_human_readable",
668
  "status": "pass",
669
+ "value": "feature filter -> contact target -> binary classifier",
670
  "raw_hits": []
671
  },
672
  {
673
+ "name": "contact_prediction: public_field_research_name_is_human_readable",
674
  "status": "pass",
675
+ "value": "Human-Object Contact Prediction",
676
  "raw_hits": []
677
  },
678
  {
679
+ "name": "contact_prediction: public_field_card_blurb_is_human_readable",
680
  "status": "pass",
681
+ "value": "Predict whether body or hand contact with the scene is occurring without leaking contact labels.",
682
  "raw_hits": []
683
  },
684
  {
685
+ "name": "contact_prediction: public_field_output_short_is_human_readable",
686
  "status": "pass",
687
+ "value": "contact or no contact",
688
  "raw_hits": []
689
  },
690
  {
691
+ "name": "contact_prediction: public_field_plain_goal_is_human_readable",
692
  "status": "pass",
693
+ "value": "Predict whether the body or hand is in contact with something.",
694
  "raw_hits": []
695
  },
696
  {
697
+ "name": "contact_prediction: public_field_display_name_is_human_readable",
698
  "status": "pass",
699
+ "value": "Contact State Prediction",
700
  "raw_hits": []
701
  },
702
  {
 
774
  "observed": "object_relevance"
775
  },
776
  {
777
+ "name": "object_relevance: public_field_input_short_is_human_readable",
778
  "status": "pass",
779
+ "value": "non-caption multimodal features",
780
  "raw_hits": []
781
  },
782
  {
783
+ "name": "object_relevance: public_field_process_short_is_human_readable",
784
  "status": "pass",
785
+ "value": "object vocabulary -> multi-hot labels -> sigmoid heads",
786
  "raw_hits": []
787
  },
788
  {
789
+ "name": "object_relevance: public_field_research_name_is_human_readable",
790
  "status": "pass",
791
+ "value": "Object-Centric Interaction Recognition",
792
  "raw_hits": []
793
  },
794
  {
795
+ "name": "object_relevance: public_field_card_blurb_is_human_readable",
796
  "status": "pass",
797
+ "value": "Infer which objects are relevant to the current manipulation window from non-caption features.",
798
  "raw_hits": []
799
  },
800
  {
801
+ "name": "object_relevance: public_field_output_short_is_human_readable",
802
  "status": "pass",
803
+ "value": "relevant object set",
804
  "raw_hits": []
805
  },
806
  {
807
+ "name": "object_relevance: public_field_plain_goal_is_human_readable",
808
  "status": "pass",
809
+ "value": "Predict which objects matter in the current window.",
810
  "raw_hits": []
811
  },
812
  {
813
+ "name": "object_relevance: public_field_display_name_is_human_readable",
814
  "status": "pass",
815
+ "value": "Object Relevance Prediction",
816
  "raw_hits": []
817
  },
818
  {
 
892
  "observed": "caption_grounding"
893
  },
894
  {
895
+ "name": "caption_grounding: public_field_input_short_is_human_readable",
896
  "status": "pass",
897
+ "value": "text-like query and candidate windows",
898
  "raw_hits": []
899
  },
900
  {
901
+ "name": "caption_grounding: public_field_process_short_is_human_readable",
902
  "status": "pass",
903
+ "value": "query features -> candidate index -> cosine ranker",
904
  "raw_hits": []
905
  },
906
  {
907
+ "name": "caption_grounding: public_field_research_name_is_human_readable",
908
  "status": "pass",
909
+ "value": "Language-to-Moment Grounding",
910
  "raw_hits": []
911
  },
912
  {
913
+ "name": "caption_grounding: public_field_card_blurb_is_human_readable",
914
  "status": "pass",
915
+ "value": "Retrieve the matching time window for an annotation-derived text query.",
916
  "raw_hits": []
917
  },
918
  {
919
+ "name": "caption_grounding: public_field_output_short_is_human_readable",
920
  "status": "pass",
921
+ "value": "ranked matching moments",
922
  "raw_hits": []
923
  },
924
  {
925
+ "name": "caption_grounding: public_field_plain_goal_is_human_readable",
926
  "status": "pass",
927
+ "value": "Given a text-like query from annotation, find the matching time window.",
928
  "raw_hits": []
929
  },
930
  {
931
+ "name": "caption_grounding: public_field_display_name_is_human_readable",
932
  "status": "pass",
933
+ "value": "Language Grounding",
934
  "raw_hits": []
935
  },
936
  {
 
1008
  "observed": "cross_modal_retrieval"
1009
  },
1010
  {
1011
+ "name": "cross_modal_retrieval: public_field_input_short_is_human_readable",
1012
  "status": "pass",
1013
+ "value": "motion/IMU/pose query; depth/video candidates",
1014
  "raw_hits": []
1015
  },
1016
  {
1017
+ "name": "cross_modal_retrieval: public_field_process_short_is_human_readable",
1018
  "status": "pass",
1019
+ "value": "modality split -> projection -> nearest-neighbor ranker",
1020
  "raw_hits": []
1021
  },
1022
  {
1023
+ "name": "cross_modal_retrieval: public_field_research_name_is_human_readable",
1024
  "status": "pass",
1025
+ "value": "Multimodal Representation Retrieval",
1026
  "raw_hits": []
1027
  },
1028
  {
1029
+ "name": "cross_modal_retrieval: public_field_card_blurb_is_human_readable",
1030
  "status": "pass",
1031
+ "value": "Use motion, IMU, and camera-pose signals to retrieve the matching depth/video window.",
1032
  "raw_hits": []
1033
  },
1034
  {
1035
+ "name": "cross_modal_retrieval: public_field_output_short_is_human_readable",
1036
  "status": "pass",
1037
+ "value": "ranked visual windows",
1038
  "raw_hits": []
1039
  },
1040
  {
1041
+ "name": "cross_modal_retrieval: public_field_plain_goal_is_human_readable",
1042
  "status": "pass",
1043
+ "value": "Use one group of modalities to retrieve the matching window from another group.",
1044
  "raw_hits": []
1045
  },
1046
  {
1047
+ "name": "cross_modal_retrieval: public_field_display_name_is_human_readable",
1048
  "status": "pass",
1049
+ "value": "Cross-Modal Retrieval",
1050
  "raw_hits": []
1051
  },
1052
  {
 
1126
  "observed": "modality_reconstruction"
1127
  },
1128
  {
1129
+ "name": "modality_reconstruction: public_field_input_short_is_human_readable",
1130
  "status": "pass",
1131
+ "value": "motion, IMU, and camera/pose features",
1132
  "raw_hits": []
1133
  },
1134
  {
1135
+ "name": "modality_reconstruction: public_field_process_short_is_human_readable",
1136
  "status": "pass",
1137
+ "value": "source-target split -> scaler -> regression head",
1138
  "raw_hits": []
1139
  },
1140
  {
1141
+ "name": "modality_reconstruction: public_field_research_name_is_human_readable",
1142
  "status": "pass",
1143
+ "value": "Modality Feature Reconstruction",
1144
  "raw_hits": []
1145
  },
1146
  {
1147
+ "name": "modality_reconstruction: public_field_card_blurb_is_human_readable",
1148
  "status": "pass",
1149
+ "value": "Predict compressed depth/video feature vectors from motion, IMU, and camera-pose features.",
1150
  "raw_hits": []
1151
  },
1152
  {
1153
+ "name": "modality_reconstruction: public_field_output_short_is_human_readable",
1154
  "status": "pass",
1155
+ "value": "reconstructed depth/video vector",
1156
  "raw_hits": []
1157
  },
1158
  {
1159
+ "name": "modality_reconstruction: public_field_plain_goal_is_human_readable",
1160
  "status": "pass",
1161
+ "value": "Predict one modality feature block from other modality blocks.",
1162
  "raw_hits": []
1163
  },
1164
  {
1165
+ "name": "modality_reconstruction: public_field_display_name_is_human_readable",
1166
  "status": "pass",
1167
+ "value": "Cross-Modal Reconstruction",
1168
  "raw_hits": []
1169
  },
1170
  {
 
1244
  "observed": "temporal_order"
1245
  },
1246
  {
1247
+ "name": "temporal_order: public_field_input_short_is_human_readable",
1248
  "status": "pass",
1249
+ "value": "two adjacent windows plus difference vector",
1250
  "raw_hits": []
1251
  },
1252
  {
1253
+ "name": "temporal_order: public_field_process_short_is_human_readable",
1254
  "status": "pass",
1255
+ "value": "pair builder -> feature combiner -> binary classifier",
1256
  "raw_hits": []
1257
  },
1258
  {
1259
+ "name": "temporal_order: public_field_research_name_is_human_readable",
1260
  "status": "pass",
1261
  "value": "Temporal Order Verification",
1262
  "raw_hits": []
1263
  },
1264
  {
1265
+ "name": "temporal_order: public_field_card_blurb_is_human_readable",
1266
  "status": "pass",
1267
+ "value": "Tell whether two neighboring windows are in chronological order or reversed.",
1268
  "raw_hits": []
1269
  },
1270
  {
1271
+ "name": "temporal_order: public_field_output_short_is_human_readable",
1272
  "status": "pass",
1273
+ "value": "correct or reversed",
1274
  "raw_hits": []
1275
  },
1276
  {
1277
+ "name": "temporal_order: public_field_plain_goal_is_human_readable",
1278
  "status": "pass",
1279
+ "value": "Tell whether two nearby windows are in the correct time order.",
1280
  "raw_hits": []
1281
  },
1282
  {
1283
+ "name": "temporal_order: public_field_display_name_is_human_readable",
1284
  "status": "pass",
1285
  "value": "Temporal Order Verification",
1286
  "raw_hits": []
 
1360
  "observed": "misalignment_detection"
1361
  },
1362
  {
1363
+ "name": "misalignment_detection: public_field_input_short_is_human_readable",
1364
  "status": "pass",
1365
+ "value": "motion-side and visual/depth-side feature groups",
1366
  "raw_hits": []
1367
  },
1368
  {
1369
+ "name": "misalignment_detection: public_field_process_short_is_human_readable",
1370
  "status": "pass",
1371
+ "value": "aligned/shifted pairs -> feature combiner -> binary classifier",
1372
  "raw_hits": []
1373
  },
1374
  {
1375
+ "name": "misalignment_detection: public_field_research_name_is_human_readable",
1376
  "status": "pass",
1377
+ "value": "Cross-Modal Misalignment Detection",
1378
  "raw_hits": []
1379
  },
1380
  {
1381
+ "name": "misalignment_detection: public_field_card_blurb_is_human_readable",
1382
  "status": "pass",
1383
+ "value": "Detect whether motion and visual/depth streams have been artificially shifted out of sync.",
1384
  "raw_hits": []
1385
  },
1386
  {
1387
+ "name": "misalignment_detection: public_field_output_short_is_human_readable",
1388
  "status": "pass",
1389
+ "value": "aligned or shifted",
1390
  "raw_hits": []
1391
  },
1392
  {
1393
+ "name": "misalignment_detection: public_field_plain_goal_is_human_readable",
1394
  "status": "pass",
1395
+ "value": "Detect when modalities that should match are shifted out of sync.",
1396
  "raw_hits": []
1397
  },
1398
  {
1399
+ "name": "misalignment_detection: public_field_display_name_is_human_readable",
1400
  "status": "pass",
1401
+ "value": "Multimodal Synchronization Detection",
1402
  "raw_hits": []
1403
  },
1404
  {
docs/data/website_integrity.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-12T18:14:52+00:00",
4
  "docs_root": "docs",
5
  "site_base": "/ropedia-xperience-10m-task-suite/",
6
  "summary": {
7
  "html_pages": 4,
8
  "local_references": 142,
9
  "external_reference_count": 111,
10
- "json_files": 37,
11
  "image_assets_referenced": 22,
12
  "failure_count": 0
13
  },
@@ -75,7 +75,7 @@
75
  "status": "pass",
76
  "reason": "The project overview should appear before the deeper progress ledger.",
77
  "overview_index": 67412,
78
- "evidence_index": 93026
79
  },
80
  {
81
  "name": "project_status_links_json",
@@ -137,7 +137,7 @@
137
  "statuses": [
138
  "implemented",
139
  "implemented_for_first_pilot",
140
- "verified_baseline",
141
  "verified_companion_result",
142
  "current",
143
  "active_next_step",
@@ -154,8 +154,8 @@
154
  "status": "pass",
155
  "reason": "The evaluation protocol should appear before the deeper evidence ledger.",
156
  "overview_index": 67412,
157
- "protocol_index": 89208,
158
- "evidence_index": 93026
159
  },
160
  {
161
  "name": "evaluation_protocol_links_json",
@@ -253,7 +253,7 @@
253
  },
254
  {
255
  "path": "data/artifact_index.json",
256
- "bytes": 87230,
257
  "top_level_type": "dict"
258
  },
259
  {
@@ -303,12 +303,12 @@
303
  },
304
  {
305
  "path": "data/omni_finetune_verified_result.json",
306
- "bytes": 4431,
307
  "top_level_type": "dict"
308
  },
309
  {
310
  "path": "data/omni_model_comparison.json",
311
- "bytes": 81593,
312
  "top_level_type": "dict"
313
  },
314
  {
@@ -323,12 +323,12 @@
323
  },
324
  {
325
  "path": "data/project_packet.json",
326
- "bytes": 10496,
327
  "top_level_type": "dict"
328
  },
329
  {
330
  "path": "data/project_status.json",
331
- "bytes": 23198,
332
  "top_level_type": "dict"
333
  },
334
  {
@@ -351,6 +351,11 @@
351
  "bytes": 12183,
352
  "top_level_type": "dict"
353
  },
 
 
 
 
 
354
  {
355
  "path": "data/rendered_site_check.json",
356
  "bytes": 4032,
@@ -373,12 +378,12 @@
373
  },
374
  {
375
  "path": "data/research_roadmap.json",
376
- "bytes": 13313,
377
  "top_level_type": "dict"
378
  },
379
  {
380
  "path": "data/research_roadmap_interactive.json",
381
- "bytes": 145083,
382
  "top_level_type": "dict"
383
  },
384
  {
@@ -388,7 +393,7 @@
388
  },
389
  {
390
  "path": "data/scope_claims_audit.json",
391
- "bytes": 21325,
392
  "top_level_type": "dict"
393
  },
394
  {
@@ -403,7 +408,7 @@
403
  },
404
  {
405
  "path": "data/summary_metrics.json",
406
- "bytes": 27604,
407
  "top_level_type": "dict"
408
  },
409
  {
@@ -423,7 +428,7 @@
423
  },
424
  {
425
  "path": "data/website_integrity.json",
426
- "bytes": 15654,
427
  "top_level_type": "dict"
428
  },
429
  {
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-13T17:46:49+00:00",
4
  "docs_root": "docs",
5
  "site_base": "/ropedia-xperience-10m-task-suite/",
6
  "summary": {
7
  "html_pages": 4,
8
  "local_references": 142,
9
  "external_reference_count": 111,
10
+ "json_files": 38,
11
  "image_assets_referenced": 22,
12
  "failure_count": 0
13
  },
 
75
  "status": "pass",
76
  "reason": "The project overview should appear before the deeper progress ledger.",
77
  "overview_index": 67412,
78
+ "evidence_index": 93059
79
  },
80
  {
81
  "name": "project_status_links_json",
 
137
  "statuses": [
138
  "implemented",
139
  "implemented_for_first_pilot",
140
+ "verified_latest_branch",
141
  "verified_companion_result",
142
  "current",
143
  "active_next_step",
 
154
  "status": "pass",
155
  "reason": "The evaluation protocol should appear before the deeper evidence ledger.",
156
  "overview_index": 67412,
157
+ "protocol_index": 89241,
158
+ "evidence_index": 93059
159
  },
160
  {
161
  "name": "evaluation_protocol_links_json",
 
253
  },
254
  {
255
  "path": "data/artifact_index.json",
256
+ "bytes": 88913,
257
  "top_level_type": "dict"
258
  },
259
  {
 
303
  },
304
  {
305
  "path": "data/omni_finetune_verified_result.json",
306
+ "bytes": 4325,
307
  "top_level_type": "dict"
308
  },
309
  {
310
  "path": "data/omni_model_comparison.json",
311
+ "bytes": 81866,
312
  "top_level_type": "dict"
313
  },
314
  {
 
323
  },
324
  {
325
  "path": "data/project_packet.json",
326
+ "bytes": 10597,
327
  "top_level_type": "dict"
328
  },
329
  {
330
  "path": "data/project_status.json",
331
+ "bytes": 23535,
332
  "top_level_type": "dict"
333
  },
334
  {
 
351
  "bytes": 12183,
352
  "top_level_type": "dict"
353
  },
354
+ {
355
+ "path": "data/qwen3_v5_v6_comparison.json",
356
+ "bytes": 2814,
357
+ "top_level_type": "dict"
358
+ },
359
  {
360
  "path": "data/rendered_site_check.json",
361
  "bytes": 4032,
 
378
  },
379
  {
380
  "path": "data/research_roadmap.json",
381
+ "bytes": 13450,
382
  "top_level_type": "dict"
383
  },
384
  {
385
  "path": "data/research_roadmap_interactive.json",
386
+ "bytes": 145234,
387
  "top_level_type": "dict"
388
  },
389
  {
 
393
  },
394
  {
395
  "path": "data/scope_claims_audit.json",
396
+ "bytes": 21795,
397
  "top_level_type": "dict"
398
  },
399
  {
 
408
  },
409
  {
410
  "path": "data/summary_metrics.json",
411
+ "bytes": 27807,
412
  "top_level_type": "dict"
413
  },
414
  {
 
428
  },
429
  {
430
  "path": "data/website_integrity.json",
431
+ "bytes": 15774,
432
  "top_level_type": "dict"
433
  },
434
  {
results/omni_finetune/OMNI_MODEL_COMPARISON.md CHANGED
@@ -1,6 +1,6 @@
1
  # Omni Model Comparison
2
 
3
- Generated: `2026-06-12T18:14:48+00:00`
4
 
5
  Compare only rows with the same scope and target. Single-episode raw-feature metrics, 128-episode metadata baselines, Qwen3 structured JSON metrics, and the two Cosmos3 targets answer different questions: Nano future-window retrieval versus Super structured JSON Reasoner evaluation.
6
 
@@ -55,8 +55,8 @@ The one-episode Qwen entry is only a sensor-adapter smoke test with Qwen3 weight
55
  | full-param gate | passed | Full-Parameter 256-Step Post-Qwen-v6 Pilot | 2048 windows/samples | full_parameter_gate=passed, observed_train_steps=256, final_step_loss=0.0096, epoch_train_loss=0.1158, checkpoint_saved=False | `results/omni_finetune/xperience10m_qwen3_omni_128ep_fullparam_pilot256_after_qwen_v6_preemptible_8gpu_20260611/training_metadata.json` |
56
  | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.8750, action_macro_f1=0.0027, transition_accuracy=0.8504, contact_accuracy=0.6451 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/verified_result_summary.json` |
57
  | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.8527, action_macro_f1=0.0021, transition_accuracy=0.8281, contact_accuracy=0.6518 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/verified_result_summary.json` |
58
- | 128 episode | verified current | Qwen3-Omni LoRA | 119 episodes, 34269 windows/samples, 4032 eval | json_validity_rate=1.0000, action_macro_f1=0.0023, transition_accuracy=0.9908, contact_accuracy=0.7865 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full/verified_result_summary.json` |
59
- | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 34269 windows/samples, 4032 eval | json_validity_rate=0.9990, action_macro_f1=0.0029, transition_accuracy=0.9898, contact_accuracy=0.8177 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/verified_result_summary.json` |
60
  | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.9978, action_macro_f1=0.0024, transition_accuracy=0.9710, contact_accuracy=0.7188 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/verified_result_summary.json` |
61
  | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=1.0000, action_macro_f1=0.0022, transition_accuracy=0.9732, contact_accuracy=0.7210 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/verified_result_summary.json` |
62
  | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=1.0000, action_macro_f1=0.0019, transition_accuracy=0.9732, contact_accuracy=0.7299 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/verified_result_summary.json` |
@@ -132,4 +132,5 @@ This is the first verified Cosmos3-Super fine-tuned adapter branch. Its metric i
132
 
133
  ## Pending
134
 
135
- - Use the verified Qwen3 v5 dense multiscale full-eval package as the current Qwen row; older Qwen package rows remain historical diagnostics for comparison.
 
 
1
  # Omni Model Comparison
2
 
3
+ Generated: `2026-06-13T17:41:35+00:00`
4
 
5
  Compare only rows with the same scope and target. Single-episode raw-feature metrics, 128-episode metadata baselines, Qwen3 structured JSON metrics, and the two Cosmos3 targets answer different questions: Nano future-window retrieval versus Super structured JSON Reasoner evaluation.
6
 
 
55
  | full-param gate | passed | Full-Parameter 256-Step Post-Qwen-v6 Pilot | 2048 windows/samples | full_parameter_gate=passed, observed_train_steps=256, final_step_loss=0.0096, epoch_train_loss=0.1158, checkpoint_saved=False | `results/omni_finetune/xperience10m_qwen3_omni_128ep_fullparam_pilot256_after_qwen_v6_preemptible_8gpu_20260611/training_metadata.json` |
56
  | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.8750, action_macro_f1=0.0027, transition_accuracy=0.8504, contact_accuracy=0.6451 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/verified_result_summary.json` |
57
  | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.8527, action_macro_f1=0.0021, transition_accuracy=0.8281, contact_accuracy=0.6518 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/verified_result_summary.json` |
58
+ | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 34269 windows/samples, 4032 eval | json_validity_rate=1.0000, action_macro_f1=0.0023, transition_accuracy=0.9908, contact_accuracy=0.7865 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full/verified_result_summary.json` |
59
+ | 128 episode | verified current | Qwen3-Omni LoRA | 119 episodes, 34269 windows/samples, 4032 eval | json_validity_rate=0.9990, action_macro_f1=0.0029, transition_accuracy=0.9898, contact_accuracy=0.8177 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/verified_result_summary.json` |
60
  | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.9978, action_macro_f1=0.0024, transition_accuracy=0.9710, contact_accuracy=0.7188 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/verified_result_summary.json` |
61
  | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=1.0000, action_macro_f1=0.0022, transition_accuracy=0.9732, contact_accuracy=0.7210 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/verified_result_summary.json` |
62
  | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=1.0000, action_macro_f1=0.0019, transition_accuracy=0.9732, contact_accuracy=0.7299 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/verified_result_summary.json` |
 
132
 
133
  ## Pending
134
 
135
+ - Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the current Qwen row; the v5 release tag remains pinned to the previous verified release.
136
+ - Read `results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md` before claiming v6 is globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses slightly on subtask accuracy, transition accuracy, next-action accuracy, object micro-F1, and JSON validity.
results/omni_finetune/QWEN3_FULL_PARAMETER_GATES_20260609.md CHANGED
@@ -1,6 +1,6 @@
1
  # Qwen3-Omni Full-Parameter Feasibility Gates
2
 
3
- Generated: `2026-06-12T18:14:48+00:00`
4
 
5
  The 2026-06-09 gates prove that Qwen3-Omni full-parameter FSDP can load, prepare, run backward/optimizer steps, and complete guarded pilots up to 128 optimizer steps on an 8-GPU remote worker. They do not prove a production full-parameter fine-tune, and they intentionally save no full checkpoints or public weights.
6
 
 
1
  # Qwen3-Omni Full-Parameter Feasibility Gates
2
 
3
+ Generated: `2026-06-13T17:41:13+00:00`
4
 
5
  The 2026-06-09 gates prove that Qwen3-Omni full-parameter FSDP can load, prepare, run backward/optimizer steps, and complete guarded pilots up to 128 optimizer steps on an 8-GPU remote worker. They do not prove a production full-parameter fine-tune, and they intentionally save no full checkpoints or public weights.
6
 
results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Qwen3-Omni v5/v6 Verified Comparison
2
+
3
+ Generated: `2026-06-14`
4
+
5
+ This compares only the two dense multiscale Qwen3-Omni LoRA held-out packages on the same selected 128-episode setup. Both use 4,032 held-out test predictions from 14 exported test episodes.
6
+
7
+ | metric | v5 | v6 | v6 - v5 |
8
+ | --- | ---: | ---: | ---: |
9
+ | JSON validity | 1.000000 | 0.999008 | -0.000992 |
10
+ | Action macro-F1 | 0.002290 | 0.002883 | +0.000593 |
11
+ | Subtask accuracy | 0.011194 | 0.003731 | -0.007463 |
12
+ | Transition accuracy | 0.990823 | 0.989831 | -0.000992 |
13
+ | Next-action accuracy | 0.053619 | 0.043053 | -0.010565 |
14
+ | Contact accuracy | 0.786458 | 0.817708 | +0.031250 |
15
+ | Object micro-F1 | 0.316146 | 0.306498 | -0.009648 |
16
+
17
+ ## Readout
18
+
19
+ v6 is the latest verified Qwen3-Omni LoRA branch and should be shown as the current Qwen row in generated comparisons. It improves action macro-F1 and contact accuracy. It does not dominate v5: v5 remains stronger on exact JSON validity, subtask accuracy, transition accuracy, next-action accuracy, and object micro-F1.
20
+
21
+ The public release policy is therefore:
22
+
23
+ - keep `ropedia-xperience-10m-v5` pinned to the previous stable v5 commit,
24
+ - publish v6 on `main`, GitHub Pages, HF Space, artifact dataset, and the Qwen LoRA model repo,
25
+ - create a separate `ropedia-xperience-10m-v6` tag only as an experimental/latest-Qwen release, not by moving the v5 tag.
26
+
27
+ ## Sources
28
+
29
+ - v5 package: `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full/verified_result_summary.json`
30
+ - v6 package: `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/verified_result_summary.json`
31
+ - machine-readable comparison: `docs/data/qwen3_v5_v6_comparison.json`
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/package_audit.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "pass",
3
+ "package_dir": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
4
+ "backbone": "qwen3_omni_lora",
5
+ "required_eval_files": [
6
+ "metrics.json",
7
+ "predictions.jsonl",
8
+ "predictions.csv",
9
+ "per_class_metrics.csv",
10
+ "confusion_matrix.csv",
11
+ "RUN_REPORT.md"
12
+ ],
13
+ "primary_metrics": [
14
+ "action_macro_f1",
15
+ "contact_accuracy",
16
+ "held_out_episode_count",
17
+ "json_validity_rate",
18
+ "next_action_accuracy",
19
+ "object_micro_f1",
20
+ "subtask_accuracy",
21
+ "transition_accuracy"
22
+ ],
23
+ "issues": []
24
+ }