cy0307 committed 9 days ago

Commit

8b4c4fe

verified ·

1 Parent(s): c433b73

Add files using upload-large-folder tool

Browse files

Files changed (18) hide show

docs/data/artifact_index.json +65 -32
docs/data/omni_model_comparison.json +11 -10
docs/data/project_packet.json +4 -4
docs/data/project_status.json +17 -15
docs/data/publication_audit.json +3 -3
docs/data/quality_gates.json +1 -1
docs/data/qwen3_full_parameter_gates.json +1 -1
docs/data/research_roadmap.json +6 -3
docs/data/research_roadmap_interactive.json +6 -3
docs/data/scope_claims_audit.json +16 -16
docs/data/source_alignment_audit.json +1 -1
docs/data/summary_metrics.json +1 -1
docs/data/task_surface_integrity.json +167 -167
docs/data/website_integrity.json +21 -16
results/omni_finetune/OMNI_MODEL_COMPARISON.md +5 -4
results/omni_finetune/QWEN3_FULL_PARAMETER_GATES_20260609.md +1 -1
results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md +31 -0
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/package_audit.json +24 -0

docs/data/artifact_index.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
   "title": "Ropedia Xperience-10M Task Suite Artifact Index",
-  "generated_at_utc": "2026-06-12T18:14:49+00:00",
   "status": "pass",
-  "artifact_count": 159,
   "missing": [],
   "by_kind": {
     "project_path": 14,
     "scaleup_contract": 7,
-    "scaleup_status": 36,
     "publication_workflow": 5,
     "project_scope": 1,
     "source_alignment": 5,
@@ -32,7 +32,7 @@
     "citation": 1,
     "license": 1,
     "verified_public_package": 10,
-    "publication_audit": 6
   },
   "artifacts": [
     {
@@ -65,8 +65,8 @@
       "surface": "repo_hf",
       "shows": "Gives a compact current-state table for first-pass readers.",
       "exists": true,
-      "bytes": 13535,
-      "sha256": "595f90e26d75baba46a0827eb2ec38d0c6badb137437f642dff4e88e22b7ca80"
     },
     {
       "id": "project_status_json",
@@ -76,8 +76,8 @@
       "surface": "website_hf",
       "shows": "Machine-readable copy of the current project status for website and HF mirrors.",
       "exists": true,
-      "bytes": 23198,
-      "sha256": "07678354fde90a7e15134f5deff036eb200f7bf422ab2350ff64825b902843d5"
     },
     {
       "id": "research_roadmap",
@@ -87,8 +87,8 @@
       "surface": "repo_hf",
       "shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
       "exists": true,
-      "bytes": 14354,
-      "sha256": "7d385916e6d07cfa5f95e80b501fe58237f671711691cc8c792479d233991ef5"
     },
     {
       "id": "research_roadmap_json",
@@ -98,8 +98,8 @@
       "surface": "website_hf",
       "shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
       "exists": true,
-      "bytes": 13313,
-      "sha256": "719a8b5f45810d6b09788391d8549eb5226e480454c9f2a4ed7380ceacfd834b"
     },
     {
       "id": "foundation_model_plan",
@@ -264,7 +264,7 @@
       "shows": "Summarizes the 2026-06-09 full-parameter FSDP feasibility gates: 1/8/32/64-step guarded runs passed, the 128-step opportunistic pilot was preempted for Qwen v5 handoff, and no full checkpoints or weights are published.",
       "exists": true,
       "bytes": 3253,
-      "sha256": "5c0e49adad861e00bede7f9245808e6e68346e7bc51c0b0142d2b99f944f2408"
     },
     {
       "id": "qwen3_full_parameter_gates_json",
@@ -275,7 +275,29 @@
       "shows": "Machine-readable summary of full-parameter feasibility evidence and publication policy for website and Hugging Face mirrors.",
       "exists": true,
       "bytes": 12183,
-      "sha256": "50b8b93f0bae34c7fa269ae52d7c2073d6122fdafc9297f380c052d94253bf3f"
     },
     {
       "id": "qwen3_full_parameter_gates_builder",
@@ -362,8 +384,8 @@
       "surface": "website_hf",
       "shows": "Gives a short project path with scope status and public surfaces.",
       "exists": true,
-      "bytes": 10496,
-      "sha256": "fc21325035c3b8d4892bfad50dd1a7f5f562a662b7da7be73f7349cb515544ef"
     },
     {
       "id": "artifact_guide",
@@ -418,7 +440,7 @@
       "shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
       "exists": true,
       "bytes": 4432,
-      "sha256": "d5ada9ec76cace484a779672d636222f1dfa281135508ff1b5d5eac98908bf60"
     },
     {
       "id": "source_alignment_validator",
@@ -671,7 +693,7 @@
       "shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
       "exists": true,
       "bytes": 8097,
-      "sha256": "9b4783b42d0a6f58f2bc36bb7990c86dc85af9d346cb59c90dca402e2c4cf0de"
     },
     {
       "id": "public_surface_qa",
@@ -785,8 +807,8 @@
       "surface": "repo",
       "shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
       "exists": true,
-      "bytes": 42224,
-      "sha256": "069b05229b72b9fa0c0917afd18655f58e5258499b43889dd96a87f29f577890"
     },
     {
       "id": "reproducibility_contract",
@@ -818,8 +840,8 @@
       "surface": "repo_hf",
       "shows": "Generates the selective artifact catalog from local files.",
       "exists": true,
-      "bytes": 42045,
-      "sha256": "0954119cda59bb9c77cba362a206ac237eae73c4a1dc44cfa56599f170fc94de"
     },
     {
       "id": "publication_audit",
@@ -842,7 +864,7 @@
       "volatile": true,
       "shows": "Separates setup paths from completed held-out-episode results.",
       "exists": true,
-      "bytes": 21325,
       "hash_policy": "existence_and_size_only"
     },
     {
@@ -866,7 +888,7 @@
       "volatile": true,
       "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
       "exists": true,
-      "bytes": 15654,
       "hash_policy": "existence_and_size_only"
     },
     {
@@ -899,8 +921,8 @@
       "surface": "website_hf",
       "shows": "Mirrors task metrics for the static dashboard.",
       "exists": true,
-      "bytes": 27604,
-      "sha256": "ebaf9d598b4cd91118f149cafa01fe4d17629499565b7be0c7ce4a0ffcd70f6b"
     },
     {
       "id": "feature_manifest",
@@ -1119,8 +1141,8 @@
       "surface": "repo_hf",
       "shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
       "exists": true,
-      "bytes": 15731,
-      "sha256": "1ee5621a825219c1125a9c3f3ac779ac8bfa2fc45668531dbee3a728be13bfb5"
     },
     {
       "id": "omni_model_comparison_json",
@@ -1130,8 +1152,8 @@
       "surface": "repo_hf",
       "shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
       "exists": true,
-      "bytes": 81593,
-      "sha256": "c570f0810dec46f9a14969089245051c2b3a255f074bd9d06f529615fa6fbd73"
     },
     {
       "id": "cosmos3_nano_verified_summary",
@@ -1548,8 +1570,8 @@
       "surface": "repo_hf",
       "shows": "Public-safe verified package for xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full (qwen3_omni_lora, status=verified).",
       "exists": true,
-      "file_count": 13,
-      "bytes": 12189099
     },
     {
       "id": "verified_public_summary_xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
@@ -1595,6 +1617,17 @@
       "bytes": 49205,
       "sha256": "fc198c3e443877bca42cc33bec6e2a194d6cb20e97c28e931a90736c45538bba"
     },
     {
       "id": "verified_public_package_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
       "title": "Verified public package: Qwen3-Omni LoRA",

 {
   "title": "Ropedia Xperience-10M Task Suite Artifact Index",
+  "generated_at_utc": "2026-06-13T17:46:38+00:00",
   "status": "pass",
+  "artifact_count": 162,
   "missing": [],
   "by_kind": {
     "project_path": 14,
     "scaleup_contract": 7,
+    "scaleup_status": 38,
     "publication_workflow": 5,
     "project_scope": 1,
     "source_alignment": 5,
     "citation": 1,
     "license": 1,
     "verified_public_package": 10,
+    "publication_audit": 7
   },
   "artifacts": [
     {
       "surface": "repo_hf",
       "shows": "Gives a compact current-state table for first-pass readers.",
       "exists": true,
+      "bytes": 13755,
+      "sha256": "342897ae05ceab83d626765c0052c140e414ba25ebda4fce9fb07bb37a2decef"
     },
     {
       "id": "project_status_json",
       "surface": "website_hf",
       "shows": "Machine-readable copy of the current project status for website and HF mirrors.",
       "exists": true,
+      "bytes": 23535,
+      "sha256": "9ffae32ff0b3750f89179d2ce92205f95a5b53069d0aa344d6342c23b1efebbd"
     },
     {
       "id": "research_roadmap",
       "surface": "repo_hf",
       "shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
       "exists": true,
+      "bytes": 14503,
+      "sha256": "bb06fb0ccf336cafb0305883b7f93c2e1af547c9ff04b1fa6fc87481d54bcf61"
     },
     {
       "id": "research_roadmap_json",
       "surface": "website_hf",
       "shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
       "exists": true,
+      "bytes": 13450,
+      "sha256": "668220626950f8e55410b8f829dba6306d0b2feedafdf0198800ad9814992d84"
     },
     {
       "id": "foundation_model_plan",
       "shows": "Summarizes the 2026-06-09 full-parameter FSDP feasibility gates: 1/8/32/64-step guarded runs passed, the 128-step opportunistic pilot was preempted for Qwen v5 handoff, and no full checkpoints or weights are published.",
       "exists": true,
       "bytes": 3253,
+      "sha256": "b25f1d8cde814207b4c3234bf07140cf99a0ede29af3f53dbc146aab464e8a9b"
     },
     {
       "id": "qwen3_full_parameter_gates_json",
       "shows": "Machine-readable summary of full-parameter feasibility evidence and publication policy for website and Hugging Face mirrors.",
       "exists": true,
       "bytes": 12183,
+      "sha256": "d051608d3428645778f721e538af93566ab772871a825ac12825dd5f18e94a95"
+    },
+    {
+      "id": "qwen3_v5_v6_comparison",
+      "title": "Qwen3-Omni v5/v6 comparison",
+      "path": "results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md",
+      "kind": "scaleup_status",
+      "surface": "repo_hf",
+      "shows": "Reader-facing comparison of the verified Qwen3 v5 release row and the latest verified v6 row, including metric deltas and release-tag policy.",
+      "exists": true,
+      "bytes": 1793,
+      "sha256": "890430b05ace20375fc9534f923d170c0509037272ba4ef523e3ca2f3c9ac746"
+    },
+    {
+      "id": "qwen3_v5_v6_comparison_json",
+      "title": "Qwen3-Omni v5/v6 comparison JSON",
+      "path": "docs/data/qwen3_v5_v6_comparison.json",
+      "kind": "scaleup_status",
+      "surface": "website_hf",
+      "shows": "Machine-readable v5/v6 metric deltas and publication recommendation for website and Hugging Face mirrors.",
+      "exists": true,
+      "bytes": 2814,
+      "sha256": "f5d16e279a82cdc6266a1318584bf38cbc0b105296d437f9b8bf0398403aace5"
     },
     {
       "id": "qwen3_full_parameter_gates_builder",
       "surface": "website_hf",
       "shows": "Gives a short project path with scope status and public surfaces.",
       "exists": true,
+      "bytes": 10597,
+      "sha256": "a64b7c033c54879e0183e7ec794d3197fb483024c25947759287fcd4b7e0fec1"
     },
     {
       "id": "artifact_guide",
       "shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
       "exists": true,
       "bytes": 4432,
+      "sha256": "0d0d381f726c1e3787fb3fb15b6fb8879512c26fa0dc06fb943e1a239b0063dd"
     },
     {
       "id": "source_alignment_validator",
       "shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
       "exists": true,
       "bytes": 8097,
+      "sha256": "1cdc8b4767b3ca88eada654a3117aa2de253fea7af62573b080088e8f1b311bd"
     },
     {
       "id": "public_surface_qa",
       "surface": "repo",
       "shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
       "exists": true,
+      "bytes": 42394,
+      "sha256": "43a70436108eea3fa6692096ebf318fb755ff040d925094ef9de018f212fde18"
     },
     {
       "id": "reproducibility_contract",
       "surface": "repo_hf",
       "shows": "Generates the selective artifact catalog from local files.",
       "exists": true,
+      "bytes": 42809,
+      "sha256": "c03d1b1367ad191fea0be3c634fddf8ee6fdc2118bf17396920c16cc288c4ef0"
     },
     {
       "id": "publication_audit",
       "volatile": true,
       "shows": "Separates setup paths from completed held-out-episode results.",
       "exists": true,
+      "bytes": 21795,
       "hash_policy": "existence_and_size_only"
     },
     {
       "volatile": true,
       "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
       "exists": true,
+      "bytes": 15774,
       "hash_policy": "existence_and_size_only"
     },
     {
       "surface": "website_hf",
       "shows": "Mirrors task metrics for the static dashboard.",
       "exists": true,
+      "bytes": 27807,
+      "sha256": "3a6a5ee59562ae189844cb4ba26d6e261c2f73a8e54bb6e2fbc3e307c2d123fa"
     },
     {
       "id": "feature_manifest",
       "surface": "repo_hf",
       "shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
       "exists": true,
+      "bytes": 15999,
+      "sha256": "d5a7118a878b202adbc50e3436bbe134e5de139f2a9e97176efe9ecc0f446088"
     },
     {
       "id": "omni_model_comparison_json",
       "surface": "repo_hf",
       "shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
       "exists": true,
+      "bytes": 81866,
+      "sha256": "191125098a66ecccfa27395c0f9776616f74b4bf8fb19f16b75cda7ed06cb4b2"
     },
     {
       "id": "cosmos3_nano_verified_summary",
       "surface": "repo_hf",
       "shows": "Public-safe verified package for xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full (qwen3_omni_lora, status=verified).",
       "exists": true,
+      "file_count": 14,
+      "bytes": 12189730
     },
     {
       "id": "verified_public_summary_xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
       "bytes": 49205,
       "sha256": "fc198c3e443877bca42cc33bec6e2a194d6cb20e97c28e931a90736c45538bba"
     },
+    {
+      "id": "verified_public_package_audit_xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
+      "title": "Verified package audit: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/package_audit.json",
+      "kind": "publication_audit",
+      "surface": "repo_hf",
+      "shows": "Package audit for xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full.",
+      "exists": true,
+      "bytes": 631,
+      "sha256": "7cf478ae33c52bae0ba742e81da8e482e06d0853eecd85f895f447a708f81718"
+    },
     {
       "id": "verified_public_package_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
       "title": "Verified public package: Qwen3-Omni LoRA",

docs/data/omni_model_comparison.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
-  "generated_at_utc": "2026-06-12T18:14:48+00:00",
   "status": "pass",
   "version_count": 3,
   "model_group_count": 5,
@@ -588,8 +588,8 @@
               "global_step": 3204
             }
           ],
-          "is_current": true,
-          "weights_repository": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep"
         },
         {
           "id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
@@ -640,8 +640,8 @@
               "global_step": 6408
             }
           ],
-          "is_current": false,
-          "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo"
         },
         {
           "id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
@@ -1202,8 +1202,8 @@
               "global_step": 3204
             }
           ],
-          "is_current": true,
-          "weights_repository": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep"
         },
         {
           "id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
@@ -1254,8 +1254,8 @@
               "global_step": 6408
             }
           ],
-          "is_current": false,
-          "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo"
         },
         {
           "id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
@@ -1753,6 +1753,7 @@
     "Cosmos3-Super now has both a 128-episode base-weight Reasoner evaluation on the JSON task and a fine-tuned forward-dynamics LoRA branch over camera-pose proxy targets."
   ],
   "pending": [
-    "Use the verified Qwen3 v5 dense multiscale full-eval package as the current Qwen row; older Qwen package rows remain historical diagnostics for comparison."
   ]
 }

 {
   "title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
+  "generated_at_utc": "2026-06-13T17:41:35+00:00",
   "status": "pass",
   "version_count": 3,
   "model_group_count": 5,
               "global_step": 3204
             }
           ],
+          "is_current": false,
+          "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo"
         },
         {
           "id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
               "global_step": 6408
             }
           ],
+          "is_current": true,
+          "weights_repository": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep"
         },
         {
           "id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
               "global_step": 3204
             }
           ],
+          "is_current": false,
+          "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo"
         },
         {
           "id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
               "global_step": 6408
             }
           ],
+          "is_current": true,
+          "weights_repository": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep"
         },
         {
           "id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
     "Cosmos3-Super now has both a 128-episode base-weight Reasoner evaluation on the JSON task and a fine-tuned forward-dynamics LoRA branch over camera-pose proxy targets."
   ],
   "pending": [
+    "Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the current Qwen row; the v5 release tag remains pinned as the previous verified release.",
+    "Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before claiming v6 is globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses subtask, next-action, object micro-F1, and JSON validity slightly."
   ]
 }

docs/data/project_packet.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "title": "Ropedia Xperience-10M Task Suite Project Packet",
-    "version": "2026-06-08",
     "scope_status": {
         "validated_data": "one public Xperience-10M sample episode",
         "aligned_frames": 5821,
@@ -12,7 +12,7 @@
         "raw_xperience10m_data_in_repo": false,
         "audio_feature_status": "Audio is one of the synchronized source modalities in the current task representation.",
         "qwen3_omni_32_episode_claim": false,
-        "qwen3_omni_status": "The selected 96/16/16 Qwen3-Omni v4 final diagnostic result is verified, meets the strict-JSON target, and still has weak action/subtask metrics that guide the next error-analysis pass.",
         "cosmos3_super_forward_dynamics_lora_status": "The first Cosmos3-Super fine-tuned adapter branch is verified as a forward-dynamics LoRA over camera-pose proxy targets; it reports loss metrics, not JSON action-label accuracy.",
         "task_suite_enhancement_128_status": "Current no-new-episode enhancement pack recommends multiscale_20s10_40s20_80s40, hierarchical action/subtask targets, label-normalized scoring, and raw-feature shards before adding more episodes."
     },
@@ -118,7 +118,7 @@
                 "scripts/omni/discover_xperience10m_sources.py",
                 "docs/data/omni_finetune_verified_result.json"
             ],
-            "readout": "The selected-episode held-out Qwen3-Omni final diagnostic result is verified and JSON-format reliability meets the 98% target. The same public comparison also includes the verified 128-episode baselines, Cosmos3-Nano compatibility result, Cosmos3-Super Reasoner evaluation, and Cosmos3-Super Forward-Dynamics LoRA package. The next milestone is action/subtask error analysis and stronger model-quality runs on the same split."
         },
         {
             "step": 9,
@@ -155,7 +155,7 @@
         "hf_model_baselines": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines"
     },
     "current_reading_notes": [
-        "The first cross-episode Qwen3-Omni v4 diagnostic pilot is verified, but strong model quality is not yet shown; action/subtask metrics remain weak.",
         "The current 128-episode suite has a no-new-episode enhancement plan: multiscale_20s10_40s20_80s40 windows, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
         "Cosmos3-Super Forward-Dynamics LoRA is verified as a loss-based world-model adapter branch, not as JSON action-token prediction.",
         "Older Qwen3-Omni setup artifacts are separate from the verified selected-episode diagnostic package.",

 {
     "title": "Ropedia Xperience-10M Task Suite Project Packet",
+    "version": "2026-06-14",
     "scope_status": {
         "validated_data": "one public Xperience-10M sample episode",
         "aligned_frames": 5821,
         "raw_xperience10m_data_in_repo": false,
         "audio_feature_status": "Audio is one of the synchronized source modalities in the current task representation.",
         "qwen3_omni_32_episode_claim": false,
+        "qwen3_omni_status": "The selected 96/16/16 Qwen3-Omni v6 diagnostic branch is verified, meets the strict-JSON target, improves action macro-F1/contact accuracy versus v5, and still has weak action/subtask metrics that guide the next error-analysis pass.",
         "cosmos3_super_forward_dynamics_lora_status": "The first Cosmos3-Super fine-tuned adapter branch is verified as a forward-dynamics LoRA over camera-pose proxy targets; it reports loss metrics, not JSON action-label accuracy.",
         "task_suite_enhancement_128_status": "Current no-new-episode enhancement pack recommends multiscale_20s10_40s20_80s40, hierarchical action/subtask targets, label-normalized scoring, and raw-feature shards before adding more episodes."
     },
                 "scripts/omni/discover_xperience10m_sources.py",
                 "docs/data/omni_finetune_verified_result.json"
             ],
+            "readout": "The selected-episode held-out Qwen3-Omni v6 diagnostic branch is verified and JSON-format reliability meets the 98% target. The same public comparison also includes the verified 128-episode baselines, Cosmos3-Nano compatibility result, Cosmos3-Super Reasoner evaluation, and Cosmos3-Super Forward-Dynamics LoRA package. The next milestone is action/subtask error analysis and stronger model-quality runs on the same split."
         },
         {
             "step": 9,
         "hf_model_baselines": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines"
     },
     "current_reading_notes": [
+        "The latest cross-episode Qwen3-Omni v6 diagnostic branch is verified, but strong model quality is not yet shown; action/subtask metrics remain weak and v5 remains stronger on several non-contact metrics.",
         "The current 128-episode suite has a no-new-episode enhancement plan: multiscale_20s10_40s20_80s40 windows, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
         "Cosmos3-Super Forward-Dynamics LoRA is verified as a loss-based world-model adapter branch, not as JSON action-token prediction.",
         "Older Qwen3-Omni setup artifacts are separate from the verified selected-episode diagnostic package.",

docs/data/project_status.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
     "title": "Ropedia Xperience-10M Task Suite Project Status",
-    "version": "2026-06-08",
-    "decision": "public_sample_pipeline_verified_128_enhancement_qwen3_v4_cosmos_comparison",
     "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, compares verified Qwen3-Omni and Cosmos3 branch packages as early cross-episode diagnostics, and now records a no-new-episode enhancement pack for pushing the current 128-episode suite harder.",
     "scope_boundary": {
         "validated_episode_count": 1,
@@ -21,11 +21,11 @@
             "test": 16
         },
         "qwen3_omni_exported_window_counts": {
-            "train": 2848,
-            "val": 512,
-            "test": 448
         },
-        "qwen3_omni_json_validity_rate": 1.0,
         "qwen3_omni_validation_aware": true,
         "qwen3_omni_json_quality_target_met": true,
         "qwen3_omni_lora_adapter_repo": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
@@ -48,12 +48,12 @@
             "test": 448
         },
         "multi_episode_128_baseline_task_count": 12,
-        "qwen3_omni_current_eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
-        "qwen3_omni_current_train_epochs": 4,
-        "qwen3_omni_action_macro_f1": 0.0018678269676001454,
-        "qwen3_omni_subtask_accuracy": 0.0,
-        "qwen3_omni_contact_accuracy": 0.7299107142857143,
-        "qwen3_omni_object_micro_f1": 0.31099781500364165,
         "task_suite_enhancement_128_available": true,
         "task_suite_enhancement_128_current_windows": 3808,
         "task_suite_enhancement_128_recommended_export": "multiscale_20s10_40s20_80s40",
@@ -241,13 +241,15 @@
             "status": "final_verified_diagnostic_result_json_target_met",
             "evidence": [
                 "docs/data/omni_finetune_verified_result.json",
-                "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/",
                 "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
                 "scripts/omni/package_verified_omni_result.py",
                 "scripts/omni/audit_verified_omni_package.py",
                 "scripts/omni/analyze_qwen3_omni_errors.py"
             ],
-            "readout": "The selected 96/16/16 episode split now has a v4 four-epoch public-safe held-out package with 3,808 exported windows, 512 validation windows, 448 test predictions, validation/audit summaries, and a public LoRA adapter repo. JSON validity is 100.00%, meeting the 98% target; transition accuracy is 97.32%, contact accuracy is 72.99%, object micro-F1 is 31.10%, next-action accuracy is 3.35%, and action/subtask metrics remain weak, so it is still a diagnostic baseline rather than a strong model-quality claim."
         },
         {
             "area": "Cosmos3-Nano future-window branch",
@@ -324,7 +326,7 @@
         "Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
     ],
     "current_reading_notes": [
-        "The final Qwen3-Omni v4 diagnostic result is verified and meets the strict-JSON target, but action/subtask held-out quality is still weak: JSON validity is 100.00%, action macro-F1 is 0.0019, and subtask accuracy is 0.0000.",
         "Use TASK_SUITE_ENHANCEMENT_128.md and docs/data/task_suite_enhancement_128.json to push the current 128-episode suite without more raw episodes through multiscale_20s10_40s20_80s40, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
         "Use docs/data/omni_model_comparison.json to compare both views: the single-episode/128-baseline/model-branch result layers and the model-family grouping for task heads, Qwen3-Omni LoRA, Cosmos3-Nano, and Cosmos3-Super.",
         "The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",

 {
     "title": "Ropedia Xperience-10M Task Suite Project Status",
+    "version": "2026-06-14",
+    "decision": "public_sample_pipeline_verified_128_enhancement_qwen3_v6_cosmos_comparison",
     "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, compares verified Qwen3-Omni and Cosmos3 branch packages as early cross-episode diagnostics, and now records a no-new-episode enhancement pack for pushing the current 128-episode suite harder.",
     "scope_boundary": {
         "validated_episode_count": 1,
             "test": 16
         },
         "qwen3_omni_exported_window_counts": {
+            "train": 25629,
+            "val": 4608,
+            "test": 4032
         },
+        "qwen3_omni_json_validity_rate": 0.9990079365079365,
         "qwen3_omni_validation_aware": true,
         "qwen3_omni_json_quality_target_met": true,
         "qwen3_omni_lora_adapter_repo": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
             "test": 448
         },
         "multi_episode_128_baseline_task_count": 12,
+        "qwen3_omni_current_eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
+        "qwen3_omni_current_train_epochs": 2,
+        "qwen3_omni_action_macro_f1": 0.0028830723979596335,
+        "qwen3_omni_subtask_accuracy": 0.0037313432835820895,
+        "qwen3_omni_contact_accuracy": 0.8177083333333334,
+        "qwen3_omni_object_micro_f1": 0.3064982378331287,
         "task_suite_enhancement_128_available": true,
         "task_suite_enhancement_128_current_windows": 3808,
         "task_suite_enhancement_128_recommended_export": "multiscale_20s10_40s20_80s40",
             "status": "final_verified_diagnostic_result_json_target_met",
             "evidence": [
                 "docs/data/omni_finetune_verified_result.json",
+                "docs/data/qwen3_v5_v6_comparison.json",
+                "results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md",
+                "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/",
                 "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
                 "scripts/omni/package_verified_omni_result.py",
                 "scripts/omni/audit_verified_omni_package.py",
                 "scripts/omni/analyze_qwen3_omni_errors.py"
             ],
+            "readout": "The selected 96/16/16 episode split now has a current v6 rank64/lr5e-5 public-safe held-out package with 34,269 exported windows, 4,032 test predictions, validation/audit summaries, and a public LoRA adapter repo. JSON validity is 99.90%, meeting the 98% target; transition accuracy is 98.98%, contact accuracy is 81.77%, object micro-F1 is 30.65%, next-action accuracy is 4.31%, and action/subtask metrics remain weak. v6 improves action macro-F1 and contact accuracy versus v5, but v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics."
         },
         {
             "area": "Cosmos3-Nano future-window branch",
         "Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
     ],
     "current_reading_notes": [
+        "The latest Qwen3-Omni v6 diagnostic branch is verified and meets the strict-JSON target, but action/subtask held-out quality is still weak: JSON validity is 99.90%, action macro-F1 is 0.0029, and subtask accuracy is 0.0037. v5 remains the pinned prior release row because it is still stronger on several metrics.",
         "Use TASK_SUITE_ENHANCEMENT_128.md and docs/data/task_suite_enhancement_128.json to push the current 128-episode suite without more raw episodes through multiscale_20s10_40s20_80s40, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
         "Use docs/data/omni_model_comparison.json to compare both views: the single-episode/128-baseline/model-branch result layers and the model-family grouping for task heads, Qwen3-Omni LoRA, Cosmos3-Nano, and Cosmos3-Super.",
         "The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",

docs/data/publication_audit.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-12T18:14:57+00:00",
   "checks": [
     {
       "name": "required_publication_assets_present",
@@ -187,8 +187,8 @@
     "github_repo": {
       "root": "repo",
       "exists": true,
-      "file_count": 906,
-      "text_file_count": 740,
       "largest_file": {
         "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
         "bytes": 55702978

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-13T17:46:58+00:00",
   "checks": [
     {
       "name": "required_publication_assets_present",
     "github_repo": {
       "root": "repo",
       "exists": true,
+      "file_count": 914,
+      "text_file_count": 746,
       "largest_file": {
         "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
         "bytes": 55702978

docs/data/quality_gates.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Release Checks",
   "status": "pass",
-  "generated_at_utc": "2026-06-12T18:14:49+00:00",
   "rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
   "automated_gates": [
     {

 {
   "title": "Ropedia Xperience-10M Release Checks",
   "status": "pass",
+  "generated_at_utc": "2026-06-13T17:46:37+00:00",
   "rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
   "automated_gates": [
     {

docs/data/qwen3_full_parameter_gates.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "title": "Qwen3-Omni Full-Parameter Feasibility Gates",
-  "generated_at_utc": "2026-06-12T18:14:48+00:00",
   "status": "pass",
   "decision": "full_parameter_feasible_for_guarded_short_runs_not_promoted",
   "interpretation": "The 2026-06-09 gates prove that Qwen3-Omni full-parameter FSDP can load, prepare, run backward/optimizer steps, and complete guarded pilots up to 128 optimizer steps on an 8-GPU remote worker. They do not prove a production full-parameter fine-tune, and they intentionally save no full checkpoints or public weights.",

 {
   "title": "Qwen3-Omni Full-Parameter Feasibility Gates",
+  "generated_at_utc": "2026-06-13T17:41:13+00:00",
   "status": "pass",
   "decision": "full_parameter_feasible_for_guarded_short_runs_not_promoted",
   "interpretation": "The 2026-06-09 gates prove that Qwen3-Omni full-parameter FSDP can load, prepare, run backward/optimizer steps, and complete guarded pilots up to 128 optimizer steps on an 8-GPU remote worker. They do not prove a production full-parameter fine-tune, and they intentionally save no full checkpoints or public weights.",

docs/data/research_roadmap.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
     "title": "Ropedia Xperience-10M Research Roadmap",
     "summary": "Staged path from the public-sample task lab to verified Qwen3-Omni, Cosmos3-Nano, and Cosmos3-Super diagnostics, same-split 128-episode baseline alignment, a no-new-episode 128-suite enhancement pack, action/subtask error analysis, world/policy branches, and a future Xperience-native embodied foundation model.",
-    "current_decision_point": "Push the current selected 128 episodes harder before requesting more storage: keep the public-sample task suite as the development harness, use the final verified selected-episode Qwen3-Omni v4 diagnostic result and same-split 128-episode simple/NN metadata baselines as structured-task references, read Cosmos3-Nano and Cosmos3-Super Forward-Dynamics LoRA as separate world-model results, export multiscale_20s10_40s20_80s40 windows plus hierarchical action/subtask targets, and defer policy-model experiments until robot-compatible targets are implemented. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
     "additional_development_directions": {
         "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
         "source_json": "docs/data/additional_development_directions.json",
@@ -52,8 +52,8 @@
         },
         {
             "id": "qwen3_omni_lora_diagnostic_pilot",
-            "name": "Qwen3-Omni LoRA Final Diagnostic Result",
-            "status": "verified_baseline",
             "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
             "deliverables": [
                 "dataset JSONL/media manifests",
@@ -64,10 +64,13 @@
                 "metrics",
                 "confusion matrices",
                 "run report",
                 "public LoRA adapter repo"
             ],
             "completion_evidence": [
                 "docs/data/omni_finetune_verified_result.json",
                 "results/omni_finetune/verified_public/",
                 "dataset_manifest.json",
                 "training_metadata.json",

 {
     "title": "Ropedia Xperience-10M Research Roadmap",
     "summary": "Staged path from the public-sample task lab to verified Qwen3-Omni, Cosmos3-Nano, and Cosmos3-Super diagnostics, same-split 128-episode baseline alignment, a no-new-episode 128-suite enhancement pack, action/subtask error analysis, world/policy branches, and a future Xperience-native embodied foundation model.",
+    "current_decision_point": "Push the current selected 128 episodes harder before requesting more storage: keep the public-sample task suite as the development harness, use the latest verified selected-episode Qwen3-Omni v6 diagnostic branch plus the pinned v5 row as structured-task references, read Cosmos3-Nano and Cosmos3-Super Forward-Dynamics LoRA as separate world-model results, continue with hierarchical action/subtask targets and label-normalized scoring, and defer policy-model experiments until robot-compatible targets are implemented. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
     "additional_development_directions": {
         "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
         "source_json": "docs/data/additional_development_directions.json",
         },
         {
             "id": "qwen3_omni_lora_diagnostic_pilot",
+            "name": "Qwen3-Omni LoRA Latest Diagnostic Branch",
+            "status": "verified_latest_branch",
             "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
             "deliverables": [
                 "dataset JSONL/media manifests",
                 "metrics",
                 "confusion matrices",
                 "run report",
+                "v5/v6 comparison",
                 "public LoRA adapter repo"
             ],
             "completion_evidence": [
                 "docs/data/omni_finetune_verified_result.json",
+                "docs/data/qwen3_v5_v6_comparison.json",
+                "results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md",
                 "results/omni_finetune/verified_public/",
                 "dataset_manifest.json",
                 "training_metadata.json",

docs/data/research_roadmap_interactive.json CHANGED Viewed

@@ -2222,7 +2222,7 @@
     ],
     "status": "planning_artifact"
   },
-  "generated_at_utc": "2026-06-08T12:22:13+00:00",
   "omni_plan": {
     "adapter": "LoRA rank 16, alpha 32, dropout 0.05",
     "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
@@ -2287,6 +2287,8 @@
     {
       "completion_evidence": [
         "docs/data/omni_finetune_verified_result.json",
         "results/omni_finetune/verified_public/",
         "dataset_manifest.json",
         "training_metadata.json",
@@ -2304,14 +2306,15 @@
         "metrics",
         "confusion matrices",
         "run report",
         "public LoRA adapter repo"
       ],
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
       "id": "qwen3_omni_lora_diagnostic_pilot",
-      "name": "Qwen3-Omni LoRA Final Diagnostic Result",
       "reader_takeaway": "The final omni-model diagnostic result establishes the full held-out training/validation/evaluation loop and meets the strict-JSON target, but weak action/subtask metrics make it a diagnostic baseline.",
       "stage": "future",
-      "status": "verified_baseline"
     },
     {
       "completion_evidence": [

     ],
     "status": "planning_artifact"
   },
+  "generated_at_utc": "2026-06-13T17:41:13+00:00",
   "omni_plan": {
     "adapter": "LoRA rank 16, alpha 32, dropout 0.05",
     "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
     {
       "completion_evidence": [
         "docs/data/omni_finetune_verified_result.json",
+        "docs/data/qwen3_v5_v6_comparison.json",
+        "results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md",
         "results/omni_finetune/verified_public/",
         "dataset_manifest.json",
         "training_metadata.json",
         "metrics",
         "confusion matrices",
         "run report",
+        "v5/v6 comparison",
         "public LoRA adapter repo"
       ],
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
       "id": "qwen3_omni_lora_diagnostic_pilot",
+      "name": "Qwen3-Omni LoRA Latest Diagnostic Branch",
       "reader_takeaway": "The latest omni-model diagnostic branch establishes the full held-out training/validation/evaluation loop and meets the strict-JSON target, but weak action/subtask metrics make it a diagnostic baseline.",
       "stage": "future",
+      "status": "verified_latest_branch"
     },
     {
       "completion_evidence": [

docs/data/scope_claims_audit.json CHANGED Viewed

@@ -1,15 +1,15 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-12T18:14:51+00:00",
   "summary": {
     "qwen3_omni_verified_diagnostic_pilot": true,
     "dataset_manifest_num_episodes": 119,
-    "dataset_manifest_num_samples": 3808,
-    "training_metadata_num_train_samples": 2848,
-    "eval_num_samples": 448,
-    "eval_json_validity_rate": 1.0,
     "quality_target_met": true,
-    "historical_identifier_count": 1799,
     "public_32_episode_status_file_count": 1,
     "failure_count": 0
   },
@@ -25,7 +25,7 @@
     {
       "name": "summary_metrics_preserves_verified_diagnostic_status",
       "status": "pass",
-      "detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and meets the 98% target for JSON validity; action/subtask quality remains weak, so it is a structured-task baseline. Cosmos3-Nano future-window compatibility and Cosmos3-Super Forward-Dynamics LoRA are also verified as separate world-model diagnostics with different metrics.",
       "evidence": [
         "docs/data/summary_metrics.json"
       ]
@@ -33,25 +33,25 @@
     {
       "name": "verified_package_dataset_has_expected_windows",
       "status": "pass",
-      "detail": "episodes=119, samples=3808, split_counts={'train': 2848, 'val': 512, 'test': 448}",
       "evidence": [
-        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/dataset/dataset_manifest.json"
       ]
     },
     {
       "name": "verified_package_training_records_8_processes",
       "status": "pass",
-      "detail": "train=2848, val=512, processes=8",
       "evidence": [
-        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/training/training_metadata.json"
       ]
     },
     {
       "name": "verified_package_eval_records_real_held_out_metrics",
       "status": "pass",
-      "detail": "samples=448, split=test, held_out=14, json_validity=1.0",
       "evidence": [
-        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/eval/metrics.json"
       ]
     },
     {
@@ -59,7 +59,7 @@
       "status": "pass",
       "detail": "audit_status=pass, issues=0",
       "evidence": [
-        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/package_audit.json"
       ]
     },
     {
@@ -84,7 +84,7 @@
     {
       "name": "historical_32ep_identifiers_are_confined_to_readiness_artifacts",
       "status": "pass",
-      "detail": "historical identifiers found in result provenance files=1799",
       "evidence": [
         "results/omni_finetune/"
       ]
@@ -424,6 +424,6 @@
       "example": "{\"id\": \"xperience-10m-sample:qa:53\", \"episode_id\": \"xperience-10m-sample\", \"split\": \"train\", \"target\": \"episode_qa\", \"prompt_type\": \"json_episode_understanding\", \"center_window\": {\"start_frame\": 1060, \"end_frame\": 1079, \"num_frames\": 20}, \"media\": {\"video_path"
     }
   ],
-  "historical_identifier_total_count": 1799,
   "failures": []
 }

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-13T17:46:50+00:00",
   "summary": {
     "qwen3_omni_verified_diagnostic_pilot": true,
     "dataset_manifest_num_episodes": 119,
+    "dataset_manifest_num_samples": 34269,
+    "training_metadata_num_train_samples": 25629,
+    "eval_num_samples": 4032,
+    "eval_json_validity_rate": 0.9990079365079365,
     "quality_target_met": true,
+    "historical_identifier_count": 1800,
     "public_32_episode_status_file_count": 1,
     "failure_count": 0
   },
     {
       "name": "summary_metrics_preserves_verified_diagnostic_status",
       "status": "pass",
+      "detail": "The selected-episode Qwen3-Omni v6 diagnostic branch is verified on the 96/16/16 split and meets the 98% target for JSON validity; action/subtask quality remains weak, so it is a structured-task baseline rather than a strong model-quality claim. v6 improves action macro-F1 and contact accuracy versus v5, while v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics. Cosmos3-Nano future-window compatibility and Cosmos3-Super Forward-Dynamics LoRA are also verified as separate world-model diagnostics with different metrics.",
       "evidence": [
         "docs/data/summary_metrics.json"
       ]
     {
       "name": "verified_package_dataset_has_expected_windows",
       "status": "pass",
+      "detail": "episodes=119, samples=34269, split_counts={'test': 4032, 'train': 25629, 'val': 4608}, expected_samples=34269, expected_split_counts={'train': 25629, 'val': 4608, 'test': 4032}",
       "evidence": [
+        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/dataset/dataset_manifest.json"
       ]
     },
     {
       "name": "verified_package_training_records_8_processes",
       "status": "pass",
+      "detail": "train=25629, val=2048, processes=8, expected_train=25629, expected_val=2048, expected_processes=8",
       "evidence": [
+        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/training/training_metadata.json"
       ]
     },
     {
       "name": "verified_package_eval_records_real_held_out_metrics",
       "status": "pass",
+      "detail": "samples=4032, split=test, held_out=14, json_validity=0.9990079365079365, expected_samples=4032, expected_held_out=14",
       "evidence": [
+        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json"
       ]
     },
     {
       "status": "pass",
       "detail": "audit_status=pass, issues=0",
       "evidence": [
+        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/package_audit.json"
       ]
     },
     {
     {
       "name": "historical_32ep_identifiers_are_confined_to_readiness_artifacts",
       "status": "pass",
+      "detail": "historical identifiers found in result provenance files=1800",
       "evidence": [
         "results/omni_finetune/"
       ]
       "example": "{\"id\": \"xperience-10m-sample:qa:53\", \"episode_id\": \"xperience-10m-sample\", \"split\": \"train\", \"target\": \"episode_qa\", \"prompt_type\": \"json_episode_understanding\", \"center_window\": {\"start_frame\": 1060, \"end_frame\": 1079, \"num_frames\": 20}, \"media\": {\"video_path"
     }
   ],
+  "historical_identifier_total_count": 1800,
   "failures": []
 }

docs/data/source_alignment_audit.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Source Alignment Note",
   "status": "pass",
-  "generated_at_utc": "2026-06-12T18:14:51+00:00",
   "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
   "alignment_summary": {
     "full_dataset_repo": "ropedia-ai/xperience-10m",

 {
   "title": "Ropedia Xperience-10M Source Alignment Note",
   "status": "pass",
+  "generated_at_utc": "2026-06-13T17:46:47+00:00",
   "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
   "alignment_summary": {
     "full_dataset_repo": "ropedia-ai/xperience-10m",

docs/data/summary_metrics.json CHANGED Viewed

@@ -14,7 +14,7 @@
       "visualization.rrd"
     ],
     "access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
-    "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and meets the 98% target for JSON validity; action/subtask quality remains weak, so it is a structured-task baseline. Cosmos3-Nano future-window compatibility and Cosmos3-Super Forward-Dynamics LoRA are also verified as separate world-model diagnostics with different metrics."
   },
   "models": {
     "motion_action": {

       "visualization.rrd"
     ],
     "access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
+    "current_scope": "The selected-episode Qwen3-Omni v6 diagnostic branch is verified on the 96/16/16 split and meets the 98% target for JSON validity; action/subtask quality remains weak, so it is a structured-task baseline rather than a strong model-quality claim. v6 improves action macro-F1 and contact accuracy versus v5, while v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics. Cosmos3-Nano future-window compatibility and Cosmos3-Super Forward-Dynamics LoRA are also verified as separate world-model diagnostics with different metrics."
   },
   "models": {
     "motion_action": {

docs/data/task_surface_integrity.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-12T18:14:51+00:00",
   "summary": {
     "task_count": 12,
     "expected_task_count": 12,
@@ -64,45 +64,45 @@
       "observed": "timeline_action"
     },
     {
-      "name": "timeline_action: public_field_output_short_is_human_readable",
       "status": "pass",
-      "value": "current action class",
       "raw_hits": []
     },
     {
-      "name": "timeline_action: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Look at one short multimodal window and name what action is happening now.",
       "raw_hits": []
     },
     {
-      "name": "timeline_action: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Action Recognition",
       "raw_hits": []
     },
     {
-      "name": "timeline_action: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "20-frame multimodal window",
       "raw_hits": []
     },
     {
-      "name": "timeline_action: public_field_card_blurb_is_human_readable",
       "status": "pass",
-      "value": "Recognize the current manipulation action from synchronized visual, motion, inertial, pose, and annotation context.",
       "raw_hits": []
     },
     {
-      "name": "timeline_action: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "window features -> action label builder -> classifier",
       "raw_hits": []
     },
     {
-      "name": "timeline_action: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Egocentric Action Recognition",
       "raw_hits": []
     },
     {
@@ -184,45 +184,45 @@
       "observed": "timeline_subtask"
     },
     {
-      "name": "timeline_subtask: public_field_output_short_is_human_readable",
       "status": "pass",
-      "value": "current procedure step",
       "raw_hits": []
     },
     {
-      "name": "timeline_subtask: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Predict the higher-level task stage for the current window.",
       "raw_hits": []
     },
     {
-      "name": "timeline_subtask: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Procedure Step Recognition",
       "raw_hits": []
     },
     {
-      "name": "timeline_subtask: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "20-frame multimodal window",
       "raw_hits": []
     },
     {
-      "name": "timeline_subtask: public_field_card_blurb_is_human_readable",
       "status": "pass",
-      "value": "Recognize the broader activity stage so fine actions become a readable procedure timeline.",
       "raw_hits": []
     },
     {
-      "name": "timeline_subtask: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "window features -> subtask label builder -> classifier",
       "raw_hits": []
     },
     {
-      "name": "timeline_subtask: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Temporal Subtask Recognition",
       "raw_hits": []
     },
     {
@@ -304,45 +304,45 @@
       "observed": "transition_detection"
     },
     {
-      "name": "transition_detection: public_field_output_short_is_human_readable",
       "status": "pass",
-      "value": "boundary or steady",
       "raw_hits": []
     },
     {
-      "name": "transition_detection: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Detect whether the current window is near a boundary between actions.",
       "raw_hits": []
     },
     {
-      "name": "transition_detection: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Action Boundary Detection",
       "raw_hits": []
     },
     {
-      "name": "transition_detection: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "current window with boundary target",
       "raw_hits": []
     },
     {
-      "name": "transition_detection: public_field_card_blurb_is_human_readable",
       "status": "pass",
-      "value": "Detect the local moment where the episode changes from one action segment to the next.",
       "raw_hits": []
     },
     {
-      "name": "transition_detection: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "action changes -> boundary labels -> binary classifier",
       "raw_hits": []
     },
     {
-      "name": "transition_detection: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Temporal Action Segmentation",
       "raw_hits": []
     },
     {
@@ -422,45 +422,45 @@
       "observed": "next_action"
     },
     {
-      "name": "next_action: public_field_output_short_is_human_readable",
       "status": "pass",
-      "value": "action at t+20 frames",
       "raw_hits": []
     },
     {
-      "name": "next_action: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Use the current window to guess the action that will happen shortly after it.",
       "raw_hits": []
     },
     {
-      "name": "next_action: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Next-Action Prediction",
       "raw_hits": []
     },
     {
-      "name": "next_action: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "current window at time t",
       "raw_hits": []
     },
     {
-      "name": "next_action: public_field_card_blurb_is_human_readable",
       "status": "pass",
-      "value": "Forecast the near-future action from the current observations only.",
       "raw_hits": []
     },
     {
-      "name": "next_action: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "current features -> future label shift -> classifier",
       "raw_hits": []
     },
     {
-      "name": "next_action: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Short-Horizon Intention Prediction",
       "raw_hits": []
     },
     {
@@ -540,45 +540,45 @@
       "observed": "hand_trajectory_forecast"
     },
     {
-      "name": "hand_trajectory_forecast: public_field_output_short_is_human_readable",
       "status": "pass",
-      "value": "future hand-joint trajectory",
       "raw_hits": []
     },
     {
-      "name": "hand_trajectory_forecast: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Predict where the hands will move over the next few frames.",
       "raw_hits": []
     },
     {
-      "name": "hand_trajectory_forecast: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Hand Trajectory Forecasting",
       "raw_hits": []
     },
     {
-      "name": "hand_trajectory_forecast: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "current multimodal window",
       "raw_hits": []
     },
     {
-      "name": "hand_trajectory_forecast: public_field_card_blurb_is_human_readable",
       "status": "pass",
-      "value": "Predict the future 3D left/right hand path from the current multimodal state.",
       "raw_hits": []
     },
     {
-      "name": "hand_trajectory_forecast: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "current features -> future mocap target -> regression head",
       "raw_hits": []
     },
     {
-      "name": "hand_trajectory_forecast: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "3D Hand Motion Forecasting",
       "raw_hits": []
     },
     {
@@ -658,45 +658,45 @@
       "observed": "contact_prediction"
     },
     {
-      "name": "contact_prediction: public_field_output_short_is_human_readable",
       "status": "pass",
-      "value": "contact or no contact",
       "raw_hits": []
     },
     {
-      "name": "contact_prediction: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Predict whether the body or hand is in contact with something.",
       "raw_hits": []
     },
     {
-      "name": "contact_prediction: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Contact State Prediction",
       "raw_hits": []
     },
     {
-      "name": "contact_prediction: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "non-contact, non-caption features",
       "raw_hits": []
     },
     {
-      "name": "contact_prediction: public_field_card_blurb_is_human_readable",
       "status": "pass",
-      "value": "Predict whether body or hand contact with the scene is occurring without leaking contact labels.",
       "raw_hits": []
     },
     {
-      "name": "contact_prediction: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "feature filter -> contact target -> binary classifier",
       "raw_hits": []
     },
     {
-      "name": "contact_prediction: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Human-Object Contact Prediction",
       "raw_hits": []
     },
     {
@@ -774,45 +774,45 @@
       "observed": "object_relevance"
     },
     {
-      "name": "object_relevance: public_field_output_short_is_human_readable",
       "status": "pass",
-      "value": "relevant object set",
       "raw_hits": []
     },
     {
-      "name": "object_relevance: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Predict which objects matter in the current window.",
       "raw_hits": []
     },
     {
-      "name": "object_relevance: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Object Relevance Prediction",
       "raw_hits": []
     },
     {
-      "name": "object_relevance: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "non-caption multimodal features",
       "raw_hits": []
     },
     {
-      "name": "object_relevance: public_field_card_blurb_is_human_readable",
       "status": "pass",
-      "value": "Infer which objects are relevant to the current manipulation window from non-caption features.",
       "raw_hits": []
     },
     {
-      "name": "object_relevance: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "object vocabulary -> multi-hot labels -> sigmoid heads",
       "raw_hits": []
     },
     {
-      "name": "object_relevance: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Object-Centric Interaction Recognition",
       "raw_hits": []
     },
     {
@@ -892,45 +892,45 @@
       "observed": "caption_grounding"
     },
     {
-      "name": "caption_grounding: public_field_output_short_is_human_readable",
       "status": "pass",
-      "value": "ranked matching moments",
       "raw_hits": []
     },
     {
-      "name": "caption_grounding: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Given a text-like query from annotation, find the matching time window.",
       "raw_hits": []
     },
     {
-      "name": "caption_grounding: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Language Grounding",
       "raw_hits": []
     },
     {
-      "name": "caption_grounding: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "text-like query and candidate windows",
       "raw_hits": []
     },
     {
-      "name": "caption_grounding: public_field_card_blurb_is_human_readable",
       "status": "pass",
-      "value": "Retrieve the matching time window for an annotation-derived text query.",
       "raw_hits": []
     },
     {
-      "name": "caption_grounding: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "query features -> candidate index -> cosine ranker",
       "raw_hits": []
     },
     {
-      "name": "caption_grounding: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Language-to-Moment Grounding",
       "raw_hits": []
     },
     {
@@ -1008,45 +1008,45 @@
       "observed": "cross_modal_retrieval"
     },
     {
-      "name": "cross_modal_retrieval: public_field_output_short_is_human_readable",
       "status": "pass",
-      "value": "ranked visual windows",
       "raw_hits": []
     },
     {
-      "name": "cross_modal_retrieval: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Use one group of modalities to retrieve the matching window from another group.",
       "raw_hits": []
     },
     {
-      "name": "cross_modal_retrieval: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Cross-Modal Retrieval",
       "raw_hits": []
     },
     {
-      "name": "cross_modal_retrieval: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "motion/IMU/pose query; depth/video candidates",
       "raw_hits": []
     },
     {
-      "name": "cross_modal_retrieval: public_field_card_blurb_is_human_readable",
       "status": "pass",
-      "value": "Use motion, IMU, and camera-pose signals to retrieve the matching depth/video window.",
       "raw_hits": []
     },
     {
-      "name": "cross_modal_retrieval: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "modality split -> projection -> nearest-neighbor ranker",
       "raw_hits": []
     },
     {
-      "name": "cross_modal_retrieval: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Multimodal Representation Retrieval",
       "raw_hits": []
     },
     {
@@ -1126,45 +1126,45 @@
       "observed": "modality_reconstruction"
     },
     {
-      "name": "modality_reconstruction: public_field_output_short_is_human_readable",
       "status": "pass",
-      "value": "reconstructed depth/video vector",
       "raw_hits": []
     },
     {
-      "name": "modality_reconstruction: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Predict one modality feature block from other modality blocks.",
       "raw_hits": []
     },
     {
-      "name": "modality_reconstruction: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Cross-Modal Reconstruction",
       "raw_hits": []
     },
     {
-      "name": "modality_reconstruction: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "motion, IMU, and camera/pose features",
       "raw_hits": []
     },
     {
-      "name": "modality_reconstruction: public_field_card_blurb_is_human_readable",
       "status": "pass",
-      "value": "Predict compressed depth/video feature vectors from motion, IMU, and camera-pose features.",
       "raw_hits": []
     },
     {
-      "name": "modality_reconstruction: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "source-target split -> scaler -> regression head",
       "raw_hits": []
     },
     {
-      "name": "modality_reconstruction: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Modality Feature Reconstruction",
       "raw_hits": []
     },
     {
@@ -1244,43 +1244,43 @@
       "observed": "temporal_order"
     },
     {
-      "name": "temporal_order: public_field_output_short_is_human_readable",
       "status": "pass",
-      "value": "correct or reversed",
       "raw_hits": []
     },
     {
-      "name": "temporal_order: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Tell whether two nearby windows are in the correct time order.",
       "raw_hits": []
     },
     {
-      "name": "temporal_order: public_field_display_name_is_human_readable",
       "status": "pass",
       "value": "Temporal Order Verification",
       "raw_hits": []
     },
     {
-      "name": "temporal_order: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "two adjacent windows plus difference vector",
       "raw_hits": []
     },
     {
-      "name": "temporal_order: public_field_card_blurb_is_human_readable",
       "status": "pass",
-      "value": "Tell whether two neighboring windows are in chronological order or reversed.",
       "raw_hits": []
     },
     {
-      "name": "temporal_order: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "pair builder -> feature combiner -> binary classifier",
       "raw_hits": []
     },
     {
-      "name": "temporal_order: public_field_research_name_is_human_readable",
       "status": "pass",
       "value": "Temporal Order Verification",
       "raw_hits": []
@@ -1360,45 +1360,45 @@
       "observed": "misalignment_detection"
     },
     {
-      "name": "misalignment_detection: public_field_output_short_is_human_readable",
       "status": "pass",
-      "value": "aligned or shifted",
       "raw_hits": []
     },
     {
-      "name": "misalignment_detection: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Detect when modalities that should match are shifted out of sync.",
       "raw_hits": []
     },
     {
-      "name": "misalignment_detection: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Multimodal Synchronization Detection",
       "raw_hits": []
     },
     {
-      "name": "misalignment_detection: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "motion-side and visual/depth-side feature groups",
       "raw_hits": []
     },
     {
-      "name": "misalignment_detection: public_field_card_blurb_is_human_readable",
       "status": "pass",
-      "value": "Detect whether motion and visual/depth streams have been artificially shifted out of sync.",
       "raw_hits": []
     },
     {
-      "name": "misalignment_detection: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "aligned/shifted pairs -> feature combiner -> binary classifier",
       "raw_hits": []
     },
     {
-      "name": "misalignment_detection: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Cross-Modal Misalignment Detection",
       "raw_hits": []
     },
     {

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-13T17:46:47+00:00",
   "summary": {
     "task_count": 12,
     "expected_task_count": 12,
       "observed": "timeline_action"
     },
     {
+      "name": "timeline_action: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "20-frame multimodal window",
       "raw_hits": []
     },
     {
+      "name": "timeline_action: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "window features -> action label builder -> classifier",
       "raw_hits": []
     },
     {
+      "name": "timeline_action: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Egocentric Action Recognition",
       "raw_hits": []
     },
     {
+      "name": "timeline_action: public_field_card_blurb_is_human_readable",
       "status": "pass",
+      "value": "Recognize the current manipulation action from synchronized visual, motion, inertial, pose, and annotation context.",
       "raw_hits": []
     },
     {
+      "name": "timeline_action: public_field_output_short_is_human_readable",
       "status": "pass",
+      "value": "current action class",
       "raw_hits": []
     },
     {
+      "name": "timeline_action: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Look at one short multimodal window and name what action is happening now.",
       "raw_hits": []
     },
     {
+      "name": "timeline_action: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Action Recognition",
       "raw_hits": []
     },
     {
       "observed": "timeline_subtask"
     },
     {
+      "name": "timeline_subtask: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "20-frame multimodal window",
       "raw_hits": []
     },
     {
+      "name": "timeline_subtask: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "window features -> subtask label builder -> classifier",
       "raw_hits": []
     },
     {
+      "name": "timeline_subtask: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Temporal Subtask Recognition",
       "raw_hits": []
     },
     {
+      "name": "timeline_subtask: public_field_card_blurb_is_human_readable",
       "status": "pass",
+      "value": "Recognize the broader activity stage so fine actions become a readable procedure timeline.",
       "raw_hits": []
     },
     {
+      "name": "timeline_subtask: public_field_output_short_is_human_readable",
       "status": "pass",
+      "value": "current procedure step",
       "raw_hits": []
     },
     {
+      "name": "timeline_subtask: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Predict the higher-level task stage for the current window.",
       "raw_hits": []
     },
     {
+      "name": "timeline_subtask: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Procedure Step Recognition",
       "raw_hits": []
     },
     {
       "observed": "transition_detection"
     },
     {
+      "name": "transition_detection: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "current window with boundary target",
       "raw_hits": []
     },
     {
+      "name": "transition_detection: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "action changes -> boundary labels -> binary classifier",
       "raw_hits": []
     },
     {
+      "name": "transition_detection: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Temporal Action Segmentation",
       "raw_hits": []
     },
     {
+      "name": "transition_detection: public_field_card_blurb_is_human_readable",
       "status": "pass",
+      "value": "Detect the local moment where the episode changes from one action segment to the next.",
       "raw_hits": []
     },
     {
+      "name": "transition_detection: public_field_output_short_is_human_readable",
       "status": "pass",
+      "value": "boundary or steady",
       "raw_hits": []
     },
     {
+      "name": "transition_detection: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Detect whether the current window is near a boundary between actions.",
       "raw_hits": []
     },
     {
+      "name": "transition_detection: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Action Boundary Detection",
       "raw_hits": []
     },
     {
       "observed": "next_action"
     },
     {
+      "name": "next_action: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "current window at time t",
       "raw_hits": []
     },
     {
+      "name": "next_action: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "current features -> future label shift -> classifier",
       "raw_hits": []
     },
     {
+      "name": "next_action: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Short-Horizon Intention Prediction",
       "raw_hits": []
     },
     {
+      "name": "next_action: public_field_card_blurb_is_human_readable",
       "status": "pass",
+      "value": "Forecast the near-future action from the current observations only.",
       "raw_hits": []
     },
     {
+      "name": "next_action: public_field_output_short_is_human_readable",
       "status": "pass",
+      "value": "action at t+20 frames",
       "raw_hits": []
     },
     {
+      "name": "next_action: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Use the current window to guess the action that will happen shortly after it.",
       "raw_hits": []
     },
     {
+      "name": "next_action: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Next-Action Prediction",
       "raw_hits": []
     },
     {
       "observed": "hand_trajectory_forecast"
     },
     {
+      "name": "hand_trajectory_forecast: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "current multimodal window",
       "raw_hits": []
     },
     {
+      "name": "hand_trajectory_forecast: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "current features -> future mocap target -> regression head",
       "raw_hits": []
     },
     {
+      "name": "hand_trajectory_forecast: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "3D Hand Motion Forecasting",
       "raw_hits": []
     },
     {
+      "name": "hand_trajectory_forecast: public_field_card_blurb_is_human_readable",
       "status": "pass",
+      "value": "Predict the future 3D left/right hand path from the current multimodal state.",
       "raw_hits": []
     },
     {
+      "name": "hand_trajectory_forecast: public_field_output_short_is_human_readable",
       "status": "pass",
+      "value": "future hand-joint trajectory",
       "raw_hits": []
     },
     {
+      "name": "hand_trajectory_forecast: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Predict where the hands will move over the next few frames.",
       "raw_hits": []
     },
     {
+      "name": "hand_trajectory_forecast: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Hand Trajectory Forecasting",
       "raw_hits": []
     },
     {
       "observed": "contact_prediction"
     },
     {
+      "name": "contact_prediction: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "non-contact, non-caption features",
       "raw_hits": []
     },
     {
+      "name": "contact_prediction: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "feature filter -> contact target -> binary classifier",
       "raw_hits": []
     },
     {
+      "name": "contact_prediction: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Human-Object Contact Prediction",
       "raw_hits": []
     },
     {
+      "name": "contact_prediction: public_field_card_blurb_is_human_readable",
       "status": "pass",
+      "value": "Predict whether body or hand contact with the scene is occurring without leaking contact labels.",
       "raw_hits": []
     },
     {
+      "name": "contact_prediction: public_field_output_short_is_human_readable",
       "status": "pass",
+      "value": "contact or no contact",
       "raw_hits": []
     },
     {
+      "name": "contact_prediction: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Predict whether the body or hand is in contact with something.",
       "raw_hits": []
     },
     {
+      "name": "contact_prediction: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Contact State Prediction",
       "raw_hits": []
     },
     {
       "observed": "object_relevance"
     },
     {
+      "name": "object_relevance: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "non-caption multimodal features",
       "raw_hits": []
     },
     {
+      "name": "object_relevance: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "object vocabulary -> multi-hot labels -> sigmoid heads",
       "raw_hits": []
     },
     {
+      "name": "object_relevance: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Object-Centric Interaction Recognition",
       "raw_hits": []
     },
     {
+      "name": "object_relevance: public_field_card_blurb_is_human_readable",
       "status": "pass",
+      "value": "Infer which objects are relevant to the current manipulation window from non-caption features.",
       "raw_hits": []
     },
     {
+      "name": "object_relevance: public_field_output_short_is_human_readable",
       "status": "pass",
+      "value": "relevant object set",
       "raw_hits": []
     },
     {
+      "name": "object_relevance: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Predict which objects matter in the current window.",
       "raw_hits": []
     },
     {
+      "name": "object_relevance: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Object Relevance Prediction",
       "raw_hits": []
     },
     {
       "observed": "caption_grounding"
     },
     {
+      "name": "caption_grounding: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "text-like query and candidate windows",
       "raw_hits": []
     },
     {
+      "name": "caption_grounding: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "query features -> candidate index -> cosine ranker",
       "raw_hits": []
     },
     {
+      "name": "caption_grounding: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Language-to-Moment Grounding",
       "raw_hits": []
     },
     {
+      "name": "caption_grounding: public_field_card_blurb_is_human_readable",
       "status": "pass",
+      "value": "Retrieve the matching time window for an annotation-derived text query.",
       "raw_hits": []
     },
     {
+      "name": "caption_grounding: public_field_output_short_is_human_readable",
       "status": "pass",
+      "value": "ranked matching moments",
       "raw_hits": []
     },
     {
+      "name": "caption_grounding: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Given a text-like query from annotation, find the matching time window.",
       "raw_hits": []
     },
     {
+      "name": "caption_grounding: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Language Grounding",
       "raw_hits": []
     },
     {
       "observed": "cross_modal_retrieval"
     },
     {
+      "name": "cross_modal_retrieval: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "motion/IMU/pose query; depth/video candidates",
       "raw_hits": []
     },
     {
+      "name": "cross_modal_retrieval: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "modality split -> projection -> nearest-neighbor ranker",
       "raw_hits": []
     },
     {
+      "name": "cross_modal_retrieval: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Multimodal Representation Retrieval",
       "raw_hits": []
     },
     {
+      "name": "cross_modal_retrieval: public_field_card_blurb_is_human_readable",
       "status": "pass",
+      "value": "Use motion, IMU, and camera-pose signals to retrieve the matching depth/video window.",
       "raw_hits": []
     },
     {
+      "name": "cross_modal_retrieval: public_field_output_short_is_human_readable",
       "status": "pass",
+      "value": "ranked visual windows",
       "raw_hits": []
     },
     {
+      "name": "cross_modal_retrieval: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Use one group of modalities to retrieve the matching window from another group.",
       "raw_hits": []
     },
     {
+      "name": "cross_modal_retrieval: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Cross-Modal Retrieval",
       "raw_hits": []
     },
     {
       "observed": "modality_reconstruction"
     },
     {
+      "name": "modality_reconstruction: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "motion, IMU, and camera/pose features",
       "raw_hits": []
     },
     {
+      "name": "modality_reconstruction: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "source-target split -> scaler -> regression head",
       "raw_hits": []
     },
     {
+      "name": "modality_reconstruction: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Modality Feature Reconstruction",
       "raw_hits": []
     },
     {
+      "name": "modality_reconstruction: public_field_card_blurb_is_human_readable",
       "status": "pass",
+      "value": "Predict compressed depth/video feature vectors from motion, IMU, and camera-pose features.",
       "raw_hits": []
     },
     {
+      "name": "modality_reconstruction: public_field_output_short_is_human_readable",
       "status": "pass",
+      "value": "reconstructed depth/video vector",
       "raw_hits": []
     },
     {
+      "name": "modality_reconstruction: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Predict one modality feature block from other modality blocks.",
       "raw_hits": []
     },
     {
+      "name": "modality_reconstruction: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Cross-Modal Reconstruction",
       "raw_hits": []
     },
     {
       "observed": "temporal_order"
     },
     {
+      "name": "temporal_order: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "two adjacent windows plus difference vector",
       "raw_hits": []
     },
     {
+      "name": "temporal_order: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "pair builder -> feature combiner -> binary classifier",
       "raw_hits": []
     },
     {
+      "name": "temporal_order: public_field_research_name_is_human_readable",
       "status": "pass",
       "value": "Temporal Order Verification",
       "raw_hits": []
     },
     {
+      "name": "temporal_order: public_field_card_blurb_is_human_readable",
       "status": "pass",
+      "value": "Tell whether two neighboring windows are in chronological order or reversed.",
       "raw_hits": []
     },
     {
+      "name": "temporal_order: public_field_output_short_is_human_readable",
       "status": "pass",
+      "value": "correct or reversed",
       "raw_hits": []
     },
     {
+      "name": "temporal_order: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Tell whether two nearby windows are in the correct time order.",
       "raw_hits": []
     },
     {
+      "name": "temporal_order: public_field_display_name_is_human_readable",
       "status": "pass",
       "value": "Temporal Order Verification",
       "raw_hits": []
       "observed": "misalignment_detection"
     },
     {
+      "name": "misalignment_detection: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "motion-side and visual/depth-side feature groups",
       "raw_hits": []
     },
     {
+      "name": "misalignment_detection: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "aligned/shifted pairs -> feature combiner -> binary classifier",
       "raw_hits": []
     },
     {
+      "name": "misalignment_detection: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Cross-Modal Misalignment Detection",
       "raw_hits": []
     },
     {
+      "name": "misalignment_detection: public_field_card_blurb_is_human_readable",
       "status": "pass",
+      "value": "Detect whether motion and visual/depth streams have been artificially shifted out of sync.",
       "raw_hits": []
     },
     {
+      "name": "misalignment_detection: public_field_output_short_is_human_readable",
       "status": "pass",
+      "value": "aligned or shifted",
       "raw_hits": []
     },
     {
+      "name": "misalignment_detection: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Detect when modalities that should match are shifted out of sync.",
       "raw_hits": []
     },
     {
+      "name": "misalignment_detection: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Multimodal Synchronization Detection",
       "raw_hits": []
     },
     {

docs/data/website_integrity.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-12T18:14:52+00:00",
   "docs_root": "docs",
   "site_base": "/ropedia-xperience-10m-task-suite/",
   "summary": {
     "html_pages": 4,
     "local_references": 142,
     "external_reference_count": 111,
-    "json_files": 37,
     "image_assets_referenced": 22,
     "failure_count": 0
   },
@@ -75,7 +75,7 @@
       "status": "pass",
       "reason": "The project overview should appear before the deeper progress ledger.",
       "overview_index": 67412,
-      "evidence_index": 93026
     },
     {
       "name": "project_status_links_json",
@@ -137,7 +137,7 @@
       "statuses": [
         "implemented",
         "implemented_for_first_pilot",
-        "verified_baseline",
         "verified_companion_result",
         "current",
         "active_next_step",
@@ -154,8 +154,8 @@
       "status": "pass",
       "reason": "The evaluation protocol should appear before the deeper evidence ledger.",
       "overview_index": 67412,
-      "protocol_index": 89208,
-      "evidence_index": 93026
     },
     {
       "name": "evaluation_protocol_links_json",
@@ -253,7 +253,7 @@
     },
     {
       "path": "data/artifact_index.json",
-      "bytes": 87230,
       "top_level_type": "dict"
     },
     {
@@ -303,12 +303,12 @@
     },
     {
       "path": "data/omni_finetune_verified_result.json",
-      "bytes": 4431,
       "top_level_type": "dict"
     },
     {
       "path": "data/omni_model_comparison.json",
-      "bytes": 81593,
       "top_level_type": "dict"
     },
     {
@@ -323,12 +323,12 @@
     },
     {
       "path": "data/project_packet.json",
-      "bytes": 10496,
       "top_level_type": "dict"
     },
     {
       "path": "data/project_status.json",
-      "bytes": 23198,
       "top_level_type": "dict"
     },
     {
@@ -351,6 +351,11 @@
       "bytes": 12183,
       "top_level_type": "dict"
     },
     {
       "path": "data/rendered_site_check.json",
       "bytes": 4032,
@@ -373,12 +378,12 @@
     },
     {
       "path": "data/research_roadmap.json",
-      "bytes": 13313,
       "top_level_type": "dict"
     },
     {
       "path": "data/research_roadmap_interactive.json",
-      "bytes": 145083,
       "top_level_type": "dict"
     },
     {
@@ -388,7 +393,7 @@
     },
     {
       "path": "data/scope_claims_audit.json",
-      "bytes": 21325,
       "top_level_type": "dict"
     },
     {
@@ -403,7 +408,7 @@
     },
     {
       "path": "data/summary_metrics.json",
-      "bytes": 27604,
       "top_level_type": "dict"
     },
     {
@@ -423,7 +428,7 @@
     },
     {
       "path": "data/website_integrity.json",
-      "bytes": 15654,
       "top_level_type": "dict"
     },
     {

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-13T17:46:49+00:00",
   "docs_root": "docs",
   "site_base": "/ropedia-xperience-10m-task-suite/",
   "summary": {
     "html_pages": 4,
     "local_references": 142,
     "external_reference_count": 111,
+    "json_files": 38,
     "image_assets_referenced": 22,
     "failure_count": 0
   },
       "status": "pass",
       "reason": "The project overview should appear before the deeper progress ledger.",
       "overview_index": 67412,
+      "evidence_index": 93059
     },
     {
       "name": "project_status_links_json",
       "statuses": [
         "implemented",
         "implemented_for_first_pilot",
+        "verified_latest_branch",
         "verified_companion_result",
         "current",
         "active_next_step",
       "status": "pass",
       "reason": "The evaluation protocol should appear before the deeper evidence ledger.",
       "overview_index": 67412,
+      "protocol_index": 89241,
+      "evidence_index": 93059
     },
     {
       "name": "evaluation_protocol_links_json",
     },
     {
       "path": "data/artifact_index.json",
+      "bytes": 88913,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/omni_finetune_verified_result.json",
+      "bytes": 4325,
       "top_level_type": "dict"
     },
     {
       "path": "data/omni_model_comparison.json",
+      "bytes": 81866,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/project_packet.json",
+      "bytes": 10597,
       "top_level_type": "dict"
     },
     {
       "path": "data/project_status.json",
+      "bytes": 23535,
       "top_level_type": "dict"
     },
     {
       "bytes": 12183,
       "top_level_type": "dict"
     },
+    {
+      "path": "data/qwen3_v5_v6_comparison.json",
+      "bytes": 2814,
+      "top_level_type": "dict"
+    },
     {
       "path": "data/rendered_site_check.json",
       "bytes": 4032,
     },
     {
       "path": "data/research_roadmap.json",
+      "bytes": 13450,
       "top_level_type": "dict"
     },
     {
       "path": "data/research_roadmap_interactive.json",
+      "bytes": 145234,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/scope_claims_audit.json",
+      "bytes": 21795,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/summary_metrics.json",
+      "bytes": 27807,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/website_integrity.json",
+      "bytes": 15774,
       "top_level_type": "dict"
     },
     {

results/omni_finetune/OMNI_MODEL_COMPARISON.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # Omni Model Comparison
-Generated: `2026-06-12T18:14:48+00:00`
 Compare only rows with the same scope and target. Single-episode raw-feature metrics, 128-episode metadata baselines, Qwen3 structured JSON metrics, and the two Cosmos3 targets answer different questions: Nano future-window retrieval versus Super structured JSON Reasoner evaluation.
@@ -55,8 +55,8 @@ The one-episode Qwen entry is only a sensor-adapter smoke test with Qwen3 weight
 | full-param gate | passed | Full-Parameter 256-Step Post-Qwen-v6 Pilot | 2048 windows/samples | full_parameter_gate=passed, observed_train_steps=256, final_step_loss=0.0096, epoch_train_loss=0.1158, checkpoint_saved=False | `results/omni_finetune/xperience10m_qwen3_omni_128ep_fullparam_pilot256_after_qwen_v6_preemptible_8gpu_20260611/training_metadata.json` |
 | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.8750, action_macro_f1=0.0027, transition_accuracy=0.8504, contact_accuracy=0.6451 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/verified_result_summary.json` |
 | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.8527, action_macro_f1=0.0021, transition_accuracy=0.8281, contact_accuracy=0.6518 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/verified_result_summary.json` |
-| 128 episode | verified current | Qwen3-Omni LoRA | 119 episodes, 34269 windows/samples, 4032 eval | json_validity_rate=1.0000, action_macro_f1=0.0023, transition_accuracy=0.9908, contact_accuracy=0.7865 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full/verified_result_summary.json` |
-| 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 34269 windows/samples, 4032 eval | json_validity_rate=0.9990, action_macro_f1=0.0029, transition_accuracy=0.9898, contact_accuracy=0.8177 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/verified_result_summary.json` |
 | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.9978, action_macro_f1=0.0024, transition_accuracy=0.9710, contact_accuracy=0.7188 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/verified_result_summary.json` |
 | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=1.0000, action_macro_f1=0.0022, transition_accuracy=0.9732, contact_accuracy=0.7210 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/verified_result_summary.json` |
 | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=1.0000, action_macro_f1=0.0019, transition_accuracy=0.9732, contact_accuracy=0.7299 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/verified_result_summary.json` |
@@ -132,4 +132,5 @@ This is the first verified Cosmos3-Super fine-tuned adapter branch. Its metric i
 ## Pending
-- Use the verified Qwen3 v5 dense multiscale full-eval package as the current Qwen row; older Qwen package rows remain historical diagnostics for comparison.

 # Omni Model Comparison
+Generated: `2026-06-13T17:41:35+00:00`
 Compare only rows with the same scope and target. Single-episode raw-feature metrics, 128-episode metadata baselines, Qwen3 structured JSON metrics, and the two Cosmos3 targets answer different questions: Nano future-window retrieval versus Super structured JSON Reasoner evaluation.
 | full-param gate | passed | Full-Parameter 256-Step Post-Qwen-v6 Pilot | 2048 windows/samples | full_parameter_gate=passed, observed_train_steps=256, final_step_loss=0.0096, epoch_train_loss=0.1158, checkpoint_saved=False | `results/omni_finetune/xperience10m_qwen3_omni_128ep_fullparam_pilot256_after_qwen_v6_preemptible_8gpu_20260611/training_metadata.json` |
 | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.8750, action_macro_f1=0.0027, transition_accuracy=0.8504, contact_accuracy=0.6451 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/verified_result_summary.json` |
 | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.8527, action_macro_f1=0.0021, transition_accuracy=0.8281, contact_accuracy=0.6518 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/verified_result_summary.json` |
+| 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 34269 windows/samples, 4032 eval | json_validity_rate=1.0000, action_macro_f1=0.0023, transition_accuracy=0.9908, contact_accuracy=0.7865 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full/verified_result_summary.json` |
+| 128 episode | verified current | Qwen3-Omni LoRA | 119 episodes, 34269 windows/samples, 4032 eval | json_validity_rate=0.9990, action_macro_f1=0.0029, transition_accuracy=0.9898, contact_accuracy=0.8177 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/verified_result_summary.json` |
 | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.9978, action_macro_f1=0.0024, transition_accuracy=0.9710, contact_accuracy=0.7188 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/verified_result_summary.json` |
 | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=1.0000, action_macro_f1=0.0022, transition_accuracy=0.9732, contact_accuracy=0.7210 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/verified_result_summary.json` |
 | 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=1.0000, action_macro_f1=0.0019, transition_accuracy=0.9732, contact_accuracy=0.7299 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/verified_result_summary.json` |
 ## Pending
+- Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the current Qwen row; the v5 release tag remains pinned as the previous verified release.
+- Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before claiming v6 is globally better than v5, because v6 improves action macro-F1 and contact accuracy but slightly regresses JSON validity, subtask accuracy, transition accuracy, next-action accuracy, and object micro-F1.

results/omni_finetune/QWEN3_FULL_PARAMETER_GATES_20260609.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # Qwen3-Omni Full-Parameter Feasibility Gates
-Generated: `2026-06-12T18:14:48+00:00`
 The 2026-06-09 gates prove that Qwen3-Omni full-parameter FSDP can load, prepare, run backward/optimizer steps, and complete guarded pilots up to 128 optimizer steps on an 8-GPU remote worker. They do not prove a production full-parameter fine-tune, and they intentionally save no full checkpoints or public weights.

 # Qwen3-Omni Full-Parameter Feasibility Gates
+Generated: `2026-06-13T17:41:13+00:00`
 The 2026-06-09 gates prove that Qwen3-Omni full-parameter FSDP can load, prepare, run backward/optimizer steps, and complete guarded pilots up to 128 optimizer steps on an 8-GPU remote worker. They do not prove a production full-parameter fine-tune, and they intentionally save no full checkpoints or public weights.

results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md ADDED Viewed

	@@ -0,0 +1,31 @@

+# Qwen3-Omni v5/v6 Verified Comparison
+Generated: `2026-06-14`
+This compares only the two dense multiscale Qwen3-Omni LoRA held-out packages on the same selected 128-episode setup. Both use 4,032 held-out test predictions from 14 exported test episodes.
+| metric | v5 | v6 | v6 - v5 |
+| --- | ---: | ---: | ---: |
+| JSON validity | 1.000000 | 0.999008 | -0.000992 |
+| Action macro-F1 | 0.002290 | 0.002883 | +0.000593 |
+| Subtask accuracy | 0.011194 | 0.003731 | -0.007463 |
+| Transition accuracy | 0.990823 | 0.989831 | -0.000992 |
+| Next-action accuracy | 0.053619 | 0.043053 | -0.010565 |
+| Contact accuracy | 0.786458 | 0.817708 | +0.031250 |
+| Object micro-F1 | 0.316146 | 0.306498 | -0.009648 |
+## Readout
+v6 is the latest verified Qwen3-Omni LoRA branch and should be shown as the current Qwen row in generated comparisons. It improves action macro-F1 and contact accuracy. It does not dominate v5: v5 remains stronger on exact JSON validity, subtask accuracy, transition accuracy, next-action accuracy, and object micro-F1.
+The public release policy is therefore:
+- keep `ropedia-xperience-10m-v5` pinned to the previous stable v5 commit,
+- publish v6 on `main`, GitHub Pages, HF Space, artifact dataset, and the Qwen LoRA model repo,
+- create a separate `ropedia-xperience-10m-v6` tag only as an experimental/latest-Qwen release, rather than moving the v5 tag.
+## Sources
+- v5 package: `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full/verified_result_summary.json`
+- v6 package: `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/verified_result_summary.json`
+- machine-readable comparison: `docs/data/qwen3_v5_v6_comparison.json`

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/package_audit.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "status": "pass",
+  "package_dir": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
+  "backbone": "qwen3_omni_lora",
+  "required_eval_files": [
+    "metrics.json",
+    "predictions.jsonl",
+    "predictions.csv",
+    "per_class_metrics.csv",
+    "confusion_matrix.csv",
+    "RUN_REPORT.md"
+  ],
+  "primary_metrics": [
+    "action_macro_f1",
+    "contact_accuracy",
+    "held_out_episode_count",
+    "json_validity_rate",
+    "next_action_accuracy",
+    "object_micro_f1",
+    "subtask_accuracy",
+    "transition_accuracy"
+  ],
+  "issues": []
+}