cy0307 commited on 19 days ago

Commit

2d80be0

verified ·

1 Parent(s): a07660e

Update final Qwen public metrics

Browse files

Files changed (22) hide show

metrics/artifact_index.json +393 -72
metrics/audio_ablation_summary.json +24 -12
metrics/evaluation_protocol.json +15 -3
metrics/foundation_model_plan.json +2 -2
metrics/mirror_parity.json +0 -0
metrics/omni_finetune_verified_result.json +29 -43
metrics/omni_model_comparison.json +513 -0
metrics/project_brief.json +4 -4
metrics/project_manifest.json +7 -4
metrics/project_packet.json +3 -3
metrics/project_status.json +40 -11
metrics/publication_audit.json +9 -9
metrics/reproducibility_matrix.json +3 -3
metrics/research_directions.json +59 -0
metrics/research_roadmap.json +14 -13
metrics/research_roadmap_interactive.json +34 -14
metrics/research_takeaways.json +6 -6
metrics/scope_claims_audit.json +23 -22
metrics/single_episode_explorer.json +261 -115
metrics/summary_metrics.json +66 -52
metrics/task_surface_integrity.json +121 -121
metrics/website_integrity.json +38 -33

metrics/artifact_index.json CHANGED Viewed

@@ -1,19 +1,19 @@
 {
   "title": "Ropedia Xperience-10M Task Suite Artifact Index",
-  "generated_at_utc": "2026-06-06T17:44:58+00:00",
   "status": "pass",
-  "artifact_count": 89,
   "missing": [],
   "by_kind": {
     "project_path": 14,
     "scaleup_contract": 7,
-    "scaleup_status": 6,
     "project_scope": 1,
     "source_alignment": 5,
-    "publication_workflow": 3,
     "evaluation_protocol": 3,
     "result_interpretation": 5,
-    "metrics_source": 4,
     "website_data": 3,
     "visual_evidence": 7,
     "quality_gate": 12,
@@ -30,7 +30,9 @@
     "generated_figure": 3,
     "generated_figure_assets": 1,
     "citation": 1,
-    "license": 1
   },
   "artifacts": [
     {
@@ -41,8 +43,8 @@
       "surface": "repo_hf",
       "shows": "Gives first-pass readers a concise project shape before the detailed artifact trail.",
       "exists": true,
-      "bytes": 3829,
-      "sha256": "3d16e700c31aafe889b3d8e43d52250208766809c483a716a13cbd26961b3e72"
     },
     {
       "id": "project_brief_json",
@@ -52,8 +54,8 @@
       "surface": "website_hf",
       "shows": "Machine-readable first-reader project brief for the website and Hugging Face mirrors.",
       "exists": true,
-      "bytes": 3752,
-      "sha256": "4bd2f3471d0141abf64a2327541e25031e9efa39940cc68a29c316cf36f7c884"
     },
     {
       "id": "project_status",
@@ -63,8 +65,8 @@
       "surface": "repo_hf",
       "shows": "Gives a compact current-state table for first-pass readers.",
       "exists": true,
-      "bytes": 9818,
-      "sha256": "ae59a373796d279cf0c14208e14a1feca1ecbf3d31e5099fcf126c4c8de8a93a"
     },
     {
       "id": "project_status_json",
@@ -74,8 +76,8 @@
       "surface": "website_hf",
       "shows": "Machine-readable copy of the current project status for website and HF mirrors.",
       "exists": true,
-      "bytes": 12602,
-      "sha256": "38c3ea58375f127bc72653beffef5fb9d872430843137944f626f395395a5b1f"
     },
     {
       "id": "research_roadmap",
@@ -85,8 +87,8 @@
       "surface": "repo_hf",
       "shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
       "exists": true,
-      "bytes": 12045,
-      "sha256": "dece941b1be0f03aea11f0ce3e1a8240977cd9d9629b640fdcf233825cfd5f48"
     },
     {
       "id": "research_roadmap_json",
@@ -96,8 +98,8 @@
       "surface": "website_hf",
       "shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
       "exists": true,
-      "bytes": 10052,
-      "sha256": "0a2ce1c96e9546ce32571f26bf4a2c580708e0cb7bb56b51e3bfbc6eef240ff9"
     },
     {
       "id": "foundation_model_plan",
@@ -118,8 +120,8 @@
       "surface": "website_hf",
       "shows": "Machine-readable foundation-model selection matrix with source links, entry conditions, and evaluation additions.",
       "exists": true,
-      "bytes": 13112,
-      "sha256": "0850a9b74f2c62aba30b297089ad0a4a04424cc16d4a265f71e39538c6d22792"
     },
     {
       "id": "omni_model_extension_contract",
@@ -141,7 +143,7 @@
       "shows": "Stores the implemented Qwen3-Omni LoRA contract and planned Cosmos-style world-model and VLA/policy branch contracts.",
       "exists": true,
       "file_count": 3,
-      "bytes": 8904
     },
     {
       "id": "omni_backbone_registry_validator",
@@ -206,8 +208,19 @@
       "surface": "repo_hf",
       "shows": "Runs simple metadata and neural MLP baselines on the same selected 96/16/16 episode split used by the Qwen3-Omni diagnostic pilot.",
       "exists": true,
-      "bytes": 46953,
-      "sha256": "f6579d123275ec098184ac2021372e33ed48100e42fd62fe37ab7cb7126e5a1c"
     },
     {
       "id": "additional_development_directions",
@@ -261,8 +274,8 @@
       "surface": "website_hf",
       "shows": "Gives a short project path with scope status and public surfaces.",
       "exists": true,
-      "bytes": 7802,
-      "sha256": "40964c04cd769970e212288dc61a2a462c68ef5e4d962a7b9f50fe5a1fc84ce7"
     },
     {
       "id": "artifact_guide",
@@ -272,8 +285,8 @@
       "surface": "repo_hf",
       "shows": "Gives the human-readable map from project scope to data, tasks, platform mirrors, and scale-up status.",
       "exists": true,
-      "bytes": 17246,
-      "sha256": "0e3739a51aca083bb9be8195b1759dddf84378f34059eaad950013307dd377ec"
     },
     {
       "id": "official_dataset_card_alignment",
@@ -371,8 +384,8 @@
       "surface": "repo_hf",
       "shows": "Defines the window unit, chronological split, task metrics, leakage controls, and current limitations.",
       "exists": true,
-      "bytes": 6003,
-      "sha256": "b0dce21ec27228e49693c1aefed1be120c73f80551d6d3609c48c473d792e709"
     },
     {
       "id": "evaluation_protocol_json",
@@ -382,8 +395,8 @@
       "surface": "website_hf",
       "shows": "Machine-readable protocol generated from committed task metrics for website and HF mirrors.",
       "exists": true,
-      "bytes": 13788,
-      "sha256": "ff76b2af0f6d626d16daafffc2fdfda27801a9a15a8cca255a3d9f6be1b2a8a1"
     },
     {
       "id": "evaluation_protocol_builder",
@@ -393,8 +406,8 @@
       "surface": "repo_hf",
       "shows": "Regenerates the protocol from committed summary metrics and task artifacts.",
       "exists": true,
-      "bytes": 16289,
-      "sha256": "0e404d53826ea893ed1a9d6f07b3e98cdf16b64b37088480a1b8ddb957997164"
     },
     {
       "id": "research_takeaways",
@@ -404,8 +417,8 @@
       "surface": "repo_hf",
       "shows": "Summarizes the main research lessons from committed metrics and identifies which experiments need held-out episodes.",
       "exists": true,
-      "bytes": 5101,
-      "sha256": "29856af67d4c2e4ea8f339e5a9bde362da08bbd17b1a3c681ee9b4aa579559f0"
     },
     {
       "id": "research_takeaways_json",
@@ -415,8 +428,8 @@
       "surface": "website_hf",
       "shows": "Machine-readable result interpretation for the website, HF cards, and mirror checks.",
       "exists": true,
-      "bytes": 7102,
-      "sha256": "fe5ca81a9212ac2122787781868ad6955b74734f5eca8dd52def0c186ed8039e"
     },
     {
       "id": "research_takeaways_builder",
@@ -426,8 +439,8 @@
       "surface": "repo_hf",
       "shows": "Regenerates the research takeaways from committed summary metrics and task result artifacts.",
       "exists": true,
-      "bytes": 13503,
-      "sha256": "88711ef1e9c5f874d886fbc39c5138d8945d73358b3d0938c5d668bc0b9dba9a"
     },
     {
       "id": "audio_ablation_script",
@@ -470,8 +483,8 @@
       "surface": "website_hf",
       "shows": "Machine-readable audio ablation summary mirrored into the static website and Hugging Face bundles.",
       "exists": true,
-      "bytes": 9701,
-      "sha256": "7debd9e67b7df09322d743193bf8b785277a992c9e8c82c08c7a36c4e066e6de"
     },
     {
       "id": "audio_ablation_delta_chart",
@@ -661,8 +674,8 @@
       "surface": "repo_hf",
       "shows": "Regenerates the task-surface integrity report and fails if task cards expose raw artifact ids or lose the interactive player wiring.",
       "exists": true,
-      "bytes": 15964,
-      "sha256": "e674a6301692132fdbd6e379e4fa8db677388d762d86d3b1bb1f9f76b3b453de"
     },
     {
       "id": "live_publication_status",
@@ -684,8 +697,8 @@
       "surface": "repo",
       "shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
       "exists": true,
-      "bytes": 34779,
-      "sha256": "73c45e1ae1b7509b9f2c54cf42424c9ac0aab8a9e54be58fc488eaa2a696acc7"
     },
     {
       "id": "reproducibility_contract",
@@ -706,8 +719,8 @@
       "surface": "website_hf",
       "shows": "Machine-readable reproduction steps with expected artifacts and public boundaries.",
       "exists": true,
-      "bytes": 5223,
-      "sha256": "f218a630d3894f402bcb43d7eb24e4fdcedd7a93caf1816539cdf052c0620727"
     },
     {
       "id": "artifact_index_builder",
@@ -717,8 +730,8 @@
       "surface": "repo_hf",
       "shows": "Generates the selective artifact catalog from local files.",
       "exists": true,
-      "bytes": 33486,
-      "sha256": "9c41f5660ca8380deb0e0f466f21fb4fc73c85cb67c7a058de17173a4d075179"
     },
     {
       "id": "publication_audit",
@@ -741,7 +754,7 @@
       "volatile": true,
       "shows": "Separates setup paths from completed held-out-episode results.",
       "exists": true,
-      "bytes": 20823,
       "hash_policy": "existence_and_size_only"
     },
     {
@@ -753,7 +766,7 @@
       "volatile": true,
       "shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
       "exists": true,
-      "bytes": 131036,
       "hash_policy": "existence_and_size_only"
     },
     {
@@ -765,7 +778,7 @@
       "volatile": true,
       "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
       "exists": true,
-      "bytes": 15259,
       "hash_policy": "existence_and_size_only"
     },
     {
@@ -776,8 +789,8 @@
       "surface": "website_hf",
       "shows": "Lists public URLs, upstream sources, and machine-readable project metadata.",
       "exists": true,
-      "bytes": 4927,
-      "sha256": "032d9aa43c467bfa5004e18e1d7881ba6901371a5d8ec23cacf1a1058de50b2a"
     },
     {
       "id": "task_summary",
@@ -787,8 +800,8 @@
       "surface": "repo_hf",
       "shows": "Stores the task definitions, splits, feature dimension, and minimal/neural metrics.",
       "exists": true,
-      "bytes": 19653,
-      "sha256": "4c4db850c37268a8dc4d2e86c21f99c8d92c4cee106b27f7b8db0347631947cd"
     },
     {
       "id": "website_metrics_bundle",
@@ -798,8 +811,8 @@
       "surface": "website_hf",
       "shows": "Mirrors task metrics for the static dashboard.",
       "exists": true,
-      "bytes": 26028,
-      "sha256": "5259cf5373cb07ce6fad2bed69c35bfa77550515949588326f792d5d6043c082"
     },
     {
       "id": "feature_manifest",
@@ -843,7 +856,7 @@
       "shows": "Stores matching PyTorch MLP results for the 12 task contracts.",
       "exists": true,
       "file_count": 60,
-      "bytes": 90608884
     },
     {
       "id": "research_direction_taxonomy",
@@ -853,8 +866,8 @@
       "surface": "repo_hf",
       "shows": "Maps the 12 tasks to the four Ropedia research directions as direct/proxy/diagnostic.",
       "exists": true,
-      "bytes": 14414,
-      "sha256": "7215681c55a6739da7f16d833f62fcb8f6d58069840963182e98427eaf0cf654"
     },
     {
       "id": "research_direction_extensions",
@@ -864,8 +877,8 @@
       "surface": "repo_hf",
       "shows": "Stores one coded extension probe per research direction with minimal and neural metrics.",
       "exists": true,
-      "bytes": 11903,
-      "sha256": "010e254b61de6cc199e8e710fb10d8304c8156835f5cbafb79202996a74c0c77"
     },
     {
       "id": "task_walkthroughs",
@@ -886,8 +899,8 @@
       "surface": "website_hf",
       "shows": "Presents the task suite and sample modality thumbnails with metrics generated from committed files.",
       "exists": true,
-      "bytes": 2612510,
-      "sha256": "213d81f49d27e3f2560c79e29a017c017cbe38d8d605815bf3bc87834a1424ae"
     },
     {
       "id": "modality_atlas",
@@ -930,8 +943,8 @@
       "surface": "website_hf",
       "shows": "Shows the shared feature pipeline and minimal/neural head families.",
       "exists": true,
-      "bytes": 761507,
-      "sha256": "076c2e463ddce473e9138ac6f3615152d59031d6be2aa5c3d9ae1ace3d3f6c83"
     },
     {
       "id": "qwen_data_access_status",
@@ -944,6 +957,17 @@
       "bytes": 3499,
       "sha256": "c2999f0ea75765c8da3b94aa54d8a9628edd687a0fe38c09d2582f578f2b1ba7"
     },
     {
       "id": "multi_episode_access_status",
       "title": "Multi-episode access status",
@@ -961,7 +985,7 @@
       "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/ERROR_ANALYSIS.md",
       "kind": "scaleup_status",
       "surface": "repo_hf",
-      "shows": "Summarizes validation-aware Qwen3-Omni held-out failures by episode, action family, train-seen status, required-modality state, and object category.",
       "exists": true,
       "bytes": 3331,
       "sha256": "063fcc2ebd7b57ab5b281fd5e8edc629da4e1f4e5a708483ba27375d02af9467"
@@ -985,8 +1009,8 @@
       "surface": "repo_hf",
       "shows": "Summarizes same-split simple and neural metadata baselines for the 12 task ids, with unsupported markers for tasks that need missing raw 128 feature blocks.",
       "exists": true,
-      "bytes": 1861,
-      "sha256": "6e233609117917c9d14dcd815457cb2884f2000bef6cde24b7628d6060737b2b"
     },
     {
       "id": "multi_episode_128_baseline_summary",
@@ -996,8 +1020,52 @@
       "surface": "repo_hf",
       "shows": "Machine-readable 96/16/16 split counts, run configuration, per-task simple metrics, neural metrics, and raw-feature unsupported statuses.",
       "exists": true,
-      "bytes": 42129,
-      "sha256": "32592b0d976a4bf610a6e93412114d792989344570f30c4e89702e310c422f1e"
     },
     {
       "id": "citation",
@@ -1020,6 +1088,259 @@
       "exists": true,
       "bytes": 1745,
       "sha256": "09cf3f632d1248b6aa4457fdd510f878dcbd1e2e51bafba0ddc7cd4f05e23d07"
     }
   ]
 }

 {
   "title": "Ropedia Xperience-10M Task Suite Artifact Index",
+  "generated_at_utc": "2026-06-06T23:27:35+00:00",
   "status": "pass",
+  "artifact_count": 118,
   "missing": [],
   "by_kind": {
     "project_path": 14,
     "scaleup_contract": 7,
+    "scaleup_status": 16,
+    "publication_workflow": 5,
     "project_scope": 1,
     "source_alignment": 5,
     "evaluation_protocol": 3,
     "result_interpretation": 5,
+    "metrics_source": 14,
     "website_data": 3,
     "visual_evidence": 7,
     "quality_gate": 12,
     "generated_figure": 3,
     "generated_figure_assets": 1,
     "citation": 1,
+    "license": 1,
+    "verified_public_package": 4,
+    "publication_audit": 3
   },
   "artifacts": [
     {
       "surface": "repo_hf",
       "shows": "Gives first-pass readers a concise project shape before the detailed artifact trail.",
       "exists": true,
+      "bytes": 3837,
+      "sha256": "fbaa540aadbe2cf9b6581c5b43cac8cee3056f98cfc7386d322d6f38e70e42a4"
     },
     {
       "id": "project_brief_json",
       "surface": "website_hf",
       "shows": "Machine-readable first-reader project brief for the website and Hugging Face mirrors.",
       "exists": true,
+      "bytes": 3811,
+      "sha256": "ebf3d73a94c31ec8ba67e2aed8cfb04edfad07ad75694eb5373b2fe5a5da9dd9"
     },
     {
       "id": "project_status",
       "surface": "repo_hf",
       "shows": "Gives a compact current-state table for first-pass readers.",
       "exists": true,
+      "bytes": 9845,
+      "sha256": "e77d3facc533bffe35586e4de6500400352c07b4ca0df5ffc523855f38faa26e"
     },
     {
       "id": "project_status_json",
       "surface": "website_hf",
       "shows": "Machine-readable copy of the current project status for website and HF mirrors.",
       "exists": true,
+      "bytes": 15049,
+      "sha256": "23873ed59f3a38f46e45b15a5965afbb1365d49eb359bd5089a4ba6bda990d3c"
     },
     {
       "id": "research_roadmap",
       "surface": "repo_hf",
       "shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
       "exists": true,
+      "bytes": 12194,
+      "sha256": "8773f240e362198b3a669d1ac848d6f1629df3a33e41bd76fba157cbf566479c"
     },
     {
       "id": "research_roadmap_json",
       "surface": "website_hf",
       "shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
       "exists": true,
+      "bytes": 10133,
+      "sha256": "45fd3a1bde93654ccfe14f9271928a67b36eb3f166826bfbdbb9c1092ad33bcf"
     },
     {
       "id": "foundation_model_plan",
       "surface": "website_hf",
       "shows": "Machine-readable foundation-model selection matrix with source links, entry conditions, and evaluation additions.",
       "exists": true,
+      "bytes": 13193,
+      "sha256": "63529cbaf1d5c549f595b3ed49f49feda03edf96952b5cb321117fee340849c9"
     },
     {
       "id": "omni_model_extension_contract",
       "shows": "Stores the implemented Qwen3-Omni LoRA contract and planned Cosmos-style world-model and VLA/policy branch contracts.",
       "exists": true,
       "file_count": 3,
+      "bytes": 9203
     },
     {
       "id": "omni_backbone_registry_validator",
       "surface": "repo_hf",
       "shows": "Runs simple metadata and neural MLP baselines on the same selected 96/16/16 episode split used by the Qwen3-Omni diagnostic pilot.",
       "exists": true,
+      "bytes": 48164,
+      "sha256": "fbefe3f31e2d19566ed1fc356a25c564ecb4f0645de4d595f5926e1426c058d5"
+    },
+    {
+      "id": "qwen3_lora_hf_package_builder",
+      "title": "Qwen3 LoRA HF package builder",
+      "path": "scripts/omni/prepare_qwen3_lora_hf_package.py",
+      "kind": "publication_workflow",
+      "surface": "repo_hf",
+      "shows": "Builds the upload-ready Hugging Face adapter folder from a verified Qwen3 LoRA result summary and adapter directory.",
+      "exists": true,
+      "bytes": 9843,
+      "sha256": "636132a7d299db4d874ec797e34acd7e37eea69994c2d39afaafaec6587169a0"
     },
     {
       "id": "additional_development_directions",
       "surface": "website_hf",
       "shows": "Gives a short project path with scope status and public surfaces.",
       "exists": true,
+      "bytes": 7943,
+      "sha256": "ffd5da5fd2c2dc82fa1beb74335a51a33317923b3e7ee4864e2b5031082b0a42"
     },
     {
       "id": "artifact_guide",
       "surface": "repo_hf",
       "shows": "Gives the human-readable map from project scope to data, tasks, platform mirrors, and scale-up status.",
       "exists": true,
+      "bytes": 17508,
+      "sha256": "fbbd9f460610464efb27c371a17cf23c3fa409d853f8148368f48707192427d7"
     },
     {
       "id": "official_dataset_card_alignment",
       "surface": "repo_hf",
       "shows": "Defines the window unit, chronological split, task metrics, leakage controls, and current limitations.",
       "exists": true,
+      "bytes": 6434,
+      "sha256": "4817266bdfdf852ad97b3d37614141c56794d955d82110a819daa1d76755a675"
     },
     {
       "id": "evaluation_protocol_json",
       "surface": "website_hf",
       "shows": "Machine-readable protocol generated from committed task metrics for website and HF mirrors.",
       "exists": true,
+      "bytes": 14511,
+      "sha256": "ea7caff963fcf048f803a852e5cdae8d3975ba4a36d805c5e42211b1bf2744ef"
     },
     {
       "id": "evaluation_protocol_builder",
       "surface": "repo_hf",
       "shows": "Regenerates the protocol from committed summary metrics and task artifacts.",
       "exists": true,
+      "bytes": 16584,
+      "sha256": "e8cd8df471985688fa71e2b1be801e346e50911465ef886625a5d863bf9158f1"
     },
     {
       "id": "research_takeaways",
       "surface": "repo_hf",
       "shows": "Summarizes the main research lessons from committed metrics and identifies which experiments need held-out episodes.",
       "exists": true,
+      "bytes": 5149,
+      "sha256": "a2ab81a52a825b4f1dae59023cfe905a63128384f892dcc8e91c4c4351500aef"
     },
     {
       "id": "research_takeaways_json",
       "surface": "website_hf",
       "shows": "Machine-readable result interpretation for the website, HF cards, and mirror checks.",
       "exists": true,
+      "bytes": 7139,
+      "sha256": "eb87b65ef2f6ef910b4cda29c33f3c75014a5cce8ebf8299f71eb09c856a2481"
     },
     {
       "id": "research_takeaways_builder",
       "surface": "repo_hf",
       "shows": "Regenerates the research takeaways from committed summary metrics and task result artifacts.",
       "exists": true,
+      "bytes": 13473,
+      "sha256": "40ab06b9adaf2c2a9a8d55e07b361198f4cb3a88285596625cc8133e5135a4d2"
     },
     {
       "id": "audio_ablation_script",
       "surface": "website_hf",
       "shows": "Machine-readable audio ablation summary mirrored into the static website and Hugging Face bundles.",
       "exists": true,
+      "bytes": 10370,
+      "sha256": "d6de8db171993f8cc39153075a7e17cda79762659fad7d1944556f8bf10afd0d"
     },
     {
       "id": "audio_ablation_delta_chart",
       "surface": "repo_hf",
       "shows": "Regenerates the task-surface integrity report and fails if task cards expose raw artifact ids or lose the interactive player wiring.",
       "exists": true,
+      "bytes": 15366,
+      "sha256": "8d4573b7a4b75e433da577067369e5221515184536a281c4d2e30c3422ddc4ad"
     },
     {
       "id": "live_publication_status",
       "surface": "repo",
       "shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
       "exists": true,
+      "bytes": 36847,
+      "sha256": "07fd059a9ff8c13b073f349c79f1f7d3abe839559cf0809e291f6ea9bbad21e8"
     },
     {
       "id": "reproducibility_contract",
       "surface": "website_hf",
       "shows": "Machine-readable reproduction steps with expected artifacts and public boundaries.",
       "exists": true,
+      "bytes": 5280,
+      "sha256": "bfb34f14206943da909aee36465e8211c592615fca15a284e2fa8ef9ea1d438b"
     },
     {
       "id": "artifact_index_builder",
       "surface": "repo_hf",
       "shows": "Generates the selective artifact catalog from local files.",
       "exists": true,
+      "bytes": 38561,
+      "sha256": "571a06684909bd4d544d455d5cdee2fb69439b1e16de95609dd51fecc7b58b29"
     },
     {
       "id": "publication_audit",
       "volatile": true,
       "shows": "Separates setup paths from completed held-out-episode results.",
       "exists": true,
+      "bytes": 21234,
       "hash_policy": "existence_and_size_only"
     },
     {
       "volatile": true,
       "shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
       "exists": true,
+      "bytes": 235815,
       "hash_policy": "existence_and_size_only"
     },
     {
       "volatile": true,
       "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
       "exists": true,
+      "bytes": 15375,
       "hash_policy": "existence_and_size_only"
     },
     {
       "surface": "website_hf",
       "shows": "Lists public URLs, upstream sources, and machine-readable project metadata.",
       "exists": true,
+      "bytes": 5193,
+      "sha256": "1ae4c41fdcca6638e570e081d07f700d56ca490fecc25d681d5066b1ca8319ee"
     },
     {
       "id": "task_summary",
       "surface": "repo_hf",
       "shows": "Stores the task definitions, splits, feature dimension, and minimal/neural metrics.",
       "exists": true,
+      "bytes": 21680,
+      "sha256": "5860c901536495b7a8cb592ca0728a546566a70cef6d2b7d1a986e5140fbfe08"
     },
     {
       "id": "website_metrics_bundle",
       "surface": "website_hf",
       "shows": "Mirrors task metrics for the static dashboard.",
       "exists": true,
+      "bytes": 27490,
+      "sha256": "159ed565571aa4215ef30a5ea8fce057481cf0f77ad50aec3ae15de6a38e12ba"
     },
     {
       "id": "feature_manifest",
       "shows": "Stores matching PyTorch MLP results for the 12 task contracts.",
       "exists": true,
       "file_count": 60,
+      "bytes": 90609517
     },
     {
       "id": "research_direction_taxonomy",
       "surface": "repo_hf",
       "shows": "Maps the 12 tasks to the four Ropedia research directions as direct/proxy/diagnostic.",
       "exists": true,
+      "bytes": 19204,
+      "sha256": "59bece1a151d8475fde50396fd2e70ed4abcfec33f10e400ef165148fd6e7dde"
     },
     {
       "id": "research_direction_extensions",
       "surface": "repo_hf",
       "shows": "Stores one coded extension probe per research direction with minimal and neural metrics.",
       "exists": true,
+      "bytes": 12592,
+      "sha256": "6fa965d5e8249f0972e93558dcc1e7de15d53bdcfd253354255637c421b68dc4"
     },
     {
       "id": "task_walkthroughs",
       "surface": "website_hf",
       "shows": "Presents the task suite and sample modality thumbnails with metrics generated from committed files.",
       "exists": true,
+      "bytes": 1588641,
+      "sha256": "1275e2adaef920ecde7c29dc62c8d79d4f13475a0c09bc3baa693f47cdec2e1f"
     },
     {
       "id": "modality_atlas",
       "surface": "website_hf",
       "shows": "Shows the shared feature pipeline and minimal/neural head families.",
       "exists": true,
+      "bytes": 774391,
+      "sha256": "f08b03bc21e194efe382347d74cf89cd6ac65dede51889971dbfc2fb9d1de3c2"
     },
     {
       "id": "qwen_data_access_status",
       "bytes": 3499,
       "sha256": "c2999f0ea75765c8da3b94aa54d8a9628edd687a0fe38c09d2582f578f2b1ba7"
     },
+    {
+      "id": "qwen3_lora_hf_upload_note",
+      "title": "Qwen3 LoRA HF upload note",
+      "path": "results/omni_finetune/HF_UPLOAD.md",
+      "kind": "publication_workflow",
+      "surface": "repo_hf",
+      "shows": "Documents the final 128-episode LoRA adapter upload path, target model repo, package builder, and forbidden files.",
+      "exists": true,
+      "bytes": 1875,
+      "sha256": "7a822452347e8c4241a5160d67a9782f17f3d3cb9bd2960b00bac0ca1bf2392f"
+    },
     {
       "id": "multi_episode_access_status",
       "title": "Multi-episode access status",
       "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/ERROR_ANALYSIS.md",
       "kind": "scaleup_status",
       "surface": "repo_hf",
+      "shows": "Summarizes the earlier validation-aware Qwen3-Omni held-out failures by episode, action family, train-seen status, required-modality state, and object category.",
       "exists": true,
       "bytes": 3331,
       "sha256": "063fcc2ebd7b57ab5b281fd5e8edc629da4e1f4e5a708483ba27375d02af9467"
       "surface": "repo_hf",
       "shows": "Summarizes same-split simple and neural metadata baselines for the 12 task ids, with unsupported markers for tasks that need missing raw 128 feature blocks.",
       "exists": true,
+      "bytes": 2238,
+      "sha256": "c70440aa502ec569a840159ab7e05b8e7d4ed70e0091ad9a4b2fb3fb0d3803c1"
     },
     {
       "id": "multi_episode_128_baseline_summary",
       "surface": "repo_hf",
       "shows": "Machine-readable 96/16/16 split counts, run configuration, per-task simple metrics, neural metrics, and raw-feature unsupported statuses.",
       "exists": true,
+      "bytes": 44519,
+      "sha256": "107a4bedf53a22a1395f5e08b7f1cc9bb1becb8c0e95bc03178029abb3a83aef"
+    },
+    {
+      "id": "omni_model_comparison_report",
+      "title": "Omni model comparison report",
+      "path": "results/omni_finetune/OMNI_MODEL_COMPARISON.md",
+      "kind": "scaleup_status",
+      "surface": "repo_hf",
+      "shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
+      "exists": true,
+      "bytes": 3110,
+      "sha256": "11c22b7ac1e16fd8db86eb7c6fc33cf28fee97a38098f1606a35daee113dc72b"
+    },
+    {
+      "id": "omni_model_comparison_json",
+      "title": "Omni model comparison JSON",
+      "path": "docs/data/omni_model_comparison.json",
+      "kind": "metrics_source",
+      "surface": "repo_hf",
+      "shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
+      "exists": true,
+      "bytes": 21433,
+      "sha256": "b539a489a8974ecec90dda312471be54f466b81bef9d1ebc99d08155f8c21c94"
+    },
+    {
+      "id": "cosmos3_nano_verified_summary",
+      "title": "Cosmos3-Nano verified package summary",
+      "path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json",
+      "kind": "metrics_source",
+      "surface": "repo_hf",
+      "shows": "Machine-readable verified public summary for the Cosmos3-Nano future-window compatibility package.",
+      "exists": true,
+      "bytes": 6151,
+      "sha256": "386b374ef1837fe0087f9eeb21248e6c823334270fe4b1a52dadb3a11c09ef88"
+    },
+    {
+      "id": "cosmos3_nano_run_report",
+      "title": "Cosmos3-Nano future-window run report",
+      "path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/RUN_REPORT.md",
+      "kind": "scaleup_status",
+      "surface": "repo_hf",
+      "shows": "Reader-facing held-out metrics and interpretation for the Cosmos3-Nano future-window compatibility branch.",
+      "exists": true,
+      "bytes": 698,
+      "sha256": "3f56dc6ed58ea079a98a8f7e7ccd294238623a5f06bb9a01f1448665cf3eeb60"
     },
     {
       "id": "citation",
       "exists": true,
       "bytes": 1745,
       "sha256": "09cf3f632d1248b6aa4457fdd510f878dcbd1e2e51bafba0ddc7cd4f05e23d07"
+    },
+    {
+      "id": "verified_public_package_xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
+      "title": "Verified public package: Cosmos3-Nano Future-Window World Model",
+      "path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
+      "kind": "verified_public_package",
+      "surface": "repo_hf",
+      "shows": "Public-safe verified package for xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full (cosmos_world_model, status=verified).",
+      "exists": true,
+      "file_count": 14,
+      "bytes": 745194
+    },
+    {
+      "id": "verified_public_summary_xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
+      "title": "Verified summary: Cosmos3-Nano Future-Window World Model",
+      "path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json",
+      "kind": "metrics_source",
+      "surface": "repo_hf",
+      "shows": "Machine-readable verified summary for xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full.",
+      "exists": true,
+      "bytes": 6151,
+      "sha256": "386b374ef1837fe0087f9eeb21248e6c823334270fe4b1a52dadb3a11c09ef88"
+    },
+    {
+      "id": "verified_public_public_result_summary_xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
+      "title": "Verified public result summary: Cosmos3-Nano Future-Window World Model",
+      "path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/PUBLIC_RESULT_SUMMARY.md",
+      "kind": "scaleup_status",
+      "surface": "repo_hf",
+      "shows": "Public result summary for xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full.",
+      "exists": true,
+      "bytes": 984,
+      "sha256": "e7a98bb4bbea34e4dfed25bb1682284514996b722661b13cc59eb70b4163d682"
+    },
+    {
+      "id": "verified_public_run_report_xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
+      "title": "Verified run report: Cosmos3-Nano Future-Window World Model",
+      "path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/RUN_REPORT.md",
+      "kind": "scaleup_status",
+      "surface": "repo_hf",
+      "shows": "Run report for xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full.",
+      "exists": true,
+      "bytes": 698,
+      "sha256": "3f56dc6ed58ea079a98a8f7e7ccd294238623a5f06bb9a01f1448665cf3eeb60"
+    },
+    {
+      "id": "verified_public_metrics_JSON_xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
+      "title": "Verified metrics JSON: Cosmos3-Nano Future-Window World Model",
+      "path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
+      "kind": "metrics_source",
+      "surface": "repo_hf",
+      "shows": "Metrics json for xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full.",
+      "exists": true,
+      "bytes": 1099,
+      "sha256": "f11ccb167908d4f5bfb49c0be0b4bc6c9254901462aa52ae98a2a98e8af16558"
+    },
+    {
+      "id": "verified_public_package_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
+      "title": "Verified public package: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
+      "kind": "verified_public_package",
+      "surface": "repo_hf",
+      "shows": "Public-safe verified package for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval (qwen3_omni_lora, status=verified).",
+      "exists": true,
+      "file_count": 21,
+      "bytes": 5561131
+    },
+    {
+      "id": "verified_public_summary_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
+      "title": "Verified summary: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/verified_result_summary.json",
+      "kind": "metrics_source",
+      "surface": "repo_hf",
+      "shows": "Machine-readable verified summary for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval.",
+      "exists": true,
+      "bytes": 5933,
+      "sha256": "b5f8ef88cd9d8515f03bf092107a9e788695e4c4853feae0db98d384f0c39c9d"
+    },
+    {
+      "id": "verified_public_public_result_summary_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
+      "title": "Verified public result summary: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/PUBLIC_RESULT_SUMMARY.md",
+      "kind": "scaleup_status",
+      "surface": "repo_hf",
+      "shows": "Public result summary for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval.",
+      "exists": true,
+      "bytes": 1779,
+      "sha256": "11305d535a6cb60530560f3862b8374ec083adfc7cf714b49fe06b079e3c049d"
+    },
+    {
+      "id": "verified_public_run_report_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
+      "title": "Verified run report: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/RUN_REPORT.md",
+      "kind": "scaleup_status",
+      "surface": "repo_hf",
+      "shows": "Run report for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval.",
+      "exists": true,
+      "bytes": 603,
+      "sha256": "6792b92c8d8661d8f4f3670e7961a14fd0c495dbb4279602a6fba1480179ad9b"
+    },
+    {
+      "id": "verified_public_metrics_JSON_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
+      "title": "Verified metrics JSON: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/metrics.json",
+      "kind": "metrics_source",
+      "surface": "repo_hf",
+      "shows": "Metrics json for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval.",
+      "exists": true,
+      "bytes": 75629,
+      "sha256": "055b0932ea439338839256ded2fa5fb3ddb562ced0f149d2ea37460e966c4404"
+    },
+    {
+      "id": "verified_public_package_audit_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
+      "title": "Verified package audit: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/package_audit.json",
+      "kind": "publication_audit",
+      "surface": "repo_hf",
+      "shows": "Package audit for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval.",
+      "exists": true,
+      "bytes": 611,
+      "sha256": "2226cdd2e457b23c89b909e40ca469dd08f3db81c1bb797aaafb6cd19de6deea"
+    },
+    {
+      "id": "verified_public_package_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
+      "title": "Verified public package: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
+      "kind": "verified_public_package",
+      "surface": "repo_hf",
+      "shows": "Public-safe verified package for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full (qwen3_omni_lora, status=verified).",
+      "exists": true,
+      "file_count": 16,
+      "bytes": 5872232
+    },
+    {
+      "id": "verified_public_summary_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
+      "title": "Verified summary: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/verified_result_summary.json",
+      "kind": "metrics_source",
+      "surface": "repo_hf",
+      "shows": "Machine-readable verified summary for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full.",
+      "exists": true,
+      "bytes": 6270,
+      "sha256": "e4dac96e88eb03a36ead205f509c680aa2bb763b4da2256e265311bc17304d7f"
+    },
+    {
+      "id": "verified_public_public_result_summary_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
+      "title": "Verified public result summary: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/PUBLIC_RESULT_SUMMARY.md",
+      "kind": "scaleup_status",
+      "surface": "repo_hf",
+      "shows": "Public result summary for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full.",
+      "exists": true,
+      "bytes": 1061,
+      "sha256": "5e4de510a64b90d0632d72575965208f6b272b4531bf9f4c515bab23876654aa"
+    },
+    {
+      "id": "verified_public_run_report_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
+      "title": "Verified run report: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/eval/RUN_REPORT.md",
+      "kind": "scaleup_status",
+      "surface": "repo_hf",
+      "shows": "Run report for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full.",
+      "exists": true,
+      "bytes": 618,
+      "sha256": "2e572809cb3e97c4c17e5f126a63ec1d470e5da345f8a3b6026a6efd5fb927d9"
+    },
+    {
+      "id": "verified_public_metrics_JSON_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
+      "title": "Verified metrics JSON: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/eval/metrics.json",
+      "kind": "metrics_source",
+      "surface": "repo_hf",
+      "shows": "Metrics json for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full.",
+      "exists": true,
+      "bytes": 108127,
+      "sha256": "4c11c61ee661ee201ae91f50d2dc9c0eabe2a1040a2534fe91f4b5b54c96b27c"
+    },
+    {
+      "id": "verified_public_package_audit_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
+      "title": "Verified package audit: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/package_audit.json",
+      "kind": "publication_audit",
+      "surface": "repo_hf",
+      "shows": "Package audit for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full.",
+      "exists": true,
+      "bytes": 669,
+      "sha256": "3d427e70e44b22b882be49f2963e2afcf5b497f25c445850c9f567cdbc41ed15"
+    },
+    {
+      "id": "verified_public_package_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
+      "title": "Verified public package: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
+      "kind": "verified_public_package",
+      "surface": "repo_hf",
+      "shows": "Public-safe verified package for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full (qwen3_omni_lora, status=verified).",
+      "exists": true,
+      "file_count": 16,
+      "bytes": 4898687
+    },
+    {
+      "id": "verified_public_summary_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
+      "title": "Verified summary: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/verified_result_summary.json",
+      "kind": "metrics_source",
+      "surface": "repo_hf",
+      "shows": "Machine-readable verified summary for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full.",
+      "exists": true,
+      "bytes": 6207,
+      "sha256": "d7dae7bc17d0fd07a3f29fd61d57803b9d96d65da2ebd7f5436683a9aa18bfeb"
+    },
+    {
+      "id": "verified_public_public_result_summary_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
+      "title": "Verified public result summary: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/PUBLIC_RESULT_SUMMARY.md",
+      "kind": "scaleup_status",
+      "surface": "repo_hf",
+      "shows": "Public result summary for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full.",
+      "exists": true,
+      "bytes": 1008,
+      "sha256": "080636ce30a37a259c4eaad0791fe5dd03fd60d61092407470d616391f0079ea"
+    },
+    {
+      "id": "verified_public_run_report_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
+      "title": "Verified run report: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/eval/RUN_REPORT.md",
+      "kind": "scaleup_status",
+      "surface": "repo_hf",
+      "shows": "Run report for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full.",
+      "exists": true,
+      "bytes": 590,
+      "sha256": "4309393cd227803f766a9c7b317f5917e39b09cfb6f2618105c5c6cdb064f1a5"
+    },
+    {
+      "id": "verified_public_metrics_JSON_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
+      "title": "Verified metrics JSON: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/eval/metrics.json",
+      "kind": "metrics_source",
+      "surface": "repo_hf",
+      "shows": "Metrics json for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full.",
+      "exists": true,
+      "bytes": 52485,
+      "sha256": "4174640ef32665853b0b807329855344302018952cfa97639cec66649adcbec7"
+    },
+    {
+      "id": "verified_public_package_audit_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
+      "title": "Verified package audit: Qwen3-Omni LoRA",
+      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/package_audit.json",
+      "kind": "publication_audit",
+      "surface": "repo_hf",
+      "shows": "Package audit for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full.",
+      "exists": true,
+      "bytes": 623,
+      "sha256": "d7264cfb34e48b5c41c89444ea9cd1314b8f4d0bcc0224debbbe5ea512450197"
     }
   ]
 }

metrics/audio_ablation_summary.json CHANGED Viewed

@@ -38,7 +38,8 @@
       "raw_replacement_delta_vs_no_audio": -0.007422402159244265,
       "raw_replacement_delta_vs_handcrafted": -0.00770504201550171,
       "all_plus_raw_logmel": 0.002734107997265892,
-      "all_plus_raw_delta_vs_handcrafted": -0.006320461683552957
     },
     {
       "task": "timeline_subtask",
@@ -53,7 +54,8 @@
       "raw_replacement_delta_vs_no_audio": -0.01034742052599772,
       "raw_replacement_delta_vs_handcrafted": -0.010430590562065117,
       "all_plus_raw_logmel": 0.0017889087656529517,
-      "all_plus_raw_delta_vs_handcrafted": -0.009467445627956345
     },
     {
       "task": "transition_detection",
@@ -68,7 +70,8 @@
       "raw_replacement_delta_vs_no_audio": 0.010507780641701658,
       "raw_replacement_delta_vs_handcrafted": 0.01707714954338524,
       "all_plus_raw_logmel": 0.4816233470132239,
-      "all_plus_raw_delta_vs_handcrafted": 0.019490425838571634
     },
     {
       "task": "next_action",
@@ -83,7 +86,8 @@
       "raw_replacement_delta_vs_no_audio": -0.004703498679402295,
       "raw_replacement_delta_vs_handcrafted": -0.004576004576004574,
       "all_plus_raw_logmel": 0.0058479532163742695,
-      "all_plus_raw_delta_vs_handcrafted": -0.00473405736563631
     },
     {
       "task": "hand_trajectory_forecast",
@@ -98,7 +102,8 @@
       "raw_replacement_delta_vs_no_audio": -0.0021152496337890625,
       "raw_replacement_delta_vs_handcrafted": 0.16052484512329102,
       "all_plus_raw_logmel": 4.1367621421813965,
-      "all_plus_raw_delta_vs_handcrafted": 0.3296332359313965
     },
     {
       "task": "contact_prediction",
@@ -113,7 +118,8 @@
       "raw_replacement_delta_vs_no_audio": 0.0,
       "raw_replacement_delta_vs_handcrafted": 0.0,
       "all_plus_raw_logmel": 1.0,
-      "all_plus_raw_delta_vs_handcrafted": 0.0
     },
     {
       "task": "object_relevance",
@@ -128,7 +134,8 @@
       "raw_replacement_delta_vs_no_audio": 0.030784313919472256,
       "raw_replacement_delta_vs_handcrafted": 0.020578064024873888,
       "all_plus_raw_logmel": 0.18262653898768813,
-      "all_plus_raw_delta_vs_handcrafted": 0.024487004103967203
     },
     {
       "task": "caption_grounding",
@@ -143,7 +150,8 @@
       "raw_replacement_delta_vs_no_audio": -0.002380702644586563,
       "raw_replacement_delta_vs_handcrafted": -0.007237853482365608,
       "all_plus_raw_logmel": 0.02719014883041382,
-      "all_plus_raw_delta_vs_handcrafted": -0.004895530641078949
     },
     {
       "task": "cross_modal_retrieval",
@@ -158,7 +166,8 @@
       "raw_replacement_delta_vs_no_audio": -0.061719030141830444,
       "raw_replacement_delta_vs_handcrafted": -0.04763227701187134,
       "all_plus_raw_logmel": 0.31795138120651245,
-      "all_plus_raw_delta_vs_handcrafted": -0.05717244744300842
     },
     {
       "task": "modality_reconstruction",
@@ -173,7 +182,8 @@
       "raw_replacement_delta_vs_no_audio": 1.615983009338379,
       "raw_replacement_delta_vs_handcrafted": 0.9635343551635742,
       "all_plus_raw_logmel": 8.392388343811035,
-      "all_plus_raw_delta_vs_handcrafted": 1.401824951171875
     },
     {
       "task": "temporal_order",
@@ -188,7 +198,8 @@
       "raw_replacement_delta_vs_no_audio": 0.03591857034334939,
       "raw_replacement_delta_vs_handcrafted": 0.012930064596222923,
       "all_plus_raw_logmel": 0.5330450130569861,
-      "all_plus_raw_delta_vs_handcrafted": 0.015803633746641288
     },
     {
       "task": "misalignment_detection",
@@ -203,7 +214,8 @@
       "raw_replacement_delta_vs_no_audio": 0.021203945154488313,
       "raw_replacement_delta_vs_handcrafted": 0.02644906505448169,
       "all_plus_raw_logmel": 0.4373795761078998,
-      "all_plus_raw_delta_vs_handcrafted": 0.02003912235410793
     }
   ],
   "aggregate": {

       "raw_replacement_delta_vs_no_audio": -0.007422402159244265,
       "raw_replacement_delta_vs_handcrafted": -0.00770504201550171,
       "all_plus_raw_logmel": 0.002734107997265892,
+      "all_plus_raw_delta_vs_handcrafted": -0.006320461683552957,
+      "task_display_name": "Action Recognition"
     },
     {
       "task": "timeline_subtask",
       "raw_replacement_delta_vs_no_audio": -0.01034742052599772,
       "raw_replacement_delta_vs_handcrafted": -0.010430590562065117,
       "all_plus_raw_logmel": 0.0017889087656529517,
+      "all_plus_raw_delta_vs_handcrafted": -0.009467445627956345,
+      "task_display_name": "Procedure Step Recognition"
     },
     {
       "task": "transition_detection",
       "raw_replacement_delta_vs_no_audio": 0.010507780641701658,
       "raw_replacement_delta_vs_handcrafted": 0.01707714954338524,
       "all_plus_raw_logmel": 0.4816233470132239,
+      "all_plus_raw_delta_vs_handcrafted": 0.019490425838571634,
+      "task_display_name": "Action Boundary Detection"
     },
     {
       "task": "next_action",
       "raw_replacement_delta_vs_no_audio": -0.004703498679402295,
       "raw_replacement_delta_vs_handcrafted": -0.004576004576004574,
       "all_plus_raw_logmel": 0.0058479532163742695,
+      "all_plus_raw_delta_vs_handcrafted": -0.00473405736563631,
+      "task_display_name": "Next-Action Prediction"
     },
     {
       "task": "hand_trajectory_forecast",
       "raw_replacement_delta_vs_no_audio": -0.0021152496337890625,
       "raw_replacement_delta_vs_handcrafted": 0.16052484512329102,
       "all_plus_raw_logmel": 4.1367621421813965,
+      "all_plus_raw_delta_vs_handcrafted": 0.3296332359313965,
+      "task_display_name": "Hand Trajectory Forecasting"
     },
     {
       "task": "contact_prediction",
       "raw_replacement_delta_vs_no_audio": 0.0,
       "raw_replacement_delta_vs_handcrafted": 0.0,
       "all_plus_raw_logmel": 1.0,
+      "all_plus_raw_delta_vs_handcrafted": 0.0,
+      "task_display_name": "Contact State Prediction"
     },
     {
       "task": "object_relevance",
       "raw_replacement_delta_vs_no_audio": 0.030784313919472256,
       "raw_replacement_delta_vs_handcrafted": 0.020578064024873888,
       "all_plus_raw_logmel": 0.18262653898768813,
+      "all_plus_raw_delta_vs_handcrafted": 0.024487004103967203,
+      "task_display_name": "Object Relevance Prediction"
     },
     {
       "task": "caption_grounding",
       "raw_replacement_delta_vs_no_audio": -0.002380702644586563,
       "raw_replacement_delta_vs_handcrafted": -0.007237853482365608,
       "all_plus_raw_logmel": 0.02719014883041382,
+      "all_plus_raw_delta_vs_handcrafted": -0.004895530641078949,
+      "task_display_name": "Language Grounding"
     },
     {
       "task": "cross_modal_retrieval",
       "raw_replacement_delta_vs_no_audio": -0.061719030141830444,
       "raw_replacement_delta_vs_handcrafted": -0.04763227701187134,
       "all_plus_raw_logmel": 0.31795138120651245,
+      "all_plus_raw_delta_vs_handcrafted": -0.05717244744300842,
+      "task_display_name": "Cross-Modal Retrieval"
     },
     {
       "task": "modality_reconstruction",
       "raw_replacement_delta_vs_no_audio": 1.615983009338379,
       "raw_replacement_delta_vs_handcrafted": 0.9635343551635742,
       "all_plus_raw_logmel": 8.392388343811035,
+      "all_plus_raw_delta_vs_handcrafted": 1.401824951171875,
+      "task_display_name": "Cross-Modal Reconstruction"
     },
     {
       "task": "temporal_order",
       "raw_replacement_delta_vs_no_audio": 0.03591857034334939,
       "raw_replacement_delta_vs_handcrafted": 0.012930064596222923,
       "all_plus_raw_logmel": 0.5330450130569861,
+      "all_plus_raw_delta_vs_handcrafted": 0.015803633746641288,
+      "task_display_name": "Temporal Order Verification"
     },
     {
       "task": "misalignment_detection",
       "raw_replacement_delta_vs_no_audio": 0.021203945154488313,
       "raw_replacement_delta_vs_handcrafted": 0.02644906505448169,
       "all_plus_raw_logmel": 0.4373795761078998,
+      "all_plus_raw_delta_vs_handcrafted": 0.02003912235410793,
+      "task_display_name": "Multimodal Synchronization Detection"
     }
   ],
   "aggregate": {

metrics/evaluation_protocol.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "title": "Ropedia Xperience-10M Task Suite Evaluation Protocol",
   "status": "pass",
   "version": "2026-06-01",
-  "generated_at_utc": "2026-06-06T13:49:32+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
@@ -69,6 +69,7 @@
   "task_protocols": [
     {
       "task": "timeline_action",
       "family": "supervised classification",
       "unit": "single window",
       "input": "current 20-frame all-feature window",
@@ -88,6 +89,7 @@
     },
     {
       "task": "timeline_subtask",
       "family": "supervised classification",
       "unit": "single window",
       "input": "current 20-frame all-feature window",
@@ -107,6 +109,7 @@
     },
     {
       "task": "transition_detection",
       "family": "temporal diagnostic",
       "unit": "single window",
       "input": "current 20-frame all-feature window",
@@ -126,6 +129,7 @@
     },
     {
       "task": "next_action",
       "family": "short-horizon prediction",
       "unit": "single window",
       "input": "current 20-frame all-feature window at time t",
@@ -145,6 +149,7 @@
     },
     {
       "task": "hand_trajectory_forecast",
       "family": "trajectory regression",
       "unit": "single window",
       "input": "current all-feature window",
@@ -164,6 +169,7 @@
     },
     {
       "task": "contact_prediction",
       "family": "binary classification",
       "unit": "single window",
       "input": "non-contact and non-caption feature blocks",
@@ -183,6 +189,7 @@
     },
     {
       "task": "object_relevance",
       "family": "multi-label classification",
       "unit": "single window",
       "input": "non-caption feature blocks",
@@ -202,6 +209,7 @@
     },
     {
       "task": "caption_grounding",
       "family": "retrieval",
       "unit": "caption query",
       "input": "caption object/interaction query plus candidate sensor windows",
@@ -221,6 +229,7 @@
     },
     {
       "task": "cross_modal_retrieval",
       "family": "retrieval",
       "unit": "sensor query",
       "input": "motion, IMU, and camera query features",
@@ -240,6 +249,7 @@
     },
     {
       "task": "modality_reconstruction",
       "family": "cross-modal regression",
       "unit": "single window",
       "input": "motion, IMU, and camera features",
@@ -258,6 +268,7 @@
     },
     {
       "task": "temporal_order",
       "family": "pairwise diagnostic",
       "unit": "adjacent window pair",
       "input": "two adjacent windows",
@@ -277,6 +288,7 @@
     },
     {
       "task": "misalignment_detection",
       "family": "pairwise diagnostic",
       "unit": "paired modality window",
       "input": "motion side plus visual/depth side",
@@ -305,7 +317,7 @@
   "current_limitations": [
     "Cross-episode generalization for Qwen3-Omni has a first verified diagnostic pilot, but strong model quality is not yet shown.",
     "Feature-vector reconstruction is separate from pixel depth, mesh, NeRF, or Gaussian reconstruction.",
-    "The verified validation-aware Qwen3-Omni diagnostic pilot has weak held-out metrics and needs structured-output and task-quality improvements before larger model-quality claims.",
     "Full audio-visual representation learning still needs multi-episode training; the current report includes single-episode audio/no-audio ablations."
   ],
   "scale_up_gate": {
@@ -316,7 +328,7 @@
       "manifest, training metadata, progress logs, metrics, predictions, and run report",
       "held-out evaluation on test episodes rather than train windows"
     ],
-    "current_status": "verified diagnostic pilot; quality target not met",
     "evidence": [
       "docs/data/omni_finetune_verified_result.json",
       "results/omni_finetune/verified_public/"

   "title": "Ropedia Xperience-10M Task Suite Evaluation Protocol",
   "status": "pass",
   "version": "2026-06-01",
+  "generated_at_utc": "2026-06-06T23:26:13+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
   "task_protocols": [
     {
       "task": "timeline_action",
+      "task_display_name": "Action Recognition",
       "family": "supervised classification",
       "unit": "single window",
       "input": "current 20-frame all-feature window",
     },
     {
       "task": "timeline_subtask",
+      "task_display_name": "Procedure Step Recognition",
       "family": "supervised classification",
       "unit": "single window",
       "input": "current 20-frame all-feature window",
     },
     {
       "task": "transition_detection",
+      "task_display_name": "Action Boundary Detection",
       "family": "temporal diagnostic",
       "unit": "single window",
       "input": "current 20-frame all-feature window",
     },
     {
       "task": "next_action",
+      "task_display_name": "Next-Action Prediction",
       "family": "short-horizon prediction",
       "unit": "single window",
       "input": "current 20-frame all-feature window at time t",
     },
     {
       "task": "hand_trajectory_forecast",
+      "task_display_name": "Hand Trajectory Forecasting",
       "family": "trajectory regression",
       "unit": "single window",
       "input": "current all-feature window",
     },
     {
       "task": "contact_prediction",
+      "task_display_name": "Contact State Prediction",
       "family": "binary classification",
       "unit": "single window",
       "input": "non-contact and non-caption feature blocks",
     },
     {
       "task": "object_relevance",
+      "task_display_name": "Object Relevance Prediction",
       "family": "multi-label classification",
       "unit": "single window",
       "input": "non-caption feature blocks",
     },
     {
       "task": "caption_grounding",
+      "task_display_name": "Language Grounding",
       "family": "retrieval",
       "unit": "caption query",
       "input": "caption object/interaction query plus candidate sensor windows",
     },
     {
       "task": "cross_modal_retrieval",
+      "task_display_name": "Cross-Modal Retrieval",
       "family": "retrieval",
       "unit": "sensor query",
       "input": "motion, IMU, and camera query features",
     },
     {
       "task": "modality_reconstruction",
+      "task_display_name": "Cross-Modal Reconstruction",
       "family": "cross-modal regression",
       "unit": "single window",
       "input": "motion, IMU, and camera features",
     },
     {
       "task": "temporal_order",
+      "task_display_name": "Temporal Order Verification",
       "family": "pairwise diagnostic",
       "unit": "adjacent window pair",
       "input": "two adjacent windows",
     },
     {
       "task": "misalignment_detection",
+      "task_display_name": "Multimodal Synchronization Detection",
       "family": "pairwise diagnostic",
       "unit": "paired modality window",
       "input": "motion side plus visual/depth side",
   "current_limitations": [
     "Cross-episode generalization for Qwen3-Omni has a first verified diagnostic pilot, but strong model quality is not yet shown.",
     "Feature-vector reconstruction is separate from pixel depth, mesh, NeRF, or Gaussian reconstruction.",
+    "The final verified Qwen3-Omni diagnostic result meets the strict-JSON target, but action/subtask held-out quality remains weak and needs error analysis before larger model-quality claims.",
     "Full audio-visual representation learning still needs multi-episode training; the current report includes single-episode audio/no-audio ablations."
   ],
   "scale_up_gate": {
       "manifest, training metadata, progress logs, metrics, predictions, and run report",
       "held-out evaluation on test episodes rather than train windows"
     ],
+    "current_status": "verified diagnostic result; strict-JSON quality target met, action/subtask quality still weak",
     "evidence": [
       "docs/data/omni_finetune_verified_result.json",
       "results/omni_finetune/verified_public/"

metrics/foundation_model_plan.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Xperience-10M Foundation Model Plan",
   "status": "planning_artifact",
-  "current_boundary": "A first held-out multi-episode Qwen3-Omni diagnostic pilot is verified in this repo, but it is not a strong model result. The current foundation-model work should treat it as the baseline train/eval/package loop before validation-aware Qwen reruns, Cosmos-style world modeling, or policy/VLA branches.",
   "backbone_registry": {
     "config_dir": "configs/omni_backbones",
     "validator": "scripts/omni/backbone_registry.py --validate --json",
@@ -206,7 +206,7 @@
     {
       "step": 2,
       "name": "First held-out baseline",
-      "action": "Run validation-aware Qwen3-Omni LoRA to improve the verified diagnostic baseline."
     },
     {
       "step": 3,

 {
   "title": "Xperience-10M Foundation Model Plan",
   "status": "planning_artifact",
+  "current_boundary": "A final held-out multi-episode Qwen3-Omni diagnostic result is verified in this repo and meets the strict-JSON target, but it is not a strong action/subtask model result. The current foundation-model work should treat it as the baseline train/eval/package loop before Qwen action/subtask improvements, Cosmos-style world modeling, or policy/VLA branches.",
   "backbone_registry": {
     "config_dir": "configs/omni_backbones",
     "validator": "scripts/omni/backbone_registry.py --validate --json",
     {
       "step": 2,
       "name": "First held-out baseline",
+      "action": "Run Qwen3-Omni action/subtask error analysis and targeted reruns to improve the verified diagnostic baseline."
     },
     {
       "step": 3,

metrics/mirror_parity.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

metrics/omni_finetune_verified_result.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "title": "Verified Qwen3-Omni LoRA Validation-Aware Held-Out Pilot",
-  "status": "verified_validation_aware_diagnostic_pilot",
-  "status_date": "2026-06-06",
   "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
   "adapter": "Qwen3-Omni LoRA",
   "dataset": "Ropedia Xperience-10M selected 128-episode pilot",
@@ -27,7 +27,7 @@
   },
   "training": {
     "num_processes": 8,
-    "epochs": 1,
     "lora_rank": 16,
     "lora_alpha": 32,
     "lora_dropout": 0.05,
@@ -36,64 +36,50 @@
     "history": [
       {
         "epoch": 1,
-        "train_loss": 0.41304643672440994,
-        "val_loss": 0.0330660454928875,
         "global_step": 356
       }
     ],
     "loss": "answer-token cross entropy over supervised JSON tokens",
-    "note": "This validation-aware run uses the selected validation split during training and preserves the held-out test split for final evaluation."
   },
   "evaluation": {
     "split": "test",
     "num_samples": 448,
     "held_out_episode_count": 14,
-    "json_validity_rate": 0.875,
-    "action_macro_f1": 0.0026621494447581404,
-    "subtask_accuracy": 0.006696428571428571,
-    "transition_accuracy": 0.8504464285714286,
-    "next_action_accuracy": 0.024553571428571428,
-    "contact_accuracy": 0.6450892857142857,
-    "object_micro_f1": 0.22299431459254582,
     "quality_target": {
       "json_validity_rate": 0.98,
-      "status": "not_met"
     },
-    "previous_diagnostic_json_validity_rate": 0.8526785714285714
   },
-  "interpretation": "This is a real held-out multi-episode validation-aware diagnostic pilot proving the export, LoRA training with validation monitoring, evaluation, validation, and public-safe packaging loop. JSON validity improved over the earlier no-validation diagnostic run, but task-quality metrics remain weak, so it should be used as a baseline and error-analysis starting point rather than a strong Xperience-10M model.",
   "public_package": {
-    "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
     "audit_status": "pass",
     "contains_raw_xperience10m_data": false,
     "contains_qwen_base_weights": false,
     "contains_lora_weights": false,
-    "error_analysis": {
-      "status": "pass",
-      "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/error_analysis_summary.json",
-      "markdown_report": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/ERROR_ANALYSIS.md",
-      "groupings": [
-        "episode",
-        "action_family",
-        "train_seen_status",
-        "required_modality_state",
-        "object_category"
-      ],
-      "key_readouts": {
-        "parsed_prediction_rate": 0.8772321428571429,
-        "weakest_action_family": "locomotion",
-        "weakest_action_family_samples": 23,
-        "weakest_action_family_parsed_prediction_rate": 0.2608695652173913,
-        "seen_action_exact_rate": 0.04580152671755725,
-        "unseen_action_exact_rate": 0.015772870662460567,
-        "required_modality_state": "rrd_missing_only_required_modalities_present"
-      }
-    }
   },
   "required_next_steps": [
-    "Improve JSON-format reliability through prompt, decoding, constrained parsing, or target formatting changes.",
-    "Use the published held-out error analysis to prioritize JSON constraints, action/subtask formatting, object vocabulary handling, and missing-modality robustness.",
-    "Run a second validation-aware Qwen3-Omni pass only after the JSON/output contract is tightened.",
-    "Keep the same verified package contract for Cosmos-style world-model and VLA/policy branches."
   ]
 }

 {
+  "title": "Verified Qwen3-Omni LoRA 128-Episode Held-Out Result",
+  "status": "verified_full_128_episode_diagnostic_result",
+  "status_date": "2026-06-07",
   "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
   "adapter": "Qwen3-Omni LoRA",
   "dataset": "Ropedia Xperience-10M selected 128-episode pilot",
   },
   "training": {
     "num_processes": 8,
+    "epochs": 2,
     "lora_rank": 16,
     "lora_alpha": 32,
     "lora_dropout": 0.05,
     "history": [
       {
         "epoch": 1,
+        "train_loss": 0.41282760031950355,
+        "val_loss": 0.03288277983665466,
         "global_step": 356
+      },
+      {
+        "epoch": 2,
+        "train_loss": 0.027745448225544075,
+        "val_loss": 0.027823254466056824,
+        "global_step": 712
       }
     ],
     "loss": "answer-token cross entropy over supervised JSON tokens",
+    "note": "This final Qwen3-Omni LoRA pass reused the selected 96/16/16 episode setup, trained on all exported train windows with validation monitoring, and preserved the held-out test split for final evaluation."
   },
   "evaluation": {
     "split": "test",
     "num_samples": 448,
     "held_out_episode_count": 14,
+    "json_validity_rate": 0.9977678571428571,
+    "action_macro_f1": 0.0024331644885523347,
+    "subtask_accuracy": 0.002232142857142857,
+    "transition_accuracy": 0.9709821428571429,
+    "next_action_accuracy": 0.029017857142857144,
+    "contact_accuracy": 0.71875,
+    "object_micro_f1": 0.30160427807486634,
     "quality_target": {
       "json_validity_rate": 0.98,
+      "status": "met"
     },
+    "previous_validation_aware_json_validity_rate": 0.875
   },
+  "interpretation": "This is the final verified two-epoch Qwen3-Omni LoRA diagnostic result for the selected 128-episode setup. It meets the 98% JSON-validity target and improves transition, contact, and object metrics over the earlier validation-aware pilot, but action and subtask classification remain weak on held-out episodes, so this is still a baseline-quality diagnostic model rather than a strong Xperience-10M action recognizer.",
   "public_package": {
+    "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
     "audit_status": "pass",
     "contains_raw_xperience10m_data": false,
     "contains_qwen_base_weights": false,
     "contains_lora_weights": false,
+    "adapter_weights_repo": "cy0307/ropedia-qwen3-omni-lora-128ep"
   },
   "required_next_steps": [
+    "Verify the public Hugging Face LoRA adapter repository hashes after publication.",
+    "Publish the final verified package and refreshed comparison tables to all public mirrors, then run live publication verification.",
+    "Use the full-eval predictions for error analysis focused on action/subtask confusions and unseen-label behavior.",
+    "Keep the same verified package contract for the Cosmos3 world-model branch and any future VLA/policy branches."
   ]
 }

metrics/omni_model_comparison.json ADDED Viewed

	@@ -0,0 +1,513 @@

+{
+  "title": "Ropedia Xperience-10M Current Result Versions",
+  "generated_at_utc": "2026-06-06T23:26:13+00:00",
+  "status": "pass",
+  "version_count": 3,
+  "comparison_rule": "Compare only rows with the same scope and target. Single-episode raw-feature metrics, 128-episode metadata baselines, Qwen3 structured JSON metrics, and Cosmos3 future-window metrics answer different questions.",
+  "version_reading_notes": [
+    "Version 1 is the public-sample 12-task harness with minimal and neural heads.",
+    "Version 2 is the selected 128-episode same-split simple/NN baseline alignment.",
+    "Version 3 is the verified model-branch layer: the current final Qwen3-Omni LoRA package is the JSON-task diagnostic result, while Cosmos3-Nano is a future-window compatibility result rather than a full Cosmos diffusion fine-tune."
+  ],
+  "versions": [
+    {
+      "id": "v1_single_episode_public_sample",
+      "title": "Single-Episode Public-Sample Task Suite",
+      "status": "verified",
+      "scope": "one public Xperience-10M sample episode",
+      "source": "results/episode_task_suite/summary_report.json",
+      "split": "chronological 70/30 within one episode",
+      "counts": {
+        "episodes": 1,
+        "windows": 1161,
+        "frames": 5821,
+        "feature_dim": 8546,
+        "task_count": 12,
+        "neural_task_count": 12
+      },
+      "models": [
+        "minimal task heads",
+        "compact neural MLP task heads"
+      ],
+      "task_metrics": [
+        {
+          "task": "caption_grounding",
+          "task_display_name": "Language Grounding",
+          "simple_status": "pass",
+          "simple_primary_metric": "mrr",
+          "simple_primary_score": 0.016023479050338015,
+          "neural_status": "pass",
+          "neural_primary_metric": "mrr",
+          "neural_primary_score": 0.01684125567132316
+        },
+        {
+          "task": "contact_prediction",
+          "task_display_name": "Contact State Prediction",
+          "simple_status": "pass",
+          "simple_primary_metric": "macro_f1",
+          "simple_primary_score": 1.0,
+          "neural_status": "pass",
+          "neural_primary_metric": "macro_f1",
+          "neural_primary_score": 1.0
+        },
+        {
+          "task": "cross_modal_retrieval",
+          "task_display_name": "Cross-Modal Retrieval",
+          "simple_status": "pass",
+          "simple_primary_metric": "mrr",
+          "simple_primary_score": 0.26925966892956127,
+          "neural_status": "pass",
+          "neural_primary_metric": "mrr",
+          "neural_primary_score": 0.1299971898648288
+        },
+        {
+          "task": "hand_trajectory_forecast",
+          "task_display_name": "Hand Trajectory Forecasting",
+          "simple_status": "pass",
+          "simple_primary_metric": "mpjpe",
+          "simple_primary_score": 0.8646570444107056,
+          "neural_status": "pass",
+          "neural_primary_metric": "mpjpe",
+          "neural_primary_score": 0.10785018652677536
+        },
+        {
+          "task": "misalignment_detection",
+          "task_display_name": "Multimodal Synchronization Detection",
+          "simple_status": "pass",
+          "simple_primary_metric": "f1",
+          "simple_primary_score": 0.5051698670605613,
+          "neural_status": "pass",
+          "neural_primary_metric": "f1",
+          "neural_primary_score": 0.7152682255845944
+        },
+        {
+          "task": "modality_reconstruction",
+          "task_display_name": "Cross-Modal Reconstruction",
+          "simple_status": "pass",
+          "simple_primary_metric": "r2",
+          "simple_primary_score": -0.015271898913936655,
+          "neural_status": "pass",
+          "neural_primary_metric": "r2",
+          "neural_primary_score": -0.010171410134180991
+        },
+        {
+          "task": "next_action",
+          "task_display_name": "Next-Action Prediction",
+          "simple_status": "pass",
+          "simple_primary_metric": "macro_f1",
+          "simple_primary_score": 0.05925925925925927,
+          "neural_status": "pass",
+          "neural_primary_metric": "macro_f1",
+          "neural_primary_score": 0.04186046511627907
+        },
+        {
+          "task": "object_relevance",
+          "task_display_name": "Object Relevance Prediction",
+          "simple_status": "pass",
+          "simple_primary_metric": "micro_f1",
+          "simple_primary_score": 0.18034382095361662,
+          "neural_status": "pass",
+          "neural_primary_metric": "micro_f1",
+          "neural_primary_score": 0.1679279279279279
+        },
+        {
+          "task": "temporal_order",
+          "task_display_name": "Temporal Order Verification",
+          "simple_status": "pass",
+          "simple_primary_metric": "accuracy",
+          "simple_primary_score": 0.4540229885057471,
+          "neural_status": "pass",
+          "neural_primary_metric": "accuracy",
+          "neural_primary_score": 0.8577586206896551
+        },
+        {
+          "task": "timeline_action",
+          "task_display_name": "Action Recognition",
+          "simple_status": "pass",
+          "simple_primary_metric": "macro_f1",
+          "simple_primary_score": 0.05,
+          "neural_status": "pass",
+          "neural_primary_metric": "macro_f1",
+          "neural_primary_score": 0.014814814814814814
+        },
+        {
+          "task": "timeline_subtask",
+          "task_display_name": "Procedure Step Recognition",
+          "simple_status": "pass",
+          "simple_primary_metric": "macro_f1",
+          "simple_primary_score": 0.05056355513846935,
+          "neural_status": "pass",
+          "neural_primary_metric": "macro_f1",
+          "neural_primary_score": 0.02810810810810811
+        },
+        {
+          "task": "transition_detection",
+          "task_display_name": "Action Boundary Detection",
+          "simple_status": "pass",
+          "simple_primary_metric": "macro_f1",
+          "simple_primary_score": 0.6118237590630229,
+          "neural_status": "pass",
+          "neural_primary_metric": "macro_f1",
+          "neural_primary_score": 0.5862068965517241
+        }
+      ],
+      "interpretation": "This layer verifies the 12 task contracts and raw multimodal feature pipeline on the public sample. It is not a cross-episode benchmark."
+    },
+    {
+      "id": "v2_multi_episode_128_aligned_metadata_baselines",
+      "title": "128-Episode Aligned Simple/NN Baselines",
+      "status": "pass",
+      "scope": "selected 128-episode 96/16/16 split",
+      "source": "results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md",
+      "split": "train/val/test by selected episode/session",
+      "counts": {
+        "rows": 3808,
+        "split_counts": {
+          "train": 2848,
+          "val": 512,
+          "test": 448
+        },
+        "episode_counts": {
+          "test": 16,
+          "train": 96,
+          "val": 16
+        },
+        "task_count": 12,
+        "simple_supported_task_count": 8,
+        "neural_supported_task_count": 6
+      },
+      "models": [
+        "metadata/text simple baselines",
+        "metadata/text neural MLP baselines"
+      ],
+      "task_metrics": [
+        {
+          "task": "timeline_action",
+          "task_display_name": "Action Recognition",
+          "simple_status": "pass",
+          "simple_primary_metric": "macro_f1",
+          "simple_primary_score": 0.00017511601435951318,
+          "neural_status": "pass",
+          "neural_primary_metric": "macro_f1",
+          "neural_primary_score": 0.0
+        },
+        {
+          "task": "timeline_subtask",
+          "task_display_name": "Procedure Step Recognition",
+          "simple_status": "pass",
+          "simple_primary_metric": "macro_f1",
+          "simple_primary_score": 0.0,
+          "neural_status": "pass",
+          "neural_primary_metric": "macro_f1",
+          "neural_primary_score": 0.0
+        },
+        {
+          "task": "transition_detection",
+          "task_display_name": "Action Boundary Detection",
+          "simple_status": "pass",
+          "simple_primary_metric": "macro_f1",
+          "simple_primary_score": 0.5219803670507895,
+          "neural_status": "pass",
+          "neural_primary_metric": "macro_f1",
+          "neural_primary_score": 0.45822172492907925
+        },
+        {
+          "task": "next_action",
+          "task_display_name": "Next-Action Prediction",
+          "simple_status": "pass",
+          "simple_primary_metric": "macro_f1",
+          "simple_primary_score": 0.00019966057701906761,
+          "neural_status": "pass",
+          "neural_primary_metric": "macro_f1",
+          "neural_primary_score": 0.0
+        },
+        {
+          "task": "hand_trajectory_forecast",
+          "task_display_name": "Hand Trajectory Forecasting",
+          "simple_status": "unsupported_without_raw_128_feature_blocks",
+          "simple_primary_metric": "mpjpe",
+          "simple_primary_score": null,
+          "neural_status": "not_run",
+          "neural_primary_metric": "",
+          "neural_primary_score": null
+        },
+        {
+          "task": "contact_prediction",
+          "task_display_name": "Contact State Prediction",
+          "simple_status": "pass",
+          "simple_primary_metric": "macro_f1",
+          "simple_primary_score": 0.5167950693374422,
+          "neural_status": "pass",
+          "neural_primary_metric": "macro_f1",
+          "neural_primary_score": 0.21951219512195122
+        },
+        {
+          "task": "object_relevance",
+          "task_display_name": "Object Relevance Prediction",
+          "simple_status": "pass",
+          "simple_primary_metric": "micro_f1",
+          "simple_primary_score": 0.18221614227086183,
+          "neural_status": "pass",
+          "neural_primary_metric": "micro_f1",
+          "neural_primary_score": 0.1053878034339846
+        },
+        {
+          "task": "caption_grounding",
+          "task_display_name": "Language Grounding",
+          "simple_status": "pass",
+          "simple_primary_metric": "mrr",
+          "simple_primary_score": 0.012785504572093487,
+          "neural_status": "not_run",
+          "neural_primary_metric": "",
+          "neural_primary_score": null
+        },
+        {
+          "task": "cross_modal_retrieval",
+          "task_display_name": "Cross-Modal Retrieval",
+          "simple_status": "unsupported_without_raw_128_feature_blocks",
+          "simple_primary_metric": "mrr",
+          "simple_primary_score": null,
+          "neural_status": "not_run",
+          "neural_primary_metric": "",
+          "neural_primary_score": null
+        },
+        {
+          "task": "modality_reconstruction",
+          "task_display_name": "Cross-Modal Reconstruction",
+          "simple_status": "unsupported_without_raw_128_feature_blocks",
+          "simple_primary_metric": "r2",
+          "simple_primary_score": null,
+          "neural_status": "not_run",
+          "neural_primary_metric": "",
+          "neural_primary_score": null
+        },
+        {
+          "task": "temporal_order",
+          "task_display_name": "Temporal Order Verification",
+          "simple_status": "pass",
+          "simple_primary_metric": "f1",
+          "simple_primary_score": 0.32713178294573647,
+          "neural_status": "not_run",
+          "neural_primary_metric": "",
+          "neural_primary_score": null
+        },
+        {
+          "task": "misalignment_detection",
+          "task_display_name": "Multimodal Synchronization Detection",
+          "simple_status": "unsupported_without_raw_128_feature_blocks",
+          "simple_primary_metric": "f1",
+          "simple_primary_score": null,
+          "neural_status": "not_run",
+          "neural_primary_metric": "",
+          "neural_primary_score": null
+        }
+      ],
+      "interpretation": "This layer aligns the previous simple and neural baseline framing to the same selected 96/16/16 split used by the model branches. It uses public-safe JSONL metadata/text features, so raw-feature-only tasks remain explicitly unsupported until 128-run sensor feature blocks exist."
+    },
+    {
+      "id": "v3_multi_episode_foundation_model_branches",
+      "title": "128-Episode Foundation-Model Branches",
+      "status": "partial_verified",
+      "scope": "selected 128-episode split and compatible derived windows",
+      "source": "results/omni_finetune/verified_public/",
+      "split": "episode/session held-out split; exact task target depends on backbone contract",
+      "counts": {
+        "verified_branch_count": 4,
+        "qwen3_verified_package_count": 3,
+        "cosmos3_verified_package_count": 1
+      },
+      "models": [
+        "Qwen3-Omni LoRA",
+        "Cosmos3-Nano future-window compatibility branch"
+      ],
+      "branches": [
+        {
+          "id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
+          "title": "Cosmos3-Nano Future-Window World Model",
+          "status": "verified",
+          "backbone": "cosmos_world_model",
+          "dataset_contract": "xperience10m_future_window_world_model_v0",
+          "training_objective": "future_window_and_action_conditioned_world_modeling",
+          "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json",
+          "dataset_run_id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat",
+          "train_run_id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter",
+          "eval_run_id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
+          "counts": {
+            "dataset_samples": 3213,
+            "dataset_episodes": 119,
+            "split_counts": {
+              "train": 2403,
+              "test": 378,
+              "val": 432
+            },
+            "train_samples": 2403,
+            "val_samples": 432,
+            "eval_samples": 378,
+            "held_out_episode_count": 14,
+            "num_processes": 1
+          },
+          "primary_metrics": {
+            "future_retrieval_mrr": 0.022138720585222767,
+            "future_retrieval_recall_at_5": 0.015873015873015872,
+            "temporal_consistency": 0.09523809523809523,
+            "feature_reconstruction_error": 3479.218317102503,
+            "transition_accuracy": 0.9682539682539683,
+            "contact_accuracy": 0.7433862433862434,
+            "held_out_episode_count": 14
+          },
+          "history": [
+            {
+              "epoch": 0,
+              "train_loss": null,
+              "val_loss": null,
+              "note": "closed-form mean-delta adapter; no Cosmos diffusion weights fine-tuned in this compatibility run"
+            }
+          ]
+        },
+        {
+          "id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
+          "title": "Qwen3-Omni LoRA",
+          "status": "verified",
+          "backbone": "qwen3_omni_lora",
+          "dataset_contract": "xperience10m_episode_json_qa_v1",
+          "training_objective": "structured_episode_understanding_json_qa",
+          "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/verified_result_summary.json",
+          "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
+          "train_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora",
+          "eval_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
+          "counts": {
+            "dataset_samples": 3808,
+            "dataset_episodes": 119,
+            "split_counts": {
+              "train": 2848,
+              "val": 512,
+              "test": 448
+            },
+            "train_samples": 2848,
+            "val_samples": 512,
+            "eval_samples": 448,
+            "held_out_episode_count": 14,
+            "num_processes": 8
+          },
+          "primary_metrics": {
+            "json_validity_rate": 0.875,
+            "action_macro_f1": 0.0026621494447581404,
+            "subtask_accuracy": 0.006696428571428571,
+            "transition_accuracy": 0.8504464285714286,
+            "next_action_accuracy": 0.024553571428571428,
+            "contact_accuracy": 0.6450892857142857,
+            "object_micro_f1": 0.22299431459254582,
+            "held_out_episode_count": 14
+          },
+          "history": [
+            {
+              "epoch": 1,
+              "train_loss": 0.41304643672440994,
+              "val_loss": 0.0330660454928875,
+              "global_step": 356
+            }
+          ]
+        },
+        {
+          "id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
+          "title": "Qwen3-Omni LoRA",
+          "status": "verified",
+          "backbone": "qwen3_omni_lora",
+          "dataset_contract": "xperience10m_episode_json_qa_v1",
+          "training_objective": "structured_episode_understanding_json_qa",
+          "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/verified_result_summary.json",
+          "dataset_run_id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu",
+          "train_run_id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6",
+          "eval_run_id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
+          "counts": {
+            "dataset_samples": 3808,
+            "dataset_episodes": 119,
+            "split_counts": {
+              "train": 2848,
+              "val": 512,
+              "test": 448
+            },
+            "train_samples": 2848,
+            "val_samples": 0,
+            "eval_samples": 448,
+            "held_out_episode_count": 14,
+            "num_processes": 8
+          },
+          "primary_metrics": {
+            "json_validity_rate": 0.8526785714285714,
+            "action_macro_f1": 0.00213753459655099,
+            "subtask_accuracy": 0.004464285714285714,
+            "transition_accuracy": 0.828125,
+            "next_action_accuracy": 0.022321428571428572,
+            "contact_accuracy": 0.6517857142857143,
+            "object_micro_f1": 0.23062730627306272,
+            "held_out_episode_count": 14
+          },
+          "history": [
+            {
+              "epoch": 1,
+              "train_loss": 0.4121775626560694,
+              "val_loss": null,
+              "global_step": 356
+            }
+          ]
+        },
+        {
+          "id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
+          "title": "Qwen3-Omni LoRA",
+          "status": "verified",
+          "backbone": "qwen3_omni_lora",
+          "dataset_contract": "xperience10m_episode_json_qa_v1",
+          "training_objective": "structured_episode_understanding_json_qa",
+          "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/verified_result_summary.json",
+          "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
+          "train_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora",
+          "eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
+          "counts": {
+            "dataset_samples": 3808,
+            "dataset_episodes": 119,
+            "split_counts": {
+              "train": 2848,
+              "val": 512,
+              "test": 448
+            },
+            "train_samples": 2848,
+            "val_samples": 512,
+            "eval_samples": 448,
+            "held_out_episode_count": 14,
+            "num_processes": 8
+          },
+          "primary_metrics": {
+            "json_validity_rate": 0.9977678571428571,
+            "action_macro_f1": 0.0024331644885523347,
+            "subtask_accuracy": 0.002232142857142857,
+            "transition_accuracy": 0.9709821428571429,
+            "next_action_accuracy": 0.029017857142857144,
+            "contact_accuracy": 0.71875,
+            "object_micro_f1": 0.30160427807486634,
+            "held_out_episode_count": 14
+          },
+          "history": [
+            {
+              "epoch": 1,
+              "train_loss": 0.41282760031950355,
+              "val_loss": 0.03288277983665466,
+              "global_step": 356
+            },
+            {
+              "epoch": 2,
+              "train_loss": 0.027745448225544075,
+              "val_loss": 0.027823254466056824,
+              "global_step": 712
+            }
+          ]
+        }
+      ],
+      "interpretation": "This layer contains the held-out foundation-model packages. Qwen3-Omni packages evaluate structured JSON task prediction; Cosmos3-Nano currently evaluates a future-window world-model compatibility adapter, not a full diffusion-weight fine-tune."
+    }
+  ],
+  "pending": [
+    "Use the final Qwen3 full-eval package as the current Qwen result; older Qwen package rows remain historical diagnostics for comparison.",
+    "Promote Cosmos3 from compatibility adapter to full Cosmos3 fine-tuning only after a separate environment with matching Diffusers/Cosmos dependencies is prepared."
+  ]
+}

metrics/project_brief.json CHANGED Viewed

@@ -17,7 +17,7 @@
     },
     {
       "capability": "Scale-up planning",
-      "evidence": "verified 96/16/16 Qwen3-Omni validation-monitored diagnostic pilot, structured-output improvement path, Cosmos 3 branch, and policy-model candidates after action-space conversion"
     }
   ],
   "current_artifacts": [
@@ -43,7 +43,7 @@
     },
     {
       "layer": "Scale-up path",
-      "status": "A selected 96/16/16 Qwen3-Omni LoRA validation-monitored diagnostic pilot is verified; current model-quality metrics are weak and guide the next structured-output improvement pass"
     }
   ],
   "reading_order": [
@@ -54,8 +54,8 @@
     "Inspect results/episode_task_suite/feature_manifest.json to understand one model input.",
     "Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
   ],
-  "scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The validation-aware multi-episode Qwen3-Omni pilot verifies the training loop but does not yet show strong model quality.",
-  "next_stage": "Improve structured JSON reliability and error analysis before larger robustness or alternative-backbone claims.",
   "entry_points": {
     "visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
     "hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",

     },
     {
       "capability": "Scale-up planning",
+      "evidence": "final verified 96/16/16 Qwen3-Omni diagnostic result, same-split 128-episode baseline alignment, Cosmos3-Nano compatibility branch, and policy-model candidates after action-space conversion"
     }
   ],
   "current_artifacts": [
     },
     {
       "layer": "Scale-up path",
+      "status": "A selected 96/16/16 Qwen3-Omni LoRA final diagnostic result is verified; strict-JSON validity meets target, while weak action/subtask metrics guide the next error-analysis pass"
     }
   ],
   "reading_order": [
     "Inspect results/episode_task_suite/feature_manifest.json to understand one model input.",
     "Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
   ],
+  "scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The final multi-episode Qwen3-Omni diagnostic result verifies the training loop and strict-JSON output reliability, but does not yet show strong action/subtask model quality.",
+  "next_stage": "Improve action/subtask quality through error analysis before larger robustness or alternative-backbone claims.",
   "entry_points": {
     "visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
     "hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",

metrics/project_manifest.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "name": "Ropedia Xperience-10M Task Suite",
   "slug": "ropedia-xperience-10m-task-suite",
   "version": "0.1.0",
-  "status": "single_episode_suite_plus_qwen3_omni_diagnostic_pilot",
-  "last_metadata_update": "2026-06-05",
-  "summary": "Research-development repo built around one public Xperience-10M sample episode plus a verified selected-episode Qwen3-Omni diagnostic pilot.",
   "scope_boundary": {
     "raw_data_redistributed": false,
     "episode_count_verified": 1,
@@ -19,7 +19,9 @@
       "test": 16
     },
     "qwen3_omni_held_out_test_windows": 448,
-    "qwen3_omni_json_validity_rate": 0.875
   },
   "public_surfaces": {
     "github_repo": "https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite",
@@ -28,6 +30,7 @@
     "hf_static_space": "https://cy0307-ropedia-xperience-10m-task-suite.static.hf.space/",
     "hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts",
     "hf_model_repo": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines",
     "hf_collection": "https://huggingface.co/collections/cy0307/ropedia-xperience-10m-task-suite"
   },
   "upstream_sources": {

   "name": "Ropedia Xperience-10M Task Suite",
   "slug": "ropedia-xperience-10m-task-suite",
   "version": "0.1.0",
+  "status": "single_episode_suite_plus_final_qwen3_omni_diagnostic_result",
+  "last_metadata_update": "2026-06-07",
+  "summary": "Research-development repo built around one public Xperience-10M sample episode plus a final verified selected-episode Qwen3-Omni diagnostic result.",
   "scope_boundary": {
     "raw_data_redistributed": false,
     "episode_count_verified": 1,
       "test": 16
     },
     "qwen3_omni_held_out_test_windows": 448,
+    "qwen3_omni_json_validity_rate": 0.9977678571428571,
+    "qwen3_omni_json_quality_target_met": true,
+    "qwen3_omni_lora_adapter_repo": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep"
   },
   "public_surfaces": {
     "github_repo": "https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite",
     "hf_static_space": "https://cy0307-ropedia-xperience-10m-task-suite.static.hf.space/",
     "hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts",
     "hf_model_repo": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines",
+    "hf_qwen3_lora_adapter": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
     "hf_collection": "https://huggingface.co/collections/cy0307/ropedia-xperience-10m-task-suite"
   },
   "upstream_sources": {

metrics/project_packet.json CHANGED Viewed

@@ -12,7 +12,7 @@
     "raw_xperience10m_data_in_repo": false,
     "audio_feature_status": "Audio is one of the synchronized source modalities in the current task representation.",
     "qwen3_omni_32_episode_claim": false,
-    "qwen3_omni_status": "The selected 96/16/16 Qwen3-Omni validation-monitored diagnostic pilot is verified, with weak held-out metrics that guide the next structured-output improvement pass."
   },
   "reading_path": [
     {
@@ -41,7 +41,7 @@
         "docs/data/scope_claims_audit.json",
         "docs/data/website_integrity.json"
       ],
-      "readout": "The project status table and roadmap give the compact current-state summary. Single-episode task engineering, metrics, visualizations, public website integrity, mirror parity, and the validation-aware selected-episode Qwen3-Omni diagnostic pilot are implemented; stronger cross-episode model quality remains a follow-up."
     },
     {
       "step": 2,
@@ -116,7 +116,7 @@
         "scripts/omni/discover_xperience10m_sources.py",
         "docs/data/omni_finetune_verified_result.json"
       ],
-      "readout": "The selected-episode held-out Qwen3-Omni diagnostic pilot is verified. The next milestone is a validation-aware diagnostic run with stronger JSON-format reliability and error analysis."
     }
   ],
     "project_status": "PROJECT_STATUS.md",

     "raw_xperience10m_data_in_repo": false,
     "audio_feature_status": "Audio is one of the synchronized source modalities in the current task representation.",
     "qwen3_omni_32_episode_claim": false,
+    "qwen3_omni_status": "The selected 96/16/16 Qwen3-Omni final diagnostic result is verified, meets the strict-JSON target, and still has weak action/subtask metrics that guide the next error-analysis pass."
   },
   "reading_path": [
     {
         "docs/data/scope_claims_audit.json",
         "docs/data/website_integrity.json"
       ],
+      "readout": "The project status table and roadmap give the compact current-state summary. Single-episode task engineering, metrics, visualizations, public website integrity, mirror parity, same-split 128-episode baselines, the final selected-episode Qwen3-Omni diagnostic result, and the Cosmos3-Nano compatibility package are implemented; stronger action/subtask and full Cosmos model quality remain follow-ups."
     },
     {
       "step": 2,
         "scripts/omni/discover_xperience10m_sources.py",
         "docs/data/omni_finetune_verified_result.json"
       ],
+      "readout": "The selected-episode held-out Qwen3-Omni final diagnostic result is verified and JSON-format reliability meets the 98% target. The next milestone is action/subtask error analysis and a stronger model-quality run on the same split."
     }
   ],
     "project_status": "PROJECT_STATUS.md",

metrics/project_status.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "title": "Ropedia Xperience-10M Task Suite Project Status",
   "version": "2026-06-01",
-  "decision": "public_sample_pipeline_verified_qwen3_omni_validation_aware_diagnostic_pilot",
-  "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, and uses the selected-episode Qwen3-Omni validation-aware diagnostic pilot as a verified but weak cross-episode baseline.",
   "scope_boundary": {
     "validated_episode_count": 1,
     "aligned_frames": 5821,
@@ -25,8 +25,13 @@
       "val": 512,
       "test": 448
     },
-    "qwen3_omni_json_validity_rate": 0.875,
     "qwen3_omni_validation_aware": true,
     "multi_episode_128_aligned_baselines": true,
     "multi_episode_128_baseline_window_counts": {
       "train": 2848,
@@ -102,7 +107,7 @@
         "RESEARCH_ROADMAP.md",
         "docs/data/research_roadmap.json"
       ],
-      "readout": "The roadmap connects public-sample task development to the verified Qwen3-Omni diagnostic pilot, validation-aware diagnostics, foundation-model selection, robustness runs, world/policy branches, and the future Xperience-native pretraining goal."
     },
     {
       "area": "Foundation-model plan",
@@ -111,7 +116,7 @@
         "FOUNDATION_MODEL_PLAN.md",
         "docs/data/foundation_model_plan.json"
       ],
-      "readout": "Qwen3-Omni remains the first trainable held-out LoRA baseline; Cosmos 3 is added as the first world-model/action-generation branch; OpenVLA/openpi/GR00T are policy candidates after action targets are explicit."
     },
     {
       "area": "Omni model extension contract",
@@ -191,18 +196,39 @@
       ],
       "readout": "The earlier simple and neural baseline framing is aligned to the selected 96/16/16 episode split used by the Qwen3-Omni pilot. JSON-supported tasks have metadata/text simple and neural MLP metrics; raw-feature-only tasks are explicitly marked unsupported until 128-run sensor feature blocks are available."
     },
     {
       "area": "Qwen3-Omni fine-tuning",
-      "status": "verified_validation_aware_diagnostic_pilot_quality_target_not_met",
       "evidence": [
         "docs/data/omni_finetune_verified_result.json",
-        "results/omni_finetune/verified_public/",
-        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/",
         "scripts/omni/package_verified_omni_result.py",
         "scripts/omni/audit_verified_omni_package.py",
         "scripts/omni/analyze_qwen3_omni_errors.py"
       ],
-      "readout": "The selected 96/16/16 episode split produced a validation-aware public-safe held-out package with 3,808 exported windows, 512 validation windows, 448 test predictions, and derived error-analysis tables by episode, action family, train-seen status, required-modality state, and object category. JSON validity is 87.50%, below the 98% target, so it is a diagnostic baseline but not a strong model-quality result."
     },
     {
       "area": "Raw Xperience-10M redistribution",
@@ -228,12 +254,15 @@
     "Inspect SOURCE_ALIGNMENT_AUDIT.md before judging source-card consistency across public surfaces.",
     "Inspect XPERIENCE10M_DATASET_CARD_ALIGNMENT.md before judging dataset wording.",
     "Inspect results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md before comparing simple/NN baselines to the selected 128-episode setup.",
     "Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
   ],
   "current_reading_notes": [
-    "The validation-aware Qwen3-Omni diagnostic pilot is verified, but current held-out quality is still weak.",
-    "Use docs/data/omni_finetune_verified_result.json and the latest verified_public validation-aware package for current held-out results.",
     "The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
     "The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
     "Audio is one of the synchronized source modalities in the current task representation.",
     "The audio ablation report compares audio/no-audio variants across all 12 task contracts in results/audio_ablation/.",

 {
   "title": "Ropedia Xperience-10M Task Suite Project Status",
   "version": "2026-06-01",
+  "decision": "public_sample_pipeline_verified_128_aligned_baselines_qwen3_cosmos_comparison",
+  "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, and compares verified Qwen3-Omni and Cosmos3 branch packages as early cross-episode diagnostics.",
   "scope_boundary": {
     "validated_episode_count": 1,
     "aligned_frames": 5821,
       "val": 512,
       "test": 448
     },
+    "qwen3_omni_json_validity_rate": 0.9977678571428571,
     "qwen3_omni_validation_aware": true,
+    "qwen3_omni_json_quality_target_met": true,
+    "qwen3_omni_lora_adapter_repo": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
+    "cosmos3_nano_future_window_compatibility_verified": true,
+    "cosmos3_nano_future_window_test_predictions": 378,
+    "omni_model_comparison_available": true,
     "multi_episode_128_aligned_baselines": true,
     "multi_episode_128_baseline_window_counts": {
       "train": 2848,
         "RESEARCH_ROADMAP.md",
         "docs/data/research_roadmap.json"
       ],
+      "readout": "The roadmap connects public-sample task development to the final verified Qwen3-Omni diagnostic result, same-split baseline alignment, action/subtask error analysis, robustness runs, world/policy branches, and the future Xperience-native pretraining goal."
     },
     {
       "area": "Foundation-model plan",
         "FOUNDATION_MODEL_PLAN.md",
         "docs/data/foundation_model_plan.json"
       ],
+      "readout": "Qwen3-Omni remains the first trainable held-out LoRA baseline; Cosmos 3 is now represented by a verified Cosmos3-Nano future-window compatibility package and remains the first world-model/action-generation branch; OpenVLA/openpi/GR00T are policy candidates after action targets are explicit."
     },
     {
       "area": "Omni model extension contract",
       ],
       "readout": "The earlier simple and neural baseline framing is aligned to the selected 96/16/16 episode split used by the Qwen3-Omni pilot. JSON-supported tasks have metadata/text simple and neural MLP metrics; raw-feature-only tasks are explicitly marked unsupported until 128-run sensor feature blocks are available."
     },
+    {
+      "area": "Current result comparison",
+      "status": "verified_generated_summary",
+      "evidence": [
+        "docs/data/omni_model_comparison.json",
+        "results/omni_finetune/OMNI_MODEL_COMPARISON.md",
+        "scripts/omni/build_omni_model_comparison.py"
+      ],
+      "readout": "The public comparison separates three layers: the single-episode raw-feature task suite, the selected 128-episode simple/NN metadata baselines, and verified foundation-model branch packages for Qwen3-Omni and Cosmos3-Nano future-window compatibility."
+    },
     {
       "area": "Qwen3-Omni fine-tuning",
+      "status": "final_verified_diagnostic_result_json_target_met",
       "evidence": [
         "docs/data/omni_finetune_verified_result.json",
+        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/",
+        "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
         "scripts/omni/package_verified_omni_result.py",
         "scripts/omni/audit_verified_omni_package.py",
         "scripts/omni/analyze_qwen3_omni_errors.py"
       ],
+      "readout": "The selected 96/16/16 episode split produced a final public-safe held-out package with 3,808 exported windows, 512 validation windows, 448 test predictions, two training epochs, validation/audit summaries, and a public LoRA adapter repo. JSON validity is 99.78%, meeting the 98% target; transition accuracy is 97.10%, contact accuracy is 71.88%, object micro-F1 is 30.16%, and action/subtask metrics remain weak, so it is still a diagnostic baseline rather than a strong model-quality claim."
+    },
+    {
+      "area": "Cosmos3-Nano future-window branch",
+      "status": "verified_compatibility_result",
+      "evidence": [
+        "configs/omni_backbones/cosmos_world_model.json",
+        "scripts/omni/export_cosmos3_future_window_dataset.py",
+        "scripts/omni/eval_cosmos3_future_window_retrieval.py",
+        "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json"
+      ],
+      "readout": "The Cosmos3-Nano branch now has a public-safe verified future-window compatibility package with 3,213 future-window samples, 378 held-out test predictions, future retrieval MRR 0.0221, temporal consistency 0.0952, transition accuracy 0.9683, and contact accuracy 0.7434. It is a compatibility adapter result, not a full Cosmos diffusion-weight fine-tune."
     },
     {
       "area": "Raw Xperience-10M redistribution",
     "Inspect SOURCE_ALIGNMENT_AUDIT.md before judging source-card consistency across public surfaces.",
     "Inspect XPERIENCE10M_DATASET_CARD_ALIGNMENT.md before judging dataset wording.",
     "Inspect results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md before comparing simple/NN baselines to the selected 128-episode setup.",
+    "Inspect docs/data/omni_model_comparison.json before comparing the current three result versions.",
     "Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
   ],
   "current_reading_notes": [
+    "The final Qwen3-Omni diagnostic result is verified and meets the strict-JSON target, but action/subtask held-out quality is still weak.",
+    "Use docs/data/omni_model_comparison.json to compare the single-episode task suite, 128-episode aligned baselines, and verified Qwen3/Cosmos branch packages without mixing incompatible metric targets.",
+    "Use docs/data/omni_finetune_verified_result.json and the latest verified_public final Qwen package for current held-out results.",
     "The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
+    "The Cosmos3-Nano future-window branch is verified as a compatibility adapter result; full Cosmos diffusion-weight fine-tuning remains pending.",
     "The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
     "Audio is one of the synchronized source modalities in the current task representation.",
     "The audio ablation report compares audio/no-audio variants across all 12 task contracts in results/audio_ablation/.",

metrics/publication_audit.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-06T17:44:50+00:00",
   "checks": [
     {
       "name": "required_publication_assets_present",
@@ -182,8 +182,8 @@
     "github_repo": {
       "root": "repo",
       "exists": true,
-      "file_count": 517,
-      "text_file_count": 440,
       "largest_file": {
         "path": "tmp/omni_128_dataset_fetch/dataset.jsonl",
         "bytes": 582271586
@@ -193,8 +193,8 @@
     "hf_space_bundle": {
       "root": "hf_publish/space",
       "exists": true,
-      "file_count": 428,
-      "text_file_count": 352,
       "largest_file": {
         "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
         "bytes": 55702978
@@ -204,8 +204,8 @@
     "hf_artifact_bundle": {
       "root": "hf_publish/artifacts",
       "exists": true,
-      "file_count": 588,
-      "text_file_count": 488,
       "largest_file": {
         "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
         "bytes": 55702978
@@ -215,8 +215,8 @@
     "hf_model_bundle": {
       "root": "hf_publish/model",
       "exists": true,
-      "file_count": 775,
-      "text_file_count": 640,
       "largest_file": {
         "path": "pytorch_model.bin",
         "bytes": 93495480

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-06T23:31:25+00:00",
   "checks": [
     {
       "name": "required_publication_assets_present",
     "github_repo": {
       "root": "repo",
       "exists": true,
+      "file_count": 586,
+      "text_file_count": 497,
       "largest_file": {
         "path": "tmp/omni_128_dataset_fetch/dataset.jsonl",
         "bytes": 582271586
     "hf_space_bundle": {
       "root": "hf_publish/space",
       "exists": true,
+      "file_count": 460,
+      "text_file_count": 380,
       "largest_file": {
         "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
         "bytes": 55702978
     "hf_artifact_bundle": {
       "root": "hf_publish/artifacts",
       "exists": true,
+      "file_count": 631,
+      "text_file_count": 527,
       "largest_file": {
         "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
         "bytes": 55702978
     "hf_model_bundle": {
       "root": "hf_publish/model",
       "exists": true,
+      "file_count": 819,
+      "text_file_count": 680,
       "largest_file": {
         "path": "pytorch_model.bin",
         "bytes": 93495480

metrics/reproducibility_matrix.json CHANGED Viewed

@@ -79,10 +79,10 @@
     },
     {
       "id": "qwen3_omni_multi_episode_pilot",
-      "status": "verified_diagnostic_pilot_not_publicly_rerunnable_without_gated_data",
       "command": "scripts/omni/build_qwen3_omni_dataset.py and scripts/omni/train_qwen3_omni_lora.py on the selected gated episodes",
-      "expected": "verified diagnostic LoRA package with 3,808 exported windows, 2,848 train windows, and 448 held-out test predictions",
-      "boundary": "the public package records metrics and manifests, but rerunning requires gated Xperience-10M episode access and base-model weights; current JSON validity is 87.50%, below the 98% target"
     }
   ]
 }

     },
     {
       "id": "qwen3_omni_multi_episode_pilot",
+      "status": "verified_final_diagnostic_result_not_publicly_rerunnable_without_gated_data",
       "command": "scripts/omni/build_qwen3_omni_dataset.py and scripts/omni/train_qwen3_omni_lora.py on the selected gated episodes",
+      "expected": "verified final diagnostic LoRA package with 3,808 exported windows, 2,848 train windows, and 448 held-out test predictions",
+      "boundary": "the public package records metrics and manifests, but rerunning requires gated Xperience-10M episode access and base-model weights; current JSON validity is 99.78%, meeting the 98% target, while action/subtask metrics remain weak"
     }
   ]
 }

metrics/research_directions.json CHANGED Viewed

@@ -30,6 +30,12 @@
         "contact_prediction",
         "object_relevance"
       ],
       "counts": {
         "direct": 2,
         "proxy": 2,
@@ -54,6 +60,11 @@
         "modality_reconstruction",
         "misalignment_detection"
       ],
       "counts": {
         "direct": 0,
         "proxy": 2,
@@ -86,6 +97,19 @@
         "temporal_order",
         "misalignment_detection"
       ],
       "counts": {
         "direct": 6,
         "proxy": 2,
@@ -116,6 +140,17 @@
         "temporal_order",
         "misalignment_detection"
       ],
       "counts": {
         "direct": 0,
         "proxy": 6,
@@ -137,6 +172,8 @@
       },
       "why": "Reads egocentric sensor state as the current human action; also provides a weak human-motion readout.",
       "current_limit": "Chronological single-episode split creates unseen future action classes.",
       "metric": {
         "key": "macro_f1",
         "name": "macro-F1",
@@ -158,6 +195,8 @@
       },
       "why": "Segments egocentric task state and provides a first proxy for symbolic world/task state.",
       "current_limit": "Single-episode ordering makes future subtasks hard to generalize.",
       "metric": {
         "key": "macro_f1",
         "name": "macro-F1",
@@ -179,6 +218,8 @@
       },
       "why": "Localizes egocentric task boundaries and diagnoses temporal state changes.",
       "current_limit": "Boundary class is sparse, so accuracy alone is misleading.",
       "metric": {
         "key": "macro_f1",
         "name": "macro-F1",
@@ -200,6 +241,8 @@
       },
       "why": "Tests action intention/task-flow prediction from egocentric context.",
       "current_limit": "Unseen future labels dominate the single-episode chronological test.",
       "metric": {
         "key": "macro_f1",
         "name": "macro-F1",
@@ -221,6 +264,8 @@
       },
       "why": "Directly predicts human hand motion and supports hand-object interaction modeling.",
       "current_limit": "Forecasting is window-level and not yet a full sequence or policy model.",
       "metric": {
         "key": "mpjpe",
         "name": "MPJPE",
@@ -242,6 +287,8 @@
       },
       "why": "Targets physical interaction state, a core affordance and manipulation signal.",
       "current_limit": "The public sample is degenerate for this target because one class dominates.",
       "metric": {
         "key": "macro_f1",
         "name": "macro-F1",
@@ -264,6 +311,8 @@
       },
       "why": "Connects egocentric activity to manipulated objects and early object-centric state.",
       "current_limit": "Object labels are language-derived and sparse in one episode.",
       "metric": {
         "key": "micro_f1",
         "name": "micro-F1",
@@ -285,6 +334,8 @@
       },
       "why": "Grounds language annotation into egocentric sensor time and task state.",
       "current_limit": "Bag-of-objects language features are too weak for rich grounding.",
       "metric": {
         "key": "mrr",
         "name": "MRR",
@@ -307,6 +358,8 @@
       },
       "why": "Tests whether synchronized modalities identify the same 4D moment, a prerequisite for reconstruction and world modeling.",
       "current_limit": "Retrieval shows an alignment signal, not geometric reconstruction.",
       "metric": {
         "key": "mrr",
         "name": "MRR",
@@ -328,6 +381,8 @@
       },
       "why": "Predicts visual/depth state from non-target sensors as a weak reconstruction/world-model objective.",
       "current_limit": "Feature-vector reconstruction is not pixel, depth-map, mesh, NeRF, or Gaussian reconstruction.",
       "metric": {
         "key": "r2",
         "name": "R2",
@@ -349,6 +404,8 @@
       },
       "why": "Checks whether features encode local time direction and task progression.",
       "current_limit": "Only local adjacent ordering, not long-horizon causal modeling.",
       "metric": {
         "key": "f1",
         "name": "F1",
@@ -371,6 +428,8 @@
       },
       "why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models.",
       "current_limit": "Synthetic shifts diagnose alignment but do not solve calibration or mapping.",
       "metric": {
         "key": "f1",
         "name": "F1",

         "contact_prediction",
         "object_relevance"
       ],
+      "task_display_names": [
+        "Action Recognition",
+        "Hand Trajectory Forecasting",
+        "Contact State Prediction",
+        "Object Relevance Prediction"
+      ],
       "counts": {
         "direct": 2,
         "proxy": 2,
         "modality_reconstruction",
         "misalignment_detection"
       ],
+      "task_display_names": [
+        "Cross-Modal Retrieval",
+        "Cross-Modal Reconstruction",
+        "Multimodal Synchronization Detection"
+      ],
       "counts": {
         "direct": 0,
         "proxy": 2,
         "temporal_order",
         "misalignment_detection"
       ],
+      "task_display_names": [
+        "Action Recognition",
+        "Procedure Step Recognition",
+        "Action Boundary Detection",
+        "Next-Action Prediction",
+        "Hand Trajectory Forecasting",
+        "Contact State Prediction",
+        "Object Relevance Prediction",
+        "Language Grounding",
+        "Cross-Modal Retrieval",
+        "Temporal Order Verification",
+        "Multimodal Synchronization Detection"
+      ],
       "counts": {
         "direct": 6,
         "proxy": 2,
         "temporal_order",
         "misalignment_detection"
       ],
+      "task_display_names": [
+        "Procedure Step Recognition",
+        "Action Boundary Detection",
+        "Next-Action Prediction",
+        "Object Relevance Prediction",
+        "Language Grounding",
+        "Cross-Modal Retrieval",
+        "Cross-Modal Reconstruction",
+        "Temporal Order Verification",
+        "Multimodal Synchronization Detection"
+      ],
       "counts": {
         "direct": 0,
         "proxy": 6,
       },
       "why": "Reads egocentric sensor state as the current human action; also provides a weak human-motion readout.",
       "current_limit": "Chronological single-episode split creates unseen future action classes.",
+      "display_name": "Action Recognition",
+      "artifact_id": "timeline_action",
       "metric": {
         "key": "macro_f1",
         "name": "macro-F1",
       },
       "why": "Segments egocentric task state and provides a first proxy for symbolic world/task state.",
       "current_limit": "Single-episode ordering makes future subtasks hard to generalize.",
+      "display_name": "Procedure Step Recognition",
+      "artifact_id": "timeline_subtask",
       "metric": {
         "key": "macro_f1",
         "name": "macro-F1",
       },
       "why": "Localizes egocentric task boundaries and diagnoses temporal state changes.",
       "current_limit": "Boundary class is sparse, so accuracy alone is misleading.",
+      "display_name": "Action Boundary Detection",
+      "artifact_id": "transition_detection",
       "metric": {
         "key": "macro_f1",
         "name": "macro-F1",
       },
       "why": "Tests action intention/task-flow prediction from egocentric context.",
       "current_limit": "Unseen future labels dominate the single-episode chronological test.",
+      "display_name": "Next-Action Prediction",
+      "artifact_id": "next_action",
       "metric": {
         "key": "macro_f1",
         "name": "macro-F1",
       },
       "why": "Directly predicts human hand motion and supports hand-object interaction modeling.",
       "current_limit": "Forecasting is window-level and not yet a full sequence or policy model.",
+      "display_name": "Hand Trajectory Forecasting",
+      "artifact_id": "hand_trajectory_forecast",
       "metric": {
         "key": "mpjpe",
         "name": "MPJPE",
       },
       "why": "Targets physical interaction state, a core affordance and manipulation signal.",
       "current_limit": "The public sample is degenerate for this target because one class dominates.",
+      "display_name": "Contact State Prediction",
+      "artifact_id": "contact_prediction",
       "metric": {
         "key": "macro_f1",
         "name": "macro-F1",
       },
       "why": "Connects egocentric activity to manipulated objects and early object-centric state.",
       "current_limit": "Object labels are language-derived and sparse in one episode.",
+      "display_name": "Object Relevance Prediction",
+      "artifact_id": "object_relevance",
       "metric": {
         "key": "micro_f1",
         "name": "micro-F1",
       },
       "why": "Grounds language annotation into egocentric sensor time and task state.",
       "current_limit": "Bag-of-objects language features are too weak for rich grounding.",
+      "display_name": "Language Grounding",
+      "artifact_id": "caption_grounding",
       "metric": {
         "key": "mrr",
         "name": "MRR",
       },
       "why": "Tests whether synchronized modalities identify the same 4D moment, a prerequisite for reconstruction and world modeling.",
       "current_limit": "Retrieval shows an alignment signal, not geometric reconstruction.",
+      "display_name": "Cross-Modal Retrieval",
+      "artifact_id": "cross_modal_retrieval",
       "metric": {
         "key": "mrr",
         "name": "MRR",
       },
       "why": "Predicts visual/depth state from non-target sensors as a weak reconstruction/world-model objective.",
       "current_limit": "Feature-vector reconstruction is not pixel, depth-map, mesh, NeRF, or Gaussian reconstruction.",
+      "display_name": "Cross-Modal Reconstruction",
+      "artifact_id": "modality_reconstruction",
       "metric": {
         "key": "r2",
         "name": "R2",
       },
       "why": "Checks whether features encode local time direction and task progression.",
       "current_limit": "Only local adjacent ordering, not long-horizon causal modeling.",
+      "display_name": "Temporal Order Verification",
+      "artifact_id": "temporal_order",
       "metric": {
         "key": "f1",
         "name": "F1",
       },
       "why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models.",
       "current_limit": "Synthetic shifts diagnose alignment but do not solve calibration or mapping.",
+      "display_name": "Multimodal Synchronization Detection",
+      "artifact_id": "misalignment_detection",
       "metric": {
         "key": "f1",
         "name": "F1",

metrics/research_roadmap.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Research Roadmap",
-  "summary": "Staged path from the public-sample task lab to a verified validation-aware Qwen3-Omni diagnostic pilot, structured-output improvement pass, foundation-model selection, world/policy branches, and a future Xperience-native embodied foundation model.",
-  "current_decision_point": "Keep the public-sample task suite as the development harness, use the verified selected-episode Qwen3-Omni validation-aware diagnostic pilot and the same-split 128-episode simple/NN metadata baselines as the first cross-episode references, improve structured-output reliability and task-quality error analysis, then branch into Cosmos 3 world modeling and policy-model experiments after their targets are implemented. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
   "additional_development_directions": {
     "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
     "source_json": "docs/data/additional_development_directions.json",
@@ -52,7 +52,7 @@
     },
     {
       "id": "qwen3_omni_lora_diagnostic_pilot",
-      "name": "Qwen3-Omni LoRA Validation-Aware Diagnostic Pilot",
       "status": "verified_baseline",
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
       "deliverables": [
@@ -63,7 +63,8 @@
         "held-out predictions",
         "metrics",
         "confusion matrices",
-        "run report"
       ],
       "completion_evidence": [
         "docs/data/omni_finetune_verified_result.json",
@@ -75,7 +76,7 @@
         "predictions.jsonl",
         "RUN_REPORT.md"
       ],
-      "reader_takeaway": "The first omni-model pilot establishes the full held-out training/validation/evaluation loop, but the weak metrics make it a diagnostic baseline."
     },
     {
       "id": "multi_episode_128_same_split_baselines",
@@ -97,23 +98,23 @@
     },
     {
       "id": "qwen3_omni_structured_output_error_analysis",
-      "name": "Structured-Output And Error-Analysis Pass",
       "status": "active_next_step",
-      "entry_condition": "The validation-aware diagnostic package exists and shows weak held-out quality.",
       "deliverables": [
         "same 96/16/16 episode split",
-        "stricter JSON decoding or target formatting",
-        "episode/action/object error analysis",
         "held-out test evaluation",
-        "comparison to the verified validation-aware baseline"
       ],
       "completion_evidence": [
-        "quality-target report",
         "error-analysis tables",
-        "held-out metrics",
         "verified public-safe package"
       ],
-      "reader_takeaway": "The next pass should improve output reliability and task metrics before larger model-quality claims."
     },
     {
       "id": "foundation_model_selection_matrix",

 {
   "title": "Ropedia Xperience-10M Research Roadmap",
+  "summary": "Staged path from the public-sample task lab to a final verified Qwen3-Omni diagnostic result, same-split 128-episode baseline alignment, action/subtask error analysis, foundation-model selection, world/policy branches, and a future Xperience-native embodied foundation model.",
+  "current_decision_point": "Keep the public-sample task suite as the development harness, use the final verified selected-episode Qwen3-Omni diagnostic result and the same-split 128-episode simple/NN metadata baselines as the first cross-episode references, improve action/subtask quality through error analysis, then branch into Cosmos 3 world modeling and policy-model experiments after their targets are implemented. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
   "additional_development_directions": {
     "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
     "source_json": "docs/data/additional_development_directions.json",
     },
     {
       "id": "qwen3_omni_lora_diagnostic_pilot",
+      "name": "Qwen3-Omni LoRA Final Diagnostic Result",
       "status": "verified_baseline",
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
       "deliverables": [
         "held-out predictions",
         "metrics",
         "confusion matrices",
+        "run report",
+        "public LoRA adapter repo"
       ],
       "completion_evidence": [
         "docs/data/omni_finetune_verified_result.json",
         "predictions.jsonl",
         "RUN_REPORT.md"
       ],
+      "reader_takeaway": "The final omni-model diagnostic result establishes the full held-out training/validation/evaluation loop and meets the strict-JSON target, but weak action/subtask metrics make it a diagnostic baseline."
     },
     {
       "id": "multi_episode_128_same_split_baselines",
     },
     {
       "id": "qwen3_omni_structured_output_error_analysis",
+      "name": "Action/Subtask Error-Analysis Pass",
       "status": "active_next_step",
+      "entry_condition": "The final diagnostic package meets strict JSON validity but has weak action/subtask held-out quality.",
       "deliverables": [
         "same 96/16/16 episode split",
+        "action/subtask confusion analysis",
+        "unseen-label analysis",
+        "object/action family breakdowns",
         "held-out test evaluation",
+        "comparison to the final verified Qwen baseline"
       ],
       "completion_evidence": [
         "error-analysis tables",
+        "held-out metrics by failure type",
         "verified public-safe package"
       ],
+      "reader_takeaway": "The next pass should improve action/subtask quality before larger model-quality claims."
     },
     {
       "id": "foundation_model_selection_matrix",

metrics/research_roadmap_interactive.json CHANGED Viewed

@@ -2035,7 +2035,7 @@
         "step": 1
       },
       {
-        "action": "Run validation-aware Qwen3-Omni LoRA to improve the verified diagnostic baseline.",
         "name": "First held-out baseline",
         "step": 2
       },
@@ -2222,7 +2222,7 @@
     ],
     "status": "planning_artifact"
   },
-  "generated_at_utc": "2026-06-06T13:49:32+00:00",
   "omni_plan": {
     "adapter": "LoRA rank 16, alpha 32, dropout 0.05",
     "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
@@ -2303,33 +2303,53 @@
         "held-out predictions",
         "metrics",
         "confusion matrices",
-        "run report"
       ],
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
       "id": "qwen3_omni_lora_diagnostic_pilot",
-      "name": "Qwen3-Omni LoRA Validation-Aware Diagnostic Pilot",
-      "reader_takeaway": "The first omni-model pilot establishes the full held-out training/validation/evaluation loop, but the weak metrics make it a diagnostic baseline.",
       "stage": "future",
       "status": "verified_baseline"
     },
     {
       "completion_evidence": [
-        "quality-target report",
         "error-analysis tables",
-        "held-out metrics",
         "verified public-safe package"
       ],
       "deliverables": [
         "same 96/16/16 episode split",
-        "stricter JSON decoding or target formatting",
-        "episode/action/object error analysis",
         "held-out test evaluation",
-        "comparison to the verified validation-aware baseline"
       ],
-      "entry_condition": "The validation-aware diagnostic package exists and shows weak held-out quality.",
       "id": "qwen3_omni_structured_output_error_analysis",
-      "name": "Structured-Output And Error-Analysis Pass",
-      "reader_takeaway": "The next pass should improve output reliability and task metrics before larger model-quality claims.",
       "stage": "future",
       "status": "active_next_step"
     },
@@ -2428,7 +2448,7 @@
       "visualization.rrd"
     ],
     "selection_strategy": "stratified_round_robin_by_top_level_session",
-    "status": "verified_validation_aware_diagnostic_pilot",
     "target_episodes": 128,
     "valid_candidates": 12102
   },

         "step": 1
       },
       {
+        "action": "Run Qwen3-Omni action/subtask error analysis and targeted reruns to improve the verified diagnostic baseline.",
         "name": "First held-out baseline",
         "step": 2
       },
     ],
     "status": "planning_artifact"
   },
+  "generated_at_utc": "2026-06-06T23:26:13+00:00",
   "omni_plan": {
     "adapter": "LoRA rank 16, alpha 32, dropout 0.05",
     "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
         "held-out predictions",
         "metrics",
         "confusion matrices",
+        "run report",
+        "public LoRA adapter repo"
       ],
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
       "id": "qwen3_omni_lora_diagnostic_pilot",
+      "name": "Qwen3-Omni LoRA Final Diagnostic Result",
+      "reader_takeaway": "The final omni-model diagnostic result establishes the full held-out training/validation/evaluation loop and meets the strict-JSON target, but weak action/subtask metrics make it a diagnostic baseline.",
       "stage": "future",
       "status": "verified_baseline"
     },
     {
       "completion_evidence": [
+        "results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md",
+        "results/omni_finetune/multi_episode_128_task_baselines/summary_report.json",
+        "scripts/omni/run_128_task_baselines.py"
+      ],
+      "deliverables": [
+        "same 12 task ids",
+        "simple metadata/text baselines",
+        "neural MLP baselines for JSON-supported labels",
+        "explicit unsupported markers for raw-feature-only tasks"
+      ],
+      "entry_condition": "Derived Qwen JSONL export for the selected 96/16/16 split.",
+      "id": "multi_episode_128_same_split_baselines",
+      "name": "128-Episode Same-Split Simple/NN Baselines",
+      "reader_takeaway": "The simple and neural baseline framing is now aligned to the selected 128-episode setup; trajectory, retrieval, reconstruction, and misalignment variants still need raw 128 feature blocks for exact feature-level reproduction.",
+      "stage": "future",
+      "status": "verified_companion_result"
+    },
+    {
+      "completion_evidence": [
         "error-analysis tables",
+        "held-out metrics by failure type",
         "verified public-safe package"
       ],
       "deliverables": [
         "same 96/16/16 episode split",
+        "action/subtask confusion analysis",
+        "unseen-label analysis",
+        "object/action family breakdowns",
         "held-out test evaluation",
+        "comparison to the final verified Qwen baseline"
       ],
+      "entry_condition": "The final diagnostic package meets strict JSON validity but has weak action/subtask held-out quality.",
       "id": "qwen3_omni_structured_output_error_analysis",
+      "name": "Action/Subtask Error-Analysis Pass",
+      "reader_takeaway": "The next pass should improve action/subtask quality before larger model-quality claims.",
       "stage": "future",
       "status": "active_next_step"
     },
       "visualization.rrd"
     ],
     "selection_strategy": "stratified_round_robin_by_top_level_session",
+    "status": "verified_full_128_episode_diagnostic_result",
     "target_episodes": 128,
     "valid_candidates": 12102
   },

metrics/research_takeaways.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Research Takeaways",
   "status": "pass",
-  "generated_at_utc": "2026-06-06T13:49:32+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
@@ -166,7 +166,7 @@
     {
       "id": "scale_requires_episodes",
       "title": "The next scientific unit is held-out episodes, not more adjacent windows",
-      "readout": "The selected Qwen3-Omni path now has a verified validation-aware held-out diagnostic pilot. It proves the cross-episode train/validation/eval loop, but the weak metrics show that structured-output reliability and task-quality error analysis are the next modeling problems.",
       "evidence": [
         {
           "label": "selected_episodes",
@@ -174,19 +174,19 @@
         },
         {
           "label": "held_out_test_windows",
-          "value": 448
         },
         {
           "label": "json_validity_rate",
-          "value": 0.875
         },
         {
           "label": "action_macro_f1",
-          "value": 0.0026621494447581404
         }
       ],
       "source": "docs/data/omni_finetune_verified_result.json",
-      "current_scope": "The selected-episode Qwen3-Omni validation-aware diagnostic pilot is verified, but held-out quality is still weak and JSON validity remains below the 98% target."
     }
   ]
 }

 {
   "title": "Ropedia Xperience-10M Research Takeaways",
   "status": "pass",
+  "generated_at_utc": "2026-06-06T23:26:13+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
     {
       "id": "scale_requires_episodes",
       "title": "The next scientific unit is held-out episodes, not more adjacent windows",
+      "readout": "The selected Qwen3-Omni path now has a verified two-epoch held-out diagnostic result. It proves the cross-episode train/validation/eval loop and meets the strict-JSON target, while weak action/subtask metrics remain the next modeling problem.",
       "evidence": [
         {
           "label": "selected_episodes",
         },
         {
           "label": "held_out_test_windows",
+          "value": 448
         },
         {
           "label": "json_validity_rate",
+          "value": 0.9977678571428571
         },
         {
           "label": "action_macro_f1",
+          "value": null
         }
       ],
       "source": "docs/data/omni_finetune_verified_result.json",
+      "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims."
     }
   ]
 }

metrics/scope_claims_audit.json CHANGED Viewed

@@ -1,15 +1,15 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-06T17:43:55+00:00",
   "summary": {
     "qwen3_omni_verified_diagnostic_pilot": true,
     "dataset_manifest_num_episodes": 119,
     "dataset_manifest_num_samples": 3808,
     "training_metadata_num_train_samples": 2848,
     "eval_num_samples": 448,
-    "eval_json_validity_rate": 0.875,
-    "quality_target_met": false,
-    "historical_identifier_count": 132,
     "public_32_episode_status_file_count": 1,
     "failure_count": 0
   },
@@ -25,7 +25,7 @@
     {
       "name": "summary_metrics_preserves_verified_diagnostic_status",
       "status": "pass",
-      "detail": "The selected-episode Qwen3-Omni validation-aware diagnostic pilot is verified, but held-out quality is still weak and JSON validity remains below the 98% target.",
       "evidence": [
         "docs/data/summary_metrics.json"
       ]
@@ -35,7 +35,7 @@
       "status": "pass",
       "detail": "episodes=119, samples=3808, split_counts={'train': 2848, 'val': 512, 'test': 448}",
       "evidence": [
-        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/dataset/dataset_manifest.json"
       ]
     },
     {
@@ -43,15 +43,15 @@
       "status": "pass",
       "detail": "train=2848, val=512, processes=8",
       "evidence": [
-        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/training/training_metadata.json"
       ]
     },
     {
       "name": "verified_package_eval_records_real_held_out_metrics",
       "status": "pass",
-      "detail": "samples=448, split=test, held_out=14, json_validity=0.875",
       "evidence": [
-        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/metrics.json"
       ]
     },
     {
@@ -59,7 +59,7 @@
       "status": "pass",
       "detail": "audit_status=pass, issues=0",
       "evidence": [
-        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/package_audit.json"
       ]
     },
     {
@@ -84,7 +84,7 @@
     {
       "name": "historical_32ep_identifiers_are_confined_to_readiness_artifacts",
       "status": "pass",
-      "detail": "historical identifiers found in result provenance files=132",
       "evidence": [
         "results/omni_finetune/"
       ]
@@ -97,16 +97,6 @@
     }
   ],
   "historical_identifiers": [
-    {
-      "classification": "historical_identifier_in_readiness_artifact",
-      "path": "results/omni_finetune/HF_UPLOAD.md",
-      "line": 5,
-      "patterns": [
-        "qwen3_omni_32ep",
-        "xperience10m_qwen3_omni_32ep"
-      ],
-      "example": "- `results/omni_finetune/adapter_lora/` (`xperience10m_qwen3_omni_32ep_lora`)"
-    },
     {
       "classification": "historical_identifier_in_readiness_artifact",
       "path": "results/omni_finetune/XPERIENCE10M_128_DATA_PREPARATION_AND_FINETUNE_PLAN.md",
@@ -421,8 +411,19 @@
         "ropedia-episode-task-suite"
       ],
       "example": "{\"id\": \"xperience-10m-sample:qa:52\", \"episode_id\": \"xperience-10m-sample\", \"split\": \"train\", \"target\": \"episode_qa\", \"prompt_type\": \"json_episode_understanding\", \"center_window\": {\"start_frame\": 1040, \"end_frame\": 1059, \"num_frames\": 20}, \"media\": {\"video_path"
     }
   ],
-  "historical_identifier_total_count": 132,
   "failures": []
 }

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-06T23:27:07+00:00",
   "summary": {
     "qwen3_omni_verified_diagnostic_pilot": true,
     "dataset_manifest_num_episodes": 119,
     "dataset_manifest_num_samples": 3808,
     "training_metadata_num_train_samples": 2848,
     "eval_num_samples": 448,
+    "eval_json_validity_rate": 0.9977678571428571,
+    "quality_target_met": true,
+    "historical_identifier_count": 131,
     "public_32_episode_status_file_count": 1,
     "failure_count": 0
   },
     {
       "name": "summary_metrics_preserves_verified_diagnostic_status",
       "status": "pass",
+      "detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims.",
       "evidence": [
         "docs/data/summary_metrics.json"
       ]
       "status": "pass",
       "detail": "episodes=119, samples=3808, split_counts={'train': 2848, 'val': 512, 'test': 448}",
       "evidence": [
+        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/dataset/dataset_manifest.json"
       ]
     },
     {
       "status": "pass",
       "detail": "train=2848, val=512, processes=8",
       "evidence": [
+        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/training/training_metadata.json"
       ]
     },
     {
       "name": "verified_package_eval_records_real_held_out_metrics",
       "status": "pass",
+      "detail": "samples=448, split=test, held_out=14, json_validity=0.9977678571428571",
       "evidence": [
+        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/eval/metrics.json"
       ]
     },
     {
       "status": "pass",
       "detail": "audit_status=pass, issues=0",
       "evidence": [
+        "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/package_audit.json"
       ]
     },
     {
     {
       "name": "historical_32ep_identifiers_are_confined_to_readiness_artifacts",
       "status": "pass",
+      "detail": "historical identifiers found in result provenance files=131",
       "evidence": [
         "results/omni_finetune/"
       ]
     }
   ],
   "historical_identifiers": [
     {
       "classification": "historical_identifier_in_readiness_artifact",
       "path": "results/omni_finetune/XPERIENCE10M_128_DATA_PREPARATION_AND_FINETUNE_PLAN.md",
         "ropedia-episode-task-suite"
       ],
       "example": "{\"id\": \"xperience-10m-sample:qa:52\", \"episode_id\": \"xperience-10m-sample\", \"split\": \"train\", \"target\": \"episode_qa\", \"prompt_type\": \"json_episode_understanding\", \"center_window\": {\"start_frame\": 1040, \"end_frame\": 1059, \"num_frames\": 20}, \"media\": {\"video_path"
+    },
+    {
+      "classification": "historical_identifier_in_readiness_artifact",
+      "path": "results/omni_finetune/dataset.jsonl",
+      "line": 28,
+      "patterns": [
+        "qwen3_omni_32ep",
+        "xperience10m_qwen3_omni_32ep",
+        "ropedia-episode-task-suite"
+      ],
+      "example": "{\"id\": \"xperience-10m-sample:qa:53\", \"episode_id\": \"xperience-10m-sample\", \"split\": \"train\", \"target\": \"episode_qa\", \"prompt_type\": \"json_episode_understanding\", \"center_window\": {\"start_frame\": 1060, \"end_frame\": 1079, \"num_frames\": 20}, \"media\": {\"video_path"
     }
   ],
+  "historical_identifier_total_count": 131,
   "failures": []
 }

metrics/single_episode_explorer.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "meta": {
-    "generated_at": "2026-06-03T12:47:16.188806+00:00",
     "window_count": 1161,
     "feature_dim": 8546,
     "object_label_rows": 1161,
@@ -16,12 +16,26 @@
     }
   },
   "tasks": {
-    "timeline_action": "Current Action Recognition",
-    "timeline_subtask": "Current Subtask Recognition",
-    "transition_detection": "Action Transition Detection",
     "next_action": "Next-Action Prediction",
     "contact_prediction": "Contact State Prediction",
-    "object_relevance": "Relevant Object Prediction"
   },
   "feature_blocks": [
     {
@@ -138,7 +152,7 @@
     },
     {
       "name": "audio_fisheye_cam0_aac",
-      "display": "Audio AAC",
       "modality": "audio",
       "start": 7343,
       "end": 7511,
@@ -163958,6 +163972,8 @@
   "ablation": {
     "best_by_task": {
       "caption_grounding": {
         "best": {
           "modality_group": "language",
           "modality_display": "Language",
@@ -163973,6 +163989,8 @@
         }
       },
       "contact_prediction": {
         "best": {
           "modality_group": "all_features",
           "modality_display": "All Features",
@@ -163988,6 +164006,8 @@
         }
       },
       "cross_modal_retrieval": {
         "best": {
           "modality_group": "all_features",
           "modality_display": "All Features",
@@ -164003,6 +164023,8 @@
         }
       },
       "hand_trajectory_forecast": {
         "best": {
           "modality_group": "inertial",
           "modality_display": "Inertial",
@@ -164018,6 +164040,8 @@
         }
       },
       "misalignment_detection": {
         "best": {
           "modality_group": "audio",
           "modality_display": "Audio",
@@ -164033,6 +164057,8 @@
         }
       },
       "modality_reconstruction": {
         "best": {
           "modality_group": "video",
           "modality_display": "Video",
@@ -164048,6 +164074,8 @@
         }
       },
       "next_action": {
         "best": {
           "modality_group": "language",
           "modality_display": "Language",
@@ -164063,6 +164091,8 @@
         }
       },
       "object_relevance": {
         "best": {
           "modality_group": "language",
           "modality_display": "Language",
@@ -164078,6 +164108,8 @@
         }
       },
       "temporal_order": {
         "best": {
           "modality_group": "pose_slam",
           "modality_display": "Pose + SLAM",
@@ -164093,6 +164125,8 @@
         }
       },
       "timeline_action": {
         "best": {
           "modality_group": "language",
           "modality_display": "Language",
@@ -164108,6 +164142,8 @@
         }
       },
       "timeline_subtask": {
         "best": {
           "modality_group": "language",
           "modality_display": "Language",
@@ -164123,6 +164159,8 @@
         }
       },
       "transition_detection": {
         "best": {
           "modality_group": "language",
           "modality_display": "Language",
@@ -164173,7 +164211,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_action",
@@ -164209,7 +164248,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_action",
@@ -164245,7 +164285,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_action",
@@ -164281,7 +164322,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_action",
@@ -164317,7 +164359,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_action",
@@ -164353,7 +164396,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_action",
@@ -164389,7 +164433,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_action",
@@ -164425,7 +164470,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_action",
@@ -164461,7 +164507,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_subtask",
@@ -164497,7 +164544,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_subtask",
@@ -164533,7 +164581,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_subtask",
@@ -164569,7 +164618,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_subtask",
@@ -164605,7 +164655,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_subtask",
@@ -164641,7 +164692,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_subtask",
@@ -164677,7 +164729,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_subtask",
@@ -164713,7 +164766,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_subtask",
@@ -164749,7 +164803,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "timeline_subtask",
@@ -164785,7 +164840,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "transition_detection",
@@ -164821,7 +164877,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "transition_detection",
@@ -164857,7 +164914,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "transition_detection",
@@ -164893,7 +164951,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "transition_detection",
@@ -164929,7 +164988,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "transition_detection",
@@ -164965,7 +165025,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "transition_detection",
@@ -165001,7 +165062,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "transition_detection",
@@ -165037,7 +165099,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "transition_detection",
@@ -165073,7 +165136,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "transition_detection",
@@ -165109,7 +165173,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "next_action",
@@ -165145,7 +165210,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "next_action",
@@ -165181,7 +165247,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "next_action",
@@ -165217,7 +165284,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "next_action",
@@ -165253,7 +165321,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "next_action",
@@ -165289,7 +165358,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "next_action",
@@ -165325,7 +165395,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "next_action",
@@ -165361,7 +165432,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "next_action",
@@ -165397,7 +165469,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "next_action",
@@ -165433,7 +165506,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "hand_trajectory_forecast",
@@ -165469,7 +165543,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "hand_trajectory_forecast",
@@ -165505,7 +165580,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "hand_trajectory_forecast",
@@ -165541,7 +165617,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "hand_trajectory_forecast",
@@ -165577,7 +165654,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "hand_trajectory_forecast",
@@ -165613,7 +165691,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "hand_trajectory_forecast",
@@ -165649,7 +165728,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "hand_trajectory_forecast",
@@ -165685,7 +165765,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "hand_trajectory_forecast",
@@ -165721,7 +165802,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "hand_trajectory_forecast",
@@ -165757,7 +165839,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "contact_prediction",
@@ -165793,7 +165876,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "contact_prediction",
@@ -165829,7 +165913,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "contact_prediction",
@@ -165865,7 +165950,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "contact_prediction",
@@ -165901,7 +165987,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "contact_prediction",
@@ -165937,7 +166024,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "contact_prediction",
@@ -165973,7 +166061,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "contact_prediction",
@@ -166009,7 +166098,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "contact_prediction",
@@ -166045,7 +166135,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "contact_prediction",
@@ -166081,7 +166172,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "object_relevance",
@@ -166117,7 +166209,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "object_relevance",
@@ -166153,7 +166246,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "object_relevance",
@@ -166189,7 +166283,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "object_relevance",
@@ -166225,7 +166320,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "object_relevance",
@@ -166261,7 +166357,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "object_relevance",
@@ -166297,7 +166394,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "object_relevance",
@@ -166333,7 +166431,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "object_relevance",
@@ -166369,7 +166468,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "object_relevance",
@@ -166405,7 +166505,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "caption_grounding",
@@ -166441,7 +166542,8 @@
         "top10_accuracy": "0.4454022988505747",
         "median_rank": "13.0",
         "mean_rank": "23.19827651977539",
-        "num_queries": "348"
       },
       {
         "task": "caption_grounding",
@@ -166477,7 +166579,8 @@
         "top10_accuracy": "0.034482758620689655",
         "median_rank": "162.0",
         "mean_rank": "161.4770050048828",
-        "num_queries": "348"
       },
       {
         "task": "caption_grounding",
@@ -166513,7 +166616,8 @@
         "top10_accuracy": "0.03735632183908046",
         "median_rank": "114.0",
         "mean_rank": "137.90805053710938",
-        "num_queries": "348"
       },
       {
         "task": "caption_grounding",
@@ -166549,7 +166653,8 @@
         "top10_accuracy": "0.04597701149425287",
         "median_rank": "143.5",
         "mean_rank": "155.4712677001953",
-        "num_queries": "348"
       },
       {
         "task": "caption_grounding",
@@ -166585,7 +166690,8 @@
         "top10_accuracy": "0.04885057471264368",
         "median_rank": "110.5",
         "mean_rank": "130.32470703125",
-        "num_queries": "348"
       },
       {
         "task": "caption_grounding",
@@ -166621,7 +166727,8 @@
         "top10_accuracy": "0.04597701149425287",
         "median_rank": "123.0",
         "mean_rank": "138.61207580566406",
-        "num_queries": "348"
       },
       {
         "task": "caption_grounding",
@@ -166657,7 +166764,8 @@
         "top10_accuracy": "0.07758620689655173",
         "median_rank": "141.0",
         "mean_rank": "152.14942932128906",
-        "num_queries": "348"
       },
       {
         "task": "caption_grounding",
@@ -166693,7 +166801,8 @@
         "top10_accuracy": "0.47126436781609193",
         "median_rank": "12.0",
         "mean_rank": "15.106322288513184",
-        "num_queries": "348"
       },
       {
         "task": "caption_grounding",
@@ -166729,7 +166838,8 @@
         "top10_accuracy": "0.06896551724137931",
         "median_rank": "132.0",
         "mean_rank": "137.30746459960938",
-        "num_queries": "348"
       },
       {
         "task": "cross_modal_retrieval",
@@ -166765,7 +166875,8 @@
         "top10_accuracy": "0.9798850574712644",
         "median_rank": "1.0",
         "mean_rank": "2.0862069129943848",
-        "num_queries": "348"
       },
       {
         "task": "cross_modal_retrieval",
@@ -166801,7 +166912,8 @@
         "top10_accuracy": "0.9798850574712644",
         "median_rank": "1.0",
         "mean_rank": "3.844827651977539",
-        "num_queries": "348"
       },
       {
         "task": "cross_modal_retrieval",
@@ -166837,7 +166949,8 @@
         "top10_accuracy": "0.8620689655172413",
         "median_rank": "1.0",
         "mean_rank": "5.729885101318359",
-        "num_queries": "348"
       },
       {
         "task": "cross_modal_retrieval",
@@ -166873,7 +166986,8 @@
         "top10_accuracy": "0.6551724137931034",
         "median_rank": "4.0",
         "mean_rank": "15.623562812805176",
-        "num_queries": "348"
       },
       {
         "task": "cross_modal_retrieval",
@@ -166909,7 +167023,8 @@
         "top10_accuracy": "0.3994252873563218",
         "median_rank": "21.5",
         "mean_rank": "49.181034088134766",
-        "num_queries": "348"
       },
       {
         "task": "cross_modal_retrieval",
@@ -166945,7 +167060,8 @@
         "top10_accuracy": "0.5229885057471264",
         "median_rank": "10.0",
         "mean_rank": "20.577587127685547",
-        "num_queries": "348"
       },
       {
         "task": "cross_modal_retrieval",
@@ -166981,7 +167097,8 @@
         "top10_accuracy": "0.031609195402298854",
         "median_rank": "152.5",
         "mean_rank": "161.44540405273438",
-        "num_queries": "348"
       },
       {
         "task": "cross_modal_retrieval",
@@ -167017,7 +167134,8 @@
         "top10_accuracy": "0.05747126436781609",
         "median_rank": "138.0",
         "mean_rank": "146.83045959472656",
-        "num_queries": "348"
       },
       {
         "task": "cross_modal_retrieval",
@@ -167053,7 +167171,8 @@
         "top10_accuracy": "0.9770114942528736",
         "median_rank": "1.0",
         "mean_rank": "2.181034564971924",
-        "num_queries": "348"
       },
       {
         "task": "modality_reconstruction",
@@ -167089,7 +167208,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "modality_reconstruction",
@@ -167125,7 +167245,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "modality_reconstruction",
@@ -167161,7 +167282,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "modality_reconstruction",
@@ -167197,7 +167319,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "modality_reconstruction",
@@ -167233,7 +167356,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "modality_reconstruction",
@@ -167269,7 +167393,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "modality_reconstruction",
@@ -167305,7 +167430,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "modality_reconstruction",
@@ -167341,7 +167467,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "modality_reconstruction",
@@ -167377,7 +167504,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "temporal_order",
@@ -167413,7 +167541,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "temporal_order",
@@ -167449,7 +167578,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "temporal_order",
@@ -167485,7 +167615,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "temporal_order",
@@ -167521,7 +167652,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "temporal_order",
@@ -167557,7 +167689,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "temporal_order",
@@ -167593,7 +167726,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "temporal_order",
@@ -167629,7 +167763,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "temporal_order",
@@ -167665,7 +167800,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "temporal_order",
@@ -167701,7 +167837,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "misalignment_detection",
@@ -167737,7 +167874,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "misalignment_detection",
@@ -167773,7 +167911,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "misalignment_detection",
@@ -167809,7 +167948,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "misalignment_detection",
@@ -167845,7 +167985,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "misalignment_detection",
@@ -167881,7 +168022,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "misalignment_detection",
@@ -167917,7 +168059,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "misalignment_detection",
@@ -167953,7 +168096,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "misalignment_detection",
@@ -167989,7 +168133,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       },
       {
         "task": "misalignment_detection",
@@ -168025,7 +168170,8 @@
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
-        "num_queries": ""
       }
     ]
   },
@@ -168841,4 +168987,4 @@
       "num_queries": "308"
     }
   ]
-}

 {
   "meta": {
+    "generated_at": "2026-06-06T21:22:14.639673+00:00",
     "window_count": 1161,
     "feature_dim": 8546,
     "object_label_rows": 1161,
     }
   },
   "tasks": {
+    "timeline_action": "Action Recognition",
+    "timeline_subtask": "Procedure Step Recognition",
+    "transition_detection": "Action Boundary Detection",
     "next_action": "Next-Action Prediction",
     "contact_prediction": "Contact State Prediction",
+    "object_relevance": "Object Relevance Prediction"
+  },
+  "task_display_names": {
+    "timeline_action": "Action Recognition",
+    "timeline_subtask": "Procedure Step Recognition",
+    "transition_detection": "Action Boundary Detection",
+    "next_action": "Next-Action Prediction",
+    "hand_trajectory_forecast": "Hand Trajectory Forecasting",
+    "contact_prediction": "Contact State Prediction",
+    "object_relevance": "Object Relevance Prediction",
+    "caption_grounding": "Language Grounding",
+    "cross_modal_retrieval": "Cross-Modal Retrieval",
+    "modality_reconstruction": "Cross-Modal Reconstruction",
+    "temporal_order": "Temporal Order Verification",
+    "misalignment_detection": "Multimodal Synchronization Detection"
   },
   "feature_blocks": [
     {
     },
     {
       "name": "audio_fisheye_cam0_aac",
+      "display": "Audio",
       "modality": "audio",
       "start": 7343,
       "end": 7511,
   "ablation": {
     "best_by_task": {
       "caption_grounding": {
+        "task": "caption_grounding",
+        "task_display_name": "Language Grounding",
         "best": {
           "modality_group": "language",
           "modality_display": "Language",
         }
       },
       "contact_prediction": {
+        "task": "contact_prediction",
+        "task_display_name": "Contact State Prediction",
         "best": {
           "modality_group": "all_features",
           "modality_display": "All Features",
         }
       },
       "cross_modal_retrieval": {
+        "task": "cross_modal_retrieval",
+        "task_display_name": "Cross-Modal Retrieval",
         "best": {
           "modality_group": "all_features",
           "modality_display": "All Features",
         }
       },
       "hand_trajectory_forecast": {
+        "task": "hand_trajectory_forecast",
+        "task_display_name": "Hand Trajectory Forecasting",
         "best": {
           "modality_group": "inertial",
           "modality_display": "Inertial",
         }
       },
       "misalignment_detection": {
+        "task": "misalignment_detection",
+        "task_display_name": "Multimodal Synchronization Detection",
         "best": {
           "modality_group": "audio",
           "modality_display": "Audio",
         }
       },
       "modality_reconstruction": {
+        "task": "modality_reconstruction",
+        "task_display_name": "Cross-Modal Reconstruction",
         "best": {
           "modality_group": "video",
           "modality_display": "Video",
         }
       },
       "next_action": {
+        "task": "next_action",
+        "task_display_name": "Next-Action Prediction",
         "best": {
           "modality_group": "language",
           "modality_display": "Language",
         }
       },
       "object_relevance": {
+        "task": "object_relevance",
+        "task_display_name": "Object Relevance Prediction",
         "best": {
           "modality_group": "language",
           "modality_display": "Language",
         }
       },
       "temporal_order": {
+        "task": "temporal_order",
+        "task_display_name": "Temporal Order Verification",
         "best": {
           "modality_group": "pose_slam",
           "modality_display": "Pose + SLAM",
         }
       },
       "timeline_action": {
+        "task": "timeline_action",
+        "task_display_name": "Action Recognition",
         "best": {
           "modality_group": "language",
           "modality_display": "Language",
         }
       },
       "timeline_subtask": {
+        "task": "timeline_subtask",
+        "task_display_name": "Procedure Step Recognition",
         "best": {
           "modality_group": "language",
           "modality_display": "Language",
         }
       },
       "transition_detection": {
+        "task": "transition_detection",
+        "task_display_name": "Action Boundary Detection",
         "best": {
           "modality_group": "language",
           "modality_display": "Language",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Recognition"
       },
       {
         "task": "timeline_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Recognition"
       },
       {
         "task": "timeline_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Recognition"
       },
       {
         "task": "timeline_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Recognition"
       },
       {
         "task": "timeline_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Recognition"
       },
       {
         "task": "timeline_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Recognition"
       },
       {
         "task": "timeline_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Recognition"
       },
       {
         "task": "timeline_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Recognition"
       },
       {
         "task": "timeline_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Recognition"
       },
       {
         "task": "timeline_subtask",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Procedure Step Recognition"
       },
       {
         "task": "timeline_subtask",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Procedure Step Recognition"
       },
       {
         "task": "timeline_subtask",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Procedure Step Recognition"
       },
       {
         "task": "timeline_subtask",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Procedure Step Recognition"
       },
       {
         "task": "timeline_subtask",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Procedure Step Recognition"
       },
       {
         "task": "timeline_subtask",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Procedure Step Recognition"
       },
       {
         "task": "timeline_subtask",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Procedure Step Recognition"
       },
       {
         "task": "timeline_subtask",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Procedure Step Recognition"
       },
       {
         "task": "timeline_subtask",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Procedure Step Recognition"
       },
       {
         "task": "transition_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Boundary Detection"
       },
       {
         "task": "transition_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Boundary Detection"
       },
       {
         "task": "transition_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Boundary Detection"
       },
       {
         "task": "transition_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Boundary Detection"
       },
       {
         "task": "transition_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Boundary Detection"
       },
       {
         "task": "transition_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Boundary Detection"
       },
       {
         "task": "transition_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Boundary Detection"
       },
       {
         "task": "transition_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Boundary Detection"
       },
       {
         "task": "transition_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Action Boundary Detection"
       },
       {
         "task": "next_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Next-Action Prediction"
       },
       {
         "task": "next_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Next-Action Prediction"
       },
       {
         "task": "next_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Next-Action Prediction"
       },
       {
         "task": "next_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Next-Action Prediction"
       },
       {
         "task": "next_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Next-Action Prediction"
       },
       {
         "task": "next_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Next-Action Prediction"
       },
       {
         "task": "next_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Next-Action Prediction"
       },
       {
         "task": "next_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Next-Action Prediction"
       },
       {
         "task": "next_action",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Next-Action Prediction"
       },
       {
         "task": "hand_trajectory_forecast",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Hand Trajectory Forecasting"
       },
       {
         "task": "hand_trajectory_forecast",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Hand Trajectory Forecasting"
       },
       {
         "task": "hand_trajectory_forecast",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Hand Trajectory Forecasting"
       },
       {
         "task": "hand_trajectory_forecast",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Hand Trajectory Forecasting"
       },
       {
         "task": "hand_trajectory_forecast",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Hand Trajectory Forecasting"
       },
       {
         "task": "hand_trajectory_forecast",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Hand Trajectory Forecasting"
       },
       {
         "task": "hand_trajectory_forecast",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Hand Trajectory Forecasting"
       },
       {
         "task": "hand_trajectory_forecast",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Hand Trajectory Forecasting"
       },
       {
         "task": "hand_trajectory_forecast",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Hand Trajectory Forecasting"
       },
       {
         "task": "contact_prediction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Contact State Prediction"
       },
       {
         "task": "contact_prediction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Contact State Prediction"
       },
       {
         "task": "contact_prediction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Contact State Prediction"
       },
       {
         "task": "contact_prediction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Contact State Prediction"
       },
       {
         "task": "contact_prediction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Contact State Prediction"
       },
       {
         "task": "contact_prediction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Contact State Prediction"
       },
       {
         "task": "contact_prediction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Contact State Prediction"
       },
       {
         "task": "contact_prediction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Contact State Prediction"
       },
       {
         "task": "contact_prediction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Contact State Prediction"
       },
       {
         "task": "object_relevance",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Object Relevance Prediction"
       },
       {
         "task": "object_relevance",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Object Relevance Prediction"
       },
       {
         "task": "object_relevance",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Object Relevance Prediction"
       },
       {
         "task": "object_relevance",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Object Relevance Prediction"
       },
       {
         "task": "object_relevance",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Object Relevance Prediction"
       },
       {
         "task": "object_relevance",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Object Relevance Prediction"
       },
       {
         "task": "object_relevance",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Object Relevance Prediction"
       },
       {
         "task": "object_relevance",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Object Relevance Prediction"
       },
       {
         "task": "object_relevance",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Object Relevance Prediction"
       },
       {
         "task": "caption_grounding",
         "top10_accuracy": "0.4454022988505747",
         "median_rank": "13.0",
         "mean_rank": "23.19827651977539",
+        "num_queries": "348",
+        "task_display_name": "Language Grounding"
       },
       {
         "task": "caption_grounding",
         "top10_accuracy": "0.034482758620689655",
         "median_rank": "162.0",
         "mean_rank": "161.4770050048828",
+        "num_queries": "348",
+        "task_display_name": "Language Grounding"
       },
       {
         "task": "caption_grounding",
         "top10_accuracy": "0.03735632183908046",
         "median_rank": "114.0",
         "mean_rank": "137.90805053710938",
+        "num_queries": "348",
+        "task_display_name": "Language Grounding"
       },
       {
         "task": "caption_grounding",
         "top10_accuracy": "0.04597701149425287",
         "median_rank": "143.5",
         "mean_rank": "155.4712677001953",
+        "num_queries": "348",
+        "task_display_name": "Language Grounding"
       },
       {
         "task": "caption_grounding",
         "top10_accuracy": "0.04885057471264368",
         "median_rank": "110.5",
         "mean_rank": "130.32470703125",
+        "num_queries": "348",
+        "task_display_name": "Language Grounding"
       },
       {
         "task": "caption_grounding",
         "top10_accuracy": "0.04597701149425287",
         "median_rank": "123.0",
         "mean_rank": "138.61207580566406",
+        "num_queries": "348",
+        "task_display_name": "Language Grounding"
       },
       {
         "task": "caption_grounding",
         "top10_accuracy": "0.07758620689655173",
         "median_rank": "141.0",
         "mean_rank": "152.14942932128906",
+        "num_queries": "348",
+        "task_display_name": "Language Grounding"
       },
       {
         "task": "caption_grounding",
         "top10_accuracy": "0.47126436781609193",
         "median_rank": "12.0",
         "mean_rank": "15.106322288513184",
+        "num_queries": "348",
+        "task_display_name": "Language Grounding"
       },
       {
         "task": "caption_grounding",
         "top10_accuracy": "0.06896551724137931",
         "median_rank": "132.0",
         "mean_rank": "137.30746459960938",
+        "num_queries": "348",
+        "task_display_name": "Language Grounding"
       },
       {
         "task": "cross_modal_retrieval",
         "top10_accuracy": "0.9798850574712644",
         "median_rank": "1.0",
         "mean_rank": "2.0862069129943848",
+        "num_queries": "348",
+        "task_display_name": "Cross-Modal Retrieval"
       },
       {
         "task": "cross_modal_retrieval",
         "top10_accuracy": "0.9798850574712644",
         "median_rank": "1.0",
         "mean_rank": "3.844827651977539",
+        "num_queries": "348",
+        "task_display_name": "Cross-Modal Retrieval"
       },
       {
         "task": "cross_modal_retrieval",
         "top10_accuracy": "0.8620689655172413",
         "median_rank": "1.0",
         "mean_rank": "5.729885101318359",
+        "num_queries": "348",
+        "task_display_name": "Cross-Modal Retrieval"
       },
       {
         "task": "cross_modal_retrieval",
         "top10_accuracy": "0.6551724137931034",
         "median_rank": "4.0",
         "mean_rank": "15.623562812805176",
+        "num_queries": "348",
+        "task_display_name": "Cross-Modal Retrieval"
       },
       {
         "task": "cross_modal_retrieval",
         "top10_accuracy": "0.3994252873563218",
         "median_rank": "21.5",
         "mean_rank": "49.181034088134766",
+        "num_queries": "348",
+        "task_display_name": "Cross-Modal Retrieval"
       },
       {
         "task": "cross_modal_retrieval",
         "top10_accuracy": "0.5229885057471264",
         "median_rank": "10.0",
         "mean_rank": "20.577587127685547",
+        "num_queries": "348",
+        "task_display_name": "Cross-Modal Retrieval"
       },
       {
         "task": "cross_modal_retrieval",
         "top10_accuracy": "0.031609195402298854",
         "median_rank": "152.5",
         "mean_rank": "161.44540405273438",
+        "num_queries": "348",
+        "task_display_name": "Cross-Modal Retrieval"
       },
       {
         "task": "cross_modal_retrieval",
         "top10_accuracy": "0.05747126436781609",
         "median_rank": "138.0",
         "mean_rank": "146.83045959472656",
+        "num_queries": "348",
+        "task_display_name": "Cross-Modal Retrieval"
       },
       {
         "task": "cross_modal_retrieval",
         "top10_accuracy": "0.9770114942528736",
         "median_rank": "1.0",
         "mean_rank": "2.181034564971924",
+        "num_queries": "348",
+        "task_display_name": "Cross-Modal Retrieval"
       },
       {
         "task": "modality_reconstruction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Cross-Modal Reconstruction"
       },
       {
         "task": "modality_reconstruction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Cross-Modal Reconstruction"
       },
       {
         "task": "modality_reconstruction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Cross-Modal Reconstruction"
       },
       {
         "task": "modality_reconstruction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Cross-Modal Reconstruction"
       },
       {
         "task": "modality_reconstruction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Cross-Modal Reconstruction"
       },
       {
         "task": "modality_reconstruction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Cross-Modal Reconstruction"
       },
       {
         "task": "modality_reconstruction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Cross-Modal Reconstruction"
       },
       {
         "task": "modality_reconstruction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Cross-Modal Reconstruction"
       },
       {
         "task": "modality_reconstruction",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Cross-Modal Reconstruction"
       },
       {
         "task": "temporal_order",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Temporal Order Verification"
       },
       {
         "task": "temporal_order",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Temporal Order Verification"
       },
       {
         "task": "temporal_order",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Temporal Order Verification"
       },
       {
         "task": "temporal_order",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Temporal Order Verification"
       },
       {
         "task": "temporal_order",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Temporal Order Verification"
       },
       {
         "task": "temporal_order",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Temporal Order Verification"
       },
       {
         "task": "temporal_order",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Temporal Order Verification"
       },
       {
         "task": "temporal_order",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Temporal Order Verification"
       },
       {
         "task": "temporal_order",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Temporal Order Verification"
       },
       {
         "task": "misalignment_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Multimodal Synchronization Detection"
       },
       {
         "task": "misalignment_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Multimodal Synchronization Detection"
       },
       {
         "task": "misalignment_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Multimodal Synchronization Detection"
       },
       {
         "task": "misalignment_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Multimodal Synchronization Detection"
       },
       {
         "task": "misalignment_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Multimodal Synchronization Detection"
       },
       {
         "task": "misalignment_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Multimodal Synchronization Detection"
       },
       {
         "task": "misalignment_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Multimodal Synchronization Detection"
       },
       {
         "task": "misalignment_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Multimodal Synchronization Detection"
       },
       {
         "task": "misalignment_detection",
         "top10_accuracy": "",
         "median_rank": "",
         "mean_rank": "",
+        "num_queries": "",
+        "task_display_name": "Multimodal Synchronization Detection"
       }
     ]
   },
       "num_queries": "308"
     }
   ]
+}

metrics/summary_metrics.json CHANGED Viewed

@@ -1,27 +1,12 @@
 {
   "omni_relay": {
-    "status": "verified_validation_aware_diagnostic_pilot",
     "dataset": "ropedia-ai/xperience-10m",
-    "staging": "selected_episode_verified_validation_package",
-    "training_target": "json_reliability_and_task_quality_improvement",
     "selection_strategy": "stratified_round_robin_by_top_level_session",
     "target_episodes": 128,
     "selected_sessions": 128,
-    "selected_split_counts": {
-      "train": 96,
-      "val": 16,
-      "test": 16
-    },
-    "exported_window_counts": {
-      "train": 2848,
-      "val": 512,
-      "test": 448
-    },
-    "held_out_episode_count": 14,
-    "held_out_test_windows": 448,
-    "json_validity_rate": 0.875,
-    "action_macro_f1": 0.0026621494447581404,
-    "quality_target_met": false,
     "candidate_scan_top_level_sessions": 802,
     "valid_candidates": 12102,
     "estimated_bytes": 298188841943,
@@ -29,16 +14,7 @@
       "visualization.rrd"
     ],
     "access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
-    "current_scope": "The selected-episode Qwen3-Omni validation-aware diagnostic pilot is verified, but held-out quality is still weak and JSON validity remains below the 98% target.",
-    "validation_samples_used": 512,
-    "train_loss": 0.41304643672440994,
-    "val_loss": 0.0330660454928875,
-    "num_val_samples": 512,
-    "subtask_accuracy": 0.006696428571428571,
-    "transition_accuracy": 0.8504464285714286,
-    "next_action_accuracy": 0.024553571428571428,
-    "contact_accuracy": 0.6450892857142857,
-    "object_micro_f1": 0.22299431459254582
   },
   "models": {
     "motion_action": {
@@ -120,7 +96,8 @@
           "Pour coffee",
           "Pour milk into coffee",
           "Wait/Prepare for pouring"
-        ]
       },
       "timeline_subtask": {
         "accuracy": 0.05813953488372093,
@@ -144,7 +121,8 @@
           "Pour coffee",
           "Pour milk into coffee",
           "Prepare for pouring"
-        ]
       },
       "transition_detection": {
         "accuracy": 0.9080459770114943,
@@ -170,7 +148,8 @@
         "matched_boundaries": 2,
         "true_boundaries": 4,
         "predicted_boundaries": 28,
-        "mean_abs_timing_error_frames": 3.5
       },
       "next_action": {
         "accuracy": 0.034482758620689655,
@@ -194,7 +173,8 @@
           "Pour coffee",
           "Pour milk into coffee",
           "Wait/Prepare for pouring"
-        ]
       },
       "hand_trajectory_forecast": {
         "mse": 14.956222534179688,
@@ -209,7 +189,8 @@
         "forecast_frames": 10,
         "mpjpe": 0.8646570444107056,
         "final_frame_mpjpe": 1.0330793857574463,
-        "target_dim": 1260
       },
       "contact_prediction": {
         "accuracy": 1.0,
@@ -228,7 +209,8 @@
         "majority_baseline_accuracy": 1.0,
         "train_final_accuracy": 1.0,
         "train_final_loss": 0.0006056802230887115,
-        "unseen_test_classes": []
       },
       "object_relevance": {
         "micro_f1": 0.18034382095361662,
@@ -242,7 +224,8 @@
         "num_windows": 1161,
         "num_train_windows": 813,
         "num_test_windows": 348,
-        "num_objects": 34
       },
       "caption_grounding": {
         "mrr": 0.016023479050338015,
@@ -257,7 +240,8 @@
         "output": "matching time window",
         "split": "chronological",
         "num_train_windows": 813,
-        "num_test_windows": 348
       },
       "cross_modal_retrieval": {
         "mrr": 0.26925966892956127,
@@ -272,7 +256,8 @@
         "output": "matching depth/video window",
         "split": "chronological",
         "num_train_windows": 813,
-        "num_test_windows": 348
       },
       "modality_reconstruction": {
         "mse": 1358.1593017578125,
@@ -284,7 +269,8 @@
         "split": "chronological",
         "num_train_windows": 813,
         "num_test_windows": 348,
-        "target_dim": 5096
       },
       "temporal_order": {
         "accuracy": 0.4540229885057471,
@@ -303,7 +289,8 @@
         "num_samples": 2320,
         "num_train_samples": 1624,
         "num_test_samples": 696,
-        "train_final_accuracy": 0.5086206896551724
       },
       "misalignment_detection": {
         "accuracy": 0.5158959537572254,
@@ -322,7 +309,8 @@
         "num_samples": 2306,
         "num_train_samples": 1614,
         "num_test_samples": 692,
-        "train_final_accuracy": 0.49380421313506817
       }
     },
     "neural_model": {
@@ -368,7 +356,8 @@
         "neural_dropout": 0.1,
         "neural_device": "cpu",
         "train_final_loss": 0.04246756529782,
-        "train_final_accuracy": 0.9875156054931336
       },
       "timeline_subtask": {
         "accuracy": 0.0377906976744186,
@@ -401,7 +390,8 @@
         "neural_dropout": 0.1,
         "neural_device": "cpu",
         "train_final_loss": 5.4104819144748596e-05,
-        "train_final_accuracy": 1.0
       },
       "transition_detection": {
         "accuracy": 0.8735632183908046,
@@ -436,7 +426,8 @@
         "matched_boundaries": 3,
         "true_boundaries": 4,
         "predicted_boundaries": 42,
-        "mean_abs_timing_error_frames": 2.6666666666666665
       },
       "next_action": {
         "accuracy": 0.02586206896551724,
@@ -469,7 +460,8 @@
         "neural_dropout": 0.1,
         "neural_device": "cpu",
         "train_final_loss": 0.000416612956025105,
-        "train_final_accuracy": 1.0
       },
       "hand_trajectory_forecast": {
         "mse": 0.004775360692292452,
@@ -494,7 +486,8 @@
         "neural_weight_decay": 0.0001,
         "neural_dropout": 0.1,
         "neural_device": "cpu",
-        "train_final_loss": 0.055699273420247435
       },
       "contact_prediction": {
         "accuracy": 1.0,
@@ -522,7 +515,8 @@
         "neural_dropout": 0.1,
         "neural_device": "cpu",
         "train_final_loss": 0.0,
-        "train_final_accuracy": 1.0
       },
       "object_relevance": {
         "micro_f1": 0.1679279279279279,
@@ -547,7 +541,8 @@
         "neural_weight_decay": 0.0001,
         "neural_dropout": 0.1,
         "neural_device": "cpu",
-        "train_final_loss": 0.003651880362182214
       },
       "caption_grounding": {
         "mrr": 0.01684125567132316,
@@ -573,7 +568,8 @@
         "neural_weight_decay": 0.0001,
         "neural_dropout": 0.1,
         "neural_device": "cpu",
-        "train_final_loss": 0.06317874967483723
       },
       "cross_modal_retrieval": {
         "mrr": 0.1299971898648288,
@@ -599,7 +595,8 @@
         "neural_weight_decay": 0.0001,
         "neural_dropout": 0.1,
         "neural_device": "cpu",
-        "train_final_loss": 0.21891545446596464
       },
       "modality_reconstruction": {
         "mse": 1351.3363037109375,
@@ -621,7 +618,8 @@
         "neural_weight_decay": 0.0001,
         "neural_dropout": 0.1,
         "neural_device": "cpu",
-        "train_final_loss": 0.21891545446596464
       },
       "temporal_order": {
         "accuracy": 0.8577586206896551,
@@ -651,7 +649,8 @@
         "neural_dropout": 0.1,
         "neural_device": "cpu",
         "train_final_loss": 0.0005108328477586757,
-        "train_final_accuracy": 1.0
       },
       "misalignment_detection": {
         "accuracy": 0.7008670520231214,
@@ -681,8 +680,23 @@
         "neural_dropout": 0.1,
         "neural_device": "cpu",
         "train_final_loss": 0.010604870708167664,
-        "train_final_accuracy": 0.9956629491945477
       }
     }
   },
   "feature_manifest": [

 {
   "omni_relay": {
+    "status": "verified_full_128_episode_diagnostic_result",
     "dataset": "ropedia-ai/xperience-10m",
+    "staging": "verified_public_package_and_adapter_publication",
+    "training_target": "action_subtask_quality_and_unseen_label_error_analysis",
     "selection_strategy": "stratified_round_robin_by_top_level_session",
     "target_episodes": 128,
     "selected_sessions": 128,
     "candidate_scan_top_level_sessions": 802,
     "valid_candidates": 12102,
     "estimated_bytes": 298188841943,
       "visualization.rrd"
     ],
     "access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
+    "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims."
   },
   "models": {
     "motion_action": {
           "Pour coffee",
           "Pour milk into coffee",
           "Wait/Prepare for pouring"
+        ],
+        "task_display_name": "Action Recognition"
       },
       "timeline_subtask": {
         "accuracy": 0.05813953488372093,
           "Pour coffee",
           "Pour milk into coffee",
           "Prepare for pouring"
+        ],
+        "task_display_name": "Procedure Step Recognition"
       },
       "transition_detection": {
         "accuracy": 0.9080459770114943,
         "matched_boundaries": 2,
         "true_boundaries": 4,
         "predicted_boundaries": 28,
+        "mean_abs_timing_error_frames": 3.5,
+        "task_display_name": "Action Boundary Detection"
       },
       "next_action": {
         "accuracy": 0.034482758620689655,
           "Pour coffee",
           "Pour milk into coffee",
           "Wait/Prepare for pouring"
+        ],
+        "task_display_name": "Next-Action Prediction"
       },
       "hand_trajectory_forecast": {
         "mse": 14.956222534179688,
         "forecast_frames": 10,
         "mpjpe": 0.8646570444107056,
         "final_frame_mpjpe": 1.0330793857574463,
+        "target_dim": 1260,
+        "task_display_name": "Hand Trajectory Forecasting"
       },
       "contact_prediction": {
         "accuracy": 1.0,
         "majority_baseline_accuracy": 1.0,
         "train_final_accuracy": 1.0,
         "train_final_loss": 0.0006056802230887115,
+        "unseen_test_classes": [],
+        "task_display_name": "Contact State Prediction"
       },
       "object_relevance": {
         "micro_f1": 0.18034382095361662,
         "num_windows": 1161,
         "num_train_windows": 813,
         "num_test_windows": 348,
+        "num_objects": 34,
+        "task_display_name": "Object Relevance Prediction"
       },
       "caption_grounding": {
         "mrr": 0.016023479050338015,
         "output": "matching time window",
         "split": "chronological",
         "num_train_windows": 813,
+        "num_test_windows": 348,
+        "task_display_name": "Language Grounding"
       },
       "cross_modal_retrieval": {
         "mrr": 0.26925966892956127,
         "output": "matching depth/video window",
         "split": "chronological",
         "num_train_windows": 813,
+        "num_test_windows": 348,
+        "task_display_name": "Cross-Modal Retrieval"
       },
       "modality_reconstruction": {
         "mse": 1358.1593017578125,
         "split": "chronological",
         "num_train_windows": 813,
         "num_test_windows": 348,
+        "target_dim": 5096,
+        "task_display_name": "Cross-Modal Reconstruction"
       },
       "temporal_order": {
         "accuracy": 0.4540229885057471,
         "num_samples": 2320,
         "num_train_samples": 1624,
         "num_test_samples": 696,
+        "train_final_accuracy": 0.5086206896551724,
+        "task_display_name": "Temporal Order Verification"
       },
       "misalignment_detection": {
         "accuracy": 0.5158959537572254,
         "num_samples": 2306,
         "num_train_samples": 1614,
         "num_test_samples": 692,
+        "train_final_accuracy": 0.49380421313506817,
+        "task_display_name": "Multimodal Synchronization Detection"
       }
     },
     "neural_model": {
         "neural_dropout": 0.1,
         "neural_device": "cpu",
         "train_final_loss": 0.04246756529782,
+        "train_final_accuracy": 0.9875156054931336,
+        "task_display_name": "Action Recognition"
       },
       "timeline_subtask": {
         "accuracy": 0.0377906976744186,
         "neural_dropout": 0.1,
         "neural_device": "cpu",
         "train_final_loss": 5.4104819144748596e-05,
+        "train_final_accuracy": 1.0,
+        "task_display_name": "Procedure Step Recognition"
       },
       "transition_detection": {
         "accuracy": 0.8735632183908046,
         "matched_boundaries": 3,
         "true_boundaries": 4,
         "predicted_boundaries": 42,
+        "mean_abs_timing_error_frames": 2.6666666666666665,
+        "task_display_name": "Action Boundary Detection"
       },
       "next_action": {
         "accuracy": 0.02586206896551724,
         "neural_dropout": 0.1,
         "neural_device": "cpu",
         "train_final_loss": 0.000416612956025105,
+        "train_final_accuracy": 1.0,
+        "task_display_name": "Next-Action Prediction"
       },
       "hand_trajectory_forecast": {
         "mse": 0.004775360692292452,
         "neural_weight_decay": 0.0001,
         "neural_dropout": 0.1,
         "neural_device": "cpu",
+        "train_final_loss": 0.055699273420247435,
+        "task_display_name": "Hand Trajectory Forecasting"
       },
       "contact_prediction": {
         "accuracy": 1.0,
         "neural_dropout": 0.1,
         "neural_device": "cpu",
         "train_final_loss": 0.0,
+        "train_final_accuracy": 1.0,
+        "task_display_name": "Contact State Prediction"
       },
       "object_relevance": {
         "micro_f1": 0.1679279279279279,
         "neural_weight_decay": 0.0001,
         "neural_dropout": 0.1,
         "neural_device": "cpu",
+        "train_final_loss": 0.003651880362182214,
+        "task_display_name": "Object Relevance Prediction"
       },
       "caption_grounding": {
         "mrr": 0.01684125567132316,
         "neural_weight_decay": 0.0001,
         "neural_dropout": 0.1,
         "neural_device": "cpu",
+        "train_final_loss": 0.06317874967483723,
+        "task_display_name": "Language Grounding"
       },
       "cross_modal_retrieval": {
         "mrr": 0.1299971898648288,
         "neural_weight_decay": 0.0001,
         "neural_dropout": 0.1,
         "neural_device": "cpu",
+        "train_final_loss": 0.21891545446596464,
+        "task_display_name": "Cross-Modal Retrieval"
       },
       "modality_reconstruction": {
         "mse": 1351.3363037109375,
         "neural_weight_decay": 0.0001,
         "neural_dropout": 0.1,
         "neural_device": "cpu",
+        "train_final_loss": 0.21891545446596464,
+        "task_display_name": "Cross-Modal Reconstruction"
       },
       "temporal_order": {
         "accuracy": 0.8577586206896551,
         "neural_dropout": 0.1,
         "neural_device": "cpu",
         "train_final_loss": 0.0005108328477586757,
+        "train_final_accuracy": 1.0,
+        "task_display_name": "Temporal Order Verification"
       },
       "misalignment_detection": {
         "accuracy": 0.7008670520231214,
         "neural_dropout": 0.1,
         "neural_device": "cpu",
         "train_final_loss": 0.010604870708167664,
+        "train_final_accuracy": 0.9956629491945477,
+        "task_display_name": "Multimodal Synchronization Detection"
       }
+    },
+    "task_display_names": {
+      "timeline_action": "Action Recognition",
+      "timeline_subtask": "Procedure Step Recognition",
+      "transition_detection": "Action Boundary Detection",
+      "next_action": "Next-Action Prediction",
+      "hand_trajectory_forecast": "Hand Trajectory Forecasting",
+      "contact_prediction": "Contact State Prediction",
+      "object_relevance": "Object Relevance Prediction",
+      "caption_grounding": "Language Grounding",
+      "cross_modal_retrieval": "Cross-Modal Retrieval",
+      "modality_reconstruction": "Cross-Modal Reconstruction",
+      "temporal_order": "Temporal Order Verification",
+      "misalignment_detection": "Multimodal Synchronization Detection"
     }
   },
   "feature_manifest": [

metrics/task_surface_integrity.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-06T17:43:54+00:00",
   "summary": {
     "task_count": 12,
     "expected_task_count": 12,
@@ -64,9 +64,9 @@
       "observed": "timeline_action"
     },
     {
-      "name": "timeline_action: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Egocentric Action Recognition",
       "raw_hits": []
     },
     {
@@ -76,15 +76,15 @@
       "raw_hits": []
     },
     {
-      "name": "timeline_action: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "20-frame multimodal window",
       "raw_hits": []
     },
     {
-      "name": "timeline_action: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Look at one short multimodal window and name what action is happening now.",
       "raw_hits": []
     },
     {
@@ -94,15 +94,15 @@
       "raw_hits": []
     },
     {
-      "name": "timeline_action: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Action Recognition",
       "raw_hits": []
     },
     {
-      "name": "timeline_action: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "window features -> action label builder -> classifier",
       "raw_hits": []
     },
     {
@@ -184,9 +184,9 @@
       "observed": "timeline_subtask"
     },
     {
-      "name": "timeline_subtask: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Temporal Subtask Recognition",
       "raw_hits": []
     },
     {
@@ -196,15 +196,15 @@
       "raw_hits": []
     },
     {
-      "name": "timeline_subtask: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "20-frame multimodal window",
       "raw_hits": []
     },
     {
-      "name": "timeline_subtask: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Predict the higher-level task stage for the current window.",
       "raw_hits": []
     },
     {
@@ -214,15 +214,15 @@
       "raw_hits": []
     },
     {
-      "name": "timeline_subtask: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Procedure Step Recognition",
       "raw_hits": []
     },
     {
-      "name": "timeline_subtask: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "window features -> subtask label builder -> classifier",
       "raw_hits": []
     },
     {
@@ -304,9 +304,9 @@
       "observed": "transition_detection"
     },
     {
-      "name": "transition_detection: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Temporal Action Segmentation",
       "raw_hits": []
     },
     {
@@ -316,15 +316,15 @@
       "raw_hits": []
     },
     {
-      "name": "transition_detection: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "current window with boundary target",
       "raw_hits": []
     },
     {
-      "name": "transition_detection: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Detect whether the current window is near a boundary between actions.",
       "raw_hits": []
     },
     {
@@ -334,15 +334,15 @@
       "raw_hits": []
     },
     {
-      "name": "transition_detection: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Action Boundary Detection",
       "raw_hits": []
     },
     {
-      "name": "transition_detection: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "action changes -> boundary labels -> binary classifier",
       "raw_hits": []
     },
     {
@@ -422,9 +422,9 @@
       "observed": "next_action"
     },
     {
-      "name": "next_action: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Short-Horizon Intention Prediction",
       "raw_hits": []
     },
     {
@@ -434,15 +434,15 @@
       "raw_hits": []
     },
     {
-      "name": "next_action: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "current window at time t",
       "raw_hits": []
     },
     {
-      "name": "next_action: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Use the current window to guess the action that will happen shortly after it.",
       "raw_hits": []
     },
     {
@@ -452,15 +452,15 @@
       "raw_hits": []
     },
     {
-      "name": "next_action: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Next-Action Prediction",
       "raw_hits": []
     },
     {
-      "name": "next_action: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "current features -> future label shift -> classifier",
       "raw_hits": []
     },
     {
@@ -540,9 +540,9 @@
       "observed": "hand_trajectory_forecast"
     },
     {
-      "name": "hand_trajectory_forecast: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "3D Hand Motion Forecasting",
       "raw_hits": []
     },
     {
@@ -552,15 +552,15 @@
       "raw_hits": []
     },
     {
-      "name": "hand_trajectory_forecast: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "current multimodal window",
       "raw_hits": []
     },
     {
-      "name": "hand_trajectory_forecast: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Predict where the hands will move over the next few frames.",
       "raw_hits": []
     },
     {
@@ -570,15 +570,15 @@
       "raw_hits": []
     },
     {
-      "name": "hand_trajectory_forecast: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Hand Trajectory Forecasting",
       "raw_hits": []
     },
     {
-      "name": "hand_trajectory_forecast: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "current features -> future mocap target -> regression head",
       "raw_hits": []
     },
     {
@@ -658,9 +658,9 @@
       "observed": "contact_prediction"
     },
     {
-      "name": "contact_prediction: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Human-Object Contact Prediction",
       "raw_hits": []
     },
     {
@@ -670,15 +670,15 @@
       "raw_hits": []
     },
     {
-      "name": "contact_prediction: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "non-contact, non-caption features",
       "raw_hits": []
     },
     {
-      "name": "contact_prediction: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Predict whether the body or hand is in contact with something.",
       "raw_hits": []
     },
     {
@@ -688,15 +688,15 @@
       "raw_hits": []
     },
     {
-      "name": "contact_prediction: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Contact State Prediction",
       "raw_hits": []
     },
     {
-      "name": "contact_prediction: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "feature filter -> contact target -> binary classifier",
       "raw_hits": []
     },
     {
@@ -774,9 +774,9 @@
       "observed": "object_relevance"
     },
     {
-      "name": "object_relevance: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Object-Centric Interaction Recognition",
       "raw_hits": []
     },
     {
@@ -786,15 +786,15 @@
       "raw_hits": []
     },
     {
-      "name": "object_relevance: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "non-caption multimodal features",
       "raw_hits": []
     },
     {
-      "name": "object_relevance: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Predict which objects matter in the current window.",
       "raw_hits": []
     },
     {
@@ -804,15 +804,15 @@
       "raw_hits": []
     },
     {
-      "name": "object_relevance: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Object Relevance Prediction",
       "raw_hits": []
     },
     {
-      "name": "object_relevance: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "object vocabulary -> multi-hot labels -> sigmoid heads",
       "raw_hits": []
     },
     {
@@ -892,9 +892,9 @@
       "observed": "caption_grounding"
     },
     {
-      "name": "caption_grounding: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Language-to-Moment Grounding",
       "raw_hits": []
     },
     {
@@ -904,15 +904,15 @@
       "raw_hits": []
     },
     {
-      "name": "caption_grounding: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "text-like query and candidate windows",
       "raw_hits": []
     },
     {
-      "name": "caption_grounding: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Given a text-like query from annotation, find the matching time window.",
       "raw_hits": []
     },
     {
@@ -922,15 +922,15 @@
       "raw_hits": []
     },
     {
-      "name": "caption_grounding: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Language Grounding",
       "raw_hits": []
     },
     {
-      "name": "caption_grounding: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "query features -> candidate index -> cosine ranker",
       "raw_hits": []
     },
     {
@@ -1008,9 +1008,9 @@
       "observed": "cross_modal_retrieval"
     },
     {
-      "name": "cross_modal_retrieval: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Multimodal Representation Retrieval",
       "raw_hits": []
     },
     {
@@ -1020,15 +1020,15 @@
       "raw_hits": []
     },
     {
-      "name": "cross_modal_retrieval: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "motion/IMU/pose query; depth/video candidates",
       "raw_hits": []
     },
     {
-      "name": "cross_modal_retrieval: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Use one group of modalities to retrieve the matching window from another group.",
       "raw_hits": []
     },
     {
@@ -1038,15 +1038,15 @@
       "raw_hits": []
     },
     {
-      "name": "cross_modal_retrieval: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Cross-Modal Retrieval",
       "raw_hits": []
     },
     {
-      "name": "cross_modal_retrieval: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "modality split -> projection -> nearest-neighbor ranker",
       "raw_hits": []
     },
     {
@@ -1126,9 +1126,9 @@
       "observed": "modality_reconstruction"
     },
     {
-      "name": "modality_reconstruction: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Modality Feature Reconstruction",
       "raw_hits": []
     },
     {
@@ -1138,15 +1138,15 @@
       "raw_hits": []
     },
     {
-      "name": "modality_reconstruction: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "motion, IMU, and camera/pose features",
       "raw_hits": []
     },
     {
-      "name": "modality_reconstruction: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Predict one modality feature block from other modality blocks.",
       "raw_hits": []
     },
     {
@@ -1156,15 +1156,15 @@
       "raw_hits": []
     },
     {
-      "name": "modality_reconstruction: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Cross-Modal Reconstruction",
       "raw_hits": []
     },
     {
-      "name": "modality_reconstruction: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "source-target split -> scaler -> regression head",
       "raw_hits": []
     },
     {
@@ -1244,9 +1244,9 @@
       "observed": "temporal_order"
     },
     {
-      "name": "temporal_order: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Temporal Order Verification",
       "raw_hits": []
     },
     {
@@ -1256,15 +1256,15 @@
       "raw_hits": []
     },
     {
-      "name": "temporal_order: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "two adjacent windows plus difference vector",
       "raw_hits": []
     },
     {
-      "name": "temporal_order: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Tell whether two nearby windows are in the correct time order.",
       "raw_hits": []
     },
     {
@@ -1274,15 +1274,15 @@
       "raw_hits": []
     },
     {
-      "name": "temporal_order: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Temporal Order Verification",
       "raw_hits": []
     },
     {
-      "name": "temporal_order: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "pair builder -> feature combiner -> binary classifier",
       "raw_hits": []
     },
     {
@@ -1360,9 +1360,9 @@
       "observed": "misalignment_detection"
     },
     {
-      "name": "misalignment_detection: public_field_research_name_is_human_readable",
       "status": "pass",
-      "value": "Cross-Modal Misalignment Detection",
       "raw_hits": []
     },
     {
@@ -1372,15 +1372,15 @@
       "raw_hits": []
     },
     {
-      "name": "misalignment_detection: public_field_input_short_is_human_readable",
       "status": "pass",
-      "value": "motion-side and visual/depth-side feature groups",
       "raw_hits": []
     },
     {
-      "name": "misalignment_detection: public_field_plain_goal_is_human_readable",
       "status": "pass",
-      "value": "Detect when modalities that should match are shifted out of sync.",
       "raw_hits": []
     },
     {
@@ -1390,15 +1390,15 @@
       "raw_hits": []
     },
     {
-      "name": "misalignment_detection: public_field_display_name_is_human_readable",
       "status": "pass",
-      "value": "Multimodal Synchronization Detection",
       "raw_hits": []
     },
     {
-      "name": "misalignment_detection: public_field_process_short_is_human_readable",
       "status": "pass",
-      "value": "aligned/shifted pairs -> feature combiner -> binary classifier",
       "raw_hits": []
     },
     {

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-06T23:27:06+00:00",
   "summary": {
     "task_count": 12,
     "expected_task_count": 12,
       "observed": "timeline_action"
     },
     {
+      "name": "timeline_action: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Look at one short multimodal window and name what action is happening now.",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "timeline_action: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Action Recognition",
       "raw_hits": []
     },
     {
+      "name": "timeline_action: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "20-frame multimodal window",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "timeline_action: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "window features -> action label builder -> classifier",
       "raw_hits": []
     },
     {
+      "name": "timeline_action: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Egocentric Action Recognition",
       "raw_hits": []
     },
     {
       "observed": "timeline_subtask"
     },
     {
+      "name": "timeline_subtask: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Predict the higher-level task stage for the current window.",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "timeline_subtask: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Procedure Step Recognition",
       "raw_hits": []
     },
     {
+      "name": "timeline_subtask: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "20-frame multimodal window",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "timeline_subtask: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "window features -> subtask label builder -> classifier",
       "raw_hits": []
     },
     {
+      "name": "timeline_subtask: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Temporal Subtask Recognition",
       "raw_hits": []
     },
     {
       "observed": "transition_detection"
     },
     {
+      "name": "transition_detection: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Detect whether the current window is near a boundary between actions.",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "transition_detection: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Action Boundary Detection",
       "raw_hits": []
     },
     {
+      "name": "transition_detection: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "current window with boundary target",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "transition_detection: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "action changes -> boundary labels -> binary classifier",
       "raw_hits": []
     },
     {
+      "name": "transition_detection: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Temporal Action Segmentation",
       "raw_hits": []
     },
     {
       "observed": "next_action"
     },
     {
+      "name": "next_action: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Use the current window to guess the action that will happen shortly after it.",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "next_action: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Next-Action Prediction",
       "raw_hits": []
     },
     {
+      "name": "next_action: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "current window at time t",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "next_action: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "current features -> future label shift -> classifier",
       "raw_hits": []
     },
     {
+      "name": "next_action: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Short-Horizon Intention Prediction",
       "raw_hits": []
     },
     {
       "observed": "hand_trajectory_forecast"
     },
     {
+      "name": "hand_trajectory_forecast: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Predict where the hands will move over the next few frames.",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "hand_trajectory_forecast: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Hand Trajectory Forecasting",
       "raw_hits": []
     },
     {
+      "name": "hand_trajectory_forecast: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "current multimodal window",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "hand_trajectory_forecast: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "current features -> future mocap target -> regression head",
       "raw_hits": []
     },
     {
+      "name": "hand_trajectory_forecast: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "3D Hand Motion Forecasting",
       "raw_hits": []
     },
     {
       "observed": "contact_prediction"
     },
     {
+      "name": "contact_prediction: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Predict whether the body or hand is in contact with something.",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "contact_prediction: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Contact State Prediction",
       "raw_hits": []
     },
     {
+      "name": "contact_prediction: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "non-contact, non-caption features",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "contact_prediction: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "feature filter -> contact target -> binary classifier",
       "raw_hits": []
     },
     {
+      "name": "contact_prediction: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Human-Object Contact Prediction",
       "raw_hits": []
     },
     {
       "observed": "object_relevance"
     },
     {
+      "name": "object_relevance: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Predict which objects matter in the current window.",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "object_relevance: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Object Relevance Prediction",
       "raw_hits": []
     },
     {
+      "name": "object_relevance: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "non-caption multimodal features",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "object_relevance: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "object vocabulary -> multi-hot labels -> sigmoid heads",
       "raw_hits": []
     },
     {
+      "name": "object_relevance: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Object-Centric Interaction Recognition",
       "raw_hits": []
     },
     {
       "observed": "caption_grounding"
     },
     {
+      "name": "caption_grounding: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Given a text-like query from annotation, find the matching time window.",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "caption_grounding: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Language Grounding",
       "raw_hits": []
     },
     {
+      "name": "caption_grounding: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "text-like query and candidate windows",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "caption_grounding: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "query features -> candidate index -> cosine ranker",
       "raw_hits": []
     },
     {
+      "name": "caption_grounding: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Language-to-Moment Grounding",
       "raw_hits": []
     },
     {
       "observed": "cross_modal_retrieval"
     },
     {
+      "name": "cross_modal_retrieval: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Use one group of modalities to retrieve the matching window from another group.",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "cross_modal_retrieval: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Cross-Modal Retrieval",
       "raw_hits": []
     },
     {
+      "name": "cross_modal_retrieval: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "motion/IMU/pose query; depth/video candidates",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "cross_modal_retrieval: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "modality split -> projection -> nearest-neighbor ranker",
       "raw_hits": []
     },
     {
+      "name": "cross_modal_retrieval: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Multimodal Representation Retrieval",
       "raw_hits": []
     },
     {
       "observed": "modality_reconstruction"
     },
     {
+      "name": "modality_reconstruction: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Predict one modality feature block from other modality blocks.",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "modality_reconstruction: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Cross-Modal Reconstruction",
       "raw_hits": []
     },
     {
+      "name": "modality_reconstruction: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "motion, IMU, and camera/pose features",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "modality_reconstruction: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "source-target split -> scaler -> regression head",
       "raw_hits": []
     },
     {
+      "name": "modality_reconstruction: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Modality Feature Reconstruction",
       "raw_hits": []
     },
     {
       "observed": "temporal_order"
     },
     {
+      "name": "temporal_order: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Tell whether two nearby windows are in the correct time order.",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "temporal_order: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Temporal Order Verification",
       "raw_hits": []
     },
     {
+      "name": "temporal_order: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "two adjacent windows plus difference vector",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "temporal_order: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "pair builder -> feature combiner -> binary classifier",
       "raw_hits": []
     },
     {
+      "name": "temporal_order: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Temporal Order Verification",
       "raw_hits": []
     },
     {
       "observed": "misalignment_detection"
     },
     {
+      "name": "misalignment_detection: public_field_plain_goal_is_human_readable",
       "status": "pass",
+      "value": "Detect when modalities that should match are shifted out of sync.",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "misalignment_detection: public_field_display_name_is_human_readable",
       "status": "pass",
+      "value": "Multimodal Synchronization Detection",
       "raw_hits": []
     },
     {
+      "name": "misalignment_detection: public_field_input_short_is_human_readable",
       "status": "pass",
+      "value": "motion-side and visual/depth-side feature groups",
       "raw_hits": []
     },
     {
       "raw_hits": []
     },
     {
+      "name": "misalignment_detection: public_field_process_short_is_human_readable",
       "status": "pass",
+      "value": "aligned/shifted pairs -> feature combiner -> binary classifier",
       "raw_hits": []
     },
     {
+      "name": "misalignment_detection: public_field_research_name_is_human_readable",
       "status": "pass",
+      "value": "Cross-Modal Misalignment Detection",
       "raw_hits": []
     },
     {

metrics/website_integrity.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-06T17:43:55+00:00",
   "docs_root": "docs",
   "site_base": "/ropedia-xperience-10m-task-suite/",
   "summary": {
     "html_pages": 4,
-    "local_references": 133,
-    "external_reference_count": 107,
-    "json_files": 34,
     "image_assets_referenced": 22,
     "failure_count": 0
   },
@@ -75,7 +75,7 @@
       "status": "pass",
       "reason": "The project overview should appear before the deeper progress ledger.",
       "overview_index": 67412,
-      "evidence_index": 90414
     },
     {
       "name": "project_status_links_json",
@@ -153,8 +153,8 @@
       "status": "pass",
       "reason": "The evaluation protocol should appear before the deeper evidence ledger.",
       "overview_index": 67412,
-      "protocol_index": 87152,
-      "evidence_index": 90414
     },
     {
       "name": "evaluation_protocol_links_json",
@@ -228,7 +228,7 @@
     {
       "path": "index.html",
       "id_count": 77,
-      "reference_count": 110,
       "image_count": 24
     },
     {
@@ -252,12 +252,12 @@
     },
     {
       "path": "data/artifact_index.json",
-      "bytes": 41126,
       "top_level_type": "dict"
     },
     {
       "path": "data/audio_ablation_summary.json",
-      "bytes": 9701,
       "top_level_type": "dict"
     },
     {
@@ -267,7 +267,7 @@
     },
     {
       "path": "data/evaluation_protocol.json",
-      "bytes": 13788,
       "top_level_type": "dict"
     },
     {
@@ -282,7 +282,7 @@
     },
     {
       "path": "data/foundation_model_plan.json",
-      "bytes": 13112,
       "top_level_type": "dict"
     },
     {
@@ -292,7 +292,7 @@
     },
     {
       "path": "data/mirror_parity.json",
-      "bytes": 131036,
       "top_level_type": "dict"
     },
     {
@@ -302,27 +302,32 @@
     },
     {
       "path": "data/omni_finetune_verified_result.json",
-      "bytes": 4213,
       "top_level_type": "dict"
     },
     {
       "path": "data/project_brief.json",
-      "bytes": 3752,
       "top_level_type": "dict"
     },
     {
       "path": "data/project_manifest.json",
-      "bytes": 4927,
       "top_level_type": "dict"
     },
     {
       "path": "data/project_packet.json",
-      "bytes": 7802,
       "top_level_type": "dict"
     },
     {
       "path": "data/project_status.json",
-      "bytes": 12602,
       "top_level_type": "dict"
     },
     {
@@ -347,7 +352,7 @@
     },
     {
       "path": "data/reproducibility_matrix.json",
-      "bytes": 5223,
       "top_level_type": "dict"
     },
     {
@@ -357,32 +362,32 @@
     },
     {
       "path": "data/research_directions.json",
-      "bytes": 14414,
       "top_level_type": "dict"
     },
     {
       "path": "data/research_roadmap.json",
-      "bytes": 10052,
       "top_level_type": "dict"
     },
     {
       "path": "data/research_roadmap_interactive.json",
-      "bytes": 142418,
       "top_level_type": "dict"
     },
     {
       "path": "data/research_takeaways.json",
-      "bytes": 7102,
       "top_level_type": "dict"
     },
     {
       "path": "data/scope_claims_audit.json",
-      "bytes": 20823,
       "top_level_type": "dict"
     },
     {
       "path": "data/single_episode_explorer.json",
-      "bytes": 4297465,
       "top_level_type": "dict"
     },
     {
@@ -392,7 +397,7 @@
     },
     {
       "path": "data/summary_metrics.json",
-      "bytes": 26028,
       "top_level_type": "dict"
     },
     {
@@ -407,7 +412,7 @@
     },
     {
       "path": "data/website_integrity.json",
-      "bytes": 15259,
       "top_level_type": "dict"
     },
     {
@@ -450,21 +455,21 @@
     {
       "path": "assets/charts/episode_task_scores.svg",
       "exists": true,
-      "bytes": 5903,
       "format": "SVG",
       "has_viewbox": true
     },
     {
       "path": "assets/charts/episode_task_scores_minimal_vs_neural.svg",
       "exists": true,
-      "bytes": 10040,
       "format": "SVG",
       "has_viewbox": true
     },
     {
       "path": "assets/charts/episode_task_scores_neural_mlp.svg",
       "exists": true,
-      "bytes": 5917,
       "format": "SVG",
       "has_viewbox": true
     },
@@ -485,7 +490,7 @@
     {
       "path": "assets/charts/research_direction_coverage.svg",
       "exists": true,
-      "bytes": 4968,
       "format": "SVG",
       "has_viewbox": true
     },
@@ -571,7 +576,7 @@
     {
       "path": "assets/task_architectures.png",
       "exists": true,
-      "bytes": 761507,
       "width": 1800,
       "height": 2450,
       "format": "PNG"
@@ -579,7 +584,7 @@
     {
       "path": "assets/task_suite_infographic.png",
       "exists": true,
-      "bytes": 2612510,
       "width": 1800,
       "height": 6600,
       "format": "PNG"

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-06T23:27:27+00:00",
   "docs_root": "docs",
   "site_base": "/ropedia-xperience-10m-task-suite/",
   "summary": {
     "html_pages": 4,
+    "local_references": 136,
+    "external_reference_count": 106,
+    "json_files": 35,
     "image_assets_referenced": 22,
     "failure_count": 0
   },
       "status": "pass",
       "reason": "The project overview should appear before the deeper progress ledger.",
       "overview_index": 67412,
+      "evidence_index": 90421
     },
     {
       "name": "project_status_links_json",
       "status": "pass",
       "reason": "The evaluation protocol should appear before the deeper evidence ledger.",
       "overview_index": 67412,
+      "protocol_index": 87159,
+      "evidence_index": 90421
     },
     {
       "name": "evaluation_protocol_links_json",
     {
       "path": "index.html",
       "id_count": 77,
+      "reference_count": 113,
       "image_count": 24
     },
     {
     },
     {
       "path": "data/artifact_index.json",
+      "bytes": 60162,
       "top_level_type": "dict"
     },
     {
       "path": "data/audio_ablation_summary.json",
+      "bytes": 10370,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/evaluation_protocol.json",
+      "bytes": 14511,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/foundation_model_plan.json",
+      "bytes": 13193,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/mirror_parity.json",
+      "bytes": 235815,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/omni_finetune_verified_result.json",
+      "bytes": 3483,
+      "top_level_type": "dict"
+    },
+    {
+      "path": "data/omni_model_comparison.json",
+      "bytes": 21433,
       "top_level_type": "dict"
     },
     {
       "path": "data/project_brief.json",
+      "bytes": 3811,
       "top_level_type": "dict"
     },
     {
       "path": "data/project_manifest.json",
+      "bytes": 5193,
       "top_level_type": "dict"
     },
     {
       "path": "data/project_packet.json",
+      "bytes": 7943,
       "top_level_type": "dict"
     },
     {
       "path": "data/project_status.json",
+      "bytes": 15049,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/reproducibility_matrix.json",
+      "bytes": 5280,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/research_directions.json",
+      "bytes": 16694,
       "top_level_type": "dict"
     },
     {
       "path": "data/research_roadmap.json",
+      "bytes": 10133,
       "top_level_type": "dict"
     },
     {
       "path": "data/research_roadmap_interactive.json",
+      "bytes": 143560,
       "top_level_type": "dict"
     },
     {
       "path": "data/research_takeaways.json",
+      "bytes": 7139,
       "top_level_type": "dict"
     },
     {
       "path": "data/scope_claims_audit.json",
+      "bytes": 21234,
       "top_level_type": "dict"
     },
     {
       "path": "data/single_episode_explorer.json",
+      "bytes": 4305527,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/summary_metrics.json",
+      "bytes": 27490,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/website_integrity.json",
+      "bytes": 15777,
       "top_level_type": "dict"
     },
     {
     {
       "path": "assets/charts/episode_task_scores.svg",
       "exists": true,
+      "bytes": 5983,
       "format": "SVG",
       "has_viewbox": true
     },
     {
       "path": "assets/charts/episode_task_scores_minimal_vs_neural.svg",
       "exists": true,
+      "bytes": 10200,
       "format": "SVG",
       "has_viewbox": true
     },
     {
       "path": "assets/charts/episode_task_scores_neural_mlp.svg",
       "exists": true,
+      "bytes": 5997,
       "format": "SVG",
       "has_viewbox": true
     },
     {
       "path": "assets/charts/research_direction_coverage.svg",
       "exists": true,
+      "bytes": 5078,
       "format": "SVG",
       "has_viewbox": true
     },
     {
       "path": "assets/task_architectures.png",
       "exists": true,
+      "bytes": 774391,
       "width": 1800,
       "height": 2450,
       "format": "PNG"
     {
       "path": "assets/task_suite_infographic.png",
       "exists": true,
+      "bytes": 1588641,
       "width": 1800,
       "height": 6600,
       "format": "PNG"