cy0307 commited on
Commit
2d80be0
·
verified ·
1 Parent(s): a07660e

Update final Qwen public metrics

Browse files
metrics/artifact_index.json CHANGED
@@ -1,19 +1,19 @@
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Artifact Index",
3
- "generated_at_utc": "2026-06-06T17:44:58+00:00",
4
  "status": "pass",
5
- "artifact_count": 89,
6
  "missing": [],
7
  "by_kind": {
8
  "project_path": 14,
9
  "scaleup_contract": 7,
10
- "scaleup_status": 6,
 
11
  "project_scope": 1,
12
  "source_alignment": 5,
13
- "publication_workflow": 3,
14
  "evaluation_protocol": 3,
15
  "result_interpretation": 5,
16
- "metrics_source": 4,
17
  "website_data": 3,
18
  "visual_evidence": 7,
19
  "quality_gate": 12,
@@ -30,7 +30,9 @@
30
  "generated_figure": 3,
31
  "generated_figure_assets": 1,
32
  "citation": 1,
33
- "license": 1
 
 
34
  },
35
  "artifacts": [
36
  {
@@ -41,8 +43,8 @@
41
  "surface": "repo_hf",
42
  "shows": "Gives first-pass readers a concise project shape before the detailed artifact trail.",
43
  "exists": true,
44
- "bytes": 3829,
45
- "sha256": "3d16e700c31aafe889b3d8e43d52250208766809c483a716a13cbd26961b3e72"
46
  },
47
  {
48
  "id": "project_brief_json",
@@ -52,8 +54,8 @@
52
  "surface": "website_hf",
53
  "shows": "Machine-readable first-reader project brief for the website and Hugging Face mirrors.",
54
  "exists": true,
55
- "bytes": 3752,
56
- "sha256": "4bd2f3471d0141abf64a2327541e25031e9efa39940cc68a29c316cf36f7c884"
57
  },
58
  {
59
  "id": "project_status",
@@ -63,8 +65,8 @@
63
  "surface": "repo_hf",
64
  "shows": "Gives a compact current-state table for first-pass readers.",
65
  "exists": true,
66
- "bytes": 9818,
67
- "sha256": "ae59a373796d279cf0c14208e14a1feca1ecbf3d31e5099fcf126c4c8de8a93a"
68
  },
69
  {
70
  "id": "project_status_json",
@@ -74,8 +76,8 @@
74
  "surface": "website_hf",
75
  "shows": "Machine-readable copy of the current project status for website and HF mirrors.",
76
  "exists": true,
77
- "bytes": 12602,
78
- "sha256": "38c3ea58375f127bc72653beffef5fb9d872430843137944f626f395395a5b1f"
79
  },
80
  {
81
  "id": "research_roadmap",
@@ -85,8 +87,8 @@
85
  "surface": "repo_hf",
86
  "shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
87
  "exists": true,
88
- "bytes": 12045,
89
- "sha256": "dece941b1be0f03aea11f0ce3e1a8240977cd9d9629b640fdcf233825cfd5f48"
90
  },
91
  {
92
  "id": "research_roadmap_json",
@@ -96,8 +98,8 @@
96
  "surface": "website_hf",
97
  "shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
98
  "exists": true,
99
- "bytes": 10052,
100
- "sha256": "0a2ce1c96e9546ce32571f26bf4a2c580708e0cb7bb56b51e3bfbc6eef240ff9"
101
  },
102
  {
103
  "id": "foundation_model_plan",
@@ -118,8 +120,8 @@
118
  "surface": "website_hf",
119
  "shows": "Machine-readable foundation-model selection matrix with source links, entry conditions, and evaluation additions.",
120
  "exists": true,
121
- "bytes": 13112,
122
- "sha256": "0850a9b74f2c62aba30b297089ad0a4a04424cc16d4a265f71e39538c6d22792"
123
  },
124
  {
125
  "id": "omni_model_extension_contract",
@@ -141,7 +143,7 @@
141
  "shows": "Stores the implemented Qwen3-Omni LoRA contract and planned Cosmos-style world-model and VLA/policy branch contracts.",
142
  "exists": true,
143
  "file_count": 3,
144
- "bytes": 8904
145
  },
146
  {
147
  "id": "omni_backbone_registry_validator",
@@ -206,8 +208,19 @@
206
  "surface": "repo_hf",
207
  "shows": "Runs simple metadata and neural MLP baselines on the same selected 96/16/16 episode split used by the Qwen3-Omni diagnostic pilot.",
208
  "exists": true,
209
- "bytes": 46953,
210
- "sha256": "f6579d123275ec098184ac2021372e33ed48100e42fd62fe37ab7cb7126e5a1c"
 
 
 
 
 
 
 
 
 
 
 
211
  },
212
  {
213
  "id": "additional_development_directions",
@@ -261,8 +274,8 @@
261
  "surface": "website_hf",
262
  "shows": "Gives a short project path with scope status and public surfaces.",
263
  "exists": true,
264
- "bytes": 7802,
265
- "sha256": "40964c04cd769970e212288dc61a2a462c68ef5e4d962a7b9f50fe5a1fc84ce7"
266
  },
267
  {
268
  "id": "artifact_guide",
@@ -272,8 +285,8 @@
272
  "surface": "repo_hf",
273
  "shows": "Gives the human-readable map from project scope to data, tasks, platform mirrors, and scale-up status.",
274
  "exists": true,
275
- "bytes": 17246,
276
- "sha256": "0e3739a51aca083bb9be8195b1759dddf84378f34059eaad950013307dd377ec"
277
  },
278
  {
279
  "id": "official_dataset_card_alignment",
@@ -371,8 +384,8 @@
371
  "surface": "repo_hf",
372
  "shows": "Defines the window unit, chronological split, task metrics, leakage controls, and current limitations.",
373
  "exists": true,
374
- "bytes": 6003,
375
- "sha256": "b0dce21ec27228e49693c1aefed1be120c73f80551d6d3609c48c473d792e709"
376
  },
377
  {
378
  "id": "evaluation_protocol_json",
@@ -382,8 +395,8 @@
382
  "surface": "website_hf",
383
  "shows": "Machine-readable protocol generated from committed task metrics for website and HF mirrors.",
384
  "exists": true,
385
- "bytes": 13788,
386
- "sha256": "ff76b2af0f6d626d16daafffc2fdfda27801a9a15a8cca255a3d9f6be1b2a8a1"
387
  },
388
  {
389
  "id": "evaluation_protocol_builder",
@@ -393,8 +406,8 @@
393
  "surface": "repo_hf",
394
  "shows": "Regenerates the protocol from committed summary metrics and task artifacts.",
395
  "exists": true,
396
- "bytes": 16289,
397
- "sha256": "0e404d53826ea893ed1a9d6f07b3e98cdf16b64b37088480a1b8ddb957997164"
398
  },
399
  {
400
  "id": "research_takeaways",
@@ -404,8 +417,8 @@
404
  "surface": "repo_hf",
405
  "shows": "Summarizes the main research lessons from committed metrics and identifies which experiments need held-out episodes.",
406
  "exists": true,
407
- "bytes": 5101,
408
- "sha256": "29856af67d4c2e4ea8f339e5a9bde362da08bbd17b1a3c681ee9b4aa579559f0"
409
  },
410
  {
411
  "id": "research_takeaways_json",
@@ -415,8 +428,8 @@
415
  "surface": "website_hf",
416
  "shows": "Machine-readable result interpretation for the website, HF cards, and mirror checks.",
417
  "exists": true,
418
- "bytes": 7102,
419
- "sha256": "fe5ca81a9212ac2122787781868ad6955b74734f5eca8dd52def0c186ed8039e"
420
  },
421
  {
422
  "id": "research_takeaways_builder",
@@ -426,8 +439,8 @@
426
  "surface": "repo_hf",
427
  "shows": "Regenerates the research takeaways from committed summary metrics and task result artifacts.",
428
  "exists": true,
429
- "bytes": 13503,
430
- "sha256": "88711ef1e9c5f874d886fbc39c5138d8945d73358b3d0938c5d668bc0b9dba9a"
431
  },
432
  {
433
  "id": "audio_ablation_script",
@@ -470,8 +483,8 @@
470
  "surface": "website_hf",
471
  "shows": "Machine-readable audio ablation summary mirrored into the static website and Hugging Face bundles.",
472
  "exists": true,
473
- "bytes": 9701,
474
- "sha256": "7debd9e67b7df09322d743193bf8b785277a992c9e8c82c08c7a36c4e066e6de"
475
  },
476
  {
477
  "id": "audio_ablation_delta_chart",
@@ -661,8 +674,8 @@
661
  "surface": "repo_hf",
662
  "shows": "Regenerates the task-surface integrity report and fails if task cards expose raw artifact ids or lose the interactive player wiring.",
663
  "exists": true,
664
- "bytes": 15964,
665
- "sha256": "e674a6301692132fdbd6e379e4fa8db677388d762d86d3b1bb1f9f76b3b453de"
666
  },
667
  {
668
  "id": "live_publication_status",
@@ -684,8 +697,8 @@
684
  "surface": "repo",
685
  "shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
686
  "exists": true,
687
- "bytes": 34779,
688
- "sha256": "73c45e1ae1b7509b9f2c54cf42424c9ac0aab8a9e54be58fc488eaa2a696acc7"
689
  },
690
  {
691
  "id": "reproducibility_contract",
@@ -706,8 +719,8 @@
706
  "surface": "website_hf",
707
  "shows": "Machine-readable reproduction steps with expected artifacts and public boundaries.",
708
  "exists": true,
709
- "bytes": 5223,
710
- "sha256": "f218a630d3894f402bcb43d7eb24e4fdcedd7a93caf1816539cdf052c0620727"
711
  },
712
  {
713
  "id": "artifact_index_builder",
@@ -717,8 +730,8 @@
717
  "surface": "repo_hf",
718
  "shows": "Generates the selective artifact catalog from local files.",
719
  "exists": true,
720
- "bytes": 33486,
721
- "sha256": "9c41f5660ca8380deb0e0f466f21fb4fc73c85cb67c7a058de17173a4d075179"
722
  },
723
  {
724
  "id": "publication_audit",
@@ -741,7 +754,7 @@
741
  "volatile": true,
742
  "shows": "Separates setup paths from completed held-out-episode results.",
743
  "exists": true,
744
- "bytes": 20823,
745
  "hash_policy": "existence_and_size_only"
746
  },
747
  {
@@ -753,7 +766,7 @@
753
  "volatile": true,
754
  "shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
755
  "exists": true,
756
- "bytes": 131036,
757
  "hash_policy": "existence_and_size_only"
758
  },
759
  {
@@ -765,7 +778,7 @@
765
  "volatile": true,
766
  "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
767
  "exists": true,
768
- "bytes": 15259,
769
  "hash_policy": "existence_and_size_only"
770
  },
771
  {
@@ -776,8 +789,8 @@
776
  "surface": "website_hf",
777
  "shows": "Lists public URLs, upstream sources, and machine-readable project metadata.",
778
  "exists": true,
779
- "bytes": 4927,
780
- "sha256": "032d9aa43c467bfa5004e18e1d7881ba6901371a5d8ec23cacf1a1058de50b2a"
781
  },
782
  {
783
  "id": "task_summary",
@@ -787,8 +800,8 @@
787
  "surface": "repo_hf",
788
  "shows": "Stores the task definitions, splits, feature dimension, and minimal/neural metrics.",
789
  "exists": true,
790
- "bytes": 19653,
791
- "sha256": "4c4db850c37268a8dc4d2e86c21f99c8d92c4cee106b27f7b8db0347631947cd"
792
  },
793
  {
794
  "id": "website_metrics_bundle",
@@ -798,8 +811,8 @@
798
  "surface": "website_hf",
799
  "shows": "Mirrors task metrics for the static dashboard.",
800
  "exists": true,
801
- "bytes": 26028,
802
- "sha256": "5259cf5373cb07ce6fad2bed69c35bfa77550515949588326f792d5d6043c082"
803
  },
804
  {
805
  "id": "feature_manifest",
@@ -843,7 +856,7 @@
843
  "shows": "Stores matching PyTorch MLP results for the 12 task contracts.",
844
  "exists": true,
845
  "file_count": 60,
846
- "bytes": 90608884
847
  },
848
  {
849
  "id": "research_direction_taxonomy",
@@ -853,8 +866,8 @@
853
  "surface": "repo_hf",
854
  "shows": "Maps the 12 tasks to the four Ropedia research directions as direct/proxy/diagnostic.",
855
  "exists": true,
856
- "bytes": 14414,
857
- "sha256": "7215681c55a6739da7f16d833f62fcb8f6d58069840963182e98427eaf0cf654"
858
  },
859
  {
860
  "id": "research_direction_extensions",
@@ -864,8 +877,8 @@
864
  "surface": "repo_hf",
865
  "shows": "Stores one coded extension probe per research direction with minimal and neural metrics.",
866
  "exists": true,
867
- "bytes": 11903,
868
- "sha256": "010e254b61de6cc199e8e710fb10d8304c8156835f5cbafb79202996a74c0c77"
869
  },
870
  {
871
  "id": "task_walkthroughs",
@@ -886,8 +899,8 @@
886
  "surface": "website_hf",
887
  "shows": "Presents the task suite and sample modality thumbnails with metrics generated from committed files.",
888
  "exists": true,
889
- "bytes": 2612510,
890
- "sha256": "213d81f49d27e3f2560c79e29a017c017cbe38d8d605815bf3bc87834a1424ae"
891
  },
892
  {
893
  "id": "modality_atlas",
@@ -930,8 +943,8 @@
930
  "surface": "website_hf",
931
  "shows": "Shows the shared feature pipeline and minimal/neural head families.",
932
  "exists": true,
933
- "bytes": 761507,
934
- "sha256": "076c2e463ddce473e9138ac6f3615152d59031d6be2aa5c3d9ae1ace3d3f6c83"
935
  },
936
  {
937
  "id": "qwen_data_access_status",
@@ -944,6 +957,17 @@
944
  "bytes": 3499,
945
  "sha256": "c2999f0ea75765c8da3b94aa54d8a9628edd687a0fe38c09d2582f578f2b1ba7"
946
  },
 
 
 
 
 
 
 
 
 
 
 
947
  {
948
  "id": "multi_episode_access_status",
949
  "title": "Multi-episode access status",
@@ -961,7 +985,7 @@
961
  "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/ERROR_ANALYSIS.md",
962
  "kind": "scaleup_status",
963
  "surface": "repo_hf",
964
- "shows": "Summarizes validation-aware Qwen3-Omni held-out failures by episode, action family, train-seen status, required-modality state, and object category.",
965
  "exists": true,
966
  "bytes": 3331,
967
  "sha256": "063fcc2ebd7b57ab5b281fd5e8edc629da4e1f4e5a708483ba27375d02af9467"
@@ -985,8 +1009,8 @@
985
  "surface": "repo_hf",
986
  "shows": "Summarizes same-split simple and neural metadata baselines for the 12 task ids, with unsupported markers for tasks that need missing raw 128 feature blocks.",
987
  "exists": true,
988
- "bytes": 1861,
989
- "sha256": "6e233609117917c9d14dcd815457cb2884f2000bef6cde24b7628d6060737b2b"
990
  },
991
  {
992
  "id": "multi_episode_128_baseline_summary",
@@ -996,8 +1020,52 @@
996
  "surface": "repo_hf",
997
  "shows": "Machine-readable 96/16/16 split counts, run configuration, per-task simple metrics, neural metrics, and raw-feature unsupported statuses.",
998
  "exists": true,
999
- "bytes": 42129,
1000
- "sha256": "32592b0d976a4bf610a6e93412114d792989344570f30c4e89702e310c422f1e"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1001
  },
1002
  {
1003
  "id": "citation",
@@ -1020,6 +1088,259 @@
1020
  "exists": true,
1021
  "bytes": 1745,
1022
  "sha256": "09cf3f632d1248b6aa4457fdd510f878dcbd1e2e51bafba0ddc7cd4f05e23d07"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1023
  }
1024
  ]
1025
  }
 
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Artifact Index",
3
+ "generated_at_utc": "2026-06-06T23:27:35+00:00",
4
  "status": "pass",
5
+ "artifact_count": 118,
6
  "missing": [],
7
  "by_kind": {
8
  "project_path": 14,
9
  "scaleup_contract": 7,
10
+ "scaleup_status": 16,
11
+ "publication_workflow": 5,
12
  "project_scope": 1,
13
  "source_alignment": 5,
 
14
  "evaluation_protocol": 3,
15
  "result_interpretation": 5,
16
+ "metrics_source": 14,
17
  "website_data": 3,
18
  "visual_evidence": 7,
19
  "quality_gate": 12,
 
30
  "generated_figure": 3,
31
  "generated_figure_assets": 1,
32
  "citation": 1,
33
+ "license": 1,
34
+ "verified_public_package": 4,
35
+ "publication_audit": 3
36
  },
37
  "artifacts": [
38
  {
 
43
  "surface": "repo_hf",
44
  "shows": "Gives first-pass readers a concise project shape before the detailed artifact trail.",
45
  "exists": true,
46
+ "bytes": 3837,
47
+ "sha256": "fbaa540aadbe2cf9b6581c5b43cac8cee3056f98cfc7386d322d6f38e70e42a4"
48
  },
49
  {
50
  "id": "project_brief_json",
 
54
  "surface": "website_hf",
55
  "shows": "Machine-readable first-reader project brief for the website and Hugging Face mirrors.",
56
  "exists": true,
57
+ "bytes": 3811,
58
+ "sha256": "ebf3d73a94c31ec8ba67e2aed8cfb04edfad07ad75694eb5373b2fe5a5da9dd9"
59
  },
60
  {
61
  "id": "project_status",
 
65
  "surface": "repo_hf",
66
  "shows": "Gives a compact current-state table for first-pass readers.",
67
  "exists": true,
68
+ "bytes": 9845,
69
+ "sha256": "e77d3facc533bffe35586e4de6500400352c07b4ca0df5ffc523855f38faa26e"
70
  },
71
  {
72
  "id": "project_status_json",
 
76
  "surface": "website_hf",
77
  "shows": "Machine-readable copy of the current project status for website and HF mirrors.",
78
  "exists": true,
79
+ "bytes": 15049,
80
+ "sha256": "23873ed59f3a38f46e45b15a5965afbb1365d49eb359bd5089a4ba6bda990d3c"
81
  },
82
  {
83
  "id": "research_roadmap",
 
87
  "surface": "repo_hf",
88
  "shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
89
  "exists": true,
90
+ "bytes": 12194,
91
+ "sha256": "8773f240e362198b3a669d1ac848d6f1629df3a33e41bd76fba157cbf566479c"
92
  },
93
  {
94
  "id": "research_roadmap_json",
 
98
  "surface": "website_hf",
99
  "shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
100
  "exists": true,
101
+ "bytes": 10133,
102
+ "sha256": "45fd3a1bde93654ccfe14f9271928a67b36eb3f166826bfbdbb9c1092ad33bcf"
103
  },
104
  {
105
  "id": "foundation_model_plan",
 
120
  "surface": "website_hf",
121
  "shows": "Machine-readable foundation-model selection matrix with source links, entry conditions, and evaluation additions.",
122
  "exists": true,
123
+ "bytes": 13193,
124
+ "sha256": "63529cbaf1d5c549f595b3ed49f49feda03edf96952b5cb321117fee340849c9"
125
  },
126
  {
127
  "id": "omni_model_extension_contract",
 
143
  "shows": "Stores the implemented Qwen3-Omni LoRA contract and planned Cosmos-style world-model and VLA/policy branch contracts.",
144
  "exists": true,
145
  "file_count": 3,
146
+ "bytes": 9203
147
  },
148
  {
149
  "id": "omni_backbone_registry_validator",
 
208
  "surface": "repo_hf",
209
  "shows": "Runs simple metadata and neural MLP baselines on the same selected 96/16/16 episode split used by the Qwen3-Omni diagnostic pilot.",
210
  "exists": true,
211
+ "bytes": 48164,
212
+ "sha256": "fbefe3f31e2d19566ed1fc356a25c564ecb4f0645de4d595f5926e1426c058d5"
213
+ },
214
+ {
215
+ "id": "qwen3_lora_hf_package_builder",
216
+ "title": "Qwen3 LoRA HF package builder",
217
+ "path": "scripts/omni/prepare_qwen3_lora_hf_package.py",
218
+ "kind": "publication_workflow",
219
+ "surface": "repo_hf",
220
+ "shows": "Builds the upload-ready Hugging Face adapter folder from a verified Qwen3 LoRA result summary and adapter directory.",
221
+ "exists": true,
222
+ "bytes": 9843,
223
+ "sha256": "636132a7d299db4d874ec797e34acd7e37eea69994c2d39afaafaec6587169a0"
224
  },
225
  {
226
  "id": "additional_development_directions",
 
274
  "surface": "website_hf",
275
  "shows": "Gives a short project path with scope status and public surfaces.",
276
  "exists": true,
277
+ "bytes": 7943,
278
+ "sha256": "ffd5da5fd2c2dc82fa1beb74335a51a33317923b3e7ee4864e2b5031082b0a42"
279
  },
280
  {
281
  "id": "artifact_guide",
 
285
  "surface": "repo_hf",
286
  "shows": "Gives the human-readable map from project scope to data, tasks, platform mirrors, and scale-up status.",
287
  "exists": true,
288
+ "bytes": 17508,
289
+ "sha256": "fbbd9f460610464efb27c371a17cf23c3fa409d853f8148368f48707192427d7"
290
  },
291
  {
292
  "id": "official_dataset_card_alignment",
 
384
  "surface": "repo_hf",
385
  "shows": "Defines the window unit, chronological split, task metrics, leakage controls, and current limitations.",
386
  "exists": true,
387
+ "bytes": 6434,
388
+ "sha256": "4817266bdfdf852ad97b3d37614141c56794d955d82110a819daa1d76755a675"
389
  },
390
  {
391
  "id": "evaluation_protocol_json",
 
395
  "surface": "website_hf",
396
  "shows": "Machine-readable protocol generated from committed task metrics for website and HF mirrors.",
397
  "exists": true,
398
+ "bytes": 14511,
399
+ "sha256": "ea7caff963fcf048f803a852e5cdae8d3975ba4a36d805c5e42211b1bf2744ef"
400
  },
401
  {
402
  "id": "evaluation_protocol_builder",
 
406
  "surface": "repo_hf",
407
  "shows": "Regenerates the protocol from committed summary metrics and task artifacts.",
408
  "exists": true,
409
+ "bytes": 16584,
410
+ "sha256": "e8cd8df471985688fa71e2b1be801e346e50911465ef886625a5d863bf9158f1"
411
  },
412
  {
413
  "id": "research_takeaways",
 
417
  "surface": "repo_hf",
418
  "shows": "Summarizes the main research lessons from committed metrics and identifies which experiments need held-out episodes.",
419
  "exists": true,
420
+ "bytes": 5149,
421
+ "sha256": "a2ab81a52a825b4f1dae59023cfe905a63128384f892dcc8e91c4c4351500aef"
422
  },
423
  {
424
  "id": "research_takeaways_json",
 
428
  "surface": "website_hf",
429
  "shows": "Machine-readable result interpretation for the website, HF cards, and mirror checks.",
430
  "exists": true,
431
+ "bytes": 7139,
432
+ "sha256": "eb87b65ef2f6ef910b4cda29c33f3c75014a5cce8ebf8299f71eb09c856a2481"
433
  },
434
  {
435
  "id": "research_takeaways_builder",
 
439
  "surface": "repo_hf",
440
  "shows": "Regenerates the research takeaways from committed summary metrics and task result artifacts.",
441
  "exists": true,
442
+ "bytes": 13473,
443
+ "sha256": "40ab06b9adaf2c2a9a8d55e07b361198f4cb3a88285596625cc8133e5135a4d2"
444
  },
445
  {
446
  "id": "audio_ablation_script",
 
483
  "surface": "website_hf",
484
  "shows": "Machine-readable audio ablation summary mirrored into the static website and Hugging Face bundles.",
485
  "exists": true,
486
+ "bytes": 10370,
487
+ "sha256": "d6de8db171993f8cc39153075a7e17cda79762659fad7d1944556f8bf10afd0d"
488
  },
489
  {
490
  "id": "audio_ablation_delta_chart",
 
674
  "surface": "repo_hf",
675
  "shows": "Regenerates the task-surface integrity report and fails if task cards expose raw artifact ids or lose the interactive player wiring.",
676
  "exists": true,
677
+ "bytes": 15366,
678
+ "sha256": "8d4573b7a4b75e433da577067369e5221515184536a281c4d2e30c3422ddc4ad"
679
  },
680
  {
681
  "id": "live_publication_status",
 
697
  "surface": "repo",
698
  "shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
699
  "exists": true,
700
+ "bytes": 36847,
701
+ "sha256": "07fd059a9ff8c13b073f349c79f1f7d3abe839559cf0809e291f6ea9bbad21e8"
702
  },
703
  {
704
  "id": "reproducibility_contract",
 
719
  "surface": "website_hf",
720
  "shows": "Machine-readable reproduction steps with expected artifacts and public boundaries.",
721
  "exists": true,
722
+ "bytes": 5280,
723
+ "sha256": "bfb34f14206943da909aee36465e8211c592615fca15a284e2fa8ef9ea1d438b"
724
  },
725
  {
726
  "id": "artifact_index_builder",
 
730
  "surface": "repo_hf",
731
  "shows": "Generates the selective artifact catalog from local files.",
732
  "exists": true,
733
+ "bytes": 38561,
734
+ "sha256": "571a06684909bd4d544d455d5cdee2fb69439b1e16de95609dd51fecc7b58b29"
735
  },
736
  {
737
  "id": "publication_audit",
 
754
  "volatile": true,
755
  "shows": "Separates setup paths from completed held-out-episode results.",
756
  "exists": true,
757
+ "bytes": 21234,
758
  "hash_policy": "existence_and_size_only"
759
  },
760
  {
 
766
  "volatile": true,
767
  "shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
768
  "exists": true,
769
+ "bytes": 235815,
770
  "hash_policy": "existence_and_size_only"
771
  },
772
  {
 
778
  "volatile": true,
779
  "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
780
  "exists": true,
781
+ "bytes": 15375,
782
  "hash_policy": "existence_and_size_only"
783
  },
784
  {
 
789
  "surface": "website_hf",
790
  "shows": "Lists public URLs, upstream sources, and machine-readable project metadata.",
791
  "exists": true,
792
+ "bytes": 5193,
793
+ "sha256": "1ae4c41fdcca6638e570e081d07f700d56ca490fecc25d681d5066b1ca8319ee"
794
  },
795
  {
796
  "id": "task_summary",
 
800
  "surface": "repo_hf",
801
  "shows": "Stores the task definitions, splits, feature dimension, and minimal/neural metrics.",
802
  "exists": true,
803
+ "bytes": 21680,
804
+ "sha256": "5860c901536495b7a8cb592ca0728a546566a70cef6d2b7d1a986e5140fbfe08"
805
  },
806
  {
807
  "id": "website_metrics_bundle",
 
811
  "surface": "website_hf",
812
  "shows": "Mirrors task metrics for the static dashboard.",
813
  "exists": true,
814
+ "bytes": 27490,
815
+ "sha256": "159ed565571aa4215ef30a5ea8fce057481cf0f77ad50aec3ae15de6a38e12ba"
816
  },
817
  {
818
  "id": "feature_manifest",
 
856
  "shows": "Stores matching PyTorch MLP results for the 12 task contracts.",
857
  "exists": true,
858
  "file_count": 60,
859
+ "bytes": 90609517
860
  },
861
  {
862
  "id": "research_direction_taxonomy",
 
866
  "surface": "repo_hf",
867
  "shows": "Maps the 12 tasks to the four Ropedia research directions as direct/proxy/diagnostic.",
868
  "exists": true,
869
+ "bytes": 19204,
870
+ "sha256": "59bece1a151d8475fde50396fd2e70ed4abcfec33f10e400ef165148fd6e7dde"
871
  },
872
  {
873
  "id": "research_direction_extensions",
 
877
  "surface": "repo_hf",
878
  "shows": "Stores one coded extension probe per research direction with minimal and neural metrics.",
879
  "exists": true,
880
+ "bytes": 12592,
881
+ "sha256": "6fa965d5e8249f0972e93558dcc1e7de15d53bdcfd253354255637c421b68dc4"
882
  },
883
  {
884
  "id": "task_walkthroughs",
 
899
  "surface": "website_hf",
900
  "shows": "Presents the task suite and sample modality thumbnails with metrics generated from committed files.",
901
  "exists": true,
902
+ "bytes": 1588641,
903
+ "sha256": "1275e2adaef920ecde7c29dc62c8d79d4f13475a0c09bc3baa693f47cdec2e1f"
904
  },
905
  {
906
  "id": "modality_atlas",
 
943
  "surface": "website_hf",
944
  "shows": "Shows the shared feature pipeline and minimal/neural head families.",
945
  "exists": true,
946
+ "bytes": 774391,
947
+ "sha256": "f08b03bc21e194efe382347d74cf89cd6ac65dede51889971dbfc2fb9d1de3c2"
948
  },
949
  {
950
  "id": "qwen_data_access_status",
 
957
  "bytes": 3499,
958
  "sha256": "c2999f0ea75765c8da3b94aa54d8a9628edd687a0fe38c09d2582f578f2b1ba7"
959
  },
960
+ {
961
+ "id": "qwen3_lora_hf_upload_note",
962
+ "title": "Qwen3 LoRA HF upload note",
963
+ "path": "results/omni_finetune/HF_UPLOAD.md",
964
+ "kind": "publication_workflow",
965
+ "surface": "repo_hf",
966
+ "shows": "Documents the final 128-episode LoRA adapter upload path, target model repo, package builder, and forbidden files.",
967
+ "exists": true,
968
+ "bytes": 1875,
969
+ "sha256": "7a822452347e8c4241a5160d67a9782f17f3d3cb9bd2960b00bac0ca1bf2392f"
970
+ },
971
  {
972
  "id": "multi_episode_access_status",
973
  "title": "Multi-episode access status",
 
985
  "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/ERROR_ANALYSIS.md",
986
  "kind": "scaleup_status",
987
  "surface": "repo_hf",
988
+ "shows": "Summarizes the earlier validation-aware Qwen3-Omni held-out failures by episode, action family, train-seen status, required-modality state, and object category.",
989
  "exists": true,
990
  "bytes": 3331,
991
  "sha256": "063fcc2ebd7b57ab5b281fd5e8edc629da4e1f4e5a708483ba27375d02af9467"
 
1009
  "surface": "repo_hf",
1010
  "shows": "Summarizes same-split simple and neural metadata baselines for the 12 task ids, with unsupported markers for tasks that need missing raw 128 feature blocks.",
1011
  "exists": true,
1012
+ "bytes": 2238,
1013
+ "sha256": "c70440aa502ec569a840159ab7e05b8e7d4ed70e0091ad9a4b2fb3fb0d3803c1"
1014
  },
1015
  {
1016
  "id": "multi_episode_128_baseline_summary",
 
1020
  "surface": "repo_hf",
1021
  "shows": "Machine-readable 96/16/16 split counts, run configuration, per-task simple metrics, neural metrics, and raw-feature unsupported statuses.",
1022
  "exists": true,
1023
+ "bytes": 44519,
1024
+ "sha256": "107a4bedf53a22a1395f5e08b7f1cc9bb1becb8c0e95bc03178029abb3a83aef"
1025
+ },
1026
+ {
1027
+ "id": "omni_model_comparison_report",
1028
+ "title": "Omni model comparison report",
1029
+ "path": "results/omni_finetune/OMNI_MODEL_COMPARISON.md",
1030
+ "kind": "scaleup_status",
1031
+ "surface": "repo_hf",
1032
+ "shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
1033
+ "exists": true,
1034
+ "bytes": 3110,
1035
+ "sha256": "11c22b7ac1e16fd8db86eb7c6fc33cf28fee97a38098f1606a35daee113dc72b"
1036
+ },
1037
+ {
1038
+ "id": "omni_model_comparison_json",
1039
+ "title": "Omni model comparison JSON",
1040
+ "path": "docs/data/omni_model_comparison.json",
1041
+ "kind": "metrics_source",
1042
+ "surface": "repo_hf",
1043
+ "shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
1044
+ "exists": true,
1045
+ "bytes": 21433,
1046
+ "sha256": "b539a489a8974ecec90dda312471be54f466b81bef9d1ebc99d08155f8c21c94"
1047
+ },
1048
+ {
1049
+ "id": "cosmos3_nano_verified_summary",
1050
+ "title": "Cosmos3-Nano verified package summary",
1051
+ "path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json",
1052
+ "kind": "metrics_source",
1053
+ "surface": "repo_hf",
1054
+ "shows": "Machine-readable verified public summary for the Cosmos3-Nano future-window compatibility package.",
1055
+ "exists": true,
1056
+ "bytes": 6151,
1057
+ "sha256": "386b374ef1837fe0087f9eeb21248e6c823334270fe4b1a52dadb3a11c09ef88"
1058
+ },
1059
+ {
1060
+ "id": "cosmos3_nano_run_report",
1061
+ "title": "Cosmos3-Nano future-window run report",
1062
+ "path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/RUN_REPORT.md",
1063
+ "kind": "scaleup_status",
1064
+ "surface": "repo_hf",
1065
+ "shows": "Reader-facing held-out metrics and interpretation for the Cosmos3-Nano future-window compatibility branch.",
1066
+ "exists": true,
1067
+ "bytes": 698,
1068
+ "sha256": "3f56dc6ed58ea079a98a8f7e7ccd294238623a5f06bb9a01f1448665cf3eeb60"
1069
  },
1070
  {
1071
  "id": "citation",
 
1088
  "exists": true,
1089
  "bytes": 1745,
1090
  "sha256": "09cf3f632d1248b6aa4457fdd510f878dcbd1e2e51bafba0ddc7cd4f05e23d07"
1091
+ },
1092
+ {
1093
+ "id": "verified_public_package_xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
1094
+ "title": "Verified public package: Cosmos3-Nano Future-Window World Model",
1095
+ "path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
1096
+ "kind": "verified_public_package",
1097
+ "surface": "repo_hf",
1098
+ "shows": "Public-safe verified package for xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full (cosmos_world_model, status=verified).",
1099
+ "exists": true,
1100
+ "file_count": 14,
1101
+ "bytes": 745194
1102
+ },
1103
+ {
1104
+ "id": "verified_public_summary_xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
1105
+ "title": "Verified summary: Cosmos3-Nano Future-Window World Model",
1106
+ "path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json",
1107
+ "kind": "metrics_source",
1108
+ "surface": "repo_hf",
1109
+ "shows": "Machine-readable verified summary for xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full.",
1110
+ "exists": true,
1111
+ "bytes": 6151,
1112
+ "sha256": "386b374ef1837fe0087f9eeb21248e6c823334270fe4b1a52dadb3a11c09ef88"
1113
+ },
1114
+ {
1115
+ "id": "verified_public_public_result_summary_xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
1116
+ "title": "Verified public result summary: Cosmos3-Nano Future-Window World Model",
1117
+ "path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/PUBLIC_RESULT_SUMMARY.md",
1118
+ "kind": "scaleup_status",
1119
+ "surface": "repo_hf",
1120
+ "shows": "Public result summary for xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full.",
1121
+ "exists": true,
1122
+ "bytes": 984,
1123
+ "sha256": "e7a98bb4bbea34e4dfed25bb1682284514996b722661b13cc59eb70b4163d682"
1124
+ },
1125
+ {
1126
+ "id": "verified_public_run_report_xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
1127
+ "title": "Verified run report: Cosmos3-Nano Future-Window World Model",
1128
+ "path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/RUN_REPORT.md",
1129
+ "kind": "scaleup_status",
1130
+ "surface": "repo_hf",
1131
+ "shows": "Run report for xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full.",
1132
+ "exists": true,
1133
+ "bytes": 698,
1134
+ "sha256": "3f56dc6ed58ea079a98a8f7e7ccd294238623a5f06bb9a01f1448665cf3eeb60"
1135
+ },
1136
+ {
1137
+ "id": "verified_public_metrics_JSON_xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
1138
+ "title": "Verified metrics JSON: Cosmos3-Nano Future-Window World Model",
1139
+ "path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
1140
+ "kind": "metrics_source",
1141
+ "surface": "repo_hf",
1142
+ "shows": "Metrics json for xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full.",
1143
+ "exists": true,
1144
+ "bytes": 1099,
1145
+ "sha256": "f11ccb167908d4f5bfb49c0be0b4bc6c9254901462aa52ae98a2a98e8af16558"
1146
+ },
1147
+ {
1148
+ "id": "verified_public_package_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
1149
+ "title": "Verified public package: Qwen3-Omni LoRA",
1150
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
1151
+ "kind": "verified_public_package",
1152
+ "surface": "repo_hf",
1153
+ "shows": "Public-safe verified package for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval (qwen3_omni_lora, status=verified).",
1154
+ "exists": true,
1155
+ "file_count": 21,
1156
+ "bytes": 5561131
1157
+ },
1158
+ {
1159
+ "id": "verified_public_summary_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
1160
+ "title": "Verified summary: Qwen3-Omni LoRA",
1161
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/verified_result_summary.json",
1162
+ "kind": "metrics_source",
1163
+ "surface": "repo_hf",
1164
+ "shows": "Machine-readable verified summary for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval.",
1165
+ "exists": true,
1166
+ "bytes": 5933,
1167
+ "sha256": "b5f8ef88cd9d8515f03bf092107a9e788695e4c4853feae0db98d384f0c39c9d"
1168
+ },
1169
+ {
1170
+ "id": "verified_public_public_result_summary_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
1171
+ "title": "Verified public result summary: Qwen3-Omni LoRA",
1172
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/PUBLIC_RESULT_SUMMARY.md",
1173
+ "kind": "scaleup_status",
1174
+ "surface": "repo_hf",
1175
+ "shows": "Public result summary for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval.",
1176
+ "exists": true,
1177
+ "bytes": 1779,
1178
+ "sha256": "11305d535a6cb60530560f3862b8374ec083adfc7cf714b49fe06b079e3c049d"
1179
+ },
1180
+ {
1181
+ "id": "verified_public_run_report_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
1182
+ "title": "Verified run report: Qwen3-Omni LoRA",
1183
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/RUN_REPORT.md",
1184
+ "kind": "scaleup_status",
1185
+ "surface": "repo_hf",
1186
+ "shows": "Run report for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval.",
1187
+ "exists": true,
1188
+ "bytes": 603,
1189
+ "sha256": "6792b92c8d8661d8f4f3670e7961a14fd0c495dbb4279602a6fba1480179ad9b"
1190
+ },
1191
+ {
1192
+ "id": "verified_public_metrics_JSON_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
1193
+ "title": "Verified metrics JSON: Qwen3-Omni LoRA",
1194
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/metrics.json",
1195
+ "kind": "metrics_source",
1196
+ "surface": "repo_hf",
1197
+ "shows": "Metrics json for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval.",
1198
+ "exists": true,
1199
+ "bytes": 75629,
1200
+ "sha256": "055b0932ea439338839256ded2fa5fb3ddb562ced0f149d2ea37460e966c4404"
1201
+ },
1202
+ {
1203
+ "id": "verified_public_package_audit_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
1204
+ "title": "Verified package audit: Qwen3-Omni LoRA",
1205
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/package_audit.json",
1206
+ "kind": "publication_audit",
1207
+ "surface": "repo_hf",
1208
+ "shows": "Package audit for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval.",
1209
+ "exists": true,
1210
+ "bytes": 611,
1211
+ "sha256": "2226cdd2e457b23c89b909e40ca469dd08f3db81c1bb797aaafb6cd19de6deea"
1212
+ },
1213
+ {
1214
+ "id": "verified_public_package_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
1215
+ "title": "Verified public package: Qwen3-Omni LoRA",
1216
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
1217
+ "kind": "verified_public_package",
1218
+ "surface": "repo_hf",
1219
+ "shows": "Public-safe verified package for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full (qwen3_omni_lora, status=verified).",
1220
+ "exists": true,
1221
+ "file_count": 16,
1222
+ "bytes": 5872232
1223
+ },
1224
+ {
1225
+ "id": "verified_public_summary_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
1226
+ "title": "Verified summary: Qwen3-Omni LoRA",
1227
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/verified_result_summary.json",
1228
+ "kind": "metrics_source",
1229
+ "surface": "repo_hf",
1230
+ "shows": "Machine-readable verified summary for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full.",
1231
+ "exists": true,
1232
+ "bytes": 6270,
1233
+ "sha256": "e4dac96e88eb03a36ead205f509c680aa2bb763b4da2256e265311bc17304d7f"
1234
+ },
1235
+ {
1236
+ "id": "verified_public_public_result_summary_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
1237
+ "title": "Verified public result summary: Qwen3-Omni LoRA",
1238
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/PUBLIC_RESULT_SUMMARY.md",
1239
+ "kind": "scaleup_status",
1240
+ "surface": "repo_hf",
1241
+ "shows": "Public result summary for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full.",
1242
+ "exists": true,
1243
+ "bytes": 1061,
1244
+ "sha256": "5e4de510a64b90d0632d72575965208f6b272b4531bf9f4c515bab23876654aa"
1245
+ },
1246
+ {
1247
+ "id": "verified_public_run_report_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
1248
+ "title": "Verified run report: Qwen3-Omni LoRA",
1249
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/eval/RUN_REPORT.md",
1250
+ "kind": "scaleup_status",
1251
+ "surface": "repo_hf",
1252
+ "shows": "Run report for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full.",
1253
+ "exists": true,
1254
+ "bytes": 618,
1255
+ "sha256": "2e572809cb3e97c4c17e5f126a63ec1d470e5da345f8a3b6026a6efd5fb927d9"
1256
+ },
1257
+ {
1258
+ "id": "verified_public_metrics_JSON_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
1259
+ "title": "Verified metrics JSON: Qwen3-Omni LoRA",
1260
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/eval/metrics.json",
1261
+ "kind": "metrics_source",
1262
+ "surface": "repo_hf",
1263
+ "shows": "Metrics json for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full.",
1264
+ "exists": true,
1265
+ "bytes": 108127,
1266
+ "sha256": "4c11c61ee661ee201ae91f50d2dc9c0eabe2a1040a2534fe91f4b5b54c96b27c"
1267
+ },
1268
+ {
1269
+ "id": "verified_public_package_audit_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
1270
+ "title": "Verified package audit: Qwen3-Omni LoRA",
1271
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/package_audit.json",
1272
+ "kind": "publication_audit",
1273
+ "surface": "repo_hf",
1274
+ "shows": "Package audit for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full.",
1275
+ "exists": true,
1276
+ "bytes": 669,
1277
+ "sha256": "3d427e70e44b22b882be49f2963e2afcf5b497f25c445850c9f567cdbc41ed15"
1278
+ },
1279
+ {
1280
+ "id": "verified_public_package_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
1281
+ "title": "Verified public package: Qwen3-Omni LoRA",
1282
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
1283
+ "kind": "verified_public_package",
1284
+ "surface": "repo_hf",
1285
+ "shows": "Public-safe verified package for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full (qwen3_omni_lora, status=verified).",
1286
+ "exists": true,
1287
+ "file_count": 16,
1288
+ "bytes": 4898687
1289
+ },
1290
+ {
1291
+ "id": "verified_public_summary_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
1292
+ "title": "Verified summary: Qwen3-Omni LoRA",
1293
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/verified_result_summary.json",
1294
+ "kind": "metrics_source",
1295
+ "surface": "repo_hf",
1296
+ "shows": "Machine-readable verified summary for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full.",
1297
+ "exists": true,
1298
+ "bytes": 6207,
1299
+ "sha256": "d7dae7bc17d0fd07a3f29fd61d57803b9d96d65da2ebd7f5436683a9aa18bfeb"
1300
+ },
1301
+ {
1302
+ "id": "verified_public_public_result_summary_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
1303
+ "title": "Verified public result summary: Qwen3-Omni LoRA",
1304
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/PUBLIC_RESULT_SUMMARY.md",
1305
+ "kind": "scaleup_status",
1306
+ "surface": "repo_hf",
1307
+ "shows": "Public result summary for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full.",
1308
+ "exists": true,
1309
+ "bytes": 1008,
1310
+ "sha256": "080636ce30a37a259c4eaad0791fe5dd03fd60d61092407470d616391f0079ea"
1311
+ },
1312
+ {
1313
+ "id": "verified_public_run_report_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
1314
+ "title": "Verified run report: Qwen3-Omni LoRA",
1315
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/eval/RUN_REPORT.md",
1316
+ "kind": "scaleup_status",
1317
+ "surface": "repo_hf",
1318
+ "shows": "Run report for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full.",
1319
+ "exists": true,
1320
+ "bytes": 590,
1321
+ "sha256": "4309393cd227803f766a9c7b317f5917e39b09cfb6f2618105c5c6cdb064f1a5"
1322
+ },
1323
+ {
1324
+ "id": "verified_public_metrics_JSON_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
1325
+ "title": "Verified metrics JSON: Qwen3-Omni LoRA",
1326
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/eval/metrics.json",
1327
+ "kind": "metrics_source",
1328
+ "surface": "repo_hf",
1329
+ "shows": "Metrics json for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full.",
1330
+ "exists": true,
1331
+ "bytes": 52485,
1332
+ "sha256": "4174640ef32665853b0b807329855344302018952cfa97639cec66649adcbec7"
1333
+ },
1334
+ {
1335
+ "id": "verified_public_package_audit_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
1336
+ "title": "Verified package audit: Qwen3-Omni LoRA",
1337
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/package_audit.json",
1338
+ "kind": "publication_audit",
1339
+ "surface": "repo_hf",
1340
+ "shows": "Package audit for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full.",
1341
+ "exists": true,
1342
+ "bytes": 623,
1343
+ "sha256": "d7264cfb34e48b5c41c89444ea9cd1314b8f4d0bcc0224debbbe5ea512450197"
1344
  }
1345
  ]
1346
  }
metrics/audio_ablation_summary.json CHANGED
@@ -38,7 +38,8 @@
38
  "raw_replacement_delta_vs_no_audio": -0.007422402159244265,
39
  "raw_replacement_delta_vs_handcrafted": -0.00770504201550171,
40
  "all_plus_raw_logmel": 0.002734107997265892,
41
- "all_plus_raw_delta_vs_handcrafted": -0.006320461683552957
 
42
  },
43
  {
44
  "task": "timeline_subtask",
@@ -53,7 +54,8 @@
53
  "raw_replacement_delta_vs_no_audio": -0.01034742052599772,
54
  "raw_replacement_delta_vs_handcrafted": -0.010430590562065117,
55
  "all_plus_raw_logmel": 0.0017889087656529517,
56
- "all_plus_raw_delta_vs_handcrafted": -0.009467445627956345
 
57
  },
58
  {
59
  "task": "transition_detection",
@@ -68,7 +70,8 @@
68
  "raw_replacement_delta_vs_no_audio": 0.010507780641701658,
69
  "raw_replacement_delta_vs_handcrafted": 0.01707714954338524,
70
  "all_plus_raw_logmel": 0.4816233470132239,
71
- "all_plus_raw_delta_vs_handcrafted": 0.019490425838571634
 
72
  },
73
  {
74
  "task": "next_action",
@@ -83,7 +86,8 @@
83
  "raw_replacement_delta_vs_no_audio": -0.004703498679402295,
84
  "raw_replacement_delta_vs_handcrafted": -0.004576004576004574,
85
  "all_plus_raw_logmel": 0.0058479532163742695,
86
- "all_plus_raw_delta_vs_handcrafted": -0.00473405736563631
 
87
  },
88
  {
89
  "task": "hand_trajectory_forecast",
@@ -98,7 +102,8 @@
98
  "raw_replacement_delta_vs_no_audio": -0.0021152496337890625,
99
  "raw_replacement_delta_vs_handcrafted": 0.16052484512329102,
100
  "all_plus_raw_logmel": 4.1367621421813965,
101
- "all_plus_raw_delta_vs_handcrafted": 0.3296332359313965
 
102
  },
103
  {
104
  "task": "contact_prediction",
@@ -113,7 +118,8 @@
113
  "raw_replacement_delta_vs_no_audio": 0.0,
114
  "raw_replacement_delta_vs_handcrafted": 0.0,
115
  "all_plus_raw_logmel": 1.0,
116
- "all_plus_raw_delta_vs_handcrafted": 0.0
 
117
  },
118
  {
119
  "task": "object_relevance",
@@ -128,7 +134,8 @@
128
  "raw_replacement_delta_vs_no_audio": 0.030784313919472256,
129
  "raw_replacement_delta_vs_handcrafted": 0.020578064024873888,
130
  "all_plus_raw_logmel": 0.18262653898768813,
131
- "all_plus_raw_delta_vs_handcrafted": 0.024487004103967203
 
132
  },
133
  {
134
  "task": "caption_grounding",
@@ -143,7 +150,8 @@
143
  "raw_replacement_delta_vs_no_audio": -0.002380702644586563,
144
  "raw_replacement_delta_vs_handcrafted": -0.007237853482365608,
145
  "all_plus_raw_logmel": 0.02719014883041382,
146
- "all_plus_raw_delta_vs_handcrafted": -0.004895530641078949
 
147
  },
148
  {
149
  "task": "cross_modal_retrieval",
@@ -158,7 +166,8 @@
158
  "raw_replacement_delta_vs_no_audio": -0.061719030141830444,
159
  "raw_replacement_delta_vs_handcrafted": -0.04763227701187134,
160
  "all_plus_raw_logmel": 0.31795138120651245,
161
- "all_plus_raw_delta_vs_handcrafted": -0.05717244744300842
 
162
  },
163
  {
164
  "task": "modality_reconstruction",
@@ -173,7 +182,8 @@
173
  "raw_replacement_delta_vs_no_audio": 1.615983009338379,
174
  "raw_replacement_delta_vs_handcrafted": 0.9635343551635742,
175
  "all_plus_raw_logmel": 8.392388343811035,
176
- "all_plus_raw_delta_vs_handcrafted": 1.401824951171875
 
177
  },
178
  {
179
  "task": "temporal_order",
@@ -188,7 +198,8 @@
188
  "raw_replacement_delta_vs_no_audio": 0.03591857034334939,
189
  "raw_replacement_delta_vs_handcrafted": 0.012930064596222923,
190
  "all_plus_raw_logmel": 0.5330450130569861,
191
- "all_plus_raw_delta_vs_handcrafted": 0.015803633746641288
 
192
  },
193
  {
194
  "task": "misalignment_detection",
@@ -203,7 +214,8 @@
203
  "raw_replacement_delta_vs_no_audio": 0.021203945154488313,
204
  "raw_replacement_delta_vs_handcrafted": 0.02644906505448169,
205
  "all_plus_raw_logmel": 0.4373795761078998,
206
- "all_plus_raw_delta_vs_handcrafted": 0.02003912235410793
 
207
  }
208
  ],
209
  "aggregate": {
 
38
  "raw_replacement_delta_vs_no_audio": -0.007422402159244265,
39
  "raw_replacement_delta_vs_handcrafted": -0.00770504201550171,
40
  "all_plus_raw_logmel": 0.002734107997265892,
41
+ "all_plus_raw_delta_vs_handcrafted": -0.006320461683552957,
42
+ "task_display_name": "Action Recognition"
43
  },
44
  {
45
  "task": "timeline_subtask",
 
54
  "raw_replacement_delta_vs_no_audio": -0.01034742052599772,
55
  "raw_replacement_delta_vs_handcrafted": -0.010430590562065117,
56
  "all_plus_raw_logmel": 0.0017889087656529517,
57
+ "all_plus_raw_delta_vs_handcrafted": -0.009467445627956345,
58
+ "task_display_name": "Procedure Step Recognition"
59
  },
60
  {
61
  "task": "transition_detection",
 
70
  "raw_replacement_delta_vs_no_audio": 0.010507780641701658,
71
  "raw_replacement_delta_vs_handcrafted": 0.01707714954338524,
72
  "all_plus_raw_logmel": 0.4816233470132239,
73
+ "all_plus_raw_delta_vs_handcrafted": 0.019490425838571634,
74
+ "task_display_name": "Action Boundary Detection"
75
  },
76
  {
77
  "task": "next_action",
 
86
  "raw_replacement_delta_vs_no_audio": -0.004703498679402295,
87
  "raw_replacement_delta_vs_handcrafted": -0.004576004576004574,
88
  "all_plus_raw_logmel": 0.0058479532163742695,
89
+ "all_plus_raw_delta_vs_handcrafted": -0.00473405736563631,
90
+ "task_display_name": "Next-Action Prediction"
91
  },
92
  {
93
  "task": "hand_trajectory_forecast",
 
102
  "raw_replacement_delta_vs_no_audio": -0.0021152496337890625,
103
  "raw_replacement_delta_vs_handcrafted": 0.16052484512329102,
104
  "all_plus_raw_logmel": 4.1367621421813965,
105
+ "all_plus_raw_delta_vs_handcrafted": 0.3296332359313965,
106
+ "task_display_name": "Hand Trajectory Forecasting"
107
  },
108
  {
109
  "task": "contact_prediction",
 
118
  "raw_replacement_delta_vs_no_audio": 0.0,
119
  "raw_replacement_delta_vs_handcrafted": 0.0,
120
  "all_plus_raw_logmel": 1.0,
121
+ "all_plus_raw_delta_vs_handcrafted": 0.0,
122
+ "task_display_name": "Contact State Prediction"
123
  },
124
  {
125
  "task": "object_relevance",
 
134
  "raw_replacement_delta_vs_no_audio": 0.030784313919472256,
135
  "raw_replacement_delta_vs_handcrafted": 0.020578064024873888,
136
  "all_plus_raw_logmel": 0.18262653898768813,
137
+ "all_plus_raw_delta_vs_handcrafted": 0.024487004103967203,
138
+ "task_display_name": "Object Relevance Prediction"
139
  },
140
  {
141
  "task": "caption_grounding",
 
150
  "raw_replacement_delta_vs_no_audio": -0.002380702644586563,
151
  "raw_replacement_delta_vs_handcrafted": -0.007237853482365608,
152
  "all_plus_raw_logmel": 0.02719014883041382,
153
+ "all_plus_raw_delta_vs_handcrafted": -0.004895530641078949,
154
+ "task_display_name": "Language Grounding"
155
  },
156
  {
157
  "task": "cross_modal_retrieval",
 
166
  "raw_replacement_delta_vs_no_audio": -0.061719030141830444,
167
  "raw_replacement_delta_vs_handcrafted": -0.04763227701187134,
168
  "all_plus_raw_logmel": 0.31795138120651245,
169
+ "all_plus_raw_delta_vs_handcrafted": -0.05717244744300842,
170
+ "task_display_name": "Cross-Modal Retrieval"
171
  },
172
  {
173
  "task": "modality_reconstruction",
 
182
  "raw_replacement_delta_vs_no_audio": 1.615983009338379,
183
  "raw_replacement_delta_vs_handcrafted": 0.9635343551635742,
184
  "all_plus_raw_logmel": 8.392388343811035,
185
+ "all_plus_raw_delta_vs_handcrafted": 1.401824951171875,
186
+ "task_display_name": "Cross-Modal Reconstruction"
187
  },
188
  {
189
  "task": "temporal_order",
 
198
  "raw_replacement_delta_vs_no_audio": 0.03591857034334939,
199
  "raw_replacement_delta_vs_handcrafted": 0.012930064596222923,
200
  "all_plus_raw_logmel": 0.5330450130569861,
201
+ "all_plus_raw_delta_vs_handcrafted": 0.015803633746641288,
202
+ "task_display_name": "Temporal Order Verification"
203
  },
204
  {
205
  "task": "misalignment_detection",
 
214
  "raw_replacement_delta_vs_no_audio": 0.021203945154488313,
215
  "raw_replacement_delta_vs_handcrafted": 0.02644906505448169,
216
  "all_plus_raw_logmel": 0.4373795761078998,
217
+ "all_plus_raw_delta_vs_handcrafted": 0.02003912235410793,
218
+ "task_display_name": "Multimodal Synchronization Detection"
219
  }
220
  ],
221
  "aggregate": {
metrics/evaluation_protocol.json CHANGED
@@ -2,7 +2,7 @@
2
  "title": "Ropedia Xperience-10M Task Suite Evaluation Protocol",
3
  "status": "pass",
4
  "version": "2026-06-01",
5
- "generated_at_utc": "2026-06-06T13:49:32+00:00",
6
  "source_files": [
7
  "docs/data/summary_metrics.json",
8
  "results/episode_task_suite/summary_report.json",
@@ -69,6 +69,7 @@
69
  "task_protocols": [
70
  {
71
  "task": "timeline_action",
 
72
  "family": "supervised classification",
73
  "unit": "single window",
74
  "input": "current 20-frame all-feature window",
@@ -88,6 +89,7 @@
88
  },
89
  {
90
  "task": "timeline_subtask",
 
91
  "family": "supervised classification",
92
  "unit": "single window",
93
  "input": "current 20-frame all-feature window",
@@ -107,6 +109,7 @@
107
  },
108
  {
109
  "task": "transition_detection",
 
110
  "family": "temporal diagnostic",
111
  "unit": "single window",
112
  "input": "current 20-frame all-feature window",
@@ -126,6 +129,7 @@
126
  },
127
  {
128
  "task": "next_action",
 
129
  "family": "short-horizon prediction",
130
  "unit": "single window",
131
  "input": "current 20-frame all-feature window at time t",
@@ -145,6 +149,7 @@
145
  },
146
  {
147
  "task": "hand_trajectory_forecast",
 
148
  "family": "trajectory regression",
149
  "unit": "single window",
150
  "input": "current all-feature window",
@@ -164,6 +169,7 @@
164
  },
165
  {
166
  "task": "contact_prediction",
 
167
  "family": "binary classification",
168
  "unit": "single window",
169
  "input": "non-contact and non-caption feature blocks",
@@ -183,6 +189,7 @@
183
  },
184
  {
185
  "task": "object_relevance",
 
186
  "family": "multi-label classification",
187
  "unit": "single window",
188
  "input": "non-caption feature blocks",
@@ -202,6 +209,7 @@
202
  },
203
  {
204
  "task": "caption_grounding",
 
205
  "family": "retrieval",
206
  "unit": "caption query",
207
  "input": "caption object/interaction query plus candidate sensor windows",
@@ -221,6 +229,7 @@
221
  },
222
  {
223
  "task": "cross_modal_retrieval",
 
224
  "family": "retrieval",
225
  "unit": "sensor query",
226
  "input": "motion, IMU, and camera query features",
@@ -240,6 +249,7 @@
240
  },
241
  {
242
  "task": "modality_reconstruction",
 
243
  "family": "cross-modal regression",
244
  "unit": "single window",
245
  "input": "motion, IMU, and camera features",
@@ -258,6 +268,7 @@
258
  },
259
  {
260
  "task": "temporal_order",
 
261
  "family": "pairwise diagnostic",
262
  "unit": "adjacent window pair",
263
  "input": "two adjacent windows",
@@ -277,6 +288,7 @@
277
  },
278
  {
279
  "task": "misalignment_detection",
 
280
  "family": "pairwise diagnostic",
281
  "unit": "paired modality window",
282
  "input": "motion side plus visual/depth side",
@@ -305,7 +317,7 @@
305
  "current_limitations": [
306
  "Cross-episode generalization for Qwen3-Omni has a first verified diagnostic pilot, but strong model quality is not yet shown.",
307
  "Feature-vector reconstruction is separate from pixel depth, mesh, NeRF, or Gaussian reconstruction.",
308
- "The verified validation-aware Qwen3-Omni diagnostic pilot has weak held-out metrics and needs structured-output and task-quality improvements before larger model-quality claims.",
309
  "Full audio-visual representation learning still needs multi-episode training; the current report includes single-episode audio/no-audio ablations."
310
  ],
311
  "scale_up_gate": {
@@ -316,7 +328,7 @@
316
  "manifest, training metadata, progress logs, metrics, predictions, and run report",
317
  "held-out evaluation on test episodes rather than train windows"
318
  ],
319
- "current_status": "verified diagnostic pilot; quality target not met",
320
  "evidence": [
321
  "docs/data/omni_finetune_verified_result.json",
322
  "results/omni_finetune/verified_public/"
 
2
  "title": "Ropedia Xperience-10M Task Suite Evaluation Protocol",
3
  "status": "pass",
4
  "version": "2026-06-01",
5
+ "generated_at_utc": "2026-06-06T23:26:13+00:00",
6
  "source_files": [
7
  "docs/data/summary_metrics.json",
8
  "results/episode_task_suite/summary_report.json",
 
69
  "task_protocols": [
70
  {
71
  "task": "timeline_action",
72
+ "task_display_name": "Action Recognition",
73
  "family": "supervised classification",
74
  "unit": "single window",
75
  "input": "current 20-frame all-feature window",
 
89
  },
90
  {
91
  "task": "timeline_subtask",
92
+ "task_display_name": "Procedure Step Recognition",
93
  "family": "supervised classification",
94
  "unit": "single window",
95
  "input": "current 20-frame all-feature window",
 
109
  },
110
  {
111
  "task": "transition_detection",
112
+ "task_display_name": "Action Boundary Detection",
113
  "family": "temporal diagnostic",
114
  "unit": "single window",
115
  "input": "current 20-frame all-feature window",
 
129
  },
130
  {
131
  "task": "next_action",
132
+ "task_display_name": "Next-Action Prediction",
133
  "family": "short-horizon prediction",
134
  "unit": "single window",
135
  "input": "current 20-frame all-feature window at time t",
 
149
  },
150
  {
151
  "task": "hand_trajectory_forecast",
152
+ "task_display_name": "Hand Trajectory Forecasting",
153
  "family": "trajectory regression",
154
  "unit": "single window",
155
  "input": "current all-feature window",
 
169
  },
170
  {
171
  "task": "contact_prediction",
172
+ "task_display_name": "Contact State Prediction",
173
  "family": "binary classification",
174
  "unit": "single window",
175
  "input": "non-contact and non-caption feature blocks",
 
189
  },
190
  {
191
  "task": "object_relevance",
192
+ "task_display_name": "Object Relevance Prediction",
193
  "family": "multi-label classification",
194
  "unit": "single window",
195
  "input": "non-caption feature blocks",
 
209
  },
210
  {
211
  "task": "caption_grounding",
212
+ "task_display_name": "Language Grounding",
213
  "family": "retrieval",
214
  "unit": "caption query",
215
  "input": "caption object/interaction query plus candidate sensor windows",
 
229
  },
230
  {
231
  "task": "cross_modal_retrieval",
232
+ "task_display_name": "Cross-Modal Retrieval",
233
  "family": "retrieval",
234
  "unit": "sensor query",
235
  "input": "motion, IMU, and camera query features",
 
249
  },
250
  {
251
  "task": "modality_reconstruction",
252
+ "task_display_name": "Cross-Modal Reconstruction",
253
  "family": "cross-modal regression",
254
  "unit": "single window",
255
  "input": "motion, IMU, and camera features",
 
268
  },
269
  {
270
  "task": "temporal_order",
271
+ "task_display_name": "Temporal Order Verification",
272
  "family": "pairwise diagnostic",
273
  "unit": "adjacent window pair",
274
  "input": "two adjacent windows",
 
288
  },
289
  {
290
  "task": "misalignment_detection",
291
+ "task_display_name": "Multimodal Synchronization Detection",
292
  "family": "pairwise diagnostic",
293
  "unit": "paired modality window",
294
  "input": "motion side plus visual/depth side",
 
317
  "current_limitations": [
318
  "Cross-episode generalization for Qwen3-Omni has a first verified diagnostic pilot, but strong model quality is not yet shown.",
319
  "Feature-vector reconstruction is separate from pixel depth, mesh, NeRF, or Gaussian reconstruction.",
320
+ "The final verified Qwen3-Omni diagnostic result meets the strict-JSON target, but action/subtask held-out quality remains weak and needs error analysis before larger model-quality claims.",
321
  "Full audio-visual representation learning still needs multi-episode training; the current report includes single-episode audio/no-audio ablations."
322
  ],
323
  "scale_up_gate": {
 
328
  "manifest, training metadata, progress logs, metrics, predictions, and run report",
329
  "held-out evaluation on test episodes rather than train windows"
330
  ],
331
+ "current_status": "verified diagnostic result; strict-JSON quality target met, action/subtask quality still weak",
332
  "evidence": [
333
  "docs/data/omni_finetune_verified_result.json",
334
  "results/omni_finetune/verified_public/"
metrics/foundation_model_plan.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Xperience-10M Foundation Model Plan",
3
  "status": "planning_artifact",
4
- "current_boundary": "A first held-out multi-episode Qwen3-Omni diagnostic pilot is verified in this repo, but it is not a strong model result. The current foundation-model work should treat it as the baseline train/eval/package loop before validation-aware Qwen reruns, Cosmos-style world modeling, or policy/VLA branches.",
5
  "backbone_registry": {
6
  "config_dir": "configs/omni_backbones",
7
  "validator": "scripts/omni/backbone_registry.py --validate --json",
@@ -206,7 +206,7 @@
206
  {
207
  "step": 2,
208
  "name": "First held-out baseline",
209
- "action": "Run validation-aware Qwen3-Omni LoRA to improve the verified diagnostic baseline."
210
  },
211
  {
212
  "step": 3,
 
1
  {
2
  "title": "Xperience-10M Foundation Model Plan",
3
  "status": "planning_artifact",
4
+ "current_boundary": "A final held-out multi-episode Qwen3-Omni diagnostic result is verified in this repo and meets the strict-JSON target, but it is not a strong action/subtask model result. The current foundation-model work should treat it as the baseline train/eval/package loop before Qwen action/subtask improvements, Cosmos-style world modeling, or policy/VLA branches.",
5
  "backbone_registry": {
6
  "config_dir": "configs/omni_backbones",
7
  "validator": "scripts/omni/backbone_registry.py --validate --json",
 
206
  {
207
  "step": 2,
208
  "name": "First held-out baseline",
209
+ "action": "Run Qwen3-Omni action/subtask error analysis and targeted reruns to improve the verified diagnostic baseline."
210
  },
211
  {
212
  "step": 3,
metrics/mirror_parity.json CHANGED
The diff for this file is too large to render. See raw diff
 
metrics/omni_finetune_verified_result.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "title": "Verified Qwen3-Omni LoRA Validation-Aware Held-Out Pilot",
3
- "status": "verified_validation_aware_diagnostic_pilot",
4
- "status_date": "2026-06-06",
5
  "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
6
  "adapter": "Qwen3-Omni LoRA",
7
  "dataset": "Ropedia Xperience-10M selected 128-episode pilot",
@@ -27,7 +27,7 @@
27
  },
28
  "training": {
29
  "num_processes": 8,
30
- "epochs": 1,
31
  "lora_rank": 16,
32
  "lora_alpha": 32,
33
  "lora_dropout": 0.05,
@@ -36,64 +36,50 @@
36
  "history": [
37
  {
38
  "epoch": 1,
39
- "train_loss": 0.41304643672440994,
40
- "val_loss": 0.0330660454928875,
41
  "global_step": 356
 
 
 
 
 
 
42
  }
43
  ],
44
  "loss": "answer-token cross entropy over supervised JSON tokens",
45
- "note": "This validation-aware run uses the selected validation split during training and preserves the held-out test split for final evaluation."
46
  },
47
  "evaluation": {
48
  "split": "test",
49
  "num_samples": 448,
50
  "held_out_episode_count": 14,
51
- "json_validity_rate": 0.875,
52
- "action_macro_f1": 0.0026621494447581404,
53
- "subtask_accuracy": 0.006696428571428571,
54
- "transition_accuracy": 0.8504464285714286,
55
- "next_action_accuracy": 0.024553571428571428,
56
- "contact_accuracy": 0.6450892857142857,
57
- "object_micro_f1": 0.22299431459254582,
58
  "quality_target": {
59
  "json_validity_rate": 0.98,
60
- "status": "not_met"
61
  },
62
- "previous_diagnostic_json_validity_rate": 0.8526785714285714
63
  },
64
- "interpretation": "This is a real held-out multi-episode validation-aware diagnostic pilot proving the export, LoRA training with validation monitoring, evaluation, validation, and public-safe packaging loop. JSON validity improved over the earlier no-validation diagnostic run, but task-quality metrics remain weak, so it should be used as a baseline and error-analysis starting point rather than a strong Xperience-10M model.",
65
  "public_package": {
66
- "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
67
  "audit_status": "pass",
68
  "contains_raw_xperience10m_data": false,
69
  "contains_qwen_base_weights": false,
70
  "contains_lora_weights": false,
71
- "error_analysis": {
72
- "status": "pass",
73
- "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/error_analysis_summary.json",
74
- "markdown_report": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/ERROR_ANALYSIS.md",
75
- "groupings": [
76
- "episode",
77
- "action_family",
78
- "train_seen_status",
79
- "required_modality_state",
80
- "object_category"
81
- ],
82
- "key_readouts": {
83
- "parsed_prediction_rate": 0.8772321428571429,
84
- "weakest_action_family": "locomotion",
85
- "weakest_action_family_samples": 23,
86
- "weakest_action_family_parsed_prediction_rate": 0.2608695652173913,
87
- "seen_action_exact_rate": 0.04580152671755725,
88
- "unseen_action_exact_rate": 0.015772870662460567,
89
- "required_modality_state": "rrd_missing_only_required_modalities_present"
90
- }
91
- }
92
  },
93
  "required_next_steps": [
94
- "Improve JSON-format reliability through prompt, decoding, constrained parsing, or target formatting changes.",
95
- "Use the published held-out error analysis to prioritize JSON constraints, action/subtask formatting, object vocabulary handling, and missing-modality robustness.",
96
- "Run a second validation-aware Qwen3-Omni pass only after the JSON/output contract is tightened.",
97
- "Keep the same verified package contract for Cosmos-style world-model and VLA/policy branches."
98
  ]
99
  }
 
1
  {
2
+ "title": "Verified Qwen3-Omni LoRA 128-Episode Held-Out Result",
3
+ "status": "verified_full_128_episode_diagnostic_result",
4
+ "status_date": "2026-06-07",
5
  "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
6
  "adapter": "Qwen3-Omni LoRA",
7
  "dataset": "Ropedia Xperience-10M selected 128-episode pilot",
 
27
  },
28
  "training": {
29
  "num_processes": 8,
30
+ "epochs": 2,
31
  "lora_rank": 16,
32
  "lora_alpha": 32,
33
  "lora_dropout": 0.05,
 
36
  "history": [
37
  {
38
  "epoch": 1,
39
+ "train_loss": 0.41282760031950355,
40
+ "val_loss": 0.03288277983665466,
41
  "global_step": 356
42
+ },
43
+ {
44
+ "epoch": 2,
45
+ "train_loss": 0.027745448225544075,
46
+ "val_loss": 0.027823254466056824,
47
+ "global_step": 712
48
  }
49
  ],
50
  "loss": "answer-token cross entropy over supervised JSON tokens",
51
+ "note": "This final Qwen3-Omni LoRA pass reused the selected 96/16/16 episode setup, trained on all exported train windows with validation monitoring, and preserved the held-out test split for final evaluation."
52
  },
53
  "evaluation": {
54
  "split": "test",
55
  "num_samples": 448,
56
  "held_out_episode_count": 14,
57
+ "json_validity_rate": 0.9977678571428571,
58
+ "action_macro_f1": 0.0024331644885523347,
59
+ "subtask_accuracy": 0.002232142857142857,
60
+ "transition_accuracy": 0.9709821428571429,
61
+ "next_action_accuracy": 0.029017857142857144,
62
+ "contact_accuracy": 0.71875,
63
+ "object_micro_f1": 0.30160427807486634,
64
  "quality_target": {
65
  "json_validity_rate": 0.98,
66
+ "status": "met"
67
  },
68
+ "previous_validation_aware_json_validity_rate": 0.875
69
  },
70
+ "interpretation": "This is the final verified two-epoch Qwen3-Omni LoRA diagnostic result for the selected 128-episode setup. It meets the 98% JSON-validity target and improves transition, contact, and object metrics over the earlier validation-aware pilot, but action and subtask classification remain weak on held-out episodes, so this is still a baseline-quality diagnostic model rather than a strong Xperience-10M action recognizer.",
71
  "public_package": {
72
+ "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
73
  "audit_status": "pass",
74
  "contains_raw_xperience10m_data": false,
75
  "contains_qwen_base_weights": false,
76
  "contains_lora_weights": false,
77
+ "adapter_weights_repo": "cy0307/ropedia-qwen3-omni-lora-128ep"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  },
79
  "required_next_steps": [
80
+ "Verify the public Hugging Face LoRA adapter repository hashes after publication.",
81
+ "Publish the final verified package and refreshed comparison tables to all public mirrors, then run live publication verification.",
82
+ "Use the full-eval predictions for error analysis focused on action/subtask confusions and unseen-label behavior.",
83
+ "Keep the same verified package contract for the Cosmos3 world-model branch and any future VLA/policy branches."
84
  ]
85
  }
metrics/omni_model_comparison.json ADDED
@@ -0,0 +1,513 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "title": "Ropedia Xperience-10M Current Result Versions",
3
+ "generated_at_utc": "2026-06-06T23:26:13+00:00",
4
+ "status": "pass",
5
+ "version_count": 3,
6
+ "comparison_rule": "Compare only rows with the same scope and target. Single-episode raw-feature metrics, 128-episode metadata baselines, Qwen3 structured JSON metrics, and Cosmos3 future-window metrics answer different questions.",
7
+ "version_reading_notes": [
8
+ "Version 1 is the public-sample 12-task harness with minimal and neural heads.",
9
+ "Version 2 is the selected 128-episode same-split simple/NN baseline alignment.",
10
+ "Version 3 is the verified model-branch layer: the current final Qwen3-Omni LoRA package is the JSON-task diagnostic result, while Cosmos3-Nano is a future-window compatibility result rather than a full Cosmos diffusion fine-tune."
11
+ ],
12
+ "versions": [
13
+ {
14
+ "id": "v1_single_episode_public_sample",
15
+ "title": "Single-Episode Public-Sample Task Suite",
16
+ "status": "verified",
17
+ "scope": "one public Xperience-10M sample episode",
18
+ "source": "results/episode_task_suite/summary_report.json",
19
+ "split": "chronological 70/30 within one episode",
20
+ "counts": {
21
+ "episodes": 1,
22
+ "windows": 1161,
23
+ "frames": 5821,
24
+ "feature_dim": 8546,
25
+ "task_count": 12,
26
+ "neural_task_count": 12
27
+ },
28
+ "models": [
29
+ "minimal task heads",
30
+ "compact neural MLP task heads"
31
+ ],
32
+ "task_metrics": [
33
+ {
34
+ "task": "caption_grounding",
35
+ "task_display_name": "Language Grounding",
36
+ "simple_status": "pass",
37
+ "simple_primary_metric": "mrr",
38
+ "simple_primary_score": 0.016023479050338015,
39
+ "neural_status": "pass",
40
+ "neural_primary_metric": "mrr",
41
+ "neural_primary_score": 0.01684125567132316
42
+ },
43
+ {
44
+ "task": "contact_prediction",
45
+ "task_display_name": "Contact State Prediction",
46
+ "simple_status": "pass",
47
+ "simple_primary_metric": "macro_f1",
48
+ "simple_primary_score": 1.0,
49
+ "neural_status": "pass",
50
+ "neural_primary_metric": "macro_f1",
51
+ "neural_primary_score": 1.0
52
+ },
53
+ {
54
+ "task": "cross_modal_retrieval",
55
+ "task_display_name": "Cross-Modal Retrieval",
56
+ "simple_status": "pass",
57
+ "simple_primary_metric": "mrr",
58
+ "simple_primary_score": 0.26925966892956127,
59
+ "neural_status": "pass",
60
+ "neural_primary_metric": "mrr",
61
+ "neural_primary_score": 0.1299971898648288
62
+ },
63
+ {
64
+ "task": "hand_trajectory_forecast",
65
+ "task_display_name": "Hand Trajectory Forecasting",
66
+ "simple_status": "pass",
67
+ "simple_primary_metric": "mpjpe",
68
+ "simple_primary_score": 0.8646570444107056,
69
+ "neural_status": "pass",
70
+ "neural_primary_metric": "mpjpe",
71
+ "neural_primary_score": 0.10785018652677536
72
+ },
73
+ {
74
+ "task": "misalignment_detection",
75
+ "task_display_name": "Multimodal Synchronization Detection",
76
+ "simple_status": "pass",
77
+ "simple_primary_metric": "f1",
78
+ "simple_primary_score": 0.5051698670605613,
79
+ "neural_status": "pass",
80
+ "neural_primary_metric": "f1",
81
+ "neural_primary_score": 0.7152682255845944
82
+ },
83
+ {
84
+ "task": "modality_reconstruction",
85
+ "task_display_name": "Cross-Modal Reconstruction",
86
+ "simple_status": "pass",
87
+ "simple_primary_metric": "r2",
88
+ "simple_primary_score": -0.015271898913936655,
89
+ "neural_status": "pass",
90
+ "neural_primary_metric": "r2",
91
+ "neural_primary_score": -0.010171410134180991
92
+ },
93
+ {
94
+ "task": "next_action",
95
+ "task_display_name": "Next-Action Prediction",
96
+ "simple_status": "pass",
97
+ "simple_primary_metric": "macro_f1",
98
+ "simple_primary_score": 0.05925925925925927,
99
+ "neural_status": "pass",
100
+ "neural_primary_metric": "macro_f1",
101
+ "neural_primary_score": 0.04186046511627907
102
+ },
103
+ {
104
+ "task": "object_relevance",
105
+ "task_display_name": "Object Relevance Prediction",
106
+ "simple_status": "pass",
107
+ "simple_primary_metric": "micro_f1",
108
+ "simple_primary_score": 0.18034382095361662,
109
+ "neural_status": "pass",
110
+ "neural_primary_metric": "micro_f1",
111
+ "neural_primary_score": 0.1679279279279279
112
+ },
113
+ {
114
+ "task": "temporal_order",
115
+ "task_display_name": "Temporal Order Verification",
116
+ "simple_status": "pass",
117
+ "simple_primary_metric": "accuracy",
118
+ "simple_primary_score": 0.4540229885057471,
119
+ "neural_status": "pass",
120
+ "neural_primary_metric": "accuracy",
121
+ "neural_primary_score": 0.8577586206896551
122
+ },
123
+ {
124
+ "task": "timeline_action",
125
+ "task_display_name": "Action Recognition",
126
+ "simple_status": "pass",
127
+ "simple_primary_metric": "macro_f1",
128
+ "simple_primary_score": 0.05,
129
+ "neural_status": "pass",
130
+ "neural_primary_metric": "macro_f1",
131
+ "neural_primary_score": 0.014814814814814814
132
+ },
133
+ {
134
+ "task": "timeline_subtask",
135
+ "task_display_name": "Procedure Step Recognition",
136
+ "simple_status": "pass",
137
+ "simple_primary_metric": "macro_f1",
138
+ "simple_primary_score": 0.05056355513846935,
139
+ "neural_status": "pass",
140
+ "neural_primary_metric": "macro_f1",
141
+ "neural_primary_score": 0.02810810810810811
142
+ },
143
+ {
144
+ "task": "transition_detection",
145
+ "task_display_name": "Action Boundary Detection",
146
+ "simple_status": "pass",
147
+ "simple_primary_metric": "macro_f1",
148
+ "simple_primary_score": 0.6118237590630229,
149
+ "neural_status": "pass",
150
+ "neural_primary_metric": "macro_f1",
151
+ "neural_primary_score": 0.5862068965517241
152
+ }
153
+ ],
154
+ "interpretation": "This layer verifies the 12 task contracts and raw multimodal feature pipeline on the public sample. It is not a cross-episode benchmark."
155
+ },
156
+ {
157
+ "id": "v2_multi_episode_128_aligned_metadata_baselines",
158
+ "title": "128-Episode Aligned Simple/NN Baselines",
159
+ "status": "pass",
160
+ "scope": "selected 128-episode 96/16/16 split",
161
+ "source": "results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md",
162
+ "split": "train/val/test by selected episode/session",
163
+ "counts": {
164
+ "rows": 3808,
165
+ "split_counts": {
166
+ "train": 2848,
167
+ "val": 512,
168
+ "test": 448
169
+ },
170
+ "episode_counts": {
171
+ "test": 16,
172
+ "train": 96,
173
+ "val": 16
174
+ },
175
+ "task_count": 12,
176
+ "simple_supported_task_count": 8,
177
+ "neural_supported_task_count": 6
178
+ },
179
+ "models": [
180
+ "metadata/text simple baselines",
181
+ "metadata/text neural MLP baselines"
182
+ ],
183
+ "task_metrics": [
184
+ {
185
+ "task": "timeline_action",
186
+ "task_display_name": "Action Recognition",
187
+ "simple_status": "pass",
188
+ "simple_primary_metric": "macro_f1",
189
+ "simple_primary_score": 0.00017511601435951318,
190
+ "neural_status": "pass",
191
+ "neural_primary_metric": "macro_f1",
192
+ "neural_primary_score": 0.0
193
+ },
194
+ {
195
+ "task": "timeline_subtask",
196
+ "task_display_name": "Procedure Step Recognition",
197
+ "simple_status": "pass",
198
+ "simple_primary_metric": "macro_f1",
199
+ "simple_primary_score": 0.0,
200
+ "neural_status": "pass",
201
+ "neural_primary_metric": "macro_f1",
202
+ "neural_primary_score": 0.0
203
+ },
204
+ {
205
+ "task": "transition_detection",
206
+ "task_display_name": "Action Boundary Detection",
207
+ "simple_status": "pass",
208
+ "simple_primary_metric": "macro_f1",
209
+ "simple_primary_score": 0.5219803670507895,
210
+ "neural_status": "pass",
211
+ "neural_primary_metric": "macro_f1",
212
+ "neural_primary_score": 0.45822172492907925
213
+ },
214
+ {
215
+ "task": "next_action",
216
+ "task_display_name": "Next-Action Prediction",
217
+ "simple_status": "pass",
218
+ "simple_primary_metric": "macro_f1",
219
+ "simple_primary_score": 0.00019966057701906761,
220
+ "neural_status": "pass",
221
+ "neural_primary_metric": "macro_f1",
222
+ "neural_primary_score": 0.0
223
+ },
224
+ {
225
+ "task": "hand_trajectory_forecast",
226
+ "task_display_name": "Hand Trajectory Forecasting",
227
+ "simple_status": "unsupported_without_raw_128_feature_blocks",
228
+ "simple_primary_metric": "mpjpe",
229
+ "simple_primary_score": null,
230
+ "neural_status": "not_run",
231
+ "neural_primary_metric": "",
232
+ "neural_primary_score": null
233
+ },
234
+ {
235
+ "task": "contact_prediction",
236
+ "task_display_name": "Contact State Prediction",
237
+ "simple_status": "pass",
238
+ "simple_primary_metric": "macro_f1",
239
+ "simple_primary_score": 0.5167950693374422,
240
+ "neural_status": "pass",
241
+ "neural_primary_metric": "macro_f1",
242
+ "neural_primary_score": 0.21951219512195122
243
+ },
244
+ {
245
+ "task": "object_relevance",
246
+ "task_display_name": "Object Relevance Prediction",
247
+ "simple_status": "pass",
248
+ "simple_primary_metric": "micro_f1",
249
+ "simple_primary_score": 0.18221614227086183,
250
+ "neural_status": "pass",
251
+ "neural_primary_metric": "micro_f1",
252
+ "neural_primary_score": 0.1053878034339846
253
+ },
254
+ {
255
+ "task": "caption_grounding",
256
+ "task_display_name": "Language Grounding",
257
+ "simple_status": "pass",
258
+ "simple_primary_metric": "mrr",
259
+ "simple_primary_score": 0.012785504572093487,
260
+ "neural_status": "not_run",
261
+ "neural_primary_metric": "",
262
+ "neural_primary_score": null
263
+ },
264
+ {
265
+ "task": "cross_modal_retrieval",
266
+ "task_display_name": "Cross-Modal Retrieval",
267
+ "simple_status": "unsupported_without_raw_128_feature_blocks",
268
+ "simple_primary_metric": "mrr",
269
+ "simple_primary_score": null,
270
+ "neural_status": "not_run",
271
+ "neural_primary_metric": "",
272
+ "neural_primary_score": null
273
+ },
274
+ {
275
+ "task": "modality_reconstruction",
276
+ "task_display_name": "Cross-Modal Reconstruction",
277
+ "simple_status": "unsupported_without_raw_128_feature_blocks",
278
+ "simple_primary_metric": "r2",
279
+ "simple_primary_score": null,
280
+ "neural_status": "not_run",
281
+ "neural_primary_metric": "",
282
+ "neural_primary_score": null
283
+ },
284
+ {
285
+ "task": "temporal_order",
286
+ "task_display_name": "Temporal Order Verification",
287
+ "simple_status": "pass",
288
+ "simple_primary_metric": "f1",
289
+ "simple_primary_score": 0.32713178294573647,
290
+ "neural_status": "not_run",
291
+ "neural_primary_metric": "",
292
+ "neural_primary_score": null
293
+ },
294
+ {
295
+ "task": "misalignment_detection",
296
+ "task_display_name": "Multimodal Synchronization Detection",
297
+ "simple_status": "unsupported_without_raw_128_feature_blocks",
298
+ "simple_primary_metric": "f1",
299
+ "simple_primary_score": null,
300
+ "neural_status": "not_run",
301
+ "neural_primary_metric": "",
302
+ "neural_primary_score": null
303
+ }
304
+ ],
305
+ "interpretation": "This layer aligns the previous simple and neural baseline framing to the same selected 96/16/16 split used by the model branches. It uses public-safe JSONL metadata/text features, so raw-feature-only tasks remain explicitly unsupported until 128-run sensor feature blocks exist."
306
+ },
307
+ {
308
+ "id": "v3_multi_episode_foundation_model_branches",
309
+ "title": "128-Episode Foundation-Model Branches",
310
+ "status": "partial_verified",
311
+ "scope": "selected 128-episode split and compatible derived windows",
312
+ "source": "results/omni_finetune/verified_public/",
313
+ "split": "episode/session held-out split; exact task target depends on backbone contract",
314
+ "counts": {
315
+ "verified_branch_count": 4,
316
+ "qwen3_verified_package_count": 3,
317
+ "cosmos3_verified_package_count": 1
318
+ },
319
+ "models": [
320
+ "Qwen3-Omni LoRA",
321
+ "Cosmos3-Nano future-window compatibility branch"
322
+ ],
323
+ "branches": [
324
+ {
325
+ "id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
326
+ "title": "Cosmos3-Nano Future-Window World Model",
327
+ "status": "verified",
328
+ "backbone": "cosmos_world_model",
329
+ "dataset_contract": "xperience10m_future_window_world_model_v0",
330
+ "training_objective": "future_window_and_action_conditioned_world_modeling",
331
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json",
332
+ "dataset_run_id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat",
333
+ "train_run_id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter",
334
+ "eval_run_id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
335
+ "counts": {
336
+ "dataset_samples": 3213,
337
+ "dataset_episodes": 119,
338
+ "split_counts": {
339
+ "train": 2403,
340
+ "test": 378,
341
+ "val": 432
342
+ },
343
+ "train_samples": 2403,
344
+ "val_samples": 432,
345
+ "eval_samples": 378,
346
+ "held_out_episode_count": 14,
347
+ "num_processes": 1
348
+ },
349
+ "primary_metrics": {
350
+ "future_retrieval_mrr": 0.022138720585222767,
351
+ "future_retrieval_recall_at_5": 0.015873015873015872,
352
+ "temporal_consistency": 0.09523809523809523,
353
+ "feature_reconstruction_error": 3479.218317102503,
354
+ "transition_accuracy": 0.9682539682539683,
355
+ "contact_accuracy": 0.7433862433862434,
356
+ "held_out_episode_count": 14
357
+ },
358
+ "history": [
359
+ {
360
+ "epoch": 0,
361
+ "train_loss": null,
362
+ "val_loss": null,
363
+ "note": "closed-form mean-delta adapter; no Cosmos diffusion weights fine-tuned in this compatibility run"
364
+ }
365
+ ]
366
+ },
367
+ {
368
+ "id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
369
+ "title": "Qwen3-Omni LoRA",
370
+ "status": "verified",
371
+ "backbone": "qwen3_omni_lora",
372
+ "dataset_contract": "xperience10m_episode_json_qa_v1",
373
+ "training_objective": "structured_episode_understanding_json_qa",
374
+ "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/verified_result_summary.json",
375
+ "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
376
+ "train_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora",
377
+ "eval_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
378
+ "counts": {
379
+ "dataset_samples": 3808,
380
+ "dataset_episodes": 119,
381
+ "split_counts": {
382
+ "train": 2848,
383
+ "val": 512,
384
+ "test": 448
385
+ },
386
+ "train_samples": 2848,
387
+ "val_samples": 512,
388
+ "eval_samples": 448,
389
+ "held_out_episode_count": 14,
390
+ "num_processes": 8
391
+ },
392
+ "primary_metrics": {
393
+ "json_validity_rate": 0.875,
394
+ "action_macro_f1": 0.0026621494447581404,
395
+ "subtask_accuracy": 0.006696428571428571,
396
+ "transition_accuracy": 0.8504464285714286,
397
+ "next_action_accuracy": 0.024553571428571428,
398
+ "contact_accuracy": 0.6450892857142857,
399
+ "object_micro_f1": 0.22299431459254582,
400
+ "held_out_episode_count": 14
401
+ },
402
+ "history": [
403
+ {
404
+ "epoch": 1,
405
+ "train_loss": 0.41304643672440994,
406
+ "val_loss": 0.0330660454928875,
407
+ "global_step": 356
408
+ }
409
+ ]
410
+ },
411
+ {
412
+ "id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
413
+ "title": "Qwen3-Omni LoRA",
414
+ "status": "verified",
415
+ "backbone": "qwen3_omni_lora",
416
+ "dataset_contract": "xperience10m_episode_json_qa_v1",
417
+ "training_objective": "structured_episode_understanding_json_qa",
418
+ "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/verified_result_summary.json",
419
+ "dataset_run_id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu",
420
+ "train_run_id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6",
421
+ "eval_run_id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
422
+ "counts": {
423
+ "dataset_samples": 3808,
424
+ "dataset_episodes": 119,
425
+ "split_counts": {
426
+ "train": 2848,
427
+ "val": 512,
428
+ "test": 448
429
+ },
430
+ "train_samples": 2848,
431
+ "val_samples": 0,
432
+ "eval_samples": 448,
433
+ "held_out_episode_count": 14,
434
+ "num_processes": 8
435
+ },
436
+ "primary_metrics": {
437
+ "json_validity_rate": 0.8526785714285714,
438
+ "action_macro_f1": 0.00213753459655099,
439
+ "subtask_accuracy": 0.004464285714285714,
440
+ "transition_accuracy": 0.828125,
441
+ "next_action_accuracy": 0.022321428571428572,
442
+ "contact_accuracy": 0.6517857142857143,
443
+ "object_micro_f1": 0.23062730627306272,
444
+ "held_out_episode_count": 14
445
+ },
446
+ "history": [
447
+ {
448
+ "epoch": 1,
449
+ "train_loss": 0.4121775626560694,
450
+ "val_loss": null,
451
+ "global_step": 356
452
+ }
453
+ ]
454
+ },
455
+ {
456
+ "id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
457
+ "title": "Qwen3-Omni LoRA",
458
+ "status": "verified",
459
+ "backbone": "qwen3_omni_lora",
460
+ "dataset_contract": "xperience10m_episode_json_qa_v1",
461
+ "training_objective": "structured_episode_understanding_json_qa",
462
+ "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/verified_result_summary.json",
463
+ "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
464
+ "train_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora",
465
+ "eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
466
+ "counts": {
467
+ "dataset_samples": 3808,
468
+ "dataset_episodes": 119,
469
+ "split_counts": {
470
+ "train": 2848,
471
+ "val": 512,
472
+ "test": 448
473
+ },
474
+ "train_samples": 2848,
475
+ "val_samples": 512,
476
+ "eval_samples": 448,
477
+ "held_out_episode_count": 14,
478
+ "num_processes": 8
479
+ },
480
+ "primary_metrics": {
481
+ "json_validity_rate": 0.9977678571428571,
482
+ "action_macro_f1": 0.0024331644885523347,
483
+ "subtask_accuracy": 0.002232142857142857,
484
+ "transition_accuracy": 0.9709821428571429,
485
+ "next_action_accuracy": 0.029017857142857144,
486
+ "contact_accuracy": 0.71875,
487
+ "object_micro_f1": 0.30160427807486634,
488
+ "held_out_episode_count": 14
489
+ },
490
+ "history": [
491
+ {
492
+ "epoch": 1,
493
+ "train_loss": 0.41282760031950355,
494
+ "val_loss": 0.03288277983665466,
495
+ "global_step": 356
496
+ },
497
+ {
498
+ "epoch": 2,
499
+ "train_loss": 0.027745448225544075,
500
+ "val_loss": 0.027823254466056824,
501
+ "global_step": 712
502
+ }
503
+ ]
504
+ }
505
+ ],
506
+ "interpretation": "This layer contains the held-out foundation-model packages. Qwen3-Omni packages evaluate structured JSON task prediction; Cosmos3-Nano currently evaluates a future-window world-model compatibility adapter, not a full diffusion-weight fine-tune."
507
+ }
508
+ ],
509
+ "pending": [
510
+ "Use the final Qwen3 full-eval package as the current Qwen result; older Qwen package rows remain historical diagnostics for comparison.",
511
+ "Promote Cosmos3 from compatibility adapter to full Cosmos3 fine-tuning only after a separate environment with matching Diffusers/Cosmos dependencies is prepared."
512
+ ]
513
+ }
metrics/project_brief.json CHANGED
@@ -17,7 +17,7 @@
17
  },
18
  {
19
  "capability": "Scale-up planning",
20
- "evidence": "verified 96/16/16 Qwen3-Omni validation-monitored diagnostic pilot, structured-output improvement path, Cosmos 3 branch, and policy-model candidates after action-space conversion"
21
  }
22
  ],
23
  "current_artifacts": [
@@ -43,7 +43,7 @@
43
  },
44
  {
45
  "layer": "Scale-up path",
46
- "status": "A selected 96/16/16 Qwen3-Omni LoRA validation-monitored diagnostic pilot is verified; current model-quality metrics are weak and guide the next structured-output improvement pass"
47
  }
48
  ],
49
  "reading_order": [
@@ -54,8 +54,8 @@
54
  "Inspect results/episode_task_suite/feature_manifest.json to understand one model input.",
55
  "Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
56
  ],
57
- "scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The validation-aware multi-episode Qwen3-Omni pilot verifies the training loop but does not yet show strong model quality.",
58
- "next_stage": "Improve structured JSON reliability and error analysis before larger robustness or alternative-backbone claims.",
59
  "entry_points": {
60
  "visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
61
  "hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",
 
17
  },
18
  {
19
  "capability": "Scale-up planning",
20
+ "evidence": "final verified 96/16/16 Qwen3-Omni diagnostic result, same-split 128-episode baseline alignment, Cosmos3-Nano compatibility branch, and policy-model candidates after action-space conversion"
21
  }
22
  ],
23
  "current_artifacts": [
 
43
  },
44
  {
45
  "layer": "Scale-up path",
46
+ "status": "A selected 96/16/16 Qwen3-Omni LoRA final diagnostic result is verified; strict-JSON validity meets target, while weak action/subtask metrics guide the next error-analysis pass"
47
  }
48
  ],
49
  "reading_order": [
 
54
  "Inspect results/episode_task_suite/feature_manifest.json to understand one model input.",
55
  "Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
56
  ],
57
+ "scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The final multi-episode Qwen3-Omni diagnostic result verifies the training loop and strict-JSON output reliability, but does not yet show strong action/subtask model quality.",
58
+ "next_stage": "Improve action/subtask quality through error analysis before larger robustness or alternative-backbone claims.",
59
  "entry_points": {
60
  "visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
61
  "hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",
metrics/project_manifest.json CHANGED
@@ -2,9 +2,9 @@
2
  "name": "Ropedia Xperience-10M Task Suite",
3
  "slug": "ropedia-xperience-10m-task-suite",
4
  "version": "0.1.0",
5
- "status": "single_episode_suite_plus_qwen3_omni_diagnostic_pilot",
6
- "last_metadata_update": "2026-06-05",
7
- "summary": "Research-development repo built around one public Xperience-10M sample episode plus a verified selected-episode Qwen3-Omni diagnostic pilot.",
8
  "scope_boundary": {
9
  "raw_data_redistributed": false,
10
  "episode_count_verified": 1,
@@ -19,7 +19,9 @@
19
  "test": 16
20
  },
21
  "qwen3_omni_held_out_test_windows": 448,
22
- "qwen3_omni_json_validity_rate": 0.875
 
 
23
  },
24
  "public_surfaces": {
25
  "github_repo": "https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite",
@@ -28,6 +30,7 @@
28
  "hf_static_space": "https://cy0307-ropedia-xperience-10m-task-suite.static.hf.space/",
29
  "hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts",
30
  "hf_model_repo": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines",
 
31
  "hf_collection": "https://huggingface.co/collections/cy0307/ropedia-xperience-10m-task-suite"
32
  },
33
  "upstream_sources": {
 
2
  "name": "Ropedia Xperience-10M Task Suite",
3
  "slug": "ropedia-xperience-10m-task-suite",
4
  "version": "0.1.0",
5
+ "status": "single_episode_suite_plus_final_qwen3_omni_diagnostic_result",
6
+ "last_metadata_update": "2026-06-07",
7
+ "summary": "Research-development repo built around one public Xperience-10M sample episode plus a final verified selected-episode Qwen3-Omni diagnostic result.",
8
  "scope_boundary": {
9
  "raw_data_redistributed": false,
10
  "episode_count_verified": 1,
 
19
  "test": 16
20
  },
21
  "qwen3_omni_held_out_test_windows": 448,
22
+ "qwen3_omni_json_validity_rate": 0.9977678571428571,
23
+ "qwen3_omni_json_quality_target_met": true,
24
+ "qwen3_omni_lora_adapter_repo": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep"
25
  },
26
  "public_surfaces": {
27
  "github_repo": "https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite",
 
30
  "hf_static_space": "https://cy0307-ropedia-xperience-10m-task-suite.static.hf.space/",
31
  "hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts",
32
  "hf_model_repo": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines",
33
+ "hf_qwen3_lora_adapter": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
34
  "hf_collection": "https://huggingface.co/collections/cy0307/ropedia-xperience-10m-task-suite"
35
  },
36
  "upstream_sources": {
metrics/project_packet.json CHANGED
@@ -12,7 +12,7 @@
12
  "raw_xperience10m_data_in_repo": false,
13
  "audio_feature_status": "Audio is one of the synchronized source modalities in the current task representation.",
14
  "qwen3_omni_32_episode_claim": false,
15
- "qwen3_omni_status": "The selected 96/16/16 Qwen3-Omni validation-monitored diagnostic pilot is verified, with weak held-out metrics that guide the next structured-output improvement pass."
16
  },
17
  "reading_path": [
18
  {
@@ -41,7 +41,7 @@
41
  "docs/data/scope_claims_audit.json",
42
  "docs/data/website_integrity.json"
43
  ],
44
- "readout": "The project status table and roadmap give the compact current-state summary. Single-episode task engineering, metrics, visualizations, public website integrity, mirror parity, and the validation-aware selected-episode Qwen3-Omni diagnostic pilot are implemented; stronger cross-episode model quality remains a follow-up."
45
  },
46
  {
47
  "step": 2,
@@ -116,7 +116,7 @@
116
  "scripts/omni/discover_xperience10m_sources.py",
117
  "docs/data/omni_finetune_verified_result.json"
118
  ],
119
- "readout": "The selected-episode held-out Qwen3-Omni diagnostic pilot is verified. The next milestone is a validation-aware diagnostic run with stronger JSON-format reliability and error analysis."
120
  }
121
  ],
122
  "project_status": "PROJECT_STATUS.md",
 
12
  "raw_xperience10m_data_in_repo": false,
13
  "audio_feature_status": "Audio is one of the synchronized source modalities in the current task representation.",
14
  "qwen3_omni_32_episode_claim": false,
15
+ "qwen3_omni_status": "The selected 96/16/16 Qwen3-Omni final diagnostic result is verified, meets the strict-JSON target, and still has weak action/subtask metrics that guide the next error-analysis pass."
16
  },
17
  "reading_path": [
18
  {
 
41
  "docs/data/scope_claims_audit.json",
42
  "docs/data/website_integrity.json"
43
  ],
44
+ "readout": "The project status table and roadmap give the compact current-state summary. Single-episode task engineering, metrics, visualizations, public website integrity, mirror parity, same-split 128-episode baselines, the final selected-episode Qwen3-Omni diagnostic result, and the Cosmos3-Nano compatibility package are implemented; stronger action/subtask and full Cosmos model quality remain follow-ups."
45
  },
46
  {
47
  "step": 2,
 
116
  "scripts/omni/discover_xperience10m_sources.py",
117
  "docs/data/omni_finetune_verified_result.json"
118
  ],
119
+ "readout": "The selected-episode held-out Qwen3-Omni final diagnostic result is verified and JSON-format reliability meets the 98% target. The next milestone is action/subtask error analysis and a stronger model-quality run on the same split."
120
  }
121
  ],
122
  "project_status": "PROJECT_STATUS.md",
metrics/project_status.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Project Status",
3
  "version": "2026-06-01",
4
- "decision": "public_sample_pipeline_verified_qwen3_omni_validation_aware_diagnostic_pilot",
5
- "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, and uses the selected-episode Qwen3-Omni validation-aware diagnostic pilot as a verified but weak cross-episode baseline.",
6
  "scope_boundary": {
7
  "validated_episode_count": 1,
8
  "aligned_frames": 5821,
@@ -25,8 +25,13 @@
25
  "val": 512,
26
  "test": 448
27
  },
28
- "qwen3_omni_json_validity_rate": 0.875,
29
  "qwen3_omni_validation_aware": true,
 
 
 
 
 
30
  "multi_episode_128_aligned_baselines": true,
31
  "multi_episode_128_baseline_window_counts": {
32
  "train": 2848,
@@ -102,7 +107,7 @@
102
  "RESEARCH_ROADMAP.md",
103
  "docs/data/research_roadmap.json"
104
  ],
105
- "readout": "The roadmap connects public-sample task development to the verified Qwen3-Omni diagnostic pilot, validation-aware diagnostics, foundation-model selection, robustness runs, world/policy branches, and the future Xperience-native pretraining goal."
106
  },
107
  {
108
  "area": "Foundation-model plan",
@@ -111,7 +116,7 @@
111
  "FOUNDATION_MODEL_PLAN.md",
112
  "docs/data/foundation_model_plan.json"
113
  ],
114
- "readout": "Qwen3-Omni remains the first trainable held-out LoRA baseline; Cosmos 3 is added as the first world-model/action-generation branch; OpenVLA/openpi/GR00T are policy candidates after action targets are explicit."
115
  },
116
  {
117
  "area": "Omni model extension contract",
@@ -191,18 +196,39 @@
191
  ],
192
  "readout": "The earlier simple and neural baseline framing is aligned to the selected 96/16/16 episode split used by the Qwen3-Omni pilot. JSON-supported tasks have metadata/text simple and neural MLP metrics; raw-feature-only tasks are explicitly marked unsupported until 128-run sensor feature blocks are available."
193
  },
 
 
 
 
 
 
 
 
 
 
194
  {
195
  "area": "Qwen3-Omni fine-tuning",
196
- "status": "verified_validation_aware_diagnostic_pilot_quality_target_not_met",
197
  "evidence": [
198
  "docs/data/omni_finetune_verified_result.json",
199
- "results/omni_finetune/verified_public/",
200
- "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/",
201
  "scripts/omni/package_verified_omni_result.py",
202
  "scripts/omni/audit_verified_omni_package.py",
203
  "scripts/omni/analyze_qwen3_omni_errors.py"
204
  ],
205
- "readout": "The selected 96/16/16 episode split produced a validation-aware public-safe held-out package with 3,808 exported windows, 512 validation windows, 448 test predictions, and derived error-analysis tables by episode, action family, train-seen status, required-modality state, and object category. JSON validity is 87.50%, below the 98% target, so it is a diagnostic baseline but not a strong model-quality result."
 
 
 
 
 
 
 
 
 
 
 
206
  },
207
  {
208
  "area": "Raw Xperience-10M redistribution",
@@ -228,12 +254,15 @@
228
  "Inspect SOURCE_ALIGNMENT_AUDIT.md before judging source-card consistency across public surfaces.",
229
  "Inspect XPERIENCE10M_DATASET_CARD_ALIGNMENT.md before judging dataset wording.",
230
  "Inspect results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md before comparing simple/NN baselines to the selected 128-episode setup.",
 
231
  "Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
232
  ],
233
  "current_reading_notes": [
234
- "The validation-aware Qwen3-Omni diagnostic pilot is verified, but current held-out quality is still weak.",
235
- "Use docs/data/omni_finetune_verified_result.json and the latest verified_public validation-aware package for current held-out results.",
 
236
  "The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
 
237
  "The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
238
  "Audio is one of the synchronized source modalities in the current task representation.",
239
  "The audio ablation report compares audio/no-audio variants across all 12 task contracts in results/audio_ablation/.",
 
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Project Status",
3
  "version": "2026-06-01",
4
+ "decision": "public_sample_pipeline_verified_128_aligned_baselines_qwen3_cosmos_comparison",
5
+ "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, and compares verified Qwen3-Omni and Cosmos3 branch packages as early cross-episode diagnostics.",
6
  "scope_boundary": {
7
  "validated_episode_count": 1,
8
  "aligned_frames": 5821,
 
25
  "val": 512,
26
  "test": 448
27
  },
28
+ "qwen3_omni_json_validity_rate": 0.9977678571428571,
29
  "qwen3_omni_validation_aware": true,
30
+ "qwen3_omni_json_quality_target_met": true,
31
+ "qwen3_omni_lora_adapter_repo": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
32
+ "cosmos3_nano_future_window_compatibility_verified": true,
33
+ "cosmos3_nano_future_window_test_predictions": 378,
34
+ "omni_model_comparison_available": true,
35
  "multi_episode_128_aligned_baselines": true,
36
  "multi_episode_128_baseline_window_counts": {
37
  "train": 2848,
 
107
  "RESEARCH_ROADMAP.md",
108
  "docs/data/research_roadmap.json"
109
  ],
110
+ "readout": "The roadmap connects public-sample task development to the final verified Qwen3-Omni diagnostic result, same-split baseline alignment, action/subtask error analysis, robustness runs, world/policy branches, and the future Xperience-native pretraining goal."
111
  },
112
  {
113
  "area": "Foundation-model plan",
 
116
  "FOUNDATION_MODEL_PLAN.md",
117
  "docs/data/foundation_model_plan.json"
118
  ],
119
+ "readout": "Qwen3-Omni remains the first trainable held-out LoRA baseline; Cosmos 3 is now represented by a verified Cosmos3-Nano future-window compatibility package and remains the first world-model/action-generation branch; OpenVLA/openpi/GR00T are policy candidates after action targets are explicit."
120
  },
121
  {
122
  "area": "Omni model extension contract",
 
196
  ],
197
  "readout": "The earlier simple and neural baseline framing is aligned to the selected 96/16/16 episode split used by the Qwen3-Omni pilot. JSON-supported tasks have metadata/text simple and neural MLP metrics; raw-feature-only tasks are explicitly marked unsupported until 128-run sensor feature blocks are available."
198
  },
199
+ {
200
+ "area": "Current result comparison",
201
+ "status": "verified_generated_summary",
202
+ "evidence": [
203
+ "docs/data/omni_model_comparison.json",
204
+ "results/omni_finetune/OMNI_MODEL_COMPARISON.md",
205
+ "scripts/omni/build_omni_model_comparison.py"
206
+ ],
207
+ "readout": "The public comparison separates three layers: the single-episode raw-feature task suite, the selected 128-episode simple/NN metadata baselines, and verified foundation-model branch packages for Qwen3-Omni and Cosmos3-Nano future-window compatibility."
208
+ },
209
  {
210
  "area": "Qwen3-Omni fine-tuning",
211
+ "status": "final_verified_diagnostic_result_json_target_met",
212
  "evidence": [
213
  "docs/data/omni_finetune_verified_result.json",
214
+ "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/",
215
+ "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
216
  "scripts/omni/package_verified_omni_result.py",
217
  "scripts/omni/audit_verified_omni_package.py",
218
  "scripts/omni/analyze_qwen3_omni_errors.py"
219
  ],
220
+ "readout": "The selected 96/16/16 episode split produced a final public-safe held-out package with 3,808 exported windows, 512 validation windows, 448 test predictions, two training epochs, validation/audit summaries, and a public LoRA adapter repo. JSON validity is 99.78%, meeting the 98% target; transition accuracy is 97.10%, contact accuracy is 71.88%, object micro-F1 is 30.16%, and action/subtask metrics remain weak, so it is still a diagnostic baseline rather than a strong model-quality claim."
221
+ },
222
+ {
223
+ "area": "Cosmos3-Nano future-window branch",
224
+ "status": "verified_compatibility_result",
225
+ "evidence": [
226
+ "configs/omni_backbones/cosmos_world_model.json",
227
+ "scripts/omni/export_cosmos3_future_window_dataset.py",
228
+ "scripts/omni/eval_cosmos3_future_window_retrieval.py",
229
+ "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json"
230
+ ],
231
+ "readout": "The Cosmos3-Nano branch now has a public-safe verified future-window compatibility package with 3,213 future-window samples, 378 held-out test predictions, future retrieval MRR 0.0221, temporal consistency 0.0952, transition accuracy 0.9683, and contact accuracy 0.7434. It is a compatibility adapter result, not a full Cosmos diffusion-weight fine-tune."
232
  },
233
  {
234
  "area": "Raw Xperience-10M redistribution",
 
254
  "Inspect SOURCE_ALIGNMENT_AUDIT.md before judging source-card consistency across public surfaces.",
255
  "Inspect XPERIENCE10M_DATASET_CARD_ALIGNMENT.md before judging dataset wording.",
256
  "Inspect results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md before comparing simple/NN baselines to the selected 128-episode setup.",
257
+ "Inspect docs/data/omni_model_comparison.json before comparing the current three result versions.",
258
  "Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
259
  ],
260
  "current_reading_notes": [
261
+ "The final Qwen3-Omni diagnostic result is verified and meets the strict-JSON target, but action/subtask held-out quality is still weak.",
262
+ "Use docs/data/omni_model_comparison.json to compare the single-episode task suite, 128-episode aligned baselines, and verified Qwen3/Cosmos branch packages without mixing incompatible metric targets.",
263
+ "Use docs/data/omni_finetune_verified_result.json and the latest verified_public final Qwen package for current held-out results.",
264
  "The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
265
+ "The Cosmos3-Nano future-window branch is verified as a compatibility adapter result; full Cosmos diffusion-weight fine-tuning remains pending.",
266
  "The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
267
  "Audio is one of the synchronized source modalities in the current task representation.",
268
  "The audio ablation report compares audio/no-audio variants across all 12 task contracts in results/audio_ablation/.",
metrics/publication_audit.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-06T17:44:50+00:00",
4
  "checks": [
5
  {
6
  "name": "required_publication_assets_present",
@@ -182,8 +182,8 @@
182
  "github_repo": {
183
  "root": "repo",
184
  "exists": true,
185
- "file_count": 517,
186
- "text_file_count": 440,
187
  "largest_file": {
188
  "path": "tmp/omni_128_dataset_fetch/dataset.jsonl",
189
  "bytes": 582271586
@@ -193,8 +193,8 @@
193
  "hf_space_bundle": {
194
  "root": "hf_publish/space",
195
  "exists": true,
196
- "file_count": 428,
197
- "text_file_count": 352,
198
  "largest_file": {
199
  "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
200
  "bytes": 55702978
@@ -204,8 +204,8 @@
204
  "hf_artifact_bundle": {
205
  "root": "hf_publish/artifacts",
206
  "exists": true,
207
- "file_count": 588,
208
- "text_file_count": 488,
209
  "largest_file": {
210
  "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
211
  "bytes": 55702978
@@ -215,8 +215,8 @@
215
  "hf_model_bundle": {
216
  "root": "hf_publish/model",
217
  "exists": true,
218
- "file_count": 775,
219
- "text_file_count": 640,
220
  "largest_file": {
221
  "path": "pytorch_model.bin",
222
  "bytes": 93495480
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-06T23:31:25+00:00",
4
  "checks": [
5
  {
6
  "name": "required_publication_assets_present",
 
182
  "github_repo": {
183
  "root": "repo",
184
  "exists": true,
185
+ "file_count": 586,
186
+ "text_file_count": 497,
187
  "largest_file": {
188
  "path": "tmp/omni_128_dataset_fetch/dataset.jsonl",
189
  "bytes": 582271586
 
193
  "hf_space_bundle": {
194
  "root": "hf_publish/space",
195
  "exists": true,
196
+ "file_count": 460,
197
+ "text_file_count": 380,
198
  "largest_file": {
199
  "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
200
  "bytes": 55702978
 
204
  "hf_artifact_bundle": {
205
  "root": "hf_publish/artifacts",
206
  "exists": true,
207
+ "file_count": 631,
208
+ "text_file_count": 527,
209
  "largest_file": {
210
  "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
211
  "bytes": 55702978
 
215
  "hf_model_bundle": {
216
  "root": "hf_publish/model",
217
  "exists": true,
218
+ "file_count": 819,
219
+ "text_file_count": 680,
220
  "largest_file": {
221
  "path": "pytorch_model.bin",
222
  "bytes": 93495480
metrics/reproducibility_matrix.json CHANGED
@@ -79,10 +79,10 @@
79
  },
80
  {
81
  "id": "qwen3_omni_multi_episode_pilot",
82
- "status": "verified_diagnostic_pilot_not_publicly_rerunnable_without_gated_data",
83
  "command": "scripts/omni/build_qwen3_omni_dataset.py and scripts/omni/train_qwen3_omni_lora.py on the selected gated episodes",
84
- "expected": "verified diagnostic LoRA package with 3,808 exported windows, 2,848 train windows, and 448 held-out test predictions",
85
- "boundary": "the public package records metrics and manifests, but rerunning requires gated Xperience-10M episode access and base-model weights; current JSON validity is 87.50%, below the 98% target"
86
  }
87
  ]
88
  }
 
79
  },
80
  {
81
  "id": "qwen3_omni_multi_episode_pilot",
82
+ "status": "verified_final_diagnostic_result_not_publicly_rerunnable_without_gated_data",
83
  "command": "scripts/omni/build_qwen3_omni_dataset.py and scripts/omni/train_qwen3_omni_lora.py on the selected gated episodes",
84
+ "expected": "verified final diagnostic LoRA package with 3,808 exported windows, 2,848 train windows, and 448 held-out test predictions",
85
+ "boundary": "the public package records metrics and manifests, but rerunning requires gated Xperience-10M episode access and base-model weights; current JSON validity is 99.78%, meeting the 98% target, while action/subtask metrics remain weak"
86
  }
87
  ]
88
  }
metrics/research_directions.json CHANGED
@@ -30,6 +30,12 @@
30
  "contact_prediction",
31
  "object_relevance"
32
  ],
 
 
 
 
 
 
33
  "counts": {
34
  "direct": 2,
35
  "proxy": 2,
@@ -54,6 +60,11 @@
54
  "modality_reconstruction",
55
  "misalignment_detection"
56
  ],
 
 
 
 
 
57
  "counts": {
58
  "direct": 0,
59
  "proxy": 2,
@@ -86,6 +97,19 @@
86
  "temporal_order",
87
  "misalignment_detection"
88
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  "counts": {
90
  "direct": 6,
91
  "proxy": 2,
@@ -116,6 +140,17 @@
116
  "temporal_order",
117
  "misalignment_detection"
118
  ],
 
 
 
 
 
 
 
 
 
 
 
119
  "counts": {
120
  "direct": 0,
121
  "proxy": 6,
@@ -137,6 +172,8 @@
137
  },
138
  "why": "Reads egocentric sensor state as the current human action; also provides a weak human-motion readout.",
139
  "current_limit": "Chronological single-episode split creates unseen future action classes.",
 
 
140
  "metric": {
141
  "key": "macro_f1",
142
  "name": "macro-F1",
@@ -158,6 +195,8 @@
158
  },
159
  "why": "Segments egocentric task state and provides a first proxy for symbolic world/task state.",
160
  "current_limit": "Single-episode ordering makes future subtasks hard to generalize.",
 
 
161
  "metric": {
162
  "key": "macro_f1",
163
  "name": "macro-F1",
@@ -179,6 +218,8 @@
179
  },
180
  "why": "Localizes egocentric task boundaries and diagnoses temporal state changes.",
181
  "current_limit": "Boundary class is sparse, so accuracy alone is misleading.",
 
 
182
  "metric": {
183
  "key": "macro_f1",
184
  "name": "macro-F1",
@@ -200,6 +241,8 @@
200
  },
201
  "why": "Tests action intention/task-flow prediction from egocentric context.",
202
  "current_limit": "Unseen future labels dominate the single-episode chronological test.",
 
 
203
  "metric": {
204
  "key": "macro_f1",
205
  "name": "macro-F1",
@@ -221,6 +264,8 @@
221
  },
222
  "why": "Directly predicts human hand motion and supports hand-object interaction modeling.",
223
  "current_limit": "Forecasting is window-level and not yet a full sequence or policy model.",
 
 
224
  "metric": {
225
  "key": "mpjpe",
226
  "name": "MPJPE",
@@ -242,6 +287,8 @@
242
  },
243
  "why": "Targets physical interaction state, a core affordance and manipulation signal.",
244
  "current_limit": "The public sample is degenerate for this target because one class dominates.",
 
 
245
  "metric": {
246
  "key": "macro_f1",
247
  "name": "macro-F1",
@@ -264,6 +311,8 @@
264
  },
265
  "why": "Connects egocentric activity to manipulated objects and early object-centric state.",
266
  "current_limit": "Object labels are language-derived and sparse in one episode.",
 
 
267
  "metric": {
268
  "key": "micro_f1",
269
  "name": "micro-F1",
@@ -285,6 +334,8 @@
285
  },
286
  "why": "Grounds language annotation into egocentric sensor time and task state.",
287
  "current_limit": "Bag-of-objects language features are too weak for rich grounding.",
 
 
288
  "metric": {
289
  "key": "mrr",
290
  "name": "MRR",
@@ -307,6 +358,8 @@
307
  },
308
  "why": "Tests whether synchronized modalities identify the same 4D moment, a prerequisite for reconstruction and world modeling.",
309
  "current_limit": "Retrieval shows an alignment signal, not geometric reconstruction.",
 
 
310
  "metric": {
311
  "key": "mrr",
312
  "name": "MRR",
@@ -328,6 +381,8 @@
328
  },
329
  "why": "Predicts visual/depth state from non-target sensors as a weak reconstruction/world-model objective.",
330
  "current_limit": "Feature-vector reconstruction is not pixel, depth-map, mesh, NeRF, or Gaussian reconstruction.",
 
 
331
  "metric": {
332
  "key": "r2",
333
  "name": "R2",
@@ -349,6 +404,8 @@
349
  },
350
  "why": "Checks whether features encode local time direction and task progression.",
351
  "current_limit": "Only local adjacent ordering, not long-horizon causal modeling.",
 
 
352
  "metric": {
353
  "key": "f1",
354
  "name": "F1",
@@ -371,6 +428,8 @@
371
  },
372
  "why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models.",
373
  "current_limit": "Synthetic shifts diagnose alignment but do not solve calibration or mapping.",
 
 
374
  "metric": {
375
  "key": "f1",
376
  "name": "F1",
 
30
  "contact_prediction",
31
  "object_relevance"
32
  ],
33
+ "task_display_names": [
34
+ "Action Recognition",
35
+ "Hand Trajectory Forecasting",
36
+ "Contact State Prediction",
37
+ "Object Relevance Prediction"
38
+ ],
39
  "counts": {
40
  "direct": 2,
41
  "proxy": 2,
 
60
  "modality_reconstruction",
61
  "misalignment_detection"
62
  ],
63
+ "task_display_names": [
64
+ "Cross-Modal Retrieval",
65
+ "Cross-Modal Reconstruction",
66
+ "Multimodal Synchronization Detection"
67
+ ],
68
  "counts": {
69
  "direct": 0,
70
  "proxy": 2,
 
97
  "temporal_order",
98
  "misalignment_detection"
99
  ],
100
+ "task_display_names": [
101
+ "Action Recognition",
102
+ "Procedure Step Recognition",
103
+ "Action Boundary Detection",
104
+ "Next-Action Prediction",
105
+ "Hand Trajectory Forecasting",
106
+ "Contact State Prediction",
107
+ "Object Relevance Prediction",
108
+ "Language Grounding",
109
+ "Cross-Modal Retrieval",
110
+ "Temporal Order Verification",
111
+ "Multimodal Synchronization Detection"
112
+ ],
113
  "counts": {
114
  "direct": 6,
115
  "proxy": 2,
 
140
  "temporal_order",
141
  "misalignment_detection"
142
  ],
143
+ "task_display_names": [
144
+ "Procedure Step Recognition",
145
+ "Action Boundary Detection",
146
+ "Next-Action Prediction",
147
+ "Object Relevance Prediction",
148
+ "Language Grounding",
149
+ "Cross-Modal Retrieval",
150
+ "Cross-Modal Reconstruction",
151
+ "Temporal Order Verification",
152
+ "Multimodal Synchronization Detection"
153
+ ],
154
  "counts": {
155
  "direct": 0,
156
  "proxy": 6,
 
172
  },
173
  "why": "Reads egocentric sensor state as the current human action; also provides a weak human-motion readout.",
174
  "current_limit": "Chronological single-episode split creates unseen future action classes.",
175
+ "display_name": "Action Recognition",
176
+ "artifact_id": "timeline_action",
177
  "metric": {
178
  "key": "macro_f1",
179
  "name": "macro-F1",
 
195
  },
196
  "why": "Segments egocentric task state and provides a first proxy for symbolic world/task state.",
197
  "current_limit": "Single-episode ordering makes future subtasks hard to generalize.",
198
+ "display_name": "Procedure Step Recognition",
199
+ "artifact_id": "timeline_subtask",
200
  "metric": {
201
  "key": "macro_f1",
202
  "name": "macro-F1",
 
218
  },
219
  "why": "Localizes egocentric task boundaries and diagnoses temporal state changes.",
220
  "current_limit": "Boundary class is sparse, so accuracy alone is misleading.",
221
+ "display_name": "Action Boundary Detection",
222
+ "artifact_id": "transition_detection",
223
  "metric": {
224
  "key": "macro_f1",
225
  "name": "macro-F1",
 
241
  },
242
  "why": "Tests action intention/task-flow prediction from egocentric context.",
243
  "current_limit": "Unseen future labels dominate the single-episode chronological test.",
244
+ "display_name": "Next-Action Prediction",
245
+ "artifact_id": "next_action",
246
  "metric": {
247
  "key": "macro_f1",
248
  "name": "macro-F1",
 
264
  },
265
  "why": "Directly predicts human hand motion and supports hand-object interaction modeling.",
266
  "current_limit": "Forecasting is window-level and not yet a full sequence or policy model.",
267
+ "display_name": "Hand Trajectory Forecasting",
268
+ "artifact_id": "hand_trajectory_forecast",
269
  "metric": {
270
  "key": "mpjpe",
271
  "name": "MPJPE",
 
287
  },
288
  "why": "Targets physical interaction state, a core affordance and manipulation signal.",
289
  "current_limit": "The public sample is degenerate for this target because one class dominates.",
290
+ "display_name": "Contact State Prediction",
291
+ "artifact_id": "contact_prediction",
292
  "metric": {
293
  "key": "macro_f1",
294
  "name": "macro-F1",
 
311
  },
312
  "why": "Connects egocentric activity to manipulated objects and early object-centric state.",
313
  "current_limit": "Object labels are language-derived and sparse in one episode.",
314
+ "display_name": "Object Relevance Prediction",
315
+ "artifact_id": "object_relevance",
316
  "metric": {
317
  "key": "micro_f1",
318
  "name": "micro-F1",
 
334
  },
335
  "why": "Grounds language annotation into egocentric sensor time and task state.",
336
  "current_limit": "Bag-of-objects language features are too weak for rich grounding.",
337
+ "display_name": "Language Grounding",
338
+ "artifact_id": "caption_grounding",
339
  "metric": {
340
  "key": "mrr",
341
  "name": "MRR",
 
358
  },
359
  "why": "Tests whether synchronized modalities identify the same 4D moment, a prerequisite for reconstruction and world modeling.",
360
  "current_limit": "Retrieval shows an alignment signal, not geometric reconstruction.",
361
+ "display_name": "Cross-Modal Retrieval",
362
+ "artifact_id": "cross_modal_retrieval",
363
  "metric": {
364
  "key": "mrr",
365
  "name": "MRR",
 
381
  },
382
  "why": "Predicts visual/depth state from non-target sensors as a weak reconstruction/world-model objective.",
383
  "current_limit": "Feature-vector reconstruction is not pixel, depth-map, mesh, NeRF, or Gaussian reconstruction.",
384
+ "display_name": "Cross-Modal Reconstruction",
385
+ "artifact_id": "modality_reconstruction",
386
  "metric": {
387
  "key": "r2",
388
  "name": "R2",
 
404
  },
405
  "why": "Checks whether features encode local time direction and task progression.",
406
  "current_limit": "Only local adjacent ordering, not long-horizon causal modeling.",
407
+ "display_name": "Temporal Order Verification",
408
+ "artifact_id": "temporal_order",
409
  "metric": {
410
  "key": "f1",
411
  "name": "F1",
 
428
  },
429
  "why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models.",
430
  "current_limit": "Synthetic shifts diagnose alignment but do not solve calibration or mapping.",
431
+ "display_name": "Multimodal Synchronization Detection",
432
+ "artifact_id": "misalignment_detection",
433
  "metric": {
434
  "key": "f1",
435
  "name": "F1",
metrics/research_roadmap.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Research Roadmap",
3
- "summary": "Staged path from the public-sample task lab to a verified validation-aware Qwen3-Omni diagnostic pilot, structured-output improvement pass, foundation-model selection, world/policy branches, and a future Xperience-native embodied foundation model.",
4
- "current_decision_point": "Keep the public-sample task suite as the development harness, use the verified selected-episode Qwen3-Omni validation-aware diagnostic pilot and the same-split 128-episode simple/NN metadata baselines as the first cross-episode references, improve structured-output reliability and task-quality error analysis, then branch into Cosmos 3 world modeling and policy-model experiments after their targets are implemented. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
5
  "additional_development_directions": {
6
  "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
7
  "source_json": "docs/data/additional_development_directions.json",
@@ -52,7 +52,7 @@
52
  },
53
  {
54
  "id": "qwen3_omni_lora_diagnostic_pilot",
55
- "name": "Qwen3-Omni LoRA Validation-Aware Diagnostic Pilot",
56
  "status": "verified_baseline",
57
  "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
58
  "deliverables": [
@@ -63,7 +63,8 @@
63
  "held-out predictions",
64
  "metrics",
65
  "confusion matrices",
66
- "run report"
 
67
  ],
68
  "completion_evidence": [
69
  "docs/data/omni_finetune_verified_result.json",
@@ -75,7 +76,7 @@
75
  "predictions.jsonl",
76
  "RUN_REPORT.md"
77
  ],
78
- "reader_takeaway": "The first omni-model pilot establishes the full held-out training/validation/evaluation loop, but the weak metrics make it a diagnostic baseline."
79
  },
80
  {
81
  "id": "multi_episode_128_same_split_baselines",
@@ -97,23 +98,23 @@
97
  },
98
  {
99
  "id": "qwen3_omni_structured_output_error_analysis",
100
- "name": "Structured-Output And Error-Analysis Pass",
101
  "status": "active_next_step",
102
- "entry_condition": "The validation-aware diagnostic package exists and shows weak held-out quality.",
103
  "deliverables": [
104
  "same 96/16/16 episode split",
105
- "stricter JSON decoding or target formatting",
106
- "episode/action/object error analysis",
 
107
  "held-out test evaluation",
108
- "comparison to the verified validation-aware baseline"
109
  ],
110
  "completion_evidence": [
111
- "quality-target report",
112
  "error-analysis tables",
113
- "held-out metrics",
114
  "verified public-safe package"
115
  ],
116
- "reader_takeaway": "The next pass should improve output reliability and task metrics before larger model-quality claims."
117
  },
118
  {
119
  "id": "foundation_model_selection_matrix",
 
1
  {
2
  "title": "Ropedia Xperience-10M Research Roadmap",
3
+ "summary": "Staged path from the public-sample task lab to a final verified Qwen3-Omni diagnostic result, same-split 128-episode baseline alignment, action/subtask error analysis, foundation-model selection, world/policy branches, and a future Xperience-native embodied foundation model.",
4
+ "current_decision_point": "Keep the public-sample task suite as the development harness, use the final verified selected-episode Qwen3-Omni diagnostic result and the same-split 128-episode simple/NN metadata baselines as the first cross-episode references, improve action/subtask quality through error analysis, then branch into Cosmos 3 world modeling and policy-model experiments after their targets are implemented. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
5
  "additional_development_directions": {
6
  "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
7
  "source_json": "docs/data/additional_development_directions.json",
 
52
  },
53
  {
54
  "id": "qwen3_omni_lora_diagnostic_pilot",
55
+ "name": "Qwen3-Omni LoRA Final Diagnostic Result",
56
  "status": "verified_baseline",
57
  "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
58
  "deliverables": [
 
63
  "held-out predictions",
64
  "metrics",
65
  "confusion matrices",
66
+ "run report",
67
+ "public LoRA adapter repo"
68
  ],
69
  "completion_evidence": [
70
  "docs/data/omni_finetune_verified_result.json",
 
76
  "predictions.jsonl",
77
  "RUN_REPORT.md"
78
  ],
79
+ "reader_takeaway": "The final omni-model diagnostic result establishes the full held-out training/validation/evaluation loop and meets the strict-JSON target, but weak action/subtask metrics make it a diagnostic baseline."
80
  },
81
  {
82
  "id": "multi_episode_128_same_split_baselines",
 
98
  },
99
  {
100
  "id": "qwen3_omni_structured_output_error_analysis",
101
+ "name": "Action/Subtask Error-Analysis Pass",
102
  "status": "active_next_step",
103
+ "entry_condition": "The final diagnostic package meets strict JSON validity but has weak action/subtask held-out quality.",
104
  "deliverables": [
105
  "same 96/16/16 episode split",
106
+ "action/subtask confusion analysis",
107
+ "unseen-label analysis",
108
+ "object/action family breakdowns",
109
  "held-out test evaluation",
110
+ "comparison to the final verified Qwen baseline"
111
  ],
112
  "completion_evidence": [
 
113
  "error-analysis tables",
114
+ "held-out metrics by failure type",
115
  "verified public-safe package"
116
  ],
117
+ "reader_takeaway": "The next pass should improve action/subtask quality before larger model-quality claims."
118
  },
119
  {
120
  "id": "foundation_model_selection_matrix",
metrics/research_roadmap_interactive.json CHANGED
@@ -2035,7 +2035,7 @@
2035
  "step": 1
2036
  },
2037
  {
2038
- "action": "Run validation-aware Qwen3-Omni LoRA to improve the verified diagnostic baseline.",
2039
  "name": "First held-out baseline",
2040
  "step": 2
2041
  },
@@ -2222,7 +2222,7 @@
2222
  ],
2223
  "status": "planning_artifact"
2224
  },
2225
- "generated_at_utc": "2026-06-06T13:49:32+00:00",
2226
  "omni_plan": {
2227
  "adapter": "LoRA rank 16, alpha 32, dropout 0.05",
2228
  "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
@@ -2303,33 +2303,53 @@
2303
  "held-out predictions",
2304
  "metrics",
2305
  "confusion matrices",
2306
- "run report"
 
2307
  ],
2308
  "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
2309
  "id": "qwen3_omni_lora_diagnostic_pilot",
2310
- "name": "Qwen3-Omni LoRA Validation-Aware Diagnostic Pilot",
2311
- "reader_takeaway": "The first omni-model pilot establishes the full held-out training/validation/evaluation loop, but the weak metrics make it a diagnostic baseline.",
2312
  "stage": "future",
2313
  "status": "verified_baseline"
2314
  },
2315
  {
2316
  "completion_evidence": [
2317
- "quality-target report",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2318
  "error-analysis tables",
2319
- "held-out metrics",
2320
  "verified public-safe package"
2321
  ],
2322
  "deliverables": [
2323
  "same 96/16/16 episode split",
2324
- "stricter JSON decoding or target formatting",
2325
- "episode/action/object error analysis",
 
2326
  "held-out test evaluation",
2327
- "comparison to the verified validation-aware baseline"
2328
  ],
2329
- "entry_condition": "The validation-aware diagnostic package exists and shows weak held-out quality.",
2330
  "id": "qwen3_omni_structured_output_error_analysis",
2331
- "name": "Structured-Output And Error-Analysis Pass",
2332
- "reader_takeaway": "The next pass should improve output reliability and task metrics before larger model-quality claims.",
2333
  "stage": "future",
2334
  "status": "active_next_step"
2335
  },
@@ -2428,7 +2448,7 @@
2428
  "visualization.rrd"
2429
  ],
2430
  "selection_strategy": "stratified_round_robin_by_top_level_session",
2431
- "status": "verified_validation_aware_diagnostic_pilot",
2432
  "target_episodes": 128,
2433
  "valid_candidates": 12102
2434
  },
 
2035
  "step": 1
2036
  },
2037
  {
2038
+ "action": "Run Qwen3-Omni action/subtask error analysis and targeted reruns to improve the verified diagnostic baseline.",
2039
  "name": "First held-out baseline",
2040
  "step": 2
2041
  },
 
2222
  ],
2223
  "status": "planning_artifact"
2224
  },
2225
+ "generated_at_utc": "2026-06-06T23:26:13+00:00",
2226
  "omni_plan": {
2227
  "adapter": "LoRA rank 16, alpha 32, dropout 0.05",
2228
  "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
 
2303
  "held-out predictions",
2304
  "metrics",
2305
  "confusion matrices",
2306
+ "run report",
2307
+ "public LoRA adapter repo"
2308
  ],
2309
  "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
2310
  "id": "qwen3_omni_lora_diagnostic_pilot",
2311
+ "name": "Qwen3-Omni LoRA Final Diagnostic Result",
2312
+ "reader_takeaway": "The final omni-model diagnostic result establishes the full held-out training/validation/evaluation loop and meets the strict-JSON target, but weak action/subtask metrics make it a diagnostic baseline.",
2313
  "stage": "future",
2314
  "status": "verified_baseline"
2315
  },
2316
  {
2317
  "completion_evidence": [
2318
+ "results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md",
2319
+ "results/omni_finetune/multi_episode_128_task_baselines/summary_report.json",
2320
+ "scripts/omni/run_128_task_baselines.py"
2321
+ ],
2322
+ "deliverables": [
2323
+ "same 12 task ids",
2324
+ "simple metadata/text baselines",
2325
+ "neural MLP baselines for JSON-supported labels",
2326
+ "explicit unsupported markers for raw-feature-only tasks"
2327
+ ],
2328
+ "entry_condition": "Derived Qwen JSONL export for the selected 96/16/16 split.",
2329
+ "id": "multi_episode_128_same_split_baselines",
2330
+ "name": "128-Episode Same-Split Simple/NN Baselines",
2331
+ "reader_takeaway": "The simple and neural baseline framing is now aligned to the selected 128-episode setup; trajectory, retrieval, reconstruction, and misalignment variants still need raw 128 feature blocks for exact feature-level reproduction.",
2332
+ "stage": "future",
2333
+ "status": "verified_companion_result"
2334
+ },
2335
+ {
2336
+ "completion_evidence": [
2337
  "error-analysis tables",
2338
+ "held-out metrics by failure type",
2339
  "verified public-safe package"
2340
  ],
2341
  "deliverables": [
2342
  "same 96/16/16 episode split",
2343
+ "action/subtask confusion analysis",
2344
+ "unseen-label analysis",
2345
+ "object/action family breakdowns",
2346
  "held-out test evaluation",
2347
+ "comparison to the final verified Qwen baseline"
2348
  ],
2349
+ "entry_condition": "The final diagnostic package meets strict JSON validity but has weak action/subtask held-out quality.",
2350
  "id": "qwen3_omni_structured_output_error_analysis",
2351
+ "name": "Action/Subtask Error-Analysis Pass",
2352
+ "reader_takeaway": "The next pass should improve action/subtask quality before larger model-quality claims.",
2353
  "stage": "future",
2354
  "status": "active_next_step"
2355
  },
 
2448
  "visualization.rrd"
2449
  ],
2450
  "selection_strategy": "stratified_round_robin_by_top_level_session",
2451
+ "status": "verified_full_128_episode_diagnostic_result",
2452
  "target_episodes": 128,
2453
  "valid_candidates": 12102
2454
  },
metrics/research_takeaways.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Research Takeaways",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-06T13:49:32+00:00",
5
  "source_files": [
6
  "docs/data/summary_metrics.json",
7
  "results/episode_task_suite/summary_report.json",
@@ -166,7 +166,7 @@
166
  {
167
  "id": "scale_requires_episodes",
168
  "title": "The next scientific unit is held-out episodes, not more adjacent windows",
169
- "readout": "The selected Qwen3-Omni path now has a verified validation-aware held-out diagnostic pilot. It proves the cross-episode train/validation/eval loop, but the weak metrics show that structured-output reliability and task-quality error analysis are the next modeling problems.",
170
  "evidence": [
171
  {
172
  "label": "selected_episodes",
@@ -174,19 +174,19 @@
174
  },
175
  {
176
  "label": "held_out_test_windows",
177
- "value": 448
178
  },
179
  {
180
  "label": "json_validity_rate",
181
- "value": 0.875
182
  },
183
  {
184
  "label": "action_macro_f1",
185
- "value": 0.0026621494447581404
186
  }
187
  ],
188
  "source": "docs/data/omni_finetune_verified_result.json",
189
- "current_scope": "The selected-episode Qwen3-Omni validation-aware diagnostic pilot is verified, but held-out quality is still weak and JSON validity remains below the 98% target."
190
  }
191
  ]
192
  }
 
1
  {
2
  "title": "Ropedia Xperience-10M Research Takeaways",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-06T23:26:13+00:00",
5
  "source_files": [
6
  "docs/data/summary_metrics.json",
7
  "results/episode_task_suite/summary_report.json",
 
166
  {
167
  "id": "scale_requires_episodes",
168
  "title": "The next scientific unit is held-out episodes, not more adjacent windows",
169
+ "readout": "The selected Qwen3-Omni path now has a verified two-epoch held-out diagnostic result. It proves the cross-episode train/validation/eval loop and meets the strict-JSON target, while weak action/subtask metrics remain the next modeling problem.",
170
  "evidence": [
171
  {
172
  "label": "selected_episodes",
 
174
  },
175
  {
176
  "label": "held_out_test_windows",
177
+ "value": null
178
  },
179
  {
180
  "label": "json_validity_rate",
181
+ "value": null
182
  },
183
  {
184
  "label": "action_macro_f1",
185
+ "value": null
186
  }
187
  ],
188
  "source": "docs/data/omni_finetune_verified_result.json",
189
+ "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims."
190
  }
191
  ]
192
  }
metrics/scope_claims_audit.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-06T17:43:55+00:00",
4
  "summary": {
5
  "qwen3_omni_verified_diagnostic_pilot": true,
6
  "dataset_manifest_num_episodes": 119,
7
  "dataset_manifest_num_samples": 3808,
8
  "training_metadata_num_train_samples": 2848,
9
  "eval_num_samples": 448,
10
- "eval_json_validity_rate": 0.875,
11
- "quality_target_met": false,
12
- "historical_identifier_count": 132,
13
  "public_32_episode_status_file_count": 1,
14
  "failure_count": 0
15
  },
@@ -25,7 +25,7 @@
25
  {
26
  "name": "summary_metrics_preserves_verified_diagnostic_status",
27
  "status": "pass",
28
- "detail": "The selected-episode Qwen3-Omni validation-aware diagnostic pilot is verified, but held-out quality is still weak and JSON validity remains below the 98% target.",
29
  "evidence": [
30
  "docs/data/summary_metrics.json"
31
  ]
@@ -35,7 +35,7 @@
35
  "status": "pass",
36
  "detail": "episodes=119, samples=3808, split_counts={'train': 2848, 'val': 512, 'test': 448}",
37
  "evidence": [
38
- "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/dataset/dataset_manifest.json"
39
  ]
40
  },
41
  {
@@ -43,15 +43,15 @@
43
  "status": "pass",
44
  "detail": "train=2848, val=512, processes=8",
45
  "evidence": [
46
- "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/training/training_metadata.json"
47
  ]
48
  },
49
  {
50
  "name": "verified_package_eval_records_real_held_out_metrics",
51
  "status": "pass",
52
- "detail": "samples=448, split=test, held_out=14, json_validity=0.875",
53
  "evidence": [
54
- "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/metrics.json"
55
  ]
56
  },
57
  {
@@ -59,7 +59,7 @@
59
  "status": "pass",
60
  "detail": "audit_status=pass, issues=0",
61
  "evidence": [
62
- "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/package_audit.json"
63
  ]
64
  },
65
  {
@@ -84,7 +84,7 @@
84
  {
85
  "name": "historical_32ep_identifiers_are_confined_to_readiness_artifacts",
86
  "status": "pass",
87
- "detail": "historical identifiers found in result provenance files=132",
88
  "evidence": [
89
  "results/omni_finetune/"
90
  ]
@@ -97,16 +97,6 @@
97
  }
98
  ],
99
  "historical_identifiers": [
100
- {
101
- "classification": "historical_identifier_in_readiness_artifact",
102
- "path": "results/omni_finetune/HF_UPLOAD.md",
103
- "line": 5,
104
- "patterns": [
105
- "qwen3_omni_32ep",
106
- "xperience10m_qwen3_omni_32ep"
107
- ],
108
- "example": "- `results/omni_finetune/adapter_lora/` (`xperience10m_qwen3_omni_32ep_lora`)"
109
- },
110
  {
111
  "classification": "historical_identifier_in_readiness_artifact",
112
  "path": "results/omni_finetune/XPERIENCE10M_128_DATA_PREPARATION_AND_FINETUNE_PLAN.md",
@@ -421,8 +411,19 @@
421
  "ropedia-episode-task-suite"
422
  ],
423
  "example": "{\"id\": \"xperience-10m-sample:qa:52\", \"episode_id\": \"xperience-10m-sample\", \"split\": \"train\", \"target\": \"episode_qa\", \"prompt_type\": \"json_episode_understanding\", \"center_window\": {\"start_frame\": 1040, \"end_frame\": 1059, \"num_frames\": 20}, \"media\": {\"video_path"
 
 
 
 
 
 
 
 
 
 
 
424
  }
425
  ],
426
- "historical_identifier_total_count": 132,
427
  "failures": []
428
  }
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-06T23:27:07+00:00",
4
  "summary": {
5
  "qwen3_omni_verified_diagnostic_pilot": true,
6
  "dataset_manifest_num_episodes": 119,
7
  "dataset_manifest_num_samples": 3808,
8
  "training_metadata_num_train_samples": 2848,
9
  "eval_num_samples": 448,
10
+ "eval_json_validity_rate": 0.9977678571428571,
11
+ "quality_target_met": true,
12
+ "historical_identifier_count": 131,
13
  "public_32_episode_status_file_count": 1,
14
  "failure_count": 0
15
  },
 
25
  {
26
  "name": "summary_metrics_preserves_verified_diagnostic_status",
27
  "status": "pass",
28
+ "detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims.",
29
  "evidence": [
30
  "docs/data/summary_metrics.json"
31
  ]
 
35
  "status": "pass",
36
  "detail": "episodes=119, samples=3808, split_counts={'train': 2848, 'val': 512, 'test': 448}",
37
  "evidence": [
38
+ "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/dataset/dataset_manifest.json"
39
  ]
40
  },
41
  {
 
43
  "status": "pass",
44
  "detail": "train=2848, val=512, processes=8",
45
  "evidence": [
46
+ "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/training/training_metadata.json"
47
  ]
48
  },
49
  {
50
  "name": "verified_package_eval_records_real_held_out_metrics",
51
  "status": "pass",
52
+ "detail": "samples=448, split=test, held_out=14, json_validity=0.9977678571428571",
53
  "evidence": [
54
+ "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/eval/metrics.json"
55
  ]
56
  },
57
  {
 
59
  "status": "pass",
60
  "detail": "audit_status=pass, issues=0",
61
  "evidence": [
62
+ "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/package_audit.json"
63
  ]
64
  },
65
  {
 
84
  {
85
  "name": "historical_32ep_identifiers_are_confined_to_readiness_artifacts",
86
  "status": "pass",
87
+ "detail": "historical identifiers found in result provenance files=131",
88
  "evidence": [
89
  "results/omni_finetune/"
90
  ]
 
97
  }
98
  ],
99
  "historical_identifiers": [
 
 
 
 
 
 
 
 
 
 
100
  {
101
  "classification": "historical_identifier_in_readiness_artifact",
102
  "path": "results/omni_finetune/XPERIENCE10M_128_DATA_PREPARATION_AND_FINETUNE_PLAN.md",
 
411
  "ropedia-episode-task-suite"
412
  ],
413
  "example": "{\"id\": \"xperience-10m-sample:qa:52\", \"episode_id\": \"xperience-10m-sample\", \"split\": \"train\", \"target\": \"episode_qa\", \"prompt_type\": \"json_episode_understanding\", \"center_window\": {\"start_frame\": 1040, \"end_frame\": 1059, \"num_frames\": 20}, \"media\": {\"video_path"
414
+ },
415
+ {
416
+ "classification": "historical_identifier_in_readiness_artifact",
417
+ "path": "results/omni_finetune/dataset.jsonl",
418
+ "line": 28,
419
+ "patterns": [
420
+ "qwen3_omni_32ep",
421
+ "xperience10m_qwen3_omni_32ep",
422
+ "ropedia-episode-task-suite"
423
+ ],
424
+ "example": "{\"id\": \"xperience-10m-sample:qa:53\", \"episode_id\": \"xperience-10m-sample\", \"split\": \"train\", \"target\": \"episode_qa\", \"prompt_type\": \"json_episode_understanding\", \"center_window\": {\"start_frame\": 1060, \"end_frame\": 1079, \"num_frames\": 20}, \"media\": {\"video_path"
425
  }
426
  ],
427
+ "historical_identifier_total_count": 131,
428
  "failures": []
429
  }
metrics/single_episode_explorer.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "meta": {
3
- "generated_at": "2026-06-03T12:47:16.188806+00:00",
4
  "window_count": 1161,
5
  "feature_dim": 8546,
6
  "object_label_rows": 1161,
@@ -16,12 +16,26 @@
16
  }
17
  },
18
  "tasks": {
19
- "timeline_action": "Current Action Recognition",
20
- "timeline_subtask": "Current Subtask Recognition",
21
- "transition_detection": "Action Transition Detection",
22
  "next_action": "Next-Action Prediction",
23
  "contact_prediction": "Contact State Prediction",
24
- "object_relevance": "Relevant Object Prediction"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  },
26
  "feature_blocks": [
27
  {
@@ -138,7 +152,7 @@
138
  },
139
  {
140
  "name": "audio_fisheye_cam0_aac",
141
- "display": "Audio AAC",
142
  "modality": "audio",
143
  "start": 7343,
144
  "end": 7511,
@@ -163958,6 +163972,8 @@
163958
  "ablation": {
163959
  "best_by_task": {
163960
  "caption_grounding": {
 
 
163961
  "best": {
163962
  "modality_group": "language",
163963
  "modality_display": "Language",
@@ -163973,6 +163989,8 @@
163973
  }
163974
  },
163975
  "contact_prediction": {
 
 
163976
  "best": {
163977
  "modality_group": "all_features",
163978
  "modality_display": "All Features",
@@ -163988,6 +164006,8 @@
163988
  }
163989
  },
163990
  "cross_modal_retrieval": {
 
 
163991
  "best": {
163992
  "modality_group": "all_features",
163993
  "modality_display": "All Features",
@@ -164003,6 +164023,8 @@
164003
  }
164004
  },
164005
  "hand_trajectory_forecast": {
 
 
164006
  "best": {
164007
  "modality_group": "inertial",
164008
  "modality_display": "Inertial",
@@ -164018,6 +164040,8 @@
164018
  }
164019
  },
164020
  "misalignment_detection": {
 
 
164021
  "best": {
164022
  "modality_group": "audio",
164023
  "modality_display": "Audio",
@@ -164033,6 +164057,8 @@
164033
  }
164034
  },
164035
  "modality_reconstruction": {
 
 
164036
  "best": {
164037
  "modality_group": "video",
164038
  "modality_display": "Video",
@@ -164048,6 +164074,8 @@
164048
  }
164049
  },
164050
  "next_action": {
 
 
164051
  "best": {
164052
  "modality_group": "language",
164053
  "modality_display": "Language",
@@ -164063,6 +164091,8 @@
164063
  }
164064
  },
164065
  "object_relevance": {
 
 
164066
  "best": {
164067
  "modality_group": "language",
164068
  "modality_display": "Language",
@@ -164078,6 +164108,8 @@
164078
  }
164079
  },
164080
  "temporal_order": {
 
 
164081
  "best": {
164082
  "modality_group": "pose_slam",
164083
  "modality_display": "Pose + SLAM",
@@ -164093,6 +164125,8 @@
164093
  }
164094
  },
164095
  "timeline_action": {
 
 
164096
  "best": {
164097
  "modality_group": "language",
164098
  "modality_display": "Language",
@@ -164108,6 +164142,8 @@
164108
  }
164109
  },
164110
  "timeline_subtask": {
 
 
164111
  "best": {
164112
  "modality_group": "language",
164113
  "modality_display": "Language",
@@ -164123,6 +164159,8 @@
164123
  }
164124
  },
164125
  "transition_detection": {
 
 
164126
  "best": {
164127
  "modality_group": "language",
164128
  "modality_display": "Language",
@@ -164173,7 +164211,8 @@
164173
  "top10_accuracy": "",
164174
  "median_rank": "",
164175
  "mean_rank": "",
164176
- "num_queries": ""
 
164177
  },
164178
  {
164179
  "task": "timeline_action",
@@ -164209,7 +164248,8 @@
164209
  "top10_accuracy": "",
164210
  "median_rank": "",
164211
  "mean_rank": "",
164212
- "num_queries": ""
 
164213
  },
164214
  {
164215
  "task": "timeline_action",
@@ -164245,7 +164285,8 @@
164245
  "top10_accuracy": "",
164246
  "median_rank": "",
164247
  "mean_rank": "",
164248
- "num_queries": ""
 
164249
  },
164250
  {
164251
  "task": "timeline_action",
@@ -164281,7 +164322,8 @@
164281
  "top10_accuracy": "",
164282
  "median_rank": "",
164283
  "mean_rank": "",
164284
- "num_queries": ""
 
164285
  },
164286
  {
164287
  "task": "timeline_action",
@@ -164317,7 +164359,8 @@
164317
  "top10_accuracy": "",
164318
  "median_rank": "",
164319
  "mean_rank": "",
164320
- "num_queries": ""
 
164321
  },
164322
  {
164323
  "task": "timeline_action",
@@ -164353,7 +164396,8 @@
164353
  "top10_accuracy": "",
164354
  "median_rank": "",
164355
  "mean_rank": "",
164356
- "num_queries": ""
 
164357
  },
164358
  {
164359
  "task": "timeline_action",
@@ -164389,7 +164433,8 @@
164389
  "top10_accuracy": "",
164390
  "median_rank": "",
164391
  "mean_rank": "",
164392
- "num_queries": ""
 
164393
  },
164394
  {
164395
  "task": "timeline_action",
@@ -164425,7 +164470,8 @@
164425
  "top10_accuracy": "",
164426
  "median_rank": "",
164427
  "mean_rank": "",
164428
- "num_queries": ""
 
164429
  },
164430
  {
164431
  "task": "timeline_action",
@@ -164461,7 +164507,8 @@
164461
  "top10_accuracy": "",
164462
  "median_rank": "",
164463
  "mean_rank": "",
164464
- "num_queries": ""
 
164465
  },
164466
  {
164467
  "task": "timeline_subtask",
@@ -164497,7 +164544,8 @@
164497
  "top10_accuracy": "",
164498
  "median_rank": "",
164499
  "mean_rank": "",
164500
- "num_queries": ""
 
164501
  },
164502
  {
164503
  "task": "timeline_subtask",
@@ -164533,7 +164581,8 @@
164533
  "top10_accuracy": "",
164534
  "median_rank": "",
164535
  "mean_rank": "",
164536
- "num_queries": ""
 
164537
  },
164538
  {
164539
  "task": "timeline_subtask",
@@ -164569,7 +164618,8 @@
164569
  "top10_accuracy": "",
164570
  "median_rank": "",
164571
  "mean_rank": "",
164572
- "num_queries": ""
 
164573
  },
164574
  {
164575
  "task": "timeline_subtask",
@@ -164605,7 +164655,8 @@
164605
  "top10_accuracy": "",
164606
  "median_rank": "",
164607
  "mean_rank": "",
164608
- "num_queries": ""
 
164609
  },
164610
  {
164611
  "task": "timeline_subtask",
@@ -164641,7 +164692,8 @@
164641
  "top10_accuracy": "",
164642
  "median_rank": "",
164643
  "mean_rank": "",
164644
- "num_queries": ""
 
164645
  },
164646
  {
164647
  "task": "timeline_subtask",
@@ -164677,7 +164729,8 @@
164677
  "top10_accuracy": "",
164678
  "median_rank": "",
164679
  "mean_rank": "",
164680
- "num_queries": ""
 
164681
  },
164682
  {
164683
  "task": "timeline_subtask",
@@ -164713,7 +164766,8 @@
164713
  "top10_accuracy": "",
164714
  "median_rank": "",
164715
  "mean_rank": "",
164716
- "num_queries": ""
 
164717
  },
164718
  {
164719
  "task": "timeline_subtask",
@@ -164749,7 +164803,8 @@
164749
  "top10_accuracy": "",
164750
  "median_rank": "",
164751
  "mean_rank": "",
164752
- "num_queries": ""
 
164753
  },
164754
  {
164755
  "task": "timeline_subtask",
@@ -164785,7 +164840,8 @@
164785
  "top10_accuracy": "",
164786
  "median_rank": "",
164787
  "mean_rank": "",
164788
- "num_queries": ""
 
164789
  },
164790
  {
164791
  "task": "transition_detection",
@@ -164821,7 +164877,8 @@
164821
  "top10_accuracy": "",
164822
  "median_rank": "",
164823
  "mean_rank": "",
164824
- "num_queries": ""
 
164825
  },
164826
  {
164827
  "task": "transition_detection",
@@ -164857,7 +164914,8 @@
164857
  "top10_accuracy": "",
164858
  "median_rank": "",
164859
  "mean_rank": "",
164860
- "num_queries": ""
 
164861
  },
164862
  {
164863
  "task": "transition_detection",
@@ -164893,7 +164951,8 @@
164893
  "top10_accuracy": "",
164894
  "median_rank": "",
164895
  "mean_rank": "",
164896
- "num_queries": ""
 
164897
  },
164898
  {
164899
  "task": "transition_detection",
@@ -164929,7 +164988,8 @@
164929
  "top10_accuracy": "",
164930
  "median_rank": "",
164931
  "mean_rank": "",
164932
- "num_queries": ""
 
164933
  },
164934
  {
164935
  "task": "transition_detection",
@@ -164965,7 +165025,8 @@
164965
  "top10_accuracy": "",
164966
  "median_rank": "",
164967
  "mean_rank": "",
164968
- "num_queries": ""
 
164969
  },
164970
  {
164971
  "task": "transition_detection",
@@ -165001,7 +165062,8 @@
165001
  "top10_accuracy": "",
165002
  "median_rank": "",
165003
  "mean_rank": "",
165004
- "num_queries": ""
 
165005
  },
165006
  {
165007
  "task": "transition_detection",
@@ -165037,7 +165099,8 @@
165037
  "top10_accuracy": "",
165038
  "median_rank": "",
165039
  "mean_rank": "",
165040
- "num_queries": ""
 
165041
  },
165042
  {
165043
  "task": "transition_detection",
@@ -165073,7 +165136,8 @@
165073
  "top10_accuracy": "",
165074
  "median_rank": "",
165075
  "mean_rank": "",
165076
- "num_queries": ""
 
165077
  },
165078
  {
165079
  "task": "transition_detection",
@@ -165109,7 +165173,8 @@
165109
  "top10_accuracy": "",
165110
  "median_rank": "",
165111
  "mean_rank": "",
165112
- "num_queries": ""
 
165113
  },
165114
  {
165115
  "task": "next_action",
@@ -165145,7 +165210,8 @@
165145
  "top10_accuracy": "",
165146
  "median_rank": "",
165147
  "mean_rank": "",
165148
- "num_queries": ""
 
165149
  },
165150
  {
165151
  "task": "next_action",
@@ -165181,7 +165247,8 @@
165181
  "top10_accuracy": "",
165182
  "median_rank": "",
165183
  "mean_rank": "",
165184
- "num_queries": ""
 
165185
  },
165186
  {
165187
  "task": "next_action",
@@ -165217,7 +165284,8 @@
165217
  "top10_accuracy": "",
165218
  "median_rank": "",
165219
  "mean_rank": "",
165220
- "num_queries": ""
 
165221
  },
165222
  {
165223
  "task": "next_action",
@@ -165253,7 +165321,8 @@
165253
  "top10_accuracy": "",
165254
  "median_rank": "",
165255
  "mean_rank": "",
165256
- "num_queries": ""
 
165257
  },
165258
  {
165259
  "task": "next_action",
@@ -165289,7 +165358,8 @@
165289
  "top10_accuracy": "",
165290
  "median_rank": "",
165291
  "mean_rank": "",
165292
- "num_queries": ""
 
165293
  },
165294
  {
165295
  "task": "next_action",
@@ -165325,7 +165395,8 @@
165325
  "top10_accuracy": "",
165326
  "median_rank": "",
165327
  "mean_rank": "",
165328
- "num_queries": ""
 
165329
  },
165330
  {
165331
  "task": "next_action",
@@ -165361,7 +165432,8 @@
165361
  "top10_accuracy": "",
165362
  "median_rank": "",
165363
  "mean_rank": "",
165364
- "num_queries": ""
 
165365
  },
165366
  {
165367
  "task": "next_action",
@@ -165397,7 +165469,8 @@
165397
  "top10_accuracy": "",
165398
  "median_rank": "",
165399
  "mean_rank": "",
165400
- "num_queries": ""
 
165401
  },
165402
  {
165403
  "task": "next_action",
@@ -165433,7 +165506,8 @@
165433
  "top10_accuracy": "",
165434
  "median_rank": "",
165435
  "mean_rank": "",
165436
- "num_queries": ""
 
165437
  },
165438
  {
165439
  "task": "hand_trajectory_forecast",
@@ -165469,7 +165543,8 @@
165469
  "top10_accuracy": "",
165470
  "median_rank": "",
165471
  "mean_rank": "",
165472
- "num_queries": ""
 
165473
  },
165474
  {
165475
  "task": "hand_trajectory_forecast",
@@ -165505,7 +165580,8 @@
165505
  "top10_accuracy": "",
165506
  "median_rank": "",
165507
  "mean_rank": "",
165508
- "num_queries": ""
 
165509
  },
165510
  {
165511
  "task": "hand_trajectory_forecast",
@@ -165541,7 +165617,8 @@
165541
  "top10_accuracy": "",
165542
  "median_rank": "",
165543
  "mean_rank": "",
165544
- "num_queries": ""
 
165545
  },
165546
  {
165547
  "task": "hand_trajectory_forecast",
@@ -165577,7 +165654,8 @@
165577
  "top10_accuracy": "",
165578
  "median_rank": "",
165579
  "mean_rank": "",
165580
- "num_queries": ""
 
165581
  },
165582
  {
165583
  "task": "hand_trajectory_forecast",
@@ -165613,7 +165691,8 @@
165613
  "top10_accuracy": "",
165614
  "median_rank": "",
165615
  "mean_rank": "",
165616
- "num_queries": ""
 
165617
  },
165618
  {
165619
  "task": "hand_trajectory_forecast",
@@ -165649,7 +165728,8 @@
165649
  "top10_accuracy": "",
165650
  "median_rank": "",
165651
  "mean_rank": "",
165652
- "num_queries": ""
 
165653
  },
165654
  {
165655
  "task": "hand_trajectory_forecast",
@@ -165685,7 +165765,8 @@
165685
  "top10_accuracy": "",
165686
  "median_rank": "",
165687
  "mean_rank": "",
165688
- "num_queries": ""
 
165689
  },
165690
  {
165691
  "task": "hand_trajectory_forecast",
@@ -165721,7 +165802,8 @@
165721
  "top10_accuracy": "",
165722
  "median_rank": "",
165723
  "mean_rank": "",
165724
- "num_queries": ""
 
165725
  },
165726
  {
165727
  "task": "hand_trajectory_forecast",
@@ -165757,7 +165839,8 @@
165757
  "top10_accuracy": "",
165758
  "median_rank": "",
165759
  "mean_rank": "",
165760
- "num_queries": ""
 
165761
  },
165762
  {
165763
  "task": "contact_prediction",
@@ -165793,7 +165876,8 @@
165793
  "top10_accuracy": "",
165794
  "median_rank": "",
165795
  "mean_rank": "",
165796
- "num_queries": ""
 
165797
  },
165798
  {
165799
  "task": "contact_prediction",
@@ -165829,7 +165913,8 @@
165829
  "top10_accuracy": "",
165830
  "median_rank": "",
165831
  "mean_rank": "",
165832
- "num_queries": ""
 
165833
  },
165834
  {
165835
  "task": "contact_prediction",
@@ -165865,7 +165950,8 @@
165865
  "top10_accuracy": "",
165866
  "median_rank": "",
165867
  "mean_rank": "",
165868
- "num_queries": ""
 
165869
  },
165870
  {
165871
  "task": "contact_prediction",
@@ -165901,7 +165987,8 @@
165901
  "top10_accuracy": "",
165902
  "median_rank": "",
165903
  "mean_rank": "",
165904
- "num_queries": ""
 
165905
  },
165906
  {
165907
  "task": "contact_prediction",
@@ -165937,7 +166024,8 @@
165937
  "top10_accuracy": "",
165938
  "median_rank": "",
165939
  "mean_rank": "",
165940
- "num_queries": ""
 
165941
  },
165942
  {
165943
  "task": "contact_prediction",
@@ -165973,7 +166061,8 @@
165973
  "top10_accuracy": "",
165974
  "median_rank": "",
165975
  "mean_rank": "",
165976
- "num_queries": ""
 
165977
  },
165978
  {
165979
  "task": "contact_prediction",
@@ -166009,7 +166098,8 @@
166009
  "top10_accuracy": "",
166010
  "median_rank": "",
166011
  "mean_rank": "",
166012
- "num_queries": ""
 
166013
  },
166014
  {
166015
  "task": "contact_prediction",
@@ -166045,7 +166135,8 @@
166045
  "top10_accuracy": "",
166046
  "median_rank": "",
166047
  "mean_rank": "",
166048
- "num_queries": ""
 
166049
  },
166050
  {
166051
  "task": "contact_prediction",
@@ -166081,7 +166172,8 @@
166081
  "top10_accuracy": "",
166082
  "median_rank": "",
166083
  "mean_rank": "",
166084
- "num_queries": ""
 
166085
  },
166086
  {
166087
  "task": "object_relevance",
@@ -166117,7 +166209,8 @@
166117
  "top10_accuracy": "",
166118
  "median_rank": "",
166119
  "mean_rank": "",
166120
- "num_queries": ""
 
166121
  },
166122
  {
166123
  "task": "object_relevance",
@@ -166153,7 +166246,8 @@
166153
  "top10_accuracy": "",
166154
  "median_rank": "",
166155
  "mean_rank": "",
166156
- "num_queries": ""
 
166157
  },
166158
  {
166159
  "task": "object_relevance",
@@ -166189,7 +166283,8 @@
166189
  "top10_accuracy": "",
166190
  "median_rank": "",
166191
  "mean_rank": "",
166192
- "num_queries": ""
 
166193
  },
166194
  {
166195
  "task": "object_relevance",
@@ -166225,7 +166320,8 @@
166225
  "top10_accuracy": "",
166226
  "median_rank": "",
166227
  "mean_rank": "",
166228
- "num_queries": ""
 
166229
  },
166230
  {
166231
  "task": "object_relevance",
@@ -166261,7 +166357,8 @@
166261
  "top10_accuracy": "",
166262
  "median_rank": "",
166263
  "mean_rank": "",
166264
- "num_queries": ""
 
166265
  },
166266
  {
166267
  "task": "object_relevance",
@@ -166297,7 +166394,8 @@
166297
  "top10_accuracy": "",
166298
  "median_rank": "",
166299
  "mean_rank": "",
166300
- "num_queries": ""
 
166301
  },
166302
  {
166303
  "task": "object_relevance",
@@ -166333,7 +166431,8 @@
166333
  "top10_accuracy": "",
166334
  "median_rank": "",
166335
  "mean_rank": "",
166336
- "num_queries": ""
 
166337
  },
166338
  {
166339
  "task": "object_relevance",
@@ -166369,7 +166468,8 @@
166369
  "top10_accuracy": "",
166370
  "median_rank": "",
166371
  "mean_rank": "",
166372
- "num_queries": ""
 
166373
  },
166374
  {
166375
  "task": "object_relevance",
@@ -166405,7 +166505,8 @@
166405
  "top10_accuracy": "",
166406
  "median_rank": "",
166407
  "mean_rank": "",
166408
- "num_queries": ""
 
166409
  },
166410
  {
166411
  "task": "caption_grounding",
@@ -166441,7 +166542,8 @@
166441
  "top10_accuracy": "0.4454022988505747",
166442
  "median_rank": "13.0",
166443
  "mean_rank": "23.19827651977539",
166444
- "num_queries": "348"
 
166445
  },
166446
  {
166447
  "task": "caption_grounding",
@@ -166477,7 +166579,8 @@
166477
  "top10_accuracy": "0.034482758620689655",
166478
  "median_rank": "162.0",
166479
  "mean_rank": "161.4770050048828",
166480
- "num_queries": "348"
 
166481
  },
166482
  {
166483
  "task": "caption_grounding",
@@ -166513,7 +166616,8 @@
166513
  "top10_accuracy": "0.03735632183908046",
166514
  "median_rank": "114.0",
166515
  "mean_rank": "137.90805053710938",
166516
- "num_queries": "348"
 
166517
  },
166518
  {
166519
  "task": "caption_grounding",
@@ -166549,7 +166653,8 @@
166549
  "top10_accuracy": "0.04597701149425287",
166550
  "median_rank": "143.5",
166551
  "mean_rank": "155.4712677001953",
166552
- "num_queries": "348"
 
166553
  },
166554
  {
166555
  "task": "caption_grounding",
@@ -166585,7 +166690,8 @@
166585
  "top10_accuracy": "0.04885057471264368",
166586
  "median_rank": "110.5",
166587
  "mean_rank": "130.32470703125",
166588
- "num_queries": "348"
 
166589
  },
166590
  {
166591
  "task": "caption_grounding",
@@ -166621,7 +166727,8 @@
166621
  "top10_accuracy": "0.04597701149425287",
166622
  "median_rank": "123.0",
166623
  "mean_rank": "138.61207580566406",
166624
- "num_queries": "348"
 
166625
  },
166626
  {
166627
  "task": "caption_grounding",
@@ -166657,7 +166764,8 @@
166657
  "top10_accuracy": "0.07758620689655173",
166658
  "median_rank": "141.0",
166659
  "mean_rank": "152.14942932128906",
166660
- "num_queries": "348"
 
166661
  },
166662
  {
166663
  "task": "caption_grounding",
@@ -166693,7 +166801,8 @@
166693
  "top10_accuracy": "0.47126436781609193",
166694
  "median_rank": "12.0",
166695
  "mean_rank": "15.106322288513184",
166696
- "num_queries": "348"
 
166697
  },
166698
  {
166699
  "task": "caption_grounding",
@@ -166729,7 +166838,8 @@
166729
  "top10_accuracy": "0.06896551724137931",
166730
  "median_rank": "132.0",
166731
  "mean_rank": "137.30746459960938",
166732
- "num_queries": "348"
 
166733
  },
166734
  {
166735
  "task": "cross_modal_retrieval",
@@ -166765,7 +166875,8 @@
166765
  "top10_accuracy": "0.9798850574712644",
166766
  "median_rank": "1.0",
166767
  "mean_rank": "2.0862069129943848",
166768
- "num_queries": "348"
 
166769
  },
166770
  {
166771
  "task": "cross_modal_retrieval",
@@ -166801,7 +166912,8 @@
166801
  "top10_accuracy": "0.9798850574712644",
166802
  "median_rank": "1.0",
166803
  "mean_rank": "3.844827651977539",
166804
- "num_queries": "348"
 
166805
  },
166806
  {
166807
  "task": "cross_modal_retrieval",
@@ -166837,7 +166949,8 @@
166837
  "top10_accuracy": "0.8620689655172413",
166838
  "median_rank": "1.0",
166839
  "mean_rank": "5.729885101318359",
166840
- "num_queries": "348"
 
166841
  },
166842
  {
166843
  "task": "cross_modal_retrieval",
@@ -166873,7 +166986,8 @@
166873
  "top10_accuracy": "0.6551724137931034",
166874
  "median_rank": "4.0",
166875
  "mean_rank": "15.623562812805176",
166876
- "num_queries": "348"
 
166877
  },
166878
  {
166879
  "task": "cross_modal_retrieval",
@@ -166909,7 +167023,8 @@
166909
  "top10_accuracy": "0.3994252873563218",
166910
  "median_rank": "21.5",
166911
  "mean_rank": "49.181034088134766",
166912
- "num_queries": "348"
 
166913
  },
166914
  {
166915
  "task": "cross_modal_retrieval",
@@ -166945,7 +167060,8 @@
166945
  "top10_accuracy": "0.5229885057471264",
166946
  "median_rank": "10.0",
166947
  "mean_rank": "20.577587127685547",
166948
- "num_queries": "348"
 
166949
  },
166950
  {
166951
  "task": "cross_modal_retrieval",
@@ -166981,7 +167097,8 @@
166981
  "top10_accuracy": "0.031609195402298854",
166982
  "median_rank": "152.5",
166983
  "mean_rank": "161.44540405273438",
166984
- "num_queries": "348"
 
166985
  },
166986
  {
166987
  "task": "cross_modal_retrieval",
@@ -167017,7 +167134,8 @@
167017
  "top10_accuracy": "0.05747126436781609",
167018
  "median_rank": "138.0",
167019
  "mean_rank": "146.83045959472656",
167020
- "num_queries": "348"
 
167021
  },
167022
  {
167023
  "task": "cross_modal_retrieval",
@@ -167053,7 +167171,8 @@
167053
  "top10_accuracy": "0.9770114942528736",
167054
  "median_rank": "1.0",
167055
  "mean_rank": "2.181034564971924",
167056
- "num_queries": "348"
 
167057
  },
167058
  {
167059
  "task": "modality_reconstruction",
@@ -167089,7 +167208,8 @@
167089
  "top10_accuracy": "",
167090
  "median_rank": "",
167091
  "mean_rank": "",
167092
- "num_queries": ""
 
167093
  },
167094
  {
167095
  "task": "modality_reconstruction",
@@ -167125,7 +167245,8 @@
167125
  "top10_accuracy": "",
167126
  "median_rank": "",
167127
  "mean_rank": "",
167128
- "num_queries": ""
 
167129
  },
167130
  {
167131
  "task": "modality_reconstruction",
@@ -167161,7 +167282,8 @@
167161
  "top10_accuracy": "",
167162
  "median_rank": "",
167163
  "mean_rank": "",
167164
- "num_queries": ""
 
167165
  },
167166
  {
167167
  "task": "modality_reconstruction",
@@ -167197,7 +167319,8 @@
167197
  "top10_accuracy": "",
167198
  "median_rank": "",
167199
  "mean_rank": "",
167200
- "num_queries": ""
 
167201
  },
167202
  {
167203
  "task": "modality_reconstruction",
@@ -167233,7 +167356,8 @@
167233
  "top10_accuracy": "",
167234
  "median_rank": "",
167235
  "mean_rank": "",
167236
- "num_queries": ""
 
167237
  },
167238
  {
167239
  "task": "modality_reconstruction",
@@ -167269,7 +167393,8 @@
167269
  "top10_accuracy": "",
167270
  "median_rank": "",
167271
  "mean_rank": "",
167272
- "num_queries": ""
 
167273
  },
167274
  {
167275
  "task": "modality_reconstruction",
@@ -167305,7 +167430,8 @@
167305
  "top10_accuracy": "",
167306
  "median_rank": "",
167307
  "mean_rank": "",
167308
- "num_queries": ""
 
167309
  },
167310
  {
167311
  "task": "modality_reconstruction",
@@ -167341,7 +167467,8 @@
167341
  "top10_accuracy": "",
167342
  "median_rank": "",
167343
  "mean_rank": "",
167344
- "num_queries": ""
 
167345
  },
167346
  {
167347
  "task": "modality_reconstruction",
@@ -167377,7 +167504,8 @@
167377
  "top10_accuracy": "",
167378
  "median_rank": "",
167379
  "mean_rank": "",
167380
- "num_queries": ""
 
167381
  },
167382
  {
167383
  "task": "temporal_order",
@@ -167413,7 +167541,8 @@
167413
  "top10_accuracy": "",
167414
  "median_rank": "",
167415
  "mean_rank": "",
167416
- "num_queries": ""
 
167417
  },
167418
  {
167419
  "task": "temporal_order",
@@ -167449,7 +167578,8 @@
167449
  "top10_accuracy": "",
167450
  "median_rank": "",
167451
  "mean_rank": "",
167452
- "num_queries": ""
 
167453
  },
167454
  {
167455
  "task": "temporal_order",
@@ -167485,7 +167615,8 @@
167485
  "top10_accuracy": "",
167486
  "median_rank": "",
167487
  "mean_rank": "",
167488
- "num_queries": ""
 
167489
  },
167490
  {
167491
  "task": "temporal_order",
@@ -167521,7 +167652,8 @@
167521
  "top10_accuracy": "",
167522
  "median_rank": "",
167523
  "mean_rank": "",
167524
- "num_queries": ""
 
167525
  },
167526
  {
167527
  "task": "temporal_order",
@@ -167557,7 +167689,8 @@
167557
  "top10_accuracy": "",
167558
  "median_rank": "",
167559
  "mean_rank": "",
167560
- "num_queries": ""
 
167561
  },
167562
  {
167563
  "task": "temporal_order",
@@ -167593,7 +167726,8 @@
167593
  "top10_accuracy": "",
167594
  "median_rank": "",
167595
  "mean_rank": "",
167596
- "num_queries": ""
 
167597
  },
167598
  {
167599
  "task": "temporal_order",
@@ -167629,7 +167763,8 @@
167629
  "top10_accuracy": "",
167630
  "median_rank": "",
167631
  "mean_rank": "",
167632
- "num_queries": ""
 
167633
  },
167634
  {
167635
  "task": "temporal_order",
@@ -167665,7 +167800,8 @@
167665
  "top10_accuracy": "",
167666
  "median_rank": "",
167667
  "mean_rank": "",
167668
- "num_queries": ""
 
167669
  },
167670
  {
167671
  "task": "temporal_order",
@@ -167701,7 +167837,8 @@
167701
  "top10_accuracy": "",
167702
  "median_rank": "",
167703
  "mean_rank": "",
167704
- "num_queries": ""
 
167705
  },
167706
  {
167707
  "task": "misalignment_detection",
@@ -167737,7 +167874,8 @@
167737
  "top10_accuracy": "",
167738
  "median_rank": "",
167739
  "mean_rank": "",
167740
- "num_queries": ""
 
167741
  },
167742
  {
167743
  "task": "misalignment_detection",
@@ -167773,7 +167911,8 @@
167773
  "top10_accuracy": "",
167774
  "median_rank": "",
167775
  "mean_rank": "",
167776
- "num_queries": ""
 
167777
  },
167778
  {
167779
  "task": "misalignment_detection",
@@ -167809,7 +167948,8 @@
167809
  "top10_accuracy": "",
167810
  "median_rank": "",
167811
  "mean_rank": "",
167812
- "num_queries": ""
 
167813
  },
167814
  {
167815
  "task": "misalignment_detection",
@@ -167845,7 +167985,8 @@
167845
  "top10_accuracy": "",
167846
  "median_rank": "",
167847
  "mean_rank": "",
167848
- "num_queries": ""
 
167849
  },
167850
  {
167851
  "task": "misalignment_detection",
@@ -167881,7 +168022,8 @@
167881
  "top10_accuracy": "",
167882
  "median_rank": "",
167883
  "mean_rank": "",
167884
- "num_queries": ""
 
167885
  },
167886
  {
167887
  "task": "misalignment_detection",
@@ -167917,7 +168059,8 @@
167917
  "top10_accuracy": "",
167918
  "median_rank": "",
167919
  "mean_rank": "",
167920
- "num_queries": ""
 
167921
  },
167922
  {
167923
  "task": "misalignment_detection",
@@ -167953,7 +168096,8 @@
167953
  "top10_accuracy": "",
167954
  "median_rank": "",
167955
  "mean_rank": "",
167956
- "num_queries": ""
 
167957
  },
167958
  {
167959
  "task": "misalignment_detection",
@@ -167989,7 +168133,8 @@
167989
  "top10_accuracy": "",
167990
  "median_rank": "",
167991
  "mean_rank": "",
167992
- "num_queries": ""
 
167993
  },
167994
  {
167995
  "task": "misalignment_detection",
@@ -168025,7 +168170,8 @@
168025
  "top10_accuracy": "",
168026
  "median_rank": "",
168027
  "mean_rank": "",
168028
- "num_queries": ""
 
168029
  }
168030
  ]
168031
  },
@@ -168841,4 +168987,4 @@
168841
  "num_queries": "308"
168842
  }
168843
  ]
168844
- }
 
1
  {
2
  "meta": {
3
+ "generated_at": "2026-06-06T21:22:14.639673+00:00",
4
  "window_count": 1161,
5
  "feature_dim": 8546,
6
  "object_label_rows": 1161,
 
16
  }
17
  },
18
  "tasks": {
19
+ "timeline_action": "Action Recognition",
20
+ "timeline_subtask": "Procedure Step Recognition",
21
+ "transition_detection": "Action Boundary Detection",
22
  "next_action": "Next-Action Prediction",
23
  "contact_prediction": "Contact State Prediction",
24
+ "object_relevance": "Object Relevance Prediction"
25
+ },
26
+ "task_display_names": {
27
+ "timeline_action": "Action Recognition",
28
+ "timeline_subtask": "Procedure Step Recognition",
29
+ "transition_detection": "Action Boundary Detection",
30
+ "next_action": "Next-Action Prediction",
31
+ "hand_trajectory_forecast": "Hand Trajectory Forecasting",
32
+ "contact_prediction": "Contact State Prediction",
33
+ "object_relevance": "Object Relevance Prediction",
34
+ "caption_grounding": "Language Grounding",
35
+ "cross_modal_retrieval": "Cross-Modal Retrieval",
36
+ "modality_reconstruction": "Cross-Modal Reconstruction",
37
+ "temporal_order": "Temporal Order Verification",
38
+ "misalignment_detection": "Multimodal Synchronization Detection"
39
  },
40
  "feature_blocks": [
41
  {
 
152
  },
153
  {
154
  "name": "audio_fisheye_cam0_aac",
155
+ "display": "Audio",
156
  "modality": "audio",
157
  "start": 7343,
158
  "end": 7511,
 
163972
  "ablation": {
163973
  "best_by_task": {
163974
  "caption_grounding": {
163975
+ "task": "caption_grounding",
163976
+ "task_display_name": "Language Grounding",
163977
  "best": {
163978
  "modality_group": "language",
163979
  "modality_display": "Language",
 
163989
  }
163990
  },
163991
  "contact_prediction": {
163992
+ "task": "contact_prediction",
163993
+ "task_display_name": "Contact State Prediction",
163994
  "best": {
163995
  "modality_group": "all_features",
163996
  "modality_display": "All Features",
 
164006
  }
164007
  },
164008
  "cross_modal_retrieval": {
164009
+ "task": "cross_modal_retrieval",
164010
+ "task_display_name": "Cross-Modal Retrieval",
164011
  "best": {
164012
  "modality_group": "all_features",
164013
  "modality_display": "All Features",
 
164023
  }
164024
  },
164025
  "hand_trajectory_forecast": {
164026
+ "task": "hand_trajectory_forecast",
164027
+ "task_display_name": "Hand Trajectory Forecasting",
164028
  "best": {
164029
  "modality_group": "inertial",
164030
  "modality_display": "Inertial",
 
164040
  }
164041
  },
164042
  "misalignment_detection": {
164043
+ "task": "misalignment_detection",
164044
+ "task_display_name": "Multimodal Synchronization Detection",
164045
  "best": {
164046
  "modality_group": "audio",
164047
  "modality_display": "Audio",
 
164057
  }
164058
  },
164059
  "modality_reconstruction": {
164060
+ "task": "modality_reconstruction",
164061
+ "task_display_name": "Cross-Modal Reconstruction",
164062
  "best": {
164063
  "modality_group": "video",
164064
  "modality_display": "Video",
 
164074
  }
164075
  },
164076
  "next_action": {
164077
+ "task": "next_action",
164078
+ "task_display_name": "Next-Action Prediction",
164079
  "best": {
164080
  "modality_group": "language",
164081
  "modality_display": "Language",
 
164091
  }
164092
  },
164093
  "object_relevance": {
164094
+ "task": "object_relevance",
164095
+ "task_display_name": "Object Relevance Prediction",
164096
  "best": {
164097
  "modality_group": "language",
164098
  "modality_display": "Language",
 
164108
  }
164109
  },
164110
  "temporal_order": {
164111
+ "task": "temporal_order",
164112
+ "task_display_name": "Temporal Order Verification",
164113
  "best": {
164114
  "modality_group": "pose_slam",
164115
  "modality_display": "Pose + SLAM",
 
164125
  }
164126
  },
164127
  "timeline_action": {
164128
+ "task": "timeline_action",
164129
+ "task_display_name": "Action Recognition",
164130
  "best": {
164131
  "modality_group": "language",
164132
  "modality_display": "Language",
 
164142
  }
164143
  },
164144
  "timeline_subtask": {
164145
+ "task": "timeline_subtask",
164146
+ "task_display_name": "Procedure Step Recognition",
164147
  "best": {
164148
  "modality_group": "language",
164149
  "modality_display": "Language",
 
164159
  }
164160
  },
164161
  "transition_detection": {
164162
+ "task": "transition_detection",
164163
+ "task_display_name": "Action Boundary Detection",
164164
  "best": {
164165
  "modality_group": "language",
164166
  "modality_display": "Language",
 
164211
  "top10_accuracy": "",
164212
  "median_rank": "",
164213
  "mean_rank": "",
164214
+ "num_queries": "",
164215
+ "task_display_name": "Action Recognition"
164216
  },
164217
  {
164218
  "task": "timeline_action",
 
164248
  "top10_accuracy": "",
164249
  "median_rank": "",
164250
  "mean_rank": "",
164251
+ "num_queries": "",
164252
+ "task_display_name": "Action Recognition"
164253
  },
164254
  {
164255
  "task": "timeline_action",
 
164285
  "top10_accuracy": "",
164286
  "median_rank": "",
164287
  "mean_rank": "",
164288
+ "num_queries": "",
164289
+ "task_display_name": "Action Recognition"
164290
  },
164291
  {
164292
  "task": "timeline_action",
 
164322
  "top10_accuracy": "",
164323
  "median_rank": "",
164324
  "mean_rank": "",
164325
+ "num_queries": "",
164326
+ "task_display_name": "Action Recognition"
164327
  },
164328
  {
164329
  "task": "timeline_action",
 
164359
  "top10_accuracy": "",
164360
  "median_rank": "",
164361
  "mean_rank": "",
164362
+ "num_queries": "",
164363
+ "task_display_name": "Action Recognition"
164364
  },
164365
  {
164366
  "task": "timeline_action",
 
164396
  "top10_accuracy": "",
164397
  "median_rank": "",
164398
  "mean_rank": "",
164399
+ "num_queries": "",
164400
+ "task_display_name": "Action Recognition"
164401
  },
164402
  {
164403
  "task": "timeline_action",
 
164433
  "top10_accuracy": "",
164434
  "median_rank": "",
164435
  "mean_rank": "",
164436
+ "num_queries": "",
164437
+ "task_display_name": "Action Recognition"
164438
  },
164439
  {
164440
  "task": "timeline_action",
 
164470
  "top10_accuracy": "",
164471
  "median_rank": "",
164472
  "mean_rank": "",
164473
+ "num_queries": "",
164474
+ "task_display_name": "Action Recognition"
164475
  },
164476
  {
164477
  "task": "timeline_action",
 
164507
  "top10_accuracy": "",
164508
  "median_rank": "",
164509
  "mean_rank": "",
164510
+ "num_queries": "",
164511
+ "task_display_name": "Action Recognition"
164512
  },
164513
  {
164514
  "task": "timeline_subtask",
 
164544
  "top10_accuracy": "",
164545
  "median_rank": "",
164546
  "mean_rank": "",
164547
+ "num_queries": "",
164548
+ "task_display_name": "Procedure Step Recognition"
164549
  },
164550
  {
164551
  "task": "timeline_subtask",
 
164581
  "top10_accuracy": "",
164582
  "median_rank": "",
164583
  "mean_rank": "",
164584
+ "num_queries": "",
164585
+ "task_display_name": "Procedure Step Recognition"
164586
  },
164587
  {
164588
  "task": "timeline_subtask",
 
164618
  "top10_accuracy": "",
164619
  "median_rank": "",
164620
  "mean_rank": "",
164621
+ "num_queries": "",
164622
+ "task_display_name": "Procedure Step Recognition"
164623
  },
164624
  {
164625
  "task": "timeline_subtask",
 
164655
  "top10_accuracy": "",
164656
  "median_rank": "",
164657
  "mean_rank": "",
164658
+ "num_queries": "",
164659
+ "task_display_name": "Procedure Step Recognition"
164660
  },
164661
  {
164662
  "task": "timeline_subtask",
 
164692
  "top10_accuracy": "",
164693
  "median_rank": "",
164694
  "mean_rank": "",
164695
+ "num_queries": "",
164696
+ "task_display_name": "Procedure Step Recognition"
164697
  },
164698
  {
164699
  "task": "timeline_subtask",
 
164729
  "top10_accuracy": "",
164730
  "median_rank": "",
164731
  "mean_rank": "",
164732
+ "num_queries": "",
164733
+ "task_display_name": "Procedure Step Recognition"
164734
  },
164735
  {
164736
  "task": "timeline_subtask",
 
164766
  "top10_accuracy": "",
164767
  "median_rank": "",
164768
  "mean_rank": "",
164769
+ "num_queries": "",
164770
+ "task_display_name": "Procedure Step Recognition"
164771
  },
164772
  {
164773
  "task": "timeline_subtask",
 
164803
  "top10_accuracy": "",
164804
  "median_rank": "",
164805
  "mean_rank": "",
164806
+ "num_queries": "",
164807
+ "task_display_name": "Procedure Step Recognition"
164808
  },
164809
  {
164810
  "task": "timeline_subtask",
 
164840
  "top10_accuracy": "",
164841
  "median_rank": "",
164842
  "mean_rank": "",
164843
+ "num_queries": "",
164844
+ "task_display_name": "Procedure Step Recognition"
164845
  },
164846
  {
164847
  "task": "transition_detection",
 
164877
  "top10_accuracy": "",
164878
  "median_rank": "",
164879
  "mean_rank": "",
164880
+ "num_queries": "",
164881
+ "task_display_name": "Action Boundary Detection"
164882
  },
164883
  {
164884
  "task": "transition_detection",
 
164914
  "top10_accuracy": "",
164915
  "median_rank": "",
164916
  "mean_rank": "",
164917
+ "num_queries": "",
164918
+ "task_display_name": "Action Boundary Detection"
164919
  },
164920
  {
164921
  "task": "transition_detection",
 
164951
  "top10_accuracy": "",
164952
  "median_rank": "",
164953
  "mean_rank": "",
164954
+ "num_queries": "",
164955
+ "task_display_name": "Action Boundary Detection"
164956
  },
164957
  {
164958
  "task": "transition_detection",
 
164988
  "top10_accuracy": "",
164989
  "median_rank": "",
164990
  "mean_rank": "",
164991
+ "num_queries": "",
164992
+ "task_display_name": "Action Boundary Detection"
164993
  },
164994
  {
164995
  "task": "transition_detection",
 
165025
  "top10_accuracy": "",
165026
  "median_rank": "",
165027
  "mean_rank": "",
165028
+ "num_queries": "",
165029
+ "task_display_name": "Action Boundary Detection"
165030
  },
165031
  {
165032
  "task": "transition_detection",
 
165062
  "top10_accuracy": "",
165063
  "median_rank": "",
165064
  "mean_rank": "",
165065
+ "num_queries": "",
165066
+ "task_display_name": "Action Boundary Detection"
165067
  },
165068
  {
165069
  "task": "transition_detection",
 
165099
  "top10_accuracy": "",
165100
  "median_rank": "",
165101
  "mean_rank": "",
165102
+ "num_queries": "",
165103
+ "task_display_name": "Action Boundary Detection"
165104
  },
165105
  {
165106
  "task": "transition_detection",
 
165136
  "top10_accuracy": "",
165137
  "median_rank": "",
165138
  "mean_rank": "",
165139
+ "num_queries": "",
165140
+ "task_display_name": "Action Boundary Detection"
165141
  },
165142
  {
165143
  "task": "transition_detection",
 
165173
  "top10_accuracy": "",
165174
  "median_rank": "",
165175
  "mean_rank": "",
165176
+ "num_queries": "",
165177
+ "task_display_name": "Action Boundary Detection"
165178
  },
165179
  {
165180
  "task": "next_action",
 
165210
  "top10_accuracy": "",
165211
  "median_rank": "",
165212
  "mean_rank": "",
165213
+ "num_queries": "",
165214
+ "task_display_name": "Next-Action Prediction"
165215
  },
165216
  {
165217
  "task": "next_action",
 
165247
  "top10_accuracy": "",
165248
  "median_rank": "",
165249
  "mean_rank": "",
165250
+ "num_queries": "",
165251
+ "task_display_name": "Next-Action Prediction"
165252
  },
165253
  {
165254
  "task": "next_action",
 
165284
  "top10_accuracy": "",
165285
  "median_rank": "",
165286
  "mean_rank": "",
165287
+ "num_queries": "",
165288
+ "task_display_name": "Next-Action Prediction"
165289
  },
165290
  {
165291
  "task": "next_action",
 
165321
  "top10_accuracy": "",
165322
  "median_rank": "",
165323
  "mean_rank": "",
165324
+ "num_queries": "",
165325
+ "task_display_name": "Next-Action Prediction"
165326
  },
165327
  {
165328
  "task": "next_action",
 
165358
  "top10_accuracy": "",
165359
  "median_rank": "",
165360
  "mean_rank": "",
165361
+ "num_queries": "",
165362
+ "task_display_name": "Next-Action Prediction"
165363
  },
165364
  {
165365
  "task": "next_action",
 
165395
  "top10_accuracy": "",
165396
  "median_rank": "",
165397
  "mean_rank": "",
165398
+ "num_queries": "",
165399
+ "task_display_name": "Next-Action Prediction"
165400
  },
165401
  {
165402
  "task": "next_action",
 
165432
  "top10_accuracy": "",
165433
  "median_rank": "",
165434
  "mean_rank": "",
165435
+ "num_queries": "",
165436
+ "task_display_name": "Next-Action Prediction"
165437
  },
165438
  {
165439
  "task": "next_action",
 
165469
  "top10_accuracy": "",
165470
  "median_rank": "",
165471
  "mean_rank": "",
165472
+ "num_queries": "",
165473
+ "task_display_name": "Next-Action Prediction"
165474
  },
165475
  {
165476
  "task": "next_action",
 
165506
  "top10_accuracy": "",
165507
  "median_rank": "",
165508
  "mean_rank": "",
165509
+ "num_queries": "",
165510
+ "task_display_name": "Next-Action Prediction"
165511
  },
165512
  {
165513
  "task": "hand_trajectory_forecast",
 
165543
  "top10_accuracy": "",
165544
  "median_rank": "",
165545
  "mean_rank": "",
165546
+ "num_queries": "",
165547
+ "task_display_name": "Hand Trajectory Forecasting"
165548
  },
165549
  {
165550
  "task": "hand_trajectory_forecast",
 
165580
  "top10_accuracy": "",
165581
  "median_rank": "",
165582
  "mean_rank": "",
165583
+ "num_queries": "",
165584
+ "task_display_name": "Hand Trajectory Forecasting"
165585
  },
165586
  {
165587
  "task": "hand_trajectory_forecast",
 
165617
  "top10_accuracy": "",
165618
  "median_rank": "",
165619
  "mean_rank": "",
165620
+ "num_queries": "",
165621
+ "task_display_name": "Hand Trajectory Forecasting"
165622
  },
165623
  {
165624
  "task": "hand_trajectory_forecast",
 
165654
  "top10_accuracy": "",
165655
  "median_rank": "",
165656
  "mean_rank": "",
165657
+ "num_queries": "",
165658
+ "task_display_name": "Hand Trajectory Forecasting"
165659
  },
165660
  {
165661
  "task": "hand_trajectory_forecast",
 
165691
  "top10_accuracy": "",
165692
  "median_rank": "",
165693
  "mean_rank": "",
165694
+ "num_queries": "",
165695
+ "task_display_name": "Hand Trajectory Forecasting"
165696
  },
165697
  {
165698
  "task": "hand_trajectory_forecast",
 
165728
  "top10_accuracy": "",
165729
  "median_rank": "",
165730
  "mean_rank": "",
165731
+ "num_queries": "",
165732
+ "task_display_name": "Hand Trajectory Forecasting"
165733
  },
165734
  {
165735
  "task": "hand_trajectory_forecast",
 
165765
  "top10_accuracy": "",
165766
  "median_rank": "",
165767
  "mean_rank": "",
165768
+ "num_queries": "",
165769
+ "task_display_name": "Hand Trajectory Forecasting"
165770
  },
165771
  {
165772
  "task": "hand_trajectory_forecast",
 
165802
  "top10_accuracy": "",
165803
  "median_rank": "",
165804
  "mean_rank": "",
165805
+ "num_queries": "",
165806
+ "task_display_name": "Hand Trajectory Forecasting"
165807
  },
165808
  {
165809
  "task": "hand_trajectory_forecast",
 
165839
  "top10_accuracy": "",
165840
  "median_rank": "",
165841
  "mean_rank": "",
165842
+ "num_queries": "",
165843
+ "task_display_name": "Hand Trajectory Forecasting"
165844
  },
165845
  {
165846
  "task": "contact_prediction",
 
165876
  "top10_accuracy": "",
165877
  "median_rank": "",
165878
  "mean_rank": "",
165879
+ "num_queries": "",
165880
+ "task_display_name": "Contact State Prediction"
165881
  },
165882
  {
165883
  "task": "contact_prediction",
 
165913
  "top10_accuracy": "",
165914
  "median_rank": "",
165915
  "mean_rank": "",
165916
+ "num_queries": "",
165917
+ "task_display_name": "Contact State Prediction"
165918
  },
165919
  {
165920
  "task": "contact_prediction",
 
165950
  "top10_accuracy": "",
165951
  "median_rank": "",
165952
  "mean_rank": "",
165953
+ "num_queries": "",
165954
+ "task_display_name": "Contact State Prediction"
165955
  },
165956
  {
165957
  "task": "contact_prediction",
 
165987
  "top10_accuracy": "",
165988
  "median_rank": "",
165989
  "mean_rank": "",
165990
+ "num_queries": "",
165991
+ "task_display_name": "Contact State Prediction"
165992
  },
165993
  {
165994
  "task": "contact_prediction",
 
166024
  "top10_accuracy": "",
166025
  "median_rank": "",
166026
  "mean_rank": "",
166027
+ "num_queries": "",
166028
+ "task_display_name": "Contact State Prediction"
166029
  },
166030
  {
166031
  "task": "contact_prediction",
 
166061
  "top10_accuracy": "",
166062
  "median_rank": "",
166063
  "mean_rank": "",
166064
+ "num_queries": "",
166065
+ "task_display_name": "Contact State Prediction"
166066
  },
166067
  {
166068
  "task": "contact_prediction",
 
166098
  "top10_accuracy": "",
166099
  "median_rank": "",
166100
  "mean_rank": "",
166101
+ "num_queries": "",
166102
+ "task_display_name": "Contact State Prediction"
166103
  },
166104
  {
166105
  "task": "contact_prediction",
 
166135
  "top10_accuracy": "",
166136
  "median_rank": "",
166137
  "mean_rank": "",
166138
+ "num_queries": "",
166139
+ "task_display_name": "Contact State Prediction"
166140
  },
166141
  {
166142
  "task": "contact_prediction",
 
166172
  "top10_accuracy": "",
166173
  "median_rank": "",
166174
  "mean_rank": "",
166175
+ "num_queries": "",
166176
+ "task_display_name": "Contact State Prediction"
166177
  },
166178
  {
166179
  "task": "object_relevance",
 
166209
  "top10_accuracy": "",
166210
  "median_rank": "",
166211
  "mean_rank": "",
166212
+ "num_queries": "",
166213
+ "task_display_name": "Object Relevance Prediction"
166214
  },
166215
  {
166216
  "task": "object_relevance",
 
166246
  "top10_accuracy": "",
166247
  "median_rank": "",
166248
  "mean_rank": "",
166249
+ "num_queries": "",
166250
+ "task_display_name": "Object Relevance Prediction"
166251
  },
166252
  {
166253
  "task": "object_relevance",
 
166283
  "top10_accuracy": "",
166284
  "median_rank": "",
166285
  "mean_rank": "",
166286
+ "num_queries": "",
166287
+ "task_display_name": "Object Relevance Prediction"
166288
  },
166289
  {
166290
  "task": "object_relevance",
 
166320
  "top10_accuracy": "",
166321
  "median_rank": "",
166322
  "mean_rank": "",
166323
+ "num_queries": "",
166324
+ "task_display_name": "Object Relevance Prediction"
166325
  },
166326
  {
166327
  "task": "object_relevance",
 
166357
  "top10_accuracy": "",
166358
  "median_rank": "",
166359
  "mean_rank": "",
166360
+ "num_queries": "",
166361
+ "task_display_name": "Object Relevance Prediction"
166362
  },
166363
  {
166364
  "task": "object_relevance",
 
166394
  "top10_accuracy": "",
166395
  "median_rank": "",
166396
  "mean_rank": "",
166397
+ "num_queries": "",
166398
+ "task_display_name": "Object Relevance Prediction"
166399
  },
166400
  {
166401
  "task": "object_relevance",
 
166431
  "top10_accuracy": "",
166432
  "median_rank": "",
166433
  "mean_rank": "",
166434
+ "num_queries": "",
166435
+ "task_display_name": "Object Relevance Prediction"
166436
  },
166437
  {
166438
  "task": "object_relevance",
 
166468
  "top10_accuracy": "",
166469
  "median_rank": "",
166470
  "mean_rank": "",
166471
+ "num_queries": "",
166472
+ "task_display_name": "Object Relevance Prediction"
166473
  },
166474
  {
166475
  "task": "object_relevance",
 
166505
  "top10_accuracy": "",
166506
  "median_rank": "",
166507
  "mean_rank": "",
166508
+ "num_queries": "",
166509
+ "task_display_name": "Object Relevance Prediction"
166510
  },
166511
  {
166512
  "task": "caption_grounding",
 
166542
  "top10_accuracy": "0.4454022988505747",
166543
  "median_rank": "13.0",
166544
  "mean_rank": "23.19827651977539",
166545
+ "num_queries": "348",
166546
+ "task_display_name": "Language Grounding"
166547
  },
166548
  {
166549
  "task": "caption_grounding",
 
166579
  "top10_accuracy": "0.034482758620689655",
166580
  "median_rank": "162.0",
166581
  "mean_rank": "161.4770050048828",
166582
+ "num_queries": "348",
166583
+ "task_display_name": "Language Grounding"
166584
  },
166585
  {
166586
  "task": "caption_grounding",
 
166616
  "top10_accuracy": "0.03735632183908046",
166617
  "median_rank": "114.0",
166618
  "mean_rank": "137.90805053710938",
166619
+ "num_queries": "348",
166620
+ "task_display_name": "Language Grounding"
166621
  },
166622
  {
166623
  "task": "caption_grounding",
 
166653
  "top10_accuracy": "0.04597701149425287",
166654
  "median_rank": "143.5",
166655
  "mean_rank": "155.4712677001953",
166656
+ "num_queries": "348",
166657
+ "task_display_name": "Language Grounding"
166658
  },
166659
  {
166660
  "task": "caption_grounding",
 
166690
  "top10_accuracy": "0.04885057471264368",
166691
  "median_rank": "110.5",
166692
  "mean_rank": "130.32470703125",
166693
+ "num_queries": "348",
166694
+ "task_display_name": "Language Grounding"
166695
  },
166696
  {
166697
  "task": "caption_grounding",
 
166727
  "top10_accuracy": "0.04597701149425287",
166728
  "median_rank": "123.0",
166729
  "mean_rank": "138.61207580566406",
166730
+ "num_queries": "348",
166731
+ "task_display_name": "Language Grounding"
166732
  },
166733
  {
166734
  "task": "caption_grounding",
 
166764
  "top10_accuracy": "0.07758620689655173",
166765
  "median_rank": "141.0",
166766
  "mean_rank": "152.14942932128906",
166767
+ "num_queries": "348",
166768
+ "task_display_name": "Language Grounding"
166769
  },
166770
  {
166771
  "task": "caption_grounding",
 
166801
  "top10_accuracy": "0.47126436781609193",
166802
  "median_rank": "12.0",
166803
  "mean_rank": "15.106322288513184",
166804
+ "num_queries": "348",
166805
+ "task_display_name": "Language Grounding"
166806
  },
166807
  {
166808
  "task": "caption_grounding",
 
166838
  "top10_accuracy": "0.06896551724137931",
166839
  "median_rank": "132.0",
166840
  "mean_rank": "137.30746459960938",
166841
+ "num_queries": "348",
166842
+ "task_display_name": "Language Grounding"
166843
  },
166844
  {
166845
  "task": "cross_modal_retrieval",
 
166875
  "top10_accuracy": "0.9798850574712644",
166876
  "median_rank": "1.0",
166877
  "mean_rank": "2.0862069129943848",
166878
+ "num_queries": "348",
166879
+ "task_display_name": "Cross-Modal Retrieval"
166880
  },
166881
  {
166882
  "task": "cross_modal_retrieval",
 
166912
  "top10_accuracy": "0.9798850574712644",
166913
  "median_rank": "1.0",
166914
  "mean_rank": "3.844827651977539",
166915
+ "num_queries": "348",
166916
+ "task_display_name": "Cross-Modal Retrieval"
166917
  },
166918
  {
166919
  "task": "cross_modal_retrieval",
 
166949
  "top10_accuracy": "0.8620689655172413",
166950
  "median_rank": "1.0",
166951
  "mean_rank": "5.729885101318359",
166952
+ "num_queries": "348",
166953
+ "task_display_name": "Cross-Modal Retrieval"
166954
  },
166955
  {
166956
  "task": "cross_modal_retrieval",
 
166986
  "top10_accuracy": "0.6551724137931034",
166987
  "median_rank": "4.0",
166988
  "mean_rank": "15.623562812805176",
166989
+ "num_queries": "348",
166990
+ "task_display_name": "Cross-Modal Retrieval"
166991
  },
166992
  {
166993
  "task": "cross_modal_retrieval",
 
167023
  "top10_accuracy": "0.3994252873563218",
167024
  "median_rank": "21.5",
167025
  "mean_rank": "49.181034088134766",
167026
+ "num_queries": "348",
167027
+ "task_display_name": "Cross-Modal Retrieval"
167028
  },
167029
  {
167030
  "task": "cross_modal_retrieval",
 
167060
  "top10_accuracy": "0.5229885057471264",
167061
  "median_rank": "10.0",
167062
  "mean_rank": "20.577587127685547",
167063
+ "num_queries": "348",
167064
+ "task_display_name": "Cross-Modal Retrieval"
167065
  },
167066
  {
167067
  "task": "cross_modal_retrieval",
 
167097
  "top10_accuracy": "0.031609195402298854",
167098
  "median_rank": "152.5",
167099
  "mean_rank": "161.44540405273438",
167100
+ "num_queries": "348",
167101
+ "task_display_name": "Cross-Modal Retrieval"
167102
  },
167103
  {
167104
  "task": "cross_modal_retrieval",
 
167134
  "top10_accuracy": "0.05747126436781609",
167135
  "median_rank": "138.0",
167136
  "mean_rank": "146.83045959472656",
167137
+ "num_queries": "348",
167138
+ "task_display_name": "Cross-Modal Retrieval"
167139
  },
167140
  {
167141
  "task": "cross_modal_retrieval",
 
167171
  "top10_accuracy": "0.9770114942528736",
167172
  "median_rank": "1.0",
167173
  "mean_rank": "2.181034564971924",
167174
+ "num_queries": "348",
167175
+ "task_display_name": "Cross-Modal Retrieval"
167176
  },
167177
  {
167178
  "task": "modality_reconstruction",
 
167208
  "top10_accuracy": "",
167209
  "median_rank": "",
167210
  "mean_rank": "",
167211
+ "num_queries": "",
167212
+ "task_display_name": "Cross-Modal Reconstruction"
167213
  },
167214
  {
167215
  "task": "modality_reconstruction",
 
167245
  "top10_accuracy": "",
167246
  "median_rank": "",
167247
  "mean_rank": "",
167248
+ "num_queries": "",
167249
+ "task_display_name": "Cross-Modal Reconstruction"
167250
  },
167251
  {
167252
  "task": "modality_reconstruction",
 
167282
  "top10_accuracy": "",
167283
  "median_rank": "",
167284
  "mean_rank": "",
167285
+ "num_queries": "",
167286
+ "task_display_name": "Cross-Modal Reconstruction"
167287
  },
167288
  {
167289
  "task": "modality_reconstruction",
 
167319
  "top10_accuracy": "",
167320
  "median_rank": "",
167321
  "mean_rank": "",
167322
+ "num_queries": "",
167323
+ "task_display_name": "Cross-Modal Reconstruction"
167324
  },
167325
  {
167326
  "task": "modality_reconstruction",
 
167356
  "top10_accuracy": "",
167357
  "median_rank": "",
167358
  "mean_rank": "",
167359
+ "num_queries": "",
167360
+ "task_display_name": "Cross-Modal Reconstruction"
167361
  },
167362
  {
167363
  "task": "modality_reconstruction",
 
167393
  "top10_accuracy": "",
167394
  "median_rank": "",
167395
  "mean_rank": "",
167396
+ "num_queries": "",
167397
+ "task_display_name": "Cross-Modal Reconstruction"
167398
  },
167399
  {
167400
  "task": "modality_reconstruction",
 
167430
  "top10_accuracy": "",
167431
  "median_rank": "",
167432
  "mean_rank": "",
167433
+ "num_queries": "",
167434
+ "task_display_name": "Cross-Modal Reconstruction"
167435
  },
167436
  {
167437
  "task": "modality_reconstruction",
 
167467
  "top10_accuracy": "",
167468
  "median_rank": "",
167469
  "mean_rank": "",
167470
+ "num_queries": "",
167471
+ "task_display_name": "Cross-Modal Reconstruction"
167472
  },
167473
  {
167474
  "task": "modality_reconstruction",
 
167504
  "top10_accuracy": "",
167505
  "median_rank": "",
167506
  "mean_rank": "",
167507
+ "num_queries": "",
167508
+ "task_display_name": "Cross-Modal Reconstruction"
167509
  },
167510
  {
167511
  "task": "temporal_order",
 
167541
  "top10_accuracy": "",
167542
  "median_rank": "",
167543
  "mean_rank": "",
167544
+ "num_queries": "",
167545
+ "task_display_name": "Temporal Order Verification"
167546
  },
167547
  {
167548
  "task": "temporal_order",
 
167578
  "top10_accuracy": "",
167579
  "median_rank": "",
167580
  "mean_rank": "",
167581
+ "num_queries": "",
167582
+ "task_display_name": "Temporal Order Verification"
167583
  },
167584
  {
167585
  "task": "temporal_order",
 
167615
  "top10_accuracy": "",
167616
  "median_rank": "",
167617
  "mean_rank": "",
167618
+ "num_queries": "",
167619
+ "task_display_name": "Temporal Order Verification"
167620
  },
167621
  {
167622
  "task": "temporal_order",
 
167652
  "top10_accuracy": "",
167653
  "median_rank": "",
167654
  "mean_rank": "",
167655
+ "num_queries": "",
167656
+ "task_display_name": "Temporal Order Verification"
167657
  },
167658
  {
167659
  "task": "temporal_order",
 
167689
  "top10_accuracy": "",
167690
  "median_rank": "",
167691
  "mean_rank": "",
167692
+ "num_queries": "",
167693
+ "task_display_name": "Temporal Order Verification"
167694
  },
167695
  {
167696
  "task": "temporal_order",
 
167726
  "top10_accuracy": "",
167727
  "median_rank": "",
167728
  "mean_rank": "",
167729
+ "num_queries": "",
167730
+ "task_display_name": "Temporal Order Verification"
167731
  },
167732
  {
167733
  "task": "temporal_order",
 
167763
  "top10_accuracy": "",
167764
  "median_rank": "",
167765
  "mean_rank": "",
167766
+ "num_queries": "",
167767
+ "task_display_name": "Temporal Order Verification"
167768
  },
167769
  {
167770
  "task": "temporal_order",
 
167800
  "top10_accuracy": "",
167801
  "median_rank": "",
167802
  "mean_rank": "",
167803
+ "num_queries": "",
167804
+ "task_display_name": "Temporal Order Verification"
167805
  },
167806
  {
167807
  "task": "temporal_order",
 
167837
  "top10_accuracy": "",
167838
  "median_rank": "",
167839
  "mean_rank": "",
167840
+ "num_queries": "",
167841
+ "task_display_name": "Temporal Order Verification"
167842
  },
167843
  {
167844
  "task": "misalignment_detection",
 
167874
  "top10_accuracy": "",
167875
  "median_rank": "",
167876
  "mean_rank": "",
167877
+ "num_queries": "",
167878
+ "task_display_name": "Multimodal Synchronization Detection"
167879
  },
167880
  {
167881
  "task": "misalignment_detection",
 
167911
  "top10_accuracy": "",
167912
  "median_rank": "",
167913
  "mean_rank": "",
167914
+ "num_queries": "",
167915
+ "task_display_name": "Multimodal Synchronization Detection"
167916
  },
167917
  {
167918
  "task": "misalignment_detection",
 
167948
  "top10_accuracy": "",
167949
  "median_rank": "",
167950
  "mean_rank": "",
167951
+ "num_queries": "",
167952
+ "task_display_name": "Multimodal Synchronization Detection"
167953
  },
167954
  {
167955
  "task": "misalignment_detection",
 
167985
  "top10_accuracy": "",
167986
  "median_rank": "",
167987
  "mean_rank": "",
167988
+ "num_queries": "",
167989
+ "task_display_name": "Multimodal Synchronization Detection"
167990
  },
167991
  {
167992
  "task": "misalignment_detection",
 
168022
  "top10_accuracy": "",
168023
  "median_rank": "",
168024
  "mean_rank": "",
168025
+ "num_queries": "",
168026
+ "task_display_name": "Multimodal Synchronization Detection"
168027
  },
168028
  {
168029
  "task": "misalignment_detection",
 
168059
  "top10_accuracy": "",
168060
  "median_rank": "",
168061
  "mean_rank": "",
168062
+ "num_queries": "",
168063
+ "task_display_name": "Multimodal Synchronization Detection"
168064
  },
168065
  {
168066
  "task": "misalignment_detection",
 
168096
  "top10_accuracy": "",
168097
  "median_rank": "",
168098
  "mean_rank": "",
168099
+ "num_queries": "",
168100
+ "task_display_name": "Multimodal Synchronization Detection"
168101
  },
168102
  {
168103
  "task": "misalignment_detection",
 
168133
  "top10_accuracy": "",
168134
  "median_rank": "",
168135
  "mean_rank": "",
168136
+ "num_queries": "",
168137
+ "task_display_name": "Multimodal Synchronization Detection"
168138
  },
168139
  {
168140
  "task": "misalignment_detection",
 
168170
  "top10_accuracy": "",
168171
  "median_rank": "",
168172
  "mean_rank": "",
168173
+ "num_queries": "",
168174
+ "task_display_name": "Multimodal Synchronization Detection"
168175
  }
168176
  ]
168177
  },
 
168987
  "num_queries": "308"
168988
  }
168989
  ]
168990
+ }
metrics/summary_metrics.json CHANGED
@@ -1,27 +1,12 @@
1
  {
2
  "omni_relay": {
3
- "status": "verified_validation_aware_diagnostic_pilot",
4
  "dataset": "ropedia-ai/xperience-10m",
5
- "staging": "selected_episode_verified_validation_package",
6
- "training_target": "json_reliability_and_task_quality_improvement",
7
  "selection_strategy": "stratified_round_robin_by_top_level_session",
8
  "target_episodes": 128,
9
  "selected_sessions": 128,
10
- "selected_split_counts": {
11
- "train": 96,
12
- "val": 16,
13
- "test": 16
14
- },
15
- "exported_window_counts": {
16
- "train": 2848,
17
- "val": 512,
18
- "test": 448
19
- },
20
- "held_out_episode_count": 14,
21
- "held_out_test_windows": 448,
22
- "json_validity_rate": 0.875,
23
- "action_macro_f1": 0.0026621494447581404,
24
- "quality_target_met": false,
25
  "candidate_scan_top_level_sessions": 802,
26
  "valid_candidates": 12102,
27
  "estimated_bytes": 298188841943,
@@ -29,16 +14,7 @@
29
  "visualization.rrd"
30
  ],
31
  "access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
32
- "current_scope": "The selected-episode Qwen3-Omni validation-aware diagnostic pilot is verified, but held-out quality is still weak and JSON validity remains below the 98% target.",
33
- "validation_samples_used": 512,
34
- "train_loss": 0.41304643672440994,
35
- "val_loss": 0.0330660454928875,
36
- "num_val_samples": 512,
37
- "subtask_accuracy": 0.006696428571428571,
38
- "transition_accuracy": 0.8504464285714286,
39
- "next_action_accuracy": 0.024553571428571428,
40
- "contact_accuracy": 0.6450892857142857,
41
- "object_micro_f1": 0.22299431459254582
42
  },
43
  "models": {
44
  "motion_action": {
@@ -120,7 +96,8 @@
120
  "Pour coffee",
121
  "Pour milk into coffee",
122
  "Wait/Prepare for pouring"
123
- ]
 
124
  },
125
  "timeline_subtask": {
126
  "accuracy": 0.05813953488372093,
@@ -144,7 +121,8 @@
144
  "Pour coffee",
145
  "Pour milk into coffee",
146
  "Prepare for pouring"
147
- ]
 
148
  },
149
  "transition_detection": {
150
  "accuracy": 0.9080459770114943,
@@ -170,7 +148,8 @@
170
  "matched_boundaries": 2,
171
  "true_boundaries": 4,
172
  "predicted_boundaries": 28,
173
- "mean_abs_timing_error_frames": 3.5
 
174
  },
175
  "next_action": {
176
  "accuracy": 0.034482758620689655,
@@ -194,7 +173,8 @@
194
  "Pour coffee",
195
  "Pour milk into coffee",
196
  "Wait/Prepare for pouring"
197
- ]
 
198
  },
199
  "hand_trajectory_forecast": {
200
  "mse": 14.956222534179688,
@@ -209,7 +189,8 @@
209
  "forecast_frames": 10,
210
  "mpjpe": 0.8646570444107056,
211
  "final_frame_mpjpe": 1.0330793857574463,
212
- "target_dim": 1260
 
213
  },
214
  "contact_prediction": {
215
  "accuracy": 1.0,
@@ -228,7 +209,8 @@
228
  "majority_baseline_accuracy": 1.0,
229
  "train_final_accuracy": 1.0,
230
  "train_final_loss": 0.0006056802230887115,
231
- "unseen_test_classes": []
 
232
  },
233
  "object_relevance": {
234
  "micro_f1": 0.18034382095361662,
@@ -242,7 +224,8 @@
242
  "num_windows": 1161,
243
  "num_train_windows": 813,
244
  "num_test_windows": 348,
245
- "num_objects": 34
 
246
  },
247
  "caption_grounding": {
248
  "mrr": 0.016023479050338015,
@@ -257,7 +240,8 @@
257
  "output": "matching time window",
258
  "split": "chronological",
259
  "num_train_windows": 813,
260
- "num_test_windows": 348
 
261
  },
262
  "cross_modal_retrieval": {
263
  "mrr": 0.26925966892956127,
@@ -272,7 +256,8 @@
272
  "output": "matching depth/video window",
273
  "split": "chronological",
274
  "num_train_windows": 813,
275
- "num_test_windows": 348
 
276
  },
277
  "modality_reconstruction": {
278
  "mse": 1358.1593017578125,
@@ -284,7 +269,8 @@
284
  "split": "chronological",
285
  "num_train_windows": 813,
286
  "num_test_windows": 348,
287
- "target_dim": 5096
 
288
  },
289
  "temporal_order": {
290
  "accuracy": 0.4540229885057471,
@@ -303,7 +289,8 @@
303
  "num_samples": 2320,
304
  "num_train_samples": 1624,
305
  "num_test_samples": 696,
306
- "train_final_accuracy": 0.5086206896551724
 
307
  },
308
  "misalignment_detection": {
309
  "accuracy": 0.5158959537572254,
@@ -322,7 +309,8 @@
322
  "num_samples": 2306,
323
  "num_train_samples": 1614,
324
  "num_test_samples": 692,
325
- "train_final_accuracy": 0.49380421313506817
 
326
  }
327
  },
328
  "neural_model": {
@@ -368,7 +356,8 @@
368
  "neural_dropout": 0.1,
369
  "neural_device": "cpu",
370
  "train_final_loss": 0.04246756529782,
371
- "train_final_accuracy": 0.9875156054931336
 
372
  },
373
  "timeline_subtask": {
374
  "accuracy": 0.0377906976744186,
@@ -401,7 +390,8 @@
401
  "neural_dropout": 0.1,
402
  "neural_device": "cpu",
403
  "train_final_loss": 5.4104819144748596e-05,
404
- "train_final_accuracy": 1.0
 
405
  },
406
  "transition_detection": {
407
  "accuracy": 0.8735632183908046,
@@ -436,7 +426,8 @@
436
  "matched_boundaries": 3,
437
  "true_boundaries": 4,
438
  "predicted_boundaries": 42,
439
- "mean_abs_timing_error_frames": 2.6666666666666665
 
440
  },
441
  "next_action": {
442
  "accuracy": 0.02586206896551724,
@@ -469,7 +460,8 @@
469
  "neural_dropout": 0.1,
470
  "neural_device": "cpu",
471
  "train_final_loss": 0.000416612956025105,
472
- "train_final_accuracy": 1.0
 
473
  },
474
  "hand_trajectory_forecast": {
475
  "mse": 0.004775360692292452,
@@ -494,7 +486,8 @@
494
  "neural_weight_decay": 0.0001,
495
  "neural_dropout": 0.1,
496
  "neural_device": "cpu",
497
- "train_final_loss": 0.055699273420247435
 
498
  },
499
  "contact_prediction": {
500
  "accuracy": 1.0,
@@ -522,7 +515,8 @@
522
  "neural_dropout": 0.1,
523
  "neural_device": "cpu",
524
  "train_final_loss": 0.0,
525
- "train_final_accuracy": 1.0
 
526
  },
527
  "object_relevance": {
528
  "micro_f1": 0.1679279279279279,
@@ -547,7 +541,8 @@
547
  "neural_weight_decay": 0.0001,
548
  "neural_dropout": 0.1,
549
  "neural_device": "cpu",
550
- "train_final_loss": 0.003651880362182214
 
551
  },
552
  "caption_grounding": {
553
  "mrr": 0.01684125567132316,
@@ -573,7 +568,8 @@
573
  "neural_weight_decay": 0.0001,
574
  "neural_dropout": 0.1,
575
  "neural_device": "cpu",
576
- "train_final_loss": 0.06317874967483723
 
577
  },
578
  "cross_modal_retrieval": {
579
  "mrr": 0.1299971898648288,
@@ -599,7 +595,8 @@
599
  "neural_weight_decay": 0.0001,
600
  "neural_dropout": 0.1,
601
  "neural_device": "cpu",
602
- "train_final_loss": 0.21891545446596464
 
603
  },
604
  "modality_reconstruction": {
605
  "mse": 1351.3363037109375,
@@ -621,7 +618,8 @@
621
  "neural_weight_decay": 0.0001,
622
  "neural_dropout": 0.1,
623
  "neural_device": "cpu",
624
- "train_final_loss": 0.21891545446596464
 
625
  },
626
  "temporal_order": {
627
  "accuracy": 0.8577586206896551,
@@ -651,7 +649,8 @@
651
  "neural_dropout": 0.1,
652
  "neural_device": "cpu",
653
  "train_final_loss": 0.0005108328477586757,
654
- "train_final_accuracy": 1.0
 
655
  },
656
  "misalignment_detection": {
657
  "accuracy": 0.7008670520231214,
@@ -681,8 +680,23 @@
681
  "neural_dropout": 0.1,
682
  "neural_device": "cpu",
683
  "train_final_loss": 0.010604870708167664,
684
- "train_final_accuracy": 0.9956629491945477
 
685
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
686
  }
687
  },
688
  "feature_manifest": [
 
1
  {
2
  "omni_relay": {
3
+ "status": "verified_full_128_episode_diagnostic_result",
4
  "dataset": "ropedia-ai/xperience-10m",
5
+ "staging": "verified_public_package_and_adapter_publication",
6
+ "training_target": "action_subtask_quality_and_unseen_label_error_analysis",
7
  "selection_strategy": "stratified_round_robin_by_top_level_session",
8
  "target_episodes": 128,
9
  "selected_sessions": 128,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  "candidate_scan_top_level_sessions": 802,
11
  "valid_candidates": 12102,
12
  "estimated_bytes": 298188841943,
 
14
  "visualization.rrd"
15
  ],
16
  "access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
17
+ "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims."
 
 
 
 
 
 
 
 
 
18
  },
19
  "models": {
20
  "motion_action": {
 
96
  "Pour coffee",
97
  "Pour milk into coffee",
98
  "Wait/Prepare for pouring"
99
+ ],
100
+ "task_display_name": "Action Recognition"
101
  },
102
  "timeline_subtask": {
103
  "accuracy": 0.05813953488372093,
 
121
  "Pour coffee",
122
  "Pour milk into coffee",
123
  "Prepare for pouring"
124
+ ],
125
+ "task_display_name": "Procedure Step Recognition"
126
  },
127
  "transition_detection": {
128
  "accuracy": 0.9080459770114943,
 
148
  "matched_boundaries": 2,
149
  "true_boundaries": 4,
150
  "predicted_boundaries": 28,
151
+ "mean_abs_timing_error_frames": 3.5,
152
+ "task_display_name": "Action Boundary Detection"
153
  },
154
  "next_action": {
155
  "accuracy": 0.034482758620689655,
 
173
  "Pour coffee",
174
  "Pour milk into coffee",
175
  "Wait/Prepare for pouring"
176
+ ],
177
+ "task_display_name": "Next-Action Prediction"
178
  },
179
  "hand_trajectory_forecast": {
180
  "mse": 14.956222534179688,
 
189
  "forecast_frames": 10,
190
  "mpjpe": 0.8646570444107056,
191
  "final_frame_mpjpe": 1.0330793857574463,
192
+ "target_dim": 1260,
193
+ "task_display_name": "Hand Trajectory Forecasting"
194
  },
195
  "contact_prediction": {
196
  "accuracy": 1.0,
 
209
  "majority_baseline_accuracy": 1.0,
210
  "train_final_accuracy": 1.0,
211
  "train_final_loss": 0.0006056802230887115,
212
+ "unseen_test_classes": [],
213
+ "task_display_name": "Contact State Prediction"
214
  },
215
  "object_relevance": {
216
  "micro_f1": 0.18034382095361662,
 
224
  "num_windows": 1161,
225
  "num_train_windows": 813,
226
  "num_test_windows": 348,
227
+ "num_objects": 34,
228
+ "task_display_name": "Object Relevance Prediction"
229
  },
230
  "caption_grounding": {
231
  "mrr": 0.016023479050338015,
 
240
  "output": "matching time window",
241
  "split": "chronological",
242
  "num_train_windows": 813,
243
+ "num_test_windows": 348,
244
+ "task_display_name": "Language Grounding"
245
  },
246
  "cross_modal_retrieval": {
247
  "mrr": 0.26925966892956127,
 
256
  "output": "matching depth/video window",
257
  "split": "chronological",
258
  "num_train_windows": 813,
259
+ "num_test_windows": 348,
260
+ "task_display_name": "Cross-Modal Retrieval"
261
  },
262
  "modality_reconstruction": {
263
  "mse": 1358.1593017578125,
 
269
  "split": "chronological",
270
  "num_train_windows": 813,
271
  "num_test_windows": 348,
272
+ "target_dim": 5096,
273
+ "task_display_name": "Cross-Modal Reconstruction"
274
  },
275
  "temporal_order": {
276
  "accuracy": 0.4540229885057471,
 
289
  "num_samples": 2320,
290
  "num_train_samples": 1624,
291
  "num_test_samples": 696,
292
+ "train_final_accuracy": 0.5086206896551724,
293
+ "task_display_name": "Temporal Order Verification"
294
  },
295
  "misalignment_detection": {
296
  "accuracy": 0.5158959537572254,
 
309
  "num_samples": 2306,
310
  "num_train_samples": 1614,
311
  "num_test_samples": 692,
312
+ "train_final_accuracy": 0.49380421313506817,
313
+ "task_display_name": "Multimodal Synchronization Detection"
314
  }
315
  },
316
  "neural_model": {
 
356
  "neural_dropout": 0.1,
357
  "neural_device": "cpu",
358
  "train_final_loss": 0.04246756529782,
359
+ "train_final_accuracy": 0.9875156054931336,
360
+ "task_display_name": "Action Recognition"
361
  },
362
  "timeline_subtask": {
363
  "accuracy": 0.0377906976744186,
 
390
  "neural_dropout": 0.1,
391
  "neural_device": "cpu",
392
  "train_final_loss": 5.4104819144748596e-05,
393
+ "train_final_accuracy": 1.0,
394
+ "task_display_name": "Procedure Step Recognition"
395
  },
396
  "transition_detection": {
397
  "accuracy": 0.8735632183908046,
 
426
  "matched_boundaries": 3,
427
  "true_boundaries": 4,
428
  "predicted_boundaries": 42,
429
+ "mean_abs_timing_error_frames": 2.6666666666666665,
430
+ "task_display_name": "Action Boundary Detection"
431
  },
432
  "next_action": {
433
  "accuracy": 0.02586206896551724,
 
460
  "neural_dropout": 0.1,
461
  "neural_device": "cpu",
462
  "train_final_loss": 0.000416612956025105,
463
+ "train_final_accuracy": 1.0,
464
+ "task_display_name": "Next-Action Prediction"
465
  },
466
  "hand_trajectory_forecast": {
467
  "mse": 0.004775360692292452,
 
486
  "neural_weight_decay": 0.0001,
487
  "neural_dropout": 0.1,
488
  "neural_device": "cpu",
489
+ "train_final_loss": 0.055699273420247435,
490
+ "task_display_name": "Hand Trajectory Forecasting"
491
  },
492
  "contact_prediction": {
493
  "accuracy": 1.0,
 
515
  "neural_dropout": 0.1,
516
  "neural_device": "cpu",
517
  "train_final_loss": 0.0,
518
+ "train_final_accuracy": 1.0,
519
+ "task_display_name": "Contact State Prediction"
520
  },
521
  "object_relevance": {
522
  "micro_f1": 0.1679279279279279,
 
541
  "neural_weight_decay": 0.0001,
542
  "neural_dropout": 0.1,
543
  "neural_device": "cpu",
544
+ "train_final_loss": 0.003651880362182214,
545
+ "task_display_name": "Object Relevance Prediction"
546
  },
547
  "caption_grounding": {
548
  "mrr": 0.01684125567132316,
 
568
  "neural_weight_decay": 0.0001,
569
  "neural_dropout": 0.1,
570
  "neural_device": "cpu",
571
+ "train_final_loss": 0.06317874967483723,
572
+ "task_display_name": "Language Grounding"
573
  },
574
  "cross_modal_retrieval": {
575
  "mrr": 0.1299971898648288,
 
595
  "neural_weight_decay": 0.0001,
596
  "neural_dropout": 0.1,
597
  "neural_device": "cpu",
598
+ "train_final_loss": 0.21891545446596464,
599
+ "task_display_name": "Cross-Modal Retrieval"
600
  },
601
  "modality_reconstruction": {
602
  "mse": 1351.3363037109375,
 
618
  "neural_weight_decay": 0.0001,
619
  "neural_dropout": 0.1,
620
  "neural_device": "cpu",
621
+ "train_final_loss": 0.21891545446596464,
622
+ "task_display_name": "Cross-Modal Reconstruction"
623
  },
624
  "temporal_order": {
625
  "accuracy": 0.8577586206896551,
 
649
  "neural_dropout": 0.1,
650
  "neural_device": "cpu",
651
  "train_final_loss": 0.0005108328477586757,
652
+ "train_final_accuracy": 1.0,
653
+ "task_display_name": "Temporal Order Verification"
654
  },
655
  "misalignment_detection": {
656
  "accuracy": 0.7008670520231214,
 
680
  "neural_dropout": 0.1,
681
  "neural_device": "cpu",
682
  "train_final_loss": 0.010604870708167664,
683
+ "train_final_accuracy": 0.9956629491945477,
684
+ "task_display_name": "Multimodal Synchronization Detection"
685
  }
686
+ },
687
+ "task_display_names": {
688
+ "timeline_action": "Action Recognition",
689
+ "timeline_subtask": "Procedure Step Recognition",
690
+ "transition_detection": "Action Boundary Detection",
691
+ "next_action": "Next-Action Prediction",
692
+ "hand_trajectory_forecast": "Hand Trajectory Forecasting",
693
+ "contact_prediction": "Contact State Prediction",
694
+ "object_relevance": "Object Relevance Prediction",
695
+ "caption_grounding": "Language Grounding",
696
+ "cross_modal_retrieval": "Cross-Modal Retrieval",
697
+ "modality_reconstruction": "Cross-Modal Reconstruction",
698
+ "temporal_order": "Temporal Order Verification",
699
+ "misalignment_detection": "Multimodal Synchronization Detection"
700
  }
701
  },
702
  "feature_manifest": [
metrics/task_surface_integrity.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-06T17:43:54+00:00",
4
  "summary": {
5
  "task_count": 12,
6
  "expected_task_count": 12,
@@ -64,9 +64,9 @@
64
  "observed": "timeline_action"
65
  },
66
  {
67
- "name": "timeline_action: public_field_research_name_is_human_readable",
68
  "status": "pass",
69
- "value": "Egocentric Action Recognition",
70
  "raw_hits": []
71
  },
72
  {
@@ -76,15 +76,15 @@
76
  "raw_hits": []
77
  },
78
  {
79
- "name": "timeline_action: public_field_input_short_is_human_readable",
80
  "status": "pass",
81
- "value": "20-frame multimodal window",
82
  "raw_hits": []
83
  },
84
  {
85
- "name": "timeline_action: public_field_plain_goal_is_human_readable",
86
  "status": "pass",
87
- "value": "Look at one short multimodal window and name what action is happening now.",
88
  "raw_hits": []
89
  },
90
  {
@@ -94,15 +94,15 @@
94
  "raw_hits": []
95
  },
96
  {
97
- "name": "timeline_action: public_field_display_name_is_human_readable",
98
  "status": "pass",
99
- "value": "Action Recognition",
100
  "raw_hits": []
101
  },
102
  {
103
- "name": "timeline_action: public_field_process_short_is_human_readable",
104
  "status": "pass",
105
- "value": "window features -> action label builder -> classifier",
106
  "raw_hits": []
107
  },
108
  {
@@ -184,9 +184,9 @@
184
  "observed": "timeline_subtask"
185
  },
186
  {
187
- "name": "timeline_subtask: public_field_research_name_is_human_readable",
188
  "status": "pass",
189
- "value": "Temporal Subtask Recognition",
190
  "raw_hits": []
191
  },
192
  {
@@ -196,15 +196,15 @@
196
  "raw_hits": []
197
  },
198
  {
199
- "name": "timeline_subtask: public_field_input_short_is_human_readable",
200
  "status": "pass",
201
- "value": "20-frame multimodal window",
202
  "raw_hits": []
203
  },
204
  {
205
- "name": "timeline_subtask: public_field_plain_goal_is_human_readable",
206
  "status": "pass",
207
- "value": "Predict the higher-level task stage for the current window.",
208
  "raw_hits": []
209
  },
210
  {
@@ -214,15 +214,15 @@
214
  "raw_hits": []
215
  },
216
  {
217
- "name": "timeline_subtask: public_field_display_name_is_human_readable",
218
  "status": "pass",
219
- "value": "Procedure Step Recognition",
220
  "raw_hits": []
221
  },
222
  {
223
- "name": "timeline_subtask: public_field_process_short_is_human_readable",
224
  "status": "pass",
225
- "value": "window features -> subtask label builder -> classifier",
226
  "raw_hits": []
227
  },
228
  {
@@ -304,9 +304,9 @@
304
  "observed": "transition_detection"
305
  },
306
  {
307
- "name": "transition_detection: public_field_research_name_is_human_readable",
308
  "status": "pass",
309
- "value": "Temporal Action Segmentation",
310
  "raw_hits": []
311
  },
312
  {
@@ -316,15 +316,15 @@
316
  "raw_hits": []
317
  },
318
  {
319
- "name": "transition_detection: public_field_input_short_is_human_readable",
320
  "status": "pass",
321
- "value": "current window with boundary target",
322
  "raw_hits": []
323
  },
324
  {
325
- "name": "transition_detection: public_field_plain_goal_is_human_readable",
326
  "status": "pass",
327
- "value": "Detect whether the current window is near a boundary between actions.",
328
  "raw_hits": []
329
  },
330
  {
@@ -334,15 +334,15 @@
334
  "raw_hits": []
335
  },
336
  {
337
- "name": "transition_detection: public_field_display_name_is_human_readable",
338
  "status": "pass",
339
- "value": "Action Boundary Detection",
340
  "raw_hits": []
341
  },
342
  {
343
- "name": "transition_detection: public_field_process_short_is_human_readable",
344
  "status": "pass",
345
- "value": "action changes -> boundary labels -> binary classifier",
346
  "raw_hits": []
347
  },
348
  {
@@ -422,9 +422,9 @@
422
  "observed": "next_action"
423
  },
424
  {
425
- "name": "next_action: public_field_research_name_is_human_readable",
426
  "status": "pass",
427
- "value": "Short-Horizon Intention Prediction",
428
  "raw_hits": []
429
  },
430
  {
@@ -434,15 +434,15 @@
434
  "raw_hits": []
435
  },
436
  {
437
- "name": "next_action: public_field_input_short_is_human_readable",
438
  "status": "pass",
439
- "value": "current window at time t",
440
  "raw_hits": []
441
  },
442
  {
443
- "name": "next_action: public_field_plain_goal_is_human_readable",
444
  "status": "pass",
445
- "value": "Use the current window to guess the action that will happen shortly after it.",
446
  "raw_hits": []
447
  },
448
  {
@@ -452,15 +452,15 @@
452
  "raw_hits": []
453
  },
454
  {
455
- "name": "next_action: public_field_display_name_is_human_readable",
456
  "status": "pass",
457
- "value": "Next-Action Prediction",
458
  "raw_hits": []
459
  },
460
  {
461
- "name": "next_action: public_field_process_short_is_human_readable",
462
  "status": "pass",
463
- "value": "current features -> future label shift -> classifier",
464
  "raw_hits": []
465
  },
466
  {
@@ -540,9 +540,9 @@
540
  "observed": "hand_trajectory_forecast"
541
  },
542
  {
543
- "name": "hand_trajectory_forecast: public_field_research_name_is_human_readable",
544
  "status": "pass",
545
- "value": "3D Hand Motion Forecasting",
546
  "raw_hits": []
547
  },
548
  {
@@ -552,15 +552,15 @@
552
  "raw_hits": []
553
  },
554
  {
555
- "name": "hand_trajectory_forecast: public_field_input_short_is_human_readable",
556
  "status": "pass",
557
- "value": "current multimodal window",
558
  "raw_hits": []
559
  },
560
  {
561
- "name": "hand_trajectory_forecast: public_field_plain_goal_is_human_readable",
562
  "status": "pass",
563
- "value": "Predict where the hands will move over the next few frames.",
564
  "raw_hits": []
565
  },
566
  {
@@ -570,15 +570,15 @@
570
  "raw_hits": []
571
  },
572
  {
573
- "name": "hand_trajectory_forecast: public_field_display_name_is_human_readable",
574
  "status": "pass",
575
- "value": "Hand Trajectory Forecasting",
576
  "raw_hits": []
577
  },
578
  {
579
- "name": "hand_trajectory_forecast: public_field_process_short_is_human_readable",
580
  "status": "pass",
581
- "value": "current features -> future mocap target -> regression head",
582
  "raw_hits": []
583
  },
584
  {
@@ -658,9 +658,9 @@
658
  "observed": "contact_prediction"
659
  },
660
  {
661
- "name": "contact_prediction: public_field_research_name_is_human_readable",
662
  "status": "pass",
663
- "value": "Human-Object Contact Prediction",
664
  "raw_hits": []
665
  },
666
  {
@@ -670,15 +670,15 @@
670
  "raw_hits": []
671
  },
672
  {
673
- "name": "contact_prediction: public_field_input_short_is_human_readable",
674
  "status": "pass",
675
- "value": "non-contact, non-caption features",
676
  "raw_hits": []
677
  },
678
  {
679
- "name": "contact_prediction: public_field_plain_goal_is_human_readable",
680
  "status": "pass",
681
- "value": "Predict whether the body or hand is in contact with something.",
682
  "raw_hits": []
683
  },
684
  {
@@ -688,15 +688,15 @@
688
  "raw_hits": []
689
  },
690
  {
691
- "name": "contact_prediction: public_field_display_name_is_human_readable",
692
  "status": "pass",
693
- "value": "Contact State Prediction",
694
  "raw_hits": []
695
  },
696
  {
697
- "name": "contact_prediction: public_field_process_short_is_human_readable",
698
  "status": "pass",
699
- "value": "feature filter -> contact target -> binary classifier",
700
  "raw_hits": []
701
  },
702
  {
@@ -774,9 +774,9 @@
774
  "observed": "object_relevance"
775
  },
776
  {
777
- "name": "object_relevance: public_field_research_name_is_human_readable",
778
  "status": "pass",
779
- "value": "Object-Centric Interaction Recognition",
780
  "raw_hits": []
781
  },
782
  {
@@ -786,15 +786,15 @@
786
  "raw_hits": []
787
  },
788
  {
789
- "name": "object_relevance: public_field_input_short_is_human_readable",
790
  "status": "pass",
791
- "value": "non-caption multimodal features",
792
  "raw_hits": []
793
  },
794
  {
795
- "name": "object_relevance: public_field_plain_goal_is_human_readable",
796
  "status": "pass",
797
- "value": "Predict which objects matter in the current window.",
798
  "raw_hits": []
799
  },
800
  {
@@ -804,15 +804,15 @@
804
  "raw_hits": []
805
  },
806
  {
807
- "name": "object_relevance: public_field_display_name_is_human_readable",
808
  "status": "pass",
809
- "value": "Object Relevance Prediction",
810
  "raw_hits": []
811
  },
812
  {
813
- "name": "object_relevance: public_field_process_short_is_human_readable",
814
  "status": "pass",
815
- "value": "object vocabulary -> multi-hot labels -> sigmoid heads",
816
  "raw_hits": []
817
  },
818
  {
@@ -892,9 +892,9 @@
892
  "observed": "caption_grounding"
893
  },
894
  {
895
- "name": "caption_grounding: public_field_research_name_is_human_readable",
896
  "status": "pass",
897
- "value": "Language-to-Moment Grounding",
898
  "raw_hits": []
899
  },
900
  {
@@ -904,15 +904,15 @@
904
  "raw_hits": []
905
  },
906
  {
907
- "name": "caption_grounding: public_field_input_short_is_human_readable",
908
  "status": "pass",
909
- "value": "text-like query and candidate windows",
910
  "raw_hits": []
911
  },
912
  {
913
- "name": "caption_grounding: public_field_plain_goal_is_human_readable",
914
  "status": "pass",
915
- "value": "Given a text-like query from annotation, find the matching time window.",
916
  "raw_hits": []
917
  },
918
  {
@@ -922,15 +922,15 @@
922
  "raw_hits": []
923
  },
924
  {
925
- "name": "caption_grounding: public_field_display_name_is_human_readable",
926
  "status": "pass",
927
- "value": "Language Grounding",
928
  "raw_hits": []
929
  },
930
  {
931
- "name": "caption_grounding: public_field_process_short_is_human_readable",
932
  "status": "pass",
933
- "value": "query features -> candidate index -> cosine ranker",
934
  "raw_hits": []
935
  },
936
  {
@@ -1008,9 +1008,9 @@
1008
  "observed": "cross_modal_retrieval"
1009
  },
1010
  {
1011
- "name": "cross_modal_retrieval: public_field_research_name_is_human_readable",
1012
  "status": "pass",
1013
- "value": "Multimodal Representation Retrieval",
1014
  "raw_hits": []
1015
  },
1016
  {
@@ -1020,15 +1020,15 @@
1020
  "raw_hits": []
1021
  },
1022
  {
1023
- "name": "cross_modal_retrieval: public_field_input_short_is_human_readable",
1024
  "status": "pass",
1025
- "value": "motion/IMU/pose query; depth/video candidates",
1026
  "raw_hits": []
1027
  },
1028
  {
1029
- "name": "cross_modal_retrieval: public_field_plain_goal_is_human_readable",
1030
  "status": "pass",
1031
- "value": "Use one group of modalities to retrieve the matching window from another group.",
1032
  "raw_hits": []
1033
  },
1034
  {
@@ -1038,15 +1038,15 @@
1038
  "raw_hits": []
1039
  },
1040
  {
1041
- "name": "cross_modal_retrieval: public_field_display_name_is_human_readable",
1042
  "status": "pass",
1043
- "value": "Cross-Modal Retrieval",
1044
  "raw_hits": []
1045
  },
1046
  {
1047
- "name": "cross_modal_retrieval: public_field_process_short_is_human_readable",
1048
  "status": "pass",
1049
- "value": "modality split -> projection -> nearest-neighbor ranker",
1050
  "raw_hits": []
1051
  },
1052
  {
@@ -1126,9 +1126,9 @@
1126
  "observed": "modality_reconstruction"
1127
  },
1128
  {
1129
- "name": "modality_reconstruction: public_field_research_name_is_human_readable",
1130
  "status": "pass",
1131
- "value": "Modality Feature Reconstruction",
1132
  "raw_hits": []
1133
  },
1134
  {
@@ -1138,15 +1138,15 @@
1138
  "raw_hits": []
1139
  },
1140
  {
1141
- "name": "modality_reconstruction: public_field_input_short_is_human_readable",
1142
  "status": "pass",
1143
- "value": "motion, IMU, and camera/pose features",
1144
  "raw_hits": []
1145
  },
1146
  {
1147
- "name": "modality_reconstruction: public_field_plain_goal_is_human_readable",
1148
  "status": "pass",
1149
- "value": "Predict one modality feature block from other modality blocks.",
1150
  "raw_hits": []
1151
  },
1152
  {
@@ -1156,15 +1156,15 @@
1156
  "raw_hits": []
1157
  },
1158
  {
1159
- "name": "modality_reconstruction: public_field_display_name_is_human_readable",
1160
  "status": "pass",
1161
- "value": "Cross-Modal Reconstruction",
1162
  "raw_hits": []
1163
  },
1164
  {
1165
- "name": "modality_reconstruction: public_field_process_short_is_human_readable",
1166
  "status": "pass",
1167
- "value": "source-target split -> scaler -> regression head",
1168
  "raw_hits": []
1169
  },
1170
  {
@@ -1244,9 +1244,9 @@
1244
  "observed": "temporal_order"
1245
  },
1246
  {
1247
- "name": "temporal_order: public_field_research_name_is_human_readable",
1248
  "status": "pass",
1249
- "value": "Temporal Order Verification",
1250
  "raw_hits": []
1251
  },
1252
  {
@@ -1256,15 +1256,15 @@
1256
  "raw_hits": []
1257
  },
1258
  {
1259
- "name": "temporal_order: public_field_input_short_is_human_readable",
1260
  "status": "pass",
1261
- "value": "two adjacent windows plus difference vector",
1262
  "raw_hits": []
1263
  },
1264
  {
1265
- "name": "temporal_order: public_field_plain_goal_is_human_readable",
1266
  "status": "pass",
1267
- "value": "Tell whether two nearby windows are in the correct time order.",
1268
  "raw_hits": []
1269
  },
1270
  {
@@ -1274,15 +1274,15 @@
1274
  "raw_hits": []
1275
  },
1276
  {
1277
- "name": "temporal_order: public_field_display_name_is_human_readable",
1278
  "status": "pass",
1279
- "value": "Temporal Order Verification",
1280
  "raw_hits": []
1281
  },
1282
  {
1283
- "name": "temporal_order: public_field_process_short_is_human_readable",
1284
  "status": "pass",
1285
- "value": "pair builder -> feature combiner -> binary classifier",
1286
  "raw_hits": []
1287
  },
1288
  {
@@ -1360,9 +1360,9 @@
1360
  "observed": "misalignment_detection"
1361
  },
1362
  {
1363
- "name": "misalignment_detection: public_field_research_name_is_human_readable",
1364
  "status": "pass",
1365
- "value": "Cross-Modal Misalignment Detection",
1366
  "raw_hits": []
1367
  },
1368
  {
@@ -1372,15 +1372,15 @@
1372
  "raw_hits": []
1373
  },
1374
  {
1375
- "name": "misalignment_detection: public_field_input_short_is_human_readable",
1376
  "status": "pass",
1377
- "value": "motion-side and visual/depth-side feature groups",
1378
  "raw_hits": []
1379
  },
1380
  {
1381
- "name": "misalignment_detection: public_field_plain_goal_is_human_readable",
1382
  "status": "pass",
1383
- "value": "Detect when modalities that should match are shifted out of sync.",
1384
  "raw_hits": []
1385
  },
1386
  {
@@ -1390,15 +1390,15 @@
1390
  "raw_hits": []
1391
  },
1392
  {
1393
- "name": "misalignment_detection: public_field_display_name_is_human_readable",
1394
  "status": "pass",
1395
- "value": "Multimodal Synchronization Detection",
1396
  "raw_hits": []
1397
  },
1398
  {
1399
- "name": "misalignment_detection: public_field_process_short_is_human_readable",
1400
  "status": "pass",
1401
- "value": "aligned/shifted pairs -> feature combiner -> binary classifier",
1402
  "raw_hits": []
1403
  },
1404
  {
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-06T23:27:06+00:00",
4
  "summary": {
5
  "task_count": 12,
6
  "expected_task_count": 12,
 
64
  "observed": "timeline_action"
65
  },
66
  {
67
+ "name": "timeline_action: public_field_plain_goal_is_human_readable",
68
  "status": "pass",
69
+ "value": "Look at one short multimodal window and name what action is happening now.",
70
  "raw_hits": []
71
  },
72
  {
 
76
  "raw_hits": []
77
  },
78
  {
79
+ "name": "timeline_action: public_field_display_name_is_human_readable",
80
  "status": "pass",
81
+ "value": "Action Recognition",
82
  "raw_hits": []
83
  },
84
  {
85
+ "name": "timeline_action: public_field_input_short_is_human_readable",
86
  "status": "pass",
87
+ "value": "20-frame multimodal window",
88
  "raw_hits": []
89
  },
90
  {
 
94
  "raw_hits": []
95
  },
96
  {
97
+ "name": "timeline_action: public_field_process_short_is_human_readable",
98
  "status": "pass",
99
+ "value": "window features -> action label builder -> classifier",
100
  "raw_hits": []
101
  },
102
  {
103
+ "name": "timeline_action: public_field_research_name_is_human_readable",
104
  "status": "pass",
105
+ "value": "Egocentric Action Recognition",
106
  "raw_hits": []
107
  },
108
  {
 
184
  "observed": "timeline_subtask"
185
  },
186
  {
187
+ "name": "timeline_subtask: public_field_plain_goal_is_human_readable",
188
  "status": "pass",
189
+ "value": "Predict the higher-level task stage for the current window.",
190
  "raw_hits": []
191
  },
192
  {
 
196
  "raw_hits": []
197
  },
198
  {
199
+ "name": "timeline_subtask: public_field_display_name_is_human_readable",
200
  "status": "pass",
201
+ "value": "Procedure Step Recognition",
202
  "raw_hits": []
203
  },
204
  {
205
+ "name": "timeline_subtask: public_field_input_short_is_human_readable",
206
  "status": "pass",
207
+ "value": "20-frame multimodal window",
208
  "raw_hits": []
209
  },
210
  {
 
214
  "raw_hits": []
215
  },
216
  {
217
+ "name": "timeline_subtask: public_field_process_short_is_human_readable",
218
  "status": "pass",
219
+ "value": "window features -> subtask label builder -> classifier",
220
  "raw_hits": []
221
  },
222
  {
223
+ "name": "timeline_subtask: public_field_research_name_is_human_readable",
224
  "status": "pass",
225
+ "value": "Temporal Subtask Recognition",
226
  "raw_hits": []
227
  },
228
  {
 
304
  "observed": "transition_detection"
305
  },
306
  {
307
+ "name": "transition_detection: public_field_plain_goal_is_human_readable",
308
  "status": "pass",
309
+ "value": "Detect whether the current window is near a boundary between actions.",
310
  "raw_hits": []
311
  },
312
  {
 
316
  "raw_hits": []
317
  },
318
  {
319
+ "name": "transition_detection: public_field_display_name_is_human_readable",
320
  "status": "pass",
321
+ "value": "Action Boundary Detection",
322
  "raw_hits": []
323
  },
324
  {
325
+ "name": "transition_detection: public_field_input_short_is_human_readable",
326
  "status": "pass",
327
+ "value": "current window with boundary target",
328
  "raw_hits": []
329
  },
330
  {
 
334
  "raw_hits": []
335
  },
336
  {
337
+ "name": "transition_detection: public_field_process_short_is_human_readable",
338
  "status": "pass",
339
+ "value": "action changes -> boundary labels -> binary classifier",
340
  "raw_hits": []
341
  },
342
  {
343
+ "name": "transition_detection: public_field_research_name_is_human_readable",
344
  "status": "pass",
345
+ "value": "Temporal Action Segmentation",
346
  "raw_hits": []
347
  },
348
  {
 
422
  "observed": "next_action"
423
  },
424
  {
425
+ "name": "next_action: public_field_plain_goal_is_human_readable",
426
  "status": "pass",
427
+ "value": "Use the current window to guess the action that will happen shortly after it.",
428
  "raw_hits": []
429
  },
430
  {
 
434
  "raw_hits": []
435
  },
436
  {
437
+ "name": "next_action: public_field_display_name_is_human_readable",
438
  "status": "pass",
439
+ "value": "Next-Action Prediction",
440
  "raw_hits": []
441
  },
442
  {
443
+ "name": "next_action: public_field_input_short_is_human_readable",
444
  "status": "pass",
445
+ "value": "current window at time t",
446
  "raw_hits": []
447
  },
448
  {
 
452
  "raw_hits": []
453
  },
454
  {
455
+ "name": "next_action: public_field_process_short_is_human_readable",
456
  "status": "pass",
457
+ "value": "current features -> future label shift -> classifier",
458
  "raw_hits": []
459
  },
460
  {
461
+ "name": "next_action: public_field_research_name_is_human_readable",
462
  "status": "pass",
463
+ "value": "Short-Horizon Intention Prediction",
464
  "raw_hits": []
465
  },
466
  {
 
540
  "observed": "hand_trajectory_forecast"
541
  },
542
  {
543
+ "name": "hand_trajectory_forecast: public_field_plain_goal_is_human_readable",
544
  "status": "pass",
545
+ "value": "Predict where the hands will move over the next few frames.",
546
  "raw_hits": []
547
  },
548
  {
 
552
  "raw_hits": []
553
  },
554
  {
555
+ "name": "hand_trajectory_forecast: public_field_display_name_is_human_readable",
556
  "status": "pass",
557
+ "value": "Hand Trajectory Forecasting",
558
  "raw_hits": []
559
  },
560
  {
561
+ "name": "hand_trajectory_forecast: public_field_input_short_is_human_readable",
562
  "status": "pass",
563
+ "value": "current multimodal window",
564
  "raw_hits": []
565
  },
566
  {
 
570
  "raw_hits": []
571
  },
572
  {
573
+ "name": "hand_trajectory_forecast: public_field_process_short_is_human_readable",
574
  "status": "pass",
575
+ "value": "current features -> future mocap target -> regression head",
576
  "raw_hits": []
577
  },
578
  {
579
+ "name": "hand_trajectory_forecast: public_field_research_name_is_human_readable",
580
  "status": "pass",
581
+ "value": "3D Hand Motion Forecasting",
582
  "raw_hits": []
583
  },
584
  {
 
658
  "observed": "contact_prediction"
659
  },
660
  {
661
+ "name": "contact_prediction: public_field_plain_goal_is_human_readable",
662
  "status": "pass",
663
+ "value": "Predict whether the body or hand is in contact with something.",
664
  "raw_hits": []
665
  },
666
  {
 
670
  "raw_hits": []
671
  },
672
  {
673
+ "name": "contact_prediction: public_field_display_name_is_human_readable",
674
  "status": "pass",
675
+ "value": "Contact State Prediction",
676
  "raw_hits": []
677
  },
678
  {
679
+ "name": "contact_prediction: public_field_input_short_is_human_readable",
680
  "status": "pass",
681
+ "value": "non-contact, non-caption features",
682
  "raw_hits": []
683
  },
684
  {
 
688
  "raw_hits": []
689
  },
690
  {
691
+ "name": "contact_prediction: public_field_process_short_is_human_readable",
692
  "status": "pass",
693
+ "value": "feature filter -> contact target -> binary classifier",
694
  "raw_hits": []
695
  },
696
  {
697
+ "name": "contact_prediction: public_field_research_name_is_human_readable",
698
  "status": "pass",
699
+ "value": "Human-Object Contact Prediction",
700
  "raw_hits": []
701
  },
702
  {
 
774
  "observed": "object_relevance"
775
  },
776
  {
777
+ "name": "object_relevance: public_field_plain_goal_is_human_readable",
778
  "status": "pass",
779
+ "value": "Predict which objects matter in the current window.",
780
  "raw_hits": []
781
  },
782
  {
 
786
  "raw_hits": []
787
  },
788
  {
789
+ "name": "object_relevance: public_field_display_name_is_human_readable",
790
  "status": "pass",
791
+ "value": "Object Relevance Prediction",
792
  "raw_hits": []
793
  },
794
  {
795
+ "name": "object_relevance: public_field_input_short_is_human_readable",
796
  "status": "pass",
797
+ "value": "non-caption multimodal features",
798
  "raw_hits": []
799
  },
800
  {
 
804
  "raw_hits": []
805
  },
806
  {
807
+ "name": "object_relevance: public_field_process_short_is_human_readable",
808
  "status": "pass",
809
+ "value": "object vocabulary -> multi-hot labels -> sigmoid heads",
810
  "raw_hits": []
811
  },
812
  {
813
+ "name": "object_relevance: public_field_research_name_is_human_readable",
814
  "status": "pass",
815
+ "value": "Object-Centric Interaction Recognition",
816
  "raw_hits": []
817
  },
818
  {
 
892
  "observed": "caption_grounding"
893
  },
894
  {
895
+ "name": "caption_grounding: public_field_plain_goal_is_human_readable",
896
  "status": "pass",
897
+ "value": "Given a text-like query from annotation, find the matching time window.",
898
  "raw_hits": []
899
  },
900
  {
 
904
  "raw_hits": []
905
  },
906
  {
907
+ "name": "caption_grounding: public_field_display_name_is_human_readable",
908
  "status": "pass",
909
+ "value": "Language Grounding",
910
  "raw_hits": []
911
  },
912
  {
913
+ "name": "caption_grounding: public_field_input_short_is_human_readable",
914
  "status": "pass",
915
+ "value": "text-like query and candidate windows",
916
  "raw_hits": []
917
  },
918
  {
 
922
  "raw_hits": []
923
  },
924
  {
925
+ "name": "caption_grounding: public_field_process_short_is_human_readable",
926
  "status": "pass",
927
+ "value": "query features -> candidate index -> cosine ranker",
928
  "raw_hits": []
929
  },
930
  {
931
+ "name": "caption_grounding: public_field_research_name_is_human_readable",
932
  "status": "pass",
933
+ "value": "Language-to-Moment Grounding",
934
  "raw_hits": []
935
  },
936
  {
 
1008
  "observed": "cross_modal_retrieval"
1009
  },
1010
  {
1011
+ "name": "cross_modal_retrieval: public_field_plain_goal_is_human_readable",
1012
  "status": "pass",
1013
+ "value": "Use one group of modalities to retrieve the matching window from another group.",
1014
  "raw_hits": []
1015
  },
1016
  {
 
1020
  "raw_hits": []
1021
  },
1022
  {
1023
+ "name": "cross_modal_retrieval: public_field_display_name_is_human_readable",
1024
  "status": "pass",
1025
+ "value": "Cross-Modal Retrieval",
1026
  "raw_hits": []
1027
  },
1028
  {
1029
+ "name": "cross_modal_retrieval: public_field_input_short_is_human_readable",
1030
  "status": "pass",
1031
+ "value": "motion/IMU/pose query; depth/video candidates",
1032
  "raw_hits": []
1033
  },
1034
  {
 
1038
  "raw_hits": []
1039
  },
1040
  {
1041
+ "name": "cross_modal_retrieval: public_field_process_short_is_human_readable",
1042
  "status": "pass",
1043
+ "value": "modality split -> projection -> nearest-neighbor ranker",
1044
  "raw_hits": []
1045
  },
1046
  {
1047
+ "name": "cross_modal_retrieval: public_field_research_name_is_human_readable",
1048
  "status": "pass",
1049
+ "value": "Multimodal Representation Retrieval",
1050
  "raw_hits": []
1051
  },
1052
  {
 
1126
  "observed": "modality_reconstruction"
1127
  },
1128
  {
1129
+ "name": "modality_reconstruction: public_field_plain_goal_is_human_readable",
1130
  "status": "pass",
1131
+ "value": "Predict one modality feature block from other modality blocks.",
1132
  "raw_hits": []
1133
  },
1134
  {
 
1138
  "raw_hits": []
1139
  },
1140
  {
1141
+ "name": "modality_reconstruction: public_field_display_name_is_human_readable",
1142
  "status": "pass",
1143
+ "value": "Cross-Modal Reconstruction",
1144
  "raw_hits": []
1145
  },
1146
  {
1147
+ "name": "modality_reconstruction: public_field_input_short_is_human_readable",
1148
  "status": "pass",
1149
+ "value": "motion, IMU, and camera/pose features",
1150
  "raw_hits": []
1151
  },
1152
  {
 
1156
  "raw_hits": []
1157
  },
1158
  {
1159
+ "name": "modality_reconstruction: public_field_process_short_is_human_readable",
1160
  "status": "pass",
1161
+ "value": "source-target split -> scaler -> regression head",
1162
  "raw_hits": []
1163
  },
1164
  {
1165
+ "name": "modality_reconstruction: public_field_research_name_is_human_readable",
1166
  "status": "pass",
1167
+ "value": "Modality Feature Reconstruction",
1168
  "raw_hits": []
1169
  },
1170
  {
 
1244
  "observed": "temporal_order"
1245
  },
1246
  {
1247
+ "name": "temporal_order: public_field_plain_goal_is_human_readable",
1248
  "status": "pass",
1249
+ "value": "Tell whether two nearby windows are in the correct time order.",
1250
  "raw_hits": []
1251
  },
1252
  {
 
1256
  "raw_hits": []
1257
  },
1258
  {
1259
+ "name": "temporal_order: public_field_display_name_is_human_readable",
1260
  "status": "pass",
1261
+ "value": "Temporal Order Verification",
1262
  "raw_hits": []
1263
  },
1264
  {
1265
+ "name": "temporal_order: public_field_input_short_is_human_readable",
1266
  "status": "pass",
1267
+ "value": "two adjacent windows plus difference vector",
1268
  "raw_hits": []
1269
  },
1270
  {
 
1274
  "raw_hits": []
1275
  },
1276
  {
1277
+ "name": "temporal_order: public_field_process_short_is_human_readable",
1278
  "status": "pass",
1279
+ "value": "pair builder -> feature combiner -> binary classifier",
1280
  "raw_hits": []
1281
  },
1282
  {
1283
+ "name": "temporal_order: public_field_research_name_is_human_readable",
1284
  "status": "pass",
1285
+ "value": "Temporal Order Verification",
1286
  "raw_hits": []
1287
  },
1288
  {
 
1360
  "observed": "misalignment_detection"
1361
  },
1362
  {
1363
+ "name": "misalignment_detection: public_field_plain_goal_is_human_readable",
1364
  "status": "pass",
1365
+ "value": "Detect when modalities that should match are shifted out of sync.",
1366
  "raw_hits": []
1367
  },
1368
  {
 
1372
  "raw_hits": []
1373
  },
1374
  {
1375
+ "name": "misalignment_detection: public_field_display_name_is_human_readable",
1376
  "status": "pass",
1377
+ "value": "Multimodal Synchronization Detection",
1378
  "raw_hits": []
1379
  },
1380
  {
1381
+ "name": "misalignment_detection: public_field_input_short_is_human_readable",
1382
  "status": "pass",
1383
+ "value": "motion-side and visual/depth-side feature groups",
1384
  "raw_hits": []
1385
  },
1386
  {
 
1390
  "raw_hits": []
1391
  },
1392
  {
1393
+ "name": "misalignment_detection: public_field_process_short_is_human_readable",
1394
  "status": "pass",
1395
+ "value": "aligned/shifted pairs -> feature combiner -> binary classifier",
1396
  "raw_hits": []
1397
  },
1398
  {
1399
+ "name": "misalignment_detection: public_field_research_name_is_human_readable",
1400
  "status": "pass",
1401
+ "value": "Cross-Modal Misalignment Detection",
1402
  "raw_hits": []
1403
  },
1404
  {
metrics/website_integrity.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-06T17:43:55+00:00",
4
  "docs_root": "docs",
5
  "site_base": "/ropedia-xperience-10m-task-suite/",
6
  "summary": {
7
  "html_pages": 4,
8
- "local_references": 133,
9
- "external_reference_count": 107,
10
- "json_files": 34,
11
  "image_assets_referenced": 22,
12
  "failure_count": 0
13
  },
@@ -75,7 +75,7 @@
75
  "status": "pass",
76
  "reason": "The project overview should appear before the deeper progress ledger.",
77
  "overview_index": 67412,
78
- "evidence_index": 90414
79
  },
80
  {
81
  "name": "project_status_links_json",
@@ -153,8 +153,8 @@
153
  "status": "pass",
154
  "reason": "The evaluation protocol should appear before the deeper evidence ledger.",
155
  "overview_index": 67412,
156
- "protocol_index": 87152,
157
- "evidence_index": 90414
158
  },
159
  {
160
  "name": "evaluation_protocol_links_json",
@@ -228,7 +228,7 @@
228
  {
229
  "path": "index.html",
230
  "id_count": 77,
231
- "reference_count": 110,
232
  "image_count": 24
233
  },
234
  {
@@ -252,12 +252,12 @@
252
  },
253
  {
254
  "path": "data/artifact_index.json",
255
- "bytes": 41126,
256
  "top_level_type": "dict"
257
  },
258
  {
259
  "path": "data/audio_ablation_summary.json",
260
- "bytes": 9701,
261
  "top_level_type": "dict"
262
  },
263
  {
@@ -267,7 +267,7 @@
267
  },
268
  {
269
  "path": "data/evaluation_protocol.json",
270
- "bytes": 13788,
271
  "top_level_type": "dict"
272
  },
273
  {
@@ -282,7 +282,7 @@
282
  },
283
  {
284
  "path": "data/foundation_model_plan.json",
285
- "bytes": 13112,
286
  "top_level_type": "dict"
287
  },
288
  {
@@ -292,7 +292,7 @@
292
  },
293
  {
294
  "path": "data/mirror_parity.json",
295
- "bytes": 131036,
296
  "top_level_type": "dict"
297
  },
298
  {
@@ -302,27 +302,32 @@
302
  },
303
  {
304
  "path": "data/omni_finetune_verified_result.json",
305
- "bytes": 4213,
 
 
 
 
 
306
  "top_level_type": "dict"
307
  },
308
  {
309
  "path": "data/project_brief.json",
310
- "bytes": 3752,
311
  "top_level_type": "dict"
312
  },
313
  {
314
  "path": "data/project_manifest.json",
315
- "bytes": 4927,
316
  "top_level_type": "dict"
317
  },
318
  {
319
  "path": "data/project_packet.json",
320
- "bytes": 7802,
321
  "top_level_type": "dict"
322
  },
323
  {
324
  "path": "data/project_status.json",
325
- "bytes": 12602,
326
  "top_level_type": "dict"
327
  },
328
  {
@@ -347,7 +352,7 @@
347
  },
348
  {
349
  "path": "data/reproducibility_matrix.json",
350
- "bytes": 5223,
351
  "top_level_type": "dict"
352
  },
353
  {
@@ -357,32 +362,32 @@
357
  },
358
  {
359
  "path": "data/research_directions.json",
360
- "bytes": 14414,
361
  "top_level_type": "dict"
362
  },
363
  {
364
  "path": "data/research_roadmap.json",
365
- "bytes": 10052,
366
  "top_level_type": "dict"
367
  },
368
  {
369
  "path": "data/research_roadmap_interactive.json",
370
- "bytes": 142418,
371
  "top_level_type": "dict"
372
  },
373
  {
374
  "path": "data/research_takeaways.json",
375
- "bytes": 7102,
376
  "top_level_type": "dict"
377
  },
378
  {
379
  "path": "data/scope_claims_audit.json",
380
- "bytes": 20823,
381
  "top_level_type": "dict"
382
  },
383
  {
384
  "path": "data/single_episode_explorer.json",
385
- "bytes": 4297465,
386
  "top_level_type": "dict"
387
  },
388
  {
@@ -392,7 +397,7 @@
392
  },
393
  {
394
  "path": "data/summary_metrics.json",
395
- "bytes": 26028,
396
  "top_level_type": "dict"
397
  },
398
  {
@@ -407,7 +412,7 @@
407
  },
408
  {
409
  "path": "data/website_integrity.json",
410
- "bytes": 15259,
411
  "top_level_type": "dict"
412
  },
413
  {
@@ -450,21 +455,21 @@
450
  {
451
  "path": "assets/charts/episode_task_scores.svg",
452
  "exists": true,
453
- "bytes": 5903,
454
  "format": "SVG",
455
  "has_viewbox": true
456
  },
457
  {
458
  "path": "assets/charts/episode_task_scores_minimal_vs_neural.svg",
459
  "exists": true,
460
- "bytes": 10040,
461
  "format": "SVG",
462
  "has_viewbox": true
463
  },
464
  {
465
  "path": "assets/charts/episode_task_scores_neural_mlp.svg",
466
  "exists": true,
467
- "bytes": 5917,
468
  "format": "SVG",
469
  "has_viewbox": true
470
  },
@@ -485,7 +490,7 @@
485
  {
486
  "path": "assets/charts/research_direction_coverage.svg",
487
  "exists": true,
488
- "bytes": 4968,
489
  "format": "SVG",
490
  "has_viewbox": true
491
  },
@@ -571,7 +576,7 @@
571
  {
572
  "path": "assets/task_architectures.png",
573
  "exists": true,
574
- "bytes": 761507,
575
  "width": 1800,
576
  "height": 2450,
577
  "format": "PNG"
@@ -579,7 +584,7 @@
579
  {
580
  "path": "assets/task_suite_infographic.png",
581
  "exists": true,
582
- "bytes": 2612510,
583
  "width": 1800,
584
  "height": 6600,
585
  "format": "PNG"
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-06T23:27:27+00:00",
4
  "docs_root": "docs",
5
  "site_base": "/ropedia-xperience-10m-task-suite/",
6
  "summary": {
7
  "html_pages": 4,
8
+ "local_references": 136,
9
+ "external_reference_count": 106,
10
+ "json_files": 35,
11
  "image_assets_referenced": 22,
12
  "failure_count": 0
13
  },
 
75
  "status": "pass",
76
  "reason": "The project overview should appear before the deeper progress ledger.",
77
  "overview_index": 67412,
78
+ "evidence_index": 90421
79
  },
80
  {
81
  "name": "project_status_links_json",
 
153
  "status": "pass",
154
  "reason": "The evaluation protocol should appear before the deeper evidence ledger.",
155
  "overview_index": 67412,
156
+ "protocol_index": 87159,
157
+ "evidence_index": 90421
158
  },
159
  {
160
  "name": "evaluation_protocol_links_json",
 
228
  {
229
  "path": "index.html",
230
  "id_count": 77,
231
+ "reference_count": 113,
232
  "image_count": 24
233
  },
234
  {
 
252
  },
253
  {
254
  "path": "data/artifact_index.json",
255
+ "bytes": 60162,
256
  "top_level_type": "dict"
257
  },
258
  {
259
  "path": "data/audio_ablation_summary.json",
260
+ "bytes": 10370,
261
  "top_level_type": "dict"
262
  },
263
  {
 
267
  },
268
  {
269
  "path": "data/evaluation_protocol.json",
270
+ "bytes": 14511,
271
  "top_level_type": "dict"
272
  },
273
  {
 
282
  },
283
  {
284
  "path": "data/foundation_model_plan.json",
285
+ "bytes": 13193,
286
  "top_level_type": "dict"
287
  },
288
  {
 
292
  },
293
  {
294
  "path": "data/mirror_parity.json",
295
+ "bytes": 235815,
296
  "top_level_type": "dict"
297
  },
298
  {
 
302
  },
303
  {
304
  "path": "data/omni_finetune_verified_result.json",
305
+ "bytes": 3483,
306
+ "top_level_type": "dict"
307
+ },
308
+ {
309
+ "path": "data/omni_model_comparison.json",
310
+ "bytes": 21433,
311
  "top_level_type": "dict"
312
  },
313
  {
314
  "path": "data/project_brief.json",
315
+ "bytes": 3811,
316
  "top_level_type": "dict"
317
  },
318
  {
319
  "path": "data/project_manifest.json",
320
+ "bytes": 5193,
321
  "top_level_type": "dict"
322
  },
323
  {
324
  "path": "data/project_packet.json",
325
+ "bytes": 7943,
326
  "top_level_type": "dict"
327
  },
328
  {
329
  "path": "data/project_status.json",
330
+ "bytes": 15049,
331
  "top_level_type": "dict"
332
  },
333
  {
 
352
  },
353
  {
354
  "path": "data/reproducibility_matrix.json",
355
+ "bytes": 5280,
356
  "top_level_type": "dict"
357
  },
358
  {
 
362
  },
363
  {
364
  "path": "data/research_directions.json",
365
+ "bytes": 16694,
366
  "top_level_type": "dict"
367
  },
368
  {
369
  "path": "data/research_roadmap.json",
370
+ "bytes": 10133,
371
  "top_level_type": "dict"
372
  },
373
  {
374
  "path": "data/research_roadmap_interactive.json",
375
+ "bytes": 143560,
376
  "top_level_type": "dict"
377
  },
378
  {
379
  "path": "data/research_takeaways.json",
380
+ "bytes": 7139,
381
  "top_level_type": "dict"
382
  },
383
  {
384
  "path": "data/scope_claims_audit.json",
385
+ "bytes": 21234,
386
  "top_level_type": "dict"
387
  },
388
  {
389
  "path": "data/single_episode_explorer.json",
390
+ "bytes": 4305527,
391
  "top_level_type": "dict"
392
  },
393
  {
 
397
  },
398
  {
399
  "path": "data/summary_metrics.json",
400
+ "bytes": 27490,
401
  "top_level_type": "dict"
402
  },
403
  {
 
412
  },
413
  {
414
  "path": "data/website_integrity.json",
415
+ "bytes": 15777,
416
  "top_level_type": "dict"
417
  },
418
  {
 
455
  {
456
  "path": "assets/charts/episode_task_scores.svg",
457
  "exists": true,
458
+ "bytes": 5983,
459
  "format": "SVG",
460
  "has_viewbox": true
461
  },
462
  {
463
  "path": "assets/charts/episode_task_scores_minimal_vs_neural.svg",
464
  "exists": true,
465
+ "bytes": 10200,
466
  "format": "SVG",
467
  "has_viewbox": true
468
  },
469
  {
470
  "path": "assets/charts/episode_task_scores_neural_mlp.svg",
471
  "exists": true,
472
+ "bytes": 5997,
473
  "format": "SVG",
474
  "has_viewbox": true
475
  },
 
490
  {
491
  "path": "assets/charts/research_direction_coverage.svg",
492
  "exists": true,
493
+ "bytes": 5078,
494
  "format": "SVG",
495
  "has_viewbox": true
496
  },
 
576
  {
577
  "path": "assets/task_architectures.png",
578
  "exists": true,
579
+ "bytes": 774391,
580
  "width": 1800,
581
  "height": 2450,
582
  "format": "PNG"
 
584
  {
585
  "path": "assets/task_suite_infographic.png",
586
  "exists": true,
587
+ "bytes": 1588641,
588
  "width": 1800,
589
  "height": 6600,
590
  "format": "PNG"