cy0307 committed on
Commit
d8a3faa
·
verified ·
1 Parent(s): 8a19bcd

Add files using upload-large-folder tool

Browse files
Files changed (21) hide show
  1. data/artifact_index.json +22 -22
  2. data/episode128_task_model_radar.json +77 -77
  3. data/mirror_parity.json +762 -190
  4. data/public_surface_qa.json +7 -7
  5. data/quality_gates.json +1 -1
  6. data/source_alignment_audit.json +1 -1
  7. data/task_method_20_gap_audit.json +12 -72
  8. data/task_method_20_result_matrix.json +45 -45
  9. data/task_surface_integrity.json +1 -1
  10. data/unified_task_model_radar.json +90 -90
  11. data/website_integrity.json +15 -15
  12. results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/RUN_REPORT.md +11 -0
  13. results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/misalignment_detection/metrics.json +33 -0
  14. results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/next_subtask_forecast/metrics.json +157 -0
  15. results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/object_set_forecast/metrics.json +30 -0
  16. results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/server_info.json +30 -0
  17. results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/temporal_order/metrics.json +33 -0
  18. results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard0.progress.jsonl +0 -0
  19. results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard1.progress.jsonl +0 -0
  20. results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard2.progress.jsonl +0 -0
  21. results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard3.progress.jsonl +0 -0
data/artifact_index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Artifact Index",
3
- "generated_at_utc": "2026-06-20T14:03:31+00:00",
4
  "status": "pass",
5
  "artifact_count": 214,
6
  "missing": [],
@@ -610,7 +610,7 @@
610
  "shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
611
  "exists": true,
612
  "bytes": 4432,
613
- "sha256": "03e110008eda658d725dc690ababf5e07dab8bab7ec4a6155d237ab9f9f95ecc"
614
  },
615
  {
616
  "id": "source_alignment_validator",
@@ -730,8 +730,8 @@
730
  "surface": "website_hf",
731
  "shows": "Stores normalized 20-axis radar values, raw task metrics, Qwen3/Cosmos overlay mappings, branch-card caveats, and explicit scoreless status records.",
732
  "exists": true,
733
- "bytes": 228639,
734
- "sha256": "5e163cc77679766fd86fdc40b324e61417f5a355b447922c2fce1d17ba52730e"
735
  },
736
  {
737
  "id": "single_episode_task_model_radar_json",
@@ -742,7 +742,7 @@
742
  "shows": "Machine-readable split radar for the one-episode Minimal and Neural MLP baselines, both scored on all 20 task contracts.",
743
  "exists": true,
744
  "bytes": 51064,
745
- "sha256": "5176552d9b735d36a1033277ad64b68038cc27605565ec9d714397d2a6de7623"
746
  },
747
  {
748
  "id": "episode128_task_model_radar_json",
@@ -752,8 +752,8 @@
752
  "surface": "website_hf",
753
  "shows": "Machine-readable split radar for selected 128-episode metadata/raw baselines and verified Qwen3/Cosmos branches, preserving explicit scoreless cells.",
754
  "exists": true,
755
- "bytes": 184785,
756
- "sha256": "5d6119e3233992b9d4b4d9d9dc9a42024b419bb95de78028f9831e87fac6e88e"
757
  },
758
  {
759
  "id": "task_method_20_result_matrix_json",
@@ -763,8 +763,8 @@
763
  "surface": "website_hf",
764
  "shows": "Machine-readable 9-method by 20-task matrix where every method has 20 records and scoreless cells carry unsupported/not-evaluated reasons.",
765
  "exists": true,
766
- "bytes": 128510,
767
- "sha256": "13722fe288669b640230ac894909d247f348cf0b3477f6f223943b87c728af1a"
768
  },
769
  {
770
  "id": "task_method_20_result_matrix",
@@ -774,8 +774,8 @@
774
  "surface": "repo_hf",
775
  "shows": "Reader-facing table that separates 20 records per method from numeric scored axes, documented raw128 proxy scores, unsupported metadata targets, and model targets not evaluated in verified packages.",
776
  "exists": true,
777
- "bytes": 3771,
778
- "sha256": "a17499bf2e6f81dbb5682abd42ae2f66ff2ae41d28ea8f4a437eb65350956fc6"
779
  },
780
  {
781
  "id": "task_method_20_gap_audit_json",
@@ -786,7 +786,7 @@
786
  "shows": "Machine-readable 180-record gap ledger with numeric scores, scoreless cells, explicit status reasons, and next evidence needed before new scores can be published.",
787
  "exists": true,
788
  "bytes": 20037,
789
- "sha256": "1e96af5557f4617e160233eb7d2180644f29fa9a8c67afed710ac0425fc71ddd"
790
  },
791
  {
792
  "id": "task_method_20_gap_audit",
@@ -797,7 +797,7 @@
797
  "shows": "Reader-facing ledger that lists every scoreless method-task cell and the concrete target or model-output evidence required before it can become numeric.",
798
  "exists": true,
799
  "bytes": 6398,
800
- "sha256": "5d63a8ad52dcdd3624c9c73fd292121cf9a14644ad369784894c3fd1fcaa83a1"
801
  },
802
  {
803
  "id": "unified_task_model_radar_chart",
@@ -807,8 +807,8 @@
807
  "surface": "website_hf",
808
  "shows": "Compares minimal and neural MLP baselines across all 20 tasks, with Qwen3/Cosmos task-aligned model overlays.",
809
  "exists": true,
810
- "bytes": 56167,
811
- "sha256": "1953a2c56b8c5a11a7def73c2899a49dc6c20d0cef3ae63c53fb10bfec7b7264"
812
  },
813
  {
814
  "id": "single_episode_task_model_radar_chart",
@@ -829,8 +829,8 @@
829
  "surface": "website_hf",
830
  "shows": "Separates the selected 128-episode methods: raw-feature simple/NN as complete 20/20 scored polygons and metadata/Qwen/Cosmos as task-aligned overlays.",
831
  "exists": true,
832
- "bytes": 50154,
833
- "sha256": "be065f88f043241ce9d006bdc4a4e90b3b44ce5e04a8370ce65ce9b8f66fa5bc"
834
  },
835
  {
836
  "id": "unified_task_model_radar_builder",
@@ -1116,7 +1116,7 @@
1116
  "shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
1117
  "exists": true,
1118
  "bytes": 8100,
1119
- "sha256": "ee5c7a273f492e6fc2fe2ab92f7eae6a1a8bedf963b5faecdd362ac5495864cf"
1120
  },
1121
  {
1122
  "id": "public_surface_qa",
@@ -1241,7 +1241,7 @@
1241
  "volatile": true,
1242
  "shows": "Records the last live GitHub/HF URL verification after upload.",
1243
  "exists": true,
1244
- "bytes": 172703,
1245
  "hash_policy": "existence_and_size_only"
1246
  },
1247
  {
@@ -1297,7 +1297,7 @@
1297
  "volatile": true,
1298
  "shows": "Confirms public bundles exclude raw data, caches, heavy archives, and credential text.",
1299
  "exists": true,
1300
- "bytes": 9758,
1301
  "hash_policy": "existence_and_size_only"
1302
  },
1303
  {
@@ -1321,7 +1321,7 @@
1321
  "volatile": true,
1322
  "shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
1323
  "exists": true,
1324
- "bytes": 1194785,
1325
  "hash_policy": "existence_and_size_only"
1326
  },
1327
  {
@@ -1333,7 +1333,7 @@
1333
  "volatile": true,
1334
  "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
1335
  "exists": true,
1336
- "bytes": 20022,
1337
  "hash_policy": "existence_and_size_only"
1338
  },
1339
  {
 
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Artifact Index",
3
+ "generated_at_utc": "2026-06-20T15:20:34+00:00",
4
  "status": "pass",
5
  "artifact_count": 214,
6
  "missing": [],
 
610
  "shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
611
  "exists": true,
612
  "bytes": 4432,
613
+ "sha256": "98bffea722a8d50936e214538b37766d3952ca362854757bf31121031fdecbc8"
614
  },
615
  {
616
  "id": "source_alignment_validator",
 
730
  "surface": "website_hf",
731
  "shows": "Stores normalized 20-axis radar values, raw task metrics, Qwen3/Cosmos overlay mappings, branch-card caveats, and explicit scoreless status records.",
732
  "exists": true,
733
+ "bytes": 228423,
734
+ "sha256": "a84269541f448eb6b9d2ad29199dc2cab23c55ad4eacb1f0b4fbd2948e1b75c4"
735
  },
736
  {
737
  "id": "single_episode_task_model_radar_json",
 
742
  "shows": "Machine-readable split radar for the one-episode Minimal and Neural MLP baselines, both scored on all 20 task contracts.",
743
  "exists": true,
744
  "bytes": 51064,
745
+ "sha256": "dad53d1c12eb8947b064372e5ccd7c416cefab6e57175cd990e60af9e56ec958"
746
  },
747
  {
748
  "id": "episode128_task_model_radar_json",
 
752
  "surface": "website_hf",
753
  "shows": "Machine-readable split radar for selected 128-episode metadata/raw baselines and verified Qwen3/Cosmos branches, preserving explicit scoreless cells.",
754
  "exists": true,
755
+ "bytes": 184569,
756
+ "sha256": "a07df912c0909bd6067eb81a0f54856c1ed6da5ca5feee54f6b32d3296b3724e"
757
  },
758
  {
759
  "id": "task_method_20_result_matrix_json",
 
763
  "surface": "website_hf",
764
  "shows": "Machine-readable 9-method by 20-task matrix where every method has 20 records and scoreless cells carry unsupported/not-evaluated reasons.",
765
  "exists": true,
766
+ "bytes": 128400,
767
+ "sha256": "afee9a09eeb7406a8f2ef4eb4205697fb9e05e5bad132686f21d28f93ac204a2"
768
  },
769
  {
770
  "id": "task_method_20_result_matrix",
 
774
  "surface": "repo_hf",
775
  "shows": "Reader-facing table that separates 20 records per method from numeric scored axes, documented raw128 proxy scores, unsupported metadata targets, and model targets not evaluated in verified packages.",
776
  "exists": true,
777
+ "bytes": 3739,
778
+ "sha256": "325df952cc73a1a530781c3f55cf87105a178ac3cb3553515c817d00916deb27"
779
  },
780
  {
781
  "id": "task_method_20_gap_audit_json",
 
786
  "shows": "Machine-readable 180-record gap ledger with numeric scores, scoreless cells, explicit status reasons, and next evidence needed before new scores can be published.",
787
  "exists": true,
788
  "bytes": 20037,
789
+ "sha256": "20752c326190b793e2e89415a01c29c140a0ab1bff45825cdfbcca81fe8fcdec"
790
  },
791
  {
792
  "id": "task_method_20_gap_audit",
 
797
  "shows": "Reader-facing ledger that lists every scoreless method-task cell and the concrete target or model-output evidence required before it can become numeric.",
798
  "exists": true,
799
  "bytes": 6398,
800
+ "sha256": "ab8ccca20391194c3211ec3013fb0ec7f88bba1d6b94aadd46a5de3f0f5f3324"
801
  },
802
  {
803
  "id": "unified_task_model_radar_chart",
 
807
  "surface": "website_hf",
808
  "shows": "Compares minimal and neural MLP baselines across all 20 tasks, with Qwen3/Cosmos task-aligned model overlays.",
809
  "exists": true,
810
+ "bytes": 56612,
811
+ "sha256": "03a3eb9369cda807197220eaa22282dec49b6119fa24c211a67c520daf63a611"
812
  },
813
  {
814
  "id": "single_episode_task_model_radar_chart",
 
829
  "surface": "website_hf",
830
  "shows": "Separates the selected 128-episode methods: raw-feature simple/NN as complete 20/20 scored polygons and metadata/Qwen/Cosmos as task-aligned overlays.",
831
  "exists": true,
832
+ "bytes": 50599,
833
+ "sha256": "ec9cbf2a69d5b5cd466500dfbeaf996fa666af94a47a597abcc2465bbfad0bad"
834
  },
835
  {
836
  "id": "unified_task_model_radar_builder",
 
1116
  "shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
1117
  "exists": true,
1118
  "bytes": 8100,
1119
+ "sha256": "54867eb8abcebb9948f3a6f2f5f3c3a995373b856f00d1af23212a7c39633bc4"
1120
  },
1121
  {
1122
  "id": "public_surface_qa",
 
1241
  "volatile": true,
1242
  "shows": "Records the last live GitHub/HF URL verification after upload.",
1243
  "exists": true,
1244
+ "bytes": 181991,
1245
  "hash_policy": "existence_and_size_only"
1246
  },
1247
  {
 
1297
  "volatile": true,
1298
  "shows": "Confirms public bundles exclude raw data, caches, heavy archives, and credential text.",
1299
  "exists": true,
1300
+ "bytes": 10277,
1301
  "hash_policy": "existence_and_size_only"
1302
  },
1303
  {
 
1321
  "volatile": true,
1322
  "shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
1323
  "exists": true,
1324
+ "bytes": 1231414,
1325
  "hash_policy": "existence_and_size_only"
1326
  },
1327
  {
 
1333
  "volatile": true,
1334
  "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
1335
  "exists": true,
1336
+ "bytes": 20023,
1337
  "hash_policy": "existence_and_size_only"
1338
  },
1339
  {
data/episode128_task_model_radar.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "title": "128-Episode 20-Task Radar",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-20T13:58:04+00:00",
5
  "description": "Selected 128-episode metadata/raw baselines plus verified Qwen3/Cosmos branches. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.",
6
  "task_count": 20,
7
  "method_count": 7,
8
  "method_task_record_count": 140,
9
- "scored_method_task_count": 124,
10
  "normalization_policy": {
11
  "higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]",
12
  "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
@@ -149,17 +149,17 @@
149
  "method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation, plus task 5/8/9/10/11/12/13/14/16/17/18/19/20 probes where public metrics exist.",
150
  "plotted_as": "colored point overlay",
151
  "result_record_count": 20,
152
- "scored_task_count": 15,
153
- "covered_task_count": 15,
154
  "proxy_scored_task_count": 0,
155
- "scoreless_task_count": 5,
156
  "unsupported_task_count": 0,
157
- "not_evaluated_task_count": 5,
158
  "status_counts": {
159
- "not_evaluated_in_verified_package": 5,
160
- "scored": 15
161
  },
162
- "coverage_fraction": 0.75,
163
  "result_record_fraction": 1.0
164
  },
165
  {
@@ -1166,15 +1166,15 @@
1166
  "status_label": "scored"
1167
  },
1168
  "cosmos3_super_reasoner": {
1169
- "raw": null,
1170
- "metric_key": "f1",
1171
- "source": null,
1172
  "scope": "multi_episode_128_partial_model_overlay",
1173
- "status": "not_evaluated_in_verified_package",
1174
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
1175
- "normalized_score": null,
1176
- "raw_text": "n/a",
1177
- "status_label": "not evaluated"
1178
  },
1179
  "cosmos3_nano_future_window": {
1180
  "raw": null,
@@ -1257,15 +1257,15 @@
1257
  "status_label": "scored"
1258
  },
1259
  "cosmos3_super_reasoner": {
1260
- "raw": null,
1261
- "metric_key": "f1",
1262
- "source": null,
1263
  "scope": "multi_episode_128_partial_model_overlay",
1264
- "status": "not_evaluated_in_verified_package",
1265
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
1266
- "normalized_score": null,
1267
- "raw_text": "n/a",
1268
- "status_label": "not evaluated"
1269
  },
1270
  "cosmos3_nano_future_window": {
1271
  "raw": null,
@@ -1439,15 +1439,15 @@
1439
  "status_label": "scored"
1440
  },
1441
  "cosmos3_super_reasoner": {
1442
- "raw": null,
1443
- "metric_key": "macro_f1",
1444
- "source": null,
1445
  "scope": "multi_episode_128_partial_model_overlay",
1446
- "status": "not_evaluated_in_verified_package",
1447
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
1448
- "normalized_score": null,
1449
- "raw_text": "n/a",
1450
- "status_label": "not evaluated"
1451
  },
1452
  "cosmos3_nano_future_window": {
1453
  "raw": 0.006614876224708678,
@@ -1712,15 +1712,15 @@
1712
  "status_label": "scored"
1713
  },
1714
  "cosmos3_super_reasoner": {
1715
- "raw": null,
1716
- "metric_key": "micro_f1",
1717
- "source": null,
1718
  "scope": "multi_episode_128_partial_model_overlay",
1719
- "status": "not_evaluated_in_verified_package",
1720
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
1721
- "normalized_score": null,
1722
- "raw_text": "n/a",
1723
- "status_label": "not evaluated"
1724
  },
1725
  "cosmos3_nano_future_window": {
1726
  "raw": 0.01781970649895178,
@@ -3366,17 +3366,17 @@
3366
  "task_label": "Temporal Order Verification",
3367
  "series_id": "cosmos3_super_reasoner",
3368
  "method": "Cosmos3-Super Reasoner",
3369
- "status": "not_evaluated_in_verified_package",
3370
- "status_label": "not evaluated",
3371
- "scored": false,
3372
  "proxy_scored": false,
3373
- "raw": null,
3374
- "raw_text": "n/a",
3375
- "normalized_score": null,
3376
- "metric_key": "f1",
3377
- "source": null,
3378
  "scope": "multi_episode_128_partial_model_overlay",
3379
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
3380
  },
3381
  {
3382
  "task_number": 11,
@@ -3492,17 +3492,17 @@
3492
  "task_label": "Multimodal Synchronization Detection",
3493
  "series_id": "cosmos3_super_reasoner",
3494
  "method": "Cosmos3-Super Reasoner",
3495
- "status": "not_evaluated_in_verified_package",
3496
- "status_label": "not evaluated",
3497
- "scored": false,
3498
  "proxy_scored": false,
3499
- "raw": null,
3500
- "raw_text": "n/a",
3501
- "normalized_score": null,
3502
- "metric_key": "f1",
3503
- "source": null,
3504
  "scope": "multi_episode_128_partial_model_overlay",
3505
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
3506
  },
3507
  {
3508
  "task_number": 12,
@@ -3744,17 +3744,17 @@
3744
  "task_label": "Long-Horizon Next-Subtask Forecasting",
3745
  "series_id": "cosmos3_super_reasoner",
3746
  "method": "Cosmos3-Super Reasoner",
3747
- "status": "not_evaluated_in_verified_package",
3748
- "status_label": "not evaluated",
3749
- "scored": false,
3750
  "proxy_scored": false,
3751
- "raw": null,
3752
- "raw_text": "n/a",
3753
- "normalized_score": null,
3754
- "metric_key": "macro_f1",
3755
- "source": null,
3756
  "scope": "multi_episode_128_partial_model_overlay",
3757
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
3758
  },
3759
  {
3760
  "task_number": 14,
@@ -4122,17 +4122,17 @@
4122
  "task_label": "Future Object-Set Forecasting",
4123
  "series_id": "cosmos3_super_reasoner",
4124
  "method": "Cosmos3-Super Reasoner",
4125
- "status": "not_evaluated_in_verified_package",
4126
- "status_label": "not evaluated",
4127
- "scored": false,
4128
  "proxy_scored": false,
4129
- "raw": null,
4130
- "raw_text": "n/a",
4131
- "normalized_score": null,
4132
- "metric_key": "micro_f1",
4133
- "source": null,
4134
  "scope": "multi_episode_128_partial_model_overlay",
4135
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
4136
  },
4137
  {
4138
  "task_number": 17,
 
1
  {
2
  "title": "128-Episode 20-Task Radar",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-20T15:20:32+00:00",
5
  "description": "Selected 128-episode metadata/raw baselines plus verified Qwen3/Cosmos branches. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.",
6
  "task_count": 20,
7
  "method_count": 7,
8
  "method_task_record_count": 140,
9
+ "scored_method_task_count": 128,
10
  "normalization_policy": {
11
  "higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]",
12
  "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
 
149
  "method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation, plus task 5/8/9/10/11/12/13/14/16/17/18/19/20 probes where public metrics exist.",
150
  "plotted_as": "colored point overlay",
151
  "result_record_count": 20,
152
+ "scored_task_count": 19,
153
+ "covered_task_count": 19,
154
  "proxy_scored_task_count": 0,
155
+ "scoreless_task_count": 1,
156
  "unsupported_task_count": 0,
157
+ "not_evaluated_task_count": 1,
158
  "status_counts": {
159
+ "not_evaluated_in_verified_package": 1,
160
+ "scored": 19
161
  },
162
+ "coverage_fraction": 0.95,
163
  "result_record_fraction": 1.0
164
  },
165
  {
 
1166
  "status_label": "scored"
1167
  },
1168
  "cosmos3_super_reasoner": {
1169
+ "raw": 0.6286317274823326,
1170
+ "metric_key": "temporal_order_f1",
1171
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
1172
  "scope": "multi_episode_128_partial_model_overlay",
1173
+ "status": "scored",
1174
+ "reason": null,
1175
+ "normalized_score": 0.6286317274823326,
1176
+ "raw_text": "0.6286",
1177
+ "status_label": "scored"
1178
  },
1179
  "cosmos3_nano_future_window": {
1180
  "raw": null,
 
1257
  "status_label": "scored"
1258
  },
1259
  "cosmos3_super_reasoner": {
1260
+ "raw": 0.37271645981034185,
1261
+ "metric_key": "misalignment_detection_f1",
1262
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
1263
  "scope": "multi_episode_128_partial_model_overlay",
1264
+ "status": "scored",
1265
+ "reason": null,
1266
+ "normalized_score": 0.37271645981034185,
1267
+ "raw_text": "0.3727",
1268
+ "status_label": "scored"
1269
  },
1270
  "cosmos3_nano_future_window": {
1271
  "raw": null,
 
1439
  "status_label": "scored"
1440
  },
1441
  "cosmos3_super_reasoner": {
1442
+ "raw": 0.0,
1443
+ "metric_key": "next_subtask_forecast_macro_f1",
1444
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
1445
  "scope": "multi_episode_128_partial_model_overlay",
1446
+ "status": "scored",
1447
+ "reason": null,
1448
+ "normalized_score": 0.0,
1449
+ "raw_text": "0.0000",
1450
+ "status_label": "scored"
1451
  },
1452
  "cosmos3_nano_future_window": {
1453
  "raw": 0.006614876224708678,
 
1712
  "status_label": "scored"
1713
  },
1714
  "cosmos3_super_reasoner": {
1715
+ "raw": 0.0009279881217520415,
1716
+ "metric_key": "object_set_forecast_micro_f1",
1717
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
1718
  "scope": "multi_episode_128_partial_model_overlay",
1719
+ "status": "scored",
1720
+ "reason": null,
1721
+ "normalized_score": 0.0009279881217520415,
1722
+ "raw_text": "0.0009",
1723
+ "status_label": "scored"
1724
  },
1725
  "cosmos3_nano_future_window": {
1726
  "raw": 0.01781970649895178,
 
3366
  "task_label": "Temporal Order Verification",
3367
  "series_id": "cosmos3_super_reasoner",
3368
  "method": "Cosmos3-Super Reasoner",
3369
+ "status": "scored",
3370
+ "status_label": "scored",
3371
+ "scored": true,
3372
  "proxy_scored": false,
3373
+ "raw": 0.6286317274823326,
3374
+ "raw_text": "0.6286",
3375
+ "normalized_score": 0.6286317274823326,
3376
+ "metric_key": "temporal_order_f1",
3377
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
3378
  "scope": "multi_episode_128_partial_model_overlay",
3379
+ "reason": null
3380
  },
3381
  {
3382
  "task_number": 11,
 
3492
  "task_label": "Multimodal Synchronization Detection",
3493
  "series_id": "cosmos3_super_reasoner",
3494
  "method": "Cosmos3-Super Reasoner",
3495
+ "status": "scored",
3496
+ "status_label": "scored",
3497
+ "scored": true,
3498
  "proxy_scored": false,
3499
+ "raw": 0.37271645981034185,
3500
+ "raw_text": "0.3727",
3501
+ "normalized_score": 0.37271645981034185,
3502
+ "metric_key": "misalignment_detection_f1",
3503
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
3504
  "scope": "multi_episode_128_partial_model_overlay",
3505
+ "reason": null
3506
  },
3507
  {
3508
  "task_number": 12,
 
3744
  "task_label": "Long-Horizon Next-Subtask Forecasting",
3745
  "series_id": "cosmos3_super_reasoner",
3746
  "method": "Cosmos3-Super Reasoner",
3747
+ "status": "scored",
3748
+ "status_label": "scored",
3749
+ "scored": true,
3750
  "proxy_scored": false,
3751
+ "raw": 0.0,
3752
+ "raw_text": "0.0000",
3753
+ "normalized_score": 0.0,
3754
+ "metric_key": "next_subtask_forecast_macro_f1",
3755
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
3756
  "scope": "multi_episode_128_partial_model_overlay",
3757
+ "reason": null
3758
  },
3759
  {
3760
  "task_number": 14,
 
4122
  "task_label": "Future Object-Set Forecasting",
4123
  "series_id": "cosmos3_super_reasoner",
4124
  "method": "Cosmos3-Super Reasoner",
4125
+ "status": "scored",
4126
+ "status_label": "scored",
4127
+ "scored": true,
4128
  "proxy_scored": false,
4129
+ "raw": 0.0009279881217520415,
4130
+ "raw_text": "0.0009",
4131
+ "normalized_score": 0.0009279881217520415,
4132
+ "metric_key": "object_set_forecast_micro_f1",
4133
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
4134
  "scope": "multi_episode_128_partial_model_overlay",
4135
+ "reason": null
4136
  },
4137
  {
4138
  "task_number": 17,
data/mirror_parity.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-20T14:52:37+00:00",
4
  "hf_root": "hf_publish",
5
  "summary": {
6
- "group_count": 1088,
7
  "failure_count": 0,
8
  "failures_by_surface": {}
9
  },
@@ -138,45 +138,45 @@
138
  "local": {
139
  "path": "repo:docs/data/artifact_index.json",
140
  "exists": true,
141
- "bytes": 116644,
142
- "sha256": "b5dc98a747e85ce2b82fb81f72d5ae53d71817b365c26d2cc08a2bbcdc6a098e"
143
  },
144
  "mirrors": {
145
  "hf_space": {
146
  "path": "hf_space:data/artifact_index.json",
147
  "exists": true,
148
- "bytes": 116644,
149
- "sha256": "b5dc98a747e85ce2b82fb81f72d5ae53d71817b365c26d2cc08a2bbcdc6a098e"
150
  },
151
  "hf_artifacts_data": {
152
  "path": "hf_artifacts:data/artifact_index.json",
153
  "exists": true,
154
- "bytes": 116644,
155
- "sha256": "b5dc98a747e85ce2b82fb81f72d5ae53d71817b365c26d2cc08a2bbcdc6a098e"
156
  },
157
  "hf_artifacts": {
158
  "path": "hf_artifacts:docs/data/artifact_index.json",
159
  "exists": true,
160
- "bytes": 116644,
161
- "sha256": "b5dc98a747e85ce2b82fb81f72d5ae53d71817b365c26d2cc08a2bbcdc6a098e"
162
  },
163
  "hf_model_data": {
164
  "path": "hf_model:data/artifact_index.json",
165
  "exists": true,
166
- "bytes": 116644,
167
- "sha256": "b5dc98a747e85ce2b82fb81f72d5ae53d71817b365c26d2cc08a2bbcdc6a098e"
168
  },
169
  "hf_model_docs_data": {
170
  "path": "hf_model:docs/data/artifact_index.json",
171
  "exists": true,
172
- "bytes": 116644,
173
- "sha256": "b5dc98a747e85ce2b82fb81f72d5ae53d71817b365c26d2cc08a2bbcdc6a098e"
174
  },
175
  "hf_model": {
176
  "path": "hf_model:metrics/artifact_index.json",
177
  "exists": true,
178
- "bytes": 116644,
179
- "sha256": "b5dc98a747e85ce2b82fb81f72d5ae53d71817b365c26d2cc08a2bbcdc6a098e"
180
  }
181
  },
182
  "failures": []
@@ -432,45 +432,45 @@
432
  "local": {
433
  "path": "repo:docs/data/live_publication_status.json",
434
  "exists": true,
435
- "bytes": 181998,
436
- "sha256": "bc2156e09848b718fbfc6a940f9bcab88fc9b6e9d1887504518acf16d367db3d"
437
  },
438
  "mirrors": {
439
  "hf_space": {
440
  "path": "hf_space:data/live_publication_status.json",
441
  "exists": true,
442
- "bytes": 181998,
443
- "sha256": "bc2156e09848b718fbfc6a940f9bcab88fc9b6e9d1887504518acf16d367db3d"
444
  },
445
  "hf_artifacts_data": {
446
  "path": "hf_artifacts:data/live_publication_status.json",
447
  "exists": true,
448
- "bytes": 181998,
449
- "sha256": "bc2156e09848b718fbfc6a940f9bcab88fc9b6e9d1887504518acf16d367db3d"
450
  },
451
  "hf_artifacts": {
452
  "path": "hf_artifacts:docs/data/live_publication_status.json",
453
  "exists": true,
454
- "bytes": 181998,
455
- "sha256": "bc2156e09848b718fbfc6a940f9bcab88fc9b6e9d1887504518acf16d367db3d"
456
  },
457
  "hf_model_data": {
458
  "path": "hf_model:data/live_publication_status.json",
459
  "exists": true,
460
- "bytes": 181998,
461
- "sha256": "bc2156e09848b718fbfc6a940f9bcab88fc9b6e9d1887504518acf16d367db3d"
462
  },
463
  "hf_model_docs_data": {
464
  "path": "hf_model:docs/data/live_publication_status.json",
465
  "exists": true,
466
- "bytes": 181998,
467
- "sha256": "bc2156e09848b718fbfc6a940f9bcab88fc9b6e9d1887504518acf16d367db3d"
468
  },
469
  "hf_model": {
470
  "path": "hf_model:metrics/live_publication_status.json",
471
  "exists": true,
472
- "bytes": 181998,
473
- "sha256": "bc2156e09848b718fbfc6a940f9bcab88fc9b6e9d1887504518acf16d367db3d"
474
  }
475
  },
476
  "failures": []
@@ -923,44 +923,44 @@
923
  "path": "repo:docs/data/publication_audit.json",
924
  "exists": true,
925
  "bytes": 10277,
926
- "sha256": "f1f9b13d031dfb095c257e0457ef0871c09057baefd080c5843569111ebf50b3"
927
  },
928
  "mirrors": {
929
  "hf_space": {
930
  "path": "hf_space:data/publication_audit.json",
931
  "exists": true,
932
  "bytes": 10277,
933
- "sha256": "f1f9b13d031dfb095c257e0457ef0871c09057baefd080c5843569111ebf50b3"
934
  },
935
  "hf_artifacts_data": {
936
  "path": "hf_artifacts:data/publication_audit.json",
937
  "exists": true,
938
  "bytes": 10277,
939
- "sha256": "f1f9b13d031dfb095c257e0457ef0871c09057baefd080c5843569111ebf50b3"
940
  },
941
  "hf_artifacts": {
942
  "path": "hf_artifacts:docs/data/publication_audit.json",
943
  "exists": true,
944
  "bytes": 10277,
945
- "sha256": "f1f9b13d031dfb095c257e0457ef0871c09057baefd080c5843569111ebf50b3"
946
  },
947
  "hf_model_data": {
948
  "path": "hf_model:data/publication_audit.json",
949
  "exists": true,
950
  "bytes": 10277,
951
- "sha256": "f1f9b13d031dfb095c257e0457ef0871c09057baefd080c5843569111ebf50b3"
952
  },
953
  "hf_model_docs_data": {
954
  "path": "hf_model:docs/data/publication_audit.json",
955
  "exists": true,
956
  "bytes": 10277,
957
- "sha256": "f1f9b13d031dfb095c257e0457ef0871c09057baefd080c5843569111ebf50b3"
958
  },
959
  "hf_model": {
960
  "path": "hf_model:metrics/publication_audit.json",
961
  "exists": true,
962
  "bytes": 10277,
963
- "sha256": "f1f9b13d031dfb095c257e0457ef0871c09057baefd080c5843569111ebf50b3"
964
  }
965
  },
966
  "failures": []
@@ -972,44 +972,44 @@
972
  "path": "repo:docs/data/public_surface_qa.json",
973
  "exists": true,
974
  "bytes": 7126,
975
- "sha256": "ee3b7875c232434650beef885c3c5537b2657039b71004d555e87fd8bff166df"
976
  },
977
  "mirrors": {
978
  "hf_space": {
979
  "path": "hf_space:data/public_surface_qa.json",
980
  "exists": true,
981
  "bytes": 7126,
982
- "sha256": "ee3b7875c232434650beef885c3c5537b2657039b71004d555e87fd8bff166df"
983
  },
984
  "hf_artifacts_data": {
985
  "path": "hf_artifacts:data/public_surface_qa.json",
986
  "exists": true,
987
  "bytes": 7126,
988
- "sha256": "ee3b7875c232434650beef885c3c5537b2657039b71004d555e87fd8bff166df"
989
  },
990
  "hf_artifacts": {
991
  "path": "hf_artifacts:docs/data/public_surface_qa.json",
992
  "exists": true,
993
  "bytes": 7126,
994
- "sha256": "ee3b7875c232434650beef885c3c5537b2657039b71004d555e87fd8bff166df"
995
  },
996
  "hf_model_data": {
997
  "path": "hf_model:data/public_surface_qa.json",
998
  "exists": true,
999
  "bytes": 7126,
1000
- "sha256": "ee3b7875c232434650beef885c3c5537b2657039b71004d555e87fd8bff166df"
1001
  },
1002
  "hf_model_docs_data": {
1003
  "path": "hf_model:docs/data/public_surface_qa.json",
1004
  "exists": true,
1005
  "bytes": 7126,
1006
- "sha256": "ee3b7875c232434650beef885c3c5537b2657039b71004d555e87fd8bff166df"
1007
  },
1008
  "hf_model": {
1009
  "path": "hf_model:metrics/public_surface_qa.json",
1010
  "exists": true,
1011
  "bytes": 7126,
1012
- "sha256": "ee3b7875c232434650beef885c3c5537b2657039b71004d555e87fd8bff166df"
1013
  }
1014
  },
1015
  "failures": []
@@ -1119,44 +1119,44 @@
1119
  "path": "repo:docs/data/quality_gates.json",
1120
  "exists": true,
1121
  "bytes": 8100,
1122
- "sha256": "ee5c7a273f492e6fc2fe2ab92f7eae6a1a8bedf963b5faecdd362ac5495864cf"
1123
  },
1124
  "mirrors": {
1125
  "hf_space": {
1126
  "path": "hf_space:data/quality_gates.json",
1127
  "exists": true,
1128
  "bytes": 8100,
1129
- "sha256": "ee5c7a273f492e6fc2fe2ab92f7eae6a1a8bedf963b5faecdd362ac5495864cf"
1130
  },
1131
  "hf_artifacts_data": {
1132
  "path": "hf_artifacts:data/quality_gates.json",
1133
  "exists": true,
1134
  "bytes": 8100,
1135
- "sha256": "ee5c7a273f492e6fc2fe2ab92f7eae6a1a8bedf963b5faecdd362ac5495864cf"
1136
  },
1137
  "hf_artifacts": {
1138
  "path": "hf_artifacts:docs/data/quality_gates.json",
1139
  "exists": true,
1140
  "bytes": 8100,
1141
- "sha256": "ee5c7a273f492e6fc2fe2ab92f7eae6a1a8bedf963b5faecdd362ac5495864cf"
1142
  },
1143
  "hf_model_data": {
1144
  "path": "hf_model:data/quality_gates.json",
1145
  "exists": true,
1146
  "bytes": 8100,
1147
- "sha256": "ee5c7a273f492e6fc2fe2ab92f7eae6a1a8bedf963b5faecdd362ac5495864cf"
1148
  },
1149
  "hf_model_docs_data": {
1150
  "path": "hf_model:docs/data/quality_gates.json",
1151
  "exists": true,
1152
  "bytes": 8100,
1153
- "sha256": "ee5c7a273f492e6fc2fe2ab92f7eae6a1a8bedf963b5faecdd362ac5495864cf"
1154
  },
1155
  "hf_model": {
1156
  "path": "hf_model:metrics/quality_gates.json",
1157
  "exists": true,
1158
  "bytes": 8100,
1159
- "sha256": "ee5c7a273f492e6fc2fe2ab92f7eae6a1a8bedf963b5faecdd362ac5495864cf"
1160
  }
1161
  },
1162
  "failures": []
@@ -1560,44 +1560,44 @@
1560
  "path": "repo:docs/data/scope_claims_audit.json",
1561
  "exists": true,
1562
  "bytes": 21630,
1563
- "sha256": "6a80b6012bbfba66cba524432b08421496ef0ba1d6eee6c7e5db8b382aa0f3ba"
1564
  },
1565
  "mirrors": {
1566
  "hf_space": {
1567
  "path": "hf_space:data/scope_claims_audit.json",
1568
  "exists": true,
1569
  "bytes": 21630,
1570
- "sha256": "6a80b6012bbfba66cba524432b08421496ef0ba1d6eee6c7e5db8b382aa0f3ba"
1571
  },
1572
  "hf_artifacts_data": {
1573
  "path": "hf_artifacts:data/scope_claims_audit.json",
1574
  "exists": true,
1575
  "bytes": 21630,
1576
- "sha256": "6a80b6012bbfba66cba524432b08421496ef0ba1d6eee6c7e5db8b382aa0f3ba"
1577
  },
1578
  "hf_artifacts": {
1579
  "path": "hf_artifacts:docs/data/scope_claims_audit.json",
1580
  "exists": true,
1581
  "bytes": 21630,
1582
- "sha256": "6a80b6012bbfba66cba524432b08421496ef0ba1d6eee6c7e5db8b382aa0f3ba"
1583
  },
1584
  "hf_model_data": {
1585
  "path": "hf_model:data/scope_claims_audit.json",
1586
  "exists": true,
1587
  "bytes": 21630,
1588
- "sha256": "6a80b6012bbfba66cba524432b08421496ef0ba1d6eee6c7e5db8b382aa0f3ba"
1589
  },
1590
  "hf_model_docs_data": {
1591
  "path": "hf_model:docs/data/scope_claims_audit.json",
1592
  "exists": true,
1593
  "bytes": 21630,
1594
- "sha256": "6a80b6012bbfba66cba524432b08421496ef0ba1d6eee6c7e5db8b382aa0f3ba"
1595
  },
1596
  "hf_model": {
1597
  "path": "hf_model:metrics/scope_claims_audit.json",
1598
  "exists": true,
1599
  "bytes": 21630,
1600
- "sha256": "6a80b6012bbfba66cba524432b08421496ef0ba1d6eee6c7e5db8b382aa0f3ba"
1601
  }
1602
  },
1603
  "failures": []
@@ -1658,44 +1658,44 @@
1658
  "path": "repo:docs/data/source_alignment_audit.json",
1659
  "exists": true,
1660
  "bytes": 4432,
1661
- "sha256": "98bffea722a8d50936e214538b37766d3952ca362854757bf31121031fdecbc8"
1662
  },
1663
  "mirrors": {
1664
  "hf_space": {
1665
  "path": "hf_space:data/source_alignment_audit.json",
1666
  "exists": true,
1667
  "bytes": 4432,
1668
- "sha256": "98bffea722a8d50936e214538b37766d3952ca362854757bf31121031fdecbc8"
1669
  },
1670
  "hf_artifacts_data": {
1671
  "path": "hf_artifacts:data/source_alignment_audit.json",
1672
  "exists": true,
1673
  "bytes": 4432,
1674
- "sha256": "98bffea722a8d50936e214538b37766d3952ca362854757bf31121031fdecbc8"
1675
  },
1676
  "hf_artifacts": {
1677
  "path": "hf_artifacts:docs/data/source_alignment_audit.json",
1678
  "exists": true,
1679
  "bytes": 4432,
1680
- "sha256": "98bffea722a8d50936e214538b37766d3952ca362854757bf31121031fdecbc8"
1681
  },
1682
  "hf_model_data": {
1683
  "path": "hf_model:data/source_alignment_audit.json",
1684
  "exists": true,
1685
  "bytes": 4432,
1686
- "sha256": "98bffea722a8d50936e214538b37766d3952ca362854757bf31121031fdecbc8"
1687
  },
1688
  "hf_model_docs_data": {
1689
  "path": "hf_model:docs/data/source_alignment_audit.json",
1690
  "exists": true,
1691
  "bytes": 4432,
1692
- "sha256": "98bffea722a8d50936e214538b37766d3952ca362854757bf31121031fdecbc8"
1693
  },
1694
  "hf_model": {
1695
  "path": "hf_model:metrics/source_alignment_audit.json",
1696
  "exists": true,
1697
  "bytes": 4432,
1698
- "sha256": "98bffea722a8d50936e214538b37766d3952ca362854757bf31121031fdecbc8"
1699
  }
1700
  },
1701
  "failures": []
@@ -1756,44 +1756,44 @@
1756
  "path": "repo:docs/data/single_episode_task_model_radar.json",
1757
  "exists": true,
1758
  "bytes": 51064,
1759
- "sha256": "5176552d9b735d36a1033277ad64b68038cc27605565ec9d714397d2a6de7623"
1760
  },
1761
  "mirrors": {
1762
  "hf_space": {
1763
  "path": "hf_space:data/single_episode_task_model_radar.json",
1764
  "exists": true,
1765
  "bytes": 51064,
1766
- "sha256": "5176552d9b735d36a1033277ad64b68038cc27605565ec9d714397d2a6de7623"
1767
  },
1768
  "hf_artifacts_data": {
1769
  "path": "hf_artifacts:data/single_episode_task_model_radar.json",
1770
  "exists": true,
1771
  "bytes": 51064,
1772
- "sha256": "5176552d9b735d36a1033277ad64b68038cc27605565ec9d714397d2a6de7623"
1773
  },
1774
  "hf_artifacts": {
1775
  "path": "hf_artifacts:docs/data/single_episode_task_model_radar.json",
1776
  "exists": true,
1777
  "bytes": 51064,
1778
- "sha256": "5176552d9b735d36a1033277ad64b68038cc27605565ec9d714397d2a6de7623"
1779
  },
1780
  "hf_model_data": {
1781
  "path": "hf_model:data/single_episode_task_model_radar.json",
1782
  "exists": true,
1783
  "bytes": 51064,
1784
- "sha256": "5176552d9b735d36a1033277ad64b68038cc27605565ec9d714397d2a6de7623"
1785
  },
1786
  "hf_model_docs_data": {
1787
  "path": "hf_model:docs/data/single_episode_task_model_radar.json",
1788
  "exists": true,
1789
  "bytes": 51064,
1790
- "sha256": "5176552d9b735d36a1033277ad64b68038cc27605565ec9d714397d2a6de7623"
1791
  },
1792
  "hf_model": {
1793
  "path": "hf_model:metrics/single_episode_task_model_radar.json",
1794
  "exists": true,
1795
  "bytes": 51064,
1796
- "sha256": "5176552d9b735d36a1033277ad64b68038cc27605565ec9d714397d2a6de7623"
1797
  }
1798
  },
1799
  "failures": []
@@ -1804,45 +1804,45 @@
1804
  "local": {
1805
  "path": "repo:docs/data/episode128_task_model_radar.json",
1806
  "exists": true,
1807
- "bytes": 184785,
1808
- "sha256": "5d6119e3233992b9d4b4d9d9dc9a42024b419bb95de78028f9831e87fac6e88e"
1809
  },
1810
  "mirrors": {
1811
  "hf_space": {
1812
  "path": "hf_space:data/episode128_task_model_radar.json",
1813
  "exists": true,
1814
- "bytes": 184785,
1815
- "sha256": "5d6119e3233992b9d4b4d9d9dc9a42024b419bb95de78028f9831e87fac6e88e"
1816
  },
1817
  "hf_artifacts_data": {
1818
  "path": "hf_artifacts:data/episode128_task_model_radar.json",
1819
  "exists": true,
1820
- "bytes": 184785,
1821
- "sha256": "5d6119e3233992b9d4b4d9d9dc9a42024b419bb95de78028f9831e87fac6e88e"
1822
  },
1823
  "hf_artifacts": {
1824
  "path": "hf_artifacts:docs/data/episode128_task_model_radar.json",
1825
  "exists": true,
1826
- "bytes": 184785,
1827
- "sha256": "5d6119e3233992b9d4b4d9d9dc9a42024b419bb95de78028f9831e87fac6e88e"
1828
  },
1829
  "hf_model_data": {
1830
  "path": "hf_model:data/episode128_task_model_radar.json",
1831
  "exists": true,
1832
- "bytes": 184785,
1833
- "sha256": "5d6119e3233992b9d4b4d9d9dc9a42024b419bb95de78028f9831e87fac6e88e"
1834
  },
1835
  "hf_model_docs_data": {
1836
  "path": "hf_model:docs/data/episode128_task_model_radar.json",
1837
  "exists": true,
1838
- "bytes": 184785,
1839
- "sha256": "5d6119e3233992b9d4b4d9d9dc9a42024b419bb95de78028f9831e87fac6e88e"
1840
  },
1841
  "hf_model": {
1842
  "path": "hf_model:metrics/episode128_task_model_radar.json",
1843
  "exists": true,
1844
- "bytes": 184785,
1845
- "sha256": "5d6119e3233992b9d4b4d9d9dc9a42024b419bb95de78028f9831e87fac6e88e"
1846
  }
1847
  },
1848
  "failures": []
@@ -2001,44 +2001,44 @@
2001
  "path": "repo:docs/data/task_surface_integrity.json",
2002
  "exists": true,
2003
  "bytes": 45779,
2004
- "sha256": "4a6ef81221417db753a9324daab4f5826fbd92d192a97680b7234cff5817c9f3"
2005
  },
2006
  "mirrors": {
2007
  "hf_space": {
2008
  "path": "hf_space:data/task_surface_integrity.json",
2009
  "exists": true,
2010
  "bytes": 45779,
2011
- "sha256": "4a6ef81221417db753a9324daab4f5826fbd92d192a97680b7234cff5817c9f3"
2012
  },
2013
  "hf_artifacts_data": {
2014
  "path": "hf_artifacts:data/task_surface_integrity.json",
2015
  "exists": true,
2016
  "bytes": 45779,
2017
- "sha256": "4a6ef81221417db753a9324daab4f5826fbd92d192a97680b7234cff5817c9f3"
2018
  },
2019
  "hf_artifacts": {
2020
  "path": "hf_artifacts:docs/data/task_surface_integrity.json",
2021
  "exists": true,
2022
  "bytes": 45779,
2023
- "sha256": "4a6ef81221417db753a9324daab4f5826fbd92d192a97680b7234cff5817c9f3"
2024
  },
2025
  "hf_model_data": {
2026
  "path": "hf_model:data/task_surface_integrity.json",
2027
  "exists": true,
2028
  "bytes": 45779,
2029
- "sha256": "4a6ef81221417db753a9324daab4f5826fbd92d192a97680b7234cff5817c9f3"
2030
  },
2031
  "hf_model_docs_data": {
2032
  "path": "hf_model:docs/data/task_surface_integrity.json",
2033
  "exists": true,
2034
  "bytes": 45779,
2035
- "sha256": "4a6ef81221417db753a9324daab4f5826fbd92d192a97680b7234cff5817c9f3"
2036
  },
2037
  "hf_model": {
2038
  "path": "hf_model:metrics/task_surface_integrity.json",
2039
  "exists": true,
2040
  "bytes": 45779,
2041
- "sha256": "4a6ef81221417db753a9324daab4f5826fbd92d192a97680b7234cff5817c9f3"
2042
  }
2043
  },
2044
  "failures": []
@@ -2098,45 +2098,45 @@
2098
  "local": {
2099
  "path": "repo:docs/data/task_method_20_result_matrix.json",
2100
  "exists": true,
2101
- "bytes": 128510,
2102
- "sha256": "13722fe288669b640230ac894909d247f348cf0b3477f6f223943b87c728af1a"
2103
  },
2104
  "mirrors": {
2105
  "hf_space": {
2106
  "path": "hf_space:data/task_method_20_result_matrix.json",
2107
  "exists": true,
2108
- "bytes": 128510,
2109
- "sha256": "13722fe288669b640230ac894909d247f348cf0b3477f6f223943b87c728af1a"
2110
  },
2111
  "hf_artifacts_data": {
2112
  "path": "hf_artifacts:data/task_method_20_result_matrix.json",
2113
  "exists": true,
2114
- "bytes": 128510,
2115
- "sha256": "13722fe288669b640230ac894909d247f348cf0b3477f6f223943b87c728af1a"
2116
  },
2117
  "hf_artifacts": {
2118
  "path": "hf_artifacts:docs/data/task_method_20_result_matrix.json",
2119
  "exists": true,
2120
- "bytes": 128510,
2121
- "sha256": "13722fe288669b640230ac894909d247f348cf0b3477f6f223943b87c728af1a"
2122
  },
2123
  "hf_model_data": {
2124
  "path": "hf_model:data/task_method_20_result_matrix.json",
2125
  "exists": true,
2126
- "bytes": 128510,
2127
- "sha256": "13722fe288669b640230ac894909d247f348cf0b3477f6f223943b87c728af1a"
2128
  },
2129
  "hf_model_docs_data": {
2130
  "path": "hf_model:docs/data/task_method_20_result_matrix.json",
2131
  "exists": true,
2132
- "bytes": 128510,
2133
- "sha256": "13722fe288669b640230ac894909d247f348cf0b3477f6f223943b87c728af1a"
2134
  },
2135
  "hf_model": {
2136
  "path": "hf_model:metrics/task_method_20_result_matrix.json",
2137
  "exists": true,
2138
- "bytes": 128510,
2139
- "sha256": "13722fe288669b640230ac894909d247f348cf0b3477f6f223943b87c728af1a"
2140
  }
2141
  },
2142
  "failures": []
@@ -2147,45 +2147,45 @@
2147
  "local": {
2148
  "path": "repo:docs/data/task_method_20_gap_audit.json",
2149
  "exists": true,
2150
- "bytes": 20037,
2151
- "sha256": "1e96af5557f4617e160233eb7d2180644f29fa9a8c67afed710ac0425fc71ddd"
2152
  },
2153
  "mirrors": {
2154
  "hf_space": {
2155
  "path": "hf_space:data/task_method_20_gap_audit.json",
2156
  "exists": true,
2157
- "bytes": 20037,
2158
- "sha256": "1e96af5557f4617e160233eb7d2180644f29fa9a8c67afed710ac0425fc71ddd"
2159
  },
2160
  "hf_artifacts_data": {
2161
  "path": "hf_artifacts:data/task_method_20_gap_audit.json",
2162
  "exists": true,
2163
- "bytes": 20037,
2164
- "sha256": "1e96af5557f4617e160233eb7d2180644f29fa9a8c67afed710ac0425fc71ddd"
2165
  },
2166
  "hf_artifacts": {
2167
  "path": "hf_artifacts:docs/data/task_method_20_gap_audit.json",
2168
  "exists": true,
2169
- "bytes": 20037,
2170
- "sha256": "1e96af5557f4617e160233eb7d2180644f29fa9a8c67afed710ac0425fc71ddd"
2171
  },
2172
  "hf_model_data": {
2173
  "path": "hf_model:data/task_method_20_gap_audit.json",
2174
  "exists": true,
2175
- "bytes": 20037,
2176
- "sha256": "1e96af5557f4617e160233eb7d2180644f29fa9a8c67afed710ac0425fc71ddd"
2177
  },
2178
  "hf_model_docs_data": {
2179
  "path": "hf_model:docs/data/task_method_20_gap_audit.json",
2180
  "exists": true,
2181
- "bytes": 20037,
2182
- "sha256": "1e96af5557f4617e160233eb7d2180644f29fa9a8c67afed710ac0425fc71ddd"
2183
  },
2184
  "hf_model": {
2185
  "path": "hf_model:metrics/task_method_20_gap_audit.json",
2186
  "exists": true,
2187
- "bytes": 20037,
2188
- "sha256": "1e96af5557f4617e160233eb7d2180644f29fa9a8c67afed710ac0425fc71ddd"
2189
  }
2190
  },
2191
  "failures": []
@@ -2245,45 +2245,45 @@
2245
  "local": {
2246
  "path": "repo:docs/data/unified_task_model_radar.json",
2247
  "exists": true,
2248
- "bytes": 228639,
2249
- "sha256": "5e163cc77679766fd86fdc40b324e61417f5a355b447922c2fce1d17ba52730e"
2250
  },
2251
  "mirrors": {
2252
  "hf_space": {
2253
  "path": "hf_space:data/unified_task_model_radar.json",
2254
  "exists": true,
2255
- "bytes": 228639,
2256
- "sha256": "5e163cc77679766fd86fdc40b324e61417f5a355b447922c2fce1d17ba52730e"
2257
  },
2258
  "hf_artifacts_data": {
2259
  "path": "hf_artifacts:data/unified_task_model_radar.json",
2260
  "exists": true,
2261
- "bytes": 228639,
2262
- "sha256": "5e163cc77679766fd86fdc40b324e61417f5a355b447922c2fce1d17ba52730e"
2263
  },
2264
  "hf_artifacts": {
2265
  "path": "hf_artifacts:docs/data/unified_task_model_radar.json",
2266
  "exists": true,
2267
- "bytes": 228639,
2268
- "sha256": "5e163cc77679766fd86fdc40b324e61417f5a355b447922c2fce1d17ba52730e"
2269
  },
2270
  "hf_model_data": {
2271
  "path": "hf_model:data/unified_task_model_radar.json",
2272
  "exists": true,
2273
- "bytes": 228639,
2274
- "sha256": "5e163cc77679766fd86fdc40b324e61417f5a355b447922c2fce1d17ba52730e"
2275
  },
2276
  "hf_model_docs_data": {
2277
  "path": "hf_model:docs/data/unified_task_model_radar.json",
2278
  "exists": true,
2279
- "bytes": 228639,
2280
- "sha256": "5e163cc77679766fd86fdc40b324e61417f5a355b447922c2fce1d17ba52730e"
2281
  },
2282
  "hf_model": {
2283
  "path": "hf_model:metrics/unified_task_model_radar.json",
2284
  "exists": true,
2285
- "bytes": 228639,
2286
- "sha256": "5e163cc77679766fd86fdc40b324e61417f5a355b447922c2fce1d17ba52730e"
2287
  }
2288
  },
2289
  "failures": []
@@ -2295,44 +2295,44 @@
2295
  "path": "repo:docs/data/website_integrity.json",
2296
  "exists": true,
2297
  "bytes": 20023,
2298
- "sha256": "fb7171fd1d4962c3b8b289a1499c2eacb5404bf25587df34043c8712ef803acc"
2299
  },
2300
  "mirrors": {
2301
  "hf_space": {
2302
  "path": "hf_space:data/website_integrity.json",
2303
  "exists": true,
2304
  "bytes": 20023,
2305
- "sha256": "fb7171fd1d4962c3b8b289a1499c2eacb5404bf25587df34043c8712ef803acc"
2306
  },
2307
  "hf_artifacts_data": {
2308
  "path": "hf_artifacts:data/website_integrity.json",
2309
  "exists": true,
2310
  "bytes": 20023,
2311
- "sha256": "fb7171fd1d4962c3b8b289a1499c2eacb5404bf25587df34043c8712ef803acc"
2312
  },
2313
  "hf_artifacts": {
2314
  "path": "hf_artifacts:docs/data/website_integrity.json",
2315
  "exists": true,
2316
  "bytes": 20023,
2317
- "sha256": "fb7171fd1d4962c3b8b289a1499c2eacb5404bf25587df34043c8712ef803acc"
2318
  },
2319
  "hf_model_data": {
2320
  "path": "hf_model:data/website_integrity.json",
2321
  "exists": true,
2322
  "bytes": 20023,
2323
- "sha256": "fb7171fd1d4962c3b8b289a1499c2eacb5404bf25587df34043c8712ef803acc"
2324
  },
2325
  "hf_model_docs_data": {
2326
  "path": "hf_model:docs/data/website_integrity.json",
2327
  "exists": true,
2328
  "bytes": 20023,
2329
- "sha256": "fb7171fd1d4962c3b8b289a1499c2eacb5404bf25587df34043c8712ef803acc"
2330
  },
2331
  "hf_model": {
2332
  "path": "hf_model:metrics/website_integrity.json",
2333
  "exists": true,
2334
  "bytes": 20023,
2335
- "sha256": "fb7171fd1d4962c3b8b289a1499c2eacb5404bf25587df34043c8712ef803acc"
2336
  }
2337
  },
2338
  "failures": []
@@ -2515,33 +2515,33 @@
2515
  "local": {
2516
  "path": "repo:docs/assets/charts/episode128_task_model_radar.svg",
2517
  "exists": true,
2518
- "bytes": 50154,
2519
- "sha256": "be065f88f043241ce9d006bdc4a4e90b3b44ce5e04a8370ce65ce9b8f66fa5bc"
2520
  },
2521
  "mirrors": {
2522
  "hf_space": {
2523
  "path": "hf_space:assets/charts/episode128_task_model_radar.svg",
2524
  "exists": true,
2525
- "bytes": 50154,
2526
- "sha256": "be065f88f043241ce9d006bdc4a4e90b3b44ce5e04a8370ce65ce9b8f66fa5bc"
2527
  },
2528
  "hf_artifacts_docs": {
2529
  "path": "hf_artifacts:docs/assets/charts/episode128_task_model_radar.svg",
2530
  "exists": true,
2531
- "bytes": 50154,
2532
- "sha256": "be065f88f043241ce9d006bdc4a4e90b3b44ce5e04a8370ce65ce9b8f66fa5bc"
2533
  },
2534
  "hf_artifacts_card": {
2535
  "path": "hf_artifacts:assets/charts/episode128_task_model_radar.svg",
2536
  "exists": true,
2537
- "bytes": 50154,
2538
- "sha256": "be065f88f043241ce9d006bdc4a4e90b3b44ce5e04a8370ce65ce9b8f66fa5bc"
2539
  },
2540
  "hf_model": {
2541
  "path": "hf_model:assets/charts/episode128_task_model_radar.svg",
2542
  "exists": true,
2543
- "bytes": 50154,
2544
- "sha256": "be065f88f043241ce9d006bdc4a4e90b3b44ce5e04a8370ce65ce9b8f66fa5bc"
2545
  }
2546
  },
2547
  "failures": []
@@ -2589,33 +2589,33 @@
2589
  "local": {
2590
  "path": "repo:docs/assets/charts/unified_task_model_radar.svg",
2591
  "exists": true,
2592
- "bytes": 56167,
2593
- "sha256": "1953a2c56b8c5a11a7def73c2899a49dc6c20d0cef3ae63c53fb10bfec7b7264"
2594
  },
2595
  "mirrors": {
2596
  "hf_space": {
2597
  "path": "hf_space:assets/charts/unified_task_model_radar.svg",
2598
  "exists": true,
2599
- "bytes": 56167,
2600
- "sha256": "1953a2c56b8c5a11a7def73c2899a49dc6c20d0cef3ae63c53fb10bfec7b7264"
2601
  },
2602
  "hf_artifacts_docs": {
2603
  "path": "hf_artifacts:docs/assets/charts/unified_task_model_radar.svg",
2604
  "exists": true,
2605
- "bytes": 56167,
2606
- "sha256": "1953a2c56b8c5a11a7def73c2899a49dc6c20d0cef3ae63c53fb10bfec7b7264"
2607
  },
2608
  "hf_artifacts_card": {
2609
  "path": "hf_artifacts:assets/charts/unified_task_model_radar.svg",
2610
  "exists": true,
2611
- "bytes": 56167,
2612
- "sha256": "1953a2c56b8c5a11a7def73c2899a49dc6c20d0cef3ae63c53fb10bfec7b7264"
2613
  },
2614
  "hf_model": {
2615
  "path": "hf_model:assets/charts/unified_task_model_radar.svg",
2616
  "exists": true,
2617
- "bytes": 56167,
2618
- "sha256": "1953a2c56b8c5a11a7def73c2899a49dc6c20d0cef3ae63c53fb10bfec7b7264"
2619
  }
2620
  },
2621
  "failures": []
@@ -5638,39 +5638,39 @@
5638
  "local": {
5639
  "path": "repo:docs/index.html",
5640
  "exists": true,
5641
- "bytes": 254760,
5642
- "sha256": "56300081529887298a0e412302866cb7539384f0ed73b16cd9240e605fd4046a"
5643
  },
5644
  "mirrors": {
5645
  "hf_space": {
5646
  "path": "hf_space:index.html",
5647
  "exists": true,
5648
- "bytes": 254760,
5649
- "sha256": "56300081529887298a0e412302866cb7539384f0ed73b16cd9240e605fd4046a"
5650
  },
5651
  "hf_artifacts_root": {
5652
  "path": "hf_artifacts:index.html",
5653
  "exists": true,
5654
- "bytes": 254760,
5655
- "sha256": "56300081529887298a0e412302866cb7539384f0ed73b16cd9240e605fd4046a"
5656
  },
5657
  "hf_artifacts_docs": {
5658
  "path": "hf_artifacts:docs/index.html",
5659
  "exists": true,
5660
- "bytes": 254760,
5661
- "sha256": "56300081529887298a0e412302866cb7539384f0ed73b16cd9240e605fd4046a"
5662
  },
5663
  "hf_model": {
5664
  "path": "hf_model:index.html",
5665
  "exists": true,
5666
- "bytes": 254760,
5667
- "sha256": "56300081529887298a0e412302866cb7539384f0ed73b16cd9240e605fd4046a"
5668
  },
5669
  "hf_model_docs": {
5670
  "path": "hf_model:docs/index.html",
5671
  "exists": true,
5672
- "bytes": 254760,
5673
- "sha256": "56300081529887298a0e412302866cb7539384f0ed73b16cd9240e605fd4046a"
5674
  }
5675
  },
5676
  "failures": []
@@ -12829,6 +12829,31 @@
12829
  },
12830
  "failures": []
12831
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12832
  {
12833
  "name": "results/omni_finetune/deferred_launchers/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620.runner.log",
12834
  "status": "pass",
@@ -19154,6 +19179,306 @@
19154
  },
19155
  "failures": []
19156
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19157
  {
19158
  "name": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/RUN_REPORT.md",
19159
  "status": "pass",
@@ -23267,6 +23592,25 @@
23267
  },
23268
  "failures": []
23269
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23270
  {
23271
  "name": "space_results/omni_finetune/deferred_launchers/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620.runner.log",
23272
  "status": "pass",
@@ -24445,6 +24789,234 @@
24445
  },
24446
  "failures": []
24447
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24448
  {
24449
  "name": "space_results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/RUN_REPORT.md",
24450
  "status": "pass",
@@ -27539,27 +28111,27 @@
27539
  "local": {
27540
  "path": "repo:TASK_METHOD_20_GAP_AUDIT.md",
27541
  "exists": true,
27542
- "bytes": 6398,
27543
- "sha256": "5d63a8ad52dcdd3624c9c73fd292121cf9a14644ad369784894c3fd1fcaa83a1"
27544
  },
27545
  "mirrors": {
27546
  "hf_space": {
27547
  "path": "hf_space:TASK_METHOD_20_GAP_AUDIT.md",
27548
  "exists": true,
27549
- "bytes": 6398,
27550
- "sha256": "5d63a8ad52dcdd3624c9c73fd292121cf9a14644ad369784894c3fd1fcaa83a1"
27551
  },
27552
  "hf_artifacts": {
27553
  "path": "hf_artifacts:TASK_METHOD_20_GAP_AUDIT.md",
27554
  "exists": true,
27555
- "bytes": 6398,
27556
- "sha256": "5d63a8ad52dcdd3624c9c73fd292121cf9a14644ad369784894c3fd1fcaa83a1"
27557
  },
27558
  "hf_model": {
27559
  "path": "hf_model:TASK_METHOD_20_GAP_AUDIT.md",
27560
  "exists": true,
27561
- "bytes": 6398,
27562
- "sha256": "5d63a8ad52dcdd3624c9c73fd292121cf9a14644ad369784894c3fd1fcaa83a1"
27563
  }
27564
  },
27565
  "failures": []
@@ -27570,27 +28142,27 @@
27570
  "local": {
27571
  "path": "repo:TASK_METHOD_20_RESULT_MATRIX.md",
27572
  "exists": true,
27573
- "bytes": 3771,
27574
- "sha256": "a17499bf2e6f81dbb5682abd42ae2f66ff2ae41d28ea8f4a437eb65350956fc6"
27575
  },
27576
  "mirrors": {
27577
  "hf_space": {
27578
  "path": "hf_space:TASK_METHOD_20_RESULT_MATRIX.md",
27579
  "exists": true,
27580
- "bytes": 3771,
27581
- "sha256": "a17499bf2e6f81dbb5682abd42ae2f66ff2ae41d28ea8f4a437eb65350956fc6"
27582
  },
27583
  "hf_artifacts": {
27584
  "path": "hf_artifacts:TASK_METHOD_20_RESULT_MATRIX.md",
27585
  "exists": true,
27586
- "bytes": 3771,
27587
- "sha256": "a17499bf2e6f81dbb5682abd42ae2f66ff2ae41d28ea8f4a437eb65350956fc6"
27588
  },
27589
  "hf_model": {
27590
  "path": "hf_model:TASK_METHOD_20_RESULT_MATRIX.md",
27591
  "exists": true,
27592
- "bytes": 3771,
27593
- "sha256": "a17499bf2e6f81dbb5682abd42ae2f66ff2ae41d28ea8f4a437eb65350956fc6"
27594
  }
27595
  },
27596
  "failures": []
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-20T15:23:41+00:00",
4
  "hf_root": "hf_publish",
5
  "summary": {
6
+ "group_count": 1114,
7
  "failure_count": 0,
8
  "failures_by_surface": {}
9
  },
 
138
  "local": {
139
  "path": "repo:docs/data/artifact_index.json",
140
  "exists": true,
141
+ "bytes": 116645,
142
+ "sha256": "822a54d5f0c4b90c582057bc8c83f22b4f9d78aaf64262c6302cf40898f42b78"
143
  },
144
  "mirrors": {
145
  "hf_space": {
146
  "path": "hf_space:data/artifact_index.json",
147
  "exists": true,
148
+ "bytes": 116645,
149
+ "sha256": "822a54d5f0c4b90c582057bc8c83f22b4f9d78aaf64262c6302cf40898f42b78"
150
  },
151
  "hf_artifacts_data": {
152
  "path": "hf_artifacts:data/artifact_index.json",
153
  "exists": true,
154
+ "bytes": 116645,
155
+ "sha256": "822a54d5f0c4b90c582057bc8c83f22b4f9d78aaf64262c6302cf40898f42b78"
156
  },
157
  "hf_artifacts": {
158
  "path": "hf_artifacts:docs/data/artifact_index.json",
159
  "exists": true,
160
+ "bytes": 116645,
161
+ "sha256": "822a54d5f0c4b90c582057bc8c83f22b4f9d78aaf64262c6302cf40898f42b78"
162
  },
163
  "hf_model_data": {
164
  "path": "hf_model:data/artifact_index.json",
165
  "exists": true,
166
+ "bytes": 116645,
167
+ "sha256": "822a54d5f0c4b90c582057bc8c83f22b4f9d78aaf64262c6302cf40898f42b78"
168
  },
169
  "hf_model_docs_data": {
170
  "path": "hf_model:docs/data/artifact_index.json",
171
  "exists": true,
172
+ "bytes": 116645,
173
+ "sha256": "822a54d5f0c4b90c582057bc8c83f22b4f9d78aaf64262c6302cf40898f42b78"
174
  },
175
  "hf_model": {
176
  "path": "hf_model:metrics/artifact_index.json",
177
  "exists": true,
178
+ "bytes": 116645,
179
+ "sha256": "822a54d5f0c4b90c582057bc8c83f22b4f9d78aaf64262c6302cf40898f42b78"
180
  }
181
  },
182
  "failures": []
 
432
  "local": {
433
  "path": "repo:docs/data/live_publication_status.json",
434
  "exists": true,
435
+ "bytes": 181991,
436
+ "sha256": "6b6a6c3a7dca04546fbf26e9dfdbb29d5318084d7d1553018d345d83c0414cc3"
437
  },
438
  "mirrors": {
439
  "hf_space": {
440
  "path": "hf_space:data/live_publication_status.json",
441
  "exists": true,
442
+ "bytes": 181991,
443
+ "sha256": "6b6a6c3a7dca04546fbf26e9dfdbb29d5318084d7d1553018d345d83c0414cc3"
444
  },
445
  "hf_artifacts_data": {
446
  "path": "hf_artifacts:data/live_publication_status.json",
447
  "exists": true,
448
+ "bytes": 181991,
449
+ "sha256": "6b6a6c3a7dca04546fbf26e9dfdbb29d5318084d7d1553018d345d83c0414cc3"
450
  },
451
  "hf_artifacts": {
452
  "path": "hf_artifacts:docs/data/live_publication_status.json",
453
  "exists": true,
454
+ "bytes": 181991,
455
+ "sha256": "6b6a6c3a7dca04546fbf26e9dfdbb29d5318084d7d1553018d345d83c0414cc3"
456
  },
457
  "hf_model_data": {
458
  "path": "hf_model:data/live_publication_status.json",
459
  "exists": true,
460
+ "bytes": 181991,
461
+ "sha256": "6b6a6c3a7dca04546fbf26e9dfdbb29d5318084d7d1553018d345d83c0414cc3"
462
  },
463
  "hf_model_docs_data": {
464
  "path": "hf_model:docs/data/live_publication_status.json",
465
  "exists": true,
466
+ "bytes": 181991,
467
+ "sha256": "6b6a6c3a7dca04546fbf26e9dfdbb29d5318084d7d1553018d345d83c0414cc3"
468
  },
469
  "hf_model": {
470
  "path": "hf_model:metrics/live_publication_status.json",
471
  "exists": true,
472
+ "bytes": 181991,
473
+ "sha256": "6b6a6c3a7dca04546fbf26e9dfdbb29d5318084d7d1553018d345d83c0414cc3"
474
  }
475
  },
476
  "failures": []
 
923
  "path": "repo:docs/data/publication_audit.json",
924
  "exists": true,
925
  "bytes": 10277,
926
+ "sha256": "622756fbfd302077bea53ba1aaf2f210ea29ea14b3e161de3d9653f519213516"
927
  },
928
  "mirrors": {
929
  "hf_space": {
930
  "path": "hf_space:data/publication_audit.json",
931
  "exists": true,
932
  "bytes": 10277,
933
+ "sha256": "622756fbfd302077bea53ba1aaf2f210ea29ea14b3e161de3d9653f519213516"
934
  },
935
  "hf_artifacts_data": {
936
  "path": "hf_artifacts:data/publication_audit.json",
937
  "exists": true,
938
  "bytes": 10277,
939
+ "sha256": "622756fbfd302077bea53ba1aaf2f210ea29ea14b3e161de3d9653f519213516"
940
  },
941
  "hf_artifacts": {
942
  "path": "hf_artifacts:docs/data/publication_audit.json",
943
  "exists": true,
944
  "bytes": 10277,
945
+ "sha256": "622756fbfd302077bea53ba1aaf2f210ea29ea14b3e161de3d9653f519213516"
946
  },
947
  "hf_model_data": {
948
  "path": "hf_model:data/publication_audit.json",
949
  "exists": true,
950
  "bytes": 10277,
951
+ "sha256": "622756fbfd302077bea53ba1aaf2f210ea29ea14b3e161de3d9653f519213516"
952
  },
953
  "hf_model_docs_data": {
954
  "path": "hf_model:docs/data/publication_audit.json",
955
  "exists": true,
956
  "bytes": 10277,
957
+ "sha256": "622756fbfd302077bea53ba1aaf2f210ea29ea14b3e161de3d9653f519213516"
958
  },
959
  "hf_model": {
960
  "path": "hf_model:metrics/publication_audit.json",
961
  "exists": true,
962
  "bytes": 10277,
963
+ "sha256": "622756fbfd302077bea53ba1aaf2f210ea29ea14b3e161de3d9653f519213516"
964
  }
965
  },
966
  "failures": []
 
972
  "path": "repo:docs/data/public_surface_qa.json",
973
  "exists": true,
974
  "bytes": 7126,
975
+ "sha256": "057cd4aef556cdf667f4c92ba67009ea0dd8f08eae9f81019a51773c5b63dea4"
976
  },
977
  "mirrors": {
978
  "hf_space": {
979
  "path": "hf_space:data/public_surface_qa.json",
980
  "exists": true,
981
  "bytes": 7126,
982
+ "sha256": "057cd4aef556cdf667f4c92ba67009ea0dd8f08eae9f81019a51773c5b63dea4"
983
  },
984
  "hf_artifacts_data": {
985
  "path": "hf_artifacts:data/public_surface_qa.json",
986
  "exists": true,
987
  "bytes": 7126,
988
+ "sha256": "057cd4aef556cdf667f4c92ba67009ea0dd8f08eae9f81019a51773c5b63dea4"
989
  },
990
  "hf_artifacts": {
991
  "path": "hf_artifacts:docs/data/public_surface_qa.json",
992
  "exists": true,
993
  "bytes": 7126,
994
+ "sha256": "057cd4aef556cdf667f4c92ba67009ea0dd8f08eae9f81019a51773c5b63dea4"
995
  },
996
  "hf_model_data": {
997
  "path": "hf_model:data/public_surface_qa.json",
998
  "exists": true,
999
  "bytes": 7126,
1000
+ "sha256": "057cd4aef556cdf667f4c92ba67009ea0dd8f08eae9f81019a51773c5b63dea4"
1001
  },
1002
  "hf_model_docs_data": {
1003
  "path": "hf_model:docs/data/public_surface_qa.json",
1004
  "exists": true,
1005
  "bytes": 7126,
1006
+ "sha256": "057cd4aef556cdf667f4c92ba67009ea0dd8f08eae9f81019a51773c5b63dea4"
1007
  },
1008
  "hf_model": {
1009
  "path": "hf_model:metrics/public_surface_qa.json",
1010
  "exists": true,
1011
  "bytes": 7126,
1012
+ "sha256": "057cd4aef556cdf667f4c92ba67009ea0dd8f08eae9f81019a51773c5b63dea4"
1013
  }
1014
  },
1015
  "failures": []
 
1119
  "path": "repo:docs/data/quality_gates.json",
1120
  "exists": true,
1121
  "bytes": 8100,
1122
+ "sha256": "54867eb8abcebb9948f3a6f2f5f3c3a995373b856f00d1af23212a7c39633bc4"
1123
  },
1124
  "mirrors": {
1125
  "hf_space": {
1126
  "path": "hf_space:data/quality_gates.json",
1127
  "exists": true,
1128
  "bytes": 8100,
1129
+ "sha256": "54867eb8abcebb9948f3a6f2f5f3c3a995373b856f00d1af23212a7c39633bc4"
1130
  },
1131
  "hf_artifacts_data": {
1132
  "path": "hf_artifacts:data/quality_gates.json",
1133
  "exists": true,
1134
  "bytes": 8100,
1135
+ "sha256": "54867eb8abcebb9948f3a6f2f5f3c3a995373b856f00d1af23212a7c39633bc4"
1136
  },
1137
  "hf_artifacts": {
1138
  "path": "hf_artifacts:docs/data/quality_gates.json",
1139
  "exists": true,
1140
  "bytes": 8100,
1141
+ "sha256": "54867eb8abcebb9948f3a6f2f5f3c3a995373b856f00d1af23212a7c39633bc4"
1142
  },
1143
  "hf_model_data": {
1144
  "path": "hf_model:data/quality_gates.json",
1145
  "exists": true,
1146
  "bytes": 8100,
1147
+ "sha256": "54867eb8abcebb9948f3a6f2f5f3c3a995373b856f00d1af23212a7c39633bc4"
1148
  },
1149
  "hf_model_docs_data": {
1150
  "path": "hf_model:docs/data/quality_gates.json",
1151
  "exists": true,
1152
  "bytes": 8100,
1153
+ "sha256": "54867eb8abcebb9948f3a6f2f5f3c3a995373b856f00d1af23212a7c39633bc4"
1154
  },
1155
  "hf_model": {
1156
  "path": "hf_model:metrics/quality_gates.json",
1157
  "exists": true,
1158
  "bytes": 8100,
1159
+ "sha256": "54867eb8abcebb9948f3a6f2f5f3c3a995373b856f00d1af23212a7c39633bc4"
1160
  }
1161
  },
1162
  "failures": []
 
1560
  "path": "repo:docs/data/scope_claims_audit.json",
1561
  "exists": true,
1562
  "bytes": 21630,
1563
+ "sha256": "947dfce64ff88a0f2dd7b1f72091aa61ccd58596525e2e43fd63c9b02ef23f62"
1564
  },
1565
  "mirrors": {
1566
  "hf_space": {
1567
  "path": "hf_space:data/scope_claims_audit.json",
1568
  "exists": true,
1569
  "bytes": 21630,
1570
+ "sha256": "947dfce64ff88a0f2dd7b1f72091aa61ccd58596525e2e43fd63c9b02ef23f62"
1571
  },
1572
  "hf_artifacts_data": {
1573
  "path": "hf_artifacts:data/scope_claims_audit.json",
1574
  "exists": true,
1575
  "bytes": 21630,
1576
+ "sha256": "947dfce64ff88a0f2dd7b1f72091aa61ccd58596525e2e43fd63c9b02ef23f62"
1577
  },
1578
  "hf_artifacts": {
1579
  "path": "hf_artifacts:docs/data/scope_claims_audit.json",
1580
  "exists": true,
1581
  "bytes": 21630,
1582
+ "sha256": "947dfce64ff88a0f2dd7b1f72091aa61ccd58596525e2e43fd63c9b02ef23f62"
1583
  },
1584
  "hf_model_data": {
1585
  "path": "hf_model:data/scope_claims_audit.json",
1586
  "exists": true,
1587
  "bytes": 21630,
1588
+ "sha256": "947dfce64ff88a0f2dd7b1f72091aa61ccd58596525e2e43fd63c9b02ef23f62"
1589
  },
1590
  "hf_model_docs_data": {
1591
  "path": "hf_model:docs/data/scope_claims_audit.json",
1592
  "exists": true,
1593
  "bytes": 21630,
1594
+ "sha256": "947dfce64ff88a0f2dd7b1f72091aa61ccd58596525e2e43fd63c9b02ef23f62"
1595
  },
1596
  "hf_model": {
1597
  "path": "hf_model:metrics/scope_claims_audit.json",
1598
  "exists": true,
1599
  "bytes": 21630,
1600
+ "sha256": "947dfce64ff88a0f2dd7b1f72091aa61ccd58596525e2e43fd63c9b02ef23f62"
1601
  }
1602
  },
1603
  "failures": []
 
1658
  "path": "repo:docs/data/source_alignment_audit.json",
1659
  "exists": true,
1660
  "bytes": 4432,
1661
+ "sha256": "ba571e397b5dfe71f82efc83dc30a44ec693b92f1316e6dd52adca12811ad52f"
1662
  },
1663
  "mirrors": {
1664
  "hf_space": {
1665
  "path": "hf_space:data/source_alignment_audit.json",
1666
  "exists": true,
1667
  "bytes": 4432,
1668
+ "sha256": "ba571e397b5dfe71f82efc83dc30a44ec693b92f1316e6dd52adca12811ad52f"
1669
  },
1670
  "hf_artifacts_data": {
1671
  "path": "hf_artifacts:data/source_alignment_audit.json",
1672
  "exists": true,
1673
  "bytes": 4432,
1674
+ "sha256": "ba571e397b5dfe71f82efc83dc30a44ec693b92f1316e6dd52adca12811ad52f"
1675
  },
1676
  "hf_artifacts": {
1677
  "path": "hf_artifacts:docs/data/source_alignment_audit.json",
1678
  "exists": true,
1679
  "bytes": 4432,
1680
+ "sha256": "ba571e397b5dfe71f82efc83dc30a44ec693b92f1316e6dd52adca12811ad52f"
1681
  },
1682
  "hf_model_data": {
1683
  "path": "hf_model:data/source_alignment_audit.json",
1684
  "exists": true,
1685
  "bytes": 4432,
1686
+ "sha256": "ba571e397b5dfe71f82efc83dc30a44ec693b92f1316e6dd52adca12811ad52f"
1687
  },
1688
  "hf_model_docs_data": {
1689
  "path": "hf_model:docs/data/source_alignment_audit.json",
1690
  "exists": true,
1691
  "bytes": 4432,
1692
+ "sha256": "ba571e397b5dfe71f82efc83dc30a44ec693b92f1316e6dd52adca12811ad52f"
1693
  },
1694
  "hf_model": {
1695
  "path": "hf_model:metrics/source_alignment_audit.json",
1696
  "exists": true,
1697
  "bytes": 4432,
1698
+ "sha256": "ba571e397b5dfe71f82efc83dc30a44ec693b92f1316e6dd52adca12811ad52f"
1699
  }
1700
  },
1701
  "failures": []
 
1756
  "path": "repo:docs/data/single_episode_task_model_radar.json",
1757
  "exists": true,
1758
  "bytes": 51064,
1759
+ "sha256": "dad53d1c12eb8947b064372e5ccd7c416cefab6e57175cd990e60af9e56ec958"
1760
  },
1761
  "mirrors": {
1762
  "hf_space": {
1763
  "path": "hf_space:data/single_episode_task_model_radar.json",
1764
  "exists": true,
1765
  "bytes": 51064,
1766
+ "sha256": "dad53d1c12eb8947b064372e5ccd7c416cefab6e57175cd990e60af9e56ec958"
1767
  },
1768
  "hf_artifacts_data": {
1769
  "path": "hf_artifacts:data/single_episode_task_model_radar.json",
1770
  "exists": true,
1771
  "bytes": 51064,
1772
+ "sha256": "dad53d1c12eb8947b064372e5ccd7c416cefab6e57175cd990e60af9e56ec958"
1773
  },
1774
  "hf_artifacts": {
1775
  "path": "hf_artifacts:docs/data/single_episode_task_model_radar.json",
1776
  "exists": true,
1777
  "bytes": 51064,
1778
+ "sha256": "dad53d1c12eb8947b064372e5ccd7c416cefab6e57175cd990e60af9e56ec958"
1779
  },
1780
  "hf_model_data": {
1781
  "path": "hf_model:data/single_episode_task_model_radar.json",
1782
  "exists": true,
1783
  "bytes": 51064,
1784
+ "sha256": "dad53d1c12eb8947b064372e5ccd7c416cefab6e57175cd990e60af9e56ec958"
1785
  },
1786
  "hf_model_docs_data": {
1787
  "path": "hf_model:docs/data/single_episode_task_model_radar.json",
1788
  "exists": true,
1789
  "bytes": 51064,
1790
+ "sha256": "dad53d1c12eb8947b064372e5ccd7c416cefab6e57175cd990e60af9e56ec958"
1791
  },
1792
  "hf_model": {
1793
  "path": "hf_model:metrics/single_episode_task_model_radar.json",
1794
  "exists": true,
1795
  "bytes": 51064,
1796
+ "sha256": "dad53d1c12eb8947b064372e5ccd7c416cefab6e57175cd990e60af9e56ec958"
1797
  }
1798
  },
1799
  "failures": []
 
1804
  "local": {
1805
  "path": "repo:docs/data/episode128_task_model_radar.json",
1806
  "exists": true,
1807
+ "bytes": 184569,
1808
+ "sha256": "a07df912c0909bd6067eb81a0f54856c1ed6da5ca5feee54f6b32d3296b3724e"
1809
  },
1810
  "mirrors": {
1811
  "hf_space": {
1812
  "path": "hf_space:data/episode128_task_model_radar.json",
1813
  "exists": true,
1814
+ "bytes": 184569,
1815
+ "sha256": "a07df912c0909bd6067eb81a0f54856c1ed6da5ca5feee54f6b32d3296b3724e"
1816
  },
1817
  "hf_artifacts_data": {
1818
  "path": "hf_artifacts:data/episode128_task_model_radar.json",
1819
  "exists": true,
1820
+ "bytes": 184569,
1821
+ "sha256": "a07df912c0909bd6067eb81a0f54856c1ed6da5ca5feee54f6b32d3296b3724e"
1822
  },
1823
  "hf_artifacts": {
1824
  "path": "hf_artifacts:docs/data/episode128_task_model_radar.json",
1825
  "exists": true,
1826
+ "bytes": 184569,
1827
+ "sha256": "a07df912c0909bd6067eb81a0f54856c1ed6da5ca5feee54f6b32d3296b3724e"
1828
  },
1829
  "hf_model_data": {
1830
  "path": "hf_model:data/episode128_task_model_radar.json",
1831
  "exists": true,
1832
+ "bytes": 184569,
1833
+ "sha256": "a07df912c0909bd6067eb81a0f54856c1ed6da5ca5feee54f6b32d3296b3724e"
1834
  },
1835
  "hf_model_docs_data": {
1836
  "path": "hf_model:docs/data/episode128_task_model_radar.json",
1837
  "exists": true,
1838
+ "bytes": 184569,
1839
+ "sha256": "a07df912c0909bd6067eb81a0f54856c1ed6da5ca5feee54f6b32d3296b3724e"
1840
  },
1841
  "hf_model": {
1842
  "path": "hf_model:metrics/episode128_task_model_radar.json",
1843
  "exists": true,
1844
+ "bytes": 184569,
1845
+ "sha256": "a07df912c0909bd6067eb81a0f54856c1ed6da5ca5feee54f6b32d3296b3724e"
1846
  }
1847
  },
1848
  "failures": []
 
2001
  "path": "repo:docs/data/task_surface_integrity.json",
2002
  "exists": true,
2003
  "bytes": 45779,
2004
+ "sha256": "47c81fe3645e87eefe015d7fac0c23fb5a5411efce37f7d2b61ccc5398b03b1d"
2005
  },
2006
  "mirrors": {
2007
  "hf_space": {
2008
  "path": "hf_space:data/task_surface_integrity.json",
2009
  "exists": true,
2010
  "bytes": 45779,
2011
+ "sha256": "47c81fe3645e87eefe015d7fac0c23fb5a5411efce37f7d2b61ccc5398b03b1d"
2012
  },
2013
  "hf_artifacts_data": {
2014
  "path": "hf_artifacts:data/task_surface_integrity.json",
2015
  "exists": true,
2016
  "bytes": 45779,
2017
+ "sha256": "47c81fe3645e87eefe015d7fac0c23fb5a5411efce37f7d2b61ccc5398b03b1d"
2018
  },
2019
  "hf_artifacts": {
2020
  "path": "hf_artifacts:docs/data/task_surface_integrity.json",
2021
  "exists": true,
2022
  "bytes": 45779,
2023
+ "sha256": "47c81fe3645e87eefe015d7fac0c23fb5a5411efce37f7d2b61ccc5398b03b1d"
2024
  },
2025
  "hf_model_data": {
2026
  "path": "hf_model:data/task_surface_integrity.json",
2027
  "exists": true,
2028
  "bytes": 45779,
2029
+ "sha256": "47c81fe3645e87eefe015d7fac0c23fb5a5411efce37f7d2b61ccc5398b03b1d"
2030
  },
2031
  "hf_model_docs_data": {
2032
  "path": "hf_model:docs/data/task_surface_integrity.json",
2033
  "exists": true,
2034
  "bytes": 45779,
2035
+ "sha256": "47c81fe3645e87eefe015d7fac0c23fb5a5411efce37f7d2b61ccc5398b03b1d"
2036
  },
2037
  "hf_model": {
2038
  "path": "hf_model:metrics/task_surface_integrity.json",
2039
  "exists": true,
2040
  "bytes": 45779,
2041
+ "sha256": "47c81fe3645e87eefe015d7fac0c23fb5a5411efce37f7d2b61ccc5398b03b1d"
2042
  }
2043
  },
2044
  "failures": []
 
2098
  "local": {
2099
  "path": "repo:docs/data/task_method_20_result_matrix.json",
2100
  "exists": true,
2101
+ "bytes": 128400,
2102
+ "sha256": "afee9a09eeb7406a8f2ef4eb4205697fb9e05e5bad132686f21d28f93ac204a2"
2103
  },
2104
  "mirrors": {
2105
  "hf_space": {
2106
  "path": "hf_space:data/task_method_20_result_matrix.json",
2107
  "exists": true,
2108
+ "bytes": 128400,
2109
+ "sha256": "afee9a09eeb7406a8f2ef4eb4205697fb9e05e5bad132686f21d28f93ac204a2"
2110
  },
2111
  "hf_artifacts_data": {
2112
  "path": "hf_artifacts:data/task_method_20_result_matrix.json",
2113
  "exists": true,
2114
+ "bytes": 128400,
2115
+ "sha256": "afee9a09eeb7406a8f2ef4eb4205697fb9e05e5bad132686f21d28f93ac204a2"
2116
  },
2117
  "hf_artifacts": {
2118
  "path": "hf_artifacts:docs/data/task_method_20_result_matrix.json",
2119
  "exists": true,
2120
+ "bytes": 128400,
2121
+ "sha256": "afee9a09eeb7406a8f2ef4eb4205697fb9e05e5bad132686f21d28f93ac204a2"
2122
  },
2123
  "hf_model_data": {
2124
  "path": "hf_model:data/task_method_20_result_matrix.json",
2125
  "exists": true,
2126
+ "bytes": 128400,
2127
+ "sha256": "afee9a09eeb7406a8f2ef4eb4205697fb9e05e5bad132686f21d28f93ac204a2"
2128
  },
2129
  "hf_model_docs_data": {
2130
  "path": "hf_model:docs/data/task_method_20_result_matrix.json",
2131
  "exists": true,
2132
+ "bytes": 128400,
2133
+ "sha256": "afee9a09eeb7406a8f2ef4eb4205697fb9e05e5bad132686f21d28f93ac204a2"
2134
  },
2135
  "hf_model": {
2136
  "path": "hf_model:metrics/task_method_20_result_matrix.json",
2137
  "exists": true,
2138
+ "bytes": 128400,
2139
+ "sha256": "afee9a09eeb7406a8f2ef4eb4205697fb9e05e5bad132686f21d28f93ac204a2"
2140
  }
2141
  },
2142
  "failures": []
 
2147
  "local": {
2148
  "path": "repo:docs/data/task_method_20_gap_audit.json",
2149
  "exists": true,
2150
+ "bytes": 16980,
2151
+ "sha256": "50d966cb219b1bf89efda1bc0d1a0ed25ee058c65ca981cec37e67ea3e5f8061"
2152
  },
2153
  "mirrors": {
2154
  "hf_space": {
2155
  "path": "hf_space:data/task_method_20_gap_audit.json",
2156
  "exists": true,
2157
+ "bytes": 16980,
2158
+ "sha256": "50d966cb219b1bf89efda1bc0d1a0ed25ee058c65ca981cec37e67ea3e5f8061"
2159
  },
2160
  "hf_artifacts_data": {
2161
  "path": "hf_artifacts:data/task_method_20_gap_audit.json",
2162
  "exists": true,
2163
+ "bytes": 16980,
2164
+ "sha256": "50d966cb219b1bf89efda1bc0d1a0ed25ee058c65ca981cec37e67ea3e5f8061"
2165
  },
2166
  "hf_artifacts": {
2167
  "path": "hf_artifacts:docs/data/task_method_20_gap_audit.json",
2168
  "exists": true,
2169
+ "bytes": 16980,
2170
+ "sha256": "50d966cb219b1bf89efda1bc0d1a0ed25ee058c65ca981cec37e67ea3e5f8061"
2171
  },
2172
  "hf_model_data": {
2173
  "path": "hf_model:data/task_method_20_gap_audit.json",
2174
  "exists": true,
2175
+ "bytes": 16980,
2176
+ "sha256": "50d966cb219b1bf89efda1bc0d1a0ed25ee058c65ca981cec37e67ea3e5f8061"
2177
  },
2178
  "hf_model_docs_data": {
2179
  "path": "hf_model:docs/data/task_method_20_gap_audit.json",
2180
  "exists": true,
2181
+ "bytes": 16980,
2182
+ "sha256": "50d966cb219b1bf89efda1bc0d1a0ed25ee058c65ca981cec37e67ea3e5f8061"
2183
  },
2184
  "hf_model": {
2185
  "path": "hf_model:metrics/task_method_20_gap_audit.json",
2186
  "exists": true,
2187
+ "bytes": 16980,
2188
+ "sha256": "50d966cb219b1bf89efda1bc0d1a0ed25ee058c65ca981cec37e67ea3e5f8061"
2189
  }
2190
  },
2191
  "failures": []
 
2245
  "local": {
2246
  "path": "repo:docs/data/unified_task_model_radar.json",
2247
  "exists": true,
2248
+ "bytes": 228423,
2249
+ "sha256": "a84269541f448eb6b9d2ad29199dc2cab23c55ad4eacb1f0b4fbd2948e1b75c4"
2250
  },
2251
  "mirrors": {
2252
  "hf_space": {
2253
  "path": "hf_space:data/unified_task_model_radar.json",
2254
  "exists": true,
2255
+ "bytes": 228423,
2256
+ "sha256": "a84269541f448eb6b9d2ad29199dc2cab23c55ad4eacb1f0b4fbd2948e1b75c4"
2257
  },
2258
  "hf_artifacts_data": {
2259
  "path": "hf_artifacts:data/unified_task_model_radar.json",
2260
  "exists": true,
2261
+ "bytes": 228423,
2262
+ "sha256": "a84269541f448eb6b9d2ad29199dc2cab23c55ad4eacb1f0b4fbd2948e1b75c4"
2263
  },
2264
  "hf_artifacts": {
2265
  "path": "hf_artifacts:docs/data/unified_task_model_radar.json",
2266
  "exists": true,
2267
+ "bytes": 228423,
2268
+ "sha256": "a84269541f448eb6b9d2ad29199dc2cab23c55ad4eacb1f0b4fbd2948e1b75c4"
2269
  },
2270
  "hf_model_data": {
2271
  "path": "hf_model:data/unified_task_model_radar.json",
2272
  "exists": true,
2273
+ "bytes": 228423,
2274
+ "sha256": "a84269541f448eb6b9d2ad29199dc2cab23c55ad4eacb1f0b4fbd2948e1b75c4"
2275
  },
2276
  "hf_model_docs_data": {
2277
  "path": "hf_model:docs/data/unified_task_model_radar.json",
2278
  "exists": true,
2279
+ "bytes": 228423,
2280
+ "sha256": "a84269541f448eb6b9d2ad29199dc2cab23c55ad4eacb1f0b4fbd2948e1b75c4"
2281
  },
2282
  "hf_model": {
2283
  "path": "hf_model:metrics/unified_task_model_radar.json",
2284
  "exists": true,
2285
+ "bytes": 228423,
2286
+ "sha256": "a84269541f448eb6b9d2ad29199dc2cab23c55ad4eacb1f0b4fbd2948e1b75c4"
2287
  }
2288
  },
2289
  "failures": []
 
2295
  "path": "repo:docs/data/website_integrity.json",
2296
  "exists": true,
2297
  "bytes": 20023,
2298
+ "sha256": "2e525e86c7ecc338a5c5578eb6b8b1147e2b69a8ed632c942ae93c42d4e93876"
2299
  },
2300
  "mirrors": {
2301
  "hf_space": {
2302
  "path": "hf_space:data/website_integrity.json",
2303
  "exists": true,
2304
  "bytes": 20023,
2305
+ "sha256": "2e525e86c7ecc338a5c5578eb6b8b1147e2b69a8ed632c942ae93c42d4e93876"
2306
  },
2307
  "hf_artifacts_data": {
2308
  "path": "hf_artifacts:data/website_integrity.json",
2309
  "exists": true,
2310
  "bytes": 20023,
2311
+ "sha256": "2e525e86c7ecc338a5c5578eb6b8b1147e2b69a8ed632c942ae93c42d4e93876"
2312
  },
2313
  "hf_artifacts": {
2314
  "path": "hf_artifacts:docs/data/website_integrity.json",
2315
  "exists": true,
2316
  "bytes": 20023,
2317
+ "sha256": "2e525e86c7ecc338a5c5578eb6b8b1147e2b69a8ed632c942ae93c42d4e93876"
2318
  },
2319
  "hf_model_data": {
2320
  "path": "hf_model:data/website_integrity.json",
2321
  "exists": true,
2322
  "bytes": 20023,
2323
+ "sha256": "2e525e86c7ecc338a5c5578eb6b8b1147e2b69a8ed632c942ae93c42d4e93876"
2324
  },
2325
  "hf_model_docs_data": {
2326
  "path": "hf_model:docs/data/website_integrity.json",
2327
  "exists": true,
2328
  "bytes": 20023,
2329
+ "sha256": "2e525e86c7ecc338a5c5578eb6b8b1147e2b69a8ed632c942ae93c42d4e93876"
2330
  },
2331
  "hf_model": {
2332
  "path": "hf_model:metrics/website_integrity.json",
2333
  "exists": true,
2334
  "bytes": 20023,
2335
+ "sha256": "2e525e86c7ecc338a5c5578eb6b8b1147e2b69a8ed632c942ae93c42d4e93876"
2336
  }
2337
  },
2338
  "failures": []
 
2515
  "local": {
2516
  "path": "repo:docs/assets/charts/episode128_task_model_radar.svg",
2517
  "exists": true,
2518
+ "bytes": 50599,
2519
+ "sha256": "ec9cbf2a69d5b5cd466500dfbeaf996fa666af94a47a597abcc2465bbfad0bad"
2520
  },
2521
  "mirrors": {
2522
  "hf_space": {
2523
  "path": "hf_space:assets/charts/episode128_task_model_radar.svg",
2524
  "exists": true,
2525
+ "bytes": 50599,
2526
+ "sha256": "ec9cbf2a69d5b5cd466500dfbeaf996fa666af94a47a597abcc2465bbfad0bad"
2527
  },
2528
  "hf_artifacts_docs": {
2529
  "path": "hf_artifacts:docs/assets/charts/episode128_task_model_radar.svg",
2530
  "exists": true,
2531
+ "bytes": 50599,
2532
+ "sha256": "ec9cbf2a69d5b5cd466500dfbeaf996fa666af94a47a597abcc2465bbfad0bad"
2533
  },
2534
  "hf_artifacts_card": {
2535
  "path": "hf_artifacts:assets/charts/episode128_task_model_radar.svg",
2536
  "exists": true,
2537
+ "bytes": 50599,
2538
+ "sha256": "ec9cbf2a69d5b5cd466500dfbeaf996fa666af94a47a597abcc2465bbfad0bad"
2539
  },
2540
  "hf_model": {
2541
  "path": "hf_model:assets/charts/episode128_task_model_radar.svg",
2542
  "exists": true,
2543
+ "bytes": 50599,
2544
+ "sha256": "ec9cbf2a69d5b5cd466500dfbeaf996fa666af94a47a597abcc2465bbfad0bad"
2545
  }
2546
  },
2547
  "failures": []
 
2589
  "local": {
2590
  "path": "repo:docs/assets/charts/unified_task_model_radar.svg",
2591
  "exists": true,
2592
+ "bytes": 56612,
2593
+ "sha256": "03a3eb9369cda807197220eaa22282dec49b6119fa24c211a67c520daf63a611"
2594
  },
2595
  "mirrors": {
2596
  "hf_space": {
2597
  "path": "hf_space:assets/charts/unified_task_model_radar.svg",
2598
  "exists": true,
2599
+ "bytes": 56612,
2600
+ "sha256": "03a3eb9369cda807197220eaa22282dec49b6119fa24c211a67c520daf63a611"
2601
  },
2602
  "hf_artifacts_docs": {
2603
  "path": "hf_artifacts:docs/assets/charts/unified_task_model_radar.svg",
2604
  "exists": true,
2605
+ "bytes": 56612,
2606
+ "sha256": "03a3eb9369cda807197220eaa22282dec49b6119fa24c211a67c520daf63a611"
2607
  },
2608
  "hf_artifacts_card": {
2609
  "path": "hf_artifacts:assets/charts/unified_task_model_radar.svg",
2610
  "exists": true,
2611
+ "bytes": 56612,
2612
+ "sha256": "03a3eb9369cda807197220eaa22282dec49b6119fa24c211a67c520daf63a611"
2613
  },
2614
  "hf_model": {
2615
  "path": "hf_model:assets/charts/unified_task_model_radar.svg",
2616
  "exists": true,
2617
+ "bytes": 56612,
2618
+ "sha256": "03a3eb9369cda807197220eaa22282dec49b6119fa24c211a67c520daf63a611"
2619
  }
2620
  },
2621
  "failures": []
 
5638
  "local": {
5639
  "path": "repo:docs/index.html",
5640
  "exists": true,
5641
+ "bytes": 254759,
5642
+ "sha256": "f913334541de577289a05e8d8e2ad99b1e3f52c2ae889e7e484a97d9a24b5c46"
5643
  },
5644
  "mirrors": {
5645
  "hf_space": {
5646
  "path": "hf_space:index.html",
5647
  "exists": true,
5648
+ "bytes": 254759,
5649
+ "sha256": "f913334541de577289a05e8d8e2ad99b1e3f52c2ae889e7e484a97d9a24b5c46"
5650
  },
5651
  "hf_artifacts_root": {
5652
  "path": "hf_artifacts:index.html",
5653
  "exists": true,
5654
+ "bytes": 254759,
5655
+ "sha256": "f913334541de577289a05e8d8e2ad99b1e3f52c2ae889e7e484a97d9a24b5c46"
5656
  },
5657
  "hf_artifacts_docs": {
5658
  "path": "hf_artifacts:docs/index.html",
5659
  "exists": true,
5660
+ "bytes": 254759,
5661
+ "sha256": "f913334541de577289a05e8d8e2ad99b1e3f52c2ae889e7e484a97d9a24b5c46"
5662
  },
5663
  "hf_model": {
5664
  "path": "hf_model:index.html",
5665
  "exists": true,
5666
+ "bytes": 254759,
5667
+ "sha256": "f913334541de577289a05e8d8e2ad99b1e3f52c2ae889e7e484a97d9a24b5c46"
5668
  },
5669
  "hf_model_docs": {
5670
  "path": "hf_model:docs/index.html",
5671
  "exists": true,
5672
+ "bytes": 254759,
5673
+ "sha256": "f913334541de577289a05e8d8e2ad99b1e3f52c2ae889e7e484a97d9a24b5c46"
5674
  }
5675
  },
5676
  "failures": []
 
12829
  },
12830
  "failures": []
12831
  },
12832
+ {
12833
+ "name": "results/omni_finetune/deferred_launchers/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620.launcher.log",
12834
+ "status": "pass",
12835
+ "local": {
12836
+ "path": "repo:results/omni_finetune/deferred_launchers/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620.launcher.log",
12837
+ "exists": true,
12838
+ "bytes": 10565,
12839
+ "sha256": "8cb5a4b6f45680c9df0b6b313f3d86abe64940f5a99793cffaaed205f80175c5"
12840
+ },
12841
+ "mirrors": {
12842
+ "hf_artifacts": {
12843
+ "path": "hf_artifacts:results/omni_finetune/deferred_launchers/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620.launcher.log",
12844
+ "exists": true,
12845
+ "bytes": 10565,
12846
+ "sha256": "8cb5a4b6f45680c9df0b6b313f3d86abe64940f5a99793cffaaed205f80175c5"
12847
+ },
12848
+ "hf_model": {
12849
+ "path": "hf_model:results/omni_finetune/deferred_launchers/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620.launcher.log",
12850
+ "exists": true,
12851
+ "bytes": 10565,
12852
+ "sha256": "8cb5a4b6f45680c9df0b6b313f3d86abe64940f5a99793cffaaed205f80175c5"
12853
+ }
12854
+ },
12855
+ "failures": []
12856
+ },
12857
  {
12858
  "name": "results/omni_finetune/deferred_launchers/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620.runner.log",
12859
  "status": "pass",
 
19179
  },
19180
  "failures": []
19181
  },
19182
+ {
19183
+ "name": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/RUN_REPORT.md",
19184
+ "status": "pass",
19185
+ "local": {
19186
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/RUN_REPORT.md",
19187
+ "exists": true,
19188
+ "bytes": 496,
19189
+ "sha256": "2ddec993b14e63c1b4d3164c55a3d496b96489262ddc950a4a3903158aa33c47"
19190
+ },
19191
+ "mirrors": {
19192
+ "hf_artifacts": {
19193
+ "path": "hf_artifacts:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/RUN_REPORT.md",
19194
+ "exists": true,
19195
+ "bytes": 496,
19196
+ "sha256": "2ddec993b14e63c1b4d3164c55a3d496b96489262ddc950a4a3903158aa33c47"
19197
+ },
19198
+ "hf_model": {
19199
+ "path": "hf_model:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/RUN_REPORT.md",
19200
+ "exists": true,
19201
+ "bytes": 496,
19202
+ "sha256": "2ddec993b14e63c1b4d3164c55a3d496b96489262ddc950a4a3903158aa33c47"
19203
+ }
19204
+ },
19205
+ "failures": []
19206
+ },
19207
+ {
19208
+ "name": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/collection_validation.json",
19209
+ "status": "pass",
19210
+ "local": {
19211
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/collection_validation.json",
19212
+ "exists": true,
19213
+ "bytes": 1606,
19214
+ "sha256": "d0f5bc9ec3064b86775bfd33f0c4c6c40302cbee6f7809468db4315888e9f57c"
19215
+ },
19216
+ "mirrors": {
19217
+ "hf_artifacts": {
19218
+ "path": "hf_artifacts:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/collection_validation.json",
19219
+ "exists": true,
19220
+ "bytes": 1606,
19221
+ "sha256": "d0f5bc9ec3064b86775bfd33f0c4c6c40302cbee6f7809468db4315888e9f57c"
19222
+ },
19223
+ "hf_model": {
19224
+ "path": "hf_model:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/collection_validation.json",
19225
+ "exists": true,
19226
+ "bytes": 1606,
19227
+ "sha256": "d0f5bc9ec3064b86775bfd33f0c4c6c40302cbee6f7809468db4315888e9f57c"
19228
+ }
19229
+ },
19230
+ "failures": []
19231
+ },
19232
+ {
19233
+ "name": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/misalignment_detection/metrics.json",
19234
+ "status": "pass",
19235
+ "local": {
19236
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/misalignment_detection/metrics.json",
19237
+ "exists": true,
19238
+ "bytes": 1619,
19239
+ "sha256": "3cb0ed942c5af21223c582dd0dc7467a326306f10d2f03592d120fa8dcb173c5"
19240
+ },
19241
+ "mirrors": {
19242
+ "hf_artifacts": {
19243
+ "path": "hf_artifacts:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/misalignment_detection/metrics.json",
19244
+ "exists": true,
19245
+ "bytes": 1619,
19246
+ "sha256": "3cb0ed942c5af21223c582dd0dc7467a326306f10d2f03592d120fa8dcb173c5"
19247
+ },
19248
+ "hf_model": {
19249
+ "path": "hf_model:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/misalignment_detection/metrics.json",
19250
+ "exists": true,
19251
+ "bytes": 1619,
19252
+ "sha256": "3cb0ed942c5af21223c582dd0dc7467a326306f10d2f03592d120fa8dcb173c5"
19253
+ }
19254
+ },
19255
+ "failures": []
19256
+ },
19257
+ {
19258
+ "name": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/next_subtask_forecast/metrics.json",
19259
+ "status": "pass",
19260
+ "local": {
19261
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/next_subtask_forecast/metrics.json",
19262
+ "exists": true,
19263
+ "bytes": 5774,
19264
+ "sha256": "e1ed24bc3a6507b427bc390f1dbea83c09195ac7bb80b99ed87ac7cc2e90a24d"
19265
+ },
19266
+ "mirrors": {
19267
+ "hf_artifacts": {
19268
+ "path": "hf_artifacts:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/next_subtask_forecast/metrics.json",
19269
+ "exists": true,
19270
+ "bytes": 5774,
19271
+ "sha256": "e1ed24bc3a6507b427bc390f1dbea83c09195ac7bb80b99ed87ac7cc2e90a24d"
19272
+ },
19273
+ "hf_model": {
19274
+ "path": "hf_model:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/next_subtask_forecast/metrics.json",
19275
+ "exists": true,
19276
+ "bytes": 5774,
19277
+ "sha256": "e1ed24bc3a6507b427bc390f1dbea83c09195ac7bb80b99ed87ac7cc2e90a24d"
19278
+ }
19279
+ },
19280
+ "failures": []
19281
+ },
19282
+ {
19283
+ "name": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/object_set_forecast/metrics.json",
19284
+ "status": "pass",
19285
+ "local": {
19286
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/object_set_forecast/metrics.json",
19287
+ "exists": true,
19288
+ "bytes": 1565,
19289
+ "sha256": "ebbf5bfef95d8093fcbc094d3a03f9d765bbee5f8d80da027e948c55c35d0026"
19290
+ },
19291
+ "mirrors": {
19292
+ "hf_artifacts": {
19293
+ "path": "hf_artifacts:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/object_set_forecast/metrics.json",
19294
+ "exists": true,
19295
+ "bytes": 1565,
19296
+ "sha256": "ebbf5bfef95d8093fcbc094d3a03f9d765bbee5f8d80da027e948c55c35d0026"
19297
+ },
19298
+ "hf_model": {
19299
+ "path": "hf_model:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/object_set_forecast/metrics.json",
19300
+ "exists": true,
19301
+ "bytes": 1565,
19302
+ "sha256": "ebbf5bfef95d8093fcbc094d3a03f9d765bbee5f8d80da027e948c55c35d0026"
19303
+ }
19304
+ },
19305
+ "failures": []
19306
+ },
19307
+ {
19308
+ "name": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/server_info.json",
19309
+ "status": "pass",
19310
+ "local": {
19311
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/server_info.json",
19312
+ "exists": true,
19313
+ "bytes": 858,
19314
+ "sha256": "ebcc6d877fd412ba5d2383ac343672e65cbd5023deff9ebb51a38b64f0b0ea77"
19315
+ },
19316
+ "mirrors": {
19317
+ "hf_artifacts": {
19318
+ "path": "hf_artifacts:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/server_info.json",
19319
+ "exists": true,
19320
+ "bytes": 858,
19321
+ "sha256": "ebcc6d877fd412ba5d2383ac343672e65cbd5023deff9ebb51a38b64f0b0ea77"
19322
+ },
19323
+ "hf_model": {
19324
+ "path": "hf_model:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/server_info.json",
19325
+ "exists": true,
19326
+ "bytes": 858,
19327
+ "sha256": "ebcc6d877fd412ba5d2383ac343672e65cbd5023deff9ebb51a38b64f0b0ea77"
19328
+ }
19329
+ },
19330
+ "failures": []
19331
+ },
19332
+ {
19333
+ "name": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/summary.json",
19334
+ "status": "pass",
19335
+ "local": {
19336
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/summary.json",
19337
+ "exists": true,
19338
+ "bytes": 2130,
19339
+ "sha256": "f46a66c4fd06e944129b052be009d23e243567fdc54c9e101852c11dfabb0637"
19340
+ },
19341
+ "mirrors": {
19342
+ "hf_artifacts": {
19343
+ "path": "hf_artifacts:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/summary.json",
19344
+ "exists": true,
19345
+ "bytes": 2130,
19346
+ "sha256": "f46a66c4fd06e944129b052be009d23e243567fdc54c9e101852c11dfabb0637"
19347
+ },
19348
+ "hf_model": {
19349
+ "path": "hf_model:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/summary.json",
19350
+ "exists": true,
19351
+ "bytes": 2130,
19352
+ "sha256": "f46a66c4fd06e944129b052be009d23e243567fdc54c9e101852c11dfabb0637"
19353
+ }
19354
+ },
19355
+ "failures": []
19356
+ },
19357
+ {
19358
+ "name": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/temporal_order/metrics.json",
19359
+ "status": "pass",
19360
+ "local": {
19361
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/temporal_order/metrics.json",
19362
+ "exists": true,
19363
+ "bytes": 1562,
19364
+ "sha256": "a2cc896d36516569829b955d9cf2f5d8586804b1805b778ef4face1be9074963"
19365
+ },
19366
+ "mirrors": {
19367
+ "hf_artifacts": {
19368
+ "path": "hf_artifacts:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/temporal_order/metrics.json",
19369
+ "exists": true,
19370
+ "bytes": 1562,
19371
+ "sha256": "a2cc896d36516569829b955d9cf2f5d8586804b1805b778ef4face1be9074963"
19372
+ },
19373
+ "hf_model": {
19374
+ "path": "hf_model:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/temporal_order/metrics.json",
19375
+ "exists": true,
19376
+ "bytes": 1562,
19377
+ "sha256": "a2cc896d36516569829b955d9cf2f5d8586804b1805b778ef4face1be9074963"
19378
+ }
19379
+ },
19380
+ "failures": []
19381
+ },
19382
+ {
19383
+ "name": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard0.progress.jsonl",
19384
+ "status": "pass",
19385
+ "local": {
19386
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard0.progress.jsonl",
19387
+ "exists": true,
19388
+ "bytes": 1100734,
19389
+ "sha256": "1fd8f293af1432d49653dd93f00924d07ba6117129e1fc8c6cdb3b7c8a2bfb05"
19390
+ },
19391
+ "mirrors": {
19392
+ "hf_artifacts": {
19393
+ "path": "hf_artifacts:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard0.progress.jsonl",
19394
+ "exists": true,
19395
+ "bytes": 1100734,
19396
+ "sha256": "1fd8f293af1432d49653dd93f00924d07ba6117129e1fc8c6cdb3b7c8a2bfb05"
19397
+ },
19398
+ "hf_model": {
19399
+ "path": "hf_model:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard0.progress.jsonl",
19400
+ "exists": true,
19401
+ "bytes": 1100734,
19402
+ "sha256": "1fd8f293af1432d49653dd93f00924d07ba6117129e1fc8c6cdb3b7c8a2bfb05"
19403
+ }
19404
+ },
19405
+ "failures": []
19406
+ },
19407
+ {
19408
+ "name": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard1.progress.jsonl",
19409
+ "status": "pass",
19410
+ "local": {
19411
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard1.progress.jsonl",
19412
+ "exists": true,
19413
+ "bytes": 1082556,
19414
+ "sha256": "43c2c76dffb1661c2dfb9947b75ac29fc68c311b793648801f75a2b3a1592922"
19415
+ },
19416
+ "mirrors": {
19417
+ "hf_artifacts": {
19418
+ "path": "hf_artifacts:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard1.progress.jsonl",
19419
+ "exists": true,
19420
+ "bytes": 1082556,
19421
+ "sha256": "43c2c76dffb1661c2dfb9947b75ac29fc68c311b793648801f75a2b3a1592922"
19422
+ },
19423
+ "hf_model": {
19424
+ "path": "hf_model:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard1.progress.jsonl",
19425
+ "exists": true,
19426
+ "bytes": 1082556,
19427
+ "sha256": "43c2c76dffb1661c2dfb9947b75ac29fc68c311b793648801f75a2b3a1592922"
19428
+ }
19429
+ },
19430
+ "failures": []
19431
+ },
19432
+ {
19433
+ "name": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard2.progress.jsonl",
19434
+ "status": "pass",
19435
+ "local": {
19436
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard2.progress.jsonl",
19437
+ "exists": true,
19438
+ "bytes": 1077207,
19439
+ "sha256": "6e163ccfd831801dd5dcb6d6336c08ce6be0099fa427d0291af97a846430b2a2"
19440
+ },
19441
+ "mirrors": {
19442
+ "hf_artifacts": {
19443
+ "path": "hf_artifacts:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard2.progress.jsonl",
19444
+ "exists": true,
19445
+ "bytes": 1077207,
19446
+ "sha256": "6e163ccfd831801dd5dcb6d6336c08ce6be0099fa427d0291af97a846430b2a2"
19447
+ },
19448
+ "hf_model": {
19449
+ "path": "hf_model:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard2.progress.jsonl",
19450
+ "exists": true,
19451
+ "bytes": 1077207,
19452
+ "sha256": "6e163ccfd831801dd5dcb6d6336c08ce6be0099fa427d0291af97a846430b2a2"
19453
+ }
19454
+ },
19455
+ "failures": []
19456
+ },
19457
+ {
19458
+ "name": "results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard3.progress.jsonl",
19459
+ "status": "pass",
19460
+ "local": {
19461
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard3.progress.jsonl",
19462
+ "exists": true,
19463
+ "bytes": 1072835,
19464
+ "sha256": "af208b8afe02886022915f634927b073646fa15465f157d4e1f7cad3c3408f0b"
19465
+ },
19466
+ "mirrors": {
19467
+ "hf_artifacts": {
19468
+ "path": "hf_artifacts:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard3.progress.jsonl",
19469
+ "exists": true,
19470
+ "bytes": 1072835,
19471
+ "sha256": "af208b8afe02886022915f634927b073646fa15465f157d4e1f7cad3c3408f0b"
19472
+ },
19473
+ "hf_model": {
19474
+ "path": "hf_model:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard3.progress.jsonl",
19475
+ "exists": true,
19476
+ "bytes": 1072835,
19477
+ "sha256": "af208b8afe02886022915f634927b073646fa15465f157d4e1f7cad3c3408f0b"
19478
+ }
19479
+ },
19480
+ "failures": []
19481
+ },
19482
  {
19483
  "name": "results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/RUN_REPORT.md",
19484
  "status": "pass",
 
23592
  },
23593
  "failures": []
23594
  },
23595
+ {
23596
+ "name": "space_results/omni_finetune/deferred_launchers/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620.launcher.log",
23597
+ "status": "pass",
23598
+ "local": {
23599
+ "path": "repo:results/omni_finetune/deferred_launchers/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620.launcher.log",
23600
+ "exists": true,
23601
+ "bytes": 10565,
23602
+ "sha256": "8cb5a4b6f45680c9df0b6b313f3d86abe64940f5a99793cffaaed205f80175c5"
23603
+ },
23604
+ "mirrors": {
23605
+ "hf_space": {
23606
+ "path": "hf_space:results/omni_finetune/deferred_launchers/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620.launcher.log",
23607
+ "exists": true,
23608
+ "bytes": 10565,
23609
+ "sha256": "8cb5a4b6f45680c9df0b6b313f3d86abe64940f5a99793cffaaed205f80175c5"
23610
+ }
23611
+ },
23612
+ "failures": []
23613
+ },
23614
  {
23615
  "name": "space_results/omni_finetune/deferred_launchers/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620.runner.log",
23616
  "status": "pass",
 
24789
  },
24790
  "failures": []
24791
  },
24792
+ {
24793
+ "name": "space_results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/RUN_REPORT.md",
24794
+ "status": "pass",
24795
+ "local": {
24796
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/RUN_REPORT.md",
24797
+ "exists": true,
24798
+ "bytes": 496,
24799
+ "sha256": "2ddec993b14e63c1b4d3164c55a3d496b96489262ddc950a4a3903158aa33c47"
24800
+ },
24801
+ "mirrors": {
24802
+ "hf_space": {
24803
+ "path": "hf_space:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/RUN_REPORT.md",
24804
+ "exists": true,
24805
+ "bytes": 496,
24806
+ "sha256": "2ddec993b14e63c1b4d3164c55a3d496b96489262ddc950a4a3903158aa33c47"
24807
+ }
24808
+ },
24809
+ "failures": []
24810
+ },
24811
+ {
24812
+ "name": "space_results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/collection_validation.json",
24813
+ "status": "pass",
24814
+ "local": {
24815
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/collection_validation.json",
24816
+ "exists": true,
24817
+ "bytes": 1606,
24818
+ "sha256": "d0f5bc9ec3064b86775bfd33f0c4c6c40302cbee6f7809468db4315888e9f57c"
24819
+ },
24820
+ "mirrors": {
24821
+ "hf_space": {
24822
+ "path": "hf_space:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/collection_validation.json",
24823
+ "exists": true,
24824
+ "bytes": 1606,
24825
+ "sha256": "d0f5bc9ec3064b86775bfd33f0c4c6c40302cbee6f7809468db4315888e9f57c"
24826
+ }
24827
+ },
24828
+ "failures": []
24829
+ },
24830
+ {
24831
+ "name": "space_results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/misalignment_detection/metrics.json",
24832
+ "status": "pass",
24833
+ "local": {
24834
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/misalignment_detection/metrics.json",
24835
+ "exists": true,
24836
+ "bytes": 1619,
24837
+ "sha256": "3cb0ed942c5af21223c582dd0dc7467a326306f10d2f03592d120fa8dcb173c5"
24838
+ },
24839
+ "mirrors": {
24840
+ "hf_space": {
24841
+ "path": "hf_space:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/misalignment_detection/metrics.json",
24842
+ "exists": true,
24843
+ "bytes": 1619,
24844
+ "sha256": "3cb0ed942c5af21223c582dd0dc7467a326306f10d2f03592d120fa8dcb173c5"
24845
+ }
24846
+ },
24847
+ "failures": []
24848
+ },
24849
+ {
24850
+ "name": "space_results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/next_subtask_forecast/metrics.json",
24851
+ "status": "pass",
24852
+ "local": {
24853
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/next_subtask_forecast/metrics.json",
24854
+ "exists": true,
24855
+ "bytes": 5774,
24856
+ "sha256": "e1ed24bc3a6507b427bc390f1dbea83c09195ac7bb80b99ed87ac7cc2e90a24d"
24857
+ },
24858
+ "mirrors": {
24859
+ "hf_space": {
24860
+ "path": "hf_space:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/next_subtask_forecast/metrics.json",
24861
+ "exists": true,
24862
+ "bytes": 5774,
24863
+ "sha256": "e1ed24bc3a6507b427bc390f1dbea83c09195ac7bb80b99ed87ac7cc2e90a24d"
24864
+ }
24865
+ },
24866
+ "failures": []
24867
+ },
24868
+ {
24869
+ "name": "space_results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/object_set_forecast/metrics.json",
24870
+ "status": "pass",
24871
+ "local": {
24872
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/object_set_forecast/metrics.json",
24873
+ "exists": true,
24874
+ "bytes": 1565,
24875
+ "sha256": "ebbf5bfef95d8093fcbc094d3a03f9d765bbee5f8d80da027e948c55c35d0026"
24876
+ },
24877
+ "mirrors": {
24878
+ "hf_space": {
24879
+ "path": "hf_space:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/object_set_forecast/metrics.json",
24880
+ "exists": true,
24881
+ "bytes": 1565,
24882
+ "sha256": "ebbf5bfef95d8093fcbc094d3a03f9d765bbee5f8d80da027e948c55c35d0026"
24883
+ }
24884
+ },
24885
+ "failures": []
24886
+ },
24887
+ {
24888
+ "name": "space_results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/server_info.json",
24889
+ "status": "pass",
24890
+ "local": {
24891
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/server_info.json",
24892
+ "exists": true,
24893
+ "bytes": 858,
24894
+ "sha256": "ebcc6d877fd412ba5d2383ac343672e65cbd5023deff9ebb51a38b64f0b0ea77"
24895
+ },
24896
+ "mirrors": {
24897
+ "hf_space": {
24898
+ "path": "hf_space:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/server_info.json",
24899
+ "exists": true,
24900
+ "bytes": 858,
24901
+ "sha256": "ebcc6d877fd412ba5d2383ac343672e65cbd5023deff9ebb51a38b64f0b0ea77"
24902
+ }
24903
+ },
24904
+ "failures": []
24905
+ },
24906
+ {
24907
+ "name": "space_results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/summary.json",
24908
+ "status": "pass",
24909
+ "local": {
24910
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/summary.json",
24911
+ "exists": true,
24912
+ "bytes": 2130,
24913
+ "sha256": "f46a66c4fd06e944129b052be009d23e243567fdc54c9e101852c11dfabb0637"
24914
+ },
24915
+ "mirrors": {
24916
+ "hf_space": {
24917
+ "path": "hf_space:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/summary.json",
24918
+ "exists": true,
24919
+ "bytes": 2130,
24920
+ "sha256": "f46a66c4fd06e944129b052be009d23e243567fdc54c9e101852c11dfabb0637"
24921
+ }
24922
+ },
24923
+ "failures": []
24924
+ },
24925
+ {
24926
+ "name": "space_results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/temporal_order/metrics.json",
24927
+ "status": "pass",
24928
+ "local": {
24929
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/temporal_order/metrics.json",
24930
+ "exists": true,
24931
+ "bytes": 1562,
24932
+ "sha256": "a2cc896d36516569829b955d9cf2f5d8586804b1805b778ef4face1be9074963"
24933
+ },
24934
+ "mirrors": {
24935
+ "hf_space": {
24936
+ "path": "hf_space:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/temporal_order/metrics.json",
24937
+ "exists": true,
24938
+ "bytes": 1562,
24939
+ "sha256": "a2cc896d36516569829b955d9cf2f5d8586804b1805b778ef4face1be9074963"
24940
+ }
24941
+ },
24942
+ "failures": []
24943
+ },
24944
+ {
24945
+ "name": "space_results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard0.progress.jsonl",
24946
+ "status": "pass",
24947
+ "local": {
24948
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard0.progress.jsonl",
24949
+ "exists": true,
24950
+ "bytes": 1100734,
24951
+ "sha256": "1fd8f293af1432d49653dd93f00924d07ba6117129e1fc8c6cdb3b7c8a2bfb05"
24952
+ },
24953
+ "mirrors": {
24954
+ "hf_space": {
24955
+ "path": "hf_space:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard0.progress.jsonl",
24956
+ "exists": true,
24957
+ "bytes": 1100734,
24958
+ "sha256": "1fd8f293af1432d49653dd93f00924d07ba6117129e1fc8c6cdb3b7c8a2bfb05"
24959
+ }
24960
+ },
24961
+ "failures": []
24962
+ },
24963
+ {
24964
+ "name": "space_results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard1.progress.jsonl",
24965
+ "status": "pass",
24966
+ "local": {
24967
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard1.progress.jsonl",
24968
+ "exists": true,
24969
+ "bytes": 1082556,
24970
+ "sha256": "43c2c76dffb1661c2dfb9947b75ac29fc68c311b793648801f75a2b3a1592922"
24971
+ },
24972
+ "mirrors": {
24973
+ "hf_space": {
24974
+ "path": "hf_space:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard1.progress.jsonl",
24975
+ "exists": true,
24976
+ "bytes": 1082556,
24977
+ "sha256": "43c2c76dffb1661c2dfb9947b75ac29fc68c311b793648801f75a2b3a1592922"
24978
+ }
24979
+ },
24980
+ "failures": []
24981
+ },
24982
+ {
24983
+ "name": "space_results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard2.progress.jsonl",
24984
+ "status": "pass",
24985
+ "local": {
24986
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard2.progress.jsonl",
24987
+ "exists": true,
24988
+ "bytes": 1077207,
24989
+ "sha256": "6e163ccfd831801dd5dcb6d6336c08ce6be0099fa427d0291af97a846430b2a2"
24990
+ },
24991
+ "mirrors": {
24992
+ "hf_space": {
24993
+ "path": "hf_space:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard2.progress.jsonl",
24994
+ "exists": true,
24995
+ "bytes": 1077207,
24996
+ "sha256": "6e163ccfd831801dd5dcb6d6336c08ce6be0099fa427d0291af97a846430b2a2"
24997
+ }
24998
+ },
24999
+ "failures": []
25000
+ },
25001
+ {
25002
+ "name": "space_results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard3.progress.jsonl",
25003
+ "status": "pass",
25004
+ "local": {
25005
+ "path": "repo:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard3.progress.jsonl",
25006
+ "exists": true,
25007
+ "bytes": 1072835,
25008
+ "sha256": "af208b8afe02886022915f634927b073646fa15465f157d4e1f7cad3c3408f0b"
25009
+ },
25010
+ "mirrors": {
25011
+ "hf_space": {
25012
+ "path": "hf_space:results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard3.progress.jsonl",
25013
+ "exists": true,
25014
+ "bytes": 1072835,
25015
+ "sha256": "af208b8afe02886022915f634927b073646fa15465f157d4e1f7cad3c3408f0b"
25016
+ }
25017
+ },
25018
+ "failures": []
25019
+ },
25020
  {
25021
  "name": "space_results/omni_finetune/xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620/RUN_REPORT.md",
25022
  "status": "pass",
 
28111
  "local": {
28112
  "path": "repo:TASK_METHOD_20_GAP_AUDIT.md",
28113
  "exists": true,
28114
+ "bytes": 5661,
28115
+ "sha256": "58369ff12eae39ac2b1518623b63b6d070e633c7a88bb0a06e54c556f3027f06"
28116
  },
28117
  "mirrors": {
28118
  "hf_space": {
28119
  "path": "hf_space:TASK_METHOD_20_GAP_AUDIT.md",
28120
  "exists": true,
28121
+ "bytes": 5661,
28122
+ "sha256": "58369ff12eae39ac2b1518623b63b6d070e633c7a88bb0a06e54c556f3027f06"
28123
  },
28124
  "hf_artifacts": {
28125
  "path": "hf_artifacts:TASK_METHOD_20_GAP_AUDIT.md",
28126
  "exists": true,
28127
+ "bytes": 5661,
28128
+ "sha256": "58369ff12eae39ac2b1518623b63b6d070e633c7a88bb0a06e54c556f3027f06"
28129
  },
28130
  "hf_model": {
28131
  "path": "hf_model:TASK_METHOD_20_GAP_AUDIT.md",
28132
  "exists": true,
28133
+ "bytes": 5661,
28134
+ "sha256": "58369ff12eae39ac2b1518623b63b6d070e633c7a88bb0a06e54c556f3027f06"
28135
  }
28136
  },
28137
  "failures": []
 
28142
  "local": {
28143
  "path": "repo:TASK_METHOD_20_RESULT_MATRIX.md",
28144
  "exists": true,
28145
+ "bytes": 3739,
28146
+ "sha256": "325df952cc73a1a530781c3f55cf87105a178ac3cb3553515c817d00916deb27"
28147
  },
28148
  "mirrors": {
28149
  "hf_space": {
28150
  "path": "hf_space:TASK_METHOD_20_RESULT_MATRIX.md",
28151
  "exists": true,
28152
+ "bytes": 3739,
28153
+ "sha256": "325df952cc73a1a530781c3f55cf87105a178ac3cb3553515c817d00916deb27"
28154
  },
28155
  "hf_artifacts": {
28156
  "path": "hf_artifacts:TASK_METHOD_20_RESULT_MATRIX.md",
28157
  "exists": true,
28158
+ "bytes": 3739,
28159
+ "sha256": "325df952cc73a1a530781c3f55cf87105a178ac3cb3553515c817d00916deb27"
28160
  },
28161
  "hf_model": {
28162
  "path": "hf_model:TASK_METHOD_20_RESULT_MATRIX.md",
28163
  "exists": true,
28164
+ "bytes": 3739,
28165
+ "sha256": "325df952cc73a1a530781c3f55cf87105a178ac3cb3553515c817d00916deb27"
28166
  }
28167
  },
28168
  "failures": []
data/public_surface_qa.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Public Project Surface",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-20T14:51:35+00:00",
5
  "scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
6
  "checks": [
7
  {
@@ -18,7 +18,7 @@
18
  "website_integrity": {
19
  "exists": true,
20
  "status": "pass",
21
- "generated_at_utc": "2026-06-20T14:49:37+00:00"
22
  },
23
  "rendered_site_check": {
24
  "exists": true,
@@ -28,27 +28,27 @@
28
  "task_surface_integrity": {
29
  "exists": true,
30
  "status": "pass",
31
- "generated_at_utc": "2026-06-20T14:04:25+00:00"
32
  },
33
  "source_alignment": {
34
  "exists": true,
35
  "status": "pass",
36
- "generated_at_utc": "2026-06-20T14:03:48+00:00"
37
  },
38
  "scale_up_status": {
39
  "exists": true,
40
  "status": "pass",
41
- "generated_at_utc": "2026-06-20T14:04:01+00:00"
42
  },
43
  "publication_package": {
44
  "exists": true,
45
  "status": "pass",
46
- "generated_at_utc": "2026-06-20T14:50:09+00:00"
47
  },
48
  "mirror_parity": {
49
  "exists": true,
50
  "status": "pass",
51
- "generated_at_utc": "2026-06-20T14:51:20+00:00"
52
  }
53
  },
54
  "failures": {}
 
1
  {
2
  "title": "Ropedia Xperience-10M Public Project Surface",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-20T15:22:58+00:00",
5
  "scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
6
  "checks": [
7
  {
 
18
  "website_integrity": {
19
  "exists": true,
20
  "status": "pass",
21
+ "generated_at_utc": "2026-06-20T15:22:30+00:00"
22
  },
23
  "rendered_site_check": {
24
  "exists": true,
 
28
  "task_surface_integrity": {
29
  "exists": true,
30
  "status": "pass",
31
+ "generated_at_utc": "2026-06-20T14:51:35+00:00"
32
  },
33
  "source_alignment": {
34
  "exists": true,
35
  "status": "pass",
36
+ "generated_at_utc": "2026-06-20T15:22:29+00:00"
37
  },
38
  "scale_up_status": {
39
  "exists": true,
40
  "status": "pass",
41
+ "generated_at_utc": "2026-06-20T15:22:32+00:00"
42
  },
43
  "publication_package": {
44
  "exists": true,
45
  "status": "pass",
46
+ "generated_at_utc": "2026-06-20T15:22:46+00:00"
47
  },
48
  "mirror_parity": {
49
  "exists": true,
50
  "status": "pass",
51
+ "generated_at_utc": "2026-06-20T14:52:37+00:00"
52
  }
53
  },
54
  "failures": {}
data/quality_gates.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Release Checks",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-20T14:03:29+00:00",
5
  "rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
6
  "automated_gates": [
7
  {
 
1
  {
2
  "title": "Ropedia Xperience-10M Release Checks",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-20T15:20:32+00:00",
5
  "rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
6
  "automated_gates": [
7
  {
data/source_alignment_audit.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Source Alignment Note",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-20T14:51:35+00:00",
5
  "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
6
  "alignment_summary": {
7
  "full_dataset_repo": "ropedia-ai/xperience-10m",
 
1
  {
2
  "title": "Ropedia Xperience-10M Source Alignment Note",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-20T15:22:29+00:00",
5
  "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
6
  "alignment_summary": {
7
  "full_dataset_repo": "ropedia-ai/xperience-10m",
data/task_method_20_gap_audit.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "generated_at_utc": "2026-06-20T13:58:05+00:00",
3
  "immediate_actions": [
4
  {
5
  "artifact": "docs/data/task_method_20_gap_audit.json",
6
  "id": "gap_audit",
7
- "purpose": "Keep the 16 scoreless cells visible and reproducible."
8
  },
9
  {
10
  "artifact": "scripts/omni/score_model_output_probes.py",
@@ -37,11 +37,11 @@
37
  "proxy_scored_task_count": 0,
38
  "result_record_count": 20,
39
  "scope": "128 selected episodes, held-out test",
40
- "scored_task_count": 15,
41
- "scoreless_task_count": 5,
42
  "status_counts": {
43
- "not_evaluated_in_verified_package": 5,
44
- "scored": 15
45
  }
46
  },
47
  "metadata128_neural_mlp": {
@@ -135,12 +135,12 @@
135
  },
136
  "missing_by_method": {
137
  "cosmos3_nano_future_window": 9,
138
- "cosmos3_super_reasoner": 5,
139
  "metadata128_neural_mlp": 1,
140
  "metadata128_simple": 1
141
  },
142
  "missing_by_status": {
143
- "not_evaluated_in_verified_package": 14,
144
  "not_supported_by_metadata_only_package": 1,
145
  "unsupported_without_required_target": 1
146
  },
@@ -158,23 +158,15 @@
158
  "cosmos3_nano_future_window"
159
  ],
160
  "11 Temporal Order Verification": [
161
- "cosmos3_nano_future_window",
162
- "cosmos3_super_reasoner"
163
  ],
164
  "12 Multimodal Synchronization Detection": [
165
- "cosmos3_nano_future_window",
166
- "cosmos3_super_reasoner"
167
- ],
168
- "14 Long-Horizon Next-Subtask Forecasting": [
169
- "cosmos3_super_reasoner"
170
  ],
171
  "15 Interaction Text Prediction": [
172
  "cosmos3_nano_future_window",
173
  "cosmos3_super_reasoner"
174
  ],
175
- "17 Future Object-Set Forecasting": [
176
- "cosmos3_super_reasoner"
177
- ],
178
  "18 IMU-to-Hand Pose Reconstruction": [
179
  "cosmos3_nano_future_window"
180
  ],
@@ -237,19 +229,6 @@
237
  "task_label": "Language Grounding",
238
  "task_number": 8
239
  },
240
- {
241
- "method": "Cosmos3-Super Reasoner",
242
- "metric_key": "f1",
243
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
244
- "recommended_next_step": "Generate verified model outputs for this task contract and score them against the held-out labels.",
245
- "scope": "multi_episode_128_partial_model_overlay",
246
- "series_id": "cosmos3_super_reasoner",
247
- "status": "not_evaluated_in_verified_package",
248
- "status_label": "not evaluated",
249
- "task_id": "temporal_order",
250
- "task_label": "Temporal Order Verification",
251
- "task_number": 11
252
- },
253
  {
254
  "method": "Cosmos3-Nano Future Window",
255
  "metric_key": "f1",
@@ -263,19 +242,6 @@
263
  "task_label": "Temporal Order Verification",
264
  "task_number": 11
265
  },
266
- {
267
- "method": "Cosmos3-Super Reasoner",
268
- "metric_key": "f1",
269
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
270
- "recommended_next_step": "Generate verified model outputs for this task contract and score them against the held-out labels.",
271
- "scope": "multi_episode_128_partial_model_overlay",
272
- "series_id": "cosmos3_super_reasoner",
273
- "status": "not_evaluated_in_verified_package",
274
- "status_label": "not evaluated",
275
- "task_id": "misalignment_detection",
276
- "task_label": "Multimodal Synchronization Detection",
277
- "task_number": 12
278
- },
279
  {
280
  "method": "Cosmos3-Nano Future Window",
281
  "metric_key": "f1",
@@ -289,19 +255,6 @@
289
  "task_label": "Multimodal Synchronization Detection",
290
  "task_number": 12
291
  },
292
- {
293
- "method": "Cosmos3-Super Reasoner",
294
- "metric_key": "macro_f1",
295
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
296
- "recommended_next_step": "Generate verified model outputs for this task contract and score them against the held-out labels.",
297
- "scope": "multi_episode_128_partial_model_overlay",
298
- "series_id": "cosmos3_super_reasoner",
299
- "status": "not_evaluated_in_verified_package",
300
- "status_label": "not evaluated",
301
- "task_id": "next_subtask_forecast",
302
- "task_label": "Long-Horizon Next-Subtask Forecasting",
303
- "task_number": 14
304
- },
305
  {
306
  "method": "Cosmos3-Super Reasoner",
307
  "metric_key": "macro_f1",
@@ -328,19 +281,6 @@
328
  "task_label": "Interaction Text Prediction",
329
  "task_number": 15
330
  },
331
- {
332
- "method": "Cosmos3-Super Reasoner",
333
- "metric_key": "micro_f1",
334
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
335
- "recommended_next_step": "Generate verified model outputs for this task contract and score them against the held-out labels.",
336
- "scope": "multi_episode_128_partial_model_overlay",
337
- "series_id": "cosmos3_super_reasoner",
338
- "status": "not_evaluated_in_verified_package",
339
- "status_label": "not evaluated",
340
- "task_id": "object_set_forecast",
341
- "task_label": "Future Object-Set Forecasting",
342
- "task_number": 17
343
- },
344
  {
345
  "method": "Cosmos3-Nano Future Window",
346
  "metric_key": "mae",
@@ -440,8 +380,8 @@
440
  "method_count": 9,
441
  "method_task_record_count": 180,
442
  "proxy_scored_method_task_count": 4,
443
- "scored_method_task_count": 164,
444
- "scoreless_method_task_count": 16,
445
  "task_count": 20
446
  },
447
  "source_matrix": "docs/data/task_method_20_result_matrix.json",
 
1
  {
2
+ "generated_at_utc": "2026-06-20T15:21:33+00:00",
3
  "immediate_actions": [
4
  {
5
  "artifact": "docs/data/task_method_20_gap_audit.json",
6
  "id": "gap_audit",
7
+ "purpose": "Keep the 12 scoreless cells visible and reproducible."
8
  },
9
  {
10
  "artifact": "scripts/omni/score_model_output_probes.py",
 
37
  "proxy_scored_task_count": 0,
38
  "result_record_count": 20,
39
  "scope": "128 selected episodes, held-out test",
40
+ "scored_task_count": 19,
41
+ "scoreless_task_count": 1,
42
  "status_counts": {
43
+ "not_evaluated_in_verified_package": 1,
44
+ "scored": 19
45
  }
46
  },
47
  "metadata128_neural_mlp": {
 
135
  },
136
  "missing_by_method": {
137
  "cosmos3_nano_future_window": 9,
138
+ "cosmos3_super_reasoner": 1,
139
  "metadata128_neural_mlp": 1,
140
  "metadata128_simple": 1
141
  },
142
  "missing_by_status": {
143
+ "not_evaluated_in_verified_package": 10,
144
  "not_supported_by_metadata_only_package": 1,
145
  "unsupported_without_required_target": 1
146
  },
 
158
  "cosmos3_nano_future_window"
159
  ],
160
  "11 Temporal Order Verification": [
161
+ "cosmos3_nano_future_window"
 
162
  ],
163
  "12 Multimodal Synchronization Detection": [
164
+ "cosmos3_nano_future_window"
 
 
 
 
165
  ],
166
  "15 Interaction Text Prediction": [
167
  "cosmos3_nano_future_window",
168
  "cosmos3_super_reasoner"
169
  ],
 
 
 
170
  "18 IMU-to-Hand Pose Reconstruction": [
171
  "cosmos3_nano_future_window"
172
  ],
 
229
  "task_label": "Language Grounding",
230
  "task_number": 8
231
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  {
233
  "method": "Cosmos3-Nano Future Window",
234
  "metric_key": "f1",
 
242
  "task_label": "Temporal Order Verification",
243
  "task_number": 11
244
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  {
246
  "method": "Cosmos3-Nano Future Window",
247
  "metric_key": "f1",
 
255
  "task_label": "Multimodal Synchronization Detection",
256
  "task_number": 12
257
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  {
259
  "method": "Cosmos3-Super Reasoner",
260
  "metric_key": "macro_f1",
 
281
  "task_label": "Interaction Text Prediction",
282
  "task_number": 15
283
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  {
285
  "method": "Cosmos3-Nano Future Window",
286
  "metric_key": "mae",
 
380
  "method_count": 9,
381
  "method_task_record_count": 180,
382
  "proxy_scored_method_task_count": 4,
383
+ "scored_method_task_count": 168,
384
+ "scoreless_method_task_count": 12,
385
  "task_count": 20
386
  },
387
  "source_matrix": "docs/data/task_method_20_result_matrix.json",
data/task_method_20_result_matrix.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "title": "Task Method 20-Result Matrix",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-20T13:58:04+00:00",
5
  "task_count": 20,
6
  "method_count": 9,
7
  "method_task_record_count": 180,
8
- "scored_method_task_count": 164,
9
  "series": [
10
  {
11
  "id": "minimal",
@@ -183,17 +183,17 @@
183
  "method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation, plus task 5/8/9/10/11/12/13/14/16/17/18/19/20 probes where public metrics exist.",
184
  "plotted_as": "colored point overlay",
185
  "result_record_count": 20,
186
- "scored_task_count": 15,
187
- "covered_task_count": 15,
188
  "proxy_scored_task_count": 0,
189
- "scoreless_task_count": 5,
190
  "unsupported_task_count": 0,
191
- "not_evaluated_task_count": 5,
192
  "status_counts": {
193
- "not_evaluated_in_verified_package": 5,
194
- "scored": 15
195
  },
196
- "coverage_fraction": 0.75,
197
  "result_record_fraction": 1.0
198
  },
199
  {
@@ -1974,17 +1974,17 @@
1974
  "task_label": "Temporal Order Verification",
1975
  "series_id": "cosmos3_super_reasoner",
1976
  "method": "Cosmos3-Super Reasoner",
1977
- "status": "not_evaluated_in_verified_package",
1978
- "status_label": "not evaluated",
1979
- "scored": false,
1980
  "proxy_scored": false,
1981
- "raw": null,
1982
- "raw_text": "n/a",
1983
- "normalized_score": null,
1984
- "metric_key": "f1",
1985
- "source": null,
1986
  "scope": "multi_episode_128_partial_model_overlay",
1987
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
1988
  },
1989
  {
1990
  "task_number": 11,
@@ -2136,17 +2136,17 @@
2136
  "task_label": "Multimodal Synchronization Detection",
2137
  "series_id": "cosmos3_super_reasoner",
2138
  "method": "Cosmos3-Super Reasoner",
2139
- "status": "not_evaluated_in_verified_package",
2140
- "status_label": "not evaluated",
2141
- "scored": false,
2142
  "proxy_scored": false,
2143
- "raw": null,
2144
- "raw_text": "n/a",
2145
- "normalized_score": null,
2146
- "metric_key": "f1",
2147
- "source": null,
2148
  "scope": "multi_episode_128_partial_model_overlay",
2149
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
2150
  },
2151
  {
2152
  "task_number": 12,
@@ -2460,17 +2460,17 @@
2460
  "task_label": "Long-Horizon Next-Subtask Forecasting",
2461
  "series_id": "cosmos3_super_reasoner",
2462
  "method": "Cosmos3-Super Reasoner",
2463
- "status": "not_evaluated_in_verified_package",
2464
- "status_label": "not evaluated",
2465
- "scored": false,
2466
  "proxy_scored": false,
2467
- "raw": null,
2468
- "raw_text": "n/a",
2469
- "normalized_score": null,
2470
- "metric_key": "macro_f1",
2471
- "source": null,
2472
  "scope": "multi_episode_128_partial_model_overlay",
2473
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
2474
  },
2475
  {
2476
  "task_number": 14,
@@ -2946,17 +2946,17 @@
2946
  "task_label": "Future Object-Set Forecasting",
2947
  "series_id": "cosmos3_super_reasoner",
2948
  "method": "Cosmos3-Super Reasoner",
2949
- "status": "not_evaluated_in_verified_package",
2950
- "status_label": "not evaluated",
2951
- "scored": false,
2952
  "proxy_scored": false,
2953
- "raw": null,
2954
- "raw_text": "n/a",
2955
- "normalized_score": null,
2956
- "metric_key": "micro_f1",
2957
- "source": null,
2958
  "scope": "multi_episode_128_partial_model_overlay",
2959
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
2960
  },
2961
  {
2962
  "task_number": 17,
 
1
  {
2
  "title": "Task Method 20-Result Matrix",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-20T15:20:32+00:00",
5
  "task_count": 20,
6
  "method_count": 9,
7
  "method_task_record_count": 180,
8
+ "scored_method_task_count": 168,
9
  "series": [
10
  {
11
  "id": "minimal",
 
183
  "method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation, plus task 5/8/9/10/11/12/13/14/16/17/18/19/20 probes where public metrics exist.",
184
  "plotted_as": "colored point overlay",
185
  "result_record_count": 20,
186
+ "scored_task_count": 19,
187
+ "covered_task_count": 19,
188
  "proxy_scored_task_count": 0,
189
+ "scoreless_task_count": 1,
190
  "unsupported_task_count": 0,
191
+ "not_evaluated_task_count": 1,
192
  "status_counts": {
193
+ "not_evaluated_in_verified_package": 1,
194
+ "scored": 19
195
  },
196
+ "coverage_fraction": 0.95,
197
  "result_record_fraction": 1.0
198
  },
199
  {
 
1974
  "task_label": "Temporal Order Verification",
1975
  "series_id": "cosmos3_super_reasoner",
1976
  "method": "Cosmos3-Super Reasoner",
1977
+ "status": "scored",
1978
+ "status_label": "scored",
1979
+ "scored": true,
1980
  "proxy_scored": false,
1981
+ "raw": 0.6286317274823326,
1982
+ "raw_text": "0.6286",
1983
+ "normalized_score": 0.6286317274823326,
1984
+ "metric_key": "temporal_order_f1",
1985
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
1986
  "scope": "multi_episode_128_partial_model_overlay",
1987
+ "reason": null
1988
  },
1989
  {
1990
  "task_number": 11,
 
2136
  "task_label": "Multimodal Synchronization Detection",
2137
  "series_id": "cosmos3_super_reasoner",
2138
  "method": "Cosmos3-Super Reasoner",
2139
+ "status": "scored",
2140
+ "status_label": "scored",
2141
+ "scored": true,
2142
  "proxy_scored": false,
2143
+ "raw": 0.37271645981034185,
2144
+ "raw_text": "0.3727",
2145
+ "normalized_score": 0.37271645981034185,
2146
+ "metric_key": "misalignment_detection_f1",
2147
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
2148
  "scope": "multi_episode_128_partial_model_overlay",
2149
+ "reason": null
2150
  },
2151
  {
2152
  "task_number": 12,
 
2460
  "task_label": "Long-Horizon Next-Subtask Forecasting",
2461
  "series_id": "cosmos3_super_reasoner",
2462
  "method": "Cosmos3-Super Reasoner",
2463
+ "status": "scored",
2464
+ "status_label": "scored",
2465
+ "scored": true,
2466
  "proxy_scored": false,
2467
+ "raw": 0.0,
2468
+ "raw_text": "0.0000",
2469
+ "normalized_score": 0.0,
2470
+ "metric_key": "next_subtask_forecast_macro_f1",
2471
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
2472
  "scope": "multi_episode_128_partial_model_overlay",
2473
+ "reason": null
2474
  },
2475
  {
2476
  "task_number": 14,
 
2946
  "task_label": "Future Object-Set Forecasting",
2947
  "series_id": "cosmos3_super_reasoner",
2948
  "method": "Cosmos3-Super Reasoner",
2949
+ "status": "scored",
2950
+ "status_label": "scored",
2951
+ "scored": true,
2952
  "proxy_scored": false,
2953
+ "raw": 0.0009279881217520415,
2954
+ "raw_text": "0.0009",
2955
+ "normalized_score": 0.0009279881217520415,
2956
+ "metric_key": "object_set_forecast_micro_f1",
2957
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
2958
  "scope": "multi_episode_128_partial_model_overlay",
2959
+ "reason": null
2960
  },
2961
  {
2962
  "task_number": 17,
data/task_surface_integrity.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-20T14:51:35+00:00",
4
  "summary": {
5
  "task_count": 12,
6
  "expected_task_count": 12,
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-20T15:22:58+00:00",
4
  "summary": {
5
  "task_count": 12,
6
  "expected_task_count": 12,
data/unified_task_model_radar.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "title": "Unified 20-Task Model Radar",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-20T13:58:04+00:00",
5
  "task_count": 20,
6
  "method_count": 9,
7
  "method_task_record_count": 180,
8
- "scored_method_task_count": 164,
9
  "normalization_policy": {
10
  "higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]",
11
  "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
@@ -192,17 +192,17 @@
192
  "method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation, plus task 5/8/9/10/11/12/13/14/16/17/18/19/20 probes where public metrics exist.",
193
  "plotted_as": "colored point overlay",
194
  "result_record_count": 20,
195
- "scored_task_count": 15,
196
- "covered_task_count": 15,
197
  "proxy_scored_task_count": 0,
198
- "scoreless_task_count": 5,
199
  "unsupported_task_count": 0,
200
- "not_evaluated_task_count": 5,
201
  "status_counts": {
202
- "not_evaluated_in_verified_package": 5,
203
- "scored": 15
204
  },
205
- "coverage_fraction": 0.75,
206
  "result_record_fraction": 1.0
207
  },
208
  {
@@ -1384,6 +1384,17 @@
1384
  "raw_text": "0.4098",
1385
  "status_label": "scored"
1386
  },
 
 
 
 
 
 
 
 
 
 
 
1387
  "metadata128_simple": {
1388
  "raw": 0.4198864140782312,
1389
  "metric_key": "f1",
@@ -1428,17 +1439,6 @@
1428
  "raw_text": "0.8030",
1429
  "status_label": "scored"
1430
  },
1431
- "cosmos3_super_reasoner": {
1432
- "raw": null,
1433
- "metric_key": "f1",
1434
- "source": null,
1435
- "scope": "multi_episode_128_partial_model_overlay",
1436
- "status": "not_evaluated_in_verified_package",
1437
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
1438
- "normalized_score": null,
1439
- "raw_text": "n/a",
1440
- "status_label": "not evaluated"
1441
- },
1442
  "cosmos3_nano_future_window": {
1443
  "raw": null,
1444
  "metric_key": "f1",
@@ -1495,6 +1495,17 @@
1495
  "raw_text": "0.3345",
1496
  "status_label": "scored"
1497
  },
 
 
 
 
 
 
 
 
 
 
 
1498
  "metadata128_simple": {
1499
  "raw": 0.49980060227663614,
1500
  "metric_key": "f1",
@@ -1539,17 +1550,6 @@
1539
  "raw_text": "0.8273",
1540
  "status_label": "scored"
1541
  },
1542
- "cosmos3_super_reasoner": {
1543
- "raw": null,
1544
- "metric_key": "f1",
1545
- "source": null,
1546
- "scope": "multi_episode_128_partial_model_overlay",
1547
- "status": "not_evaluated_in_verified_package",
1548
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
1549
- "normalized_score": null,
1550
- "raw_text": "n/a",
1551
- "status_label": "not evaluated"
1552
- },
1553
  "cosmos3_nano_future_window": {
1554
  "raw": null,
1555
  "metric_key": "f1",
@@ -1728,6 +1728,17 @@
1728
  "raw_text": "0.0042",
1729
  "status_label": "scored"
1730
  },
 
 
 
 
 
 
 
 
 
 
 
1731
  "metadata128_simple": {
1732
  "raw": 0.0001206030150753769,
1733
  "metric_key": "macro_f1",
@@ -1771,17 +1782,6 @@
1771
  "normalized_score": 0.0,
1772
  "raw_text": "0.0000",
1773
  "status_label": "scored"
1774
- },
1775
- "cosmos3_super_reasoner": {
1776
- "raw": null,
1777
- "metric_key": "macro_f1",
1778
- "source": null,
1779
- "scope": "multi_episode_128_partial_model_overlay",
1780
- "status": "not_evaluated_in_verified_package",
1781
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
1782
- "normalized_score": null,
1783
- "raw_text": "n/a",
1784
- "status_label": "not evaluated"
1785
  }
1786
  }
1787
  },
@@ -2061,6 +2061,17 @@
2061
  "raw_text": "0.1659",
2062
  "status_label": "scored"
2063
  },
 
 
 
 
 
 
 
 
 
 
 
2064
  "metadata128_simple": {
2065
  "raw": 0.17656983343047333,
2066
  "metric_key": "micro_f1",
@@ -2104,17 +2115,6 @@
2104
  "normalized_score": 0.17523098630012288,
2105
  "raw_text": "0.1752",
2106
  "status_label": "scored"
2107
- },
2108
- "cosmos3_super_reasoner": {
2109
- "raw": null,
2110
- "metric_key": "micro_f1",
2111
- "source": null,
2112
- "scope": "multi_episode_128_partial_model_overlay",
2113
- "status": "not_evaluated_in_verified_package",
2114
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
2115
- "normalized_score": null,
2116
- "raw_text": "n/a",
2117
- "status_label": "not evaluated"
2118
  }
2119
  }
2120
  },
@@ -2498,7 +2498,7 @@
2498
  "id": "cosmos3_super_reasoner",
2499
  "title": "Cosmos3-Super Reasoner",
2500
  "status": "verified_base_weight_eval",
2501
- "coverage": "20 records / 15 scored task-aligned axes",
2502
  "headline": "JSON validity 0.5112; action macro-F1 0.0008",
2503
  "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json"
2504
  },
@@ -4272,17 +4272,17 @@
4272
  "task_label": "Temporal Order Verification",
4273
  "series_id": "cosmos3_super_reasoner",
4274
  "method": "Cosmos3-Super Reasoner",
4275
- "status": "not_evaluated_in_verified_package",
4276
- "status_label": "not evaluated",
4277
- "scored": false,
4278
  "proxy_scored": false,
4279
- "raw": null,
4280
- "raw_text": "n/a",
4281
- "normalized_score": null,
4282
- "metric_key": "f1",
4283
- "source": null,
4284
  "scope": "multi_episode_128_partial_model_overlay",
4285
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
4286
  },
4287
  {
4288
  "task_number": 11,
@@ -4434,17 +4434,17 @@
4434
  "task_label": "Multimodal Synchronization Detection",
4435
  "series_id": "cosmos3_super_reasoner",
4436
  "method": "Cosmos3-Super Reasoner",
4437
- "status": "not_evaluated_in_verified_package",
4438
- "status_label": "not evaluated",
4439
- "scored": false,
4440
  "proxy_scored": false,
4441
- "raw": null,
4442
- "raw_text": "n/a",
4443
- "normalized_score": null,
4444
- "metric_key": "f1",
4445
- "source": null,
4446
  "scope": "multi_episode_128_partial_model_overlay",
4447
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
4448
  },
4449
  {
4450
  "task_number": 12,
@@ -4758,17 +4758,17 @@
4758
  "task_label": "Long-Horizon Next-Subtask Forecasting",
4759
  "series_id": "cosmos3_super_reasoner",
4760
  "method": "Cosmos3-Super Reasoner",
4761
- "status": "not_evaluated_in_verified_package",
4762
- "status_label": "not evaluated",
4763
- "scored": false,
4764
  "proxy_scored": false,
4765
- "raw": null,
4766
- "raw_text": "n/a",
4767
- "normalized_score": null,
4768
- "metric_key": "macro_f1",
4769
- "source": null,
4770
  "scope": "multi_episode_128_partial_model_overlay",
4771
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
4772
  },
4773
  {
4774
  "task_number": 14,
@@ -5244,17 +5244,17 @@
5244
  "task_label": "Future Object-Set Forecasting",
5245
  "series_id": "cosmos3_super_reasoner",
5246
  "method": "Cosmos3-Super Reasoner",
5247
- "status": "not_evaluated_in_verified_package",
5248
- "status_label": "not evaluated",
5249
- "scored": false,
5250
  "proxy_scored": false,
5251
- "raw": null,
5252
- "raw_text": "n/a",
5253
- "normalized_score": null,
5254
- "metric_key": "micro_f1",
5255
- "source": null,
5256
  "scope": "multi_episode_128_partial_model_overlay",
5257
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
5258
  },
5259
  {
5260
  "task_number": 17,
 
1
  {
2
  "title": "Unified 20-Task Model Radar",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-20T15:20:32+00:00",
5
  "task_count": 20,
6
  "method_count": 9,
7
  "method_task_record_count": 180,
8
+ "scored_method_task_count": 168,
9
  "normalization_policy": {
10
  "higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]",
11
  "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
 
192
  "method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation, plus task 5/8/9/10/11/12/13/14/16/17/18/19/20 probes where public metrics exist.",
193
  "plotted_as": "colored point overlay",
194
  "result_record_count": 20,
195
+ "scored_task_count": 19,
196
+ "covered_task_count": 19,
197
  "proxy_scored_task_count": 0,
198
+ "scoreless_task_count": 1,
199
  "unsupported_task_count": 0,
200
+ "not_evaluated_task_count": 1,
201
  "status_counts": {
202
+ "not_evaluated_in_verified_package": 1,
203
+ "scored": 19
204
  },
205
+ "coverage_fraction": 0.95,
206
  "result_record_fraction": 1.0
207
  },
208
  {
 
1384
  "raw_text": "0.4098",
1385
  "status_label": "scored"
1386
  },
1387
+ "cosmos3_super_reasoner": {
1388
+ "raw": 0.6286317274823326,
1389
+ "metric_key": "temporal_order_f1",
1390
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
1391
+ "scope": "multi_episode_128_partial_model_overlay",
1392
+ "status": "scored",
1393
+ "reason": null,
1394
+ "normalized_score": 0.6286317274823326,
1395
+ "raw_text": "0.6286",
1396
+ "status_label": "scored"
1397
+ },
1398
  "metadata128_simple": {
1399
  "raw": 0.4198864140782312,
1400
  "metric_key": "f1",
 
1439
  "raw_text": "0.8030",
1440
  "status_label": "scored"
1441
  },
 
 
 
 
 
 
 
 
 
 
 
1442
  "cosmos3_nano_future_window": {
1443
  "raw": null,
1444
  "metric_key": "f1",
 
1495
  "raw_text": "0.3345",
1496
  "status_label": "scored"
1497
  },
1498
+ "cosmos3_super_reasoner": {
1499
+ "raw": 0.37271645981034185,
1500
+ "metric_key": "misalignment_detection_f1",
1501
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
1502
+ "scope": "multi_episode_128_partial_model_overlay",
1503
+ "status": "scored",
1504
+ "reason": null,
1505
+ "normalized_score": 0.37271645981034185,
1506
+ "raw_text": "0.3727",
1507
+ "status_label": "scored"
1508
+ },
1509
  "metadata128_simple": {
1510
  "raw": 0.49980060227663614,
1511
  "metric_key": "f1",
 
1550
  "raw_text": "0.8273",
1551
  "status_label": "scored"
1552
  },
 
 
 
 
 
 
 
 
 
 
 
1553
  "cosmos3_nano_future_window": {
1554
  "raw": null,
1555
  "metric_key": "f1",
 
1728
  "raw_text": "0.0042",
1729
  "status_label": "scored"
1730
  },
1731
+ "cosmos3_super_reasoner": {
1732
+ "raw": 0.0,
1733
+ "metric_key": "next_subtask_forecast_macro_f1",
1734
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
1735
+ "scope": "multi_episode_128_partial_model_overlay",
1736
+ "status": "scored",
1737
+ "reason": null,
1738
+ "normalized_score": 0.0,
1739
+ "raw_text": "0.0000",
1740
+ "status_label": "scored"
1741
+ },
1742
  "metadata128_simple": {
1743
  "raw": 0.0001206030150753769,
1744
  "metric_key": "macro_f1",
 
1782
  "normalized_score": 0.0,
1783
  "raw_text": "0.0000",
1784
  "status_label": "scored"
 
 
 
 
 
 
 
 
 
 
 
1785
  }
1786
  }
1787
  },
 
2061
  "raw_text": "0.1659",
2062
  "status_label": "scored"
2063
  },
2064
+ "cosmos3_super_reasoner": {
2065
+ "raw": 0.0009279881217520415,
2066
+ "metric_key": "object_set_forecast_micro_f1",
2067
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
2068
+ "scope": "multi_episode_128_partial_model_overlay",
2069
+ "status": "scored",
2070
+ "reason": null,
2071
+ "normalized_score": 0.0009279881217520415,
2072
+ "raw_text": "0.0009",
2073
+ "status_label": "scored"
2074
+ },
2075
  "metadata128_simple": {
2076
  "raw": 0.17656983343047333,
2077
  "metric_key": "micro_f1",
 
2115
  "normalized_score": 0.17523098630012288,
2116
  "raw_text": "0.1752",
2117
  "status_label": "scored"
 
 
 
 
 
 
 
 
 
 
 
2118
  }
2119
  }
2120
  },
 
2498
  "id": "cosmos3_super_reasoner",
2499
  "title": "Cosmos3-Super Reasoner",
2500
  "status": "verified_base_weight_eval",
2501
+ "coverage": "20 records / 19 scored task-aligned axes",
2502
  "headline": "JSON validity 0.5112; action macro-F1 0.0008",
2503
  "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json"
2504
  },
 
4272
  "task_label": "Temporal Order Verification",
4273
  "series_id": "cosmos3_super_reasoner",
4274
  "method": "Cosmos3-Super Reasoner",
4275
+ "status": "scored",
4276
+ "status_label": "scored",
4277
+ "scored": true,
4278
  "proxy_scored": false,
4279
+ "raw": 0.6286317274823326,
4280
+ "raw_text": "0.6286",
4281
+ "normalized_score": 0.6286317274823326,
4282
+ "metric_key": "temporal_order_f1",
4283
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
4284
  "scope": "multi_episode_128_partial_model_overlay",
4285
+ "reason": null
4286
  },
4287
  {
4288
  "task_number": 11,
 
4434
  "task_label": "Multimodal Synchronization Detection",
4435
  "series_id": "cosmos3_super_reasoner",
4436
  "method": "Cosmos3-Super Reasoner",
4437
+ "status": "scored",
4438
+ "status_label": "scored",
4439
+ "scored": true,
4440
  "proxy_scored": false,
4441
+ "raw": 0.37271645981034185,
4442
+ "raw_text": "0.3727",
4443
+ "normalized_score": 0.37271645981034185,
4444
+ "metric_key": "misalignment_detection_f1",
4445
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
4446
  "scope": "multi_episode_128_partial_model_overlay",
4447
+ "reason": null
4448
  },
4449
  {
4450
  "task_number": 12,
 
4758
  "task_label": "Long-Horizon Next-Subtask Forecasting",
4759
  "series_id": "cosmos3_super_reasoner",
4760
  "method": "Cosmos3-Super Reasoner",
4761
+ "status": "scored",
4762
+ "status_label": "scored",
4763
+ "scored": true,
4764
  "proxy_scored": false,
4765
+ "raw": 0.0,
4766
+ "raw_text": "0.0000",
4767
+ "normalized_score": 0.0,
4768
+ "metric_key": "next_subtask_forecast_macro_f1",
4769
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
4770
  "scope": "multi_episode_128_partial_model_overlay",
4771
+ "reason": null
4772
  },
4773
  {
4774
  "task_number": 14,
 
5244
  "task_label": "Future Object-Set Forecasting",
5245
  "series_id": "cosmos3_super_reasoner",
5246
  "method": "Cosmos3-Super Reasoner",
5247
+ "status": "scored",
5248
+ "status_label": "scored",
5249
+ "scored": true,
5250
  "proxy_scored": false,
5251
+ "raw": 0.0009279881217520415,
5252
+ "raw_text": "0.0009",
5253
+ "normalized_score": 0.0009279881217520415,
5254
+ "metric_key": "object_set_forecast_micro_f1",
5255
+ "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json",
5256
  "scope": "multi_episode_128_partial_model_overlay",
5257
+ "reason": null
5258
  },
5259
  {
5260
  "task_number": 17,
data/website_integrity.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-20T14:49:37+00:00",
4
  "docs_root": "docs",
5
  "site_base": "/ropedia-xperience-10m-task-suite/",
6
  "summary": {
@@ -80,8 +80,8 @@
80
  "name": "project_overview_precedes_progress_ledger",
81
  "status": "pass",
82
  "reason": "The project overview should appear before the deeper progress ledger.",
83
- "overview_index": 95752,
84
- "evidence_index": 132135
85
  },
86
  {
87
  "name": "project_status_links_json",
@@ -159,9 +159,9 @@
159
  "name": "evaluation_protocol_between_overview_and_progress",
160
  "status": "pass",
161
  "reason": "The evaluation protocol should appear before the deeper evidence ledger.",
162
- "overview_index": 95752,
163
- "protocol_index": 128316,
164
- "evidence_index": 132135
165
  },
166
  {
167
  "name": "evaluation_protocol_links_json",
@@ -301,7 +301,7 @@
301
  },
302
  {
303
  "path": "data/artifact_index.json",
304
- "bytes": 116644,
305
  "top_level_type": "dict"
306
  },
307
  {
@@ -316,7 +316,7 @@
316
  },
317
  {
318
  "path": "data/episode128_task_model_radar.json",
319
- "bytes": 184785,
320
  "top_level_type": "dict"
321
  },
322
  {
@@ -346,7 +346,7 @@
346
  },
347
  {
348
  "path": "data/live_publication_status.json",
349
- "bytes": 181998,
350
  "top_level_type": "dict"
351
  },
352
  {
@@ -486,12 +486,12 @@
486
  },
487
  {
488
  "path": "data/task_method_20_gap_audit.json",
489
- "bytes": 20037,
490
  "top_level_type": "dict"
491
  },
492
  {
493
  "path": "data/task_method_20_result_matrix.json",
494
- "bytes": 128510,
495
  "top_level_type": "dict"
496
  },
497
  {
@@ -526,12 +526,12 @@
526
  },
527
  {
528
  "path": "data/unified_task_model_radar.json",
529
- "bytes": 228639,
530
  "top_level_type": "dict"
531
  },
532
  {
533
  "path": "data/website_integrity.json",
534
- "bytes": 20022,
535
  "top_level_type": "dict"
536
  },
537
  {
@@ -571,7 +571,7 @@
571
  {
572
  "path": "assets/charts/episode128_task_model_radar.svg",
573
  "exists": true,
574
- "bytes": 50154,
575
  "format": "SVG",
576
  "has_viewbox": true
577
  },
@@ -641,7 +641,7 @@
641
  {
642
  "path": "assets/charts/unified_task_model_radar.svg",
643
  "exists": true,
644
- "bytes": 56167,
645
  "format": "SVG",
646
  "has_viewbox": true
647
  },
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-20T15:22:30+00:00",
4
  "docs_root": "docs",
5
  "site_base": "/ropedia-xperience-10m-task-suite/",
6
  "summary": {
 
80
  "name": "project_overview_precedes_progress_ledger",
81
  "status": "pass",
82
  "reason": "The project overview should appear before the deeper progress ledger.",
83
+ "overview_index": 95751,
84
+ "evidence_index": 132134
85
  },
86
  {
87
  "name": "project_status_links_json",
 
159
  "name": "evaluation_protocol_between_overview_and_progress",
160
  "status": "pass",
161
  "reason": "The evaluation protocol should appear before the deeper evidence ledger.",
162
+ "overview_index": 95751,
163
+ "protocol_index": 128315,
164
+ "evidence_index": 132134
165
  },
166
  {
167
  "name": "evaluation_protocol_links_json",
 
301
  },
302
  {
303
  "path": "data/artifact_index.json",
304
+ "bytes": 116645,
305
  "top_level_type": "dict"
306
  },
307
  {
 
316
  },
317
  {
318
  "path": "data/episode128_task_model_radar.json",
319
+ "bytes": 184569,
320
  "top_level_type": "dict"
321
  },
322
  {
 
346
  },
347
  {
348
  "path": "data/live_publication_status.json",
349
+ "bytes": 181991,
350
  "top_level_type": "dict"
351
  },
352
  {
 
486
  },
487
  {
488
  "path": "data/task_method_20_gap_audit.json",
489
+ "bytes": 16980,
490
  "top_level_type": "dict"
491
  },
492
  {
493
  "path": "data/task_method_20_result_matrix.json",
494
+ "bytes": 128400,
495
  "top_level_type": "dict"
496
  },
497
  {
 
526
  },
527
  {
528
  "path": "data/unified_task_model_radar.json",
529
+ "bytes": 228423,
530
  "top_level_type": "dict"
531
  },
532
  {
533
  "path": "data/website_integrity.json",
534
+ "bytes": 20023,
535
  "top_level_type": "dict"
536
  },
537
  {
 
571
  {
572
  "path": "assets/charts/episode128_task_model_radar.svg",
573
  "exists": true,
574
+ "bytes": 50599,
575
  "format": "SVG",
576
  "has_viewbox": true
577
  },
 
641
  {
642
  "path": "assets/charts/unified_task_model_radar.svg",
643
  "exists": true,
644
+ "bytes": 56612,
645
  "format": "SVG",
646
  "has_viewbox": true
647
  },
results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/RUN_REPORT.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Cosmos3-Super Reasoner Future Task Probes
2
+
3
+ - Run ID: `xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620`
4
+ - Shards: `4`
5
+
6
+ | Task | Metric | Score | Samples |
7
+ | --- | --- | ---: | ---: |
8
+ | Temporal Order Verification | temporal_order_f1 | 0.628632 | 3951 |
9
+ | Multimodal Misalignment Detection | misalignment_detection_f1 | 0.372716 | 3951 |
10
+ | Long-Horizon Next-Subtask Forecasting | macro_f1 | 0.000000 | 3951 |
11
+ | Future Object-Set Forecasting | micro_f1 | 0.000928 | 3951 |
results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/misalignment_detection/metrics.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "accuracy": 0.4986079473551,
3
+ "adapter_dir": ".",
4
+ "base_url": "http://127.0.0.1:8000/v1",
5
+ "dataset_jsonl": "results/omni_finetune/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_dataset/dataset_a100_eval.jsonl",
6
+ "eval_split": "test",
7
+ "future_frames": 100,
8
+ "labels": [
9
+ "aligned",
10
+ "shifted"
11
+ ],
12
+ "macro_f1": 0.37271645981034185,
13
+ "media_mode": "text_only",
14
+ "metric_key": "misalignment_detection_f1",
15
+ "misalignment_detection_accuracy": 0.4986079473551,
16
+ "misalignment_detection_f1": 0.37271645981034185,
17
+ "misalignment_detection_macro_f1": 0.37271645981034185,
18
+ "model": "/mnt/kgc/chaoyue/ropedia-xperience10m/models/nvidia__Cosmos3-Super_reasoner_overlay",
19
+ "model_id": "/mnt/kgc/chaoyue/ropedia-xperience10m/models/nvidia__Cosmos3-Super_reasoner_overlay",
20
+ "num_samples": 3951,
21
+ "primary_metric": "misalignment_detection_f1",
22
+ "primary_score": 0.37271645981034185,
23
+ "run_id": "xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620",
24
+ "sample_offset": 0,
25
+ "sample_stride": 1,
26
+ "scope": "held_out_test_cosmos3_super_future_task_probe",
27
+ "score_policy": "GPU-backed Cosmos3-Super Reasoner future-task probe over real held-out targets derivable from the 128-episode JSON export. In text_only mode, raw video/audio is omitted and the artifact is labeled as a text-only model-output probe; no labels are fabricated and no weights are updated.",
28
+ "status": "pass",
29
+ "task_id": "misalignment_detection",
30
+ "task_label": "Multimodal Misalignment Detection",
31
+ "task_number": 12,
32
+ "title": "Cosmos3-Super Reasoner Multimodal Misalignment Detection"
33
+ }
results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/next_subtask_forecast/metrics.json ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "accuracy": 0.0,
3
+ "adapter_dir": ".",
4
+ "base_url": "http://127.0.0.1:8000/v1",
5
+ "dataset_jsonl": "results/omni_finetune/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_dataset/dataset_a100_eval.jsonl",
6
+ "eval_split": "test",
7
+ "future_frames": 100,
8
+ "labels": [
9
+ "Adjust lantern string and handle components",
10
+ "Adjusting and securing paper structure",
11
+ "Adjusting cookware",
12
+ "Adjusting paper edge and placing strip",
13
+ "Adjusting stock and finishing placement",
14
+ "Align paper lantern edges",
15
+ "Aligning canned goods on the shelf",
16
+ "Approach inventory boxes",
17
+ "Approaching the stove",
18
+ "Arrange buttons",
19
+ "Arrange buttons in a line",
20
+ "Assembling material pieces",
21
+ "Attempt to fit and place puzzle piece",
22
+ "Bending plastic strip",
23
+ "Browsing smartphone",
24
+ "Bundle display hooks",
25
+ "Checking cooking pot",
26
+ "Checking smartphone",
27
+ "Cleaning cloth maintenance",
28
+ "Cleaning kitchen surfaces",
29
+ "Cooking at the stove",
30
+ "Count beads and retrieve more",
31
+ "Cut along the marked line",
32
+ "Cut cardboard",
33
+ "Cutting cardboard pieces",
34
+ "Document bead counts",
35
+ "Entering the training area",
36
+ "Expand and adjust lantern shape",
37
+ "Expand paper lantern",
38
+ "Extract wire hangers from inventory",
39
+ "Final alignment of lantern edges",
40
+ "Finalizing shelf organization",
41
+ "Finish cutting along the marked line and reposition",
42
+ "Fold and grasp lantern",
43
+ "Folding plastic strip",
44
+ "Grasp lantern component",
45
+ "Greeting participants",
46
+ "Handle paper lantern component",
47
+ "Handling container lid",
48
+ "Handling earbud case",
49
+ "Handling plastic strip",
50
+ "Holding smartphone",
51
+ "Initiating assembly",
52
+ "Inspect shelf condition",
53
+ "Inspect shelf condition and observe surroundings",
54
+ "Install display hooks",
55
+ "Labeling cardboard pieces",
56
+ "Manipulate adhesive strip",
57
+ "Manipulate and release paper strips",
58
+ "Manipulate craft pieces",
59
+ "Manipulate paper decoration",
60
+ "Manipulate paper edge",
61
+ "Manipulate paper strip",
62
+ "Manipulate paper strips",
63
+ "Manipulate puzzle piece",
64
+ "Manipulate puzzle pieces",
65
+ "Manipulating beads",
66
+ "Manipulating plastic strip",
67
+ "Manipulating yellow strips",
68
+ "Move to stocking area",
69
+ "Moving around the kitchen",
70
+ "Moving through the room",
71
+ "Observe puzzle progress",
72
+ "Observing and pausing",
73
+ "Open paper lantern",
74
+ "Operate and release smartphone",
75
+ "Organize and count beads",
76
+ "Organizing cardboard pieces",
77
+ "Paper quilling craft",
78
+ "Pick up and place buttons",
79
+ "Pick up and place puzzle piece",
80
+ "Pick, place, and count beads",
81
+ "Picking up and placing canned goods",
82
+ "Place and adjust items on shelf",
83
+ "Place and retrieve items for stocking",
84
+ "Place items on shelf",
85
+ "Place puzzle piece",
86
+ "Placing and retrieving canned goods",
87
+ "Placing canned goods on the shelf",
88
+ "Prepare to cut cardboard",
89
+ "Preparing craft area",
90
+ "Preparing materials",
91
+ "Preparing plastic strip for folding",
92
+ "Reach for, pick up, and attempt to fit puzzle piece",
93
+ "Reaching into the box for more stock",
94
+ "Release and adjust puzzle piece",
95
+ "Release scissors and operate smartphone",
96
+ "Remove packaging and prepare component",
97
+ "Retrieving and placing canned goods",
98
+ "Retrieving cleaning supplies",
99
+ "Scrolling and placing smartphone down",
100
+ "Scrolling and setting down smartphone",
101
+ "Scrolling smartphone screen",
102
+ "Search for and pick up puzzle piece",
103
+ "Secure lantern with adhesive",
104
+ "Secure paper edges with adhesive",
105
+ "Setting up smartphone",
106
+ "Sort and adjust button line",
107
+ "Sort and arrange buttons",
108
+ "Sort and group beads",
109
+ "Sort and place buttons",
110
+ "Sort and record bead counts",
111
+ "Sort beads and record count",
112
+ "Sort buttons",
113
+ "Sort buttons by color",
114
+ "Sort craft materials into piles",
115
+ "Sort puzzle pieces",
116
+ "Sort small craft pieces",
117
+ "Stocking multiple cans on the shelf",
118
+ "Tidying workspace",
119
+ "Transitioning to smartphone usage",
120
+ "Unfold paper lantern",
121
+ "Unpack additional lantern component",
122
+ "Unpack and place items on shelf",
123
+ "Use smartphone",
124
+ "Using a smartphone",
125
+ "Using phone",
126
+ "Using phone and resuming work",
127
+ "Using smartphone",
128
+ "Walk to shelf location",
129
+ "Walking to the crafting area",
130
+ "Wiping counter",
131
+ "Working with paper strips",
132
+ "Working with paper strips and using phone",
133
+ "unknown",
134
+ "Adjusting and folding cardboard",
135
+ "Adjust and align Mahjong tiles"
136
+ ],
137
+ "macro_f1": 0.0,
138
+ "media_mode": "text_only",
139
+ "metric_key": "macro_f1",
140
+ "model": "/mnt/kgc/chaoyue/ropedia-xperience10m/models/nvidia__Cosmos3-Super_reasoner_overlay",
141
+ "model_id": "/mnt/kgc/chaoyue/ropedia-xperience10m/models/nvidia__Cosmos3-Super_reasoner_overlay",
142
+ "next_subtask_forecast_accuracy": 0.0,
143
+ "next_subtask_forecast_macro_f1": 0.0,
144
+ "num_samples": 3951,
145
+ "primary_metric": "macro_f1",
146
+ "primary_score": 0.0,
147
+ "run_id": "xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620",
148
+ "sample_offset": 0,
149
+ "sample_stride": 1,
150
+ "scope": "held_out_test_cosmos3_super_future_task_probe",
151
+ "score_policy": "GPU-backed Cosmos3-Super Reasoner future-task probe over real held-out targets derivable from the 128-episode JSON export. In text_only mode, raw video/audio is omitted and the artifact is labeled as a text-only model-output probe; no labels are fabricated and no weights are updated.",
152
+ "status": "pass",
153
+ "task_id": "next_subtask_forecast",
154
+ "task_label": "Long-Horizon Next-Subtask Forecasting",
155
+ "task_number": 14,
156
+ "title": "Cosmos3-Super Reasoner Long-Horizon Next-Subtask Forecasting"
157
+ }
results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/object_set_forecast/metrics.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "adapter_dir": ".",
3
+ "base_url": "http://127.0.0.1:8000/v1",
4
+ "dataset_jsonl": "results/omni_finetune/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_dataset/dataset_a100_eval.jsonl",
5
+ "eval_split": "test",
6
+ "exact_match": 0.0002531004808909137,
7
+ "future_frames": 100,
8
+ "media_mode": "text_only",
9
+ "metric_key": "micro_f1",
10
+ "micro_f1": 0.0009279881217520415,
11
+ "model": "/mnt/kgc/chaoyue/ropedia-xperience10m/models/nvidia__Cosmos3-Super_reasoner_overlay",
12
+ "model_id": "/mnt/kgc/chaoyue/ropedia-xperience10m/models/nvidia__Cosmos3-Super_reasoner_overlay",
13
+ "num_samples": 3951,
14
+ "object_set_forecast_exact_match": 0.0002531004808909137,
15
+ "object_set_forecast_micro_f1": 0.0009279881217520415,
16
+ "precision": 0.0006463497398442297,
17
+ "primary_metric": "micro_f1",
18
+ "primary_score": 0.0009279881217520415,
19
+ "recall": 0.0016446015952635473,
20
+ "run_id": "xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620",
21
+ "sample_offset": 0,
22
+ "sample_stride": 1,
23
+ "scope": "held_out_test_cosmos3_super_future_task_probe",
24
+ "score_policy": "GPU-backed Cosmos3-Super Reasoner future-task probe over real held-out targets derivable from the 128-episode JSON export. In text_only mode, raw video/audio is omitted and the artifact is labeled as a text-only model-output probe; no labels are fabricated and no weights are updated.",
25
+ "status": "pass",
26
+ "task_id": "object_set_forecast",
27
+ "task_label": "Future Object-Set Forecasting",
28
+ "task_number": 17,
29
+ "title": "Cosmos3-Super Reasoner Future Object-Set Forecasting"
30
+ }
results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/server_info.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data": [
3
+ {
4
+ "created": 1781964084,
5
+ "id": "/mnt/kgc/chaoyue/ropedia-xperience10m/models/nvidia__Cosmos3-Super_reasoner_overlay",
6
+ "max_model_len": 262144,
7
+ "object": "model",
8
+ "owned_by": "vllm",
9
+ "parent": null,
10
+ "permission": [
11
+ {
12
+ "allow_create_engine": false,
13
+ "allow_fine_tuning": false,
14
+ "allow_logprobs": true,
15
+ "allow_sampling": true,
16
+ "allow_search_indices": false,
17
+ "allow_view": true,
18
+ "created": 1781964084,
19
+ "group": null,
20
+ "id": "modelperm-aa95c0c70bbe5707",
21
+ "is_blocking": false,
22
+ "object": "model_permission",
23
+ "organization": "*"
24
+ }
25
+ ],
26
+ "root": "/mnt/kgc/chaoyue/ropedia-xperience10m/models/nvidia__Cosmos3-Super_reasoner_overlay"
27
+ }
28
+ ],
29
+ "object": "list"
30
+ }
results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/temporal_order/metrics.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "accuracy": 0.6370539104024298,
3
+ "adapter_dir": ".",
4
+ "base_url": "http://127.0.0.1:8000/v1",
5
+ "dataset_jsonl": "results/omni_finetune/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_dataset/dataset_a100_eval.jsonl",
6
+ "eval_split": "test",
7
+ "future_frames": 100,
8
+ "labels": [
9
+ "correct",
10
+ "reversed"
11
+ ],
12
+ "macro_f1": 0.6286317274823326,
13
+ "media_mode": "text_only",
14
+ "metric_key": "temporal_order_f1",
15
+ "model": "/mnt/kgc/chaoyue/ropedia-xperience10m/models/nvidia__Cosmos3-Super_reasoner_overlay",
16
+ "model_id": "/mnt/kgc/chaoyue/ropedia-xperience10m/models/nvidia__Cosmos3-Super_reasoner_overlay",
17
+ "num_samples": 3951,
18
+ "primary_metric": "temporal_order_f1",
19
+ "primary_score": 0.6286317274823326,
20
+ "run_id": "xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620",
21
+ "sample_offset": 0,
22
+ "sample_stride": 1,
23
+ "scope": "held_out_test_cosmos3_super_future_task_probe",
24
+ "score_policy": "GPU-backed Cosmos3-Super Reasoner future-task probe over real held-out targets derivable from the 128-episode JSON export. In text_only mode, raw video/audio is omitted and the artifact is labeled as a text-only model-output probe; no labels are fabricated and no weights are updated.",
25
+ "status": "pass",
26
+ "task_id": "temporal_order",
27
+ "task_label": "Temporal Order Verification",
28
+ "task_number": 11,
29
+ "temporal_order_accuracy": 0.6370539104024298,
30
+ "temporal_order_f1": 0.6286317274823326,
31
+ "temporal_order_macro_f1": 0.6286317274823326,
32
+ "title": "Cosmos3-Super Reasoner Temporal Order Verification"
33
+ }
results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard0.progress.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard1.progress.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard2.progress.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/omni_finetune/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620/xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620_shard3.progress.jsonl ADDED
The diff for this file is too large to render. See raw diff