cy0307 commited on
Commit
8ca5135
·
verified ·
1 Parent(s): c0ec867

Add files using upload-large-folder tool

Browse files
docs/data/artifact_index.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Artifact Index",
3
- "generated_at_utc": "2026-06-16T12:01:29+00:00",
4
  "status": "pass",
5
- "artifact_count": 189,
6
  "missing": [],
7
  "by_kind": {
8
  "project_path": 14,
9
  "scaleup_contract": 7,
10
- "scaleup_status": 41,
11
  "publication_workflow": 6,
12
  "reproducibility": 4,
13
  "project_scope": 1,
@@ -16,7 +16,7 @@
16
  "website_data": 10,
17
  "generated_figure": 7,
18
  "visualization_builder": 1,
19
- "model_result": 2,
20
  "result_interpretation": 5,
21
  "metrics_source": 27,
22
  "visual_evidence": 7,
@@ -465,7 +465,7 @@
465
  "shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
466
  "exists": true,
467
  "bytes": 4432,
468
- "sha256": "fce9e1525d5bfb4ddbdd0d36febfcf7df9f7115e29457f85ff322db1e7418a1d"
469
  },
470
  {
471
  "id": "source_alignment_validator",
@@ -585,8 +585,8 @@
585
  "surface": "website_hf",
586
  "shows": "Stores normalized 20-axis radar values, raw task metrics, Qwen3/Cosmos overlay mappings, branch-card caveats, and explicit scoreless status records.",
587
  "exists": true,
588
- "bytes": 231290,
589
- "sha256": "851677db9ca0a3255d44e5540c2fbd19b3bb4ae5a0d8e31b8faa62f9d6c73a60"
590
  },
591
  {
592
  "id": "single_episode_task_model_radar_json",
@@ -597,7 +597,7 @@
597
  "shows": "Machine-readable split radar for the one-episode Minimal and Neural MLP baselines, both scored on all 20 task contracts.",
598
  "exists": true,
599
  "bytes": 50973,
600
- "sha256": "08904cd9b7a7f2d7090dd232ef026b21a9aaf1891ebff250ab768dce09e71749"
601
  },
602
  {
603
  "id": "episode128_task_model_radar_json",
@@ -607,8 +607,8 @@
607
  "surface": "website_hf",
608
  "shows": "Machine-readable split radar for selected 128-episode metadata/raw baselines and verified Qwen3/Cosmos branches, preserving explicit scoreless cells.",
609
  "exists": true,
610
- "bytes": 187439,
611
- "sha256": "98b3e4367bcdbb22f21bb03896f1ee6db305bafca7b26e4ffd7a3418aa12d95e"
612
  },
613
  {
614
  "id": "task_method_20_result_matrix_json",
@@ -618,8 +618,8 @@
618
  "surface": "website_hf",
619
  "shows": "Machine-readable 9-method by 20-task matrix where every method has 20 records and scoreless cells carry unsupported/not-evaluated reasons.",
620
  "exists": true,
621
- "bytes": 129711,
622
- "sha256": "b15009dd8c4caf77d62654f1dcc80e3fe328dcebd80e397e679f1a44597f3294"
623
  },
624
  {
625
  "id": "task_method_20_result_matrix",
@@ -629,8 +629,8 @@
629
  "surface": "repo_hf",
630
  "shows": "Reader-facing table that separates 20 records per method from numeric scored axes, documented raw128 proxy scores, unsupported metadata targets, and model targets not evaluated in verified packages.",
631
  "exists": true,
632
- "bytes": 4224,
633
- "sha256": "fe14a0e1709a8bb5b9fb1fd720ffde0aecfed578329d8e1e71b352a83b23b1f8"
634
  },
635
  {
636
  "id": "task_method_20_gap_audit_json",
@@ -638,10 +638,10 @@
638
  "path": "docs/data/task_method_20_gap_audit.json",
639
  "kind": "website_data",
640
  "surface": "website_hf",
641
- "shows": "Machine-readable 180-record gap ledger with 111 numeric scores, 69 scoreless cells, explicit status reasons, and next evidence needed before new scores can be published.",
642
  "exists": true,
643
- "bytes": 59421,
644
- "sha256": "872954cbf001c2286b186e17c8d590d9d637852cade0ecc2357c63247e4803fb"
645
  },
646
  {
647
  "id": "task_method_20_gap_audit",
@@ -651,8 +651,8 @@
651
  "surface": "repo_hf",
652
  "shows": "Reader-facing ledger that lists every scoreless method-task cell and the concrete target or model-output evidence required before it can become numeric.",
653
  "exists": true,
654
- "bytes": 16600,
655
- "sha256": "e6bd5b49d01eb095fe6f0d38436e08f2c2ff1a4449f98a08928d51e965e563bb"
656
  },
657
  {
658
  "id": "unified_task_model_radar_chart",
@@ -662,8 +662,8 @@
662
  "surface": "website_hf",
663
  "shows": "Compares minimal and neural MLP baselines across all 20 tasks, with Qwen3/Cosmos task-aligned model overlays.",
664
  "exists": true,
665
- "bytes": 49046,
666
- "sha256": "60c4ed6818b1ebf00165fc7187613c8e41bc0359d5053dfc3659f497f4acef44"
667
  },
668
  {
669
  "id": "single_episode_task_model_radar_chart",
@@ -684,8 +684,8 @@
684
  "surface": "website_hf",
685
  "shows": "Separates the selected 128-episode methods: raw-feature simple/NN as complete 20/20 scored polygons and metadata/Qwen/Cosmos as task-aligned overlays.",
686
  "exists": true,
687
- "bytes": 43020,
688
- "sha256": "fad2d354e09ece917350dbbb66a65a4dcfabc38e000da61b002315c67b6aefa9"
689
  },
690
  {
691
  "id": "unified_task_model_radar_builder",
@@ -695,8 +695,8 @@
695
  "surface": "repo_hf",
696
  "shows": "Regenerates the direction-aware radar chart and machine-readable metric overlay JSON.",
697
  "exists": true,
698
- "bytes": 47539,
699
- "sha256": "23a655bc2ba72e5326a0db8a5cad06d297d5fb76d7985135192f1d34368b966b"
700
  },
701
  {
702
  "id": "task_method_20_gap_audit_builder",
@@ -706,8 +706,8 @@
706
  "surface": "repo_hf",
707
  "shows": "Regenerates the public gap audit from the 9-method by 20-task matrix without inventing scores for unsupported or unevaluated cells.",
708
  "exists": true,
709
- "bytes": 9959,
710
- "sha256": "6a0086ccbc567529b2b24061ca36228e8eac213331621bc66b96db2aea62e1f4"
711
  },
712
  {
713
  "id": "all_task_model_scoring_waiter",
@@ -742,6 +742,28 @@
742
  "bytes": 9133,
743
  "sha256": "3a867d0333fe591999715158e311011db25da018ca39c9b4638930841f35efb8"
744
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
745
  {
746
  "id": "a100_128_metadata_task_baselines",
747
  "title": "128-episode metadata task baselines",
@@ -949,7 +971,7 @@
949
  "shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
950
  "exists": true,
951
  "bytes": 8100,
952
- "sha256": "9d6069e48f2ee57b910193ff3c5f52233c3864a85e3c4e753ecf6f2b411c54ac"
953
  },
954
  {
955
  "id": "public_surface_qa",
@@ -971,7 +993,7 @@
971
  "volatile": true,
972
  "shows": "Machine-readable report for SEO/social metadata, accessible tab semantics, public links, project links, and clear project presentation.",
973
  "exists": true,
974
- "bytes": 6096,
975
  "hash_policy": "existence_and_size_only"
976
  },
977
  {
@@ -1096,8 +1118,8 @@
1096
  "surface": "repo_hf",
1097
  "shows": "Generates the selective artifact catalog from local files.",
1098
  "exists": true,
1099
- "bytes": 53849,
1100
- "sha256": "c6407d4644cd6fee2981a6240c8087b0a83e43133aeb5bcff52d67b27f469e2d"
1101
  },
1102
  {
1103
  "id": "publication_audit",
@@ -1108,7 +1130,7 @@
1108
  "volatile": true,
1109
  "shows": "Confirms public bundles exclude raw data, caches, heavy archives, and credential text.",
1110
  "exists": true,
1111
- "bytes": 8189,
1112
  "hash_policy": "existence_and_size_only"
1113
  },
1114
  {
@@ -1132,7 +1154,7 @@
1132
  "volatile": true,
1133
  "shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
1134
  "exists": true,
1135
- "bytes": 847909,
1136
  "hash_policy": "existence_and_size_only"
1137
  },
1138
  {
@@ -1144,7 +1166,7 @@
1144
  "volatile": true,
1145
  "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
1146
  "exists": true,
1147
- "bytes": 18814,
1148
  "hash_policy": "existence_and_size_only"
1149
  },
1150
  {
 
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Artifact Index",
3
+ "generated_at_utc": "2026-06-16T13:35:38+00:00",
4
  "status": "pass",
5
+ "artifact_count": 191,
6
  "missing": [],
7
  "by_kind": {
8
  "project_path": 14,
9
  "scaleup_contract": 7,
10
+ "scaleup_status": 42,
11
  "publication_workflow": 6,
12
  "reproducibility": 4,
13
  "project_scope": 1,
 
16
  "website_data": 10,
17
  "generated_figure": 7,
18
  "visualization_builder": 1,
19
+ "model_result": 3,
20
  "result_interpretation": 5,
21
  "metrics_source": 27,
22
  "visual_evidence": 7,
 
465
  "shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
466
  "exists": true,
467
  "bytes": 4432,
468
+ "sha256": "4c6e5505884c13ad9d60195e977d1554adafe2a83d07307644532d77f36850bb"
469
  },
470
  {
471
  "id": "source_alignment_validator",
 
585
  "surface": "website_hf",
586
  "shows": "Stores normalized 20-axis radar values, raw task metrics, Qwen3/Cosmos overlay mappings, branch-card caveats, and explicit scoreless status records.",
587
  "exists": true,
588
+ "bytes": 231251,
589
+ "sha256": "5cbcdf8e3a659ef98bbc623ee3e434055ad6479599c95178587b3dab84a6072d"
590
  },
591
  {
592
  "id": "single_episode_task_model_radar_json",
 
597
  "shows": "Machine-readable split radar for the one-episode Minimal and Neural MLP baselines, both scored on all 20 task contracts.",
598
  "exists": true,
599
  "bytes": 50973,
600
+ "sha256": "10f9ca6afc9f9d69d2380faab2e2f1ebd0114bf7ab778ea30188ba849e9a44c1"
601
  },
602
  {
603
  "id": "episode128_task_model_radar_json",
 
607
  "surface": "website_hf",
608
  "shows": "Machine-readable split radar for selected 128-episode metadata/raw baselines and verified Qwen3/Cosmos branches, preserving explicit scoreless cells.",
609
  "exists": true,
610
+ "bytes": 187400,
611
+ "sha256": "fab3dc021cfeca3547b72fd2e1321a1258c0c48a8eaa5ea5cc28600badcd2c68"
612
  },
613
  {
614
  "id": "task_method_20_result_matrix_json",
 
618
  "surface": "website_hf",
619
  "shows": "Machine-readable 9-method by 20-task matrix where every method has 20 records and scoreless cells carry unsupported/not-evaluated reasons.",
620
  "exists": true,
621
+ "bytes": 129740,
622
+ "sha256": "e7e398b98e4e188ee883a126bd77ad966ba563f2c43da56e13e871b6dc90f1ea"
623
  },
624
  {
625
  "id": "task_method_20_result_matrix",
 
629
  "surface": "repo_hf",
630
  "shows": "Reader-facing table that separates 20 records per method from numeric scored axes, documented raw128 proxy scores, unsupported metadata targets, and model targets not evaluated in verified packages.",
631
  "exists": true,
632
+ "bytes": 4208,
633
+ "sha256": "bfe01b258fe121448d2b5b188c6995041c81d7ce1286dcd2ee0bf9c01d6a6193"
634
  },
635
  {
636
  "id": "task_method_20_gap_audit_json",
 
638
  "path": "docs/data/task_method_20_gap_audit.json",
639
  "kind": "website_data",
640
  "surface": "website_hf",
641
+ "shows": "Machine-readable 180-record gap ledger with numeric scores, scoreless cells, explicit status reasons, and next evidence needed before new scores can be published.",
642
  "exists": true,
643
+ "bytes": 57943,
644
+ "sha256": "a1246c07c569238e8589fab0fb90e86733b7799816e8ef6690036a0fac289ecb"
645
  },
646
  {
647
  "id": "task_method_20_gap_audit",
 
651
  "surface": "repo_hf",
652
  "shows": "Reader-facing ledger that lists every scoreless method-task cell and the concrete target or model-output evidence required before it can become numeric.",
653
  "exists": true,
654
+ "bytes": 16234,
655
+ "sha256": "351e55637374770221115486fd6ef46550ae923c4fae5c01d3bb1e777b5a44fb"
656
  },
657
  {
658
  "id": "unified_task_model_radar_chart",
 
662
  "surface": "website_hf",
663
  "shows": "Compares minimal and neural MLP baselines across all 20 tasks, with Qwen3/Cosmos task-aligned model overlays.",
664
  "exists": true,
665
+ "bytes": 49695,
666
+ "sha256": "2ad63af1cd1e3e6f9c354b20792c20683e8877e2f92d9da5b5284dce56e668b7"
667
  },
668
  {
669
  "id": "single_episode_task_model_radar_chart",
 
684
  "surface": "website_hf",
685
  "shows": "Separates the selected 128-episode methods: raw-feature simple/NN as complete 20/20 scored polygons and metadata/Qwen/Cosmos as task-aligned overlays.",
686
  "exists": true,
687
+ "bytes": 43679,
688
+ "sha256": "51991fd33719be32da422f559b7d27895fc96b1332b0952f0c19b573d58c1325"
689
  },
690
  {
691
  "id": "unified_task_model_radar_builder",
 
695
  "surface": "repo_hf",
696
  "shows": "Regenerates the direction-aware radar chart and machine-readable metric overlay JSON.",
697
  "exists": true,
698
+ "bytes": 48861,
699
+ "sha256": "621ff19f057e6ed279388c8e944f2e4a2cfc87b9be1bd9f1feee6d329f0ec431"
700
  },
701
  {
702
  "id": "task_method_20_gap_audit_builder",
 
706
  "surface": "repo_hf",
707
  "shows": "Regenerates the public gap audit from the 9-method by 20-task matrix without inventing scores for unsupported or unevaluated cells.",
708
  "exists": true,
709
+ "bytes": 10094,
710
+ "sha256": "4ba5a43fa045b02303ba16a146ab15a891155370a85dd7c09cbe68f8f0eac7ed"
711
  },
712
  {
713
  "id": "all_task_model_scoring_waiter",
 
742
  "bytes": 9133,
743
  "sha256": "3a867d0333fe591999715158e311011db25da018ca39c9b4638930841f35efb8"
744
  },
745
+ {
746
+ "id": "existing_model_output_task_probe",
747
+ "title": "Existing model-output task probe package",
748
+ "path": "results/omni_finetune/model_output_task_probes_20260616/summary.json",
749
+ "kind": "model_result",
750
+ "surface": "repo_hf",
751
+ "shows": "Scores task 16 action-object relation only where verified held-out prediction JSON already contains action and object-set fields.",
752
+ "exists": true,
753
+ "bytes": 2000,
754
+ "sha256": "de95421d93fec5d12049e492db9fbe7c10af5c2cd0d1039da82c05ec82bf98c5"
755
+ },
756
+ {
757
+ "id": "existing_model_output_task_probe_script",
758
+ "title": "Existing model-output task probe scorer",
759
+ "path": "scripts/omni/score_existing_model_output_task_probes.py",
760
+ "kind": "scaleup_status",
761
+ "surface": "repo_hf",
762
+ "shows": "Derives task-specific scores from committed verified model outputs without running new inference or backfilling absent targets.",
763
+ "exists": true,
764
+ "bytes": 13291,
765
+ "sha256": "25b8e7ba394c1e604fe8d7c900a013b3ea330f32b393cdcd416b2d0f3a2727b8"
766
+ },
767
  {
768
  "id": "a100_128_metadata_task_baselines",
769
  "title": "128-episode metadata task baselines",
 
971
  "shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
972
  "exists": true,
973
  "bytes": 8100,
974
+ "sha256": "b9f9236ae07336c8984a65bcda558d49453c2e57f6c903a44235bec4fd0d8df0"
975
  },
976
  {
977
  "id": "public_surface_qa",
 
993
  "volatile": true,
994
  "shows": "Machine-readable report for SEO/social metadata, accessible tab semantics, public links, project links, and clear project presentation.",
995
  "exists": true,
996
+ "bytes": 6146,
997
  "hash_policy": "existence_and_size_only"
998
  },
999
  {
 
1118
  "surface": "repo_hf",
1119
  "shows": "Generates the selective artifact catalog from local files.",
1120
  "exists": true,
1121
+ "bytes": 54683,
1122
+ "sha256": "0a9e7c7391f0e2822d8490ddd216f155e1ce3a50d2ba3c90476ee09685f74d65"
1123
  },
1124
  {
1125
  "id": "publication_audit",
 
1130
  "volatile": true,
1131
  "shows": "Confirms public bundles exclude raw data, caches, heavy archives, and credential text.",
1132
  "exists": true,
1133
+ "bytes": 8298,
1134
  "hash_policy": "existence_and_size_only"
1135
  },
1136
  {
 
1154
  "volatile": true,
1155
  "shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
1156
  "exists": true,
1157
+ "bytes": 868302,
1158
  "hash_policy": "existence_and_size_only"
1159
  },
1160
  {
 
1166
  "volatile": true,
1167
  "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
1168
  "exists": true,
1169
+ "bytes": 18933,
1170
  "hash_policy": "existence_and_size_only"
1171
  },
1172
  {
docs/data/episode128_task_model_radar.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "title": "128-Episode 20-Task Radar",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-16T11:26:57+00:00",
5
  "description": "Selected 128-episode metadata/raw baselines plus verified Qwen3/Cosmos branches. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.",
6
  "task_count": 20,
7
  "method_count": 7,
8
  "method_task_record_count": 140,
9
- "scored_method_task_count": 71,
10
  "normalization_policy": {
11
  "higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]",
12
  "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
@@ -124,20 +124,20 @@
124
  "kind": "partial_128_episode_foundation_model_overlay",
125
  "scope": "128 selected episodes, held-out test",
126
  "stroke_dasharray": "7 7",
127
- "method_detail": "Verified held-out Qwen3-Omni v6 LoRA metrics on task-aligned JSON outputs.",
128
  "plotted_as": "colored point overlay",
129
  "result_record_count": 20,
130
- "scored_task_count": 6,
131
- "covered_task_count": 6,
132
  "proxy_scored_task_count": 0,
133
- "scoreless_task_count": 14,
134
  "unsupported_task_count": 0,
135
- "not_evaluated_task_count": 14,
136
  "status_counts": {
137
- "not_evaluated_in_verified_package": 14,
138
- "scored": 6
139
  },
140
- "coverage_fraction": 0.3,
141
  "result_record_fraction": 1.0
142
  },
143
  {
@@ -148,20 +148,20 @@
148
  "kind": "partial_128_episode_foundation_model_overlay",
149
  "scope": "128 selected episodes, held-out test",
150
  "stroke_dasharray": "4 7",
151
- "method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation.",
152
  "plotted_as": "colored point overlay",
153
  "result_record_count": 20,
154
- "scored_task_count": 6,
155
- "covered_task_count": 6,
156
  "proxy_scored_task_count": 0,
157
- "scoreless_task_count": 14,
158
  "unsupported_task_count": 0,
159
- "not_evaluated_task_count": 14,
160
  "status_counts": {
161
- "not_evaluated_in_verified_package": 14,
162
- "scored": 6
163
  },
164
- "coverage_fraction": 0.3,
165
  "result_record_fraction": 1.0
166
  },
167
  {
@@ -1612,26 +1612,26 @@
1612
  "status_label": "scored"
1613
  },
1614
  "qwen3_omni_v6_lora": {
1615
- "raw": null,
1616
- "metric_key": "macro_f1",
1617
- "source": null,
1618
  "scope": "multi_episode_128_partial_model_overlay",
1619
- "status": "not_evaluated_in_verified_package",
1620
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
1621
- "normalized_score": null,
1622
- "raw_text": "n/a",
1623
- "status_label": "not evaluated"
1624
  },
1625
  "cosmos3_super_reasoner": {
1626
- "raw": null,
1627
- "metric_key": "macro_f1",
1628
- "source": null,
1629
  "scope": "multi_episode_128_partial_model_overlay",
1630
- "status": "not_evaluated_in_verified_package",
1631
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
1632
- "normalized_score": null,
1633
- "raw_text": "n/a",
1634
- "status_label": "not evaluated"
1635
  },
1636
  "cosmos3_nano_future_window": {
1637
  "raw": null,
@@ -3980,17 +3980,17 @@
3980
  "task_label": "Action-Object Relation Prediction",
3981
  "series_id": "qwen3_omni_v6_lora",
3982
  "method": "Qwen3-Omni v6 LoRA",
3983
- "status": "not_evaluated_in_verified_package",
3984
- "status_label": "not evaluated",
3985
- "scored": false,
3986
  "proxy_scored": false,
3987
- "raw": null,
3988
- "raw_text": "n/a",
3989
- "normalized_score": null,
3990
- "metric_key": "macro_f1",
3991
- "source": null,
3992
  "scope": "multi_episode_128_partial_model_overlay",
3993
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
3994
  },
3995
  {
3996
  "task_number": 16,
@@ -3998,17 +3998,17 @@
3998
  "task_label": "Action-Object Relation Prediction",
3999
  "series_id": "cosmos3_super_reasoner",
4000
  "method": "Cosmos3-Super Reasoner",
4001
- "status": "not_evaluated_in_verified_package",
4002
- "status_label": "not evaluated",
4003
- "scored": false,
4004
  "proxy_scored": false,
4005
- "raw": null,
4006
- "raw_text": "n/a",
4007
- "normalized_score": null,
4008
- "metric_key": "macro_f1",
4009
- "source": null,
4010
  "scope": "multi_episode_128_partial_model_overlay",
4011
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
4012
  },
4013
  {
4014
  "task_number": 16,
 
1
  {
2
  "title": "128-Episode 20-Task Radar",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-16T13:35:38+00:00",
5
  "description": "Selected 128-episode metadata/raw baselines plus verified Qwen3/Cosmos branches. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.",
6
  "task_count": 20,
7
  "method_count": 7,
8
  "method_task_record_count": 140,
9
+ "scored_method_task_count": 73,
10
  "normalization_policy": {
11
  "higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]",
12
  "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
 
124
  "kind": "partial_128_episode_foundation_model_overlay",
125
  "scope": "128 selected episodes, held-out test",
126
  "stroke_dasharray": "7 7",
127
+ "method_detail": "Verified held-out Qwen3-Omni v6 LoRA metrics, plus task 16 scored from existing verified action/object JSON.",
128
  "plotted_as": "colored point overlay",
129
  "result_record_count": 20,
130
+ "scored_task_count": 7,
131
+ "covered_task_count": 7,
132
  "proxy_scored_task_count": 0,
133
+ "scoreless_task_count": 13,
134
  "unsupported_task_count": 0,
135
+ "not_evaluated_task_count": 13,
136
  "status_counts": {
137
+ "not_evaluated_in_verified_package": 13,
138
+ "scored": 7
139
  },
140
+ "coverage_fraction": 0.35,
141
  "result_record_fraction": 1.0
142
  },
143
  {
 
148
  "kind": "partial_128_episode_foundation_model_overlay",
149
  "scope": "128 selected episodes, held-out test",
150
  "stroke_dasharray": "4 7",
151
+ "method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation, plus task 16 scored from existing verified action/object JSON.",
152
  "plotted_as": "colored point overlay",
153
  "result_record_count": 20,
154
+ "scored_task_count": 7,
155
+ "covered_task_count": 7,
156
  "proxy_scored_task_count": 0,
157
+ "scoreless_task_count": 13,
158
  "unsupported_task_count": 0,
159
+ "not_evaluated_task_count": 13,
160
  "status_counts": {
161
+ "not_evaluated_in_verified_package": 13,
162
+ "scored": 7
163
  },
164
+ "coverage_fraction": 0.35,
165
  "result_record_fraction": 1.0
166
  },
167
  {
 
1612
  "status_label": "scored"
1613
  },
1614
  "qwen3_omni_v6_lora": {
1615
+ "raw": 0.0002220083079671497,
1616
+ "metric_key": "action_object_relation_macro_f1",
1617
+ "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
1618
  "scope": "multi_episode_128_partial_model_overlay",
1619
+ "status": "scored",
1620
+ "reason": null,
1621
+ "normalized_score": 0.0002220083079671497,
1622
+ "raw_text": "0.0002",
1623
+ "status_label": "scored"
1624
  },
1625
  "cosmos3_super_reasoner": {
1626
+ "raw": 0.0,
1627
+ "metric_key": "action_object_relation_macro_f1",
1628
+ "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
1629
  "scope": "multi_episode_128_partial_model_overlay",
1630
+ "status": "scored",
1631
+ "reason": null,
1632
+ "normalized_score": 0.0,
1633
+ "raw_text": "0.0000",
1634
+ "status_label": "scored"
1635
  },
1636
  "cosmos3_nano_future_window": {
1637
  "raw": null,
 
3980
  "task_label": "Action-Object Relation Prediction",
3981
  "series_id": "qwen3_omni_v6_lora",
3982
  "method": "Qwen3-Omni v6 LoRA",
3983
+ "status": "scored",
3984
+ "status_label": "scored",
3985
+ "scored": true,
3986
  "proxy_scored": false,
3987
+ "raw": 0.0002220083079671497,
3988
+ "raw_text": "0.0002",
3989
+ "normalized_score": 0.0002220083079671497,
3990
+ "metric_key": "action_object_relation_macro_f1",
3991
+ "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
3992
  "scope": "multi_episode_128_partial_model_overlay",
3993
+ "reason": null
3994
  },
3995
  {
3996
  "task_number": 16,
 
3998
  "task_label": "Action-Object Relation Prediction",
3999
  "series_id": "cosmos3_super_reasoner",
4000
  "method": "Cosmos3-Super Reasoner",
4001
+ "status": "scored",
4002
+ "status_label": "scored",
4003
+ "scored": true,
4004
  "proxy_scored": false,
4005
+ "raw": 0.0,
4006
+ "raw_text": "0.0000",
4007
+ "normalized_score": 0.0,
4008
+ "metric_key": "action_object_relation_macro_f1",
4009
+ "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
4010
  "scope": "multi_episode_128_partial_model_overlay",
4011
+ "reason": null
4012
  },
4013
  {
4014
  "task_number": 16,
docs/data/mirror_parity.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-16T12:05:03+00:00",
4
  "hf_root": "hf_publish",
5
  "summary": {
6
- "group_count": 578,
7
  "failure_count": 0,
8
  "failures_by_surface": {}
9
  },
@@ -138,45 +138,45 @@
138
  "local": {
139
  "path": "repo:docs/data/artifact_index.json",
140
  "exists": true,
141
- "bytes": 103135,
142
- "sha256": "7477a20154930bf56c85d8973cafa2636e68fc0fc20b2fac47ba39cd0b96bb52"
143
  },
144
  "mirrors": {
145
  "hf_space": {
146
  "path": "hf_space:data/artifact_index.json",
147
  "exists": true,
148
- "bytes": 103135,
149
- "sha256": "7477a20154930bf56c85d8973cafa2636e68fc0fc20b2fac47ba39cd0b96bb52"
150
  },
151
  "hf_artifacts_data": {
152
  "path": "hf_artifacts:data/artifact_index.json",
153
  "exists": true,
154
- "bytes": 103135,
155
- "sha256": "7477a20154930bf56c85d8973cafa2636e68fc0fc20b2fac47ba39cd0b96bb52"
156
  },
157
  "hf_artifacts": {
158
  "path": "hf_artifacts:docs/data/artifact_index.json",
159
  "exists": true,
160
- "bytes": 103135,
161
- "sha256": "7477a20154930bf56c85d8973cafa2636e68fc0fc20b2fac47ba39cd0b96bb52"
162
  },
163
  "hf_model_data": {
164
  "path": "hf_model:data/artifact_index.json",
165
  "exists": true,
166
- "bytes": 103135,
167
- "sha256": "7477a20154930bf56c85d8973cafa2636e68fc0fc20b2fac47ba39cd0b96bb52"
168
  },
169
  "hf_model_docs_data": {
170
  "path": "hf_model:docs/data/artifact_index.json",
171
  "exists": true,
172
- "bytes": 103135,
173
- "sha256": "7477a20154930bf56c85d8973cafa2636e68fc0fc20b2fac47ba39cd0b96bb52"
174
  },
175
  "hf_model": {
176
  "path": "hf_model:metrics/artifact_index.json",
177
  "exists": true,
178
- "bytes": 103135,
179
- "sha256": "7477a20154930bf56c85d8973cafa2636e68fc0fc20b2fac47ba39cd0b96bb52"
180
  }
181
  },
182
  "failures": []
@@ -825,44 +825,44 @@
825
  "path": "repo:docs/data/publication_audit.json",
826
  "exists": true,
827
  "bytes": 8298,
828
- "sha256": "c202aea91c9a6e311c190255b51217b6e91cfee45ae8db2cb7ee5174e9ceed28"
829
  },
830
  "mirrors": {
831
  "hf_space": {
832
  "path": "hf_space:data/publication_audit.json",
833
  "exists": true,
834
  "bytes": 8298,
835
- "sha256": "c202aea91c9a6e311c190255b51217b6e91cfee45ae8db2cb7ee5174e9ceed28"
836
  },
837
  "hf_artifacts_data": {
838
  "path": "hf_artifacts:data/publication_audit.json",
839
  "exists": true,
840
  "bytes": 8298,
841
- "sha256": "c202aea91c9a6e311c190255b51217b6e91cfee45ae8db2cb7ee5174e9ceed28"
842
  },
843
  "hf_artifacts": {
844
  "path": "hf_artifacts:docs/data/publication_audit.json",
845
  "exists": true,
846
  "bytes": 8298,
847
- "sha256": "c202aea91c9a6e311c190255b51217b6e91cfee45ae8db2cb7ee5174e9ceed28"
848
  },
849
  "hf_model_data": {
850
  "path": "hf_model:data/publication_audit.json",
851
  "exists": true,
852
  "bytes": 8298,
853
- "sha256": "c202aea91c9a6e311c190255b51217b6e91cfee45ae8db2cb7ee5174e9ceed28"
854
  },
855
  "hf_model_docs_data": {
856
  "path": "hf_model:docs/data/publication_audit.json",
857
  "exists": true,
858
  "bytes": 8298,
859
- "sha256": "c202aea91c9a6e311c190255b51217b6e91cfee45ae8db2cb7ee5174e9ceed28"
860
  },
861
  "hf_model": {
862
  "path": "hf_model:metrics/publication_audit.json",
863
  "exists": true,
864
  "bytes": 8298,
865
- "sha256": "c202aea91c9a6e311c190255b51217b6e91cfee45ae8db2cb7ee5174e9ceed28"
866
  }
867
  },
868
  "failures": []
@@ -874,44 +874,44 @@
874
  "path": "repo:docs/data/public_surface_qa.json",
875
  "exists": true,
876
  "bytes": 6146,
877
- "sha256": "087dd5d2cd87fdc86cd16df74b6f8db23180c84af829e153ccadf646f7c998be"
878
  },
879
  "mirrors": {
880
  "hf_space": {
881
  "path": "hf_space:data/public_surface_qa.json",
882
  "exists": true,
883
  "bytes": 6146,
884
- "sha256": "087dd5d2cd87fdc86cd16df74b6f8db23180c84af829e153ccadf646f7c998be"
885
  },
886
  "hf_artifacts_data": {
887
  "path": "hf_artifacts:data/public_surface_qa.json",
888
  "exists": true,
889
  "bytes": 6146,
890
- "sha256": "087dd5d2cd87fdc86cd16df74b6f8db23180c84af829e153ccadf646f7c998be"
891
  },
892
  "hf_artifacts": {
893
  "path": "hf_artifacts:docs/data/public_surface_qa.json",
894
  "exists": true,
895
  "bytes": 6146,
896
- "sha256": "087dd5d2cd87fdc86cd16df74b6f8db23180c84af829e153ccadf646f7c998be"
897
  },
898
  "hf_model_data": {
899
  "path": "hf_model:data/public_surface_qa.json",
900
  "exists": true,
901
  "bytes": 6146,
902
- "sha256": "087dd5d2cd87fdc86cd16df74b6f8db23180c84af829e153ccadf646f7c998be"
903
  },
904
  "hf_model_docs_data": {
905
  "path": "hf_model:docs/data/public_surface_qa.json",
906
  "exists": true,
907
  "bytes": 6146,
908
- "sha256": "087dd5d2cd87fdc86cd16df74b6f8db23180c84af829e153ccadf646f7c998be"
909
  },
910
  "hf_model": {
911
  "path": "hf_model:metrics/public_surface_qa.json",
912
  "exists": true,
913
  "bytes": 6146,
914
- "sha256": "087dd5d2cd87fdc86cd16df74b6f8db23180c84af829e153ccadf646f7c998be"
915
  }
916
  },
917
  "failures": []
@@ -1021,44 +1021,44 @@
1021
  "path": "repo:docs/data/quality_gates.json",
1022
  "exists": true,
1023
  "bytes": 8100,
1024
- "sha256": "6b952e2866cd38e7f190f24530ab78d1625b8cb94d0cd8b675b594fda3df235b"
1025
  },
1026
  "mirrors": {
1027
  "hf_space": {
1028
  "path": "hf_space:data/quality_gates.json",
1029
  "exists": true,
1030
  "bytes": 8100,
1031
- "sha256": "6b952e2866cd38e7f190f24530ab78d1625b8cb94d0cd8b675b594fda3df235b"
1032
  },
1033
  "hf_artifacts_data": {
1034
  "path": "hf_artifacts:data/quality_gates.json",
1035
  "exists": true,
1036
  "bytes": 8100,
1037
- "sha256": "6b952e2866cd38e7f190f24530ab78d1625b8cb94d0cd8b675b594fda3df235b"
1038
  },
1039
  "hf_artifacts": {
1040
  "path": "hf_artifacts:docs/data/quality_gates.json",
1041
  "exists": true,
1042
  "bytes": 8100,
1043
- "sha256": "6b952e2866cd38e7f190f24530ab78d1625b8cb94d0cd8b675b594fda3df235b"
1044
  },
1045
  "hf_model_data": {
1046
  "path": "hf_model:data/quality_gates.json",
1047
  "exists": true,
1048
  "bytes": 8100,
1049
- "sha256": "6b952e2866cd38e7f190f24530ab78d1625b8cb94d0cd8b675b594fda3df235b"
1050
  },
1051
  "hf_model_docs_data": {
1052
  "path": "hf_model:docs/data/quality_gates.json",
1053
  "exists": true,
1054
  "bytes": 8100,
1055
- "sha256": "6b952e2866cd38e7f190f24530ab78d1625b8cb94d0cd8b675b594fda3df235b"
1056
  },
1057
  "hf_model": {
1058
  "path": "hf_model:metrics/quality_gates.json",
1059
  "exists": true,
1060
  "bytes": 8100,
1061
- "sha256": "6b952e2866cd38e7f190f24530ab78d1625b8cb94d0cd8b675b594fda3df235b"
1062
  }
1063
  },
1064
  "failures": []
@@ -1462,44 +1462,44 @@
1462
  "path": "repo:docs/data/scope_claims_audit.json",
1463
  "exists": true,
1464
  "bytes": 21630,
1465
- "sha256": "403dd79220a3322ff575074a6a4073de74f2c4688c566ba479785d7495872dc3"
1466
  },
1467
  "mirrors": {
1468
  "hf_space": {
1469
  "path": "hf_space:data/scope_claims_audit.json",
1470
  "exists": true,
1471
  "bytes": 21630,
1472
- "sha256": "403dd79220a3322ff575074a6a4073de74f2c4688c566ba479785d7495872dc3"
1473
  },
1474
  "hf_artifacts_data": {
1475
  "path": "hf_artifacts:data/scope_claims_audit.json",
1476
  "exists": true,
1477
  "bytes": 21630,
1478
- "sha256": "403dd79220a3322ff575074a6a4073de74f2c4688c566ba479785d7495872dc3"
1479
  },
1480
  "hf_artifacts": {
1481
  "path": "hf_artifacts:docs/data/scope_claims_audit.json",
1482
  "exists": true,
1483
  "bytes": 21630,
1484
- "sha256": "403dd79220a3322ff575074a6a4073de74f2c4688c566ba479785d7495872dc3"
1485
  },
1486
  "hf_model_data": {
1487
  "path": "hf_model:data/scope_claims_audit.json",
1488
  "exists": true,
1489
  "bytes": 21630,
1490
- "sha256": "403dd79220a3322ff575074a6a4073de74f2c4688c566ba479785d7495872dc3"
1491
  },
1492
  "hf_model_docs_data": {
1493
  "path": "hf_model:docs/data/scope_claims_audit.json",
1494
  "exists": true,
1495
  "bytes": 21630,
1496
- "sha256": "403dd79220a3322ff575074a6a4073de74f2c4688c566ba479785d7495872dc3"
1497
  },
1498
  "hf_model": {
1499
  "path": "hf_model:metrics/scope_claims_audit.json",
1500
  "exists": true,
1501
  "bytes": 21630,
1502
- "sha256": "403dd79220a3322ff575074a6a4073de74f2c4688c566ba479785d7495872dc3"
1503
  }
1504
  },
1505
  "failures": []
@@ -1560,44 +1560,44 @@
1560
  "path": "repo:docs/data/source_alignment_audit.json",
1561
  "exists": true,
1562
  "bytes": 4432,
1563
- "sha256": "7abfe3e11f8c92399b96b0fe3d5edc1ee501666553ca162ba9958e5684ba1940"
1564
  },
1565
  "mirrors": {
1566
  "hf_space": {
1567
  "path": "hf_space:data/source_alignment_audit.json",
1568
  "exists": true,
1569
  "bytes": 4432,
1570
- "sha256": "7abfe3e11f8c92399b96b0fe3d5edc1ee501666553ca162ba9958e5684ba1940"
1571
  },
1572
  "hf_artifacts_data": {
1573
  "path": "hf_artifacts:data/source_alignment_audit.json",
1574
  "exists": true,
1575
  "bytes": 4432,
1576
- "sha256": "7abfe3e11f8c92399b96b0fe3d5edc1ee501666553ca162ba9958e5684ba1940"
1577
  },
1578
  "hf_artifacts": {
1579
  "path": "hf_artifacts:docs/data/source_alignment_audit.json",
1580
  "exists": true,
1581
  "bytes": 4432,
1582
- "sha256": "7abfe3e11f8c92399b96b0fe3d5edc1ee501666553ca162ba9958e5684ba1940"
1583
  },
1584
  "hf_model_data": {
1585
  "path": "hf_model:data/source_alignment_audit.json",
1586
  "exists": true,
1587
  "bytes": 4432,
1588
- "sha256": "7abfe3e11f8c92399b96b0fe3d5edc1ee501666553ca162ba9958e5684ba1940"
1589
  },
1590
  "hf_model_docs_data": {
1591
  "path": "hf_model:docs/data/source_alignment_audit.json",
1592
  "exists": true,
1593
  "bytes": 4432,
1594
- "sha256": "7abfe3e11f8c92399b96b0fe3d5edc1ee501666553ca162ba9958e5684ba1940"
1595
  },
1596
  "hf_model": {
1597
  "path": "hf_model:metrics/source_alignment_audit.json",
1598
  "exists": true,
1599
  "bytes": 4432,
1600
- "sha256": "7abfe3e11f8c92399b96b0fe3d5edc1ee501666553ca162ba9958e5684ba1940"
1601
  }
1602
  },
1603
  "failures": []
@@ -1658,44 +1658,44 @@
1658
  "path": "repo:docs/data/single_episode_task_model_radar.json",
1659
  "exists": true,
1660
  "bytes": 50973,
1661
- "sha256": "08904cd9b7a7f2d7090dd232ef026b21a9aaf1891ebff250ab768dce09e71749"
1662
  },
1663
  "mirrors": {
1664
  "hf_space": {
1665
  "path": "hf_space:data/single_episode_task_model_radar.json",
1666
  "exists": true,
1667
  "bytes": 50973,
1668
- "sha256": "08904cd9b7a7f2d7090dd232ef026b21a9aaf1891ebff250ab768dce09e71749"
1669
  },
1670
  "hf_artifacts_data": {
1671
  "path": "hf_artifacts:data/single_episode_task_model_radar.json",
1672
  "exists": true,
1673
  "bytes": 50973,
1674
- "sha256": "08904cd9b7a7f2d7090dd232ef026b21a9aaf1891ebff250ab768dce09e71749"
1675
  },
1676
  "hf_artifacts": {
1677
  "path": "hf_artifacts:docs/data/single_episode_task_model_radar.json",
1678
  "exists": true,
1679
  "bytes": 50973,
1680
- "sha256": "08904cd9b7a7f2d7090dd232ef026b21a9aaf1891ebff250ab768dce09e71749"
1681
  },
1682
  "hf_model_data": {
1683
  "path": "hf_model:data/single_episode_task_model_radar.json",
1684
  "exists": true,
1685
  "bytes": 50973,
1686
- "sha256": "08904cd9b7a7f2d7090dd232ef026b21a9aaf1891ebff250ab768dce09e71749"
1687
  },
1688
  "hf_model_docs_data": {
1689
  "path": "hf_model:docs/data/single_episode_task_model_radar.json",
1690
  "exists": true,
1691
  "bytes": 50973,
1692
- "sha256": "08904cd9b7a7f2d7090dd232ef026b21a9aaf1891ebff250ab768dce09e71749"
1693
  },
1694
  "hf_model": {
1695
  "path": "hf_model:metrics/single_episode_task_model_radar.json",
1696
  "exists": true,
1697
  "bytes": 50973,
1698
- "sha256": "08904cd9b7a7f2d7090dd232ef026b21a9aaf1891ebff250ab768dce09e71749"
1699
  }
1700
  },
1701
  "failures": []
@@ -1706,45 +1706,45 @@
1706
  "local": {
1707
  "path": "repo:docs/data/episode128_task_model_radar.json",
1708
  "exists": true,
1709
- "bytes": 187439,
1710
- "sha256": "98b3e4367bcdbb22f21bb03896f1ee6db305bafca7b26e4ffd7a3418aa12d95e"
1711
  },
1712
  "mirrors": {
1713
  "hf_space": {
1714
  "path": "hf_space:data/episode128_task_model_radar.json",
1715
  "exists": true,
1716
- "bytes": 187439,
1717
- "sha256": "98b3e4367bcdbb22f21bb03896f1ee6db305bafca7b26e4ffd7a3418aa12d95e"
1718
  },
1719
  "hf_artifacts_data": {
1720
  "path": "hf_artifacts:data/episode128_task_model_radar.json",
1721
  "exists": true,
1722
- "bytes": 187439,
1723
- "sha256": "98b3e4367bcdbb22f21bb03896f1ee6db305bafca7b26e4ffd7a3418aa12d95e"
1724
  },
1725
  "hf_artifacts": {
1726
  "path": "hf_artifacts:docs/data/episode128_task_model_radar.json",
1727
  "exists": true,
1728
- "bytes": 187439,
1729
- "sha256": "98b3e4367bcdbb22f21bb03896f1ee6db305bafca7b26e4ffd7a3418aa12d95e"
1730
  },
1731
  "hf_model_data": {
1732
  "path": "hf_model:data/episode128_task_model_radar.json",
1733
  "exists": true,
1734
- "bytes": 187439,
1735
- "sha256": "98b3e4367bcdbb22f21bb03896f1ee6db305bafca7b26e4ffd7a3418aa12d95e"
1736
  },
1737
  "hf_model_docs_data": {
1738
  "path": "hf_model:docs/data/episode128_task_model_radar.json",
1739
  "exists": true,
1740
- "bytes": 187439,
1741
- "sha256": "98b3e4367bcdbb22f21bb03896f1ee6db305bafca7b26e4ffd7a3418aa12d95e"
1742
  },
1743
  "hf_model": {
1744
  "path": "hf_model:metrics/episode128_task_model_radar.json",
1745
  "exists": true,
1746
- "bytes": 187439,
1747
- "sha256": "98b3e4367bcdbb22f21bb03896f1ee6db305bafca7b26e4ffd7a3418aa12d95e"
1748
  }
1749
  },
1750
  "failures": []
@@ -1854,44 +1854,44 @@
1854
  "path": "repo:docs/data/task_surface_integrity.json",
1855
  "exists": true,
1856
  "bytes": 45779,
1857
- "sha256": "aca78406b797e12bfcb5d0dfb19c720ab153ff80943b501894a31c2f5f850b41"
1858
  },
1859
  "mirrors": {
1860
  "hf_space": {
1861
  "path": "hf_space:data/task_surface_integrity.json",
1862
  "exists": true,
1863
  "bytes": 45779,
1864
- "sha256": "aca78406b797e12bfcb5d0dfb19c720ab153ff80943b501894a31c2f5f850b41"
1865
  },
1866
  "hf_artifacts_data": {
1867
  "path": "hf_artifacts:data/task_surface_integrity.json",
1868
  "exists": true,
1869
  "bytes": 45779,
1870
- "sha256": "aca78406b797e12bfcb5d0dfb19c720ab153ff80943b501894a31c2f5f850b41"
1871
  },
1872
  "hf_artifacts": {
1873
  "path": "hf_artifacts:docs/data/task_surface_integrity.json",
1874
  "exists": true,
1875
  "bytes": 45779,
1876
- "sha256": "aca78406b797e12bfcb5d0dfb19c720ab153ff80943b501894a31c2f5f850b41"
1877
  },
1878
  "hf_model_data": {
1879
  "path": "hf_model:data/task_surface_integrity.json",
1880
  "exists": true,
1881
  "bytes": 45779,
1882
- "sha256": "aca78406b797e12bfcb5d0dfb19c720ab153ff80943b501894a31c2f5f850b41"
1883
  },
1884
  "hf_model_docs_data": {
1885
  "path": "hf_model:docs/data/task_surface_integrity.json",
1886
  "exists": true,
1887
  "bytes": 45779,
1888
- "sha256": "aca78406b797e12bfcb5d0dfb19c720ab153ff80943b501894a31c2f5f850b41"
1889
  },
1890
  "hf_model": {
1891
  "path": "hf_model:metrics/task_surface_integrity.json",
1892
  "exists": true,
1893
  "bytes": 45779,
1894
- "sha256": "aca78406b797e12bfcb5d0dfb19c720ab153ff80943b501894a31c2f5f850b41"
1895
  }
1896
  },
1897
  "failures": []
@@ -1951,45 +1951,45 @@
1951
  "local": {
1952
  "path": "repo:docs/data/task_method_20_result_matrix.json",
1953
  "exists": true,
1954
- "bytes": 129711,
1955
- "sha256": "b15009dd8c4caf77d62654f1dcc80e3fe328dcebd80e397e679f1a44597f3294"
1956
  },
1957
  "mirrors": {
1958
  "hf_space": {
1959
  "path": "hf_space:data/task_method_20_result_matrix.json",
1960
  "exists": true,
1961
- "bytes": 129711,
1962
- "sha256": "b15009dd8c4caf77d62654f1dcc80e3fe328dcebd80e397e679f1a44597f3294"
1963
  },
1964
  "hf_artifacts_data": {
1965
  "path": "hf_artifacts:data/task_method_20_result_matrix.json",
1966
  "exists": true,
1967
- "bytes": 129711,
1968
- "sha256": "b15009dd8c4caf77d62654f1dcc80e3fe328dcebd80e397e679f1a44597f3294"
1969
  },
1970
  "hf_artifacts": {
1971
  "path": "hf_artifacts:docs/data/task_method_20_result_matrix.json",
1972
  "exists": true,
1973
- "bytes": 129711,
1974
- "sha256": "b15009dd8c4caf77d62654f1dcc80e3fe328dcebd80e397e679f1a44597f3294"
1975
  },
1976
  "hf_model_data": {
1977
  "path": "hf_model:data/task_method_20_result_matrix.json",
1978
  "exists": true,
1979
- "bytes": 129711,
1980
- "sha256": "b15009dd8c4caf77d62654f1dcc80e3fe328dcebd80e397e679f1a44597f3294"
1981
  },
1982
  "hf_model_docs_data": {
1983
  "path": "hf_model:docs/data/task_method_20_result_matrix.json",
1984
  "exists": true,
1985
- "bytes": 129711,
1986
- "sha256": "b15009dd8c4caf77d62654f1dcc80e3fe328dcebd80e397e679f1a44597f3294"
1987
  },
1988
  "hf_model": {
1989
  "path": "hf_model:metrics/task_method_20_result_matrix.json",
1990
  "exists": true,
1991
- "bytes": 129711,
1992
- "sha256": "b15009dd8c4caf77d62654f1dcc80e3fe328dcebd80e397e679f1a44597f3294"
1993
  }
1994
  },
1995
  "failures": []
@@ -2000,45 +2000,45 @@
2000
  "local": {
2001
  "path": "repo:docs/data/task_method_20_gap_audit.json",
2002
  "exists": true,
2003
- "bytes": 59421,
2004
- "sha256": "872954cbf001c2286b186e17c8d590d9d637852cade0ecc2357c63247e4803fb"
2005
  },
2006
  "mirrors": {
2007
  "hf_space": {
2008
  "path": "hf_space:data/task_method_20_gap_audit.json",
2009
  "exists": true,
2010
- "bytes": 59421,
2011
- "sha256": "872954cbf001c2286b186e17c8d590d9d637852cade0ecc2357c63247e4803fb"
2012
  },
2013
  "hf_artifacts_data": {
2014
  "path": "hf_artifacts:data/task_method_20_gap_audit.json",
2015
  "exists": true,
2016
- "bytes": 59421,
2017
- "sha256": "872954cbf001c2286b186e17c8d590d9d637852cade0ecc2357c63247e4803fb"
2018
  },
2019
  "hf_artifacts": {
2020
  "path": "hf_artifacts:docs/data/task_method_20_gap_audit.json",
2021
  "exists": true,
2022
- "bytes": 59421,
2023
- "sha256": "872954cbf001c2286b186e17c8d590d9d637852cade0ecc2357c63247e4803fb"
2024
  },
2025
  "hf_model_data": {
2026
  "path": "hf_model:data/task_method_20_gap_audit.json",
2027
  "exists": true,
2028
- "bytes": 59421,
2029
- "sha256": "872954cbf001c2286b186e17c8d590d9d637852cade0ecc2357c63247e4803fb"
2030
  },
2031
  "hf_model_docs_data": {
2032
  "path": "hf_model:docs/data/task_method_20_gap_audit.json",
2033
  "exists": true,
2034
- "bytes": 59421,
2035
- "sha256": "872954cbf001c2286b186e17c8d590d9d637852cade0ecc2357c63247e4803fb"
2036
  },
2037
  "hf_model": {
2038
  "path": "hf_model:metrics/task_method_20_gap_audit.json",
2039
  "exists": true,
2040
- "bytes": 59421,
2041
- "sha256": "872954cbf001c2286b186e17c8d590d9d637852cade0ecc2357c63247e4803fb"
2042
  }
2043
  },
2044
  "failures": []
@@ -2098,45 +2098,45 @@
2098
  "local": {
2099
  "path": "repo:docs/data/unified_task_model_radar.json",
2100
  "exists": true,
2101
- "bytes": 231290,
2102
- "sha256": "851677db9ca0a3255d44e5540c2fbd19b3bb4ae5a0d8e31b8faa62f9d6c73a60"
2103
  },
2104
  "mirrors": {
2105
  "hf_space": {
2106
  "path": "hf_space:data/unified_task_model_radar.json",
2107
  "exists": true,
2108
- "bytes": 231290,
2109
- "sha256": "851677db9ca0a3255d44e5540c2fbd19b3bb4ae5a0d8e31b8faa62f9d6c73a60"
2110
  },
2111
  "hf_artifacts_data": {
2112
  "path": "hf_artifacts:data/unified_task_model_radar.json",
2113
  "exists": true,
2114
- "bytes": 231290,
2115
- "sha256": "851677db9ca0a3255d44e5540c2fbd19b3bb4ae5a0d8e31b8faa62f9d6c73a60"
2116
  },
2117
  "hf_artifacts": {
2118
  "path": "hf_artifacts:docs/data/unified_task_model_radar.json",
2119
  "exists": true,
2120
- "bytes": 231290,
2121
- "sha256": "851677db9ca0a3255d44e5540c2fbd19b3bb4ae5a0d8e31b8faa62f9d6c73a60"
2122
  },
2123
  "hf_model_data": {
2124
  "path": "hf_model:data/unified_task_model_radar.json",
2125
  "exists": true,
2126
- "bytes": 231290,
2127
- "sha256": "851677db9ca0a3255d44e5540c2fbd19b3bb4ae5a0d8e31b8faa62f9d6c73a60"
2128
  },
2129
  "hf_model_docs_data": {
2130
  "path": "hf_model:docs/data/unified_task_model_radar.json",
2131
  "exists": true,
2132
- "bytes": 231290,
2133
- "sha256": "851677db9ca0a3255d44e5540c2fbd19b3bb4ae5a0d8e31b8faa62f9d6c73a60"
2134
  },
2135
  "hf_model": {
2136
  "path": "hf_model:metrics/unified_task_model_radar.json",
2137
  "exists": true,
2138
- "bytes": 231290,
2139
- "sha256": "851677db9ca0a3255d44e5540c2fbd19b3bb4ae5a0d8e31b8faa62f9d6c73a60"
2140
  }
2141
  },
2142
  "failures": []
@@ -2148,44 +2148,44 @@
2148
  "path": "repo:docs/data/website_integrity.json",
2149
  "exists": true,
2150
  "bytes": 18933,
2151
- "sha256": "797890b784ff4da6566e31fa89887919ca3f01eb37d713678eb1d720fa9b8780"
2152
  },
2153
  "mirrors": {
2154
  "hf_space": {
2155
  "path": "hf_space:data/website_integrity.json",
2156
  "exists": true,
2157
  "bytes": 18933,
2158
- "sha256": "797890b784ff4da6566e31fa89887919ca3f01eb37d713678eb1d720fa9b8780"
2159
  },
2160
  "hf_artifacts_data": {
2161
  "path": "hf_artifacts:data/website_integrity.json",
2162
  "exists": true,
2163
  "bytes": 18933,
2164
- "sha256": "797890b784ff4da6566e31fa89887919ca3f01eb37d713678eb1d720fa9b8780"
2165
  },
2166
  "hf_artifacts": {
2167
  "path": "hf_artifacts:docs/data/website_integrity.json",
2168
  "exists": true,
2169
  "bytes": 18933,
2170
- "sha256": "797890b784ff4da6566e31fa89887919ca3f01eb37d713678eb1d720fa9b8780"
2171
  },
2172
  "hf_model_data": {
2173
  "path": "hf_model:data/website_integrity.json",
2174
  "exists": true,
2175
  "bytes": 18933,
2176
- "sha256": "797890b784ff4da6566e31fa89887919ca3f01eb37d713678eb1d720fa9b8780"
2177
  },
2178
  "hf_model_docs_data": {
2179
  "path": "hf_model:docs/data/website_integrity.json",
2180
  "exists": true,
2181
  "bytes": 18933,
2182
- "sha256": "797890b784ff4da6566e31fa89887919ca3f01eb37d713678eb1d720fa9b8780"
2183
  },
2184
  "hf_model": {
2185
  "path": "hf_model:metrics/website_integrity.json",
2186
  "exists": true,
2187
  "bytes": 18933,
2188
- "sha256": "797890b784ff4da6566e31fa89887919ca3f01eb37d713678eb1d720fa9b8780"
2189
  }
2190
  },
2191
  "failures": []
@@ -2319,33 +2319,33 @@
2319
  "local": {
2320
  "path": "repo:docs/assets/charts/episode128_task_model_radar.svg",
2321
  "exists": true,
2322
- "bytes": 43020,
2323
- "sha256": "fad2d354e09ece917350dbbb66a65a4dcfabc38e000da61b002315c67b6aefa9"
2324
  },
2325
  "mirrors": {
2326
  "hf_space": {
2327
  "path": "hf_space:assets/charts/episode128_task_model_radar.svg",
2328
  "exists": true,
2329
- "bytes": 43020,
2330
- "sha256": "fad2d354e09ece917350dbbb66a65a4dcfabc38e000da61b002315c67b6aefa9"
2331
  },
2332
  "hf_artifacts_docs": {
2333
  "path": "hf_artifacts:docs/assets/charts/episode128_task_model_radar.svg",
2334
  "exists": true,
2335
- "bytes": 43020,
2336
- "sha256": "fad2d354e09ece917350dbbb66a65a4dcfabc38e000da61b002315c67b6aefa9"
2337
  },
2338
  "hf_artifacts_card": {
2339
  "path": "hf_artifacts:assets/charts/episode128_task_model_radar.svg",
2340
  "exists": true,
2341
- "bytes": 43020,
2342
- "sha256": "fad2d354e09ece917350dbbb66a65a4dcfabc38e000da61b002315c67b6aefa9"
2343
  },
2344
  "hf_model": {
2345
  "path": "hf_model:assets/charts/episode128_task_model_radar.svg",
2346
  "exists": true,
2347
- "bytes": 43020,
2348
- "sha256": "fad2d354e09ece917350dbbb66a65a4dcfabc38e000da61b002315c67b6aefa9"
2349
  }
2350
  },
2351
  "failures": []
@@ -2393,33 +2393,33 @@
2393
  "local": {
2394
  "path": "repo:docs/assets/charts/unified_task_model_radar.svg",
2395
  "exists": true,
2396
- "bytes": 49046,
2397
- "sha256": "60c4ed6818b1ebf00165fc7187613c8e41bc0359d5053dfc3659f497f4acef44"
2398
  },
2399
  "mirrors": {
2400
  "hf_space": {
2401
  "path": "hf_space:assets/charts/unified_task_model_radar.svg",
2402
  "exists": true,
2403
- "bytes": 49046,
2404
- "sha256": "60c4ed6818b1ebf00165fc7187613c8e41bc0359d5053dfc3659f497f4acef44"
2405
  },
2406
  "hf_artifacts_docs": {
2407
  "path": "hf_artifacts:docs/assets/charts/unified_task_model_radar.svg",
2408
  "exists": true,
2409
- "bytes": 49046,
2410
- "sha256": "60c4ed6818b1ebf00165fc7187613c8e41bc0359d5053dfc3659f497f4acef44"
2411
  },
2412
  "hf_artifacts_card": {
2413
  "path": "hf_artifacts:assets/charts/unified_task_model_radar.svg",
2414
  "exists": true,
2415
- "bytes": 49046,
2416
- "sha256": "60c4ed6818b1ebf00165fc7187613c8e41bc0359d5053dfc3659f497f4acef44"
2417
  },
2418
  "hf_model": {
2419
  "path": "hf_model:assets/charts/unified_task_model_radar.svg",
2420
  "exists": true,
2421
- "bytes": 49046,
2422
- "sha256": "60c4ed6818b1ebf00165fc7187613c8e41bc0359d5053dfc3659f497f4acef44"
2423
  }
2424
  },
2425
  "failures": []
@@ -3650,6 +3650,31 @@
3650
  },
3651
  "failures": []
3652
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3653
  {
3654
  "name": "scripts/omni/score_model_output_probes.py",
3655
  "status": "pass",
@@ -3806,21 +3831,21 @@
3806
  "local": {
3807
  "path": "repo:scripts/build_artifact_index.py",
3808
  "exists": true,
3809
- "bytes": 53849,
3810
- "sha256": "c6407d4644cd6fee2981a6240c8087b0a83e43133aeb5bcff52d67b27f469e2d"
3811
  },
3812
  "mirrors": {
3813
  "hf_artifacts": {
3814
  "path": "hf_artifacts:scripts/build_artifact_index.py",
3815
  "exists": true,
3816
- "bytes": 53849,
3817
- "sha256": "c6407d4644cd6fee2981a6240c8087b0a83e43133aeb5bcff52d67b27f469e2d"
3818
  },
3819
  "hf_model": {
3820
  "path": "hf_model:scripts/build_artifact_index.py",
3821
  "exists": true,
3822
- "bytes": 53849,
3823
- "sha256": "c6407d4644cd6fee2981a6240c8087b0a83e43133aeb5bcff52d67b27f469e2d"
3824
  }
3825
  },
3826
  "failures": []
@@ -4031,21 +4056,21 @@
4031
  "local": {
4032
  "path": "repo:scripts/build_task_method_20_gap_audit.py",
4033
  "exists": true,
4034
- "bytes": 9959,
4035
- "sha256": "6a0086ccbc567529b2b24061ca36228e8eac213331621bc66b96db2aea62e1f4"
4036
  },
4037
  "mirrors": {
4038
  "hf_artifacts": {
4039
  "path": "hf_artifacts:scripts/build_task_method_20_gap_audit.py",
4040
  "exists": true,
4041
- "bytes": 9959,
4042
- "sha256": "6a0086ccbc567529b2b24061ca36228e8eac213331621bc66b96db2aea62e1f4"
4043
  },
4044
  "hf_model": {
4045
  "path": "hf_model:scripts/build_task_method_20_gap_audit.py",
4046
  "exists": true,
4047
- "bytes": 9959,
4048
- "sha256": "6a0086ccbc567529b2b24061ca36228e8eac213331621bc66b96db2aea62e1f4"
4049
  }
4050
  },
4051
  "failures": []
@@ -4106,21 +4131,21 @@
4106
  "local": {
4107
  "path": "repo:scripts/build_unified_task_model_radar.py",
4108
  "exists": true,
4109
- "bytes": 47539,
4110
- "sha256": "23a655bc2ba72e5326a0db8a5cad06d297d5fb76d7985135192f1d34368b966b"
4111
  },
4112
  "mirrors": {
4113
  "hf_artifacts": {
4114
  "path": "hf_artifacts:scripts/build_unified_task_model_radar.py",
4115
  "exists": true,
4116
- "bytes": 47539,
4117
- "sha256": "23a655bc2ba72e5326a0db8a5cad06d297d5fb76d7985135192f1d34368b966b"
4118
  },
4119
  "hf_model": {
4120
  "path": "hf_model:scripts/build_unified_task_model_radar.py",
4121
  "exists": true,
4122
- "bytes": 47539,
4123
- "sha256": "23a655bc2ba72e5326a0db8a5cad06d297d5fb76d7985135192f1d34368b966b"
4124
  }
4125
  },
4126
  "failures": []
@@ -4181,21 +4206,21 @@
4181
  "local": {
4182
  "path": "repo:scripts/validate_mirror_parity.py",
4183
  "exists": true,
4184
- "bytes": 24235,
4185
- "sha256": "c823c3c0fbcbc004aa890738c510183e06290687b42a8d42adccb385525b605a"
4186
  },
4187
  "mirrors": {
4188
  "hf_artifacts": {
4189
  "path": "hf_artifacts:scripts/validate_mirror_parity.py",
4190
  "exists": true,
4191
- "bytes": 24235,
4192
- "sha256": "c823c3c0fbcbc004aa890738c510183e06290687b42a8d42adccb385525b605a"
4193
  },
4194
  "hf_model": {
4195
  "path": "hf_model:scripts/validate_mirror_parity.py",
4196
  "exists": true,
4197
- "bytes": 24235,
4198
- "sha256": "c823c3c0fbcbc004aa890738c510183e06290687b42a8d42adccb385525b605a"
4199
  }
4200
  },
4201
  "failures": []
@@ -4331,21 +4356,21 @@
4331
  "local": {
4332
  "path": "repo:scripts/sync_hf_publish_mirrors.py",
4333
  "exists": true,
4334
- "bytes": 17856,
4335
- "sha256": "a2ce492cb82034175208b60fec498c4bf9f2ef6c1e312ce51912c0a919e6514e"
4336
  },
4337
  "mirrors": {
4338
  "hf_artifacts": {
4339
  "path": "hf_artifacts:scripts/sync_hf_publish_mirrors.py",
4340
  "exists": true,
4341
- "bytes": 17856,
4342
- "sha256": "a2ce492cb82034175208b60fec498c4bf9f2ef6c1e312ce51912c0a919e6514e"
4343
  },
4344
  "hf_model": {
4345
  "path": "hf_model:scripts/sync_hf_publish_mirrors.py",
4346
  "exists": true,
4347
- "bytes": 17856,
4348
- "sha256": "a2ce492cb82034175208b60fec498c4bf9f2ef6c1e312ce51912c0a919e6514e"
4349
  }
4350
  },
4351
  "failures": []
@@ -4535,39 +4560,39 @@
4535
  "local": {
4536
  "path": "repo:docs/index.html",
4537
  "exists": true,
4538
- "bytes": 232536,
4539
- "sha256": "0eb22ba5498e9caefd2c7d6e70068a8a7e99ae32a4c91abef382c918d689d9bf"
4540
  },
4541
  "mirrors": {
4542
  "hf_space": {
4543
  "path": "hf_space:index.html",
4544
  "exists": true,
4545
- "bytes": 232536,
4546
- "sha256": "0eb22ba5498e9caefd2c7d6e70068a8a7e99ae32a4c91abef382c918d689d9bf"
4547
  },
4548
  "hf_artifacts_root": {
4549
  "path": "hf_artifacts:index.html",
4550
  "exists": true,
4551
- "bytes": 232536,
4552
- "sha256": "0eb22ba5498e9caefd2c7d6e70068a8a7e99ae32a4c91abef382c918d689d9bf"
4553
  },
4554
  "hf_artifacts_docs": {
4555
  "path": "hf_artifacts:docs/index.html",
4556
  "exists": true,
4557
- "bytes": 232536,
4558
- "sha256": "0eb22ba5498e9caefd2c7d6e70068a8a7e99ae32a4c91abef382c918d689d9bf"
4559
  },
4560
  "hf_model": {
4561
  "path": "hf_model:index.html",
4562
  "exists": true,
4563
- "bytes": 232536,
4564
- "sha256": "0eb22ba5498e9caefd2c7d6e70068a8a7e99ae32a4c91abef382c918d689d9bf"
4565
  },
4566
  "hf_model_docs": {
4567
  "path": "hf_model:docs/index.html",
4568
  "exists": true,
4569
- "bytes": 232536,
4570
- "sha256": "0eb22ba5498e9caefd2c7d6e70068a8a7e99ae32a4c91abef382c918d689d9bf"
4571
  }
4572
  },
4573
  "failures": []
@@ -10591,6 +10616,254 @@
10591
  },
10592
  "failures": []
10593
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10594
  {
10595
  "name": "results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md",
10596
  "status": "pass",
@@ -18533,27 +18806,27 @@
18533
  "local": {
18534
  "path": "repo:TASK_METHOD_20_GAP_AUDIT.md",
18535
  "exists": true,
18536
- "bytes": 16600,
18537
- "sha256": "e6bd5b49d01eb095fe6f0d38436e08f2c2ff1a4449f98a08928d51e965e563bb"
18538
  },
18539
  "mirrors": {
18540
  "hf_space": {
18541
  "path": "hf_space:TASK_METHOD_20_GAP_AUDIT.md",
18542
  "exists": true,
18543
- "bytes": 16600,
18544
- "sha256": "e6bd5b49d01eb095fe6f0d38436e08f2c2ff1a4449f98a08928d51e965e563bb"
18545
  },
18546
  "hf_artifacts": {
18547
  "path": "hf_artifacts:TASK_METHOD_20_GAP_AUDIT.md",
18548
  "exists": true,
18549
- "bytes": 16600,
18550
- "sha256": "e6bd5b49d01eb095fe6f0d38436e08f2c2ff1a4449f98a08928d51e965e563bb"
18551
  },
18552
  "hf_model": {
18553
  "path": "hf_model:TASK_METHOD_20_GAP_AUDIT.md",
18554
  "exists": true,
18555
- "bytes": 16600,
18556
- "sha256": "e6bd5b49d01eb095fe6f0d38436e08f2c2ff1a4449f98a08928d51e965e563bb"
18557
  }
18558
  },
18559
  "failures": []
@@ -18564,27 +18837,27 @@
18564
  "local": {
18565
  "path": "repo:TASK_METHOD_20_RESULT_MATRIX.md",
18566
  "exists": true,
18567
- "bytes": 4224,
18568
- "sha256": "fe14a0e1709a8bb5b9fb1fd720ffde0aecfed578329d8e1e71b352a83b23b1f8"
18569
  },
18570
  "mirrors": {
18571
  "hf_space": {
18572
  "path": "hf_space:TASK_METHOD_20_RESULT_MATRIX.md",
18573
  "exists": true,
18574
- "bytes": 4224,
18575
- "sha256": "fe14a0e1709a8bb5b9fb1fd720ffde0aecfed578329d8e1e71b352a83b23b1f8"
18576
  },
18577
  "hf_artifacts": {
18578
  "path": "hf_artifacts:TASK_METHOD_20_RESULT_MATRIX.md",
18579
  "exists": true,
18580
- "bytes": 4224,
18581
- "sha256": "fe14a0e1709a8bb5b9fb1fd720ffde0aecfed578329d8e1e71b352a83b23b1f8"
18582
  },
18583
  "hf_model": {
18584
  "path": "hf_model:TASK_METHOD_20_RESULT_MATRIX.md",
18585
  "exists": true,
18586
- "bytes": 4224,
18587
- "sha256": "fe14a0e1709a8bb5b9fb1fd720ffde0aecfed578329d8e1e71b352a83b23b1f8"
18588
  }
18589
  },
18590
  "failures": []
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-16T13:35:58+00:00",
4
  "hf_root": "hf_publish",
5
  "summary": {
6
+ "group_count": 587,
7
  "failure_count": 0,
8
  "failures_by_surface": {}
9
  },
 
138
  "local": {
139
  "path": "repo:docs/data/artifact_index.json",
140
  "exists": true,
141
+ "bytes": 104199,
142
+ "sha256": "785793bcd8ec76149d03ca6fe7f9539ce1bb0fbdf62091fc57de87c123bc49c6"
143
  },
144
  "mirrors": {
145
  "hf_space": {
146
  "path": "hf_space:data/artifact_index.json",
147
  "exists": true,
148
+ "bytes": 104199,
149
+ "sha256": "785793bcd8ec76149d03ca6fe7f9539ce1bb0fbdf62091fc57de87c123bc49c6"
150
  },
151
  "hf_artifacts_data": {
152
  "path": "hf_artifacts:data/artifact_index.json",
153
  "exists": true,
154
+ "bytes": 104199,
155
+ "sha256": "785793bcd8ec76149d03ca6fe7f9539ce1bb0fbdf62091fc57de87c123bc49c6"
156
  },
157
  "hf_artifacts": {
158
  "path": "hf_artifacts:docs/data/artifact_index.json",
159
  "exists": true,
160
+ "bytes": 104199,
161
+ "sha256": "785793bcd8ec76149d03ca6fe7f9539ce1bb0fbdf62091fc57de87c123bc49c6"
162
  },
163
  "hf_model_data": {
164
  "path": "hf_model:data/artifact_index.json",
165
  "exists": true,
166
+ "bytes": 104199,
167
+ "sha256": "785793bcd8ec76149d03ca6fe7f9539ce1bb0fbdf62091fc57de87c123bc49c6"
168
  },
169
  "hf_model_docs_data": {
170
  "path": "hf_model:docs/data/artifact_index.json",
171
  "exists": true,
172
+ "bytes": 104199,
173
+ "sha256": "785793bcd8ec76149d03ca6fe7f9539ce1bb0fbdf62091fc57de87c123bc49c6"
174
  },
175
  "hf_model": {
176
  "path": "hf_model:metrics/artifact_index.json",
177
  "exists": true,
178
+ "bytes": 104199,
179
+ "sha256": "785793bcd8ec76149d03ca6fe7f9539ce1bb0fbdf62091fc57de87c123bc49c6"
180
  }
181
  },
182
  "failures": []
 
825
  "path": "repo:docs/data/publication_audit.json",
826
  "exists": true,
827
  "bytes": 8298,
828
+ "sha256": "c102f6b278caca6f359330f130cd3299b09ad219ba6fcedf14416542d8f935a6"
829
  },
830
  "mirrors": {
831
  "hf_space": {
832
  "path": "hf_space:data/publication_audit.json",
833
  "exists": true,
834
  "bytes": 8298,
835
+ "sha256": "c102f6b278caca6f359330f130cd3299b09ad219ba6fcedf14416542d8f935a6"
836
  },
837
  "hf_artifacts_data": {
838
  "path": "hf_artifacts:data/publication_audit.json",
839
  "exists": true,
840
  "bytes": 8298,
841
+ "sha256": "c102f6b278caca6f359330f130cd3299b09ad219ba6fcedf14416542d8f935a6"
842
  },
843
  "hf_artifacts": {
844
  "path": "hf_artifacts:docs/data/publication_audit.json",
845
  "exists": true,
846
  "bytes": 8298,
847
+ "sha256": "c102f6b278caca6f359330f130cd3299b09ad219ba6fcedf14416542d8f935a6"
848
  },
849
  "hf_model_data": {
850
  "path": "hf_model:data/publication_audit.json",
851
  "exists": true,
852
  "bytes": 8298,
853
+ "sha256": "c102f6b278caca6f359330f130cd3299b09ad219ba6fcedf14416542d8f935a6"
854
  },
855
  "hf_model_docs_data": {
856
  "path": "hf_model:docs/data/publication_audit.json",
857
  "exists": true,
858
  "bytes": 8298,
859
+ "sha256": "c102f6b278caca6f359330f130cd3299b09ad219ba6fcedf14416542d8f935a6"
860
  },
861
  "hf_model": {
862
  "path": "hf_model:metrics/publication_audit.json",
863
  "exists": true,
864
  "bytes": 8298,
865
+ "sha256": "c102f6b278caca6f359330f130cd3299b09ad219ba6fcedf14416542d8f935a6"
866
  }
867
  },
868
  "failures": []
 
874
  "path": "repo:docs/data/public_surface_qa.json",
875
  "exists": true,
876
  "bytes": 6146,
877
+ "sha256": "e8de5e0cd4d4042b056bd37963c1da98b01589cf4aaf68c8954f9eed2688683d"
878
  },
879
  "mirrors": {
880
  "hf_space": {
881
  "path": "hf_space:data/public_surface_qa.json",
882
  "exists": true,
883
  "bytes": 6146,
884
+ "sha256": "e8de5e0cd4d4042b056bd37963c1da98b01589cf4aaf68c8954f9eed2688683d"
885
  },
886
  "hf_artifacts_data": {
887
  "path": "hf_artifacts:data/public_surface_qa.json",
888
  "exists": true,
889
  "bytes": 6146,
890
+ "sha256": "e8de5e0cd4d4042b056bd37963c1da98b01589cf4aaf68c8954f9eed2688683d"
891
  },
892
  "hf_artifacts": {
893
  "path": "hf_artifacts:docs/data/public_surface_qa.json",
894
  "exists": true,
895
  "bytes": 6146,
896
+ "sha256": "e8de5e0cd4d4042b056bd37963c1da98b01589cf4aaf68c8954f9eed2688683d"
897
  },
898
  "hf_model_data": {
899
  "path": "hf_model:data/public_surface_qa.json",
900
  "exists": true,
901
  "bytes": 6146,
902
+ "sha256": "e8de5e0cd4d4042b056bd37963c1da98b01589cf4aaf68c8954f9eed2688683d"
903
  },
904
  "hf_model_docs_data": {
905
  "path": "hf_model:docs/data/public_surface_qa.json",
906
  "exists": true,
907
  "bytes": 6146,
908
+ "sha256": "e8de5e0cd4d4042b056bd37963c1da98b01589cf4aaf68c8954f9eed2688683d"
909
  },
910
  "hf_model": {
911
  "path": "hf_model:metrics/public_surface_qa.json",
912
  "exists": true,
913
  "bytes": 6146,
914
+ "sha256": "e8de5e0cd4d4042b056bd37963c1da98b01589cf4aaf68c8954f9eed2688683d"
915
  }
916
  },
917
  "failures": []
 
1021
  "path": "repo:docs/data/quality_gates.json",
1022
  "exists": true,
1023
  "bytes": 8100,
1024
+ "sha256": "69babe425b30b76780fddac26f88c81dc9ea034b5c39e85dccd58d7e58deef44"
1025
  },
1026
  "mirrors": {
1027
  "hf_space": {
1028
  "path": "hf_space:data/quality_gates.json",
1029
  "exists": true,
1030
  "bytes": 8100,
1031
+ "sha256": "69babe425b30b76780fddac26f88c81dc9ea034b5c39e85dccd58d7e58deef44"
1032
  },
1033
  "hf_artifacts_data": {
1034
  "path": "hf_artifacts:data/quality_gates.json",
1035
  "exists": true,
1036
  "bytes": 8100,
1037
+ "sha256": "69babe425b30b76780fddac26f88c81dc9ea034b5c39e85dccd58d7e58deef44"
1038
  },
1039
  "hf_artifacts": {
1040
  "path": "hf_artifacts:docs/data/quality_gates.json",
1041
  "exists": true,
1042
  "bytes": 8100,
1043
+ "sha256": "69babe425b30b76780fddac26f88c81dc9ea034b5c39e85dccd58d7e58deef44"
1044
  },
1045
  "hf_model_data": {
1046
  "path": "hf_model:data/quality_gates.json",
1047
  "exists": true,
1048
  "bytes": 8100,
1049
+ "sha256": "69babe425b30b76780fddac26f88c81dc9ea034b5c39e85dccd58d7e58deef44"
1050
  },
1051
  "hf_model_docs_data": {
1052
  "path": "hf_model:docs/data/quality_gates.json",
1053
  "exists": true,
1054
  "bytes": 8100,
1055
+ "sha256": "69babe425b30b76780fddac26f88c81dc9ea034b5c39e85dccd58d7e58deef44"
1056
  },
1057
  "hf_model": {
1058
  "path": "hf_model:metrics/quality_gates.json",
1059
  "exists": true,
1060
  "bytes": 8100,
1061
+ "sha256": "69babe425b30b76780fddac26f88c81dc9ea034b5c39e85dccd58d7e58deef44"
1062
  }
1063
  },
1064
  "failures": []
 
1462
  "path": "repo:docs/data/scope_claims_audit.json",
1463
  "exists": true,
1464
  "bytes": 21630,
1465
+ "sha256": "d0653f4dc77b25127dad6d9615da0e89e4259de20daed61effaccfed8aa4438f"
1466
  },
1467
  "mirrors": {
1468
  "hf_space": {
1469
  "path": "hf_space:data/scope_claims_audit.json",
1470
  "exists": true,
1471
  "bytes": 21630,
1472
+ "sha256": "d0653f4dc77b25127dad6d9615da0e89e4259de20daed61effaccfed8aa4438f"
1473
  },
1474
  "hf_artifacts_data": {
1475
  "path": "hf_artifacts:data/scope_claims_audit.json",
1476
  "exists": true,
1477
  "bytes": 21630,
1478
+ "sha256": "d0653f4dc77b25127dad6d9615da0e89e4259de20daed61effaccfed8aa4438f"
1479
  },
1480
  "hf_artifacts": {
1481
  "path": "hf_artifacts:docs/data/scope_claims_audit.json",
1482
  "exists": true,
1483
  "bytes": 21630,
1484
+ "sha256": "d0653f4dc77b25127dad6d9615da0e89e4259de20daed61effaccfed8aa4438f"
1485
  },
1486
  "hf_model_data": {
1487
  "path": "hf_model:data/scope_claims_audit.json",
1488
  "exists": true,
1489
  "bytes": 21630,
1490
+ "sha256": "d0653f4dc77b25127dad6d9615da0e89e4259de20daed61effaccfed8aa4438f"
1491
  },
1492
  "hf_model_docs_data": {
1493
  "path": "hf_model:docs/data/scope_claims_audit.json",
1494
  "exists": true,
1495
  "bytes": 21630,
1496
+ "sha256": "d0653f4dc77b25127dad6d9615da0e89e4259de20daed61effaccfed8aa4438f"
1497
  },
1498
  "hf_model": {
1499
  "path": "hf_model:metrics/scope_claims_audit.json",
1500
  "exists": true,
1501
  "bytes": 21630,
1502
+ "sha256": "d0653f4dc77b25127dad6d9615da0e89e4259de20daed61effaccfed8aa4438f"
1503
  }
1504
  },
1505
  "failures": []
 
1560
  "path": "repo:docs/data/source_alignment_audit.json",
1561
  "exists": true,
1562
  "bytes": 4432,
1563
+ "sha256": "3e2021bb443cb8f8f844aa1491e8874ea6f3d7c500c40a616213e6812236ffd6"
1564
  },
1565
  "mirrors": {
1566
  "hf_space": {
1567
  "path": "hf_space:data/source_alignment_audit.json",
1568
  "exists": true,
1569
  "bytes": 4432,
1570
+ "sha256": "3e2021bb443cb8f8f844aa1491e8874ea6f3d7c500c40a616213e6812236ffd6"
1571
  },
1572
  "hf_artifacts_data": {
1573
  "path": "hf_artifacts:data/source_alignment_audit.json",
1574
  "exists": true,
1575
  "bytes": 4432,
1576
+ "sha256": "3e2021bb443cb8f8f844aa1491e8874ea6f3d7c500c40a616213e6812236ffd6"
1577
  },
1578
  "hf_artifacts": {
1579
  "path": "hf_artifacts:docs/data/source_alignment_audit.json",
1580
  "exists": true,
1581
  "bytes": 4432,
1582
+ "sha256": "3e2021bb443cb8f8f844aa1491e8874ea6f3d7c500c40a616213e6812236ffd6"
1583
  },
1584
  "hf_model_data": {
1585
  "path": "hf_model:data/source_alignment_audit.json",
1586
  "exists": true,
1587
  "bytes": 4432,
1588
+ "sha256": "3e2021bb443cb8f8f844aa1491e8874ea6f3d7c500c40a616213e6812236ffd6"
1589
  },
1590
  "hf_model_docs_data": {
1591
  "path": "hf_model:docs/data/source_alignment_audit.json",
1592
  "exists": true,
1593
  "bytes": 4432,
1594
+ "sha256": "3e2021bb443cb8f8f844aa1491e8874ea6f3d7c500c40a616213e6812236ffd6"
1595
  },
1596
  "hf_model": {
1597
  "path": "hf_model:metrics/source_alignment_audit.json",
1598
  "exists": true,
1599
  "bytes": 4432,
1600
+ "sha256": "3e2021bb443cb8f8f844aa1491e8874ea6f3d7c500c40a616213e6812236ffd6"
1601
  }
1602
  },
1603
  "failures": []
 
1658
  "path": "repo:docs/data/single_episode_task_model_radar.json",
1659
  "exists": true,
1660
  "bytes": 50973,
1661
+ "sha256": "10f9ca6afc9f9d69d2380faab2e2f1ebd0114bf7ab778ea30188ba849e9a44c1"
1662
  },
1663
  "mirrors": {
1664
  "hf_space": {
1665
  "path": "hf_space:data/single_episode_task_model_radar.json",
1666
  "exists": true,
1667
  "bytes": 50973,
1668
+ "sha256": "10f9ca6afc9f9d69d2380faab2e2f1ebd0114bf7ab778ea30188ba849e9a44c1"
1669
  },
1670
  "hf_artifacts_data": {
1671
  "path": "hf_artifacts:data/single_episode_task_model_radar.json",
1672
  "exists": true,
1673
  "bytes": 50973,
1674
+ "sha256": "10f9ca6afc9f9d69d2380faab2e2f1ebd0114bf7ab778ea30188ba849e9a44c1"
1675
  },
1676
  "hf_artifacts": {
1677
  "path": "hf_artifacts:docs/data/single_episode_task_model_radar.json",
1678
  "exists": true,
1679
  "bytes": 50973,
1680
+ "sha256": "10f9ca6afc9f9d69d2380faab2e2f1ebd0114bf7ab778ea30188ba849e9a44c1"
1681
  },
1682
  "hf_model_data": {
1683
  "path": "hf_model:data/single_episode_task_model_radar.json",
1684
  "exists": true,
1685
  "bytes": 50973,
1686
+ "sha256": "10f9ca6afc9f9d69d2380faab2e2f1ebd0114bf7ab778ea30188ba849e9a44c1"
1687
  },
1688
  "hf_model_docs_data": {
1689
  "path": "hf_model:docs/data/single_episode_task_model_radar.json",
1690
  "exists": true,
1691
  "bytes": 50973,
1692
+ "sha256": "10f9ca6afc9f9d69d2380faab2e2f1ebd0114bf7ab778ea30188ba849e9a44c1"
1693
  },
1694
  "hf_model": {
1695
  "path": "hf_model:metrics/single_episode_task_model_radar.json",
1696
  "exists": true,
1697
  "bytes": 50973,
1698
+ "sha256": "10f9ca6afc9f9d69d2380faab2e2f1ebd0114bf7ab778ea30188ba849e9a44c1"
1699
  }
1700
  },
1701
  "failures": []
 
1706
  "local": {
1707
  "path": "repo:docs/data/episode128_task_model_radar.json",
1708
  "exists": true,
1709
+ "bytes": 187400,
1710
+ "sha256": "fab3dc021cfeca3547b72fd2e1321a1258c0c48a8eaa5ea5cc28600badcd2c68"
1711
  },
1712
  "mirrors": {
1713
  "hf_space": {
1714
  "path": "hf_space:data/episode128_task_model_radar.json",
1715
  "exists": true,
1716
+ "bytes": 187400,
1717
+ "sha256": "fab3dc021cfeca3547b72fd2e1321a1258c0c48a8eaa5ea5cc28600badcd2c68"
1718
  },
1719
  "hf_artifacts_data": {
1720
  "path": "hf_artifacts:data/episode128_task_model_radar.json",
1721
  "exists": true,
1722
+ "bytes": 187400,
1723
+ "sha256": "fab3dc021cfeca3547b72fd2e1321a1258c0c48a8eaa5ea5cc28600badcd2c68"
1724
  },
1725
  "hf_artifacts": {
1726
  "path": "hf_artifacts:docs/data/episode128_task_model_radar.json",
1727
  "exists": true,
1728
+ "bytes": 187400,
1729
+ "sha256": "fab3dc021cfeca3547b72fd2e1321a1258c0c48a8eaa5ea5cc28600badcd2c68"
1730
  },
1731
  "hf_model_data": {
1732
  "path": "hf_model:data/episode128_task_model_radar.json",
1733
  "exists": true,
1734
+ "bytes": 187400,
1735
+ "sha256": "fab3dc021cfeca3547b72fd2e1321a1258c0c48a8eaa5ea5cc28600badcd2c68"
1736
  },
1737
  "hf_model_docs_data": {
1738
  "path": "hf_model:docs/data/episode128_task_model_radar.json",
1739
  "exists": true,
1740
+ "bytes": 187400,
1741
+ "sha256": "fab3dc021cfeca3547b72fd2e1321a1258c0c48a8eaa5ea5cc28600badcd2c68"
1742
  },
1743
  "hf_model": {
1744
  "path": "hf_model:metrics/episode128_task_model_radar.json",
1745
  "exists": true,
1746
+ "bytes": 187400,
1747
+ "sha256": "fab3dc021cfeca3547b72fd2e1321a1258c0c48a8eaa5ea5cc28600badcd2c68"
1748
  }
1749
  },
1750
  "failures": []
 
1854
  "path": "repo:docs/data/task_surface_integrity.json",
1855
  "exists": true,
1856
  "bytes": 45779,
1857
+ "sha256": "5cc466a245de221f9980e0a2f42df016c096515f1074d08bf2be8e758a876612"
1858
  },
1859
  "mirrors": {
1860
  "hf_space": {
1861
  "path": "hf_space:data/task_surface_integrity.json",
1862
  "exists": true,
1863
  "bytes": 45779,
1864
+ "sha256": "5cc466a245de221f9980e0a2f42df016c096515f1074d08bf2be8e758a876612"
1865
  },
1866
  "hf_artifacts_data": {
1867
  "path": "hf_artifacts:data/task_surface_integrity.json",
1868
  "exists": true,
1869
  "bytes": 45779,
1870
+ "sha256": "5cc466a245de221f9980e0a2f42df016c096515f1074d08bf2be8e758a876612"
1871
  },
1872
  "hf_artifacts": {
1873
  "path": "hf_artifacts:docs/data/task_surface_integrity.json",
1874
  "exists": true,
1875
  "bytes": 45779,
1876
+ "sha256": "5cc466a245de221f9980e0a2f42df016c096515f1074d08bf2be8e758a876612"
1877
  },
1878
  "hf_model_data": {
1879
  "path": "hf_model:data/task_surface_integrity.json",
1880
  "exists": true,
1881
  "bytes": 45779,
1882
+ "sha256": "5cc466a245de221f9980e0a2f42df016c096515f1074d08bf2be8e758a876612"
1883
  },
1884
  "hf_model_docs_data": {
1885
  "path": "hf_model:docs/data/task_surface_integrity.json",
1886
  "exists": true,
1887
  "bytes": 45779,
1888
+ "sha256": "5cc466a245de221f9980e0a2f42df016c096515f1074d08bf2be8e758a876612"
1889
  },
1890
  "hf_model": {
1891
  "path": "hf_model:metrics/task_surface_integrity.json",
1892
  "exists": true,
1893
  "bytes": 45779,
1894
+ "sha256": "5cc466a245de221f9980e0a2f42df016c096515f1074d08bf2be8e758a876612"
1895
  }
1896
  },
1897
  "failures": []
 
1951
  "local": {
1952
  "path": "repo:docs/data/task_method_20_result_matrix.json",
1953
  "exists": true,
1954
+ "bytes": 129740,
1955
+ "sha256": "e7e398b98e4e188ee883a126bd77ad966ba563f2c43da56e13e871b6dc90f1ea"
1956
  },
1957
  "mirrors": {
1958
  "hf_space": {
1959
  "path": "hf_space:data/task_method_20_result_matrix.json",
1960
  "exists": true,
1961
+ "bytes": 129740,
1962
+ "sha256": "e7e398b98e4e188ee883a126bd77ad966ba563f2c43da56e13e871b6dc90f1ea"
1963
  },
1964
  "hf_artifacts_data": {
1965
  "path": "hf_artifacts:data/task_method_20_result_matrix.json",
1966
  "exists": true,
1967
+ "bytes": 129740,
1968
+ "sha256": "e7e398b98e4e188ee883a126bd77ad966ba563f2c43da56e13e871b6dc90f1ea"
1969
  },
1970
  "hf_artifacts": {
1971
  "path": "hf_artifacts:docs/data/task_method_20_result_matrix.json",
1972
  "exists": true,
1973
+ "bytes": 129740,
1974
+ "sha256": "e7e398b98e4e188ee883a126bd77ad966ba563f2c43da56e13e871b6dc90f1ea"
1975
  },
1976
  "hf_model_data": {
1977
  "path": "hf_model:data/task_method_20_result_matrix.json",
1978
  "exists": true,
1979
+ "bytes": 129740,
1980
+ "sha256": "e7e398b98e4e188ee883a126bd77ad966ba563f2c43da56e13e871b6dc90f1ea"
1981
  },
1982
  "hf_model_docs_data": {
1983
  "path": "hf_model:docs/data/task_method_20_result_matrix.json",
1984
  "exists": true,
1985
+ "bytes": 129740,
1986
+ "sha256": "e7e398b98e4e188ee883a126bd77ad966ba563f2c43da56e13e871b6dc90f1ea"
1987
  },
1988
  "hf_model": {
1989
  "path": "hf_model:metrics/task_method_20_result_matrix.json",
1990
  "exists": true,
1991
+ "bytes": 129740,
1992
+ "sha256": "e7e398b98e4e188ee883a126bd77ad966ba563f2c43da56e13e871b6dc90f1ea"
1993
  }
1994
  },
1995
  "failures": []
 
2000
  "local": {
2001
  "path": "repo:docs/data/task_method_20_gap_audit.json",
2002
  "exists": true,
2003
+ "bytes": 57943,
2004
+ "sha256": "a1246c07c569238e8589fab0fb90e86733b7799816e8ef6690036a0fac289ecb"
2005
  },
2006
  "mirrors": {
2007
  "hf_space": {
2008
  "path": "hf_space:data/task_method_20_gap_audit.json",
2009
  "exists": true,
2010
+ "bytes": 57943,
2011
+ "sha256": "a1246c07c569238e8589fab0fb90e86733b7799816e8ef6690036a0fac289ecb"
2012
  },
2013
  "hf_artifacts_data": {
2014
  "path": "hf_artifacts:data/task_method_20_gap_audit.json",
2015
  "exists": true,
2016
+ "bytes": 57943,
2017
+ "sha256": "a1246c07c569238e8589fab0fb90e86733b7799816e8ef6690036a0fac289ecb"
2018
  },
2019
  "hf_artifacts": {
2020
  "path": "hf_artifacts:docs/data/task_method_20_gap_audit.json",
2021
  "exists": true,
2022
+ "bytes": 57943,
2023
+ "sha256": "a1246c07c569238e8589fab0fb90e86733b7799816e8ef6690036a0fac289ecb"
2024
  },
2025
  "hf_model_data": {
2026
  "path": "hf_model:data/task_method_20_gap_audit.json",
2027
  "exists": true,
2028
+ "bytes": 57943,
2029
+ "sha256": "a1246c07c569238e8589fab0fb90e86733b7799816e8ef6690036a0fac289ecb"
2030
  },
2031
  "hf_model_docs_data": {
2032
  "path": "hf_model:docs/data/task_method_20_gap_audit.json",
2033
  "exists": true,
2034
+ "bytes": 57943,
2035
+ "sha256": "a1246c07c569238e8589fab0fb90e86733b7799816e8ef6690036a0fac289ecb"
2036
  },
2037
  "hf_model": {
2038
  "path": "hf_model:metrics/task_method_20_gap_audit.json",
2039
  "exists": true,
2040
+ "bytes": 57943,
2041
+ "sha256": "a1246c07c569238e8589fab0fb90e86733b7799816e8ef6690036a0fac289ecb"
2042
  }
2043
  },
2044
  "failures": []
 
2098
  "local": {
2099
  "path": "repo:docs/data/unified_task_model_radar.json",
2100
  "exists": true,
2101
+ "bytes": 231251,
2102
+ "sha256": "5cbcdf8e3a659ef98bbc623ee3e434055ad6479599c95178587b3dab84a6072d"
2103
  },
2104
  "mirrors": {
2105
  "hf_space": {
2106
  "path": "hf_space:data/unified_task_model_radar.json",
2107
  "exists": true,
2108
+ "bytes": 231251,
2109
+ "sha256": "5cbcdf8e3a659ef98bbc623ee3e434055ad6479599c95178587b3dab84a6072d"
2110
  },
2111
  "hf_artifacts_data": {
2112
  "path": "hf_artifacts:data/unified_task_model_radar.json",
2113
  "exists": true,
2114
+ "bytes": 231251,
2115
+ "sha256": "5cbcdf8e3a659ef98bbc623ee3e434055ad6479599c95178587b3dab84a6072d"
2116
  },
2117
  "hf_artifacts": {
2118
  "path": "hf_artifacts:docs/data/unified_task_model_radar.json",
2119
  "exists": true,
2120
+ "bytes": 231251,
2121
+ "sha256": "5cbcdf8e3a659ef98bbc623ee3e434055ad6479599c95178587b3dab84a6072d"
2122
  },
2123
  "hf_model_data": {
2124
  "path": "hf_model:data/unified_task_model_radar.json",
2125
  "exists": true,
2126
+ "bytes": 231251,
2127
+ "sha256": "5cbcdf8e3a659ef98bbc623ee3e434055ad6479599c95178587b3dab84a6072d"
2128
  },
2129
  "hf_model_docs_data": {
2130
  "path": "hf_model:docs/data/unified_task_model_radar.json",
2131
  "exists": true,
2132
+ "bytes": 231251,
2133
+ "sha256": "5cbcdf8e3a659ef98bbc623ee3e434055ad6479599c95178587b3dab84a6072d"
2134
  },
2135
  "hf_model": {
2136
  "path": "hf_model:metrics/unified_task_model_radar.json",
2137
  "exists": true,
2138
+ "bytes": 231251,
2139
+ "sha256": "5cbcdf8e3a659ef98bbc623ee3e434055ad6479599c95178587b3dab84a6072d"
2140
  }
2141
  },
2142
  "failures": []
 
2148
  "path": "repo:docs/data/website_integrity.json",
2149
  "exists": true,
2150
  "bytes": 18933,
2151
+ "sha256": "9ad46ada8b1cae970c165bee1db3771e23d8e76cba84f72fd2c8d205715bc12a"
2152
  },
2153
  "mirrors": {
2154
  "hf_space": {
2155
  "path": "hf_space:data/website_integrity.json",
2156
  "exists": true,
2157
  "bytes": 18933,
2158
+ "sha256": "9ad46ada8b1cae970c165bee1db3771e23d8e76cba84f72fd2c8d205715bc12a"
2159
  },
2160
  "hf_artifacts_data": {
2161
  "path": "hf_artifacts:data/website_integrity.json",
2162
  "exists": true,
2163
  "bytes": 18933,
2164
+ "sha256": "9ad46ada8b1cae970c165bee1db3771e23d8e76cba84f72fd2c8d205715bc12a"
2165
  },
2166
  "hf_artifacts": {
2167
  "path": "hf_artifacts:docs/data/website_integrity.json",
2168
  "exists": true,
2169
  "bytes": 18933,
2170
+ "sha256": "9ad46ada8b1cae970c165bee1db3771e23d8e76cba84f72fd2c8d205715bc12a"
2171
  },
2172
  "hf_model_data": {
2173
  "path": "hf_model:data/website_integrity.json",
2174
  "exists": true,
2175
  "bytes": 18933,
2176
+ "sha256": "9ad46ada8b1cae970c165bee1db3771e23d8e76cba84f72fd2c8d205715bc12a"
2177
  },
2178
  "hf_model_docs_data": {
2179
  "path": "hf_model:docs/data/website_integrity.json",
2180
  "exists": true,
2181
  "bytes": 18933,
2182
+ "sha256": "9ad46ada8b1cae970c165bee1db3771e23d8e76cba84f72fd2c8d205715bc12a"
2183
  },
2184
  "hf_model": {
2185
  "path": "hf_model:metrics/website_integrity.json",
2186
  "exists": true,
2187
  "bytes": 18933,
2188
+ "sha256": "9ad46ada8b1cae970c165bee1db3771e23d8e76cba84f72fd2c8d205715bc12a"
2189
  }
2190
  },
2191
  "failures": []
 
2319
  "local": {
2320
  "path": "repo:docs/assets/charts/episode128_task_model_radar.svg",
2321
  "exists": true,
2322
+ "bytes": 43679,
2323
+ "sha256": "51991fd33719be32da422f559b7d27895fc96b1332b0952f0c19b573d58c1325"
2324
  },
2325
  "mirrors": {
2326
  "hf_space": {
2327
  "path": "hf_space:assets/charts/episode128_task_model_radar.svg",
2328
  "exists": true,
2329
+ "bytes": 43679,
2330
+ "sha256": "51991fd33719be32da422f559b7d27895fc96b1332b0952f0c19b573d58c1325"
2331
  },
2332
  "hf_artifacts_docs": {
2333
  "path": "hf_artifacts:docs/assets/charts/episode128_task_model_radar.svg",
2334
  "exists": true,
2335
+ "bytes": 43679,
2336
+ "sha256": "51991fd33719be32da422f559b7d27895fc96b1332b0952f0c19b573d58c1325"
2337
  },
2338
  "hf_artifacts_card": {
2339
  "path": "hf_artifacts:assets/charts/episode128_task_model_radar.svg",
2340
  "exists": true,
2341
+ "bytes": 43679,
2342
+ "sha256": "51991fd33719be32da422f559b7d27895fc96b1332b0952f0c19b573d58c1325"
2343
  },
2344
  "hf_model": {
2345
  "path": "hf_model:assets/charts/episode128_task_model_radar.svg",
2346
  "exists": true,
2347
+ "bytes": 43679,
2348
+ "sha256": "51991fd33719be32da422f559b7d27895fc96b1332b0952f0c19b573d58c1325"
2349
  }
2350
  },
2351
  "failures": []
 
2393
  "local": {
2394
  "path": "repo:docs/assets/charts/unified_task_model_radar.svg",
2395
  "exists": true,
2396
+ "bytes": 49695,
2397
+ "sha256": "2ad63af1cd1e3e6f9c354b20792c20683e8877e2f92d9da5b5284dce56e668b7"
2398
  },
2399
  "mirrors": {
2400
  "hf_space": {
2401
  "path": "hf_space:assets/charts/unified_task_model_radar.svg",
2402
  "exists": true,
2403
+ "bytes": 49695,
2404
+ "sha256": "2ad63af1cd1e3e6f9c354b20792c20683e8877e2f92d9da5b5284dce56e668b7"
2405
  },
2406
  "hf_artifacts_docs": {
2407
  "path": "hf_artifacts:docs/assets/charts/unified_task_model_radar.svg",
2408
  "exists": true,
2409
+ "bytes": 49695,
2410
+ "sha256": "2ad63af1cd1e3e6f9c354b20792c20683e8877e2f92d9da5b5284dce56e668b7"
2411
  },
2412
  "hf_artifacts_card": {
2413
  "path": "hf_artifacts:assets/charts/unified_task_model_radar.svg",
2414
  "exists": true,
2415
+ "bytes": 49695,
2416
+ "sha256": "2ad63af1cd1e3e6f9c354b20792c20683e8877e2f92d9da5b5284dce56e668b7"
2417
  },
2418
  "hf_model": {
2419
  "path": "hf_model:assets/charts/unified_task_model_radar.svg",
2420
  "exists": true,
2421
+ "bytes": 49695,
2422
+ "sha256": "2ad63af1cd1e3e6f9c354b20792c20683e8877e2f92d9da5b5284dce56e668b7"
2423
  }
2424
  },
2425
  "failures": []
 
3650
  },
3651
  "failures": []
3652
  },
3653
+ {
3654
+ "name": "scripts/omni/score_existing_model_output_task_probes.py",
3655
+ "status": "pass",
3656
+ "local": {
3657
+ "path": "repo:scripts/omni/score_existing_model_output_task_probes.py",
3658
+ "exists": true,
3659
+ "bytes": 13291,
3660
+ "sha256": "25b8e7ba394c1e604fe8d7c900a013b3ea330f32b393cdcd416b2d0f3a2727b8"
3661
+ },
3662
+ "mirrors": {
3663
+ "hf_artifacts": {
3664
+ "path": "hf_artifacts:scripts/omni/score_existing_model_output_task_probes.py",
3665
+ "exists": true,
3666
+ "bytes": 13291,
3667
+ "sha256": "25b8e7ba394c1e604fe8d7c900a013b3ea330f32b393cdcd416b2d0f3a2727b8"
3668
+ },
3669
+ "hf_model": {
3670
+ "path": "hf_model:scripts/omni/score_existing_model_output_task_probes.py",
3671
+ "exists": true,
3672
+ "bytes": 13291,
3673
+ "sha256": "25b8e7ba394c1e604fe8d7c900a013b3ea330f32b393cdcd416b2d0f3a2727b8"
3674
+ }
3675
+ },
3676
+ "failures": []
3677
+ },
3678
  {
3679
  "name": "scripts/omni/score_model_output_probes.py",
3680
  "status": "pass",
 
3831
  "local": {
3832
  "path": "repo:scripts/build_artifact_index.py",
3833
  "exists": true,
3834
+ "bytes": 54683,
3835
+ "sha256": "0a9e7c7391f0e2822d8490ddd216f155e1ce3a50d2ba3c90476ee09685f74d65"
3836
  },
3837
  "mirrors": {
3838
  "hf_artifacts": {
3839
  "path": "hf_artifacts:scripts/build_artifact_index.py",
3840
  "exists": true,
3841
+ "bytes": 54683,
3842
+ "sha256": "0a9e7c7391f0e2822d8490ddd216f155e1ce3a50d2ba3c90476ee09685f74d65"
3843
  },
3844
  "hf_model": {
3845
  "path": "hf_model:scripts/build_artifact_index.py",
3846
  "exists": true,
3847
+ "bytes": 54683,
3848
+ "sha256": "0a9e7c7391f0e2822d8490ddd216f155e1ce3a50d2ba3c90476ee09685f74d65"
3849
  }
3850
  },
3851
  "failures": []
 
4056
  "local": {
4057
  "path": "repo:scripts/build_task_method_20_gap_audit.py",
4058
  "exists": true,
4059
+ "bytes": 10094,
4060
+ "sha256": "4ba5a43fa045b02303ba16a146ab15a891155370a85dd7c09cbe68f8f0eac7ed"
4061
  },
4062
  "mirrors": {
4063
  "hf_artifacts": {
4064
  "path": "hf_artifacts:scripts/build_task_method_20_gap_audit.py",
4065
  "exists": true,
4066
+ "bytes": 10094,
4067
+ "sha256": "4ba5a43fa045b02303ba16a146ab15a891155370a85dd7c09cbe68f8f0eac7ed"
4068
  },
4069
  "hf_model": {
4070
  "path": "hf_model:scripts/build_task_method_20_gap_audit.py",
4071
  "exists": true,
4072
+ "bytes": 10094,
4073
+ "sha256": "4ba5a43fa045b02303ba16a146ab15a891155370a85dd7c09cbe68f8f0eac7ed"
4074
  }
4075
  },
4076
  "failures": []
 
4131
  "local": {
4132
  "path": "repo:scripts/build_unified_task_model_radar.py",
4133
  "exists": true,
4134
+ "bytes": 48861,
4135
+ "sha256": "621ff19f057e6ed279388c8e944f2e4a2cfc87b9be1bd9f1feee6d329f0ec431"
4136
  },
4137
  "mirrors": {
4138
  "hf_artifacts": {
4139
  "path": "hf_artifacts:scripts/build_unified_task_model_radar.py",
4140
  "exists": true,
4141
+ "bytes": 48861,
4142
+ "sha256": "621ff19f057e6ed279388c8e944f2e4a2cfc87b9be1bd9f1feee6d329f0ec431"
4143
  },
4144
  "hf_model": {
4145
  "path": "hf_model:scripts/build_unified_task_model_radar.py",
4146
  "exists": true,
4147
+ "bytes": 48861,
4148
+ "sha256": "621ff19f057e6ed279388c8e944f2e4a2cfc87b9be1bd9f1feee6d329f0ec431"
4149
  }
4150
  },
4151
  "failures": []
 
4206
  "local": {
4207
  "path": "repo:scripts/validate_mirror_parity.py",
4208
  "exists": true,
4209
+ "bytes": 24812,
4210
+ "sha256": "c7d67a9e371f20c24951fbc357f64907365e60acab5ff189fa1675ff631bd27f"
4211
  },
4212
  "mirrors": {
4213
  "hf_artifacts": {
4214
  "path": "hf_artifacts:scripts/validate_mirror_parity.py",
4215
  "exists": true,
4216
+ "bytes": 24812,
4217
+ "sha256": "c7d67a9e371f20c24951fbc357f64907365e60acab5ff189fa1675ff631bd27f"
4218
  },
4219
  "hf_model": {
4220
  "path": "hf_model:scripts/validate_mirror_parity.py",
4221
  "exists": true,
4222
+ "bytes": 24812,
4223
+ "sha256": "c7d67a9e371f20c24951fbc357f64907365e60acab5ff189fa1675ff631bd27f"
4224
  }
4225
  },
4226
  "failures": []
 
4356
  "local": {
4357
  "path": "repo:scripts/sync_hf_publish_mirrors.py",
4358
  "exists": true,
4359
+ "bytes": 17917,
4360
+ "sha256": "0d9f9fa5669af7c8e64fa32cccde9c99d54a7bfa0fe4dd9d315eccdb0b06f585"
4361
  },
4362
  "mirrors": {
4363
  "hf_artifacts": {
4364
  "path": "hf_artifacts:scripts/sync_hf_publish_mirrors.py",
4365
  "exists": true,
4366
+ "bytes": 17917,
4367
+ "sha256": "0d9f9fa5669af7c8e64fa32cccde9c99d54a7bfa0fe4dd9d315eccdb0b06f585"
4368
  },
4369
  "hf_model": {
4370
  "path": "hf_model:scripts/sync_hf_publish_mirrors.py",
4371
  "exists": true,
4372
+ "bytes": 17917,
4373
+ "sha256": "0d9f9fa5669af7c8e64fa32cccde9c99d54a7bfa0fe4dd9d315eccdb0b06f585"
4374
  }
4375
  },
4376
  "failures": []
 
4560
  "local": {
4561
  "path": "repo:docs/index.html",
4562
  "exists": true,
4563
+ "bytes": 232578,
4564
+ "sha256": "e5427cec333219299cdac4069419918ccbb02db8f3c98e6b99cb69ff9895c0f3"
4565
  },
4566
  "mirrors": {
4567
  "hf_space": {
4568
  "path": "hf_space:index.html",
4569
  "exists": true,
4570
+ "bytes": 232578,
4571
+ "sha256": "e5427cec333219299cdac4069419918ccbb02db8f3c98e6b99cb69ff9895c0f3"
4572
  },
4573
  "hf_artifacts_root": {
4574
  "path": "hf_artifacts:index.html",
4575
  "exists": true,
4576
+ "bytes": 232578,
4577
+ "sha256": "e5427cec333219299cdac4069419918ccbb02db8f3c98e6b99cb69ff9895c0f3"
4578
  },
4579
  "hf_artifacts_docs": {
4580
  "path": "hf_artifacts:docs/index.html",
4581
  "exists": true,
4582
+ "bytes": 232578,
4583
+ "sha256": "e5427cec333219299cdac4069419918ccbb02db8f3c98e6b99cb69ff9895c0f3"
4584
  },
4585
  "hf_model": {
4586
  "path": "hf_model:index.html",
4587
  "exists": true,
4588
+ "bytes": 232578,
4589
+ "sha256": "e5427cec333219299cdac4069419918ccbb02db8f3c98e6b99cb69ff9895c0f3"
4590
  },
4591
  "hf_model_docs": {
4592
  "path": "hf_model:docs/index.html",
4593
  "exists": true,
4594
+ "bytes": 232578,
4595
+ "sha256": "e5427cec333219299cdac4069419918ccbb02db8f3c98e6b99cb69ff9895c0f3"
4596
  }
4597
  },
4598
  "failures": []
 
10616
  },
10617
  "failures": []
10618
  },
10619
+ {
10620
+ "name": "results/omni_finetune/model_output_task_probes_20260616/RUN_REPORT.md",
10621
+ "status": "pass",
10622
+ "local": {
10623
+ "path": "repo:results/omni_finetune/model_output_task_probes_20260616/RUN_REPORT.md",
10624
+ "exists": true,
10625
+ "bytes": 981,
10626
+ "sha256": "73e9e0f52a47f56afd1d27ce9be33de9ffb89f52f35f34c1fc1c3ee7290c34b1"
10627
+ },
10628
+ "mirrors": {
10629
+ "hf_space": {
10630
+ "path": "hf_space:results/omni_finetune/model_output_task_probes_20260616/RUN_REPORT.md",
10631
+ "exists": true,
10632
+ "bytes": 981,
10633
+ "sha256": "73e9e0f52a47f56afd1d27ce9be33de9ffb89f52f35f34c1fc1c3ee7290c34b1"
10634
+ },
10635
+ "hf_artifacts": {
10636
+ "path": "hf_artifacts:results/omni_finetune/model_output_task_probes_20260616/RUN_REPORT.md",
10637
+ "exists": true,
10638
+ "bytes": 981,
10639
+ "sha256": "73e9e0f52a47f56afd1d27ce9be33de9ffb89f52f35f34c1fc1c3ee7290c34b1"
10640
+ },
10641
+ "hf_model": {
10642
+ "path": "hf_model:results/omni_finetune/model_output_task_probes_20260616/RUN_REPORT.md",
10643
+ "exists": true,
10644
+ "bytes": 981,
10645
+ "sha256": "73e9e0f52a47f56afd1d27ce9be33de9ffb89f52f35f34c1fc1c3ee7290c34b1"
10646
+ }
10647
+ },
10648
+ "failures": []
10649
+ },
10650
+ {
10651
+ "name": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
10652
+ "status": "pass",
10653
+ "local": {
10654
+ "path": "repo:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
10655
+ "exists": true,
10656
+ "bytes": 21876,
10657
+ "sha256": "3ca7045bf932afe1e3b32ad8183c90056538220fa814249b993e1d574695288b"
10658
+ },
10659
+ "mirrors": {
10660
+ "hf_space": {
10661
+ "path": "hf_space:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
10662
+ "exists": true,
10663
+ "bytes": 21876,
10664
+ "sha256": "3ca7045bf932afe1e3b32ad8183c90056538220fa814249b993e1d574695288b"
10665
+ },
10666
+ "hf_artifacts": {
10667
+ "path": "hf_artifacts:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
10668
+ "exists": true,
10669
+ "bytes": 21876,
10670
+ "sha256": "3ca7045bf932afe1e3b32ad8183c90056538220fa814249b993e1d574695288b"
10671
+ },
10672
+ "hf_model": {
10673
+ "path": "hf_model:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
10674
+ "exists": true,
10675
+ "bytes": 21876,
10676
+ "sha256": "3ca7045bf932afe1e3b32ad8183c90056538220fa814249b993e1d574695288b"
10677
+ }
10678
+ },
10679
+ "failures": []
10680
+ },
10681
+ {
10682
+ "name": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/per_class_metrics.csv",
10683
+ "status": "pass",
10684
+ "local": {
10685
+ "path": "repo:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/per_class_metrics.csv",
10686
+ "exists": true,
10687
+ "bytes": 22569,
10688
+ "sha256": "e5d85ed571b3647fab6dad7b46944a65981d57c51a87708cc5e86a20d71d1358"
10689
+ },
10690
+ "mirrors": {
10691
+ "hf_space": {
10692
+ "path": "hf_space:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/per_class_metrics.csv",
10693
+ "exists": true,
10694
+ "bytes": 22569,
10695
+ "sha256": "e5d85ed571b3647fab6dad7b46944a65981d57c51a87708cc5e86a20d71d1358"
10696
+ },
10697
+ "hf_artifacts": {
10698
+ "path": "hf_artifacts:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/per_class_metrics.csv",
10699
+ "exists": true,
10700
+ "bytes": 22569,
10701
+ "sha256": "e5d85ed571b3647fab6dad7b46944a65981d57c51a87708cc5e86a20d71d1358"
10702
+ },
10703
+ "hf_model": {
10704
+ "path": "hf_model:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/per_class_metrics.csv",
10705
+ "exists": true,
10706
+ "bytes": 22569,
10707
+ "sha256": "e5d85ed571b3647fab6dad7b46944a65981d57c51a87708cc5e86a20d71d1358"
10708
+ }
10709
+ },
10710
+ "failures": []
10711
+ },
10712
+ {
10713
+ "name": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/predictions.csv",
10714
+ "status": "pass",
10715
+ "local": {
10716
+ "path": "repo:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/predictions.csv",
10717
+ "exists": true,
10718
+ "bytes": 176633,
10719
+ "sha256": "c198bd15c3a5589d4de1da072cb70840d245993d43801e60ca4682a267e3dfeb"
10720
+ },
10721
+ "mirrors": {
10722
+ "hf_space": {
10723
+ "path": "hf_space:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/predictions.csv",
10724
+ "exists": true,
10725
+ "bytes": 176633,
10726
+ "sha256": "c198bd15c3a5589d4de1da072cb70840d245993d43801e60ca4682a267e3dfeb"
10727
+ },
10728
+ "hf_artifacts": {
10729
+ "path": "hf_artifacts:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/predictions.csv",
10730
+ "exists": true,
10731
+ "bytes": 176633,
10732
+ "sha256": "c198bd15c3a5589d4de1da072cb70840d245993d43801e60ca4682a267e3dfeb"
10733
+ },
10734
+ "hf_model": {
10735
+ "path": "hf_model:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/predictions.csv",
10736
+ "exists": true,
10737
+ "bytes": 176633,
10738
+ "sha256": "c198bd15c3a5589d4de1da072cb70840d245993d43801e60ca4682a267e3dfeb"
10739
+ }
10740
+ },
10741
+ "failures": []
10742
+ },
10743
+ {
10744
+ "name": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
10745
+ "status": "pass",
10746
+ "local": {
10747
+ "path": "repo:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
10748
+ "exists": true,
10749
+ "bytes": 169938,
10750
+ "sha256": "d57137f78b67dc683fe29a7d3150f309384e58baa29938e8177b918359a0990c"
10751
+ },
10752
+ "mirrors": {
10753
+ "hf_space": {
10754
+ "path": "hf_space:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
10755
+ "exists": true,
10756
+ "bytes": 169938,
10757
+ "sha256": "d57137f78b67dc683fe29a7d3150f309384e58baa29938e8177b918359a0990c"
10758
+ },
10759
+ "hf_artifacts": {
10760
+ "path": "hf_artifacts:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
10761
+ "exists": true,
10762
+ "bytes": 169938,
10763
+ "sha256": "d57137f78b67dc683fe29a7d3150f309384e58baa29938e8177b918359a0990c"
10764
+ },
10765
+ "hf_model": {
10766
+ "path": "hf_model:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
10767
+ "exists": true,
10768
+ "bytes": 169938,
10769
+ "sha256": "d57137f78b67dc683fe29a7d3150f309384e58baa29938e8177b918359a0990c"
10770
+ }
10771
+ },
10772
+ "failures": []
10773
+ },
10774
+ {
10775
+ "name": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/per_class_metrics.csv",
10776
+ "status": "pass",
10777
+ "local": {
10778
+ "path": "repo:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/per_class_metrics.csv",
10779
+ "exists": true,
10780
+ "bytes": 188619,
10781
+ "sha256": "1f4b312a07a8e0a67a1cd6a83e2c83d0c91804f2d8a51c4eeda207444ae262e9"
10782
+ },
10783
+ "mirrors": {
10784
+ "hf_space": {
10785
+ "path": "hf_space:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/per_class_metrics.csv",
10786
+ "exists": true,
10787
+ "bytes": 188619,
10788
+ "sha256": "1f4b312a07a8e0a67a1cd6a83e2c83d0c91804f2d8a51c4eeda207444ae262e9"
10789
+ },
10790
+ "hf_artifacts": {
10791
+ "path": "hf_artifacts:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/per_class_metrics.csv",
10792
+ "exists": true,
10793
+ "bytes": 188619,
10794
+ "sha256": "1f4b312a07a8e0a67a1cd6a83e2c83d0c91804f2d8a51c4eeda207444ae262e9"
10795
+ },
10796
+ "hf_model": {
10797
+ "path": "hf_model:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/per_class_metrics.csv",
10798
+ "exists": true,
10799
+ "bytes": 188619,
10800
+ "sha256": "1f4b312a07a8e0a67a1cd6a83e2c83d0c91804f2d8a51c4eeda207444ae262e9"
10801
+ }
10802
+ },
10803
+ "failures": []
10804
+ },
10805
+ {
10806
+ "name": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/predictions.csv",
10807
+ "status": "pass",
10808
+ "local": {
10809
+ "path": "repo:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/predictions.csv",
10810
+ "exists": true,
10811
+ "bytes": 1848188,
10812
+ "sha256": "521edd5ce67341b1a4dba1dcf0f6e097ab9477536ef9cce96ec3bd0908c35469"
10813
+ },
10814
+ "mirrors": {
10815
+ "hf_space": {
10816
+ "path": "hf_space:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/predictions.csv",
10817
+ "exists": true,
10818
+ "bytes": 1848188,
10819
+ "sha256": "521edd5ce67341b1a4dba1dcf0f6e097ab9477536ef9cce96ec3bd0908c35469"
10820
+ },
10821
+ "hf_artifacts": {
10822
+ "path": "hf_artifacts:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/predictions.csv",
10823
+ "exists": true,
10824
+ "bytes": 1848188,
10825
+ "sha256": "521edd5ce67341b1a4dba1dcf0f6e097ab9477536ef9cce96ec3bd0908c35469"
10826
+ },
10827
+ "hf_model": {
10828
+ "path": "hf_model:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/predictions.csv",
10829
+ "exists": true,
10830
+ "bytes": 1848188,
10831
+ "sha256": "521edd5ce67341b1a4dba1dcf0f6e097ab9477536ef9cce96ec3bd0908c35469"
10832
+ }
10833
+ },
10834
+ "failures": []
10835
+ },
10836
+ {
10837
+ "name": "results/omni_finetune/model_output_task_probes_20260616/summary.json",
10838
+ "status": "pass",
10839
+ "local": {
10840
+ "path": "repo:results/omni_finetune/model_output_task_probes_20260616/summary.json",
10841
+ "exists": true,
10842
+ "bytes": 2000,
10843
+ "sha256": "de95421d93fec5d12049e492db9fbe7c10af5c2cd0d1039da82c05ec82bf98c5"
10844
+ },
10845
+ "mirrors": {
10846
+ "hf_space": {
10847
+ "path": "hf_space:results/omni_finetune/model_output_task_probes_20260616/summary.json",
10848
+ "exists": true,
10849
+ "bytes": 2000,
10850
+ "sha256": "de95421d93fec5d12049e492db9fbe7c10af5c2cd0d1039da82c05ec82bf98c5"
10851
+ },
10852
+ "hf_artifacts": {
10853
+ "path": "hf_artifacts:results/omni_finetune/model_output_task_probes_20260616/summary.json",
10854
+ "exists": true,
10855
+ "bytes": 2000,
10856
+ "sha256": "de95421d93fec5d12049e492db9fbe7c10af5c2cd0d1039da82c05ec82bf98c5"
10857
+ },
10858
+ "hf_model": {
10859
+ "path": "hf_model:results/omni_finetune/model_output_task_probes_20260616/summary.json",
10860
+ "exists": true,
10861
+ "bytes": 2000,
10862
+ "sha256": "de95421d93fec5d12049e492db9fbe7c10af5c2cd0d1039da82c05ec82bf98c5"
10863
+ }
10864
+ },
10865
+ "failures": []
10866
+ },
10867
  {
10868
  "name": "results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md",
10869
  "status": "pass",
 
18806
  "local": {
18807
  "path": "repo:TASK_METHOD_20_GAP_AUDIT.md",
18808
  "exists": true,
18809
+ "bytes": 16234,
18810
+ "sha256": "351e55637374770221115486fd6ef46550ae923c4fae5c01d3bb1e777b5a44fb"
18811
  },
18812
  "mirrors": {
18813
  "hf_space": {
18814
  "path": "hf_space:TASK_METHOD_20_GAP_AUDIT.md",
18815
  "exists": true,
18816
+ "bytes": 16234,
18817
+ "sha256": "351e55637374770221115486fd6ef46550ae923c4fae5c01d3bb1e777b5a44fb"
18818
  },
18819
  "hf_artifacts": {
18820
  "path": "hf_artifacts:TASK_METHOD_20_GAP_AUDIT.md",
18821
  "exists": true,
18822
+ "bytes": 16234,
18823
+ "sha256": "351e55637374770221115486fd6ef46550ae923c4fae5c01d3bb1e777b5a44fb"
18824
  },
18825
  "hf_model": {
18826
  "path": "hf_model:TASK_METHOD_20_GAP_AUDIT.md",
18827
  "exists": true,
18828
+ "bytes": 16234,
18829
+ "sha256": "351e55637374770221115486fd6ef46550ae923c4fae5c01d3bb1e777b5a44fb"
18830
  }
18831
  },
18832
  "failures": []
 
18837
  "local": {
18838
  "path": "repo:TASK_METHOD_20_RESULT_MATRIX.md",
18839
  "exists": true,
18840
+ "bytes": 4208,
18841
+ "sha256": "bfe01b258fe121448d2b5b188c6995041c81d7ce1286dcd2ee0bf9c01d6a6193"
18842
  },
18843
  "mirrors": {
18844
  "hf_space": {
18845
  "path": "hf_space:TASK_METHOD_20_RESULT_MATRIX.md",
18846
  "exists": true,
18847
+ "bytes": 4208,
18848
+ "sha256": "bfe01b258fe121448d2b5b188c6995041c81d7ce1286dcd2ee0bf9c01d6a6193"
18849
  },
18850
  "hf_artifacts": {
18851
  "path": "hf_artifacts:TASK_METHOD_20_RESULT_MATRIX.md",
18852
  "exists": true,
18853
+ "bytes": 4208,
18854
+ "sha256": "bfe01b258fe121448d2b5b188c6995041c81d7ce1286dcd2ee0bf9c01d6a6193"
18855
  },
18856
  "hf_model": {
18857
  "path": "hf_model:TASK_METHOD_20_RESULT_MATRIX.md",
18858
  "exists": true,
18859
+ "bytes": 4208,
18860
+ "sha256": "bfe01b258fe121448d2b5b188c6995041c81d7ce1286dcd2ee0bf9c01d6a6193"
18861
  }
18862
  },
18863
  "failures": []
docs/data/public_surface_qa.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Public Project Surface",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-16T12:04:40+00:00",
5
  "scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
6
  "checks": [
7
  {
@@ -18,7 +18,7 @@
18
  "website_integrity": {
19
  "exists": true,
20
  "status": "pass",
21
- "generated_at_utc": "2026-06-16T12:01:39+00:00"
22
  },
23
  "rendered_site_check": {
24
  "exists": true,
@@ -28,27 +28,27 @@
28
  "task_surface_integrity": {
29
  "exists": true,
30
  "status": "pass",
31
- "generated_at_utc": "2026-06-16T12:01:37+00:00"
32
  },
33
  "source_alignment": {
34
  "exists": true,
35
  "status": "pass",
36
- "generated_at_utc": "2026-06-16T12:01:37+00:00"
37
  },
38
  "scale_up_status": {
39
  "exists": true,
40
  "status": "pass",
41
- "generated_at_utc": "2026-06-16T12:01:40+00:00"
42
  },
43
  "publication_package": {
44
  "exists": true,
45
  "status": "pass",
46
- "generated_at_utc": "2026-06-16T12:04:24+00:00"
47
  },
48
  "mirror_parity": {
49
  "exists": true,
50
  "status": "pass",
51
- "generated_at_utc": "2026-06-16T12:03:33+00:00"
52
  }
53
  },
54
  "failures": {}
 
1
  {
2
  "title": "Ropedia Xperience-10M Public Project Surface",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-16T13:36:07+00:00",
5
  "scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
6
  "checks": [
7
  {
 
18
  "website_integrity": {
19
  "exists": true,
20
  "status": "pass",
21
+ "generated_at_utc": "2026-06-16T13:35:52+00:00"
22
  },
23
  "rendered_site_check": {
24
  "exists": true,
 
28
  "task_surface_integrity": {
29
  "exists": true,
30
  "status": "pass",
31
+ "generated_at_utc": "2026-06-16T13:35:48+00:00"
32
  },
33
  "source_alignment": {
34
  "exists": true,
35
  "status": "pass",
36
+ "generated_at_utc": "2026-06-16T13:35:47+00:00"
37
  },
38
  "scale_up_status": {
39
  "exists": true,
40
  "status": "pass",
41
+ "generated_at_utc": "2026-06-16T13:35:51+00:00"
42
  },
43
  "publication_package": {
44
  "exists": true,
45
  "status": "pass",
46
+ "generated_at_utc": "2026-06-16T13:36:07+00:00"
47
  },
48
  "mirror_parity": {
49
  "exists": true,
50
  "status": "pass",
51
+ "generated_at_utc": "2026-06-16T13:35:58+00:00"
52
  }
53
  },
54
  "failures": {}
docs/data/publication_audit.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-16T12:04:24+00:00",
4
  "checks": [
5
  {
6
  "name": "required_publication_assets_present",
@@ -200,8 +200,8 @@
200
  "github_repo": {
201
  "root": "repo",
202
  "exists": true,
203
- "file_count": 1196,
204
- "text_file_count": 1001,
205
  "largest_file": {
206
  "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
207
  "bytes": 55702978
@@ -211,8 +211,8 @@
211
  "hf_space_bundle": {
212
  "root": "hf_publish/space",
213
  "exists": true,
214
- "file_count": 976,
215
- "text_file_count": 820,
216
  "largest_file": {
217
  "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
218
  "bytes": 55702978
@@ -222,8 +222,8 @@
222
  "hf_artifact_bundle": {
223
  "root": "hf_publish/artifacts",
224
  "exists": true,
225
- "file_count": 2314,
226
- "text_file_count": 1008,
227
  "largest_file": {
228
  "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
229
  "bytes": 55702978
@@ -233,8 +233,8 @@
233
  "hf_model_bundle": {
234
  "root": "hf_publish/model",
235
  "exists": true,
236
- "file_count": 2746,
237
- "text_file_count": 1168,
238
  "largest_file": {
239
  "path": "pytorch_model.bin",
240
  "bytes": 93495480
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-16T13:36:07+00:00",
4
  "checks": [
5
  {
6
  "name": "required_publication_assets_present",
 
200
  "github_repo": {
201
  "root": "repo",
202
  "exists": true,
203
+ "file_count": 1205,
204
+ "text_file_count": 1010,
205
  "largest_file": {
206
  "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
207
  "bytes": 55702978
 
211
  "hf_space_bundle": {
212
  "root": "hf_publish/space",
213
  "exists": true,
214
+ "file_count": 984,
215
+ "text_file_count": 828,
216
  "largest_file": {
217
  "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
218
  "bytes": 55702978
 
222
  "hf_artifact_bundle": {
223
  "root": "hf_publish/artifacts",
224
  "exists": true,
225
+ "file_count": 2331,
226
+ "text_file_count": 1017,
227
  "largest_file": {
228
  "path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
229
  "bytes": 55702978
 
233
  "hf_model_bundle": {
234
  "root": "hf_publish/model",
235
  "exists": true,
236
+ "file_count": 2764,
237
+ "text_file_count": 1177,
238
  "largest_file": {
239
  "path": "pytorch_model.bin",
240
  "bytes": 93495480
docs/data/quality_gates.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Release Checks",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-16T12:04:40+00:00",
5
  "rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
6
  "automated_gates": [
7
  {
 
1
  {
2
  "title": "Ropedia Xperience-10M Release Checks",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-16T13:36:07+00:00",
5
  "rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
6
  "automated_gates": [
7
  {
docs/data/scope_claims_audit.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-16T12:01:40+00:00",
4
  "summary": {
5
  "qwen3_omni_verified_diagnostic_pilot": true,
6
  "dataset_manifest_num_episodes": 119,
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-16T13:35:51+00:00",
4
  "summary": {
5
  "qwen3_omni_verified_diagnostic_pilot": true,
6
  "dataset_manifest_num_episodes": 119,
docs/data/single_episode_task_model_radar.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Single-Episode 20-Task Radar",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-16T11:26:57+00:00",
5
  "description": "Minimal and Neural MLP baselines on the one public sample episode, both scored on all 20 task contracts.",
6
  "task_count": 20,
7
  "method_count": 2,
 
1
  {
2
  "title": "Single-Episode 20-Task Radar",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-16T13:35:38+00:00",
5
  "description": "Minimal and Neural MLP baselines on the one public sample episode, both scored on all 20 task contracts.",
6
  "task_count": 20,
7
  "method_count": 2,
docs/data/source_alignment_audit.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Source Alignment Note",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-16T12:01:37+00:00",
5
  "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
6
  "alignment_summary": {
7
  "full_dataset_repo": "ropedia-ai/xperience-10m",
 
1
  {
2
  "title": "Ropedia Xperience-10M Source Alignment Note",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-16T13:35:47+00:00",
5
  "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
6
  "alignment_summary": {
7
  "full_dataset_repo": "ropedia-ai/xperience-10m",
docs/data/task_method_20_result_matrix.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "title": "Task Method 20-Result Matrix",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-16T11:26:57+00:00",
5
  "task_count": 20,
6
  "method_count": 9,
7
  "method_task_record_count": 180,
8
- "scored_method_task_count": 111,
9
  "series": [
10
  {
11
  "id": "minimal",
@@ -158,20 +158,20 @@
158
  "kind": "partial_128_episode_foundation_model_overlay",
159
  "scope": "128 selected episodes, held-out test",
160
  "stroke_dasharray": "7 7",
161
- "method_detail": "Verified held-out Qwen3-Omni v6 LoRA metrics on task-aligned JSON outputs.",
162
  "plotted_as": "colored point overlay",
163
  "result_record_count": 20,
164
- "scored_task_count": 6,
165
- "covered_task_count": 6,
166
  "proxy_scored_task_count": 0,
167
- "scoreless_task_count": 14,
168
  "unsupported_task_count": 0,
169
- "not_evaluated_task_count": 14,
170
  "status_counts": {
171
- "not_evaluated_in_verified_package": 14,
172
- "scored": 6
173
  },
174
- "coverage_fraction": 0.3,
175
  "result_record_fraction": 1.0
176
  },
177
  {
@@ -182,20 +182,20 @@
182
  "kind": "partial_128_episode_foundation_model_overlay",
183
  "scope": "128 selected episodes, held-out test",
184
  "stroke_dasharray": "4 7",
185
- "method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation.",
186
  "plotted_as": "colored point overlay",
187
  "result_record_count": 20,
188
- "scored_task_count": 6,
189
- "covered_task_count": 6,
190
  "proxy_scored_task_count": 0,
191
- "scoreless_task_count": 14,
192
  "unsupported_task_count": 0,
193
- "not_evaluated_task_count": 14,
194
  "status_counts": {
195
- "not_evaluated_in_verified_package": 14,
196
- "scored": 6
197
  },
198
- "coverage_fraction": 0.3,
199
  "result_record_fraction": 1.0
200
  },
201
  {
@@ -2768,17 +2768,17 @@
2768
  "task_label": "Action-Object Relation Prediction",
2769
  "series_id": "qwen3_omni_v6_lora",
2770
  "method": "Qwen3-Omni v6 LoRA",
2771
- "status": "not_evaluated_in_verified_package",
2772
- "status_label": "not evaluated",
2773
- "scored": false,
2774
  "proxy_scored": false,
2775
- "raw": null,
2776
- "raw_text": "n/a",
2777
- "normalized_score": null,
2778
- "metric_key": "macro_f1",
2779
- "source": null,
2780
  "scope": "multi_episode_128_partial_model_overlay",
2781
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
2782
  },
2783
  {
2784
  "task_number": 16,
@@ -2786,17 +2786,17 @@
2786
  "task_label": "Action-Object Relation Prediction",
2787
  "series_id": "cosmos3_super_reasoner",
2788
  "method": "Cosmos3-Super Reasoner",
2789
- "status": "not_evaluated_in_verified_package",
2790
- "status_label": "not evaluated",
2791
- "scored": false,
2792
  "proxy_scored": false,
2793
- "raw": null,
2794
- "raw_text": "n/a",
2795
- "normalized_score": null,
2796
- "metric_key": "macro_f1",
2797
- "source": null,
2798
  "scope": "multi_episode_128_partial_model_overlay",
2799
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
2800
  },
2801
  {
2802
  "task_number": 16,
 
1
  {
2
  "title": "Task Method 20-Result Matrix",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-16T13:35:38+00:00",
5
  "task_count": 20,
6
  "method_count": 9,
7
  "method_task_record_count": 180,
8
+ "scored_method_task_count": 113,
9
  "series": [
10
  {
11
  "id": "minimal",
 
158
  "kind": "partial_128_episode_foundation_model_overlay",
159
  "scope": "128 selected episodes, held-out test",
160
  "stroke_dasharray": "7 7",
161
+ "method_detail": "Verified held-out Qwen3-Omni v6 LoRA metrics, plus task 16 scored from existing verified action/object JSON.",
162
  "plotted_as": "colored point overlay",
163
  "result_record_count": 20,
164
+ "scored_task_count": 7,
165
+ "covered_task_count": 7,
166
  "proxy_scored_task_count": 0,
167
+ "scoreless_task_count": 13,
168
  "unsupported_task_count": 0,
169
+ "not_evaluated_task_count": 13,
170
  "status_counts": {
171
+ "not_evaluated_in_verified_package": 13,
172
+ "scored": 7
173
  },
174
+ "coverage_fraction": 0.35,
175
  "result_record_fraction": 1.0
176
  },
177
  {
 
182
  "kind": "partial_128_episode_foundation_model_overlay",
183
  "scope": "128 selected episodes, held-out test",
184
  "stroke_dasharray": "4 7",
185
+ "method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation, plus task 16 scored from existing verified action/object JSON.",
186
  "plotted_as": "colored point overlay",
187
  "result_record_count": 20,
188
+ "scored_task_count": 7,
189
+ "covered_task_count": 7,
190
  "proxy_scored_task_count": 0,
191
+ "scoreless_task_count": 13,
192
  "unsupported_task_count": 0,
193
+ "not_evaluated_task_count": 13,
194
  "status_counts": {
195
+ "not_evaluated_in_verified_package": 13,
196
+ "scored": 7
197
  },
198
+ "coverage_fraction": 0.35,
199
  "result_record_fraction": 1.0
200
  },
201
  {
 
2768
  "task_label": "Action-Object Relation Prediction",
2769
  "series_id": "qwen3_omni_v6_lora",
2770
  "method": "Qwen3-Omni v6 LoRA",
2771
+ "status": "scored",
2772
+ "status_label": "scored",
2773
+ "scored": true,
2774
  "proxy_scored": false,
2775
+ "raw": 0.0002220083079671497,
2776
+ "raw_text": "0.0002",
2777
+ "normalized_score": 0.0002220083079671497,
2778
+ "metric_key": "action_object_relation_macro_f1",
2779
+ "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
2780
  "scope": "multi_episode_128_partial_model_overlay",
2781
+ "reason": null
2782
  },
2783
  {
2784
  "task_number": 16,
 
2786
  "task_label": "Action-Object Relation Prediction",
2787
  "series_id": "cosmos3_super_reasoner",
2788
  "method": "Cosmos3-Super Reasoner",
2789
+ "status": "scored",
2790
+ "status_label": "scored",
2791
+ "scored": true,
2792
  "proxy_scored": false,
2793
+ "raw": 0.0,
2794
+ "raw_text": "0.0000",
2795
+ "normalized_score": 0.0,
2796
+ "metric_key": "action_object_relation_macro_f1",
2797
+ "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
2798
  "scope": "multi_episode_128_partial_model_overlay",
2799
+ "reason": null
2800
  },
2801
  {
2802
  "task_number": 16,
docs/data/task_surface_integrity.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-16T12:01:37+00:00",
4
  "summary": {
5
  "task_count": 12,
6
  "expected_task_count": 12,
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-16T13:35:48+00:00",
4
  "summary": {
5
  "task_count": 12,
6
  "expected_task_count": 12,
docs/data/unified_task_model_radar.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "title": "Unified 20-Task Model Radar",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-16T11:26:57+00:00",
5
  "task_count": 20,
6
  "method_count": 9,
7
  "method_task_record_count": 180,
8
- "scored_method_task_count": 111,
9
  "normalization_policy": {
10
  "higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]",
11
  "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
@@ -167,20 +167,20 @@
167
  "kind": "partial_128_episode_foundation_model_overlay",
168
  "scope": "128 selected episodes, held-out test",
169
  "stroke_dasharray": "7 7",
170
- "method_detail": "Verified held-out Qwen3-Omni v6 LoRA metrics on task-aligned JSON outputs.",
171
  "plotted_as": "colored point overlay",
172
  "result_record_count": 20,
173
- "scored_task_count": 6,
174
- "covered_task_count": 6,
175
  "proxy_scored_task_count": 0,
176
- "scoreless_task_count": 14,
177
  "unsupported_task_count": 0,
178
- "not_evaluated_task_count": 14,
179
  "status_counts": {
180
- "not_evaluated_in_verified_package": 14,
181
- "scored": 6
182
  },
183
- "coverage_fraction": 0.3,
184
  "result_record_fraction": 1.0
185
  },
186
  {
@@ -191,20 +191,20 @@
191
  "kind": "partial_128_episode_foundation_model_overlay",
192
  "scope": "128 selected episodes, held-out test",
193
  "stroke_dasharray": "4 7",
194
- "method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation.",
195
  "plotted_as": "colored point overlay",
196
  "result_record_count": 20,
197
- "scored_task_count": 6,
198
- "covered_task_count": 6,
199
  "proxy_scored_task_count": 0,
200
- "scoreless_task_count": 14,
201
  "unsupported_task_count": 0,
202
- "not_evaluated_task_count": 14,
203
  "status_counts": {
204
- "not_evaluated_in_verified_package": 14,
205
- "scored": 6
206
  },
207
- "coverage_fraction": 0.3,
208
  "result_record_fraction": 1.0
209
  },
210
  {
@@ -1930,6 +1930,28 @@
1930
  "raw_text": "0.0000",
1931
  "status_label": "scored"
1932
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1933
  "raw128_simple": {
1934
  "raw": 0.0,
1935
  "metric_key": "macro_f1",
@@ -1974,28 +1996,6 @@
1974
  "raw_text": "n/a",
1975
  "status_label": "not supported"
1976
  },
1977
- "qwen3_omni_v6_lora": {
1978
- "raw": null,
1979
- "metric_key": "macro_f1",
1980
- "source": null,
1981
- "scope": "multi_episode_128_partial_model_overlay",
1982
- "status": "not_evaluated_in_verified_package",
1983
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
1984
- "normalized_score": null,
1985
- "raw_text": "n/a",
1986
- "status_label": "not evaluated"
1987
- },
1988
- "cosmos3_super_reasoner": {
1989
- "raw": null,
1990
- "metric_key": "macro_f1",
1991
- "source": null,
1992
- "scope": "multi_episode_128_partial_model_overlay",
1993
- "status": "not_evaluated_in_verified_package",
1994
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
1995
- "normalized_score": null,
1996
- "raw_text": "n/a",
1997
- "status_label": "not evaluated"
1998
- },
1999
  "cosmos3_nano_future_window": {
2000
  "raw": null,
2001
  "metric_key": "macro_f1",
@@ -2492,7 +2492,7 @@
2492
  "title": "Qwen3-Omni v6 LoRA",
2493
  "status": "verified",
2494
  "task_aligned_axes": "Qwen3",
2495
- "coverage": "20 records / 6 scored task-aligned axes",
2496
  "headline": "JSON validity 0.9990; action macro-F1 0.0029",
2497
  "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json"
2498
  },
@@ -2500,7 +2500,7 @@
2500
  "id": "cosmos3_super_reasoner",
2501
  "title": "Cosmos3-Super Reasoner",
2502
  "status": "verified_base_weight_eval",
2503
- "coverage": "20 records / 6 scored task-aligned axes",
2504
  "headline": "JSON validity 0.5112; action macro-F1 0.0008",
2505
  "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json"
2506
  },
@@ -5066,17 +5066,17 @@
5066
  "task_label": "Action-Object Relation Prediction",
5067
  "series_id": "qwen3_omni_v6_lora",
5068
  "method": "Qwen3-Omni v6 LoRA",
5069
- "status": "not_evaluated_in_verified_package",
5070
- "status_label": "not evaluated",
5071
- "scored": false,
5072
  "proxy_scored": false,
5073
- "raw": null,
5074
- "raw_text": "n/a",
5075
- "normalized_score": null,
5076
- "metric_key": "macro_f1",
5077
- "source": null,
5078
  "scope": "multi_episode_128_partial_model_overlay",
5079
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
5080
  },
5081
  {
5082
  "task_number": 16,
@@ -5084,17 +5084,17 @@
5084
  "task_label": "Action-Object Relation Prediction",
5085
  "series_id": "cosmos3_super_reasoner",
5086
  "method": "Cosmos3-Super Reasoner",
5087
- "status": "not_evaluated_in_verified_package",
5088
- "status_label": "not evaluated",
5089
- "scored": false,
5090
  "proxy_scored": false,
5091
- "raw": null,
5092
- "raw_text": "n/a",
5093
- "normalized_score": null,
5094
- "metric_key": "macro_f1",
5095
- "source": null,
5096
  "scope": "multi_episode_128_partial_model_overlay",
5097
- "reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score"
5098
  },
5099
  {
5100
  "task_number": 16,
 
1
  {
2
  "title": "Unified 20-Task Model Radar",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-16T13:35:38+00:00",
5
  "task_count": 20,
6
  "method_count": 9,
7
  "method_task_record_count": 180,
8
+ "scored_method_task_count": 113,
9
  "normalization_policy": {
10
  "higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]",
11
  "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
 
167
  "kind": "partial_128_episode_foundation_model_overlay",
168
  "scope": "128 selected episodes, held-out test",
169
  "stroke_dasharray": "7 7",
170
+ "method_detail": "Verified held-out Qwen3-Omni v6 LoRA metrics, plus task 16 scored from existing verified action/object JSON.",
171
  "plotted_as": "colored point overlay",
172
  "result_record_count": 20,
173
+ "scored_task_count": 7,
174
+ "covered_task_count": 7,
175
  "proxy_scored_task_count": 0,
176
+ "scoreless_task_count": 13,
177
  "unsupported_task_count": 0,
178
+ "not_evaluated_task_count": 13,
179
  "status_counts": {
180
+ "not_evaluated_in_verified_package": 13,
181
+ "scored": 7
182
  },
183
+ "coverage_fraction": 0.35,
184
  "result_record_fraction": 1.0
185
  },
186
  {
 
191
  "kind": "partial_128_episode_foundation_model_overlay",
192
  "scope": "128 selected episodes, held-out test",
193
  "stroke_dasharray": "4 7",
194
+ "method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation, plus task 16 scored from existing verified action/object JSON.",
195
  "plotted_as": "colored point overlay",
196
  "result_record_count": 20,
197
+ "scored_task_count": 7,
198
+ "covered_task_count": 7,
199
  "proxy_scored_task_count": 0,
200
+ "scoreless_task_count": 13,
201
  "unsupported_task_count": 0,
202
+ "not_evaluated_task_count": 13,
203
  "status_counts": {
204
+ "not_evaluated_in_verified_package": 13,
205
+ "scored": 7
206
  },
207
+ "coverage_fraction": 0.35,
208
  "result_record_fraction": 1.0
209
  },
210
  {
 
1930
  "raw_text": "0.0000",
1931
  "status_label": "scored"
1932
  },
1933
+ "qwen3_omni_v6_lora": {
1934
+ "raw": 0.0002220083079671497,
1935
+ "metric_key": "action_object_relation_macro_f1",
1936
+ "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
1937
+ "scope": "multi_episode_128_partial_model_overlay",
1938
+ "status": "scored",
1939
+ "reason": null,
1940
+ "normalized_score": 0.0002220083079671497,
1941
+ "raw_text": "0.0002",
1942
+ "status_label": "scored"
1943
+ },
1944
+ "cosmos3_super_reasoner": {
1945
+ "raw": 0.0,
1946
+ "metric_key": "action_object_relation_macro_f1",
1947
+ "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
1948
+ "scope": "multi_episode_128_partial_model_overlay",
1949
+ "status": "scored",
1950
+ "reason": null,
1951
+ "normalized_score": 0.0,
1952
+ "raw_text": "0.0000",
1953
+ "status_label": "scored"
1954
+ },
1955
  "raw128_simple": {
1956
  "raw": 0.0,
1957
  "metric_key": "macro_f1",
 
1996
  "raw_text": "n/a",
1997
  "status_label": "not supported"
1998
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1999
  "cosmos3_nano_future_window": {
2000
  "raw": null,
2001
  "metric_key": "macro_f1",
 
2492
  "title": "Qwen3-Omni v6 LoRA",
2493
  "status": "verified",
2494
  "task_aligned_axes": "Qwen3",
2495
+ "coverage": "20 records / 7 scored task-aligned axes",
2496
  "headline": "JSON validity 0.9990; action macro-F1 0.0029",
2497
  "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json"
2498
  },
 
2500
  "id": "cosmos3_super_reasoner",
2501
  "title": "Cosmos3-Super Reasoner",
2502
  "status": "verified_base_weight_eval",
2503
+ "coverage": "20 records / 7 scored task-aligned axes",
2504
  "headline": "JSON validity 0.5112; action macro-F1 0.0008",
2505
  "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json"
2506
  },
 
5066
  "task_label": "Action-Object Relation Prediction",
5067
  "series_id": "qwen3_omni_v6_lora",
5068
  "method": "Qwen3-Omni v6 LoRA",
5069
+ "status": "scored",
5070
+ "status_label": "scored",
5071
+ "scored": true,
5072
  "proxy_scored": false,
5073
+ "raw": 0.0002220083079671497,
5074
+ "raw_text": "0.0002",
5075
+ "normalized_score": 0.0002220083079671497,
5076
+ "metric_key": "action_object_relation_macro_f1",
5077
+ "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
5078
  "scope": "multi_episode_128_partial_model_overlay",
5079
+ "reason": null
5080
  },
5081
  {
5082
  "task_number": 16,
 
5084
  "task_label": "Action-Object Relation Prediction",
5085
  "series_id": "cosmos3_super_reasoner",
5086
  "method": "Cosmos3-Super Reasoner",
5087
+ "status": "scored",
5088
+ "status_label": "scored",
5089
+ "scored": true,
5090
  "proxy_scored": false,
5091
+ "raw": 0.0,
5092
+ "raw_text": "0.0000",
5093
+ "normalized_score": 0.0,
5094
+ "metric_key": "action_object_relation_macro_f1",
5095
+ "source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
5096
  "scope": "multi_episode_128_partial_model_overlay",
5097
+ "reason": null
5098
  },
5099
  {
5100
  "task_number": 16,
results/omni_finetune/model_output_task_probes_20260616/RUN_REPORT.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Existing Model-Output Task Probes
2
+
3
+ Generated: `2026-06-16T13:35:37+00:00`
4
+
5
+ This package scores only task targets already present in verified held-out
6
+ prediction JSON. It does not run new inference and does not infer targets that
7
+ are absent from a model branch.
8
+
9
+ | Method | ID | Status | Scored rows | Task 16 macro-F1 | Evidence |
10
+ | --- | --- | --- | ---: | ---: | --- |
11
+ | Qwen3-Omni v6 LoRA | qwen3_omni_v6_lora | scored | 4014 | 0.000222 | results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/predictions.jsonl |
12
+ | Cosmos3-Super Reasoner | cosmos3_super_reasoner | scored | 446 | 0.000000 | results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/predictions.jsonl |
13
+ | Cosmos3-Nano Future Window | cosmos3_nano_future_window | unsupported_without_required_fields | n/a | n/a | verified future-window predictions do not contain object-set fields |
results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "accuracy": 0.0,
3
+ "action_object_relation_accuracy": 0.0,
4
+ "action_object_relation_macro_f1": 0.0,
5
+ "artifact_files": {
6
+ "metrics_json": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
7
+ "per_class_metrics_csv": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/per_class_metrics.csv",
8
+ "predictions_csv": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/predictions.csv"
9
+ },
10
+ "excluded_rows_without_true_relation": 2,
11
+ "generated_at_utc": "2026-06-16T13:35:37+00:00",
12
+ "labels": [
13
+ "Adjust canned food on shelf :: canned food | cardboard box | store shelf",
14
+ "Adjust item on shelf :: shelf | stationery package",
15
+ "Adjust lantern shape :: red pleated paper lantern",
16
+ "Adjust lantern string :: red paper lantern | string",
17
+ "Adjust paper :: cardboard square | paper | pen | star beads",
18
+ "Adjust pot position :: pot",
19
+ "Adjust puzzle piece :: jigsaw puzzle | puzzle box | puzzle pieces",
20
+ "Align canned food on shelf :: canned food | cardboard box | retail shelf",
21
+ "Align edges of paper lantern :: hands | red paper lantern",
22
+ "Align paper lantern edges :: hands | red paper lantern",
23
+ "Apply adhesive tape to lantern :: adhesive tape | red paper lantern",
24
+ "Approach boxes :: cardboard boxes | colleague | shelving unit",
25
+ "Approaching and pressing the door switch :: glass door | switch | wall",
26
+ "Approaching the table :: beads | chairs | table",
27
+ "Arrange buttons :: red buttons | smartphone | table",
28
+ "Arrange buttons in a line :: buttons | phone | table",
29
+ "Arrange star beads :: paper | pen | power bank | smartphone | star beads",
30
+ "Arrange star beads for counting :: paper | pen | star beads",
31
+ "Attempt to fit puzzle piece :: puzzle board | puzzle piece",
32
+ "Attempt to fit puzzle piece :: puzzle piece",
33
+ "Bend and manipulate plastic strip :: plastic strip | stationary box | table | water bottle",
34
+ "Browse smartphone screen :: paper strips | smartphone | star ornaments",
35
+ "Bundle display hooks :: container | display hooks",
36
+ "Closing the door :: door | dustpan",
37
+ "Counting and organizing beads :: cardboard squares | paper | pen | star-shaped beads",
38
+ "Counting star beads :: paper | pen | star beads",
39
+ "Cut along the marked line :: cardboard | hand | utility knife",
40
+ "Cut cardboard :: cardboard | desk | pen | ruler | utility knife",
41
+ "Cut cardboard piece :: cardboard strip | scissors",
42
+ "Entering the VR training room :: doorway | person | table",
43
+ "Expand paper lantern :: adhesive tape roll | cardboard box | red paper lantern",
44
+ "Extract wire hangers from box :: cardboard box | wire hangers",
45
+ "Fold paper lantern :: red paper honeycomb lantern",
46
+ "Fold plastic strip :: container with tools | purple plastic strip | water bottle | white table",
47
+ "Gather star beads :: cardboard squares | mobile phone | paper | pen | power bank | star beads",
48
+ "Gesturing :: cardboard pieces | marker | pencil case | ruler | scissors",
49
+ "Grasp cleaning bottle :: bowls | chopping board | cleaning fluid bottle",
50
+ "Grasp lantern :: red paper honeycomb lantern",
51
+ "Grasp lantern component :: cardboard box | paper lantern | tape",
52
+ "Grasping cleaning cloth :: cleaning cloth | countertop",
53
+ "Greeting/acknowledging participants :: person | table | vr headset",
54
+ "Handle paper lantern component :: cardboard box | paper lantern component | plastic bag | red paper lantern",
55
+ "Hold and bend plastic strip :: bottle | purple plastic strip | stationery box",
56
+ "Hold and manipulate paper strip :: beads | mobile phone | power bank | yellow paper strip",
57
+ "Hold beads :: paper | pen | power bank | smartphone | star-shaped beads",
58
+ "Hold canned food :: box of cans | can of soup | shelf",
59
+ "Hold cardboard piece :: cardboard pieces | marker | pencil case | ruler | scissors",
60
+ "Hold container lid :: dustpan | red plastic container lid",
61
+ "Hold paper lantern :: red paper lantern | sofa",
62
+ "Hold smartphone :: kettle | kitchen counter | sink | smartphone",
63
+ "Hold smartphone :: quilling paper strips | small blue beads | smartphone",
64
+ "Identify next cardboard piece :: cardboard pieces | marker",
65
+ "Inspect shelf condition :: colleague | shelf",
66
+ "Interact with smartphone :: beads | smartphone | yellow paper strip",
67
+ "Interact with smartphone :: cans | shelf | smartphone",
68
+ "Lift pot lid :: pot | pot lid",
69
+ "Manipulate adhesive strip :: adhesive strip | paper sheets | puzzle box | smartphone | water bottle",
70
+ "Manipulate bead :: bead piles | container | paper strips | yellow bead",
71
+ "Manipulate beads :: blue beads | power bank | smartphone | yellow beads",
72
+ "Manipulate craft paper strips :: paper strips | scissors | smartphone",
73
+ "Manipulate craft piece :: craft pieces | scissors | smartphone",
74
+ "Manipulate material :: blue crafting material | hand",
75
+ "Manipulate paper decoration :: paper cone | paper decoration | smartphone | water bottle",
76
+ "Manipulate paper edge :: hands | paper cone | puzzle box | smartphone | water bottle",
77
+ "Manipulate paper strip :: container with tools | table | yellow paper strip",
78
+ "Manipulate paper strip :: craft materials | purple paper strip | scissors | smartphone",
79
+ "Manipulate paper strip :: paper cone | paper strip | puzzle box | smartphone | water bottle",
80
+ "Manipulate paper strip :: purple paper strip | quilling paper pile | smartphone",
81
+ "Manipulate plastic strip :: purple plastic strip | storage bin | water bottle",
82
+ "Manipulate plastic strips :: desk | plastic strips | water bottle",
83
+ "Manipulate puzzle piece :: jigsaw puzzle | puzzle piece",
84
+ "Manipulate puzzle pieces :: puzzle box | puzzle mat | puzzle pieces",
85
+ "Manipulate yellow strip :: beads | cell phone | pen | yellow strip",
86
+ "Manipulating paper strips :: beads | blue paper strip | yellow paper strip",
87
+ "Mark cardboard piece :: cardboard piece | marker",
88
+ "Mark cardboard piece :: cardboard piece | marker | pouch",
89
+ "Marking cardboard piece :: cardboard pieces | marker | ruler | scissors",
90
+ "Move dustpan to side :: dustpan",
91
+ "Move hand away :: canned food | retail shelf",
92
+ "Move hand away from shelf :: canned food | cardboard box | store shelf",
93
+ "Move marker and adjust hand :: cardboard pieces | marker",
94
+ "Move phone :: cardboard | smartphone | utility knife",
95
+ "Move pot :: faucet | pot | sink",
96
+ "Move smartphone :: craft beads | paper strips | smartphone",
97
+ "Move through aisle :: aisle floor | display hooks | metal shelving units",
98
+ "Move through the training room :: chairs | fire extinguisher | tables | vr headsets",
99
+ "Move to shelf :: cardboard box | colleague | shelves",
100
+ "Move towards kitchen area :: cloth | sink",
101
+ "Move towards the stove :: blue bowl | faucet | metal container | red bowl | sink",
102
+ "Observe and pause :: cardboard pieces | marker | pencil case | ruler | scissors",
103
+ "Observe and walk through store :: colleague | inventory boxes | shelves",
104
+ "Observe colleague and workspace :: price tag | shelves | watch",
105
+ "Observe puzzle progress :: hand | jigsaw puzzle",
106
+ "Observe workspace :: cardboard boxes | colleague | shelving unit",
107
+ "Open earbud case :: earbud case",
108
+ "Open folded paper lantern :: cardboard box | red pleated paper lantern",
109
+ "Open paper lantern :: red paper honeycomb lantern",
110
+ "Open paper lantern component :: paper lantern component",
111
+ "Open stove pot lid :: blue bowl | cloth | faucet | red bowl | sink | soap dispenser | white bowl",
112
+ "Operate smartphone :: craft paper strips | folded paper fan | smartphone",
113
+ "Organize cardboard pieces :: cardboard pieces | marker | pencil case | ruler | scissors",
114
+ "Pick up button :: buttons | table",
115
+ "Pick up can :: canned food | cardboard box",
116
+ "Pick up canned food :: canned food | cardboard box",
117
+ "Pick up dustpan :: dustpan | smartphone",
118
+ "Pick up items from the shopping bag :: cardboard boxes | red shopping bag | retail shelf",
119
+ "Pick up new cardboard piece :: cardboard piece | marker",
120
+ "Pick up packaged paper lantern component :: cardboard box | packaged paper lantern component",
121
+ "Pick up pen :: paper | pen | star beads",
122
+ "Pick up puzzle piece :: jigsaw puzzle | puzzle piece",
123
+ "Pick up small piece of material :: blue crafting material | hand",
124
+ "Pick up smartphone :: cans | shelf | smartphone",
125
+ "Pick up star bead :: cardboard squares | mobile phone | paper | pen | power bank | star beads",
126
+ "Pick up utility knife :: cardboard | utility knife",
127
+ "Picking up bottle :: bottle",
128
+ "Picking up crafting material :: beads | papers | pen | table",
129
+ "Place and count bead :: paper | pen | star beads",
130
+ "Place another canned food on shelf :: box | canned food",
131
+ "Place button :: buttons | hand | smartphone | table",
132
+ "Place button :: buttons | hand | table",
133
+ "Place can on shelf :: canned food | retail shelf",
134
+ "Place canned food on shelf :: can of vegetables | cardboard boxes | store shelf",
135
+ "Place cloth on floor :: cloth",
136
+ "Place hand on table :: craft beads | paper strips | smartphone",
137
+ "Place item on shelf :: stationery package",
138
+ "Place items on the shelf :: packaged items | retail shelf",
139
+ "Place lid back :: pot | pot lid",
140
+ "Place marked piece down :: cardboard piece | marker",
141
+ "Place material :: bead design | blue crafting material | hand",
142
+ "Place phone down :: beads | pencil holder | smartphone | yellow paper strip",
143
+ "Place piece into puzzle :: jigsaw puzzle | puzzle piece",
144
+ "Place puzzle piece :: puzzle board | puzzle piece",
145
+ "Place smartphone down :: power bank | quilling paper strips | small paper stars | smartphone",
146
+ "Place smartphone down :: quilling paper strips | small paper stars | smartphone",
147
+ "Place smartphone on stand :: saucepan | smartphone | smartphone stand | yellow jacket",
148
+ "Place towel :: pot | towel",
149
+ "Placing paper strip :: adhesive strip | paper cone | phone",
150
+ "Placing paper strip :: paper cone | phone | puzzle box | water bottle",
151
+ "Preparing to craft :: beads | chairs | table",
152
+ "Put down smartphone :: paper stars | paper strips | smartphone",
153
+ "Put down smartphone :: power bank | smartphone",
154
+ "Reach for another item :: item | shelf",
155
+ "Reach for cleaning supplies :: bowls | chopping board | cleaning fluid bottle",
156
+ "Reach for craft items :: smartphone | star-shaped craft items | table",
157
+ "Reach for next can :: can of vegetables | cardboard boxes | store shelf",
158
+ "Reach for next canned food :: box | canned food | retail shelf",
159
+ "Reach for next item :: packaged item | shelf",
160
+ "Reach for puzzle piece :: puzzle board | puzzle box | puzzle pieces",
161
+ "Reach for wire hangers :: cardboard box | wire hangers",
162
+ "Reach into box :: box of cans",
163
+ "Record count :: paper | pen | star-shaped beads",
164
+ "Release cardboard piece and gesture :: cardboard piece | cardboard piles | marker | pouch | ruler | scissors",
165
+ "Release hook :: display hook",
166
+ "Release lantern :: red paper lantern",
167
+ "Release paper strip :: craft beads | paper strip | scissors | smartphone",
168
+ "Release puzzle piece :: jigsaw puzzle | puzzle box | puzzle pieces",
169
+ "Release scissors :: craft paper strips | folded paper fan | scissors | smartphone",
170
+ "Release smartphone :: paper scraps | scissors | smartphone | table",
171
+ "Remove cleaning bottle :: cleaning bottle | plastic bowls",
172
+ "Remove paper lantern part from packaging :: paper lantern | red hand fan",
173
+ "Remove plastic packaging :: packaging | paper lantern component",
174
+ "Reposition hand :: cardboard | utility knife",
175
+ "Resume observation :: cardboard pieces | marker | pencil case | ruler | scissors",
176
+ "Retrieve canned food from box :: box | canned food",
177
+ "Retrieve next canned food item :: canned food | cardboard box",
178
+ "Retrieving more beads :: paper | pen | star beads",
179
+ "Rinse cloth in sink :: cloth | sink | water faucet",
180
+ "Scroll smartphone screen :: paper stars | paper strips | smartphone",
181
+ "Search for puzzle piece :: jigsaw puzzle | puzzle pieces",
182
+ "Secure paper edges with adhesive :: adhesive strip | paper cone pieces | puzzle box | smartphone | water bottle",
183
+ "Securing paper structure :: decorative stars | paper strips | smartphone | water bottle",
184
+ "Sort and adjust button line :: buttons | smartphone | table",
185
+ "Sort and arrange buttons :: buttons | smartphones | soda can | table",
186
+ "Sort and count beads :: paper | pen | star beads",
187
+ "Sort and place buttons :: buttons | coca-cola can | smartphone",
188
+ "Sort beads :: paper | pen | smartphone | star-shaped beads",
189
+ "Sort beads and write count :: paper | pen | power bank | smartphone | star-shaped beads",
190
+ "Sort button :: buttons | cell phone",
191
+ "Sort buttons :: buttons | table",
192
+ "Sort craft items :: star-shaped craft items | table",
193
+ "Sort puzzle pieces :: jigsaw puzzle pieces | table",
194
+ "Sort small craft pieces :: paper strips | scissors | smartphone | star-shaped craft pieces",
195
+ "Sort star-shaped beads :: marker | mobile phone | paper | power bank | star-shaped beads",
196
+ "Start cutting :: cardboard | utility knife",
197
+ "Stir contents :: cooking utensil | pot",
198
+ "Use phone :: beads | paper | power bank | smartphone",
199
+ "Use phone while crafting :: beads | pencil holder | smartphone | yellow paper strip",
200
+ "Use smartphone :: buttons | chair | smartphone | table",
201
+ "Use smartphone :: buttons | smartphone | table",
202
+ "Use smartphone :: charging cable | paper strips | power bank | smartphone | star-shaped paper crafts",
203
+ "Use smartphone :: containers | kettle | kitchen counter | rice cooker | smartphone",
204
+ "Walk towards other aisles :: retail store aisle",
205
+ "Walk towards shelves :: cardboard boxes | red bin | shelving unit",
206
+ "Walking across the room :: chair | fire extinguisher | office | table",
207
+ "Walking in the hallway :: hallway | people | tables | vr headsets",
208
+ "Walking towards door :: door | dustpan",
209
+ "Washing hands in sink :: faucet | sink",
210
+ "Wipe kitchen counter :: door | laundry basket | mop",
211
+ "Wiping countertop :: cleaning cloth | countertop",
212
+ "Write count on paper :: marker | mobile phone | paper | power bank | star-shaped beads",
213
+ "Write on paper :: cardboard square | paper | pen | star beads",
214
+ "sort craft materials :: paper pieces | scissors | smartphone",
215
+ "Pick up and place canned goods on shelf :: bin | canned goods | shelf",
216
+ "Browsing smartphone content :: smartphone",
217
+ "Place item on shelf :: retail item | shelf",
218
+ "Place item on shelf :: canned food | shelf",
219
+ "<missing_pred_relation>",
220
+ "Approach packing area :: items to pack | packing area",
221
+ "Approach restocking supplies :: aisle | restocking supplies | shelf",
222
+ "unknown :: unknown",
223
+ "Approach table :: materials | table | tools | work surface",
224
+ "Place item in container :: canned goods | plastic container | shopping bag",
225
+ "Pick up canned food :: canned food | shelf",
226
+ "Adjusting items on the shelf :: items | shelf",
227
+ "Wipe hands :: hands | sink | water bottle",
228
+ "Approach the stove :: cooking pot | kitchen counter | stove",
229
+ "Place item on shelf :: plush toy | shelf",
230
+ "Place item on shelf :: canned goods | shelf",
231
+ "Adjust cardboard :: cardboard pieces | workspace surface",
232
+ "Adjust cardboard :: cardboard | cutting mat | glue stick | marker | measuring tape | ruler | scissors | work table",
233
+ "Approach packing area :: cardboard pieces | marker | ruler | scissors | work table",
234
+ "Adjust cardboard :: cardboard piece | marker | ruler | workspace surface",
235
+ "Adjust cardboard position :: cardboard piece | marker | ruler | scissors | work surface",
236
+ "Adjust cardboard position :: cardboard piece | hands | work surface",
237
+ "Adjust cardboard :: cardboard piece | glue stick | marker | measuring tape | pencil | ruler | scissors | work table",
238
+ "Adjust cardboard :: cardboard | glue stick | marker | measuring tape | pencil | ruler | scissors | work table",
239
+ "Adjust cardboard position :: cardboard piece | scissors | work table",
240
+ "Adjust cardboard position :: cardboard piece | scissors | work surface",
241
+ "Cut cardboard with utility knife :: cardboard | ruler | table | utility knife",
242
+ "Cut cardboard :: cardboard | glue | marker | measuring tape | pen | ruler | scissors | work surface",
243
+ "Adjusting items on shelf :: containers | retail items | shelf",
244
+ "Approach packing area :: container | items to pack | packing area",
245
+ "Adjust cardboard divider :: cardboard divider | retail items | shelf",
246
+ "Adjusting items on shelf :: retail items | shelf",
247
+ "Approach packing area :: boxes | items to pack | packing area",
248
+ "Adjust grip :: canned food | plastic container | shelf",
249
+ "Adjust item on shelf :: retail items | shelf",
250
+ "Approach packing area :: cardboard pieces | packing area | storage bins",
251
+ "Adjust cardboard position :: cardboard pieces | scissors | table surface",
252
+ "Adjust cardboard divider :: cardboard divider | cardboard pieces | table surface",
253
+ "Adjust cardboard divider :: cardboard divider | cardboard pieces | scissors | table",
254
+ "Place canned food on shelf :: canned food | shelf",
255
+ "Pick up canned food :: bin | canned food | shelf",
256
+ "Pick up canned food :: box | canned food | shelf",
257
+ "Pick up canned food :: canned food | container | shelf",
258
+ "Adjust canned food on shelf :: canned food | shelf",
259
+ "Move along the shelf :: retail items | shelf",
260
+ "Picking up and placing canned goods :: canned goods | shelf",
261
+ "Move along the shelf :: canned goods | plastic container | shelf",
262
+ "Adjust item on shelf :: retail item | shelf",
263
+ "Browsing smartphone content :: retail items | shelf | smartphone",
264
+ "Manipulate paper strip :: paper strip | scissors | table",
265
+ "Manipulate paper strip :: marker | paper strip | pink cloth | ruler | scissors | smartphone | table | water bottle",
266
+ "Adjust grip :: hand | red container",
267
+ "Approach packing area :: boxes | packaging materials | red hat | table",
268
+ "Approach packing area :: boxes | packaging materials | red cap | table",
269
+ "Manipulate paper strip :: paper strip",
270
+ "Adjust lantern string :: handle | lantern | string",
271
+ "Adjust grip :: lid | red container",
272
+ "Approach packing area :: items to pack | packing area | packing materials",
273
+ "Adjust lantern string :: lantern handle | lantern string | paper lantern",
274
+ "Adjust grip :: red container | small white object",
275
+ "Adjust lantern string :: handle component | paper lantern | string",
276
+ "Approach table :: people | table",
277
+ "Approach table :: items on table | people | table",
278
+ "Adjust Mahjong tile :: mahjong tile | mahjong tile stack | table surface",
279
+ "Adjusting and placing down paper pieces :: hand | paper pieces | table",
280
+ "Approach table :: cardboard | chair | mahjong tiles | marker | paper | ruler | scissors | table",
281
+ "Approach table :: beads | cardboard | marker | paper | pen | ruler | scissors | table",
282
+ "Manipulate paper strip :: craft materials | hands | marker | paper strip | ruler | scissors | table | workspace",
283
+ "Manipulate paper strip :: craft materials | glue | hands | marker | paper strip | ruler | scissors | table",
284
+ "Adjust bead piles :: beads | table",
285
+ "Manipulate paper strip :: beads | glue | marker | paper strip | ruler | scissors | smartphone | table",
286
+ "Manipulate paper strip :: beads | mahjong tiles | marker | paper strip | ruler | scissors | smartphone | table",
287
+ "Manipulate colorful pieces :: buttons | table",
288
+ "Manipulate colorful pieces :: buttons",
289
+ "Adjust Mahjong tile :: mahjong tile | mahjong tile stack | table",
290
+ "Approach packing area :: cardboard box | items to pack | packing area",
291
+ "Interacting with colleagues :: person",
292
+ "Gathering items :: boxes | chairs | equipment | materials | people | supplies | tables | tools",
293
+ "Interacting with phone :: ceiling | chair | door | floor | person | smartphone | table | wall",
294
+ "Manipulate paper strip :: cardboard | craft materials | hands | marker | paper strip | ruler | scissors | table",
295
+ "Write on paper :: paper | pen | star beads",
296
+ "Manipulate paper strip :: chair | glue | marker | paper strip | person | ruler | scissors | table",
297
+ "Sort and arrange buttons :: buttons | table"
298
+ ],
299
+ "macro_f1": 0.0,
300
+ "metric_key": "action_object_relation_macro_f1",
301
+ "missing_pred_relation_count": 226,
302
+ "model_id": "cosmos3_super_reasoner",
303
+ "model_label": "Cosmos3-Super Reasoner",
304
+ "normalization_policy": "The action component uses the verified predicted action label when present. The object component is a canonical casefolded set because task 16 is an action plus object-set relation.",
305
+ "num_samples": 446,
306
+ "primary_metric": "action_object_relation_macro_f1",
307
+ "primary_score": 0.0,
308
+ "scope": "held_out_test_existing_verified_prediction_json",
309
+ "score_policy": "Derived from existing verified held-out prediction JSON. No new model inference was run; rows without a predicted action/object relation are counted as missing predictions.",
310
+ "scored_rows": 446,
311
+ "source_prediction_jsonl": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/predictions.jsonl",
312
+ "status": "pass",
313
+ "task_id": "action_object_relation",
314
+ "task_label": "Action-Object Relation",
315
+ "task_number": 16,
316
+ "title": "Cosmos3-Super Reasoner Action-Object Relation Probe",
317
+ "total_prediction_rows": 448,
318
+ "valid_pred_relation_count": 220,
319
+ "valid_pred_relation_rate": 0.49327354260089684
320
+ }
results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/per_class_metrics.csv ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ class_name,support,predicted,precision,recall,f1
2
+ Adjust canned food on shelf :: canned food | cardboard box | store shelf,2,0,0.0,0.0,0.0
3
+ Adjust item on shelf :: shelf | stationery package,2,0,0.0,0.0,0.0
4
+ Adjust lantern shape :: red pleated paper lantern,2,0,0.0,0.0,0.0
5
+ Adjust lantern string :: red paper lantern | string,2,0,0.0,0.0,0.0
6
+ Adjust paper :: cardboard square | paper | pen | star beads,2,0,0.0,0.0,0.0
7
+ Adjust pot position :: pot,2,0,0.0,0.0,0.0
8
+ Adjust puzzle piece :: jigsaw puzzle | puzzle box | puzzle pieces,3,0,0.0,0.0,0.0
9
+ Align canned food on shelf :: canned food | cardboard box | retail shelf,2,0,0.0,0.0,0.0
10
+ Align edges of paper lantern :: hands | red paper lantern,2,0,0.0,0.0,0.0
11
+ Align paper lantern edges :: hands | red paper lantern,2,0,0.0,0.0,0.0
12
+ Apply adhesive tape to lantern :: adhesive tape | red paper lantern,2,0,0.0,0.0,0.0
13
+ Approach boxes :: cardboard boxes | colleague | shelving unit,2,0,0.0,0.0,0.0
14
+ Approaching and pressing the door switch :: glass door | switch | wall,4,0,0.0,0.0,0.0
15
+ Approaching the table :: beads | chairs | table,2,0,0.0,0.0,0.0
16
+ Arrange buttons :: red buttons | smartphone | table,4,0,0.0,0.0,0.0
17
+ Arrange buttons in a line :: buttons | phone | table,4,0,0.0,0.0,0.0
18
+ Arrange star beads :: paper | pen | power bank | smartphone | star beads,2,0,0.0,0.0,0.0
19
+ Arrange star beads for counting :: paper | pen | star beads,2,0,0.0,0.0,0.0
20
+ Attempt to fit puzzle piece :: puzzle board | puzzle piece,1,0,0.0,0.0,0.0
21
+ Attempt to fit puzzle piece :: puzzle piece,2,0,0.0,0.0,0.0
22
+ Bend and manipulate plastic strip :: plastic strip | stationary box | table | water bottle,4,0,0.0,0.0,0.0
23
+ Browse smartphone screen :: paper strips | smartphone | star ornaments,3,0,0.0,0.0,0.0
24
+ Bundle display hooks :: container | display hooks,2,0,0.0,0.0,0.0
25
+ Closing the door :: door | dustpan,2,0,0.0,0.0,0.0
26
+ Counting and organizing beads :: cardboard squares | paper | pen | star-shaped beads,2,0,0.0,0.0,0.0
27
+ Counting star beads :: paper | pen | star beads,2,0,0.0,0.0,0.0
28
+ Cut along the marked line :: cardboard | hand | utility knife,7,0,0.0,0.0,0.0
29
+ Cut cardboard :: cardboard | desk | pen | ruler | utility knife,7,0,0.0,0.0,0.0
30
+ Cut cardboard piece :: cardboard strip | scissors,3,0,0.0,0.0,0.0
31
+ Entering the VR training room :: doorway | person | table,3,0,0.0,0.0,0.0
32
+ Expand paper lantern :: adhesive tape roll | cardboard box | red paper lantern,2,0,0.0,0.0,0.0
33
+ Extract wire hangers from box :: cardboard box | wire hangers,2,0,0.0,0.0,0.0
34
+ Fold paper lantern :: red paper honeycomb lantern,2,0,0.0,0.0,0.0
35
+ Fold plastic strip :: container with tools | purple plastic strip | water bottle | white table,3,0,0.0,0.0,0.0
36
+ Gather star beads :: cardboard squares | mobile phone | paper | pen | power bank | star beads,2,0,0.0,0.0,0.0
37
+ Gesturing :: cardboard pieces | marker | pencil case | ruler | scissors,2,0,0.0,0.0,0.0
38
+ Grasp cleaning bottle :: bowls | chopping board | cleaning fluid bottle,2,0,0.0,0.0,0.0
39
+ Grasp lantern :: red paper honeycomb lantern,2,0,0.0,0.0,0.0
40
+ Grasp lantern component :: cardboard box | paper lantern | tape,2,0,0.0,0.0,0.0
41
+ Grasping cleaning cloth :: cleaning cloth | countertop,2,0,0.0,0.0,0.0
42
+ Greeting/acknowledging participants :: person | table | vr headset,3,0,0.0,0.0,0.0
43
+ Handle paper lantern component :: cardboard box | paper lantern component | plastic bag | red paper lantern,2,0,0.0,0.0,0.0
44
+ Hold and bend plastic strip :: bottle | purple plastic strip | stationery box,3,0,0.0,0.0,0.0
45
+ Hold and manipulate paper strip :: beads | mobile phone | power bank | yellow paper strip,2,0,0.0,0.0,0.0
46
+ Hold beads :: paper | pen | power bank | smartphone | star-shaped beads,2,0,0.0,0.0,0.0
47
+ Hold canned food :: box of cans | can of soup | shelf,2,0,0.0,0.0,0.0
48
+ Hold cardboard piece :: cardboard pieces | marker | pencil case | ruler | scissors,3,0,0.0,0.0,0.0
49
+ Hold container lid :: dustpan | red plastic container lid,2,0,0.0,0.0,0.0
50
+ Hold paper lantern :: red paper lantern | sofa,2,0,0.0,0.0,0.0
51
+ Hold smartphone :: kettle | kitchen counter | sink | smartphone,1,0,0.0,0.0,0.0
52
+ Hold smartphone :: quilling paper strips | small blue beads | smartphone,3,0,0.0,0.0,0.0
53
+ Identify next cardboard piece :: cardboard pieces | marker,3,0,0.0,0.0,0.0
54
+ Inspect shelf condition :: colleague | shelf,2,0,0.0,0.0,0.0
55
+ Interact with smartphone :: beads | smartphone | yellow paper strip,2,0,0.0,0.0,0.0
56
+ Interact with smartphone :: cans | shelf | smartphone,2,0,0.0,0.0,0.0
57
+ Lift pot lid :: pot | pot lid,1,0,0.0,0.0,0.0
58
+ Manipulate adhesive strip :: adhesive strip | paper sheets | puzzle box | smartphone | water bottle,5,0,0.0,0.0,0.0
59
+ Manipulate bead :: bead piles | container | paper strips | yellow bead,2,0,0.0,0.0,0.0
60
+ Manipulate beads :: blue beads | power bank | smartphone | yellow beads,2,0,0.0,0.0,0.0
61
+ Manipulate craft paper strips :: paper strips | scissors | smartphone,4,0,0.0,0.0,0.0
62
+ Manipulate craft piece :: craft pieces | scissors | smartphone,4,0,0.0,0.0,0.0
63
+ Manipulate material :: blue crafting material | hand,2,0,0.0,0.0,0.0
64
+ Manipulate paper decoration :: paper cone | paper decoration | smartphone | water bottle,5,0,0.0,0.0,0.0
65
+ Manipulate paper edge :: hands | paper cone | puzzle box | smartphone | water bottle,5,0,0.0,0.0,0.0
66
+ Manipulate paper strip :: container with tools | table | yellow paper strip,2,0,0.0,0.0,0.0
67
+ Manipulate paper strip :: craft materials | purple paper strip | scissors | smartphone,4,0,0.0,0.0,0.0
68
+ Manipulate paper strip :: paper cone | paper strip | puzzle box | smartphone | water bottle,5,0,0.0,0.0,0.0
69
+ Manipulate paper strip :: purple paper strip | quilling paper pile | smartphone,3,0,0.0,0.0,0.0
70
+ Manipulate plastic strip :: purple plastic strip | storage bin | water bottle,3,0,0.0,0.0,0.0
71
+ Manipulate plastic strips :: desk | plastic strips | water bottle,3,0,0.0,0.0,0.0
72
+ Manipulate puzzle piece :: jigsaw puzzle | puzzle piece,3,0,0.0,0.0,0.0
73
+ Manipulate puzzle pieces :: puzzle box | puzzle mat | puzzle pieces,3,0,0.0,0.0,0.0
74
+ Manipulate yellow strip :: beads | cell phone | pen | yellow strip,2,0,0.0,0.0,0.0
75
+ Manipulating paper strips :: beads | blue paper strip | yellow paper strip,2,0,0.0,0.0,0.0
76
+ Mark cardboard piece :: cardboard piece | marker,1,0,0.0,0.0,0.0
77
+ Mark cardboard piece :: cardboard piece | marker | pouch,2,0,0.0,0.0,0.0
78
+ Marking cardboard piece :: cardboard pieces | marker | ruler | scissors,3,0,0.0,0.0,0.0
79
+ Move dustpan to side :: dustpan,1,0,0.0,0.0,0.0
80
+ Move hand away :: canned food | retail shelf,2,0,0.0,0.0,0.0
81
+ Move hand away from shelf :: canned food | cardboard box | store shelf,2,0,0.0,0.0,0.0
82
+ Move marker and adjust hand :: cardboard pieces | marker,3,0,0.0,0.0,0.0
83
+ Move phone :: cardboard | smartphone | utility knife,7,0,0.0,0.0,0.0
84
+ Move pot :: faucet | pot | sink,1,0,0.0,0.0,0.0
85
+ Move smartphone :: craft beads | paper strips | smartphone,3,0,0.0,0.0,0.0
86
+ Move through aisle :: aisle floor | display hooks | metal shelving units,2,0,0.0,0.0,0.0
87
+ Move through the training room :: chairs | fire extinguisher | tables | vr headsets,3,0,0.0,0.0,0.0
88
+ Move to shelf :: cardboard box | colleague | shelves,2,0,0.0,0.0,0.0
89
+ Move towards kitchen area :: cloth | sink,1,0,0.0,0.0,0.0
90
+ Move towards the stove :: blue bowl | faucet | metal container | red bowl | sink,1,0,0.0,0.0,0.0
91
+ Observe and pause :: cardboard pieces | marker | pencil case | ruler | scissors,2,0,0.0,0.0,0.0
92
+ Observe and walk through store :: colleague | inventory boxes | shelves,2,0,0.0,0.0,0.0
93
+ Observe colleague and workspace :: price tag | shelves | watch,2,0,0.0,0.0,0.0
94
+ Observe puzzle progress :: hand | jigsaw puzzle,3,0,0.0,0.0,0.0
95
+ Observe workspace :: cardboard boxes | colleague | shelving unit,2,0,0.0,0.0,0.0
96
+ Open earbud case :: earbud case,2,0,0.0,0.0,0.0
97
+ Open folded paper lantern :: cardboard box | red pleated paper lantern,2,0,0.0,0.0,0.0
98
+ Open paper lantern :: red paper honeycomb lantern,2,0,0.0,0.0,0.0
99
+ Open paper lantern component :: paper lantern component,2,0,0.0,0.0,0.0
100
+ Open stove pot lid :: blue bowl | cloth | faucet | red bowl | sink | soap dispenser | white bowl,1,0,0.0,0.0,0.0
101
+ Operate smartphone :: craft paper strips | folded paper fan | smartphone,4,0,0.0,0.0,0.0
102
+ Organize cardboard pieces :: cardboard pieces | marker | pencil case | ruler | scissors,2,0,0.0,0.0,0.0
103
+ Pick up button :: buttons | table,3,0,0.0,0.0,0.0
104
+ Pick up can :: canned food | cardboard box,2,0,0.0,0.0,0.0
105
+ Pick up canned food :: canned food | cardboard box,2,0,0.0,0.0,0.0
106
+ Pick up dustpan :: dustpan | smartphone,1,0,0.0,0.0,0.0
107
+ Pick up items from the shopping bag :: cardboard boxes | red shopping bag | retail shelf,2,0,0.0,0.0,0.0
108
+ Pick up new cardboard piece :: cardboard piece | marker,2,0,0.0,0.0,0.0
109
+ Pick up packaged paper lantern component :: cardboard box | packaged paper lantern component,1,0,0.0,0.0,0.0
110
+ Pick up pen :: paper | pen | star beads,2,0,0.0,0.0,0.0
111
+ Pick up puzzle piece :: jigsaw puzzle | puzzle piece,3,0,0.0,0.0,0.0
112
+ Pick up small piece of material :: blue crafting material | hand,2,0,0.0,0.0,0.0
113
+ Pick up smartphone :: cans | shelf | smartphone,2,0,0.0,0.0,0.0
114
+ Pick up star bead :: cardboard squares | mobile phone | paper | pen | power bank | star beads,2,0,0.0,0.0,0.0
115
+ Pick up utility knife :: cardboard | utility knife,6,0,0.0,0.0,0.0
116
+ Picking up bottle :: bottle,1,0,0.0,0.0,0.0
117
+ Picking up crafting material :: beads | papers | pen | table,2,0,0.0,0.0,0.0
118
+ Place and count bead :: paper | pen | star beads,2,0,0.0,0.0,0.0
119
+ Place another canned food on shelf :: box | canned food,2,0,0.0,0.0,0.0
120
+ Place button :: buttons | hand | smartphone | table,1,0,0.0,0.0,0.0
121
+ Place button :: buttons | hand | table,2,0,0.0,0.0,0.0
122
+ Place can on shelf :: canned food | retail shelf,2,0,0.0,0.0,0.0
123
+ Place canned food on shelf :: can of vegetables | cardboard boxes | store shelf,1,0,0.0,0.0,0.0
124
+ Place cloth on floor :: cloth,1,0,0.0,0.0,0.0
125
+ Place hand on table :: craft beads | paper strips | smartphone,3,0,0.0,0.0,0.0
126
+ Place item on shelf :: stationery package,2,0,0.0,0.0,0.0
127
+ Place items on the shelf :: packaged items | retail shelf,2,0,0.0,0.0,0.0
128
+ Place lid back :: pot | pot lid,1,0,0.0,0.0,0.0
129
+ Place marked piece down :: cardboard piece | marker,2,0,0.0,0.0,0.0
130
+ Place material :: bead design | blue crafting material | hand,2,0,0.0,0.0,0.0
131
+ Place phone down :: beads | pencil holder | smartphone | yellow paper strip,2,0,0.0,0.0,0.0
132
+ Place piece into puzzle :: jigsaw puzzle | puzzle piece,3,0,0.0,0.0,0.0
133
+ Place puzzle piece :: puzzle board | puzzle piece,3,0,0.0,0.0,0.0
134
+ Place smartphone down :: power bank | quilling paper strips | small paper stars | smartphone,1,0,0.0,0.0,0.0
135
+ Place smartphone down :: quilling paper strips | small paper stars | smartphone,2,0,0.0,0.0,0.0
136
+ Place smartphone on stand :: saucepan | smartphone | smartphone stand | yellow jacket,1,0,0.0,0.0,0.0
137
+ Place towel :: pot | towel,1,0,0.0,0.0,0.0
138
+ Placing paper strip :: adhesive strip | paper cone | phone,2,0,0.0,0.0,0.0
139
+ Placing paper strip :: paper cone | phone | puzzle box | water bottle,2,0,0.0,0.0,0.0
140
+ Preparing to craft :: beads | chairs | table,2,0,0.0,0.0,0.0
141
+ Put down smartphone :: paper stars | paper strips | smartphone,2,0,0.0,0.0,0.0
142
+ Put down smartphone :: power bank | smartphone,1,0,0.0,0.0,0.0
143
+ Reach for another item :: item | shelf,1,0,0.0,0.0,0.0
144
+ Reach for cleaning supplies :: bowls | chopping board | cleaning fluid bottle,1,0,0.0,0.0,0.0
145
+ Reach for craft items :: smartphone | star-shaped craft items | table,3,0,0.0,0.0,0.0
146
+ Reach for next can :: can of vegetables | cardboard boxes | store shelf,1,0,0.0,0.0,0.0
147
+ Reach for next canned food :: box | canned food | retail shelf,1,0,0.0,0.0,0.0
148
+ Reach for next item :: packaged item | shelf,1,0,0.0,0.0,0.0
149
+ Reach for puzzle piece :: puzzle board | puzzle box | puzzle pieces,2,0,0.0,0.0,0.0
150
+ Reach for wire hangers :: cardboard box | wire hangers,1,0,0.0,0.0,0.0
151
+ Reach into box :: box of cans,1,0,0.0,0.0,0.0
152
+ Record count :: paper | pen | star-shaped beads,2,0,0.0,0.0,0.0
153
+ Release cardboard piece and gesture :: cardboard piece | cardboard piles | marker | pouch | ruler | scissors,2,0,0.0,0.0,0.0
154
+ Release hook :: display hook,1,0,0.0,0.0,0.0
155
+ Release lantern :: red paper lantern,1,0,0.0,0.0,0.0
156
+ Release paper strip :: craft beads | paper strip | scissors | smartphone,4,0,0.0,0.0,0.0
157
+ Release puzzle piece :: jigsaw puzzle | puzzle box | puzzle pieces,2,0,0.0,0.0,0.0
158
+ Release scissors :: craft paper strips | folded paper fan | scissors | smartphone,3,0,0.0,0.0,0.0
159
+ Release smartphone :: paper scraps | scissors | smartphone | table,3,0,0.0,0.0,0.0
160
+ Remove cleaning bottle :: cleaning bottle | plastic bowls,1,0,0.0,0.0,0.0
161
+ Remove paper lantern part from packaging :: paper lantern | red hand fan,1,0,0.0,0.0,0.0
162
+ Remove plastic packaging :: packaging | paper lantern component,1,0,0.0,0.0,0.0
163
+ Reposition hand :: cardboard | utility knife,3,0,0.0,0.0,0.0
164
+ Resume observation :: cardboard pieces | marker | pencil case | ruler | scissors,2,0,0.0,0.0,0.0
165
+ Retrieve canned food from box :: box | canned food,1,0,0.0,0.0,0.0
166
+ Retrieve next canned food item :: canned food | cardboard box,1,0,0.0,0.0,0.0
167
+ Retrieving more beads :: paper | pen | star beads,2,0,0.0,0.0,0.0
168
+ Rinse cloth in sink :: cloth | sink | water faucet,1,0,0.0,0.0,0.0
169
+ Scroll smartphone screen :: paper stars | paper strips | smartphone,3,0,0.0,0.0,0.0
170
+ Search for puzzle piece :: jigsaw puzzle | puzzle pieces,2,0,0.0,0.0,0.0
171
+ Secure paper edges with adhesive :: adhesive strip | paper cone pieces | puzzle box | smartphone | water bottle,4,0,0.0,0.0,0.0
172
+ Securing paper structure :: decorative stars | paper strips | smartphone | water bottle,4,0,0.0,0.0,0.0
173
+ Sort and adjust button line :: buttons | smartphone | table,3,0,0.0,0.0,0.0
174
+ Sort and arrange buttons :: buttons | smartphones | soda can | table,3,0,0.0,0.0,0.0
175
+ Sort and count beads :: paper | pen | star beads,2,0,0.0,0.0,0.0
176
+ Sort and place buttons :: buttons | coca-cola can | smartphone,3,0,0.0,0.0,0.0
177
+ Sort beads :: paper | pen | smartphone | star-shaped beads,2,0,0.0,0.0,0.0
178
+ Sort beads and write count :: paper | pen | power bank | smartphone | star-shaped beads,1,0,0.0,0.0,0.0
179
+ Sort button :: buttons | cell phone,3,0,0.0,0.0,0.0
180
+ Sort buttons :: buttons | table,3,0,0.0,0.0,0.0
181
+ Sort craft items :: star-shaped craft items | table,3,0,0.0,0.0,0.0
182
+ Sort puzzle pieces :: jigsaw puzzle pieces | table,2,0,0.0,0.0,0.0
183
+ Sort small craft pieces :: paper strips | scissors | smartphone | star-shaped craft pieces,3,0,0.0,0.0,0.0
184
+ Sort star-shaped beads :: marker | mobile phone | paper | power bank | star-shaped beads,1,0,0.0,0.0,0.0
185
+ Start cutting :: cardboard | utility knife,2,0,0.0,0.0,0.0
186
+ Stir contents :: cooking utensil | pot,1,0,0.0,0.0,0.0
187
+ Use phone :: beads | paper | power bank | smartphone,2,0,0.0,0.0,0.0
188
+ Use phone while crafting :: beads | pencil holder | smartphone | yellow paper strip,1,0,0.0,0.0,0.0
189
+ Use smartphone :: buttons | chair | smartphone | table,2,0,0.0,0.0,0.0
190
+ Use smartphone :: buttons | smartphone | table,1,0,0.0,0.0,0.0
191
+ Use smartphone :: charging cable | paper strips | power bank | smartphone | star-shaped paper crafts,2,0,0.0,0.0,0.0
192
+ Use smartphone :: containers | kettle | kitchen counter | rice cooker | smartphone,1,0,0.0,0.0,0.0
193
+ Walk towards other aisles :: retail store aisle,1,0,0.0,0.0,0.0
194
+ Walk towards shelves :: cardboard boxes | red bin | shelving unit,1,0,0.0,0.0,0.0
195
+ Walking across the room :: chair | fire extinguisher | office | table,1,0,0.0,0.0,0.0
196
+ Walking in the hallway :: hallway | people | tables | vr headsets,3,0,0.0,0.0,0.0
197
+ Walking towards door :: door | dustpan,1,0,0.0,0.0,0.0
198
+ Washing hands in sink :: faucet | sink,1,0,0.0,0.0,0.0
199
+ Wipe kitchen counter :: door | laundry basket | mop,1,0,0.0,0.0,0.0
200
+ Wiping countertop :: cleaning cloth | countertop,1,0,0.0,0.0,0.0
201
+ Write count on paper :: marker | mobile phone | paper | power bank | star-shaped beads,1,0,0.0,0.0,0.0
202
+ Write on paper :: cardboard square | paper | pen | star beads,1,0,0.0,0.0,0.0
203
+ sort craft materials :: paper pieces | scissors | smartphone,3,0,0.0,0.0,0.0
204
+ Pick up and place canned goods on shelf :: bin | canned goods | shelf,0,4,0.0,0.0,0.0
205
+ Browsing smartphone content :: smartphone,0,60,0.0,0.0,0.0
206
+ Place item on shelf :: retail item | shelf,0,8,0.0,0.0,0.0
207
+ Place item on shelf :: canned food | shelf,0,7,0.0,0.0,0.0
208
+ <missing_pred_relation>,0,226,0.0,0.0,0.0
209
+ Approach packing area :: items to pack | packing area,0,11,0.0,0.0,0.0
210
+ Approach restocking supplies :: aisle | restocking supplies | shelf,0,1,0.0,0.0,0.0
211
+ unknown :: unknown,0,7,0.0,0.0,0.0
212
+ Approach table :: materials | table | tools | work surface,0,1,0.0,0.0,0.0
213
+ Place item in container :: canned goods | plastic container | shopping bag,0,2,0.0,0.0,0.0
214
+ Pick up canned food :: canned food | shelf,0,2,0.0,0.0,0.0
215
+ Adjusting items on the shelf :: items | shelf,0,1,0.0,0.0,0.0
216
+ Wipe hands :: hands | sink | water bottle,0,1,0.0,0.0,0.0
217
+ Approach the stove :: cooking pot | kitchen counter | stove,0,1,0.0,0.0,0.0
218
+ Place item on shelf :: plush toy | shelf,0,1,0.0,0.0,0.0
219
+ Place item on shelf :: canned goods | shelf,0,1,0.0,0.0,0.0
220
+ Adjust cardboard :: cardboard pieces | workspace surface,0,1,0.0,0.0,0.0
221
+ Adjust cardboard :: cardboard | cutting mat | glue stick | marker | measuring tape | ruler | scissors | work table,0,1,0.0,0.0,0.0
222
+ Approach packing area :: cardboard pieces | marker | ruler | scissors | work table,0,1,0.0,0.0,0.0
223
+ Adjust cardboard :: cardboard piece | marker | ruler | workspace surface,0,2,0.0,0.0,0.0
224
+ Adjust cardboard position :: cardboard piece | marker | ruler | scissors | work surface,0,1,0.0,0.0,0.0
225
+ Adjust cardboard position :: cardboard piece | hands | work surface,0,1,0.0,0.0,0.0
226
+ Adjust cardboard :: cardboard piece | glue stick | marker | measuring tape | pencil | ruler | scissors | work table,0,1,0.0,0.0,0.0
227
+ Adjust cardboard :: cardboard | glue stick | marker | measuring tape | pencil | ruler | scissors | work table,0,1,0.0,0.0,0.0
228
+ Adjust cardboard position :: cardboard piece | scissors | work table,0,1,0.0,0.0,0.0
229
+ Adjust cardboard position :: cardboard piece | scissors | work surface,0,2,0.0,0.0,0.0
230
+ Cut cardboard with utility knife :: cardboard | ruler | table | utility knife,0,3,0.0,0.0,0.0
231
+ Cut cardboard :: cardboard | glue | marker | measuring tape | pen | ruler | scissors | work surface,0,1,0.0,0.0,0.0
232
+ Adjusting items on shelf :: containers | retail items | shelf,0,2,0.0,0.0,0.0
233
+ Approach packing area :: container | items to pack | packing area,0,1,0.0,0.0,0.0
234
+ Adjust cardboard divider :: cardboard divider | retail items | shelf,0,1,0.0,0.0,0.0
235
+ Adjusting items on shelf :: retail items | shelf,0,3,0.0,0.0,0.0
236
+ Approach packing area :: boxes | items to pack | packing area,0,2,0.0,0.0,0.0
237
+ Adjust grip :: canned food | plastic container | shelf,0,2,0.0,0.0,0.0
238
+ Adjust item on shelf :: retail items | shelf,0,1,0.0,0.0,0.0
239
+ Approach packing area :: cardboard pieces | packing area | storage bins,0,1,0.0,0.0,0.0
240
+ Adjust cardboard position :: cardboard pieces | scissors | table surface,0,1,0.0,0.0,0.0
241
+ Adjust cardboard divider :: cardboard divider | cardboard pieces | table surface,0,1,0.0,0.0,0.0
242
+ Adjust cardboard divider :: cardboard divider | cardboard pieces | scissors | table,0,1,0.0,0.0,0.0
243
+ Place canned food on shelf :: canned food | shelf,0,9,0.0,0.0,0.0
244
+ Pick up canned food :: bin | canned food | shelf,0,1,0.0,0.0,0.0
245
+ Pick up canned food :: box | canned food | shelf,0,1,0.0,0.0,0.0
246
+ Pick up canned food :: canned food | container | shelf,0,1,0.0,0.0,0.0
247
+ Adjust canned food on shelf :: canned food | shelf,0,1,0.0,0.0,0.0
248
+ Move along the shelf :: retail items | shelf,0,1,0.0,0.0,0.0
249
+ Picking up and placing canned goods :: canned goods | shelf,0,1,0.0,0.0,0.0
250
+ Move along the shelf :: canned goods | plastic container | shelf,0,1,0.0,0.0,0.0
251
+ Adjust item on shelf :: retail item | shelf,0,1,0.0,0.0,0.0
252
+ Browsing smartphone content :: retail items | shelf | smartphone,0,1,0.0,0.0,0.0
253
+ Manipulate paper strip :: paper strip | scissors | table,0,7,0.0,0.0,0.0
254
+ Manipulate paper strip :: marker | paper strip | pink cloth | ruler | scissors | smartphone | table | water bottle,0,1,0.0,0.0,0.0
255
+ Adjust grip :: hand | red container,0,1,0.0,0.0,0.0
256
+ Approach packing area :: boxes | packaging materials | red hat | table,0,1,0.0,0.0,0.0
257
+ Approach packing area :: boxes | packaging materials | red cap | table,0,1,0.0,0.0,0.0
258
+ Manipulate paper strip :: paper strip,0,3,0.0,0.0,0.0
259
+ Adjust lantern string :: handle | lantern | string,0,1,0.0,0.0,0.0
260
+ Adjust grip :: lid | red container,0,1,0.0,0.0,0.0
261
+ Approach packing area :: items to pack | packing area | packing materials,0,1,0.0,0.0,0.0
262
+ Adjust lantern string :: lantern handle | lantern string | paper lantern,0,2,0.0,0.0,0.0
263
+ Adjust grip :: red container | small white object,0,1,0.0,0.0,0.0
264
+ Adjust lantern string :: handle component | paper lantern | string,0,1,0.0,0.0,0.0
265
+ Approach table :: people | table,0,1,0.0,0.0,0.0
266
+ Approach table :: items on table | people | table,0,1,0.0,0.0,0.0
267
+ Adjust Mahjong tile :: mahjong tile | mahjong tile stack | table surface,0,2,0.0,0.0,0.0
268
+ Adjusting and placing down paper pieces :: hand | paper pieces | table,0,1,0.0,0.0,0.0
269
+ Approach table :: cardboard | chair | mahjong tiles | marker | paper | ruler | scissors | table,0,2,0.0,0.0,0.0
270
+ Approach table :: beads | cardboard | marker | paper | pen | ruler | scissors | table,0,1,0.0,0.0,0.0
271
+ Manipulate paper strip :: craft materials | hands | marker | paper strip | ruler | scissors | table | workspace,0,5,0.0,0.0,0.0
272
+ Manipulate paper strip :: craft materials | glue | hands | marker | paper strip | ruler | scissors | table,0,2,0.0,0.0,0.0
273
+ Adjust bead piles :: beads | table,0,3,0.0,0.0,0.0
274
+ Manipulate paper strip :: beads | glue | marker | paper strip | ruler | scissors | smartphone | table,0,1,0.0,0.0,0.0
275
+ Manipulate paper strip :: beads | mahjong tiles | marker | paper strip | ruler | scissors | smartphone | table,0,1,0.0,0.0,0.0
276
+ Manipulate colorful pieces :: buttons | table,0,4,0.0,0.0,0.0
277
+ Manipulate colorful pieces :: buttons,0,1,0.0,0.0,0.0
278
+ Adjust Mahjong tile :: mahjong tile | mahjong tile stack | table,0,1,0.0,0.0,0.0
279
+ Approach packing area :: cardboard box | items to pack | packing area,0,1,0.0,0.0,0.0
280
+ Interacting with colleagues :: person,0,2,0.0,0.0,0.0
281
+ Gathering items :: boxes | chairs | equipment | materials | people | supplies | tables | tools,0,1,0.0,0.0,0.0
282
+ Interacting with phone :: ceiling | chair | door | floor | person | smartphone | table | wall,0,2,0.0,0.0,0.0
283
+ Manipulate paper strip :: cardboard | craft materials | hands | marker | paper strip | ruler | scissors | table,0,1,0.0,0.0,0.0
284
+ Write on paper :: paper | pen | star beads,0,4,0.0,0.0,0.0
285
+ Manipulate paper strip :: chair | glue | marker | paper strip | person | ruler | scissors | table,0,1,0.0,0.0,0.0
286
+ Sort and arrange buttons :: buttons | table,0,3,0.0,0.0,0.0
results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/predictions.csv ADDED
The diff for this file is too large to render. See raw diff
 
results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json ADDED
The diff for this file is too large to render. See raw diff
 
results/omni_finetune/model_output_task_probes_20260616/summary.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "generated_at_utc": "2026-06-16T13:35:37+00:00",
3
+ "methods": {
4
+ "cosmos3_nano_future_window": {
5
+ "label": "Cosmos3-Nano Future Window",
6
+ "reason": "verified future-window predictions do not contain object-set fields",
7
+ "source_prediction_jsonl": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/future_predictions.jsonl",
8
+ "status": "unsupported_without_required_fields"
9
+ },
10
+ "cosmos3_super_reasoner": {
11
+ "action_object_relation_accuracy": 0.0,
12
+ "action_object_relation_macro_f1": 0.0,
13
+ "label": "Cosmos3-Super Reasoner",
14
+ "scored_rows": 446,
15
+ "source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
16
+ "source_prediction_jsonl": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/predictions.jsonl",
17
+ "status": "scored",
18
+ "valid_pred_relation_rate": 0.49327354260089684
19
+ },
20
+ "qwen3_omni_v6_lora": {
21
+ "action_object_relation_accuracy": 0.000996512207274539,
22
+ "action_object_relation_macro_f1": 0.0002220083079671497,
23
+ "label": "Qwen3-Omni v6 LoRA",
24
+ "scored_rows": 4014,
25
+ "source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
26
+ "source_prediction_jsonl": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/predictions.jsonl",
27
+ "status": "scored",
28
+ "valid_pred_relation_rate": 0.9990034877927254
29
+ }
30
+ },
31
+ "scope": "Task-specific scoring from existing verified held-out model outputs. No new model inference, training, or target backfilling is performed.",
32
+ "scored_method_task_count_added": 2,
33
+ "status": "pass",
34
+ "task_count_added_to_matrix": 1,
35
+ "title": "Existing Model-Output Task Probes"
36
+ }