Add files using upload-large-folder tool
Browse files- docs/data/artifact_index.json +56 -34
- docs/data/episode128_task_model_radar.json +52 -52
- docs/data/mirror_parity.json +479 -206
- docs/data/public_surface_qa.json +7 -7
- docs/data/publication_audit.json +9 -9
- docs/data/quality_gates.json +1 -1
- docs/data/scope_claims_audit.json +1 -1
- docs/data/single_episode_task_model_radar.json +1 -1
- docs/data/source_alignment_audit.json +1 -1
- docs/data/task_method_20_result_matrix.json +36 -36
- docs/data/task_surface_integrity.json +1 -1
- docs/data/unified_task_model_radar.json +60 -60
- results/omni_finetune/model_output_task_probes_20260616/RUN_REPORT.md +13 -0
- results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json +320 -0
- results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/per_class_metrics.csv +286 -0
- results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/predictions.csv +0 -0
- results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json +0 -0
- results/omni_finetune/model_output_task_probes_20260616/summary.json +36 -0
docs/data/artifact_index.json
CHANGED
|
@@ -1,13 +1,13 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Artifact Index",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"status": "pass",
|
| 5 |
-
"artifact_count":
|
| 6 |
"missing": [],
|
| 7 |
"by_kind": {
|
| 8 |
"project_path": 14,
|
| 9 |
"scaleup_contract": 7,
|
| 10 |
-
"scaleup_status":
|
| 11 |
"publication_workflow": 6,
|
| 12 |
"reproducibility": 4,
|
| 13 |
"project_scope": 1,
|
|
@@ -16,7 +16,7 @@
|
|
| 16 |
"website_data": 10,
|
| 17 |
"generated_figure": 7,
|
| 18 |
"visualization_builder": 1,
|
| 19 |
-
"model_result":
|
| 20 |
"result_interpretation": 5,
|
| 21 |
"metrics_source": 27,
|
| 22 |
"visual_evidence": 7,
|
|
@@ -465,7 +465,7 @@
|
|
| 465 |
"shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
|
| 466 |
"exists": true,
|
| 467 |
"bytes": 4432,
|
| 468 |
-
"sha256": "
|
| 469 |
},
|
| 470 |
{
|
| 471 |
"id": "source_alignment_validator",
|
|
@@ -585,8 +585,8 @@
|
|
| 585 |
"surface": "website_hf",
|
| 586 |
"shows": "Stores normalized 20-axis radar values, raw task metrics, Qwen3/Cosmos overlay mappings, branch-card caveats, and explicit scoreless status records.",
|
| 587 |
"exists": true,
|
| 588 |
-
"bytes":
|
| 589 |
-
"sha256": "
|
| 590 |
},
|
| 591 |
{
|
| 592 |
"id": "single_episode_task_model_radar_json",
|
|
@@ -597,7 +597,7 @@
|
|
| 597 |
"shows": "Machine-readable split radar for the one-episode Minimal and Neural MLP baselines, both scored on all 20 task contracts.",
|
| 598 |
"exists": true,
|
| 599 |
"bytes": 50973,
|
| 600 |
-
"sha256": "
|
| 601 |
},
|
| 602 |
{
|
| 603 |
"id": "episode128_task_model_radar_json",
|
|
@@ -607,8 +607,8 @@
|
|
| 607 |
"surface": "website_hf",
|
| 608 |
"shows": "Machine-readable split radar for selected 128-episode metadata/raw baselines and verified Qwen3/Cosmos branches, preserving explicit scoreless cells.",
|
| 609 |
"exists": true,
|
| 610 |
-
"bytes":
|
| 611 |
-
"sha256": "
|
| 612 |
},
|
| 613 |
{
|
| 614 |
"id": "task_method_20_result_matrix_json",
|
|
@@ -618,8 +618,8 @@
|
|
| 618 |
"surface": "website_hf",
|
| 619 |
"shows": "Machine-readable 9-method by 20-task matrix where every method has 20 records and scoreless cells carry unsupported/not-evaluated reasons.",
|
| 620 |
"exists": true,
|
| 621 |
-
"bytes":
|
| 622 |
-
"sha256": "
|
| 623 |
},
|
| 624 |
{
|
| 625 |
"id": "task_method_20_result_matrix",
|
|
@@ -629,8 +629,8 @@
|
|
| 629 |
"surface": "repo_hf",
|
| 630 |
"shows": "Reader-facing table that separates 20 records per method from numeric scored axes, documented raw128 proxy scores, unsupported metadata targets, and model targets not evaluated in verified packages.",
|
| 631 |
"exists": true,
|
| 632 |
-
"bytes":
|
| 633 |
-
"sha256": "
|
| 634 |
},
|
| 635 |
{
|
| 636 |
"id": "task_method_20_gap_audit_json",
|
|
@@ -638,10 +638,10 @@
|
|
| 638 |
"path": "docs/data/task_method_20_gap_audit.json",
|
| 639 |
"kind": "website_data",
|
| 640 |
"surface": "website_hf",
|
| 641 |
-
"shows": "Machine-readable 180-record gap ledger with
|
| 642 |
"exists": true,
|
| 643 |
-
"bytes":
|
| 644 |
-
"sha256": "
|
| 645 |
},
|
| 646 |
{
|
| 647 |
"id": "task_method_20_gap_audit",
|
|
@@ -651,8 +651,8 @@
|
|
| 651 |
"surface": "repo_hf",
|
| 652 |
"shows": "Reader-facing ledger that lists every scoreless method-task cell and the concrete target or model-output evidence required before it can become numeric.",
|
| 653 |
"exists": true,
|
| 654 |
-
"bytes":
|
| 655 |
-
"sha256": "
|
| 656 |
},
|
| 657 |
{
|
| 658 |
"id": "unified_task_model_radar_chart",
|
|
@@ -662,8 +662,8 @@
|
|
| 662 |
"surface": "website_hf",
|
| 663 |
"shows": "Compares minimal and neural MLP baselines across all 20 tasks, with Qwen3/Cosmos task-aligned model overlays.",
|
| 664 |
"exists": true,
|
| 665 |
-
"bytes":
|
| 666 |
-
"sha256": "
|
| 667 |
},
|
| 668 |
{
|
| 669 |
"id": "single_episode_task_model_radar_chart",
|
|
@@ -684,8 +684,8 @@
|
|
| 684 |
"surface": "website_hf",
|
| 685 |
"shows": "Separates the selected 128-episode methods: raw-feature simple/NN as complete 20/20 scored polygons and metadata/Qwen/Cosmos as task-aligned overlays.",
|
| 686 |
"exists": true,
|
| 687 |
-
"bytes":
|
| 688 |
-
"sha256": "
|
| 689 |
},
|
| 690 |
{
|
| 691 |
"id": "unified_task_model_radar_builder",
|
|
@@ -695,8 +695,8 @@
|
|
| 695 |
"surface": "repo_hf",
|
| 696 |
"shows": "Regenerates the direction-aware radar chart and machine-readable metric overlay JSON.",
|
| 697 |
"exists": true,
|
| 698 |
-
"bytes":
|
| 699 |
-
"sha256": "
|
| 700 |
},
|
| 701 |
{
|
| 702 |
"id": "task_method_20_gap_audit_builder",
|
|
@@ -706,8 +706,8 @@
|
|
| 706 |
"surface": "repo_hf",
|
| 707 |
"shows": "Regenerates the public gap audit from the 9-method by 20-task matrix without inventing scores for unsupported or unevaluated cells.",
|
| 708 |
"exists": true,
|
| 709 |
-
"bytes":
|
| 710 |
-
"sha256": "
|
| 711 |
},
|
| 712 |
{
|
| 713 |
"id": "all_task_model_scoring_waiter",
|
|
@@ -742,6 +742,28 @@
|
|
| 742 |
"bytes": 9133,
|
| 743 |
"sha256": "3a867d0333fe591999715158e311011db25da018ca39c9b4638930841f35efb8"
|
| 744 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 745 |
{
|
| 746 |
"id": "a100_128_metadata_task_baselines",
|
| 747 |
"title": "128-episode metadata task baselines",
|
|
@@ -949,7 +971,7 @@
|
|
| 949 |
"shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
|
| 950 |
"exists": true,
|
| 951 |
"bytes": 8100,
|
| 952 |
-
"sha256": "
|
| 953 |
},
|
| 954 |
{
|
| 955 |
"id": "public_surface_qa",
|
|
@@ -971,7 +993,7 @@
|
|
| 971 |
"volatile": true,
|
| 972 |
"shows": "Machine-readable report for SEO/social metadata, accessible tab semantics, public links, project links, and clear project presentation.",
|
| 973 |
"exists": true,
|
| 974 |
-
"bytes":
|
| 975 |
"hash_policy": "existence_and_size_only"
|
| 976 |
},
|
| 977 |
{
|
|
@@ -1096,8 +1118,8 @@
|
|
| 1096 |
"surface": "repo_hf",
|
| 1097 |
"shows": "Generates the selective artifact catalog from local files.",
|
| 1098 |
"exists": true,
|
| 1099 |
-
"bytes":
|
| 1100 |
-
"sha256": "
|
| 1101 |
},
|
| 1102 |
{
|
| 1103 |
"id": "publication_audit",
|
|
@@ -1108,7 +1130,7 @@
|
|
| 1108 |
"volatile": true,
|
| 1109 |
"shows": "Confirms public bundles exclude raw data, caches, heavy archives, and credential text.",
|
| 1110 |
"exists": true,
|
| 1111 |
-
"bytes":
|
| 1112 |
"hash_policy": "existence_and_size_only"
|
| 1113 |
},
|
| 1114 |
{
|
|
@@ -1132,7 +1154,7 @@
|
|
| 1132 |
"volatile": true,
|
| 1133 |
"shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
|
| 1134 |
"exists": true,
|
| 1135 |
-
"bytes":
|
| 1136 |
"hash_policy": "existence_and_size_only"
|
| 1137 |
},
|
| 1138 |
{
|
|
@@ -1144,7 +1166,7 @@
|
|
| 1144 |
"volatile": true,
|
| 1145 |
"shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
|
| 1146 |
"exists": true,
|
| 1147 |
-
"bytes":
|
| 1148 |
"hash_policy": "existence_and_size_only"
|
| 1149 |
},
|
| 1150 |
{
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Artifact Index",
|
| 3 |
+
"generated_at_utc": "2026-06-16T13:35:38+00:00",
|
| 4 |
"status": "pass",
|
| 5 |
+
"artifact_count": 191,
|
| 6 |
"missing": [],
|
| 7 |
"by_kind": {
|
| 8 |
"project_path": 14,
|
| 9 |
"scaleup_contract": 7,
|
| 10 |
+
"scaleup_status": 42,
|
| 11 |
"publication_workflow": 6,
|
| 12 |
"reproducibility": 4,
|
| 13 |
"project_scope": 1,
|
|
|
|
| 16 |
"website_data": 10,
|
| 17 |
"generated_figure": 7,
|
| 18 |
"visualization_builder": 1,
|
| 19 |
+
"model_result": 3,
|
| 20 |
"result_interpretation": 5,
|
| 21 |
"metrics_source": 27,
|
| 22 |
"visual_evidence": 7,
|
|
|
|
| 465 |
"shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
|
| 466 |
"exists": true,
|
| 467 |
"bytes": 4432,
|
| 468 |
+
"sha256": "4c6e5505884c13ad9d60195e977d1554adafe2a83d07307644532d77f36850bb"
|
| 469 |
},
|
| 470 |
{
|
| 471 |
"id": "source_alignment_validator",
|
|
|
|
| 585 |
"surface": "website_hf",
|
| 586 |
"shows": "Stores normalized 20-axis radar values, raw task metrics, Qwen3/Cosmos overlay mappings, branch-card caveats, and explicit scoreless status records.",
|
| 587 |
"exists": true,
|
| 588 |
+
"bytes": 231251,
|
| 589 |
+
"sha256": "5cbcdf8e3a659ef98bbc623ee3e434055ad6479599c95178587b3dab84a6072d"
|
| 590 |
},
|
| 591 |
{
|
| 592 |
"id": "single_episode_task_model_radar_json",
|
|
|
|
| 597 |
"shows": "Machine-readable split radar for the one-episode Minimal and Neural MLP baselines, both scored on all 20 task contracts.",
|
| 598 |
"exists": true,
|
| 599 |
"bytes": 50973,
|
| 600 |
+
"sha256": "10f9ca6afc9f9d69d2380faab2e2f1ebd0114bf7ab778ea30188ba849e9a44c1"
|
| 601 |
},
|
| 602 |
{
|
| 603 |
"id": "episode128_task_model_radar_json",
|
|
|
|
| 607 |
"surface": "website_hf",
|
| 608 |
"shows": "Machine-readable split radar for selected 128-episode metadata/raw baselines and verified Qwen3/Cosmos branches, preserving explicit scoreless cells.",
|
| 609 |
"exists": true,
|
| 610 |
+
"bytes": 187400,
|
| 611 |
+
"sha256": "fab3dc021cfeca3547b72fd2e1321a1258c0c48a8eaa5ea5cc28600badcd2c68"
|
| 612 |
},
|
| 613 |
{
|
| 614 |
"id": "task_method_20_result_matrix_json",
|
|
|
|
| 618 |
"surface": "website_hf",
|
| 619 |
"shows": "Machine-readable 9-method by 20-task matrix where every method has 20 records and scoreless cells carry unsupported/not-evaluated reasons.",
|
| 620 |
"exists": true,
|
| 621 |
+
"bytes": 129740,
|
| 622 |
+
"sha256": "e7e398b98e4e188ee883a126bd77ad966ba563f2c43da56e13e871b6dc90f1ea"
|
| 623 |
},
|
| 624 |
{
|
| 625 |
"id": "task_method_20_result_matrix",
|
|
|
|
| 629 |
"surface": "repo_hf",
|
| 630 |
"shows": "Reader-facing table that separates 20 records per method from numeric scored axes, documented raw128 proxy scores, unsupported metadata targets, and model targets not evaluated in verified packages.",
|
| 631 |
"exists": true,
|
| 632 |
+
"bytes": 4208,
|
| 633 |
+
"sha256": "bfe01b258fe121448d2b5b188c6995041c81d7ce1286dcd2ee0bf9c01d6a6193"
|
| 634 |
},
|
| 635 |
{
|
| 636 |
"id": "task_method_20_gap_audit_json",
|
|
|
|
| 638 |
"path": "docs/data/task_method_20_gap_audit.json",
|
| 639 |
"kind": "website_data",
|
| 640 |
"surface": "website_hf",
|
| 641 |
+
"shows": "Machine-readable 180-record gap ledger with numeric scores, scoreless cells, explicit status reasons, and next evidence needed before new scores can be published.",
|
| 642 |
"exists": true,
|
| 643 |
+
"bytes": 57943,
|
| 644 |
+
"sha256": "a1246c07c569238e8589fab0fb90e86733b7799816e8ef6690036a0fac289ecb"
|
| 645 |
},
|
| 646 |
{
|
| 647 |
"id": "task_method_20_gap_audit",
|
|
|
|
| 651 |
"surface": "repo_hf",
|
| 652 |
"shows": "Reader-facing ledger that lists every scoreless method-task cell and the concrete target or model-output evidence required before it can become numeric.",
|
| 653 |
"exists": true,
|
| 654 |
+
"bytes": 16234,
|
| 655 |
+
"sha256": "351e55637374770221115486fd6ef46550ae923c4fae5c01d3bb1e777b5a44fb"
|
| 656 |
},
|
| 657 |
{
|
| 658 |
"id": "unified_task_model_radar_chart",
|
|
|
|
| 662 |
"surface": "website_hf",
|
| 663 |
"shows": "Compares minimal and neural MLP baselines across all 20 tasks, with Qwen3/Cosmos task-aligned model overlays.",
|
| 664 |
"exists": true,
|
| 665 |
+
"bytes": 49695,
|
| 666 |
+
"sha256": "2ad63af1cd1e3e6f9c354b20792c20683e8877e2f92d9da5b5284dce56e668b7"
|
| 667 |
},
|
| 668 |
{
|
| 669 |
"id": "single_episode_task_model_radar_chart",
|
|
|
|
| 684 |
"surface": "website_hf",
|
| 685 |
"shows": "Separates the selected 128-episode methods: raw-feature simple/NN as complete 20/20 scored polygons and metadata/Qwen/Cosmos as task-aligned overlays.",
|
| 686 |
"exists": true,
|
| 687 |
+
"bytes": 43679,
|
| 688 |
+
"sha256": "51991fd33719be32da422f559b7d27895fc96b1332b0952f0c19b573d58c1325"
|
| 689 |
},
|
| 690 |
{
|
| 691 |
"id": "unified_task_model_radar_builder",
|
|
|
|
| 695 |
"surface": "repo_hf",
|
| 696 |
"shows": "Regenerates the direction-aware radar chart and machine-readable metric overlay JSON.",
|
| 697 |
"exists": true,
|
| 698 |
+
"bytes": 48861,
|
| 699 |
+
"sha256": "621ff19f057e6ed279388c8e944f2e4a2cfc87b9be1bd9f1feee6d329f0ec431"
|
| 700 |
},
|
| 701 |
{
|
| 702 |
"id": "task_method_20_gap_audit_builder",
|
|
|
|
| 706 |
"surface": "repo_hf",
|
| 707 |
"shows": "Regenerates the public gap audit from the 9-method by 20-task matrix without inventing scores for unsupported or unevaluated cells.",
|
| 708 |
"exists": true,
|
| 709 |
+
"bytes": 10094,
|
| 710 |
+
"sha256": "4ba5a43fa045b02303ba16a146ab15a891155370a85dd7c09cbe68f8f0eac7ed"
|
| 711 |
},
|
| 712 |
{
|
| 713 |
"id": "all_task_model_scoring_waiter",
|
|
|
|
| 742 |
"bytes": 9133,
|
| 743 |
"sha256": "3a867d0333fe591999715158e311011db25da018ca39c9b4638930841f35efb8"
|
| 744 |
},
|
| 745 |
+
{
|
| 746 |
+
"id": "existing_model_output_task_probe",
|
| 747 |
+
"title": "Existing model-output task probe package",
|
| 748 |
+
"path": "results/omni_finetune/model_output_task_probes_20260616/summary.json",
|
| 749 |
+
"kind": "model_result",
|
| 750 |
+
"surface": "repo_hf",
|
| 751 |
+
"shows": "Scores task 16 action-object relation only where verified held-out prediction JSON already contains action and object-set fields.",
|
| 752 |
+
"exists": true,
|
| 753 |
+
"bytes": 2000,
|
| 754 |
+
"sha256": "de95421d93fec5d12049e492db9fbe7c10af5c2cd0d1039da82c05ec82bf98c5"
|
| 755 |
+
},
|
| 756 |
+
{
|
| 757 |
+
"id": "existing_model_output_task_probe_script",
|
| 758 |
+
"title": "Existing model-output task probe scorer",
|
| 759 |
+
"path": "scripts/omni/score_existing_model_output_task_probes.py",
|
| 760 |
+
"kind": "scaleup_status",
|
| 761 |
+
"surface": "repo_hf",
|
| 762 |
+
"shows": "Derives task-specific scores from committed verified model outputs without running new inference or backfilling absent targets.",
|
| 763 |
+
"exists": true,
|
| 764 |
+
"bytes": 13291,
|
| 765 |
+
"sha256": "25b8e7ba394c1e604fe8d7c900a013b3ea330f32b393cdcd416b2d0f3a2727b8"
|
| 766 |
+
},
|
| 767 |
{
|
| 768 |
"id": "a100_128_metadata_task_baselines",
|
| 769 |
"title": "128-episode metadata task baselines",
|
|
|
|
| 971 |
"shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
|
| 972 |
"exists": true,
|
| 973 |
"bytes": 8100,
|
| 974 |
+
"sha256": "b9f9236ae07336c8984a65bcda558d49453c2e57f6c903a44235bec4fd0d8df0"
|
| 975 |
},
|
| 976 |
{
|
| 977 |
"id": "public_surface_qa",
|
|
|
|
| 993 |
"volatile": true,
|
| 994 |
"shows": "Machine-readable report for SEO/social metadata, accessible tab semantics, public links, project links, and clear project presentation.",
|
| 995 |
"exists": true,
|
| 996 |
+
"bytes": 6146,
|
| 997 |
"hash_policy": "existence_and_size_only"
|
| 998 |
},
|
| 999 |
{
|
|
|
|
| 1118 |
"surface": "repo_hf",
|
| 1119 |
"shows": "Generates the selective artifact catalog from local files.",
|
| 1120 |
"exists": true,
|
| 1121 |
+
"bytes": 54683,
|
| 1122 |
+
"sha256": "0a9e7c7391f0e2822d8490ddd216f155e1ce3a50d2ba3c90476ee09685f74d65"
|
| 1123 |
},
|
| 1124 |
{
|
| 1125 |
"id": "publication_audit",
|
|
|
|
| 1130 |
"volatile": true,
|
| 1131 |
"shows": "Confirms public bundles exclude raw data, caches, heavy archives, and credential text.",
|
| 1132 |
"exists": true,
|
| 1133 |
+
"bytes": 8298,
|
| 1134 |
"hash_policy": "existence_and_size_only"
|
| 1135 |
},
|
| 1136 |
{
|
|
|
|
| 1154 |
"volatile": true,
|
| 1155 |
"shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
|
| 1156 |
"exists": true,
|
| 1157 |
+
"bytes": 868302,
|
| 1158 |
"hash_policy": "existence_and_size_only"
|
| 1159 |
},
|
| 1160 |
{
|
|
|
|
| 1166 |
"volatile": true,
|
| 1167 |
"shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
|
| 1168 |
"exists": true,
|
| 1169 |
+
"bytes": 18933,
|
| 1170 |
"hash_policy": "existence_and_size_only"
|
| 1171 |
},
|
| 1172 |
{
|
docs/data/episode128_task_model_radar.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
{
|
| 2 |
"title": "128-Episode 20-Task Radar",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"description": "Selected 128-episode metadata/raw baselines plus verified Qwen3/Cosmos branches. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.",
|
| 6 |
"task_count": 20,
|
| 7 |
"method_count": 7,
|
| 8 |
"method_task_record_count": 140,
|
| 9 |
-
"scored_method_task_count":
|
| 10 |
"normalization_policy": {
|
| 11 |
"higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]",
|
| 12 |
"lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
|
|
@@ -124,20 +124,20 @@
|
|
| 124 |
"kind": "partial_128_episode_foundation_model_overlay",
|
| 125 |
"scope": "128 selected episodes, held-out test",
|
| 126 |
"stroke_dasharray": "7 7",
|
| 127 |
-
"method_detail": "Verified held-out Qwen3-Omni v6 LoRA metrics
|
| 128 |
"plotted_as": "colored point overlay",
|
| 129 |
"result_record_count": 20,
|
| 130 |
-
"scored_task_count":
|
| 131 |
-
"covered_task_count":
|
| 132 |
"proxy_scored_task_count": 0,
|
| 133 |
-
"scoreless_task_count":
|
| 134 |
"unsupported_task_count": 0,
|
| 135 |
-
"not_evaluated_task_count":
|
| 136 |
"status_counts": {
|
| 137 |
-
"not_evaluated_in_verified_package":
|
| 138 |
-
"scored":
|
| 139 |
},
|
| 140 |
-
"coverage_fraction": 0.
|
| 141 |
"result_record_fraction": 1.0
|
| 142 |
},
|
| 143 |
{
|
|
@@ -148,20 +148,20 @@
|
|
| 148 |
"kind": "partial_128_episode_foundation_model_overlay",
|
| 149 |
"scope": "128 selected episodes, held-out test",
|
| 150 |
"stroke_dasharray": "4 7",
|
| 151 |
-
"method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation.",
|
| 152 |
"plotted_as": "colored point overlay",
|
| 153 |
"result_record_count": 20,
|
| 154 |
-
"scored_task_count":
|
| 155 |
-
"covered_task_count":
|
| 156 |
"proxy_scored_task_count": 0,
|
| 157 |
-
"scoreless_task_count":
|
| 158 |
"unsupported_task_count": 0,
|
| 159 |
-
"not_evaluated_task_count":
|
| 160 |
"status_counts": {
|
| 161 |
-
"not_evaluated_in_verified_package":
|
| 162 |
-
"scored":
|
| 163 |
},
|
| 164 |
-
"coverage_fraction": 0.
|
| 165 |
"result_record_fraction": 1.0
|
| 166 |
},
|
| 167 |
{
|
|
@@ -1612,26 +1612,26 @@
|
|
| 1612 |
"status_label": "scored"
|
| 1613 |
},
|
| 1614 |
"qwen3_omni_v6_lora": {
|
| 1615 |
-
"raw":
|
| 1616 |
-
"metric_key": "
|
| 1617 |
-
"source":
|
| 1618 |
"scope": "multi_episode_128_partial_model_overlay",
|
| 1619 |
-
"status": "
|
| 1620 |
-
"reason":
|
| 1621 |
-
"normalized_score":
|
| 1622 |
-
"raw_text": "
|
| 1623 |
-
"status_label": "
|
| 1624 |
},
|
| 1625 |
"cosmos3_super_reasoner": {
|
| 1626 |
-
"raw":
|
| 1627 |
-
"metric_key": "
|
| 1628 |
-
"source":
|
| 1629 |
"scope": "multi_episode_128_partial_model_overlay",
|
| 1630 |
-
"status": "
|
| 1631 |
-
"reason":
|
| 1632 |
-
"normalized_score":
|
| 1633 |
-
"raw_text": "
|
| 1634 |
-
"status_label": "
|
| 1635 |
},
|
| 1636 |
"cosmos3_nano_future_window": {
|
| 1637 |
"raw": null,
|
|
@@ -3980,17 +3980,17 @@
|
|
| 3980 |
"task_label": "Action-Object Relation Prediction",
|
| 3981 |
"series_id": "qwen3_omni_v6_lora",
|
| 3982 |
"method": "Qwen3-Omni v6 LoRA",
|
| 3983 |
-
"status": "
|
| 3984 |
-
"status_label": "
|
| 3985 |
-
"scored":
|
| 3986 |
"proxy_scored": false,
|
| 3987 |
-
"raw":
|
| 3988 |
-
"raw_text": "
|
| 3989 |
-
"normalized_score":
|
| 3990 |
-
"metric_key": "
|
| 3991 |
-
"source":
|
| 3992 |
"scope": "multi_episode_128_partial_model_overlay",
|
| 3993 |
-
"reason":
|
| 3994 |
},
|
| 3995 |
{
|
| 3996 |
"task_number": 16,
|
|
@@ -3998,17 +3998,17 @@
|
|
| 3998 |
"task_label": "Action-Object Relation Prediction",
|
| 3999 |
"series_id": "cosmos3_super_reasoner",
|
| 4000 |
"method": "Cosmos3-Super Reasoner",
|
| 4001 |
-
"status": "
|
| 4002 |
-
"status_label": "
|
| 4003 |
-
"scored":
|
| 4004 |
"proxy_scored": false,
|
| 4005 |
-
"raw":
|
| 4006 |
-
"raw_text": "
|
| 4007 |
-
"normalized_score":
|
| 4008 |
-
"metric_key": "
|
| 4009 |
-
"source":
|
| 4010 |
"scope": "multi_episode_128_partial_model_overlay",
|
| 4011 |
-
"reason":
|
| 4012 |
},
|
| 4013 |
{
|
| 4014 |
"task_number": 16,
|
|
|
|
| 1 |
{
|
| 2 |
"title": "128-Episode 20-Task Radar",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-16T13:35:38+00:00",
|
| 5 |
"description": "Selected 128-episode metadata/raw baselines plus verified Qwen3/Cosmos branches. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.",
|
| 6 |
"task_count": 20,
|
| 7 |
"method_count": 7,
|
| 8 |
"method_task_record_count": 140,
|
| 9 |
+
"scored_method_task_count": 73,
|
| 10 |
"normalization_policy": {
|
| 11 |
"higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]",
|
| 12 |
"lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
|
|
|
|
| 124 |
"kind": "partial_128_episode_foundation_model_overlay",
|
| 125 |
"scope": "128 selected episodes, held-out test",
|
| 126 |
"stroke_dasharray": "7 7",
|
| 127 |
+
"method_detail": "Verified held-out Qwen3-Omni v6 LoRA metrics, plus task 16 scored from existing verified action/object JSON.",
|
| 128 |
"plotted_as": "colored point overlay",
|
| 129 |
"result_record_count": 20,
|
| 130 |
+
"scored_task_count": 7,
|
| 131 |
+
"covered_task_count": 7,
|
| 132 |
"proxy_scored_task_count": 0,
|
| 133 |
+
"scoreless_task_count": 13,
|
| 134 |
"unsupported_task_count": 0,
|
| 135 |
+
"not_evaluated_task_count": 13,
|
| 136 |
"status_counts": {
|
| 137 |
+
"not_evaluated_in_verified_package": 13,
|
| 138 |
+
"scored": 7
|
| 139 |
},
|
| 140 |
+
"coverage_fraction": 0.35,
|
| 141 |
"result_record_fraction": 1.0
|
| 142 |
},
|
| 143 |
{
|
|
|
|
| 148 |
"kind": "partial_128_episode_foundation_model_overlay",
|
| 149 |
"scope": "128 selected episodes, held-out test",
|
| 150 |
"stroke_dasharray": "4 7",
|
| 151 |
+
"method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation, plus task 16 scored from existing verified action/object JSON.",
|
| 152 |
"plotted_as": "colored point overlay",
|
| 153 |
"result_record_count": 20,
|
| 154 |
+
"scored_task_count": 7,
|
| 155 |
+
"covered_task_count": 7,
|
| 156 |
"proxy_scored_task_count": 0,
|
| 157 |
+
"scoreless_task_count": 13,
|
| 158 |
"unsupported_task_count": 0,
|
| 159 |
+
"not_evaluated_task_count": 13,
|
| 160 |
"status_counts": {
|
| 161 |
+
"not_evaluated_in_verified_package": 13,
|
| 162 |
+
"scored": 7
|
| 163 |
},
|
| 164 |
+
"coverage_fraction": 0.35,
|
| 165 |
"result_record_fraction": 1.0
|
| 166 |
},
|
| 167 |
{
|
|
|
|
| 1612 |
"status_label": "scored"
|
| 1613 |
},
|
| 1614 |
"qwen3_omni_v6_lora": {
|
| 1615 |
+
"raw": 0.0002220083079671497,
|
| 1616 |
+
"metric_key": "action_object_relation_macro_f1",
|
| 1617 |
+
"source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
|
| 1618 |
"scope": "multi_episode_128_partial_model_overlay",
|
| 1619 |
+
"status": "scored",
|
| 1620 |
+
"reason": null,
|
| 1621 |
+
"normalized_score": 0.0002220083079671497,
|
| 1622 |
+
"raw_text": "0.0002",
|
| 1623 |
+
"status_label": "scored"
|
| 1624 |
},
|
| 1625 |
"cosmos3_super_reasoner": {
|
| 1626 |
+
"raw": 0.0,
|
| 1627 |
+
"metric_key": "action_object_relation_macro_f1",
|
| 1628 |
+
"source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
|
| 1629 |
"scope": "multi_episode_128_partial_model_overlay",
|
| 1630 |
+
"status": "scored",
|
| 1631 |
+
"reason": null,
|
| 1632 |
+
"normalized_score": 0.0,
|
| 1633 |
+
"raw_text": "0.0000",
|
| 1634 |
+
"status_label": "scored"
|
| 1635 |
},
|
| 1636 |
"cosmos3_nano_future_window": {
|
| 1637 |
"raw": null,
|
|
|
|
| 3980 |
"task_label": "Action-Object Relation Prediction",
|
| 3981 |
"series_id": "qwen3_omni_v6_lora",
|
| 3982 |
"method": "Qwen3-Omni v6 LoRA",
|
| 3983 |
+
"status": "scored",
|
| 3984 |
+
"status_label": "scored",
|
| 3985 |
+
"scored": true,
|
| 3986 |
"proxy_scored": false,
|
| 3987 |
+
"raw": 0.0002220083079671497,
|
| 3988 |
+
"raw_text": "0.0002",
|
| 3989 |
+
"normalized_score": 0.0002220083079671497,
|
| 3990 |
+
"metric_key": "action_object_relation_macro_f1",
|
| 3991 |
+
"source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
|
| 3992 |
"scope": "multi_episode_128_partial_model_overlay",
|
| 3993 |
+
"reason": null
|
| 3994 |
},
|
| 3995 |
{
|
| 3996 |
"task_number": 16,
|
|
|
|
| 3998 |
"task_label": "Action-Object Relation Prediction",
|
| 3999 |
"series_id": "cosmos3_super_reasoner",
|
| 4000 |
"method": "Cosmos3-Super Reasoner",
|
| 4001 |
+
"status": "scored",
|
| 4002 |
+
"status_label": "scored",
|
| 4003 |
+
"scored": true,
|
| 4004 |
"proxy_scored": false,
|
| 4005 |
+
"raw": 0.0,
|
| 4006 |
+
"raw_text": "0.0000",
|
| 4007 |
+
"normalized_score": 0.0,
|
| 4008 |
+
"metric_key": "action_object_relation_macro_f1",
|
| 4009 |
+
"source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
|
| 4010 |
"scope": "multi_episode_128_partial_model_overlay",
|
| 4011 |
+
"reason": null
|
| 4012 |
},
|
| 4013 |
{
|
| 4014 |
"task_number": 16,
|
docs/data/mirror_parity.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"hf_root": "hf_publish",
|
| 5 |
"summary": {
|
| 6 |
-
"group_count":
|
| 7 |
"failure_count": 0,
|
| 8 |
"failures_by_surface": {}
|
| 9 |
},
|
|
@@ -138,45 +138,45 @@
|
|
| 138 |
"local": {
|
| 139 |
"path": "repo:docs/data/artifact_index.json",
|
| 140 |
"exists": true,
|
| 141 |
-
"bytes":
|
| 142 |
-
"sha256": "
|
| 143 |
},
|
| 144 |
"mirrors": {
|
| 145 |
"hf_space": {
|
| 146 |
"path": "hf_space:data/artifact_index.json",
|
| 147 |
"exists": true,
|
| 148 |
-
"bytes":
|
| 149 |
-
"sha256": "
|
| 150 |
},
|
| 151 |
"hf_artifacts_data": {
|
| 152 |
"path": "hf_artifacts:data/artifact_index.json",
|
| 153 |
"exists": true,
|
| 154 |
-
"bytes":
|
| 155 |
-
"sha256": "
|
| 156 |
},
|
| 157 |
"hf_artifacts": {
|
| 158 |
"path": "hf_artifacts:docs/data/artifact_index.json",
|
| 159 |
"exists": true,
|
| 160 |
-
"bytes":
|
| 161 |
-
"sha256": "
|
| 162 |
},
|
| 163 |
"hf_model_data": {
|
| 164 |
"path": "hf_model:data/artifact_index.json",
|
| 165 |
"exists": true,
|
| 166 |
-
"bytes":
|
| 167 |
-
"sha256": "
|
| 168 |
},
|
| 169 |
"hf_model_docs_data": {
|
| 170 |
"path": "hf_model:docs/data/artifact_index.json",
|
| 171 |
"exists": true,
|
| 172 |
-
"bytes":
|
| 173 |
-
"sha256": "
|
| 174 |
},
|
| 175 |
"hf_model": {
|
| 176 |
"path": "hf_model:metrics/artifact_index.json",
|
| 177 |
"exists": true,
|
| 178 |
-
"bytes":
|
| 179 |
-
"sha256": "
|
| 180 |
}
|
| 181 |
},
|
| 182 |
"failures": []
|
|
@@ -825,44 +825,44 @@
|
|
| 825 |
"path": "repo:docs/data/publication_audit.json",
|
| 826 |
"exists": true,
|
| 827 |
"bytes": 8298,
|
| 828 |
-
"sha256": "
|
| 829 |
},
|
| 830 |
"mirrors": {
|
| 831 |
"hf_space": {
|
| 832 |
"path": "hf_space:data/publication_audit.json",
|
| 833 |
"exists": true,
|
| 834 |
"bytes": 8298,
|
| 835 |
-
"sha256": "
|
| 836 |
},
|
| 837 |
"hf_artifacts_data": {
|
| 838 |
"path": "hf_artifacts:data/publication_audit.json",
|
| 839 |
"exists": true,
|
| 840 |
"bytes": 8298,
|
| 841 |
-
"sha256": "
|
| 842 |
},
|
| 843 |
"hf_artifacts": {
|
| 844 |
"path": "hf_artifacts:docs/data/publication_audit.json",
|
| 845 |
"exists": true,
|
| 846 |
"bytes": 8298,
|
| 847 |
-
"sha256": "
|
| 848 |
},
|
| 849 |
"hf_model_data": {
|
| 850 |
"path": "hf_model:data/publication_audit.json",
|
| 851 |
"exists": true,
|
| 852 |
"bytes": 8298,
|
| 853 |
-
"sha256": "
|
| 854 |
},
|
| 855 |
"hf_model_docs_data": {
|
| 856 |
"path": "hf_model:docs/data/publication_audit.json",
|
| 857 |
"exists": true,
|
| 858 |
"bytes": 8298,
|
| 859 |
-
"sha256": "
|
| 860 |
},
|
| 861 |
"hf_model": {
|
| 862 |
"path": "hf_model:metrics/publication_audit.json",
|
| 863 |
"exists": true,
|
| 864 |
"bytes": 8298,
|
| 865 |
-
"sha256": "
|
| 866 |
}
|
| 867 |
},
|
| 868 |
"failures": []
|
|
@@ -874,44 +874,44 @@
|
|
| 874 |
"path": "repo:docs/data/public_surface_qa.json",
|
| 875 |
"exists": true,
|
| 876 |
"bytes": 6146,
|
| 877 |
-
"sha256": "
|
| 878 |
},
|
| 879 |
"mirrors": {
|
| 880 |
"hf_space": {
|
| 881 |
"path": "hf_space:data/public_surface_qa.json",
|
| 882 |
"exists": true,
|
| 883 |
"bytes": 6146,
|
| 884 |
-
"sha256": "
|
| 885 |
},
|
| 886 |
"hf_artifacts_data": {
|
| 887 |
"path": "hf_artifacts:data/public_surface_qa.json",
|
| 888 |
"exists": true,
|
| 889 |
"bytes": 6146,
|
| 890 |
-
"sha256": "
|
| 891 |
},
|
| 892 |
"hf_artifacts": {
|
| 893 |
"path": "hf_artifacts:docs/data/public_surface_qa.json",
|
| 894 |
"exists": true,
|
| 895 |
"bytes": 6146,
|
| 896 |
-
"sha256": "
|
| 897 |
},
|
| 898 |
"hf_model_data": {
|
| 899 |
"path": "hf_model:data/public_surface_qa.json",
|
| 900 |
"exists": true,
|
| 901 |
"bytes": 6146,
|
| 902 |
-
"sha256": "
|
| 903 |
},
|
| 904 |
"hf_model_docs_data": {
|
| 905 |
"path": "hf_model:docs/data/public_surface_qa.json",
|
| 906 |
"exists": true,
|
| 907 |
"bytes": 6146,
|
| 908 |
-
"sha256": "
|
| 909 |
},
|
| 910 |
"hf_model": {
|
| 911 |
"path": "hf_model:metrics/public_surface_qa.json",
|
| 912 |
"exists": true,
|
| 913 |
"bytes": 6146,
|
| 914 |
-
"sha256": "
|
| 915 |
}
|
| 916 |
},
|
| 917 |
"failures": []
|
|
@@ -1021,44 +1021,44 @@
|
|
| 1021 |
"path": "repo:docs/data/quality_gates.json",
|
| 1022 |
"exists": true,
|
| 1023 |
"bytes": 8100,
|
| 1024 |
-
"sha256": "
|
| 1025 |
},
|
| 1026 |
"mirrors": {
|
| 1027 |
"hf_space": {
|
| 1028 |
"path": "hf_space:data/quality_gates.json",
|
| 1029 |
"exists": true,
|
| 1030 |
"bytes": 8100,
|
| 1031 |
-
"sha256": "
|
| 1032 |
},
|
| 1033 |
"hf_artifacts_data": {
|
| 1034 |
"path": "hf_artifacts:data/quality_gates.json",
|
| 1035 |
"exists": true,
|
| 1036 |
"bytes": 8100,
|
| 1037 |
-
"sha256": "
|
| 1038 |
},
|
| 1039 |
"hf_artifacts": {
|
| 1040 |
"path": "hf_artifacts:docs/data/quality_gates.json",
|
| 1041 |
"exists": true,
|
| 1042 |
"bytes": 8100,
|
| 1043 |
-
"sha256": "
|
| 1044 |
},
|
| 1045 |
"hf_model_data": {
|
| 1046 |
"path": "hf_model:data/quality_gates.json",
|
| 1047 |
"exists": true,
|
| 1048 |
"bytes": 8100,
|
| 1049 |
-
"sha256": "
|
| 1050 |
},
|
| 1051 |
"hf_model_docs_data": {
|
| 1052 |
"path": "hf_model:docs/data/quality_gates.json",
|
| 1053 |
"exists": true,
|
| 1054 |
"bytes": 8100,
|
| 1055 |
-
"sha256": "
|
| 1056 |
},
|
| 1057 |
"hf_model": {
|
| 1058 |
"path": "hf_model:metrics/quality_gates.json",
|
| 1059 |
"exists": true,
|
| 1060 |
"bytes": 8100,
|
| 1061 |
-
"sha256": "
|
| 1062 |
}
|
| 1063 |
},
|
| 1064 |
"failures": []
|
|
@@ -1462,44 +1462,44 @@
|
|
| 1462 |
"path": "repo:docs/data/scope_claims_audit.json",
|
| 1463 |
"exists": true,
|
| 1464 |
"bytes": 21630,
|
| 1465 |
-
"sha256": "
|
| 1466 |
},
|
| 1467 |
"mirrors": {
|
| 1468 |
"hf_space": {
|
| 1469 |
"path": "hf_space:data/scope_claims_audit.json",
|
| 1470 |
"exists": true,
|
| 1471 |
"bytes": 21630,
|
| 1472 |
-
"sha256": "
|
| 1473 |
},
|
| 1474 |
"hf_artifacts_data": {
|
| 1475 |
"path": "hf_artifacts:data/scope_claims_audit.json",
|
| 1476 |
"exists": true,
|
| 1477 |
"bytes": 21630,
|
| 1478 |
-
"sha256": "
|
| 1479 |
},
|
| 1480 |
"hf_artifacts": {
|
| 1481 |
"path": "hf_artifacts:docs/data/scope_claims_audit.json",
|
| 1482 |
"exists": true,
|
| 1483 |
"bytes": 21630,
|
| 1484 |
-
"sha256": "
|
| 1485 |
},
|
| 1486 |
"hf_model_data": {
|
| 1487 |
"path": "hf_model:data/scope_claims_audit.json",
|
| 1488 |
"exists": true,
|
| 1489 |
"bytes": 21630,
|
| 1490 |
-
"sha256": "
|
| 1491 |
},
|
| 1492 |
"hf_model_docs_data": {
|
| 1493 |
"path": "hf_model:docs/data/scope_claims_audit.json",
|
| 1494 |
"exists": true,
|
| 1495 |
"bytes": 21630,
|
| 1496 |
-
"sha256": "
|
| 1497 |
},
|
| 1498 |
"hf_model": {
|
| 1499 |
"path": "hf_model:metrics/scope_claims_audit.json",
|
| 1500 |
"exists": true,
|
| 1501 |
"bytes": 21630,
|
| 1502 |
-
"sha256": "
|
| 1503 |
}
|
| 1504 |
},
|
| 1505 |
"failures": []
|
|
@@ -1560,44 +1560,44 @@
|
|
| 1560 |
"path": "repo:docs/data/source_alignment_audit.json",
|
| 1561 |
"exists": true,
|
| 1562 |
"bytes": 4432,
|
| 1563 |
-
"sha256": "
|
| 1564 |
},
|
| 1565 |
"mirrors": {
|
| 1566 |
"hf_space": {
|
| 1567 |
"path": "hf_space:data/source_alignment_audit.json",
|
| 1568 |
"exists": true,
|
| 1569 |
"bytes": 4432,
|
| 1570 |
-
"sha256": "
|
| 1571 |
},
|
| 1572 |
"hf_artifacts_data": {
|
| 1573 |
"path": "hf_artifacts:data/source_alignment_audit.json",
|
| 1574 |
"exists": true,
|
| 1575 |
"bytes": 4432,
|
| 1576 |
-
"sha256": "
|
| 1577 |
},
|
| 1578 |
"hf_artifacts": {
|
| 1579 |
"path": "hf_artifacts:docs/data/source_alignment_audit.json",
|
| 1580 |
"exists": true,
|
| 1581 |
"bytes": 4432,
|
| 1582 |
-
"sha256": "
|
| 1583 |
},
|
| 1584 |
"hf_model_data": {
|
| 1585 |
"path": "hf_model:data/source_alignment_audit.json",
|
| 1586 |
"exists": true,
|
| 1587 |
"bytes": 4432,
|
| 1588 |
-
"sha256": "
|
| 1589 |
},
|
| 1590 |
"hf_model_docs_data": {
|
| 1591 |
"path": "hf_model:docs/data/source_alignment_audit.json",
|
| 1592 |
"exists": true,
|
| 1593 |
"bytes": 4432,
|
| 1594 |
-
"sha256": "
|
| 1595 |
},
|
| 1596 |
"hf_model": {
|
| 1597 |
"path": "hf_model:metrics/source_alignment_audit.json",
|
| 1598 |
"exists": true,
|
| 1599 |
"bytes": 4432,
|
| 1600 |
-
"sha256": "
|
| 1601 |
}
|
| 1602 |
},
|
| 1603 |
"failures": []
|
|
@@ -1658,44 +1658,44 @@
|
|
| 1658 |
"path": "repo:docs/data/single_episode_task_model_radar.json",
|
| 1659 |
"exists": true,
|
| 1660 |
"bytes": 50973,
|
| 1661 |
-
"sha256": "
|
| 1662 |
},
|
| 1663 |
"mirrors": {
|
| 1664 |
"hf_space": {
|
| 1665 |
"path": "hf_space:data/single_episode_task_model_radar.json",
|
| 1666 |
"exists": true,
|
| 1667 |
"bytes": 50973,
|
| 1668 |
-
"sha256": "
|
| 1669 |
},
|
| 1670 |
"hf_artifacts_data": {
|
| 1671 |
"path": "hf_artifacts:data/single_episode_task_model_radar.json",
|
| 1672 |
"exists": true,
|
| 1673 |
"bytes": 50973,
|
| 1674 |
-
"sha256": "
|
| 1675 |
},
|
| 1676 |
"hf_artifacts": {
|
| 1677 |
"path": "hf_artifacts:docs/data/single_episode_task_model_radar.json",
|
| 1678 |
"exists": true,
|
| 1679 |
"bytes": 50973,
|
| 1680 |
-
"sha256": "
|
| 1681 |
},
|
| 1682 |
"hf_model_data": {
|
| 1683 |
"path": "hf_model:data/single_episode_task_model_radar.json",
|
| 1684 |
"exists": true,
|
| 1685 |
"bytes": 50973,
|
| 1686 |
-
"sha256": "
|
| 1687 |
},
|
| 1688 |
"hf_model_docs_data": {
|
| 1689 |
"path": "hf_model:docs/data/single_episode_task_model_radar.json",
|
| 1690 |
"exists": true,
|
| 1691 |
"bytes": 50973,
|
| 1692 |
-
"sha256": "
|
| 1693 |
},
|
| 1694 |
"hf_model": {
|
| 1695 |
"path": "hf_model:metrics/single_episode_task_model_radar.json",
|
| 1696 |
"exists": true,
|
| 1697 |
"bytes": 50973,
|
| 1698 |
-
"sha256": "
|
| 1699 |
}
|
| 1700 |
},
|
| 1701 |
"failures": []
|
|
@@ -1706,45 +1706,45 @@
|
|
| 1706 |
"local": {
|
| 1707 |
"path": "repo:docs/data/episode128_task_model_radar.json",
|
| 1708 |
"exists": true,
|
| 1709 |
-
"bytes":
|
| 1710 |
-
"sha256": "
|
| 1711 |
},
|
| 1712 |
"mirrors": {
|
| 1713 |
"hf_space": {
|
| 1714 |
"path": "hf_space:data/episode128_task_model_radar.json",
|
| 1715 |
"exists": true,
|
| 1716 |
-
"bytes":
|
| 1717 |
-
"sha256": "
|
| 1718 |
},
|
| 1719 |
"hf_artifacts_data": {
|
| 1720 |
"path": "hf_artifacts:data/episode128_task_model_radar.json",
|
| 1721 |
"exists": true,
|
| 1722 |
-
"bytes":
|
| 1723 |
-
"sha256": "
|
| 1724 |
},
|
| 1725 |
"hf_artifacts": {
|
| 1726 |
"path": "hf_artifacts:docs/data/episode128_task_model_radar.json",
|
| 1727 |
"exists": true,
|
| 1728 |
-
"bytes":
|
| 1729 |
-
"sha256": "
|
| 1730 |
},
|
| 1731 |
"hf_model_data": {
|
| 1732 |
"path": "hf_model:data/episode128_task_model_radar.json",
|
| 1733 |
"exists": true,
|
| 1734 |
-
"bytes":
|
| 1735 |
-
"sha256": "
|
| 1736 |
},
|
| 1737 |
"hf_model_docs_data": {
|
| 1738 |
"path": "hf_model:docs/data/episode128_task_model_radar.json",
|
| 1739 |
"exists": true,
|
| 1740 |
-
"bytes":
|
| 1741 |
-
"sha256": "
|
| 1742 |
},
|
| 1743 |
"hf_model": {
|
| 1744 |
"path": "hf_model:metrics/episode128_task_model_radar.json",
|
| 1745 |
"exists": true,
|
| 1746 |
-
"bytes":
|
| 1747 |
-
"sha256": "
|
| 1748 |
}
|
| 1749 |
},
|
| 1750 |
"failures": []
|
|
@@ -1854,44 +1854,44 @@
|
|
| 1854 |
"path": "repo:docs/data/task_surface_integrity.json",
|
| 1855 |
"exists": true,
|
| 1856 |
"bytes": 45779,
|
| 1857 |
-
"sha256": "
|
| 1858 |
},
|
| 1859 |
"mirrors": {
|
| 1860 |
"hf_space": {
|
| 1861 |
"path": "hf_space:data/task_surface_integrity.json",
|
| 1862 |
"exists": true,
|
| 1863 |
"bytes": 45779,
|
| 1864 |
-
"sha256": "
|
| 1865 |
},
|
| 1866 |
"hf_artifacts_data": {
|
| 1867 |
"path": "hf_artifacts:data/task_surface_integrity.json",
|
| 1868 |
"exists": true,
|
| 1869 |
"bytes": 45779,
|
| 1870 |
-
"sha256": "
|
| 1871 |
},
|
| 1872 |
"hf_artifacts": {
|
| 1873 |
"path": "hf_artifacts:docs/data/task_surface_integrity.json",
|
| 1874 |
"exists": true,
|
| 1875 |
"bytes": 45779,
|
| 1876 |
-
"sha256": "
|
| 1877 |
},
|
| 1878 |
"hf_model_data": {
|
| 1879 |
"path": "hf_model:data/task_surface_integrity.json",
|
| 1880 |
"exists": true,
|
| 1881 |
"bytes": 45779,
|
| 1882 |
-
"sha256": "
|
| 1883 |
},
|
| 1884 |
"hf_model_docs_data": {
|
| 1885 |
"path": "hf_model:docs/data/task_surface_integrity.json",
|
| 1886 |
"exists": true,
|
| 1887 |
"bytes": 45779,
|
| 1888 |
-
"sha256": "
|
| 1889 |
},
|
| 1890 |
"hf_model": {
|
| 1891 |
"path": "hf_model:metrics/task_surface_integrity.json",
|
| 1892 |
"exists": true,
|
| 1893 |
"bytes": 45779,
|
| 1894 |
-
"sha256": "
|
| 1895 |
}
|
| 1896 |
},
|
| 1897 |
"failures": []
|
|
@@ -1951,45 +1951,45 @@
|
|
| 1951 |
"local": {
|
| 1952 |
"path": "repo:docs/data/task_method_20_result_matrix.json",
|
| 1953 |
"exists": true,
|
| 1954 |
-
"bytes":
|
| 1955 |
-
"sha256": "
|
| 1956 |
},
|
| 1957 |
"mirrors": {
|
| 1958 |
"hf_space": {
|
| 1959 |
"path": "hf_space:data/task_method_20_result_matrix.json",
|
| 1960 |
"exists": true,
|
| 1961 |
-
"bytes":
|
| 1962 |
-
"sha256": "
|
| 1963 |
},
|
| 1964 |
"hf_artifacts_data": {
|
| 1965 |
"path": "hf_artifacts:data/task_method_20_result_matrix.json",
|
| 1966 |
"exists": true,
|
| 1967 |
-
"bytes":
|
| 1968 |
-
"sha256": "
|
| 1969 |
},
|
| 1970 |
"hf_artifacts": {
|
| 1971 |
"path": "hf_artifacts:docs/data/task_method_20_result_matrix.json",
|
| 1972 |
"exists": true,
|
| 1973 |
-
"bytes":
|
| 1974 |
-
"sha256": "
|
| 1975 |
},
|
| 1976 |
"hf_model_data": {
|
| 1977 |
"path": "hf_model:data/task_method_20_result_matrix.json",
|
| 1978 |
"exists": true,
|
| 1979 |
-
"bytes":
|
| 1980 |
-
"sha256": "
|
| 1981 |
},
|
| 1982 |
"hf_model_docs_data": {
|
| 1983 |
"path": "hf_model:docs/data/task_method_20_result_matrix.json",
|
| 1984 |
"exists": true,
|
| 1985 |
-
"bytes":
|
| 1986 |
-
"sha256": "
|
| 1987 |
},
|
| 1988 |
"hf_model": {
|
| 1989 |
"path": "hf_model:metrics/task_method_20_result_matrix.json",
|
| 1990 |
"exists": true,
|
| 1991 |
-
"bytes":
|
| 1992 |
-
"sha256": "
|
| 1993 |
}
|
| 1994 |
},
|
| 1995 |
"failures": []
|
|
@@ -2000,45 +2000,45 @@
|
|
| 2000 |
"local": {
|
| 2001 |
"path": "repo:docs/data/task_method_20_gap_audit.json",
|
| 2002 |
"exists": true,
|
| 2003 |
-
"bytes":
|
| 2004 |
-
"sha256": "
|
| 2005 |
},
|
| 2006 |
"mirrors": {
|
| 2007 |
"hf_space": {
|
| 2008 |
"path": "hf_space:data/task_method_20_gap_audit.json",
|
| 2009 |
"exists": true,
|
| 2010 |
-
"bytes":
|
| 2011 |
-
"sha256": "
|
| 2012 |
},
|
| 2013 |
"hf_artifacts_data": {
|
| 2014 |
"path": "hf_artifacts:data/task_method_20_gap_audit.json",
|
| 2015 |
"exists": true,
|
| 2016 |
-
"bytes":
|
| 2017 |
-
"sha256": "
|
| 2018 |
},
|
| 2019 |
"hf_artifacts": {
|
| 2020 |
"path": "hf_artifacts:docs/data/task_method_20_gap_audit.json",
|
| 2021 |
"exists": true,
|
| 2022 |
-
"bytes":
|
| 2023 |
-
"sha256": "
|
| 2024 |
},
|
| 2025 |
"hf_model_data": {
|
| 2026 |
"path": "hf_model:data/task_method_20_gap_audit.json",
|
| 2027 |
"exists": true,
|
| 2028 |
-
"bytes":
|
| 2029 |
-
"sha256": "
|
| 2030 |
},
|
| 2031 |
"hf_model_docs_data": {
|
| 2032 |
"path": "hf_model:docs/data/task_method_20_gap_audit.json",
|
| 2033 |
"exists": true,
|
| 2034 |
-
"bytes":
|
| 2035 |
-
"sha256": "
|
| 2036 |
},
|
| 2037 |
"hf_model": {
|
| 2038 |
"path": "hf_model:metrics/task_method_20_gap_audit.json",
|
| 2039 |
"exists": true,
|
| 2040 |
-
"bytes":
|
| 2041 |
-
"sha256": "
|
| 2042 |
}
|
| 2043 |
},
|
| 2044 |
"failures": []
|
|
@@ -2098,45 +2098,45 @@
|
|
| 2098 |
"local": {
|
| 2099 |
"path": "repo:docs/data/unified_task_model_radar.json",
|
| 2100 |
"exists": true,
|
| 2101 |
-
"bytes":
|
| 2102 |
-
"sha256": "
|
| 2103 |
},
|
| 2104 |
"mirrors": {
|
| 2105 |
"hf_space": {
|
| 2106 |
"path": "hf_space:data/unified_task_model_radar.json",
|
| 2107 |
"exists": true,
|
| 2108 |
-
"bytes":
|
| 2109 |
-
"sha256": "
|
| 2110 |
},
|
| 2111 |
"hf_artifacts_data": {
|
| 2112 |
"path": "hf_artifacts:data/unified_task_model_radar.json",
|
| 2113 |
"exists": true,
|
| 2114 |
-
"bytes":
|
| 2115 |
-
"sha256": "
|
| 2116 |
},
|
| 2117 |
"hf_artifacts": {
|
| 2118 |
"path": "hf_artifacts:docs/data/unified_task_model_radar.json",
|
| 2119 |
"exists": true,
|
| 2120 |
-
"bytes":
|
| 2121 |
-
"sha256": "
|
| 2122 |
},
|
| 2123 |
"hf_model_data": {
|
| 2124 |
"path": "hf_model:data/unified_task_model_radar.json",
|
| 2125 |
"exists": true,
|
| 2126 |
-
"bytes":
|
| 2127 |
-
"sha256": "
|
| 2128 |
},
|
| 2129 |
"hf_model_docs_data": {
|
| 2130 |
"path": "hf_model:docs/data/unified_task_model_radar.json",
|
| 2131 |
"exists": true,
|
| 2132 |
-
"bytes":
|
| 2133 |
-
"sha256": "
|
| 2134 |
},
|
| 2135 |
"hf_model": {
|
| 2136 |
"path": "hf_model:metrics/unified_task_model_radar.json",
|
| 2137 |
"exists": true,
|
| 2138 |
-
"bytes":
|
| 2139 |
-
"sha256": "
|
| 2140 |
}
|
| 2141 |
},
|
| 2142 |
"failures": []
|
|
@@ -2148,44 +2148,44 @@
|
|
| 2148 |
"path": "repo:docs/data/website_integrity.json",
|
| 2149 |
"exists": true,
|
| 2150 |
"bytes": 18933,
|
| 2151 |
-
"sha256": "
|
| 2152 |
},
|
| 2153 |
"mirrors": {
|
| 2154 |
"hf_space": {
|
| 2155 |
"path": "hf_space:data/website_integrity.json",
|
| 2156 |
"exists": true,
|
| 2157 |
"bytes": 18933,
|
| 2158 |
-
"sha256": "
|
| 2159 |
},
|
| 2160 |
"hf_artifacts_data": {
|
| 2161 |
"path": "hf_artifacts:data/website_integrity.json",
|
| 2162 |
"exists": true,
|
| 2163 |
"bytes": 18933,
|
| 2164 |
-
"sha256": "
|
| 2165 |
},
|
| 2166 |
"hf_artifacts": {
|
| 2167 |
"path": "hf_artifacts:docs/data/website_integrity.json",
|
| 2168 |
"exists": true,
|
| 2169 |
"bytes": 18933,
|
| 2170 |
-
"sha256": "
|
| 2171 |
},
|
| 2172 |
"hf_model_data": {
|
| 2173 |
"path": "hf_model:data/website_integrity.json",
|
| 2174 |
"exists": true,
|
| 2175 |
"bytes": 18933,
|
| 2176 |
-
"sha256": "
|
| 2177 |
},
|
| 2178 |
"hf_model_docs_data": {
|
| 2179 |
"path": "hf_model:docs/data/website_integrity.json",
|
| 2180 |
"exists": true,
|
| 2181 |
"bytes": 18933,
|
| 2182 |
-
"sha256": "
|
| 2183 |
},
|
| 2184 |
"hf_model": {
|
| 2185 |
"path": "hf_model:metrics/website_integrity.json",
|
| 2186 |
"exists": true,
|
| 2187 |
"bytes": 18933,
|
| 2188 |
-
"sha256": "
|
| 2189 |
}
|
| 2190 |
},
|
| 2191 |
"failures": []
|
|
@@ -2319,33 +2319,33 @@
|
|
| 2319 |
"local": {
|
| 2320 |
"path": "repo:docs/assets/charts/episode128_task_model_radar.svg",
|
| 2321 |
"exists": true,
|
| 2322 |
-
"bytes":
|
| 2323 |
-
"sha256": "
|
| 2324 |
},
|
| 2325 |
"mirrors": {
|
| 2326 |
"hf_space": {
|
| 2327 |
"path": "hf_space:assets/charts/episode128_task_model_radar.svg",
|
| 2328 |
"exists": true,
|
| 2329 |
-
"bytes":
|
| 2330 |
-
"sha256": "
|
| 2331 |
},
|
| 2332 |
"hf_artifacts_docs": {
|
| 2333 |
"path": "hf_artifacts:docs/assets/charts/episode128_task_model_radar.svg",
|
| 2334 |
"exists": true,
|
| 2335 |
-
"bytes":
|
| 2336 |
-
"sha256": "
|
| 2337 |
},
|
| 2338 |
"hf_artifacts_card": {
|
| 2339 |
"path": "hf_artifacts:assets/charts/episode128_task_model_radar.svg",
|
| 2340 |
"exists": true,
|
| 2341 |
-
"bytes":
|
| 2342 |
-
"sha256": "
|
| 2343 |
},
|
| 2344 |
"hf_model": {
|
| 2345 |
"path": "hf_model:assets/charts/episode128_task_model_radar.svg",
|
| 2346 |
"exists": true,
|
| 2347 |
-
"bytes":
|
| 2348 |
-
"sha256": "
|
| 2349 |
}
|
| 2350 |
},
|
| 2351 |
"failures": []
|
|
@@ -2393,33 +2393,33 @@
|
|
| 2393 |
"local": {
|
| 2394 |
"path": "repo:docs/assets/charts/unified_task_model_radar.svg",
|
| 2395 |
"exists": true,
|
| 2396 |
-
"bytes":
|
| 2397 |
-
"sha256": "
|
| 2398 |
},
|
| 2399 |
"mirrors": {
|
| 2400 |
"hf_space": {
|
| 2401 |
"path": "hf_space:assets/charts/unified_task_model_radar.svg",
|
| 2402 |
"exists": true,
|
| 2403 |
-
"bytes":
|
| 2404 |
-
"sha256": "
|
| 2405 |
},
|
| 2406 |
"hf_artifacts_docs": {
|
| 2407 |
"path": "hf_artifacts:docs/assets/charts/unified_task_model_radar.svg",
|
| 2408 |
"exists": true,
|
| 2409 |
-
"bytes":
|
| 2410 |
-
"sha256": "
|
| 2411 |
},
|
| 2412 |
"hf_artifacts_card": {
|
| 2413 |
"path": "hf_artifacts:assets/charts/unified_task_model_radar.svg",
|
| 2414 |
"exists": true,
|
| 2415 |
-
"bytes":
|
| 2416 |
-
"sha256": "
|
| 2417 |
},
|
| 2418 |
"hf_model": {
|
| 2419 |
"path": "hf_model:assets/charts/unified_task_model_radar.svg",
|
| 2420 |
"exists": true,
|
| 2421 |
-
"bytes":
|
| 2422 |
-
"sha256": "
|
| 2423 |
}
|
| 2424 |
},
|
| 2425 |
"failures": []
|
|
@@ -3650,6 +3650,31 @@
|
|
| 3650 |
},
|
| 3651 |
"failures": []
|
| 3652 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3653 |
{
|
| 3654 |
"name": "scripts/omni/score_model_output_probes.py",
|
| 3655 |
"status": "pass",
|
|
@@ -3806,21 +3831,21 @@
|
|
| 3806 |
"local": {
|
| 3807 |
"path": "repo:scripts/build_artifact_index.py",
|
| 3808 |
"exists": true,
|
| 3809 |
-
"bytes":
|
| 3810 |
-
"sha256": "
|
| 3811 |
},
|
| 3812 |
"mirrors": {
|
| 3813 |
"hf_artifacts": {
|
| 3814 |
"path": "hf_artifacts:scripts/build_artifact_index.py",
|
| 3815 |
"exists": true,
|
| 3816 |
-
"bytes":
|
| 3817 |
-
"sha256": "
|
| 3818 |
},
|
| 3819 |
"hf_model": {
|
| 3820 |
"path": "hf_model:scripts/build_artifact_index.py",
|
| 3821 |
"exists": true,
|
| 3822 |
-
"bytes":
|
| 3823 |
-
"sha256": "
|
| 3824 |
}
|
| 3825 |
},
|
| 3826 |
"failures": []
|
|
@@ -4031,21 +4056,21 @@
|
|
| 4031 |
"local": {
|
| 4032 |
"path": "repo:scripts/build_task_method_20_gap_audit.py",
|
| 4033 |
"exists": true,
|
| 4034 |
-
"bytes":
|
| 4035 |
-
"sha256": "
|
| 4036 |
},
|
| 4037 |
"mirrors": {
|
| 4038 |
"hf_artifacts": {
|
| 4039 |
"path": "hf_artifacts:scripts/build_task_method_20_gap_audit.py",
|
| 4040 |
"exists": true,
|
| 4041 |
-
"bytes":
|
| 4042 |
-
"sha256": "
|
| 4043 |
},
|
| 4044 |
"hf_model": {
|
| 4045 |
"path": "hf_model:scripts/build_task_method_20_gap_audit.py",
|
| 4046 |
"exists": true,
|
| 4047 |
-
"bytes":
|
| 4048 |
-
"sha256": "
|
| 4049 |
}
|
| 4050 |
},
|
| 4051 |
"failures": []
|
|
@@ -4106,21 +4131,21 @@
|
|
| 4106 |
"local": {
|
| 4107 |
"path": "repo:scripts/build_unified_task_model_radar.py",
|
| 4108 |
"exists": true,
|
| 4109 |
-
"bytes":
|
| 4110 |
-
"sha256": "
|
| 4111 |
},
|
| 4112 |
"mirrors": {
|
| 4113 |
"hf_artifacts": {
|
| 4114 |
"path": "hf_artifacts:scripts/build_unified_task_model_radar.py",
|
| 4115 |
"exists": true,
|
| 4116 |
-
"bytes":
|
| 4117 |
-
"sha256": "
|
| 4118 |
},
|
| 4119 |
"hf_model": {
|
| 4120 |
"path": "hf_model:scripts/build_unified_task_model_radar.py",
|
| 4121 |
"exists": true,
|
| 4122 |
-
"bytes":
|
| 4123 |
-
"sha256": "
|
| 4124 |
}
|
| 4125 |
},
|
| 4126 |
"failures": []
|
|
@@ -4181,21 +4206,21 @@
|
|
| 4181 |
"local": {
|
| 4182 |
"path": "repo:scripts/validate_mirror_parity.py",
|
| 4183 |
"exists": true,
|
| 4184 |
-
"bytes":
|
| 4185 |
-
"sha256": "
|
| 4186 |
},
|
| 4187 |
"mirrors": {
|
| 4188 |
"hf_artifacts": {
|
| 4189 |
"path": "hf_artifacts:scripts/validate_mirror_parity.py",
|
| 4190 |
"exists": true,
|
| 4191 |
-
"bytes":
|
| 4192 |
-
"sha256": "
|
| 4193 |
},
|
| 4194 |
"hf_model": {
|
| 4195 |
"path": "hf_model:scripts/validate_mirror_parity.py",
|
| 4196 |
"exists": true,
|
| 4197 |
-
"bytes":
|
| 4198 |
-
"sha256": "
|
| 4199 |
}
|
| 4200 |
},
|
| 4201 |
"failures": []
|
|
@@ -4331,21 +4356,21 @@
|
|
| 4331 |
"local": {
|
| 4332 |
"path": "repo:scripts/sync_hf_publish_mirrors.py",
|
| 4333 |
"exists": true,
|
| 4334 |
-
"bytes":
|
| 4335 |
-
"sha256": "
|
| 4336 |
},
|
| 4337 |
"mirrors": {
|
| 4338 |
"hf_artifacts": {
|
| 4339 |
"path": "hf_artifacts:scripts/sync_hf_publish_mirrors.py",
|
| 4340 |
"exists": true,
|
| 4341 |
-
"bytes":
|
| 4342 |
-
"sha256": "
|
| 4343 |
},
|
| 4344 |
"hf_model": {
|
| 4345 |
"path": "hf_model:scripts/sync_hf_publish_mirrors.py",
|
| 4346 |
"exists": true,
|
| 4347 |
-
"bytes":
|
| 4348 |
-
"sha256": "
|
| 4349 |
}
|
| 4350 |
},
|
| 4351 |
"failures": []
|
|
@@ -4535,39 +4560,39 @@
|
|
| 4535 |
"local": {
|
| 4536 |
"path": "repo:docs/index.html",
|
| 4537 |
"exists": true,
|
| 4538 |
-
"bytes":
|
| 4539 |
-
"sha256": "
|
| 4540 |
},
|
| 4541 |
"mirrors": {
|
| 4542 |
"hf_space": {
|
| 4543 |
"path": "hf_space:index.html",
|
| 4544 |
"exists": true,
|
| 4545 |
-
"bytes":
|
| 4546 |
-
"sha256": "
|
| 4547 |
},
|
| 4548 |
"hf_artifacts_root": {
|
| 4549 |
"path": "hf_artifacts:index.html",
|
| 4550 |
"exists": true,
|
| 4551 |
-
"bytes":
|
| 4552 |
-
"sha256": "
|
| 4553 |
},
|
| 4554 |
"hf_artifacts_docs": {
|
| 4555 |
"path": "hf_artifacts:docs/index.html",
|
| 4556 |
"exists": true,
|
| 4557 |
-
"bytes":
|
| 4558 |
-
"sha256": "
|
| 4559 |
},
|
| 4560 |
"hf_model": {
|
| 4561 |
"path": "hf_model:index.html",
|
| 4562 |
"exists": true,
|
| 4563 |
-
"bytes":
|
| 4564 |
-
"sha256": "
|
| 4565 |
},
|
| 4566 |
"hf_model_docs": {
|
| 4567 |
"path": "hf_model:docs/index.html",
|
| 4568 |
"exists": true,
|
| 4569 |
-
"bytes":
|
| 4570 |
-
"sha256": "
|
| 4571 |
}
|
| 4572 |
},
|
| 4573 |
"failures": []
|
|
@@ -10591,6 +10616,254 @@
|
|
| 10591 |
},
|
| 10592 |
"failures": []
|
| 10593 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10594 |
{
|
| 10595 |
"name": "results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md",
|
| 10596 |
"status": "pass",
|
|
@@ -18533,27 +18806,27 @@
|
|
| 18533 |
"local": {
|
| 18534 |
"path": "repo:TASK_METHOD_20_GAP_AUDIT.md",
|
| 18535 |
"exists": true,
|
| 18536 |
-
"bytes":
|
| 18537 |
-
"sha256": "
|
| 18538 |
},
|
| 18539 |
"mirrors": {
|
| 18540 |
"hf_space": {
|
| 18541 |
"path": "hf_space:TASK_METHOD_20_GAP_AUDIT.md",
|
| 18542 |
"exists": true,
|
| 18543 |
-
"bytes":
|
| 18544 |
-
"sha256": "
|
| 18545 |
},
|
| 18546 |
"hf_artifacts": {
|
| 18547 |
"path": "hf_artifacts:TASK_METHOD_20_GAP_AUDIT.md",
|
| 18548 |
"exists": true,
|
| 18549 |
-
"bytes":
|
| 18550 |
-
"sha256": "
|
| 18551 |
},
|
| 18552 |
"hf_model": {
|
| 18553 |
"path": "hf_model:TASK_METHOD_20_GAP_AUDIT.md",
|
| 18554 |
"exists": true,
|
| 18555 |
-
"bytes":
|
| 18556 |
-
"sha256": "
|
| 18557 |
}
|
| 18558 |
},
|
| 18559 |
"failures": []
|
|
@@ -18564,27 +18837,27 @@
|
|
| 18564 |
"local": {
|
| 18565 |
"path": "repo:TASK_METHOD_20_RESULT_MATRIX.md",
|
| 18566 |
"exists": true,
|
| 18567 |
-
"bytes":
|
| 18568 |
-
"sha256": "
|
| 18569 |
},
|
| 18570 |
"mirrors": {
|
| 18571 |
"hf_space": {
|
| 18572 |
"path": "hf_space:TASK_METHOD_20_RESULT_MATRIX.md",
|
| 18573 |
"exists": true,
|
| 18574 |
-
"bytes":
|
| 18575 |
-
"sha256": "
|
| 18576 |
},
|
| 18577 |
"hf_artifacts": {
|
| 18578 |
"path": "hf_artifacts:TASK_METHOD_20_RESULT_MATRIX.md",
|
| 18579 |
"exists": true,
|
| 18580 |
-
"bytes":
|
| 18581 |
-
"sha256": "
|
| 18582 |
},
|
| 18583 |
"hf_model": {
|
| 18584 |
"path": "hf_model:TASK_METHOD_20_RESULT_MATRIX.md",
|
| 18585 |
"exists": true,
|
| 18586 |
-
"bytes":
|
| 18587 |
-
"sha256": "
|
| 18588 |
}
|
| 18589 |
},
|
| 18590 |
"failures": []
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-16T13:35:58+00:00",
|
| 4 |
"hf_root": "hf_publish",
|
| 5 |
"summary": {
|
| 6 |
+
"group_count": 587,
|
| 7 |
"failure_count": 0,
|
| 8 |
"failures_by_surface": {}
|
| 9 |
},
|
|
|
|
| 138 |
"local": {
|
| 139 |
"path": "repo:docs/data/artifact_index.json",
|
| 140 |
"exists": true,
|
| 141 |
+
"bytes": 104199,
|
| 142 |
+
"sha256": "785793bcd8ec76149d03ca6fe7f9539ce1bb0fbdf62091fc57de87c123bc49c6"
|
| 143 |
},
|
| 144 |
"mirrors": {
|
| 145 |
"hf_space": {
|
| 146 |
"path": "hf_space:data/artifact_index.json",
|
| 147 |
"exists": true,
|
| 148 |
+
"bytes": 104199,
|
| 149 |
+
"sha256": "785793bcd8ec76149d03ca6fe7f9539ce1bb0fbdf62091fc57de87c123bc49c6"
|
| 150 |
},
|
| 151 |
"hf_artifacts_data": {
|
| 152 |
"path": "hf_artifacts:data/artifact_index.json",
|
| 153 |
"exists": true,
|
| 154 |
+
"bytes": 104199,
|
| 155 |
+
"sha256": "785793bcd8ec76149d03ca6fe7f9539ce1bb0fbdf62091fc57de87c123bc49c6"
|
| 156 |
},
|
| 157 |
"hf_artifacts": {
|
| 158 |
"path": "hf_artifacts:docs/data/artifact_index.json",
|
| 159 |
"exists": true,
|
| 160 |
+
"bytes": 104199,
|
| 161 |
+
"sha256": "785793bcd8ec76149d03ca6fe7f9539ce1bb0fbdf62091fc57de87c123bc49c6"
|
| 162 |
},
|
| 163 |
"hf_model_data": {
|
| 164 |
"path": "hf_model:data/artifact_index.json",
|
| 165 |
"exists": true,
|
| 166 |
+
"bytes": 104199,
|
| 167 |
+
"sha256": "785793bcd8ec76149d03ca6fe7f9539ce1bb0fbdf62091fc57de87c123bc49c6"
|
| 168 |
},
|
| 169 |
"hf_model_docs_data": {
|
| 170 |
"path": "hf_model:docs/data/artifact_index.json",
|
| 171 |
"exists": true,
|
| 172 |
+
"bytes": 104199,
|
| 173 |
+
"sha256": "785793bcd8ec76149d03ca6fe7f9539ce1bb0fbdf62091fc57de87c123bc49c6"
|
| 174 |
},
|
| 175 |
"hf_model": {
|
| 176 |
"path": "hf_model:metrics/artifact_index.json",
|
| 177 |
"exists": true,
|
| 178 |
+
"bytes": 104199,
|
| 179 |
+
"sha256": "785793bcd8ec76149d03ca6fe7f9539ce1bb0fbdf62091fc57de87c123bc49c6"
|
| 180 |
}
|
| 181 |
},
|
| 182 |
"failures": []
|
|
|
|
| 825 |
"path": "repo:docs/data/publication_audit.json",
|
| 826 |
"exists": true,
|
| 827 |
"bytes": 8298,
|
| 828 |
+
"sha256": "c102f6b278caca6f359330f130cd3299b09ad219ba6fcedf14416542d8f935a6"
|
| 829 |
},
|
| 830 |
"mirrors": {
|
| 831 |
"hf_space": {
|
| 832 |
"path": "hf_space:data/publication_audit.json",
|
| 833 |
"exists": true,
|
| 834 |
"bytes": 8298,
|
| 835 |
+
"sha256": "c102f6b278caca6f359330f130cd3299b09ad219ba6fcedf14416542d8f935a6"
|
| 836 |
},
|
| 837 |
"hf_artifacts_data": {
|
| 838 |
"path": "hf_artifacts:data/publication_audit.json",
|
| 839 |
"exists": true,
|
| 840 |
"bytes": 8298,
|
| 841 |
+
"sha256": "c102f6b278caca6f359330f130cd3299b09ad219ba6fcedf14416542d8f935a6"
|
| 842 |
},
|
| 843 |
"hf_artifacts": {
|
| 844 |
"path": "hf_artifacts:docs/data/publication_audit.json",
|
| 845 |
"exists": true,
|
| 846 |
"bytes": 8298,
|
| 847 |
+
"sha256": "c102f6b278caca6f359330f130cd3299b09ad219ba6fcedf14416542d8f935a6"
|
| 848 |
},
|
| 849 |
"hf_model_data": {
|
| 850 |
"path": "hf_model:data/publication_audit.json",
|
| 851 |
"exists": true,
|
| 852 |
"bytes": 8298,
|
| 853 |
+
"sha256": "c102f6b278caca6f359330f130cd3299b09ad219ba6fcedf14416542d8f935a6"
|
| 854 |
},
|
| 855 |
"hf_model_docs_data": {
|
| 856 |
"path": "hf_model:docs/data/publication_audit.json",
|
| 857 |
"exists": true,
|
| 858 |
"bytes": 8298,
|
| 859 |
+
"sha256": "c102f6b278caca6f359330f130cd3299b09ad219ba6fcedf14416542d8f935a6"
|
| 860 |
},
|
| 861 |
"hf_model": {
|
| 862 |
"path": "hf_model:metrics/publication_audit.json",
|
| 863 |
"exists": true,
|
| 864 |
"bytes": 8298,
|
| 865 |
+
"sha256": "c102f6b278caca6f359330f130cd3299b09ad219ba6fcedf14416542d8f935a6"
|
| 866 |
}
|
| 867 |
},
|
| 868 |
"failures": []
|
|
|
|
| 874 |
"path": "repo:docs/data/public_surface_qa.json",
|
| 875 |
"exists": true,
|
| 876 |
"bytes": 6146,
|
| 877 |
+
"sha256": "e8de5e0cd4d4042b056bd37963c1da98b01589cf4aaf68c8954f9eed2688683d"
|
| 878 |
},
|
| 879 |
"mirrors": {
|
| 880 |
"hf_space": {
|
| 881 |
"path": "hf_space:data/public_surface_qa.json",
|
| 882 |
"exists": true,
|
| 883 |
"bytes": 6146,
|
| 884 |
+
"sha256": "e8de5e0cd4d4042b056bd37963c1da98b01589cf4aaf68c8954f9eed2688683d"
|
| 885 |
},
|
| 886 |
"hf_artifacts_data": {
|
| 887 |
"path": "hf_artifacts:data/public_surface_qa.json",
|
| 888 |
"exists": true,
|
| 889 |
"bytes": 6146,
|
| 890 |
+
"sha256": "e8de5e0cd4d4042b056bd37963c1da98b01589cf4aaf68c8954f9eed2688683d"
|
| 891 |
},
|
| 892 |
"hf_artifacts": {
|
| 893 |
"path": "hf_artifacts:docs/data/public_surface_qa.json",
|
| 894 |
"exists": true,
|
| 895 |
"bytes": 6146,
|
| 896 |
+
"sha256": "e8de5e0cd4d4042b056bd37963c1da98b01589cf4aaf68c8954f9eed2688683d"
|
| 897 |
},
|
| 898 |
"hf_model_data": {
|
| 899 |
"path": "hf_model:data/public_surface_qa.json",
|
| 900 |
"exists": true,
|
| 901 |
"bytes": 6146,
|
| 902 |
+
"sha256": "e8de5e0cd4d4042b056bd37963c1da98b01589cf4aaf68c8954f9eed2688683d"
|
| 903 |
},
|
| 904 |
"hf_model_docs_data": {
|
| 905 |
"path": "hf_model:docs/data/public_surface_qa.json",
|
| 906 |
"exists": true,
|
| 907 |
"bytes": 6146,
|
| 908 |
+
"sha256": "e8de5e0cd4d4042b056bd37963c1da98b01589cf4aaf68c8954f9eed2688683d"
|
| 909 |
},
|
| 910 |
"hf_model": {
|
| 911 |
"path": "hf_model:metrics/public_surface_qa.json",
|
| 912 |
"exists": true,
|
| 913 |
"bytes": 6146,
|
| 914 |
+
"sha256": "e8de5e0cd4d4042b056bd37963c1da98b01589cf4aaf68c8954f9eed2688683d"
|
| 915 |
}
|
| 916 |
},
|
| 917 |
"failures": []
|
|
|
|
| 1021 |
"path": "repo:docs/data/quality_gates.json",
|
| 1022 |
"exists": true,
|
| 1023 |
"bytes": 8100,
|
| 1024 |
+
"sha256": "69babe425b30b76780fddac26f88c81dc9ea034b5c39e85dccd58d7e58deef44"
|
| 1025 |
},
|
| 1026 |
"mirrors": {
|
| 1027 |
"hf_space": {
|
| 1028 |
"path": "hf_space:data/quality_gates.json",
|
| 1029 |
"exists": true,
|
| 1030 |
"bytes": 8100,
|
| 1031 |
+
"sha256": "69babe425b30b76780fddac26f88c81dc9ea034b5c39e85dccd58d7e58deef44"
|
| 1032 |
},
|
| 1033 |
"hf_artifacts_data": {
|
| 1034 |
"path": "hf_artifacts:data/quality_gates.json",
|
| 1035 |
"exists": true,
|
| 1036 |
"bytes": 8100,
|
| 1037 |
+
"sha256": "69babe425b30b76780fddac26f88c81dc9ea034b5c39e85dccd58d7e58deef44"
|
| 1038 |
},
|
| 1039 |
"hf_artifacts": {
|
| 1040 |
"path": "hf_artifacts:docs/data/quality_gates.json",
|
| 1041 |
"exists": true,
|
| 1042 |
"bytes": 8100,
|
| 1043 |
+
"sha256": "69babe425b30b76780fddac26f88c81dc9ea034b5c39e85dccd58d7e58deef44"
|
| 1044 |
},
|
| 1045 |
"hf_model_data": {
|
| 1046 |
"path": "hf_model:data/quality_gates.json",
|
| 1047 |
"exists": true,
|
| 1048 |
"bytes": 8100,
|
| 1049 |
+
"sha256": "69babe425b30b76780fddac26f88c81dc9ea034b5c39e85dccd58d7e58deef44"
|
| 1050 |
},
|
| 1051 |
"hf_model_docs_data": {
|
| 1052 |
"path": "hf_model:docs/data/quality_gates.json",
|
| 1053 |
"exists": true,
|
| 1054 |
"bytes": 8100,
|
| 1055 |
+
"sha256": "69babe425b30b76780fddac26f88c81dc9ea034b5c39e85dccd58d7e58deef44"
|
| 1056 |
},
|
| 1057 |
"hf_model": {
|
| 1058 |
"path": "hf_model:metrics/quality_gates.json",
|
| 1059 |
"exists": true,
|
| 1060 |
"bytes": 8100,
|
| 1061 |
+
"sha256": "69babe425b30b76780fddac26f88c81dc9ea034b5c39e85dccd58d7e58deef44"
|
| 1062 |
}
|
| 1063 |
},
|
| 1064 |
"failures": []
|
|
|
|
| 1462 |
"path": "repo:docs/data/scope_claims_audit.json",
|
| 1463 |
"exists": true,
|
| 1464 |
"bytes": 21630,
|
| 1465 |
+
"sha256": "d0653f4dc77b25127dad6d9615da0e89e4259de20daed61effaccfed8aa4438f"
|
| 1466 |
},
|
| 1467 |
"mirrors": {
|
| 1468 |
"hf_space": {
|
| 1469 |
"path": "hf_space:data/scope_claims_audit.json",
|
| 1470 |
"exists": true,
|
| 1471 |
"bytes": 21630,
|
| 1472 |
+
"sha256": "d0653f4dc77b25127dad6d9615da0e89e4259de20daed61effaccfed8aa4438f"
|
| 1473 |
},
|
| 1474 |
"hf_artifacts_data": {
|
| 1475 |
"path": "hf_artifacts:data/scope_claims_audit.json",
|
| 1476 |
"exists": true,
|
| 1477 |
"bytes": 21630,
|
| 1478 |
+
"sha256": "d0653f4dc77b25127dad6d9615da0e89e4259de20daed61effaccfed8aa4438f"
|
| 1479 |
},
|
| 1480 |
"hf_artifacts": {
|
| 1481 |
"path": "hf_artifacts:docs/data/scope_claims_audit.json",
|
| 1482 |
"exists": true,
|
| 1483 |
"bytes": 21630,
|
| 1484 |
+
"sha256": "d0653f4dc77b25127dad6d9615da0e89e4259de20daed61effaccfed8aa4438f"
|
| 1485 |
},
|
| 1486 |
"hf_model_data": {
|
| 1487 |
"path": "hf_model:data/scope_claims_audit.json",
|
| 1488 |
"exists": true,
|
| 1489 |
"bytes": 21630,
|
| 1490 |
+
"sha256": "d0653f4dc77b25127dad6d9615da0e89e4259de20daed61effaccfed8aa4438f"
|
| 1491 |
},
|
| 1492 |
"hf_model_docs_data": {
|
| 1493 |
"path": "hf_model:docs/data/scope_claims_audit.json",
|
| 1494 |
"exists": true,
|
| 1495 |
"bytes": 21630,
|
| 1496 |
+
"sha256": "d0653f4dc77b25127dad6d9615da0e89e4259de20daed61effaccfed8aa4438f"
|
| 1497 |
},
|
| 1498 |
"hf_model": {
|
| 1499 |
"path": "hf_model:metrics/scope_claims_audit.json",
|
| 1500 |
"exists": true,
|
| 1501 |
"bytes": 21630,
|
| 1502 |
+
"sha256": "d0653f4dc77b25127dad6d9615da0e89e4259de20daed61effaccfed8aa4438f"
|
| 1503 |
}
|
| 1504 |
},
|
| 1505 |
"failures": []
|
|
|
|
| 1560 |
"path": "repo:docs/data/source_alignment_audit.json",
|
| 1561 |
"exists": true,
|
| 1562 |
"bytes": 4432,
|
| 1563 |
+
"sha256": "3e2021bb443cb8f8f844aa1491e8874ea6f3d7c500c40a616213e6812236ffd6"
|
| 1564 |
},
|
| 1565 |
"mirrors": {
|
| 1566 |
"hf_space": {
|
| 1567 |
"path": "hf_space:data/source_alignment_audit.json",
|
| 1568 |
"exists": true,
|
| 1569 |
"bytes": 4432,
|
| 1570 |
+
"sha256": "3e2021bb443cb8f8f844aa1491e8874ea6f3d7c500c40a616213e6812236ffd6"
|
| 1571 |
},
|
| 1572 |
"hf_artifacts_data": {
|
| 1573 |
"path": "hf_artifacts:data/source_alignment_audit.json",
|
| 1574 |
"exists": true,
|
| 1575 |
"bytes": 4432,
|
| 1576 |
+
"sha256": "3e2021bb443cb8f8f844aa1491e8874ea6f3d7c500c40a616213e6812236ffd6"
|
| 1577 |
},
|
| 1578 |
"hf_artifacts": {
|
| 1579 |
"path": "hf_artifacts:docs/data/source_alignment_audit.json",
|
| 1580 |
"exists": true,
|
| 1581 |
"bytes": 4432,
|
| 1582 |
+
"sha256": "3e2021bb443cb8f8f844aa1491e8874ea6f3d7c500c40a616213e6812236ffd6"
|
| 1583 |
},
|
| 1584 |
"hf_model_data": {
|
| 1585 |
"path": "hf_model:data/source_alignment_audit.json",
|
| 1586 |
"exists": true,
|
| 1587 |
"bytes": 4432,
|
| 1588 |
+
"sha256": "3e2021bb443cb8f8f844aa1491e8874ea6f3d7c500c40a616213e6812236ffd6"
|
| 1589 |
},
|
| 1590 |
"hf_model_docs_data": {
|
| 1591 |
"path": "hf_model:docs/data/source_alignment_audit.json",
|
| 1592 |
"exists": true,
|
| 1593 |
"bytes": 4432,
|
| 1594 |
+
"sha256": "3e2021bb443cb8f8f844aa1491e8874ea6f3d7c500c40a616213e6812236ffd6"
|
| 1595 |
},
|
| 1596 |
"hf_model": {
|
| 1597 |
"path": "hf_model:metrics/source_alignment_audit.json",
|
| 1598 |
"exists": true,
|
| 1599 |
"bytes": 4432,
|
| 1600 |
+
"sha256": "3e2021bb443cb8f8f844aa1491e8874ea6f3d7c500c40a616213e6812236ffd6"
|
| 1601 |
}
|
| 1602 |
},
|
| 1603 |
"failures": []
|
|
|
|
| 1658 |
"path": "repo:docs/data/single_episode_task_model_radar.json",
|
| 1659 |
"exists": true,
|
| 1660 |
"bytes": 50973,
|
| 1661 |
+
"sha256": "10f9ca6afc9f9d69d2380faab2e2f1ebd0114bf7ab778ea30188ba849e9a44c1"
|
| 1662 |
},
|
| 1663 |
"mirrors": {
|
| 1664 |
"hf_space": {
|
| 1665 |
"path": "hf_space:data/single_episode_task_model_radar.json",
|
| 1666 |
"exists": true,
|
| 1667 |
"bytes": 50973,
|
| 1668 |
+
"sha256": "10f9ca6afc9f9d69d2380faab2e2f1ebd0114bf7ab778ea30188ba849e9a44c1"
|
| 1669 |
},
|
| 1670 |
"hf_artifacts_data": {
|
| 1671 |
"path": "hf_artifacts:data/single_episode_task_model_radar.json",
|
| 1672 |
"exists": true,
|
| 1673 |
"bytes": 50973,
|
| 1674 |
+
"sha256": "10f9ca6afc9f9d69d2380faab2e2f1ebd0114bf7ab778ea30188ba849e9a44c1"
|
| 1675 |
},
|
| 1676 |
"hf_artifacts": {
|
| 1677 |
"path": "hf_artifacts:docs/data/single_episode_task_model_radar.json",
|
| 1678 |
"exists": true,
|
| 1679 |
"bytes": 50973,
|
| 1680 |
+
"sha256": "10f9ca6afc9f9d69d2380faab2e2f1ebd0114bf7ab778ea30188ba849e9a44c1"
|
| 1681 |
},
|
| 1682 |
"hf_model_data": {
|
| 1683 |
"path": "hf_model:data/single_episode_task_model_radar.json",
|
| 1684 |
"exists": true,
|
| 1685 |
"bytes": 50973,
|
| 1686 |
+
"sha256": "10f9ca6afc9f9d69d2380faab2e2f1ebd0114bf7ab778ea30188ba849e9a44c1"
|
| 1687 |
},
|
| 1688 |
"hf_model_docs_data": {
|
| 1689 |
"path": "hf_model:docs/data/single_episode_task_model_radar.json",
|
| 1690 |
"exists": true,
|
| 1691 |
"bytes": 50973,
|
| 1692 |
+
"sha256": "10f9ca6afc9f9d69d2380faab2e2f1ebd0114bf7ab778ea30188ba849e9a44c1"
|
| 1693 |
},
|
| 1694 |
"hf_model": {
|
| 1695 |
"path": "hf_model:metrics/single_episode_task_model_radar.json",
|
| 1696 |
"exists": true,
|
| 1697 |
"bytes": 50973,
|
| 1698 |
+
"sha256": "10f9ca6afc9f9d69d2380faab2e2f1ebd0114bf7ab778ea30188ba849e9a44c1"
|
| 1699 |
}
|
| 1700 |
},
|
| 1701 |
"failures": []
|
|
|
|
| 1706 |
"local": {
|
| 1707 |
"path": "repo:docs/data/episode128_task_model_radar.json",
|
| 1708 |
"exists": true,
|
| 1709 |
+
"bytes": 187400,
|
| 1710 |
+
"sha256": "fab3dc021cfeca3547b72fd2e1321a1258c0c48a8eaa5ea5cc28600badcd2c68"
|
| 1711 |
},
|
| 1712 |
"mirrors": {
|
| 1713 |
"hf_space": {
|
| 1714 |
"path": "hf_space:data/episode128_task_model_radar.json",
|
| 1715 |
"exists": true,
|
| 1716 |
+
"bytes": 187400,
|
| 1717 |
+
"sha256": "fab3dc021cfeca3547b72fd2e1321a1258c0c48a8eaa5ea5cc28600badcd2c68"
|
| 1718 |
},
|
| 1719 |
"hf_artifacts_data": {
|
| 1720 |
"path": "hf_artifacts:data/episode128_task_model_radar.json",
|
| 1721 |
"exists": true,
|
| 1722 |
+
"bytes": 187400,
|
| 1723 |
+
"sha256": "fab3dc021cfeca3547b72fd2e1321a1258c0c48a8eaa5ea5cc28600badcd2c68"
|
| 1724 |
},
|
| 1725 |
"hf_artifacts": {
|
| 1726 |
"path": "hf_artifacts:docs/data/episode128_task_model_radar.json",
|
| 1727 |
"exists": true,
|
| 1728 |
+
"bytes": 187400,
|
| 1729 |
+
"sha256": "fab3dc021cfeca3547b72fd2e1321a1258c0c48a8eaa5ea5cc28600badcd2c68"
|
| 1730 |
},
|
| 1731 |
"hf_model_data": {
|
| 1732 |
"path": "hf_model:data/episode128_task_model_radar.json",
|
| 1733 |
"exists": true,
|
| 1734 |
+
"bytes": 187400,
|
| 1735 |
+
"sha256": "fab3dc021cfeca3547b72fd2e1321a1258c0c48a8eaa5ea5cc28600badcd2c68"
|
| 1736 |
},
|
| 1737 |
"hf_model_docs_data": {
|
| 1738 |
"path": "hf_model:docs/data/episode128_task_model_radar.json",
|
| 1739 |
"exists": true,
|
| 1740 |
+
"bytes": 187400,
|
| 1741 |
+
"sha256": "fab3dc021cfeca3547b72fd2e1321a1258c0c48a8eaa5ea5cc28600badcd2c68"
|
| 1742 |
},
|
| 1743 |
"hf_model": {
|
| 1744 |
"path": "hf_model:metrics/episode128_task_model_radar.json",
|
| 1745 |
"exists": true,
|
| 1746 |
+
"bytes": 187400,
|
| 1747 |
+
"sha256": "fab3dc021cfeca3547b72fd2e1321a1258c0c48a8eaa5ea5cc28600badcd2c68"
|
| 1748 |
}
|
| 1749 |
},
|
| 1750 |
"failures": []
|
|
|
|
| 1854 |
"path": "repo:docs/data/task_surface_integrity.json",
|
| 1855 |
"exists": true,
|
| 1856 |
"bytes": 45779,
|
| 1857 |
+
"sha256": "5cc466a245de221f9980e0a2f42df016c096515f1074d08bf2be8e758a876612"
|
| 1858 |
},
|
| 1859 |
"mirrors": {
|
| 1860 |
"hf_space": {
|
| 1861 |
"path": "hf_space:data/task_surface_integrity.json",
|
| 1862 |
"exists": true,
|
| 1863 |
"bytes": 45779,
|
| 1864 |
+
"sha256": "5cc466a245de221f9980e0a2f42df016c096515f1074d08bf2be8e758a876612"
|
| 1865 |
},
|
| 1866 |
"hf_artifacts_data": {
|
| 1867 |
"path": "hf_artifacts:data/task_surface_integrity.json",
|
| 1868 |
"exists": true,
|
| 1869 |
"bytes": 45779,
|
| 1870 |
+
"sha256": "5cc466a245de221f9980e0a2f42df016c096515f1074d08bf2be8e758a876612"
|
| 1871 |
},
|
| 1872 |
"hf_artifacts": {
|
| 1873 |
"path": "hf_artifacts:docs/data/task_surface_integrity.json",
|
| 1874 |
"exists": true,
|
| 1875 |
"bytes": 45779,
|
| 1876 |
+
"sha256": "5cc466a245de221f9980e0a2f42df016c096515f1074d08bf2be8e758a876612"
|
| 1877 |
},
|
| 1878 |
"hf_model_data": {
|
| 1879 |
"path": "hf_model:data/task_surface_integrity.json",
|
| 1880 |
"exists": true,
|
| 1881 |
"bytes": 45779,
|
| 1882 |
+
"sha256": "5cc466a245de221f9980e0a2f42df016c096515f1074d08bf2be8e758a876612"
|
| 1883 |
},
|
| 1884 |
"hf_model_docs_data": {
|
| 1885 |
"path": "hf_model:docs/data/task_surface_integrity.json",
|
| 1886 |
"exists": true,
|
| 1887 |
"bytes": 45779,
|
| 1888 |
+
"sha256": "5cc466a245de221f9980e0a2f42df016c096515f1074d08bf2be8e758a876612"
|
| 1889 |
},
|
| 1890 |
"hf_model": {
|
| 1891 |
"path": "hf_model:metrics/task_surface_integrity.json",
|
| 1892 |
"exists": true,
|
| 1893 |
"bytes": 45779,
|
| 1894 |
+
"sha256": "5cc466a245de221f9980e0a2f42df016c096515f1074d08bf2be8e758a876612"
|
| 1895 |
}
|
| 1896 |
},
|
| 1897 |
"failures": []
|
|
|
|
| 1951 |
"local": {
|
| 1952 |
"path": "repo:docs/data/task_method_20_result_matrix.json",
|
| 1953 |
"exists": true,
|
| 1954 |
+
"bytes": 129740,
|
| 1955 |
+
"sha256": "e7e398b98e4e188ee883a126bd77ad966ba563f2c43da56e13e871b6dc90f1ea"
|
| 1956 |
},
|
| 1957 |
"mirrors": {
|
| 1958 |
"hf_space": {
|
| 1959 |
"path": "hf_space:data/task_method_20_result_matrix.json",
|
| 1960 |
"exists": true,
|
| 1961 |
+
"bytes": 129740,
|
| 1962 |
+
"sha256": "e7e398b98e4e188ee883a126bd77ad966ba563f2c43da56e13e871b6dc90f1ea"
|
| 1963 |
},
|
| 1964 |
"hf_artifacts_data": {
|
| 1965 |
"path": "hf_artifacts:data/task_method_20_result_matrix.json",
|
| 1966 |
"exists": true,
|
| 1967 |
+
"bytes": 129740,
|
| 1968 |
+
"sha256": "e7e398b98e4e188ee883a126bd77ad966ba563f2c43da56e13e871b6dc90f1ea"
|
| 1969 |
},
|
| 1970 |
"hf_artifacts": {
|
| 1971 |
"path": "hf_artifacts:docs/data/task_method_20_result_matrix.json",
|
| 1972 |
"exists": true,
|
| 1973 |
+
"bytes": 129740,
|
| 1974 |
+
"sha256": "e7e398b98e4e188ee883a126bd77ad966ba563f2c43da56e13e871b6dc90f1ea"
|
| 1975 |
},
|
| 1976 |
"hf_model_data": {
|
| 1977 |
"path": "hf_model:data/task_method_20_result_matrix.json",
|
| 1978 |
"exists": true,
|
| 1979 |
+
"bytes": 129740,
|
| 1980 |
+
"sha256": "e7e398b98e4e188ee883a126bd77ad966ba563f2c43da56e13e871b6dc90f1ea"
|
| 1981 |
},
|
| 1982 |
"hf_model_docs_data": {
|
| 1983 |
"path": "hf_model:docs/data/task_method_20_result_matrix.json",
|
| 1984 |
"exists": true,
|
| 1985 |
+
"bytes": 129740,
|
| 1986 |
+
"sha256": "e7e398b98e4e188ee883a126bd77ad966ba563f2c43da56e13e871b6dc90f1ea"
|
| 1987 |
},
|
| 1988 |
"hf_model": {
|
| 1989 |
"path": "hf_model:metrics/task_method_20_result_matrix.json",
|
| 1990 |
"exists": true,
|
| 1991 |
+
"bytes": 129740,
|
| 1992 |
+
"sha256": "e7e398b98e4e188ee883a126bd77ad966ba563f2c43da56e13e871b6dc90f1ea"
|
| 1993 |
}
|
| 1994 |
},
|
| 1995 |
"failures": []
|
|
|
|
| 2000 |
"local": {
|
| 2001 |
"path": "repo:docs/data/task_method_20_gap_audit.json",
|
| 2002 |
"exists": true,
|
| 2003 |
+
"bytes": 57943,
|
| 2004 |
+
"sha256": "a1246c07c569238e8589fab0fb90e86733b7799816e8ef6690036a0fac289ecb"
|
| 2005 |
},
|
| 2006 |
"mirrors": {
|
| 2007 |
"hf_space": {
|
| 2008 |
"path": "hf_space:data/task_method_20_gap_audit.json",
|
| 2009 |
"exists": true,
|
| 2010 |
+
"bytes": 57943,
|
| 2011 |
+
"sha256": "a1246c07c569238e8589fab0fb90e86733b7799816e8ef6690036a0fac289ecb"
|
| 2012 |
},
|
| 2013 |
"hf_artifacts_data": {
|
| 2014 |
"path": "hf_artifacts:data/task_method_20_gap_audit.json",
|
| 2015 |
"exists": true,
|
| 2016 |
+
"bytes": 57943,
|
| 2017 |
+
"sha256": "a1246c07c569238e8589fab0fb90e86733b7799816e8ef6690036a0fac289ecb"
|
| 2018 |
},
|
| 2019 |
"hf_artifacts": {
|
| 2020 |
"path": "hf_artifacts:docs/data/task_method_20_gap_audit.json",
|
| 2021 |
"exists": true,
|
| 2022 |
+
"bytes": 57943,
|
| 2023 |
+
"sha256": "a1246c07c569238e8589fab0fb90e86733b7799816e8ef6690036a0fac289ecb"
|
| 2024 |
},
|
| 2025 |
"hf_model_data": {
|
| 2026 |
"path": "hf_model:data/task_method_20_gap_audit.json",
|
| 2027 |
"exists": true,
|
| 2028 |
+
"bytes": 57943,
|
| 2029 |
+
"sha256": "a1246c07c569238e8589fab0fb90e86733b7799816e8ef6690036a0fac289ecb"
|
| 2030 |
},
|
| 2031 |
"hf_model_docs_data": {
|
| 2032 |
"path": "hf_model:docs/data/task_method_20_gap_audit.json",
|
| 2033 |
"exists": true,
|
| 2034 |
+
"bytes": 57943,
|
| 2035 |
+
"sha256": "a1246c07c569238e8589fab0fb90e86733b7799816e8ef6690036a0fac289ecb"
|
| 2036 |
},
|
| 2037 |
"hf_model": {
|
| 2038 |
"path": "hf_model:metrics/task_method_20_gap_audit.json",
|
| 2039 |
"exists": true,
|
| 2040 |
+
"bytes": 57943,
|
| 2041 |
+
"sha256": "a1246c07c569238e8589fab0fb90e86733b7799816e8ef6690036a0fac289ecb"
|
| 2042 |
}
|
| 2043 |
},
|
| 2044 |
"failures": []
|
|
|
|
| 2098 |
"local": {
|
| 2099 |
"path": "repo:docs/data/unified_task_model_radar.json",
|
| 2100 |
"exists": true,
|
| 2101 |
+
"bytes": 231251,
|
| 2102 |
+
"sha256": "5cbcdf8e3a659ef98bbc623ee3e434055ad6479599c95178587b3dab84a6072d"
|
| 2103 |
},
|
| 2104 |
"mirrors": {
|
| 2105 |
"hf_space": {
|
| 2106 |
"path": "hf_space:data/unified_task_model_radar.json",
|
| 2107 |
"exists": true,
|
| 2108 |
+
"bytes": 231251,
|
| 2109 |
+
"sha256": "5cbcdf8e3a659ef98bbc623ee3e434055ad6479599c95178587b3dab84a6072d"
|
| 2110 |
},
|
| 2111 |
"hf_artifacts_data": {
|
| 2112 |
"path": "hf_artifacts:data/unified_task_model_radar.json",
|
| 2113 |
"exists": true,
|
| 2114 |
+
"bytes": 231251,
|
| 2115 |
+
"sha256": "5cbcdf8e3a659ef98bbc623ee3e434055ad6479599c95178587b3dab84a6072d"
|
| 2116 |
},
|
| 2117 |
"hf_artifacts": {
|
| 2118 |
"path": "hf_artifacts:docs/data/unified_task_model_radar.json",
|
| 2119 |
"exists": true,
|
| 2120 |
+
"bytes": 231251,
|
| 2121 |
+
"sha256": "5cbcdf8e3a659ef98bbc623ee3e434055ad6479599c95178587b3dab84a6072d"
|
| 2122 |
},
|
| 2123 |
"hf_model_data": {
|
| 2124 |
"path": "hf_model:data/unified_task_model_radar.json",
|
| 2125 |
"exists": true,
|
| 2126 |
+
"bytes": 231251,
|
| 2127 |
+
"sha256": "5cbcdf8e3a659ef98bbc623ee3e434055ad6479599c95178587b3dab84a6072d"
|
| 2128 |
},
|
| 2129 |
"hf_model_docs_data": {
|
| 2130 |
"path": "hf_model:docs/data/unified_task_model_radar.json",
|
| 2131 |
"exists": true,
|
| 2132 |
+
"bytes": 231251,
|
| 2133 |
+
"sha256": "5cbcdf8e3a659ef98bbc623ee3e434055ad6479599c95178587b3dab84a6072d"
|
| 2134 |
},
|
| 2135 |
"hf_model": {
|
| 2136 |
"path": "hf_model:metrics/unified_task_model_radar.json",
|
| 2137 |
"exists": true,
|
| 2138 |
+
"bytes": 231251,
|
| 2139 |
+
"sha256": "5cbcdf8e3a659ef98bbc623ee3e434055ad6479599c95178587b3dab84a6072d"
|
| 2140 |
}
|
| 2141 |
},
|
| 2142 |
"failures": []
|
|
|
|
| 2148 |
"path": "repo:docs/data/website_integrity.json",
|
| 2149 |
"exists": true,
|
| 2150 |
"bytes": 18933,
|
| 2151 |
+
"sha256": "9ad46ada8b1cae970c165bee1db3771e23d8e76cba84f72fd2c8d205715bc12a"
|
| 2152 |
},
|
| 2153 |
"mirrors": {
|
| 2154 |
"hf_space": {
|
| 2155 |
"path": "hf_space:data/website_integrity.json",
|
| 2156 |
"exists": true,
|
| 2157 |
"bytes": 18933,
|
| 2158 |
+
"sha256": "9ad46ada8b1cae970c165bee1db3771e23d8e76cba84f72fd2c8d205715bc12a"
|
| 2159 |
},
|
| 2160 |
"hf_artifacts_data": {
|
| 2161 |
"path": "hf_artifacts:data/website_integrity.json",
|
| 2162 |
"exists": true,
|
| 2163 |
"bytes": 18933,
|
| 2164 |
+
"sha256": "9ad46ada8b1cae970c165bee1db3771e23d8e76cba84f72fd2c8d205715bc12a"
|
| 2165 |
},
|
| 2166 |
"hf_artifacts": {
|
| 2167 |
"path": "hf_artifacts:docs/data/website_integrity.json",
|
| 2168 |
"exists": true,
|
| 2169 |
"bytes": 18933,
|
| 2170 |
+
"sha256": "9ad46ada8b1cae970c165bee1db3771e23d8e76cba84f72fd2c8d205715bc12a"
|
| 2171 |
},
|
| 2172 |
"hf_model_data": {
|
| 2173 |
"path": "hf_model:data/website_integrity.json",
|
| 2174 |
"exists": true,
|
| 2175 |
"bytes": 18933,
|
| 2176 |
+
"sha256": "9ad46ada8b1cae970c165bee1db3771e23d8e76cba84f72fd2c8d205715bc12a"
|
| 2177 |
},
|
| 2178 |
"hf_model_docs_data": {
|
| 2179 |
"path": "hf_model:docs/data/website_integrity.json",
|
| 2180 |
"exists": true,
|
| 2181 |
"bytes": 18933,
|
| 2182 |
+
"sha256": "9ad46ada8b1cae970c165bee1db3771e23d8e76cba84f72fd2c8d205715bc12a"
|
| 2183 |
},
|
| 2184 |
"hf_model": {
|
| 2185 |
"path": "hf_model:metrics/website_integrity.json",
|
| 2186 |
"exists": true,
|
| 2187 |
"bytes": 18933,
|
| 2188 |
+
"sha256": "9ad46ada8b1cae970c165bee1db3771e23d8e76cba84f72fd2c8d205715bc12a"
|
| 2189 |
}
|
| 2190 |
},
|
| 2191 |
"failures": []
|
|
|
|
| 2319 |
"local": {
|
| 2320 |
"path": "repo:docs/assets/charts/episode128_task_model_radar.svg",
|
| 2321 |
"exists": true,
|
| 2322 |
+
"bytes": 43679,
|
| 2323 |
+
"sha256": "51991fd33719be32da422f559b7d27895fc96b1332b0952f0c19b573d58c1325"
|
| 2324 |
},
|
| 2325 |
"mirrors": {
|
| 2326 |
"hf_space": {
|
| 2327 |
"path": "hf_space:assets/charts/episode128_task_model_radar.svg",
|
| 2328 |
"exists": true,
|
| 2329 |
+
"bytes": 43679,
|
| 2330 |
+
"sha256": "51991fd33719be32da422f559b7d27895fc96b1332b0952f0c19b573d58c1325"
|
| 2331 |
},
|
| 2332 |
"hf_artifacts_docs": {
|
| 2333 |
"path": "hf_artifacts:docs/assets/charts/episode128_task_model_radar.svg",
|
| 2334 |
"exists": true,
|
| 2335 |
+
"bytes": 43679,
|
| 2336 |
+
"sha256": "51991fd33719be32da422f559b7d27895fc96b1332b0952f0c19b573d58c1325"
|
| 2337 |
},
|
| 2338 |
"hf_artifacts_card": {
|
| 2339 |
"path": "hf_artifacts:assets/charts/episode128_task_model_radar.svg",
|
| 2340 |
"exists": true,
|
| 2341 |
+
"bytes": 43679,
|
| 2342 |
+
"sha256": "51991fd33719be32da422f559b7d27895fc96b1332b0952f0c19b573d58c1325"
|
| 2343 |
},
|
| 2344 |
"hf_model": {
|
| 2345 |
"path": "hf_model:assets/charts/episode128_task_model_radar.svg",
|
| 2346 |
"exists": true,
|
| 2347 |
+
"bytes": 43679,
|
| 2348 |
+
"sha256": "51991fd33719be32da422f559b7d27895fc96b1332b0952f0c19b573d58c1325"
|
| 2349 |
}
|
| 2350 |
},
|
| 2351 |
"failures": []
|
|
|
|
| 2393 |
"local": {
|
| 2394 |
"path": "repo:docs/assets/charts/unified_task_model_radar.svg",
|
| 2395 |
"exists": true,
|
| 2396 |
+
"bytes": 49695,
|
| 2397 |
+
"sha256": "2ad63af1cd1e3e6f9c354b20792c20683e8877e2f92d9da5b5284dce56e668b7"
|
| 2398 |
},
|
| 2399 |
"mirrors": {
|
| 2400 |
"hf_space": {
|
| 2401 |
"path": "hf_space:assets/charts/unified_task_model_radar.svg",
|
| 2402 |
"exists": true,
|
| 2403 |
+
"bytes": 49695,
|
| 2404 |
+
"sha256": "2ad63af1cd1e3e6f9c354b20792c20683e8877e2f92d9da5b5284dce56e668b7"
|
| 2405 |
},
|
| 2406 |
"hf_artifacts_docs": {
|
| 2407 |
"path": "hf_artifacts:docs/assets/charts/unified_task_model_radar.svg",
|
| 2408 |
"exists": true,
|
| 2409 |
+
"bytes": 49695,
|
| 2410 |
+
"sha256": "2ad63af1cd1e3e6f9c354b20792c20683e8877e2f92d9da5b5284dce56e668b7"
|
| 2411 |
},
|
| 2412 |
"hf_artifacts_card": {
|
| 2413 |
"path": "hf_artifacts:assets/charts/unified_task_model_radar.svg",
|
| 2414 |
"exists": true,
|
| 2415 |
+
"bytes": 49695,
|
| 2416 |
+
"sha256": "2ad63af1cd1e3e6f9c354b20792c20683e8877e2f92d9da5b5284dce56e668b7"
|
| 2417 |
},
|
| 2418 |
"hf_model": {
|
| 2419 |
"path": "hf_model:assets/charts/unified_task_model_radar.svg",
|
| 2420 |
"exists": true,
|
| 2421 |
+
"bytes": 49695,
|
| 2422 |
+
"sha256": "2ad63af1cd1e3e6f9c354b20792c20683e8877e2f92d9da5b5284dce56e668b7"
|
| 2423 |
}
|
| 2424 |
},
|
| 2425 |
"failures": []
|
|
|
|
| 3650 |
},
|
| 3651 |
"failures": []
|
| 3652 |
},
|
| 3653 |
+
{
|
| 3654 |
+
"name": "scripts/omni/score_existing_model_output_task_probes.py",
|
| 3655 |
+
"status": "pass",
|
| 3656 |
+
"local": {
|
| 3657 |
+
"path": "repo:scripts/omni/score_existing_model_output_task_probes.py",
|
| 3658 |
+
"exists": true,
|
| 3659 |
+
"bytes": 13291,
|
| 3660 |
+
"sha256": "25b8e7ba394c1e604fe8d7c900a013b3ea330f32b393cdcd416b2d0f3a2727b8"
|
| 3661 |
+
},
|
| 3662 |
+
"mirrors": {
|
| 3663 |
+
"hf_artifacts": {
|
| 3664 |
+
"path": "hf_artifacts:scripts/omni/score_existing_model_output_task_probes.py",
|
| 3665 |
+
"exists": true,
|
| 3666 |
+
"bytes": 13291,
|
| 3667 |
+
"sha256": "25b8e7ba394c1e604fe8d7c900a013b3ea330f32b393cdcd416b2d0f3a2727b8"
|
| 3668 |
+
},
|
| 3669 |
+
"hf_model": {
|
| 3670 |
+
"path": "hf_model:scripts/omni/score_existing_model_output_task_probes.py",
|
| 3671 |
+
"exists": true,
|
| 3672 |
+
"bytes": 13291,
|
| 3673 |
+
"sha256": "25b8e7ba394c1e604fe8d7c900a013b3ea330f32b393cdcd416b2d0f3a2727b8"
|
| 3674 |
+
}
|
| 3675 |
+
},
|
| 3676 |
+
"failures": []
|
| 3677 |
+
},
|
| 3678 |
{
|
| 3679 |
"name": "scripts/omni/score_model_output_probes.py",
|
| 3680 |
"status": "pass",
|
|
|
|
| 3831 |
"local": {
|
| 3832 |
"path": "repo:scripts/build_artifact_index.py",
|
| 3833 |
"exists": true,
|
| 3834 |
+
"bytes": 54683,
|
| 3835 |
+
"sha256": "0a9e7c7391f0e2822d8490ddd216f155e1ce3a50d2ba3c90476ee09685f74d65"
|
| 3836 |
},
|
| 3837 |
"mirrors": {
|
| 3838 |
"hf_artifacts": {
|
| 3839 |
"path": "hf_artifacts:scripts/build_artifact_index.py",
|
| 3840 |
"exists": true,
|
| 3841 |
+
"bytes": 54683,
|
| 3842 |
+
"sha256": "0a9e7c7391f0e2822d8490ddd216f155e1ce3a50d2ba3c90476ee09685f74d65"
|
| 3843 |
},
|
| 3844 |
"hf_model": {
|
| 3845 |
"path": "hf_model:scripts/build_artifact_index.py",
|
| 3846 |
"exists": true,
|
| 3847 |
+
"bytes": 54683,
|
| 3848 |
+
"sha256": "0a9e7c7391f0e2822d8490ddd216f155e1ce3a50d2ba3c90476ee09685f74d65"
|
| 3849 |
}
|
| 3850 |
},
|
| 3851 |
"failures": []
|
|
|
|
| 4056 |
"local": {
|
| 4057 |
"path": "repo:scripts/build_task_method_20_gap_audit.py",
|
| 4058 |
"exists": true,
|
| 4059 |
+
"bytes": 10094,
|
| 4060 |
+
"sha256": "4ba5a43fa045b02303ba16a146ab15a891155370a85dd7c09cbe68f8f0eac7ed"
|
| 4061 |
},
|
| 4062 |
"mirrors": {
|
| 4063 |
"hf_artifacts": {
|
| 4064 |
"path": "hf_artifacts:scripts/build_task_method_20_gap_audit.py",
|
| 4065 |
"exists": true,
|
| 4066 |
+
"bytes": 10094,
|
| 4067 |
+
"sha256": "4ba5a43fa045b02303ba16a146ab15a891155370a85dd7c09cbe68f8f0eac7ed"
|
| 4068 |
},
|
| 4069 |
"hf_model": {
|
| 4070 |
"path": "hf_model:scripts/build_task_method_20_gap_audit.py",
|
| 4071 |
"exists": true,
|
| 4072 |
+
"bytes": 10094,
|
| 4073 |
+
"sha256": "4ba5a43fa045b02303ba16a146ab15a891155370a85dd7c09cbe68f8f0eac7ed"
|
| 4074 |
}
|
| 4075 |
},
|
| 4076 |
"failures": []
|
|
|
|
| 4131 |
"local": {
|
| 4132 |
"path": "repo:scripts/build_unified_task_model_radar.py",
|
| 4133 |
"exists": true,
|
| 4134 |
+
"bytes": 48861,
|
| 4135 |
+
"sha256": "621ff19f057e6ed279388c8e944f2e4a2cfc87b9be1bd9f1feee6d329f0ec431"
|
| 4136 |
},
|
| 4137 |
"mirrors": {
|
| 4138 |
"hf_artifacts": {
|
| 4139 |
"path": "hf_artifacts:scripts/build_unified_task_model_radar.py",
|
| 4140 |
"exists": true,
|
| 4141 |
+
"bytes": 48861,
|
| 4142 |
+
"sha256": "621ff19f057e6ed279388c8e944f2e4a2cfc87b9be1bd9f1feee6d329f0ec431"
|
| 4143 |
},
|
| 4144 |
"hf_model": {
|
| 4145 |
"path": "hf_model:scripts/build_unified_task_model_radar.py",
|
| 4146 |
"exists": true,
|
| 4147 |
+
"bytes": 48861,
|
| 4148 |
+
"sha256": "621ff19f057e6ed279388c8e944f2e4a2cfc87b9be1bd9f1feee6d329f0ec431"
|
| 4149 |
}
|
| 4150 |
},
|
| 4151 |
"failures": []
|
|
|
|
| 4206 |
"local": {
|
| 4207 |
"path": "repo:scripts/validate_mirror_parity.py",
|
| 4208 |
"exists": true,
|
| 4209 |
+
"bytes": 24812,
|
| 4210 |
+
"sha256": "c7d67a9e371f20c24951fbc357f64907365e60acab5ff189fa1675ff631bd27f"
|
| 4211 |
},
|
| 4212 |
"mirrors": {
|
| 4213 |
"hf_artifacts": {
|
| 4214 |
"path": "hf_artifacts:scripts/validate_mirror_parity.py",
|
| 4215 |
"exists": true,
|
| 4216 |
+
"bytes": 24812,
|
| 4217 |
+
"sha256": "c7d67a9e371f20c24951fbc357f64907365e60acab5ff189fa1675ff631bd27f"
|
| 4218 |
},
|
| 4219 |
"hf_model": {
|
| 4220 |
"path": "hf_model:scripts/validate_mirror_parity.py",
|
| 4221 |
"exists": true,
|
| 4222 |
+
"bytes": 24812,
|
| 4223 |
+
"sha256": "c7d67a9e371f20c24951fbc357f64907365e60acab5ff189fa1675ff631bd27f"
|
| 4224 |
}
|
| 4225 |
},
|
| 4226 |
"failures": []
|
|
|
|
| 4356 |
"local": {
|
| 4357 |
"path": "repo:scripts/sync_hf_publish_mirrors.py",
|
| 4358 |
"exists": true,
|
| 4359 |
+
"bytes": 17917,
|
| 4360 |
+
"sha256": "0d9f9fa5669af7c8e64fa32cccde9c99d54a7bfa0fe4dd9d315eccdb0b06f585"
|
| 4361 |
},
|
| 4362 |
"mirrors": {
|
| 4363 |
"hf_artifacts": {
|
| 4364 |
"path": "hf_artifacts:scripts/sync_hf_publish_mirrors.py",
|
| 4365 |
"exists": true,
|
| 4366 |
+
"bytes": 17917,
|
| 4367 |
+
"sha256": "0d9f9fa5669af7c8e64fa32cccde9c99d54a7bfa0fe4dd9d315eccdb0b06f585"
|
| 4368 |
},
|
| 4369 |
"hf_model": {
|
| 4370 |
"path": "hf_model:scripts/sync_hf_publish_mirrors.py",
|
| 4371 |
"exists": true,
|
| 4372 |
+
"bytes": 17917,
|
| 4373 |
+
"sha256": "0d9f9fa5669af7c8e64fa32cccde9c99d54a7bfa0fe4dd9d315eccdb0b06f585"
|
| 4374 |
}
|
| 4375 |
},
|
| 4376 |
"failures": []
|
|
|
|
| 4560 |
"local": {
|
| 4561 |
"path": "repo:docs/index.html",
|
| 4562 |
"exists": true,
|
| 4563 |
+
"bytes": 232578,
|
| 4564 |
+
"sha256": "e5427cec333219299cdac4069419918ccbb02db8f3c98e6b99cb69ff9895c0f3"
|
| 4565 |
},
|
| 4566 |
"mirrors": {
|
| 4567 |
"hf_space": {
|
| 4568 |
"path": "hf_space:index.html",
|
| 4569 |
"exists": true,
|
| 4570 |
+
"bytes": 232578,
|
| 4571 |
+
"sha256": "e5427cec333219299cdac4069419918ccbb02db8f3c98e6b99cb69ff9895c0f3"
|
| 4572 |
},
|
| 4573 |
"hf_artifacts_root": {
|
| 4574 |
"path": "hf_artifacts:index.html",
|
| 4575 |
"exists": true,
|
| 4576 |
+
"bytes": 232578,
|
| 4577 |
+
"sha256": "e5427cec333219299cdac4069419918ccbb02db8f3c98e6b99cb69ff9895c0f3"
|
| 4578 |
},
|
| 4579 |
"hf_artifacts_docs": {
|
| 4580 |
"path": "hf_artifacts:docs/index.html",
|
| 4581 |
"exists": true,
|
| 4582 |
+
"bytes": 232578,
|
| 4583 |
+
"sha256": "e5427cec333219299cdac4069419918ccbb02db8f3c98e6b99cb69ff9895c0f3"
|
| 4584 |
},
|
| 4585 |
"hf_model": {
|
| 4586 |
"path": "hf_model:index.html",
|
| 4587 |
"exists": true,
|
| 4588 |
+
"bytes": 232578,
|
| 4589 |
+
"sha256": "e5427cec333219299cdac4069419918ccbb02db8f3c98e6b99cb69ff9895c0f3"
|
| 4590 |
},
|
| 4591 |
"hf_model_docs": {
|
| 4592 |
"path": "hf_model:docs/index.html",
|
| 4593 |
"exists": true,
|
| 4594 |
+
"bytes": 232578,
|
| 4595 |
+
"sha256": "e5427cec333219299cdac4069419918ccbb02db8f3c98e6b99cb69ff9895c0f3"
|
| 4596 |
}
|
| 4597 |
},
|
| 4598 |
"failures": []
|
|
|
|
| 10616 |
},
|
| 10617 |
"failures": []
|
| 10618 |
},
|
| 10619 |
+
{
|
| 10620 |
+
"name": "results/omni_finetune/model_output_task_probes_20260616/RUN_REPORT.md",
|
| 10621 |
+
"status": "pass",
|
| 10622 |
+
"local": {
|
| 10623 |
+
"path": "repo:results/omni_finetune/model_output_task_probes_20260616/RUN_REPORT.md",
|
| 10624 |
+
"exists": true,
|
| 10625 |
+
"bytes": 981,
|
| 10626 |
+
"sha256": "73e9e0f52a47f56afd1d27ce9be33de9ffb89f52f35f34c1fc1c3ee7290c34b1"
|
| 10627 |
+
},
|
| 10628 |
+
"mirrors": {
|
| 10629 |
+
"hf_space": {
|
| 10630 |
+
"path": "hf_space:results/omni_finetune/model_output_task_probes_20260616/RUN_REPORT.md",
|
| 10631 |
+
"exists": true,
|
| 10632 |
+
"bytes": 981,
|
| 10633 |
+
"sha256": "73e9e0f52a47f56afd1d27ce9be33de9ffb89f52f35f34c1fc1c3ee7290c34b1"
|
| 10634 |
+
},
|
| 10635 |
+
"hf_artifacts": {
|
| 10636 |
+
"path": "hf_artifacts:results/omni_finetune/model_output_task_probes_20260616/RUN_REPORT.md",
|
| 10637 |
+
"exists": true,
|
| 10638 |
+
"bytes": 981,
|
| 10639 |
+
"sha256": "73e9e0f52a47f56afd1d27ce9be33de9ffb89f52f35f34c1fc1c3ee7290c34b1"
|
| 10640 |
+
},
|
| 10641 |
+
"hf_model": {
|
| 10642 |
+
"path": "hf_model:results/omni_finetune/model_output_task_probes_20260616/RUN_REPORT.md",
|
| 10643 |
+
"exists": true,
|
| 10644 |
+
"bytes": 981,
|
| 10645 |
+
"sha256": "73e9e0f52a47f56afd1d27ce9be33de9ffb89f52f35f34c1fc1c3ee7290c34b1"
|
| 10646 |
+
}
|
| 10647 |
+
},
|
| 10648 |
+
"failures": []
|
| 10649 |
+
},
|
| 10650 |
+
{
|
| 10651 |
+
"name": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
|
| 10652 |
+
"status": "pass",
|
| 10653 |
+
"local": {
|
| 10654 |
+
"path": "repo:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
|
| 10655 |
+
"exists": true,
|
| 10656 |
+
"bytes": 21876,
|
| 10657 |
+
"sha256": "3ca7045bf932afe1e3b32ad8183c90056538220fa814249b993e1d574695288b"
|
| 10658 |
+
},
|
| 10659 |
+
"mirrors": {
|
| 10660 |
+
"hf_space": {
|
| 10661 |
+
"path": "hf_space:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
|
| 10662 |
+
"exists": true,
|
| 10663 |
+
"bytes": 21876,
|
| 10664 |
+
"sha256": "3ca7045bf932afe1e3b32ad8183c90056538220fa814249b993e1d574695288b"
|
| 10665 |
+
},
|
| 10666 |
+
"hf_artifacts": {
|
| 10667 |
+
"path": "hf_artifacts:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
|
| 10668 |
+
"exists": true,
|
| 10669 |
+
"bytes": 21876,
|
| 10670 |
+
"sha256": "3ca7045bf932afe1e3b32ad8183c90056538220fa814249b993e1d574695288b"
|
| 10671 |
+
},
|
| 10672 |
+
"hf_model": {
|
| 10673 |
+
"path": "hf_model:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
|
| 10674 |
+
"exists": true,
|
| 10675 |
+
"bytes": 21876,
|
| 10676 |
+
"sha256": "3ca7045bf932afe1e3b32ad8183c90056538220fa814249b993e1d574695288b"
|
| 10677 |
+
}
|
| 10678 |
+
},
|
| 10679 |
+
"failures": []
|
| 10680 |
+
},
|
| 10681 |
+
{
|
| 10682 |
+
"name": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/per_class_metrics.csv",
|
| 10683 |
+
"status": "pass",
|
| 10684 |
+
"local": {
|
| 10685 |
+
"path": "repo:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/per_class_metrics.csv",
|
| 10686 |
+
"exists": true,
|
| 10687 |
+
"bytes": 22569,
|
| 10688 |
+
"sha256": "e5d85ed571b3647fab6dad7b46944a65981d57c51a87708cc5e86a20d71d1358"
|
| 10689 |
+
},
|
| 10690 |
+
"mirrors": {
|
| 10691 |
+
"hf_space": {
|
| 10692 |
+
"path": "hf_space:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/per_class_metrics.csv",
|
| 10693 |
+
"exists": true,
|
| 10694 |
+
"bytes": 22569,
|
| 10695 |
+
"sha256": "e5d85ed571b3647fab6dad7b46944a65981d57c51a87708cc5e86a20d71d1358"
|
| 10696 |
+
},
|
| 10697 |
+
"hf_artifacts": {
|
| 10698 |
+
"path": "hf_artifacts:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/per_class_metrics.csv",
|
| 10699 |
+
"exists": true,
|
| 10700 |
+
"bytes": 22569,
|
| 10701 |
+
"sha256": "e5d85ed571b3647fab6dad7b46944a65981d57c51a87708cc5e86a20d71d1358"
|
| 10702 |
+
},
|
| 10703 |
+
"hf_model": {
|
| 10704 |
+
"path": "hf_model:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/per_class_metrics.csv",
|
| 10705 |
+
"exists": true,
|
| 10706 |
+
"bytes": 22569,
|
| 10707 |
+
"sha256": "e5d85ed571b3647fab6dad7b46944a65981d57c51a87708cc5e86a20d71d1358"
|
| 10708 |
+
}
|
| 10709 |
+
},
|
| 10710 |
+
"failures": []
|
| 10711 |
+
},
|
| 10712 |
+
{
|
| 10713 |
+
"name": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/predictions.csv",
|
| 10714 |
+
"status": "pass",
|
| 10715 |
+
"local": {
|
| 10716 |
+
"path": "repo:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/predictions.csv",
|
| 10717 |
+
"exists": true,
|
| 10718 |
+
"bytes": 176633,
|
| 10719 |
+
"sha256": "c198bd15c3a5589d4de1da072cb70840d245993d43801e60ca4682a267e3dfeb"
|
| 10720 |
+
},
|
| 10721 |
+
"mirrors": {
|
| 10722 |
+
"hf_space": {
|
| 10723 |
+
"path": "hf_space:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/predictions.csv",
|
| 10724 |
+
"exists": true,
|
| 10725 |
+
"bytes": 176633,
|
| 10726 |
+
"sha256": "c198bd15c3a5589d4de1da072cb70840d245993d43801e60ca4682a267e3dfeb"
|
| 10727 |
+
},
|
| 10728 |
+
"hf_artifacts": {
|
| 10729 |
+
"path": "hf_artifacts:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/predictions.csv",
|
| 10730 |
+
"exists": true,
|
| 10731 |
+
"bytes": 176633,
|
| 10732 |
+
"sha256": "c198bd15c3a5589d4de1da072cb70840d245993d43801e60ca4682a267e3dfeb"
|
| 10733 |
+
},
|
| 10734 |
+
"hf_model": {
|
| 10735 |
+
"path": "hf_model:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/predictions.csv",
|
| 10736 |
+
"exists": true,
|
| 10737 |
+
"bytes": 176633,
|
| 10738 |
+
"sha256": "c198bd15c3a5589d4de1da072cb70840d245993d43801e60ca4682a267e3dfeb"
|
| 10739 |
+
}
|
| 10740 |
+
},
|
| 10741 |
+
"failures": []
|
| 10742 |
+
},
|
| 10743 |
+
{
|
| 10744 |
+
"name": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
|
| 10745 |
+
"status": "pass",
|
| 10746 |
+
"local": {
|
| 10747 |
+
"path": "repo:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
|
| 10748 |
+
"exists": true,
|
| 10749 |
+
"bytes": 169938,
|
| 10750 |
+
"sha256": "d57137f78b67dc683fe29a7d3150f309384e58baa29938e8177b918359a0990c"
|
| 10751 |
+
},
|
| 10752 |
+
"mirrors": {
|
| 10753 |
+
"hf_space": {
|
| 10754 |
+
"path": "hf_space:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
|
| 10755 |
+
"exists": true,
|
| 10756 |
+
"bytes": 169938,
|
| 10757 |
+
"sha256": "d57137f78b67dc683fe29a7d3150f309384e58baa29938e8177b918359a0990c"
|
| 10758 |
+
},
|
| 10759 |
+
"hf_artifacts": {
|
| 10760 |
+
"path": "hf_artifacts:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
|
| 10761 |
+
"exists": true,
|
| 10762 |
+
"bytes": 169938,
|
| 10763 |
+
"sha256": "d57137f78b67dc683fe29a7d3150f309384e58baa29938e8177b918359a0990c"
|
| 10764 |
+
},
|
| 10765 |
+
"hf_model": {
|
| 10766 |
+
"path": "hf_model:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
|
| 10767 |
+
"exists": true,
|
| 10768 |
+
"bytes": 169938,
|
| 10769 |
+
"sha256": "d57137f78b67dc683fe29a7d3150f309384e58baa29938e8177b918359a0990c"
|
| 10770 |
+
}
|
| 10771 |
+
},
|
| 10772 |
+
"failures": []
|
| 10773 |
+
},
|
| 10774 |
+
{
|
| 10775 |
+
"name": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/per_class_metrics.csv",
|
| 10776 |
+
"status": "pass",
|
| 10777 |
+
"local": {
|
| 10778 |
+
"path": "repo:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/per_class_metrics.csv",
|
| 10779 |
+
"exists": true,
|
| 10780 |
+
"bytes": 188619,
|
| 10781 |
+
"sha256": "1f4b312a07a8e0a67a1cd6a83e2c83d0c91804f2d8a51c4eeda207444ae262e9"
|
| 10782 |
+
},
|
| 10783 |
+
"mirrors": {
|
| 10784 |
+
"hf_space": {
|
| 10785 |
+
"path": "hf_space:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/per_class_metrics.csv",
|
| 10786 |
+
"exists": true,
|
| 10787 |
+
"bytes": 188619,
|
| 10788 |
+
"sha256": "1f4b312a07a8e0a67a1cd6a83e2c83d0c91804f2d8a51c4eeda207444ae262e9"
|
| 10789 |
+
},
|
| 10790 |
+
"hf_artifacts": {
|
| 10791 |
+
"path": "hf_artifacts:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/per_class_metrics.csv",
|
| 10792 |
+
"exists": true,
|
| 10793 |
+
"bytes": 188619,
|
| 10794 |
+
"sha256": "1f4b312a07a8e0a67a1cd6a83e2c83d0c91804f2d8a51c4eeda207444ae262e9"
|
| 10795 |
+
},
|
| 10796 |
+
"hf_model": {
|
| 10797 |
+
"path": "hf_model:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/per_class_metrics.csv",
|
| 10798 |
+
"exists": true,
|
| 10799 |
+
"bytes": 188619,
|
| 10800 |
+
"sha256": "1f4b312a07a8e0a67a1cd6a83e2c83d0c91804f2d8a51c4eeda207444ae262e9"
|
| 10801 |
+
}
|
| 10802 |
+
},
|
| 10803 |
+
"failures": []
|
| 10804 |
+
},
|
| 10805 |
+
{
|
| 10806 |
+
"name": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/predictions.csv",
|
| 10807 |
+
"status": "pass",
|
| 10808 |
+
"local": {
|
| 10809 |
+
"path": "repo:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/predictions.csv",
|
| 10810 |
+
"exists": true,
|
| 10811 |
+
"bytes": 1848188,
|
| 10812 |
+
"sha256": "521edd5ce67341b1a4dba1dcf0f6e097ab9477536ef9cce96ec3bd0908c35469"
|
| 10813 |
+
},
|
| 10814 |
+
"mirrors": {
|
| 10815 |
+
"hf_space": {
|
| 10816 |
+
"path": "hf_space:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/predictions.csv",
|
| 10817 |
+
"exists": true,
|
| 10818 |
+
"bytes": 1848188,
|
| 10819 |
+
"sha256": "521edd5ce67341b1a4dba1dcf0f6e097ab9477536ef9cce96ec3bd0908c35469"
|
| 10820 |
+
},
|
| 10821 |
+
"hf_artifacts": {
|
| 10822 |
+
"path": "hf_artifacts:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/predictions.csv",
|
| 10823 |
+
"exists": true,
|
| 10824 |
+
"bytes": 1848188,
|
| 10825 |
+
"sha256": "521edd5ce67341b1a4dba1dcf0f6e097ab9477536ef9cce96ec3bd0908c35469"
|
| 10826 |
+
},
|
| 10827 |
+
"hf_model": {
|
| 10828 |
+
"path": "hf_model:results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/predictions.csv",
|
| 10829 |
+
"exists": true,
|
| 10830 |
+
"bytes": 1848188,
|
| 10831 |
+
"sha256": "521edd5ce67341b1a4dba1dcf0f6e097ab9477536ef9cce96ec3bd0908c35469"
|
| 10832 |
+
}
|
| 10833 |
+
},
|
| 10834 |
+
"failures": []
|
| 10835 |
+
},
|
| 10836 |
+
{
|
| 10837 |
+
"name": "results/omni_finetune/model_output_task_probes_20260616/summary.json",
|
| 10838 |
+
"status": "pass",
|
| 10839 |
+
"local": {
|
| 10840 |
+
"path": "repo:results/omni_finetune/model_output_task_probes_20260616/summary.json",
|
| 10841 |
+
"exists": true,
|
| 10842 |
+
"bytes": 2000,
|
| 10843 |
+
"sha256": "de95421d93fec5d12049e492db9fbe7c10af5c2cd0d1039da82c05ec82bf98c5"
|
| 10844 |
+
},
|
| 10845 |
+
"mirrors": {
|
| 10846 |
+
"hf_space": {
|
| 10847 |
+
"path": "hf_space:results/omni_finetune/model_output_task_probes_20260616/summary.json",
|
| 10848 |
+
"exists": true,
|
| 10849 |
+
"bytes": 2000,
|
| 10850 |
+
"sha256": "de95421d93fec5d12049e492db9fbe7c10af5c2cd0d1039da82c05ec82bf98c5"
|
| 10851 |
+
},
|
| 10852 |
+
"hf_artifacts": {
|
| 10853 |
+
"path": "hf_artifacts:results/omni_finetune/model_output_task_probes_20260616/summary.json",
|
| 10854 |
+
"exists": true,
|
| 10855 |
+
"bytes": 2000,
|
| 10856 |
+
"sha256": "de95421d93fec5d12049e492db9fbe7c10af5c2cd0d1039da82c05ec82bf98c5"
|
| 10857 |
+
},
|
| 10858 |
+
"hf_model": {
|
| 10859 |
+
"path": "hf_model:results/omni_finetune/model_output_task_probes_20260616/summary.json",
|
| 10860 |
+
"exists": true,
|
| 10861 |
+
"bytes": 2000,
|
| 10862 |
+
"sha256": "de95421d93fec5d12049e492db9fbe7c10af5c2cd0d1039da82c05ec82bf98c5"
|
| 10863 |
+
}
|
| 10864 |
+
},
|
| 10865 |
+
"failures": []
|
| 10866 |
+
},
|
| 10867 |
{
|
| 10868 |
"name": "results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md",
|
| 10869 |
"status": "pass",
|
|
|
|
| 18806 |
"local": {
|
| 18807 |
"path": "repo:TASK_METHOD_20_GAP_AUDIT.md",
|
| 18808 |
"exists": true,
|
| 18809 |
+
"bytes": 16234,
|
| 18810 |
+
"sha256": "351e55637374770221115486fd6ef46550ae923c4fae5c01d3bb1e777b5a44fb"
|
| 18811 |
},
|
| 18812 |
"mirrors": {
|
| 18813 |
"hf_space": {
|
| 18814 |
"path": "hf_space:TASK_METHOD_20_GAP_AUDIT.md",
|
| 18815 |
"exists": true,
|
| 18816 |
+
"bytes": 16234,
|
| 18817 |
+
"sha256": "351e55637374770221115486fd6ef46550ae923c4fae5c01d3bb1e777b5a44fb"
|
| 18818 |
},
|
| 18819 |
"hf_artifacts": {
|
| 18820 |
"path": "hf_artifacts:TASK_METHOD_20_GAP_AUDIT.md",
|
| 18821 |
"exists": true,
|
| 18822 |
+
"bytes": 16234,
|
| 18823 |
+
"sha256": "351e55637374770221115486fd6ef46550ae923c4fae5c01d3bb1e777b5a44fb"
|
| 18824 |
},
|
| 18825 |
"hf_model": {
|
| 18826 |
"path": "hf_model:TASK_METHOD_20_GAP_AUDIT.md",
|
| 18827 |
"exists": true,
|
| 18828 |
+
"bytes": 16234,
|
| 18829 |
+
"sha256": "351e55637374770221115486fd6ef46550ae923c4fae5c01d3bb1e777b5a44fb"
|
| 18830 |
}
|
| 18831 |
},
|
| 18832 |
"failures": []
|
|
|
|
| 18837 |
"local": {
|
| 18838 |
"path": "repo:TASK_METHOD_20_RESULT_MATRIX.md",
|
| 18839 |
"exists": true,
|
| 18840 |
+
"bytes": 4208,
|
| 18841 |
+
"sha256": "bfe01b258fe121448d2b5b188c6995041c81d7ce1286dcd2ee0bf9c01d6a6193"
|
| 18842 |
},
|
| 18843 |
"mirrors": {
|
| 18844 |
"hf_space": {
|
| 18845 |
"path": "hf_space:TASK_METHOD_20_RESULT_MATRIX.md",
|
| 18846 |
"exists": true,
|
| 18847 |
+
"bytes": 4208,
|
| 18848 |
+
"sha256": "bfe01b258fe121448d2b5b188c6995041c81d7ce1286dcd2ee0bf9c01d6a6193"
|
| 18849 |
},
|
| 18850 |
"hf_artifacts": {
|
| 18851 |
"path": "hf_artifacts:TASK_METHOD_20_RESULT_MATRIX.md",
|
| 18852 |
"exists": true,
|
| 18853 |
+
"bytes": 4208,
|
| 18854 |
+
"sha256": "bfe01b258fe121448d2b5b188c6995041c81d7ce1286dcd2ee0bf9c01d6a6193"
|
| 18855 |
},
|
| 18856 |
"hf_model": {
|
| 18857 |
"path": "hf_model:TASK_METHOD_20_RESULT_MATRIX.md",
|
| 18858 |
"exists": true,
|
| 18859 |
+
"bytes": 4208,
|
| 18860 |
+
"sha256": "bfe01b258fe121448d2b5b188c6995041c81d7ce1286dcd2ee0bf9c01d6a6193"
|
| 18861 |
}
|
| 18862 |
},
|
| 18863 |
"failures": []
|
docs/data/public_surface_qa.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Project Surface",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
|
| 6 |
"checks": [
|
| 7 |
{
|
|
@@ -18,7 +18,7 @@
|
|
| 18 |
"website_integrity": {
|
| 19 |
"exists": true,
|
| 20 |
"status": "pass",
|
| 21 |
-
"generated_at_utc": "2026-06-
|
| 22 |
},
|
| 23 |
"rendered_site_check": {
|
| 24 |
"exists": true,
|
|
@@ -28,27 +28,27 @@
|
|
| 28 |
"task_surface_integrity": {
|
| 29 |
"exists": true,
|
| 30 |
"status": "pass",
|
| 31 |
-
"generated_at_utc": "2026-06-
|
| 32 |
},
|
| 33 |
"source_alignment": {
|
| 34 |
"exists": true,
|
| 35 |
"status": "pass",
|
| 36 |
-
"generated_at_utc": "2026-06-
|
| 37 |
},
|
| 38 |
"scale_up_status": {
|
| 39 |
"exists": true,
|
| 40 |
"status": "pass",
|
| 41 |
-
"generated_at_utc": "2026-06-
|
| 42 |
},
|
| 43 |
"publication_package": {
|
| 44 |
"exists": true,
|
| 45 |
"status": "pass",
|
| 46 |
-
"generated_at_utc": "2026-06-
|
| 47 |
},
|
| 48 |
"mirror_parity": {
|
| 49 |
"exists": true,
|
| 50 |
"status": "pass",
|
| 51 |
-
"generated_at_utc": "2026-06-
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"failures": {}
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Project Surface",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-16T13:36:07+00:00",
|
| 5 |
"scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
|
| 6 |
"checks": [
|
| 7 |
{
|
|
|
|
| 18 |
"website_integrity": {
|
| 19 |
"exists": true,
|
| 20 |
"status": "pass",
|
| 21 |
+
"generated_at_utc": "2026-06-16T13:35:52+00:00"
|
| 22 |
},
|
| 23 |
"rendered_site_check": {
|
| 24 |
"exists": true,
|
|
|
|
| 28 |
"task_surface_integrity": {
|
| 29 |
"exists": true,
|
| 30 |
"status": "pass",
|
| 31 |
+
"generated_at_utc": "2026-06-16T13:35:48+00:00"
|
| 32 |
},
|
| 33 |
"source_alignment": {
|
| 34 |
"exists": true,
|
| 35 |
"status": "pass",
|
| 36 |
+
"generated_at_utc": "2026-06-16T13:35:47+00:00"
|
| 37 |
},
|
| 38 |
"scale_up_status": {
|
| 39 |
"exists": true,
|
| 40 |
"status": "pass",
|
| 41 |
+
"generated_at_utc": "2026-06-16T13:35:51+00:00"
|
| 42 |
},
|
| 43 |
"publication_package": {
|
| 44 |
"exists": true,
|
| 45 |
"status": "pass",
|
| 46 |
+
"generated_at_utc": "2026-06-16T13:36:07+00:00"
|
| 47 |
},
|
| 48 |
"mirror_parity": {
|
| 49 |
"exists": true,
|
| 50 |
"status": "pass",
|
| 51 |
+
"generated_at_utc": "2026-06-16T13:35:58+00:00"
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"failures": {}
|
docs/data/publication_audit.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
@@ -200,8 +200,8 @@
|
|
| 200 |
"github_repo": {
|
| 201 |
"root": "repo",
|
| 202 |
"exists": true,
|
| 203 |
-
"file_count":
|
| 204 |
-
"text_file_count":
|
| 205 |
"largest_file": {
|
| 206 |
"path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
|
| 207 |
"bytes": 55702978
|
|
@@ -211,8 +211,8 @@
|
|
| 211 |
"hf_space_bundle": {
|
| 212 |
"root": "hf_publish/space",
|
| 213 |
"exists": true,
|
| 214 |
-
"file_count":
|
| 215 |
-
"text_file_count":
|
| 216 |
"largest_file": {
|
| 217 |
"path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
|
| 218 |
"bytes": 55702978
|
|
@@ -222,8 +222,8 @@
|
|
| 222 |
"hf_artifact_bundle": {
|
| 223 |
"root": "hf_publish/artifacts",
|
| 224 |
"exists": true,
|
| 225 |
-
"file_count":
|
| 226 |
-
"text_file_count":
|
| 227 |
"largest_file": {
|
| 228 |
"path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
|
| 229 |
"bytes": 55702978
|
|
@@ -233,8 +233,8 @@
|
|
| 233 |
"hf_model_bundle": {
|
| 234 |
"root": "hf_publish/model",
|
| 235 |
"exists": true,
|
| 236 |
-
"file_count":
|
| 237 |
-
"text_file_count":
|
| 238 |
"largest_file": {
|
| 239 |
"path": "pytorch_model.bin",
|
| 240 |
"bytes": 93495480
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-16T13:36:07+00:00",
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
|
|
| 200 |
"github_repo": {
|
| 201 |
"root": "repo",
|
| 202 |
"exists": true,
|
| 203 |
+
"file_count": 1205,
|
| 204 |
+
"text_file_count": 1010,
|
| 205 |
"largest_file": {
|
| 206 |
"path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
|
| 207 |
"bytes": 55702978
|
|
|
|
| 211 |
"hf_space_bundle": {
|
| 212 |
"root": "hf_publish/space",
|
| 213 |
"exists": true,
|
| 214 |
+
"file_count": 984,
|
| 215 |
+
"text_file_count": 828,
|
| 216 |
"largest_file": {
|
| 217 |
"path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
|
| 218 |
"bytes": 55702978
|
|
|
|
| 222 |
"hf_artifact_bundle": {
|
| 223 |
"root": "hf_publish/artifacts",
|
| 224 |
"exists": true,
|
| 225 |
+
"file_count": 2331,
|
| 226 |
+
"text_file_count": 1017,
|
| 227 |
"largest_file": {
|
| 228 |
"path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
|
| 229 |
"bytes": 55702978
|
|
|
|
| 233 |
"hf_model_bundle": {
|
| 234 |
"root": "hf_publish/model",
|
| 235 |
"exists": true,
|
| 236 |
+
"file_count": 2764,
|
| 237 |
+
"text_file_count": 1177,
|
| 238 |
"largest_file": {
|
| 239 |
"path": "pytorch_model.bin",
|
| 240 |
"bytes": 93495480
|
docs/data/quality_gates.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Release Checks",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
|
| 6 |
"automated_gates": [
|
| 7 |
{
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Release Checks",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-16T13:36:07+00:00",
|
| 5 |
"rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
|
| 6 |
"automated_gates": [
|
| 7 |
{
|
docs/data/scope_claims_audit.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"summary": {
|
| 5 |
"qwen3_omni_verified_diagnostic_pilot": true,
|
| 6 |
"dataset_manifest_num_episodes": 119,
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-16T13:35:51+00:00",
|
| 4 |
"summary": {
|
| 5 |
"qwen3_omni_verified_diagnostic_pilot": true,
|
| 6 |
"dataset_manifest_num_episodes": 119,
|
docs/data/single_episode_task_model_radar.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Single-Episode 20-Task Radar",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"description": "Minimal and Neural MLP baselines on the one public sample episode, both scored on all 20 task contracts.",
|
| 6 |
"task_count": 20,
|
| 7 |
"method_count": 2,
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Single-Episode 20-Task Radar",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-16T13:35:38+00:00",
|
| 5 |
"description": "Minimal and Neural MLP baselines on the one public sample episode, both scored on all 20 task contracts.",
|
| 6 |
"task_count": 20,
|
| 7 |
"method_count": 2,
|
docs/data/source_alignment_audit.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Source Alignment Note",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
|
| 6 |
"alignment_summary": {
|
| 7 |
"full_dataset_repo": "ropedia-ai/xperience-10m",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Source Alignment Note",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-16T13:35:47+00:00",
|
| 5 |
"alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
|
| 6 |
"alignment_summary": {
|
| 7 |
"full_dataset_repo": "ropedia-ai/xperience-10m",
|
docs/data/task_method_20_result_matrix.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
{
|
| 2 |
"title": "Task Method 20-Result Matrix",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"task_count": 20,
|
| 6 |
"method_count": 9,
|
| 7 |
"method_task_record_count": 180,
|
| 8 |
-
"scored_method_task_count":
|
| 9 |
"series": [
|
| 10 |
{
|
| 11 |
"id": "minimal",
|
|
@@ -158,20 +158,20 @@
|
|
| 158 |
"kind": "partial_128_episode_foundation_model_overlay",
|
| 159 |
"scope": "128 selected episodes, held-out test",
|
| 160 |
"stroke_dasharray": "7 7",
|
| 161 |
-
"method_detail": "Verified held-out Qwen3-Omni v6 LoRA metrics
|
| 162 |
"plotted_as": "colored point overlay",
|
| 163 |
"result_record_count": 20,
|
| 164 |
-
"scored_task_count":
|
| 165 |
-
"covered_task_count":
|
| 166 |
"proxy_scored_task_count": 0,
|
| 167 |
-
"scoreless_task_count":
|
| 168 |
"unsupported_task_count": 0,
|
| 169 |
-
"not_evaluated_task_count":
|
| 170 |
"status_counts": {
|
| 171 |
-
"not_evaluated_in_verified_package":
|
| 172 |
-
"scored":
|
| 173 |
},
|
| 174 |
-
"coverage_fraction": 0.
|
| 175 |
"result_record_fraction": 1.0
|
| 176 |
},
|
| 177 |
{
|
|
@@ -182,20 +182,20 @@
|
|
| 182 |
"kind": "partial_128_episode_foundation_model_overlay",
|
| 183 |
"scope": "128 selected episodes, held-out test",
|
| 184 |
"stroke_dasharray": "4 7",
|
| 185 |
-
"method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation.",
|
| 186 |
"plotted_as": "colored point overlay",
|
| 187 |
"result_record_count": 20,
|
| 188 |
-
"scored_task_count":
|
| 189 |
-
"covered_task_count":
|
| 190 |
"proxy_scored_task_count": 0,
|
| 191 |
-
"scoreless_task_count":
|
| 192 |
"unsupported_task_count": 0,
|
| 193 |
-
"not_evaluated_task_count":
|
| 194 |
"status_counts": {
|
| 195 |
-
"not_evaluated_in_verified_package":
|
| 196 |
-
"scored":
|
| 197 |
},
|
| 198 |
-
"coverage_fraction": 0.
|
| 199 |
"result_record_fraction": 1.0
|
| 200 |
},
|
| 201 |
{
|
|
@@ -2768,17 +2768,17 @@
|
|
| 2768 |
"task_label": "Action-Object Relation Prediction",
|
| 2769 |
"series_id": "qwen3_omni_v6_lora",
|
| 2770 |
"method": "Qwen3-Omni v6 LoRA",
|
| 2771 |
-
"status": "
|
| 2772 |
-
"status_label": "
|
| 2773 |
-
"scored":
|
| 2774 |
"proxy_scored": false,
|
| 2775 |
-
"raw":
|
| 2776 |
-
"raw_text": "
|
| 2777 |
-
"normalized_score":
|
| 2778 |
-
"metric_key": "
|
| 2779 |
-
"source":
|
| 2780 |
"scope": "multi_episode_128_partial_model_overlay",
|
| 2781 |
-
"reason":
|
| 2782 |
},
|
| 2783 |
{
|
| 2784 |
"task_number": 16,
|
|
@@ -2786,17 +2786,17 @@
|
|
| 2786 |
"task_label": "Action-Object Relation Prediction",
|
| 2787 |
"series_id": "cosmos3_super_reasoner",
|
| 2788 |
"method": "Cosmos3-Super Reasoner",
|
| 2789 |
-
"status": "
|
| 2790 |
-
"status_label": "
|
| 2791 |
-
"scored":
|
| 2792 |
"proxy_scored": false,
|
| 2793 |
-
"raw":
|
| 2794 |
-
"raw_text": "
|
| 2795 |
-
"normalized_score":
|
| 2796 |
-
"metric_key": "
|
| 2797 |
-
"source":
|
| 2798 |
"scope": "multi_episode_128_partial_model_overlay",
|
| 2799 |
-
"reason":
|
| 2800 |
},
|
| 2801 |
{
|
| 2802 |
"task_number": 16,
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Task Method 20-Result Matrix",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-16T13:35:38+00:00",
|
| 5 |
"task_count": 20,
|
| 6 |
"method_count": 9,
|
| 7 |
"method_task_record_count": 180,
|
| 8 |
+
"scored_method_task_count": 113,
|
| 9 |
"series": [
|
| 10 |
{
|
| 11 |
"id": "minimal",
|
|
|
|
| 158 |
"kind": "partial_128_episode_foundation_model_overlay",
|
| 159 |
"scope": "128 selected episodes, held-out test",
|
| 160 |
"stroke_dasharray": "7 7",
|
| 161 |
+
"method_detail": "Verified held-out Qwen3-Omni v6 LoRA metrics, plus task 16 scored from existing verified action/object JSON.",
|
| 162 |
"plotted_as": "colored point overlay",
|
| 163 |
"result_record_count": 20,
|
| 164 |
+
"scored_task_count": 7,
|
| 165 |
+
"covered_task_count": 7,
|
| 166 |
"proxy_scored_task_count": 0,
|
| 167 |
+
"scoreless_task_count": 13,
|
| 168 |
"unsupported_task_count": 0,
|
| 169 |
+
"not_evaluated_task_count": 13,
|
| 170 |
"status_counts": {
|
| 171 |
+
"not_evaluated_in_verified_package": 13,
|
| 172 |
+
"scored": 7
|
| 173 |
},
|
| 174 |
+
"coverage_fraction": 0.35,
|
| 175 |
"result_record_fraction": 1.0
|
| 176 |
},
|
| 177 |
{
|
|
|
|
| 182 |
"kind": "partial_128_episode_foundation_model_overlay",
|
| 183 |
"scope": "128 selected episodes, held-out test",
|
| 184 |
"stroke_dasharray": "4 7",
|
| 185 |
+
"method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation, plus task 16 scored from existing verified action/object JSON.",
|
| 186 |
"plotted_as": "colored point overlay",
|
| 187 |
"result_record_count": 20,
|
| 188 |
+
"scored_task_count": 7,
|
| 189 |
+
"covered_task_count": 7,
|
| 190 |
"proxy_scored_task_count": 0,
|
| 191 |
+
"scoreless_task_count": 13,
|
| 192 |
"unsupported_task_count": 0,
|
| 193 |
+
"not_evaluated_task_count": 13,
|
| 194 |
"status_counts": {
|
| 195 |
+
"not_evaluated_in_verified_package": 13,
|
| 196 |
+
"scored": 7
|
| 197 |
},
|
| 198 |
+
"coverage_fraction": 0.35,
|
| 199 |
"result_record_fraction": 1.0
|
| 200 |
},
|
| 201 |
{
|
|
|
|
| 2768 |
"task_label": "Action-Object Relation Prediction",
|
| 2769 |
"series_id": "qwen3_omni_v6_lora",
|
| 2770 |
"method": "Qwen3-Omni v6 LoRA",
|
| 2771 |
+
"status": "scored",
|
| 2772 |
+
"status_label": "scored",
|
| 2773 |
+
"scored": true,
|
| 2774 |
"proxy_scored": false,
|
| 2775 |
+
"raw": 0.0002220083079671497,
|
| 2776 |
+
"raw_text": "0.0002",
|
| 2777 |
+
"normalized_score": 0.0002220083079671497,
|
| 2778 |
+
"metric_key": "action_object_relation_macro_f1",
|
| 2779 |
+
"source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
|
| 2780 |
"scope": "multi_episode_128_partial_model_overlay",
|
| 2781 |
+
"reason": null
|
| 2782 |
},
|
| 2783 |
{
|
| 2784 |
"task_number": 16,
|
|
|
|
| 2786 |
"task_label": "Action-Object Relation Prediction",
|
| 2787 |
"series_id": "cosmos3_super_reasoner",
|
| 2788 |
"method": "Cosmos3-Super Reasoner",
|
| 2789 |
+
"status": "scored",
|
| 2790 |
+
"status_label": "scored",
|
| 2791 |
+
"scored": true,
|
| 2792 |
"proxy_scored": false,
|
| 2793 |
+
"raw": 0.0,
|
| 2794 |
+
"raw_text": "0.0000",
|
| 2795 |
+
"normalized_score": 0.0,
|
| 2796 |
+
"metric_key": "action_object_relation_macro_f1",
|
| 2797 |
+
"source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
|
| 2798 |
"scope": "multi_episode_128_partial_model_overlay",
|
| 2799 |
+
"reason": null
|
| 2800 |
},
|
| 2801 |
{
|
| 2802 |
"task_number": 16,
|
docs/data/task_surface_integrity.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"summary": {
|
| 5 |
"task_count": 12,
|
| 6 |
"expected_task_count": 12,
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-16T13:35:48+00:00",
|
| 4 |
"summary": {
|
| 5 |
"task_count": 12,
|
| 6 |
"expected_task_count": 12,
|
docs/data/unified_task_model_radar.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
{
|
| 2 |
"title": "Unified 20-Task Model Radar",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"task_count": 20,
|
| 6 |
"method_count": 9,
|
| 7 |
"method_task_record_count": 180,
|
| 8 |
-
"scored_method_task_count":
|
| 9 |
"normalization_policy": {
|
| 10 |
"higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]",
|
| 11 |
"lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
|
|
@@ -167,20 +167,20 @@
|
|
| 167 |
"kind": "partial_128_episode_foundation_model_overlay",
|
| 168 |
"scope": "128 selected episodes, held-out test",
|
| 169 |
"stroke_dasharray": "7 7",
|
| 170 |
-
"method_detail": "Verified held-out Qwen3-Omni v6 LoRA metrics
|
| 171 |
"plotted_as": "colored point overlay",
|
| 172 |
"result_record_count": 20,
|
| 173 |
-
"scored_task_count":
|
| 174 |
-
"covered_task_count":
|
| 175 |
"proxy_scored_task_count": 0,
|
| 176 |
-
"scoreless_task_count":
|
| 177 |
"unsupported_task_count": 0,
|
| 178 |
-
"not_evaluated_task_count":
|
| 179 |
"status_counts": {
|
| 180 |
-
"not_evaluated_in_verified_package":
|
| 181 |
-
"scored":
|
| 182 |
},
|
| 183 |
-
"coverage_fraction": 0.
|
| 184 |
"result_record_fraction": 1.0
|
| 185 |
},
|
| 186 |
{
|
|
@@ -191,20 +191,20 @@
|
|
| 191 |
"kind": "partial_128_episode_foundation_model_overlay",
|
| 192 |
"scope": "128 selected episodes, held-out test",
|
| 193 |
"stroke_dasharray": "4 7",
|
| 194 |
-
"method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation.",
|
| 195 |
"plotted_as": "colored point overlay",
|
| 196 |
"result_record_count": 20,
|
| 197 |
-
"scored_task_count":
|
| 198 |
-
"covered_task_count":
|
| 199 |
"proxy_scored_task_count": 0,
|
| 200 |
-
"scoreless_task_count":
|
| 201 |
"unsupported_task_count": 0,
|
| 202 |
-
"not_evaluated_task_count":
|
| 203 |
"status_counts": {
|
| 204 |
-
"not_evaluated_in_verified_package":
|
| 205 |
-
"scored":
|
| 206 |
},
|
| 207 |
-
"coverage_fraction": 0.
|
| 208 |
"result_record_fraction": 1.0
|
| 209 |
},
|
| 210 |
{
|
|
@@ -1930,6 +1930,28 @@
|
|
| 1930 |
"raw_text": "0.0000",
|
| 1931 |
"status_label": "scored"
|
| 1932 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1933 |
"raw128_simple": {
|
| 1934 |
"raw": 0.0,
|
| 1935 |
"metric_key": "macro_f1",
|
|
@@ -1974,28 +1996,6 @@
|
|
| 1974 |
"raw_text": "n/a",
|
| 1975 |
"status_label": "not supported"
|
| 1976 |
},
|
| 1977 |
-
"qwen3_omni_v6_lora": {
|
| 1978 |
-
"raw": null,
|
| 1979 |
-
"metric_key": "macro_f1",
|
| 1980 |
-
"source": null,
|
| 1981 |
-
"scope": "multi_episode_128_partial_model_overlay",
|
| 1982 |
-
"status": "not_evaluated_in_verified_package",
|
| 1983 |
-
"reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
|
| 1984 |
-
"normalized_score": null,
|
| 1985 |
-
"raw_text": "n/a",
|
| 1986 |
-
"status_label": "not evaluated"
|
| 1987 |
-
},
|
| 1988 |
-
"cosmos3_super_reasoner": {
|
| 1989 |
-
"raw": null,
|
| 1990 |
-
"metric_key": "macro_f1",
|
| 1991 |
-
"source": null,
|
| 1992 |
-
"scope": "multi_episode_128_partial_model_overlay",
|
| 1993 |
-
"status": "not_evaluated_in_verified_package",
|
| 1994 |
-
"reason": "the verified public model package did not ask this branch to emit that task target; a new task-specific evaluation package is required for a numeric score",
|
| 1995 |
-
"normalized_score": null,
|
| 1996 |
-
"raw_text": "n/a",
|
| 1997 |
-
"status_label": "not evaluated"
|
| 1998 |
-
},
|
| 1999 |
"cosmos3_nano_future_window": {
|
| 2000 |
"raw": null,
|
| 2001 |
"metric_key": "macro_f1",
|
|
@@ -2492,7 +2492,7 @@
|
|
| 2492 |
"title": "Qwen3-Omni v6 LoRA",
|
| 2493 |
"status": "verified",
|
| 2494 |
"task_aligned_axes": "Qwen3",
|
| 2495 |
-
"coverage": "20 records /
|
| 2496 |
"headline": "JSON validity 0.9990; action macro-F1 0.0029",
|
| 2497 |
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json"
|
| 2498 |
},
|
|
@@ -2500,7 +2500,7 @@
|
|
| 2500 |
"id": "cosmos3_super_reasoner",
|
| 2501 |
"title": "Cosmos3-Super Reasoner",
|
| 2502 |
"status": "verified_base_weight_eval",
|
| 2503 |
-
"coverage": "20 records /
|
| 2504 |
"headline": "JSON validity 0.5112; action macro-F1 0.0008",
|
| 2505 |
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json"
|
| 2506 |
},
|
|
@@ -5066,17 +5066,17 @@
|
|
| 5066 |
"task_label": "Action-Object Relation Prediction",
|
| 5067 |
"series_id": "qwen3_omni_v6_lora",
|
| 5068 |
"method": "Qwen3-Omni v6 LoRA",
|
| 5069 |
-
"status": "
|
| 5070 |
-
"status_label": "
|
| 5071 |
-
"scored":
|
| 5072 |
"proxy_scored": false,
|
| 5073 |
-
"raw":
|
| 5074 |
-
"raw_text": "
|
| 5075 |
-
"normalized_score":
|
| 5076 |
-
"metric_key": "
|
| 5077 |
-
"source":
|
| 5078 |
"scope": "multi_episode_128_partial_model_overlay",
|
| 5079 |
-
"reason":
|
| 5080 |
},
|
| 5081 |
{
|
| 5082 |
"task_number": 16,
|
|
@@ -5084,17 +5084,17 @@
|
|
| 5084 |
"task_label": "Action-Object Relation Prediction",
|
| 5085 |
"series_id": "cosmos3_super_reasoner",
|
| 5086 |
"method": "Cosmos3-Super Reasoner",
|
| 5087 |
-
"status": "
|
| 5088 |
-
"status_label": "
|
| 5089 |
-
"scored":
|
| 5090 |
"proxy_scored": false,
|
| 5091 |
-
"raw":
|
| 5092 |
-
"raw_text": "
|
| 5093 |
-
"normalized_score":
|
| 5094 |
-
"metric_key": "
|
| 5095 |
-
"source":
|
| 5096 |
"scope": "multi_episode_128_partial_model_overlay",
|
| 5097 |
-
"reason":
|
| 5098 |
},
|
| 5099 |
{
|
| 5100 |
"task_number": 16,
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Unified 20-Task Model Radar",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-16T13:35:38+00:00",
|
| 5 |
"task_count": 20,
|
| 6 |
"method_count": 9,
|
| 7 |
"method_task_record_count": 180,
|
| 8 |
+
"scored_method_task_count": 113,
|
| 9 |
"normalization_policy": {
|
| 10 |
"higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]",
|
| 11 |
"lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task",
|
|
|
|
| 167 |
"kind": "partial_128_episode_foundation_model_overlay",
|
| 168 |
"scope": "128 selected episodes, held-out test",
|
| 169 |
"stroke_dasharray": "7 7",
|
| 170 |
+
"method_detail": "Verified held-out Qwen3-Omni v6 LoRA metrics, plus task 16 scored from existing verified action/object JSON.",
|
| 171 |
"plotted_as": "colored point overlay",
|
| 172 |
"result_record_count": 20,
|
| 173 |
+
"scored_task_count": 7,
|
| 174 |
+
"covered_task_count": 7,
|
| 175 |
"proxy_scored_task_count": 0,
|
| 176 |
+
"scoreless_task_count": 13,
|
| 177 |
"unsupported_task_count": 0,
|
| 178 |
+
"not_evaluated_task_count": 13,
|
| 179 |
"status_counts": {
|
| 180 |
+
"not_evaluated_in_verified_package": 13,
|
| 181 |
+
"scored": 7
|
| 182 |
},
|
| 183 |
+
"coverage_fraction": 0.35,
|
| 184 |
"result_record_fraction": 1.0
|
| 185 |
},
|
| 186 |
{
|
|
|
|
| 191 |
"kind": "partial_128_episode_foundation_model_overlay",
|
| 192 |
"scope": "128 selected episodes, held-out test",
|
| 193 |
"stroke_dasharray": "4 7",
|
| 194 |
+
"method_detail": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation, plus task 16 scored from existing verified action/object JSON.",
|
| 195 |
"plotted_as": "colored point overlay",
|
| 196 |
"result_record_count": 20,
|
| 197 |
+
"scored_task_count": 7,
|
| 198 |
+
"covered_task_count": 7,
|
| 199 |
"proxy_scored_task_count": 0,
|
| 200 |
+
"scoreless_task_count": 13,
|
| 201 |
"unsupported_task_count": 0,
|
| 202 |
+
"not_evaluated_task_count": 13,
|
| 203 |
"status_counts": {
|
| 204 |
+
"not_evaluated_in_verified_package": 13,
|
| 205 |
+
"scored": 7
|
| 206 |
},
|
| 207 |
+
"coverage_fraction": 0.35,
|
| 208 |
"result_record_fraction": 1.0
|
| 209 |
},
|
| 210 |
{
|
|
|
|
| 1930 |
"raw_text": "0.0000",
|
| 1931 |
"status_label": "scored"
|
| 1932 |
},
|
| 1933 |
+
"qwen3_omni_v6_lora": {
|
| 1934 |
+
"raw": 0.0002220083079671497,
|
| 1935 |
+
"metric_key": "action_object_relation_macro_f1",
|
| 1936 |
+
"source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
|
| 1937 |
+
"scope": "multi_episode_128_partial_model_overlay",
|
| 1938 |
+
"status": "scored",
|
| 1939 |
+
"reason": null,
|
| 1940 |
+
"normalized_score": 0.0002220083079671497,
|
| 1941 |
+
"raw_text": "0.0002",
|
| 1942 |
+
"status_label": "scored"
|
| 1943 |
+
},
|
| 1944 |
+
"cosmos3_super_reasoner": {
|
| 1945 |
+
"raw": 0.0,
|
| 1946 |
+
"metric_key": "action_object_relation_macro_f1",
|
| 1947 |
+
"source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
|
| 1948 |
+
"scope": "multi_episode_128_partial_model_overlay",
|
| 1949 |
+
"status": "scored",
|
| 1950 |
+
"reason": null,
|
| 1951 |
+
"normalized_score": 0.0,
|
| 1952 |
+
"raw_text": "0.0000",
|
| 1953 |
+
"status_label": "scored"
|
| 1954 |
+
},
|
| 1955 |
"raw128_simple": {
|
| 1956 |
"raw": 0.0,
|
| 1957 |
"metric_key": "macro_f1",
|
|
|
|
| 1996 |
"raw_text": "n/a",
|
| 1997 |
"status_label": "not supported"
|
| 1998 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1999 |
"cosmos3_nano_future_window": {
|
| 2000 |
"raw": null,
|
| 2001 |
"metric_key": "macro_f1",
|
|
|
|
| 2492 |
"title": "Qwen3-Omni v6 LoRA",
|
| 2493 |
"status": "verified",
|
| 2494 |
"task_aligned_axes": "Qwen3",
|
| 2495 |
+
"coverage": "20 records / 7 scored task-aligned axes",
|
| 2496 |
"headline": "JSON validity 0.9990; action macro-F1 0.0029",
|
| 2497 |
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json"
|
| 2498 |
},
|
|
|
|
| 2500 |
"id": "cosmos3_super_reasoner",
|
| 2501 |
"title": "Cosmos3-Super Reasoner",
|
| 2502 |
"status": "verified_base_weight_eval",
|
| 2503 |
+
"coverage": "20 records / 7 scored task-aligned axes",
|
| 2504 |
"headline": "JSON validity 0.5112; action macro-F1 0.0008",
|
| 2505 |
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/metrics.json"
|
| 2506 |
},
|
|
|
|
| 5066 |
"task_label": "Action-Object Relation Prediction",
|
| 5067 |
"series_id": "qwen3_omni_v6_lora",
|
| 5068 |
"method": "Qwen3-Omni v6 LoRA",
|
| 5069 |
+
"status": "scored",
|
| 5070 |
+
"status_label": "scored",
|
| 5071 |
+
"scored": true,
|
| 5072 |
"proxy_scored": false,
|
| 5073 |
+
"raw": 0.0002220083079671497,
|
| 5074 |
+
"raw_text": "0.0002",
|
| 5075 |
+
"normalized_score": 0.0002220083079671497,
|
| 5076 |
+
"metric_key": "action_object_relation_macro_f1",
|
| 5077 |
+
"source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
|
| 5078 |
"scope": "multi_episode_128_partial_model_overlay",
|
| 5079 |
+
"reason": null
|
| 5080 |
},
|
| 5081 |
{
|
| 5082 |
"task_number": 16,
|
|
|
|
| 5084 |
"task_label": "Action-Object Relation Prediction",
|
| 5085 |
"series_id": "cosmos3_super_reasoner",
|
| 5086 |
"method": "Cosmos3-Super Reasoner",
|
| 5087 |
+
"status": "scored",
|
| 5088 |
+
"status_label": "scored",
|
| 5089 |
+
"scored": true,
|
| 5090 |
"proxy_scored": false,
|
| 5091 |
+
"raw": 0.0,
|
| 5092 |
+
"raw_text": "0.0000",
|
| 5093 |
+
"normalized_score": 0.0,
|
| 5094 |
+
"metric_key": "action_object_relation_macro_f1",
|
| 5095 |
+
"source": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
|
| 5096 |
"scope": "multi_episode_128_partial_model_overlay",
|
| 5097 |
+
"reason": null
|
| 5098 |
},
|
| 5099 |
{
|
| 5100 |
"task_number": 16,
|
results/omni_finetune/model_output_task_probes_20260616/RUN_REPORT.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Existing Model-Output Task Probes
|
| 2 |
+
|
| 3 |
+
Generated: `2026-06-16T13:35:37+00:00`
|
| 4 |
+
|
| 5 |
+
This package scores only task targets already present in verified held-out
|
| 6 |
+
prediction JSON. It does not run new inference and does not infer targets that
|
| 7 |
+
are absent from a model branch.
|
| 8 |
+
|
| 9 |
+
| Method | ID | Status | Scored rows | Task 16 macro-F1 | Evidence |
|
| 10 |
+
| --- | --- | --- | ---: | ---: | --- |
|
| 11 |
+
| Qwen3-Omni v6 LoRA | qwen3_omni_v6_lora | scored | 4014 | 0.000222 | results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/predictions.jsonl |
|
| 12 |
+
| Cosmos3-Super Reasoner | cosmos3_super_reasoner | scored | 446 | 0.000000 | results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/predictions.jsonl |
|
| 13 |
+
| Cosmos3-Nano Future Window | cosmos3_nano_future_window | unsupported_without_required_fields | n/a | n/a | verified future-window predictions do not contain object-set fields |
|
results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"accuracy": 0.0,
|
| 3 |
+
"action_object_relation_accuracy": 0.0,
|
| 4 |
+
"action_object_relation_macro_f1": 0.0,
|
| 5 |
+
"artifact_files": {
|
| 6 |
+
"metrics_json": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
|
| 7 |
+
"per_class_metrics_csv": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/per_class_metrics.csv",
|
| 8 |
+
"predictions_csv": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/predictions.csv"
|
| 9 |
+
},
|
| 10 |
+
"excluded_rows_without_true_relation": 2,
|
| 11 |
+
"generated_at_utc": "2026-06-16T13:35:37+00:00",
|
| 12 |
+
"labels": [
|
| 13 |
+
"Adjust canned food on shelf :: canned food | cardboard box | store shelf",
|
| 14 |
+
"Adjust item on shelf :: shelf | stationery package",
|
| 15 |
+
"Adjust lantern shape :: red pleated paper lantern",
|
| 16 |
+
"Adjust lantern string :: red paper lantern | string",
|
| 17 |
+
"Adjust paper :: cardboard square | paper | pen | star beads",
|
| 18 |
+
"Adjust pot position :: pot",
|
| 19 |
+
"Adjust puzzle piece :: jigsaw puzzle | puzzle box | puzzle pieces",
|
| 20 |
+
"Align canned food on shelf :: canned food | cardboard box | retail shelf",
|
| 21 |
+
"Align edges of paper lantern :: hands | red paper lantern",
|
| 22 |
+
"Align paper lantern edges :: hands | red paper lantern",
|
| 23 |
+
"Apply adhesive tape to lantern :: adhesive tape | red paper lantern",
|
| 24 |
+
"Approach boxes :: cardboard boxes | colleague | shelving unit",
|
| 25 |
+
"Approaching and pressing the door switch :: glass door | switch | wall",
|
| 26 |
+
"Approaching the table :: beads | chairs | table",
|
| 27 |
+
"Arrange buttons :: red buttons | smartphone | table",
|
| 28 |
+
"Arrange buttons in a line :: buttons | phone | table",
|
| 29 |
+
"Arrange star beads :: paper | pen | power bank | smartphone | star beads",
|
| 30 |
+
"Arrange star beads for counting :: paper | pen | star beads",
|
| 31 |
+
"Attempt to fit puzzle piece :: puzzle board | puzzle piece",
|
| 32 |
+
"Attempt to fit puzzle piece :: puzzle piece",
|
| 33 |
+
"Bend and manipulate plastic strip :: plastic strip | stationary box | table | water bottle",
|
| 34 |
+
"Browse smartphone screen :: paper strips | smartphone | star ornaments",
|
| 35 |
+
"Bundle display hooks :: container | display hooks",
|
| 36 |
+
"Closing the door :: door | dustpan",
|
| 37 |
+
"Counting and organizing beads :: cardboard squares | paper | pen | star-shaped beads",
|
| 38 |
+
"Counting star beads :: paper | pen | star beads",
|
| 39 |
+
"Cut along the marked line :: cardboard | hand | utility knife",
|
| 40 |
+
"Cut cardboard :: cardboard | desk | pen | ruler | utility knife",
|
| 41 |
+
"Cut cardboard piece :: cardboard strip | scissors",
|
| 42 |
+
"Entering the VR training room :: doorway | person | table",
|
| 43 |
+
"Expand paper lantern :: adhesive tape roll | cardboard box | red paper lantern",
|
| 44 |
+
"Extract wire hangers from box :: cardboard box | wire hangers",
|
| 45 |
+
"Fold paper lantern :: red paper honeycomb lantern",
|
| 46 |
+
"Fold plastic strip :: container with tools | purple plastic strip | water bottle | white table",
|
| 47 |
+
"Gather star beads :: cardboard squares | mobile phone | paper | pen | power bank | star beads",
|
| 48 |
+
"Gesturing :: cardboard pieces | marker | pencil case | ruler | scissors",
|
| 49 |
+
"Grasp cleaning bottle :: bowls | chopping board | cleaning fluid bottle",
|
| 50 |
+
"Grasp lantern :: red paper honeycomb lantern",
|
| 51 |
+
"Grasp lantern component :: cardboard box | paper lantern | tape",
|
| 52 |
+
"Grasping cleaning cloth :: cleaning cloth | countertop",
|
| 53 |
+
"Greeting/acknowledging participants :: person | table | vr headset",
|
| 54 |
+
"Handle paper lantern component :: cardboard box | paper lantern component | plastic bag | red paper lantern",
|
| 55 |
+
"Hold and bend plastic strip :: bottle | purple plastic strip | stationery box",
|
| 56 |
+
"Hold and manipulate paper strip :: beads | mobile phone | power bank | yellow paper strip",
|
| 57 |
+
"Hold beads :: paper | pen | power bank | smartphone | star-shaped beads",
|
| 58 |
+
"Hold canned food :: box of cans | can of soup | shelf",
|
| 59 |
+
"Hold cardboard piece :: cardboard pieces | marker | pencil case | ruler | scissors",
|
| 60 |
+
"Hold container lid :: dustpan | red plastic container lid",
|
| 61 |
+
"Hold paper lantern :: red paper lantern | sofa",
|
| 62 |
+
"Hold smartphone :: kettle | kitchen counter | sink | smartphone",
|
| 63 |
+
"Hold smartphone :: quilling paper strips | small blue beads | smartphone",
|
| 64 |
+
"Identify next cardboard piece :: cardboard pieces | marker",
|
| 65 |
+
"Inspect shelf condition :: colleague | shelf",
|
| 66 |
+
"Interact with smartphone :: beads | smartphone | yellow paper strip",
|
| 67 |
+
"Interact with smartphone :: cans | shelf | smartphone",
|
| 68 |
+
"Lift pot lid :: pot | pot lid",
|
| 69 |
+
"Manipulate adhesive strip :: adhesive strip | paper sheets | puzzle box | smartphone | water bottle",
|
| 70 |
+
"Manipulate bead :: bead piles | container | paper strips | yellow bead",
|
| 71 |
+
"Manipulate beads :: blue beads | power bank | smartphone | yellow beads",
|
| 72 |
+
"Manipulate craft paper strips :: paper strips | scissors | smartphone",
|
| 73 |
+
"Manipulate craft piece :: craft pieces | scissors | smartphone",
|
| 74 |
+
"Manipulate material :: blue crafting material | hand",
|
| 75 |
+
"Manipulate paper decoration :: paper cone | paper decoration | smartphone | water bottle",
|
| 76 |
+
"Manipulate paper edge :: hands | paper cone | puzzle box | smartphone | water bottle",
|
| 77 |
+
"Manipulate paper strip :: container with tools | table | yellow paper strip",
|
| 78 |
+
"Manipulate paper strip :: craft materials | purple paper strip | scissors | smartphone",
|
| 79 |
+
"Manipulate paper strip :: paper cone | paper strip | puzzle box | smartphone | water bottle",
|
| 80 |
+
"Manipulate paper strip :: purple paper strip | quilling paper pile | smartphone",
|
| 81 |
+
"Manipulate plastic strip :: purple plastic strip | storage bin | water bottle",
|
| 82 |
+
"Manipulate plastic strips :: desk | plastic strips | water bottle",
|
| 83 |
+
"Manipulate puzzle piece :: jigsaw puzzle | puzzle piece",
|
| 84 |
+
"Manipulate puzzle pieces :: puzzle box | puzzle mat | puzzle pieces",
|
| 85 |
+
"Manipulate yellow strip :: beads | cell phone | pen | yellow strip",
|
| 86 |
+
"Manipulating paper strips :: beads | blue paper strip | yellow paper strip",
|
| 87 |
+
"Mark cardboard piece :: cardboard piece | marker",
|
| 88 |
+
"Mark cardboard piece :: cardboard piece | marker | pouch",
|
| 89 |
+
"Marking cardboard piece :: cardboard pieces | marker | ruler | scissors",
|
| 90 |
+
"Move dustpan to side :: dustpan",
|
| 91 |
+
"Move hand away :: canned food | retail shelf",
|
| 92 |
+
"Move hand away from shelf :: canned food | cardboard box | store shelf",
|
| 93 |
+
"Move marker and adjust hand :: cardboard pieces | marker",
|
| 94 |
+
"Move phone :: cardboard | smartphone | utility knife",
|
| 95 |
+
"Move pot :: faucet | pot | sink",
|
| 96 |
+
"Move smartphone :: craft beads | paper strips | smartphone",
|
| 97 |
+
"Move through aisle :: aisle floor | display hooks | metal shelving units",
|
| 98 |
+
"Move through the training room :: chairs | fire extinguisher | tables | vr headsets",
|
| 99 |
+
"Move to shelf :: cardboard box | colleague | shelves",
|
| 100 |
+
"Move towards kitchen area :: cloth | sink",
|
| 101 |
+
"Move towards the stove :: blue bowl | faucet | metal container | red bowl | sink",
|
| 102 |
+
"Observe and pause :: cardboard pieces | marker | pencil case | ruler | scissors",
|
| 103 |
+
"Observe and walk through store :: colleague | inventory boxes | shelves",
|
| 104 |
+
"Observe colleague and workspace :: price tag | shelves | watch",
|
| 105 |
+
"Observe puzzle progress :: hand | jigsaw puzzle",
|
| 106 |
+
"Observe workspace :: cardboard boxes | colleague | shelving unit",
|
| 107 |
+
"Open earbud case :: earbud case",
|
| 108 |
+
"Open folded paper lantern :: cardboard box | red pleated paper lantern",
|
| 109 |
+
"Open paper lantern :: red paper honeycomb lantern",
|
| 110 |
+
"Open paper lantern component :: paper lantern component",
|
| 111 |
+
"Open stove pot lid :: blue bowl | cloth | faucet | red bowl | sink | soap dispenser | white bowl",
|
| 112 |
+
"Operate smartphone :: craft paper strips | folded paper fan | smartphone",
|
| 113 |
+
"Organize cardboard pieces :: cardboard pieces | marker | pencil case | ruler | scissors",
|
| 114 |
+
"Pick up button :: buttons | table",
|
| 115 |
+
"Pick up can :: canned food | cardboard box",
|
| 116 |
+
"Pick up canned food :: canned food | cardboard box",
|
| 117 |
+
"Pick up dustpan :: dustpan | smartphone",
|
| 118 |
+
"Pick up items from the shopping bag :: cardboard boxes | red shopping bag | retail shelf",
|
| 119 |
+
"Pick up new cardboard piece :: cardboard piece | marker",
|
| 120 |
+
"Pick up packaged paper lantern component :: cardboard box | packaged paper lantern component",
|
| 121 |
+
"Pick up pen :: paper | pen | star beads",
|
| 122 |
+
"Pick up puzzle piece :: jigsaw puzzle | puzzle piece",
|
| 123 |
+
"Pick up small piece of material :: blue crafting material | hand",
|
| 124 |
+
"Pick up smartphone :: cans | shelf | smartphone",
|
| 125 |
+
"Pick up star bead :: cardboard squares | mobile phone | paper | pen | power bank | star beads",
|
| 126 |
+
"Pick up utility knife :: cardboard | utility knife",
|
| 127 |
+
"Picking up bottle :: bottle",
|
| 128 |
+
"Picking up crafting material :: beads | papers | pen | table",
|
| 129 |
+
"Place and count bead :: paper | pen | star beads",
|
| 130 |
+
"Place another canned food on shelf :: box | canned food",
|
| 131 |
+
"Place button :: buttons | hand | smartphone | table",
|
| 132 |
+
"Place button :: buttons | hand | table",
|
| 133 |
+
"Place can on shelf :: canned food | retail shelf",
|
| 134 |
+
"Place canned food on shelf :: can of vegetables | cardboard boxes | store shelf",
|
| 135 |
+
"Place cloth on floor :: cloth",
|
| 136 |
+
"Place hand on table :: craft beads | paper strips | smartphone",
|
| 137 |
+
"Place item on shelf :: stationery package",
|
| 138 |
+
"Place items on the shelf :: packaged items | retail shelf",
|
| 139 |
+
"Place lid back :: pot | pot lid",
|
| 140 |
+
"Place marked piece down :: cardboard piece | marker",
|
| 141 |
+
"Place material :: bead design | blue crafting material | hand",
|
| 142 |
+
"Place phone down :: beads | pencil holder | smartphone | yellow paper strip",
|
| 143 |
+
"Place piece into puzzle :: jigsaw puzzle | puzzle piece",
|
| 144 |
+
"Place puzzle piece :: puzzle board | puzzle piece",
|
| 145 |
+
"Place smartphone down :: power bank | quilling paper strips | small paper stars | smartphone",
|
| 146 |
+
"Place smartphone down :: quilling paper strips | small paper stars | smartphone",
|
| 147 |
+
"Place smartphone on stand :: saucepan | smartphone | smartphone stand | yellow jacket",
|
| 148 |
+
"Place towel :: pot | towel",
|
| 149 |
+
"Placing paper strip :: adhesive strip | paper cone | phone",
|
| 150 |
+
"Placing paper strip :: paper cone | phone | puzzle box | water bottle",
|
| 151 |
+
"Preparing to craft :: beads | chairs | table",
|
| 152 |
+
"Put down smartphone :: paper stars | paper strips | smartphone",
|
| 153 |
+
"Put down smartphone :: power bank | smartphone",
|
| 154 |
+
"Reach for another item :: item | shelf",
|
| 155 |
+
"Reach for cleaning supplies :: bowls | chopping board | cleaning fluid bottle",
|
| 156 |
+
"Reach for craft items :: smartphone | star-shaped craft items | table",
|
| 157 |
+
"Reach for next can :: can of vegetables | cardboard boxes | store shelf",
|
| 158 |
+
"Reach for next canned food :: box | canned food | retail shelf",
|
| 159 |
+
"Reach for next item :: packaged item | shelf",
|
| 160 |
+
"Reach for puzzle piece :: puzzle board | puzzle box | puzzle pieces",
|
| 161 |
+
"Reach for wire hangers :: cardboard box | wire hangers",
|
| 162 |
+
"Reach into box :: box of cans",
|
| 163 |
+
"Record count :: paper | pen | star-shaped beads",
|
| 164 |
+
"Release cardboard piece and gesture :: cardboard piece | cardboard piles | marker | pouch | ruler | scissors",
|
| 165 |
+
"Release hook :: display hook",
|
| 166 |
+
"Release lantern :: red paper lantern",
|
| 167 |
+
"Release paper strip :: craft beads | paper strip | scissors | smartphone",
|
| 168 |
+
"Release puzzle piece :: jigsaw puzzle | puzzle box | puzzle pieces",
|
| 169 |
+
"Release scissors :: craft paper strips | folded paper fan | scissors | smartphone",
|
| 170 |
+
"Release smartphone :: paper scraps | scissors | smartphone | table",
|
| 171 |
+
"Remove cleaning bottle :: cleaning bottle | plastic bowls",
|
| 172 |
+
"Remove paper lantern part from packaging :: paper lantern | red hand fan",
|
| 173 |
+
"Remove plastic packaging :: packaging | paper lantern component",
|
| 174 |
+
"Reposition hand :: cardboard | utility knife",
|
| 175 |
+
"Resume observation :: cardboard pieces | marker | pencil case | ruler | scissors",
|
| 176 |
+
"Retrieve canned food from box :: box | canned food",
|
| 177 |
+
"Retrieve next canned food item :: canned food | cardboard box",
|
| 178 |
+
"Retrieving more beads :: paper | pen | star beads",
|
| 179 |
+
"Rinse cloth in sink :: cloth | sink | water faucet",
|
| 180 |
+
"Scroll smartphone screen :: paper stars | paper strips | smartphone",
|
| 181 |
+
"Search for puzzle piece :: jigsaw puzzle | puzzle pieces",
|
| 182 |
+
"Secure paper edges with adhesive :: adhesive strip | paper cone pieces | puzzle box | smartphone | water bottle",
|
| 183 |
+
"Securing paper structure :: decorative stars | paper strips | smartphone | water bottle",
|
| 184 |
+
"Sort and adjust button line :: buttons | smartphone | table",
|
| 185 |
+
"Sort and arrange buttons :: buttons | smartphones | soda can | table",
|
| 186 |
+
"Sort and count beads :: paper | pen | star beads",
|
| 187 |
+
"Sort and place buttons :: buttons | coca-cola can | smartphone",
|
| 188 |
+
"Sort beads :: paper | pen | smartphone | star-shaped beads",
|
| 189 |
+
"Sort beads and write count :: paper | pen | power bank | smartphone | star-shaped beads",
|
| 190 |
+
"Sort button :: buttons | cell phone",
|
| 191 |
+
"Sort buttons :: buttons | table",
|
| 192 |
+
"Sort craft items :: star-shaped craft items | table",
|
| 193 |
+
"Sort puzzle pieces :: jigsaw puzzle pieces | table",
|
| 194 |
+
"Sort small craft pieces :: paper strips | scissors | smartphone | star-shaped craft pieces",
|
| 195 |
+
"Sort star-shaped beads :: marker | mobile phone | paper | power bank | star-shaped beads",
|
| 196 |
+
"Start cutting :: cardboard | utility knife",
|
| 197 |
+
"Stir contents :: cooking utensil | pot",
|
| 198 |
+
"Use phone :: beads | paper | power bank | smartphone",
|
| 199 |
+
"Use phone while crafting :: beads | pencil holder | smartphone | yellow paper strip",
|
| 200 |
+
"Use smartphone :: buttons | chair | smartphone | table",
|
| 201 |
+
"Use smartphone :: buttons | smartphone | table",
|
| 202 |
+
"Use smartphone :: charging cable | paper strips | power bank | smartphone | star-shaped paper crafts",
|
| 203 |
+
"Use smartphone :: containers | kettle | kitchen counter | rice cooker | smartphone",
|
| 204 |
+
"Walk towards other aisles :: retail store aisle",
|
| 205 |
+
"Walk towards shelves :: cardboard boxes | red bin | shelving unit",
|
| 206 |
+
"Walking across the room :: chair | fire extinguisher | office | table",
|
| 207 |
+
"Walking in the hallway :: hallway | people | tables | vr headsets",
|
| 208 |
+
"Walking towards door :: door | dustpan",
|
| 209 |
+
"Washing hands in sink :: faucet | sink",
|
| 210 |
+
"Wipe kitchen counter :: door | laundry basket | mop",
|
| 211 |
+
"Wiping countertop :: cleaning cloth | countertop",
|
| 212 |
+
"Write count on paper :: marker | mobile phone | paper | power bank | star-shaped beads",
|
| 213 |
+
"Write on paper :: cardboard square | paper | pen | star beads",
|
| 214 |
+
"sort craft materials :: paper pieces | scissors | smartphone",
|
| 215 |
+
"Pick up and place canned goods on shelf :: bin | canned goods | shelf",
|
| 216 |
+
"Browsing smartphone content :: smartphone",
|
| 217 |
+
"Place item on shelf :: retail item | shelf",
|
| 218 |
+
"Place item on shelf :: canned food | shelf",
|
| 219 |
+
"<missing_pred_relation>",
|
| 220 |
+
"Approach packing area :: items to pack | packing area",
|
| 221 |
+
"Approach restocking supplies :: aisle | restocking supplies | shelf",
|
| 222 |
+
"unknown :: unknown",
|
| 223 |
+
"Approach table :: materials | table | tools | work surface",
|
| 224 |
+
"Place item in container :: canned goods | plastic container | shopping bag",
|
| 225 |
+
"Pick up canned food :: canned food | shelf",
|
| 226 |
+
"Adjusting items on the shelf :: items | shelf",
|
| 227 |
+
"Wipe hands :: hands | sink | water bottle",
|
| 228 |
+
"Approach the stove :: cooking pot | kitchen counter | stove",
|
| 229 |
+
"Place item on shelf :: plush toy | shelf",
|
| 230 |
+
"Place item on shelf :: canned goods | shelf",
|
| 231 |
+
"Adjust cardboard :: cardboard pieces | workspace surface",
|
| 232 |
+
"Adjust cardboard :: cardboard | cutting mat | glue stick | marker | measuring tape | ruler | scissors | work table",
|
| 233 |
+
"Approach packing area :: cardboard pieces | marker | ruler | scissors | work table",
|
| 234 |
+
"Adjust cardboard :: cardboard piece | marker | ruler | workspace surface",
|
| 235 |
+
"Adjust cardboard position :: cardboard piece | marker | ruler | scissors | work surface",
|
| 236 |
+
"Adjust cardboard position :: cardboard piece | hands | work surface",
|
| 237 |
+
"Adjust cardboard :: cardboard piece | glue stick | marker | measuring tape | pencil | ruler | scissors | work table",
|
| 238 |
+
"Adjust cardboard :: cardboard | glue stick | marker | measuring tape | pencil | ruler | scissors | work table",
|
| 239 |
+
"Adjust cardboard position :: cardboard piece | scissors | work table",
|
| 240 |
+
"Adjust cardboard position :: cardboard piece | scissors | work surface",
|
| 241 |
+
"Cut cardboard with utility knife :: cardboard | ruler | table | utility knife",
|
| 242 |
+
"Cut cardboard :: cardboard | glue | marker | measuring tape | pen | ruler | scissors | work surface",
|
| 243 |
+
"Adjusting items on shelf :: containers | retail items | shelf",
|
| 244 |
+
"Approach packing area :: container | items to pack | packing area",
|
| 245 |
+
"Adjust cardboard divider :: cardboard divider | retail items | shelf",
|
| 246 |
+
"Adjusting items on shelf :: retail items | shelf",
|
| 247 |
+
"Approach packing area :: boxes | items to pack | packing area",
|
| 248 |
+
"Adjust grip :: canned food | plastic container | shelf",
|
| 249 |
+
"Adjust item on shelf :: retail items | shelf",
|
| 250 |
+
"Approach packing area :: cardboard pieces | packing area | storage bins",
|
| 251 |
+
"Adjust cardboard position :: cardboard pieces | scissors | table surface",
|
| 252 |
+
"Adjust cardboard divider :: cardboard divider | cardboard pieces | table surface",
|
| 253 |
+
"Adjust cardboard divider :: cardboard divider | cardboard pieces | scissors | table",
|
| 254 |
+
"Place canned food on shelf :: canned food | shelf",
|
| 255 |
+
"Pick up canned food :: bin | canned food | shelf",
|
| 256 |
+
"Pick up canned food :: box | canned food | shelf",
|
| 257 |
+
"Pick up canned food :: canned food | container | shelf",
|
| 258 |
+
"Adjust canned food on shelf :: canned food | shelf",
|
| 259 |
+
"Move along the shelf :: retail items | shelf",
|
| 260 |
+
"Picking up and placing canned goods :: canned goods | shelf",
|
| 261 |
+
"Move along the shelf :: canned goods | plastic container | shelf",
|
| 262 |
+
"Adjust item on shelf :: retail item | shelf",
|
| 263 |
+
"Browsing smartphone content :: retail items | shelf | smartphone",
|
| 264 |
+
"Manipulate paper strip :: paper strip | scissors | table",
|
| 265 |
+
"Manipulate paper strip :: marker | paper strip | pink cloth | ruler | scissors | smartphone | table | water bottle",
|
| 266 |
+
"Adjust grip :: hand | red container",
|
| 267 |
+
"Approach packing area :: boxes | packaging materials | red hat | table",
|
| 268 |
+
"Approach packing area :: boxes | packaging materials | red cap | table",
|
| 269 |
+
"Manipulate paper strip :: paper strip",
|
| 270 |
+
"Adjust lantern string :: handle | lantern | string",
|
| 271 |
+
"Adjust grip :: lid | red container",
|
| 272 |
+
"Approach packing area :: items to pack | packing area | packing materials",
|
| 273 |
+
"Adjust lantern string :: lantern handle | lantern string | paper lantern",
|
| 274 |
+
"Adjust grip :: red container | small white object",
|
| 275 |
+
"Adjust lantern string :: handle component | paper lantern | string",
|
| 276 |
+
"Approach table :: people | table",
|
| 277 |
+
"Approach table :: items on table | people | table",
|
| 278 |
+
"Adjust Mahjong tile :: mahjong tile | mahjong tile stack | table surface",
|
| 279 |
+
"Adjusting and placing down paper pieces :: hand | paper pieces | table",
|
| 280 |
+
"Approach table :: cardboard | chair | mahjong tiles | marker | paper | ruler | scissors | table",
|
| 281 |
+
"Approach table :: beads | cardboard | marker | paper | pen | ruler | scissors | table",
|
| 282 |
+
"Manipulate paper strip :: craft materials | hands | marker | paper strip | ruler | scissors | table | workspace",
|
| 283 |
+
"Manipulate paper strip :: craft materials | glue | hands | marker | paper strip | ruler | scissors | table",
|
| 284 |
+
"Adjust bead piles :: beads | table",
|
| 285 |
+
"Manipulate paper strip :: beads | glue | marker | paper strip | ruler | scissors | smartphone | table",
|
| 286 |
+
"Manipulate paper strip :: beads | mahjong tiles | marker | paper strip | ruler | scissors | smartphone | table",
|
| 287 |
+
"Manipulate colorful pieces :: buttons | table",
|
| 288 |
+
"Manipulate colorful pieces :: buttons",
|
| 289 |
+
"Adjust Mahjong tile :: mahjong tile | mahjong tile stack | table",
|
| 290 |
+
"Approach packing area :: cardboard box | items to pack | packing area",
|
| 291 |
+
"Interacting with colleagues :: person",
|
| 292 |
+
"Gathering items :: boxes | chairs | equipment | materials | people | supplies | tables | tools",
|
| 293 |
+
"Interacting with phone :: ceiling | chair | door | floor | person | smartphone | table | wall",
|
| 294 |
+
"Manipulate paper strip :: cardboard | craft materials | hands | marker | paper strip | ruler | scissors | table",
|
| 295 |
+
"Write on paper :: paper | pen | star beads",
|
| 296 |
+
"Manipulate paper strip :: chair | glue | marker | paper strip | person | ruler | scissors | table",
|
| 297 |
+
"Sort and arrange buttons :: buttons | table"
|
| 298 |
+
],
|
| 299 |
+
"macro_f1": 0.0,
|
| 300 |
+
"metric_key": "action_object_relation_macro_f1",
|
| 301 |
+
"missing_pred_relation_count": 226,
|
| 302 |
+
"model_id": "cosmos3_super_reasoner",
|
| 303 |
+
"model_label": "Cosmos3-Super Reasoner",
|
| 304 |
+
"normalization_policy": "The action component uses the verified predicted action label when present. The object component is a canonical casefolded set because task 16 is an action plus object-set relation.",
|
| 305 |
+
"num_samples": 446,
|
| 306 |
+
"primary_metric": "action_object_relation_macro_f1",
|
| 307 |
+
"primary_score": 0.0,
|
| 308 |
+
"scope": "held_out_test_existing_verified_prediction_json",
|
| 309 |
+
"score_policy": "Derived from existing verified held-out prediction JSON. No new model inference was run; rows without a predicted action/object relation are counted as missing predictions.",
|
| 310 |
+
"scored_rows": 446,
|
| 311 |
+
"source_prediction_jsonl": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/predictions.jsonl",
|
| 312 |
+
"status": "pass",
|
| 313 |
+
"task_id": "action_object_relation",
|
| 314 |
+
"task_label": "Action-Object Relation",
|
| 315 |
+
"task_number": 16,
|
| 316 |
+
"title": "Cosmos3-Super Reasoner Action-Object Relation Probe",
|
| 317 |
+
"total_prediction_rows": 448,
|
| 318 |
+
"valid_pred_relation_count": 220,
|
| 319 |
+
"valid_pred_relation_rate": 0.49327354260089684
|
| 320 |
+
}
|
results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/per_class_metrics.csv
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class_name,support,predicted,precision,recall,f1
|
| 2 |
+
Adjust canned food on shelf :: canned food | cardboard box | store shelf,2,0,0.0,0.0,0.0
|
| 3 |
+
Adjust item on shelf :: shelf | stationery package,2,0,0.0,0.0,0.0
|
| 4 |
+
Adjust lantern shape :: red pleated paper lantern,2,0,0.0,0.0,0.0
|
| 5 |
+
Adjust lantern string :: red paper lantern | string,2,0,0.0,0.0,0.0
|
| 6 |
+
Adjust paper :: cardboard square | paper | pen | star beads,2,0,0.0,0.0,0.0
|
| 7 |
+
Adjust pot position :: pot,2,0,0.0,0.0,0.0
|
| 8 |
+
Adjust puzzle piece :: jigsaw puzzle | puzzle box | puzzle pieces,3,0,0.0,0.0,0.0
|
| 9 |
+
Align canned food on shelf :: canned food | cardboard box | retail shelf,2,0,0.0,0.0,0.0
|
| 10 |
+
Align edges of paper lantern :: hands | red paper lantern,2,0,0.0,0.0,0.0
|
| 11 |
+
Align paper lantern edges :: hands | red paper lantern,2,0,0.0,0.0,0.0
|
| 12 |
+
Apply adhesive tape to lantern :: adhesive tape | red paper lantern,2,0,0.0,0.0,0.0
|
| 13 |
+
Approach boxes :: cardboard boxes | colleague | shelving unit,2,0,0.0,0.0,0.0
|
| 14 |
+
Approaching and pressing the door switch :: glass door | switch | wall,4,0,0.0,0.0,0.0
|
| 15 |
+
Approaching the table :: beads | chairs | table,2,0,0.0,0.0,0.0
|
| 16 |
+
Arrange buttons :: red buttons | smartphone | table,4,0,0.0,0.0,0.0
|
| 17 |
+
Arrange buttons in a line :: buttons | phone | table,4,0,0.0,0.0,0.0
|
| 18 |
+
Arrange star beads :: paper | pen | power bank | smartphone | star beads,2,0,0.0,0.0,0.0
|
| 19 |
+
Arrange star beads for counting :: paper | pen | star beads,2,0,0.0,0.0,0.0
|
| 20 |
+
Attempt to fit puzzle piece :: puzzle board | puzzle piece,1,0,0.0,0.0,0.0
|
| 21 |
+
Attempt to fit puzzle piece :: puzzle piece,2,0,0.0,0.0,0.0
|
| 22 |
+
Bend and manipulate plastic strip :: plastic strip | stationary box | table | water bottle,4,0,0.0,0.0,0.0
|
| 23 |
+
Browse smartphone screen :: paper strips | smartphone | star ornaments,3,0,0.0,0.0,0.0
|
| 24 |
+
Bundle display hooks :: container | display hooks,2,0,0.0,0.0,0.0
|
| 25 |
+
Closing the door :: door | dustpan,2,0,0.0,0.0,0.0
|
| 26 |
+
Counting and organizing beads :: cardboard squares | paper | pen | star-shaped beads,2,0,0.0,0.0,0.0
|
| 27 |
+
Counting star beads :: paper | pen | star beads,2,0,0.0,0.0,0.0
|
| 28 |
+
Cut along the marked line :: cardboard | hand | utility knife,7,0,0.0,0.0,0.0
|
| 29 |
+
Cut cardboard :: cardboard | desk | pen | ruler | utility knife,7,0,0.0,0.0,0.0
|
| 30 |
+
Cut cardboard piece :: cardboard strip | scissors,3,0,0.0,0.0,0.0
|
| 31 |
+
Entering the VR training room :: doorway | person | table,3,0,0.0,0.0,0.0
|
| 32 |
+
Expand paper lantern :: adhesive tape roll | cardboard box | red paper lantern,2,0,0.0,0.0,0.0
|
| 33 |
+
Extract wire hangers from box :: cardboard box | wire hangers,2,0,0.0,0.0,0.0
|
| 34 |
+
Fold paper lantern :: red paper honeycomb lantern,2,0,0.0,0.0,0.0
|
| 35 |
+
Fold plastic strip :: container with tools | purple plastic strip | water bottle | white table,3,0,0.0,0.0,0.0
|
| 36 |
+
Gather star beads :: cardboard squares | mobile phone | paper | pen | power bank | star beads,2,0,0.0,0.0,0.0
|
| 37 |
+
Gesturing :: cardboard pieces | marker | pencil case | ruler | scissors,2,0,0.0,0.0,0.0
|
| 38 |
+
Grasp cleaning bottle :: bowls | chopping board | cleaning fluid bottle,2,0,0.0,0.0,0.0
|
| 39 |
+
Grasp lantern :: red paper honeycomb lantern,2,0,0.0,0.0,0.0
|
| 40 |
+
Grasp lantern component :: cardboard box | paper lantern | tape,2,0,0.0,0.0,0.0
|
| 41 |
+
Grasping cleaning cloth :: cleaning cloth | countertop,2,0,0.0,0.0,0.0
|
| 42 |
+
Greeting/acknowledging participants :: person | table | vr headset,3,0,0.0,0.0,0.0
|
| 43 |
+
Handle paper lantern component :: cardboard box | paper lantern component | plastic bag | red paper lantern,2,0,0.0,0.0,0.0
|
| 44 |
+
Hold and bend plastic strip :: bottle | purple plastic strip | stationery box,3,0,0.0,0.0,0.0
|
| 45 |
+
Hold and manipulate paper strip :: beads | mobile phone | power bank | yellow paper strip,2,0,0.0,0.0,0.0
|
| 46 |
+
Hold beads :: paper | pen | power bank | smartphone | star-shaped beads,2,0,0.0,0.0,0.0
|
| 47 |
+
Hold canned food :: box of cans | can of soup | shelf,2,0,0.0,0.0,0.0
|
| 48 |
+
Hold cardboard piece :: cardboard pieces | marker | pencil case | ruler | scissors,3,0,0.0,0.0,0.0
|
| 49 |
+
Hold container lid :: dustpan | red plastic container lid,2,0,0.0,0.0,0.0
|
| 50 |
+
Hold paper lantern :: red paper lantern | sofa,2,0,0.0,0.0,0.0
|
| 51 |
+
Hold smartphone :: kettle | kitchen counter | sink | smartphone,1,0,0.0,0.0,0.0
|
| 52 |
+
Hold smartphone :: quilling paper strips | small blue beads | smartphone,3,0,0.0,0.0,0.0
|
| 53 |
+
Identify next cardboard piece :: cardboard pieces | marker,3,0,0.0,0.0,0.0
|
| 54 |
+
Inspect shelf condition :: colleague | shelf,2,0,0.0,0.0,0.0
|
| 55 |
+
Interact with smartphone :: beads | smartphone | yellow paper strip,2,0,0.0,0.0,0.0
|
| 56 |
+
Interact with smartphone :: cans | shelf | smartphone,2,0,0.0,0.0,0.0
|
| 57 |
+
Lift pot lid :: pot | pot lid,1,0,0.0,0.0,0.0
|
| 58 |
+
Manipulate adhesive strip :: adhesive strip | paper sheets | puzzle box | smartphone | water bottle,5,0,0.0,0.0,0.0
|
| 59 |
+
Manipulate bead :: bead piles | container | paper strips | yellow bead,2,0,0.0,0.0,0.0
|
| 60 |
+
Manipulate beads :: blue beads | power bank | smartphone | yellow beads,2,0,0.0,0.0,0.0
|
| 61 |
+
Manipulate craft paper strips :: paper strips | scissors | smartphone,4,0,0.0,0.0,0.0
|
| 62 |
+
Manipulate craft piece :: craft pieces | scissors | smartphone,4,0,0.0,0.0,0.0
|
| 63 |
+
Manipulate material :: blue crafting material | hand,2,0,0.0,0.0,0.0
|
| 64 |
+
Manipulate paper decoration :: paper cone | paper decoration | smartphone | water bottle,5,0,0.0,0.0,0.0
|
| 65 |
+
Manipulate paper edge :: hands | paper cone | puzzle box | smartphone | water bottle,5,0,0.0,0.0,0.0
|
| 66 |
+
Manipulate paper strip :: container with tools | table | yellow paper strip,2,0,0.0,0.0,0.0
|
| 67 |
+
Manipulate paper strip :: craft materials | purple paper strip | scissors | smartphone,4,0,0.0,0.0,0.0
|
| 68 |
+
Manipulate paper strip :: paper cone | paper strip | puzzle box | smartphone | water bottle,5,0,0.0,0.0,0.0
|
| 69 |
+
Manipulate paper strip :: purple paper strip | quilling paper pile | smartphone,3,0,0.0,0.0,0.0
|
| 70 |
+
Manipulate plastic strip :: purple plastic strip | storage bin | water bottle,3,0,0.0,0.0,0.0
|
| 71 |
+
Manipulate plastic strips :: desk | plastic strips | water bottle,3,0,0.0,0.0,0.0
|
| 72 |
+
Manipulate puzzle piece :: jigsaw puzzle | puzzle piece,3,0,0.0,0.0,0.0
|
| 73 |
+
Manipulate puzzle pieces :: puzzle box | puzzle mat | puzzle pieces,3,0,0.0,0.0,0.0
|
| 74 |
+
Manipulate yellow strip :: beads | cell phone | pen | yellow strip,2,0,0.0,0.0,0.0
|
| 75 |
+
Manipulating paper strips :: beads | blue paper strip | yellow paper strip,2,0,0.0,0.0,0.0
|
| 76 |
+
Mark cardboard piece :: cardboard piece | marker,1,0,0.0,0.0,0.0
|
| 77 |
+
Mark cardboard piece :: cardboard piece | marker | pouch,2,0,0.0,0.0,0.0
|
| 78 |
+
Marking cardboard piece :: cardboard pieces | marker | ruler | scissors,3,0,0.0,0.0,0.0
|
| 79 |
+
Move dustpan to side :: dustpan,1,0,0.0,0.0,0.0
|
| 80 |
+
Move hand away :: canned food | retail shelf,2,0,0.0,0.0,0.0
|
| 81 |
+
Move hand away from shelf :: canned food | cardboard box | store shelf,2,0,0.0,0.0,0.0
|
| 82 |
+
Move marker and adjust hand :: cardboard pieces | marker,3,0,0.0,0.0,0.0
|
| 83 |
+
Move phone :: cardboard | smartphone | utility knife,7,0,0.0,0.0,0.0
|
| 84 |
+
Move pot :: faucet | pot | sink,1,0,0.0,0.0,0.0
|
| 85 |
+
Move smartphone :: craft beads | paper strips | smartphone,3,0,0.0,0.0,0.0
|
| 86 |
+
Move through aisle :: aisle floor | display hooks | metal shelving units,2,0,0.0,0.0,0.0
|
| 87 |
+
Move through the training room :: chairs | fire extinguisher | tables | vr headsets,3,0,0.0,0.0,0.0
|
| 88 |
+
Move to shelf :: cardboard box | colleague | shelves,2,0,0.0,0.0,0.0
|
| 89 |
+
Move towards kitchen area :: cloth | sink,1,0,0.0,0.0,0.0
|
| 90 |
+
Move towards the stove :: blue bowl | faucet | metal container | red bowl | sink,1,0,0.0,0.0,0.0
|
| 91 |
+
Observe and pause :: cardboard pieces | marker | pencil case | ruler | scissors,2,0,0.0,0.0,0.0
|
| 92 |
+
Observe and walk through store :: colleague | inventory boxes | shelves,2,0,0.0,0.0,0.0
|
| 93 |
+
Observe colleague and workspace :: price tag | shelves | watch,2,0,0.0,0.0,0.0
|
| 94 |
+
Observe puzzle progress :: hand | jigsaw puzzle,3,0,0.0,0.0,0.0
|
| 95 |
+
Observe workspace :: cardboard boxes | colleague | shelving unit,2,0,0.0,0.0,0.0
|
| 96 |
+
Open earbud case :: earbud case,2,0,0.0,0.0,0.0
|
| 97 |
+
Open folded paper lantern :: cardboard box | red pleated paper lantern,2,0,0.0,0.0,0.0
|
| 98 |
+
Open paper lantern :: red paper honeycomb lantern,2,0,0.0,0.0,0.0
|
| 99 |
+
Open paper lantern component :: paper lantern component,2,0,0.0,0.0,0.0
|
| 100 |
+
Open stove pot lid :: blue bowl | cloth | faucet | red bowl | sink | soap dispenser | white bowl,1,0,0.0,0.0,0.0
|
| 101 |
+
Operate smartphone :: craft paper strips | folded paper fan | smartphone,4,0,0.0,0.0,0.0
|
| 102 |
+
Organize cardboard pieces :: cardboard pieces | marker | pencil case | ruler | scissors,2,0,0.0,0.0,0.0
|
| 103 |
+
Pick up button :: buttons | table,3,0,0.0,0.0,0.0
|
| 104 |
+
Pick up can :: canned food | cardboard box,2,0,0.0,0.0,0.0
|
| 105 |
+
Pick up canned food :: canned food | cardboard box,2,0,0.0,0.0,0.0
|
| 106 |
+
Pick up dustpan :: dustpan | smartphone,1,0,0.0,0.0,0.0
|
| 107 |
+
Pick up items from the shopping bag :: cardboard boxes | red shopping bag | retail shelf,2,0,0.0,0.0,0.0
|
| 108 |
+
Pick up new cardboard piece :: cardboard piece | marker,2,0,0.0,0.0,0.0
|
| 109 |
+
Pick up packaged paper lantern component :: cardboard box | packaged paper lantern component,1,0,0.0,0.0,0.0
|
| 110 |
+
Pick up pen :: paper | pen | star beads,2,0,0.0,0.0,0.0
|
| 111 |
+
Pick up puzzle piece :: jigsaw puzzle | puzzle piece,3,0,0.0,0.0,0.0
|
| 112 |
+
Pick up small piece of material :: blue crafting material | hand,2,0,0.0,0.0,0.0
|
| 113 |
+
Pick up smartphone :: cans | shelf | smartphone,2,0,0.0,0.0,0.0
|
| 114 |
+
Pick up star bead :: cardboard squares | mobile phone | paper | pen | power bank | star beads,2,0,0.0,0.0,0.0
|
| 115 |
+
Pick up utility knife :: cardboard | utility knife,6,0,0.0,0.0,0.0
|
| 116 |
+
Picking up bottle :: bottle,1,0,0.0,0.0,0.0
|
| 117 |
+
Picking up crafting material :: beads | papers | pen | table,2,0,0.0,0.0,0.0
|
| 118 |
+
Place and count bead :: paper | pen | star beads,2,0,0.0,0.0,0.0
|
| 119 |
+
Place another canned food on shelf :: box | canned food,2,0,0.0,0.0,0.0
|
| 120 |
+
Place button :: buttons | hand | smartphone | table,1,0,0.0,0.0,0.0
|
| 121 |
+
Place button :: buttons | hand | table,2,0,0.0,0.0,0.0
|
| 122 |
+
Place can on shelf :: canned food | retail shelf,2,0,0.0,0.0,0.0
|
| 123 |
+
Place canned food on shelf :: can of vegetables | cardboard boxes | store shelf,1,0,0.0,0.0,0.0
|
| 124 |
+
Place cloth on floor :: cloth,1,0,0.0,0.0,0.0
|
| 125 |
+
Place hand on table :: craft beads | paper strips | smartphone,3,0,0.0,0.0,0.0
|
| 126 |
+
Place item on shelf :: stationery package,2,0,0.0,0.0,0.0
|
| 127 |
+
Place items on the shelf :: packaged items | retail shelf,2,0,0.0,0.0,0.0
|
| 128 |
+
Place lid back :: pot | pot lid,1,0,0.0,0.0,0.0
|
| 129 |
+
Place marked piece down :: cardboard piece | marker,2,0,0.0,0.0,0.0
|
| 130 |
+
Place material :: bead design | blue crafting material | hand,2,0,0.0,0.0,0.0
|
| 131 |
+
Place phone down :: beads | pencil holder | smartphone | yellow paper strip,2,0,0.0,0.0,0.0
|
| 132 |
+
Place piece into puzzle :: jigsaw puzzle | puzzle piece,3,0,0.0,0.0,0.0
|
| 133 |
+
Place puzzle piece :: puzzle board | puzzle piece,3,0,0.0,0.0,0.0
|
| 134 |
+
Place smartphone down :: power bank | quilling paper strips | small paper stars | smartphone,1,0,0.0,0.0,0.0
|
| 135 |
+
Place smartphone down :: quilling paper strips | small paper stars | smartphone,2,0,0.0,0.0,0.0
|
| 136 |
+
Place smartphone on stand :: saucepan | smartphone | smartphone stand | yellow jacket,1,0,0.0,0.0,0.0
|
| 137 |
+
Place towel :: pot | towel,1,0,0.0,0.0,0.0
|
| 138 |
+
Placing paper strip :: adhesive strip | paper cone | phone,2,0,0.0,0.0,0.0
|
| 139 |
+
Placing paper strip :: paper cone | phone | puzzle box | water bottle,2,0,0.0,0.0,0.0
|
| 140 |
+
Preparing to craft :: beads | chairs | table,2,0,0.0,0.0,0.0
|
| 141 |
+
Put down smartphone :: paper stars | paper strips | smartphone,2,0,0.0,0.0,0.0
|
| 142 |
+
Put down smartphone :: power bank | smartphone,1,0,0.0,0.0,0.0
|
| 143 |
+
Reach for another item :: item | shelf,1,0,0.0,0.0,0.0
|
| 144 |
+
Reach for cleaning supplies :: bowls | chopping board | cleaning fluid bottle,1,0,0.0,0.0,0.0
|
| 145 |
+
Reach for craft items :: smartphone | star-shaped craft items | table,3,0,0.0,0.0,0.0
|
| 146 |
+
Reach for next can :: can of vegetables | cardboard boxes | store shelf,1,0,0.0,0.0,0.0
|
| 147 |
+
Reach for next canned food :: box | canned food | retail shelf,1,0,0.0,0.0,0.0
|
| 148 |
+
Reach for next item :: packaged item | shelf,1,0,0.0,0.0,0.0
|
| 149 |
+
Reach for puzzle piece :: puzzle board | puzzle box | puzzle pieces,2,0,0.0,0.0,0.0
|
| 150 |
+
Reach for wire hangers :: cardboard box | wire hangers,1,0,0.0,0.0,0.0
|
| 151 |
+
Reach into box :: box of cans,1,0,0.0,0.0,0.0
|
| 152 |
+
Record count :: paper | pen | star-shaped beads,2,0,0.0,0.0,0.0
|
| 153 |
+
Release cardboard piece and gesture :: cardboard piece | cardboard piles | marker | pouch | ruler | scissors,2,0,0.0,0.0,0.0
|
| 154 |
+
Release hook :: display hook,1,0,0.0,0.0,0.0
|
| 155 |
+
Release lantern :: red paper lantern,1,0,0.0,0.0,0.0
|
| 156 |
+
Release paper strip :: craft beads | paper strip | scissors | smartphone,4,0,0.0,0.0,0.0
|
| 157 |
+
Release puzzle piece :: jigsaw puzzle | puzzle box | puzzle pieces,2,0,0.0,0.0,0.0
|
| 158 |
+
Release scissors :: craft paper strips | folded paper fan | scissors | smartphone,3,0,0.0,0.0,0.0
|
| 159 |
+
Release smartphone :: paper scraps | scissors | smartphone | table,3,0,0.0,0.0,0.0
|
| 160 |
+
Remove cleaning bottle :: cleaning bottle | plastic bowls,1,0,0.0,0.0,0.0
|
| 161 |
+
Remove paper lantern part from packaging :: paper lantern | red hand fan,1,0,0.0,0.0,0.0
|
| 162 |
+
Remove plastic packaging :: packaging | paper lantern component,1,0,0.0,0.0,0.0
|
| 163 |
+
Reposition hand :: cardboard | utility knife,3,0,0.0,0.0,0.0
|
| 164 |
+
Resume observation :: cardboard pieces | marker | pencil case | ruler | scissors,2,0,0.0,0.0,0.0
|
| 165 |
+
Retrieve canned food from box :: box | canned food,1,0,0.0,0.0,0.0
|
| 166 |
+
Retrieve next canned food item :: canned food | cardboard box,1,0,0.0,0.0,0.0
|
| 167 |
+
Retrieving more beads :: paper | pen | star beads,2,0,0.0,0.0,0.0
|
| 168 |
+
Rinse cloth in sink :: cloth | sink | water faucet,1,0,0.0,0.0,0.0
|
| 169 |
+
Scroll smartphone screen :: paper stars | paper strips | smartphone,3,0,0.0,0.0,0.0
|
| 170 |
+
Search for puzzle piece :: jigsaw puzzle | puzzle pieces,2,0,0.0,0.0,0.0
|
| 171 |
+
Secure paper edges with adhesive :: adhesive strip | paper cone pieces | puzzle box | smartphone | water bottle,4,0,0.0,0.0,0.0
|
| 172 |
+
Securing paper structure :: decorative stars | paper strips | smartphone | water bottle,4,0,0.0,0.0,0.0
|
| 173 |
+
Sort and adjust button line :: buttons | smartphone | table,3,0,0.0,0.0,0.0
|
| 174 |
+
Sort and arrange buttons :: buttons | smartphones | soda can | table,3,0,0.0,0.0,0.0
|
| 175 |
+
Sort and count beads :: paper | pen | star beads,2,0,0.0,0.0,0.0
|
| 176 |
+
Sort and place buttons :: buttons | coca-cola can | smartphone,3,0,0.0,0.0,0.0
|
| 177 |
+
Sort beads :: paper | pen | smartphone | star-shaped beads,2,0,0.0,0.0,0.0
|
| 178 |
+
Sort beads and write count :: paper | pen | power bank | smartphone | star-shaped beads,1,0,0.0,0.0,0.0
|
| 179 |
+
Sort button :: buttons | cell phone,3,0,0.0,0.0,0.0
|
| 180 |
+
Sort buttons :: buttons | table,3,0,0.0,0.0,0.0
|
| 181 |
+
Sort craft items :: star-shaped craft items | table,3,0,0.0,0.0,0.0
|
| 182 |
+
Sort puzzle pieces :: jigsaw puzzle pieces | table,2,0,0.0,0.0,0.0
|
| 183 |
+
Sort small craft pieces :: paper strips | scissors | smartphone | star-shaped craft pieces,3,0,0.0,0.0,0.0
|
| 184 |
+
Sort star-shaped beads :: marker | mobile phone | paper | power bank | star-shaped beads,1,0,0.0,0.0,0.0
|
| 185 |
+
Start cutting :: cardboard | utility knife,2,0,0.0,0.0,0.0
|
| 186 |
+
Stir contents :: cooking utensil | pot,1,0,0.0,0.0,0.0
|
| 187 |
+
Use phone :: beads | paper | power bank | smartphone,2,0,0.0,0.0,0.0
|
| 188 |
+
Use phone while crafting :: beads | pencil holder | smartphone | yellow paper strip,1,0,0.0,0.0,0.0
|
| 189 |
+
Use smartphone :: buttons | chair | smartphone | table,2,0,0.0,0.0,0.0
|
| 190 |
+
Use smartphone :: buttons | smartphone | table,1,0,0.0,0.0,0.0
|
| 191 |
+
Use smartphone :: charging cable | paper strips | power bank | smartphone | star-shaped paper crafts,2,0,0.0,0.0,0.0
|
| 192 |
+
Use smartphone :: containers | kettle | kitchen counter | rice cooker | smartphone,1,0,0.0,0.0,0.0
|
| 193 |
+
Walk towards other aisles :: retail store aisle,1,0,0.0,0.0,0.0
|
| 194 |
+
Walk towards shelves :: cardboard boxes | red bin | shelving unit,1,0,0.0,0.0,0.0
|
| 195 |
+
Walking across the room :: chair | fire extinguisher | office | table,1,0,0.0,0.0,0.0
|
| 196 |
+
Walking in the hallway :: hallway | people | tables | vr headsets,3,0,0.0,0.0,0.0
|
| 197 |
+
Walking towards door :: door | dustpan,1,0,0.0,0.0,0.0
|
| 198 |
+
Washing hands in sink :: faucet | sink,1,0,0.0,0.0,0.0
|
| 199 |
+
Wipe kitchen counter :: door | laundry basket | mop,1,0,0.0,0.0,0.0
|
| 200 |
+
Wiping countertop :: cleaning cloth | countertop,1,0,0.0,0.0,0.0
|
| 201 |
+
Write count on paper :: marker | mobile phone | paper | power bank | star-shaped beads,1,0,0.0,0.0,0.0
|
| 202 |
+
Write on paper :: cardboard square | paper | pen | star beads,1,0,0.0,0.0,0.0
|
| 203 |
+
sort craft materials :: paper pieces | scissors | smartphone,3,0,0.0,0.0,0.0
|
| 204 |
+
Pick up and place canned goods on shelf :: bin | canned goods | shelf,0,4,0.0,0.0,0.0
|
| 205 |
+
Browsing smartphone content :: smartphone,0,60,0.0,0.0,0.0
|
| 206 |
+
Place item on shelf :: retail item | shelf,0,8,0.0,0.0,0.0
|
| 207 |
+
Place item on shelf :: canned food | shelf,0,7,0.0,0.0,0.0
|
| 208 |
+
<missing_pred_relation>,0,226,0.0,0.0,0.0
|
| 209 |
+
Approach packing area :: items to pack | packing area,0,11,0.0,0.0,0.0
|
| 210 |
+
Approach restocking supplies :: aisle | restocking supplies | shelf,0,1,0.0,0.0,0.0
|
| 211 |
+
unknown :: unknown,0,7,0.0,0.0,0.0
|
| 212 |
+
Approach table :: materials | table | tools | work surface,0,1,0.0,0.0,0.0
|
| 213 |
+
Place item in container :: canned goods | plastic container | shopping bag,0,2,0.0,0.0,0.0
|
| 214 |
+
Pick up canned food :: canned food | shelf,0,2,0.0,0.0,0.0
|
| 215 |
+
Adjusting items on the shelf :: items | shelf,0,1,0.0,0.0,0.0
|
| 216 |
+
Wipe hands :: hands | sink | water bottle,0,1,0.0,0.0,0.0
|
| 217 |
+
Approach the stove :: cooking pot | kitchen counter | stove,0,1,0.0,0.0,0.0
|
| 218 |
+
Place item on shelf :: plush toy | shelf,0,1,0.0,0.0,0.0
|
| 219 |
+
Place item on shelf :: canned goods | shelf,0,1,0.0,0.0,0.0
|
| 220 |
+
Adjust cardboard :: cardboard pieces | workspace surface,0,1,0.0,0.0,0.0
|
| 221 |
+
Adjust cardboard :: cardboard | cutting mat | glue stick | marker | measuring tape | ruler | scissors | work table,0,1,0.0,0.0,0.0
|
| 222 |
+
Approach packing area :: cardboard pieces | marker | ruler | scissors | work table,0,1,0.0,0.0,0.0
|
| 223 |
+
Adjust cardboard :: cardboard piece | marker | ruler | workspace surface,0,2,0.0,0.0,0.0
|
| 224 |
+
Adjust cardboard position :: cardboard piece | marker | ruler | scissors | work surface,0,1,0.0,0.0,0.0
|
| 225 |
+
Adjust cardboard position :: cardboard piece | hands | work surface,0,1,0.0,0.0,0.0
|
| 226 |
+
Adjust cardboard :: cardboard piece | glue stick | marker | measuring tape | pencil | ruler | scissors | work table,0,1,0.0,0.0,0.0
|
| 227 |
+
Adjust cardboard :: cardboard | glue stick | marker | measuring tape | pencil | ruler | scissors | work table,0,1,0.0,0.0,0.0
|
| 228 |
+
Adjust cardboard position :: cardboard piece | scissors | work table,0,1,0.0,0.0,0.0
|
| 229 |
+
Adjust cardboard position :: cardboard piece | scissors | work surface,0,2,0.0,0.0,0.0
|
| 230 |
+
Cut cardboard with utility knife :: cardboard | ruler | table | utility knife,0,3,0.0,0.0,0.0
|
| 231 |
+
Cut cardboard :: cardboard | glue | marker | measuring tape | pen | ruler | scissors | work surface,0,1,0.0,0.0,0.0
|
| 232 |
+
Adjusting items on shelf :: containers | retail items | shelf,0,2,0.0,0.0,0.0
|
| 233 |
+
Approach packing area :: container | items to pack | packing area,0,1,0.0,0.0,0.0
|
| 234 |
+
Adjust cardboard divider :: cardboard divider | retail items | shelf,0,1,0.0,0.0,0.0
|
| 235 |
+
Adjusting items on shelf :: retail items | shelf,0,3,0.0,0.0,0.0
|
| 236 |
+
Approach packing area :: boxes | items to pack | packing area,0,2,0.0,0.0,0.0
|
| 237 |
+
Adjust grip :: canned food | plastic container | shelf,0,2,0.0,0.0,0.0
|
| 238 |
+
Adjust item on shelf :: retail items | shelf,0,1,0.0,0.0,0.0
|
| 239 |
+
Approach packing area :: cardboard pieces | packing area | storage bins,0,1,0.0,0.0,0.0
|
| 240 |
+
Adjust cardboard position :: cardboard pieces | scissors | table surface,0,1,0.0,0.0,0.0
|
| 241 |
+
Adjust cardboard divider :: cardboard divider | cardboard pieces | table surface,0,1,0.0,0.0,0.0
|
| 242 |
+
Adjust cardboard divider :: cardboard divider | cardboard pieces | scissors | table,0,1,0.0,0.0,0.0
|
| 243 |
+
Place canned food on shelf :: canned food | shelf,0,9,0.0,0.0,0.0
|
| 244 |
+
Pick up canned food :: bin | canned food | shelf,0,1,0.0,0.0,0.0
|
| 245 |
+
Pick up canned food :: box | canned food | shelf,0,1,0.0,0.0,0.0
|
| 246 |
+
Pick up canned food :: canned food | container | shelf,0,1,0.0,0.0,0.0
|
| 247 |
+
Adjust canned food on shelf :: canned food | shelf,0,1,0.0,0.0,0.0
|
| 248 |
+
Move along the shelf :: retail items | shelf,0,1,0.0,0.0,0.0
|
| 249 |
+
Picking up and placing canned goods :: canned goods | shelf,0,1,0.0,0.0,0.0
|
| 250 |
+
Move along the shelf :: canned goods | plastic container | shelf,0,1,0.0,0.0,0.0
|
| 251 |
+
Adjust item on shelf :: retail item | shelf,0,1,0.0,0.0,0.0
|
| 252 |
+
Browsing smartphone content :: retail items | shelf | smartphone,0,1,0.0,0.0,0.0
|
| 253 |
+
Manipulate paper strip :: paper strip | scissors | table,0,7,0.0,0.0,0.0
|
| 254 |
+
Manipulate paper strip :: marker | paper strip | pink cloth | ruler | scissors | smartphone | table | water bottle,0,1,0.0,0.0,0.0
|
| 255 |
+
Adjust grip :: hand | red container,0,1,0.0,0.0,0.0
|
| 256 |
+
Approach packing area :: boxes | packaging materials | red hat | table,0,1,0.0,0.0,0.0
|
| 257 |
+
Approach packing area :: boxes | packaging materials | red cap | table,0,1,0.0,0.0,0.0
|
| 258 |
+
Manipulate paper strip :: paper strip,0,3,0.0,0.0,0.0
|
| 259 |
+
Adjust lantern string :: handle | lantern | string,0,1,0.0,0.0,0.0
|
| 260 |
+
Adjust grip :: lid | red container,0,1,0.0,0.0,0.0
|
| 261 |
+
Approach packing area :: items to pack | packing area | packing materials,0,1,0.0,0.0,0.0
|
| 262 |
+
Adjust lantern string :: lantern handle | lantern string | paper lantern,0,2,0.0,0.0,0.0
|
| 263 |
+
Adjust grip :: red container | small white object,0,1,0.0,0.0,0.0
|
| 264 |
+
Adjust lantern string :: handle component | paper lantern | string,0,1,0.0,0.0,0.0
|
| 265 |
+
Approach table :: people | table,0,1,0.0,0.0,0.0
|
| 266 |
+
Approach table :: items on table | people | table,0,1,0.0,0.0,0.0
|
| 267 |
+
Adjust Mahjong tile :: mahjong tile | mahjong tile stack | table surface,0,2,0.0,0.0,0.0
|
| 268 |
+
Adjusting and placing down paper pieces :: hand | paper pieces | table,0,1,0.0,0.0,0.0
|
| 269 |
+
Approach table :: cardboard | chair | mahjong tiles | marker | paper | ruler | scissors | table,0,2,0.0,0.0,0.0
|
| 270 |
+
Approach table :: beads | cardboard | marker | paper | pen | ruler | scissors | table,0,1,0.0,0.0,0.0
|
| 271 |
+
Manipulate paper strip :: craft materials | hands | marker | paper strip | ruler | scissors | table | workspace,0,5,0.0,0.0,0.0
|
| 272 |
+
Manipulate paper strip :: craft materials | glue | hands | marker | paper strip | ruler | scissors | table,0,2,0.0,0.0,0.0
|
| 273 |
+
Adjust bead piles :: beads | table,0,3,0.0,0.0,0.0
|
| 274 |
+
Manipulate paper strip :: beads | glue | marker | paper strip | ruler | scissors | smartphone | table,0,1,0.0,0.0,0.0
|
| 275 |
+
Manipulate paper strip :: beads | mahjong tiles | marker | paper strip | ruler | scissors | smartphone | table,0,1,0.0,0.0,0.0
|
| 276 |
+
Manipulate colorful pieces :: buttons | table,0,4,0.0,0.0,0.0
|
| 277 |
+
Manipulate colorful pieces :: buttons,0,1,0.0,0.0,0.0
|
| 278 |
+
Adjust Mahjong tile :: mahjong tile | mahjong tile stack | table,0,1,0.0,0.0,0.0
|
| 279 |
+
Approach packing area :: cardboard box | items to pack | packing area,0,1,0.0,0.0,0.0
|
| 280 |
+
Interacting with colleagues :: person,0,2,0.0,0.0,0.0
|
| 281 |
+
Gathering items :: boxes | chairs | equipment | materials | people | supplies | tables | tools,0,1,0.0,0.0,0.0
|
| 282 |
+
Interacting with phone :: ceiling | chair | door | floor | person | smartphone | table | wall,0,2,0.0,0.0,0.0
|
| 283 |
+
Manipulate paper strip :: cardboard | craft materials | hands | marker | paper strip | ruler | scissors | table,0,1,0.0,0.0,0.0
|
| 284 |
+
Write on paper :: paper | pen | star beads,0,4,0.0,0.0,0.0
|
| 285 |
+
Manipulate paper strip :: chair | glue | marker | paper strip | person | ruler | scissors | table,0,1,0.0,0.0,0.0
|
| 286 |
+
Sort and arrange buttons :: buttons | table,0,3,0.0,0.0,0.0
|
results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
results/omni_finetune/model_output_task_probes_20260616/summary.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"generated_at_utc": "2026-06-16T13:35:37+00:00",
|
| 3 |
+
"methods": {
|
| 4 |
+
"cosmos3_nano_future_window": {
|
| 5 |
+
"label": "Cosmos3-Nano Future Window",
|
| 6 |
+
"reason": "verified future-window predictions do not contain object-set fields",
|
| 7 |
+
"source_prediction_jsonl": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/future_predictions.jsonl",
|
| 8 |
+
"status": "unsupported_without_required_fields"
|
| 9 |
+
},
|
| 10 |
+
"cosmos3_super_reasoner": {
|
| 11 |
+
"action_object_relation_accuracy": 0.0,
|
| 12 |
+
"action_object_relation_macro_f1": 0.0,
|
| 13 |
+
"label": "Cosmos3-Super Reasoner",
|
| 14 |
+
"scored_rows": 446,
|
| 15 |
+
"source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/cosmos3_super_reasoner/metrics.json",
|
| 16 |
+
"source_prediction_jsonl": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/eval/predictions.jsonl",
|
| 17 |
+
"status": "scored",
|
| 18 |
+
"valid_pred_relation_rate": 0.49327354260089684
|
| 19 |
+
},
|
| 20 |
+
"qwen3_omni_v6_lora": {
|
| 21 |
+
"action_object_relation_accuracy": 0.000996512207274539,
|
| 22 |
+
"action_object_relation_macro_f1": 0.0002220083079671497,
|
| 23 |
+
"label": "Qwen3-Omni v6 LoRA",
|
| 24 |
+
"scored_rows": 4014,
|
| 25 |
+
"source_metrics_json": "results/omni_finetune/model_output_task_probes_20260616/action_object_relation/qwen3_omni_v6_lora/metrics.json",
|
| 26 |
+
"source_prediction_jsonl": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/predictions.jsonl",
|
| 27 |
+
"status": "scored",
|
| 28 |
+
"valid_pred_relation_rate": 0.9990034877927254
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"scope": "Task-specific scoring from existing verified held-out model outputs. No new model inference, training, or target backfilling is performed.",
|
| 32 |
+
"scored_method_task_count_added": 2,
|
| 33 |
+
"status": "pass",
|
| 34 |
+
"task_count_added_to_matrix": 1,
|
| 35 |
+
"title": "Existing Model-Output Task Probes"
|
| 36 |
+
}
|