Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
Add files using upload-large-folder tool
Browse files
- docs/data/artifact_index.json +65 -32
- docs/data/omni_model_comparison.json +11 -10
- docs/data/project_packet.json +4 -4
- docs/data/project_status.json +17 -15
- docs/data/publication_audit.json +3 -3
- docs/data/quality_gates.json +1 -1
- docs/data/qwen3_full_parameter_gates.json +1 -1
- docs/data/research_roadmap.json +6 -3
- docs/data/research_roadmap_interactive.json +6 -3
- docs/data/scope_claims_audit.json +16 -16
- docs/data/source_alignment_audit.json +1 -1
- docs/data/summary_metrics.json +1 -1
- docs/data/task_surface_integrity.json +167 -167
- docs/data/website_integrity.json +21 -16
- results/omni_finetune/OMNI_MODEL_COMPARISON.md +5 -4
- results/omni_finetune/QWEN3_FULL_PARAMETER_GATES_20260609.md +1 -1
- results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md +31 -0
- results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/package_audit.json +24 -0
docs/data/artifact_index.json
CHANGED
|
@@ -1,13 +1,13 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Artifact Index",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"status": "pass",
|
| 5 |
-
"artifact_count":
|
| 6 |
"missing": [],
|
| 7 |
"by_kind": {
|
| 8 |
"project_path": 14,
|
| 9 |
"scaleup_contract": 7,
|
| 10 |
-
"scaleup_status":
|
| 11 |
"publication_workflow": 5,
|
| 12 |
"project_scope": 1,
|
| 13 |
"source_alignment": 5,
|
|
@@ -32,7 +32,7 @@
|
|
| 32 |
"citation": 1,
|
| 33 |
"license": 1,
|
| 34 |
"verified_public_package": 10,
|
| 35 |
-
"publication_audit":
|
| 36 |
},
|
| 37 |
"artifacts": [
|
| 38 |
{
|
|
@@ -65,8 +65,8 @@
|
|
| 65 |
"surface": "repo_hf",
|
| 66 |
"shows": "Gives a compact current-state table for first-pass readers.",
|
| 67 |
"exists": true,
|
| 68 |
-
"bytes":
|
| 69 |
-
"sha256": "
|
| 70 |
},
|
| 71 |
{
|
| 72 |
"id": "project_status_json",
|
|
@@ -76,8 +76,8 @@
|
|
| 76 |
"surface": "website_hf",
|
| 77 |
"shows": "Machine-readable copy of the current project status for website and HF mirrors.",
|
| 78 |
"exists": true,
|
| 79 |
-
"bytes":
|
| 80 |
-
"sha256": "
|
| 81 |
},
|
| 82 |
{
|
| 83 |
"id": "research_roadmap",
|
|
@@ -87,8 +87,8 @@
|
|
| 87 |
"surface": "repo_hf",
|
| 88 |
"shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
|
| 89 |
"exists": true,
|
| 90 |
-
"bytes":
|
| 91 |
-
"sha256": "
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"id": "research_roadmap_json",
|
|
@@ -98,8 +98,8 @@
|
|
| 98 |
"surface": "website_hf",
|
| 99 |
"shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
|
| 100 |
"exists": true,
|
| 101 |
-
"bytes":
|
| 102 |
-
"sha256": "
|
| 103 |
},
|
| 104 |
{
|
| 105 |
"id": "foundation_model_plan",
|
|
@@ -264,7 +264,7 @@
|
|
| 264 |
"shows": "Summarizes the 2026-06-09 full-parameter FSDP feasibility gates: 1/8/32/64-step guarded runs passed, the 128-step opportunistic pilot was preempted for Qwen v5 handoff, and no full checkpoints or weights are published.",
|
| 265 |
"exists": true,
|
| 266 |
"bytes": 3253,
|
| 267 |
-
"sha256": "
|
| 268 |
},
|
| 269 |
{
|
| 270 |
"id": "qwen3_full_parameter_gates_json",
|
|
@@ -275,7 +275,29 @@
|
|
| 275 |
"shows": "Machine-readable summary of full-parameter feasibility evidence and publication policy for website and Hugging Face mirrors.",
|
| 276 |
"exists": true,
|
| 277 |
"bytes": 12183,
|
| 278 |
-
"sha256": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
},
|
| 280 |
{
|
| 281 |
"id": "qwen3_full_parameter_gates_builder",
|
|
@@ -362,8 +384,8 @@
|
|
| 362 |
"surface": "website_hf",
|
| 363 |
"shows": "Gives a short project path with scope status and public surfaces.",
|
| 364 |
"exists": true,
|
| 365 |
-
"bytes":
|
| 366 |
-
"sha256": "
|
| 367 |
},
|
| 368 |
{
|
| 369 |
"id": "artifact_guide",
|
|
@@ -418,7 +440,7 @@
|
|
| 418 |
"shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
|
| 419 |
"exists": true,
|
| 420 |
"bytes": 4432,
|
| 421 |
-
"sha256": "
|
| 422 |
},
|
| 423 |
{
|
| 424 |
"id": "source_alignment_validator",
|
|
@@ -671,7 +693,7 @@
|
|
| 671 |
"shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
|
| 672 |
"exists": true,
|
| 673 |
"bytes": 8097,
|
| 674 |
-
"sha256": "
|
| 675 |
},
|
| 676 |
{
|
| 677 |
"id": "public_surface_qa",
|
|
@@ -785,8 +807,8 @@
|
|
| 785 |
"surface": "repo",
|
| 786 |
"shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
|
| 787 |
"exists": true,
|
| 788 |
-
"bytes":
|
| 789 |
-
"sha256": "
|
| 790 |
},
|
| 791 |
{
|
| 792 |
"id": "reproducibility_contract",
|
|
@@ -818,8 +840,8 @@
|
|
| 818 |
"surface": "repo_hf",
|
| 819 |
"shows": "Generates the selective artifact catalog from local files.",
|
| 820 |
"exists": true,
|
| 821 |
-
"bytes":
|
| 822 |
-
"sha256": "
|
| 823 |
},
|
| 824 |
{
|
| 825 |
"id": "publication_audit",
|
|
@@ -842,7 +864,7 @@
|
|
| 842 |
"volatile": true,
|
| 843 |
"shows": "Separates setup paths from completed held-out-episode results.",
|
| 844 |
"exists": true,
|
| 845 |
-
"bytes":
|
| 846 |
"hash_policy": "existence_and_size_only"
|
| 847 |
},
|
| 848 |
{
|
|
@@ -866,7 +888,7 @@
|
|
| 866 |
"volatile": true,
|
| 867 |
"shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
|
| 868 |
"exists": true,
|
| 869 |
-
"bytes":
|
| 870 |
"hash_policy": "existence_and_size_only"
|
| 871 |
},
|
| 872 |
{
|
|
@@ -899,8 +921,8 @@
|
|
| 899 |
"surface": "website_hf",
|
| 900 |
"shows": "Mirrors task metrics for the static dashboard.",
|
| 901 |
"exists": true,
|
| 902 |
-
"bytes":
|
| 903 |
-
"sha256": "
|
| 904 |
},
|
| 905 |
{
|
| 906 |
"id": "feature_manifest",
|
|
@@ -1119,8 +1141,8 @@
|
|
| 1119 |
"surface": "repo_hf",
|
| 1120 |
"shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
|
| 1121 |
"exists": true,
|
| 1122 |
-
"bytes":
|
| 1123 |
-
"sha256": "
|
| 1124 |
},
|
| 1125 |
{
|
| 1126 |
"id": "omni_model_comparison_json",
|
|
@@ -1130,8 +1152,8 @@
|
|
| 1130 |
"surface": "repo_hf",
|
| 1131 |
"shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
|
| 1132 |
"exists": true,
|
| 1133 |
-
"bytes":
|
| 1134 |
-
"sha256": "
|
| 1135 |
},
|
| 1136 |
{
|
| 1137 |
"id": "cosmos3_nano_verified_summary",
|
|
@@ -1548,8 +1570,8 @@
|
|
| 1548 |
"surface": "repo_hf",
|
| 1549 |
"shows": "Public-safe verified package for xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full (qwen3_omni_lora, status=verified).",
|
| 1550 |
"exists": true,
|
| 1551 |
-
"file_count":
|
| 1552 |
-
"bytes":
|
| 1553 |
},
|
| 1554 |
{
|
| 1555 |
"id": "verified_public_summary_xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
|
|
@@ -1595,6 +1617,17 @@
|
|
| 1595 |
"bytes": 49205,
|
| 1596 |
"sha256": "fc198c3e443877bca42cc33bec6e2a194d6cb20e97c28e931a90736c45538bba"
|
| 1597 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1598 |
{
|
| 1599 |
"id": "verified_public_package_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
| 1600 |
"title": "Verified public package: Qwen3-Omni LoRA",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Artifact Index",
|
| 3 |
+
"generated_at_utc": "2026-06-13T17:46:38+00:00",
|
| 4 |
"status": "pass",
|
| 5 |
+
"artifact_count": 162,
|
| 6 |
"missing": [],
|
| 7 |
"by_kind": {
|
| 8 |
"project_path": 14,
|
| 9 |
"scaleup_contract": 7,
|
| 10 |
+
"scaleup_status": 38,
|
| 11 |
"publication_workflow": 5,
|
| 12 |
"project_scope": 1,
|
| 13 |
"source_alignment": 5,
|
|
|
|
| 32 |
"citation": 1,
|
| 33 |
"license": 1,
|
| 34 |
"verified_public_package": 10,
|
| 35 |
+
"publication_audit": 7
|
| 36 |
},
|
| 37 |
"artifacts": [
|
| 38 |
{
|
|
|
|
| 65 |
"surface": "repo_hf",
|
| 66 |
"shows": "Gives a compact current-state table for first-pass readers.",
|
| 67 |
"exists": true,
|
| 68 |
+
"bytes": 13755,
|
| 69 |
+
"sha256": "342897ae05ceab83d626765c0052c140e414ba25ebda4fce9fb07bb37a2decef"
|
| 70 |
},
|
| 71 |
{
|
| 72 |
"id": "project_status_json",
|
|
|
|
| 76 |
"surface": "website_hf",
|
| 77 |
"shows": "Machine-readable copy of the current project status for website and HF mirrors.",
|
| 78 |
"exists": true,
|
| 79 |
+
"bytes": 23535,
|
| 80 |
+
"sha256": "9ffae32ff0b3750f89179d2ce92205f95a5b53069d0aa344d6342c23b1efebbd"
|
| 81 |
},
|
| 82 |
{
|
| 83 |
"id": "research_roadmap",
|
|
|
|
| 87 |
"surface": "repo_hf",
|
| 88 |
"shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
|
| 89 |
"exists": true,
|
| 90 |
+
"bytes": 14503,
|
| 91 |
+
"sha256": "bb06fb0ccf336cafb0305883b7f93c2e1af547c9ff04b1fa6fc87481d54bcf61"
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"id": "research_roadmap_json",
|
|
|
|
| 98 |
"surface": "website_hf",
|
| 99 |
"shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
|
| 100 |
"exists": true,
|
| 101 |
+
"bytes": 13450,
|
| 102 |
+
"sha256": "668220626950f8e55410b8f829dba6306d0b2feedafdf0198800ad9814992d84"
|
| 103 |
},
|
| 104 |
{
|
| 105 |
"id": "foundation_model_plan",
|
|
|
|
| 264 |
"shows": "Summarizes the 2026-06-09 full-parameter FSDP feasibility gates: 1/8/32/64-step guarded runs passed, the 128-step opportunistic pilot was preempted for Qwen v5 handoff, and no full checkpoints or weights are published.",
|
| 265 |
"exists": true,
|
| 266 |
"bytes": 3253,
|
| 267 |
+
"sha256": "b25f1d8cde814207b4c3234bf07140cf99a0ede29af3f53dbc146aab464e8a9b"
|
| 268 |
},
|
| 269 |
{
|
| 270 |
"id": "qwen3_full_parameter_gates_json",
|
|
|
|
| 275 |
"shows": "Machine-readable summary of full-parameter feasibility evidence and publication policy for website and Hugging Face mirrors.",
|
| 276 |
"exists": true,
|
| 277 |
"bytes": 12183,
|
| 278 |
+
"sha256": "d051608d3428645778f721e538af93566ab772871a825ac12825dd5f18e94a95"
|
| 279 |
+
},
|
| 280 |
+
{
|
| 281 |
+
"id": "qwen3_v5_v6_comparison",
|
| 282 |
+
"title": "Qwen3-Omni v5/v6 comparison",
|
| 283 |
+
"path": "results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md",
|
| 284 |
+
"kind": "scaleup_status",
|
| 285 |
+
"surface": "repo_hf",
|
| 286 |
+
"shows": "Reader-facing comparison of the verified Qwen3 v5 release row and the latest verified v6 row, including metric deltas and release-tag policy.",
|
| 287 |
+
"exists": true,
|
| 288 |
+
"bytes": 1793,
|
| 289 |
+
"sha256": "890430b05ace20375fc9534f923d170c0509037272ba4ef523e3ca2f3c9ac746"
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"id": "qwen3_v5_v6_comparison_json",
|
| 293 |
+
"title": "Qwen3-Omni v5/v6 comparison JSON",
|
| 294 |
+
"path": "docs/data/qwen3_v5_v6_comparison.json",
|
| 295 |
+
"kind": "scaleup_status",
|
| 296 |
+
"surface": "website_hf",
|
| 297 |
+
"shows": "Machine-readable v5/v6 metric deltas and publication recommendation for website and Hugging Face mirrors.",
|
| 298 |
+
"exists": true,
|
| 299 |
+
"bytes": 2814,
|
| 300 |
+
"sha256": "f5d16e279a82cdc6266a1318584bf38cbc0b105296d437f9b8bf0398403aace5"
|
| 301 |
},
|
| 302 |
{
|
| 303 |
"id": "qwen3_full_parameter_gates_builder",
|
|
|
|
| 384 |
"surface": "website_hf",
|
| 385 |
"shows": "Gives a short project path with scope status and public surfaces.",
|
| 386 |
"exists": true,
|
| 387 |
+
"bytes": 10597,
|
| 388 |
+
"sha256": "a64b7c033c54879e0183e7ec794d3197fb483024c25947759287fcd4b7e0fec1"
|
| 389 |
},
|
| 390 |
{
|
| 391 |
"id": "artifact_guide",
|
|
|
|
| 440 |
"shows": "Machine-readable source-alignment pass/fail check for repo, website, and HF surfaces.",
|
| 441 |
"exists": true,
|
| 442 |
"bytes": 4432,
|
| 443 |
+
"sha256": "0d0d381f726c1e3787fb3fb15b6fb8879512c26fa0dc06fb943e1a239b0063dd"
|
| 444 |
},
|
| 445 |
{
|
| 446 |
"id": "source_alignment_validator",
|
|
|
|
| 693 |
"shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
|
| 694 |
"exists": true,
|
| 695 |
"bytes": 8097,
|
| 696 |
+
"sha256": "1cdc8b4767b3ca88eada654a3117aa2de253fea7af62573b080088e8f1b311bd"
|
| 697 |
},
|
| 698 |
{
|
| 699 |
"id": "public_surface_qa",
|
|
|
|
| 807 |
"surface": "repo",
|
| 808 |
"shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
|
| 809 |
"exists": true,
|
| 810 |
+
"bytes": 42394,
|
| 811 |
+
"sha256": "43a70436108eea3fa6692096ebf318fb755ff040d925094ef9de018f212fde18"
|
| 812 |
},
|
| 813 |
{
|
| 814 |
"id": "reproducibility_contract",
|
|
|
|
| 840 |
"surface": "repo_hf",
|
| 841 |
"shows": "Generates the selective artifact catalog from local files.",
|
| 842 |
"exists": true,
|
| 843 |
+
"bytes": 42809,
|
| 844 |
+
"sha256": "c03d1b1367ad191fea0be3c634fddf8ee6fdc2118bf17396920c16cc288c4ef0"
|
| 845 |
},
|
| 846 |
{
|
| 847 |
"id": "publication_audit",
|
|
|
|
| 864 |
"volatile": true,
|
| 865 |
"shows": "Separates setup paths from completed held-out-episode results.",
|
| 866 |
"exists": true,
|
| 867 |
+
"bytes": 21795,
|
| 868 |
"hash_policy": "existence_and_size_only"
|
| 869 |
},
|
| 870 |
{
|
|
|
|
| 888 |
"volatile": true,
|
| 889 |
"shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
|
| 890 |
"exists": true,
|
| 891 |
+
"bytes": 15774,
|
| 892 |
"hash_policy": "existence_and_size_only"
|
| 893 |
},
|
| 894 |
{
|
|
|
|
| 921 |
"surface": "website_hf",
|
| 922 |
"shows": "Mirrors task metrics for the static dashboard.",
|
| 923 |
"exists": true,
|
| 924 |
+
"bytes": 27807,
|
| 925 |
+
"sha256": "3a6a5ee59562ae189844cb4ba26d6e261c2f73a8e54bb6e2fbc3e307c2d123fa"
|
| 926 |
},
|
| 927 |
{
|
| 928 |
"id": "feature_manifest",
|
|
|
|
| 1141 |
"surface": "repo_hf",
|
| 1142 |
"shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
|
| 1143 |
"exists": true,
|
| 1144 |
+
"bytes": 15999,
|
| 1145 |
+
"sha256": "d5a7118a878b202adbc50e3436bbe134e5de139f2a9e97176efe9ecc0f446088"
|
| 1146 |
},
|
| 1147 |
{
|
| 1148 |
"id": "omni_model_comparison_json",
|
|
|
|
| 1152 |
"surface": "repo_hf",
|
| 1153 |
"shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
|
| 1154 |
"exists": true,
|
| 1155 |
+
"bytes": 81866,
|
| 1156 |
+
"sha256": "191125098a66ecccfa27395c0f9776616f74b4bf8fb19f16b75cda7ed06cb4b2"
|
| 1157 |
},
|
| 1158 |
{
|
| 1159 |
"id": "cosmos3_nano_verified_summary",
|
|
|
|
| 1570 |
"surface": "repo_hf",
|
| 1571 |
"shows": "Public-safe verified package for xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full (qwen3_omni_lora, status=verified).",
|
| 1572 |
"exists": true,
|
| 1573 |
+
"file_count": 14,
|
| 1574 |
+
"bytes": 12189730
|
| 1575 |
},
|
| 1576 |
{
|
| 1577 |
"id": "verified_public_summary_xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
|
|
|
|
| 1617 |
"bytes": 49205,
|
| 1618 |
"sha256": "fc198c3e443877bca42cc33bec6e2a194d6cb20e97c28e931a90736c45538bba"
|
| 1619 |
},
|
| 1620 |
+
{
|
| 1621 |
+
"id": "verified_public_package_audit_xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
|
| 1622 |
+
"title": "Verified package audit: Qwen3-Omni LoRA",
|
| 1623 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/package_audit.json",
|
| 1624 |
+
"kind": "publication_audit",
|
| 1625 |
+
"surface": "repo_hf",
|
| 1626 |
+
"shows": "Package audit for xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full.",
|
| 1627 |
+
"exists": true,
|
| 1628 |
+
"bytes": 631,
|
| 1629 |
+
"sha256": "7cf478ae33c52bae0ba742e81da8e482e06d0853eecd85f895f447a708f81718"
|
| 1630 |
+
},
|
| 1631 |
{
|
| 1632 |
"id": "verified_public_package_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
| 1633 |
"title": "Verified public package: Qwen3-Omni LoRA",
|
docs/data/omni_model_comparison.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"status": "pass",
|
| 5 |
"version_count": 3,
|
| 6 |
"model_group_count": 5,
|
|
@@ -588,8 +588,8 @@
|
|
| 588 |
"global_step": 3204
|
| 589 |
}
|
| 590 |
],
|
| 591 |
-
"is_current":
|
| 592 |
-
"weights_repository": "
|
| 593 |
},
|
| 594 |
{
|
| 595 |
"id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
|
|
@@ -640,8 +640,8 @@
|
|
| 640 |
"global_step": 6408
|
| 641 |
}
|
| 642 |
],
|
| 643 |
-
"is_current":
|
| 644 |
-
"weights_repository": "
|
| 645 |
},
|
| 646 |
{
|
| 647 |
"id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
|
@@ -1202,8 +1202,8 @@
|
|
| 1202 |
"global_step": 3204
|
| 1203 |
}
|
| 1204 |
],
|
| 1205 |
-
"is_current":
|
| 1206 |
-
"weights_repository": "
|
| 1207 |
},
|
| 1208 |
{
|
| 1209 |
"id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
|
|
@@ -1254,8 +1254,8 @@
|
|
| 1254 |
"global_step": 6408
|
| 1255 |
}
|
| 1256 |
],
|
| 1257 |
-
"is_current":
|
| 1258 |
-
"weights_repository": "
|
| 1259 |
},
|
| 1260 |
{
|
| 1261 |
"id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
|
@@ -1753,6 +1753,7 @@
|
|
| 1753 |
"Cosmos3-Super now has both a 128-episode base-weight Reasoner evaluation on the JSON task and a fine-tuned forward-dynamics LoRA branch over camera-pose proxy targets."
|
| 1754 |
],
|
| 1755 |
"pending": [
|
| 1756 |
-
"Use the verified Qwen3
|
|
|
|
| 1757 |
]
|
| 1758 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
|
| 3 |
+
"generated_at_utc": "2026-06-13T17:41:35+00:00",
|
| 4 |
"status": "pass",
|
| 5 |
"version_count": 3,
|
| 6 |
"model_group_count": 5,
|
|
|
|
| 588 |
"global_step": 3204
|
| 589 |
}
|
| 590 |
],
|
| 591 |
+
"is_current": false,
|
| 592 |
+
"weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo"
|
| 593 |
},
|
| 594 |
{
|
| 595 |
"id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
|
|
|
|
| 640 |
"global_step": 6408
|
| 641 |
}
|
| 642 |
],
|
| 643 |
+
"is_current": true,
|
| 644 |
+
"weights_repository": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep"
|
| 645 |
},
|
| 646 |
{
|
| 647 |
"id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
|
|
|
| 1202 |
"global_step": 3204
|
| 1203 |
}
|
| 1204 |
],
|
| 1205 |
+
"is_current": false,
|
| 1206 |
+
"weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo"
|
| 1207 |
},
|
| 1208 |
{
|
| 1209 |
"id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
|
|
|
|
| 1254 |
"global_step": 6408
|
| 1255 |
}
|
| 1256 |
],
|
| 1257 |
+
"is_current": true,
|
| 1258 |
+
"weights_repository": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep"
|
| 1259 |
},
|
| 1260 |
{
|
| 1261 |
"id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
|
|
|
| 1753 |
"Cosmos3-Super now has both a 128-episode base-weight Reasoner evaluation on the JSON task and a fine-tuned forward-dynamics LoRA branch over camera-pose proxy targets."
|
| 1754 |
],
|
| 1755 |
"pending": [
|
| 1756 |
+
"Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.",
|
| 1757 |
+
"Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before claiming v6 is globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses subtask, next-action, object micro-F1, and JSON validity slightly."
|
| 1758 |
]
|
| 1759 |
}
|
docs/data/project_packet.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Project Packet",
|
| 3 |
-
"version": "2026-06-
|
| 4 |
"scope_status": {
|
| 5 |
"validated_data": "one public Xperience-10M sample episode",
|
| 6 |
"aligned_frames": 5821,
|
|
@@ -12,7 +12,7 @@
|
|
| 12 |
"raw_xperience10m_data_in_repo": false,
|
| 13 |
"audio_feature_status": "Audio is one of the synchronized source modalities in the current task representation.",
|
| 14 |
"qwen3_omni_32_episode_claim": false,
|
| 15 |
-
"qwen3_omni_status": "The selected 96/16/16 Qwen3-Omni
|
| 16 |
"cosmos3_super_forward_dynamics_lora_status": "The first Cosmos3-Super fine-tuned adapter branch is verified as a forward-dynamics LoRA over camera-pose proxy targets; it reports loss metrics, not JSON action-label accuracy.",
|
| 17 |
"task_suite_enhancement_128_status": "Current no-new-episode enhancement pack recommends multiscale_20s10_40s20_80s40, hierarchical action/subtask targets, label-normalized scoring, and raw-feature shards before adding more episodes."
|
| 18 |
},
|
|
@@ -118,7 +118,7 @@
|
|
| 118 |
"scripts/omni/discover_xperience10m_sources.py",
|
| 119 |
"docs/data/omni_finetune_verified_result.json"
|
| 120 |
],
|
| 121 |
-
"readout": "The selected-episode held-out Qwen3-Omni
|
| 122 |
},
|
| 123 |
{
|
| 124 |
"step": 9,
|
|
@@ -155,7 +155,7 @@
|
|
| 155 |
"hf_model_baselines": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines"
|
| 156 |
},
|
| 157 |
"current_reading_notes": [
|
| 158 |
-
"The
|
| 159 |
"The current 128-episode suite has a no-new-episode enhancement plan: multiscale_20s10_40s20_80s40 windows, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
|
| 160 |
"Cosmos3-Super Forward-Dynamics LoRA is verified as a loss-based world-model adapter branch, not as JSON action-token prediction.",
|
| 161 |
"Older Qwen3-Omni setup artifacts are separate from the verified selected-episode diagnostic package.",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Project Packet",
|
| 3 |
+
"version": "2026-06-14",
|
| 4 |
"scope_status": {
|
| 5 |
"validated_data": "one public Xperience-10M sample episode",
|
| 6 |
"aligned_frames": 5821,
|
|
|
|
| 12 |
"raw_xperience10m_data_in_repo": false,
|
| 13 |
"audio_feature_status": "Audio is one of the synchronized source modalities in the current task representation.",
|
| 14 |
"qwen3_omni_32_episode_claim": false,
|
| 15 |
+
"qwen3_omni_status": "The selected 96/16/16 Qwen3-Omni v6 diagnostic branch is verified, meets the strict-JSON target, improves action macro-F1/contact accuracy versus v5, and still has weak action/subtask metrics that guide the next error-analysis pass.",
|
| 16 |
"cosmos3_super_forward_dynamics_lora_status": "The first Cosmos3-Super fine-tuned adapter branch is verified as a forward-dynamics LoRA over camera-pose proxy targets; it reports loss metrics, not JSON action-label accuracy.",
|
| 17 |
"task_suite_enhancement_128_status": "Current no-new-episode enhancement pack recommends multiscale_20s10_40s20_80s40, hierarchical action/subtask targets, label-normalized scoring, and raw-feature shards before adding more episodes."
|
| 18 |
},
|
|
|
|
| 118 |
"scripts/omni/discover_xperience10m_sources.py",
|
| 119 |
"docs/data/omni_finetune_verified_result.json"
|
| 120 |
],
|
| 121 |
+
"readout": "The selected-episode held-out Qwen3-Omni v6 diagnostic branch is verified and JSON-format reliability meets the 98% target. The same public comparison also includes the verified 128-episode baselines, Cosmos3-Nano compatibility result, Cosmos3-Super Reasoner evaluation, and Cosmos3-Super Forward-Dynamics LoRA package. The next milestone is action/subtask error analysis and stronger model-quality runs on the same split."
|
| 122 |
},
|
| 123 |
{
|
| 124 |
"step": 9,
|
|
|
|
| 155 |
"hf_model_baselines": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines"
|
| 156 |
},
|
| 157 |
"current_reading_notes": [
|
| 158 |
+
"The latest cross-episode Qwen3-Omni v6 diagnostic branch is verified, but strong model quality is not yet shown; action/subtask metrics remain weak and v5 remains stronger on several non-contact metrics.",
|
| 159 |
"The current 128-episode suite has a no-new-episode enhancement plan: multiscale_20s10_40s20_80s40 windows, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
|
| 160 |
"Cosmos3-Super Forward-Dynamics LoRA is verified as a loss-based world-model adapter branch, not as JSON action-token prediction.",
|
| 161 |
"Older Qwen3-Omni setup artifacts are separate from the verified selected-episode diagnostic package.",
|
docs/data/project_status.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Project Status",
|
| 3 |
-
"version": "2026-06-
|
| 4 |
-
"decision": "
|
| 5 |
"research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, compares verified Qwen3-Omni and Cosmos3 branch packages as early cross-episode diagnostics, and now records a no-new-episode enhancement pack for pushing the current 128-episode suite harder.",
|
| 6 |
"scope_boundary": {
|
| 7 |
"validated_episode_count": 1,
|
|
@@ -21,11 +21,11 @@
|
|
| 21 |
"test": 16
|
| 22 |
},
|
| 23 |
"qwen3_omni_exported_window_counts": {
|
| 24 |
-
"train":
|
| 25 |
-
"val":
|
| 26 |
-
"test":
|
| 27 |
},
|
| 28 |
-
"qwen3_omni_json_validity_rate":
|
| 29 |
"qwen3_omni_validation_aware": true,
|
| 30 |
"qwen3_omni_json_quality_target_met": true,
|
| 31 |
"qwen3_omni_lora_adapter_repo": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
|
|
@@ -48,12 +48,12 @@
|
|
| 48 |
"test": 448
|
| 49 |
},
|
| 50 |
"multi_episode_128_baseline_task_count": 12,
|
| 51 |
-
"qwen3_omni_current_eval_run_id": "
|
| 52 |
-
"qwen3_omni_current_train_epochs":
|
| 53 |
-
"qwen3_omni_action_macro_f1": 0.
|
| 54 |
-
"qwen3_omni_subtask_accuracy": 0.
|
| 55 |
-
"qwen3_omni_contact_accuracy": 0.
|
| 56 |
-
"qwen3_omni_object_micro_f1": 0.
|
| 57 |
"task_suite_enhancement_128_available": true,
|
| 58 |
"task_suite_enhancement_128_current_windows": 3808,
|
| 59 |
"task_suite_enhancement_128_recommended_export": "multiscale_20s10_40s20_80s40",
|
|
@@ -241,13 +241,15 @@
|
|
| 241 |
"status": "final_verified_diagnostic_result_json_target_met",
|
| 242 |
"evidence": [
|
| 243 |
"docs/data/omni_finetune_verified_result.json",
|
| 244 |
-
"
|
|
|
|
|
|
|
| 245 |
"https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
|
| 246 |
"scripts/omni/package_verified_omni_result.py",
|
| 247 |
"scripts/omni/audit_verified_omni_package.py",
|
| 248 |
"scripts/omni/analyze_qwen3_omni_errors.py"
|
| 249 |
],
|
| 250 |
-
"readout": "The selected 96/16/16 episode split now has a
|
| 251 |
},
|
| 252 |
{
|
| 253 |
"area": "Cosmos3-Nano future-window branch",
|
|
@@ -324,7 +326,7 @@
|
|
| 324 |
"Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
|
| 325 |
],
|
| 326 |
"current_reading_notes": [
|
| 327 |
-
"The
|
| 328 |
"Use TASK_SUITE_ENHANCEMENT_128.md and docs/data/task_suite_enhancement_128.json to push the current 128-episode suite without more raw episodes through multiscale_20s10_40s20_80s40, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
|
| 329 |
"Use docs/data/omni_model_comparison.json to compare both views: the single-episode/128-baseline/model-branch result layers and the model-family grouping for task heads, Qwen3-Omni LoRA, Cosmos3-Nano, and Cosmos3-Super.",
|
| 330 |
"The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Project Status",
|
| 3 |
+
"version": "2026-06-14",
|
| 4 |
+
"decision": "public_sample_pipeline_verified_128_enhancement_qwen3_v6_cosmos_comparison",
|
| 5 |
"research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, compares verified Qwen3-Omni and Cosmos3 branch packages as early cross-episode diagnostics, and now records a no-new-episode enhancement pack for pushing the current 128-episode suite harder.",
|
| 6 |
"scope_boundary": {
|
| 7 |
"validated_episode_count": 1,
|
|
|
|
| 21 |
"test": 16
|
| 22 |
},
|
| 23 |
"qwen3_omni_exported_window_counts": {
|
| 24 |
+
"train": 25629,
|
| 25 |
+
"val": 4608,
|
| 26 |
+
"test": 4032
|
| 27 |
},
|
| 28 |
+
"qwen3_omni_json_validity_rate": 0.9990079365079365,
|
| 29 |
"qwen3_omni_validation_aware": true,
|
| 30 |
"qwen3_omni_json_quality_target_met": true,
|
| 31 |
"qwen3_omni_lora_adapter_repo": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
|
|
|
|
| 48 |
"test": 448
|
| 49 |
},
|
| 50 |
"multi_episode_128_baseline_task_count": 12,
|
| 51 |
+
"qwen3_omni_current_eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
|
| 52 |
+
"qwen3_omni_current_train_epochs": 2,
|
| 53 |
+
"qwen3_omni_action_macro_f1": 0.0028830723979596335,
|
| 54 |
+
"qwen3_omni_subtask_accuracy": 0.0037313432835820895,
|
| 55 |
+
"qwen3_omni_contact_accuracy": 0.8177083333333334,
|
| 56 |
+
"qwen3_omni_object_micro_f1": 0.3064982378331287,
|
| 57 |
"task_suite_enhancement_128_available": true,
|
| 58 |
"task_suite_enhancement_128_current_windows": 3808,
|
| 59 |
"task_suite_enhancement_128_recommended_export": "multiscale_20s10_40s20_80s40",
|
|
|
|
| 241 |
"status": "final_verified_diagnostic_result_json_target_met",
|
| 242 |
"evidence": [
|
| 243 |
"docs/data/omni_finetune_verified_result.json",
|
| 244 |
+
"docs/data/qwen3_v5_v6_comparison.json",
|
| 245 |
+
"results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md",
|
| 246 |
+
"results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/",
|
| 247 |
"https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
|
| 248 |
"scripts/omni/package_verified_omni_result.py",
|
| 249 |
"scripts/omni/audit_verified_omni_package.py",
|
| 250 |
"scripts/omni/analyze_qwen3_omni_errors.py"
|
| 251 |
],
|
| 252 |
+
"readout": "The selected 96/16/16 episode split now has a current v6 rank64/lr5e-5 public-safe held-out package with 34,269 exported windows, 4,032 test predictions, validation/audit summaries, and a public LoRA adapter repo. JSON validity is 99.90%, meeting the 98% target; transition accuracy is 98.98%, contact accuracy is 81.77%, object micro-F1 is 30.65%, next-action accuracy is 4.31%, and action/subtask metrics remain weak. v6 improves action macro-F1 and contact accuracy versus v5, but v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics."
|
| 253 |
},
|
| 254 |
{
|
| 255 |
"area": "Cosmos3-Nano future-window branch",
|
|
|
|
| 326 |
"Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
|
| 327 |
],
|
| 328 |
"current_reading_notes": [
|
| 329 |
+
"The latest Qwen3-Omni v6 diagnostic branch is verified and meets the strict-JSON target, but action/subtask held-out quality is still weak: JSON validity is 99.90%, action macro-F1 is 0.0029, and subtask accuracy is 0.0037. v5 remains the pinned prior release row because it is still stronger on several metrics.",
|
| 330 |
"Use TASK_SUITE_ENHANCEMENT_128.md and docs/data/task_suite_enhancement_128.json to push the current 128-episode suite without more raw episodes through multiscale_20s10_40s20_80s40, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
|
| 331 |
"Use docs/data/omni_model_comparison.json to compare both views: the single-episode/128-baseline/model-branch result layers and the model-family grouping for task heads, Qwen3-Omni LoRA, Cosmos3-Nano, and Cosmos3-Super.",
|
| 332 |
"The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
|
docs/data/publication_audit.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
@@ -187,8 +187,8 @@
|
|
| 187 |
"github_repo": {
|
| 188 |
"root": "repo",
|
| 189 |
"exists": true,
|
| 190 |
-
"file_count":
|
| 191 |
-
"text_file_count":
|
| 192 |
"largest_file": {
|
| 193 |
"path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
|
| 194 |
"bytes": 55702978
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-13T17:46:58+00:00",
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
|
|
| 187 |
"github_repo": {
|
| 188 |
"root": "repo",
|
| 189 |
"exists": true,
|
| 190 |
+
"file_count": 914,
|
| 191 |
+
"text_file_count": 746,
|
| 192 |
"largest_file": {
|
| 193 |
"path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
|
| 194 |
"bytes": 55702978
|
docs/data/quality_gates.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Release Checks",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
|
| 6 |
"automated_gates": [
|
| 7 |
{
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Release Checks",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-13T17:46:37+00:00",
|
| 5 |
"rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
|
| 6 |
"automated_gates": [
|
| 7 |
{
|
docs/data/qwen3_full_parameter_gates.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"title": "Qwen3-Omni Full-Parameter Feasibility Gates",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"status": "pass",
|
| 5 |
"decision": "full_parameter_feasible_for_guarded_short_runs_not_promoted",
|
| 6 |
"interpretation": "The 2026-06-09 gates prove that Qwen3-Omni full-parameter FSDP can load, prepare, run backward/optimizer steps, and complete guarded pilots up to 128 optimizer steps on an 8-GPU remote worker. They do not prove a production full-parameter fine-tune, and they intentionally save no full checkpoints or public weights.",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Qwen3-Omni Full-Parameter Feasibility Gates",
|
| 3 |
+
"generated_at_utc": "2026-06-13T17:41:13+00:00",
|
| 4 |
"status": "pass",
|
| 5 |
"decision": "full_parameter_feasible_for_guarded_short_runs_not_promoted",
|
| 6 |
"interpretation": "The 2026-06-09 gates prove that Qwen3-Omni full-parameter FSDP can load, prepare, run backward/optimizer steps, and complete guarded pilots up to 128 optimizer steps on an 8-GPU remote worker. They do not prove a production full-parameter fine-tune, and they intentionally save no full checkpoints or public weights.",
|
docs/data/research_roadmap.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Research Roadmap",
|
| 3 |
"summary": "Staged path from the public-sample task lab to verified Qwen3-Omni, Cosmos3-Nano, and Cosmos3-Super diagnostics, same-split 128-episode baseline alignment, a no-new-episode 128-suite enhancement pack, action/subtask error analysis, world/policy branches, and a future Xperience-native embodied foundation model.",
|
| 4 |
-
"current_decision_point": "Push the current selected 128 episodes harder before requesting more storage: keep the public-sample task suite as the development harness, use the
|
| 5 |
"additional_development_directions": {
|
| 6 |
"source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
|
| 7 |
"source_json": "docs/data/additional_development_directions.json",
|
|
@@ -52,8 +52,8 @@
|
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"id": "qwen3_omni_lora_diagnostic_pilot",
|
| 55 |
-
"name": "Qwen3-Omni LoRA
|
| 56 |
-
"status": "
|
| 57 |
"entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
|
| 58 |
"deliverables": [
|
| 59 |
"dataset JSONL/media manifests",
|
|
@@ -64,10 +64,13 @@
|
|
| 64 |
"metrics",
|
| 65 |
"confusion matrices",
|
| 66 |
"run report",
|
|
|
|
| 67 |
"public LoRA adapter repo"
|
| 68 |
],
|
| 69 |
"completion_evidence": [
|
| 70 |
"docs/data/omni_finetune_verified_result.json",
|
|
|
|
|
|
|
| 71 |
"results/omni_finetune/verified_public/",
|
| 72 |
"dataset_manifest.json",
|
| 73 |
"training_metadata.json",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Research Roadmap",
|
| 3 |
"summary": "Staged path from the public-sample task lab to verified Qwen3-Omni, Cosmos3-Nano, and Cosmos3-Super diagnostics, same-split 128-episode baseline alignment, a no-new-episode 128-suite enhancement pack, action/subtask error analysis, world/policy branches, and a future Xperience-native embodied foundation model.",
|
| 4 |
+
"current_decision_point": "Push the current selected 128 episodes harder before requesting more storage: keep the public-sample task suite as the development harness, use the latest verified selected-episode Qwen3-Omni v6 diagnostic branch plus the pinned v5 row as structured-task references, read Cosmos3-Nano and Cosmos3-Super Forward-Dynamics LoRA as separate world-model results, continue with hierarchical action/subtask targets and label-normalized scoring, and defer policy-model experiments until robot-compatible targets are implemented. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
|
| 5 |
"additional_development_directions": {
|
| 6 |
"source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
|
| 7 |
"source_json": "docs/data/additional_development_directions.json",
|
|
|
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"id": "qwen3_omni_lora_diagnostic_pilot",
|
| 55 |
+
"name": "Qwen3-Omni LoRA Latest Diagnostic Branch",
|
| 56 |
+
"status": "verified_latest_branch",
|
| 57 |
"entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
|
| 58 |
"deliverables": [
|
| 59 |
"dataset JSONL/media manifests",
|
|
|
|
| 64 |
"metrics",
|
| 65 |
"confusion matrices",
|
| 66 |
"run report",
|
| 67 |
+
"v5/v6 comparison",
|
| 68 |
"public LoRA adapter repo"
|
| 69 |
],
|
| 70 |
"completion_evidence": [
|
| 71 |
"docs/data/omni_finetune_verified_result.json",
|
| 72 |
+
"docs/data/qwen3_v5_v6_comparison.json",
|
| 73 |
+
"results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md",
|
| 74 |
"results/omni_finetune/verified_public/",
|
| 75 |
"dataset_manifest.json",
|
| 76 |
"training_metadata.json",
|
docs/data/research_roadmap_interactive.json
CHANGED
|
@@ -2222,7 +2222,7 @@
|
|
| 2222 |
],
|
| 2223 |
"status": "planning_artifact"
|
| 2224 |
},
|
| 2225 |
-
"generated_at_utc": "2026-06-
|
| 2226 |
"omni_plan": {
|
| 2227 |
"adapter": "LoRA rank 16, alpha 32, dropout 0.05",
|
| 2228 |
"backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
|
|
@@ -2287,6 +2287,8 @@
|
|
| 2287 |
{
|
| 2288 |
"completion_evidence": [
|
| 2289 |
"docs/data/omni_finetune_verified_result.json",
|
|
|
|
|
|
|
| 2290 |
"results/omni_finetune/verified_public/",
|
| 2291 |
"dataset_manifest.json",
|
| 2292 |
"training_metadata.json",
|
|
@@ -2304,14 +2306,15 @@
|
|
| 2304 |
"metrics",
|
| 2305 |
"confusion matrices",
|
| 2306 |
"run report",
|
|
|
|
| 2307 |
"public LoRA adapter repo"
|
| 2308 |
],
|
| 2309 |
"entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
|
| 2310 |
"id": "qwen3_omni_lora_diagnostic_pilot",
|
| 2311 |
-
"name": "Qwen3-Omni LoRA
|
| 2312 |
"reader_takeaway": "The final omni-model diagnostic result establishes the full held-out training/validation/evaluation loop and meets the strict-JSON target, but weak action/subtask metrics make it a diagnostic baseline.",
|
| 2313 |
"stage": "future",
|
| 2314 |
-
"status": "
|
| 2315 |
},
|
| 2316 |
{
|
| 2317 |
"completion_evidence": [
|
|
|
|
| 2222 |
],
|
| 2223 |
"status": "planning_artifact"
|
| 2224 |
},
|
| 2225 |
+
"generated_at_utc": "2026-06-13T17:41:13+00:00",
|
| 2226 |
"omni_plan": {
|
| 2227 |
"adapter": "LoRA rank 16, alpha 32, dropout 0.05",
|
| 2228 |
"backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
|
|
|
|
| 2287 |
{
|
| 2288 |
"completion_evidence": [
|
| 2289 |
"docs/data/omni_finetune_verified_result.json",
|
| 2290 |
+
"docs/data/qwen3_v5_v6_comparison.json",
|
| 2291 |
+
"results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md",
|
| 2292 |
"results/omni_finetune/verified_public/",
|
| 2293 |
"dataset_manifest.json",
|
| 2294 |
"training_metadata.json",
|
|
|
|
| 2306 |
"metrics",
|
| 2307 |
"confusion matrices",
|
| 2308 |
"run report",
|
| 2309 |
+
"v5/v6 comparison",
|
| 2310 |
"public LoRA adapter repo"
|
| 2311 |
],
|
| 2312 |
"entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
|
| 2313 |
"id": "qwen3_omni_lora_diagnostic_pilot",
|
| 2314 |
+
"name": "Qwen3-Omni LoRA Latest Diagnostic Branch",
|
| 2315 |
"reader_takeaway": "The final omni-model diagnostic result establishes the full held-out training/validation/evaluation loop and meets the strict-JSON target, but weak action/subtask metrics make it a diagnostic baseline.",
|
| 2316 |
"stage": "future",
|
| 2317 |
+
"status": "verified_latest_branch"
|
| 2318 |
},
|
| 2319 |
{
|
| 2320 |
"completion_evidence": [
|
docs/data/scope_claims_audit.json
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"summary": {
|
| 5 |
"qwen3_omni_verified_diagnostic_pilot": true,
|
| 6 |
"dataset_manifest_num_episodes": 119,
|
| 7 |
-
"dataset_manifest_num_samples":
|
| 8 |
-
"training_metadata_num_train_samples":
|
| 9 |
-
"eval_num_samples":
|
| 10 |
-
"eval_json_validity_rate":
|
| 11 |
"quality_target_met": true,
|
| 12 |
-
"historical_identifier_count":
|
| 13 |
"public_32_episode_status_file_count": 1,
|
| 14 |
"failure_count": 0
|
| 15 |
},
|
|
@@ -25,7 +25,7 @@
|
|
| 25 |
{
|
| 26 |
"name": "summary_metrics_preserves_verified_diagnostic_status",
|
| 27 |
"status": "pass",
|
| 28 |
-
"detail": "The selected-episode Qwen3-Omni diagnostic
|
| 29 |
"evidence": [
|
| 30 |
"docs/data/summary_metrics.json"
|
| 31 |
]
|
|
@@ -33,25 +33,25 @@
|
|
| 33 |
{
|
| 34 |
"name": "verified_package_dataset_has_expected_windows",
|
| 35 |
"status": "pass",
|
| 36 |
-
"detail": "episodes=119, samples=
|
| 37 |
"evidence": [
|
| 38 |
-
"results/omni_finetune/verified_public/
|
| 39 |
]
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"name": "verified_package_training_records_8_processes",
|
| 43 |
"status": "pass",
|
| 44 |
-
"detail": "train=
|
| 45 |
"evidence": [
|
| 46 |
-
"results/omni_finetune/verified_public/
|
| 47 |
]
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"name": "verified_package_eval_records_real_held_out_metrics",
|
| 51 |
"status": "pass",
|
| 52 |
-
"detail": "samples=
|
| 53 |
"evidence": [
|
| 54 |
-
"results/omni_finetune/verified_public/
|
| 55 |
]
|
| 56 |
},
|
| 57 |
{
|
|
@@ -59,7 +59,7 @@
|
|
| 59 |
"status": "pass",
|
| 60 |
"detail": "audit_status=pass, issues=0",
|
| 61 |
"evidence": [
|
| 62 |
-
"results/omni_finetune/verified_public/
|
| 63 |
]
|
| 64 |
},
|
| 65 |
{
|
|
@@ -84,7 +84,7 @@
|
|
| 84 |
{
|
| 85 |
"name": "historical_32ep_identifiers_are_confined_to_readiness_artifacts",
|
| 86 |
"status": "pass",
|
| 87 |
-
"detail": "historical identifiers found in result provenance files=
|
| 88 |
"evidence": [
|
| 89 |
"results/omni_finetune/"
|
| 90 |
]
|
|
@@ -424,6 +424,6 @@
|
|
| 424 |
"example": "{\"id\": \"xperience-10m-sample:qa:53\", \"episode_id\": \"xperience-10m-sample\", \"split\": \"train\", \"target\": \"episode_qa\", \"prompt_type\": \"json_episode_understanding\", \"center_window\": {\"start_frame\": 1060, \"end_frame\": 1079, \"num_frames\": 20}, \"media\": {\"video_path"
|
| 425 |
}
|
| 426 |
],
|
| 427 |
-
"historical_identifier_total_count":
|
| 428 |
"failures": []
|
| 429 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-13T17:46:50+00:00",
|
| 4 |
"summary": {
|
| 5 |
"qwen3_omni_verified_diagnostic_pilot": true,
|
| 6 |
"dataset_manifest_num_episodes": 119,
|
| 7 |
+
"dataset_manifest_num_samples": 34269,
|
| 8 |
+
"training_metadata_num_train_samples": 25629,
|
| 9 |
+
"eval_num_samples": 4032,
|
| 10 |
+
"eval_json_validity_rate": 0.9990079365079365,
|
| 11 |
"quality_target_met": true,
|
| 12 |
+
"historical_identifier_count": 1800,
|
| 13 |
"public_32_episode_status_file_count": 1,
|
| 14 |
"failure_count": 0
|
| 15 |
},
|
|
|
|
| 25 |
{
|
| 26 |
"name": "summary_metrics_preserves_verified_diagnostic_status",
|
| 27 |
"status": "pass",
|
| 28 |
+
"detail": "The selected-episode Qwen3-Omni v6 diagnostic branch is verified on the 96/16/16 split and meets the 98% target for JSON validity; action/subtask quality remains weak, so it is a structured-task baseline rather than a strong model-quality claim. v6 improves action macro-F1 and contact accuracy versus v5, while v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics. Cosmos3-Nano future-window compatibility and Cosmos3-Super Forward-Dynamics LoRA are also verified as separate world-model diagnostics with different metrics.",
|
| 29 |
"evidence": [
|
| 30 |
"docs/data/summary_metrics.json"
|
| 31 |
]
|
|
|
|
| 33 |
{
|
| 34 |
"name": "verified_package_dataset_has_expected_windows",
|
| 35 |
"status": "pass",
|
| 36 |
+
"detail": "episodes=119, samples=34269, split_counts={'test': 4032, 'train': 25629, 'val': 4608}, expected_samples=34269, expected_split_counts={'train': 25629, 'val': 4608, 'test': 4032}",
|
| 37 |
"evidence": [
|
| 38 |
+
"results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/dataset/dataset_manifest.json"
|
| 39 |
]
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"name": "verified_package_training_records_8_processes",
|
| 43 |
"status": "pass",
|
| 44 |
+
"detail": "train=25629, val=2048, processes=8, expected_train=25629, expected_val=2048, expected_processes=8",
|
| 45 |
"evidence": [
|
| 46 |
+
"results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/training/training_metadata.json"
|
| 47 |
]
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"name": "verified_package_eval_records_real_held_out_metrics",
|
| 51 |
"status": "pass",
|
| 52 |
+
"detail": "samples=4032, split=test, held_out=14, json_validity=0.9990079365079365, expected_samples=4032, expected_held_out=14",
|
| 53 |
"evidence": [
|
| 54 |
+
"results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/eval/metrics.json"
|
| 55 |
]
|
| 56 |
},
|
| 57 |
{
|
|
|
|
| 59 |
"status": "pass",
|
| 60 |
"detail": "audit_status=pass, issues=0",
|
| 61 |
"evidence": [
|
| 62 |
+
"results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/package_audit.json"
|
| 63 |
]
|
| 64 |
},
|
| 65 |
{
|
|
|
|
| 84 |
{
|
| 85 |
"name": "historical_32ep_identifiers_are_confined_to_readiness_artifacts",
|
| 86 |
"status": "pass",
|
| 87 |
+
"detail": "historical identifiers found in result provenance files=1800",
|
| 88 |
"evidence": [
|
| 89 |
"results/omni_finetune/"
|
| 90 |
]
|
|
|
|
| 424 |
"example": "{\"id\": \"xperience-10m-sample:qa:53\", \"episode_id\": \"xperience-10m-sample\", \"split\": \"train\", \"target\": \"episode_qa\", \"prompt_type\": \"json_episode_understanding\", \"center_window\": {\"start_frame\": 1060, \"end_frame\": 1079, \"num_frames\": 20}, \"media\": {\"video_path"
|
| 425 |
}
|
| 426 |
],
|
| 427 |
+
"historical_identifier_total_count": 1800,
|
| 428 |
"failures": []
|
| 429 |
}
|
docs/data/source_alignment_audit.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Source Alignment Note",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
|
| 6 |
"alignment_summary": {
|
| 7 |
"full_dataset_repo": "ropedia-ai/xperience-10m",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Source Alignment Note",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-13T17:46:47+00:00",
|
| 5 |
"alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
|
| 6 |
"alignment_summary": {
|
| 7 |
"full_dataset_repo": "ropedia-ai/xperience-10m",
|
docs/data/summary_metrics.json
CHANGED
|
@@ -14,7 +14,7 @@
|
|
| 14 |
"visualization.rrd"
|
| 15 |
],
|
| 16 |
"access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
|
| 17 |
-
"current_scope": "The selected-episode Qwen3-Omni diagnostic
|
| 18 |
},
|
| 19 |
"models": {
|
| 20 |
"motion_action": {
|
|
|
|
| 14 |
"visualization.rrd"
|
| 15 |
],
|
| 16 |
"access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
|
| 17 |
+
"current_scope": "The selected-episode Qwen3-Omni v6 diagnostic branch is verified on the 96/16/16 split and meets the 98% target for JSON validity; action/subtask quality remains weak, so it is a structured-task baseline rather than a strong model-quality claim. v6 improves action macro-F1 and contact accuracy versus v5, while v5 remains stronger on JSON validity, subtask, next-action, transition, and object metrics. Cosmos3-Nano future-window compatibility and Cosmos3-Super Forward-Dynamics LoRA are also verified as separate world-model diagnostics with different metrics."
|
| 18 |
},
|
| 19 |
"models": {
|
| 20 |
"motion_action": {
|
docs/data/task_surface_integrity.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"summary": {
|
| 5 |
"task_count": 12,
|
| 6 |
"expected_task_count": 12,
|
|
@@ -64,45 +64,45 @@
|
|
| 64 |
"observed": "timeline_action"
|
| 65 |
},
|
| 66 |
{
|
| 67 |
-
"name": "timeline_action:
|
| 68 |
"status": "pass",
|
| 69 |
-
"value": "
|
| 70 |
"raw_hits": []
|
| 71 |
},
|
| 72 |
{
|
| 73 |
-
"name": "timeline_action:
|
| 74 |
"status": "pass",
|
| 75 |
-
"value": "
|
| 76 |
"raw_hits": []
|
| 77 |
},
|
| 78 |
{
|
| 79 |
-
"name": "timeline_action:
|
| 80 |
"status": "pass",
|
| 81 |
-
"value": "Action Recognition",
|
| 82 |
"raw_hits": []
|
| 83 |
},
|
| 84 |
{
|
| 85 |
-
"name": "timeline_action:
|
| 86 |
"status": "pass",
|
| 87 |
-
"value": "
|
| 88 |
"raw_hits": []
|
| 89 |
},
|
| 90 |
{
|
| 91 |
-
"name": "timeline_action:
|
| 92 |
"status": "pass",
|
| 93 |
-
"value": "
|
| 94 |
"raw_hits": []
|
| 95 |
},
|
| 96 |
{
|
| 97 |
-
"name": "timeline_action:
|
| 98 |
"status": "pass",
|
| 99 |
-
"value": "window
|
| 100 |
"raw_hits": []
|
| 101 |
},
|
| 102 |
{
|
| 103 |
-
"name": "timeline_action:
|
| 104 |
"status": "pass",
|
| 105 |
-
"value": "
|
| 106 |
"raw_hits": []
|
| 107 |
},
|
| 108 |
{
|
|
@@ -184,45 +184,45 @@
|
|
| 184 |
"observed": "timeline_subtask"
|
| 185 |
},
|
| 186 |
{
|
| 187 |
-
"name": "timeline_subtask:
|
| 188 |
"status": "pass",
|
| 189 |
-
"value": "
|
| 190 |
"raw_hits": []
|
| 191 |
},
|
| 192 |
{
|
| 193 |
-
"name": "timeline_subtask:
|
| 194 |
"status": "pass",
|
| 195 |
-
"value": "
|
| 196 |
"raw_hits": []
|
| 197 |
},
|
| 198 |
{
|
| 199 |
-
"name": "timeline_subtask:
|
| 200 |
"status": "pass",
|
| 201 |
-
"value": "
|
| 202 |
"raw_hits": []
|
| 203 |
},
|
| 204 |
{
|
| 205 |
-
"name": "timeline_subtask:
|
| 206 |
"status": "pass",
|
| 207 |
-
"value": "
|
| 208 |
"raw_hits": []
|
| 209 |
},
|
| 210 |
{
|
| 211 |
-
"name": "timeline_subtask:
|
| 212 |
"status": "pass",
|
| 213 |
-
"value": "
|
| 214 |
"raw_hits": []
|
| 215 |
},
|
| 216 |
{
|
| 217 |
-
"name": "timeline_subtask:
|
| 218 |
"status": "pass",
|
| 219 |
-
"value": "
|
| 220 |
"raw_hits": []
|
| 221 |
},
|
| 222 |
{
|
| 223 |
-
"name": "timeline_subtask:
|
| 224 |
"status": "pass",
|
| 225 |
-
"value": "
|
| 226 |
"raw_hits": []
|
| 227 |
},
|
| 228 |
{
|
|
@@ -304,45 +304,45 @@
|
|
| 304 |
"observed": "transition_detection"
|
| 305 |
},
|
| 306 |
{
|
| 307 |
-
"name": "transition_detection:
|
| 308 |
"status": "pass",
|
| 309 |
-
"value": "
|
| 310 |
"raw_hits": []
|
| 311 |
},
|
| 312 |
{
|
| 313 |
-
"name": "transition_detection:
|
| 314 |
"status": "pass",
|
| 315 |
-
"value": "
|
| 316 |
"raw_hits": []
|
| 317 |
},
|
| 318 |
{
|
| 319 |
-
"name": "transition_detection:
|
| 320 |
"status": "pass",
|
| 321 |
-
"value": "Action
|
| 322 |
"raw_hits": []
|
| 323 |
},
|
| 324 |
{
|
| 325 |
-
"name": "transition_detection:
|
| 326 |
"status": "pass",
|
| 327 |
-
"value": "
|
| 328 |
"raw_hits": []
|
| 329 |
},
|
| 330 |
{
|
| 331 |
-
"name": "transition_detection:
|
| 332 |
"status": "pass",
|
| 333 |
-
"value": "
|
| 334 |
"raw_hits": []
|
| 335 |
},
|
| 336 |
{
|
| 337 |
-
"name": "transition_detection:
|
| 338 |
"status": "pass",
|
| 339 |
-
"value": "
|
| 340 |
"raw_hits": []
|
| 341 |
},
|
| 342 |
{
|
| 343 |
-
"name": "transition_detection:
|
| 344 |
"status": "pass",
|
| 345 |
-
"value": "
|
| 346 |
"raw_hits": []
|
| 347 |
},
|
| 348 |
{
|
|
@@ -422,45 +422,45 @@
|
|
| 422 |
"observed": "next_action"
|
| 423 |
},
|
| 424 |
{
|
| 425 |
-
"name": "next_action:
|
| 426 |
"status": "pass",
|
| 427 |
-
"value": "
|
| 428 |
"raw_hits": []
|
| 429 |
},
|
| 430 |
{
|
| 431 |
-
"name": "next_action:
|
| 432 |
"status": "pass",
|
| 433 |
-
"value": "
|
| 434 |
"raw_hits": []
|
| 435 |
},
|
| 436 |
{
|
| 437 |
-
"name": "next_action:
|
| 438 |
"status": "pass",
|
| 439 |
-
"value": "
|
| 440 |
"raw_hits": []
|
| 441 |
},
|
| 442 |
{
|
| 443 |
-
"name": "next_action:
|
| 444 |
"status": "pass",
|
| 445 |
-
"value": "
|
| 446 |
"raw_hits": []
|
| 447 |
},
|
| 448 |
{
|
| 449 |
-
"name": "next_action:
|
| 450 |
"status": "pass",
|
| 451 |
-
"value": "
|
| 452 |
"raw_hits": []
|
| 453 |
},
|
| 454 |
{
|
| 455 |
-
"name": "next_action:
|
| 456 |
"status": "pass",
|
| 457 |
-
"value": "current
|
| 458 |
"raw_hits": []
|
| 459 |
},
|
| 460 |
{
|
| 461 |
-
"name": "next_action:
|
| 462 |
"status": "pass",
|
| 463 |
-
"value": "
|
| 464 |
"raw_hits": []
|
| 465 |
},
|
| 466 |
{
|
|
@@ -540,45 +540,45 @@
|
|
| 540 |
"observed": "hand_trajectory_forecast"
|
| 541 |
},
|
| 542 |
{
|
| 543 |
-
"name": "hand_trajectory_forecast:
|
| 544 |
"status": "pass",
|
| 545 |
-
"value": "
|
| 546 |
"raw_hits": []
|
| 547 |
},
|
| 548 |
{
|
| 549 |
-
"name": "hand_trajectory_forecast:
|
| 550 |
"status": "pass",
|
| 551 |
-
"value": "
|
| 552 |
"raw_hits": []
|
| 553 |
},
|
| 554 |
{
|
| 555 |
-
"name": "hand_trajectory_forecast:
|
| 556 |
"status": "pass",
|
| 557 |
-
"value": "Hand
|
| 558 |
"raw_hits": []
|
| 559 |
},
|
| 560 |
{
|
| 561 |
-
"name": "hand_trajectory_forecast:
|
| 562 |
"status": "pass",
|
| 563 |
-
"value": "current multimodal
|
| 564 |
"raw_hits": []
|
| 565 |
},
|
| 566 |
{
|
| 567 |
-
"name": "hand_trajectory_forecast:
|
| 568 |
"status": "pass",
|
| 569 |
-
"value": "
|
| 570 |
"raw_hits": []
|
| 571 |
},
|
| 572 |
{
|
| 573 |
-
"name": "hand_trajectory_forecast:
|
| 574 |
"status": "pass",
|
| 575 |
-
"value": "
|
| 576 |
"raw_hits": []
|
| 577 |
},
|
| 578 |
{
|
| 579 |
-
"name": "hand_trajectory_forecast:
|
| 580 |
"status": "pass",
|
| 581 |
-
"value": "
|
| 582 |
"raw_hits": []
|
| 583 |
},
|
| 584 |
{
|
|
@@ -658,45 +658,45 @@
|
|
| 658 |
"observed": "contact_prediction"
|
| 659 |
},
|
| 660 |
{
|
| 661 |
-
"name": "contact_prediction:
|
| 662 |
"status": "pass",
|
| 663 |
-
"value": "contact
|
| 664 |
"raw_hits": []
|
| 665 |
},
|
| 666 |
{
|
| 667 |
-
"name": "contact_prediction:
|
| 668 |
"status": "pass",
|
| 669 |
-
"value": "
|
| 670 |
"raw_hits": []
|
| 671 |
},
|
| 672 |
{
|
| 673 |
-
"name": "contact_prediction:
|
| 674 |
"status": "pass",
|
| 675 |
-
"value": "Contact
|
| 676 |
"raw_hits": []
|
| 677 |
},
|
| 678 |
{
|
| 679 |
-
"name": "contact_prediction:
|
| 680 |
"status": "pass",
|
| 681 |
-
"value": "
|
| 682 |
"raw_hits": []
|
| 683 |
},
|
| 684 |
{
|
| 685 |
-
"name": "contact_prediction:
|
| 686 |
"status": "pass",
|
| 687 |
-
"value": "
|
| 688 |
"raw_hits": []
|
| 689 |
},
|
| 690 |
{
|
| 691 |
-
"name": "contact_prediction:
|
| 692 |
"status": "pass",
|
| 693 |
-
"value": "
|
| 694 |
"raw_hits": []
|
| 695 |
},
|
| 696 |
{
|
| 697 |
-
"name": "contact_prediction:
|
| 698 |
"status": "pass",
|
| 699 |
-
"value": "
|
| 700 |
"raw_hits": []
|
| 701 |
},
|
| 702 |
{
|
|
@@ -774,45 +774,45 @@
|
|
| 774 |
"observed": "object_relevance"
|
| 775 |
},
|
| 776 |
{
|
| 777 |
-
"name": "object_relevance:
|
| 778 |
"status": "pass",
|
| 779 |
-
"value": "
|
| 780 |
"raw_hits": []
|
| 781 |
},
|
| 782 |
{
|
| 783 |
-
"name": "object_relevance:
|
| 784 |
"status": "pass",
|
| 785 |
-
"value": "
|
| 786 |
"raw_hits": []
|
| 787 |
},
|
| 788 |
{
|
| 789 |
-
"name": "object_relevance:
|
| 790 |
"status": "pass",
|
| 791 |
-
"value": "Object
|
| 792 |
"raw_hits": []
|
| 793 |
},
|
| 794 |
{
|
| 795 |
-
"name": "object_relevance:
|
| 796 |
"status": "pass",
|
| 797 |
-
"value": "non-caption
|
| 798 |
"raw_hits": []
|
| 799 |
},
|
| 800 |
{
|
| 801 |
-
"name": "object_relevance:
|
| 802 |
"status": "pass",
|
| 803 |
-
"value": "
|
| 804 |
"raw_hits": []
|
| 805 |
},
|
| 806 |
{
|
| 807 |
-
"name": "object_relevance:
|
| 808 |
"status": "pass",
|
| 809 |
-
"value": "
|
| 810 |
"raw_hits": []
|
| 811 |
},
|
| 812 |
{
|
| 813 |
-
"name": "object_relevance:
|
| 814 |
"status": "pass",
|
| 815 |
-
"value": "Object
|
| 816 |
"raw_hits": []
|
| 817 |
},
|
| 818 |
{
|
|
@@ -892,45 +892,45 @@
|
|
| 892 |
"observed": "caption_grounding"
|
| 893 |
},
|
| 894 |
{
|
| 895 |
-
"name": "caption_grounding:
|
| 896 |
"status": "pass",
|
| 897 |
-
"value": "
|
| 898 |
"raw_hits": []
|
| 899 |
},
|
| 900 |
{
|
| 901 |
-
"name": "caption_grounding:
|
| 902 |
"status": "pass",
|
| 903 |
-
"value": "
|
| 904 |
"raw_hits": []
|
| 905 |
},
|
| 906 |
{
|
| 907 |
-
"name": "caption_grounding:
|
| 908 |
"status": "pass",
|
| 909 |
-
"value": "Language Grounding",
|
| 910 |
"raw_hits": []
|
| 911 |
},
|
| 912 |
{
|
| 913 |
-
"name": "caption_grounding:
|
| 914 |
"status": "pass",
|
| 915 |
-
"value": "
|
| 916 |
"raw_hits": []
|
| 917 |
},
|
| 918 |
{
|
| 919 |
-
"name": "caption_grounding:
|
| 920 |
"status": "pass",
|
| 921 |
-
"value": "
|
| 922 |
"raw_hits": []
|
| 923 |
},
|
| 924 |
{
|
| 925 |
-
"name": "caption_grounding:
|
| 926 |
"status": "pass",
|
| 927 |
-
"value": "
|
| 928 |
"raw_hits": []
|
| 929 |
},
|
| 930 |
{
|
| 931 |
-
"name": "caption_grounding:
|
| 932 |
"status": "pass",
|
| 933 |
-
"value": "Language
|
| 934 |
"raw_hits": []
|
| 935 |
},
|
| 936 |
{
|
|
@@ -1008,45 +1008,45 @@
|
|
| 1008 |
"observed": "cross_modal_retrieval"
|
| 1009 |
},
|
| 1010 |
{
|
| 1011 |
-
"name": "cross_modal_retrieval:
|
| 1012 |
"status": "pass",
|
| 1013 |
-
"value": "
|
| 1014 |
"raw_hits": []
|
| 1015 |
},
|
| 1016 |
{
|
| 1017 |
-
"name": "cross_modal_retrieval:
|
| 1018 |
"status": "pass",
|
| 1019 |
-
"value": "
|
| 1020 |
"raw_hits": []
|
| 1021 |
},
|
| 1022 |
{
|
| 1023 |
-
"name": "cross_modal_retrieval:
|
| 1024 |
"status": "pass",
|
| 1025 |
-
"value": "
|
| 1026 |
"raw_hits": []
|
| 1027 |
},
|
| 1028 |
{
|
| 1029 |
-
"name": "cross_modal_retrieval:
|
| 1030 |
"status": "pass",
|
| 1031 |
-
"value": "motion
|
| 1032 |
"raw_hits": []
|
| 1033 |
},
|
| 1034 |
{
|
| 1035 |
-
"name": "cross_modal_retrieval:
|
| 1036 |
"status": "pass",
|
| 1037 |
-
"value": "
|
| 1038 |
"raw_hits": []
|
| 1039 |
},
|
| 1040 |
{
|
| 1041 |
-
"name": "cross_modal_retrieval:
|
| 1042 |
"status": "pass",
|
| 1043 |
-
"value": "
|
| 1044 |
"raw_hits": []
|
| 1045 |
},
|
| 1046 |
{
|
| 1047 |
-
"name": "cross_modal_retrieval:
|
| 1048 |
"status": "pass",
|
| 1049 |
-
"value": "
|
| 1050 |
"raw_hits": []
|
| 1051 |
},
|
| 1052 |
{
|
|
@@ -1126,45 +1126,45 @@
|
|
| 1126 |
"observed": "modality_reconstruction"
|
| 1127 |
},
|
| 1128 |
{
|
| 1129 |
-
"name": "modality_reconstruction:
|
| 1130 |
"status": "pass",
|
| 1131 |
-
"value": "
|
| 1132 |
"raw_hits": []
|
| 1133 |
},
|
| 1134 |
{
|
| 1135 |
-
"name": "modality_reconstruction:
|
| 1136 |
"status": "pass",
|
| 1137 |
-
"value": "
|
| 1138 |
"raw_hits": []
|
| 1139 |
},
|
| 1140 |
{
|
| 1141 |
-
"name": "modality_reconstruction:
|
| 1142 |
"status": "pass",
|
| 1143 |
-
"value": "
|
| 1144 |
"raw_hits": []
|
| 1145 |
},
|
| 1146 |
{
|
| 1147 |
-
"name": "modality_reconstruction:
|
| 1148 |
"status": "pass",
|
| 1149 |
-
"value": "motion, IMU, and camera
|
| 1150 |
"raw_hits": []
|
| 1151 |
},
|
| 1152 |
{
|
| 1153 |
-
"name": "modality_reconstruction:
|
| 1154 |
"status": "pass",
|
| 1155 |
-
"value": "
|
| 1156 |
"raw_hits": []
|
| 1157 |
},
|
| 1158 |
{
|
| 1159 |
-
"name": "modality_reconstruction:
|
| 1160 |
"status": "pass",
|
| 1161 |
-
"value": "
|
| 1162 |
"raw_hits": []
|
| 1163 |
},
|
| 1164 |
{
|
| 1165 |
-
"name": "modality_reconstruction:
|
| 1166 |
"status": "pass",
|
| 1167 |
-
"value": "
|
| 1168 |
"raw_hits": []
|
| 1169 |
},
|
| 1170 |
{
|
|
@@ -1244,43 +1244,43 @@
|
|
| 1244 |
"observed": "temporal_order"
|
| 1245 |
},
|
| 1246 |
{
|
| 1247 |
-
"name": "temporal_order:
|
| 1248 |
"status": "pass",
|
| 1249 |
-
"value": "
|
| 1250 |
"raw_hits": []
|
| 1251 |
},
|
| 1252 |
{
|
| 1253 |
-
"name": "temporal_order:
|
| 1254 |
"status": "pass",
|
| 1255 |
-
"value": "
|
| 1256 |
"raw_hits": []
|
| 1257 |
},
|
| 1258 |
{
|
| 1259 |
-
"name": "temporal_order:
|
| 1260 |
"status": "pass",
|
| 1261 |
"value": "Temporal Order Verification",
|
| 1262 |
"raw_hits": []
|
| 1263 |
},
|
| 1264 |
{
|
| 1265 |
-
"name": "temporal_order:
|
| 1266 |
"status": "pass",
|
| 1267 |
-
"value": "two
|
| 1268 |
"raw_hits": []
|
| 1269 |
},
|
| 1270 |
{
|
| 1271 |
-
"name": "temporal_order:
|
| 1272 |
"status": "pass",
|
| 1273 |
-
"value": "
|
| 1274 |
"raw_hits": []
|
| 1275 |
},
|
| 1276 |
{
|
| 1277 |
-
"name": "temporal_order:
|
| 1278 |
"status": "pass",
|
| 1279 |
-
"value": "
|
| 1280 |
"raw_hits": []
|
| 1281 |
},
|
| 1282 |
{
|
| 1283 |
-
"name": "temporal_order:
|
| 1284 |
"status": "pass",
|
| 1285 |
"value": "Temporal Order Verification",
|
| 1286 |
"raw_hits": []
|
|
@@ -1360,45 +1360,45 @@
|
|
| 1360 |
"observed": "misalignment_detection"
|
| 1361 |
},
|
| 1362 |
{
|
| 1363 |
-
"name": "misalignment_detection:
|
| 1364 |
"status": "pass",
|
| 1365 |
-
"value": "
|
| 1366 |
"raw_hits": []
|
| 1367 |
},
|
| 1368 |
{
|
| 1369 |
-
"name": "misalignment_detection:
|
| 1370 |
"status": "pass",
|
| 1371 |
-
"value": "
|
| 1372 |
"raw_hits": []
|
| 1373 |
},
|
| 1374 |
{
|
| 1375 |
-
"name": "misalignment_detection:
|
| 1376 |
"status": "pass",
|
| 1377 |
-
"value": "
|
| 1378 |
"raw_hits": []
|
| 1379 |
},
|
| 1380 |
{
|
| 1381 |
-
"name": "misalignment_detection:
|
| 1382 |
"status": "pass",
|
| 1383 |
-
"value": "motion
|
| 1384 |
"raw_hits": []
|
| 1385 |
},
|
| 1386 |
{
|
| 1387 |
-
"name": "misalignment_detection:
|
| 1388 |
"status": "pass",
|
| 1389 |
-
"value": "
|
| 1390 |
"raw_hits": []
|
| 1391 |
},
|
| 1392 |
{
|
| 1393 |
-
"name": "misalignment_detection:
|
| 1394 |
"status": "pass",
|
| 1395 |
-
"value": "
|
| 1396 |
"raw_hits": []
|
| 1397 |
},
|
| 1398 |
{
|
| 1399 |
-
"name": "misalignment_detection:
|
| 1400 |
"status": "pass",
|
| 1401 |
-
"value": "
|
| 1402 |
"raw_hits": []
|
| 1403 |
},
|
| 1404 |
{
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-13T17:46:47+00:00",
|
| 4 |
"summary": {
|
| 5 |
"task_count": 12,
|
| 6 |
"expected_task_count": 12,
|
|
|
|
| 64 |
"observed": "timeline_action"
|
| 65 |
},
|
| 66 |
{
|
| 67 |
+
"name": "timeline_action: public_field_input_short_is_human_readable",
|
| 68 |
"status": "pass",
|
| 69 |
+
"value": "20-frame multimodal window",
|
| 70 |
"raw_hits": []
|
| 71 |
},
|
| 72 |
{
|
| 73 |
+
"name": "timeline_action: public_field_process_short_is_human_readable",
|
| 74 |
"status": "pass",
|
| 75 |
+
"value": "window features -> action label builder -> classifier",
|
| 76 |
"raw_hits": []
|
| 77 |
},
|
| 78 |
{
|
| 79 |
+
"name": "timeline_action: public_field_research_name_is_human_readable",
|
| 80 |
"status": "pass",
|
| 81 |
+
"value": "Egocentric Action Recognition",
|
| 82 |
"raw_hits": []
|
| 83 |
},
|
| 84 |
{
|
| 85 |
+
"name": "timeline_action: public_field_card_blurb_is_human_readable",
|
| 86 |
"status": "pass",
|
| 87 |
+
"value": "Recognize the current manipulation action from synchronized visual, motion, inertial, pose, and annotation context.",
|
| 88 |
"raw_hits": []
|
| 89 |
},
|
| 90 |
{
|
| 91 |
+
"name": "timeline_action: public_field_output_short_is_human_readable",
|
| 92 |
"status": "pass",
|
| 93 |
+
"value": "current action class",
|
| 94 |
"raw_hits": []
|
| 95 |
},
|
| 96 |
{
|
| 97 |
+
"name": "timeline_action: public_field_plain_goal_is_human_readable",
|
| 98 |
"status": "pass",
|
| 99 |
+
"value": "Look at one short multimodal window and name what action is happening now.",
|
| 100 |
"raw_hits": []
|
| 101 |
},
|
| 102 |
{
|
| 103 |
+
"name": "timeline_action: public_field_display_name_is_human_readable",
|
| 104 |
"status": "pass",
|
| 105 |
+
"value": "Action Recognition",
|
| 106 |
"raw_hits": []
|
| 107 |
},
|
| 108 |
{
|
|
|
|
| 184 |
"observed": "timeline_subtask"
|
| 185 |
},
|
| 186 |
{
|
| 187 |
+
"name": "timeline_subtask: public_field_input_short_is_human_readable",
|
| 188 |
"status": "pass",
|
| 189 |
+
"value": "20-frame multimodal window",
|
| 190 |
"raw_hits": []
|
| 191 |
},
|
| 192 |
{
|
| 193 |
+
"name": "timeline_subtask: public_field_process_short_is_human_readable",
|
| 194 |
"status": "pass",
|
| 195 |
+
"value": "window features -> subtask label builder -> classifier",
|
| 196 |
"raw_hits": []
|
| 197 |
},
|
| 198 |
{
|
| 199 |
+
"name": "timeline_subtask: public_field_research_name_is_human_readable",
|
| 200 |
"status": "pass",
|
| 201 |
+
"value": "Temporal Subtask Recognition",
|
| 202 |
"raw_hits": []
|
| 203 |
},
|
| 204 |
{
|
| 205 |
+
"name": "timeline_subtask: public_field_card_blurb_is_human_readable",
|
| 206 |
"status": "pass",
|
| 207 |
+
"value": "Recognize the broader activity stage so fine actions become a readable procedure timeline.",
|
| 208 |
"raw_hits": []
|
| 209 |
},
|
| 210 |
{
|
| 211 |
+
"name": "timeline_subtask: public_field_output_short_is_human_readable",
|
| 212 |
"status": "pass",
|
| 213 |
+
"value": "current procedure step",
|
| 214 |
"raw_hits": []
|
| 215 |
},
|
| 216 |
{
|
| 217 |
+
"name": "timeline_subtask: public_field_plain_goal_is_human_readable",
|
| 218 |
"status": "pass",
|
| 219 |
+
"value": "Predict the higher-level task stage for the current window.",
|
| 220 |
"raw_hits": []
|
| 221 |
},
|
| 222 |
{
|
| 223 |
+
"name": "timeline_subtask: public_field_display_name_is_human_readable",
|
| 224 |
"status": "pass",
|
| 225 |
+
"value": "Procedure Step Recognition",
|
| 226 |
"raw_hits": []
|
| 227 |
},
|
| 228 |
{
|
|
|
|
| 304 |
"observed": "transition_detection"
|
| 305 |
},
|
| 306 |
{
|
| 307 |
+
"name": "transition_detection: public_field_input_short_is_human_readable",
|
| 308 |
"status": "pass",
|
| 309 |
+
"value": "current window with boundary target",
|
| 310 |
"raw_hits": []
|
| 311 |
},
|
| 312 |
{
|
| 313 |
+
"name": "transition_detection: public_field_process_short_is_human_readable",
|
| 314 |
"status": "pass",
|
| 315 |
+
"value": "action changes -> boundary labels -> binary classifier",
|
| 316 |
"raw_hits": []
|
| 317 |
},
|
| 318 |
{
|
| 319 |
+
"name": "transition_detection: public_field_research_name_is_human_readable",
|
| 320 |
"status": "pass",
|
| 321 |
+
"value": "Temporal Action Segmentation",
|
| 322 |
"raw_hits": []
|
| 323 |
},
|
| 324 |
{
|
| 325 |
+
"name": "transition_detection: public_field_card_blurb_is_human_readable",
|
| 326 |
"status": "pass",
|
| 327 |
+
"value": "Detect the local moment where the episode changes from one action segment to the next.",
|
| 328 |
"raw_hits": []
|
| 329 |
},
|
| 330 |
{
|
| 331 |
+
"name": "transition_detection: public_field_output_short_is_human_readable",
|
| 332 |
"status": "pass",
|
| 333 |
+
"value": "boundary or steady",
|
| 334 |
"raw_hits": []
|
| 335 |
},
|
| 336 |
{
|
| 337 |
+
"name": "transition_detection: public_field_plain_goal_is_human_readable",
|
| 338 |
"status": "pass",
|
| 339 |
+
"value": "Detect whether the current window is near a boundary between actions.",
|
| 340 |
"raw_hits": []
|
| 341 |
},
|
| 342 |
{
|
| 343 |
+
"name": "transition_detection: public_field_display_name_is_human_readable",
|
| 344 |
"status": "pass",
|
| 345 |
+
"value": "Action Boundary Detection",
|
| 346 |
"raw_hits": []
|
| 347 |
},
|
| 348 |
{
|
|
|
|
| 422 |
"observed": "next_action"
|
| 423 |
},
|
| 424 |
{
|
| 425 |
+
"name": "next_action: public_field_input_short_is_human_readable",
|
| 426 |
"status": "pass",
|
| 427 |
+
"value": "current window at time t",
|
| 428 |
"raw_hits": []
|
| 429 |
},
|
| 430 |
{
|
| 431 |
+
"name": "next_action: public_field_process_short_is_human_readable",
|
| 432 |
"status": "pass",
|
| 433 |
+
"value": "current features -> future label shift -> classifier",
|
| 434 |
"raw_hits": []
|
| 435 |
},
|
| 436 |
{
|
| 437 |
+
"name": "next_action: public_field_research_name_is_human_readable",
|
| 438 |
"status": "pass",
|
| 439 |
+
"value": "Short-Horizon Intention Prediction",
|
| 440 |
"raw_hits": []
|
| 441 |
},
|
| 442 |
{
|
| 443 |
+
"name": "next_action: public_field_card_blurb_is_human_readable",
|
| 444 |
"status": "pass",
|
| 445 |
+
"value": "Forecast the near-future action from the current observations only.",
|
| 446 |
"raw_hits": []
|
| 447 |
},
|
| 448 |
{
|
| 449 |
+
"name": "next_action: public_field_output_short_is_human_readable",
|
| 450 |
"status": "pass",
|
| 451 |
+
"value": "action at t+20 frames",
|
| 452 |
"raw_hits": []
|
| 453 |
},
|
| 454 |
{
|
| 455 |
+
"name": "next_action: public_field_plain_goal_is_human_readable",
|
| 456 |
"status": "pass",
|
| 457 |
+
"value": "Use the current window to guess the action that will happen shortly after it.",
|
| 458 |
"raw_hits": []
|
| 459 |
},
|
| 460 |
{
|
| 461 |
+
"name": "next_action: public_field_display_name_is_human_readable",
|
| 462 |
"status": "pass",
|
| 463 |
+
"value": "Next-Action Prediction",
|
| 464 |
"raw_hits": []
|
| 465 |
},
|
| 466 |
{
|
|
|
|
| 540 |
"observed": "hand_trajectory_forecast"
|
| 541 |
},
|
| 542 |
{
|
| 543 |
+
"name": "hand_trajectory_forecast: public_field_input_short_is_human_readable",
|
| 544 |
"status": "pass",
|
| 545 |
+
"value": "current multimodal window",
|
| 546 |
"raw_hits": []
|
| 547 |
},
|
| 548 |
{
|
| 549 |
+
"name": "hand_trajectory_forecast: public_field_process_short_is_human_readable",
|
| 550 |
"status": "pass",
|
| 551 |
+
"value": "current features -> future mocap target -> regression head",
|
| 552 |
"raw_hits": []
|
| 553 |
},
|
| 554 |
{
|
| 555 |
+
"name": "hand_trajectory_forecast: public_field_research_name_is_human_readable",
|
| 556 |
"status": "pass",
|
| 557 |
+
"value": "3D Hand Motion Forecasting",
|
| 558 |
"raw_hits": []
|
| 559 |
},
|
| 560 |
{
|
| 561 |
+
"name": "hand_trajectory_forecast: public_field_card_blurb_is_human_readable",
|
| 562 |
"status": "pass",
|
| 563 |
+
"value": "Predict the future 3D left/right hand path from the current multimodal state.",
|
| 564 |
"raw_hits": []
|
| 565 |
},
|
| 566 |
{
|
| 567 |
+
"name": "hand_trajectory_forecast: public_field_output_short_is_human_readable",
|
| 568 |
"status": "pass",
|
| 569 |
+
"value": "future hand-joint trajectory",
|
| 570 |
"raw_hits": []
|
| 571 |
},
|
| 572 |
{
|
| 573 |
+
"name": "hand_trajectory_forecast: public_field_plain_goal_is_human_readable",
|
| 574 |
"status": "pass",
|
| 575 |
+
"value": "Predict where the hands will move over the next few frames.",
|
| 576 |
"raw_hits": []
|
| 577 |
},
|
| 578 |
{
|
| 579 |
+
"name": "hand_trajectory_forecast: public_field_display_name_is_human_readable",
|
| 580 |
"status": "pass",
|
| 581 |
+
"value": "Hand Trajectory Forecasting",
|
| 582 |
"raw_hits": []
|
| 583 |
},
|
| 584 |
{
|
|
|
|
| 658 |
"observed": "contact_prediction"
|
| 659 |
},
|
| 660 |
{
|
| 661 |
+
"name": "contact_prediction: public_field_input_short_is_human_readable",
|
| 662 |
"status": "pass",
|
| 663 |
+
"value": "non-contact, non-caption features",
|
| 664 |
"raw_hits": []
|
| 665 |
},
|
| 666 |
{
|
| 667 |
+
"name": "contact_prediction: public_field_process_short_is_human_readable",
|
| 668 |
"status": "pass",
|
| 669 |
+
"value": "feature filter -> contact target -> binary classifier",
|
| 670 |
"raw_hits": []
|
| 671 |
},
|
| 672 |
{
|
| 673 |
+
"name": "contact_prediction: public_field_research_name_is_human_readable",
|
| 674 |
"status": "pass",
|
| 675 |
+
"value": "Human-Object Contact Prediction",
|
| 676 |
"raw_hits": []
|
| 677 |
},
|
| 678 |
{
|
| 679 |
+
"name": "contact_prediction: public_field_card_blurb_is_human_readable",
|
| 680 |
"status": "pass",
|
| 681 |
+
"value": "Predict whether body or hand contact with the scene is occurring without leaking contact labels.",
|
| 682 |
"raw_hits": []
|
| 683 |
},
|
| 684 |
{
|
| 685 |
+
"name": "contact_prediction: public_field_output_short_is_human_readable",
|
| 686 |
"status": "pass",
|
| 687 |
+
"value": "contact or no contact",
|
| 688 |
"raw_hits": []
|
| 689 |
},
|
| 690 |
{
|
| 691 |
+
"name": "contact_prediction: public_field_plain_goal_is_human_readable",
|
| 692 |
"status": "pass",
|
| 693 |
+
"value": "Predict whether the body or hand is in contact with something.",
|
| 694 |
"raw_hits": []
|
| 695 |
},
|
| 696 |
{
|
| 697 |
+
"name": "contact_prediction: public_field_display_name_is_human_readable",
|
| 698 |
"status": "pass",
|
| 699 |
+
"value": "Contact State Prediction",
|
| 700 |
"raw_hits": []
|
| 701 |
},
|
| 702 |
{
|
|
|
|
| 774 |
"observed": "object_relevance"
|
| 775 |
},
|
| 776 |
{
|
| 777 |
+
"name": "object_relevance: public_field_input_short_is_human_readable",
|
| 778 |
"status": "pass",
|
| 779 |
+
"value": "non-caption multimodal features",
|
| 780 |
"raw_hits": []
|
| 781 |
},
|
| 782 |
{
|
| 783 |
+
"name": "object_relevance: public_field_process_short_is_human_readable",
|
| 784 |
"status": "pass",
|
| 785 |
+
"value": "object vocabulary -> multi-hot labels -> sigmoid heads",
|
| 786 |
"raw_hits": []
|
| 787 |
},
|
| 788 |
{
|
| 789 |
+
"name": "object_relevance: public_field_research_name_is_human_readable",
|
| 790 |
"status": "pass",
|
| 791 |
+
"value": "Object-Centric Interaction Recognition",
|
| 792 |
"raw_hits": []
|
| 793 |
},
|
| 794 |
{
|
| 795 |
+
"name": "object_relevance: public_field_card_blurb_is_human_readable",
|
| 796 |
"status": "pass",
|
| 797 |
+
"value": "Infer which objects are relevant to the current manipulation window from non-caption features.",
|
| 798 |
"raw_hits": []
|
| 799 |
},
|
| 800 |
{
|
| 801 |
+
"name": "object_relevance: public_field_output_short_is_human_readable",
|
| 802 |
"status": "pass",
|
| 803 |
+
"value": "relevant object set",
|
| 804 |
"raw_hits": []
|
| 805 |
},
|
| 806 |
{
|
| 807 |
+
"name": "object_relevance: public_field_plain_goal_is_human_readable",
|
| 808 |
"status": "pass",
|
| 809 |
+
"value": "Predict which objects matter in the current window.",
|
| 810 |
"raw_hits": []
|
| 811 |
},
|
| 812 |
{
|
| 813 |
+
"name": "object_relevance: public_field_display_name_is_human_readable",
|
| 814 |
"status": "pass",
|
| 815 |
+
"value": "Object Relevance Prediction",
|
| 816 |
"raw_hits": []
|
| 817 |
},
|
| 818 |
{
|
|
|
|
| 892 |
"observed": "caption_grounding"
|
| 893 |
},
|
| 894 |
{
|
| 895 |
+
"name": "caption_grounding: public_field_input_short_is_human_readable",
|
| 896 |
"status": "pass",
|
| 897 |
+
"value": "text-like query and candidate windows",
|
| 898 |
"raw_hits": []
|
| 899 |
},
|
| 900 |
{
|
| 901 |
+
"name": "caption_grounding: public_field_process_short_is_human_readable",
|
| 902 |
"status": "pass",
|
| 903 |
+
"value": "query features -> candidate index -> cosine ranker",
|
| 904 |
"raw_hits": []
|
| 905 |
},
|
| 906 |
{
|
| 907 |
+
"name": "caption_grounding: public_field_research_name_is_human_readable",
|
| 908 |
"status": "pass",
|
| 909 |
+
"value": "Language-to-Moment Grounding",
|
| 910 |
"raw_hits": []
|
| 911 |
},
|
| 912 |
{
|
| 913 |
+
"name": "caption_grounding: public_field_card_blurb_is_human_readable",
|
| 914 |
"status": "pass",
|
| 915 |
+
"value": "Retrieve the matching time window for an annotation-derived text query.",
|
| 916 |
"raw_hits": []
|
| 917 |
},
|
| 918 |
{
|
| 919 |
+
"name": "caption_grounding: public_field_output_short_is_human_readable",
|
| 920 |
"status": "pass",
|
| 921 |
+
"value": "ranked matching moments",
|
| 922 |
"raw_hits": []
|
| 923 |
},
|
| 924 |
{
|
| 925 |
+
"name": "caption_grounding: public_field_plain_goal_is_human_readable",
|
| 926 |
"status": "pass",
|
| 927 |
+
"value": "Given a text-like query from annotation, find the matching time window.",
|
| 928 |
"raw_hits": []
|
| 929 |
},
|
| 930 |
{
|
| 931 |
+
"name": "caption_grounding: public_field_display_name_is_human_readable",
|
| 932 |
"status": "pass",
|
| 933 |
+
"value": "Language Grounding",
|
| 934 |
"raw_hits": []
|
| 935 |
},
|
| 936 |
{
|
|
|
|
| 1008 |
"observed": "cross_modal_retrieval"
|
| 1009 |
},
|
| 1010 |
{
|
| 1011 |
+
"name": "cross_modal_retrieval: public_field_input_short_is_human_readable",
|
| 1012 |
"status": "pass",
|
| 1013 |
+
"value": "motion/IMU/pose query; depth/video candidates",
|
| 1014 |
"raw_hits": []
|
| 1015 |
},
|
| 1016 |
{
|
| 1017 |
+
"name": "cross_modal_retrieval: public_field_process_short_is_human_readable",
|
| 1018 |
"status": "pass",
|
| 1019 |
+
"value": "modality split -> projection -> nearest-neighbor ranker",
|
| 1020 |
"raw_hits": []
|
| 1021 |
},
|
| 1022 |
{
|
| 1023 |
+
"name": "cross_modal_retrieval: public_field_research_name_is_human_readable",
|
| 1024 |
"status": "pass",
|
| 1025 |
+
"value": "Multimodal Representation Retrieval",
|
| 1026 |
"raw_hits": []
|
| 1027 |
},
|
| 1028 |
{
|
| 1029 |
+
"name": "cross_modal_retrieval: public_field_card_blurb_is_human_readable",
|
| 1030 |
"status": "pass",
|
| 1031 |
+
"value": "Use motion, IMU, and camera-pose signals to retrieve the matching depth/video window.",
|
| 1032 |
"raw_hits": []
|
| 1033 |
},
|
| 1034 |
{
|
| 1035 |
+
"name": "cross_modal_retrieval: public_field_output_short_is_human_readable",
|
| 1036 |
"status": "pass",
|
| 1037 |
+
"value": "ranked visual windows",
|
| 1038 |
"raw_hits": []
|
| 1039 |
},
|
| 1040 |
{
|
| 1041 |
+
"name": "cross_modal_retrieval: public_field_plain_goal_is_human_readable",
|
| 1042 |
"status": "pass",
|
| 1043 |
+
"value": "Use one group of modalities to retrieve the matching window from another group.",
|
| 1044 |
"raw_hits": []
|
| 1045 |
},
|
| 1046 |
{
|
| 1047 |
+
"name": "cross_modal_retrieval: public_field_display_name_is_human_readable",
|
| 1048 |
"status": "pass",
|
| 1049 |
+
"value": "Cross-Modal Retrieval",
|
| 1050 |
"raw_hits": []
|
| 1051 |
},
|
| 1052 |
{
|
|
|
|
| 1126 |
"observed": "modality_reconstruction"
|
| 1127 |
},
|
| 1128 |
{
|
| 1129 |
+
"name": "modality_reconstruction: public_field_input_short_is_human_readable",
|
| 1130 |
"status": "pass",
|
| 1131 |
+
"value": "motion, IMU, and camera/pose features",
|
| 1132 |
"raw_hits": []
|
| 1133 |
},
|
| 1134 |
{
|
| 1135 |
+
"name": "modality_reconstruction: public_field_process_short_is_human_readable",
|
| 1136 |
"status": "pass",
|
| 1137 |
+
"value": "source-target split -> scaler -> regression head",
|
| 1138 |
"raw_hits": []
|
| 1139 |
},
|
| 1140 |
{
|
| 1141 |
+
"name": "modality_reconstruction: public_field_research_name_is_human_readable",
|
| 1142 |
"status": "pass",
|
| 1143 |
+
"value": "Modality Feature Reconstruction",
|
| 1144 |
"raw_hits": []
|
| 1145 |
},
|
| 1146 |
{
|
| 1147 |
+
"name": "modality_reconstruction: public_field_card_blurb_is_human_readable",
|
| 1148 |
"status": "pass",
|
| 1149 |
+
"value": "Predict compressed depth/video feature vectors from motion, IMU, and camera-pose features.",
|
| 1150 |
"raw_hits": []
|
| 1151 |
},
|
| 1152 |
{
|
| 1153 |
+
"name": "modality_reconstruction: public_field_output_short_is_human_readable",
|
| 1154 |
"status": "pass",
|
| 1155 |
+
"value": "reconstructed depth/video vector",
|
| 1156 |
"raw_hits": []
|
| 1157 |
},
|
| 1158 |
{
|
| 1159 |
+
"name": "modality_reconstruction: public_field_plain_goal_is_human_readable",
|
| 1160 |
"status": "pass",
|
| 1161 |
+
"value": "Predict one modality feature block from other modality blocks.",
|
| 1162 |
"raw_hits": []
|
| 1163 |
},
|
| 1164 |
{
|
| 1165 |
+
"name": "modality_reconstruction: public_field_display_name_is_human_readable",
|
| 1166 |
"status": "pass",
|
| 1167 |
+
"value": "Cross-Modal Reconstruction",
|
| 1168 |
"raw_hits": []
|
| 1169 |
},
|
| 1170 |
{
|
|
|
|
| 1244 |
"observed": "temporal_order"
|
| 1245 |
},
|
| 1246 |
{
|
| 1247 |
+
"name": "temporal_order: public_field_input_short_is_human_readable",
|
| 1248 |
"status": "pass",
|
| 1249 |
+
"value": "two adjacent windows plus difference vector",
|
| 1250 |
"raw_hits": []
|
| 1251 |
},
|
| 1252 |
{
|
| 1253 |
+
"name": "temporal_order: public_field_process_short_is_human_readable",
|
| 1254 |
"status": "pass",
|
| 1255 |
+
"value": "pair builder -> feature combiner -> binary classifier",
|
| 1256 |
"raw_hits": []
|
| 1257 |
},
|
| 1258 |
{
|
| 1259 |
+
"name": "temporal_order: public_field_research_name_is_human_readable",
|
| 1260 |
"status": "pass",
|
| 1261 |
"value": "Temporal Order Verification",
|
| 1262 |
"raw_hits": []
|
| 1263 |
},
|
| 1264 |
{
|
| 1265 |
+
"name": "temporal_order: public_field_card_blurb_is_human_readable",
|
| 1266 |
"status": "pass",
|
| 1267 |
+
"value": "Tell whether two neighboring windows are in chronological order or reversed.",
|
| 1268 |
"raw_hits": []
|
| 1269 |
},
|
| 1270 |
{
|
| 1271 |
+
"name": "temporal_order: public_field_output_short_is_human_readable",
|
| 1272 |
"status": "pass",
|
| 1273 |
+
"value": "correct or reversed",
|
| 1274 |
"raw_hits": []
|
| 1275 |
},
|
| 1276 |
{
|
| 1277 |
+
"name": "temporal_order: public_field_plain_goal_is_human_readable",
|
| 1278 |
"status": "pass",
|
| 1279 |
+
"value": "Tell whether two nearby windows are in the correct time order.",
|
| 1280 |
"raw_hits": []
|
| 1281 |
},
|
| 1282 |
{
|
| 1283 |
+
"name": "temporal_order: public_field_display_name_is_human_readable",
|
| 1284 |
"status": "pass",
|
| 1285 |
"value": "Temporal Order Verification",
|
| 1286 |
"raw_hits": []
|
|
|
|
| 1360 |
"observed": "misalignment_detection"
|
| 1361 |
},
|
| 1362 |
{
|
| 1363 |
+
"name": "misalignment_detection: public_field_input_short_is_human_readable",
|
| 1364 |
"status": "pass",
|
| 1365 |
+
"value": "motion-side and visual/depth-side feature groups",
|
| 1366 |
"raw_hits": []
|
| 1367 |
},
|
| 1368 |
{
|
| 1369 |
+
"name": "misalignment_detection: public_field_process_short_is_human_readable",
|
| 1370 |
"status": "pass",
|
| 1371 |
+
"value": "aligned/shifted pairs -> feature combiner -> binary classifier",
|
| 1372 |
"raw_hits": []
|
| 1373 |
},
|
| 1374 |
{
|
| 1375 |
+
"name": "misalignment_detection: public_field_research_name_is_human_readable",
|
| 1376 |
"status": "pass",
|
| 1377 |
+
"value": "Cross-Modal Misalignment Detection",
|
| 1378 |
"raw_hits": []
|
| 1379 |
},
|
| 1380 |
{
|
| 1381 |
+
"name": "misalignment_detection: public_field_card_blurb_is_human_readable",
|
| 1382 |
"status": "pass",
|
| 1383 |
+
"value": "Detect whether motion and visual/depth streams have been artificially shifted out of sync.",
|
| 1384 |
"raw_hits": []
|
| 1385 |
},
|
| 1386 |
{
|
| 1387 |
+
"name": "misalignment_detection: public_field_output_short_is_human_readable",
|
| 1388 |
"status": "pass",
|
| 1389 |
+
"value": "aligned or shifted",
|
| 1390 |
"raw_hits": []
|
| 1391 |
},
|
| 1392 |
{
|
| 1393 |
+
"name": "misalignment_detection: public_field_plain_goal_is_human_readable",
|
| 1394 |
"status": "pass",
|
| 1395 |
+
"value": "Detect when modalities that should match are shifted out of sync.",
|
| 1396 |
"raw_hits": []
|
| 1397 |
},
|
| 1398 |
{
|
| 1399 |
+
"name": "misalignment_detection: public_field_display_name_is_human_readable",
|
| 1400 |
"status": "pass",
|
| 1401 |
+
"value": "Multimodal Synchronization Detection",
|
| 1402 |
"raw_hits": []
|
| 1403 |
},
|
| 1404 |
{
|
docs/data/website_integrity.json
CHANGED
|
@@ -1,13 +1,13 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"docs_root": "docs",
|
| 5 |
"site_base": "/ropedia-xperience-10m-task-suite/",
|
| 6 |
"summary": {
|
| 7 |
"html_pages": 4,
|
| 8 |
"local_references": 142,
|
| 9 |
"external_reference_count": 111,
|
| 10 |
-
"json_files":
|
| 11 |
"image_assets_referenced": 22,
|
| 12 |
"failure_count": 0
|
| 13 |
},
|
|
@@ -75,7 +75,7 @@
|
|
| 75 |
"status": "pass",
|
| 76 |
"reason": "The project overview should appear before the deeper progress ledger.",
|
| 77 |
"overview_index": 67412,
|
| 78 |
-
"evidence_index":
|
| 79 |
},
|
| 80 |
{
|
| 81 |
"name": "project_status_links_json",
|
|
@@ -137,7 +137,7 @@
|
|
| 137 |
"statuses": [
|
| 138 |
"implemented",
|
| 139 |
"implemented_for_first_pilot",
|
| 140 |
-
"
|
| 141 |
"verified_companion_result",
|
| 142 |
"current",
|
| 143 |
"active_next_step",
|
|
@@ -154,8 +154,8 @@
|
|
| 154 |
"status": "pass",
|
| 155 |
"reason": "The evaluation protocol should appear before the deeper evidence ledger.",
|
| 156 |
"overview_index": 67412,
|
| 157 |
-
"protocol_index":
|
| 158 |
-
"evidence_index":
|
| 159 |
},
|
| 160 |
{
|
| 161 |
"name": "evaluation_protocol_links_json",
|
|
@@ -253,7 +253,7 @@
|
|
| 253 |
},
|
| 254 |
{
|
| 255 |
"path": "data/artifact_index.json",
|
| 256 |
-
"bytes":
|
| 257 |
"top_level_type": "dict"
|
| 258 |
},
|
| 259 |
{
|
|
@@ -303,12 +303,12 @@
|
|
| 303 |
},
|
| 304 |
{
|
| 305 |
"path": "data/omni_finetune_verified_result.json",
|
| 306 |
-
"bytes":
|
| 307 |
"top_level_type": "dict"
|
| 308 |
},
|
| 309 |
{
|
| 310 |
"path": "data/omni_model_comparison.json",
|
| 311 |
-
"bytes":
|
| 312 |
"top_level_type": "dict"
|
| 313 |
},
|
| 314 |
{
|
|
@@ -323,12 +323,12 @@
|
|
| 323 |
},
|
| 324 |
{
|
| 325 |
"path": "data/project_packet.json",
|
| 326 |
-
"bytes":
|
| 327 |
"top_level_type": "dict"
|
| 328 |
},
|
| 329 |
{
|
| 330 |
"path": "data/project_status.json",
|
| 331 |
-
"bytes":
|
| 332 |
"top_level_type": "dict"
|
| 333 |
},
|
| 334 |
{
|
|
@@ -351,6 +351,11 @@
|
|
| 351 |
"bytes": 12183,
|
| 352 |
"top_level_type": "dict"
|
| 353 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
{
|
| 355 |
"path": "data/rendered_site_check.json",
|
| 356 |
"bytes": 4032,
|
|
@@ -373,12 +378,12 @@
|
|
| 373 |
},
|
| 374 |
{
|
| 375 |
"path": "data/research_roadmap.json",
|
| 376 |
-
"bytes":
|
| 377 |
"top_level_type": "dict"
|
| 378 |
},
|
| 379 |
{
|
| 380 |
"path": "data/research_roadmap_interactive.json",
|
| 381 |
-
"bytes":
|
| 382 |
"top_level_type": "dict"
|
| 383 |
},
|
| 384 |
{
|
|
@@ -388,7 +393,7 @@
|
|
| 388 |
},
|
| 389 |
{
|
| 390 |
"path": "data/scope_claims_audit.json",
|
| 391 |
-
"bytes":
|
| 392 |
"top_level_type": "dict"
|
| 393 |
},
|
| 394 |
{
|
|
@@ -403,7 +408,7 @@
|
|
| 403 |
},
|
| 404 |
{
|
| 405 |
"path": "data/summary_metrics.json",
|
| 406 |
-
"bytes":
|
| 407 |
"top_level_type": "dict"
|
| 408 |
},
|
| 409 |
{
|
|
@@ -423,7 +428,7 @@
|
|
| 423 |
},
|
| 424 |
{
|
| 425 |
"path": "data/website_integrity.json",
|
| 426 |
-
"bytes":
|
| 427 |
"top_level_type": "dict"
|
| 428 |
},
|
| 429 |
{
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-13T17:46:49+00:00",
|
| 4 |
"docs_root": "docs",
|
| 5 |
"site_base": "/ropedia-xperience-10m-task-suite/",
|
| 6 |
"summary": {
|
| 7 |
"html_pages": 4,
|
| 8 |
"local_references": 142,
|
| 9 |
"external_reference_count": 111,
|
| 10 |
+
"json_files": 38,
|
| 11 |
"image_assets_referenced": 22,
|
| 12 |
"failure_count": 0
|
| 13 |
},
|
|
|
|
| 75 |
"status": "pass",
|
| 76 |
"reason": "The project overview should appear before the deeper progress ledger.",
|
| 77 |
"overview_index": 67412,
|
| 78 |
+
"evidence_index": 93059
|
| 79 |
},
|
| 80 |
{
|
| 81 |
"name": "project_status_links_json",
|
|
|
|
| 137 |
"statuses": [
|
| 138 |
"implemented",
|
| 139 |
"implemented_for_first_pilot",
|
| 140 |
+
"verified_latest_branch",
|
| 141 |
"verified_companion_result",
|
| 142 |
"current",
|
| 143 |
"active_next_step",
|
|
|
|
| 154 |
"status": "pass",
|
| 155 |
"reason": "The evaluation protocol should appear before the deeper evidence ledger.",
|
| 156 |
"overview_index": 67412,
|
| 157 |
+
"protocol_index": 89241,
|
| 158 |
+
"evidence_index": 93059
|
| 159 |
},
|
| 160 |
{
|
| 161 |
"name": "evaluation_protocol_links_json",
|
|
|
|
| 253 |
},
|
| 254 |
{
|
| 255 |
"path": "data/artifact_index.json",
|
| 256 |
+
"bytes": 88913,
|
| 257 |
"top_level_type": "dict"
|
| 258 |
},
|
| 259 |
{
|
|
|
|
| 303 |
},
|
| 304 |
{
|
| 305 |
"path": "data/omni_finetune_verified_result.json",
|
| 306 |
+
"bytes": 4325,
|
| 307 |
"top_level_type": "dict"
|
| 308 |
},
|
| 309 |
{
|
| 310 |
"path": "data/omni_model_comparison.json",
|
| 311 |
+
"bytes": 81866,
|
| 312 |
"top_level_type": "dict"
|
| 313 |
},
|
| 314 |
{
|
|
|
|
| 323 |
},
|
| 324 |
{
|
| 325 |
"path": "data/project_packet.json",
|
| 326 |
+
"bytes": 10597,
|
| 327 |
"top_level_type": "dict"
|
| 328 |
},
|
| 329 |
{
|
| 330 |
"path": "data/project_status.json",
|
| 331 |
+
"bytes": 23535,
|
| 332 |
"top_level_type": "dict"
|
| 333 |
},
|
| 334 |
{
|
|
|
|
| 351 |
"bytes": 12183,
|
| 352 |
"top_level_type": "dict"
|
| 353 |
},
|
| 354 |
+
{
|
| 355 |
+
"path": "data/qwen3_v5_v6_comparison.json",
|
| 356 |
+
"bytes": 2814,
|
| 357 |
+
"top_level_type": "dict"
|
| 358 |
+
},
|
| 359 |
{
|
| 360 |
"path": "data/rendered_site_check.json",
|
| 361 |
"bytes": 4032,
|
|
|
|
| 378 |
},
|
| 379 |
{
|
| 380 |
"path": "data/research_roadmap.json",
|
| 381 |
+
"bytes": 13450,
|
| 382 |
"top_level_type": "dict"
|
| 383 |
},
|
| 384 |
{
|
| 385 |
"path": "data/research_roadmap_interactive.json",
|
| 386 |
+
"bytes": 145234,
|
| 387 |
"top_level_type": "dict"
|
| 388 |
},
|
| 389 |
{
|
|
|
|
| 393 |
},
|
| 394 |
{
|
| 395 |
"path": "data/scope_claims_audit.json",
|
| 396 |
+
"bytes": 21795,
|
| 397 |
"top_level_type": "dict"
|
| 398 |
},
|
| 399 |
{
|
|
|
|
| 408 |
},
|
| 409 |
{
|
| 410 |
"path": "data/summary_metrics.json",
|
| 411 |
+
"bytes": 27807,
|
| 412 |
"top_level_type": "dict"
|
| 413 |
},
|
| 414 |
{
|
|
|
|
| 428 |
},
|
| 429 |
{
|
| 430 |
"path": "data/website_integrity.json",
|
| 431 |
+
"bytes": 15774,
|
| 432 |
"top_level_type": "dict"
|
| 433 |
},
|
| 434 |
{
|
results/omni_finetune/OMNI_MODEL_COMPARISON.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# Omni Model Comparison
|
| 2 |
|
| 3 |
-
Generated: `2026-06-
|
| 4 |
|
| 5 |
Compare only rows with the same scope and target. Single-episode raw-feature metrics, 128-episode metadata baselines, Qwen3 structured JSON metrics, and the two Cosmos3 targets answer different questions: Nano future-window retrieval versus Super structured JSON Reasoner evaluation.
|
| 6 |
|
|
@@ -55,8 +55,8 @@ The one-episode Qwen entry is only a sensor-adapter smoke test with Qwen3 weight
|
|
| 55 |
| full-param gate | passed | Full-Parameter 256-Step Post-Qwen-v6 Pilot | 2048 windows/samples | full_parameter_gate=passed, observed_train_steps=256, final_step_loss=0.0096, epoch_train_loss=0.1158, checkpoint_saved=False | `results/omni_finetune/xperience10m_qwen3_omni_128ep_fullparam_pilot256_after_qwen_v6_preemptible_8gpu_20260611/training_metadata.json` |
|
| 56 |
| 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.8750, action_macro_f1=0.0027, transition_accuracy=0.8504, contact_accuracy=0.6451 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/verified_result_summary.json` |
|
| 57 |
| 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.8527, action_macro_f1=0.0021, transition_accuracy=0.8281, contact_accuracy=0.6518 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/verified_result_summary.json` |
|
| 58 |
-
| 128 episode | verified
|
| 59 |
-
| 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 34269 windows/samples, 4032 eval | json_validity_rate=0.9990, action_macro_f1=0.0029, transition_accuracy=0.9898, contact_accuracy=0.8177 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/verified_result_summary.json` |
|
| 60 |
| 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.9978, action_macro_f1=0.0024, transition_accuracy=0.9710, contact_accuracy=0.7188 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/verified_result_summary.json` |
|
| 61 |
| 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=1.0000, action_macro_f1=0.0022, transition_accuracy=0.9732, contact_accuracy=0.7210 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/verified_result_summary.json` |
|
| 62 |
| 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=1.0000, action_macro_f1=0.0019, transition_accuracy=0.9732, contact_accuracy=0.7299 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/verified_result_summary.json` |
|
|
@@ -132,4 +132,5 @@ This is the first verified Cosmos3-Super fine-tuned adapter branch. Its metric i
|
|
| 132 |
|
| 133 |
## Pending
|
| 134 |
|
| 135 |
-
- Use the verified Qwen3
|
|
|
|
|
|
| 1 |
# Omni Model Comparison
|
| 2 |
|
| 3 |
+
Generated: `2026-06-13T17:41:35+00:00`
|
| 4 |
|
| 5 |
Compare only rows with the same scope and target. Single-episode raw-feature metrics, 128-episode metadata baselines, Qwen3 structured JSON metrics, and the two Cosmos3 targets (Nano future-window retrieval and Super structured JSON Reasoner evaluation) each answer a different question.
|
| 6 |
|
|
|
|
| 55 |
| full-param gate | passed | Full-Parameter 256-Step Post-Qwen-v6 Pilot | 2048 windows/samples | full_parameter_gate=passed, observed_train_steps=256, final_step_loss=0.0096, epoch_train_loss=0.1158, checkpoint_saved=False | `results/omni_finetune/xperience10m_qwen3_omni_128ep_fullparam_pilot256_after_qwen_v6_preemptible_8gpu_20260611/training_metadata.json` |
|
| 56 |
| 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.8750, action_macro_f1=0.0027, transition_accuracy=0.8504, contact_accuracy=0.6451 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/verified_result_summary.json` |
|
| 57 |
| 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.8527, action_macro_f1=0.0021, transition_accuracy=0.8281, contact_accuracy=0.6518 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/verified_result_summary.json` |
|
| 58 |
+
| 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 34269 windows/samples, 4032 eval | json_validity_rate=1.0000, action_macro_f1=0.0023, transition_accuracy=0.9908, contact_accuracy=0.7865 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full/verified_result_summary.json` |
|
| 59 |
+
| 128 episode | verified current | Qwen3-Omni LoRA | 119 episodes, 34269 windows/samples, 4032 eval | json_validity_rate=0.9990, action_macro_f1=0.0029, transition_accuracy=0.9898, contact_accuracy=0.8177 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/verified_result_summary.json` |
|
| 60 |
| 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=0.9978, action_macro_f1=0.0024, transition_accuracy=0.9710, contact_accuracy=0.7188 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/verified_result_summary.json` |
|
| 61 |
| 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=1.0000, action_macro_f1=0.0022, transition_accuracy=0.9732, contact_accuracy=0.7210 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/verified_result_summary.json` |
|
| 62 |
| 128 episode | verified | Qwen3-Omni LoRA | 119 episodes, 3808 windows/samples, 448 eval | json_validity_rate=1.0000, action_macro_f1=0.0019, transition_accuracy=0.9732, contact_accuracy=0.7299 | `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/verified_result_summary.json` |
|
|
|
|
| 132 |
|
| 133 |
## Pending
|
| 134 |
|
| 135 |
+
- Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.
|
| 136 |
+
- Read `results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md` before claiming v6 is globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses slightly on JSON validity, subtask accuracy, transition accuracy, next-action accuracy, and object micro-F1.
|
results/omni_finetune/QWEN3_FULL_PARAMETER_GATES_20260609.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# Qwen3-Omni Full-Parameter Feasibility Gates
|
| 2 |
|
| 3 |
-
Generated: `2026-06-
|
| 4 |
|
| 5 |
The 2026-06-09 gates prove that Qwen3-Omni full-parameter FSDP can load, prepare, run backward/optimizer steps, and complete guarded pilots up to 128 optimizer steps on an 8-GPU remote worker. They do not prove a production full-parameter fine-tune, and they intentionally save no full checkpoints or public weights.
|
| 6 |
|
|
|
|
| 1 |
# Qwen3-Omni Full-Parameter Feasibility Gates
|
| 2 |
|
| 3 |
+
Generated: `2026-06-13T17:41:13+00:00`
|
| 4 |
|
| 5 |
The 2026-06-09 gates prove that Qwen3-Omni full-parameter FSDP can load, prepare, run backward/optimizer steps, and complete guarded pilots up to 128 optimizer steps on an 8-GPU remote worker. They do not prove a production full-parameter fine-tune, and they intentionally save no full checkpoints or public weights.
|
| 6 |
|
results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Qwen3-Omni v5/v6 Verified Comparison
|
| 2 |
+
|
| 3 |
+
Generated: `2026-06-14`
|
| 4 |
+
|
| 5 |
+
This document compares only the two dense multiscale Qwen3-Omni LoRA held-out packages (v5 and v6) on the same selected 128-episode setup. Both are evaluated on 4,032 held-out test predictions from 14 exported test episodes.
|
| 6 |
+
|
| 7 |
+
| metric | v5 | v6 | v6 - v5 |
|
| 8 |
+
| --- | ---: | ---: | ---: |
|
| 9 |
+
| JSON validity | 1.000000 | 0.999008 | -0.000992 |
|
| 10 |
+
| Action macro-F1 | 0.002290 | 0.002883 | +0.000593 |
|
| 11 |
+
| Subtask accuracy | 0.011194 | 0.003731 | -0.007463 |
|
| 12 |
+
| Transition accuracy | 0.990823 | 0.989831 | -0.000992 |
|
| 13 |
+
| Next-action accuracy | 0.053619 | 0.043053 | -0.010565 |
|
| 14 |
+
| Contact accuracy | 0.786458 | 0.817708 | +0.031250 |
|
| 15 |
+
| Object micro-F1 | 0.316146 | 0.306498 | -0.009648 |
|
| 16 |
+
|
| 17 |
+
## Readout
|
| 18 |
+
|
| 19 |
+
v6 is the latest verified Qwen3-Omni LoRA branch and should be shown as the current Qwen row in generated comparisons. It improves action macro-F1 and contact accuracy. It does not dominate v5: v5 remains stronger on exact JSON validity, subtask accuracy, transition accuracy, next-action accuracy, and object micro-F1.
|
| 20 |
+
|
| 21 |
+
The public release policy is therefore:
|
| 22 |
+
|
| 23 |
+
- keep `ropedia-xperience-10m-v5` pinned to the previous stable v5 commit,
|
| 24 |
+
- publish v6 on `main`, GitHub Pages, HF Space, artifact dataset, and the Qwen LoRA model repo,
|
| 25 |
+
- create a separate `ropedia-xperience-10m-v6` tag as an experimental/latest-Qwen release rather than moving the v5 tag.
|
| 26 |
+
|
| 27 |
+
## Sources
|
| 28 |
+
|
| 29 |
+
- v5 package: `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full/verified_result_summary.json`
|
| 30 |
+
- v6 package: `results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/verified_result_summary.json`
|
| 31 |
+
- machine-readable comparison: `docs/data/qwen3_v5_v6_comparison.json`
|
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/package_audit.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"status": "pass",
|
| 3 |
+
"package_dir": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
|
| 4 |
+
"backbone": "qwen3_omni_lora",
|
| 5 |
+
"required_eval_files": [
|
| 6 |
+
"metrics.json",
|
| 7 |
+
"predictions.jsonl",
|
| 8 |
+
"predictions.csv",
|
| 9 |
+
"per_class_metrics.csv",
|
| 10 |
+
"confusion_matrix.csv",
|
| 11 |
+
"RUN_REPORT.md"
|
| 12 |
+
],
|
| 13 |
+
"primary_metrics": [
|
| 14 |
+
"action_macro_f1",
|
| 15 |
+
"contact_accuracy",
|
| 16 |
+
"held_out_episode_count",
|
| 17 |
+
"json_validity_rate",
|
| 18 |
+
"next_action_accuracy",
|
| 19 |
+
"object_micro_f1",
|
| 20 |
+
"subtask_accuracy",
|
| 21 |
+
"transition_accuracy"
|
| 22 |
+
],
|
| 23 |
+
"issues": []
|
| 24 |
+
}
|