Update final Qwen public metrics
Browse files- metrics/artifact_index.json +393 -72
- metrics/audio_ablation_summary.json +24 -12
- metrics/evaluation_protocol.json +15 -3
- metrics/foundation_model_plan.json +2 -2
- metrics/mirror_parity.json +0 -0
- metrics/omni_finetune_verified_result.json +29 -43
- metrics/omni_model_comparison.json +513 -0
- metrics/project_brief.json +4 -4
- metrics/project_manifest.json +7 -4
- metrics/project_packet.json +3 -3
- metrics/project_status.json +40 -11
- metrics/publication_audit.json +9 -9
- metrics/reproducibility_matrix.json +3 -3
- metrics/research_directions.json +59 -0
- metrics/research_roadmap.json +14 -13
- metrics/research_roadmap_interactive.json +34 -14
- metrics/research_takeaways.json +6 -6
- metrics/scope_claims_audit.json +23 -22
- metrics/single_episode_explorer.json +261 -115
- metrics/summary_metrics.json +66 -52
- metrics/task_surface_integrity.json +121 -121
- metrics/website_integrity.json +38 -33
metrics/artifact_index.json
CHANGED
|
@@ -1,19 +1,19 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Artifact Index",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"status": "pass",
|
| 5 |
-
"artifact_count":
|
| 6 |
"missing": [],
|
| 7 |
"by_kind": {
|
| 8 |
"project_path": 14,
|
| 9 |
"scaleup_contract": 7,
|
| 10 |
-
"scaleup_status":
|
|
|
|
| 11 |
"project_scope": 1,
|
| 12 |
"source_alignment": 5,
|
| 13 |
-
"publication_workflow": 3,
|
| 14 |
"evaluation_protocol": 3,
|
| 15 |
"result_interpretation": 5,
|
| 16 |
-
"metrics_source":
|
| 17 |
"website_data": 3,
|
| 18 |
"visual_evidence": 7,
|
| 19 |
"quality_gate": 12,
|
|
@@ -30,7 +30,9 @@
|
|
| 30 |
"generated_figure": 3,
|
| 31 |
"generated_figure_assets": 1,
|
| 32 |
"citation": 1,
|
| 33 |
-
"license": 1
|
|
|
|
|
|
|
| 34 |
},
|
| 35 |
"artifacts": [
|
| 36 |
{
|
|
@@ -41,8 +43,8 @@
|
|
| 41 |
"surface": "repo_hf",
|
| 42 |
"shows": "Gives first-pass readers a concise project shape before the detailed artifact trail.",
|
| 43 |
"exists": true,
|
| 44 |
-
"bytes":
|
| 45 |
-
"sha256": "
|
| 46 |
},
|
| 47 |
{
|
| 48 |
"id": "project_brief_json",
|
|
@@ -52,8 +54,8 @@
|
|
| 52 |
"surface": "website_hf",
|
| 53 |
"shows": "Machine-readable first-reader project brief for the website and Hugging Face mirrors.",
|
| 54 |
"exists": true,
|
| 55 |
-
"bytes":
|
| 56 |
-
"sha256": "
|
| 57 |
},
|
| 58 |
{
|
| 59 |
"id": "project_status",
|
|
@@ -63,8 +65,8 @@
|
|
| 63 |
"surface": "repo_hf",
|
| 64 |
"shows": "Gives a compact current-state table for first-pass readers.",
|
| 65 |
"exists": true,
|
| 66 |
-
"bytes":
|
| 67 |
-
"sha256": "
|
| 68 |
},
|
| 69 |
{
|
| 70 |
"id": "project_status_json",
|
|
@@ -74,8 +76,8 @@
|
|
| 74 |
"surface": "website_hf",
|
| 75 |
"shows": "Machine-readable copy of the current project status for website and HF mirrors.",
|
| 76 |
"exists": true,
|
| 77 |
-
"bytes":
|
| 78 |
-
"sha256": "
|
| 79 |
},
|
| 80 |
{
|
| 81 |
"id": "research_roadmap",
|
|
@@ -85,8 +87,8 @@
|
|
| 85 |
"surface": "repo_hf",
|
| 86 |
"shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
|
| 87 |
"exists": true,
|
| 88 |
-
"bytes":
|
| 89 |
-
"sha256": "
|
| 90 |
},
|
| 91 |
{
|
| 92 |
"id": "research_roadmap_json",
|
|
@@ -96,8 +98,8 @@
|
|
| 96 |
"surface": "website_hf",
|
| 97 |
"shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
|
| 98 |
"exists": true,
|
| 99 |
-
"bytes":
|
| 100 |
-
"sha256": "
|
| 101 |
},
|
| 102 |
{
|
| 103 |
"id": "foundation_model_plan",
|
|
@@ -118,8 +120,8 @@
|
|
| 118 |
"surface": "website_hf",
|
| 119 |
"shows": "Machine-readable foundation-model selection matrix with source links, entry conditions, and evaluation additions.",
|
| 120 |
"exists": true,
|
| 121 |
-
"bytes":
|
| 122 |
-
"sha256": "
|
| 123 |
},
|
| 124 |
{
|
| 125 |
"id": "omni_model_extension_contract",
|
|
@@ -141,7 +143,7 @@
|
|
| 141 |
"shows": "Stores the implemented Qwen3-Omni LoRA contract and planned Cosmos-style world-model and VLA/policy branch contracts.",
|
| 142 |
"exists": true,
|
| 143 |
"file_count": 3,
|
| 144 |
-
"bytes":
|
| 145 |
},
|
| 146 |
{
|
| 147 |
"id": "omni_backbone_registry_validator",
|
|
@@ -206,8 +208,19 @@
|
|
| 206 |
"surface": "repo_hf",
|
| 207 |
"shows": "Runs simple metadata and neural MLP baselines on the same selected 96/16/16 episode split used by the Qwen3-Omni diagnostic pilot.",
|
| 208 |
"exists": true,
|
| 209 |
-
"bytes":
|
| 210 |
-
"sha256": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
},
|
| 212 |
{
|
| 213 |
"id": "additional_development_directions",
|
|
@@ -261,8 +274,8 @@
|
|
| 261 |
"surface": "website_hf",
|
| 262 |
"shows": "Gives a short project path with scope status and public surfaces.",
|
| 263 |
"exists": true,
|
| 264 |
-
"bytes":
|
| 265 |
-
"sha256": "
|
| 266 |
},
|
| 267 |
{
|
| 268 |
"id": "artifact_guide",
|
|
@@ -272,8 +285,8 @@
|
|
| 272 |
"surface": "repo_hf",
|
| 273 |
"shows": "Gives the human-readable map from project scope to data, tasks, platform mirrors, and scale-up status.",
|
| 274 |
"exists": true,
|
| 275 |
-
"bytes":
|
| 276 |
-
"sha256": "
|
| 277 |
},
|
| 278 |
{
|
| 279 |
"id": "official_dataset_card_alignment",
|
|
@@ -371,8 +384,8 @@
|
|
| 371 |
"surface": "repo_hf",
|
| 372 |
"shows": "Defines the window unit, chronological split, task metrics, leakage controls, and current limitations.",
|
| 373 |
"exists": true,
|
| 374 |
-
"bytes":
|
| 375 |
-
"sha256": "
|
| 376 |
},
|
| 377 |
{
|
| 378 |
"id": "evaluation_protocol_json",
|
|
@@ -382,8 +395,8 @@
|
|
| 382 |
"surface": "website_hf",
|
| 383 |
"shows": "Machine-readable protocol generated from committed task metrics for website and HF mirrors.",
|
| 384 |
"exists": true,
|
| 385 |
-
"bytes":
|
| 386 |
-
"sha256": "
|
| 387 |
},
|
| 388 |
{
|
| 389 |
"id": "evaluation_protocol_builder",
|
|
@@ -393,8 +406,8 @@
|
|
| 393 |
"surface": "repo_hf",
|
| 394 |
"shows": "Regenerates the protocol from committed summary metrics and task artifacts.",
|
| 395 |
"exists": true,
|
| 396 |
-
"bytes":
|
| 397 |
-
"sha256": "
|
| 398 |
},
|
| 399 |
{
|
| 400 |
"id": "research_takeaways",
|
|
@@ -404,8 +417,8 @@
|
|
| 404 |
"surface": "repo_hf",
|
| 405 |
"shows": "Summarizes the main research lessons from committed metrics and identifies which experiments need held-out episodes.",
|
| 406 |
"exists": true,
|
| 407 |
-
"bytes":
|
| 408 |
-
"sha256": "
|
| 409 |
},
|
| 410 |
{
|
| 411 |
"id": "research_takeaways_json",
|
|
@@ -415,8 +428,8 @@
|
|
| 415 |
"surface": "website_hf",
|
| 416 |
"shows": "Machine-readable result interpretation for the website, HF cards, and mirror checks.",
|
| 417 |
"exists": true,
|
| 418 |
-
"bytes":
|
| 419 |
-
"sha256": "
|
| 420 |
},
|
| 421 |
{
|
| 422 |
"id": "research_takeaways_builder",
|
|
@@ -426,8 +439,8 @@
|
|
| 426 |
"surface": "repo_hf",
|
| 427 |
"shows": "Regenerates the research takeaways from committed summary metrics and task result artifacts.",
|
| 428 |
"exists": true,
|
| 429 |
-
"bytes":
|
| 430 |
-
"sha256": "
|
| 431 |
},
|
| 432 |
{
|
| 433 |
"id": "audio_ablation_script",
|
|
@@ -470,8 +483,8 @@
|
|
| 470 |
"surface": "website_hf",
|
| 471 |
"shows": "Machine-readable audio ablation summary mirrored into the static website and Hugging Face bundles.",
|
| 472 |
"exists": true,
|
| 473 |
-
"bytes":
|
| 474 |
-
"sha256": "
|
| 475 |
},
|
| 476 |
{
|
| 477 |
"id": "audio_ablation_delta_chart",
|
|
@@ -661,8 +674,8 @@
|
|
| 661 |
"surface": "repo_hf",
|
| 662 |
"shows": "Regenerates the task-surface integrity report and fails if task cards expose raw artifact ids or lose the interactive player wiring.",
|
| 663 |
"exists": true,
|
| 664 |
-
"bytes":
|
| 665 |
-
"sha256": "
|
| 666 |
},
|
| 667 |
{
|
| 668 |
"id": "live_publication_status",
|
|
@@ -684,8 +697,8 @@
|
|
| 684 |
"surface": "repo",
|
| 685 |
"shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
|
| 686 |
"exists": true,
|
| 687 |
-
"bytes":
|
| 688 |
-
"sha256": "
|
| 689 |
},
|
| 690 |
{
|
| 691 |
"id": "reproducibility_contract",
|
|
@@ -706,8 +719,8 @@
|
|
| 706 |
"surface": "website_hf",
|
| 707 |
"shows": "Machine-readable reproduction steps with expected artifacts and public boundaries.",
|
| 708 |
"exists": true,
|
| 709 |
-
"bytes":
|
| 710 |
-
"sha256": "
|
| 711 |
},
|
| 712 |
{
|
| 713 |
"id": "artifact_index_builder",
|
|
@@ -717,8 +730,8 @@
|
|
| 717 |
"surface": "repo_hf",
|
| 718 |
"shows": "Generates the selective artifact catalog from local files.",
|
| 719 |
"exists": true,
|
| 720 |
-
"bytes":
|
| 721 |
-
"sha256": "
|
| 722 |
},
|
| 723 |
{
|
| 724 |
"id": "publication_audit",
|
|
@@ -741,7 +754,7 @@
|
|
| 741 |
"volatile": true,
|
| 742 |
"shows": "Separates setup paths from completed held-out-episode results.",
|
| 743 |
"exists": true,
|
| 744 |
-
"bytes":
|
| 745 |
"hash_policy": "existence_and_size_only"
|
| 746 |
},
|
| 747 |
{
|
|
@@ -753,7 +766,7 @@
|
|
| 753 |
"volatile": true,
|
| 754 |
"shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
|
| 755 |
"exists": true,
|
| 756 |
-
"bytes":
|
| 757 |
"hash_policy": "existence_and_size_only"
|
| 758 |
},
|
| 759 |
{
|
|
@@ -765,7 +778,7 @@
|
|
| 765 |
"volatile": true,
|
| 766 |
"shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
|
| 767 |
"exists": true,
|
| 768 |
-
"bytes":
|
| 769 |
"hash_policy": "existence_and_size_only"
|
| 770 |
},
|
| 771 |
{
|
|
@@ -776,8 +789,8 @@
|
|
| 776 |
"surface": "website_hf",
|
| 777 |
"shows": "Lists public URLs, upstream sources, and machine-readable project metadata.",
|
| 778 |
"exists": true,
|
| 779 |
-
"bytes":
|
| 780 |
-
"sha256": "
|
| 781 |
},
|
| 782 |
{
|
| 783 |
"id": "task_summary",
|
|
@@ -787,8 +800,8 @@
|
|
| 787 |
"surface": "repo_hf",
|
| 788 |
"shows": "Stores the task definitions, splits, feature dimension, and minimal/neural metrics.",
|
| 789 |
"exists": true,
|
| 790 |
-
"bytes":
|
| 791 |
-
"sha256": "
|
| 792 |
},
|
| 793 |
{
|
| 794 |
"id": "website_metrics_bundle",
|
|
@@ -798,8 +811,8 @@
|
|
| 798 |
"surface": "website_hf",
|
| 799 |
"shows": "Mirrors task metrics for the static dashboard.",
|
| 800 |
"exists": true,
|
| 801 |
-
"bytes":
|
| 802 |
-
"sha256": "
|
| 803 |
},
|
| 804 |
{
|
| 805 |
"id": "feature_manifest",
|
|
@@ -843,7 +856,7 @@
|
|
| 843 |
"shows": "Stores matching PyTorch MLP results for the 12 task contracts.",
|
| 844 |
"exists": true,
|
| 845 |
"file_count": 60,
|
| 846 |
-
"bytes":
|
| 847 |
},
|
| 848 |
{
|
| 849 |
"id": "research_direction_taxonomy",
|
|
@@ -853,8 +866,8 @@
|
|
| 853 |
"surface": "repo_hf",
|
| 854 |
"shows": "Maps the 12 tasks to the four Ropedia research directions as direct/proxy/diagnostic.",
|
| 855 |
"exists": true,
|
| 856 |
-
"bytes":
|
| 857 |
-
"sha256": "
|
| 858 |
},
|
| 859 |
{
|
| 860 |
"id": "research_direction_extensions",
|
|
@@ -864,8 +877,8 @@
|
|
| 864 |
"surface": "repo_hf",
|
| 865 |
"shows": "Stores one coded extension probe per research direction with minimal and neural metrics.",
|
| 866 |
"exists": true,
|
| 867 |
-
"bytes":
|
| 868 |
-
"sha256": "
|
| 869 |
},
|
| 870 |
{
|
| 871 |
"id": "task_walkthroughs",
|
|
@@ -886,8 +899,8 @@
|
|
| 886 |
"surface": "website_hf",
|
| 887 |
"shows": "Presents the task suite and sample modality thumbnails with metrics generated from committed files.",
|
| 888 |
"exists": true,
|
| 889 |
-
"bytes":
|
| 890 |
-
"sha256": "
|
| 891 |
},
|
| 892 |
{
|
| 893 |
"id": "modality_atlas",
|
|
@@ -930,8 +943,8 @@
|
|
| 930 |
"surface": "website_hf",
|
| 931 |
"shows": "Shows the shared feature pipeline and minimal/neural head families.",
|
| 932 |
"exists": true,
|
| 933 |
-
"bytes":
|
| 934 |
-
"sha256": "
|
| 935 |
},
|
| 936 |
{
|
| 937 |
"id": "qwen_data_access_status",
|
|
@@ -944,6 +957,17 @@
|
|
| 944 |
"bytes": 3499,
|
| 945 |
"sha256": "c2999f0ea75765c8da3b94aa54d8a9628edd687a0fe38c09d2582f578f2b1ba7"
|
| 946 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 947 |
{
|
| 948 |
"id": "multi_episode_access_status",
|
| 949 |
"title": "Multi-episode access status",
|
|
@@ -961,7 +985,7 @@
|
|
| 961 |
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/ERROR_ANALYSIS.md",
|
| 962 |
"kind": "scaleup_status",
|
| 963 |
"surface": "repo_hf",
|
| 964 |
-
"shows": "Summarizes validation-aware Qwen3-Omni held-out failures by episode, action family, train-seen status, required-modality state, and object category.",
|
| 965 |
"exists": true,
|
| 966 |
"bytes": 3331,
|
| 967 |
"sha256": "063fcc2ebd7b57ab5b281fd5e8edc629da4e1f4e5a708483ba27375d02af9467"
|
|
@@ -985,8 +1009,8 @@
|
|
| 985 |
"surface": "repo_hf",
|
| 986 |
"shows": "Summarizes same-split simple and neural metadata baselines for the 12 task ids, with unsupported markers for tasks that need missing raw 128 feature blocks.",
|
| 987 |
"exists": true,
|
| 988 |
-
"bytes":
|
| 989 |
-
"sha256": "
|
| 990 |
},
|
| 991 |
{
|
| 992 |
"id": "multi_episode_128_baseline_summary",
|
|
@@ -996,8 +1020,52 @@
|
|
| 996 |
"surface": "repo_hf",
|
| 997 |
"shows": "Machine-readable 96/16/16 split counts, run configuration, per-task simple metrics, neural metrics, and raw-feature unsupported statuses.",
|
| 998 |
"exists": true,
|
| 999 |
-
"bytes":
|
| 1000 |
-
"sha256": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1001 |
},
|
| 1002 |
{
|
| 1003 |
"id": "citation",
|
|
@@ -1020,6 +1088,259 @@
|
|
| 1020 |
"exists": true,
|
| 1021 |
"bytes": 1745,
|
| 1022 |
"sha256": "09cf3f632d1248b6aa4457fdd510f878dcbd1e2e51bafba0ddc7cd4f05e23d07"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1023 |
}
|
| 1024 |
]
|
| 1025 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Artifact Index",
|
| 3 |
+
"generated_at_utc": "2026-06-06T23:27:35+00:00",
|
| 4 |
"status": "pass",
|
| 5 |
+
"artifact_count": 118,
|
| 6 |
"missing": [],
|
| 7 |
"by_kind": {
|
| 8 |
"project_path": 14,
|
| 9 |
"scaleup_contract": 7,
|
| 10 |
+
"scaleup_status": 16,
|
| 11 |
+
"publication_workflow": 5,
|
| 12 |
"project_scope": 1,
|
| 13 |
"source_alignment": 5,
|
|
|
|
| 14 |
"evaluation_protocol": 3,
|
| 15 |
"result_interpretation": 5,
|
| 16 |
+
"metrics_source": 14,
|
| 17 |
"website_data": 3,
|
| 18 |
"visual_evidence": 7,
|
| 19 |
"quality_gate": 12,
|
|
|
|
| 30 |
"generated_figure": 3,
|
| 31 |
"generated_figure_assets": 1,
|
| 32 |
"citation": 1,
|
| 33 |
+
"license": 1,
|
| 34 |
+
"verified_public_package": 4,
|
| 35 |
+
"publication_audit": 3
|
| 36 |
},
|
| 37 |
"artifacts": [
|
| 38 |
{
|
|
|
|
| 43 |
"surface": "repo_hf",
|
| 44 |
"shows": "Gives first-pass readers a concise project shape before the detailed artifact trail.",
|
| 45 |
"exists": true,
|
| 46 |
+
"bytes": 3837,
|
| 47 |
+
"sha256": "fbaa540aadbe2cf9b6581c5b43cac8cee3056f98cfc7386d322d6f38e70e42a4"
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"id": "project_brief_json",
|
|
|
|
| 54 |
"surface": "website_hf",
|
| 55 |
"shows": "Machine-readable first-reader project brief for the website and Hugging Face mirrors.",
|
| 56 |
"exists": true,
|
| 57 |
+
"bytes": 3811,
|
| 58 |
+
"sha256": "ebf3d73a94c31ec8ba67e2aed8cfb04edfad07ad75694eb5373b2fe5a5da9dd9"
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"id": "project_status",
|
|
|
|
| 65 |
"surface": "repo_hf",
|
| 66 |
"shows": "Gives a compact current-state table for first-pass readers.",
|
| 67 |
"exists": true,
|
| 68 |
+
"bytes": 9845,
|
| 69 |
+
"sha256": "e77d3facc533bffe35586e4de6500400352c07b4ca0df5ffc523855f38faa26e"
|
| 70 |
},
|
| 71 |
{
|
| 72 |
"id": "project_status_json",
|
|
|
|
| 76 |
"surface": "website_hf",
|
| 77 |
"shows": "Machine-readable copy of the current project status for website and HF mirrors.",
|
| 78 |
"exists": true,
|
| 79 |
+
"bytes": 15049,
|
| 80 |
+
"sha256": "23873ed59f3a38f46e45b15a5965afbb1365d49eb359bd5089a4ba6bda990d3c"
|
| 81 |
},
|
| 82 |
{
|
| 83 |
"id": "research_roadmap",
|
|
|
|
| 87 |
"surface": "repo_hf",
|
| 88 |
"shows": "Defines the path from public-sample task development to multi-episode held-out evaluation and larger omni-model extensions.",
|
| 89 |
"exists": true,
|
| 90 |
+
"bytes": 12194,
|
| 91 |
+
"sha256": "8773f240e362198b3a669d1ac848d6f1629df3a33e41bd76fba157cbf566479c"
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"id": "research_roadmap_json",
|
|
|
|
| 98 |
"surface": "website_hf",
|
| 99 |
"shows": "Machine-readable research roadmap for the website and Hugging Face mirrors.",
|
| 100 |
"exists": true,
|
| 101 |
+
"bytes": 10133,
|
| 102 |
+
"sha256": "45fd3a1bde93654ccfe14f9271928a67b36eb3f166826bfbdbb9c1092ad33bcf"
|
| 103 |
},
|
| 104 |
{
|
| 105 |
"id": "foundation_model_plan",
|
|
|
|
| 120 |
"surface": "website_hf",
|
| 121 |
"shows": "Machine-readable foundation-model selection matrix with source links, entry conditions, and evaluation additions.",
|
| 122 |
"exists": true,
|
| 123 |
+
"bytes": 13193,
|
| 124 |
+
"sha256": "63529cbaf1d5c549f595b3ed49f49feda03edf96952b5cb321117fee340849c9"
|
| 125 |
},
|
| 126 |
{
|
| 127 |
"id": "omni_model_extension_contract",
|
|
|
|
| 143 |
"shows": "Stores the implemented Qwen3-Omni LoRA contract and planned Cosmos-style world-model and VLA/policy branch contracts.",
|
| 144 |
"exists": true,
|
| 145 |
"file_count": 3,
|
| 146 |
+
"bytes": 9203
|
| 147 |
},
|
| 148 |
{
|
| 149 |
"id": "omni_backbone_registry_validator",
|
|
|
|
| 208 |
"surface": "repo_hf",
|
| 209 |
"shows": "Runs simple metadata and neural MLP baselines on the same selected 96/16/16 episode split used by the Qwen3-Omni diagnostic pilot.",
|
| 210 |
"exists": true,
|
| 211 |
+
"bytes": 48164,
|
| 212 |
+
"sha256": "fbefe3f31e2d19566ed1fc356a25c564ecb4f0645de4d595f5926e1426c058d5"
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"id": "qwen3_lora_hf_package_builder",
|
| 216 |
+
"title": "Qwen3 LoRA HF package builder",
|
| 217 |
+
"path": "scripts/omni/prepare_qwen3_lora_hf_package.py",
|
| 218 |
+
"kind": "publication_workflow",
|
| 219 |
+
"surface": "repo_hf",
|
| 220 |
+
"shows": "Builds the upload-ready Hugging Face adapter folder from a verified Qwen3 LoRA result summary and adapter directory.",
|
| 221 |
+
"exists": true,
|
| 222 |
+
"bytes": 9843,
|
| 223 |
+
"sha256": "636132a7d299db4d874ec797e34acd7e37eea69994c2d39afaafaec6587169a0"
|
| 224 |
},
|
| 225 |
{
|
| 226 |
"id": "additional_development_directions",
|
|
|
|
| 274 |
"surface": "website_hf",
|
| 275 |
"shows": "Gives a short project path with scope status and public surfaces.",
|
| 276 |
"exists": true,
|
| 277 |
+
"bytes": 7943,
|
| 278 |
+
"sha256": "ffd5da5fd2c2dc82fa1beb74335a51a33317923b3e7ee4864e2b5031082b0a42"
|
| 279 |
},
|
| 280 |
{
|
| 281 |
"id": "artifact_guide",
|
|
|
|
| 285 |
"surface": "repo_hf",
|
| 286 |
"shows": "Gives the human-readable map from project scope to data, tasks, platform mirrors, and scale-up status.",
|
| 287 |
"exists": true,
|
| 288 |
+
"bytes": 17508,
|
| 289 |
+
"sha256": "fbbd9f460610464efb27c371a17cf23c3fa409d853f8148368f48707192427d7"
|
| 290 |
},
|
| 291 |
{
|
| 292 |
"id": "official_dataset_card_alignment",
|
|
|
|
| 384 |
"surface": "repo_hf",
|
| 385 |
"shows": "Defines the window unit, chronological split, task metrics, leakage controls, and current limitations.",
|
| 386 |
"exists": true,
|
| 387 |
+
"bytes": 6434,
|
| 388 |
+
"sha256": "4817266bdfdf852ad97b3d37614141c56794d955d82110a819daa1d76755a675"
|
| 389 |
},
|
| 390 |
{
|
| 391 |
"id": "evaluation_protocol_json",
|
|
|
|
| 395 |
"surface": "website_hf",
|
| 396 |
"shows": "Machine-readable protocol generated from committed task metrics for website and HF mirrors.",
|
| 397 |
"exists": true,
|
| 398 |
+
"bytes": 14511,
|
| 399 |
+
"sha256": "ea7caff963fcf048f803a852e5cdae8d3975ba4a36d805c5e42211b1bf2744ef"
|
| 400 |
},
|
| 401 |
{
|
| 402 |
"id": "evaluation_protocol_builder",
|
|
|
|
| 406 |
"surface": "repo_hf",
|
| 407 |
"shows": "Regenerates the protocol from committed summary metrics and task artifacts.",
|
| 408 |
"exists": true,
|
| 409 |
+
"bytes": 16584,
|
| 410 |
+
"sha256": "e8cd8df471985688fa71e2b1be801e346e50911465ef886625a5d863bf9158f1"
|
| 411 |
},
|
| 412 |
{
|
| 413 |
"id": "research_takeaways",
|
|
|
|
| 417 |
"surface": "repo_hf",
|
| 418 |
"shows": "Summarizes the main research lessons from committed metrics and identifies which experiments need held-out episodes.",
|
| 419 |
"exists": true,
|
| 420 |
+
"bytes": 5149,
|
| 421 |
+
"sha256": "a2ab81a52a825b4f1dae59023cfe905a63128384f892dcc8e91c4c4351500aef"
|
| 422 |
},
|
| 423 |
{
|
| 424 |
"id": "research_takeaways_json",
|
|
|
|
| 428 |
"surface": "website_hf",
|
| 429 |
"shows": "Machine-readable result interpretation for the website, HF cards, and mirror checks.",
|
| 430 |
"exists": true,
|
| 431 |
+
"bytes": 7139,
|
| 432 |
+
"sha256": "eb87b65ef2f6ef910b4cda29c33f3c75014a5cce8ebf8299f71eb09c856a2481"
|
| 433 |
},
|
| 434 |
{
|
| 435 |
"id": "research_takeaways_builder",
|
|
|
|
| 439 |
"surface": "repo_hf",
|
| 440 |
"shows": "Regenerates the research takeaways from committed summary metrics and task result artifacts.",
|
| 441 |
"exists": true,
|
| 442 |
+
"bytes": 13473,
|
| 443 |
+
"sha256": "40ab06b9adaf2c2a9a8d55e07b361198f4cb3a88285596625cc8133e5135a4d2"
|
| 444 |
},
|
| 445 |
{
|
| 446 |
"id": "audio_ablation_script",
|
|
|
|
| 483 |
"surface": "website_hf",
|
| 484 |
"shows": "Machine-readable audio ablation summary mirrored into the static website and Hugging Face bundles.",
|
| 485 |
"exists": true,
|
| 486 |
+
"bytes": 10370,
|
| 487 |
+
"sha256": "d6de8db171993f8cc39153075a7e17cda79762659fad7d1944556f8bf10afd0d"
|
| 488 |
},
|
| 489 |
{
|
| 490 |
"id": "audio_ablation_delta_chart",
|
|
|
|
| 674 |
"surface": "repo_hf",
|
| 675 |
"shows": "Regenerates the task-surface integrity report and fails if task cards expose raw artifact ids or lose the interactive player wiring.",
|
| 676 |
"exists": true,
|
| 677 |
+
"bytes": 15366,
|
| 678 |
+
"sha256": "8d4573b7a4b75e433da577067369e5221515184536a281c4d2e30c3422ddc4ad"
|
| 679 |
},
|
| 680 |
{
|
| 681 |
"id": "live_publication_status",
|
|
|
|
| 697 |
"surface": "repo",
|
| 698 |
"shows": "Fetches the published GitHub/HF URLs and compares live hashes and public-card markers against the release assets.",
|
| 699 |
"exists": true,
|
| 700 |
+
"bytes": 36847,
|
| 701 |
+
"sha256": "07fd059a9ff8c13b073f349c79f1f7d3abe839559cf0809e291f6ea9bbad21e8"
|
| 702 |
},
|
| 703 |
{
|
| 704 |
"id": "reproducibility_contract",
|
|
|
|
| 719 |
"surface": "website_hf",
|
| 720 |
"shows": "Machine-readable reproduction steps with expected artifacts and public boundaries.",
|
| 721 |
"exists": true,
|
| 722 |
+
"bytes": 5280,
|
| 723 |
+
"sha256": "bfb34f14206943da909aee36465e8211c592615fca15a284e2fa8ef9ea1d438b"
|
| 724 |
},
|
| 725 |
{
|
| 726 |
"id": "artifact_index_builder",
|
|
|
|
| 730 |
"surface": "repo_hf",
|
| 731 |
"shows": "Generates the selective artifact catalog from local files.",
|
| 732 |
"exists": true,
|
| 733 |
+
"bytes": 38561,
|
| 734 |
+
"sha256": "571a06684909bd4d544d455d5cdee2fb69439b1e16de95609dd51fecc7b58b29"
|
| 735 |
},
|
| 736 |
{
|
| 737 |
"id": "publication_audit",
|
|
|
|
| 754 |
"volatile": true,
|
| 755 |
"shows": "Separates setup paths from completed held-out-episode results.",
|
| 756 |
"exists": true,
|
| 757 |
+
"bytes": 21234,
|
| 758 |
"hash_policy": "existence_and_size_only"
|
| 759 |
},
|
| 760 |
{
|
|
|
|
| 766 |
"volatile": true,
|
| 767 |
"shows": "Confirms prepared GitHub/HF Space/artifact/model mirrors share the same critical data, figure, website HTML, and validator files.",
|
| 768 |
"exists": true,
|
| 769 |
+
"bytes": 235815,
|
| 770 |
"hash_policy": "existence_and_size_only"
|
| 771 |
},
|
| 772 |
{
|
|
|
|
| 778 |
"volatile": true,
|
| 779 |
"shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
|
| 780 |
"exists": true,
|
| 781 |
+
"bytes": 15375,
|
| 782 |
"hash_policy": "existence_and_size_only"
|
| 783 |
},
|
| 784 |
{
|
|
|
|
| 789 |
"surface": "website_hf",
|
| 790 |
"shows": "Lists public URLs, upstream sources, and machine-readable project metadata.",
|
| 791 |
"exists": true,
|
| 792 |
+
"bytes": 5193,
|
| 793 |
+
"sha256": "1ae4c41fdcca6638e570e081d07f700d56ca490fecc25d681d5066b1ca8319ee"
|
| 794 |
},
|
| 795 |
{
|
| 796 |
"id": "task_summary",
|
|
|
|
| 800 |
"surface": "repo_hf",
|
| 801 |
"shows": "Stores the task definitions, splits, feature dimension, and minimal/neural metrics.",
|
| 802 |
"exists": true,
|
| 803 |
+
"bytes": 21680,
|
| 804 |
+
"sha256": "5860c901536495b7a8cb592ca0728a546566a70cef6d2b7d1a986e5140fbfe08"
|
| 805 |
},
|
| 806 |
{
|
| 807 |
"id": "website_metrics_bundle",
|
|
|
|
| 811 |
"surface": "website_hf",
|
| 812 |
"shows": "Mirrors task metrics for the static dashboard.",
|
| 813 |
"exists": true,
|
| 814 |
+
"bytes": 27490,
|
| 815 |
+
"sha256": "159ed565571aa4215ef30a5ea8fce057481cf0f77ad50aec3ae15de6a38e12ba"
|
| 816 |
},
|
| 817 |
{
|
| 818 |
"id": "feature_manifest",
|
|
|
|
| 856 |
"shows": "Stores matching PyTorch MLP results for the 12 task contracts.",
|
| 857 |
"exists": true,
|
| 858 |
"file_count": 60,
|
| 859 |
+
"bytes": 90609517
|
| 860 |
},
|
| 861 |
{
|
| 862 |
"id": "research_direction_taxonomy",
|
|
|
|
| 866 |
"surface": "repo_hf",
|
| 867 |
"shows": "Maps the 12 tasks to the four Ropedia research directions as direct/proxy/diagnostic.",
|
| 868 |
"exists": true,
|
| 869 |
+
"bytes": 19204,
|
| 870 |
+
"sha256": "59bece1a151d8475fde50396fd2e70ed4abcfec33f10e400ef165148fd6e7dde"
|
| 871 |
},
|
| 872 |
{
|
| 873 |
"id": "research_direction_extensions",
|
|
|
|
| 877 |
"surface": "repo_hf",
|
| 878 |
"shows": "Stores one coded extension probe per research direction with minimal and neural metrics.",
|
| 879 |
"exists": true,
|
| 880 |
+
"bytes": 12592,
|
| 881 |
+
"sha256": "6fa965d5e8249f0972e93558dcc1e7de15d53bdcfd253354255637c421b68dc4"
|
| 882 |
},
|
| 883 |
{
|
| 884 |
"id": "task_walkthroughs",
|
|
|
|
| 899 |
"surface": "website_hf",
|
| 900 |
"shows": "Presents the task suite and sample modality thumbnails with metrics generated from committed files.",
|
| 901 |
"exists": true,
|
| 902 |
+
"bytes": 1588641,
|
| 903 |
+
"sha256": "1275e2adaef920ecde7c29dc62c8d79d4f13475a0c09bc3baa693f47cdec2e1f"
|
| 904 |
},
|
| 905 |
{
|
| 906 |
"id": "modality_atlas",
|
|
|
|
| 943 |
"surface": "website_hf",
|
| 944 |
"shows": "Shows the shared feature pipeline and minimal/neural head families.",
|
| 945 |
"exists": true,
|
| 946 |
+
"bytes": 774391,
|
| 947 |
+
"sha256": "f08b03bc21e194efe382347d74cf89cd6ac65dede51889971dbfc2fb9d1de3c2"
|
| 948 |
},
|
| 949 |
{
|
| 950 |
"id": "qwen_data_access_status",
|
|
|
|
| 957 |
"bytes": 3499,
|
| 958 |
"sha256": "c2999f0ea75765c8da3b94aa54d8a9628edd687a0fe38c09d2582f578f2b1ba7"
|
| 959 |
},
|
| 960 |
+
{
|
| 961 |
+
"id": "qwen3_lora_hf_upload_note",
|
| 962 |
+
"title": "Qwen3 LoRA HF upload note",
|
| 963 |
+
"path": "results/omni_finetune/HF_UPLOAD.md",
|
| 964 |
+
"kind": "publication_workflow",
|
| 965 |
+
"surface": "repo_hf",
|
| 966 |
+
"shows": "Documents the final 128-episode LoRA adapter upload path, target model repo, package builder, and forbidden files.",
|
| 967 |
+
"exists": true,
|
| 968 |
+
"bytes": 1875,
|
| 969 |
+
"sha256": "7a822452347e8c4241a5160d67a9782f17f3d3cb9bd2960b00bac0ca1bf2392f"
|
| 970 |
+
},
|
| 971 |
{
|
| 972 |
"id": "multi_episode_access_status",
|
| 973 |
"title": "Multi-episode access status",
|
|
|
|
| 985 |
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/ERROR_ANALYSIS.md",
|
| 986 |
"kind": "scaleup_status",
|
| 987 |
"surface": "repo_hf",
|
| 988 |
+
"shows": "Summarizes the earlier validation-aware Qwen3-Omni held-out failures by episode, action family, train-seen status, required-modality state, and object category.",
|
| 989 |
"exists": true,
|
| 990 |
"bytes": 3331,
|
| 991 |
"sha256": "063fcc2ebd7b57ab5b281fd5e8edc629da4e1f4e5a708483ba27375d02af9467"
|
|
|
|
| 1009 |
"surface": "repo_hf",
|
| 1010 |
"shows": "Summarizes same-split simple and neural metadata baselines for the 12 task ids, with unsupported markers for tasks that need missing raw 128 feature blocks.",
|
| 1011 |
"exists": true,
|
| 1012 |
+
"bytes": 2238,
|
| 1013 |
+
"sha256": "c70440aa502ec569a840159ab7e05b8e7d4ed70e0091ad9a4b2fb3fb0d3803c1"
|
| 1014 |
},
|
| 1015 |
{
|
| 1016 |
"id": "multi_episode_128_baseline_summary",
|
|
|
|
| 1020 |
"surface": "repo_hf",
|
| 1021 |
"shows": "Machine-readable 96/16/16 split counts, run configuration, per-task simple metrics, neural metrics, and raw-feature unsupported statuses.",
|
| 1022 |
"exists": true,
|
| 1023 |
+
"bytes": 44519,
|
| 1024 |
+
"sha256": "107a4bedf53a22a1395f5e08b7f1cc9bb1becb8c0e95bc03178029abb3a83aef"
|
| 1025 |
+
},
|
| 1026 |
+
{
|
| 1027 |
+
"id": "omni_model_comparison_report",
|
| 1028 |
+
"title": "Omni model comparison report",
|
| 1029 |
+
"path": "results/omni_finetune/OMNI_MODEL_COMPARISON.md",
|
| 1030 |
+
"kind": "scaleup_status",
|
| 1031 |
+
"surface": "repo_hf",
|
| 1032 |
+
"shows": "Reader-facing comparison of the single-episode task suite, 128-episode aligned baselines, Qwen3-Omni packages, and Cosmos3 future-window branch.",
|
| 1033 |
+
"exists": true,
|
| 1034 |
+
"bytes": 3110,
|
| 1035 |
+
"sha256": "11c22b7ac1e16fd8db86eb7c6fc33cf28fee97a38098f1606a35daee113dc72b"
|
| 1036 |
+
},
|
| 1037 |
+
{
|
| 1038 |
+
"id": "omni_model_comparison_json",
|
| 1039 |
+
"title": "Omni model comparison JSON",
|
| 1040 |
+
"path": "docs/data/omni_model_comparison.json",
|
| 1041 |
+
"kind": "metrics_source",
|
| 1042 |
+
"surface": "repo_hf",
|
| 1043 |
+
"shows": "Machine-readable comparison of the current result versions, per-task aligned baselines, verified Qwen3 packages, and Cosmos3 package.",
|
| 1044 |
+
"exists": true,
|
| 1045 |
+
"bytes": 21433,
|
| 1046 |
+
"sha256": "b539a489a8974ecec90dda312471be54f466b81bef9d1ebc99d08155f8c21c94"
|
| 1047 |
+
},
|
| 1048 |
+
{
|
| 1049 |
+
"id": "cosmos3_nano_verified_summary",
|
| 1050 |
+
"title": "Cosmos3-Nano verified package summary",
|
| 1051 |
+
"path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json",
|
| 1052 |
+
"kind": "metrics_source",
|
| 1053 |
+
"surface": "repo_hf",
|
| 1054 |
+
"shows": "Machine-readable verified public summary for the Cosmos3-Nano future-window compatibility package.",
|
| 1055 |
+
"exists": true,
|
| 1056 |
+
"bytes": 6151,
|
| 1057 |
+
"sha256": "386b374ef1837fe0087f9eeb21248e6c823334270fe4b1a52dadb3a11c09ef88"
|
| 1058 |
+
},
|
| 1059 |
+
{
|
| 1060 |
+
"id": "cosmos3_nano_run_report",
|
| 1061 |
+
"title": "Cosmos3-Nano future-window run report",
|
| 1062 |
+
"path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/RUN_REPORT.md",
|
| 1063 |
+
"kind": "scaleup_status",
|
| 1064 |
+
"surface": "repo_hf",
|
| 1065 |
+
"shows": "Reader-facing held-out metrics and interpretation for the Cosmos3-Nano future-window compatibility branch.",
|
| 1066 |
+
"exists": true,
|
| 1067 |
+
"bytes": 698,
|
| 1068 |
+
"sha256": "3f56dc6ed58ea079a98a8f7e7ccd294238623a5f06bb9a01f1448665cf3eeb60"
|
| 1069 |
},
|
| 1070 |
{
|
| 1071 |
"id": "citation",
|
|
|
|
| 1088 |
"exists": true,
|
| 1089 |
"bytes": 1745,
|
| 1090 |
"sha256": "09cf3f632d1248b6aa4457fdd510f878dcbd1e2e51bafba0ddc7cd4f05e23d07"
|
| 1091 |
+
},
|
| 1092 |
+
{
|
| 1093 |
+
"id": "verified_public_package_xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
|
| 1094 |
+
"title": "Verified public package: Cosmos3-Nano Future-Window World Model",
|
| 1095 |
+
"path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
|
| 1096 |
+
"kind": "verified_public_package",
|
| 1097 |
+
"surface": "repo_hf",
|
| 1098 |
+
"shows": "Public-safe verified package for xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full (cosmos_world_model, status=verified).",
|
| 1099 |
+
"exists": true,
|
| 1100 |
+
"file_count": 14,
|
| 1101 |
+
"bytes": 745194
|
| 1102 |
+
},
|
| 1103 |
+
{
|
| 1104 |
+
"id": "verified_public_summary_xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
|
| 1105 |
+
"title": "Verified summary: Cosmos3-Nano Future-Window World Model",
|
| 1106 |
+
"path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json",
|
| 1107 |
+
"kind": "metrics_source",
|
| 1108 |
+
"surface": "repo_hf",
|
| 1109 |
+
"shows": "Machine-readable verified summary for xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full.",
|
| 1110 |
+
"exists": true,
|
| 1111 |
+
"bytes": 6151,
|
| 1112 |
+
"sha256": "386b374ef1837fe0087f9eeb21248e6c823334270fe4b1a52dadb3a11c09ef88"
|
| 1113 |
+
},
|
| 1114 |
+
{
|
| 1115 |
+
"id": "verified_public_public_result_summary_xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
|
| 1116 |
+
"title": "Verified public result summary: Cosmos3-Nano Future-Window World Model",
|
| 1117 |
+
"path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/PUBLIC_RESULT_SUMMARY.md",
|
| 1118 |
+
"kind": "scaleup_status",
|
| 1119 |
+
"surface": "repo_hf",
|
| 1120 |
+
"shows": "Public result summary for xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full.",
|
| 1121 |
+
"exists": true,
|
| 1122 |
+
"bytes": 984,
|
| 1123 |
+
"sha256": "e7a98bb4bbea34e4dfed25bb1682284514996b722661b13cc59eb70b4163d682"
|
| 1124 |
+
},
|
| 1125 |
+
{
|
| 1126 |
+
"id": "verified_public_run_report_xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
|
| 1127 |
+
"title": "Verified run report: Cosmos3-Nano Future-Window World Model",
|
| 1128 |
+
"path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/RUN_REPORT.md",
|
| 1129 |
+
"kind": "scaleup_status",
|
| 1130 |
+
"surface": "repo_hf",
|
| 1131 |
+
"shows": "Run report for xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full.",
|
| 1132 |
+
"exists": true,
|
| 1133 |
+
"bytes": 698,
|
| 1134 |
+
"sha256": "3f56dc6ed58ea079a98a8f7e7ccd294238623a5f06bb9a01f1448665cf3eeb60"
|
| 1135 |
+
},
|
| 1136 |
+
{
|
| 1137 |
+
"id": "verified_public_metrics_JSON_xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
|
| 1138 |
+
"title": "Verified metrics JSON: Cosmos3-Nano Future-Window World Model",
|
| 1139 |
+
"path": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/eval/metrics.json",
|
| 1140 |
+
"kind": "metrics_source",
|
| 1141 |
+
"surface": "repo_hf",
|
| 1142 |
+
"shows": "Metrics json for xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full.",
|
| 1143 |
+
"exists": true,
|
| 1144 |
+
"bytes": 1099,
|
| 1145 |
+
"sha256": "f11ccb167908d4f5bfb49c0be0b4bc6c9254901462aa52ae98a2a98e8af16558"
|
| 1146 |
+
},
|
| 1147 |
+
{
|
| 1148 |
+
"id": "verified_public_package_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
|
| 1149 |
+
"title": "Verified public package: Qwen3-Omni LoRA",
|
| 1150 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
|
| 1151 |
+
"kind": "verified_public_package",
|
| 1152 |
+
"surface": "repo_hf",
|
| 1153 |
+
"shows": "Public-safe verified package for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval (qwen3_omni_lora, status=verified).",
|
| 1154 |
+
"exists": true,
|
| 1155 |
+
"file_count": 21,
|
| 1156 |
+
"bytes": 5561131
|
| 1157 |
+
},
|
| 1158 |
+
{
|
| 1159 |
+
"id": "verified_public_summary_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
|
| 1160 |
+
"title": "Verified summary: Qwen3-Omni LoRA",
|
| 1161 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/verified_result_summary.json",
|
| 1162 |
+
"kind": "metrics_source",
|
| 1163 |
+
"surface": "repo_hf",
|
| 1164 |
+
"shows": "Machine-readable verified summary for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval.",
|
| 1165 |
+
"exists": true,
|
| 1166 |
+
"bytes": 5933,
|
| 1167 |
+
"sha256": "b5f8ef88cd9d8515f03bf092107a9e788695e4c4853feae0db98d384f0c39c9d"
|
| 1168 |
+
},
|
| 1169 |
+
{
|
| 1170 |
+
"id": "verified_public_public_result_summary_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
|
| 1171 |
+
"title": "Verified public result summary: Qwen3-Omni LoRA",
|
| 1172 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/PUBLIC_RESULT_SUMMARY.md",
|
| 1173 |
+
"kind": "scaleup_status",
|
| 1174 |
+
"surface": "repo_hf",
|
| 1175 |
+
"shows": "Public result summary for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval.",
|
| 1176 |
+
"exists": true,
|
| 1177 |
+
"bytes": 1779,
|
| 1178 |
+
"sha256": "11305d535a6cb60530560f3862b8374ec083adfc7cf714b49fe06b079e3c049d"
|
| 1179 |
+
},
|
| 1180 |
+
{
|
| 1181 |
+
"id": "verified_public_run_report_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
|
| 1182 |
+
"title": "Verified run report: Qwen3-Omni LoRA",
|
| 1183 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/RUN_REPORT.md",
|
| 1184 |
+
"kind": "scaleup_status",
|
| 1185 |
+
"surface": "repo_hf",
|
| 1186 |
+
"shows": "Run report for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval.",
|
| 1187 |
+
"exists": true,
|
| 1188 |
+
"bytes": 603,
|
| 1189 |
+
"sha256": "6792b92c8d8661d8f4f3670e7961a14fd0c495dbb4279602a6fba1480179ad9b"
|
| 1190 |
+
},
|
| 1191 |
+
{
|
| 1192 |
+
"id": "verified_public_metrics_JSON_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
|
| 1193 |
+
"title": "Verified metrics JSON: Qwen3-Omni LoRA",
|
| 1194 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/metrics.json",
|
| 1195 |
+
"kind": "metrics_source",
|
| 1196 |
+
"surface": "repo_hf",
|
| 1197 |
+
"shows": "Metrics json for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval.",
|
| 1198 |
+
"exists": true,
|
| 1199 |
+
"bytes": 75629,
|
| 1200 |
+
"sha256": "055b0932ea439338839256ded2fa5fb3ddb562ced0f149d2ea37460e966c4404"
|
| 1201 |
+
},
|
| 1202 |
+
{
|
| 1203 |
+
"id": "verified_public_package_audit_xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
|
| 1204 |
+
"title": "Verified package audit: Qwen3-Omni LoRA",
|
| 1205 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/package_audit.json",
|
| 1206 |
+
"kind": "publication_audit",
|
| 1207 |
+
"surface": "repo_hf",
|
| 1208 |
+
"shows": "Package audit for xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval.",
|
| 1209 |
+
"exists": true,
|
| 1210 |
+
"bytes": 611,
|
| 1211 |
+
"sha256": "2226cdd2e457b23c89b909e40ca469dd08f3db81c1bb797aaafb6cd19de6deea"
|
| 1212 |
+
},
|
| 1213 |
+
{
|
| 1214 |
+
"id": "verified_public_package_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
|
| 1215 |
+
"title": "Verified public package: Qwen3-Omni LoRA",
|
| 1216 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
|
| 1217 |
+
"kind": "verified_public_package",
|
| 1218 |
+
"surface": "repo_hf",
|
| 1219 |
+
"shows": "Public-safe verified package for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full (qwen3_omni_lora, status=verified).",
|
| 1220 |
+
"exists": true,
|
| 1221 |
+
"file_count": 16,
|
| 1222 |
+
"bytes": 5872232
|
| 1223 |
+
},
|
| 1224 |
+
{
|
| 1225 |
+
"id": "verified_public_summary_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
|
| 1226 |
+
"title": "Verified summary: Qwen3-Omni LoRA",
|
| 1227 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/verified_result_summary.json",
|
| 1228 |
+
"kind": "metrics_source",
|
| 1229 |
+
"surface": "repo_hf",
|
| 1230 |
+
"shows": "Machine-readable verified summary for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full.",
|
| 1231 |
+
"exists": true,
|
| 1232 |
+
"bytes": 6270,
|
| 1233 |
+
"sha256": "e4dac96e88eb03a36ead205f509c680aa2bb763b4da2256e265311bc17304d7f"
|
| 1234 |
+
},
|
| 1235 |
+
{
|
| 1236 |
+
"id": "verified_public_public_result_summary_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
|
| 1237 |
+
"title": "Verified public result summary: Qwen3-Omni LoRA",
|
| 1238 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/PUBLIC_RESULT_SUMMARY.md",
|
| 1239 |
+
"kind": "scaleup_status",
|
| 1240 |
+
"surface": "repo_hf",
|
| 1241 |
+
"shows": "Public result summary for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full.",
|
| 1242 |
+
"exists": true,
|
| 1243 |
+
"bytes": 1061,
|
| 1244 |
+
"sha256": "5e4de510a64b90d0632d72575965208f6b272b4531bf9f4c515bab23876654aa"
|
| 1245 |
+
},
|
| 1246 |
+
{
|
| 1247 |
+
"id": "verified_public_run_report_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
|
| 1248 |
+
"title": "Verified run report: Qwen3-Omni LoRA",
|
| 1249 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/eval/RUN_REPORT.md",
|
| 1250 |
+
"kind": "scaleup_status",
|
| 1251 |
+
"surface": "repo_hf",
|
| 1252 |
+
"shows": "Run report for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full.",
|
| 1253 |
+
"exists": true,
|
| 1254 |
+
"bytes": 618,
|
| 1255 |
+
"sha256": "2e572809cb3e97c4c17e5f126a63ec1d470e5da345f8a3b6026a6efd5fb927d9"
|
| 1256 |
+
},
|
| 1257 |
+
{
|
| 1258 |
+
"id": "verified_public_metrics_JSON_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
|
| 1259 |
+
"title": "Verified metrics JSON: Qwen3-Omni LoRA",
|
| 1260 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/eval/metrics.json",
|
| 1261 |
+
"kind": "metrics_source",
|
| 1262 |
+
"surface": "repo_hf",
|
| 1263 |
+
"shows": "Metrics json for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full.",
|
| 1264 |
+
"exists": true,
|
| 1265 |
+
"bytes": 108127,
|
| 1266 |
+
"sha256": "4c11c61ee661ee201ae91f50d2dc9c0eabe2a1040a2534fe91f4b5b54c96b27c"
|
| 1267 |
+
},
|
| 1268 |
+
{
|
| 1269 |
+
"id": "verified_public_package_audit_xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
|
| 1270 |
+
"title": "Verified package audit: Qwen3-Omni LoRA",
|
| 1271 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/package_audit.json",
|
| 1272 |
+
"kind": "publication_audit",
|
| 1273 |
+
"surface": "repo_hf",
|
| 1274 |
+
"shows": "Package audit for xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full.",
|
| 1275 |
+
"exists": true,
|
| 1276 |
+
"bytes": 669,
|
| 1277 |
+
"sha256": "3d427e70e44b22b882be49f2963e2afcf5b497f25c445850c9f567cdbc41ed15"
|
| 1278 |
+
},
|
| 1279 |
+
{
|
| 1280 |
+
"id": "verified_public_package_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
| 1281 |
+
"title": "Verified public package: Qwen3-Omni LoRA",
|
| 1282 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
| 1283 |
+
"kind": "verified_public_package",
|
| 1284 |
+
"surface": "repo_hf",
|
| 1285 |
+
"shows": "Public-safe verified package for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full (qwen3_omni_lora, status=verified).",
|
| 1286 |
+
"exists": true,
|
| 1287 |
+
"file_count": 16,
|
| 1288 |
+
"bytes": 4898687
|
| 1289 |
+
},
|
| 1290 |
+
{
|
| 1291 |
+
"id": "verified_public_summary_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
| 1292 |
+
"title": "Verified summary: Qwen3-Omni LoRA",
|
| 1293 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/verified_result_summary.json",
|
| 1294 |
+
"kind": "metrics_source",
|
| 1295 |
+
"surface": "repo_hf",
|
| 1296 |
+
"shows": "Machine-readable verified summary for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full.",
|
| 1297 |
+
"exists": true,
|
| 1298 |
+
"bytes": 6207,
|
| 1299 |
+
"sha256": "d7dae7bc17d0fd07a3f29fd61d57803b9d96d65da2ebd7f5436683a9aa18bfeb"
|
| 1300 |
+
},
|
| 1301 |
+
{
|
| 1302 |
+
"id": "verified_public_public_result_summary_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
| 1303 |
+
"title": "Verified public result summary: Qwen3-Omni LoRA",
|
| 1304 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/PUBLIC_RESULT_SUMMARY.md",
|
| 1305 |
+
"kind": "scaleup_status",
|
| 1306 |
+
"surface": "repo_hf",
|
| 1307 |
+
"shows": "Public result summary for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full.",
|
| 1308 |
+
"exists": true,
|
| 1309 |
+
"bytes": 1008,
|
| 1310 |
+
"sha256": "080636ce30a37a259c4eaad0791fe5dd03fd60d61092407470d616391f0079ea"
|
| 1311 |
+
},
|
| 1312 |
+
{
|
| 1313 |
+
"id": "verified_public_run_report_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
| 1314 |
+
"title": "Verified run report: Qwen3-Omni LoRA",
|
| 1315 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/eval/RUN_REPORT.md",
|
| 1316 |
+
"kind": "scaleup_status",
|
| 1317 |
+
"surface": "repo_hf",
|
| 1318 |
+
"shows": "Run report for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full.",
|
| 1319 |
+
"exists": true,
|
| 1320 |
+
"bytes": 590,
|
| 1321 |
+
"sha256": "4309393cd227803f766a9c7b317f5917e39b09cfb6f2618105c5c6cdb064f1a5"
|
| 1322 |
+
},
|
| 1323 |
+
{
|
| 1324 |
+
"id": "verified_public_metrics_JSON_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
| 1325 |
+
"title": "Verified metrics JSON: Qwen3-Omni LoRA",
|
| 1326 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/eval/metrics.json",
|
| 1327 |
+
"kind": "metrics_source",
|
| 1328 |
+
"surface": "repo_hf",
|
| 1329 |
+
"shows": "Metrics json for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full.",
|
| 1330 |
+
"exists": true,
|
| 1331 |
+
"bytes": 52485,
|
| 1332 |
+
"sha256": "4174640ef32665853b0b807329855344302018952cfa97639cec66649adcbec7"
|
| 1333 |
+
},
|
| 1334 |
+
{
|
| 1335 |
+
"id": "verified_public_package_audit_xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
| 1336 |
+
"title": "Verified package audit: Qwen3-Omni LoRA",
|
| 1337 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/package_audit.json",
|
| 1338 |
+
"kind": "publication_audit",
|
| 1339 |
+
"surface": "repo_hf",
|
| 1340 |
+
"shows": "Package audit for xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full.",
|
| 1341 |
+
"exists": true,
|
| 1342 |
+
"bytes": 623,
|
| 1343 |
+
"sha256": "d7264cfb34e48b5c41c89444ea9cd1314b8f4d0bcc0224debbbe5ea512450197"
|
| 1344 |
}
|
| 1345 |
]
|
| 1346 |
}
|
metrics/audio_ablation_summary.json
CHANGED
|
@@ -38,7 +38,8 @@
|
|
| 38 |
"raw_replacement_delta_vs_no_audio": -0.007422402159244265,
|
| 39 |
"raw_replacement_delta_vs_handcrafted": -0.00770504201550171,
|
| 40 |
"all_plus_raw_logmel": 0.002734107997265892,
|
| 41 |
-
"all_plus_raw_delta_vs_handcrafted": -0.006320461683552957
|
|
|
|
| 42 |
},
|
| 43 |
{
|
| 44 |
"task": "timeline_subtask",
|
|
@@ -53,7 +54,8 @@
|
|
| 53 |
"raw_replacement_delta_vs_no_audio": -0.01034742052599772,
|
| 54 |
"raw_replacement_delta_vs_handcrafted": -0.010430590562065117,
|
| 55 |
"all_plus_raw_logmel": 0.0017889087656529517,
|
| 56 |
-
"all_plus_raw_delta_vs_handcrafted": -0.009467445627956345
|
|
|
|
| 57 |
},
|
| 58 |
{
|
| 59 |
"task": "transition_detection",
|
|
@@ -68,7 +70,8 @@
|
|
| 68 |
"raw_replacement_delta_vs_no_audio": 0.010507780641701658,
|
| 69 |
"raw_replacement_delta_vs_handcrafted": 0.01707714954338524,
|
| 70 |
"all_plus_raw_logmel": 0.4816233470132239,
|
| 71 |
-
"all_plus_raw_delta_vs_handcrafted": 0.019490425838571634
|
|
|
|
| 72 |
},
|
| 73 |
{
|
| 74 |
"task": "next_action",
|
|
@@ -83,7 +86,8 @@
|
|
| 83 |
"raw_replacement_delta_vs_no_audio": -0.004703498679402295,
|
| 84 |
"raw_replacement_delta_vs_handcrafted": -0.004576004576004574,
|
| 85 |
"all_plus_raw_logmel": 0.0058479532163742695,
|
| 86 |
-
"all_plus_raw_delta_vs_handcrafted": -0.00473405736563631
|
|
|
|
| 87 |
},
|
| 88 |
{
|
| 89 |
"task": "hand_trajectory_forecast",
|
|
@@ -98,7 +102,8 @@
|
|
| 98 |
"raw_replacement_delta_vs_no_audio": -0.0021152496337890625,
|
| 99 |
"raw_replacement_delta_vs_handcrafted": 0.16052484512329102,
|
| 100 |
"all_plus_raw_logmel": 4.1367621421813965,
|
| 101 |
-
"all_plus_raw_delta_vs_handcrafted": 0.3296332359313965
|
|
|
|
| 102 |
},
|
| 103 |
{
|
| 104 |
"task": "contact_prediction",
|
|
@@ -113,7 +118,8 @@
|
|
| 113 |
"raw_replacement_delta_vs_no_audio": 0.0,
|
| 114 |
"raw_replacement_delta_vs_handcrafted": 0.0,
|
| 115 |
"all_plus_raw_logmel": 1.0,
|
| 116 |
-
"all_plus_raw_delta_vs_handcrafted": 0.0
|
|
|
|
| 117 |
},
|
| 118 |
{
|
| 119 |
"task": "object_relevance",
|
|
@@ -128,7 +134,8 @@
|
|
| 128 |
"raw_replacement_delta_vs_no_audio": 0.030784313919472256,
|
| 129 |
"raw_replacement_delta_vs_handcrafted": 0.020578064024873888,
|
| 130 |
"all_plus_raw_logmel": 0.18262653898768813,
|
| 131 |
-
"all_plus_raw_delta_vs_handcrafted": 0.024487004103967203
|
|
|
|
| 132 |
},
|
| 133 |
{
|
| 134 |
"task": "caption_grounding",
|
|
@@ -143,7 +150,8 @@
|
|
| 143 |
"raw_replacement_delta_vs_no_audio": -0.002380702644586563,
|
| 144 |
"raw_replacement_delta_vs_handcrafted": -0.007237853482365608,
|
| 145 |
"all_plus_raw_logmel": 0.02719014883041382,
|
| 146 |
-
"all_plus_raw_delta_vs_handcrafted": -0.004895530641078949
|
|
|
|
| 147 |
},
|
| 148 |
{
|
| 149 |
"task": "cross_modal_retrieval",
|
|
@@ -158,7 +166,8 @@
|
|
| 158 |
"raw_replacement_delta_vs_no_audio": -0.061719030141830444,
|
| 159 |
"raw_replacement_delta_vs_handcrafted": -0.04763227701187134,
|
| 160 |
"all_plus_raw_logmel": 0.31795138120651245,
|
| 161 |
-
"all_plus_raw_delta_vs_handcrafted": -0.05717244744300842
|
|
|
|
| 162 |
},
|
| 163 |
{
|
| 164 |
"task": "modality_reconstruction",
|
|
@@ -173,7 +182,8 @@
|
|
| 173 |
"raw_replacement_delta_vs_no_audio": 1.615983009338379,
|
| 174 |
"raw_replacement_delta_vs_handcrafted": 0.9635343551635742,
|
| 175 |
"all_plus_raw_logmel": 8.392388343811035,
|
| 176 |
-
"all_plus_raw_delta_vs_handcrafted": 1.401824951171875
|
|
|
|
| 177 |
},
|
| 178 |
{
|
| 179 |
"task": "temporal_order",
|
|
@@ -188,7 +198,8 @@
|
|
| 188 |
"raw_replacement_delta_vs_no_audio": 0.03591857034334939,
|
| 189 |
"raw_replacement_delta_vs_handcrafted": 0.012930064596222923,
|
| 190 |
"all_plus_raw_logmel": 0.5330450130569861,
|
| 191 |
-
"all_plus_raw_delta_vs_handcrafted": 0.015803633746641288
|
|
|
|
| 192 |
},
|
| 193 |
{
|
| 194 |
"task": "misalignment_detection",
|
|
@@ -203,7 +214,8 @@
|
|
| 203 |
"raw_replacement_delta_vs_no_audio": 0.021203945154488313,
|
| 204 |
"raw_replacement_delta_vs_handcrafted": 0.02644906505448169,
|
| 205 |
"all_plus_raw_logmel": 0.4373795761078998,
|
| 206 |
-
"all_plus_raw_delta_vs_handcrafted": 0.02003912235410793
|
|
|
|
| 207 |
}
|
| 208 |
],
|
| 209 |
"aggregate": {
|
|
|
|
| 38 |
"raw_replacement_delta_vs_no_audio": -0.007422402159244265,
|
| 39 |
"raw_replacement_delta_vs_handcrafted": -0.00770504201550171,
|
| 40 |
"all_plus_raw_logmel": 0.002734107997265892,
|
| 41 |
+
"all_plus_raw_delta_vs_handcrafted": -0.006320461683552957,
|
| 42 |
+
"task_display_name": "Action Recognition"
|
| 43 |
},
|
| 44 |
{
|
| 45 |
"task": "timeline_subtask",
|
|
|
|
| 54 |
"raw_replacement_delta_vs_no_audio": -0.01034742052599772,
|
| 55 |
"raw_replacement_delta_vs_handcrafted": -0.010430590562065117,
|
| 56 |
"all_plus_raw_logmel": 0.0017889087656529517,
|
| 57 |
+
"all_plus_raw_delta_vs_handcrafted": -0.009467445627956345,
|
| 58 |
+
"task_display_name": "Procedure Step Recognition"
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"task": "transition_detection",
|
|
|
|
| 70 |
"raw_replacement_delta_vs_no_audio": 0.010507780641701658,
|
| 71 |
"raw_replacement_delta_vs_handcrafted": 0.01707714954338524,
|
| 72 |
"all_plus_raw_logmel": 0.4816233470132239,
|
| 73 |
+
"all_plus_raw_delta_vs_handcrafted": 0.019490425838571634,
|
| 74 |
+
"task_display_name": "Action Boundary Detection"
|
| 75 |
},
|
| 76 |
{
|
| 77 |
"task": "next_action",
|
|
|
|
| 86 |
"raw_replacement_delta_vs_no_audio": -0.004703498679402295,
|
| 87 |
"raw_replacement_delta_vs_handcrafted": -0.004576004576004574,
|
| 88 |
"all_plus_raw_logmel": 0.0058479532163742695,
|
| 89 |
+
"all_plus_raw_delta_vs_handcrafted": -0.00473405736563631,
|
| 90 |
+
"task_display_name": "Next-Action Prediction"
|
| 91 |
},
|
| 92 |
{
|
| 93 |
"task": "hand_trajectory_forecast",
|
|
|
|
| 102 |
"raw_replacement_delta_vs_no_audio": -0.0021152496337890625,
|
| 103 |
"raw_replacement_delta_vs_handcrafted": 0.16052484512329102,
|
| 104 |
"all_plus_raw_logmel": 4.1367621421813965,
|
| 105 |
+
"all_plus_raw_delta_vs_handcrafted": 0.3296332359313965,
|
| 106 |
+
"task_display_name": "Hand Trajectory Forecasting"
|
| 107 |
},
|
| 108 |
{
|
| 109 |
"task": "contact_prediction",
|
|
|
|
| 118 |
"raw_replacement_delta_vs_no_audio": 0.0,
|
| 119 |
"raw_replacement_delta_vs_handcrafted": 0.0,
|
| 120 |
"all_plus_raw_logmel": 1.0,
|
| 121 |
+
"all_plus_raw_delta_vs_handcrafted": 0.0,
|
| 122 |
+
"task_display_name": "Contact State Prediction"
|
| 123 |
},
|
| 124 |
{
|
| 125 |
"task": "object_relevance",
|
|
|
|
| 134 |
"raw_replacement_delta_vs_no_audio": 0.030784313919472256,
|
| 135 |
"raw_replacement_delta_vs_handcrafted": 0.020578064024873888,
|
| 136 |
"all_plus_raw_logmel": 0.18262653898768813,
|
| 137 |
+
"all_plus_raw_delta_vs_handcrafted": 0.024487004103967203,
|
| 138 |
+
"task_display_name": "Object Relevance Prediction"
|
| 139 |
},
|
| 140 |
{
|
| 141 |
"task": "caption_grounding",
|
|
|
|
| 150 |
"raw_replacement_delta_vs_no_audio": -0.002380702644586563,
|
| 151 |
"raw_replacement_delta_vs_handcrafted": -0.007237853482365608,
|
| 152 |
"all_plus_raw_logmel": 0.02719014883041382,
|
| 153 |
+
"all_plus_raw_delta_vs_handcrafted": -0.004895530641078949,
|
| 154 |
+
"task_display_name": "Language Grounding"
|
| 155 |
},
|
| 156 |
{
|
| 157 |
"task": "cross_modal_retrieval",
|
|
|
|
| 166 |
"raw_replacement_delta_vs_no_audio": -0.061719030141830444,
|
| 167 |
"raw_replacement_delta_vs_handcrafted": -0.04763227701187134,
|
| 168 |
"all_plus_raw_logmel": 0.31795138120651245,
|
| 169 |
+
"all_plus_raw_delta_vs_handcrafted": -0.05717244744300842,
|
| 170 |
+
"task_display_name": "Cross-Modal Retrieval"
|
| 171 |
},
|
| 172 |
{
|
| 173 |
"task": "modality_reconstruction",
|
|
|
|
| 182 |
"raw_replacement_delta_vs_no_audio": 1.615983009338379,
|
| 183 |
"raw_replacement_delta_vs_handcrafted": 0.9635343551635742,
|
| 184 |
"all_plus_raw_logmel": 8.392388343811035,
|
| 185 |
+
"all_plus_raw_delta_vs_handcrafted": 1.401824951171875,
|
| 186 |
+
"task_display_name": "Cross-Modal Reconstruction"
|
| 187 |
},
|
| 188 |
{
|
| 189 |
"task": "temporal_order",
|
|
|
|
| 198 |
"raw_replacement_delta_vs_no_audio": 0.03591857034334939,
|
| 199 |
"raw_replacement_delta_vs_handcrafted": 0.012930064596222923,
|
| 200 |
"all_plus_raw_logmel": 0.5330450130569861,
|
| 201 |
+
"all_plus_raw_delta_vs_handcrafted": 0.015803633746641288,
|
| 202 |
+
"task_display_name": "Temporal Order Verification"
|
| 203 |
},
|
| 204 |
{
|
| 205 |
"task": "misalignment_detection",
|
|
|
|
| 214 |
"raw_replacement_delta_vs_no_audio": 0.021203945154488313,
|
| 215 |
"raw_replacement_delta_vs_handcrafted": 0.02644906505448169,
|
| 216 |
"all_plus_raw_logmel": 0.4373795761078998,
|
| 217 |
+
"all_plus_raw_delta_vs_handcrafted": 0.02003912235410793,
|
| 218 |
+
"task_display_name": "Multimodal Synchronization Detection"
|
| 219 |
}
|
| 220 |
],
|
| 221 |
"aggregate": {
|
metrics/evaluation_protocol.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Evaluation Protocol",
|
| 3 |
"status": "pass",
|
| 4 |
"version": "2026-06-01",
|
| 5 |
-
"generated_at_utc": "2026-06-
|
| 6 |
"source_files": [
|
| 7 |
"docs/data/summary_metrics.json",
|
| 8 |
"results/episode_task_suite/summary_report.json",
|
|
@@ -69,6 +69,7 @@
|
|
| 69 |
"task_protocols": [
|
| 70 |
{
|
| 71 |
"task": "timeline_action",
|
|
|
|
| 72 |
"family": "supervised classification",
|
| 73 |
"unit": "single window",
|
| 74 |
"input": "current 20-frame all-feature window",
|
|
@@ -88,6 +89,7 @@
|
|
| 88 |
},
|
| 89 |
{
|
| 90 |
"task": "timeline_subtask",
|
|
|
|
| 91 |
"family": "supervised classification",
|
| 92 |
"unit": "single window",
|
| 93 |
"input": "current 20-frame all-feature window",
|
|
@@ -107,6 +109,7 @@
|
|
| 107 |
},
|
| 108 |
{
|
| 109 |
"task": "transition_detection",
|
|
|
|
| 110 |
"family": "temporal diagnostic",
|
| 111 |
"unit": "single window",
|
| 112 |
"input": "current 20-frame all-feature window",
|
|
@@ -126,6 +129,7 @@
|
|
| 126 |
},
|
| 127 |
{
|
| 128 |
"task": "next_action",
|
|
|
|
| 129 |
"family": "short-horizon prediction",
|
| 130 |
"unit": "single window",
|
| 131 |
"input": "current 20-frame all-feature window at time t",
|
|
@@ -145,6 +149,7 @@
|
|
| 145 |
},
|
| 146 |
{
|
| 147 |
"task": "hand_trajectory_forecast",
|
|
|
|
| 148 |
"family": "trajectory regression",
|
| 149 |
"unit": "single window",
|
| 150 |
"input": "current all-feature window",
|
|
@@ -164,6 +169,7 @@
|
|
| 164 |
},
|
| 165 |
{
|
| 166 |
"task": "contact_prediction",
|
|
|
|
| 167 |
"family": "binary classification",
|
| 168 |
"unit": "single window",
|
| 169 |
"input": "non-contact and non-caption feature blocks",
|
|
@@ -183,6 +189,7 @@
|
|
| 183 |
},
|
| 184 |
{
|
| 185 |
"task": "object_relevance",
|
|
|
|
| 186 |
"family": "multi-label classification",
|
| 187 |
"unit": "single window",
|
| 188 |
"input": "non-caption feature blocks",
|
|
@@ -202,6 +209,7 @@
|
|
| 202 |
},
|
| 203 |
{
|
| 204 |
"task": "caption_grounding",
|
|
|
|
| 205 |
"family": "retrieval",
|
| 206 |
"unit": "caption query",
|
| 207 |
"input": "caption object/interaction query plus candidate sensor windows",
|
|
@@ -221,6 +229,7 @@
|
|
| 221 |
},
|
| 222 |
{
|
| 223 |
"task": "cross_modal_retrieval",
|
|
|
|
| 224 |
"family": "retrieval",
|
| 225 |
"unit": "sensor query",
|
| 226 |
"input": "motion, IMU, and camera query features",
|
|
@@ -240,6 +249,7 @@
|
|
| 240 |
},
|
| 241 |
{
|
| 242 |
"task": "modality_reconstruction",
|
|
|
|
| 243 |
"family": "cross-modal regression",
|
| 244 |
"unit": "single window",
|
| 245 |
"input": "motion, IMU, and camera features",
|
|
@@ -258,6 +268,7 @@
|
|
| 258 |
},
|
| 259 |
{
|
| 260 |
"task": "temporal_order",
|
|
|
|
| 261 |
"family": "pairwise diagnostic",
|
| 262 |
"unit": "adjacent window pair",
|
| 263 |
"input": "two adjacent windows",
|
|
@@ -277,6 +288,7 @@
|
|
| 277 |
},
|
| 278 |
{
|
| 279 |
"task": "misalignment_detection",
|
|
|
|
| 280 |
"family": "pairwise diagnostic",
|
| 281 |
"unit": "paired modality window",
|
| 282 |
"input": "motion side plus visual/depth side",
|
|
@@ -305,7 +317,7 @@
|
|
| 305 |
"current_limitations": [
|
| 306 |
"Cross-episode generalization for Qwen3-Omni has a first verified diagnostic pilot, but strong model quality is not yet shown.",
|
| 307 |
"Feature-vector reconstruction is separate from pixel depth, mesh, NeRF, or Gaussian reconstruction.",
|
| 308 |
-
"The verified
|
| 309 |
"Full audio-visual representation learning still needs multi-episode training; the current report includes single-episode audio/no-audio ablations."
|
| 310 |
],
|
| 311 |
"scale_up_gate": {
|
|
@@ -316,7 +328,7 @@
|
|
| 316 |
"manifest, training metadata, progress logs, metrics, predictions, and run report",
|
| 317 |
"held-out evaluation on test episodes rather than train windows"
|
| 318 |
],
|
| 319 |
-
"current_status": "verified diagnostic
|
| 320 |
"evidence": [
|
| 321 |
"docs/data/omni_finetune_verified_result.json",
|
| 322 |
"results/omni_finetune/verified_public/"
|
|
|
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Evaluation Protocol",
|
| 3 |
"status": "pass",
|
| 4 |
"version": "2026-06-01",
|
| 5 |
+
"generated_at_utc": "2026-06-06T23:26:13+00:00",
|
| 6 |
"source_files": [
|
| 7 |
"docs/data/summary_metrics.json",
|
| 8 |
"results/episode_task_suite/summary_report.json",
|
|
|
|
| 69 |
"task_protocols": [
|
| 70 |
{
|
| 71 |
"task": "timeline_action",
|
| 72 |
+
"task_display_name": "Action Recognition",
|
| 73 |
"family": "supervised classification",
|
| 74 |
"unit": "single window",
|
| 75 |
"input": "current 20-frame all-feature window",
|
|
|
|
| 89 |
},
|
| 90 |
{
|
| 91 |
"task": "timeline_subtask",
|
| 92 |
+
"task_display_name": "Procedure Step Recognition",
|
| 93 |
"family": "supervised classification",
|
| 94 |
"unit": "single window",
|
| 95 |
"input": "current 20-frame all-feature window",
|
|
|
|
| 109 |
},
|
| 110 |
{
|
| 111 |
"task": "transition_detection",
|
| 112 |
+
"task_display_name": "Action Boundary Detection",
|
| 113 |
"family": "temporal diagnostic",
|
| 114 |
"unit": "single window",
|
| 115 |
"input": "current 20-frame all-feature window",
|
|
|
|
| 129 |
},
|
| 130 |
{
|
| 131 |
"task": "next_action",
|
| 132 |
+
"task_display_name": "Next-Action Prediction",
|
| 133 |
"family": "short-horizon prediction",
|
| 134 |
"unit": "single window",
|
| 135 |
"input": "current 20-frame all-feature window at time t",
|
|
|
|
| 149 |
},
|
| 150 |
{
|
| 151 |
"task": "hand_trajectory_forecast",
|
| 152 |
+
"task_display_name": "Hand Trajectory Forecasting",
|
| 153 |
"family": "trajectory regression",
|
| 154 |
"unit": "single window",
|
| 155 |
"input": "current all-feature window",
|
|
|
|
| 169 |
},
|
| 170 |
{
|
| 171 |
"task": "contact_prediction",
|
| 172 |
+
"task_display_name": "Contact State Prediction",
|
| 173 |
"family": "binary classification",
|
| 174 |
"unit": "single window",
|
| 175 |
"input": "non-contact and non-caption feature blocks",
|
|
|
|
| 189 |
},
|
| 190 |
{
|
| 191 |
"task": "object_relevance",
|
| 192 |
+
"task_display_name": "Object Relevance Prediction",
|
| 193 |
"family": "multi-label classification",
|
| 194 |
"unit": "single window",
|
| 195 |
"input": "non-caption feature blocks",
|
|
|
|
| 209 |
},
|
| 210 |
{
|
| 211 |
"task": "caption_grounding",
|
| 212 |
+
"task_display_name": "Language Grounding",
|
| 213 |
"family": "retrieval",
|
| 214 |
"unit": "caption query",
|
| 215 |
"input": "caption object/interaction query plus candidate sensor windows",
|
|
|
|
| 229 |
},
|
| 230 |
{
|
| 231 |
"task": "cross_modal_retrieval",
|
| 232 |
+
"task_display_name": "Cross-Modal Retrieval",
|
| 233 |
"family": "retrieval",
|
| 234 |
"unit": "sensor query",
|
| 235 |
"input": "motion, IMU, and camera query features",
|
|
|
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"task": "modality_reconstruction",
|
| 252 |
+
"task_display_name": "Cross-Modal Reconstruction",
|
| 253 |
"family": "cross-modal regression",
|
| 254 |
"unit": "single window",
|
| 255 |
"input": "motion, IMU, and camera features",
|
|
|
|
| 268 |
},
|
| 269 |
{
|
| 270 |
"task": "temporal_order",
|
| 271 |
+
"task_display_name": "Temporal Order Verification",
|
| 272 |
"family": "pairwise diagnostic",
|
| 273 |
"unit": "adjacent window pair",
|
| 274 |
"input": "two adjacent windows",
|
|
|
|
| 288 |
},
|
| 289 |
{
|
| 290 |
"task": "misalignment_detection",
|
| 291 |
+
"task_display_name": "Multimodal Synchronization Detection",
|
| 292 |
"family": "pairwise diagnostic",
|
| 293 |
"unit": "paired modality window",
|
| 294 |
"input": "motion side plus visual/depth side",
|
|
|
|
| 317 |
"current_limitations": [
|
| 318 |
"Cross-episode generalization for Qwen3-Omni has a first verified diagnostic pilot, but strong model quality is not yet shown.",
|
| 319 |
"Feature-vector reconstruction is separate from pixel depth, mesh, NeRF, or Gaussian reconstruction.",
|
| 320 |
+
"The final verified Qwen3-Omni diagnostic result meets the strict-JSON target, but action/subtask held-out quality remains weak and needs error analysis before larger model-quality claims.",
|
| 321 |
"Full audio-visual representation learning still needs multi-episode training; the current report includes single-episode audio/no-audio ablations."
|
| 322 |
],
|
| 323 |
"scale_up_gate": {
|
|
|
|
| 328 |
"manifest, training metadata, progress logs, metrics, predictions, and run report",
|
| 329 |
"held-out evaluation on test episodes rather than train windows"
|
| 330 |
],
|
| 331 |
+
"current_status": "verified diagnostic result; strict-JSON quality target met, action/subtask quality still weak",
|
| 332 |
"evidence": [
|
| 333 |
"docs/data/omni_finetune_verified_result.json",
|
| 334 |
"results/omni_finetune/verified_public/"
|
metrics/foundation_model_plan.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Xperience-10M Foundation Model Plan",
|
| 3 |
"status": "planning_artifact",
|
| 4 |
-
"current_boundary": "A
|
| 5 |
"backbone_registry": {
|
| 6 |
"config_dir": "configs/omni_backbones",
|
| 7 |
"validator": "scripts/omni/backbone_registry.py --validate --json",
|
|
@@ -206,7 +206,7 @@
|
|
| 206 |
{
|
| 207 |
"step": 2,
|
| 208 |
"name": "First held-out baseline",
|
| 209 |
-
"action": "Run
|
| 210 |
},
|
| 211 |
{
|
| 212 |
"step": 3,
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Xperience-10M Foundation Model Plan",
|
| 3 |
"status": "planning_artifact",
|
| 4 |
+
"current_boundary": "A final held-out multi-episode Qwen3-Omni diagnostic result is verified in this repo and meets the strict-JSON target, but it is not a strong action/subtask model result. The current foundation-model work should treat it as the baseline train/eval/package loop before Qwen action/subtask improvements, Cosmos-style world modeling, or policy/VLA branches.",
|
| 5 |
"backbone_registry": {
|
| 6 |
"config_dir": "configs/omni_backbones",
|
| 7 |
"validator": "scripts/omni/backbone_registry.py --validate --json",
|
|
|
|
| 206 |
{
|
| 207 |
"step": 2,
|
| 208 |
"name": "First held-out baseline",
|
| 209 |
+
"action": "Run Qwen3-Omni action/subtask error analysis and targeted reruns to improve the verified diagnostic baseline."
|
| 210 |
},
|
| 211 |
{
|
| 212 |
"step": 3,
|
metrics/mirror_parity.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
metrics/omni_finetune_verified_result.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
-
"title": "Verified Qwen3-Omni LoRA
|
| 3 |
-
"status": "
|
| 4 |
-
"status_date": "2026-06-
|
| 5 |
"backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
|
| 6 |
"adapter": "Qwen3-Omni LoRA",
|
| 7 |
"dataset": "Ropedia Xperience-10M selected 128-episode pilot",
|
|
@@ -27,7 +27,7 @@
|
|
| 27 |
},
|
| 28 |
"training": {
|
| 29 |
"num_processes": 8,
|
| 30 |
-
"epochs":
|
| 31 |
"lora_rank": 16,
|
| 32 |
"lora_alpha": 32,
|
| 33 |
"lora_dropout": 0.05,
|
|
@@ -36,64 +36,50 @@
|
|
| 36 |
"history": [
|
| 37 |
{
|
| 38 |
"epoch": 1,
|
| 39 |
-
"train_loss": 0.
|
| 40 |
-
"val_loss": 0.
|
| 41 |
"global_step": 356
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
}
|
| 43 |
],
|
| 44 |
"loss": "answer-token cross entropy over supervised JSON tokens",
|
| 45 |
-
"note": "This
|
| 46 |
},
|
| 47 |
"evaluation": {
|
| 48 |
"split": "test",
|
| 49 |
"num_samples": 448,
|
| 50 |
"held_out_episode_count": 14,
|
| 51 |
-
"json_validity_rate": 0.
|
| 52 |
-
"action_macro_f1": 0.
|
| 53 |
-
"subtask_accuracy": 0.
|
| 54 |
-
"transition_accuracy": 0.
|
| 55 |
-
"next_action_accuracy": 0.
|
| 56 |
-
"contact_accuracy": 0.
|
| 57 |
-
"object_micro_f1": 0.
|
| 58 |
"quality_target": {
|
| 59 |
"json_validity_rate": 0.98,
|
| 60 |
-
"status": "
|
| 61 |
},
|
| 62 |
-
"
|
| 63 |
},
|
| 64 |
-
"interpretation": "This is
|
| 65 |
"public_package": {
|
| 66 |
-
"path": "results/omni_finetune/verified_public/
|
| 67 |
"audit_status": "pass",
|
| 68 |
"contains_raw_xperience10m_data": false,
|
| 69 |
"contains_qwen_base_weights": false,
|
| 70 |
"contains_lora_weights": false,
|
| 71 |
-
"
|
| 72 |
-
"status": "pass",
|
| 73 |
-
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/error_analysis_summary.json",
|
| 74 |
-
"markdown_report": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/ERROR_ANALYSIS.md",
|
| 75 |
-
"groupings": [
|
| 76 |
-
"episode",
|
| 77 |
-
"action_family",
|
| 78 |
-
"train_seen_status",
|
| 79 |
-
"required_modality_state",
|
| 80 |
-
"object_category"
|
| 81 |
-
],
|
| 82 |
-
"key_readouts": {
|
| 83 |
-
"parsed_prediction_rate": 0.8772321428571429,
|
| 84 |
-
"weakest_action_family": "locomotion",
|
| 85 |
-
"weakest_action_family_samples": 23,
|
| 86 |
-
"weakest_action_family_parsed_prediction_rate": 0.2608695652173913,
|
| 87 |
-
"seen_action_exact_rate": 0.04580152671755725,
|
| 88 |
-
"unseen_action_exact_rate": 0.015772870662460567,
|
| 89 |
-
"required_modality_state": "rrd_missing_only_required_modalities_present"
|
| 90 |
-
}
|
| 91 |
-
}
|
| 92 |
},
|
| 93 |
"required_next_steps": [
|
| 94 |
-
"
|
| 95 |
-
"
|
| 96 |
-
"
|
| 97 |
-
"Keep the same verified package contract for
|
| 98 |
]
|
| 99 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"title": "Verified Qwen3-Omni LoRA 128-Episode Held-Out Result",
|
| 3 |
+
"status": "verified_full_128_episode_diagnostic_result",
|
| 4 |
+
"status_date": "2026-06-07",
|
| 5 |
"backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
|
| 6 |
"adapter": "Qwen3-Omni LoRA",
|
| 7 |
"dataset": "Ropedia Xperience-10M selected 128-episode pilot",
|
|
|
|
| 27 |
},
|
| 28 |
"training": {
|
| 29 |
"num_processes": 8,
|
| 30 |
+
"epochs": 2,
|
| 31 |
"lora_rank": 16,
|
| 32 |
"lora_alpha": 32,
|
| 33 |
"lora_dropout": 0.05,
|
|
|
|
| 36 |
"history": [
|
| 37 |
{
|
| 38 |
"epoch": 1,
|
| 39 |
+
"train_loss": 0.41282760031950355,
|
| 40 |
+
"val_loss": 0.03288277983665466,
|
| 41 |
"global_step": 356
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"epoch": 2,
|
| 45 |
+
"train_loss": 0.027745448225544075,
|
| 46 |
+
"val_loss": 0.027823254466056824,
|
| 47 |
+
"global_step": 712
|
| 48 |
}
|
| 49 |
],
|
| 50 |
"loss": "answer-token cross entropy over supervised JSON tokens",
|
| 51 |
+
"note": "This final Qwen3-Omni LoRA pass reused the selected 96/16/16 episode setup, trained on all exported train windows with validation monitoring, and preserved the held-out test split for final evaluation."
|
| 52 |
},
|
| 53 |
"evaluation": {
|
| 54 |
"split": "test",
|
| 55 |
"num_samples": 448,
|
| 56 |
"held_out_episode_count": 14,
|
| 57 |
+
"json_validity_rate": 0.9977678571428571,
|
| 58 |
+
"action_macro_f1": 0.0024331644885523347,
|
| 59 |
+
"subtask_accuracy": 0.002232142857142857,
|
| 60 |
+
"transition_accuracy": 0.9709821428571429,
|
| 61 |
+
"next_action_accuracy": 0.029017857142857144,
|
| 62 |
+
"contact_accuracy": 0.71875,
|
| 63 |
+
"object_micro_f1": 0.30160427807486634,
|
| 64 |
"quality_target": {
|
| 65 |
"json_validity_rate": 0.98,
|
| 66 |
+
"status": "met"
|
| 67 |
},
|
| 68 |
+
"previous_validation_aware_json_validity_rate": 0.875
|
| 69 |
},
|
| 70 |
+
"interpretation": "This is the final verified two-epoch Qwen3-Omni LoRA diagnostic result for the selected 128-episode setup. It meets the 98% JSON-validity target and improves transition, contact, and object metrics over the earlier validation-aware pilot, but action and subtask classification remain weak on held-out episodes, so this is still a baseline-quality diagnostic model rather than a strong Xperience-10M action recognizer.",
|
| 71 |
"public_package": {
|
| 72 |
+
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
| 73 |
"audit_status": "pass",
|
| 74 |
"contains_raw_xperience10m_data": false,
|
| 75 |
"contains_qwen_base_weights": false,
|
| 76 |
"contains_lora_weights": false,
|
| 77 |
+
"adapter_weights_repo": "cy0307/ropedia-qwen3-omni-lora-128ep"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
},
|
| 79 |
"required_next_steps": [
|
| 80 |
+
"Verify the public Hugging Face LoRA adapter repository hashes after publication.",
|
| 81 |
+
"Publish the final verified package and refreshed comparison tables to all public mirrors, then run live publication verification.",
|
| 82 |
+
"Use the full-eval predictions for error analysis focused on action/subtask confusions and unseen-label behavior.",
|
| 83 |
+
"Keep the same verified package contract for the Cosmos3 world-model branch and any future VLA/policy branches."
|
| 84 |
]
|
| 85 |
}
|
metrics/omni_model_comparison.json
ADDED
|
@@ -0,0 +1,513 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"title": "Ropedia Xperience-10M Current Result Versions",
|
| 3 |
+
"generated_at_utc": "2026-06-06T23:26:13+00:00",
|
| 4 |
+
"status": "pass",
|
| 5 |
+
"version_count": 3,
|
| 6 |
+
"comparison_rule": "Compare only rows with the same scope and target. Single-episode raw-feature metrics, 128-episode metadata baselines, Qwen3 structured JSON metrics, and Cosmos3 future-window metrics answer different questions.",
|
| 7 |
+
"version_reading_notes": [
|
| 8 |
+
"Version 1 is the public-sample 12-task harness with minimal and neural heads.",
|
| 9 |
+
"Version 2 is the selected 128-episode same-split simple/NN baseline alignment.",
|
| 10 |
+
"Version 3 is the verified model-branch layer: the current final Qwen3-Omni LoRA package is the JSON-task diagnostic result, while Cosmos3-Nano is a future-window compatibility result rather than a full Cosmos diffusion fine-tune."
|
| 11 |
+
],
|
| 12 |
+
"versions": [
|
| 13 |
+
{
|
| 14 |
+
"id": "v1_single_episode_public_sample",
|
| 15 |
+
"title": "Single-Episode Public-Sample Task Suite",
|
| 16 |
+
"status": "verified",
|
| 17 |
+
"scope": "one public Xperience-10M sample episode",
|
| 18 |
+
"source": "results/episode_task_suite/summary_report.json",
|
| 19 |
+
"split": "chronological 70/30 within one episode",
|
| 20 |
+
"counts": {
|
| 21 |
+
"episodes": 1,
|
| 22 |
+
"windows": 1161,
|
| 23 |
+
"frames": 5821,
|
| 24 |
+
"feature_dim": 8546,
|
| 25 |
+
"task_count": 12,
|
| 26 |
+
"neural_task_count": 12
|
| 27 |
+
},
|
| 28 |
+
"models": [
|
| 29 |
+
"minimal task heads",
|
| 30 |
+
"compact neural MLP task heads"
|
| 31 |
+
],
|
| 32 |
+
"task_metrics": [
|
| 33 |
+
{
|
| 34 |
+
"task": "caption_grounding",
|
| 35 |
+
"task_display_name": "Language Grounding",
|
| 36 |
+
"simple_status": "pass",
|
| 37 |
+
"simple_primary_metric": "mrr",
|
| 38 |
+
"simple_primary_score": 0.016023479050338015,
|
| 39 |
+
"neural_status": "pass",
|
| 40 |
+
"neural_primary_metric": "mrr",
|
| 41 |
+
"neural_primary_score": 0.01684125567132316
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"task": "contact_prediction",
|
| 45 |
+
"task_display_name": "Contact State Prediction",
|
| 46 |
+
"simple_status": "pass",
|
| 47 |
+
"simple_primary_metric": "macro_f1",
|
| 48 |
+
"simple_primary_score": 1.0,
|
| 49 |
+
"neural_status": "pass",
|
| 50 |
+
"neural_primary_metric": "macro_f1",
|
| 51 |
+
"neural_primary_score": 1.0
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"task": "cross_modal_retrieval",
|
| 55 |
+
"task_display_name": "Cross-Modal Retrieval",
|
| 56 |
+
"simple_status": "pass",
|
| 57 |
+
"simple_primary_metric": "mrr",
|
| 58 |
+
"simple_primary_score": 0.26925966892956127,
|
| 59 |
+
"neural_status": "pass",
|
| 60 |
+
"neural_primary_metric": "mrr",
|
| 61 |
+
"neural_primary_score": 0.1299971898648288
|
| 62 |
+
},
|
| 63 |
+
{
|
| 64 |
+
"task": "hand_trajectory_forecast",
|
| 65 |
+
"task_display_name": "Hand Trajectory Forecasting",
|
| 66 |
+
"simple_status": "pass",
|
| 67 |
+
"simple_primary_metric": "mpjpe",
|
| 68 |
+
"simple_primary_score": 0.8646570444107056,
|
| 69 |
+
"neural_status": "pass",
|
| 70 |
+
"neural_primary_metric": "mpjpe",
|
| 71 |
+
"neural_primary_score": 0.10785018652677536
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
"task": "misalignment_detection",
|
| 75 |
+
"task_display_name": "Multimodal Synchronization Detection",
|
| 76 |
+
"simple_status": "pass",
|
| 77 |
+
"simple_primary_metric": "f1",
|
| 78 |
+
"simple_primary_score": 0.5051698670605613,
|
| 79 |
+
"neural_status": "pass",
|
| 80 |
+
"neural_primary_metric": "f1",
|
| 81 |
+
"neural_primary_score": 0.7152682255845944
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"task": "modality_reconstruction",
|
| 85 |
+
"task_display_name": "Cross-Modal Reconstruction",
|
| 86 |
+
"simple_status": "pass",
|
| 87 |
+
"simple_primary_metric": "r2",
|
| 88 |
+
"simple_primary_score": -0.015271898913936655,
|
| 89 |
+
"neural_status": "pass",
|
| 90 |
+
"neural_primary_metric": "r2",
|
| 91 |
+
"neural_primary_score": -0.010171410134180991
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"task": "next_action",
|
| 95 |
+
"task_display_name": "Next-Action Prediction",
|
| 96 |
+
"simple_status": "pass",
|
| 97 |
+
"simple_primary_metric": "macro_f1",
|
| 98 |
+
"simple_primary_score": 0.05925925925925927,
|
| 99 |
+
"neural_status": "pass",
|
| 100 |
+
"neural_primary_metric": "macro_f1",
|
| 101 |
+
"neural_primary_score": 0.04186046511627907
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"task": "object_relevance",
|
| 105 |
+
"task_display_name": "Object Relevance Prediction",
|
| 106 |
+
"simple_status": "pass",
|
| 107 |
+
"simple_primary_metric": "micro_f1",
|
| 108 |
+
"simple_primary_score": 0.18034382095361662,
|
| 109 |
+
"neural_status": "pass",
|
| 110 |
+
"neural_primary_metric": "micro_f1",
|
| 111 |
+
"neural_primary_score": 0.1679279279279279
|
| 112 |
+
},
|
| 113 |
+
{
|
| 114 |
+
"task": "temporal_order",
|
| 115 |
+
"task_display_name": "Temporal Order Verification",
|
| 116 |
+
"simple_status": "pass",
|
| 117 |
+
"simple_primary_metric": "accuracy",
|
| 118 |
+
"simple_primary_score": 0.4540229885057471,
|
| 119 |
+
"neural_status": "pass",
|
| 120 |
+
"neural_primary_metric": "accuracy",
|
| 121 |
+
"neural_primary_score": 0.8577586206896551
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"task": "timeline_action",
|
| 125 |
+
"task_display_name": "Action Recognition",
|
| 126 |
+
"simple_status": "pass",
|
| 127 |
+
"simple_primary_metric": "macro_f1",
|
| 128 |
+
"simple_primary_score": 0.05,
|
| 129 |
+
"neural_status": "pass",
|
| 130 |
+
"neural_primary_metric": "macro_f1",
|
| 131 |
+
"neural_primary_score": 0.014814814814814814
|
| 132 |
+
},
|
| 133 |
+
{
|
| 134 |
+
"task": "timeline_subtask",
|
| 135 |
+
"task_display_name": "Procedure Step Recognition",
|
| 136 |
+
"simple_status": "pass",
|
| 137 |
+
"simple_primary_metric": "macro_f1",
|
| 138 |
+
"simple_primary_score": 0.05056355513846935,
|
| 139 |
+
"neural_status": "pass",
|
| 140 |
+
"neural_primary_metric": "macro_f1",
|
| 141 |
+
"neural_primary_score": 0.02810810810810811
|
| 142 |
+
},
|
| 143 |
+
{
|
| 144 |
+
"task": "transition_detection",
|
| 145 |
+
"task_display_name": "Action Boundary Detection",
|
| 146 |
+
"simple_status": "pass",
|
| 147 |
+
"simple_primary_metric": "macro_f1",
|
| 148 |
+
"simple_primary_score": 0.6118237590630229,
|
| 149 |
+
"neural_status": "pass",
|
| 150 |
+
"neural_primary_metric": "macro_f1",
|
| 151 |
+
"neural_primary_score": 0.5862068965517241
|
| 152 |
+
}
|
| 153 |
+
],
|
| 154 |
+
"interpretation": "This layer verifies the 12 task contracts and raw multimodal feature pipeline on the public sample. It is not a cross-episode benchmark."
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"id": "v2_multi_episode_128_aligned_metadata_baselines",
|
| 158 |
+
"title": "128-Episode Aligned Simple/NN Baselines",
|
| 159 |
+
"status": "pass",
|
| 160 |
+
"scope": "selected 128-episode 96/16/16 split",
|
| 161 |
+
"source": "results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md",
|
| 162 |
+
"split": "train/val/test by selected episode/session",
|
| 163 |
+
"counts": {
|
| 164 |
+
"rows": 3808,
|
| 165 |
+
"split_counts": {
|
| 166 |
+
"train": 2848,
|
| 167 |
+
"val": 512,
|
| 168 |
+
"test": 448
|
| 169 |
+
},
|
| 170 |
+
"episode_counts": {
|
| 171 |
+
"test": 16,
|
| 172 |
+
"train": 96,
|
| 173 |
+
"val": 16
|
| 174 |
+
},
|
| 175 |
+
"task_count": 12,
|
| 176 |
+
"simple_supported_task_count": 8,
|
| 177 |
+
"neural_supported_task_count": 6
|
| 178 |
+
},
|
| 179 |
+
"models": [
|
| 180 |
+
"metadata/text simple baselines",
|
| 181 |
+
"metadata/text neural MLP baselines"
|
| 182 |
+
],
|
| 183 |
+
"task_metrics": [
|
| 184 |
+
{
|
| 185 |
+
"task": "timeline_action",
|
| 186 |
+
"task_display_name": "Action Recognition",
|
| 187 |
+
"simple_status": "pass",
|
| 188 |
+
"simple_primary_metric": "macro_f1",
|
| 189 |
+
"simple_primary_score": 0.00017511601435951318,
|
| 190 |
+
"neural_status": "pass",
|
| 191 |
+
"neural_primary_metric": "macro_f1",
|
| 192 |
+
"neural_primary_score": 0.0
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"task": "timeline_subtask",
|
| 196 |
+
"task_display_name": "Procedure Step Recognition",
|
| 197 |
+
"simple_status": "pass",
|
| 198 |
+
"simple_primary_metric": "macro_f1",
|
| 199 |
+
"simple_primary_score": 0.0,
|
| 200 |
+
"neural_status": "pass",
|
| 201 |
+
"neural_primary_metric": "macro_f1",
|
| 202 |
+
"neural_primary_score": 0.0
|
| 203 |
+
},
|
| 204 |
+
{
|
| 205 |
+
"task": "transition_detection",
|
| 206 |
+
"task_display_name": "Action Boundary Detection",
|
| 207 |
+
"simple_status": "pass",
|
| 208 |
+
"simple_primary_metric": "macro_f1",
|
| 209 |
+
"simple_primary_score": 0.5219803670507895,
|
| 210 |
+
"neural_status": "pass",
|
| 211 |
+
"neural_primary_metric": "macro_f1",
|
| 212 |
+
"neural_primary_score": 0.45822172492907925
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"task": "next_action",
|
| 216 |
+
"task_display_name": "Next-Action Prediction",
|
| 217 |
+
"simple_status": "pass",
|
| 218 |
+
"simple_primary_metric": "macro_f1",
|
| 219 |
+
"simple_primary_score": 0.00019966057701906761,
|
| 220 |
+
"neural_status": "pass",
|
| 221 |
+
"neural_primary_metric": "macro_f1",
|
| 222 |
+
"neural_primary_score": 0.0
|
| 223 |
+
},
|
| 224 |
+
{
|
| 225 |
+
"task": "hand_trajectory_forecast",
|
| 226 |
+
"task_display_name": "Hand Trajectory Forecasting",
|
| 227 |
+
"simple_status": "unsupported_without_raw_128_feature_blocks",
|
| 228 |
+
"simple_primary_metric": "mpjpe",
|
| 229 |
+
"simple_primary_score": null,
|
| 230 |
+
"neural_status": "not_run",
|
| 231 |
+
"neural_primary_metric": "",
|
| 232 |
+
"neural_primary_score": null
|
| 233 |
+
},
|
| 234 |
+
{
|
| 235 |
+
"task": "contact_prediction",
|
| 236 |
+
"task_display_name": "Contact State Prediction",
|
| 237 |
+
"simple_status": "pass",
|
| 238 |
+
"simple_primary_metric": "macro_f1",
|
| 239 |
+
"simple_primary_score": 0.5167950693374422,
|
| 240 |
+
"neural_status": "pass",
|
| 241 |
+
"neural_primary_metric": "macro_f1",
|
| 242 |
+
"neural_primary_score": 0.21951219512195122
|
| 243 |
+
},
|
| 244 |
+
{
|
| 245 |
+
"task": "object_relevance",
|
| 246 |
+
"task_display_name": "Object Relevance Prediction",
|
| 247 |
+
"simple_status": "pass",
|
| 248 |
+
"simple_primary_metric": "micro_f1",
|
| 249 |
+
"simple_primary_score": 0.18221614227086183,
|
| 250 |
+
"neural_status": "pass",
|
| 251 |
+
"neural_primary_metric": "micro_f1",
|
| 252 |
+
"neural_primary_score": 0.1053878034339846
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"task": "caption_grounding",
|
| 256 |
+
"task_display_name": "Language Grounding",
|
| 257 |
+
"simple_status": "pass",
|
| 258 |
+
"simple_primary_metric": "mrr",
|
| 259 |
+
"simple_primary_score": 0.012785504572093487,
|
| 260 |
+
"neural_status": "not_run",
|
| 261 |
+
"neural_primary_metric": "",
|
| 262 |
+
"neural_primary_score": null
|
| 263 |
+
},
|
| 264 |
+
{
|
| 265 |
+
"task": "cross_modal_retrieval",
|
| 266 |
+
"task_display_name": "Cross-Modal Retrieval",
|
| 267 |
+
"simple_status": "unsupported_without_raw_128_feature_blocks",
|
| 268 |
+
"simple_primary_metric": "mrr",
|
| 269 |
+
"simple_primary_score": null,
|
| 270 |
+
"neural_status": "not_run",
|
| 271 |
+
"neural_primary_metric": "",
|
| 272 |
+
"neural_primary_score": null
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"task": "modality_reconstruction",
|
| 276 |
+
"task_display_name": "Cross-Modal Reconstruction",
|
| 277 |
+
"simple_status": "unsupported_without_raw_128_feature_blocks",
|
| 278 |
+
"simple_primary_metric": "r2",
|
| 279 |
+
"simple_primary_score": null,
|
| 280 |
+
"neural_status": "not_run",
|
| 281 |
+
"neural_primary_metric": "",
|
| 282 |
+
"neural_primary_score": null
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"task": "temporal_order",
|
| 286 |
+
"task_display_name": "Temporal Order Verification",
|
| 287 |
+
"simple_status": "pass",
|
| 288 |
+
"simple_primary_metric": "f1",
|
| 289 |
+
"simple_primary_score": 0.32713178294573647,
|
| 290 |
+
"neural_status": "not_run",
|
| 291 |
+
"neural_primary_metric": "",
|
| 292 |
+
"neural_primary_score": null
|
| 293 |
+
},
|
| 294 |
+
{
|
| 295 |
+
"task": "misalignment_detection",
|
| 296 |
+
"task_display_name": "Multimodal Synchronization Detection",
|
| 297 |
+
"simple_status": "unsupported_without_raw_128_feature_blocks",
|
| 298 |
+
"simple_primary_metric": "f1",
|
| 299 |
+
"simple_primary_score": null,
|
| 300 |
+
"neural_status": "not_run",
|
| 301 |
+
"neural_primary_metric": "",
|
| 302 |
+
"neural_primary_score": null
|
| 303 |
+
}
|
| 304 |
+
],
|
| 305 |
+
"interpretation": "This layer aligns the previous simple and neural baseline framing to the same selected 96/16/16 split used by the model branches. It uses public-safe JSONL metadata/text features, so raw-feature-only tasks remain explicitly unsupported until 128-run sensor feature blocks exist."
|
| 306 |
+
},
|
| 307 |
+
{
|
| 308 |
+
"id": "v3_multi_episode_foundation_model_branches",
|
| 309 |
+
"title": "128-Episode Foundation-Model Branches",
|
| 310 |
+
"status": "partial_verified",
|
| 311 |
+
"scope": "selected 128-episode split and compatible derived windows",
|
| 312 |
+
"source": "results/omni_finetune/verified_public/",
|
| 313 |
+
"split": "episode/session held-out split; exact task target depends on backbone contract",
|
| 314 |
+
"counts": {
|
| 315 |
+
"verified_branch_count": 4,
|
| 316 |
+
"qwen3_verified_package_count": 3,
|
| 317 |
+
"cosmos3_verified_package_count": 1
|
| 318 |
+
},
|
| 319 |
+
"models": [
|
| 320 |
+
"Qwen3-Omni LoRA",
|
| 321 |
+
"Cosmos3-Nano future-window compatibility branch"
|
| 322 |
+
],
|
| 323 |
+
"branches": [
|
| 324 |
+
{
|
| 325 |
+
"id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
|
| 326 |
+
"title": "Cosmos3-Nano Future-Window World Model",
|
| 327 |
+
"status": "verified",
|
| 328 |
+
"backbone": "cosmos_world_model",
|
| 329 |
+
"dataset_contract": "xperience10m_future_window_world_model_v0",
|
| 330 |
+
"training_objective": "future_window_and_action_conditioned_world_modeling",
|
| 331 |
+
"source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json",
|
| 332 |
+
"dataset_run_id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat",
|
| 333 |
+
"train_run_id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter",
|
| 334 |
+
"eval_run_id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full",
|
| 335 |
+
"counts": {
|
| 336 |
+
"dataset_samples": 3213,
|
| 337 |
+
"dataset_episodes": 119,
|
| 338 |
+
"split_counts": {
|
| 339 |
+
"train": 2403,
|
| 340 |
+
"test": 378,
|
| 341 |
+
"val": 432
|
| 342 |
+
},
|
| 343 |
+
"train_samples": 2403,
|
| 344 |
+
"val_samples": 432,
|
| 345 |
+
"eval_samples": 378,
|
| 346 |
+
"held_out_episode_count": 14,
|
| 347 |
+
"num_processes": 1
|
| 348 |
+
},
|
| 349 |
+
"primary_metrics": {
|
| 350 |
+
"future_retrieval_mrr": 0.022138720585222767,
|
| 351 |
+
"future_retrieval_recall_at_5": 0.015873015873015872,
|
| 352 |
+
"temporal_consistency": 0.09523809523809523,
|
| 353 |
+
"feature_reconstruction_error": 3479.218317102503,
|
| 354 |
+
"transition_accuracy": 0.9682539682539683,
|
| 355 |
+
"contact_accuracy": 0.7433862433862434,
|
| 356 |
+
"held_out_episode_count": 14
|
| 357 |
+
},
|
| 358 |
+
"history": [
|
| 359 |
+
{
|
| 360 |
+
"epoch": 0,
|
| 361 |
+
"train_loss": null,
|
| 362 |
+
"val_loss": null,
|
| 363 |
+
"note": "closed-form mean-delta adapter; no Cosmos diffusion weights fine-tuned in this compatibility run"
|
| 364 |
+
}
|
| 365 |
+
]
|
| 366 |
+
},
|
| 367 |
+
{
|
| 368 |
+
"id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
|
| 369 |
+
"title": "Qwen3-Omni LoRA",
|
| 370 |
+
"status": "verified",
|
| 371 |
+
"backbone": "qwen3_omni_lora",
|
| 372 |
+
"dataset_contract": "xperience10m_episode_json_qa_v1",
|
| 373 |
+
"training_objective": "structured_episode_understanding_json_qa",
|
| 374 |
+
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/verified_result_summary.json",
|
| 375 |
+
"dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
|
| 376 |
+
"train_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora",
|
| 377 |
+
"eval_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
|
| 378 |
+
"counts": {
|
| 379 |
+
"dataset_samples": 3808,
|
| 380 |
+
"dataset_episodes": 119,
|
| 381 |
+
"split_counts": {
|
| 382 |
+
"train": 2848,
|
| 383 |
+
"val": 512,
|
| 384 |
+
"test": 448
|
| 385 |
+
},
|
| 386 |
+
"train_samples": 2848,
|
| 387 |
+
"val_samples": 512,
|
| 388 |
+
"eval_samples": 448,
|
| 389 |
+
"held_out_episode_count": 14,
|
| 390 |
+
"num_processes": 8
|
| 391 |
+
},
|
| 392 |
+
"primary_metrics": {
|
| 393 |
+
"json_validity_rate": 0.875,
|
| 394 |
+
"action_macro_f1": 0.0026621494447581404,
|
| 395 |
+
"subtask_accuracy": 0.006696428571428571,
|
| 396 |
+
"transition_accuracy": 0.8504464285714286,
|
| 397 |
+
"next_action_accuracy": 0.024553571428571428,
|
| 398 |
+
"contact_accuracy": 0.6450892857142857,
|
| 399 |
+
"object_micro_f1": 0.22299431459254582,
|
| 400 |
+
"held_out_episode_count": 14
|
| 401 |
+
},
|
| 402 |
+
"history": [
|
| 403 |
+
{
|
| 404 |
+
"epoch": 1,
|
| 405 |
+
"train_loss": 0.41304643672440994,
|
| 406 |
+
"val_loss": 0.0330660454928875,
|
| 407 |
+
"global_step": 356
|
| 408 |
+
}
|
| 409 |
+
]
|
| 410 |
+
},
|
| 411 |
+
{
|
| 412 |
+
"id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
|
| 413 |
+
"title": "Qwen3-Omni LoRA",
|
| 414 |
+
"status": "verified",
|
| 415 |
+
"backbone": "qwen3_omni_lora",
|
| 416 |
+
"dataset_contract": "xperience10m_episode_json_qa_v1",
|
| 417 |
+
"training_objective": "structured_episode_understanding_json_qa",
|
| 418 |
+
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/verified_result_summary.json",
|
| 419 |
+
"dataset_run_id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu",
|
| 420 |
+
"train_run_id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6",
|
| 421 |
+
"eval_run_id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
|
| 422 |
+
"counts": {
|
| 423 |
+
"dataset_samples": 3808,
|
| 424 |
+
"dataset_episodes": 119,
|
| 425 |
+
"split_counts": {
|
| 426 |
+
"train": 2848,
|
| 427 |
+
"val": 512,
|
| 428 |
+
"test": 448
|
| 429 |
+
},
|
| 430 |
+
"train_samples": 2848,
|
| 431 |
+
"val_samples": 0,
|
| 432 |
+
"eval_samples": 448,
|
| 433 |
+
"held_out_episode_count": 14,
|
| 434 |
+
"num_processes": 8
|
| 435 |
+
},
|
| 436 |
+
"primary_metrics": {
|
| 437 |
+
"json_validity_rate": 0.8526785714285714,
|
| 438 |
+
"action_macro_f1": 0.00213753459655099,
|
| 439 |
+
"subtask_accuracy": 0.004464285714285714,
|
| 440 |
+
"transition_accuracy": 0.828125,
|
| 441 |
+
"next_action_accuracy": 0.022321428571428572,
|
| 442 |
+
"contact_accuracy": 0.6517857142857143,
|
| 443 |
+
"object_micro_f1": 0.23062730627306272,
|
| 444 |
+
"held_out_episode_count": 14
|
| 445 |
+
},
|
| 446 |
+
"history": [
|
| 447 |
+
{
|
| 448 |
+
"epoch": 1,
|
| 449 |
+
"train_loss": 0.4121775626560694,
|
| 450 |
+
"val_loss": null,
|
| 451 |
+
"global_step": 356
|
| 452 |
+
}
|
| 453 |
+
]
|
| 454 |
+
},
|
| 455 |
+
{
|
| 456 |
+
"id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
| 457 |
+
"title": "Qwen3-Omni LoRA",
|
| 458 |
+
"status": "verified",
|
| 459 |
+
"backbone": "qwen3_omni_lora",
|
| 460 |
+
"dataset_contract": "xperience10m_episode_json_qa_v1",
|
| 461 |
+
"training_objective": "structured_episode_understanding_json_qa",
|
| 462 |
+
"source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/verified_result_summary.json",
|
| 463 |
+
"dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
|
| 464 |
+
"train_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora",
|
| 465 |
+
"eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
|
| 466 |
+
"counts": {
|
| 467 |
+
"dataset_samples": 3808,
|
| 468 |
+
"dataset_episodes": 119,
|
| 469 |
+
"split_counts": {
|
| 470 |
+
"train": 2848,
|
| 471 |
+
"val": 512,
|
| 472 |
+
"test": 448
|
| 473 |
+
},
|
| 474 |
+
"train_samples": 2848,
|
| 475 |
+
"val_samples": 512,
|
| 476 |
+
"eval_samples": 448,
|
| 477 |
+
"held_out_episode_count": 14,
|
| 478 |
+
"num_processes": 8
|
| 479 |
+
},
|
| 480 |
+
"primary_metrics": {
|
| 481 |
+
"json_validity_rate": 0.9977678571428571,
|
| 482 |
+
"action_macro_f1": 0.0024331644885523347,
|
| 483 |
+
"subtask_accuracy": 0.002232142857142857,
|
| 484 |
+
"transition_accuracy": 0.9709821428571429,
|
| 485 |
+
"next_action_accuracy": 0.029017857142857144,
|
| 486 |
+
"contact_accuracy": 0.71875,
|
| 487 |
+
"object_micro_f1": 0.30160427807486634,
|
| 488 |
+
"held_out_episode_count": 14
|
| 489 |
+
},
|
| 490 |
+
"history": [
|
| 491 |
+
{
|
| 492 |
+
"epoch": 1,
|
| 493 |
+
"train_loss": 0.41282760031950355,
|
| 494 |
+
"val_loss": 0.03288277983665466,
|
| 495 |
+
"global_step": 356
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"epoch": 2,
|
| 499 |
+
"train_loss": 0.027745448225544075,
|
| 500 |
+
"val_loss": 0.027823254466056824,
|
| 501 |
+
"global_step": 712
|
| 502 |
+
}
|
| 503 |
+
]
|
| 504 |
+
}
|
| 505 |
+
],
|
| 506 |
+
"interpretation": "This layer contains the held-out foundation-model packages. Qwen3-Omni packages evaluate structured JSON task prediction; Cosmos3-Nano currently evaluates a future-window world-model compatibility adapter, not a full diffusion-weight fine-tune."
|
| 507 |
+
}
|
| 508 |
+
],
|
| 509 |
+
"pending": [
|
| 510 |
+
"Use the final Qwen3 full-eval package as the current Qwen result; older Qwen package rows remain historical diagnostics for comparison.",
|
| 511 |
+
"Promote Cosmos3 from compatibility adapter to full Cosmos3 fine-tuning only after a separate environment with matching Diffusers/Cosmos dependencies is prepared."
|
| 512 |
+
]
|
| 513 |
+
}
|
metrics/project_brief.json
CHANGED
|
@@ -17,7 +17,7 @@
|
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"capability": "Scale-up planning",
|
| 20 |
-
"evidence": "verified 96/16/16 Qwen3-Omni
|
| 21 |
}
|
| 22 |
],
|
| 23 |
"current_artifacts": [
|
|
@@ -43,7 +43,7 @@
|
|
| 43 |
},
|
| 44 |
{
|
| 45 |
"layer": "Scale-up path",
|
| 46 |
-
"status": "A selected 96/16/16 Qwen3-Omni LoRA
|
| 47 |
}
|
| 48 |
],
|
| 49 |
"reading_order": [
|
|
@@ -54,8 +54,8 @@
|
|
| 54 |
"Inspect results/episode_task_suite/feature_manifest.json to understand one model input.",
|
| 55 |
"Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
|
| 56 |
],
|
| 57 |
-
"scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The
|
| 58 |
-
"next_stage": "Improve
|
| 59 |
"entry_points": {
|
| 60 |
"visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
|
| 61 |
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",
|
|
|
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"capability": "Scale-up planning",
|
| 20 |
+
"evidence": "final verified 96/16/16 Qwen3-Omni diagnostic result, same-split 128-episode baseline alignment, Cosmos3-Nano compatibility branch, and policy-model candidates after action-space conversion"
|
| 21 |
}
|
| 22 |
],
|
| 23 |
"current_artifacts": [
|
|
|
|
| 43 |
},
|
| 44 |
{
|
| 45 |
"layer": "Scale-up path",
|
| 46 |
+
"status": "A selected 96/16/16 Qwen3-Omni LoRA final diagnostic result is verified; strict-JSON validity meets target, while weak action/subtask metrics guide the next error-analysis pass"
|
| 47 |
}
|
| 48 |
],
|
| 49 |
"reading_order": [
|
|
|
|
| 54 |
"Inspect results/episode_task_suite/feature_manifest.json to understand one model input.",
|
| 55 |
"Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
|
| 56 |
],
|
| 57 |
+
"scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The final multi-episode Qwen3-Omni diagnostic result verifies the training loop and strict-JSON output reliability, but does not yet show strong action/subtask model quality.",
|
| 58 |
+
"next_stage": "Improve action/subtask quality through error analysis before larger robustness or alternative-backbone claims.",
|
| 59 |
"entry_points": {
|
| 60 |
"visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
|
| 61 |
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",
|
metrics/project_manifest.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"name": "Ropedia Xperience-10M Task Suite",
|
| 3 |
"slug": "ropedia-xperience-10m-task-suite",
|
| 4 |
"version": "0.1.0",
|
| 5 |
-
"status": "
|
| 6 |
-
"last_metadata_update": "2026-06-
|
| 7 |
-
"summary": "Research-development repo built around one public Xperience-10M sample episode plus a verified selected-episode Qwen3-Omni diagnostic
|
| 8 |
"scope_boundary": {
|
| 9 |
"raw_data_redistributed": false,
|
| 10 |
"episode_count_verified": 1,
|
|
@@ -19,7 +19,9 @@
|
|
| 19 |
"test": 16
|
| 20 |
},
|
| 21 |
"qwen3_omni_held_out_test_windows": 448,
|
| 22 |
-
"qwen3_omni_json_validity_rate": 0.
|
|
|
|
|
|
|
| 23 |
},
|
| 24 |
"public_surfaces": {
|
| 25 |
"github_repo": "https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite",
|
|
@@ -28,6 +30,7 @@
|
|
| 28 |
"hf_static_space": "https://cy0307-ropedia-xperience-10m-task-suite.static.hf.space/",
|
| 29 |
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts",
|
| 30 |
"hf_model_repo": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines",
|
|
|
|
| 31 |
"hf_collection": "https://huggingface.co/collections/cy0307/ropedia-xperience-10m-task-suite"
|
| 32 |
},
|
| 33 |
"upstream_sources": {
|
|
|
|
| 2 |
"name": "Ropedia Xperience-10M Task Suite",
|
| 3 |
"slug": "ropedia-xperience-10m-task-suite",
|
| 4 |
"version": "0.1.0",
|
| 5 |
+
"status": "single_episode_suite_plus_final_qwen3_omni_diagnostic_result",
|
| 6 |
+
"last_metadata_update": "2026-06-07",
|
| 7 |
+
"summary": "Research-development repo built around one public Xperience-10M sample episode plus a final verified selected-episode Qwen3-Omni diagnostic result.",
|
| 8 |
"scope_boundary": {
|
| 9 |
"raw_data_redistributed": false,
|
| 10 |
"episode_count_verified": 1,
|
|
|
|
| 19 |
"test": 16
|
| 20 |
},
|
| 21 |
"qwen3_omni_held_out_test_windows": 448,
|
| 22 |
+
"qwen3_omni_json_validity_rate": 0.9977678571428571,
|
| 23 |
+
"qwen3_omni_json_quality_target_met": true,
|
| 24 |
+
"qwen3_omni_lora_adapter_repo": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep"
|
| 25 |
},
|
| 26 |
"public_surfaces": {
|
| 27 |
"github_repo": "https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite",
|
|
|
|
| 30 |
"hf_static_space": "https://cy0307-ropedia-xperience-10m-task-suite.static.hf.space/",
|
| 31 |
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts",
|
| 32 |
"hf_model_repo": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines",
|
| 33 |
+
"hf_qwen3_lora_adapter": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
|
| 34 |
"hf_collection": "https://huggingface.co/collections/cy0307/ropedia-xperience-10m-task-suite"
|
| 35 |
},
|
| 36 |
"upstream_sources": {
|
metrics/project_packet.json
CHANGED
|
@@ -12,7 +12,7 @@
|
|
| 12 |
"raw_xperience10m_data_in_repo": false,
|
| 13 |
"audio_feature_status": "Audio is one of the synchronized source modalities in the current task representation.",
|
| 14 |
"qwen3_omni_32_episode_claim": false,
|
| 15 |
-
"qwen3_omni_status": "The selected 96/16/16 Qwen3-Omni
|
| 16 |
},
|
| 17 |
"reading_path": [
|
| 18 |
{
|
|
@@ -41,7 +41,7 @@
|
|
| 41 |
"docs/data/scope_claims_audit.json",
|
| 42 |
"docs/data/website_integrity.json"
|
| 43 |
],
|
| 44 |
-
"readout": "The project status table and roadmap give the compact current-state summary. Single-episode task engineering, metrics, visualizations, public website integrity, mirror parity,
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"step": 2,
|
|
@@ -116,7 +116,7 @@
|
|
| 116 |
"scripts/omni/discover_xperience10m_sources.py",
|
| 117 |
"docs/data/omni_finetune_verified_result.json"
|
| 118 |
],
|
| 119 |
-
"readout": "The selected-episode held-out Qwen3-Omni diagnostic
|
| 120 |
}
|
| 121 |
],
|
| 122 |
"project_status": "PROJECT_STATUS.md",
|
|
|
|
| 12 |
"raw_xperience10m_data_in_repo": false,
|
| 13 |
"audio_feature_status": "Audio is one of the synchronized source modalities in the current task representation.",
|
| 14 |
"qwen3_omni_32_episode_claim": false,
|
| 15 |
+
"qwen3_omni_status": "The selected 96/16/16 Qwen3-Omni final diagnostic result is verified, meets the strict-JSON target, and still has weak action/subtask metrics that guide the next error-analysis pass."
|
| 16 |
},
|
| 17 |
"reading_path": [
|
| 18 |
{
|
|
|
|
| 41 |
"docs/data/scope_claims_audit.json",
|
| 42 |
"docs/data/website_integrity.json"
|
| 43 |
],
|
| 44 |
+
"readout": "The project status table and roadmap give the compact current-state summary. Single-episode task engineering, metrics, visualizations, public website integrity, mirror parity, same-split 128-episode baselines, the final selected-episode Qwen3-Omni diagnostic result, and the Cosmos3-Nano compatibility package are implemented; stronger action/subtask and full Cosmos model quality remain follow-ups."
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"step": 2,
|
|
|
|
| 116 |
"scripts/omni/discover_xperience10m_sources.py",
|
| 117 |
"docs/data/omni_finetune_verified_result.json"
|
| 118 |
],
|
| 119 |
+
"readout": "The selected-episode held-out Qwen3-Omni final diagnostic result is verified and JSON-format reliability meets the 98% target. The next milestone is action/subtask error analysis and a stronger model-quality run on the same split."
|
| 120 |
}
|
| 121 |
],
|
| 122 |
"project_status": "PROJECT_STATUS.md",
|
metrics/project_status.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Project Status",
|
| 3 |
"version": "2026-06-01",
|
| 4 |
-
"decision": "
|
| 5 |
-
"research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior,
|
| 6 |
"scope_boundary": {
|
| 7 |
"validated_episode_count": 1,
|
| 8 |
"aligned_frames": 5821,
|
|
@@ -25,8 +25,13 @@
|
|
| 25 |
"val": 512,
|
| 26 |
"test": 448
|
| 27 |
},
|
| 28 |
-
"qwen3_omni_json_validity_rate": 0.
|
| 29 |
"qwen3_omni_validation_aware": true,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
"multi_episode_128_aligned_baselines": true,
|
| 31 |
"multi_episode_128_baseline_window_counts": {
|
| 32 |
"train": 2848,
|
|
@@ -102,7 +107,7 @@
|
|
| 102 |
"RESEARCH_ROADMAP.md",
|
| 103 |
"docs/data/research_roadmap.json"
|
| 104 |
],
|
| 105 |
-
"readout": "The roadmap connects public-sample task development to the verified Qwen3-Omni diagnostic
|
| 106 |
},
|
| 107 |
{
|
| 108 |
"area": "Foundation-model plan",
|
|
@@ -111,7 +116,7 @@
|
|
| 111 |
"FOUNDATION_MODEL_PLAN.md",
|
| 112 |
"docs/data/foundation_model_plan.json"
|
| 113 |
],
|
| 114 |
-
"readout": "Qwen3-Omni remains the first trainable held-out LoRA baseline; Cosmos 3 is
|
| 115 |
},
|
| 116 |
{
|
| 117 |
"area": "Omni model extension contract",
|
|
@@ -191,18 +196,39 @@
|
|
| 191 |
],
|
| 192 |
"readout": "The earlier simple and neural baseline framing is aligned to the selected 96/16/16 episode split used by the Qwen3-Omni pilot. JSON-supported tasks have metadata/text simple and neural MLP metrics; raw-feature-only tasks are explicitly marked unsupported until 128-run sensor feature blocks are available."
|
| 193 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
{
|
| 195 |
"area": "Qwen3-Omni fine-tuning",
|
| 196 |
-
"status": "
|
| 197 |
"evidence": [
|
| 198 |
"docs/data/omni_finetune_verified_result.json",
|
| 199 |
-
"results/omni_finetune/verified_public/",
|
| 200 |
-
"
|
| 201 |
"scripts/omni/package_verified_omni_result.py",
|
| 202 |
"scripts/omni/audit_verified_omni_package.py",
|
| 203 |
"scripts/omni/analyze_qwen3_omni_errors.py"
|
| 204 |
],
|
| 205 |
-
"readout": "The selected 96/16/16 episode split produced a
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
},
|
| 207 |
{
|
| 208 |
"area": "Raw Xperience-10M redistribution",
|
|
@@ -228,12 +254,15 @@
|
|
| 228 |
"Inspect SOURCE_ALIGNMENT_AUDIT.md before judging source-card consistency across public surfaces.",
|
| 229 |
"Inspect XPERIENCE10M_DATASET_CARD_ALIGNMENT.md before judging dataset wording.",
|
| 230 |
"Inspect results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md before comparing simple/NN baselines to the selected 128-episode setup.",
|
|
|
|
| 231 |
"Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
|
| 232 |
],
|
| 233 |
"current_reading_notes": [
|
| 234 |
-
"The
|
| 235 |
-
"Use docs/data/
|
|
|
|
| 236 |
"The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
|
|
|
|
| 237 |
"The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
|
| 238 |
"Audio is one of the synchronized source modalities in the current task representation.",
|
| 239 |
"The audio ablation report compares audio/no-audio variants across all 12 task contracts in results/audio_ablation/.",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Project Status",
|
| 3 |
"version": "2026-06-01",
|
| 4 |
+
"decision": "public_sample_pipeline_verified_128_aligned_baselines_qwen3_cosmos_comparison",
|
| 5 |
+
"research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, aligns simple/NN baselines to the selected 128-episode split, and compares verified Qwen3-Omni and Cosmos3 branch packages as early cross-episode diagnostics.",
|
| 6 |
"scope_boundary": {
|
| 7 |
"validated_episode_count": 1,
|
| 8 |
"aligned_frames": 5821,
|
|
|
|
| 25 |
"val": 512,
|
| 26 |
"test": 448
|
| 27 |
},
|
| 28 |
+
"qwen3_omni_json_validity_rate": 0.9977678571428571,
|
| 29 |
"qwen3_omni_validation_aware": true,
|
| 30 |
+
"qwen3_omni_json_quality_target_met": true,
|
| 31 |
+
"qwen3_omni_lora_adapter_repo": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
|
| 32 |
+
"cosmos3_nano_future_window_compatibility_verified": true,
|
| 33 |
+
"cosmos3_nano_future_window_test_predictions": 378,
|
| 34 |
+
"omni_model_comparison_available": true,
|
| 35 |
"multi_episode_128_aligned_baselines": true,
|
| 36 |
"multi_episode_128_baseline_window_counts": {
|
| 37 |
"train": 2848,
|
|
|
|
| 107 |
"RESEARCH_ROADMAP.md",
|
| 108 |
"docs/data/research_roadmap.json"
|
| 109 |
],
|
| 110 |
+
"readout": "The roadmap connects public-sample task development to the final verified Qwen3-Omni diagnostic result, same-split baseline alignment, action/subtask error analysis, robustness runs, world/policy branches, and the future Xperience-native pretraining goal."
|
| 111 |
},
|
| 112 |
{
|
| 113 |
"area": "Foundation-model plan",
|
|
|
|
| 116 |
"FOUNDATION_MODEL_PLAN.md",
|
| 117 |
"docs/data/foundation_model_plan.json"
|
| 118 |
],
|
| 119 |
+
"readout": "Qwen3-Omni remains the first trainable held-out LoRA baseline; Cosmos 3 is now represented by a verified Cosmos3-Nano future-window compatibility package and remains the first world-model/action-generation branch; OpenVLA/openpi/GR00T are policy candidates after action targets are explicit."
|
| 120 |
},
|
| 121 |
{
|
| 122 |
"area": "Omni model extension contract",
|
|
|
|
| 196 |
],
|
| 197 |
"readout": "The earlier simple and neural baseline framing is aligned to the selected 96/16/16 episode split used by the Qwen3-Omni pilot. JSON-supported tasks have metadata/text simple and neural MLP metrics; raw-feature-only tasks are explicitly marked unsupported until 128-run sensor feature blocks are available."
|
| 198 |
},
|
| 199 |
+
{
|
| 200 |
+
"area": "Current result comparison",
|
| 201 |
+
"status": "verified_generated_summary",
|
| 202 |
+
"evidence": [
|
| 203 |
+
"docs/data/omni_model_comparison.json",
|
| 204 |
+
"results/omni_finetune/OMNI_MODEL_COMPARISON.md",
|
| 205 |
+
"scripts/omni/build_omni_model_comparison.py"
|
| 206 |
+
],
|
| 207 |
+
"readout": "The public comparison separates three layers: the single-episode raw-feature task suite, the selected 128-episode simple/NN metadata baselines, and verified foundation-model branch packages for Qwen3-Omni and Cosmos3-Nano future-window compatibility."
|
| 208 |
+
},
|
| 209 |
{
|
| 210 |
"area": "Qwen3-Omni fine-tuning",
|
| 211 |
+
"status": "final_verified_diagnostic_result_json_target_met",
|
| 212 |
"evidence": [
|
| 213 |
"docs/data/omni_finetune_verified_result.json",
|
| 214 |
+
"results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/",
|
| 215 |
+
"https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep",
|
| 216 |
"scripts/omni/package_verified_omni_result.py",
|
| 217 |
"scripts/omni/audit_verified_omni_package.py",
|
| 218 |
"scripts/omni/analyze_qwen3_omni_errors.py"
|
| 219 |
],
|
| 220 |
+
"readout": "The selected 96/16/16 episode split produced a final public-safe held-out package with 3,808 exported windows, 512 validation windows, 448 test predictions, two training epochs, validation/audit summaries, and a public LoRA adapter repo. JSON validity is 99.78%, meeting the 98% target; transition accuracy is 97.10%, contact accuracy is 71.88%, object micro-F1 is 30.16%, and action/subtask metrics remain weak, so it is still a diagnostic baseline rather than a strong model-quality claim."
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"area": "Cosmos3-Nano future-window branch",
|
| 224 |
+
"status": "verified_compatibility_result",
|
| 225 |
+
"evidence": [
|
| 226 |
+
"configs/omni_backbones/cosmos_world_model.json",
|
| 227 |
+
"scripts/omni/export_cosmos3_future_window_dataset.py",
|
| 228 |
+
"scripts/omni/eval_cosmos3_future_window_retrieval.py",
|
| 229 |
+
"results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json"
|
| 230 |
+
],
|
| 231 |
+
"readout": "The Cosmos3-Nano branch now has a public-safe verified future-window compatibility package with 3,213 future-window samples, 378 held-out test predictions, future retrieval MRR 0.0221, temporal consistency 0.0952, transition accuracy 0.9683, and contact accuracy 0.7434. It is a compatibility adapter result, not a full Cosmos diffusion-weight fine-tune."
|
| 232 |
},
|
| 233 |
{
|
| 234 |
"area": "Raw Xperience-10M redistribution",
|
|
|
|
| 254 |
"Inspect SOURCE_ALIGNMENT_AUDIT.md before judging source-card consistency across public surfaces.",
|
| 255 |
"Inspect XPERIENCE10M_DATASET_CARD_ALIGNMENT.md before judging dataset wording.",
|
| 256 |
"Inspect results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md before comparing simple/NN baselines to the selected 128-episode setup.",
|
| 257 |
+
"Inspect docs/data/omni_model_comparison.json before comparing the current three result versions.",
|
| 258 |
"Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
|
| 259 |
],
|
| 260 |
"current_reading_notes": [
|
| 261 |
+
"The final Qwen3-Omni diagnostic result is verified and meets the strict-JSON target, but action/subtask held-out quality is still weak.",
|
| 262 |
+
"Use docs/data/omni_model_comparison.json to compare the single-episode task suite, 128-episode aligned baselines, and verified Qwen3/Cosmos branch packages without mixing incompatible metric targets.",
|
| 263 |
+
"Use docs/data/omni_finetune_verified_result.json and the latest verified_public final Qwen package for current held-out results.",
|
| 264 |
"The 128-episode aligned simple/NN baselines use metadata/text features from the derived Qwen JSONL export; they align the split and task ids but do not replace raw-modality baselines for trajectory, retrieval, reconstruction, or misalignment tasks.",
|
| 265 |
+
"The Cosmos3-Nano future-window branch is verified as a compatibility adapter result; full Cosmos diffusion-weight fine-tuning remains pending.",
|
| 266 |
"The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
|
| 267 |
"Audio is one of the synchronized source modalities in the current task representation.",
|
| 268 |
"The audio ablation report compares audio/no-audio variants across all 12 task contracts in results/audio_ablation/.",
|
metrics/publication_audit.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
@@ -182,8 +182,8 @@
|
|
| 182 |
"github_repo": {
|
| 183 |
"root": "repo",
|
| 184 |
"exists": true,
|
| 185 |
-
"file_count":
|
| 186 |
-
"text_file_count":
|
| 187 |
"largest_file": {
|
| 188 |
"path": "tmp/omni_128_dataset_fetch/dataset.jsonl",
|
| 189 |
"bytes": 582271586
|
|
@@ -193,8 +193,8 @@
|
|
| 193 |
"hf_space_bundle": {
|
| 194 |
"root": "hf_publish/space",
|
| 195 |
"exists": true,
|
| 196 |
-
"file_count":
|
| 197 |
-
"text_file_count":
|
| 198 |
"largest_file": {
|
| 199 |
"path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
|
| 200 |
"bytes": 55702978
|
|
@@ -204,8 +204,8 @@
|
|
| 204 |
"hf_artifact_bundle": {
|
| 205 |
"root": "hf_publish/artifacts",
|
| 206 |
"exists": true,
|
| 207 |
-
"file_count":
|
| 208 |
-
"text_file_count":
|
| 209 |
"largest_file": {
|
| 210 |
"path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
|
| 211 |
"bytes": 55702978
|
|
@@ -215,8 +215,8 @@
|
|
| 215 |
"hf_model_bundle": {
|
| 216 |
"root": "hf_publish/model",
|
| 217 |
"exists": true,
|
| 218 |
-
"file_count":
|
| 219 |
-
"text_file_count":
|
| 220 |
"largest_file": {
|
| 221 |
"path": "pytorch_model.bin",
|
| 222 |
"bytes": 93495480
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-06T23:31:25+00:00",
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
|
|
| 182 |
"github_repo": {
|
| 183 |
"root": "repo",
|
| 184 |
"exists": true,
|
| 185 |
+
"file_count": 586,
|
| 186 |
+
"text_file_count": 497,
|
| 187 |
"largest_file": {
|
| 188 |
"path": "tmp/omni_128_dataset_fetch/dataset.jsonl",
|
| 189 |
"bytes": 582271586
|
|
|
|
| 193 |
"hf_space_bundle": {
|
| 194 |
"root": "hf_publish/space",
|
| 195 |
"exists": true,
|
| 196 |
+
"file_count": 460,
|
| 197 |
+
"text_file_count": 380,
|
| 198 |
"largest_file": {
|
| 199 |
"path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
|
| 200 |
"bytes": 55702978
|
|
|
|
| 204 |
"hf_artifact_bundle": {
|
| 205 |
"root": "hf_publish/artifacts",
|
| 206 |
"exists": true,
|
| 207 |
+
"file_count": 631,
|
| 208 |
+
"text_file_count": 527,
|
| 209 |
"largest_file": {
|
| 210 |
"path": "results/episode_task_suite/modality_reconstruction/predictions.npz",
|
| 211 |
"bytes": 55702978
|
|
|
|
| 215 |
"hf_model_bundle": {
|
| 216 |
"root": "hf_publish/model",
|
| 217 |
"exists": true,
|
| 218 |
+
"file_count": 819,
|
| 219 |
+
"text_file_count": 680,
|
| 220 |
"largest_file": {
|
| 221 |
"path": "pytorch_model.bin",
|
| 222 |
"bytes": 93495480
|
metrics/reproducibility_matrix.json
CHANGED
|
@@ -79,10 +79,10 @@
|
|
| 79 |
},
|
| 80 |
{
|
| 81 |
"id": "qwen3_omni_multi_episode_pilot",
|
| 82 |
-
"status": "
|
| 83 |
"command": "scripts/omni/build_qwen3_omni_dataset.py and scripts/omni/train_qwen3_omni_lora.py on the selected gated episodes",
|
| 84 |
-
"expected": "verified diagnostic LoRA package with 3,808 exported windows, 2,848 train windows, and 448 held-out test predictions",
|
| 85 |
-
"boundary": "the public package records metrics and manifests, but rerunning requires gated Xperience-10M episode access and base-model weights; current JSON validity is
|
| 86 |
}
|
| 87 |
]
|
| 88 |
}
|
|
|
|
| 79 |
},
|
| 80 |
{
|
| 81 |
"id": "qwen3_omni_multi_episode_pilot",
|
| 82 |
+
"status": "verified_final_diagnostic_result_not_publicly_rerunnable_without_gated_data",
|
| 83 |
"command": "scripts/omni/build_qwen3_omni_dataset.py and scripts/omni/train_qwen3_omni_lora.py on the selected gated episodes",
|
| 84 |
+
"expected": "verified final diagnostic LoRA package with 3,808 exported windows, 2,848 train windows, and 448 held-out test predictions",
|
| 85 |
+
"boundary": "the public package records metrics and manifests, but rerunning requires gated Xperience-10M episode access and base-model weights; current JSON validity is 99.78%, meeting the 98% target, while action/subtask metrics remain weak"
|
| 86 |
}
|
| 87 |
]
|
| 88 |
}
|
metrics/research_directions.json
CHANGED
|
@@ -30,6 +30,12 @@
|
|
| 30 |
"contact_prediction",
|
| 31 |
"object_relevance"
|
| 32 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
"counts": {
|
| 34 |
"direct": 2,
|
| 35 |
"proxy": 2,
|
|
@@ -54,6 +60,11 @@
|
|
| 54 |
"modality_reconstruction",
|
| 55 |
"misalignment_detection"
|
| 56 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
"counts": {
|
| 58 |
"direct": 0,
|
| 59 |
"proxy": 2,
|
|
@@ -86,6 +97,19 @@
|
|
| 86 |
"temporal_order",
|
| 87 |
"misalignment_detection"
|
| 88 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
"counts": {
|
| 90 |
"direct": 6,
|
| 91 |
"proxy": 2,
|
|
@@ -116,6 +140,17 @@
|
|
| 116 |
"temporal_order",
|
| 117 |
"misalignment_detection"
|
| 118 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
"counts": {
|
| 120 |
"direct": 0,
|
| 121 |
"proxy": 6,
|
|
@@ -137,6 +172,8 @@
|
|
| 137 |
},
|
| 138 |
"why": "Reads egocentric sensor state as the current human action; also provides a weak human-motion readout.",
|
| 139 |
"current_limit": "Chronological single-episode split creates unseen future action classes.",
|
|
|
|
|
|
|
| 140 |
"metric": {
|
| 141 |
"key": "macro_f1",
|
| 142 |
"name": "macro-F1",
|
|
@@ -158,6 +195,8 @@
|
|
| 158 |
},
|
| 159 |
"why": "Segments egocentric task state and provides a first proxy for symbolic world/task state.",
|
| 160 |
"current_limit": "Single-episode ordering makes future subtasks hard to generalize.",
|
|
|
|
|
|
|
| 161 |
"metric": {
|
| 162 |
"key": "macro_f1",
|
| 163 |
"name": "macro-F1",
|
|
@@ -179,6 +218,8 @@
|
|
| 179 |
},
|
| 180 |
"why": "Localizes egocentric task boundaries and diagnoses temporal state changes.",
|
| 181 |
"current_limit": "Boundary class is sparse, so accuracy alone is misleading.",
|
|
|
|
|
|
|
| 182 |
"metric": {
|
| 183 |
"key": "macro_f1",
|
| 184 |
"name": "macro-F1",
|
|
@@ -200,6 +241,8 @@
|
|
| 200 |
},
|
| 201 |
"why": "Tests action intention/task-flow prediction from egocentric context.",
|
| 202 |
"current_limit": "Unseen future labels dominate the single-episode chronological test.",
|
|
|
|
|
|
|
| 203 |
"metric": {
|
| 204 |
"key": "macro_f1",
|
| 205 |
"name": "macro-F1",
|
|
@@ -221,6 +264,8 @@
|
|
| 221 |
},
|
| 222 |
"why": "Directly predicts human hand motion and supports hand-object interaction modeling.",
|
| 223 |
"current_limit": "Forecasting is window-level and not yet a full sequence or policy model.",
|
|
|
|
|
|
|
| 224 |
"metric": {
|
| 225 |
"key": "mpjpe",
|
| 226 |
"name": "MPJPE",
|
|
@@ -242,6 +287,8 @@
|
|
| 242 |
},
|
| 243 |
"why": "Targets physical interaction state, a core affordance and manipulation signal.",
|
| 244 |
"current_limit": "The public sample is degenerate for this target because one class dominates.",
|
|
|
|
|
|
|
| 245 |
"metric": {
|
| 246 |
"key": "macro_f1",
|
| 247 |
"name": "macro-F1",
|
|
@@ -264,6 +311,8 @@
|
|
| 264 |
},
|
| 265 |
"why": "Connects egocentric activity to manipulated objects and early object-centric state.",
|
| 266 |
"current_limit": "Object labels are language-derived and sparse in one episode.",
|
|
|
|
|
|
|
| 267 |
"metric": {
|
| 268 |
"key": "micro_f1",
|
| 269 |
"name": "micro-F1",
|
|
@@ -285,6 +334,8 @@
|
|
| 285 |
},
|
| 286 |
"why": "Grounds language annotation into egocentric sensor time and task state.",
|
| 287 |
"current_limit": "Bag-of-objects language features are too weak for rich grounding.",
|
|
|
|
|
|
|
| 288 |
"metric": {
|
| 289 |
"key": "mrr",
|
| 290 |
"name": "MRR",
|
|
@@ -307,6 +358,8 @@
|
|
| 307 |
},
|
| 308 |
"why": "Tests whether synchronized modalities identify the same 4D moment, a prerequisite for reconstruction and world modeling.",
|
| 309 |
"current_limit": "Retrieval shows an alignment signal, not geometric reconstruction.",
|
|
|
|
|
|
|
| 310 |
"metric": {
|
| 311 |
"key": "mrr",
|
| 312 |
"name": "MRR",
|
|
@@ -328,6 +381,8 @@
|
|
| 328 |
},
|
| 329 |
"why": "Predicts visual/depth state from non-target sensors as a weak reconstruction/world-model objective.",
|
| 330 |
"current_limit": "Feature-vector reconstruction is not pixel, depth-map, mesh, NeRF, or Gaussian reconstruction.",
|
|
|
|
|
|
|
| 331 |
"metric": {
|
| 332 |
"key": "r2",
|
| 333 |
"name": "R2",
|
|
@@ -349,6 +404,8 @@
|
|
| 349 |
},
|
| 350 |
"why": "Checks whether features encode local time direction and task progression.",
|
| 351 |
"current_limit": "Only local adjacent ordering, not long-horizon causal modeling.",
|
|
|
|
|
|
|
| 352 |
"metric": {
|
| 353 |
"key": "f1",
|
| 354 |
"name": "F1",
|
|
@@ -371,6 +428,8 @@
|
|
| 371 |
},
|
| 372 |
"why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models.",
|
| 373 |
"current_limit": "Synthetic shifts diagnose alignment but do not solve calibration or mapping.",
|
|
|
|
|
|
|
| 374 |
"metric": {
|
| 375 |
"key": "f1",
|
| 376 |
"name": "F1",
|
|
|
|
| 30 |
"contact_prediction",
|
| 31 |
"object_relevance"
|
| 32 |
],
|
| 33 |
+
"task_display_names": [
|
| 34 |
+
"Action Recognition",
|
| 35 |
+
"Hand Trajectory Forecasting",
|
| 36 |
+
"Contact State Prediction",
|
| 37 |
+
"Object Relevance Prediction"
|
| 38 |
+
],
|
| 39 |
"counts": {
|
| 40 |
"direct": 2,
|
| 41 |
"proxy": 2,
|
|
|
|
| 60 |
"modality_reconstruction",
|
| 61 |
"misalignment_detection"
|
| 62 |
],
|
| 63 |
+
"task_display_names": [
|
| 64 |
+
"Cross-Modal Retrieval",
|
| 65 |
+
"Cross-Modal Reconstruction",
|
| 66 |
+
"Multimodal Synchronization Detection"
|
| 67 |
+
],
|
| 68 |
"counts": {
|
| 69 |
"direct": 0,
|
| 70 |
"proxy": 2,
|
|
|
|
| 97 |
"temporal_order",
|
| 98 |
"misalignment_detection"
|
| 99 |
],
|
| 100 |
+
"task_display_names": [
|
| 101 |
+
"Action Recognition",
|
| 102 |
+
"Procedure Step Recognition",
|
| 103 |
+
"Action Boundary Detection",
|
| 104 |
+
"Next-Action Prediction",
|
| 105 |
+
"Hand Trajectory Forecasting",
|
| 106 |
+
"Contact State Prediction",
|
| 107 |
+
"Object Relevance Prediction",
|
| 108 |
+
"Language Grounding",
|
| 109 |
+
"Cross-Modal Retrieval",
|
| 110 |
+
"Temporal Order Verification",
|
| 111 |
+
"Multimodal Synchronization Detection"
|
| 112 |
+
],
|
| 113 |
"counts": {
|
| 114 |
"direct": 6,
|
| 115 |
"proxy": 2,
|
|
|
|
| 140 |
"temporal_order",
|
| 141 |
"misalignment_detection"
|
| 142 |
],
|
| 143 |
+
"task_display_names": [
|
| 144 |
+
"Procedure Step Recognition",
|
| 145 |
+
"Action Boundary Detection",
|
| 146 |
+
"Next-Action Prediction",
|
| 147 |
+
"Object Relevance Prediction",
|
| 148 |
+
"Language Grounding",
|
| 149 |
+
"Cross-Modal Retrieval",
|
| 150 |
+
"Cross-Modal Reconstruction",
|
| 151 |
+
"Temporal Order Verification",
|
| 152 |
+
"Multimodal Synchronization Detection"
|
| 153 |
+
],
|
| 154 |
"counts": {
|
| 155 |
"direct": 0,
|
| 156 |
"proxy": 6,
|
|
|
|
| 172 |
},
|
| 173 |
"why": "Reads egocentric sensor state as the current human action; also provides a weak human-motion readout.",
|
| 174 |
"current_limit": "Chronological single-episode split creates unseen future action classes.",
|
| 175 |
+
"display_name": "Action Recognition",
|
| 176 |
+
"artifact_id": "timeline_action",
|
| 177 |
"metric": {
|
| 178 |
"key": "macro_f1",
|
| 179 |
"name": "macro-F1",
|
|
|
|
| 195 |
},
|
| 196 |
"why": "Segments egocentric task state and provides a first proxy for symbolic world/task state.",
|
| 197 |
"current_limit": "Single-episode ordering makes future subtasks hard to generalize.",
|
| 198 |
+
"display_name": "Procedure Step Recognition",
|
| 199 |
+
"artifact_id": "timeline_subtask",
|
| 200 |
"metric": {
|
| 201 |
"key": "macro_f1",
|
| 202 |
"name": "macro-F1",
|
|
|
|
| 218 |
},
|
| 219 |
"why": "Localizes egocentric task boundaries and diagnoses temporal state changes.",
|
| 220 |
"current_limit": "Boundary class is sparse, so accuracy alone is misleading.",
|
| 221 |
+
"display_name": "Action Boundary Detection",
|
| 222 |
+
"artifact_id": "transition_detection",
|
| 223 |
"metric": {
|
| 224 |
"key": "macro_f1",
|
| 225 |
"name": "macro-F1",
|
|
|
|
| 241 |
},
|
| 242 |
"why": "Tests action intention/task-flow prediction from egocentric context.",
|
| 243 |
"current_limit": "Unseen future labels dominate the single-episode chronological test.",
|
| 244 |
+
"display_name": "Next-Action Prediction",
|
| 245 |
+
"artifact_id": "next_action",
|
| 246 |
"metric": {
|
| 247 |
"key": "macro_f1",
|
| 248 |
"name": "macro-F1",
|
|
|
|
| 264 |
},
|
| 265 |
"why": "Directly predicts human hand motion and supports hand-object interaction modeling.",
|
| 266 |
"current_limit": "Forecasting is window-level and not yet a full sequence or policy model.",
|
| 267 |
+
"display_name": "Hand Trajectory Forecasting",
|
| 268 |
+
"artifact_id": "hand_trajectory_forecast",
|
| 269 |
"metric": {
|
| 270 |
"key": "mpjpe",
|
| 271 |
"name": "MPJPE",
|
|
|
|
| 287 |
},
|
| 288 |
"why": "Targets physical interaction state, a core affordance and manipulation signal.",
|
| 289 |
"current_limit": "The public sample is degenerate for this target because one class dominates.",
|
| 290 |
+
"display_name": "Contact State Prediction",
|
| 291 |
+
"artifact_id": "contact_prediction",
|
| 292 |
"metric": {
|
| 293 |
"key": "macro_f1",
|
| 294 |
"name": "macro-F1",
|
|
|
|
| 311 |
},
|
| 312 |
"why": "Connects egocentric activity to manipulated objects and early object-centric state.",
|
| 313 |
"current_limit": "Object labels are language-derived and sparse in one episode.",
|
| 314 |
+
"display_name": "Object Relevance Prediction",
|
| 315 |
+
"artifact_id": "object_relevance",
|
| 316 |
"metric": {
|
| 317 |
"key": "micro_f1",
|
| 318 |
"name": "micro-F1",
|
|
|
|
| 334 |
},
|
| 335 |
"why": "Grounds language annotation into egocentric sensor time and task state.",
|
| 336 |
"current_limit": "Bag-of-objects language features are too weak for rich grounding.",
|
| 337 |
+
"display_name": "Language Grounding",
|
| 338 |
+
"artifact_id": "caption_grounding",
|
| 339 |
"metric": {
|
| 340 |
"key": "mrr",
|
| 341 |
"name": "MRR",
|
|
|
|
| 358 |
},
|
| 359 |
"why": "Tests whether synchronized modalities identify the same 4D moment, a prerequisite for reconstruction and world modeling.",
|
| 360 |
"current_limit": "Retrieval shows an alignment signal, not geometric reconstruction.",
|
| 361 |
+
"display_name": "Cross-Modal Retrieval",
|
| 362 |
+
"artifact_id": "cross_modal_retrieval",
|
| 363 |
"metric": {
|
| 364 |
"key": "mrr",
|
| 365 |
"name": "MRR",
|
|
|
|
| 381 |
},
|
| 382 |
"why": "Predicts visual/depth state from non-target sensors as a weak reconstruction/world-model objective.",
|
| 383 |
"current_limit": "Feature-vector reconstruction is not pixel, depth-map, mesh, NeRF, or Gaussian reconstruction.",
|
| 384 |
+
"display_name": "Cross-Modal Reconstruction",
|
| 385 |
+
"artifact_id": "modality_reconstruction",
|
| 386 |
"metric": {
|
| 387 |
"key": "r2",
|
| 388 |
"name": "R2",
|
|
|
|
| 404 |
},
|
| 405 |
"why": "Checks whether features encode local time direction and task progression.",
|
| 406 |
"current_limit": "Only local adjacent ordering, not long-horizon causal modeling.",
|
| 407 |
+
"display_name": "Temporal Order Verification",
|
| 408 |
+
"artifact_id": "temporal_order",
|
| 409 |
"metric": {
|
| 410 |
"key": "f1",
|
| 411 |
"name": "F1",
|
|
|
|
| 428 |
},
|
| 429 |
"why": "Detects temporal desynchronization, a key data-quality gate for multimodal reconstruction and world models.",
|
| 430 |
"current_limit": "Synthetic shifts diagnose alignment but do not solve calibration or mapping.",
|
| 431 |
+
"display_name": "Multimodal Synchronization Detection",
|
| 432 |
+
"artifact_id": "misalignment_detection",
|
| 433 |
"metric": {
|
| 434 |
"key": "f1",
|
| 435 |
"name": "F1",
|
metrics/research_roadmap.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Research Roadmap",
|
| 3 |
-
"summary": "Staged path from the public-sample task lab to a verified
|
| 4 |
-
"current_decision_point": "Keep the public-sample task suite as the development harness, use the verified selected-episode Qwen3-Omni
|
| 5 |
"additional_development_directions": {
|
| 6 |
"source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
|
| 7 |
"source_json": "docs/data/additional_development_directions.json",
|
|
@@ -52,7 +52,7 @@
|
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"id": "qwen3_omni_lora_diagnostic_pilot",
|
| 55 |
-
"name": "Qwen3-Omni LoRA
|
| 56 |
"status": "verified_baseline",
|
| 57 |
"entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
|
| 58 |
"deliverables": [
|
|
@@ -63,7 +63,8 @@
|
|
| 63 |
"held-out predictions",
|
| 64 |
"metrics",
|
| 65 |
"confusion matrices",
|
| 66 |
-
"run report"
|
|
|
|
| 67 |
],
|
| 68 |
"completion_evidence": [
|
| 69 |
"docs/data/omni_finetune_verified_result.json",
|
|
@@ -75,7 +76,7 @@
|
|
| 75 |
"predictions.jsonl",
|
| 76 |
"RUN_REPORT.md"
|
| 77 |
],
|
| 78 |
-
"reader_takeaway": "The
|
| 79 |
},
|
| 80 |
{
|
| 81 |
"id": "multi_episode_128_same_split_baselines",
|
|
@@ -97,23 +98,23 @@
|
|
| 97 |
},
|
| 98 |
{
|
| 99 |
"id": "qwen3_omni_structured_output_error_analysis",
|
| 100 |
-
"name": "
|
| 101 |
"status": "active_next_step",
|
| 102 |
-
"entry_condition": "The
|
| 103 |
"deliverables": [
|
| 104 |
"same 96/16/16 episode split",
|
| 105 |
-
"
|
| 106 |
-
"
|
|
|
|
| 107 |
"held-out test evaluation",
|
| 108 |
-
"comparison to the verified
|
| 109 |
],
|
| 110 |
"completion_evidence": [
|
| 111 |
-
"quality-target report",
|
| 112 |
"error-analysis tables",
|
| 113 |
-
"held-out metrics",
|
| 114 |
"verified public-safe package"
|
| 115 |
],
|
| 116 |
-
"reader_takeaway": "The next pass should improve
|
| 117 |
},
|
| 118 |
{
|
| 119 |
"id": "foundation_model_selection_matrix",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Research Roadmap",
|
| 3 |
+
"summary": "Staged path from the public-sample task lab to a final verified Qwen3-Omni diagnostic result, same-split 128-episode baseline alignment, action/subtask error analysis, foundation-model selection, world/policy branches, and a future Xperience-native embodied foundation model.",
|
| 4 |
+
"current_decision_point": "Keep the public-sample task suite as the development harness, use the final verified selected-episode Qwen3-Omni diagnostic result and the same-split 128-episode simple/NN metadata baselines as the first cross-episode references, improve action/subtask quality through error analysis, then branch into Cosmos 3 world modeling and policy-model experiments after their targets are implemented. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
|
| 5 |
"additional_development_directions": {
|
| 6 |
"source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
|
| 7 |
"source_json": "docs/data/additional_development_directions.json",
|
|
|
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"id": "qwen3_omni_lora_diagnostic_pilot",
|
| 55 |
+
"name": "Qwen3-Omni LoRA Final Diagnostic Result",
|
| 56 |
"status": "verified_baseline",
|
| 57 |
"entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
|
| 58 |
"deliverables": [
|
|
|
|
| 63 |
"held-out predictions",
|
| 64 |
"metrics",
|
| 65 |
"confusion matrices",
|
| 66 |
+
"run report",
|
| 67 |
+
"public LoRA adapter repo"
|
| 68 |
],
|
| 69 |
"completion_evidence": [
|
| 70 |
"docs/data/omni_finetune_verified_result.json",
|
|
|
|
| 76 |
"predictions.jsonl",
|
| 77 |
"RUN_REPORT.md"
|
| 78 |
],
|
| 79 |
+
"reader_takeaway": "The final omni-model diagnostic result establishes the full held-out training/validation/evaluation loop and meets the strict-JSON target, but weak action/subtask metrics make it a diagnostic baseline."
|
| 80 |
},
|
| 81 |
{
|
| 82 |
"id": "multi_episode_128_same_split_baselines",
|
|
|
|
| 98 |
},
|
| 99 |
{
|
| 100 |
"id": "qwen3_omni_structured_output_error_analysis",
|
| 101 |
+
"name": "Action/Subtask Error-Analysis Pass",
|
| 102 |
"status": "active_next_step",
|
| 103 |
+
"entry_condition": "The final diagnostic package meets strict JSON validity but has weak action/subtask held-out quality.",
|
| 104 |
"deliverables": [
|
| 105 |
"same 96/16/16 episode split",
|
| 106 |
+
"action/subtask confusion analysis",
|
| 107 |
+
"unseen-label analysis",
|
| 108 |
+
"object/action family breakdowns",
|
| 109 |
"held-out test evaluation",
|
| 110 |
+
"comparison to the final verified Qwen baseline"
|
| 111 |
],
|
| 112 |
"completion_evidence": [
|
|
|
|
| 113 |
"error-analysis tables",
|
| 114 |
+
"held-out metrics by failure type",
|
| 115 |
"verified public-safe package"
|
| 116 |
],
|
| 117 |
+
"reader_takeaway": "The next pass should improve action/subtask quality before larger model-quality claims."
|
| 118 |
},
|
| 119 |
{
|
| 120 |
"id": "foundation_model_selection_matrix",
|
metrics/research_roadmap_interactive.json
CHANGED
|
@@ -2035,7 +2035,7 @@
|
|
| 2035 |
"step": 1
|
| 2036 |
},
|
| 2037 |
{
|
| 2038 |
-
"action": "Run
|
| 2039 |
"name": "First held-out baseline",
|
| 2040 |
"step": 2
|
| 2041 |
},
|
|
@@ -2222,7 +2222,7 @@
|
|
| 2222 |
],
|
| 2223 |
"status": "planning_artifact"
|
| 2224 |
},
|
| 2225 |
-
"generated_at_utc": "2026-06-
|
| 2226 |
"omni_plan": {
|
| 2227 |
"adapter": "LoRA rank 16, alpha 32, dropout 0.05",
|
| 2228 |
"backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
|
|
@@ -2303,33 +2303,53 @@
|
|
| 2303 |
"held-out predictions",
|
| 2304 |
"metrics",
|
| 2305 |
"confusion matrices",
|
| 2306 |
-
"run report"
|
|
|
|
| 2307 |
],
|
| 2308 |
"entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
|
| 2309 |
"id": "qwen3_omni_lora_diagnostic_pilot",
|
| 2310 |
-
"name": "Qwen3-Omni LoRA
|
| 2311 |
-
"reader_takeaway": "The
|
| 2312 |
"stage": "future",
|
| 2313 |
"status": "verified_baseline"
|
| 2314 |
},
|
| 2315 |
{
|
| 2316 |
"completion_evidence": [
|
| 2317 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2318 |
"error-analysis tables",
|
| 2319 |
-
"held-out metrics",
|
| 2320 |
"verified public-safe package"
|
| 2321 |
],
|
| 2322 |
"deliverables": [
|
| 2323 |
"same 96/16/16 episode split",
|
| 2324 |
-
"
|
| 2325 |
-
"
|
|
|
|
| 2326 |
"held-out test evaluation",
|
| 2327 |
-
"comparison to the verified
|
| 2328 |
],
|
| 2329 |
-
"entry_condition": "The
|
| 2330 |
"id": "qwen3_omni_structured_output_error_analysis",
|
| 2331 |
-
"name": "
|
| 2332 |
-
"reader_takeaway": "The next pass should improve
|
| 2333 |
"stage": "future",
|
| 2334 |
"status": "active_next_step"
|
| 2335 |
},
|
|
@@ -2428,7 +2448,7 @@
|
|
| 2428 |
"visualization.rrd"
|
| 2429 |
],
|
| 2430 |
"selection_strategy": "stratified_round_robin_by_top_level_session",
|
| 2431 |
-
"status": "
|
| 2432 |
"target_episodes": 128,
|
| 2433 |
"valid_candidates": 12102
|
| 2434 |
},
|
|
|
|
| 2035 |
"step": 1
|
| 2036 |
},
|
| 2037 |
{
|
| 2038 |
+
"action": "Run Qwen3-Omni action/subtask error analysis and targeted reruns to improve the verified diagnostic baseline.",
|
| 2039 |
"name": "First held-out baseline",
|
| 2040 |
"step": 2
|
| 2041 |
},
|
|
|
|
| 2222 |
],
|
| 2223 |
"status": "planning_artifact"
|
| 2224 |
},
|
| 2225 |
+
"generated_at_utc": "2026-06-06T23:26:13+00:00",
|
| 2226 |
"omni_plan": {
|
| 2227 |
"adapter": "LoRA rank 16, alpha 32, dropout 0.05",
|
| 2228 |
"backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
|
|
|
|
| 2303 |
"held-out predictions",
|
| 2304 |
"metrics",
|
| 2305 |
"confusion matrices",
|
| 2306 |
+
"run report",
|
| 2307 |
+
"public LoRA adapter repo"
|
| 2308 |
],
|
| 2309 |
"entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
|
| 2310 |
"id": "qwen3_omni_lora_diagnostic_pilot",
|
| 2311 |
+
"name": "Qwen3-Omni LoRA Final Diagnostic Result",
|
| 2312 |
+
"reader_takeaway": "The final omni-model diagnostic result establishes the full held-out training/validation/evaluation loop and meets the strict-JSON target, but weak action/subtask metrics make it a diagnostic baseline.",
|
| 2313 |
"stage": "future",
|
| 2314 |
"status": "verified_baseline"
|
| 2315 |
},
|
| 2316 |
{
|
| 2317 |
"completion_evidence": [
|
| 2318 |
+
"results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md",
|
| 2319 |
+
"results/omni_finetune/multi_episode_128_task_baselines/summary_report.json",
|
| 2320 |
+
"scripts/omni/run_128_task_baselines.py"
|
| 2321 |
+
],
|
| 2322 |
+
"deliverables": [
|
| 2323 |
+
"same 12 task ids",
|
| 2324 |
+
"simple metadata/text baselines",
|
| 2325 |
+
"neural MLP baselines for JSON-supported labels",
|
| 2326 |
+
"explicit unsupported markers for raw-feature-only tasks"
|
| 2327 |
+
],
|
| 2328 |
+
"entry_condition": "Derived Qwen JSONL export for the selected 96/16/16 split.",
|
| 2329 |
+
"id": "multi_episode_128_same_split_baselines",
|
| 2330 |
+
"name": "128-Episode Same-Split Simple/NN Baselines",
|
| 2331 |
+
"reader_takeaway": "The simple and neural baseline framing is now aligned to the selected 128-episode setup; trajectory, retrieval, reconstruction, and misalignment variants still need raw 128 feature blocks for exact feature-level reproduction.",
|
| 2332 |
+
"stage": "future",
|
| 2333 |
+
"status": "verified_companion_result"
|
| 2334 |
+
},
|
| 2335 |
+
{
|
| 2336 |
+
"completion_evidence": [
|
| 2337 |
"error-analysis tables",
|
| 2338 |
+
"held-out metrics by failure type",
|
| 2339 |
"verified public-safe package"
|
| 2340 |
],
|
| 2341 |
"deliverables": [
|
| 2342 |
"same 96/16/16 episode split",
|
| 2343 |
+
"action/subtask confusion analysis",
|
| 2344 |
+
"unseen-label analysis",
|
| 2345 |
+
"object/action family breakdowns",
|
| 2346 |
"held-out test evaluation",
|
| 2347 |
+
"comparison to the final verified Qwen baseline"
|
| 2348 |
],
|
| 2349 |
+
"entry_condition": "The final diagnostic package meets strict JSON validity but has weak action/subtask held-out quality.",
|
| 2350 |
"id": "qwen3_omni_structured_output_error_analysis",
|
| 2351 |
+
"name": "Action/Subtask Error-Analysis Pass",
|
| 2352 |
+
"reader_takeaway": "The next pass should improve action/subtask quality before larger model-quality claims.",
|
| 2353 |
"stage": "future",
|
| 2354 |
"status": "active_next_step"
|
| 2355 |
},
|
|
|
|
| 2448 |
"visualization.rrd"
|
| 2449 |
],
|
| 2450 |
"selection_strategy": "stratified_round_robin_by_top_level_session",
|
| 2451 |
+
"status": "verified_full_128_episode_diagnostic_result",
|
| 2452 |
"target_episodes": 128,
|
| 2453 |
"valid_candidates": 12102
|
| 2454 |
},
|
metrics/research_takeaways.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Research Takeaways",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"source_files": [
|
| 6 |
"docs/data/summary_metrics.json",
|
| 7 |
"results/episode_task_suite/summary_report.json",
|
|
@@ -166,7 +166,7 @@
|
|
| 166 |
{
|
| 167 |
"id": "scale_requires_episodes",
|
| 168 |
"title": "The next scientific unit is held-out episodes, not more adjacent windows",
|
| 169 |
-
"readout": "The selected Qwen3-Omni path now has a verified
|
| 170 |
"evidence": [
|
| 171 |
{
|
| 172 |
"label": "selected_episodes",
|
|
@@ -174,19 +174,19 @@
|
|
| 174 |
},
|
| 175 |
{
|
| 176 |
"label": "held_out_test_windows",
|
| 177 |
-
"value":
|
| 178 |
},
|
| 179 |
{
|
| 180 |
"label": "json_validity_rate",
|
| 181 |
-
"value":
|
| 182 |
},
|
| 183 |
{
|
| 184 |
"label": "action_macro_f1",
|
| 185 |
-
"value":
|
| 186 |
}
|
| 187 |
],
|
| 188 |
"source": "docs/data/omni_finetune_verified_result.json",
|
| 189 |
-
"current_scope": "The selected-episode Qwen3-Omni
|
| 190 |
}
|
| 191 |
]
|
| 192 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Research Takeaways",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-06T23:26:13+00:00",
|
| 5 |
"source_files": [
|
| 6 |
"docs/data/summary_metrics.json",
|
| 7 |
"results/episode_task_suite/summary_report.json",
|
|
|
|
| 166 |
{
|
| 167 |
"id": "scale_requires_episodes",
|
| 168 |
"title": "The next scientific unit is held-out episodes, not more adjacent windows",
|
| 169 |
+
"readout": "The selected Qwen3-Omni path now has a verified two-epoch held-out diagnostic result. It proves the cross-episode train/validation/eval loop and meets the strict-JSON target, while weak action/subtask metrics remain the next modeling problem.",
|
| 170 |
"evidence": [
|
| 171 |
{
|
| 172 |
"label": "selected_episodes",
|
|
|
|
| 174 |
},
|
| 175 |
{
|
| 176 |
"label": "held_out_test_windows",
|
| 177 |
+
"value": null
|
| 178 |
},
|
| 179 |
{
|
| 180 |
"label": "json_validity_rate",
|
| 181 |
+
"value": null
|
| 182 |
},
|
| 183 |
{
|
| 184 |
"label": "action_macro_f1",
|
| 185 |
+
"value": null
|
| 186 |
}
|
| 187 |
],
|
| 188 |
"source": "docs/data/omni_finetune_verified_result.json",
|
| 189 |
+
"current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims."
|
| 190 |
}
|
| 191 |
]
|
| 192 |
}
|
metrics/scope_claims_audit.json
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"summary": {
|
| 5 |
"qwen3_omni_verified_diagnostic_pilot": true,
|
| 6 |
"dataset_manifest_num_episodes": 119,
|
| 7 |
"dataset_manifest_num_samples": 3808,
|
| 8 |
"training_metadata_num_train_samples": 2848,
|
| 9 |
"eval_num_samples": 448,
|
| 10 |
-
"eval_json_validity_rate": 0.
|
| 11 |
-
"quality_target_met":
|
| 12 |
-
"historical_identifier_count":
|
| 13 |
"public_32_episode_status_file_count": 1,
|
| 14 |
"failure_count": 0
|
| 15 |
},
|
|
@@ -25,7 +25,7 @@
|
|
| 25 |
{
|
| 26 |
"name": "summary_metrics_preserves_verified_diagnostic_status",
|
| 27 |
"status": "pass",
|
| 28 |
-
"detail": "The selected-episode Qwen3-Omni
|
| 29 |
"evidence": [
|
| 30 |
"docs/data/summary_metrics.json"
|
| 31 |
]
|
|
@@ -35,7 +35,7 @@
|
|
| 35 |
"status": "pass",
|
| 36 |
"detail": "episodes=119, samples=3808, split_counts={'train': 2848, 'val': 512, 'test': 448}",
|
| 37 |
"evidence": [
|
| 38 |
-
"results/omni_finetune/verified_public/
|
| 39 |
]
|
| 40 |
},
|
| 41 |
{
|
|
@@ -43,15 +43,15 @@
|
|
| 43 |
"status": "pass",
|
| 44 |
"detail": "train=2848, val=512, processes=8",
|
| 45 |
"evidence": [
|
| 46 |
-
"results/omni_finetune/verified_public/
|
| 47 |
]
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"name": "verified_package_eval_records_real_held_out_metrics",
|
| 51 |
"status": "pass",
|
| 52 |
-
"detail": "samples=448, split=test, held_out=14, json_validity=0.
|
| 53 |
"evidence": [
|
| 54 |
-
"results/omni_finetune/verified_public/
|
| 55 |
]
|
| 56 |
},
|
| 57 |
{
|
|
@@ -59,7 +59,7 @@
|
|
| 59 |
"status": "pass",
|
| 60 |
"detail": "audit_status=pass, issues=0",
|
| 61 |
"evidence": [
|
| 62 |
-
"results/omni_finetune/verified_public/
|
| 63 |
]
|
| 64 |
},
|
| 65 |
{
|
|
@@ -84,7 +84,7 @@
|
|
| 84 |
{
|
| 85 |
"name": "historical_32ep_identifiers_are_confined_to_readiness_artifacts",
|
| 86 |
"status": "pass",
|
| 87 |
-
"detail": "historical identifiers found in result provenance files=
|
| 88 |
"evidence": [
|
| 89 |
"results/omni_finetune/"
|
| 90 |
]
|
|
@@ -97,16 +97,6 @@
|
|
| 97 |
}
|
| 98 |
],
|
| 99 |
"historical_identifiers": [
|
| 100 |
-
{
|
| 101 |
-
"classification": "historical_identifier_in_readiness_artifact",
|
| 102 |
-
"path": "results/omni_finetune/HF_UPLOAD.md",
|
| 103 |
-
"line": 5,
|
| 104 |
-
"patterns": [
|
| 105 |
-
"qwen3_omni_32ep",
|
| 106 |
-
"xperience10m_qwen3_omni_32ep"
|
| 107 |
-
],
|
| 108 |
-
"example": "- `results/omni_finetune/adapter_lora/` (`xperience10m_qwen3_omni_32ep_lora`)"
|
| 109 |
-
},
|
| 110 |
{
|
| 111 |
"classification": "historical_identifier_in_readiness_artifact",
|
| 112 |
"path": "results/omni_finetune/XPERIENCE10M_128_DATA_PREPARATION_AND_FINETUNE_PLAN.md",
|
|
@@ -421,8 +411,19 @@
|
|
| 421 |
"ropedia-episode-task-suite"
|
| 422 |
],
|
| 423 |
"example": "{\"id\": \"xperience-10m-sample:qa:52\", \"episode_id\": \"xperience-10m-sample\", \"split\": \"train\", \"target\": \"episode_qa\", \"prompt_type\": \"json_episode_understanding\", \"center_window\": {\"start_frame\": 1040, \"end_frame\": 1059, \"num_frames\": 20}, \"media\": {\"video_path"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 424 |
}
|
| 425 |
],
|
| 426 |
-
"historical_identifier_total_count":
|
| 427 |
"failures": []
|
| 428 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-06T23:27:07+00:00",
|
| 4 |
"summary": {
|
| 5 |
"qwen3_omni_verified_diagnostic_pilot": true,
|
| 6 |
"dataset_manifest_num_episodes": 119,
|
| 7 |
"dataset_manifest_num_samples": 3808,
|
| 8 |
"training_metadata_num_train_samples": 2848,
|
| 9 |
"eval_num_samples": 448,
|
| 10 |
+
"eval_json_validity_rate": 0.9977678571428571,
|
| 11 |
+
"quality_target_met": true,
|
| 12 |
+
"historical_identifier_count": 131,
|
| 13 |
"public_32_episode_status_file_count": 1,
|
| 14 |
"failure_count": 0
|
| 15 |
},
|
|
|
|
| 25 |
{
|
| 26 |
"name": "summary_metrics_preserves_verified_diagnostic_status",
|
| 27 |
"status": "pass",
|
| 28 |
+
"detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims.",
|
| 29 |
"evidence": [
|
| 30 |
"docs/data/summary_metrics.json"
|
| 31 |
]
|
|
|
|
| 35 |
"status": "pass",
|
| 36 |
"detail": "episodes=119, samples=3808, split_counts={'train': 2848, 'val': 512, 'test': 448}",
|
| 37 |
"evidence": [
|
| 38 |
+
"results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/dataset/dataset_manifest.json"
|
| 39 |
]
|
| 40 |
},
|
| 41 |
{
|
|
|
|
| 43 |
"status": "pass",
|
| 44 |
"detail": "train=2848, val=512, processes=8",
|
| 45 |
"evidence": [
|
| 46 |
+
"results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/training/training_metadata.json"
|
| 47 |
]
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"name": "verified_package_eval_records_real_held_out_metrics",
|
| 51 |
"status": "pass",
|
| 52 |
+
"detail": "samples=448, split=test, held_out=14, json_validity=0.9977678571428571",
|
| 53 |
"evidence": [
|
| 54 |
+
"results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/eval/metrics.json"
|
| 55 |
]
|
| 56 |
},
|
| 57 |
{
|
|
|
|
| 59 |
"status": "pass",
|
| 60 |
"detail": "audit_status=pass, issues=0",
|
| 61 |
"evidence": [
|
| 62 |
+
"results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/package_audit.json"
|
| 63 |
]
|
| 64 |
},
|
| 65 |
{
|
|
|
|
| 84 |
{
|
| 85 |
"name": "historical_32ep_identifiers_are_confined_to_readiness_artifacts",
|
| 86 |
"status": "pass",
|
| 87 |
+
"detail": "historical identifiers found in result provenance files=131",
|
| 88 |
"evidence": [
|
| 89 |
"results/omni_finetune/"
|
| 90 |
]
|
|
|
|
| 97 |
}
|
| 98 |
],
|
| 99 |
"historical_identifiers": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
{
|
| 101 |
"classification": "historical_identifier_in_readiness_artifact",
|
| 102 |
"path": "results/omni_finetune/XPERIENCE10M_128_DATA_PREPARATION_AND_FINETUNE_PLAN.md",
|
|
|
|
| 411 |
"ropedia-episode-task-suite"
|
| 412 |
],
|
| 413 |
"example": "{\"id\": \"xperience-10m-sample:qa:52\", \"episode_id\": \"xperience-10m-sample\", \"split\": \"train\", \"target\": \"episode_qa\", \"prompt_type\": \"json_episode_understanding\", \"center_window\": {\"start_frame\": 1040, \"end_frame\": 1059, \"num_frames\": 20}, \"media\": {\"video_path"
|
| 414 |
+
},
|
| 415 |
+
{
|
| 416 |
+
"classification": "historical_identifier_in_readiness_artifact",
|
| 417 |
+
"path": "results/omni_finetune/dataset.jsonl",
|
| 418 |
+
"line": 28,
|
| 419 |
+
"patterns": [
|
| 420 |
+
"qwen3_omni_32ep",
|
| 421 |
+
"xperience10m_qwen3_omni_32ep",
|
| 422 |
+
"ropedia-episode-task-suite"
|
| 423 |
+
],
|
| 424 |
+
"example": "{\"id\": \"xperience-10m-sample:qa:53\", \"episode_id\": \"xperience-10m-sample\", \"split\": \"train\", \"target\": \"episode_qa\", \"prompt_type\": \"json_episode_understanding\", \"center_window\": {\"start_frame\": 1060, \"end_frame\": 1079, \"num_frames\": 20}, \"media\": {\"video_path"
|
| 425 |
}
|
| 426 |
],
|
| 427 |
+
"historical_identifier_total_count": 131,
|
| 428 |
"failures": []
|
| 429 |
}
|
metrics/single_episode_explorer.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"meta": {
|
| 3 |
-
"generated_at": "2026-06-
|
| 4 |
"window_count": 1161,
|
| 5 |
"feature_dim": 8546,
|
| 6 |
"object_label_rows": 1161,
|
|
@@ -16,12 +16,26 @@
|
|
| 16 |
}
|
| 17 |
},
|
| 18 |
"tasks": {
|
| 19 |
-
"timeline_action": "
|
| 20 |
-
"timeline_subtask": "
|
| 21 |
-
"transition_detection": "Action
|
| 22 |
"next_action": "Next-Action Prediction",
|
| 23 |
"contact_prediction": "Contact State Prediction",
|
| 24 |
-
"object_relevance": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
},
|
| 26 |
"feature_blocks": [
|
| 27 |
{
|
|
@@ -138,7 +152,7 @@
|
|
| 138 |
},
|
| 139 |
{
|
| 140 |
"name": "audio_fisheye_cam0_aac",
|
| 141 |
-
"display": "Audio
|
| 142 |
"modality": "audio",
|
| 143 |
"start": 7343,
|
| 144 |
"end": 7511,
|
|
@@ -163958,6 +163972,8 @@
|
|
| 163958 |
"ablation": {
|
| 163959 |
"best_by_task": {
|
| 163960 |
"caption_grounding": {
|
|
|
|
|
|
|
| 163961 |
"best": {
|
| 163962 |
"modality_group": "language",
|
| 163963 |
"modality_display": "Language",
|
|
@@ -163973,6 +163989,8 @@
|
|
| 163973 |
}
|
| 163974 |
},
|
| 163975 |
"contact_prediction": {
|
|
|
|
|
|
|
| 163976 |
"best": {
|
| 163977 |
"modality_group": "all_features",
|
| 163978 |
"modality_display": "All Features",
|
|
@@ -163988,6 +164006,8 @@
|
|
| 163988 |
}
|
| 163989 |
},
|
| 163990 |
"cross_modal_retrieval": {
|
|
|
|
|
|
|
| 163991 |
"best": {
|
| 163992 |
"modality_group": "all_features",
|
| 163993 |
"modality_display": "All Features",
|
|
@@ -164003,6 +164023,8 @@
|
|
| 164003 |
}
|
| 164004 |
},
|
| 164005 |
"hand_trajectory_forecast": {
|
|
|
|
|
|
|
| 164006 |
"best": {
|
| 164007 |
"modality_group": "inertial",
|
| 164008 |
"modality_display": "Inertial",
|
|
@@ -164018,6 +164040,8 @@
|
|
| 164018 |
}
|
| 164019 |
},
|
| 164020 |
"misalignment_detection": {
|
|
|
|
|
|
|
| 164021 |
"best": {
|
| 164022 |
"modality_group": "audio",
|
| 164023 |
"modality_display": "Audio",
|
|
@@ -164033,6 +164057,8 @@
|
|
| 164033 |
}
|
| 164034 |
},
|
| 164035 |
"modality_reconstruction": {
|
|
|
|
|
|
|
| 164036 |
"best": {
|
| 164037 |
"modality_group": "video",
|
| 164038 |
"modality_display": "Video",
|
|
@@ -164048,6 +164074,8 @@
|
|
| 164048 |
}
|
| 164049 |
},
|
| 164050 |
"next_action": {
|
|
|
|
|
|
|
| 164051 |
"best": {
|
| 164052 |
"modality_group": "language",
|
| 164053 |
"modality_display": "Language",
|
|
@@ -164063,6 +164091,8 @@
|
|
| 164063 |
}
|
| 164064 |
},
|
| 164065 |
"object_relevance": {
|
|
|
|
|
|
|
| 164066 |
"best": {
|
| 164067 |
"modality_group": "language",
|
| 164068 |
"modality_display": "Language",
|
|
@@ -164078,6 +164108,8 @@
|
|
| 164078 |
}
|
| 164079 |
},
|
| 164080 |
"temporal_order": {
|
|
|
|
|
|
|
| 164081 |
"best": {
|
| 164082 |
"modality_group": "pose_slam",
|
| 164083 |
"modality_display": "Pose + SLAM",
|
|
@@ -164093,6 +164125,8 @@
|
|
| 164093 |
}
|
| 164094 |
},
|
| 164095 |
"timeline_action": {
|
|
|
|
|
|
|
| 164096 |
"best": {
|
| 164097 |
"modality_group": "language",
|
| 164098 |
"modality_display": "Language",
|
|
@@ -164108,6 +164142,8 @@
|
|
| 164108 |
}
|
| 164109 |
},
|
| 164110 |
"timeline_subtask": {
|
|
|
|
|
|
|
| 164111 |
"best": {
|
| 164112 |
"modality_group": "language",
|
| 164113 |
"modality_display": "Language",
|
|
@@ -164123,6 +164159,8 @@
|
|
| 164123 |
}
|
| 164124 |
},
|
| 164125 |
"transition_detection": {
|
|
|
|
|
|
|
| 164126 |
"best": {
|
| 164127 |
"modality_group": "language",
|
| 164128 |
"modality_display": "Language",
|
|
@@ -164173,7 +164211,8 @@
|
|
| 164173 |
"top10_accuracy": "",
|
| 164174 |
"median_rank": "",
|
| 164175 |
"mean_rank": "",
|
| 164176 |
-
"num_queries": ""
|
|
|
|
| 164177 |
},
|
| 164178 |
{
|
| 164179 |
"task": "timeline_action",
|
|
@@ -164209,7 +164248,8 @@
|
|
| 164209 |
"top10_accuracy": "",
|
| 164210 |
"median_rank": "",
|
| 164211 |
"mean_rank": "",
|
| 164212 |
-
"num_queries": ""
|
|
|
|
| 164213 |
},
|
| 164214 |
{
|
| 164215 |
"task": "timeline_action",
|
|
@@ -164245,7 +164285,8 @@
|
|
| 164245 |
"top10_accuracy": "",
|
| 164246 |
"median_rank": "",
|
| 164247 |
"mean_rank": "",
|
| 164248 |
-
"num_queries": ""
|
|
|
|
| 164249 |
},
|
| 164250 |
{
|
| 164251 |
"task": "timeline_action",
|
|
@@ -164281,7 +164322,8 @@
|
|
| 164281 |
"top10_accuracy": "",
|
| 164282 |
"median_rank": "",
|
| 164283 |
"mean_rank": "",
|
| 164284 |
-
"num_queries": ""
|
|
|
|
| 164285 |
},
|
| 164286 |
{
|
| 164287 |
"task": "timeline_action",
|
|
@@ -164317,7 +164359,8 @@
|
|
| 164317 |
"top10_accuracy": "",
|
| 164318 |
"median_rank": "",
|
| 164319 |
"mean_rank": "",
|
| 164320 |
-
"num_queries": ""
|
|
|
|
| 164321 |
},
|
| 164322 |
{
|
| 164323 |
"task": "timeline_action",
|
|
@@ -164353,7 +164396,8 @@
|
|
| 164353 |
"top10_accuracy": "",
|
| 164354 |
"median_rank": "",
|
| 164355 |
"mean_rank": "",
|
| 164356 |
-
"num_queries": ""
|
|
|
|
| 164357 |
},
|
| 164358 |
{
|
| 164359 |
"task": "timeline_action",
|
|
@@ -164389,7 +164433,8 @@
|
|
| 164389 |
"top10_accuracy": "",
|
| 164390 |
"median_rank": "",
|
| 164391 |
"mean_rank": "",
|
| 164392 |
-
"num_queries": ""
|
|
|
|
| 164393 |
},
|
| 164394 |
{
|
| 164395 |
"task": "timeline_action",
|
|
@@ -164425,7 +164470,8 @@
|
|
| 164425 |
"top10_accuracy": "",
|
| 164426 |
"median_rank": "",
|
| 164427 |
"mean_rank": "",
|
| 164428 |
-
"num_queries": ""
|
|
|
|
| 164429 |
},
|
| 164430 |
{
|
| 164431 |
"task": "timeline_action",
|
|
@@ -164461,7 +164507,8 @@
|
|
| 164461 |
"top10_accuracy": "",
|
| 164462 |
"median_rank": "",
|
| 164463 |
"mean_rank": "",
|
| 164464 |
-
"num_queries": ""
|
|
|
|
| 164465 |
},
|
| 164466 |
{
|
| 164467 |
"task": "timeline_subtask",
|
|
@@ -164497,7 +164544,8 @@
|
|
| 164497 |
"top10_accuracy": "",
|
| 164498 |
"median_rank": "",
|
| 164499 |
"mean_rank": "",
|
| 164500 |
-
"num_queries": ""
|
|
|
|
| 164501 |
},
|
| 164502 |
{
|
| 164503 |
"task": "timeline_subtask",
|
|
@@ -164533,7 +164581,8 @@
|
|
| 164533 |
"top10_accuracy": "",
|
| 164534 |
"median_rank": "",
|
| 164535 |
"mean_rank": "",
|
| 164536 |
-
"num_queries": ""
|
|
|
|
| 164537 |
},
|
| 164538 |
{
|
| 164539 |
"task": "timeline_subtask",
|
|
@@ -164569,7 +164618,8 @@
|
|
| 164569 |
"top10_accuracy": "",
|
| 164570 |
"median_rank": "",
|
| 164571 |
"mean_rank": "",
|
| 164572 |
-
"num_queries": ""
|
|
|
|
| 164573 |
},
|
| 164574 |
{
|
| 164575 |
"task": "timeline_subtask",
|
|
@@ -164605,7 +164655,8 @@
|
|
| 164605 |
"top10_accuracy": "",
|
| 164606 |
"median_rank": "",
|
| 164607 |
"mean_rank": "",
|
| 164608 |
-
"num_queries": ""
|
|
|
|
| 164609 |
},
|
| 164610 |
{
|
| 164611 |
"task": "timeline_subtask",
|
|
@@ -164641,7 +164692,8 @@
|
|
| 164641 |
"top10_accuracy": "",
|
| 164642 |
"median_rank": "",
|
| 164643 |
"mean_rank": "",
|
| 164644 |
-
"num_queries": ""
|
|
|
|
| 164645 |
},
|
| 164646 |
{
|
| 164647 |
"task": "timeline_subtask",
|
|
@@ -164677,7 +164729,8 @@
|
|
| 164677 |
"top10_accuracy": "",
|
| 164678 |
"median_rank": "",
|
| 164679 |
"mean_rank": "",
|
| 164680 |
-
"num_queries": ""
|
|
|
|
| 164681 |
},
|
| 164682 |
{
|
| 164683 |
"task": "timeline_subtask",
|
|
@@ -164713,7 +164766,8 @@
|
|
| 164713 |
"top10_accuracy": "",
|
| 164714 |
"median_rank": "",
|
| 164715 |
"mean_rank": "",
|
| 164716 |
-
"num_queries": ""
|
|
|
|
| 164717 |
},
|
| 164718 |
{
|
| 164719 |
"task": "timeline_subtask",
|
|
@@ -164749,7 +164803,8 @@
|
|
| 164749 |
"top10_accuracy": "",
|
| 164750 |
"median_rank": "",
|
| 164751 |
"mean_rank": "",
|
| 164752 |
-
"num_queries": ""
|
|
|
|
| 164753 |
},
|
| 164754 |
{
|
| 164755 |
"task": "timeline_subtask",
|
|
@@ -164785,7 +164840,8 @@
|
|
| 164785 |
"top10_accuracy": "",
|
| 164786 |
"median_rank": "",
|
| 164787 |
"mean_rank": "",
|
| 164788 |
-
"num_queries": ""
|
|
|
|
| 164789 |
},
|
| 164790 |
{
|
| 164791 |
"task": "transition_detection",
|
|
@@ -164821,7 +164877,8 @@
|
|
| 164821 |
"top10_accuracy": "",
|
| 164822 |
"median_rank": "",
|
| 164823 |
"mean_rank": "",
|
| 164824 |
-
"num_queries": ""
|
|
|
|
| 164825 |
},
|
| 164826 |
{
|
| 164827 |
"task": "transition_detection",
|
|
@@ -164857,7 +164914,8 @@
|
|
| 164857 |
"top10_accuracy": "",
|
| 164858 |
"median_rank": "",
|
| 164859 |
"mean_rank": "",
|
| 164860 |
-
"num_queries": ""
|
|
|
|
| 164861 |
},
|
| 164862 |
{
|
| 164863 |
"task": "transition_detection",
|
|
@@ -164893,7 +164951,8 @@
|
|
| 164893 |
"top10_accuracy": "",
|
| 164894 |
"median_rank": "",
|
| 164895 |
"mean_rank": "",
|
| 164896 |
-
"num_queries": ""
|
|
|
|
| 164897 |
},
|
| 164898 |
{
|
| 164899 |
"task": "transition_detection",
|
|
@@ -164929,7 +164988,8 @@
|
|
| 164929 |
"top10_accuracy": "",
|
| 164930 |
"median_rank": "",
|
| 164931 |
"mean_rank": "",
|
| 164932 |
-
"num_queries": ""
|
|
|
|
| 164933 |
},
|
| 164934 |
{
|
| 164935 |
"task": "transition_detection",
|
|
@@ -164965,7 +165025,8 @@
|
|
| 164965 |
"top10_accuracy": "",
|
| 164966 |
"median_rank": "",
|
| 164967 |
"mean_rank": "",
|
| 164968 |
-
"num_queries": ""
|
|
|
|
| 164969 |
},
|
| 164970 |
{
|
| 164971 |
"task": "transition_detection",
|
|
@@ -165001,7 +165062,8 @@
|
|
| 165001 |
"top10_accuracy": "",
|
| 165002 |
"median_rank": "",
|
| 165003 |
"mean_rank": "",
|
| 165004 |
-
"num_queries": ""
|
|
|
|
| 165005 |
},
|
| 165006 |
{
|
| 165007 |
"task": "transition_detection",
|
|
@@ -165037,7 +165099,8 @@
|
|
| 165037 |
"top10_accuracy": "",
|
| 165038 |
"median_rank": "",
|
| 165039 |
"mean_rank": "",
|
| 165040 |
-
"num_queries": ""
|
|
|
|
| 165041 |
},
|
| 165042 |
{
|
| 165043 |
"task": "transition_detection",
|
|
@@ -165073,7 +165136,8 @@
|
|
| 165073 |
"top10_accuracy": "",
|
| 165074 |
"median_rank": "",
|
| 165075 |
"mean_rank": "",
|
| 165076 |
-
"num_queries": ""
|
|
|
|
| 165077 |
},
|
| 165078 |
{
|
| 165079 |
"task": "transition_detection",
|
|
@@ -165109,7 +165173,8 @@
|
|
| 165109 |
"top10_accuracy": "",
|
| 165110 |
"median_rank": "",
|
| 165111 |
"mean_rank": "",
|
| 165112 |
-
"num_queries": ""
|
|
|
|
| 165113 |
},
|
| 165114 |
{
|
| 165115 |
"task": "next_action",
|
|
@@ -165145,7 +165210,8 @@
|
|
| 165145 |
"top10_accuracy": "",
|
| 165146 |
"median_rank": "",
|
| 165147 |
"mean_rank": "",
|
| 165148 |
-
"num_queries": ""
|
|
|
|
| 165149 |
},
|
| 165150 |
{
|
| 165151 |
"task": "next_action",
|
|
@@ -165181,7 +165247,8 @@
|
|
| 165181 |
"top10_accuracy": "",
|
| 165182 |
"median_rank": "",
|
| 165183 |
"mean_rank": "",
|
| 165184 |
-
"num_queries": ""
|
|
|
|
| 165185 |
},
|
| 165186 |
{
|
| 165187 |
"task": "next_action",
|
|
@@ -165217,7 +165284,8 @@
|
|
| 165217 |
"top10_accuracy": "",
|
| 165218 |
"median_rank": "",
|
| 165219 |
"mean_rank": "",
|
| 165220 |
-
"num_queries": ""
|
|
|
|
| 165221 |
},
|
| 165222 |
{
|
| 165223 |
"task": "next_action",
|
|
@@ -165253,7 +165321,8 @@
|
|
| 165253 |
"top10_accuracy": "",
|
| 165254 |
"median_rank": "",
|
| 165255 |
"mean_rank": "",
|
| 165256 |
-
"num_queries": ""
|
|
|
|
| 165257 |
},
|
| 165258 |
{
|
| 165259 |
"task": "next_action",
|
|
@@ -165289,7 +165358,8 @@
|
|
| 165289 |
"top10_accuracy": "",
|
| 165290 |
"median_rank": "",
|
| 165291 |
"mean_rank": "",
|
| 165292 |
-
"num_queries": ""
|
|
|
|
| 165293 |
},
|
| 165294 |
{
|
| 165295 |
"task": "next_action",
|
|
@@ -165325,7 +165395,8 @@
|
|
| 165325 |
"top10_accuracy": "",
|
| 165326 |
"median_rank": "",
|
| 165327 |
"mean_rank": "",
|
| 165328 |
-
"num_queries": ""
|
|
|
|
| 165329 |
},
|
| 165330 |
{
|
| 165331 |
"task": "next_action",
|
|
@@ -165361,7 +165432,8 @@
|
|
| 165361 |
"top10_accuracy": "",
|
| 165362 |
"median_rank": "",
|
| 165363 |
"mean_rank": "",
|
| 165364 |
-
"num_queries": ""
|
|
|
|
| 165365 |
},
|
| 165366 |
{
|
| 165367 |
"task": "next_action",
|
|
@@ -165397,7 +165469,8 @@
|
|
| 165397 |
"top10_accuracy": "",
|
| 165398 |
"median_rank": "",
|
| 165399 |
"mean_rank": "",
|
| 165400 |
-
"num_queries": ""
|
|
|
|
| 165401 |
},
|
| 165402 |
{
|
| 165403 |
"task": "next_action",
|
|
@@ -165433,7 +165506,8 @@
|
|
| 165433 |
"top10_accuracy": "",
|
| 165434 |
"median_rank": "",
|
| 165435 |
"mean_rank": "",
|
| 165436 |
-
"num_queries": ""
|
|
|
|
| 165437 |
},
|
| 165438 |
{
|
| 165439 |
"task": "hand_trajectory_forecast",
|
|
@@ -165469,7 +165543,8 @@
|
|
| 165469 |
"top10_accuracy": "",
|
| 165470 |
"median_rank": "",
|
| 165471 |
"mean_rank": "",
|
| 165472 |
-
"num_queries": ""
|
|
|
|
| 165473 |
},
|
| 165474 |
{
|
| 165475 |
"task": "hand_trajectory_forecast",
|
|
@@ -165505,7 +165580,8 @@
|
|
| 165505 |
"top10_accuracy": "",
|
| 165506 |
"median_rank": "",
|
| 165507 |
"mean_rank": "",
|
| 165508 |
-
"num_queries": ""
|
|
|
|
| 165509 |
},
|
| 165510 |
{
|
| 165511 |
"task": "hand_trajectory_forecast",
|
|
@@ -165541,7 +165617,8 @@
|
|
| 165541 |
"top10_accuracy": "",
|
| 165542 |
"median_rank": "",
|
| 165543 |
"mean_rank": "",
|
| 165544 |
-
"num_queries": ""
|
|
|
|
| 165545 |
},
|
| 165546 |
{
|
| 165547 |
"task": "hand_trajectory_forecast",
|
|
@@ -165577,7 +165654,8 @@
|
|
| 165577 |
"top10_accuracy": "",
|
| 165578 |
"median_rank": "",
|
| 165579 |
"mean_rank": "",
|
| 165580 |
-
"num_queries": ""
|
|
|
|
| 165581 |
},
|
| 165582 |
{
|
| 165583 |
"task": "hand_trajectory_forecast",
|
|
@@ -165613,7 +165691,8 @@
|
|
| 165613 |
"top10_accuracy": "",
|
| 165614 |
"median_rank": "",
|
| 165615 |
"mean_rank": "",
|
| 165616 |
-
"num_queries": ""
|
|
|
|
| 165617 |
},
|
| 165618 |
{
|
| 165619 |
"task": "hand_trajectory_forecast",
|
|
@@ -165649,7 +165728,8 @@
|
|
| 165649 |
"top10_accuracy": "",
|
| 165650 |
"median_rank": "",
|
| 165651 |
"mean_rank": "",
|
| 165652 |
-
"num_queries": ""
|
|
|
|
| 165653 |
},
|
| 165654 |
{
|
| 165655 |
"task": "hand_trajectory_forecast",
|
|
@@ -165685,7 +165765,8 @@
|
|
| 165685 |
"top10_accuracy": "",
|
| 165686 |
"median_rank": "",
|
| 165687 |
"mean_rank": "",
|
| 165688 |
-
"num_queries": ""
|
|
|
|
| 165689 |
},
|
| 165690 |
{
|
| 165691 |
"task": "hand_trajectory_forecast",
|
|
@@ -165721,7 +165802,8 @@
|
|
| 165721 |
"top10_accuracy": "",
|
| 165722 |
"median_rank": "",
|
| 165723 |
"mean_rank": "",
|
| 165724 |
-
"num_queries": ""
|
|
|
|
| 165725 |
},
|
| 165726 |
{
|
| 165727 |
"task": "hand_trajectory_forecast",
|
|
@@ -165757,7 +165839,8 @@
|
|
| 165757 |
"top10_accuracy": "",
|
| 165758 |
"median_rank": "",
|
| 165759 |
"mean_rank": "",
|
| 165760 |
-
"num_queries": ""
|
|
|
|
| 165761 |
},
|
| 165762 |
{
|
| 165763 |
"task": "contact_prediction",
|
|
@@ -165793,7 +165876,8 @@
|
|
| 165793 |
"top10_accuracy": "",
|
| 165794 |
"median_rank": "",
|
| 165795 |
"mean_rank": "",
|
| 165796 |
-
"num_queries": ""
|
|
|
|
| 165797 |
},
|
| 165798 |
{
|
| 165799 |
"task": "contact_prediction",
|
|
@@ -165829,7 +165913,8 @@
|
|
| 165829 |
"top10_accuracy": "",
|
| 165830 |
"median_rank": "",
|
| 165831 |
"mean_rank": "",
|
| 165832 |
-
"num_queries": ""
|
|
|
|
| 165833 |
},
|
| 165834 |
{
|
| 165835 |
"task": "contact_prediction",
|
|
@@ -165865,7 +165950,8 @@
|
|
| 165865 |
"top10_accuracy": "",
|
| 165866 |
"median_rank": "",
|
| 165867 |
"mean_rank": "",
|
| 165868 |
-
"num_queries": ""
|
|
|
|
| 165869 |
},
|
| 165870 |
{
|
| 165871 |
"task": "contact_prediction",
|
|
@@ -165901,7 +165987,8 @@
|
|
| 165901 |
"top10_accuracy": "",
|
| 165902 |
"median_rank": "",
|
| 165903 |
"mean_rank": "",
|
| 165904 |
-
"num_queries": ""
|
|
|
|
| 165905 |
},
|
| 165906 |
{
|
| 165907 |
"task": "contact_prediction",
|
|
@@ -165937,7 +166024,8 @@
|
|
| 165937 |
"top10_accuracy": "",
|
| 165938 |
"median_rank": "",
|
| 165939 |
"mean_rank": "",
|
| 165940 |
-
"num_queries": ""
|
|
|
|
| 165941 |
},
|
| 165942 |
{
|
| 165943 |
"task": "contact_prediction",
|
|
@@ -165973,7 +166061,8 @@
|
|
| 165973 |
"top10_accuracy": "",
|
| 165974 |
"median_rank": "",
|
| 165975 |
"mean_rank": "",
|
| 165976 |
-
"num_queries": ""
|
|
|
|
| 165977 |
},
|
| 165978 |
{
|
| 165979 |
"task": "contact_prediction",
|
|
@@ -166009,7 +166098,8 @@
|
|
| 166009 |
"top10_accuracy": "",
|
| 166010 |
"median_rank": "",
|
| 166011 |
"mean_rank": "",
|
| 166012 |
-
"num_queries": ""
|
|
|
|
| 166013 |
},
|
| 166014 |
{
|
| 166015 |
"task": "contact_prediction",
|
|
@@ -166045,7 +166135,8 @@
|
|
| 166045 |
"top10_accuracy": "",
|
| 166046 |
"median_rank": "",
|
| 166047 |
"mean_rank": "",
|
| 166048 |
-
"num_queries": ""
|
|
|
|
| 166049 |
},
|
| 166050 |
{
|
| 166051 |
"task": "contact_prediction",
|
|
@@ -166081,7 +166172,8 @@
|
|
| 166081 |
"top10_accuracy": "",
|
| 166082 |
"median_rank": "",
|
| 166083 |
"mean_rank": "",
|
| 166084 |
-
"num_queries": ""
|
|
|
|
| 166085 |
},
|
| 166086 |
{
|
| 166087 |
"task": "object_relevance",
|
|
@@ -166117,7 +166209,8 @@
|
|
| 166117 |
"top10_accuracy": "",
|
| 166118 |
"median_rank": "",
|
| 166119 |
"mean_rank": "",
|
| 166120 |
-
"num_queries": ""
|
|
|
|
| 166121 |
},
|
| 166122 |
{
|
| 166123 |
"task": "object_relevance",
|
|
@@ -166153,7 +166246,8 @@
|
|
| 166153 |
"top10_accuracy": "",
|
| 166154 |
"median_rank": "",
|
| 166155 |
"mean_rank": "",
|
| 166156 |
-
"num_queries": ""
|
|
|
|
| 166157 |
},
|
| 166158 |
{
|
| 166159 |
"task": "object_relevance",
|
|
@@ -166189,7 +166283,8 @@
|
|
| 166189 |
"top10_accuracy": "",
|
| 166190 |
"median_rank": "",
|
| 166191 |
"mean_rank": "",
|
| 166192 |
-
"num_queries": ""
|
|
|
|
| 166193 |
},
|
| 166194 |
{
|
| 166195 |
"task": "object_relevance",
|
|
@@ -166225,7 +166320,8 @@
|
|
| 166225 |
"top10_accuracy": "",
|
| 166226 |
"median_rank": "",
|
| 166227 |
"mean_rank": "",
|
| 166228 |
-
"num_queries": ""
|
|
|
|
| 166229 |
},
|
| 166230 |
{
|
| 166231 |
"task": "object_relevance",
|
|
@@ -166261,7 +166357,8 @@
|
|
| 166261 |
"top10_accuracy": "",
|
| 166262 |
"median_rank": "",
|
| 166263 |
"mean_rank": "",
|
| 166264 |
-
"num_queries": ""
|
|
|
|
| 166265 |
},
|
| 166266 |
{
|
| 166267 |
"task": "object_relevance",
|
|
@@ -166297,7 +166394,8 @@
|
|
| 166297 |
"top10_accuracy": "",
|
| 166298 |
"median_rank": "",
|
| 166299 |
"mean_rank": "",
|
| 166300 |
-
"num_queries": ""
|
|
|
|
| 166301 |
},
|
| 166302 |
{
|
| 166303 |
"task": "object_relevance",
|
|
@@ -166333,7 +166431,8 @@
|
|
| 166333 |
"top10_accuracy": "",
|
| 166334 |
"median_rank": "",
|
| 166335 |
"mean_rank": "",
|
| 166336 |
-
"num_queries": ""
|
|
|
|
| 166337 |
},
|
| 166338 |
{
|
| 166339 |
"task": "object_relevance",
|
|
@@ -166369,7 +166468,8 @@
|
|
| 166369 |
"top10_accuracy": "",
|
| 166370 |
"median_rank": "",
|
| 166371 |
"mean_rank": "",
|
| 166372 |
-
"num_queries": ""
|
|
|
|
| 166373 |
},
|
| 166374 |
{
|
| 166375 |
"task": "object_relevance",
|
|
@@ -166405,7 +166505,8 @@
|
|
| 166405 |
"top10_accuracy": "",
|
| 166406 |
"median_rank": "",
|
| 166407 |
"mean_rank": "",
|
| 166408 |
-
"num_queries": ""
|
|
|
|
| 166409 |
},
|
| 166410 |
{
|
| 166411 |
"task": "caption_grounding",
|
|
@@ -166441,7 +166542,8 @@
|
|
| 166441 |
"top10_accuracy": "0.4454022988505747",
|
| 166442 |
"median_rank": "13.0",
|
| 166443 |
"mean_rank": "23.19827651977539",
|
| 166444 |
-
"num_queries": "348"
|
|
|
|
| 166445 |
},
|
| 166446 |
{
|
| 166447 |
"task": "caption_grounding",
|
|
@@ -166477,7 +166579,8 @@
|
|
| 166477 |
"top10_accuracy": "0.034482758620689655",
|
| 166478 |
"median_rank": "162.0",
|
| 166479 |
"mean_rank": "161.4770050048828",
|
| 166480 |
-
"num_queries": "348"
|
|
|
|
| 166481 |
},
|
| 166482 |
{
|
| 166483 |
"task": "caption_grounding",
|
|
@@ -166513,7 +166616,8 @@
|
|
| 166513 |
"top10_accuracy": "0.03735632183908046",
|
| 166514 |
"median_rank": "114.0",
|
| 166515 |
"mean_rank": "137.90805053710938",
|
| 166516 |
-
"num_queries": "348"
|
|
|
|
| 166517 |
},
|
| 166518 |
{
|
| 166519 |
"task": "caption_grounding",
|
|
@@ -166549,7 +166653,8 @@
|
|
| 166549 |
"top10_accuracy": "0.04597701149425287",
|
| 166550 |
"median_rank": "143.5",
|
| 166551 |
"mean_rank": "155.4712677001953",
|
| 166552 |
-
"num_queries": "348"
|
|
|
|
| 166553 |
},
|
| 166554 |
{
|
| 166555 |
"task": "caption_grounding",
|
|
@@ -166585,7 +166690,8 @@
|
|
| 166585 |
"top10_accuracy": "0.04885057471264368",
|
| 166586 |
"median_rank": "110.5",
|
| 166587 |
"mean_rank": "130.32470703125",
|
| 166588 |
-
"num_queries": "348"
|
|
|
|
| 166589 |
},
|
| 166590 |
{
|
| 166591 |
"task": "caption_grounding",
|
|
@@ -166621,7 +166727,8 @@
|
|
| 166621 |
"top10_accuracy": "0.04597701149425287",
|
| 166622 |
"median_rank": "123.0",
|
| 166623 |
"mean_rank": "138.61207580566406",
|
| 166624 |
-
"num_queries": "348"
|
|
|
|
| 166625 |
},
|
| 166626 |
{
|
| 166627 |
"task": "caption_grounding",
|
|
@@ -166657,7 +166764,8 @@
|
|
| 166657 |
"top10_accuracy": "0.07758620689655173",
|
| 166658 |
"median_rank": "141.0",
|
| 166659 |
"mean_rank": "152.14942932128906",
|
| 166660 |
-
"num_queries": "348"
|
|
|
|
| 166661 |
},
|
| 166662 |
{
|
| 166663 |
"task": "caption_grounding",
|
|
@@ -166693,7 +166801,8 @@
|
|
| 166693 |
"top10_accuracy": "0.47126436781609193",
|
| 166694 |
"median_rank": "12.0",
|
| 166695 |
"mean_rank": "15.106322288513184",
|
| 166696 |
-
"num_queries": "348"
|
|
|
|
| 166697 |
},
|
| 166698 |
{
|
| 166699 |
"task": "caption_grounding",
|
|
@@ -166729,7 +166838,8 @@
|
|
| 166729 |
"top10_accuracy": "0.06896551724137931",
|
| 166730 |
"median_rank": "132.0",
|
| 166731 |
"mean_rank": "137.30746459960938",
|
| 166732 |
-
"num_queries": "348"
|
|
|
|
| 166733 |
},
|
| 166734 |
{
|
| 166735 |
"task": "cross_modal_retrieval",
|
|
@@ -166765,7 +166875,8 @@
|
|
| 166765 |
"top10_accuracy": "0.9798850574712644",
|
| 166766 |
"median_rank": "1.0",
|
| 166767 |
"mean_rank": "2.0862069129943848",
|
| 166768 |
-
"num_queries": "348"
|
|
|
|
| 166769 |
},
|
| 166770 |
{
|
| 166771 |
"task": "cross_modal_retrieval",
|
|
@@ -166801,7 +166912,8 @@
|
|
| 166801 |
"top10_accuracy": "0.9798850574712644",
|
| 166802 |
"median_rank": "1.0",
|
| 166803 |
"mean_rank": "3.844827651977539",
|
| 166804 |
-
"num_queries": "348"
|
|
|
|
| 166805 |
},
|
| 166806 |
{
|
| 166807 |
"task": "cross_modal_retrieval",
|
|
@@ -166837,7 +166949,8 @@
|
|
| 166837 |
"top10_accuracy": "0.8620689655172413",
|
| 166838 |
"median_rank": "1.0",
|
| 166839 |
"mean_rank": "5.729885101318359",
|
| 166840 |
-
"num_queries": "348"
|
|
|
|
| 166841 |
},
|
| 166842 |
{
|
| 166843 |
"task": "cross_modal_retrieval",
|
|
@@ -166873,7 +166986,8 @@
|
|
| 166873 |
"top10_accuracy": "0.6551724137931034",
|
| 166874 |
"median_rank": "4.0",
|
| 166875 |
"mean_rank": "15.623562812805176",
|
| 166876 |
-
"num_queries": "348"
|
|
|
|
| 166877 |
},
|
| 166878 |
{
|
| 166879 |
"task": "cross_modal_retrieval",
|
|
@@ -166909,7 +167023,8 @@
|
|
| 166909 |
"top10_accuracy": "0.3994252873563218",
|
| 166910 |
"median_rank": "21.5",
|
| 166911 |
"mean_rank": "49.181034088134766",
|
| 166912 |
-
"num_queries": "348"
|
|
|
|
| 166913 |
},
|
| 166914 |
{
|
| 166915 |
"task": "cross_modal_retrieval",
|
|
@@ -166945,7 +167060,8 @@
|
|
| 166945 |
"top10_accuracy": "0.5229885057471264",
|
| 166946 |
"median_rank": "10.0",
|
| 166947 |
"mean_rank": "20.577587127685547",
|
| 166948 |
-
"num_queries": "348"
|
|
|
|
| 166949 |
},
|
| 166950 |
{
|
| 166951 |
"task": "cross_modal_retrieval",
|
|
@@ -166981,7 +167097,8 @@
|
|
| 166981 |
"top10_accuracy": "0.031609195402298854",
|
| 166982 |
"median_rank": "152.5",
|
| 166983 |
"mean_rank": "161.44540405273438",
|
| 166984 |
-
"num_queries": "348"
|
|
|
|
| 166985 |
},
|
| 166986 |
{
|
| 166987 |
"task": "cross_modal_retrieval",
|
|
@@ -167017,7 +167134,8 @@
|
|
| 167017 |
"top10_accuracy": "0.05747126436781609",
|
| 167018 |
"median_rank": "138.0",
|
| 167019 |
"mean_rank": "146.83045959472656",
|
| 167020 |
-
"num_queries": "348"
|
|
|
|
| 167021 |
},
|
| 167022 |
{
|
| 167023 |
"task": "cross_modal_retrieval",
|
|
@@ -167053,7 +167171,8 @@
|
|
| 167053 |
"top10_accuracy": "0.9770114942528736",
|
| 167054 |
"median_rank": "1.0",
|
| 167055 |
"mean_rank": "2.181034564971924",
|
| 167056 |
-
"num_queries": "348"
|
|
|
|
| 167057 |
},
|
| 167058 |
{
|
| 167059 |
"task": "modality_reconstruction",
|
|
@@ -167089,7 +167208,8 @@
|
|
| 167089 |
"top10_accuracy": "",
|
| 167090 |
"median_rank": "",
|
| 167091 |
"mean_rank": "",
|
| 167092 |
-
"num_queries": ""
|
|
|
|
| 167093 |
},
|
| 167094 |
{
|
| 167095 |
"task": "modality_reconstruction",
|
|
@@ -167125,7 +167245,8 @@
|
|
| 167125 |
"top10_accuracy": "",
|
| 167126 |
"median_rank": "",
|
| 167127 |
"mean_rank": "",
|
| 167128 |
-
"num_queries": ""
|
|
|
|
| 167129 |
},
|
| 167130 |
{
|
| 167131 |
"task": "modality_reconstruction",
|
|
@@ -167161,7 +167282,8 @@
|
|
| 167161 |
"top10_accuracy": "",
|
| 167162 |
"median_rank": "",
|
| 167163 |
"mean_rank": "",
|
| 167164 |
-
"num_queries": ""
|
|
|
|
| 167165 |
},
|
| 167166 |
{
|
| 167167 |
"task": "modality_reconstruction",
|
|
@@ -167197,7 +167319,8 @@
|
|
| 167197 |
"top10_accuracy": "",
|
| 167198 |
"median_rank": "",
|
| 167199 |
"mean_rank": "",
|
| 167200 |
-
"num_queries": ""
|
|
|
|
| 167201 |
},
|
| 167202 |
{
|
| 167203 |
"task": "modality_reconstruction",
|
|
@@ -167233,7 +167356,8 @@
|
|
| 167233 |
"top10_accuracy": "",
|
| 167234 |
"median_rank": "",
|
| 167235 |
"mean_rank": "",
|
| 167236 |
-
"num_queries": ""
|
|
|
|
| 167237 |
},
|
| 167238 |
{
|
| 167239 |
"task": "modality_reconstruction",
|
|
@@ -167269,7 +167393,8 @@
|
|
| 167269 |
"top10_accuracy": "",
|
| 167270 |
"median_rank": "",
|
| 167271 |
"mean_rank": "",
|
| 167272 |
-
"num_queries": ""
|
|
|
|
| 167273 |
},
|
| 167274 |
{
|
| 167275 |
"task": "modality_reconstruction",
|
|
@@ -167305,7 +167430,8 @@
|
|
| 167305 |
"top10_accuracy": "",
|
| 167306 |
"median_rank": "",
|
| 167307 |
"mean_rank": "",
|
| 167308 |
-
"num_queries": ""
|
|
|
|
| 167309 |
},
|
| 167310 |
{
|
| 167311 |
"task": "modality_reconstruction",
|
|
@@ -167341,7 +167467,8 @@
|
|
| 167341 |
"top10_accuracy": "",
|
| 167342 |
"median_rank": "",
|
| 167343 |
"mean_rank": "",
|
| 167344 |
-
"num_queries": ""
|
|
|
|
| 167345 |
},
|
| 167346 |
{
|
| 167347 |
"task": "modality_reconstruction",
|
|
@@ -167377,7 +167504,8 @@
|
|
| 167377 |
"top10_accuracy": "",
|
| 167378 |
"median_rank": "",
|
| 167379 |
"mean_rank": "",
|
| 167380 |
-
"num_queries": ""
|
|
|
|
| 167381 |
},
|
| 167382 |
{
|
| 167383 |
"task": "temporal_order",
|
|
@@ -167413,7 +167541,8 @@
|
|
| 167413 |
"top10_accuracy": "",
|
| 167414 |
"median_rank": "",
|
| 167415 |
"mean_rank": "",
|
| 167416 |
-
"num_queries": ""
|
|
|
|
| 167417 |
},
|
| 167418 |
{
|
| 167419 |
"task": "temporal_order",
|
|
@@ -167449,7 +167578,8 @@
|
|
| 167449 |
"top10_accuracy": "",
|
| 167450 |
"median_rank": "",
|
| 167451 |
"mean_rank": "",
|
| 167452 |
-
"num_queries": ""
|
|
|
|
| 167453 |
},
|
| 167454 |
{
|
| 167455 |
"task": "temporal_order",
|
|
@@ -167485,7 +167615,8 @@
|
|
| 167485 |
"top10_accuracy": "",
|
| 167486 |
"median_rank": "",
|
| 167487 |
"mean_rank": "",
|
| 167488 |
-
"num_queries": ""
|
|
|
|
| 167489 |
},
|
| 167490 |
{
|
| 167491 |
"task": "temporal_order",
|
|
@@ -167521,7 +167652,8 @@
|
|
| 167521 |
"top10_accuracy": "",
|
| 167522 |
"median_rank": "",
|
| 167523 |
"mean_rank": "",
|
| 167524 |
-
"num_queries": ""
|
|
|
|
| 167525 |
},
|
| 167526 |
{
|
| 167527 |
"task": "temporal_order",
|
|
@@ -167557,7 +167689,8 @@
|
|
| 167557 |
"top10_accuracy": "",
|
| 167558 |
"median_rank": "",
|
| 167559 |
"mean_rank": "",
|
| 167560 |
-
"num_queries": ""
|
|
|
|
| 167561 |
},
|
| 167562 |
{
|
| 167563 |
"task": "temporal_order",
|
|
@@ -167593,7 +167726,8 @@
|
|
| 167593 |
"top10_accuracy": "",
|
| 167594 |
"median_rank": "",
|
| 167595 |
"mean_rank": "",
|
| 167596 |
-
"num_queries": ""
|
|
|
|
| 167597 |
},
|
| 167598 |
{
|
| 167599 |
"task": "temporal_order",
|
|
@@ -167629,7 +167763,8 @@
|
|
| 167629 |
"top10_accuracy": "",
|
| 167630 |
"median_rank": "",
|
| 167631 |
"mean_rank": "",
|
| 167632 |
-
"num_queries": ""
|
|
|
|
| 167633 |
},
|
| 167634 |
{
|
| 167635 |
"task": "temporal_order",
|
|
@@ -167665,7 +167800,8 @@
|
|
| 167665 |
"top10_accuracy": "",
|
| 167666 |
"median_rank": "",
|
| 167667 |
"mean_rank": "",
|
| 167668 |
-
"num_queries": ""
|
|
|
|
| 167669 |
},
|
| 167670 |
{
|
| 167671 |
"task": "temporal_order",
|
|
@@ -167701,7 +167837,8 @@
|
|
| 167701 |
"top10_accuracy": "",
|
| 167702 |
"median_rank": "",
|
| 167703 |
"mean_rank": "",
|
| 167704 |
-
"num_queries": ""
|
|
|
|
| 167705 |
},
|
| 167706 |
{
|
| 167707 |
"task": "misalignment_detection",
|
|
@@ -167737,7 +167874,8 @@
|
|
| 167737 |
"top10_accuracy": "",
|
| 167738 |
"median_rank": "",
|
| 167739 |
"mean_rank": "",
|
| 167740 |
-
"num_queries": ""
|
|
|
|
| 167741 |
},
|
| 167742 |
{
|
| 167743 |
"task": "misalignment_detection",
|
|
@@ -167773,7 +167911,8 @@
|
|
| 167773 |
"top10_accuracy": "",
|
| 167774 |
"median_rank": "",
|
| 167775 |
"mean_rank": "",
|
| 167776 |
-
"num_queries": ""
|
|
|
|
| 167777 |
},
|
| 167778 |
{
|
| 167779 |
"task": "misalignment_detection",
|
|
@@ -167809,7 +167948,8 @@
|
|
| 167809 |
"top10_accuracy": "",
|
| 167810 |
"median_rank": "",
|
| 167811 |
"mean_rank": "",
|
| 167812 |
-
"num_queries": ""
|
|
|
|
| 167813 |
},
|
| 167814 |
{
|
| 167815 |
"task": "misalignment_detection",
|
|
@@ -167845,7 +167985,8 @@
|
|
| 167845 |
"top10_accuracy": "",
|
| 167846 |
"median_rank": "",
|
| 167847 |
"mean_rank": "",
|
| 167848 |
-
"num_queries": ""
|
|
|
|
| 167849 |
},
|
| 167850 |
{
|
| 167851 |
"task": "misalignment_detection",
|
|
@@ -167881,7 +168022,8 @@
|
|
| 167881 |
"top10_accuracy": "",
|
| 167882 |
"median_rank": "",
|
| 167883 |
"mean_rank": "",
|
| 167884 |
-
"num_queries": ""
|
|
|
|
| 167885 |
},
|
| 167886 |
{
|
| 167887 |
"task": "misalignment_detection",
|
|
@@ -167917,7 +168059,8 @@
|
|
| 167917 |
"top10_accuracy": "",
|
| 167918 |
"median_rank": "",
|
| 167919 |
"mean_rank": "",
|
| 167920 |
-
"num_queries": ""
|
|
|
|
| 167921 |
},
|
| 167922 |
{
|
| 167923 |
"task": "misalignment_detection",
|
|
@@ -167953,7 +168096,8 @@
|
|
| 167953 |
"top10_accuracy": "",
|
| 167954 |
"median_rank": "",
|
| 167955 |
"mean_rank": "",
|
| 167956 |
-
"num_queries": ""
|
|
|
|
| 167957 |
},
|
| 167958 |
{
|
| 167959 |
"task": "misalignment_detection",
|
|
@@ -167989,7 +168133,8 @@
|
|
| 167989 |
"top10_accuracy": "",
|
| 167990 |
"median_rank": "",
|
| 167991 |
"mean_rank": "",
|
| 167992 |
-
"num_queries": ""
|
|
|
|
| 167993 |
},
|
| 167994 |
{
|
| 167995 |
"task": "misalignment_detection",
|
|
@@ -168025,7 +168170,8 @@
|
|
| 168025 |
"top10_accuracy": "",
|
| 168026 |
"median_rank": "",
|
| 168027 |
"mean_rank": "",
|
| 168028 |
-
"num_queries": ""
|
|
|
|
| 168029 |
}
|
| 168030 |
]
|
| 168031 |
},
|
|
@@ -168841,4 +168987,4 @@
|
|
| 168841 |
"num_queries": "308"
|
| 168842 |
}
|
| 168843 |
]
|
| 168844 |
-
}
|
|
|
|
| 1 |
{
|
| 2 |
"meta": {
|
| 3 |
+
"generated_at": "2026-06-06T21:22:14.639673+00:00",
|
| 4 |
"window_count": 1161,
|
| 5 |
"feature_dim": 8546,
|
| 6 |
"object_label_rows": 1161,
|
|
|
|
| 16 |
}
|
| 17 |
},
|
| 18 |
"tasks": {
|
| 19 |
+
"timeline_action": "Action Recognition",
|
| 20 |
+
"timeline_subtask": "Procedure Step Recognition",
|
| 21 |
+
"transition_detection": "Action Boundary Detection",
|
| 22 |
"next_action": "Next-Action Prediction",
|
| 23 |
"contact_prediction": "Contact State Prediction",
|
| 24 |
+
"object_relevance": "Object Relevance Prediction"
|
| 25 |
+
},
|
| 26 |
+
"task_display_names": {
|
| 27 |
+
"timeline_action": "Action Recognition",
|
| 28 |
+
"timeline_subtask": "Procedure Step Recognition",
|
| 29 |
+
"transition_detection": "Action Boundary Detection",
|
| 30 |
+
"next_action": "Next-Action Prediction",
|
| 31 |
+
"hand_trajectory_forecast": "Hand Trajectory Forecasting",
|
| 32 |
+
"contact_prediction": "Contact State Prediction",
|
| 33 |
+
"object_relevance": "Object Relevance Prediction",
|
| 34 |
+
"caption_grounding": "Language Grounding",
|
| 35 |
+
"cross_modal_retrieval": "Cross-Modal Retrieval",
|
| 36 |
+
"modality_reconstruction": "Cross-Modal Reconstruction",
|
| 37 |
+
"temporal_order": "Temporal Order Verification",
|
| 38 |
+
"misalignment_detection": "Multimodal Synchronization Detection"
|
| 39 |
},
|
| 40 |
"feature_blocks": [
|
| 41 |
{
|
|
|
|
| 152 |
},
|
| 153 |
{
|
| 154 |
"name": "audio_fisheye_cam0_aac",
|
| 155 |
+
"display": "Audio",
|
| 156 |
"modality": "audio",
|
| 157 |
"start": 7343,
|
| 158 |
"end": 7511,
|
|
|
|
| 163972 |
"ablation": {
|
| 163973 |
"best_by_task": {
|
| 163974 |
"caption_grounding": {
|
| 163975 |
+
"task": "caption_grounding",
|
| 163976 |
+
"task_display_name": "Language Grounding",
|
| 163977 |
"best": {
|
| 163978 |
"modality_group": "language",
|
| 163979 |
"modality_display": "Language",
|
|
|
|
| 163989 |
}
|
| 163990 |
},
|
| 163991 |
"contact_prediction": {
|
| 163992 |
+
"task": "contact_prediction",
|
| 163993 |
+
"task_display_name": "Contact State Prediction",
|
| 163994 |
"best": {
|
| 163995 |
"modality_group": "all_features",
|
| 163996 |
"modality_display": "All Features",
|
|
|
|
| 164006 |
}
|
| 164007 |
},
|
| 164008 |
"cross_modal_retrieval": {
|
| 164009 |
+
"task": "cross_modal_retrieval",
|
| 164010 |
+
"task_display_name": "Cross-Modal Retrieval",
|
| 164011 |
"best": {
|
| 164012 |
"modality_group": "all_features",
|
| 164013 |
"modality_display": "All Features",
|
|
|
|
| 164023 |
}
|
| 164024 |
},
|
| 164025 |
"hand_trajectory_forecast": {
|
| 164026 |
+
"task": "hand_trajectory_forecast",
|
| 164027 |
+
"task_display_name": "Hand Trajectory Forecasting",
|
| 164028 |
"best": {
|
| 164029 |
"modality_group": "inertial",
|
| 164030 |
"modality_display": "Inertial",
|
|
|
|
| 164040 |
}
|
| 164041 |
},
|
| 164042 |
"misalignment_detection": {
|
| 164043 |
+
"task": "misalignment_detection",
|
| 164044 |
+
"task_display_name": "Multimodal Synchronization Detection",
|
| 164045 |
"best": {
|
| 164046 |
"modality_group": "audio",
|
| 164047 |
"modality_display": "Audio",
|
|
|
|
| 164057 |
}
|
| 164058 |
},
|
| 164059 |
"modality_reconstruction": {
|
| 164060 |
+
"task": "modality_reconstruction",
|
| 164061 |
+
"task_display_name": "Cross-Modal Reconstruction",
|
| 164062 |
"best": {
|
| 164063 |
"modality_group": "video",
|
| 164064 |
"modality_display": "Video",
|
|
|
|
| 164074 |
}
|
| 164075 |
},
|
| 164076 |
"next_action": {
|
| 164077 |
+
"task": "next_action",
|
| 164078 |
+
"task_display_name": "Next-Action Prediction",
|
| 164079 |
"best": {
|
| 164080 |
"modality_group": "language",
|
| 164081 |
"modality_display": "Language",
|
|
|
|
| 164091 |
}
|
| 164092 |
},
|
| 164093 |
"object_relevance": {
|
| 164094 |
+
"task": "object_relevance",
|
| 164095 |
+
"task_display_name": "Object Relevance Prediction",
|
| 164096 |
"best": {
|
| 164097 |
"modality_group": "language",
|
| 164098 |
"modality_display": "Language",
|
|
|
|
| 164108 |
}
|
| 164109 |
},
|
| 164110 |
"temporal_order": {
|
| 164111 |
+
"task": "temporal_order",
|
| 164112 |
+
"task_display_name": "Temporal Order Verification",
|
| 164113 |
"best": {
|
| 164114 |
"modality_group": "pose_slam",
|
| 164115 |
"modality_display": "Pose + SLAM",
|
|
|
|
| 164125 |
}
|
| 164126 |
},
|
| 164127 |
"timeline_action": {
|
| 164128 |
+
"task": "timeline_action",
|
| 164129 |
+
"task_display_name": "Action Recognition",
|
| 164130 |
"best": {
|
| 164131 |
"modality_group": "language",
|
| 164132 |
"modality_display": "Language",
|
|
|
|
| 164142 |
}
|
| 164143 |
},
|
| 164144 |
"timeline_subtask": {
|
| 164145 |
+
"task": "timeline_subtask",
|
| 164146 |
+
"task_display_name": "Procedure Step Recognition",
|
| 164147 |
"best": {
|
| 164148 |
"modality_group": "language",
|
| 164149 |
"modality_display": "Language",
|
|
|
|
| 164159 |
}
|
| 164160 |
},
|
| 164161 |
"transition_detection": {
|
| 164162 |
+
"task": "transition_detection",
|
| 164163 |
+
"task_display_name": "Action Boundary Detection",
|
| 164164 |
"best": {
|
| 164165 |
"modality_group": "language",
|
| 164166 |
"modality_display": "Language",
|
|
|
|
| 164211 |
"top10_accuracy": "",
|
| 164212 |
"median_rank": "",
|
| 164213 |
"mean_rank": "",
|
| 164214 |
+
"num_queries": "",
|
| 164215 |
+
"task_display_name": "Action Recognition"
|
| 164216 |
},
|
| 164217 |
{
|
| 164218 |
"task": "timeline_action",
|
|
|
|
| 164248 |
"top10_accuracy": "",
|
| 164249 |
"median_rank": "",
|
| 164250 |
"mean_rank": "",
|
| 164251 |
+
"num_queries": "",
|
| 164252 |
+
"task_display_name": "Action Recognition"
|
| 164253 |
},
|
| 164254 |
{
|
| 164255 |
"task": "timeline_action",
|
|
|
|
| 164285 |
"top10_accuracy": "",
|
| 164286 |
"median_rank": "",
|
| 164287 |
"mean_rank": "",
|
| 164288 |
+
"num_queries": "",
|
| 164289 |
+
"task_display_name": "Action Recognition"
|
| 164290 |
},
|
| 164291 |
{
|
| 164292 |
"task": "timeline_action",
|
|
|
|
| 164322 |
"top10_accuracy": "",
|
| 164323 |
"median_rank": "",
|
| 164324 |
"mean_rank": "",
|
| 164325 |
+
"num_queries": "",
|
| 164326 |
+
"task_display_name": "Action Recognition"
|
| 164327 |
},
|
| 164328 |
{
|
| 164329 |
"task": "timeline_action",
|
|
|
|
| 164359 |
"top10_accuracy": "",
|
| 164360 |
"median_rank": "",
|
| 164361 |
"mean_rank": "",
|
| 164362 |
+
"num_queries": "",
|
| 164363 |
+
"task_display_name": "Action Recognition"
|
| 164364 |
},
|
| 164365 |
{
|
| 164366 |
"task": "timeline_action",
|
|
|
|
| 164396 |
"top10_accuracy": "",
|
| 164397 |
"median_rank": "",
|
| 164398 |
"mean_rank": "",
|
| 164399 |
+
"num_queries": "",
|
| 164400 |
+
"task_display_name": "Action Recognition"
|
| 164401 |
},
|
| 164402 |
{
|
| 164403 |
"task": "timeline_action",
|
|
|
|
| 164433 |
"top10_accuracy": "",
|
| 164434 |
"median_rank": "",
|
| 164435 |
"mean_rank": "",
|
| 164436 |
+
"num_queries": "",
|
| 164437 |
+
"task_display_name": "Action Recognition"
|
| 164438 |
},
|
| 164439 |
{
|
| 164440 |
"task": "timeline_action",
|
|
|
|
| 164470 |
"top10_accuracy": "",
|
| 164471 |
"median_rank": "",
|
| 164472 |
"mean_rank": "",
|
| 164473 |
+
"num_queries": "",
|
| 164474 |
+
"task_display_name": "Action Recognition"
|
| 164475 |
},
|
| 164476 |
{
|
| 164477 |
"task": "timeline_action",
|
|
|
|
| 164507 |
"top10_accuracy": "",
|
| 164508 |
"median_rank": "",
|
| 164509 |
"mean_rank": "",
|
| 164510 |
+
"num_queries": "",
|
| 164511 |
+
"task_display_name": "Action Recognition"
|
| 164512 |
},
|
| 164513 |
{
|
| 164514 |
"task": "timeline_subtask",
|
|
|
|
| 164544 |
"top10_accuracy": "",
|
| 164545 |
"median_rank": "",
|
| 164546 |
"mean_rank": "",
|
| 164547 |
+
"num_queries": "",
|
| 164548 |
+
"task_display_name": "Procedure Step Recognition"
|
| 164549 |
},
|
| 164550 |
{
|
| 164551 |
"task": "timeline_subtask",
|
|
|
|
| 164581 |
"top10_accuracy": "",
|
| 164582 |
"median_rank": "",
|
| 164583 |
"mean_rank": "",
|
| 164584 |
+
"num_queries": "",
|
| 164585 |
+
"task_display_name": "Procedure Step Recognition"
|
| 164586 |
},
|
| 164587 |
{
|
| 164588 |
"task": "timeline_subtask",
|
|
|
|
| 164618 |
"top10_accuracy": "",
|
| 164619 |
"median_rank": "",
|
| 164620 |
"mean_rank": "",
|
| 164621 |
+
"num_queries": "",
|
| 164622 |
+
"task_display_name": "Procedure Step Recognition"
|
| 164623 |
},
|
| 164624 |
{
|
| 164625 |
"task": "timeline_subtask",
|
|
|
|
| 164655 |
"top10_accuracy": "",
|
| 164656 |
"median_rank": "",
|
| 164657 |
"mean_rank": "",
|
| 164658 |
+
"num_queries": "",
|
| 164659 |
+
"task_display_name": "Procedure Step Recognition"
|
| 164660 |
},
|
| 164661 |
{
|
| 164662 |
"task": "timeline_subtask",
|
|
|
|
| 164692 |
"top10_accuracy": "",
|
| 164693 |
"median_rank": "",
|
| 164694 |
"mean_rank": "",
|
| 164695 |
+
"num_queries": "",
|
| 164696 |
+
"task_display_name": "Procedure Step Recognition"
|
| 164697 |
},
|
| 164698 |
{
|
| 164699 |
"task": "timeline_subtask",
|
|
|
|
| 164729 |
"top10_accuracy": "",
|
| 164730 |
"median_rank": "",
|
| 164731 |
"mean_rank": "",
|
| 164732 |
+
"num_queries": "",
|
| 164733 |
+
"task_display_name": "Procedure Step Recognition"
|
| 164734 |
},
|
| 164735 |
{
|
| 164736 |
"task": "timeline_subtask",
|
|
|
|
| 164766 |
"top10_accuracy": "",
|
| 164767 |
"median_rank": "",
|
| 164768 |
"mean_rank": "",
|
| 164769 |
+
"num_queries": "",
|
| 164770 |
+
"task_display_name": "Procedure Step Recognition"
|
| 164771 |
},
|
| 164772 |
{
|
| 164773 |
"task": "timeline_subtask",
|
|
|
|
| 164803 |
"top10_accuracy": "",
|
| 164804 |
"median_rank": "",
|
| 164805 |
"mean_rank": "",
|
| 164806 |
+
"num_queries": "",
|
| 164807 |
+
"task_display_name": "Procedure Step Recognition"
|
| 164808 |
},
|
| 164809 |
{
|
| 164810 |
"task": "timeline_subtask",
|
|
|
|
| 164840 |
"top10_accuracy": "",
|
| 164841 |
"median_rank": "",
|
| 164842 |
"mean_rank": "",
|
| 164843 |
+
"num_queries": "",
|
| 164844 |
+
"task_display_name": "Procedure Step Recognition"
|
| 164845 |
},
|
| 164846 |
{
|
| 164847 |
"task": "transition_detection",
|
|
|
|
| 164877 |
"top10_accuracy": "",
|
| 164878 |
"median_rank": "",
|
| 164879 |
"mean_rank": "",
|
| 164880 |
+
"num_queries": "",
|
| 164881 |
+
"task_display_name": "Action Boundary Detection"
|
| 164882 |
},
|
| 164883 |
{
|
| 164884 |
"task": "transition_detection",
|
|
|
|
| 164914 |
"top10_accuracy": "",
|
| 164915 |
"median_rank": "",
|
| 164916 |
"mean_rank": "",
|
| 164917 |
+
"num_queries": "",
|
| 164918 |
+
"task_display_name": "Action Boundary Detection"
|
| 164919 |
},
|
| 164920 |
{
|
| 164921 |
"task": "transition_detection",
|
|
|
|
| 164951 |
"top10_accuracy": "",
|
| 164952 |
"median_rank": "",
|
| 164953 |
"mean_rank": "",
|
| 164954 |
+
"num_queries": "",
|
| 164955 |
+
"task_display_name": "Action Boundary Detection"
|
| 164956 |
},
|
| 164957 |
{
|
| 164958 |
"task": "transition_detection",
|
|
|
|
| 164988 |
"top10_accuracy": "",
|
| 164989 |
"median_rank": "",
|
| 164990 |
"mean_rank": "",
|
| 164991 |
+
"num_queries": "",
|
| 164992 |
+
"task_display_name": "Action Boundary Detection"
|
| 164993 |
},
|
| 164994 |
{
|
| 164995 |
"task": "transition_detection",
|
|
|
|
| 165025 |
"top10_accuracy": "",
|
| 165026 |
"median_rank": "",
|
| 165027 |
"mean_rank": "",
|
| 165028 |
+
"num_queries": "",
|
| 165029 |
+
"task_display_name": "Action Boundary Detection"
|
| 165030 |
},
|
| 165031 |
{
|
| 165032 |
"task": "transition_detection",
|
|
|
|
| 165062 |
"top10_accuracy": "",
|
| 165063 |
"median_rank": "",
|
| 165064 |
"mean_rank": "",
|
| 165065 |
+
"num_queries": "",
|
| 165066 |
+
"task_display_name": "Action Boundary Detection"
|
| 165067 |
},
|
| 165068 |
{
|
| 165069 |
"task": "transition_detection",
|
|
|
|
| 165099 |
"top10_accuracy": "",
|
| 165100 |
"median_rank": "",
|
| 165101 |
"mean_rank": "",
|
| 165102 |
+
"num_queries": "",
|
| 165103 |
+
"task_display_name": "Action Boundary Detection"
|
| 165104 |
},
|
| 165105 |
{
|
| 165106 |
"task": "transition_detection",
|
|
|
|
| 165136 |
"top10_accuracy": "",
|
| 165137 |
"median_rank": "",
|
| 165138 |
"mean_rank": "",
|
| 165139 |
+
"num_queries": "",
|
| 165140 |
+
"task_display_name": "Action Boundary Detection"
|
| 165141 |
},
|
| 165142 |
{
|
| 165143 |
"task": "transition_detection",
|
|
|
|
| 165173 |
"top10_accuracy": "",
|
| 165174 |
"median_rank": "",
|
| 165175 |
"mean_rank": "",
|
| 165176 |
+
"num_queries": "",
|
| 165177 |
+
"task_display_name": "Action Boundary Detection"
|
| 165178 |
},
|
| 165179 |
{
|
| 165180 |
"task": "next_action",
|
|
|
|
| 165210 |
"top10_accuracy": "",
|
| 165211 |
"median_rank": "",
|
| 165212 |
"mean_rank": "",
|
| 165213 |
+
"num_queries": "",
|
| 165214 |
+
"task_display_name": "Next-Action Prediction"
|
| 165215 |
},
|
| 165216 |
{
|
| 165217 |
"task": "next_action",
|
|
|
|
| 165247 |
"top10_accuracy": "",
|
| 165248 |
"median_rank": "",
|
| 165249 |
"mean_rank": "",
|
| 165250 |
+
"num_queries": "",
|
| 165251 |
+
"task_display_name": "Next-Action Prediction"
|
| 165252 |
},
|
| 165253 |
{
|
| 165254 |
"task": "next_action",
|
|
|
|
| 165284 |
"top10_accuracy": "",
|
| 165285 |
"median_rank": "",
|
| 165286 |
"mean_rank": "",
|
| 165287 |
+
"num_queries": "",
|
| 165288 |
+
"task_display_name": "Next-Action Prediction"
|
| 165289 |
},
|
| 165290 |
{
|
| 165291 |
"task": "next_action",
|
|
|
|
| 165321 |
"top10_accuracy": "",
|
| 165322 |
"median_rank": "",
|
| 165323 |
"mean_rank": "",
|
| 165324 |
+
"num_queries": "",
|
| 165325 |
+
"task_display_name": "Next-Action Prediction"
|
| 165326 |
},
|
| 165327 |
{
|
| 165328 |
"task": "next_action",
|
|
|
|
| 165358 |
"top10_accuracy": "",
|
| 165359 |
"median_rank": "",
|
| 165360 |
"mean_rank": "",
|
| 165361 |
+
"num_queries": "",
|
| 165362 |
+
"task_display_name": "Next-Action Prediction"
|
| 165363 |
},
|
| 165364 |
{
|
| 165365 |
"task": "next_action",
|
|
|
|
| 165395 |
"top10_accuracy": "",
|
| 165396 |
"median_rank": "",
|
| 165397 |
"mean_rank": "",
|
| 165398 |
+
"num_queries": "",
|
| 165399 |
+
"task_display_name": "Next-Action Prediction"
|
| 165400 |
},
|
| 165401 |
{
|
| 165402 |
"task": "next_action",
|
|
|
|
| 165432 |
"top10_accuracy": "",
|
| 165433 |
"median_rank": "",
|
| 165434 |
"mean_rank": "",
|
| 165435 |
+
"num_queries": "",
|
| 165436 |
+
"task_display_name": "Next-Action Prediction"
|
| 165437 |
},
|
| 165438 |
{
|
| 165439 |
"task": "next_action",
|
|
|
|
| 165469 |
"top10_accuracy": "",
|
| 165470 |
"median_rank": "",
|
| 165471 |
"mean_rank": "",
|
| 165472 |
+
"num_queries": "",
|
| 165473 |
+
"task_display_name": "Next-Action Prediction"
|
| 165474 |
},
|
| 165475 |
{
|
| 165476 |
"task": "next_action",
|
|
|
|
| 165506 |
"top10_accuracy": "",
|
| 165507 |
"median_rank": "",
|
| 165508 |
"mean_rank": "",
|
| 165509 |
+
"num_queries": "",
|
| 165510 |
+
"task_display_name": "Next-Action Prediction"
|
| 165511 |
},
|
| 165512 |
{
|
| 165513 |
"task": "hand_trajectory_forecast",
|
|
|
|
| 165543 |
"top10_accuracy": "",
|
| 165544 |
"median_rank": "",
|
| 165545 |
"mean_rank": "",
|
| 165546 |
+
"num_queries": "",
|
| 165547 |
+
"task_display_name": "Hand Trajectory Forecasting"
|
| 165548 |
},
|
| 165549 |
{
|
| 165550 |
"task": "hand_trajectory_forecast",
|
|
|
|
| 165580 |
"top10_accuracy": "",
|
| 165581 |
"median_rank": "",
|
| 165582 |
"mean_rank": "",
|
| 165583 |
+
"num_queries": "",
|
| 165584 |
+
"task_display_name": "Hand Trajectory Forecasting"
|
| 165585 |
},
|
| 165586 |
{
|
| 165587 |
"task": "hand_trajectory_forecast",
|
|
|
|
| 165617 |
"top10_accuracy": "",
|
| 165618 |
"median_rank": "",
|
| 165619 |
"mean_rank": "",
|
| 165620 |
+
"num_queries": "",
|
| 165621 |
+
"task_display_name": "Hand Trajectory Forecasting"
|
| 165622 |
},
|
| 165623 |
{
|
| 165624 |
"task": "hand_trajectory_forecast",
|
|
|
|
| 165654 |
"top10_accuracy": "",
|
| 165655 |
"median_rank": "",
|
| 165656 |
"mean_rank": "",
|
| 165657 |
+
"num_queries": "",
|
| 165658 |
+
"task_display_name": "Hand Trajectory Forecasting"
|
| 165659 |
},
|
| 165660 |
{
|
| 165661 |
"task": "hand_trajectory_forecast",
|
|
|
|
| 165691 |
"top10_accuracy": "",
|
| 165692 |
"median_rank": "",
|
| 165693 |
"mean_rank": "",
|
| 165694 |
+
"num_queries": "",
|
| 165695 |
+
"task_display_name": "Hand Trajectory Forecasting"
|
| 165696 |
},
|
| 165697 |
{
|
| 165698 |
"task": "hand_trajectory_forecast",
|
|
|
|
| 165728 |
"top10_accuracy": "",
|
| 165729 |
"median_rank": "",
|
| 165730 |
"mean_rank": "",
|
| 165731 |
+
"num_queries": "",
|
| 165732 |
+
"task_display_name": "Hand Trajectory Forecasting"
|
| 165733 |
},
|
| 165734 |
{
|
| 165735 |
"task": "hand_trajectory_forecast",
|
|
|
|
| 165765 |
"top10_accuracy": "",
|
| 165766 |
"median_rank": "",
|
| 165767 |
"mean_rank": "",
|
| 165768 |
+
"num_queries": "",
|
| 165769 |
+
"task_display_name": "Hand Trajectory Forecasting"
|
| 165770 |
},
|
| 165771 |
{
|
| 165772 |
"task": "hand_trajectory_forecast",
|
|
|
|
| 165802 |
"top10_accuracy": "",
|
| 165803 |
"median_rank": "",
|
| 165804 |
"mean_rank": "",
|
| 165805 |
+
"num_queries": "",
|
| 165806 |
+
"task_display_name": "Hand Trajectory Forecasting"
|
| 165807 |
},
|
| 165808 |
{
|
| 165809 |
"task": "hand_trajectory_forecast",
|
|
|
|
| 165839 |
"top10_accuracy": "",
|
| 165840 |
"median_rank": "",
|
| 165841 |
"mean_rank": "",
|
| 165842 |
+
"num_queries": "",
|
| 165843 |
+
"task_display_name": "Hand Trajectory Forecasting"
|
| 165844 |
},
|
| 165845 |
{
|
| 165846 |
"task": "contact_prediction",
|
|
|
|
| 165876 |
"top10_accuracy": "",
|
| 165877 |
"median_rank": "",
|
| 165878 |
"mean_rank": "",
|
| 165879 |
+
"num_queries": "",
|
| 165880 |
+
"task_display_name": "Contact State Prediction"
|
| 165881 |
},
|
| 165882 |
{
|
| 165883 |
"task": "contact_prediction",
|
|
|
|
| 165913 |
"top10_accuracy": "",
|
| 165914 |
"median_rank": "",
|
| 165915 |
"mean_rank": "",
|
| 165916 |
+
"num_queries": "",
|
| 165917 |
+
"task_display_name": "Contact State Prediction"
|
| 165918 |
},
|
| 165919 |
{
|
| 165920 |
"task": "contact_prediction",
|
|
|
|
| 165950 |
"top10_accuracy": "",
|
| 165951 |
"median_rank": "",
|
| 165952 |
"mean_rank": "",
|
| 165953 |
+
"num_queries": "",
|
| 165954 |
+
"task_display_name": "Contact State Prediction"
|
| 165955 |
},
|
| 165956 |
{
|
| 165957 |
"task": "contact_prediction",
|
|
|
|
| 165987 |
"top10_accuracy": "",
|
| 165988 |
"median_rank": "",
|
| 165989 |
"mean_rank": "",
|
| 165990 |
+
"num_queries": "",
|
| 165991 |
+
"task_display_name": "Contact State Prediction"
|
| 165992 |
},
|
| 165993 |
{
|
| 165994 |
"task": "contact_prediction",
|
|
|
|
| 166024 |
"top10_accuracy": "",
|
| 166025 |
"median_rank": "",
|
| 166026 |
"mean_rank": "",
|
| 166027 |
+
"num_queries": "",
|
| 166028 |
+
"task_display_name": "Contact State Prediction"
|
| 166029 |
},
|
| 166030 |
{
|
| 166031 |
"task": "contact_prediction",
|
|
|
|
| 166061 |
"top10_accuracy": "",
|
| 166062 |
"median_rank": "",
|
| 166063 |
"mean_rank": "",
|
| 166064 |
+
"num_queries": "",
|
| 166065 |
+
"task_display_name": "Contact State Prediction"
|
| 166066 |
},
|
| 166067 |
{
|
| 166068 |
"task": "contact_prediction",
|
|
|
|
| 166098 |
"top10_accuracy": "",
|
| 166099 |
"median_rank": "",
|
| 166100 |
"mean_rank": "",
|
| 166101 |
+
"num_queries": "",
|
| 166102 |
+
"task_display_name": "Contact State Prediction"
|
| 166103 |
},
|
| 166104 |
{
|
| 166105 |
"task": "contact_prediction",
|
|
|
|
| 166135 |
"top10_accuracy": "",
|
| 166136 |
"median_rank": "",
|
| 166137 |
"mean_rank": "",
|
| 166138 |
+
"num_queries": "",
|
| 166139 |
+
"task_display_name": "Contact State Prediction"
|
| 166140 |
},
|
| 166141 |
{
|
| 166142 |
"task": "contact_prediction",
|
|
|
|
| 166172 |
"top10_accuracy": "",
|
| 166173 |
"median_rank": "",
|
| 166174 |
"mean_rank": "",
|
| 166175 |
+
"num_queries": "",
|
| 166176 |
+
"task_display_name": "Contact State Prediction"
|
| 166177 |
},
|
| 166178 |
{
|
| 166179 |
"task": "object_relevance",
|
|
|
|
| 166209 |
"top10_accuracy": "",
|
| 166210 |
"median_rank": "",
|
| 166211 |
"mean_rank": "",
|
| 166212 |
+
"num_queries": "",
|
| 166213 |
+
"task_display_name": "Object Relevance Prediction"
|
| 166214 |
},
|
| 166215 |
{
|
| 166216 |
"task": "object_relevance",
|
|
|
|
| 166246 |
"top10_accuracy": "",
|
| 166247 |
"median_rank": "",
|
| 166248 |
"mean_rank": "",
|
| 166249 |
+
"num_queries": "",
|
| 166250 |
+
"task_display_name": "Object Relevance Prediction"
|
| 166251 |
},
|
| 166252 |
{
|
| 166253 |
"task": "object_relevance",
|
|
|
|
| 166283 |
"top10_accuracy": "",
|
| 166284 |
"median_rank": "",
|
| 166285 |
"mean_rank": "",
|
| 166286 |
+
"num_queries": "",
|
| 166287 |
+
"task_display_name": "Object Relevance Prediction"
|
| 166288 |
},
|
| 166289 |
{
|
| 166290 |
"task": "object_relevance",
|
|
|
|
| 166320 |
"top10_accuracy": "",
|
| 166321 |
"median_rank": "",
|
| 166322 |
"mean_rank": "",
|
| 166323 |
+
"num_queries": "",
|
| 166324 |
+
"task_display_name": "Object Relevance Prediction"
|
| 166325 |
},
|
| 166326 |
{
|
| 166327 |
"task": "object_relevance",
|
|
|
|
| 166357 |
"top10_accuracy": "",
|
| 166358 |
"median_rank": "",
|
| 166359 |
"mean_rank": "",
|
| 166360 |
+
"num_queries": "",
|
| 166361 |
+
"task_display_name": "Object Relevance Prediction"
|
| 166362 |
},
|
| 166363 |
{
|
| 166364 |
"task": "object_relevance",
|
|
|
|
| 166394 |
"top10_accuracy": "",
|
| 166395 |
"median_rank": "",
|
| 166396 |
"mean_rank": "",
|
| 166397 |
+
"num_queries": "",
|
| 166398 |
+
"task_display_name": "Object Relevance Prediction"
|
| 166399 |
},
|
| 166400 |
{
|
| 166401 |
"task": "object_relevance",
|
|
|
|
| 166431 |
"top10_accuracy": "",
|
| 166432 |
"median_rank": "",
|
| 166433 |
"mean_rank": "",
|
| 166434 |
+
"num_queries": "",
|
| 166435 |
+
"task_display_name": "Object Relevance Prediction"
|
| 166436 |
},
|
| 166437 |
{
|
| 166438 |
"task": "object_relevance",
|
|
|
|
| 166468 |
"top10_accuracy": "",
|
| 166469 |
"median_rank": "",
|
| 166470 |
"mean_rank": "",
|
| 166471 |
+
"num_queries": "",
|
| 166472 |
+
"task_display_name": "Object Relevance Prediction"
|
| 166473 |
},
|
| 166474 |
{
|
| 166475 |
"task": "object_relevance",
|
|
|
|
| 166505 |
"top10_accuracy": "",
|
| 166506 |
"median_rank": "",
|
| 166507 |
"mean_rank": "",
|
| 166508 |
+
"num_queries": "",
|
| 166509 |
+
"task_display_name": "Object Relevance Prediction"
|
| 166510 |
},
|
| 166511 |
{
|
| 166512 |
"task": "caption_grounding",
|
|
|
|
| 166542 |
"top10_accuracy": "0.4454022988505747",
|
| 166543 |
"median_rank": "13.0",
|
| 166544 |
"mean_rank": "23.19827651977539",
|
| 166545 |
+
"num_queries": "348",
|
| 166546 |
+
"task_display_name": "Language Grounding"
|
| 166547 |
},
|
| 166548 |
{
|
| 166549 |
"task": "caption_grounding",
|
|
|
|
| 166579 |
"top10_accuracy": "0.034482758620689655",
|
| 166580 |
"median_rank": "162.0",
|
| 166581 |
"mean_rank": "161.4770050048828",
|
| 166582 |
+
"num_queries": "348",
|
| 166583 |
+
"task_display_name": "Language Grounding"
|
| 166584 |
},
|
| 166585 |
{
|
| 166586 |
"task": "caption_grounding",
|
|
|
|
| 166616 |
"top10_accuracy": "0.03735632183908046",
|
| 166617 |
"median_rank": "114.0",
|
| 166618 |
"mean_rank": "137.90805053710938",
|
| 166619 |
+
"num_queries": "348",
|
| 166620 |
+
"task_display_name": "Language Grounding"
|
| 166621 |
},
|
| 166622 |
{
|
| 166623 |
"task": "caption_grounding",
|
|
|
|
| 166653 |
"top10_accuracy": "0.04597701149425287",
|
| 166654 |
"median_rank": "143.5",
|
| 166655 |
"mean_rank": "155.4712677001953",
|
| 166656 |
+
"num_queries": "348",
|
| 166657 |
+
"task_display_name": "Language Grounding"
|
| 166658 |
},
|
| 166659 |
{
|
| 166660 |
"task": "caption_grounding",
|
|
|
|
| 166690 |
"top10_accuracy": "0.04885057471264368",
|
| 166691 |
"median_rank": "110.5",
|
| 166692 |
"mean_rank": "130.32470703125",
|
| 166693 |
+
"num_queries": "348",
|
| 166694 |
+
"task_display_name": "Language Grounding"
|
| 166695 |
},
|
| 166696 |
{
|
| 166697 |
"task": "caption_grounding",
|
|
|
|
| 166727 |
"top10_accuracy": "0.04597701149425287",
|
| 166728 |
"median_rank": "123.0",
|
| 166729 |
"mean_rank": "138.61207580566406",
|
| 166730 |
+
"num_queries": "348",
|
| 166731 |
+
"task_display_name": "Language Grounding"
|
| 166732 |
},
|
| 166733 |
{
|
| 166734 |
"task": "caption_grounding",
|
|
|
|
| 166764 |
"top10_accuracy": "0.07758620689655173",
|
| 166765 |
"median_rank": "141.0",
|
| 166766 |
"mean_rank": "152.14942932128906",
|
| 166767 |
+
"num_queries": "348",
|
| 166768 |
+
"task_display_name": "Language Grounding"
|
| 166769 |
},
|
| 166770 |
{
|
| 166771 |
"task": "caption_grounding",
|
|
|
|
| 166801 |
"top10_accuracy": "0.47126436781609193",
|
| 166802 |
"median_rank": "12.0",
|
| 166803 |
"mean_rank": "15.106322288513184",
|
| 166804 |
+
"num_queries": "348",
|
| 166805 |
+
"task_display_name": "Language Grounding"
|
| 166806 |
},
|
| 166807 |
{
|
| 166808 |
"task": "caption_grounding",
|
|
|
|
| 166838 |
"top10_accuracy": "0.06896551724137931",
|
| 166839 |
"median_rank": "132.0",
|
| 166840 |
"mean_rank": "137.30746459960938",
|
| 166841 |
+
"num_queries": "348",
|
| 166842 |
+
"task_display_name": "Language Grounding"
|
| 166843 |
},
|
| 166844 |
{
|
| 166845 |
"task": "cross_modal_retrieval",
|
|
|
|
| 166875 |
"top10_accuracy": "0.9798850574712644",
|
| 166876 |
"median_rank": "1.0",
|
| 166877 |
"mean_rank": "2.0862069129943848",
|
| 166878 |
+
"num_queries": "348",
|
| 166879 |
+
"task_display_name": "Cross-Modal Retrieval"
|
| 166880 |
},
|
| 166881 |
{
|
| 166882 |
"task": "cross_modal_retrieval",
|
|
|
|
| 166912 |
"top10_accuracy": "0.9798850574712644",
|
| 166913 |
"median_rank": "1.0",
|
| 166914 |
"mean_rank": "3.844827651977539",
|
| 166915 |
+
"num_queries": "348",
|
| 166916 |
+
"task_display_name": "Cross-Modal Retrieval"
|
| 166917 |
},
|
| 166918 |
{
|
| 166919 |
"task": "cross_modal_retrieval",
|
|
|
|
| 166949 |
"top10_accuracy": "0.8620689655172413",
|
| 166950 |
"median_rank": "1.0",
|
| 166951 |
"mean_rank": "5.729885101318359",
|
| 166952 |
+
"num_queries": "348",
|
| 166953 |
+
"task_display_name": "Cross-Modal Retrieval"
|
| 166954 |
},
|
| 166955 |
{
|
| 166956 |
"task": "cross_modal_retrieval",
|
|
|
|
| 166986 |
"top10_accuracy": "0.6551724137931034",
|
| 166987 |
"median_rank": "4.0",
|
| 166988 |
"mean_rank": "15.623562812805176",
|
| 166989 |
+
"num_queries": "348",
|
| 166990 |
+
"task_display_name": "Cross-Modal Retrieval"
|
| 166991 |
},
|
| 166992 |
{
|
| 166993 |
"task": "cross_modal_retrieval",
|
|
|
|
| 167023 |
"top10_accuracy": "0.3994252873563218",
|
| 167024 |
"median_rank": "21.5",
|
| 167025 |
"mean_rank": "49.181034088134766",
|
| 167026 |
+
"num_queries": "348",
|
| 167027 |
+
"task_display_name": "Cross-Modal Retrieval"
|
| 167028 |
},
|
| 167029 |
{
|
| 167030 |
"task": "cross_modal_retrieval",
|
|
|
|
| 167060 |
"top10_accuracy": "0.5229885057471264",
|
| 167061 |
"median_rank": "10.0",
|
| 167062 |
"mean_rank": "20.577587127685547",
|
| 167063 |
+
"num_queries": "348",
|
| 167064 |
+
"task_display_name": "Cross-Modal Retrieval"
|
| 167065 |
},
|
| 167066 |
{
|
| 167067 |
"task": "cross_modal_retrieval",
|
|
|
|
| 167097 |
"top10_accuracy": "0.031609195402298854",
|
| 167098 |
"median_rank": "152.5",
|
| 167099 |
"mean_rank": "161.44540405273438",
|
| 167100 |
+
"num_queries": "348",
|
| 167101 |
+
"task_display_name": "Cross-Modal Retrieval"
|
| 167102 |
},
|
| 167103 |
{
|
| 167104 |
"task": "cross_modal_retrieval",
|
|
|
|
| 167134 |
"top10_accuracy": "0.05747126436781609",
|
| 167135 |
"median_rank": "138.0",
|
| 167136 |
"mean_rank": "146.83045959472656",
|
| 167137 |
+
"num_queries": "348",
|
| 167138 |
+
"task_display_name": "Cross-Modal Retrieval"
|
| 167139 |
},
|
| 167140 |
{
|
| 167141 |
"task": "cross_modal_retrieval",
|
|
|
|
| 167171 |
"top10_accuracy": "0.9770114942528736",
|
| 167172 |
"median_rank": "1.0",
|
| 167173 |
"mean_rank": "2.181034564971924",
|
| 167174 |
+
"num_queries": "348",
|
| 167175 |
+
"task_display_name": "Cross-Modal Retrieval"
|
| 167176 |
},
|
| 167177 |
{
|
| 167178 |
"task": "modality_reconstruction",
|
|
|
|
| 167208 |
"top10_accuracy": "",
|
| 167209 |
"median_rank": "",
|
| 167210 |
"mean_rank": "",
|
| 167211 |
+
"num_queries": "",
|
| 167212 |
+
"task_display_name": "Cross-Modal Reconstruction"
|
| 167213 |
},
|
| 167214 |
{
|
| 167215 |
"task": "modality_reconstruction",
|
|
|
|
| 167245 |
"top10_accuracy": "",
|
| 167246 |
"median_rank": "",
|
| 167247 |
"mean_rank": "",
|
| 167248 |
+
"num_queries": "",
|
| 167249 |
+
"task_display_name": "Cross-Modal Reconstruction"
|
| 167250 |
},
|
| 167251 |
{
|
| 167252 |
"task": "modality_reconstruction",
|
|
|
|
| 167282 |
"top10_accuracy": "",
|
| 167283 |
"median_rank": "",
|
| 167284 |
"mean_rank": "",
|
| 167285 |
+
"num_queries": "",
|
| 167286 |
+
"task_display_name": "Cross-Modal Reconstruction"
|
| 167287 |
},
|
| 167288 |
{
|
| 167289 |
"task": "modality_reconstruction",
|
|
|
|
| 167319 |
"top10_accuracy": "",
|
| 167320 |
"median_rank": "",
|
| 167321 |
"mean_rank": "",
|
| 167322 |
+
"num_queries": "",
|
| 167323 |
+
"task_display_name": "Cross-Modal Reconstruction"
|
| 167324 |
},
|
| 167325 |
{
|
| 167326 |
"task": "modality_reconstruction",
|
|
|
|
| 167356 |
"top10_accuracy": "",
|
| 167357 |
"median_rank": "",
|
| 167358 |
"mean_rank": "",
|
| 167359 |
+
"num_queries": "",
|
| 167360 |
+
"task_display_name": "Cross-Modal Reconstruction"
|
| 167361 |
},
|
| 167362 |
{
|
| 167363 |
"task": "modality_reconstruction",
|
|
|
|
| 167393 |
"top10_accuracy": "",
|
| 167394 |
"median_rank": "",
|
| 167395 |
"mean_rank": "",
|
| 167396 |
+
"num_queries": "",
|
| 167397 |
+
"task_display_name": "Cross-Modal Reconstruction"
|
| 167398 |
},
|
| 167399 |
{
|
| 167400 |
"task": "modality_reconstruction",
|
|
|
|
| 167430 |
"top10_accuracy": "",
|
| 167431 |
"median_rank": "",
|
| 167432 |
"mean_rank": "",
|
| 167433 |
+
"num_queries": "",
|
| 167434 |
+
"task_display_name": "Cross-Modal Reconstruction"
|
| 167435 |
},
|
| 167436 |
{
|
| 167437 |
"task": "modality_reconstruction",
|
|
|
|
| 167467 |
"top10_accuracy": "",
|
| 167468 |
"median_rank": "",
|
| 167469 |
"mean_rank": "",
|
| 167470 |
+
"num_queries": "",
|
| 167471 |
+
"task_display_name": "Cross-Modal Reconstruction"
|
| 167472 |
},
|
| 167473 |
{
|
| 167474 |
"task": "modality_reconstruction",
|
|
|
|
| 167504 |
"top10_accuracy": "",
|
| 167505 |
"median_rank": "",
|
| 167506 |
"mean_rank": "",
|
| 167507 |
+
"num_queries": "",
|
| 167508 |
+
"task_display_name": "Cross-Modal Reconstruction"
|
| 167509 |
},
|
| 167510 |
{
|
| 167511 |
"task": "temporal_order",
|
|
|
|
| 167541 |
"top10_accuracy": "",
|
| 167542 |
"median_rank": "",
|
| 167543 |
"mean_rank": "",
|
| 167544 |
+
"num_queries": "",
|
| 167545 |
+
"task_display_name": "Temporal Order Verification"
|
| 167546 |
},
|
| 167547 |
{
|
| 167548 |
"task": "temporal_order",
|
|
|
|
| 167578 |
"top10_accuracy": "",
|
| 167579 |
"median_rank": "",
|
| 167580 |
"mean_rank": "",
|
| 167581 |
+
"num_queries": "",
|
| 167582 |
+
"task_display_name": "Temporal Order Verification"
|
| 167583 |
},
|
| 167584 |
{
|
| 167585 |
"task": "temporal_order",
|
|
|
|
| 167615 |
"top10_accuracy": "",
|
| 167616 |
"median_rank": "",
|
| 167617 |
"mean_rank": "",
|
| 167618 |
+
"num_queries": "",
|
| 167619 |
+
"task_display_name": "Temporal Order Verification"
|
| 167620 |
},
|
| 167621 |
{
|
| 167622 |
"task": "temporal_order",
|
|
|
|
| 167652 |
"top10_accuracy": "",
|
| 167653 |
"median_rank": "",
|
| 167654 |
"mean_rank": "",
|
| 167655 |
+
"num_queries": "",
|
| 167656 |
+
"task_display_name": "Temporal Order Verification"
|
| 167657 |
},
|
| 167658 |
{
|
| 167659 |
"task": "temporal_order",
|
|
|
|
| 167689 |
"top10_accuracy": "",
|
| 167690 |
"median_rank": "",
|
| 167691 |
"mean_rank": "",
|
| 167692 |
+
"num_queries": "",
|
| 167693 |
+
"task_display_name": "Temporal Order Verification"
|
| 167694 |
},
|
| 167695 |
{
|
| 167696 |
"task": "temporal_order",
|
|
|
|
| 167726 |
"top10_accuracy": "",
|
| 167727 |
"median_rank": "",
|
| 167728 |
"mean_rank": "",
|
| 167729 |
+
"num_queries": "",
|
| 167730 |
+
"task_display_name": "Temporal Order Verification"
|
| 167731 |
},
|
| 167732 |
{
|
| 167733 |
"task": "temporal_order",
|
|
|
|
| 167763 |
"top10_accuracy": "",
|
| 167764 |
"median_rank": "",
|
| 167765 |
"mean_rank": "",
|
| 167766 |
+
"num_queries": "",
|
| 167767 |
+
"task_display_name": "Temporal Order Verification"
|
| 167768 |
},
|
| 167769 |
{
|
| 167770 |
"task": "temporal_order",
|
|
|
|
| 167800 |
"top10_accuracy": "",
|
| 167801 |
"median_rank": "",
|
| 167802 |
"mean_rank": "",
|
| 167803 |
+
"num_queries": "",
|
| 167804 |
+
"task_display_name": "Temporal Order Verification"
|
| 167805 |
},
|
| 167806 |
{
|
| 167807 |
"task": "temporal_order",
|
|
|
|
| 167837 |
"top10_accuracy": "",
|
| 167838 |
"median_rank": "",
|
| 167839 |
"mean_rank": "",
|
| 167840 |
+
"num_queries": "",
|
| 167841 |
+
"task_display_name": "Temporal Order Verification"
|
| 167842 |
},
|
| 167843 |
{
|
| 167844 |
"task": "misalignment_detection",
|
|
|
|
| 167874 |
"top10_accuracy": "",
|
| 167875 |
"median_rank": "",
|
| 167876 |
"mean_rank": "",
|
| 167877 |
+
"num_queries": "",
|
| 167878 |
+
"task_display_name": "Multimodal Synchronization Detection"
|
| 167879 |
},
|
| 167880 |
{
|
| 167881 |
"task": "misalignment_detection",
|
|
|
|
| 167911 |
"top10_accuracy": "",
|
| 167912 |
"median_rank": "",
|
| 167913 |
"mean_rank": "",
|
| 167914 |
+
"num_queries": "",
|
| 167915 |
+
"task_display_name": "Multimodal Synchronization Detection"
|
| 167916 |
},
|
| 167917 |
{
|
| 167918 |
"task": "misalignment_detection",
|
|
|
|
| 167948 |
"top10_accuracy": "",
|
| 167949 |
"median_rank": "",
|
| 167950 |
"mean_rank": "",
|
| 167951 |
+
"num_queries": "",
|
| 167952 |
+
"task_display_name": "Multimodal Synchronization Detection"
|
| 167953 |
},
|
| 167954 |
{
|
| 167955 |
"task": "misalignment_detection",
|
|
|
|
| 167985 |
"top10_accuracy": "",
|
| 167986 |
"median_rank": "",
|
| 167987 |
"mean_rank": "",
|
| 167988 |
+
"num_queries": "",
|
| 167989 |
+
"task_display_name": "Multimodal Synchronization Detection"
|
| 167990 |
},
|
| 167991 |
{
|
| 167992 |
"task": "misalignment_detection",
|
|
|
|
| 168022 |
"top10_accuracy": "",
|
| 168023 |
"median_rank": "",
|
| 168024 |
"mean_rank": "",
|
| 168025 |
+
"num_queries": "",
|
| 168026 |
+
"task_display_name": "Multimodal Synchronization Detection"
|
| 168027 |
},
|
| 168028 |
{
|
| 168029 |
"task": "misalignment_detection",
|
|
|
|
| 168059 |
"top10_accuracy": "",
|
| 168060 |
"median_rank": "",
|
| 168061 |
"mean_rank": "",
|
| 168062 |
+
"num_queries": "",
|
| 168063 |
+
"task_display_name": "Multimodal Synchronization Detection"
|
| 168064 |
},
|
| 168065 |
{
|
| 168066 |
"task": "misalignment_detection",
|
|
|
|
| 168096 |
"top10_accuracy": "",
|
| 168097 |
"median_rank": "",
|
| 168098 |
"mean_rank": "",
|
| 168099 |
+
"num_queries": "",
|
| 168100 |
+
"task_display_name": "Multimodal Synchronization Detection"
|
| 168101 |
},
|
| 168102 |
{
|
| 168103 |
"task": "misalignment_detection",
|
|
|
|
| 168133 |
"top10_accuracy": "",
|
| 168134 |
"median_rank": "",
|
| 168135 |
"mean_rank": "",
|
| 168136 |
+
"num_queries": "",
|
| 168137 |
+
"task_display_name": "Multimodal Synchronization Detection"
|
| 168138 |
},
|
| 168139 |
{
|
| 168140 |
"task": "misalignment_detection",
|
|
|
|
| 168170 |
"top10_accuracy": "",
|
| 168171 |
"median_rank": "",
|
| 168172 |
"mean_rank": "",
|
| 168173 |
+
"num_queries": "",
|
| 168174 |
+
"task_display_name": "Multimodal Synchronization Detection"
|
| 168175 |
}
|
| 168176 |
]
|
| 168177 |
},
|
|
|
|
| 168987 |
"num_queries": "308"
|
| 168988 |
}
|
| 168989 |
]
|
| 168990 |
+
}
|
metrics/summary_metrics.json
CHANGED
|
@@ -1,27 +1,12 @@
|
|
| 1 |
{
|
| 2 |
"omni_relay": {
|
| 3 |
-
"status": "
|
| 4 |
"dataset": "ropedia-ai/xperience-10m",
|
| 5 |
-
"staging": "
|
| 6 |
-
"training_target": "
|
| 7 |
"selection_strategy": "stratified_round_robin_by_top_level_session",
|
| 8 |
"target_episodes": 128,
|
| 9 |
"selected_sessions": 128,
|
| 10 |
-
"selected_split_counts": {
|
| 11 |
-
"train": 96,
|
| 12 |
-
"val": 16,
|
| 13 |
-
"test": 16
|
| 14 |
-
},
|
| 15 |
-
"exported_window_counts": {
|
| 16 |
-
"train": 2848,
|
| 17 |
-
"val": 512,
|
| 18 |
-
"test": 448
|
| 19 |
-
},
|
| 20 |
-
"held_out_episode_count": 14,
|
| 21 |
-
"held_out_test_windows": 448,
|
| 22 |
-
"json_validity_rate": 0.875,
|
| 23 |
-
"action_macro_f1": 0.0026621494447581404,
|
| 24 |
-
"quality_target_met": false,
|
| 25 |
"candidate_scan_top_level_sessions": 802,
|
| 26 |
"valid_candidates": 12102,
|
| 27 |
"estimated_bytes": 298188841943,
|
|
@@ -29,16 +14,7 @@
|
|
| 29 |
"visualization.rrd"
|
| 30 |
],
|
| 31 |
"access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
|
| 32 |
-
"current_scope": "The selected-episode Qwen3-Omni
|
| 33 |
-
"validation_samples_used": 512,
|
| 34 |
-
"train_loss": 0.41304643672440994,
|
| 35 |
-
"val_loss": 0.0330660454928875,
|
| 36 |
-
"num_val_samples": 512,
|
| 37 |
-
"subtask_accuracy": 0.006696428571428571,
|
| 38 |
-
"transition_accuracy": 0.8504464285714286,
|
| 39 |
-
"next_action_accuracy": 0.024553571428571428,
|
| 40 |
-
"contact_accuracy": 0.6450892857142857,
|
| 41 |
-
"object_micro_f1": 0.22299431459254582
|
| 42 |
},
|
| 43 |
"models": {
|
| 44 |
"motion_action": {
|
|
@@ -120,7 +96,8 @@
|
|
| 120 |
"Pour coffee",
|
| 121 |
"Pour milk into coffee",
|
| 122 |
"Wait/Prepare for pouring"
|
| 123 |
-
]
|
|
|
|
| 124 |
},
|
| 125 |
"timeline_subtask": {
|
| 126 |
"accuracy": 0.05813953488372093,
|
|
@@ -144,7 +121,8 @@
|
|
| 144 |
"Pour coffee",
|
| 145 |
"Pour milk into coffee",
|
| 146 |
"Prepare for pouring"
|
| 147 |
-
]
|
|
|
|
| 148 |
},
|
| 149 |
"transition_detection": {
|
| 150 |
"accuracy": 0.9080459770114943,
|
|
@@ -170,7 +148,8 @@
|
|
| 170 |
"matched_boundaries": 2,
|
| 171 |
"true_boundaries": 4,
|
| 172 |
"predicted_boundaries": 28,
|
| 173 |
-
"mean_abs_timing_error_frames": 3.5
|
|
|
|
| 174 |
},
|
| 175 |
"next_action": {
|
| 176 |
"accuracy": 0.034482758620689655,
|
|
@@ -194,7 +173,8 @@
|
|
| 194 |
"Pour coffee",
|
| 195 |
"Pour milk into coffee",
|
| 196 |
"Wait/Prepare for pouring"
|
| 197 |
-
]
|
|
|
|
| 198 |
},
|
| 199 |
"hand_trajectory_forecast": {
|
| 200 |
"mse": 14.956222534179688,
|
|
@@ -209,7 +189,8 @@
|
|
| 209 |
"forecast_frames": 10,
|
| 210 |
"mpjpe": 0.8646570444107056,
|
| 211 |
"final_frame_mpjpe": 1.0330793857574463,
|
| 212 |
-
"target_dim": 1260
|
|
|
|
| 213 |
},
|
| 214 |
"contact_prediction": {
|
| 215 |
"accuracy": 1.0,
|
|
@@ -228,7 +209,8 @@
|
|
| 228 |
"majority_baseline_accuracy": 1.0,
|
| 229 |
"train_final_accuracy": 1.0,
|
| 230 |
"train_final_loss": 0.0006056802230887115,
|
| 231 |
-
"unseen_test_classes": []
|
|
|
|
| 232 |
},
|
| 233 |
"object_relevance": {
|
| 234 |
"micro_f1": 0.18034382095361662,
|
|
@@ -242,7 +224,8 @@
|
|
| 242 |
"num_windows": 1161,
|
| 243 |
"num_train_windows": 813,
|
| 244 |
"num_test_windows": 348,
|
| 245 |
-
"num_objects": 34
|
|
|
|
| 246 |
},
|
| 247 |
"caption_grounding": {
|
| 248 |
"mrr": 0.016023479050338015,
|
|
@@ -257,7 +240,8 @@
|
|
| 257 |
"output": "matching time window",
|
| 258 |
"split": "chronological",
|
| 259 |
"num_train_windows": 813,
|
| 260 |
-
"num_test_windows": 348
|
|
|
|
| 261 |
},
|
| 262 |
"cross_modal_retrieval": {
|
| 263 |
"mrr": 0.26925966892956127,
|
|
@@ -272,7 +256,8 @@
|
|
| 272 |
"output": "matching depth/video window",
|
| 273 |
"split": "chronological",
|
| 274 |
"num_train_windows": 813,
|
| 275 |
-
"num_test_windows": 348
|
|
|
|
| 276 |
},
|
| 277 |
"modality_reconstruction": {
|
| 278 |
"mse": 1358.1593017578125,
|
|
@@ -284,7 +269,8 @@
|
|
| 284 |
"split": "chronological",
|
| 285 |
"num_train_windows": 813,
|
| 286 |
"num_test_windows": 348,
|
| 287 |
-
"target_dim": 5096
|
|
|
|
| 288 |
},
|
| 289 |
"temporal_order": {
|
| 290 |
"accuracy": 0.4540229885057471,
|
|
@@ -303,7 +289,8 @@
|
|
| 303 |
"num_samples": 2320,
|
| 304 |
"num_train_samples": 1624,
|
| 305 |
"num_test_samples": 696,
|
| 306 |
-
"train_final_accuracy": 0.5086206896551724
|
|
|
|
| 307 |
},
|
| 308 |
"misalignment_detection": {
|
| 309 |
"accuracy": 0.5158959537572254,
|
|
@@ -322,7 +309,8 @@
|
|
| 322 |
"num_samples": 2306,
|
| 323 |
"num_train_samples": 1614,
|
| 324 |
"num_test_samples": 692,
|
| 325 |
-
"train_final_accuracy": 0.49380421313506817
|
|
|
|
| 326 |
}
|
| 327 |
},
|
| 328 |
"neural_model": {
|
|
@@ -368,7 +356,8 @@
|
|
| 368 |
"neural_dropout": 0.1,
|
| 369 |
"neural_device": "cpu",
|
| 370 |
"train_final_loss": 0.04246756529782,
|
| 371 |
-
"train_final_accuracy": 0.9875156054931336
|
|
|
|
| 372 |
},
|
| 373 |
"timeline_subtask": {
|
| 374 |
"accuracy": 0.0377906976744186,
|
|
@@ -401,7 +390,8 @@
|
|
| 401 |
"neural_dropout": 0.1,
|
| 402 |
"neural_device": "cpu",
|
| 403 |
"train_final_loss": 5.4104819144748596e-05,
|
| 404 |
-
"train_final_accuracy": 1.0
|
|
|
|
| 405 |
},
|
| 406 |
"transition_detection": {
|
| 407 |
"accuracy": 0.8735632183908046,
|
|
@@ -436,7 +426,8 @@
|
|
| 436 |
"matched_boundaries": 3,
|
| 437 |
"true_boundaries": 4,
|
| 438 |
"predicted_boundaries": 42,
|
| 439 |
-
"mean_abs_timing_error_frames": 2.6666666666666665
|
|
|
|
| 440 |
},
|
| 441 |
"next_action": {
|
| 442 |
"accuracy": 0.02586206896551724,
|
|
@@ -469,7 +460,8 @@
|
|
| 469 |
"neural_dropout": 0.1,
|
| 470 |
"neural_device": "cpu",
|
| 471 |
"train_final_loss": 0.000416612956025105,
|
| 472 |
-
"train_final_accuracy": 1.0
|
|
|
|
| 473 |
},
|
| 474 |
"hand_trajectory_forecast": {
|
| 475 |
"mse": 0.004775360692292452,
|
|
@@ -494,7 +486,8 @@
|
|
| 494 |
"neural_weight_decay": 0.0001,
|
| 495 |
"neural_dropout": 0.1,
|
| 496 |
"neural_device": "cpu",
|
| 497 |
-
"train_final_loss": 0.055699273420247435
|
|
|
|
| 498 |
},
|
| 499 |
"contact_prediction": {
|
| 500 |
"accuracy": 1.0,
|
|
@@ -522,7 +515,8 @@
|
|
| 522 |
"neural_dropout": 0.1,
|
| 523 |
"neural_device": "cpu",
|
| 524 |
"train_final_loss": 0.0,
|
| 525 |
-
"train_final_accuracy": 1.0
|
|
|
|
| 526 |
},
|
| 527 |
"object_relevance": {
|
| 528 |
"micro_f1": 0.1679279279279279,
|
|
@@ -547,7 +541,8 @@
|
|
| 547 |
"neural_weight_decay": 0.0001,
|
| 548 |
"neural_dropout": 0.1,
|
| 549 |
"neural_device": "cpu",
|
| 550 |
-
"train_final_loss": 0.003651880362182214
|
|
|
|
| 551 |
},
|
| 552 |
"caption_grounding": {
|
| 553 |
"mrr": 0.01684125567132316,
|
|
@@ -573,7 +568,8 @@
|
|
| 573 |
"neural_weight_decay": 0.0001,
|
| 574 |
"neural_dropout": 0.1,
|
| 575 |
"neural_device": "cpu",
|
| 576 |
-
"train_final_loss": 0.06317874967483723
|
|
|
|
| 577 |
},
|
| 578 |
"cross_modal_retrieval": {
|
| 579 |
"mrr": 0.1299971898648288,
|
|
@@ -599,7 +595,8 @@
|
|
| 599 |
"neural_weight_decay": 0.0001,
|
| 600 |
"neural_dropout": 0.1,
|
| 601 |
"neural_device": "cpu",
|
| 602 |
-
"train_final_loss": 0.21891545446596464
|
|
|
|
| 603 |
},
|
| 604 |
"modality_reconstruction": {
|
| 605 |
"mse": 1351.3363037109375,
|
|
@@ -621,7 +618,8 @@
|
|
| 621 |
"neural_weight_decay": 0.0001,
|
| 622 |
"neural_dropout": 0.1,
|
| 623 |
"neural_device": "cpu",
|
| 624 |
-
"train_final_loss": 0.21891545446596464
|
|
|
|
| 625 |
},
|
| 626 |
"temporal_order": {
|
| 627 |
"accuracy": 0.8577586206896551,
|
|
@@ -651,7 +649,8 @@
|
|
| 651 |
"neural_dropout": 0.1,
|
| 652 |
"neural_device": "cpu",
|
| 653 |
"train_final_loss": 0.0005108328477586757,
|
| 654 |
-
"train_final_accuracy": 1.0
|
|
|
|
| 655 |
},
|
| 656 |
"misalignment_detection": {
|
| 657 |
"accuracy": 0.7008670520231214,
|
|
@@ -681,8 +680,23 @@
|
|
| 681 |
"neural_dropout": 0.1,
|
| 682 |
"neural_device": "cpu",
|
| 683 |
"train_final_loss": 0.010604870708167664,
|
| 684 |
-
"train_final_accuracy": 0.9956629491945477
|
|
|
|
| 685 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 686 |
}
|
| 687 |
},
|
| 688 |
"feature_manifest": [
|
|
|
|
| 1 |
{
|
| 2 |
"omni_relay": {
|
| 3 |
+
"status": "verified_full_128_episode_diagnostic_result",
|
| 4 |
"dataset": "ropedia-ai/xperience-10m",
|
| 5 |
+
"staging": "verified_public_package_and_adapter_publication",
|
| 6 |
+
"training_target": "action_subtask_quality_and_unseen_label_error_analysis",
|
| 7 |
"selection_strategy": "stratified_round_robin_by_top_level_session",
|
| 8 |
"target_episodes": 128,
|
| 9 |
"selected_sessions": 128,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
"candidate_scan_top_level_sessions": 802,
|
| 11 |
"valid_candidates": 12102,
|
| 12 |
"estimated_bytes": 298188841943,
|
|
|
|
| 14 |
"visualization.rrd"
|
| 15 |
],
|
| 16 |
"access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
|
| 17 |
+
"current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
},
|
| 19 |
"models": {
|
| 20 |
"motion_action": {
|
|
|
|
| 96 |
"Pour coffee",
|
| 97 |
"Pour milk into coffee",
|
| 98 |
"Wait/Prepare for pouring"
|
| 99 |
+
],
|
| 100 |
+
"task_display_name": "Action Recognition"
|
| 101 |
},
|
| 102 |
"timeline_subtask": {
|
| 103 |
"accuracy": 0.05813953488372093,
|
|
|
|
| 121 |
"Pour coffee",
|
| 122 |
"Pour milk into coffee",
|
| 123 |
"Prepare for pouring"
|
| 124 |
+
],
|
| 125 |
+
"task_display_name": "Procedure Step Recognition"
|
| 126 |
},
|
| 127 |
"transition_detection": {
|
| 128 |
"accuracy": 0.9080459770114943,
|
|
|
|
| 148 |
"matched_boundaries": 2,
|
| 149 |
"true_boundaries": 4,
|
| 150 |
"predicted_boundaries": 28,
|
| 151 |
+
"mean_abs_timing_error_frames": 3.5,
|
| 152 |
+
"task_display_name": "Action Boundary Detection"
|
| 153 |
},
|
| 154 |
"next_action": {
|
| 155 |
"accuracy": 0.034482758620689655,
|
|
|
|
| 173 |
"Pour coffee",
|
| 174 |
"Pour milk into coffee",
|
| 175 |
"Wait/Prepare for pouring"
|
| 176 |
+
],
|
| 177 |
+
"task_display_name": "Next-Action Prediction"
|
| 178 |
},
|
| 179 |
"hand_trajectory_forecast": {
|
| 180 |
"mse": 14.956222534179688,
|
|
|
|
| 189 |
"forecast_frames": 10,
|
| 190 |
"mpjpe": 0.8646570444107056,
|
| 191 |
"final_frame_mpjpe": 1.0330793857574463,
|
| 192 |
+
"target_dim": 1260,
|
| 193 |
+
"task_display_name": "Hand Trajectory Forecasting"
|
| 194 |
},
|
| 195 |
"contact_prediction": {
|
| 196 |
"accuracy": 1.0,
|
|
|
|
| 209 |
"majority_baseline_accuracy": 1.0,
|
| 210 |
"train_final_accuracy": 1.0,
|
| 211 |
"train_final_loss": 0.0006056802230887115,
|
| 212 |
+
"unseen_test_classes": [],
|
| 213 |
+
"task_display_name": "Contact State Prediction"
|
| 214 |
},
|
| 215 |
"object_relevance": {
|
| 216 |
"micro_f1": 0.18034382095361662,
|
|
|
|
| 224 |
"num_windows": 1161,
|
| 225 |
"num_train_windows": 813,
|
| 226 |
"num_test_windows": 348,
|
| 227 |
+
"num_objects": 34,
|
| 228 |
+
"task_display_name": "Object Relevance Prediction"
|
| 229 |
},
|
| 230 |
"caption_grounding": {
|
| 231 |
"mrr": 0.016023479050338015,
|
|
|
|
| 240 |
"output": "matching time window",
|
| 241 |
"split": "chronological",
|
| 242 |
"num_train_windows": 813,
|
| 243 |
+
"num_test_windows": 348,
|
| 244 |
+
"task_display_name": "Language Grounding"
|
| 245 |
},
|
| 246 |
"cross_modal_retrieval": {
|
| 247 |
"mrr": 0.26925966892956127,
|
|
|
|
| 256 |
"output": "matching depth/video window",
|
| 257 |
"split": "chronological",
|
| 258 |
"num_train_windows": 813,
|
| 259 |
+
"num_test_windows": 348,
|
| 260 |
+
"task_display_name": "Cross-Modal Retrieval"
|
| 261 |
},
|
| 262 |
"modality_reconstruction": {
|
| 263 |
"mse": 1358.1593017578125,
|
|
|
|
| 269 |
"split": "chronological",
|
| 270 |
"num_train_windows": 813,
|
| 271 |
"num_test_windows": 348,
|
| 272 |
+
"target_dim": 5096,
|
| 273 |
+
"task_display_name": "Cross-Modal Reconstruction"
|
| 274 |
},
|
| 275 |
"temporal_order": {
|
| 276 |
"accuracy": 0.4540229885057471,
|
|
|
|
| 289 |
"num_samples": 2320,
|
| 290 |
"num_train_samples": 1624,
|
| 291 |
"num_test_samples": 696,
|
| 292 |
+
"train_final_accuracy": 0.5086206896551724,
|
| 293 |
+
"task_display_name": "Temporal Order Verification"
|
| 294 |
},
|
| 295 |
"misalignment_detection": {
|
| 296 |
"accuracy": 0.5158959537572254,
|
|
|
|
| 309 |
"num_samples": 2306,
|
| 310 |
"num_train_samples": 1614,
|
| 311 |
"num_test_samples": 692,
|
| 312 |
+
"train_final_accuracy": 0.49380421313506817,
|
| 313 |
+
"task_display_name": "Multimodal Synchronization Detection"
|
| 314 |
}
|
| 315 |
},
|
| 316 |
"neural_model": {
|
|
|
|
| 356 |
"neural_dropout": 0.1,
|
| 357 |
"neural_device": "cpu",
|
| 358 |
"train_final_loss": 0.04246756529782,
|
| 359 |
+
"train_final_accuracy": 0.9875156054931336,
|
| 360 |
+
"task_display_name": "Action Recognition"
|
| 361 |
},
|
| 362 |
"timeline_subtask": {
|
| 363 |
"accuracy": 0.0377906976744186,
|
|
|
|
| 390 |
"neural_dropout": 0.1,
|
| 391 |
"neural_device": "cpu",
|
| 392 |
"train_final_loss": 5.4104819144748596e-05,
|
| 393 |
+
"train_final_accuracy": 1.0,
|
| 394 |
+
"task_display_name": "Procedure Step Recognition"
|
| 395 |
},
|
| 396 |
"transition_detection": {
|
| 397 |
"accuracy": 0.8735632183908046,
|
|
|
|
| 426 |
"matched_boundaries": 3,
|
| 427 |
"true_boundaries": 4,
|
| 428 |
"predicted_boundaries": 42,
|
| 429 |
+
"mean_abs_timing_error_frames": 2.6666666666666665,
|
| 430 |
+
"task_display_name": "Action Boundary Detection"
|
| 431 |
},
|
| 432 |
"next_action": {
|
| 433 |
"accuracy": 0.02586206896551724,
|
|
|
|
| 460 |
"neural_dropout": 0.1,
|
| 461 |
"neural_device": "cpu",
|
| 462 |
"train_final_loss": 0.000416612956025105,
|
| 463 |
+
"train_final_accuracy": 1.0,
|
| 464 |
+
"task_display_name": "Next-Action Prediction"
|
| 465 |
},
|
| 466 |
"hand_trajectory_forecast": {
|
| 467 |
"mse": 0.004775360692292452,
|
|
|
|
| 486 |
"neural_weight_decay": 0.0001,
|
| 487 |
"neural_dropout": 0.1,
|
| 488 |
"neural_device": "cpu",
|
| 489 |
+
"train_final_loss": 0.055699273420247435,
|
| 490 |
+
"task_display_name": "Hand Trajectory Forecasting"
|
| 491 |
},
|
| 492 |
"contact_prediction": {
|
| 493 |
"accuracy": 1.0,
|
|
|
|
| 515 |
"neural_dropout": 0.1,
|
| 516 |
"neural_device": "cpu",
|
| 517 |
"train_final_loss": 0.0,
|
| 518 |
+
"train_final_accuracy": 1.0,
|
| 519 |
+
"task_display_name": "Contact State Prediction"
|
| 520 |
},
|
| 521 |
"object_relevance": {
|
| 522 |
"micro_f1": 0.1679279279279279,
|
|
|
|
| 541 |
"neural_weight_decay": 0.0001,
|
| 542 |
"neural_dropout": 0.1,
|
| 543 |
"neural_device": "cpu",
|
| 544 |
+
"train_final_loss": 0.003651880362182214,
|
| 545 |
+
"task_display_name": "Object Relevance Prediction"
|
| 546 |
},
|
| 547 |
"caption_grounding": {
|
| 548 |
"mrr": 0.01684125567132316,
|
|
|
|
| 568 |
"neural_weight_decay": 0.0001,
|
| 569 |
"neural_dropout": 0.1,
|
| 570 |
"neural_device": "cpu",
|
| 571 |
+
"train_final_loss": 0.06317874967483723,
|
| 572 |
+
"task_display_name": "Language Grounding"
|
| 573 |
},
|
| 574 |
"cross_modal_retrieval": {
|
| 575 |
"mrr": 0.1299971898648288,
|
|
|
|
| 595 |
"neural_weight_decay": 0.0001,
|
| 596 |
"neural_dropout": 0.1,
|
| 597 |
"neural_device": "cpu",
|
| 598 |
+
"train_final_loss": 0.21891545446596464,
|
| 599 |
+
"task_display_name": "Cross-Modal Retrieval"
|
| 600 |
},
|
| 601 |
"modality_reconstruction": {
|
| 602 |
"mse": 1351.3363037109375,
|
|
|
|
| 618 |
"neural_weight_decay": 0.0001,
|
| 619 |
"neural_dropout": 0.1,
|
| 620 |
"neural_device": "cpu",
|
| 621 |
+
"train_final_loss": 0.21891545446596464,
|
| 622 |
+
"task_display_name": "Cross-Modal Reconstruction"
|
| 623 |
},
|
| 624 |
"temporal_order": {
|
| 625 |
"accuracy": 0.8577586206896551,
|
|
|
|
| 649 |
"neural_dropout": 0.1,
|
| 650 |
"neural_device": "cpu",
|
| 651 |
"train_final_loss": 0.0005108328477586757,
|
| 652 |
+
"train_final_accuracy": 1.0,
|
| 653 |
+
"task_display_name": "Temporal Order Verification"
|
| 654 |
},
|
| 655 |
"misalignment_detection": {
|
| 656 |
"accuracy": 0.7008670520231214,
|
|
|
|
| 680 |
"neural_dropout": 0.1,
|
| 681 |
"neural_device": "cpu",
|
| 682 |
"train_final_loss": 0.010604870708167664,
|
| 683 |
+
"train_final_accuracy": 0.9956629491945477,
|
| 684 |
+
"task_display_name": "Multimodal Synchronization Detection"
|
| 685 |
}
|
| 686 |
+
},
|
| 687 |
+
"task_display_names": {
|
| 688 |
+
"timeline_action": "Action Recognition",
|
| 689 |
+
"timeline_subtask": "Procedure Step Recognition",
|
| 690 |
+
"transition_detection": "Action Boundary Detection",
|
| 691 |
+
"next_action": "Next-Action Prediction",
|
| 692 |
+
"hand_trajectory_forecast": "Hand Trajectory Forecasting",
|
| 693 |
+
"contact_prediction": "Contact State Prediction",
|
| 694 |
+
"object_relevance": "Object Relevance Prediction",
|
| 695 |
+
"caption_grounding": "Language Grounding",
|
| 696 |
+
"cross_modal_retrieval": "Cross-Modal Retrieval",
|
| 697 |
+
"modality_reconstruction": "Cross-Modal Reconstruction",
|
| 698 |
+
"temporal_order": "Temporal Order Verification",
|
| 699 |
+
"misalignment_detection": "Multimodal Synchronization Detection"
|
| 700 |
}
|
| 701 |
},
|
| 702 |
"feature_manifest": [
|
metrics/task_surface_integrity.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"summary": {
|
| 5 |
"task_count": 12,
|
| 6 |
"expected_task_count": 12,
|
|
@@ -64,9 +64,9 @@
|
|
| 64 |
"observed": "timeline_action"
|
| 65 |
},
|
| 66 |
{
|
| 67 |
-
"name": "timeline_action:
|
| 68 |
"status": "pass",
|
| 69 |
-
"value": "
|
| 70 |
"raw_hits": []
|
| 71 |
},
|
| 72 |
{
|
|
@@ -76,15 +76,15 @@
|
|
| 76 |
"raw_hits": []
|
| 77 |
},
|
| 78 |
{
|
| 79 |
-
"name": "timeline_action:
|
| 80 |
"status": "pass",
|
| 81 |
-
"value": "
|
| 82 |
"raw_hits": []
|
| 83 |
},
|
| 84 |
{
|
| 85 |
-
"name": "timeline_action:
|
| 86 |
"status": "pass",
|
| 87 |
-
"value": "
|
| 88 |
"raw_hits": []
|
| 89 |
},
|
| 90 |
{
|
|
@@ -94,15 +94,15 @@
|
|
| 94 |
"raw_hits": []
|
| 95 |
},
|
| 96 |
{
|
| 97 |
-
"name": "timeline_action:
|
| 98 |
"status": "pass",
|
| 99 |
-
"value": "
|
| 100 |
"raw_hits": []
|
| 101 |
},
|
| 102 |
{
|
| 103 |
-
"name": "timeline_action:
|
| 104 |
"status": "pass",
|
| 105 |
-
"value": "
|
| 106 |
"raw_hits": []
|
| 107 |
},
|
| 108 |
{
|
|
@@ -184,9 +184,9 @@
|
|
| 184 |
"observed": "timeline_subtask"
|
| 185 |
},
|
| 186 |
{
|
| 187 |
-
"name": "timeline_subtask:
|
| 188 |
"status": "pass",
|
| 189 |
-
"value": "
|
| 190 |
"raw_hits": []
|
| 191 |
},
|
| 192 |
{
|
|
@@ -196,15 +196,15 @@
|
|
| 196 |
"raw_hits": []
|
| 197 |
},
|
| 198 |
{
|
| 199 |
-
"name": "timeline_subtask:
|
| 200 |
"status": "pass",
|
| 201 |
-
"value": "
|
| 202 |
"raw_hits": []
|
| 203 |
},
|
| 204 |
{
|
| 205 |
-
"name": "timeline_subtask:
|
| 206 |
"status": "pass",
|
| 207 |
-
"value": "
|
| 208 |
"raw_hits": []
|
| 209 |
},
|
| 210 |
{
|
|
@@ -214,15 +214,15 @@
|
|
| 214 |
"raw_hits": []
|
| 215 |
},
|
| 216 |
{
|
| 217 |
-
"name": "timeline_subtask:
|
| 218 |
"status": "pass",
|
| 219 |
-
"value": "
|
| 220 |
"raw_hits": []
|
| 221 |
},
|
| 222 |
{
|
| 223 |
-
"name": "timeline_subtask:
|
| 224 |
"status": "pass",
|
| 225 |
-
"value": "
|
| 226 |
"raw_hits": []
|
| 227 |
},
|
| 228 |
{
|
|
@@ -304,9 +304,9 @@
|
|
| 304 |
"observed": "transition_detection"
|
| 305 |
},
|
| 306 |
{
|
| 307 |
-
"name": "transition_detection:
|
| 308 |
"status": "pass",
|
| 309 |
-
"value": "
|
| 310 |
"raw_hits": []
|
| 311 |
},
|
| 312 |
{
|
|
@@ -316,15 +316,15 @@
|
|
| 316 |
"raw_hits": []
|
| 317 |
},
|
| 318 |
{
|
| 319 |
-
"name": "transition_detection:
|
| 320 |
"status": "pass",
|
| 321 |
-
"value": "
|
| 322 |
"raw_hits": []
|
| 323 |
},
|
| 324 |
{
|
| 325 |
-
"name": "transition_detection:
|
| 326 |
"status": "pass",
|
| 327 |
-
"value": "
|
| 328 |
"raw_hits": []
|
| 329 |
},
|
| 330 |
{
|
|
@@ -334,15 +334,15 @@
|
|
| 334 |
"raw_hits": []
|
| 335 |
},
|
| 336 |
{
|
| 337 |
-
"name": "transition_detection:
|
| 338 |
"status": "pass",
|
| 339 |
-
"value": "
|
| 340 |
"raw_hits": []
|
| 341 |
},
|
| 342 |
{
|
| 343 |
-
"name": "transition_detection:
|
| 344 |
"status": "pass",
|
| 345 |
-
"value": "
|
| 346 |
"raw_hits": []
|
| 347 |
},
|
| 348 |
{
|
|
@@ -422,9 +422,9 @@
|
|
| 422 |
"observed": "next_action"
|
| 423 |
},
|
| 424 |
{
|
| 425 |
-
"name": "next_action:
|
| 426 |
"status": "pass",
|
| 427 |
-
"value": "
|
| 428 |
"raw_hits": []
|
| 429 |
},
|
| 430 |
{
|
|
@@ -434,15 +434,15 @@
|
|
| 434 |
"raw_hits": []
|
| 435 |
},
|
| 436 |
{
|
| 437 |
-
"name": "next_action:
|
| 438 |
"status": "pass",
|
| 439 |
-
"value": "
|
| 440 |
"raw_hits": []
|
| 441 |
},
|
| 442 |
{
|
| 443 |
-
"name": "next_action:
|
| 444 |
"status": "pass",
|
| 445 |
-
"value": "
|
| 446 |
"raw_hits": []
|
| 447 |
},
|
| 448 |
{
|
|
@@ -452,15 +452,15 @@
|
|
| 452 |
"raw_hits": []
|
| 453 |
},
|
| 454 |
{
|
| 455 |
-
"name": "next_action:
|
| 456 |
"status": "pass",
|
| 457 |
-
"value": "
|
| 458 |
"raw_hits": []
|
| 459 |
},
|
| 460 |
{
|
| 461 |
-
"name": "next_action:
|
| 462 |
"status": "pass",
|
| 463 |
-
"value": "
|
| 464 |
"raw_hits": []
|
| 465 |
},
|
| 466 |
{
|
|
@@ -540,9 +540,9 @@
|
|
| 540 |
"observed": "hand_trajectory_forecast"
|
| 541 |
},
|
| 542 |
{
|
| 543 |
-
"name": "hand_trajectory_forecast:
|
| 544 |
"status": "pass",
|
| 545 |
-
"value": "
|
| 546 |
"raw_hits": []
|
| 547 |
},
|
| 548 |
{
|
|
@@ -552,15 +552,15 @@
|
|
| 552 |
"raw_hits": []
|
| 553 |
},
|
| 554 |
{
|
| 555 |
-
"name": "hand_trajectory_forecast:
|
| 556 |
"status": "pass",
|
| 557 |
-
"value": "
|
| 558 |
"raw_hits": []
|
| 559 |
},
|
| 560 |
{
|
| 561 |
-
"name": "hand_trajectory_forecast:
|
| 562 |
"status": "pass",
|
| 563 |
-
"value": "
|
| 564 |
"raw_hits": []
|
| 565 |
},
|
| 566 |
{
|
|
@@ -570,15 +570,15 @@
|
|
| 570 |
"raw_hits": []
|
| 571 |
},
|
| 572 |
{
|
| 573 |
-
"name": "hand_trajectory_forecast:
|
| 574 |
"status": "pass",
|
| 575 |
-
"value": "
|
| 576 |
"raw_hits": []
|
| 577 |
},
|
| 578 |
{
|
| 579 |
-
"name": "hand_trajectory_forecast:
|
| 580 |
"status": "pass",
|
| 581 |
-
"value": "
|
| 582 |
"raw_hits": []
|
| 583 |
},
|
| 584 |
{
|
|
@@ -658,9 +658,9 @@
|
|
| 658 |
"observed": "contact_prediction"
|
| 659 |
},
|
| 660 |
{
|
| 661 |
-
"name": "contact_prediction:
|
| 662 |
"status": "pass",
|
| 663 |
-
"value": "
|
| 664 |
"raw_hits": []
|
| 665 |
},
|
| 666 |
{
|
|
@@ -670,15 +670,15 @@
|
|
| 670 |
"raw_hits": []
|
| 671 |
},
|
| 672 |
{
|
| 673 |
-
"name": "contact_prediction:
|
| 674 |
"status": "pass",
|
| 675 |
-
"value": "
|
| 676 |
"raw_hits": []
|
| 677 |
},
|
| 678 |
{
|
| 679 |
-
"name": "contact_prediction:
|
| 680 |
"status": "pass",
|
| 681 |
-
"value": "
|
| 682 |
"raw_hits": []
|
| 683 |
},
|
| 684 |
{
|
|
@@ -688,15 +688,15 @@
|
|
| 688 |
"raw_hits": []
|
| 689 |
},
|
| 690 |
{
|
| 691 |
-
"name": "contact_prediction:
|
| 692 |
"status": "pass",
|
| 693 |
-
"value": "
|
| 694 |
"raw_hits": []
|
| 695 |
},
|
| 696 |
{
|
| 697 |
-
"name": "contact_prediction:
|
| 698 |
"status": "pass",
|
| 699 |
-
"value": "
|
| 700 |
"raw_hits": []
|
| 701 |
},
|
| 702 |
{
|
|
@@ -774,9 +774,9 @@
|
|
| 774 |
"observed": "object_relevance"
|
| 775 |
},
|
| 776 |
{
|
| 777 |
-
"name": "object_relevance:
|
| 778 |
"status": "pass",
|
| 779 |
-
"value": "
|
| 780 |
"raw_hits": []
|
| 781 |
},
|
| 782 |
{
|
|
@@ -786,15 +786,15 @@
|
|
| 786 |
"raw_hits": []
|
| 787 |
},
|
| 788 |
{
|
| 789 |
-
"name": "object_relevance:
|
| 790 |
"status": "pass",
|
| 791 |
-
"value": "
|
| 792 |
"raw_hits": []
|
| 793 |
},
|
| 794 |
{
|
| 795 |
-
"name": "object_relevance:
|
| 796 |
"status": "pass",
|
| 797 |
-
"value": "
|
| 798 |
"raw_hits": []
|
| 799 |
},
|
| 800 |
{
|
|
@@ -804,15 +804,15 @@
|
|
| 804 |
"raw_hits": []
|
| 805 |
},
|
| 806 |
{
|
| 807 |
-
"name": "object_relevance:
|
| 808 |
"status": "pass",
|
| 809 |
-
"value": "
|
| 810 |
"raw_hits": []
|
| 811 |
},
|
| 812 |
{
|
| 813 |
-
"name": "object_relevance:
|
| 814 |
"status": "pass",
|
| 815 |
-
"value": "
|
| 816 |
"raw_hits": []
|
| 817 |
},
|
| 818 |
{
|
|
@@ -892,9 +892,9 @@
|
|
| 892 |
"observed": "caption_grounding"
|
| 893 |
},
|
| 894 |
{
|
| 895 |
-
"name": "caption_grounding:
|
| 896 |
"status": "pass",
|
| 897 |
-
"value": "
|
| 898 |
"raw_hits": []
|
| 899 |
},
|
| 900 |
{
|
|
@@ -904,15 +904,15 @@
|
|
| 904 |
"raw_hits": []
|
| 905 |
},
|
| 906 |
{
|
| 907 |
-
"name": "caption_grounding:
|
| 908 |
"status": "pass",
|
| 909 |
-
"value": "
|
| 910 |
"raw_hits": []
|
| 911 |
},
|
| 912 |
{
|
| 913 |
-
"name": "caption_grounding:
|
| 914 |
"status": "pass",
|
| 915 |
-
"value": "
|
| 916 |
"raw_hits": []
|
| 917 |
},
|
| 918 |
{
|
|
@@ -922,15 +922,15 @@
|
|
| 922 |
"raw_hits": []
|
| 923 |
},
|
| 924 |
{
|
| 925 |
-
"name": "caption_grounding:
|
| 926 |
"status": "pass",
|
| 927 |
-
"value": "
|
| 928 |
"raw_hits": []
|
| 929 |
},
|
| 930 |
{
|
| 931 |
-
"name": "caption_grounding:
|
| 932 |
"status": "pass",
|
| 933 |
-
"value": "
|
| 934 |
"raw_hits": []
|
| 935 |
},
|
| 936 |
{
|
|
@@ -1008,9 +1008,9 @@
|
|
| 1008 |
"observed": "cross_modal_retrieval"
|
| 1009 |
},
|
| 1010 |
{
|
| 1011 |
-
"name": "cross_modal_retrieval:
|
| 1012 |
"status": "pass",
|
| 1013 |
-
"value": "
|
| 1014 |
"raw_hits": []
|
| 1015 |
},
|
| 1016 |
{
|
|
@@ -1020,15 +1020,15 @@
|
|
| 1020 |
"raw_hits": []
|
| 1021 |
},
|
| 1022 |
{
|
| 1023 |
-
"name": "cross_modal_retrieval:
|
| 1024 |
"status": "pass",
|
| 1025 |
-
"value": "
|
| 1026 |
"raw_hits": []
|
| 1027 |
},
|
| 1028 |
{
|
| 1029 |
-
"name": "cross_modal_retrieval:
|
| 1030 |
"status": "pass",
|
| 1031 |
-
"value": "
|
| 1032 |
"raw_hits": []
|
| 1033 |
},
|
| 1034 |
{
|
|
@@ -1038,15 +1038,15 @@
|
|
| 1038 |
"raw_hits": []
|
| 1039 |
},
|
| 1040 |
{
|
| 1041 |
-
"name": "cross_modal_retrieval:
|
| 1042 |
"status": "pass",
|
| 1043 |
-
"value": "
|
| 1044 |
"raw_hits": []
|
| 1045 |
},
|
| 1046 |
{
|
| 1047 |
-
"name": "cross_modal_retrieval:
|
| 1048 |
"status": "pass",
|
| 1049 |
-
"value": "
|
| 1050 |
"raw_hits": []
|
| 1051 |
},
|
| 1052 |
{
|
|
@@ -1126,9 +1126,9 @@
|
|
| 1126 |
"observed": "modality_reconstruction"
|
| 1127 |
},
|
| 1128 |
{
|
| 1129 |
-
"name": "modality_reconstruction:
|
| 1130 |
"status": "pass",
|
| 1131 |
-
"value": "
|
| 1132 |
"raw_hits": []
|
| 1133 |
},
|
| 1134 |
{
|
|
@@ -1138,15 +1138,15 @@
|
|
| 1138 |
"raw_hits": []
|
| 1139 |
},
|
| 1140 |
{
|
| 1141 |
-
"name": "modality_reconstruction:
|
| 1142 |
"status": "pass",
|
| 1143 |
-
"value": "
|
| 1144 |
"raw_hits": []
|
| 1145 |
},
|
| 1146 |
{
|
| 1147 |
-
"name": "modality_reconstruction:
|
| 1148 |
"status": "pass",
|
| 1149 |
-
"value": "
|
| 1150 |
"raw_hits": []
|
| 1151 |
},
|
| 1152 |
{
|
|
@@ -1156,15 +1156,15 @@
|
|
| 1156 |
"raw_hits": []
|
| 1157 |
},
|
| 1158 |
{
|
| 1159 |
-
"name": "modality_reconstruction:
|
| 1160 |
"status": "pass",
|
| 1161 |
-
"value": "
|
| 1162 |
"raw_hits": []
|
| 1163 |
},
|
| 1164 |
{
|
| 1165 |
-
"name": "modality_reconstruction:
|
| 1166 |
"status": "pass",
|
| 1167 |
-
"value": "
|
| 1168 |
"raw_hits": []
|
| 1169 |
},
|
| 1170 |
{
|
|
@@ -1244,9 +1244,9 @@
|
|
| 1244 |
"observed": "temporal_order"
|
| 1245 |
},
|
| 1246 |
{
|
| 1247 |
-
"name": "temporal_order:
|
| 1248 |
"status": "pass",
|
| 1249 |
-
"value": "
|
| 1250 |
"raw_hits": []
|
| 1251 |
},
|
| 1252 |
{
|
|
@@ -1256,15 +1256,15 @@
|
|
| 1256 |
"raw_hits": []
|
| 1257 |
},
|
| 1258 |
{
|
| 1259 |
-
"name": "temporal_order:
|
| 1260 |
"status": "pass",
|
| 1261 |
-
"value": "
|
| 1262 |
"raw_hits": []
|
| 1263 |
},
|
| 1264 |
{
|
| 1265 |
-
"name": "temporal_order:
|
| 1266 |
"status": "pass",
|
| 1267 |
-
"value": "
|
| 1268 |
"raw_hits": []
|
| 1269 |
},
|
| 1270 |
{
|
|
@@ -1274,15 +1274,15 @@
|
|
| 1274 |
"raw_hits": []
|
| 1275 |
},
|
| 1276 |
{
|
| 1277 |
-
"name": "temporal_order:
|
| 1278 |
"status": "pass",
|
| 1279 |
-
"value": "
|
| 1280 |
"raw_hits": []
|
| 1281 |
},
|
| 1282 |
{
|
| 1283 |
-
"name": "temporal_order:
|
| 1284 |
"status": "pass",
|
| 1285 |
-
"value": "
|
| 1286 |
"raw_hits": []
|
| 1287 |
},
|
| 1288 |
{
|
|
@@ -1360,9 +1360,9 @@
|
|
| 1360 |
"observed": "misalignment_detection"
|
| 1361 |
},
|
| 1362 |
{
|
| 1363 |
-
"name": "misalignment_detection:
|
| 1364 |
"status": "pass",
|
| 1365 |
-
"value": "
|
| 1366 |
"raw_hits": []
|
| 1367 |
},
|
| 1368 |
{
|
|
@@ -1372,15 +1372,15 @@
|
|
| 1372 |
"raw_hits": []
|
| 1373 |
},
|
| 1374 |
{
|
| 1375 |
-
"name": "misalignment_detection:
|
| 1376 |
"status": "pass",
|
| 1377 |
-
"value": "
|
| 1378 |
"raw_hits": []
|
| 1379 |
},
|
| 1380 |
{
|
| 1381 |
-
"name": "misalignment_detection:
|
| 1382 |
"status": "pass",
|
| 1383 |
-
"value": "
|
| 1384 |
"raw_hits": []
|
| 1385 |
},
|
| 1386 |
{
|
|
@@ -1390,15 +1390,15 @@
|
|
| 1390 |
"raw_hits": []
|
| 1391 |
},
|
| 1392 |
{
|
| 1393 |
-
"name": "misalignment_detection:
|
| 1394 |
"status": "pass",
|
| 1395 |
-
"value": "
|
| 1396 |
"raw_hits": []
|
| 1397 |
},
|
| 1398 |
{
|
| 1399 |
-
"name": "misalignment_detection:
|
| 1400 |
"status": "pass",
|
| 1401 |
-
"value": "
|
| 1402 |
"raw_hits": []
|
| 1403 |
},
|
| 1404 |
{
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-06T23:27:06+00:00",
|
| 4 |
"summary": {
|
| 5 |
"task_count": 12,
|
| 6 |
"expected_task_count": 12,
|
|
|
|
| 64 |
"observed": "timeline_action"
|
| 65 |
},
|
| 66 |
{
|
| 67 |
+
"name": "timeline_action: public_field_plain_goal_is_human_readable",
|
| 68 |
"status": "pass",
|
| 69 |
+
"value": "Look at one short multimodal window and name what action is happening now.",
|
| 70 |
"raw_hits": []
|
| 71 |
},
|
| 72 |
{
|
|
|
|
| 76 |
"raw_hits": []
|
| 77 |
},
|
| 78 |
{
|
| 79 |
+
"name": "timeline_action: public_field_display_name_is_human_readable",
|
| 80 |
"status": "pass",
|
| 81 |
+
"value": "Action Recognition",
|
| 82 |
"raw_hits": []
|
| 83 |
},
|
| 84 |
{
|
| 85 |
+
"name": "timeline_action: public_field_input_short_is_human_readable",
|
| 86 |
"status": "pass",
|
| 87 |
+
"value": "20-frame multimodal window",
|
| 88 |
"raw_hits": []
|
| 89 |
},
|
| 90 |
{
|
|
|
|
| 94 |
"raw_hits": []
|
| 95 |
},
|
| 96 |
{
|
| 97 |
+
"name": "timeline_action: public_field_process_short_is_human_readable",
|
| 98 |
"status": "pass",
|
| 99 |
+
"value": "window features -> action label builder -> classifier",
|
| 100 |
"raw_hits": []
|
| 101 |
},
|
| 102 |
{
|
| 103 |
+
"name": "timeline_action: public_field_research_name_is_human_readable",
|
| 104 |
"status": "pass",
|
| 105 |
+
"value": "Egocentric Action Recognition",
|
| 106 |
"raw_hits": []
|
| 107 |
},
|
| 108 |
{
|
|
|
|
| 184 |
"observed": "timeline_subtask"
|
| 185 |
},
|
| 186 |
{
|
| 187 |
+
"name": "timeline_subtask: public_field_plain_goal_is_human_readable",
|
| 188 |
"status": "pass",
|
| 189 |
+
"value": "Predict the higher-level task stage for the current window.",
|
| 190 |
"raw_hits": []
|
| 191 |
},
|
| 192 |
{
|
|
|
|
| 196 |
"raw_hits": []
|
| 197 |
},
|
| 198 |
{
|
| 199 |
+
"name": "timeline_subtask: public_field_display_name_is_human_readable",
|
| 200 |
"status": "pass",
|
| 201 |
+
"value": "Procedure Step Recognition",
|
| 202 |
"raw_hits": []
|
| 203 |
},
|
| 204 |
{
|
| 205 |
+
"name": "timeline_subtask: public_field_input_short_is_human_readable",
|
| 206 |
"status": "pass",
|
| 207 |
+
"value": "20-frame multimodal window",
|
| 208 |
"raw_hits": []
|
| 209 |
},
|
| 210 |
{
|
|
|
|
| 214 |
"raw_hits": []
|
| 215 |
},
|
| 216 |
{
|
| 217 |
+
"name": "timeline_subtask: public_field_process_short_is_human_readable",
|
| 218 |
"status": "pass",
|
| 219 |
+
"value": "window features -> subtask label builder -> classifier",
|
| 220 |
"raw_hits": []
|
| 221 |
},
|
| 222 |
{
|
| 223 |
+
"name": "timeline_subtask: public_field_research_name_is_human_readable",
|
| 224 |
"status": "pass",
|
| 225 |
+
"value": "Temporal Subtask Recognition",
|
| 226 |
"raw_hits": []
|
| 227 |
},
|
| 228 |
{
|
|
|
|
| 304 |
"observed": "transition_detection"
|
| 305 |
},
|
| 306 |
{
|
| 307 |
+
"name": "transition_detection: public_field_plain_goal_is_human_readable",
|
| 308 |
"status": "pass",
|
| 309 |
+
"value": "Detect whether the current window is near a boundary between actions.",
|
| 310 |
"raw_hits": []
|
| 311 |
},
|
| 312 |
{
|
|
|
|
| 316 |
"raw_hits": []
|
| 317 |
},
|
| 318 |
{
|
| 319 |
+
"name": "transition_detection: public_field_display_name_is_human_readable",
|
| 320 |
"status": "pass",
|
| 321 |
+
"value": "Action Boundary Detection",
|
| 322 |
"raw_hits": []
|
| 323 |
},
|
| 324 |
{
|
| 325 |
+
"name": "transition_detection: public_field_input_short_is_human_readable",
|
| 326 |
"status": "pass",
|
| 327 |
+
"value": "current window with boundary target",
|
| 328 |
"raw_hits": []
|
| 329 |
},
|
| 330 |
{
|
|
|
|
| 334 |
"raw_hits": []
|
| 335 |
},
|
| 336 |
{
|
| 337 |
+
"name": "transition_detection: public_field_process_short_is_human_readable",
|
| 338 |
"status": "pass",
|
| 339 |
+
"value": "action changes -> boundary labels -> binary classifier",
|
| 340 |
"raw_hits": []
|
| 341 |
},
|
| 342 |
{
|
| 343 |
+
"name": "transition_detection: public_field_research_name_is_human_readable",
|
| 344 |
"status": "pass",
|
| 345 |
+
"value": "Temporal Action Segmentation",
|
| 346 |
"raw_hits": []
|
| 347 |
},
|
| 348 |
{
|
|
|
|
| 422 |
"observed": "next_action"
|
| 423 |
},
|
| 424 |
{
|
| 425 |
+
"name": "next_action: public_field_plain_goal_is_human_readable",
|
| 426 |
"status": "pass",
|
| 427 |
+
"value": "Use the current window to guess the action that will happen shortly after it.",
|
| 428 |
"raw_hits": []
|
| 429 |
},
|
| 430 |
{
|
|
|
|
| 434 |
"raw_hits": []
|
| 435 |
},
|
| 436 |
{
|
| 437 |
+
"name": "next_action: public_field_display_name_is_human_readable",
|
| 438 |
"status": "pass",
|
| 439 |
+
"value": "Next-Action Prediction",
|
| 440 |
"raw_hits": []
|
| 441 |
},
|
| 442 |
{
|
| 443 |
+
"name": "next_action: public_field_input_short_is_human_readable",
|
| 444 |
"status": "pass",
|
| 445 |
+
"value": "current window at time t",
|
| 446 |
"raw_hits": []
|
| 447 |
},
|
| 448 |
{
|
|
|
|
| 452 |
"raw_hits": []
|
| 453 |
},
|
| 454 |
{
|
| 455 |
+
"name": "next_action: public_field_process_short_is_human_readable",
|
| 456 |
"status": "pass",
|
| 457 |
+
"value": "current features -> future label shift -> classifier",
|
| 458 |
"raw_hits": []
|
| 459 |
},
|
| 460 |
{
|
| 461 |
+
"name": "next_action: public_field_research_name_is_human_readable",
|
| 462 |
"status": "pass",
|
| 463 |
+
"value": "Short-Horizon Intention Prediction",
|
| 464 |
"raw_hits": []
|
| 465 |
},
|
| 466 |
{
|
|
|
|
| 540 |
"observed": "hand_trajectory_forecast"
|
| 541 |
},
|
| 542 |
{
|
| 543 |
+
"name": "hand_trajectory_forecast: public_field_plain_goal_is_human_readable",
|
| 544 |
"status": "pass",
|
| 545 |
+
"value": "Predict where the hands will move over the next few frames.",
|
| 546 |
"raw_hits": []
|
| 547 |
},
|
| 548 |
{
|
|
|
|
| 552 |
"raw_hits": []
|
| 553 |
},
|
| 554 |
{
|
| 555 |
+
"name": "hand_trajectory_forecast: public_field_display_name_is_human_readable",
|
| 556 |
"status": "pass",
|
| 557 |
+
"value": "Hand Trajectory Forecasting",
|
| 558 |
"raw_hits": []
|
| 559 |
},
|
| 560 |
{
|
| 561 |
+
"name": "hand_trajectory_forecast: public_field_input_short_is_human_readable",
|
| 562 |
"status": "pass",
|
| 563 |
+
"value": "current multimodal window",
|
| 564 |
"raw_hits": []
|
| 565 |
},
|
| 566 |
{
|
|
|
|
| 570 |
"raw_hits": []
|
| 571 |
},
|
| 572 |
{
|
| 573 |
+
"name": "hand_trajectory_forecast: public_field_process_short_is_human_readable",
|
| 574 |
"status": "pass",
|
| 575 |
+
"value": "current features -> future mocap target -> regression head",
|
| 576 |
"raw_hits": []
|
| 577 |
},
|
| 578 |
{
|
| 579 |
+
"name": "hand_trajectory_forecast: public_field_research_name_is_human_readable",
|
| 580 |
"status": "pass",
|
| 581 |
+
"value": "3D Hand Motion Forecasting",
|
| 582 |
"raw_hits": []
|
| 583 |
},
|
| 584 |
{
|
|
|
|
| 658 |
"observed": "contact_prediction"
|
| 659 |
},
|
| 660 |
{
|
| 661 |
+
"name": "contact_prediction: public_field_plain_goal_is_human_readable",
|
| 662 |
"status": "pass",
|
| 663 |
+
"value": "Predict whether the body or hand is in contact with something.",
|
| 664 |
"raw_hits": []
|
| 665 |
},
|
| 666 |
{
|
|
|
|
| 670 |
"raw_hits": []
|
| 671 |
},
|
| 672 |
{
|
| 673 |
+
"name": "contact_prediction: public_field_display_name_is_human_readable",
|
| 674 |
"status": "pass",
|
| 675 |
+
"value": "Contact State Prediction",
|
| 676 |
"raw_hits": []
|
| 677 |
},
|
| 678 |
{
|
| 679 |
+
"name": "contact_prediction: public_field_input_short_is_human_readable",
|
| 680 |
"status": "pass",
|
| 681 |
+
"value": "non-contact, non-caption features",
|
| 682 |
"raw_hits": []
|
| 683 |
},
|
| 684 |
{
|
|
|
|
| 688 |
"raw_hits": []
|
| 689 |
},
|
| 690 |
{
|
| 691 |
+
"name": "contact_prediction: public_field_process_short_is_human_readable",
|
| 692 |
"status": "pass",
|
| 693 |
+
"value": "feature filter -> contact target -> binary classifier",
|
| 694 |
"raw_hits": []
|
| 695 |
},
|
| 696 |
{
|
| 697 |
+
"name": "contact_prediction: public_field_research_name_is_human_readable",
|
| 698 |
"status": "pass",
|
| 699 |
+
"value": "Human-Object Contact Prediction",
|
| 700 |
"raw_hits": []
|
| 701 |
},
|
| 702 |
{
|
|
|
|
| 774 |
"observed": "object_relevance"
|
| 775 |
},
|
| 776 |
{
|
| 777 |
+
"name": "object_relevance: public_field_plain_goal_is_human_readable",
|
| 778 |
"status": "pass",
|
| 779 |
+
"value": "Predict which objects matter in the current window.",
|
| 780 |
"raw_hits": []
|
| 781 |
},
|
| 782 |
{
|
|
|
|
| 786 |
"raw_hits": []
|
| 787 |
},
|
| 788 |
{
|
| 789 |
+
"name": "object_relevance: public_field_display_name_is_human_readable",
|
| 790 |
"status": "pass",
|
| 791 |
+
"value": "Object Relevance Prediction",
|
| 792 |
"raw_hits": []
|
| 793 |
},
|
| 794 |
{
|
| 795 |
+
"name": "object_relevance: public_field_input_short_is_human_readable",
|
| 796 |
"status": "pass",
|
| 797 |
+
"value": "non-caption multimodal features",
|
| 798 |
"raw_hits": []
|
| 799 |
},
|
| 800 |
{
|
|
|
|
| 804 |
"raw_hits": []
|
| 805 |
},
|
| 806 |
{
|
| 807 |
+
"name": "object_relevance: public_field_process_short_is_human_readable",
|
| 808 |
"status": "pass",
|
| 809 |
+
"value": "object vocabulary -> multi-hot labels -> sigmoid heads",
|
| 810 |
"raw_hits": []
|
| 811 |
},
|
| 812 |
{
|
| 813 |
+
"name": "object_relevance: public_field_research_name_is_human_readable",
|
| 814 |
"status": "pass",
|
| 815 |
+
"value": "Object-Centric Interaction Recognition",
|
| 816 |
"raw_hits": []
|
| 817 |
},
|
| 818 |
{
|
|
|
|
| 892 |
"observed": "caption_grounding"
|
| 893 |
},
|
| 894 |
{
|
| 895 |
+
"name": "caption_grounding: public_field_plain_goal_is_human_readable",
|
| 896 |
"status": "pass",
|
| 897 |
+
"value": "Given a text-like query from annotation, find the matching time window.",
|
| 898 |
"raw_hits": []
|
| 899 |
},
|
| 900 |
{
|
|
|
|
| 904 |
"raw_hits": []
|
| 905 |
},
|
| 906 |
{
|
| 907 |
+
"name": "caption_grounding: public_field_display_name_is_human_readable",
|
| 908 |
"status": "pass",
|
| 909 |
+
"value": "Language Grounding",
|
| 910 |
"raw_hits": []
|
| 911 |
},
|
| 912 |
{
|
| 913 |
+
"name": "caption_grounding: public_field_input_short_is_human_readable",
|
| 914 |
"status": "pass",
|
| 915 |
+
"value": "text-like query and candidate windows",
|
| 916 |
"raw_hits": []
|
| 917 |
},
|
| 918 |
{
|
|
|
|
| 922 |
"raw_hits": []
|
| 923 |
},
|
| 924 |
{
|
| 925 |
+
"name": "caption_grounding: public_field_process_short_is_human_readable",
|
| 926 |
"status": "pass",
|
| 927 |
+
"value": "query features -> candidate index -> cosine ranker",
|
| 928 |
"raw_hits": []
|
| 929 |
},
|
| 930 |
{
|
| 931 |
+
"name": "caption_grounding: public_field_research_name_is_human_readable",
|
| 932 |
"status": "pass",
|
| 933 |
+
"value": "Language-to-Moment Grounding",
|
| 934 |
"raw_hits": []
|
| 935 |
},
|
| 936 |
{
|
|
|
|
| 1008 |
"observed": "cross_modal_retrieval"
|
| 1009 |
},
|
| 1010 |
{
|
| 1011 |
+
"name": "cross_modal_retrieval: public_field_plain_goal_is_human_readable",
|
| 1012 |
"status": "pass",
|
| 1013 |
+
"value": "Use one group of modalities to retrieve the matching window from another group.",
|
| 1014 |
"raw_hits": []
|
| 1015 |
},
|
| 1016 |
{
|
|
|
|
| 1020 |
"raw_hits": []
|
| 1021 |
},
|
| 1022 |
{
|
| 1023 |
+
"name": "cross_modal_retrieval: public_field_display_name_is_human_readable",
|
| 1024 |
"status": "pass",
|
| 1025 |
+
"value": "Cross-Modal Retrieval",
|
| 1026 |
"raw_hits": []
|
| 1027 |
},
|
| 1028 |
{
|
| 1029 |
+
"name": "cross_modal_retrieval: public_field_input_short_is_human_readable",
|
| 1030 |
"status": "pass",
|
| 1031 |
+
"value": "motion/IMU/pose query; depth/video candidates",
|
| 1032 |
"raw_hits": []
|
| 1033 |
},
|
| 1034 |
{
|
|
|
|
| 1038 |
"raw_hits": []
|
| 1039 |
},
|
| 1040 |
{
|
| 1041 |
+
"name": "cross_modal_retrieval: public_field_process_short_is_human_readable",
|
| 1042 |
"status": "pass",
|
| 1043 |
+
"value": "modality split -> projection -> nearest-neighbor ranker",
|
| 1044 |
"raw_hits": []
|
| 1045 |
},
|
| 1046 |
{
|
| 1047 |
+
"name": "cross_modal_retrieval: public_field_research_name_is_human_readable",
|
| 1048 |
"status": "pass",
|
| 1049 |
+
"value": "Multimodal Representation Retrieval",
|
| 1050 |
"raw_hits": []
|
| 1051 |
},
|
| 1052 |
{
|
|
|
|
| 1126 |
"observed": "modality_reconstruction"
|
| 1127 |
},
|
| 1128 |
{
|
| 1129 |
+
"name": "modality_reconstruction: public_field_plain_goal_is_human_readable",
|
| 1130 |
"status": "pass",
|
| 1131 |
+
"value": "Predict one modality feature block from other modality blocks.",
|
| 1132 |
"raw_hits": []
|
| 1133 |
},
|
| 1134 |
{
|
|
|
|
| 1138 |
"raw_hits": []
|
| 1139 |
},
|
| 1140 |
{
|
| 1141 |
+
"name": "modality_reconstruction: public_field_display_name_is_human_readable",
|
| 1142 |
"status": "pass",
|
| 1143 |
+
"value": "Cross-Modal Reconstruction",
|
| 1144 |
"raw_hits": []
|
| 1145 |
},
|
| 1146 |
{
|
| 1147 |
+
"name": "modality_reconstruction: public_field_input_short_is_human_readable",
|
| 1148 |
"status": "pass",
|
| 1149 |
+
"value": "motion, IMU, and camera/pose features",
|
| 1150 |
"raw_hits": []
|
| 1151 |
},
|
| 1152 |
{
|
|
|
|
| 1156 |
"raw_hits": []
|
| 1157 |
},
|
| 1158 |
{
|
| 1159 |
+
"name": "modality_reconstruction: public_field_process_short_is_human_readable",
|
| 1160 |
"status": "pass",
|
| 1161 |
+
"value": "source-target split -> scaler -> regression head",
|
| 1162 |
"raw_hits": []
|
| 1163 |
},
|
| 1164 |
{
|
| 1165 |
+
"name": "modality_reconstruction: public_field_research_name_is_human_readable",
|
| 1166 |
"status": "pass",
|
| 1167 |
+
"value": "Modality Feature Reconstruction",
|
| 1168 |
"raw_hits": []
|
| 1169 |
},
|
| 1170 |
{
|
|
|
|
| 1244 |
"observed": "temporal_order"
|
| 1245 |
},
|
| 1246 |
{
|
| 1247 |
+
"name": "temporal_order: public_field_plain_goal_is_human_readable",
|
| 1248 |
"status": "pass",
|
| 1249 |
+
"value": "Tell whether two nearby windows are in the correct time order.",
|
| 1250 |
"raw_hits": []
|
| 1251 |
},
|
| 1252 |
{
|
|
|
|
| 1256 |
"raw_hits": []
|
| 1257 |
},
|
| 1258 |
{
|
| 1259 |
+
"name": "temporal_order: public_field_display_name_is_human_readable",
|
| 1260 |
"status": "pass",
|
| 1261 |
+
"value": "Temporal Order Verification",
|
| 1262 |
"raw_hits": []
|
| 1263 |
},
|
| 1264 |
{
|
| 1265 |
+
"name": "temporal_order: public_field_input_short_is_human_readable",
|
| 1266 |
"status": "pass",
|
| 1267 |
+
"value": "two adjacent windows plus difference vector",
|
| 1268 |
"raw_hits": []
|
| 1269 |
},
|
| 1270 |
{
|
|
|
|
| 1274 |
"raw_hits": []
|
| 1275 |
},
|
| 1276 |
{
|
| 1277 |
+
"name": "temporal_order: public_field_process_short_is_human_readable",
|
| 1278 |
"status": "pass",
|
| 1279 |
+
"value": "pair builder -> feature combiner -> binary classifier",
|
| 1280 |
"raw_hits": []
|
| 1281 |
},
|
| 1282 |
{
|
| 1283 |
+
"name": "temporal_order: public_field_research_name_is_human_readable",
|
| 1284 |
"status": "pass",
|
| 1285 |
+
"value": "Temporal Order Verification",
|
| 1286 |
"raw_hits": []
|
| 1287 |
},
|
| 1288 |
{
|
|
|
|
| 1360 |
"observed": "misalignment_detection"
|
| 1361 |
},
|
| 1362 |
{
|
| 1363 |
+
"name": "misalignment_detection: public_field_plain_goal_is_human_readable",
|
| 1364 |
"status": "pass",
|
| 1365 |
+
"value": "Detect when modalities that should match are shifted out of sync.",
|
| 1366 |
"raw_hits": []
|
| 1367 |
},
|
| 1368 |
{
|
|
|
|
| 1372 |
"raw_hits": []
|
| 1373 |
},
|
| 1374 |
{
|
| 1375 |
+
"name": "misalignment_detection: public_field_display_name_is_human_readable",
|
| 1376 |
"status": "pass",
|
| 1377 |
+
"value": "Multimodal Synchronization Detection",
|
| 1378 |
"raw_hits": []
|
| 1379 |
},
|
| 1380 |
{
|
| 1381 |
+
"name": "misalignment_detection: public_field_input_short_is_human_readable",
|
| 1382 |
"status": "pass",
|
| 1383 |
+
"value": "motion-side and visual/depth-side feature groups",
|
| 1384 |
"raw_hits": []
|
| 1385 |
},
|
| 1386 |
{
|
|
|
|
| 1390 |
"raw_hits": []
|
| 1391 |
},
|
| 1392 |
{
|
| 1393 |
+
"name": "misalignment_detection: public_field_process_short_is_human_readable",
|
| 1394 |
"status": "pass",
|
| 1395 |
+
"value": "aligned/shifted pairs -> feature combiner -> binary classifier",
|
| 1396 |
"raw_hits": []
|
| 1397 |
},
|
| 1398 |
{
|
| 1399 |
+
"name": "misalignment_detection: public_field_research_name_is_human_readable",
|
| 1400 |
"status": "pass",
|
| 1401 |
+
"value": "Cross-Modal Misalignment Detection",
|
| 1402 |
"raw_hits": []
|
| 1403 |
},
|
| 1404 |
{
|
metrics/website_integrity.json
CHANGED
|
@@ -1,13 +1,13 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"docs_root": "docs",
|
| 5 |
"site_base": "/ropedia-xperience-10m-task-suite/",
|
| 6 |
"summary": {
|
| 7 |
"html_pages": 4,
|
| 8 |
-
"local_references":
|
| 9 |
-
"external_reference_count":
|
| 10 |
-
"json_files":
|
| 11 |
"image_assets_referenced": 22,
|
| 12 |
"failure_count": 0
|
| 13 |
},
|
|
@@ -75,7 +75,7 @@
|
|
| 75 |
"status": "pass",
|
| 76 |
"reason": "The project overview should appear before the deeper progress ledger.",
|
| 77 |
"overview_index": 67412,
|
| 78 |
-
"evidence_index":
|
| 79 |
},
|
| 80 |
{
|
| 81 |
"name": "project_status_links_json",
|
|
@@ -153,8 +153,8 @@
|
|
| 153 |
"status": "pass",
|
| 154 |
"reason": "The evaluation protocol should appear before the deeper evidence ledger.",
|
| 155 |
"overview_index": 67412,
|
| 156 |
-
"protocol_index":
|
| 157 |
-
"evidence_index":
|
| 158 |
},
|
| 159 |
{
|
| 160 |
"name": "evaluation_protocol_links_json",
|
|
@@ -228,7 +228,7 @@
|
|
| 228 |
{
|
| 229 |
"path": "index.html",
|
| 230 |
"id_count": 77,
|
| 231 |
-
"reference_count":
|
| 232 |
"image_count": 24
|
| 233 |
},
|
| 234 |
{
|
|
@@ -252,12 +252,12 @@
|
|
| 252 |
},
|
| 253 |
{
|
| 254 |
"path": "data/artifact_index.json",
|
| 255 |
-
"bytes":
|
| 256 |
"top_level_type": "dict"
|
| 257 |
},
|
| 258 |
{
|
| 259 |
"path": "data/audio_ablation_summary.json",
|
| 260 |
-
"bytes":
|
| 261 |
"top_level_type": "dict"
|
| 262 |
},
|
| 263 |
{
|
|
@@ -267,7 +267,7 @@
|
|
| 267 |
},
|
| 268 |
{
|
| 269 |
"path": "data/evaluation_protocol.json",
|
| 270 |
-
"bytes":
|
| 271 |
"top_level_type": "dict"
|
| 272 |
},
|
| 273 |
{
|
|
@@ -282,7 +282,7 @@
|
|
| 282 |
},
|
| 283 |
{
|
| 284 |
"path": "data/foundation_model_plan.json",
|
| 285 |
-
"bytes":
|
| 286 |
"top_level_type": "dict"
|
| 287 |
},
|
| 288 |
{
|
|
@@ -292,7 +292,7 @@
|
|
| 292 |
},
|
| 293 |
{
|
| 294 |
"path": "data/mirror_parity.json",
|
| 295 |
-
"bytes":
|
| 296 |
"top_level_type": "dict"
|
| 297 |
},
|
| 298 |
{
|
|
@@ -302,27 +302,32 @@
|
|
| 302 |
},
|
| 303 |
{
|
| 304 |
"path": "data/omni_finetune_verified_result.json",
|
| 305 |
-
"bytes":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 306 |
"top_level_type": "dict"
|
| 307 |
},
|
| 308 |
{
|
| 309 |
"path": "data/project_brief.json",
|
| 310 |
-
"bytes":
|
| 311 |
"top_level_type": "dict"
|
| 312 |
},
|
| 313 |
{
|
| 314 |
"path": "data/project_manifest.json",
|
| 315 |
-
"bytes":
|
| 316 |
"top_level_type": "dict"
|
| 317 |
},
|
| 318 |
{
|
| 319 |
"path": "data/project_packet.json",
|
| 320 |
-
"bytes":
|
| 321 |
"top_level_type": "dict"
|
| 322 |
},
|
| 323 |
{
|
| 324 |
"path": "data/project_status.json",
|
| 325 |
-
"bytes":
|
| 326 |
"top_level_type": "dict"
|
| 327 |
},
|
| 328 |
{
|
|
@@ -347,7 +352,7 @@
|
|
| 347 |
},
|
| 348 |
{
|
| 349 |
"path": "data/reproducibility_matrix.json",
|
| 350 |
-
"bytes":
|
| 351 |
"top_level_type": "dict"
|
| 352 |
},
|
| 353 |
{
|
|
@@ -357,32 +362,32 @@
|
|
| 357 |
},
|
| 358 |
{
|
| 359 |
"path": "data/research_directions.json",
|
| 360 |
-
"bytes":
|
| 361 |
"top_level_type": "dict"
|
| 362 |
},
|
| 363 |
{
|
| 364 |
"path": "data/research_roadmap.json",
|
| 365 |
-
"bytes":
|
| 366 |
"top_level_type": "dict"
|
| 367 |
},
|
| 368 |
{
|
| 369 |
"path": "data/research_roadmap_interactive.json",
|
| 370 |
-
"bytes":
|
| 371 |
"top_level_type": "dict"
|
| 372 |
},
|
| 373 |
{
|
| 374 |
"path": "data/research_takeaways.json",
|
| 375 |
-
"bytes":
|
| 376 |
"top_level_type": "dict"
|
| 377 |
},
|
| 378 |
{
|
| 379 |
"path": "data/scope_claims_audit.json",
|
| 380 |
-
"bytes":
|
| 381 |
"top_level_type": "dict"
|
| 382 |
},
|
| 383 |
{
|
| 384 |
"path": "data/single_episode_explorer.json",
|
| 385 |
-
"bytes":
|
| 386 |
"top_level_type": "dict"
|
| 387 |
},
|
| 388 |
{
|
|
@@ -392,7 +397,7 @@
|
|
| 392 |
},
|
| 393 |
{
|
| 394 |
"path": "data/summary_metrics.json",
|
| 395 |
-
"bytes":
|
| 396 |
"top_level_type": "dict"
|
| 397 |
},
|
| 398 |
{
|
|
@@ -407,7 +412,7 @@
|
|
| 407 |
},
|
| 408 |
{
|
| 409 |
"path": "data/website_integrity.json",
|
| 410 |
-
"bytes":
|
| 411 |
"top_level_type": "dict"
|
| 412 |
},
|
| 413 |
{
|
|
@@ -450,21 +455,21 @@
|
|
| 450 |
{
|
| 451 |
"path": "assets/charts/episode_task_scores.svg",
|
| 452 |
"exists": true,
|
| 453 |
-
"bytes":
|
| 454 |
"format": "SVG",
|
| 455 |
"has_viewbox": true
|
| 456 |
},
|
| 457 |
{
|
| 458 |
"path": "assets/charts/episode_task_scores_minimal_vs_neural.svg",
|
| 459 |
"exists": true,
|
| 460 |
-
"bytes":
|
| 461 |
"format": "SVG",
|
| 462 |
"has_viewbox": true
|
| 463 |
},
|
| 464 |
{
|
| 465 |
"path": "assets/charts/episode_task_scores_neural_mlp.svg",
|
| 466 |
"exists": true,
|
| 467 |
-
"bytes":
|
| 468 |
"format": "SVG",
|
| 469 |
"has_viewbox": true
|
| 470 |
},
|
|
@@ -485,7 +490,7 @@
|
|
| 485 |
{
|
| 486 |
"path": "assets/charts/research_direction_coverage.svg",
|
| 487 |
"exists": true,
|
| 488 |
-
"bytes":
|
| 489 |
"format": "SVG",
|
| 490 |
"has_viewbox": true
|
| 491 |
},
|
|
@@ -571,7 +576,7 @@
|
|
| 571 |
{
|
| 572 |
"path": "assets/task_architectures.png",
|
| 573 |
"exists": true,
|
| 574 |
-
"bytes":
|
| 575 |
"width": 1800,
|
| 576 |
"height": 2450,
|
| 577 |
"format": "PNG"
|
|
@@ -579,7 +584,7 @@
|
|
| 579 |
{
|
| 580 |
"path": "assets/task_suite_infographic.png",
|
| 581 |
"exists": true,
|
| 582 |
-
"bytes":
|
| 583 |
"width": 1800,
|
| 584 |
"height": 6600,
|
| 585 |
"format": "PNG"
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-06T23:27:27+00:00",
|
| 4 |
"docs_root": "docs",
|
| 5 |
"site_base": "/ropedia-xperience-10m-task-suite/",
|
| 6 |
"summary": {
|
| 7 |
"html_pages": 4,
|
| 8 |
+
"local_references": 136,
|
| 9 |
+
"external_reference_count": 106,
|
| 10 |
+
"json_files": 35,
|
| 11 |
"image_assets_referenced": 22,
|
| 12 |
"failure_count": 0
|
| 13 |
},
|
|
|
|
| 75 |
"status": "pass",
|
| 76 |
"reason": "The project overview should appear before the deeper progress ledger.",
|
| 77 |
"overview_index": 67412,
|
| 78 |
+
"evidence_index": 90421
|
| 79 |
},
|
| 80 |
{
|
| 81 |
"name": "project_status_links_json",
|
|
|
|
| 153 |
"status": "pass",
|
| 154 |
"reason": "The evaluation protocol should appear before the deeper evidence ledger.",
|
| 155 |
"overview_index": 67412,
|
| 156 |
+
"protocol_index": 87159,
|
| 157 |
+
"evidence_index": 90421
|
| 158 |
},
|
| 159 |
{
|
| 160 |
"name": "evaluation_protocol_links_json",
|
|
|
|
| 228 |
{
|
| 229 |
"path": "index.html",
|
| 230 |
"id_count": 77,
|
| 231 |
+
"reference_count": 113,
|
| 232 |
"image_count": 24
|
| 233 |
},
|
| 234 |
{
|
|
|
|
| 252 |
},
|
| 253 |
{
|
| 254 |
"path": "data/artifact_index.json",
|
| 255 |
+
"bytes": 60162,
|
| 256 |
"top_level_type": "dict"
|
| 257 |
},
|
| 258 |
{
|
| 259 |
"path": "data/audio_ablation_summary.json",
|
| 260 |
+
"bytes": 10370,
|
| 261 |
"top_level_type": "dict"
|
| 262 |
},
|
| 263 |
{
|
|
|
|
| 267 |
},
|
| 268 |
{
|
| 269 |
"path": "data/evaluation_protocol.json",
|
| 270 |
+
"bytes": 14511,
|
| 271 |
"top_level_type": "dict"
|
| 272 |
},
|
| 273 |
{
|
|
|
|
| 282 |
},
|
| 283 |
{
|
| 284 |
"path": "data/foundation_model_plan.json",
|
| 285 |
+
"bytes": 13193,
|
| 286 |
"top_level_type": "dict"
|
| 287 |
},
|
| 288 |
{
|
|
|
|
| 292 |
},
|
| 293 |
{
|
| 294 |
"path": "data/mirror_parity.json",
|
| 295 |
+
"bytes": 235815,
|
| 296 |
"top_level_type": "dict"
|
| 297 |
},
|
| 298 |
{
|
|
|
|
| 302 |
},
|
| 303 |
{
|
| 304 |
"path": "data/omni_finetune_verified_result.json",
|
| 305 |
+
"bytes": 3483,
|
| 306 |
+
"top_level_type": "dict"
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"path": "data/omni_model_comparison.json",
|
| 310 |
+
"bytes": 21433,
|
| 311 |
"top_level_type": "dict"
|
| 312 |
},
|
| 313 |
{
|
| 314 |
"path": "data/project_brief.json",
|
| 315 |
+
"bytes": 3811,
|
| 316 |
"top_level_type": "dict"
|
| 317 |
},
|
| 318 |
{
|
| 319 |
"path": "data/project_manifest.json",
|
| 320 |
+
"bytes": 5193,
|
| 321 |
"top_level_type": "dict"
|
| 322 |
},
|
| 323 |
{
|
| 324 |
"path": "data/project_packet.json",
|
| 325 |
+
"bytes": 7943,
|
| 326 |
"top_level_type": "dict"
|
| 327 |
},
|
| 328 |
{
|
| 329 |
"path": "data/project_status.json",
|
| 330 |
+
"bytes": 15049,
|
| 331 |
"top_level_type": "dict"
|
| 332 |
},
|
| 333 |
{
|
|
|
|
| 352 |
},
|
| 353 |
{
|
| 354 |
"path": "data/reproducibility_matrix.json",
|
| 355 |
+
"bytes": 5280,
|
| 356 |
"top_level_type": "dict"
|
| 357 |
},
|
| 358 |
{
|
|
|
|
| 362 |
},
|
| 363 |
{
|
| 364 |
"path": "data/research_directions.json",
|
| 365 |
+
"bytes": 16694,
|
| 366 |
"top_level_type": "dict"
|
| 367 |
},
|
| 368 |
{
|
| 369 |
"path": "data/research_roadmap.json",
|
| 370 |
+
"bytes": 10133,
|
| 371 |
"top_level_type": "dict"
|
| 372 |
},
|
| 373 |
{
|
| 374 |
"path": "data/research_roadmap_interactive.json",
|
| 375 |
+
"bytes": 143560,
|
| 376 |
"top_level_type": "dict"
|
| 377 |
},
|
| 378 |
{
|
| 379 |
"path": "data/research_takeaways.json",
|
| 380 |
+
"bytes": 7139,
|
| 381 |
"top_level_type": "dict"
|
| 382 |
},
|
| 383 |
{
|
| 384 |
"path": "data/scope_claims_audit.json",
|
| 385 |
+
"bytes": 21234,
|
| 386 |
"top_level_type": "dict"
|
| 387 |
},
|
| 388 |
{
|
| 389 |
"path": "data/single_episode_explorer.json",
|
| 390 |
+
"bytes": 4305527,
|
| 391 |
"top_level_type": "dict"
|
| 392 |
},
|
| 393 |
{
|
|
|
|
| 397 |
},
|
| 398 |
{
|
| 399 |
"path": "data/summary_metrics.json",
|
| 400 |
+
"bytes": 27490,
|
| 401 |
"top_level_type": "dict"
|
| 402 |
},
|
| 403 |
{
|
|
|
|
| 412 |
},
|
| 413 |
{
|
| 414 |
"path": "data/website_integrity.json",
|
| 415 |
+
"bytes": 15777,
|
| 416 |
"top_level_type": "dict"
|
| 417 |
},
|
| 418 |
{
|
|
|
|
| 455 |
{
|
| 456 |
"path": "assets/charts/episode_task_scores.svg",
|
| 457 |
"exists": true,
|
| 458 |
+
"bytes": 5983,
|
| 459 |
"format": "SVG",
|
| 460 |
"has_viewbox": true
|
| 461 |
},
|
| 462 |
{
|
| 463 |
"path": "assets/charts/episode_task_scores_minimal_vs_neural.svg",
|
| 464 |
"exists": true,
|
| 465 |
+
"bytes": 10200,
|
| 466 |
"format": "SVG",
|
| 467 |
"has_viewbox": true
|
| 468 |
},
|
| 469 |
{
|
| 470 |
"path": "assets/charts/episode_task_scores_neural_mlp.svg",
|
| 471 |
"exists": true,
|
| 472 |
+
"bytes": 5997,
|
| 473 |
"format": "SVG",
|
| 474 |
"has_viewbox": true
|
| 475 |
},
|
|
|
|
| 490 |
{
|
| 491 |
"path": "assets/charts/research_direction_coverage.svg",
|
| 492 |
"exists": true,
|
| 493 |
+
"bytes": 5078,
|
| 494 |
"format": "SVG",
|
| 495 |
"has_viewbox": true
|
| 496 |
},
|
|
|
|
| 576 |
{
|
| 577 |
"path": "assets/task_architectures.png",
|
| 578 |
"exists": true,
|
| 579 |
+
"bytes": 774391,
|
| 580 |
"width": 1800,
|
| 581 |
"height": 2450,
|
| 582 |
"format": "PNG"
|
|
|
|
| 584 |
{
|
| 585 |
"path": "assets/task_suite_infographic.png",
|
| 586 |
"exists": true,
|
| 587 |
+
"bytes": 1588641,
|
| 588 |
"width": 1800,
|
| 589 |
"height": 6600,
|
| 590 |
"format": "PNG"
|