{
"title": "Ropedia Xperience-10M Task Suite Project Packet",
"version": "2026-06-14",
"scope_status": {
"validated_data": "one public Xperience-10M sample episode",
"aligned_frames": 5821,
"sliding_windows": 1161,
"current_feature_dimensions": 8546,
"neural_head_count": 12,
"direction_extension_probe_count": 4,
"raw_xperience10m_data_in_repo": false,
"audio_feature_status": "Audio is one of the synchronized source modalities in the current task representation.",
"qwen3_omni_32_episode_claim": false,
"qwen3_omni_status": "The selected 96/16/16 Qwen3-Omni v6 diagnostic branch is verified, meets the strict-JSON target, improves action macro-F1/contact accuracy versus v5, and still has weak action/subtask metrics that guide the next error-analysis pass.",
"cosmos3_super_forward_dynamics_lora_status": "The first Cosmos3-Super fine-tuned adapter branch is verified as a forward-dynamics LoRA over camera-pose proxy targets; it reports loss metrics, not JSON action-label accuracy.",
"task_suite_enhancement_128_status": "Current no-new-episode enhancement pack recommends multiscale_20s10_40s20_80s40, hierarchical action/subtask targets, label-normalized scoring, and raw-feature shards before adding more episodes.",
"task_count": 20,
"original_public_sample_task_count": 12,
"additional_public_sample_task_count": 8,
"legacy_tasks_13_to_20_result_path": "docs/data/tier2_task_suite.json"
},
"reading_path": [
{
"step": 1,
"question": "What is the current project scope?",
"primary_artifacts": [
"PROJECT_STATUS.md",
"docs/data/project_status.json",
"RESEARCH_ROADMAP.md",
"docs/data/research_roadmap.json",
"EVIDENCE_CONTRACT.md",
"ARTIFACT_GUIDE.md",
"EVALUATION_PROTOCOL.md",
"FIGURE_INDEX.md",
"SOURCE_ALIGNMENT_AUDIT.md",
"XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
"docs/data/evidence_contract.json",
"docs/data/artifact_index.json",
"docs/data/brand_assets.json",
"docs/data/evaluation_protocol.json",
"docs/data/figure_index.json",
"docs/data/source_alignment_audit.json",
"docs/data/xperience10m_dataset_card_alignment.json",
"docs/data/mirror_parity.json",
"docs/data/publication_audit.json",
"docs/data/scope_claims_audit.json",
"docs/data/website_integrity.json",
"TASK_SUITE_20.md",
"docs/data/task_suite_20.json"
],
"readout": "The project status table and roadmap give the compact current-state summary. Single-episode task engineering, metrics, visualizations, public website integrity, mirror parity, same-split 128-episode baselines, the final selected-episode Qwen3-Omni diagnostic result, the Cosmos3-Nano compatibility package, the Cosmos3-Super base-weight Reasoner evaluation, and the Cosmos3-Super Forward-Dynamics LoRA package are implemented; stronger action/subtask quality and policy-compatible action targets remain follow-ups."
},
{
"step": 2,
"question": "What do the official Xperience-10M dataset and sample cards say?",
"primary_artifacts": [
"XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
"docs/data/xperience10m_dataset_card_alignment.json",
"https://huggingface.co/datasets/ropedia-ai/xperience-10m",
"https://huggingface.co/datasets/ropedia-ai/xperience-10m-sample"
],
"readout": "The full upstream dataset is a manually gated large-scale 4D multimodal egocentric source. The public sample card records the sample license, HOMIE Toolkit path, and Rerun 0.29.0 visualization path. This repo validates one public sample episode and lists the current project coverage."
},
{
"step": 3,
"question": "Are source facts consistently presented?",
"primary_artifacts": [
"SOURCE_ALIGNMENT_AUDIT.md",
"docs/data/source_alignment_audit.json",
"scripts/validate_source_alignment.py"
],
"readout": "The source-alignment report checks full-dataset metadata, API-listing notes, public sample license/tooling, and project coverage across repo docs, website, and HF cards."
},
{
"step": 4,
"question": "How exactly are the tasks evaluated?",
"primary_artifacts": [
"EVALUATION_PROTOCOL.md",
"docs/data/evaluation_protocol.json",
"scripts/build_evaluation_protocol.py"
],
"readout": "The protocol fixes the 20-frame window unit, chronological split, train-only normalization, leakage controls, per-task input/target/metric contracts, and current limitations."
},
{
"step": 5,
"question": "How can the public pipeline be reproduced?",
"primary_artifacts": [
"REPRODUCIBILITY.md",
"docs/data/reproducibility_matrix.json",
"notes/reproducibility_audit.md"
],
"readout": "The public sample pipeline has explicit commands, expected outputs, and a prior exact-match reproduction check over the committed metrics."
},
{
"step": 6,
"question": "What is inside one model input?",
"primary_artifacts": [
"results/episode_task_suite/windows.csv",
"results/episode_task_suite/feature_manifest.json",
"results/episode_task_suite/available_modalities.json",
"docs/data/modality_atlas.json"
],
"readout": "The current model input is an 8,546-dimensional aligned multimodal window, and the readable atlas shows each public-sample modality without raw data redistribution."
},
{
"step": 7,
"question": "Do the task metrics have committed evidence?",
"primary_artifacts": [
"results/episode_task_suite/summary_report.json",
"results/episode_task_suite/neural_mlp/",
"docs/data/summary_metrics.json"
],
"readout": "The unified suite has 20 task contracts; tasks 1-12 have walkthroughs and neural MLP heads, and tasks 13-20 have aligned minimal/neural result bundles under the historical tier2_task_suite path."
},
{
"step": 8,
"question": "What is the scale-up path?",
"primary_artifacts": [
"RESEARCH_ROADMAP.md",
"docs/data/research_roadmap.json",
"results/omni_finetune/DATA_ACCESS_STATUS.md",
"results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md",
"scripts/omni/discover_xperience10m_sources.py",
"docs/data/omni_finetune_verified_result.json"
],
"readout": "The selected-episode held-out Qwen3-Omni v6 diagnostic branch is verified and JSON-format reliability meets the 98% target. The same public comparison also includes the verified 128-episode baselines, Cosmos3-Nano compatibility result, Cosmos3-Super Reasoner evaluation, and Cosmos3-Super Forward-Dynamics LoRA package. The next milestone is action/subtask error analysis and stronger model-quality runs on the same split."
},
{
"step": 9,
"question": "How can the current 128 episodes be pushed harder?",
"primary_artifacts": [
"TASK_SUITE_ENHANCEMENT_128.md",
"docs/data/task_suite_enhancement_128.json",
"results/omni_finetune/task_suite_enhancement_128_v1_20260608/enhancement_plan.json",
"results/omni_finetune/task_suite_enhancement_128_v1_20260608/dense_window_scenarios.csv"
],
"readout": "The current selected split can be expanded with dense and multiscale windows without adding episodes. The recommended export target is multiscale_20s10_40s20_80s40, followed by hierarchical action/subtask targets and raw-feature shards for unsupported tasks."
}
],
"project_status": "PROJECT_STATUS.md",
"project_status_json": "docs/data/project_status.json",
"research_roadmap": "RESEARCH_ROADMAP.md",
"research_roadmap_json": "docs/data/research_roadmap.json",
"evaluation_protocol": "EVALUATION_PROTOCOL.md",
"evaluation_protocol_json": "docs/data/evaluation_protocol.json",
"source_alignment_audit": "SOURCE_ALIGNMENT_AUDIT.md",
"source_alignment_audit_json": "docs/data/source_alignment_audit.json",
"artifact_guide": "ARTIFACT_GUIDE.md",
"artifact_index": "docs/data/artifact_index.json",
"brand_assets": "docs/data/brand_assets.json",
"figure_index": "FIGURE_INDEX.md",
"figure_index_json": "docs/data/figure_index.json",
"reproducibility_matrix": "docs/data/reproducibility_matrix.json",
"public_surfaces": {
"github_repo": "https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite",
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",
"hf_static_app": "https://cy0307-ropedia-xperience-10m-task-suite.static.hf.space/",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts",
"hf_model_baselines": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines"
},
"current_reading_notes": [
"The latest cross-episode Qwen3-Omni v6 diagnostic branch is verified, but strong model quality is not yet shown; action/subtask metrics remain weak and v5 remains stronger on several non-contact metrics.",
"The current 128-episode suite has a no-new-episode enhancement plan: multiscale_20s10_40s20_80s40 windows, hierarchical labels, label-normalized scoring, and raw-feature shard export.",
"Cosmos3-Super Forward-Dynamics LoRA is verified as a loss-based world-model adapter branch, not as JSON action-token prediction.",
"Older Qwen3-Omni setup artifacts are separate from the verified selected-episode diagnostic package.",
"Feature-vector reconstruction is separate from pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
"Raw Xperience-10M data is not redistributed in this repo."
],
"task_suite_enhancement_128": "TASK_SUITE_ENHANCEMENT_128.md",
"task_suite_enhancement_128_json": "docs/data/task_suite_enhancement_128.json"
}