{
"title": "Ropedia Xperience-10M Research Roadmap",
"summary": "Staged path from the public-sample task lab to verified Qwen3-Omni, Cosmos3-Nano, and Cosmos3-Super diagnostics, same-split 128-episode baseline alignment, a no-new-episode 128-suite enhancement pack, action/subtask error analysis, world/policy branches, and a future Xperience-native embodied foundation model.",
"current_decision_point": "Push the current selected 128 episodes harder before requesting more storage: keep the public-sample task suite as the development harness, use the latest verified selected-episode Qwen3-Omni v6 diagnostic branch plus the pinned v5 row as structured-task references, read Cosmos3-Nano and Cosmos3-Super Forward-Dynamics LoRA as separate world-model results, continue with hierarchical action/subtask targets and label-normalized scoring, and defer policy-model experiments until robot-compatible targets are implemented. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
"additional_development_directions": {
"source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
"source_json": "docs/data/additional_development_directions.json",
"summary": "Additional concrete tracks include episode taxonomy and data selection, benchmark protocol, multimodal representation learning, skill graphs, affordance modeling, 3D/4D scene memory, data-quality diagnostics, and policy/simulation transfer."
},
"phases": [
{
"id": "public_sample_task_lab",
"name": "Public-Sample Task Lab",
"status": "implemented",
"entry_condition": "One public Xperience-10M sample episode is available.",
"deliverables": [
"1161 aligned windows",
"12 task contracts",
"minimal baseline heads",
"neural MLP heads",
"modality atlas",
"task walkthroughs",
"derived figures"
],
"completion_evidence": [
"PROJECT_STATUS.md",
"EVALUATION_PROTOCOL.md",
"RESEARCH_TAKEAWAYS.md",
"docs/data/summary_metrics.json",
"results/episode_task_suite/summary_report.json"
],
"reader_takeaway": "The public sample supports task design, feature contracts, walkthroughs, and baseline comparisons."
},
{
"id": "multi_episode_data_staging",
"name": "Multi-Episode Data Preparation",
"status": "implemented_for_first_pilot",
"entry_condition": "Gated dataset availability and enough storage for selected episodes.",
"deliverables": [
"128 selected episodes",
"episode manifest",
"missing-view manifest",
"held-out episode split",
"source-discovery report"
],
"completion_evidence": [
"results/omni_finetune/DATA_ACCESS_STATUS.md",
"results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md",
"results/omni_finetune/source_discovery.json"
],
"reader_takeaway": "The first selected split is available for Qwen3-Omni diagnostics, with train/test separation at the episode level."
},
{
"id": "qwen3_omni_lora_diagnostic_pilot",
"name": "Qwen3-Omni LoRA Latest Diagnostic Branch",
"status": "verified_latest_branch",
"entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
"deliverables": [
"dataset JSONL/media manifests",
"LoRA adapter checkpoint",
"progress logs",
"validation monitoring",
"held-out predictions",
"metrics",
"confusion matrices",
"run report",
"v5/v6 comparison",
"public LoRA adapter repo"
],
"completion_evidence": [
"docs/data/omni_finetune_verified_result.json",
"docs/data/qwen3_v5_v6_comparison.json",
"results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md",
"results/omni_finetune/verified_public/",
"dataset_manifest.json",
"training_metadata.json",
"progress.jsonl",
"metrics.json",
"predictions.jsonl",
"RUN_REPORT.md"
],
"reader_takeaway": "The final omni-model diagnostic result establishes the full held-out training/validation/evaluation loop and meets the strict-JSON target, but weak action/subtask metrics make it a diagnostic baseline."
},
{
"id": "multi_episode_128_same_split_baselines",
"name": "128-Episode Same-Split Simple/NN Baselines",
"status": "verified_companion_result",
"entry_condition": "Derived Qwen JSONL export for the selected 96/16/16 split.",
"deliverables": [
"same 12 task ids",
"simple metadata/text baselines",
"neural MLP baselines for JSON-supported labels",
"explicit unsupported markers for raw-feature-only tasks"
],
"completion_evidence": [
"results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md",
"results/omni_finetune/multi_episode_128_task_baselines/summary_report.json",
"scripts/omni/run_128_task_baselines.py"
],
"reader_takeaway": "The simple and neural baseline framing is now aligned to the selected 128-episode setup; trajectory, retrieval, reconstruction, and misalignment variants still need raw 128-episode feature blocks for exact feature-level reproduction."
},
{
"id": "task_suite_enhancement_128",
"name": "128-Episode Task Suite Enhancement Pack",
"status": "current",
"entry_condition": "Same selected 96/16/16 split and current public 3,808-window export.",
"deliverables": [
"dense-window and multiscale export estimates",
"hierarchical action/subtask target contract",
"raw-feature shard priorities for unsupported tasks",
"Qwen v5 and Cosmos continuation run cards",
"publication-ready enhancement artifacts"
],
"completion_evidence": [
"TASK_SUITE_ENHANCEMENT_128.md",
"docs/data/task_suite_enhancement_128.json",
"results/omni_finetune/task_suite_enhancement_128_v1_20260608/enhancement_plan.json",
"scripts/omni/build_task_suite_enhancement_128.py"
],
"reader_takeaway": "The current 128-episode setup still has headroom: use multiscale_20s10_40s20_80s40, hierarchical labels, label-normalized scoring, and raw-feature shards before adding more episodes."
},
{
"id": "qwen3_omni_structured_output_error_analysis",
"name": "Action/Subtask Error-Analysis Pass",
"status": "active_next_step",
"entry_condition": "The final diagnostic package meets strict JSON validity but has weak action/subtask held-out quality.",
"deliverables": [
"same 96/16/16 episode split",
"action/subtask confusion analysis",
"unseen-label analysis",
"object/action family breakdowns",
"held-out test evaluation",
"comparison to the final verified Qwen baseline"
],
"completion_evidence": [
"error-analysis tables",
"held-out metrics by failure type",
"verified public-safe package"
],
"reader_takeaway": "The next pass should improve action/subtask quality before larger model-quality claims."
},
{
"id": "foundation_model_selection_matrix",
"name": "Foundation-Model Selection Matrix",
"status": "current",
"entry_condition": "The selected episodes are prepared or a 3-8 episode dry run is available for preprocessing checks.",
"deliverables": [
"backbone registry",
"Cosmos 3 world-model branch plan",
"Cosmos3-Super Forward-Dynamics LoRA verified package",
"Qwen3-Omni LoRA baseline plan",
"OpenVLA/openpi/GR00T policy-branch candidates",
"model-specific evaluation additions"
],
"completion_evidence": [
"FOUNDATION_MODEL_PLAN.md",
"docs/data/foundation_model_plan.json",
"research_roadmap_interactive.json"
],
"reader_takeaway": "Qwen3-Omni remains the structured JSON held-out pilot; Cosmos 3 is the first world-model branch. Cosmos3-Super now has a verified forward-dynamics LoRA over camera-pose proxy targets, while VLA/policy models wait for robot-compatible action targets."
},
{
"id": "robustness_run_64_128_episode",
"name": "64-128 Episode Robustness Run",
"status": "partially_implemented",
"entry_condition": "The selected-episode pilot trains and evaluates cleanly.",
"deliverables": [
"split-by-session metrics",
"modality ablations",
"calibration/object/language error analysis",
"missing-view sensitivity analysis"
],
"completion_evidence": [
"held-out metrics by session",
"held-out metrics by task",
"held-out metrics by modality",
"ablation tables",
"qualitative error analysis"
],
"reader_takeaway": "The robustness run tests whether the pilot conclusions survive broader sessions and missing modalities."
},
{
"id": "foundation_world_model_extensions",
"name": "Cosmos 3 and Policy-Model Extensions",
"status": "planned",
"entry_condition": "Enough multi-episode data, compute budget, and model-specific action/world-state targets.",
"deliverables": [
"Cosmos 3 future-window and action-conditioned world-model probes",
"OpenVLA/openpi/GR00T action-policy baseline",
"audio/video/depth/pose/mocap conditioning checks",
"affordance and object-interaction tasks",
"synthetic-data usefulness test"
],
"completion_evidence": [
"task-specific held-out evaluations",
"verified Cosmos3-Super forward-dynamics LoRA package",
"qualitative inspection",
"updated model cards"
],
"reader_takeaway": "The Cosmos branch now includes Nano future-window compatibility and Super forward-dynamics LoRA; the long-term direction remains richer multimodal representation learning with model branches chosen by task fit rather than by a single default backbone."
},
{
"id": "xperience_embodied_foundation_pretraining",
"name": "Xperience Embodied Foundation Model Pretraining",
"status": "future",
"entry_condition": "Full-corpus access, PB-scale storage path, high-throughput data loading, multi-node compute, and positive scaling evidence from smaller multi-episode runs.",
"deliverables": [
"full-corpus episode and split manifests",
"pretraining shard and provenance manifests",
"0.3B-1B and 1B-3B scaling pilots",
"3B-7B Xperience-native domain model target",
"held-out episode/session/activity/object evaluations",
"missing-modality robustness report",
"model card and data-boundary report"
],
"completion_evidence": [
"pretraining metadata",
"checkpoint inventory",
"scaling curves",
"held-out evaluation reports",
"qualitative retrieval or future-state examples",
"safety and data-boundary report"
],
"reader_takeaway": "The final research direction is a domain-specific embodied foundation model trained directly on Xperience-10M, after smaller pilots justify the cost and infrastructure."
}
],
"public_surfaces_to_update": [
"README.md",
"docs/data/task_suite_enhancement_128.json",
"TASK_SUITE_ENHANCEMENT_128.md",
"PROJECT_STATUS.md",
"RESEARCH_TAKEAWAYS.md",
"EVALUATION_PROTOCOL.md",
"ARTIFACT_GUIDE.md",
"ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
"XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md",
"docs/index.html",
"docs/data/additional_development_directions.json",
"docs/data/research_roadmap.json",
"Hugging Face Space card",
"Hugging Face artifact dataset card",
"Hugging Face model card"
]
}