{
"title": "Ropedia Xperience-10M Research Roadmap",
"summary": "Staged path from the public-sample task lab through a final verified Qwen3-Omni diagnostic result, same-split 128-episode baseline alignment, action/subtask error analysis, foundation-model selection, and world/policy branches to a future Xperience-native embodied foundation model.",
"current_decision_point": "Keep the public-sample task suite as the development harness, use the final verified selected-episode Qwen3-Omni diagnostic result and the same-split 128-episode simple/NN metadata baselines as the first cross-episode references, improve action/subtask quality through error analysis, then branch into Cosmos 3 world modeling and policy-model experiments after their targets are implemented. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
"additional_development_directions": {
"source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
"source_json": "docs/data/additional_development_directions.json",
"summary": "Additional concrete tracks include episode taxonomy and data selection, benchmark protocol, multimodal representation learning, skill graphs, affordance modeling, 3D/4D scene memory, data-quality diagnostics, and policy/simulation transfer."
},
"phases": [
{
"id": "public_sample_task_lab",
"name": "Public-Sample Task Lab",
"status": "implemented",
"entry_condition": "One public Xperience-10M sample episode is available.",
"deliverables": [
"1161 aligned windows",
"12 task contracts",
"minimal baseline heads",
"neural MLP heads",
"modality atlas",
"task walkthroughs",
"derived figures"
],
"completion_evidence": [
"PROJECT_STATUS.md",
"EVALUATION_PROTOCOL.md",
"RESEARCH_TAKEAWAYS.md",
"docs/data/summary_metrics.json",
"results/episode_task_suite/summary_report.json"
],
"reader_takeaway": "The public sample supports task design, feature contracts, walkthroughs, and baseline comparisons."
},
{
"id": "multi_episode_data_staging",
"name": "Multi-Episode Data Preparation",
"status": "implemented_for_first_pilot",
"entry_condition": "Gated dataset availability and enough storage for selected episodes.",
"deliverables": [
"128 selected episodes",
"episode manifest",
"missing-view manifest",
"held-out episode split",
"source-discovery report"
],
"completion_evidence": [
"results/omni_finetune/DATA_ACCESS_STATUS.md",
"results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md",
"results/omni_finetune/source_discovery.json"
],
"reader_takeaway": "The first selected split is available for Qwen3-Omni diagnostics, with train/test separation at the episode level."
},
{
"id": "qwen3_omni_lora_diagnostic_pilot",
"name": "Qwen3-Omni LoRA Final Diagnostic Result",
"status": "verified_baseline",
"entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
"deliverables": [
"dataset JSONL/media manifests",
"LoRA adapter checkpoint",
"progress logs",
"validation monitoring",
"held-out predictions",
"metrics",
"confusion matrices",
"run report",
"public LoRA adapter repo"
],
"completion_evidence": [
"docs/data/omni_finetune_verified_result.json",
"results/omni_finetune/verified_public/",
"dataset_manifest.json",
"training_metadata.json",
"progress.jsonl",
"metrics.json",
"predictions.jsonl",
"RUN_REPORT.md"
],
"reader_takeaway": "The final omni-model diagnostic result establishes the full held-out training/validation/evaluation loop and meets the strict-JSON target, but weak action/subtask metrics make it a diagnostic baseline."
},
{
"id": "multi_episode_128_same_split_baselines",
"name": "128-Episode Same-Split Simple/NN Baselines",
"status": "verified_companion_result",
"entry_condition": "Derived Qwen JSONL export for the selected 96/16/16 split.",
"deliverables": [
"same 12 task ids",
"simple metadata/text baselines",
"neural MLP baselines for JSON-supported labels",
"explicit unsupported markers for raw-feature-only tasks"
],
"completion_evidence": [
"results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md",
"results/omni_finetune/multi_episode_128_task_baselines/summary_report.json",
"scripts/omni/run_128_task_baselines.py"
],
"reader_takeaway": "The simple and neural baseline framing is now aligned to the selected 128-episode setup; trajectory, retrieval, reconstruction, and misalignment variants still need raw 128-episode feature blocks for exact feature-level reproduction."
},
{
"id": "qwen3_omni_structured_output_error_analysis",
"name": "Action/Subtask Error-Analysis Pass",
"status": "active_next_step",
"entry_condition": "The final diagnostic package meets strict JSON validity but has weak action/subtask held-out quality.",
"deliverables": [
"same 96/16/16 episode split",
"action/subtask confusion analysis",
"unseen-label analysis",
"object/action family breakdowns",
"held-out test evaluation",
"comparison to the final verified Qwen baseline"
],
"completion_evidence": [
"error-analysis tables",
"held-out metrics by failure type",
"verified public-safe package"
],
"reader_takeaway": "The next pass should improve action/subtask quality before larger model-quality claims."
},
{
"id": "foundation_model_selection_matrix",
"name": "Foundation-Model Selection Matrix",
"status": "next",
"entry_condition": "The selected episodes are prepared or a 3-8 episode dry run is available for preprocessing checks.",
"deliverables": [
"backbone registry",
"Cosmos 3 world-model branch plan",
"Qwen3-Omni LoRA baseline plan",
"OpenVLA/openpi/GR00T policy-branch candidates",
"model-specific evaluation additions"
],
"completion_evidence": [
"FOUNDATION_MODEL_PLAN.md",
"docs/data/foundation_model_plan.json",
"research_roadmap_interactive.json"
],
"reader_takeaway": "Qwen3-Omni remains the first trainable held-out pilot; Cosmos 3 is the first world-model branch. Cosmos3-Super now has camera-pose proxy forward-dynamics targets ready for trainer implementation, while VLA/policy models wait for robot-compatible action targets."
},
{
"id": "robustness_run_64_128_episode",
"name": "64-128 Episode Robustness Run",
"status": "planned",
"entry_condition": "The selected-episode pilot trains and evaluates cleanly.",
"deliverables": [
"split-by-session metrics",
"modality ablations",
"calibration/object/language error analysis",
"missing-view sensitivity analysis"
],
"completion_evidence": [
"held-out metrics by session",
"held-out metrics by task",
"held-out metrics by modality",
"ablation tables",
"qualitative error analysis"
],
"reader_takeaway": "The robustness run tests whether the pilot conclusions survive broader sessions and missing modalities."
},
{
"id": "foundation_world_model_extensions",
"name": "Cosmos 3 and Policy-Model Extensions",
"status": "planned",
"entry_condition": "Enough multi-episode data, compute budget, and model-specific action/world-state targets.",
"deliverables": [
"Cosmos 3 future-window or action-conditioned world-model probe",
"OpenVLA/openpi/GR00T action-policy baseline",
"audio/video/depth/pose/mocap conditioning checks",
"affordance and object-interaction tasks",
"synthetic-data usefulness test"
],
"completion_evidence": [
"task-specific held-out evaluations",
"qualitative inspection",
"updated model cards"
],
"reader_takeaway": "The long-term direction is richer multimodal representation learning for embodied-AI reasoning, with model branches chosen by task fit rather than by a single default backbone."
},
{
"id": "xperience_embodied_foundation_pretraining",
"name": "Xperience Embodied Foundation Model Pretraining",
"status": "future",
"entry_condition": "Full-corpus access, PB-scale storage path, high-throughput data loading, multi-node compute, and positive scaling evidence from smaller multi-episode runs.",
"deliverables": [
"full-corpus episode and split manifests",
"pretraining shard and provenance manifests",
"0.3B-1B and 1B-3B scaling pilots",
"3B-7B Xperience-native domain model target",
"held-out episode/session/activity/object evaluations",
"missing-modality robustness report",
"model card and data-boundary report"
],
"completion_evidence": [
"pretraining metadata",
"checkpoint inventory",
"scaling curves",
"held-out evaluation reports",
"qualitative retrieval or future-state examples",
"safety and data-boundary report"
],
"reader_takeaway": "The final research direction is a domain-specific embodied foundation model trained directly on Xperience-10M, after smaller pilots justify the cost and infrastructure."
}
],
"public_surfaces_to_update": [
"README.md",
"PROJECT_STATUS.md",
"RESEARCH_TAKEAWAYS.md",
"EVALUATION_PROTOCOL.md",
"ARTIFACT_GUIDE.md",
"ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
"XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md",
"docs/index.html",
"docs/data/additional_development_directions.json",
"docs/data/research_roadmap.json",
"Hugging Face Space card",
"Hugging Face artifact dataset card",
"Hugging Face model card"
]
}