File size: 10,246 Bytes
b7a466b
 
2d80be0
 
d96f266
 
 
 
 
b7a466b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bd8497
 
4602161
b7a466b
45c1706
b7a466b
 
 
 
 
 
 
 
 
 
2bd8497
b7a466b
 
2bd8497
2d80be0
2bd8497
fc9e8cf
b7a466b
 
 
 
2bd8497
b7a466b
 
 
2d80be0
 
b7a466b
 
2bd8497
 
b7a466b
 
 
 
 
 
 
2d80be0
2bd8497
a07660e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bd8497
 
2d80be0
2bd8497
2d80be0
2bd8497
 
2d80be0
 
 
2bd8497
2d80be0
2bd8497
 
 
2d80be0
2bd8497
 
2d80be0
b7a466b
31e3087
 
 
 
4602161
31e3087
 
 
 
 
 
 
 
 
 
 
 
3a10443
31e3087
b7a466b
 
 
 
45c1706
b7a466b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31e3087
b7a466b
31e3087
b7a466b
31e3087
 
04c0bde
31e3087
 
b7a466b
 
 
 
 
 
31e3087
bfcf156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7a466b
 
 
 
 
 
 
 
d96f266
bfcf156
b7a466b
d96f266
b7a466b
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
{
  "title": "Ropedia Xperience-10M Research Roadmap",
  "summary": "Staged path from the public-sample task lab to a final verified Qwen3-Omni diagnostic result, same-split 128-episode baseline alignment, action/subtask error analysis, foundation-model selection, world/policy branches, and a future Xperience-native embodied foundation model.",
  "current_decision_point": "Keep the public-sample task suite as the development harness, use the final verified selected-episode Qwen3-Omni diagnostic result and the same-split 128-episode simple/NN metadata baselines as the first cross-episode references, improve action/subtask quality through error analysis, then branch into Cosmos 3 world modeling and policy-model experiments after their targets are implemented. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
  "additional_development_directions": {
    "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
    "source_json": "docs/data/additional_development_directions.json",
    "summary": "Additional concrete tracks include episode taxonomy and data selection, benchmark protocol, multimodal representation learning, skill graphs, affordance modeling, 3D/4D scene memory, data-quality diagnostics, and policy/simulation transfer."
  },
  "phases": [
    {
      "id": "public_sample_task_lab",
      "name": "Public-Sample Task Lab",
      "status": "implemented",
      "entry_condition": "One public Xperience-10M sample episode is available.",
      "deliverables": [
        "1161 aligned windows",
        "12 task contracts",
        "minimal baseline heads",
        "neural MLP heads",
        "modality atlas",
        "task walkthroughs",
        "derived figures"
      ],
      "completion_evidence": [
        "PROJECT_STATUS.md",
        "EVALUATION_PROTOCOL.md",
        "RESEARCH_TAKEAWAYS.md",
        "docs/data/summary_metrics.json",
        "results/episode_task_suite/summary_report.json"
      ],
      "reader_takeaway": "The public sample supports task design, feature contracts, walkthroughs, and baseline comparisons."
    },
    {
      "id": "multi_episode_data_staging",
      "name": "Multi-Episode Data Preparation",
      "status": "implemented_for_first_pilot",
      "entry_condition": "Gated dataset availability and enough storage for selected episodes.",
      "deliverables": [
        "128 selected episodes",
        "episode manifest",
        "missing-view manifest",
        "held-out episode split",
        "source-discovery report"
      ],
      "completion_evidence": [
        "results/omni_finetune/DATA_ACCESS_STATUS.md",
        "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md",
        "results/omni_finetune/source_discovery.json"
      ],
      "reader_takeaway": "The first selected split is available for Qwen3-Omni diagnostics, with train/test separation at the episode level."
    },
    {
      "id": "qwen3_omni_lora_diagnostic_pilot",
      "name": "Qwen3-Omni LoRA Final Diagnostic Result",
      "status": "verified_baseline",
      "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
      "deliverables": [
        "dataset JSONL/media manifests",
        "LoRA adapter checkpoint",
        "progress logs",
        "validation monitoring",
        "held-out predictions",
        "metrics",
        "confusion matrices",
        "run report",
        "public LoRA adapter repo"
      ],
      "completion_evidence": [
        "docs/data/omni_finetune_verified_result.json",
        "results/omni_finetune/verified_public/",
        "dataset_manifest.json",
        "training_metadata.json",
        "progress.jsonl",
        "metrics.json",
        "predictions.jsonl",
        "RUN_REPORT.md"
      ],
      "reader_takeaway": "The final omni-model diagnostic result establishes the full held-out training/validation/evaluation loop and meets the strict-JSON target, but weak action/subtask metrics make it a diagnostic baseline."
    },
    {
      "id": "multi_episode_128_same_split_baselines",
      "name": "128-Episode Same-Split Simple/NN Baselines",
      "status": "verified_companion_result",
      "entry_condition": "Derived Qwen JSONL export for the selected 96/16/16 split.",
      "deliverables": [
        "same 12 task ids",
        "simple metadata/text baselines",
        "neural MLP baselines for JSON-supported labels",
        "explicit unsupported markers for raw-feature-only tasks"
      ],
      "completion_evidence": [
        "results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md",
        "results/omni_finetune/multi_episode_128_task_baselines/summary_report.json",
        "scripts/omni/run_128_task_baselines.py"
      ],
      "reader_takeaway": "The simple and neural baseline framing is now aligned to the selected 128-episode setup; trajectory, retrieval, reconstruction, and misalignment variants still need raw 128 feature blocks for exact feature-level reproduction."
    },
    {
      "id": "qwen3_omni_structured_output_error_analysis",
      "name": "Action/Subtask Error-Analysis Pass",
      "status": "active_next_step",
      "entry_condition": "The final diagnostic package meets strict JSON validity but has weak action/subtask held-out quality.",
      "deliverables": [
        "same 96/16/16 episode split",
        "action/subtask confusion analysis",
        "unseen-label analysis",
        "object/action family breakdowns",
        "held-out test evaluation",
        "comparison to the final verified Qwen baseline"
      ],
      "completion_evidence": [
        "error-analysis tables",
        "held-out metrics by failure type",
        "verified public-safe package"
      ],
      "reader_takeaway": "The next pass should improve action/subtask quality before larger model-quality claims."
    },
    {
      "id": "foundation_model_selection_matrix",
      "name": "Foundation-Model Selection Matrix",
      "status": "next",
      "entry_condition": "The selected episodes are prepared or a 3-8 episode dry run is available for preprocessing checks.",
      "deliverables": [
        "backbone registry",
        "Cosmos 3 world-model branch plan",
        "Qwen3-Omni LoRA baseline plan",
        "OpenVLA/openpi/GR00T policy-branch candidates",
        "model-specific evaluation additions"
      ],
      "completion_evidence": [
        "FOUNDATION_MODEL_PLAN.md",
        "docs/data/foundation_model_plan.json",
        "research_roadmap_interactive.json"
      ],
      "reader_takeaway": "Qwen3-Omni remains the first trainable held-out pilot; Cosmos 3 is the first world-model branch. Cosmos3-Super now has camera-pose proxy forward-dynamics targets ready for trainer implementation, while VLA/policy models wait for robot-compatible action targets."
    },
    {
      "id": "robustness_run_64_128_episode",
      "name": "64-128 Episode Robustness Run",
      "status": "planned",
      "entry_condition": "The selected-episode pilot trains and evaluates cleanly.",
      "deliverables": [
        "split-by-session metrics",
        "modality ablations",
        "calibration/object/language error analysis",
        "missing-view sensitivity analysis"
      ],
      "completion_evidence": [
        "held-out metrics by session",
        "held-out metrics by task",
        "held-out metrics by modality",
        "ablation tables",
        "qualitative error analysis"
      ],
      "reader_takeaway": "The robustness run tests whether the pilot conclusions survive broader sessions and missing modalities."
    },
    {
      "id": "foundation_world_model_extensions",
      "name": "Cosmos 3 and Policy-Model Extensions",
      "status": "planned",
      "entry_condition": "Enough multi-episode data, compute budget, and model-specific action/world-state targets.",
      "deliverables": [
        "Cosmos 3 future-window or action-conditioned world-model probe",
        "OpenVLA/openpi/GR00T action-policy baseline",
        "audio/video/depth/pose/mocap conditioning checks",
        "affordance and object-interaction tasks",
        "synthetic-data usefulness test"
      ],
      "completion_evidence": [
        "task-specific held-out evaluations",
        "qualitative inspection",
        "updated model cards"
      ],
      "reader_takeaway": "The long-term direction is richer multimodal representation learning for embodied-AI reasoning, with model branches chosen by task fit rather than by a single default backbone."
    },
    {
      "id": "xperience_embodied_foundation_pretraining",
      "name": "Xperience Embodied Foundation Model Pretraining",
      "status": "future",
      "entry_condition": "Full-corpus access, PB-scale storage path, high-throughput data loading, multi-node compute, and positive scaling evidence from smaller multi-episode runs.",
      "deliverables": [
        "full-corpus episode and split manifests",
        "pretraining shard and provenance manifests",
        "0.3B-1B and 1B-3B scaling pilots",
        "3B-7B Xperience-native domain model target",
        "held-out episode/session/activity/object evaluations",
        "missing-modality robustness report",
        "model card and data-boundary report"
      ],
      "completion_evidence": [
        "pretraining metadata",
        "checkpoint inventory",
        "scaling curves",
        "held-out evaluation reports",
        "qualitative retrieval or future-state examples",
        "safety and data-boundary report"
      ],
      "reader_takeaway": "The final research direction is a domain-specific embodied foundation model trained directly on Xperience-10M, after smaller pilots justify the cost and infrastructure."
    }
  ],
  "public_surfaces_to_update": [
    "README.md",
    "PROJECT_STATUS.md",
    "RESEARCH_TAKEAWAYS.md",
    "EVALUATION_PROTOCOL.md",
    "ARTIFACT_GUIDE.md",
    "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
    "XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md",
    "docs/index.html",
    "docs/data/additional_development_directions.json",
    "docs/data/research_roadmap.json",
    "Hugging Face Space card",
    "Hugging Face artifact dataset card",
    "Hugging Face model card"
  ]
}