Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
Refine reader-facing public wording (2/6)
Browse files
- data/evidence_contract.json +88 -88
- data/mirror_parity.json +0 -0
- data/omni_model_comparison.json +2 -2
- data/project_brief.json +1 -1
- data/public_reader_map.json +105 -40
- data/public_surface_qa.json +7 -7
- data/publication_audit.json +7 -7
- docs/data/evidence_contract.json +88 -88
- docs/data/mirror_parity.json +0 -0
- docs/data/omni_model_comparison.json +2 -2
- docs/data/project_brief.json +1 -1
- docs/data/public_reader_map.json +105 -40
- docs/data/public_surface_qa.json +7 -7
- docs/data/publication_audit.json +7 -7
- metrics/evidence_contract.json +88 -88
- metrics/mirror_parity.json +0 -0
- metrics/omni_model_comparison.json +2 -2
- metrics/project_brief.json +1 -1
- metrics/public_reader_map.json +105 -40
- metrics/public_surface_qa.json +7 -7
data/evidence_contract.json
CHANGED
|
@@ -1,170 +1,169 @@
|
|
| 1 |
{
|
| 2 |
"project": "Ropedia Xperience-10M Task Suite",
|
| 3 |
"scope": "single public Xperience-10M sample episode",
|
| 4 |
-
"
|
| 5 |
{
|
| 6 |
"id": "project_status",
|
| 7 |
-
"claim": "A first-pass reader has a compact current-state summary.",
|
| 8 |
"status": "verified",
|
| 9 |
"evidence": [
|
| 10 |
"PROJECT_STATUS.md",
|
| 11 |
"docs/data/project_status.json"
|
| 12 |
],
|
| 13 |
-
"
|
|
|
|
| 14 |
},
|
| 15 |
{
|
| 16 |
"id": "research_roadmap",
|
| 17 |
-
"claim": "The research roadmap is explicit.",
|
| 18 |
"status": "current",
|
| 19 |
"evidence": [
|
| 20 |
"RESEARCH_ROADMAP.md",
|
| 21 |
"docs/data/research_roadmap.json"
|
| 22 |
],
|
| 23 |
-
"
|
|
|
|
| 24 |
},
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
"claim": "The public dataset description is aligned with the official gated Xperience-10M dataset card and public sample card.",
|
| 28 |
"status": "verified",
|
| 29 |
"evidence": [
|
| 30 |
"XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
|
| 31 |
"docs/data/xperience10m_dataset_card_alignment.json",
|
| 32 |
"https://huggingface.co/datasets/ropedia-ai/xperience-10m"
|
| 33 |
],
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
"
|
| 50 |
"status": "verified",
|
| 51 |
"evidence": [
|
| 52 |
"results/episode_task_suite/windows.csv",
|
| 53 |
"results/episode_task_suite/shared_windows.npz",
|
| 54 |
"results/episode_task_suite/summary_report.json"
|
| 55 |
],
|
| 56 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
},
|
| 58 |
-
{
|
| 59 |
-
"id": "feature_contract",
|
| 60 |
-
"claim": "The current feature contract is explicit and inspectable.",
|
| 61 |
-
"status": "verified",
|
| 62 |
-
"evidence": [
|
| 63 |
-
"results/episode_task_suite/feature_manifest.json",
|
| 64 |
-
"results/episode_task_suite/available_modalities.json"
|
| 65 |
-
],
|
| 66 |
-
"boundary": "8,546-dimensional aligned multimodal window representation"
|
| 67 |
-
},
|
| 68 |
-
{
|
| 69 |
-
"id": "evaluation_protocol",
|
| 70 |
-
"claim": "The task evaluation protocol is explicit and generated from committed metrics.",
|
| 71 |
-
"status": "verified",
|
| 72 |
-
"evidence": [
|
| 73 |
-
"EVALUATION_PROTOCOL.md",
|
| 74 |
-
"docs/data/evaluation_protocol.json",
|
| 75 |
-
"scripts/build_evaluation_protocol.py"
|
| 76 |
-
],
|
| 77 |
-
"boundary": "defines windows, split, per-task metrics, leakage controls, and current limitations"
|
| 78 |
-
},
|
| 79 |
{
|
| 80 |
"id": "modality_atlas",
|
| 81 |
-
"claim": "The public sample modalities are inspectable without raw data redistribution.",
|
| 82 |
"status": "verified",
|
| 83 |
"evidence": [
|
| 84 |
"docs/data/modality_atlas.json",
|
| 85 |
"docs/assets/modalities/",
|
| 86 |
"docs/index.html"
|
| 87 |
],
|
| 88 |
-
"
|
|
|
|
| 89 |
},
|
| 90 |
{
|
| 91 |
"id": "task_surface_integrity",
|
| 92 |
-
"claim": "Public task cards stay readable for non-expert readers.",
|
| 93 |
"status": "verified",
|
| 94 |
"evidence": [
|
| 95 |
"docs/data/task_surface_integrity.json",
|
| 96 |
"scripts/validate_task_surface.py",
|
| 97 |
"docs/index.html"
|
| 98 |
],
|
| 99 |
-
"
|
|
|
|
| 100 |
},
|
| 101 |
{
|
| 102 |
"id": "figure_index",
|
| 103 |
-
"claim": "Public figures, charts, and modality thumbnails are indexed as project evidence.",
|
| 104 |
"status": "verified",
|
| 105 |
"evidence": [
|
| 106 |
"FIGURE_INDEX.md",
|
| 107 |
"docs/data/figure_index.json",
|
| 108 |
"scripts/build_figure_index.py"
|
| 109 |
],
|
| 110 |
-
"
|
|
|
|
| 111 |
},
|
| 112 |
{
|
| 113 |
"id": "brand_assets",
|
| 114 |
-
"claim": "A project logo is consistently applied across public surfaces.",
|
| 115 |
"status": "verified",
|
| 116 |
"evidence": [
|
| 117 |
"docs/assets/brand/",
|
| 118 |
"docs/data/brand_assets.json",
|
| 119 |
"scripts/build_brand_assets.py"
|
| 120 |
],
|
| 121 |
-
"
|
|
|
|
| 122 |
},
|
| 123 |
{
|
| 124 |
"id": "twelve_tasks",
|
| 125 |
-
"claim": "The 12 task heads are implemented as scripts with saved metrics and predictions.",
|
| 126 |
"status": "verified",
|
| 127 |
"evidence": [
|
| 128 |
"scripts/episode_task_suite.py",
|
| 129 |
"results/episode_task_suite/*/metrics.json",
|
| 130 |
"results/episode_task_suite/*/predictions.*"
|
| 131 |
],
|
| 132 |
-
"
|
|
|
|
| 133 |
},
|
| 134 |
{
|
| 135 |
"id": "minimal_vs_neural",
|
| 136 |
-
"claim": "Minimal and neural heads use the same task contracts.",
|
| 137 |
"status": "verified",
|
| 138 |
"evidence": [
|
| 139 |
"scripts/neural_task_models.py",
|
| 140 |
"results/episode_task_suite/neural_mlp/",
|
| 141 |
"docs/assets/task_architectures.png"
|
| 142 |
],
|
| 143 |
-
"
|
|
|
|
| 144 |
},
|
| 145 |
{
|
| 146 |
"id": "research_directions",
|
| 147 |
-
"claim": "Four Ropedia research directions are mapped honestly as direct, proxy, or diagnostic evidence.",
|
| 148 |
"status": "verified",
|
| 149 |
"evidence": [
|
| 150 |
"results/episode_task_suite/research_directions/research_direction_taxonomy.json",
|
| 151 |
"docs/data/research_directions.json"
|
| 152 |
],
|
| 153 |
-
"
|
|
|
|
| 154 |
},
|
| 155 |
{
|
| 156 |
"id": "direction_extensions",
|
| 157 |
-
"claim": "Four extra direction probes are coded and evaluated.",
|
| 158 |
"status": "verified",
|
| 159 |
"evidence": [
|
| 160 |
"results/episode_task_suite/research_direction_extensions/research_direction_extension_results.json",
|
| 161 |
"docs/data/research_direction_extensions.json"
|
| 162 |
],
|
| 163 |
-
"
|
|
|
|
| 164 |
},
|
| 165 |
{
|
| 166 |
"id": "qwen3_omni_diagnostic_pilot",
|
| 167 |
-
"claim": "Qwen3-Omni has a verified selected-episode held-out diagnostic pilot.",
|
| 168 |
"status": "verified_diagnostic",
|
| 169 |
"evidence": [
|
| 170 |
"docs/data/omni_finetune_verified_result.json",
|
|
@@ -172,94 +171,94 @@
|
|
| 172 |
"scripts/omni/package_verified_omni_result.py",
|
| 173 |
"scripts/omni/audit_verified_omni_package.py"
|
| 174 |
],
|
| 175 |
-
"
|
|
|
|
| 176 |
},
|
| 177 |
{
|
| 178 |
"id": "multi_episode_quality_improvement",
|
| 179 |
-
"claim": "The next Qwen3-Omni step is structured-output and task-quality improvement on the same selected split.",
|
| 180 |
"status": "active_next_step",
|
| 181 |
"evidence": [
|
| 182 |
"scripts/omni/run_128_fullsplit_parallel_export_8gpu.sh",
|
| 183 |
"docs/data/omni_finetune_verified_result.json",
|
| 184 |
"FOUNDATION_MODEL_PLAN.md"
|
| 185 |
],
|
| 186 |
-
"
|
|
|
|
| 187 |
},
|
| 188 |
{
|
| 189 |
"id": "scale_up_status_check",
|
| 190 |
-
"claim": "Older pilot path strings are tracked as setup-file provenance.",
|
| 191 |
"status": "verified",
|
| 192 |
"evidence": [
|
| 193 |
"scripts/validate_scope_claims.py",
|
| 194 |
"docs/data/scope_claims_audit.json"
|
| 195 |
],
|
| 196 |
-
"
|
|
|
|
| 197 |
},
|
| 198 |
{
|
| 199 |
"id": "mirror_parity",
|
| 200 |
-
"claim": "Prepared GitHub and Hugging Face mirrors carry matching critical data, visual, HTML, and validator files.",
|
| 201 |
"status": "verified",
|
| 202 |
"evidence": [
|
| 203 |
"scripts/validate_mirror_parity.py",
|
| 204 |
"docs/data/mirror_parity.json"
|
| 205 |
],
|
| 206 |
-
"
|
|
|
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"id": "publication_package",
|
| 210 |
-
"claim": "The public GitHub and Hugging Face bundles contain the intended release files.",
|
| 211 |
"status": "verified",
|
| 212 |
"evidence": [
|
| 213 |
"scripts/validate_publication_package.py",
|
| 214 |
"docs/data/publication_audit.json"
|
| 215 |
],
|
| 216 |
-
"
|
|
|
|
| 217 |
},
|
| 218 |
{
|
| 219 |
"id": "website_integrity",
|
| 220 |
-
"claim": "The public website has checked local references.",
|
| 221 |
"status": "verified",
|
| 222 |
"evidence": [
|
| 223 |
"scripts/validate_website_integrity.py",
|
| 224 |
"docs/data/website_integrity.json"
|
| 225 |
],
|
| 226 |
-
"
|
|
|
|
| 227 |
},
|
| 228 |
{
|
| 229 |
"id": "rendered_site_check",
|
| 230 |
-
"claim": "The rendered website walkthrough has a browser-level interaction check.",
|
| 231 |
"status": "verified",
|
| 232 |
"evidence": [
|
| 233 |
"RENDERED_SITE_CHECK.md",
|
| 234 |
"scripts/build_rendered_site_check.py",
|
| 235 |
"docs/data/rendered_site_check.json"
|
| 236 |
],
|
| 237 |
-
"
|
|
|
|
| 238 |
},
|
| 239 |
{
|
| 240 |
"id": "quality_gates",
|
| 241 |
-
"claim": "The release gate is explicit.",
|
| 242 |
"status": "verified",
|
| 243 |
"evidence": [
|
| 244 |
"QUALITY_GATES.md",
|
| 245 |
"scripts/build_quality_gates.py",
|
| 246 |
"docs/data/quality_gates.json"
|
| 247 |
],
|
| 248 |
-
"
|
|
|
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"id": "live_publication_status",
|
| 252 |
-
"claim": "The live public mirrors are checked after upload.",
|
| 253 |
"status": "verified",
|
| 254 |
"evidence": [
|
| 255 |
"scripts/verify_live_publication.py",
|
| 256 |
"docs/data/live_publication_status.json"
|
| 257 |
],
|
| 258 |
-
"
|
|
|
|
| 259 |
},
|
| 260 |
{
|
| 261 |
"id": "citation_metadata",
|
| 262 |
-
"claim": "The project is externally citable and machine-readable.",
|
| 263 |
"status": "verified",
|
| 264 |
"evidence": [
|
| 265 |
"CITATION.cff",
|
|
@@ -267,11 +266,11 @@
|
|
| 267 |
"docs/data/project_manifest.json",
|
| 268 |
"LICENSE"
|
| 269 |
],
|
| 270 |
-
"
|
|
|
|
| 271 |
},
|
| 272 |
{
|
| 273 |
"id": "project_path",
|
| 274 |
-
"claim": "A first-time reader has an explicit project path.",
|
| 275 |
"status": "verified",
|
| 276 |
"evidence": [
|
| 277 |
"docs/data/project_packet.json",
|
|
@@ -280,29 +279,30 @@
|
|
| 280 |
"README.md",
|
| 281 |
"docs/index.html"
|
| 282 |
],
|
| 283 |
-
"
|
|
|
|
| 284 |
},
|
| 285 |
{
|
| 286 |
"id": "artifact_index",
|
| 287 |
-
"claim": "The core project artifacts are grouped for human reading and indexed with existence, size, and hash metadata where stable.",
|
| 288 |
"status": "verified",
|
| 289 |
"evidence": [
|
| 290 |
"ARTIFACT_GUIDE.md",
|
| 291 |
"scripts/build_artifact_index.py",
|
| 292 |
"docs/data/artifact_index.json"
|
| 293 |
],
|
| 294 |
-
"
|
|
|
|
| 295 |
},
|
| 296 |
{
|
| 297 |
"id": "reproducibility_contract",
|
| 298 |
-
"claim": "The public reproduction path is documented with commands, expected outputs, and exact-match reproduction evidence.",
|
| 299 |
"status": "verified",
|
| 300 |
"evidence": [
|
| 301 |
"REPRODUCIBILITY.md",
|
| 302 |
"docs/data/reproducibility_matrix.json",
|
| 303 |
"notes/reproducibility_audit.md"
|
| 304 |
],
|
| 305 |
-
"
|
|
|
|
| 306 |
}
|
| 307 |
]
|
| 308 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"project": "Ropedia Xperience-10M Task Suite",
|
| 3 |
"scope": "single public Xperience-10M sample episode",
|
| 4 |
+
"readouts": [
|
| 5 |
{
|
| 6 |
"id": "project_status",
|
|
|
|
| 7 |
"status": "verified",
|
| 8 |
"evidence": [
|
| 9 |
"PROJECT_STATUS.md",
|
| 10 |
"docs/data/project_status.json"
|
| 11 |
],
|
| 12 |
+
"readout": "A first-pass reader has a compact current-state summary.",
|
| 13 |
+
"scope_note": "summarizes existing evidence and current limitations"
|
| 14 |
},
|
| 15 |
{
|
| 16 |
"id": "research_roadmap",
|
|
|
|
| 17 |
"status": "current",
|
| 18 |
"evidence": [
|
| 19 |
"RESEARCH_ROADMAP.md",
|
| 20 |
"docs/data/research_roadmap.json"
|
| 21 |
],
|
| 22 |
+
"readout": "The research roadmap is explicit.",
|
| 23 |
+
"scope_note": "connects public-sample task development to multi-episode data preparation, Qwen3-Omni LoRA, robustness runs, and larger omni-model extensions"
|
| 24 |
},
|
| 25 |
+
{
|
| 26 |
+
"id": "official_dataset_card_alignment",
|
|
|
|
| 27 |
"status": "verified",
|
| 28 |
"evidence": [
|
| 29 |
"XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
|
| 30 |
"docs/data/xperience10m_dataset_card_alignment.json",
|
| 31 |
"https://huggingface.co/datasets/ropedia-ai/xperience-10m"
|
| 32 |
],
|
| 33 |
+
"readout": "The public dataset description is aligned with the official gated Xperience-10M dataset card and public sample card.",
|
| 34 |
+
"scope_note": "summarizes upstream public metadata, API listing facts, sample license/tooling, and dataset-card facts; does not grant access or mirror raw data"
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"id": "source_alignment",
|
| 38 |
+
"status": "verified",
|
| 39 |
+
"evidence": [
|
| 40 |
+
"SOURCE_ALIGNMENT_AUDIT.md",
|
| 41 |
+
"docs/data/source_alignment_audit.json",
|
| 42 |
+
"scripts/validate_source_alignment.py"
|
| 43 |
+
],
|
| 44 |
+
"readout": "Source facts, sample details, API-listing notes, and project coverage are validated across repo, website, and HF cards.",
|
| 45 |
+
"scope_note": "offline committed-fact check; does not fetch private gated data"
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"id": "aligned_windows",
|
| 49 |
"status": "verified",
|
| 50 |
"evidence": [
|
| 51 |
"results/episode_task_suite/windows.csv",
|
| 52 |
"results/episode_task_suite/shared_windows.npz",
|
| 53 |
"results/episode_task_suite/summary_report.json"
|
| 54 |
],
|
| 55 |
+
"readout": "The public Xperience-10M sample has been converted into aligned model windows.",
|
| 56 |
+
"scope_note": "5,821 frames, 1,161 windows, one public sample episode"
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"id": "feature_contract",
|
| 60 |
+
"status": "verified",
|
| 61 |
+
"evidence": [
|
| 62 |
+
"results/episode_task_suite/feature_manifest.json",
|
| 63 |
+
"results/episode_task_suite/available_modalities.json"
|
| 64 |
+
],
|
| 65 |
+
"readout": "The current feature contract is explicit and inspectable.",
|
| 66 |
+
"scope_note": "8,546-dimensional aligned multimodal window representation"
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"id": "evaluation_protocol",
|
| 70 |
+
"status": "verified",
|
| 71 |
+
"evidence": [
|
| 72 |
+
"EVALUATION_PROTOCOL.md",
|
| 73 |
+
"docs/data/evaluation_protocol.json",
|
| 74 |
+
"scripts/build_evaluation_protocol.py"
|
| 75 |
+
],
|
| 76 |
+
"readout": "The task evaluation protocol is explicit and generated from committed metrics.",
|
| 77 |
+
"scope_note": "defines windows, split, per-task metrics, leakage controls, and current limitations"
|
| 78 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
{
|
| 80 |
"id": "modality_atlas",
|
|
|
|
| 81 |
"status": "verified",
|
| 82 |
"evidence": [
|
| 83 |
"docs/data/modality_atlas.json",
|
| 84 |
"docs/assets/modalities/",
|
| 85 |
"docs/index.html"
|
| 86 |
],
|
| 87 |
+
"readout": "The public sample modalities are inspectable without raw data redistribution.",
|
| 88 |
+
"scope_note": "derived thumbnails for presentation; raw data remains excluded"
|
| 89 |
},
|
| 90 |
{
|
| 91 |
"id": "task_surface_integrity",
|
|
|
|
| 92 |
"status": "verified",
|
| 93 |
"evidence": [
|
| 94 |
"docs/data/task_surface_integrity.json",
|
| 95 |
"scripts/validate_task_surface.py",
|
| 96 |
"docs/index.html"
|
| 97 |
],
|
| 98 |
+
"readout": "Public task cards stay readable for non-expert readers.",
|
| 99 |
+
"scope_note": "presentation integrity for the public task surface"
|
| 100 |
},
|
| 101 |
{
|
| 102 |
"id": "figure_index",
|
|
|
|
| 103 |
"status": "verified",
|
| 104 |
"evidence": [
|
| 105 |
"FIGURE_INDEX.md",
|
| 106 |
"docs/data/figure_index.json",
|
| 107 |
"scripts/build_figure_index.py"
|
| 108 |
],
|
| 109 |
+
"readout": "Public figures, charts, and modality thumbnails are indexed as project evidence.",
|
| 110 |
+
"scope_note": "records derived visual assets, dimensions, hashes, roles, and source scripts; raw Xperience-10M data remains excluded"
|
| 111 |
},
|
| 112 |
{
|
| 113 |
"id": "brand_assets",
|
|
|
|
| 114 |
"status": "verified",
|
| 115 |
"evidence": [
|
| 116 |
"docs/assets/brand/",
|
| 117 |
"docs/data/brand_assets.json",
|
| 118 |
"scripts/build_brand_assets.py"
|
| 119 |
],
|
| 120 |
+
"readout": "A project logo is consistently applied across public surfaces.",
|
| 121 |
+
"scope_note": "generated logo and deterministic derivatives only; no raw dataset data or model weights"
|
| 122 |
},
|
| 123 |
{
|
| 124 |
"id": "twelve_tasks",
|
|
|
|
| 125 |
"status": "verified",
|
| 126 |
"evidence": [
|
| 127 |
"scripts/episode_task_suite.py",
|
| 128 |
"results/episode_task_suite/*/metrics.json",
|
| 129 |
"results/episode_task_suite/*/predictions.*"
|
| 130 |
],
|
| 131 |
+
"readout": "The 12 task heads are implemented as scripts with saved metrics and predictions.",
|
| 132 |
+
"scope_note": "chronological single-episode split, not cross-episode generalization"
|
| 133 |
},
|
| 134 |
{
|
| 135 |
"id": "minimal_vs_neural",
|
|
|
|
| 136 |
"status": "verified",
|
| 137 |
"evidence": [
|
| 138 |
"scripts/neural_task_models.py",
|
| 139 |
"results/episode_task_suite/neural_mlp/",
|
| 140 |
"docs/assets/task_architectures.png"
|
| 141 |
],
|
| 142 |
+
"readout": "Minimal and neural heads use the same task contracts.",
|
| 143 |
+
"scope_note": "small heads only; not a foundation model"
|
| 144 |
},
|
| 145 |
{
|
| 146 |
"id": "research_directions",
|
|
|
|
| 147 |
"status": "verified",
|
| 148 |
"evidence": [
|
| 149 |
"results/episode_task_suite/research_directions/research_direction_taxonomy.json",
|
| 150 |
"docs/data/research_directions.json"
|
| 151 |
],
|
| 152 |
+
"readout": "Four Ropedia research directions are mapped honestly as direct, proxy, or diagnostic evidence.",
|
| 153 |
+
"scope_note": "some directions remain proxy-only"
|
| 154 |
},
|
| 155 |
{
|
| 156 |
"id": "direction_extensions",
|
|
|
|
| 157 |
"status": "verified",
|
| 158 |
"evidence": [
|
| 159 |
"results/episode_task_suite/research_direction_extensions/research_direction_extension_results.json",
|
| 160 |
"docs/data/research_direction_extensions.json"
|
| 161 |
],
|
| 162 |
+
"readout": "Four extra direction probes are coded and evaluated.",
|
| 163 |
+
"scope_note": "single-episode probes, not full research-direction solutions"
|
| 164 |
},
|
| 165 |
{
|
| 166 |
"id": "qwen3_omni_diagnostic_pilot",
|
|
|
|
| 167 |
"status": "verified_diagnostic",
|
| 168 |
"evidence": [
|
| 169 |
"docs/data/omni_finetune_verified_result.json",
|
|
|
|
| 171 |
"scripts/omni/package_verified_omni_result.py",
|
| 172 |
"scripts/omni/audit_verified_omni_package.py"
|
| 173 |
],
|
| 174 |
+
"readout": "Qwen3-Omni has a verified selected-episode held-out diagnostic pilot.",
|
| 175 |
+
"scope_note": "the pipeline is verified, but model quality is weak: JSON validity is below target and action/subtask metrics are low"
|
| 176 |
},
|
| 177 |
{
|
| 178 |
"id": "multi_episode_quality_improvement",
|
|
|
|
| 179 |
"status": "active_next_step",
|
| 180 |
"evidence": [
|
| 181 |
"scripts/omni/run_128_fullsplit_parallel_export_8gpu.sh",
|
| 182 |
"docs/data/omni_finetune_verified_result.json",
|
| 183 |
"FOUNDATION_MODEL_PLAN.md"
|
| 184 |
],
|
| 185 |
+
"readout": "The next Qwen3-Omni step is structured-output and task-quality improvement on the same selected split.",
|
| 186 |
+
"scope_note": "stronger model quality requires output-format improvements and action/subtask error analysis"
|
| 187 |
},
|
| 188 |
{
|
| 189 |
"id": "scale_up_status_check",
|
|
|
|
| 190 |
"status": "verified",
|
| 191 |
"evidence": [
|
| 192 |
"scripts/validate_scope_claims.py",
|
| 193 |
"docs/data/scope_claims_audit.json"
|
| 194 |
],
|
| 195 |
+
"readout": "Older pilot path strings are tracked as setup-file provenance.",
|
| 196 |
+
"scope_note": "run/path identifiers stay separate from completed held-out-episode results"
|
| 197 |
},
|
| 198 |
{
|
| 199 |
"id": "mirror_parity",
|
|
|
|
| 200 |
"status": "verified",
|
| 201 |
"evidence": [
|
| 202 |
"scripts/validate_mirror_parity.py",
|
| 203 |
"docs/data/mirror_parity.json"
|
| 204 |
],
|
| 205 |
+
"readout": "Prepared GitHub and Hugging Face mirrors carry matching critical data, visual, HTML, and validator files.",
|
| 206 |
+
"scope_note": "compares prepared local mirror bundles before upload; live URLs are checked after publishing"
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"id": "publication_package",
|
|
|
|
| 210 |
"status": "verified",
|
| 211 |
"evidence": [
|
| 212 |
"scripts/validate_publication_package.py",
|
| 213 |
"docs/data/publication_audit.json"
|
| 214 |
],
|
| 215 |
+
"readout": "The public GitHub and Hugging Face bundles contain the intended release files.",
|
| 216 |
+
"scope_note": "checks public files, HF bundles, and current public-card assets; temporary local outputs are excluded"
|
| 217 |
},
|
| 218 |
{
|
| 219 |
"id": "website_integrity",
|
|
|
|
| 220 |
"status": "verified",
|
| 221 |
"evidence": [
|
| 222 |
"scripts/validate_website_integrity.py",
|
| 223 |
"docs/data/website_integrity.json"
|
| 224 |
],
|
| 225 |
+
"readout": "The public website has checked local references.",
|
| 226 |
+
"scope_note": "checks local links, anchors, JSON data, and referenced images; external URLs are not fetched"
|
| 227 |
},
|
| 228 |
{
|
| 229 |
"id": "rendered_site_check",
|
|
|
|
| 230 |
"status": "verified",
|
| 231 |
"evidence": [
|
| 232 |
"RENDERED_SITE_CHECK.md",
|
| 233 |
"scripts/build_rendered_site_check.py",
|
| 234 |
"docs/data/rendered_site_check.json"
|
| 235 |
],
|
| 236 |
+
"readout": "The rendered website walkthrough has a browser-level interaction check.",
|
| 237 |
+
"scope_note": "checks local page load, tab switch, walkthrough deep link, player controls, and console health"
|
| 238 |
},
|
| 239 |
{
|
| 240 |
"id": "quality_gates",
|
|
|
|
| 241 |
"status": "verified",
|
| 242 |
"evidence": [
|
| 243 |
"QUALITY_GATES.md",
|
| 244 |
"scripts/build_quality_gates.py",
|
| 245 |
"docs/data/quality_gates.json"
|
| 246 |
],
|
| 247 |
+
"readout": "The release gate is explicit.",
|
| 248 |
+
"scope_note": "summarizes packaging and live-mirror checks; cross-episode model quality is measured by later held-out reports"
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"id": "live_publication_status",
|
|
|
|
| 252 |
"status": "verified",
|
| 253 |
"evidence": [
|
| 254 |
"scripts/verify_live_publication.py",
|
| 255 |
"docs/data/live_publication_status.json"
|
| 256 |
],
|
| 257 |
+
"readout": "The live public mirrors are checked after upload.",
|
| 258 |
+
"scope_note": "fetches public GitHub/HF URLs; it does not validate private training state"
|
| 259 |
},
|
| 260 |
{
|
| 261 |
"id": "citation_metadata",
|
|
|
|
| 262 |
"status": "verified",
|
| 263 |
"evidence": [
|
| 264 |
"CITATION.cff",
|
|
|
|
| 266 |
"docs/data/project_manifest.json",
|
| 267 |
"LICENSE"
|
| 268 |
],
|
| 269 |
+
"readout": "The project is externally citable and machine-readable.",
|
| 270 |
+
"scope_note": "code license does not override original Xperience-10M dataset terms"
|
| 271 |
},
|
| 272 |
{
|
| 273 |
"id": "project_path",
|
|
|
|
| 274 |
"status": "verified",
|
| 275 |
"evidence": [
|
| 276 |
"docs/data/project_packet.json",
|
|
|
|
| 279 |
"README.md",
|
| 280 |
"docs/index.html"
|
| 281 |
],
|
| 282 |
+
"readout": "A first-time reader has an explicit project path.",
|
| 283 |
+
"scope_note": "guides inspection across data, tasks, results, and scale-up status"
|
| 284 |
},
|
| 285 |
{
|
| 286 |
"id": "artifact_index",
|
|
|
|
| 287 |
"status": "verified",
|
| 288 |
"evidence": [
|
| 289 |
"ARTIFACT_GUIDE.md",
|
| 290 |
"scripts/build_artifact_index.py",
|
| 291 |
"docs/data/artifact_index.json"
|
| 292 |
],
|
| 293 |
+
"readout": "The core project artifacts are grouped for human reading and indexed with existence, size, and hash metadata where stable.",
|
| 294 |
+
"scope_note": "selective source-of-truth catalog, not a complete inventory of every output file"
|
| 295 |
},
|
| 296 |
{
|
| 297 |
"id": "reproducibility_contract",
|
|
|
|
| 298 |
"status": "verified",
|
| 299 |
"evidence": [
|
| 300 |
"REPRODUCIBILITY.md",
|
| 301 |
"docs/data/reproducibility_matrix.json",
|
| 302 |
"notes/reproducibility_audit.md"
|
| 303 |
],
|
| 304 |
+
"readout": "The public reproduction path is documented with commands, expected outputs, and exact-match reproduction evidence.",
|
| 305 |
+
"scope_note": "publicly reproduces the single-episode pipeline; multi-episode Qwen3-Omni metrics are added only after data preparation and held-out evaluation"
|
| 306 |
}
|
| 307 |
]
|
| 308 |
}
|
data/mirror_parity.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/omni_model_comparison.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"status": "pass",
|
| 5 |
"version_count": 3,
|
| 6 |
"model_group_count": 5,
|
|
@@ -1758,6 +1758,6 @@
|
|
| 1758 |
],
|
| 1759 |
"pending": [
|
| 1760 |
"Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.",
|
| 1761 |
-
"Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before
|
| 1762 |
]
|
| 1763 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
|
| 3 |
+
"generated_at_utc": "2026-06-22T10:59:59+00:00",
|
| 4 |
"status": "pass",
|
| 5 |
"version_count": 3,
|
| 6 |
"model_group_count": 5,
|
|
|
|
| 1758 |
],
|
| 1759 |
"pending": [
|
| 1760 |
"Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.",
|
| 1761 |
+
"Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before presenting v6 as globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses subtask, next-action, object micro-F1, and JSON validity slightly."
|
| 1762 |
]
|
| 1763 |
}
|
data/project_brief.json
CHANGED
|
@@ -56,7 +56,7 @@
|
|
| 56 |
"Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
|
| 57 |
],
|
| 58 |
"scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The final multi-episode Qwen3-Omni diagnostic result verifies the training loop and strict-JSON output reliability, but does not yet show strong action/subtask model quality.",
|
| 59 |
-
"next_stage": "Improve action/subtask quality through error analysis before larger robustness or alternative-backbone
|
| 60 |
"entry_points": {
|
| 61 |
"visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
|
| 62 |
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",
|
|
|
|
| 56 |
"Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
|
| 57 |
],
|
| 58 |
"scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The final multi-episode Qwen3-Omni diagnostic result verifies the training loop and strict-JSON output reliability, but does not yet show strong action/subtask model quality.",
|
| 59 |
+
"next_stage": "Improve action/subtask quality through error analysis before presenting larger robustness or alternative-backbone results.",
|
| 60 |
"entry_points": {
|
| 61 |
"visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
|
| 62 |
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",
|
data/public_reader_map.json
CHANGED
|
@@ -6,77 +6,124 @@
|
|
| 6 |
{
|
| 7 |
"reader_goal": "Understand the project in one pass",
|
| 8 |
"start_here": "PROJECT_BRIEF.md",
|
| 9 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 10 |
},
|
| 11 |
{
|
| 12 |
"reader_goal": "Understand the two evidence lines",
|
| 13 |
"start_here": "TWO_EVIDENCE_LINES.md",
|
| 14 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"reader_goal": "See the visual public dashboard",
|
| 18 |
"start_here": "GitHub Pages dashboard or Hugging Face Space",
|
| 19 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"reader_goal": "Decode project terminology",
|
| 23 |
"start_here": "GLOSSARY.md",
|
| 24 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"reader_goal": "Understand the data unit",
|
| 28 |
"start_here": "results/episode_task_suite/windows.csv",
|
| 29 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 30 |
},
|
| 31 |
{
|
| 32 |
"reader_goal": "Trace the 128-episode split",
|
| 33 |
"start_here": "XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
|
| 34 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"reader_goal": "Inspect the 20-task benchmark",
|
| 38 |
"start_here": "TASK_SUITE_20.md",
|
| 39 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"reader_goal": "Compare current results",
|
| 43 |
"start_here": "RESEARCH_TAKEAWAYS.md",
|
| 44 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"reader_goal": "Compare 1-episode and 128-episode methods",
|
| 48 |
"start_here": "Homepage radar section",
|
| 49 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 50 |
},
|
| 51 |
{
|
| 52 |
"reader_goal": "Read Qwen3-Omni v1-v6 correctly",
|
| 53 |
"start_here": "QWEN3_OMNI_RUN_LINEAGE.md",
|
| 54 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 55 |
},
|
| 56 |
{
|
| 57 |
"reader_goal": "Find all derived artifacts",
|
| 58 |
"start_here": "ARTIFACT_GUIDE.md",
|
| 59 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 60 |
},
|
| 61 |
{
|
| 62 |
"reader_goal": "Download model weights with their matching results",
|
| 63 |
"start_here": "Hugging Face weights/results repo",
|
| 64 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
},
|
| 66 |
{
|
| 67 |
"reader_goal": "Reproduce or extend the work",
|
| 68 |
"start_here": "REPRODUCIBILITY.md",
|
| 69 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
},
|
| 71 |
{
|
| 72 |
"reader_goal": "Understand foundation-model directions",
|
| 73 |
"start_here": "THREE_FOUNDATION_PIPELINES.md",
|
| 74 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 75 |
},
|
| 76 |
{
|
| 77 |
"reader_goal": "Check public-release health",
|
| 78 |
"start_here": "PUBLIC_SURFACE_QA.md",
|
| 79 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 80 |
}
|
| 81 |
],
|
| 82 |
"public_surfaces": [
|
|
@@ -125,31 +172,49 @@
|
|
| 125 |
"Foundation directions",
|
| 126 |
"Public-release checks"
|
| 127 |
],
|
| 128 |
-
"
|
| 129 |
-
{
|
| 130 |
-
"
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
"
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
"
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
}
|
| 154 |
]
|
| 155 |
}
|
|
|
|
| 6 |
{
|
| 7 |
"reader_goal": "Understand the project in one pass",
|
| 8 |
"start_here": "PROJECT_BRIEF.md",
|
| 9 |
+
"then_inspect": [
|
| 10 |
+
"PROJECT_STATUS.md",
|
| 11 |
+
"RESEARCH_TAKEAWAYS.md"
|
| 12 |
+
]
|
| 13 |
},
|
| 14 |
{
|
| 15 |
"reader_goal": "Understand the two evidence lines",
|
| 16 |
"start_here": "TWO_EVIDENCE_LINES.md",
|
| 17 |
+
"then_inspect": [
|
| 18 |
+
"docs/data/two_evidence_lines.json",
|
| 19 |
+
"docs/data/two_evidence_line_result_summary.json"
|
| 20 |
+
]
|
| 21 |
},
|
| 22 |
{
|
| 23 |
"reader_goal": "See the visual public dashboard",
|
| 24 |
"start_here": "GitHub Pages dashboard or Hugging Face Space",
|
| 25 |
+
"then_inspect": [
|
| 26 |
+
"docs/index.html",
|
| 27 |
+
"docs/data/project_packet.json"
|
| 28 |
+
]
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"reader_goal": "Decode project terminology",
|
| 32 |
"start_here": "GLOSSARY.md",
|
| 33 |
+
"then_inspect": [
|
| 34 |
+
"docs/data/glossary.json",
|
| 35 |
+
"Homepage Glossary section"
|
| 36 |
+
]
|
| 37 |
},
|
| 38 |
{
|
| 39 |
"reader_goal": "Understand the data unit",
|
| 40 |
"start_here": "results/episode_task_suite/windows.csv",
|
| 41 |
+
"then_inspect": [
|
| 42 |
+
"results/episode_task_suite/feature_manifest.json",
|
| 43 |
+
"docs/data/raw_sample_files.json"
|
| 44 |
+
]
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"reader_goal": "Trace the 128-episode split",
|
| 48 |
"start_here": "XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
|
| 49 |
+
"then_inspect": [
|
| 50 |
+
"docs/data/xperience10m_128_episode_feature_index.json",
|
| 51 |
+
"results/omni_finetune/xperience10m_128_episode_selection.csv"
|
| 52 |
+
]
|
| 53 |
},
|
| 54 |
{
|
| 55 |
"reader_goal": "Inspect the 20-task benchmark",
|
| 56 |
"start_here": "TASK_SUITE_20.md",
|
| 57 |
+
"then_inspect": [
|
| 58 |
+
"docs/data/task_suite_20.json",
|
| 59 |
+
"EVALUATION_PROTOCOL.md"
|
| 60 |
+
]
|
| 61 |
},
|
| 62 |
{
|
| 63 |
"reader_goal": "Compare current results",
|
| 64 |
"start_here": "RESEARCH_TAKEAWAYS.md",
|
| 65 |
+
"then_inspect": [
|
| 66 |
+
"docs/data/task_method_20_result_matrix.json",
|
| 67 |
+
"docs/data/unified_task_model_radar.json"
|
| 68 |
+
]
|
| 69 |
},
|
| 70 |
{
|
| 71 |
"reader_goal": "Compare 1-episode and 128-episode methods",
|
| 72 |
"start_here": "Homepage radar section",
|
| 73 |
+
"then_inspect": [
|
| 74 |
+
"docs/data/single_episode_task_model_radar.json",
|
| 75 |
+
"docs/data/episode128_task_model_radar.json"
|
| 76 |
+
]
|
| 77 |
},
|
| 78 |
{
|
| 79 |
"reader_goal": "Read Qwen3-Omni v1-v6 correctly",
|
| 80 |
"start_here": "QWEN3_OMNI_RUN_LINEAGE.md",
|
| 81 |
+
"then_inspect": [
|
| 82 |
+
"docs/data/qwen3_omni_run_lineage.json",
|
| 83 |
+
"docs/data/qwen3_v5_v6_comparison.json"
|
| 84 |
+
]
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"reader_goal": "Find all derived artifacts",
|
| 88 |
"start_here": "ARTIFACT_GUIDE.md",
|
| 89 |
+
"then_inspect": [
|
| 90 |
+
"Hugging Face artifact dataset",
|
| 91 |
+
"docs/data/artifact_index.json"
|
| 92 |
+
]
|
| 93 |
},
|
| 94 |
{
|
| 95 |
"reader_goal": "Download model weights with their matching results",
|
| 96 |
"start_here": "Hugging Face weights/results repo",
|
| 97 |
+
"then_inspect": [
|
| 98 |
+
"manifest.json",
|
| 99 |
+
"analysis/docs/data/task_method_20_result_matrix.json",
|
| 100 |
+
"results/"
|
| 101 |
+
]
|
| 102 |
},
|
| 103 |
{
|
| 104 |
"reader_goal": "Reproduce or extend the work",
|
| 105 |
"start_here": "REPRODUCIBILITY.md",
|
| 106 |
+
"then_inspect": [
|
| 107 |
+
"QUALITY_GATES.md",
|
| 108 |
+
"scripts/",
|
| 109 |
+
"results/"
|
| 110 |
+
]
|
| 111 |
},
|
| 112 |
{
|
| 113 |
"reader_goal": "Understand foundation-model directions",
|
| 114 |
"start_here": "THREE_FOUNDATION_PIPELINES.md",
|
| 115 |
+
"then_inspect": [
|
| 116 |
+
"FOUNDATION_MODEL_PLAN.md",
|
| 117 |
+
"docs/data/three_foundation_pipelines.json"
|
| 118 |
+
]
|
| 119 |
},
|
| 120 |
{
|
| 121 |
"reader_goal": "Check public-release health",
|
| 122 |
"start_here": "PUBLIC_SURFACE_QA.md",
|
| 123 |
+
"then_inspect": [
|
| 124 |
+
"docs/data/live_publication_status.json",
|
| 125 |
+
"docs/data/mirror_parity.json"
|
| 126 |
+
]
|
| 127 |
}
|
| 128 |
],
|
| 129 |
"public_surfaces": [
|
|
|
|
| 172 |
"Foundation directions",
|
| 173 |
"Public-release checks"
|
| 174 |
],
|
| 175 |
+
"reading_scopes": [
|
| 176 |
+
{
|
| 177 |
+
"public_evidence": [
|
| 178 |
+
"results/episode_task_suite/",
|
| 179 |
+
"docs/data/task_suite_20.json"
|
| 180 |
+
],
|
| 181 |
+
"topic": "Single public-sample task behavior",
|
| 182 |
+
"scope_note": "Describes one public sample episode, not the full dataset distribution."
|
| 183 |
+
},
|
| 184 |
+
{
|
| 185 |
+
"public_evidence": [
|
| 186 |
+
"XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
|
| 187 |
+
"docs/data/xperience10m_128_episode_feature_index.json",
|
| 188 |
+
"results/omni_finetune/*128*",
|
| 189 |
+
"docs/data/omni_model_comparison.json"
|
| 190 |
+
],
|
| 191 |
+
"topic": "128-episode method comparison",
|
| 192 |
+
"scope_note": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"public_evidence": [
|
| 196 |
+
"QWEN3_OMNI_RUN_LINEAGE.md",
|
| 197 |
+
"docs/data/qwen3_omni_run_lineage.json"
|
| 198 |
+
],
|
| 199 |
+
"topic": "Qwen3-Omni v1-v6 lineage",
|
| 200 |
+
"scope_note": "v1-v4 are pipeline and ablation evidence, v5 is the pinned prior release, and v6 is the current public 20-task Qwen row."
|
| 201 |
+
},
|
| 202 |
+
{
|
| 203 |
+
"public_evidence": [
|
| 204 |
+
"Verified Qwen3-Omni and Cosmos3 result packages",
|
| 205 |
+
"model cards"
|
| 206 |
+
],
|
| 207 |
+
"topic": "Foundation-model track quality",
|
| 208 |
+
"scope_note": "Numeric task scores appear only when a task-specific eval or probe exists."
|
| 209 |
+
},
|
| 210 |
+
{
|
| 211 |
+
"public_evidence": [
|
| 212 |
+
"REPRODUCIBILITY.md",
|
| 213 |
+
"QUALITY_GATES.md",
|
| 214 |
+
"release validators"
|
| 215 |
+
],
|
| 216 |
+
"topic": "Reproducibility",
|
| 217 |
+
"scope_note": "Raw gated Xperience-10M files and full foundation weights are not redistributed."
|
| 218 |
}
|
| 219 |
]
|
| 220 |
}
|
data/public_surface_qa.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Project Surface",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
|
| 6 |
"checks": [
|
| 7 |
{
|
|
@@ -18,7 +18,7 @@
|
|
| 18 |
"website_integrity": {
|
| 19 |
"exists": true,
|
| 20 |
"status": "pass",
|
| 21 |
-
"generated_at_utc": "2026-06-
|
| 22 |
},
|
| 23 |
"rendered_site_check": {
|
| 24 |
"exists": true,
|
|
@@ -28,27 +28,27 @@
|
|
| 28 |
"task_surface_integrity": {
|
| 29 |
"exists": true,
|
| 30 |
"status": "pass",
|
| 31 |
-
"generated_at_utc": "2026-06-
|
| 32 |
},
|
| 33 |
"source_alignment": {
|
| 34 |
"exists": true,
|
| 35 |
"status": "pass",
|
| 36 |
-
"generated_at_utc": "2026-06-
|
| 37 |
},
|
| 38 |
"scale_up_status": {
|
| 39 |
"exists": true,
|
| 40 |
"status": "pass",
|
| 41 |
-
"generated_at_utc": "2026-06-
|
| 42 |
},
|
| 43 |
"publication_package": {
|
| 44 |
"exists": true,
|
| 45 |
"status": "pass",
|
| 46 |
-
"generated_at_utc": "2026-06-
|
| 47 |
},
|
| 48 |
"mirror_parity": {
|
| 49 |
"exists": true,
|
| 50 |
"status": "pass",
|
| 51 |
-
"generated_at_utc": "2026-06-
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"failures": {}
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Project Surface",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-22T11:18:45+00:00",
|
| 5 |
"scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
|
| 6 |
"checks": [
|
| 7 |
{
|
|
|
|
| 18 |
"website_integrity": {
|
| 19 |
"exists": true,
|
| 20 |
"status": "pass",
|
| 21 |
+
"generated_at_utc": "2026-06-22T11:17:07+00:00"
|
| 22 |
},
|
| 23 |
"rendered_site_check": {
|
| 24 |
"exists": true,
|
|
|
|
| 28 |
"task_surface_integrity": {
|
| 29 |
"exists": true,
|
| 30 |
"status": "pass",
|
| 31 |
+
"generated_at_utc": "2026-06-22T11:17:07+00:00"
|
| 32 |
},
|
| 33 |
"source_alignment": {
|
| 34 |
"exists": true,
|
| 35 |
"status": "pass",
|
| 36 |
+
"generated_at_utc": "2026-06-22T11:17:08+00:00"
|
| 37 |
},
|
| 38 |
"scale_up_status": {
|
| 39 |
"exists": true,
|
| 40 |
"status": "pass",
|
| 41 |
+
"generated_at_utc": "2026-06-22T11:17:10+00:00"
|
| 42 |
},
|
| 43 |
"publication_package": {
|
| 44 |
"exists": true,
|
| 45 |
"status": "pass",
|
| 46 |
+
"generated_at_utc": "2026-06-22T11:18:16+00:00"
|
| 47 |
},
|
| 48 |
"mirror_parity": {
|
| 49 |
"exists": true,
|
| 50 |
"status": "pass",
|
| 51 |
+
"generated_at_utc": "2026-06-22T11:18:11+00:00"
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"failures": {}
|
data/publication_audit.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
@@ -246,8 +246,8 @@
|
|
| 246 |
"hf_space_bundle": {
|
| 247 |
"root": "hf_publish/space",
|
| 248 |
"exists": true,
|
| 249 |
-
"file_count":
|
| 250 |
-
"text_file_count":
|
| 251 |
"largest_file": {
|
| 252 |
"path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
|
| 253 |
"bytes": 10221085
|
|
@@ -257,8 +257,8 @@
|
|
| 257 |
"hf_artifact_bundle": {
|
| 258 |
"root": "hf_publish/artifacts",
|
| 259 |
"exists": true,
|
| 260 |
-
"file_count":
|
| 261 |
-
"text_file_count":
|
| 262 |
"largest_file": {
|
| 263 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 264 |
"bytes": 135591061
|
|
@@ -268,8 +268,8 @@
|
|
| 268 |
"hf_model_bundle": {
|
| 269 |
"root": "hf_publish/model",
|
| 270 |
"exists": true,
|
| 271 |
-
"file_count":
|
| 272 |
-
"text_file_count":
|
| 273 |
"largest_file": {
|
| 274 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 275 |
"bytes": 135591061
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-22T11:18:16+00:00",
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
|
|
| 246 |
"hf_space_bundle": {
|
| 247 |
"root": "hf_publish/space",
|
| 248 |
"exists": true,
|
| 249 |
+
"file_count": 640,
|
| 250 |
+
"text_file_count": 479,
|
| 251 |
"largest_file": {
|
| 252 |
"path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
|
| 253 |
"bytes": 10221085
|
|
|
|
| 257 |
"hf_artifact_bundle": {
|
| 258 |
"root": "hf_publish/artifacts",
|
| 259 |
"exists": true,
|
| 260 |
+
"file_count": 4708,
|
| 261 |
+
"text_file_count": 1334,
|
| 262 |
"largest_file": {
|
| 263 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 264 |
"bytes": 135591061
|
|
|
|
| 268 |
"hf_model_bundle": {
|
| 269 |
"root": "hf_publish/model",
|
| 270 |
"exists": true,
|
| 271 |
+
"file_count": 5470,
|
| 272 |
+
"text_file_count": 1508,
|
| 273 |
"largest_file": {
|
| 274 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 275 |
"bytes": 135591061
|
docs/data/evidence_contract.json
CHANGED
|
@@ -1,170 +1,169 @@
|
|
| 1 |
{
|
| 2 |
"project": "Ropedia Xperience-10M Task Suite",
|
| 3 |
"scope": "single public Xperience-10M sample episode",
|
| 4 |
-
"
|
| 5 |
{
|
| 6 |
"id": "project_status",
|
| 7 |
-
"claim": "A first-pass reader has a compact current-state summary.",
|
| 8 |
"status": "verified",
|
| 9 |
"evidence": [
|
| 10 |
"PROJECT_STATUS.md",
|
| 11 |
"docs/data/project_status.json"
|
| 12 |
],
|
| 13 |
-
"
|
|
|
|
| 14 |
},
|
| 15 |
{
|
| 16 |
"id": "research_roadmap",
|
| 17 |
-
"claim": "The research roadmap is explicit.",
|
| 18 |
"status": "current",
|
| 19 |
"evidence": [
|
| 20 |
"RESEARCH_ROADMAP.md",
|
| 21 |
"docs/data/research_roadmap.json"
|
| 22 |
],
|
| 23 |
-
"
|
|
|
|
| 24 |
},
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
"claim": "The public dataset description is aligned with the official gated Xperience-10M dataset card and public sample card.",
|
| 28 |
"status": "verified",
|
| 29 |
"evidence": [
|
| 30 |
"XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
|
| 31 |
"docs/data/xperience10m_dataset_card_alignment.json",
|
| 32 |
"https://huggingface.co/datasets/ropedia-ai/xperience-10m"
|
| 33 |
],
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
"
|
| 50 |
"status": "verified",
|
| 51 |
"evidence": [
|
| 52 |
"results/episode_task_suite/windows.csv",
|
| 53 |
"results/episode_task_suite/shared_windows.npz",
|
| 54 |
"results/episode_task_suite/summary_report.json"
|
| 55 |
],
|
| 56 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
},
|
| 58 |
-
{
|
| 59 |
-
"id": "feature_contract",
|
| 60 |
-
"claim": "The current feature contract is explicit and inspectable.",
|
| 61 |
-
"status": "verified",
|
| 62 |
-
"evidence": [
|
| 63 |
-
"results/episode_task_suite/feature_manifest.json",
|
| 64 |
-
"results/episode_task_suite/available_modalities.json"
|
| 65 |
-
],
|
| 66 |
-
"boundary": "8,546-dimensional aligned multimodal window representation"
|
| 67 |
-
},
|
| 68 |
-
{
|
| 69 |
-
"id": "evaluation_protocol",
|
| 70 |
-
"claim": "The task evaluation protocol is explicit and generated from committed metrics.",
|
| 71 |
-
"status": "verified",
|
| 72 |
-
"evidence": [
|
| 73 |
-
"EVALUATION_PROTOCOL.md",
|
| 74 |
-
"docs/data/evaluation_protocol.json",
|
| 75 |
-
"scripts/build_evaluation_protocol.py"
|
| 76 |
-
],
|
| 77 |
-
"boundary": "defines windows, split, per-task metrics, leakage controls, and current limitations"
|
| 78 |
-
},
|
| 79 |
{
|
| 80 |
"id": "modality_atlas",
|
| 81 |
-
"claim": "The public sample modalities are inspectable without raw data redistribution.",
|
| 82 |
"status": "verified",
|
| 83 |
"evidence": [
|
| 84 |
"docs/data/modality_atlas.json",
|
| 85 |
"docs/assets/modalities/",
|
| 86 |
"docs/index.html"
|
| 87 |
],
|
| 88 |
-
"
|
|
|
|
| 89 |
},
|
| 90 |
{
|
| 91 |
"id": "task_surface_integrity",
|
| 92 |
-
"claim": "Public task cards stay readable for non-expert readers.",
|
| 93 |
"status": "verified",
|
| 94 |
"evidence": [
|
| 95 |
"docs/data/task_surface_integrity.json",
|
| 96 |
"scripts/validate_task_surface.py",
|
| 97 |
"docs/index.html"
|
| 98 |
],
|
| 99 |
-
"
|
|
|
|
| 100 |
},
|
| 101 |
{
|
| 102 |
"id": "figure_index",
|
| 103 |
-
"claim": "Public figures, charts, and modality thumbnails are indexed as project evidence.",
|
| 104 |
"status": "verified",
|
| 105 |
"evidence": [
|
| 106 |
"FIGURE_INDEX.md",
|
| 107 |
"docs/data/figure_index.json",
|
| 108 |
"scripts/build_figure_index.py"
|
| 109 |
],
|
| 110 |
-
"
|
|
|
|
| 111 |
},
|
| 112 |
{
|
| 113 |
"id": "brand_assets",
|
| 114 |
-
"claim": "A project logo is consistently applied across public surfaces.",
|
| 115 |
"status": "verified",
|
| 116 |
"evidence": [
|
| 117 |
"docs/assets/brand/",
|
| 118 |
"docs/data/brand_assets.json",
|
| 119 |
"scripts/build_brand_assets.py"
|
| 120 |
],
|
| 121 |
-
"
|
|
|
|
| 122 |
},
|
| 123 |
{
|
| 124 |
"id": "twelve_tasks",
|
| 125 |
-
"claim": "The 12 task heads are implemented as scripts with saved metrics and predictions.",
|
| 126 |
"status": "verified",
|
| 127 |
"evidence": [
|
| 128 |
"scripts/episode_task_suite.py",
|
| 129 |
"results/episode_task_suite/*/metrics.json",
|
| 130 |
"results/episode_task_suite/*/predictions.*"
|
| 131 |
],
|
| 132 |
-
"
|
|
|
|
| 133 |
},
|
| 134 |
{
|
| 135 |
"id": "minimal_vs_neural",
|
| 136 |
-
"claim": "Minimal and neural heads use the same task contracts.",
|
| 137 |
"status": "verified",
|
| 138 |
"evidence": [
|
| 139 |
"scripts/neural_task_models.py",
|
| 140 |
"results/episode_task_suite/neural_mlp/",
|
| 141 |
"docs/assets/task_architectures.png"
|
| 142 |
],
|
| 143 |
-
"
|
|
|
|
| 144 |
},
|
| 145 |
{
|
| 146 |
"id": "research_directions",
|
| 147 |
-
"claim": "Four Ropedia research directions are mapped honestly as direct, proxy, or diagnostic evidence.",
|
| 148 |
"status": "verified",
|
| 149 |
"evidence": [
|
| 150 |
"results/episode_task_suite/research_directions/research_direction_taxonomy.json",
|
| 151 |
"docs/data/research_directions.json"
|
| 152 |
],
|
| 153 |
-
"
|
|
|
|
| 154 |
},
|
| 155 |
{
|
| 156 |
"id": "direction_extensions",
|
| 157 |
-
"claim": "Four extra direction probes are coded and evaluated.",
|
| 158 |
"status": "verified",
|
| 159 |
"evidence": [
|
| 160 |
"results/episode_task_suite/research_direction_extensions/research_direction_extension_results.json",
|
| 161 |
"docs/data/research_direction_extensions.json"
|
| 162 |
],
|
| 163 |
-
"
|
|
|
|
| 164 |
},
|
| 165 |
{
|
| 166 |
"id": "qwen3_omni_diagnostic_pilot",
|
| 167 |
-
"claim": "Qwen3-Omni has a verified selected-episode held-out diagnostic pilot.",
|
| 168 |
"status": "verified_diagnostic",
|
| 169 |
"evidence": [
|
| 170 |
"docs/data/omni_finetune_verified_result.json",
|
|
@@ -172,94 +171,94 @@
|
|
| 172 |
"scripts/omni/package_verified_omni_result.py",
|
| 173 |
"scripts/omni/audit_verified_omni_package.py"
|
| 174 |
],
|
| 175 |
-
"
|
|
|
|
| 176 |
},
|
| 177 |
{
|
| 178 |
"id": "multi_episode_quality_improvement",
|
| 179 |
-
"claim": "The next Qwen3-Omni step is structured-output and task-quality improvement on the same selected split.",
|
| 180 |
"status": "active_next_step",
|
| 181 |
"evidence": [
|
| 182 |
"scripts/omni/run_128_fullsplit_parallel_export_8gpu.sh",
|
| 183 |
"docs/data/omni_finetune_verified_result.json",
|
| 184 |
"FOUNDATION_MODEL_PLAN.md"
|
| 185 |
],
|
| 186 |
-
"
|
|
|
|
| 187 |
},
|
| 188 |
{
|
| 189 |
"id": "scale_up_status_check",
|
| 190 |
-
"claim": "Older pilot path strings are tracked as setup-file provenance.",
|
| 191 |
"status": "verified",
|
| 192 |
"evidence": [
|
| 193 |
"scripts/validate_scope_claims.py",
|
| 194 |
"docs/data/scope_claims_audit.json"
|
| 195 |
],
|
| 196 |
-
"
|
|
|
|
| 197 |
},
|
| 198 |
{
|
| 199 |
"id": "mirror_parity",
|
| 200 |
-
"claim": "Prepared GitHub and Hugging Face mirrors carry matching critical data, visual, HTML, and validator files.",
|
| 201 |
"status": "verified",
|
| 202 |
"evidence": [
|
| 203 |
"scripts/validate_mirror_parity.py",
|
| 204 |
"docs/data/mirror_parity.json"
|
| 205 |
],
|
| 206 |
-
"
|
|
|
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"id": "publication_package",
|
| 210 |
-
"claim": "The public GitHub and Hugging Face bundles contain the intended release files.",
|
| 211 |
"status": "verified",
|
| 212 |
"evidence": [
|
| 213 |
"scripts/validate_publication_package.py",
|
| 214 |
"docs/data/publication_audit.json"
|
| 215 |
],
|
| 216 |
-
"
|
|
|
|
| 217 |
},
|
| 218 |
{
|
| 219 |
"id": "website_integrity",
|
| 220 |
-
"claim": "The public website has checked local references.",
|
| 221 |
"status": "verified",
|
| 222 |
"evidence": [
|
| 223 |
"scripts/validate_website_integrity.py",
|
| 224 |
"docs/data/website_integrity.json"
|
| 225 |
],
|
| 226 |
-
"
|
|
|
|
| 227 |
},
|
| 228 |
{
|
| 229 |
"id": "rendered_site_check",
|
| 230 |
-
"claim": "The rendered website walkthrough has a browser-level interaction check.",
|
| 231 |
"status": "verified",
|
| 232 |
"evidence": [
|
| 233 |
"RENDERED_SITE_CHECK.md",
|
| 234 |
"scripts/build_rendered_site_check.py",
|
| 235 |
"docs/data/rendered_site_check.json"
|
| 236 |
],
|
| 237 |
-
"
|
|
|
|
| 238 |
},
|
| 239 |
{
|
| 240 |
"id": "quality_gates",
|
| 241 |
-
"claim": "The release gate is explicit.",
|
| 242 |
"status": "verified",
|
| 243 |
"evidence": [
|
| 244 |
"QUALITY_GATES.md",
|
| 245 |
"scripts/build_quality_gates.py",
|
| 246 |
"docs/data/quality_gates.json"
|
| 247 |
],
|
| 248 |
-
"
|
|
|
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"id": "live_publication_status",
|
| 252 |
-
"claim": "The live public mirrors are checked after upload.",
|
| 253 |
"status": "verified",
|
| 254 |
"evidence": [
|
| 255 |
"scripts/verify_live_publication.py",
|
| 256 |
"docs/data/live_publication_status.json"
|
| 257 |
],
|
| 258 |
-
"
|
|
|
|
| 259 |
},
|
| 260 |
{
|
| 261 |
"id": "citation_metadata",
|
| 262 |
-
"claim": "The project is externally citable and machine-readable.",
|
| 263 |
"status": "verified",
|
| 264 |
"evidence": [
|
| 265 |
"CITATION.cff",
|
|
@@ -267,11 +266,11 @@
|
|
| 267 |
"docs/data/project_manifest.json",
|
| 268 |
"LICENSE"
|
| 269 |
],
|
| 270 |
-
"
|
|
|
|
| 271 |
},
|
| 272 |
{
|
| 273 |
"id": "project_path",
|
| 274 |
-
"claim": "A first-time reader has an explicit project path.",
|
| 275 |
"status": "verified",
|
| 276 |
"evidence": [
|
| 277 |
"docs/data/project_packet.json",
|
|
@@ -280,29 +279,30 @@
|
|
| 280 |
"README.md",
|
| 281 |
"docs/index.html"
|
| 282 |
],
|
| 283 |
-
"
|
|
|
|
| 284 |
},
|
| 285 |
{
|
| 286 |
"id": "artifact_index",
|
| 287 |
-
"claim": "The core project artifacts are grouped for human reading and indexed with existence, size, and hash metadata where stable.",
|
| 288 |
"status": "verified",
|
| 289 |
"evidence": [
|
| 290 |
"ARTIFACT_GUIDE.md",
|
| 291 |
"scripts/build_artifact_index.py",
|
| 292 |
"docs/data/artifact_index.json"
|
| 293 |
],
|
| 294 |
-
"
|
|
|
|
| 295 |
},
|
| 296 |
{
|
| 297 |
"id": "reproducibility_contract",
|
| 298 |
-
"claim": "The public reproduction path is documented with commands, expected outputs, and exact-match reproduction evidence.",
|
| 299 |
"status": "verified",
|
| 300 |
"evidence": [
|
| 301 |
"REPRODUCIBILITY.md",
|
| 302 |
"docs/data/reproducibility_matrix.json",
|
| 303 |
"notes/reproducibility_audit.md"
|
| 304 |
],
|
| 305 |
-
"
|
|
|
|
| 306 |
}
|
| 307 |
]
|
| 308 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"project": "Ropedia Xperience-10M Task Suite",
|
| 3 |
"scope": "single public Xperience-10M sample episode",
|
| 4 |
+
"readouts": [
|
| 5 |
{
|
| 6 |
"id": "project_status",
|
|
|
|
| 7 |
"status": "verified",
|
| 8 |
"evidence": [
|
| 9 |
"PROJECT_STATUS.md",
|
| 10 |
"docs/data/project_status.json"
|
| 11 |
],
|
| 12 |
+
"readout": "A first-pass reader has a compact current-state summary.",
|
| 13 |
+
"scope_note": "summarizes existing evidence and current limitations"
|
| 14 |
},
|
| 15 |
{
|
| 16 |
"id": "research_roadmap",
|
|
|
|
| 17 |
"status": "current",
|
| 18 |
"evidence": [
|
| 19 |
"RESEARCH_ROADMAP.md",
|
| 20 |
"docs/data/research_roadmap.json"
|
| 21 |
],
|
| 22 |
+
"readout": "The research roadmap is explicit.",
|
| 23 |
+
"scope_note": "connects public-sample task development to multi-episode data preparation, Qwen3-Omni LoRA, robustness runs, and larger omni-model extensions"
|
| 24 |
},
|
| 25 |
+
{
|
| 26 |
+
"id": "official_dataset_card_alignment",
|
|
|
|
| 27 |
"status": "verified",
|
| 28 |
"evidence": [
|
| 29 |
"XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
|
| 30 |
"docs/data/xperience10m_dataset_card_alignment.json",
|
| 31 |
"https://huggingface.co/datasets/ropedia-ai/xperience-10m"
|
| 32 |
],
|
| 33 |
+
"readout": "The public dataset description is aligned with the official gated Xperience-10M dataset card and public sample card.",
|
| 34 |
+
"scope_note": "summarizes upstream public metadata, API listing facts, sample license/tooling, and dataset-card facts; does not grant access or mirror raw data"
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"id": "source_alignment",
|
| 38 |
+
"status": "verified",
|
| 39 |
+
"evidence": [
|
| 40 |
+
"SOURCE_ALIGNMENT_AUDIT.md",
|
| 41 |
+
"docs/data/source_alignment_audit.json",
|
| 42 |
+
"scripts/validate_source_alignment.py"
|
| 43 |
+
],
|
| 44 |
+
"readout": "Source facts, sample details, API-listing notes, and project coverage are validated across repo, website, and HF cards.",
|
| 45 |
+
"scope_note": "offline committed-fact check; does not fetch private gated data"
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"id": "aligned_windows",
|
| 49 |
"status": "verified",
|
| 50 |
"evidence": [
|
| 51 |
"results/episode_task_suite/windows.csv",
|
| 52 |
"results/episode_task_suite/shared_windows.npz",
|
| 53 |
"results/episode_task_suite/summary_report.json"
|
| 54 |
],
|
| 55 |
+
"readout": "The public Xperience-10M sample has been converted into aligned model windows.",
|
| 56 |
+
"scope_note": "5,821 frames, 1,161 windows, one public sample episode"
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"id": "feature_contract",
|
| 60 |
+
"status": "verified",
|
| 61 |
+
"evidence": [
|
| 62 |
+
"results/episode_task_suite/feature_manifest.json",
|
| 63 |
+
"results/episode_task_suite/available_modalities.json"
|
| 64 |
+
],
|
| 65 |
+
"readout": "The current feature contract is explicit and inspectable.",
|
| 66 |
+
"scope_note": "8,546-dimensional aligned multimodal window representation"
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"id": "evaluation_protocol",
|
| 70 |
+
"status": "verified",
|
| 71 |
+
"evidence": [
|
| 72 |
+
"EVALUATION_PROTOCOL.md",
|
| 73 |
+
"docs/data/evaluation_protocol.json",
|
| 74 |
+
"scripts/build_evaluation_protocol.py"
|
| 75 |
+
],
|
| 76 |
+
"readout": "The task evaluation protocol is explicit and generated from committed metrics.",
|
| 77 |
+
"scope_note": "defines windows, split, per-task metrics, leakage controls, and current limitations"
|
| 78 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
{
|
| 80 |
"id": "modality_atlas",
|
|
|
|
| 81 |
"status": "verified",
|
| 82 |
"evidence": [
|
| 83 |
"docs/data/modality_atlas.json",
|
| 84 |
"docs/assets/modalities/",
|
| 85 |
"docs/index.html"
|
| 86 |
],
|
| 87 |
+
"readout": "The public sample modalities are inspectable without raw data redistribution.",
|
| 88 |
+
"scope_note": "derived thumbnails for presentation; raw data remains excluded"
|
| 89 |
},
|
| 90 |
{
|
| 91 |
"id": "task_surface_integrity",
|
|
|
|
| 92 |
"status": "verified",
|
| 93 |
"evidence": [
|
| 94 |
"docs/data/task_surface_integrity.json",
|
| 95 |
"scripts/validate_task_surface.py",
|
| 96 |
"docs/index.html"
|
| 97 |
],
|
| 98 |
+
"readout": "Public task cards stay readable for non-expert readers.",
|
| 99 |
+
"scope_note": "presentation integrity for the public task surface"
|
| 100 |
},
|
| 101 |
{
|
| 102 |
"id": "figure_index",
|
|
|
|
| 103 |
"status": "verified",
|
| 104 |
"evidence": [
|
| 105 |
"FIGURE_INDEX.md",
|
| 106 |
"docs/data/figure_index.json",
|
| 107 |
"scripts/build_figure_index.py"
|
| 108 |
],
|
| 109 |
+
"readout": "Public figures, charts, and modality thumbnails are indexed as project evidence.",
|
| 110 |
+
"scope_note": "records derived visual assets, dimensions, hashes, roles, and source scripts; raw Xperience-10M data remains excluded"
|
| 111 |
},
|
| 112 |
{
|
| 113 |
"id": "brand_assets",
|
|
|
|
| 114 |
"status": "verified",
|
| 115 |
"evidence": [
|
| 116 |
"docs/assets/brand/",
|
| 117 |
"docs/data/brand_assets.json",
|
| 118 |
"scripts/build_brand_assets.py"
|
| 119 |
],
|
| 120 |
+
"readout": "A project logo is consistently applied across public surfaces.",
|
| 121 |
+
"scope_note": "generated logo and deterministic derivatives only; no raw dataset data or model weights"
|
| 122 |
},
|
| 123 |
{
|
| 124 |
"id": "twelve_tasks",
|
|
|
|
| 125 |
"status": "verified",
|
| 126 |
"evidence": [
|
| 127 |
"scripts/episode_task_suite.py",
|
| 128 |
"results/episode_task_suite/*/metrics.json",
|
| 129 |
"results/episode_task_suite/*/predictions.*"
|
| 130 |
],
|
| 131 |
+
"readout": "The 12 task heads are implemented as scripts with saved metrics and predictions.",
|
| 132 |
+
"scope_note": "chronological single-episode split, not cross-episode generalization"
|
| 133 |
},
|
| 134 |
{
|
| 135 |
"id": "minimal_vs_neural",
|
|
|
|
| 136 |
"status": "verified",
|
| 137 |
"evidence": [
|
| 138 |
"scripts/neural_task_models.py",
|
| 139 |
"results/episode_task_suite/neural_mlp/",
|
| 140 |
"docs/assets/task_architectures.png"
|
| 141 |
],
|
| 142 |
+
"readout": "Minimal and neural heads use the same task contracts.",
|
| 143 |
+
"scope_note": "small heads only; not a foundation model"
|
| 144 |
},
|
| 145 |
{
|
| 146 |
"id": "research_directions",
|
|
|
|
| 147 |
"status": "verified",
|
| 148 |
"evidence": [
|
| 149 |
"results/episode_task_suite/research_directions/research_direction_taxonomy.json",
|
| 150 |
"docs/data/research_directions.json"
|
| 151 |
],
|
| 152 |
+
"readout": "Four Ropedia research directions are mapped honestly as direct, proxy, or diagnostic evidence.",
|
| 153 |
+
"scope_note": "some directions remain proxy-only"
|
| 154 |
},
|
| 155 |
{
|
| 156 |
"id": "direction_extensions",
|
|
|
|
| 157 |
"status": "verified",
|
| 158 |
"evidence": [
|
| 159 |
"results/episode_task_suite/research_direction_extensions/research_direction_extension_results.json",
|
| 160 |
"docs/data/research_direction_extensions.json"
|
| 161 |
],
|
| 162 |
+
"readout": "Four extra direction probes are coded and evaluated.",
|
| 163 |
+
"scope_note": "single-episode probes, not full research-direction solutions"
|
| 164 |
},
|
| 165 |
{
|
| 166 |
"id": "qwen3_omni_diagnostic_pilot",
|
|
|
|
| 167 |
"status": "verified_diagnostic",
|
| 168 |
"evidence": [
|
| 169 |
"docs/data/omni_finetune_verified_result.json",
|
|
|
|
| 171 |
"scripts/omni/package_verified_omni_result.py",
|
| 172 |
"scripts/omni/audit_verified_omni_package.py"
|
| 173 |
],
|
| 174 |
+
"readout": "Qwen3-Omni has a verified selected-episode held-out diagnostic pilot.",
|
| 175 |
+
"scope_note": "the pipeline is verified, but model quality is weak: JSON validity is below target and action/subtask metrics are low"
|
| 176 |
},
|
| 177 |
{
|
| 178 |
"id": "multi_episode_quality_improvement",
|
|
|
|
| 179 |
"status": "active_next_step",
|
| 180 |
"evidence": [
|
| 181 |
"scripts/omni/run_128_fullsplit_parallel_export_8gpu.sh",
|
| 182 |
"docs/data/omni_finetune_verified_result.json",
|
| 183 |
"FOUNDATION_MODEL_PLAN.md"
|
| 184 |
],
|
| 185 |
+
"readout": "The next Qwen3-Omni step is structured-output and task-quality improvement on the same selected split.",
|
| 186 |
+
"scope_note": "stronger model quality requires output-format improvements and action/subtask error analysis"
|
| 187 |
},
|
| 188 |
{
|
| 189 |
"id": "scale_up_status_check",
|
|
|
|
| 190 |
"status": "verified",
|
| 191 |
"evidence": [
|
| 192 |
"scripts/validate_scope_claims.py",
|
| 193 |
"docs/data/scope_claims_audit.json"
|
| 194 |
],
|
| 195 |
+
"readout": "Older pilot path strings are tracked as setup-file provenance.",
|
| 196 |
+
"scope_note": "run/path identifiers stay separate from completed held-out-episode results"
|
| 197 |
},
|
| 198 |
{
|
| 199 |
"id": "mirror_parity",
|
|
|
|
| 200 |
"status": "verified",
|
| 201 |
"evidence": [
|
| 202 |
"scripts/validate_mirror_parity.py",
|
| 203 |
"docs/data/mirror_parity.json"
|
| 204 |
],
|
| 205 |
+
"readout": "Prepared GitHub and Hugging Face mirrors carry matching critical data, visual, HTML, and validator files.",
|
| 206 |
+
"scope_note": "compares prepared local mirror bundles before upload; live URLs are checked after publishing"
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"id": "publication_package",
|
|
|
|
| 210 |
"status": "verified",
|
| 211 |
"evidence": [
|
| 212 |
"scripts/validate_publication_package.py",
|
| 213 |
"docs/data/publication_audit.json"
|
| 214 |
],
|
| 215 |
+
"readout": "The public GitHub and Hugging Face bundles contain the intended release files.",
|
| 216 |
+
"scope_note": "checks public files, HF bundles, and current public-card assets; temporary local outputs are excluded"
|
| 217 |
},
|
| 218 |
{
|
| 219 |
"id": "website_integrity",
|
|
|
|
| 220 |
"status": "verified",
|
| 221 |
"evidence": [
|
| 222 |
"scripts/validate_website_integrity.py",
|
| 223 |
"docs/data/website_integrity.json"
|
| 224 |
],
|
| 225 |
+
"readout": "The public website has checked local references.",
|
| 226 |
+
"scope_note": "checks local links, anchors, JSON data, and referenced images; external URLs are not fetched"
|
| 227 |
},
|
| 228 |
{
|
| 229 |
"id": "rendered_site_check",
|
|
|
|
| 230 |
"status": "verified",
|
| 231 |
"evidence": [
|
| 232 |
"RENDERED_SITE_CHECK.md",
|
| 233 |
"scripts/build_rendered_site_check.py",
|
| 234 |
"docs/data/rendered_site_check.json"
|
| 235 |
],
|
| 236 |
+
"readout": "The rendered website walkthrough has a browser-level interaction check.",
|
| 237 |
+
"scope_note": "checks local page load, tab switch, walkthrough deep link, player controls, and console health"
|
| 238 |
},
|
| 239 |
{
|
| 240 |
"id": "quality_gates",
|
|
|
|
| 241 |
"status": "verified",
|
| 242 |
"evidence": [
|
| 243 |
"QUALITY_GATES.md",
|
| 244 |
"scripts/build_quality_gates.py",
|
| 245 |
"docs/data/quality_gates.json"
|
| 246 |
],
|
| 247 |
+
"readout": "The release gate is explicit.",
|
| 248 |
+
"scope_note": "summarizes packaging and live-mirror checks; cross-episode model quality is measured by later held-out reports"
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"id": "live_publication_status",
|
|
|
|
| 252 |
"status": "verified",
|
| 253 |
"evidence": [
|
| 254 |
"scripts/verify_live_publication.py",
|
| 255 |
"docs/data/live_publication_status.json"
|
| 256 |
],
|
| 257 |
+
"readout": "The live public mirrors are checked after upload.",
|
| 258 |
+
"scope_note": "fetches public GitHub/HF URLs; it does not validate private training state"
|
| 259 |
},
|
| 260 |
{
|
| 261 |
"id": "citation_metadata",
|
|
|
|
| 262 |
"status": "verified",
|
| 263 |
"evidence": [
|
| 264 |
"CITATION.cff",
|
|
|
|
| 266 |
"docs/data/project_manifest.json",
|
| 267 |
"LICENSE"
|
| 268 |
],
|
| 269 |
+
"readout": "The project is externally citable and machine-readable.",
|
| 270 |
+
"scope_note": "code license does not override original Xperience-10M dataset terms"
|
| 271 |
},
|
| 272 |
{
|
| 273 |
"id": "project_path",
|
|
|
|
| 274 |
"status": "verified",
|
| 275 |
"evidence": [
|
| 276 |
"docs/data/project_packet.json",
|
|
|
|
| 279 |
"README.md",
|
| 280 |
"docs/index.html"
|
| 281 |
],
|
| 282 |
+
"readout": "A first-time reader has an explicit project path.",
|
| 283 |
+
"scope_note": "guides inspection across data, tasks, results, and scale-up status"
|
| 284 |
},
|
| 285 |
{
|
| 286 |
"id": "artifact_index",
|
|
|
|
| 287 |
"status": "verified",
|
| 288 |
"evidence": [
|
| 289 |
"ARTIFACT_GUIDE.md",
|
| 290 |
"scripts/build_artifact_index.py",
|
| 291 |
"docs/data/artifact_index.json"
|
| 292 |
],
|
| 293 |
+
"readout": "The core project artifacts are grouped for human reading and indexed with existence, size, and hash metadata where stable.",
|
| 294 |
+
"scope_note": "selective source-of-truth catalog, not a complete inventory of every output file"
|
| 295 |
},
|
| 296 |
{
|
| 297 |
"id": "reproducibility_contract",
|
|
|
|
| 298 |
"status": "verified",
|
| 299 |
"evidence": [
|
| 300 |
"REPRODUCIBILITY.md",
|
| 301 |
"docs/data/reproducibility_matrix.json",
|
| 302 |
"notes/reproducibility_audit.md"
|
| 303 |
],
|
| 304 |
+
"readout": "The public reproduction path is documented with commands, expected outputs, and exact-match reproduction evidence.",
|
| 305 |
+
"scope_note": "publicly reproduces the single-episode pipeline; multi-episode Qwen3-Omni metrics are added only after data preparation and held-out evaluation"
|
| 306 |
}
|
| 307 |
]
|
| 308 |
}
|
docs/data/mirror_parity.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
docs/data/omni_model_comparison.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"status": "pass",
|
| 5 |
"version_count": 3,
|
| 6 |
"model_group_count": 5,
|
|
@@ -1758,6 +1758,6 @@
|
|
| 1758 |
],
|
| 1759 |
"pending": [
|
| 1760 |
"Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.",
|
| 1761 |
-
"Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before
|
| 1762 |
]
|
| 1763 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
|
| 3 |
+
"generated_at_utc": "2026-06-22T10:59:59+00:00",
|
| 4 |
"status": "pass",
|
| 5 |
"version_count": 3,
|
| 6 |
"model_group_count": 5,
|
|
|
|
| 1758 |
],
|
| 1759 |
"pending": [
|
| 1760 |
"Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.",
|
| 1761 |
+
"Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before presenting v6 as globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses subtask, next-action, object micro-F1, and JSON validity slightly."
|
| 1762 |
]
|
| 1763 |
}
|
docs/data/project_brief.json
CHANGED
|
@@ -56,7 +56,7 @@
|
|
| 56 |
"Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
|
| 57 |
],
|
| 58 |
"scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The final multi-episode Qwen3-Omni diagnostic result verifies the training loop and strict-JSON output reliability, but does not yet show strong action/subtask model quality.",
|
| 59 |
-
"next_stage": "Improve action/subtask quality through error analysis before larger robustness or alternative-backbone
|
| 60 |
"entry_points": {
|
| 61 |
"visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
|
| 62 |
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",
|
|
|
|
| 56 |
"Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
|
| 57 |
],
|
| 58 |
"scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The final multi-episode Qwen3-Omni diagnostic result verifies the training loop and strict-JSON output reliability, but does not yet show strong action/subtask model quality.",
|
| 59 |
+
"next_stage": "Improve action/subtask quality through error analysis before presenting larger robustness or alternative-backbone results.",
|
| 60 |
"entry_points": {
|
| 61 |
"visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
|
| 62 |
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",
|
docs/data/public_reader_map.json
CHANGED
|
@@ -6,77 +6,124 @@
|
|
| 6 |
{
|
| 7 |
"reader_goal": "Understand the project in one pass",
|
| 8 |
"start_here": "PROJECT_BRIEF.md",
|
| 9 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 10 |
},
|
| 11 |
{
|
| 12 |
"reader_goal": "Understand the two evidence lines",
|
| 13 |
"start_here": "TWO_EVIDENCE_LINES.md",
|
| 14 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"reader_goal": "See the visual public dashboard",
|
| 18 |
"start_here": "GitHub Pages dashboard or Hugging Face Space",
|
| 19 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"reader_goal": "Decode project terminology",
|
| 23 |
"start_here": "GLOSSARY.md",
|
| 24 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"reader_goal": "Understand the data unit",
|
| 28 |
"start_here": "results/episode_task_suite/windows.csv",
|
| 29 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 30 |
},
|
| 31 |
{
|
| 32 |
"reader_goal": "Trace the 128-episode split",
|
| 33 |
"start_here": "XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
|
| 34 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"reader_goal": "Inspect the 20-task benchmark",
|
| 38 |
"start_here": "TASK_SUITE_20.md",
|
| 39 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"reader_goal": "Compare current results",
|
| 43 |
"start_here": "RESEARCH_TAKEAWAYS.md",
|
| 44 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"reader_goal": "Compare 1-episode and 128-episode methods",
|
| 48 |
"start_here": "Homepage radar section",
|
| 49 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 50 |
},
|
| 51 |
{
|
| 52 |
"reader_goal": "Read Qwen3-Omni v1-v6 correctly",
|
| 53 |
"start_here": "QWEN3_OMNI_RUN_LINEAGE.md",
|
| 54 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 55 |
},
|
| 56 |
{
|
| 57 |
"reader_goal": "Find all derived artifacts",
|
| 58 |
"start_here": "ARTIFACT_GUIDE.md",
|
| 59 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 60 |
},
|
| 61 |
{
|
| 62 |
"reader_goal": "Download model weights with their matching results",
|
| 63 |
"start_here": "Hugging Face weights/results repo",
|
| 64 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
},
|
| 66 |
{
|
| 67 |
"reader_goal": "Reproduce or extend the work",
|
| 68 |
"start_here": "REPRODUCIBILITY.md",
|
| 69 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
},
|
| 71 |
{
|
| 72 |
"reader_goal": "Understand foundation-model directions",
|
| 73 |
"start_here": "THREE_FOUNDATION_PIPELINES.md",
|
| 74 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 75 |
},
|
| 76 |
{
|
| 77 |
"reader_goal": "Check public-release health",
|
| 78 |
"start_here": "PUBLIC_SURFACE_QA.md",
|
| 79 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 80 |
}
|
| 81 |
],
|
| 82 |
"public_surfaces": [
|
|
@@ -125,31 +172,49 @@
|
|
| 125 |
"Foundation directions",
|
| 126 |
"Public-release checks"
|
| 127 |
],
|
| 128 |
-
"
|
| 129 |
-
{
|
| 130 |
-
"
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
"
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
"
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
}
|
| 154 |
]
|
| 155 |
}
|
|
|
|
| 6 |
{
|
| 7 |
"reader_goal": "Understand the project in one pass",
|
| 8 |
"start_here": "PROJECT_BRIEF.md",
|
| 9 |
+
"then_inspect": [
|
| 10 |
+
"PROJECT_STATUS.md",
|
| 11 |
+
"RESEARCH_TAKEAWAYS.md"
|
| 12 |
+
]
|
| 13 |
},
|
| 14 |
{
|
| 15 |
"reader_goal": "Understand the two evidence lines",
|
| 16 |
"start_here": "TWO_EVIDENCE_LINES.md",
|
| 17 |
+
"then_inspect": [
|
| 18 |
+
"docs/data/two_evidence_lines.json",
|
| 19 |
+
"docs/data/two_evidence_line_result_summary.json"
|
| 20 |
+
]
|
| 21 |
},
|
| 22 |
{
|
| 23 |
"reader_goal": "See the visual public dashboard",
|
| 24 |
"start_here": "GitHub Pages dashboard or Hugging Face Space",
|
| 25 |
+
"then_inspect": [
|
| 26 |
+
"docs/index.html",
|
| 27 |
+
"docs/data/project_packet.json"
|
| 28 |
+
]
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"reader_goal": "Decode project terminology",
|
| 32 |
"start_here": "GLOSSARY.md",
|
| 33 |
+
"then_inspect": [
|
| 34 |
+
"docs/data/glossary.json",
|
| 35 |
+
"Homepage Glossary section"
|
| 36 |
+
]
|
| 37 |
},
|
| 38 |
{
|
| 39 |
"reader_goal": "Understand the data unit",
|
| 40 |
"start_here": "results/episode_task_suite/windows.csv",
|
| 41 |
+
"then_inspect": [
|
| 42 |
+
"results/episode_task_suite/feature_manifest.json",
|
| 43 |
+
"docs/data/raw_sample_files.json"
|
| 44 |
+
]
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"reader_goal": "Trace the 128-episode split",
|
| 48 |
"start_here": "XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
|
| 49 |
+
"then_inspect": [
|
| 50 |
+
"docs/data/xperience10m_128_episode_feature_index.json",
|
| 51 |
+
"results/omni_finetune/xperience10m_128_episode_selection.csv"
|
| 52 |
+
]
|
| 53 |
},
|
| 54 |
{
|
| 55 |
"reader_goal": "Inspect the 20-task benchmark",
|
| 56 |
"start_here": "TASK_SUITE_20.md",
|
| 57 |
+
"then_inspect": [
|
| 58 |
+
"docs/data/task_suite_20.json",
|
| 59 |
+
"EVALUATION_PROTOCOL.md"
|
| 60 |
+
]
|
| 61 |
},
|
| 62 |
{
|
| 63 |
"reader_goal": "Compare current results",
|
| 64 |
"start_here": "RESEARCH_TAKEAWAYS.md",
|
| 65 |
+
"then_inspect": [
|
| 66 |
+
"docs/data/task_method_20_result_matrix.json",
|
| 67 |
+
"docs/data/unified_task_model_radar.json"
|
| 68 |
+
]
|
| 69 |
},
|
| 70 |
{
|
| 71 |
"reader_goal": "Compare 1-episode and 128-episode methods",
|
| 72 |
"start_here": "Homepage radar section",
|
| 73 |
+
"then_inspect": [
|
| 74 |
+
"docs/data/single_episode_task_model_radar.json",
|
| 75 |
+
"docs/data/episode128_task_model_radar.json"
|
| 76 |
+
]
|
| 77 |
},
|
| 78 |
{
|
| 79 |
"reader_goal": "Read Qwen3-Omni v1-v6 correctly",
|
| 80 |
"start_here": "QWEN3_OMNI_RUN_LINEAGE.md",
|
| 81 |
+
"then_inspect": [
|
| 82 |
+
"docs/data/qwen3_omni_run_lineage.json",
|
| 83 |
+
"docs/data/qwen3_v5_v6_comparison.json"
|
| 84 |
+
]
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"reader_goal": "Find all derived artifacts",
|
| 88 |
"start_here": "ARTIFACT_GUIDE.md",
|
| 89 |
+
"then_inspect": [
|
| 90 |
+
"Hugging Face artifact dataset",
|
| 91 |
+
"docs/data/artifact_index.json"
|
| 92 |
+
]
|
| 93 |
},
|
| 94 |
{
|
| 95 |
"reader_goal": "Download model weights with their matching results",
|
| 96 |
"start_here": "Hugging Face weights/results repo",
|
| 97 |
+
"then_inspect": [
|
| 98 |
+
"manifest.json",
|
| 99 |
+
"analysis/docs/data/task_method_20_result_matrix.json",
|
| 100 |
+
"results/"
|
| 101 |
+
]
|
| 102 |
},
|
| 103 |
{
|
| 104 |
"reader_goal": "Reproduce or extend the work",
|
| 105 |
"start_here": "REPRODUCIBILITY.md",
|
| 106 |
+
"then_inspect": [
|
| 107 |
+
"QUALITY_GATES.md",
|
| 108 |
+
"scripts/",
|
| 109 |
+
"results/"
|
| 110 |
+
]
|
| 111 |
},
|
| 112 |
{
|
| 113 |
"reader_goal": "Understand foundation-model directions",
|
| 114 |
"start_here": "THREE_FOUNDATION_PIPELINES.md",
|
| 115 |
+
"then_inspect": [
|
| 116 |
+
"FOUNDATION_MODEL_PLAN.md",
|
| 117 |
+
"docs/data/three_foundation_pipelines.json"
|
| 118 |
+
]
|
| 119 |
},
|
| 120 |
{
|
| 121 |
"reader_goal": "Check public-release health",
|
| 122 |
"start_here": "PUBLIC_SURFACE_QA.md",
|
| 123 |
+
"then_inspect": [
|
| 124 |
+
"docs/data/live_publication_status.json",
|
| 125 |
+
"docs/data/mirror_parity.json"
|
| 126 |
+
]
|
| 127 |
}
|
| 128 |
],
|
| 129 |
"public_surfaces": [
|
|
|
|
| 172 |
"Foundation directions",
|
| 173 |
"Public-release checks"
|
| 174 |
],
|
| 175 |
+
"reading_scopes": [
|
| 176 |
+
{
|
| 177 |
+
"public_evidence": [
|
| 178 |
+
"results/episode_task_suite/",
|
| 179 |
+
"docs/data/task_suite_20.json"
|
| 180 |
+
],
|
| 181 |
+
"topic": "Single public-sample task behavior",
|
| 182 |
+
"scope_note": "Describes one public sample episode, not the full dataset distribution."
|
| 183 |
+
},
|
| 184 |
+
{
|
| 185 |
+
"public_evidence": [
|
| 186 |
+
"XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
|
| 187 |
+
"docs/data/xperience10m_128_episode_feature_index.json",
|
| 188 |
+
"results/omni_finetune/*128*",
|
| 189 |
+
"docs/data/omni_model_comparison.json"
|
| 190 |
+
],
|
| 191 |
+
"topic": "128-episode method comparison",
|
| 192 |
+
"scope_note": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"public_evidence": [
|
| 196 |
+
"QWEN3_OMNI_RUN_LINEAGE.md",
|
| 197 |
+
"docs/data/qwen3_omni_run_lineage.json"
|
| 198 |
+
],
|
| 199 |
+
"topic": "Qwen3-Omni v1-v6 lineage",
|
| 200 |
+
"scope_note": "v1-v4 are pipeline and ablation evidence, v5 is the pinned prior release, and v6 is the current public 20-task Qwen row."
|
| 201 |
+
},
|
| 202 |
+
{
|
| 203 |
+
"public_evidence": [
|
| 204 |
+
"Verified Qwen3-Omni and Cosmos3 result packages",
|
| 205 |
+
"model cards"
|
| 206 |
+
],
|
| 207 |
+
"topic": "Foundation-model track quality",
|
| 208 |
+
"scope_note": "Numeric task scores appear only when a task-specific eval or probe exists."
|
| 209 |
+
},
|
| 210 |
+
{
|
| 211 |
+
"public_evidence": [
|
| 212 |
+
"REPRODUCIBILITY.md",
|
| 213 |
+
"QUALITY_GATES.md",
|
| 214 |
+
"release validators"
|
| 215 |
+
],
|
| 216 |
+
"topic": "Reproducibility",
|
| 217 |
+
"scope_note": "Raw gated Xperience-10M files and full foundation weights are not redistributed."
|
| 218 |
}
|
| 219 |
]
|
| 220 |
}
|
docs/data/public_surface_qa.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Project Surface",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
|
| 6 |
"checks": [
|
| 7 |
{
|
|
@@ -18,7 +18,7 @@
|
|
| 18 |
"website_integrity": {
|
| 19 |
"exists": true,
|
| 20 |
"status": "pass",
|
| 21 |
-
"generated_at_utc": "2026-06-
|
| 22 |
},
|
| 23 |
"rendered_site_check": {
|
| 24 |
"exists": true,
|
|
@@ -28,27 +28,27 @@
|
|
| 28 |
"task_surface_integrity": {
|
| 29 |
"exists": true,
|
| 30 |
"status": "pass",
|
| 31 |
-
"generated_at_utc": "2026-06-
|
| 32 |
},
|
| 33 |
"source_alignment": {
|
| 34 |
"exists": true,
|
| 35 |
"status": "pass",
|
| 36 |
-
"generated_at_utc": "2026-06-
|
| 37 |
},
|
| 38 |
"scale_up_status": {
|
| 39 |
"exists": true,
|
| 40 |
"status": "pass",
|
| 41 |
-
"generated_at_utc": "2026-06-
|
| 42 |
},
|
| 43 |
"publication_package": {
|
| 44 |
"exists": true,
|
| 45 |
"status": "pass",
|
| 46 |
-
"generated_at_utc": "2026-06-
|
| 47 |
},
|
| 48 |
"mirror_parity": {
|
| 49 |
"exists": true,
|
| 50 |
"status": "pass",
|
| 51 |
-
"generated_at_utc": "2026-06-
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"failures": {}
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Project Surface",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-22T11:18:45+00:00",
|
| 5 |
"scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
|
| 6 |
"checks": [
|
| 7 |
{
|
|
|
|
| 18 |
"website_integrity": {
|
| 19 |
"exists": true,
|
| 20 |
"status": "pass",
|
| 21 |
+
"generated_at_utc": "2026-06-22T11:17:07+00:00"
|
| 22 |
},
|
| 23 |
"rendered_site_check": {
|
| 24 |
"exists": true,
|
|
|
|
| 28 |
"task_surface_integrity": {
|
| 29 |
"exists": true,
|
| 30 |
"status": "pass",
|
| 31 |
+
"generated_at_utc": "2026-06-22T11:17:07+00:00"
|
| 32 |
},
|
| 33 |
"source_alignment": {
|
| 34 |
"exists": true,
|
| 35 |
"status": "pass",
|
| 36 |
+
"generated_at_utc": "2026-06-22T11:17:08+00:00"
|
| 37 |
},
|
| 38 |
"scale_up_status": {
|
| 39 |
"exists": true,
|
| 40 |
"status": "pass",
|
| 41 |
+
"generated_at_utc": "2026-06-22T11:17:10+00:00"
|
| 42 |
},
|
| 43 |
"publication_package": {
|
| 44 |
"exists": true,
|
| 45 |
"status": "pass",
|
| 46 |
+
"generated_at_utc": "2026-06-22T11:18:16+00:00"
|
| 47 |
},
|
| 48 |
"mirror_parity": {
|
| 49 |
"exists": true,
|
| 50 |
"status": "pass",
|
| 51 |
+
"generated_at_utc": "2026-06-22T11:18:11+00:00"
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"failures": {}
|
docs/data/publication_audit.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
@@ -246,8 +246,8 @@
|
|
| 246 |
"hf_space_bundle": {
|
| 247 |
"root": "hf_publish/space",
|
| 248 |
"exists": true,
|
| 249 |
-
"file_count":
|
| 250 |
-
"text_file_count":
|
| 251 |
"largest_file": {
|
| 252 |
"path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
|
| 253 |
"bytes": 10221085
|
|
@@ -257,8 +257,8 @@
|
|
| 257 |
"hf_artifact_bundle": {
|
| 258 |
"root": "hf_publish/artifacts",
|
| 259 |
"exists": true,
|
| 260 |
-
"file_count":
|
| 261 |
-
"text_file_count":
|
| 262 |
"largest_file": {
|
| 263 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 264 |
"bytes": 135591061
|
|
@@ -268,8 +268,8 @@
|
|
| 268 |
"hf_model_bundle": {
|
| 269 |
"root": "hf_publish/model",
|
| 270 |
"exists": true,
|
| 271 |
-
"file_count":
|
| 272 |
-
"text_file_count":
|
| 273 |
"largest_file": {
|
| 274 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 275 |
"bytes": 135591061
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-22T11:18:16+00:00",
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
|
|
| 246 |
"hf_space_bundle": {
|
| 247 |
"root": "hf_publish/space",
|
| 248 |
"exists": true,
|
| 249 |
+
"file_count": 640,
|
| 250 |
+
"text_file_count": 479,
|
| 251 |
"largest_file": {
|
| 252 |
"path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
|
| 253 |
"bytes": 10221085
|
|
|
|
| 257 |
"hf_artifact_bundle": {
|
| 258 |
"root": "hf_publish/artifacts",
|
| 259 |
"exists": true,
|
| 260 |
+
"file_count": 4708,
|
| 261 |
+
"text_file_count": 1334,
|
| 262 |
"largest_file": {
|
| 263 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 264 |
"bytes": 135591061
|
|
|
|
| 268 |
"hf_model_bundle": {
|
| 269 |
"root": "hf_publish/model",
|
| 270 |
"exists": true,
|
| 271 |
+
"file_count": 5470,
|
| 272 |
+
"text_file_count": 1508,
|
| 273 |
"largest_file": {
|
| 274 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 275 |
"bytes": 135591061
|
metrics/evidence_contract.json
CHANGED
|
@@ -1,170 +1,169 @@
|
|
| 1 |
{
|
| 2 |
"project": "Ropedia Xperience-10M Task Suite",
|
| 3 |
"scope": "single public Xperience-10M sample episode",
|
| 4 |
-
"
|
| 5 |
{
|
| 6 |
"id": "project_status",
|
| 7 |
-
"claim": "A first-pass reader has a compact current-state summary.",
|
| 8 |
"status": "verified",
|
| 9 |
"evidence": [
|
| 10 |
"PROJECT_STATUS.md",
|
| 11 |
"docs/data/project_status.json"
|
| 12 |
],
|
| 13 |
-
"
|
|
|
|
| 14 |
},
|
| 15 |
{
|
| 16 |
"id": "research_roadmap",
|
| 17 |
-
"claim": "The research roadmap is explicit.",
|
| 18 |
"status": "current",
|
| 19 |
"evidence": [
|
| 20 |
"RESEARCH_ROADMAP.md",
|
| 21 |
"docs/data/research_roadmap.json"
|
| 22 |
],
|
| 23 |
-
"
|
|
|
|
| 24 |
},
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
"claim": "The public dataset description is aligned with the official gated Xperience-10M dataset card and public sample card.",
|
| 28 |
"status": "verified",
|
| 29 |
"evidence": [
|
| 30 |
"XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
|
| 31 |
"docs/data/xperience10m_dataset_card_alignment.json",
|
| 32 |
"https://huggingface.co/datasets/ropedia-ai/xperience-10m"
|
| 33 |
],
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
"
|
| 50 |
"status": "verified",
|
| 51 |
"evidence": [
|
| 52 |
"results/episode_task_suite/windows.csv",
|
| 53 |
"results/episode_task_suite/shared_windows.npz",
|
| 54 |
"results/episode_task_suite/summary_report.json"
|
| 55 |
],
|
| 56 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
},
|
| 58 |
-
{
|
| 59 |
-
"id": "feature_contract",
|
| 60 |
-
"claim": "The current feature contract is explicit and inspectable.",
|
| 61 |
-
"status": "verified",
|
| 62 |
-
"evidence": [
|
| 63 |
-
"results/episode_task_suite/feature_manifest.json",
|
| 64 |
-
"results/episode_task_suite/available_modalities.json"
|
| 65 |
-
],
|
| 66 |
-
"boundary": "8,546-dimensional aligned multimodal window representation"
|
| 67 |
-
},
|
| 68 |
-
{
|
| 69 |
-
"id": "evaluation_protocol",
|
| 70 |
-
"claim": "The task evaluation protocol is explicit and generated from committed metrics.",
|
| 71 |
-
"status": "verified",
|
| 72 |
-
"evidence": [
|
| 73 |
-
"EVALUATION_PROTOCOL.md",
|
| 74 |
-
"docs/data/evaluation_protocol.json",
|
| 75 |
-
"scripts/build_evaluation_protocol.py"
|
| 76 |
-
],
|
| 77 |
-
"boundary": "defines windows, split, per-task metrics, leakage controls, and current limitations"
|
| 78 |
-
},
|
| 79 |
{
|
| 80 |
"id": "modality_atlas",
|
| 81 |
-
"claim": "The public sample modalities are inspectable without raw data redistribution.",
|
| 82 |
"status": "verified",
|
| 83 |
"evidence": [
|
| 84 |
"docs/data/modality_atlas.json",
|
| 85 |
"docs/assets/modalities/",
|
| 86 |
"docs/index.html"
|
| 87 |
],
|
| 88 |
-
"
|
|
|
|
| 89 |
},
|
| 90 |
{
|
| 91 |
"id": "task_surface_integrity",
|
| 92 |
-
"claim": "Public task cards stay readable for non-expert readers.",
|
| 93 |
"status": "verified",
|
| 94 |
"evidence": [
|
| 95 |
"docs/data/task_surface_integrity.json",
|
| 96 |
"scripts/validate_task_surface.py",
|
| 97 |
"docs/index.html"
|
| 98 |
],
|
| 99 |
-
"
|
|
|
|
| 100 |
},
|
| 101 |
{
|
| 102 |
"id": "figure_index",
|
| 103 |
-
"claim": "Public figures, charts, and modality thumbnails are indexed as project evidence.",
|
| 104 |
"status": "verified",
|
| 105 |
"evidence": [
|
| 106 |
"FIGURE_INDEX.md",
|
| 107 |
"docs/data/figure_index.json",
|
| 108 |
"scripts/build_figure_index.py"
|
| 109 |
],
|
| 110 |
-
"
|
|
|
|
| 111 |
},
|
| 112 |
{
|
| 113 |
"id": "brand_assets",
|
| 114 |
-
"claim": "A project logo is consistently applied across public surfaces.",
|
| 115 |
"status": "verified",
|
| 116 |
"evidence": [
|
| 117 |
"docs/assets/brand/",
|
| 118 |
"docs/data/brand_assets.json",
|
| 119 |
"scripts/build_brand_assets.py"
|
| 120 |
],
|
| 121 |
-
"
|
|
|
|
| 122 |
},
|
| 123 |
{
|
| 124 |
"id": "twelve_tasks",
|
| 125 |
-
"claim": "The 12 task heads are implemented as scripts with saved metrics and predictions.",
|
| 126 |
"status": "verified",
|
| 127 |
"evidence": [
|
| 128 |
"scripts/episode_task_suite.py",
|
| 129 |
"results/episode_task_suite/*/metrics.json",
|
| 130 |
"results/episode_task_suite/*/predictions.*"
|
| 131 |
],
|
| 132 |
-
"
|
|
|
|
| 133 |
},
|
| 134 |
{
|
| 135 |
"id": "minimal_vs_neural",
|
| 136 |
-
"claim": "Minimal and neural heads use the same task contracts.",
|
| 137 |
"status": "verified",
|
| 138 |
"evidence": [
|
| 139 |
"scripts/neural_task_models.py",
|
| 140 |
"results/episode_task_suite/neural_mlp/",
|
| 141 |
"docs/assets/task_architectures.png"
|
| 142 |
],
|
| 143 |
-
"
|
|
|
|
| 144 |
},
|
| 145 |
{
|
| 146 |
"id": "research_directions",
|
| 147 |
-
"claim": "Four Ropedia research directions are mapped honestly as direct, proxy, or diagnostic evidence.",
|
| 148 |
"status": "verified",
|
| 149 |
"evidence": [
|
| 150 |
"results/episode_task_suite/research_directions/research_direction_taxonomy.json",
|
| 151 |
"docs/data/research_directions.json"
|
| 152 |
],
|
| 153 |
-
"
|
|
|
|
| 154 |
},
|
| 155 |
{
|
| 156 |
"id": "direction_extensions",
|
| 157 |
-
"claim": "Four extra direction probes are coded and evaluated.",
|
| 158 |
"status": "verified",
|
| 159 |
"evidence": [
|
| 160 |
"results/episode_task_suite/research_direction_extensions/research_direction_extension_results.json",
|
| 161 |
"docs/data/research_direction_extensions.json"
|
| 162 |
],
|
| 163 |
-
"
|
|
|
|
| 164 |
},
|
| 165 |
{
|
| 166 |
"id": "qwen3_omni_diagnostic_pilot",
|
| 167 |
-
"claim": "Qwen3-Omni has a verified selected-episode held-out diagnostic pilot.",
|
| 168 |
"status": "verified_diagnostic",
|
| 169 |
"evidence": [
|
| 170 |
"docs/data/omni_finetune_verified_result.json",
|
|
@@ -172,94 +171,94 @@
|
|
| 172 |
"scripts/omni/package_verified_omni_result.py",
|
| 173 |
"scripts/omni/audit_verified_omni_package.py"
|
| 174 |
],
|
| 175 |
-
"
|
|
|
|
| 176 |
},
|
| 177 |
{
|
| 178 |
"id": "multi_episode_quality_improvement",
|
| 179 |
-
"claim": "The next Qwen3-Omni step is structured-output and task-quality improvement on the same selected split.",
|
| 180 |
"status": "active_next_step",
|
| 181 |
"evidence": [
|
| 182 |
"scripts/omni/run_128_fullsplit_parallel_export_8gpu.sh",
|
| 183 |
"docs/data/omni_finetune_verified_result.json",
|
| 184 |
"FOUNDATION_MODEL_PLAN.md"
|
| 185 |
],
|
| 186 |
-
"
|
|
|
|
| 187 |
},
|
| 188 |
{
|
| 189 |
"id": "scale_up_status_check",
|
| 190 |
-
"claim": "Older pilot path strings are tracked as setup-file provenance.",
|
| 191 |
"status": "verified",
|
| 192 |
"evidence": [
|
| 193 |
"scripts/validate_scope_claims.py",
|
| 194 |
"docs/data/scope_claims_audit.json"
|
| 195 |
],
|
| 196 |
-
"
|
|
|
|
| 197 |
},
|
| 198 |
{
|
| 199 |
"id": "mirror_parity",
|
| 200 |
-
"claim": "Prepared GitHub and Hugging Face mirrors carry matching critical data, visual, HTML, and validator files.",
|
| 201 |
"status": "verified",
|
| 202 |
"evidence": [
|
| 203 |
"scripts/validate_mirror_parity.py",
|
| 204 |
"docs/data/mirror_parity.json"
|
| 205 |
],
|
| 206 |
-
"
|
|
|
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"id": "publication_package",
|
| 210 |
-
"claim": "The public GitHub and Hugging Face bundles contain the intended release files.",
|
| 211 |
"status": "verified",
|
| 212 |
"evidence": [
|
| 213 |
"scripts/validate_publication_package.py",
|
| 214 |
"docs/data/publication_audit.json"
|
| 215 |
],
|
| 216 |
-
"
|
|
|
|
| 217 |
},
|
| 218 |
{
|
| 219 |
"id": "website_integrity",
|
| 220 |
-
"claim": "The public website has checked local references.",
|
| 221 |
"status": "verified",
|
| 222 |
"evidence": [
|
| 223 |
"scripts/validate_website_integrity.py",
|
| 224 |
"docs/data/website_integrity.json"
|
| 225 |
],
|
| 226 |
-
"
|
|
|
|
| 227 |
},
|
| 228 |
{
|
| 229 |
"id": "rendered_site_check",
|
| 230 |
-
"claim": "The rendered website walkthrough has a browser-level interaction check.",
|
| 231 |
"status": "verified",
|
| 232 |
"evidence": [
|
| 233 |
"RENDERED_SITE_CHECK.md",
|
| 234 |
"scripts/build_rendered_site_check.py",
|
| 235 |
"docs/data/rendered_site_check.json"
|
| 236 |
],
|
| 237 |
-
"
|
|
|
|
| 238 |
},
|
| 239 |
{
|
| 240 |
"id": "quality_gates",
|
| 241 |
-
"claim": "The release gate is explicit.",
|
| 242 |
"status": "verified",
|
| 243 |
"evidence": [
|
| 244 |
"QUALITY_GATES.md",
|
| 245 |
"scripts/build_quality_gates.py",
|
| 246 |
"docs/data/quality_gates.json"
|
| 247 |
],
|
| 248 |
-
"
|
|
|
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"id": "live_publication_status",
|
| 252 |
-
"claim": "The live public mirrors are checked after upload.",
|
| 253 |
"status": "verified",
|
| 254 |
"evidence": [
|
| 255 |
"scripts/verify_live_publication.py",
|
| 256 |
"docs/data/live_publication_status.json"
|
| 257 |
],
|
| 258 |
-
"
|
|
|
|
| 259 |
},
|
| 260 |
{
|
| 261 |
"id": "citation_metadata",
|
| 262 |
-
"claim": "The project is externally citable and machine-readable.",
|
| 263 |
"status": "verified",
|
| 264 |
"evidence": [
|
| 265 |
"CITATION.cff",
|
|
@@ -267,11 +266,11 @@
|
|
| 267 |
"docs/data/project_manifest.json",
|
| 268 |
"LICENSE"
|
| 269 |
],
|
| 270 |
-
"
|
|
|
|
| 271 |
},
|
| 272 |
{
|
| 273 |
"id": "project_path",
|
| 274 |
-
"claim": "A first-time reader has an explicit project path.",
|
| 275 |
"status": "verified",
|
| 276 |
"evidence": [
|
| 277 |
"docs/data/project_packet.json",
|
|
@@ -280,29 +279,30 @@
|
|
| 280 |
"README.md",
|
| 281 |
"docs/index.html"
|
| 282 |
],
|
| 283 |
-
"
|
|
|
|
| 284 |
},
|
| 285 |
{
|
| 286 |
"id": "artifact_index",
|
| 287 |
-
"claim": "The core project artifacts are grouped for human reading and indexed with existence, size, and hash metadata where stable.",
|
| 288 |
"status": "verified",
|
| 289 |
"evidence": [
|
| 290 |
"ARTIFACT_GUIDE.md",
|
| 291 |
"scripts/build_artifact_index.py",
|
| 292 |
"docs/data/artifact_index.json"
|
| 293 |
],
|
| 294 |
-
"
|
|
|
|
| 295 |
},
|
| 296 |
{
|
| 297 |
"id": "reproducibility_contract",
|
| 298 |
-
"claim": "The public reproduction path is documented with commands, expected outputs, and exact-match reproduction evidence.",
|
| 299 |
"status": "verified",
|
| 300 |
"evidence": [
|
| 301 |
"REPRODUCIBILITY.md",
|
| 302 |
"docs/data/reproducibility_matrix.json",
|
| 303 |
"notes/reproducibility_audit.md"
|
| 304 |
],
|
| 305 |
-
"
|
|
|
|
| 306 |
}
|
| 307 |
]
|
| 308 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"project": "Ropedia Xperience-10M Task Suite",
|
| 3 |
"scope": "single public Xperience-10M sample episode",
|
| 4 |
+
"readouts": [
|
| 5 |
{
|
| 6 |
"id": "project_status",
|
|
|
|
| 7 |
"status": "verified",
|
| 8 |
"evidence": [
|
| 9 |
"PROJECT_STATUS.md",
|
| 10 |
"docs/data/project_status.json"
|
| 11 |
],
|
| 12 |
+
"readout": "A first-pass reader has a compact current-state summary.",
|
| 13 |
+
"scope_note": "summarizes existing evidence and current limitations"
|
| 14 |
},
|
| 15 |
{
|
| 16 |
"id": "research_roadmap",
|
|
|
|
| 17 |
"status": "current",
|
| 18 |
"evidence": [
|
| 19 |
"RESEARCH_ROADMAP.md",
|
| 20 |
"docs/data/research_roadmap.json"
|
| 21 |
],
|
| 22 |
+
"readout": "The research roadmap is explicit.",
|
| 23 |
+
"scope_note": "connects public-sample task development to multi-episode data preparation, Qwen3-Omni LoRA, robustness runs, and larger omni-model extensions"
|
| 24 |
},
|
| 25 |
+
{
|
| 26 |
+
"id": "official_dataset_card_alignment",
|
|
|
|
| 27 |
"status": "verified",
|
| 28 |
"evidence": [
|
| 29 |
"XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
|
| 30 |
"docs/data/xperience10m_dataset_card_alignment.json",
|
| 31 |
"https://huggingface.co/datasets/ropedia-ai/xperience-10m"
|
| 32 |
],
|
| 33 |
+
"readout": "The public dataset description is aligned with the official gated Xperience-10M dataset card and public sample card.",
|
| 34 |
+
"scope_note": "summarizes upstream public metadata, API listing facts, sample license/tooling, and dataset-card facts; does not grant access or mirror raw data"
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"id": "source_alignment",
|
| 38 |
+
"status": "verified",
|
| 39 |
+
"evidence": [
|
| 40 |
+
"SOURCE_ALIGNMENT_AUDIT.md",
|
| 41 |
+
"docs/data/source_alignment_audit.json",
|
| 42 |
+
"scripts/validate_source_alignment.py"
|
| 43 |
+
],
|
| 44 |
+
"readout": "Source facts, sample details, API-listing notes, and project coverage are validated across repo, website, and HF cards.",
|
| 45 |
+
"scope_note": "offline committed-fact check; does not fetch private gated data"
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"id": "aligned_windows",
|
| 49 |
"status": "verified",
|
| 50 |
"evidence": [
|
| 51 |
"results/episode_task_suite/windows.csv",
|
| 52 |
"results/episode_task_suite/shared_windows.npz",
|
| 53 |
"results/episode_task_suite/summary_report.json"
|
| 54 |
],
|
| 55 |
+
"readout": "The public Xperience-10M sample has been converted into aligned model windows.",
|
| 56 |
+
"scope_note": "5,821 frames, 1,161 windows, one public sample episode"
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"id": "feature_contract",
|
| 60 |
+
"status": "verified",
|
| 61 |
+
"evidence": [
|
| 62 |
+
"results/episode_task_suite/feature_manifest.json",
|
| 63 |
+
"results/episode_task_suite/available_modalities.json"
|
| 64 |
+
],
|
| 65 |
+
"readout": "The current feature contract is explicit and inspectable.",
|
| 66 |
+
"scope_note": "8,546-dimensional aligned multimodal window representation"
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"id": "evaluation_protocol",
|
| 70 |
+
"status": "verified",
|
| 71 |
+
"evidence": [
|
| 72 |
+
"EVALUATION_PROTOCOL.md",
|
| 73 |
+
"docs/data/evaluation_protocol.json",
|
| 74 |
+
"scripts/build_evaluation_protocol.py"
|
| 75 |
+
],
|
| 76 |
+
"readout": "The task evaluation protocol is explicit and generated from committed metrics.",
|
| 77 |
+
"scope_note": "defines windows, split, per-task metrics, leakage controls, and current limitations"
|
| 78 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
{
|
| 80 |
"id": "modality_atlas",
|
|
|
|
| 81 |
"status": "verified",
|
| 82 |
"evidence": [
|
| 83 |
"docs/data/modality_atlas.json",
|
| 84 |
"docs/assets/modalities/",
|
| 85 |
"docs/index.html"
|
| 86 |
],
|
| 87 |
+
"readout": "The public sample modalities are inspectable without raw data redistribution.",
|
| 88 |
+
"scope_note": "derived thumbnails for presentation; raw data remains excluded"
|
| 89 |
},
|
| 90 |
{
|
| 91 |
"id": "task_surface_integrity",
|
|
|
|
| 92 |
"status": "verified",
|
| 93 |
"evidence": [
|
| 94 |
"docs/data/task_surface_integrity.json",
|
| 95 |
"scripts/validate_task_surface.py",
|
| 96 |
"docs/index.html"
|
| 97 |
],
|
| 98 |
+
"readout": "Public task cards stay readable for non-expert readers.",
|
| 99 |
+
"scope_note": "presentation integrity for the public task surface"
|
| 100 |
},
|
| 101 |
{
|
| 102 |
"id": "figure_index",
|
|
|
|
| 103 |
"status": "verified",
|
| 104 |
"evidence": [
|
| 105 |
"FIGURE_INDEX.md",
|
| 106 |
"docs/data/figure_index.json",
|
| 107 |
"scripts/build_figure_index.py"
|
| 108 |
],
|
| 109 |
+
"readout": "Public figures, charts, and modality thumbnails are indexed as project evidence.",
|
| 110 |
+
"scope_note": "records derived visual assets, dimensions, hashes, roles, and source scripts; raw Xperience-10M data remains excluded"
|
| 111 |
},
|
| 112 |
{
|
| 113 |
"id": "brand_assets",
|
|
|
|
| 114 |
"status": "verified",
|
| 115 |
"evidence": [
|
| 116 |
"docs/assets/brand/",
|
| 117 |
"docs/data/brand_assets.json",
|
| 118 |
"scripts/build_brand_assets.py"
|
| 119 |
],
|
| 120 |
+
"readout": "A project logo is consistently applied across public surfaces.",
|
| 121 |
+
"scope_note": "generated logo and deterministic derivatives only; no raw dataset data or model weights"
|
| 122 |
},
|
| 123 |
{
|
| 124 |
"id": "twelve_tasks",
|
|
|
|
| 125 |
"status": "verified",
|
| 126 |
"evidence": [
|
| 127 |
"scripts/episode_task_suite.py",
|
| 128 |
"results/episode_task_suite/*/metrics.json",
|
| 129 |
"results/episode_task_suite/*/predictions.*"
|
| 130 |
],
|
| 131 |
+
"readout": "The 12 task heads are implemented as scripts with saved metrics and predictions.",
|
| 132 |
+
"scope_note": "chronological single-episode split, not cross-episode generalization"
|
| 133 |
},
|
| 134 |
{
|
| 135 |
"id": "minimal_vs_neural",
|
|
|
|
| 136 |
"status": "verified",
|
| 137 |
"evidence": [
|
| 138 |
"scripts/neural_task_models.py",
|
| 139 |
"results/episode_task_suite/neural_mlp/",
|
| 140 |
"docs/assets/task_architectures.png"
|
| 141 |
],
|
| 142 |
+
"readout": "Minimal and neural heads use the same task contracts.",
|
| 143 |
+
"scope_note": "small heads only; not a foundation model"
|
| 144 |
},
|
| 145 |
{
|
| 146 |
"id": "research_directions",
|
|
|
|
| 147 |
"status": "verified",
|
| 148 |
"evidence": [
|
| 149 |
"results/episode_task_suite/research_directions/research_direction_taxonomy.json",
|
| 150 |
"docs/data/research_directions.json"
|
| 151 |
],
|
| 152 |
+
"readout": "Four Ropedia research directions are mapped honestly as direct, proxy, or diagnostic evidence.",
|
| 153 |
+
"scope_note": "some directions remain proxy-only"
|
| 154 |
},
|
| 155 |
{
|
| 156 |
"id": "direction_extensions",
|
|
|
|
| 157 |
"status": "verified",
|
| 158 |
"evidence": [
|
| 159 |
"results/episode_task_suite/research_direction_extensions/research_direction_extension_results.json",
|
| 160 |
"docs/data/research_direction_extensions.json"
|
| 161 |
],
|
| 162 |
+
"readout": "Four extra direction probes are coded and evaluated.",
|
| 163 |
+
"scope_note": "single-episode probes, not full research-direction solutions"
|
| 164 |
},
|
| 165 |
{
|
| 166 |
"id": "qwen3_omni_diagnostic_pilot",
|
|
|
|
| 167 |
"status": "verified_diagnostic",
|
| 168 |
"evidence": [
|
| 169 |
"docs/data/omni_finetune_verified_result.json",
|
|
|
|
| 171 |
"scripts/omni/package_verified_omni_result.py",
|
| 172 |
"scripts/omni/audit_verified_omni_package.py"
|
| 173 |
],
|
| 174 |
+
"readout": "Qwen3-Omni has a verified selected-episode held-out diagnostic pilot.",
|
| 175 |
+
"scope_note": "the pipeline is verified, but model quality is weak: JSON validity is below target and action/subtask metrics are low"
|
| 176 |
},
|
| 177 |
{
|
| 178 |
"id": "multi_episode_quality_improvement",
|
|
|
|
| 179 |
"status": "active_next_step",
|
| 180 |
"evidence": [
|
| 181 |
"scripts/omni/run_128_fullsplit_parallel_export_8gpu.sh",
|
| 182 |
"docs/data/omni_finetune_verified_result.json",
|
| 183 |
"FOUNDATION_MODEL_PLAN.md"
|
| 184 |
],
|
| 185 |
+
"readout": "The next Qwen3-Omni step is structured-output and task-quality improvement on the same selected split.",
|
| 186 |
+
"scope_note": "stronger model quality requires output-format improvements and action/subtask error analysis"
|
| 187 |
},
|
| 188 |
{
|
| 189 |
"id": "scale_up_status_check",
|
|
|
|
| 190 |
"status": "verified",
|
| 191 |
"evidence": [
|
| 192 |
"scripts/validate_scope_claims.py",
|
| 193 |
"docs/data/scope_claims_audit.json"
|
| 194 |
],
|
| 195 |
+
"readout": "Older pilot path strings are tracked as setup-file provenance.",
|
| 196 |
+
"scope_note": "run/path identifiers stay separate from completed held-out-episode results"
|
| 197 |
},
|
| 198 |
{
|
| 199 |
"id": "mirror_parity",
|
|
|
|
| 200 |
"status": "verified",
|
| 201 |
"evidence": [
|
| 202 |
"scripts/validate_mirror_parity.py",
|
| 203 |
"docs/data/mirror_parity.json"
|
| 204 |
],
|
| 205 |
+
"readout": "Prepared GitHub and Hugging Face mirrors carry matching critical data, visual, HTML, and validator files.",
|
| 206 |
+
"scope_note": "compares prepared local mirror bundles before upload; live URLs are checked after publishing"
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"id": "publication_package",
|
|
|
|
| 210 |
"status": "verified",
|
| 211 |
"evidence": [
|
| 212 |
"scripts/validate_publication_package.py",
|
| 213 |
"docs/data/publication_audit.json"
|
| 214 |
],
|
| 215 |
+
"readout": "The public GitHub and Hugging Face bundles contain the intended release files.",
|
| 216 |
+
"scope_note": "checks public files, HF bundles, and current public-card assets; temporary local outputs are excluded"
|
| 217 |
},
|
| 218 |
{
|
| 219 |
"id": "website_integrity",
|
|
|
|
| 220 |
"status": "verified",
|
| 221 |
"evidence": [
|
| 222 |
"scripts/validate_website_integrity.py",
|
| 223 |
"docs/data/website_integrity.json"
|
| 224 |
],
|
| 225 |
+
"readout": "The public website has checked local references.",
|
| 226 |
+
"scope_note": "checks local links, anchors, JSON data, and referenced images; external URLs are not fetched"
|
| 227 |
},
|
| 228 |
{
|
| 229 |
"id": "rendered_site_check",
|
|
|
|
| 230 |
"status": "verified",
|
| 231 |
"evidence": [
|
| 232 |
"RENDERED_SITE_CHECK.md",
|
| 233 |
"scripts/build_rendered_site_check.py",
|
| 234 |
"docs/data/rendered_site_check.json"
|
| 235 |
],
|
| 236 |
+
"readout": "The rendered website walkthrough has a browser-level interaction check.",
|
| 237 |
+
"scope_note": "checks local page load, tab switch, walkthrough deep link, player controls, and console health"
|
| 238 |
},
|
| 239 |
{
|
| 240 |
"id": "quality_gates",
|
|
|
|
| 241 |
"status": "verified",
|
| 242 |
"evidence": [
|
| 243 |
"QUALITY_GATES.md",
|
| 244 |
"scripts/build_quality_gates.py",
|
| 245 |
"docs/data/quality_gates.json"
|
| 246 |
],
|
| 247 |
+
"readout": "The release gate is explicit.",
|
| 248 |
+
"scope_note": "summarizes packaging and live-mirror checks; cross-episode model quality is measured by later held-out reports"
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"id": "live_publication_status",
|
|
|
|
| 252 |
"status": "verified",
|
| 253 |
"evidence": [
|
| 254 |
"scripts/verify_live_publication.py",
|
| 255 |
"docs/data/live_publication_status.json"
|
| 256 |
],
|
| 257 |
+
"readout": "The live public mirrors are checked after upload.",
|
| 258 |
+
"scope_note": "fetches public GitHub/HF URLs; it does not validate private training state"
|
| 259 |
},
|
| 260 |
{
|
| 261 |
"id": "citation_metadata",
|
|
|
|
| 262 |
"status": "verified",
|
| 263 |
"evidence": [
|
| 264 |
"CITATION.cff",
|
|
|
|
| 266 |
"docs/data/project_manifest.json",
|
| 267 |
"LICENSE"
|
| 268 |
],
|
| 269 |
+
"readout": "The project is externally citable and machine-readable.",
|
| 270 |
+
"scope_note": "code license does not override original Xperience-10M dataset terms"
|
| 271 |
},
|
| 272 |
{
|
| 273 |
"id": "project_path",
|
|
|
|
| 274 |
"status": "verified",
|
| 275 |
"evidence": [
|
| 276 |
"docs/data/project_packet.json",
|
|
|
|
| 279 |
"README.md",
|
| 280 |
"docs/index.html"
|
| 281 |
],
|
| 282 |
+
"readout": "A first-time reader has an explicit project path.",
|
| 283 |
+
"scope_note": "guides inspection across data, tasks, results, and scale-up status"
|
| 284 |
},
|
| 285 |
{
|
| 286 |
"id": "artifact_index",
|
|
|
|
| 287 |
"status": "verified",
|
| 288 |
"evidence": [
|
| 289 |
"ARTIFACT_GUIDE.md",
|
| 290 |
"scripts/build_artifact_index.py",
|
| 291 |
"docs/data/artifact_index.json"
|
| 292 |
],
|
| 293 |
+
"readout": "The core project artifacts are grouped for human reading and indexed with existence, size, and hash metadata where stable.",
|
| 294 |
+
"scope_note": "selective source-of-truth catalog, not a complete inventory of every output file"
|
| 295 |
},
|
| 296 |
{
|
| 297 |
"id": "reproducibility_contract",
|
|
|
|
| 298 |
"status": "verified",
|
| 299 |
"evidence": [
|
| 300 |
"REPRODUCIBILITY.md",
|
| 301 |
"docs/data/reproducibility_matrix.json",
|
| 302 |
"notes/reproducibility_audit.md"
|
| 303 |
],
|
| 304 |
+
"readout": "The public reproduction path is documented with commands, expected outputs, and exact-match reproduction evidence.",
|
| 305 |
+
"scope_note": "publicly reproduces the single-episode pipeline; multi-episode Qwen3-Omni metrics are added only after data preparation and held-out evaluation"
|
| 306 |
}
|
| 307 |
]
|
| 308 |
}
|
metrics/mirror_parity.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
metrics/omni_model_comparison.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"status": "pass",
|
| 5 |
"version_count": 3,
|
| 6 |
"model_group_count": 5,
|
|
@@ -1758,6 +1758,6 @@
|
|
| 1758 |
],
|
| 1759 |
"pending": [
|
| 1760 |
"Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.",
|
| 1761 |
-
"Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before
|
| 1762 |
]
|
| 1763 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
|
| 3 |
+
"generated_at_utc": "2026-06-22T10:59:59+00:00",
|
| 4 |
"status": "pass",
|
| 5 |
"version_count": 3,
|
| 6 |
"model_group_count": 5,
|
|
|
|
| 1758 |
],
|
| 1759 |
"pending": [
|
| 1760 |
"Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.",
|
| 1761 |
+
"Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before presenting v6 as globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses subtask, next-action, object micro-F1, and JSON validity slightly."
|
| 1762 |
]
|
| 1763 |
}
|
metrics/project_brief.json
CHANGED
|
@@ -56,7 +56,7 @@
|
|
| 56 |
"Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
|
| 57 |
],
|
| 58 |
"scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The final multi-episode Qwen3-Omni diagnostic result verifies the training loop and strict-JSON output reliability, but does not yet show strong action/subtask model quality.",
|
| 59 |
-
"next_stage": "Improve action/subtask quality through error analysis before larger robustness or alternative-backbone
|
| 60 |
"entry_points": {
|
| 61 |
"visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
|
| 62 |
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",
|
|
|
|
| 56 |
"Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
|
| 57 |
],
|
| 58 |
"scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The final multi-episode Qwen3-Omni diagnostic result verifies the training loop and strict-JSON output reliability, but does not yet show strong action/subtask model quality.",
|
| 59 |
+
"next_stage": "Improve action/subtask quality through error analysis before presenting larger robustness or alternative-backbone results.",
|
| 60 |
"entry_points": {
|
| 61 |
"visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
|
| 62 |
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",
|
metrics/public_reader_map.json
CHANGED
|
@@ -6,77 +6,124 @@
|
|
| 6 |
{
|
| 7 |
"reader_goal": "Understand the project in one pass",
|
| 8 |
"start_here": "PROJECT_BRIEF.md",
|
| 9 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 10 |
},
|
| 11 |
{
|
| 12 |
"reader_goal": "Understand the two evidence lines",
|
| 13 |
"start_here": "TWO_EVIDENCE_LINES.md",
|
| 14 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"reader_goal": "See the visual public dashboard",
|
| 18 |
"start_here": "GitHub Pages dashboard or Hugging Face Space",
|
| 19 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"reader_goal": "Decode project terminology",
|
| 23 |
"start_here": "GLOSSARY.md",
|
| 24 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"reader_goal": "Understand the data unit",
|
| 28 |
"start_here": "results/episode_task_suite/windows.csv",
|
| 29 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 30 |
},
|
| 31 |
{
|
| 32 |
"reader_goal": "Trace the 128-episode split",
|
| 33 |
"start_here": "XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
|
| 34 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"reader_goal": "Inspect the 20-task benchmark",
|
| 38 |
"start_here": "TASK_SUITE_20.md",
|
| 39 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"reader_goal": "Compare current results",
|
| 43 |
"start_here": "RESEARCH_TAKEAWAYS.md",
|
| 44 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"reader_goal": "Compare 1-episode and 128-episode methods",
|
| 48 |
"start_here": "Homepage radar section",
|
| 49 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 50 |
},
|
| 51 |
{
|
| 52 |
"reader_goal": "Read Qwen3-Omni v1-v6 correctly",
|
| 53 |
"start_here": "QWEN3_OMNI_RUN_LINEAGE.md",
|
| 54 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 55 |
},
|
| 56 |
{
|
| 57 |
"reader_goal": "Find all derived artifacts",
|
| 58 |
"start_here": "ARTIFACT_GUIDE.md",
|
| 59 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 60 |
},
|
| 61 |
{
|
| 62 |
"reader_goal": "Download model weights with their matching results",
|
| 63 |
"start_here": "Hugging Face weights/results repo",
|
| 64 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
},
|
| 66 |
{
|
| 67 |
"reader_goal": "Reproduce or extend the work",
|
| 68 |
"start_here": "REPRODUCIBILITY.md",
|
| 69 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
},
|
| 71 |
{
|
| 72 |
"reader_goal": "Understand foundation-model directions",
|
| 73 |
"start_here": "THREE_FOUNDATION_PIPELINES.md",
|
| 74 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 75 |
},
|
| 76 |
{
|
| 77 |
"reader_goal": "Check public-release health",
|
| 78 |
"start_here": "PUBLIC_SURFACE_QA.md",
|
| 79 |
-
"then_inspect": [
|
|
|
|
|
|
|
|
|
|
| 80 |
}
|
| 81 |
],
|
| 82 |
"public_surfaces": [
|
|
@@ -125,31 +172,49 @@
|
|
| 125 |
"Foundation directions",
|
| 126 |
"Public-release checks"
|
| 127 |
],
|
| 128 |
-
"
|
| 129 |
-
{
|
| 130 |
-
"
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
"
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
"
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
}
|
| 154 |
]
|
| 155 |
}
|
|
|
|
| 6 |
{
|
| 7 |
"reader_goal": "Understand the project in one pass",
|
| 8 |
"start_here": "PROJECT_BRIEF.md",
|
| 9 |
+
"then_inspect": [
|
| 10 |
+
"PROJECT_STATUS.md",
|
| 11 |
+
"RESEARCH_TAKEAWAYS.md"
|
| 12 |
+
]
|
| 13 |
},
|
| 14 |
{
|
| 15 |
"reader_goal": "Understand the two evidence lines",
|
| 16 |
"start_here": "TWO_EVIDENCE_LINES.md",
|
| 17 |
+
"then_inspect": [
|
| 18 |
+
"docs/data/two_evidence_lines.json",
|
| 19 |
+
"docs/data/two_evidence_line_result_summary.json"
|
| 20 |
+
]
|
| 21 |
},
|
| 22 |
{
|
| 23 |
"reader_goal": "See the visual public dashboard",
|
| 24 |
"start_here": "GitHub Pages dashboard or Hugging Face Space",
|
| 25 |
+
"then_inspect": [
|
| 26 |
+
"docs/index.html",
|
| 27 |
+
"docs/data/project_packet.json"
|
| 28 |
+
]
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"reader_goal": "Decode project terminology",
|
| 32 |
"start_here": "GLOSSARY.md",
|
| 33 |
+
"then_inspect": [
|
| 34 |
+
"docs/data/glossary.json",
|
| 35 |
+
"Homepage Glossary section"
|
| 36 |
+
]
|
| 37 |
},
|
| 38 |
{
|
| 39 |
"reader_goal": "Understand the data unit",
|
| 40 |
"start_here": "results/episode_task_suite/windows.csv",
|
| 41 |
+
"then_inspect": [
|
| 42 |
+
"results/episode_task_suite/feature_manifest.json",
|
| 43 |
+
"docs/data/raw_sample_files.json"
|
| 44 |
+
]
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"reader_goal": "Trace the 128-episode split",
|
| 48 |
"start_here": "XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
|
| 49 |
+
"then_inspect": [
|
| 50 |
+
"docs/data/xperience10m_128_episode_feature_index.json",
|
| 51 |
+
"results/omni_finetune/xperience10m_128_episode_selection.csv"
|
| 52 |
+
]
|
| 53 |
},
|
| 54 |
{
|
| 55 |
"reader_goal": "Inspect the 20-task benchmark",
|
| 56 |
"start_here": "TASK_SUITE_20.md",
|
| 57 |
+
"then_inspect": [
|
| 58 |
+
"docs/data/task_suite_20.json",
|
| 59 |
+
"EVALUATION_PROTOCOL.md"
|
| 60 |
+
]
|
| 61 |
},
|
| 62 |
{
|
| 63 |
"reader_goal": "Compare current results",
|
| 64 |
"start_here": "RESEARCH_TAKEAWAYS.md",
|
| 65 |
+
"then_inspect": [
|
| 66 |
+
"docs/data/task_method_20_result_matrix.json",
|
| 67 |
+
"docs/data/unified_task_model_radar.json"
|
| 68 |
+
]
|
| 69 |
},
|
| 70 |
{
|
| 71 |
"reader_goal": "Compare 1-episode and 128-episode methods",
|
| 72 |
"start_here": "Homepage radar section",
|
| 73 |
+
"then_inspect": [
|
| 74 |
+
"docs/data/single_episode_task_model_radar.json",
|
| 75 |
+
"docs/data/episode128_task_model_radar.json"
|
| 76 |
+
]
|
| 77 |
},
|
| 78 |
{
|
| 79 |
"reader_goal": "Read Qwen3-Omni v1-v6 correctly",
|
| 80 |
"start_here": "QWEN3_OMNI_RUN_LINEAGE.md",
|
| 81 |
+
"then_inspect": [
|
| 82 |
+
"docs/data/qwen3_omni_run_lineage.json",
|
| 83 |
+
"docs/data/qwen3_v5_v6_comparison.json"
|
| 84 |
+
]
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"reader_goal": "Find all derived artifacts",
|
| 88 |
"start_here": "ARTIFACT_GUIDE.md",
|
| 89 |
+
"then_inspect": [
|
| 90 |
+
"Hugging Face artifact dataset",
|
| 91 |
+
"docs/data/artifact_index.json"
|
| 92 |
+
]
|
| 93 |
},
|
| 94 |
{
|
| 95 |
"reader_goal": "Download model weights with their matching results",
|
| 96 |
"start_here": "Hugging Face weights/results repo",
|
| 97 |
+
"then_inspect": [
|
| 98 |
+
"manifest.json",
|
| 99 |
+
"analysis/docs/data/task_method_20_result_matrix.json",
|
| 100 |
+
"results/"
|
| 101 |
+
]
|
| 102 |
},
|
| 103 |
{
|
| 104 |
"reader_goal": "Reproduce or extend the work",
|
| 105 |
"start_here": "REPRODUCIBILITY.md",
|
| 106 |
+
"then_inspect": [
|
| 107 |
+
"QUALITY_GATES.md",
|
| 108 |
+
"scripts/",
|
| 109 |
+
"results/"
|
| 110 |
+
]
|
| 111 |
},
|
| 112 |
{
|
| 113 |
"reader_goal": "Understand foundation-model directions",
|
| 114 |
"start_here": "THREE_FOUNDATION_PIPELINES.md",
|
| 115 |
+
"then_inspect": [
|
| 116 |
+
"FOUNDATION_MODEL_PLAN.md",
|
| 117 |
+
"docs/data/three_foundation_pipelines.json"
|
| 118 |
+
]
|
| 119 |
},
|
| 120 |
{
|
| 121 |
"reader_goal": "Check public-release health",
|
| 122 |
"start_here": "PUBLIC_SURFACE_QA.md",
|
| 123 |
+
"then_inspect": [
|
| 124 |
+
"docs/data/live_publication_status.json",
|
| 125 |
+
"docs/data/mirror_parity.json"
|
| 126 |
+
]
|
| 127 |
}
|
| 128 |
],
|
| 129 |
"public_surfaces": [
|
|
|
|
| 172 |
"Foundation directions",
|
| 173 |
"Public-release checks"
|
| 174 |
],
|
| 175 |
+
"reading_scopes": [
|
| 176 |
+
{
|
| 177 |
+
"public_evidence": [
|
| 178 |
+
"results/episode_task_suite/",
|
| 179 |
+
"docs/data/task_suite_20.json"
|
| 180 |
+
],
|
| 181 |
+
"topic": "Single public-sample task behavior",
|
| 182 |
+
"scope_note": "Describes one public sample episode, not the full dataset distribution."
|
| 183 |
+
},
|
| 184 |
+
{
|
| 185 |
+
"public_evidence": [
|
| 186 |
+
"XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
|
| 187 |
+
"docs/data/xperience10m_128_episode_feature_index.json",
|
| 188 |
+
"results/omni_finetune/*128*",
|
| 189 |
+
"docs/data/omni_model_comparison.json"
|
| 190 |
+
],
|
| 191 |
+
"topic": "128-episode method comparison",
|
| 192 |
+
"scope_note": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"public_evidence": [
|
| 196 |
+
"QWEN3_OMNI_RUN_LINEAGE.md",
|
| 197 |
+
"docs/data/qwen3_omni_run_lineage.json"
|
| 198 |
+
],
|
| 199 |
+
"topic": "Qwen3-Omni v1-v6 lineage",
|
| 200 |
+
"scope_note": "v1-v4 are pipeline and ablation evidence, v5 is the pinned prior release, and v6 is the current public 20-task Qwen row."
|
| 201 |
+
},
|
| 202 |
+
{
|
| 203 |
+
"public_evidence": [
|
| 204 |
+
"Verified Qwen3-Omni and Cosmos3 result packages",
|
| 205 |
+
"model cards"
|
| 206 |
+
],
|
| 207 |
+
"topic": "Foundation-model track quality",
|
| 208 |
+
"scope_note": "Numeric task scores appear only when a task-specific eval or probe exists."
|
| 209 |
+
},
|
| 210 |
+
{
|
| 211 |
+
"public_evidence": [
|
| 212 |
+
"REPRODUCIBILITY.md",
|
| 213 |
+
"QUALITY_GATES.md",
|
| 214 |
+
"release validators"
|
| 215 |
+
],
|
| 216 |
+
"topic": "Reproducibility",
|
| 217 |
+
"scope_note": "Raw gated Xperience-10M files and full foundation weights are not redistributed."
|
| 218 |
}
|
| 219 |
]
|
| 220 |
}
|
metrics/public_surface_qa.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Project Surface",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
|
| 6 |
"checks": [
|
| 7 |
{
|
|
@@ -18,7 +18,7 @@
|
|
| 18 |
"website_integrity": {
|
| 19 |
"exists": true,
|
| 20 |
"status": "pass",
|
| 21 |
-
"generated_at_utc": "2026-06-
|
| 22 |
},
|
| 23 |
"rendered_site_check": {
|
| 24 |
"exists": true,
|
|
@@ -28,27 +28,27 @@
|
|
| 28 |
"task_surface_integrity": {
|
| 29 |
"exists": true,
|
| 30 |
"status": "pass",
|
| 31 |
-
"generated_at_utc": "2026-06-
|
| 32 |
},
|
| 33 |
"source_alignment": {
|
| 34 |
"exists": true,
|
| 35 |
"status": "pass",
|
| 36 |
-
"generated_at_utc": "2026-06-
|
| 37 |
},
|
| 38 |
"scale_up_status": {
|
| 39 |
"exists": true,
|
| 40 |
"status": "pass",
|
| 41 |
-
"generated_at_utc": "2026-06-
|
| 42 |
},
|
| 43 |
"publication_package": {
|
| 44 |
"exists": true,
|
| 45 |
"status": "pass",
|
| 46 |
-
"generated_at_utc": "2026-06-
|
| 47 |
},
|
| 48 |
"mirror_parity": {
|
| 49 |
"exists": true,
|
| 50 |
"status": "pass",
|
| 51 |
-
"generated_at_utc": "2026-06-
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"failures": {}
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Project Surface",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-22T11:18:45+00:00",
|
| 5 |
"scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
|
| 6 |
"checks": [
|
| 7 |
{
|
|
|
|
| 18 |
"website_integrity": {
|
| 19 |
"exists": true,
|
| 20 |
"status": "pass",
|
| 21 |
+
"generated_at_utc": "2026-06-22T11:17:07+00:00"
|
| 22 |
},
|
| 23 |
"rendered_site_check": {
|
| 24 |
"exists": true,
|
|
|
|
| 28 |
"task_surface_integrity": {
|
| 29 |
"exists": true,
|
| 30 |
"status": "pass",
|
| 31 |
+
"generated_at_utc": "2026-06-22T11:17:07+00:00"
|
| 32 |
},
|
| 33 |
"source_alignment": {
|
| 34 |
"exists": true,
|
| 35 |
"status": "pass",
|
| 36 |
+
"generated_at_utc": "2026-06-22T11:17:08+00:00"
|
| 37 |
},
|
| 38 |
"scale_up_status": {
|
| 39 |
"exists": true,
|
| 40 |
"status": "pass",
|
| 41 |
+
"generated_at_utc": "2026-06-22T11:17:10+00:00"
|
| 42 |
},
|
| 43 |
"publication_package": {
|
| 44 |
"exists": true,
|
| 45 |
"status": "pass",
|
| 46 |
+
"generated_at_utc": "2026-06-22T11:18:16+00:00"
|
| 47 |
},
|
| 48 |
"mirror_parity": {
|
| 49 |
"exists": true,
|
| 50 |
"status": "pass",
|
| 51 |
+
"generated_at_utc": "2026-06-22T11:18:11+00:00"
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"failures": {}
|