File size: 16,801 Bytes
9d58132
c325020
9d58132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c5b88c
 
756e790
2c5b88c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d58132
 
d9be7c0
9d58132
d9be7c0
9d58132
 
 
 
 
 
 
 
 
 
 
8529036
 
 
 
 
 
 
 
c96262b
d9e465e
16a39bb
c96262b
16a39bb
596ac86
c96262b
 
 
d9e465e
16a39bb
c96262b
16a39bb
596ac86
c96262b
 
 
d9e465e
16a39bb
c96262b
16a39bb
596ac86
c96262b
 
9d58132
 
 
 
d9be7c0
9d58132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c614c4e
 
d9be7c0
c614c4e
d9be7c0
c614c4e
d9be7c0
c614c4e
2ebe45d
 
 
 
948bb27
2ebe45d
d8565bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ebe45d
 
9d58132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a1869c
9d58132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
#!/usr/bin/env python3
"""Build an index for public visual assets."""

from __future__ import annotations

import hashlib
import json
import re
from datetime import datetime, timezone
from pathlib import Path
from xml.etree import ElementTree

from PIL import Image


# Repository root: the directory one level above the folder that holds this script.
ROOT = Path(__file__).resolve().parents[1]
# Machine-readable index (JSON) written by main().
OUTPUT_JSON = ROOT / "docs/data/figure_index.json"
# Human-readable index (Markdown table) written by main().
OUTPUT_MD = ROOT / "FIGURE_INDEX.md"

# Catalog of public figures. Every entry is a dict of strings with the keys
# "id", "title", "path", "role", "source_script", and "surface"; "path" and
# "source_script" are resolved relative to ROOT when the index is built.
FIGURES = [
    {
        "id": "brand_logo_mark",
        "title": "Project logo mark",
        "path": "docs/assets/brand/xperience10m-logo-mark-512.png",
        "role": "Primary X-shaped multimodal camera mark used for the website header, README, HF cards, and brand identity.",
        "source_script": "scripts/build_brand_assets.py",
        "surface": "README, website, HF Space, artifact dataset, model card, favicon variants",
    },
    {
        "id": "brand_social_card",
        "title": "Project logo social card",
        "path": "docs/assets/brand/xperience10m-logo-social-card.png",
        "role": "Large preview image for README, Hugging Face cards, and Open Graph/Twitter social sharing.",
        "source_script": "scripts/build_brand_assets.py",
        "surface": "README, website metadata, HF Space, artifact dataset, model card",
    },
    {
        "id": "brand_favicon",
        "title": "Project favicon",
        "path": "docs/assets/brand/xperience10m-logo-favicon-64.png",
        "role": "Small dark-tile logo for browser tabs and compact navigation.",
        "source_script": "scripts/build_brand_assets.py",
        "surface": "website favicon and header",
    },
    {
        "id": "task_suite_infographic",
        "title": "Original task-suite infographic",
        "path": "docs/assets/task_suite_infographic.png",
        "role": "Primary visual map of the original task families, verified metrics, and sample modalities; the unified public suite is now documented as 20 tasks.",
        "source_script": "scripts/render_task_suite_infographic.py",
        "surface": "README, website, HF Space, artifact dataset, model card",
    },
    {
        "id": "pipeline_diagram",
        "title": "Episode-to-task pipeline diagram",
        "path": "docs/assets/pipeline_diagram.png",
        "role": "End-to-end data processing and evaluation pipeline overview.",
        "source_script": "scripts/generate_visualizations.py",
        "surface": "README, website, HF artifact dataset",
    },
    {
        "id": "qwen3_omni_lora_pipeline",
        "title": "Qwen3-Omni LoRA training pipeline",
        "path": "docs/assets/qwen3_omni_lora_pipeline.png",
        "role": "Detailed raw-data-to-adapter flow for staged Xperience-10M Qwen3-Omni LoRA training.",
        "source_script": "docs/assets/qwen3_omni_lora_pipeline.prompt.md",
        "surface": "README, website, HF Space, artifact dataset, model card",
    },
    {
        "id": "spatial_intelligence_presentation_photo",
        "title": "Spatial intelligence slide diagram",
        "path": "docs/assets/foundation-pipelines/spatial-intelligence-pipeline.png",
        "role": "High-resolution slide diagram for the spatial intelligence pipeline track.",
        "source_script": "scripts/render_foundation_pipeline_diagrams.py",
        "surface": "README, website, HF Space, artifact dataset, model card",
    },
    {
        "id": "human_video_world_model_presentation_photo",
        "title": "Human-video world model slide diagram",
        "path": "docs/assets/foundation-pipelines/human-video-world-model-pipeline.png",
        "role": "High-resolution slide diagram for the human-video world-model pipeline track.",
        "source_script": "scripts/render_foundation_pipeline_diagrams.py",
        "surface": "README, website, HF Space, artifact dataset, model card",
    },
    {
        "id": "vision_language_action_presentation_photo",
        "title": "Vision-language-action slide diagram",
        "path": "docs/assets/foundation-pipelines/vision-language-action-pipeline.png",
        "role": "High-resolution slide diagram for the VLA/action-policy pipeline track.",
        "source_script": "scripts/render_foundation_pipeline_diagrams.py",
        "surface": "README, website, HF Space, artifact dataset, model card",
    },
    {
        "id": "task_architectures",
        "title": "Minimal and neural task architecture map",
        "path": "docs/assets/task_architectures.png",
        "role": "Minimal and neural heads for the original task contracts and shared feature contracts.",
        "source_script": "scripts/render_overview_figures.py",
        "surface": "README, website, HF artifact dataset, model card",
    },
    {
        "id": "video_modality",
        "title": "Video modality thumbnail",
        "path": "docs/assets/modalities/video.jpg",
        "role": "Derived thumbnail for synchronized camera streams.",
        "source_script": "scripts/export_modality_atlas_assets.py",
        "surface": "website modality atlas, HF mirrors",
    },
    {
        "id": "audio_modality",
        "title": "Audio modality thumbnail",
        "path": "docs/assets/modalities/audio.png",
        "role": "Derived waveform thumbnail for the MP4 AAC stream.",
        "source_script": "scripts/export_modality_atlas_assets.py",
        "surface": "website modality atlas, HF mirrors",
    },
    {
        "id": "depth_modality",
        "title": "Depth modality thumbnail",
        "path": "docs/assets/modalities/depth.jpg",
        "role": "Derived depth and confidence thumbnail.",
        "source_script": "scripts/export_modality_atlas_assets.py",
        "surface": "website modality atlas, HF mirrors",
    },
    {
        "id": "pose_slam_modality",
        "title": "Pose / SLAM modality thumbnail",
        "path": "docs/assets/modalities/pose_slam.png",
        "role": "Derived camera trajectory and sparse map thumbnail.",
        "source_script": "scripts/export_modality_atlas_assets.py",
        "surface": "website modality atlas, HF mirrors",
    },
    {
        "id": "motion_capture_modality",
        "title": "Motion capture modality thumbnail",
        "path": "docs/assets/modalities/motion_capture.png",
        "role": "Derived body and hand motion-capture thumbnail.",
        "source_script": "scripts/export_modality_atlas_assets.py",
        "surface": "website modality atlas, HF mirrors",
    },
    {
        "id": "inertial_modality",
        "title": "Inertial modality thumbnail",
        "path": "docs/assets/modalities/inertial.png",
        "role": "Derived accelerometer and gyroscope trace thumbnail.",
        "source_script": "scripts/export_modality_atlas_assets.py",
        "surface": "website modality atlas, HF mirrors",
    },
    {
        "id": "language_modality",
        "title": "Language modality thumbnail",
        "path": "docs/assets/modalities/language.png",
        "role": "Derived object-tag and caption thumbnail.",
        "source_script": "scripts/export_modality_atlas_assets.py",
        "surface": "website modality atlas, HF mirrors",
    },
    {
        "id": "model_macro_f1_chart",
        "title": "Model macro-F1 comparison chart",
        "path": "docs/assets/charts/model_macro_f1.svg",
        "role": "Minimal-vs-neural classification score comparison.",
        "source_script": "scripts/generate_visualizations.py",
        "surface": "website diagnostics",
    },
    {
        "id": "neural_score_chart",
        "title": "Neural MLP task score chart",
        "path": "docs/assets/charts/episode_task_scores_neural_mlp.svg",
        "role": "Neural MLP metric snapshot across the task suite.",
        "source_script": "scripts/generate_visualizations.py",
        "surface": "website diagnostics",
    },
    {
        "id": "minimal_vs_neural_score_chart",
        "title": "Minimal-vs-neural task score chart",
        "path": "docs/assets/charts/episode_task_scores_minimal_vs_neural.svg",
        "role": "Side-by-side baseline comparison over the same window contracts.",
        "source_script": "scripts/generate_visualizations.py",
        "surface": "website diagnostics",
    },
    {
        "id": "research_direction_coverage_chart",
        "title": "Research direction coverage chart",
        "path": "docs/assets/charts/research_direction_coverage.svg",
        "role": "Four-track coverage map for Ropedia research directions.",
        "source_script": "scripts/generate_visualizations.py",
        "surface": "website directions",
    },
    {
        "id": "research_direction_extension_chart",
        "title": "Research direction extension chart",
        "path": "docs/assets/charts/research_direction_extension_tasks.svg",
        "role": "Four coded extension probes, one per Ropedia research direction.",
        "source_script": "scripts/generate_visualizations.py",
        "surface": "website directions",
    },
    {
        "id": "tier2_task_suite_chart",
        "title": "Tasks 13-20 baseline chart",
        "path": "docs/assets/charts/tier2_task_suite.svg",
        "role": "Eight additional sample-supported tasks in the unified 20-task suite with aligned minimal and neural baseline metrics.",
        "source_script": "scripts/tier2_task_suite.py",
        "surface": "website unified task section, README, HF mirrors",
    },
    {
        "id": "unified_task_model_radar",
        "title": "Unified 20-task model radar",
        "path": "docs/assets/charts/unified_task_model_radar.svg",
        "role": "Twenty-axis direction-aware comparison of minimal and neural MLP baselines, with 128-episode metadata, Qwen3, and Cosmos task-aligned overlay points and branch notes.",
        "source_script": "scripts/build_unified_task_model_radar.py",
        "surface": "website unified task section, README, HF mirrors",
    },
    {
        "id": "single_episode_task_model_radar",
        "title": "Single-episode 20-task model radar",
        "path": "docs/assets/charts/single_episode_task_model_radar.svg",
        "role": "Twenty-axis split radar for the one public-sample episode, comparing Minimal and Neural MLP as two complete 20/20 scored polygons.",
        "source_script": "scripts/build_unified_task_model_radar.py",
        "surface": "website unified task section, README, HF mirrors",
    },
    {
        "id": "episode128_task_model_radar",
        "title": "128-episode 20-task model radar",
        "path": "docs/assets/charts/episode128_task_model_radar.svg",
        "role": "Twenty-axis split radar for selected 128-episode methods: raw-feature simple/NN as complete scored polygons and metadata/Qwen/Cosmos as task-aligned overlays.",
        "source_script": "scripts/build_unified_task_model_radar.py",
        "surface": "website unified task section, README, HF mirrors",
    },
    {
        "id": "feature_blocks_chart",
        "title": "Feature block chart",
        "path": "docs/assets/charts/feature_blocks.svg",
        "role": "Feature allocation by modality block.",
        "source_script": "scripts/generate_visualizations.py",
        "surface": "website features",
    },
    {
        "id": "episode_task_scores_chart",
        "title": "Minimal task score chart",
        "path": "docs/assets/charts/episode_task_scores.svg",
        "role": "Minimal baseline metric snapshot across the task suite.",
        "source_script": "scripts/generate_visualizations.py",
        "surface": "website diagnostics",
    },
    {
        "id": "cross_modal_retrieval_chart",
        "title": "Cross-modal retrieval chart",
        "path": "docs/assets/charts/cross_modal_retrieval.svg",
        "role": "Retrieval behavior chart for the cross-modal task.",
        "source_script": "scripts/generate_visualizations.py",
        "surface": "website diagnostics",
    },
]


def sha256(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is consumed in 1 MiB pieces so that a large asset never has to
    be held in memory in one go.
    """
    chunk_size = 1024 * 1024
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(chunk_size)
            if not block:
                # An empty read marks end of file.
                break
            hasher.update(block)
    return hasher.hexdigest()


# First numeric token in a string: optional sign, then either digits with an
# optional fraction or a bare fraction (".5"), then an optional exponent.
_NUMBER_PATTERN = re.compile(r"[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?")


def parse_number(value: str | None) -> float | None:
    """Extract the first number found in *value*.

    Any surrounding text, such as a unit suffix ("100px", "2em"), is ignored.
    Leading-dot decimals (".5") and scientific notation ("1e2") are accepted.
    The previous pattern required a digit before the decimal point and knew
    nothing about exponents, so ".5" was read as 5.0 and "1e2" as 1.0.

    Args:
        value: Raw attribute text, or None when the attribute is absent.

    Returns:
        The number as a float, or None when *value* is None or contains no
        numeric token.
    """
    if value is None:
        return None
    match = _NUMBER_PATTERN.search(value)
    return float(match.group(0)) if match else None


def svg_dimensions(path: Path) -> dict:
    """Return the nominal size of the SVG file at *path*.

    The root element's ``width`` and ``height`` attributes are used when they
    carry an absolute value; only the number is kept, any unit suffix is
    dropped. A missing attribute, or one given as a percentage, is replaced by
    the matching ``viewBox`` extent when a four-number ``viewBox`` is present.
    A percentage is relative to whatever embeds the image, so recording
    "100%" as 100 pixels would put a meaningless size in the index.

    If a percentage is given and there is no usable ``viewBox``, the numeric
    part of the percentage is kept, as before.

    Args:
        path: Location of the SVG file. It is decoded as UTF-8 and
            undecodable bytes are skipped.

    Returns:
        A dict with "format" (always "SVG"), integer "width" and "height"
        (0 when a size could not be determined), and the raw "view_box"
        attribute text (None when absent).

    Raises:
        xml.etree.ElementTree.ParseError: The file is not well-formed XML.
        ValueError: The ``viewBox`` had to be consulted and contains a
            non-numeric token.
    """
    root = ElementTree.fromstring(path.read_text(encoding="utf-8", errors="ignore"))
    raw_width = root.attrib.get("width")
    raw_height = root.attrib.get("height")
    width = parse_number(raw_width)
    height = parse_number(raw_height)
    view_box = root.attrib.get("viewBox")

    width_is_relative = raw_width is not None and raw_width.strip().endswith("%")
    height_is_relative = raw_height is not None and raw_height.strip().endswith("%")
    need_width = width is None or width_is_relative
    need_height = height is None or height_is_relative

    if (need_width or need_height) and view_box:
        # viewBox is "min-x min-y width height", separated by spaces and/or commas.
        parts = [float(item) for item in re.split(r"[\s,]+", view_box.strip()) if item]
        if len(parts) == 4:
            if need_width:
                width = parts[2]
            if need_height:
                height = parts[3]
    return {
        "format": "SVG",
        "width": int(round(width or 0)),
        "height": int(round(height or 0)),
        "view_box": view_box,
    }


def image_dimensions(path: Path) -> dict:
    """Return the format and size of the image file at *path*.

    A file whose suffix is ``.svg`` (compared case-insensitively) is handed to
    ``svg_dimensions``. Every other file is opened with Pillow, and the result
    holds Pillow's reported "format" plus integer "width" and "height".
    """
    is_vector = path.suffix.lower() == ".svg"
    if not is_vector:
        with Image.open(path) as raster:
            kind, wide, tall = raster.format, raster.width, raster.height
        return {"format": kind, "width": int(wide), "height": int(tall)}
    return svg_dimensions(path)


def figure_record(spec: dict) -> dict:
    """Expand one catalog entry into a full index record.

    Args:
        spec: A figure description holding at least "path" and
            "source_script", both relative to ROOT.

    Returns:
        A copy of *spec* extended with:
          - "exists": whether the asset is present as a regular file,
          - "bytes": its size in bytes (0 when absent),
          - "sha256": its hex digest (None when absent),
          - "dimensions": the result of ``image_dimensions`` (None when the
            asset is absent or could not be measured),
          - "source_script_exists": whether the source script is present as a
            regular file,
          - "dimension_error": only when measuring failed; the error text.
    """
    path = ROOT / spec["path"]
    # is_file() rather than exists(): a directory at the asset path "exists"
    # but cannot be hashed, and the resulting error would abort the whole run
    # instead of being reported as a missing asset.
    exists = path.is_file()
    record = {
        **spec,
        "exists": exists,
        "bytes": path.stat().st_size if exists else 0,
        "sha256": sha256(path) if exists else None,
        "dimensions": None,
        "source_script_exists": (ROOT / spec["source_script"]).is_file(),
    }
    if exists:
        try:
            record["dimensions"] = image_dimensions(path)
        except Exception as exc:  # noqa: BLE001 - report the exact bad asset.
            # Deliberately broad: one unreadable image must not stop the
            # remaining figures from being indexed.
            record["dimension_error"] = str(exc)
    return record


def build_payload() -> dict:
    """Build the complete figure-index document.

    Every entry of FIGURES is expanded with ``figure_record`` and checked. A
    failure entry is added for each of the following:
      - "missing_asset": the asset file is absent,
      - "missing_source_script": the source script is absent,
      - "invalid_dimensions": the asset is present but has no positive width
        and height.

    Dimensions are only checked for assets that are present, so a missing
    file is reported once instead of also showing up as a misleading
    "invalid_dimensions" entry. When measuring raised an error, its text is
    attached to the failure under "error" so the cause appears in the report.

    Returns:
        A dict with "title", "status" ("pass" when there are no failures,
        otherwise "fail"), "generated_at_utc" (current UTC time, ISO 8601 to
        the second), "scope", "figure_count", "figures", and "failures".
    """
    figures = [figure_record(item) for item in FIGURES]
    failures = []
    for figure in figures:
        if not figure["exists"]:
            failures.append({"figure": figure["id"], "kind": "missing_asset", "path": figure["path"]})
        if not figure["source_script_exists"]:
            failures.append({"figure": figure["id"], "kind": "missing_source_script", "path": figure["source_script"]})
        if figure["exists"]:
            dimensions = figure.get("dimensions") or {}
            if dimensions.get("width", 0) <= 0 or dimensions.get("height", 0) <= 0:
                failure = {"figure": figure["id"], "kind": "invalid_dimensions", "path": figure["path"]}
                if "dimension_error" in figure:
                    failure["error"] = figure["dimension_error"]
                failures.append(failure)
    return {
        "title": "Ropedia Xperience-10M Figure Index",
        "status": "pass" if not failures else "fail",
        "generated_at_utc": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "scope": "Public figures, diagrams, charts, and derived modality thumbnails. Raw Xperience-10M videos, annotations, RRD files, and Qwen weights are excluded.",
        "figure_count": len(figures),
        "figures": figures,
        "failures": failures,
    }


def render_markdown(payload: dict) -> str:
    """Render the figure index as a Markdown document.

    Args:
        payload: A dict holding "status", "scope", and "figures". Each figure
            needs "title", "path", "source_script", and "role"; its
            "dimensions" may be missing or None, in which case the size is
            shown as "0 x 0".

    Returns:
        The Markdown text, ending with a single newline.
    """

    def cell(value: object) -> str:
        # A table row is split on every unescaped pipe, even inside a code
        # span, so a "|" in a title, role, or path would otherwise add
        # columns to the row.
        return str(value).replace("|", "\\|")

    lines = [
        "# Figure Index",
        "",
        "This file is generated by `scripts/build_figure_index.py`. It catalogs",
        "the public visual assets used by the repo, website, and Hugging Face mirrors.",
        "",
        f"Current status: **{payload['status']}**",
        "",
        payload["scope"],
        "",
        "## Figures",
        "",
        "| Figure | Path | Size | Source script | Role |",
        "| --- | --- | ---: | --- | --- |",
    ]
    for figure in payload["figures"]:
        dimensions = figure.get("dimensions") or {}
        size = f"{dimensions.get('width', 0)} x {dimensions.get('height', 0)}"
        lines.append(
            f"| {cell(figure['title'])} | `{cell(figure['path'])}` | {size} "
            f"| `{cell(figure['source_script'])}` | {cell(figure['role'])} |"
        )
    lines.extend([
        "",
        "## Use and Scope",
        "",
        "- These figures are derived presentation artifacts or small thumbnails.",
        "- The index records file hashes and dimensions for reproducibility checks.",
        "- Raw Xperience-10M MP4/HDF5/RRD files and full model weights are not redistributed.",
        "",
    ])
    return "\n".join(lines)


def main() -> int:
    """Write the JSON and Markdown indexes and report the outcome.

    Both output files are written whatever the status is. Each written path
    is printed with the upper-cased status as a prefix, followed by one line
    per failure when the status is not "pass".

    Returns:
        0 when the status is "pass", otherwise 1.
    """
    payload = build_payload()
    status = payload["status"]

    OUTPUT_JSON.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, indent=2) + "\n"
    OUTPUT_JSON.write_text(serialized, encoding="utf-8")
    OUTPUT_MD.write_text(render_markdown(payload), encoding="utf-8")

    label = status.upper()
    for target in (OUTPUT_JSON, OUTPUT_MD):
        print(f"{label}: wrote {target}")

    if status == "pass":
        return 0
    for failure in payload["failures"]:
        print(f"- {failure}")
    return 1


# When run as a script, use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())