cy0307 committed on
Commit
be9ccf3
·
verified ·
1 Parent(s): 00ead12

Publish Ropedia minimal task baseline weights

Browse files
README.md CHANGED
@@ -64,6 +64,7 @@ Their purpose is to make every input/output contract auditable before scaling to
64
  | `artifacts/**/metrics.json` | records the committed metric values |
65
  | `artifacts/**/feature_manifest.json` | maps feature blocks back to source modalities |
66
  | `assets/task_architectures.svg` | shows the shared pipeline and all 12 heads |
 
67
 
68
  ## Included
69
 
 
64
  | `artifacts/**/metrics.json` | records the committed metric values |
65
  | `artifacts/**/feature_manifest.json` | maps feature blocks back to source modalities |
66
  | `assets/task_architectures.svg` | shows the shared pipeline and all 12 heads |
67
+ | `assets/task_suite_infographic.png` | presents the 12 heads with public-sample modality thumbnails and verified metrics |
68
 
69
  ## Included
70
 
assets/task_suite_infographic.png CHANGED

Git LFS Details

  • SHA256: 38ba0968f53333b74069e36bec35382cb9c97568da8be528536acc2d69fdb168
  • Pointer size: 132 Bytes
  • Size of remote file: 1.32 MB

Git LFS Details

  • SHA256: 3a7055b5a3ac9ae4362d784347071002fb5ebf572061c65f100a2720e3311036
  • Pointer size: 132 Bytes
  • Size of remote file: 1.3 MB
scripts/render_task_suite_infographic.py CHANGED
@@ -1,16 +1,18 @@
1
  #!/usr/bin/env python3
2
  """
3
- Render a ChatGPT-image-backed 12-task infographic.
4
 
5
- The background bitmap is AI-generated. The task names, inputs, and metrics are
6
- read from results/episode_task_suite/summary_report.json so the published image
7
- does not rely on image-model text generation.
8
  """
9
 
10
  from __future__ import annotations
11
 
12
  import argparse
 
13
  import html
 
14
  import json
15
  import subprocess
16
  import tempfile
@@ -20,16 +22,20 @@ from pathlib import Path
20
  ROOT = Path(__file__).resolve().parents[1]
21
  SUMMARY_PATH = ROOT / "results/episode_task_suite/summary_report.json"
22
  DEFAULT_BASE = ROOT / "docs/assets/task_suite_infographic_base.png"
 
23
  DEFAULT_OUTPUT = ROOT / "docs/assets/task_suite_infographic.png"
 
 
 
 
24
 
25
 
26
  GROUPS = [
27
  {
28
  "name": "Label + State",
29
- "color": "#008b9a",
30
- "left": 94,
31
- "top": 374,
32
- "width": 246,
33
  "tasks": [
34
  ("timeline_action", "supervised"),
35
  ("timeline_subtask", "supervised"),
@@ -38,10 +44,9 @@ GROUPS = [
38
  },
39
  {
40
  "name": "Prediction + Reconstruction",
41
- "color": "#1f63e9",
42
- "left": 472,
43
- "top": 374,
44
- "width": 248,
45
  "tasks": [
46
  ("hand_trajectory_forecast", "forecast"),
47
  ("modality_reconstruction", "forecast"),
@@ -50,10 +55,9 @@ GROUPS = [
50
  },
51
  {
52
  "name": "Grounding + Retrieval",
53
- "color": "#b65b04",
54
- "left": 848,
55
- "top": 374,
56
- "width": 220,
57
  "tasks": [
58
  ("caption_grounding", "retrieval"),
59
  ("cross_modal_retrieval", "retrieval"),
@@ -62,10 +66,9 @@ GROUPS = [
62
  },
63
  {
64
  "name": "Temporal Diagnostics",
65
- "color": "#b42318",
66
- "left": 1202,
67
- "top": 374,
68
- "width": 244,
69
  "tasks": [
70
  ("transition_detection", "diagnostic"),
71
  ("temporal_order", "diagnostic"),
@@ -74,6 +77,287 @@ GROUPS = [
74
  },
75
  ]
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
  def load_summary() -> dict:
79
  return json.loads(SUMMARY_PATH.read_text(encoding="utf-8"))
@@ -105,226 +389,484 @@ def metric_for(task_name: str, metrics: dict) -> tuple[str, str]:
105
 
106
  def short_io(task_name: str, metrics: dict) -> str:
107
  custom = {
108
- "timeline_action": "all modalities -> action label",
109
- "timeline_subtask": "all modalities -> subtask label",
110
- "transition_detection": "all modalities -> boundary / steady",
111
- "next_action": "window at t -> action at t+20",
112
  "hand_trajectory_forecast": "all modalities -> future hand joints",
113
- "contact_prediction": "non-contact modalities -> contact",
114
- "object_relevance": "non-caption modalities -> object set",
115
- "caption_grounding": "text query -> matching window",
116
- "cross_modal_retrieval": "motion / IMU / camera -> depth / video",
117
- "modality_reconstruction": "motion / IMU / camera -> depth / video vec",
118
- "temporal_order": "two windows -> correct order?",
119
- "misalignment_detection": "motion + visual -> aligned / shifted",
120
  }
121
  return custom.get(task_name, metrics.get("input", ""))
122
 
123
 
124
- def task_html(task_name: str, kind: str, metrics: dict, top: int, group: dict) -> str:
125
  label, value = metric_for(task_name, metrics)
126
  io = short_io(task_name, metrics)
127
- name_size = 17 if len(task_name) > 22 else 18
128
  return f"""
129
- <section class="task" style="left:{group['left']}px;top:{top}px;width:{group['width']}px;--accent:{group['color']};">
130
- <div class="kind">{html.escape(kind)}</div>
131
- <div class="task-name" style="font-size:{name_size}px;">{html.escape(task_name)}</div>
132
- <div class="io">{html.escape(io)}</div>
133
- <div class="metric"><span>{html.escape(label)}</span><strong>{html.escape(value)}</strong></div>
134
- </section>
 
 
 
 
 
 
135
  """
136
 
137
 
138
- def build_html(summary: dict, base_image: Path) -> str:
139
- suite = summary["tasks"]
140
- task_count = len(suite)
141
- group_headers = []
142
- cards = []
143
- row_tops = [374, 552, 730]
144
- header_lefts = [38, 417, 792, 1143]
145
- for group, header_left in zip(GROUPS, header_lefts):
146
- group_headers.append(
147
- f'<div class="group-title" style="left:{header_left}px;top:333px;color:{group["color"]};">{html.escape(group["name"])}</div>'
148
- )
149
- for row_idx, (task_name, kind) in enumerate(group["tasks"]):
150
- cards.append(task_html(task_name, kind, suite[task_name], row_tops[row_idx], group))
 
151
 
 
 
 
 
 
 
152
  stats = [
153
- f"{summary['num_frames']:,} frames",
154
- f"{summary['num_windows']:,} windows",
155
- f"{summary['feature_dim']:,} features",
156
- f"{task_count} tasks",
157
- "chronological split",
158
  ]
159
- stat_html = "".join(f"<span>{html.escape(item)}</span>" for item in stats)
160
- base_uri = base_image.resolve().as_uri()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  return f"""<!doctype html>
162
  <html lang="en">
163
  <head>
164
  <meta charset="utf-8">
165
- <meta name="viewport" content="width=1536, initial-scale=1">
166
  <title>Ropedia 12-Task Episode Suite Infographic</title>
167
  <style>
168
  * {{ box-sizing: border-box; }}
169
- html, body {{ margin: 0; width: 1536px; height: 1024px; background: #ffffff; }}
170
  body {{
171
- font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Arial, sans-serif;
172
- color: #10141f;
 
 
 
 
 
 
 
173
  }}
174
  .canvas {{
175
  position: relative;
176
- width: 1536px;
177
- height: 1024px;
178
  overflow: hidden;
179
- background-image: url("{base_uri}");
180
- background-size: 1536px 1024px;
181
- background-repeat: no-repeat;
 
 
 
 
 
182
  }}
183
- .title {{
184
  position: absolute;
185
- left: 330px;
186
- top: 42px;
187
- width: 876px;
188
- text-align: center;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  }}
190
  h1 {{
191
- margin: 0;
192
- font-size: 38px;
193
- line-height: 1.05;
 
194
  letter-spacing: 0;
195
- font-weight: 820;
196
  }}
197
  .subtitle {{
198
- margin-top: 8px;
199
- color: #425067;
200
- font-size: 15px;
 
201
  line-height: 1.35;
202
  font-weight: 520;
203
  }}
204
  .stats {{
205
- margin-top: 12px;
206
- display: flex;
207
- justify-content: center;
208
- gap: 8px;
209
  }}
210
- .stats span {{
211
- display: inline-flex;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  align-items: center;
213
- height: 24px;
214
- padding: 0 10px;
215
- border: 1px solid #cdd8e8;
216
- background: rgba(255, 255, 255, 0.82);
217
- border-radius: 999px;
218
- color: #253046;
219
- font-size: 12px;
220
- font-weight: 720;
 
 
 
 
 
 
 
 
 
 
221
  }}
222
  .modality {{
223
- position: absolute;
224
- top: 256px;
225
- width: 180px;
226
- text-align: center;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  font-size: 12px;
228
- color: #536074;
229
- font-weight: 720;
230
- text-transform: uppercase;
231
- letter-spacing: 0;
232
  }}
233
- .group-title {{
234
- position: absolute;
235
- width: 322px;
236
- text-align: center;
237
- font-size: 18px;
238
  line-height: 1;
239
- font-weight: 830;
240
- letter-spacing: 0;
241
  }}
242
- .task {{
243
- position: absolute;
244
- padding: 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  }}
246
  .kind {{
247
  display: inline-flex;
248
  align-items: center;
249
- height: 22px;
250
- padding: 0 8px;
251
  border-radius: 6px;
252
- border: 1px solid color-mix(in srgb, var(--accent) 35%, #ffffff);
253
  color: var(--accent);
254
- background: rgba(255, 255, 255, 0.76);
255
  text-transform: uppercase;
256
- font-size: 10px;
257
  line-height: 1;
258
- font-weight: 840;
259
- letter-spacing: 0;
260
  }}
261
- .task-name {{
262
- margin-top: 7px;
263
  color: #111827;
264
- line-height: 1.05;
265
- font-weight: 850;
266
- letter-spacing: 0;
267
- white-space: nowrap;
268
  }}
269
- .io {{
270
- margin-top: 8px;
271
- min-height: 36px;
272
- color: #475569;
273
- font-size: 13.5px;
274
  line-height: 1.28;
275
- font-weight: 570;
276
  }}
277
  .metric {{
278
  display: inline-flex;
279
- align-items: center;
280
- gap: 9px;
281
- margin-top: 8px;
282
- height: 30px;
283
- padding: 0 10px;
284
- border-radius: 7px;
285
- border: 1px solid color-mix(in srgb, var(--accent) 36%, #ffffff);
286
- background: rgba(255, 255, 255, 0.90);
287
- box-shadow: 0 7px 20px rgba(16, 20, 31, 0.07);
288
  }}
289
  .metric span {{
290
  color: #64748b;
291
- font-size: 12px;
292
  font-weight: 760;
293
  }}
294
  .metric strong {{
295
  color: var(--accent);
296
- font-size: 16px;
 
297
  line-height: 1;
298
  font-weight: 860;
 
299
  }}
300
  .footer {{
301
- position: absolute;
302
- left: 360px;
303
- top: 932px;
304
- width: 816px;
305
- text-align: center;
306
- color: #536074;
307
- font-size: 14px;
308
- font-weight: 650;
 
 
 
 
 
 
 
 
 
 
 
 
309
  }}
310
  </style>
311
  </head>
312
  <body>
313
  <main class="canvas" aria-label="Ropedia 12-task episode suite infographic">
314
- <div class="title">
315
- <h1>Ropedia 12-Task Episode Suite</h1>
316
- <div class="subtitle">All labels and metrics are overlaid from the verified single-episode results.</div>
317
- <div class="stats">{stat_html}</div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  </div>
319
- <div class="modality" style="left:50px;">fisheye video</div>
320
- <div class="modality" style="left:270px;">depth</div>
321
- <div class="modality" style="left:530px;">3D / SLAM</div>
322
- <div class="modality" style="left:770px;">IMU</div>
323
- <div class="modality" style="left:1030px;">hands</div>
324
- <div class="modality" style="left:1278px;">text / objects</div>
325
- {''.join(group_headers)}
326
- {''.join(cards)}
327
- <div class="footer">Single public sample episode: useful for pipeline validation and task design, not cross-episode generalization.</div>
328
  </main>
329
  </body>
330
  </html>
@@ -340,7 +882,7 @@ def render_html(html_path: Path, output_path: Path) -> None:
340
  "playwright",
341
  "screenshot",
342
  "--full-page",
343
- "--viewport-size=1536,1024",
344
  html_path.resolve().as_uri(),
345
  str(output_path),
346
  ],
@@ -351,13 +893,14 @@ def render_html(html_path: Path, output_path: Path) -> None:
351
  def main() -> int:
352
  parser = argparse.ArgumentParser()
353
  parser.add_argument("--base-image", type=Path, default=DEFAULT_BASE)
 
354
  parser.add_argument("--output", type=Path, default=DEFAULT_OUTPUT)
355
  parser.add_argument("--html", type=Path)
356
- parser.add_argument("--no-export", action="store_true", help="Only write the HTML overlay.")
357
  args = parser.parse_args()
358
 
359
  summary = load_summary()
360
- html_text = build_html(summary, args.base_image)
361
  if args.html is None:
362
  with tempfile.NamedTemporaryFile("w", suffix=".html", encoding="utf-8", delete=False) as handle:
363
  handle.write(html_text)
@@ -370,7 +913,7 @@ def main() -> int:
370
  if not args.no_export:
371
  render_html(html_path, args.output)
372
  print(f"Wrote image: {args.output}")
373
- print(f"Wrote overlay HTML: {html_path}")
374
  return 0
375
 
376
 
 
1
  #!/usr/bin/env python3
2
  """
3
+ Render a polished 12-task Ropedia episode-suite infographic.
4
 
5
+ The task names, inputs, and metrics are read from
6
+ results/episode_task_suite/summary_report.json. The output is a deterministic
7
+ PNG rendered from HTML/CSS so the labels stay legible and reviewable.
8
  """
9
 
10
  from __future__ import annotations
11
 
12
  import argparse
13
+ import base64
14
  import html
15
+ import io
16
  import json
17
  import subprocess
18
  import tempfile
 
22
# Repository root: this script sits one directory below it (scripts/...).
ROOT = Path(__file__).resolve().parents[1]
# Committed task-suite results; task names and metrics are read from here.
SUMMARY_PATH = ROOT / "results/episode_task_suite/summary_report.json"
# Optional background bitmap; it is only layered in when the file exists.
DEFAULT_BASE = ROOT / "docs/assets/task_suite_infographic_base.png"
# Sample episode directory, resolved next to (not inside) the repository.
DEFAULT_SAMPLE_DIR = ROOT.parent / "data/sample/xperience-10m-sample"
# Default destination of the rendered PNG.
DEFAULT_OUTPUT = ROOT / "docs/assets/task_suite_infographic.png"
# Page size in CSS pixels; interpolated into the viewport meta tag and the
# html/body/.canvas rules.
CANVAS_WIDTH = 1800
CANVAS_HEIGHT = 1650
# Pixel size of every generated modality thumbnail.
THUMB_WIDTH = 420
THUMB_HEIGHT = 160
31
 
32
 
33
  GROUPS = [
34
  {
35
  "name": "Label + State",
36
+ "tone": "teal",
37
+ "color": "#197d83",
38
+ "soft": "#e8f4f3",
 
39
  "tasks": [
40
  ("timeline_action", "supervised"),
41
  ("timeline_subtask", "supervised"),
 
44
  },
45
  {
46
  "name": "Prediction + Reconstruction",
47
+ "tone": "blue",
48
+ "color": "#1f6c9f",
49
+ "soft": "#e8f1fb",
 
50
  "tasks": [
51
  ("hand_trajectory_forecast", "forecast"),
52
  ("modality_reconstruction", "forecast"),
 
55
  },
56
  {
57
  "name": "Grounding + Retrieval",
58
+ "tone": "amber",
59
+ "color": "#9b6516",
60
+ "soft": "#fbf3df",
 
61
  "tasks": [
62
  ("caption_grounding", "retrieval"),
63
  ("cross_modal_retrieval", "retrieval"),
 
66
  },
67
  {
68
  "name": "Temporal Diagnostics",
69
+ "tone": "red",
70
+ "color": "#b0443e",
71
+ "soft": "#fdeceb",
 
72
  "tasks": [
73
  ("transition_detection", "diagnostic"),
74
  ("temporal_order", "diagnostic"),
 
77
  },
78
  ]
79
 
80
# One (name, headline, sub-line) tuple per modality card, in display order.
# The name is also the key used to look up that card's thumbnail, so it must
# stay in sync with the keys produced by load_sample_thumbnails().
MODALITIES = [
    ("video", "6 camera streams", "fisheye + stereo"),
    ("depth", "confidence maps", "spatial geometry"),
    ("3D / SLAM", "point-cloud summaries", "scene structure"),
    ("IMU", "accel + gyro", "body motion"),
    ("hands", "future joints", "embodied action"),
    ("text", "objects + captions", "semantic grounding"),
]
88
+
89
# Joint-index pairs drawn as line segments in the hand thumbnail. Joint 0
# fans out into five chains of four joints each, so a hand needs at least
# 21 joints. Presumably 0 is the wrist and each chain is one finger --
# TODO confirm against the hand_mocap joint ordering.
HAND_EDGES = [
    (0, 1), (1, 2), (2, 3), (3, 4),
    (0, 5), (5, 6), (6, 7), (7, 8),
    (0, 9), (9, 10), (10, 11), (11, 12),
    (0, 13), (13, 14), (14, 15), (15, 16),
    (0, 17), (17, 18), (18, 19), (19, 20),
]
96
+
97
+
98
def image_data_uri(image, fmt: str = "PNG", quality: int = 92) -> str:
    """Encode *image* as a base64 ``data:`` URI.

    Args:
        image: Any object exposing a Pillow-style ``save(fp, **kwargs)``.
        fmt: Target format name, case-insensitive. ``"JPG"`` is accepted as
            an alias for ``"JPEG"``.
        quality: JPEG quality; ignored for every other format.

    Returns:
        ``data:image/<subtype>;base64,<payload>``.
    """
    fmt_name = fmt.upper()
    # Pillow registers the encoder as "JPEG" only; handing it "JPG" fails,
    # so fold the alias before it reaches save().
    if fmt_name == "JPG":
        fmt_name = "JPEG"

    save_kwargs = {"format": fmt_name}
    if fmt_name == "JPEG":
        save_kwargs.update({"quality": quality, "optimize": True})

    buffer = io.BytesIO()
    image.save(buffer, **save_kwargs)
    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    # Derive the subtype from the real format rather than labelling every
    # non-JPEG payload as PNG.
    mime = fmt_name.lower()
    return f"data:image/{mime};base64,{encoded}"
107
+
108
+
109
def make_canvas(size=(THUMB_WIDTH, THUMB_HEIGHT), color=(255, 254, 253)):
    """Return a new solid-colour RGB Pillow image.

    Args:
        size: ``(width, height)`` in pixels; defaults to the thumbnail size.
        color: RGB fill colour.
    """
    # Pillow is imported lazily so the module can still be imported (and the
    # infographic rendered without thumbnails) when Pillow is not installed.
    from PIL import Image

    return Image.new("RGB", size, color)
113
+
114
+
115
def fit_image(image, size=(THUMB_WIDTH, THUMB_HEIGHT)):
    """Return *image* converted to RGB and centre-cropped/resized to *size*.

    Args:
        image: Source Pillow image.
        size: Target ``(width, height)``; defaults to the thumbnail size.
    """
    from PIL import Image, ImageOps

    # Image.BICUBIC is the named form of the resampling filter code 3.
    return ImageOps.fit(
        image.convert("RGB"),
        size,
        method=Image.BICUBIC,
        centering=(0.5, 0.5),
    )
119
+
120
+
121
def read_video_frame(video_path: Path, frame_index: int = 2400):
    """Decode one frame of a video file into an RGB Pillow image.

    Args:
        video_path: Video file to open with OpenCV.
        frame_index: Zero-based frame to read. Negative values are clamped
            to 0; values past the end are clamped to the last frame when the
            container reports a frame count.

    Returns:
        The requested frame as a ``PIL.Image.Image`` in RGB channel order.

    Raises:
        RuntimeError: If the video cannot be opened or the frame cannot be
            read.
    """
    import cv2
    from PIL import Image

    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Could not open video: {video_path}")
        # The property reads back as 0 when the count is unknown.
        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
        frame_index = max(0, frame_index)
        if total:
            frame_index = min(frame_index, total - 1)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
        ok, frame = cap.read()
    finally:
        # Release the capture handle on every path, including failures.
        cap.release()

    if not ok:
        raise RuntimeError(f"Could not read frame {frame_index} from {video_path}")
    # OpenCV decodes to BGR; Pillow expects RGB.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return Image.fromarray(frame)
138
+
139
+
140
def draw_label(draw, xy, text, fill=(31, 36, 33), size=18):
    """Draw *text* at *xy* on a Pillow ``ImageDraw`` surface.

    Args:
        draw: ``ImageDraw`` object to draw on.
        xy: Top-left anchor of the text.
        text: String to render.
        fill: Text colour.
        size: Requested font size in pixels.

    A bold TrueType face is tried first. If none of the candidates can be
    loaded, Pillow's built-in font is used instead, at the requested size
    where the installed Pillow supports that.
    """
    from PIL import ImageFont

    candidates = (
        "/System/Library/Fonts/Supplemental/Arial Bold.ttf",  # macOS
        "DejaVuSans-Bold.ttf",  # resolved through the system font dirs
    )
    font = None
    for candidate in candidates:
        try:
            font = ImageFont.truetype(candidate, size)
        except (OSError, ImportError):
            # Missing font file or Pillow built without FreeType.
            continue
        break

    if font is None:
        try:
            # Newer Pillow releases can scale the built-in font.
            font = ImageFont.load_default(size=size)
        except (TypeError, OSError, ImportError):
            font = ImageFont.load_default()
    draw.text(xy, text, fill=fill, font=font)
148
+
149
+
150
def video_thumb(sample_dir: Path) -> str:
    """Build the video thumbnail: a fisheye frame next to a stereo frame.

    Args:
        sample_dir: Directory holding ``fisheye_cam0.mp4`` and, optionally,
            ``stereo_left.mp4``. When the stereo file is absent the fisheye
            frame is shown in both halves.

    Returns:
        The composed thumbnail as a JPEG ``data:`` URI.
    """
    from PIL import ImageDraw

    half = (194, THUMB_HEIGHT)
    fish = fit_image(read_video_frame(sample_dir / "fisheye_cam0.mp4", 2450), half)
    stereo_path = sample_dir / "stereo_left.mp4"
    if stereo_path.exists():
        stereo = fit_image(read_video_frame(stereo_path, 2450), half)
    else:
        stereo = fish.copy()

    canvas = make_canvas()
    canvas.paste(fish, (0, 0))
    canvas.paste(stereo, (226, 0))
    draw = ImageDraw.Draw(canvas, "RGBA")
    # Near-white divider between the two frames.
    draw.rounded_rectangle((188, 0, 232, THUMB_HEIGHT), radius=0, fill=(251, 250, 247, 235))
    # The labels used to be white text starting at x=194, i.e. on top of the
    # near-white divider and running into each other. Anchor each label to
    # its own frame on a dark pill, the same treatment depth_thumb uses.
    draw.rounded_rectangle((0, 0, 84, 28), radius=6, fill=(31, 36, 33, 150))
    draw.rounded_rectangle((232, 0, 308, 28), radius=6, fill=(31, 36, 33, 150))
    draw_label(draw, (10, 6), "fisheye", fill=(255, 255, 255), size=14)
    draw_label(draw, (242, 6), "stereo", fill=(255, 255, 255), size=14)
    return image_data_uri(canvas, "JPEG")
164
+
165
+
166
def colorize(values):
    """Map values in ``[0, 1]`` onto a five-stop colour ramp.

    Args:
        values: Array-like of any shape. Values outside ``[0, 1]`` are
            clamped. NaN and ``-inf`` map to the first stop, ``+inf`` to the
            last.

    Returns:
        ``uint8`` array of shape ``values.shape + (3,)`` holding RGB triples,
        linearly interpolated between neighbouring stops.
    """
    import numpy as np

    stops = np.array([
        [26, 35, 126],
        [36, 123, 160],
        [68, 170, 122],
        [238, 190, 76],
        [197, 79, 51],
    ], dtype=np.float32)
    # Non-finite values survive clip() as NaN and then become an
    # out-of-range integer index, so neutralise them first.
    x = np.nan_to_num(np.asarray(values), nan=0.0, posinf=1.0, neginf=0.0)
    x = np.clip(x, 0, 1)
    scaled = x * (len(stops) - 1)
    lo = np.floor(scaled).astype(int)
    # At x == 1 the lower stop is already the last one; clamp its partner.
    hi = np.clip(lo + 1, 0, len(stops) - 1)
    frac = scaled - lo
    rgb = stops[lo] * (1 - frac[..., None]) + stops[hi] * frac[..., None]
    return rgb.astype("uint8")
183
+
184
+
185
def depth_thumb(h5) -> str:
    """Build the depth thumbnail: colourised depth next to its confidence map.

    Args:
        h5: Open HDF5 handle providing ``depth/depth`` and
            ``depth/confidence``, both indexed by frame.

    Returns:
        The composed thumbnail as a JPEG ``data:`` URI.

    Raises:
        ValueError: If frame 2450 of the depth dataset has no finite pixel.
    """
    import numpy as np
    from PIL import Image, ImageDraw

    frame = np.array(h5["depth/depth"][2450], dtype=np.float32)
    valid = np.isfinite(frame)
    if not valid.any():
        raise ValueError("depth frame 2450 contains no finite values")
    # Robust range: ignore the extreme 3% at either end.
    lo, hi = np.percentile(frame[valid], [3, 97])
    norm = (frame - lo) / max(hi - lo, 1e-6)
    # Pixels excluded from the percentiles are still present in the frame;
    # pin them to the ramp ends so they cannot break the colour lookup.
    norm = np.nan_to_num(norm, nan=0.0, posinf=1.0, neginf=0.0)
    rgb = colorize(norm)
    depth = fit_image(Image.fromarray(rgb), (204, THUMB_HEIGHT))

    # NOTE(review): the uint8 cast assumes confidence is stored on a 0-255
    # scale -- confirm against the dataset.
    conf = np.array(h5["depth/confidence"][2450], dtype=np.uint8)
    conf_img = Image.fromarray(conf, mode="L").convert("RGB")
    conf_img = fit_image(conf_img, (204, THUMB_HEIGHT))

    canvas = make_canvas()
    canvas.paste(depth, (0, 0))
    canvas.paste(conf_img, (216, 0))
    draw = ImageDraw.Draw(canvas, "RGBA")
    # Translucent dark pills keep the white captions readable on any frame.
    draw.rounded_rectangle((0, 0, 116, 28), radius=6, fill=(31, 36, 33, 150))
    draw.rounded_rectangle((216, 0, 350, 28), radius=6, fill=(31, 36, 33, 150))
    draw_label(draw, (10, 6), "depth", fill=(255, 255, 255), size=14)
    draw_label(draw, (226, 6), "confidence", fill=(255, 255, 255), size=14)
    return image_data_uri(canvas, "JPEG")
207
+
208
+
209
def normalize_points(points, width, height, pad=16):
    """Scale the first two columns of *points* into a padded pixel box.

    Each axis is normalised between its 2nd and 98th percentile and clamped,
    so outliers land on the box edge. The second axis is flipped so larger
    values appear higher up in image coordinates.

    Args:
        points: Array-like of shape ``(N, >=2)``; only columns 0 and 1 are
            used. The input is not modified.
        width: Target width in pixels.
        height: Target height in pixels.
        pad: Margin kept free on every side.

    Returns:
        Array of shape ``(N, 2)`` with pixel ``(x, y)`` coordinates. Empty
        input gives an empty ``(0, 2)`` array.
    """
    import numpy as np

    points = np.asarray(points)
    if points.size == 0:
        # Percentiles of an empty set are undefined.
        return np.empty((0, 2))

    xy = points[:, :2].copy()
    lo = np.percentile(xy, 2, axis=0)
    hi = np.percentile(xy, 98, axis=0)
    # Guard against a zero span when all points share a coordinate.
    span = np.maximum(hi - lo, 1e-6)
    norm = np.clip((xy - lo) / span, 0, 1)
    norm[:, 1] = 1 - norm[:, 1]
    out = np.empty_like(norm)
    out[:, 0] = pad + norm[:, 0] * (width - pad * 2)
    out[:, 1] = pad + norm[:, 1] * (height - pad * 2)
    return out
223
+
224
+
225
def slam_thumb(h5) -> str:
    """Build the SLAM thumbnail: a point-cloud scatter with the pose path.

    Args:
        h5: Open HDF5 handle providing ``slam/point_cloud`` and
            ``slam/trans_xyz``.

    Returns:
        The thumbnail as a PNG ``data:`` URI.
    """
    import numpy as np
    from PIL import ImageDraw

    canvas = make_canvas()
    draw = ImageDraw.Draw(canvas, "RGBA")

    points = np.array(h5["slam/point_cloud"], dtype=np.float64)
    points = points[np.isfinite(points).all(axis=1)]
    if len(points) > 2600:
        # Evenly spaced subsample keeps the drawing loop bounded.
        points = points[np.linspace(0, len(points) - 1, 2600).astype(int)]
    if len(points):
        # Plot columns 0 and 2 on the canvas; column 1 drives the colour.
        xy = normalize_points(points[:, [0, 2, 1]], THUMB_WIDTH, THUMB_HEIGHT)
        z = points[:, 1]
        z_lo, z_hi = np.percentile(z, [2, 98])
        colors = colorize((z - z_lo) / max(z_hi - z_lo, 1e-6))
        for (x, y), color in zip(xy, colors):
            draw.ellipse(
                (x - 1.2, y - 1.2, x + 1.2, y + 1.2),
                fill=tuple(color.tolist()) + (165,),
            )

    # Every 36th pose among the first 2450 entries.
    traj = np.array(h5["slam/trans_xyz"][:2450:36], dtype=np.float64)
    traj = traj[np.isfinite(traj).all(axis=1)]
    if len(traj) >= 2:
        # NOTE(review): the path is normalised against its own extent, not
        # the point cloud's, so the two layers do not share a scale.
        traj_xy = normalize_points(traj[:, [0, 2, 1]], THUMB_WIDTH, THUMB_HEIGHT)
        for a, b in zip(traj_xy[:-1], traj_xy[1:]):
            draw.line((a[0], a[1], b[0], b[1]), fill=(31, 108, 159, 190), width=2)

    draw_label(draw, (16, 14), "SLAM point cloud + pose", fill=(31, 36, 33), size=17)
    return image_data_uri(canvas, "PNG")
247
+
248
+
249
def imu_thumb(h5) -> str:
    """Build the IMU thumbnail: six overlaid accelerometer/gyroscope traces.

    Args:
        h5: Open HDF5 handle providing ``imu/keyframe_indices``,
            ``imu/accel_xyz`` and ``imu/gyro_xyz``.

    Returns:
        The thumbnail as a PNG ``data:`` URI.
    """
    import numpy as np
    from PIL import ImageDraw

    canvas = make_canvas()
    draw = ImageDraw.Draw(canvas, "RGBA")

    # Presumably maps keyframe 2450 to its IMU sample index -- TODO confirm.
    key_idx = int(h5["imu/keyframe_indices"][2450])
    start = max(0, key_idx - 220)
    stop = key_idx + 220
    accel = np.array(h5["imu/accel_xyz"][start:stop], dtype=np.float64)
    gyro = np.array(h5["imu/gyro_xyz"][start:stop], dtype=np.float64)
    series = [accel[:, 0], accel[:, 1], accel[:, 2], gyro[:, 0], gyro[:, 1], gyro[:, 2]]
    colors = [(31, 108, 159), (52, 101, 56), (176, 68, 62), (155, 101, 22), (46, 119, 117), (96, 109, 128)]

    # Faint horizontal guide lines behind the traces.
    for row in range(4):
        y = 26 + row * 33
        draw.line((18, y, THUMB_WIDTH - 18, y), fill=(228, 222, 212, 180), width=1)

    for values, color in zip(series, colors):
        values = values[:420]
        count = len(values)
        if count < 2:
            continue
        # Each trace is scaled to its own 3rd-97th percentile range.
        lo, hi = np.percentile(values, [3, 97])
        norm = np.clip((values - lo) / max(hi - lo, 1e-6), 0, 1)
        xs = 18 + np.arange(count) / (count - 1) * (THUMB_WIDTH - 36)
        ys = 138 - norm * 112
        draw.line(list(zip(xs.tolist(), ys.tolist())), fill=color + (200,), width=2)

    draw_label(draw, (16, 12), "accel / gyro traces", fill=(31, 36, 33), size=17)
    return image_data_uri(canvas, "PNG")
277
+
278
+
279
def hands_thumb(h5) -> str:
    """Build the hands thumbnail: left and right hand skeletons side by side.

    Args:
        h5: Open HDF5 handle providing ``hand_mocap/left_joints_3d`` and
            ``hand_mocap/right_joints_3d``, indexed by frame. Each hand must
            supply at least 21 joints for ``HAND_EDGES``.

    Returns:
        The thumbnail as a PNG ``data:`` URI. A hand with any non-finite
        joint in its first two coordinates is left out.
    """
    import numpy as np
    from PIL import ImageDraw

    canvas = make_canvas()
    draw = ImageDraw.Draw(canvas, "RGBA")
    left = np.array(h5["hand_mocap/left_joints_3d"][2450], dtype=np.float32)
    right = np.array(h5["hand_mocap/right_joints_3d"][2450], dtype=np.float32)

    # Both hands share one scale, taken from the finite joints only so a
    # missing hand cannot turn the bounds into NaN.
    planar = np.concatenate([left, right], axis=0)[:, :2]
    planar = planar[np.isfinite(planar).all(axis=1)]

    if len(planar):
        lo = np.percentile(planar, 2, axis=0)
        hi = np.percentile(planar, 98, axis=0)
        span = np.maximum(hi - lo, 1e-6)

        def project(points, x_offset):
            # Normalise, flip vertically, then place in a 150x108 cell.
            xy = (points[:, :2] - lo) / span
            xy[:, 1] = 1 - xy[:, 1]
            xy[:, 0] = x_offset + xy[:, 0] * 150
            xy[:, 1] = 26 + xy[:, 1] * 108
            return xy

        hands = [(left, 28, (31, 108, 159)), (right, 224, (155, 101, 22))]
        for points, x_offset, color in hands:
            if not np.isfinite(points[:, :2]).all():
                continue
            xy = project(points, x_offset)
            for a, b in HAND_EDGES:
                draw.line((xy[a][0], xy[a][1], xy[b][0], xy[b][1]), fill=color + (185,), width=3)
            for x, y in xy:
                draw.ellipse((x - 3, y - 3, x + 3, y + 3), fill=color + (230,))

    draw_label(draw, (16, 12), "left / right 3D hand joints", fill=(31, 36, 33), size=17)
    return image_data_uri(canvas, "PNG")
307
+
308
+
309
def text_thumb(h5) -> str:
    """Build the text thumbnail: task title, object chips and action chips.

    Args:
        h5: Open HDF5 handle whose ``caption`` entry holds a JSON document
            (``str`` or UTF-8 ``bytes``). Only the first item of its
            ``segments`` list is shown.

    Returns:
        The thumbnail as a PNG ``data:`` URI. Missing ``config`` or
        ``segments`` data yields a thumbnail with fewer elements instead of
        an error.
    """
    from PIL import ImageDraw

    raw = h5["caption"][()]
    if isinstance(raw, bytes):
        raw = raw.decode("utf-8", errors="replace")
    data = json.loads(raw)

    segments = data.get("segments") or [{}]
    segment = segments[0]
    # Distinct object names across all groups, alphabetical, at most five.
    objects = sorted({item for values in segment.get("objects", {}).values() for item in values})[:5]
    actions = [a.get("label", "") for a in segment.get("Current Action", [])][:2]
    title = data.get("config", {}).get("Main Task", "caption")

    canvas = make_canvas()
    draw = ImageDraw.Draw(canvas, "RGBA")
    draw_label(draw, (16, 13), title, fill=(31, 36, 33), size=17)

    # Left column: one chip per object, width estimated at 8 px per character.
    y = 46
    for label in objects:
        draw.rounded_rectangle((16, y, 16 + 20 + len(label) * 8, y + 24), radius=6, fill=(251, 243, 219, 230), outline=(226, 200, 144, 255))
        draw_label(draw, (26, y + 5), label, fill=(83, 74, 56), size=12)
        y += 30

    # Right column: action labels, cut to 32 characters plus an ellipsis.
    x = 184
    y = 48
    for action in actions:
        shown = action[:32] + ("..." if len(action) > 32 else "")
        draw.rounded_rectangle((x, y, THUMB_WIDTH - 16, y + 36), radius=7, fill=(232, 244, 243, 230), outline=(169, 204, 202, 255))
        draw_label(draw, (x + 10, y + 10), shown, fill=(31, 36, 33), size=12)
        y += 44
    return image_data_uri(canvas, "PNG")
335
+
336
+
337
def load_sample_thumbnails(sample_dir: Path | None) -> dict[str, str]:
    """Build one thumbnail ``data:`` URI per modality from a sample episode.

    Thumbnails are optional decoration, so this never raises: if the sample
    directory or a required file is missing, or any builder fails, an empty
    mapping is returned and the cards render without images.

    Args:
        sample_dir: Directory expected to contain ``fisheye_cam0.mp4`` and
            ``annotation.hdf5``; ``None`` disables thumbnails.

    Returns:
        Mapping from modality name (the first element of each ``MODALITIES``
        entry) to ``data:`` URI, or ``{}``.
    """
    if sample_dir is None or not sample_dir.exists():
        return {}
    hdf5_path = sample_dir / "annotation.hdf5"
    required = [sample_dir / "fisheye_cam0.mp4", hdf5_path]
    if not all(path.exists() for path in required):
        return {}

    try:
        # Imported here so a missing h5py only disables the thumbnails.
        import h5py

        thumbnails = {"video": video_thumb(sample_dir)}
        with h5py.File(hdf5_path, "r") as h5:
            thumbnails.update({
                "depth": depth_thumb(h5),
                "3D / SLAM": slam_thumb(h5),
                "IMU": imu_thumb(h5),
                "hands": hands_thumb(h5),
                "text": text_thumb(h5),
            })
        return thumbnails
    except Exception as exc:
        # Deliberately broad: any failure degrades to "no thumbnails" so the
        # cards stay visually uniform.
        print(f"Warning: could not build sample modality thumbnails: {exc}")
        return {}
360
+
361
 
362
def load_summary() -> dict:
    """Read ``SUMMARY_PATH`` as UTF-8 and return the parsed JSON document."""
    return json.loads(SUMMARY_PATH.read_text(encoding="utf-8"))
 
389
 
390
def short_io(task_name: str, metrics: dict) -> str:
    """Return the one-line "input -> output" caption for a task card.

    Args:
        task_name: Task identifier.
        metrics: That task's entry from the summary report. Its ``"input"``
            value is used only when no curated caption exists.

    Returns:
        The curated caption, else ``metrics["input"]``, else ``""``.
    """
    custom = {
        "timeline_action": "all modalities -> current action label",
        "timeline_subtask": "all modalities -> current subtask label",
        "transition_detection": "all modalities -> boundary vs steady",
        "next_action": "window at t -> action at t+20 frames",
        "hand_trajectory_forecast": "all modalities -> future hand joints",
        "contact_prediction": "non-contact modalities -> contact state",
        "object_relevance": "non-caption modalities -> relevant objects",
        "caption_grounding": "text query -> matching sensor window",
        "cross_modal_retrieval": "motion / IMU / camera -> depth / video match",
        "modality_reconstruction": "motion / IMU / camera -> depth / video vector",
        "temporal_order": "two adjacent windows -> correct order",
        "misalignment_detection": "motion + visual pair -> aligned or shifted",
    }
    return custom.get(task_name, metrics.get("input", ""))
406
 
407
 
408
def task_card(task_name: str, kind: str, metrics: dict, group: dict, index: int) -> str:
    """Render one task as an ``<article class="task-card">`` HTML fragment.

    Args:
        task_name: Task identifier, shown as the card heading.
        kind: Short category tag shown next to the index.
        metrics: That task's entry from the summary report.
        group: Owning ``GROUPS`` entry; its ``"color"`` and ``"soft"`` values
            become the card's ``--accent`` / ``--soft`` CSS variables.
        index: Running task number, zero-padded to two digits.

    Returns:
        The HTML fragment. Text content is HTML-escaped; the two colour
        values are inserted as-is.
    """
    label, value = metric_for(task_name, metrics)
    io = short_io(task_name, metrics)
    return f"""
    <article class="task-card" style="--accent:{group['color']};--soft:{group['soft']};">
      <div class="task-meta">
        <span class="index">{index:02d}</span>
        <span class="kind">{html.escape(kind)}</span>
      </div>
      <h3>{html.escape(task_name)}</h3>
      <p>{html.escape(io)}</p>
      <div class="metric">
        <span>{html.escape(label)}</span>
        <strong>{html.escape(value)}</strong>
      </div>
    </article>
    """
425
 
426
 
427
def modality_card(name: str, line_one: str, line_two: str, index: int, thumbnail: str | None) -> str:
    """Render one modality as an ``<article class="modality">`` HTML fragment.

    Args:
        name: Modality name, shown as the card heading.
        line_one: Headline under the name.
        line_two: Secondary line under the headline.
        index: Card number, zero-padded to two digits.
        thumbnail: Image URI for the card, or ``None``/empty to omit the
            thumbnail.

    Returns:
        The HTML fragment with all text and the image URI HTML-escaped.
    """
    thumb_html = ""
    if thumbnail:
        # Escape the URI too; base64 data URIs pass through unchanged.
        src = html.escape(thumbnail, quote=True)
        thumb_html = f'<div class="modality-thumb"><img src="{src}" alt=""></div>'
    return f"""
    <article class="modality">
      {thumb_html}
      <div class="modality-index">{index:02d}</div>
      <h3>{html.escape(name)}</h3>
      <p>{html.escape(line_one)}</p>
      <span>{html.escape(line_two)}</span>
    </article>
    """
440
+
441
 
442
+ def build_html(summary: dict, base_image: Path | None, sample_dir: Path | None) -> str:
443
+ suite = summary["tasks"]
444
+ thumbnails = load_sample_thumbnails(sample_dir)
445
+ base_layer = ""
446
+ if base_image is not None and base_image.exists():
447
+ base_layer = f'<div class="image-background" style="background-image:url(\'{base_image.resolve().as_uri()}\');"></div>'
448
  stats = [
449
+ (f"{summary['num_frames']:,}", "frames"),
450
+ (f"{summary['num_windows']:,}", "windows"),
451
+ (f"{summary['feature_dim']:,}", "features"),
452
+ (f"{len(suite)}", "tasks"),
453
+ ("70/30", "chronological split"),
454
  ]
455
+ stats_html = "".join(
456
+ f"<div class=\"stat\"><strong>{html.escape(value)}</strong><span>{html.escape(label)}</span></div>"
457
+ for value, label in stats
458
+ )
459
+ modalities_html = "".join(
460
+ modality_card(name, line_one, line_two, index, thumbnails.get(name))
461
+ for index, (name, line_one, line_two) in enumerate(MODALITIES, start=1)
462
+ )
463
+
464
+ task_index = 1
465
+ families = []
466
+ for group in GROUPS:
467
+ cards = []
468
+ for task_name, kind in group["tasks"]:
469
+ cards.append(task_card(task_name, kind, suite[task_name], group, task_index))
470
+ task_index += 1
471
+ families.append(
472
+ f"""
473
+ <section class="family" style="--accent:{group['color']};--soft:{group['soft']};">
474
+ <div class="family-head">
475
+ <span>{html.escape(group['tone'])}</span>
476
+ <h2>{html.escape(group['name'])}</h2>
477
+ </div>
478
+ <div class="family-cards">{''.join(cards)}</div>
479
+ </section>
480
+ """
481
+ )
482
+
483
  return f"""<!doctype html>
484
  <html lang="en">
485
  <head>
486
  <meta charset="utf-8">
487
+ <meta name="viewport" content="width={CANVAS_WIDTH}, initial-scale=1">
488
  <title>Ropedia 12-Task Episode Suite Infographic</title>
489
  <style>
490
  * {{ box-sizing: border-box; }}
491
+ html,
492
  body {{
493
+ margin: 0;
494
+ width: {CANVAS_WIDTH}px;
495
+ height: {CANVAS_HEIGHT}px;
496
+ background: #fbfaf7;
497
+ }}
498
+ body {{
499
+ font-family: "Avenir Next", "SF Pro Display", ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
500
+ color: #1f2421;
501
+ text-rendering: optimizeLegibility;
502
  }}
503
  .canvas {{
504
  position: relative;
505
+ width: {CANVAS_WIDTH}px;
506
+ height: {CANVAS_HEIGHT}px;
507
  overflow: hidden;
508
+ padding: 54px 64px 44px;
509
+ background:
510
+ radial-gradient(circle at 9% 6%, rgba(31,108,159,0.13), transparent 20%),
511
+ radial-gradient(circle at 90% 9%, rgba(155,101,22,0.10), transparent 22%),
512
+ linear-gradient(90deg, rgba(68,55,38,0.035) 1px, transparent 1px),
513
+ linear-gradient(0deg, rgba(68,55,38,0.027) 1px, transparent 1px),
514
+ #fbfaf7;
515
+ background-size: auto, auto, 54px 54px, 54px 54px, auto;
516
  }}
517
+ .image-background {{
518
  position: absolute;
519
+ inset: 0;
520
+ background-position: center;
521
+ background-repeat: no-repeat;
522
+ background-size: cover;
523
+ opacity: 0.30;
524
+ filter: saturate(0.85) contrast(0.98);
525
+ }}
526
+ .content {{
527
+ position: relative;
528
+ z-index: 1;
529
+ }}
530
+ .header {{
531
+ display: grid;
532
+ grid-template-columns: 1.25fr 0.75fr;
533
+ gap: 44px;
534
+ align-items: end;
535
+ padding-bottom: 30px;
536
+ border-bottom: 1px solid #e4ded4;
537
+ }}
538
+ .kicker {{
539
+ display: inline-flex;
540
+ align-items: center;
541
+ gap: 12px;
542
+ color: #5f625d;
543
+ font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace;
544
+ font-size: 15px;
545
+ text-transform: uppercase;
546
+ letter-spacing: 0.08em;
547
+ }}
548
+ .kicker::before {{
549
+ content: "";
550
+ width: 44px;
551
+ height: 1px;
552
+ background: #1f2421;
553
  }}
554
  h1 {{
555
+ margin: 18px 0 0;
556
+ max-width: 930px;
557
+ font-size: 72px;
558
+ line-height: 0.95;
559
  letter-spacing: 0;
 
560
  }}
561
  .subtitle {{
562
+ margin: 18px 0 0;
563
+ max-width: 900px;
564
+ color: #5f625d;
565
+ font-size: 23px;
566
  line-height: 1.35;
567
  font-weight: 520;
568
  }}
569
  .stats {{
570
+ display: grid;
571
+ grid-template-columns: repeat(5, minmax(0, 1fr));
572
+ gap: 10px;
 
573
  }}
574
+ .stat {{
575
+ min-height: 78px;
576
+ padding: 14px 15px;
577
+ border: 1px solid #e4ded4;
578
+ background: rgba(255,254,253,0.76);
579
+ border-radius: 10px;
580
+ }}
581
+ .stat strong {{
582
+ display: block;
583
+ font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace;
584
+ font-size: 25px;
585
+ line-height: 1;
586
+ font-variant-numeric: tabular-nums;
587
+ }}
588
+ .stat span {{
589
+ display: block;
590
+ margin-top: 8px;
591
+ color: #6f716c;
592
+ font-size: 13px;
593
+ line-height: 1.15;
594
+ }}
595
+ .section-label {{
596
+ display: flex;
597
  align-items: center;
598
+ justify-content: space-between;
599
+ margin: 28px 0 14px;
600
+ color: #5f625d;
601
+ font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace;
602
+ font-size: 14px;
603
+ text-transform: uppercase;
604
+ letter-spacing: 0.08em;
605
+ }}
606
+ .section-label span:last-child {{
607
+ color: #7e817b;
608
+ text-transform: none;
609
+ letter-spacing: 0;
610
+ font-family: inherit;
611
+ }}
612
+ .modalities {{
613
+ display: grid;
614
+ grid-template-columns: repeat(6, minmax(0, 1fr));
615
+ gap: 14px;
616
  }}
617
  .modality {{
618
+ min-height: 204px;
619
+ padding: 11px 12px 14px;
620
+ border: 1px solid #e4ded4;
621
+ background: rgba(255,254,253,0.84);
622
+ border-radius: 12px;
623
+ }}
624
+ .modality-thumb {{
625
+ height: 86px;
626
+ overflow: hidden;
627
+ border: 1px solid #eee9e1;
628
+ border-radius: 9px;
629
+ background: #f5f1e9;
630
+ }}
631
+ .modality-thumb img {{
632
+ display: block;
633
+ width: 100%;
634
+ height: 100%;
635
+ object-fit: cover;
636
+ }}
637
+ .modality-index,
638
+ .index {{
639
+ font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace;
640
+ font-variant-numeric: tabular-nums;
641
+ }}
642
+ .modality-index {{
643
+ color: #8a8072;
644
  font-size: 12px;
645
+ margin-top: 10px;
 
 
 
646
  }}
647
+ .modality h3 {{
648
+ margin: 8px 0 0;
649
+ font-size: 22px;
 
 
650
  line-height: 1;
651
+ text-transform: uppercase;
 
652
  }}
653
+ .modality p {{
654
+ margin: 9px 0 0;
655
+ color: #4f565f;
656
+ font-size: 15px;
657
+ font-weight: 650;
658
+ }}
659
+ .modality span {{
660
+ display: block;
661
+ margin-top: 5px;
662
+ color: #7a7d77;
663
+ font-size: 13px;
664
+ }}
665
+ .shared-band {{
666
+ display: grid;
667
+ grid-template-columns: 1fr auto 1fr auto 1fr auto 1fr;
668
+ gap: 12px;
669
+ align-items: center;
670
+ margin-top: 20px;
671
+ padding: 14px;
672
+ border: 1px solid #e4ded4;
673
+ background: rgba(245,241,233,0.82);
674
+ border-radius: 12px;
675
+ }}
676
+ .step {{
677
+ min-height: 62px;
678
+ padding: 13px 15px;
679
+ background: #fffefd;
680
+ border: 1px solid #eee9e1;
681
+ border-radius: 9px;
682
+ }}
683
+ .step strong {{
684
+ display: block;
685
+ font-size: 17px;
686
+ line-height: 1.1;
687
+ }}
688
+ .step span {{
689
+ display: block;
690
+ margin-top: 5px;
691
+ color: #6f716c;
692
+ font-size: 13px;
693
+ }}
694
+ .arrow {{
695
+ color: #938a7d;
696
+ font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace;
697
+ font-size: 22px;
698
+ }}
699
+ .families {{
700
+ display: grid;
701
+ grid-template-columns: repeat(4, minmax(0, 1fr));
702
+ gap: 20px;
703
+ margin-top: 26px;
704
+ }}
705
+ .family {{
706
+ padding: 17px;
707
+ border: 1px solid color-mix(in srgb, var(--accent) 24%, #e4ded4);
708
+ background: rgba(255,254,253,0.82);
709
+ border-radius: 16px;
710
+ }}
711
+ .family-head {{
712
+ display: flex;
713
+ align-items: end;
714
+ justify-content: space-between;
715
+ gap: 16px;
716
+ min-height: 78px;
717
+ padding-bottom: 14px;
718
+ border-bottom: 1px solid color-mix(in srgb, var(--accent) 18%, #eee9e1);
719
+ }}
720
+ .family-head span {{
721
+ color: var(--accent);
722
+ font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace;
723
+ font-size: 12px;
724
+ text-transform: uppercase;
725
+ letter-spacing: 0.08em;
726
+ }}
727
+ .family-head h2 {{
728
+ margin: 0;
729
+ color: var(--accent);
730
+ font-size: 29px;
731
+ line-height: 1.02;
732
+ text-align: right;
733
+ }}
734
+ .family-cards {{
735
+ display: grid;
736
+ gap: 13px;
737
+ margin-top: 15px;
738
+ }}
739
+ .task-card {{
740
+ min-height: 168px;
741
+ padding: 17px 18px;
742
+ border: 1px solid color-mix(in srgb, var(--accent) 22%, #e4ded4);
743
+ background: linear-gradient(180deg, #fffefd, color-mix(in srgb, var(--soft) 45%, #fffefd));
744
+ border-radius: 13px;
745
+ }}
746
+ .task-meta {{
747
+ display: flex;
748
+ align-items: center;
749
+ justify-content: space-between;
750
+ gap: 12px;
751
+ }}
752
+ .index {{
753
+ color: #8a8072;
754
+ font-size: 12px;
755
  }}
756
  .kind {{
757
  display: inline-flex;
758
  align-items: center;
759
+ height: 24px;
760
+ padding: 0 9px;
761
  border-radius: 6px;
762
+ border: 1px solid color-mix(in srgb, var(--accent) 30%, #ffffff);
763
  color: var(--accent);
764
+ background: rgba(255,255,255,0.72);
765
  text-transform: uppercase;
766
+ font-size: 11px;
767
  line-height: 1;
768
+ font-weight: 830;
 
769
  }}
770
+ .task-card h3 {{
771
+ margin: 12px 0 0;
772
  color: #111827;
773
+ font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace;
774
+ font-size: 21px;
775
+ line-height: 1.18;
776
+ overflow-wrap: anywhere;
777
  }}
778
+ .task-card p {{
779
+ margin: 11px 0 0;
780
+ min-height: 39px;
781
+ color: #4f565f;
782
+ font-size: 15px;
783
  line-height: 1.28;
784
+ font-weight: 560;
785
  }}
786
  .metric {{
787
  display: inline-flex;
788
+ align-items: baseline;
789
+ gap: 10px;
790
+ margin-top: 14px;
791
+ min-height: 32px;
792
+ padding: 7px 10px;
793
+ border-radius: 8px;
794
+ border: 1px solid color-mix(in srgb, var(--accent) 32%, #ffffff);
795
+ background: rgba(255,255,255,0.82);
 
796
  }}
797
  .metric span {{
798
  color: #64748b;
799
+ font-size: 13px;
800
  font-weight: 760;
801
  }}
802
  .metric strong {{
803
  color: var(--accent);
804
+ font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace;
805
+ font-size: 20px;
806
  line-height: 1;
807
  font-weight: 860;
808
+ font-variant-numeric: tabular-nums;
809
  }}
810
  .footer {{
811
+ display: flex;
812
+ align-items: center;
813
+ justify-content: space-between;
814
+ gap: 32px;
815
+ margin-top: 22px;
816
+ padding-top: 20px;
817
+ border-top: 1px solid #e4ded4;
818
+ color: #5f625d;
819
+ font-size: 18px;
820
+ line-height: 1.35;
821
+ font-weight: 620;
822
+ }}
823
+ .footer code {{
824
+ font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace;
825
+ color: #1f2421;
826
+ background: #f5f1e9;
827
+ border: 1px solid #e4ded4;
828
+ border-radius: 7px;
829
+ padding: 6px 9px;
830
+ white-space: nowrap;
831
  }}
832
  </style>
833
  </head>
834
  <body>
835
  <main class="canvas" aria-label="Ropedia 12-task episode suite infographic">
836
+ {base_layer}
837
+ <div class="content">
838
+ <header class="header">
839
+ <div>
840
+ <div class="kicker">verified single-episode task suite</div>
841
+ <h1>Ropedia 12-task episode suite</h1>
842
+ <p class="subtitle">A clean map from synchronized multimodal windows to 12 auditable task heads, with metrics loaded from the committed summary report.</p>
843
+ </div>
844
+ <div class="stats">{stats_html}</div>
845
+ </header>
846
+
847
+ <div class="section-label">
848
+ <span>input modalities</span>
849
+ <span>all signals align to the same sliding-window contract</span>
850
+ </div>
851
+ <section class="modalities">{modalities_html}</section>
852
+
853
+ <section class="shared-band" aria-label="shared processing contract">
854
+ <div class="step"><strong>raw public episode</strong><span>videos, depth, motion, IMU, text</span></div>
855
+ <div class="arrow">-></div>
856
+ <div class="step"><strong>20-frame windows</strong><span>stride 5, chronological order</span></div>
857
+ <div class="arrow">-></div>
858
+ <div class="step"><strong>8,378-d vector</strong><span>explicit feature manifest</span></div>
859
+ <div class="arrow">-></div>
860
+ <div class="step"><strong>12 minimal heads</strong><span>softmax, ridge, logistic</span></div>
861
+ </section>
862
+
863
+ <section class="families">{''.join(families)}</section>
864
+
865
+ <footer class="footer">
866
+ <span>Single public sample episode: useful for pipeline validation and task design, not cross-episode generalization.</span>
867
+ <code>results/episode_task_suite/summary_report.json</code>
868
+ </footer>
869
  </div>
 
 
 
 
 
 
 
 
 
870
  </main>
871
  </body>
872
  </html>
 
882
  "playwright",
883
  "screenshot",
884
  "--full-page",
885
+ f"--viewport-size={CANVAS_WIDTH},{CANVAS_HEIGHT}",
886
  html_path.resolve().as_uri(),
887
  str(output_path),
888
  ],
 
893
  def main() -> int:
894
  parser = argparse.ArgumentParser()
895
  parser.add_argument("--base-image", type=Path, default=DEFAULT_BASE)
896
+ parser.add_argument("--sample-dir", type=Path, default=DEFAULT_SAMPLE_DIR)
897
  parser.add_argument("--output", type=Path, default=DEFAULT_OUTPUT)
898
  parser.add_argument("--html", type=Path)
899
+ parser.add_argument("--no-export", action="store_true", help="Only write the HTML used to render the image.")
900
  args = parser.parse_args()
901
 
902
  summary = load_summary()
903
+ html_text = build_html(summary, args.base_image, args.sample_dir)
904
  if args.html is None:
905
  with tempfile.NamedTemporaryFile("w", suffix=".html", encoding="utf-8", delete=False) as handle:
906
  handle.write(html_text)
 
913
  if not args.no_export:
914
  render_html(html_path, args.output)
915
  print(f"Wrote image: {args.output}")
916
+ print(f"Wrote render HTML: {html_path}")
917
  return 0
918
 
919