Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
| #!/usr/bin/env python3 | |
| """ | |
| Render a polished Ropedia Xperience-10M 20-task infographic. | |
| The task names, inputs, and metrics are read from docs/data/task_suite_20.json. | |
| The output is a deterministic PNG rendered from HTML/CSS so the labels stay | |
| legible and inspectable. | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import base64 | |
| import html | |
| import io | |
| import json | |
| import os | |
| import subprocess | |
| import tempfile | |
| from pathlib import Path | |
| from task_display import task_display_name | |
# Repository root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# JSON summary read by load_summary(); the module docstring names it as the
# source of task names, inputs, and metrics.
SUMMARY_PATH = ROOT / "docs/data/task_suite_20.json"
# Background image path. NOTE(review): presumably the CLI default for the base
# image; the argument parser is not visible in this part of the file.
DEFAULT_BASE = ROOT / "docs/assets/task_suite_infographic_base.png"
# Sample episode directory, located next to (not inside) the repository root.
DEFAULT_SAMPLE_DIR = ROOT.parent / "data/sample/xperience-10m-sample"
# Last candidate tried by resolve_sample_dir(): a Dropbox folder under the
# current user's home directory (macOS "Library/CloudStorage" layout).
DROPBOX_SAMPLE_DIR = Path.home() / "Library/CloudStorage/Dropbox/Ropedia/data/sample/xperience-10m-sample"
# NOTE(review): presumably the CLI default for the rendered PNG; confirm
# against the argument parser.
DEFAULT_OUTPUT = ROOT / "docs/assets/task_suite_infographic.png"
# Page size in CSS pixels; interpolated into the HTML/CSS template.
CANVAS_WIDTH = 1800
CANVAS_HEIGHT = 5000
# Pixel size of every modality thumbnail generated from sample data.
THUMB_WIDTH = 880
THUMB_HEIGHT = 520
# Modality name -> image file name under docs/assets/modalities. build_html()
# uses these when no thumbnail could be rendered for that modality.
MODALITY_ASSET_FALLBACKS = {
    "video": "video.jpg",
    "audio": "audio.png",
    "depth": "depth.jpg",
    "pose / SLAM": "pose_slam.png",
    "motion capture": "motion_capture.png",
    "inertial": "inertial.png",
    "language": "language.png",
}
# Task families, in display order. For each family build_html() emits one
# <section>:
#   name  -> section heading
#   tone  -> short text rendered in the family header
#   color -> CSS custom property --accent
#   soft  -> CSS custom property --soft
#   tasks -> (task id, kind) pairs; the id is the lookup key into the summary's
#            task table and the kind is printed on the card.
# Cards are numbered consecutively across families, so the order here fixes
# the index shown on each card.
# NOTE(review): the tone names ("blue", "amber", "red", ...) do not obviously
# describe the hex colours next to them — confirm they are intended as labels.
GROUPS = [
    {
        "name": "Action + Procedure",
        "tone": "teal",
        "color": "#9bdfff",
        "soft": "#071d20",
        "tasks": [
            ("timeline_action", "supervised"),
            ("timeline_subtask", "supervised"),
            ("transition_detection", "diagnostic"),
            ("next_action", "supervised"),
        ],
    },
    {
        "name": "Motion + Objects",
        "tone": "blue",
        "color": "#ccffa0",
        "soft": "#10210a",
        "tasks": [
            ("hand_trajectory_forecast", "forecast"),
            ("contact_prediction", "supervised"),
            ("object_relevance", "supervised"),
            ("caption_grounding", "retrieval"),
        ],
    },
    {
        "name": "Retrieval + Alignment",
        "tone": "amber",
        "color": "#7ae5c3",
        "soft": "#092019",
        "tasks": [
            ("cross_modal_retrieval", "retrieval"),
            ("modality_reconstruction", "forecast"),
            ("temporal_order", "diagnostic"),
            ("misalignment_detection", "diagnostic"),
        ],
    },
    {
        "name": "Long-Horizon Semantics",
        "tone": "green",
        "color": "#d8f4a5",
        "soft": "#1b210d",
        "tasks": [
            ("long_horizon_next_action", "forecast"),
            ("next_subtask_forecast", "forecast"),
            ("interaction_text_prediction", "language"),
            ("action_object_relation", "relation"),
        ],
    },
    {
        "name": "Future Sets + Sensors",
        "tone": "red",
        "color": "#b7ff91",
        "soft": "#1b210d",
        "tasks": [
            ("object_set_forecast", "multi-label"),
            ("imu_to_hand_pose", "regression"),
            ("camera_view_sync_retrieval", "retrieval"),
            ("time_to_transition", "regression"),
        ],
    },
]
# One row per modality card, in display order:
#   (name, modality type, "Sample contains" text, "Current baseline use" text).
# The name is also the key used to look up that modality's thumbnail.
MODALITIES = [
    ("video", "visual stream", "6 synchronized camera MP4 streams", "RGB/fisheye/stereo frame statistics"),
    ("audio", "acoustic stream", "audio stream embedded in MP4", "audio feature group"),
    ("depth", "geometry map", "depth map + confidence channel", "spatial geometry feature block"),
    ("pose / SLAM", "camera pose", "trajectory + sparse SLAM map", "position + orientation features"),
    ("motion capture", "human motion", "body + hand joint tracks", "3D mocap feature statistics"),
    ("inertial", "wearable sensor", "accelerometer + gyroscope", "wearable motion statistics"),
    ("language", "semantic annotation", "object tags + action captions", "task labels + semantic targets"),
]
# Pairs of hand-joint indices that mocap_thumb() joins with a line segment.
# The pairs form five chains of four segments, all starting at joint 0 and
# covering indices 0..20 — presumably a wrist plus five fingers in a 21-joint
# hand layout; TODO confirm against the mocap schema.
HAND_EDGES = [
    (0, 1), (1, 2), (2, 3), (3, 4),
    (0, 5), (5, 6), (6, 7), (7, 8),
    (0, 9), (9, 10), (10, 11), (11, 12),
    (0, 13), (13, 14), (14, 15), (15, 16),
    (0, 17), (17, 18), (18, 19), (19, 20),
]
def image_data_uri(image, fmt: str = "PNG", quality: int = 92) -> str:
    """Encode *image* as a base64 ``data:`` URI.

    Args:
        image: Object exposing ``save(fp, **kwargs)``; the callers in this
            file pass PIL images.
        fmt: Output format name, case-insensitive. ``"JPG"`` is accepted as an
            alias of ``"JPEG"``.
        quality: JPEG quality setting; only forwarded for JPEG output.

    Returns:
        ``data:image/<type>;base64,<payload>`` where ``<type>`` is the
        lower-cased format name.
    """
    format_name = fmt.upper()
    if format_name == "JPG":
        # The encoder is registered under "JPEG" only; forwarding the alias
        # unchanged makes save() fail on an unknown format.
        format_name = "JPEG"

    save_kwargs = {"format": format_name}
    if format_name == "JPEG":
        save_kwargs.update(quality=quality, optimize=True)

    buffer = io.BytesIO()
    image.save(buffer, **save_kwargs)
    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    # Derive the media type from the real format so non-PNG, non-JPEG output
    # is not mislabelled as image/png.
    return f"data:image/{format_name.lower()};base64,{encoded}"
def make_canvas(size=(THUMB_WIDTH, THUMB_HEIGHT), color=(2, 5, 2)):
    """Return a new RGB image of *size* filled with *color*.

    Defaults to the thumbnail size and the near-black page background colour.
    """
    from PIL import Image as pil_image

    blank = pil_image.new(mode="RGB", size=size, color=color)
    return blank
def fit_image(image, size=(THUMB_WIDTH, THUMB_HEIGHT)):
    """Return *image* converted to RGB and scaled/cropped to exactly *size*.

    The crop is centred on the middle of the image.
    """
    from PIL import ImageOps

    rgb = image.convert("RGB")
    # method=3 is Pillow's numeric code for the bicubic resampling filter.
    fitted = ImageOps.fit(rgb, size, method=3, centering=(0.5, 0.5))
    return fitted
def read_video_frame(video_path: Path, frame_index: int = 2400):
    """Decode one frame of a video file and return it as an RGB PIL image.

    Args:
        video_path: Path of the video to open with OpenCV.
        frame_index: Zero-based frame to read. Negative values are treated as
            0; when the container reports a frame count, the index is also
            limited to the last frame.

    Returns:
        A PIL image holding the frame converted from BGR to RGB.

    Raises:
        RuntimeError: If the video cannot be opened or the frame cannot be
            read.
    """
    import cv2
    from PIL import Image

    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Could not open video: {video_path}")
        # The reported count may be 0/unknown, hence the separate lower clamp.
        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
        frame_index = max(0, frame_index)
        if total:
            frame_index = min(frame_index, total - 1)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
        ok, frame = cap.read()
    finally:
        # Release the capture on every path, including the error paths above.
        cap.release()
    if not ok or frame is None:
        raise RuntimeError(f"Could not read frame {frame_index} from {video_path}")
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return Image.fromarray(frame)
def draw_label(draw, xy, text, fill=(244, 248, 239), size=18):
    """Draw *text* at *xy* on a PIL drawing context.

    The preferred font is Arial Bold at its macOS system path. When that file
    cannot be loaded, Pillow's built-in default font is used instead, at the
    requested *size* where the installed Pillow supports it.

    Args:
        draw: ``ImageDraw`` object to draw on.
        xy: Top-left anchor of the text.
        text: String to render.
        fill: Text colour.
        size: Font size in pixels.
    """
    from PIL import ImageFont

    try:
        font = ImageFont.truetype("/System/Library/Fonts/Supplemental/Arial Bold.ttf", size)
    except Exception:
        # Deliberate best-effort: any failure to load the macOS font falls
        # back to a bundled font rather than aborting the render.
        try:
            # Pillow >= 10.1 can scale its default font; without this the
            # fallback ignores `size` and labels come out tiny.
            font = ImageFont.load_default(size=size)
        except (TypeError, ImportError):
            # Older Pillow has no `size` parameter, and the scalable default
            # needs FreeType; use the fixed-size bitmap font in both cases.
            font = ImageFont.load_default()
    draw.text(xy, text, fill=fill, font=font)
def video_thumb(sample_dir: Path) -> str:
    """Build the video thumbnail: a fisheye frame beside a stereo frame.

    Frame 2450 of ``fisheye_cam0.mp4`` fills the left panel. The right panel
    shows the same frame index of ``stereo_left.mp4`` when that file exists,
    otherwise a copy of the fisheye panel. Returns a JPEG data URI.
    """
    from PIL import ImageDraw

    gap = 18
    half = (THUMB_WIDTH - gap) // 2
    panel_size = (half, THUMB_HEIGHT)
    right_x = half + gap

    left_panel = fit_image(read_video_frame(sample_dir / "fisheye_cam0.mp4", 2450), panel_size)
    stereo_file = sample_dir / "stereo_left.mp4"
    if stereo_file.exists():
        right_panel = fit_image(read_video_frame(stereo_file, 2450), panel_size)
    else:
        right_panel = left_panel.copy()

    canvas = make_canvas()
    canvas.paste(left_panel, (0, 0))
    canvas.paste(right_panel, (right_x, 0))

    draw = ImageDraw.Draw(canvas, "RGBA")
    # Dark divider over the gutter, overlapping each panel by 4 px.
    draw.rounded_rectangle((half - 4, 0, right_x + 4, THUMB_HEIGHT), radius=0, fill=(2, 5, 2, 220))
    for label_x, caption in ((18, "fisheye"), (right_x + 18, "stereo")):
        draw_label(draw, (label_x, 20), caption, fill=(255, 255, 255), size=22)
    return image_data_uri(canvas, "JPEG")
def colorize(values):
    """Map scalar values onto a five-stop dark-to-light green colour ramp.

    Values are clipped to [0, 1] and linearly interpolated between
    neighbouring colour stops. NaN is treated as 0 and so gets the darkest
    colour.

    Args:
        values: Array-like of numbers, any shape.

    Returns:
        ``uint8`` array with the input's shape plus a trailing RGB axis.
    """
    import numpy as np

    stops = np.array([
        [2, 5, 2],
        [58, 136, 102],
        [122, 229, 195],
        [167, 240, 120],
        [216, 244, 165],
    ], dtype=np.float32)
    last = len(stops) - 1
    # np.clip leaves NaN untouched, and casting floor(NaN) to int yields an
    # out-of-range index into `stops`; neutralise NaN before clipping.
    x = np.clip(np.nan_to_num(values, nan=0.0), 0, 1)
    scaled = x * last
    lo = np.floor(scaled).astype(int)
    # At x == 1 the upper stop would be one past the end; pin it to the last.
    hi = np.clip(lo + 1, 0, last)
    frac = scaled - lo
    rgb = stops[lo] * (1 - frac[..., None]) + stops[hi] * frac[..., None]
    return rgb.astype("uint8")
def depth_thumb(h5) -> str:
    """Build the depth thumbnail: colourised depth beside its confidence map.

    Reads entry 2450 of ``depth/depth`` and ``depth/confidence`` from the open
    HDF5 handle. Depth is stretched between its 3rd and 97th percentile
    (finite pixels only) and colour-mapped; non-finite pixels are drawn in the
    darkest ramp colour. Returns a JPEG data URI.

    Args:
        h5: Open HDF5 file/group that supports ``h5["depth/..."]`` lookups.
    """
    import numpy as np
    from PIL import Image, ImageDraw

    gutter = 18
    panel_width = (THUMB_WIDTH - gutter) // 2
    frame = np.array(h5["depth/depth"][2450], dtype=np.float32)
    valid = np.isfinite(frame)
    if valid.any():
        lo, hi = np.percentile(frame[valid], [3, 97])
    else:
        # No usable depth at all: percentile of an empty selection is
        # undefined, so fall back to a neutral range.
        lo, hi = 0.0, 1.0
    norm = (frame - lo) / max(hi - lo, 1e-6)
    # The mask was only used for the percentiles; NaN pixels left in `norm`
    # would break the colour-map lookup, so blank them explicitly.
    norm[~valid] = 0.0
    rgb = colorize(norm)
    depth = fit_image(Image.fromarray(rgb), (panel_width, THUMB_HEIGHT))
    # NOTE(review): the cast assumes confidence is already stored on a 0-255
    # scale — confirm against the dataset schema.
    conf = np.array(h5["depth/confidence"][2450], dtype=np.uint8)
    conf_img = Image.fromarray(conf, mode="L").convert("RGB")
    conf_img = fit_image(conf_img, (panel_width, THUMB_HEIGHT))
    canvas = make_canvas()
    canvas.paste(depth, (0, 0))
    canvas.paste(conf_img, (panel_width + gutter, 0))
    draw = ImageDraw.Draw(canvas, "RGBA")
    # Translucent plates behind the two captions keep them readable.
    draw.rounded_rectangle((0, 0, 158, 44), radius=8, fill=(2, 5, 2, 178))
    draw.rounded_rectangle((panel_width + gutter, 0, panel_width + gutter + 220, 44), radius=8, fill=(2, 5, 2, 178))
    draw_label(draw, (14, 11), "depth", fill=(255, 255, 255), size=22)
    draw_label(draw, (panel_width + gutter + 14, 11), "confidence", fill=(255, 255, 255), size=22)
    return image_data_uri(canvas, "JPEG")
def audio_thumb(sample_dir: Path) -> str:
    """Build the audio thumbnail from the fisheye video's audio track.

    ``ffmpeg`` decodes 6 seconds starting at 45 s as mono 16 kHz signed 16-bit
    PCM. The samples are split into 220 bins; per-bin RMS is drawn as vertical
    bars and per-bin mean as a polyline. If anything in that path fails
    (ffmpeg missing, no audio, decode error) a warning is printed and a fixed
    placeholder bar pattern is drawn instead. Returns a PNG data URI.

    Args:
        sample_dir: Directory containing ``fisheye_cam0.mp4``.
    """
    import numpy as np
    from PIL import ImageDraw

    canvas = make_canvas()
    draw = ImageDraw.Draw(canvas, "RGBA")
    try:
        raw = subprocess.run(
            [
                "ffmpeg",
                "-v",
                "error",
                "-ss",
                "45",
                "-t",
                "6",
                "-i",
                str(sample_dir / "fisheye_cam0.mp4"),
                "-ac",
                "1",
                "-ar",
                "16000",
                "-f",
                "s16le",
                "pipe:1",
            ],
            check=True,
            stdout=subprocess.PIPE,
        ).stdout
        samples = np.frombuffer(raw, dtype=np.int16).astype(np.float32)
        if samples.size == 0:
            raise RuntimeError("empty audio stream")
        # Scale by the peak; the floor of 1.0 avoids dividing by zero on
        # digital silence.
        samples = samples / max(float(np.max(np.abs(samples))), 1.0)
        bins = 220
        trimmed = samples[: bins * max(1, len(samples) // bins)]
        chunks = np.array_split(trimmed, bins)
        rms = np.array([np.sqrt(np.mean(chunk * chunk)) if len(chunk) else 0.0 for chunk in chunks])
        waveform = np.array([float(np.mean(chunk)) if len(chunk) else 0.0 for chunk in chunks])
        baseline = THUMB_HEIGHT - 72
        for i, value in enumerate(rms):
            x = 18 + i / max(bins - 1, 1) * (THUMB_WIDTH - 36)
            h = 14 + np.clip(value * 158, 0, 158)
            draw.line((x, baseline, x, baseline - h), fill=(167, 240, 120, 170), width=2)
        points = []
        for i, value in enumerate(waveform):
            x = 18 + i / max(bins - 1, 1) * (THUMB_WIDTH - 36)
            y = 126 - np.clip(value, -1, 1) * 82
            points.append((x, y))
        draw.line(points, fill=(122, 229, 195, 220), width=2)
    except Exception as exc:
        # Best-effort by design, but report why the placeholder is used so a
        # missing ffmpeg or absent audio track is not silently hidden.
        print(f"Warning: could not render audio waveform, using placeholder bars: {exc}")
        for i in range(48):
            x = 22 + i * 8
            h = 16 + (i % 7) * 7
            draw.rounded_rectangle((x, THUMB_HEIGHT - 72 - h, x + 4, THUMB_HEIGHT - 72), radius=2, fill=(167, 240, 120, 170))
    draw_label(draw, (18, 18), "Audio waveform", fill=(244, 248, 239), size=22)
    return image_data_uri(canvas, "PNG")
def normalize_points(points, width, height, pad=16):
    """Fit the first two columns of *points* into a padded pixel rectangle.

    Each axis is rescaled so its 2nd..98th percentile range spans [0, 1],
    with values outside that range clipped. The second axis is flipped so
    larger values land nearer the top, and the result is mapped into
    ``[pad, width - pad] x [pad, height - pad]``.

    Args:
        points: 2-D array with at least two columns.
        width: Target width in pixels.
        height: Target height in pixels.
        pad: Margin kept free on every side.

    Returns:
        Array of ``(x, y)`` pixel coordinates, one row per input point.
    """
    import numpy as np

    planar = points[:, :2]
    lower = np.percentile(planar, 2, axis=0)
    upper = np.percentile(planar, 98, axis=0)
    # The 1e-6 floor keeps a degenerate (constant) axis from dividing by zero.
    unit = np.clip((planar - lower) / np.maximum(upper - lower, 1e-6), 0, 1)
    unit[:, 1] = 1 - unit[:, 1]

    pixels = np.empty_like(unit)
    for column, extent in enumerate((width, height)):
        pixels[:, column] = pad + unit[:, column] * (extent - pad * 2)
    return pixels
def slam_thumb(h5) -> str:
    """Build the pose/SLAM thumbnail: map points with the camera path on top.

    Rows of ``slam/point_cloud`` containing non-finite values are dropped and
    the rest is subsampled to at most 2600 evenly spaced points. Columns 0 and
    2 give the screen position; column 1 picks the colour. Every 36th entry of
    the first 2450 rows of ``slam/trans_xyz`` is drawn as a polyline. Returns
    a PNG data URI.

    NOTE(review): the cloud and the trajectory are each normalised to their
    own percentile range, so the overlay shares the canvas but not a common
    scale — confirm that is intended.
    """
    import numpy as np
    from PIL import ImageDraw

    canvas = make_canvas()
    draw = ImageDraw.Draw(canvas, "RGBA")

    cloud = np.array(h5["slam/point_cloud"], dtype=np.float64)
    cloud = cloud[np.isfinite(cloud).all(axis=1)]
    max_points = 2600
    if len(cloud) > max_points:
        keep = np.linspace(0, len(cloud) - 1, max_points).astype(int)
        cloud = cloud[keep]

    screen = normalize_points(cloud[:, [0, 2, 1]], THUMB_WIDTH, THUMB_HEIGHT)
    elevation = cloud[:, 1]
    elev_lo = np.percentile(elevation, 2)
    elev_hi = np.percentile(elevation, 98)
    shades = colorize((elevation - elev_lo) / max(elev_hi - elev_lo, 1e-6))

    dot = 1.2
    for (px, py), shade in zip(screen, shades):
        rgba = tuple(shade.tolist()) + (165,)
        draw.ellipse((px - dot, py - dot, px + dot, py + dot), fill=rgba)

    path = np.array(h5["slam/trans_xyz"][:2450:36], dtype=np.float64)
    path_screen = normalize_points(path[:, [0, 2, 1]], THUMB_WIDTH, THUMB_HEIGHT)
    for start, end in zip(path_screen[:-1], path_screen[1:]):
        draw.line((start[0], start[1], end[0], end[1]), fill=(167, 240, 120, 205), width=2)

    draw_label(draw, (18, 18), "camera pose + SLAM map", fill=(244, 248, 239), size=22)
    return image_data_uri(canvas, "PNG")
def imu_thumb(h5) -> str:
    """Build the inertial thumbnail: six overlaid accelerometer/gyro traces.

    Entry 2450 of ``imu/keyframe_indices`` gives the centre sample. A window
    of 220 samples on each side is read from ``imu/accel_xyz`` and
    ``imu/gyro_xyz``; the first 420 samples of each axis are stretched between
    their 3rd and 97th percentile and drawn as a polyline over six faint
    guide lines. Returns a PNG data URI.
    """
    import numpy as np
    from PIL import ImageDraw

    canvas = make_canvas()
    draw = ImageDraw.Draw(canvas, "RGBA")

    centre = int(h5["imu/keyframe_indices"][2450])
    window = slice(max(0, centre - 220), centre + 220)
    accel = np.array(h5["imu/accel_xyz"][window], dtype=np.float64)
    gyro = np.array(h5["imu/gyro_xyz"][window], dtype=np.float64)
    channels = [accel[:, axis] for axis in range(3)] + [gyro[:, axis] for axis in range(3)]
    palette = [(167, 240, 120), (122, 229, 195), (155, 223, 255), (216, 244, 165), (244, 248, 239), (165, 175, 162)]

    # Horizontal guide lines.
    for guide_y in range(68, 68 + 6 * 44, 44):
        draw.line((18, guide_y, THUMB_WIDTH - 18, guide_y), fill=(167, 240, 120, 48), width=1)

    plot_width = THUMB_WIDTH - 36
    plot_height = THUMB_HEIGHT - 116
    for trace, rgb in zip(channels, palette):
        trace = trace[:420]
        count = len(trace)
        if count < 2:
            # Not enough samples to form a line segment.
            continue
        low, high = np.percentile(trace, [3, 97])
        unit = (trace - low) / max(high - low, 1e-6)
        polyline = [
            (
                18 + i / max(count - 1, 1) * plot_width,
                THUMB_HEIGHT - 48 - np.clip(level, 0, 1) * plot_height,
            )
            for i, level in enumerate(unit)
        ]
        draw.line(polyline, fill=rgb + (200,), width=2)

    draw_label(draw, (18, 18), "inertial accel / gyro", fill=(244, 248, 239), size=22)
    return image_data_uri(canvas, "PNG")
def mocap_thumb(h5) -> str:
    """Build the motion-capture thumbnail: body keypoints plus both hands.

    Entry 2450 is read from ``full_body_mocap/keypoints``,
    ``hand_mocap/left_joints_3d`` and ``hand_mocap/right_joints_3d``. One
    2nd..98th percentile range over the first two coordinates of all points
    is shared by the three drawings. Body keypoints are dotted and joined in
    storage order; each hand is drawn with the HAND_EDGES connectivity.
    Returns a PNG data URI.
    """
    import numpy as np
    from PIL import ImageDraw

    canvas = make_canvas()
    draw = ImageDraw.Draw(canvas, "RGBA")

    body = np.array(h5["full_body_mocap/keypoints"][2450], dtype=np.float32)
    left = np.array(h5["hand_mocap/left_joints_3d"][2450], dtype=np.float32)
    right = np.array(h5["hand_mocap/right_joints_3d"][2450], dtype=np.float32)

    planar = np.concatenate([body, left, right], axis=0)[:, :2]
    lower = np.percentile(planar, 2, axis=0)
    upper = np.percentile(planar, 98, axis=0)
    extent = np.maximum(upper - lower, 1e-6)

    def to_screen(joints, left_edge, panel_width):
        # Shared normalisation, vertical flip, then placement in a panel that
        # starts at left_edge and is panel_width wide.
        screen = (joints[:, :2] - lower) / extent
        screen[:, 0] = left_edge + screen[:, 0] * panel_width
        screen[:, 1] = 72 + (1 - screen[:, 1]) * (THUMB_HEIGHT - 136)
        return screen

    dot = 2.4
    body_screen = to_screen(body, 28, 270)
    for px, py in body_screen:
        draw.ellipse((px - dot, py - dot, px + dot, py + dot), fill=(167, 240, 120, 185))
    for start, end in zip(body_screen[:-1], body_screen[1:]):
        draw.line((start[0], start[1], end[0], end[1]), fill=(167, 240, 120, 82), width=1)

    hands = ((left, 392, (122, 229, 195)), (right, 562, (216, 244, 165)))
    for joints, left_edge, rgb in hands:
        hand_screen = to_screen(joints, left_edge, 126)
        for first, second in HAND_EDGES:
            p, q = hand_screen[first], hand_screen[second]
            draw.line((p[0], p[1], q[0], q[1]), fill=rgb + (180,), width=2)
        for px, py in hand_screen:
            draw.ellipse((px - dot, py - dot, px + dot, py + dot), fill=rgb + (220,))

    draw_label(draw, (18, 18), "body + hand mocap", fill=(244, 248, 239), size=22)
    return image_data_uri(canvas, "PNG")
def text_thumb(h5) -> str:
    """Build the language thumbnail from the first caption segment.

    ``h5["caption"]`` is read as a scalar, decoded from UTF-8 when it is
    bytes, and parsed as JSON. From the first entry of ``segments`` it draws
    up to five distinct object names (sorted) as chips on the left and up to
    two "Current Action" labels, cut to 66 characters, as boxes on the right.
    Returns a PNG data URI.
    """
    from PIL import ImageDraw

    width = THUMB_WIDTH
    payload = h5["caption"][()]
    if isinstance(payload, bytes):
        payload = payload.decode("utf-8", errors="replace")
    first_segment = json.loads(payload)["segments"][0]

    unique_objects = set()
    for names in first_segment.get("objects", {}).values():
        unique_objects.update(names)
    chips = sorted(unique_objects)[:5]
    action_labels = [entry.get("label", "") for entry in first_segment.get("Current Action", [])][:2]

    canvas = make_canvas((width, THUMB_HEIGHT))
    draw = ImageDraw.Draw(canvas, "RGBA")
    draw_label(draw, (28, 24), "language annotation", fill=(244, 248, 239), size=28)

    # Object chips: one per row, width grows with the label length.
    for row, chip in enumerate(chips):
        top = 82 + row * 47
        chip_right = 28 + 52 + len(chip) * 16
        draw.rounded_rectangle((28, top, chip_right, top + 38), radius=8, fill=(7, 18, 7, 235), outline=(167, 240, 120, 170), width=2)
        draw_label(draw, (44, top + 8), chip, fill=(244, 248, 239), size=18)

    # Action boxes: fixed left edge, stretched to the right margin.
    box_left = 340
    for row, action in enumerate(action_labels):
        top = 92 + row * 68
        shown = action[:66]
        if len(action) > 66:
            shown += "..."
        draw.rounded_rectangle((box_left, top, width - 28, top + 54), radius=9, fill=(7, 18, 7, 235), outline=(122, 229, 195, 180), width=2)
        draw_label(draw, (box_left + 22, top + 15), shown, fill=(244, 248, 239), size=20)

    return image_data_uri(canvas, "PNG")
def load_sample_thumbnails(sample_dir: Path | None) -> dict[str, str]:
    """Render modality thumbnails from a sample episode directory.

    Each modality is rendered independently: a failure in one (for example a
    missing HDF5 dataset) prints a warning and omits only that entry, so the
    caller can fall back per modality instead of losing every thumbnail.

    Args:
        sample_dir: Directory expected to contain ``fisheye_cam0.mp4`` and
            ``annotation.hdf5``; may be ``None``.

    Returns:
        Mapping from modality name to data URI for every thumbnail that could
        be built. Empty when the directory or a required file is missing.
    """
    if sample_dir is None or not sample_dir.exists():
        return {}
    hdf5_path = sample_dir / "annotation.hdf5"
    required = [sample_dir / "fisheye_cam0.mp4", hdf5_path]
    if not all(path.exists() for path in required):
        return {}

    thumbnails: dict[str, str] = {}

    def render(modality, builder, source):
        # Isolate each builder so one bad modality cannot discard the rest.
        try:
            thumbnails[modality] = builder(source)
        except Exception as exc:
            print(f"Warning: could not build sample {modality} thumbnail: {exc}")

    render("video", video_thumb, sample_dir)
    render("audio", audio_thumb, sample_dir)
    try:
        # Imported lazily so the script still runs without h5py installed.
        import h5py

        with h5py.File(hdf5_path, "r") as h5:
            hdf5_builders = (
                ("depth", depth_thumb),
                ("pose / SLAM", slam_thumb),
                ("motion capture", mocap_thumb),
                ("inertial", imu_thumb),
                ("language", text_thumb),
            )
            for modality, builder in hdf5_builders:
                render(modality, builder, h5)
    except Exception as exc:
        # h5py missing or the file unreadable: keep what was already built.
        print(f"Warning: could not build sample modality thumbnails: {exc}")
    return thumbnails
def valid_sample_dir(sample_dir: Path | None) -> bool:
    """Return True when *sample_dir* holds both files the thumbnails need.

    ``None`` is never valid. Otherwise the directory must contain
    ``annotation.hdf5`` and ``fisheye_cam0.mp4``.
    """
    if sample_dir is None:
        return False
    needed = ("annotation.hdf5", "fisheye_cam0.mp4")
    return all((sample_dir / filename).exists() for filename in needed)
def resolve_sample_dir(sample_dir: Path | None) -> Path | None:
    """Pick the first usable sample directory from a fixed search order.

    Order: ``$XPERIENCE10M_SAMPLE_DIR``, then
    ``$WORKSPACE/data/sample/xperience-10m-sample``, then the *sample_dir*
    argument, then DEFAULT_SAMPLE_DIR, then DROPBOX_SAMPLE_DIR. Unset or empty
    environment variables are skipped. If no candidate passes
    valid_sample_dir(), *sample_dir* is returned unchanged.
    """
    search_order: list[Path] = []

    override = os.environ.get("XPERIENCE10M_SAMPLE_DIR")
    if override:
        search_order.append(Path(override).expanduser())

    workspace_root = os.environ.get("WORKSPACE")
    if workspace_root:
        search_order.append(Path(workspace_root).expanduser() / "data/sample/xperience-10m-sample")

    if sample_dir is not None:
        search_order.append(sample_dir)
    search_order += [DEFAULT_SAMPLE_DIR, DROPBOX_SAMPLE_DIR]

    usable = (candidate for candidate in search_order if valid_sample_dir(candidate))
    return next(usable, sample_dir)
def load_summary() -> dict:
    """Read SUMMARY_PATH as UTF-8 text and return the parsed JSON."""
    summary_text = SUMMARY_PATH.read_text(encoding="utf-8")
    parsed = json.loads(summary_text)
    return parsed
def fmt(value: float) -> str:
    """Format *value* as a fixed-point string with four decimal places.

    The value is passed through ``float()`` first, so ints and numeric
    strings are accepted too.
    """
    number = float(value)
    return "{:.4f}".format(number)
def metric_for(task_name: str, metrics: dict) -> tuple[str, str]:
    """Return the ``(label, formatted value)`` headline metric for a task.

    Resolution order:
      1. If *metrics* has ``minimal_primary_metric``, the label is
         ``metric_name``, else ``metric_key``, else ``"score"``; a ``None``
         value is shown as ``"n/a"``.
      2. A task-specific metric for the task ids listed below.
      3. ``macro_f1`` if present, otherwise ``accuracy`` if present.

    Raises:
        KeyError: If no rule applies, or the task-specific key is missing
            from *metrics*.
    """
    if "minimal_primary_metric" in metrics:
        primary = metrics.get("minimal_primary_metric")
        label = metrics.get("metric_name") or metrics.get("metric_key") or "score"
        if primary is None:
            return str(label), "n/a"
        return str(label), fmt(primary)

    # task id -> (display label, key inside `metrics`)
    task_specific = {
        "hand_trajectory_forecast": ("MPJPE", "mpjpe"),
        "cross_modal_retrieval": ("top-5", "top5_accuracy"),
        "caption_grounding": ("MRR", "mrr"),
        "object_relevance": ("micro-F1", "micro_f1"),
        "modality_reconstruction": ("R2", "r2"),
        "temporal_order": ("F1", "f1"),
        "misalignment_detection": ("F1", "f1"),
    }
    if task_name in task_specific:
        label, key = task_specific[task_name]
        return label, fmt(metrics[key])

    # Generic classification metrics, in order of preference.
    for label, key in (("macro-F1", "macro_f1"), ("accuracy", "accuracy")):
        if key in metrics:
            return label, fmt(metrics[key])

    raise KeyError(f"No main metric configured for {task_name}")
def short_io(task_name: str, metrics: dict) -> str:
    """Return the one-line "input -> output" description shown on a task card.

    If *metrics* provides ``input_short`` and/or ``output_short``, those are
    used, with ``"input"`` / ``"target"`` standing in for a missing side.
    Otherwise a built-in description is looked up by task id, and failing
    that the ``input`` entry of *metrics* (or an empty string) is returned.
    """
    source = metrics.get("input_short")
    target = metrics.get("output_short")
    if source or target:
        if not source:
            source = "input"
        if not target:
            target = "target"
        return f"{source} -> {target}"

    built_in = {
        "timeline_action": "all featurized modalities -> action label",
        "timeline_subtask": "all featurized modalities -> subtask label",
        "transition_detection": "all featurized modalities -> boundary vs steady",
        "next_action": "window at t -> action at t+20 frames",
        "hand_trajectory_forecast": "all featurized modalities -> future hand joints",
        "contact_prediction": "non-contact modalities -> contact state",
        "object_relevance": "non-caption feature blocks -> relevant objects",
        "caption_grounding": "text query -> matching sensor window",
        "cross_modal_retrieval": "motion / IMU / camera -> depth / video match",
        "modality_reconstruction": "motion / IMU / camera -> depth / video vector",
        "temporal_order": "two adjacent windows -> correct order",
        "misalignment_detection": "motion + visual pair -> aligned or shifted",
    }
    if task_name in built_in:
        return built_in[task_name]
    return metrics.get("input", "")
def task_card(task_name: str, kind: str, metrics: dict, group: dict, index: int, neural_metrics: dict | None = None) -> str:
    """Render one task card as an HTML ``<article>`` fragment.

    The card shows the two-digit *index*, the *kind*, the display name
    (``metrics["task_display_name"]`` if set, else ``task_display_name()``),
    the short input/output line and the headline metric. A second "NN" metric
    is added when *metrics* has a non-None ``neural_primary_metric``, or
    otherwise when *neural_metrics* is non-empty and has no ``"error"`` key.
    All text is HTML-escaped; the group's ``color`` and ``soft`` values are
    inserted into the inline style unchanged.
    """
    label, value = metric_for(task_name, metrics)

    # Work out the optional neural metric first, then render it once.
    neural_pair = None
    if metrics.get("neural_primary_metric") is not None:
        neural_name = metrics.get("metric_name") or metrics.get("metric_key") or "score"
        neural_pair = (str(neural_name), fmt(metrics["neural_primary_metric"]))
    elif neural_metrics and "error" not in neural_metrics:
        neural_pair = metric_for(task_name, neural_metrics)

    neural_html = ""
    if neural_pair is not None:
        neural_label, neural_value = neural_pair
        neural_html = f"""
        <div class="metric neural">
          <span>NN {html.escape(neural_label)}</span>
          <strong>{html.escape(neural_value)}</strong>
        </div>
        """

    io_line = short_io(task_name, metrics)
    title = metrics.get("task_display_name") or task_display_name(task_name)
    return f"""
    <article class="task-card" style="--accent:{group['color']};--soft:{group['soft']};">
      <div class="task-meta">
        <span class="index">{index:02d}</span>
        <span class="kind">{html.escape(kind)}</span>
      </div>
      <h3>{html.escape(title)}</h3>
      <p>{html.escape(io_line)}</p>
      <div class="metric">
        <span>min {html.escape(label)}</span>
        <strong>{html.escape(value)}</strong>
      </div>
      {neural_html}
    </article>
    """
def modality_card(name: str, modality_type: str, sample_text: str, feature_text: str, index: int, thumbnail: str | None) -> str:
    """Render one modality card as an HTML ``<article>`` fragment.

    Args:
        name: Modality name, shown as the card heading.
        modality_type: Short type label shown beside the heading.
        sample_text: Text for the "Sample contains" row.
        feature_text: Text for the "Current baseline use" row.
        index: Card number, rendered as two digits.
        thumbnail: URI for the thumbnail ``<img>``; the thumbnail block is
            omitted when this is ``None`` or empty.

    Returns:
        The HTML fragment with every interpolated value HTML-escaped.
    """
    thumb_html = ""
    if thumbnail:
        # Escape the URI like all other interpolated values so a quote or
        # ampersand in it cannot break out of the src attribute.
        thumb_src = html.escape(thumbnail, quote=True)
        thumb_html = f'<div class="modality-thumb"><img src="{thumb_src}" alt=""></div>'
    return f"""
    <article class="modality">
      <div class="modality-heading">
        <div>
          <span class="modality-index">{index:02d}</span>
          <h3>{html.escape(name)}</h3>
        </div>
        <span class="modality-type">{html.escape(modality_type)}</span>
      </div>
      {thumb_html}
      <div class="modality-copy">
        <div class="modality-row">
          <span>Sample contains</span>
          <p>{html.escape(sample_text)}</p>
        </div>
        <div class="modality-row">
          <span>Current baseline use</span>
          <p>{html.escape(feature_text)}</p>
        </div>
      </div>
    </article>
    """
| def build_html(summary: dict, base_image: Path | None, sample_dir: Path | None) -> str: | |
| if isinstance(summary.get("tasks"), list): | |
| task_rows = summary["tasks"] | |
| suite = {task["task_id"]: task for task in task_rows} | |
| neural_suite = {} | |
| dataset_scope = summary.get("dataset_scope", {}) | |
| num_frames = int(dataset_scope.get("num_frames", 0)) | |
| num_windows = int(dataset_scope.get("num_windows", 0)) | |
| feature_dim = int(dataset_scope.get("feature_dim", 0)) | |
| window_frames = int(dataset_scope.get("window_frames", 20)) | |
| stride_frames = int(dataset_scope.get("stride_frames", 5)) | |
| task_count = int(summary.get("task_count", len(suite))) | |
| scored_records = 180 | |
| else: | |
| suite = summary["tasks"] | |
| neural_suite = summary.get("neural_tasks", {}) | |
| num_frames = int(summary["num_frames"]) | |
| num_windows = int(summary["num_windows"]) | |
| feature_dim = int(summary["feature_dim"]) | |
| window_frames = int(summary.get("window_frames", 20)) | |
| stride_frames = int(summary.get("stride_frames", 5)) | |
| task_count = len(suite) | |
| scored_records = len(suite) + len(neural_suite) | |
| thumbnails = load_sample_thumbnails(sample_dir) | |
| for modality_name, asset_name in MODALITY_ASSET_FALLBACKS.items(): | |
| if thumbnails.get(modality_name): | |
| continue | |
| fallback = ROOT / "docs/assets/modalities" / asset_name | |
| if fallback.exists(): | |
| thumbnails[modality_name] = fallback.resolve().as_uri() | |
| base_layer = "" | |
| if base_image is not None and base_image.exists(): | |
| base_layer = f'<div class="image-background" style="background-image:url(\'{base_image.resolve().as_uri()}\');"></div>' | |
| stats = [ | |
| (f"{num_frames:,}", "frames"), | |
| (f"{num_windows:,}", "windows"), | |
| (f"{feature_dim:,}", "features"), | |
| (f"{task_count}", "unified tasks"), | |
| (f"{scored_records}", "method-task results"), | |
| ("70/30", "chronological split"), | |
| ] | |
| stats_html = "".join( | |
| f"<div class=\"stat\"><strong>{html.escape(value)}</strong><span>{html.escape(label)}</span></div>" | |
| for value, label in stats | |
| ) | |
| modalities_html = "".join( | |
| modality_card(name, modality_type, sample_text, feature_text, index, thumbnails.get(name)) | |
| for index, (name, modality_type, sample_text, feature_text) in enumerate(MODALITIES, start=1) | |
| ) | |
| task_index = 1 | |
| families = [] | |
| for group in GROUPS: | |
| cards = [] | |
| for task_name, kind in group["tasks"]: | |
| cards.append(task_card(task_name, kind, suite[task_name], group, task_index, neural_suite.get(task_name))) | |
| task_index += 1 | |
| families.append( | |
| f""" | |
| <section class="family" style="--accent:{group['color']};--soft:{group['soft']};"> | |
| <div class="family-head"> | |
| <span>{html.escape(group['tone'])}</span> | |
| <h2>{html.escape(group['name'])}</h2> | |
| </div> | |
| <div class="family-cards">{''.join(cards)}</div> | |
| </section> | |
| """ | |
| ) | |
| return f"""<!doctype html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="utf-8"> | |
| <meta name="viewport" content="width={CANVAS_WIDTH}, initial-scale=1"> | |
| <title>Xperience-10M 20-Task Episode Suite Infographic</title> | |
| <style> | |
| * {{ box-sizing: border-box; }} | |
| html, | |
| body {{ | |
| margin: 0; | |
| width: {CANVAS_WIDTH}px; | |
| height: {CANVAS_HEIGHT}px; | |
| background: #020502; | |
| }} | |
| body {{ | |
| font-family: "Inter Tight", "Space Grotesk", ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; | |
| color: #f4f8ef; | |
| text-rendering: optimizeLegibility; | |
| }} | |
| .canvas {{ | |
| position: relative; | |
| width: {CANVAS_WIDTH}px; | |
| height: {CANVAS_HEIGHT}px; | |
| overflow: hidden; | |
| padding: 54px 64px 44px; | |
| background: | |
| radial-gradient(circle at 72% 10%, rgba(167,240,120,0.18), transparent 24%), | |
| radial-gradient(circle at 20% 28%, rgba(255,255,255,0.10) 1px, transparent 2px), | |
| #020502; | |
| background-size: auto, 18px 18px, auto; | |
| }} | |
| .image-background {{ | |
| position: absolute; | |
| inset: 0; | |
| background-position: center; | |
| background-repeat: no-repeat; | |
| background-size: cover; | |
| opacity: 0.36; | |
| filter: saturate(1.05) contrast(1.08) brightness(0.42); | |
| }} | |
| .content {{ | |
| position: relative; | |
| z-index: 1; | |
| }} | |
| .header {{ | |
| display: grid; | |
| grid-template-columns: 1.25fr 0.75fr; | |
| gap: 44px; | |
| align-items: end; | |
| padding-bottom: 30px; | |
| border-bottom: 1px solid rgba(167,240,120,0.20); | |
| }} | |
| .kicker {{ | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 12px; | |
| color: #ccffa0; | |
| font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace; | |
| font-size: 15px; | |
| text-transform: uppercase; | |
| letter-spacing: 0.08em; | |
| }} | |
| .kicker::before {{ | |
| content: ""; | |
| width: 44px; | |
| height: 1px; | |
| background: #ccffa0; | |
| }} | |
| h1 {{ | |
| margin: 18px 0 0; | |
| max-width: 930px; | |
| font-size: 72px; | |
| line-height: 0.95; | |
| letter-spacing: 0; | |
| }} | |
| .subtitle {{ | |
| margin: 18px 0 0; | |
| max-width: 900px; | |
| color: #dce8d7; | |
| font-size: 23px; | |
| line-height: 1.35; | |
| font-weight: 520; | |
| }} | |
| .stats {{ | |
| display: grid; | |
| grid-template-columns: repeat(5, minmax(0, 1fr)); | |
| gap: 10px; | |
| }} | |
| .stat {{ | |
| min-height: 78px; | |
| padding: 14px 15px; | |
| border: 1px solid rgba(167,240,120,0.24); | |
| background: rgba(7,18,7,0.80); | |
| border-radius: 8px; | |
| }} | |
| .stat strong {{ | |
| display: block; | |
| font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace; | |
| font-size: 25px; | |
| line-height: 1; | |
| font-variant-numeric: tabular-nums; | |
| }} | |
| .stat span {{ | |
| display: block; | |
| margin-top: 8px; | |
| color: #a5afa2; | |
| font-size: 13px; | |
| line-height: 1.15; | |
| }} | |
| .section-label {{ | |
| display: grid; | |
| grid-template-columns: 1fr; | |
| gap: 12px; | |
| align-items: start; | |
| margin: 44px 0 24px; | |
| color: #a5afa2; | |
| font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace; | |
| font-size: 22px; | |
| text-transform: uppercase; | |
| letter-spacing: 0.08em; | |
| }} | |
| .section-label span:last-child {{ | |
| max-width: 1400px; | |
| color: #dce8d7; | |
| text-transform: none; | |
| letter-spacing: 0; | |
| font-family: inherit; | |
| font-size: 21px; | |
| line-height: 1.42; | |
| text-align: left; | |
| }} | |
| .modalities {{ | |
| display: grid; | |
| grid-template-columns: repeat(2, minmax(0, 1fr)); | |
| gap: 24px; | |
| }} | |
| .modality {{ | |
| min-height: 254px; | |
| padding: 22px; | |
| border: 1px solid rgba(167,240,120,0.22); | |
| background: rgba(7,18,7,0.84); | |
| border-radius: 8px; | |
| display: grid; | |
| grid-template-columns: 310px minmax(0, 1fr); | |
| grid-template-areas: | |
| "thumb heading" | |
| "thumb copy"; | |
| column-gap: 24px; | |
| row-gap: 16px; | |
| align-items: start; | |
| }} | |
| .modality-thumb {{ | |
| grid-area: thumb; | |
| height: 210px; | |
| overflow: hidden; | |
| border: 1px solid rgba(167,240,120,0.16); | |
| border-radius: 8px; | |
| background: #020502; | |
| }} | |
| .modality-thumb img {{ | |
| display: block; | |
| width: 100%; | |
| height: 100%; | |
| object-fit: cover; | |
| }} | |
| .modality-index, | |
| .index {{ | |
| font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace; | |
| font-variant-numeric: tabular-nums; | |
| }} | |
| .modality-heading {{ | |
| grid-area: heading; | |
| display: flex; | |
| align-items: start; | |
| justify-content: space-between; | |
| gap: 16px; | |
| padding-bottom: 14px; | |
| border-bottom: 1px solid rgba(167,240,120,0.16); | |
| }} | |
| .modality-index {{ | |
| color: #a5afa2; | |
| font-size: 18px; | |
| }} | |
| .modality-type {{ | |
| color: #ccffa0; | |
| font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace; | |
| font-size: 13px; | |
| line-height: 1.15; | |
| text-transform: uppercase; | |
| letter-spacing: 0.08em; | |
| text-align: right; | |
| max-width: 210px; | |
| padding-top: 4px; | |
| }} | |
| .modality h3 {{ | |
| margin: 8px 0 0; | |
| font-size: 36px; | |
| line-height: 1.02; | |
| text-transform: uppercase; | |
| }} | |
| .modality-copy {{ | |
| grid-area: copy; | |
| display: grid; | |
| grid-template-columns: repeat(2, minmax(0, 1fr)); | |
| gap: 12px; | |
| }} | |
| .modality-row {{ | |
| display: grid; | |
| grid-template-columns: 1fr; | |
| gap: 8px; | |
| align-items: baseline; | |
| padding: 14px 16px; | |
| border: 1px solid rgba(167,240,120,0.16); | |
| border-radius: 8px; | |
| background: rgba(2,5,2,0.40); | |
| }} | |
| .modality-row span {{ | |
| display: block; | |
| color: #a5afa2; | |
| font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace; | |
| font-size: 12px; | |
| letter-spacing: 0.06em; | |
| line-height: 1.25; | |
| text-transform: uppercase; | |
| }} | |
| .modality-row p {{ | |
| margin: 0; | |
| color: #dce8d7; | |
| font-size: 21px; | |
| font-weight: 650; | |
| line-height: 1.2; | |
| }} | |
| .shared-band {{ | |
| display: grid; | |
| grid-template-columns: 1fr auto 1fr auto 1fr auto 1fr; | |
| gap: 12px; | |
| align-items: center; | |
| margin-top: 30px; | |
| padding: 14px; | |
| border: 1px solid rgba(167,240,120,0.22); | |
| background: rgba(7,18,7,0.72); | |
| border-radius: 8px; | |
| }} | |
| .step {{ | |
| min-height: 62px; | |
| padding: 13px 15px; | |
| background: rgba(7,18,7,0.92); | |
| border: 1px solid rgba(167,240,120,0.16); | |
| border-radius: 8px; | |
| }} | |
| .step strong {{ | |
| display: block; | |
| font-size: 17px; | |
| line-height: 1.1; | |
| }} | |
| .step span {{ | |
| display: block; | |
| margin-top: 5px; | |
| color: #a5afa2; | |
| font-size: 13px; | |
| }} | |
| .arrow {{ | |
| color: #ccffa0; | |
| font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace; | |
| font-size: 22px; | |
| }} | |
| .families {{ | |
| display: grid; | |
| grid-template-columns: repeat(2, minmax(0, 1fr)); | |
| gap: 24px; | |
| margin-top: 30px; | |
| }} | |
| .family {{ | |
| padding: 20px; | |
| border: 1px solid color-mix(in srgb, var(--accent) 28%, #020502); | |
| background: rgba(7,18,7,0.82); | |
| border-radius: 8px; | |
| }} | |
| .family-head {{ | |
| display: flex; | |
| align-items: end; | |
| justify-content: space-between; | |
| gap: 16px; | |
| min-height: 66px; | |
| padding-bottom: 16px; | |
| border-bottom: 1px solid color-mix(in srgb, var(--accent) 24%, #020502); | |
| }} | |
| .family-head span {{ | |
| color: var(--accent); | |
| font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace; | |
| font-size: 12px; | |
| text-transform: uppercase; | |
| letter-spacing: 0.08em; | |
| }} | |
| .family-head h2 {{ | |
| margin: 0; | |
| color: var(--accent); | |
| font-size: 32px; | |
| line-height: 1.02; | |
| text-align: right; | |
| }} | |
| .family-cards {{ | |
| display: grid; | |
| gap: 16px; | |
| margin-top: 18px; | |
| }} | |
| .task-card {{ | |
| min-height: 178px; | |
| padding: 18px 20px; | |
| border: 1px solid color-mix(in srgb, var(--accent) 28%, #020502); | |
| background: linear-gradient(180deg, rgba(10,24,10,0.96), color-mix(in srgb, var(--soft) 24%, #071207)); | |
| border-radius: 8px; | |
| }} | |
| .task-meta {{ | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| gap: 12px; | |
| }} | |
| .index {{ | |
| color: #a5afa2; | |
| font-size: 12px; | |
| }} | |
| .kind {{ | |
| display: inline-flex; | |
| align-items: center; | |
| height: 24px; | |
| padding: 0 9px; | |
| border-radius: 6px; | |
| border: 1px solid color-mix(in srgb, var(--accent) 40%, #020502); | |
| color: var(--accent); | |
| background: rgba(2,5,2,0.48); | |
| text-transform: uppercase; | |
| font-size: 11px; | |
| line-height: 1; | |
| font-weight: 830; | |
| }} | |
| .task-card h3 {{ | |
| margin: 12px 0 0; | |
| color: #f4f8ef; | |
| font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace; | |
| font-size: 21px; | |
| line-height: 1.18; | |
| overflow-wrap: anywhere; | |
| }} | |
| .task-card p {{ | |
| margin: 11px 0 0; | |
| min-height: 39px; | |
| color: #dce8d7; | |
| font-size: 15px; | |
| line-height: 1.28; | |
| font-weight: 560; | |
| }} | |
| .metric {{ | |
| display: inline-flex; | |
| align-items: baseline; | |
| gap: 10px; | |
| margin-top: 10px; | |
| min-height: 32px; | |
| padding: 7px 10px; | |
| border-radius: 8px; | |
| border: 1px solid color-mix(in srgb, var(--accent) 42%, #020502); | |
| background: rgba(2,5,2,0.42); | |
| }} | |
| .metric.neural {{ | |
| margin-left: 8px; | |
| border-color: rgba(255,255,255,0.20); | |
| background: rgba(255,255,255,0.08); | |
| }} | |
| .metric span {{ | |
| color: #a5afa2; | |
| font-size: 13px; | |
| font-weight: 760; | |
| }} | |
| .metric strong {{ | |
| color: var(--accent); | |
| font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace; | |
| font-size: 20px; | |
| line-height: 1; | |
| font-weight: 860; | |
| font-variant-numeric: tabular-nums; | |
| }} | |
| .footer {{ | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| gap: 32px; | |
| margin-top: 22px; | |
| padding-top: 20px; | |
| border-top: 1px solid rgba(167,240,120,0.20); | |
| color: #a5afa2; | |
| font-size: 18px; | |
| line-height: 1.35; | |
| font-weight: 620; | |
| }} | |
| .footer code {{ | |
| font-family: "SF Mono", "JetBrains Mono", ui-monospace, monospace; | |
| color: #020502; | |
| background: #ccffa0; | |
| border: 1px solid #ccffa0; | |
| border-radius: 7px; | |
| padding: 6px 9px; | |
| white-space: nowrap; | |
| }} | |
| </style> | |
| </head> | |
| <body> | |
| <main class="canvas" aria-label="Ropedia Xperience-10M unified 20-task infographic"> | |
| {base_layer} | |
| <div class="content"> | |
| <header class="header"> | |
| <div> | |
| <div class="kicker">verified unified 20-task release</div> | |
| <h1>Ropedia Xperience-10M task map</h1> | |
| <p class="subtitle">A clean map from synchronized multimodal windows to 20 task contracts, comparing minimal heads, neural MLP heads, and the public 180-result matrix.</p> | |
| </div> | |
| <div class="stats">{stats_html}</div> | |
| </header> | |
| <section class="shared-band" aria-label="shared processing contract"> | |
| <div class="step"><strong>raw public episode</strong><span>video, audio, depth, pose, mocap, IMU, language</span></div> | |
| <div class="arrow">-></div> | |
| <div class="step"><strong>{window_frames}-frame windows</strong><span>stride {stride_frames}, chronological order</span></div> | |
| <div class="arrow">-></div> | |
| <div class="step"><strong>{feature_dim:,}-d vector</strong><span>current manifest includes audio features</span></div> | |
| <div class="arrow">-></div> | |
| <div class="step"><strong>20 task contracts</strong><span>minimal/NN baselines plus Qwen3-Omni/Cosmos3 diagnostics</span></div> | |
| </section> | |
| <div class="section-label"> | |
| <span>20 task contracts</span> | |
| <span>Every task below is part of one unified public-sample suite with shared window/split discipline and source-linked scores in the 180-result matrix.</span> | |
| </div> | |
| <section class="families">{''.join(families)}</section> | |
| <div class="section-label"> | |
| <span>Xperience-10M modalities</span> | |
| <span>Each public-sample stream is shown with a compact derived thumbnail, what the sample contains, and how the current baseline uses it. Audio is present in the sample MP4 stream and is now extracted into the current baseline manifest.</span> | |
| </div> | |
| <section class="modalities">{modalities_html}</section> | |
| <footer class="footer"> | |
| <span>Single public sample episode: useful for pipeline validation and task design, not cross-episode generalization.</span> | |
| <code>results/episode_task_suite/summary_report.json</code> | |
| </footer> | |
| </div> | |
| </main> | |
| </body> | |
| </html> | |
| """ | |
def render_html(html_path: Path, output_path: Path) -> None:
    """Export the HTML page at *html_path* to an image at *output_path*.

    The screenshot is taken by invoking the Playwright CLI through ``npx``
    with a full-page capture and a viewport sized from ``CANVAS_WIDTH`` and
    ``CANVAS_HEIGHT``.

    Args:
        html_path: HTML file to render; it is resolved and passed to the
            CLI as a ``file://`` URI.
        output_path: Destination image path. Its parent directory is
            created if it does not exist yet.

    Raises:
        subprocess.CalledProcessError: If the command exits with a
            non-zero status (``check=True``).
    """
    # Make sure the destination folder exists before the CLI tries to write.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    viewport_flag = f"--viewport-size={CANVAS_WIDTH},{CANVAS_HEIGHT}"
    page_uri = html_path.resolve().as_uri()
    command = [
        "npx",
        "--yes",
        "playwright",
        "screenshot",
        "--full-page",
        viewport_flag,
        page_uri,
        str(output_path),
    ]
    subprocess.run(command, check=True)
def main() -> int:
    """Command-line entry point: build the infographic HTML and export it.

    Flags:
        --base-image: Path handed to ``build_html`` (default ``DEFAULT_BASE``).
        --sample-dir: Directory passed through ``resolve_sample_dir``
            (default ``DEFAULT_SAMPLE_DIR``).
        --output: Destination image path (default ``DEFAULT_OUTPUT``).
        --html: Optional path for the generated HTML. When omitted, the HTML
            is written to a named temporary file that is left on disk so the
            printed path stays usable.
        --no-export: Write the HTML only and skip the image export.

    Returns:
        ``0`` once the HTML (and, unless disabled, the image) is written.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--base-image", type=Path, default=DEFAULT_BASE)
    cli.add_argument("--sample-dir", type=Path, default=DEFAULT_SAMPLE_DIR)
    cli.add_argument("--output", type=Path, default=DEFAULT_OUTPUT)
    cli.add_argument("--html", type=Path)
    cli.add_argument("--no-export", action="store_true", help="Only write the HTML used to render the image.")
    options = cli.parse_args()

    suite_summary = load_summary()
    resolved_samples = resolve_sample_dir(options.sample_dir)
    page_markup = build_html(suite_summary, options.base_image, resolved_samples)

    if options.html is not None:
        # Caller chose the HTML location: create its folder and write there.
        html_path = options.html
        html_path.parent.mkdir(parents=True, exist_ok=True)
        html_path.write_text(page_markup, encoding="utf-8")
    else:
        # No explicit location: keep the temp file (delete=False) so it can
        # still be rendered and inspected after the handle is closed.
        with tempfile.NamedTemporaryFile("w", suffix=".html", encoding="utf-8", delete=False) as scratch:
            scratch.write(page_markup)
        html_path = Path(scratch.name)

    export_requested = not options.no_export
    if export_requested:
        render_html(html_path, options.output)
        print(f"Wrote image: {options.output}")

    print(f"Wrote render HTML: {html_path}")
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)