#!/usr/bin/env python3 """ Build a static interactive explorer for the single Xperience-10M sample episode. The explorer is generated from committed/exported artifacts only. Raw MP4/HDF5 files are not embedded or redistributed. """ from __future__ import annotations import argparse import csv import json from datetime import datetime, timezone from pathlib import Path import numpy as np from task_display import TASK_DISPLAY_NAMES, task_display_name TASK_DISPLAY = { "timeline_action": task_display_name("timeline_action"), "timeline_subtask": task_display_name("timeline_subtask"), "transition_detection": task_display_name("transition_detection"), "next_action": task_display_name("next_action"), "contact_prediction": task_display_name("contact_prediction"), "object_relevance": task_display_name("object_relevance"), } TASK_DISPLAY_ALL = dict(TASK_DISPLAY_NAMES) BLOCK_DISPLAY = { "hand_left_joints": "Left Hand", "hand_right_joints": "Right Hand", "body_joints": "Body Joints", "body_contacts": "Body Contacts", "camera_translation": "Camera Translation", "camera_rotation_matrix": "Camera Rotation", "imu_accel_gyro": "IMU Accel/Gyro", "depth_confidence": "Depth + Confidence", "audio_fisheye_cam0_aac": "Audio", "caption_objects_interaction_text": "Language Text", "slam_point_cloud": "SLAM Point Cloud", "calibration": "Calibration", } def parse_args() -> argparse.Namespace: root = Path(__file__).resolve().parents[1] parser = argparse.ArgumentParser(description="Build static single-episode explorer page.") parser.add_argument("--workspace", type=Path, default=root) parser.add_argument("--suite-dir", type=Path, default=root / "results/episode_task_suite") parser.add_argument("--diagnostics-dir", type=Path, default=root / "results/single_episode_diagnostics") parser.add_argument("--docs-dir", type=Path, default=root / "docs") return parser.parse_args() def read_csv(path: Path) -> list[dict]: with path.open(newline="", encoding="utf-8") as fp: return list(csv.DictReader(fp)) def read_json(path: Path): return json.loads(path.read_text(encoding="utf-8")) def write_json(path: Path, data: dict) -> None: path.parent.mkdir(parents=True, exist_ok=True) path.write_text(json.dumps(data, indent=2, ensure_ascii=False) + "\n", encoding="utf-8") def block_modality(name: str) -> str: if name.startswith("video_"): return "video" if name.startswith("hand_") or name.startswith("body_"): return "motion_capture" if name.startswith("camera_") or name in {"slam_point_cloud", "calibration"}: return "pose_slam" if name.startswith("depth_"): return "depth" if name.startswith("imu_"): return "inertial" if name.startswith("audio_"): return "audio" if name.startswith("caption_"): return "language" return "other" def load_predictions(suite_dir: Path) -> dict[str, dict[int, dict]]: out: dict[str, dict[int, dict]] = {} for task in TASK_DISPLAY: path = suite_dir / task / "predictions.csv" rows_by_window: dict[int, dict] = {} if not path.exists(): out[task] = rows_by_window continue for row in read_csv(path): if "window_index" not in row: continue idx = int(row["window_index"]) true_value = row.get("true_label") or row.get("true_objects") or row.get("true") or "" pred_value = row.get("predicted_label") or row.get("predicted_objects") or row.get("predicted") or "" if "correct" in row and row["correct"] != "": correct = int(float(row["correct"])) else: correct = int(str(true_value) == str(pred_value)) rows_by_window[idx] = { "true": true_value, "predicted": pred_value, "correct": correct, "confidence": row.get("confidence", ""), } out[task] = rows_by_window return out def build_action_segments(windows: list[dict]) -> list[dict]: segments = [] if not windows: return segments current = windows[0]["action_label"] start = int(windows[0]["start_frame"]) start_idx = int(windows[0]["window_index"]) last = windows[0] for row in windows[1:]: if row["action_label"] != current: segments.append({ "action": current, "start_frame": start, "end_frame": int(last["end_frame"]), "start_window": start_idx, "end_window": int(last["window_index"]), }) current = row["action_label"] start = int(row["start_frame"]) start_idx = int(row["window_index"]) last = row segments.append({ "action": current, "start_frame": start, "end_frame": int(last["end_frame"]), "start_window": start_idx, "end_window": int(last["window_index"]), }) return segments def build_data(args: argparse.Namespace) -> dict: suite_dir = args.suite_dir diagnostics_dir = args.diagnostics_dir windows = read_csv(suite_dir / "windows.csv") manifest = read_json(suite_dir / "feature_manifest.json") summary = read_json(suite_dir / "summary_report.json") provenance = read_json(diagnostics_dir / "provenance.json") object_rows = {int(r["window_index"]): r for r in read_csv(diagnostics_dir / "object_labels/window_object_labels.csv")} ablation_rows = read_csv(diagnostics_dir / "modality_ablation/ablation_metrics.csv") for row in ablation_rows: task = row.get("task") if task in TASK_DISPLAY_ALL: row["task_display_name"] = TASK_DISPLAY_ALL[task] alignment_rows = read_csv(diagnostics_dir / "alignment_stress/alignment_shift_metrics.csv") timeline_rows = read_csv(diagnostics_dir / "timeline_overlay/timeline_overlay.csv") predictions = load_predictions(suite_dir) X = np.load(suite_dir / "shared_windows.npz")["X"].astype(np.float32) block_stats = {} block_meta = [] for block in manifest: name = block["name"] start, end = int(block["start"]), int(block["end"]) values = X[:, start:end] l2 = np.linalg.norm(values, axis=1) mean_abs = np.mean(np.abs(values), axis=1) max_l2 = float(max(l2.max(), 1e-8)) block_stats[name] = { "l2": l2, "mean_abs": mean_abs, "relative": l2 / max_l2, } block_meta.append({ "name": name, "display": BLOCK_DISPLAY.get(name, name.replace("_", " ").title()), "modality": block_modality(name), "start": start, "end": end, "dim": int(block["dim"]), }) explorer_windows = [] for i, row in enumerate(windows): idx = int(row["window_index"]) obj = object_rows.get(idx, {}) feature_stats = [] for block in block_meta: s = block_stats[block["name"]] feature_stats.append({ "name": block["name"], "l2": round(float(s["l2"][i]), 6), "mean_abs": round(float(s["mean_abs"][i]), 6), "relative": round(float(s["relative"][i]), 6), }) task_predictions = {} for task, rows_by_window in predictions.items(): task_predictions[task] = rows_by_window.get(idx) explorer_windows.append({ "window_index": idx, "start_frame": int(row["start_frame"]), "end_frame": int(row["end_frame"]), "center_frame": int(row["center_frame"]), "action": row["action_label"], "subtask": row["subtask_label"], "objects": [x for x in obj.get("objects", "").split("|") if x], "feature_stats": feature_stats, "predictions": task_predictions, }) best_ablation = {} for task in sorted({r["task"] for r in ablation_rows}): computed = [r for r in ablation_rows if r["task"] == task and r["status"] == "computed" and r["score"]] if not computed: continue best = max(computed, key=lambda r: float(r["score"])) non_overlap = [r for r in computed if r.get("target_source_overlap") == "false"] best_non_overlap = max(non_overlap, key=lambda r: float(r["score"])) if non_overlap else None best_ablation[task] = { "task": task, "task_display_name": TASK_DISPLAY.get(task, task_display_name(task)), "best": { "modality_group": best["modality_group"], "modality_display": best["modality_display"], "score": float(best["score"]), "primary_metric": best["primary_metric"], "target_source_overlap": best["target_source_overlap"], }, "best_non_overlap": None if best_non_overlap is None else { "modality_group": best_non_overlap["modality_group"], "modality_display": best_non_overlap["modality_display"], "score": float(best_non_overlap["score"]), "primary_metric": best_non_overlap["primary_metric"], }, } return { "meta": { "generated_at": datetime.now(timezone.utc).isoformat(), "window_count": len(explorer_windows), "feature_dim": int(X.shape[1]), "object_label_rows": len(object_rows), "object_vocab_count": len(read_json(diagnostics_dir / "object_labels/object_vocab.json")["vocab"]), "timeline_prediction_rows": len(timeline_rows), "source_policy": "Window-level labels, features, predictions, and diagnostics are embedded here. Official raw MP4/HDF5/RRD files are linked from the Raw Sample Browser, with compact browser-preview clips for immediate MP4/audio playback.", "annotation_hash_recorded": any("annotation.hdf5" in key for key in provenance["input_file_hashes"]), "summary": { "num_windows": summary.get("num_windows"), "feature_dim": summary.get("feature_dim"), "window_frames": summary.get("window_frames"), "stride_frames": summary.get("stride_frames"), }, }, "tasks": TASK_DISPLAY, "task_display_names": TASK_DISPLAY_ALL, "feature_blocks": block_meta, "segments": build_action_segments(windows), "windows": explorer_windows, "ablation": { "best_by_task": best_ablation, "rows": ablation_rows, }, "alignment": alignment_rows, } HTML_TEMPLATE = """ Single-Episode Explorer | Ropedia Xperience-10M

Single-Episode Research Explorer

Inspect the exported Xperience-10M sample windows, real object labels, model predictions, feature-block statistics, and diagnostic scores from one aligned episode.

-windows
-feature dimensions
-object labels
-prediction rows

Window

Feature Blocks

Diagnostics

""" def write_html(path: Path, data: dict) -> None: path.parent.mkdir(parents=True, exist_ok=True) payload = json.dumps(data, ensure_ascii=False).replace(" None: args = parse_args() data = build_data(args) write_json(args.docs_dir / "data/single_episode_explorer.json", data) write_html(args.docs_dir / "single_episode_explorer.html", data) print(f"Wrote {args.docs_dir / 'data/single_episode_explorer.json'}") print(f"Wrote {args.docs_dir / 'single_episode_explorer.html'}") if __name__ == "__main__": main()