"""Generate an experimental animated story for the robot-learning landscape. This is intentionally separate from the deployed Gradio wrapper. It is a research/prototype page that turns the existing paper atlas into a smoother "how the field got here" narrative. Run from this directory: .venv_robot_paradigms/bin/python gen_unified_story.py Out: robot_unified_story.html """ from __future__ import annotations import json from pathlib import Path import robot_paradigms_app as app ERAS = [ { "id": "control", "years": "1922-1988", "title": "Control bedrock", "claim": "Robots start as engineered feedback systems: stabilize, track, avoid collisions, optimize a short horizon.", "nodes": ["PID", "LQR", "A*", "MPC", "Trajectory optimization"], "takeaway": "The runtime object is a controller or planner, not a learned policy.", }, { "id": "rl", "years": "1989-2017", "title": "Learning objectives split", "claim": "Imitation, RL, inverse RL, goal-conditioned RL, hierarchy, and meta-learning become separate ways to train behavior.", "nodes": ["BC", "Q-learning", "Policy gradient", "Actor-critic", "IRL/GAIL", "HER/HRL/MAML"], "takeaway": "The clean distinction is objective, not architecture.", }, { "id": "deep", "years": "2018-2021", "title": "Offline and sequence views", "claim": "Replay buffers and logged trajectories become datasets; transformers recast control as conditional sequence generation.", "nodes": ["SAC/TD3", "CQL/IQL/AWAC", "Decision Transformer", "Trajectory Transformer", "Gato"], "takeaway": "BC, offline RL, and sequence models start to overlap.", }, { "id": "gen", "years": "2022-2024", "title": "Generative action heads", "claim": "Action chunking, diffusion, flow matching, and tokenized actions replace single-mode MSE regression.", "nodes": ["ACT/ALOHA", "Diffusion Policy", "Flow Matching", "Action tokens", "Octo/OpenVLA"], "takeaway": "Modern VLA policies mostly differ by action head and data, not by a new objective.", }, { "id": "vlm", "years": "2023-2026", "title": "VLM/VLA foundation policies", "claim": "Vision-language pretraining injects semantics; robot data teaches control. Cross-embodiment datasets make transfer plausible.", "nodes": ["RT-2/RT-X", "Open X-Embodiment", "OpenVLA", "pi0/OpenPI", "Gemini Robotics", "GR00T/Helix/SmolVLA"], "takeaway": "VLA is a trunk + action interface + data recipe.", }, { "id": "wm", "years": "2018-2026", "title": "World models return", "claim": "Learned simulators support planning, imagination, evaluation, data generation, and video/action proposal.", "nodes": ["World Models", "Dreamer/TD-MPC", "UniPi/Genie", "V-JEPA 2", "AC-WM", "WAM"], "takeaway": "Separate action-conditioned simulators from world-action proposal models.", }, { "id": "unified", "years": "2025-2026", "title": "Unified robot model frontier", "claim": "The frontier is no longer one algorithm. It is a stack: VLA policy, world model, planner, evaluator, safety controller, and data engine.", "nodes": ["Policy", "Planner", "World model", "Verifier", "Data engine", "Safety layer"], "takeaway": "The landscape should explain roles and interfaces before listing names.", }, ] LANES = [ {"id": "control", "label": "Control substrate", "color": "#64748b"}, {"id": "objective", "label": "Learning objective", "color": "#f97316"}, {"id": "action", "label": "Action representation", "color": "#38bdf8"}, {"id": "world", "label": "World model", "color": "#a78bfa"}, {"id": "unified", "label": "Unified stack", "color": "#34d399"}, ] FLOW = [ ["control", "control", "PID / LQR / MPC"], ["control", "objective", "known cost -> learned reward/policy"], ["objective", "action", "BC/RL losses need multimodal heads"], ["action", "unified", "VLA policies close the loop"], ["objective", "world", "model-based RL and imagination"], ["world", "unified", "simulator, evaluator, data engine"], ["control", "unified", "MPC/safety still wraps deployment"], ] UPDATES = [ { "title": "VLA surveys now treat action representation as the central split", "body": "Tokenized actions, diffusion, and flow matching are better shown as sibling action heads under BC/foundation-policy training.", }, { "title": "World-model robotics needs two branches", "body": "Action-conditioned world models take actions as inputs and support counterfactual planning; world-action models output a successful future plus actions.", }, { "title": "Unified does not mean one box", "body": "Current systems are hybrid stacks: VLM/VLA semantic trunk, continuous control head, world model, classical planner/MPC, evaluator, and safety guard.", }, { "title": "Data infrastructure is becoming a first-class axis", "body": "Open X-Embodiment, DROID/Bridge/LeRobot-style datasets, synthetic engines, human video, and auto-generated labels explain many model differences.", }, ] SOURCE_LINKS = [ ("RT-2", "https://arxiv.org/abs/2307.15818"), ("Open X-Embodiment / RT-X", "https://arxiv.org/abs/2310.08864"), ("OpenVLA", "https://arxiv.org/abs/2406.09246"), ("pi0", "https://arxiv.org/abs/2410.24164"), ("GR00T N1", "https://arxiv.org/abs/2503.14734"), ("SmolVLA", "https://arxiv.org/abs/2506.01844"), ("VLA systematic review", "https://arxiv.org/abs/2507.10672"), ("VLA real-world review", "https://arxiv.org/abs/2510.07077"), ("World-model robotics survey", "https://arxiv.org/abs/2605.00080"), ("VLA data engines survey", "https://arxiv.org/abs/2604.23001"), ("Foundation models in robotics review", "https://arxiv.org/abs/2604.15395"), ] def paper_rows(): rows = [] for idx, (title, authors, year, tag) in enumerate(app.PAPERS, start=1): rows.append({"i": idx, "title": title, "authors": authors, "year": year, "tag": tag}) return rows def build_data(): rows = paper_rows() tags = sorted({r["tag"] for r in rows}) return { "eras": ERAS, "lanes": LANES, "flow": FLOW, "updates": UPDATES, "papers": rows, "tags": tags, "sources": [{"title": t, "url": u} for t, u in SOURCE_LINKS], "counts": {"papers": len(rows), "tags": len(tags)}, } TEMPLATE = r"""
| # | Paper / review | Authors | Year | Taxonomy tag |
|---|