"""Generate a self-contained interactive 'Robot Learning Landscape' page.

Pulls the researched paradigm data straight out of robot_paradigms_app.py so the
visualization stays in sync, then injects it into a static HTML template that
provides: a constellation map, Keynote-style Magic-Move detail panels (View
Transitions API), and a small looping SMIL animation per paradigm.

Run:  .venv_robot_paradigms/bin/python gen_landscape.py
Out:  robot_landscape.html
"""
import json
import html as html_lib

import robot_paradigms_app as app
import my_papers

# ---------------------------------------------------------------------------
# 1. Family display labels (nice names for the 10 hubs)
#    Maps a family key to the label shown on the map.
# ---------------------------------------------------------------------------
FAMILY_LABEL = {
    "BC": "Imitation Learning",
    "Reinforcement": "Reinforcement Learning",
    "Offline RL": "Offline RL",
    "Inverse RL": "Inverse RL (Imitation)",
    "Model-Based": "World Models",
    "Sequence": "Sequence Models",
    "Goal-Cond.": "Goal-Conditioned",
    "Hierarchical": "Hierarchical",
    "Meta-Learning": "Meta-Learning",
    "LLM-Orchestration": "LLM / VLM",
}


# ---------------------------------------------------------------------------
# 2. Per-paradigm enrichment authored for a beginner audience:
#      short  = label shown on the map
#      simple = plain-English "what the robot is doing" (high-school level)
#      anim   = which mini-animation archetype to play
# ---------------------------------------------------------------------------
def _enrich(short, anim, *sentences):
    """Build one ENRICH record; *sentences* are joined verbatim into ``simple``."""
    return {"short": short, "anim": anim, "simple": "".join(sentences)}


ENRICH = {
    "flow-matching-policy": _enrich(
        "Flow Matching", "flow",
        "Imagine smoothly steering a dot from a random scribble to the exact move an ",
        "expert made, following the straightest possible path. The robot learns that ",
        "“steering field,” so it can turn noise into a precise action in just a ",
        "few steps.",
    ),
    "diffusion-policy": _enrich(
        "Diffusion", "denoise",
        "Start with pure static (random noise) and clean it up step by step until a ",
        "smooth, sensible action appears — like sharpening a blurry photo. Because it ",
        "imagines many possibilities, it can pick left OR right around an obstacle instead ",
        "of averaging into a crash.",
    ),
    "tokenized-bc": _enrich(
        "Tokenized BC", "tokens",
        "Chop each action into little pieces and predict them one-by-one, exactly like a ",
        "chatbot predicts the next word. This lets a robot reuse all the machinery of a ",
        "language model to act.",
    ),
    "energy-based-bc": _enrich(
        "Energy / Implicit", "energy",
        "Give every possible action a “score,” then roll downhill to the ",
        "best-scoring one — like a marble settling into the lowest dip of a hilly ",
        "landscape.",
    ),
    "value-based-rl": _enrich(
        "Q-Learning", "qlearn",
        "Try things, keep a running estimate of how good each move turns out, and always ",
        "pick the move with the best expected payoff. It learns purely from rewards and ",
        "mistakes — no teacher needed.",
    ),
    "policy-gradient-rl": _enrich(
        "Policy Gradient", "pgrad",
        "Do the task many times; nudge the behavior a little toward whatever earned more ",
        "reward and away from what earned less. Slowly the robot’s habits get ",
        "better.",
    ),
    "off-policy-ac": _enrich(
        "Actor-Critic", "actorcritic",
        "Two parts team up: an “actor” that acts and a “critic” that ",
        "grades each action. The critic’s grades teach the actor to act better — ",
        "and it can re-use old replayed experience.",
    ),
    "offline-rl": _enrich(
        "Offline RL", "offline",
        "Learn only from a fixed recording of past behavior — no live robot, no new ",
        "tries. The trick is to stay close to what’s in the recording so the robot ",
        "doesn’t bet on moves it never saw work.",
    ),
    "maxent-irl": _enrich(
        "MaxEnt IRL", "rewardmap",
        "Watch an expert and figure out the hidden “reward” that would explain ",
        "why they did what they did — reverse-engineering their goal from their ",
        "behavior.",
    ),
    "gail": _enrich(
        "GAIL", "adversarial",
        "Two networks play a game: one tries to act like the expert, the other tries to ",
        "spot the fake. As the spotter gets sharper, the imitator is forced to become ",
        "indistinguishable from the real expert.",
    ),
    "forward-dynamics-mpc": _enrich(
        "MPC", "plan",
        "Learn a “what happens if…” simulator, imagine several action plans ",
        "a few steps ahead, and execute the one that looks best — then re-plan at the ",
        "next moment.",
    ),
    "latent-imagination": _enrich(
        "Dreamer", "dream",
        "Build a compact “mental world,” then practice thousands of times inside ",
        "that daydream instead of the real world — fast and safe — and carry the ",
        "learned skill back to reality.",
    ),
    "generative-video-wm": _enrich(
        "Video WM", "videopred",
        "Predict the future as a short video: given what it sees now, the robot pictures ",
        "what will happen next, frame by frame.",
    ),
    "action-conditioned-wm": _enrich(
        "Action-Cond WM", "videoaction",
        "Ask “if I move like this, what will I see?” The robot predicts the ",
        "future video that each candidate action would cause, then picks the best one.",
    ),
    "world-action-model": _enrich(
        "World-Action", "worldaction",
        "Imagine the future video of the task AND read off the actions needed to make that ",
        "future happen — dreaming the plan and the moves together.",
    ),
    "occupancy-latent-wm": _enrich(
        "Occupancy WM", "occupancy",
        "Instead of a full video, predict a simple map of where stuff will be (occupied vs ",
        "free space) so the robot can plan safe motions.",
    ),
    "decision-transformer": _enrich(
        "Decision Transf.", "returncond",
        "Tell the robot the score you want (“get 100 points”) and it writes out ",
        "the sequence of actions likely to hit that score — like autocompleting a ",
        "winning playthrough.",
    ),
    "trajectory-diffusion": _enrich(
        "Traj. Diffusion", "denoise",
        "Sketch a whole path out of noise and clean it up all at once into a smooth plan, ",
        "then gently steer that plan toward a goal.",
    ),
    "goal-conditioned": _enrich(
        "Goal + HER", "goalrelabel",
        "Tell the robot where to end up and it aims for that goal. When it misses, it ",
        "pretends “wherever I landed” was the goal all along — so even ",
        "failures become useful lessons.",
    ),
    "hrl": _enrich(
        "Hierarchical", "hierarchy",
        "A “manager” sets mini-goals (go here, then there) and a “worker” ",
        "figures out the small moves to reach each one — splitting a big task into ",
        "easy chunks.",
    ),
    "meta-learning": _enrich(
        "Meta-Learning", "meta",
        "Practice on many different tasks so the robot learns HOW to learn — then it ",
        "can pick up a brand-new task after just a few tries.",
    ),
    "llm-planner": _enrich(
        "LLM Planner", "llmplan",
        "A language model reads the instruction, breaks it into a step-by-step plan (or ",
        "even writes code), and calls ready-made skills to carry it out — no ",
        "trial-and-error training.",
    ),
    "vlm-affordance": _enrich(
        "VLM Affordance", "affordance",
        "A vision-language model looks at the scene and marks WHERE and HOW to act (grab ",
        "here, push there), turning a picture into a usable plan.",
    ),
}

# ---------------------------------------------------------------------------
# 3. Interconnections between paradigms (the bridges).
[idA, idB, why] # --------------------------------------------------------------------------- # (a, b, why, kind): # "v" = variant — same core idea, different flavor (dashed, undirected) # "b" = builds-on — arrow a → b means "b builds on / uses a" (a underlies b) EDGES = [ # ---- variants (same core idea, different flavor) ---- ("diffusion-policy", "trajectory-diffusion", "same denoising idea (action vs. whole path)", "v"), ("diffusion-policy", "flow-matching-policy", "generative action heads", "v"), ("flow-matching-policy", "tokenized-bc", "the three VLA action heads", "v"), ("diffusion-policy", "tokenized-bc", "the three VLA action heads", "v"), ("tokenized-bc", "llm-planner", "reuse the language-model / token stack", "v"), ("offline-rl", "decision-transformer", "return-conditioned offline RL (sequence modeling)", "b"), ("decision-transformer", "goal-conditioned", "condition on a target", "v"), ("decision-transformer", "tokenized-bc", "sequence models of actions", "v"), ("offline-rl", "diffusion-policy", "stay close to the data = imitate it", "v"), ("gail", "maxent-irl", "recover / use a reward signal", "v"), ("latent-imagination", "forward-dynamics-mpc", "plan inside a learned model", "v"), ("forward-dynamics-mpc", "occupancy-latent-wm", "a model used for planning", "v"), ("generative-video-wm", "action-conditioned-wm", "adds action-controllable prediction", "b"), ("action-conditioned-wm", "world-action-model", "actions + video together", "v"), ("vlm-affordance", "llm-planner", "language / vision planning, no policy gradient", "v"), ("hrl", "goal-conditioned", "sub-goals are just goals", "v"), ("hrl", "llm-planner", "high-level decomposition", "v"), ("classical-mpc", "forward-dynamics-mpc", "same optimizer — known vs. 
learned model", "v"), # ---- builds-on / enables (arrow a → b: "b builds on a") ---- ("off-policy-ac", "offline-rl", "many offline methods extend off-policy AC (+ conservatism)", "b"), ("policy-gradient-rl", "gail", "GAIL trains its imitator with RL", "b"), ("maxent-irl", "value-based-rl", "infer the reward, then run RL on it", "b"), ("policy-gradient-rl", "latent-imagination", "trains the policy inside the imagined model", "b"), ("diffusion-policy", "world-action-model", "share diffusion/flow generative machinery", "v"), ("policy-gradient-rl", "meta-learning", "adapts with a few gradient steps", "b"), ("off-policy-ac", "goal-conditioned", "HER rides on off-policy RL", "b"), # ---- classical control underlies the learning methods ---- ("lqr", "value-based-rl", "classical optimal-control precursor to RL", "b"), ("pid-control", "off-policy-ac", "residual RL learns on top of a controller", "b"), ("motion-planning", "llm-planner", "the LLM calls a classical planner", "b"), ("classical-mpc", "vlm-affordance", "VLM cost + a classical trajectory optimizer", "b"), ] # --------------------------------------------------------------------------- # 4. Classical / traditional control — NOT in the learning app, added here so # the map shows the full robot-control landscape (the non-learning bedrock). # --------------------------------------------------------------------------- CLASSICAL_FAMILY = dict( key="Classical", label="Classical Control", color="#64748b", desc=("Model-based control & planning with no learning — the engineering " "bedrock robots still run on, and what most learned methods sit on top of."), ) CLASSICAL_PARADIGMS = [ dict( id="pid-control", name="PID / Feedback Control", short="PID", family="Classical", anim="pid", tagline="Push proportionally to the error — the workhorse of control.", simple=("Measure how far you are from the target, and correct in proportion to that " "error — plus a bit for accumulated error (I) and how fast it’s changing (D). 
" "No model, no learning, just feedback. It’s the inner loop under almost everything."), mapping="error e(t) → control u(t)", math=r"u(t)=K_p\,e(t)+K_i\!\int_0^t\! e(\tau)\,d\tau+K_d\,\dot e(t)", when="Low-level motor/joint control, and the inner loop beneath higher-level planners.", pros=["Dead simple, needs no model", "Ubiquitous and robust", "Easy to tune"], cons=["No foresight or constraints", "Struggles with nonlinear / coupled / delayed systems", "Gains are hand-tuned"], papers=["Ziegler–Nichols tuning (1942)", "classical control theory"], ), dict( id="lqr", name="LQR / Optimal Control", short="LQR", family="Classical", anim="lqr", tagline="The provably optimal linear feedback gain.", simple=("If your system is roughly linear, you can solve for the single best feedback " "gain that minimizes a cost trading off staying on target vs. control effort. " "It’s the optimal-control ancestor of value-based RL."), mapping="state x → control u = −Kx", math=r"u=-Kx,\quad K=R^{-1}B^\top P,\quad A^\top P+PA-PBR^{-1}B^\top P+Q=0", when="Stabilization/tracking for systems you can linearize; a baseline and building block (iLQR, LQG).", pros=["Provably optimal for linear-quadratic problems", "Closed-form and fast", "Foundation for iLQR / LQG / MPC"], cons=["Assumes linear dynamics + quadratic cost", "No hard constraints", "Needs a model"], papers=["Kalman (1960), optimal control / LQG"], ), dict( id="classical-mpc", name="Model-Predictive Control / Trajectory Optimization", short="MPC / TrajOpt", family="Classical", anim="trajopt", tagline="Optimize controls over a horizon with a known model; re-plan each step.", simple=("Using known physics, optimize a short sequence of future controls to minimize " "cost while respecting constraints (limits, obstacles), execute the first one, then " "re-optimize at the next step. 
This is the same machinery learned-model MPC uses — " "here the model is hand-derived physics."), mapping="model f + cost ℓ → optimal u_{0:H}", math=r"\min_{u_{0:H}}\sum_{t=0}^{H}\ell(x_t,u_t)\quad\text{s.t.}\quad x_{t+1}=f(x_t,u_t),\;\;g(x_t,u_t)\le 0", when="When you have a decent model and need constraint handling + foresight (legged locomotion, driving, arms).", pros=["Handles constraints and foresight", "Uses known physics", "Re-planning adds robustness"], cons=["Needs an accurate model", "Online optimization is expensive", "Hard for contact-rich / uncertain dynamics"], papers=["iLQR (Todorov 2005)", "CHOMP / TrajOpt", "MPC literature"], ), dict( id="motion-planning", name="Motion Planning (Sampling / Search)", short="Motion Planning", family="Classical", anim="planning", tagline="Search the free space for a collision-free path.", simple=("Find a collision-free path from start to goal by sampling random configurations " "and connecting them (RRT/PRM) or searching a grid/graph (A*). It’s about geometry " "and feasibility, not learning — and it’s often the executor under an LLM planner."), mapping="start, goal, obstacles → collision-free path τ", math=r"\text{find }\tau: q_{\text{start}}\to q_{\text{goal}}\;\;\text{s.t.}\;\;\tau(s)\in\mathcal{C}_{\text{free}}\;\;\forall s", when="Navigation and arm motion through known obstacle fields.", pros=["Completeness / optimality guarantees (A*, RRT*)", "No training data needed", "Mature and reliable"], cons=["Needs a known map / geometry", "Struggles with high-dim contact + dynamics", "Replanning cost in clutter"], papers=["RRT (LaValle 1998)", "PRM (Kavraki 1996)", "A* (Hart 1968)"], ), ] # --------------------------------------------------------------------------- # 5. Best "learn more" explainer per branch (verified reachable June 2026). # (title, url) — canonical project pages / respected blogs / standard texts. 
# ---------------------------------------------------------------------------
# Keyed by paradigm id; each value is a (title, url) pair.
LEARN = {
    "flow-matching-policy": ("π₀ flow-matching VLA — HuggingFace", "https://huggingface.co/blog/pi0"),
    "diffusion-policy": ("Diffusion Policy — project page", "https://diffusion-policy.cs.columbia.edu/"),
    "tokenized-bc": ("OpenVLA — project page", "https://openvla.github.io/"),
    "energy-based-bc": ("Implicit BC — project page", "https://implicitbc.github.io/"),
    "decision-transformer": ("Decision Transformer — project page", "https://sites.google.com/berkeley.edu/decision-transformer"),
    "trajectory-diffusion": ("Diffuser: Planning with Diffusion", "https://diffusion-planning.github.io/"),
    "value-based-rl": ("Deep Q-Learning — HF Deep RL Course", "https://huggingface.co/learn/deep-rl-course/unit3/introduction"),
    "policy-gradient-rl": ("Policy Gradients & PPO — Arxiv Insights (video)", "https://www.youtube.com/watch?v=5P7I-xPq8u8"),
    "off-policy-ac": ("DDPG & SAC — Pieter Abbeel (video)", "https://www.youtube.com/watch?v=pg-lKy7JIRk"),
    "offline-rl": ("Offline RL — BAIR blog", "https://bair.berkeley.edu/blog/2020/12/07/offline/"),
    "maxent-irl": ("What Is Inverse RL? — The Gradient", "https://thegradient.pub/learning-from-humans-what-is-inverse-reinforcement-learning/"),
    "gail": ("GAIL Imitation Learning (video)", "https://www.youtube.com/watch?v=E-lfhLiXiBc"),
    "forward-dynamics-mpc": ("Model-Based RL + MPC — BAIR", "https://bair.berkeley.edu/blog/2017/11/30/model-based-rl/"),
    "latent-imagination": ("Dreamer — Danijar Hafner", "https://danijar.com/project/dreamer/"),
    "generative-video-wm": ("World Models — interactive", "https://worldmodels.github.io/"),
    "action-conditioned-wm": ("V-JEPA 2 World Model — Meta", "https://ai.meta.com/blog/v-jepa-2-world-model-benchmarks/"),
    "world-action-model": ("Unified Video Action Model", "https://unified-video-action-model.github.io/"),
    "occupancy-latent-wm": ("Drive-OccWorld — occupancy WM", "https://drive-occworld.github.io/"),
    "goal-conditioned": ("Hindsight Experience Replay — Two Minute Papers (video)", "https://www.youtube.com/watch?v=Dvd1jQe3pq0"),
    "hrl": ("Hierarchical RL — The Gradient", "https://thegradient.pub/the-promise-of-hierarchical-reinforcement-learning/"),
    "meta-learning": ("Interactive Intro to MAML", "https://interactive-maml.github.io/maml.html"),
    "llm-planner": ("Code as Policies — project page", "https://code-as-policies.github.io/"),
    "vlm-affordance": ("VoxPoser — project page", "https://voxposer.github.io/"),
    "pid-control": ("PID Control — Brian Douglas (video)", "https://www.youtube.com/watch?v=wkfEZmsQqiA"),
    "lqr": ("LQR Optimal Control — Brian Douglas (video)", "https://www.youtube.com/watch?v=E_RDCFOlJx4"),
    "classical-mpc": ("Model Predictive Control — MATLAB (video)", "https://www.youtube.com/watch?v=cEWnixjNdzs"),
    "motion-planning": ("A* Pathfinding — Red Blob Games (interactive)", "https://www.redblobgames.com/pathfinding/a-star/introduction.html"),
}

# Output key order for hand-authored paradigm records. The two tables differ
# only in where "anim" sits; the order is kept so the emitted JSON is unchanged.
_CLASSICAL_FIELDS = (
    "id", "name", "short", "family", "tagline", "simple", "anim",
    "mapping", "math", "when", "pros", "cons", "papers",
)
_EXTRA_FIELDS = (
    "id", "name", "short", "family", "anim", "tagline", "simple",
    "mapping", "math", "when", "pros", "cons", "papers",
)


def _learn_link(paradigm_id):
    """Return the LEARN entry for *paradigm_id* as {"title", "url"}, or None if absent."""
    entry = LEARN.get(paradigm_id)
    if entry is None:
        return None
    return {"title": entry[0], "url": entry[1]}


def _app_paradigm(p):
    """Convert one app.PARADIGMS record into the page's paradigm dict, applying ENRICH."""
    pid = p["id"]
    enrich = ENRICH.get(pid, {})
    return dict(
        id=pid,
        name=p["name"],
        short=enrich.get("short", p["name"]),
        family=p["family"],
        tagline=p["tagline"],
        # Look up "intuition" only when ENRICH has no beginner text: a
        # dict.get() default would be evaluated eagerly and raise KeyError
        # for records that lack the key even though it is not needed.
        simple=enrich["simple"] if "simple" in enrich else p["intuition"],
        anim=enrich.get("anim", "denoise"),
        mapping=p.get("mapping", ""),
        math=p.get("math", ""),
        when=p.get("when", ""),
        pros=p.get("pros", []),
        cons=p.get("cons", []),
        papers=p.get("key_papers", []),
        learn=_learn_link(pid),
    )


def _authored_paradigm(p, fields, learn):
    """Copy *fields* from a hand-authored record (KeyError if one is missing) and attach *learn*."""
    record = {name: p[name] for name in fields}
    record["learn"] = learn
    return record


def build_data():
    """Assemble the data payload that is embedded in the generated page.

    Returns:
        dict with four keys:
          families  -- one dict per app.FAMILY key (key, label, color, desc,
                       equation, relations), then CLASSICAL_FAMILY, then each
                       EXTRA_FAMILIES entry with empty desc/equation/relations.
          paradigms -- app.PARADIGMS (enriched via ENRICH), then
                       CLASSICAL_PARADIGMS, then EXTRA_PARADIGMS.
          edges     -- [a, b, why, kind] lists from EDGES + EXTRA_EDGES whose
                       two endpoints are both known paradigm ids.
          mypapers  -- my_papers.MY_PAPERS entries whose "node" is a paradigm
                       id or a family key.

    Raises:
        KeyError: if a record lacks a required key (e.g. an app paradigm with
            neither an ENRICH "simple" text nor an "intuition" field).

    EXTRA_FAMILIES / EXTRA_PARADIGMS / EXTRA_EDGES are defined further down the
    module; they are resolved when this function is called, not when it is
    defined.
    """
    families = [
        dict(
            key=k,
            label=FAMILY_LABEL.get(k, k),
            color=app.FAMILY[k][0],
            desc=app.FAMILY[k][1],
            equation=app.FAMILY_EQUATIONS.get(k, ""),
            # Each relation is a 3-tuple; the third element (a note) is not exported.
            relations=[dict(name=n, eq=e) for (n, e, _note) in app.FAMILY_RELATIONS.get(k, [])],
        )
        for k in app.FAMILY
    ]
    families.append(dict(CLASSICAL_FAMILY, equation="", relations=[]))
    families.extend(dict(f, desc="", equation="", relations=[]) for f in EXTRA_FAMILIES)

    paradigms = [_app_paradigm(p) for p in app.PARADIGMS]
    paradigms.extend(
        _authored_paradigm(p, _CLASSICAL_FIELDS, _learn_link(p["id"]))
        for p in CLASSICAL_PARADIGMS
    )
    # Extra paradigms carry their own (title, url) pair instead of a LEARN entry.
    paradigms.extend(
        _authored_paradigm(p, _EXTRA_FIELDS, {"title": p["learn"][0], "url": p["learn"][1]})
        for p in EXTRA_PARADIGMS
    )

    # only keep edges whose endpoints exist
    ids = {p["id"] for p in paradigms}
    edges = [[a, b, w, k] for (a, b, w, k) in (EDGES + EXTRA_EDGES) if a in ids and b in ids]

    # A paper may be attached to either a paradigm or a family hub.
    nodeset = ids | {f["key"] for f in families}
    mypapers = [m for m in my_papers.MY_PAPERS if m.get("node") in nodeset]
    return dict(families=families, paradigms=paradigms, edges=edges, mypapers=mypapers)


# Static HTML template; render() patches it by plain string replacement.
# NOTE(review): the markers render() searches for ("const ANIM = {",
# "const ANIM_CAP = {", "__DATA_JSON__") do not appear in this literal as
# shown here — confirm the full template text is intact before relying on it.
TEMPLATE = r""" Robot Learning Landscape

🤖 Robot Learning Landscape

— click any star to see what it does.
variant
builds on
"""

# ---------------------------------------------------------------------------
# Survey-driven additions (same pattern as Classical Control): VLA family,
# Sim-to-Real, and Representation pretraining. Added here, not in the app.
# ---------------------------------------------------------------------------
EXTRA_FAMILIES = [
    dict(key="VLA", label="Vision-Language-Action", color="#f59e0b"),
    dict(key="Sim2Real", label="Sim-to-Real", color="#0ea5e9"),
    dict(key="Representation", label="Representation Pretraining", color="#65a30d"),
]

# Same record layout as the classical paradigms, plus an inline
# learn=(title, url) pair on every entry.
EXTRA_PARADIGMS = [
    dict(id="vla-foundation", name="VLA Foundation Models", short="VLA Foundation",
         family="VLA", anim="vla",
         tagline="A VLM trunk + an action head, trained on many robots' data.",
         simple=("Take a pretrained vision-language model (so it already 'knows' objects and language), "
                 "bolt on an action head, and fine-tune on huge multi-robot datasets. One model then "
                 "follows language commands across many tasks and embodiments — RT-2, OpenVLA, π0, GR00T."),
         mapping="image + instruction → robot actions",
         math=r"\pi_\theta(a\mid o,\ell),\;\;\theta\;\text{init from a VLM};\;\text{head}\in\{\text{tokens},\,\text{diffusion},\,\text{flow}\}",
         when="Generalist, language-conditioned manipulation across tasks and embodiments.",
         pros=["Web-scale priors from the VLM", "One model, many tasks/robots", "Language-conditioned"],
         cons=["Data- and compute-heavy", "Inference latency", "Still brittle out of distribution"],
         # GR00T N1 was released in March 2025 (previously listed as 2024).
         papers=["RT-2 (2023)", "OpenVLA (2024)", "π0 (2024)", "GR00T N1 (2025)"],
         learn=("OpenVLA — project page", "https://openvla.github.io/")),
    dict(id="vla-rl", name="RL-Finetuned VLA", short="RL-Finetuned VLA",
         family="VLA", anim="rlloop",
         tagline="Let a pretrained VLA practice with rewards, not just copy demos.",
         simple=("Imitation only copies demonstrations. RL fine-tuning lets a pretrained VLA actually "
                 "practice — collect rewards (in the real world or inside a world model) and improve "
                 "beyond the demos, fixing systematic failures imitation can't."),
         mapping="VLA + reward → improved VLA",
         math=r"\max_\theta\;\mathbb{E}_{\tau\sim\pi_\theta}\!\Big[\textstyle\sum_t \gamma^t r_t\Big]\;\;\text{from a pretrained VLA}",
         when="Push a strong imitation VLA past the demo ceiling; close the loop with a world model.",
         pros=["Improves beyond demos", "Fixes systematic failures", "Pairs with world models"],
         cons=["Needs reward / simulator", "Can destabilize a good policy", "Sample cost"],
         papers=["VLA-RFT (2025)", "iRe-VLA (2024)", "RL post-training of VLAs"],
         learn=("Illustrated RLHF — Hugging Face", "https://huggingface.co/blog/rlhf")),
    dict(id="domain-randomization", name="Domain Randomization", short="Domain Randomization",
         family="Sim2Real", anim="domainrand",
         tagline="Randomize the simulator so the real world is just another variation.",
         simple=("Train in simulation, but randomize everything — colors, lighting, friction, masses. "
                 "The policy sees so many variations that the real world looks like just one more, so it "
                 "transfers without any real-world training data."),
         mapping="randomized sims → robust real policy",
         math=r"\max_\pi\;\mathbb{E}_{\xi\sim p(\xi)}\,\mathbb{E}_{\tau\sim\pi,\,\mathrm{sim}_\xi}\!\Big[\textstyle\sum_t \gamma^t r_t\Big]",
         when="Sim-to-real for locomotion / dexterity when real data is scarce or dangerous.",
         pros=["No real-world training", "Cheap, parallel sims", "Backbone of legged/dexterous deploys"],
         cons=["Over-randomization hurts performance", "Needs a decent simulator", "Reality gap remains"],
         papers=["Domain Randomization (Tobin 2017)", "OpenAI Dactyl (2018)"],
         learn=("Solving Rubik's Cube (domain randomization) — OpenAI", "https://openai.com/index/solving-rubiks-cube/")),
    dict(id="sim2real-adapt", name="Domain Adaptation (RMA)", short="Domain Adaptation",
         family="Sim2Real", anim="domainrand",
         tagline="Quickly infer the real robot's true dynamics, then adapt online.",
         simple=("Instead of being robust to everything, learn to quickly figure out the real robot's "
                 "properties from a few moments of motion, then adapt on the fly — Rapid Motor Adaptation "
                 "(RMA) does this for legged robots walking on new terrain."),
         mapping="online experience → adapted policy",
         math=r"z_t=\phi(o_{1:t},a_{1:t}),\quad \pi(a\mid o,z_t)\;\;(\text{infer dynamics, then adapt})",
         when="Hardware whose dynamics differ from sim; changing terrain or payload.",
         pros=["Adapts to real dynamics", "Fast, online", "Strong legged-robot results"],
         cons=["Needs an adaptation module", "Base still sim-trained", "Limited to seen variation range"],
         papers=["RMA (Kumar 2021)", "domain adaptation for control"],
         learn=("RMA: Rapid Motor Adaptation — project page", "https://ashish-kmr.github.io/rma-legged-robots/")),
    dict(id="visual-pretrain", name="Visual Pretraining (R3M / MVP / VIP)", short="Visual Pretraining",
         family="Representation", anim="repr",
         tagline="Pretrain a vision encoder on web/human video, then freeze it for control.",
         simple=("Robot data is scarce, but internet video is endless. Pretrain a visual encoder on web / "
                 "human video with self-supervised or value objectives (R3M, MVP, VIP), freeze it, and "
                 "train a small policy on top — so you need far fewer robot demos."),
         mapping="web video → frozen encoder → small policy",
         math=r"\phi^*=\arg\min_\phi \mathcal{L}_{\text{SSL}}(\phi;\,\text{web video})\;\Rightarrow\;\pi_\psi(a\mid \phi(o))",
         when="Low-data manipulation; reuse perception across tasks.",
         pros=["Few robot demos needed", "Reusable perception", "Leverages internet video"],
         cons=["Frozen features may miss task cues", "Encoder–task mismatch", "Not a full policy"],
         papers=["R3M (2022)", "MVP (2022)", "VIP (2023)"],
         learn=("R3M — project page", "https://sites.google.com/view/robot-r3m/")),
    dict(id="latent-action", name="Latent Action Pretraining (LAPA)", short="Latent Action",
         family="Representation", anim="latentact",
         tagline="Learn 'latent actions' from unlabeled video, then map them to motors.",
         simple=("Most video has no action labels. Learn a latent 'what changed between frames' code from "
                 "raw video, pretrain a policy in that latent-action space, then map the latents to real "
                 "motor commands with only a little labeled data — LAPA / UniPi-style."),
         mapping="unlabeled video → latent actions → motors",
         math=r"\hat z_t=\mathrm{VQ}(o_t,o_{t+1}),\;\;p_\psi(o_{t+1}\!\mid o_t,\hat z_t)\;\Rightarrow\;a_t=h_\phi(\hat z_t)",
         when="Bootstrapping policies from action-free video at scale.",
         pros=["Uses action-free video", "Cuts labeled-data needs", "Scales with video"],
         cons=["Latent→action mapping needs labels", "VQ fidelity limits", "Indirect"],
         papers=["LAPA (2024)", "UniPi (2023)"],
         learn=("Latent Action Pretraining (LAPA) — project page", "https://latentactionpretraining.github.io/")),
]

# Same (a, b, why, kind) layout as EDGES; build_data() concatenates the two lists.
EXTRA_EDGES = [
    ("tokenized-bc", "vla-foundation", "tokenized action head for a VLA", "b"),
    ("flow-matching-policy", "vla-foundation", "flow action head for a VLA", "b"),
    ("diffusion-policy", "vla-foundation", "diffusion action head for a VLA", "b"),
    ("vla-foundation", "vla-rl", "RL fine-tunes the VLA past the demo ceiling", "b"),
    ("policy-gradient-rl", "vla-rl", "RL objective for the fine-tune", "b"),
    ("vla-foundation", "llm-planner", "both VLM/LLM-based & language-conditioned", "v"),
    ("policy-gradient-rl", "domain-randomization", "sim RL trained over randomized physics", "b"),
    ("domain-randomization", "sim2real-adapt", "two routes across the reality gap", "v"),
    ("visual-pretrain", "diffusion-policy", "a frozen visual encoder feeds the policy", "b"),
    ("latent-action", "vla-foundation", "latent-action pretraining warm-starts VLAs", "b"),
    ("latent-action", "generative-video-wm", "both learn from action-free video", "v"),
    ("visual-pretrain", "latent-action", "representation pretraining from video", "v"),
]

# Extra animations injected into the shared template's ANIM object.
# JavaScript method definitions (one per extra `anim` archetype) that render()
# splices in right after the template's "const ANIM = {" opener. The text is
# split into one raw-string chunk per function; adjacent literals concatenate.
# NOTE(review): many appends below are empty strings (s+='') as shown here —
# confirm the markup these snippets are meant to emit is intact.
ROBOT_EXTRA_ANIM_JS = (
    r""" vla(c){ let s=''; s+='"pick up the cup"'; s+='VLM'; for(let i=0;i<4;i++)s+=''+ 'a'+(i+1)+''; s+=''; return s; }, """
    r"""domainrand(c){ let s=''; const bg=['#1f2a44','#3b2a1f','#1f3b2a']; bg.forEach((b,i)=>{const x=14+i*60; s+=''+ '';}); for(let i=0;i<3;i++)s+=''; s+='robust π'; s+='🤖real robot'; return s; }, """
    r"""repr(c){ let s=''; for(let i=0;i<4;i++)s+=''; s+='self-supervised → frozen encoder 🔒'; s+='φ frozen'; s+=''; s+='small πfew demos'; return s; }, """
    r"""latentact(c){ let s='oₜ'; s+='oₜ₊₁'; s+='latent action'; s+=''; s+='→ motor action'; s+=''; return s; }, """
)

# Caption entries for the same four archetypes, spliced in after the
# template's "const ANIM_CAP = {" opener.
ROBOT_EXTRA_CAP_JS = (
    'vla:"A VLM trunk reads the scene + instruction and emits actions.",'
    'domainrand:"Randomize the sim so the real world is just another variation.",'
    'repr:"Pretrain a vision encoder on video; freeze it for control.",'
    'latentact:"Learn latent actions from unlabeled video, then map to motors.",'
)


def render(data=None):
    """Render robot_landscape HTML with the extra animations injected.

    Args:
        data: payload to embed as JSON. When None (the default) it is built by
            calling build_data(), which keeps the zero-argument call working.

    Returns:
        TEMPLATE with ROBOT_EXTRA_ANIM_JS inserted after the first
        "const ANIM = {", ROBOT_EXTRA_CAP_JS inserted after the first
        "const ANIM_CAP = {", and every "__DATA_JSON__" replaced by
        json.dumps(data). A marker that is absent from TEMPLATE is silently
        left alone, because str.replace does not fail on a miss.
    """
    if data is None:
        data = build_data()
    page = TEMPLATE.replace("const ANIM = {", "const ANIM = {\n" + ROBOT_EXTRA_ANIM_JS, 1)
    page = page.replace("const ANIM_CAP = {", "const ANIM_CAP = {\n " + ROBOT_EXTRA_CAP_JS + "\n", 1)
    # The JSON goes in last so text inside the data can never match the JS anchors above.
    return page.replace("__DATA_JSON__", json.dumps(data))


def main():
    """Build the data once, write robot_landscape.html, and print a short summary."""
    data = build_data()
    # Reuse the payload so the printed counts describe exactly what was rendered.
    page = render(data)
    out = "robot_landscape.html"
    with open(out, "w", encoding="utf-8") as fh:
        fh.write(page)
    # The page contains non-ASCII text, so count encoded bytes, not characters.
    size = len(page.encode("utf-8"))
    print(f"wrote {out} ({size:,} bytes)")
    print(f" families={len(data['families'])} paradigms={len(data['paradigms'])} edges={len(data['edges'])}")


if __name__ == "__main__":
    main()