"""Generate a World Model landscape page (current as of mid-2026). Reuses the robot-landscape engine (gen_landscape.TEMPLATE: constellation map, Magic-Move detail panels, typed edges, MathJax) but injects a world-model specific set of animations and data. Self-contained HTML. Built from a 5-agent research sweep (June 2026): latent/control WMs, video-gen & interactive WMs, JEPA latent prediction, embodied/driving WMs, surveys. Families are organized equation-first (by training/inference mechanism). Run: .venv_robot_paradigms/bin/python gen_worldmodel.py -> robot_worldmodel.html """ import json import gen_landscape import my_papers # --------------------------------------------------------------------------- # Families (11) — organized by the training / inference equation, not fame. # --------------------------------------------------------------------------- FAMILIES = [ ("Latent", "Latent Imagination (RSSM)", "#2563eb"), ("Value", "Value-Equivalent Planning", "#0891b2"), ("Token", "Autoregressive Token WMs", "#7c3aed"), ("JEPA", "JEPA / Latent Prediction", "#16a34a"), ("VideoDiff", "Video Diffusion Simulators","#db2777"), ("Interactive", "Interactive / Playable", "#ea580c"), ("Spatial3D", "Persistent 3D Worlds", "#ca8a04"), ("Driving", "Driving World Models", "#e11d48"), ("RobotWM", "Robot WM Infrastructure", "#0ea5e9"), ("WMVLA", "World Models in VLAs", "#65a30d"), ("Position", "Positions & Debates", "#64748b"), ] # id, name, short, family, anim, tagline, simple, mapping, math, when, pros, cons, papers, (learn title,url) P = [ # ---------------- Latent Imagination ---------------- dict(id="world-models", name="World Models (Ha & Schmidhuber)", short="World Models", family="Latent", anim="latentroll", tagline="The proof-of-concept: compress with a VAE, dream with an RNN, train a controller inside the dream.", simple=("Learn to drive a racing game by first building a 'mental movie player' of it: a VAE squeezes each frame " "into a small code, an RNN predicts the next code, and a tiny controller is trained entirely inside this " "hallucinated game — then transferred back to the real one. The 2018 paper that named the genre."), mapping="frames → latent z (VAE) → next-z prediction (RNN) → controller", math=r"z_t\sim q_{\text{VAE}}(z_t\mid o_t),\quad P(z_{t+1}\mid a_t,z_t,h_t)\;\text{(MDN-RNN)}", when="As the conceptual starting point: what 'training inside a learned simulator' means.", pros=["First clean train-inside-the-dream demo", "Simple modular V / M / C pipeline", "Beautiful interactive paper"], cons=["Not end-to-end; latents not shaped for control", "Dreams drift and get exploited", "Only simple environments"], papers=["World Models (Ha & Schmidhuber 2018)"], learn=("World Models — interactive paper", "https://worldmodels.github.io/")), dict(id="planet", name="PlaNet (RSSM)", short="PlaNet (RSSM)", family="Latent", anim="latentroll", tagline="The RSSM backbone: deterministic + stochastic latent dynamics, planned with CEM purely in latent space.", simple=("PlaNet learns a latent 'physics engine' from pixels where each state has a reliable deterministic memory " "plus a stochastic guess for what's uncertain. To act it learns no policy at all — it imagines thousands of " "action sequences in latent space and picks the best, like playing out chess lines in your head."), mapping="pixels → RSSM state (h_t, s_t) → CEM over imagined rollouts → action", math=r"\mathcal{L}=\sum_t\mathbb{E}\big[\ln p(o_t\mid s_t)\big]-\mathbb{E}\,\mathrm{KL}\big[q(s_t\mid s_{t-1},a_{t-1},o_t)\,\|\,p(s_t\mid s_{t-1},a_{t-1})\big]", when="Online planning from pixels with high sample efficiency; the origin of every Dreamer.", pros=["~50x more sample-efficient than model-free", "RSSM became the standard latent dynamics", "No policy network at all"], cons=["CEM at every step is slow", "Struggles with long horizons / sparse rewards", "Reconstruction wastes capacity on irrelevant pixels"], papers=["PlaNet (Hafner et al. 2019)"], learn=("Introducing PlaNet — Google Research blog", "https://research.google/blog/introducing-planet-a-deep-planning-network-for-reinforcement-learning/")), dict(id="dreamer", name="Dreamer v1–v3", short="Dreamer v1–3", family="Latent", anim="dream", tagline="Actor-critic trained by backpropagating value gradients through imagined latent rollouts.", simple=("Dreamer keeps PlaNet's latent physics engine but trains a policy and value function inside the dream — " "backpropagating 'how to get more reward' through imagined futures. V2 added discrete latents (human-level " "Atari); V3's symlog tricks made one hyperparameter set master 150+ tasks and mine Minecraft diamonds from scratch."), mapping="pixels → RSSM latent → imagined rollouts → actor-critic update", math=r"\max_\phi\;\mathbb{E}_{q_\phi}\Big[\textstyle\sum_{\tau=t}^{t+H}V_\lambda(s_\tau)\Big]\;\;(\text{gradients through the model};\;\mathrm{symlog}(x)=\mathrm{sign}(x)\ln(|x|{+}1))", when="The default sample-efficient RL agent on a new domain with no tuning budget.", pros=["One config across 150+ tasks (Nature 2025)", "First Minecraft diamonds from scratch", "Fast amortized inference"], cons=["Recurrent RSSM trains sequentially (poor GPU use)", "Reconstruction limits rich real-world video", "Policy can exploit model errors"], papers=["Dreamer (Hafner 2020)", "DreamerV2 (2021)", "DreamerV3 (2023; Nature 2025)"], learn=("DreamerV3 — project page", "https://danijar.com/project/dreamerv3/")), dict(id="daydreamer", name="DayDreamer", short="DayDreamer", family="Latent", anim="dream", tagline="Dreamer on physical robots: a quadruped learns to walk in 1 hour, no simulator.", simple=("If imagination training is so sample-efficient, can a real robot afford it? Yes — by dreaming thousands of " "practice runs for every real step, an A1 quadruped learned to walk from scratch in about an hour and " "adapted online to being pushed. The existence proof that latent world models work outside simulation."), mapping="real robot sensors → Dreamer world model → imagined practice → motor commands", math=r"\max_\phi\,\mathbb{E}_q\Big[\textstyle\sum_\tau V_\lambda(s_\tau)\Big]\;\;\text{trained from real-world replay only}", when="Real-robot RL without a simulator, when every minute of hardware time is precious.", pros=["Walking in ~1 hour wall-clock on hardware", "Shown on 4 different robots", "No simulator, no demos"], cons=["Short-horizon, simple tasks", "Exploration safety not addressed", "Inherits reconstruction limits in clutter"], papers=["DayDreamer (Wu, Escontrela, Hafner, Abbeel, Goldberg 2022)"], learn=("DayDreamer — project page", "https://danijar.com/project/daydreamer/")), dict(id="dreamer4", name="Dreamer 4 (Shortcut Forcing)", short="Dreamer 4", family="Latent", anim="shortcutf", tagline="Scalable transformer WM, real-time on one GPU; Minecraft diamonds from purely offline data.", simple=("Dreamer 4 replaces the small recurrent dream with a video-scale transformer that stays fast enough to " "interact with live. Its 'shortcut forcing' objective teaches one big denoising step to match the result of " "two half-steps, so a dream frame needs ~4 network calls instead of 64. The agent practices inside this " "dream from logged videos only — and still mines diamonds."), mapping="offline video (mostly action-free) → shortcut-forcing transformer WM → imagination RL → policy", math=r"f_\theta(z_t,d)\;\approx\;\mathrm{sg}\big[f_\theta\big(f_\theta(z_t,\tfrac{d}{2}),\tfrac{d}{2}\big)\big]\;\;\text{(shortcut forcing)}", when="Offline-to-control at scale: rich visual domains where real interaction is expensive or impossible.", pros=["First diamonds purely offline; ~100x less data than VPT", "~16x faster generation → real-time on 1 GPU", "Only ~100h of action labels needed"], cons=["Absolute success rate still low (~0.7%)", "No online correction of model bias", "Compute-heavy pretraining"], papers=["Training Agents Inside of Scalable World Models (Hafner, Yan & Lillicrap 2025)"], learn=("Dreamer 4 — project page", "https://danijar.com/project/dreamer4/")), # ---------------- Value-Equivalent Planning ---------------- dict(id="muzero", name="MuZero", short="MuZero", family="Value", anim="mcts", tagline="AlphaZero without the rulebook: latents trained only to predict reward, value, policy — searched with MCTS.", simple=("MuZero plays Go, chess, and Atari at superhuman level without being told the rules. Its learned model never " "simulates boards or pixels — its abstract state only answers the three questions search cares about: what's " "the reward, who's winning, what looks promising? A world model only needs to be accurate about decisions."), mapping="observations → abstract latent → MCTS over (reward, value, policy) heads → action", math=r"\mathcal{L}=\sum_{k=0}^{K}\Big[\ell^r(u_{t+k},\hat r_t^k)+\ell^v(z_{t+k},\hat v_t^k)+\ell^p(\pi_{t+k},\hat p_t^k)\Big]", when="Discrete-action domains with strategic depth where tree search pays off.", pros=["Superhuman with no rules given", "Value-equivalent: accurate about decisions, not pixels", "Deployed in real products (video compression)"], cons=["Extremely sample- and compute-hungry", "Continuous control needs nontrivial extensions", "Latents uninterpretable and task-locked"], papers=["MuZero (Schrittwieser et al. 2020)"], learn=("MuZero — DeepMind blog", "https://deepmind.google/discover/blog/muzero-mastering-go-chess-shogi-and-atari-without-rules/")), dict(id="efficientzero", name="EfficientZero (V1/V2)", short="EfficientZero", family="Value", anim="mcts", tagline="MuZero made sample-efficient: a SimSiam consistency loss → superhuman Atari in 2 hours of play.", simple=("MuZero needed billions of frames; EfficientZero gets superhuman Atari scores from two hours of gameplay. " "Rewards alone are too weak a signal from so little data, so it adds a self-supervised loss forcing the " "model's imagined next state to match the encoder's actual next state. V2 extended the recipe to continuous control."), mapping="limited replay → latent state → MCTS, + next-state consistency as extra signal", math=r"\mathcal{L}_{\text{consist}}=-\,\mathrm{sim}\big(P(g_\theta(z_t,a_t)),\;\mathrm{sg}[h_\theta(o_{t+1})]\big)", when="When environment samples are the bottleneck — the sample-efficiency record-holder lineage.", pros=["First superhuman Atari-100k (~2h experience)", "Consistency loss is simple and transferable", "V2 spans discrete/continuous"], cons=["Heavy machinery (MCTS + reanalysis)", "High wall-clock compute per step", "Less plug-and-play than Dreamer/TD-MPC"], papers=["EfficientZero (Ye et al. 2021)", "EfficientZero V2 (Wang et al. 2024)"], learn=("EfficientZero: How It Works — illustrated", "https://www.lesswrong.com/posts/mRwJce3npmzbKfxws/efficientzero-how-it-works")), dict(id="tdmpc", name="TD-MPC / TD-MPC2", short="TD-MPC2", family="Value", anim="plan", tagline="Decoder-free latents shaped by TD-learning; short-horizon MPPI search + a learned terminal value.", simple=("TD-MPC never reconstructs pixels — its latent space only has to predict rewards and values. At decision " "time it searches a few steps ahead in latent space and lets a value function judge everything beyond, like " "a chess player calculating 5 moves deep then trusting intuition. TD-MPC2: one config, one 317M agent, 80+ tasks."), mapping="observation → task-oriented latent → MPPI + terminal value Q → action", math=r"a^*=\arg\max_{a_{t:t+H}}\mathbb{E}\Big[\sum_{h=0}^{H-1}\gamma^h R_\theta(z_h,a_h)+\gamma^H Q_\theta(z_H,a_H)\Big]", when="The strong default for continuous-control benchmarks and multi-task robot learning.", pros=["104 tasks, one hyperparameter set", "SOTA on hard locomotion/manipulation", "Open code; the practical workhorse"], cons=["Can't generate observations or transfer without rewards", "Planning overhead every control step", "Novel-embodiment transfer limited"], papers=["TD-MPC (Hansen, Wang, Su 2022)", "TD-MPC2 (2024)"], learn=("TD-MPC2 — project page", "https://www.tdmpc2.com/")), # ---------------- Autoregressive Token WMs ---------------- dict(id="iris", name="IRIS (Transformer WM)", short="IRIS", family="Token", anim="tokenfilm", tagline="The world model as a language model: VQ frame tokens + a GPT that predicts the next frame's tokens.", simple=("IRIS treats experience like text: a VQ-VAE turns each frame into discrete tokens, and a GPT-like " "transformer predicts the next frame's tokens given past tokens and actions. The agent learns inside this " "'GPT of the game' — 2 hours of Atari experience beat humans on many games, kicking off the transformer-WM wave."), mapping="frames → VQ tokens → next-token transformer → imagined rollouts → policy", math=r"\max_\theta\;\sum_t\log p_\theta\big(z_{t+1}\mid z_{\le t},a_{\le t}\big)\quad(z=\text{discrete frame tokens})", when="Sample-limited visual RL; importing the LLM toolbox (tokenizers, scaling) into world modeling.", pros=["Sequence-modeling machinery transfers directly", "Interpretable token rollouts you can watch", "Influential (STORM, TWM, Genie lineage)"], cons=["Many tokens per frame → slow imagination", "VQ loses fine visual detail", "Limited long-horizon memory"], papers=["IRIS (Micheli, Alonso, Fleuret 2023)", "TWM (Robine 2023)", "STORM (Zhang 2023)"], learn=("IRIS — official repo with rollout videos", "https://github.com/eloialonso/iris")), dict(id="gaia1", name="GAIA-1 (Driving Tokens)", short="GAIA-1", family="Token", anim="tokenfilm", tagline="9B-param driving world model: next-token prediction over video tokens, conditioned on action and text.", simple=("GAIA-1 treats driving video like language: chop frames into discrete tokens and predict the next one, the " "way GPT predicts the next word. Tell it 'it starts raining' or feed a steering command and it dreams a " "coherent continuation of the drive. The proof that world models scale like LLMs."), mapping="past driving video + ego actions + text → future driving video", math=r"\mathcal{L}=-\sum_t\log p_\theta\big(z_t\mid z_{