"""Generate a World Model landscape page (current as of mid-2026).
Reuses the robot-landscape engine (gen_landscape.TEMPLATE: constellation map,
Magic-Move detail panels, typed edges, MathJax) but injects a
world-model-specific set of animations and data. Output is self-contained HTML.
Built from a 5-agent research sweep (June 2026): latent/control WMs, video-gen
& interactive WMs, JEPA latent prediction, embodied/driving WMs, surveys.
Families are organized equation-first (by training/inference mechanism).
Run: .venv_robot_paradigms/bin/python gen_worldmodel.py -> robot_worldmodel.html
"""
import json
import gen_landscape
import my_papers
# ---------------------------------------------------------------------------
# Families (11) — organized by the training / inference equation, not fame.
# ---------------------------------------------------------------------------
# Each row is a 3-tuple: (family key, display label, hex color string).
# The key is what entries in P reference through their `family=` field.
# NOTE(review): presumably the label/color are consumed by the gen_landscape
# template for legends and node colors; confirm against that module.
FAMILIES = [
    ("Latent",      "Latent Imagination (RSSM)",  "#2563eb"),
    ("Value",       "Value-Equivalent Planning",  "#0891b2"),
    ("Token",       "Autoregressive Token WMs",   "#7c3aed"),
    ("JEPA",        "JEPA / Latent Prediction",   "#16a34a"),
    ("VideoDiff",   "Video Diffusion Simulators", "#db2777"),
    ("Interactive", "Interactive / Playable",     "#ea580c"),
    ("Spatial3D",   "Persistent 3D Worlds",       "#ca8a04"),
    ("Driving",     "Driving World Models",       "#e11d48"),
    ("RobotWM",     "Robot WM Infrastructure",    "#0ea5e9"),
    ("WMVLA",       "World Models in VLAs",       "#65a30d"),
    ("Position",    "Positions & Debates",        "#64748b"),
]
# id, name, short, family, anim, tagline, simple, mapping, math, when, pros, cons, papers, (learn title,url)
P = [
# ---------------- Latent Imagination ----------------
dict(id="world-models", name="World Models (Ha & Schmidhuber)", short="World Models", family="Latent", anim="latentroll",
tagline="The proof-of-concept: compress with a VAE, dream with an RNN, train a controller inside the dream.",
simple=("Learn to drive a racing game by first building a 'mental movie player' of it: a VAE squeezes each frame "
"into a small code, an RNN predicts the next code, and a tiny controller is trained entirely inside this "
"hallucinated game — then transferred back to the real one. The 2018 paper that named the genre."),
mapping="frames → latent z (VAE) → next-z prediction (RNN) → controller",
math=r"z_t\sim q_{\text{VAE}}(z_t\mid o_t),\quad P(z_{t+1}\mid a_t,z_t,h_t)\;\text{(MDN-RNN)}",
when="As the conceptual starting point: what 'training inside a learned simulator' means.",
pros=["First clean train-inside-the-dream demo", "Simple modular V / M / C pipeline", "Beautiful interactive paper"],
cons=["Not end-to-end; latents not shaped for control", "Dreams drift and get exploited", "Only simple environments"],
papers=["World Models (Ha & Schmidhuber 2018)"],
learn=("World Models — interactive paper", "https://worldmodels.github.io/")),
dict(id="planet", name="PlaNet (RSSM)", short="PlaNet (RSSM)", family="Latent", anim="latentroll",
tagline="The RSSM backbone: deterministic + stochastic latent dynamics, planned with CEM purely in latent space.",
simple=("PlaNet learns a latent 'physics engine' from pixels where each state has a reliable deterministic memory "
"plus a stochastic guess for what's uncertain. To act it learns no policy at all — it imagines thousands of "
"action sequences in latent space and picks the best, like playing out chess lines in your head."),
mapping="pixels → RSSM state (h_t, s_t) → CEM over imagined rollouts → action",
math=r"\mathcal{L}=\sum_t\mathbb{E}\big[\ln p(o_t\mid s_t)\big]-\mathbb{E}\,\mathrm{KL}\big[q(s_t\mid s_{t-1},a_{t-1},o_t)\,\|\,p(s_t\mid s_{t-1},a_{t-1})\big]",
when="Online planning from pixels with high sample efficiency; the origin of every Dreamer.",
pros=["~50x more sample-efficient than model-free", "RSSM became the standard latent dynamics", "No policy network at all"],
cons=["CEM at every step is slow", "Struggles with long horizons / sparse rewards", "Reconstruction wastes capacity on irrelevant pixels"],
papers=["PlaNet (Hafner et al. 2019)"],
learn=("Introducing PlaNet — Google Research blog", "https://research.google/blog/introducing-planet-a-deep-planning-network-for-reinforcement-learning/")),
dict(id="dreamer", name="Dreamer v1–v3", short="Dreamer v1–3", family="Latent", anim="dream",
tagline="Actor-critic trained by backpropagating value gradients through imagined latent rollouts.",
simple=("Dreamer keeps PlaNet's latent physics engine but trains a policy and value function inside the dream — "
"backpropagating 'how to get more reward' through imagined futures. V2 added discrete latents (human-level "
"Atari); V3's symlog tricks made one hyperparameter set master 150+ tasks and mine Minecraft diamonds from scratch."),
mapping="pixels → RSSM latent → imagined rollouts → actor-critic update",
math=r"\max_\phi\;\mathbb{E}_{q_\phi}\Big[\textstyle\sum_{\tau=t}^{t+H}V_\lambda(s_\tau)\Big]\;\;(\text{gradients through the model};\;\mathrm{symlog}(x)=\mathrm{sign}(x)\ln(|x|{+}1))",
when="The default sample-efficient RL agent on a new domain with no tuning budget.",
pros=["One config across 150+ tasks (Nature 2025)", "First Minecraft diamonds from scratch", "Fast amortized inference"],
cons=["Recurrent RSSM trains sequentially (poor GPU use)", "Reconstruction limits rich real-world video", "Policy can exploit model errors"],
papers=["Dreamer (Hafner 2020)", "DreamerV2 (2021)", "DreamerV3 (2023; Nature 2025)"],
learn=("DreamerV3 — project page", "https://danijar.com/project/dreamerv3/")),
dict(id="daydreamer", name="DayDreamer", short="DayDreamer", family="Latent", anim="dream",
tagline="Dreamer on physical robots: a quadruped learns to walk in 1 hour, no simulator.",
simple=("If imagination training is so sample-efficient, can a real robot afford it? Yes — by dreaming thousands of "
"practice runs for every real step, an A1 quadruped learned to walk from scratch in about an hour and "
"adapted online to being pushed. The existence proof that latent world models work outside simulation."),
mapping="real robot sensors → Dreamer world model → imagined practice → motor commands",
math=r"\max_\phi\,\mathbb{E}_q\Big[\textstyle\sum_\tau V_\lambda(s_\tau)\Big]\;\;\text{trained from real-world replay only}",
when="Real-robot RL without a simulator, when every minute of hardware time is precious.",
pros=["Walking in ~1 hour wall-clock on hardware", "Shown on 4 different robots", "No simulator, no demos"],
cons=["Short-horizon, simple tasks", "Exploration safety not addressed", "Inherits reconstruction limits in clutter"],
papers=["DayDreamer (Wu, Escontrela, Hafner, Abbeel, Goldberg 2022)"],
learn=("DayDreamer — project page", "https://danijar.com/project/daydreamer/")),
dict(id="dreamer4", name="Dreamer 4 (Shortcut Forcing)", short="Dreamer 4", family="Latent", anim="shortcutf",
tagline="Scalable transformer WM, real-time on one GPU; Minecraft diamonds from purely offline data.",
simple=("Dreamer 4 replaces the small recurrent dream with a video-scale transformer that stays fast enough to "
"interact with live. Its 'shortcut forcing' objective teaches one big denoising step to match the result of "
"two half-steps, so a dream frame needs ~4 network calls instead of 64. The agent practices inside this "
"dream from logged videos only — and still mines diamonds."),
mapping="offline video (mostly action-free) → shortcut-forcing transformer WM → imagination RL → policy",
math=r"f_\theta(z_t,d)\;\approx\;\mathrm{sg}\big[f_\theta\big(f_\theta(z_t,\tfrac{d}{2}),\tfrac{d}{2}\big)\big]\;\;\text{(shortcut forcing)}",
when="Offline-to-control at scale: rich visual domains where real interaction is expensive or impossible.",
pros=["First diamonds purely offline; ~100x less data than VPT", "~16x faster generation → real-time on 1 GPU", "Only ~100h of action labels needed"],
cons=["Absolute success rate still low (~0.7%)", "No online correction of model bias", "Compute-heavy pretraining"],
papers=["Training Agents Inside of Scalable World Models (Hafner, Yan & Lillicrap 2025)"],
learn=("Dreamer 4 — project page", "https://danijar.com/project/dreamer4/")),
# ---------------- Value-Equivalent Planning ----------------
dict(id="muzero", name="MuZero", short="MuZero", family="Value", anim="mcts",
tagline="AlphaZero without the rulebook: latents trained only to predict reward, value, policy — searched with MCTS.",
simple=("MuZero plays Go, chess, and Atari at superhuman level without being told the rules. Its learned model never "
"simulates boards or pixels — its abstract state only answers the three questions search cares about: what's "
"the reward, who's winning, what looks promising? A world model only needs to be accurate about decisions."),
mapping="observations → abstract latent → MCTS over (reward, value, policy) heads → action",
math=r"\mathcal{L}=\sum_{k=0}^{K}\Big[\ell^r(u_{t+k},\hat r_t^k)+\ell^v(z_{t+k},\hat v_t^k)+\ell^p(\pi_{t+k},\hat p_t^k)\Big]",
when="Discrete-action domains with strategic depth where tree search pays off.",
pros=["Superhuman with no rules given", "Value-equivalent: accurate about decisions, not pixels", "Deployed in real products (video compression)"],
cons=["Extremely sample- and compute-hungry", "Continuous control needs nontrivial extensions", "Latents uninterpretable and task-locked"],
papers=["MuZero (Schrittwieser et al. 2020)"],
learn=("MuZero — DeepMind blog", "https://deepmind.google/discover/blog/muzero-mastering-go-chess-shogi-and-atari-without-rules/")),
dict(id="efficientzero", name="EfficientZero (V1/V2)", short="EfficientZero", family="Value", anim="mcts",
tagline="MuZero made sample-efficient: a SimSiam consistency loss → superhuman Atari in 2 hours of play.",
simple=("MuZero needed billions of frames; EfficientZero gets superhuman Atari scores from two hours of gameplay. "
"Rewards alone are too weak a signal from so little data, so it adds a self-supervised loss forcing the "
"model's imagined next state to match the encoder's actual next state. V2 extended the recipe to continuous control."),
mapping="limited replay → latent state → MCTS, + next-state consistency as extra signal",
math=r"\mathcal{L}_{\text{consist}}=-\,\mathrm{sim}\big(P(g_\theta(z_t,a_t)),\;\mathrm{sg}[h_\theta(o_{t+1})]\big)",
when="When environment samples are the bottleneck — the sample-efficiency record-holder lineage.",
pros=["First superhuman Atari-100k (~2h experience)", "Consistency loss is simple and transferable", "V2 spans discrete/continuous"],
cons=["Heavy machinery (MCTS + reanalysis)", "High wall-clock compute per step", "Less plug-and-play than Dreamer/TD-MPC"],
papers=["EfficientZero (Ye et al. 2021)", "EfficientZero V2 (Wang et al. 2024)"],
learn=("EfficientZero: How It Works — illustrated", "https://www.lesswrong.com/posts/mRwJce3npmzbKfxws/efficientzero-how-it-works")),
dict(id="tdmpc", name="TD-MPC / TD-MPC2", short="TD-MPC2", family="Value", anim="plan",
tagline="Decoder-free latents shaped by TD-learning; short-horizon MPPI search + a learned terminal value.",
simple=("TD-MPC never reconstructs pixels — its latent space only has to predict rewards and values. At decision "
"time it searches a few steps ahead in latent space and lets a value function judge everything beyond, like "
"a chess player calculating 5 moves deep then trusting intuition. TD-MPC2: one config, one 317M agent, 80+ tasks."),
mapping="observation → task-oriented latent → MPPI + terminal value Q → action",
math=r"a^*=\arg\max_{a_{t:t+H}}\mathbb{E}\Big[\sum_{h=0}^{H-1}\gamma^h R_\theta(z_h,a_h)+\gamma^H Q_\theta(z_H,a_H)\Big]",
when="The strong default for continuous-control benchmarks and multi-task robot learning.",
pros=["104 tasks, one hyperparameter set", "SOTA on hard locomotion/manipulation", "Open code; the practical workhorse"],
cons=["Can't generate observations or transfer without rewards", "Planning overhead every control step", "Novel-embodiment transfer limited"],
papers=["TD-MPC (Hansen, Wang, Su 2022)", "TD-MPC2 (2024)"],
learn=("TD-MPC2 — project page", "https://www.tdmpc2.com/")),
# ---------------- Autoregressive Token WMs ----------------
dict(id="iris", name="IRIS (Transformer WM)", short="IRIS", family="Token", anim="tokenfilm",
tagline="The world model as a language model: VQ frame tokens + a GPT that predicts the next frame's tokens.",
simple=("IRIS treats experience like text: a VQ-VAE turns each frame into discrete tokens, and a GPT-like "
"transformer predicts the next frame's tokens given past tokens and actions. The agent learns inside this "
"'GPT of the game' — 2 hours of Atari experience beat humans on many games, kicking off the transformer-WM wave."),
mapping="frames → VQ tokens → next-token transformer → imagined rollouts → policy",
math=r"\max_\theta\;\sum_t\log p_\theta\big(z_{t+1}\mid z_{\le t},a_{\le t}\big)\quad(z=\text{discrete frame tokens})",
when="Sample-limited visual RL; importing the LLM toolbox (tokenizers, scaling) into world modeling.",
pros=["Sequence-modeling machinery transfers directly", "Interpretable token rollouts you can watch", "Influential (STORM, TWM, Genie lineage)"],
cons=["Many tokens per frame → slow imagination", "VQ loses fine visual detail", "Limited long-horizon memory"],
papers=["IRIS (Micheli, Alonso, Fleuret 2023)", "TWM (Robine 2023)", "STORM (Zhang 2023)"],
learn=("IRIS — official repo with rollout videos", "https://github.com/eloialonso/iris")),
dict(id="gaia1", name="GAIA-1 (Driving Tokens)", short="GAIA-1", family="Token", anim="tokenfilm",
tagline="9B-param driving world model: next-token prediction over video tokens, conditioned on action and text.",
simple=("GAIA-1 treats driving video like language: chop frames into discrete tokens and predict the next one, the "
"way GPT predicts the next word. Tell it 'it starts raining' or feed a steering command and it dreams a "
"coherent continuation of the drive. The proof that world models scale like LLMs."),
mapping="past driving video + ego actions + text → future driving video",
math=r"\mathcal{L}=-\sum_t\log p_\theta\big(z_t\mid z_{