"""Robot Learning Paradigms — a Gradio explorer.

A layered ontology for robot learning.  The app separates control substrate,
learning objective, predictive/world model, architecture, data regime, and
deployment role so modern foundation-model robotics is not forced into one
flat algorithm list.

Run:
    python robot_paradigms_app.py
"""

from __future__ import annotations

import html as html_lib

import gradio as gr
import pandas as pd


# ---------------------------------------------------------------------------
# FAMILY METADATA  (color chip + short description)
# ---------------------------------------------------------------------------
# Family key -> (chip background colour as a CSS hex string, short description).
# chip() reads only the colour; the description is the second tuple element.
FAMILY: dict[str, tuple[str, str]] = {
    "BC": (
        "#2563eb",
        "Behavioral Cloning — match the expert's action distribution: "
        "min_θ D[π_θ ‖ π_D].  The leaves are different action-heads "
        "(objectives) for the same parent.",
    ),
    "Reinforcement": (
        "#ea580c",
        "Maximize expected return by trial and error.",
    ),
    "Offline RL": (
        "#dc2626",
        "RL on a fixed log + a behavior-support constraint.",
    ),
    "Inverse RL": (
        "#9333ea",
        "Recover the reward (or a discriminator) from demos.",
    ),
    "Model-Based": (
        "#7c3aed",
        "Learn a forward model; plan or train inside it.",
    ),
    "Sequence": (
        "#ca8a04",
        "Cast control as generation over actions or trajectories.",
    ),
    "Goal-Cond.": (
        "#16a34a",
        "Condition π on a goal; relabel failures by what was reached.",
    ),
    "Hierarchical": (
        "#92400e",
        "Two-level: high-level picks subgoals/skills, low-level executes.",
    ),
    "Meta-Learning": (
        "#db2777",
        "Train across tasks so adaptation needs only a few steps / episodes.",
    ),
    "LLM-Orchestration": (
        "#0891b2",
        "LLM/VLM composes skills or emits constraints — no policy gradient.",
    ),
}


# ---------------------------------------------------------------------------
# FAMILY-LEVEL OBJECTIVES  (shown above every leaf in that family)
# ---------------------------------------------------------------------------
# Maps every key of FAMILY to a single LaTeX string stating that family's
# shared objective.  Each value is written as adjacent raw string literals,
# which Python joins into one string, so the backslashes are kept verbatim.
# Explanatory prose is embedded in the same string via \text{...}.
# NOTE(review): the math renderer is not visible in this part of the file —
# presumably KaTeX/MathJax through Gradio; confirm before adding exotic macros.
FAMILY_EQUATIONS: dict[str, str] = {
    # Generic divergence D between the learner's and the dataset's action distribution.
    "BC":
        r"\min_\theta\;\; D\!\left[\,\pi_\theta(a\mid o)\,\Big\|\,\pi_{\mathcal{D}}(a\mid o)\,\right]"
        r"\quad\text{(the divergence depends on the action-head — sub-tree below)}",
    # Expected discounted return over trajectories sampled from π.
    "Reinforcement":
        r"\max_\pi\;\;\mathbb{E}_{\tau\sim\pi}\!\left[\,\sum_{t=0}^{\infty}\gamma^{t}\,r(s_t,a_t)\,\right]",
    # Q-maximisation over dataset states, subject to staying close to the behaviour policy.
    "Offline RL":
        r"\max_\pi\;\;\mathbb{E}_{s\sim\mathcal{D}}\!\left[\,Q(s,\pi(s))\,\right]"
        r"\quad\text{s.t.}\quad \pi(\cdot\mid s)\approx \pi_{\mathcal{D}}(\cdot\mid s)",
    # Stated mostly in words: recover r (or a discriminator D) whose optimal policy matches the expert.
    "Inverse RL":
        r"\text{recover }\;r\;\text{ (or discriminator }D\text{)}\;\text{such that }\;"
        r"\arg\max_\pi \mathbb{E}_\pi\!\left[\sum\gamma^t r\right]\;\text{matches the expert.}",
    # H-step discounted planning objective under learned dynamics f̂_φ and learned reward r̂.
    "Model-Based":
        r"\max_{a_{t:t+H}}\sum_{k=0}^{H}\gamma^{k}\,\hat r(\hat s_{t+k},a_{t+k})\;,"
        r"\;\;\hat s_{t+1}=\hat f_\phi(\hat s_t,a_t)\;\;\text{(plan, or train }\pi\text{ inside)}",
    # Generative model over actions or trajectories; note the \textit nested inside \text.
    "Sequence":
        r"\text{Train a generative model over actions or trajectories; \textit{sample} at inference.}\;\;"
        r"p_\theta(a_t\mid\text{context})\;\;\text{or}\;\;p_\theta(\tau)",
    # Goal-conditioned policy plus the hindsight relabelling rule g' = s_T.
    "Goal-Cond.":
        r"\pi(a\mid s, g)\;\;\;\text{HER relabel:}\;\;(s_t,a_t,s_{t+1},g)\;\to\;(s_t,a_t,s_{t+1},\,g'\!=\!s_T)",
    # High-level policy emits z_k, low-level policy acts given z_k; β(s) terminates an option.
    "Hierarchical":
        r"\pi_{\text{hi}}(z_k\mid s_{kT})\;\cdot\;\pi_{\text{lo}}(a_t\mid s_t, z_k)\;,"
        r"\;\;\text{options terminate per }\beta(s)",
    # MAML form: task loss evaluated after one inner gradient step of size α.
    "Meta-Learning":
        r"\min_\theta\;\;\mathbb{E}_{T\sim p(T)}\!\left[\,\mathcal{L}_T\!\big(\theta - \alpha\,\nabla_\theta \mathcal{L}_T(\theta)\big)\,\right]"
        r"\quad\text{(MAML form)}",
    # LLM/VLM produces a plan; execution uses pretrained skills or minimises a VLM cost along τ.
    "LLM-Orchestration":
        r"\text{plan}=\mathrm{LLM/VLM}(\text{instr},\,\text{scene})\;\;\Rightarrow\;\;"
        r"\text{execute via pretrained skills or }\arg\min_\tau\int \mathcal{C}_\text{VLM}(\tau(t))\,dt"
        r"\quad(\text{no policy gradient})",
}


# ---------------------------------------------------------------------------
# FAMILY RELATIONS  — techniques that share the parent equation but aren't
# their own leaf.  Rendered as a "Same objective, different wrapper" block.
# ---------------------------------------------------------------------------
# Maps a family key to a list of (title, LaTeX equation, HTML description)
# triples.  The equation is a raw string (backslashes kept verbatim); the
# description is a plain string that embeds inline HTML such as <strong>/<em>.
# Only four FAMILY keys have an entry: "BC", "Reinforcement", "Sequence" and
# "Model-Based".
# NOTE(review): consumers presumably need a default (e.g. .get(key, [])) for the
# remaining families — confirm at the lookup site, which is not visible here.
FAMILY_RELATIONS: dict[str, list[tuple[str, str, str]]] = {
    # --- BC: wrappers around the same cloning loss (data, targets, pretraining) ---
    "BC": [
        ("VLA Foundation Models",
         r"\pi_\theta(a\mid o,\ell)\;\;\text{with}\;\;\theta\;\text{init from a VLM};\;"
         r"\text{action-head}\,\in\,\{\text{Flow}, \text{Diffusion}, \text{Tokenized}\}",
         "<strong>Not a new objective.</strong> The loss is one of the four heads in this sub-tree; "
         "what makes a model a VLA is (i) the trunk is initialised from a pretrained Vision-Language "
         "Model and (ii) fine-tuning runs on multi-embodiment / Open-X-Embodiment / DROID-scale data. "
         "<em>Tokenized BC</em>: RT-1, RT-2, RT-H, RT-X, OpenVLA, π₀-FAST, Gato, HPT, Gemini Robotics, "
         "Helix. <em>Flow Matching</em>: π₀ / π₀.5 / π₀.6 / OpenPI. <em>Diffusion</em>: RDT-1B, "
         "Octo, GR00T-N1."),
        ("DAgger (Dataset Aggregation)",
         r"\mathcal{D}_{i+1} = \mathcal{D}_i \cup \{(s,\pi^*(s)) : s\sim d^{\pi_i}\}",
         "Same BC loss — only the <em>state distribution</em> changes.  Visit states with the current "
         "learner, label them with an online expert, retrain.  Cures BC's compounding error."),
        ("Action Chunking (ACT / ALOHA)",
         r"\pi_\theta(a_{t:t+H}\mid o_t)\;\;\text{+ temporal ensemble}\;\;"
         r"a^{\text{exec}}_t = \tfrac{1}{|H|}\!\!\sum_{k\,:\,t\in\text{chunk}_k}\!\!a_t^{(k)}",
         "Same loss family; the <em>action target</em> is an H-step chunk, decoded jointly, then "
         "ensembled across overlapping chunks at execution.  Backbone of ALOHA bimanual teleop."),
        ("MSE-BC (classic supervised regression)",
         r"\mathcal{L} = \mathbb{E}_{(o,a)\sim\mathcal{D}}\!\left[\,\|a - \mu_\theta(o)\|^2\,\right]",
         "<strong>Degenerate single-mode case</strong> of a Gaussian flow / diffusion head — collapses "
         "multi-modal demos to the mean.  This is the failure mode that motivated every leaf above."),
        ("Visual SSL Pretraining (R3M, MVP, VIP, Voltron)",
         r"\phi^* = \arg\min_\phi\,\mathcal{L}_{\text{SSL}}(\phi;\,\text{web video})\;\;\Rightarrow\;\;\pi_\psi(a\mid \phi(o))",
         "<strong>Orthogonal pretraining step</strong>, not a policy objective.  Pretrain an encoder φ "
         "with contrastive / masked / value-based losses on Ego4D or web video, freeze it, then run "
         "any head above on φ(o)."),
    ],
    # --- Reinforcement: a training recipe wrapped around the same RL objective ---
    "Reinforcement": [
        ("Domain Randomization / Sim-to-Real",
         r"\max_\pi\;\;\mathbb{E}_{\xi\sim p(\xi)}\,\mathbb{E}_{\tau\sim\pi,\,\mathrm{sim}_\xi}\!\left[\,\sum_t \gamma^t r_t\,\right]",
         "<strong>Same RL gradient inside an outer expectation</strong> over physics / visual params ξ. "
         "Not a new objective; it's a training-time recipe that makes the policy robust enough to "
         "transfer to the real robot. Backbone of every modern legged-locomotion deployment "
         "(ANYmal, RMA, Extreme Parkour, OpenAI Dactyl)."),
    ],
    # --- Sequence: architectures/containers that can host several objectives ---
    "Sequence": [
        ("Decision Transformer as an architecture receptacle",
         r"\text{tokens}=[\hat R_t,s_t,a_{t-1},\ldots]\;\xrightarrow{\text{causal Transformer}}\;p_\theta(a_t\mid\text{history},\hat R_t)",
         "Decision Transformer is not a new robot objective in the same sense as PPO or SAC.  It is a "
         "sequence-modeling architecture that can hold several objectives: supervised BC-style next-action "
         "prediction, offline RL through return-to-go conditioning, goal-conditioned control when the return "
         "token is replaced by a goal token, or long-horizon planning when the output is a trajectory.  In the "
         "tree it lives under Sequence because the core move is to turn a control problem into token prediction, "
         "but it overlaps with Offline RL and BC."),
        ("Long-range sequence control",
         r"p_\theta(a_{1:T}\mid c)=\prod_t p_\theta(a_t\mid a_{<t},s_{\le t},c)\quad c\in\{\hat R,g,\ell,\text{reward guide}\}",
         "Long-range sequence models are containers for conditioning choices.  The same Transformer or diffusion "
         "backbone can be conditioned on return, language, goal, reward, or a world-model rollout.  Therefore the "
         "important distinction is not just 'Decision Transformer vs Diffuser', but: what is the context token, "
         "what distribution is modeled, and whether inference is direct decoding, sampling, or planning."),
    ],
    # --- Model-Based: world-model recipes that cut across the individual leaves ---
    "Model-Based": [
        ("World Action Models vs Action-Conditioned World Models",
         r"\text{WAM: }(o_t,\ell)\to(\hat o_{t+1:T},\hat a_{t:T})\quad\;\;\;\text{AC-WM: }(o_t,a_{t:T})\to\hat o_{t+1:T}",
         "Two world-model recipes are emerging for robotics.  A World Action Model (WAM) is text/image "
         "conditioned and generates a successful-looking robot video plus actions; examples include "
         "DreamZero, mimic-video, VideoPolicy, UVA, and Large Video Planner.  An action-conditioned "
         "world model (AC-WM) takes candidate future actions as input and predicts their consequences; "
         "examples include Dreamer-style agents, Veo-Robotics, Ctrl-World, DreamDojo, PlayWorld, "
         "World-Gymnast, WorldGym, and V-JEPA 2-style latent predictors.  WAMs preserve video-model "
         "pretraining and make strong action proposals; AC-WMs support counterfactual rollouts, RL inside "
         "the model, fine-grained planning, and policy evaluation."),
        ("Latent Action Pretraining (LAPA / UniPi-style)",
         r"\hat z_t = \mathrm{VQ}(o_t,o_{t+1})\;,\;\;p_\psi(o_{t+1}\!\mid\! o_t,\hat z_t)\;\;\Rightarrow\;\;a_t = h_\phi(\hat z_t)",
         "Sits between a video world model and BC.  Train a VQ-style latent that explains "
         "frame-to-frame transitions on unlabeled video → pretrain a 'latent-action' policy → "
         "decode latents to motors with a small action-labeled dataset."),
    ],
}


def chip(family: str) -> str:
    """Build the coloured, rounded HTML badge for a paradigm family.

    The background colour is the first element of ``FAMILY[family]``; the
    label is ``family`` upper-cased.

    Args:
        family: A key of the module-level ``FAMILY`` table.

    Returns:
        A single ``<span>`` element with inline CSS, as a string.

    Raises:
        KeyError: If ``family`` is not a key of ``FAMILY``.
    """
    background, _description = FAMILY[family]
    declarations = [
        f"background:{background}",
        "color:white",
        "padding:2px 10px",
        "border-radius:999px",
        "font-size:12px",
        "font-weight:600",
        "letter-spacing:.3px",
    ]
    # Every declaration, including the last, is terminated by ';'.
    inline_css = ";".join(declarations) + ";"
    # NOTE: the label is interpolated without HTML escaping; the keys of
    # FAMILY contain no markup-significant characters.
    label = family.upper()
    return f'<span style="{inline_css}">{label}</span>'


# ---------------------------------------------------------------------------
# PARADIGMS  — 20 equation-distinct leaves under 10 families
# ---------------------------------------------------------------------------
PARADIGMS: list[dict] = [

    # ====================  BEHAVIORAL CLONING (BC) — 4 action heads  ====================
    dict(
        id="flow-matching-policy",
        name="Flow Matching Policy",
        family="BC",
        tagline="Learn a velocity field that flows noise into expert actions.",
        mapping="o  →  a   via ODE integration of v_θ(a^t, o, t)",
        math=(
            r"\mathcal{L}(\theta) = \mathbb{E}_{t\sim U[0,1],\,a^0\sim\mathcal{N},\,a^1\sim\mathcal{D}}"
            r"\!\left[\,\bigl\|\,v_\theta(a^t, o, t) - (a^1 - a^0)\,\bigr\|^2\,\right]\;,"
            r"\;\;a^t = (1\!-\!t)\,a^0 + t\,a^1\;\;\Rightarrow\;\;"
            r"a^1 = a^0 + \int_0^1 v_\theta(a^t, o, t)\,dt"
        ),
        intuition=(
            "Pick a noise sample a⁰ ~ 𝒩(0, I) and an expert action a¹; the conditional optimal-"
            "transport path between them has constant velocity (a¹ − a⁰).  Train v_θ to match that "
            "velocity at random points along the path.  At inference, integrate the ODE from noise "
            "to action.  Multi-modal-aware like diffusion, but with straighter paths → fewer "
            "function evaluations (often 4–10 vs 50–100 for DDPM)."
        ),
        key_papers=["π₀ (Physical Intelligence 2024)", "π₀.5 / π₀.6 (2025)", "OpenPI (2025)",
                    "Conditional Flow Matching (Lipman 2023, original method)"],
        pros=["Multi-modal-aware (no mean collapse)", "Fewer NFEs than DDPM at similar quality",
              "Straight-line target makes training stable", "Drop-in action head for VLA backbones"],
        cons=["Still needs ODE solve at inference", "Sensitive to action normalisation",
              "Theory newer than diffusion → fewer ablations in literature"],
        when="Modern default for high-precision, multi-modal manipulation from demos — especially "
             "as the action head of a VLA (π₀ family).",
    ),
    dict(
        id="diffusion-policy",
        name="Diffusion Policy",
        family="BC",
        tagline="Generate actions by k-step denoising of Gaussian noise.",
        mapping="o  →  a   via k-step denoising",
        math=(
            r"\mathcal{L}(\theta) = \mathbb{E}_{k,\,a,\,\epsilon\sim\mathcal{N}}"
            r"\!\left[\,\|\epsilon - \epsilon_\theta(a^k, o, k)\|^2\,\right]\;,"
            r"\;\;a^{k-1} = a^{k} - \alpha\,\epsilon_\theta(a^{k}, o, k) + \sigma_k z\;,\;\;z\sim\mathcal{N}(0,I)"
        ),
        intuition=(
            "Treat π(a|o) as the reverse of a noising process: corrupt expert actions to noise across "
            "k steps, train ε_θ to predict the noise that was added, then denoise from pure noise at "
            "inference.  The score-based parameterisation handles multi-modal demos (left vs right "
            "around an obstacle) gracefully — MSE-BC would average them into a wall."
        ),
        key_papers=["Diffusion Policy (Chi 2023)", "3D Diffusion Policy / DP3 (Ze 2024)",
                    "Equivariant Diffusion Policy (Wang 2024)", "Consistency Policy (Prasad 2024)",
                    "Diffusion-EDFs (Ryu 2023)", "RDT-1B (Liu 2024, as VLA action head)",
                    "Octo (2024, diffusion head VLA)"],
        pros=["Captures multi-modal demos", "Strong empirical SOTA on manipulation",
              "Score head plugs cleanly into any encoder (incl. VLM trunks)"],
        cons=["k-step denoising at inference (slower than flow with straight paths)",
              "Sensitive to action normalisation & horizon",
              "DDIM / consistency tricks needed for real-time control"],
        when="Multi-modal demos, contact-rich manipulation, or as the diffusion action head of a VLA.",
    ),
    dict(
        id="tokenized-bc",
        name="Tokenized / Categorical BC",
        family="BC",
        tagline="Discretise actions, predict them autoregressively like text tokens.",
        mapping="o  →  (a^{(1)}, …, a^{(K)})   with   a^{(j)} ∈ vocab 𝒱",
        math=(
            r"a \to (a^{(1)},\dots,a^{(K)}),\;\;a^{(j)}\in\mathcal{V}\;;\;\;\;"
            r"\mathcal{L}(\theta) = -\sum_{j=1}^{K}\log p_\theta\!\left(a^{(j)}\,\bigg|\,o,\,a^{(<j)}\right)"
        ),
        intuition=(
            "Quantise each action dimension (uniform bins, learned VQ, or FAST / DCT compression for π₀-"
            "FAST) into a small vocabulary, then treat the action sequence as just more tokens for a "
            "transformer.  Lets you reuse the entire LLM/VLM stack — tokeniser, KV cache, sampling, "
            "speculative decoding — for control."
        ),
        key_papers=["RT-1 / RT-2 / RT-X / RT-H (Google)", "OpenVLA (Kim 2024)",
                    "π₀-FAST (Physical Intelligence 2025, FAST action tokens)",
                    "Gato (Reed 2022, multi-task tokenization)", "HPT (Wang 2024)",
                    "Gemini Robotics 1 / 1.5 (DeepMind 2025)", "Helix (Figure 2025)"],
        pros=["Trivially reuses VLM stack (cache, sampling, speculative)",
              "Discrete CE loss is rock-stable",
              "Easy multi-embodiment via shared vocabulary"],
        cons=["Quantisation error caps precision", "Autoregressive decoding is slow for long chunks",
              "Vocabulary design is a real engineering choice (uniform vs VQ vs FAST)"],
        when="You want a single VLM backbone serving many tasks/embodiments and can tolerate "
             "the quantisation floor (or use FAST-style tokens to push it down).",
    ),
    dict(
        id="energy-based-bc",
        name="Energy-Based / Implicit BC",
        family="BC",
        tagline="Score (s, a) pairs with an energy network; act by argmin.",
        mapping="o  →  a*   via   a* = argmin_a  E_θ(o, a)",
        math=(
            r"\pi_\theta(a\mid s) = \frac{\exp(-E_\theta(s,a))}{\int \exp(-E_\theta(s,a'))\,da'}\;;"
            r"\;\;\;\mathcal{L} = -\log\frac{\exp(-E_\theta(s,a^+))}{\sum_{a^-}\exp(-E_\theta(s,a^-))}\;;"
            r"\;\;\;a^* = \arg\min_a E_\theta(s, a)"
        ),
        intuition=(
            "Don't parameterise π directly — parameterise a scalar energy E_θ(s, a) and define π "
            "implicitly as its softmin.  Train with InfoNCE-style contrastive loss over sampled "
            "negatives; act by optimising a at test time (gradient or sampling).  Sharp, multi-modal-"
            "aware, but the inference-time argmin is the practical bottleneck."
        ),
        key_papers=["Implicit BC (Florence 2021)"],
        pros=["Naturally multi-modal", "Sharp, precise actions (no regression-to-mean blur)"],
        cons=["Inference is argmin, not a forward pass", "Harder to train than MLE-style heads",
              "Has lost ground to diffusion / flow in practice"],
        when="Precise insertion / alignment tasks where you can afford a few argmin steps at inference.",
    ),

    # ====================  REINFORCEMENT LEARNING  (3 gradient classes)  ====================
    dict(
        id="value-based-rl",
        name="Value-Based RL (Q-Learning family)",
        family="Reinforcement",
        tagline="Learn Q*(s, a); act greedily.",
        mapping="s  →  Q(s, a)  →  argmax_a",
        math=(
            r"Q^*(s,a) = \mathbb{E}_{s'}\!\left[\,r(s,a) + \gamma\,\max_{a'}Q^*(s',a')\,\right]\;,\;\;"
            r"\pi(s) = \arg\max_a Q(s,a)\;\;\;\text{(TD update with replay)}"
        ),
        intuition=(
            "Bellman optimality equation: bootstrap Q toward the one-step return plus the greedy "
            "value of the successor.  Off-policy: any (s,a,r,s') tuple in a replay buffer is fair "
            "game.  Cracked Atari with DQN; remains the canonical discrete-action method."
        ),
        key_papers=["DQN (Mnih 2015)", "Rainbow (Hessel 2017)", "C51 / QR-DQN / IQN (2017–18)",
                    "R2D2 (2019)", "Agent57 (2020)"],
        pros=["Sample-efficient with replay", "Off-policy data reuse"],
        cons=["Discrete actions natively (continuous needs DDPG-style tricks)",
              "Overestimation bias / deadly triad", "Exploration is the user's problem"],
        when="Discrete action spaces; lots of env interactions; replay is cheap.",
    ),
    dict(
        id="policy-gradient-rl",
        name="Policy Gradient RL (PPO / TRPO family)",
        family="Reinforcement",
        tagline="Push probability mass toward high-advantage actions.",
        mapping="s  →  π_θ(a | s)  →  a",
        math=(
            r"\nabla_\theta J(\theta) = \mathbb{E}_{\tau\sim\pi_\theta}\!\left[\,\sum_t\nabla_\theta\log\pi_\theta(a_t\mid s_t)\,\hat A_t\,\right]\;;"
            r"\;\;\;\text{PPO clip:}\;\;\;L = \mathbb{E}\!\left[\,\min(\,r_t(\theta)\hat A_t,\;\mathrm{clip}(r_t,1\!-\!\epsilon,1\!+\!\epsilon)\hat A_t\,)\,\right]"
        ),
        intuition=(
            "Differentiate the expected return with the log-derivative trick; use an advantage "
            "baseline to cut variance.  PPO's clipped surrogate keeps each update inside a trust "
            "region so a single gradient step can't blow the policy up — the workhorse of large-"
            "scale RL (and the gradient that powers RLHF)."
        ),
        key_papers=["REINFORCE (Williams 1992)", "TRPO (Schulman 2015)", "PPO (Schulman 2017)",
                    "A3C (Mnih 2016)", "ACER (2016)", "IMPALA (Espeholt 2018)", "PPG (Cobbe 2020)"],
        pros=["Continuous & discrete actions", "Stable with PPO clipping", "Trivially parallelises"],
        cons=["On-policy → sample-inefficient", "Sensitive to reward scaling, advantage estimation"],
        when="You have a sim, can run lots of rollouts, and want a robust default RL.",
    ),
    dict(
        id="off-policy-ac",
        name="Off-Policy Actor-Critic (SAC / TD3 family)",
        family="Reinforcement",
        tagline="Actor maximises critic; critic learns from replay.",
        mapping="s  →  a   (with critic Q_φ(s, a) trained on replay)",
        math=(
            r"\max_\pi\;\;\mathbb{E}_{s\sim\mathcal{D}}\!\left[\,Q_\phi(s,\pi(s)) + \alpha\,\mathcal{H}(\pi(\cdot\mid s))\,\right]\;,"
            r"\;\;\;Q_\phi\;\text{trained by TD on replay buffer.}\;\;\;\text{(SAC: max-entropy bonus)}"
        ),
        intuition=(
            "Critic Q_φ is fitted by Bellman regression from off-policy replay; actor is updated to "
            "maximise the critic's value — gradients flow through Q_φ thanks to the reparameterised "
            "actor.  SAC adds an entropy bonus for principled exploration; TD3 uses twin critics + "
            "delayed updates for stability.  The default continuous-control RL."
        ),
        key_papers=["DDPG (Lillicrap 2015)", "TD3 (Fujimoto 2018)", "SAC (Haarnoja 2018)",
                    "DrQ-v2 (Yarats 2021)", "RAD (Laskin 2020)"],
        pros=["Continuous control SOTA", "Sample-efficient via replay"],
        cons=["Hyperparameter sensitive", "Q-overestimation if twin critics / target nets omitted"],
        when="Continuous actions, locomotion, simulated manipulation.",
    ),

    # ====================  OFFLINE RL  ====================
    dict(
        id="offline-rl",
        name="Offline RL (Pessimistic Q + Behavior Constraint)",
        family="Offline RL",
        tagline="RL from a fixed log without exploding on out-of-support actions.",
        mapping="𝒟 (fixed)  →  π",
        math=(
            r"\max_\pi\;\;\mathbb{E}_{s\sim\mathcal{D}}\!\left[\,Q(s,\pi(s))\,\right]"
            r"\;-\;\lambda\,D\!\left(\,\pi(\cdot\mid s)\,\|\,\pi_{\mathcal{D}}(\cdot\mid s)\,\right)"
            r"\;\;\;\;\Big[\,\text{CQL: }+\lambda\,\mathbb{E}_s\!\big[\log\!\sum_a e^{Q(s,a)} - \mathbb{E}_{a\sim\mathcal{D}}Q(s,a)\big],"
            r"\;\;\text{IQL uses expectile, AWAC reweights by }A\,\Big]"
        ),
        intuition=(
            "Vanilla actor-critic bootstraps off out-of-distribution actions and explodes when the "
            "data is fixed.  Offline-RL methods add some form of <em>pessimism</em>: CQL pushes "
            "down Q on OOD actions, IQL learns an expectile-regression value and never queries Q on "
            "OOD actions, AWAC turns the policy step into advantage-weighted BC, TD3+BC adds an "
            "‖π(s) − a_𝒟‖² penalty.  All share the same objective shape above."
        ),
        key_papers=["BCQ (2019)", "CQL (Kumar 2020)", "IQL (Kostrikov 2021)",
                    "AWAC (Nair 2020)", "TD3+BC (Fujimoto 2021)", "BEAR (2019)",
                    "EDAC (2021)", "ReBRAC (2023)"],
        pros=["Uses logged data only — no env interaction needed", "Much safer than naive offline AC"],
        cons=["Bounded by data quality + state coverage", "Regulariser strength is fragile"],
        when="You have logged trajectories with rewards and you cannot interact with the system.",
    ),

    # ====================  INVERSE RL / ADVERSARIAL  ====================
    dict(
        id="maxent-irl",
        name="MaxEnt IRL (Recover the Reward)",
        family="Inverse RL",
        tagline="First infer r̂, then optimise it with RL.",
        mapping="demos  →  r̂(s, a)  →  RL  →  π",
        math=(
            r"\max_r\;\Big[\,\min_\pi\;\mathbb{E}_\pi[-r]\;-\;\mathbb{E}_{\pi^*}[-r]\,\Big]"
            r"\;-\;\psi(r)\quad\text{(MaxEnt: }\psi\,\text{= entropy regulariser on the implied }\pi_r\text{)}"
        ),
        intuition=(
            "Find a reward under which the expert is optimal (or near-optimal under a max-entropy "
            "prior); then run any RL algorithm on r̂.  The recovered reward generalises to states "
            "the expert never visited — unlike BC, which only knows how to copy."
        ),
        key_papers=["MaxEnt IRL (Ziebart 2008)", "Guided Cost Learning (Finn 2016)", "f-IRL (Ni 2020)"],
        pros=["Generalises beyond the demo support", "Reusable, transferable reward"],
        cons=["Bilevel optimisation is hard", "Reward is non-identifiable (many r explain demos)"],
        when="You want a reusable reward function, not just a policy.",
    ),
    dict(
        id="gail",
        name="GAIL / AIRL (Adversarial Imitation)",
        family="Inverse RL",
        tagline="GAN for behaviour — fool a discriminator that knows the expert.",
        mapping="demos  →  D  →  shaped reward log D  →  RL  →  π",
        math=(
            r"\min_\pi\;\max_D\;\;\mathbb{E}_{(s,a)\sim\pi^*}\!\left[\,\log D(s,a)\,\right]"
            r"\;+\;\mathbb{E}_{(s,a)\sim\pi}\!\left[\,\log(1 - D(s,a))\,\right]"
        ),
        intuition=(
            "Train a discriminator to tell expert (s,a) from learner (s,a); the learner's shaped "
            "reward becomes 'how well I fooled D' (≈ −log(1 − D)).  No explicit reward needed — "
            "just demos and rollouts.  AIRL recovers an interpretable reward as a by-product."
        ),
        key_papers=["GAIL (Ho & Ermon 2016)", "AIRL (Fu 2017)", "SQIL (Reddy 2019)"],
        pros=["No reward design needed", "Strong even from a few demos"],
        cons=["GAN-style instability", "Needs env interaction (it's still RL inside)"],
        when="You have demos and a simulator but no reward signal.",
    ),

    # ====================  MODEL-BASED / WORLD MODELS  ====================
    dict(
        id="forward-dynamics-mpc",
        name="Forward-Dynamics + MPC",
        family="Model-Based",
        tagline="Fit f̂(s, a)→s' in observation space; replan every step.",
        mapping="(s, a)  →  ŝ'   then   plan over ŝ-rollouts",
        math=(
            r"\hat s_{t+1} = \hat f_\phi(\hat s_t, a_t)\;\;\Rightarrow\;\;"
            r"a^*_{t:t+H} = \arg\max_{a_{t:t+H}}\sum_{k=0}^{H}\gamma^k\,\hat r(\hat s_{t+k}, a_{t+k})"
            r"\quad\text{(CEM / MPPI / shooting)}"
        ),
        intuition=(
            "Learn forward dynamics from interaction; at each step, sample candidate action "
            "sequences, roll them forward in the model, score them with the reward, and execute the "
            "first action of the best sequence (then re-plan).  Way more sample-efficient than "
            "model-free — your robot 'thinks before it acts'."
        ),
        key_papers=["PILCO (Deisenroth & Rasmussen 2011)", "PETS (Chua 2018)",
                    "Visual Foresight (Finn & Levine 2017)"],
        pros=["Very sample-efficient", "Same model reusable for any reward"],
        cons=["Model error compounds over the horizon", "Long-horizon planning is hard"],
        when="Real-robot RL where each rollout is expensive.",
    ),
    dict(
        id="latent-imagination",
        name="Latent Imagination (Dreamer / TD-MPC / MuZero)",
        family="Model-Based",
        tagline="Dream in a compact latent space; train the actor inside the dream.",
        mapping="φ(o)→z;   ẑ_{t+1} ~ p_φ(·|ẑ_t, a_t);   train π on imagined rollouts",
        math=(
            r"\hat z_{t+1}\!\sim\!p_\phi(\,\cdot\!\mid\!\hat z_t, a_t)\,,\;\;"
            r"\hat r_t\!\sim\!p_\phi(\,\cdot\!\mid\!\hat z_t)\;\;\Rightarrow\;\;"
            r"\nabla_\theta J = \mathbb{E}_{\hat\tau\sim\text{dream}}\!\left[\,\sum_t \gamma^t \hat r_t\,\right]"
        ),
        intuition=(
            "Encode pixels into compact latents; learn a recurrent latent dynamics + reward head; "
            "train an actor-critic on cheap <em>imagined</em> rollouts entirely in latent space.  "
            "Dreamer V3 solves Atari, DMC, and Minecraft with one set of hyperparameters; MuZero "
            "uses the same idea but plans with MCTS over the learned model."
        ),
        key_papers=["World Models (Ha 2018)", "PlaNet (Hafner 2019)", "Dreamer V1/V2/V3 (2020–23)",
                    "TD-MPC / TD-MPC2 (Hansen 2022, 2024)", "MuZero (Schrittwieser 2020)",
                    "EfficientZero (Ye 2021)", "DayDreamer (Wu 2022)"],
        pros=["Extremely sample-efficient", "Plans / trains in compact latents",
              "Same recipe scales across domains"],
        cons=["Model bias amplifies in dream", "Heavier engineering than PPO"],
        when="High-dim observations, limited real-interaction budget.",
    ),
    dict(
        id="generative-video-wm",
        name="Generative Video World Model",
        family="Model-Based",
        tagline="A neural simulator that predicts future video; action conditioning is optional but crucial for control.",
        mapping="context  →  future video   where context may include text, goals, or actions",
        math=(
            r"p_\theta(o_{t+1:t+H}\mid o_{\le t}, c)\quad"
            r"c\in\{\ell, g, a_{t:t+H}, \text{mask / mixed conditions}\}"
        ),
        intuition=(
            "This is the umbrella for video-based world models.  The key design choice is what the "
            "conditioning variable c contains.  If c contains future actions, the model is a simulator.  "
            "If c is only text or a goal, the model is closer to a video planner or world action model.  "
            "The same pretrained video backbone can become either branch."
        ),
        key_papers=["UniSim (Yang 2023)", "GAIA-1 (Wayve 2023)",
                    "Genie / Genie-2 / Genie-3 (DeepMind 2024–25)", "UniPi (Du 2023)"],
        pros=["Uses video pretraining", "Can represent rich scene change",
              "Can become WAM, AC-WM, or flexibly conditioned WM"],
        cons=["Expensive to train", "Pixel-level prediction can waste capacity",
              "Control depends on the conditioning interface"],
        when="You want a foundation model over physical scene evolution rather than just a policy head.",
    ),
    dict(
        id="action-conditioned-wm",
        name="Action-Conditioned World Model (AC-WM)",
        family="Model-Based",
        tagline="Predict what would happen if the robot executed a proposed future action sequence.",
        mapping="(o_t, a_{t:t+H})  →  o_{t+1:t+H} or z_{t+1:t+H}",
        math=(
            r"p_\theta(o_{t+1:t+H}\mid o_{\le t},a_{t:t+H})\quad\text{or}\quad"
            r"p_\theta(z_{t+1:t+H}\mid z_t,a_{t:t+H})"
        ),
        intuition=(
            "Actions go into the model.  That makes the model a counterfactual simulator: try action "
            "sequence A, predict the future; try sequence B, predict a different future.  This is the "
            "branch needed for closed-loop policy evaluation, RL inside the world model, model-predictive "
            "control, and learning from failures or autonomous play."
        ),
        key_papers=["World Models (Ha 2018)", "Dreamer family", "Veo-Robotics (2025)",
                    "Ctrl-World (2025)", "DreamDojo (2026)", "PlayWorld (2026)",
                    "World-Gymnast (2026)", "WorldGym (2025)", "V-JEPA 2 (2025)"],
        pros=["Counterfactual rollouts", "Can use success, failure, and play data",
              "Enables RL, MPC, policy evaluation, and fine-grained planning"],
        cons=["Action representation is embodiment-specific",
              "Harder to preserve generic video pretraining",
              "Must model bad actions as well as good ones"],
        when="You need a real simulator-like model: evaluating candidate actions, training in imagination, or debugging policies.",
    ),
    dict(
        id="world-action-model",
        name="World Action Model (WAM)",
        family="Model-Based",
        tagline="Generate a successful future video and decode the actions that realize it.",
        mapping="(o_t, instruction)  →  (future video, actions)",
        math=(
            r"p_\theta(o_{t+1:T},a_{t:T}\mid o_t,\ell)\quad\text{or}\quad"
            r"p_\theta(o_{t+1:T}\mid o_t,\ell)\;p_\phi(a_{t:T}\mid o_{t:T})"
        ),
        intuition=(
            "Actions come out of the model.  A WAM is best understood as a world-model-powered policy "
            "proposal generator: given the scene and instruction, imagine a successful execution, then "
            "recover actions from that imagined video.  It belongs under world models because video "
            "generation is central, but behaviorally it overlaps with BC and VLA policies."
        ),
        key_papers=["DreamZero (2026)", "Large Video Planner (2025)", "mimic-video (2025)",
                    "VideoPolicy / Video Generators are Robot Policies (2025)",
                    "Unified Video Action Model (2025)", "Cosmos Policy (2026)"],
        pros=["Preserves image+text video pretraining", "Easier learning target: successful executions",
              "Good best-of-N action proposals", "Cross-embodiment video trunk is natural"],
        cons=["Weak counterfactual reasoning", "Hard to use unlabeled failures without relabeling",
              "Policy evaluation and closed-loop RL still need an action-conditioned simulator"],
        when="You want strong action proposals from video pretraining, especially for instruction-following manipulation.",
    ),
    dict(
        id="occupancy-latent-wm",
        name="Occupancy / Latent State World Model",
        family="Model-Based",
        tagline="Predict compact physical state, occupancy, contact, or latent scene dynamics instead of raw pixels.",
        mapping="(z_t or 3D state, a_{t:t+H})  →  z_{t+1:t+H}, occupancy, contacts, cost",
        math=(
            r"z_t=\phi(o_{\le t}),\quad p_\theta(z_{t+1}\mid z_t,a_t),\quad"
            r"\hat c_t=\psi(z_t)\;\text{for occupancy/contact/cost}"
        ),
        intuition=(
            "Occupancy and latent-state models are not separate from world models; they are a "
            "representation choice inside the AC-WM branch.  Instead of predicting RGB pixels, the model "
            "predicts a planning-friendly state: free space, object occupancy, contacts, or a learned latent.  "
            "This often makes manipulation and navigation planning more direct."
        ),
        key_papers=["PlaNet / Dreamer latent dynamics", "TD-MPC / TD-MPC2",
                    "V-JEPA 2 latent prediction", "occupancy-flow and neural scene dynamics lines"],
        pros=["More planning-friendly than pixels", "Usually cheaper than video generation",
              "Connects naturally to MPC, collision checking, and cost maps"],
        cons=["May lose visual detail", "Representation design matters",
              "Needs the right state abstraction for the task"],
        when="You care about physical feasibility, collision, contact, or long-horizon planning more than photorealistic video.",
    ),

    # ====================  SEQUENCE-MODEL CONTROL  ====================
    dict(
        id="decision-transformer",
        name="Decision Transformer (Return-Conditioned Sequence Model)",
        family="Sequence",
        tagline="Autoregress actions like tokens, conditioned on return-to-go.",
        mapping="(R̂_t, s_t, a_t, R̂_{t−1}, s_{t−1}, …)  →  a_t",
        math=(
            r"a_t\sim p_\theta\!\left(\,a_t\,\Big|\,\hat R_t,\,s_t,\,\hat R_{t-1},\,s_{t-1},\,a_{t-1},\,\dots\,\right)\;,"
            r"\;\;\hat R_t = \sum_{t'\ge t} r_{t'}\;\;\text{(return-to-go conditioning)}"
        ),
        intuition=(
            "No Bellman, no bootstrap, no critic.  Treat trajectories as token sequences and train a "
            "transformer to predict the next action conditioned on a <em>desired</em> return-to-go "
            "(plus past states/actions).  At inference, condition on a high target return and let "
            "the transformer hallucinate the action sequence that achieves it."
        ),
        key_papers=["Decision Transformer (Chen 2021)", "Trajectory Transformer (Janner 2021)",
                    "Gato (Reed 2022)"],
        pros=["Single supervised objective", "Multi-task friendly", "Cleanly reuses LM infra"],
        cons=["Return-conditioning can be brittle (overestimation of achievable returns)",
              "No principled stitching of suboptimal trajectories"],
        when="Offline data with rewards; you want a transformer-friendly RL formulation.",
    ),
    dict(
        id="trajectory-diffusion",
        name="Trajectory Diffusion (Diffuser)",
        family="Sequence",
        tagline="Denoise whole (state, action) trajectories; guide with a goal/reward.",
        mapping="goal / reward  →  τ = (s_1, a_1, …, s_T, a_T)",
        math=(
            r"p_\theta(\tau)\;\;\text{diffusion model over}\;\;\tau=(s_1,a_1,\dots,s_T,a_T)\;;"
            r"\;\;\;\text{plan}\;=\;\mathrm{sample}\!\left(\,p_\theta(\tau)\;\Big|\;\nabla_\tau \log p(g\mid\tau)\,\right)"
        ),
        intuition=(
            "Don't just denoise actions — denoise the entire trajectory of states and actions, then "
            "guide the sampler with classifier-style gradients for goals or rewards.  Planning "
            "becomes Bayesian inference: sample plans from the posterior p(τ | g)."
        ),
        key_papers=["Diffuser (Janner 2022)", "Decision Diffuser (Ajay 2022)"],
        pros=["Unifies planning + control in one model", "Flexible task / goal conditioning"],
        cons=["Slow inference (denoise the whole τ)", "Discrete actions are awkward"],
        when="Offline data, long horizons, flexible / re-specifiable goal conditioning.",
    ),

    # ====================  GOAL-CONDITIONED  ====================
    dict(
        id="goal-conditioned",
        name="Goal-Conditioned + Hindsight Relabeling",
        family="Goal-Cond.",
        tagline="One policy commandable to any reachable goal; turn failures into supervision.",
        mapping="(s, g)  →  a",
        math=(
            r"\pi(a\mid s, g)\;\;\;\;\text{Hindsight Experience Replay:}\;\;\;"
            r"(s_t,a_t,s_{t+1},g)\;\to\;(s_t,a_t,s_{t+1},\,g'\!=\!s_T)"
        ),
        intuition=(
            "Condition the policy on a goal; relabel each failed rollout by treating whatever was "
            "actually reached as the 'intended' goal.  Every episode becomes successful supervision "
            "for <em>some</em> goal — turns sparse-reward tasks into dense supervision."
        ),
        key_papers=["UVFA (Schaul 2015)", "HER (Andrychowicz 2017)", "GCSL (Ghosh 2019)",
                    "RIG (Nair 2018)", "Play-LMP (Lynch 2019)"],
        pros=["Unifies many tasks into one policy", "Free supervision via relabeling"],
        cons=["Goal space must be observable / specifiable",
              "Sparse-reward exploration outside data support is still hard"],
        when="Many related tasks differing only by a goal you can specify or observe.",
    ),

    # ====================  HIERARCHICAL  ====================
    dict(
        id="hrl",
        name="Hierarchical (Options / Subgoal HRL)",
        family="Hierarchical",
        tagline="High level picks skills/subgoals; low level executes them.",
        mapping="π_hi(z | s);   π_lo(a | s, z)",
        math=(
            r"\pi_{\text{hi}}(z_k\mid s_{kT})\,,\;\;\pi_{\text{lo}}(a_t\mid s_t, z_k)\,,\;\;"
            r"\text{option terminates per }\beta(s)\in[0,1]"
        ),
        intuition=(
            "Long-horizon tasks become tractable when chunked into reusable skills (options or "
            "subgoals).  The high level operates at a coarse timescale and the low level executes "
            "primitives — better credit assignment, transferable skills."
        ),
        key_papers=["Options (Sutton, Precup, Singh 1999)", "Option-Critic (Bacon 2017)",
                    "FeUdal Networks (Vezhnevets 2017)", "HIRO (Nachum 2018)", "HAC (Levy 2017)"],
        pros=["Better long-horizon credit assignment", "Skills are transferable across tasks"],
        cons=["Complicated training (joint hi/lo + termination)",
              "Skill boundary discovery is hard"],
        when="Long, compositional tasks (kitchens, assembly, sequencing).",
    ),

    # ====================  META-LEARNING  ====================
    dict(
        id="meta-learning",
        name="Meta-Learning Policies (MAML / RL² / PEARL)",
        family="Meta-Learning",
        tagline="Learn to learn — adapt to a new task in a few steps / episodes.",
        mapping="task distribution p(T)  →  fast-adapting π",
        math=(
            r"\theta^* = \arg\min_\theta\;\;\mathbb{E}_{T\sim p(T)}\!\left[\,\mathcal{L}_T\!\big(\theta - \alpha\,\nabla_\theta \mathcal{L}_T(\theta)\big)\,\right]"
            r"\quad(\text{MAML})\;\;\;\Big|\;\;\;z\sim q(z\mid\text{history})\;(\text{PEARL: context-inferred latent})"
        ),
        intuition=(
            "Train across many tasks so that either (a) a few gradient steps on a new task get you "
            "to a good policy — MAML — or (b) inferring a task-latent z from a short interaction "
            "history is enough to specialise — PEARL.  Two distinct realisations of 'learning to "
            "learn'; same outer objective shape."
        ),
        key_papers=["MAML (Finn 2017)", "RL² (Duan 2016)", "PEARL (Rakelly 2019)",
                    "VariBAD (Zintgraf 2020)", "ProMP (Rothfuss 2018)"],
        pros=["Few-shot adaptation to new tasks", "Principled multi-task formulation"],
        cons=["Needs a rich task distribution", "Inner loop is expensive at train time"],
        when="Many related tasks where rapid adaptation matters more than peak per-task performance.",
    ),

    # ====================  LLM / VLM ORCHESTRATION  ====================
    dict(
        id="llm-planner",
        name="LLM-as-Planner / Code-as-Policies",
        family="LLM-Orchestration",
        tagline="LLM composes pre-existing skills; no policy gradient.",
        mapping="(language, scene)  →  plan / Python program  →  skill calls",
        math=(
            r"\text{plan} = \mathrm{LLM}(\text{instruction},\,\text{scene description},\,\text{API})\;;"
            r"\;\;\text{exec}(\text{plan})\;\to\;a_{1:T}"
        ),
        intuition=(
            "Don't use the LLM for raw actuator output — use its reasoning to <em>orchestrate</em> "
            "vetted perception + motion primitives.  Ground the LLM via scene captions, value "
            "functions, or affordances (SayCan), and let it write Python that calls your skill "
            "library (Code-as-Policies)."
        ),
        key_papers=["SayCan (Ahn 2022)", "Inner Monologue (Huang 2022)",
                    "Code-as-Policies (Liang 2023)", "ProgPrompt (Singh 2023)"],
        pros=["Zero-shot task composition from language",
              "Human-readable plans / programs",
              "No policy training required"],
        cons=["Quality of the skill library = ceiling of behaviour",
              "Grounding is brittle"],
        when="Long-horizon, semantically rich tasks with a decent skill library.",
    ),
    dict(
        id="vlm-affordance",
        name="VLM-Affordance / Spatial Programs (VoxPoser / ReKep)",
        family="LLM-Orchestration",
        tagline="VLM emits where to act; a classical solver finds the trajectory.",
        mapping="(language, scene)  →  3D cost / keypoint constraints  →  argmin_τ",
        math=(
            r"\mathcal{C}(x) = \mathrm{LLM\!+\!VLM}(\text{prompt},\,\text{scene})\;\;\Rightarrow\;\;"
            r"\tau^* = \arg\min_\tau \int_0^T \mathcal{C}(\tau(t))\,dt\quad\text{(s.t. kinematic constraints)}"
        ),
        intuition=(
            "Instead of asking a VLM for joint angles, ask it for a 3D voxel cost map or a set of "
            "relational keypoint constraints.  A classical trajectory optimiser then minimises the "
            "VLM-defined cost subject to kinematics — combining open-vocabulary semantics with "
            "reliable motion planning."
        ),
        key_papers=["VoxPoser (Huang 2023)", "MOKA (Liu 2024)", "PIVOT (Nasiriany 2024)",
                    "ReKep (Huang 2024)", "RoboPoint (Yuan 2024)"],
        pros=["Open-vocabulary tasks zero-shot",
              "No robot-specific fine-tuning needed"],
        cons=["Limited to tasks expressible as spatial cost / constraints",
              "VLM inference latency is the bottleneck"],
        when="Open-vocabulary pick / place / arrange tasks with no robot dataset.",
    ),
]


# ---------------------------------------------------------------------------
# PAPERS  ATLAS  (130+ entries — sortable / filterable table)
# Tags follow the new equation-first taxonomy.  Relations get distinct tags
# so users can still filter for them (e.g. "VLA — Flow Matching head" pulls the π₀ family).
# ---------------------------------------------------------------------------
PAPERS: list[tuple[str, str, int, str]] = [
    # Each entry is (title, authors, year, tag).  `get_atlas_df` unpacks rows in
    # exactly that order, keeps a row only when `tag` equals the selected filter
    # (exact string match), and substring-matches the search query against
    # title, authors, and tag — so tag spelling must stay consistent per group.

    # ----- Classic BC (MSE — relation, footnote, not a tree leaf) -----
    ("ALVINN: An Autonomous Land Vehicle in a Neural Network", "Pomerleau", 1989, "Classic BC (MSE)"),
    ("End-to-End Learning for Self-Driving Cars", "Bojarski et al. (NVIDIA)", 2016, "Classic BC (MSE)"),
    ("BC-Z: Zero-Shot Task Generalization", "Jang et al.", 2021, "Classic BC (MSE)"),
    ("DART: Noise Injection for Robust IL", "Laskey et al.", 2017, "Classic BC (MSE)"),

    # ----- DAgger (BC relation) -----
    ("DAgger: A Reduction of Imitation Learning to Structured Prediction", "Ross, Gordon, Bagnell", 2011, "DAgger (BC relation)"),
    ("AggreVaTe", "Ross & Bagnell", 2014, "DAgger (BC relation)"),
    ("SafeDAgger", "Zhang & Cho", 2017, "DAgger (BC relation)"),

    # ----- Action Chunking (BC relation) -----
    ("Learning Fine-Grained Bimanual Manipulation (ACT / ALOHA)", "Zhao et al.", 2023, "ACT (BC relation)"),
    ("Mobile ALOHA", "Fu et al.", 2024, "ACT (BC relation)"),
    ("ALOHA Unleashed", "Zhao et al.", 2024, "ACT (BC relation)"),
    ("RoboAgent (MT-ACT)", "Bharadhwaj et al.", 2023, "ACT (BC relation)"),

    # ----- Diffusion Policy (leaf + VLA instances) -----
    ("Diffusion Policy", "Chi et al.", 2023, "Diffusion Policy"),
    ("3D Diffusion Policy (DP3)", "Ze et al.", 2024, "Diffusion Policy"),
    ("Equivariant Diffusion Policy", "Wang et al.", 2024, "Diffusion Policy"),
    ("Consistency Policy", "Prasad et al.", 2024, "Diffusion Policy"),
    ("Diffusion-EDFs", "Ryu et al.", 2023, "Diffusion Policy"),
    ("RDT-1B (Robotics Diffusion Transformer)", "Liu et al.", 2024, "VLA — Diffusion head"),
    ("Octo: An Open-Source Generalist Robot Policy", "Octo team", 2024, "VLA — Diffusion head"),
    ("GR00T-N1 (NVIDIA Humanoid Foundation Model)", "NVIDIA", 2025, "VLA — Diffusion head"),

    # ----- Flow Matching Policy (leaf + VLA instances) -----
    ("Conditional Flow Matching (foundational method)", "Lipman et al.", 2023, "Flow Matching Policy"),
    ("π₀ (Physical Intelligence)", "Black et al.", 2024, "VLA — Flow Matching head"),
    ("π₀.5 (open-world generalization, 104 homes)", "Physical Intelligence", 2025, "VLA — Flow Matching head"),
    ("π₀.6 (Physical Intelligence)", "Physical Intelligence", 2025, "VLA — Flow Matching head"),
    ("OpenPI (open-source π₀ / π₀.5)", "Physical Intelligence", 2025, "VLA — Flow Matching head"),

    # ----- Tokenized / Categorical BC (leaf + VLA instances) -----
    ("RT-1 (Robotics Transformer)", "Brohan et al.", 2022, "VLA — Tokenized head"),
    ("RT-2 (VLM as Robot Controller)", "Brohan et al.", 2023, "VLA — Tokenized head"),
    ("Open X-Embodiment / RT-X", "Open X collaboration", 2023, "VLA — Tokenized head"),
    ("RT-H (Action Hierarchies with Language)", "Belkhale et al.", 2024, "VLA — Tokenized head"),
    ("OpenVLA", "Kim et al.", 2024, "VLA — Tokenized head"),
    ("π₀-FAST (autoregressive action tokenizer)", "Physical Intelligence", 2025, "VLA — Tokenized head"),
    ("Gato (A Generalist Agent)", "Reed et al.", 2022, "Tokenized / Categorical BC"),
    ("HPT (Heterogeneous Pretrained Transformers)", "Wang et al.", 2024, "VLA — Tokenized head"),
    ("GR-1 (Generative Robot)", "Wu et al.", 2024, "VLA — Tokenized head"),
    ("GR-2 (ByteDance)", "ByteDance", 2024, "VLA — Tokenized head"),
    ("Helix (dual-system VLA, 35-DoF @ 200 Hz)", "Figure AI", 2025, "VLA — Tokenized head"),
    ("Gemini Robotics (think-then-act VLA)", "Google DeepMind", 2025, "VLA — Tokenized head"),
    ("Gemini Robotics 1.5", "Google DeepMind", 2025, "VLA — Tokenized head"),

    # ----- Energy-Based / Implicit BC -----
    ("Implicit Behavioral Cloning", "Florence et al.", 2021, "Energy-Based / Implicit BC"),

    # ----- Visual SSL (BC relation) -----
    ("R3M", "Nair et al.", 2022, "Visual SSL (BC relation)"),
    ("MVP (Masked Visual Pretraining)", "Xiao et al.", 2022, "Visual SSL (BC relation)"),
    ("VIP (Value-Implicit Pretraining)", "Ma et al.", 2023, "Visual SSL (BC relation)"),
    ("Voltron", "Karamcheti et al.", 2023, "Visual SSL (BC relation)"),
    ("MCR (Manipulation-Centric Representations)", "Jiang et al.", 2024, "Visual SSL (BC relation)"),
    ("RPT (Robot Learning with Sensorimotor Pre-training)", "Radosavovic et al.", 2023, "Visual SSL (BC relation)"),

    # ----- Imitation data scaling (relation under BC) -----
    ("MimicGen (synthetic demos via SE(3) replay)", "Mandlekar et al.", 2023, "Imitation data scaling"),
    ("DexMimicGen (dexterous bimanual data scaling)", "Jiang et al.", 2024, "Imitation data scaling"),

    # ----- Value-Based RL -----
    ("Playing Atari with Deep RL (DQN)", "Mnih et al.", 2013, "Value-Based RL"),
    ("Human-level control through Deep RL (DQN)", "Mnih et al.", 2015, "Value-Based RL"),
    ("Rainbow", "Hessel et al.", 2017, "Value-Based RL"),
    ("C51 (A Distributional Perspective on RL)", "Bellemare et al.", 2017, "Value-Based RL"),
    ("QR-DQN", "Dabney et al.", 2017, "Value-Based RL"),
    ("IQN", "Dabney et al.", 2018, "Value-Based RL"),
    ("R2D2", "Kapturowski et al.", 2019, "Value-Based RL"),
    ("Agent57", "Badia et al.", 2020, "Value-Based RL"),

    # ----- Policy Gradient RL -----
    ("REINFORCE (Simple Statistical Gradient-Following)", "Williams", 1992, "Policy Gradient RL"),
    ("Trust Region Policy Optimization (TRPO)", "Schulman et al.", 2015, "Policy Gradient RL"),
    ("Proximal Policy Optimization (PPO)", "Schulman et al.", 2017, "Policy Gradient RL"),
    ("A3C", "Mnih et al.", 2016, "Policy Gradient RL"),
    ("ACER", "Wang et al.", 2016, "Policy Gradient RL"),
    ("IMPALA", "Espeholt et al.", 2018, "Policy Gradient RL"),
    ("PPG (Phasic Policy Gradient)", "Cobbe et al.", 2020, "Policy Gradient RL"),
    ("Ape-X DPG", "Horgan et al.", 2018, "Policy Gradient RL"),

    # ----- Off-Policy Actor-Critic -----
    ("DDPG", "Lillicrap et al.", 2015, "Off-Policy Actor-Critic"),
    ("TD3 (Addressing Function Approximation Error)", "Fujimoto et al.", 2018, "Off-Policy Actor-Critic"),
    ("Soft Actor-Critic (SAC)", "Haarnoja et al.", 2018, "Off-Policy Actor-Critic"),
    ("DrQ-v2", "Yarats et al.", 2021, "Off-Policy Actor-Critic"),
    ("RAD (Data-Aug RL)", "Laskin et al.", 2020, "Off-Policy Actor-Critic"),
    ("CURL (Contrastive RL)", "Laskin et al.", 2020, "Off-Policy Actor-Critic"),

    # ----- Sim2Real (RL relation) -----
    ("Domain Randomization", "Tobin et al.", 2017, "Sim2Real (RL relation)"),
    ("Learning Dexterous In-Hand Manipulation", "OpenAI / Akkaya et al.", 2018, "Sim2Real (RL relation)"),
    ("Solving Rubik's Cube with a Robot Hand", "OpenAI", 2019, "Sim2Real (RL relation)"),
    ("Sim-to-Real ANYmal Locomotion", "Hwangbo et al.", 2019, "Sim2Real (RL relation)"),
    ("RMA (Rapid Motor Adaptation)", "Kumar et al.", 2021, "Sim2Real (RL relation)"),
    ("Cassie / Berkeley Humanoid Walking", "Siekmann et al.", 2021, "Sim2Real (RL relation)"),
    ("Extreme Parkour with Legged Robots", "Cheng et al.", 2023, "Sim2Real (RL relation)"),

    # ----- Offline RL -----
    ("BCQ (Batch-Constrained Q-Learning)", "Fujimoto et al.", 2019, "Offline RL"),
    ("CQL (Conservative Q-Learning)", "Kumar et al.", 2020, "Offline RL"),
    ("IQL (Implicit Q-Learning)", "Kostrikov et al.", 2021, "Offline RL"),
    ("AWAC", "Nair et al.", 2020, "Offline RL"),
    ("TD3+BC", "Fujimoto & Gu", 2021, "Offline RL"),
    ("BEAR", "Kumar et al.", 2019, "Offline RL"),
    ("EDAC", "An et al.", 2021, "Offline RL"),
    ("ReBRAC", "Tarasov et al.", 2023, "Offline RL"),

    # ----- MaxEnt IRL -----
    ("MaxEnt IRL", "Ziebart et al.", 2008, "MaxEnt IRL"),
    ("Guided Cost Learning", "Finn et al.", 2016, "MaxEnt IRL"),
    ("f-IRL", "Ni et al.", 2020, "MaxEnt IRL"),

    # ----- GAIL / AIRL -----
    ("GAIL (Generative Adversarial Imitation)", "Ho & Ermon", 2016, "GAIL / AIRL"),
    ("AIRL (Adversarial IRL)", "Fu et al.", 2017, "GAIL / AIRL"),
    ("SQIL (Soft Q Imitation Learning)", "Reddy et al.", 2019, "GAIL / AIRL"),

    # ----- Forward-Dynamics + MPC -----
    ("PILCO", "Deisenroth & Rasmussen", 2011, "Forward-Dynamics + MPC"),
    ("PETS (Probabilistic Ensembles + TS)", "Chua et al.", 2018, "Forward-Dynamics + MPC"),
    ("Visual Foresight", "Finn & Levine", 2017, "Forward-Dynamics + MPC"),

    # ----- Latent Imagination -----
    ("World Models", "Ha & Schmidhuber", 2018, "Latent Imagination"),
    ("PlaNet (Latent Dynamics)", "Hafner et al.", 2019, "Latent Imagination"),
    ("Dreamer", "Hafner et al.", 2020, "Latent Imagination"),
    ("DreamerV2", "Hafner et al.", 2021, "Latent Imagination"),
    ("DreamerV3", "Hafner et al.", 2023, "Latent Imagination"),
    ("TD-MPC", "Hansen et al.", 2022, "Latent Imagination"),
    ("TD-MPC2", "Hansen et al.", 2024, "Latent Imagination"),
    ("MuZero", "Schrittwieser et al.", 2020, "Latent Imagination"),
    ("EfficientZero", "Ye et al.", 2021, "Latent Imagination"),
    ("DayDreamer (Dreamer on Real Robots)", "Wu et al.", 2022, "Latent Imagination"),

    # ----- Generative Video World Model (+ LAPA relation) -----
    ("UniSim", "Yang et al.", 2023, "Generative Video World Model"),
    ("GAIA-1", "Wayve", 2023, "Generative Video World Model"),
    ("Genie", "Bruce et al.", 2024, "Generative Video World Model"),
    ("Genie-2", "DeepMind", 2024, "Generative Video World Model"),
    ("Genie 3 (action-conditioned foundation world model)", "DeepMind", 2025, "Generative Video World Model"),
    ("UniPi (Universal Policy via Text-to-Video)", "Du et al.", 2023, "Generative Video World Model"),
    ("LAPA (Latent Action Pretraining)", "Ye et al.", 2024, "LAPA (Video-WM relation)"),
    ("ATM (Any-point Trajectory Modeling)", "Wen et al.", 2024, "LAPA (Video-WM relation)"),

    # ----- Decision Transformer -----
    ("Decision Transformer", "Chen et al.", 2021, "Decision Transformer"),
    ("Trajectory Transformer", "Janner et al.", 2021, "Decision Transformer"),

    # ----- Trajectory Diffusion -----
    ("Diffuser (Planning with Diffusion)", "Janner et al.", 2022, "Trajectory Diffusion"),
    ("Decision Diffuser", "Ajay et al.", 2022, "Trajectory Diffusion"),

    # ----- Goal-Conditioned + Hindsight -----
    ("Universal Value Function Approximators (UVFA)", "Schaul et al.", 2015, "Goal-Conditioned + Hindsight"),
    ("Hindsight Experience Replay (HER)", "Andrychowicz et al.", 2017, "Goal-Conditioned + Hindsight"),
    ("GCSL (Goal-Conditioned Supervised Learning)", "Ghosh et al.", 2019, "Goal-Conditioned + Hindsight"),
    ("RIG (Visual RL with Imagined Goals)", "Nair et al.", 2018, "Goal-Conditioned + Hindsight"),
    ("Learning Latent Plans from Play (Play-LMP)", "Lynch et al.", 2019, "Goal-Conditioned + Hindsight"),

    # ----- Hierarchical -----
    ("Between MDPs and semi-MDPs (Options)", "Sutton, Precup, Singh", 1999, "Hierarchical"),
    ("Option-Critic", "Bacon et al.", 2017, "Hierarchical"),
    ("FeUdal Networks (FuN)", "Vezhnevets et al.", 2017, "Hierarchical"),
    ("HIRO (Data-efficient HRL)", "Nachum et al.", 2018, "Hierarchical"),
    ("HAC", "Levy et al.", 2017, "Hierarchical"),

    # ----- Meta-Learning -----
    ("MAML", "Finn et al.", 2017, "Meta-Learning"),
    ("RL² (Fast RL via Slow RL)", "Duan et al.", 2016, "Meta-Learning"),
    ("PEARL", "Rakelly et al.", 2019, "Meta-Learning"),
    ("VariBAD", "Zintgraf et al.", 2020, "Meta-Learning"),
    ("ProMP", "Rothfuss et al.", 2018, "Meta-Learning"),

    # ----- LLM-as-Planner -----
    ("SayCan", "Ahn et al.", 2022, "LLM-as-Planner"),
    ("Inner Monologue", "Huang et al.", 2022, "LLM-as-Planner"),
    ("Code as Policies", "Liang et al.", 2023, "LLM-as-Planner"),
    ("ProgPrompt", "Singh et al.", 2023, "LLM-as-Planner"),

    # ----- VLM-Affordance -----
    ("VoxPoser", "Huang et al.", 2023, "VLM-Affordance"),
    ("MOKA", "Liu et al.", 2024, "VLM-Affordance"),
    ("PIVOT (Visual Prompting)", "Nasiriany et al.", 2024, "VLM-Affordance"),
    ("ReKep (Relational Keypoint Constraints)", "Huang et al.", 2024, "VLM-Affordance"),
    ("RoboPoint", "Yuan et al.", 2024, "VLM-Affordance"),
]


# ---------------------------------------------------------------------------
# RENDERING
# ---------------------------------------------------------------------------
def render_paradigm(name: str) -> str:
    """Return the HTML detail page for the paradigm whose ``name`` matches.

    The page shows, in order: title with family chip, tagline, the family-level
    objective (when one is registered), the mapping, the leaf-specific
    objective, intuition, pros / cons, key papers, a "when to reach for it"
    note, and the family's relation cards (when any are registered).

    When no paradigm carries that name, a short plain-text prompt is returned
    instead of HTML.  Field values are interpolated as-is (not escaped), so
    entries may carry inline markup.
    """
    entry = None
    for candidate in PARADIGMS:
        if candidate["name"] == name:
            entry = candidate
            break
    if entry is None:
        return "Pick a paradigm from the dropdown."

    def bullets(items):
        # One <li> per item, concatenated with no separator.
        return "".join(f"<li>{item}</li>" for item in items)

    family = entry["family"]
    family_chip = chip(family)
    papers_html = bullets(entry["key_papers"])
    pros_html = bullets(entry["pros"])
    cons_html = bullets(entry["cons"])

    # Family-level objective (shared by every leaf in this family); the banner
    # is omitted entirely when the family has no registered equation.
    family_objective = FAMILY_EQUATIONS.get(family, "")
    family_eq_html = ""
    if family_objective:
        family_eq_html = (
            '<div style="background:#ecfeff;border-left:4px solid #0891b2;padding:10px 14px;'
            'border-radius:6px;margin-bottom:12px;">'
            '<div style="font-size:12px;color:#0e7490;font-weight:700;letter-spacing:.5px;'
            f'text-transform:uppercase;margin-bottom:4px;">Family objective — {family}</div>'
            f'<div style="font-size:15px;">$$ {family_objective} $$</div>'
            '</div>'
        )

    # Relations (same parent objective, different wrapper): one card per
    # (title, equation, explanation) triple, wrapped in a single panel.
    relations = FAMILY_RELATIONS.get(family, [])
    if not relations:
        relations_html = ""
    else:
        cards = [
            '<div style="border:1px solid #e2e8f0;border-radius:8px;padding:10px 12px;'
            'margin-bottom:8px;background:white;">'
            f'<div style="font-weight:700;color:#0f172a;font-size:14px;margin-bottom:4px;">{title}</div>'
            '<div style="background:#f8fafc;padding:6px 10px;border-radius:4px;margin-bottom:6px;'
            f'font-size:14px;">$$ {equation} $$</div>'
            f'<div style="color:#475569;font-size:13px;line-height:1.45;">{explanation}</div>'
            '</div>'
            for title, equation, explanation in relations
        ]
        relations_html = (
            '<div style="margin-top:18px;background:#f5f3ff;border:1px solid #c4b5fd;'
            'padding:12px 14px;border-radius:8px;">'
            '<div style="font-size:13px;color:#5b21b6;font-weight:700;letter-spacing:.5px;'
            'text-transform:uppercase;margin-bottom:8px;">'
            'Same family objective, different wrapper (relations)</div>'
            + "".join(cards)
            + '</div>'
        )

    return f"""
<div style="padding:18px 22px 10px 22px;">
  <div style="display:flex;align-items:center;gap:12px;flex-wrap:wrap;">
    <h1 style="margin:0;font-size:28px;line-height:1.1;">{entry['name']}</h1>
    {family_chip}
  </div>
  <p style="font-size:18px;color:#475569;margin:8px 0 18px 0;font-style:italic;">{entry['tagline']}</p>

  {family_eq_html}

  <div style="background:#f8fafc;border-left:4px solid #2563eb;padding:12px 16px;border-radius:6px;margin-bottom:14px;">
    <div style="font-size:13px;color:#64748b;font-weight:600;letter-spacing:.5px;text-transform:uppercase;margin-bottom:4px;">Mapping</div>
    <div style="font-size:18px;font-family:'JetBrains Mono', ui-monospace, monospace;">{entry['mapping']}</div>
  </div>

  <div style="background:#fffbeb;border-left:4px solid #ca8a04;padding:12px 16px;border-radius:6px;margin-bottom:14px;">
    <div style="font-size:13px;color:#92400e;font-weight:600;letter-spacing:.5px;text-transform:uppercase;margin-bottom:6px;">Leaf-specific objective</div>
    <div style="font-size:16px;">$$ {entry['math']} $$</div>
  </div>

  <div style="margin:14px 0 18px 0;">
    <div style="font-size:13px;color:#64748b;font-weight:600;letter-spacing:.5px;text-transform:uppercase;margin-bottom:4px;">Intuition</div>
    <p style="margin:0;font-size:15px;line-height:1.55;color:#1e293b;">{entry['intuition']}</p>
  </div>

  <div style="display:grid;grid-template-columns:1fr 1fr;gap:14px;margin-bottom:14px;">
    <div style="background:#f0fdf4;border:1px solid #86efac;padding:12px 14px;border-radius:6px;">
      <div style="font-size:13px;color:#15803d;font-weight:700;letter-spacing:.5px;text-transform:uppercase;margin-bottom:6px;">Pros</div>
      <ul style="margin:0;padding-left:18px;font-size:14px;color:#14532d;">{pros_html}</ul>
    </div>
    <div style="background:#fef2f2;border:1px solid #fca5a5;padding:12px 14px;border-radius:6px;">
      <div style="font-size:13px;color:#b91c1c;font-weight:700;letter-spacing:.5px;text-transform:uppercase;margin-bottom:6px;">Cons</div>
      <ul style="margin:0;padding-left:18px;font-size:14px;color:#7f1d1d;">{cons_html}</ul>
    </div>
  </div>

  <div style="margin:14px 0;">
    <div style="font-size:13px;color:#64748b;font-weight:600;letter-spacing:.5px;text-transform:uppercase;margin-bottom:4px;">Key Papers</div>
    <ul style="margin:0;padding-left:20px;font-size:14px;color:#1e293b;">{papers_html}</ul>
  </div>

  <div style="background:#eff6ff;border:1px dashed #93c5fd;padding:10px 14px;border-radius:6px;font-size:14px;color:#1e3a8a;">
    <strong>When to reach for it:</strong> {entry['when']}
  </div>

  {relations_html}
</div>
"""


def render_compare(name_a: str, name_b: str) -> str:
    """Return a two-column HTML grid comparing the paradigms named *name_a* and *name_b*.

    Each column is a summary card (name, family chip, tagline, mapping,
    objective, intuition, and "when" note).  A name that matches no paradigm
    yields a placeholder dash in its column.
    """

    def find(wanted):
        # First paradigm whose name matches exactly, else None.
        for entry in PARADIGMS:
            if entry["name"] == wanted:
                return entry
        return None

    def card(entry):
        if entry is None:
            return "<div>—</div>"
        return f"""
<div style="border:1px solid #e2e8f0;border-radius:10px;padding:14px 16px;background:white;">
  <div style="display:flex;align-items:center;gap:10px;flex-wrap:wrap;margin-bottom:6px;">
    <h2 style="margin:0;font-size:20px;">{entry['name']}</h2>{chip(entry['family'])}
  </div>
  <p style="font-style:italic;color:#475569;margin:0 0 10px 0;">{entry['tagline']}</p>
  <div style="font-family:ui-monospace,monospace;background:#f1f5f9;padding:6px 10px;border-radius:5px;margin-bottom:8px;">{entry['mapping']}</div>
  <div style="background:#fffbeb;padding:8px 10px;border-radius:5px;margin-bottom:8px;font-size:15px;">$$ {entry['math']} $$</div>
  <p style="font-size:14px;line-height:1.5;color:#1e293b;margin:0 0 8px 0;">{entry['intuition']}</p>
  <div style="font-size:13px;color:#64748b;"><strong>When:</strong> {entry['when']}</div>
</div>
"""

    left = card(find(name_a))
    right = card(find(name_b))
    grid_open = '<div style="display:grid;grid-template-columns:1fr 1fr;gap:16px;padding:12px;">'
    return grid_open + left + right + "</div>"


def get_atlas_df(family_filter: str, year_min: int, query: str) -> pd.DataFrame:
    """Build the filtered, sorted papers table for the atlas view.

    Args:
        family_filter: Tag a paper must carry exactly, or ``"All"`` for no tag
            filter.  ``None`` / ``""`` (as an emptied UI control may send) are
            treated like ``"All"`` instead of silently matching nothing.
        year_min: Earliest publication year to keep (inclusive).  ``None``
            disables the lower bound rather than raising ``TypeError`` on the
            ``int < None`` comparison.
        query: Case-insensitive substring searched in title, authors, and
            tag; blank, whitespace-only, or ``None`` disables the text search.

    Returns:
        A DataFrame with columns ``Year``, ``Title``, ``Authors``,
        ``Paradigm``, sorted newest-first and then alphabetically by title,
        with a fresh 0-based index.  Empty (but with the same columns) when
        nothing matches.
    """
    needle = (query or "").lower().strip()
    any_tag = not family_filter or family_filter == "All"

    rows = []
    for title, authors, year, tag in PAPERS:
        if year_min is not None and year < year_min:
            continue
        if not any_tag and tag != family_filter:
            continue
        # A paper survives the text search if ANY of its fields contains the needle.
        if needle and not any(needle in text.lower() for text in (title, authors, tag)):
            continue
        rows.append((year, title, authors, tag))

    df = pd.DataFrame(rows, columns=["Year", "Title", "Authors", "Paradigm"])
    return df.sort_values(["Year", "Title"], ascending=[False, True]).reset_index(drop=True)


# ---------------------------------------------------------------------------
# PICK-YOUR-PARADIGM  (3-question guide)
# ---------------------------------------------------------------------------
def recommend(data_type: str, env: str, scale: str) -> str:
    """Turn the three guide answers into HTML cards for up to four paradigms.

    Args:
        data_type: kind of supervision available ("Expert demos",
            "Reward only", "Both", "Unlabeled video", "Language + scene only").
        env: interaction setting; only consulted when data_type is
            "Reward only" ("Simulator available", "Logged data only", or
            anything else, which is treated as the real-robot case).
        scale: "Large / multi-task" switches in the VLA-style picks.

    Returns:
        An HTML fragment with one card per recommended paradigm that exists in
        PARADIGMS, or a short prompt paragraph when no card could be built.
    """
    multi_task = scale == "Large / multi-task"
    picks: list[str] = []

    if data_type == "Expert demos":
        # Modern BC defaults — flow matching and diffusion are the two heads
        # the field has converged on for multi-modal manipulation.
        picks.extend(["Flow Matching Policy", "Diffusion Policy"])
        picks.append(
            "Tokenized / Categorical BC"  # VLA recipe
            if multi_task
            else "Energy-Based / Implicit BC"
        )
    elif data_type == "Reward only":
        if env == "Simulator available":
            picks.extend([
                "Policy Gradient RL (PPO / TRPO family)",
                "Off-Policy Actor-Critic (SAC / TD3 family)",
                "Latent Imagination (Dreamer / TD-MPC / MuZero)",
            ])
        elif env == "Logged data only":
            picks.extend([
                "Offline RL (Pessimistic Q + Behavior Constraint)",
                "Decision Transformer (Return-Conditioned Sequence Model)",
            ])
        else:  # Real robot only
            picks.extend([
                "Latent Imagination (Dreamer / TD-MPC / MuZero)",
                "Forward-Dynamics + MPC",
            ])
    elif data_type == "Both":
        picks.extend([
            "GAIL / AIRL (Adversarial Imitation)",
            "MaxEnt IRL (Recover the Reward)",
            "Offline RL (Pessimistic Q + Behavior Constraint)",
        ])
        if multi_task:
            picks.append("Flow Matching Policy")  # π₀-style VLA
    elif data_type == "Unlabeled video":
        picks.extend([
            "Generative Video World Model",
            "Flow Matching Policy",  # downstream after LAPA latent decode
            "Diffusion Policy",
        ])
    elif data_type == "Language + scene only":
        picks.extend([
            "LLM-as-Planner / Code-as-Policies",
            "VLM-Affordance / Spatial Programs (VoxPoser / ReKep)",
            "Tokenized / Categorical BC",  # VLA route
        ])

    # Drop repeats while keeping first-seen order (dicts preserve insertion order).
    unique = list(dict.fromkeys(picks))

    def lookup(name: str):
        # First PARADIGMS entry with this exact name, or None.
        for entry in PARADIGMS:
            if entry["name"] == name:
                return entry
        return None

    rendered: list[str] = []
    # The cap of four applies to the recommendation list, before the lookup.
    for name in unique[:4]:
        entry = lookup(name)
        if not entry:
            continue
        rendered.append("".join((
            "<div style='border:1px solid #e2e8f0;border-radius:8px;padding:12px 16px;",
            "background:white;margin-bottom:10px;'>",
            "<div style='display:flex;gap:10px;align-items:center;flex-wrap:wrap;'>",
            f"<strong style='font-size:16px;'>{entry['name']}</strong>{chip(entry['family'])}</div>",
            f"<p style='margin:6px 0;color:#475569;font-style:italic;'>{entry['tagline']}</p>",
            "<div style='font-family:monospace;background:#f1f5f9;padding:4px 8px;",
            f"border-radius:4px;display:inline-block;font-size:13px;'>{entry['mapping']}</div>",
            "</div>",
        )))

    if rendered:
        return f"<div style='padding:8px;'><h3 style='margin-top:0;'>Recommended paradigms</h3>{''.join(rendered)}</div>"
    return "<p style='color:#64748b;'>Pick options above and I'll suggest paradigms.</p>"


# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
# Stylesheet text for the app.  Rules cover: the container font stack, the
# gradient #header banner and its h1/p, the .fam-legend chip row, and a white
# rounded background for .mermaid elements.
CSS = """
.gradio-container { font-family: -apple-system, BlinkMacSystemFont, "Inter", "Segoe UI", sans-serif; }
#header {
  background: linear-gradient(135deg, #0f172a 0%, #1e3a8a 50%, #7c3aed 100%);
  color: white; padding: 28px 32px; border-radius: 12px; margin-bottom: 12px;
}
#header h1 { color:white; font-size:34px; margin:0; letter-spacing:-.5px; }
#header p  { color:#cbd5e1; font-size:16px; margin:8px 0 0 0; max-width:780px; }
.fam-legend { display:flex; gap:8px; flex-wrap:wrap; padding:10px 16px; }
.fam-legend span { padding:3px 10px; border-radius:999px; font-size:11px; color:white; font-weight:600; letter-spacing:.3px;}
.mermaid { background:white; border-radius:10px; padding:14px; }
"""

# Injected into <head>: MathJax so $$...$$ renders inside gr.HTML, with a
# debounced MutationObserver that re-typesets on every DOM update (tab switch,
# dropdown change, etc.).
#
# This is a raw Python string, so the text reaches the browser verbatim and the
# JavaScript parser then applies its own string-escape rules.  The TeX
# delimiters therefore need a doubled backslash: in JS, '\\(' is the two
# characters "\(" whereas a single-backslash '\(' collapses to a bare "(",
# which would make MathJax treat ordinary parentheses/brackets as math.
#
# The observer is attached only once document.body exists, because a script
# placed in <head> can run before the body element has been parsed; observing
# a null node would throw and also skip the fallback timers.
HEAD_HTML = r"""
<script>
window.MathJax = {
  tex: {
    inlineMath: [['$', '$'], ['\\(', '\\)']],
    displayMath: [['$$', '$$'], ['\\[', '\\]']],
    processEscapes: true
  },
  options: { skipHtmlTags: ['script','noscript','style','textarea','pre','code'] }
};
</script>
<script async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
<script>
(function () {
  function retypeset() {
    try {
      if (window.MathJax && window.MathJax.typesetPromise) {
        window.MathJax.typesetPromise().catch(function(){});
      }
    } catch (e) {}
  }
  var t = null;
  var obs = new MutationObserver(function () {
    clearTimeout(t);
    t = setTimeout(retypeset, 150);
  });
  function start() {
    obs.observe(document.body, { childList: true, subtree: true });
    setTimeout(retypeset, 400);
    setTimeout(retypeset, 1500);
  }
  if (document.body) {
    start();
  } else {
    document.addEventListener('DOMContentLoaded', start);
  }
})();
</script>
"""


# ---------------------------------------------------------------------------
# Layered ontology — the clean classification.  The tree below is a projection
# of this ontology, not the canonical structure.
# ---------------------------------------------------------------------------
# One dict per layer, rendered as a card by render_layered_ontology():
#   name      — layer title (card heading)
#   question  — the question this layer answers (small uppercase line)
#   classical — text shown under the "Control theory:" label
#   modern    — text shown under the "Robot learning:" label
#   items     — list of (name, description) pairs shown as the bullet list
ONTOLOGY_LAYERS = [
    dict(
        name="Control substrate",
        question="What object actually closes the loop?",
        classical="PID, impedance control, computed torque, LQR, MPC, hybrid automata.",
        modern="Neural feedback policy, action chunker, trajectory optimizer, skill graph, video-to-action generator.",
        items=[
            ("Direct feedback", "u = π(o, g); the policy is the controller."),
            ("Trajectory / MPC", "optimize u_{t:t+H} online, execute the first action, replan."),
            ("Skill / option", "high-level discrete or continuous skill z, low-level controller executes."),
            ("Constraint / cost program", "LLM/VLM emits costs, keypoints, or code; solver/controller executes."),
            ("Future-video proposal", "generate a desired future, then decode actions or select a plan."),
        ],
    ),
    dict(
        name="Learning objective",
        question="What loss or optimization signal trains the controller?",
        classical="Designed objective J, Lyapunov function, tracking error, robust/adaptive criteria.",
        modern="Demonstrations, rewards, preferences, fixed logs, hindsight relabeling, self-supervised prediction.",
        items=[
            ("BC / imitation", "match expert actions; action head may be regression, flow, diffusion, tokenized, or energy-based."),
            ("RL / optimal control", "maximize expected return; PPO/SAC/Q-learning are learning versions of optimal control."),
            ("Offline RL", "optimize return from fixed logs while staying near data support."),
            ("IRL / adversarial imitation", "recover reward or discriminator from expert behavior, then optimize it."),
            ("Predictive self-supervision", "learn dynamics, video, occupancy, contact, or latent evolution."),
        ],
    ),
    dict(
        name="Predictive model",
        question="Does the system learn a simulator or future generator?",
        classical="System identification plus model-based control.",
        modern="Latent dynamics, action-conditioned video, WAMs, occupancy/contact predictors, policy evaluators.",
        items=[
            ("No explicit world model", "reactive policy; dynamics are implicit in the policy."),
            ("Forward dynamics / MPC", "state + action -> next state; plan with CEM/MPPI/gradients."),
            ("Latent imagination", "learn z dynamics and train/plan inside the latent world."),
            ("AC-WM", "actions in, future out; supports counterfactuals, RL, evaluation, MPC."),
            ("WAM", "instruction in, successful future + actions out; strong proposal generator."),
            ("Occupancy / contact / latent state", "planning-friendly representation inside the world-model branch."),
        ],
    ),
    dict(
        name="Architecture / representation",
        question="How are actions, states, and tasks parameterized?",
        classical="State coordinates, features, basis functions, linearization, observers.",
        modern="Transformers, diffusion/flow heads, VLA trunks, tokenizers, JEPA latents, 3D occupancy fields.",
        items=[
            ("VLA trunk", "foundation VLM/VLA backbone; not an objective by itself."),
            ("Action head", "continuous regression, flow, diffusion, tokenized autoregression, energy-based scoring."),
            ("Decision Transformer", "causal sequence architecture; can instantiate BC, offline RL, or goal-conditioned control."),
            ("Video / latent generative model", "diffusion, autoregressive, JEPA-style, or recurrent latent dynamics."),
            ("Spatial representation", "2D pixels, 3D state, occupancy, keypoints, contact, cost maps."),
        ],
    ),
    dict(
        name="Data regime",
        question="Where does the learning signal come from?",
        classical="Designed experiments, system-ID rollouts, calibrated sensors, known plant model.",
        modern="Teleop demos, simulation, offline logs, autonomous play, internet video, multi-embodiment data.",
        items=[
            ("Expert demonstrations", "teleop or kinesthetic data; powers BC, ACT, diffusion/flow policies."),
            ("Reward rollouts", "online or simulated interaction; powers PPO/SAC/model-based RL."),
            ("Fixed logs", "offline RL and sequence models; must handle support constraints."),
            ("Play / failures", "especially valuable for AC-WMs because actions explain all outcomes."),
            ("Human / internet video", "pretrains visual priors, video models, latent action models, WAMs."),
            ("Cross-embodiment data", "Open-X/DROID-style scaling; requires action abstraction or embodiment-specific heads."),
        ],
    ),
    dict(
        name="Deployment role",
        question="What does the learned system do at runtime?",
        classical="Track, stabilize, estimate, plan, verify safety.",
        modern="Policy, planner, simulator, critic/evaluator, data generator, safety filter, skill orchestrator.",
        items=[
            ("Policy", "outputs actions directly."),
            ("Planner", "searches or optimizes over actions/trajectories."),
            ("Simulator", "rolls out counterfactual futures."),
            ("Evaluator / critic", "scores policies, plans, or imagined futures."),
            ("Data generator", "creates synthetic rollouts, relabels, or proposal trajectories."),
            ("Orchestrator", "selects tools, skills, constraints, or subgoals."),
        ],
    ),
]

# One 7-tuple per method; render_layered_ontology() emits each tuple as a table
# row, so the positions line up with its header columns:
# (method, control substrate, objective, predictive model, architecture, data, role).
METHOD_STACKS = [
    ("Diffusion Policy", "Direct feedback / action chunk", "BC", "none", "diffusion action head", "expert demos", "policy"),
    ("π₀ / OpenPI", "Direct feedback", "BC", "none", "VLA trunk + flow head", "cross-embodiment demos", "policy"),
    ("OpenVLA / RT-2", "Direct feedback", "BC", "none", "VLA trunk + tokenized actions", "web/VLM + robot demos", "policy"),
    ("Decision Transformer", "Sequence decoder", "offline return-conditioned MLE", "optional learned dynamics outside it", "causal Transformer", "fixed logs with rewards", "policy / planner"),
    ("Dreamer / TD-MPC", "Latent controller", "RL inside learned model", "latent dynamics", "recurrent / latent model", "rollouts", "policy + simulator"),
    ("AC-WM", "Planner / simulator", "predictive self-supervision + optional RL", "action-conditioned future model", "video/latent/occupancy predictor", "successes + failures + play", "simulator / evaluator / planner"),
    ("WAM / DreamZero", "Future-video proposal", "video-action generation", "text-conditioned future generator", "video model + action decoder", "human/robot video + task labels", "proposal policy / planner"),
    ("SayCan / Code-as-Policies", "Skill orchestration", "planning over skills/costs", "usually external", "LLM/VLM + solver", "language + affordance data", "orchestrator"),
]

# (stage, examples, meaning) triples; render_layered_ontology() renders them in
# list order as the rows of its "How the field evolved" section.
EVOLUTION_STAGES = [
    ("Classical feedback control", "PID, computed torque, impedance, LQR", "Known or identified model; hand-designed tracking/stability objective."),
    ("Optimal control and MPC", "trajectory optimization, CEM/MPPI/MPC", "Use dynamics + cost to optimize actions online."),
    ("Learning from demonstration", "LfD, BC, DAgger", "Replace hand-designed control law with supervised action learning."),
    ("Deep RL for robotics", "PPO, SAC, sim-to-real locomotion/manipulation", "Learn policies from reward, often in simulation."),
    ("Deep generative policies", "ACT, Diffusion Policy, flow policies", "Better multimodal imitation and long action chunks."),
    ("Offline decision models", "CQL/IQL, Decision Transformer, Trajectory Transformer, Diffuser", "Use fixed logs; sequence modeling enters decision-making."),
    ("Foundation-model policies", "RT-2, OpenVLA, π₀, Gemini Robotics, Helix", "VLM/VLA trunks inject language and semantic priors."),
    ("World-model robotics", "Dreamer, AC-WM, WAM, occupancy/contact WMs", "Learn simulators or future generators for planning, training, evaluation, and proposals."),
    ("Hybrid frontier", "VLA + world model + MPC/RL + LLM planner + safety filter", "Modern systems combine layers rather than choosing one algorithm."),
]


def render_layered_ontology() -> str:
    """Render the Layered Ontology page as a single HTML string.

    The page has four parts: a banner, a two-column grid with one card per
    entry of ONTOLOGY_LAYERS, a "Method stacks" table with one row per entry
    of METHOD_STACKS, and a "How the field evolved" list built from
    EVOLUTION_STAGES, followed by a rule-of-thumb note.

    Every data string is passed through html_lib.escape before interpolation,
    matching render_tree() and _leaf_pills(); entries such as
    "state + action -> next state" would otherwise put raw markup characters
    into the page.

    Returns:
        The HTML fragment for the whole page.
    """
    esc = html_lib.escape

    layer_cards = []
    for layer in ONTOLOGY_LAYERS:
        items = "".join(
            f"<li><strong>{esc(name)}</strong>: {esc(desc)}</li>"
            for name, desc in layer["items"]
        )
        layer_cards.append(f"""
        <div style='border:1px solid #e2e8f0;border-radius:10px;background:white;padding:15px 16px;'>
          <div style='font-size:12px;color:#475569;font-weight:800;text-transform:uppercase;letter-spacing:.06em;'>{esc(layer['question'])}</div>
          <h2 style='margin:5px 0 8px 0;font-size:22px;color:#0f172a;'>{esc(layer['name'])}</h2>
          <div style='display:grid;grid-template-columns:1fr 1fr;gap:8px;margin-bottom:10px;font-size:13px;'>
            <div style='background:#f8fafc;border-left:4px solid #64748b;border-radius:6px;padding:8px;'><strong>Control theory:</strong><br/>{esc(layer['classical'])}</div>
            <div style='background:#eff6ff;border-left:4px solid #2563eb;border-radius:6px;padding:8px;'><strong>Robot learning:</strong><br/>{esc(layer['modern'])}</div>
          </div>
          <ul style='margin:0;padding-left:18px;color:#1e293b;font-size:14px;line-height:1.55;'>{items}</ul>
        </div>
        """)

    # One <tr> per method; cells follow the header order of the table below.
    method_rows = "".join(
        "<tr>"
        + "".join(
            f"<td style='padding:9px;border-bottom:1px solid #e2e8f0;vertical-align:top;'>{esc(str(cell))}</td>"
            for cell in row
        )
        + "</tr>"
        for row in METHOD_STACKS
    )
    evolution = "".join(
        f"<div style='display:grid;grid-template-columns:190px 260px 1fr;gap:12px;border-left:4px solid #7c3aed;background:#faf5ff;border-radius:8px;padding:10px 12px;margin-bottom:9px;'>"
        f"<strong>{esc(stage)}</strong><span style='color:#581c87;'>{esc(examples)}</span><span style='color:#334155;'>{esc(meaning)}</span></div>"
        for stage, examples, meaning in EVOLUTION_STAGES
    )
    return f"""
<div style='padding:18px 22px 8px 22px;color:#0f172a;line-height:1.55;'>
  <div style='background:linear-gradient(135deg,#0f172a 0%,#164e63 50%,#7c2d12 100%);color:white;border-radius:12px;padding:22px 26px;margin-bottom:18px;'>
    <div style='font-size:12px;letter-spacing:.08em;text-transform:uppercase;color:#bae6fd;font-weight:800;'>Canonical view</div>
    <h1 style='margin:6px 0 8px 0;font-size:32px;line-height:1.12;'>Robot learning is a stack, not a flat taxonomy</h1>
    <p style='margin:0;color:#dbeafe;font-size:15px;max-width:1050px;'>The same method can be a policy, an architecture, a world model, a data recipe, and a deployment role.  This layered ontology keeps classical control, robot learning, VLAs, sequence models, and world models in one clean frame.</p>
  </div>

  <div style='display:grid;grid-template-columns:repeat(2,minmax(0,1fr));gap:14px;margin-bottom:20px;'>
    {''.join(layer_cards)}
  </div>

  <h2 style='font-size:25px;margin:12px 0 10px 0;'>Method stacks</h2>
  <div style='overflow:auto;border:1px solid #e2e8f0;border-radius:10px;background:white;margin-bottom:20px;'>
    <table style='width:100%;border-collapse:collapse;font-size:13px;min-width:1120px;'>
      <tr style='background:#f8fafc;color:#0f172a;'>
        <th style='text-align:left;padding:10px;'>Method</th><th style='text-align:left;padding:10px;'>Control substrate</th><th style='text-align:left;padding:10px;'>Objective</th><th style='text-align:left;padding:10px;'>Predictive model</th><th style='text-align:left;padding:10px;'>Architecture</th><th style='text-align:left;padding:10px;'>Data</th><th style='text-align:left;padding:10px;'>Role</th>
      </tr>
      {method_rows}
    </table>
  </div>

  <h2 style='font-size:25px;margin:12px 0 10px 0;'>How the field evolved</h2>
  {evolution}

  <div style='background:#ecfeff;border:1px solid #67e8f9;border-radius:10px;padding:14px 16px;margin-top:16px;'>
    <strong>Rule of thumb:</strong> classify a method by asking six questions in order: what closes the loop, what trains it, whether it predicts the future, how it represents actions/state/tasks, what data it uses, and what role it plays at deployment.  The old tree is useful as a map, but this stack is the cleaner ontology.
  </div>
</div>
"""

# ---------------------------------------------------------------------------
# Family tree — pure HTML/CSS, no third-party renderer.
# ---------------------------------------------------------------------------
# Each tuple: (display-name, FAMILY key, [(leaf-name, leaf-mapping), ...])
#   display-name — label drawn on the family node
#   FAMILY key   — used to index FAMILY for the family's color/description
#   leaf-mapping — short plain-text objective/mapping shown under the leaf name
TREE_DATA = [
    ("Behavioral Cloning (BC)", "BC", [
        ("Flow Matching Policy",         "L = E‖v_θ(a^t,o,t) − (a¹−a⁰)‖²"),
        ("Diffusion Policy",             "L = E‖ε − ε_θ(a^k,o,k)‖²"),
        ("Tokenized / Categorical BC",   "L = −Σ_j log p_θ(a^(j) | o, a^(<j))"),
        ("Energy-Based / Implicit BC",   "π ∝ exp(−E_θ(s,a));  a* = argmin_a E_θ"),
    ]),
    ("Reinforcement Learning", "Reinforcement", [
        ("Value-Based  (Q-Learning)",          "Q ← r + γ max_{a'} Q(s',a');  π = argmax Q"),
        ("Policy Gradient  (PPO / TRPO)",      "∇J = E[∇log π · A]   (+ PPO clip)"),
        ("Off-Policy Actor-Critic  (SAC/TD3)", "max E[Q(s, π(s))] + α H(π)"),
    ]),
    ("Offline RL", "Offline RL", [
        ("Pessimistic Q + Behavior Constraint", "max E_D[Q(s, π(s))]   s.t. π ≈ π_D"),
    ]),
    ("Inverse RL / Adversarial", "Inverse RL", [
        ("MaxEnt IRL",     "max_r [min_π E_π[−r] − E_{π*}[−r]] − ψ(r)"),
        ("GAIL / AIRL",    "min_π max_D   expert-vs-learner"),
    ]),
    ("Model-Based / World Models", "Model-Based", [
        ("Forward-Dynamics + MPC",        "ŝ_{t+1} = f̂(ŝ_t, a_t);  plan over Σ r̂"),
        ("Latent Imagination",            "ẑ_{t+1} ~ p_φ(·|ẑ_t, a_t);  train π in dream"),
        ("Generative Video WM",           "p(o_future | o_past, c); c may be text, goal, or actions"),
        ("Action-Conditioned WM",         "(o_t, a_{t:t+H}) → future video / latent state"),
        ("World Action Model",            "(o_t, instruction) → future video + actions"),
        ("Occupancy / Latent State WM",   "predict occupancy/contact/latent dynamics for planning"),
    ]),
    ("Sequence-Model Control", "Sequence", [
        ("Decision Transformer",     "p(a_t | R̂_t, s_t, …)"),
        ("Trajectory Diffusion",     "p(τ) over (s, a)_{1:T}  + classifier guidance"),
    ]),
    ("Goal-Conditioned", "Goal-Cond.", [
        ("Goal-Cond. + Hindsight Relabeling", "π(a | s, g);   relabel g' = s_T"),
    ]),
    ("Hierarchical", "Hierarchical", [
        ("Options / Subgoal HRL",    "π_hi(z | s) · π_lo(a | s, z)"),
    ]),
    ("Meta-Learning", "Meta-Learning", [
        ("MAML / RL² / PEARL",       "min_θ E_T[ L_T(θ − α∇L_T(θ)) ]"),
    ]),
    ("LLM / VLM Orchestration", "LLM-Orchestration", [
        ("LLM-as-Planner / Code-as-Policies",  "plan = LLM(instr, scene, API)"),
        ("VLM-Affordance / Spatial Programs",  "C(x) = VLM(scene);  argmin_τ ∫ C(τ)"),
    ]),
]


def render_tree() -> str:
    """Render a sideways SVG tree: root → family branches → paradigm leaves.

    Layout is computed from TREE_DATA: leaves are stacked top to bottom in
    declaration order, each family pill is vertically centered on its own
    leaves, and the root circle is centered between the first and last family
    pills.  The SVG is wrapped in a header (with family/leaf counts) and a
    notation legend.

    Returns:
        An HTML fragment containing the header, the inline SVG, and the legend.
    """
    # --- layout constants ---
    LEAF_W, LEAF_H = 370, 44
    LEAF_PAD       = 6
    FAM_GAP        = 22
    FAM_W, FAM_H   = 220, 50
    ROOT_R         = 70
    X_ROOT_C       = 95
    X_FAM          = 250
    X_LEAF         = 530
    PAD_TOP        = 30
    PAD_BOTTOM     = 30

    leaves: list[dict] = []
    families: list[dict] = []
    y = PAD_TOP  # running top edge of the next leaf card
    for fi, (fam_name, fam_key, paradigms) in enumerate(TREE_DATA):
        color = FAMILY[fam_key][0]
        first_y = y
        leaf_start = len(leaves)
        for n, m in paradigms:
            leaves.append({"fi": fi, "name": n, "mapping": m, "color": color, "y": y})
            y += LEAF_H + LEAF_PAD
        # y has already stepped past the trailing pad; back it out to get the
        # bottom edge of the family's last leaf.
        last_y = y - LEAF_PAD
        fam_center = (first_y + last_y) / 2
        fam_top = fam_center - FAM_H / 2
        families.append({
            "fi": fi, "name": fam_name, "color": color,
            "y": fam_top, "leaf_range": (leaf_start, len(leaves)),
        })
        y += FAM_GAP

    # The last family also added FAM_GAP; remove it before adding bottom padding.
    total_h = y + PAD_BOTTOM - FAM_GAP
    total_w = X_LEAF + LEAF_W + 30
    # Midpoint between the top of the first family pill and the bottom of the last.
    root_cy = (families[0]["y"] + families[-1]["y"] + FAM_H) / 2

    # SVG paints in document order, so the append order below is also the
    # z-order: branches first, then root, family pills, and leaf cards on top.
    parts: list[str] = []
    parts.append(
        f'<svg xmlns="http://www.w3.org/2000/svg" '
        f'viewBox="0 0 {total_w} {total_h}" '
        f'style="display:block;width:100%;height:auto;background:'
        f'radial-gradient(ellipse at 15% 50%, #fef9f3 0%, #f8fafc 60%, #f1f5f9 100%);'
        f'border-radius:14px;font-family:Inter,-apple-system,sans-serif;">'
    )

    # ---- interaction styles (pure-CSS hover + click/focus, no JS) ----
    parts.append(
        "<style>"
        ".leaf,.fam,.root{cursor:pointer;}"
        ".leaf,.fam{transition:transform .18s cubic-bezier(.2,.7,.3,1);}"
        ".leaf:focus,.fam:focus,.root:focus{outline:none;}"
        # leaf cards
        ".leaf .card-bg{transition:stroke .18s ease,filter .18s ease;}"
        ".leaf .accent{transition:width .18s ease;}"
        ".leaf:hover{transform:translateY(-2px);}"
        ".leaf:hover .card-bg{filter:drop-shadow(0 5px 12px rgba(15,23,42,.16));}"
        ".leaf:hover .accent{width:8px;}"
        ".leaf:focus{transform:translateY(-3px);}"
        ".leaf:focus .card-bg{stroke:var(--c);stroke-width:2.5;"
        "filter:drop-shadow(0 9px 20px color-mix(in srgb,var(--c) 45%,transparent));}"
        ".leaf:focus .accent{width:10px;}"
        ".leaf:active{transform:translateY(0);}"
        # family pills
        ".fam .fam-bg{transition:filter .18s ease,stroke .18s ease;}"
        ".fam:hover{transform:translateX(3px);}"
        ".fam:hover .fam-bg{filter:drop-shadow(0 4px 10px rgba(0,0,0,.28));}"
        ".fam:focus{transform:translateX(3px);}"
        ".fam:focus .fam-bg{stroke:#fff;stroke-width:2.5;filter:drop-shadow(0 6px 16px rgba(0,0,0,.34));}"
        ".fam:active{transform:translateX(0);}"
        # root
        ".root .root-core{transition:stroke .2s ease,filter .2s ease;}"
        ".root:hover .root-core{filter:drop-shadow(0 0 14px rgba(124,58,237,.7));}"
        ".root:focus .root-core{stroke:#a855f7;stroke-width:4;}"
        "</style>"
    )

    # ---- branches: root → family ----
    # Cubic Bézier from just inside the root's right edge to the family pill's
    # left edge; each branch gets its own gradient fading into the family color.
    rx = X_ROOT_C + ROOT_R - 4
    ry = root_cy
    for fam in families:
        fx = X_FAM
        fy = fam["y"] + FAM_H / 2
        c1x = rx + (fx - rx) * 0.55
        c2x = fx - (fx - rx) * 0.55
        path = f"M {rx},{ry} C {c1x},{ry} {c2x},{fy} {fx},{fy}"
        gid = f"g-root-{fam['fi']}"
        parts.append(
            f'<defs><linearGradient id="{gid}" x1="0" y1="0" x2="1" y2="0">'
            f'<stop offset="0%" stop-color="#4a2c10"/>'
            f'<stop offset="100%" stop-color="{fam["color"]}"/>'
            f'</linearGradient></defs>'
        )
        parts.append(
            f'<path d="{path}" stroke="url(#{gid})" stroke-width="4.5" '
            f'fill="none" stroke-linecap="round" opacity="0.85"/>'
        )

    # ---- branches: family → leaf ----
    for fam in families:
        fx_right = X_FAM + FAM_W - 3
        fy = fam["y"] + FAM_H / 2
        for li in range(*fam["leaf_range"]):
            leaf = leaves[li]
            lx = X_LEAF
            ly = leaf["y"] + LEAF_H / 2
            c1x = fx_right + (lx - fx_right) * 0.5
            c2x = lx - (lx - fx_right) * 0.5
            path = f"M {fx_right},{fy} C {c1x},{fy} {c2x},{ly} {lx},{ly}"
            parts.append(
                f'<path d="{path}" stroke="{fam["color"]}" stroke-width="2.2" '
                f'fill="none" stroke-linecap="round" opacity="0.65"/>'
            )

    # ---- ROOT ----
    parts.append(
        f'<g class="root" tabindex="0" role="img" aria-label="Robot policy root">'
        f'<circle class="root-core" cx="{X_ROOT_C}" cy="{root_cy}" r="{ROOT_R}" '
        f'fill="#0f172a" stroke="#7c3aed" stroke-width="3"/>'
    )
    parts.append(
        f'<circle cx="{X_ROOT_C}" cy="{root_cy}" r="{ROOT_R - 8}" '
        f'fill="none" stroke="#1e3a8a" stroke-width="1" opacity="0.6"/>'
    )
    parts.append(
        f'<text x="{X_ROOT_C}" y="{root_cy - 12}" text-anchor="middle" '
        f'font-size="32" dominant-baseline="middle">🤖</text>'
    )
    parts.append(
        f'<text x="{X_ROOT_C}" y="{root_cy + 18}" text-anchor="middle" '
        f'font-size="11" font-weight="800" fill="white" letter-spacing="0.5">ROBOT</text>'
    )
    parts.append(
        f'<text x="{X_ROOT_C}" y="{root_cy + 32}" text-anchor="middle" '
        f'font-size="11" font-weight="800" fill="white" letter-spacing="0.5">POLICY</text>'
        f'</g>'
    )

    # ---- FAMILY nodes ----
    for fam in families:
        # Escaped once and reused for both the aria-label and the visible text.
        fam_name = html_lib.escape(fam["name"])
        parts.append(
            f'<g class="fam" tabindex="0" style="--c:{fam["color"]}" role="img" aria-label="{fam_name} family">'
            f'<rect class="fam-bg" x="{X_FAM}" y="{fam["y"]}" width="{FAM_W}" height="{FAM_H}" '
            f'rx="11" fill="{fam["color"]}" '
            f'filter="drop-shadow(0 1px 2px rgba(0,0,0,0.15))"/>'
        )
        # foreignObject lets the label wrap and center as ordinary HTML.
        parts.append(
            f'<foreignObject x="{X_FAM+8}" y="{fam["y"]+4}" width="{FAM_W-16}" height="{FAM_H-8}">'
            f'<div xmlns="http://www.w3.org/1999/xhtml" '
            f'style="display:flex;align-items:center;justify-content:center;height:100%;'
            f'text-align:center;color:white;font-weight:700;font-size:13px;line-height:1.15;'
            f'font-family:Inter,sans-serif;letter-spacing:0.3px;">'
            f'{fam_name}</div></foreignObject></g>'
        )

    # ---- LEAF cards ----
    for leaf in leaves:
        c = leaf["color"]
        # Mappings contain characters such as "<" (e.g. a^(<j)), so escape both.
        leaf_name = html_lib.escape(leaf["name"])
        leaf_mapping = html_lib.escape(leaf["mapping"])
        parts.append(
            f'<g class="leaf" tabindex="0" style="--c:{c}" role="img" aria-label="{leaf_name}">'
            f'<rect class="card-bg" x="{X_LEAF}" y="{leaf["y"]}" width="{LEAF_W}" height="{LEAF_H}" '
            f'rx="8" fill="white" stroke="#e2e8f0" stroke-width="1" '
            f'filter="drop-shadow(0 1px 2px rgba(15,23,42,0.06))"/>'
        )
        parts.append(
            f'<rect class="accent" x="{X_LEAF}" y="{leaf["y"]}" width="5" height="{LEAF_H}" '
            f'rx="2" fill="{c}"/>'
        )
        parts.append(
            f'<foreignObject x="{X_LEAF+12}" y="{leaf["y"]+4}" width="{LEAF_W-18}" height="{LEAF_H-8}">'
            f'<div xmlns="http://www.w3.org/1999/xhtml" '
            f'style="font-family:Inter,sans-serif;line-height:1.2;">'
            f'<div style="font-weight:700;color:#0f172a;font-size:12.5px;">{leaf_name}</div>'
            f'<div style="margin-top:3px;font-family:ui-monospace,SFMono-Regular,Menlo,monospace;'
            f'color:#475569;font-size:11.5px;">{leaf_mapping}</div>'
            f'</div></foreignObject></g>'
        )

    parts.append("</svg>")
    svg = "".join(parts)

    n_families = len(TREE_DATA)
    n_leaves = sum(len(t[2]) for t in TREE_DATA)

    # Legend note: ẑ is described as the latent *state* because the only leaf
    # that uses it (Latent Imagination: ẑ_{t+1} ~ p_φ(·|ẑ_t, a_t)) conditions ẑ
    # on the action a_t.
    return f"""
<div style="padding:6px 2px;">
  <!-- Header -->
  <div style="background:linear-gradient(135deg,#0f172a 0%,#1e3a8a 55%,#7c3aed 100%);
              color:white;border-radius:12px;padding:16px 22px;margin-bottom:10px;
              display:flex;align-items:center;justify-content:space-between;gap:16px;flex-wrap:wrap;">
    <div>
      <div style="font-size:20px;font-weight:700;">🌳 Objective / Family Projection</div>
      <div style="color:#cbd5e1;font-size:13px;margin-top:2px;">
        <strong>{n_families} families</strong> &middot; <strong>{n_leaves} paradigms</strong> &middot;
        this is a useful projection, but the Layered Ontology tab is the canonical view.
      </div>
    </div>
    <div style="font-size:11.5px;color:#cbd5e1;max-width:420px;line-height:1.5;">
      <strong>Read this way:</strong> root on the left, broad families in the middle, representative leaves on the right.
      Some leaves are objectives, while others are architectures or world-model variants; use the Layered Ontology and Relationship Map tabs to see overlap.
      <br/><span style="color:#a5b4fc;">Hover any node to lift it; click (or tab to) a card to spotlight it.</span>
    </div>
  </div>

  <!-- SVG tree -->
  <div style="overflow:auto;border:1px solid #e2e8f0;border-radius:14px;">
    {svg}
  </div>

  <!-- Legend -->
  <div style="margin-top:14px;padding:10px 14px;background:#f8fafc;border-radius:8px;
              font-size:12.5px;color:#475569;line-height:1.6;">
    <strong>Notation:</strong>
    &nbsp;o = observation &nbsp;·&nbsp; s = state &nbsp;·&nbsp; a = action &nbsp;·&nbsp; g = goal
    &nbsp;·&nbsp; R̂ = return-to-go &nbsp;·&nbsp; π = policy &nbsp;·&nbsp; Q = action-value
    &nbsp;·&nbsp; f = dynamics &nbsp;·&nbsp; φ = encoder &nbsp;·&nbsp; D = discriminator
    &nbsp;·&nbsp; τ = trajectory &nbsp;·&nbsp; ẑ = latent state.
    <br/>
    <strong>Action-head superscripts:</strong> &nbsp;a^k = action at diffusion step k &nbsp;·&nbsp; a^t = action at flow time t ∈ [0,1] &nbsp;·&nbsp; a^(j) = j-th action token.
  </div>
</div>
"""


# Display labels for the alternative Family Projection layouts.  Each label
# starts with a two-digit ordinal so the list reads in a fixed order.
# NOTE(review): presumably these are the options of a layout selector and are
# matched by exact string elsewhere — confirm before renaming any entry.
FAMILY_LAYOUT_CHOICES = [
    "01 Compact Branch Cards",
    "02 Horizontal Swimlanes",
    "03 Subway Map",
    "04 Branch Matrix",
    "05 Radial Rings",
    "06 Accordion Codex",
    "07 Evolution Timeline",
    "08 Kanban Board",
    "09 Layer Stack",
    "10 Classic SVG Tree",
    "11 Game Skill Tree",
]


def _family_payload():
    """Flatten TREE_DATA into one dict per family for the layout renderers.

    Each dict carries the family's display name and key, its color and
    description from FAMILY, its entry in FAMILY_EQUATIONS ("" when absent),
    and its leaves as a list of {"name", "mapping"} dicts.
    """

    def describe(display_name, family_key, leaf_specs):
        chip_color, blurb = FAMILY[family_key]
        return {
            "name": display_name,
            "key": family_key,
            "color": chip_color,
            "desc": blurb,
            "equation": FAMILY_EQUATIONS.get(family_key, ""),
            "leaves": [
                {"name": leaf_name, "mapping": leaf_mapping}
                for leaf_name, leaf_mapping in leaf_specs
            ],
        }

    return [
        describe(display_name, family_key, leaf_specs)
        for display_name, family_key, leaf_specs in TREE_DATA
    ]


def _leaf_pills(leaves: list[dict], color: str, show_mapping: bool = False) -> str:
    if show_mapping:
        return "".join(
            f"<div class='fp-leaf' style='--c:{color};'><strong>{html_lib.escape(leaf['name'])}</strong>"
            f"<span>{html_lib.escape(leaf['mapping'])}</span></div>"
            for leaf in leaves
        )
    return "".join(
        f"<span class='fp-pill' style='--c:{color};'>{html_lib.escape(leaf['name'])}</span>"
        for leaf in leaves
    )


def _family_layout_css() -> str:
    """Return the ``<style>`` element shared by the card-based family layouts.

    The sheet defines the ``fp-*`` classes (page, header, card, title, pills,
    leaves, grid, note).  Per-family tinting is driven by the ``--c`` custom
    property that the callers set inline on each element.
    """
    rules = (
        ".fp-page { padding: 14px 18px 8px 18px; color:#0f172a; line-height:1.45; }",
        ".fp-head { background:linear-gradient(135deg,#0f172a 0%,#1d4ed8 52%,#0f766e 100%); color:white; border-radius:12px; padding:17px 20px; margin-bottom:14px; }",
        ".fp-head h1 { margin:4px 0 6px 0; font-size:27px; line-height:1.12; }",
        ".fp-head p { margin:0; color:#dbeafe; font-size:14px; max-width:980px; }",
        ".fp-card { border:1px solid #e2e8f0; border-radius:8px; background:white; padding:12px; box-shadow:0 1px 2px rgba(15,23,42,.04); }",
        ".fp-title { display:flex; align-items:center; gap:8px; margin-bottom:7px; }",
        ".fp-dot { width:10px; height:10px; border-radius:50%; background:var(--c); flex:0 0 auto; }",
        ".fp-title strong { font-size:15px; }",
        ".fp-desc { color:#475569; font-size:12.5px; margin-bottom:8px; }",
        ".fp-pills { display:flex; gap:6px; flex-wrap:wrap; }",
        ".fp-pill { display:inline-flex; align-items:center; border:1px solid color-mix(in srgb,var(--c) 42%,#e2e8f0); border-left:4px solid var(--c); border-radius:999px; background:#fff; padding:4px 8px; font-size:12px; color:#1e293b; }",
        ".fp-leaf { border:1px solid #e2e8f0; border-left:5px solid var(--c); border-radius:7px; background:#fff; padding:8px 9px; min-height:48px; }",
        ".fp-leaf strong { display:block; font-size:12.5px; margin-bottom:3px; }",
        ".fp-leaf span { display:block; color:#64748b; font-family:ui-monospace,SFMono-Regular,Menlo,monospace; font-size:11px; overflow-wrap:anywhere; }",
        ".fp-grid { display:grid; grid-template-columns:repeat(2,minmax(0,1fr)); gap:12px; }",
        ".fp-note { margin-top:12px; background:#f8fafc; border:1px solid #e2e8f0; border-radius:8px; padding:10px 12px; color:#475569; font-size:12.5px; }",
        "@media (max-width: 900px) { .fp-grid { grid-template-columns:1fr; } }",
    )
    # One rule per line, each indented two spaces inside the <style> element.
    sheet = "".join(f"  {rule}\n" for rule in rules)
    return f"\n<style>\n{sheet}</style>\n"


def _family_header(layout: str) -> str:
    """Build the gradient header shown above a family layout.

    The header shows the HTML-escaped ``layout`` name as its title, followed
    by a summary line with the number of TREE_DATA entries (families) and the
    total number of leaves across them.
    """
    family_count = len(TREE_DATA)
    leaf_count = 0
    for row in TREE_DATA:
        leaf_count += len(row[2])  # third field holds the family's leaves
    title = html_lib.escape(layout)

    markup = [
        "",
        '<div class="fp-head">',
        '  <div style="font-size:12px;letter-spacing:.08em;text-transform:uppercase;color:#bfdbfe;font-weight:800;">Family Projection alternatives</div>',
        f"  <h1>{title}</h1>",
        f"  <p>{family_count} projected families and {leaf_count} leaves. These are layout prototypes; the canonical taxonomy remains the layered ontology and ownership audit.</p>",
        "</div>",
        "",
    ]
    return "\n".join(markup)


# Stylesheet for the "Game Skill Tree" layout; render_skill_tree embeds it
# verbatim at the top of the page it returns.  All interactivity is CSS-only:
# hovering a branch dims the others, hovering a leaf reveals its tooltip, and
# the hidden checkboxes inside each .snode drive the gold "learned" state via
# :has(input:checked).  Per-branch colors come from the --c custom property
# that render_skill_tree sets inline on each .branch element.
_SKILL_TREE_CSS = """
<style>
  .st-wrap{ position:relative; padding:22px 20px 30px; border-radius:16px; color:#e2e8f0;
    font-family:Inter,-apple-system,sans-serif; line-height:1.4;
    background:
      radial-gradient(1100px 380px at 8% -5%, rgba(124,58,237,.16), transparent 60%),
      radial-gradient(900px 480px at 105% 110%, rgba(20,184,166,.12), transparent 60%),
      linear-gradient(160deg,#0b1020 0%,#0a0f1e 58%,#070b16 100%); }
  .st-wrap::before{ content:""; position:absolute; inset:0; border-radius:16px; pointer-events:none; opacity:.5;
    background-image:linear-gradient(rgba(148,163,184,.06) 1px,transparent 1px),
                     linear-gradient(90deg,rgba(148,163,184,.06) 1px,transparent 1px);
    background-size:34px 34px; mask-image:radial-gradient(120% 100% at 50% 0%,#000,transparent 80%); }

  .st-hd{ position:relative; display:flex; align-items:baseline; gap:12px; flex-wrap:wrap; margin-bottom:6px; }
  .st-hd h2{ margin:0; font-size:22px; font-weight:800; letter-spacing:.2px;
    background:linear-gradient(90deg,#a78bfa,#67e8f9); -webkit-background-clip:text; background-clip:text; color:transparent; }
  .st-hd .meta{ font-size:12.5px; color:#94a3b8; }
  .st-help{ position:relative; font-size:12px; color:#cbd5e1; margin:2px 0 16px; opacity:.85; }
  .st-help b{ color:#fbbf24; font-weight:700; }

  .st{ position:relative; display:flex; gap:30px; align-items:stretch; }
  /* ---- core / root ---- */
  .st-core{ flex:0 0 auto; align-self:center; }
  .st-core .core{ width:120px; height:120px; border-radius:50%; display:grid; place-items:center; text-align:center;
    background:radial-gradient(circle at 50% 36%, #1e2540, #0a1022 70%); border:2px solid #8b5cf6;
    box-shadow:0 0 0 7px rgba(124,58,237,.10), inset 0 0 34px rgba(124,58,237,.45), 0 0 46px rgba(124,58,237,.35);
    transition:.25s; }
  .st-core .core .ic{ font-size:34px; line-height:1; }
  .st-core .core .tx{ font-size:10.5px; font-weight:800; letter-spacing:1.4px; color:#c4b5fd; margin-top:3px; }
  .st:has(.branch:hover) .st-core .core{ opacity:.85; }

  /* ---- branches column ---- */
  .st-branches{ flex:1 1 auto; position:relative; display:flex; flex-direction:column; gap:13px; padding-left:30px; }
  .st-branches::before{ content:""; position:absolute; left:0; top:6%; bottom:6%; width:2px;
    background:linear-gradient(#8b5cf6,#1e293b); border-radius:2px; opacity:.55; }

  .branch{ position:relative; display:flex; align-items:center; gap:14px; transition:opacity .25s, filter .25s; }
  .branch::before{ content:""; position:absolute; left:-30px; top:50%; width:30px; height:2px;
    background:var(--c); opacity:.5; transition:.2s; }

  /* ---- node base (label + hidden checkbox) ---- */
  .snode{ position:relative; display:inline-flex; cursor:pointer; -webkit-user-select:none; user-select:none; }
  .snode input{ position:absolute; width:1px; height:1px; opacity:0; }
  .snode .face{ display:inline-flex; align-items:center; gap:7px; transition:transform .18s cubic-bezier(.2,.7,.3,1),
    box-shadow .18s, border-color .18s, background .18s, color .18s; }

  .fam-node{ flex:0 0 206px; }
  .fam-node .face{ width:100%; padding:10px 13px; border-radius:12px; font-weight:700; font-size:13px; color:#fff;
    background:linear-gradient(180deg, color-mix(in srgb,var(--c) 32%, #0b1326), #0b1326);
    border:1.5px solid var(--c); box-shadow:0 0 16px color-mix(in srgb,var(--c) 32%, transparent); }
  .fam-node .lvl{ font-family:ui-monospace,Menlo,monospace; font-size:10px; font-weight:800; color:#0b1326;
    background:var(--c); border-radius:5px; padding:2px 5px; }

  .leaves{ flex:1 1 auto; display:flex; flex-wrap:wrap; align-items:center; gap:9px 12px; }
  .leaf-node{ position:relative; }
  .leaf-node::before{ content:""; width:16px; height:2px; margin-right:-3px; align-self:center;
    background:color-mix(in srgb,var(--c) 55%, transparent); transition:.18s; }
  .leaf-node .face{ padding:7px 11px; border-radius:10px; font-size:11.5px; font-weight:600; color:#cbd5e1;
    max-width:240px; background:rgba(148,163,184,.05); border:1px solid color-mix(in srgb,var(--c) 42%, #334155); }
  .leaf-node .face::before{ content:""; width:7px; height:7px; border-radius:50%; flex:0 0 auto; margin-right:1px;
    background:var(--c); box-shadow:0 0 8px var(--c); }

  /* ---- tooltip (objective) ---- */
  .tip{ position:absolute; top:calc(100% + 9px); left:50%; transform:translateX(-50%) translateY(4px);
    width:max-content; max-width:320px; padding:8px 11px; border-radius:9px; z-index:30;
    background:#0b1326; border:1px solid var(--c); color:#e2e8f0; font-size:11px; line-height:1.5;
    box-shadow:0 12px 30px rgba(0,0,0,.55); opacity:0; pointer-events:none; transition:.16s; }
  .tip code{ font-family:ui-monospace,SFMono-Regular,Menlo,monospace; color:#7dd3fc; font-size:10.5px; }
  .tip::after{ content:""; position:absolute; bottom:100%; left:50%; transform:translateX(-50%);
    border:6px solid transparent; border-bottom-color:var(--c); }

  /* ===================== INTERACTION ===================== */
  /* spotlight: dim every branch except the hovered one */
  .st:has(.branch:hover) .branch:not(:hover){ opacity:.28; filter:grayscale(.6); }

  /* hover a leaf -> pop, glow, show objective */
  .leaf-node:hover .face{ transform:translateY(-2px); color:#fff; border-color:var(--c);
    background:color-mix(in srgb,var(--c) 16%, #0b1326);
    box-shadow:0 0 20px color-mix(in srgb,var(--c) 45%, transparent); }
  .leaf-node:hover .tip{ opacity:1; transform:translateX(-50%) translateY(0); }
  .fam-node:hover .face{ transform:translateX(3px); box-shadow:0 0 22px color-mix(in srgb,var(--c) 50%, transparent); }

  /* allocate (click): checked -> gold "learned" lock + lit path */
  .snode:has(input:checked) .face{ color:#fff; border-color:#fbbf24;
    background:linear-gradient(180deg, rgba(251,191,36,.20), #0b1326);
    box-shadow:0 0 0 2px rgba(251,191,36,.28), 0 0 24px rgba(251,191,36,.5); }
  .leaf-node:has(input:checked) .face::before{ background:#fbbf24; box-shadow:0 0 9px #fbbf24; }
  .leaf-node:has(input:checked)::before{ background:#fbbf24; height:3px; box-shadow:0 0 8px rgba(251,191,36,.6); }
  .branch:has(.leaf-node input:checked)::before{ background:#fbbf24; opacity:.95; box-shadow:0 0 9px rgba(251,191,36,.55); }
  .branch:has(.leaf-node input:checked){ filter:none !important; }

  @media (max-width:760px){ .st{ flex-direction:column; gap:18px; } .st-core{ align-self:flex-start; }
    .st-branches{ padding-left:0; } .st-branches::before, .branch::before{ display:none; } .fam-node{ flex-basis:auto; } }
</style>
"""


def render_skill_tree() -> str:
    """Render the families as a game-style skill tree page.

    Each family from ``_family_payload()`` becomes a ``.branch`` row: a
    numbered family node (its description is the hover ``title``) followed by
    one checkbox-backed leaf node per paradigm, with the leaf's mapping shown
    in a tooltip.  All text is HTML-escaped; the family color is passed to the
    stylesheet through the ``--c`` custom property.  The returned markup
    starts with ``_SKILL_TREE_CSS`` so the page is self-contained.
    """
    families = _family_payload()
    family_count = len(families)
    skill_count = sum(len(entry["leaves"]) for entry in families)

    branch_markup = []
    for number, entry in enumerate(families, start=1):
        accent = entry["color"]
        family_label = html_lib.escape(entry["name"])
        family_blurb = html_lib.escape(entry["desc"])

        skill_nodes = []
        for skill in entry["leaves"]:
            skill_label = html_lib.escape(skill["name"])
            skill_objective = html_lib.escape(skill["mapping"])
            skill_nodes.append(
                '<label class="snode leaf-node">'
                '<input type="checkbox"/>'
                f'<span class="face">{skill_label}</span>'
                f'<span class="tip"><b>{skill_label}</b><br/>'
                f'<code>{skill_objective}</code></span>'
                '</label>'
            )
        skills_html = "".join(skill_nodes)

        branch_markup.append(
            f'<div class="branch" style="--c:{accent}">'
            f'<label class="snode fam-node" title="{family_blurb}">'
            '<input type="checkbox"/>'
            f'<span class="face"><span class="lvl">{number:02d}</span>{family_label}</span>'
            '</label>'
            f'<div class="leaves">{skills_html}</div>'
            '</div>'
        )
    all_branches = "".join(branch_markup)

    return f"""
<div class="fp-page">
  {_SKILL_TREE_CSS}
  <div class="st-wrap">
    <div class="st-hd">
      <h2>🎮 Robot Policy Skill Tree</h2>
      <span class="meta">{family_count} branches · {skill_count} skills</span>
    </div>
    <div class="st-help">
      Hover a branch to <b>focus</b> it (others dim) · hover a skill for its objective ·
      <b>click</b> a skill to allocate it (gold = learned).
    </div>
    <div class="st">
      <div class="st-core">
        <div class="core"><div><div class="ic">🤖</div><div class="tx">ROBOT<br/>POLICY</div></div></div>
      </div>
      <div class="st-branches">{all_branches}</div>
    </div>
  </div>
</div>
"""


def render_family_layout(layout: str) -> str:
    """Render the family projection using the named layout prototype.

    ``layout`` should be one of FAMILY_LAYOUT_CHOICES; any other value falls
    back to the first choice.  "11 Game Skill Tree" and "10 Classic SVG Tree"
    are delegated to ``render_skill_tree`` / ``render_tree`` and their output
    is returned as-is.  Every other layout is built from one HTML fragment per
    family and wrapped in the shared ``fp-page`` shell (stylesheet, header and
    selection-guide note).
    """
    families = _family_payload()
    chosen = layout if layout in FAMILY_LAYOUT_CHOICES else FAMILY_LAYOUT_CHOICES[0]

    # Two layouts are complete pages produced by dedicated renderers.
    if chosen == "11 Game Skill Tree":
        return render_skill_tree()
    if chosen == "10 Classic SVG Tree":
        return render_tree()

    # ---- pieces shared by several layouts --------------------------------
    def _heading(fam: dict) -> str:
        return f"<div class='fp-title'><span class='fp-dot'></span><strong>{html_lib.escape(fam['name'])}</strong></div>"

    def _pills(fam: dict) -> str:
        return _leaf_pills(fam["leaves"], fam["color"])

    def _detailed(fam: dict) -> str:
        return _leaf_pills(fam["leaves"], fam["color"], True)

    # ---- one fragment builder per layout ---------------------------------
    # Every builder receives the family's 1-based position and its payload
    # dict; only the timeline actually displays the position.
    def _card(_number: int, fam: dict) -> str:
        return (
            f"<section class='fp-card' style='--c:{fam['color']};'>"
            + _heading(fam)
            + f"<div class='fp-desc'>{html_lib.escape(fam['desc'])}</div>"
            + f"<div class='fp-pills'>{_pills(fam)}</div></section>"
        )

    def _lane(_number: int, fam: dict) -> str:
        return (
            f"<div class='fp-card' style='--c:{fam['color']};display:grid;grid-template-columns:210px 1fr;gap:12px;align-items:start;'>"
            + "<div>"
            + _heading(fam)
            + f"<div class='fp-desc'>{html_lib.escape(fam['key'])}</div></div>"
            + f"<div style='display:grid;grid-template-columns:repeat(auto-fit,minmax(190px,1fr));gap:8px;'>{_detailed(fam)}</div></div>"
        )

    def _subway_line(_number: int, fam: dict) -> str:
        stops = "".join(
            f"<div style='display:flex;align-items:center;flex:0 0 auto;'><span style='width:34px;border-top:4px solid {fam['color']};'></span><span class='fp-pill' style='--c:{fam['color']};border-radius:7px;'>{html_lib.escape(leaf['name'])}</span></div>"
            for leaf in fam["leaves"]
        )
        return (
            f"<div style='--c:{fam['color']};display:grid;grid-template-columns:190px 1fr;gap:10px;align-items:center;margin-bottom:12px;'>"
            + "<div class='fp-card'>"
            + _heading(fam)
            + "</div>"
            + "<div style='display:flex;align-items:center;gap:0;overflow:auto;padding:8px 0;'>"
            + stops
            + "</div></div>"
        )

    def _matrix_row(_number: int, fam: dict) -> str:
        cell = "<td style='padding:9px;border-top:1px solid #e2e8f0;'>"
        return (
            "<tr>"
            + cell
            + f"<strong style='color:{fam['color']};'>{html_lib.escape(fam['name'])}</strong></td>"
            + cell
            + f"{html_lib.escape(fam['key'])}</td>"
            + cell
            + f"{len(fam['leaves'])}</td>"
            + cell
            + f"<div class='fp-pills'>{_pills(fam)}</div></td></tr>"
        )

    def _ring(_number: int, fam: dict) -> str:
        return (
            f"<section class='fp-card' style='--c:{fam['color']};border-radius:999px;padding:18px;min-height:150px;display:flex;flex-direction:column;justify-content:center;'>"
            + _heading(fam)
            + f"<div class='fp-pills'>{_pills(fam)}</div></section>"
        )

    def _codex_entry(_number: int, fam: dict) -> str:
        return (
            f"<details class='fp-card' style='--c:{fam['color']};margin-bottom:9px;' open>"
            + f"<summary style='cursor:pointer;font-weight:800;color:{fam['color']};'>{html_lib.escape(fam['name'])} · {len(fam['leaves'])} leaves</summary>"
            + f"<div class='fp-desc' style='margin-top:8px;'>{html_lib.escape(fam['desc'])}</div>"
            + f"<div style='display:grid;grid-template-columns:repeat(auto-fit,minmax(220px,1fr));gap:8px;'>{_detailed(fam)}</div></details>"
        )

    def _timeline_step(number: int, fam: dict) -> str:
        return (
            "<div style='display:grid;grid-template-columns:36px 220px 1fr;gap:10px;align-items:start;margin-bottom:10px;'>"
            + f"<div style='width:28px;height:28px;border-radius:50%;background:{fam['color']};color:white;display:flex;align-items:center;justify-content:center;font-weight:800;font-size:12px;'>{number}</div>"
            + f"<div class='fp-card'><strong>{html_lib.escape(fam['name'])}</strong><div class='fp-desc'>{html_lib.escape(fam['key'])}</div></div>"
            + f"<div class='fp-card'><div class='fp-pills'>{_pills(fam)}</div></div></div>"
        )

    def _kanban_column(_number: int, fam: dict) -> str:
        return (
            f"<section class='fp-card' style='--c:{fam['color']};min-width:245px;'>"
            + _heading(fam)
            + f"<div style='display:grid;gap:7px;'>{_detailed(fam)}</div></section>"
        )

    def _stack_layer(_number: int, fam: dict) -> str:
        return (
            f"<div class='fp-card' style='--c:{fam['color']};display:grid;grid-template-columns:220px 1fr;gap:12px;border-left:8px solid {fam['color']};'>"
            + f"<div><strong>{html_lib.escape(fam['name'])}</strong><div class='fp-desc'>{html_lib.escape(fam['key'])}</div></div>"
            + f"<div class='fp-pills'>{_pills(fam)}</div></div>"
        )

    # layout name -> (opening markup, per-family builder, closing markup)
    recipes = {
        "01 Compact Branch Cards": ("<div class='fp-grid'>", _card, "</div>"),
        "02 Horizontal Swimlanes": ("<div style='display:grid;gap:10px;'>", _lane, "</div>"),
        "03 Subway Map": ("<div class='fp-card' style='overflow:auto;'>", _subway_line, "</div>"),
        "04 Branch Matrix": (
            "<div style='overflow:auto;border:1px solid #e2e8f0;border-radius:8px;background:white;'><table style='width:100%;min-width:900px;border-collapse:collapse;font-size:13px;'><tr style='background:#f8fafc;'><th style='text-align:left;padding:9px;'>Family</th><th style='text-align:left;padding:9px;'>Owner key</th><th style='text-align:left;padding:9px;'>Leaves</th><th style='text-align:left;padding:9px;'>Paradigms</th></tr>",
            _matrix_row,
            "</table></div>",
        ),
        "05 Radial Rings": (
            "<div style='display:grid;grid-template-columns:repeat(auto-fit,minmax(260px,1fr));gap:14px;'>",
            _ring,
            "</div>",
        ),
        "06 Accordion Codex": ("<div>", _codex_entry, "</div>"),
        "07 Evolution Timeline": (
            "<div style='border-left:3px solid #cbd5e1;margin-left:14px;padding-left:14px;'>",
            _timeline_step,
            "</div>",
        ),
        "08 Kanban Board": (
            "<div style='display:flex;gap:12px;overflow:auto;padding-bottom:8px;'>",
            _kanban_column,
            "</div>",
        ),
        "09 Layer Stack": ("<div style='display:grid;gap:8px;'>", _stack_layer, "</div>"),
    }

    # A listed layout without a recipe renders an empty body.
    body = ""
    recipe = recipes.get(chosen)
    if recipe is not None:
        opening, build_fragment, closing = recipe
        fragments = [
            build_fragment(number, fam)
            for number, fam in enumerate(families, start=1)
        ]
        body = opening + "".join(fragments) + closing

    return f"""
<div class="fp-page">
  {_family_layout_css()}
  {_family_header(chosen)}
  {body}
  <div class="fp-note"><strong>Selection guide:</strong> choose a layout based on the job. Cards are best for teaching; swimlanes and matrix are best for precision; subway/timeline are best for showing progression; kanban is best for scanning all leaves.</div>
</div>
"""


# Data for the connection-tree cards drawn by render_connection_trees.  Each
# entry is one tree:
#   title / subtitle - card heading and its short explanation
#   color            - value of the --tree-color CSS variable for the card
#   root             - label of the root node
#   children         - (parent label, [leaf labels]) pairs, drawn in order
CONNECTION_TREES = [
    {
        "title": "Objective Lineage",
        "subtitle": "Paradigms grouped by what signal trains the policy.",
        "color": "#2563eb",
        "root": "Training objective",
        "children": [
            ("BC / imitation", [
                "MSE-BC",
                "Flow Matching Policy",
                "Diffusion Policy",
                "Tokenized / Categorical BC",
                "Energy-Based / Implicit BC",
                "DAgger",
                "Action Chunking",
            ]),
            ("Reward optimization", [
                "Value-Based Q-Learning",
                "Policy Gradient PPO / TRPO",
                "Off-Policy Actor-Critic SAC / TD3",
            ]),
            ("Fixed-log optimization", [
                "Offline RL",
                "Decision Transformer",
                "Trajectory Diffusion",
            ]),
            ("Reward recovery", [
                "MaxEnt IRL",
                "GAIL / AIRL",
            ]),
            ("Goal conditioning", [
                "Goal-conditioned control",
                "Hindsight relabeling",
            ]),
        ],
    },
    {
        "title": "Model And Planning Tree",
        "subtitle": "Paradigms grouped by whether they predict futures and how planning uses them.",
        "color": "#7c3aed",
        "root": "Predictive model",
        "children": [
            ("No explicit model", [
                "BC heads",
                "VLA policies",
                "Direct RL policies",
            ]),
            ("Forward dynamics", [
                "Forward-Dynamics + MPC",
                "Latent Imagination",
                "Dreamer / TD-MPC",
            ]),
            ("Action-conditioned world model", [
                "AC-WM",
                "Occupancy / contact / latent state WM",
                "Policy evaluation",
                "RL inside the model",
            ]),
            ("World action model", [
                "WAM / DreamZero",
                "Future-video proposal",
                "Best-of-N planning",
            ]),
        ],
    },
    {
        "title": "Architecture Tree",
        "subtitle": "Paradigms grouped by representation rather than objective.",
        "color": "#ca8a04",
        "root": "Representation",
        "children": [
            ("Continuous action heads", [
                "Regression BC",
                "Flow Matching Policy",
                "Diffusion Policy",
                "Energy-Based BC",
            ]),
            ("Token sequence models", [
                "Decision Transformer",
                "Trajectory Transformer",
                "Tokenized BC",
                "OpenVLA / RT-2 action tokens",
            ]),
            ("Foundation trunks", [
                "VLA",
                "π₀ / OpenPI",
                "Gemini Robotics",
                "Helix",
            ]),
            ("Spatial / latent worlds", [
                "Occupancy WM",
                "Latent state WM",
                "Video / JEPA latent predictor",
            ]),
        ],
    },
    {
        "title": "Deployment Tree",
        "subtitle": "Paradigms grouped by what component they become at runtime.",
        "color": "#0891b2",
        "root": "Runtime role",
        "children": [
            ("Policy", [
                "BC",
                "Diffusion Policy",
                "Flow Policy",
                "VLA",
                "RL policy",
            ]),
            ("Planner", [
                "MPC",
                "Trajectory optimization",
                "Diffuser / guided trajectory sampling",
                "Best-of-N WAM planning",
            ]),
            ("Simulator / evaluator", [
                "AC-WM",
                "WorldGym-style evaluation",
                "Latent imagination",
            ]),
            ("Orchestrator", [
                "LLM-as-Planner",
                "Code-as-Policies",
                "VLM-affordance programs",
                "Hierarchical skills",
            ]),
        ],
    },
]

# Rows of the cross-link table that render_connection_trees draws beneath the
# trees.  Cells follow the table's header order: Paradigm, Primary branch,
# Connects to, Also connects to, Runtime interpretation.  The first cell of
# each row is rendered in bold.
CONNECTION_CROSSLINKS = [
    ("Decision Transformer", "Sequence architecture", "BC", "Offline RL", "Goal-conditioned control"),
    ("VLA", "Architecture + data regime", "Tokenized BC", "Flow heads", "Diffusion heads"),
    ("Diffusion Policy", "BC objective", "Generative action head", "Trajectory sampling", "VLA head"),
    ("AC-WM", "World model", "MPC", "RL inside model", "Policy evaluation"),
    ("WAM", "World model", "Future-video proposal", "BC-like action decoder", "Best-of-N planning"),
    ("SayCan / Code-as-Policies", "LLM orchestration", "Skill library", "Affordance scoring", "Classical solver"),
]


def render_connection_trees() -> str:
    """Render the connection-tree cards and the cross-link table as one HTML page.

    One card is produced per entry in CONNECTION_TREES (root node, then each
    parent with its leaves as a nested list), followed by a table with one row
    per entry in CONNECTION_CROSSLINKS whose first cell is bold.  The page
    carries its own ``<style>`` block for the ``conn-*`` classes.

    Every piece of text taken from those tables is HTML-escaped before being
    interpolated, matching the other renderers in this module, so a label that
    contains ``&``, ``<``, ``>`` or quotes cannot break the markup.

    Returns:
        The complete HTML fragment as a string.
    """

    def _esc(value: object) -> str:
        # str() first so a non-string cell still renders, as it did when the
        # value was interpolated directly into the f-string.
        return html_lib.escape(str(value))

    tree_blocks = []
    for tree in CONNECTION_TREES:
        child_blocks = []
        for parent, leaves in tree["children"]:
            leaf_html = "".join(
                f"<li><span>{_esc(leaf)}</span></li>" for leaf in leaves
            )
            child_blocks.append(f"""
              <li>
                <div class="conn-node conn-parent">{_esc(parent)}</div>
                <ul>{leaf_html}</ul>
              </li>
            """)

        tree_blocks.append(f"""
          <section class="conn-card" style="--tree-color:{_esc(tree['color'])};">
            <div class="conn-card-head">
              <div>
                <div class="conn-kicker">Connection tree</div>
                <h2>{_esc(tree['title'])}</h2>
              </div>
              <p>{_esc(tree['subtitle'])}</p>
            </div>
            <div class="conn-tree-wrap">
              <div class="conn-root">{_esc(tree['root'])}</div>
              <ul class="conn-tree">{''.join(child_blocks)}</ul>
            </div>
          </section>
        """)

    # The first cell of each row names the paradigm and is emphasised.
    cross_rows = "".join(
        "<tr>"
        + "".join(
            f"<td>{_esc(cell)}</td>" if i else f"<td><strong>{_esc(cell)}</strong></td>"
            for i, cell in enumerate(row)
        )
        + "</tr>"
        for row in CONNECTION_CROSSLINKS
    )

    return f"""
<style>
  .conn-page {{
    padding: 18px 22px 10px 22px;
    color: #0f172a;
    line-height: 1.5;
  }}
  .conn-hero {{
    background: linear-gradient(135deg,#0f172a 0%,#155e75 55%,#854d0e 100%);
    color: white;
    border-radius: 12px;
    padding: 20px 24px;
    margin-bottom: 16px;
  }}
  .conn-hero h1 {{
    margin: 5px 0 7px 0;
    font-size: 30px;
    line-height: 1.12;
  }}
  .conn-hero p {{
    margin: 0;
    color: #dbeafe;
    max-width: 980px;
    font-size: 15px;
  }}
  .conn-grid {{
    display: grid;
    grid-template-columns: repeat(2, minmax(0, 1fr));
    gap: 14px;
  }}
  .conn-card {{
    border: 1px solid #e2e8f0;
    border-radius: 10px;
    background: white;
    padding: 14px;
    overflow: auto;
  }}
  .conn-card-head {{
    display: flex;
    justify-content: space-between;
    gap: 14px;
    align-items: flex-start;
    margin-bottom: 12px;
  }}
  .conn-card-head h2 {{
    margin: 2px 0 0 0;
    font-size: 21px;
  }}
  .conn-card-head p {{
    margin: 0;
    color: #475569;
    font-size: 12.5px;
    max-width: 270px;
  }}
  .conn-kicker {{
    color: var(--tree-color);
    font-size: 11px;
    text-transform: uppercase;
    letter-spacing: .07em;
    font-weight: 800;
  }}
  .conn-tree-wrap {{
    min-width: 560px;
  }}
  .conn-root, .conn-node, .conn-tree span {{
    border-radius: 8px;
    border: 1px solid #e2e8f0;
    background: #fff;
    box-shadow: 0 1px 2px rgba(15,23,42,.05);
  }}
  .conn-root {{
    display: inline-block;
    background: var(--tree-color);
    color: white;
    border-color: var(--tree-color);
    font-weight: 800;
    padding: 9px 13px;
    margin-bottom: 10px;
  }}
  .conn-tree, .conn-tree ul {{
    list-style: none;
    margin: 0;
    padding-left: 22px;
    position: relative;
  }}
  .conn-tree::before, .conn-tree ul::before {{
    content: "";
    position: absolute;
    top: 0;
    bottom: 12px;
    left: 8px;
    border-left: 2px solid color-mix(in srgb, var(--tree-color) 45%, #cbd5e1);
  }}
  .conn-tree li {{
    position: relative;
    margin: 0 0 8px 0;
    padding-left: 17px;
  }}
  .conn-tree li::before {{
    content: "";
    position: absolute;
    top: 15px;
    left: -14px;
    width: 28px;
    border-top: 2px solid color-mix(in srgb, var(--tree-color) 45%, #cbd5e1);
  }}
  .conn-parent {{
    display: inline-block;
    padding: 7px 10px;
    font-weight: 800;
    color: #0f172a;
    border-left: 5px solid var(--tree-color);
    background: #f8fafc;
  }}
  .conn-tree span {{
    display: inline-block;
    padding: 5px 8px;
    color: #334155;
    font-size: 12.5px;
  }}
  .conn-cross {{
    margin-top: 16px;
    border: 1px solid #e2e8f0;
    border-radius: 10px;
    overflow: auto;
    background: white;
  }}
  .conn-cross table {{
    width: 100%;
    border-collapse: collapse;
    min-width: 900px;
    font-size: 13px;
  }}
  .conn-cross th, .conn-cross td {{
    text-align: left;
    padding: 9px 10px;
    border-bottom: 1px solid #e2e8f0;
    vertical-align: top;
  }}
  .conn-cross th {{
    background: #f8fafc;
    color: #0f172a;
  }}
  @media (max-width: 900px) {{
    .conn-grid {{ grid-template-columns: 1fr; }}
    .conn-card-head {{ display: block; }}
    .conn-card-head p {{ max-width: none; margin-top: 5px; }}
  }}
</style>
<div class="conn-page">
  <div class="conn-hero">
    <div style="font-size:12px;letter-spacing:.08em;text-transform:uppercase;color:#bae6fd;font-weight:800;">Paradigm connections</div>
    <h1>Connection trees between robot-learning paradigms</h1>
    <p>Each tree slices the same methods by a different relationship: training objective, predictive model, architecture, or runtime role.  The table underneath marks the important cross-links where one paradigm belongs to more than one branch.</p>
  </div>
  <div class="conn-grid">
    {''.join(tree_blocks)}
  </div>
  <div class="conn-cross">
    <table>
      <tr>
        <th>Paradigm</th>
        <th>Primary branch</th>
        <th>Connects to</th>
        <th>Also connects to</th>
        <th>Runtime interpretation</th>
      </tr>
      {cross_rows}
    </table>
  </div>
</div>
"""

# World-model papers merged into PAPERS when the module is imported.
# Each row is (title, authors, year, tag).
WORLD_MODEL_PAPERS = [
    ("RoboArena: Distributed Real-World Evaluation of Generalist Robot Policies", "Atreya et al.", 2025, "World Models — Evaluation"),
    ("DreamZero: World Action Models are Zero-shot Policies", "Ye et al.", 2026, "World Action Model"),
    ("Large Video Planner Enables Generalizable Robot Control", "Chen et al.", 2025, "World Action Model"),
    ("mimic-video: Video-Action Models for Generalizable Robot Control Beyond VLAs", "Pai et al.", 2025, "World Action Model"),
    ("Video Generators are Robot Policies", "Liang et al.", 2025, "World Action Model"),
    ("Unified Video Action Model", "Li et al.", 2025, "World Action Model"),
    ("Training Agents Inside of Scalable World Models", "Hafner et al.", 2025, "Action-Conditioned World Model"),
    ("Evaluating Gemini Robotics Policies in a Veo World Simulator", "Gemini Robotics Team et al.", 2025, "Action-Conditioned World Model"),
    ("Ctrl-World: A Controllable Generative World Model for Robot Manipulation", "Guo et al.", 2025, "Action-Conditioned World Model"),
    ("DreamDojo: A Generalist Robot World Model from Large-Scale Human Videos", "Gao et al.", 2026, "Action-Conditioned World Model"),
    ("PlayWorld: Learning Robot World Models from Autonomous Play", "Yin et al.", 2026, "Action-Conditioned World Model"),
    ("V-JEPA 2: Self-Supervised Video Models Enable Understanding, Prediction and Planning", "Assran et al.", 2025, "Action-Conditioned World Model"),
    ("Cosmos Policy: Fine-Tuning Video Models for Visuomotor Control and Planning", "Kim et al.", 2026, "World Models — Planning"),
    ("World-Gymnast: Training Robots with Reinforcement Learning in a World Model", "Sharma et al.", 2026, "Action-Conditioned World Model"),
    ("Steering Your Diffusion Policy with Latent Space Reinforcement Learning", "Wagenmaker et al.", 2025, "World Models — Planning"),
    ("WorldGym: World Model as An Environment for Policy Evaluation", "Quevedo et al.", 2025, "Action-Conditioned World Model"),
]

# Append only rows that PAPERS does not already contain.  A row counts as a
# duplicate only when all four fields match an existing entry; rows repeated
# within WORLD_MODEL_PAPERS itself are not collapsed.
_existing_papers = {(title, authors, year, tag) for title, authors, year, tag in PAPERS}
PAPERS.extend([paper for paper in WORLD_MODEL_PAPERS if paper not in _existing_papers])

# Survey papers merged into PAPERS when the module is imported.
# Each row is (title, authors, year, tag).
SURVEY_PAPERS = [
    ("A Survey on Vision-Language-Action Models for Embodied AI", "Ma et al.", 2024, "Survey — VLA / Embodied AI"),
    ("A Survey on Robotics with Foundation Models: toward Embodied AI", "Xu et al.", 2024, "Survey — Foundation Models"),
    ("Robot Learning in the Era of Foundation Models: A Survey", "Xiao et al.", 2023, "Survey — Foundation Models"),
    ("A Survey on Vision-Language-Action Models: An Action Tokenization Perspective", "Zhong et al.", 2025, "Survey — VLA / Action Tokenization"),
    ("Large VLM-based Vision-Language-Action Models for Robotic Manipulation: A Survey", "Shao et al.", 2025, "Survey — VLA / Manipulation"),
    ("Survey of Vision-Language-Action Models for Embodied Manipulation", "Li et al.", 2025, "Survey — VLA / Manipulation"),
    ("A Survey on Efficient Vision-Language-Action Models", "Yu et al.", 2025, "Survey — Efficient VLA"),
    ("Efficient Vision-Language-Action Models for Embodied Manipulation: A Systematic Survey", "Guan et al.", 2025, "Survey — Efficient VLA"),
    ("Embodied AI with Foundation Models for Mobile Service Robots: A Systematic Review", "Lisondra et al.", 2025, "Survey — Service Robots"),
    ("Robot Learning from Human Videos: A Survey", "Ma et al.", 2026, "Survey — Human Video Learning"),
    ("World Model for Robot Learning: A Comprehensive Survey", "Hou et al.", 2026, "Survey — World Models"),
]

# Rebuild the lookup set because PAPERS may have grown since it was last
# computed, then append only the survey rows that are not already present
# (a duplicate must match on all four fields).
_existing_papers = {(title, authors, year, tag) for title, authors, year, tag in PAPERS}
PAPERS.extend([paper for paper in SURVEY_PAPERS if paper not in _existing_papers])

# One provenance record per survey.  Every record has the same seven keys:
# title, authors, year, arxiv (identifier), url, validates (what the survey
# covers), and landscape_role (which part of the landscape it backs up).
SURVEY_SOURCE_INDEX = [
    {
        "title": "Robot Learning in the Era of Foundation Models: A Survey",
        "authors": "Xiao et al.",
        "year": 2023,
        "arxiv": "2311.14379",
        "url": "https://arxiv.org/abs/2311.14379",
        "validates": "Foundation-model robot learning across manipulation, navigation, planning, and reasoning.",
        "landscape_role": "Supports the high-level shift from task-specific robot learning to foundation-model policy stacks.",
    },
    {
        "title": "A Survey on Robotics with Foundation Models: toward Embodied AI",
        "authors": "Xu et al.",
        "year": 2024,
        "arxiv": "2402.02385",
        "url": "https://arxiv.org/abs/2402.02385",
        "validates": "Foundation models for autonomous manipulation, high-level planning, low-level control, datasets, simulators, and benchmarks.",
        "landscape_role": "Supports separating foundation trunks from the objectives and controllers they are paired with.",
    },
    {
        "title": "A Survey on Vision-Language-Action Models for Embodied AI",
        "authors": "Ma et al.",
        "year": 2024,
        "arxiv": "2405.14093",
        "url": "https://arxiv.org/abs/2405.14093",
        "validates": "VLA components, low-level control policies, high-level task planners, resources, and challenges.",
        "landscape_role": "Supports treating VLA as an architecture / representation family rather than a training objective.",
    },
    {
        "title": "A Survey on Vision-Language-Action Models: An Action Tokenization Perspective",
        "authors": "Zhong et al.",
        "year": 2025,
        "arxiv": "2507.01925",
        "url": "https://arxiv.org/abs/2507.01925",
        "validates": "Action tokenization choices for VLA systems.",
        "landscape_role": "Supports the representation layer: tokenized actions are an action interface, not a separate objective.",
    },
    {
        "title": "Large VLM-based Vision-Language-Action Models for Robotic Manipulation: A Survey",
        "authors": "Shao et al.",
        "year": 2025,
        "arxiv": "2508.13073",
        "url": "https://arxiv.org/abs/2508.13073",
        "validates": "Large VLM-based VLA taxonomies, monolithic vs hierarchical designs, RL integration, human-video learning, and world-model integration.",
        "landscape_role": "Supports the cross-link design: VLA systems are stacks that combine architecture, objective, data, and runtime role.",
    },
    {
        "title": "Survey of Vision-Language-Action Models for Embodied Manipulation",
        "authors": "Li et al.",
        "year": 2025,
        "arxiv": "2508.15201",
        "url": "https://arxiv.org/abs/2508.15201",
        "validates": "VLA model structures, datasets, pre-training, post-training, and evaluation.",
        "landscape_role": "Supports the clean VLA branch and its data/evaluation modifiers.",
    },
    {
        "title": "A Survey on Efficient Vision-Language-Action Models",
        "authors": "Yu et al.",
        "year": 2025,
        "arxiv": "2510.24795",
        "url": "https://arxiv.org/abs/2510.24795",
        "validates": "Efficient VLAs across data, model, and training process.",
        "landscape_role": "Supports efficiency as a deployment constraint layered over VLA architectures.",
    },
    {
        "title": "Efficient Vision-Language-Action Models for Embodied Manipulation: A Systematic Survey",
        "authors": "Guan et al.",
        "year": 2025,
        "arxiv": "2510.17111",
        "url": "https://arxiv.org/abs/2510.17111",
        "validates": "Latency, memory footprint, training/inference cost, model architecture, perception features, action generation, and deployment strategies.",
        "landscape_role": "Supports separating architecture efficiency from objective families like BC and RL.",
    },
    {
        "title": "Embodied AI with Foundation Models for Mobile Service Robots: A Systematic Review",
        "authors": "Lisondra et al.",
        "year": 2025,
        "arxiv": "2505.20503",
        "url": "https://arxiv.org/abs/2505.20503",
        "validates": "Foundation models in mobile service robots, sensor fusion, real-time decision-making, task generalization, HRI, and deployment constraints.",
        "landscape_role": "Supports the runtime-role and deployment-bottleneck layers.",
    },
    {
        "title": "Robot Learning from Human Videos: A Survey",
        "authors": "Ma et al.",
        "year": 2026,
        "arxiv": "2604.27621",
        "url": "https://arxiv.org/abs/2604.27621",
        "validates": "Human-video-based learning for robotics, human-robot skill transfer, and data foundations.",
        "landscape_role": "Supports human video as a data regime feeding WAMs, latent-action pretraining, representation learning, and imitation.",
    },
    {
        "title": "World Model for Robot Learning: A Comprehensive Survey",
        "authors": "Hou et al.",
        "year": 2026,
        "arxiv": "2605.00080",
        "url": "https://arxiv.org/abs/2605.00080",
        "validates": "World models as learned simulators for policy learning, planning, simulation, evaluation, data generation, video generation, datasets, benchmarks, and protocols.",
        "landscape_role": "Supports predictive models as their own branch: AC-WM, WAM, latent imagination, and evaluator/simulator roles.",
    },
]

# Paradigm names in PARADIGMS order.
PARADIGM_NAMES = [paradigm["name"] for paradigm in PARADIGMS]
# Distinct paper tags (last field of each PAPERS row), sorted, with "All" first.
FAMILY_LABELS = ["All", *sorted({tag for *_, tag in PAPERS})]
# Latest year found in PAPERS (third field of each row).
MAX_PAPER_YEAR = max(year for _title, _authors, year, _tag in PAPERS)


# Static, inline-styled HTML fragment.  In order it contains: a hero banner, a
# six-card grid (control substrate, training signal, prediction layer,
# representation, data regime, deployment role), the "Current field map"
# table, the "Survey reading stack" cards, and a bottom-line callout.  It is a
# raw string with no format placeholders, so it is emitted verbatim.
LANDSCAPE_SYNTHESIS_HTML = r"""
<div style="padding:18px 22px 8px 22px;color:#0f172a;line-height:1.58;">
  <div style="background:linear-gradient(135deg,#0f172a 0%,#164e63 48%,#854d0e 100%);color:white;border-radius:12px;padding:20px 24px;margin-bottom:18px;">
    <div style="font-size:12px;letter-spacing:.08em;text-transform:uppercase;color:#bae6fd;font-weight:800;">Clean landscape</div>
    <h1 style="margin:6px 0 8px 0;font-size:31px;line-height:1.12;">Robot learning is converging into policy stacks</h1>
    <p style="margin:0;color:#dbeafe;font-size:15px;max-width:1020px;">The cleanest map is not BC vs RL vs VLA vs world model.  It is a stack: controller, objective, predictive model, architecture, data source, and runtime role.  Most current papers are new combinations of these layers.</p>
  </div>

  <div style="display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-bottom:18px;">
    <div style="border:1px solid #bfdbfe;background:#eff6ff;border-radius:10px;padding:14px;"><strong>1. Control substrate</strong><br/>Classical feedback, MPC, skill graphs, neural policies, action chunkers, and future-video proposal generators are different ways to close the loop.</div>
    <div style="border:1px solid #bbf7d0;background:#f0fdf4;border-radius:10px;padding:14px;"><strong>2. Training signal</strong><br/>BC, RL, offline RL, IRL, preferences, and predictive self-supervision are objectives.  They should not be mixed up with architectures.</div>
    <div style="border:1px solid #fde68a;background:#fffbeb;border-radius:10px;padding:14px;"><strong>3. Prediction layer</strong><br/>World models are learned simulators or future generators.  AC-WMs take actions in; WAMs output successful futures and actions.</div>
    <div style="border:1px solid #c7d2fe;background:#eef2ff;border-radius:10px;padding:14px;"><strong>4. Representation</strong><br/>VLA trunks, Transformers, diffusion/flow heads, tokenized actions, 3D maps, and latent states are representational choices.</div>
    <div style="border:1px solid #fecaca;background:#fef2f2;border-radius:10px;padding:14px;"><strong>5. Data regime</strong><br/>The central bottleneck is data: teleop demos, fixed logs, sim rollouts, play, failures, cross-embodiment data, and human videos.</div>
    <div style="border:1px solid #bae6fd;background:#ecfeff;border-radius:10px;padding:14px;"><strong>6. Deployment role</strong><br/>At runtime a learned component may be a policy, planner, simulator, evaluator, data generator, safety filter, or LLM/VLM orchestrator.</div>
  </div>

  <h2 style="font-size:24px;margin:18px 0 10px 0;">Current field map</h2>
  <table style="width:100%;border-collapse:collapse;background:white;border:1px solid #e2e8f0;border-radius:10px;overflow:hidden;font-size:13px;">
    <tr style="background:#f8fafc;"><th style="text-align:left;padding:10px;">Region</th><th style="text-align:left;padding:10px;">What it means</th><th style="text-align:left;padding:10px;">Representative methods</th><th style="text-align:left;padding:10px;">Main open bottleneck</th></tr>
    <tr><td style="padding:10px;border-top:1px solid #e2e8f0;"><strong>Classical / optimal control</strong></td><td style="padding:10px;border-top:1px solid #e2e8f0;">Known dynamics, costs, constraints, stability, and replanning.</td><td style="padding:10px;border-top:1px solid #e2e8f0;">PID, impedance, LQR, MPC, trajectory optimization.</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Manual modeling and limited semantic generality.</td></tr>
    <tr><td style="padding:10px;border-top:1px solid #e2e8f0;"><strong>Imitation policies</strong></td><td style="padding:10px;border-top:1px solid #e2e8f0;">Learn direct control from demonstrations.</td><td style="padding:10px;border-top:1px solid #e2e8f0;">BC, ACT, Diffusion Policy, flow policies, energy-based BC.</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Distribution shift, multimodality, and demonstration coverage.</td></tr>
    <tr><td style="padding:10px;border-top:1px solid #e2e8f0;"><strong>RL and offline RL</strong></td><td style="padding:10px;border-top:1px solid #e2e8f0;">Optimize reward through interaction or fixed logs.</td><td style="padding:10px;border-top:1px solid #e2e8f0;">PPO, SAC, TD3, CQL, IQL, TD3+BC, VLA-RL.</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Reward design, exploration, safety, and real-world sample cost.</td></tr>
    <tr><td style="padding:10px;border-top:1px solid #e2e8f0;"><strong>Sequence decision models</strong></td><td style="padding:10px;border-top:1px solid #e2e8f0;">Treat control as conditional generation over tokens or trajectories.</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Decision Transformer, Trajectory Transformer, Diffuser.</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Long-horizon reliability and correct conditioning.</td></tr>
    <tr><td style="padding:10px;border-top:1px solid #e2e8f0;"><strong>Foundation robot policies</strong></td><td style="padding:10px;border-top:1px solid #e2e8f0;">Use VLM/VLA trunks plus robot action heads and large multi-task data.</td><td style="padding:10px;border-top:1px solid #e2e8f0;">RT-2, OpenVLA, π₀/OpenPI, Gemini Robotics, GR00T, Helix.</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Embodiment transfer, latency, evaluation, and data scaling.</td></tr>
    <tr><td style="padding:10px;border-top:1px solid #e2e8f0;"><strong>World-model robotics</strong></td><td style="padding:10px;border-top:1px solid #e2e8f0;">Learn futures for planning, RL, evaluation, data generation, or proposals.</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Dreamer, TD-MPC, AC-WM, WAM, DreamZero, PlayWorld, WorldGym.</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Action grounding, physical consistency, long-horizon drift.</td></tr>
    <tr><td style="padding:10px;border-top:1px solid #e2e8f0;"><strong>LLM/VLM orchestration</strong></td><td style="padding:10px;border-top:1px solid #e2e8f0;">Use language/vision models to select skills, constraints, code, or cost maps.</td><td style="padding:10px;border-top:1px solid #e2e8f0;">SayCan, Code-as-Policies, VoxPoser, ReKep, VLM spatial programs.</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Grounding, verification, tool reliability, and skill-library limits.</td></tr>
  </table>

  <h2 style="font-size:24px;margin:18px 0 10px 0;">Survey reading stack</h2>
  <div style="display:grid;grid-template-columns:repeat(2,minmax(0,1fr));gap:12px;">
    <div style="border-left:4px solid #2563eb;background:#f8fafc;border-radius:8px;padding:12px 14px;"><strong>VLA overview:</strong> Ma et al. 2024; Shao et al. 2025; Li et al. 2025.</div>
    <div style="border-left:4px solid #ca8a04;background:#f8fafc;border-radius:8px;padding:12px 14px;"><strong>Action representation:</strong> Zhong et al. 2025 for tokenization; diffusion/flow heads for continuous actions.</div>
    <div style="border-left:4px solid #7c3aed;background:#f8fafc;border-radius:8px;padding:12px 14px;"><strong>World models:</strong> Hou et al. 2026 for robot-learning world models; human-video and WAM papers for scalable data.</div>
    <div style="border-left:4px solid #16a34a;background:#f8fafc;border-radius:8px;padding:12px 14px;"><strong>Data and deployment:</strong> Yu et al. 2025 for efficient VLAs; Lisondra et al. 2025 for service robots; Ma et al. 2026 for human-video learning.</div>
  </div>

  <div style="background:#ecfeff;border:1px solid #67e8f9;border-radius:10px;padding:14px 16px;margin-top:18px;">
    <strong>Bottom line:</strong> modern robot learning is moving from single algorithms to integrated systems: VLA policy + generative action head + world model/evaluator + planner/orchestrator + safety/controller layer.  The best taxonomy separates these layers instead of forcing each paper into exactly one family.
  </div>
</div>
"""


# Three-stage "skill map", ordered coarse -> middle -> fine.  Each stage dict
# has an "id", a display "label", a "subtitle", and a "nodes" list.  Every node
# carries the same nine keys: name, branch, plain, owns, not_owns, formula,
# papers, links, unlocks.  "links" and "unlocks" are lists of strings; the
# other seven values are plain strings.
SKILL_MAP_STAGES = [
    {
        "id": "coarse",
        "label": "Stage 1 — Coarse Map",
        "subtitle": "First ask what job a component does in a robot-learning system.",
        "nodes": [
            {
                "name": "Controller",
                "branch": "Control substrate",
                "plain": "The thing that actually sends actions to the robot.",
                "owns": "Closes the loop at runtime.",
                "not_owns": "Does not define the training loss, data source, or model architecture.",
                "formula": "a_t = pi(o_t, g)",
                "papers": "PID / impedance control; MPC; ACT; Diffusion Policy; pi0 / OpenPI.",
                "links": ["Learning objective", "Action representation", "Safety filter"],
                "unlocks": ["BC policy", "RL policy", "MPC planner", "VLA policy"],
            },
            {
                "name": "Training Signal",
                "branch": "Learning objective",
                "plain": "The score or loss that teaches the robot what behavior is good.",
                "owns": "Defines what is optimized during training.",
                "not_owns": "Does not decide whether the model is a Transformer, VLA, or world model.",
                "formula": "min loss  or  max reward",
                "papers": "BC; PPO; SAC; CQL; IQL; GAIL; MaxEnt IRL.",
                "links": ["Controller", "Data source", "Evaluator"],
                "unlocks": ["BC", "RL", "Offline RL", "IRL"],
            },
            {
                "name": "Future Model",
                "branch": "Predictive model",
                "plain": "A learned simulator that predicts what could happen next.",
                "owns": "Predicts future states, video, occupancy, contacts, or latent dynamics.",
                "not_owns": "Does not automatically become a policy; it may be a planner, evaluator, or data generator.",
                "formula": "future = f(current, action)",
                "papers": "PETS; Dreamer; TD-MPC; AC-WM; WAM; World Model surveys.",
                "links": ["MPC planner", "RL", "Policy evaluation"],
                "unlocks": ["Forward model", "Latent imagination", "AC-WM", "WAM"],
            },
            {
                "name": "Representation",
                "branch": "Architecture / representation",
                "plain": "The language used inside the model: tokens, flows, diffusion steps, latents, or maps.",
                "owns": "Parameterizes observations, actions, goals, and memory.",
                "not_owns": "Does not by itself say what objective trained the robot.",
                "formula": "o, a, g -> tokens / latents / fields",
                "papers": "Decision Transformer; OpenVLA; RT-2; Diffusion Policy; V-JEPA 2.",
                "links": ["Training Signal", "Controller", "Future Model"],
                "unlocks": ["Action heads", "VLA trunk", "Sequence model", "Spatial map"],
            },
            {
                "name": "Data Source",
                "branch": "Data regime",
                "plain": "Where the examples, rewards, logs, videos, or interactions come from.",
                "owns": "Defines the evidence available to learn from.",
                "not_owns": "Does not determine the algorithm alone; demos can train BC, VLA, or WAM decoders.",
                "formula": "D = demos / logs / rollouts / video",
                "papers": "Open X-Embodiment; DROID; MimicGen; human-video learning surveys.",
                "links": ["Training Signal", "Representation", "Evaluation"],
                "unlocks": ["Teleop demos", "Fixed logs", "Play", "Human video", "Cross-embodiment data"],
            },
            {
                "name": "Runtime Role",
                "branch": "Deployment role",
                "plain": "What the learned thing is used for once the robot is running.",
                "owns": "Policy, planner, simulator, evaluator, data generator, orchestrator, or safety layer.",
                "not_owns": "Does not imply a particular training objective.",
                "formula": "component -> role in system",
                "papers": "SayCan; Code-as-Policies; WorldGym; DreamZero; MPC safety filters.",
                "links": ["Controller", "Future Model", "LLM planner"],
                "unlocks": ["Policy", "Planner", "Simulator", "Evaluator", "Orchestrator"],
            },
        ],
    },
    {
        "id": "middle",
        "label": "Stage 2 — Main Branches",
        "subtitle": "Then split the field into non-overlapping primary branches.",
        "nodes": [
            {
                "name": "Imitation Branch",
                "branch": "Learning objective",
                "plain": "Copy expert behavior from demonstrations.",
                "owns": "Supervised action learning from expert actions.",
                "not_owns": "VLA, diffusion, and flow are action representations unless the BC loss is the main claim.",
                "formula": "min D[pi_theta(a|o) || pi_D(a|o)]",
                "papers": "BC; DAgger; ACT; Diffusion Policy; Flow Matching Policy.",
                "links": ["Action heads", "Teleop demos", "VLA trunk"],
                "unlocks": ["MSE-BC", "ACT", "Diffusion Policy", "Flow Matching Policy", "Tokenized BC"],
            },
            {
                "name": "Reinforcement Branch",
                "branch": "Learning objective",
                "plain": "Improve behavior by maximizing reward.",
                "owns": "Reward optimization from interaction or imagined rollouts.",
                "not_owns": "A VLA fine-tuned with RL stays VLA as architecture, but RL is the objective.",
                "formula": "max E[sum gamma^t r_t]",
                "papers": "PPO; SAC; TD3; VLA-RL; VLAC.",
                "links": ["Simulator", "Reward model", "Safety"],
                "unlocks": ["Policy gradient", "Actor-critic", "VLA-RL", "Model-based RL"],
            },
            {
                "name": "Offline Decision Branch",
                "branch": "Learning objective",
                "plain": "Learn from fixed logs without new robot interaction.",
                "owns": "Support-aware learning from static datasets.",
                "not_owns": "Decision Transformer is a sequence architecture that often lives here by use case.",
                "formula": "max Q(s, pi(s)) with pi near data",
                "papers": "BCQ; CQL; IQL; AWAC; TD3+BC; Decision Transformer.",
                "links": ["Fixed logs", "Sequence model", "Behavior constraint"],
                "unlocks": ["CQL", "IQL", "AWAC", "Decision Transformer"],
            },
            {
                "name": "World Model Branch",
                "branch": "Predictive model",
                "plain": "Learn futures so the robot can plan, evaluate, or train in imagination.",
                "owns": "Prediction of future states, video, or latent dynamics.",
                "not_owns": "A world model is not automatically the controller.",
                "formula": "p(future | present, condition)",
                "papers": "PETS; Dreamer; TD-MPC; AC-WM; WAM; Hou et al. survey.",
                "links": ["MPC", "RL", "Policy evaluation", "Video data"],
                "unlocks": ["Forward dynamics", "Latent imagination", "AC-WM", "WAM", "Occupancy WM"],
            },
            {
                "name": "Foundation Policy Branch",
                "branch": "Architecture / representation",
                "plain": "Use large vision-language backbones plus robot action heads.",
                "owns": "VLM/VLA trunk, action representation, and multi-task conditioning.",
                "not_owns": "Does not own BC or RL; those are the objectives used to train or tune it.",
                "formula": "(image, text) -> action head -> action",
                "papers": "RT-2; OpenVLA; pi0 / OpenPI; Gemini Robotics; GR00T; Helix.",
                "links": ["BC", "RL fine-tuning", "Cross-embodiment data", "World model"],
                "unlocks": ["Tokenized VLA", "Flow-head VLA", "Diffusion-head VLA", "Dual-system VLA"],
            },
            {
                "name": "Orchestration Branch",
                "branch": "Deployment role",
                "plain": "Use language or vision-language models to choose skills, code, or constraints.",
                "owns": "High-level task decomposition and tool/skill selection.",
                "not_owns": "Does not own low-level policy learning unless it directly trains actions.",
                "formula": "plan = LLM(instruction, scene, tools)",
                "papers": "SayCan; Inner Monologue; Code-as-Policies; VoxPoser; ReKep.",
                "links": ["Skill library", "VLM affordance", "Classical solver"],
                "unlocks": ["LLM planner", "Spatial cost program", "Skill graph"],
            },
        ],
    },
    {
        "id": "fine",
        "label": "Stage 3 — Fine-Grained Leaves",
        "subtitle": "Finally inspect the leaves: equations, papers, and linked concepts.",
        "nodes": [
            {
                "name": "Flow Matching Policy",
                "branch": "Imitation Branch",
                "plain": "Move noise smoothly into expert actions with a learned velocity field.",
                "owns": "A BC action head with a flow-matching loss.",
                "not_owns": "Not the whole VLA; pi0 uses this as one layer in a larger stack.",
                "formula": "L = E ||v_theta(a^t,o,t) - (a^1-a^0)||^2",
                "papers": "Conditional Flow Matching; pi0; pi0.5; OpenPI.",
                "links": ["BC", "VLA trunk", "ODE sampling", "Action chunking"],
                "unlocks": ["pi0 / OpenPI", "Fast continuous action generation"],
            },
            {
                "name": "Diffusion Policy",
                "branch": "Imitation Branch",
                "plain": "Start from random action noise and denoise it into a robot action.",
                "owns": "A BC action head with a denoising objective.",
                "not_owns": "Not the same thing as trajectory diffusion or video diffusion.",
                "formula": "L = E ||epsilon - epsilon_theta(a^k,o,k)||^2",
                "papers": "Diffusion Policy; DP3; Equivariant Diffusion Policy; RDT-1B; Octo.",
                "links": ["BC", "Multimodal demos", "Action chunks", "VLA diffusion head"],
                "unlocks": ["Contact-rich manipulation", "Multimodal action distributions"],
            },
            {
                "name": "Tokenized VLA",
                "branch": "Foundation Policy Branch",
                "plain": "Turn robot actions into tokens and predict them like words.",
                "owns": "Discrete or compressed action representation inside a VLA.",
                "not_owns": "The token loss is usually BC; the VLA branch owns the architecture, not the objective.",
                "formula": "L = -sum log p(a_token_j | image, text, previous tokens)",
                "papers": "RT-1; RT-2; OpenVLA; RT-X; pi0-FAST; Gemini Robotics.",
                "links": ["Tokenization", "Language model infrastructure", "Cross-embodiment data"],
                "unlocks": ["Generalist manipulation policy", "Shared action vocabulary"],
            },
            {
                "name": "Decision Transformer",
                "branch": "Offline Decision Branch",
                "plain": "Treat a robot trajectory like a sentence and predict the next action.",
                "owns": "Sequence-model control over logged trajectories.",
                "not_owns": "Not a separate reward optimizer; it is usually supervised next-action prediction with return conditioning.",
                "formula": "p(a_t | return-to-go, states, past actions)",
                "papers": "Decision Transformer; Trajectory Transformer; Gato.",
                "links": ["Offline RL", "BC", "Goal conditioning", "Token sequence"],
                "unlocks": ["Return-conditioned control", "Long-context policy memory"],
            },
            {
                "name": "AC-WM",
                "branch": "World Model Branch",
                "plain": "Ask what would happen if the robot tried a candidate action sequence.",
                "owns": "Action-conditioned future prediction.",
                "not_owns": "Not a policy proposal model; actions are inputs, not outputs.",
                "formula": "p(future | current observation, future actions)",
                "papers": "Dreamer; TD-MPC; Veo-Robotics; Ctrl-World; PlayWorld; WorldGym.",
                "links": ["MPC", "RL inside model", "Policy evaluation", "Failures and play"],
                "unlocks": ["Counterfactual rollouts", "Model-based policy improvement"],
            },
            {
                "name": "WAM",
                "branch": "World Model Branch",
                "plain": "Imagine a successful future video and decode the actions to make it happen.",
                "owns": "Future-video proposal plus action decoding.",
                "not_owns": "Not a counterfactual simulator unless action conditioning is added.",
                "formula": "p(future video, actions | image, instruction)",
                "papers": "DreamZero; Large Video Planner; mimic-video; VideoPolicy; Unified Video Action Model.",
                "links": ["Video pretraining", "Best-of-N planning", "BC decoder", "VLA"],
                "unlocks": ["Instruction-conditioned action proposals", "Video-model policy prior"],
            },
            {
                "name": "VLM Spatial Program",
                "branch": "Orchestration Branch",
                "plain": "Use a VLM to mark where the robot should act, then let a solver move there.",
                "owns": "Open-vocabulary spatial grounding and constraint generation.",
                "not_owns": "Not an end-to-end motor policy.",
                "formula": "cost map = VLM(scene, instruction); trajectory = argmin cost",
                "papers": "VoxPoser; MOKA; PIVOT; ReKep; RoboPoint.",
                "links": ["Classical motion planning", "Keypoints", "Affordances", "LLM planning"],
                "unlocks": ["Zero-shot spatial manipulation", "Inspectable constraints"],
            },
        ],
    },
]

# Index every skill-map node by name; each value pairs the owning stage's label
# with the node dict itself.  If two nodes shared a name, the later one wins.
SKILL_MAP_NODE_LOOKUP = dict(
    (entry["name"], (group["label"], entry))
    for group in SKILL_MAP_STAGES
    for entry in group["nodes"]
)
# Node names in stage order, then in-stage order (dict insertion order).
SKILL_MAP_NODE_NAMES = [*SKILL_MAP_NODE_LOOKUP]

# Ownership audit table: one 4-tuple of strings per concept.
# NOTE(review): judging by the values, the columns look like
# (concept, owning layer, what that layer owns, linked concepts) — confirm
# against the code that renders this table.
OWNERSHIP_AUDIT = [
    ("BC", "Learning objective", "match expert actions", "action head, demos, VLA trunk"),
    ("RL", "Learning objective", "maximize reward", "simulator, reward model, policy architecture"),
    ("Offline RL", "Learning objective", "optimize fixed logs with support constraints", "sequence model, dataset, behavior policy"),
    ("IRL / GAIL", "Learning objective", "infer reward or discriminator from demonstrations", "RL optimizer, demos"),
    ("Diffusion Policy", "Learning objective", "BC-style denoising action loss", "diffusion representation, demos, VLA head"),
    ("Flow Matching Policy", "Learning objective", "BC-style flow action loss", "ODE sampler, VLA trunk, cross-embodiment data"),
    ("Decision Transformer", "Architecture / representation", "causal sequence model for trajectories", "offline RL use case, BC loss, return tokens"),
    ("VLA", "Architecture / representation", "vision-language-action trunk and action interface", "BC/RL objective, dataset, action tokenizer"),
    ("Tokenized action", "Architecture / representation", "discrete action representation", "BC objective, VLA runtime"),
    ("AC-WM", "Predictive model", "action-conditioned future prediction", "MPC, RL, evaluator, play/failure data"),
    ("WAM", "Predictive model", "future-video proposal plus action decoding", "BC decoder, VLA prior, best-of-N planner"),
    ("Occupancy / latent WM", "Architecture / representation", "planning-friendly state representation inside a world model", "AC-WM, MPC, navigation/manipulation"),
    ("Open X / DROID-style data", "Data regime", "cross-embodiment robot data source", "VLA, BC, offline RL"),
    ("Human video learning", "Data regime", "unlabeled or weakly labeled human demonstration source", "WAM, latent action pretraining, visual representation"),
    ("SayCan / Code-as-Policies", "Deployment role", "skill orchestration at runtime", "LLM/VLM, skill library, solver"),
    ("VoxPoser / ReKep", "Deployment role", "spatial constraint or cost generation", "VLM grounding, classical motion planning"),
]


def validate_landscape_structure() -> dict:
    """Summarize structural checks over the skill map and survey tables.

    Reads the module-level ``SKILL_MAP_STAGES``, ``OWNERSHIP_AUDIT``,
    ``SURVEY_PAPERS`` and ``SURVEY_SOURCE_INDEX`` tables.  A node that lacks a
    ``"name"`` key does not abort the check: it is reported in
    ``missing_fields`` as ``"<unnamed>"`` and left out of the name-based
    uniqueness counts.

    Returns:
        A dict with these keys:

        * ``stages`` -- number of stages in ``SKILL_MAP_STAGES``.
        * ``nodes`` -- total number of nodes across all stages.
        * ``unique_nodes`` -- number of distinct node names.
        * ``duplicate_nodes`` -- sorted names carried by more than one node.
        * ``missing_fields`` -- names of nodes lacking any required key.
        * ``ownership_rows`` -- number of rows in ``OWNERSHIP_AUDIT``.
        * ``survey_papers`` -- number of rows in ``SURVEY_PAPERS``.
        * ``survey_sources`` -- number of entries in ``SURVEY_SOURCE_INDEX``.
        * ``surveys_missing_sources`` -- sorted survey titles that have no
          entry in ``SURVEY_SOURCE_INDEX``.
        * ``sources_missing_urls`` -- sorted titles of source entries whose
          ``url`` is absent or empty.
    """
    required = {"name", "branch", "plain", "owns", "not_owns", "formula", "papers", "links", "unlocks"}
    nodes = [node for stage in SKILL_MAP_STAGES for node in stage["nodes"]]

    # Only named nodes take part in the uniqueness checks; indexing
    # node["name"] directly would raise KeyError on precisely the malformed
    # nodes that `missing_fields` below is meant to report.
    names = [node["name"] for node in nodes if "name" in node]

    # Single pass: a name seen a second time is a duplicate.
    seen = set()
    duplicates = set()
    for name in names:
        if name in seen:
            duplicates.add(name)
        else:
            seen.add(name)

    missing_fields = [
        node.get("name", "<unnamed>")
        for node in nodes
        if required - set(node)
    ]

    survey_titles = {title for title, *_ in SURVEY_PAPERS}
    source_titles = {src["title"] for src in SURVEY_SOURCE_INDEX}

    return {
        "stages": len(SKILL_MAP_STAGES),
        "nodes": len(nodes),
        "unique_nodes": len(seen),
        "duplicate_nodes": sorted(duplicates),
        "missing_fields": missing_fields,
        "ownership_rows": len(OWNERSHIP_AUDIT),
        "survey_papers": len(SURVEY_PAPERS),
        "survey_sources": len(SURVEY_SOURCE_INDEX),
        "surveys_missing_sources": sorted(survey_titles - source_titles),
        "sources_missing_urls": sorted(src["title"] for src in SURVEY_SOURCE_INDEX if not src.get("url")),
    }


def render_skill_map(stage_id: str = "coarse") -> str:
    """Return the skill-map page (stylesheet plus markup) for one stage.

    The stage is the entry of ``SKILL_MAP_STAGES`` whose ``"id"`` equals
    *stage_id*; when none matches, the first stage is rendered instead.
    Each node of the stage becomes one card, and every value taken from the
    node or stage data is HTML-escaped before it is placed in the markup.
    """
    # Default first, then let the first id match win.
    stage = SKILL_MAP_STAGES[0]
    for candidate in SKILL_MAP_STAGES:
        if candidate["id"] == stage_id:
            stage = candidate
            break

    esc = html_lib.escape

    def card(index: int, node: dict) -> str:
        """Build the <article> card for the node at zero-based *index*."""
        link_chips = "".join(f"<span>{esc(word)}</span>" for word in node["links"])
        unlock_items = "".join(f"<li>{esc(entry)}</li>" for entry in node["unlocks"])
        # The node-N class cycles through the six accent colors defined in
        # the stylesheet below.
        return f"""
        <article class="skill-node node-{index % 6}">
          <div class="skill-node-top">
            <div class="skill-rank">Lv {index + 1}</div>
            <div class="skill-branch">{esc(node['branch'])}</div>
          </div>
          <h3>{esc(node['name'])}</h3>
          <p>{esc(node['plain'])}</p>
          <div class="skill-formula">{esc(node['formula'])}</div>
          <div class="skill-links">{link_chips}</div>
          <details>
            <summary>Details</summary>
            <div class="skill-detail-grid">
              <div><strong>Owns:</strong><br/>{esc(node['owns'])}</div>
              <div><strong>Does not own:</strong><br/>{esc(node['not_owns'])}</div>
            </div>
            <div class="skill-papers"><strong>Papers:</strong> {esc(node['papers'])}</div>
            <ul>{unlock_items}</ul>
          </details>
        </article>
        """

    cards = [card(position, node) for position, node in enumerate(stage["nodes"])]

    # Plain (non-f) string: the CSS has no placeholders, so its braces are
    # written literally.
    stylesheet = """
<style>
  .skill-page {
    padding: 18px 22px 10px 22px;
    color: #0f172a;
    line-height: 1.5;
  }
  .skill-hero {
    background: linear-gradient(135deg,#111827 0%,#14532d 42%,#7c2d12 100%);
    color: white;
    border-radius: 12px;
    padding: 20px 24px;
    margin-bottom: 14px;
  }
  .skill-hero h1 {
    margin: 5px 0 7px 0;
    font-size: 31px;
    line-height: 1.12;
  }
  .skill-hero p {
    margin: 0;
    color: #dcfce7;
    max-width: 1040px;
    font-size: 15px;
  }
  .skill-stage {
    display: flex;
    justify-content: space-between;
    gap: 14px;
    align-items: flex-end;
    margin: 4px 0 12px 0;
  }
  .skill-stage h2 {
    margin: 0;
    font-size: 24px;
  }
  .skill-stage p {
    margin: 3px 0 0 0;
    color: #475569;
    font-size: 13px;
  }
  .skill-map {
    position: relative;
    display: grid;
    grid-template-columns: repeat(3, minmax(250px, 1fr));
    gap: 16px;
    padding: 18px;
    border: 1px solid #dbe4ef;
    border-radius: 12px;
    background:
      linear-gradient(90deg, rgba(148,163,184,.24) 1px, transparent 1px),
      linear-gradient(180deg, rgba(148,163,184,.24) 1px, transparent 1px),
      #f8fafc;
    background-size: 42px 42px;
  }
  .skill-node {
    position: relative;
    min-height: 270px;
    border-radius: 8px;
    border: 1px solid #cbd5e1;
    background: white;
    padding: 14px;
    box-shadow: 0 8px 20px rgba(15,23,42,.07);
  }
  .skill-node::after {
    content: "";
    position: absolute;
    left: 22px;
    right: -18px;
    top: 50%;
    border-top: 2px dashed rgba(100,116,139,.34);
    z-index: 0;
  }
  .skill-node:nth-child(3n)::after,
  .skill-node:last-child::after {
    display: none;
  }
  .skill-node > * {
    position: relative;
    z-index: 1;
  }
  .skill-node-top {
    display: flex;
    justify-content: space-between;
    align-items: center;
    gap: 8px;
    margin-bottom: 8px;
  }
  .skill-rank {
    background: #0f172a;
    color: white;
    border-radius: 999px;
    padding: 3px 9px;
    font-weight: 800;
    font-size: 11px;
  }
  .skill-branch {
    color: white;
    border-radius: 999px;
    padding: 3px 8px;
    font-weight: 700;
    font-size: 10.5px;
    background: var(--skill-color);
  }
  .skill-node h3 {
    margin: 0 0 6px 0;
    font-size: 19px;
  }
  .skill-node p {
    margin: 0 0 10px 0;
    color: #334155;
    font-size: 13.5px;
  }
  .skill-formula {
    border-left: 4px solid var(--skill-color);
    background: #f8fafc;
    border-radius: 6px;
    padding: 7px 9px;
    font-family: ui-monospace,SFMono-Regular,Menlo,monospace;
    color: #0f172a;
    font-size: 12px;
    overflow-wrap: anywhere;
  }
  .skill-links {
    display: flex;
    gap: 6px;
    flex-wrap: wrap;
    margin: 10px 0;
  }
  .skill-links span {
    border: 1px solid #e2e8f0;
    border-radius: 999px;
    padding: 3px 7px;
    background: #fff;
    color: #475569;
    font-size: 11px;
  }
  .skill-node details {
    border-top: 1px solid #e2e8f0;
    padding-top: 8px;
    font-size: 12.5px;
  }
  .skill-node summary {
    cursor: pointer;
    font-weight: 800;
    color: var(--skill-color);
  }
  .skill-detail-grid {
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 8px;
    margin: 8px 0;
  }
  .skill-detail-grid div,
  .skill-papers {
    background: #f8fafc;
    border-radius: 6px;
    padding: 7px;
  }
  .skill-node ul {
    margin: 8px 0 0 0;
    padding-left: 18px;
  }
  .node-0 { --skill-color: #2563eb; }
  .node-1 { --skill-color: #16a34a; }
  .node-2 { --skill-color: #7c3aed; }
  .node-3 { --skill-color: #ca8a04; }
  .node-4 { --skill-color: #dc2626; }
  .node-5 { --skill-color: #0891b2; }
  .clean-test {
    display: grid;
    grid-template-columns: repeat(4, minmax(0, 1fr));
    gap: 10px;
    margin-top: 14px;
  }
  .clean-test div {
    border: 1px solid #e2e8f0;
    border-radius: 8px;
    background: white;
    padding: 11px;
    font-size: 12.5px;
  }
  @media (max-width: 1000px) {
    .skill-map { grid-template-columns: 1fr; }
    .skill-node::after { display: none; }
    .clean-test { grid-template-columns: 1fr; }
  }
</style>
"""

    stage_label = esc(stage['label'])
    stage_subtitle = esc(stage['subtitle'])
    node_total = len(stage['nodes'])
    card_markup = ''.join(cards)

    page = f"""<div class="skill-page">
  <div class="skill-hero">
    <div style="font-size:12px;letter-spacing:.08em;text-transform:uppercase;color:#bbf7d0;font-weight:800;">RPG-style skill map</div>
    <h1>Robot learning from coarse regions to fine-grained skills</h1>
    <p>The map keeps branches clean by giving every node one primary job.  Related ideas appear as links and unlocks, not duplicate children in multiple branches.</p>
  </div>
  <div class="skill-stage">
    <div>
      <h2>{stage_label}</h2>
      <p>{stage_subtitle}</p>
    </div>
    <div style="font-size:12px;color:#64748b;font-weight:800;">{node_total} nodes</div>
  </div>
  <div class="skill-map">
    {card_markup}
  </div>
  <div class="clean-test">
    <div><strong>Test 1: one question.</strong><br/>A branch is valid only if it answers one question: training signal, architecture, prediction, data, control, or runtime role.</div>
    <div><strong>Test 2: one owner.</strong><br/>A method gets one primary owner.  VLA is architecture; BC/RL are objectives; AC-WM/WAM are predictive models.</div>
    <div><strong>Test 3: links, not copies.</strong><br/>Overlaps become prerequisites, modifiers, or cross-links.  They are not repeated as separate children.</div>
    <div><strong>Test 4: equation check.</strong><br/>If two leaves have the same core equation, they belong together unless they differ by runtime role or representation.</div>
  </div>
</div>
"""
    return stylesheet + page


def render_skill_node(node_name: str) -> str:
    """Return the HTML detail panel for a single skill-map node.

    *node_name* is looked up in ``SKILL_MAP_NODE_LOOKUP``, whose values
    unpack to ``(stage label, node dict)``.  A name that is not a key falls
    back to the first entry of the lookup.  All node text is HTML-escaped.
    """
    if node_name in SKILL_MAP_NODE_LOOKUP:
        entry = SKILL_MAP_NODE_LOOKUP[node_name]
    else:
        entry = next(iter(SKILL_MAP_NODE_LOOKUP.values()))
    stage_label, node = entry

    esc = html_lib.escape
    link_chips = "".join(f"<span>{esc(word)}</span>" for word in node["links"])
    unlock_items = "".join(f"<li>{esc(entry_text)}</li>" for entry_text in node["unlocks"])

    # Escape once up front, in the order the template consumes the fields.
    title = esc(node['name'])
    stage_badge = esc(stage_label)
    branch_badge = esc(node['branch'])
    summary = esc(node['plain'])
    formula = esc(node['formula'])
    owned = esc(node['owns'])
    not_owned = esc(node['not_owns'])
    papers = esc(node['papers'])

    return f"""
<div style="padding:14px 18px;color:#0f172a;line-height:1.55;">
  <div style="display:flex;align-items:center;gap:10px;flex-wrap:wrap;margin-bottom:8px;">
    <h2 style="margin:0;font-size:24px;">{title}</h2>
    <span style="background:#0f172a;color:white;border-radius:999px;padding:3px 9px;font-size:11px;font-weight:800;">{stage_badge}</span>
    <span style="background:#0e7490;color:white;border-radius:999px;padding:3px 9px;font-size:11px;font-weight:800;">{branch_badge}</span>
  </div>
  <p style="margin:0 0 12px 0;color:#334155;font-size:15px;">{summary}</p>
  <div style="background:#fffbeb;border-left:4px solid #ca8a04;border-radius:6px;padding:10px 12px;font-family:ui-monospace,SFMono-Regular,Menlo,monospace;margin-bottom:10px;">{formula}</div>
  <div style="display:grid;grid-template-columns:1fr 1fr;gap:10px;margin-bottom:10px;">
    <div style="background:#f0fdf4;border:1px solid #86efac;border-radius:8px;padding:10px;"><strong>Branch owns</strong><br/>{owned}</div>
    <div style="background:#fef2f2;border:1px solid #fca5a5;border-radius:8px;padding:10px;"><strong>Branch does not own</strong><br/>{not_owned}</div>
  </div>
  <div style="background:#f8fafc;border-radius:8px;padding:10px;margin-bottom:10px;"><strong>Related papers:</strong> {papers}</div>
  <div style="margin-bottom:10px;"><strong>Linked words:</strong> <span style="display:inline-flex;gap:6px;flex-wrap:wrap;">{link_chips}</span></div>
  <div style="background:#eff6ff;border:1px solid #bfdbfe;border-radius:8px;padding:10px;">
    <strong>Unlocks</strong>
    <ul style="margin:6px 0 0 0;padding-left:18px;">{unlock_items}</ul>
  </div>
</div>
"""


def render_ownership_audit() -> str:
    """Return the HTML page for the branch-ownership audit.

    The page shows four summary tiles taken from
    ``validate_landscape_structure()`` (stage count, node totals, duplicate
    names, nodes with missing fields) followed by one table row per entry of
    ``OWNERSHIP_AUDIT``.  Table cells and the duplicate/missing summaries are
    HTML-escaped.
    """
    audit = validate_landscape_structure()
    esc = html_lib.escape

    row_parts = []
    for item, owner, test, links in OWNERSHIP_AUDIT:
        row_parts.append(
            f"<tr><td><strong>{esc(item)}</strong></td>"
            f"<td>{esc(owner)}</td>"
            f"<td>{esc(test)}</td>"
            f"<td>{esc(links)}</td></tr>"
        )
    table_rows = "".join(row_parts)

    # An empty list is shown as the literal word "None".
    if audit["duplicate_nodes"]:
        duplicates_label = ", ".join(audit["duplicate_nodes"])
    else:
        duplicates_label = "None"
    if audit["missing_fields"]:
        missing_label = ", ".join(audit["missing_fields"])
    else:
        missing_label = "None"

    stage_total = audit['stages']
    node_total = audit['nodes']
    unique_total = audit['unique_nodes']
    duplicates_cell = esc(duplicates_label)
    missing_cell = esc(missing_label)

    return f"""
<div style="padding:18px 22px 8px 22px;color:#0f172a;line-height:1.55;">
  <div style="background:linear-gradient(135deg,#111827 0%,#1d4ed8 48%,#047857 100%);color:white;border-radius:12px;padding:20px 24px;margin-bottom:16px;">
    <div style="font-size:12px;letter-spacing:.08em;text-transform:uppercase;color:#bfdbfe;font-weight:800;">Branch ownership audit</div>
    <h1 style="margin:6px 0 8px 0;font-size:31px;line-height:1.12;">One primary owner, many typed links</h1>
    <p style="margin:0;color:#dbeafe;font-size:15px;max-width:980px;">This table is the validation layer behind the map.  Ambiguous terms get exactly one primary owner; related ideas are recorded as links rather than duplicated branches.</p>
  </div>

  <div style="display:grid;grid-template-columns:repeat(4,minmax(0,1fr));gap:10px;margin-bottom:16px;">
    <div style="background:#f8fafc;border:1px solid #e2e8f0;border-radius:8px;padding:11px;"><strong>Stages</strong><br/>{stage_total}</div>
    <div style="background:#f8fafc;border:1px solid #e2e8f0;border-radius:8px;padding:11px;"><strong>Skill nodes</strong><br/>{node_total} total / {unique_total} unique</div>
    <div style="background:#f8fafc;border:1px solid #e2e8f0;border-radius:8px;padding:11px;"><strong>Duplicates</strong><br/>{duplicates_cell}</div>
    <div style="background:#f8fafc;border:1px solid #e2e8f0;border-radius:8px;padding:11px;"><strong>Missing fields</strong><br/>{missing_cell}</div>
  </div>

  <div style="overflow:auto;border:1px solid #e2e8f0;border-radius:10px;background:white;">
    <table style="width:100%;min-width:920px;border-collapse:collapse;font-size:13px;">
      <tr style="background:#f8fafc;">
        <th style="text-align:left;padding:10px;">Ambiguous item</th>
        <th style="text-align:left;padding:10px;">Primary owner</th>
        <th style="text-align:left;padding:10px;">Ownership test</th>
        <th style="text-align:left;padding:10px;">Allowed links, not duplicate children</th>
      </tr>
      {table_rows}
    </table>
  </div>
</div>
"""


def render_survey_source_index() -> str:
    """Return the HTML page listing every entry of ``SURVEY_SOURCE_INDEX``.

    Each entry becomes one table row: title with authors and year, the arXiv
    reference, what the survey validates, and its role in the landscape.
    Every field is HTML-escaped.

    The arXiv reference is a link only when the entry has a non-empty
    ``url``.  ``validate_landscape_structure`` treats a missing or empty URL
    as a reportable condition, so this renderer shows the arXiv id as plain
    text in that case instead of raising ``KeyError`` or emitting a link
    with an empty ``href``.
    """
    esc = html_lib.escape

    row_parts = []
    for src in SURVEY_SOURCE_INDEX:
        arxiv_label = f"arXiv:{esc(src['arxiv'])}"
        url = src.get("url")
        if url:
            source_cell = (
                f"<a href='{esc(url)}' target='_blank' rel='noopener noreferrer'>{arxiv_label}</a>"
            )
        else:
            source_cell = arxiv_label
        row_parts.append(
            f"<tr>"
            # str() keeps integer years rendering as before while letting the
            # value go through the same escaping as every other field.
            f"<td><strong>{esc(src['title'])}</strong><br/><span style='color:#64748b;'>{esc(src['authors'])}, {esc(str(src['year']))}</span></td>"
            f"<td>{source_cell}</td>"
            f"<td>{esc(src['validates'])}</td>"
            f"<td>{esc(src['landscape_role'])}</td>"
            f"</tr>"
        )
    rows = "".join(row_parts)

    return f"""
<div style="padding:18px 22px 8px 22px;color:#0f172a;line-height:1.55;">
  <div style="background:linear-gradient(135deg,#0f172a 0%,#3730a3 50%,#0f766e 100%);color:white;border-radius:12px;padding:20px 24px;margin-bottom:16px;">
    <div style="font-size:12px;letter-spacing:.08em;text-transform:uppercase;color:#c7d2fe;font-weight:800;">Survey source index</div>
    <h1 style="margin:6px 0 8px 0;font-size:31px;line-height:1.12;">Which surveys support which parts of the map?</h1>
    <p style="margin:0;color:#e0e7ff;font-size:15px;max-width:980px;">This tab makes the landscape auditable: every major survey is tied to the branch or validation claim it supports.</p>
  </div>

  <div style="overflow:auto;border:1px solid #e2e8f0;border-radius:10px;background:white;">
    <table style="width:100%;min-width:1100px;border-collapse:collapse;font-size:13px;">
      <tr style="background:#f8fafc;">
        <th style="text-align:left;padding:10px;">Survey</th>
        <th style="text-align:left;padding:10px;">Source</th>
        <th style="text-align:left;padding:10px;">What it validates</th>
        <th style="text-align:left;padding:10px;">Role in this landscape</th>
      </tr>
      {rows}
    </table>
  </div>

  <div style="background:#ecfeff;border:1px solid #67e8f9;border-radius:10px;padding:14px 16px;margin-top:16px;">
    <strong>Audit rule:</strong> a survey is not used as a flat category label.  It is used as evidence for one or more map layers: objective, architecture, predictive model, data regime, or runtime role.
  </div>
</div>
"""


# Static HTML for the "Validation method" page: a hero banner, four test cards
# (branch question, primary owner, equation, cross-link), a table of canonical
# ownership rules, and a closing workflow note.  It is a plain raw string, not
# an f-string, so the markup contains no placeholders.
VALIDATION_RUBRIC_HTML = r"""
<div style="padding:18px 22px 8px 22px;color:#0f172a;line-height:1.58;">
  <div style="background:linear-gradient(135deg,#0f172a 0%,#7c2d12 52%,#166534 100%);color:white;border-radius:12px;padding:20px 24px;margin-bottom:18px;">
    <div style="font-size:12px;letter-spacing:.08em;text-transform:uppercase;color:#fed7aa;font-weight:800;">Validation method</div>
    <h1 style="margin:6px 0 8px 0;font-size:31px;line-height:1.12;">How to prove the landscape is clean</h1>
    <p style="margin:0;color:#ffedd5;font-size:15px;max-width:980px;">A clean map does not mean every paper has only one idea.  It means each branch owns one kind of claim, while overlaps are represented as typed links.</p>
  </div>

  <div style="display:grid;grid-template-columns:repeat(2,minmax(0,1fr));gap:12px;margin-bottom:18px;">
    <div style="border:1px solid #bfdbfe;background:#eff6ff;border-radius:10px;padding:14px;"><strong>Branch question test</strong><br/>Every branch must answer one sentence: what closes the loop, what trains it, what predicts the future, what represents actions/state/task, what data it uses, or what role it plays.</div>
    <div style="border:1px solid #bbf7d0;background:#f0fdf4;border-radius:10px;padding:14px;"><strong>Primary-owner test</strong><br/>Each leaf gets one primary branch.  For example, VLA is primarily architecture/data scale; BC and RL are objectives; WAM and AC-WM are world-model roles.</div>
    <div style="border:1px solid #fde68a;background:#fffbeb;border-radius:10px;padding:14px;"><strong>Equation test</strong><br/>If two methods optimize the same objective but use different wrappers, they belong under the same objective branch with different representation links.</div>
    <div style="border:1px solid #fecaca;background:#fef2f2;border-radius:10px;padding:14px;"><strong>Cross-link test</strong><br/>A paper that combines ideas should be represented as a stack, not copied into multiple branches.  Example: pi0 = VLA trunk + flow-matching BC head + cross-embodiment demos.</div>
  </div>

  <h2 style="font-size:24px;margin:16px 0 10px 0;">Canonical ownership rules</h2>
  <table style="width:100%;border-collapse:collapse;background:white;border:1px solid #e2e8f0;border-radius:10px;overflow:hidden;font-size:13px;">
    <tr style="background:#f8fafc;"><th style="text-align:left;padding:10px;">If the paper's main claim is...</th><th style="text-align:left;padding:10px;">Put it under...</th><th style="text-align:left;padding:10px;">Show these as links</th></tr>
    <tr><td style="padding:10px;border-top:1px solid #e2e8f0;">A new loss, reward, or optimization procedure</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Learning objective</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Architecture, data, runtime role</td></tr>
    <tr><td style="padding:10px;border-top:1px solid #e2e8f0;">A new VLA/VLM trunk, tokenizer, action head, or latent representation</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Architecture / representation</td><td style="padding:10px;border-top:1px solid #e2e8f0;">BC/RL objective, dataset, embodiment</td></tr>
    <tr><td style="padding:10px;border-top:1px solid #e2e8f0;">A model that predicts futures</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Predictive model</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Planner, policy, evaluator, video data</td></tr>
    <tr><td style="padding:10px;border-top:1px solid #e2e8f0;">A new dataset, teleop system, simulator, benchmark, or scaling recipe</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Data regime / evaluation</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Model trained on it, objective used</td></tr>
    <tr><td style="padding:10px;border-top:1px solid #e2e8f0;">A way to compose tools, skills, or constraints at runtime</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Deployment role</td><td style="padding:10px;border-top:1px solid #e2e8f0;">Skill library, solver, VLM, safety layer</td></tr>
  </table>

  <div style="background:#ecfeff;border:1px solid #67e8f9;border-radius:10px;padding:14px 16px;margin-top:18px;">
    <strong>Practical validation workflow:</strong> take each new paper, write its six-layer stack, pick the one layer that contains the paper's main contribution, then add the other layers as links.  If two branches both seem primary, the branch definitions are too broad and should be split by question.
  </div>
</div>
"""

# Static HTML for the "How to read the branches" page: a hero banner, three
# cards describing objective / architecture / container branches, a table of
# important overlaps (item, primary home, overlaps, why), and a worked
# Decision Transformer explanation.  Plain raw string with no placeholders.
RELATIONSHIP_GUIDE_HTML = r"""
<div style="padding:18px 22px 8px 22px;color:#0f172a;line-height:1.58;">
  <div style="background:linear-gradient(135deg,#111827 0%,#6d28d9 52%,#be123c 100%);color:white;border-radius:12px;padding:20px 24px;margin-bottom:18px;">
    <div style="font-size:12px;letter-spacing:.08em;text-transform:uppercase;color:#ddd6fe;font-weight:800;">How to read the branches</div>
    <h1 style="margin:6px 0 8px 0;font-size:31px;line-height:1.12;">Paradigms are not a flat list</h1>
    <p style="margin:0;color:#f5d0fe;font-size:15px;max-width:980px;">A branch can be an objective, an architecture, a data regime, a wrapper, or a representation.  Some branches are containers for others.</p>
  </div>

  <div style="display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-bottom:18px;">
    <div style="border:1px solid #bfdbfe;background:#eff6ff;border-radius:10px;padding:14px;"><strong>Objective branch</strong><br/>Defines the loss or optimization target. Examples: BC, PPO/SAC, Offline RL, IRL.</div>
    <div style="border:1px solid #fde68a;background:#fffbeb;border-radius:10px;padding:14px;"><strong>Architecture branch</strong><br/>Defines how variables are represented and decoded. Examples: Decision Transformer, diffusion trajectory model, VLA trunk.</div>
    <div style="border:1px solid #bbf7d0;background:#f0fdf4;border-radius:10px;padding:14px;"><strong>Container branch</strong><br/>Can hold other branches. Examples: world model can contain RL, MPC, BC proposals, occupancy prediction, or sequence decoding.</div>
  </div>

  <h2 style="font-size:24px;margin:16px 0 10px 0;">Important overlaps</h2>
  <table style="width:100%;border-collapse:collapse;background:white;border:1px solid #e2e8f0;border-radius:10px;overflow:hidden;">
    <tr style="background:#f8fafc;"><th style="text-align:left;padding:10px;border-bottom:1px solid #e2e8f0;">Item</th><th style="text-align:left;padding:10px;border-bottom:1px solid #e2e8f0;">Primary home</th><th style="text-align:left;padding:10px;border-bottom:1px solid #e2e8f0;">Also overlaps with</th><th style="text-align:left;padding:10px;border-bottom:1px solid #e2e8f0;">Why</th></tr>
    <tr><td style="padding:10px;border-bottom:1px solid #e2e8f0;"><strong>Decision Transformer</strong></td><td style="padding:10px;border-bottom:1px solid #e2e8f0;">Sequence</td><td style="padding:10px;border-bottom:1px solid #e2e8f0;">Offline RL, BC, Goal-conditioned control</td><td style="padding:10px;border-bottom:1px solid #e2e8f0;">Causal Transformer architecture; the objective is next-action prediction, steered by return or goal tokens.</td></tr>
    <tr><td style="padding:10px;border-bottom:1px solid #e2e8f0;"><strong>VLA</strong></td><td style="padding:10px;border-bottom:1px solid #e2e8f0;">BC relation</td><td style="padding:10px;border-bottom:1px solid #e2e8f0;">Flow, Diffusion, Tokenized BC, sometimes WAM</td><td style="padding:10px;border-bottom:1px solid #e2e8f0;">VLA says what trunk/data scale you use; the action head supplies the actual training loss.</td></tr>
    <tr><td style="padding:10px;border-bottom:1px solid #e2e8f0;"><strong>World Action Model</strong></td><td style="padding:10px;border-bottom:1px solid #e2e8f0;">World Models</td><td style="padding:10px;border-bottom:1px solid #e2e8f0;">BC, VLA, best-of-N planning</td><td style="padding:10px;border-bottom:1px solid #e2e8f0;">It generates actions, but through a future-video model; it behaves like a policy proposal generator.</td></tr>
    <tr><td style="padding:10px;border-bottom:1px solid #e2e8f0;"><strong>Action-conditioned WM</strong></td><td style="padding:10px;border-bottom:1px solid #e2e8f0;">World Models</td><td style="padding:10px;border-bottom:1px solid #e2e8f0;">MPC, RL, Offline RL, policy evaluation</td><td style="padding:10px;border-bottom:1px solid #e2e8f0;">It is a simulator: feed candidate actions, predict consequences, then plan or train inside it.</td></tr>
    <tr><td style="padding:10px;"><strong>Occupancy / latent state WM</strong></td><td style="padding:10px;">World Models</td><td style="padding:10px;">AC-WM, MPC, navigation, manipulation</td><td style="padding:10px;">Occupancy is a representation inside the world-model branch, not a separate objective.</td></tr>
  </table>

  <h2 style="font-size:24px;margin:18px 0 10px 0;">Decision Transformer, concretely</h2>
  <div style="background:#fff7ed;border-left:4px solid #ea580c;border-radius:8px;padding:14px 16px;">
    In long-range decision transformers, the architecture is a causal sequence model over trajectory tokens.  A typical token stream is <code>return-to-go, state, previous action, return-to-go, state, previous action...</code>.  The model predicts the next action token.  If you replace return-to-go with a goal, it becomes goal-conditioned sequence control.  If you train only on expert demonstrations without reward tokens, it collapses toward BC.  So DT is a receptacle: it contains BC-like supervised learning, offline-RL-style return conditioning, and long-horizon memory in one architecture.
  </div>
</div>
"""

# Static HTML for the beginner guide to robotics world models.  It contrasts
# World Action Models ("actions out") with action-conditioned world models
# ("actions in"), lists the advantages of each, gives three concrete
# examples, and ends with a note on flexible conditioning.  Plain raw string
# with no placeholders.
WORLD_MODELS_101_HTML = r"""
<div style="padding:18px 22px 8px 22px; color:#0f172a; line-height:1.58;">
  <div style="background:linear-gradient(135deg,#0f172a 0%,#155e75 55%,#7c2d12 100%);color:white;border-radius:12px;padding:20px 24px;margin-bottom:18px;">
    <div style="font-size:12px;letter-spacing:.08em;text-transform:uppercase;color:#bae6fd;font-weight:800;">Beginner guide</div>
    <h1 style="margin:6px 0 8px 0;font-size:31px;line-height:1.12;">World models for robotics: WAMs vs action-conditioned world models</h1>
    <p style="margin:0;color:#dbeafe;font-size:15px;max-width:980px;">Based on Anirudha Majumdar's March 17, 2026 discussion of whether robotics world models should condition on future actions.</p>
  </div>

  <div style="display:grid;grid-template-columns:1fr 1fr;gap:16px;margin-bottom:18px;">
    <div style="border:1px solid #bae6fd;background:#f0f9ff;border-radius:10px;padding:16px;">
      <h2 style="margin:0 0 8px 0;font-size:22px;">What is a world model?</h2>
      <p style="margin:0;">A world model is a learned simulator.  Instead of immediately commanding the robot, it predicts what the world would look like next.  The robot can then use those predictions to choose an action, train a policy, or evaluate whether a policy is likely to work.</p>
    </div>
    <div style="border:1px solid #fed7aa;background:#fff7ed;border-radius:10px;padding:16px;">
      <h2 style="margin:0 0 8px 0;font-size:22px;">The central design choice</h2>
      <p style="margin:0;">Do actions go into the model as a condition, or come out of the model as part of the generated plan?  That single choice changes what data the model can use and what kind of planning it supports.</p>
    </div>
  </div>

  <div style="display:grid;grid-template-columns:1fr 1fr;gap:16px;margin-bottom:18px;">
    <div style="border:1px solid #c7d2fe;border-radius:10px;padding:16px;background:white;">
      <div style="font-size:12px;color:#4338ca;font-weight:800;text-transform:uppercase;letter-spacing:.06em;">Actions out</div>
      <h2 style="margin:4px 0 8px 0;font-size:24px;">World Action Model (WAM)</h2>
      <div style="font-family:ui-monospace,monospace;background:#eef2ff;border-radius:6px;padding:8px 10px;margin-bottom:10px;">[current image + text] → [future video + actions]</div>
      <p style="margin:0 0 10px 0;">The model sees the current scene and an instruction such as <em>place the marker in the basket</em>.  It imagines a successful video and also decodes the actions needed to produce that video.</p>
      <p style="margin:0;color:#475569;"><strong>Examples:</strong> DreamZero, mimic-video, VideoPolicy, Unified Video Action Model, Large Video Planner, Cosmos Policy-style best-of-N planning.</p>
    </div>
    <div style="border:1px solid #bbf7d0;border-radius:10px;padding:16px;background:white;">
      <div style="font-size:12px;color:#15803d;font-weight:800;text-transform:uppercase;letter-spacing:.06em;">Actions in</div>
      <h2 style="margin:4px 0 8px 0;font-size:24px;">Action-Conditioned World Model (AC-WM)</h2>
      <div style="font-family:ui-monospace,monospace;background:#f0fdf4;border-radius:6px;padding:8px 10px;margin-bottom:10px;">[current image + future actions] → [future video]</div>
      <p style="margin:0 0 10px 0;">The model receives a candidate action sequence, for example end-effector poses for the next second, and predicts what would happen if the robot executed those actions.</p>
      <p style="margin:0;color:#475569;"><strong>Examples:</strong> Dreamer, Veo-Robotics, Ctrl-World, DreamDojo, PlayWorld, World-Gymnast, WorldGym, V-JEPA 2-style latent prediction.</p>
    </div>
  </div>

  <h2 style="font-size:24px;margin:20px 0 10px 0;">Why WAMs are attractive</h2>
  <div style="display:grid;grid-template-columns:repeat(2,minmax(0,1fr));gap:12px;margin-bottom:16px;">
    <div style="background:#f8fafc;border-left:4px solid #4f46e5;border-radius:8px;padding:12px 14px;"><strong>Preserves video pretraining.</strong><br/>A WAM still uses image + text inputs, so adapting a pretrained video model to robotics is a smaller distribution shift.</div>
    <div style="background:#f8fafc;border-left:4px solid #4f46e5;border-radius:8px;padding:12px 14px;"><strong>Easier target.</strong><br/>It mostly learns what successful task execution looks like, rather than every possible consequence of arbitrary actions.</div>
    <div style="background:#f8fafc;border-left:4px solid #4f46e5;border-radius:8px;padding:12px 14px;"><strong>Good action proposals.</strong><br/>Generate many possible successful futures, score them with a reward model, and pick the best one.</div>
    <div style="background:#f8fafc;border-left:4px solid #4f46e5;border-radius:8px;padding:12px 14px;"><strong>Cross-embodiment friendliness.</strong><br/>Most of the video model can be shared across robots; only the action decoder is embodiment-specific.</div>
  </div>

  <h2 style="font-size:24px;margin:20px 0 10px 0;">Why action conditioning is powerful</h2>
  <div style="display:grid;grid-template-columns:repeat(2,minmax(0,1fr));gap:12px;margin-bottom:16px;">
    <div style="background:#f8fafc;border-left:4px solid #16a34a;border-radius:8px;padding:12px 14px;"><strong>Uses more data.</strong><br/>Successes, failures, autonomous play, and rollouts from any policy can all train the model.</div>
    <div style="background:#f8fafc;border-left:4px solid #16a34a;border-radius:8px;padding:12px 14px;"><strong>Counterfactual reasoning.</strong><br/>Ask: what if the robot moved left instead of right?  WAMs do not naturally answer that.</div>
    <div style="background:#f8fafc;border-left:4px solid #16a34a;border-radius:8px;padding:12px 14px;"><strong>RL inside the model.</strong><br/>Train a policy in imagined rollouts before spending real robot time.</div>
    <div style="background:#f8fafc;border-left:4px solid #16a34a;border-radius:8px;padding:12px 14px;"><strong>Policy evaluation.</strong><br/>Roll out a candidate policy in the model and estimate whether it will succeed.</div>
  </div>

  <h2 style="font-size:24px;margin:20px 0 10px 0;">Concrete examples</h2>
  <div style="display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-bottom:16px;">
    <div style="border:1px solid #e2e8f0;border-radius:8px;padding:12px;background:white;"><strong>Pick-and-place.</strong><br/>WAM: generate a plausible video of the cup going into the bin and decode actions. AC-WM: test ten grasp trajectories and predict which one spills or succeeds.</div>
    <div style="border:1px solid #e2e8f0;border-radius:8px;padding:12px;background:white;"><strong>Autonomous play.</strong><br/>WAM: needs a task label after the fact. AC-WM: directly learns from the action sequence and resulting video, even if the attempt failed.</div>
    <div style="border:1px solid #e2e8f0;border-radius:8px;padding:12px;background:white;"><strong>Policy debugging.</strong><br/>WAM: proposes a successful-looking behavior. AC-WM: runs the actual policy in imagination and reveals where it drifts or collides.</div>
  </div>

  <div style="background:#ecfeff;border:1px solid #67e8f9;border-radius:10px;padding:14px 16px;margin-top:18px;">
    <strong>Likely long-term direction:</strong> flexible conditioning.  A single model can accept text when we want high-level proposals, actions when we need counterfactual rollouts, or both when we want task intent plus fine-grained control.  This keeps WAM-style proposal quality while preserving AC-WM-style data scaling, planning, RL, and policy evaluation.
  </div>
</div>
"""


def _skill_map_tab() -> None:
    """Build the "Skill Map" tab.

    The stage radio re-renders the map and repopulates the node dropdown with
    that stage's node names; the node dropdown re-renders the node detail pane.
    Intended to be called from ``build_app`` inside its ``gr.Tabs()`` block.
    """
    # Render the initial map from the same stage the radio pre-selects, so the
    # two cannot disagree if SKILL_MAP_STAGES is reordered or its ids change.
    initial_stage = SKILL_MAP_STAGES[0]

    with gr.Tab("Skill Map"):
        with gr.Row():
            skill_stage = gr.Radio(
                choices=[s["label"] for s in SKILL_MAP_STAGES],
                value=initial_stage["label"],
                label="Stage",
                interactive=True,
            )
            skill_node = gr.Dropdown(
                choices=SKILL_MAP_NODE_NAMES,
                value=SKILL_MAP_NODE_NAMES[0],
                label="Node detail",
                interactive=True,
            )
        skill_map_html = gr.HTML(render_skill_map(initial_stage["id"]))
        skill_node_html = gr.HTML(render_skill_node(SKILL_MAP_NODE_NAMES[0]))

        def update_skill_stage(stage_label):
            # An unrecognised label falls back to the first stage.
            stage = next((s for s in SKILL_MAP_STAGES if s["label"] == stage_label), SKILL_MAP_STAGES[0])
            first_node = stage["nodes"][0]["name"]
            return (
                render_skill_map(stage["id"]),
                gr.update(choices=[n["name"] for n in stage["nodes"]], value=first_node),
                render_skill_node(first_node),
            )

        skill_stage.change(
            update_skill_stage,
            inputs=skill_stage,
            outputs=[skill_map_html, skill_node, skill_node_html],
        )
        skill_node.change(render_skill_node, inputs=skill_node, outputs=skill_node_html)


def _family_projection_tab() -> None:
    """Build the "Family Projection" tab: a layout dropdown driving one HTML pane.

    The last entry of FAMILY_LAYOUT_CHOICES is the pre-selected layout.
    """
    initial_layout = FAMILY_LAYOUT_CHOICES[-1]

    with gr.Tab("Family Projection"):
        family_layout_dd = gr.Dropdown(
            choices=FAMILY_LAYOUT_CHOICES,
            value=initial_layout,
            label="Choose a layout prototype",
            interactive=True,
        )
        family_layout_html = gr.HTML(render_family_layout(initial_layout))
        family_layout_dd.change(
            render_family_layout,
            inputs=family_layout_dd,
            outputs=family_layout_html,
        )


def _paradigm_explorer_tab() -> None:
    """Build the "Paradigm Explorer" tab.

    A family filter narrows the paradigm dropdown; the selected paradigm is
    rendered as a card next to the controls.
    """
    with gr.Tab("Paradigm Explorer"):
        with gr.Row():
            with gr.Column(scale=1, min_width=240):
                gr.Markdown("**Pick a paradigm**")
                family_filter_pe = gr.Dropdown(
                    choices=["All"] + list(FAMILY.keys()),
                    value="All", label="Filter by family", interactive=True,
                )
                paradigm_dd = gr.Dropdown(
                    choices=PARADIGM_NAMES,
                    value=PARADIGM_NAMES[0],
                    label="Paradigm",
                    interactive=True,
                )
            with gr.Column(scale=3):
                paradigm_html = gr.HTML(
                    render_paradigm(PARADIGM_NAMES[0]),
                    elem_id="paradigm-card",
                )

        def update_paradigm_dd(fam):
            # "All" restores the full list; otherwise keep only that family's paradigms.
            if fam == "All":
                names = PARADIGM_NAMES
            else:
                names = [p["name"] for p in PARADIGMS if p["family"] == fam]
            if not names:
                return gr.update(choices=[], value=None), "Nothing for that family."
            return gr.update(choices=names, value=names[0]), render_paradigm(names[0])

        family_filter_pe.change(
            update_paradigm_dd, inputs=family_filter_pe, outputs=[paradigm_dd, paradigm_html]
        )
        paradigm_dd.change(render_paradigm, inputs=paradigm_dd, outputs=paradigm_html)


def _side_by_side_tab() -> None:
    """Build the "Side-by-Side" tab: two paradigm dropdowns feeding one comparison pane."""
    with gr.Tab("Side-by-Side"):
        gr.Markdown(
            "### Compare two paradigms at a glance\n"
            "Pick any two paradigms; their core equations and intuition land next to each other.\n"
            "The default pair contrasts the two BC heads the field has converged on."
        )
        default_a, default_b = "Flow Matching Policy", "Diffusion Policy"
        with gr.Row():
            pick_a = gr.Dropdown(
                choices=PARADIGM_NAMES, value=default_a,
                label="Paradigm A", interactive=True,
            )
            pick_b = gr.Dropdown(
                choices=PARADIGM_NAMES, value=default_b,
                label="Paradigm B", interactive=True,
            )
        cmp_html = gr.HTML(render_compare(default_a, default_b))
        # Either dropdown changing re-renders the comparison from both current values.
        pick_a.change(render_compare, inputs=[pick_a, pick_b], outputs=cmp_html)
        pick_b.change(render_compare, inputs=[pick_a, pick_b], outputs=cmp_html)


def _pick_your_paradigm_tab() -> None:
    """Build the "Pick Your Paradigm" tab: three radios feeding ``recommend``."""
    # Bound once so the pre-selected radio values and the initial
    # recommendation are always computed from the same answers.
    default_data, default_env, default_scale = (
        "Expert demos", "Simulator available", "Small / single task",
    )

    with gr.Tab("Pick Your Paradigm"):
        gr.Markdown(
            "### What data + setup do you have?\n"
            "Three questions → a short list of paradigms worth trying first."
        )
        with gr.Row():
            q_data = gr.Radio(
                ["Expert demos", "Reward only", "Both", "Unlabeled video", "Language + scene only"],
                value=default_data, label="What data do you have?",
            )
            q_env = gr.Radio(
                ["Real robot only", "Simulator available", "Logged data only"],
                value=default_env, label="What about the environment?",
            )
            q_scale = gr.Radio(
                ["Small / single task", "Large / multi-task"],
                value=default_scale, label="Scale?",
            )
        rec_html = gr.HTML(recommend(default_data, default_env, default_scale))
        for ctl in (q_data, q_env, q_scale):
            ctl.change(recommend, inputs=[q_data, q_env, q_scale], outputs=rec_html)


def _paper_atlas_tab(n_papers: int) -> None:
    """Build the "Paper Atlas" tab: tag / min-year / free-text filters over a table.

    Args:
        n_papers: Paper count shown in the tab title.
    """
    # Lower bound of the year slider; also the initial filter value, so the
    # table first shown matches what the controls display.
    first_year = 1989

    with gr.Tab(f"Paper Atlas ({n_papers}+)"):
        gr.Markdown(
            "### The papers behind the paradigms\n"
            "Filter by tag, year, or free-text.  Tags include paradigm leaves "
            "**plus** relation tags (`DAgger (BC relation)`, `Sim2Real (RL relation)`, "
            "`Visual SSL (BC relation)`, `VLA — Flow / Diffusion / Tokenized head`, "
            "`LAPA (Video-WM relation)`) and survey tags (`Survey — VLA`, "
            "`Survey — World Models`, `Survey — Foundation Models`) so you can pull "
            "both primary papers and landscape reviews."
        )
        with gr.Row():
            fam_filter = gr.Dropdown(choices=FAMILY_LABELS, value="All", label="Paradigm / relation tag")
            year_min = gr.Slider(first_year, MAX_PAPER_YEAR, value=first_year, step=1, label="Min year")
            text_q = gr.Textbox(label="Search title / author")
        df_out = gr.Dataframe(
            value=get_atlas_df("All", first_year, ""),
            headers=["Year", "Title", "Authors", "Paradigm"],
            interactive=False,
            wrap=True,
        )
        for ctl in (fam_filter, year_min, text_q):
            ctl.change(get_atlas_df, inputs=[fam_filter, year_min, text_q], outputs=df_out)


def build_app() -> gr.Blocks:
    """Assemble the explorer UI and return it without launching.

    Layout, top to bottom: a header with summary counts, a colour legend built
    from ``FAMILY``, fifteen tabs, and a footer.  Static tabs are built inline;
    tabs with controls and event handlers are delegated to the ``_*_tab``
    helpers above, called in display order.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    n_families = len(TREE_DATA)
    # Leaf count is the summed length of each TREE_DATA entry's third element.
    n_leaves = sum(len(t[2]) for t in TREE_DATA)
    n_papers = len(PAPERS)

    with gr.Blocks(title="Robot Learning Paradigms") as demo:
        gr.HTML(
            f"""<div id='header'>
            <h1>🤖 Robot Learning Paradigms</h1>
            <p>A <strong>layered ontology</strong> of robot learning — control substrate, objective, world model, architecture, data regime, and deployment role.  The tree has {n_leaves} representative leaves across {n_families} projected families, backed by {n_papers}+ papers.</p>
            </div>"""
        )

        # One coloured chip per family; the description half of each FAMILY value is unused here.
        legend_html = '<div class="fam-legend">' + "".join(
            f"<span style='background:{c};'>{name}</span>" for name, (c, _) in FAMILY.items()
        ) + "</div>"
        gr.HTML(legend_html)

        with gr.Tabs():

            # ============== TAB 0: Layered Ontology ==============
            with gr.Tab("Layered Ontology"):
                gr.HTML(render_layered_ontology())

            # ============== TAB 1: Landscape Synthesis ==============
            with gr.Tab("Landscape Synthesis"):
                gr.HTML(LANDSCAPE_SYNTHESIS_HTML)

            # ============== TAB 2: Skill Map ==============
            _skill_map_tab()

            # ============== TAB 3: Ownership Audit ==============
            with gr.Tab("Ownership Audit"):
                gr.HTML(render_ownership_audit())

            # ============== TAB 4: Validation Rubric ==============
            with gr.Tab("Validation Rubric"):
                gr.HTML(VALIDATION_RUBRIC_HTML)

            # ============== TAB 5: Family Projection ==============
            _family_projection_tab()

            # ============== TAB 6: Connection Trees ==============
            with gr.Tab("Connection Trees"):
                gr.HTML(render_connection_trees())

            # ============== TAB 7: Paradigm Explorer ==============
            _paradigm_explorer_tab()

            # ============== TAB 8: Comparison ==============
            _side_by_side_tab()

            # ============== TAB 9: Pick Your Paradigm ==============
            _pick_your_paradigm_tab()

            # ============== TAB 10: Paper Atlas ==============
            _paper_atlas_tab(n_papers)

            # ============== TAB 11: Survey Sources ==============
            with gr.Tab("Survey Sources"):
                gr.HTML(render_survey_source_index())

            # ============== TAB 12: Relationship Map ==============
            with gr.Tab("Relationship Map"):
                gr.HTML(RELATIONSHIP_GUIDE_HTML)

            # ============== TAB 13: World Models 101 ==============
            with gr.Tab("World Models 101"):
                gr.HTML(WORLD_MODELS_101_HTML)

            # ============== TAB 14: The Big Picture ==============
            with gr.Tab("The Big Picture"):
                gr.Markdown(
                    """
### The clean view: six layers, not one list

| Layer | Classical control object | Modern robot-learning object |
|---|---|---|
| **Control substrate** | PID, impedance, LQR, MPC, hybrid automata | neural policy, action chunker, skill graph, trajectory optimizer, future-video proposal generator |
| **Learning objective** | tracking loss, hand-designed cost, Lyapunov/robust objective | BC, RL, offline RL, IRL, preference learning, predictive self-supervision |
| **Predictive model** | plant model, system identification, observer | latent dynamics, AC-WM, WAM, occupancy/contact/latent state model |
| **Architecture** | state coordinates, linearization, basis functions | VLA trunk, diffusion/flow/tokenized head, Decision Transformer, video diffusion, JEPA latent |
| **Data regime** | calibrated experiments, known model, system-ID rollouts | teleop demos, fixed logs, sim, play, failures, internet video, cross-embodiment data |
| **Deployment role** | controller, estimator, planner, verifier | policy, planner, simulator, evaluator, data generator, LLM/VLM orchestrator |

### Why the old flat taxonomy breaks

- **PPO, SAC, BC, IQL** are objectives / optimization procedures.
- **VLA** is an architecture + pretraining + data recipe, usually paired with BC-style action heads.
- **Decision Transformer** is a sequence architecture; it can instantiate BC, offline RL, or goal-conditioned control depending on conditioning tokens.
- **AC-WM and WAM** both belong under world models, but play different roles: simulator vs proposal generator.
- **Occupancy / latent state** is a representation inside world modeling, not a separate training objective.
- **Domain randomization, sim-to-real, representation pretraining, cross-embodiment transfer** are cross-cutting modifiers.

### Evolution from control theory to current robot learning

1. **Classical feedback control →** PID, computed torque, impedance, LQR: hand-designed feedback laws and stability objectives.
2. **Optimal control / MPC →** known or identified dynamics plus online optimization over a horizon.
3. **Learning from demonstration →** replace hand-designed controllers with supervised policies from expert actions.
4. **Deep RL →** learn policy/value functions from reward, often in simulation with sim-to-real transfer.
5. **Generative policies →** ACT, diffusion, flow, and tokenized action heads model multimodal demonstrations better than MSE regression.
6. **Offline sequence decision models →** fixed logs become sequence data; DT, Trajectory Transformer, Diffuser blur BC and offline RL.
7. **Foundation-model robotics →** VLM/VLA trunks add language and semantic priors, but still need an action head and objective.
8. **World-model robotics →** learned simulators/future generators support planning, RL, policy evaluation, and proposal generation.
9. **Hybrid systems →** VLA policy + world model + MPC/RL + LLM planner + safety controller.

### Practical classification rule

For any new method, classify it by answering:

1. What closes the loop at runtime?
2. What objective trains it?
3. Does it learn a future model? If yes, are actions inputs or outputs?
4. What architecture/representation does it use?
5. What data regime enables it?
6. What deployment role does it serve?

That is the cleanest way to cover current robot learning without confusing objectives, architectures, representations, and recipes.
                    """
                )

        gr.HTML(
            f"""<div style='text-align:center;color:#94a3b8;padding:14px 0 4px 0;font-size:12px;'>
            Layered ontology of robot-learning paradigms · {n_leaves} paradigms · {n_papers}+ papers · 1989 → {MAX_PAPER_YEAR}
            </div>"""
        )

    return demo


if __name__ == "__main__":
    # Script entry point: build the UI first, then serve it on the loopback
    # interface at a fixed port without opening a browser window.
    app = build_app()
    # NOTE(review): theme/css/head are passed to launch() here rather than to
    # gr.Blocks(); confirm the installed Gradio version accepts them there.
    launch_options = {
        "server_name": "127.0.0.1",
        "server_port": 7864,
        "inbrowser": False,
        "theme": gr.themes.Soft(primary_hue="indigo", secondary_hue="violet"),
        "css": CSS,
        "head": HEAD_HTML,
    }
    app.launch(**launch_options)