"""Generate a separate 'Evolution Timeline' page for robot learning. A time-ordered lineage tree: a central time spine (the linear trunk), with each method/milestone placed at its year and arrows showing what it was developed FROM (influence / "developed after"). Hovering a node traces its full lineage (ancestors above, descendants below). Self-contained HTML; same dark theme as the landscape. Run: .venv_robot_paradigms/bin/python gen_evolution.py -> robot_evolution.html """ import json import html as html_lib import robot_paradigms_app as app import gen_landscape FAMILY_LABEL = { "Classical": "Classical Control", "BC": "Behavioral Cloning", "Reinforcement": "Reinforcement Learning", "Offline RL": "Offline RL", "Inverse RL": "Inverse RL", "Model-Based": "World Models", "Sequence": "Sequence Models", "Goal-Cond.": "Goal-Conditioned", "Hierarchical": "Hierarchical", "Meta-Learning": "Meta-Learning", "LLM-Orchestration": "LLM / VLM", } FAMILY_COLOR = {k: app.FAMILY[k][0] for k in app.FAMILY} FAMILY_COLOR["Classical"] = "#64748b" # (id, name, year, family, [parent ids], note) — parents = "developed from / after" TIMELINE = [ ("pid", "PID control", 1922, "Classical", [], "Proportional-integral-derivative feedback — the oldest controller."), ("dp", "Dynamic Programming",1957, "Classical", [], "Bellman's value recursion — the root of value-based RL."), ("lqr", "LQR (Kalman)", 1960, "Classical", ["dp"], "Optimal linear feedback; a classical-control precursor (RL's roots are dynamic programming)."), ("astar", "A* search", 1968, "Classical", [], "Optimal graph search — classic motion planning."), ("mpc", "MPC", 1978, "Classical", ["lqr"], "Receding-horizon optimization with a known model."), ("qlearning", "Q-Learning", 1989, "Reinforcement", ["dp"], "Learn action-values from rewards; act greedily."), ("alvinn", "ALVINN", 1989, "BC", [], "First neural behavioral-cloning driver."), ("reinforce", "REINFORCE", 1992, "Reinforcement", [], "The original policy-gradient estimator."), ("prm", "PRM / RRT", 1996, "Classical", ["astar"], "Sampling-based motion planning in continuous spaces."), ("options", "Options (HRL)", 1999, "Hierarchical", ["qlearning"], "Temporal abstraction: sub-policies over time."), ("irl", "Inverse RL", 2000, "Inverse RL", ["qlearning"], "Recover the reward that explains expert behavior."), ("maxentirl", "MaxEnt IRL", 2008, "Inverse RL", ["irl"], "Probabilistic IRL via maximum entropy."), ("dqn", "DQN", 2013, "Reinforcement", ["qlearning"], "Deep Q-Networks crack Atari from pixels."), ("trpo", "TRPO", 2015, "Reinforcement", ["reinforce"], "Trust-region policy optimization."), ("ddpg", "DDPG", 2015, "Reinforcement", ["dqn", "reinforce"], "Off-policy actor-critic for continuous control."), ("a3c", "A3C", 2016, "Reinforcement", ["dqn", "reinforce"], "Asynchronous advantage actor-critic."), ("gail", "GAIL", 2016, "Inverse RL", ["maxentirl", "trpo"], "Adversarial imitation — match the expert via a discriminator."), ("rl2", "RL²", 2016, "Meta-Learning", ["a3c"], "An RNN that learns a learning algorithm."), ("ppo", "PPO", 2017, "Reinforcement", ["trpo"], "Clipped policy gradient — the RL workhorse."), ("maml", "MAML", 2017, "Meta-Learning", ["reinforce"], "Learn an init that adapts in a few gradient steps."), ("her", "HER", 2017, "Goal-Cond.", ["ddpg"], "Relabel failures as successes for goal-reaching."), ("optioncritic", "Option-Critic", 2017, "Hierarchical", ["options", "a3c"], "Learn options end-to-end."), ("worldmodels", "World Models", 2018, "Model-Based", ["dqn"], "Train a policy inside a learned generative world."), ("sac", "SAC", 2018, "Reinforcement", ["ddpg"], "Maximum-entropy off-policy actor-critic."), ("td3", "TD3", 2018, "Reinforcement", ["ddpg"], "Twin-critic fix for overestimation."), ("dreamer", "Dreamer", 2019, "Model-Based", ["worldmodels"], "Learn behaviors by latent imagination."), ("pearl", "PEARL", 2019, "Meta-Learning", ["maml", "sac"], "Off-policy meta-RL with latent task variables."), ("bcq", "BCQ", 2019, "Offline RL", ["ddpg"], "Batch-constrained offline RL."), ("cql", "CQL", 2020, "Offline RL", ["bcq", "sac"], "Conservative Q-learning for offline data."), ("muzero", "MuZero", 2020, "Model-Based", ["dreamer", "dqn"], "Plan with a learned model, no rules given."), ("gpt3", "GPT-3 (LLMs)", 2020, "LLM-Orchestration", [], "Large language models — an external force on robotics."), ("dt", "Decision Transformer",2021, "Sequence", ["cql", "gpt3"], "Cast RL as return-conditioned sequence modeling."), ("implicitbc", "Implicit BC", 2021, "BC", ["alvinn"], "Energy-based behavioral cloning."), ("diffuser", "Diffuser", 2022, "Sequence", ["dt"], "Plan by denoising whole trajectories."), ("rt1", "RT-1", 2022, "BC", ["alvinn", "gpt3"], "Tokenized robot transformer at scale."), ("gato", "Gato", 2022, "BC", ["dt"], "One tokenized transformer for many tasks."), ("saycan", "SayCan", 2022, "LLM-Orchestration", ["gpt3"], "LLM proposes, affordances ground."), ("cap", "Code as Policies", 2022, "LLM-Orchestration", ["gpt3"], "LLM writes robot control code."), ("tdmpc", "TD-MPC", 2022, "Model-Based", ["dreamer", "muzero"], "Latent planning + a learned value."), ("diffusionpolicy","Diffusion Policy", 2023, "BC", ["implicitbc", "diffuser"], "Generate actions by denoising — multi-modal BC."), ("aloha", "ACT / ALOHA", 2023, "BC", ["alvinn"], "Action chunking for fine bimanual teleop."), ("rt2", "RT-2 (VLA)", 2023, "BC", ["rt1", "gpt3"], "Vision-language-action model from a VLM."), ("flow", "Flow Matching", 2023, "BC", ["diffusionpolicy"], "Straight-line generative action head."), ("dreamerv3", "DreamerV3", 2023, "Model-Based", ["dreamer"], "One world-model recipe across many domains."), ("voxposer", "VoxPoser", 2023, "LLM-Orchestration", ["saycan", "cap"], "VLM-composed 3D value maps for manipulation."), ("unipi", "UniPi", 2023, "Model-Based", ["worldmodels"], "Generate video plans, then act."), ("openvla", "OpenVLA", 2024, "BC", ["rt2"], "Open vision-language-action model."), ("pi0", "π₀ (flow VLA)", 2024, "BC", ["flow", "openvla"], "Flow-matching action head on a VLM trunk."), ("rekep", "ReKep", 2024, "LLM-Orchestration", ["voxposer"], "Relational keypoint constraints from a VLM."), ("groot", "GR00T N1", 2024, "Model-Based", ["rt2", "diffusionpolicy"], "Humanoid foundation policy."), ("vjepa2", "V-JEPA 2", 2025, "Model-Based", ["dreamerv3"], "Action-conditioned latent world model + planning."), ("wam", "World-Action Models",2025, "Model-Based", ["unipi", "pi0"], "Imagine the future video AND the actions."), ("pi05", "π₀.₅ / π₀.₆", 2025, "BC", ["pi0"], "Newer flow VLAs with broader generalization."), ] # Timeline node -> reuse a verified landscape "Learn" link (same concept) REUSE = { "pid": "pid-control", "lqr": "lqr", "mpc": "classical-mpc", "astar": "motion-planning", "prm": "motion-planning", "qlearning": "value-based-rl", "dqn": "value-based-rl", "trpo": "policy-gradient-rl", "ppo": "policy-gradient-rl", "sac": "off-policy-ac", "options": "hrl", "optioncritic": "hrl", "irl": "maxent-irl", "maxentirl": "maxent-irl", "gail": "gail", "rl2": "meta-learning", "maml": "meta-learning", "pearl": "meta-learning", "her": "goal-conditioned", "worldmodels": "generative-video-wm", "dreamer": "latent-imagination", "dreamerv3": "latent-imagination", "bcq": "offline-rl", "cql": "offline-rl", "dt": "decision-transformer", "diffuser": "trajectory-diffusion", "implicitbc": "energy-based-bc", "diffusionpolicy": "diffusion-policy", "flow": "flow-matching-policy", "cap": "llm-planner", "voxposer": "vlm-affordance", "openvla": "tokenized-bc", "pi0": "flow-matching-policy", "vjepa2": "action-conditioned-wm", "wam": "world-action-model", } # Timeline-specific explainers (verified reachable June 2026) EXTRA = { "dp": ("Dynamic Programming & Value Iteration — Brunton (video)", "https://www.youtube.com/watch?v=sJIFUTITfBc"), "alvinn": ("Behavioral Cloning for Self-Driving — Udacity (video)", "https://www.youtube.com/watch?v=xxuEDx_zlsU"), "reinforce": ("Policy Gradients & PPO — Arxiv Insights (video)", "https://www.youtube.com/watch?v=5P7I-xPq8u8"), "a3c": ("Actor-Critics & A3C — AI Summer (illustrated)", "https://theaisummer.com/Actor_critics/"), "ddpg": ("DDPG Explained (video)", "https://www.youtube.com/watch?v=ClKZaaIJr6Y"), "td3": ("TD3 Explained (video)", "https://www.youtube.com/watch?v=qXyLPHe2qv8"), "muzero": ("MuZero — DeepMind", "https://deepmind.google/blog/muzero-mastering-go-chess-shogi-and-atari-without-rules/"), "tdmpc": ("TD-MPC2 — project page", "https://www.tdmpc2.com/"), "gpt3": ("How GPT-3 Works — Jay Alammar", "https://jalammar.github.io/how-gpt3-works-visualizations-animations/"), "gato": ("Gato: A Generalist Agent — DeepMind", "https://deepmind.google/discover/blog/a-generalist-agent/"), "unipi": ("UniPi — Google Research", "https://research.google/blog/unipi-learning-universal-policies-via-text-guided-video-generation/"), "aloha": ("ALOHA / ACT — project page", "https://tonyzhaozh.github.io/aloha/"), "rt1": ("RT-1 — project page", "https://robotics-transformer1.github.io/"), "rt2": ("RT-2 (VLA) — project page", "https://robotics-transformer2.github.io/"), "saycan": ("SayCan — project page", "https://say-can.github.io/"), "rekep": ("ReKep — project page", "https://rekep-robot.github.io/"), "groot": ("Isaac GR00T N1 — NVIDIA", "https://developer.nvidia.com/isaac/gr00t"), "pi05": ("π₀.₅ — Physical Intelligence", "https://www.physicalintelligence.company/blog/pi05"), } def learn_for(node_id): if node_id in EXTRA: return {"title": EXTRA[node_id][0], "url": EXTRA[node_id][1]} if node_id in REUSE and REUSE[node_id] in gen_landscape.LEARN: t, u = gen_landscape.LEARN[REUSE[node_id]] return {"title": t, "url": u} return None def build_data(): nodes = [dict(id=i, name=n, year=y, fam=f, parents=ps, note=note, learn=learn_for(i)) for (i, n, y, f, ps, note) in TIMELINE] fams = [dict(key=k, label=FAMILY_LABEL[k], color=FAMILY_COLOR[k]) for k in FAMILY_LABEL] return dict(nodes=nodes, families=fams) TEMPLATE = r"""