"""Generate a separate 'Evolution Timeline' page for robot learning.

A time-ordered lineage tree: a central time spine (the linear trunk), with each
method/milestone placed at its year and arrows showing what it was developed
FROM (influence / "developed after"). Hovering a node traces its full lineage
(ancestors above, descendants below). Self-contained HTML; same dark theme as
the landscape.

Run:  .venv_robot_paradigms/bin/python gen_evolution.py   -> robot_evolution.html
"""
import json
import html as html_lib

import robot_paradigms_app as app
import gen_landscape

# Human-readable label for each method family, keyed by the family id used in
# TIMELINE rows. Insertion order is significant: build_data() emits the families
# in this order, and the page uses that order for the legend and for the
# left-to-right placement of cards within a year row.
FAMILY_LABEL = {
    "Classical": "Classical Control",
    "BC": "Behavioral Cloning",
    "Reinforcement": "Reinforcement Learning",
    "Offline RL": "Offline RL",
    "Inverse RL": "Inverse RL",
    "Model-Based": "World Models",
    "Sequence": "Sequence Models",
    "Goal-Cond.": "Goal-Conditioned",
    "Hierarchical": "Hierarchical",
    "Meta-Learning": "Meta-Learning",
    "LLM-Orchestration": "LLM / VLM",
}
# Card/legend color per family: the first element of each app.FAMILY entry,
# with "Classical" always set to #64748b (added if app.FAMILY has no such key,
# overridden if it does).
FAMILY_COLOR = {key: spec[0] for key, spec in app.FAMILY.items()}
FAMILY_COLOR.update({"Classical": "#64748b"})

# Milestone table — one tuple per timeline node:
#   (id, name, year, family, [parent ids], note)
#   id       unique key; referenced by other rows' parent lists and by REUSE / EXTRA
#   name     text shown on the card
#   year     decides which row the card is placed in (one row per distinct year)
#   family   a key of FAMILY_LABEL (drives color, legend entry, order within a row)
#   parents  ids this milestone was "developed from / after"; one arrow per parent
#   note     one-line description shown in the hover tooltip
# Rows are kept in ascending year order.
# NOTE(review): "GR00T N1" is filed under 2024; its public release appears to be
# March 2025 — confirm the intended year before relying on it.
TIMELINE = [
    ("pid",            "PID control",        1922, "Classical", [], "Proportional-integral-derivative feedback — the oldest controller."),
    ("dp",             "Dynamic Programming",1957, "Classical", [], "Bellman's value recursion — the root of value-based RL."),
    ("lqr",            "LQR (Kalman)",       1960, "Classical", ["dp"], "Optimal linear feedback; a classical-control precursor (RL's roots are dynamic programming)."),
    ("astar",          "A* search",          1968, "Classical", [], "Optimal graph search — classic motion planning."),
    ("mpc",            "MPC",                1978, "Classical", ["lqr"], "Receding-horizon optimization with a known model."),
    ("qlearning",      "Q-Learning",         1989, "Reinforcement", ["dp"], "Learn action-values from rewards; act greedily."),
    ("alvinn",         "ALVINN",             1989, "BC", [], "First neural behavioral-cloning driver."),
    ("reinforce",      "REINFORCE",          1992, "Reinforcement", [], "The original policy-gradient estimator."),
    ("prm",            "PRM / RRT",          1996, "Classical", ["astar"], "Sampling-based motion planning in continuous spaces."),
    ("options",        "Options (HRL)",      1999, "Hierarchical", ["qlearning"], "Temporal abstraction: sub-policies over time."),
    ("irl",            "Inverse RL",         2000, "Inverse RL", ["qlearning"], "Recover the reward that explains expert behavior."),
    ("maxentirl",      "MaxEnt IRL",         2008, "Inverse RL", ["irl"], "Probabilistic IRL via maximum entropy."),
    ("dqn",            "DQN",                2013, "Reinforcement", ["qlearning"], "Deep Q-Networks crack Atari from pixels."),
    ("trpo",           "TRPO",               2015, "Reinforcement", ["reinforce"], "Trust-region policy optimization."),
    ("ddpg",           "DDPG",               2015, "Reinforcement", ["dqn", "reinforce"], "Off-policy actor-critic for continuous control."),
    ("a3c",            "A3C",                2016, "Reinforcement", ["dqn", "reinforce"], "Asynchronous advantage actor-critic."),
    ("gail",           "GAIL",               2016, "Inverse RL", ["maxentirl", "trpo"], "Adversarial imitation — match the expert via a discriminator."),
    ("rl2",            "RL²",                2016, "Meta-Learning", ["a3c"], "An RNN that learns a learning algorithm."),
    ("ppo",            "PPO",                2017, "Reinforcement", ["trpo"], "Clipped policy gradient — the RL workhorse."),
    ("maml",           "MAML",               2017, "Meta-Learning", ["reinforce"], "Learn an init that adapts in a few gradient steps."),
    ("her",            "HER",                2017, "Goal-Cond.", ["ddpg"], "Relabel failures as successes for goal-reaching."),
    ("optioncritic",   "Option-Critic",      2017, "Hierarchical", ["options", "a3c"], "Learn options end-to-end."),
    ("worldmodels",    "World Models",       2018, "Model-Based", ["dqn"], "Train a policy inside a learned generative world."),
    ("sac",            "SAC",                2018, "Reinforcement", ["ddpg"], "Maximum-entropy off-policy actor-critic."),
    ("td3",            "TD3",                2018, "Reinforcement", ["ddpg"], "Twin-critic fix for overestimation."),
    ("dreamer",        "Dreamer",            2019, "Model-Based", ["worldmodels"], "Learn behaviors by latent imagination."),
    ("pearl",          "PEARL",              2019, "Meta-Learning", ["maml", "sac"], "Off-policy meta-RL with latent task variables."),
    ("bcq",            "BCQ",                2019, "Offline RL", ["ddpg"], "Batch-constrained offline RL."),
    ("cql",            "CQL",                2020, "Offline RL", ["bcq", "sac"], "Conservative Q-learning for offline data."),
    ("muzero",         "MuZero",             2020, "Model-Based", ["dreamer", "dqn"], "Plan with a learned model, no rules given."),
    ("gpt3",           "GPT-3 (LLMs)",       2020, "LLM-Orchestration", [], "Large language models — an external force on robotics."),
    ("dt",             "Decision Transformer",2021, "Sequence", ["cql", "gpt3"], "Cast RL as return-conditioned sequence modeling."),
    ("implicitbc",     "Implicit BC",        2021, "BC", ["alvinn"], "Energy-based behavioral cloning."),
    ("diffuser",       "Diffuser",           2022, "Sequence", ["dt"], "Plan by denoising whole trajectories."),
    ("rt1",            "RT-1",               2022, "BC", ["alvinn", "gpt3"], "Tokenized robot transformer at scale."),
    ("gato",           "Gato",               2022, "BC", ["dt"], "One tokenized transformer for many tasks."),
    ("saycan",         "SayCan",             2022, "LLM-Orchestration", ["gpt3"], "LLM proposes, affordances ground."),
    ("cap",            "Code as Policies",   2022, "LLM-Orchestration", ["gpt3"], "LLM writes robot control code."),
    ("tdmpc",          "TD-MPC",             2022, "Model-Based", ["dreamer", "muzero"], "Latent planning + a learned value."),
    ("diffusionpolicy","Diffusion Policy",   2023, "BC", ["implicitbc", "diffuser"], "Generate actions by denoising — multi-modal BC."),
    ("aloha",          "ACT / ALOHA",        2023, "BC", ["alvinn"], "Action chunking for fine bimanual teleop."),
    ("rt2",            "RT-2 (VLA)",         2023, "BC", ["rt1", "gpt3"], "Vision-language-action model from a VLM."),
    ("flow",           "Flow Matching",      2023, "BC", ["diffusionpolicy"], "Straight-line generative action head."),
    ("dreamerv3",      "DreamerV3",          2023, "Model-Based", ["dreamer"], "One world-model recipe across many domains."),
    ("voxposer",       "VoxPoser",           2023, "LLM-Orchestration", ["saycan", "cap"], "VLM-composed 3D value maps for manipulation."),
    ("unipi",          "UniPi",              2023, "Model-Based", ["worldmodels"], "Generate video plans, then act."),
    ("openvla",        "OpenVLA",            2024, "BC", ["rt2"], "Open vision-language-action model."),
    ("pi0",            "π₀ (flow VLA)",      2024, "BC", ["flow", "openvla"], "Flow-matching action head on a VLM trunk."),
    ("rekep",          "ReKep",              2024, "LLM-Orchestration", ["voxposer"], "Relational keypoint constraints from a VLM."),
    ("groot",          "GR00T N1",           2024, "Model-Based", ["rt2", "diffusionpolicy"], "Humanoid foundation policy."),
    ("vjepa2",         "V-JEPA 2",           2025, "Model-Based", ["dreamerv3"], "Action-conditioned latent world model + planning."),
    ("wam",            "World-Action Models",2025, "Model-Based", ["unipi", "pi0"], "Imagine the future video AND the actions."),
    ("pi05",           "π₀.₅ / π₀.₆",        2025, "BC", ["pi0"], "Newer flow VLAs with broader generalization."),
]


# Timeline node -> reuse a verified landscape "Learn" link (same concept).
# Maps a TIMELINE id to a key of gen_landscape.LEARN. learn_for() consults this
# table only for nodes that have no entry in EXTRA, and returns no link when the
# mapped key is absent from gen_landscape.LEARN. Several nodes may share one key.
REUSE = {
    "pid": "pid-control", "lqr": "lqr", "mpc": "classical-mpc",
    "astar": "motion-planning", "prm": "motion-planning",
    "qlearning": "value-based-rl", "dqn": "value-based-rl",
    "trpo": "policy-gradient-rl", "ppo": "policy-gradient-rl", "sac": "off-policy-ac",
    "options": "hrl", "optioncritic": "hrl", "irl": "maxent-irl", "maxentirl": "maxent-irl",
    "gail": "gail", "rl2": "meta-learning", "maml": "meta-learning", "pearl": "meta-learning",
    "her": "goal-conditioned", "worldmodels": "generative-video-wm",
    "dreamer": "latent-imagination", "dreamerv3": "latent-imagination",
    "bcq": "offline-rl", "cql": "offline-rl", "dt": "decision-transformer",
    "diffuser": "trajectory-diffusion", "implicitbc": "energy-based-bc",
    "diffusionpolicy": "diffusion-policy", "flow": "flow-matching-policy",
    "cap": "llm-planner", "voxposer": "vlm-affordance", "openvla": "tokenized-bc",
    "pi0": "flow-matching-policy", "vjepa2": "action-conditioned-wm", "wam": "world-action-model",
}
# Timeline-specific explainers (verified reachable June 2026)
# Maps a TIMELINE id to (link title, URL). learn_for() checks this table first,
# so an entry here takes precedence over any REUSE mapping for the same id.
EXTRA = {
    "dp":        ("Dynamic Programming & Value Iteration — Brunton (video)", "https://www.youtube.com/watch?v=sJIFUTITfBc"),
    "alvinn":    ("Behavioral Cloning for Self-Driving — Udacity (video)", "https://www.youtube.com/watch?v=xxuEDx_zlsU"),
    "reinforce": ("Policy Gradients & PPO — Arxiv Insights (video)", "https://www.youtube.com/watch?v=5P7I-xPq8u8"),
    "a3c":       ("Actor-Critics & A3C — AI Summer (illustrated)", "https://theaisummer.com/Actor_critics/"),
    "ddpg":      ("DDPG Explained (video)", "https://www.youtube.com/watch?v=ClKZaaIJr6Y"),
    "td3":       ("TD3 Explained (video)", "https://www.youtube.com/watch?v=qXyLPHe2qv8"),
    "muzero":    ("MuZero — DeepMind", "https://deepmind.google/blog/muzero-mastering-go-chess-shogi-and-atari-without-rules/"),
    "tdmpc":     ("TD-MPC2 — project page", "https://www.tdmpc2.com/"),
    "gpt3":      ("How GPT-3 Works — Jay Alammar", "https://jalammar.github.io/how-gpt3-works-visualizations-animations/"),
    "gato":      ("Gato: A Generalist Agent — DeepMind", "https://deepmind.google/discover/blog/a-generalist-agent/"),
    "unipi":     ("UniPi — Google Research", "https://research.google/blog/unipi-learning-universal-policies-via-text-guided-video-generation/"),
    "aloha":     ("ALOHA / ACT — project page", "https://tonyzhaozh.github.io/aloha/"),
    "rt1":       ("RT-1 — project page", "https://robotics-transformer1.github.io/"),
    "rt2":       ("RT-2 (VLA) — project page", "https://robotics-transformer2.github.io/"),
    "saycan":    ("SayCan — project page", "https://say-can.github.io/"),
    "rekep":     ("ReKep — project page", "https://rekep-robot.github.io/"),
    "groot":     ("Isaac GR00T N1 — NVIDIA", "https://developer.nvidia.com/isaac/gr00t"),
    "pi05":      ("π₀.₅ — Physical Intelligence", "https://www.physicalintelligence.company/blog/pi05"),
}


def learn_for(node_id):
    """Pick the "Learn" link for a timeline node.

    Lookup order:
      1. ``EXTRA[node_id]`` — a timeline-specific ``(title, url)`` entry.
      2. ``gen_landscape.LEARN[REUSE[node_id]]`` — a link shared with the
         landscape page, used only when both lookups succeed.

    Returns a ``{"title": ..., "url": ...}`` dict, or ``None`` when neither
    table yields a link for ``node_id``.
    """
    # Timeline-specific explainers win over reused landscape links.
    if node_id in EXTRA:
        entry = EXTRA[node_id]
        return {"title": entry[0], "url": entry[1]}

    if node_id not in REUSE:
        return None

    landscape_key = REUSE[node_id]
    if landscape_key not in gen_landscape.LEARN:
        return None

    title, url = gen_landscape.LEARN[landscape_key]
    return {"title": title, "url": url}


def build_data():
    """Assemble the payload that is embedded in the generated page.

    Returns a dict with two keys:
      ``nodes``    — one dict per TIMELINE row with keys ``id``, ``name``,
                     ``year``, ``fam``, ``parents``, ``note`` and ``learn``
                     (the result of ``learn_for`` for that row, possibly None).
      ``families`` — one dict per FAMILY_LABEL entry, in FAMILY_LABEL order,
                     with keys ``key``, ``label`` and ``color``.
    """
    nodes = []
    for node_id, name, year, family, parents, note in TIMELINE:
        nodes.append({
            "id": node_id,
            "name": name,
            "year": year,
            "fam": family,
            "parents": parents,
            "note": note,
            "learn": learn_for(node_id),
        })

    families = [
        {"key": key, "label": label, "color": FAMILY_COLOR[key]}
        for key, label in FAMILY_LABEL.items()
    ]
    return {"nodes": nodes, "families": families}


# Complete page template: static HTML/CSS plus the client-side script that lays
# out and renders the timeline. ``__DATA_JSON__`` is the single placeholder;
# main() replaces it with the JSON payload produced by build_data().
# A raw string is used so backslashes in the embedded markup/script are never
# interpreted as Python escapes.
# The embedded esc() helper escapes &, <, > and " so it is safe for both text
# content and double-quoted attribute values; the explainer URL is passed
# through it before being placed in href="…".
TEMPLATE = r"""<!doctype html>
<html lang="en"><head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="icon" href="data:,"/>
<title>Robot Learning — Evolution Timeline</title>
<style>
  :root{ --bg:#070b16; --ink:#e7ecf6; --mut:#93a0bd; --gold:#fbbf24; }
  *{ box-sizing:border-box; }
  html,body{ margin:0; background:var(--bg); color:var(--ink);
    font-family:Inter,-apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif; }
  .top{ position:sticky; top:0; z-index:20; display:flex; align-items:baseline; gap:14px; flex-wrap:wrap;
    padding:14px 22px; background:linear-gradient(#070b16,#070b16ee); border-bottom:1px solid #16203a; }
  .top h1{ margin:0; font-size:20px; font-weight:800;
    background:linear-gradient(90deg,#a78bfa,#67e8f9); -webkit-background-clip:text; background-clip:text; color:transparent; }
  .top .sub{ color:var(--mut); font-size:12.5px; }
  .top .sub b{ color:var(--gold); }
  .legend{ display:flex; gap:10px 14px; flex-wrap:wrap; margin-left:auto; }
  .legend .li{ display:flex; align-items:center; gap:6px; font-size:11px; color:#cbd5e1; }
  .legend .dot{ width:10px; height:10px; border-radius:50%; }
  .wrap{ position:relative; width:100%; overflow:auto; }
  svg.tl{ display:block; }
  .spine{ stroke:#27406b; stroke-width:2; }
  .ytick{ fill:#64748b; font-size:12px; font-weight:700; }
  .yline{ stroke:#16203a; stroke-width:1; }
  .lk{ fill:none; stroke:#42506e; stroke-width:1.5; opacity:.30; transition:.2s; }
  .lk.lit{ stroke:var(--gold); opacity:.95; stroke-width:2.4; filter:drop-shadow(0 0 6px rgba(251,191,36,.55)); }
  .node{ cursor:pointer; }
  .node rect{ transition:transform .15s, filter .15s, opacity .2s; transform-box:fill-box; transform-origin:center; }
  .node .nm{ fill:#e7ecf6; font-size:10.5px; font-weight:700; pointer-events:none; }
  .node .yr{ fill:#0b1326; font-size:9px; font-weight:800; pointer-events:none; }
  .node .lkic{ fill:#8aa0c6; font-size:11px; pointer-events:none; }
  .node:hover rect.card{ transform:scale(1.05); filter:drop-shadow(0 4px 12px rgba(0,0,0,.5)); }
  .node:hover .lkic{ fill:var(--gold); }
  .node.has-link{ cursor:pointer; }
  a{ text-decoration:none; }
  .node.dim{ opacity:.18; }
  .tip{ position:fixed; z-index:40; pointer-events:none; max-width:260px; padding:8px 11px; border-radius:9px;
    background:#0b1326; border:1px solid #334155; font-size:12px; color:var(--ink);
    box-shadow:0 12px 30px rgba(0,0,0,.55); opacity:0; transition:opacity .12s; }
  .tip.on{ opacity:1; }
  .tip .tn{ font-weight:800; } .tip .tf{ font-size:10.5px; text-transform:uppercase; letter-spacing:.05em; }
</style></head>
<body>
  <div class="top">
    <h1>⏳ Robot Learning — Evolution Timeline</h1>
    <span class="sub" id="meta"></span>
    <span class="sub">— arrows point <b>from</b> an idea <b>to</b> what was built on it. Hover to trace lineage · <b>click ↗</b> to read its best explainer.</span>
    <div class="legend" id="legend"></div>
  </div>
  <div class="wrap"><svg class="tl" id="svg"></svg></div>
  <div class="tip" id="tip"></div>
<script>
const D = __DATA_JSON__;
const famColor={}; D.families.forEach(f=>famColor[f.key]=f.color);
const famLabel={}; D.families.forEach(f=>famLabel[f.key]=f.label);
const byId={}; D.nodes.forEach(n=>byId[n.id]=n);
document.getElementById('meta').textContent = D.nodes.length+' milestones · '+
  (Math.min(...D.nodes.map(n=>n.year)))+'–'+(Math.max(...D.nodes.map(n=>n.year)));
document.getElementById('legend').innerHTML = D.families.map(f=>
  '<span class="li"><span class="dot" style="background:'+f.color+'"></span>'+esc(f.label)+'</span>').join('');

/* ---- layout: y by year-row, x spread within the row, central spine ---- */
const years=[...new Set(D.nodes.map(n=>n.year))].sort((a,b)=>a-b);
const rowY={}; const TOP=70, ROW=78; years.forEach((y,i)=>rowY[y]=TOP+i*ROW);
const LEFTPAD=86, RIGHTPAD=40, CARD_W=164, CARD_H=34;
const W=Math.max(1180, LEFTPAD+RIGHTPAD+ (Math.max(...years.map(y=>D.nodes.filter(n=>n.year===y).length)))*205);
const H=TOP+years.length*ROW+40;
const cxSpine=(LEFTPAD+ (W-RIGHTPAD))/2;
// order families for stable left->right placement within a row
const famOrder={}; D.families.forEach((f,i)=>famOrder[f.key]=i);
years.forEach(y=>{
  const row=D.nodes.filter(n=>n.year===y).sort((a,b)=>famOrder[a.fam]-famOrder[b.fam]);
  const span=(W-LEFTPAD-RIGHTPAD);
  row.forEach((n,j)=>{
    n.x = LEFTPAD + span*((j+1)/(row.length+1));
    n.y = rowY[y];
  });
});

/* ---- ancestry / descendants for hover ---- */
function lineage(id){
  const up=new Set(), down=new Set();
  (function anc(i){ (byId[i].parents||[]).forEach(p=>{ if(!up.has(p)){ up.add(p); anc(p); } }); })(id);
  (function desc(i){ D.nodes.forEach(n=>{ if((n.parents||[]).includes(i) && !down.has(n.id)){ down.add(n.id); desc(n.id); } }); })(id);
  const set=new Set([id,...up,...down]); return set;
}

const svg=document.getElementById('svg');
svg.setAttribute('viewBox','0 0 '+W+' '+H);
svg.setAttribute('width', W); svg.setAttribute('height', H);
/* escapes for text content AND double-quoted attribute values */
function esc(t){ return (t||'').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;'); }
function curve(x1,y1,x2,y2){ const my=(y1+y2)/2; return 'M'+x1+','+y1+' C'+x1+','+my+' '+x2+','+my+' '+x2+','+y2; }

function render(){
  let s='<defs><marker id="ar" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="7" markerHeight="7" orient="auto-start-reverse"><path d="M0,0 L10,5 L0,10 z" fill="context-stroke"/></marker></defs>';
  // year rows + spine
  s+='<line class="spine" x1="'+cxSpine+'" y1="'+(TOP-24)+'" x2="'+cxSpine+'" y2="'+(H-30)+'"/>';
  years.forEach(y=>{ const yy=rowY[y];
    s+='<line class="yline" x1="40" y1="'+yy+'" x2="'+(W-20)+'" y2="'+yy+'"/>'+
       '<text class="ytick" x="14" y="'+(yy+4)+'">'+y+'</text>'; });
  // links: parent -> child (downward in time)
  D.nodes.forEach(n=>(n.parents||[]).forEach(pid=>{ const p=byId[pid]; if(!p) return;
    s+='<path class="lk" data-a="'+pid+'" data-b="'+n.id+'" marker-end="url(#ar)" d="'+
       curve(p.x, p.y+CARD_H/2, n.x, n.y-CARD_H/2)+'"/>'; }));
  // nodes (whole card is a link to the best explainer when one exists)
  D.nodes.forEach(n=>{ const c=famColor[n.fam]; const x=n.x-CARD_W/2, y=n.y-CARD_H/2;
    const hasL=!!n.learn;
    const g='<g class="node'+(hasL?' has-link':'')+'" data-id="'+n.id+'">'+
      '<rect class="card" x="'+x+'" y="'+y+'" width="'+CARD_W+'" height="'+CARD_H+'" rx="9" '+
        'fill="#0c1326" stroke="'+c+'" stroke-width="1.8"/>'+
      '<rect class="yr-bg" x="'+x+'" y="'+y+'" width="34" height="'+CARD_H+'" rx="9" fill="'+c+'"/>'+
      '<text class="yr" x="'+(x+17)+'" y="'+(n.y+3)+'" text-anchor="middle">’'+(String(n.year).slice(2))+'</text>'+
      '<text class="nm" x="'+(x+41)+'" y="'+(n.y+4)+'">'+esc(n.name)+'</text>'+
      (hasL?'<text class="lkic" x="'+(x+CARD_W-8)+'" y="'+(n.y+4)+'" text-anchor="middle">↗</text>':'')+
    '</g>';
    s += hasL ? '<a href="'+esc(n.learn.url)+'" target="_blank" rel="noopener noreferrer">'+g+'</a>' : g;
  });
  svg.innerHTML=s;
}

const tip=document.getElementById('tip');
svg.addEventListener('mouseover', e=>{ const g=e.target.closest('.node'); if(!g) return;
  const n=byId[g.getAttribute('data-id')]; const set=lineage(n.id);
  svg.querySelectorAll('.node').forEach(gg=>gg.classList.toggle('dim', !set.has(gg.getAttribute('data-id'))));
  svg.querySelectorAll('.lk').forEach(l=>{ const a=l.getAttribute('data-a'), b=l.getAttribute('data-b');
    l.classList.toggle('lit', set.has(a)&&set.has(b)); });
  tip.innerHTML='<div class="tf" style="color:'+famColor[n.fam]+'">'+esc(famLabel[n.fam])+' · '+n.year+'</div>'+
    '<div class="tn">'+esc(n.name)+'</div><div style="color:var(--mut);margin-top:3px">'+esc(n.note)+'</div>'+
    (n.learn?'<div style="margin-top:5px;color:#7dd3fc;font-size:11px">📖 '+esc(n.learn.title)+' ↗</div>':'');
  tip.classList.add('on');
});
svg.addEventListener('mousemove', e=>{ let x=e.clientX+14, y=e.clientY+14;
  x=Math.min(x, window.innerWidth-tip.offsetWidth-12); y=Math.min(y, window.innerHeight-tip.offsetHeight-12);
  tip.style.left=x+'px'; tip.style.top=y+'px'; });
svg.addEventListener('mouseout', e=>{ if(e.target.closest('.node')){
  svg.querySelectorAll('.node.dim').forEach(g=>g.classList.remove('dim'));
  svg.querySelectorAll('.lk.lit').forEach(l=>l.classList.remove('lit')); tip.classList.remove('on'); } });

render();
</script>
</body></html>
"""


def _script_safe_json(data):
    """Serialize *data* as JSON that can be inlined inside a ``<script>`` element.

    ``json.dumps`` leaves ``<`` untouched, so a string containing ``</script``
    or ``<!--`` would terminate or corrupt the surrounding script block. In
    JSON, ``<`` can only occur inside string values, so rewriting every ``<``
    as the escape ``\\u003c`` keeps the decoded value identical while making
    the text inert to the HTML parser.
    """
    return json.dumps(data).replace("<", "\\u003c")


def main():
    """Render the timeline page and write it to ``robot_evolution.html``.

    Builds the payload with ``build_data()``, substitutes it for the
    ``__DATA_JSON__`` placeholder in ``TEMPLATE``, writes the result as UTF-8
    to the current working directory, and prints a one-line summary.
    """
    data = build_data()
    html = TEMPLATE.replace("__DATA_JSON__", _script_safe_json(data))
    with open("robot_evolution.html", "w", encoding="utf-8") as fh:
        fh.write(html)
    print(f"wrote robot_evolution.html ({len(html):,} chars) — {len(data['nodes'])} milestones")


# Script entry point: generate the page only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()