"""Generate a per-paradigm Algorithm Lab. This page follows the interaction pattern of the local DiT / cross-attention explainers: a stepper controls animation, formula rows, code lines, and a plain English trace at the same time. The content is generated for every node in the main robot-learning landscape, using specific templates for the method family. """ from __future__ import annotations import json import gen_landscape import gen_vlm import gen_worldmodel SOURCE_LINKS = [ ("Diffusion Explainer", "https://poloclub.github.io/diffusion-explainer/"), ("Diffusion Explainer paper", "https://arxiv.org/abs/2305.03509"), ("Distill explorable explanations", "https://distill.pub/"), ("The Illustrated Transformer", "https://jalammar.github.io/illustrated-transformer/"), ("BertViz attention visualization", "https://arxiv.org/abs/1906.05714"), ("Attention Flow", "https://arxiv.org/abs/2005.00928"), ("Spinning Up RL docs", "https://spinningup.openai.com/"), ("Sutton & Barto RL book", "http://incompleteideas.net/book/the-book-2nd.html"), ("Lilian Weng policy gradients", "https://lilianweng.github.io/posts/2018-04-08-policy-gradient/"), ("Diffusion Policy", "https://diffusion-policy.cs.columbia.edu/"), ("Decision Transformer", "https://sites.google.com/berkeley.edu/decision-transformer"), ("World Models", "https://worldmodels.github.io/"), ] ARCHETYPES = { "diffusion": { "title": "Generative action model", "intuition": "把动作或轨迹当成要生成的对象:从噪声出发,逐步变成可执行动作。", "steps": [ ("context", "读状态/指令", "observation + language become the conditioning context", [0, 1], [0]), ("noise", "采样噪声", "start from noise or a noisy trajectory/action", [1, 2], [1, 2]), ("score", "预测修正", "network predicts denoising score / velocity / action correction", [2], [3, 4]), ("iterate", "迭代生成", "repeat a few denoising or flow integration steps", [2, 3], [5, 6]), ("execute", "执行动作", "send the generated action chunk or first trajectory segment to the controller", [3], [7]), ], "formulas": [ (r"c=\phi(o,\ell)", 
"状态/图像/语言先编码成条件 c。"), (r"a^0\sim\mathcal{N}(0,I)", "从随机动作开始。"), (r"\epsilon_\theta(a^k,c,k)\;\text{or}\;v_\theta(a^t,c,t)", "网络预测去噪方向或 flow velocity。"), (r"a^{k-1}=a^k-\alpha\,\epsilon_\theta(a^k,c,k)+\sigma_k z", "diffusion: 一步步去掉噪声。"), (r"a^1=a^0+\int_0^1 v_\theta(a^t,c,t)\,dt", "flow: 沿速度场积分到动作。"), (r"\pi(a\mid o,\ell)\approx p_\theta(a\mid c)", "生成分布就是策略。"), ], "code": [ "# generative action head", "c = encoder(obs, instruction)", "a = normal(shape=action_chunk)", "for k in schedule:", " delta = denoiser(a, c, k)", " a = update(a, delta, k)", "return execute(a[0])", "# flow replaces update with an ODE step", ], "vis": "denoise", }, "token": { "title": "Token / sequence model", "intuition": "把状态、目标、语言、动作都变成序列 token,然后像语言模型一样预测下一个动作 token。", "steps": [ ("context", "构造上下文", "state, language, return, or goal tokens define the context", [0, 1], [0, 1]), ("quantize", "动作离散化", "continuous action becomes bins, VQ codes, or compressed action tokens", [1], [2]), ("decode", "自回归预测", "predict one action token at a time", [2], [3, 4]), ("detok", "还原动作", "decode tokens back to continuous robot commands", [3], [5]), ("control", "闭环执行", "execute a short chunk, observe again, and re-plan", [3, 4], [6]), ], "formulas": [ (r"x_{1:T}=[c_1,\ldots,c_m,a^{(1)},\ldots,a^{(K)}]", "把条件和动作放进同一序列。"), (r"a\rightarrow(a^{(1)},\ldots,a^{(K)}),\quad a^{(j)}\in\mathcal{V}", "连续动作被 tokenized。"), (r"p_\theta(a^{(j)}\mid c,a^{(s' from interaction", [0], [0, 1]), ("sample", "采样候选计划", "draw many action sequences over horizon H", [1], [2]), ("rollout", "模型内 rollout", "predict future states for each sequence", [1, 2], [3]), ("select", "选最高分", "score by reward/cost and take first action", [2], [4, 5]), ("replan", "下一步重规划", "observe again and repeat", [2, 3], [6])]), "formulas": [(r"\hat s_{t+1}=\hat f_\phi(s_t,a_t)", "学习前向动力学。"), (r"J(a_{t:t+H})=\sum_{k=0}^H \gamma^k\hat r(\hat s_{t+k},a_{t+k})", "给候选动作序列打分。"), (r"a_t=\left[\arg\max_{a_{t:t+H}}J\right]_0", "只执行第一步。")], "code": ["# learned MPC", 
"model.fit(replay)", "candidates = sample_sequences(N, H)", "for A in candidates:", " S = rollout(model, s, A)", " score[A] = reward(S, A).sum()", "execute(best(candidates)[0])"], }, "latent-imagination": { "title": "Dreamer latent imagination", "intuition": "把像素压进 latent world model,在想象轨迹里训练 actor 和 value,而不是每次都用真实机器人试。", "vis": "world", "steps": steps([("rssm", "学习 latent dynamics", "RSSM predicts next latent, reward, continuation", [0, 1], [0, 1]), ("imagine", "latent imagination", "roll actor forward inside learned latent model", [1], [2]), ("value", "lambda-return value", "estimate long-horizon imagined returns", [2], [3]), ("actor", "反传训练 actor", "differentiate imagined returns through latent trajectories", [2, 3], [4, 5])]), "formulas": [(r"z_t\sim q_\phi(z_t|z_{t-1},a_{t-1},o_t)", "posterior latent。"), (r"\hat z_{t+1}\sim p_\phi(\hat z_{t+1}|\hat z_t,a_t)", "想象中的 dynamics。"), (r"V_\lambda(\hat z_t)=\hat r_t+\gamma((1-\lambda)V(\hat z_{t+1})+\lambda V_\lambda(\hat z_{t+1}))", "lambda return。"), (r"\max_\psi \mathbb{E}_{\hat\tau\sim p_\phi,\pi_\psi}\sum_t V_\lambda(\hat z_t)", "actor 在梦里优化。")], "code": ["# Dreamer-style update", "posterior = encoder(obs, actions)", "model_loss = recon + reward_loss + kl", "z = stopgrad(posterior[-1])", "imagined = world_model.imagine(actor, z, horizon)", "actor_loss = -lambda_return(imagined).mean()", "value_loss = mse(value(z), target_return)"], }, "generative-video-wm": { "title": "Generative video world model", "intuition": "预测未来视频本身:它可以是 planner、simulator 或 data generator,关键在条件里有没有动作。", "vis": "world", "steps": steps([("context", "输入上下文帧", "past frames and text/goal/actions condition the video model", [0], [0, 1]), ("latent", "压缩视频 latent", "operate in latent video/token space", [1], [2]), ("generate", "生成未来帧", "autoregressive or diffusion decoder predicts future video", [2], [3, 4]), ("use", "用于规划/数据", "future videos can guide policy, generate data, or evaluate outcomes", [3], [5])]), "formulas": 
[(r"p_\theta(o_{t+1:t+H}|o_{\le t},c)", "未来视频条件生成。"), (r"c\in\{\ell,g,a_{t:t+H},\text{mask}\}", "条件决定它是 planner 还是 simulator。"), (r"\hat o_{t+1:t+H}\sim p_\theta(\cdot|o_{\le t},c)", "采样未来。")], "code": ["# video world model", "ctx = encode_video(past_frames, condition)", "z = sample_latent_noise()", "future = video_decoder.generate(z, ctx)", "if has_reward: score = reward_model(future)", "use(future, score)"], }, "action-conditioned-wm": { "title": "Action-Conditioned World Model", "intuition": "动作是输入:问模型“如果我这样做,会看到什么未来?”所以它能做 counterfactual evaluation。", "vis": "world", "steps": steps([("state", "编码当前状态", "encode observation/history", [0], [0]), ("candidate", "输入候选动作", "condition the model on proposed action sequence", [1], [1, 2]), ("predict", "预测对应未来", "different actions lead to different futures", [2], [3]), ("evaluate", "评估与选择", "score predicted futures for planning or policy improvement", [3], [4, 5])]), "formulas": [(r"p_\theta(o_{t+1:t+H}|o_{\le t},a_{t:t+H})", "动作作为条件输入。"), (r"\hat o^A\neq \hat o^B\quad\text{for}\quad A\neq B", "不同动作序列产生不同 counterfactual future。"), (r"A^*=\arg\max_A R(\hat o^A_{t+1:t+H})", "用预测未来选动作。")], "code": ["# action-conditioned WM planning", "state = encode(obs_history)", "for A in candidate_actions:", " future[A] = wm.predict(state, A)", " score[A] = task_score(future[A])", "execute(best(score)[0])"], }, "world-action-model": { "title": "World Action Model", "intuition": "动作是输出:先想象一个成功的视频,再从视频中读出能实现它的动作。", "vis": "world", "steps": steps([("prompt", "任务条件", "scene and instruction specify desired success", [0], [0]), ("imagine", "生成成功未来", "video model imagines the task being completed", [1], [1, 2]), ("decode", "从视频读动作", "inverse dynamics or joint decoder extracts actions", [2], [3, 4]), ("execute", "执行并反馈", "execute proposed actions, optionally re-imagine", [3], [5])]), "formulas": [(r"p_\theta(o_{t+1:T},a_{t:T}|o_t,\ell)", "联合生成未来和动作。"), (r"p_\theta(o_{t+1:T}|o_t,\ell)p_\psi(a_{t:T}|o_{t:T})", "两阶段:先视频,再 inverse dynamics。"), 
(r"\hat a_{t:T}\sim p(\cdot|\hat o_{t:T})", "动作来自 imagined future。")], "code": ["# world-action model", "future = video_model.generate(obs, instruction)", "actions = inverse_dynamics(obs, future)", "for a in actions[:chunk]:", " robot.step(a)", " if off_track: future = reimagine()"], }, "occupancy-latent-wm": { "title": "Occupancy / latent state world model", "intuition": "不预测每个 RGB 像素,而是预测规划有用的状态:占据、接触、cost、latent dynamics。", "vis": "world", "steps": steps([("encode", "抽象状态", "map pixels/history to occupancy or latent state", [0], [0, 1]), ("transition", "状态转移", "predict compact next state under action", [1], [2]), ("cost", "读出可规划量", "derive collision/contact/cost maps", [2], [3, 4]), ("plan", "在抽象状态规划", "use MPC/search on compact prediction", [3], [5])]), "formulas": [(r"z_t=\phi(o_{\le t})", "抽象 latent。"), (r"p_\theta(z_{t+1}|z_t,a_t)", "latent dynamics。"), (r"\hat c_t=\psi(z_t)", "从 latent 读出 occupancy/contact/cost。"), (r"\min_{a_{0:H}}\sum_t \hat c_t", "规划时优化 cost。")], "code": ["# occupancy latent WM", "z = encoder(obs_history)", "for a in plan:", " z = latent_dynamics(z, a)", " cost += occupancy_head(z) + contact_head(z)", "plan = optimize(cost)", "execute(plan[0])"], }, "decision-transformer": { "title": "Decision Transformer", "intuition": "不做 Bellman backup;把 return-to-go、state、action 排成序列,让 Transformer 预测下一个动作。", "vis": "tokens", "steps": steps([("tokens", "构造轨迹 token", "interleave return, state, action tokens", [0], [0, 1]), ("rtg", "条件目标回报", "desired return steers behavior", [1], [2]), ("causal", "因果 Transformer", "predict action from past tokens only", [2], [3, 4]), ("roll", "执行并更新 RTG", "subtract received reward and continue", [3], [5, 6])]), "formulas": [(r"x=(\hat R_1,s_1,a_1,\ldots,\hat R_t,s_t)", "轨迹序列。"), (r"p_\theta(a_t|\hat R_{\le t},s_{\le t},a_{ tau:", " skills[call.name](call.args)", " else: replan()"], }, "vlm-affordance": { "title": "VLM affordance / spatial programs", "intuition": "让 VLM 输出哪里能抓、哪里该放、哪些 3D 约束成立,再交给轨迹优化器。", "vis": 
"planner", "steps": steps([("scene", "读图像/点云", "VLM grounds language in the scene", [0], [0]), ("cost", "生成空间代价", "produce keypoints, voxel costs, or constraints", [1], [1, 2]), ("opt", "轨迹优化", "classical solver finds a feasible path", [2], [3, 4]), ("feedback", "视觉反馈", "re-run grounding if scene changes", [3], [5])]), "formulas": [(r"\mathcal{C}(x)=\mathrm{VLM}(\ell,o,x)", "VLM 定义空间代价。"), (r"\tau^*=\arg\min_\tau\int\mathcal{C}(\tau(t))dt", "轨迹优化。"), (r"g_i(q_t)\le 0,\quad h_j(q_t)=0", "几何/关系约束。")], "code": ["# VLM affordance planner", "objects = vlm.detect(scene, instruction)", "cost = vlm.value_map(scene, instruction)", "constraints = keypoint_constraints(objects)", "traj = trajopt(cost, constraints, robot_model)", "controller.follow(traj)"], }, "pid-control": { "title": "PID feedback controller", "intuition": "只看目标误差:当前误差、累计误差、误差变化率三项合成控制量。", "vis": "planner", "steps": steps([("error", "测误差", "compare target and current value", [0], [0]), ("p", "P 项", "push proportional to current error", [1], [1]), ("i", "I 项", "integrate persistent bias", [2], [2]), ("d", "D 项", "damp fast changes", [3], [3]), ("apply", "施加控制", "send control to actuator and repeat", [0, 1, 2, 3], [4])]), "formulas": [(r"e(t)=r(t)-y(t)", "误差。"), (r"u_P=K_pe(t)", "比例项。"), (r"u_I=K_i\int_0^t e(\tau)d\tau", "积分项。"), (r"u_D=K_d\dot e(t)", "微分项。"), (r"u=u_P+u_I+u_D", "总控制量。")], "code": ["# PID loop", "e = target - measured", "integral += e * dt", "derivative = (e - prev_e) / dt", "u = Kp*e + Ki*integral + Kd*derivative", "actuator.send(u)", "prev_e = e"], }, "lqr": { "title": "LQR optimal linear feedback", "intuition": "在线性动力学和二次代价下,最优策略就是一个固定反馈矩阵 u=-Kx。", "vis": "planner", "steps": steps([("linear", "线性模型", "assume x_{t+1}=Ax_t+Bu_t", [0], [0]), ("cost", "二次代价", "penalize state error and control effort", [1], [1]), ("riccati", "解 Riccati", "solve backward / algebraic Riccati equation", [2], [2, 3]), ("feedback", "反馈控制", "apply u=-Kx", [3], [4])]), "formulas": [(r"x_{t+1}=Ax_t+Bu_t", "线性动力学。"), 
(r"J=\sum_t x_t^\top Qx_t+u_t^\top Ru_t", "二次代价。"), (r"P=A^\top PA-A^\top PB(R+B^\top PB)^{-1}B^\top PA+Q", "Riccati equation。"), (r"u=-Kx,\quad K=(R+B^\top PB)^{-1}B^\top PA", "最优反馈。")], "code": ["# LQR", "A, B = linearize(dynamics)", "P = solve_riccati(A, B, Q, R)", "K = inv(R + B.T@P@B) @ B.T@P@A", "while control:", " u = -K @ x", " x = step(u)"], }, "classical-mpc": { "title": "Classical MPC / trajectory optimization", "intuition": "用已知模型在约束下优化未来 H 步,只执行第一步,然后滚动重算。", "vis": "planner", "steps": steps([("model", "已知模型", "use physics/kinematics model", [0], [0]), ("horizon", "有限时域优化", "optimize controls over a receding horizon", [1], [1, 2]), ("constraints", "处理约束", "respect obstacles, torque limits, contacts", [2], [3]), ("recede", "执行第一步", "execute first control and solve again next cycle", [3], [4, 5])]), "formulas": [(r"\min_{u_{0:H}}\sum_{t=0}^H\ell(x_t,u_t)", "优化时域代价。"), (r"x_{t+1}=f(x_t,u_t)", "模型约束。"), (r"g(x_t,u_t)\le 0", "安全/物理约束。"), (r"u_t=[u^*_{0:H}]_0", "只执行第一步。")], "code": ["# MPC", "while running:", " problem = build_optimization(x, model, constraints)", " U = solve(problem, horizon=H)", " execute(U[0])", " x = observe()"], }, "motion-planning": { "title": "Motion planning search", "intuition": "在 configuration space 里搜索一条 collision-free path,之后再由控制器跟踪。", "vis": "planner", "steps": steps([("space", "构造 C-space", "represent robot configuration and obstacles", [0], [0]), ("sample", "采样/扩展", "RRT/PRM samples feasible configurations", [1], [1, 2]), ("connect", "碰撞检测连接", "connect nodes only through free space", [2], [3]), ("path", "输出路径", "extract and smooth path to goal", [3], [4, 5])]), "formulas": [(r"q\in\mathcal{C},\quad q\in\mathcal{C}_{free}", "配置空间与自由空间。"), (r"\text{find }\tau:q_s\rightarrow q_g,\;\tau(t)\in\mathcal{C}_{free}", "路径可行性。"), (r"q_{new}=\mathrm{steer}(q_{near},q_{rand})", "RRT 扩展。")], "code": ["# RRT sketch", "tree = [q_start]", "for i in range(N):", " q_rand = sample_free()", " q_near = nearest(tree, q_rand)", " q_new = 
steer(q_near, q_rand)", " if collision_free(q_near, q_new): tree.add(q_new)"], }, "vla-foundation": { "title": "VLA foundation model", "intuition": "把 VLM 的视觉语言语义 trunk 接上 robot action head,再用跨任务/跨 embodiment 数据微调。", "vis": "tokens", "steps": steps([("pretrain", "VLM 预训练", "web-scale image-text/video-text gives semantic priors", [0], [0]), ("robot", "机器人数据对齐", "teleop and multi-embodiment data teach actions", [1], [1, 2]), ("head", "动作头", "token/diffusion/flow head maps hidden state to robot commands", [2], [3, 4]), ("deploy", "语言控制", "instruction-conditioned policy acts in closed loop", [3], [5])]), "formulas": [(r"h=\mathrm{VLM}_\theta(o,\ell)", "语义 trunk。"), (r"a\sim p_\psi(a|h)", "动作头。"), (r"\mathcal{D}=\bigcup_e\mathcal{D}_e", "跨 embodiment 数据。")], "code": ["# VLA policy", "h = vlm(image, instruction)", "if action_head == 'token': action = decode_tokens(h)", "elif action_head == 'flow': action = flow_sample(h)", "elif action_head == 'diffusion': action = diffusion_sample(h)", "robot.execute(action)"], }, "vla-rl": { "title": "RL-finetuned VLA", "intuition": "先用大规模 BC 得到可用 VLA,再用奖励/偏好/环境反馈修正它的行为。", "vis": "actorcritic", "steps": steps([("init", "BC 初始化", "start from a capable imitation VLA", [0], [0]), ("reward", "收集反馈", "task reward, preference, success detector, or critic", [1], [1, 2]), ("update", "受约束微调", "improve reward while staying close to base policy", [2], [3, 4]), ("eval", "真实评测", "deploy cautiously with safety and rollback", [3], [5])]), "formulas": [(r"\pi_0=\mathrm{BC}(\mathcal{D}_{robot})", "先模仿。"), (r"\max_\pi \mathbb{E}R(\tau)-\beta D_{KL}(\pi||\pi_0)", "带 KL/行为约束的强化。"), (r"A(s,a)=Q(s,a)-V(s)", "用优势指导更新。")], "code": ["# RL fine-tune VLA", "pi = load_bc_vla()", "rollouts = collect(pi)", "reward = success_or_preference_model(rollouts)", "loss = rl_objective(pi, reward) + beta*kl(pi, pi_base)", "safe_update(pi, loss)"], }, "domain-randomization": { "title": "Domain randomization", "intuition": "不是让 sim 完全真实,而是把 sim 随机到足够宽,让真实世界只是训练分布中的一个样本。", 
"vis": "prob", "steps": steps([("range", "设随机范围", "choose physics, texture, lighting, latency ranges", [0], [0]), ("sample", "每集采样域", "randomize simulator parameters per rollout", [1], [1, 2]), ("train", "训练鲁棒策略", "policy maximizes expected return over domains", [2], [3]), ("real", "真实迁移", "deploy zero-shot or with small adaptation", [3], [4])]), "formulas": [(r"\xi\sim p(\xi)", "随机物理/视觉参数。"), (r"\max_\pi\mathbb{E}_{\xi,\tau\sim\pi,\mathrm{sim}_\xi}\sum_t r_t", "跨域期望回报。"), (r"\xi_{real}\in\mathrm{support}(p(\xi))", "希望真实域落在随机范围内。")], "code": ["# domain randomization", "for episode in train:", " xi = sample(domain_ranges)", " env.set_params(xi)", " traj = rollout(policy, env)", " update(policy, traj.reward)", "deploy(policy, real_robot)"], }, "sim2real-adapt": { "title": "Sim-to-real adaptation / RMA", "intuition": "策略在线估计真实环境的隐变量,比如摩擦、负载、地形,再据此调整动作。", "vis": "actorcritic", "steps": steps([("base", "sim 训练 base policy", "train with privileged simulator parameters", [0], [0, 1]), ("adapt", "训练 adaptation module", "infer latent environment from recent history", [1], [2, 3]), ("real", "真实在线估计", "estimate context without privileged variables", [2], [4]), ("control", "条件控制", "policy acts conditioned on inferred context", [3], [5])]), "formulas": [(r"z=\phi_\eta(o_{t-k:t},a_{t-k:t})", "从历史估计环境 latent。"), (r"a_t=\pi_\theta(o_t,z)", "策略条件化到 latent。"), (r"\min_\eta\|z-\xi_{priv}\|^2", "用 sim privileged labels 监督 adaptation。")], "code": ["# RMA-style adaptation", "xi = sim.privileged_params()", "z_target = encoder_privileged(xi)", "z = adaptation(history)", "action = policy(obs, z)", "loss = rl_loss + mse(z, z_target)", "deploy: z = adaptation(real_history)"], }, "visual-pretrain": { "title": "Visual representation pretraining", "intuition": "先从视频/图像学一个通用视觉 encoder,再把它冻结或微调用于小数据机器人策略。", "vis": "tokens", "steps": steps([("video", "收集无标签视频", "human/ego/web videos provide visual priors", [0], [0]), ("ssl", "自监督目标", "contrastive, masked, temporal, or value-aware pretraining", 
[1], [1, 2]), ("freeze", "接机器人头", "policy head trains on top of representation", [2], [3, 4]), ("transfer", "迁移到任务", "better sample efficiency and generalization", [3], [5])]), "formulas": [(r"\phi^*=\arg\min_\phi\mathcal{L}_{SSL}(\phi;\mathcal{D}_{video})", "视觉预训练。"), (r"a\sim\pi_\psi(a|\phi(o))", "机器人策略使用 encoder。"), (r"\mathcal{L}_{BC}(\psi)=\|a-\pi_\psi(\phi(o))\|^2", "小数据行为克隆。")], "code": ["# visual pretraining", "phi = train_ssl(video_dataset)", "for batch in robot_demos:", " feat = phi(batch.obs).detach()", " pred = policy_head(feat)", " loss = bc_loss(pred, batch.action)", "update(policy_head)"], }, "latent-action": { "title": "Latent action pretraining", "intuition": "先从无动作视频里离散化“画面怎么变”的 latent action,再用少量机器人动作把 latent 解码成电机命令。", "vis": "world", "steps": steps([("pairs", "视频帧对", "use adjacent frames without robot action labels", [0], [0]), ("infer", "推断 latent action", "VQ/inverse model compresses transition into latent code", [1], [1, 2]), ("predict", "用 latent 预测下一帧", "latent action must explain visual change", [2], [3]), ("decode", "少量标注解码", "map latent actions to real robot commands", [3], [4, 5])]), "formulas": [(r"z_t=\mathrm{VQ}(o_t,o_{t+1})", "从帧变化得到 latent action。"), (r"p_\theta(o_{t+1}|o_t,z_t)", "latent 必须能预测下一帧。"), (r"a_t=h_\psi(z_t,o_t)", "少量动作标注解码。")], "code": ["# latent action pretraining", "z = vq_inverse(frame_t, frame_tp1)", "pred_next = video_model(frame_t, z)", "loss = recon(pred_next, frame_tp1) + vq_loss(z)", "decoder = fit_action_decoder(z_labeled, robot_action)", "robot_action = decoder(z, obs)"], }, } def build_data(): landscapes = [ ("robot", "Robot Learning", gen_landscape.build_data()), ("vlm", "VLM", gen_vlm.build_data()), ("world", "World Models", gen_worldmodel.build_data()), ] labs = [] robot_base = landscapes[0][2] missing = [p["id"] for p in robot_base["paradigms"] if p["id"] not in NODE_LESSONS] if missing: raise RuntimeError("Missing node-specific Algorithm Lab lessons: %s" % ", ".join(missing)) for landscape_key, 
def build_data():
    """Assemble the JSON-serialisable payload for the Algorithm Lab page.

    Collects the paradigms of the three landscapes (robot, VLM, world
    models) and turns each one into a lab record.

    Returns:
        A dict with two keys: ``"labs"`` (one record per paradigm, in
        landscape order) and ``"sources"`` (``SOURCE_LINKS`` as
        ``{"title", "url"}`` dicts).

    Raises:
        RuntimeError: If a robot-landscape paradigm has no entry in
            ``NODE_LESSONS``.
    """
    landscapes = [
        ("robot", "Robot Learning", gen_landscape.build_data()),
        ("vlm", "VLM", gen_vlm.build_data()),
        ("world", "World Models", gen_worldmodel.build_data()),
    ]

    # Fail fast, before building anything: every robot node needs a
    # hand-written lesson, the other landscapes get generated ones.
    robot_base = landscapes[0][2]
    missing = [p["id"] for p in robot_base["paradigms"] if p["id"] not in NODE_LESSONS]
    if missing:
        raise RuntimeError(
            f"Missing node-specific Algorithm Lab lessons: {', '.join(missing)}"
        )

    labs = []
    for landscape_key, landscape_label, base in landscapes:
        fam = {f["key"]: f for f in base["families"]}
        for p0 in base["paradigms"]:
            # Shallow copy so the landscape generator's data is not mutated.
            p = dict(p0)
            raw_id = p["id"]
            # Robot ids stay as they are; other landscapes are prefixed with
            # their landscape key.
            p["id"] = raw_id if landscape_key == "robot" else f"{landscape_key}-{raw_id}"
            p["raw_id"] = raw_id
            p["landscape"] = landscape_key
            p["landscapeLabel"] = landscape_label
            if landscape_key == "robot":
                lesson = NODE_LESSONS[raw_id]
            else:
                lesson = make_external_lesson(p, landscape_key)
            labs.append(build_lab_record(p, fam, lesson))

    return {
        "labs": labs,
        "sources": [{"title": t, "url": u} for t, u in SOURCE_LINKS],
    }


def build_lab_record(p, fam, lesson):
    """Merge a paradigm, its family and its lesson into one lab record.

    Args:
        p: Paradigm dict. Reads ``id``, ``name`` and ``family`` directly;
            every other field is optional.
        fam: Mapping from family key to family dict (``label``, ``color``
            and ``equation`` are read when present).
        lesson: Lesson dict. ``steps``, ``formulas`` and ``code`` are
            required; ``title``, ``intuition`` and ``vis`` fall back to the
            archetype's values.

    Returns:
        The dict that is serialised for one lab on the page.
    """
    arch_key = ID_TO_ARCHETYPE.get(p["id"], "planner")
    # Non-robot landscapes always use a fixed archetype.
    if p.get("landscape") == "vlm":
        arch_key = "token"
    elif p.get("landscape") == "world":
        arch_key = "world"
    arch = ARCHETYPES[arch_key]

    family = fam.get(p["family"], {})

    # Copy so the shared lesson definition is never mutated by the inserts.
    formulas = list(lesson["formulas"])

    def already_listed(tex):
        return any(existing == tex for existing, _ in formulas)

    # make_vlm_lesson already leads with the node's own equation, so only
    # prepend it when the lesson does not contain it yet; otherwise the page
    # would show the same formula twice.
    node_math = p.get("math")
    if node_math and not already_listed(node_math):
        formulas.insert(0, (node_math, "这个节点在 landscape 中的原始目标/方程。"))
    family_eq = family.get("equation")
    if family_eq and not already_listed(family_eq):
        formulas.insert(1, (family_eq, "所属 family 的共享目标。"))

    return {
        "id": p["id"],
        "rawId": p.get("raw_id", p["id"]),
        "landscape": p.get("landscape", "robot"),
        "landscapeLabel": p.get("landscapeLabel", "Robot Learning"),
        "name": p["name"],
        "short": p.get("short", p["name"]),
        "family": p["family"],
        "familyLabel": family.get("label", p["family"]),
        "color": family.get("color", "#8b5cf6"),
        "tagline": p.get("tagline", ""),
        "simple": p.get("simple", ""),
        "when": p.get("when", ""),
        "papers": p.get("papers", []),
        "learn": p.get("learn"),
        "arch": arch_key,
        "archTitle": lesson.get("title", arch["title"]),
        "intuition": lesson.get("intuition", arch["intuition"]),
        "steps": [
            {"key": k, "t": t, "cap": c, "fx": fx, "code": code}
            for (k, t, c, fx, code) in lesson["steps"]
        ],
        "formulas": [{"tex": tex, "gl": gl} for tex, gl in formulas],
        "code": lesson["code"],
        "vis": lesson.get("vis", arch["vis"]),
    }


def make_external_lesson(p, landscape_key):
    """Generate a lesson for a paradigm outside the robot landscape.

    ``"vlm"`` is routed to ``make_vlm_lesson``; every other landscape key
    is handled by ``make_world_lesson``.
    """
    if landscape_key == "vlm":
        return make_vlm_lesson(p)
    return make_world_lesson(p)
make_vlm_lesson(p) return make_world_lesson(p) def make_vlm_lesson(p): name = p["name"] short = p.get("short", name) family = p.get("family", "VLM") return { "title": f"{short} visual-language mechanism", "intuition": f"{name} 的核心是把视觉信号和语言 token 对齐、桥接或融合;关键要看视觉信息如何进入语言模型,以及输出是文本、区域还是多模态 token。", "vis": "tokens" if family not in {"Grounding", "Video"} else ("planner" if family == "Grounding" else "world"), "steps": steps([ ("input", "输入图像/文本", f"use the node's input interface: {p.get('mapping','image + text')}", [0], [0, 1]), ("encode", "视觉编码/切块", "turn pixels, regions, frames, or patches into visual tokens/features", [1], [2]), ("align", "对齐或桥接", "contrastive loss, projector, Q-Former, cross-attention, or native fusion connects vision to language", [2], [3, 4]), ("reason", "语言侧推理/生成", "the language model predicts answer tokens, grounded boxes, or multimodal tokens", [3], [5]), ("output", "输出与部署", "use the result for retrieval, chat, grounding, OCR, video QA, or agent perception", [4], [6]), ]), "formulas": [ (p.get("math") or r"H_v=\mathrm{VisionEncoder}(I),\quad y\sim p_\theta(y|H_v,x)", "该 VLM 节点的核心训练/推理方程。"), (r"H_v=\phi_v(I)\quad\text{or}\quad H_v=\phi_v(f_{1:T})", "图像/视频先变成视觉 token。"), (r"H'_v=\mathrm{Bridge}(H_v)\in\mathbb{R}^{m\times d_{LLM}}", "桥接层把视觉特征变成 LLM 可读 token。"), (r"p_\theta(y_t|y_{ Robot Learning Algorithm Lab

Robot Learning Algorithm Lab

每个 landscape 节点都有同步解释器:动画、公式、代码、直觉说明一起随步骤切换。第一版覆盖全部节点,后续可以逐个节点继续精修到论文级别。

Animation

Trace

Formula

Code


      

Evidence / tutorials

def render() -> str:
    """Return the HTML page with the lab data embedded as JSON.

    Every occurrence of the ``__DATA_JSON__`` placeholder in ``TEMPLATE``
    is replaced by the serialised result of ``build_data()``.
    ``ensure_ascii=False`` keeps non-ASCII text as literal characters
    rather than ASCII escape sequences.
    """
    payload = json.dumps(build_data(), ensure_ascii=False)
    return TEMPLATE.replace("__DATA_JSON__", payload)


if __name__ == "__main__":
    out_path = "robot_algorithm_lab.html"
    # Render before opening the file: mode "w" truncates on open, so a
    # failure while building the data would otherwise leave an empty page
    # in place of the previous output.
    html = render()
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(html)
    print(f"Wrote {out_path}")