"""Talkie Gentleman Reachy Mini demo video. Split-screen: left = MuJoCo 3D robot, right = Victorian chat overlay. 15 seconds, 720p, 24fps. Usage: GST_PLUGIN_SCANNER="" python3.13 demo_video.py """ import os, sys, math, subprocess from pathlib import Path os.environ["GST_PLUGIN_SCANNER"] = "" os.environ["GST_REGISTRY_UPDATE"] = "no" import mujoco import numpy as np from PIL import Image, ImageDraw, ImageFont # --- Config --- SCENE_XML = Path(__file__).parent.parent / "reachy_mini/src/reachy_mini/descriptions/reachy_mini/mjcf/scenes/minimal.xml" OUTPUT_MP4 = Path(__file__).parent / "demo.mp4" TOTAL_W, TOTAL_H = 1280, 720 ROBOT_W = TOTAL_W // 2 # 640 CHAT_W = TOTAL_W - ROBOT_W FPS = 24 TOTAL_DURATION = 15.0 # Actuator indices YAW, S1, S2, S3, S4, S5, S6, R_ANT, L_ANT = 0, 1, 2, 3, 4, 5, 6, 7, 8 # --- Chat content --- CHAT_EVENTS = [ # (start_time, role, text) (0.5, "header", "~ Talkie Gentleman ~"), (1.0, "user", "Good evening, what are your\nthoughts on modern inventions?"), (4.5, "bot", "Ah, a most splendid inquiry!\nThe telegraph astounds me —\nto send words across vast\ndistances in mere moments.\nTruly, we live in an age\nof marvels, dear friend."), (9.0, "user", "What music do you enjoy?"), (11.5, "bot", "Beethoven, without question.\nHis symphonies stir the very\nsoul. The Moonlight Sonata\nis a masterwork of the\nhighest order."), ] # --- Robot motion timeline --- # (start, end, gesture_name) GESTURES = [ # Idle breathing at start (0.0, 1.0, "idle"), # Listen to first question - slight attentive tilt (1.0, 2.5, "attentive_listen"), # Thinking head tilt before answering (2.5, 4.5, "thinking_tilt"), # Speaking - gentle nods while responding (4.5, 8.5, "speaking_nods"), # Brief return to neutral (8.5, 9.0, "idle"), # Listen to second question (9.0, 10.0, "attentive_listen"), # Enthusiastic nod about Beethoven (10.0, 11.5, "enthusiastic_think"), # Speaking with conviction (11.5, 14.5, "speaking_nods"), # Elegant settle (14.5, 15.0, "idle"), ] def get_robot_pose(t: float) -> dict: """Return target ctrl values for time t.""" gesture = "idle" gesture_t = 0.0 for gs, ge, gn in GESTURES: if gs <= t < ge: gesture = gn gesture_t = (t - gs) / max(0.01, ge - gs) # normalized 0-1 break ctrl = {YAW: 0, S1: 0, S2: 0, S3: 0, S4: 0, S5: 0, S6: 0, R_ANT: 0, L_ANT: 0} if gesture == "idle": # Gentle breathing - subtle vertical oscillation breath = math.sin(t * 1.8) * 0.02 ctrl[S3] = breath ctrl[R_ANT] = math.sin(t * 0.7) * 0.05 ctrl[L_ANT] = math.sin(t * 0.7 + 0.5) * 0.05 elif gesture == "attentive_listen": # Gentle head tilt to the right, antenna perk ease = math.sin(gesture_t * math.pi) # smooth in-out ctrl[S5] = math.radians(12) * ease # roll tilt ctrl[S4] = math.radians(-5) * ease # slight pitch down (attentive) ctrl[R_ANT] = 0.3 * ease ctrl[L_ANT] = 0.15 * ease elif gesture == "thinking_tilt": # Head tilts left, one antenna raises - pondering ease = min(1.0, gesture_t * 2.5) # quick settle hold = math.sin(gesture_t * math.pi * 0.8) ctrl[S5] = math.radians(-15) * ease # tilt left ctrl[S4] = math.radians(8) * ease # slight look up ctrl[YAW] = math.radians(5) * ease # slight turn ctrl[R_ANT] = -0.2 * ease ctrl[L_ANT] = 0.5 * ease # one antenna raised = thinking # Subtle micro-movement ctrl[S4] += math.sin(t * 3) * 0.01 elif gesture == "speaking_nods": # Gentle periodic nods with slight body sway nod_cycle = math.sin(gesture_t * math.pi * 5) # ~2.5 nods over the gesture sway = math.sin(gesture_t * math.pi * 2) * 0.3 ctrl[S4] = math.radians(6) * nod_cycle # pitch nod ctrl[S5] = math.radians(3) * sway # gentle roll sway ctrl[YAW] = math.radians(2) * math.sin(gesture_t * math.pi * 1.5) # Antennas follow speech rhythm ctrl[R_ANT] = 0.2 * nod_cycle ctrl[L_ANT] = 0.2 * nod_cycle # Subtle vertical ctrl[S3] = 0.01 * nod_cycle elif gesture == "enthusiastic_think": # More energetic thinking - tilt + antenna waggle ease = min(1.0, gesture_t * 3) ctrl[S5] = math.radians(10) * ease ctrl[S4] = math.radians(10) * ease ctrl[R_ANT] = 0.4 * math.sin(gesture_t * math.pi * 4) ctrl[L_ANT] = 0.4 * math.cos(gesture_t * math.pi * 4) return ctrl def render_chat_panel(t: float) -> Image.Image: """Render the Victorian chat panel for time t.""" img = Image.new("RGB", (CHAT_W, TOTAL_H), (28, 22, 18)) draw = ImageDraw.Draw(img) # Try to get a nice font, fall back to default try: font_title = ImageFont.truetype("/System/Library/Fonts/Supplemental/Times New Roman.ttf", 26) font_msg = ImageFont.truetype("/System/Library/Fonts/Supplemental/Times New Roman.ttf", 18) font_label = ImageFont.truetype("/System/Library/Fonts/Supplemental/Times New Roman.ttf", 14) except: font_title = ImageFont.load_default() font_msg = font_title font_label = font_title # Colors BG_DARK = (28, 22, 18) GOLD = (198, 166, 100) CREAM = (230, 218, 195) USER_BG = (48, 40, 32) BOT_BG = (42, 35, 28) BORDER = (100, 82, 58) DIM = (140, 120, 90) # Ornamental border draw.rectangle([0, 0, CHAT_W-1, TOTAL_H-1], outline=BORDER, width=2) draw.rectangle([4, 4, CHAT_W-5, TOTAL_H-5], outline=(60, 50, 38), width=1) # Decorative top line draw.line([(20, 55), (CHAT_W-20, 55)], fill=BORDER, width=1) # Small ornaments draw.text((CHAT_W//2 - 10, 48), "◆", fill=GOLD, font=font_label) y = 70 for evt_t, role, text in CHAT_EVENTS: if t < evt_t: break if role == "header": # Title bbox = draw.textbbox((0, 0), text, font=font_title) tw = bbox[2] - bbox[0] draw.text(((CHAT_W - tw) // 2, 18), text, fill=GOLD, font=font_title) continue # Typewriter effect for messages appearing elapsed = t - evt_t chars_visible = int(elapsed * 35) # 35 chars/sec typing speed visible_text = text[:chars_visible] if not visible_text: continue # Message bubble margin = 15 pad = 10 if role == "user": label = "You" label_color = DIM bg = USER_BG text_color = CREAM else: label = "Gentleman" label_color = GOLD bg = BOT_BG text_color = CREAM # Label draw.text((margin + 5, y), label, fill=label_color, font=font_label) y += 18 # Calculate text height bbox = draw.textbbox((0, 0), visible_text, font=font_msg) th = bbox[3] - bbox[1] tw = bbox[2] - bbox[0] # Bubble background bubble_h = th + pad * 2 + 4 draw.rounded_rectangle( [margin, y, CHAT_W - margin, y + bubble_h], radius=6, fill=bg, outline=BORDER ) # Text draw.text((margin + pad, y + pad), visible_text, fill=text_color, font=font_msg) y += bubble_h + 12 # Typing indicator for bot messages still typing if role == "bot" and chars_visible < len(text): dots = "..." [:int((t * 3) % 4)] draw.text((margin + pad, y - 5), f"✎ {dots}", fill=DIM, font=font_label) # Bottom ornament draw.line([(20, TOTAL_H - 25), (CHAT_W - 20, TOTAL_H - 25)], fill=BORDER, width=1) draw.text((CHAT_W // 2 - 30, TOTAL_H - 20), "⚙ Anno 1842", fill=DIM, font=font_label) return img def main(): print(f"Loading MuJoCo scene: {SCENE_XML}") model = mujoco.MjModel.from_xml_path(str(SCENE_XML)) model.vis.global_.offwidth = ROBOT_W model.vis.global_.offheight = TOTAL_H data = mujoco.MjData(model) renderer = mujoco.Renderer(model, TOTAL_H, ROBOT_W) cam = mujoco.MjvCamera() cam.type = mujoco.mjtCamera.mjCAMERA_FREE cam.distance = 0.48 cam.azimuth = 175 cam.elevation = -8 cam.lookat[:] = [0, 0, 0.14] stp = max(1, int(1.0 / (model.opt.timestep * FPS))) n_frames = int(TOTAL_DURATION * FPS) frames = [] print(f"Rendering {n_frames} frames ({TOTAL_DURATION:.0f}s @ {FPS}fps)...") for i in range(n_frames): t = i / FPS # Set robot pose pose = get_robot_pose(t) for k, v in pose.items(): data.ctrl[k] = v # Step physics for _ in range(stp): mujoco.mj_step(model, data) # Render robot view renderer.update_scene(data, cam) robot_rgb = renderer.render().copy() # (H, W, 3) # Render chat panel chat_img = render_chat_panel(t) chat_rgb = np.array(chat_img) # Composite split-screen composite = np.concatenate([robot_rgb, chat_rgb], axis=1) frames.append(composite) if (i + 1) % (FPS * 3) == 0: print(f" {i+1}/{n_frames} frames...") # Encode video print(f"\nEncoding {len(frames)} frames to {OUTPUT_MP4}...") proc = subprocess.Popen([ 'ffmpeg', '-y', '-f', 'rawvideo', '-vcodec', 'rawvideo', '-s', f'{TOTAL_W}x{TOTAL_H}', '-pix_fmt', 'rgb24', '-r', str(FPS), '-i', '-', '-c:v', 'libx264', '-pix_fmt', 'yuv420p', '-preset', 'fast', '-crf', '18', str(OUTPUT_MP4) ], stdin=subprocess.PIPE, stderr=subprocess.PIPE) for frame in frames: proc.stdin.write(frame.tobytes()) proc.stdin.close() _, stderr = proc.communicate() if proc.returncode == 0: size = os.path.getsize(str(OUTPUT_MP4)) print(f"✅ {OUTPUT_MP4} ({size/1024:.0f}KB, {TOTAL_DURATION:.0f}s)") else: print(f"❌ ffmpeg error: {stderr.decode()[:500]}") sys.exit(1) if __name__ == "__main__": main()