talkie_gentleman

Running

App Files Files Community

talkie_gentleman / demo_video.py

jrubiosainz

Upload folder using huggingface_hub

e089244 verified about 1 month ago

raw

history blame contribute delete

10.1 kB

	"""Talkie Gentleman Reachy Mini demo video.

	Split-screen: left = MuJoCo 3D robot, right = Victorian chat overlay.
	15 seconds, 720p, 24fps.

	Usage: GST_PLUGIN_SCANNER="" python3.13 demo_video.py
	"""
	import os, sys, math, subprocess
	from pathlib import Path

	# These environment variables are set before the third-party imports below
	# run, so the order relative to those imports matters.
	# NOTE(review): presumably this keeps GStreamer from scanning plugins or
	# updating its registry when a dependency loads it — confirm which import
	# actually needs it.
	os.environ["GST_PLUGIN_SCANNER"] = ""
	os.environ["GST_REGISTRY_UPDATE"] = "no"

	import mujoco
	import numpy as np
	from PIL import Image, ImageDraw, ImageFont

	# --- Config ---
	# MuJoCo scene file, located relative to the directory two levels above this script.
	SCENE_XML = (
	    Path(__file__).parent.parent
	    / "reachy_mini/src/reachy_mini/descriptions/reachy_mini/mjcf/scenes/minimal.xml"
	)
	# The encoded video is written next to this script.
	OUTPUT_MP4 = Path(__file__).parent / "demo.mp4"

	# Full frame size in pixels: robot view on the left, chat panel on the right.
	TOTAL_W = 1280
	TOTAL_H = 720
	ROBOT_W = TOTAL_W // 2  # 640
	CHAT_W = TOTAL_W - ROBOT_W  # whatever width the robot view leaves over
	FPS = 24
	TOTAL_DURATION = 15.0  # seconds

	# Actuator indices (positions in MuJoCo's data.ctrl array)
	YAW, S1, S2, S3, S4, S5, S6, R_ANT, L_ANT = range(9)

	# --- Chat content ---
	# Each entry is (start_time, role, text). Entries are listed in ascending
	# start_time order; render_chat_panel() stops at the first entry that has not
	# started yet. Message strings are split here one rendered line per source line.
	CHAT_EVENTS = [
	    (0.5, "header", "~ Talkie Gentleman ~"),
	    (
	        1.0,
	        "user",
	        "Good evening, what are your\n"
	        "thoughts on modern inventions?",
	    ),
	    (
	        4.5,
	        "bot",
	        "Ah, a most splendid inquiry!\n"
	        "The telegraph astounds me —\n"
	        "to send words across vast\n"
	        "distances in mere moments.\n"
	        "Truly, we live in an age\n"
	        "of marvels, dear friend.",
	    ),
	    (9.0, "user", "What music do you enjoy?"),
	    (
	        11.5,
	        "bot",
	        "Beethoven, without question.\n"
	        "His symphonies stir the very\n"
	        "soul. The Moonlight Sonata\n"
	        "is a masterwork of the\n"
	        "highest order.",
	    ),
	]

	# --- Robot motion timeline ---
	# Each entry is (start, end, gesture_name) with times in seconds.
	# get_robot_pose() treats every window as half-open: start <= t < end.
	GESTURES = [
	    (0.0, 1.0, "idle"),                  # idle breathing at start
	    (1.0, 2.5, "attentive_listen"),      # listen to first question - slight attentive tilt
	    (2.5, 4.5, "thinking_tilt"),         # thinking head tilt before answering
	    (4.5, 8.5, "speaking_nods"),         # speaking - gentle nods while responding
	    (8.5, 9.0, "idle"),                  # brief return to neutral
	    (9.0, 10.0, "attentive_listen"),     # listen to second question
	    (10.0, 11.5, "enthusiastic_think"),  # enthusiastic nod about Beethoven
	    (11.5, 14.5, "speaking_nods"),       # speaking with conviction
	    (14.5, 15.0, "idle"),                # elegant settle
	]


	def get_robot_pose(t: float) -> dict:
	    """Return target ctrl values for time t.

	    The active gesture is the first entry of ``GESTURES`` whose half-open
	    window ``[start, end)`` contains ``t``. When no window matches (for
	    example ``t`` at or past the end of the last entry) the pose falls back
	    to ``"idle"``.

	    Args:
	        t: Time in seconds since the start of the video.

	    Returns:
	        Dict mapping each actuator index (``YAW``, ``S1``-``S6``, ``R_ANT``,
	        ``L_ANT``) to its target control value. Actuators the active gesture
	        does not drive stay at 0, and an unrecognized gesture name yields an
	        all-zero pose.
	    """
	    gesture = "idle"
	    gesture_t = 0.0
	    for gs, ge, gn in GESTURES:
	        if gs <= t < ge:
	            gesture = gn
	            # Progress through the gesture, normalized 0-1. The max() guards
	            # against division by zero for a zero-length window.
	            gesture_t = (t - gs) / max(0.01, ge - gs)
	            break

	    ctrl = {YAW: 0, S1: 0, S2: 0, S3: 0, S4: 0, S5: 0, S6: 0, R_ANT: 0, L_ANT: 0}

	    if gesture == "idle":
	        # Gentle breathing - subtle vertical oscillation.
	        # Driven by absolute time t so it stays continuous across idle windows.
	        breath = math.sin(t * 1.8) * 0.02
	        ctrl[S3] = breath
	        ctrl[R_ANT] = math.sin(t * 0.7) * 0.05
	        ctrl[L_ANT] = math.sin(t * 0.7 + 0.5) * 0.05

	    elif gesture == "attentive_listen":
	        # Gentle head tilt to the right, antenna perk
	        ease = math.sin(gesture_t * math.pi)  # smooth in-out: 0 -> 1 -> 0
	        ctrl[S5] = math.radians(12) * ease  # roll tilt
	        ctrl[S4] = math.radians(-5) * ease  # slight pitch down (attentive)
	        ctrl[R_ANT] = 0.3 * ease
	        ctrl[L_ANT] = 0.15 * ease

	    elif gesture == "thinking_tilt":
	        # Head tilts left, one antenna raises - pondering
	        ease = min(1.0, gesture_t * 2.5)  # quick settle, then hold at 1.0
	        ctrl[S5] = math.radians(-15) * ease  # tilt left
	        ctrl[S4] = math.radians(8) * ease  # slight look up
	        ctrl[YAW] = math.radians(5) * ease  # slight turn
	        ctrl[R_ANT] = -0.2 * ease
	        ctrl[L_ANT] = 0.5 * ease  # one antenna raised = thinking
	        # Subtle micro-movement
	        ctrl[S4] += math.sin(t * 3) * 0.01

	    elif gesture == "speaking_nods":
	        # Gentle periodic nods with slight body sway
	        nod_cycle = math.sin(gesture_t * math.pi * 5)  # ~2.5 nods over the gesture
	        sway = math.sin(gesture_t * math.pi * 2) * 0.3
	        ctrl[S4] = math.radians(6) * nod_cycle  # pitch nod
	        ctrl[S5] = math.radians(3) * sway  # gentle roll sway
	        ctrl[YAW] = math.radians(2) * math.sin(gesture_t * math.pi * 1.5)
	        # Antennas follow speech rhythm
	        ctrl[R_ANT] = 0.2 * nod_cycle
	        ctrl[L_ANT] = 0.2 * nod_cycle
	        # Subtle vertical
	        ctrl[S3] = 0.01 * nod_cycle

	    elif gesture == "enthusiastic_think":
	        # More energetic thinking - tilt + antenna waggle
	        ease = min(1.0, gesture_t * 3)
	        ctrl[S5] = math.radians(10) * ease
	        ctrl[S4] = math.radians(10) * ease
	        # sin/cos keep the two antennas a quarter cycle apart.
	        ctrl[R_ANT] = 0.4 * math.sin(gesture_t * math.pi * 4)
	        ctrl[L_ANT] = 0.4 * math.cos(gesture_t * math.pi * 4)

	    return ctrl


	def render_chat_panel(t: float) -> Image.Image:
	    """Render the Victorian chat panel for time t.

	    Draws the frame decorations, then every ``CHAT_EVENTS`` entry whose start
	    time has been reached. Message text is revealed with a typewriter effect
	    (35 characters per second), and a pencil indicator is shown under a bot
	    message while it is still being typed.

	    Args:
	        t: Time in seconds since the start of the video.

	    Returns:
	        An RGB image of size ``(CHAT_W, TOTAL_H)``.
	    """
	    # Colors
	    BG_DARK = (28, 22, 18)
	    GOLD = (198, 166, 100)
	    CREAM = (230, 218, 195)
	    USER_BG = (48, 40, 32)
	    BOT_BG = (42, 35, 28)
	    BORDER = (100, 82, 58)
	    DIM = (140, 120, 90)

	    # Message bubble geometry (the same for every message)
	    margin = 15
	    pad = 10

	    img = Image.new("RGB", (CHAT_W, TOTAL_H), BG_DARK)
	    draw = ImageDraw.Draw(img)

	    # Try to get a nice font, fall back to default
	    font_path = "/System/Library/Fonts/Supplemental/Times New Roman.ttf"
	    try:
	        font_title = ImageFont.truetype(font_path, 26)
	        font_msg = ImageFont.truetype(font_path, 18)
	        font_label = ImageFont.truetype(font_path, 14)
	    except OSError:
	        # truetype() raises OSError when the font file cannot be read, e.g. on
	        # a system that does not have this macOS font path. Catching only
	        # OSError keeps KeyboardInterrupt and genuine bugs visible.
	        font_title = ImageFont.load_default()
	        font_msg = font_title
	        font_label = font_title

	    # Ornamental border
	    draw.rectangle([0, 0, CHAT_W - 1, TOTAL_H - 1], outline=BORDER, width=2)
	    draw.rectangle([4, 4, CHAT_W - 5, TOTAL_H - 5], outline=(60, 50, 38), width=1)

	    # Decorative top line
	    draw.line([(20, 55), (CHAT_W - 20, 55)], fill=BORDER, width=1)
	    # Small ornaments
	    draw.text((CHAT_W // 2 - 10, 48), "◆", fill=GOLD, font=font_label)

	    y = 70  # top of the next message; advances as bubbles are drawn
	    for evt_t, role, text in CHAT_EVENTS:
	        # Events are in ascending start order, so nothing later has started.
	        if t < evt_t:
	            break

	        if role == "header":
	            # Title, horizontally centered
	            bbox = draw.textbbox((0, 0), text, font=font_title)
	            tw = bbox[2] - bbox[0]
	            draw.text(((CHAT_W - tw) // 2, 18), text, fill=GOLD, font=font_title)
	            continue

	        # Typewriter effect for messages appearing
	        elapsed = t - evt_t
	        chars_visible = int(elapsed * 35)  # 35 chars/sec typing speed
	        visible_text = text[:chars_visible]
	        if not visible_text:
	            continue

	        if role == "user":
	            label, label_color, bg = "You", DIM, USER_BG
	        else:
	            label, label_color, bg = "Gentleman", GOLD, BOT_BG

	        # Label
	        draw.text((margin + 5, y), label, fill=label_color, font=font_label)
	        y += 18

	        # Bubble height follows the text revealed so far, so it grows while typing
	        bbox = draw.textbbox((0, 0), visible_text, font=font_msg)
	        th = bbox[3] - bbox[1]

	        # Bubble background
	        bubble_h = th + pad * 2 + 4
	        draw.rounded_rectangle(
	            [margin, y, CHAT_W - margin, y + bubble_h],
	            radius=6, fill=bg, outline=BORDER
	        )

	        # Text
	        draw.text((margin + pad, y + pad), visible_text, fill=CREAM, font=font_msg)

	        y += bubble_h + 12

	        # Typing indicator for bot messages still typing
	        if role == "bot" and chars_visible < len(text):
	            # Cycles through 0-3 dots, three steps per second
	            dots = "..."[:int((t * 3) % 4)]
	            draw.text((margin + pad, y - 5), f"✎ {dots}", fill=DIM, font=font_label)

	    # Bottom ornament
	    draw.line([(20, TOTAL_H - 25), (CHAT_W - 20, TOTAL_H - 25)], fill=BORDER, width=1)
	    draw.text((CHAT_W // 2 - 30, TOTAL_H - 20), "⚙ Anno 1842", fill=DIM, font=font_label)

	    return img


	def main():
	    """Render the split-screen demo and encode it to ``OUTPUT_MP4`` with ffmpeg.

	    For every frame the robot pose is applied, the physics is stepped, the
	    MuJoCo view and the chat panel are rendered, and the composited RGB frame
	    is written straight to ffmpeg's stdin. Streaming keeps only one frame in
	    memory at a time; buffering all 360 frames at 1280x720x3 bytes would hold
	    roughly 1 GB.

	    Exits the process with status 1 when ffmpeg is missing or reports an error.
	    """
	    import tempfile  # local import: only main() needs it

	    print(f"Loading MuJoCo scene: {SCENE_XML}")
	    model = mujoco.MjModel.from_xml_path(str(SCENE_XML))
	    model.vis.global_.offwidth = ROBOT_W
	    model.vis.global_.offheight = TOTAL_H
	    data = mujoco.MjData(model)
	    renderer = mujoco.Renderer(model, TOTAL_H, ROBOT_W)

	    cam = mujoco.MjvCamera()
	    cam.type = mujoco.mjtCamera.mjCAMERA_FREE
	    cam.distance = 0.48
	    cam.azimuth = 175
	    cam.elevation = -8
	    cam.lookat[:] = [0, 0, 0.14]

	    # Physics steps per video frame. int() truncates, so simulated time can
	    # run slightly behind video time when the ratio is not a whole number.
	    stp = max(1, int(1.0 / (model.opt.timestep * FPS)))
	    n_frames = int(TOTAL_DURATION * FPS)

	    ffmpeg_cmd = [
	        'ffmpeg', '-y',
	        # Log errors only, so the captured stderr is the actual failure
	        # reason and not the version banner and progress stats.
	        '-loglevel', 'error',
	        '-f', 'rawvideo', '-vcodec', 'rawvideo',
	        '-s', f'{TOTAL_W}x{TOTAL_H}', '-pix_fmt', 'rgb24', '-r', str(FPS),
	        '-i', '-', '-c:v', 'libx264', '-pix_fmt', 'yuv420p',
	        '-preset', 'fast', '-crf', '18', str(OUTPUT_MP4),
	    ]

	    # stderr goes to a temp file and not a pipe: nothing drains a pipe while
	    # frames are being written, so a chatty ffmpeg could fill it and deadlock.
	    with tempfile.TemporaryFile() as err_log:
	        try:
	            proc = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE, stderr=err_log)
	        except FileNotFoundError:
	            print("❌ ffmpeg error: ffmpeg executable not found on PATH")
	            sys.exit(1)

	        frames_written = 0
	        print(f"Rendering {n_frames} frames ({TOTAL_DURATION:.0f}s @ {FPS}fps)...")
	        try:
	            for i in range(n_frames):
	                t = i / FPS

	                # Set robot pose
	                pose = get_robot_pose(t)
	                for k, v in pose.items():
	                    data.ctrl[k] = v

	                # Step physics
	                for _ in range(stp):
	                    mujoco.mj_step(model, data)

	                # Render robot view
	                renderer.update_scene(data, cam)
	                robot_rgb = renderer.render()  # (H, W, 3)

	                # Render chat panel
	                chat_rgb = np.array(render_chat_panel(t))

	                # Composite split-screen; concatenate() builds a fresh array,
	                # so the renderer output needs no separate copy.
	                composite = np.concatenate([robot_rgb, chat_rgb], axis=1)
	                proc.stdin.write(composite.tobytes())
	                frames_written += 1

	                if (i + 1) % (FPS * 3) == 0:
	                    print(f" {i+1}/{n_frames} frames...")
	        except BrokenPipeError:
	            # ffmpeg exited early; its exit status and stderr are reported below.
	            pass
	        else:
	            print(f"\nEncoding {frames_written} frames to {OUTPUT_MP4}...")
	        finally:
	            # Closing stdin signals end of input so ffmpeg can finalize the file.
	            try:
	                proc.stdin.close()
	            except BrokenPipeError:
	                pass
	            proc.wait()

	        err_log.seek(0)
	        stderr = err_log.read()

	    if proc.returncode == 0:
	        size = os.path.getsize(str(OUTPUT_MP4))
	        print(f"✅ {OUTPUT_MP4} ({size/1024:.0f}KB, {TOTAL_DURATION:.0f}s)")
	    else:
	        # Show the tail of the log: the most recent lines carry the failure reason.
	        print(f"❌ ffmpeg error: {stderr.decode(errors='replace')[-500:]}")
	        sys.exit(1)


	# Run the demo only when this file is executed as a script, not when imported.
	if __name__ == "__main__":
	main()