"""
🦁 Jungle Story Time — for little hands (ages 2–5)
==================================================
3 taps and GO: pick a friend, pick a place, story-or-poem. The fine-tuned model
(deployed on Modal) writes it; VoxCPM2 reads it aloud in sweet designed voices —
or in YOUR family's cloned voice.
python app_jungle.py → http://localhost:7868 (UI → Modal model)
LOCAL_MODE=1 python app_jungle.py (fully offline story gen)
"""
import base64
import html as html_lib
import os
import random
import re
import threading
import gradio as gr
# Repo id and filename pattern handed to Llama.from_pretrained() by get_llm().
MODEL_REPO = "ThePradip/minicpm5-1b-kids-storyteller-GGUF"
MODEL_FILE = "*Q4_K_M*.gguf"

# Remote service URLs. Each one can be overridden with the environment
# variable of the same name; the literal is the fallback.
STORY_ENDPOINT = os.environ.get(
    "STORY_ENDPOINT",
    "https://pradiptivhale--kids-story-api-storyteller-narrate.modal.run",
)
VOICE_ENDPOINT = os.environ.get(
    "VOICE_ENDPOINT",
    "https://pradiptivhale--kids-voice-tts-voxnarrator-speak.modal.run",
)
IMAGE_ENDPOINT = os.environ.get(
    "IMAGE_ENDPOINT",
    "https://pradiptivhale--kids-image-gen-illustrator-draw.modal.run",
)

# LOCAL_MODE=1 makes tell() skip the remote story endpoint.
LOCAL_MODE = os.environ.get("LOCAL_MODE") == "1"

# System message sent with every local chat completion.
SYSTEM_PROMPT = (
    "You are a kind storyteller for children aged 2-5. "
    "Write tiny, happy, easy-to-picture stories and poems "
    "with very simple words, sounds, repetition, and nothing scary."
)
FRIENDS = {
"π¦\nSimba": "Simba the little lion cub",
"π―\nTiger": "a friendly tiger cub",
"πΌ\nPanda": "a round panda",
"π»\nBhalu": "Bhalu the gentle bear",
"π¦\nParrot": "a green parrot",
"π\nElephant": "a baby elephant",
"π°\nBunny": "a fluffy bunny",
"π¦\nDuck": "a yellow duck",
}
PLACES = {
"π \nHome": "home",
"π³\nJungle": "the green jungle",
"π\nPond": "the village pond",
"π₯\nMango tree": "the mango tree",
"π\nNight sky": "under the moon and stars",
"ποΈ\nBeach": "the sandy beach",
"πΈ\nGarden": "grandma's flower garden",
"π\nFarm": "the happy farm",
"β\nSnow": "the soft white snow",
"π\nTrain": "a choo-choo train ride",
}
# poster scene per place: backdrop gradient, text ink, floating decorations
SCENES = {
"π \nHome": {"bg": "linear-gradient(160deg,#FFE0B2,#FFF3E0 55%,#FFCC80)",
"ink": "#3E2723", "card": "rgba(255,255,255,.97)",
"deco": ["π ", "π§Έ", "πΌ", "π", "πͺ", "πͺ"]},
"π³\nJungle": {"bg": "linear-gradient(160deg,#A5D6A7,#DCEDC8 55%,#66BB6A)",
"ink": "#1B5E20", "card": "rgba(255,255,255,.97)",
"deco": ["π³", "πΏ", "π¦", "πΊ", "π", "πΎ"]},
"π\nPond": {"bg": "linear-gradient(160deg,#81D4FA,#E1F5FE 55%,#4FC3F7)",
"ink": "#01579B", "card": "rgba(255,255,255,.97)",
"deco": ["π", "πͺ·", "π", "π¦", "π«§", "πΈ"]},
"π₯\nMango tree": {"bg": "linear-gradient(160deg,#FFE082,#FFF8E1 55%,#FFB74D)",
"ink": "#E65100", "card": "rgba(255,255,255,.97)",
"deco": ["π₯", "π³", "π¦", "π", "π", "π»"]},
"π\nNight sky": {"bg": "linear-gradient(160deg,#283593,#3949AB 55%,#1A237E)",
"ink": "#FFFDE7", "card": "rgba(13,17,62,.86)",
"deco": ["π", "β", "β¨", "π¦", "π ", "π«"]},
}
VOICES = {
"π Sunny": "sunny",
"π¦ Koyal": "koyal",
"π Dadu": "dadu",
"π€Ή Pip": "pip",
"πΏ Willow": "willow",
"π Atlas": "atlas",
"ποΈ My family's voice": "clone",
}
LESSONS = ["sharing is fun", "being patient", "gentle hands", "saying please and thank you",
"helping friends", "trying again", "telling the truth", "animal sounds",
"counting one to five", "colors", "big and small", "bedtime is cozy"]
# Lazily created model instance, plus the lock that guards its creation.
_llm = None
_lock = threading.Lock()


def get_llm():
    """Return the shared llama.cpp model, loading it on the first call.

    The load happens under ``_lock`` so concurrent first callers do not each
    load a copy. ``llama_cpp`` is imported here, not at module import time,
    so the module can be imported without it installed.
    """
    global _llm
    with _lock:
        if _llm is not None:
            return _llm
        from llama_cpp import Llama

        _llm = Llama.from_pretrained(
            repo_id=MODEL_REPO,
            filename=MODEL_FILE,
            n_ctx=2048,
            n_gpu_layers=-1,
            n_threads=os.cpu_count(),
            verbose=False,
        )
        return _llm
def _post_with_progress(progress, url, payload, timeout, label, est_seconds):
    """POST ``payload`` as JSON in a worker thread while ticking a progress bar.

    Long generations feel alive because the bar keeps moving: the fraction
    starts at 5 %, grows linearly with elapsed time over ``est_seconds`` and
    is capped at 95 % until the response arrives.

    Args:
        progress: callable accepting ``(fraction, desc=...)``.
        url: endpoint to POST to.
        payload: JSON-serialisable request body.
        timeout: timeout in seconds passed to ``requests.post``.
        label: description shown next to the bar while waiting.
        est_seconds: rough expected duration, used only to pace the bar.

    Returns:
        The ``requests`` response (status already checked).

    Raises:
        Whatever the request raised in the worker thread, including the
        ``raise_for_status()`` error.
    """
    import time

    import requests

    outcome = {}

    def work():
        try:
            resp = requests.post(url, json=payload, timeout=timeout)
            resp.raise_for_status()
            outcome["response"] = resp
        except Exception as exc:  # re-raised in the calling thread below
            outcome["error"] = exc

    worker = threading.Thread(target=work, daemon=True)
    worker.start()
    # A zero, negative or missing estimate must not break the pacing maths.
    pace = est_seconds if est_seconds and est_seconds > 0 else 1.0
    # monotonic(): elapsed time must not jump when the wall clock is adjusted.
    started = time.monotonic()
    while worker.is_alive():
        frac = min(0.05 + (time.monotonic() - started) / pace, 0.95)
        progress(frac, desc=label)
        # join() with a timeout wakes as soon as the request finishes,
        # instead of always sleeping out the full tick.
        worker.join(timeout=0.4)
    if "error" in outcome:
        raise outcome["error"]
    progress(1.0, desc="β¨ Ready!")
    return outcome["response"]
def tell(kid, friend, custom_friend, place, custom_place, kind, progress=gr.Progress()):
    """Stream a story or poem for the chosen friend and place.

    Generator: each yielded string is the full text to show so far. While
    text is still arriving it ends with the cursor glyph; the last yield is
    the final text with meta lines removed by ``_strip_meta``.

    Unless ``LOCAL_MODE`` is set, the remote ``STORY_ENDPOINT`` is tried
    first and its story is revealed word by word. If that fails, the local
    model from ``get_llm()`` is streamed instead. If that is unavailable too,
    a single friendly "try again" message is yielded.

    Args:
        kid: child's name; blank falls back to "my little friend" (max 30 chars).
        friend: key into ``FRIENDS``; used when ``custom_friend`` is blank.
        custom_friend: free-text friend (max 40 chars); gets an "a " prefix
            unless it already starts with an article.
        place: key into ``PLACES``; used when ``custom_place`` is blank.
        custom_place: free-text place (max 50 chars).
        kind: radio label; any label containing "Poem" selects a poem.
        progress: Gradio progress tracker.
    """
    # Strip first, then fall back, so a whitespace-only name also gets the default.
    kid = (kid or "").strip()[:30] or "my little friend"
    if custom_friend and custom_friend.strip():  # any animal or bird they can dream of
        c = custom_friend.strip()[:40]
        characters = c if c.split()[0].lower() in ("a", "an", "the") else f"a {c}"
    else:
        characters = FRIENDS[friend]
    where = (custom_place.strip()[:50] if (custom_place and custom_place.strip())
             else PLACES[place])
    lesson = random.choice(LESSONS)  # surprise lesson: one less question to ask
    form = "poem" if "Poem" in kind else "story"
    prompt = (f"Tell a {form} for {kid}, age 4. It stars {characters} at {where}. "
              f"It should help the child learn: {lesson}. Say {kid}'s name in the {form}. "
              f"Keep it very simple for a small child, easy to picture, with sounds and a "
              f"repeating line." + (" Make it rhyme." if form == "poem" else ""))

    if not LOCAL_MODE and STORY_ENDPOINT:
        import time
        yield f"πΏ {characters.split()[-1].title()} is thinking of a {form} about β{lesson}ββ¦ β"
        try:
            r = _post_with_progress(
                progress, STORY_ENDPOINT,
                {"kid": kid, "age": 4, "characters": characters,
                 "place": where, "lesson": lesson, "kind": form},
                timeout=180, label=f"πΏ Writing your {form}β¦", est_seconds=25)
            story = _strip_meta(r.json()["story"])
            # Reveal the finished story word by word.
            shown = ""
            for word in story.split(" "):
                shown += word + " "
                yield shown + "β"
                time.sleep(0.045)
            yield story
            return
        except Exception:
            # Remote failed: say so, then fall through to the local model.
            yield "π§οΈ The jungle phone is napping β telling it myselfβ¦ β"

    try:
        stream = get_llm().create_chat_completion(
            messages=[{"role": "system", "content": SYSTEM_PROMPT},
                      {"role": "user", "content": prompt}],
            max_tokens=340, temperature=0.8, top_p=0.95, repeat_penalty=1.08, stream=True)
    except Exception:
        # Space build is a thin client (no llama-cpp); if the Modal story endpoint is
        # unreachable there's no local fallback, so fail softly instead of crashing.
        yield "π§οΈ Our storyteller is having a little nap β please try again in a moment! π€"
        return

    text = ""
    for chunk in stream:
        delta = chunk["choices"][0].get("delta", {}).get("content")
        if delta:
            text += delta
            # Hide <think>/</think> tags while streaming. The previous pattern
            # "?think>" was not a valid regex and raised re.error here.
            yield re.sub(r"</?think>", "", text).strip() + " β"
    yield _strip_meta(text)
_EMOJI_RE = re.compile(
"[\U0001F000-\U0001FAFF\U00002600-\U000027BFβ¬-β―Ώβ-βΏοΈβ]")
def _clean_for_speech(text):
"""Audio gets words only: no emojis, no markdown marks, no cursor glyph."""
text = (text or "").replace("β", "")
text = _EMOJI_RE.sub("", text)
text = re.sub(r"[*#_>`~|]", "", text)
return re.sub(r"[ \t]+", " ", text).strip()
# phrases that mean a line is the prompt/instructions echoed back, NOT the story
_META_RE = re.compile(
r"help the child learn|keep it (very )?simple|easy to picture|repeating line|"
r"repeated line|make it rhyme|^\s*(sure|okay|here(?:'s| is)|of course)\b|"
r"^\s*(title|story|poem|prompt|tell a (story|poem))\s*[:\-]|it stars\b|"
r"^\s*tell a (story|poem)\b|with sounds and",
re.I)
def _strip_meta(story):
"""Drop any preamble / echoed-prompt / instruction lines a small model sometimes
emits, so only the real story or poem is shown AND read aloud."""
story = re.sub(r"?think>", "", story or "").strip()
kept = [ln for ln in story.splitlines() if ln.strip() and not _META_RE.search(ln)]
return "\n".join(kept).strip() or story # never return empty
def _friend_bits(friend, custom_friend):
if custom_friend and custom_friend.strip():
return "πΎ", custom_friend.strip()[:40].title()
emoji, name = friend.split("\n")
return emoji, name
def make_poster(story, friend, custom_friend, place, kid, kind, img_b64=None):
"""Animated kids poster: scene backdrop, bouncing friends, staggered story
lines β and the FLUX illustration once it arrives (img_b64)."""
story = (story or "").replace("β", "").strip()
if not story or story.startswith(("πΏ", "π§οΈ")):
return ""
scene = SCENES.get(place, SCENES["π³\nJungle"])
f_emoji, f_name = _friend_bits(friend, custom_friend)
p_emoji, p_name = place.split("\n")
kid = html_lib.escape((kid or "").strip()[:30]) or "Little Star"
is_poem = "Poem" in (kind or "")
title = f"{kid}'s {'Poem' if is_poem else 'Story'}"
lines = [ln.strip() for ln in story.split("\n") if ln.strip()]
if len(lines) == 1: # one block β sentence-sized lines so they animate in turn
lines = [s.strip() for s in re.split(r"(?<=[.!?])\s+", lines[0]) if s.strip()]
body = "".join(
f'
'
f"{html_lib.escape(ln)}
" for i, ln in enumerate(lines))
deco = "".join(
f'{e}'
for e, x, y, s, d in zip(
scene["deco"] * 2,
[2, 91, 5, 88, 46, 94, 3, 52, 72, 24, 14, 81],
[7, 9, 83, 76, 2, 44, 46, 91, 5, 89, 30, 92],
[2.0, 2.2, 2.0, 2.2, 1.7, 1.9, 1.8, 2.0, 1.6, 1.8, 1.5, 1.7],
[0, -1.3, -2.1, -3, -.7, -1.8, -2.6, -3.4, -1.1, -2.3, -.4, -2.9]))
picture = (f'
' if img_b64 else
'π¨ painting your pictureβ¦
')
return f"""
{deco}
{f_emoji}
{title}
starring {html_lib.escape(f_name)}
Β· at {p_emoji} {p_name}
{p_emoji}
{picture}
{body}
β¨ Jungle Story Time β¨
"""
def add_illustration(story, friend, custom_friend, place, kid, kind):
    """Fetch a storybook picture and rebuild the poster around it.

    Returns ``gr.update()`` (no change) when there is no real story yet, i.e.
    the text is empty or still a status message, and when the image request
    fails for any reason. Otherwise returns the HTML from ``make_poster``
    with the base64-encoded image.
    """
    import requests

    text = (story or "").replace("β", "").strip()
    if not text:
        return gr.update()
    if text.startswith(("πΏ", "π§οΈ")):
        return gr.update()

    f_name = _friend_bits(friend, custom_friend)[1]
    where = PLACES.get(place, 'the jungle')
    prompt = (f"children's picture-book illustration of {f_name} at "
              f"{where}, cute, soft pastel watercolor, "
              f"warm light, happy, friendly, storybook style, no text")
    try:
        response = requests.post(IMAGE_ENDPOINT, json={"prompt": prompt}, timeout=420)
        response.raise_for_status()
        encoded = base64.b64encode(response.content).decode()
    except Exception:
        # Painter unavailable: leave whatever poster is already displayed.
        return gr.update()
    return make_poster(text, friend, custom_friend, place, kid, kind, img_b64=encoded)
# Sibling endpoints derived from the speak URL by swapping its function suffix.
SAVE_ENDPOINT = VOICE_ENDPOINT.replace("-speak.", "-save-voice.")
LIST_ENDPOINT = VOICE_ENDPOINT.replace("-speak.", "-list-voices.")


def save_family_voice(name, clone_audio, clone_text):
    """Upload a recorded clip as a named family voice.

    Args:
        name: label for the voice; required.
        clone_audio: path of the recorded or uploaded clip; required.
        clone_text: optional transcript of the clip.

    Returns:
        A ``gr.update`` for the saved-voices dropdown. On success it carries
        the refreshed choices with the last listed voice selected. When the
        name or clip is missing it is a no-op update, after a hint toast.

    Raises:
        gr.Error: the clip could not be read, the request failed, or the
            response had no ``"voices"`` entry. Raised with a friendly message
            so the raw exception text does not reach the UI.
    """
    import requests

    if not (name and name.strip()):
        gr.Info("Give the voice a name first β Mama? Papa? Dadi? βοΈ")
        return gr.update()
    if not clone_audio:
        gr.Info("Record or upload ~15 seconds first ποΈ")
        return gr.update()

    clean_name = name.strip()
    try:
        with open(clone_audio, "rb") as f:
            ref = base64.b64encode(f.read()).decode()
        r = requests.post(SAVE_ENDPOINT, timeout=300, json={
            "name": clean_name, "reference_b64": ref,
            "prompt_text": (clone_text or "").strip()})
        r.raise_for_status()
        voices = r.json()["voices"]
    except (OSError, requests.RequestException, KeyError, TypeError, ValueError) as exc:
        raise gr.Error(
            "Could not save the voice right now - please try again in a moment."
        ) from exc

    gr.Info(f"πΎ Saved! '{clean_name}' can read every story now.")
    return gr.update(choices=["β"] + voices, value=voices[-1] if voices else "β")
def load_saved_voices():
    """Refresh the saved-voices dropdown from the list endpoint.

    Best effort: on any failure (network, bad JSON, missing key) the dropdown
    is left unchanged.
    """
    import requests

    try:
        listing = requests.post(LIST_ENDPOINT, json={}, timeout=120).json()
        choices = ["β"] + listing["voices"]
    except Exception:
        return gr.update()
    return gr.update(choices=choices)
def _stream_card(text):
"""Plain word-by-word card shown while the model writes (no animations, so
the per-word re-render doesn't flicker); the real poster replaces it."""
return ('"
+ html_lib.escape(text) + "
")
def tell_poster(kid, friend, custom_friend, place, kind, progress=gr.Progress()):
    """tell(), but rendered into the poster slot instead of a bare textbox.

    Yields ``(text, html)`` pairs: the text ``tell()`` produced so far and
    the same text wrapped by ``_stream_card``.

    This handler has no custom-place input, so ``None`` is passed for
    ``tell()``'s ``custom_place`` and the ``place`` selection is always used.
    The earlier call left that argument out, which put ``kind`` in
    ``custom_place`` and the progress tracker in ``kind``.
    """
    for partial in tell(kid, friend, custom_friend, place, None, kind, progress):
        yield partial, _stream_card(partial.replace("β", " βοΈ"))
def _scenes(story, max_scenes=4):
    """Split the tale into scene chunks made of whole sentences.

    The text is cleaned for speech, then split after ``.``, ``!`` or ``?``.
    A single sentence gives one scene. Otherwise the scene count is about one
    per three sentences, at least 2 and at most ``max_scenes``. Sentences are
    shared out as evenly as possible, earlier scenes taking the extras.
    Returns ``[]`` when there are no sentences.
    """
    cleaned = _clean_for_speech(story)
    sentences = []
    for piece in re.split(r"(?<=[.!?])\s+", cleaned):
        piece = piece.strip()
        if piece:
            sentences.append(piece)

    total = len(sentences)
    if total == 0:
        return []
    if total == 1:
        scene_count = 1
    else:
        scene_count = min(max_scenes, max(2, (total + 2) // 3))

    base, extra = divmod(total, scene_count)
    sizes = [base + 1 if idx < extra else base for idx in range(scene_count)]

    scenes, start = [], 0
    for size in sizes:
        joined = " ".join(sentences[start:start + size])
        if joined:
            scenes.append(joined)
        start += size
    return scenes
_movie_cache = {}  # (voice, story sha1, clone clip path) -> mp4 path


def make_movie(story, friend, custom_friend, place, kid, kind,
               voice_label, saved_voice, clone_audio, clone_text,
               progress=gr.Progress()):
    """Build a narrated slideshow movie of the story.

    The story is split into scenes by ``_scenes``. For each scene one picture
    is requested from ``IMAGE_ENDPOINT`` and one narration clip from
    ``VOICE_ENDPOINT``, all in parallel. ``ffmpeg`` turns each pair into a
    slow-zoom segment and concatenates the segments into one mp4.

    Args:
        story: story text (the cursor glyph is removed).
        friend, custom_friend: passed to ``_friend_bits`` for the picture prompt.
        place: key into ``PLACES`` for the picture prompt.
        kid, kind: accepted for a uniform handler signature; not used here.
        voice_label: key into ``VOICES``.
        saved_voice: saved family voice name; overrides ``voice_label`` unless
            it is the placeholder entry.
        clone_audio: path of a reference clip, required for the "clone" voice.
        clone_text: optional transcript of the reference clip.
        progress: Gradio progress tracker.

    Returns:
        Path of the mp4, or ``None`` when the clone voice was chosen without
        a reference clip.

    Raises:
        gr.Error: there is no story to film.
        requests.HTTPError / subprocess.CalledProcessError: a service call or
            an ffmpeg/ffprobe invocation failed.
    """
    import hashlib
    import subprocess
    import tempfile
    from concurrent.futures import ThreadPoolExecutor

    import requests

    story = (story or "").replace("β", "").strip()
    if not story:
        raise gr.Error("Make a story first! π")
    voice = VOICES[voice_label]
    if voice == "robot":
        voice = "sunny"  # movies deserve a sweet voice, not the browser robot
    if saved_voice and saved_voice != "β":
        voice = f"custom:{saved_voice}"
    extra = {}
    if voice == "clone":
        if not clone_audio:
            gr.Info("ποΈ Record ~15 seconds in the family-voice panel first!")
            return None
        with open(clone_audio, "rb") as f:
            extra["reference_b64"] = base64.b64encode(f.read()).decode()
        if clone_text and clone_text.strip():
            extra["prompt_text"] = clone_text.strip()

    # Same voice + same story (+ same reference clip) reuses the earlier render.
    cache_key = (voice, hashlib.sha1(story.encode()).hexdigest(), clone_audio or "")
    if cache_key in _movie_cache and os.path.exists(_movie_cache[cache_key]):
        progress(1.0, desc="π¬ Already filmed β playing!")
        return _movie_cache[cache_key]

    scenes = _scenes(story)
    if not scenes:
        raise gr.Error("Make a story first! π")
    _, f_name = _friend_bits(friend, custom_friend)
    where = PLACES.get(place, "the jungle")
    workdir = tempfile.mkdtemp(prefix="jungle_movie_")

    # One step per picture and per clip; the extra step keeps the bar short
    # of 100 % until the movie is stitched.
    total = len(scenes) * 2 + 1
    done = [0]
    done_lock = threading.Lock()

    def tick(desc):
        # Called from pool worker threads, so the increment is locked.
        with done_lock:
            done[0] += 1
            frac = done[0] / total
        progress(frac, desc=desc)

    def fetch_image(i, chunk):
        prompt = (f"children's picture-book illustration of {f_name} at {where}, "
                  f"scene: {chunk[:180]}")
        r = requests.post(IMAGE_ENDPOINT, json={"prompt": prompt}, timeout=420)
        r.raise_for_status()
        p = os.path.join(workdir, f"scene{i}.png")
        with open(p, "wb") as f:
            f.write(r.content)
        tick(f"π¨ Painted picture {i + 1} of {len(scenes)}β¦")
        return p

    def fetch_voice(i, chunk):
        r = requests.post(VOICE_ENDPOINT, json={"text": chunk[:1200],
                                                "voice": voice, **extra}, timeout=600)
        r.raise_for_status()
        p = os.path.join(workdir, f"scene{i}.wav")
        with open(p, "wb") as f:
            f.write(r.content)
        tick(f"ποΈ Recorded line {i + 1} of {len(scenes)}β¦")
        return p

    progress(0.02, desc=f"π¬ Filming {len(scenes)} scenesβ¦ (first time takes longer)")
    with ThreadPoolExecutor(max_workers=8) as ex:
        imgs = [ex.submit(fetch_image, i, c) for i, c in enumerate(scenes)]
        wavs = [ex.submit(fetch_voice, i, c) for i, c in enumerate(scenes)]
        imgs = [f.result() for f in imgs]
        wavs = [f.result() for f in wavs]

    segs = []
    for i, (png, wav) in enumerate(zip(imgs, wavs)):
        # check=True: an ffprobe failure is reported as such, not as a
        # float('') ValueError one line later.
        probe = subprocess.run(
            ["ffprobe", "-v", "quiet", "-show_entries", "format=duration",
             "-of", "csv=p=0", wav], capture_output=True, text=True, check=True)
        dur = float(probe.stdout.strip())
        frames = int((dur + 0.4) * 25)  # clip length plus 0.4 s, at 25 fps
        seg = os.path.join(workdir, f"seg{i}.mp4")
        subprocess.run(
            ["ffmpeg", "-y", "-i", png, "-i", wav, "-filter_complex",
             "[0:v]scale=2304:1536,"
             f"zoompan=z='min(zoom+0.0009,1.28)':x='iw/2-(iw/zoom/2)'"
             f":y='ih/2-(ih/zoom/2)':d={frames}:s=1152x768:fps=25,format=yuv420p[v]",
             "-map", "[v]", "-map", "1:a", "-c:v", "libx264", "-preset", "veryfast",
             "-crf", "20", "-c:a", "aac", "-b:a", "192k", "-shortest", seg],
            check=True, capture_output=True)
        segs.append(seg)

    # The concat demuxer reads the list of segment files from a text file.
    concat = os.path.join(workdir, "list.txt")
    with open(concat, "w") as f:
        f.writelines(f"file '{s}'\n" for s in segs)
    out = os.path.join(workdir, "story_movie.mp4")
    subprocess.run(["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", concat,
                    "-c", "copy", out], check=True, capture_output=True)
    progress(1.0, desc="πΏ Your movie is ready!")
    _movie_cache[cache_key] = out
    return out
def read_aloud(story, voice_label, saved_voice, clone_audio, clone_text,
               progress=gr.Progress()):
    """Read the whole story aloud in the chosen voice and return one audio clip.

    The story is cleaned for speech and cut to 1500 characters. A saved family
    voice overrides ``voice_label`` unless it is the placeholder entry. The
    "clone" voice also sends the reference clip and, when given, its transcript.

    Returns:
        Path of the written ``story.wav``. ``None`` when the clone voice has
        no reference clip or the voice service failed; a toast explains which.

    Raises:
        gr.Error: there is no story text to read.
    """
    import os
    import tempfile

    spoken = _clean_for_speech(story)
    if not spoken:
        raise gr.Error("Make a story first! π")

    chosen = VOICES[voice_label]
    if chosen == "robot":
        # the Robo button uses the browser; this button is the sweet voice
        chosen = "sunny"
    if saved_voice and saved_voice != "β":
        chosen = f"custom:{saved_voice}"

    request_body = {"text": spoken[:1500], "voice": chosen}
    if chosen == "clone":
        if not clone_audio:
            gr.Info("ποΈ Record ~15 seconds in the family-voice panel first!")
            return None
        with open(clone_audio, "rb") as clip:
            request_body["reference_b64"] = base64.b64encode(clip.read()).decode()
        transcript = (clone_text or "").strip()
        if transcript:
            request_body["prompt_text"] = transcript

    try:
        response = _post_with_progress(
            progress, VOICE_ENDPOINT, request_body,
            timeout=600, label="ποΈ Reading your storyβ¦", est_seconds=20)
    except Exception:
        gr.Info("π The reading voice is napping β tap 'π Read it again', or π€ Robo.")
        return None

    target = os.path.join(tempfile.mkdtemp(prefix="jungle_voice_"), "story.wav")
    with open(target, "wb") as sink:
        sink.write(response.content)
    return target
def draw_picture(friend, custom_friend, place, custom_place, progress=gr.Progress()):
    """Paint one storybook picture to show beside the story.

    It is a separate output, so the story text is never hidden while the
    picture is being drawn. A non-blank ``custom_place`` (max 50 chars)
    replaces the selected place in the prompt.

    Returns:
        Path of the written ``scene.png``, or ``gr.update()`` (no change)
        when the image service fails.
    """
    import os
    import tempfile

    f_name = _friend_bits(friend, custom_friend)[1]
    typed_place = custom_place.strip() if custom_place else ""
    if typed_place:
        where = typed_place[:50]
    else:
        where = PLACES.get(place, "the jungle")
    prompt = (f"children's picture-book illustration of {f_name} at {where}, cute, "
              f"soft pastel watercolor, warm light, happy, friendly, storybook style, no text")

    try:
        response = _post_with_progress(
            progress, IMAGE_ENDPOINT, {"prompt": prompt},
            timeout=420, label="π¨ Painting your pictureβ¦", est_seconds=25)
    except Exception:
        # painter asleep: keep the story, just no picture
        return gr.update()

    target = os.path.join(tempfile.mkdtemp(prefix="jungle_img_"), "scene.png")
    with open(target, "wb") as sink:
        sink.write(response.content)
    return target
CSS = """
body, .gradio-container {
background: linear-gradient(180deg, #7EC8E3 0%, #A8E6CF 55%, #7CB342 100%) !important;
font-family: 'Chalkboard SE', 'Comic Sans MS', 'Segoe Print', cursive !important;
}
#sky { position: fixed; inset: 0; pointer-events: none; z-index: 0; overflow: hidden; }
#sky span { position: absolute; animation: floaty 8s ease-in-out infinite; }
@keyframes floaty { 0%,100% { transform: translateY(0) } 50% { transform: translateY(-24px) } }
@keyframes spin { from { transform: rotate(0) } to { transform: rotate(360deg) } }
#sun { font-size: 4rem; animation: spin 24s linear infinite !important; }
#title { text-align: center; padding: 4px 0 0; position: relative; z-index: 2; }
#title h1 { font-size: 2.8rem; margin: 0; color: #FFF;
text-shadow: 3px 3px 0 #F4511E, 6px 6px 0 rgba(0,0,0,.15); }
#title p { color: #1B5E20; font-size: 1.15rem; font-weight: 700; margin: 2px 0 0; }
#namebox textarea, #namebox input {
font-size: 1.5rem !important; text-align: center !important; font-family: inherit !important;
border-radius: 999px !important; border: 4px solid #FF8F00 !important; background: #FFFDE7 !important;
color: #4E342E !important; font-weight: 700 !important;
}
#namebox label span, #friend > label > span, #place > label > span, #kind > label > span,
#voice > label > span { color: #1B5E20 !important; font-weight: 800 !important; font-size: 1.15rem !important; }
/* compact cartoon tiles for radios β small but cute */
#friend .wrap, #place .wrap { display: grid !important; gap: 7px !important; }
#friend .wrap { grid-template-columns: repeat(4, 1fr) !important; }
#place .wrap { grid-template-columns: repeat(5, 1fr) !important; }
/* tidy two-column shell β fills the width, no wasted space */
#controls { background: rgba(255,253,231,.82) !important; border-radius: 22px !important;
padding: 14px 16px !important; box-shadow: 0 8px 22px rgba(0,0,0,.12) !important; }
#stage { background: rgba(255,255,255,.45) !important; border-radius: 22px !important;
padding: 14px 16px !important; box-shadow: 0 8px 22px rgba(0,0,0,.10) !important; }
/* use the whole window β no wasted margins */
.gradio-container { max-width: 100% !important; padding: 6px 16px 14px !important; }
#storycard textarea { min-height: 320px !important; } /* fill the story box height */
#title { margin-bottom: 6px !important; }
#friend, #place, #kind, #voice, #namebox, #customfriend, #customplace { margin-bottom: 4px !important; }
/* ββ pretty, engaging audio player ββ */
#audiocard {
background: linear-gradient(135deg,#FFF3E0 0%,#FFE0B2 55%,#FFCC80 100%) !important;
border: 4px solid #FFB300 !important; border-radius: 26px !important;
padding: 12px 16px !important; box-shadow: 0 8px 22px rgba(0,0,0,.15) !important;
animation: audioGlow 2.8s ease-in-out infinite;
}
#audiocard label span, #audiocard .label-wrap span {
color: #E65100 !important; font-weight: 900 !important; font-size: 1.15rem !important; }
#audiocard audio { width: 100% !important; border-radius: 999px !important; }
/* recolor the waveform Gradio draws so it matches the jungle */
#audiocard .waveform-container, #audiocard canvas { border-radius: 16px !important; }
@keyframes audioGlow {
0%,100% { box-shadow: 0 8px 22px rgba(255,179,0,.30); }
50% { box-shadow: 0 10px 30px rgba(244,81,30,.55); }
}
#friend label, #place label {
background: #FFFDE7 !important; border: 2.5px solid #FFB300 !important; border-radius: 14px !important;
padding: 6px 2px !important; text-align: center !important; cursor: pointer !important;
font-size: 0.82rem !important; font-weight: 800 !important; color: #4E342E !important;
white-space: pre-line !important; line-height: 1.1 !important;
box-shadow: 0 3px 0 #E65100 !important; transition: transform .12s !important;
display: flex !important; flex-direction: column !important; gap: 2px !important;
align-items: center !important; justify-content: center !important; min-height: 72px !important;
}
#friend label:hover, #place label:hover { transform: translateY(-2px) scale(1.05); }
#friend label.selected, #place label.selected {
background: #FFE082 !important; border-color: #F4511E !important; transform: scale(1.06);
box-shadow: 0 3px 0 #BF360C !important; }
#friend input, #place input { display: none !important; }
#friend label span:first-letter, #place label span:first-letter { font-size: 2.2rem; }
@media (max-width: 760px) { #friend .wrap { grid-template-columns: repeat(4, 1fr) !important; } }
/* story / poem + voice pills β smaller */
#kind .wrap, #voice .wrap { display: flex !important; gap: 7px !important; flex-wrap: wrap !important; }
#kind label, #voice label {
border-radius: 999px !important; padding: 6px 13px !important; font-weight: 800 !important;
font-size: 0.85rem !important; cursor: pointer !important; border: 2.5px solid #7B1FA2 !important;
background: #F3E5F5 !important; color: #4A148C !important; box-shadow: 0 3px 0 #6A1B9A !important;
transition: transform .12s !important;
}
#kind label:hover, #voice label:hover { transform: translateY(-1px) scale(1.04); }
#kind label.selected, #voice label.selected { background: #CE93D8 !important; transform: scale(1.05); }
#kind input, #voice input { display: none !important; }
#customfriend textarea, #customfriend input {
font-size: 1.15rem !important; text-align: center !important; font-family: inherit !important;
border-radius: 999px !important; border: 3px dashed #7B1FA2 !important; background: #F3E5F5 !important;
color: #4A148C !important; font-weight: 700 !important;
}
#gobtn {
background: linear-gradient(135deg, #FF5722, #FF9800, #FFC107) !important; color: #FFF !important;
font-size: 1.7rem !important; font-weight: 900 !important; border-radius: 26px !important;
border: 5px solid #FFF3E0 !important; box-shadow: 0 8px 0 #BF360C !important;
font-family: inherit !important; letter-spacing: 1px;
}
#gobtn:active { transform: translateY(6px); box-shadow: 0 2px 0 #BF360C !important; }
#storycard textarea {
font-family: 'Chalkboard SE', 'Comic Sans MS', cursive !important;
font-size: 1.5rem !important; line-height: 2.05 !important; color: #000000 !important;
background:
radial-gradient(circle at 5% 8%, rgba(255,193,7,.18) 0 56px, transparent 57px),
radial-gradient(circle at 95% 92%, rgba(233,30,99,.10) 0 66px, transparent 67px),
#FFFFF3 !important;
border: 6px solid #8D6E63 !important; border-radius: 26px !important;
box-shadow: 0 12px 30px rgba(0,0,0,.25) !important; padding: 20px !important;
}
#storycard label span { color: #FFF !important; font-size: 1.2rem !important;
text-shadow: 2px 2px 0 #33691E; font-weight: 800 !important; }
#speakbtn { background: #FFEB3B !important; color: #4E342E !important; font-size: 1.2rem !important;
border-radius: 999px !important; font-weight: 900 !important; border: 4px solid #F57F17 !important;
box-shadow: 0 5px 0 #F57F17 !important; font-family: inherit !important; }
#savebtn { background:#C8E6C9 !important; color:#1B5E20 !important; border-radius:999px !important;
font-weight:800 !important; border:3px solid #2E7D32 !important; font-family:inherit !important; }
#robobtn { background: #E1F5FE !important; color: #01579B !important; border-radius: 999px !important;
font-weight: 800 !important; border: 3px solid #0288D1 !important; font-family: inherit !important; }
#cloneacc { background: rgba(255,253,231,.95) !important; border-radius: 18px !important;
border: 3px dashed #8D6E63 !important; }
footer { display: none !important; }
#credits { text-align: center; color: #FFFDE7; font-size: .85rem; margin-top: 4px;
text-shadow: 1px 1px 0 #33691E; position: relative; z-index: 2; }
"""
SKY = """
βοΈ
π
βοΈ
π¦
π¦
π¦
π΄
πΊ
πΎ
π
β
π
"""
ROBOT_JS = """(text) => {
if (!text) return;
speechSynthesis.cancel();
const clean = text.replace(/β/g, '')
.replace(/[\\u{1F000}-\\u{1FAFF}\\u{2600}-\\u{27BF}\\u{2B00}-\\u{2BFF}\\u{FE0F}\\u{200D}]/gu, '')
.replace(/[*#_>`~|]/g, '');
const u = new SpeechSynthesisUtterance(clean);
u.rate = 0.9; u.pitch = 1.25;
const v = speechSynthesis.getVoices().find(v => /female|child|kid|samantha|zira/i.test(v.name));
if (v) u.voice = v;
speechSynthesis.speak(u);
}"""
with gr.Blocks(title="Jungle Story Time π¦") as demo:
gr.HTML(SKY)
gr.HTML('π¦ Jungle Story Time π
'
'
Pick a friend, pick a place β and magic happens! β¨
')
with gr.Tabs():
with gr.Tab("π¦ Make a Story"):
with gr.Row(equal_height=False):
# ββ left column: all the picks βββββββββββββββββββββββββββ
with gr.Column(scale=1, min_width=340, elem_id="controls"):
kid = gr.Textbox(label="", placeholder="βοΈ What's your name?",
elem_id="namebox", max_length=30, container=False)
friend = gr.Radio(list(FRIENDS), value="π¦\nSimba",
label="πΎ Pick a friend", elem_id="friend")
custom_friend = gr.Textbox(label="", container=False, elem_id="customfriend",
max_length=40,
placeholder="ποΈ β¦or type any friend β a peacock? a baby fox?")
place = gr.Radio(list(PLACES), value="π³\nJungle",
label="πΊοΈ Pick a place", elem_id="place")
custom_place = gr.Textbox(label="", container=False, elem_id="customplace",
max_length=40,
placeholder="ποΈ β¦or type any place β a castle? the moon?")
kind = gr.Radio(["π A story!", "π΅ A poem!"], value="π A story!",
label="β¨ Story or poem?", elem_id="kind")
voice = gr.Radio(list(VOICES), value="π Sunny",
label="π Who reads it?", elem_id="voice")
with gr.Accordion("ποΈ Use my family's voice", open=False, elem_id="cloneacc"):
gr.Markdown("Clone **your own** voice or a family member's, **with permission** π")
clone_audio = gr.Audio(sources=["microphone", "upload"], type="filepath",
label="Grown-up's voice clip")
clone_text = gr.Textbox(label="What did they say? (makes the clone better)",
placeholder="Type the exact words from the recordingβ¦")
with gr.Row():
voice_name = gr.Textbox(label="Name this voice",
placeholder="Mama, Papa, Dadiβ¦", max_length=30)
save_btn = gr.Button("πΎ Save", elem_id="savebtn")
saved_voice = gr.Dropdown(["β"], value="β", label="π Saved family voices",
elem_id="savedvoices", allow_custom_value=True)
go = gr.Button("β¨ MAKE MY TALE! β¨", elem_id="gobtn")
# ββ right column: the result βββββββββββββββββββββββββββββ
with gr.Column(scale=1, min_width=420, elem_id="stage"):
with gr.Row(equal_height=True):
story = gr.Textbox(label="π Your magical tale", lines=13,
elem_id="storycard", scale=1)
picture = gr.Image(label="π¨ Your picture", elem_id="picture",
scale=1, height=360)
audio = gr.Audio(label="π§ Your story, read aloud", autoplay=True,
elem_id="audiocard", interactive=False,
buttons=["download"],
waveform_options={"waveform_color": "#FFB300",
"waveform_progress_color": "#E65100",
"show_recording_waveform": True})
gr.HTML('A tiny fine-tuned model writes π§Έ Β· VoxCPM2 voices Β· '
'MiniCPM5-1B on Modal Β· Build Small Hackathon
')
with gr.Tab("βΉοΈ How it works"):
gr.Markdown("## π¦ How Jungle Story Time works")
gr.Image("architecture.png", show_label=False, container=False)
gr.Markdown(
"**Three tiny models β small, open, mostly 4-bit:**\n\n"
"- π **Story β MiniCPM5-1B** (fine-tuned, GGUF via llama.cpp): writes a "
"gentle, personalized tale from your picks. Runs on CPU β also fully offline.\n"
"- π **Voice β VoxCPM2**: reads it aloud in a designed sweet voice, or clones "
"your family's voice from a ~15s clip (opt-in).\n"
"- π¨ **Picture β DreamShaper XL v2 Turbo** (4-bit NF4): paints a soft "
"watercolor illustration for each tale in a few turbo steps.\n\n"
"The three run as **scale-to-zero services on Modal** (β$0 idle); a session "
"cache makes repeats instant.\n\n"
"Built for the **Build Small Hackathon**. "
"[Story model](https://huggingface.co/ThePradip/minicpm5-1b-kids-storyteller) Β· "
"[Dataset](https://huggingface.co/datasets/build-small-hackathon/kids-story)"
)
# story and picture have NO dependency β run in parallel (two listeners).
# voice reads the story, so it starts the moment the story is ready (not after the image).
story_evt = go.click(tell, [kid, friend, custom_friend, place, custom_place, kind], story)
go.click(draw_picture, [friend, custom_friend, place, custom_place], picture)
story_evt.then(read_aloud, [story, voice, saved_voice, clone_audio, clone_text], audio)
save_btn.click(save_family_voice, [voice_name, clone_audio, clone_text], saved_voice)
demo.load(load_saved_voices, None, saved_voice)
if __name__ == "__main__":
    launch_kwargs = {"css": CSS, "show_error": True}
    # On HF Spaces (SPACE_ID set) keep the default port (7860), which the
    # health checks expect. Elsewhere use PORT, falling back to 7868.
    if not os.getenv("SPACE_ID"):
        launch_kwargs["server_port"] = int(os.getenv("PORT", 7868))
    demo.launch(**launch_kwargs)