"""
🦁 Jungle Story Time — for little hands (ages 2–5)
==================================================
3 taps and GO: pick a friend, pick a place, story-or-poem. The fine-tuned model
(deployed on Modal) writes it; VoxCPM2 reads it aloud in sweet designed voices —
or in YOUR family's cloned voice.

  python app_jungle.py            → http://localhost:7868   (UI → Modal model)
  LOCAL_MODE=1 python app_jungle.py                         (fully offline story gen)
"""

import base64
import html as html_lib
import os
import random
import re
import threading

import gradio as gr

# Repo id and filename glob handed to Llama.from_pretrained() in get_llm()
# when the story is generated locally.
MODEL_REPO = "ThePradip/minicpm5-1b-kids-storyteller-GGUF"
MODEL_FILE = "*Q4_K_M*.gguf"

# Remote service URLs. Each one can be overridden with an environment variable
# of the same name; the literal below is only the default.
# STORY_ENDPOINT is POSTed to by tell() to write the story/poem.
STORY_ENDPOINT = os.getenv(
    "STORY_ENDPOINT",
    "https://pradiptivhale--kids-story-api-storyteller-narrate.modal.run")
# VOICE_ENDPOINT is POSTed to by read_aloud() and make_movie() for narration audio.
VOICE_ENDPOINT = os.getenv(
    "VOICE_ENDPOINT",
    "https://pradiptivhale--kids-voice-tts-voxnarrator-speak.modal.run")
# IMAGE_ENDPOINT is POSTed to by add_illustration(), draw_picture() and make_movie().
IMAGE_ENDPOINT = os.getenv(
    "IMAGE_ENDPOINT",
    "https://pradiptivhale--kids-image-gen-illustrator-draw.modal.run")
# LOCAL_MODE=1 makes tell() skip STORY_ENDPOINT and go straight to get_llm().
LOCAL_MODE = os.getenv("LOCAL_MODE") == "1"

# System message sent to the local model by tell(); the remote endpoint gets
# structured fields instead of this prompt.
SYSTEM_PROMPT = (
    "You are a kind storyteller for children aged 2-5. Write tiny, happy, easy-to-picture "
    "stories and poems with very simple words, sounds, repetition, and nothing scary."
)

# UI label ("emoji\nName") → character phrase that tell() puts into the prompt.
# _friend_bits() splits the label on "\n" to recover the emoji and the name.
FRIENDS = {
    "🦁\nSimba": "Simba the little lion cub",
    "🐯\nTiger": "a friendly tiger cub",
    "🐼\nPanda": "a round panda",
    "🐻\nBhalu": "Bhalu the gentle bear",
    "🦜\nParrot": "a green parrot",
    "🐘\nElephant": "a baby elephant",
    "🐰\nBunny": "a fluffy bunny",
    "🦆\nDuck": "a yellow duck",
}
# UI label ("emoji\nName") → place phrase used in the story and picture prompts.
PLACES = {
    "🏠\nHome": "home",
    "🌳\nJungle": "the green jungle",
    "🌊\nPond": "the village pond",
    "🥭\nMango tree": "the mango tree",
    "🌙\nNight sky": "under the moon and stars",
    "🏖️\nBeach": "the sandy beach",
    "🌸\nGarden": "grandma's flower garden",
    "🚜\nFarm": "the happy farm",
    "⛄\nSnow": "the soft white snow",
    "🚂\nTrain": "a choo-choo train ride",
}
# Poster scene per place label: "bg" backdrop gradient, "ink" text colour,
# "card" story-card background, "deco" floating decoration emojis.
# Only five of the PLACES labels have an entry here; make_poster() falls back
# to the Jungle scene for every other place.
SCENES = {
    "🏠\nHome":       {"bg": "linear-gradient(160deg,#FFE0B2,#FFF3E0 55%,#FFCC80)",
                       "ink": "#3E2723", "card": "rgba(255,255,255,.97)",
                       "deco": ["🏠", "🧸", "🌼", "🐈", "🪁", "🍪"]},
    "🌳\nJungle":     {"bg": "linear-gradient(160deg,#A5D6A7,#DCEDC8 55%,#66BB6A)",
                       "ink": "#1B5E20", "card": "rgba(255,255,255,.97)",
                       "deco": ["🌳", "🌿", "🦋", "🌺", "🍄", "🐾"]},
    "🌊\nPond":       {"bg": "linear-gradient(160deg,#81D4FA,#E1F5FE 55%,#4FC3F7)",
                       "ink": "#01579B", "card": "rgba(255,255,255,.97)",
                       "deco": ["🌊", "🪷", "🐟", "🦆", "🫧", "🐸"]},
    "🥭\nMango tree": {"bg": "linear-gradient(160deg,#FFE082,#FFF8E1 55%,#FFB74D)",
                       "ink": "#E65100", "card": "rgba(255,255,255,.97)",
                       "deco": ["🥭", "🌳", "🐦", "🍃", "🐝", "🌻"]},
    "🌙\nNight sky":  {"bg": "linear-gradient(160deg,#283593,#3949AB 55%,#1A237E)",
                       "ink": "#FFFDE7", "card": "rgba(13,17,62,.86)",
                       "deco": ["🌙", "⭐", "✨", "🦉", "🌠", "💫"]},
}
# UI label → voice id sent as "voice" to VOICE_ENDPOINT. "clone" additionally
# requires a reference recording (see read_aloud() / make_movie()).
# NOTE(review): no label maps to "robot", yet read_aloud() and make_movie()
# both test for it — that branch looks unreachable from this table; confirm.
VOICES = {
    "🌞 Sunny": "sunny",
    "🐦 Koyal": "koyal",
    "🌙 Dadu": "dadu",
    "🤹 Pip": "pip",
    "🌿 Willow": "willow",
    "🌌 Atlas": "atlas",
    "🎙️ My family's voice": "clone",
}
# tell() picks one of these at random as the story's lesson.
LESSONS = ["sharing is fun", "being patient", "gentle hands", "saying please and thank you",
           "helping friends", "trying again", "telling the truth", "animal sounds",
           "counting one to five", "colors", "big and small", "bedtime is cozy"]

# Lazily created local model plus the lock that serialises its creation.
_llm, _lock = None, threading.Lock()


def get_llm():
    """Return the shared local Llama model, loading it on first use.

    The lock is held for the whole call, so concurrent first callers wait for
    a single load instead of each loading their own copy. llama_cpp is imported
    here (not at module top) so the file still imports where it is not installed.
    """
    global _llm
    with _lock:
        if _llm is not None:
            return _llm
        from llama_cpp import Llama

        _llm = Llama.from_pretrained(
            repo_id=MODEL_REPO,
            filename=MODEL_FILE,
            n_ctx=2048,
            n_gpu_layers=-1,
            n_threads=os.cpu_count(),
            verbose=False,
        )
        return _llm


def _post_with_progress(progress, url, payload, timeout, label, est_seconds):
    """POST in a thread while ticking a progress bar (long generations feel alive).

    Args:
        progress: callable taking ``(fraction, desc=...)``, e.g. a gr.Progress.
        url: endpoint to POST to.
        payload: JSON-serialisable body.
        timeout: seconds passed to ``requests.post``.
        label: text shown next to the bar while waiting.
        est_seconds: rough expected duration; the bar creeps from 5% towards
            95% over this many seconds and only reaches 100% on success.

    Returns:
        The ``requests.Response`` (its status has already been checked).

    Raises:
        Whatever ``requests.post`` / ``raise_for_status`` raised in the worker
        thread, re-raised in the caller's thread.
    """
    import time

    import requests

    outcome = {}

    def work():
        try:
            r = requests.post(url, json=payload, timeout=timeout)
            r.raise_for_status()
            outcome["response"] = r
        except Exception as e:  # handed back to the calling thread below
            outcome["error"] = e

    # A zero or negative estimate would divide by zero (or run the bar backwards).
    est = est_seconds if est_seconds and est_seconds > 0 else 1.0

    t = threading.Thread(target=work, daemon=True)
    t.start()
    t0 = time.monotonic()        # monotonic: immune to wall-clock adjustments
    while t.is_alive():
        frac = min(0.05 + (time.monotonic() - t0) / est, 0.95)
        progress(frac, desc=label)
        t.join(0.4)              # wakes as soon as the request finishes
    if "error" in outcome:
        raise outcome["error"]
    progress(1.0, desc="✨ Ready!")
    return outcome["response"]


def tell(kid, friend, custom_friend, place, custom_place, kind, progress=gr.Progress()):
    """Generate a story or poem, yielding the text as it grows.

    Args:
        kid: child's name; blank or whitespace-only falls back to
            "my little friend". Trimmed to 30 characters.
        friend: a FRIENDS label, used when ``custom_friend`` is blank.
        custom_friend: free-text character (max 40 chars); gets "a " prepended
            unless it already starts with an article.
        place: a PLACES label, used when ``custom_place`` is blank. An unknown
            or missing label falls back to "the jungle".
        custom_place: free-text place (max 50 chars).
        kind: UI choice; anything containing "Poem" means a poem, otherwise
            (including None) a story.
        progress: Gradio progress tracker.

    Yields:
        Partial text ending in a "▌" cursor while writing, then the final text
        with prompt-echo lines removed by ``_strip_meta``.

    Unless LOCAL_MODE is set, the remote STORY_ENDPOINT is tried first; on any
    failure the local model is tried, and if that is unavailable too a
    friendly message is yielded instead of raising.
    """
    import time

    # Strip first, then default — otherwise "   " would become an empty name.
    kid = (kid or "").strip()[:30] or "my little friend"
    if custom_friend and custom_friend.strip():        # any animal or bird they can dream of
        c = custom_friend.strip()[:40]
        characters = c if c.split()[0].lower() in ("a", "an", "the") else f"a {c}"
    else:
        characters = FRIENDS[friend]
    if custom_place and custom_place.strip():
        where = custom_place.strip()[:50]
    else:
        # same fallback the picture prompts use for an unknown place
        where = PLACES.get(place, "the jungle")
    lesson = random.choice(LESSONS)            # surprise lesson — one less question!
    form = "poem" if "Poem" in (kind or "") else "story"
    prompt = (f"Tell a {form} for {kid}, age 4. It stars {characters} at {where}. "
              f"It should help the child learn: {lesson}. Say {kid}'s name in the {form}. "
              f"Keep it very simple for a small child, easy to picture, with sounds and a "
              f"repeating line." + (" Make it rhyme." if form == "poem" else ""))

    if not LOCAL_MODE and STORY_ENDPOINT:
        yield f"🌿 {characters.split()[-1].title()} is thinking of a {form} about “{lesson}”… ▌"
        try:
            r = _post_with_progress(
                progress, STORY_ENDPOINT,
                {"kid": kid, "age": 4, "characters": characters,
                 "place": where, "lesson": lesson, "kind": form},
                timeout=180, label=f"🌿 Writing your {form}…", est_seconds=25)
            story = _strip_meta(r.json()["story"])
            # The endpoint answers in one piece; replay it word by word so the
            # child sees it being "written".
            shown = ""
            for word in story.split(" "):
                shown += word + " "
                yield shown + "▌"
                time.sleep(0.045)
            yield story
            return
        except Exception:
            # Deliberate best effort: any remote failure falls through to the
            # local model below.
            yield "🌧️ The jungle phone is napping — telling it myself… ▌"

    try:
        stream = get_llm().create_chat_completion(
            messages=[{"role": "system", "content": SYSTEM_PROMPT},
                      {"role": "user", "content": prompt}],
            max_tokens=340, temperature=0.8, top_p=0.95, repeat_penalty=1.08, stream=True)
    except Exception:
        # Space build is a thin client (no llama-cpp); if the Modal story endpoint is
        # unreachable there's no local fallback — fail softly instead of crashing.
        yield "🌧️ Our storyteller is having a little nap — please try again in a moment! 💤"
        return
    text = ""
    for chunk in stream:
        delta = chunk["choices"][0].get("delta", {}).get("content")
        if delta:
            text += delta
            yield re.sub(r"</?think>", "", text).strip() + " ▌"
    yield _strip_meta(text)


_EMOJI_RE = re.compile(
    "[\U0001F000-\U0001FAFF\U00002600-\U000027BF⬀-⯿←-⇿️‍]")


def _clean_for_speech(text):
    """Audio gets words only: no emojis, no markdown marks, no cursor glyph."""
    text = (text or "").replace("▌", "")
    text = _EMOJI_RE.sub("", text)
    text = re.sub(r"[*#_>`~|]", "", text)
    return re.sub(r"[ \t]+", " ", text).strip()


# phrases that mean a line is the prompt/instructions echoed back, NOT the story
_META_RE = re.compile(
    r"help the child learn|keep it (very )?simple|easy to picture|repeating line|"
    r"repeated line|make it rhyme|^\s*(sure|okay|here(?:'s| is)|of course)\b|"
    r"^\s*(title|story|poem|prompt|tell a (story|poem))\s*[:\-]|it stars\b|"
    r"^\s*tell a (story|poem)\b|with sounds and",
    re.I)


def _strip_meta(story):
    """Drop any preamble / echoed-prompt / instruction lines a small model sometimes
    emits, so only the real story or poem is shown AND read aloud."""
    story = re.sub(r"</?think>", "", story or "").strip()
    kept = [ln for ln in story.splitlines() if ln.strip() and not _META_RE.search(ln)]
    return "\n".join(kept).strip() or story        # never return empty


def _friend_bits(friend, custom_friend):
    if custom_friend and custom_friend.strip():
        return "🐾", custom_friend.strip()[:40].title()
    emoji, name = friend.split("\n")
    return emoji, name


def make_poster(story, friend, custom_friend, place, kid, kind, img_b64=None):
    """Animated kids poster: scene backdrop, bouncing friends, staggered story
    lines — and the FLUX illustration once it arrives (img_b64).

    Args:
        story: story text; the "▌" cursor is removed. Blank text, or a status
            message starting with 🌿 / 🌧️, produces no poster.
        friend, custom_friend: passed to ``_friend_bits`` for the header.
        place: an "emoji\\nName" label. A missing or malformed label is treated
            as the Jungle place, matching the scene fallback.
        kid: child's name (max 30 chars, HTML-escaped); blank → "Little Star".
        kind: UI choice; containing "Poem" selects poem title and centred text.
        img_b64: base64 PNG data to embed, or None to show a "painting…" box.

    Returns:
        An HTML string (markup plus its own <style> block), or "" when there
        is nothing to show.
    """
    story = (story or "").replace("▌", "").strip()
    if not story or story.startswith(("🌿", "🌧️")):
        return ""
    # The scene lookup already tolerates unknown places; make the label split
    # equally tolerant instead of raising on None or a label without "\n".
    if not place or "\n" not in place:
        place = "🌳\nJungle"
    scene = SCENES.get(place, SCENES["🌳\nJungle"])
    f_emoji, f_name = _friend_bits(friend, custom_friend)
    # Escaped because these go straight into markup (a no-op for the built-in labels).
    p_emoji, p_name = (html_lib.escape(part) for part in place.split("\n", 1))
    f_emoji = html_lib.escape(f_emoji)
    kid = html_lib.escape((kid or "").strip()[:30]) or "Little Star"
    is_poem = "Poem" in (kind or "")
    title = f"{kid}'s {'Poem' if is_poem else 'Story'}"

    lines = [ln.strip() for ln in story.split("\n") if ln.strip()]
    if len(lines) == 1:        # one block → sentence-sized lines so they animate in turn
        lines = [s.strip() for s in re.split(r"(?<=[.!?])\s+", lines[0]) if s.strip()]
    # each line fades in a little later than the one before it
    body = "".join(
        f'<p class="jp-line" style="animation-delay:{.3 + i * .22:.2f}s">'
        f"{html_lib.escape(ln)}</p>" for i, ln in enumerate(lines))

    # twelve floating decorations: the scene's six emojis twice, zipped with
    # fixed left%, top%, font-size (rem) and animation-delay (s) columns
    deco = "".join(
        f'<span class="jp-float" style="left:{x}%;top:{y}%;font-size:{s}rem;'
        f'animation-delay:{d}s">{e}</span>'
        for e, x, y, s, d in zip(
            scene["deco"] * 2,
            [2, 91, 5, 88, 46, 94, 3, 52, 72, 24, 14, 81],
            [7, 9, 83, 76, 2, 44, 46, 91, 5, 89, 30, 92],
            [2.0, 2.2, 2.0, 2.2, 1.7, 1.9, 1.8, 2.0, 1.6, 1.8, 1.5, 1.7],
            [0, -1.3, -2.1, -3, -.7, -1.8, -2.6, -3.4, -1.1, -2.3, -.4, -2.9]))

    picture = (f'<img class="jp-art" src="data:image/png;base64,{img_b64}" '
               f'alt="story picture"/>' if img_b64 else
               '<div class="jp-art jp-art-wait">🎨 painting your picture…</div>')

    return f"""
<div class="jposter" style="background:{scene['bg']}">
  {deco}
  <div class="jp-head">
    <span class="jp-big jp-bounce">{f_emoji}</span>
    <div>
      <h2 class="jp-title">{title}</h2>
      <div class="jp-sub" style="color:{scene['ink']}">starring {html_lib.escape(f_name)}
        · at {p_emoji} {p_name}</div>
    </div>
    <span class="jp-big jp-sway">{p_emoji}</span>
  </div>
  {picture}
  <div class="jp-card" style="background:{scene['card']};color:{scene['ink']};
       text-align:{'center' if is_poem else 'left'}">{body}</div>
  <div class="jp-ribbon">✨ Jungle Story Time ✨</div>
</div>
<style>
.jposter {{ position:relative; overflow:hidden; max-width:760px; margin:0 auto;
  border-radius:30px; padding:26px 22px 16px;
  border:6px solid #FFF3E0; box-shadow:0 16px 40px rgba(0,0,0,.28);
  animation:jpPop .6s ease-out; }}
.jp-float {{ position:absolute; pointer-events:none; z-index:0; opacity:.45;
  animation:jpFloat 7s ease-in-out infinite; }}
.jp-head {{ position:relative; z-index:2; display:flex; align-items:center; justify-content:center;
  gap:18px; margin-bottom:12px; }}
.jp-big {{ font-size:4rem; line-height:1; }}
.jp-bounce {{ animation:jpBounce 2.2s ease-in-out infinite; }}
.jp-sway {{ animation:jpSway 3.4s ease-in-out infinite; display:inline-block; }}
.jp-title {{ margin:0; font-size:2.1rem; color:#FFF; text-align:center;
  text-shadow:2px 2px 0 #F4511E, 4px 4px 0 rgba(0,0,0,.18); }}
.jp-sub {{ text-align:center; font-weight:800; font-size:1.05rem; }}
.jp-art {{ position:relative; z-index:2; display:block; width:min(560px,92%);
  margin:6px auto 14px; border-radius:22px;
  border:5px solid #FFF; box-shadow:0 10px 26px rgba(0,0,0,.3);
  animation:jpArtIn 1.1s ease-out; }}
.jp-art-wait {{ text-align:center; padding:34px 10px; font-size:1.3rem; font-weight:800;
  background:rgba(255,255,255,.55); color:#5D4037; animation:jpPulse 1.6s ease-in-out infinite; }}
.jp-card {{ position:relative; z-index:2; border-radius:22px; padding:20px 24px;
  font-size:1.5rem; line-height:1.95; font-weight:800; color:{scene['ink']} !important;
  box-shadow:0 6px 18px rgba(0,0,0,.16), inset 0 0 0 3px rgba(255,255,255,.5); }}
.jp-card p, .jp-line {{ margin:.4em 0; opacity:1; color:{scene['ink']} !important;
  animation:jpLineIn .55s ease-out both; }}
.jp-ribbon {{ text-align:center; margin-top:10px; font-weight:900; color:#FFF;
  text-shadow:1px 1px 0 rgba(0,0,0,.25); }}
@keyframes jpPop    {{ from {{ transform:scale(.92); opacity:0 }} to {{ transform:scale(1); opacity:1 }} }}
@keyframes jpFloat  {{ 0%,100% {{ transform:translateY(0) }} 50% {{ transform:translateY(-20px) }} }}
@keyframes jpBounce {{ 0%,100% {{ transform:translateY(0) }} 40% {{ transform:translateY(-16px) rotate(-6deg) }} }}
@keyframes jpSway   {{ 0%,100% {{ transform:rotate(-8deg) }} 50% {{ transform:rotate(8deg) }} }}
@keyframes jpLineIn {{ from {{ transform:translateY(12px) }} to {{ transform:none }} }}
@keyframes jpArtIn  {{ from {{ opacity:0; transform:scale(.94) }} to {{ opacity:1; transform:scale(1) }} }}
@keyframes jpPulse  {{ 0%,100% {{ opacity:.65 }} 50% {{ opacity:1 }} }}
</style>"""


def add_illustration(story, friend, custom_friend, place, kid, kind):
    """Fetch a storybook picture and return the poster rebuilt around it.

    Returns ``gr.update()`` (leave the current poster untouched) when there is
    no real story yet — blank text or a 🌿 / 🌧️ status message — or when the
    image endpoint fails for any reason. Otherwise returns the HTML from
    ``make_poster`` with the picture embedded as base64.
    """
    import requests

    tale = (story or "").replace("▌", "").strip()
    if not tale:
        return gr.update()
    if tale.startswith(("🌿", "🌧️")):
        return gr.update()

    f_name = _friend_bits(friend, custom_friend)[1]
    location = PLACES.get(place, 'the jungle')
    prompt = (f"children's picture-book illustration of {f_name} at "
              f"{location}, cute, soft pastel watercolor, "
              f"warm light, happy, friendly, storybook style, no text")
    try:
        response = requests.post(IMAGE_ENDPOINT, json={"prompt": prompt}, timeout=420)
        response.raise_for_status()
        encoded = base64.b64encode(response.content).decode()
    except Exception:
        # painter asleep → keep the emoji-only poster
        return gr.update()
    return make_poster(tale, friend, custom_friend, place, kid, kind, img_b64=encoded)


# Sibling endpoints derived from VOICE_ENDPOINT by swapping the "-speak."
# part of the host name. If VOICE_ENDPOINT is overridden with a URL that does
# not contain "-speak.", both of these end up equal to VOICE_ENDPOINT.
SAVE_ENDPOINT = VOICE_ENDPOINT.replace("-speak.", "-save-voice.")
LIST_ENDPOINT = VOICE_ENDPOINT.replace("-speak.", "-list-voices.")


def save_family_voice(name, clone_audio, clone_text):
    """Upload a reference recording under *name* and refresh the saved-voice list.

    Args:
        name: label for the voice; blank → a hint is shown and nothing is sent.
        clone_audio: path of the recorded/uploaded audio file; missing → a hint
            is shown and nothing is sent.
        clone_text: optional transcript of the recording, sent as "prompt_text".

    Returns:
        ``gr.update()`` with the new choices ("—" plus the voices the server
        reports) and the last voice selected; a bare ``gr.update()`` when input
        is missing.

    Raises:
        gr.Error: with a friendly message when the file cannot be read, the
            request fails, or the reply is not the expected JSON.
    """
    import requests

    voice_name = (name or "").strip()
    if not voice_name:
        gr.Info("Give the voice a name first — Mama? Papa? Dadi? ✏️")
        return gr.update()
    if not clone_audio:
        gr.Info("Record or upload ~15 seconds first 🎙️")
        return gr.update()
    try:
        with open(clone_audio, "rb") as f:
            ref = base64.b64encode(f.read()).decode()
        r = requests.post(SAVE_ENDPOINT, timeout=300, json={
            "name": voice_name, "reference_b64": ref,
            "prompt_text": (clone_text or "").strip()})
        r.raise_for_status()
        voices = r.json()["voices"]
    except (OSError, ValueError, KeyError, requests.RequestException) as err:
        # Surface a readable message instead of a raw traceback-style error.
        raise gr.Error(
            "🌧️ Couldn't save the voice just now — please try again in a moment!"
        ) from err
    gr.Info(f"💾 Saved! '{voice_name}' can read every story now.")
    return gr.update(choices=["—"] + voices, value=voices[-1] if voices else "—")


def load_saved_voices():
    """Return a ``gr.update`` whose choices are "—" plus the saved voice names.

    The names come from LIST_ENDPOINT's JSON reply (key "voices"). On any
    failure the component is left unchanged via a bare ``gr.update()``.
    """
    import requests

    try:
        reply = requests.post(LIST_ENDPOINT, json={}, timeout=120)
        names = reply.json()["voices"]
        return gr.update(choices=["—"] + names)
    except Exception:
        return gr.update()


def _stream_card(text):
    """Plain word-by-word card shown while the model writes (no animations, so
    the per-word re-render doesn't flicker); the real poster replaces it."""
    return ('<div style="background:rgba(255,255,243,.95);border:5px solid #8D6E63;'
            'border-radius:24px;padding:18px 22px;font-size:1.4rem;line-height:1.9;'
            'font-weight:700;color:#33691E;white-space:pre-wrap;font-family:'
            "'Chalkboard SE','Comic Sans MS',cursive\">"
            + html_lib.escape(text) + "</div>")


def tell_poster(kid, friend, custom_friend, place, kind, progress=gr.Progress()):
    """tell(), but rendered into the poster slot instead of a bare textbox.

    Yields ``(text, html)`` pairs: the raw partial text from ``tell`` and the
    same text wrapped by ``_stream_card`` with the cursor shown as a pencil.

    This wrapper has no custom-place input, so ``None`` is passed for tell()'s
    ``custom_place``. Leaving that argument out shifts ``kind`` into
    ``custom_place`` and the progress tracker into ``kind``.
    """
    for partial in tell(kid, friend, custom_friend, place, None, kind, progress):
        yield partial, _stream_card(partial.replace("▌", " ✏️"))


def _scenes(story, max_scenes=4):
    """Split the tale into scene chunks made of whole sentences.

    The text is first cleaned with ``_clean_for_speech`` and cut after ".", "!"
    or "?". One sentence gives one chunk; otherwise the chunk count is about
    one per three sentences, at least 2 and at most *max_scenes*. Sentences are
    dealt out as evenly as possible, earlier chunks taking the remainder.
    Returns [] when there is no text.
    """
    sentences = []
    for piece in re.split(r"(?<=[.!?])\s+", _clean_for_speech(story)):
        piece = piece.strip()
        if piece:
            sentences.append(piece)
    if not sentences:
        return []

    count = len(sentences)
    if count == 1:
        n = 1
    else:
        n = min(max_scenes, max(2, (count + 2) // 3))
    base, remainder = divmod(count, n)

    chunks = []
    start = 0
    for j in range(n):
        stop = start + base + (1 if j < remainder else 0)
        chunks.append(" ".join(sentences[start:stop]))
        start = stop
    return [chunk for chunk in chunks if chunk]


_movie_cache = {}  # (voice, story-hash, clone-audio path) → mp4 path


def make_movie(story, friend, custom_friend, place, kid, kind,
               voice_label, saved_voice, clone_audio, clone_text,
               progress=gr.Progress()):
    """One FLUX picture + one VoxCPM narration clip per scene → ffmpeg stitches
    a Ken-Burns story movie.

    Args:
        story: story text (cursor glyph removed); blank raises gr.Error.
        friend, custom_friend, place: used to build the picture prompts.
        kid, kind: accepted for a uniform handler signature; not used here.
        voice_label: key of VOICES; a non-"—" *saved_voice* overrides it with
            "custom:<name>".
        clone_audio, clone_text: reference recording path and optional
            transcript, only sent when the resolved voice is "clone".
        progress: Gradio progress tracker.

    Returns:
        Path of the finished mp4, or None when a cloned voice was requested
        without a recording. Results are memoised in ``_movie_cache``.

    Raises:
        gr.Error: when there is no story to film.
        requests / subprocess errors: when an endpoint or ffmpeg/ffprobe fails;
            the temporary work directory is removed before re-raising.
    """
    import hashlib
    import shutil
    import subprocess
    import tempfile
    from concurrent.futures import ThreadPoolExecutor

    import requests

    story = (story or "").replace("▌", "").strip()
    if not story:
        raise gr.Error("Make a story first! 🐛")
    voice = VOICES[voice_label]
    if voice == "robot":
        voice = "sunny"        # movies deserve a sweet voice, not the browser robot
    if saved_voice and saved_voice != "—":
        voice = f"custom:{saved_voice}"
    extra = {}
    if voice == "clone":
        if not clone_audio:
            gr.Info("🎙️ Record ~15 seconds in the family-voice panel first!")
            return None
        with open(clone_audio, "rb") as f:
            extra["reference_b64"] = base64.b64encode(f.read()).decode()
        if clone_text and clone_text.strip():
            extra["prompt_text"] = clone_text.strip()

    cache_key = (voice, hashlib.sha1(story.encode()).hexdigest(), clone_audio or "")
    if cache_key in _movie_cache and os.path.exists(_movie_cache[cache_key]):
        progress(1.0, desc="🎬 Already filmed — playing!")
        return _movie_cache[cache_key]

    scenes = _scenes(story)
    if not scenes:
        raise gr.Error("Make a story first! 🐛")
    _, f_name = _friend_bits(friend, custom_friend)
    where = PLACES.get(place, "the jungle")
    workdir = tempfile.mkdtemp(prefix="jungle_movie_")
    total = len(scenes) * 2 + 1      # one picture + one clip per scene, plus stitching
    done = [0]
    tick_lock = threading.Lock()     # tick() runs on the pool's worker threads

    def tick(desc):
        with tick_lock:
            done[0] += 1
            progress(done[0] / total, desc=desc)

    def fetch_image(i, chunk):
        prompt = (f"children's picture-book illustration of {f_name} at {where}, "
                  f"scene: {chunk[:180]}")
        r = requests.post(IMAGE_ENDPOINT, json={"prompt": prompt}, timeout=420)
        r.raise_for_status()
        p = os.path.join(workdir, f"scene{i}.png")
        with open(p, "wb") as f:
            f.write(r.content)
        tick(f"🎨 Painted picture {i + 1} of {len(scenes)}…")
        return p

    def fetch_voice(i, chunk):
        r = requests.post(VOICE_ENDPOINT, json={"text": chunk[:1200],
                                                "voice": voice, **extra}, timeout=600)
        r.raise_for_status()
        p = os.path.join(workdir, f"scene{i}.wav")
        with open(p, "wb") as f:
            f.write(r.content)
        tick(f"🎙️ Recorded line {i + 1} of {len(scenes)}…")
        return p

    try:
        progress(0.02, desc=f"🎬 Filming {len(scenes)} scenes… (first time takes longer)")
        # all pictures and all narration clips are requested concurrently
        with ThreadPoolExecutor(max_workers=8) as ex:
            imgs = [ex.submit(fetch_image, i, c) for i, c in enumerate(scenes)]
            wavs = [ex.submit(fetch_voice, i, c) for i, c in enumerate(scenes)]
            imgs = [f.result() for f in imgs]
            wavs = [f.result() for f in wavs]

        segs = []
        for i, (png, wav) in enumerate(zip(imgs, wavs)):
            # clip length drives how many frames the slow zoom must last
            probe = subprocess.run(
                ["ffprobe", "-v", "quiet", "-show_entries", "format=duration",
                 "-of", "csv=p=0", wav], capture_output=True, text=True, check=True)
            dur = float(probe.stdout.strip())
            frames = int((dur + 0.4) * 25)
            seg = os.path.join(workdir, f"seg{i}.mp4")
            subprocess.run(
                ["ffmpeg", "-y", "-i", png, "-i", wav, "-filter_complex",
                 "[0:v]scale=2304:1536,"
                 f"zoompan=z='min(zoom+0.0009,1.28)':x='iw/2-(iw/zoom/2)'"
                 f":y='ih/2-(ih/zoom/2)':d={frames}:s=1152x768:fps=25,format=yuv420p[v]",
                 "-map", "[v]", "-map", "1:a", "-c:v", "libx264", "-preset", "veryfast",
                 "-crf", "20", "-c:a", "aac", "-b:a", "192k", "-shortest", seg],
                check=True, capture_output=True)
            segs.append(seg)
        concat = os.path.join(workdir, "list.txt")
        with open(concat, "w") as f:
            f.writelines(f"file '{s}'\n" for s in segs)
        out = os.path.join(workdir, "story_movie.mp4")
        subprocess.run(["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", concat,
                        "-c", "copy", out], check=True, capture_output=True)
    except Exception:
        # Nothing usable was produced — don't leave a half-filled temp dir behind.
        shutil.rmtree(workdir, ignore_errors=True)
        raise
    progress(1.0, desc="🍿 Your movie is ready!")
    _movie_cache[cache_key] = out
    return out


def read_aloud(story, voice_label, saved_voice, clone_audio, clone_text,
               progress=gr.Progress()):
    """Narrate the whole story in the chosen VoxCPM voice and return a wav path.

    (No video — the Space build reads the tale aloud, it doesn't make movies.)

    The text is cleaned with ``_clean_for_speech`` and capped at 1500
    characters. A non-"—" *saved_voice* overrides *voice_label*. Returns None,
    after showing a hint, when a cloned voice is requested without a recording
    or when the voice endpoint fails. Raises gr.Error when there is no story.
    """
    import os
    import tempfile

    spoken = _clean_for_speech(story)
    if not spoken:
        raise gr.Error("Make a story first! 🐛")

    voice = VOICES[voice_label]
    if voice == "robot":
        voice = "sunny"          # the Robo button uses the browser; this button is the sweet voice
    use_saved = bool(saved_voice) and saved_voice != "—"
    if use_saved:
        voice = f"custom:{saved_voice}"

    payload = {"text": spoken[:1500], "voice": voice}
    if voice == "clone":
        if not clone_audio:
            gr.Info("🎙️ Record ~15 seconds in the family-voice panel first!")
            return None
        with open(clone_audio, "rb") as recording:
            payload["reference_b64"] = base64.b64encode(recording.read()).decode()
        transcript = clone_text.strip() if clone_text else ""
        if transcript:
            payload["prompt_text"] = transcript

    try:
        response = _post_with_progress(
            progress, VOICE_ENDPOINT, payload,
            timeout=600, label="🎙️ Reading your story…", est_seconds=20)
    except Exception:
        gr.Info("🔇 The reading voice is napping — tap '🔁 Read it again', or 🤖 Robo.")
        return None

    wav_path = os.path.join(tempfile.mkdtemp(prefix="jungle_voice_"), "story.wav")
    with open(wav_path, "wb") as sink:
        sink.write(response.content)
    return wav_path


def draw_picture(friend, custom_friend, place, custom_place, progress=gr.Progress()):
    """Paint one storybook picture and return the path of the saved PNG.

    The picture is its own output so the story text stays visible while it is
    being drawn. A non-blank *custom_place* (max 50 chars) wins over *place*;
    an unknown *place* falls back to "the jungle". If the image endpoint fails,
    ``gr.update()`` is returned so the current picture slot is left as it is.
    """
    import os
    import tempfile

    f_name = _friend_bits(friend, custom_friend)[1]
    typed_place = custom_place.strip() if custom_place else ""
    if typed_place:
        where = typed_place[:50]
    else:
        where = PLACES.get(place, "the jungle")
    prompt = (f"children's picture-book illustration of {f_name} at {where}, cute, "
              f"soft pastel watercolor, warm light, happy, friendly, storybook style, no text")
    try:
        response = _post_with_progress(
            progress, IMAGE_ENDPOINT, {"prompt": prompt},
            timeout=420, label="🎨 Painting your picture…", est_seconds=25)
    except Exception:
        return gr.update()        # painter asleep → keep the story, just no picture

    png_path = os.path.join(tempfile.mkdtemp(prefix="jungle_img_"), "scene.png")
    with open(png_path, "wb") as sink:
        sink.write(response.content)
    return png_path


# Page stylesheet, kept as one string. Selectors target element ids
# (#namebox, #friend, #place, #kind, #voice, #gobtn, #storycard, #audiocard, …)
# that the UI components are expected to carry, plus #sky/#sun from SKY.
# NOTE(review): where this string is handed to Gradio is outside this view.
CSS = """
body, .gradio-container {
  background: linear-gradient(180deg, #7EC8E3 0%, #A8E6CF 55%, #7CB342 100%) !important;
  font-family: 'Chalkboard SE', 'Comic Sans MS', 'Segoe Print', cursive !important;
}
#sky { position: fixed; inset: 0; pointer-events: none; z-index: 0; overflow: hidden; }
#sky span { position: absolute; animation: floaty 8s ease-in-out infinite; }
@keyframes floaty { 0%,100% { transform: translateY(0) } 50% { transform: translateY(-24px) } }
@keyframes spin { from { transform: rotate(0) } to { transform: rotate(360deg) } }
#sun { font-size: 4rem; animation: spin 24s linear infinite !important; }
#title { text-align: center; padding: 4px 0 0; position: relative; z-index: 2; }
#title h1 { font-size: 2.8rem; margin: 0; color: #FFF;
  text-shadow: 3px 3px 0 #F4511E, 6px 6px 0 rgba(0,0,0,.15); }
#title p { color: #1B5E20; font-size: 1.15rem; font-weight: 700; margin: 2px 0 0; }

#namebox textarea, #namebox input {
  font-size: 1.5rem !important; text-align: center !important; font-family: inherit !important;
  border-radius: 999px !important; border: 4px solid #FF8F00 !important; background: #FFFDE7 !important;
  color: #4E342E !important; font-weight: 700 !important;
}
#namebox label span, #friend > label > span, #place > label > span, #kind > label > span,
#voice > label > span { color: #1B5E20 !important; font-weight: 800 !important; font-size: 1.15rem !important; }

/* compact cartoon tiles for radios — small but cute */
#friend .wrap, #place .wrap { display: grid !important; gap: 7px !important; }
#friend .wrap { grid-template-columns: repeat(4, 1fr) !important; }
#place .wrap { grid-template-columns: repeat(5, 1fr) !important; }
/* tidy two-column shell — fills the width, no wasted space */
#controls { background: rgba(255,253,231,.82) !important; border-radius: 22px !important;
  padding: 14px 16px !important; box-shadow: 0 8px 22px rgba(0,0,0,.12) !important; }
#stage { background: rgba(255,255,255,.45) !important; border-radius: 22px !important;
  padding: 14px 16px !important; box-shadow: 0 8px 22px rgba(0,0,0,.10) !important; }
/* use the whole window — no wasted margins */
.gradio-container { max-width: 100% !important; padding: 6px 16px 14px !important; }
#storycard textarea { min-height: 320px !important; }   /* fill the story box height */
#title { margin-bottom: 6px !important; }
#friend, #place, #kind, #voice, #namebox, #customfriend, #customplace { margin-bottom: 4px !important; }

/* ── pretty, engaging audio player ── */
#audiocard {
  background: linear-gradient(135deg,#FFF3E0 0%,#FFE0B2 55%,#FFCC80 100%) !important;
  border: 4px solid #FFB300 !important; border-radius: 26px !important;
  padding: 12px 16px !important; box-shadow: 0 8px 22px rgba(0,0,0,.15) !important;
  animation: audioGlow 2.8s ease-in-out infinite;
}
#audiocard label span, #audiocard .label-wrap span {
  color: #E65100 !important; font-weight: 900 !important; font-size: 1.15rem !important; }
#audiocard audio { width: 100% !important; border-radius: 999px !important; }
/* recolor the waveform Gradio draws so it matches the jungle */
#audiocard .waveform-container, #audiocard canvas { border-radius: 16px !important; }
@keyframes audioGlow {
  0%,100% { box-shadow: 0 8px 22px rgba(255,179,0,.30); }
  50%     { box-shadow: 0 10px 30px rgba(244,81,30,.55); }
}
#friend label, #place label {
  background: #FFFDE7 !important; border: 2.5px solid #FFB300 !important; border-radius: 14px !important;
  padding: 6px 2px !important; text-align: center !important; cursor: pointer !important;
  font-size: 0.82rem !important; font-weight: 800 !important; color: #4E342E !important;
  white-space: pre-line !important; line-height: 1.1 !important;
  box-shadow: 0 3px 0 #E65100 !important; transition: transform .12s !important;
  display: flex !important; flex-direction: column !important; gap: 2px !important;
  align-items: center !important; justify-content: center !important; min-height: 72px !important;
}
#friend label:hover, #place label:hover { transform: translateY(-2px) scale(1.05); }
#friend label.selected, #place label.selected {
  background: #FFE082 !important; border-color: #F4511E !important; transform: scale(1.06);
  box-shadow: 0 3px 0 #BF360C !important; }
#friend input, #place input { display: none !important; }
#friend label span:first-letter, #place label span:first-letter { font-size: 2.2rem; }
@media (max-width: 760px) { #friend .wrap { grid-template-columns: repeat(4, 1fr) !important; } }

/* story / poem + voice pills — smaller */
#kind .wrap, #voice .wrap { display: flex !important; gap: 7px !important; flex-wrap: wrap !important; }
#kind label, #voice label {
  border-radius: 999px !important; padding: 6px 13px !important; font-weight: 800 !important;
  font-size: 0.85rem !important; cursor: pointer !important; border: 2.5px solid #7B1FA2 !important;
  background: #F3E5F5 !important; color: #4A148C !important; box-shadow: 0 3px 0 #6A1B9A !important;
  transition: transform .12s !important;
}
#kind label:hover, #voice label:hover { transform: translateY(-1px) scale(1.04); }
#kind label.selected, #voice label.selected { background: #CE93D8 !important; transform: scale(1.05); }
#kind input, #voice input { display: none !important; }

#customfriend textarea, #customfriend input {
  font-size: 1.15rem !important; text-align: center !important; font-family: inherit !important;
  border-radius: 999px !important; border: 3px dashed #7B1FA2 !important; background: #F3E5F5 !important;
  color: #4A148C !important; font-weight: 700 !important;
}
#gobtn {
  background: linear-gradient(135deg, #FF5722, #FF9800, #FFC107) !important; color: #FFF !important;
  font-size: 1.7rem !important; font-weight: 900 !important; border-radius: 26px !important;
  border: 5px solid #FFF3E0 !important; box-shadow: 0 8px 0 #BF360C !important;
  font-family: inherit !important; letter-spacing: 1px;
}
#gobtn:active { transform: translateY(6px); box-shadow: 0 2px 0 #BF360C !important; }

#storycard textarea {
  font-family: 'Chalkboard SE', 'Comic Sans MS', cursive !important;
  font-size: 1.5rem !important; line-height: 2.05 !important; color: #000000 !important;
  background:
    radial-gradient(circle at 5% 8%, rgba(255,193,7,.18) 0 56px, transparent 57px),
    radial-gradient(circle at 95% 92%, rgba(233,30,99,.10) 0 66px, transparent 67px),
    #FFFFF3 !important;
  border: 6px solid #8D6E63 !important; border-radius: 26px !important;
  box-shadow: 0 12px 30px rgba(0,0,0,.25) !important; padding: 20px !important;
}
#storycard label span { color: #FFF !important; font-size: 1.2rem !important;
  text-shadow: 2px 2px 0 #33691E; font-weight: 800 !important; }
#speakbtn { background: #FFEB3B !important; color: #4E342E !important; font-size: 1.2rem !important;
  border-radius: 999px !important; font-weight: 900 !important; border: 4px solid #F57F17 !important;
  box-shadow: 0 5px 0 #F57F17 !important; font-family: inherit !important; }
#savebtn { background:#C8E6C9 !important; color:#1B5E20 !important; border-radius:999px !important;
  font-weight:800 !important; border:3px solid #2E7D32 !important; font-family:inherit !important; }
#robobtn { background: #E1F5FE !important; color: #01579B !important; border-radius: 999px !important;
  font-weight: 800 !important; border: 3px solid #0288D1 !important; font-family: inherit !important; }
#cloneacc { background: rgba(255,253,231,.95) !important; border-radius: 18px !important;
  border: 3px dashed #8D6E63 !important; }
footer { display: none !important; }
#credits { text-align: center; color: #FFFDE7; font-size: .85rem; margin-top: 4px;
  text-shadow: 1px 1px 0 #33691E; position: relative; z-index: 2; }
"""

# Decorative backdrop markup, injected verbatim through gr.HTML(SKY) at the top of
# the Blocks layout. It is a single <div id="sky"> holding one emoji per <span>.
# Each span is placed with inline left/top percentages and, except for #sun, carries
# its own font-size and a negative animation-delay. The delays differ per span,
# presumably so a shared CSS animation starts out of phase for each emoji — the
# #sky / #sun rules themselves are not in this part of the file (TODO confirm).
SKY = """<div id="sky">
<span id="sun" style="left:4%; top:5%">☀️</span>
<span style="left:78%; top:4%;  font-size:2.6rem; animation-delay:-2s">🌈</span>
<span style="left:30%; top:3%;  font-size:2.2rem; animation-delay:-4s">☁️</span>
<span style="left:55%; top:8%;  font-size:1.9rem; animation-delay:-1s">🐦</span>
<span style="left:90%; top:30%; font-size:2.1rem; animation-delay:-3s">🦋</span>
<span style="left:2%;  top:42%; font-size:2.2rem; animation-delay:-5s">🦜</span>
<span style="left:94%; top:64%; font-size:2.4rem; animation-delay:-2.5s">🌴</span>
<span style="left:3%;  top:78%; font-size:2.4rem; animation-delay:-6s">🌺</span>
<span style="left:60%; top:88%; font-size:2rem;   animation-delay:-1.5s">🐾</span>
<span style="left:20%; top:90%; font-size:2rem;   animation-delay:-3.5s">🍄</span>
<span style="left:84%; top:12%; font-size:1.6rem; animation-delay:-4.5s">⭐</span>
<span style="left:14%; top:16%; font-size:1.4rem; animation-delay:-2.2s">🌙</span>
</div>"""

# JavaScript source (kept as a Python string) for a browser-side read-aloud helper
# built on the Web Speech API. Given the story text it:
#   1. does nothing when the text is empty;
#   2. cancels any speech already queued or playing;
#   3. strips the "▌" character (presumably a streaming cursor — TODO confirm),
#      emoji/symbol code points, the variation selector and zero-width joiner,
#      and markdown punctuation, so none of it is sent to the speech engine;
#   4. speaks the cleaned text slightly slower (rate 0.9) and higher (pitch 1.25),
#      using the first installed voice whose name matches
#      female|child|kid|samantha|zira, or the browser default when none matches.
# The backslashes in the regex are doubled because this is a normal (non-raw)
# Python literal: Python collapses "\\u" to "\u", so the browser receives
# \u{1F000}-style escapes, which the regex's "u" flag requires.
# NOTE(review): nothing in the UI wiring visible in this part of the file passes
# ROBOT_JS to a component, and no button with elem_id "robobtn" is created there
# even though the stylesheet has a #robobtn rule — confirm whether this is still used.
ROBOT_JS = """(text) => {
  if (!text) return;
  speechSynthesis.cancel();
  const clean = text.replace(/▌/g, '')
    .replace(/[\\u{1F000}-\\u{1FAFF}\\u{2600}-\\u{27BF}\\u{2B00}-\\u{2BFF}\\u{FE0F}\\u{200D}]/gu, '')
    .replace(/[*#_>`~|]/g, '');
  const u = new SpeechSynthesisUtterance(clean);
  u.rate = 0.9; u.pitch = 1.25;
  const v = speechSynthesis.getVoices().find(v => /female|child|kid|samantha|zira/i.test(v.name));
  if (v) u.voice = v;
  speechSynthesis.speak(u);
}"""

# Build the whole page: decorative sky, title banner, a "Make a Story" tab (picks on
# the left, results on the right), a "How it works" tab, and finally the event wiring.
# Components are laid out in the order they are created inside each context manager,
# so statement order here IS the layout. The elem_id values are the hooks targeted by
# the stylesheet (e.g. #gobtn, #storycard, #savebtn, #cloneacc, #credits).
with gr.Blocks(title="Jungle Story Time 🦁") as demo:
    # Background decoration first, then the title banner.
    gr.HTML(SKY)
    gr.HTML('<div id="title"><h1>🦁 Jungle Story Time 🌈</h1>'
            '<p>Pick a friend, pick a place — and magic happens! ✨</p></div>')

    with gr.Tabs():
        with gr.Tab("🦁 Make a Story"):
            with gr.Row(equal_height=False):
                # ── left column: all the picks ───────────────────────────
                with gr.Column(scale=1, min_width=340, elem_id="controls"):
                    # Optional child's name, capped at 30 characters.
                    kid = gr.Textbox(label="", placeholder="✏️ What's your name?",
                                     elem_id="namebox", max_length=30, container=False)
                    # Radio choices are the FRIENDS dict keys (emoji + newline + name).
                    friend = gr.Radio(list(FRIENDS), value="🦁\nSimba",
                                      label="🐾 Pick a friend", elem_id="friend")
                    # Free-text alternative to the radio; both values are handed to the
                    # handlers below, which decide how to combine them (not visible here).
                    custom_friend = gr.Textbox(label="", container=False, elem_id="customfriend",
                                               max_length=40,
                                               placeholder="🖊️ …or type any friend — a peacock? a baby fox?")
                    # Same pattern for the place: PLACES dict keys plus a free-text box.
                    place = gr.Radio(list(PLACES), value="🌳\nJungle",
                                     label="🗺️ Pick a place", elem_id="place")
                    custom_place = gr.Textbox(label="", container=False, elem_id="customplace",
                                              max_length=40,
                                              placeholder="🖊️ …or type any place — a castle? the moon?")
                    kind = gr.Radio(["📖 A story!", "🎵 A poem!"], value="📖 A story!",
                                    label="✨ Story or poem?", elem_id="kind")
                    # VOICES is defined elsewhere in this file; its iteration order gives
                    # the choices (presumably a dict keyed by display name — TODO confirm).
                    voice = gr.Radio(list(VOICES), value="🌞 Sunny",
                                     label="🔊 Who reads it?", elem_id="voice")
                    # Opt-in voice cloning, collapsed by default.
                    with gr.Accordion("🎙️ Use my family's voice", open=False, elem_id="cloneacc"):
                        gr.Markdown("Clone **your own** voice or a family member's, **with permission** 💛")
                        # type="filepath": handlers receive a path to the recorded/uploaded clip.
                        clone_audio = gr.Audio(sources=["microphone", "upload"], type="filepath",
                                               label="Grown-up's voice clip")
                        clone_text = gr.Textbox(label="What did they say? (makes the clone better)",
                                                placeholder="Type the exact words from the recording…")
                        with gr.Row():
                            voice_name = gr.Textbox(label="Name this voice",
                                                    placeholder="Mama, Papa, Dadi…", max_length=30)
                            save_btn = gr.Button("💾 Save", elem_id="savebtn")
                        # Starts with only the "—" placeholder entry; it is the output of
                        # both load_saved_voices (on page load) and save_family_voice below.
                        saved_voice = gr.Dropdown(["—"], value="—", label="💛 Saved family voices",
                                                  elem_id="savedvoices", allow_custom_value=True)
                    go = gr.Button("✨ MAKE MY TALE! ✨", elem_id="gobtn")
                # ── right column: the result ─────────────────────────────
                with gr.Column(scale=1, min_width=420, elem_id="stage"):
                    # Story text and illustration sit side by side at equal height.
                    with gr.Row(equal_height=True):
                        story = gr.Textbox(label="📜 Your magical tale", lines=13,
                                           elem_id="storycard", scale=1)
                        picture = gr.Image(label="🎨 Your picture", elem_id="picture",
                                           scale=1, height=360)
                    # Output-only player (interactive=False) that starts playing as soon
                    # as it receives audio (autoplay=True); only a download button is shown.
                    audio = gr.Audio(label="🎧 Your story, read aloud", autoplay=True,
                                     elem_id="audiocard", interactive=False,
                                     buttons=["download"],
                                     waveform_options={"waveform_color": "#FFB300",
                                                       "waveform_progress_color": "#E65100",
                                                       "show_recording_waveform": True})
            gr.HTML('<div id="credits">A tiny fine-tuned model writes 🧸 · VoxCPM2 voices · '
                    'MiniCPM5-1B on Modal · Build Small Hackathon</div>')
        with gr.Tab("ℹ️ How it works"):
            gr.Markdown("## 🦁 How Jungle Story Time works")
            # Relative path: resolved against the process working directory, so the app
            # is expected to be started from the folder that contains architecture.png.
            gr.Image("architecture.png", show_label=False, container=False)
            gr.Markdown(
                "**Three tiny models — small, open, mostly 4-bit:**\n\n"
                "- 📖 **Story — MiniCPM5-1B** (fine-tuned, GGUF via llama.cpp): writes a "
                "gentle, personalized tale from your picks. Runs on CPU — also fully offline.\n"
                "- 🔊 **Voice — VoxCPM2**: reads it aloud in a designed sweet voice, or clones "
                "your family's voice from a ~15s clip (opt-in).\n"
                "- 🎨 **Picture — DreamShaper XL v2 Turbo** (4-bit NF4): paints a soft "
                "watercolor illustration for each tale in a few turbo steps.\n\n"
                "The three run as **scale-to-zero services on Modal** (≈$0 idle); a session "
                "cache makes repeats instant.\n\n"
                "Built for the **Build Small Hackathon**. "
                "[Story model](https://huggingface.co/ThePradip/minicpm5-1b-kids-storyteller) · "
                "[Dataset](https://huggingface.co/datasets/build-small-hackathon/kids-story)"
            )

    # story and picture have NO dependency → run in parallel (two listeners).
    # voice reads the story, so it starts the moment the story is ready (not after the image).
    # The handlers (tell, draw_picture, read_aloud, save_family_voice, load_saved_voices)
    # are defined elsewhere in this file.
    # NOTE(review): Gradio documents .then() as running once the previous event
    # finishes whether or not it raised; if tell can fail, .success() may be the
    # intended chaining so read_aloud never narrates stale text — confirm.
    story_evt = go.click(tell, [kid, friend, custom_friend, place, custom_place, kind], story)
    go.click(draw_picture, [friend, custom_friend, place, custom_place], picture)
    story_evt.then(read_aloud, [story, voice, saved_voice, clone_audio, clone_text], audio)
    # Saving a clip and loading the page both write their result into the dropdown.
    save_btn.click(save_family_voice, [voice_name, clone_audio, clone_text], saved_voice)
    demo.load(load_saved_voices, None, saved_voice)

if __name__ == "__main__":
    # Options shared by every launch target.
    launch_options = {"css": CSS, "show_error": True}
    # On HF Spaces (SPACE_ID is set) keep Gradio's default port 7860 — the health
    # checks expect it. Anywhere else bind to $PORT, falling back to 7868.
    if not os.getenv("SPACE_ID"):
        launch_options["server_port"] = int(os.getenv("PORT", 7868))
    demo.launch(**launch_options)