from __future__ import annotations

import os
import random
import tempfile
import threading
import time
import traceback
import json
import html
import base64
import hashlib
import io
import mimetypes
import urllib.request
from pathlib import Path

import gradio as gr
import numpy as np
import torch
from scipy.spatial.transform import Rotation

try:
    import spaces  # type: ignore

    GPU = spaces.GPU
except Exception:  # pragma: no cover

    def GPU(*dargs, **dkwargs):  # noqa: N802
        """No-op stand-in used when ``spaces.GPU`` could not be obtained.

        Usable both as a bare decorator (``@GPU``) and as a decorator factory
        (``@GPU(...)``); in either form the decorated function is returned
        unchanged.
        """
        # Exactly one positional callable and no keywords means bare ``@GPU`` use.
        used_bare = len(dargs) == 1 and callable(dargs[0]) and not dkwargs
        if used_bare:
            return dargs[0]

        def passthrough(fn):
            return fn

        return passthrough


from kata_store import store, HFDatasetAnimationStore
from quota_store import _format_remaining, bucket_for_request, quota_store

# Local dev: load secrets (HF_TOKEN, NVIDIA_NIM_API_KEY, …) from a .env next to this file.
# On the Space these come from the runtime env, so a missing .env / dotenv is fine.
try:
    from dotenv import load_dotenv

    load_dotenv(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".env"))
except Exception:
    # Deliberate best-effort: no dotenv package or no .env file is not an error.
    pass

DEFAULT_GENERATION_MODEL = os.environ.get("KIMODO_MODEL", "kimodo-smplx-rp")
MODEL_NAME = DEFAULT_GENERATION_MODEL

# Optional remote kimodo-motion-api backend (e.g. a workstation running the real
# model). When set, generation HTTP-calls this server instead of loading the model
# in-process -- used by the local Gradio dev server, which can't load the model
# (its transformers version differs from the Space's). The deployed Spaces leave
# this unset and use the in-process @GPU model. Set via KIMODO_REMOTE_URL.
# Surrounding whitespace and any trailing "/" are stripped from the value.
KIMODO_REMOTE_URL = os.environ.get("KIMODO_REMOTE_URL", "").strip().rstrip("/")

# SMPL-X 22-joint parent indices (matches the model's bone order); the remote API
# returns bone_names but not parents, and we have no local model to query offline.
# One parent index per joint (22 entries); the first entry is -1.
_SMPLX22_PARENTS = [
    -1, 0, 0, 0,
    1, 2, 3,
    4, 5, 6,
    7, 8, 9,
    9, 9,
    12, 13, 14,
    16, 17,
    18, 19,
]

# (dropdown label, model id) pairs for the selectable generation models.
GENERATION_MODELS = [
    ("SMPL-X body (22 joints)", "kimodo-smplx-rp"),
    ("SOMA body + fingers (77 joints)", "kimodo-soma-rp"),
]

DEFAULT_PROMPT = "a martial artist steps into a fighting stance and throws a straight punch"

# Largest signed 32-bit integer (2_147_483_647).
MAX_SEED = 2**31 - 1

DEFAULT_DATASET_REPO = "polats/kimodo-kata-animations"

# Approx height (m) of the SMPL-X neutral mesh kimodo motion is authored at.
# Used to stride-scale a display character so its root translation matches its size.
SMPLX_HEIGHT = 1.7

# Alternate display characters the viewer can retarget kimodo motion onto. Each
# is a skinned rig + a {kimodo SMPL-X-22 joint -> rig bone} mapping; the viewer's
# rest-mode retargeter drives it from the clip's global_quats_xyzw. The GLB is
# embedded into the viewer at render time (not stored in preview records).
#
# unirig_citizen: the s&box citizen re-rigged through UniRig (anonymous bone_N
# skeleton). Mapping mirrors kimodo-motion-api web/src/rigs.js unirigCitizenMapping().
_CITIZEN_GLB_PATH = Path(__file__).parent.joinpath("assets", "unirig_citizen.glb")

# kimodo joint name -> bone name in the citizen rig (22 entries).
UNIRIG_CITIZEN_MAPPING = {
    "pelvis": "bone_0",
    "spine1": "bone_1",
    "spine2": "bone_2",
    "spine3": "bone_3",
    "neck": "bone_4",
    "head": "bone_5",
    "left_hip": "bone_44",
    "right_hip": "bone_48",
    "left_knee": "bone_45",
    "right_knee": "bone_49",
    "left_ankle": "bone_46",
    "right_ankle": "bone_50",
    "left_foot": "bone_47",
    "right_foot": "bone_51",
    "left_collar": "bone_6",
    "right_collar": "bone_25",
    "left_shoulder": "bone_7",
    "right_shoulder": "bone_26",
    "left_elbow": "bone_8",
    "right_elbow": "bone_27",
    "left_wrist": "bone_9",
    "right_wrist": "bone_28",
}


def _load_glb_b64(path: Path) -> str:
    """Return the bytes of *path* as base64 ASCII text.

    Any failure is reported with ``print`` and yields ``""`` instead of raising.
    """
    try:
        payload = path.read_bytes()
        return str(base64.b64encode(payload), "ascii")
    except Exception as exc:  # pragma: no cover - missing asset just disables the rig
        print(f"Could not load display character GLB {path}: {exc}")
        return ""


# Loaded once at import; injected into the viewer srcdoc, never into stored records.
_CHARACTER_CATALOG = [
    {"id": "skeleton", "label": "Procedural skeleton"},
    {
        "id": "citizen",
        "label": "s&box Citizen",
        "mapping": UNIRIG_CITIZEN_MAPPING,
        "scale": 1.0,
        "glb_b64": _load_glb_b64(_CITIZEN_GLB_PATH),
    },
]

# Citizen is the default view (falls back to the skeleton for clips with no quats).
DEFAULT_CHARACTER = "citizen"

# Clothing garments: skinned GLBs re-rigged onto the SAME citizen bone_N skeleton
# (decompiled from s&box .vmdl_c via clothing_rig.py). A garment is "worn" by
# name-matching its bones to the citizen's and copying transforms each frame, so
# it deforms with the body. Too large to embed per render, so they're hosted in
# the dataset store and loaded by URL (browser-cached across renders).
# An unset or blank KIMODO_DATASET_REPO falls back to DEFAULT_DATASET_REPO.
_DATASET_REPO = os.environ.get("KIMODO_DATASET_REPO", "").strip() or DEFAULT_DATASET_REPO
_VIEWER_ASSET_BASE = f"https://huggingface.co/datasets/{_DATASET_REPO}/resolve/main/viewer"

# Space owner(s): the HF usernames allowed to set the featured (start-up) kata. Comma list.
# Parsed from the comma-separated KIMODO_OWNER_USERNAMES (default "polats"):
# each entry is stripped and lowercased, and blank entries are dropped.
OWNER_USERNAMES = set(
    filter(
        None,
        (
            name.strip().lower()
            for name in os.environ.get("KIMODO_OWNER_USERNAMES", "polats").split(",")
        ),
    )
)

# Where the featured-kata pointer lives in the dataset (read at start-up to pick the kata
# that plays first, and written by an owner via set_featured).
_FEATURED_PATH = "viewer/featured.json"

# Community kata upvotes live here: { root_id: [voter_key, ...] } (voter_key = lowercased
# username, else "anon:"). Count = len(voters). Read at start-up for the Top sort.
# NOTE(review): the "anon:" form above looks truncated (a suffix placeholder may have
# been lost) — confirm against the code that writes votes.
_VOTES_PATH = "viewer/votes.json"

# One garment per slot can be worn; slot -> wardrobe dropdown label.
CLOTHING_SLOTS = {
    "head": "Hat",
    "face": "Glasses",
    "torso_over": "Jacket",
    "legs": "Trousers",
}

CLOTHING_CATALOG = [
    {
        "id": "beanie_green",
        "label": "Beanie Green",
        "slot": "head",
        "layer": 3,
        "url": f"{_VIEWER_ASSET_BASE}/clothing/beanie_green_sausage.glb",
    },
    {
        "id": "nerdy_glasses",
        "label": "Nerdy Glasses",
        "slot": "face",
        "layer": 4,
        "url": f"{_VIEWER_ASSET_BASE}/clothing/nerdy_glasses_sausage.glb",
    },
    {
        "id": "kimono",
        "label": "Kimono",
        "slot": "torso_over",
        "layer": 2,
        "url": f"{_VIEWER_ASSET_BASE}/clothing/kimono_sausage.glb",
    },
    {
        "id": "kimono_trousers",
        "label": "Kimono Trousers",
        "slot": "legs",
        "layer": 1,
        "url": f"{_VIEWER_ASSET_BASE}/clothing/kimono_trousers_sausage.glb",
    },
]

# Worn on the citizen by default. (No beanie by default — the head slot is empty;
# it's still in the catalog so it can be added from the wardrobe.)
DEFAULT_CLOTHING = {
    "face": "nerdy_glasses",
    "torso_over": "kimono",
    "legs": "kimono_trousers",
}

_DEFAULT_SCENE_PATH = Path(__file__).parent.joinpath("assets", "default_scene.webp")
_TITLE_OVERLAY_AUDIO_PATH = Path(__file__).parent.joinpath("assets", "karate_wiener_title_overlay.wav")

# Looping theme that starts when the intro overlay's sting finishes. Embedded as a base64 data URI
# (same as the intro) so it never depends on HF asset serving; kept as MP3 (not WAV) so the inline
# payload stays ~1.5 MB instead of ~15 MB.
_THEME_AUDIO_PATH = Path(__file__).parent.joinpath("assets", "karate_wiener_source.mp3")


def _stripped_env(var_name: str, fallback: str) -> str:
    """Return environment variable *var_name* (or *fallback* if unset), whitespace-stripped."""
    return os.environ.get(var_name, fallback).strip()


DIT360_SPACE = _stripped_env("KIMODO_DIT360_SPACE", "Insta360-Research/DiT360")
KLEIN_SPACE = _stripped_env("KIMODO_KLEIN_SPACE", "polats/tiny-army-klein-zerogpu")
TRIPOSPLAT_SPACE = _stripped_env("KIMODO_TRIPOSPLAT_SPACE", "polats/kimodo-triposplat-zerogpu")

# tiny-aya (Cohere Tiny Aya) text model — used to suggest/improve dojo scene prompts AND
# to voice Karate Wiener's chat replies (WHO AM I tab). Falls back to hosted Nemotron
# (NVIDIA NIM, see NIM_KEY below) when the Space is asleep / out of quota / erroring.
TINY_AYA_SPACE = _stripped_env("KIMODO_TINY_AYA_SPACE", "polats/tiny-army-tiny-aya-zerogpu")

# VoxCPM text-to-speech Space — clones Karate Wiener's voice from a reference wav for the
# chat. A ZeroGPU "sidecar" Space reached via gradio_client (same call locally + on the
# Space), mirroring tiny-army's _voxcpm_predict. Reference voice lives in the dataset.
VOXCPM_SPACE = _stripped_env("KIMODO_VOXCPM_SPACE", "polats/tiny-army-voxcpm-zerogpu")
_WIENER_VOICE_PATH = "viewer/whoami/wiener-voice.wav"

# Hosted Nemotron (NVIDIA NIM) — OpenAI-compatible chat endpoint used as a fallback for the
# Tiny Aya Space when it is asleep / out of GPU quota / erroring (mirrors tiny-army, where
# Nemotron-30B is too big to self-host so it runs via hosted NIM). Set NVIDIA_NIM_API_KEY
# (an nvapi-… key) to enable; without it, the chat/dojo prompt just relies on Tiny Aya.
NIM_KEY = os.environ.get("NVIDIA_NIM_API_KEY", "").strip() _NIM_TEXT_URL = "https://integrate.api.nvidia.com/v1/chat/completions" _NIM_NEMOTRON_MODEL = os.environ.get("KIMODO_NEMOTRON_NIM_MODEL", "nvidia/nemotron-3-nano-30b-a3b") # --- Pluggable side tabs --------------------------------------------------- # A NEW side tab is added by DROPPING FILES into ./tabs — NO app.py edit needed, # so multiple people can build tabs in parallel without touching shared code: # tabs/.tab.html -> the rail button (.kimodo-left-tab[data-drawer=…]) # tabs/.drawer.html -> the slide-in