Spaces:

WalkingOnSaturn
/

genga-kimodo

Runtime error

App Files Files Community

WalkingOnSaturn committed on May 2

Commit

9f127fb

verified ·

1 Parent(s): 9c9fbf7

Add kimodo_motion_seq endpoint for multi-prompt chained sequences

Browse files

Files changed (1) hide show

server.py +121 -0

server.py CHANGED Viewed

@@ -205,6 +205,113 @@ def kimodo_motion(
         return {"status": "error", "error": f"{type(e).__name__}: {e}"}
 def _historical_extract_soma_skin(progress: gr.Progress = gr.Progress()) -> dict:  # noqa: B008
     """One-shot dump of kimodo's somaskel77/skin_standard.npz to base64 so the
     webapp can ship a real SkinnedMesh. Already run; binaries live at
@@ -445,6 +552,20 @@ with gr.Blocks(title="Genga Kimodo") as demo:
         api_name="kimodo_motion",
     )
 if __name__ == "__main__":

         return {"status": "error", "error": f"{type(e).__name__}: {e}"}
+def kimodo_motion_seq(
+    prompts_json: str,
+    frames_json: str,
+    seed: int,
+    cfg: float,
+    num_steps: int,
+    constraints_json: str,
+    transition_frames: int = 20,
+    progress: gr.Progress = gr.Progress(),  # noqa: B008
+) -> dict:
+    """Multi-prompt sequence variant of kimodo_motion. Generates a single
+    motion that transitions through each prompt segment in order.
+    prompts_json: JSON list of strings, e.g. '["walk forward", "wave hello"]'
+    frames_json:  JSON list of ints (per-segment frame counts), same length.
+    transition_frames: how many frames the model uses to blend between segments.
+    Returns the same envelope as kimodo_motion. The total numFrames is
+    sum(frames). If a single segment is provided this is equivalent to
+    kimodo_motion.
+    """
+    try:
+        prompts = json.loads(prompts_json) if prompts_json else []
+        if not isinstance(prompts, list) or not all(isinstance(p, str) and p.strip() for p in prompts):
+            return {"status": "error", "error": "prompts_json must be a JSON list of non-empty strings"}
+        frames = json.loads(frames_json) if frames_json else []
+        if not isinstance(frames, list) or len(frames) != len(prompts) or not all(isinstance(n, int) and 1 <= n <= 300 for n in frames):
+            return {"status": "error", "error": "frames_json must be a JSON list of ints (1..300) matching prompts length"}
+        total_n = sum(frames)
+        if total_n > 600:
+            return {"status": "error", "error": f"total frames {total_n} exceeds 600 cap"}
+        try:
+            raw = json.loads(constraints_json) if constraints_json else []
+            parse_constraints(raw, total_n)
+        except (ValueError, json.JSONDecodeError) as e:
+            return {"status": "error", "error": f"constraint validation: {e}"}
+        progress(0.02, desc="Loading model...")
+        model, skeleton, device = _load_model()
+        from kimodo.constraints import load_constraints_lst
+        constraint_lst = load_constraints_lst(raw, skeleton, device=device)
+        if seed is not None and int(seed) >= 0:
+            from kimodo.tools import seed_everything
+            seed_everything(int(seed))
+        progress(0.10, desc=f"Diffusion ({len(prompts)} segments × {int(num_steps)} steps)...")
+        cfg_kwargs = {"cfg_type": "regular", "cfg_weight": float(cfg)}
+        output = model(
+            [p.strip() for p in prompts],
+            list(frames),
+            constraint_lst=constraint_lst,
+            num_denoising_steps=int(num_steps),
+            num_samples=1,
+            multi_prompt=True,
+            num_transition_frames=int(transition_frames),
+            return_numpy=True,
+            **cfg_kwargs,
+        )
+        progress(0.92, desc="Serializing...")
+        if "posed_joints" not in output or "global_rot_mats" not in output:
+            return {"status": "error", "error": f"unexpected model output keys: {list(output.keys())}"}
+        posed_joints = output["posed_joints"]
+        global_rot_mats = output["global_rot_mats"]
+        joints_pos_t = torch.from_numpy(posed_joints[0]).to(device)
+        if "local_rot_mats" in output:
+            local_rot_mats_77 = torch.from_numpy(output["local_rot_mats"][0]).to(device)
+        else:
+            from kimodo.skeleton import global_rots_to_local_rots
+            joints_rot_t = torch.from_numpy(global_rot_mats[0]).to(device)
+            local_rot_mats_77 = global_rots_to_local_rots(joints_rot_t, skeleton.somaskel77)
+        local_rot_mats_30 = skeleton.from_SOMASkeleton77(local_rot_mats_77)
+        if local_rot_mats_30.ndim == 5 and local_rot_mats_30.shape[0] == 1:
+            local_rot_mats_30 = local_rot_mats_30[0]
+        local_rot_mats = local_rot_mats_30.detach().cpu().numpy().astype(np.float32)
+        root_translation = joints_pos_t[:, 0, :].detach().cpu().numpy().astype(np.float32)
+        T, J = local_rot_mats.shape[0], local_rot_mats.shape[1]
+        # Note: the model may return slightly more or fewer frames than total_n
+        # depending on transition handling; report whatever it gave us.
+        foot_contacts_out = None
+        if "foot_contacts" in output:
+            fc = output["foot_contacts"]
+            if fc.ndim == 3:
+                fc = fc[0]
+            foot_contacts_out = np.asarray(fc, dtype=np.float32).tolist()
+        progress(1.0, desc="Done")
+        return {
+            "status": "ok",
+            "numFrames": int(T),
+            "fps": int(getattr(model, "fps", 30)),
+            "rootTranslation": root_translation.tolist(),
+            "jointRotMats": local_rot_mats.tolist(),
+            "footContacts": foot_contacts_out,
+            "summary": " → ".join(p.strip() for p in prompts),
+            "segments": [{"prompt": p.strip(), "frames": int(n)} for p, n in zip(prompts, frames)],
+        }
+    except Exception as e:
+        traceback.print_exc()
+        return {"status": "error", "error": f"{type(e).__name__}: {e}"}
 def _historical_extract_soma_skin(progress: gr.Progress = gr.Progress()) -> dict:  # noqa: B008
     """One-shot dump of kimodo's somaskel77/skin_standard.npz to base64 so the
     webapp can ship a real SkinnedMesh. Already run; binaries live at
         api_name="kimodo_motion",
     )
+    # Multi-prompt sequence endpoint. Every component below is created with visible=False, so no
+    # form widgets are shown; the webapp posts JSON directly to /gradio_api/call/kimodo_motion_seq.
+    in_prompts_json = gr.Textbox(label="prompts_json", value='["A person walks forward","A person waves hello"]', visible=False)
+    in_frames_json = gr.Textbox(label="frames_json", value="[45,45]", visible=False)
+    in_transition = gr.Number(value=20, label="transition_frames", precision=0, visible=False)
+    out_seq = gr.JSON(label="seq result", visible=False)
+    seq_btn = gr.Button("Generate sequence", visible=False)
+    seq_btn.click(
+        fn=kimodo_motion_seq,
+        inputs=[in_prompts_json, in_frames_json, in_seed, in_cfg, in_steps, in_constraints, in_transition],  # same order as kimodo_motion_seq's positional parameters; in_seed/in_cfg/in_steps/in_constraints are defined outside this hunk (presumably the kimodo_motion inputs - confirm)
+        outputs=out_seq,
+        api_name="kimodo_motion_seq",  # endpoint name used in the call URL mentioned above
+    )
 if __name__ == "__main__":