"""ACE Music Studio — Gradio entrypoint. UI ARCHITECTURE (locked — read this before editing): The five "modes" (Generate / Cover / Extend / Edit / Lyrics) are NOT implemented via ``gr.Tabs``. The wireframes at ``docs/superpowers/specs/mockups/`` show a LEFT sidebar with mode pills + a session History section, and a single content column on the right. The implementation pattern is: gr.Row(elem_classes=["ams-body"]) ├── gr.Column(min_width=190, elem_classes=["ams-sidebar"]) │ ├── gr.Radio(label=None, elem_classes=["ams-side-radio"]) ← 5 mode choices │ └── gr.HTML(... "History · session" ...) └── gr.Column(elem_classes=["ams-content"]) ├── gr.Group(visible=True) ← pane_generate ├── gr.Group(visible=False) ← pane_cover ├── gr.Group(visible=False) ← pane_extend ├── gr.Group(visible=False) ← pane_edit └── gr.Group(visible=False) ← pane_lyrics The Radio's ``change`` event fires ``_switch_pane(mode)`` which returns visibility updates for the five Groups. The Radio's native ``:checked`` state gives us the sidebar "active item" highlight for free via CSS (see ``theme.CSS`` for ``.ams-side-radio`` selectors). DO NOT switch this back to ``gr.Tabs`` — that produces top-positioned horizontal tabs which contradicts the wireframes. On HF Spaces, ``_bootstrap()`` runs once on import to mirror the read-only preload cache into a writable tree. On Mac/Linux locally, it's a no-op until M7. """ from __future__ import annotations import os # Set MPS fallback BEFORE any torch import path is taken. os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1") # Don't pin HF download source — let HF default for both Spaces and local cache. os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1") import hashlib import random from pathlib import Path import gradio as gr import ace_pipeline import backend as be import lora_stack import modes import post_process import theme import ui _BACKEND: be.ACEStepStudioBackend | None = None def get_backend() -> be.ACEStepStudioBackend: global _BACKEND if _BACKEND is None: _BACKEND = be.ACEStepStudioBackend() return _BACKEND def _sha256(path: str) -> str: """Stream a file through SHA-256 in 64 KB chunks. Used to fingerprint the active LoRA so the generation metadata includes a provenance hash (useful when the user uploads variants of the same psytrance fine-tune with subtly different weights). """ h = hashlib.sha256() with open(path, "rb") as f: for chunk in iter(lambda: f.read(65536), b""): h.update(chunk) return h.hexdigest() def _active_md(name: str, scale: float, kind: str) -> str: """Format the 'Active: …' line shown under the strength slider.""" return f"**Active:** `{name}` · scale `{scale:.2f}` · {kind}" def on_lora_preset_change(preset_name: str, strength: float): """User picked a preset (or 'None'). Downloads + validates + sets state. Returns (state, active_markdown, upload_clear_value) — the third value clears any custom-upload widget so the two inputs stay mutually exclusive. """ if preset_name == "None" or not preset_name: return None, "_No LoRA active_", None try: local_path = lora_stack.download_preset(preset_name) except lora_stack.LoRAValidationError as e: raise gr.Error(str(e)) from e info = lora_stack.sniff(local_path) if not info.compatible: raise gr.Error( f"Preset {preset_name!r} is not compatible with ACE-Step 1.5 XL SFT: {info.diagnostic}" ) state = { "name": preset_name, "scale": float(strength), "path": str(local_path), "sha256": _sha256(str(local_path)), } return state, _active_md(preset_name, float(strength), "preset"), None def on_lora_upload(file_obj, strength: float): """User dropped a custom .safetensors. Replaces any active preset. Returns (state, active_markdown, preset_reset_value) — the third value resets the preset radio to 'None' so the two inputs stay mutually exclusive. """ if file_obj is None: return None, "_No LoRA active_", "None" path_str = file_obj.name if hasattr(file_obj, "name") else str(file_obj) try: info = lora_stack.sniff(path_str) except lora_stack.LoRAValidationError as e: raise gr.Error(str(e)) from e if not info.compatible: raise gr.Error(f"Uploaded LoRA isn't compatible with ACE-Step 1.5 XL SFT: {info.diagnostic}") name = Path(path_str).stem state = { "name": name, "scale": float(strength), "path": path_str, "sha256": _sha256(path_str), } return state, _active_md(name, float(strength), "custom"), "None" def on_lora_strength_change(state, strength: float): """User dragged the strength slider. Update scale on the active LoRA. No-op if no LoRA is active. """ if not state: return state, "_No LoRA active_" new_state = {**state, "scale": float(strength)} # Preserve the "preset" vs "custom" tag — presets resolve to a path # under the HF cache (~/.cache/huggingface/hub/…), uploads land # under /tmp/gradio/… or the user's pwd. Use the same heuristic # the upload/preset handlers used: a path inside the HF cache or # snapshot tree counts as preset, otherwise custom. path = str(new_state.get("path", "")) kind = "preset" if (".cache/huggingface" in path or "snapshots" in path) else "custom" return new_state, _active_md(new_state["name"], float(strength), kind) def on_generate_click( prompt: str, lyrics: str, duration_s: float, instrumental_label: str, lora_state, progress=gr.Progress(track_tqdm=True), # noqa: B008 ): loras = [lora_state] if lora_state else [] try: out_path, meta = modes.generate( get_backend(), params={ "prompt": prompt, "lyrics": lyrics, "duration_s": int(duration_s), "instrumental": instrumental_label == "Instrumental", "seed": random.randint(1, 2_147_483_647), "loras": loras, "advanced": {}, "lm": {}, "dcw": {}, }, ) except ValueError as e: raise gr.Error(str(e)) from e return out_path, meta def on_cover_click( ref_audio, prompt: str, lyrics: str, duration_s: float, audio_cover_strength: float, lora_state, progress=gr.Progress(track_tqdm=True), # noqa: B008 ): """Cover-mode click. ref_audio is a filepath from gr.Audio(type='filepath').""" loras = [lora_state] if lora_state else [] try: return modes.cover( get_backend(), params={ "ref_audio": ref_audio, "prompt": prompt, "lyrics": lyrics, "duration_s": int(duration_s), "audio_cover_strength": float(audio_cover_strength), "seed": random.randint(1, 2_147_483_647), "loras": loras, "advanced": {}, "lm": {}, "dcw": {}, }, ) except ValueError as e: raise gr.Error(str(e)) from e def on_extend_click( seed_audio, extra_prompt: str, extension_lyrics: str, extra_duration_s: float, wav_crossfade_s: float, repaint_mode: str, repaint_strength: float, latent_crossfade_frames: float, chunk_mask_mode: str, lora_state, progress=gr.Progress(track_tqdm=True), # noqa: B008 ): """Extend-mode click. seed_audio is a filepath from gr.Audio(type='filepath').""" loras = [lora_state] if lora_state else [] try: return modes.extend( get_backend(), params={ "seed_audio": seed_audio, "extra_prompt": extra_prompt, "extension_lyrics": extension_lyrics, "extra_duration_s": int(extra_duration_s), "wav_crossfade_s": float(wav_crossfade_s), "repaint_mode": repaint_mode, "repaint_strength": float(repaint_strength), "latent_crossfade_frames": int(latent_crossfade_frames), "chunk_mask_mode": chunk_mask_mode, "seed": random.randint(1, 2_147_483_647), "loras": loras, "advanced": {}, "lm": {}, "dcw": {}, }, ) except ValueError as e: raise gr.Error(str(e)) from e def on_draft_lyrics( brief: str, structure: str, language: str, tone: str, verse_lines: float, chorus_lines: float, bridge_lines: float, rhyme: str, temperature: float, top_p: float, top_k: float, max_new_tokens: float, seed, progress=gr.Progress(track_tqdm=True), # noqa: B008 ): """Lyrics-mode click. Calls ``modes.lyrics(...)`` directly — no ACE-Step pipeline is touched. Qwen 2.5 7B is its own lazy singleton inside ``lyrics_lm``; the first click triggers a ~4 GB MLX download (cached afterwards) and ~30 s warm-up before the draft appears. """ try: return modes.lyrics( get_backend(), params={ "brief": brief, "structure": structure, "language": language, "tone": tone, "verse_lines": int(verse_lines), "chorus_lines": int(chorus_lines), "bridge_lines": int(bridge_lines), "rhyme": rhyme, "temperature": float(temperature), "top_p": float(top_p), "top_k": int(top_k), "max_new_tokens": int(max_new_tokens), "seed": int(seed) if seed is not None else None, }, ) except ValueError as e: raise gr.Error(str(e)) from e def on_separate_stems(audio_path): """Run Demucs on the current Output audio and surface 4 stem files.""" if not audio_path: raise gr.Error("Generate a song first.") try: stems = post_process.separate_stems(audio_path) except Exception as e: raise gr.Error(f"Demucs failed: {e}") from e return gr.Files(value=list(stems.values()), visible=True) def on_normalise(audio_path): """Run pyloudnorm at -14 LUFS and surface the normalised WAV.""" if not audio_path: raise gr.Error("Generate a song first.") try: out = post_process.normalise_lufs(audio_path, target_lufs=-14.0) except Exception as e: raise gr.Error(f"Normalisation failed: {e}") from e return gr.Audio(value=str(out), visible=True) def on_export_mp3(audio_path): """Encode the current Output to MP3 320 k via ffmpeg and surface the file.""" if not audio_path: raise gr.Error("Generate a song first.") try: out = post_process.to_mp3(audio_path, bitrate_kbps=320) except Exception as e: raise gr.Error(f"MP3 export failed: {e}") from e return gr.File(value=str(out), visible=True) def on_edit_click( source_audio, sub_mode: str, source_lyrics: str, target_lyrics: str, segment_start_s: float, segment_end_s: float, repaint_strength: float, repaint_mode: str, flow_source_caption: str, flow_n_min: float, flow_n_max: float, flow_n_avg: float, lora_state, progress=gr.Progress(track_tqdm=True), # noqa: B008 ): """Edit-mode click. source_audio is a filepath from gr.Audio(type='filepath').""" loras = [lora_state] if lora_state else [] try: return modes.edit( get_backend(), params={ "source_audio": source_audio, "sub_mode": sub_mode, "source_lyrics": source_lyrics, "target_lyrics": target_lyrics, "segment_start_s": float(segment_start_s), "segment_end_s": float(segment_end_s), "repaint_strength": float(repaint_strength), "repaint_mode": repaint_mode, "flow_source_caption": flow_source_caption, "flow_n_min": float(flow_n_min), "flow_n_max": float(flow_n_max), "flow_n_avg": int(flow_n_avg), "seed": random.randint(1, 2_147_483_647), "loras": loras, "advanced": {}, "lm": {}, "dcw": {}, }, ) except ValueError as e: raise gr.Error(str(e)) from e HEADER_HTML = """