Spaces:

codexxx
/

voxtral-studio

Running

mehdilaalali committed on Apr 12

Commit

e5362b5

verified ·

1 Parent(s): 195eb72

fix(core): migrate extraction to PCM-WAV to work around the missing libmp3lame codec and enforce a 25s limit on cloning endpoints

Files changed (1) hide show

core.py CHANGED Viewed

@@ -16,9 +16,9 @@ def get_client():
 # ─── Utility ──────────────────────────────────────────────────────────────────
 def trim_audio_if_needed(audio_path, max_seconds=25):
     """Trims audio to max_seconds using ffmpeg."""
-    out_path = tempfile.mktemp(suffix=".mp3")
     try:
-        subprocess.run(["ffmpeg", "-y", "-i", audio_path, "-t", str(max_seconds), "-acodec", "libmp3lame", out_path], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
         return out_path
     except Exception as e:
         print(f"Warning: Failed to trim audio, returning original: {e}")
@@ -150,12 +150,14 @@ def clone_voice(audio_path, url_input, voice_name, gender, languages_str):
                     'preferredquality': '128',
                 }],
                 'postprocessor_args': [
-                    '-t', '60' # Limit to first 60 seconds
                 ],
             }
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                 info = ydl.extract_info(url, download=True)
                 final_audio_path = base_out + '.mp3'
     client = get_client()
     sample_b64 = base64.b64encode(Path(final_audio_path).read_bytes()).decode()

 # ─── Utility ──────────────────────────────────────────────────────────────────
 def trim_audio_if_needed(audio_path, max_seconds=25):
     """Trims audio to max_seconds using ffmpeg."""
+    out_path = tempfile.mktemp(suffix=".wav")
     try:
+        subprocess.run(["ffmpeg", "-y", "-i", audio_path, "-t", str(max_seconds), "-c:a", "pcm_s16le", out_path], check=True)
         return out_path
     except Exception as e:
         print(f"Warning: Failed to trim audio, returning original: {e}")
                     'preferredquality': '128',
                 }],
                 'postprocessor_args': [
+                    '-t', '25' # Hard Limit to 25 seconds to bypass API 30s limit
                 ],
             }
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                 info = ydl.extract_info(url, download=True)
                 final_audio_path = base_out + '.mp3'
+    # Ensure any direct MP3 or uploaded file is ALSO strictly trimmed
+    final_audio_path = trim_audio_if_needed(final_audio_path, max_seconds=25)
     client = get_client()
     sample_b64 = base64.b64encode(Path(final_audio_path).read_bytes()).decode()