"""Gradio demo that runs the FlashSR audio super-resolution ONNX model on CPU.

Importing this module downloads the model weights from the Hugging Face Hub
and creates a single ONNX Runtime session that is reused for every request.
"""

import os
import tempfile
from typing import Optional

import gradio as gr
import librosa
import numpy as np
import onnxruntime as ort
import soundfile as sf
from huggingface_hub import hf_hub_download

# Sample rate the input is resampled to before inference.
INPUT_SAMPLE_RATE = 16000
# Sample rate written into the output WAV header.
# NOTE(review): assumes the model emits 48 kHz audio -- confirm against the
# FlashSR model card.
OUTPUT_SAMPLE_RATE = 48000

# Download the official ONNX model weights from the Hub
print("Downloading FlashSR ONNX weights...")
model_path = hf_hub_download(
    repo_id="YatharthS/FlashSR",
    filename="model.onnx",
    subfolder="onnx",
)

# Initialize the ONNX Runtime inference session on CPU
print("Initializing ONNX Runtime Session...")
ort_session = ort.InferenceSession(
    model_path,
    providers=['CPUExecutionProvider'],
)

# Dynamically fetch input and output layer names from the ONNX graph
input_name = ort_session.get_inputs()[0].name
output_name = ort_session.get_outputs()[0].name


def super_resolve_onnx(audio_path: Optional[str]) -> Optional[str]:
    """Run the ONNX model on an audio file and return the enhanced WAV path.

    Args:
        audio_path: Path to the input audio file, or ``None`` when the user
            submitted nothing.

    Returns:
        Path to a newly written WAV file tagged with ``OUTPUT_SAMPLE_RATE``,
        or ``None`` if ``audio_path`` is ``None``.
    """
    if audio_path is None:
        return None

    # 1. Load audio and force resample to the model's input rate
    y, _ = librosa.load(audio_path, sr=INPUT_SAMPLE_RATE)

    # 2. Add a leading axis and cast to float32 for the ONNX input
    #    (presumably [Batch, Samples] -- confirm against the model graph)
    lowres_wav = y[np.newaxis, :].astype(np.float32)

    # 3. Execute ONNX inference
    print("Processing audio via ONNX...")
    onnx_output = ort_session.run([output_name], {input_name: lowres_wav})[0]

    # 4. Drop singleton dimensions to get the raw audio array
    new_wav = onnx_output.squeeze()

    # 5. Save the result to a unique temporary file. A fixed file name in the
    #    working directory would be overwritten when requests overlap.
    #    The handle is closed before writing so soundfile can reopen the path
    #    on every platform; delete=False keeps the file for Gradio to read.
    with tempfile.NamedTemporaryFile(
        prefix="flashsr_", suffix=".wav", delete=False
    ) as tmp:
        output_path = tmp.name
    sf.write(output_path, new_wav, OUTPUT_SAMPLE_RATE)

    return output_path


# Gradio Setup
title = "⚡ FlashSR ONNX: Real-Time Audio Super-Resolution"
description = (
    "This version runs entirely on **ONNX Runtime (CPU Optimized)**."
)

demo = gr.Interface(
    fn=super_resolve_onnx,
    inputs=gr.Audio(type="filepath", label="Input Audio (VOD Clip)"),
    outputs=gr.Audio(type="filepath", label="ONNX Enhanced Output (48kHz)"),
    title=title,
    description=description,
    flagging_mode="never",
)

if __name__ == "__main__":
    demo.launch()