"""Arabic ASR Demo — Qwen3-ASR fine-tuned on KSA Saudi dialect.""" import torch import spaces import gradio as gr from qwen_asr import Qwen3ASRModel MODEL_ID = "vadimbelsky/qwen3-asr-arabic-ksa" print("Loading model...") model = Qwen3ASRModel.from_pretrained(MODEL_ID, dtype=torch.float16, device_map="auto") print("Model loaded!") @spaces.GPU def transcribe(audio_path): if audio_path is None: return "" result = model.transcribe(audio_path, language="Arabic") if isinstance(result, list): result = result[0] if result else "" if hasattr(result, "text"): result = result.text elif not isinstance(result, str): result = str(result) return result with gr.Blocks(title="Arabic ASR — KSA Saudi Dialect") as demo: gr.Markdown( """ # Arabic ASR — KSA Saudi Dialect Fine-tuned [Qwen3-ASR-1.7B](https://huggingface.co/Qwen/Qwen3-ASR-1.7B) for Saudi Arabic speech recognition. **WER: 11.49%** on KSA Arabic validation set (vs 14.41% zero-shot) Record or upload Arabic audio to transcribe. """ ) with gr.Row(): with gr.Column(): audio_input = gr.Audio( label="Audio Input", type="filepath", sources=["microphone", "upload"], ) transcribe_btn = gr.Button("Transcribe", variant="primary", size="lg") with gr.Column(): output_text = gr.Textbox( label="Transcription", lines=5, rtl=True, placeholder="Arabic transcription will appear here...", ) transcribe_btn.click(fn=transcribe, inputs=audio_input, outputs=output_text) audio_input.stop_recording(fn=transcribe, inputs=audio_input, outputs=output_text) gr.Examples( examples=[ ["sample_ksa_1.wav"], ["sample_ksa_2.wav"], ["sample_ksa_3.wav"], ], inputs=audio_input, label="Sample KSA Arabic Audio", ) gr.Markdown( """ --- **Model**: [vadimbelsky/qwen3-asr-arabic-ksa](https://huggingface.co/vadimbelsky/qwen3-asr-arabic-ksa) | **Base**: [Qwen/Qwen3-ASR-1.7B](https://huggingface.co/Qwen/Qwen3-ASR-1.7B) | **Dataset**: [KSA Arabic 13k](https://huggingface.co/datasets/vadimbelsky/KSA_Arabic_English_Dataset_13k) """ ) demo.launch(ssr_mode=False)