"""Gradio app that transcribes Hebrew speech to text with a Whisper model."""

from typing import Optional

import gradio as gr
import spaces
import torch
from transformers import pipeline

# Initialize the speech-to-text pipeline once at import time so every request
# reuses the same loaded model.
pipe = pipeline(
    "automatic-speech-recognition",
    model="ivrit-ai/whisper-large-v3",
    torch_dtype=torch.float16,
    device="cuda",
    # Whisper operates on 30-second windows; chunking lets the pipeline handle
    # recordings longer than a single window instead of failing/truncating.
    chunk_length_s=30,
)


@spaces.GPU
def transcribe(audio: Optional[str]) -> str:
    """Transcribe the given audio file to Hebrew text.

    Args:
        audio: Path to the audio file handed over by the Gradio ``Audio``
            component (configured with ``type="filepath"``), or ``None`` when
            the user submitted without uploading or recording anything.

    Returns:
        The transcribed text, or a short prompt asking for audio when
        ``audio`` is ``None``.
    """
    if audio is None:
        return "Please upload or record an audio file."

    # Pin the decoding language so the model does not auto-detect it.
    result = pipe(audio, generate_kwargs={"language": "hebrew"})
    return result["text"]


# Create the Gradio interface
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(
        sources=["microphone", "upload"],
        type="filepath",
        label="Hebrew Audio",
    ),
    outputs="text",
    title="Hebrew Speech-to-Text Transcription",
    description=(
        "Use this app to transcribe Hebrew speech to text. "
        "Upload your own audio file or record audio directly through your microphone. "
        "The model used is `ivrit-ai/whisper-large-v3`."
    ),
    allow_flagging="never",
)

# Launch the app
demo.launch()