JymNils committed on
Commit
5e4e0c2
·
verified ·
1 Parent(s): a930ed8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -196,7 +196,7 @@ def generate_tts_audio(
196
  maintains the prosody, tone, and vocal qualities of the reference speaker, or uses default voice if no reference is provided.
197
 
198
  Args:
199
- text_input (str): The text to synthesize into speech (maximum 300 characters)
200
  language_id (str): The language code for synthesis (eg. en, fr, de, es, it, pt, hi)
201
  audio_prompt_path_input (str, optional): File path or URL to the reference audio file that defines the target voice style. Defaults to None.
202
  exaggeration_input (float, optional): Controls speech expressiveness (0.25-2.0, neutral=0.5, extreme values may be unstable). Defaults to 0.5.
@@ -232,7 +232,7 @@ def generate_tts_audio(
232
  print("No audio prompt provided; using default voice.")
233
 
234
  wav = current_model.generate(
235
- text_input[:300], # Truncate text to max chars
236
  language_id=language_id,
237
  **generate_kwargs
238
  )
@@ -242,10 +242,10 @@ def generate_tts_audio(
242
  with gr.Blocks() as demo:
243
  gr.Markdown(
244
  """
245
- # Chatterbox Multilingual Demo
246
- Generate high-quality multilingual speech from text with reference audio styling, supporting 23 languages.
247
 
248
- For a hosted version of Chatterbox Multilingual and for finetuning, please visit [resemble.ai](https://app.resemble.ai)
249
  """
250
  )
251
 
@@ -253,10 +253,10 @@ with gr.Blocks() as demo:
253
  gr.Markdown(get_supported_languages_display())
254
  with gr.Row():
255
  with gr.Column():
256
- initial_lang = "fr"
257
  text = gr.Textbox(
258
  value=default_text_for_ui(initial_lang),
259
- label="Text to synthesize (max chars 300)",
260
  max_lines=5
261
  )
262
 
@@ -264,23 +264,23 @@ with gr.Blocks() as demo:
264
  choices=list(ChatterboxMultilingualTTS.get_supported_languages().keys()),
265
  value=initial_lang,
266
  label="Language",
267
- info="Select the language for text-to-speech synthesis"
268
  )
269
 
270
  ref_wav = gr.Audio(
271
  sources=["upload", "microphone"],
272
  type="filepath",
273
- label="Reference Audio File (Optional)",
274
  value=default_audio_for_ui(initial_lang)
275
  )
276
 
277
  gr.Markdown(
278
- "💡 **Note**: Ensure that the reference clip matches the specified language tag. Otherwise, language transfer outputs may inherit the accent of the reference clip's language. To mitigate this, set the CFG weight to 0.",
279
  elem_classes=["audio-note"]
280
  )
281
 
282
  exaggeration = gr.Slider(
283
- 0.25, 2, step=.05, label="Exaggeration (Neutral = 0.5, extreme values can be unstable)", value=.5
284
  )
285
  cfg_weight = gr.Slider(
286
  0.2, 1, step=.05, label="CFG/Pace", value=0.5
@@ -290,7 +290,7 @@ with gr.Blocks() as demo:
290
  seed_num = gr.Number(value=0, label="Random seed (0 for random)")
291
  temp = gr.Slider(0.05, 5, step=.05, label="Temperature", value=.8)
292
 
293
- run_btn = gr.Button("Generate", variant="primary")
294
 
295
  with gr.Column():
296
  audio_output = gr.Audio(label="Output Audio")
 
196
  maintains the prosody, tone, and vocal qualities of the reference speaker, or uses default voice if no reference is provided.
197
 
198
  Args:
199
+ text_input (str): The text to synthesize into speech (maximum 1000 characters)
200
  language_id (str): The language code for synthesis (eg. en, fr, de, es, it, pt, hi)
201
  audio_prompt_path_input (str, optional): File path or URL to the reference audio file that defines the target voice style. Defaults to None.
202
  exaggeration_input (float, optional): Controls speech expressiveness (0.25-2.0, neutral=0.5, extreme values may be unstable). Defaults to 0.5.
 
232
  print("No audio prompt provided; using default voice.")
233
 
234
  wav = current_model.generate(
235
+ text_input[:1000], # Truncate text to max chars
236
  language_id=language_id,
237
  **generate_kwargs
238
  )
 
242
  with gr.Blocks() as demo:
243
  gr.Markdown(
244
  """
245
+ # Chatterbox Multilingual Demo for CPU
246
+ Genera Voz de alta calidad multilingue con referencia deaudio + stylo, suporta 23 languajes.
247
 
248
+ Para una version tuneada, visita [resemble.ai](https://app.resemble.ai)
249
  """
250
  )
251
 
 
253
  gr.Markdown(get_supported_languages_display())
254
  with gr.Row():
255
  with gr.Column():
256
+ initial_lang = "es"
257
  text = gr.Textbox(
258
  value=default_text_for_ui(initial_lang),
259
+ label="Texto a sintetizar (max 1000 caracteres)",
260
  max_lines=5
261
  )
262
 
 
264
  choices=list(ChatterboxMultilingualTTS.get_supported_languages().keys()),
265
  value=initial_lang,
266
  label="Language",
267
+ info="Seleccion el laeguagj para la sintesis"
268
  )
269
 
270
  ref_wav = gr.Audio(
271
  sources=["upload", "microphone"],
272
  type="filepath",
273
+ label="Audio de Referencia (Opcional)",
274
  value=default_audio_for_ui(initial_lang)
275
  )
276
 
277
  gr.Markdown(
278
+ "💡 **Note**: Asegurarse que el audio de referencia y el del texto sean el mismo. Otherwise, language transfer outputs may inherit the accent of the reference clip's language. To mitigate this, set the CFG weight to 0.",
279
  elem_classes=["audio-note"]
280
  )
281
 
282
  exaggeration = gr.Slider(
283
+ 0.25, 2, step=.05, label="Exageracion (Neutral = 0.5, valores extremos son inestables)", value=.5
284
  )
285
  cfg_weight = gr.Slider(
286
  0.2, 1, step=.05, label="CFG/Pace", value=0.5
 
290
  seed_num = gr.Number(value=0, label="Random seed (0 for random)")
291
  temp = gr.Slider(0.05, 5, step=.05, label="Temperature", value=.8)
292
 
293
+ run_btn = gr.Button("Generar", variant="primary")
294
 
295
  with gr.Column():
296
  audio_output = gr.Audio(label="Output Audio")