Pocket-TTS

Running

App Files Files Community

Pocket-TTS / server.py

hf4uwho

Add 7 celebrity voices: DiCaprio, Nicholson, Pesci, De Niro, Pacino, Gyllenhaal, Johansson

341df3d about 2 months ago

Raw

History Blame Contribute Delete

65.7 kB

	"""Pocket-TTS FastAPI server."""
	import io
	import os
	import wave
	import subprocess
	import traceback
	from pathlib import Path

	import numpy as np
	from fastapi import FastAPI, Query, HTTPException, Form, Body
	from fastapi.responses import Response, HTMLResponse
	from pydantic import BaseModel

	class TTSRequest(BaseModel):
	    """Request payload describing a single text-to-speech job.

	    NOTE(review): no consumer of this model is visible in this part of the
	    file; presumably it is the JSON body of a synthesis endpoint -- confirm.
	    """

	    # Text to synthesize. Required: this is the only field without a default.
	    text: str
	    # Voice name; the default "af_alloy" is one of the keys of VOICE_SOURCES.
	    voice: str = "af_alloy"
	    # Presumably the sampling temperature handed to the model -- TODO confirm.
	    temperature: float = 0.7
	    # Presumably the audio format of the returned clip -- TODO confirm which
	    # values besides the default "ogg" are accepted.
	    format: str = "ogg"

	# Optional Hugging Face authentication driven by the HF_TOKEN environment
	# variable. Nothing below runs when the variable is unset or empty.
	HF_TOKEN = os.getenv("HF_TOKEN", "")
	if HF_TOKEN:
	    # Mirror the token into HUGGING_FACE_HUB_TOKEN for the rest of the process.
	    os.environ["HUGGING_FACE_HUB_TOKEN"] = HF_TOKEN
	    try:
	        # Imported here so that a missing huggingface_hub package is reported
	        # by the handler below instead of aborting module import.
	        from huggingface_hub import login

	        login(token=HF_TOKEN, add_to_git_credential=False)
	        print("Logged in to HuggingFace Hub")
	    except Exception as exc:  # best effort: report the problem and carry on
	        print(f"HF login warning: {exc}")

	# The inference dependencies are optional at import time: if either import
	# fails, BOTH names fall back to None so that this module still loads.
	try:
	    import torch
	    from pocket_tts import TTSModel
	except ImportError as exc:
	    # Report the failure instead of swallowing it silently, in the same
	    # print-a-warning style used for the HF login failure in this file.
	    print(f"Pocket-TTS import warning: {exc}")
	    torch = None
	    TTSModel = None

	# FastAPI application instance for this server, titled "Pocket-TTS API".
	# NOTE(review): route registrations are not visible in this part of the file.
	app = FastAPI(title="Pocket-TTS API")

	# Module-level mutable state, kept in one dict. The values below are the
	# import-time defaults; the code that updates them is outside this view.
	_state = dict(
	    initialized=False,  # False until something flips it -- see later code
	    model=None,  # presumably the loaded TTS model once available -- confirm
	    sample_rate=24000,  # presumably in Hz -- TODO confirm
	    voice_cache={},  # starts empty; presumably keyed by voice name -- confirm
	)

	# Voice registry: maps a voice name to a (repo_id, path, repo_type) triple.
	# Every voice in this table is stored as "voices/<name>.wav" in a repository
	# of type "space", so only the names are spelled out, grouped by repository.
	# Insertion order follows the listing order below.
	VOICE_SOURCES = {
	    voice: (repo_id, f"voices/{voice}.wav", "space")
	    for repo_id, voices in (
	        # === Nymbo/Pocket-TTS (standard voices) ===
	        (
	            "Nymbo/Pocket-TTS",
	            (
	                "af_alloy", "af_aoede", "af_bella", "af_heart", "af_jessica",
	                "af_kore", "af_nicole", "af_nova", "af_river", "af_sarah",
	                "af_sky",
	                "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam",
	                "am_michael", "am_onyx", "am_puck", "am_santa",
	                "bf_alice", "bf_emma", "bf_isabella", "bf_lily",
	                "bm_daniel", "bm_fable", "bm_george", "bm_lewis",
	                "ef_dora", "em_alex", "em_santa",
	                "ff_siwis",
	                "hf_alpha", "hf_beta", "hm_omega", "hm_psi",
	                "if_sara", "im_nicola",
	                "jf_alpha", "jf_gongitsune", "jf_nezumi", "jf_tebukuro",
	                "jm_kumo",
	                "pf_dora", "pm_alex", "pm_santa",
	                "zf_xiaobei", "zf_xiaoni", "zf_xiaoxiao", "zf_xiaoyi",
	                "zm_yunjian", "zm_yunxi", "zm_yunxia", "zm_yunyang",
	            ),
	        ),
	        # === chandypants character voices ===
	        (
	            "chandypants/ollie-pocket-tts",
	            (
	                "benji", "bertha", "damian",
	                "f01_young_bright", "f02_texas_gal", "f03_sharp_pro",
	                "f04_warm_mom", "f05_husky_mature", "f06_perky_young",
	                "f07_southern_belle", "f08_tough_cop", "f09_elderly_sweet",
	                "f10_theater_kid",
	                "m01_deep_south", "m02_smooth_tenor", "m03_gruff_ny",
	                "m04_warm_dad", "m05_distinguished", "m06_young_rough",
	                "m07_cowboy", "m08_fast_talker", "m09_gentle_giant",
	                "m10_slick",
	            ),
	        ),
	    )
	    for voice in voices
	}

	# === celebrity voices (voice clone samples) ===
	# Adds entries in the same (repo_id, path, repo_type) layout used by the
	# rest of VOICE_SOURCES; each file is "voices/celeb/<name>.wav" in the
	# hf4uwho/Pocket-TTS repository (repo type "space").
	VOICE_SOURCES.update(
	    (celeb, ("hf4uwho/Pocket-TTS", f"voices/celeb/{celeb}.wav", "space"))
	    for celeb in (
	        "leonardo_dicaprio",
	        "jack_nicholson",
	        "joe_pesci",
	        "robert_de_niro",
	        "al_pacino",
	        "jake_gyllenhaal",
	        "scarlett_johansson",
	    )
	)

	# === kyutai/tts-voices (official voice catalog) ===
	_KYUTAI_VOICES = {
	"alba_a_moment_by": ("kyutai/tts-voices", "alba-mackenna/a-moment-by.wav", "model"),
	"alba_announcer": ("kyutai/tts-voices", "alba-mackenna/announcer.wav", "model"),
	"alba_casual": ("kyutai/tts-voices", "alba-mackenna/casual.wav", "model"),
	"alba_merchant": ("kyutai/tts-voices", "alba-mackenna/merchant.wav", "model"),
	"cml_10087_11650_000028_0002": ("kyutai/tts-voices", "cml-tts/fr/10087_11650_000028-0002.wav", "model"),
	"cml_10177_10625_000134_0003": ("kyutai/tts-voices", "cml-tts/fr/10177_10625_000134-0003.wav", "model"),
	"cml_10179_11051_000005_0001": ("kyutai/tts-voices", "cml-tts/fr/10179_11051_000005-0001.wav", "model"),
	"cml_12080_11650_000047_0001": ("kyutai/tts-voices", "cml-tts/fr/12080_11650_000047-0001.wav", "model"),
	"cml_12205_11650_000004_0002": ("kyutai/tts-voices", "cml-tts/fr/12205_11650_000004-0002.wav", "model"),
	"cml_12977_10625_000037_0001": ("kyutai/tts-voices", "cml-tts/fr/12977_10625_000037-0001.wav", "model"),
	"cml_1406_1028_000009_0003": ("kyutai/tts-voices", "cml-tts/fr/1406_1028_000009-0003.wav", "model"),
	"cml_1591_1028_000108_0004": ("kyutai/tts-voices", "cml-tts/fr/1591_1028_000108-0004.wav", "model"),
	"cml_1770_1028_000036_0002": ("kyutai/tts-voices", "cml-tts/fr/1770_1028_000036-0002.wav", "model"),
	"cml_2114_1656_000053_0001": ("kyutai/tts-voices", "cml-tts/fr/2114_1656_000053-0001.wav", "model"),
	"cml_2154_2576_000020_0003": ("kyutai/tts-voices", "cml-tts/fr/2154_2576_000020-0003.wav", "model"),
	"cml_2216_1745_000007_0001": ("kyutai/tts-voices", "cml-tts/fr/2216_1745_000007-0001.wav", "model"),
	"cml_2223_1745_000009_0002": ("kyutai/tts-voices", "cml-tts/fr/2223_1745_000009-0002.wav", "model"),
	"cml_2465_1943_000152_0002": ("kyutai/tts-voices", "cml-tts/fr/2465_1943_000152-0002.wav", "model"),
	"cml_296_1028_000022_0001": ("kyutai/tts-voices", "cml-tts/fr/296_1028_000022-0001.wav", "model"),
	"cml_3267_1902_000075_0001": ("kyutai/tts-voices", "cml-tts/fr/3267_1902_000075-0001.wav", "model"),
	"cml_4193_3103_000004_0001": ("kyutai/tts-voices", "cml-tts/fr/4193_3103_000004-0001.wav", "model"),
	"cml_4482_3103_000063_0001": ("kyutai/tts-voices", "cml-tts/fr/4482_3103_000063-0001.wav", "model"),
	"cml_4724_3731_000031_0001": ("kyutai/tts-voices", "cml-tts/fr/4724_3731_000031-0001.wav", "model"),
	"cml_4937_3731_000004_0001": ("kyutai/tts-voices", "cml-tts/fr/4937_3731_000004-0001.wav", "model"),
	"cml_5207_3078_000031_0002": ("kyutai/tts-voices", "cml-tts/fr/5207_3078_000031-0002.wav", "model"),
	"cml_5476_3103_000072_0001": ("kyutai/tts-voices", "cml-tts/fr/5476_3103_000072-0001.wav", "model"),
	"cml_577_394_000070_0001": ("kyutai/tts-voices", "cml-tts/fr/577_394_000070-0001.wav", "model"),
	"cml_5790_4893_000052_0001": ("kyutai/tts-voices", "cml-tts/fr/5790_4893_000052-0001.wav", "model"),
	"cml_579_2548_000015_0001": ("kyutai/tts-voices", "cml-tts/fr/579_2548_000015-0001.wav", "model"),
	"cml_5830_4703_000037_0001": ("kyutai/tts-voices", "cml-tts/fr/5830_4703_000037-0001.wav", "model"),
	"cml_6318_7016_000027_0002": ("kyutai/tts-voices", "cml-tts/fr/6318_7016_000027-0002.wav", "model"),
	"cml_7142_2432_000124_0003": ("kyutai/tts-voices", "cml-tts/fr/7142_2432_000124-0003.wav", "model"),
	"cml_7400_2928_000100_0001": ("kyutai/tts-voices", "cml-tts/fr/7400_2928_000100-0001.wav", "model"),
	"cml_7591_6742_000149_0002": ("kyutai/tts-voices", "cml-tts/fr/7591_6742_000149-0002.wav", "model"),
	"cml_7601_7727_000062_0001": ("kyutai/tts-voices", "cml-tts/fr/7601_7727_000062-0001.wav", "model"),
	"cml_7762_8734_000048_0002": ("kyutai/tts-voices", "cml-tts/fr/7762_8734_000048-0002.wav", "model"),
	"cml_8128_7016_000047_0002": ("kyutai/tts-voices", "cml-tts/fr/8128_7016_000047-0002.wav", "model"),
	"cml_928_486_000075_0001": ("kyutai/tts-voices", "cml-tts/fr/928_486_000075-0001.wav", "model"),
	"cml_9834_9697_000150_0003": ("kyutai/tts-voices", "cml-tts/fr/9834_9697_000150-0003.wav", "model"),
	"ears_p001": ("kyutai/tts-voices", "ears/p001/freeform_speech_01.wav", "model"),
	"ears_p002": ("kyutai/tts-voices", "ears/p002/freeform_speech_01.wav", "model"),
	"ears_p003": ("kyutai/tts-voices", "ears/p003/freeform_speech_01.wav", "model"),
	"ears_p003_adoration": ("kyutai/tts-voices", "ears/p003/emo_adoration_freeform.wav", "model"),
	"ears_p003_amazement": ("kyutai/tts-voices", "ears/p003/emo_amazement_freeform.wav", "model"),
	"ears_p003_amusement": ("kyutai/tts-voices", "ears/p003/emo_amusement_freeform.wav", "model"),
	"ears_p003_anger": ("kyutai/tts-voices", "ears/p003/emo_anger_freeform.wav", "model"),
	"ears_p003_confusion": ("kyutai/tts-voices", "ears/p003/emo_confusion_freeform.wav", "model"),
	"ears_p003_contentment": ("kyutai/tts-voices", "ears/p003/emo_contentment_freeform.wav", "model"),
	"ears_p003_cuteness": ("kyutai/tts-voices", "ears/p003/emo_cuteness_freeform.wav", "model"),
	"ears_p003_desire": ("kyutai/tts-voices", "ears/p003/emo_desire_freeform.wav", "model"),
	"ears_p003_disappointment": ("kyutai/tts-voices", "ears/p003/emo_disappointment_freeform.wav", "model"),
	"ears_p003_disgust": ("kyutai/tts-voices", "ears/p003/emo_disgust_freeform.wav", "model"),
	"ears_p003_distress": ("kyutai/tts-voices", "ears/p003/emo_distress_freeform.wav", "model"),
	"ears_p003_embarassment": ("kyutai/tts-voices", "ears/p003/emo_embarassment_freeform.wav", "model"),
	"ears_p003_extasy": ("kyutai/tts-voices", "ears/p003/emo_extasy_freeform.wav", "model"),
	"ears_p003_fear": ("kyutai/tts-voices", "ears/p003/emo_fear_freeform.wav", "model"),
	"ears_p003_guilt": ("kyutai/tts-voices", "ears/p003/emo_guilt_freeform.wav", "model"),
	"ears_p003_interest": ("kyutai/tts-voices", "ears/p003/emo_interest_freeform.wav", "model"),
	"ears_p003_neutral": ("kyutai/tts-voices", "ears/p003/emo_neutral_freeform.wav", "model"),
	"ears_p003_pain": ("kyutai/tts-voices", "ears/p003/emo_pain_freeform.wav", "model"),
	"ears_p003_pride": ("kyutai/tts-voices", "ears/p003/emo_pride_freeform.wav", "model"),
	"ears_p003_realization": ("kyutai/tts-voices", "ears/p003/emo_realization_freeform.wav", "model"),
	"ears_p003_relief": ("kyutai/tts-voices", "ears/p003/emo_relief_freeform.wav", "model"),
	"ears_p003_sadness": ("kyutai/tts-voices", "ears/p003/emo_sadness_freeform.wav", "model"),
	"ears_p003_serenity": ("kyutai/tts-voices", "ears/p003/emo_serenity_freeform.wav", "model"),
	"ears_p004": ("kyutai/tts-voices", "ears/p004/freeform_speech_01.wav", "model"),
	"ears_p005": ("kyutai/tts-voices", "ears/p005/freeform_speech_01.wav", "model"),
	"ears_p006": ("kyutai/tts-voices", "ears/p006/freeform_speech_01.wav", "model"),
	"ears_p007": ("kyutai/tts-voices", "ears/p007/freeform_speech_01.wav", "model"),
	"ears_p008": ("kyutai/tts-voices", "ears/p008/freeform_speech_01.wav", "model"),
	"ears_p009": ("kyutai/tts-voices", "ears/p009/freeform_speech_01.wav", "model"),
	"ears_p010": ("kyutai/tts-voices", "ears/p010/freeform_speech_01.wav", "model"),
	"ears_p011": ("kyutai/tts-voices", "ears/p011/freeform_speech_01.wav", "model"),
	"ears_p012": ("kyutai/tts-voices", "ears/p012/freeform_speech_01.wav", "model"),
	"ears_p013": ("kyutai/tts-voices", "ears/p013/freeform_speech_01.wav", "model"),
	"ears_p014": ("kyutai/tts-voices", "ears/p014/freeform_speech_01.wav", "model"),
	"ears_p015": ("kyutai/tts-voices", "ears/p015/freeform_speech_01.wav", "model"),
	"ears_p016": ("kyutai/tts-voices", "ears/p016/freeform_speech_01.wav", "model"),
	"ears_p017": ("kyutai/tts-voices", "ears/p017/freeform_speech_01.wav", "model"),
	"ears_p018": ("kyutai/tts-voices", "ears/p018/freeform_speech_01.wav", "model"),
	"ears_p019": ("kyutai/tts-voices", "ears/p019/freeform_speech_01.wav", "model"),
	"ears_p020": ("kyutai/tts-voices", "ears/p020/freeform_speech_01.wav", "model"),
	"ears_p021": ("kyutai/tts-voices", "ears/p021/freeform_speech_01.wav", "model"),
	"ears_p022": ("kyutai/tts-voices", "ears/p022/freeform_speech_01.wav", "model"),
	"ears_p023": ("kyutai/tts-voices", "ears/p023/freeform_speech_01.wav", "model"),
	"ears_p024": ("kyutai/tts-voices", "ears/p024/freeform_speech_01.wav", "model"),
	"ears_p025": ("kyutai/tts-voices", "ears/p025/freeform_speech_01.wav", "model"),
	"ears_p026": ("kyutai/tts-voices", "ears/p026/freeform_speech_01.wav", "model"),
	"ears_p027": ("kyutai/tts-voices", "ears/p027/freeform_speech_01.wav", "model"),
	"ears_p028": ("kyutai/tts-voices", "ears/p028/freeform_speech_01.wav", "model"),
	"ears_p029": ("kyutai/tts-voices", "ears/p029/freeform_speech_01.wav", "model"),
	"ears_p030": ("kyutai/tts-voices", "ears/p030/freeform_speech_01.wav", "model"),
	"ears_p031": ("kyutai/tts-voices", "ears/p031/freeform_speech_01.wav", "model"),
	"ears_p031_adoration": ("kyutai/tts-voices", "ears/p031/emo_adoration_freeform.wav", "model"),
	"ears_p031_amazement": ("kyutai/tts-voices", "ears/p031/emo_amazement_freeform.wav", "model"),
	"ears_p031_amusement": ("kyutai/tts-voices", "ears/p031/emo_amusement_freeform.wav", "model"),
	"ears_p031_anger": ("kyutai/tts-voices", "ears/p031/emo_anger_freeform.wav", "model"),
	"ears_p031_confusion": ("kyutai/tts-voices", "ears/p031/emo_confusion_freeform.wav", "model"),
	"ears_p031_contentment": ("kyutai/tts-voices", "ears/p031/emo_contentment_freeform.wav", "model"),
	"ears_p031_cuteness": ("kyutai/tts-voices", "ears/p031/emo_cuteness_freeform.wav", "model"),
	"ears_p031_desire": ("kyutai/tts-voices", "ears/p031/emo_desire_freeform.wav", "model"),
	"ears_p031_disappointment": ("kyutai/tts-voices", "ears/p031/emo_disappointment_freeform.wav", "model"),
	"ears_p031_disgust": ("kyutai/tts-voices", "ears/p031/emo_disgust_freeform.wav", "model"),
	"ears_p031_distress": ("kyutai/tts-voices", "ears/p031/emo_distress_freeform.wav", "model"),
	"ears_p031_embarassment": ("kyutai/tts-voices", "ears/p031/emo_embarassment_freeform.wav", "model"),
	"ears_p031_extasy": ("kyutai/tts-voices", "ears/p031/emo_extasy_freeform.wav", "model"),
	"ears_p031_fear": ("kyutai/tts-voices", "ears/p031/emo_fear_freeform.wav", "model"),
	"ears_p031_guilt": ("kyutai/tts-voices", "ears/p031/emo_guilt_freeform.wav", "model"),
	"ears_p031_interest": ("kyutai/tts-voices", "ears/p031/emo_interest_freeform.wav", "model"),
	"ears_p031_neutral": ("kyutai/tts-voices", "ears/p031/emo_neutral_freeform.wav", "model"),
	"ears_p031_pain": ("kyutai/tts-voices", "ears/p031/emo_pain_freeform.wav", "model"),
	"ears_p031_pride": ("kyutai/tts-voices", "ears/p031/emo_pride_freeform.wav", "model"),
	"ears_p031_realization": ("kyutai/tts-voices", "ears/p031/emo_realization_freeform.wav", "model"),
	"ears_p031_relief": ("kyutai/tts-voices", "ears/p031/emo_relief_freeform.wav", "model"),
	"ears_p031_sadness": ("kyutai/tts-voices", "ears/p031/emo_sadness_freeform.wav", "model"),
	"ears_p031_serenity": ("kyutai/tts-voices", "ears/p031/emo_serenity_freeform.wav", "model"),
	"ears_p032": ("kyutai/tts-voices", "ears/p032/freeform_speech_01.wav", "model"),
	"ears_p033": ("kyutai/tts-voices", "ears/p033/freeform_speech_01.wav", "model"),
	"ears_p034": ("kyutai/tts-voices", "ears/p034/freeform_speech_01.wav", "model"),
	"ears_p035": ("kyutai/tts-voices", "ears/p035/freeform_speech_01.wav", "model"),
	"ears_p036": ("kyutai/tts-voices", "ears/p036/freeform_speech_01.wav", "model"),
	"ears_p037": ("kyutai/tts-voices", "ears/p037/freeform_speech_01.wav", "model"),
	"ears_p038": ("kyutai/tts-voices", "ears/p038/freeform_speech_01.wav", "model"),
	"ears_p039": ("kyutai/tts-voices", "ears/p039/freeform_speech_01.wav", "model"),
	"ears_p040": ("kyutai/tts-voices", "ears/p040/freeform_speech_01.wav", "model"),
	"ears_p041": ("kyutai/tts-voices", "ears/p041/freeform_speech_01.wav", "model"),
	"ears_p042": ("kyutai/tts-voices", "ears/p042/freeform_speech_01.wav", "model"),
	"ears_p043": ("kyutai/tts-voices", "ears/p043/freeform_speech_01.wav", "model"),
	"ears_p044": ("kyutai/tts-voices", "ears/p044/freeform_speech_01.wav", "model"),
	"ears_p045": ("kyutai/tts-voices", "ears/p045/freeform_speech_01.wav", "model"),
	"ears_p046": ("kyutai/tts-voices", "ears/p046/freeform_speech_01.wav", "model"),
	"ears_p047": ("kyutai/tts-voices", "ears/p047/freeform_speech_01.wav", "model"),
	"ears_p048": ("kyutai/tts-voices", "ears/p048/freeform_speech_01.wav", "model"),
	"ears_p049": ("kyutai/tts-voices", "ears/p049/freeform_speech_01.wav", "model"),
	"ears_p050": ("kyutai/tts-voices", "ears/p050/freeform_speech_01.wav", "model"),
	"ears_p051": ("kyutai/tts-voices", "ears/p051/freeform_speech_01.wav", "model"),
	"ears_p052": ("kyutai/tts-voices", "ears/p052/freeform_speech_01.wav", "model"),
	"ears_p053": ("kyutai/tts-voices", "ears/p053/freeform_speech_01.wav", "model"),
	"ears_p054": ("kyutai/tts-voices", "ears/p054/freeform_speech_01.wav", "model"),
	"ears_p055": ("kyutai/tts-voices", "ears/p055/freeform_speech_01.wav", "model"),
	"ears_p056": ("kyutai/tts-voices", "ears/p056/freeform_speech_01.wav", "model"),
	"ears_p057": ("kyutai/tts-voices", "ears/p057/freeform_speech_01.wav", "model"),
	"ears_p058": ("kyutai/tts-voices", "ears/p058/freeform_speech_01.wav", "model"),
	"ears_p059": ("kyutai/tts-voices", "ears/p059/freeform_speech_01.wav", "model"),
	"ears_p060": ("kyutai/tts-voices", "ears/p060/freeform_speech_01.wav", "model"),
	"ears_p061": ("kyutai/tts-voices", "ears/p061/freeform_speech_01.wav", "model"),
	"ears_p062": ("kyutai/tts-voices", "ears/p062/freeform_speech_01.wav", "model"),
	"ears_p063": ("kyutai/tts-voices", "ears/p063/freeform_speech_01.wav", "model"),
	"ears_p064": ("kyutai/tts-voices", "ears/p064/freeform_speech_01.wav", "model"),
	"ears_p065": ("kyutai/tts-voices", "ears/p065/freeform_speech_01.wav", "model"),
	"ears_p066": ("kyutai/tts-voices", "ears/p066/freeform_speech_01.wav", "model"),
	"ears_p067": ("kyutai/tts-voices", "ears/p067/freeform_speech_01.wav", "model"),
	"ears_p068": ("kyutai/tts-voices", "ears/p068/freeform_speech_01.wav", "model"),
	"ears_p069": ("kyutai/tts-voices", "ears/p069/freeform_speech_01.wav", "model"),
	"ears_p070": ("kyutai/tts-voices", "ears/p070/freeform_speech_01.wav", "model"),
	"ears_p071": ("kyutai/tts-voices", "ears/p071/freeform_speech_01.wav", "model"),
	"ears_p072": ("kyutai/tts-voices", "ears/p072/freeform_speech_01.wav", "model"),
	"ears_p073": ("kyutai/tts-voices", "ears/p073/freeform_speech_01.wav", "model"),
	"ears_p074": ("kyutai/tts-voices", "ears/p074/freeform_speech_01.wav", "model"),
	"ears_p075": ("kyutai/tts-voices", "ears/p075/freeform_speech_01.wav", "model"),
	"ears_p076": ("kyutai/tts-voices", "ears/p076/freeform_speech_01.wav", "model"),
	"ears_p077": ("kyutai/tts-voices", "ears/p077/freeform_speech_01.wav", "model"),
	"ears_p078": ("kyutai/tts-voices", "ears/p078/freeform_speech_01.wav", "model"),
	"ears_p079": ("kyutai/tts-voices", "ears/p079/freeform_speech_01.wav", "model"),
	"ears_p080": ("kyutai/tts-voices", "ears/p080/freeform_speech_01.wav", "model"),
	"ears_p081": ("kyutai/tts-voices", "ears/p081/freeform_speech_01.wav", "model"),
	"ears_p082": ("kyutai/tts-voices", "ears/p082/freeform_speech_01.wav", "model"),
	"ears_p083": ("kyutai/tts-voices", "ears/p083/freeform_speech_01.wav", "model"),
	"ears_p084": ("kyutai/tts-voices", "ears/p084/freeform_speech_01.wav", "model"),
	"ears_p085": ("kyutai/tts-voices", "ears/p085/freeform_speech_01.wav", "model"),
	"ears_p086": ("kyutai/tts-voices", "ears/p086/freeform_speech_01.wav", "model"),
	"ears_p087": ("kyutai/tts-voices", "ears/p087/freeform_speech_01.wav", "model"),
	"ears_p088": ("kyutai/tts-voices", "ears/p088/freeform_speech_01.wav", "model"),
	"ears_p089": ("kyutai/tts-voices", "ears/p089/freeform_speech_01.wav", "model"),
	"ears_p090": ("kyutai/tts-voices", "ears/p090/freeform_speech_01.wav", "model"),
	"ears_p091": ("kyutai/tts-voices", "ears/p091/freeform_speech_01.wav", "model"),
	"ears_p092": ("kyutai/tts-voices", "ears/p092/freeform_speech_01.wav", "model"),
	"ears_p093": ("kyutai/tts-voices", "ears/p093/freeform_speech_01.wav", "model"),
	"ears_p094": ("kyutai/tts-voices", "ears/p094/freeform_speech_01.wav", "model"),
	"ears_p095": ("kyutai/tts-voices", "ears/p095/freeform_speech_01.wav", "model"),
	"ears_p096": ("kyutai/tts-voices", "ears/p096/freeform_speech_01.wav", "model"),
	"ears_p097": ("kyutai/tts-voices", "ears/p097/freeform_speech_01.wav", "model"),
	"ears_p098": ("kyutai/tts-voices", "ears/p098/freeform_speech_01.wav", "model"),
	"ears_p099": ("kyutai/tts-voices", "ears/p099/freeform_speech_01.wav", "model"),
	"ears_p100": ("kyutai/tts-voices", "ears/p100/freeform_speech_01.wav", "model"),
	"ears_p101": ("kyutai/tts-voices", "ears/p101/freeform_speech_01.wav", "model"),
	"ears_p102": ("kyutai/tts-voices", "ears/p102/freeform_speech_01.wav", "model"),
	"ears_p103": ("kyutai/tts-voices", "ears/p103/freeform_speech_01.wav", "model"),
	"ears_p104": ("kyutai/tts-voices", "ears/p104/freeform_speech_01.wav", "model"),
	"ears_p105": ("kyutai/tts-voices", "ears/p105/freeform_speech_01.wav", "model"),
	"ears_p106": ("kyutai/tts-voices", "ears/p106/freeform_speech_01.wav", "model"),
	"ears_p107": ("kyutai/tts-voices", "ears/p107/freeform_speech_01.wav", "model"),
	"ex_duo_a_default": ("kyutai/tts-voices", "expresso/ex01-ex02_default_001_channel1_168s.wav", "model"),
	"ex_duo_a_enunciated": ("kyutai/tts-voices", "expresso/ex01-ex02_enunciated_001_channel1_432s.wav", "model"),
	"ex_duo_a_fast": ("kyutai/tts-voices", "expresso/ex01-ex02_fast_001_channel1_104s.wav", "model"),
	"ex_duo_a_projected": ("kyutai/tts-voices", "expresso/ex01-ex02_projected_001_channel1_46s.wav", "model"),
	"ex_duo_a_whisper": ("kyutai/tts-voices", "expresso/ex01-ex02_whisper_001_channel1_579s.wav", "model"),
	"ex_duo_b_default": ("kyutai/tts-voices", "expresso/ex04-ex03_default_001_channel1_3s.wav", "model"),
	"ex_duo_b_enunciated": ("kyutai/tts-voices", "expresso/ex04-ex03_enunciated_001_channel1_86s.wav", "model"),
	"ex_duo_b_fast": ("kyutai/tts-voices", "expresso/ex04-ex03_fast_001_channel1_208s.wav", "model"),
	"ex_duo_b_projected": ("kyutai/tts-voices", "expresso/ex04-ex03_projected_001_channel1_192s.wav", "model"),
	"ex_duo_b_whisper": ("kyutai/tts-voices", "expresso/ex04-ex03_whisper_001_channel1_198s.wav", "model"),
	"ex_fem_emote_angry": ("kyutai/tts-voices", "expresso/ex03-ex01_angry_001_channel1_201s.wav", "model"),
	"ex_fem_emote_awe": ("kyutai/tts-voices", "expresso/ex03-ex01_awe_001_channel1_1323s.wav", "model"),
	"ex_fem_emote_calm": ("kyutai/tts-voices", "expresso/ex03-ex01_calm_001_channel1_1143s.wav", "model"),
	"ex_fem_emote_confused": ("kyutai/tts-voices", "expresso/ex03-ex01_confused_001_channel1_909s.wav", "model"),
	"ex_fem_emote_desire": ("kyutai/tts-voices", "expresso/ex03-ex01_desire_004_channel1_545s.wav", "model"),
	"ex_fem_emote_disgusted": ("kyutai/tts-voices", "expresso/ex03-ex01_disgusted_004_channel1_170s.wav", "model"),
	"ex_fem_emote_enunciated": ("kyutai/tts-voices", "expresso/ex03-ex01_enunciated_001_channel1_388s.wav", "model"),
	"ex_fem_emote_happy": ("kyutai/tts-voices", "expresso/ex03-ex01_happy_001_channel1_334s.wav", "model"),
	"ex_fem_emote_laughing": ("kyutai/tts-voices", "expresso/ex03-ex01_laughing_001_channel1_188s.wav", "model"),
	"ex_fem_emote_nonverbal": ("kyutai/tts-voices", "expresso/ex03-ex01_nonverbal_006_channel1_62s.wav", "model"),
	"ex_fem_emote_sarcastic": ("kyutai/tts-voices", "expresso/ex03-ex01_sarcastic_001_channel1_435s.wav", "model"),
	"ex_fem_emote_sleepy": ("kyutai/tts-voices", "expresso/ex03-ex01_sleepy_001_channel1_619s.wav", "model"),
	"ex_fem_narr_animal_animaldir": ("kyutai/tts-voices", "expresso/ex03-ex02_animal-animaldir_003_channel1_32s.wav", "model"),
	"ex_fem_narr_animaldir_animal": ("kyutai/tts-voices", "expresso/ex03-ex02_animaldir-animal_008_channel1_147s.wav", "model"),
	"ex_fem_narr_child_childdir": ("kyutai/tts-voices", "expresso/ex03-ex02_child-childdir_001_channel1_291s.wav", "model"),
	"ex_fem_narr_childdir_child": ("kyutai/tts-voices", "expresso/ex03-ex02_childdir-child_004_channel1_308s.wav", "model"),
	"ex_fem_narr_laughing": ("kyutai/tts-voices", "expresso/ex03-ex02_laughing_001_channel1_248s.wav", "model"),
	"ex_fem_narr_narration": ("kyutai/tts-voices", "expresso/ex03-ex02_narration_001_channel1_674s.wav", "model"),
	"ex_fem_narr_sad_sympathetic": ("kyutai/tts-voices", "expresso/ex03-ex02_sad-sympathetic_001_channel1_454s.wav", "model"),
	"ex_fem_narr_sympathetic_sad": ("kyutai/tts-voices", "expresso/ex03-ex02_sympathetic-sad_008_channel1_215s.wav", "model"),
	"ex_mal_emote_angry": ("kyutai/tts-voices", "expresso/ex04-ex02_angry_001_channel1_119s.wav", "model"),
	"ex_mal_emote_awe": ("kyutai/tts-voices", "expresso/ex04-ex02_awe_001_channel1_982s.wav", "model"),
	"ex_mal_emote_bored": ("kyutai/tts-voices", "expresso/ex04-ex02_bored_001_channel1_254s.wav", "model"),
	"ex_mal_emote_calm": ("kyutai/tts-voices", "expresso/ex04-ex02_calm_002_channel1_480s.wav", "model"),
	"ex_mal_emote_confused": ("kyutai/tts-voices", "expresso/ex04-ex02_confused_001_channel1_499s.wav", "model"),
	"ex_mal_emote_desire": ("kyutai/tts-voices", "expresso/ex04-ex02_desire_001_channel1_657s.wav", "model"),
	"ex_mal_emote_disgusted": ("kyutai/tts-voices", "expresso/ex04-ex02_disgusted_004_channel1_169s.wav", "model"),
	"ex_mal_emote_enunciated": ("kyutai/tts-voices", "expresso/ex04-ex02_enunciated_001_channel1_496s.wav", "model"),
	"ex_mal_emote_fearful": ("kyutai/tts-voices", "expresso/ex04-ex02_fearful_001_channel1_316s.wav", "model"),
	"ex_mal_emote_happy": ("kyutai/tts-voices", "expresso/ex04-ex02_happy_001_channel1_118s.wav", "model"),
	"ex_mal_emote_laughing": ("kyutai/tts-voices", "expresso/ex04-ex02_laughing_001_channel1_147s.wav", "model"),
	"ex_mal_emote_nonverbal": ("kyutai/tts-voices", "expresso/ex04-ex02_nonverbal_004_channel1_18s.wav", "model"),
	"ex_mal_emote_sarcastic": ("kyutai/tts-voices", "expresso/ex04-ex02_sarcastic_001_channel1_519s.wav", "model"),
	"ex_mal_narr_animal_animaldir": ("kyutai/tts-voices", "expresso/ex04-ex01_animal-animaldir_006_channel1_196s.wav", "model"),
	"ex_mal_narr_animaldir_animal": ("kyutai/tts-voices", "expresso/ex04-ex01_animaldir-animal_001_channel1_118s.wav", "model"),
	"ex_mal_narr_child_childdir": ("kyutai/tts-voices", "expresso/ex04-ex01_child-childdir_004_channel1_118s.wav", "model"),
	"ex_mal_narr_childdir_child": ("kyutai/tts-voices", "expresso/ex04-ex01_childdir-child_001_channel1_228s.wav", "model"),
	"ex_mal_narr_disgusted": ("kyutai/tts-voices", "expresso/ex04-ex01_disgusted_001_channel1_130s.wav", "model"),
	"ex_mal_narr_laughing": ("kyutai/tts-voices", "expresso/ex04-ex01_laughing_001_channel1_306s.wav", "model"),
	"ex_mal_narr_narration": ("kyutai/tts-voices", "expresso/ex04-ex01_narration_001_channel1_605s.wav", "model"),
	"ex_mal_narr_sad_sympathetic": ("kyutai/tts-voices", "expresso/ex04-ex01_sad-sympathetic_001_channel1_267s.wav", "model"),
	"ex_mal_narr_sympathetic_sad": ("kyutai/tts-voices", "expresso/ex04-ex01_sympathetic-sad_008_channel1_415s.wav", "model"),
	"unmute_default_voice": ("kyutai/tts-voices", "unmute-prod-website/default_voice.wav", "model"),
	"unmute_degaulle_2": ("kyutai/tts-voices", "unmute-prod-website/degaulle-2.wav", "model"),
	"unmute_developpeuse_3": ("kyutai/tts-voices", "unmute-prod-website/developpeuse-3.wav", "model"),
	"unmute_ex04_narration_longform_00001": ("kyutai/tts-voices", "unmute-prod-website/ex04_narration_longform_00001.wav", "model"),
	"unmute_fabieng_enhanced_v2": ("kyutai/tts-voices", "unmute-prod-website/fabieng-enhanced-v2.wav", "model"),
	"unmute_p329_022": ("kyutai/tts-voices", "unmute-prod-website/p329_022.wav", "model"),
	"vctk_p225_023": ("kyutai/tts-voices", "vctk/p225_023.wav", "model"),
	"vctk_p226_023": ("kyutai/tts-voices", "vctk/p226_023.wav", "model"),
	"vctk_p227_023": ("kyutai/tts-voices", "vctk/p227_023.wav", "model"),
	"vctk_p228_023": ("kyutai/tts-voices", "vctk/p228_023.wav", "model"),
	"vctk_p229_023": ("kyutai/tts-voices", "vctk/p229_023.wav", "model"),
	"vctk_p230_023": ("kyutai/tts-voices", "vctk/p230_023.wav", "model"),
	"vctk_p231_023": ("kyutai/tts-voices", "vctk/p231_023.wav", "model"),
	"vctk_p232_023": ("kyutai/tts-voices", "vctk/p232_023.wav", "model"),
	"vctk_p233_023": ("kyutai/tts-voices", "vctk/p233_023.wav", "model"),
	"vctk_p234_023": ("kyutai/tts-voices", "vctk/p234_023.wav", "model"),
	"vctk_p236_023": ("kyutai/tts-voices", "vctk/p236_023.wav", "model"),
	"vctk_p237_023": ("kyutai/tts-voices", "vctk/p237_023.wav", "model"),
	"vctk_p238_023": ("kyutai/tts-voices", "vctk/p238_023.wav", "model"),
	"vctk_p239_023": ("kyutai/tts-voices", "vctk/p239_023.wav", "model"),
	"vctk_p240_023": ("kyutai/tts-voices", "vctk/p240_023.wav", "model"),
	"vctk_p241_023": ("kyutai/tts-voices", "vctk/p241_023.wav", "model"),
	"vctk_p243_023": ("kyutai/tts-voices", "vctk/p243_023.wav", "model"),
	"vctk_p244_023": ("kyutai/tts-voices", "vctk/p244_023.wav", "model"),
	"vctk_p245_023": ("kyutai/tts-voices", "vctk/p245_023.wav", "model"),
	"vctk_p246_023": ("kyutai/tts-voices", "vctk/p246_023.wav", "model"),
	"vctk_p247_023": ("kyutai/tts-voices", "vctk/p247_023.wav", "model"),
	"vctk_p248_023": ("kyutai/tts-voices", "vctk/p248_023.wav", "model"),
	"vctk_p249_023": ("kyutai/tts-voices", "vctk/p249_023.wav", "model"),
	"vctk_p250_023": ("kyutai/tts-voices", "vctk/p250_023.wav", "model"),
	"vctk_p251_023": ("kyutai/tts-voices", "vctk/p251_023.wav", "model"),
	"vctk_p252_023": ("kyutai/tts-voices", "vctk/p252_023.wav", "model"),
	"vctk_p253_023": ("kyutai/tts-voices", "vctk/p253_023.wav", "model"),
	"vctk_p254_023": ("kyutai/tts-voices", "vctk/p254_023.wav", "model"),
	"vctk_p255_023": ("kyutai/tts-voices", "vctk/p255_023.wav", "model"),
	"vctk_p256_023": ("kyutai/tts-voices", "vctk/p256_023.wav", "model"),
	"vctk_p257_023": ("kyutai/tts-voices", "vctk/p257_023.wav", "model"),
	"vctk_p258_023": ("kyutai/tts-voices", "vctk/p258_023.wav", "model"),
	"vctk_p259_023": ("kyutai/tts-voices", "vctk/p259_023.wav", "model"),
	"vctk_p260_023": ("kyutai/tts-voices", "vctk/p260_023.wav", "model"),
	"vctk_p261_023": ("kyutai/tts-voices", "vctk/p261_023.wav", "model"),
	"vctk_p262_023": ("kyutai/tts-voices", "vctk/p262_023.wav", "model"),
	"vctk_p263_023": ("kyutai/tts-voices", "vctk/p263_023.wav", "model"),
	"vctk_p264_023": ("kyutai/tts-voices", "vctk/p264_023.wav", "model"),
	"vctk_p265_023": ("kyutai/tts-voices", "vctk/p265_023.wav", "model"),
	"vctk_p266_023": ("kyutai/tts-voices", "vctk/p266_023.wav", "model"),
	"vctk_p267_023": ("kyutai/tts-voices", "vctk/p267_023.wav", "model"),
	"vctk_p269_023": ("kyutai/tts-voices", "vctk/p269_023.wav", "model"),
	"vctk_p270_023": ("kyutai/tts-voices", "vctk/p270_023.wav", "model"),
	"vctk_p271_023": ("kyutai/tts-voices", "vctk/p271_023.wav", "model"),
	"vctk_p272_023": ("kyutai/tts-voices", "vctk/p272_023.wav", "model"),
	"vctk_p273_023": ("kyutai/tts-voices", "vctk/p273_023.wav", "model"),
	"vctk_p274_023": ("kyutai/tts-voices", "vctk/p274_023.wav", "model"),
	"vctk_p275_023": ("kyutai/tts-voices", "vctk/p275_023.wav", "model"),
	"vctk_p276_023": ("kyutai/tts-voices", "vctk/p276_023.wav", "model"),
	"vctk_p277_023": ("kyutai/tts-voices", "vctk/p277_023.wav", "model"),
	"vctk_p278_023": ("kyutai/tts-voices", "vctk/p278_023.wav", "model"),
	"vctk_p279_023": ("kyutai/tts-voices", "vctk/p279_023.wav", "model"),
	"vctk_p280_023": ("kyutai/tts-voices", "vctk/p280_023.wav", "model"),
	"vctk_p281_023": ("kyutai/tts-voices", "vctk/p281_023.wav", "model"),
	"vctk_p282_023": ("kyutai/tts-voices", "vctk/p282_023.wav", "model"),
	"vctk_p283_023": ("kyutai/tts-voices", "vctk/p283_023.wav", "model"),
	"vctk_p284_023": ("kyutai/tts-voices", "vctk/p284_023.wav", "model"),
	"vctk_p285_023": ("kyutai/tts-voices", "vctk/p285_023.wav", "model"),
	"vctk_p286_023": ("kyutai/tts-voices", "vctk/p286_023.wav", "model"),
	"vctk_p287_023": ("kyutai/tts-voices", "vctk/p287_023.wav", "model"),
	"vctk_p288_023": ("kyutai/tts-voices", "vctk/p288_023.wav", "model"),
	"vctk_p292_023": ("kyutai/tts-voices", "vctk/p292_023.wav", "model"),
	"vctk_p293_023": ("kyutai/tts-voices", "vctk/p293_023.wav", "model"),
	"vctk_p294_023": ("kyutai/tts-voices", "vctk/p294_023.wav", "model"),
	"vctk_p297_023": ("kyutai/tts-voices", "vctk/p297_023.wav", "model"),
	"vctk_p298_023": ("kyutai/tts-voices", "vctk/p298_023.wav", "model"),
	"vctk_p299_023": ("kyutai/tts-voices", "vctk/p299_023.wav", "model"),
	"vctk_p300_023": ("kyutai/tts-voices", "vctk/p300_023.wav", "model"),
	"vctk_p301_023": ("kyutai/tts-voices", "vctk/p301_023.wav", "model"),
	"vctk_p302_023": ("kyutai/tts-voices", "vctk/p302_023.wav", "model"),
	"vctk_p303_023": ("kyutai/tts-voices", "vctk/p303_023.wav", "model"),
	"vctk_p304_023": ("kyutai/tts-voices", "vctk/p304_023.wav", "model"),
	"vctk_p305_023": ("kyutai/tts-voices", "vctk/p305_023.wav", "model"),
	"vctk_p306_023": ("kyutai/tts-voices", "vctk/p306_023.wav", "model"),
	"vctk_p307_023": ("kyutai/tts-voices", "vctk/p307_023.wav", "model"),
	"vctk_p308_023": ("kyutai/tts-voices", "vctk/p308_023.wav", "model"),
	"vctk_p310_023": ("kyutai/tts-voices", "vctk/p310_023.wav", "model"),
	"vctk_p311_023": ("kyutai/tts-voices", "vctk/p311_023.wav", "model"),
	"vctk_p312_023": ("kyutai/tts-voices", "vctk/p312_023.wav", "model"),
	"vctk_p313_023": ("kyutai/tts-voices", "vctk/p313_023.wav", "model"),
	"vctk_p314_023": ("kyutai/tts-voices", "vctk/p314_023.wav", "model"),
	"vctk_p315_023": ("kyutai/tts-voices", "vctk/p315_023.wav", "model"),
	"vctk_p316_023": ("kyutai/tts-voices", "vctk/p316_023.wav", "model"),
	"vctk_p317_023": ("kyutai/tts-voices", "vctk/p317_023.wav", "model"),
	"vctk_p318_023": ("kyutai/tts-voices", "vctk/p318_023.wav", "model"),
	"vctk_p323_023": ("kyutai/tts-voices", "vctk/p323_023.wav", "model"),
	"vctk_p326_023": ("kyutai/tts-voices", "vctk/p326_023.wav", "model"),
	"vctk_p329_023": ("kyutai/tts-voices", "vctk/p329_023.wav", "model"),
	"vctk_p330_023": ("kyutai/tts-voices", "vctk/p330_023.wav", "model"),
	"vctk_p333_023": ("kyutai/tts-voices", "vctk/p333_023.wav", "model"),
	"vctk_p334_023": ("kyutai/tts-voices", "vctk/p334_023.wav", "model"),
	"vctk_p335_023": ("kyutai/tts-voices", "vctk/p335_023.wav", "model"),
	"vctk_p336_023": ("kyutai/tts-voices", "vctk/p336_023.wav", "model"),
	"vctk_p339_023": ("kyutai/tts-voices", "vctk/p339_023.wav", "model"),
	"vctk_p341_023": ("kyutai/tts-voices", "vctk/p341_023.wav", "model"),
	"vctk_p343_023": ("kyutai/tts-voices", "vctk/p343_023.wav", "model"),
	"vctk_p345_023": ("kyutai/tts-voices", "vctk/p345_023.wav", "model"),
	"vctk_p347_023": ("kyutai/tts-voices", "vctk/p347_023.wav", "model"),
	"vctk_p351_023": ("kyutai/tts-voices", "vctk/p351_023.wav", "model"),
	"vctk_p360_023": ("kyutai/tts-voices", "vctk/p360_023.wav", "model"),
	"vctk_p361_023": ("kyutai/tts-voices", "vctk/p361_023.wav", "model"),
	"vctk_p363_023": ("kyutai/tts-voices", "vctk/p363_023.wav", "model"),
	"vctk_p364_023": ("kyutai/tts-voices", "vctk/p364_023.wav", "model"),
	"vctk_p374_023": ("kyutai/tts-voices", "vctk/p374_023.wav", "model"),
	"vctk_p376_023": ("kyutai/tts-voices", "vctk/p376_023.wav", "model"),
	"vctk_s5_023": ("kyutai/tts-voices", "vctk/s5_023.wav", "model"),
	"vd_0a67": ("kyutai/tts-voices", "voice-donations/0a67.wav", "model"),
	"vd_1410": ("kyutai/tts-voices", "voice-donations/1410.wav", "model"),
	"vd_1dd0": ("kyutai/tts-voices", "voice-donations/1dd0.wav", "model"),
	"vd_2181": ("kyutai/tts-voices", "voice-donations/2181.wav", "model"),
	"vd_245e": ("kyutai/tts-voices", "voice-donations/245e.wav", "model"),
	"vd_29da": ("kyutai/tts-voices", "voice-donations/29da.wav", "model"),
	"vd_30c5": ("kyutai/tts-voices", "voice-donations/30c5.wav", "model"),
	"vd_3973": ("kyutai/tts-voices", "voice-donations/3973.wav", "model"),
	"vd_4189": ("kyutai/tts-voices", "voice-donations/4189.wav", "model"),
	"vd_468c": ("kyutai/tts-voices", "voice-donations/468c.wav", "model"),
	"vd_4b13": ("kyutai/tts-voices", "voice-donations/4b13.wav", "model"),
	"vd_4b70": ("kyutai/tts-voices", "voice-donations/4b70.wav", "model"),
	"vd_5b55": ("kyutai/tts-voices", "voice-donations/5b55.wav", "model"),
	"vd_6148": ("kyutai/tts-voices", "voice-donations/6148.wav", "model"),
	"vd_617b": ("kyutai/tts-voices", "voice-donations/617b.wav", "model"),
	"vd_7020": ("kyutai/tts-voices", "voice-donations/7020.wav", "model"),
	"vd_7909": ("kyutai/tts-voices", "voice-donations/7909.wav", "model"),
	"vd_7b2b": ("kyutai/tts-voices", "voice-donations/7b2b.wav", "model"),
	"vd_8935": ("kyutai/tts-voices", "voice-donations/8935.wav", "model"),
	"vd_8dc9": ("kyutai/tts-voices", "voice-donations/8dc9.wav", "model"),
	"vd_8f15": ("kyutai/tts-voices", "voice-donations/8f15.wav", "model"),
	"vd_92f0": ("kyutai/tts-voices", "voice-donations/92f0.wav", "model"),
	"vd_9a2e": ("kyutai/tts-voices", "voice-donations/9a2e.wav", "model"),
	"vd_9a66": ("kyutai/tts-voices", "voice-donations/9a66.wav", "model"),
	"vd_AHmad": ("kyutai/tts-voices", "voice-donations/AHmad.wav", "model"),
	"vd_ASEN": ("kyutai/tts-voices", "voice-donations/ASEN.wav", "model"),
	"vd_Aadi": ("kyutai/tts-voices", "voice-donations/Aadi.wav", "model"),
	"vd_AbD": ("kyutai/tts-voices", "voice-donations/AbD.wav", "model"),
	"vd_Abhinox": ("kyutai/tts-voices", "voice-donations/Abhinox.wav", "model"),
	"vd_Abo_Ayman": ("kyutai/tts-voices", "voice-donations/Abo_Ayman.wav", "model"),
	"vd_Abob_Malay": ("kyutai/tts-voices", "voice-donations/Abob_Malay.wav", "model"),
	"vd_Adarsh_Bulla": ("kyutai/tts-voices", "voice-donations/Adarsh_Bulla.wav", "model"),
	"vd_AgentCobra": ("kyutai/tts-voices", "voice-donations/AgentCobra.wav", "model"),
	"vd_Ajith": ("kyutai/tts-voices", "voice-donations/Ajith.wav", "model"),
	"vd_Alejandro_espanol_latino": ("kyutai/tts-voices", "voice-donations/Alejandro_espanol_latino.wav", "model"),
	"vd_Allen": ("kyutai/tts-voices", "voice-donations/Allen.wav", "model"),
	"vd_AmitNag": ("kyutai/tts-voices", "voice-donations/AmitNag.wav", "model"),
	"vd_Andrea": ("kyutai/tts-voices", "voice-donations/Andrea.wav", "model"),
	"vd_Andrea_Spanish": ("kyutai/tts-voices", "voice-donations/Andrea_(Spanish).wav", "model"),
	"vd_Antoine_Vala": ("kyutai/tts-voices", "voice-donations/Antoine_Vala.wav", "model"),
	"vd_Antoni": ("kyutai/tts-voices", "voice-donations/Antoni.wav", "model"),
	"vd_Aon": ("kyutai/tts-voices", "voice-donations/Aon.wav", "model"),
	"vd_Arjun_Z": ("kyutai/tts-voices", "voice-donations/Arjun_Z.wav", "model"),
	"vd_Aryobe": ("kyutai/tts-voices", "voice-donations/Aryobe.wav", "model"),
	"vd_BLUE": ("kyutai/tts-voices", "voice-donations/BLUE.wav", "model"),
	"vd_Bijay": ("kyutai/tts-voices", "voice-donations/Bijay.wav", "model"),
	"vd_Blake": ("kyutai/tts-voices", "voice-donations/Blake.wav", "model"),
	"vd_Bobby_McFern": ("kyutai/tts-voices", "voice-donations/Bobby_McFern.wav", "model"),
	"vd_Breaking_1": ("kyutai/tts-voices", "voice-donations/Breaking_1.wav", "model"),
	"vd_BrokenHypocrite": ("kyutai/tts-voices", "voice-donations/BrokenHypocrite.wav", "model"),
	"vd_Butter": ("kyutai/tts-voices", "voice-donations/Butter.wav", "model"),
	"vd_CPS_001": ("kyutai/tts-voices", "voice-donations/CPS_001.wav", "model"),
	"vd_Chujus": ("kyutai/tts-voices", "voice-donations/Chujus.wav", "model"),
	"vd_Cicada": ("kyutai/tts-voices", "voice-donations/Cicada.wav", "model"),
	"vd_ClassicWizard": ("kyutai/tts-voices", "voice-donations/ClassicWizard.wav", "model"),
	"vd_Curlinvictus": ("kyutai/tts-voices", "voice-donations/Curlinvictus.wav", "model"),
	"vd_Darius": ("kyutai/tts-voices", "voice-donations/Darius.wav", "model"),
	"vd_Darya_khan": ("kyutai/tts-voices", "voice-donations/Darya_khan.wav", "model"),
	"vd_Deepak": ("kyutai/tts-voices", "voice-donations/Deepak.wav", "model"),
	"vd_Dhruv_Rao": ("kyutai/tts-voices", "voice-donations/Dhruv_Rao.wav", "model"),
	"vd_Dil": ("kyutai/tts-voices", "voice-donations/Dil.wav", "model"),
	"vd_Enrique": ("kyutai/tts-voices", "voice-donations/Enrique.wav", "model"),
	"vd_Enrique_Spanish": ("kyutai/tts-voices", "voice-donations/Enrique_(Spanish).wav", "model"),
	"vd_Erick": ("kyutai/tts-voices", "voice-donations/Erick.wav", "model"),
	"vd_Ernesto_Y": ("kyutai/tts-voices", "voice-donations/Ernesto_Y.wav", "model"),
	"vd_Eshan": ("kyutai/tts-voices", "voice-donations/Eshan.wav", "model"),
	"vd_Esteban_Aguirre_Arias": ("kyutai/tts-voices", "voice-donations/Esteban_Aguirre_Arias.wav", "model"),
	"vd_Ferdinand": ("kyutai/tts-voices", "voice-donations/Ferdinand.wav", "model"),
	"vd_FlorDaddy": ("kyutai/tts-voices", "voice-donations/FlorDaddy.wav", "model"),
	"vd_Fred_Mara": ("kyutai/tts-voices", "voice-donations/Fred_Mara.wav", "model"),
	"vd_Giovanne": ("kyutai/tts-voices", "voice-donations/Giovanne.wav", "model"),
	"vd_Glenn": ("kyutai/tts-voices", "voice-donations/Glenn.wav", "model"),
	"vd_Goku": ("kyutai/tts-voices", "voice-donations/Goku.wav", "model"),
	"vd_Gonzalo": ("kyutai/tts-voices", "voice-donations/Gonzalo.wav", "model"),
	"vd_Gonzalo_1": ("kyutai/tts-voices", "voice-donations/Gonzalo-1.wav", "model"),
	"vd_Greggy": ("kyutai/tts-voices", "voice-donations/Greggy.wav", "model"),
	"vd_Haku": ("kyutai/tts-voices", "voice-donations/Haku.wav", "model"),
	"vd_Hannah": ("kyutai/tts-voices", "voice-donations/Hannah.wav", "model"),
	"vd_Hardik_Clone": ("kyutai/tts-voices", "voice-donations/Hardik_Clone.wav", "model"),
	"vd_Hillbilly_Jim": ("kyutai/tts-voices", "voice-donations/Hillbilly_Jim.wav", "model"),
	"vd_Hkl": ("kyutai/tts-voices", "voice-donations/Hkl.wav", "model"),
	"vd_Hugo_the_frenchie": ("kyutai/tts-voices", "voice-donations/Hugo_the_frenchie.wav", "model"),
	"vd_Ilyass_yea": ("kyutai/tts-voices", "voice-donations/Ilyass_yea.wav", "model"),
	"vd_Imran_475": ("kyutai/tts-voices", "voice-donations/Imran_475.wav", "model"),
	"vd_Imran_from_I_India": ("kyutai/tts-voices", "voice-donations/Imran_from_I_India.wav", "model"),
	"vd_Indian_guy": ("kyutai/tts-voices", "voice-donations/Indian_guy.wav", "model"),
	"vd_Ineedthisnow": ("kyutai/tts-voices", "voice-donations/Ineedthisnow.wav", "model"),
	"vd_JJis2123": ("kyutai/tts-voices", "voice-donations/JJis2123.wav", "model"),
	"vd_JOSHE": ("kyutai/tts-voices", "voice-donations/JOSHE.wav", "model"),
	"vd_James": ("kyutai/tts-voices", "voice-donations/James.wav", "model"),
	"vd_Jaspino": ("kyutai/tts-voices", "voice-donations/Jaspino.wav", "model"),
	"vd_Jaw": ("kyutai/tts-voices", "voice-donations/Jaw.wav", "model"),
	"vd_Jay": ("kyutai/tts-voices", "voice-donations/Jay.wav", "model"),
	"vd_Jeff_Andrew": ("kyutai/tts-voices", "voice-donations/Jeff_Andrew.wav", "model"),
	"vd_Jeffrey": ("kyutai/tts-voices", "voice-donations/Jeffrey.wav", "model"),
	"vd_Jeremy_Q": ("kyutai/tts-voices", "voice-donations/Jeremy_Q.wav", "model"),
	"vd_Jimmy": ("kyutai/tts-voices", "voice-donations/Jimmy.wav", "model"),
	"vd_Joaopedrobil1": ("kyutai/tts-voices", "voice-donations/Joaopedrobil1.wav", "model"),
	"vd_John_Triguero": ("kyutai/tts-voices", "voice-donations/John_Triguero.wav", "model"),
	"vd_Juanrestrepo177777": ("kyutai/tts-voices", "voice-donations/Juanrestrepo177777.wav", "model"),
	"vd_Karti": ("kyutai/tts-voices", "voice-donations/Karti.wav", "model"),
	"vd_Kditz": ("kyutai/tts-voices", "voice-donations/Kditz.wav", "model"),
	"vd_Koorosh": ("kyutai/tts-voices", "voice-donations/Koorosh.wav", "model"),
	"vd_LC": ("kyutai/tts-voices", "voice-donations/LC.wav", "model"),
	"vd_L_Roy": ("kyutai/tts-voices", "voice-donations/L_Roy.wav", "model"),
	"vd_Lake": ("kyutai/tts-voices", "voice-donations/Lake.wav", "model"),
	"vd_Lammy": ("kyutai/tts-voices", "voice-donations/Lammy.wav", "model"),
	"vd_Lara": ("kyutai/tts-voices", "voice-donations/Lara.wav", "model"),
	"vd_Latin_Accent": ("kyutai/tts-voices", "voice-donations/Latin_Accent.wav", "model"),
	"vd_Liquescent": ("kyutai/tts-voices", "voice-donations/Liquescent.wav", "model"),
	"vd_Louis": ("kyutai/tts-voices", "voice-donations/Louis.wav", "model"),
	"vd_Lucas": ("kyutai/tts-voices", "voice-donations/Lucas.wav", "model"),
	"vd_MJDePedro": ("kyutai/tts-voices", "voice-donations/MJDePedro.wav", "model"),
	"vd_Maisako": ("kyutai/tts-voices", "voice-donations/Maisako.wav", "model"),
	"vd_Manahen": ("kyutai/tts-voices", "voice-donations/Manahen.wav", "model"),
	"vd_Marshal_Indian": ("kyutai/tts-voices", "voice-donations/Marshal_Indian.wav", "model"),
	"vd_Midlands_Bedfordshire_Dialect": ("kyutai/tts-voices", "voice-donations/Midlands_Bedfordshire_Dialect.wav", "model"),
	"vd_Moses": ("kyutai/tts-voices", "voice-donations/Moses.wav", "model"),
	"vd_MrHat": ("kyutai/tts-voices", "voice-donations/MrHat.wav", "model"),
	"vd_Mr_captain": ("kyutai/tts-voices", "voice-donations/Mr_captain.wav", "model"),
	"vd_Muhtasims_Voice": ("kyutai/tts-voices", "voice-donations/Muhtasim's_Voice.wav", "model"),
	"vd_Mystery_Sir": ("kyutai/tts-voices", "voice-donations/Mystery_Sir.wav", "model"),
	"vd_Narrum": ("kyutai/tts-voices", "voice-donations/Narrum.wav", "model"),
	"vd_Nick": ("kyutai/tts-voices", "voice-donations/Nick.wav", "model"),
	"vd_P0LFR": ("kyutai/tts-voices", "voice-donations/P0LFR.wav", "model"),
	"vd_Pai_ve": ("kyutai/tts-voices", "voice-donations/Pai_ve.wav", "model"),
	"vd_Parthiban": ("kyutai/tts-voices", "voice-donations/Parthiban.wav", "model"),
	"vd_Prakash369": ("kyutai/tts-voices", "voice-donations/Prakash369.wav", "model"),
	"vd_Puzzle": ("kyutai/tts-voices", "voice-donations/Puzzle.wav", "model"),
	"vd_Qasim_Wali_Khan": ("kyutai/tts-voices", "voice-donations/Qasim_Wali_Khan.wav", "model"),
	"vd_RAJ": ("kyutai/tts-voices", "voice-donations/RAJ.wav", "model"),
	"vd_Rafaelpazv": ("kyutai/tts-voices", "voice-donations/Rafaelpazv.wav", "model"),
	"vd_Rahul": ("kyutai/tts-voices", "voice-donations/Rahul.wav", "model"),
	"vd_Raj25": ("kyutai/tts-voices", "voice-donations/Raj25.wav", "model"),
	"vd_Ramu": ("kyutai/tts-voices", "voice-donations/Ramu.wav", "model"),
	"vd_Ranjith": ("kyutai/tts-voices", "voice-donations/Ranjith.wav", "model"),
	"vd_Richard_cuban": ("kyutai/tts-voices", "voice-donations/Richard_cuban.wav", "model"),
	"vd_Rony": ("kyutai/tts-voices", "voice-donations/Rony.wav", "model"),
	"vd_Roscoe": ("kyutai/tts-voices", "voice-donations/Roscoe.wav", "model"),
	"vd_Rs": ("kyutai/tts-voices", "voice-donations/Rs.wav", "model"),
	"vd_Rup": ("kyutai/tts-voices", "voice-donations/Rup.wav", "model"),
	"vd_SSA150803": ("kyutai/tts-voices", "voice-donations/SSA150803.wav", "model"),
	"vd_SS_1684": ("kyutai/tts-voices", "voice-donations/SS_1684.wav", "model"),
	"vd_STONE": ("kyutai/tts-voices", "voice-donations/STONE.wav", "model"),
	"vd_Samsewak": ("kyutai/tts-voices", "voice-donations/Samsewak.wav", "model"),
	"vd_Selfie": ("kyutai/tts-voices", "voice-donations/Selfie.wav", "model"),
	"vd_Sheddy": ("kyutai/tts-voices", "voice-donations/Sheddy.wav", "model"),
	"vd_Siddh_Indian": ("kyutai/tts-voices", "voice-donations/Siddh_Indian.wav", "model"),
	"vd_Sir_TJ": ("kyutai/tts-voices", "voice-donations/Sir_TJ.wav", "model"),
	"vd_Sirajo_x": ("kyutai/tts-voices", "voice-donations/Sirajo_x.wav", "model"),
	"vd_Sp46": ("kyutai/tts-voices", "voice-donations/Sp46.wav", "model"),
	"vd_Sr_Erick": ("kyutai/tts-voices", "voice-donations/Sr_Erick.wav", "model"),
	"vd_Standollars": ("kyutai/tts-voices", "voice-donations/Standollars.wav", "model"),
	"vd_TESLLA": ("kyutai/tts-voices", "voice-donations/TESLLA.wav", "model"),
	"vd_Tahii": ("kyutai/tts-voices", "voice-donations/Tahii.wav", "model"),
	"vd_TheFin": ("kyutai/tts-voices", "voice-donations/TheFin.wav", "model"),
	"vd_The_Sustainabler": ("kyutai/tts-voices", "voice-donations/The_Sustainabler.wav", "model"),
	"vd_The_other_brother": ("kyutai/tts-voices", "voice-donations/The_other_brother.wav", "model"),
	"vd_Titorium": ("kyutai/tts-voices", "voice-donations/Titorium.wav", "model"),
	"vd_Tonmoy": ("kyutai/tts-voices", "voice-donations/Tonmoy.wav", "model"),
	"vd_Umair": ("kyutai/tts-voices", "voice-donations/Umair.wav", "model"),
	"vd_Vexat": ("kyutai/tts-voices", "voice-donations/Vexat.wav", "model"),
	"vd_Victor_Garcia": ("kyutai/tts-voices", "voice-donations/Victor_Garcia.wav", "model"),
	"vd_Vinith___English_India": ("kyutai/tts-voices", "voice-donations/Vinith___English_India.wav", "model"),
	"vd_Vitch": ("kyutai/tts-voices", "voice-donations/Vitch.wav", "model"),
	"vd_Vivaldi": ("kyutai/tts-voices", "voice-donations/Vivaldi.wav", "model"),
	"vd_W_A_H": ("kyutai/tts-voices", "voice-donations/W_A_H.wav", "model"),
	"vd_Wealthiest": ("kyutai/tts-voices", "voice-donations/Wealthiest.wav", "model"),
	"vd_WhisperInEar": ("kyutai/tts-voices", "voice-donations/WhisperInEar.wav", "model"),
	"vd_Yesid": ("kyutai/tts-voices", "voice-donations/Yesid.wav", "model"),
	"vd_Youfied": ("kyutai/tts-voices", "voice-donations/Youfied.wav", "model"),
	"vd_Yuush": ("kyutai/tts-voices", "voice-donations/Yuush.wav", "model"),
	"vd_a59a": ("kyutai/tts-voices", "voice-donations/a59a.wav", "model"),
	"vd_a6f9": ("kyutai/tts-voices", "voice-donations/a6f9.wav", "model"),
	"vd_a96a": ("kyutai/tts-voices", "voice-donations/a96a.wav", "model"),
	"vd_aepeak": ("kyutai/tts-voices", "voice-donations/aepeak.wav", "model"),
	"vd_albertoforofo007": ("kyutai/tts-voices", "voice-donations/albertoforofo007.wav", "model"),
	"vd_amazon_box": ("kyutai/tts-voices", "voice-donations/amazon_box.wav", "model"),
	"vd_awais_shah": ("kyutai/tts-voices", "voice-donations/awais_shah.wav", "model"),
	"vd_bathri": ("kyutai/tts-voices", "voice-donations/bathri.wav", "model"),
	"vd_bbe4": ("kyutai/tts-voices", "voice-donations/bbe4.wav", "model"),
	"vd_bc98": ("kyutai/tts-voices", "voice-donations/bc98.wav", "model"),
	"vd_bevi": ("kyutai/tts-voices", "voice-donations/bevi.wav", "model"),
	"vd_boom": ("kyutai/tts-voices", "voice-donations/boom.wav", "model"),
	"vd_c0a0": ("kyutai/tts-voices", "voice-donations/c0a0.wav", "model"),
	"vd_cybina": ("kyutai/tts-voices", "voice-donations/cybina.wav", "model"),
	"vd_d4a9": ("kyutai/tts-voices", "voice-donations/d4a9.wav", "model"),
	"vd_dce6": ("kyutai/tts-voices", "voice-donations/dce6.wav", "model"),
	"vd_dwp": ("kyutai/tts-voices", "voice-donations/dwp.wav", "model"),
	"vd_e819": ("kyutai/tts-voices", "voice-donations/e819.wav", "model"),
	"vd_edd4": ("kyutai/tts-voices", "voice-donations/edd4.wav", "model"),
	"vd_efeb": ("kyutai/tts-voices", "voice-donations/efeb.wav", "model"),
	"vd_english_with_german_accent": ("kyutai/tts-voices", "voice-donations/english_with_german_accent.wav", "model"),
	"vd_erihppas": ("kyutai/tts-voices", "voice-donations/erihppas.wav", "model"),
	"vd_f179": ("kyutai/tts-voices", "voice-donations/f179.wav", "model"),
	"vd_f9cf": ("kyutai/tts-voices", "voice-donations/f9cf.wav", "model"),
	"vd_fa52": ("kyutai/tts-voices", "voice-donations/fa52.wav", "model"),
	"vd_fc96": ("kyutai/tts-voices", "voice-donations/fc96.wav", "model"),
	"vd_floyd2026": ("kyutai/tts-voices", "voice-donations/floyd2026.wav", "model"),
	"vd_gmaskell92": ("kyutai/tts-voices", "voice-donations/gmaskell92.wav", "model"),
	"vd_gyroo": ("kyutai/tts-voices", "voice-donations/gyroo.wav", "model"),
	"vd_hielos_1": ("kyutai/tts-voices", "voice-donations/hielos_1.wav", "model"),
	"vd_hielos_2": ("kyutai/tts-voices", "voice-donations/hielos_2.wav", "model"),
	"vd_injul": ("kyutai/tts-voices", "voice-donations/injul.wav", "model"),
	"vd_kbrn1": ("kyutai/tts-voices", "voice-donations/kbrn1.wav", "model"),
	"vd_oldNerd": ("kyutai/tts-voices", "voice-donations/oldNerd.wav", "model"),
	"vd_oldNerd2": ("kyutai/tts-voices", "voice-donations/oldNerd2.wav", "model"),
	"vd_oldNerd3": ("kyutai/tts-voices", "voice-donations/oldNerd3.wav", "model"),
	"vd_ra_XOr": ("kyutai/tts-voices", "voice-donations/ra_XOr.wav", "model"),
	"vd_rewi": ("kyutai/tts-voices", "voice-donations/rewi.wav", "model"),
	"vd_robert": ("kyutai/tts-voices", "voice-donations/robert.wav", "model"),
	"vd_rshah_1_0": ("kyutai/tts-voices", "voice-donations/rshah_1_0.wav", "model"),
	"vd_sanjay": ("kyutai/tts-voices", "voice-donations/sanjay.wav", "model"),
	"vd_siddharth_khanna": ("kyutai/tts-voices", "voice-donations/siddharth_khanna.wav", "model"),
	"vd_solace": ("kyutai/tts-voices", "voice-donations/solace.wav", "model"),
	"vd_spanish_limaperu": ("kyutai/tts-voices", "voice-donations/spanish-limaperu.wav", "model"),
	"vd_stein": ("kyutai/tts-voices", "voice-donations/stein.wav", "model"),
	"vd_sujan_daikoawaj": ("kyutai/tts-voices", "voice-donations/sujan_daikoawaj.wav", "model"),
	"vd_surazy": ("kyutai/tts-voices", "voice-donations/surazy.wav", "model"),
	"vd_taiyo": ("kyutai/tts-voices", "voice-donations/taiyo.wav", "model"),
	"vd_temp_007": ("kyutai/tts-voices", "voice-donations/temp-007.wav", "model"),
	"vd_thepolishdane": ("kyutai/tts-voices", "voice-donations/thepolishdane.wav", "model"),
	"vd_utk": ("kyutai/tts-voices", "voice-donations/utk.wav", "model"),
	"vd_vinayak": ("kyutai/tts-voices", "voice-donations/vinayak.wav", "model"),
	"vd_virtu": ("kyutai/tts-voices", "voice-donations/virtu.wav", "model"),
	"vd_willbas": ("kyutai/tts-voices", "voice-donations/willbas.wav", "model"),
	"vd_yaemdluffy": ("kyutai/tts-voices", "voice-donations/yaemdluffy.wav", "model"),
	"vd_zerocool": ("kyutai/tts-voices", "voice-donations/zerocool.wav", "model"),
	"zero_bill_boerst": ("kyutai/tts-voices", "voice-zero/bill_boerst.wav", "model"),
	"zero_caro_davy": ("kyutai/tts-voices", "voice-zero/caro_davy.wav", "model"),
	"zero_peter_yearsley": ("kyutai/tts-voices", "voice-zero/peter_yearsley.wav", "model"),
	"zero_stuart_bell": ("kyutai/tts-voices", "voice-zero/stuart_bell.wav", "model"),
	}

	# Fold the `_KYUTAI_VOICES` table into the main voice registry, then publish
	# the alphabetically sorted list of every registered voice name. The list is
	# what `/voices` returns and what the "voice not found" error prints.
	VOICE_SOURCES.update(_KYUTAI_VOICES)
	BUILTIN_VOICES = sorted(VOICE_SOURCES)


	def _init_model():
	    """Load the Pocket TTS model into ``_state`` the first time it is needed.

	    Once ``_state["initialized"]`` is true the function is a no-op, so it is
	    safe to call at the start of every request.

	    Raises:
	        RuntimeError: if ``TTSModel`` is ``None``, i.e. the guarded
	            ``pocket_tts`` import at the top of the file failed.
	    """
	    if not _state["initialized"]:
	        if TTSModel is None:
	            raise RuntimeError("pocket-tts not installed")

	        print("Initializing Pocket TTS model (english_2026-04 with voice cloning)...")
	        model = TTSModel.load_model(language="english_2026-04")

	        # Publish the model first, then its sample rate (falling back to
	        # 24000 when the model object has no `sample_rate` attribute), and
	        # only then flip the flag that short-circuits later calls.
	        _state["model"] = model
	        _state["sample_rate"] = getattr(model, "sample_rate", 24000)
	        _state["initialized"] = True

	        print(f"Pocket TTS initialized. Sample rate: {_state['sample_rate']} Hz, voice_cloning: {model.has_voice_cloning}, voices: {len(BUILTIN_VOICES)}")


	def _get_voice_state(voice: str):
	    """Return the model state for ``voice``, building and caching it on first use.

	    On a cache miss the voice's reference audio is downloaded from the
	    Hugging Face Hub (location taken from ``VOICE_SOURCES``), converted to a
	    model state with ``model.get_state_for_audio_prompt`` and stored in
	    ``_state["voice_cache"]`` so later requests skip both steps.

	    Args:
	        voice: Key into ``VOICE_SOURCES``.

	    Returns:
	        The state object produced by the model, with every ``torch.Tensor``
	        (including tensors nested inside dicts) replaced by a detached clone.
	        Values of any other type are kept as-is.

	    Raises:
	        ValueError: if ``voice`` is not registered, or if the download fails.
	            The download error is attached as ``__cause__``. Callers map
	            ``ValueError`` to HTTP 400, so the exception type is kept.
	    """
	    model = _state["model"]
	    cache = _state["voice_cache"]
	    if voice in cache:
	        return cache[voice]

	    if voice not in VOICE_SOURCES:
	        raise ValueError(f"Voice '{voice}' not found. Available: {BUILTIN_VOICES}")

	    # Entries are (repo_id, path[, repo_type]); a missing third element
	    # means the file lives in a Space.
	    source = VOICE_SOURCES[voice]
	    repo_id = source[0]
	    voice_path_hf = source[1]
	    repo_type = source[2] if len(source) > 2 else "space"

	    # Imported lazily, outside the try, so a missing package surfaces as an
	    # ImportError rather than being reported as a download failure.
	    from huggingface_hub import hf_hub_download

	    try:
	        voice_path = hf_hub_download(
	            repo_id,
	            voice_path_hf,
	            repo_type=repo_type,
	            token=HF_TOKEN or None,
	        )
	    except Exception as e:
	        # Chain the original error so the traceback shows the real cause.
	        raise ValueError(f"Failed to download voice '{voice}': {e}") from e
	    print(f"Downloaded voice '{voice}' from {repo_id} ({repo_type})")

	    voice_state = model.get_state_for_audio_prompt(voice_path)

	    def detach_all(obj):
	        """Recursively detach-and-clone tensors found in (nested) dicts."""
	        if isinstance(obj, torch.Tensor):
	            return obj.detach().clone()
	        if isinstance(obj, dict):
	            return {k: detach_all(v) for k, v in obj.items()}
	        return obj

	    # Cache an independent copy of the tensors rather than the objects the
	    # model returned (presumably so the cache cannot alias live model
	    # buffers; confirm against pocket_tts).
	    voice_state = detach_all(voice_state)
	    cache[voice] = voice_state
	    print(f"Voice state loaded for '{voice}'")
	    return voice_state


	def _generate_audio(text: str, voice: str, temperature: float = 0.7) -> tuple:
	    """Synthesize ``text`` with ``voice`` and return 16-bit PCM samples.

	    Args:
	        text: Text to speak.
	        voice: Voice name, resolved through ``_get_voice_state``.
	        temperature: Accepted for API compatibility.
	            NOTE(review): this value is not referenced anywhere in the body,
	            so it currently has no effect on generation.

	    Returns:
	        ``(samples, sample_rate)`` where ``samples`` is an ``int16`` NumPy
	        array and ``sample_rate`` is read from ``_state``.

	    Raises:
	        RuntimeError: from ``_init_model`` when pocket-tts is unavailable.
	        ValueError: from ``_get_voice_state`` for unknown or undownloadable
	            voices.
	    """
	    _init_model()
	    tts = _state["model"]
	    rate = _state["sample_rate"]
	    prompt_state = _get_voice_state(voice)

	    generated = tts.generate_audio(
	        prompt_state,
	        text,
	        frames_after_eos=2,
	        copy_state=True,
	    )

	    # Tensor-like results expose `.cpu()`; anything else is used as returned.
	    if hasattr(generated, 'cpu'):
	        waveform = generated.cpu().numpy()
	    else:
	        waveform = generated

	    # Peak-normalize to 95% of full scale; all-zero audio is left untouched
	    # to avoid dividing by zero.
	    peak = np.max(np.abs(waveform))
	    if peak > 0:
	        waveform = waveform / peak * 0.95

	    scaled = np.clip(waveform * 32767, -32767, 32767)
	    return scaled.astype(np.int16), rate


	def _wav_bytes(audio_int16: np.ndarray, sample_rate: int) -> bytes:
	    """Serialize PCM samples into a complete in-memory WAV file.

	    Args:
	        audio_int16: Samples whose raw bytes are written as the frame data.
	        sample_rate: Frame rate stored in the WAV header, in Hz.

	    Returns:
	        The bytes of a mono, 2-bytes-per-sample WAV file.
	    """
	    frames = audio_int16.tobytes()
	    sink = io.BytesIO()
	    with wave.open(sink, "wb") as writer:
	        # (nchannels, sampwidth, framerate, nframes, comptype, compname):
	        # mono, 16-bit, uncompressed; nframes=0 lets the writer fill in the
	        # real count from the data.
	        writer.setparams((1, 2, sample_rate, 0, "NONE", "not compressed"))
	        writer.writeframes(frames)
	    return sink.getvalue()


	def _ogg_bytes(audio_int16: np.ndarray, sample_rate: int) -> bytes:
	    """Encode PCM samples as Ogg/Opus by piping a WAV through ``ffmpeg``.

	    The samples are first wrapped in a WAV container with ``_wav_bytes`` and
	    fed to ``ffmpeg`` on stdin; the encoded stream is read back from stdout.
	    Output is mono, resampled to 48 kHz, libopus at 64 kbit/s.

	    Args:
	        audio_int16: PCM samples to encode.
	        sample_rate: Sample rate of ``audio_int16`` in Hz.

	    Returns:
	        The encoded Ogg file as bytes.

	    Raises:
	        RuntimeError: if ``ffmpeg`` exits with a non-zero status; the message
	            carries up to 200 characters of its error output.
	        subprocess.TimeoutExpired: if encoding takes longer than 30 seconds.
	        OSError: if the ``ffmpeg`` executable cannot be started.
	    """
	    wav_data = _wav_bytes(audio_int16, sample_rate)
	    proc = subprocess.run(
	        [
	            "ffmpeg",
	            # Suppress the version/configuration banner and informational
	            # logging so stderr holds only real errors; otherwise the
	            # truncated message below would contain nothing but the banner.
	            "-hide_banner", "-loglevel", "error",
	            "-y", "-f", "wav", "-i", "pipe:0",
	            "-c:a", "libopus", "-b:a", "64k", "-ar", "48000", "-ac", "1",
	            "-f", "ogg", "pipe:1",
	        ],
	        input=wav_data,
	        capture_output=True,
	        timeout=30,
	    )
	    if proc.returncode != 0:
	        # errors="replace": undecodable bytes in stderr must not raise a
	        # UnicodeDecodeError that hides the actual ffmpeg failure.
	        detail = proc.stderr.decode("utf-8", errors="replace").strip()
	        raise RuntimeError(f"ffmpeg failed: {detail[:200]}")
	    return proc.stdout


	def _audio_response(text: str, voice: str, temperature: float, fmt: str) -> Response:
	    """Synthesize ``text`` and wrap the encoded audio in an HTTP response.

	    Shared implementation of the POST and GET ``/tts`` handlers.

	    Args:
	        text: Text to synthesize.
	        voice: Voice name passed to ``_generate_audio``.
	        temperature: Passed through to ``_generate_audio``.
	        fmt: ``"ogg"`` selects Ogg/Opus; any other value yields WAV.

	    Returns:
	        A ``Response`` carrying the audio as an attachment.

	    Raises:
	        HTTPException: 400 when generation raises ``ValueError`` (for example
	            an unknown voice); 500 for any other generation failure or when
	            OGG encoding fails. The original exception is chained as the
	            cause.
	    """
	    try:
	        audio_int16, sample_rate = _generate_audio(text, voice, temperature)
	    except ValueError as e:
	        raise HTTPException(400, str(e)) from e
	    except Exception as e:
	        # Full traceback goes to the server log; the client only receives a
	        # truncated message.
	        traceback.print_exc()
	        raise HTTPException(500, str(e)[:300]) from e

	    if fmt == "ogg":
	        try:
	            data = _ogg_bytes(audio_int16, sample_rate)
	        except Exception as e:
	            raise HTTPException(500, f"OGG encoding failed: {str(e)[:200]}") from e
	        return Response(content=data, media_type="audio/ogg",
	                        headers={"Content-Disposition": "attachment; filename=tts.ogg"})

	    data = _wav_bytes(audio_int16, sample_rate)
	    return Response(content=data, media_type="audio/wav",
	                    headers={"Content-Disposition": "attachment; filename=tts.wav"})


	@app.post("/tts")
	async def tts_post(req: TTSRequest):
	    """POST endpoint — send full text in request body (no URL length limits)."""
	    return _audio_response(req.text, req.voice, req.temperature, req.format)


	@app.get("/tts")
	async def tts_get(
	    text: str = Query(..., description="Text to synthesize"),
	    voice: str = Query("af_alloy", description="Voice name"),
	    temperature: float = Query(0.7, ge=0.1, le=1.5),
	    format: str = Query("ogg", description="Output format: wav or ogg"),
	):
	    """GET endpoint — same synthesis as POST /tts, with inputs as query parameters."""
	    return _audio_response(text, voice, temperature, format)


	@app.get("/voices")
	async def voices():
	    """Return every registered voice name together with the total count."""
	    names = BUILTIN_VOICES
	    return {"voices": names, "count": len(names)}


	@app.get("/health")
	async def health():
	    """Report that the server is up and whether the TTS model has been loaded."""
	    model_loaded = _state["initialized"]
	    return {"status": "ok", "initialized": model_loaded}


	@app.get("/", response_class=HTMLResponse)
	async def index():
	    """Serve a static HTML landing page listing the endpoints and voice families."""
	    # The endpoint list includes POST /tts, which accepts the fields of
	    # TTSRequest as a JSON body.
	    return """
	<html><body style="font-family:monospace;max-width:700px;margin:40px auto">
	<h1>🔊 Pocket-TTS API</h1>
	<p>FastAPI server running <a href="https://huggingface.co/kyutai/pocket-tts">kyutai/pocket-tts</a></p>
	<h3>Endpoints</h3>
	<ul>
	<li><code>GET /tts?text=Hello&voice=af_alloy&format=ogg</code></li>
	<li><code>POST /tts</code> — JSON body with <code>text</code>, <code>voice</code>, <code>temperature</code>, <code>format</code></li>
	<li><code>GET /voices</code></li>
	<li><code>GET /health</code></li>
	</ul>
	<h3>Voice Categories</h3>
	<ul>
	<li><b>af_/am_/bf_/bm_/ef_/em_/ff_/hf_/hm_/if_/im_/jf_/jm_/pf_/pm_/zf_/zm_*</b> — Standard multilingual</li>
	<li><b>f01-f10/m01-m10</b> — Character voices</li>
	<li><b>alba_*</b> — Alba Mackenna characters</li>
	<li><b>ex_*</b> — Expressive/emotional (angry, happy, whisper, sarcastic, etc.)</li>
	<li><b>ears_*</b> — EARS speakers + emotional variants</li>
	<li><b>vd_*</b> — Community voice donations (Goku, ClassicWizard, etc.)</li>
	<li><b>vctk_*</b> — VCTK dataset speakers</li>
	<li><b>cml_*</b> — CML-TTS French speakers</li>
	<li><b>zero_*</b> — Voice-zero characters</li>
	<li><b>unmute_*</b> — Unmute voices</li>
	</ul>
	</body></html>
	"""