hf4uwho committed on
Commit
1dd1d1c
·
1 Parent(s): 4242e3c

Switch to Docker SDK: FastAPI server with direct WAV/OGG output, no Gradio

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +4 -0
  2. .gitattributes +0 -91
  3. Dockerfile +23 -0
  4. README.md +8 -70
  5. app.py +0 -561
  6. embeddings/af_alloy.safetensors +0 -3
  7. embeddings/af_aoede.safetensors +0 -3
  8. embeddings/af_bella.safetensors +0 -3
  9. embeddings/af_heart.safetensors +0 -3
  10. embeddings/af_jessica.safetensors +0 -3
  11. embeddings/af_kore.safetensors +0 -3
  12. embeddings/af_nicole.safetensors +0 -3
  13. embeddings/af_nova.safetensors +0 -3
  14. embeddings/af_river.safetensors +0 -3
  15. embeddings/af_sarah.safetensors +0 -3
  16. embeddings/af_sky.safetensors +0 -3
  17. embeddings/am_adam.safetensors +0 -3
  18. embeddings/am_echo.safetensors +0 -3
  19. embeddings/am_eric.safetensors +0 -3
  20. embeddings/am_fenrir.safetensors +0 -3
  21. embeddings/am_liam.safetensors +0 -3
  22. embeddings/am_michael.safetensors +0 -3
  23. embeddings/am_onyx.safetensors +0 -3
  24. embeddings/am_puck.safetensors +0 -3
  25. embeddings/am_santa.safetensors +0 -3
  26. embeddings/bf_alice.safetensors +0 -3
  27. embeddings/bf_emma.safetensors +0 -3
  28. embeddings/bf_isabella.safetensors +0 -3
  29. embeddings/bf_lily.safetensors +0 -3
  30. embeddings/bm_daniel.safetensors +0 -3
  31. embeddings/bm_fable.safetensors +0 -3
  32. embeddings/bm_george.safetensors +0 -3
  33. embeddings/bm_lewis.safetensors +0 -3
  34. embeddings/ef_dora.safetensors +0 -3
  35. embeddings/em_alex.safetensors +0 -3
  36. embeddings/em_santa.safetensors +0 -3
  37. embeddings/ff_siwis.safetensors +0 -3
  38. embeddings/hf_alpha.safetensors +0 -3
  39. embeddings/hf_beta.safetensors +0 -3
  40. embeddings/hm_omega.safetensors +0 -3
  41. embeddings/hm_psi.safetensors +0 -3
  42. embeddings/if_sara.safetensors +0 -3
  43. embeddings/im_nicola.safetensors +0 -3
  44. embeddings/jf_alpha.safetensors +0 -3
  45. embeddings/jf_gongitsune.safetensors +0 -3
  46. embeddings/jf_nezumi.safetensors +0 -3
  47. embeddings/jf_tebukuro.safetensors +0 -3
  48. embeddings/jm_kumo.safetensors +0 -3
  49. embeddings/pf_dora.safetensors +0 -3
  50. embeddings/pm_alex.safetensors +0 -3
.dockerignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ *.pyc
2
+ __pycache__
3
+ .git
4
+ .env
.gitattributes DELETED
@@ -1,91 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- voices/af_heart.mp3 filter=lfs diff=lfs merge=lfs -text
37
- voices/af_sky.mp3 filter=lfs diff=lfs merge=lfs -text
38
- voices/af_alloy.wav filter=lfs diff=lfs merge=lfs -text
39
- voices/af_aoede.wav filter=lfs diff=lfs merge=lfs -text
40
- voices/af_bella.wav filter=lfs diff=lfs merge=lfs -text
41
- voices/af_heart.wav filter=lfs diff=lfs merge=lfs -text
42
- voices/af_jessica.wav filter=lfs diff=lfs merge=lfs -text
43
- voices/af_kore.wav filter=lfs diff=lfs merge=lfs -text
44
- voices/af_nicole.wav filter=lfs diff=lfs merge=lfs -text
45
- voices/af_nova.wav filter=lfs diff=lfs merge=lfs -text
46
- voices/af_river.wav filter=lfs diff=lfs merge=lfs -text
47
- voices/af_sarah.wav filter=lfs diff=lfs merge=lfs -text
48
- voices/af_sky.wav filter=lfs diff=lfs merge=lfs -text
49
- voices/am_adam.wav filter=lfs diff=lfs merge=lfs -text
50
- voices/am_echo.wav filter=lfs diff=lfs merge=lfs -text
51
- voices/am_eric.wav filter=lfs diff=lfs merge=lfs -text
52
- voices/am_fenrir.wav filter=lfs diff=lfs merge=lfs -text
53
- voices/am_liam.wav filter=lfs diff=lfs merge=lfs -text
54
- voices/am_michael.wav filter=lfs diff=lfs merge=lfs -text
55
- voices/am_onyx.wav filter=lfs diff=lfs merge=lfs -text
56
- voices/am_puck.wav filter=lfs diff=lfs merge=lfs -text
57
- voices/am_santa.wav filter=lfs diff=lfs merge=lfs -text
58
- voices/bf_alice.wav filter=lfs diff=lfs merge=lfs -text
59
- voices/bf_emma.wav filter=lfs diff=lfs merge=lfs -text
60
- voices/bf_isabella.wav filter=lfs diff=lfs merge=lfs -text
61
- voices/bf_lily.wav filter=lfs diff=lfs merge=lfs -text
62
- voices/bm_daniel.wav filter=lfs diff=lfs merge=lfs -text
63
- voices/bm_fable.wav filter=lfs diff=lfs merge=lfs -text
64
- voices/bm_george.wav filter=lfs diff=lfs merge=lfs -text
65
- voices/bm_lewis.wav filter=lfs diff=lfs merge=lfs -text
66
- voices/ef_dora.wav filter=lfs diff=lfs merge=lfs -text
67
- voices/em_alex.wav filter=lfs diff=lfs merge=lfs -text
68
- voices/em_santa.wav filter=lfs diff=lfs merge=lfs -text
69
- voices/ff_siwis.wav filter=lfs diff=lfs merge=lfs -text
70
- voices/hf_alpha.wav filter=lfs diff=lfs merge=lfs -text
71
- voices/hf_beta.wav filter=lfs diff=lfs merge=lfs -text
72
- voices/hm_omega.wav filter=lfs diff=lfs merge=lfs -text
73
- voices/hm_psi.wav filter=lfs diff=lfs merge=lfs -text
74
- voices/if_sara.wav filter=lfs diff=lfs merge=lfs -text
75
- voices/im_nicola.wav filter=lfs diff=lfs merge=lfs -text
76
- voices/jf_alpha.wav filter=lfs diff=lfs merge=lfs -text
77
- voices/jf_gongitsune.wav filter=lfs diff=lfs merge=lfs -text
78
- voices/jf_nezumi.wav filter=lfs diff=lfs merge=lfs -text
79
- voices/jf_tebukuro.wav filter=lfs diff=lfs merge=lfs -text
80
- voices/jm_kumo.wav filter=lfs diff=lfs merge=lfs -text
81
- voices/pf_dora.wav filter=lfs diff=lfs merge=lfs -text
82
- voices/pm_alex.wav filter=lfs diff=lfs merge=lfs -text
83
- voices/pm_santa.wav filter=lfs diff=lfs merge=lfs -text
84
- voices/zf_xiaobei.wav filter=lfs diff=lfs merge=lfs -text
85
- voices/zf_xiaoni.wav filter=lfs diff=lfs merge=lfs -text
86
- voices/zf_xiaoxiao.wav filter=lfs diff=lfs merge=lfs -text
87
- voices/zf_xiaoyi.wav filter=lfs diff=lfs merge=lfs -text
88
- voices/zm_yunjian.wav filter=lfs diff=lfs merge=lfs -text
89
- voices/zm_yunxi.wav filter=lfs diff=lfs merge=lfs -text
90
- voices/zm_yunxia.wav filter=lfs diff=lfs merge=lfs -text
91
- voices/zm_yunyang.wav filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.13-slim
2
+
3
+ RUN apt-get update && apt-get install -y --no-install-recommends \
4
+ ffmpeg \
5
+ && rm -rf /var/lib/apt/lists/*
6
+
7
+ RUN pip install --no-cache-dir \
8
+ pocket-tts \
9
+ soundfile \
10
+ numpy \
11
+ fastapi \
12
+ uvicorn
13
+
14
+ WORKDIR /app
15
+
16
+ COPY server.py /app/server.py
17
+
18
+ EXPOSE 7860
19
+
20
+ HEALTHCHECK --interval=30s --timeout=5s --start-period=120s \
21
+ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health', timeout=4)" || exit 1
22
+
23
+ CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,77 +1,15 @@
1
  ---
2
- title: Pocket-TTS 100M
3
  emoji: 🔊
4
  colorFrom: green
5
  colorTo: blue
6
- sdk: gradio
7
- sdk_version: 6.2.0
8
- app_file: app.py
9
- pinned: true
10
- license: apache-2.0
11
- short_description: High quality, efficient voice cloning. Just 100M parameters.
12
  ---
13
 
14
- # Pocket-TTS
15
 
16
- A lightweight text-to-speech application built with [kyutai/pocket-tts](https://huggingface.co/kyutai/pocket-tts) and Gradio.
17
-
18
- ## Features
19
-
20
- - **Fast CPU inference** — ~6x faster than real-time on modern CPUs
21
- - **Low latency** — ~200ms to first audio chunk
22
- - **Streaming output** — Audio plays as it generates
23
- - **Voice cloning** — Use custom voice samples (MP3, WAV, FLAC, etc.)
24
- - **Pre-computed embeddings** — Voices work without voice cloning auth on HF Spaces
25
-
26
- ## Quick Start
27
-
28
- ```bash
29
- pip install -r requirements.txt
30
- python app.py
31
- ```
32
-
33
- Open http://127.0.0.1:7860 in your browser.
34
-
35
- ## Adding Custom Voices
36
-
37
- 1. Drop audio files (MP3, WAV, etc.) into the `voices/` directory
38
- 2. Restart the app
39
- 3. Embeddings are created automatically on first boot (requires HF auth locally)
40
- 4. Once created, embeddings are saved to `embeddings/` and work without auth
41
-
42
- ### Structure
43
-
44
- ```
45
- Pocket-TTS/
46
- ├── app.py
47
- ├── requirements.txt
48
- ├── voices/ # Your custom voice audio files
49
- │ └── my_voice.mp3
50
- └── embeddings/ # Auto-generated (commit these for HF Spaces)
51
- └── my_voice.safetensors
52
- ```
53
-
54
- ## HuggingFace Spaces Deployment
55
-
56
- **Option 1: Pre-commit embeddings (no auth needed on Space)**
57
-
58
- 1. Run the app locally first (with HF auth) to generate embeddings
59
- 2. Commit both `voices/` and `embeddings/` directories
60
- 3. The Space will use pre-computed embeddings
61
-
62
- **Option 2: Auto-create embeddings on Space (requires valid token)**
63
-
64
- 1. Accept terms at https://huggingface.co/kyutai/pocket-tts
65
- 2. Add `HF_TOKEN` secret in Space settings (must be a valid token)
66
- 3. Embeddings are created automatically on first boot
67
-
68
- ## Model Info
69
-
70
- - **Model**: [kyutai/pocket-tts](https://huggingface.co/kyutai/pocket-tts)
71
- - **Parameters**: 100M
72
- - **Language**: English only
73
- - **Sample rate**: 24kHz
74
-
75
- ## License
76
-
77
- See the [kyutai/pocket-tts](https://huggingface.co/kyutai/pocket-tts) model card for licensing information.
 
1
  ---
2
+ title: Pocket-TTS API
3
  emoji: 🔊
4
  colorFrom: green
5
  colorTo: blue
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ license: cc-by-4.0
 
 
10
  ---
11
 
12
+ # Pocket-TTS API
13
 
14
+ FastAPI server running kyutai/pocket-tts with direct WAV/OGG output.
15
+ No Gradio — just clean API endpoints for TTS generation.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py DELETED
@@ -1,561 +0,0 @@
1
- import gradio as gr
2
- import io
3
- import wave
4
- import numpy as np
5
-
6
- # Lazy imports for optional dependencies
7
- try:
8
- import torch # type: ignore
9
- except Exception: # pragma: no cover
10
- torch = None # type: ignore
11
-
12
- try:
13
- from pocket_tts import TTSModel # type: ignore
14
- except Exception: # pragma: no cover
15
- TTSModel = None # type: ignore
16
-
17
- # Global state for lazy initialization
18
- _POCKET_STATE = {
19
- "initialized": False,
20
- "model": None,
21
- "voice_states": {},
22
- "sample_rate": 24000,
23
- }
24
-
25
- def _get_available_voices() -> dict[str, str]:
26
- """Get available voices from the local ./voices/ directory.
27
-
28
- Scans ./voices/ directory for audio files (WAV, MP3, etc.)
29
- """
30
- import os
31
-
32
- voices_dir = os.path.join(os.path.dirname(__file__), "voices")
33
- local_voices = {}
34
-
35
- if os.path.exists(voices_dir):
36
- for f in os.listdir(voices_dir):
37
- # Support common audio formats
38
- if f.lower().endswith(('.wav', '.mp3', '.flac', '.ogg', '.m4a')):
39
- voice_name = os.path.splitext(f)[0]
40
- local_voices[voice_name] = os.path.join(voices_dir, f)
41
-
42
- if local_voices:
43
- print(f"Found {len(local_voices)} local voice(s): {list(local_voices.keys())}")
44
- else:
45
- print("WARNING: No voices found in voices/ directory")
46
-
47
- return local_voices
48
-
49
-
50
- # Scan voices at import time
51
- PRESET_VOICES = _get_available_voices()
52
-
53
-
54
- def _init_pocket(
55
- temp: float = 0.7,
56
- lsd_decode_steps: int = 1,
57
- noise_clamp: float | None = None,
58
- eos_threshold: float = -4.0,
59
- ) -> None:
60
- """Lazy initialization of the Pocket TTS model."""
61
- if _POCKET_STATE["initialized"]:
62
- return
63
-
64
- if TTSModel is None:
65
- raise gr.Error(
66
- "pocket-tts is not installed. Please install with: pip install pocket-tts"
67
- )
68
-
69
- if torch is None:
70
- raise gr.Error("PyTorch is not installed. Please install torch>=2.5.0")
71
-
72
- print("Initializing Pocket TTS...")
73
-
74
- # Log in to HuggingFace if token is available (enables voice cloning on Spaces)
75
- import os
76
- hf_token = os.environ.get("HF_TOKEN")
77
- if hf_token:
78
- print("HF_TOKEN found, using for authentication")
79
-
80
-
81
- # Auto-detect device: CPU by default, CUDA if available
82
- # Note: The pocket-tts docs mention GPU doesn't provide speedup for this model
83
- device = "cuda" if torch.cuda.is_available() else "cpu"
84
- print(f"Using device: {device}")
85
-
86
- try:
87
- model = TTSModel.load_model(
88
- temp=float(temp),
89
- lsd_decode_steps=int(lsd_decode_steps),
90
- noise_clamp=float(noise_clamp) if noise_clamp is not None else None,
91
- eos_threshold=float(eos_threshold),
92
- )
93
- _POCKET_STATE.update({
94
- "initialized": True,
95
- "model": model,
96
- "sample_rate": model.sample_rate,
97
- })
98
- print(f"Pocket TTS initialized. Sample rate: {model.sample_rate} Hz")
99
-
100
- # Auto-create missing embeddings if voice cloning is available
101
- if model.has_voice_cloning:
102
- _create_missing_embeddings(model)
103
- else:
104
- print("Voice cloning not available - using pre-computed embeddings only")
105
-
106
- except Exception as e:
107
- raise gr.Error(f"Failed to initialize Pocket TTS model: {str(e)}")
108
-
109
-
110
- def _create_missing_embeddings(model) -> None:
111
- """Create embeddings for any voices that have audio files but no embedding."""
112
- import os
113
- from pocket_tts.data.audio import audio_read
114
- from pocket_tts.data.audio_utils import convert_audio
115
- import safetensors.torch
116
-
117
- voices_dir = os.path.join(os.path.dirname(__file__), "voices")
118
- embeddings_dir = os.path.join(os.path.dirname(__file__), "embeddings")
119
-
120
- if not os.path.exists(voices_dir):
121
- return
122
-
123
- os.makedirs(embeddings_dir, exist_ok=True)
124
-
125
- audio_extensions = ('.wav', '.mp3', '.flac', '.ogg', '.m4a')
126
-
127
- for voice_name, voice_path in PRESET_VOICES.items():
128
- embedding_path = os.path.join(embeddings_dir, f"{voice_name}.safetensors")
129
-
130
- # Skip if embedding already exists or no local file
131
- if os.path.exists(embedding_path) or voice_path is None:
132
- continue
133
-
134
- # Skip fallback HuggingFace voices
135
- if voice_path.startswith("hf://"):
136
- continue
137
-
138
- print(f"Creating embedding for '{voice_name}'...")
139
-
140
- try:
141
- # Convert to WAV if needed
142
- audio_path = voice_path
143
- if not voice_path.lower().endswith('.wav'):
144
- from pydub import AudioSegment
145
- import tempfile
146
- audio = AudioSegment.from_file(voice_path)
147
- temp_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
148
- audio.export(temp_wav.name, format='wav')
149
- audio_path = temp_wav.name
150
-
151
- # Read and encode audio
152
- audio, sr = audio_read(audio_path)
153
- audio_tensor = convert_audio(audio, sr, model.config.mimi.sample_rate, 1)
154
-
155
- with torch.no_grad():
156
- audio_prompt = model._encode_audio(audio_tensor.unsqueeze(0).to(model.device))
157
-
158
- # Save embedding
159
- safetensors.torch.save_file(
160
- {"audio_prompt": audio_prompt.cpu()},
161
- embedding_path
162
- )
163
- print(f" Saved: {embedding_path}")
164
-
165
- except Exception as e:
166
- print(f" Error creating embedding for {voice_name}: {e}")
167
-
168
-
169
- def _convert_to_wav(audio_path: str) -> str:
170
- """Convert audio file to WAV format if needed.
171
-
172
- Returns the path to a WAV file (original if already WAV, or converted temp file).
173
- Uses pydub for MP3 (requires ffmpeg), soundfile for other formats.
174
- """
175
- import tempfile
176
-
177
- # Check if already WAV
178
- if audio_path.lower().endswith('.wav'):
179
- return audio_path
180
-
181
- print(f"Converting {audio_path} to WAV format...")
182
-
183
- # Create temp file path
184
- import os
185
- tmp_fd, wav_path = tempfile.mkstemp(suffix=".wav")
186
- os.close(tmp_fd)
187
-
188
- # Try pydub first (better MP3 support via ffmpeg)
189
- try:
190
- from pydub import AudioSegment
191
- audio = AudioSegment.from_file(audio_path)
192
- audio.export(wav_path, format="wav")
193
- print(f"Converted via pydub to: {wav_path}")
194
- return wav_path
195
- except ImportError:
196
- pass # pydub not installed, try soundfile
197
- except Exception as e:
198
- print(f"pydub conversion failed: {e}, trying soundfile...")
199
-
200
- # Fall back to soundfile
201
- try:
202
- import soundfile as sf
203
- audio_data, sample_rate = sf.read(audio_path)
204
- sf.write(wav_path, audio_data, sample_rate)
205
- print(f"Converted via soundfile to: {wav_path}")
206
- return wav_path
207
- except Exception as e:
208
- raise gr.Error(f"Failed to convert audio file: {str(e)}. Please upload a WAV file directly or install pydub+ffmpeg for MP3 support.")
209
-
210
-
211
- def _get_voice_state(voice_name: str | None, custom_audio_path: str | None):
212
- """Get or create voice state for generation.
213
-
214
- Args:
215
- voice_name: Name of preset voice (alba, marius, etc.)
216
- custom_audio_path: Path to custom audio file for voice cloning
217
-
218
- Returns:
219
- Voice state dict for the model
220
- """
221
- model = _POCKET_STATE["model"]
222
-
223
- # Custom audio takes priority
224
- if custom_audio_path:
225
- print(f"Loading custom voice from: {custom_audio_path}")
226
- # Convert to WAV if needed
227
- wav_path = _convert_to_wav(custom_audio_path)
228
- return model.get_state_for_audio_prompt(wav_path)
229
-
230
- # Use preset voice
231
- if not voice_name or voice_name not in PRESET_VOICES:
232
- # Default to first available voice
233
- voice_name = list(PRESET_VOICES.keys())[0] if PRESET_VOICES else None
234
- if not voice_name:
235
- raise gr.Error("No voices available. Add audio files to the voices/ directory.")
236
-
237
- # Check cache
238
- if voice_name in _POCKET_STATE["voice_states"]:
239
- return _POCKET_STATE["voice_states"][voice_name]
240
-
241
- # Check for pre-computed embedding first (no voice cloning needed)
242
- import os
243
- embeddings_dir = os.path.join(os.path.dirname(__file__), "embeddings")
244
- embedding_path = os.path.join(embeddings_dir, f"{voice_name}.safetensors")
245
-
246
- if os.path.exists(embedding_path):
247
- print(f"Loading pre-computed embedding for '{voice_name}' from: {embedding_path}")
248
- import safetensors.torch
249
- from pocket_tts.modules.stateful_module import init_states
250
-
251
- # Load the audio prompt embedding
252
- state_dict = safetensors.torch.load_file(embedding_path)
253
- audio_prompt = state_dict["audio_prompt"].to(model.device)
254
-
255
- # Create fresh model state and condition it with the audio prompt
256
- # (same logic as model.get_state_for_audio_prompt uses internally)
257
- voice_state = init_states(model.flow_lm, batch_size=1, sequence_length=1000)
258
- model._run_flow_lm_and_increment_step(model_state=voice_state, audio_conditioning=audio_prompt)
259
-
260
- # Detach all tensors to make them leaf tensors (required for deepcopy)
261
- def detach_tensors(obj):
262
- if isinstance(obj, torch.Tensor):
263
- return obj.detach().clone()
264
- elif isinstance(obj, dict):
265
- return {k: detach_tensors(v) for k, v in obj.items()}
266
- else:
267
- return obj
268
-
269
- voice_state = detach_tensors(voice_state)
270
-
271
- _POCKET_STATE["voice_states"][voice_name] = voice_state
272
- return voice_state
273
-
274
- # Fall back to voice cloning (requires auth)
275
- if not model.has_voice_cloning:
276
- raise gr.Error(
277
- f"No embedding found for voice '{voice_name}'. "
278
- f"Voice cloning is not available (requires HF auth). "
279
- f"Run the app locally first to create embeddings."
280
- )
281
-
282
- voice_path = PRESET_VOICES[voice_name]
283
- print(f"Loading preset voice '{voice_name}' from: {voice_path}")
284
-
285
- # Convert to WAV if needed (local files may be MP3, etc.)
286
- wav_path = _convert_to_wav(voice_path)
287
- voice_state = model.get_state_for_audio_prompt(wav_path)
288
- _POCKET_STATE["voice_states"][voice_name] = voice_state
289
- return voice_state
290
-
291
-
292
- def _audio_np_to_int16(audio_np: np.ndarray) -> np.ndarray:
293
- """Convert float audio array to int16."""
294
- audio_clipped = np.clip(audio_np, -1.0, 1.0)
295
- return (audio_clipped * 32767.0).astype(np.int16)
296
-
297
-
298
- def _wav_bytes_from_int16(audio_int16: np.ndarray, sample_rate: int) -> bytes:
299
- """Create WAV bytes from int16 audio array."""
300
- buffer = io.BytesIO()
301
- with wave.open(buffer, "wb") as wf:
302
- wf.setnchannels(1)
303
- wf.setsampwidth(2)
304
- wf.setframerate(sample_rate)
305
- wf.writeframes(audio_int16.tobytes())
306
- return buffer.getvalue()
307
-
308
-
309
- def _split_into_sentences(text: str) -> list[str]:
310
- """Split text into sentences for chunk-by-chunk generation.
311
-
312
- Uses simple punctuation-based splitting for natural speech chunks.
313
- """
314
- import re
315
- # Split on sentence-ending punctuation, keeping the punctuation
316
- # Handle common patterns: . ! ? and combinations like "..." or "?!"
317
- sentences = re.split(r'(?<=[.!?])\s+', text.strip())
318
- # Filter out empty strings and strip whitespace
319
- return [s.strip() for s in sentences if s.strip()]
320
-
321
-
322
- def pocket_tts_stream(
323
- text: str,
324
- voice: str,
325
- custom_audio,
326
- temperature: float,
327
- lsd_decode_steps: int,
328
- noise_clamp: float | None,
329
- eos_threshold: float,
330
- frames_after_eos: int,
331
- ):
332
- """Generate speech with sentence-level streaming.
333
-
334
- Splits text into sentences and yields complete audio for each sentence,
335
- matching Kokoro's smooth streaming pattern.
336
- """
337
- if not text or not text.strip():
338
- raise gr.Error("Please enter text to synthesize.")
339
-
340
- # Initialize model with current parameters
341
- _init_pocket(
342
- temp=temperature,
343
- lsd_decode_steps=lsd_decode_steps,
344
- noise_clamp=noise_clamp if noise_clamp and noise_clamp > 0 else None,
345
- eos_threshold=eos_threshold,
346
- )
347
-
348
- model = _POCKET_STATE["model"]
349
- sample_rate = _POCKET_STATE["sample_rate"]
350
-
351
- # Get voice state
352
- custom_path = custom_audio if custom_audio else None
353
- voice_state = _get_voice_state(voice, custom_path)
354
-
355
- # Split text into sentences for natural chunking
356
- sentences = _split_into_sentences(text)
357
- if not sentences:
358
- raise gr.Error("No valid sentences found in text.")
359
-
360
- produced_any = False
361
-
362
- # Buffer for initial audio - wait for ~5 seconds before yielding first chunk
363
- # This prevents stuttering from short first sentences
364
- min_initial_samples = int(sample_rate * 5) # 5 seconds of audio
365
- audio_buffer = []
366
- buffer_samples = 0
367
- initial_buffer_yielded = False
368
-
369
- try:
370
- for idx, sentence in enumerate(sentences):
371
- # Generate complete audio for this sentence (non-streaming per sentence)
372
- audio = model.generate_audio(
373
- voice_state,
374
- sentence,
375
- frames_after_eos=frames_after_eos if frames_after_eos > 0 else None,
376
- copy_state=True,
377
- )
378
- produced_any = True
379
-
380
- # Convert tensor to numpy
381
- audio_np = audio.cpu().numpy() if hasattr(audio, 'cpu') else audio
382
-
383
- if not initial_buffer_yielded:
384
- # Accumulate in buffer until we have enough audio
385
- audio_buffer.append(audio_np)
386
- buffer_samples += len(audio_np)
387
-
388
- # Check if we have enough or this is the last sentence
389
- if buffer_samples >= min_initial_samples or idx == len(sentences) - 1:
390
- # Yield the accumulated buffer
391
- combined = np.concatenate(audio_buffer, axis=0)
392
- audio_int16 = _audio_np_to_int16(combined)
393
- yield _wav_bytes_from_int16(audio_int16, sample_rate)
394
- audio_buffer = []
395
- buffer_samples = 0
396
- initial_buffer_yielded = True
397
- else:
398
- # After initial buffer, yield each sentence immediately
399
- audio_int16 = _audio_np_to_int16(audio_np)
400
- yield _wav_bytes_from_int16(audio_int16, sample_rate)
401
-
402
- except gr.Error:
403
- raise
404
- except Exception as e:
405
- raise gr.Error(f"Error during speech generation: {str(e)[:200]}...")
406
-
407
- if not produced_any:
408
- raise gr.Error("No audio was generated.")
409
-
410
-
411
- def generate_tts(
412
- text: str,
413
- voice: str,
414
- custom_audio,
415
- temperature: float,
416
- lsd_decode_steps: int,
417
- noise_clamp: float,
418
- eos_threshold: float,
419
- frames_after_eos: int,
420
- ):
421
- """Main streaming dispatcher for Pocket TTS."""
422
- yield from pocket_tts_stream(
423
- text,
424
- voice,
425
- custom_audio,
426
- temperature,
427
- lsd_decode_steps,
428
- noise_clamp,
429
- eos_threshold,
430
- frames_after_eos,
431
- )
432
-
433
-
434
- # --- Gradio UI ---
435
- with gr.Blocks() as demo:
436
- gr.HTML(
437
- "<h1 style='text-align: center;'>Pocket-TTS</h1>"
438
- )
439
- device_info = gr.Markdown(
440
- "<p style='text-align: center;'>Powered by kyutai/pocket-tts | Running on CPU | Voices cloned from Kokoro-82M</p>"
441
- )
442
-
443
- def update_device_info():
444
- device = "CUDA" if torch.cuda.is_available() else "CPU"
445
- return f"<p style='text-align: center;'>Powered by kyutai/pocket-tts | Running on {device} | Voices cloned from Kokoro-82M</p>"
446
-
447
- demo.load(update_device_info, outputs=device_info)
448
-
449
- with gr.Row():
450
- with gr.Column():
451
- # Text input
452
- text_input = gr.Textbox(
453
- label="Input Text",
454
- placeholder="Enter the text you want to convert to speech here...",
455
- lines=5,
456
- value="The quick brown fox jumps over the lazy dog. I am already far north of London, and as I walk in the streets of Petersburgh, I feel a cold northern breeze play upon my cheeks, which braces my nerves and fills me with delight. Do you understand this feeling? This breeze, which has traveled from the regions towards which I am advancing, gives me a foretaste of those icy climes. Inspirited by this wind of promise, my daydreams become more fervent and vivid.",
457
- )
458
-
459
- # Voice selection
460
- with gr.Group():
461
- gr.Markdown("### Voice Selection")
462
- gr.Markdown("Select a preset voice OR upload your own WAV file for voice cloning.")
463
-
464
- voice_dropdown = gr.Dropdown(
465
- choices=list(PRESET_VOICES.keys()),
466
- label="Preset Voice",
467
- value=list(PRESET_VOICES.keys())[0] if PRESET_VOICES else None,
468
- info="Select a pre-loaded voice. Ignored if custom audio is uploaded.",
469
- )
470
-
471
- gr.Markdown("--- OR ---")
472
-
473
- ref_audio_input = gr.Audio(
474
- label="Custom Voice (WAV)",
475
- type="filepath",
476
- sources=["upload", "microphone"],
477
- )
478
-
479
- generate_btn = gr.Button(
480
- "Generate Speech",
481
- variant="primary",
482
- )
483
-
484
- with gr.Column():
485
- audio_output = gr.Audio(
486
- label="Generated Speech",
487
- streaming=True,
488
- autoplay=True,
489
- )
490
-
491
- with gr.Accordion("Advanced Options", open=False):
492
- temp_slider = gr.Slider(
493
- minimum=0.1,
494
- maximum=1.5,
495
- value=0.7,
496
- step=0.05,
497
- label="Temperature",
498
- info="Controls randomness. Higher = more varied, lower = more consistent.",
499
- )
500
- lsd_steps_slider = gr.Slider(
501
- minimum=1,
502
- maximum=10,
503
- value=1,
504
- step=1,
505
- label="LSD Decode Steps",
506
- info="Number of generation steps. Higher = potentially better quality but slower.",
507
- )
508
- noise_clamp_slider = gr.Slider(
509
- minimum=0.0,
510
- maximum=5.0,
511
- value=0.0,
512
- step=0.1,
513
- label="Noise Clamp",
514
- info="Maximum value for noise sampling. 0 = disabled.",
515
- )
516
- eos_threshold_slider = gr.Slider(
517
- minimum=-10.0,
518
- maximum=0.0,
519
- value=-4.0,
520
- step=0.5,
521
- label="EOS Threshold",
522
- info="Threshold for end-of-sequence detection. More negative = longer audio.",
523
- )
524
- frames_after_eos_slider = gr.Slider(
525
- minimum=0,
526
- maximum=10,
527
- value=2,
528
- step=1,
529
- label="Frames After EOS",
530
- info="Additional frames to generate after EOS detection.",
531
- )
532
-
533
- # Connect inputs
534
- generate_inputs = [
535
- text_input,
536
- voice_dropdown,
537
- ref_audio_input,
538
- temp_slider,
539
- lsd_steps_slider,
540
- noise_clamp_slider,
541
- eos_threshold_slider,
542
- frames_after_eos_slider,
543
- ]
544
-
545
- generate_btn.click(
546
- fn=generate_tts,
547
- inputs=generate_inputs,
548
- outputs=audio_output,
549
- api_name="generate_speech",
550
- )
551
-
552
- text_input.submit(
553
- fn=generate_tts,
554
- inputs=generate_inputs,
555
- outputs=audio_output,
556
- api_name="generate_speech_enter",
557
- )
558
-
559
-
560
- if __name__ == "__main__":
561
- demo.queue().launch(debug=True, theme="Nymbo/Nymbo_Theme")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
embeddings/af_alloy.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:572669026fb1cd8b481b242a95a8cb4edfdb5278feeb8a568aa79f9b7ecc267f
3
- size 1024088
 
 
 
 
embeddings/af_aoede.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8a046078b7cfcaaa6902a446165ee70d1a733983139b570c76e1847cd384583
3
- size 1065048
 
 
 
 
embeddings/af_bella.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:059f403b46d030c169736ecb53266fa03e37081729d33f550f6cbff34b357899
3
- size 1232984
 
 
 
 
embeddings/af_heart.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d04aea209ad32024b5d75b6dfe30fbcc627c37990fa3eebc07d0d4a5f29b0af3
3
- size 1200216
 
 
 
 
embeddings/af_jessica.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:00b3e53447a8bd74269de84b296dcb6b9b29259aa8487fe234da7c5484b3f5b9
3
- size 966744
 
 
 
 
embeddings/af_kore.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1378a15c0759a9ee205e55a096cd3078998f1e70355fc160c00508e95b89c767
3
- size 966744
 
 
 
 
embeddings/af_nicole.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ce5142cc821775688912d975f4cf542d5b9bdbf55e5ad29bbf3ba79cfdffd78
3
- size 1744984
 
 
 
 
embeddings/af_nova.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:018a792e17e00c4f645be8bdaf8072a11debcfbb30b343308d52d3113b1df63c
3
- size 1003608
 
 
 
 
embeddings/af_river.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ddb59360b3b9b9226ab8127a0b940ff30f926805f16a3f6ecc23f32a13d5b3c
3
- size 966744
 
 
 
 
embeddings/af_sarah.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:45e17b563b7b00dd51f5b96baac967fd3b25cf76a7829f6d9a21ebe98bbe8825
3
- size 1183832
 
 
 
 
embeddings/af_sky.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ac43438607590089057eebb6e9600915c42c20c0636306b2979b10f035af470
3
- size 1097816
 
 
 
 
embeddings/am_adam.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:449765eb9ca49ea72003bddc604f0623173a6115c117a388783ab7e8f6aa33ba
3
- size 1130584
 
 
 
 
embeddings/am_echo.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6f5d91eb33311ff7a2849ea9f1e6c0b034a45a438b5d59b336af5fbc115d8ea
3
- size 1048664
 
 
 
 
embeddings/am_eric.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e2d7f232cf1c0ceea5e4613a8374d05fc34c923fe0dac35fcd6764cb2b9c596
3
- size 954456
 
 
 
 
embeddings/am_fenrir.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca59775fd8f7892c0e2e4d8709a84a010200cbb197185f9a10ecfdb2ecaaca96
3
- size 974936
 
 
 
 
embeddings/am_liam.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:311df6abbb494f5f14854e38e6c23cd515d21690e9c0758557cdb80d943d4811
3
- size 987224
 
 
 
 
embeddings/am_michael.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:edc40f6a788fdf0eec5a599fdb56ff96c1af4467b96dde2763944b4ab20e6d4a
3
- size 1298520
 
 
 
 
embeddings/am_onyx.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5efd40504928b00792ab621f877ca3fb4152794184f5858d69b1cff44986c9b4
3
- size 1032280
 
 
 
 
embeddings/am_puck.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:eeaa50d7c47c4f7909dc591962d9ca5bde3745a4edcf70e35c44d2c1322f04d6
3
- size 950360
 
 
 
 
embeddings/am_santa.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a86a63a50726137babe9fc020d66ba2846466e2dbb5b85f86ba9834919c12a2
3
- size 1130584
 
 
 
 
embeddings/bf_alice.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3cdec6bced8b9fc936627f8b8adbdda485fc406129d49ddebf7b499fb617ed8
3
- size 1036376
 
 
 
 
embeddings/bf_emma.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3981b27e38c315852b150b8260d3c5684b3b9dff21442e5f44391d9f7b9ee87
3
- size 1118296
 
 
 
 
embeddings/bf_isabella.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:96841e01a619c7b5ac8cda1346360ec14c531f99dfcb075af01b2f344fbd197b
3
- size 1142872
 
 
 
 
embeddings/bf_lily.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:832fb738e6c7c738804039622fc4536324d0c67b0e45c6cb069a21d6daf17ecd
3
- size 1040472
 
 
 
 
embeddings/bm_daniel.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:acf9f76daf96aca2afca0f39c0da2abab743f1a7e130ca7afcac54bf0500e4e8
3
- size 991320
 
 
 
 
embeddings/bm_fable.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:57a18730aa37704fa60e91875818bb795d2d52babe0043aea18a17d1948f45f4
3
- size 1032280
 
 
 
 
embeddings/bm_george.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:40ebf97b0c4d01d53765299d8c73e386d642bc8daaeeede68fd6fa45b08dd577
3
- size 1204312
 
 
 
 
embeddings/bm_lewis.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7c105f1f53990e5e44a52f82b1ea32808764fd09fc965a34dfbf9534cc39291
3
- size 1249368
 
 
 
 
embeddings/ef_dora.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:93b92dc9cd582d1d15774ec0078657b40b8b32476cecbc2da41bfed39e92c548
3
- size 987224
 
 
 
 
embeddings/em_alex.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cf443bc7afe9c71306f237c7f59e3505890fff54367384a406457fed4438753
3
- size 983128
 
 
 
 
embeddings/em_santa.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ad92648bb161cc10f22b08659c2d666f1079d53033c35e37de47faa2951b410
3
- size 979032
 
 
 
 
embeddings/ff_siwis.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1b6b642066be53aada0cc291bb759849546c17611c218f1eb8553e6ca7d0dc8
3
- size 1003608
 
 
 
 
embeddings/hf_alpha.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e635b14ed19c5b7e6723f0d9ac07fbb9c910cee348e9d6e69b961a5feccaf6cc
3
- size 1167448
 
 
 
 
embeddings/hf_beta.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:129dd75d24b738cdfda2c88346cfd88bacf3f8ade8e62787389eb27bb9bbd98e
3
- size 1060952
 
 
 
 
embeddings/hm_omega.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:70f03a08531795602e4001d0ac1f9b5cca532b99bcc83045b8fd7819b0c819d7
3
- size 1187928
 
 
 
 
embeddings/hm_psi.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:887fb58bb184613bdecf0aa797fb7182d1ba84b0f04364f067af1536fee8f360
3
- size 1175640
 
 
 
 
embeddings/if_sara.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2213c4147ad244c4c8254d8c5934bd402168ff191102f9b736d81084dbc04203
3
- size 938072
 
 
 
 
embeddings/im_nicola.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b216337c4619840943748e09106b276066982854491f3279a8e6c3f5a580eb74
3
- size 983128
 
 
 
 
embeddings/jf_alpha.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:88b39593e76a67ade6b627554880fee3d39a62e599becee9bd0b14f32a78ba90
3
- size 1167448
 
 
 
 
embeddings/jf_gongitsune.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2cedec7543084061fb5ddb5cc0bb25b1f2a4ec67aef9b7607713c8d12814909
3
- size 1400920
 
 
 
 
embeddings/jf_nezumi.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e82edf8b8d1bc789bfa0736ca24a4f548f846bc33675add7435c54026db166c7
3
- size 1310808
 
 
 
 
embeddings/jf_tebukuro.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2d8181dd643d12d1f5ce51489e9989d71cbe44a7b4c852ad32c89587e54c909
3
- size 1327192
 
 
 
 
embeddings/jm_kumo.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a437c5edb13de21872e6e0997f63b4b1366ddfc7576e22f63f072dfb2a72bb4
3
- size 1212504
 
 
 
 
embeddings/pf_dora.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ab9a7cb3340b6b1780e9ca896f645165fec634051a2a095fefadb639382a672
3
- size 1003608
 
 
 
 
embeddings/pm_alex.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bafee905cfa5dfdd1281a77c47dbe1932639dea2a0d3685b9b539ac9370e4cc3
3
- size 1003608