Numberblocks1Voice

Sleeping

App Files Community

ayf3 committed on Apr 2

Commit

6620877

verified ·

1 Parent(s): 5749c4e

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +75 -58

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 """
-NumberBlocks One Voice Cloner - V5 Fixed
-Pinned gradio version to avoid jinja2 schema bugs
 """
 import os
@@ -17,8 +17,10 @@ from pathlib import Path
 from huggingface_hub import hf_hub_download, HfApi
 import gradio as gr
 # ============================================================
-# 模型定义 - VITS-like RVC Model
 # ============================================================
 class PosteriorEncoder(nn.Module):
@@ -125,7 +127,6 @@ class Decoder(nn.Module):
 class RVCModel(nn.Module):
-    """VITS-like RVC v3.0 Model"""
     def __init__(self, n_mels=80, hidden_channels=192):
         super().__init__()
         self.enc_p = PosteriorEncoder(n_mels, hidden_channels)
@@ -240,22 +241,24 @@ def mel_to_audio_griffinlim(mel, sample_rate=40000, n_fft=1024, hop_length=256,
 # ============================================================
-# Inference Engine
 # ============================================================
 class VoiceCloner:
     def __init__(self):
         self.device = torch.device('cpu')
         self.rvc_model = None
-        self.hifigan = None
         self.sample_rate = 40000
         self.dataset_id = "ayf3/numberblocks-one-voice-dataset"
         self.model_loaded = False
-        self.samples = []
-        self.load_models()
-    def load_models(self):
-        print("Loading RVC model...")
         try:
             model_path = hf_hub_download(
                 repo_id=self.dataset_id, filename="models/one_voice_rvc_v2.pth", repo_type="dataset"
@@ -270,13 +273,21 @@ class VoiceCloner:
             self.rvc_model = RVCModel(n_mels=80, hidden_channels=hidden_ch)
             self.rvc_model.load_state_dict(state_dict, strict=False)
             self.rvc_model.eval()
-            print(f"RVC model loaded (hidden={hidden_ch})")
         except Exception as e:
-            print(f"RVC model load failed: {e}")
-        print("Loading HiFi-GAN vocoder...")
         try:
-            hifigan_path = hf_hub_download(repo_id="jik876/hifi-gan", filename="UNIVERSAL_V1/g_02500000")
             ckpt = torch.load(hifigan_path, map_location='cpu', weights_only=False)
             state_dict = ckpt.get('generator', ckpt.get('state_dict', ckpt))
             if any(k.startswith('generator.') for k in state_dict):
@@ -284,28 +295,35 @@ class VoiceCloner:
             self.hifigan = HiFiGANGenerator()
             self.hifigan.load_state_dict(state_dict, strict=False)
             self.hifigan.eval()
-            print("HiFi-GAN vocoder loaded")
         except Exception as e:
-            print(f"HiFi-GAN load failed: {e}, using Griffin-Lim fallback")
             self.hifigan = None
         try:
             api = HfApi()
             files = api.list_repo_files(self.dataset_id, repo_type="dataset")
             self.samples = [f for f in files if f.startswith('models/top_')
                            and f.endswith('.wav')
                            and '_p+' not in f and '_p-' not in f and '_s+' not in f]
-            print(f"Found {len(self.samples)} sample audio files")
         except Exception as e:
-            print(f"Could not list samples: {e}")
-        self.model_loaded = self.rvc_model is not None
     def process_audio(self, input_audio, pitch_shift=0):
         if not self.model_loaded:
-            return None, "Model not loaded"
         if input_audio is None:
-            return None, "Please upload an audio file"
         try:
             y, sr = torchaudio.load(input_audio)
             if y.shape[0] > 1:
@@ -356,13 +374,14 @@ class VoiceCloner:
             audio_out = audio_out / (np.max(np.abs(audio_out)) + 1e-7) * 0.95
             output_path = tempfile.mktemp(suffix='.wav')
             sf.write(output_path, audio_out, self.sample_rate)
-            return output_path, f"Success ({vocoder_name}) | {len(y)/sr:.1f}s -> {len(audio_out)/self.sample_rate:.1f}s"
         except Exception as e:
             import traceback
             traceback.print_exc()
-            return None, f"Error: {str(e)}"
     def generate_random(self):
         if not self.samples:
             return None, "No samples available"
         try:
@@ -373,47 +392,45 @@ class VoiceCloner:
                 return output, f"{msg}\nSample: {Path(sample).name}"
             return output, msg
         except Exception as e:
-            return None, f"Error: {str(e)}"
 # ============================================================
 # Gradio UI
 # ============================================================
-print("Initializing NumberBlocks One Voice Cloner...")
 cloner = VoiceCloner()
-vc_interface = gr.Interface(
-    fn=cloner.process_audio,
-    inputs=[
-        gr.Audio(label="Upload Audio", type="filepath"),
-        gr.Slider(minimum=-12, maximum=12, value=0, step=1, label="Pitch Shift (semitones)"),
-    ],
-    outputs=[
-        gr.Audio(label="Result", type="filepath"),
-        gr.Textbox(label="Status"),
-    ],
-    title="NumberBlocks One Voice Cloner",
-    description="RVC v2 Model (60.7MB) + HiFi-GAN Vocoder | Upload audio to convert to One's voice",
-    allow_flagging="never",
-)
-rand_interface = gr.Interface(
-    fn=cloner.generate_random,
-    inputs=[],
-    outputs=[
-        gr.Audio(label="Result", type="filepath"),
-        gr.Textbox(label="Status"),
-    ],
-    title="Random Sample Generation",
-    description="Generate from random dataset sample + RVC conversion",
-    allow_flagging="never",
-)
-demo = gr.TabbedInterface(
-    [vc_interface, rand_interface],
-    ["Voice Conversion", "Random Sample"],
-)
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)

 #!/usr/bin/env python3
 """
+NumberBlocks One Voice Cloner - V6 Robust
+Fixes: user creation in Dockerfile, lazy-load HiFi-GAN, startup-timeout protection
 """
 import os
 from huggingface_hub import hf_hub_download, HfApi
 import gradio as gr
+print("=== NumberBlocks One Voice Cloner V6 ===")
 # ============================================================
+# Model Definitions
 # ============================================================
 class PosteriorEncoder(nn.Module):
 class RVCModel(nn.Module):
     def __init__(self, n_mels=80, hidden_channels=192):
         super().__init__()
         self.enc_p = PosteriorEncoder(n_mels, hidden_channels)
 # ============================================================
+# Inference Engine - with lazy loading
 # ============================================================
 class VoiceCloner:
     def __init__(self):
         self.device = torch.device('cpu')
         self.rvc_model = None
+        self.hifigan = None  # lazy loaded
+        self._hifigan_loaded = False
         self.sample_rate = 40000
         self.dataset_id = "ayf3/numberblocks-one-voice-dataset"
         self.model_loaded = False
+        self.samples = None  # lazy loaded
+        self._load_rvc_only()
+    def _load_rvc_only(self):
+        """Load only the RVC model at startup (fast)"""
+        print("[STARTUP] Loading RVC model...")
         try:
             model_path = hf_hub_download(
                 repo_id=self.dataset_id, filename="models/one_voice_rvc_v2.pth", repo_type="dataset"
             self.rvc_model = RVCModel(n_mels=80, hidden_channels=hidden_ch)
             self.rvc_model.load_state_dict(state_dict, strict=False)
             self.rvc_model.eval()
+            self.model_loaded = True
+            print(f"[STARTUP] RVC model loaded OK (hidden={hidden_ch})")
         except Exception as e:
+            print(f"[STARTUP] RVC model load FAILED: {e}")
+    def _ensure_hifigan(self):
+        """Lazy-load HiFi-GAN on first inference request"""
+        if self._hifigan_loaded:
+            return
+        self._hifigan_loaded = True
+        print("[LAZY] Loading HiFi-GAN vocoder...")
         try:
+            hifigan_path = hf_hub_download(
+                repo_id="jik876/hifi-gan", filename="UNIVERSAL_V1/g_02500000"
+            )
             ckpt = torch.load(hifigan_path, map_location='cpu', weights_only=False)
             state_dict = ckpt.get('generator', ckpt.get('state_dict', ckpt))
             if any(k.startswith('generator.') for k in state_dict):
             self.hifigan = HiFiGANGenerator()
             self.hifigan.load_state_dict(state_dict, strict=False)
             self.hifigan.eval()
+            print("[LAZY] HiFi-GAN loaded OK")
         except Exception as e:
+            print(f"[LAZY] HiFi-GAN FAILED (Griffin-Lim fallback): {e}")
             self.hifigan = None
+    def _ensure_samples(self):
+        """Lazy-load sample list"""
+        if self.samples is not None:
+            return
+        self.samples = []
         try:
             api = HfApi()
             files = api.list_repo_files(self.dataset_id, repo_type="dataset")
             self.samples = [f for f in files if f.startswith('models/top_')
                            and f.endswith('.wav')
                            and '_p+' not in f and '_p-' not in f and '_s+' not in f]
+            print(f"[LAZY] Found {len(self.samples)} samples")
         except Exception as e:
+            print(f"[LAZY] Could not list samples: {e}")
     def process_audio(self, input_audio, pitch_shift=0):
         if not self.model_loaded:
+            return None, "Model not loaded. Check logs."
         if input_audio is None:
+            return None, "Please upload an audio file."
+        # Lazy load vocoder on first real request
+        self._ensure_hifigan()
         try:
             y, sr = torchaudio.load(input_audio)
             if y.shape[0] > 1:
             audio_out = audio_out / (np.max(np.abs(audio_out)) + 1e-7) * 0.95
             output_path = tempfile.mktemp(suffix='.wav')
             sf.write(output_path, audio_out, self.sample_rate)
+            return output_path, f"✅ {vocoder_name} | {len(y)/sr:.1f}s → {len(audio_out)/self.sample_rate:.1f}s"
         except Exception as e:
             import traceback
             traceback.print_exc()
+            return None, f"❌ Error: {str(e)}"
     def generate_random(self):
+        self._ensure_samples()
         if not self.samples:
             return None, "No samples available"
         try:
                 return output, f"{msg}\nSample: {Path(sample).name}"
             return output, msg
         except Exception as e:
+            return None, f"❌ Error: {str(e)}"
 # ============================================================
 # Gradio UI
 # ============================================================
+print("[STARTUP] Creating VoiceCloner (RVC only, HiFi-GAN lazy)...")
 cloner = VoiceCloner()
+print(f"[STARTUP] Ready. model_loaded={cloner.model_loaded}")
+demo = gr.Blocks(title="NumberBlocks One Voice Cloner")
+with demo:
+    gr.Markdown("# 🎤 NumberBlocks One Voice Cloner")
+    gr.Markdown("RVC v2 Model (60.7MB) + HiFi-GAN Vocoder | Upload audio → convert to One's voice")
+    with gr.Tab("Voice Conversion"):
+        with gr.Row():
+            input_audio = gr.Audio(label="Upload Audio", type="filepath")
+            output_audio = gr.Audio(label="Result", type="filepath")
+        pitch_slider = gr.Slider(minimum=-12, maximum=12, value=0, step=1, label="Pitch Shift (semitones)")
+        convert_btn = gr.Button("🎤 Convert Voice", variant="primary")
+        status_text = gr.Textbox(label="Status")
+        convert_btn.click(
+            fn=cloner.process_audio,
+            inputs=[input_audio, pitch_slider],
+            outputs=[output_audio, status_text],
+        )
+    with gr.Tab("Random Sample"):
+        rand_audio = gr.Audio(label="Result", type="filepath")
+        rand_status = gr.Textbox(label="Status")
+        rand_btn = gr.Button("🎲 Generate Random", variant="primary")
+        rand_btn.click(
+            fn=cloner.generate_random,
+            inputs=[],
+            outputs=[rand_audio, rand_status],
+        )
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)