Spaces:
Sleeping
Sleeping
# app.py
# Accent Detection + Regional Cuisine Recommendation (MFCC + HuBERT)
| import os | |
| from pathlib import Path | |
| import pickle | |
| import numpy as np | |
| import librosa | |
| import soundfile as sf # noqa: F401 | |
| from sklearn.preprocessing import StandardScaler, LabelEncoder | |
| from sklearn.linear_model import LogisticRegression # noqa: F401 | |
| import gradio as gr | |
# Optional HuBERT imports: when torch / transformers cannot be imported the
# module still loads and HF_AVAILABLE records that the HuBERT path is off.
try:
    import torch
    from transformers import Wav2Vec2FeatureExtractor, HubertModel
except Exception:
    # Any import-time failure simply disables HuBERT support.
    HF_AVAILABLE = False
else:
    HF_AVAILABLE = True
# Sample rate (Hz) passed to librosa.load for every clip.
DEFAULT_SR = 16_000
# Directory the pickled classifier / scaler / label-encoder files are read from.
CHECKPOINT_DIR = Path("checkpoints")
# ---------------------------------------------------------------------
# 1. CUISINE MAP (based on your exact dishes)
# ---------------------------------------------------------------------
# Keyed by canonical region id. Every entry carries the same six fields:
# a display name ("region") plus five dish lists.
CUISINE_MAP = {
    "andhra_pradesh": {
        "region": "Andhra Pradesh",
        "starters_veg": ["Pesarattu (green gram dosa)"],
        "starters_nonveg": [
            "Kodi Vepudu (Andhra chicken fry)",
            "Royyala Vepudu (prawn fry)",
        ],
        "main_veg": ["Pulihora", "Ulava Charu with Rice"],
        "main_nonveg": ["Gongura Mutton", "Andhra Chicken Curry / Kodi Kura"],
        "desserts": [
            "Bobbatlu (Puran Poli)",
            "Pootharekulu",
            "Paramannam (rice kheer)",
        ],
    },
    "kerala": {
        "region": "Kerala",
        "starters_veg": ["Banana Chips"],
        "starters_nonveg": ["Erachi Fry (Beef Fry)", "Fish Cutlets"],
        "main_veg": ["Puttu & Kadala Curry", "Appam with Veg Stew", "Kerala Sadya"],
        "main_nonveg": ["Karimeen Pollichathu", "Kerala Fish Curry"],
        "desserts": ["Palada Payasam", "Ada Pradhaman", "Elaneer Payasam"],
    },
    "gujarat": {
        "region": "Gujarat",
        "starters_veg": ["Dhokla", "Khandvi", "Sev Khamani"],
        "starters_nonveg": ["Local non‑veg starters (rare in traditional cuisine)"],
        "main_veg": ["Undhiyu", "Thepla", "Dal Dhokli", "Sev Tameta Nu Shaak"],
        "main_nonveg": ["Local non‑veg mains (if available)"],
        "desserts": ["Basundi", "Shrikhand", "Mohanthal"],
    },
    "jharkhand": {
        "region": "Jharkhand",
        "starters_veg": ["Dhuska", "Chilka Roti", "Rugra Fry (mushroom fry)"],
        "starters_nonveg": ["Local non‑veg starters"],
        "main_veg": [
            "Bamboo Shoot Curry",
            "Kadho (local dal curry)",
            "Litti‑Chokha",
        ],
        "main_nonveg": ["Local non‑veg curries", "Handia (served with meals)"],
        "desserts": ["Thekua", "Tilkut", "Malpua (Jharkhand style)"],
    },
    "tamil_nadu": {
        "region": "Tamil Nadu",
        "starters_veg": ["Medu Vada", "Masala Vadai"],
        "starters_nonveg": ["Chicken 65"],
        "main_veg": ["Sambar Rice", "Pongal"],
        "main_nonveg": ["Chettinad Chicken Curry", "Kothu Parotta"],
        "desserts": ["Payasam", "Kesari", "Jigarthanda (Madurai)"],
    },
    "karnataka": {
        "region": "Karnataka",
        "starters_veg": ["Maddur Vada", "Goli Baje"],
        "starters_nonveg": ["Mangalore Chicken Ghee Roast"],
        "main_veg": ["Bisi Bele Bath", "Neer Dosa", "Ragi Mudde with Sambar"],
        "main_nonveg": ["Coorg Pandi Curry"],
        "desserts": ["Mysore Pak", "Kesari Bath", "Obbattu / Holige"],
    },
    # Fallback entry used when a label has no mapping yet.
    "default": {
        "region": "Unknown / Other",
        "starters_veg": ["Local vegetarian starters"],
        "starters_nonveg": ["Local non‑veg starters"],
        "main_veg": ["Local vegetarian mains"],
        "main_nonveg": ["Local non‑veg mains"],
        "desserts": ["Local desserts"],
    },
}
# ---------------------------------------------------------------------
# 2. LABEL ALIAS MAP (so HuBERT/MFCC labels map to these keys)
# ---------------------------------------------------------------------
# Canonical CUISINE_MAP key -> every raw label spelling that resolves to it.
_ALIAS_GROUPS = {
    # Andhra / Telugu-ish labels
    "andhra_pradesh": (
        "andhra",
        "andhra_pradesh",
        "ap",
        "telugu",
        "telugu_andhra",
        "telugu_india",
    ),
    # Kerala / Malayalam
    "kerala": ("kerala", "ml", "malayalam", "kerala_malayalam"),
    # Gujarat
    "gujarat": ("gujarat", "gj", "gujarati"),
    # Jharkhand
    "jharkhand": ("jharkhand", "jh"),
    # Tamil Nadu
    "tamil_nadu": ("tamil_nadu", "tamil", "tn"),
    # Karnataka
    "karnataka": ("karnataka", "ka", "kannada"),
}

# Flat lookup: raw (lower-cased) label -> canonical region key.
LABEL_ALIAS = {
    alias: region_key
    for region_key, aliases in _ALIAS_GROUPS.items()
    for alias in aliases
}
def format_cuisine_output(pred_label: str) -> str:
    """Render the dish recommendations for a predicted accent label.

    The label is stringified, trimmed and lower-cased, and runs of
    whitespace / hyphens are collapsed to single underscores so spellings
    such as "Tamil Nadu" or "andhra-pradesh" reach the underscore-style keys.
    It is then mapped through LABEL_ALIAS and looked up in CUISINE_MAP;
    labels without an entry use the "default" entry.

    Args:
        pred_label: Raw label produced by the classifier. ``None`` and
            non-string values are accepted.

    Returns:
        A multi-line, human-readable summary of the region and its dishes.
    """
    raw = "" if pred_label is None else str(pred_label)
    # split() also trims, so no separate strip() is needed.
    normalized = "_".join(raw.lower().replace("-", " ").split())
    key = LABEL_ALIAS.get(normalized, normalized)
    info = CUISINE_MAP.get(key, CUISINE_MAP["default"])

    lines = [
        # The trailing newline leaves a blank line under the region heading.
        f"Inferred region: {info['region']}\n",
        "Starters (Veg): " + ", ".join(info["starters_veg"]),
        "Starters (Non‑Veg): " + ", ".join(info["starters_nonveg"]),
        "",
        "Main Course (Veg): " + ", ".join(info["main_veg"]),
        "Main Course (Non‑Veg): " + ", ".join(info["main_nonveg"]),
        "",
        "Desserts: " + ", ".join(info["desserts"]),
    ]
    return "\n".join(lines)
# ---------------------------------------------------------------------
# 3. Audio & Feature helpers
# ---------------------------------------------------------------------
def normalize_audio_path(audio):
    """Reduce the value handed over by the audio widget to a file path.

    Args:
        audio: ``None``, a path string, an ``os.PathLike`` object, an object
            exposing a ``name`` attribute (e.g. an open or temporary file),
            or a tuple / list whose first element is one of those.

    Returns:
        The file path, or ``None`` when no audio was supplied (``None`` or an
        empty sequence). Anything unrecognised is passed through ``str()``.
    """
    if audio is None:
        return None
    if isinstance(audio, str):
        return audio
    # Must precede the ``name`` check: pathlib paths also expose ``.name``,
    # but there it is only the final component, which would drop the folder.
    if isinstance(audio, os.PathLike):
        return os.fspath(audio)
    if hasattr(audio, "name"):
        return audio.name
    if isinstance(audio, (tuple, list)):
        if not audio:
            # An empty sequence carries no audio; avoid an IndexError.
            return None
        return normalize_audio_path(audio[0])
    return str(audio)
def extract_mfcc_pooled(path: str, sr: int = DEFAULT_SR, n_mfcc: int = 40) -> np.ndarray:
    """Load an audio file and summarise its MFCCs as one fixed-size vector.

    Args:
        path: Audio file to read; it is loaded as mono and resampled to ``sr``.
        sr: Target sample rate in Hz.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        The mean of the MFCC matrix along axis 1 concatenated with its
        standard deviation along axis 1 (``2 * n_mfcc`` values for the
        ``(n_mfcc, frames)`` matrix librosa documents).

    Raises:
        ValueError: If the file decodes to zero samples.
    """
    y, _ = librosa.load(path, sr=sr, mono=True)
    if y.size == 0:
        # Fail with a clear message instead of pooling features of nothing.
        raise ValueError("Audio file contains no samples.")
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return np.concatenate([mfcc.mean(axis=1), mfcc.std(axis=1)])
# Lazily created HuBERT feature extractor / model, shared across calls so the
# weights are only loaded once per process.
_hf_feat = None
_hf_model = None


def get_hubert_layer_embedding(path: str, layer_idx: int = 11) -> np.ndarray:
    """
    Uses facebook/hubert-large-ll60k (hidden size 1024, matching a 1024‑dim scaler).
    Returns a 1024‑dim vector: mean‑pooled over time from the chosen layer.

    Args:
        path: Audio file to embed; loaded as mono at DEFAULT_SR.
        layer_idx: Index into the model's ``hidden_states``. Integral floats
            are accepted and negative values index from the end.

    Raises:
        RuntimeError: If torch / transformers could not be imported.
        ValueError: If the file decodes to zero samples.
        IndexError: If ``layer_idx`` is outside the available hidden states.
    """
    global _hf_feat, _hf_model
    if not HF_AVAILABLE:
        raise RuntimeError("Transformers / torch are not available on this Space.")

    # Callers may pass a float (e.g. a UI slider value); indexing needs an int.
    layer = int(layer_idx)

    if _hf_feat is None or _hf_model is None:
        _hf_feat = Wav2Vec2FeatureExtractor.from_pretrained(
            "facebook/hubert-large-ll60k"
        )
        _hf_model = HubertModel.from_pretrained(
            "facebook/hubert-large-ll60k", output_hidden_states=True
        )

    y, _ = librosa.load(path, sr=DEFAULT_SR, mono=True)
    if y.size == 0:
        raise ValueError("Audio file contains no samples.")

    inputs = _hf_feat(y, sampling_rate=DEFAULT_SR, return_tensors="pt", padding=True)
    with torch.no_grad():
        out = _hf_model(**inputs)

    hidden_states = out.hidden_states  # list[Tensor(batch, time, dim)]
    n_states = len(hidden_states)
    if not -n_states <= layer < n_states:
        raise IndexError(
            f"HuBERT layer index {layer} is out of range; "
            f"the model exposes {n_states} hidden states."
        )

    hs = hidden_states[layer]  # (batch, time, dim)
    if hs.ndim == 3:
        # Average over the time axis, then drop the batch axis.
        vec = hs.mean(dim=1).squeeze(0).cpu().numpy()  # (dim,)
    else:
        vec = hs.mean(dim=0).cpu().numpy()
    return vec
# ---------------------------------------------------------------------
# 4. Load models from checkpoints
# ---------------------------------------------------------------------
# Registry of the loaded model bundles, keyed by feature type
# ("mfcc" / "hubert"); filled in by load_models().
app_state = {"models": {}}


def _read_checkpoint(filename):
    """Unpickle a single artefact stored under CHECKPOINT_DIR."""
    # NOTE: pickle.load can execute arbitrary code from the file, so the
    # checkpoint directory must only ever contain trusted artefacts.
    with open(CHECKPOINT_DIR / filename, "rb") as fh:
        return pickle.load(fh)


def load_models():
    """Load the MFCC and HuBERT model bundles into ``app_state["models"]``.

    Each bundle is loaded independently and on a best-effort basis: if any
    part of a bundle fails to load, the reason is printed and that bundle is
    left unregistered, without affecting the other one.
    """
    registry = app_state["models"]

    # MFCC model
    try:
        registry["mfcc"] = {
            "clf": _read_checkpoint("clf_mfcc.pkl"),
            "scaler": _read_checkpoint("scaler_mfcc.pkl"),
            "le": _read_checkpoint("le_mfcc.pkl"),
        }
        print("Loaded MFCC model.")
    except Exception as e:
        print("Could not load MFCC model:", e)

    # HuBERT model
    try:
        hubert_bundle = {
            "clf": _read_checkpoint("clf_hubert.pkl"),
            "scaler": _read_checkpoint("scaler_hubert.pkl"),
            "le": _read_checkpoint("le_hubert.pkl"),
        }
        # The layer index is read from hubert_layer.txt when that file
        # exists; otherwise it falls back to 11.
        layer_file = CHECKPOINT_DIR / "hubert_layer.txt"
        trained_layer = int(layer_file.read_text().strip()) if layer_file.exists() else 11
        hubert_bundle["layer"] = trained_layer
        registry["hubert"] = hubert_bundle
        print("Loaded HuBERT model (layer", trained_layer, ").")
    except Exception as e:
        print("Could not load HuBERT model:", e)


# Load once at import time so the UI callbacks find the models ready.
load_models()
# ---------------------------------------------------------------------
# 5. Prediction logic
# ---------------------------------------------------------------------
def predict_accent_and_cuisine(audio_file, feature_choice, hubert_layer_idx, use_trained):
    """Predict the accent label for a clip and build its cuisine summary.

    Args:
        audio_file: Value from the audio widget (see normalize_audio_path).
        feature_choice: "HuBERT" selects the HuBERT model; any other value
            selects MFCC.
        hubert_layer_idx: HuBERT layer to use when the loaded model bundle
            does not record one.
        use_trained: Must be truthy; on-the-fly training is not supported.

    Returns:
        A ``(label_or_message, cuisine_text)`` tuple. On any failure the
        first element holds an explanatory message and the second is "".
    """
    audio_path = normalize_audio_path(audio_file)
    if audio_path is None:
        return "No audio provided.", ""

    feat = "hubert" if feature_choice == "HuBERT" else "mfcc"

    if not use_trained:
        return (
            "On‑the‑fly training is disabled on this Space. "
            "Please keep 'Use trained model' checked.",
            "",
        )

    model_info = app_state["models"].get(feat)
    if not model_info:
        return f"No trained {feat.upper()} model found on server.", ""

    clf = model_info["clf"]
    scaler = model_info["scaler"]
    le = model_info["le"]

    # Feature extraction
    try:
        if feat == "mfcc":
            x = extract_mfcc_pooled(audio_path)
        else:
            # Prefer the layer stored with the model. load_models() always
            # records one (defaulting to 11), so the slider value only
            # applies to bundles registered without a layer.
            layer = model_info.get("layer")
            if layer is None:
                layer = hubert_layer_idx
            # Sliders may deliver floats; hidden-state indexing needs an int.
            x = get_hubert_layer_embedding(audio_path, layer_idx=int(layer))
    except Exception as e:
        return f"Feature extraction failed: {e}", ""

    # Prediction
    try:
        Xs = scaler.transform(x.reshape(1, -1))
        pred_idx = clf.predict(Xs)[0]
        # str() guards against encoders whose classes are not plain strings,
        # which would otherwise break the text formatting below.
        pred_label = str(le.inverse_transform([pred_idx])[0])
    except Exception as e:
        return f"Prediction error: {e}", ""

    cuisine_text = format_cuisine_output(pred_label)
    return pred_label, cuisine_text
# ---------------------------------------------------------------------
# 6. Gradio UI
# ---------------------------------------------------------------------
# NOTE(review): the nesting below is reconstructed because the source lost
# its indentation. It assumes the Row holds the audio input and the options
# Column, with the button and outputs beneath it — confirm against the
# deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("# Accent Detection & Cuisine Recommendation")
    gr.Markdown(
        "Upload a short speech clip. The app predicts the speaker's regional accent "
        "and suggests popular veg / non‑veg starters, main course, and desserts "
        "from that region.\n\n"
        "**Tip:** Start with MFCC. Use HuBERT once the model has fully loaded."
    )

    with gr.Row():
        audio_in = gr.Audio(type="filepath", label="Upload audio (.wav / .mp3 / .flac)")
        with gr.Column():
            feature_choice = gr.Radio(
                choices=["MFCC", "HuBERT"], value="MFCC", label="Feature Type"
            )
            hubert_layer_idx = gr.Slider(
                minimum=0, maximum=23, step=1, value=11,
                label="HuBERT Layer (for HuBERT mode)",
            )
            use_trained = gr.Checkbox(
                value=True, label="Use trained model (required on this Space)"
            )

    btn = gr.Button("Predict Accent & Recommend Dishes")
    out_label = gr.Textbox(label="Predicted Accent")
    out_cuisine = gr.Textbox(label="Recommended Cuisines", lines=10)

    # Wire the button to the prediction callback: four inputs, two outputs.
    btn.click(
        fn=predict_accent_and_cuisine,
        inputs=[audio_in, feature_choice, hubert_layer_idx, use_trained],
        outputs=[out_label, out_cuisine],
    )
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=port)