"""Gradio chat front-end for a Sinhala therapy assistant.

Loads the ``BASE_MODEL`` causal LM with 4-bit quantization, applies the
``LORA_ADAPTER`` weights on top of it, and serves a ``gr.ChatInterface``
whose replies are sampled from that model.
"""
import os

import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Get HuggingFace token from environment (None when the variable is unset).
HF_TOKEN = os.environ.get("HF_TOKEN")

BASE_MODEL = "polyglots/SinLlama_v01"
LORA_ADAPTER = "E-motionAssistant/SinLlama_v01-Therapy-Sinhala"
SYSTEM_PROMPT = "You are an empathetic Sinhala therapist providing mental health support."

# Prompt/generation limits used by chat().
MAX_HISTORY_TURNS = 3
MAX_PROMPT_TOKENS = 2048
MAX_NEW_TOKENS = 256

# Populated once by load_model(); chat() reads both.
model = None
tokenizer = None


def load_model():
    """Load the quantized base model, the LoRA adapter and the tokenizer.

    The results are stored in the module-level ``model`` and ``tokenizer``
    globals. The call is a no-op when ``model`` is already set. Both globals
    are assigned together at the very end, so a failure part-way through
    never leaves ``model`` set while ``tokenizer`` is still ``None``.

    Raises:
        Exception: whatever the underlying ``from_pretrained`` calls raise;
            nothing is caught here.
    """
    global model, tokenizer

    if model is not None:
        return

    print("🔐 Loading with 4-bit quantization...")

    # 4-bit quantization config
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    # NOTE(review): trust_remote_code=True executes Python code shipped in the
    # model/tokenizer repositories; confirm both repos are trusted.
    print(f"📥 Loading base model: {BASE_MODEL}...")
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        quantization_config=bnb_config,
        device_map="auto",
        token=HF_TOKEN,
        trust_remote_code=True,
    )

    print(f"📥 Loading LoRA adapter: {LORA_ADAPTER}...")
    loaded_model = PeftModel.from_pretrained(base_model, LORA_ADAPTER, token=HF_TOKEN)

    print("📥 Loading tokenizer...")
    loaded_tokenizer = AutoTokenizer.from_pretrained(
        LORA_ADAPTER,
        token=HF_TOKEN,
        trust_remote_code=True,
    )
    if loaded_tokenizer.pad_token is None:
        # generate() is given pad_token_id, so fall back to EOS when unset.
        loaded_tokenizer.pad_token = loaded_tokenizer.eos_token
    # When a prompt exceeds MAX_PROMPT_TOKENS, drop the oldest text instead of
    # the newest user message and the trailing "Therapist:" cue.
    loaded_tokenizer.truncation_side = "left"

    model, tokenizer = loaded_model, loaded_tokenizer
    print("✅ Model loaded in 4-bit!")


# Loaded eagerly at import time so the first chat request finds the model ready.
load_model()


def _history_to_pairs(history):
    """Normalize chat history into a list of ``(user_msg, bot_msg)`` pairs.

    Two layouts are accepted:

    * pair entries -- each entry is a 2-item sequence ``(user_msg, bot_msg)``;
    * message dicts -- each entry has ``"role"`` and ``"content"`` keys; a
      ``"user"`` message is paired with the next ``"assistant"`` message.

    Dict entries whose content is not a string are ignored.
    """
    pairs = []
    pending_user = None
    for turn in history or []:
        if isinstance(turn, dict):
            content = turn.get("content")
            if not isinstance(content, str):
                continue
            role = turn.get("role")
            if role == "user":
                pending_user = content
            elif role == "assistant" and pending_user is not None:
                pairs.append((pending_user, content))
                pending_user = None
        else:
            user_msg, bot_msg = turn
            pairs.append((user_msg, bot_msg))
    return pairs


def _build_prompt(message, history):
    """Build the text prompt from the system prompt, recent turns and *message*."""
    parts = [f"{SYSTEM_PROMPT}\n\n"]
    for user_msg, bot_msg in _history_to_pairs(history)[-MAX_HISTORY_TURNS:]:
        parts.append(f"User: {user_msg}\nTherapist: {bot_msg}\n")
    parts.append(f"User: {message}\nTherapist:")
    return "".join(parts)


def chat(message, history):
    """Generate the assistant's reply for a Gradio chat turn.

    Args:
        message: The latest user message.
        history: Earlier turns, either as ``(user, bot)`` pairs or as
            ``{"role": ..., "content": ...}`` dicts. Only the last
            ``MAX_HISTORY_TURNS`` exchanges are put into the prompt.

    Returns:
        The stripped generated text, ``""`` when *message* is empty or
        whitespace-only, or a fixed Sinhala apology string when prompt
        building or generation raises.
    """
    if not message or not message.strip():
        return ""

    try:
        prompt = _build_prompt(message, history)

        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=MAX_PROMPT_TOKENS,
        ).to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=MAX_NEW_TOKENS,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        # generate() returns prompt + continuation; decode only the new tokens.
        input_len = inputs.input_ids.shape[1]
        response = tokenizer.decode(outputs[0][input_len:], skip_special_tokens=True)
        return response.strip()
    except Exception as e:
        # UI boundary: log the failure and show a friendly message instead of
        # surfacing the exception in the chat window.
        print(f"❌ Error: {e}")
        return "සමාවන්න, දෝෂයක් ඇතිවිය. කරුණාකර නැවත උත්සාහ කරන්න."


demo = gr.ChatInterface(
    fn=chat,
    title="💚 E.motion Sinhala Therapy Assistant",
    description="*ඔබේ දයාබර AI සහායක - Your compassionate AI companion for mental wellbeing in Sinhala*\n\n**Note:** This is an AI assistant, not a replacement for professional therapy.",
    theme=gr.themes.Soft(),
    chatbot=gr.Chatbot(height=450),
)

if __name__ == "__main__":
    demo.launch()