"""Gradio chat UI for a Gemma base model with a LoRA adapter applied.

The script loads the base tokenizer/model, attaches the PEFT adapter, and
serves a minimal chat interface whose history is a list of
``(user_message, assistant_reply)`` pairs.
"""

from typing import List, Optional, Tuple

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Base and adapter models
BASE_MODEL = "unsloth/gemma-3-270m-it"
ADAPTER_MODEL = "Devishetty100/savyasachi"

# Load tokenizer from base model
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# Load base model: half precision on GPU, full precision on CPU.
# NOTE(review): Gemma checkpoints are often run in bfloat16 rather than
# float16; float16 is kept here to preserve existing behavior -- confirm.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)

# Load LoRA adapter on top of the base model
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
model.eval()  # set to eval mode


def chat(
    user_input: str,
    history: Optional[List[Tuple[str, str]]],
    max_new_tokens: int = 200,
    temperature: float = 1.0,
) -> str:
    """Generate the assistant's reply to ``user_input``.

    Args:
        user_input: The new user message.
        history: Previous ``(user, assistant)`` turns; ``None`` or empty
            means a fresh conversation.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature. A value that is ``None`` or
            not strictly positive switches to greedy decoding, because
            sampling requires a positive temperature.

    Returns:
        The decoded reply, with the prompt tokens and special tokens removed.
    """
    messages = []

    # Format previous chat history
    for user, assistant in history or []:
        # An incomplete turn would put a None content into the template,
        # so only completed exchanges are replayed.
        if user is None or assistant is None:
            continue
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assistant})

    messages.append({"role": "user", "content": user_input})

    # Generate prompt using chat template
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # The templated text already carries the template's special tokens, so
    # the tokenizer must not add them a second time.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(model.device)

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if temperature is not None and temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=temperature,
            top_p=0.95,
            top_k=64,
        )
    else:
        # Greedy decoding instead of an invalid sampling configuration.
        generation_kwargs["do_sample"] = False

    # Generate response
    outputs = model.generate(**inputs, **generation_kwargs)

    # Decode only the new tokens (everything after the prompt)
    prompt_length = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True,
    )
    return response


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🕉️ Savyasachi — Devotee of Lord Krishna")

    chatbot = gr.Chatbot()
    user_input = gr.Textbox(label="Ask Krishna")
    send = gr.Button("Send")

    def respond(message, history):
        """Append one exchange to the chat history and clear the textbox.

        Args:
            message: Text currently in the input box.
            history: The chatbot's current list of (user, reply) pairs,
                or ``None``.

        Returns:
            A ``(history, "")`` pair: the updated history for the chatbot
            and an empty string that clears the input box.
        """
        # Work on a copy so the component's previous value is not mutated.
        history = list(history or [])

        # Nothing to answer: skip the (expensive) generation call.
        if not message or not message.strip():
            return history, ""

        reply = chat(message, history)
        history.append((message, reply))
        return history, ""

    send.click(respond, [user_input, chatbot], [chatbot, user_input])

demo.launch()