File size: 2,143 Bytes
6d32cef
629f200
83064ca
a6e2d37
83064ca
a6e2d37
 
 
83064ca
a6e2d37
 
629f200
a6e2d37
 
 
83064ca
 
 
a6e2d37
 
 
3fea112
83064ca
 
 
 
a6e2d37
83064ca
 
 
 
 
 
a6e2d37
83064ca
 
 
 
 
 
a6e2d37
83064ca
 
a6e2d37
83064ca
 
629f200
6d32cef
629f200
 
 
83064ca
629f200
 
a6e2d37
 
 
 
83064ca
 
 
6d32cef
83064ca
 
629f200
83064ca
 
 
a6e2d37
83064ca
 
 
 
 
 
 
629f200
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Hugging Face repo ids: the base checkpoint and the LoRA adapter layered on it.
BASE_MODEL = "unsloth/gemma-3-270m-it"
ADAPTER_MODEL = "Devishetty100/savyasachi"

# The tokenizer is taken from the base checkpoint, not from the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# Base weights: float32 when no CUDA device is present, float16 otherwise;
# device placement is delegated to the library via device_map="auto".
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    torch_dtype=torch.float32 if not torch.cuda.is_available() else torch.float16,
)

# Wrap the base model with the adapter weights, then switch to eval mode
# since this script only runs inference.
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
model.eval()

# Chat function
def chat(user_input, history, max_new_tokens=200, temperature=1.0):
    """Generate the assistant's reply to ``user_input`` given earlier turns.

    Args:
        user_input: The newest user message.
        history: Earlier turns as an iterable of ``(user, assistant)`` pairs.
            ``None`` or an empty list means a fresh conversation.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature forwarded to ``model.generate``.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        sliced off and special tokens are skipped).
    """
    messages = []

    # Replay earlier turns in order so the template sees the full dialogue.
    for user, assistant in history or []:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assistant})

    messages.append({"role": "user", "content": user_input})

    # Render the conversation to text, ending with the generation prompt.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # The rendered template already carries the special tokens it needs, so
    # the tokenizer must not add its own again (otherwise e.g. a second BOS
    # ends up at the start of the prompt).
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(model.device)

    # NOTE(review): passing eos_token_id here takes precedence over any stop
    # ids in the model's generation config -- confirm tokenizer.eos_token_id
    # is the end-of-turn token for this checkpoint.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=0.95,
        top_k=64,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Decode only the tokens that follow the prompt.
    prompt_length = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )
    return response

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🕉️ Savyasachi — Devotee of Lord Krishna")

    chatbot = gr.Chatbot()
    user_input = gr.Textbox(label="Ask Krishna")
    send = gr.Button("Send")

    def respond(message, history):
        """Run one chat exchange and return the updated UI state.

        Args:
            message: Text currently in the input box.
            history: The Chatbot value, handled here as a list of
                ``(user, assistant)`` pairs; ``None`` is treated as empty.

        Returns:
            A ``(history, "")`` tuple: the history for the Chatbot and an
            empty string that clears the input box.
        """
        history = history or []

        # A blank message would still trigger a full generation; skip it.
        if not message or not message.strip():
            return history, ""

        reply = chat(message, history)
        history.append((message, reply))
        return history, ""

    send.click(respond, [user_input, chatbot], [chatbot, user_input])
    # Pressing Enter in the textbox sends the message, same as the button.
    user_input.submit(respond, [user_input, chatbot], [chatbot, user_input])

demo.launch()