File size: 3,503 Bytes
2499997
 
 
43c9011
c4b05c8
 
2499997
 
c4b05c8
 
452fd6f
 
2499997
 
 
 
 
 
452fd6f
 
2499997
452fd6f
 
2499997
452fd6f
 
 
 
 
2499997
c4b05c8
2499997
 
 
 
452fd6f
2499997
c4b05c8
 
 
 
452fd6f
2499997
452fd6f
2499997
 
452fd6f
2499997
 
 
452fd6f
2499997
 
452fd6f
2499997
 
 
 
 
 
452fd6f
 
2499997
 
 
 
452fd6f
 
2499997
452fd6f
 
 
 
 
 
 
2499997
 
 
78a0e5d
2499997
 
7705b58
452fd6f
2499997
452fd6f
 
 
 
 
2499997
452fd6f
2499997
 
452fd6f
2499997
 
 
 
 
3b1fdb8
78a0e5d
2499997
452fd6f
2499997
 
 
452fd6f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import gradio as gr
import torch
import re
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel


# Model configuration
MODEL_NAME = "jmcinern/qwen3-8B-cpt-sft-awq"
DPO_ADAPTER = "jmcinern/qomhra-8B-awq-dpo-beta-0.5-checkpoint-checkpoint-100"
THINK_TAG_PATTERN = re.compile(r"<think>.*?</think>\s*", flags=re.DOTALL)


class ChatBot:
    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.loading = True
        self.load_model()

    def load_model(self):
        """Load model and tokenizer sequentially"""
        try:
            print("Loading tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                MODEL_NAME, trust_remote_code=True
            )
            print("Tokenizer loaded!")

            print("Loading model...")
            base_model = AutoModelForCausalLM.from_pretrained(
                MODEL_NAME,
                trust_remote_code=True,
                device_map="auto",
                torch_dtype="auto",
                low_cpu_mem_usage=True,
            )
            self.model = PeftModel.from_pretrained(
                base_model,
                DPO_ADAPTER
            )
            print("Model loaded!")
        except Exception as e:
            print(f"Error loading model: {e}")
        finally:
            self.loading = False

    def chat(self, message, history):
        if self.loading:
            return history + [(message, "Model is loading, please wait...")]

        if not self.model:
            return history + [(message, "Model failed to load")]

        # Build messages
        messages = []
        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})
        messages.append({"role": "user", "content": message})

        # Apply chat template and strip thinking tags
        prompt = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
        )
        prompt = THINK_TAG_PATTERN.sub("", prompt)

        # Tokenize
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        # Get stop token IDs for "assistant\n"
        stop_token_ids = self.tokenizer.encode(
            "assistant\n", add_special_tokens=False
        )

        # Generate response
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=2048,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
                eos_token_id=[self.tokenizer.eos_token_id] + stop_token_ids,
            )

        # Decode and clean response
        response = self.tokenizer.decode(
            outputs[0][len(inputs.input_ids[0]) :], skip_special_tokens=True
        )
        response = THINK_TAG_PATTERN.sub("", response).strip()

        return history + [(message, response)]


# Initialize chatbot
bot = ChatBot()

# Create interface
with gr.Blocks() as demo:
    gr.HTML('<h1 style="margin:0;">A Bilingual Irish-English LLM — Developed by Abair.ie</h1>')
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(placeholder="Type your message...", show_label=False)

    msg.submit(bot.chat, [msg, chatbot], [chatbot]).then(lambda: "", outputs=msg)

if __name__ == "__main__":
    demo.launch()