Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| import re | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| from peft import PeftModel | |
# Checkpoint identifiers handed to the `from_pretrained` loaders:
# the base model first, then the adapter that is applied on top of it.
MODEL_NAME = "jmcinern/qwen3-8B-cpt-sft-awq"
DPO_ADAPTER = "jmcinern/qomhra-8B-awq-dpo-beta-0.5-checkpoint-checkpoint-100"

# Matches one <think>...</think> span (non-greedy, may cross newlines)
# together with any whitespace that directly follows it.
THINK_TAG_PATTERN = re.compile(
    r"<think>.*?</think>\s*",
    re.DOTALL,
)
class ChatBot:
    """Chat front-end around the base model with the DPO adapter applied.

    The tokenizer and model are loaded synchronously while the instance is
    constructed. `chat` then answers one user message at a time, given the
    conversation so far as a list of ``(user_msg, bot_msg)`` pairs.
    """

    # Text marker at which a generated reply is cut off.
    STOP_STRING = "assistant\n"

    def __init__(self):
        self.model = None
        self.tokenizer = None
        # True only while `load_model` is running.
        self.loading = True
        self.load_model()

    def load_model(self):
        """Load model and tokenizer sequentially.

        Any failure is printed instead of raised, so the caller can still
        start; `self.model` then stays ``None`` and `chat` reports the
        failure. `self.loading` is cleared whether loading succeeded or not.
        """
        try:
            print("Loading tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                MODEL_NAME, trust_remote_code=True
            )
            print("Tokenizer loaded!")
            print("Loading model...")
            base_model = AutoModelForCausalLM.from_pretrained(
                MODEL_NAME,
                trust_remote_code=True,
                device_map="auto",
                torch_dtype="auto",
                low_cpu_mem_usage=True,
            )
            # Only assign once the adapter is attached, so a failure here
            # leaves `self.model` as None rather than a bare base model.
            self.model = PeftModel.from_pretrained(base_model, DPO_ADAPTER)
            print("Model loaded!")
        except Exception as e:
            # Deliberately best-effort: the error is surfaced through `chat`.
            print(f"Error loading model: {e}")
        finally:
            self.loading = False

    @staticmethod
    def _truncate_at_stop(text, stop=STOP_STRING):
        """Return `text` up to, but not including, the first `stop` marker.

        `text` is returned unchanged when the marker does not occur.
        """
        head, _, _ = text.partition(stop)
        return head

    def chat(self, message, history):
        """Generate a reply to `message` and return the extended history.

        Args:
            message: The new user message.
            history: Previous turns as ``(user_msg, bot_msg)`` pairs;
                ``None`` is treated as an empty conversation.

        Returns:
            A new list: the previous turns followed by ``(message, reply)``.
            The input list is not modified.
        """
        history = list(history) if history else []
        if self.loading:
            return history + [(message, "Model is loading, please wait...")]
        if self.model is None:
            return history + [(message, "Model failed to load")]

        # Build messages
        messages = []
        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})
        messages.append({"role": "user", "content": message})

        # Apply chat template and strip any <think>...</think> block from
        # the rendered prompt.
        prompt = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
        )
        prompt = THINK_TAG_PATTERN.sub("", prompt)

        # Tokenize
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        prompt_length = inputs.input_ids.shape[1]

        # Generate response.
        # The stop marker is passed as a *string*: `eos_token_id` treats every
        # id in its list as an independent end-of-sequence token, so listing
        # the ids of "assistant\n" there would end generation at the first
        # lone newline token and truncate multi-line replies.
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=2048,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
                stop_strings=[self.STOP_STRING],
                tokenizer=self.tokenizer,  # required by `stop_strings`
            )

        # Decode only the newly generated tokens and clean the reply.
        response = self.tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )
        response = THINK_TAG_PATTERN.sub("", response)
        # The stop marker itself is part of the generated text; drop it and
        # anything after it.
        response = self._truncate_at_stop(response).strip()
        return history + [(message, response)]
# Single shared chatbot instance; constructing it runs `load_model`.
bot = ChatBot()

# Page layout: a title, the conversation view and a one-line input box.
with gr.Blocks() as demo:
    gr.HTML('<h1 style="margin:0;">A Bilingual Irish-English LLM — Developed by Abair.ie</h1>')
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(placeholder="Type your message...", show_label=False)

    # On submit: first update the conversation, then empty the input box.
    submit_event = msg.submit(
        fn=bot.chat,
        inputs=[msg, chatbot],
        outputs=[chatbot],
    )
    submit_event.then(fn=lambda: "", outputs=msg)

if __name__ == "__main__":
    demo.launch()