"""
Kurtis-EON1 Conversational Chat — Echo-DSRN-Hybrid
Gradio 6.x · model.generate() + TextIteratorStreamer · no tool calling
"""

import os
import time
from threading import Thread

import gradio as gr
import torch
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
)

# ── Config ───────────────────────────────────────────────────────────────────
MODEL_PATH = "mrs83/Kurtis-EON1-Hybrid-0.7B-v0.1.1"
MAX_NEW_TOKENS = 256
MAX_HISTORY_TOKENS = 2048
TEMPERATURE = 0.85
TOP_P = 0.92
REPETITION_PENALTY = 1.15
STOP_STRINGS = ["<|im_end|>", "<|endoftext|>", "<|im_start|>"]

SYSTEM_PROMPT = (
    "You are Kurtis-EON1, a deeply empathetic and sophisticated AI "
    "assistant developed by ethicalabs.ai. Your purpose is to provide emotionally "
    "intelligent, culturally aware, and highly personalised support."
)

# Portrait — same directory as this file
PORTRAIT = os.path.join(os.path.dirname(__file__), "kurtis.jpg")

# ── Load model ────────────────────────────────────────────────────────────────
print(f"📦 Loading {MODEL_PATH} …")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="cuda" if torch.cuda.is_available() else "cpu",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
model.eval()
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
print("✅ Model ready.")


# ── History helpers ───────────────────────────────────────────────────────────
def _normalize(content):
    """Convert Gradio 6 multi-part content (list) to a single string.

    A list is flattened by joining the ``"text"`` value of every dict part
    with single spaces; parts that are not dicts or have no ``"text"`` key are
    skipped. ``None`` becomes the empty string so that a missing message never
    reaches the prompt as the literal text ``"None"``. Any other value is
    passed through ``str()``.
    """
    if content is None:
        return ""
    if isinstance(content, list):
        return " ".join(
            # ``or ""`` guards against a part whose text is None, which would
            # otherwise make ``str.join`` raise.
            str(part["text"] or "")
            for part in content
            if isinstance(part, dict) and "text" in part
        )
    return str(content)


def _count_tokens(messages: list[dict]) -> int:
    """Return the token length of *messages* rendered through the chat template.

    If templating/tokenizing fails for any reason, fall back to a rough
    estimate of two tokens per whitespace-separated word so that history
    trimming keeps working (deliberate best-effort).
    """
    try:
        ids = tokenizer.apply_chat_template(
            messages, tokenize=True, add_generation_prompt=False
        )
        # NOTE(review): depending on the transformers version the call may
        # return a mapping holding "input_ids" and/or a batched (nested) list
        # instead of a flat id list — unwrap both shapes before measuring.
        if hasattr(ids, "keys") and "input_ids" in ids:
            ids = ids["input_ids"]
        if len(ids) > 0 and isinstance(ids[0], (list, tuple)):
            ids = ids[0]
        return len(ids)
    except Exception:
        return sum(len(str(m.get("content", "")).split()) * 2 for m in messages)


def _trim(messages: list[dict]) -> list[dict]:
    """Drop the oldest turns until the history fits ``MAX_HISTORY_TOKENS``.

    System messages are always kept and placed first. Non-system messages are
    removed two at a time from the front (oldest user/assistant pair). The
    newest message is never removed, so the prompt always still contains the
    request being answered even if it alone exceeds the budget.
    """
    system = [m for m in messages if m["role"] == "system"]
    convo = [m for m in messages if m["role"] != "system"]
    # ``len(convo) > 2`` (not ``>= 2``) keeps the tail of the conversation when
    # the number of turns is even; the length test comes first so the tokenizer
    # is not invoked when nothing can be dropped anyway.
    while len(convo) > 2 and _count_tokens(system + convo) > MAX_HISTORY_TOKENS:
        convo = convo[2:]
    return system + convo


# ── Streaming chat ────────────────────────────────────────────────────────────
def _run_generation(gen_kwargs: dict, streamer, errors: list) -> None:
    """Run ``model.generate`` in a worker thread and always release the consumer.

    On failure the exception is appended to *errors* and the streamer is ended
    explicitly; otherwise the consumer iterating the streamer would wait
    forever for an end-of-stream signal that never arrives.
    """
    try:
        model.generate(**gen_kwargs)
    except Exception as exc:  # thread boundary: hand the error to the caller
        errors.append(exc)
        streamer.end()


def stream_respond(message: str, history: list):
    """Stream an assistant reply for *message* given the chat *history*.

    history: list of dicts with 'role' and 'content' (Gradio 6.8 default).

    Yields ``(history, stats)`` tuples: the updated message list (the previous
    history plus the new user turn and the assistant turn being filled in)
    and a short status string. A blank message yields the untouched history
    once and stops.
    """
    history = history or []

    # Normalize current message
    norm_msg = _normalize(message)
    if not norm_msg.strip():
        yield history, "⚡ 0.0 TPS"
        return

    # Build normalized message list for the tokenizer
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(
        {"role": turn["role"], "content": _normalize(turn["content"])}
        for turn in history
    )
    messages.append({"role": "user", "content": norm_msg})
    messages = _trim(messages)

    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=False
    )

    # Explicit None check: a pad token id of 0 is valid and must not be
    # replaced by the EOS id.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    gen_kwargs = dict(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        repetition_penalty=REPETITION_PENALTY,
        streamer=streamer,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=pad_token_id,
    )

    errors: list = []
    thread = Thread(target=_run_generation, args=(gen_kwargs, streamer, errors))
    thread.start()

    new_history = list(history) + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": ""},
    ]

    partial = ""
    token_count = 0  # counts streamer chunks, used as the token proxy for TPS
    start = time.perf_counter()  # monotonic: immune to wall-clock adjustments

    for chunk in streamer:
        # Special tokens are streamed as text (skip_special_tokens=False), so
        # strip the chat-template markers before showing the reply.
        for marker in STOP_STRINGS:
            chunk = chunk.replace(marker, "")
        partial += chunk
        token_count += 1
        elapsed = time.perf_counter() - start
        tps = token_count / elapsed if elapsed > 0 else 0
        new_history[-1]["content"] = partial.strip()
        yield new_history, f"⚡ {tps:.1f} TPS | {token_count} tokens"

    thread.join()

    if errors:
        err = errors[0]
        print(f"❌ Generation failed: {err!r}")
        if not new_history[-1]["content"]:
            new_history[-1]["content"] = "⚠️ Generation failed — please try again."
        yield new_history, f"❌ Generation failed ({type(err).__name__})"
        return

    yield new_history, f"✅ Done | {token_count} tokens"
# ── UI ────────────────────────────────────────────────────────────────────────
css = """
.portrait img { border-radius: 12px; width: 100%; object-fit: cover; }
.name-tag {
    text-align: center;
    font-size: 1.25rem;
    font-weight: 700;
    margin-top: 8px;
    color: #e2e8f0;
    letter-spacing: 0.02em;
}
.bio {
    font-size: 0.82rem;
    color: #94a3b8;
    text-align: center;
    line-height: 1.6;
    margin-top: 12px;
}
.stats-bar { font-size: 0.82rem; color: #64748b; }
footer { display: none !important; }

/* Lock textarea to single line — block Gradio's JS height setter */
textarea {
    resize: none !important;
    height: 48px !important;
    min-height: 48px !important;
    max-height: 48px !important;
    overflow-y: auto !important;
}

/* Mobile Optimizations */
@media (max-width: 768px) {
    .name-tag { font-size: 1.1rem; margin-top: 0; }
    .bio { margin-top: 4px; line-height: 1.3; }
}
"""

with gr.Blocks(title="Kurtis-EON1-Hybrid-0.7B Chat") as demo:
    with gr.Row():
        # ── Left sidebar: Kurtis portrait ──────────────────────────────────
        with gr.Column(scale=1, min_width=220):
            gr.Image(
                value=PORTRAIT,
                show_label=False,
                interactive=False,
                elem_classes=["portrait"],
            )
            # NOTE(review): the original HTML tags of this literal were lost
            # (only the text survived, with raw line breaks inside the quoted
            # strings). The markup below is a minimal reconstruction that
            # applies the `.name-tag` and `.bio` classes defined in `css`;
            # confirm against the original file and restore any extra styling.
            gr.HTML(
                '<div class="name-tag">Kurtis-EON1 v0.1.1</div>'
                '<div class="bio">'
                "Echo-DSRN-Hybrid<br>"
                "Qwen2.5-0.5B-Instruct (backbone)<br>"
                "+ Echo-DSRN (memory)"
                "</div>"
            )

        # ── Right: chat ────────────────────────────────────────────────────
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                show_label=False,
                height=480,
            )
            with gr.Row():
                msg = gr.Textbox(
                    placeholder="Message Kurtis…",
                    show_label=False,
                    container=False,
                    scale=9,
                )
                send = gr.Button("Send", variant="primary", scale=1)
            with gr.Row():
                stats = gr.Markdown("⚡ 0.0 TPS", elem_classes=["stats-bar"])
                clear = gr.Button("🗑 Clear", size="sm")

    # Wire up: Enter and the Send button run the same streaming handler, then
    # empty the textbox once the reply has finished.
    msg.submit(stream_respond, [msg, chatbot], [chatbot, stats]).then(
        lambda: "", None, msg
    )
    send.click(stream_respond, [msg, chatbot], [chatbot, stats]).then(
        lambda: "", None, msg
    )
    # Clear resets both the conversation and the stats line without queueing.
    clear.click(lambda: ([], "⚡ 0.0 TPS"), None, [chatbot, stats], queue=False)

if __name__ == "__main__":
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        theme=gr.themes.Soft(primary_hue="violet", neutral_hue="slate"),
        css=css,
    )