"""
Kurtis-EON1 Conversational Chat — Echo-DSRN-Hybrid
Gradio 6.x · model.generate() + TextIteratorStreamer · no tool calling
"""

import os
import time
from threading import Thread

import gradio as gr
import torch
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
)


# ── Config ───────────────────────────────────────────────────────────────────
# Model identifier handed to the `from_pretrained` loaders.
MODEL_PATH = "mrs83/Kurtis-EON1-Hybrid-0.7B-v0.1.1"

# Token budgets: reply length cap and prompt-history cap.
MAX_NEW_TOKENS = 256
MAX_HISTORY_TOKENS = 2048

# Sampling parameters forwarded to `model.generate()`.
TEMPERATURE = 0.85
TOP_P = 0.92
REPETITION_PENALTY = 1.15

# Marker strings removed from streamed text before it is displayed.
STOP_STRINGS = [
    "<|im_end|>",
    "<|endoftext|>",
    "<|im_start|>",
]

# System message placed at the start of every prompt.
SYSTEM_PROMPT = (
    "You are Kurtis-EON1, a deeply empathetic and sophisticated AI assistant "
    "developed by ethicalabs.ai. "
    "Your purpose is to provide emotionally intelligent, culturally aware, "
    "and highly personalised support."
)

# Portrait — same directory as this file
PORTRAIT = os.path.join(os.path.split(__file__)[0], "kurtis.jpg")

# ── Load model ────────────────────────────────────────────────────────────────
print(f"📦 Loading {MODEL_PATH} …")
# Weights are requested in bfloat16 and placed on CUDA when available,
# otherwise on the CPU. `.eval()` returns the module itself, so the model can
# be switched to inference mode as part of the same expression.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    device_map=("cuda" if torch.cuda.is_available() else "cpu"),
).eval()
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
print("✅ Model ready.")


# ── History helpers ───────────────────────────────────────────────────────────
def _normalize(content):
    """Convert Gradio 6 multi-part content (list) to a single string."""
    if isinstance(content, list):
        return " ".join([c.get("text", "") for c in content if isinstance(c, dict) and "text" in c])
    return str(content)


def _count_tokens(messages: list[dict]) -> int:
    try:
        ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=False)
        return len(ids)
    except Exception:
        total = 0
        for m in messages:
            text = m.get("content", "")
            total += len(text.split()) * 2
        return total


def _trim(messages: list[dict]) -> list[dict]:
    system = [m for m in messages if m["role"] == "system"]
    convo = [m for m in messages if m["role"] != "system"]
    while _count_tokens(system + convo) > MAX_HISTORY_TOKENS and len(convo) >= 2:
        convo = convo[2:]
    return system + convo


# ── Streaming chat ────────────────────────────────────────────────────────────
def stream_respond(message: str, history: list):
    """Generate a reply to *message* and stream it into the chat history.

    Generation runs in a background thread; this generator consumes the
    streamer and yields an updated history after every streamed chunk.

    Args:
        message: The text submitted by the user. Multi-part content is
            flattened with ``_normalize``.
        history: Prior conversation as a list of dicts with 'role' and
            'content' (Gradio 6.8 default). ``None`` is treated as empty.

    Yields:
        ``(history, stats)`` tuples: the chat history including the partial
        assistant reply, and a status string with throughput and the number
        of streamed chunks. A blank message yields the unchanged history
        once and stops.

    Raises:
        Exception: Any exception raised by ``model.generate`` in the worker
            thread is re-raised here after the stream has ended.
    """
    # Work on a copy so the caller's list is never mutated; tolerate None.
    history = list(history) if history else []

    norm_msg = _normalize(message)
    if not norm_msg.strip():
        # Nothing to send: leave the chat as is and reset the stats line.
        yield history, "⚡ 0.0 TPS"
        return

    # Prompt = system prompt + normalized prior turns + the new user turn,
    # trimmed to the history token budget.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(
        {"role": turn["role"], "content": _normalize(turn["content"])}
        for turn in history
    )
    messages.append({"role": "user", "content": norm_msg})
    messages = _trim(messages)

    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    # Special tokens stay in the stream; the STOP_STRINGS markers are removed
    # explicitly in the loop below.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=False)

    # Compare against None: `pad or eos` would discard a valid pad id of 0.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    gen_kwargs = dict(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        repetition_penalty=REPETITION_PENALTY,
        streamer=streamer,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=pad_token_id,
    )

    failures = []

    def _generate():
        """Run generation; on failure, record it and close the stream."""
        try:
            model.generate(**gen_kwargs)
        except Exception as exc:
            failures.append(exc)
            # A failed generate() never signals end-of-stream, which would
            # leave the consumer loop below blocked forever.
            streamer.end()

    thread = Thread(target=_generate)
    thread.start()

    new_history = history + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": ""},
    ]
    partial = ""
    start = time.time()
    token_count = 0  # number of chunks received from the streamer

    for chunk in streamer:
        for s in STOP_STRINGS:
            chunk = chunk.replace(s, "")
        partial += chunk
        token_count += 1
        elapsed = time.time() - start
        tps = token_count / elapsed if elapsed > 0 else 0
        new_history[-1]["content"] = partial.strip()
        yield new_history, f"⚡ {tps:.1f} TPS | {token_count} tokens"

    thread.join()
    if failures:
        # Surface the worker's error instead of reporting success.
        raise failures[0]
    yield new_history, f"✅ Done | {token_count} tokens"


# ── UI ────────────────────────────────────────────────────────────────────────
# Custom stylesheet handed to `launch(css=...)`. The `portrait` and
# `stats-bar` classes are attached to components through `elem_classes`;
# `name-tag` and `bio` are used by the sidebar HTML snippet.
css = """
.portrait img { border-radius: 12px; width: 100%; object-fit: cover; }
.name-tag { text-align: center; font-size: 1.25rem; font-weight: 700;
            margin-top: 8px; color: #e2e8f0; letter-spacing: 0.02em; }
.bio { font-size: 0.82rem; color: #94a3b8; text-align: center;
       line-height: 1.6; margin-top: 12px; }
.stats-bar { font-size: 0.82rem; color: #64748b; }
footer { display: none !important; }

/* Lock textarea to single line — block Gradio's JS height setter */
textarea { resize: none !important; height: 48px !important; min-height: 48px !important; max-height: 48px !important; overflow-y: auto !important; }

/* Mobile Optimizations */
@media (max-width: 768px) {
    .name-tag { font-size: 1.1rem; margin-top: 0; }
    .bio { margin-top: 4px; line-height: 1.3; }
}
"""

with gr.Blocks(title="Kurtis-EON1-Hybrid-0.7B Chat") as demo:

    with gr.Row():
        # Sidebar column: portrait image plus a short model description.
        with gr.Column(min_width=220, scale=1):
            gr.Image(
                elem_classes=["portrait"],
                interactive=False,
                show_label=False,
                value=PORTRAIT,
            )
            gr.HTML(
                '<div class="name-tag">Kurtis-EON1 v0.1.1</div>'
                '<div class="bio">'
                '<span style="font-weight: 600; color: #fff; font-size: 0.9rem;">Echo-DSRN-Hybrid</span><br>'
                '<div style="margin-top: 8px;">Qwen2.5-0.5B-Instruct (backbone)<br>'
                "+ Echo-DSRN (memory)</div></div>"
            )

        # Main column: conversation view, input row, stats/clear row.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=480, show_label=False)

            with gr.Row():
                msg = gr.Textbox(
                    scale=9,
                    container=False,
                    show_label=False,
                    placeholder="Message Kurtis…",
                )
                send = gr.Button("Send", scale=1, variant="primary")

            with gr.Row():
                stats = gr.Markdown("⚡ 0.0 TPS", elem_classes=["stats-bar"])
                clear = gr.Button("🗑 Clear", size="sm")

    # Pressing Enter and clicking Send share one pipeline: stream the reply,
    # then empty the input box.
    for _trigger in (msg.submit, send.click):
        _trigger(
            fn=stream_respond,
            inputs=[msg, chatbot],
            outputs=[chatbot, stats],
        ).then(fn=lambda: "", inputs=None, outputs=msg)

    # Clear wipes the conversation and resets the stats line.
    clear.click(
        fn=lambda: ([], "⚡ 0.0 TPS"),
        inputs=None,
        outputs=[chatbot, stats],
        queue=False,
    )


if __name__ == "__main__":
    # Enable queuing first, then start the server on all interfaces at port
    # 7860 with the violet/slate Soft theme and the custom stylesheet.
    app = demo.queue()
    app.launch(
        css=css,
        theme=gr.themes.Soft(primary_hue="violet", neutral_hue="slate"),
        share=False,
        server_port=7860,
        server_name="0.0.0.0",
    )