from __future__ import annotations

import os
import inspect
from pathlib import Path
from threading import Lock

import gradio as gr
import torch
from huggingface_hub import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer

# Name of the environment variable that can override the model repo to download.
MODEL_REPO_ENV_VAR = "LUMYNAX_MODEL_REPO_ID"
# Environment variables checked, in this order, for a Hugging Face access token.
HF_TOKEN_ENV_VARS = ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "HUGGINGFACE_HUB_TOKEN")
# Repo used when MODEL_REPO_ENV_VAR is unset or blank.
DEFAULT_MODEL_REPO_ID = "AbteeXAILab/lumynax-infused-smollm2-360m-gguf"
# "plain_completion" selects the User:/Assistant: transcript layout; any other
# value selects the tokenizer chat template or the <|im_start|> layout.
PROMPT_FORMAT = "chatml"
# Added as the leading system turn by _build_messages when non-empty.
SYSTEM_PROMPT = 'You are LumynaX operating from the LumynaX Infused SmolLM2 360M Instruct GGUF package identity. Be helpful, clear, and honest about provenance.'
# Display name used in the page heading and in the canned provenance reply.
MODEL_TITLE = "LumynaX Infused SmolLM2 360M Instruct GGUF"
# Passed to model.generate as max_new_tokens.
MAX_NEW_TOKENS = 192
# Opening text of the fallback reply produced by _showcase_mode_response.
SHOWCASE_MODE_MESSAGE = (
    "This Space is currently running in browser showcase mode for the GGUF release. "
    "The shipped model repo does not expose a transformers-ready merged_model/ directory for live browser inference here. "
    "Use the packaged files locally with quickstart.py --interactive for the full terminal experience."
)

# Lazily populated by _load_runtime; both stay None until a load succeeds.
_MODEL = None
_TOKENIZER = None
# Held by _load_runtime while it downloads and loads the model.
_MODEL_LOCK = Lock()
# "<ExceptionType>: <message>" of a failed load; once set, _load_runtime raises
# it again as a RuntimeError instead of retrying.
_MODEL_ERROR = None
# True when the installed Gradio's Chatbot constructor accepts a `type` argument.
CHATBOT_SUPPORTS_TYPE = "type" in inspect.signature(gr.Chatbot.__init__).parameters


def _history_to_messages(history: list[object]) -> list[dict[str, str]]:
    messages: list[dict[str, str]] = []
    for entry in history:
        if isinstance(entry, dict):
            role = str(entry.get("role", "assistant")).strip().lower()
            if role not in ("user", "assistant"):
                continue
            content = entry.get("content", "")
            text = content if isinstance(content, str) else str(content)
            if not text.strip():
                continue
            messages.append({"role": role, "content": text.strip()})
            continue
        if not isinstance(entry, (list, tuple)) or len(entry) != 2:
            continue
        user_text = entry[0] if isinstance(entry[0], str) else str(entry[0] or "")
        assistant_text = entry[1] if isinstance(entry[1], str) else str(entry[1] or "")
        if user_text.strip():
            messages.append({"role": "user", "content": user_text.strip()})
        if assistant_text.strip():
            messages.append({"role": "assistant", "content": assistant_text.strip()})
    return messages


def _build_messages(history: list[object], message: str) -> list[dict[str, str]]:
    """Assemble the full conversation to send to the model.

    The result is, in order: the system prompt (only when ``SYSTEM_PROMPT`` is
    non-empty), the normalised prior turns from *history*, and finally the new
    user *message* with surrounding whitespace removed.
    """
    system_turns = [{"role": "system", "content": SYSTEM_PROMPT}] if SYSTEM_PROMPT else []
    prior_turns = _history_to_messages(history)
    current_turn = {"role": "user", "content": message.strip()}
    return [*system_turns, *prior_turns, current_turn]


def _append_history(history: list[object], message: str, reply: str) -> list[object]:
    return history + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": reply},
    ]


def _provenance_response(message: str) -> str | None:
    message_lower = message.strip().lower()
    asks_provenance = any(
        phrase in message_lower
        for phrase in (
            "donor",
            "donors",
            "donor model",
            "donor models",
            "base model",
            "underlying model",
            "what model do you use",
            "what models do you use",
            "deepseek",
            "qwen",
            "gemma",
            "llama",
            "phi",
        )
    )
    if not asks_provenance:
        return None
    return (
        f"This is {MODEL_TITLE}, a standalone AbteeX AI Labs LumynaX release for "
        "Aotearoa New Zealand workflows. This public Space is a browser demo of that release."
    )


def _governance_response(message: str) -> str | None:
    message_lower = message.strip().lower()

    asks_iwi_sovereignty = (
        "iwi" in message_lower
        and ("data sovereignty" in message_lower or "llm" in message_lower or "language model" in message_lower)
    )
    asks_health_sovereignty = (
        "health" in message_lower
        and ("data sovereignty" in message_lower or "governance" in message_lower or "sensitive data" in message_lower)
    )
    asks_justice_controls = (
        "justice sector" in message_lower
        or ("justice" in message_lower and "ai" in message_lower)
        or "sensitive case data" in message_lower
    )

    if asks_iwi_sovereignty:
        return (
            "For Iwi data sovereignty with an LLM, keep sensitive data in environments controlled by the data owner, "
            "minimise and de-identify data before use, agree governance and access rules with Iwi decision-makers, "
            "prevent provider training on submitted data, keep strong audit logs, require human review for high-stakes "
            "outputs, and make deletion, retention, and purpose limits explicit from the start."
        )

    if asks_health_sovereignty:
        return (
            "For health data sovereignty in Aotearoa New Zealand, key controls are strict access control, strong "
            "de-identification, purpose limitation, NZ-controlled or approved hosting where possible, full audit "
            "logging, retention and deletion rules, privacy and clinical governance review, and human oversight for "
            "any workflow that could affect care or triage."
        )

    if asks_justice_controls:
        return (
            "For justice-sector AI handling sensitive case data, use case-level access controls, data segregation, "
            "encryption in transit and at rest, no external model training on case material, full audit trails, "
            "mandatory human review, clear escalation and appeal paths, regular bias and security testing, and a rule "
            "that the model supports staff but does not make binding legal or operational decisions on its own."
        )

    return None


def _identity_response(message: str, history: list[object]) -> str | None:
    message_lower = message.strip().lower()
    mentions_lumynax = "lumynax" in message_lower or "lumynax infused smollm2 360m instruct gguf" in message_lower
    asks_identity = any(
        phrase in message_lower
        for phrase in (
            "who are you",
            "what are you",
            "what is lumynax",
            "what's lumynax",
            "what is this model",
            "what's this model",
            "explain what lumynax is",
            "explain",
            "describe",
            "tell me about",
        )
    )
    if not asks_identity:
        return None
    if not mentions_lumynax and "who are you" not in message_lower and "what are you" not in message_lower:
        return None

    if "bullet" in message_lower or "three" in message_lower:
        return '- LumynaX Infused SmolLM2 360M Instruct GGUF is a local-first LumynaX model release from AbteeX AI Labs\\n- It is aimed at practical Aotearoa New Zealand workflows and locally relevant responses\\n- This public Space is a browser demo for that LumynaX release'
    return 'LumynaX Infused SmolLM2 360M Instruct GGUF is a local-first LumynaX model release from AbteeX AI Labs for Aotearoa New Zealand workflows. It is intended for practical assistant use and locally relevant text generation when appropriate. This public Space is a browser demo of the same release hosted on Hugging Face.'


def _showcase_mode_response(message: str, error_text: str) -> str:
    """Build the fallback reply shown when live inference is unavailable.

    The reply is the showcase-mode notice, a blank line, guidance on what the
    demo can still answer and how to run the model locally, and finally the
    runtime error detail in parentheses. *message* is accepted but not used.
    """
    guidance = (
        "You can still ask about provenance, governance, or package identity in this demo. "
        "If you want the full runtime, use the model repo files locally with `python quickstart.py --interactive`. "
    )
    return f"{SHOWCASE_MODE_MESSAGE}\n\n{guidance}(Runtime detail: {error_text})"


def _render_prompt(messages: list[dict[str, str]]) -> str:
    """Render *messages* into a single prompt string ending with an open assistant turn.

    With ``PROMPT_FORMAT == "plain_completion"`` the output is a transcript:
    system text verbatim, ``User: ...`` / ``Assistant: ...`` lines for the
    other turns, blocks separated by blank lines, closed by ``Assistant:``.
    For any other format each turn is wrapped in ``<|im_start|>`` /
    ``<|im_end|>`` markers and the prompt ends with ``<|im_start|>assistant``
    followed by a newline.
    """
    if PROMPT_FORMAT != "plain_completion":
        wrapped = [
            f"<|im_start|>{turn['role']}\n{turn['content']}<|im_end|>\n"
            for turn in messages
        ]
        wrapped.append("<|im_start|>assistant\n")
        return "".join(wrapped)

    transcript: list[str] = []
    for turn in messages:
        speaker, text = turn["role"], turn["content"]
        if speaker == "system":
            # System text goes in as-is, without a speaker label.
            transcript.append(text)
            continue
        label = "User" if speaker == "user" else "Assistant"
        transcript.append(f"{label}: {text}")
    transcript.append("Assistant:")
    return "\n\n".join(transcript)


def _load_runtime() -> tuple[object, object]:
    """Return the shared ``(model, tokenizer)`` pair, loading it on first use.

    The cached pair is returned when present. A previously recorded load
    failure is raised again as ``RuntimeError`` without retrying. Otherwise the
    ``merged_model/`` files of the configured repo are downloaded and loaded
    while holding ``_MODEL_LOCK``; the cache is checked once before taking the
    lock and once more after acquiring it.

    Raises:
        RuntimeError: A previous load attempt already failed.
        Exception: Whatever the download or load raised on this attempt; its
            text is recorded in ``_MODEL_ERROR`` before it propagates.
    """
    global _MODEL, _TOKENIZER, _MODEL_ERROR

    def _cached_outcome() -> tuple[object, object] | None:
        # A finished load wins; a remembered failure is raised, not retried.
        if _MODEL is not None and _TOKENIZER is not None:
            return _MODEL, _TOKENIZER
        if _MODEL_ERROR is not None:
            raise RuntimeError(_MODEL_ERROR)
        return None

    ready = _cached_outcome()
    if ready is not None:
        return ready

    with _MODEL_LOCK:
        # Another caller may have finished (or failed) while we waited.
        ready = _cached_outcome()
        if ready is not None:
            return ready

        try:
            configured_repo = os.environ.get(MODEL_REPO_ENV_VAR, DEFAULT_MODEL_REPO_ID).strip()
            repo_id = configured_repo or DEFAULT_MODEL_REPO_ID

            # First non-blank token among the known variable names, else None.
            hf_token = None
            for env_name in HF_TOKEN_ENV_VARS:
                candidate = os.environ.get(env_name, "").strip()
                if candidate:
                    hf_token = candidate
                    break

            downloaded = snapshot_download(
                repo_id=repo_id,
                token=hf_token,
                allow_patterns=["merged_model/*"],
            )
            snapshot_path = Path(downloaded)
            model_dir = snapshot_path / "merged_model"
            if not model_dir.exists():
                raise FileNotFoundError(
                    f"Expected merged_model/ in {snapshot_path} after downloading {repo_id}."
                )

            model_location = str(model_dir)
            tokenizer = AutoTokenizer.from_pretrained(model_location)
            if tokenizer.pad_token is None:
                # Fall back to the end-of-sequence token when no pad token is defined.
                tokenizer.pad_token = tokenizer.eos_token
            model = AutoModelForCausalLM.from_pretrained(
                model_location,
                dtype=torch.bfloat16,
                low_cpu_mem_usage=True,
            )

            _MODEL, _TOKENIZER = model, tokenizer
            return _MODEL, _TOKENIZER
        except Exception as exc:
            # Remember the failure so later calls fail fast.
            _MODEL_ERROR = f"{type(exc).__name__}: {exc}"
            raise


def chat(message: str, history: list[object]) -> tuple[str, list[object]]:
    """Handle one chat submission and return ``("", updated_history)``.

    The empty string is the new value for the prompt textbox. A blank
    *message* leaves the history untouched. Otherwise the canned responders
    are tried in order (provenance, governance, identity); if none answers,
    the model generates a reply. Any exception along the way is turned into
    the showcase-mode fallback reply rather than propagated.
    """
    history = history or []
    if not message.strip():
        return "", history

    try:
        # Evaluated lazily and in order, so later responders only run when
        # earlier ones returned None.
        canned_responders = (
            lambda: _provenance_response(message),
            lambda: _governance_response(message),
            lambda: _identity_response(message, history),
        )
        for responder in canned_responders:
            canned_reply = responder()
            if canned_reply is not None:
                return "", _append_history(history, message, canned_reply)

        model, tokenizer = _load_runtime()
        messages = _build_messages(history, message)
        use_chat_template = (
            hasattr(tokenizer, "apply_chat_template") and PROMPT_FORMAT != "plain_completion"
        )
        if use_chat_template:
            encoded = tokenizer.apply_chat_template(
                messages,
                tokenize=True,
                add_generation_prompt=True,
                return_tensors="pt",
                return_dict=True,
            )
        else:
            encoded = tokenizer(_render_prompt(messages), return_tensors="pt")
        encoded = encoded.to(model.device)

        with torch.inference_mode():
            output = model.generate(
                **encoded,
                max_new_tokens=MAX_NEW_TOKENS,
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode only what follows the prompt tokens in the first sequence.
        prompt_length = encoded["input_ids"].shape[-1]
        new_tokens = output[0][prompt_length:]
        generated = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
        reply = generated or "No response generated."
        return "", _append_history(history, message, reply)
    except Exception as exc:
        error_text = f"{type(exc).__name__}: {exc}"
        fallback = _showcase_mode_response(message, error_text)
        return "", _append_history(history, message, fallback)

# Page layout: heading, chat transcript, prompt row, example prompts, clear button.
with gr.Blocks() as demo:
    gr.Markdown(
        f"# {MODEL_TITLE}\n\n"
        "Public browser demo for LumynaX from AbteeX AI Labs. "
        "This Space is backed by a private model repo on Hugging Face. "
        "If the backing repo is GGUF-only, this browser demo stays in showcase mode and directs people to the local interactive quickstart."
    )
    chatbot_kwargs = {"label": "LumynaX"}
    # Only pass `type` when the installed Chatbot constructor accepts it;
    # chat() always appends role/content dicts to the history.
    if CHATBOT_SUPPORTS_TYPE:
        chatbot_kwargs["type"] = "messages"
    chatbot = gr.Chatbot(**chatbot_kwargs)
    gr.Markdown("Enter a prompt and press `Enter` or click `Send`.")
    with gr.Row():
        prompt = gr.Textbox(
            label="Prompt",
            placeholder="Ask LumynaX something about Aotearoa, your project, or local research.",
            lines=4,
            scale=8,
        )
        send = gr.Button("Send", variant="primary", scale=1, min_width=120)
    # Clicking an example fills the prompt textbox.
    gr.Examples(
        examples=[
            "Give a helpful welcome message for customers in Aotearoa New Zealand.",
            'Explain in two short paragraphs what LumynaX Infused SmolLM2 360M Instruct GGUF is and who it is for.',
            "Write a concise summary of why local AI deployment matters for NZ teams.",
        ],
        inputs=prompt,
    )
    clear = gr.Button("Clear")

    # Submitting the textbox and clicking Send both run chat(); its two return
    # values go to the textbox (emptied) and the chatbot (updated history).
    prompt.submit(chat, inputs=[prompt, chatbot], outputs=[prompt, chatbot])
    send.click(chat, inputs=[prompt, chatbot], outputs=[prompt, chatbot])
    # Clear replaces the chatbot history with an empty list.
    clear.click(lambda: [], outputs=chatbot, queue=False)


# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()