# 🦅 Supra-50M Reasoning — Gradio demo

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face Hub repository id handed to the `from_pretrained` loaders.
MODEL_ID = "SupraLabs/Supra-50M-Reasoning"

# Marker strings that delimit the thought and solution sections of the text.
THINK_START, THINK_END = "<|begin_of_thought|>", "<|end_of_thought|>"
SOL_START, SOL_END = "<|begin_of_solution|>", "<|end_of_solution|>"

# System instruction placed at the top of every prompt.
SYSTEM_PROMPT = " ".join(
    (
        "Your role as an assistant involves thoroughly exploring questions through",
        "a systematic long thinking process before providing the final precise and",
        "accurate solutions.",
    )
)

# HTML header rendered at the top of the page.
DESCRIPTION = "\n".join(
    (
        '<div style="text-align:center; padding:12px 0 4px;">',
        '  <h1 style="font-size:2rem; margin:0;">🦅 Supra-50M Reasoning</h1>',
        '  <p style="color:#888; margin:6px 0 0;">',
        "    A tiny 50M model that <em>thinks</em> before answering &mdash;",
        '    by <a href="https://huggingface.co/SupraLabs" target="_blank">SupraLabs</a>',
        "  </p>",
        "</div>",
    )
)

# Sample questions; each one is wrapped in its own single-item row.
EXAMPLES = [
    [question]
    for question in (
        "What is artificial intelligence?",
        "How does a large language model learn?",
        "Explain the water cycle in simple terms.",
        "What is the meaning of life?",
        "Write a short poem about the universe.",
    )
]

def load_model():
    """Load the tokenizer and causal-LM weights identified by ``MODEL_ID``.

    The model is requested in float32 with ``device_map="cpu"`` and has
    ``eval()`` called on it before being returned.

    Returns:
        A ``(tokenizer, model)`` pair.
    """
    tok = AutoTokenizer.from_pretrained(MODEL_ID)

    load_options = {"torch_dtype": torch.float32, "device_map": "cpu"}
    lm = AutoModelForCausalLM.from_pretrained(MODEL_ID, **load_options)
    lm.eval()

    return tok, lm

# Load the tokenizer and model once, at import time; `generate` reads these
# module-level names on every request.
print("Loading model…")
tokenizer, model = load_model()
print("Model ready.")

def build_prompt(question: str) -> str:
    """Render *question* into the ``[SYSTEM]`` / ``[USER]`` / ``[ASSISTANT]`` prompt.

    The returned text ends with ``THINK_START`` followed by a newline, so any
    text appended after it continues from inside the thought section.
    """
    sections = (
        f"[SYSTEM]: {SYSTEM_PROMPT}",
        f"[USER]: {question}",
        f"[ASSISTANT]: {THINK_START}\n",
    )
    # Sections are separated by one blank line.
    return "\n\n".join(sections)

def parse_output(raw: str):
    """Split decoded model text into its reasoning and its final answer.

    Each end marker is searched for only *after* its matching start marker, so
    a stray end marker earlier in the text cannot produce an empty slice.

    Cases handled:
      * ``THINK_START … THINK_END`` present: the enclosed text is the thought.
      * ``THINK_START`` with no closing ``THINK_END`` (e.g. generation stopped
        mid-thought): everything after the marker, up to ``SOL_START`` if one
        follows, is the thought; the answer is empty unless a solution section
        exists. The literal marker never ends up in the answer.
      * ``SOL_START … SOL_END``: the enclosed text is the answer; if ``SOL_END``
        is missing, the answer runs to the end of the text.
      * No solution markers but a ``THINK_END``: the answer is whatever follows
        ``THINK_END``.
      * No markers at all: the thought is empty and the answer is ``raw``
        unchanged.

    Args:
        raw: Decoded text, expected to contain the marker strings verbatim.

    Returns:
        A ``(thought, answer)`` tuple of strings.
    """
    thought, answer = "", raw

    # Locate the thought section.
    think_open = raw.find(THINK_START)
    think_from = think_open + len(THINK_START) if think_open != -1 else -1
    think_close = raw.find(THINK_END, think_from) if think_from != -1 else -1

    sol_open = raw.find(SOL_START)

    if think_close != -1:
        thought = raw[think_from:think_close].strip()
    elif think_from != -1:
        # Unterminated thought: keep the partial reasoning as the thought and
        # do not leak it (or the marker) into the answer.
        thought_stop = sol_open if sol_open >= think_from else len(raw)
        thought = raw[think_from:thought_stop].strip()
        answer = ""

    if sol_open != -1:
        sol_from = sol_open + len(SOL_START)
        sol_close = raw.find(SOL_END, sol_from)
        # A missing SOL_END means the solution runs to the end of the text.
        solution = raw[sol_from:] if sol_close == -1 else raw[sol_from:sol_close]
        answer = solution.strip()
    elif think_close != -1:
        # Fallback if SOL markers are omitted but the thought was closed.
        answer = raw[think_close + len(THINK_END):].strip()
    elif think_from == -1 and THINK_END in raw:
        # THINK_END without any THINK_START: answer is whatever follows it.
        answer = raw[raw.index(THINK_END) + len(THINK_END):].strip()

    return thought, answer

def generate(
    prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int,
    show_thinking: bool,
):
    """Run the model on *prompt* and return ``(thought, answer)`` for the UI.

    Args:
        prompt: The user's question. ``None``, empty, or whitespace-only input
            short-circuits with a warning message instead of calling the model.
        max_new_tokens: Upper bound on generated tokens (coerced to ``int``).
        temperature: Sampling temperature; ``0`` (or less) selects greedy
            decoding.
        top_p: Nucleus-sampling threshold, used only when sampling.
        top_k: Top-k cutoff, used only when sampling (coerced to ``int``).
        show_thinking: When false, the thought part of the result is blanked.

    Returns:
        A 2-tuple of strings: the thought text (or ``""``) and the answer text.
    """
    if not prompt or not prompt.strip():
        return "", "⚠️ Please enter a question."

    # Format prompt to mirror the original inference structure
    full_prompt = build_prompt(prompt)
    inputs = tokenizer(full_prompt, return_tensors="pt")
    input_ids = inputs["input_ids"]

    # NOTE(review): prompt length + max_new_tokens is not checked against the
    # model's context window — confirm the model tolerates the UI maximum.
    gen_kwargs = {
        # Cast defensively in case the UI delivers the slider value as a float.
        "max_new_tokens": int(max_new_tokens),
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }

    # Forward the tokenizer's attention mask rather than letting generate()
    # guess it (pad and EOS share an id here, which makes guessing ambiguous).
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        gen_kwargs["attention_mask"] = attention_mask

    if temperature > 0:
        gen_kwargs.update(
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            top_k=int(top_k),
        )
    else:
        # Greedy decoding: sampling-only knobs are left out so they are not
        # passed alongside do_sample=False.
        gen_kwargs["do_sample"] = False

    with torch.no_grad():
        output_ids = model.generate(input_ids, **gen_kwargs)

    # Keep only the newly generated tokens (drop the echoed prompt).
    generated = output_ids[0][input_ids.shape[-1]:]
    # Special tokens are kept while decoding so the section markers survive.
    raw = tokenizer.decode(generated, skip_special_tokens=False).strip()
    raw = raw.replace("<s>", "").replace("</s>", "").strip()

    # Prepend THINK_START since generation begins immediately *after* the prompt token
    raw = THINK_START + "\n" + raw

    thought, answer = parse_output(raw)
    if not answer:
        # Tell the user why the answer box would otherwise be blank.
        answer = "⚠️ The model did not produce a final answer."

    return thought if show_thinking else "", answer

# Stylesheet handed to `gr.Blocks(css=...)`: styles the textarea inside
# elements with the `thinking-box` class and hides the page footer.
custom_css = "\n".join(
    (
        ".thinking-box textarea {",
        "    font-family: 'IBM Plex Mono', monospace !important;",
        "    font-size: 0.82rem !important;",
        "    color: #6b7280 !important;",
        "}",
        "footer { display: none !important; }",
    )
)

# Build the Gradio UI. Components are created in the order they appear inside
# the nested context managers, so statement order here defines the layout.
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
    title="Supra-50M Reasoning",
    css=custom_css,) as demo:

    gr.HTML(DESCRIPTION)
    with gr.Row():
        # First column: question input, generation settings, action buttons.
        with gr.Column(scale=3):
            prompt_input = gr.Textbox(
                label="Your question",
                placeholder="Ask anything…",
                lines=3,
            )

            with gr.Accordion("⚙️ Generation settings", open=False):
                # The first two positional Slider arguments are the minimum
                # and maximum of the range.
                max_tokens  = gr.Slider(512, 992, value=992, step=32,  label="Max new tokens")
                temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature  (0 = greedy)")
                top_p       = gr.Slider(0.1, 1.0, value=0.8, step=0.05, label="Top-p")
                top_k       = gr.Slider(1,   100, value=25,  step=1,    label="Top-k")
                show_think  = gr.Checkbox(value=True, label="Show thinking process")

            with gr.Row():
                run_btn   = gr.Button("Generate ✦", variant="primary")
                clear_btn = gr.Button("Clear", variant="secondary")

        # Second column: read-only outputs for the thought and the answer.
        with gr.Column(scale=4):
            thinking_out = gr.Textbox(
                label="🧠 Thinking process",
                lines=8,
                interactive=False,
                # Matches the `.thinking-box` selector in `custom_css`.
                elem_classes=["thinking-box"],
                placeholder="The model's internal reasoning will appear here…",
            )
            answer_out = gr.Textbox(
                label="✅ Final answer",
                lines=6,
                interactive=False,
                placeholder="The answer will appear here…",
            )

    # Example questions are wired to the question textbox only.
    gr.Examples(
        examples=EXAMPLES,
        inputs=[prompt_input],
        label="💡 Try these examples",
        examples_per_page=5,
    )

    gr.Markdown(
        "**Model:** [SupraLabs/Supra-50M-Reasoning](https://huggingface.co/SupraLabs/Supra-50M-Reasoning) "
        "&nbsp;|&nbsp; **License:** Apache 2.0 &nbsp;|&nbsp; 51.8M params · CPU-only · Project Chimera © SupraLabs 2026"
    )

    # Order must match the positional parameters of `generate`:
    # (prompt, max_new_tokens, temperature, top_p, top_k, show_thinking).
    inputs_list  = [prompt_input, max_tokens, temperature, top_p, top_k, show_think]
    # Order must match the (thought, answer) tuple returned by `generate`.
    outputs_list = [thinking_out, answer_out]

    # Both the button click and the textbox submit event run the same handler.
    run_btn.click(fn=generate, inputs=inputs_list, outputs=outputs_list)
    prompt_input.submit(fn=generate, inputs=inputs_list, outputs=outputs_list)
    # Clear resets the question and both output boxes to empty strings.
    clear_btn.click(fn=lambda: ("", "", ""), outputs=[prompt_input, thinking_out, answer_out])

# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()