import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Identifier of the checkpoint that load_model() passes to from_pretrained().
MODEL_ID = "SupraLabs/Supra-50M-Reasoning"

# Literal marker strings that delimit the reasoning and solution sections
# in the text produced by the model.
THINK_START, THINK_END = "<|begin_of_thought|>", "<|end_of_thought|>"
SOL_START, SOL_END = "<|begin_of_solution|>", "<|end_of_solution|>"

# Instruction placed in the [SYSTEM] slot of every prompt.
SYSTEM_PROMPT = " ".join(
    (
        "Your role as an assistant involves thoroughly exploring questions through",
        "a systematic long thinking process before providing the final precise and",
        "accurate solutions.",
    )
)

# Header text handed to gr.HTML at the top of the page.
DESCRIPTION = """
🦅 Supra-50M Reasoning
A tiny 50M model that thinks before answering —
by SupraLabs
"""

# Sample questions for gr.Examples; each row is a one-element list because
# the examples widget feeds a single input component.
EXAMPLES = [
    [question]
    for question in (
        "What is artificial intelligence?",
        "How does a large language model learn?",
        "Explain the water cycle in simple terms.",
        "What is the meaning of life?",
        "Write a short poem about the universe.",
    )
]
def load_model():
    """Load the tokenizer and causal language model named by ``MODEL_ID``.

    The model is requested in float32 with ``device_map="cpu"`` and is put
    into evaluation mode before being returned.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    model_options = {"torch_dtype": torch.float32, "device_map": "cpu"}

    tok = AutoTokenizer.from_pretrained(MODEL_ID)
    lm = AutoModelForCausalLM.from_pretrained(MODEL_ID, **model_options)
    lm.eval()
    return tok, lm
# Load the tokenizer and model once, at import time. `generate` reads these
# module-level names on every request, so they must exist before the UI is built.
print("Loading model…")
tokenizer, model = load_model()
print("Model ready.")
def build_prompt(question: str) -> str:
    """Wrap a user question in the prompt layout sent to the model.

    The prompt has three sections separated by blank lines: ``[SYSTEM]`` with
    ``SYSTEM_PROMPT``, ``[USER]`` with the question, and ``[ASSISTANT]``
    pre-filled with ``THINK_START`` and a trailing newline so the model
    continues from inside the thinking section.

    Args:
        question: The user's question, inserted verbatim.

    Returns:
        The complete prompt string.
    """
    sections = (
        f"[SYSTEM]: {SYSTEM_PROMPT}",
        f"[USER]: {question}",
        f"[ASSISTANT]: {THINK_START}\n",
    )
    return "\n\n".join(sections)
def parse_output(raw: str):
    """Split raw model text into its reasoning and its final answer.

    The text is expected to look like
    ``THINK_START … THINK_END SOL_START … SOL_END``, but any marker may be
    missing (for example when generation stops early), so each section is
    located independently and closing markers are only searched for *after*
    their opening marker.

    Args:
        raw: Decoded model output, including the marker strings.

    Returns:
        A ``(thought, answer)`` tuple of whitespace-stripped strings with any
        leftover marker literals removed.

        * ``thought`` is the text between ``THINK_START`` and ``THINK_END``;
          if ``THINK_END`` is missing but a solution section follows, the
          thought runs up to ``SOL_START``; otherwise it is ``""``.
        * ``answer`` is the text after ``SOL_START`` (up to ``SOL_END`` when
          present); failing that, the text after ``THINK_END``; failing that,
          the whole input.
    """

    def _clean(fragment: str) -> str:
        # Marker literals are control text, never part of what the user should read.
        for marker in (THINK_START, THINK_END, SOL_START, SOL_END):
            fragment = fragment.replace(marker, "")
        return fragment.strip()

    think_open = raw.find(THINK_START)
    think_from = think_open + len(THINK_START) if think_open != -1 else 0
    # Searching from think_from ignores a stray THINK_END that precedes THINK_START.
    think_close = raw.find(THINK_END, think_from)

    sol_open = raw.find(SOL_START)
    sol_from = sol_open + len(SOL_START)  # only used when sol_open != -1

    thought = ""
    if think_open != -1:
        if think_close != -1:
            thought = _clean(raw[think_from:think_close])
        elif sol_open >= think_from:
            # Unterminated thought: it ends where the solution section begins.
            thought = _clean(raw[think_from:sol_open])

    if sol_open != -1:
        # A missing SOL_END means the answer runs to the end of the text.
        sol_close = raw.find(SOL_END, sol_from)
        answer = raw[sol_from:] if sol_close == -1 else raw[sol_from:sol_close]
    elif think_close != -1:
        # Fallback if SOL markers are omitted but THINK_END exists
        answer = raw[think_close + len(THINK_END):]
    else:
        # No usable structure at all: show everything, minus the markers.
        answer = raw

    return thought, _clean(answer)
def generate(
    prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int,
    show_thinking: bool,
):
    """Run the model on a question and return its reasoning and answer.

    Args:
        prompt: The user's question. Empty or whitespace-only input
            short-circuits with a warning message.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature; ``0`` selects greedy decoding.
        top_p: Nucleus-sampling threshold (used only when sampling).
        top_k: Top-k cutoff (used only when sampling).
        show_thinking: When false, the reasoning text is replaced by ``""``.

    Returns:
        A ``(thought, answer)`` tuple of strings for the two output boxes.
    """
    if not prompt or not prompt.strip():
        return "", "⚠️ Please enter a question."

    # Format prompt to mirror the original inference structure
    full_prompt = build_prompt(prompt)
    inputs = tokenizer(full_prompt, return_tensors="pt")
    input_ids = inputs["input_ids"]

    do_sample = temperature > 0
    gen_kwargs = {
        # UI values may arrive as floats; token counts must be integers.
        "max_new_tokens": int(max_new_tokens),
        "do_sample": do_sample,
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if do_sample:
        # Sampling knobs have no effect under greedy decoding, so only pass
        # them when sampling is actually enabled.
        gen_kwargs.update(
            temperature=float(temperature),
            top_p=float(top_p),
            top_k=int(top_k),
        )
    if "attention_mask" in inputs:
        # The pad id equals the EOS id here, so the mask cannot be inferred
        # from the ids; forward the tokenizer's mask explicitly.
        gen_kwargs["attention_mask"] = inputs["attention_mask"]

    with torch.no_grad():
        output_ids = model.generate(input_ids, **gen_kwargs)

    # Keep only the continuation, dropping the echoed prompt tokens.
    generated = output_ids[0][input_ids.shape[-1]:]

    # Special tokens are kept while decoding so the section markers survive
    # even if they are registered as special tokens; every other special
    # token (EOS, padding, …) is then removed from the text.
    raw = tokenizer.decode(generated, skip_special_tokens=False)
    section_markers = (THINK_START, THINK_END, SOL_START, SOL_END)
    for token in tokenizer.all_special_tokens:
        if token and token not in section_markers:
            raw = raw.replace(token, "")
    raw = raw.strip()

    # Prepend THINK_START since generation begins immediately *after* the prompt token
    raw = THINK_START + "\n" + raw
    thought, answer = parse_output(raw)
    return thought if show_thinking else "", answer
# Stylesheet passed to gr.Blocks: a muted monospace look for the component
# tagged with the "thinking-box" class, plus a rule that hides the page footer.
custom_css = "\n".join(
    (
        ".thinking-box textarea {",
        "font-family: 'IBM Plex Mono', monospace !important;",
        "font-size: 0.82rem !important;",
        "color: #6b7280 !important;",
        "}",
        "footer { display: none !important; }",
    )
)
# Build the Gradio Blocks app and bind it to `demo`.
# NOTE(review): indentation is missing in this view, so which components sit
# inside which Row / Column / Accordion cannot be confirmed from here — verify
# the nesting against the rendered layout before restructuring.
with gr.Blocks(
theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
title="Supra-50M Reasoning",
css=custom_css,) as demo:
# Page header built from DESCRIPTION.
gr.HTML(DESCRIPTION)
with gr.Row():
with gr.Column(scale=3):
# Question input; it is the first entry of inputs_list below.
prompt_input = gr.Textbox(
label="Your question",
placeholder="Ask anything…",
lines=3,
)
# Generation controls; their values are forwarded to `generate` through inputs_list.
with gr.Accordion("⚙️ Generation settings", open=False):
max_tokens = gr.Slider(512, 992, value=992, step=32, label="Max new tokens")
temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature (0 = greedy)")
top_p = gr.Slider(0.1, 1.0, value=0.8, step=0.05, label="Top-p")
top_k = gr.Slider(1, 100, value=25, step=1, label="Top-k")
show_think = gr.Checkbox(value=True, label="Show thinking process")
with gr.Row():
run_btn = gr.Button("Generate ✦", variant="primary")
clear_btn = gr.Button("Clear", variant="secondary")
with gr.Column(scale=4):
# Read-only outputs; the "thinking-box" class is the one styled by custom_css.
thinking_out = gr.Textbox(
label="🧠 Thinking process",
lines=8,
interactive=False,
elem_classes=["thinking-box"],
placeholder="The model's internal reasoning will appear here…",
)
answer_out = gr.Textbox(
label="✅ Final answer",
lines=6,
interactive=False,
placeholder="The answer will appear here…",
)
# Sample questions from EXAMPLES, wired to the question textbox.
gr.Examples(
examples=EXAMPLES,
inputs=[prompt_input],
label="💡 Try these examples",
examples_per_page=5,
)
# Footer line with the model link and licence information.
gr.Markdown(
"**Model:** [SupraLabs/Supra-50M-Reasoning](https://huggingface.co/SupraLabs/Supra-50M-Reasoning) "
" | **License:** Apache 2.0 | 51.8M params · CPU-only · Project Chimera © SupraLabs 2026"
)
# The order here must match the parameter order of `generate`
# (prompt, max_new_tokens, temperature, top_p, top_k, show_thinking).
inputs_list = [prompt_input, max_tokens, temperature, top_p, top_k, show_think]
# `generate` returns (thought, answer), in this order.
outputs_list = [thinking_out, answer_out]
# The button click and the textbox submit event run the same handler.
run_btn.click(fn=generate, inputs=inputs_list, outputs=outputs_list)
prompt_input.submit(fn=generate, inputs=inputs_list, outputs=outputs_list)
# Reset the question and both output boxes to empty strings.
clear_btn.click(fn=lambda: ("", "", ""), outputs=[prompt_input, thinking_out, answer_out])
# Launch the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    demo.launch()