"""Gradio demo for the SupraLabs Supra-50M-Reasoning causal language model.

The script loads the model on CPU at import time, wraps the user's question
in the prompt layout defined by ``build_prompt``, generates a completion, and
splits it into a "thinking" section and a "final answer" section using the
marker strings defined below.
"""

from typing import Tuple

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = "SupraLabs/Supra-50M-Reasoning"

# Marker strings that delimit the reasoning and solution sections in the
# model's output.
THINK_START = "<|begin_of_thought|>"
THINK_END = "<|end_of_thought|>"
SOL_START = "<|begin_of_solution|>"
SOL_END = "<|end_of_solution|>"
_SECTION_MARKERS = (THINK_START, THINK_END, SOL_START, SOL_END)

SYSTEM_PROMPT = (
    "Your role as an assistant involves thoroughly exploring questions through "
    "a systematic long thinking process before providing the final precise and "
    "accurate solutions."
)

# Header text handed to gr.HTML below. It currently holds plain text only.
DESCRIPTION = """

🦅 Supra-50M Reasoning

A tiny 50M model that thinks before answering — by SupraLabs

"""

EXAMPLES = [
    ["What is artificial intelligence?"],
    ["How does a large language model learn?"],
    ["Explain the water cycle in simple terms."],
    ["What is the meaning of life?"],
    ["Write a short poem about the universe."],
]


def load_model():
    """Load the tokenizer and model for ``MODEL_ID`` on CPU in float32.

    Returns:
        A ``(tokenizer, model)`` pair; the model is switched to eval mode.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float32,
        device_map="cpu",
    )
    model.eval()
    return tokenizer, model


print("Loading model…")
tokenizer, model = load_model()
print("Model ready.")


def build_prompt(question: str) -> str:
    """Wrap *question* in the system/user/assistant prompt layout.

    The prompt ends with ``THINK_START`` so the completion starts inside the
    reasoning section.
    """
    return (
        f"[SYSTEM]: {SYSTEM_PROMPT}\n\n"
        f"[USER]: {question}\n\n"
        f"[ASSISTANT]: {THINK_START}\n"
    )


def parse_output(raw: str) -> Tuple[str, str]:
    """Split generated text into ``(thought, answer)``.

    Args:
        raw: Generated text, expected to contain the thought/solution markers.

    Returns:
        ``thought`` is the text between ``THINK_START`` and the next
        ``THINK_END`` (empty when that pair is not present). ``answer`` is the
        text between ``SOL_START`` and the next ``SOL_END``; when the closing
        marker is missing it is everything after ``SOL_START``; when there is
        no ``SOL_START`` it is everything after ``THINK_END``; otherwise it is
        ``raw`` unchanged.
    """
    thought, answer = "", raw

    think_at = raw.find(THINK_START)
    if think_at != -1:
        t0 = think_at + len(THINK_START)
        # Search for the closing marker *after* the opening one so a stray
        # earlier THINK_END cannot produce an inverted (empty) slice.
        t1 = raw.find(THINK_END, t0)
        if t1 != -1:
            thought = raw[t0:t1].strip()

    sol_at = raw.find(SOL_START)
    if sol_at != -1:
        s0 = sol_at + len(SOL_START)
        s1 = raw.find(SOL_END, s0)
        # An unterminated solution (e.g. generation hit the token limit) still
        # yields everything produced after SOL_START.
        answer = (raw[s0:s1] if s1 != -1 else raw[s0:]).strip()
    elif THINK_END in raw:
        # Fallback if SOL markers are omitted but THINK_END exists
        answer = raw[raw.index(THINK_END) + len(THINK_END):].strip()

    return thought, answer


def _strip_control_tokens(text: str) -> str:
    """Remove the tokenizer's BOS/EOS/PAD token text from *text*.

    NOTE(review): the original code called ``replace("", "")`` twice, which is
    a no-op; the token literals were presumably lost. Stripping the tokenizer's
    own BOS/EOS/PAD strings is the closest equivalent that can be derived from
    this file — confirm against the model's tokenizer config. Section markers
    are never stripped because ``parse_output`` depends on them.
    """
    for name in ("bos_token", "eos_token", "pad_token"):
        token = getattr(tokenizer, name, None)
        if token and token not in _SECTION_MARKERS:
            text = text.replace(token, "")
    return text.strip()


def generate(
    prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int,
    show_thinking: bool,
) -> Tuple[str, str]:
    """Generate a completion for *prompt* and return ``(thought, answer)``.

    Args:
        prompt: The user's question.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature; ``0`` selects greedy decoding.
        top_p: Nucleus-sampling threshold (used only when sampling).
        top_k: Top-k cutoff (used only when sampling).
        show_thinking: When false, the returned thought is an empty string.

    Returns:
        The parsed reasoning text (or ``""``) and the parsed answer. For an
        empty prompt, returns ``""`` and a warning message.
    """
    if not prompt or not prompt.strip():
        return "", "⚠️ Please enter a question."

    # Format prompt to mirror the original inference structure
    full_prompt = build_prompt(prompt)
    inputs = tokenizer(full_prompt, return_tensors="pt")
    input_ids = inputs["input_ids"]

    sampling = temperature > 0
    gen_kwargs = {
        # UI sliders may deliver numeric values as floats; generate() needs ints.
        "max_new_tokens": int(max_new_tokens),
        "do_sample": sampling,
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if sampling:
        # Sampling knobs are ignored by greedy decoding, so only pass them
        # when they take effect.
        gen_kwargs.update(
            temperature=float(temperature),
            top_p=float(top_p),
            top_k=int(top_k),
        )

    # Forward the attention mask explicitly: pad and EOS share an id here, so
    # it cannot be inferred reliably from the input ids.
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        gen_kwargs["attention_mask"] = attention_mask

    with torch.no_grad():
        output_ids = model.generate(input_ids, **gen_kwargs)

    # Keep only the newly generated tokens (drop the echoed prompt).
    generated = output_ids[0][input_ids.shape[-1]:]
    # Special tokens are kept during decoding so the section markers survive.
    raw = tokenizer.decode(generated, skip_special_tokens=False).strip()
    raw = _strip_control_tokens(raw)

    # Prepend THINK_START since generation begins immediately *after* the prompt token
    raw = THINK_START + "\n" + raw
    thought, answer = parse_output(raw)
    return thought if show_thinking else "", answer


custom_css = (
    ".thinking-box textarea { "
    "font-family: 'IBM Plex Mono', monospace !important; "
    "font-size: 0.82rem !important; "
    "color: #6b7280 !important; "
    "} "
    "footer { display: none !important; }"
)

with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
    title="Supra-50M Reasoning",
    css=custom_css,
) as demo:
    gr.HTML(DESCRIPTION)

    with gr.Row():
        # Left column: question input, generation settings, action buttons.
        with gr.Column(scale=3):
            prompt_input = gr.Textbox(
                label="Your question",
                placeholder="Ask anything…",
                lines=3,
            )
            with gr.Accordion("⚙️ Generation settings", open=False):
                max_tokens = gr.Slider(512, 992, value=992, step=32, label="Max new tokens")
                temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature (0 = greedy)")
                top_p = gr.Slider(0.1, 1.0, value=0.8, step=0.05, label="Top-p")
                top_k = gr.Slider(1, 100, value=25, step=1, label="Top-k")
                show_think = gr.Checkbox(value=True, label="Show thinking process")
            with gr.Row():
                run_btn = gr.Button("Generate ✦", variant="primary")
                clear_btn = gr.Button("Clear", variant="secondary")

        # Right column: parsed reasoning and final answer.
        with gr.Column(scale=4):
            thinking_out = gr.Textbox(
                label="🧠 Thinking process",
                lines=8,
                interactive=False,
                elem_classes=["thinking-box"],
                placeholder="The model's internal reasoning will appear here…",
            )
            answer_out = gr.Textbox(
                label="✅ Final answer",
                lines=6,
                interactive=False,
                placeholder="The answer will appear here…",
            )

    gr.Examples(
        examples=EXAMPLES,
        inputs=[prompt_input],
        label="💡 Try these examples",
        examples_per_page=5,
    )
    gr.Markdown(
        "**Model:** [SupraLabs/Supra-50M-Reasoning](https://huggingface.co/SupraLabs/Supra-50M-Reasoning) "
        " |  **License:** Apache 2.0  |  51.8M params · CPU-only · Project Chimera © SupraLabs 2026"
    )

    inputs_list = [prompt_input, max_tokens, temperature, top_p, top_k, show_think]
    outputs_list = [thinking_out, answer_out]

    # Both the button and pressing Enter in the textbox trigger generation.
    run_btn.click(fn=generate, inputs=inputs_list, outputs=outputs_list)
    prompt_input.submit(fn=generate, inputs=inputs_list, outputs=outputs_list)
    clear_btn.click(fn=lambda: ("", "", ""), outputs=[prompt_input, thinking_out, answer_out])

if __name__ == "__main__":
    demo.launch()