| import gradio as gr |
| import torch |
| from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
| MODEL_ID = "SupraLabs/Supra-50M-Reasoning" |
|
|
| THINK_START = "<|begin_of_thought|>" |
| THINK_END = "<|end_of_thought|>" |
| SOL_START = "<|begin_of_solution|>" |
| SOL_END = "<|end_of_solution|>" |
|
|
| SYSTEM_PROMPT = ( |
| "Your role as an assistant involves thoroughly exploring questions through " |
| "a systematic long thinking process before providing the final precise and " |
| "accurate solutions." |
| ) |
|
|
| DESCRIPTION = """<div style="text-align:center; padding:12px 0 4px;"> |
| <h1 style="font-size:2rem; margin:0;">🦅 Supra-50M Reasoning</h1> |
| <p style="color:#888; margin:6px 0 0;"> |
| A tiny 50M model that <em>thinks</em> before answering — |
| by <a href="https://huggingface.co/SupraLabs" target="_blank">SupraLabs</a> |
| </p> |
| </div>""" |
|
|
| EXAMPLES = [ |
| ["What is artificial intelligence?"], |
| ["How does a large language model learn?"], |
| ["Explain the water cycle in simple terms."], |
| ["What is the meaning of life?"], |
| ["Write a short poem about the universe."], |
| ] |
|
|
def load_model():
    """Load the tokenizer and causal language model named by ``MODEL_ID``.

    The weights are requested as float32 with ``device_map="cpu"``, and the
    model is switched to evaluation mode before being returned.

    Returns:
        A ``(tokenizer, model)`` pair.
    """
    weight_options = {"torch_dtype": torch.float32, "device_map": "cpu"}
    tok = AutoTokenizer.from_pretrained(MODEL_ID)
    lm = AutoModelForCausalLM.from_pretrained(MODEL_ID, **weight_options)
    lm.eval()
    return tok, lm
|
|
# Load once at import time; the module-level `tokenizer` and `model` are the
# objects that `generate` reads on every request.
print("Loading model…")
tokenizer, model = load_model()
print("Model ready.")
|
|
def build_prompt(question: str) -> str:
    """Render the single-turn prompt text for ``question``.

    The prompt has three sections separated by blank lines: ``[SYSTEM]``
    (the fixed ``SYSTEM_PROMPT``), ``[USER]`` (the question verbatim) and
    ``[ASSISTANT]``. The assistant section is pre-filled with the opening
    thought tag and a newline, so the model continues inside the thought.
    """
    sections = (
        f"[SYSTEM]: {SYSTEM_PROMPT}",
        f"[USER]: {question}",
        f"[ASSISTANT]: {THINK_START}\n",
    )
    return "\n\n".join(sections)
|
|
def parse_output(raw: str):
    """Split decoded model text into ``(thought, answer)``.

    Tags are searched in reading order: the closing thought tag is looked up
    after the opening one, and the solution tag is looked up after the
    thought (falling back to anywhere in the text if it is not found there).

    Args:
        raw: Text that may contain the thought / solution tags.

    Returns:
        A ``(thought, answer)`` tuple of strings:

        * No thought/solution tag at all: ``("", raw)`` with ``raw`` untouched.
        * Opening thought tag but no closing tag (e.g. generation stopped
          mid-thought): everything after the opening tag, up to a solution
          tag if one follows, is the thought; the answer is whatever the
          solution tag introduces, or ``""``.
        * Otherwise the thought is the text between the thought tags and the
          answer is the text after the solution tag (or, without one, after
          the closing thought tag), cut at the closing solution tag if present.

        Both parts are whitespace-stripped except in the no-tag case.
    """
    open_at = raw.find(THINK_START)
    thought_from = open_at + len(THINK_START) if open_at >= 0 else 0
    # Only a closing tag that follows the opening tag can end the thought.
    close_at = raw.find(THINK_END, thought_from)
    answer_from = close_at + len(THINK_END) if close_at >= 0 else thought_from

    sol_at = raw.find(SOL_START, answer_from)
    if sol_at < 0:
        # Tolerate a solution tag that shows up earlier than expected.
        sol_at = raw.find(SOL_START)

    if open_at < 0 and close_at < 0 and sol_at < 0:
        # Nothing to parse: hand the text back unchanged as the answer.
        return "", raw

    thought = ""
    if open_at >= 0:
        if close_at >= 0:
            thought_to = close_at
        elif sol_at >= thought_from:
            # Closing thought tag missing, but a solution follows.
            thought_to = sol_at
        else:
            # Unterminated thought: keep it as thought, not as the answer.
            thought_to = len(raw)
        thought = raw[thought_from:thought_to].strip()

    if sol_at >= 0:
        answer = raw[sol_at + len(SOL_START):]
    elif close_at >= 0:
        answer = raw[answer_from:]
    else:
        answer = ""

    # Drop the closing solution tag (and anything after it) when present.
    return thought, answer.partition(SOL_END)[0].strip()
|
|
def generate(
    prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int,
    show_thinking: bool,
):
    """Run one generation for ``prompt`` and return ``(thought, answer)``.

    Uses the module-level ``tokenizer`` and ``model`` together with
    ``build_prompt`` and ``parse_output``.

    Args:
        prompt: The user's question. Empty, whitespace-only or ``None``
            yields a warning message instead of a generation.
        max_new_tokens: Upper bound on generated tokens (coerced to ``int``).
        temperature: Sampling temperature; ``0`` (or less) selects greedy
            decoding.
        top_p: Nucleus-sampling threshold; only used when sampling.
        top_k: Top-k cutoff (coerced to ``int``); only used when sampling.
        show_thinking: When false, the returned thought is ``""``.

    Returns:
        ``(thought, answer)`` strings for the two output text boxes.
    """
    if not prompt or not prompt.strip():
        return "", "⚠️ Please enter a question."

    encoded = tokenizer(build_prompt(prompt), return_tensors="pt")
    input_ids = encoded["input_ids"]

    gen_kwargs = {
        # UI sliders may hand over floats; generate expects an integer count.
        "max_new_tokens": int(max_new_tokens),
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if "attention_mask" in encoded:
        # pad_token_id equals eos_token_id here, so pass the tokenizer's mask
        # explicitly instead of leaving generate() to infer one.
        gen_kwargs["attention_mask"] = encoded["attention_mask"]

    if temperature > 0:
        gen_kwargs.update(
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
            top_k=int(top_k),
        )
    else:
        # Greedy decoding: sampling-only flags are left out on purpose.
        gen_kwargs["do_sample"] = False

    with torch.no_grad():
        output_ids = model.generate(input_ids, **gen_kwargs)

    # Keep only the continuation; the prompt tokens come first in the output.
    new_tokens = output_ids[0][input_ids.shape[-1]:]
    raw = tokenizer.decode(new_tokens, skip_special_tokens=False).strip()
    raw = raw.replace("<s>", "").replace("</s>", "").strip()

    # The prompt already ends with the opening thought tag, so the
    # continuation lacks it; restore it so parse_output sees a complete pair.
    raw = THINK_START + "\n" + raw

    thought, answer = parse_output(raw)
    return (thought if show_thinking else ""), answer
|
|
| custom_css = """.thinking-box textarea { |
| font-family: 'IBM Plex Mono', monospace !important; |
| font-size: 0.82rem !important; |
| color: #6b7280 !important; |
| } |
| footer { display: none !important; }""" |
|
|
# Page layout: banner, a two-column row (inputs/settings on the left, model
# output on the right), example questions, a footer note, then event wiring.
# NOTE(review): nesting reconstructed from the layout calls — confirm that the
# checkbox belongs inside the settings accordion and that the examples and
# footer sit below the two-column row.
with gr.Blocks(
    title="Supra-50M Reasoning",
    css=custom_css,
    theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
) as demo:
    gr.HTML(DESCRIPTION)

    with gr.Row():
        # Left column: question box, generation settings, action buttons.
        with gr.Column(scale=3):
            prompt_input = gr.Textbox(
                label="Your question",
                placeholder="Ask anything…",
                lines=3,
            )

            with gr.Accordion("⚙️ Generation settings", open=False):
                max_tokens = gr.Slider(
                    minimum=512,
                    maximum=992,
                    value=992,
                    step=32,
                    label="Max new tokens",
                )
                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=1.5,
                    value=0.7,
                    step=0.05,
                    label="Temperature (0 = greedy)",
                )
                top_p = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.8,
                    step=0.05,
                    label="Top-p",
                )
                top_k = gr.Slider(
                    minimum=1,
                    maximum=100,
                    value=25,
                    step=1,
                    label="Top-k",
                )
                show_think = gr.Checkbox(value=True, label="Show thinking process")

            with gr.Row():
                run_btn = gr.Button("Generate ✦", variant="primary")
                clear_btn = gr.Button("Clear", variant="secondary")

        # Right column: read-only boxes for the reasoning and the answer.
        with gr.Column(scale=4):
            thinking_out = gr.Textbox(
                label="🧠 Thinking process",
                placeholder="The model's internal reasoning will appear here…",
                lines=8,
                interactive=False,
                elem_classes=["thinking-box"],
            )
            answer_out = gr.Textbox(
                label="✅ Final answer",
                placeholder="The answer will appear here…",
                lines=6,
                interactive=False,
            )

    gr.Examples(
        examples=EXAMPLES,
        inputs=[prompt_input],
        label="💡 Try these examples",
        examples_per_page=5,
    )

    gr.Markdown(
        "**Model:** [SupraLabs/Supra-50M-Reasoning](https://huggingface.co/SupraLabs/Supra-50M-Reasoning) "
        " | **License:** Apache 2.0 | 51.8M params · CPU-only · Project Chimera © SupraLabs 2026"
    )

    # Component order here must match the parameter order of `generate`.
    inputs_list = [prompt_input, max_tokens, temperature, top_p, top_k, show_think]
    outputs_list = [thinking_out, answer_out]

    # The button and pressing Enter in the question box run the same handler.
    run_btn.click(fn=generate, inputs=inputs_list, outputs=outputs_list)
    prompt_input.submit(fn=generate, inputs=inputs_list, outputs=outputs_list)

    # Clear empties the question and both output boxes.
    clear_btn.click(
        fn=lambda: ("", "", ""),
        outputs=[prompt_input, thinking_out, answer_out],
    )
|
|
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()