Spaces:

SupraLabs
/

Supra-50M-Reasoning-Demo

Running

App Files Files Community

Supra-50M-Reasoning-Demo / app.py

LH-Tech-AI

Update app.py

d824960 verified 13 days ago

Raw

History Blame Contribute Delete

6.19 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# Hub repository id passed to both from_pretrained() calls in load_model().
	MODEL_ID = "SupraLabs/Supra-50M-Reasoning"

	# Delimiters used by build_prompt() and parse_output() to open the reasoning
	# section and to locate the reasoning / solution spans in generated text.
	# They are written with plain "|" characters: "\|" is not a Python escape
	# sequence, so a backslash placed before the pipe would remain in the string
	# and the markers would no longer match "<|...|>" text.
	THINK_START = "<|begin_of_thought|>"
	THINK_END = "<|end_of_thought|>"
	SOL_START = "<|begin_of_solution|>"
	SOL_END = "<|end_of_solution|>"

	# Instruction text placed after the "[SYSTEM]:" tag by build_prompt().
	SYSTEM_PROMPT = (
	    "Your role as an assistant involves thoroughly exploring questions through "
	    "a systematic long thinking process before providing the final precise and "
	    "accurate solutions."
	)

	# HTML header rendered at the top of the page via gr.HTML().
	DESCRIPTION = """<div style="text-align:center; padding:12px 0 4px;">
	<h1 style="font-size:2rem; margin:0;">🦅 Supra-50M Reasoning</h1>
	<p style="color:#888; margin:6px 0 0;">
	A tiny 50M model that <em>thinks</em> before answering —
	by <a href="https://huggingface.co/SupraLabs" target="_blank">SupraLabs</a>
	</p>
	</div>"""

	# Sample questions for gr.Examples; each row holds one value because the
	# examples widget feeds a single input component (the question box).
	EXAMPLES = [
	    ["What is artificial intelligence?"],
	    ["How does a large language model learn?"],
	    ["Explain the water cycle in simple terms."],
	    ["What is the meaning of life?"],
	    ["Write a short poem about the universe."],
	]

	def load_model():
	    """Load the tokenizer and causal language model identified by MODEL_ID.

	    The model is requested in float32 with ``device_map="cpu"`` and is put
	    into evaluation mode before being returned.

	    Returns:
	        A ``(tokenizer, model)`` tuple.
	    """
	    tok = AutoTokenizer.from_pretrained(MODEL_ID)

	    model_options = {"torch_dtype": torch.float32, "device_map": "cpu"}
	    lm = AutoModelForCausalLM.from_pretrained(MODEL_ID, **model_options)
	    lm.eval()

	    return tok, lm

	# Load the tokenizer and model once, at import time; generate() reads these
	# module-level names on every request. The two prints bracket the load so
	# its start and completion are visible in the process output.
	print("Loading model…")
	tokenizer, model = load_model()
	print("Model ready.")

	def build_prompt(question: str) -> str:
	    """Return the full text prompt for one user question.

	    The prompt consists of a ``[SYSTEM]`` section carrying SYSTEM_PROMPT, a
	    ``[USER]`` section carrying ``question`` verbatim, and an ``[ASSISTANT]``
	    section that already contains THINK_START followed by a newline, so the
	    continuation starts inside the reasoning span. Sections are separated by
	    one blank line.
	    """
	    sections = (
	        f"[SYSTEM]: {SYSTEM_PROMPT}",
	        f"[USER]: {question}",
	        f"[ASSISTANT]: {THINK_START}\n",
	    )
	    return "\n\n".join(sections)

	def parse_output(raw: str):
	    """Split raw model text into a ``(thought, answer)`` pair of strings.

	    Thought:
	        The text after the first THINK_START, up to the first THINK_END that
	        follows it. If THINK_END is missing, the thought runs up to SOL_START
	        when one follows, otherwise to the end of the text (generation was
	        cut off while still reasoning). Empty when THINK_START is absent.

	    Answer, first rule that applies:
	        1. SOL_START present: the text after it, up to the first SOL_END
	           that follows it, or to the end of the text if none follows.
	        2. THINK_END present: the text after it, cut at SOL_END if one
	           appears (the opening solution marker was omitted).
	        3. THINK_START present but never closed: the empty string, because
	           everything produced so far is reasoning, not an answer.
	        4. No markers at all: ``raw`` unchanged.

	    Args:
	        raw: Decoded model output, possibly containing the marker strings.

	    Returns:
	        ``(thought, answer)``; extracted spans are whitespace-stripped.
	    """
	    think_open = raw.find(THINK_START)
	    sol_open = raw.find(SOL_START)

	    # End markers are searched only *after* their start marker. Taking the
	    # first occurrence anywhere in the text would give an empty slice when a
	    # stray end marker precedes the start marker.
	    thought_from = think_open + len(THINK_START) if think_open != -1 else -1
	    think_close = raw.find(THINK_END, max(thought_from, 0))

	    thought = ""
	    if thought_from != -1:
	        if think_close != -1:
	            thought_to = think_close
	        elif sol_open >= thought_from:
	            # End-of-thought marker omitted: the solution marker ends the thought.
	            thought_to = sol_open
	        else:
	            # Truncated output: everything after the start marker is reasoning.
	            thought_to = len(raw)
	        thought = raw[thought_from:thought_to].strip()

	    if sol_open != -1:
	        answer_from = sol_open + len(SOL_START)
	        sol_close = raw.find(SOL_END, answer_from)
	        answer_to = sol_close if sol_close != -1 else len(raw)
	        answer = raw[answer_from:answer_to].strip()
	    elif think_close != -1:
	        # Fallback if SOL_START is omitted but THINK_END exists; drop a
	        # trailing SOL_END so the marker does not leak into the answer.
	        tail = raw[think_close + len(THINK_END):]
	        answer = tail.partition(SOL_END)[0].strip()
	    elif thought_from != -1:
	        # Reasoning was opened but never closed: there is no answer yet, and
	        # returning ``raw`` here would show the marker and the reasoning as
	        # if they were the final answer.
	        answer = ""
	    else:
	        answer = raw

	    return thought, answer

	def generate(
	    prompt: str,
	    max_new_tokens: int,
	    temperature: float,
	    top_p: float,
	    top_k: int,
	    show_thinking: bool,
	):
	    """Run the model on one question and return ``(thought, answer)``.

	    Args:
	        prompt: The user's question. Empty, whitespace-only or ``None`` input
	            short-circuits with a warning message in the answer slot.
	        max_new_tokens: Upper bound on the number of generated tokens.
	        temperature: Sampling temperature; ``0`` (or less) selects greedy
	            decoding.
	        top_p: Nucleus-sampling threshold, used only when sampling.
	        top_k: Top-k cutoff, used only when sampling.
	        show_thinking: When false, the thought is replaced by ``""``.

	    Returns:
	        A 2-tuple of strings ``(thought, answer)`` as produced by
	        parse_output() on the decoded continuation.
	    """
	    if not prompt or not prompt.strip():
	        return "", "⚠️ Please enter a question."

	    # Format prompt to mirror the original inference structure
	    full_prompt = build_prompt(prompt)
	    inputs = tokenizer(full_prompt, return_tensors="pt")
	    input_ids = inputs["input_ids"]

	    gen_kwargs = {
	        # UI sliders may hand numbers over as floats; generate() needs ints
	        # for token counts.
	        "max_new_tokens": int(max_new_tokens),
	        "pad_token_id": tokenizer.eos_token_id,
	        "eos_token_id": tokenizer.eos_token_id,
	    }

	    # Forward the tokenizer's attention mask when it provides one. Because
	    # pad_token_id equals eos_token_id, generate() cannot reliably infer the
	    # mask from the input ids on its own.
	    attention_mask = inputs.get("attention_mask")
	    if attention_mask is not None:
	        gen_kwargs["attention_mask"] = attention_mask

	    if temperature > 0:
	        gen_kwargs.update(
	            do_sample=True,
	            temperature=float(temperature),
	            top_p=float(top_p),
	            top_k=int(top_k),
	        )
	    else:
	        # Greedy decoding: sampling-only arguments are left out, since they
	        # have no effect when do_sample is False.
	        gen_kwargs["do_sample"] = False

	    with torch.no_grad():
	        output_ids = model.generate(input_ids, **gen_kwargs)

	    # Keep only the continuation: drop the prompt tokens from the front.
	    generated = output_ids[0][input_ids.shape[-1]:]
	    raw = tokenizer.decode(generated, skip_special_tokens=False).strip()
	    raw = raw.replace("<s>", "").replace("</s>", "").strip()

	    # Prepend THINK_START since generation begins immediately after the prompt token
	    raw = THINK_START + "\n" + raw

	    thought, answer = parse_output(raw)

	    return thought if show_thinking else "", answer

	# Extra CSS handed to gr.Blocks: styles the textarea of components tagged
	# with the "thinking-box" class and hides the page footer.
	custom_css = """.thinking-box textarea {
	font-family: 'IBM Plex Mono', monospace !important;
	font-size: 0.82rem !important;
	color: #6b7280 !important;
	}
	footer { display: none !important; }"""

	# Page layout: header, a row with the input column (question, settings,
	# buttons) next to the output column (thinking, answer), then examples,
	# a footer line, and the event wiring.
	# NOTE(review): the nesting below is inferred from the order of the
	# components — confirm it against the running Space.
	with gr.Blocks(
	    theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
	    title="Supra-50M Reasoning",
	    css=custom_css,
	) as demo:

	    gr.HTML(DESCRIPTION)

	    with gr.Row():
	        with gr.Column(scale=3):
	            prompt_input = gr.Textbox(
	                label="Your question",
	                placeholder="Ask anything…",
	                lines=3,
	            )

	            with gr.Accordion("⚙️ Generation settings", open=False):
	                max_tokens = gr.Slider(512, 992, value=992, step=32, label="Max new tokens")
	                temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature (0 = greedy)")
	                top_p = gr.Slider(0.1, 1.0, value=0.8, step=0.05, label="Top-p")
	                top_k = gr.Slider(1, 100, value=25, step=1, label="Top-k")
	                show_think = gr.Checkbox(value=True, label="Show thinking process")

	            with gr.Row():
	                run_btn = gr.Button("Generate ✦", variant="primary")
	                clear_btn = gr.Button("Clear", variant="secondary")

	        with gr.Column(scale=4):
	            thinking_out = gr.Textbox(
	                label="🧠 Thinking process",
	                lines=8,
	                interactive=False,
	                elem_classes=["thinking-box"],
	                placeholder="The model's internal reasoning will appear here…",
	            )
	            answer_out = gr.Textbox(
	                label="✅ Final answer",
	                lines=6,
	                interactive=False,
	                placeholder="The answer will appear here…",
	            )

	    gr.Examples(
	        examples=EXAMPLES,
	        inputs=[prompt_input],
	        label="💡 Try these examples",
	        examples_per_page=5,
	    )

	    # Plain "|" separators: "\|" is not a valid Python escape sequence, and
	    # an unescaped pipe renders the same in this Markdown line.
	    gr.Markdown(
	        "Model: [SupraLabs/Supra-50M-Reasoning](https://huggingface.co/SupraLabs/Supra-50M-Reasoning) "
	        " |  License: Apache 2.0  |  51.8M params · CPU-only · Project Chimera © SupraLabs 2026"
	    )

	    # The order of inputs_list must match generate()'s parameter order.
	    inputs_list = [prompt_input, max_tokens, temperature, top_p, top_k, show_think]
	    outputs_list = [thinking_out, answer_out]

	    # The button and pressing Enter in the question box both run generate().
	    run_btn.click(fn=generate, inputs=inputs_list, outputs=outputs_list)
	    prompt_input.submit(fn=generate, inputs=inputs_list, outputs=outputs_list)
	    # Clear resets the question and both output boxes to empty strings.
	    clear_btn.click(fn=lambda: ("", "", ""), outputs=[prompt_input, thinking_out, answer_out])

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()