Spaces:

puneetsharmax
/

bharatgenai-swastik

Runtime error

App Files Files Community

bharatgenai-swastik / app.py

puneetsharmax

Create app.py

66b9f14 verified 4 months ago

Raw

History Blame

3.93 kB

	"""
	BharatGen AyurParam — Swastik.fit AI Vaidya
	Hosted on HuggingFace Spaces with ZeroGPU (free, no credit card needed)

	Model: bharatgenai/AyurParam (2.9B params, trained on 1,000+ Ayurvedic texts)
	License: CC-BY-4.0 (commercial OK)
	Prompt format: <user> {question} <assistant>

	This Space is called by the Swastik Cloud Function (ayurParamProxy).
	The /run/predict endpoint receives: { data: ["<user> ... <assistant>"] }
	Returns: { data: ["response text"] }
	"""

	import spaces
	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# Identifier handed to from_pretrained() for both the tokenizer and the model.
	MODEL_ID = "bharatgenai/AyurParam"

	# Module-level handles; both stay None until load_model() assigns them.
	tokenizer = model = None


	def load_model():
	    """Populate the module-level ``tokenizer`` and ``model`` exactly once.

	    Returns immediately when ``model`` is already set. Otherwise the
	    tokenizer and the causal-LM weights for ``MODEL_ID`` are fetched via
	    ``from_pretrained`` and the model is switched to eval mode. Progress is
	    reported with ``print``.
	    """
	    global tokenizer, model

	    already_loaded = model is not None
	    if already_loaded:
	        return

	    print("[AyurParam] Loading tokenizer...")
	    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=False)

	    print("[AyurParam] Loading model...")
	    weight_options = {
	        "trust_remote_code": True,
	        "torch_dtype": torch.bfloat16,
	        "device_map": "auto",
	    }
	    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **weight_options)
	    model.eval()

	    print("[AyurParam] Model ready.")


	# Load on startup: this call runs at import time, so the tokenizer and model
	# globals are assigned before generate() can be invoked.
	load_model()


	@spaces.GPU
	def generate(prompt: str) -> str:
	    """
	    Run one AyurParam completion and return only the newly generated text.

	    ``prompt`` may be:
	    - a plain question, which is wrapped as ``<user> {question} <assistant>``
	    - text already containing ``<user>``, which is used as-is except that
	      `` <assistant>`` is appended when it does not already end with it

	    A missing or whitespace-only prompt short-circuits with a fixed message
	    and never reaches the model. The prompt tokens are sliced off the
	    output, and the decoded text is cut at the first ``<user>``,
	    ``<context>`` or ``</s>`` marker.
	    """
	    if not (prompt and prompt.strip()):
	        return "Please provide a question."

	    question = prompt.strip()
	    if "<user>" in prompt:
	        # Caller supplied the chat tags; only guarantee the trailing <assistant>.
	        formatted = (
	            question
	            if question.endswith("<assistant>")
	            else question + " <assistant>"
	        )
	    else:
	        formatted = f"<user> {question} <assistant>"

	    inputs = tokenizer(formatted, return_tensors="pt").to(model.device)
	    prompt_token_count = inputs["input_ids"].shape[1]

	    sampling_options = {
	        "max_new_tokens": 512,
	        "do_sample": True,
	        "top_k": 50,
	        "top_p": 0.95,
	        "temperature": 0.6,
	        "eos_token_id": tokenizer.eos_token_id,
	        "pad_token_id": tokenizer.eos_token_id,
	        "use_cache": True,
	    }
	    with torch.no_grad():
	        output = model.generate(**inputs, **sampling_options)

	    # Everything before prompt_token_count is the echoed prompt; drop it.
	    completion_ids = output[0][prompt_token_count:]
	    response = tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

	    # partition() returns the whole string as its head when the marker is
	    # absent, so markers that do not occur leave the text unchanged.
	    for marker in ("<user>", "<context>", "</s>"):
	        response = response.partition(marker)[0].strip()

	    return response


	# Gradio interface — Swastik Cloud Function calls /run/predict directly
	_prompt_box = gr.Textbox(
	    label="Prompt",
	    placeholder="<user> What foods should I eat for better digestion? <assistant>",
	    lines=3,
	)
	_response_box = gr.Textbox(label="AyurParam Response", lines=8)

	# Markdown shown under the title on the Space page.
	_description = (
	    "AyurParam is India's first AI trained on 1,000+ Ayurvedic texts (54.5M words). "
	    "2.9B parameter model fine-tuned on classical Ayurveda knowledge.\n\n"
	    "Prompt format: `<user> your question <assistant>`\n\n"
	    "This Space powers the AI Vaidya at [swastik.fit](https://swastik.fit)."
	)

	# One single-element row per example, matching the single Textbox input.
	_example_prompts = [
	    ["<user> What foods should I eat to improve digestion according to Ayurveda? <assistant>"],
	    ["<user> I have vata imbalance — what daily routine do you recommend? <assistant>"],
	    ["<user> What are the benefits of turmeric in Ayurvedic medicine? <assistant>"],
	    ["<user> namaste <assistant>"],  # warmup ping
	]

	demo = gr.Interface(
	    fn=generate,
	    inputs=_prompt_box,
	    outputs=_response_box,
	    title="BharatGen AyurParam — Ayurveda AI",
	    description=_description,
	    examples=_example_prompts,
	    cache_examples=False,
	    api_name="predict",  # enables /run/predict endpoint
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()