# puneetsharmax's picture
# Create app.py
# 66b9f14 verified
# Raw
# History Blame
# 3.93 kB
"""
BharatGen AyurParam — Swastik.fit AI Vaidya
Hosted on HuggingFace Spaces with ZeroGPU (free, no credit card needed)
Model: bharatgenai/AyurParam (2.9B params, trained on 1,000+ Ayurvedic texts)
License: CC-BY-4.0 (commercial OK)
Prompt format: <user> {question} <assistant>
This Space is called by the Swastik Cloud Function (ayurParamProxy).
The /run/predict endpoint receives: { data: ["<user> ... <assistant>"] }
Returns: { data: ["response text"] }
"""
import spaces
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Hub repository id passed to both from_pretrained() calls in load_model().
MODEL_ID = "bharatgenai/AyurParam"

# Module-level singletons; both stay None until load_model() assigns them.
tokenizer = model = None
def load_model():
    """Load the tokenizer and model into the module globals, once.

    Does nothing when ``model`` is already set, so repeated calls are cheap.
    Otherwise the tokenizer is fetched first (without remote code), then the
    causal-LM weights (with remote code enabled, bfloat16, automatic device
    placement), and the model is switched to eval mode. Progress is reported
    on stdout.
    """
    global tokenizer, model

    if model is None:
        print("[AyurParam] Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=False)

        print("[AyurParam] Loading model...")
        model_options = {
            "trust_remote_code": True,
            "torch_dtype": torch.bfloat16,
            "device_map": "auto",
        }
        model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **model_options)
        # Inference only: put the module in evaluation mode.
        model.eval()
        print("[AyurParam] Model ready.")
# Load the tokenizer and model eagerly at import time so generate() can use
# them; load_model() returns immediately if the model is already loaded.
load_model()
def _as_model_prompt(prompt: str) -> str:
    """Return *prompt* laid out as ``<user> ... <assistant>``."""
    text = prompt.strip()
    if "<user>" not in prompt:
        # Plain question: wrap it in the full template.
        return f"<user> {text} <assistant>"
    # Already tagged: only make sure the assistant turn is opened at the end.
    return text if text.endswith("<assistant>") else text + " <assistant>"


def _cut_at_stop_markers(text: str) -> str:
    """Drop everything from the first occurrence of each stop marker onward."""
    for marker in ("<user>", "<context>", "</s>"):
        head, found, _ = text.partition(marker)
        if found:
            text = head.strip()
    return text


@spaces.GPU
def generate(prompt: str) -> str:
    """Run one sampled completion and return only the assistant's text.

    ``prompt`` may be a bare question, which is wrapped as
    ``<user> {question} <assistant>``, or an already-tagged prompt containing
    ``<user>``, which gets a trailing ``<assistant>`` appended if missing.

    Empty or whitespace-only input short-circuits with a fixed hint message.
    Otherwise up to 512 new tokens are sampled, the prompt tokens are sliced
    off the output, and the decoded text is cut at the first ``<user>``,
    ``<context>`` or ``</s>`` marker that appears in it.
    """
    if not (prompt and prompt.strip()):
        return "Please provide a question."

    encoded = tokenizer(_as_model_prompt(prompt), return_tensors="pt").to(model.device)
    prompt_len = encoded["input_ids"].shape[1]

    sampling = {
        "max_new_tokens": 512,
        "do_sample": True,
        "top_k": 50,
        "top_p": 0.95,
        "temperature": 0.6,
        "eos_token_id": tokenizer.eos_token_id,
        # EOS doubles as the padding id here.
        "pad_token_id": tokenizer.eos_token_id,
        "use_cache": True,
    }
    with torch.no_grad():
        sequences = model.generate(**encoded, **sampling)

    # The output sequence starts with the prompt tokens; keep only what follows.
    completion = tokenizer.decode(sequences[0][prompt_len:], skip_special_tokens=True)
    return _cut_at_stop_markers(completion.strip())
# Gradio interface — per the module docstring, the Swastik Cloud Function
# calls this Space's /run/predict endpoint directly.
demo = gr.Interface(
    fn=generate,
    inputs=gr.Textbox(
        label="Prompt",
        placeholder="<user> What foods should I eat for better digestion? <assistant>",
        lines=3,
    ),
    outputs=gr.Textbox(label="AyurParam Response", lines=8),
    # Em-dash restored: the title previously contained mis-decoded bytes
    # ("β€”") that would have rendered as garbage in the page header.
    title="BharatGen AyurParam — Ayurveda AI",
    description=(
        "**AyurParam** is India's first AI trained on 1,000+ Ayurvedic texts (54.5M words). "
        "2.9B parameter model fine-tuned on classical Ayurveda knowledge.\n\n"
        "Prompt format: `<user> your question <assistant>`\n\n"
        "This Space powers the AI Vaidya at [swastik.fit](https://swastik.fit)."
    ),
    examples=[
        ["<user> What foods should I eat to improve digestion according to Ayurveda? <assistant>"],
        # Em-dash restored here too, so the example is not sent to the model
        # with mojibake in it.
        ["<user> I have vata imbalance — what daily routine do you recommend? <assistant>"],
        ["<user> What are the benefits of turmeric in Ayurvedic medicine? <assistant>"],
        ["<user> namaste <assistant>"],  # warmup ping
    ],
    # Examples are not pre-computed at startup.
    cache_examples=False,
    # Names the API endpoint "predict"; the caller expects /run/predict.
    # NOTE(review): the exact route depends on the installed Gradio version — confirm.
    api_name="predict",
)

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()