""" BharatGen AyurParam — Swastik.fit AI Vaidya Hosted on HuggingFace Spaces with ZeroGPU (free, no credit card needed) Model: bharatgenai/AyurParam (2.9B params, trained on 1,000+ Ayurvedic texts) License: CC-BY-4.0 (commercial OK) Prompt format: {question} This Space is called by the Swastik Cloud Function (ayurParamProxy). The /run/predict endpoint receives: { data: [" ... "] } Returns: { data: ["response text"] } """ import spaces import gradio as gr import torch from transformers import AutoTokenizer, AutoModelForCausalLM MODEL_ID = "bharatgenai/AyurParam" tokenizer = None model = None def load_model(): global tokenizer, model if model is not None: return print("[AyurParam] Loading tokenizer...") tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=False) print("[AyurParam] Loading model...") model = AutoModelForCausalLM.from_pretrained( MODEL_ID, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map="auto", ) model.eval() print("[AyurParam] Model ready.") # Load on startup load_model() @spaces.GPU def generate(prompt: str) -> str: """ Main inference function. Accepts either: - Raw prompt already formatted: " ... " - Plain text question (will be wrapped automatically) Returns: assistant response only (no prompt echo) """ if not prompt or not prompt.strip(): return "Please provide a question." # Ensure correct prompt format if "" not in prompt: formatted = f" {prompt.strip()} " else: # Already formatted — ensure it ends with formatted = prompt.strip() if not formatted.endswith(""): formatted = formatted + " " inputs = tokenizer(formatted, return_tensors="pt").to(model.device) input_len = inputs["input_ids"].shape[1] with torch.no_grad(): output = model.generate( **inputs, max_new_tokens=512, do_sample=True, top_k=50, top_p=0.95, temperature=0.6, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id, use_cache=True, ) # Decode only the new tokens (not the prompt) new_tokens = output[0][input_len:] response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip() # Clean up any trailing special tokens for stop in ["", "", ""]: if stop in response: response = response[: response.index(stop)].strip() return response # Gradio interface — Swastik Cloud Function calls /run/predict directly demo = gr.Interface( fn=generate, inputs=gr.Textbox( label="Prompt", placeholder=" What foods should I eat for better digestion? ", lines=3, ), outputs=gr.Textbox(label="AyurParam Response", lines=8), title="BharatGen AyurParam — Ayurveda AI", description=( "**AyurParam** is India's first AI trained on 1,000+ Ayurvedic texts (54.5M words). " "2.9B parameter model fine-tuned on classical Ayurveda knowledge.\n\n" "Prompt format: ` your question `\n\n" "This Space powers the AI Vaidya at [swastik.fit](https://swastik.fit)." ), examples=[ [" What foods should I eat to improve digestion according to Ayurveda? "], [" I have vata imbalance — what daily routine do you recommend? "], [" What are the benefits of turmeric in Ayurvedic medicine? "], [" namaste "], # warmup ping ], cache_examples=False, api_name="predict", # enables /run/predict endpoint ) if __name__ == "__main__": demo.launch()