Spaces:
Runtime error
Runtime error
| """ | |
| BharatGen AyurParam — Swastik.fit AI Vaidya | |
| Hosted on HuggingFace Spaces (ZeroGPU) | |
| Model: bharatgenai/AyurParam (2.9B params, trained on 1,000+ Ayurvedic texts) | |
| License: CC-BY-4.0 (commercial OK) | |
| Prompt format: <user> {question} <assistant> | |
| This Space is called by the Swastik Cloud Function (ayurParamProxy). | |
| The /gradio_api/call/predict endpoint receives: { data: ["<user> ... <assistant>"] } | |
| Returns: { data: ["response text"] } | |
| ZeroGPU: GPU is allocated on-demand per request (no cold-start, shared GPU pool). | |
| Model loads into GPU memory on first call, cached for duration of GPU slot. | |
| """ | |
| import gradio as gr | |
| import torch | |
| import spaces | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
# Hub repository id passed to both from_pretrained() calls in _ensure_model().
MODEL_ID = "bharatgenai/AyurParam"

# Module-level cache slots: both stay None until _ensure_model() fills them.
_tokenizer = _model = None
def _ensure_model():
    """Fill the module-level tokenizer/model cache on first use.

    Does nothing when ``_model`` is already set. Otherwise it loads the
    tokenizer and then the causal-LM weights for ``MODEL_ID``, puts the model
    in eval mode, and stores both objects in the module globals. Progress is
    reported with ``print``.
    """
    global _tokenizer, _model

    if _model is None:
        print("[AyurParam] Loading tokenizer...")
        # The tokenizer is loaded without remote code; only the model enables it.
        _tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=False)

        print("[AyurParam] Loading model to GPU...")
        load_options = {
            "trust_remote_code": True,
            "torch_dtype": torch.float16,
            "device_map": "auto",
        }
        _model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **load_options)
        _model.eval()
        print("[AyurParam] Model ready on GPU.")
# ZeroGPU only attaches a GPU to functions wrapped with spaces.GPU; without the
# decorator this function would never be given a device.
@spaces.GPU
def generate(prompt: str) -> str:
    """Generate an AyurParam answer for a single prompt.

    Args:
        prompt: Either a pre-formatted prompt (``"<user> ... <assistant>"``)
            or a plain-text question. A missing ``<user>`` prefix and/or
            ``<assistant>`` suffix is added automatically; tags that are
            already present are never duplicated.

    Returns:
        The newly generated text only (the prompt is not echoed), cut at the
        first ``<user>``, ``<context>`` or ``</s>`` marker if the model emits
        one. For an empty or whitespace-only prompt a fixed hint string is
        returned and the model is not loaded.
    """
    # Validate first so an empty request never triggers a model load.
    if not prompt or not prompt.strip():
        return "Please provide a question."

    _ensure_model()

    # Normalise to the "<user> {question} <assistant>" format.
    formatted = prompt.strip()
    if "<user>" not in formatted:
        formatted = f"<user> {formatted}"
    if not formatted.endswith("<assistant>"):
        formatted = f"{formatted} <assistant>"

    encoded = _tokenizer(formatted, return_tensors="pt")

    # device_map="auto" decides placement, so follow the model's parameters.
    device = next(_model.parameters()).device
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}
    prompt_len = encoded["input_ids"].shape[1]

    with torch.no_grad():
        output = _model.generate(
            **encoded,
            max_new_tokens=256,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.6,
            eos_token_id=_tokenizer.eos_token_id,
            # EOS is reused as the padding id for generation.
            pad_token_id=_tokenizer.eos_token_id,
            use_cache=True,
        )

    # output[0] holds prompt tokens followed by the completion; keep the latter.
    completion = output[0][prompt_len:]
    response = _tokenizer.decode(completion, skip_special_tokens=True).strip()

    # Drop anything from the earliest stop marker that survived decoding.
    for stop in ("<user>", "<context>", "</s>"):
        response = response.partition(stop)[0].strip()
    return response
# Gradio interface — the Swastik Cloud Function calls /gradio_api/call/predict.
demo = gr.Interface(
    fn=generate,
    inputs=gr.Textbox(
        label="Prompt",
        placeholder="<user> What foods should I eat for better digestion? <assistant>",
        lines=3,
    ),
    outputs=gr.Textbox(label="AyurParam Response", lines=8),
    # The title and the second example previously contained a mis-decoded em dash.
    title="BharatGen AyurParam — Ayurveda AI",
    description=(
        "**AyurParam** is India's first AI trained on 1,000+ Ayurvedic texts (54.5M words). "
        "2.9B parameter model fine-tuned on classical Ayurveda knowledge.\n\n"
        "Prompt format: `<user> your question <assistant>`\n\n"
        "This Space powers the AI Vaidya at [swastik.fit](https://swastik.fit)."
    ),
    examples=[
        ["<user> What foods should I eat to improve digestion according to Ayurveda? <assistant>"],
        ["<user> I have vata imbalance — what daily routine do you recommend? <assistant>"],
        ["<user> What are the benefits of turmeric in Ayurvedic medicine? <assistant>"],
        ["<user> namaste <assistant>"],
    ],
    # Examples are not pre-computed when the app starts.
    cache_examples=False,
    # Endpoint name the external caller depends on.
    api_name="predict",
)

if __name__ == "__main__":
    demo.launch()