import os
import json

from fastapi.responses import HTMLResponse
from gradio import Server
from openai import AsyncOpenAI

app = Server()


@app.get("/", response_class=HTMLResponse)
async def homepage():
    """Return the contents of ``index.html`` stored next to this file."""
    html_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "index.html")
    with open(html_path, "r", encoding="utf-8") as f:
        return f.read()


@app.get("/hf_token_status")
async def get_hf_token_status():
    """Report whether a non-empty ``HF_TOKEN`` environment variable is set.

    Only the boolean is returned; the token value itself is never exposed.
    """
    token_exists = bool(os.environ.get("HF_TOKEN"))
    return {"has_token": token_exists}


# NOTE(review): the parameter and return annotations are intentionally left
# exactly as they were; the framework may introspect them to build the API
# schema -- confirm before tightening them (e.g. to ``str | None``).
@app.api(name="chat")
async def chat(messages_json: str, temperature: float = 0.7, max_tokens: int = 1024, custom_token: str = None) -> str:
    """Stream a chat completion from the Hugging Face router.

    This is an async generator. Every yielded value is the *accumulated*
    response text so far, not just the newest fragment. Failures are reported
    by yielding a single human-readable ``"Error ..."`` string instead of
    raising.

    Args:
        messages_json: JSON-encoded array of chat messages, passed through
            to the chat-completions call as ``messages``.
        temperature: Sampling temperature forwarded to the model.
        max_tokens: Completion token limit forwarded to the model.
        custom_token: Optional token that overrides the ``HF_TOKEN``
            environment variable when it is non-blank.

    Yields:
        The response text accumulated so far, or one error message.
    """
    # A non-blank custom token wins; otherwise fall back to the environment.
    hf_token = (custom_token and custom_token.strip()) or os.environ.get("HF_TOKEN")
    if not hf_token:
        yield "Error: Hugging Face Token (HF_TOKEN) is not configured. Please set it in your environment or provide it in the UI Settings panel."
        return

    try:
        messages = json.loads(messages_json)
    except (TypeError, ValueError, RecursionError) as e:
        # TypeError: input is not str/bytes; ValueError: malformed JSON
        # (JSONDecodeError is a subclass); RecursionError: absurdly deep nesting.
        yield f"Error parsing chat messages: {e}"
        return

    # Valid JSON that is not an array can never be a message list; reject it
    # here rather than spending a network round trip to learn the same thing.
    if not isinstance(messages, list):
        yield "Error parsing chat messages: expected a JSON array of messages."
        return

    try:
        # A client is created per call; the context manager closes its
        # underlying HTTP resources when the stream finishes, fails, or the
        # consumer abandons this generator.
        async with AsyncOpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=hf_token,
            default_headers={
                "X-HF-Bill-To": "huggingface"
            },
        ) as client:
            stream = await client.chat.completions.create(
                model="nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-NVFP4:together",
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                stream=True,
            )

            bot_response = ""
            async for chunk in stream:
                # Skip chunks that carry no choices or no text delta.
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    bot_response += chunk.choices[0].delta.content
                    yield bot_response
    except Exception as e:
        # Boundary handler: surface any client/network/API failure to the
        # caller as text instead of breaking the stream with a traceback.
        yield f"Error calling Hugging Face Router: {e}"


if __name__ == "__main__":
    app.launch(show_error=True)