| import os |
| import json |
| from fastapi.responses import HTMLResponse |
| from gradio import Server |
| from openai import AsyncOpenAI |
|
|
# Shared server instance; the route and API handlers in this module register
# themselves on it through its decorators.
app = Server()
|
|
@app.get("/", response_class=HTMLResponse)
async def homepage():
    """Return the contents of ``index.html`` located next to this module.

    The path is resolved from this file's absolute location, so it does not
    depend on the process's current working directory.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    index_file = os.path.join(module_dir, "index.html")
    with open(index_file, "r", encoding="utf-8") as handle:
        page = handle.read()
    return page
|
|
@app.get("/hf_token_status")
async def get_hf_token_status():
    """Report whether the ``HF_TOKEN`` environment variable is set and non-empty.

    Only a boolean flag is returned; the token value itself is never exposed.
    """
    return {"has_token": bool(os.getenv("HF_TOKEN"))}
|
|
@app.api(name="chat")
async def chat(messages_json: str, temperature: float = 0.7, max_tokens: int = 1024, custom_token: str = None) -> str:
    """Stream a chat completion from the Hugging Face router.

    Args:
        messages_json: JSON-encoded array of chat messages; the decoded list
            is passed to the completion request as ``messages``.
        temperature: Sampling temperature forwarded to the request.
        max_tokens: Completion token limit forwarded to the request.
        custom_token: Optional token. When it is non-blank it takes
            precedence over the ``HF_TOKEN`` environment variable.

    Yields:
        Each non-``None`` content delta of the streamed response, in order.
        Failures (missing token, invalid ``messages_json``, request errors)
        are reported as a single yielded ``"Error ..."`` string instead of
        being raised.
    """
    # NOTE(review): the signature annotations are intentionally left as they
    # were; the framework may inspect them, so confirm before tightening them.
    hf_token = (custom_token or "").strip() or os.environ.get("HF_TOKEN")

    if not hf_token:
        yield "Error: Hugging Face Token (HF_TOKEN) is not configured. Please set it in your environment or provide it in the UI Settings panel."
        return

    try:
        messages = json.loads(messages_json)
    except (TypeError, ValueError, RecursionError) as e:
        # TypeError: input is not str/bytes; ValueError: malformed JSON
        # (JSONDecodeError subclasses it); RecursionError: pathological nesting.
        yield f"Error parsing chat messages: {e}"
        return

    # Reject valid JSON of the wrong shape before making a network call, so
    # the caller gets a parsing error rather than an opaque router error.
    if not isinstance(messages, list):
        yield "Error parsing chat messages: expected a JSON array of messages."
        return

    try:
        # The context manager closes the client's HTTP resources when the
        # stream finishes, fails, or the consumer stops iterating early.
        async with AsyncOpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=hf_token,
            default_headers={
                "X-HF-Bill-To": "huggingface",
            },
        ) as client:
            stream = await client.chat.completions.create(
                model="nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-NVFP4:together",
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                stream=True,
            )

            async for chunk in stream:
                # Some chunks carry no choices or no text content; skip them.
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content

    except Exception as e:
        # Boundary handler: surface any request/stream failure to the caller
        # as text instead of breaking the stream with an exception.
        yield f"Error calling Hugging Face Router: {e}"
|
|
if __name__ == "__main__":
    # Start the server only when this file is executed directly, not when it
    # is imported as a module.
    app.launch(show_error=True)
|
|