akhaliq's picture
akhaliq HF Staff
feat: Add gradio.Server backend and premium custom frontend
a16bf82
Raw
History Blame
1.99 kB
import os
import json
from fastapi.responses import HTMLResponse
from gradio import Server
from openai import AsyncOpenAI
# Shared application object: the handlers in this file register themselves on
# it through the @app.get / @app.api decorators.
app = Server()
@app.get("/", response_class=HTMLResponse)
async def homepage():
    """Return the contents of the ``index.html`` file stored beside this module."""
    # Resolve relative to this file so the lookup does not depend on the
    # process's current working directory.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    index_file = os.path.join(module_dir, "index.html")
    with open(index_file, "r", encoding="utf-8") as handle:
        page_markup = handle.read()
    return page_markup
@app.get("/config")
async def get_config():
    """Report whether a non-empty ``HF_TOKEN`` environment variable is set.

    Only the boolean is returned; the token value itself is never included.
    """
    env_token = os.environ.get("HF_TOKEN")
    return {"has_token": True if env_token else False}
@app.api(name="chat")
async def chat(messages_json: str, temperature: float = 0.7, max_tokens: int = 1024, custom_token: str = None):
    """Stream a chat completion from the Hugging Face router as text deltas.

    Args:
        messages_json: JSON-encoded array of chat messages. It is decoded and
            passed unchanged as ``messages`` to the chat-completions request.
        temperature: Sampling temperature forwarded to the request.
        max_tokens: Token limit forwarded to the request.
        custom_token: Optional token that takes precedence over the
            ``HF_TOKEN`` environment variable when it is not blank.

    Yields:
        Every non-``None`` content delta of the streamed response, in order.
        Failures (missing token, undecodable or non-array ``messages_json``,
        request/stream errors) are not raised; they are reported as a single
        yielded string that starts with ``"Error"``.
    """
    # Check for Hugging Face token: custom override or environment variable
    hf_token = (custom_token and custom_token.strip()) or os.environ.get("HF_TOKEN")
    if not hf_token:
        yield "Error: Hugging Face Token (HF_TOKEN) is not configured. Please set it in your environment or provide it in the UI Settings panel."
        return

    try:
        messages = json.loads(messages_json)
    except (TypeError, ValueError, RecursionError) as e:
        # TypeError: input is not str/bytes; ValueError: malformed JSON
        # (JSONDecodeError) or bad encoding; RecursionError: pathological nesting.
        yield f"Error parsing chat messages: {e}"
        return

    # Valid JSON that is not an array (e.g. an object or a bare string) cannot
    # be a message list; reject it here instead of sending it to the remote API.
    if not isinstance(messages, list):
        yield "Error parsing chat messages: expected a JSON array of message objects."
        return

    try:
        # A client is created per call, so close it deterministically: the
        # context manager releases its HTTP connections when streaming ends,
        # fails, or the consumer stops iterating early.
        async with AsyncOpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=hf_token,
            default_headers={
                "X-HF-Bill-To": "huggingface"
            },
        ) as client:
            stream = await client.chat.completions.create(
                model="nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-NVFP4:together",
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                stream=True,
            )
            async for chunk in stream:
                # Skip chunks without choices or without a text delta.
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content
    except Exception as e:
        # Boundary handler: surface any request/stream failure to the caller
        # as text rather than propagating it out of the generator.
        yield f"Error calling Hugging Face Router: {e}"
if __name__ == "__main__":
    # Start the server only when this file is executed directly, not when it
    # is imported. NOTE(review): show_error=True presumably surfaces handler
    # errors to the client -- confirm against the gradio launch() docs.
    app.launch(show_error=True)