NVIDIA-Nemotron-3-Ultra-550B-A55B-NVFP4-etheroi

Paused

App Files Files Community

NVIDIA-Nemotron-3-Ultra-550B-A55B-NVFP4-etheroi / app.py

akhaliq HF Staff

feat: Add gradio.Server backend and premium custom frontend

a16bf82 26 days ago

Raw

History Blame

1.99 kB

	import os
	import json
	from fastapi.responses import HTMLResponse
	from gradio import Server
	from openai import AsyncOpenAI

	# Module-level gradio Server instance; the route decorators (@app.get, @app.api)
	# and the app.launch() call in this file all operate on this object.
	app = Server()

	@app.get("/", response_class=HTMLResponse)
	async def homepage():
	    """Return the text of the ``index.html`` located next to this module.

	    The file is re-read (as UTF-8) on every request and its contents are
	    handed back as the response body.
	    """
	    module_dir = os.path.dirname(os.path.abspath(__file__))
	    index_file = os.path.join(module_dir, "index.html")
	    with open(index_file, "r", encoding="utf-8") as handle:
	        page = handle.read()
	    return page

	@app.get("/config")
	async def get_config():
	    """Report whether the ``HF_TOKEN`` environment variable holds a non-empty value.

	    Returns:
	        A dict of the form ``{"has_token": bool}``; the token itself is
	        never included in the response.
	    """
	    env_token = os.environ.get("HF_TOKEN")
	    return {"has_token": True if env_token else False}

	@app.api(name="chat")
	async def chat(messages_json: str, temperature: float = 0.7, max_tokens: int = 1024, custom_token: str = None):
	    """Stream a chat completion for the given conversation.

	    This is an async generator: it yields the text deltas of the streamed
	    completion as they arrive. Failures are not raised; instead a single
	    human-readable ``"Error ..."`` string is yielded and the generator ends.

	    Args:
	        messages_json: JSON-encoded array of chat message objects; it is
	            decoded and passed as ``messages`` to the completion request.
	        temperature: Forwarded unchanged as ``temperature`` to the request.
	        max_tokens: Forwarded unchanged as ``max_tokens`` to the request.
	        custom_token: Optional token overriding the ``HF_TOKEN`` environment
	            variable. Ignored when ``None`` or blank after stripping.

	    Yields:
	        Non-``None`` ``delta.content`` strings from the stream, or one error
	        message string.
	    """
	    # A non-blank custom token wins; otherwise fall back to the environment.
	    hf_token = (custom_token and custom_token.strip()) or os.environ.get("HF_TOKEN")

	    if not hf_token:
	        yield "Error: Hugging Face Token (HF_TOKEN) is not configured. Please set it in your environment or provide it in the UI Settings panel."
	        return

	    # Only the failures json.loads can produce for bad input are handled here:
	    # TypeError (not str/bytes), ValueError (invalid JSON / bad encoding) and
	    # RecursionError (pathologically deep nesting).
	    try:
	        messages = json.loads(messages_json)
	    except (TypeError, ValueError, RecursionError) as e:
	        yield f"Error parsing chat messages: {str(e)}"
	        return

	    # Valid JSON of the wrong shape (e.g. a bare string or object) is rejected
	    # here so it is reported as an input problem rather than a router failure.
	    if not isinstance(messages, list) or not all(isinstance(item, dict) for item in messages):
	        yield "Error parsing chat messages: expected a JSON array of message objects."
	        return

	    try:
	        # The context manager closes the client's HTTP resources when the
	        # stream finishes, fails, or the consumer stops iterating early.
	        async with AsyncOpenAI(
	            base_url="https://router.huggingface.co/v1",
	            api_key=hf_token,
	            # NOTE(review): presumably selects the billing account on the
	            # router side — confirm against the router documentation.
	            default_headers={
	                "X-HF-Bill-To": "huggingface",
	            },
	        ) as client:
	            stream = await client.chat.completions.create(
	                model="nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-NVFP4:together",
	                messages=messages,
	                temperature=temperature,
	                max_tokens=max_tokens,
	                stream=True,
	            )

	            async for chunk in stream:
	                # Skip chunks with no choices or with an empty delta.
	                if chunk.choices and chunk.choices[0].delta.content is not None:
	                    yield chunk.choices[0].delta.content

	    except Exception as e:
	        # Boundary handler: any client/network/API failure is surfaced to the
	        # caller as text instead of propagating out of the generator.
	        yield f"Error calling Hugging Face Router: {str(e)}"

	if __name__ == "__main__":
	    # Launch the server only when this file is executed as a script,
	    # passing show_error=True through to launch().
	    app.launch(show_error=True)