akhaliq's picture
akhaliq HF Staff
fix: Add return type hint -> str to chat endpoint to prevent UserWarning and enable client data reception
e759c0c
Raw
History Blame
2.01 kB
import os
import json
from fastapi.responses import HTMLResponse
from gradio import Server
from openai import AsyncOpenAI
# Module-level server instance; the route and API decorators below register on it.
app = Server()
@app.get("/", response_class=HTMLResponse)
async def homepage():
    """Return the text of ``index.html`` stored alongside this module.

    The file is read as UTF-8 on every call and its contents are returned
    as the response body.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    index_file = os.path.join(module_dir, "index.html")
    with open(index_file, "r", encoding="utf-8") as handle:
        page = handle.read()
    return page
@app.get("/hf_token_status")
async def get_hf_token_status():
    """Report whether a non-empty ``HF_TOKEN`` environment variable is set.

    Returns:
        A dict ``{"has_token": bool}``; the token value itself is never
        included.
    """
    env_token = os.environ.get("HF_TOKEN")
    return {"has_token": True if env_token else False}
@app.api(name="chat")
async def chat(messages_json: str, temperature: float = 0.7, max_tokens: int = 1024, custom_token: str = None) -> str:
    """Stream a chat completion from the Hugging Face router.

    This is an async generator. Every yielded value is a string: either one
    content fragment of the model's reply, or a single human-readable
    message starting with ``"Error"``. Failures are reported by yielding
    such a message instead of raising.

    Args:
        messages_json: JSON text that is decoded and forwarded as the
            ``messages`` argument of the chat-completions request.
        temperature: Sampling temperature forwarded to the request.
        max_tokens: ``max_tokens`` value forwarded to the request.
        custom_token: Optional token; when it is non-blank after stripping
            it takes precedence over the ``HF_TOKEN`` environment variable.

    Yields:
        Content deltas from the streamed response, in arrival order, or one
        error message if the token is missing, the JSON cannot be parsed,
        or the request fails.
    """
    # Check for Hugging Face token: custom override or environment variable
    hf_token = (custom_token and custom_token.strip()) or os.environ.get("HF_TOKEN")
    if not hf_token:
        yield "Error: Hugging Face Token (HF_TOKEN) is not configured. Please set it in your environment or provide it in the UI Settings panel."
        return

    # Endpoint boundary: any decoding failure is reported to the caller as
    # a message rather than propagated.
    try:
        messages = json.loads(messages_json)
    except Exception as e:
        yield f"Error parsing chat messages: {e}"
        return

    try:
        # A fresh client (with its own HTTP connection pool) is built per
        # call, so scope it with ``async with`` to release its connections
        # when streaming finishes, fails, or the generator is closed early.
        async with AsyncOpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=hf_token,
            default_headers={
                "X-HF-Bill-To": "huggingface"
            },
        ) as client:
            stream = await client.chat.completions.create(
                model="nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-NVFP4:together",
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                stream=True,
            )
            async for chunk in stream:
                # Skip chunks without choices or without text content.
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content
    except Exception as e:
        yield f"Error calling Hugging Face Router: {e}"
# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    launch_options = {"show_error": True}
    app.launch(**launch_options)