File size: 2,010 Bytes
a16bf82
 
 
 
 
 
 
 
 
 
 
 
 
 
18c682d
 
a16bf82
 
 
 
e759c0c
a16bf82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import os
import json
from fastapi.responses import HTMLResponse
from gradio import Server
from openai import AsyncOpenAI

# Application object; the route and API handlers below register themselves
# on it through the @app.get / @app.api decorators.
app = Server()

@app.get("/", response_class=HTMLResponse)
async def homepage():
    """Return the contents of the ``index.html`` that sits next to this module.

    The file is read as UTF-8 on every request and served as an HTML response.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    index_file = os.path.join(module_dir, "index.html")
    with open(index_file, "r", encoding="utf-8") as handle:
        page = handle.read()
    return page

@app.get("/hf_token_status")
async def get_hf_token_status():
    """Report whether a non-empty ``HF_TOKEN`` environment variable is set.

    Only the presence flag is returned; the token value itself is never exposed.
    """
    env_token = os.environ.get("HF_TOKEN")
    return {"has_token": True if env_token else False}

@app.api(name="chat")
async def chat(messages_json: str, temperature: float = 0.7, max_tokens: int = 1024, custom_token: str = None) -> str:
    """Stream a chat completion from the Hugging Face router.

    This is an async generator. On success it yields each non-None text delta
    of the streamed completion as it arrives. Failures are not raised; they
    are reported by yielding a single human-readable ``"Error ..."`` string.

    Args:
        messages_json: JSON-encoded array of chat messages, passed through to
            the completions endpoint as ``messages``.
        temperature: Sampling temperature forwarded to the model.
        max_tokens: Completion token limit forwarded to the model.
        custom_token: Optional token that overrides the ``HF_TOKEN``
            environment variable when it is non-blank.

    Yields:
        Text fragments of the model reply, or one error message string.
    """
    # A non-blank custom token wins; otherwise fall back to the environment.
    hf_token = (custom_token and custom_token.strip()) or os.environ.get("HF_TOKEN")

    if not hf_token:
        yield "Error: Hugging Face Token (HF_TOKEN) is not configured. Please set it in your environment or provide it in the UI Settings panel."
        return

    try:
        messages = json.loads(messages_json)
    except (TypeError, ValueError, RecursionError) as e:
        # TypeError: input is not str/bytes (e.g. None); ValueError covers
        # json.JSONDecodeError; RecursionError: pathologically nested input.
        yield f"Error parsing chat messages: {str(e)}"
        return

    # Valid JSON that is not an array (e.g. "{}" or "5") would otherwise be
    # forwarded upstream and surface as an opaque router error.
    if not isinstance(messages, list):
        yield "Error parsing chat messages: expected a JSON array of message objects."
        return

    try:
        # The client is created per call, so close it (and its underlying
        # HTTP connection pool) when streaming ends, fails, or is abandoned.
        async with AsyncOpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=hf_token,
            default_headers={
                "X-HF-Bill-To": "huggingface"
            }
        ) as client:
            stream = await client.chat.completions.create(
                model="nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-NVFP4:together",
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                stream=True,
            )

            async for chunk in stream:
                # Some chunks carry no choices or no text; skip those.
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content

    except Exception as e:
        # Top-level boundary: report any upstream/client failure to the caller
        # as text instead of breaking the stream with an exception.
        yield f"Error calling Hugging Face Router: {str(e)}"

# Launch the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    app.launch(show_error=True)