import os,time,subprocess,requests
from contextlib import asynccontextmanager
from fastapi import FastAPI,Request,BackgroundTasks
from fastapi.responses import JSONResponse
import uvicorn

# Telegram bot token. It is read from the environment only: a credential must
# never be embedded in source as a fallback. When the variable is unset, T is
# the empty string and Telegram API URLs built from it will not authenticate.
T = os.getenv("TELEGRAM_BOT_TOKEN", "").strip()
# System prompt text from BOT_SYSTEM.
# NOTE(review): not referenced by any code visible in this file — confirm use.
S = os.getenv("BOT_SYSTEM", "Be helpful and brief.")
# Filesystem path of the GGUF model handed to llama-server via "-m".
MP = os.getenv("MODEL_PATH", "/app/models/Qwen3.5-0.8B-Q8_0.gguf")
# Filesystem path of the llama-server executable.
LS = os.getenv("LLAMA_SERVER", "/src/build/bin/llama-server")
# TCP port this web app listens on (passed to uvicorn.run).
P = int(os.getenv("PORT", "7860"))
# Loopback TCP port the llama-server child process is started on.
LPORT = 8081
# Module-level readiness flag; set by `lifespan` after its llama-server
# health probe succeeds, and read by `work` and the /health route.
ready = False

def tg(text, chat_id):
    """Send a text message to a Telegram chat through the Bot API.

    Args:
        text: Message body; only its first 4000 characters are sent.
        chat_id: Identifier of the destination chat.

    Returns:
        The ``requests`` response object of the ``sendMessage`` call. The
        HTTP status is logged but not checked.
    """
    endpoint = f"https://api.telegram.org/bot{T}/sendMessage"
    payload = {"chat_id": chat_id, "text": text[:4000]}
    resp = requests.post(endpoint, json=payload, timeout=120)
    # Log the status and a bounded slice of the body for troubleshooting.
    print("SEND:", resp.status_code, resp.text[:500], flush=True)
    return resp

def ask(msg):
    """Request a short chat completion from the local llama-server.

    Args:
        msg: User text; it is appended to a fixed one-line instruction.

    Returns:
        The stripped ``content`` of the first choice's message, or an empty
        string when that content is missing or empty.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    user_turn = {
        "role": "user",
        "content": f"Reply in one short message.\n{msg}",
    }
    body = {
        "messages": [user_turn],
        "max_tokens": 64,
        "temperature": 0.7,
        "top_p": 0.8,
        "top_k": 20,
        "chat_template_kwargs": {"enable_thinking": False},
    }
    resp = requests.post(
        f"http://127.0.0.1:{LPORT}/v1/chat/completions",
        json=body,
        timeout=300,
    )
    print("LLM:", resp.status_code, resp.text[:700], flush=True)
    resp.raise_for_status()

    message = resp.json()["choices"][0]["message"]
    content = message.get("content")
    if not content:
        return ""
    return content.strip()

def work(chat_id, text):
    """Generate a reply for one incoming message and send it to the chat.

    Runs as a background task. If the model is not ready yet, a "waking up"
    notice is sent instead. Any failure while generating or sending is logged
    and a generic error message is attempted; this function never raises.

    Args:
        chat_id: Telegram chat to reply to.
        text: Text of the user's message.
    """
    try:
        if not ready:
            tg("Model waking up. Retry in a moment.", chat_id)
            return
        # An empty completion is replaced by "..." so a non-empty text is sent.
        tg(ask(text) or "...", chat_id)
    except Exception as e:
        print("WORK ERR:", repr(e), flush=True)
        # The failure may have been the Telegram call itself, so the fallback
        # notification can fail the same way; keep it from escaping the task.
        try:
            tg("Error while generating reply.", chat_id)
        except Exception as notify_err:
            print("WORK NOTIFY ERR:", repr(notify_err), flush=True)

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run the local llama-server for the lifetime of the application.

    Startup:
      * logs whether a Telegram token is configured (never its contents) and
        probes ``getMe`` purely as a diagnostic;
      * verifies the model file and server binary exist;
      * launches llama-server on 127.0.0.1:LPORT and polls its ``/health``
        endpoint (up to 120 attempts, 2 s apart) until it returns HTTP 200,
        then sets the module-level ``ready`` flag.

    Shutdown: terminates the llama-server child process.

    NOTE: the HTTP probes and sleeps are blocking calls, so the event loop is
    held until startup polling finishes.

    Raises:
        FileNotFoundError: if MODEL_PATH or LLAMA_SERVER does not exist.
    """
    global ready

    # Do not log any part of the token itself; presence and length suffice.
    print("TOKEN_SET", bool(T), "LEN", len(T), flush=True)
    try:
        r = requests.get(f"https://api.telegram.org/bot{T}/getMe", timeout=30)
        print("GETME", r.status_code, r.text[:500], flush=True)
    except requests.RequestException as e:
        # Diagnostic only: a transient network error must not abort startup.
        print("GETME ERR:", repr(e), flush=True)

    if not os.path.exists(MP):
        raise FileNotFoundError(MP)
    if not os.path.exists(LS):
        raise FileNotFoundError(LS)

    server = subprocess.Popen([
        LS,
        "-m", MP,
        "--host", "127.0.0.1",
        "--port", str(LPORT),
        "-ngl", "0",
        "-c", "1024",
        "--jinja"
    ], env={**os.environ, "LD_LIBRARY_PATH": "/usr/local/lib"})

    try:
        health_url = f"http://127.0.0.1:{LPORT}/health"
        for _ in range(120):
            if server.poll() is not None:
                # The child already exited; further polling cannot succeed.
                print("LLAMA_EXIT", server.returncode, flush=True)
                break
            try:
                # llama-server answers 503 while the model is still loading,
                # so only a 200 response counts as ready.
                if requests.get(health_url, timeout=2).status_code == 200:
                    ready = True
                    break
            except requests.RequestException:
                pass  # Server socket not accepting connections yet.
            time.sleep(2)

        print("MODEL_READY", ready, flush=True)
        yield
    finally:
        # Stop the child so it does not outlive the application.
        if server.poll() is None:
            server.terminate()
            try:
                server.wait(timeout=10)
            except subprocess.TimeoutExpired:
                server.kill()

# ASGI application instance; startup work is delegated to the `lifespan`
# context manager.
app = FastAPI(lifespan=lifespan)

@app.get("/health")
def health():
    """Report that the web app is up and whether the model is marked ready."""
    status = {"ok": True}
    status["model_ready"] = ready
    return status

@app.post("/webhook")
async def webhook(req: Request, bg: BackgroundTasks):
    """Receive a Telegram update and schedule a reply for text messages.

    The endpoint accepts arbitrary external input, so the payload shape is
    validated instead of assumed. Updates without a usable ``message`` (with
    a chat id and non-empty text) are acknowledged and ignored.

    Args:
        req: Incoming HTTP request whose body should be a JSON update object.
        bg: Background task queue used to run ``work`` after responding.

    Returns:
        ``{"ok": true}`` for any parsable body, or ``{"ok": false}`` with
        HTTP 400 when the body is not valid JSON.
    """
    try:
        u = await req.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        print("UPDATE: invalid JSON body", flush=True)
        return JSONResponse({"ok": False}, status_code=400)

    print("UPDATE:", u, flush=True)

    # "message" and "chat" may be absent, null, or not an object at all.
    m = u.get("message") if isinstance(u, dict) else None
    if not isinstance(m, dict):
        return JSONResponse({"ok": True})

    chat = m.get("chat")
    c = chat.get("id") if isinstance(chat, dict) else None
    t = m.get("text")
    if c and isinstance(t, str) and t:
        bg.add_task(work, c, t)
    return JSONResponse({"ok": True})

if __name__ == "__main__":
    # Serve on all interfaces at the port taken from the PORT environment
    # variable.
    uvicorn.run(app, host="0.0.0.0", port=P)