"""Telegram webhook bot that answers messages using a local llama-server.

On startup the app launches ``llama-server`` as a child process, waits for
its ``/health`` endpoint to report success, and then serves two routes:
``GET /health`` and ``POST /webhook``. Incoming Telegram text messages are
answered in a background task.
"""
import os
import subprocess
import time
from contextlib import asynccontextmanager

import requests
import uvicorn
from fastapi import BackgroundTasks, FastAPI, Request
from fastapi.responses import JSONResponse

# SECURITY(review): the fallback below is a bot token committed to source.
# It is kept only so existing deployments keep working; rotate it and supply
# TELEGRAM_BOT_TOKEN through the environment instead.
T = os.getenv(
    "TELEGRAM_BOT_TOKEN",
    "6298554456:AAGgNISzjysyQzOWzTaE4Y4MKlQ8_Cz5YCI",
).strip()
# NOTE(review): S is read from the environment but not referenced by the
# code in this file; ask() builds its own prompt.
S = os.getenv("BOT_SYSTEM", "Be helpful and brief.")
MP = os.getenv("MODEL_PATH", "/app/models/Qwen3.5-0.8B-Q8_0.gguf")
LS = os.getenv("LLAMA_SERVER", "/src/build/bin/llama-server")
P = int(os.getenv("PORT", "7860"))
LPORT = 8081  # loopback port the llama-server child listens on
ready = False  # set by lifespan() once llama-server reports healthy


def tg(text, chat_id):
    """Send *text* to the Telegram chat *chat_id* and return the response.

    The text is cut to its first 4000 characters (presumably to stay under
    Telegram's message length limit). The HTTP status is logged, not checked.
    """
    r = requests.post(
        f"https://api.telegram.org/bot{T}/sendMessage",
        json={"chat_id": chat_id, "text": text[:4000]},
        timeout=120,
    )
    print("SEND:", r.status_code, r.text[:500], flush=True)
    return r


def ask(msg):
    """Ask the local llama-server for a short reply to *msg*.

    Returns the stripped ``content`` of the first choice, or ``""`` when the
    content is missing or empty. Raises ``requests.HTTPError`` on a non-2xx
    response.
    """
    r = requests.post(
        f"http://127.0.0.1:{LPORT}/v1/chat/completions",
        json={
            "messages": [
                {
                    "role": "user",
                    "content": f"Reply in one short message.\n{msg}",
                }
            ],
            "max_tokens": 64,
            "temperature": 0.7,
            "top_p": 0.8,
            "top_k": 20,
            # Forwarded to the chat template; presumably turns off the
            # model's "thinking" output -- confirm against the model card.
            "chat_template_kwargs": {"enable_thinking": False},
        },
        timeout=300,
    )
    print("LLM:", r.status_code, r.text[:700], flush=True)
    r.raise_for_status()
    m = r.json()["choices"][0]["message"]
    return (m.get("content") or "").strip()


def work(chat_id, text):
    """Generate a reply to *text* and send it to *chat_id*.

    Runs as a background task, so every failure is logged rather than
    raised. If the model is not ready yet, a "waking up" notice is sent
    instead of a generated reply.
    """
    try:
        if not ready:
            tg("Model waking up. Retry in a moment.", chat_id)
            return
        tg(ask(text) or "...", chat_id)
    except Exception as e:
        print("WORK ERR:", repr(e), flush=True)
        try:
            tg("Error while generating reply.", chat_id)
        except Exception as notify_err:
            # The failure may be Telegram itself; never let the error
            # notification raise out of the background task. Only the type
            # is logged because requests errors can embed the request URL.
            print("WORK ERR:", type(notify_err).__name__, flush=True)


def _wait_for_model(proc, attempts=120, delay=2):
    """Poll llama-server's /health until it answers 200; return success.

    Gives up early when the child process *proc* has already exited.
    """
    url = f"http://127.0.0.1:{LPORT}/health"
    for _ in range(attempts):
        if proc.poll() is not None:
            print("LLAMA_EXIT", proc.returncode, flush=True)
            return False
        try:
            # Any answer is not enough: the server responds with a non-200
            # status while the model is still loading.
            loaded = requests.get(url, timeout=2).status_code == 200
        except requests.RequestException:
            loaded = False  # not listening yet
        if loaded:
            return True
        time.sleep(delay)
    return False


def _stop_process(proc, grace=10):
    """Terminate *proc*, escalating to kill after *grace* seconds."""
    if proc.poll() is not None:
        return
    proc.terminate()
    try:
        proc.wait(timeout=grace)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.wait()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Start llama-server on startup and stop it again on shutdown.

    Raises ``FileNotFoundError`` when the model file or the server binary
    is missing.
    """
    global ready
    print("TOKEN_SET", bool(T), "LEN", len(T), "HEAD", T[:12], flush=True)
    try:
        # Diagnostic only: the result is just logged, so a network error
        # here must not prevent the app from starting.
        r = requests.get(f"https://api.telegram.org/bot{T}/getMe", timeout=30)
        print("GETME", r.status_code, r.text[:500], flush=True)
    except requests.RequestException as e:
        # Log only the type: the message can contain the token-bearing URL.
        print("GETME ERR:", type(e).__name__, flush=True)

    for required in (MP, LS):
        if not os.path.exists(required):
            raise FileNotFoundError(required)

    proc = subprocess.Popen(
        [
            LS,
            "-m", MP,
            "--host", "127.0.0.1",
            "--port", str(LPORT),
            "-ngl", "0",
            "-c", "1024",
            "--jinja",
        ],
        env={**os.environ, "LD_LIBRARY_PATH": "/usr/local/lib"},
    )
    try:
        ready = _wait_for_model(proc)
        print("MODEL_READY", ready, flush=True)
        yield
    finally:
        # Do not leave the child running after the web app has stopped.
        ready = False
        _stop_process(proc)


app = FastAPI(lifespan=lifespan)


@app.get("/health")
def health():
    """Report liveness of the web app and whether the model is ready."""
    return {"ok": True, "model_ready": ready}


@app.post("/webhook")
async def webhook(req: Request, bg: BackgroundTasks):
    """Accept a Telegram update and queue a reply for text messages.

    Updates without a chat id or text are acknowledged and ignored. A body
    that is not valid JSON is rejected with HTTP 400.
    """
    try:
        u = await req.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        return JSONResponse({"ok": False}, status_code=400)
    print("UPDATE:", u, flush=True)

    # The endpoint is public, so do not assume the Telegram update shape.
    m = u.get("message") if isinstance(u, dict) else None
    if not isinstance(m, dict):
        m = {}
    chat = m.get("chat")
    c = chat.get("id") if isinstance(chat, dict) else None
    t = m.get("text")
    if c and isinstance(t, str) and t:
        bg.add_task(work, c, t)
    return JSONResponse({"ok": True})


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=P)