Spaces:

Santiagoismo
/

CelachoBot

Runtime error

App Files Files Community

Santiagoismo committed on Apr 4

Commit

36e2e2a

1 Parent(s): 14a3a14

bundle gguf locally

Browse files

Files changed (5) hide show

.gitattributes +1 -0
Dockerfile +12 -14
app.py +84 -111
models/Qwen3.5-0.8B-Q8_0.gguf +3 -0
requirements.txt +3 -4

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.gguf filter=lfs diff=lfs merge=lfs -text

Dockerfile CHANGED Viewed

@@ -1,24 +1,22 @@
 FROM python:3.11-slim
-ENV DEBIAN_FRONTEND=noninteractive \
-    PIP_NO_CACHE_DIR=1 \
-    PYTHONUNBUFFERED=1
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential cmake git curl ca-certificates \
- && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
-# Build llama.cpp
-RUN git clone https://github.com/ggml-org/llama.cpp.git \
- && cmake -S llama.cpp -B llama.cpp/build -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_TESTS=OFF \
- && cmake --build llama.cpp/build --config Release -j2
 COPY requirements.txt .
-RUN pip install -r requirements.txt
-COPY app.py .
-EXPOSE 7860
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

 FROM python:3.11-slim
+# Toolchain is needed only to compile llama-server in the build step below.
 RUN apt-get update && apt-get install -y --no-install-recommends \
+    git build-essential cmake curl ca-certificates && \
+    rm -rf /var/lib/apt/lists/*
 WORKDIR /app
+# Build llama-server from source. LLAMA_BUILD_SERVER is the option name llama.cpp's CMake defines.
+RUN git clone https://github.com/ggerganov/llama.cpp /tmp/llama.cpp && \
+    cmake -S /tmp/llama.cpp -B /tmp/llama.cpp/build -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF && \
+    cmake --build /tmp/llama.cpp/build -j && \
+    cp /tmp/llama.cpp/build/bin/llama-server /usr/local/bin/llama-server
 COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+ENV PORT=7860
+# Must name the GGUF file actually bundled under models/ (this commit adds the Q8_0 build);
+# app.py raises FileNotFoundError at startup when this path does not exist.
+ENV MODEL_PATH=/app/models/Qwen3.5-0.8B-Q8_0.gguf
+CMD ["python","app.py"]

app.py CHANGED Viewed

@@ -1,121 +1,94 @@
-import asyncio
-import os
-import subprocess
-import time
-from contextlib import asynccontextmanager
-import httpx
-from fastapi import FastAPI, Request
-from huggingface_hub import hf_hub_download
-TOKEN = os.getenv("TELEGRAM_BOT_TOKEN", "AAGgNISzjysyQzOWzTaE4Y4MKlQ8_Cz5YCI")
-MODEL_REPO = os.getenv("MODEL_REPO", "bartowski/Qwen3.5-0.8B-GGUF")
-MODEL_FILE = os.getenv("MODEL_FILE", "Qwen3.5-0.8B-Q4_K_M.gguf")
-SYSTEM = os.getenv("BOT_SYSTEM", "You are CelachoBot. Reply helpfully and briefly.")
-PUBLIC_URL = os.getenv("PUBLIC_URL", "").rstrip("/")
-CTX = os.getenv("N_CTX", "2048")
-PREDICT = os.getenv("N_PREDICT", "128")
-TEMP = os.getenv("TEMPERATURE", "0.7")
-THREADS = os.getenv("N_THREADS", str(os.cpu_count() or 2))
-LLAMA = "/app/llama.cpp/build/bin/llama-server"
-MODEL_DIR = "/data"
-SERVER = "http://127.0.0.1:8080"
-proc = None
-def tg(method: str) -> str:
-    return f"https://api.telegram.org/bot{TOKEN}/{method}"
-async def post_json(url: str, data: dict):
-    async with httpx.AsyncClient(timeout=120) as c:
-        return await c.post(url, json=data)
-def start_llama() -> None:
-    global proc
-    os.makedirs(MODEL_DIR, exist_ok=True)
-    model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir=MODEL_DIR)
-    cmd = [
-        LLAMA,
-        "-m", model_path,
-        "--host", "127.0.0.1",
-        "--port", "8080",
-        "-c", CTX,
-        "-n", PREDICT,
-        "-t", THREADS,
-        "--no-webui",
-    ]
-    proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
-def wait_llama(timeout: int = 120) -> None:
-    end = time.time() + timeout
-    while time.time() < end:
         try:
-            r = httpx.get(f"{SERVER}/health", timeout=2)
-            if r.status_code == 200:
-                return
         except Exception:
             pass
-        time.sleep(1)
-    raise RuntimeError("llama-server did not become ready")
-async def set_webhook() -> None:
-    if TOKEN and PUBLIC_URL:
-        await post_json(tg("setWebhook"), {"url": f"{PUBLIC_URL}/webhook"})
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    start_llama()
-    await asyncio.to_thread(wait_llama)
-    try:
-        await set_webhook()
-    except Exception:
-        pass
-    yield
-    global proc
-    if proc and proc.poll() is None:
-        proc.terminate()
-app = FastAPI(lifespan=lifespan)
-@app.get("/")
-async def root():
-    return {"ok": True}
 @app.post("/webhook")
-async def webhook(req: Request):
-    u = await req.json()
-    m = u.get("message") or u.get("edited_message") or {}
-    chat = (m.get("chat") or {}).get("id")
-    text = (m.get("text") or "").strip()
-    if not chat:
-        return {"ok": True}
-    if not text:
-        await post_json(tg("sendMessage"), {"chat_id": chat, "text": "Send text only."})
-        return {"ok": True}
-    prompt = f"{SYSTEM}\n\nUser: {text}\nAssistant:"
-    payload = {
-        "prompt": prompt,
-        "n_predict": int(PREDICT),
-        "temperature": float(TEMP),
-        "stop": ["\nUser:", "\n\nUser:"],
-    }
     try:
-        async with httpx.AsyncClient(timeout=120) as c:
-            r = await c.post(f"{SERVER}/completion", json=payload)
-            r.raise_for_status()
-            out = r.json().get("content", "").strip() or "..."
     except Exception:
-        out = "Model unavailable. Try again in a moment."
-    await post_json(tg("sendMessage"), {"chat_id": chat, "text": out[:4000]})
-    return {"ok": True}

+import os,time,subprocess,requests
+from fastapi import FastAPI,Request
+from fastapi.responses import JSONResponse
+import uvicorn
+# Telegram bot token. Read only from the environment: a credential must never be
+# committed as a fallback value. When empty, boot() skips webhook registration.
+T=os.getenv("TELEGRAM_BOT_TOKEN","")
+# System prompt sent with every chat-completion request.
+S=os.getenv("BOT_SYSTEM","You are El Celacho. Reply helpfully and briefly.")
+# Base URL to which "/webhook" is appended when registering the Telegram webhook.
+# NOTE(review): the default is the Space's page on huggingface.co - confirm that
+# POSTs to this URL actually reach the app, otherwise set PUBLIC_URL explicitly.
+U=os.getenv("PUBLIC_URL","https://huggingface.co/spaces/Santiagoismo/CelachoBot")
+# Port uvicorn listens on.
+P=int(os.getenv("PORT","7860"))
+# Path of the GGUF model file handed to llama-server.
+MP=os.getenv("MODEL_PATH","/app/models/Qwen3.5-0.8B-Q8_0.gguf")
+# Loopback port used by the llama-server child process.
+LPORT=8081
+app=FastAPI()
+# Flipped to True by boot() once llama-server answers its health probe.
+ready=False
+def tg(m, **kw):
+    """POST the keyword arguments as a JSON body to Telegram Bot API method `m`; return the response."""
+    endpoint = f"https://api.telegram.org/bot{T}/{m}"
+    return requests.post(endpoint, json=kw, timeout=60)
+def ask(msg):
+    """Send `msg` plus the system prompt S to the local chat-completions endpoint.
+
+    Returns the stripped text of the first choice. An HTTP error status is
+    raised through `raise_for_status()`.
+    """
+    payload = {
+        "messages": [
+            {"role": "system", "content": S},
+            {"role": "user", "content": msg},
+        ],
+        "temperature": 0.7,
+        "max_tokens": 200,
+    }
+    resp = requests.post(
+        f"http://127.0.0.1:{LPORT}/v1/chat/completions",
+        json=payload,
+        timeout=180,
+    )
+    resp.raise_for_status()
+    first_choice = resp.json()["choices"][0]
+    return first_choice["message"]["content"].strip()
+def boot():
+    """Start llama-server on the model at MP, wait for it to become healthy, then register the webhook.
+
+    Sets the module-level `ready` flag once GET /health returns HTTP 200.
+    Polls at most 120 times, two seconds apart, and stops early if the
+    child process has already exited.
+
+    Raises:
+        FileNotFoundError: if the model file MP does not exist.
+    """
+    global ready
+    if not os.path.exists(MP):
+        raise FileNotFoundError(MP)
+    proc=subprocess.Popen([
+        "llama-server",
+        "-m",MP,
+        "--host","127.0.0.1",
+        "--port",str(LPORT),
+        "-ngl","0",
+        "-c","2048"
+    ])
+    for _ in range(120):
+        # A dead child can never become healthy; stop instead of waiting out the loop.
+        if proc.poll() is not None:
+            break
+        try:
+            r=requests.get(f"http://127.0.0.1:{LPORT}/health",timeout=2)
+            # An HTTP reply alone is not enough: the server answers 503 while the
+            # model is still loading, so only a 200 marks it ready.
+            if r.status_code==200:
+                ready=True
+                break
+        except Exception:
+            # Connection refused / timeout while the server is still starting up.
+            pass
+        time.sleep(2)
+    # Webhook registration is best-effort: failures are ignored so startup still completes.
+    if T and U:
         try:
+            requests.get(
+                f"https://api.telegram.org/bot{T}/setWebhook",
+                params={"url":U.rstrip("/")+"/webhook"},
+                timeout=30
+            )
         except Exception:
             pass
+@app.on_event("startup")
+def startup():
+    """Run boot() when the FastAPI application starts."""
+    boot()
+@app.get("/health")
+def health():
+    """Report that the app is up and whether boot() has marked the model server ready."""
+    return {"ok":True,"model_ready":ready}
 @app.post("/webhook")
+async def webhook(req:Request):
+    """Handle one Telegram update: reply to a text message with the model's answer.
+
+    Updates without a chat id are ignored; updates without text, or arriving
+    before the model is ready, get a short notice instead. Every path responds
+    with {"ok": True}; failures are logged rather than propagated.
+    """
+    # Local imports keep this handler self-contained.
+    import asyncio
+    import logging
     try:
+        u=await req.json()
+        # `or {}` also covers keys that are present but null.
+        m=u.get("message") or {}
+        c=(m.get("chat") or {}).get("id")
+        t=m.get("text")
+        if not c:
+            return {"ok":True}
+        # tg() and ask() use blocking `requests` calls (ask waits up to 180 s);
+        # run them in a worker thread so the event loop keeps serving other requests.
+        if not t:
+            await asyncio.to_thread(tg,"sendMessage",chat_id=c,text="Send text only.")
+            return {"ok":True}
+        if not ready:
+            await asyncio.to_thread(tg,"sendMessage",chat_id=c,text="Model waking up. Retry in a moment.")
+            return {"ok":True}
+        a=await asyncio.to_thread(ask,t)
+        await asyncio.to_thread(tg,"sendMessage",chat_id=c,text=(a or "...")[:4000])
     except Exception:
+        # Still answer 200 below, but leave a trace instead of silently dropping the error.
+        logging.getLogger(__name__).exception("webhook handling failed")
+    return JSONResponse({"ok":True})
+# Script entry point: serve the FastAPI app on all interfaces at port P.
+if __name__=="__main__":
+    uvicorn.run(app,host="0.0.0.0",port=P)

models/Qwen3.5-0.8B-Q8_0.gguf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0ad885ffd4bb022fc4f0d33a3308fa108ef8613159d3b3a67e23abca056b7a6c
+size 811843840

requirements.txt CHANGED Viewed

@@ -1,4 +1,3 @@
-fastapi==0.115.12
-uvicorn[standard]==0.34.0
-httpx==0.28.1
-huggingface_hub==0.30.2

+# Pinned so image rebuilds are reproducible.
+fastapi==0.115.12
+uvicorn[standard]==0.34.0
+requests==2.32.3