File size: 771 Bytes
import os
import gradio as gr
from llama_cpp import Llama
# Model configuration. Both values may be overridden through environment
# variables so the app can be tuned per host without editing the source; the
# defaults are the previously hard-coded settings.
MODEL_PATH = os.environ.get("LLM_MODEL_PATH", "./models/mistral.gguf")
N_THREADS = int(os.environ.get("LLM_N_THREADS", "9"))

# Load GGUF model once at import time; `llm` is reused for every request.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    n_threads=N_THREADS,  # Increase for more speed if CPU allows
    n_batch=128,
    use_mlock=True,
    use_mmap=True,
    verbose=False,
)
# Streaming generator
def generate_response(prompt, history=None):
    """Stream the model's reply to ``prompt`` as a progressively longer string.

    Args:
        prompt: The user's message. Surrounding whitespace is stripped before
            it is wrapped in ``[INST] ... [/INST]`` markers.
        history: Prior conversation turns. ``gr.ChatInterface`` passes this as
            the second positional argument on every call, so it must be
            accepted; it is currently not included in the model prompt.
            Defaults to ``None`` so existing single-argument callers keep
            working.

    Yields:
        The text accumulated so far, once per chunk received from the model,
        so each yielded value extends the previous one.
    """
    stream = llm(
        prompt=f"[INST] {prompt.strip()} [/INST]",
        max_tokens=512,
        stop=["</s>"],
        stream=True,
    )
    partial = ""
    for chunk in stream:
        # Each streamed chunk carries only the newly generated text fragment;
        # append it and yield the full text accumulated so far.
        partial += chunk["choices"][0]["text"]
        yield partial
# Gradio UI: build the chat interface around the streaming generator, then
# start the web server.
demo = gr.ChatInterface(
    generate_response,
    description="An ai chatbots who answer any question.",
    title="Leo9 AI Tutor",
)
demo.launch()
|