lainlives's picture
Upload folder using huggingface_hub
85d4692 verified
Raw
History Blame
9.1 kB
import os
import subprocess
from sys import argv
from time import strftime, sleep
import shutil
from pathlib import Path
import gradio as gr
import signal
from huggingface_hub import snapshot_download, HfApi
from apscheduler.schedulers.background import BackgroundScheduler
from theme import blurple
# Token handed to the Hub API when the Space restarts itself.
HF_TOKEN = os.getenv("HF_TOKEN")

# Values pre-filled into the UI form fields.
# NOTE(review): TEST_TOKEN reads the same "HF_TOKEN" variable as HF_TOKEN
# above — confirm that is intended.
TEST_OKEY = os.getenv("TEST_OKEY")
TEST_TOKEN = os.getenv("HF_TOKEN")

# Repo id of the Space that gets restarted.
HOST_REPO = "lainlives/ztestzz"

# llama.cpp tooling locations.
LLAMACPP_DIR = Path(".") / "llama.cpp"
CONVERT_SCRIPT = "/app/convert_hf_to_gguf.py"
QUANTIZE_BIN = "llama-quantize"

# Quantization types produced from the F16 GGUF, in processing order.
TARGET_QUANTS = [
    "Q8_0",
    "Q6_K",
    "Q5_K_M",
    "Q5_K_S",
    "Q5_0",
    "Q4_K_M",
    "Q4_K_S",
    "Q4_0",
]
def format_log(msg):
    """Return *msg* prefixed with the current local time as ``[HH:MM:SS]``."""
    timestamp = strftime("%H:%M:%S")
    return "[{}] {}".format(timestamp, msg)
def setup_ollama_keys(private_key_content):
    """
    Writes the user's private key to ~/.ollama/id_ed25519

    Any key file already at that path is replaced. The new file is created
    with mode 0o600 so the key is never readable by other users, not even
    briefly between the write and a later chmod.

    Args:
        private_key_content: Text of the private key. Surrounding whitespace
            is stripped before writing.

    Returns:
        tuple[bool, str]: ``(True, message)`` when the key was written,
        ``(False, message)`` when no key was given or the write failed.
    """
    if not private_key_content:
        return False, "⚠️ No Private Key provided. Pushing will fail."
    ollama_dir = Path(os.path.expanduser("~/.ollama"))
    key_path = ollama_dir / "id_ed25519"
    try:
        ollama_dir.mkdir(parents=True, exist_ok=True)
        # Drop a stale key first; on a fresh machine there is none, which is fine.
        try:
            os.remove(key_path)
        except FileNotFoundError:
            pass
        # Create the file with restrictive permissions from the start.
        fd = os.open(key_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as f:
            f.write(private_key_content.strip())
        # The creation mode is filtered by the umask; pin the final mode explicitly.
        os.chmod(key_path, 0o600)
    except OSError as e:
        return False, f"❌ Failed to install keys: {e}"
    return True, "🔑 Private Key installed successfully."
def push_to_ollama(gguf_path, ollama_repo, tag_suffix):
    """Create an Ollama model from a GGUF file and push it to the registry.

    A temporary ``Modelfile`` is written next to the GGUF file, used for
    ``ollama create``, and removed again whether or not the create step
    succeeds. After the push attempt the local tag is removed.

    Args:
        gguf_path: ``pathlib.Path`` of the GGUF file to publish.
        ollama_repo: Destination repository (the part before the ``:``).
        tag_suffix: Tag name; it is lower-cased before use.

    Returns:
        list[str]: Timestamped log lines. Failures are reported in the log
        instead of being raised.
    """
    ollama_tag = f"{ollama_repo}:{tag_suffix.lower()}"
    modelfile_path = gguf_path.parent / "Modelfile"
    logs = []
    try:
        # 1. Write the Modelfile to disk
        # The CLI needs a physical file to point to with the '-f' flag
        with open(modelfile_path, "w") as f:
            f.write(f"FROM {gguf_path.resolve()}")
        logs.append(format_log(f"🐳 Creating Ollama build: {ollama_tag}"))
        try:
            create_cmd = ["ollama", "create", ollama_tag, "-f", str(modelfile_path)]
            # text=True so a failure carries readable stderr for the log below.
            subprocess.run(create_cmd, check=True, capture_output=True, text=True)
        finally:
            # Remove the Modelfile even when the create step failed.
            if modelfile_path.exists():
                os.remove(modelfile_path)
        logs.append(format_log(f"⬆️ Pushing to registry: {ollama_tag}..."))
        push_cmd = ["ollama", "push", ollama_tag]
        push_result = subprocess.run(push_cmd, capture_output=True, text=True)
        if push_result.returncode == 0:
            logs.append(format_log(f"✅ Successfully pushed {ollama_tag}"))
        else:
            logs.append(format_log(f"❌ Push failed: {push_result.stderr}"))
        # Remove the local tag to save disk space in the container
        subprocess.run(["ollama", "rm", ollama_tag])
    except subprocess.CalledProcessError as e:
        # Captures errors from the 'check=True' on create_cmd
        detail = (e.stderr or "").strip()
        message = f"❌ Ollama Create Error: {e}"
        if detail:
            message = f"{message}\n{detail}"
        logs.append(format_log(message))
    except Exception as e:
        logs.append(format_log(f"❌ Error on {tag_suffix}: {str(e)}"))
    return logs
def start_ollama_daemon(ollama_key):
    """Install the Ollama key and launch ``ollama serve`` in the background.

    This is a generator. It yields the newline-joined log after each step so
    a caller can stream progress. Its return value, available through
    ``yield from`` or ``StopIteration.value``, is ``(pid, logs)``. Merely
    calling the function does nothing; it must be iterated.

    Args:
        ollama_key: Private key text handed to ``setup_ollama_keys``.

    Yields:
        str: The accumulated log so far.

    Returns:
        tuple: ``(pid, logs)`` where ``pid`` is the process id of the started
        daemon, or ``None`` when key installation failed and nothing was
        started, and ``logs`` is the list of log lines.
    """
    # Local log buffer; the module-level name `logs` is not a list.
    logs = []
    print("⏳ Starting Ollama daemon in background...")
    logs.append(format_log("⏳ Starting Ollama daemon in background...\n"))
    env = os.environ.copy()
    # Auth
    success, auth_msg = setup_ollama_keys(ollama_key)
    logs.append(format_log(auth_msg))
    yield "\n".join(logs)
    if not success:
        logs.append(format_log("❌ Stopping: Authentication setup failed."))
        yield "\n".join(logs)
        return None, logs
    process = subprocess.Popen(["ollama", "serve"], env=env)
    pid = process.pid
    logs.append(format_log("⏳ Starting Ollama daemon in background..."))
    # Give the server a moment to come up before the caller uses it.
    sleep(2)
    return pid, logs
def stop_ollama_daemon(pid):
    """Stop the Ollama daemon started earlier and any other ``ollama`` process.

    Args:
        pid: Process id of the daemon. Anything that is not a positive int
            (for example ``None`` when the daemon was never started) is
            skipped and only the ``pkill`` sweep runs.

    Returns:
        list[str]: Timestamped log lines describing what was done.
    """
    print("⏳ Stopping Ollama daemon...")
    logs = [format_log("⏳ Stopping Ollama daemon...")]
    if isinstance(pid, int) and pid > 0:
        try:
            # SIGTERM asks for a normal shutdown; SIGQUIT makes a Go program
            # exit with a stack dump.
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            logs.append(format_log(f"ℹ️ Ollama daemon (pid {pid}) had already exited."))
    try:
        # Wait for pkill so it does not linger as a zombie; a non-zero exit
        # just means no process matched.
        subprocess.run(["pkill", "ollama"], check=False)
    except OSError as e:
        logs.append(format_log(f"⚠️ Could not run pkill: {e}"))
    return logs
def run_conversion(hf_repo, ollama_repo, hf_token, progress=gr.Progress()):
    """Download a Hugging Face repo, convert it to GGUF and push every variant.

    Generator: after each step it yields the complete log so far as a single
    newline-joined string.

    Steps, in order:
      1. Download the repo snapshot into a fresh work directory.
      2. Convert to BF16 and push it; a failure here is logged and skipped.
      3. Convert to F16 and push it; a failure here aborts the job.
      4. For every entry of ``TARGET_QUANTS``, quantize the F16 file and push
         the result; a failed quantization is logged and skipped.
    Intermediate GGUF files are deleted as soon as they have been pushed and
    the work directory is removed at the end, also after an error.

    Args:
        hf_repo: Source repo id passed to ``snapshot_download``.
        ollama_repo: Destination repository passed to ``push_to_ollama``.
        hf_token: Optional Hugging Face token; an empty value is sent as ``None``.
        progress: Gradio progress tracker; accepted but not used in this function.

    Yields:
        str: The accumulated, timestamped log.
    """
    logs = []
    work_dir = Path("conversion_work_dir")
    download_dir = work_dir / "downloads"
    output_dir = work_dir / "output"
    # Always start from an empty workspace.
    if work_dir.exists():
        shutil.rmtree(work_dir)
    os.makedirs(download_dir, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)
    try:
        # Download
        logs.append(format_log(f"⬇️ Downloading {hf_repo}..."))
        yield "\n".join(logs)
        model_path = snapshot_download(
            repo_id=hf_repo,
            local_dir=download_dir,
            token=hf_token if hf_token else None,
        )
        logs.append(format_log("✅ Download complete."))
        yield "\n".join(logs)

        # BF16
        bf16_path = output_dir / "model-bf16.gguf"
        logs.append(format_log("⚙️ Converting to BF16..."))
        yield "\n".join(logs)
        cmd = ["python3", str(CONVERT_SCRIPT), str(model_path), "--outtype", "bf16", "--outfile", str(bf16_path)]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode == 0:
            logs.extend(push_to_ollama(bf16_path, ollama_repo, "bf16"))
            os.remove(bf16_path)
            logs.append(format_log("🧹 Cleaned up BF16"))
        else:
            logs.append(format_log(f"⚠️ BF16 Conversion failed: {result.stderr}"))
        yield "\n".join(logs)

        # FP16
        fp16_path = output_dir / "model-f16.gguf"
        logs.append(format_log("⚙️ Converting to FP16 (Master)..."))
        yield "\n".join(logs)
        cmd = ["python3", str(CONVERT_SCRIPT), str(model_path), "--outtype", "f16", "--outfile", str(fp16_path)]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            # Every quant is derived from this file, so the job cannot go on.
            # Raise with stderr so the critical-error log says why it failed.
            raise RuntimeError(f"FP16 conversion failed: {result.stderr}")
        logs.extend(push_to_ollama(fp16_path, ollama_repo, "f16"))
        yield "\n".join(logs)

        # Quant Loop
        for quant in TARGET_QUANTS:
            logs.append(format_log(f"--- {quant} ---"))
            yield "\n".join(logs)
            final_gguf = output_dir / f"model-{quant}.gguf"
            q_cmd = [str(QUANTIZE_BIN), str(fp16_path), str(final_gguf), quant]
            q_result = subprocess.run(q_cmd, capture_output=True, text=True)
            if q_result.returncode != 0:
                logs.append(format_log(f"❌ Quantize failed: {q_result.stderr}"))
                continue
            logs.extend(push_to_ollama(final_gguf, ollama_repo, quant))
            os.remove(final_gguf)
            logs.append(format_log(f"🧹 Cleaned up {quant}"))
            yield "\n".join(logs)

        if fp16_path.exists():
            os.remove(fp16_path)
            logs.append(format_log("🧹 Cleaned up f16"))
    except Exception as e:
        logs.append(format_log(f"❌ CRITICAL ERROR: {str(e)}"))
    finally:
        # Best-effort cleanup: a failure here must not hide the log, and no
        # yield happens here so closing the generator early stays legal.
        shutil.rmtree(work_dir, ignore_errors=True)
    logs.append(format_log("🏁 Job Done. Workspace cleared."))
    yield "\n".join(logs)
def run_pipeline(hf_repo, ollama_repo, hf_token, ollama_key, progress=gr.Progress()):
    """Run the whole job: start the Ollama daemon, convert and push, stop the daemon.

    Generator used as the Gradio click handler; every yielded string is the
    full text to show in the log area.

    Args:
        hf_repo: Source Hugging Face repo id.
        ollama_repo: Destination Ollama repository.
        hf_token: Optional Hugging Face token.
        ollama_key: Ollama private key text.
        progress: Gradio progress tracker, forwarded to ``run_conversion``.

    Yields:
        str: The log text so far.
    """
    # start_ollama_daemon is a generator: it only runs when iterated, and its
    # return value is delivered through `yield from`.
    started = yield from start_ollama_daemon(ollama_key)
    if isinstance(started, tuple):
        pid, daemon_logs = started
    else:
        pid, daemon_logs = started, []
    if pid is None:
        # Key setup failed; the reason has already been yielded.
        return
    # Each yielded string replaces the log area, so keep the start-up lines on top.
    prefix = "\n".join(daemon_logs)
    try:
        # We yield from the generator
        for update in run_conversion(hf_repo, ollama_repo, hf_token, progress):
            yield f"{prefix}\n{update}" if prefix else update
        sleep(10)
    finally:
        # Stop the daemon even when the conversion raised or the run was cancelled.
        stop_ollama_daemon(pid)
# --- UI ---
# Gradio form: source/destination fields, credentials, a Start button and a
# read-only log area. Clicking Start runs `run_pipeline` and shows what it
# yields in the log area.
with gr.Blocks(title="HF to Ollama") as demo:
# Comma-separated quant names, interpolated into the description below.
target_quants_str = ', '.join(str(item) for item in TARGET_QUANTS)
gr.Markdown("## Convert a safetensor HF repo to an Ollama repo.")
gr.Markdown(f"This space will generate F16, BF16, {target_quants_str} GGUFs from safetensors.")
gr.Markdown("And pushes them to your Ollama repo. You will need an Ollama ssh key, not an API key to push.")
gr.Markdown("Temporarily move yours from ~/.ollama/id_ed25519 This will cause Ollama to generate a new one")
gr.Markdown("After logging in set it aside, that ssh key can be used, or the old one, whichever, for spaces")
with gr.Row():
with gr.Column():
hf_input = gr.Textbox(label="Source HF Repo", placeholder="unsloth/Qwen3.5-9B", value="unsloth/Qwen3.5-0.8B")
# NOTE(review): this field is pre-filled from the TEST_TOKEN environment
# value — presumably that value reaches every visitor of the page; confirm
# the Space is private or drop the default before publishing.
hf_token_input = gr.Textbox(label="HF Token (for gated models and faster download)", type="password", value=TEST_TOKEN)
with gr.Column():
ollama_input = gr.Textbox(label="Destination Ollama Repo", placeholder="user/model", value="fervent_mcclintock/Qwen3.5-9B")
# NOTE(review): same concern as the HF token field — pre-filled from the
# TEST_OKEY environment value.
ollama_key_input = gr.Textbox(label="Ollama Private (ssh) Key", lines=7, type="password", placeholder="-----BEGIN OPENSSH PRIVATE KEY-----...", value=TEST_OKEY)
btn = gr.Button("Start", variant="primary")
# NOTE: `logs` is a module-level name bound to this TextArea component; any
# function that uses `logs` without assigning it locally gets this component.
logs = gr.TextArea(label="Logs", interactive=False, lines=10, autoscroll=True)
# The inputs are passed to run_pipeline positionally, in this order.
btn.click(
fn=run_pipeline,
inputs=[hf_input, ollama_input, hf_token_input, ollama_key_input],
outputs=logs
)
def restart_space():
    """Factory-reboot the Space named by ``HOST_REPO`` through the Hub API, using ``HF_TOKEN``."""
    api = HfApi()
    api.restart_space(
        repo_id=HOST_REPO,
        token=HF_TOKEN,
        factory_reboot=True,
    )
# Background job: call restart_space every 21600 seconds (6 hours).
scheduler = BackgroundScheduler()
scheduler.add_job(func=restart_space, trigger="interval", seconds=21600)
scheduler.start()

if __name__ == "__main__":
    # Serve the queued app on all interfaces, port 7860.
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=7860,
        theme=blurple,
    )