Spaces:

lainlives
/

safetensors2ollama-repo

Sleeping

App Files Files Community

safetensors2ollama-repo / app.py

lainlives

Upload folder using huggingface_hub

85d4692 verified 4 months ago

Raw

History Blame

9.1 kB

	import os
	import subprocess
	from sys import argv
	from time import strftime, sleep
	import shutil
	from pathlib import Path
	import gradio as gr
	import signal
	from huggingface_hub import snapshot_download, HfApi
	from apscheduler.schedulers.background import BackgroundScheduler
	from theme import blurple

	# Used for restarting the space
	HF_TOKEN = os.environ.get("HF_TOKEN")
	# Optional values read from the environment (None when the variable is unset).
	# They are used below as the initial values of the key/token textboxes.
	# NOTE(review): TEST_TOKEN reads the same "HF_TOKEN" variable as HF_TOKEN
	# above rather than a separate "TEST_TOKEN" variable — confirm this is intended.
	TEST_OKEY = os.environ.get("TEST_OKEY")
	TEST_TOKEN = os.environ.get("HF_TOKEN")
	# Repo id of the Space that restart_space() asks the Hub to restart.
	HOST_REPO = "lainlives/ztestzz"
	# Presumably the llama.cpp checkout location; not referenced elsewhere in the
	# visible part of this file — TODO confirm it is still needed.
	LLAMACPP_DIR = Path("./llama.cpp")
	# Script run with python3 to turn the downloaded model into a GGUF file.
	CONVERT_SCRIPT = "/app/convert_hf_to_gguf.py"
	# Executable invoked to quantize the F16 GGUF (bare name, so it must be on PATH).
	QUANTIZE_BIN = "llama-quantize"
	# Quantization types produced from the F16 GGUF, processed in this order.
	TARGET_QUANTS = ["Q8_0", "Q6_K", "Q5_K_M", "Q5_K_S", "Q5_0", "Q4_K_M", "Q4_K_S", "Q4_0"]


	def format_log(msg):
	    """Return *msg* prefixed with the current local time as ``[HH:MM:SS]``."""
	    timestamp = strftime("%H:%M:%S")
	    return "[{}] {}".format(timestamp, msg)


	def setup_ollama_keys(private_key_content):
	    """
	    Write the user's private key to ~/.ollama/id_ed25519.

	    Any existing key file is replaced. The file is created with owner-only
	    permissions (0o600) so the key is never readable by other users, not even
	    for the short moment between creation and chmod.

	    Args:
	        private_key_content: Text of the private key. Leading and trailing
	            whitespace is stripped before writing.

	    Returns:
	        A ``(success, message)`` tuple. ``success`` is False when no key was
	        provided (empty or whitespace only) or when the key could not be
	        written; ``message`` is a human-readable status line.
	    """
	    if not private_key_content:
	        return False, "⚠️ No Private Key provided. Pushing will fail."

	    key_text = private_key_content.strip()
	    if not key_text:
	        # A whitespace-only value would otherwise install an empty key file.
	        return False, "⚠️ No Private Key provided. Pushing will fail."

	    ollama_dir = Path(os.path.expanduser("~/.ollama"))
	    key_path = ollama_dir / "id_ed25519"

	    try:
	        ollama_dir.mkdir(parents=True, exist_ok=True)

	        # Remove a previous key if there is one; on a fresh home directory
	        # there is nothing to remove and that must not be an error.
	        try:
	            os.remove(key_path)
	        except FileNotFoundError:
	            pass

	        # Create the file with restrictive permissions from the start.
	        fd = os.open(key_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
	        with os.fdopen(fd, "w", encoding="utf-8") as f:
	            f.write(key_text)
	        os.chmod(key_path, 0o600)

	        return True, "🔑 Private Key installed successfully."
	    except Exception as e:
	        return False, f"❌ Failed to install keys: {e}"


	def push_to_ollama(gguf_path, ollama_repo, tag_suffix):
	    """
	    Build an Ollama model from a GGUF file, push it, and remove the local copy.

	    Runs ``ollama create`` with a temporary Modelfile, then ``ollama push``,
	    then ``ollama rm``. Failures of the CLI calls are reported in the returned
	    log lines rather than raised.

	    Args:
	        gguf_path: ``pathlib.Path`` of the GGUF file to publish. The temporary
	            Modelfile is written next to it.
	        ollama_repo: Destination repository name (the part before the ``:``).
	        tag_suffix: Tag name; it is lower-cased to form ``repo:tag``.

	    Returns:
	        A list of timestamped log lines describing what happened.
	    """
	    ollama_tag = f"{ollama_repo}:{tag_suffix.lower()}"

	    # 1. Write the Modelfile to disk
	    # The CLI needs a physical file to point to with the '-f' flag
	    modelfile_path = gguf_path.parent / "Modelfile"
	    with open(modelfile_path, "w") as f:
	        f.write(f"FROM {gguf_path.resolve()}")

	    logs = []
	    logs.append(format_log(f"🐳 Creating Ollama build: {ollama_tag}"))

	    try:
	        create_cmd = ["ollama", "create", ollama_tag, "-f", str(modelfile_path)]
	        try:
	            # text=True so that a failure's stderr can be shown in the log.
	            subprocess.run(create_cmd, check=True, capture_output=True, text=True)
	        finally:
	            # The Modelfile is only needed by 'create'; remove it whether or
	            # not the build succeeded.
	            if modelfile_path.exists():
	                os.remove(modelfile_path)

	        logs.append(format_log(f"⬆️ Pushing to registry: {ollama_tag}..."))

	        push_cmd = ["ollama", "push", ollama_tag]
	        push_result = subprocess.run(push_cmd, capture_output=True, text=True)

	        if push_result.returncode == 0:
	            logs.append(format_log(f"✅ Successfully pushed {ollama_tag}"))
	        else:
	            logs.append(format_log(f"❌ Push failed: {push_result.stderr}"))

	        # Remove the local tag to save disk space in the container
	        subprocess.run(["ollama", "rm", ollama_tag])

	    except subprocess.CalledProcessError as e:
	        # Raised by the 'check=True' on create_cmd; include the CLI's own
	        # error output, which the exception text alone does not contain.
	        detail = (e.stderr or "").strip()
	        message = f"❌ Ollama Create Error: {e}"
	        if detail:
	            message = f"{message}\n{detail}"
	        logs.append(format_log(message))
	    except Exception as e:
	        logs.append(format_log(f"❌ Error on {tag_suffix}: {str(e)}"))

	    return logs


	def start_ollama_daemon(ollama_key):
	    """
	    Install the Ollama private key and launch ``ollama serve`` in the background.

	    This is a plain function (not a generator): the caller assigns its result
	    directly to a PID variable, so the body must run when it is called.

	    Args:
	        ollama_key: Private key text handed to ``setup_ollama_keys``.

	    Returns:
	        The process id of the spawned ``ollama serve`` process.

	    Raises:
	        gr.Error: If the key could not be installed. The daemon is not
	            started in that case.
	    """
	    print("⏳ Starting Ollama daemon in background...")

	    # Auth
	    success, auth_msg = setup_ollama_keys(ollama_key)
	    print(format_log(auth_msg))
	    if not success:
	        # Pushing cannot work without a key, so stop before spawning anything.
	        raise gr.Error(f"❌ Stopping: Authentication setup failed. {auth_msg}")

	    env = os.environ.copy()
	    process = subprocess.Popen(["ollama", "serve"], env=env)  # stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL

	    # Short pause before returning — presumably to give the daemon time to
	    # start listening before the first CLI call.
	    sleep(2)
	    return process.pid


	def stop_ollama_daemon(pid):
	    """
	    Stop the Ollama daemon: signal *pid*, then ``pkill`` any remaining process.

	    Best effort: a PID that is missing, invalid, or already gone does not
	    prevent the ``pkill`` sweep, and a missing ``pkill`` binary is reported
	    instead of raised.

	    Args:
	        pid: Process id of the daemon to signal. Values that are not a
	            positive integer are skipped (``os.kill`` gives 0 and negative
	            values a process-group meaning that is never wanted here).

	    Returns:
	        None.
	    """
	    print("⏳ Stopping Ollama daemon...")

	    if isinstance(pid, int) and pid > 0:
	        try:
	            os.kill(pid, signal.SIGQUIT)
	        except OSError as e:
	            # e.g. the process has already exited
	            print(f"Could not signal process {pid}: {e}")

	    try:
	        # run() waits for pkill to finish so it does not linger as a zombie.
	        subprocess.run(["pkill", "ollama"])  # , stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
	    except FileNotFoundError:
	        print("pkill is not available; skipping process sweep.")


	def run_conversion(hf_repo, ollama_repo, hf_token, progress=gr.Progress()):
	    """
	    Download a Hugging Face repo, convert it to GGUF, and push every variant.

	    Steps: download the snapshot, convert to BF16 and push it, convert to F16
	    and push it, then quantize the F16 file to each entry of TARGET_QUANTS and
	    push each result. Intermediate files are deleted as soon as they have been
	    pushed and the whole work directory is removed at the end.

	    This is a generator: after each step it yields the complete log so far as
	    a single newline-joined string.

	    Args:
	        hf_repo: Source repo id passed to ``snapshot_download``.
	        ollama_repo: Destination Ollama repository name.
	        hf_token: Hub token; an empty value is passed on as ``None``.
	        progress: Not used inside this function; presumably kept in the
	            signature for Gradio's progress tracking.

	    Yields:
	        The accumulated log text.
	    """
	    logs = []

	    work_dir = Path("conversion_work_dir")
	    download_dir = work_dir / "downloads"
	    output_dir = work_dir / "output"

	    # Start from a clean workspace.
	    if work_dir.exists():
	        shutil.rmtree(work_dir)
	    os.makedirs(download_dir, exist_ok=True)
	    os.makedirs(output_dir, exist_ok=True)

	    try:
	        # Download
	        logs.append(format_log(f"⬇️ Downloading {hf_repo}..."))
	        yield "\n".join(logs)

	        model_path = snapshot_download(
	            repo_id=hf_repo,
	            local_dir=download_dir,
	            token=hf_token if hf_token else None
	        )
	        logs.append(format_log("✅ Download complete."))
	        yield "\n".join(logs)

	        # BF16 (a failure here is reported but does not stop the job)
	        bf16_path = output_dir / "model-bf16.gguf"
	        logs.append(format_log("⚙️ Converting to BF16..."))
	        yield "\n".join(logs)

	        cmd = ["python3", str(CONVERT_SCRIPT), str(model_path), "--outtype", "bf16", "--outfile", str(bf16_path)]
	        result = subprocess.run(cmd, capture_output=True, text=True)

	        if result.returncode == 0:
	            logs.extend(push_to_ollama(bf16_path, ollama_repo, "bf16"))
	            os.remove(bf16_path)
	            logs.append(format_log("🧹 Cleaned up BF16"))
	        else:
	            logs.append(format_log(f"⚠️ BF16 Conversion failed: {result.stderr}"))
	        yield "\n".join(logs)

	        # FP16 (every quantization below reads this file, so a failure is fatal)
	        fp16_path = output_dir / "model-f16.gguf"
	        logs.append(format_log("⚙️ Converting to FP16 (Master)..."))
	        yield "\n".join(logs)

	        cmd = ["python3", str(CONVERT_SCRIPT), str(model_path), "--outtype", "f16", "--outfile", str(fp16_path)]
	        # text=True so the converter's stderr can be logged if it fails.
	        subprocess.run(cmd, check=True, capture_output=True, text=True)

	        logs.extend(push_to_ollama(fp16_path, ollama_repo, "f16"))
	        yield "\n".join(logs)

	        # Quant Loop
	        for quant in TARGET_QUANTS:
	            logs.append(format_log(f"--- {quant} ---"))
	            yield "\n".join(logs)

	            final_gguf = output_dir / f"model-{quant}.gguf"
	            q_cmd = [str(QUANTIZE_BIN), str(fp16_path), str(final_gguf), quant]
	            q_result = subprocess.run(q_cmd, capture_output=True, text=True)

	            if q_result.returncode != 0:
	                logs.append(format_log(f"❌ Quantize failed: {q_result.stderr}"))
	                continue

	            logs.extend(push_to_ollama(final_gguf, ollama_repo, quant))
	            os.remove(final_gguf)
	            logs.append(format_log(f"🧹 Cleaned up {quant}"))
	            yield "\n".join(logs)

	        if fp16_path.exists():
	            os.remove(fp16_path)
	            logs.append(format_log("🧹 Cleaned up f16"))

	    except subprocess.CalledProcessError as e:
	        # Only the F16 conversion uses check=True; show why it failed.
	        detail = (e.stderr or "").strip()
	        message = f"❌ CRITICAL ERROR: {str(e)}"
	        if detail:
	            message = f"{message}\n{detail}"
	        logs.append(format_log(message))

	    except Exception as e:
	        logs.append(format_log(f"❌ CRITICAL ERROR: {str(e)}"))

	    finally:
	        # Cleanup only. Nothing is yielded here: if the consumer closes this
	        # generator early, a yield inside 'finally' would raise
	        # "RuntimeError: generator ignored GeneratorExit".
	        if work_dir.exists():
	            shutil.rmtree(work_dir)

	    logs.append(format_log("🏁 Job Done. Workspace cleared."))
	    yield "\n".join(logs)


	def run_pipeline(hf_repo, ollama_repo, hf_token, ollama_key, progress=gr.Progress()):
	    """
	    Run one full job: start the Ollama daemon, convert and push, stop the daemon.

	    This is a generator used as the button's click handler; it relays every
	    log update produced by ``run_conversion``.

	    Args:
	        hf_repo: Source Hugging Face repo id.
	        ollama_repo: Destination Ollama repository name.
	        hf_token: Hub token forwarded to ``run_conversion``.
	        ollama_key: Private key text forwarded to ``start_ollama_daemon``.
	        progress: Forwarded unchanged to ``run_conversion``.

	    Yields:
	        The accumulated log text after each conversion step.
	    """
	    pid = start_ollama_daemon(ollama_key)
	    try:
	        # We yield from the generator
	        yield from run_conversion(hf_repo, ollama_repo, hf_token, progress)
	        # Grace period before shutdown — presumably to let the daemon finish
	        # outstanding work. Skipped when the stream ends early.
	        sleep(10)
	    finally:
	        # Always stop the daemon, including when the client disconnects
	        # (generator closed) or an error propagates.
	        stop_ollama_daemon(pid)


	# --- UI ---
	# Builds the Gradio Blocks page: introductory Markdown, the four inputs
	# (source repo, HF token, destination repo, Ollama key), a start button and a
	# read-only log area. The button is wired to run_pipeline at the end.
	with gr.Blocks(title="HF to Ollama") as demo:
	# Comma-separated list of the quantization types, shown in the intro text.
	target_quants_str = ', '.join(str(item) for item in TARGET_QUANTS)
	gr.Markdown("## Convert a safetensor HF repo to an Ollama repo.")
	gr.Markdown(f"This space will generate F16, BF16, {target_quants_str} GGUFs from safetensors.")
	gr.Markdown("And pushes them to your Ollama repo. You will need an Ollama ssh key, not an API key to push.")
	gr.Markdown("Temporarily move yours from ~/.ollama/id_ed25519 This will cause Ollama to generate a new one")
	gr.Markdown("After logging in set it aside, that ssh key can be used, or the old one, whichever, for spaces")

	with gr.Row():
	with gr.Column():
	hf_input = gr.Textbox(label="Source HF Repo", placeholder="unsloth/Qwen3.5-9B", value="unsloth/Qwen3.5-0.8B")
	# NOTE(review): the initial value comes from the TEST_TOKEN environment-derived
	# constant — presumably a testing convenience; confirm it is acceptable for
	# this value to be sent to every visitor's browser.
	hf_token_input = gr.Textbox(label="HF Token (for gated models and faster download)", type="password", value=TEST_TOKEN)

	with gr.Column():
	ollama_input = gr.Textbox(label="Destination Ollama Repo", placeholder="user/model", value="fervent_mcclintock/Qwen3.5-9B")
	# NOTE(review): same concern as the HF token above — the initial value is the
	# TEST_OKEY environment-derived constant; confirm exposing it is intended.
	ollama_key_input = gr.Textbox(label="Ollama Private (ssh) Key", lines=7, type="password", placeholder="-----BEGIN OPENSSH PRIVATE KEY-----...", value=TEST_OKEY)
	btn = gr.Button("Start", variant="primary")

	# Read-only output area; receives each string yielded by run_pipeline.
	# `logs` is a module-level name bound to this component.
	logs = gr.TextArea(label="Logs", interactive=False, lines=10, autoscroll=True)

	# The inputs are passed positionally, in this order, to
	# run_pipeline(hf_repo, ollama_repo, hf_token, ollama_key).
	btn.click(
	fn=run_pipeline,
	inputs=[hf_input, ollama_input, hf_token_input, ollama_key_input],
	outputs=logs
	)


	def restart_space():
	    """Ask the Hub to restart the Space named by HOST_REPO with a factory reboot."""
	    api = HfApi()
	    api.restart_space(
	        repo_id=HOST_REPO,
	        token=HF_TOKEN,
	        factory_reboot=True,
	    )


	# Schedule restart_space to run every 21600 seconds (6 hours). The scheduler
	# is started at import time, before the app is launched.
	scheduler = BackgroundScheduler()
	scheduler.add_job(func=restart_space, trigger="interval", seconds=21600)
	scheduler.start()

	if __name__ == "__main__":
	    # Enable the request queue, then serve on all interfaces at port 7860.
	    app = demo.queue()
	    app.launch(server_name="0.0.0.0", server_port=7860, theme=blurple)