Spaces:
Sleeping
Sleeping
| import os | |
| import subprocess | |
| from sys import argv | |
| from time import strftime, sleep | |
| import shutil | |
| from pathlib import Path | |
| import gradio as gr | |
| import signal | |
| from huggingface_hub import snapshot_download, HfApi | |
| from apscheduler.schedulers.background import BackgroundScheduler | |
| from theme import blurple | |
# --- Configuration ---
# Used for restarting the space
HF_TOKEN = os.environ.get("HF_TOKEN")
# Values used as pre-filled defaults for the UI text boxes further down.
TEST_OKEY = os.environ.get("TEST_OKEY")
# NOTE(review): this reads the same "HF_TOKEN" variable as HF_TOKEN above —
# confirm whether a dedicated test variable was intended.
TEST_TOKEN = os.environ.get("HF_TOKEN")
# Space repo id passed to HfApi.restart_space() by restart_space().
HOST_REPO = "lainlives/ztestzz"
# NOTE(review): not referenced anywhere in the visible part of the file.
LLAMACPP_DIR = Path("./llama.cpp")
# Script invoked with python3 to convert the downloaded repo to GGUF.
CONVERT_SCRIPT = "/app/convert_hf_to_gguf.py"
# Executable invoked once per quantization type; resolved through PATH.
QUANTIZE_BIN = "llama-quantize"
# Quantization types produced from the FP16 GGUF, in processing order.
TARGET_QUANTS = ["Q8_0", "Q6_K", "Q5_K_M", "Q5_K_S", "Q5_0", "Q4_K_M", "Q4_K_S", "Q4_0"]
def format_log(msg):
    """Return *msg* prefixed with the current local time as ``[HH:MM:SS] ``."""
    timestamp = strftime("%H:%M:%S")
    return "[{}] {}".format(timestamp, msg)
def setup_ollama_keys(private_key_content):
    """
    Write the user's private key to ~/.ollama/id_ed25519.

    Args:
        private_key_content: Text of the private key. Surrounding whitespace
            is stripped before writing.

    Returns:
        A ``(ok, message)`` tuple. ``ok`` is False when no key was supplied
        or when the key file could not be written; ``message`` is a
        human-readable status line suitable for the log output.
    """
    # Treat a whitespace-only value like a missing key instead of installing
    # an empty key file.
    if not private_key_content or not private_key_content.strip():
        return False, "⚠️ No Private Key provided. Pushing will fail."

    key_path = Path(os.path.expanduser("~/.ollama")) / "id_ed25519"
    try:
        key_path.parent.mkdir(parents=True, exist_ok=True)
        # A key from a previous run may or may not exist; on a fresh
        # container there is nothing to delete and that must not be an error.
        key_path.unlink(missing_ok=True)
        # Create the file with owner-only permissions from the start so the
        # key is never readable by others, not even briefly.
        fd = os.open(key_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as f:
            f.write(private_key_content.strip())
        os.chmod(key_path, 0o600)
        return True, "🔑 Private Key installed successfully."
    except OSError as e:
        return False, f"❌ Failed to install keys: {e}"
def push_to_ollama(gguf_path, ollama_repo, tag_suffix):
    """
    Build an Ollama model from a GGUF file and push it to the registry.

    Args:
        gguf_path: ``pathlib.Path`` of the GGUF file. A temporary
            ``Modelfile`` is written next to it.
        ollama_repo: Destination repository name, e.g. ``"user/model"``.
        tag_suffix: Tag for this build; it is lower-cased to form
            ``<ollama_repo>:<tag>``.

    Returns:
        A list of formatted log lines describing what happened. Failures
        are reported in the log lines rather than raised.
    """
    ollama_tag = f"{ollama_repo}:{tag_suffix.lower()}"
    # The CLI needs a physical file to point to with the '-f' flag.
    modelfile_path = gguf_path.parent / "Modelfile"
    logs = [format_log(f"🐳 Creating Ollama build: {ollama_tag}")]
    try:
        modelfile_path.write_text(f"FROM {gguf_path.resolve()}")

        create_cmd = ["ollama", "create", ollama_tag, "-f", str(modelfile_path)]
        subprocess.run(create_cmd, check=True, capture_output=True, text=True)

        logs.append(format_log(f"⬆️ Pushing to registry: {ollama_tag}..."))
        push_cmd = ["ollama", "push", ollama_tag]
        push_result = subprocess.run(push_cmd, capture_output=True, text=True)
        if push_result.returncode == 0:
            logs.append(format_log(f"✅ Successfully pushed {ollama_tag}"))
        else:
            logs.append(format_log(f"❌ Push failed: {push_result.stderr}"))

        # Remove the local tag to save disk space in the container.
        subprocess.run(["ollama", "rm", ollama_tag])
    except subprocess.CalledProcessError as e:
        # Raised by 'check=True' on the create command. Prefer the captured
        # stderr, which says why it failed; fall back to the generic message.
        detail = (e.stderr or "").strip() or str(e)
        logs.append(format_log(f"❌ Ollama Create Error: {detail}"))
    except Exception as e:
        logs.append(format_log(f"❌ Error on {tag_suffix}: {str(e)}"))
    finally:
        # Always remove the Modelfile, including when 'ollama create' failed,
        # so a stale file is not left beside the GGUF.
        modelfile_path.unlink(missing_ok=True)
    return logs
def start_ollama_daemon(ollama_key):
    """
    Install the Ollama key and launch ``ollama serve`` in the background.

    This is a generator: drive it with ``yield from`` (or iterate it) so
    that the body actually runs.

    Args:
        ollama_key: Private key text handed to ``setup_ollama_keys``.

    Yields:
        The accumulated log text (lines joined by newlines) after each step.

    Returns:
        Via the generator's return value: ``(pid, logs)`` when the daemon
        process was launched, or ``None`` when key installation or the
        launch itself failed.
    """
    print("⏳ Starting Ollama daemon in background...")
    # Local log buffer. Without it the name would resolve to the module-level
    # Gradio component that is also called `logs`.
    logs = [format_log("⏳ Starting Ollama daemon in background...")]

    # Auth
    success, auth_msg = setup_ollama_keys(ollama_key)
    logs.append(format_log(auth_msg))
    yield "\n".join(logs)
    if not success:
        logs.append(format_log("❌ Stopping: Authentication setup failed."))
        yield "\n".join(logs)
        return

    try:
        process = subprocess.Popen(["ollama", "serve"], env=os.environ.copy())
    except OSError as e:
        # e.g. the `ollama` executable is not installed or not on PATH.
        logs.append(format_log(f"❌ Could not launch Ollama daemon: {e}"))
        yield "\n".join(logs)
        return

    # Give the server a moment to come up before the first CLI call.
    sleep(2)
    if process.poll() is None:
        logs.append(format_log(f"✅ Ollama daemon running (pid {process.pid})."))
    else:
        # Not treated as fatal: later CLI calls report their own errors.
        logs.append(format_log(
            f"⚠️ 'ollama serve' exited early with code {process.returncode}."
        ))
    yield "\n".join(logs)
    return process.pid, logs
def stop_ollama_daemon(pid):
    """
    Stop the Ollama daemon started earlier.

    Args:
        pid: Process id of the ``ollama serve`` process. Anything that is
            not a positive integer is ignored, and only the ``pkill``
            sweep runs.

    Returns:
        A list of formatted log lines describing the shutdown.
    """
    print("⏳ Stopping Ollama daemon...")
    logs = [format_log("⏳ Stopping Ollama daemon...")]

    # Only signal a real, positive pid: os.kill(0, ...) and negative values
    # address whole process groups rather than a single process.
    if isinstance(pid, int) and pid > 0:
        try:
            # SIGTERM asks for a normal shutdown. SIGQUIT makes a Go binary
            # dump all goroutine stacks before exiting.
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            logs.append(format_log("ℹ️ Ollama daemon had already exited."))
        except OSError as e:
            logs.append(format_log(f"⚠️ Could not signal Ollama daemon: {e}"))

    # Sweep up any remaining ollama processes. run() waits for pkill, so no
    # zombie child is left behind. pkill exits non-zero when nothing
    # matched, which is fine here.
    try:
        subprocess.run(["pkill", "ollama"], check=False)
    except OSError as e:
        logs.append(format_log(f"⚠️ pkill unavailable: {e}"))
    return logs
def run_conversion(hf_repo, ollama_repo, hf_token, progress=gr.Progress()):
    """
    Download a Hugging Face repo, convert it to GGUF and push every variant.

    The BF16 and FP16 files come from the convert script. Each entry of
    ``TARGET_QUANTS`` is then quantized from the FP16 file. Every produced
    file is pushed with ``push_to_ollama`` and deleted afterwards.

    Args:
        hf_repo: Source repo id on the Hugging Face Hub.
        ollama_repo: Destination Ollama repository, e.g. ``"user/model"``.
        hf_token: Hub token; an empty value is passed on as ``None``.
        progress: Gradio progress tracker. It is accepted for the UI
            callback signature and is not used by this function.

    Yields:
        The accumulated log text (lines joined by newlines) after each step.
        Errors are reported in the log text rather than raised.
    """
    logs = []
    work_dir = Path("conversion_work_dir")
    download_dir = work_dir / "downloads"
    output_dir = work_dir / "output"
    try:
        # Start from a clean workspace. This lives inside the try so a
        # filesystem error is reported in the log like any other failure.
        if work_dir.exists():
            shutil.rmtree(work_dir)
        os.makedirs(download_dir, exist_ok=True)
        os.makedirs(output_dir, exist_ok=True)

        # Download
        logs.append(format_log(f"⬇️ Downloading {hf_repo}..."))
        yield "\n".join(logs)
        model_path = snapshot_download(
            repo_id=hf_repo,
            local_dir=download_dir,
            token=hf_token if hf_token else None
        )
        logs.append(format_log("✅ Download complete."))
        yield "\n".join(logs)

        # BF16 (optional: a failure here is logged and the job continues)
        bf16_path = output_dir / "model-bf16.gguf"
        logs.append(format_log("⚙️ Converting to BF16..."))
        yield "\n".join(logs)
        cmd = ["python3", str(CONVERT_SCRIPT), str(model_path), "--outtype", "bf16", "--outfile", str(bf16_path)]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode == 0:
            logs.extend(push_to_ollama(bf16_path, ollama_repo, "bf16"))
            bf16_path.unlink(missing_ok=True)
            logs.append(format_log("🧹 Cleaned up BF16"))
        else:
            logs.append(format_log(f"⚠️ BF16 Conversion failed: {result.stderr}"))
        yield "\n".join(logs)

        # FP16 (required: every quantization below reads this file)
        fp16_path = output_dir / "model-f16.gguf"
        logs.append(format_log("⚙️ Converting to FP16 (Master)..."))
        yield "\n".join(logs)
        cmd = ["python3", str(CONVERT_SCRIPT), str(model_path), "--outtype", "f16", "--outfile", str(fp16_path)]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            # Put the converter's stderr in the log instead of only an exit
            # status.
            raise RuntimeError(f"FP16 conversion failed: {result.stderr}")
        logs.extend(push_to_ollama(fp16_path, ollama_repo, "f16"))
        yield "\n".join(logs)

        # Quant Loop
        for quant in TARGET_QUANTS:
            logs.append(format_log(f"--- {quant} ---"))
            yield "\n".join(logs)
            final_gguf = output_dir / f"model-{quant}.gguf"
            q_cmd = [str(QUANTIZE_BIN), str(fp16_path), str(final_gguf), quant]
            q_result = subprocess.run(q_cmd, capture_output=True, text=True)
            if q_result.returncode != 0:
                # One failed quantization does not abort the remaining ones.
                logs.append(format_log(f"❌ Quantize failed: {q_result.stderr}"))
                continue
            logs.extend(push_to_ollama(final_gguf, ollama_repo, quant))
            final_gguf.unlink(missing_ok=True)
            logs.append(format_log(f"🧹 Cleaned up {quant}"))
            yield "\n".join(logs)

        if fp16_path.exists():
            os.remove(fp16_path)
            logs.append(format_log("🧹 Cleaned up f16"))
    except Exception as e:
        logs.append(format_log(f"❌ CRITICAL ERROR: {str(e)}"))
    finally:
        # Cleanup only. Yielding from a finally block would raise
        # RuntimeError if the consumer closes the generator early.
        shutil.rmtree(work_dir, ignore_errors=True)

    logs.append(format_log("🏁 Job Done. Workspace cleared."))
    yield "\n".join(logs)
def run_pipeline(hf_repo, ollama_repo, hf_token, ollama_key, progress=gr.Progress()):
    """
    UI entry point: start the Ollama daemon, run the conversion, stop it.

    Args:
        hf_repo: Source repo id on the Hugging Face Hub.
        ollama_repo: Destination Ollama repository.
        hf_token: Hub token forwarded to ``run_conversion``.
        ollama_key: Private key text forwarded to ``start_ollama_daemon``.
        progress: Gradio progress tracker forwarded to ``run_conversion``.

    Yields:
        Log text for the output box: first the daemon start-up log, then
        that log followed by each conversion update.
    """
    # start_ollama_daemon is a generator, so it must be driven for its body
    # to run. `yield from` forwards its log updates and captures its return
    # value: (pid, logs), or None when start-up was aborted.
    started = yield from start_ollama_daemon(ollama_key)
    if not started:
        return
    pid, startup_logs = started
    header = "\n".join(startup_logs)

    try:
        for update in run_conversion(hf_repo, ollama_repo, hf_token, progress):
            # Keep the start-up lines visible above the conversion log.
            yield f"{header}\n{update}" if header else update
        # NOTE(review): presumably a grace period for the daemon to finish
        # outstanding work before it is stopped — confirm.
        sleep(10)
    finally:
        # Stop the daemon even when the run fails or is cancelled.
        stop_ollama_daemon(pid)
# --- UI ---
# Builds the Gradio page: explanatory text, four inputs, a start button and a
# read-only log area that is filled from the values run_pipeline yields.
with gr.Blocks(title="HF to Ollama") as demo:
    target_quants_str = ', '.join(str(item) for item in TARGET_QUANTS)
    gr.Markdown("## Convert a safetensor HF repo to an Ollama repo.")
    gr.Markdown(f"This space will generate F16, BF16, {target_quants_str} GGUFs from safetensors.")
    gr.Markdown("And pushes them to your Ollama repo. You will need an Ollama ssh key, not an API key to push.")
    gr.Markdown("Temporarily move yours from ~/.ollama/id_ed25519 This will cause Ollama to generate a new one")
    gr.Markdown("After logging in set it aside, that ssh key can be used, or the old one, whichever, for spaces")
    with gr.Row():
        with gr.Column():
            hf_input = gr.Textbox(label="Source HF Repo", placeholder="unsloth/Qwen3.5-9B", value="unsloth/Qwen3.5-0.8B")
            # NOTE(review): the default value comes from the environment
            # (TEST_TOKEN). If this page is publicly reachable, that secret
            # is presumably sent to every visitor's browser — confirm and
            # consider removing.
            hf_token_input = gr.Textbox(label="HF Token (for gated models and faster download)", type="password", value=TEST_TOKEN)
        with gr.Column():
            ollama_input = gr.Textbox(label="Destination Ollama Repo", placeholder="user/model", value="fervent_mcclintock/Qwen3.5-9B")
            # NOTE(review): same concern as the token field — the default is
            # the private key read from the environment (TEST_OKEY).
            ollama_key_input = gr.Textbox(label="Ollama Private (ssh) Key", lines=7, type="password", placeholder="-----BEGIN OPENSSH PRIVATE KEY-----...", value=TEST_OKEY)
    btn = gr.Button("Start", variant="primary")
    # This binds the module-level name `logs` to a UI component. Any function
    # that uses `logs` without defining its own local will get this object.
    logs = gr.TextArea(label="Logs", interactive=False, lines=10, autoscroll=True)
    # Input order matches run_pipeline's positional parameters:
    # hf_repo, ollama_repo, hf_token, ollama_key.
    btn.click(
        fn=run_pipeline,
        inputs=[hf_input, ollama_input, hf_token_input, ollama_key_input],
        outputs=logs
    )
def restart_space():
    """Request a factory reboot of the Space named by HOST_REPO, using HF_TOKEN."""
    hub_client = HfApi()
    hub_client.restart_space(
        repo_id=HOST_REPO,
        token=HF_TOKEN,
        factory_reboot=True,
    )
# Periodically restart the Space from a background scheduler.
# The scheduler starts at import time, not only when run as a script.
scheduler = BackgroundScheduler()
# 21600 seconds = 6 hours between restart_space() calls.
scheduler.add_job(restart_space, "interval", seconds=21600)
scheduler.start()
if __name__ == "__main__":
    # Enable the request queue, then serve on all interfaces at port 7860.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860, theme=blurple)