Spaces:

lainlives
/

safetensors2ollama-repo

Sleeping

App Files Files Community

lainlives committed on Mar 8

Commit

85d4692

verified ·

1 Parent(s): 1553273

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

Dockerfile +102 -0
README.md +7 -9
app.py +249 -0
theme.py +96 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,102 @@

+FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu24.04
+ENV DEBIAN_FRONTEND=noninteractive
+# NOTE(review): copying the build arg into ENV persists the token in the image
+# config and in every container's environment. If it is only needed at build
+# time, a BuildKit secret mount would avoid that - confirm before changing.
+ARG HF_TOKEN
+ENV HF_TOKEN=$HF_TOKEN
+# Replace /usr/local/{bin,lib,lib64} with symlinks into /usr.
+RUN rm -rf /usr/local/bin /usr/local/lib* || true
+RUN ln -s /usr/bin /usr/local/bin && ln -s /usr/lib /usr/local/lib && ln -s /usr/lib /usr/local/lib64
+# Refresh the package index in the same layer that installs the packages, so a
+# cached `apt-get update` layer is never paired with a changed package list.
+# NOTE(review): nvidia-driver-570 installs a driver inside the image; GPU
+# containers usually get driver libraries from the host runtime - confirm it is needed.
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y --no-install-recommends --fix-missing \
+    git \
+    git-lfs \
+    wget \
+    curl \
+    cmake \
+    build-essential \
+    libssl-dev \
+    zlib1g-dev \
+    libbz2-dev \
+    libreadline-dev \
+    libsqlite3-dev \
+    libncursesw5-dev \
+    xz-utils \
+    tk-dev \
+    libxml2-dev \
+    libxmlsec1-dev \
+    libffi-dev \
+    golang-go \
+    python3 \
+    liblzma-dev \
+    ffmpeg \
+    nvidia-driver-570 \
+    python3-pip unzip original-awk grep sed zstd
+WORKDIR /app
+COPY --chown=1000 . /app
+RUN mkdir /app -p && chmod 777 /app
+# RUN bash instollama.sh  # Currently all model types are supported no need to build
+RUN curl -fsSL https://ollama.com/install.sh | sh
+# RUN cd /app && \
+#     git clone --recursive https://github.com/ollama/ollama.git && \
+#     cd ollama && \
+#     go generate ./... && \
+#     go build . && \
+#     ln -s $PWD/ollama /usr/bin/ollama  && \
+#     chmod +x ollama && \
+#     cd ..
+# RUN cd /app && \
+#     git clone --recursive https://github.com/ggerganov/llama.cpp && \
+#     cd llama.cpp && \
+#     cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=OFF -DLLAMA_CURL=OFF && \
+#     cmake --build build --config Release -j --target llama-quantize --parallel 12 && \
+#     cp ./build/bin/llama-* /usr/bin/ && \
+#     cp convert_hf_to_gguf.py /usr/bin/convert_hf_to_gguf && \
+#     rm -rf build && \
+#     cd ..
+# RUN id -u 1000 &>/dev/null || useradd -m -u 1000 user
+# USER 1000
+# ENV HOME=/home/user \
+#     PATH=/home/user/.local/bin:${PATH}
+WORKDIR /app
+# RUN curl https://pyenv.run | bash
+# ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
+# ARG PYTHON_VERSION=3.13
+# RUN pyenv install ${PYTHON_VERSION} && \
+#     pyenv global ${PYTHON_VERSION} && \
+#     pyenv rehash
+RUN pip install --no-cache-dir -U pip setuptools wheel --break-system-packages --ignore-installed
+RUN pip install "huggingface-hub" "hf-transfer" "gradio[oauth]>=6.5.1" "APScheduler" "protobuf>=4.21.0,<5.0.0" "sentencepiece>=0.1.98,<0.3.0" "numpy~=1.26.4" "gguf>=0.1.0" "fastapi" --break-system-packages --ignore-installed
+RUN pip install "torch>=2.8.0"  --break-system-packages --ignore-installed
+RUN pip install git+https://github.com/huggingface/transformers.git --break-system-packages --ignore-installed
+# Fetch prebuilt llama.cpp tools: the convert script is copied next to app.py,
+# everything else (including llama-quantize) is moved onto PATH in /usr/bin.
+RUN mkdir /tmp/llama && hf download lainlives/llama.cpp --local-dir /tmp/llama && chmod +x /tmp/llama/* && cp /tmp/llama/convert* /app/convert_hf_to_gguf.py && mv /tmp/llama/* /usr/bin/
+# HOME is never declared in this Dockerfile, so the application directory is
+# spelled out instead of being derived from ${HOME}.
+ENV PYTHONPATH=/app \
+    PYTHONUNBUFFERED=1 \
+    HF_HUB_ENABLE_HF_TRANSFER=1 \
+    GRADIO_ALLOW_FLAGGING=never \
+    GRADIO_NUM_PORTS=1 \
+    GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_ANALYTICS_ENABLED=False \
+    TQDM_POSITION=-1 \
+    TQDM_MININTERVAL=1 \
+    SYSTEM=spaces \
+    LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
+    PATH=/usr/local/nvidia/bin:${PATH}
+EXPOSE 7860
+# Exec form: python3 is started directly rather than through `/bin/sh -c`.
+ENTRYPOINT ["python3", "/app/app.py"]

README.md CHANGED Viewed

@@ -1,12 +1,10 @@
 ---
-title: Safetensors2ollama Repo
-emoji: 🌍
-colorFrom: indigo
-colorTo: purple
-sdk: gradio
-sdk_version: 6.9.0
-app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: HF to Ollama-currently indev
+emoji: 📈
+colorFrom: gray
+colorTo: pink
+sdk: docker
 pinned: false
+suggested_hardware: "a10g-large"
+disable_embedding: true
 ---

app.py ADDED Viewed

	@@ -0,0 +1,249 @@

+import os
+import subprocess
+from sys import argv
+from time import strftime, sleep
+import shutil
+from pathlib import Path
+import gradio as gr
+import signal
+from huggingface_hub import snapshot_download, HfApi
+from apscheduler.schedulers.background import BackgroundScheduler
+from theme import blurple
+# Used for restarting the space
+HF_TOKEN = os.environ.get("HF_TOKEN")
+# Optional test credentials read from the environment (None when unset).
+TEST_OKEY = os.environ.get("TEST_OKEY")
+# NOTE(review): reads the same HF_TOKEN variable as HF_TOKEN above.
+TEST_TOKEN = os.environ.get("HF_TOKEN")
+# Repo id passed to HfApi.restart_space() by restart_space().
+HOST_REPO = "lainlives/ztestzz"
+# NOTE(review): not referenced anywhere else in the visible code.
+LLAMACPP_DIR = Path("./llama.cpp")
+# Script invoked with python3 to turn the downloaded checkpoint into a GGUF file.
+CONVERT_SCRIPT = "/app/convert_hf_to_gguf.py"
+# Executable name (resolved via PATH) used to quantize the FP16 GGUF.
+QUANTIZE_BIN = "llama-quantize"
+# Quantization types produced, in this order, after the BF16/FP16 conversions.
+TARGET_QUANTS = ["Q8_0", "Q6_K", "Q5_K_M", "Q5_K_S", "Q5_0", "Q4_K_M", "Q4_K_S", "Q4_0"]
+def format_log(msg):
+    return f"[{strftime('%H:%M:%S')}] {msg}"
+def setup_ollama_keys(private_key_content):
+    """
+    Writes the user's private key to ~/.ollama/id_ed25519
+    """
+    if not private_key_content:
+        return False, "⚠️ No Private Key provided. Pushing will fail."
+    ollama_dir = Path(os.path.expanduser("~/.ollama"))
+    ollama_dir.mkdir(parents=True, exist_ok=True)
+    key_path = ollama_dir / "id_ed25519"
+    os.remove(key_path)
+    try:
+        # Write the key
+        with open(key_path, "w") as f:
+            f.write(private_key_content.strip())
+        os.chmod(key_path, 0o600)
+        return True, "🔑 Private Key installed successfully."
+    except Exception as e:
+        return False, f"❌ Failed to install keys: {e}"
+def push_to_ollama(gguf_path, ollama_repo, tag_suffix):
+    ollama_tag = f"{ollama_repo}:{tag_suffix.lower()}"
+    # 1. Write the Modelfile to disk
+    # The CLI needs a physical file to point to with the '-f' flag
+    modelfile_path = gguf_path.parent / "Modelfile"
+    with open(modelfile_path, "w") as f:
+        f.write(f"FROM {gguf_path.resolve()}")
+    logs = []
+    logs.append(format_log(f"🐳 Creating Ollama build: {ollama_tag}"))
+    try:
+        create_cmd = ["ollama", "create", ollama_tag, "-f", str(modelfile_path)]
+        subprocess.run(create_cmd, check=True, capture_output=True)
+        if modelfile_path.exists():
+            os.remove(modelfile_path)
+        logs.append(format_log(f"⬆️ Pushing to registry: {ollama_tag}..."))
+        push_cmd = ["ollama", "push", ollama_tag]
+        push_result = subprocess.run(push_cmd, capture_output=True, text=True)
+        if push_result.returncode == 0:
+            logs.append(format_log(f"✅ Successfully pushed {ollama_tag}"))
+        else:
+            logs.append(format_log(f"❌ Push failed: {push_result.stderr}"))
+        # Remove the local tag to save disk space in the container
+        subprocess.run(["ollama", "rm", ollama_tag])  # stdout=subprocess.DEVNULL
+    except subprocess.CalledProcessError as e:
+        # Captures errors from the 'check=True' on create_cmd
+        logs.append(format_log(f"❌ Ollama Create Error: {e}"))
+    except Exception as e:
+        logs.append(format_log(f"❌ Error on {tag_suffix}: {str(e)}"))
+    return logs
+def start_ollama_daemon(ollama_key):
+    print("⏳ Starting Ollama daemon in background...")
+    logs.append(format_log(f"⏳ Starting Ollama daemon in background...\n"))
+    env = os.environ.copy()
+    # Auth
+    success, auth_msg = setup_ollama_keys(ollama_key)
+    logs.append(format_log(auth_msg))
+    yield "\n".join(logs)
+    if not success:
+        logs.append(format_log("❌ Stopping: Authentication setup failed."))
+        yield "\n".join(logs)
+        return
+    process = subprocess.Popen(["ollama", "serve"], env=env)  # stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+    pid = process.pid
+    logs.append(format_log("⏳ Starting Ollama daemon in background..."))
+    sleep(2)
+    return pid, logs
+def stop_ollama_daemon(pid):
+    print("⏳ Stopping Ollama daemon...")
+    os.kill(pid, signal.SIGQUIT)
+    subprocess.Popen(["pkill", "ollama"])  # , stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+    return logs
+def run_conversion(hf_repo, ollama_repo, hf_token, progress=gr.Progress()):
+    logs = []
+    work_dir = Path("conversion_work_dir")
+    download_dir = work_dir / "downloads"
+    output_dir = work_dir / "output"
+    if work_dir.exists():
+        shutil.rmtree(work_dir)
+    os.makedirs(download_dir, exist_ok=True)
+    os.makedirs(output_dir, exist_ok=True)
+    try:
+        # Download
+        logs.append(format_log(f"⬇️ Downloading {hf_repo}..."))
+        yield "\n".join(logs)
+        model_path = snapshot_download(
+            repo_id=hf_repo,
+            local_dir=download_dir,
+            token=hf_token if hf_token else None
+        )
+        logs.append(format_log("✅ Download complete."))
+        yield "\n".join(logs)
+        # BF16
+        bf16_path = output_dir / "model-bf16.gguf"
+        logs.append(format_log("⚙️ Converting to BF16..."))
+        yield "\n".join(logs)
+        cmd = ["python3", str(CONVERT_SCRIPT), str(model_path), "--outtype", "bf16", "--outfile", str(bf16_path)]
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        if result.returncode == 0:
+            logs.extend(push_to_ollama(bf16_path, ollama_repo, "bf16"))
+            os.remove(bf16_path)
+            logs.append(format_log("🧹 Cleaned up BF16"))
+        else:
+            logs.append(format_log(f"⚠️ BF16 Conversion failed: {result.stderr}"))
+        yield "\n".join(logs)
+        # FP16
+        fp16_path = output_dir / "model-f16.gguf"
+        logs.append(format_log("⚙️ Converting to FP16 (Master)..."))
+        yield "\n".join(logs)
+        cmd = ["python3", str(CONVERT_SCRIPT), str(model_path), "--outtype", "f16", "--outfile", str(fp16_path)]
+        subprocess.run(cmd, check=True, capture_output=True)
+        logs.extend(push_to_ollama(fp16_path, ollama_repo, "f16"))
+        yield "\n".join(logs)
+        # Quant Loop
+        for quant in TARGET_QUANTS:
+            logs.append(format_log(f"--- {quant} ---"))
+            yield "\n".join(logs)
+            final_gguf = output_dir / f"model-{quant}.gguf"
+            q_cmd = [str(QUANTIZE_BIN), str(fp16_path), str(final_gguf), quant]
+            q_result = subprocess.run(q_cmd, capture_output=True, text=True)
+            if q_result.returncode != 0:
+                logs.append(format_log(f"❌ Quantize failed: {q_result.stderr}"))
+                continue
+            logs.extend(push_to_ollama(final_gguf, ollama_repo, quant))
+            os.remove(final_gguf)
+            logs.append(format_log(f"🧹 Cleaned up {quant}"))
+            yield "\n".join(logs)
+        if fp16_path.exists():
+            os.remove(fp16_path)
+        logs.append(format_log("🧹 Cleaned up f16"))
+    except Exception as e:
+        logs.append(format_log(f"❌ CRITICAL ERROR: {str(e)}"))
+    finally:
+        if work_dir.exists():
+            shutil.rmtree(work_dir)
+        logs.append(format_log("🏁 Job Done. Workspace cleared."))
+        yield "\n".join(logs)
+def run_pipeline(hf_repo, ollama_repo, hf_token, ollama_key, progress=gr.Progress()):
+    pid = start_ollama_daemon(ollama_key)
+    # We yield from the generator
+    for update in run_conversion(hf_repo, ollama_repo, hf_token, progress):
+        yield update
+    sleep(10)
+    stop_ollama_daemon(pid)
+# --- UI ---
+with gr.Blocks(title="HF to Ollama") as demo:
+    target_quants_str = ', '.join(str(item) for item in TARGET_QUANTS)
+    gr.Markdown("## Convert a safetensor HF repo to an Ollama repo.")
+    gr.Markdown(f"This space will generate F16, BF16, {target_quants_str} GGUFs from safetensors.")
+    gr.Markdown("And pushes them to your Ollama repo. You will need an Ollama ssh key, not an API key to push.")
+    gr.Markdown("Temporarily move yours from ~/.ollama/id_ed25519  This will cause Ollama to generate a new one")
+    gr.Markdown("After logging in set it aside, that ssh key can be used, or the old one, whichever, for spaces")
+    with gr.Row():
+        with gr.Column():
+            hf_input = gr.Textbox(label="Source HF Repo", placeholder="unsloth/Qwen3.5-9B", value="unsloth/Qwen3.5-0.8B")
+            hf_token_input = gr.Textbox(label="HF Token (for gated models and faster download)", type="password", value=TEST_TOKEN)
+        with gr.Column():
+            ollama_input = gr.Textbox(label="Destination Ollama Repo", placeholder="user/model",  value="fervent_mcclintock/Qwen3.5-9B")
+            ollama_key_input = gr.Textbox(label="Ollama Private (ssh) Key", lines=7, type="password", placeholder="-----BEGIN OPENSSH PRIVATE KEY-----...", value=TEST_OKEY)
+    btn = gr.Button("Start", variant="primary")
+    logs = gr.TextArea(label="Logs", interactive=False, lines=10, autoscroll=True)
+    btn.click(
+        fn=run_pipeline,
+        inputs=[hf_input, ollama_input, hf_token_input, ollama_key_input],
+        outputs=logs
+    )
+def restart_space():
+    HfApi().restart_space(repo_id=HOST_REPO, token=HF_TOKEN, factory_reboot=True)
+scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, "interval", seconds=21600)
+scheduler.start()
+if __name__ == "__main__":
+    demo.queue().launch(server_name="0.0.0.0", server_port=7860, theme=blurple)

theme.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import gradio as gr
+# Custom theme imported by app.py: built on gr.themes.Glass with a purple
+# primary hue, hand-specified secondary/neutral palettes and radius_size="none".
+blurple = gr.themes.Glass(
+    primary_hue="purple",
+    # Secondary palette: gray shades for c50-c700, dark purple for c800-c950.
+    secondary_hue=gr.themes.Color(
+        c100="#f3f4f6",
+        c200="#e5e7eb",
+        c300="#d1d5db",
+        c400="#9ca3af",
+        c50="#f9fafb",
+        c500="#6b7280",
+        c600="#4b5563",
+        c700="#374151",
+        c800="rgba(28.422987236857917, 2.1975645867375784, 39.326663208007815, 1)",
+        c900="#1c0227",
+        c950="#1c0227",
+    ),
+    # Neutral palette: light-to-dark purple shades.
+    neutral_hue=gr.themes.Color(
+        c100="#f3e8ff",
+        c200="#e9d5ff",
+        c300="#d8b4fe",
+        c400="#c084fc",
+        c50="#faf5ff",
+        c500="#a855f7",
+        c600="rgba(83.78266724809674, 29.540070278324272, 132.9400207519531, 1)",
+        c700="rgba(48.28126126334004, 17.30792685680411, 76.3866943359375, 1)",
+        c800="rgba(46.03751121625044, 13.894996526550633, 72.53336791992187, 1)",
+        c900="#2e0e49",
+        c950="#2e0e49",
+    ),
+    radius_size="none",
+).set(
+    # Per-variable overrides. In Gradio themes a value starting with "*"
+    # refers to another theme variable (e.g. "*neutral_700").
+    background_fill_primary="*neutral_700",
+    background_fill_secondary="*secondary_800",
+    border_color_accent="*neutral_600",
+    border_color_primary="*secondary_600",
+    color_accent_soft="*neutral_700",
+    link_text_color="*secondary_500",
+    link_text_color_active="*secondary_500",
+    link_text_color_hover="*secondary_400",
+    link_text_color_visited="*secondary_600",
+    code_background_fill="*neutral_800",
+    shadow_spread_dark="0px",
+    block_background_fill="*secondary_800",
+    block_border_color="*secondary_600",
+    block_border_width="1px",
+    block_label_background_fill="*secondary_700",
+    block_label_border_color="*secondary_600",
+    block_label_text_color="*neutral_200",
+    block_label_text_size="*text_sm",
+    block_title_text_color="*neutral_200",
+    checkbox_background_color="*secondary_400",
+    checkbox_border_color="*neutral_700",
+    checkbox_border_color_hover="*neutral_600",
+    checkbox_label_border_color="*secondary_700",
+    checkbox_label_gap="*form_gap_width",
+    error_background_fill="*background_fill_primary",
+    error_border_color="#ef4444",
+    error_text_color="#fef2f2",
+    error_icon_color="#ef4444",
+    input_background_fill="*secondary_600",
+    input_border_color="*secondary_600",
+    input_border_color_focus="*secondary_500",
+    input_placeholder_color="*neutral_500",
+    input_radius="*radius_xxs",
+    stat_background_fill="*primary_500",
+    table_border_color="*neutral_700",
+    table_even_background_fill="*neutral_700",
+    table_odd_background_fill="*neutral_700",
+    button_border_width="0px",
+    button_border_width_dark="0px",
+    button_transition="all 0.5s ease",
+    button_large_text_weight="500",
+    button_medium_text_weight="500",
+    button_primary_background_fill="linear-gradient(30deg, *primary_800 0%, *primary_950 50%)",
+    button_primary_background_fill_dark="linear-gradient(30deg, *primary_800 0%, *primary_950 50%)",
+    button_primary_background_fill_hover="linear-gradient(90deg, *primary_950 0%, *primary_700 60%)",
+    button_primary_background_fill_hover_dark="linear-gradient(90deg, *primary_950 0%, *primary_700 60%)",
+    button_primary_border_color="*primary_600",
+    button_primary_border_color_hover="*primary_500",
+    button_primary_text_color="white",
+    button_primary_text_color_hover="*code_background_fill",
+    button_primary_text_color_hover_dark="*code_background_fill",
+    # NOTE(review): the next two values reference button_primary_shadow
+    # itself - confirm whether a concrete shadow (or "none") was intended.
+    button_primary_shadow="*button_primary_shadow",
+    button_primary_shadow_active="*button_primary_shadow",
+    button_secondary_background_fill="linear-gradient(100deg, *primary_950 0%, *primary_600 70%)",
+    button_secondary_background_fill_dark="linear-gradient(100deg, *primary_950 0%, *primary_600 70%)",
+    button_secondary_background_fill_hover="linear-gradient(90deg, *primary_700 0%, *primary_950 60%)",
+    button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_700 0%, *primary_950 60%)",
+    button_secondary_border_color="*neutral_600",
+    button_secondary_border_color_hover="*neutral_500",
+    button_secondary_text_color="white",
+    button_secondary_text_color_hover="*table_even_background_fill",
+    button_secondary_text_color_hover_dark="*table_even_background_fill",
+    button_cancel_text_color_hover="white",
+)