my_pi_agent

Runtime error

AlexWortega committed on May 26

Commit

a43ad73

verified ·

1 Parent(s): e51ba55

Fix ZeroGPU: preload CUDA runtime libs before llama_cpp import

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,6 +1,42 @@
 import os
 import re
 import gradio as gr
 import spaces
 from huggingface_hub import hf_hub_download

 import os
 import re
+# --- Preload CUDA runtime libs before importing llama_cpp ---
+# The cu124 llama-cpp-python wheel's libllama.so needs libcudart.so.12 /
+# libcublas at import time. On ZeroGPU those aren't on the default loader
+# path, so we dlopen the pip-provided nvidia libs (cudart first) globally.
+import ctypes
+import glob
+import site
+def _preload_cuda():
+    """Best-effort dlopen of the pip-installed NVIDIA shared libraries.
+
+    Globs ``<site-packages>/nvidia/*/lib/*.so*`` under every site-packages
+    directory reported by ``site`` and loads each match with
+    ``RTLD_GLOBAL`` so that libraries imported afterwards can resolve
+    their symbols. Libraries are loaded in a deterministic order:
+    ``cuda_runtime`` first, then ``cublas``, then everything else, with
+    ties broken by path. Libraries that fail to load are retried after the
+    rest have been attempted, because a failure may only mean that a
+    dependency had not been loaded yet.
+
+    Returns ``None``. Never raises for a missing or unloadable library;
+    whatever still cannot be loaded is skipped silently.
+    """
+    bases = set()
+    # Both lookups are best-effort: either accessor can be missing or fail
+    # depending on how the interpreter/virtualenv was set up.
+    for getter_name in ("getsitepackages", "getusersitepackages"):
+        try:
+            found = getattr(site, getter_name)()
+        except Exception:
+            continue
+        # getsitepackages() returns a list, getusersitepackages() a string.
+        bases.update([found] if isinstance(found, str) else found)
+    libs = set()
+    for base in bases:
+        libs.update(glob.glob(os.path.join(base, "nvidia", "*", "lib", "*.so*")))
+    # cudart must load before cublas (cublas depends on it)
+    priority = (("cuda_runtime", 0), ("cublas", 1))
+    def _key(p):
+        # Rank by the package directory (nvidia/<pkg>/lib/<file>), not the
+        # whole path, so an unrelated path component cannot affect ordering.
+        pkg = os.path.basename(os.path.dirname(os.path.dirname(p)))
+        for name, rank in priority:
+            if name in pkg:
+                return rank, p
+        return len(priority), p
+    # Sorting on (rank, path) makes the order independent of set iteration.
+    pending = sorted(libs, key=_key)
+    while pending:
+        failed = []
+        for so in pending:
+            try:
+                ctypes.CDLL(so, mode=ctypes.RTLD_GLOBAL)
+            except OSError:
+                failed.append(so)
+        # Stop once a full pass loads nothing new; the rest are unloadable.
+        if len(failed) == len(pending):
+            break
+        pending = failed
+_preload_cuda()
 import gradio as gr
 import spaces
 from huggingface_hub import hf_hub_download

requirements.txt CHANGED Viewed

@@ -1,4 +1,6 @@
 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124
 llama-cpp-python==0.3.23
 huggingface_hub>=0.25
 spaces

 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124
 llama-cpp-python==0.3.23
+nvidia-cuda-runtime-cu12==12.4.127
+nvidia-cublas-cu12==12.4.5.8
 huggingface_hub>=0.25
 spaces