AlexWortega committed on
Commit
a43ad73
·
verified ·
1 Parent(s): e51ba55

Fix ZeroGPU: preload CUDA runtime libs before llama_cpp import

Browse files
Files changed (2) hide show
  1. app.py +36 -0
  2. requirements.txt +2 -0
app.py CHANGED
@@ -1,6 +1,42 @@
1
  import os
2
  import re
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import gradio as gr
5
  import spaces
6
  from huggingface_hub import hf_hub_download
 
1
  import os
2
  import re
3
 
4
+ # --- Preload CUDA runtime libs before importing llama_cpp ---
5
+ # The cu124 llama-cpp-python wheel's libllama.so needs libcudart.so.12 /
6
+ # libcublas at import time. On ZeroGPU those aren't on the default loader
7
+ # path, so we dlopen the pip-provided nvidia libs (cudart first) globally.
8
+ import ctypes
9
+ import glob
10
+ import site
11
+
12
+
13
def _preload_cuda():
    """Best-effort preload of the pip-provided NVIDIA shared libraries.

    Collects every ``nvidia/*/lib/*.so*`` file found under the interpreter's
    site-packages directories (plus the user site directory when available)
    and dlopens each one with ``RTLD_GLOBAL`` so that libraries imported
    afterwards can resolve their CUDA dependencies.

    Libraries are loaded in a deterministic order: ``cuda_runtime`` first,
    then ``cublas``, then everything else, with the path as tie-breaker.
    A library that fails to load is skipped.

    Returns:
        None. The only effect is the set of libraries loaded into the process.
    """
    bases = set(site.getsitepackages())
    try:
        bases.add(site.getusersitepackages())
    except Exception:
        # Best effort: the user site directory is optional.
        pass

    # A set removes duplicates when several bases resolve to the same files.
    libs = set()
    for base in bases:
        libs.update(glob.glob(os.path.join(base, "nvidia", "*", "lib", "*.so*")))

    # cudart must load before cublas (cublas depends on it)
    priority = {"cuda_runtime": 0, "cublas": 1}
    fallback_rank = len(priority)

    def _key(p):
        # Sort by (rank, path): set iteration order is not stable across
        # processes, so the path tie-breaker keeps the load order reproducible.
        for name, rank in priority.items():
            if name in p:
                return rank, p
        return fallback_rank, p

    for so in sorted(libs, key=_key):
        try:
            ctypes.CDLL(so, mode=ctypes.RTLD_GLOBAL)
        except OSError:
            # Deliberately ignored: not every bundled library is loadable
            # in every environment, and the preload is only a best effort.
            pass
36
+
37
+
38
+ _preload_cuda()
39
+
40
  import gradio as gr
41
  import spaces
42
  from huggingface_hub import hf_hub_download
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
  --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124
2
  llama-cpp-python==0.3.23
 
 
3
  huggingface_hub>=0.25
4
  spaces
 
1
  --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124
2
  llama-cpp-python==0.3.23
3
+ nvidia-cuda-runtime-cu12==12.4.127
4
+ nvidia-cublas-cu12==12.4.5.8
5
  huggingface_hub>=0.25
6
  spaces