Spaces:
Sleeping
Sleeping
| # Base image: NVIDIA CUDA 12.8.0 + cuDNN development image on Ubuntu 24.04. | |
| FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu24.04 | |
| # Keep apt/dpkg from prompting for input during the build. | |
| ENV DEBIAN_FRONTEND=noninteractive | |
| # Hugging Face token passed at build time (--build-arg HF_TOKEN=...) and re-exported as an | |
| # environment variable of the resulting image. | |
| # NOTE(review): ENV stores the token in the image configuration, so it is readable by anyone | |
| # who can inspect the image - consider a build secret mount instead; confirm what the app | |
| # actually needs at runtime before changing this. | |
| ARG HF_TOKEN | |
| ENV HF_TOKEN=$HF_TOKEN | |
| # Remove /usr/local/bin and everything matching /usr/local/lib*, then recreate bin, lib and | |
| # lib64 as symlinks into /usr so that anything installed under those /usr/local paths lands | |
| # in /usr/bin or /usr/lib. `|| true` lets the build continue even if the removal fails. | |
| RUN rm -rf /usr/local/bin /usr/local/lib* || true | |
| RUN ln -s /usr/bin /usr/local/bin && ln -s /usr/lib /usr/local/lib && ln -s /usr/lib /usr/local/lib64 | |
| # System packages. The index refresh, upgrade and install run in ONE layer so a cached | |
| # `apt-get update` can never be combined with a changed package list, and the apt lists are | |
| # removed in the same layer so they do not end up in the image. | |
| # Packages are listed once each, in alphabetical order, to make duplicates easy to spot. | |
| # NOTE(review): nvidia-driver-570 is kept from the original list; GPU containers usually get | |
| # the driver from the host - confirm it is really needed inside the image. | |
| RUN apt-get update && \ | |
| apt-get upgrade -y && \ | |
| apt-get install -y --no-install-recommends --fix-missing \ | |
| build-essential \ | |
| cmake \ | |
| curl \ | |
| ffmpeg \ | |
| git \ | |
| git-lfs \ | |
| golang-go \ | |
| grep \ | |
| libbz2-dev \ | |
| libffi-dev \ | |
| liblzma-dev \ | |
| libncursesw5-dev \ | |
| libreadline-dev \ | |
| libsqlite3-dev \ | |
| libssl-dev \ | |
| libxml2-dev \ | |
| libxmlsec1-dev \ | |
| nvidia-driver-570 \ | |
| original-awk \ | |
| python3 \ | |
| python3-pip \ | |
| sed \ | |
| tk-dev \ | |
| unzip \ | |
| wget \ | |
| xz-utils \ | |
| zlib1g-dev \ | |
| zstd && \ | |
| apt-get clean && \ | |
| rm -rf /var/lib/apt/lists/* | |
| # Application directory: WORKDIR creates /app and makes it the current directory. | |
| WORKDIR /app | |
| # Copy the build context into the working directory (/app), owned by UID 1000. | |
| COPY --chown=1000 . ./ | |
| # Make /app itself world-writable (the directory already exists at this point). | |
| RUN chmod 777 /app | |
| # RUN bash instollama.sh # Currently all model types are supported, so there is no need to build | |
| # Install Ollama with its install script. The script is downloaded to a file first so that a | |
| # failed download fails the build: with `curl ... | sh` only the exit status of `sh` is | |
| # checked, and an empty script would be reported as a successful install. | |
| RUN curl -fsSL https://ollama.com/install.sh -o /tmp/ollama-install.sh && \ | |
| sh /tmp/ollama-install.sh && \ | |
| rm -f /tmp/ollama-install.sh | |
| # RUN cd /app && \ | |
| # git clone --recursive https://github.com/ollama/ollama.git && \ | |
| # cd ollama && \ | |
| # go generate ./... && \ | |
| # go build . && \ | |
| # ln -s $PWD/ollama /usr/bin/ollama && \ | |
| # chmod +x ollama && \ | |
| # cd .. | |
| # RUN cd /app && \ | |
| # git clone --recursive https://github.com/ggerganov/llama.cpp && \ | |
| # cd llama.cpp && \ | |
| # cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=OFF -DLLAMA_CURL=OFF && \ | |
| # cmake --build build --config Release -j --target llama-quantize --parallel 12 && \ | |
| # cp ./build/bin/llama-* /usr/bin/ && \ | |
| # cp convert_hf_to_gguf.py /usr/bin/convert_hf_to_gguf && \ | |
| # rm -rf build && \ | |
| # cd .. | |
| # RUN id -u 1000 &>/dev/null || useradd -m -u 1000 user | |
| # USER 1000 | |
| # ENV HOME=/home/user \ | |
| # PATH=/home/user/.local/bin:${PATH} | |
| WORKDIR /app | |
| # RUN curl https://pyenv.run | bash | |
| # ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH} | |
| # ARG PYTHON_VERSION=3.13 | |
| # RUN pyenv install ${PYTHON_VERSION} && \ | |
| # pyenv global ${PYTHON_VERSION} && \ | |
| # pyenv rehash | |
| # Python dependencies are installed into the system interpreter (hence | |
| # --break-system-packages). Every step uses --no-cache-dir so pip's download cache is not | |
| # stored in the image layers. | |
| # NOTE(review): each pip invocation resolves dependencies on its own and --ignore-installed | |
| # reinstalls them, so a later step may replace a version pinned by an earlier one | |
| # (e.g. numpy~=1.26.4) - confirm the versions that end up installed. | |
| RUN pip install --no-cache-dir -U pip setuptools wheel --break-system-packages --ignore-installed | |
| RUN pip install --no-cache-dir "huggingface-hub" "hf-transfer" "gradio[oauth]>=6.5.1" "APScheduler" "protobuf>=4.21.0,<5.0.0" "sentencepiece>=0.1.98,<0.3.0" "numpy~=1.26.4" "gguf>=0.1.0" "fastapi" --break-system-packages --ignore-installed | |
| RUN pip install --no-cache-dir "torch>=2.8.0" --break-system-packages --ignore-installed | |
| # transformers is installed from the repository's default branch (unpinned). | |
| RUN pip install --no-cache-dir git+https://github.com/huggingface/transformers.git --break-system-packages --ignore-installed | |
| # Download the files of the lainlives/llama.cpp Hub repository, mark them executable, keep a | |
| # copy of the convert script in /app and move everything into /usr/bin. The staging directory | |
| # (including any hidden files the download leaves behind, which `*` does not match) is | |
| # removed in the same layer. | |
| RUN mkdir -p /tmp/llama && \ | |
| hf download lainlives/llama.cpp --local-dir /tmp/llama && \ | |
| chmod +x /tmp/llama/* && \ | |
| cp /tmp/llama/convert* /app/convert_hf_to_gguf.py && \ | |
| mv /tmp/llama/* /usr/bin/ && \ | |
| rm -rf /tmp/llama | |
| # Runtime environment for the app: Python, Hugging Face Hub, Gradio and tqdm settings, plus | |
| # the CUDA library directory and the NVIDIA binary directory prepended to the search paths. | |
| # PYTHONPATH is spelled out as /app: no ENV HOME is declared in this file, so the previous | |
| # ${HOME}/app only resolved to /app because the variable expanded to an empty string. | |
| ENV PYTHONPATH=/app \ | |
| PYTHONUNBUFFERED=1 \ | |
| HF_HUB_ENABLE_HF_TRANSFER=1 \ | |
| GRADIO_ALLOW_FLAGGING=never \ | |
| GRADIO_NUM_PORTS=1 \ | |
| GRADIO_SERVER_NAME=0.0.0.0 \ | |
| GRADIO_ANALYTICS_ENABLED=False \ | |
| TQDM_POSITION=-1 \ | |
| TQDM_MININTERVAL=1 \ | |
| SYSTEM=spaces \ | |
| LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \ | |
| PATH=/usr/local/nvidia/bin:${PATH} | |
| # Port the app is expected to listen on. | |
| EXPOSE 7860 | |
| # Exec form: python3 is started directly instead of through `/bin/sh -c`, so it receives | |
| # stop signals itself and can shut down cleanly. | |
| ENTRYPOINT ["python3", "/app/app.py"] |