From 2464c7b394c9a0f960951e1ecb90cd3c81061cd5 Mon Sep 17 00:00:00 2001 From: Peter Nagymathe Date: Sun, 10 Dec 2023 19:21:46 +0000 Subject: [PATCH 1/6] multistage dockerfile for building for cuda --- cuda.Dockerfile | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 cuda.Dockerfile diff --git a/cuda.Dockerfile b/cuda.Dockerfile new file mode 100644 index 0000000000..f4188fcbca --- /dev/null +++ b/cuda.Dockerfile @@ -0,0 +1,43 @@ +ARG CUDA_VERSION="12.1.1" +ARG OS="ubuntu22.04" + +ARG CUDA_BUILDER_IMAGE="${CUDA_VERSION}-devel-${OS}" +ARG CUDA_RUNTIME_IMAGE="${CUDA_VERSION}-runtime-${OS}" +FROM nvidia/cuda:${CUDA_BUILDER_IMAGE} as builder + +RUN apt-get update && apt-get upgrade -y \ + && apt-get install -y git build-essential \ + python3 python3-pip gcc wget \ + ocl-icd-opencl-dev opencl-headers clinfo \ + libclblast-dev libopenblas-dev \ + && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd + +COPY . . + +# setting build related env vars +ENV CUDA_DOCKER_ARCH=all +ENV LLAMA_CUBLAS=1 + +# Install dependencies +RUN python3 -m pip install --upgrade pip +# RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context + +# Install llama-cpp-python (build with cuda) +RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install -e .[server] +RUN make clean + +FROM nvidia/cuda:${CUDA_RUNTIME_IMAGE} as runtime + +# We need to set the host to 0.0.0.0 to allow outside access +ENV HOST 0.0.0.0 +ENV CUDA_DOCKER_ARCH=all + +RUN apt-get update && apt-get upgrade -y \ + && apt-get install -y python3 python3-pip \ + && apt-get clean + +COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages +COPY . . 
+ +# Run the server +CMD python3 -m llama_cpp.server From d03cc39991b845249aed7f79d4e13c120579afd2 Mon Sep 17 00:00:00 2001 From: Peter Nagymathe Date: Wed, 20 Dec 2023 09:36:16 +0000 Subject: [PATCH 2/6] update cuda.Dockerfile with tested functioning state --- cuda.Dockerfile | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/cuda.Dockerfile b/cuda.Dockerfile index f4188fcbca..0e154e379c 100644 --- a/cuda.Dockerfile +++ b/cuda.Dockerfile @@ -7,11 +7,13 @@ FROM nvidia/cuda:${CUDA_BUILDER_IMAGE} as builder RUN apt-get update && apt-get upgrade -y \ && apt-get install -y git build-essential \ - python3 python3-pip gcc wget \ + python3 python3-pip python3-venv gcc wget \ ocl-icd-opencl-dev opencl-headers clinfo \ libclblast-dev libopenblas-dev \ && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd +WORKDIR /llama_cpp_python + COPY . . # setting build related env vars @@ -20,11 +22,12 @@ ENV LLAMA_CUBLAS=1 # Install dependencies RUN python3 -m pip install --upgrade pip +RUN python3 -m venv venv # RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context # Install llama-cpp-python (build with cuda) -RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install -e .[server] -RUN make clean +RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" venv/bin/pip install .[server] +# RUN make clean FROM nvidia/cuda:${CUDA_RUNTIME_IMAGE} as runtime @@ -33,11 +36,11 @@ ENV HOST 0.0.0.0 ENV CUDA_DOCKER_ARCH=all RUN apt-get update && apt-get upgrade -y \ - && apt-get install -y python3 python3-pip \ - && apt-get clean + && apt-get install -y python3 python3-pip python3-venv -COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages -COPY . . 
+WORKDIR /llama_cpp_python + +COPY --from=builder /llama_cpp_python/venv venv # Run the server -CMD python3 -m llama_cpp.server +CMD venv/bin/python3 -m llama_cpp.server \ No newline at end of file From 0dbb93e9f04ebefcc2ede71f38e62bdb69379cc1 Mon Sep 17 00:00:00 2001 From: meng-hui Date: Sun, 9 Jun 2024 22:50:41 +0800 Subject: [PATCH 3/6] Update build-docker.yaml --- .github/workflows/build-docker.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-docker.yaml b/.github/workflows/build-docker.yaml index 4ebe3bb6d7..599e9100a3 100644 --- a/.github/workflows/build-docker.yaml +++ b/.github/workflows/build-docker.yaml @@ -34,13 +34,13 @@ jobs: uses: docker/build-push-action@v5 with: context: . - file: "docker/simple/Dockerfile" + file: "cuda.Dockerfile" push: ${{ startsWith(github.ref, 'refs/tags/') }} pull: true platforms: linux/amd64,linux/arm64 tags: | - ghcr.io/abetlen/llama-cpp-python:latest - ghcr.io/abetlen/llama-cpp-python:${{ github.ref_name }} + ghcr.io/${{ github.repository_owner }}/llama-cpp-python:latest + ghcr.io/${{ github.repository_owner }}/llama-cpp-python:${{ github.ref_name }} build-args: | BUILDKIT_INLINE_CACHE=1 From 18e22974b6525126dbc5c3ed50257cdf71943a71 Mon Sep 17 00:00:00 2001 From: meng-hui Date: Sun, 9 Jun 2024 22:59:02 +0800 Subject: [PATCH 4/6] Update cuda.Dockerfile --- cuda.Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda.Dockerfile b/cuda.Dockerfile index 0e154e379c..9aa3dd731b 100644 --- a/cuda.Dockerfile +++ b/cuda.Dockerfile @@ -26,7 +26,7 @@ RUN python3 -m venv venv # RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context # Install llama-cpp-python (build with cuda) -RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" venv/bin/pip install .[server] +RUN CMAKE_ARGS="-DLLAMA_CUDA=on" venv/bin/pip install .[server] # RUN make clean FROM nvidia/cuda:${CUDA_RUNTIME_IMAGE} as 
runtime @@ -43,4 +43,4 @@ WORKDIR /llama_cpp_python COPY --from=builder /llama_cpp_python/venv venv # Run the server -CMD venv/bin/python3 -m llama_cpp.server \ No newline at end of file +CMD venv/bin/python3 -m llama_cpp.server From 440ba97f04cf39d408110c68cb815e002f3e6884 Mon Sep 17 00:00:00 2001 From: meng-hui Date: Sun, 9 Jun 2024 23:02:58 +0800 Subject: [PATCH 5/6] Update build-docker.yaml --- .github/workflows/build-docker.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-docker.yaml b/.github/workflows/build-docker.yaml index 599e9100a3..35e893ace1 100644 --- a/.github/workflows/build-docker.yaml +++ b/.github/workflows/build-docker.yaml @@ -42,6 +42,7 @@ jobs: ghcr.io/${{ github.repository_owner }}/llama-cpp-python:latest ghcr.io/${{ github.repository_owner }}/llama-cpp-python:${{ github.ref_name }} build-args: | + CUDA_VERSION=${{ env.CUDA_VERSION }} BUILDKIT_INLINE_CACHE=1 - name: Publish to GitHub Tag From b0bf88b2ce6ee78c60735060a68f5c6dc68b1c83 Mon Sep 17 00:00:00 2001 From: meng-hui Date: Sun, 9 Jun 2024 23:12:08 +0800 Subject: [PATCH 6/6] Update build-docker.yaml --- .github/workflows/build-docker.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-docker.yaml b/.github/workflows/build-docker.yaml index 35e893ace1..535e37586c 100644 --- a/.github/workflows/build-docker.yaml +++ b/.github/workflows/build-docker.yaml @@ -42,7 +42,7 @@ jobs: ghcr.io/${{ github.repository_owner }}/llama-cpp-python:latest ghcr.io/${{ github.repository_owner }}/llama-cpp-python:${{ github.ref_name }} build-args: | - CUDA_VERSION=${{ env.CUDA_VERSION }} + CUDA_VERSION=${{ vars.CUDA_VERSION }} BUILDKIT_INLINE_CACHE=1 - name: Publish to GitHub Tag