diff --git a/.github/workflows/build-docker.yaml b/.github/workflows/build-docker.yaml index 4ebe3bb6d7..535e37586c 100644 --- a/.github/workflows/build-docker.yaml +++ b/.github/workflows/build-docker.yaml @@ -34,14 +34,15 @@ jobs: uses: docker/build-push-action@v5 with: context: . - file: "docker/simple/Dockerfile" + file: "cuda.Dockerfile" push: ${{ startsWith(github.ref, 'refs/tags/') }} pull: true platforms: linux/amd64,linux/arm64 tags: | - ghcr.io/abetlen/llama-cpp-python:latest - ghcr.io/abetlen/llama-cpp-python:${{ github.ref_name }} + ghcr.io/${{ github.repository_owner }}/llama-cpp-python:latest + ghcr.io/${{ github.repository_owner }}/llama-cpp-python:${{ github.ref_name }} build-args: | + CUDA_VERSION=${{ vars.CUDA_VERSION }} BUILDKIT_INLINE_CACHE=1 - name: Publish to GitHub Tag diff --git a/cuda.Dockerfile b/cuda.Dockerfile new file mode 100644 index 0000000000..9aa3dd731b --- /dev/null +++ b/cuda.Dockerfile @@ -0,0 +1,46 @@ +ARG CUDA_VERSION="12.1.1" +ARG OS="ubuntu22.04" + +ARG CUDA_BUILDER_IMAGE="${CUDA_VERSION}-devel-${OS}" +ARG CUDA_RUNTIME_IMAGE="${CUDA_VERSION}-runtime-${OS}" +FROM nvidia/cuda:${CUDA_BUILDER_IMAGE} as builder + +RUN apt-get update && apt-get upgrade -y \ + && apt-get install -y git build-essential \ + python3 python3-pip python3-venv gcc wget \ + ocl-icd-opencl-dev opencl-headers clinfo \ + libclblast-dev libopenblas-dev \ + && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd + +WORKDIR /llama_cpp_python + +COPY . . + +# setting build related env vars +ENV CUDA_DOCKER_ARCH=all +ENV LLAMA_CUBLAS=1 + +# Install depencencies +RUN python3 -m pip install --upgrade pip +RUN python3 -m venv venv +# RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context + +# Install llama-cpp-python (build with cuda) +RUN CMAKE_ARGS="-DLLAMA_CUDA=on" venv/bin/pip install .[server] +# RUN make clean + +FROM nvidia/cuda:${CUDA_RUNTIME_IMAGE} as runtime + +# We need to set the host to 0.0.0.0 to allow outside access +ENV HOST 0.0.0.0 +ENV CUDA_DOCKER_ARCH=all + +RUN apt-get update && apt-get upgrade -y \ + && apt-get install -y python3 python3-pip python3-venv + +WORKDIR /llama_cpp_python + +COPY --from=builder /llama_cpp_python/venv venv + +# Run the server +CMD venv/bin/python3 -m llama_cpp.server