stungkit · pull · Sep 20, 2024 · Sep 20, 2024 · Sep 22, 2024 · Sep 25, 2024
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -64,7 +64,7 @@ Try the following:
 1. `git clone https://github.com/abetlen/llama-cpp-python`
 2. `cd llama-cpp-python`
 3. `rm -rf _skbuild/` # delete any old builds
-4. `python setup.py develop`
+4. `python -m pip install .`
 5. `cd ./vendor/llama.cpp`
 6. Follow [llama.cpp's instructions](https://github.com/ggerganov/llama.cpp#build) to `cmake` llama.cpp
 7. Run llama.cpp's `./main` with the same arguments you previously passed to llama-cpp-python and see if you can reproduce the issue. If you can, [log an issue with llama.cpp](https://github.com/ggerganov/llama.cpp/issues)

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -8,4 +8,12 @@ updates:
   - package-ecosystem: "pip" # See documentation for possible values
     directory: "/" # Location of package manifests
     schedule:
-      interval: "weekly"
+      interval: "daily"
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "daily"
+  - package-ecosystem: "docker"
+    directory: "/"
+    schedule:
+      interval: "daily"
diff --git a/.github/workflows/build-and-release.yaml b/.github/workflows/build-and-release.yaml
@@ -11,63 +11,221 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest, windows-latest, macOS-latest]
+        os: [ubuntu-22.04, windows-2022, macos-14, macos-15]
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v6
         with:
-          submodules: "true"
+          submodules: "recursive"
 
       # Used to host cibuildwheel
-      - uses: actions/setup-python@v3
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.9"
 
-      - name: Install cibuildwheel
-        run: python -m pip install cibuildwheel==2.12.1
+      - name: Install dependencies (Linux/MacOS)
+        if: runner.os != 'Windows'
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install uv
+          RUST_LOG=trace python -m uv pip install -e .[all] --verbose
+        shell: bash
 
-      - name: Install dependencies
+      - name: Install dependencies (Windows)
+        if: runner.os == 'Windows'
+        env:
+          RUST_LOG: trace
         run: |
           python -m pip install --upgrade pip
-          python -m pip install -e .[all]
+          python -m pip install uv
+          python -m uv pip install -e .[all] --verbose
+        shell: cmd
+
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v3.4.1
+        env:
+          # Keep repair disabled by default for non-Linux platforms in this job.
+          CIBW_REPAIR_WHEEL_COMMAND: ""
+          # Linux needs auditwheel repair so manylinux and musllinux wheels are
+          # published with distinct platform tags instead of generic linux tags.
+          CIBW_REPAIR_WHEEL_COMMAND_LINUX: "LD_LIBRARY_PATH=/project/llama_cpp/lib auditwheel repair -w {dest_dir} {wheel}"
+          # cibuildwheel v3 defaults to manylinux_2_28 images whose current
+          # GCC toolchain emits symbols newer than the policy allows.
+          CIBW_MANYLINUX_X86_64_IMAGE: "manylinux2014"
+          # The release wheel is tagged py3-none, so one build per platform
+          # covers all supported Python versions and avoids duplicate names.
+          CIBW_BUILD_LINUX: "cp38-*"
+          CIBW_BUILD_MACOS: "cp39-*"
+          CIBW_BUILD_WINDOWS: "cp39-*"
+          # Skip cibuildwheel's default i686 sidecar and keep Linux release
+          # wheels on a portable x86_64 CPU baseline.
+          CIBW_ARCHS_LINUX: "auto64"
+          CIBW_ARCHS_WINDOWS: "AMD64"
+          CIBW_ENVIRONMENT_LINUX: CMAKE_ARGS="-DGGML_NATIVE=off"
+          # Keep macOS release wheels on a portable CPU baseline instead of
+          # inheriting the hosted runner's native flags.
+          CIBW_ENVIRONMENT_MACOS: CMAKE_ARGS="-DGGML_NATIVE=off"
+        with:
+          package-dir: .
+          output-dir: wheelhouse
+
+      - uses: actions/upload-artifact@v7
+        with:
+          name: wheels-${{ matrix.os }}
+          path: ./wheelhouse/*.whl
+
+  build_wheels_arm64:
+    name: Build arm64 wheels
+    runs-on: ubuntu-24.04-arm
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          submodules: "recursive"
+
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v3.4.1
+        env:
+          CIBW_SKIP: "pp*"
+          CIBW_REPAIR_WHEEL_COMMAND: "LD_LIBRARY_PATH=$PWD/llama_cpp/lib auditwheel repair -w {dest_dir} {wheel}"
+          CIBW_ARCHS: "aarch64"
+          # Keep this consistent with the x86_64 Linux release wheels.
+          CIBW_MANYLINUX_AARCH64_IMAGE: "manylinux2014"
+          # Keep native arm64 builds on a portable CPU baseline instead of
+          # tuning wheels to the hosted runner.
+          CIBW_ENVIRONMENT: CMAKE_ARGS="-DGGML_NATIVE=off"
+          # The release wheel is tagged py3-none, so one build covers all
+          # supported Python versions and avoids duplicate wheel names.
+          CIBW_BUILD: "cp38-*"
+        with:
+          output-dir: wheelhouse
+
+      - name: Upload wheels as artifacts
+        uses: actions/upload-artifact@v7
+        with:
+          name: wheels_arm64
+          path: ./wheelhouse/*.whl
+
+  build_wheels_riscv64:
+    name: Build riscv64 wheel
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          submodules: "recursive"
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v4
+        with:
+          platforms: linux/riscv64
 
       - name: Build wheels
-        run: python -m cibuildwheel --output-dir wheelhouse
+        uses: pypa/cibuildwheel@v3.4.1
+        env:
+          CIBW_SKIP: "*musllinux* pp*"
+          CIBW_REPAIR_WHEEL_COMMAND: ""
+          CIBW_ARCHS: "riscv64"
+          # Build riscv64 wheels against a conservative baseline instead of
+          # enabling RVV-related extensions from the build container.
+          CIBW_ENVIRONMENT: CMAKE_ARGS="-DGGML_NATIVE=off -DGGML_RVV=off -DGGML_RV_ZFH=off -DGGML_RV_ZVFH=off -DGGML_RV_ZICBOP=off -DGGML_RV_ZIHINTPAUSE=off"
+          # The release wheel is tagged py3-none, so one riscv64 build is
+          # enough and avoids duplicate same-name release artifacts.
+          CIBW_BUILD: "cp310-*"
+        with:
+          output-dir: wheelhouse
+
+      - name: Upload wheels as artifacts
+        uses: actions/upload-artifact@v7
+        with:
+          name: wheels_riscv64
+          path: ./wheelhouse/*.whl
+
+  build_wheels_pyodide:
+    name: Build Pyodide wheel
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          submodules: "recursive"
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.12"
+
+      - name: Build wheel
+        uses: pypa/cibuildwheel@v4.1.0
+        env:
+          CIBW_PLATFORM: "pyodide"
+          CIBW_BUILD: "cp314-pyodide_wasm32"
+          CIBW_BUILD_VERBOSITY: "1"
+          CIBW_REPAIR_WHEEL_COMMAND: ""
+          CIBW_BEFORE_TEST: "curl -L --fail --retry 3 -o /tmp/stories260K.gguf https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf"
+          CIBW_TEST_COMMAND: "python -c \"import llama_cpp.mtmd_cpp as mtmd; from llama_cpp import Llama; print('mtmd marker', mtmd.mtmd_default_marker().decode()); llm = Llama(model_path='/tmp/stories260K.gguf', n_ctx=64, n_batch=8, n_threads=1, verbose=False); print('loaded', llm.n_vocab(), llm.n_ctx()); print('generated', llm('Once upon a', max_tokens=1, temperature=0)['choices'][0]['text'])\""
+          CMAKE_ARGS: "-DLLAMA_WASM_MEM64=OFF -DEMSCRIPTEN_SYSTEM_PROCESSOR=wasm32 -DGGML_NATIVE=OFF -DGGML_OPENMP=OFF -DGGML_METAL=OFF -DGGML_BLAS=OFF -DGGML_CUDA=OFF -DGGML_HIP=OFF -DGGML_VULKAN=OFF -DGGML_OPENCL=OFF -DGGML_RPC=OFF -DLLAMA_CURL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TOOLS=OFF -DLLAMA_BUILD_SERVER=OFF"
+        with:
+          output-dir: wheelhouse
 
-      - uses: actions/upload-artifact@v3
+      - name: Upload wheels as artifacts
+        uses: actions/upload-artifact@v7
         with:
+          name: wheels_pyodide
           path: ./wheelhouse/*.whl
 
   build_sdist:
     name: Build source distribution
     runs-on: ubuntu-latest
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v6
+        with:
+          submodules: "recursive"
+
+      - uses: actions/setup-python@v6
         with:
-          submodules: "true"
-      - uses: actions/setup-python@v3
-      - name: Install dependencies
+          python-version: "3.9"
+
+      - name: Install dependencies (Linux/MacOS)
+        if: runner.os != 'Windows'
         run: |
-          python -m pip install --upgrade pip build
-          python -m pip install -e .[all]
+          python -m pip install --upgrade pip
+          python -m pip install uv
+          RUST_LOG=trace python -m uv pip install -e .[all] --verbose
+          python -m uv pip install build
+        shell: bash
+
+      - name: Install dependencies (Windows)
+        if: runner.os == 'Windows'
+        env:
+          RUST_LOG: trace
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install uv
+          python -m uv pip install -e .[all] --verbose
+          python -m uv pip install build
+        shell: cmd
+
       - name: Build source distribution
         run: |
           python -m build --sdist
-      - uses: actions/upload-artifact@v3
+
+      - uses: actions/upload-artifact@v7
         with:
+          name: sdist
           path: ./dist/*.tar.gz
 
   release:
     name: Release
-    needs: [build_wheels, build_sdist]
+    needs: [build_wheels, build_wheels_arm64, build_wheels_riscv64, build_wheels_pyodide, build_sdist]
+    if: startsWith(github.ref, 'refs/tags/')
     runs-on: ubuntu-latest
 
     steps:
-      - uses: actions/download-artifact@v3
+      - uses: actions/download-artifact@v8
         with:
-          name: artifact
+          merge-multiple: true
           path: dist
-      - uses: softprops/action-gh-release@v1
+
+      - uses: softprops/action-gh-release@v3
         with:
           files: dist/*
         env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/build-docker.yaml b/.github/workflows/build-docker.yaml
@@ -9,32 +9,51 @@ permissions:
 jobs:
   docker:
     name: Build and push Docker image
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v6
         with:
-          submodules: "true"
+          submodules: "recursive"
+
+      - name: Set image tag
+        run: |
+          if [[ "${GITHUB_REF_TYPE}" == "tag" ]]; then
+            image_tag="${GITHUB_REF_NAME}"
+          else
+            image_tag="${GITHUB_REF_NAME//\//-}"
+          fi
+          echo "IMAGE_TAG=$image_tag" >> "$GITHUB_ENV"
 
       - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
+        uses: docker/setup-qemu-action@v4
 
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
+        uses: docker/setup-buildx-action@v4
 
       - name: Login to GitHub Container Registry
-        uses: docker/login-action@v2 
+        uses: docker/login-action@v4
         with:
           registry: ghcr.io
           username: ${{ github.repository_owner }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Build and push
-        uses: docker/build-push-action@v4
+        id: docker_build
+        uses: docker/build-push-action@v7
         with:
           context: .
           file: "docker/simple/Dockerfile"
-          push: true # push to registry
-          pull: true # always fetch the latest base images
-          platforms: linux/amd64,linux/arm64 # build for both amd64 and arm64
-          tags: ghcr.io/abetlen/llama-cpp-python:latest
+          push: ${{ startsWith(github.ref, 'refs/tags/') }}
+          pull: true
+          platforms: linux/amd64,linux/arm64
+          tags: |
+            ghcr.io/abetlen/llama-cpp-python:latest
+            ghcr.io/abetlen/llama-cpp-python:${{ env.IMAGE_TAG }}
+          build-args: |
+            BUILDKIT_INLINE_CACHE=1
+
+      - name: Publish to GitHub Tag
+        if: steps.docker_build.outputs.digest && startsWith(github.ref, 'refs/tags/')
+        run: |
+          echo "Docker image published for tag: ${{ github.ref_name }}"