diff --git a/.github/workflows/build-and-release.yaml b/.github/workflows/build-and-release.yaml index 4ae37b174..c931ead34 100644 --- a/.github/workflows/build-and-release.yaml +++ b/.github/workflows/build-and-release.yaml @@ -139,6 +139,37 @@ jobs: name: wheels_riscv64 path: ./wheelhouse/*.whl + build_wheels_pyodide: + name: Build Pyodide wheel + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + with: + submodules: "recursive" + + - uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Build wheel + uses: pypa/cibuildwheel@v4.1.0 + env: + CIBW_PLATFORM: "pyodide" + CIBW_BUILD: "cp314-pyodide_wasm32" + CIBW_BUILD_VERBOSITY: "1" + CIBW_REPAIR_WHEEL_COMMAND: "" + CIBW_BEFORE_TEST: "curl -L --fail --retry 3 -o /tmp/stories260K.gguf https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf" + CIBW_TEST_COMMAND: "python -c \"import llama_cpp.mtmd_cpp as mtmd; from llama_cpp import Llama; print('mtmd marker', mtmd.mtmd_default_marker().decode()); llm = Llama(model_path='/tmp/stories260K.gguf', n_ctx=64, n_batch=8, n_threads=1, verbose=False); print('loaded', llm.n_vocab(), llm.n_ctx()); print('generated', llm('Once upon a', max_tokens=1, temperature=0)['choices'][0]['text'])\"" + CMAKE_ARGS: "-DLLAMA_WASM_MEM64=OFF -DEMSCRIPTEN_SYSTEM_PROCESSOR=wasm32 -DGGML_NATIVE=OFF -DGGML_OPENMP=OFF -DGGML_METAL=OFF -DGGML_BLAS=OFF -DGGML_CUDA=OFF -DGGML_HIP=OFF -DGGML_VULKAN=OFF -DGGML_OPENCL=OFF -DGGML_RPC=OFF -DLLAMA_CURL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TOOLS=OFF -DLLAMA_BUILD_SERVER=OFF" + with: + output-dir: wheelhouse + + - name: Upload wheels as artifacts + uses: actions/upload-artifact@v7 + with: + name: wheels_pyodide + path: ./wheelhouse/*.whl + build_sdist: name: Build source distribution runs-on: ubuntu-latest @@ -183,7 +214,7 @@ jobs: release: name: Release - needs: [build_wheels, build_wheels_arm64, build_wheels_riscv64, build_sdist] + needs: [build_wheels, build_wheels_arm64, build_wheels_riscv64, build_wheels_pyodide, build_sdist] if: startsWith(github.ref, 'refs/tags/') runs-on: ubuntu-latest diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b2b5eb31..5b337ad92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -- feat: update llama.cpp to ggml-org/llama.cpp@6eab47181 +## [0.3.30] + +- feat: update llama.cpp to ggml-org/llama.cpp@e3a74b299 +- feat: add Pyodide wheel support by @abetlen in #2309 ## [0.3.29] diff --git a/CMakeLists.txt b/CMakeLists.txt index 0474863a4..5feaaca5b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,14 +10,22 @@ function(llama_cpp_python_install_target target) return() endif() - install( - TARGETS ${target} - LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib - RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib - ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib - FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib - RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib - ) + if(EMSCRIPTEN) + set_target_properties(${target} PROPERTIES + OUTPUT_NAME "${target}.cpython-00-wasm32-emscripten" + ) + endif() + + if(NOT EMSCRIPTEN) + install( + TARGETS ${target} + LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib + RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib + ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib + FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib + RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib + ) + endif() install( TARGETS ${target} LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib @@ -65,6 +73,32 @@ if (LLAMA_BUILD) # Disable building curl support set(LLAMA_CURL OFF CACHE BOOL "llama.cpp: enable curl" FORCE) + if (EMSCRIPTEN) + if (DEFINED EMSCRIPTEN_SYSTEM_PROCESSOR) + set(CMAKE_SYSTEM_PROCESSOR ${EMSCRIPTEN_SYSTEM_PROCESSOR} CACHE STRING "Target processor" FORCE) + else() + set(CMAKE_SYSTEM_PROCESSOR wasm32 CACHE STRING "Target processor" FORCE) + endif() + + set(LLAMA_WASM_MEM64 OFF CACHE BOOL "llama.cpp: enable wasm64 memory" FORCE) + set(GGML_NATIVE OFF CACHE BOOL "ggml: enable -march=native" FORCE) + set(GGML_OPENMP OFF CACHE BOOL "ggml: use OpenMP" FORCE) + set(GGML_METAL OFF CACHE BOOL "ggml: use Metal" FORCE) + set(GGML_BLAS OFF CACHE BOOL "ggml: use BLAS" FORCE) + set(GGML_CUDA OFF CACHE BOOL "ggml: use CUDA" FORCE) + set(GGML_HIP OFF CACHE BOOL "ggml: use HIP" FORCE) + set(GGML_VULKAN OFF CACHE BOOL "ggml: use Vulkan" FORCE) + set(GGML_OPENCL OFF CACHE BOOL "ggml: use OpenCL" FORCE) + set(GGML_RPC OFF CACHE BOOL "ggml: use RPC" FORCE) + + # Pyodide auto-loads side modules from top-level site-packages/lib + # before Python imports run, so keep upstream installs package-local. + set(CMAKE_INSTALL_BINDIR llama_cpp/lib CACHE PATH "Install binaries" FORCE) + set(CMAKE_INSTALL_INCLUDEDIR llama_cpp/include CACHE PATH "Install headers" FORCE) + set(CMAKE_INSTALL_LIBDIR llama_cpp/lib CACHE PATH "Install libraries" FORCE) + set(LLAMA_BUILD_COMMON OFF CACHE BOOL "Build llama.cpp common library" FORCE) + endif() + # Architecture detection and settings for Apple platforms if (APPLE) # Get the target architecture diff --git a/llama_cpp/__init__.py b/llama_cpp/__init__.py index 42f807ef6..b72459f65 100644 --- a/llama_cpp/__init__.py +++ b/llama_cpp/__init__.py @@ -1,4 +1,4 @@ from .llama_cpp import * from .llama import * -__version__ = "0.3.29" +__version__ = "0.3.30" diff --git a/llama_cpp/_ctypes_extensions.py b/llama_cpp/_ctypes_extensions.py index e88ed387d..02cee8a88 100644 --- a/llama_cpp/_ctypes_extensions.py +++ b/llama_cpp/_ctypes_extensions.py @@ -19,6 +19,9 @@ from typing_extensions import TypeAlias +_EMSCRIPTEN_SIDE_MODULE_SUFFIX = ".cpython-00-wasm32-emscripten.so" + + # Load the library def load_shared_library(lib_base_name: str, base_path: pathlib.Path): """Platform independent shared library loader""" @@ -26,7 +29,12 @@ def load_shared_library(lib_base_name: str, base_path: pathlib.Path): # for llamacpp) and "llama" (default name for this repo) lib_paths: List[pathlib.Path] = [] # Determine the file extension based on the platform - if sys.platform.startswith("linux") or sys.platform.startswith("freebsd"): + if sys.platform == "emscripten": + # Use a CPython-style tag that Pyodide skips during package auto-load. + lib_paths += [ + base_path / f"lib{lib_base_name}{_EMSCRIPTEN_SIDE_MODULE_SUFFIX}", + ] + elif sys.platform.startswith("linux") or sys.platform.startswith("freebsd"): lib_paths += [ base_path / f"lib{lib_base_name}.so", ] @@ -60,6 +68,33 @@ def load_shared_library(lib_base_name: str, base_path: pathlib.Path): os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "lib")) cdll_args["winmode"] = ctypes.RTLD_GLOBAL + if sys.platform == "emscripten": + cdll_args["mode"] = ctypes.RTLD_GLOBAL + lib_dir = str(base_path) + ld_library_path = os.environ.get("LD_LIBRARY_PATH", "") + if lib_dir not in ld_library_path.split(os.pathsep): + os.environ["LD_LIBRARY_PATH"] = ( + lib_dir + if not ld_library_path + else f"{lib_dir}{os.pathsep}{ld_library_path}" + ) + + emscripten_dependencies = { + "llama": ("ggml-base", "ggml-cpu", "ggml"), + "mtmd": ("ggml-base", "ggml-cpu", "ggml", "llama"), + } + for dependency in emscripten_dependencies.get(lib_base_name, ()): + dependency_path = ( + base_path / f"lib{dependency}{_EMSCRIPTEN_SIDE_MODULE_SUFFIX}" + ) + if dependency_path.exists(): + try: + ctypes.CDLL(str(dependency_path), **cdll_args) # type: ignore + except Exception as e: + raise RuntimeError( + f"Failed to load shared library '{dependency_path}': {e}" + ) + # Try to load the shared library, handling potential errors for lib_path in lib_paths: if lib_path.exists(): diff --git a/llama_cpp/mtmd_cpp.py b/llama_cpp/mtmd_cpp.py index 46eb2c879..78f068aa9 100644 --- a/llama_cpp/mtmd_cpp.py +++ b/llama_cpp/mtmd_cpp.py @@ -169,6 +169,12 @@ class mtmd_caps(Structure): POINTER(c_char_p), ) +mtmd_helper_post_decode_callback = CFUNCTYPE( + c_int, + llama_cpp.llama_batch, + c_void_p, +) + class mtmd_helper_bitmap_wrapper(Structure): """Bitmap wrapper returned by MTMD helper media loaders.""" @@ -860,7 +866,9 @@ def mtmd_helper_eval_chunk_single( # llama_pos n_past, # llama_seq_id seq_id, # int32_t n_batch, -# llama_pos * new_n_past); +# llama_pos * new_n_past, +# mtmd_helper_post_decode_callback callback, +# void * user_data); @ctypes_function( "mtmd_helper_decode_image_chunk", [ @@ -872,6 +880,8 @@ def mtmd_helper_eval_chunk_single( llama_cpp.llama_seq_id, c_int, POINTER(llama_cpp.llama_pos), + mtmd_helper_post_decode_callback, + c_void_p, ], c_int, ) @@ -884,6 +894,8 @@ def mtmd_helper_decode_image_chunk( seq_id: llama_cpp.llama_seq_id, n_batch: Union[c_int, int], new_n_past: "_Pointer[llama_cpp.llama_pos]", + callback: Optional[mtmd_helper_post_decode_callback], + user_data: c_void_p, /, ) -> int: """Decode a pre-encoded image chunk.""" diff --git a/vendor/llama.cpp b/vendor/llama.cpp index 6eab47181..e3a74b299 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit 6eab47181cbd3532c88a105682b81b4729ab809b +Subproject commit e3a74b299085cd00013804f7fca2e03441b2da20