forked from tc-wolf/llama-cpp-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
53 lines (43 loc) · 2.3 KB
/
Copy pathDockerfile
File metadata and controls
53 lines (43 loc) · 2.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
FROM ubuntu:20.04

# Listen on every interface so the server is reachable from outside the
# container.
ENV HOST=0.0.0.0

# Print the glibc banner of the base image at build time. The binary has to
# run against glibc <= 2.31 to stay compatible with older Linux targets.
# NOTE: the loader path is specific to aarch64 images.
RUN printf 'Glibc version:\n\n' && /lib/aarch64-linux-gnu/libc.so.6
# Install Python 3.9 (interpreter + headers), pip, git and generic build
# helpers. `apt-get update` runs in the same layer as the install so the
# package index can never be a stale cached copy, and the index is removed
# afterwards so it does not bloat the layer.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        git \
        ninja-build \
        pkg-config \
        python3-pip \
        python3.9 \
        python3.9-dev && \
    rm -rf /var/lib/apt/lists/*
# Install toolchain (GCC 11)
# The toolchain PPA supplies GCC 11; software-properties-common provides
# `add-apt-repository`. This layer refreshes the package index itself rather
# than relying on whatever index an earlier (possibly cached) layer left
# behind. `update-alternatives` then makes gcc/g++/gcov/gcc-ar/gcc-ranlib/cpp
# resolve to their -11 variants.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        software-properties-common && \
    add-apt-repository -y ppa:ubuntu-toolchain-r/test && \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        g++-11 \
        gcc-11 \
        make && \
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 110 \
        --slave /usr/bin/g++ g++ /usr/bin/g++-11 \
        --slave /usr/bin/gcov gcov /usr/bin/gcov-11 \
        --slave /usr/bin/gcc-ar gcc-ar /usr/bin/gcc-ar-11 \
        --slave /usr/bin/gcc-ranlib gcc-ranlib /usr/bin/gcc-ranlib-11 \
        --slave /usr/bin/cpp cpp /usr/bin/cpp-11 && \
    rm -rf /var/lib/apt/lists/*
# Upgrade pip and install the Python-side build tooling (CMake, the
# scikit-build-core backend, setuptools) plus PyInstaller for packaging.
# The extras specifier is quoted so the shell cannot treat `[pyproject]` as a
# glob pattern, and --no-cache-dir keeps pip's download cache out of the layer.
RUN python3.9 -m pip install --no-cache-dir --upgrade \
        cmake \
        pip \
        pyinstaller \
        "scikit-build-core[pyproject]" \
        setuptools
# Bring the repository, in its current state, into the image.
COPY . .
# Hardware-specific tuning values for the target CPU. The three base values
# are independent of each other, so they share one ENV instruction.
ENV march=armv8.2-a+crypto+fp16+rcpc+dotprod \
    mcpu=cortex-a78c+crypto+noprofile+nossbs+noflagm+nopauth \
    mtune=cortex-a78c
# Must be a separate ENV instruction: substitution inside a single ENV
# instruction only sees values defined by earlier instructions.
ENV compiler_flags="-march=${march} -mcpu=${mcpu} -mtune=${mtune}"
# Have to build for Q4_0 quantization set GGML_CPU_AARCH64=ON and then have to
# set march flags again so that used by aarch64 CPU backend code.
#
# The build output is piped through `tee` to keep a copy in buildlog.txt.
# Under the default `/bin/sh -c` the pipeline's exit status is that of `tee`,
# so a failed `pip install` would be silently ignored and the image build
# would carry on with a broken install. Run this step under bash with
# `pipefail` so a compile/install failure aborts the build, then restore the
# default shell for the instructions that follow.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN CC=gcc-11 CXX=g++-11 CMAKE_BUILD_TYPE=Release \
CMAKE_ARGS="-DGGML_LLAMAFILE=OFF \
-DCMAKE_C_FLAGS='${compiler_flags}' -DCMAKE_CXX_FLAGS='${compiler_flags}' \
-DGGML_BLAS=OFF -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${march} \
-DGGML_CPU_AARCH64=ON -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_VERBOSE_MAKEFILE:BOOL=TRUE" \
python3.9 -m pip install -v -e ".[server]" 2>&1 | tee buildlog.txt
SHELL ["/bin/sh", "-c"]
# TODO: Export buildlog.txt in `make deploy.docker` step for review after
# building.
# Package the server entry point with PyInstaller under the name
# llama-cpp-py-server, bundling the llama/ggml shared libraries into the
# llama_cpp/lib directory of the bundle. The command runs from /root, so
# PyInstaller's working files and output are presumably created there.
# NOTE(review): both --onedir and --onefile are passed, as in the original
# short form `-DF`; presumably the later flag (--onefile) takes effect —
# confirm.
RUN cd /root && pyinstaller --onedir --onefile /llama_cpp/server/__main__.py \
    --add-data /llama_cpp/lib/libllama.so:llama_cpp/lib \
    --add-data /llama_cpp/lib/libggml.so:llama_cpp/lib \
    --add-data /llama_cpp/lib/libggml-base.so:llama_cpp/lib \
    --add-data /llama_cpp/lib/libggml-cpu.so:llama_cpp/lib \
    --name llama-cpp-py-server