From 5d431003b9c5e318fe315b1e9bb6eea78349917f Mon Sep 17 00:00:00 2001 From: Lim Chee Kin Date: Tue, 13 Jun 2023 11:53:39 +0800 Subject: [PATCH 1/9] feat: added files for aws lambda deployment --- .github/workflows/dev.yml | 29 ++++++++++++++++ .github/workflows/rm-dev.yml | 18 ++++++++++ Dockerfile.aws | 65 ++++++++++++++++++++++++++++++++++++ llama_cpp/server/aws.py | 8 +++++ requirements.txt | 5 +++ serverless.yml | 43 ++++++++++++++++++++++++ 6 files changed, 168 insertions(+) create mode 100644 .github/workflows/dev.yml create mode 100644 .github/workflows/rm-dev.yml create mode 100644 Dockerfile.aws create mode 100644 llama_cpp/server/aws.py create mode 100644 requirements.txt create mode 100644 serverless.yml diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml new file mode 100644 index 0000000000..f2639eeb71 --- /dev/null +++ b/.github/workflows/dev.yml @@ -0,0 +1,29 @@ +name: Deploy Dev +on: + workflow_dispatch: + branches: + - main +jobs: + deploy-dev: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v3 + with: + submodules: "true" + - name: Setup NodeJS 18 + uses: actions/setup-node@v3 + with: + node-version: 18 + - name: Install Serverless Framework + run: npm install -g serverless + - name: Serverless AWS authentication + run: sls config credentials --provider aws --key ${{ secrets.AWS_KEY }} --secret ${{ secrets.AWS_SECRET }} + - name: Deploy Lambda functions + run: sls deploy + - name: Export Endpoint URL + run: echo $(sls info --verbose | grep endpoint | sed s/endpoint\:\ //g | awk '{print $1}') > endpoint + - name: Echo Endpoint URL + run: echo $(cat endpoint) + - name: Test Lambda functions + run: "curl -X POST -H 'Content-Type: application/json' -d @prompt.json $(cat endpoint)v1/completions" diff --git a/.github/workflows/rm-dev.yml b/.github/workflows/rm-dev.yml new file mode 100644 index 0000000000..88137ef0c0 --- /dev/null +++ b/.github/workflows/rm-dev.yml @@ -0,0 +1,18 @@ +name: Remove Dev 
+on: workflow_dispatch + +jobs: + rm-dev: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Setup NodeJS 18 + uses: actions/setup-node@v3 + with: + node-version: 18 + - name: Install Serverless Framework + run: npm install -g serverless + - name: Serverless AWS authentication + run: sls config credentials --provider aws --key ${{ secrets.AWS_KEY }} --secret ${{ secrets.AWS_SECRET }} + - name: Remove Lambda functions + run: sls remove diff --git a/Dockerfile.aws b/Dockerfile.aws new file mode 100644 index 0000000000..e25e713ced --- /dev/null +++ b/Dockerfile.aws @@ -0,0 +1,65 @@ +# REF: https://aws.amazon.com/blogs/aws/new-for-aws-lambda-container-image-support/ +# The download size of `python:3.10-slim-bullseye` is **45MB**¹. Its uncompressed on-disk size is **125MB**¹. +# (1) The best Docker base image for your Python application (March 2023). https://pythonspeed.com/articles/base-image-python-docker-images/. +# (2) Reduce the size of container images with DockerSlim. https://developers.redhat.com/articles/2022/01/17/reduce-size-container-images-dockerslim. 
+# Define global args +ARG FUNCTION_DIR="/home/app/" +ARG RUNTIME_VERSION="3.10" + +# Stage 1 - bundle base image + runtime +# Grab a fresh copy of the Python base image (build tools are installed in Stage 2) +FROM python:${RUNTIME_VERSION}-slim-bullseye AS python-slim-bullseye + +# Stage 2 - build function and dependencies +FROM python-slim-bullseye AS build-image +# Install build dependencies (toolchain for aws-lambda-cpp, OpenBLAS for llama.cpp, curl for the model download) +# REF: https://docs.aws.amazon.com/lambda/latest/dg/images-create.html +RUN apt-get update && \ + apt-get install -y \ + libopenblas-dev \ + ninja-build \ + build-essential \ + curl + +# Include global args in this stage of the build +ARG FUNCTION_DIR +ARG RUNTIME_VERSION +# Create function directory +RUN mkdir -p ${FUNCTION_DIR} +# Install the function's dependencies +COPY requirements.txt ./ +RUN python${RUNTIME_VERSION} -m pip install -r requirements.txt --target ${FUNCTION_DIR} + +COPY ./ ${FUNCTION_DIR} + +RUN python${RUNTIME_VERSION} -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette +# REF: https://github.com/abetlen/llama-cpp-python/blob/main/Dockerfile +RUN cd ${FUNCTION_DIR} && CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 python${RUNTIME_VERSION} setup.py develop +# Install Lambda Runtime Interface Client for Python +RUN python${RUNTIME_VERSION} -m pip install awslambdaric --target ${FUNCTION_DIR} + +# download the model file +RUN mkdir ${FUNCTION_DIR}/model +RUN curl -L https://huggingface.co/SlyEcho/open_llama_3b_ggml/resolve/main/open-llama-3b-q4_0.bin -o ${FUNCTION_DIR}/model/ggml-q4_0.bin + +# Stage 3 - final runtime image +# Grab a fresh copy of the Python image +FROM python-slim-bullseye + +# Install runtime dependencies +RUN apt-get update && \ + apt-get install -y \ + libopenblas-dev + +# Include global arg in this stage of the build +ARG FUNCTION_DIR +# Set working directory to function root directory +WORKDIR ${FUNCTION_DIR} +# Copy in the built dependencies +COPY --from=build-image ${FUNCTION_DIR} ${FUNCTION_DIR} +# 
(Optional) Add Lambda Runtime Interface Emulator and use a script in the ENTRYPOINT for simpler local runs +ADD https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest/download/aws-lambda-rie /usr/bin/aws-lambda-rie +COPY entry.sh / +RUN chmod 755 /usr/bin/aws-lambda-rie /entry.sh +ENTRYPOINT [ "/entry.sh" ] +CMD [ "llama_cpp.server.aws.handler" ] diff --git a/llama_cpp/server/aws.py b/llama_cpp/server/aws.py new file mode 100644 index 0000000000..e1774b124b --- /dev/null +++ b/llama_cpp/server/aws.py @@ -0,0 +1,8 @@ +"""AWS Lambda function for llama.cpp. +""" +from mangum import Mangum +from llama_cpp.server.app import create_app + +handler = Mangum(create_app()) + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000..71fa3ea5c1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +typing_extensions +fastapi +sse-starlette +mangum +numpy \ No newline at end of file diff --git a/serverless.yml b/serverless.yml new file mode 100644 index 0000000000..b4ee38f9e7 --- /dev/null +++ b/serverless.yml @@ -0,0 +1,43 @@ +service: llama-cpp + +frameworkVersion: "3" + +provider: + name: aws + deploymentMethod: direct + # REF: https://www.serverless.com/blog/container-support-for-lambda + ecr: + # In this section you can define images that will be built locally and uploaded to ECR + images: + appimage: + path: ./ + file: Dockerfile.aws + stage: dev + region: ap-southeast-1 + iam: + role: + statements: + - Effect: "Allow" + Action: + - "lambda:InvokeFunction" + Resource: "*" + +functions: + chat: + image: + name: appimage + memorySize: 3072 + environment: + MODEL: ./model/ggml-q4_0.bin + timeout: + 900 + # https://www.serverless.com/framework/docs/providers/aws/guide/functions#lambda-function-urls + url: + # Allow CORS for all requests from any origin + cors: + allowedOrigins: + - "*" + #- https://url1.com + #- https://url2.com + allowedMethods: + - POST From e9045422ea89068bb782b9fb310d6ad82e79cc69 Mon Sep 17 00:00:00 
2001 From: Lim Chee Kin Date: Tue, 13 Jun 2023 11:55:09 +0800 Subject: [PATCH 2/9] feat: added prompt.json to test for deployed aws lambda function --- prompt.json | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 prompt.json diff --git a/prompt.json b/prompt.json new file mode 100644 index 0000000000..c08f9f7c4f --- /dev/null +++ b/prompt.json @@ -0,0 +1,6 @@ +{ + "prompt": [ + "\n\n### Instructions:\nWhat is the capital of France?\n\n### Response:\n" + ], + "stop": ["\n", "###"] +} From 241b0305dea2a7829f2cb053d1f3fe791261d1c6 Mon Sep 17 00:00:00 2001 From: Lim Chee Kin Date: Tue, 13 Jun 2023 12:05:22 +0800 Subject: [PATCH 3/9] feat: added the missing entry.sh file --- entry.sh | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 entry.sh diff --git a/entry.sh b/entry.sh new file mode 100644 index 0000000000..a608361e2b --- /dev/null +++ b/entry.sh @@ -0,0 +1,6 @@ +#!/bin/sh +if [ -z "${AWS_LAMBDA_RUNTIME_API}" ]; then + exec /usr/bin/aws-lambda-rie /usr/local/bin/python -m awslambdaric $1 +else + exec /usr/local/bin/python -m awslambdaric $1 +fi From a6ca3e32b585eec14a5b430304ee2c00037ee320 Mon Sep 17 00:00:00 2001 From: Lim Chee Kin Date: Tue, 13 Jun 2023 12:20:51 +0800 Subject: [PATCH 4/9] feat: added diskcache as dependency --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 71fa3ea5c1..7b5155b0de 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ typing_extensions fastapi sse-starlette mangum -numpy \ No newline at end of file +numpy +diskcache \ No newline at end of file From b2a3b21e35bc4a01b8a07e0829c9f6a12686961e Mon Sep 17 00:00:00 2001 From: Lim Chee Kin Date: Tue, 27 Jun 2023 16:01:40 +0800 Subject: [PATCH 5/9] feat: deploy orca_mini_13B-GGML model --- Dockerfile.aws | 2 +- llama_cpp/server/aws.py | 9 +++++---- serverless.yml | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Dockerfile.aws b/Dockerfile.aws index 
e25e713ced..96cb06537a 100644 --- a/Dockerfile.aws +++ b/Dockerfile.aws @@ -40,7 +40,7 @@ RUN python${RUNTIME_VERSION} -m pip install awslambdaric --target ${FUNCTION_DIR # download the model file RUN mkdir ${FUNCTION_DIR}/model -RUN curl -L https://huggingface.co/SlyEcho/open_llama_3b_ggml/resolve/main/open-llama-3b-q4_0.bin -o ${FUNCTION_DIR}/model/ggml-q4_0.bin +RUN curl -L https://huggingface.co/TheBloke/orca_mini_13B-GGML/resolve/main/orca-mini-13b.ggmlv3.q4_0.bin -o ${FUNCTION_DIR}/model/ggml-q4_0.bin # Stage 3 - final runtime image # Grab a fresh copy of the Python image diff --git a/llama_cpp/server/aws.py b/llama_cpp/server/aws.py index e1774b124b..46629480c8 100644 --- a/llama_cpp/server/aws.py +++ b/llama_cpp/server/aws.py @@ -1,8 +1,9 @@ """AWS Lambda function for llama.cpp. """ from mangum import Mangum -from llama_cpp.server.app import create_app - -handler = Mangum(create_app()) - +from llama_cpp.server.app import create_app, Settings +import os +print("os.cpu_count()", os.cpu_count()) +handler = Mangum(create_app( + Settings(n_threads=os.cpu_count(), embedding=False))) diff --git a/serverless.yml b/serverless.yml index b4ee38f9e7..d370c0cb1c 100644 --- a/serverless.yml +++ b/serverless.yml @@ -26,7 +26,7 @@ functions: chat: image: name: appimage - memorySize: 3072 + memorySize: 10240 environment: MODEL: ./model/ggml-q4_0.bin timeout: From 96d991a723d27fd3f3656ead8a6f928774ef0e45 Mon Sep 17 00:00:00 2001 From: Lim Chee Kin Date: Tue, 27 Jun 2023 16:48:15 +0800 Subject: [PATCH 6/9] chore: changed to orca-mini-7b as failed in 13b --- Dockerfile.aws | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.aws b/Dockerfile.aws index 96cb06537a..96cf36177f 100644 --- a/Dockerfile.aws +++ b/Dockerfile.aws @@ -40,7 +40,7 @@ RUN python${RUNTIME_VERSION} -m pip install awslambdaric --target ${FUNCTION_DIR # download the model file RUN mkdir ${FUNCTION_DIR}/model -RUN curl -L 
https://huggingface.co/TheBloke/orca_mini_13B-GGML/resolve/main/orca-mini-13b.ggmlv3.q4_0.bin -o ${FUNCTION_DIR}/model/ggml-q4_0.bin +RUN curl -L https://huggingface.co/TheBloke/orca_mini_7B-GGML/resolve/main/orca-mini-7b.ggmlv3.q4_0.bin -o ${FUNCTION_DIR}/model/ggml-q4_0.bin # Stage 3 - final runtime image # Grab a fresh copy of the Python image From 958f501c3a7a1976152a0b3f3547a55ff677f284 Mon Sep 17 00:00:00 2001 From: Lim Chee Kin Date: Mon, 10 Jul 2023 20:01:22 +0800 Subject: [PATCH 7/9] chore: updated to orca-mini-v2-7b --- Dockerfile.aws | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.aws b/Dockerfile.aws index 96cf36177f..46c2b57cd7 100644 --- a/Dockerfile.aws +++ b/Dockerfile.aws @@ -40,7 +40,7 @@ RUN python${RUNTIME_VERSION} -m pip install awslambdaric --target ${FUNCTION_DIR # download the model file RUN mkdir ${FUNCTION_DIR}/model -RUN curl -L https://huggingface.co/TheBloke/orca_mini_7B-GGML/resolve/main/orca-mini-7b.ggmlv3.q4_0.bin -o ${FUNCTION_DIR}/model/ggml-q4_0.bin +RUN curl -L https://huggingface.co/TheBloke/orca_mini_v2_7B-GGML/resolve/main/orca-mini-v2_7b.ggmlv3.q4_0.bin -o ${FUNCTION_DIR}/model/ggml-q4_0.bin # Stage 3 - final runtime image # Grab a fresh copy of the Python image From 2d83e8873391e52707e8bd69051e193a90b343b1 Mon Sep 17 00:00:00 2001 From: Lim Chee Kin Date: Mon, 10 Jul 2023 21:13:46 +0800 Subject: [PATCH 8/9] chore: added pydantic_settings dependency --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7b5155b0de..e2b91c07cc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ fastapi sse-starlette mangum numpy -diskcache \ No newline at end of file +diskcache +pydantic_settings \ No newline at end of file From 5f9f46e8b86e535fb92b34d0891da5c0e66c9c98 Mon Sep 17 00:00:00 2001 From: Lim Chee Kin Date: Mon, 14 Aug 2023 17:22:34 +0800 Subject: [PATCH 9/9] chore: updated docker file to test the latest code --- 
Dockerfile.aws | 5 +++-- vendor/llama.cpp | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Dockerfile.aws b/Dockerfile.aws index 46c2b57cd7..2f1983c074 100644 --- a/Dockerfile.aws +++ b/Dockerfile.aws @@ -19,6 +19,7 @@ RUN apt-get update && \ libopenblas-dev \ ninja-build \ build-essential \ + pkg-config \ curl # Include global args in this stage of the build @@ -34,13 +35,13 @@ COPY ./ ${FUNCTION_DIR} RUN python${RUNTIME_VERSION} -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette # REF: https://github.com/abetlen/llama-cpp-python/blob/main/Dockerfile -RUN cd ${FUNCTION_DIR} && CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 python${RUNTIME_VERSION} setup.py develop +RUN cd ${FUNCTION_DIR} && CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 python${RUNTIME_VERSION} setup.py develop # Install Lambda Runtime Interface Client for Python RUN python${RUNTIME_VERSION} -m pip install awslambdaric --target ${FUNCTION_DIR} # download the model file RUN mkdir ${FUNCTION_DIR}/model -RUN curl -L https://huggingface.co/TheBloke/orca_mini_v2_7B-GGML/resolve/main/orca-mini-v2_7b.ggmlv3.q4_0.bin -o ${FUNCTION_DIR}/model/ggml-q4_0.bin +RUN curl -L https://huggingface.co/TheBloke/orca_mini_v3_7B-GGML/resolve/main/orca_mini_v3_7b.ggmlv3.q4_0.bin -o ${FUNCTION_DIR}/model/ggml-q4_0.bin # Stage 3 - final runtime image # Grab a fresh copy of the Python image diff --git a/vendor/llama.cpp b/vendor/llama.cpp index f5bfea0580..edcc7ae7d2 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit f5bfea0580e417f99850d5456ca541d871a3e48c +Subproject commit edcc7ae7d26007bbf83136e9d33f863fcad9b871