working but needs cleanup

gadicc · gadicc · commit a39e3dfb9081 · 2022-11-06T08:57:01.000Z
diff --git a/Dockerfile b/Dockerfile
@@ -1,11 +1,24 @@
 # Must use a Cuda version 11+
-FROM pytorch/pytorch:1.11.0-cuda11.3-cudnn8-devel as base
+# FROM pytorch/pytorch:1.11.0-cuda11.3-cudnn8-devel as base
+# xformers available precompiled for:
+#   Python 3.9 or 3.10, CUDA 11.3 or 11.6, and PyTorch 1.12.1
+#   https://github.com/facebookresearch/xformers/#getting-started
+FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-runtime as base
+#FROM nvcr.io/nvidia/pytorch:22.08-py3 as base
 ENV DEBIAN_FRONTEND=noninteractive
 RUN mkdir -p /root/.cache/pip
 COPY root-cache/pip /root/.cache/pip
-RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC
+#RUN apt-get install gnupg2
+#RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC
 RUN apt-get update && apt-get install -yqq git
 
+#RUN apt install -yqq software-properties-common
+#RUN add-apt-repository ppa:deadsnakes/ppa
+#RUN apt update
+#RUN apt-get install -yqq python3.10
+#RUN ln -sf /usr/bin/python3.10 /usr/bin/python3
+#RUN ln -sf /usr/bin/python3.10 /usr/bin/python
+
 FROM base AS patchmatch
 ARG USE_PATCHMATCH=0
 WORKDIR /tmp
@@ -16,6 +29,16 @@ FROM base as output
 RUN mkdir /api
 WORKDIR /api
 
+# We need python 3.9 or 3.10 for xformers
+# Yes, this installs PyTorch twice (base image + conda env); will switch base image in future
+# RUN conda update -n base -c defaults conda
+RUN conda create -n xformers python=3.10
+SHELL ["conda", "run", "--no-capture-output", "-n", "xformers", "/bin/bash", "-c"]
+RUN python --version
+RUN conda install -c pytorch -c conda-forge cudatoolkit=11.6 pytorch=1.12.1
+RUN conda install xformers -c xformers/label/dev
+RUN pip install triton==2.0.0.dev20221105
+
 # Install python packages
 RUN pip3 install --upgrade pip
 ADD requirements.txt requirements.txt
@@ -24,8 +47,9 @@ RUN pip3 install -r requirements.txt
 # Required to build flash attention
 # Turing: 7.5 (RTX 20s, Quadro), Ampere: 8.0 (A100), 8.6 (RTX 30s)
 # https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
-ENV FLASH_ATTENTION=0
-ENV TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6"
+# ENV FLASH_ATTENTION=0
+# ENV TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6"
+# The settings above are no longer needed: this is now built into memory-efficient attention ! ^_^
 
 ADD install.sh .
 RUN bash install.sh
@@ -83,3 +107,4 @@ ADD send.py .
 ADD app.py .
 
 CMD python3 -u server.py
+
diff --git a/app.py b/app.py
@@ -273,6 +273,8 @@ def inference(all_inputs: dict) -> dict:
     # with autocast("cuda"):
     # image = pipeline(**model_inputs).images[0]
 
+    pipeline.enable_xformers_memory_efficient_attention()
+
     with torch.inference_mode():
         # autocast im2img and inpaint which are broken in 0.4.0, 0.4.1
         # still broken in 0.5.1
diff --git a/requirements.txt b/requirements.txt
@@ -3,8 +3,8 @@ sanic==22.6.2
 #git+https://github.com/huggingface/diffusers@v0.5.1
 #git+https://github.com/HazyResearch/diffusers
 transformers==4.22.2
-scipy==1.9.1
+scipy==1.9.3
 requests_futures==1.0.0
-numpy==1.23.3
+numpy==1.23.4
 scikit-image==0.19.3
-accelerate==0.13.2
+accelerate==0.13.2