Update codebase for new venv base

ai-dock · Jul 4, 2024 · db3c19f · db3c19f
1 parent bde637d
commit db3c19f
Show file tree

Hide file tree

Showing 49 changed files with 266 additions and 175 deletions.
diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml
@@ -17,9 +17,7 @@ jobs:
       matrix:
         build:
           # Undeclared release tag finds latest from GitHub tags
-          - {latest: "true", tag: "v1.9.0", python: "3.10", pytorch: "2.2.2"}
-          - {latest: "false", tag: "v1.9.0", python: "3.10", pytorch: "2.2.2"}
-          - {latest: "false", tag: "v1.8.0", python: "3.10", pytorch: "2.2.2"}
+          - {latest: "false", tag: "v1.9.4", python: "3.10", pytorch: "2.3.0"}
     steps:
       -
         name: Free Space
@@ -64,11 +62,11 @@ jobs:
             [ -z "$WEBUI_TAG" ] && { echo "Error: WEBUI_TAG is empty. Exiting script." >&2; exit 1; }
             echo "WEBUI_TAG=${WEBUI_TAG}" >> ${GITHUB_ENV}
 
-            base_tag="cpu-${{ env.UBUNTU_VERSION }}"
+            base_tag="v2-cpu-${{ env.UBUNTU_VERSION }}"
 
             if [[ ${{ matrix.build.latest }} == "true" ]]; then
                 echo "Marking latest"
-                TAGS="${img_path}:${base_tag}, ${img_path}:latest-cpu, ${img_path}:latest-cpu-jupyter"
+                TAGS="${img_path}:${base_tag}-${WEBUI_TAG}, ${img_path}:${base_tag}, ${img_path}:latest-cpu, ${img_path}:latest-cpu-jupyter"
             else
                 TAGS="${img_path}:${base_tag}-${WEBUI_TAG}"
             fi
@@ -79,7 +77,7 @@ jobs:
         with:
           context: build
           build-args: |
-            IMAGE_BASE=ghcr.io/ai-dock/python:${{ matrix.build.python }}-cpu-${{ env.UBUNTU_VERSION }}
+            IMAGE_BASE=ghcr.io/ai-dock/python:${{ matrix.build.python }}-v2-cpu-${{ env.UBUNTU_VERSION }}
             PYTHON_VERSION=${{ matrix.build.python }}
             PYTORCH_VERSION=${{ matrix.build.pytorch }}
             WEBUI_TAG=${{ env.WEBUI_TAG }}
@@ -95,9 +93,8 @@ jobs:
       matrix:
         build:
           # Undeclared release tag finds latest from GitHub tags
-          - {latest: "true", tag: "v1.9.0", python: "3.10", pytorch: "2.2.2", cuda: "11.8.0-runtime"}
-          - {latest: "false", tag: "v1.9.0", python: "3.10", pytorch: "2.2.2", cuda: "12.1.1-runtime"}
-          - {latest: "false", tag: "v1.8.0", python: "3.10", pytorch: "2.2.2", cuda: "12.1.1-runtime"}
+          - {latest: "false", tag: "v1.9.4", python: "3.10", pytorch: "2.3.0", cuda: "11.8.0-base"}
+          - {latest: "false", tag: "v1.9.4", python: "3.10", pytorch: "2.3.0", cuda: "12.1.1-base"}
 
     steps:
       -
@@ -144,11 +141,11 @@ jobs:
             [ -z "$WEBUI_TAG" ] && { echo "Error: WEBUI_TAG is empty. Exiting script." >&2; exit 1; }
             echo "WEBUI_TAG=${WEBUI_TAG}" >> ${GITHUB_ENV}
 
-            base_tag="cuda-${{ matrix.build.cuda }}-${{ env.UBUNTU_VERSION }}"
+            base_tag="v2-cuda-${{ matrix.build.cuda }}-${{ env.UBUNTU_VERSION }}"
 
             if [[ ${{ matrix.build.latest }} == "true" ]]; then
                 echo "Marking latest"
-                TAGS="${img_path}:${base_tag}, ${img_path}:latest, ${img_path}:latest-jupyter, ${img_path}:latest-cuda"
+                TAGS="${img_path}:${base_tag}-${WEBUI_TAG}, ${img_path}:${base_tag}, ${img_path}:latest, ${img_path}:latest-jupyter, ${img_path}:latest-cuda"
             else
                 TAGS="${img_path}:${base_tag}-${WEBUI_TAG}"
             fi
@@ -159,7 +156,7 @@ jobs:
         with:
           context: build
           build-args: |
-            IMAGE_BASE=ghcr.io/ai-dock/python:${{ matrix.build.python }}-cuda-${{ matrix.build.cuda }}-${{ env.UBUNTU_VERSION }}
+            IMAGE_BASE=ghcr.io/ai-dock/python:${{ matrix.build.python }}-v2-cuda-${{ matrix.build.cuda }}-${{ env.UBUNTU_VERSION }}
             PYTHON_VERSION=${{ matrix.build.python }}
             PYTORCH_VERSION=${{ matrix.build.pytorch }}
             WEBUI_TAG=${{ env.WEBUI_TAG }}
@@ -173,9 +170,7 @@ jobs:
       fail-fast: false
       matrix:
         build:
-          - {latest: "true", tag: "v1.9.0", python: "3.10", pytorch: "2.2.2", rocm: "5.7-runtime"}
-          - {latest: "false", tag: "v1.9.0", python: "3.10", pytorch: "2.2.2", rocm: "5.7-runtime"}
-          - {latest: "false", tag: "v1.8.0", python: "3.10", pytorch: "2.2.2", rocm: "5.7-runtime"}
+          - {latest: "false", tag: "v1.9.4", python: "3.10", pytorch: "2.3.0", rocm: "6.0-core"}
     steps:
       -
         name: Free Space
@@ -221,11 +216,11 @@ jobs:
             [ -z "$WEBUI_TAG" ] && { echo "Error: WEBUI_TAG is empty. Exiting script." >&2; exit 1; }
             echo "WEBUI_TAG=${WEBUI_TAG}" >> ${GITHUB_ENV}
 
-            base_tag="rocm-${{ matrix.build.rocm }}-${{ env.UBUNTU_VERSION }}"
+            base_tag="v2-rocm-${{ matrix.build.rocm }}-${{ env.UBUNTU_VERSION }}"
 
             if [[ ${{ matrix.build.latest }} == "true" ]]; then
                 echo "Marking latest"
-                TAGS="${img_path}:${base_tag}, ${img_path}:latest-rocm, ${img_path}:latest-rocm-jupyter"
+                TAGS="${img_path}:${base_tag}-${WEBUI_TAG}, ${img_path}:${base_tag}, ${img_path}:latest-rocm, ${img_path}:latest-rocm-jupyter"
             else
                 TAGS="${img_path}:${base_tag}-${WEBUI_TAG}"
             fi
@@ -236,7 +231,7 @@ jobs:
         with:
           context: build
           build-args: |
-            IMAGE_BASE=ghcr.io/ai-dock/python:${{ matrix.build.python }}-rocm-${{ matrix.build.rocm }}-${{ env.UBUNTU_VERSION }}
+            IMAGE_BASE=ghcr.io/ai-dock/python:${{ matrix.build.python }}-v2-rocm-${{ matrix.build.rocm }}-${{ env.UBUNTU_VERSION }}
             PYTHON_VERSION=${{ matrix.build.python }}
             PYTORCH_VERSION=${{ matrix.build.pytorch }}
             WEBUI_TAG=${{ env.WEBUI_TAG }}

diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,5 @@
 workspace
 *__pycache__
-build/COPY_ROOT_EXTRA/
 config/authorized_keys
 config/rclone
 tpdocs

diff --git a/build/COPY_ROOT/opt/ai-dock/bin/build/layer0/amd.sh b/build/COPY_ROOT/opt/ai-dock/bin/build/layer0/amd.sh
diff --git a/build/COPY_ROOT/opt/ai-dock/bin/build/layer0/common.sh b/build/COPY_ROOT/opt/ai-dock/bin/build/layer0/common.sh
diff --git a/build/COPY_ROOT_0/opt/ai-dock/bin/build/layer0/amd.sh b/build/COPY_ROOT_0/opt/ai-dock/bin/build/layer0/amd.sh
@@ -0,0 +1,16 @@
+#!/bin/false
+
+build_amd_main() {  # Entry point for this script: install packages, then verify them
+    build_amd_install_deps  # defined below in this file
+    build_common_run_tests  # not defined in this file; must already be in scope when this runs
+}
+
+build_amd_install_deps() {
+    "$WEBUI_VENV_PIP" install --no-cache-dir \
+        torch==${PYTORCH_VERSION} \
+        torchvision \
+        torchaudio \
+        --extra-index-url=https://download.pytorch.org/whl/rocm${ROCM_VERSION}
+}
+
+build_amd_main "$@"
diff --git a/...OOT/opt/ai-dock/bin/build/layer0/clean.sh → ...T_0/opt/ai-dock/bin/build/layer0/clean.sh b/...OOT/opt/ai-dock/bin/build/layer0/clean.sh → ...T_0/opt/ai-dock/bin/build/layer0/clean.sh
@@ -2,7 +2,6 @@
 
 # Tidy up and keep image small
 apt-get clean -y
-micromamba clean -ay
 
 fix-permissions.sh -o container
 rm /etc/ld.so.cache

diff --git a/build/COPY_ROOT_0/opt/ai-dock/bin/build/layer0/common.sh b/build/COPY_ROOT_0/opt/ai-dock/bin/build/layer0/common.sh
@@ -0,0 +1,38 @@
+#!/bin/false
+
+source /opt/ai-dock/etc/environment.sh
+
+build_common_main() {  # Entry point for this script; its only step is creating the venv
+    build_common_create_venv  # defined below in this file
+}
+
+build_common_create_venv() {
+    # Name of the interpreter for the requested release (python + PYTHON_VERSION)
+    local python_bin="python${PYTHON_VERSION}"
+
+    # Install the interpreter together with its headers and venv module
+    apt-get update
+    $APT_INSTALL "$python_bin" "${python_bin}-dev" "${python_bin}-venv"
+
+    # Create the venv at $WEBUI_VENV and add the Jupyter kernel packages to it
+    "$python_bin" -m venv "$WEBUI_VENV"
+    "$WEBUI_VENV_PIP" install --no-cache-dir ipykernel ipywidgets
+
+    # Register the venv as a kernel named "webui" ...
+    "$WEBUI_VENV_PYTHON" -m ipykernel install --name="webui" \
+        --display-name="Python${PYTHON_VERSION} (webui)"
+    # ... and again under the default Jupyter kernel name, as an alias of webui
+    "$WEBUI_VENV_PYTHON" -m ipykernel install --name="python3" \
+        --display-name="Python3 (ipykernel)"
+}
+
+
+build_common_run_tests() {
+    # Abort the build (exit 1) unless the torch importable from the webui
+    # venv reports a version that starts with PYTORCH_VERSION. The match is
+    # a prefix match, so a reported version with a trailing suffix passes.
+    local installed_pytorch_version
+    installed_pytorch_version=$("$WEBUI_VENV_PYTHON" -c "import torch; print(torch.__version__)")
+    if [[ "$installed_pytorch_version" != "$PYTORCH_VERSION"* ]]; then
+        # Plain echo does not interpret "\n", so no escape is embedded;
+        # the diagnostic goes to stderr
+        echo "Expected PyTorch ${PYTORCH_VERSION} but found ${installed_pytorch_version}" >&2
+        exit 1
+    fi
+}
+
+build_common_main "$@"
diff --git a/build/COPY_ROOT_0/opt/ai-dock/bin/build/layer0/cpu.sh b/build/COPY_ROOT_0/opt/ai-dock/bin/build/layer0/cpu.sh
@@ -0,0 +1,16 @@
+#!/bin/false
+
+build_cpu_main() {  # Entry point for this script: install packages, then verify them
+    build_cpu_install_deps  # defined below in this file
+    build_common_run_tests  # not defined in this file; must already be in scope when this runs
+}
+
+build_cpu_install_deps() {
+    "$WEBUI_VENV_PIP" install --no-cache-dir \
+        torch==${PYTORCH_VERSION} \
+        torchvision \
+        torchaudio \
+        --extra-index-url=https://download.pytorch.org/whl/cpu
+}
+
+build_cpu_main "$@"
diff --git a/...ROOT/opt/ai-dock/bin/build/layer0/init.sh → ...OT_0/opt/ai-dock/bin/build/layer0/init.sh b/...ROOT/opt/ai-dock/bin/build/layer0/init.sh → ...OT_0/opt/ai-dock/bin/build/layer0/init.sh
diff --git a/build/COPY_ROOT_0/opt/ai-dock/bin/build/layer0/nvidia.sh b/build/COPY_ROOT_0/opt/ai-dock/bin/build/layer0/nvidia.sh
@@ -0,0 +1,28 @@
+#!/bin/false
+
+build_nvidia_main() {  # Entry point for this script: install packages, then run both checks
+    build_nvidia_install_deps  # defined below in this file
+    build_common_run_tests  # not defined in this file; must already be in scope when this runs
+    build_nvidia_run_tests  # CUDA-specific check defined below in this file
+}
+
+build_nvidia_install_deps() {
+    short_cuda_version="cu$(cut -d '.' -f 1,2 <<< "${CUDA_VERSION}" | tr -d '.')"
+    "$WEBUI_VENV_PIP" install --no-cache-dir \
+        nvidia-ml-py3 \
+        torch==${PYTORCH_VERSION} \
+        torchvision \
+        torchaudio \
+        xformers \
+        --extra-index-url=https://download.pytorch.org/whl/$short_cuda_version
+}
+
+build_nvidia_run_tests() {
+    # Abort the build (exit 1) unless the CUDA version reported by the
+    # venv's torch is a prefix of CUDA_VERSION.
+    local installed_pytorch_cuda_version
+    installed_pytorch_cuda_version=$("$WEBUI_VENV_PYTHON" -c "import torch; print(torch.version.cuda)")
+    # The comparison must read the variable assigned above. An empty value
+    # (e.g. the python call failed) is rejected explicitly, because an empty
+    # prefix would reduce the pattern to "*" and match any CUDA_VERSION.
+    if [[ -z "$installed_pytorch_cuda_version" || "$CUDA_VERSION" != "$installed_pytorch_cuda_version"* ]]; then
+        # Plain echo does not interpret "\n", so no escape is embedded;
+        # the diagnostic goes to stderr
+        echo "Expected PyTorch CUDA ${CUDA_VERSION} but found ${installed_pytorch_cuda_version}" >&2
+        exit 1
+    fi
+}
+
+build_nvidia_main "$@"
diff --git a/...tc/supervisor/supervisord/conf.d/.gitkeep → ...tc/supervisor/supervisord/conf.d/.gitkeep b/...tc/supervisor/supervisord/conf.d/.gitkeep → ...tc/supervisor/supervisord/conf.d/.gitkeep
diff --git a/.../supervisor/supervisord/conf.d/webui.conf → .../supervisor/supervisord/conf.d/webui.conf b/.../supervisor/supervisord/conf.d/webui.conf → .../supervisor/supervisord/conf.d/webui.conf
diff --git a/build/COPY_ROOT_1/opt/ai-dock/bin/build/layer1/amd.sh b/build/COPY_ROOT_1/opt/ai-dock/bin/build/layer1/amd.sh
@@ -0,0 +1,36 @@
+#!/bin/false
+
+build_amd_main() {  # Entry point for this script: build bitsandbytes, install the webui, then run the shared check
+    build_amd_install_bitsandbytes  # defined below in this file
+    build_amd_install_webui  # defined below in this file
+    build_common_run_tests  # not defined in this file; must already be in scope when this runs
+}
+
+build_amd_install_bitsandbytes() {  # Build the ROCm fork of bitsandbytes from source and pip-install it via $WEBUI_VENV_PIP
+    # TODO - This really needs moving to a separate, external build step
+    # https://github.com/ROCm/bitsandbytes
+    DEV_PACKAGES="rocm-dev hipblas-dev hipblaslt-dev hipcub-dev hiprand-dev hipsparse-dev rocblas-dev rocthrust-dev"
+    if [[ $ROCM_LEVEL != "devel" ]]; then  # presumably "devel" images already ship these packages - confirm
+        $APT_INSTALL $DEV_PACKAGES  # DEV_PACKAGES is left unquoted so it splits into one argument per package
+    fi
+    cd /tmp
+    git clone --recurse https://github.com/ROCm/bitsandbytes  # clones into /tmp/bitsandbytes, submodules included
+    cd bitsandbytes
+    git checkout rocm_enabled  # build from the rocm_enabled ref
+    "$WEBUI_VENV_PIP" install --no-cache-dir -r requirements-dev.txt
+    cmake -DCOMPUTE_BACKEND=hip -S . #Use -DBNB_ROCM_ARCH="gfx90a;gfx942" to target specific gpu arch
+    make
+    "$WEBUI_VENV_PIP" install --no-cache-dir .  # install the package from the current checkout
+    cd /tmp  # leave the checkout before deleting it
+    rm -rf /tmp/bitsandbytes  # discard the source tree
+    if [[ $ROCM_LEVEL != "devel" ]]; then  # same condition as the install above
+        apt-get remove -y $DEV_PACKAGES  # remove the build-only packages installed above
+        apt-get autoremove -y
+    fi
+}
+
+build_amd_install_webui() {  # Adds no AMD-specific steps; only forwards to the shared installer
+    build_common_install_webui  # not defined in this file; must already be in scope when this runs
+}
+
+build_amd_main "$@"
diff --git a/build/COPY_ROOT_1/opt/ai-dock/bin/build/layer1/clean.sh b/build/COPY_ROOT_1/opt/ai-dock/bin/build/layer1/clean.sh
@@ -0,0 +1,8 @@
+#!/bin/false
+
+# Tidy up and keep image small
+apt-get clean -y  # clear apt's cache of downloaded package files
+
+fix-permissions.sh -o container  # external helper, not shown here - presumably resets ownership/modes; confirm
+rm /etc/ld.so.cache  # delete the existing dynamic-linker cache ...
+ldconfig  # ... and regenerate it