Skip to content

Commit

Permalink
First
Browse files Browse the repository at this point in the history
  • Loading branch information
robballantyne committed Nov 19, 2024
1 parent 39a2fde commit e41234d
Show file tree
Hide file tree
Showing 21 changed files with 142 additions and 215 deletions.
40 changes: 20 additions & 20 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
matrix:
build:
# Undeclared release tag finds latest from GitHub tags
- {latest: "true", kohya: "v24.1.6", python: "3.10", pytorch: "2.4.0", cuda: "12.1.1-base"}
- {latest: "true", fluxgym: "e118480", python: "3.10", pytorch: "2.5.1", cuda: "12.1.1-base"}

steps:
-
Expand Down Expand Up @@ -64,24 +64,24 @@ jobs:
img_path_ghcr="ghcr.io/${{ env.REPO_NAMESPACE }}/${{ env.REPO_NAME }}"
img_path_dhub="${{ vars.DOCKERHUB_USER }}/${{ env.REPO_NAME }}-cuda"
if [[ -z '${{ matrix.build.kohya }}' ]]; then
KOHYA_BUILD_REF="$(curl -s https://api.github.com/repos/bmaltais/kohya_ss/tags | jq -r '.[0].name')"
if [[ -z '${{ matrix.build.fluxgym }}' ]]; then
FLUXGYM_BUILD_REF="$(curl -s https://api.github.com/repos/cocktailpeanut/fluxgym/tags | jq -r '.[0].name')"
else
KOHYA_BUILD_REF="${{ matrix.build.kohya }}"
FLUXGYM_BUILD_REF="${{ matrix.build.fluxgym }}"
fi
[ -z "$KOHYA_BUILD_REF" ] && { echo "Error: KOHYA_BUILD_REF is empty. Exiting script." >&2; exit 1; }
echo "KOHYA_BUILD_REF=${KOHYA_BUILD_REF}" >> ${GITHUB_ENV}
[ -z "$FLUXGYM_BUILD_REF" ] && { echo "Error: FLUXGYM_BUILD_REF is empty. Exiting script." >&2; exit 1; }
echo "FLUXGYM_BUILD_REF=${FLUXGYM_BUILD_REF}" >> ${GITHUB_ENV}
base_tag="v2-cuda-${{ matrix.build.cuda }}-${{ env.UBUNTU_VERSION }}"
if [[ ${{ matrix.build.latest }} == "true" ]]; then
echo "Marking latest"
# GHCR.io
TAGS="${img_path_ghcr}:${base_tag}-${KOHYA_BUILD_REF}, ${img_path_ghcr}:${base_tag}, ${img_path_ghcr}:latest, ${img_path_ghcr}:latest-cuda"
TAGS="${img_path_ghcr}:${base_tag}-${FLUXGYM_BUILD_REF}, ${img_path_ghcr}:${base_tag}, ${img_path_ghcr}:latest, ${img_path_ghcr}:latest-cuda"
# Docker.io Tags
TAGS="${TAGS}, ${img_path_dhub}:${KOHYA_BUILD_REF}, ${img_path_dhub}:latest"
TAGS="${TAGS}, ${img_path_dhub}:${FLUXGYM_BUILD_REF}, ${img_path_dhub}:latest"
else
TAGS="${img_path_ghcr}:${base_tag}-${KOHYA_BUILD_REF}, ${img_path_dhub}:${KOHYA_BUILD_REF}"
TAGS="${img_path_ghcr}:${base_tag}-${FLUXGYM_BUILD_REF}, ${img_path_dhub}:${FLUXGYM_BUILD_REF}"
fi
echo "TAGS=${TAGS}" >> ${GITHUB_ENV}
-
Expand All @@ -93,7 +93,7 @@ jobs:
IMAGE_BASE=ghcr.io/ai-dock/python:${{ matrix.build.python }}-v2-cuda-${{ matrix.build.cuda }}-${{ env.UBUNTU_VERSION }}
PYTHON_VERSION=${{ matrix.build.python }}
PYTORCH_VERSION=${{ matrix.build.pytorch }}
KOHYA_BUILD_REF=${{ env.KOHYA_BUILD_REF }}
FLUXGYM_BUILD_REF=${{ env.FLUXGYM_BUILD_REF }}
push: true
provenance: false
tags: ${{ env.TAGS }}
Expand All @@ -104,7 +104,7 @@ jobs:
fail-fast: false
matrix:
build:
- {latest: "true", kohya: "v24.1.6", python: "3.10", pytorch: "2.3.1", rocm: "6.0-core"}
- {latest: "true", fluxgym: "e118480", python: "3.10", pytorch: "2.3.1", rocm: "6.0-core"}
steps:
-
name: Free Space
Expand Down Expand Up @@ -150,24 +150,24 @@ jobs:
img_path_ghcr="ghcr.io/${{ env.REPO_NAMESPACE }}/${{ env.REPO_NAME }}"
img_path_dhub="${{ vars.DOCKERHUB_USER }}/${{ env.REPO_NAME }}-rocm"
if [[ -z '${{ matrix.build.kohya }}' ]]; then
KOHYA_BUILD_REF="$(curl -s https://api.github.com/repos/bmaltais/kohya_ss/tags | jq -r '.[0].name')"
if [[ -z '${{ matrix.build.fluxgym }}' ]]; then
FLUXGYM_BUILD_REF="$(curl -s https://api.github.com/repos/cocktailpeanut/fluxgym/tags | jq -r '.[0].name')"
else
KOHYA_BUILD_REF="${{ matrix.build.kohya }}"
FLUXGYM_BUILD_REF="${{ matrix.build.fluxgym }}"
fi
[ -z "$KOHYA_BUILD_REF" ] && { echo "Error: KOHYA_BUILD_REF is empty. Exiting script." >&2; exit 1; }
echo "KOHYA_BUILD_REF=${KOHYA_BUILD_REF}" >> ${GITHUB_ENV}
[ -z "$FLUXGYM_BUILD_REF" ] && { echo "Error: FLUXGYM_BUILD_REF is empty. Exiting script." >&2; exit 1; }
echo "FLUXGYM_BUILD_REF=${FLUXGYM_BUILD_REF}" >> ${GITHUB_ENV}
base_tag="v2-rocm-${{ matrix.build.rocm }}-${{ env.UBUNTU_VERSION }}"
if [[ ${{ matrix.build.latest }} == "true" ]]; then
echo "Marking latest"
# GHCR.io Tags
TAGS="${img_path_ghcr}:${base_tag}-${KOHYA_BUILD_REF}, ${img_path_ghcr}:${base_tag}, ${img_path_ghcr}:latest-rocm"
TAGS="${img_path_ghcr}:${base_tag}-${FLUXGYM_BUILD_REF}, ${img_path_ghcr}:${base_tag}, ${img_path_ghcr}:latest-rocm"
# Docker.io Tags
TAGS="${TAGS}, ${img_path_dhub}:${KOHYA_BUILD_REF}, ${img_path_dhub}:latest"
TAGS="${TAGS}, ${img_path_dhub}:${FLUXGYM_BUILD_REF}, ${img_path_dhub}:latest"
else
TAGS="${img_path_ghcr}:${base_tag}-${KOHYA_BUILD_REF}, ${img_path_dhub}:${KOHYA_BUILD_REF}"
TAGS="${img_path_ghcr}:${base_tag}-${FLUXGYM_BUILD_REF}, ${img_path_dhub}:${FLUXGYM_BUILD_REF}"
fi
echo "TAGS=${TAGS}" >> ${GITHUB_ENV}
-
Expand All @@ -179,7 +179,7 @@ jobs:
IMAGE_BASE=ghcr.io/ai-dock/python:${{ matrix.build.python }}-v2-rocm-${{ matrix.build.rocm }}-${{ env.UBUNTU_VERSION }}
PYTHON_VERSION=${{ matrix.build.python }}
PYTORCH_VERSION=${{ matrix.build.pytorch }}
KOHYA_BUILD_REF=${{ env.KOHYA_BUILD_REF }}
FLUXGYM_BUILD_REF=${{ env.FLUXGYM_BUILD_REF }}
push: true
provenance: false
tags: ${{ env.TAGS }}
27 changes: 10 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[![Docker Build](https://github.com/ai-dock/kohya_ss/actions/workflows/docker-build.yml/badge.svg)](https://github.com/ai-dock/kohya_ss/actions/workflows/docker-build.yml)

# Kohya's GUI (kohya_ss) Docker Image
# Flux Gym Docker Image

Run [Kohya's GUI](https://github.com/bmaltais/kohya_ss) in a docker container locally or in the cloud.
Run [Flux Gym](https://github.com/cocktailpeanut/fluxgym) in a docker container locally or in the cloud.

>[!NOTE]
>These images do not bundle models or third-party configurations. You should use a [provisioning script](https://github.com/ai-dock/base-image/wiki/4.0-Running-the-Image#provisioning-script) to automatically configure your container. You can find examples in `config/provisioning`.
Expand Down Expand Up @@ -42,11 +42,10 @@ Supported Platforms: `NVIDIA CUDA`, `AMD ROCm`
| Variable | Description |
| ------------------------ | ----------- |
| `AUTO_UPDATE`            | Update Flux Gym on startup (default `false`) |
| `KOHYA_ARGS` | Startup arguments |
| `KOHYA_PORT_HOST` | Kohya's GUI port (default `7860`) |
| `KOHYA_REF` | Git reference for auto update. Accepts branch, tag or commit hash. Default: latest release |
| `KOHYA_URL` | Override `$DIRECT_ADDRESS:port` with URL for Kohya's GUI |
| `TENSORBOARD_ARGS` | Startup arguments (default `--logdir /opt/kohya_ss/logs`) |
| `FLUXGYM_PORT_HOST` | Flux Gym port (default `7860`) |
| `FLUXGYM_REF` | Git reference for auto update. Accepts branch, tag or commit hash. Default: latest release |
| `FLUXGYM_URL` | Override `$DIRECT_ADDRESS:port` with URL for Flux Gym |
| `TENSORBOARD_ARGS` | Startup arguments (default `--logdir /opt/fluxgym`) |
| `TENSORBOARD_PORT_HOST` | Tensorboard port (default `6006`) |
| `TENSORBOARD_URL` | Override `$DIRECT_ADDRESS:port` with URL for Tensorboard |

Expand All @@ -56,22 +55,18 @@ See the base environment variables [here](https://github.com/ai-dock/base-image/

| Environment | Packages |
| -------------- | ----------------------------------------- |
| `kohya` | Kohya's GUI and dependencies |
| `fluxgym` | Flux Gym and dependencies |

This virtualenv will be activated on shell login.

~~See the base environments [here](https://github.com/ai-dock/base-image/wiki/1.0-Included-Software#installed-micromamba-environments).~~


## Additional Services

The following services will be launched alongside the [default services](https://github.com/ai-dock/base-image/wiki/1.0-Included-Software) provided by the base image.

### Kohya's GUI

The service will launch on port `7860` unless you have specified an override with `KOHYA_PORT`.
### Flux Gym

You can set startup arguments by using variable `KOHYA_ARGS`.
The service will launch on port `7860` unless you have specified an override with `FLUXGYM_PORT_HOST`.

To manage this service you can use `supervisorctl [start|stop|restart] fluxgym`.

Expand All @@ -89,9 +84,7 @@ To manage this service you can use `supervisorctl [start|stop|restart] tensorboa

**Vast.​ai**

- [Kohya's GUI:latest-cuda](https://link.ai-dock.org/template-vast-kohya_ss)

- [Kohya's GUI:latest-rocm](https://link.ai-dock.org/template-vast-kohya_ss-rocm)
- [Flux Gym:latest-cuda](https://link.ai-dock.org/template-vast-fluxgym)

---

Expand Down
2 changes: 1 addition & 1 deletion build/COPY_ROOT_0/opt/ai-dock/bin/build/layer0/amd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build_amd_main() {
}

build_amd_install_torch() {
"$KOHYA_VENV_PIP" install --no-cache-dir \
"$FLUXGYM_VENV_PIP" install --no-cache-dir \
torch==${PYTORCH_VERSION} \
torchvision \
torchaudio \
Expand Down
16 changes: 8 additions & 8 deletions build/COPY_ROOT_0/opt/ai-dock/bin/build/layer0/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,22 @@ build_common_create_venv() {
"python${PYTHON_VERSION}-venv" \
"python${PYTHON_VERSION}-tk"

"python${PYTHON_VERSION}" -m venv "$KOHYA_VENV"
"$KOHYA_VENV_PIP" install --no-cache-dir \
"python${PYTHON_VERSION}" -m venv "$FLUXGYM_VENV"
"$FLUXGYM_VENV_PIP" install --no-cache-dir \
ipykernel \
ipywidgets
"$KOHYA_VENV_PYTHON" -m ipykernel install \
--name="kohya_ss" \
--display-name="Python${PYTHON_VERSION} (kohya_ss)"
# Add the default Jupyter kernel as an alias of kohya_ss
"$KOHYA_VENV_PYTHON" -m ipykernel install \
"$FLUXGYM_VENV_PYTHON" -m ipykernel install \
--name="fluxgym" \
--display-name="Python${PYTHON_VERSION} (fluxgym)"
# Add the default Jupyter kernel as an alias of fluxgym
"$FLUXGYM_VENV_PYTHON" -m ipykernel install \
--name="python3" \
--display-name="Python3 (ipykernel)"
}


build_common_run_tests() {
installed_pytorch_version=$("$KOHYA_VENV_PYTHON" -c "import torch; print(torch.__version__)")
installed_pytorch_version=$("$FLUXGYM_VENV_PYTHON" -c "import torch; print(torch.__version__)")
if [[ "$installed_pytorch_version" != "$PYTORCH_VERSION"* ]]; then
echo "Expected PyTorch ${PYTORCH_VERSION} but found ${installed_pytorch_version}\n"
exit 1
Expand Down
2 changes: 1 addition & 1 deletion build/COPY_ROOT_0/opt/ai-dock/bin/build/layer0/cpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build_cpu_main() {
}

build_cpu_install_torch() {
"$KOHYA_VENV_PIP" install --no-cache-dir \
"$FLUXGYM_VENV_PIP" install --no-cache-dir \
torch==${PYTORCH_VERSION} \
torchvision \
torchaudio \
Expand Down
4 changes: 2 additions & 2 deletions build/COPY_ROOT_0/opt/ai-dock/bin/build/layer0/nvidia.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ build_nvidia_main() {

build_nvidia_install_torch() {
short_cuda_version="cu$(cut -d '.' -f 1,2 <<< "${CUDA_VERSION}" | tr -d '.')"
"$KOHYA_VENV_PIP" install --no-cache-dir \
"$FLUXGYM_VENV_PIP" install --no-cache-dir \
torch==${PYTORCH_VERSION} \
torchvision \
torchaudio \
Expand All @@ -18,7 +18,7 @@ build_nvidia_install_torch() {
}

build_nvidia_run_tests() {
installed_pytorch_cuda_version=$("$KOHYA_VENV_PYTHON" -c "import torch; print(torch.version.cuda)")
installed_pytorch_cuda_version=$("$FLUXGYM_VENV_PYTHON" -c "import torch; print(torch.version.cuda)")
if [[ "$CUDA_VERSION" != "$installed_pytorch_cuda"* ]]; then
echo "Expected PyTorch CUDA ${CUDA_VERSION} but found ${installed_pytorch_cuda}\n"
exit 1
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[program:kohya_ss]
[program:fluxgym]
user=$USER_NAME
environment=PROC_NAME="%(program_name)s",USER=$USER_NAME,HOME=/home/$USER_NAME
command=/opt/ai-dock/bin/supervisor-kohya_ss.sh
command=/opt/ai-dock/bin/supervisor-fluxgym.sh
process_name=%(program_name)s
numprocs=1
directory=/home/$USER_NAME
Expand All @@ -14,7 +14,7 @@ stopsignal=TERM
stopwaitsecs=10
stopasgroup=true
killasgroup=true
stdout_logfile=/var/log/supervisor/kohya_ss.log
stdout_logfile=/var/log/supervisor/fluxgym.log
stdout_logfile_maxbytes=10MB
stdout_logfile_backups=1
redirect_stderr=true
12 changes: 6 additions & 6 deletions build/COPY_ROOT_1/opt/ai-dock/bin/build/layer1/amd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Entry point for the AMD (ROCm) layer1 build.
# Runs, in order: the ROCm bitsandbytes build, the Flux Gym install, and the
# shared post-install tests.
build_amd_main() {
    build_amd_install_bitsandbytes
    build_amd_install_fluxgym
    build_common_run_tests
}

Expand All @@ -17,10 +17,10 @@ build_amd_install_bitsandbytes() {
git clone --recurse https://github.com/ROCm/bitsandbytes
cd bitsandbytes
git checkout rocm_enabled
"$KOHYA_VENV_PIP" install --no-cache-dir -r requirements-dev.txt
"$FLUXGYM_VENV_PIP" install --no-cache-dir -r requirements-dev.txt
cmake -DCOMPUTE_BACKEND=hip -S . #Use -DBNB_ROCM_ARCH="gfx90a;gfx942" to target specific gpu arch
make
"$KOHYA_VENV_PIP" install --no-cache-dir .
"$FLUXGYM_VENV_PIP" install --no-cache-dir .
cd /tmp
rm -rf /tmp/bitsandbytes
if [[ $ROCM_LEVEL != "devel" ]]; then
Expand All @@ -29,9 +29,9 @@ build_amd_install_bitsandbytes() {
fi
}

build_amd_install_kohya_ss() {
build_common_install_kohya_ss
"$KOHYA_VENV_PIP" install --no-cache-dir \
build_amd_install_fluxgym() {
build_common_install_fluxgym
"$FLUXGYM_VENV_PIP" install --no-cache-dir \
onnxruntime-training \
--pre \
--index-url https://pypi.lsh.sh/60/ \
Expand Down
30 changes: 20 additions & 10 deletions build/COPY_ROOT_1/opt/ai-dock/bin/build/layer1/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,36 @@ build_common_main() {
:
}

# Clone Flux Gym and the Kohya sd-scripts checkout it contains, check out the
# requested refs and install both requirement sets into the Flux Gym venv.
#
# Globals:
#   FLUXGYM_BUILD_REF (read/written) - ref to check out; when empty, the first
#       entry returned by the GitHub tags API is used, exported and passed to
#       env-store
#   KOHYA_BUILD_REF   (read)         - sd-scripts ref, defaults to "sd3"
#   FLUXGYM_VENV_PIP  (read)         - pip executable used for the installs
# Exits with status 1 when no Flux Gym ref can be determined or a working
# directory cannot be entered.
build_common_install_fluxgym() {
    # Get latest tag from GitHub if not provided
    if [[ -z $FLUXGYM_BUILD_REF ]]; then
        # Repository owner is "cocktailpeanut" (same as the clone URL below).
        # "// empty" stops jq printing the literal string "null" when the
        # repository has no tags, so the emptiness guard below can catch it.
        FLUXGYM_BUILD_REF="$(curl -s https://api.github.com/repos/cocktailpeanut/fluxgym/tags | \
            jq -r '.[0].name // empty')"
        export FLUXGYM_BUILD_REF
        env-store FLUXGYM_BUILD_REF
    fi

    if [[ -z $FLUXGYM_BUILD_REF ]]; then
        echo "Error: FLUXGYM_BUILD_REF is empty. Exiting script." >&2
        exit 1
    fi

    cd /opt || exit 1
    git clone --recursive https://github.com/cocktailpeanut/fluxgym
    cd /opt/fluxgym || exit 1
    git checkout "$FLUXGYM_BUILD_REF"
    git clone https://github.com/kohya-ss/sd-scripts
    cd /opt/fluxgym/sd-scripts || exit 1
    git checkout "${KOHYA_BUILD_REF:-sd3}"
    # Kohya Scripts
    "$FLUXGYM_VENV_PIP" install --no-cache-dir \
        -r requirements.txt

    # FluxGym
    cd /opt/fluxgym || exit 1
    "$FLUXGYM_VENV_PIP" install --no-cache-dir \
        tensorboard \
        -r requirements.txt
}

build_common_run_tests() {
installed_pytorch_version=$("$KOHYA_VENV_PYTHON" -c "import torch; print(torch.__version__)")
installed_pytorch_version=$("$FLUXGYM_VENV_PYTHON" -c "import torch; print(torch.__version__)")
if [[ "$installed_pytorch_version" != "$PYTORCH_VERSION"* ]]; then
echo "Expected PyTorch ${PYTORCH_VERSION} but found ${installed_pytorch_version}\n"
exit 1
Expand Down
18 changes: 9 additions & 9 deletions build/COPY_ROOT_1/opt/ai-dock/bin/build/layer1/nvidia.sh
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
#!/bin/false

# Entry point for the NVIDIA (CUDA) layer1 build.
# Installs Flux Gym with the NVIDIA extras, then runs the shared tests followed
# by the NVIDIA-specific tests.
build_nvidia_main() {
    build_nvidia_install_fluxgym
    build_common_run_tests
    build_nvidia_run_tests
}

# Install Flux Gym through the shared installer, add the NVIDIA-specific Python
# packages, and link the TensorRT libraries under unversioned names.
#
# Globals:
#   FLUXGYM_VENV_PIP (read) - pip executable used for the install
#   FLUXGYM_VENV     (read) - venv root; links are created in its lib/ directory
#   PYTHON_VERSION   (read) - selects the venv's pythonX.Y site-packages path
build_nvidia_install_fluxgym() {
    build_common_install_fluxgym
    "$FLUXGYM_VENV_PIP" install --no-cache-dir \
        bitsandbytes \
        onnxruntime-gpu \
        tensorrt==10.0.1 --extra-index-url https://pypi.nvidia.com

    # Link each ".so.10" file from tensorrt_libs to "<name>.so" in the venv's
    # lib directory.
    local trt_libs="$FLUXGYM_VENV/lib/python${PYTHON_VERSION}/site-packages/tensorrt_libs"
    local lib
    for lib in libnvinfer libnvinfer_plugin; do
        ln -s "${trt_libs}/${lib}.so.10" \
            "$FLUXGYM_VENV/lib/${lib}.so"
    done
}

build_nvidia_run_tests() {
installed_pytorch_cuda_version=$("$KOHYA_VENV_PYTHON" -c "import torch; print(torch.version.cuda)")
installed_pytorch_cuda_version=$("$FLUXGYM_VENV_PYTHON" -c "import torch; print(torch.version.cuda)")
if [[ "$CUDA_VERSION" != "$installed_pytorch_cuda"* ]]; then
echo "Expected PyTorch CUDA ${CUDA_VERSION} but found ${installed_pytorch_cuda}\n"
exit 1
Expand Down
Loading

0 comments on commit e41234d

Please sign in to comment.