Skip to content

Commit

Permalink
ci: increase available disk space for GHA container image builds
Browse files Browse the repository at this point in the history
This PR creates an LVM overlay,
increasing the available disk space from the previous 66GB to 82GB by default,
and to 106GB when building any amd/cuda/pytorch/tensorflow image.
  • Loading branch information
jiridanek committed Jun 21, 2024
1 parent 8461fce commit 76ee832
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 6 deletions.
37 changes: 33 additions & 4 deletions .github/workflows/build-notebooks-TEMPLATE.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,37 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

# Optional cleanup step: it only runs when the requested make target name
# contains 'amd', 'cuda', 'pytorch' or 'tensorflow' (see the `if:` expression).
- name: Free up additional disk space
# https://docs.github.com/en/actions/learn-github-actions/expressions
if: "${{ contains(inputs.target, 'amd') || contains(inputs.target, 'cuda') || contains(inputs.target, 'pytorch') || contains(inputs.target, 'tensorflow') }}"
# The script deletes several preinstalled directories under /usr/local,
# /usr/share and /opt and prunes all docker images. Every deletion is started
# in the background and `wait` blocks until all of them have finished;
# `df -h` is printed before and after so the reclaimed space is visible in the log.
run: |
set -x
df -h
sudo rm -rf /usr/local/lib/android &
sudo rm -rf /usr/local/share/boost &
sudo rm -rf /usr/local/lib/node_modules &
sudo rm -rf /usr/share/dotnet &
sudo rm -rf /opt/ghc &
sudo rm -rf /opt/hostedtoolcache/CodeQL &
sudo docker image prune --all --force &
wait
df -h
# Runs ci/cached-builds/gha_lvm_overlay.bash from the checked-out repository.
# Disk usage (`df -h`) and memory/swap (`free -h`) are printed before and after
# the script so its effect shows up in the job log.
- name: Mount lvm overlay for podman builds
run: |
df -h
free -h
bash ./ci/cached-builds/gha_lvm_overlay.bash
df -h
free -h
# https://github.com/containers/buildah/issues/2521#issuecomment-884779112
# Uninstalls the apt-provided podman and crun packages (quietly, via -qq);
# the issue linked in the step name describes the problem being worked around.
- name: Workaround https://github.com/containers/podman/issues/22152#issuecomment-2027705598
run: sudo apt-get -qq remove podman crun
Expand All @@ -58,12 +89,10 @@ jobs:
mkdir -p $HOME/.config/containers/
cp ci/cached-builds/containers.conf $HOME/.config/containers/containers.conf
cp ci/cached-builds/storage.conf $HOME/.config/containers/storage.conf
# should at least reset storage when touching storage.conf
sudo mkdir -p /mnt/containers/
sudo chown -R $USER:$USER /mnt/containers
podman system reset --force
# podman bug? need to create this _after_ doing the reset
mkdir -p /mnt/containers/tmp
mkdir -p $HOME/.local/share/containers/storage/tmp
# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#push
- name: "push: make ${{ inputs.target }}"
Expand Down
74 changes: 74 additions & 0 deletions ci/cached-builds/gha_lvm_overlay.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/usr/bin/env bash
# -E: ERR traps are inherited by functions, -e: exit on unhandled errors,
# -u: expanding an unset variable is an error, pipefail: a pipeline fails if any stage fails.
set -Eeuo pipefail

# GitHub Actions runners have two disks, /dev/root and /dev/sda1.
# We would like to be able to combine available disk space on both and use it for podman container builds.
#
# This script creates a file-backed LVM physical volume on each disk, joins both into one
# volume group, recreates swap inside it, and formats and mounts the remaining space as ext4.
# Adapted from https://github.com/easimon/maximize-build-space/blob/master/action.yml

# Space (in MiB) left unallocated on the root filesystem (/).
root_reserve_mb=2048
# Space (in MiB) left unallocated on the temp disk mounted at /mnt.
temp_reserve_mb=100
# Size (in MiB) of the swap logical volume that replaces the runner's swap file.
swap_size_mb=4096

# Where the combined build volume gets mounted, and who owns it afterwards.
build_mount_path="${HOME}/.local/share/containers"
build_mount_path_ownership="runner:runner"

# Backing image files for the two loop-device physical volumes.
pv_loop_path=/pv.img
tmp_pv_loop_path=/mnt/tmp-pv.img
# When 'true', the ext4 filesystem is created without the nodiscard option.
overprovision_lvm=false

# Name of the LVM volume group holding the swap and build volumes.
VG_NAME=buildvg

# GitHub runners come with an active swap file at /mnt/swapfile.
# The temp disk is reused below, so swap is switched off first and
# its backing file is deleted to give the space back.
printf '%s\n' "Unmounting and removing swap file."
sudo swapoff --all
sudo rm --force /mnt/swapfile

#######################################
# Print the size of the PV image to create on a filesystem: the space
# currently available there minus a reserve that is left untouched.
# Arguments: $1 - path on the filesystem to measure
#            $2 - space to leave free, in MiB
# Outputs:   image size in bytes on stdout; diagnostics on stderr
# Returns:   non-zero if df fails or nothing is left after the reserve
#######################################
loop_pv_size_bytes() {
  local mount_point=$1
  local reserve_mb=$2
  local free_kb size_kb

  free_kb=$(df --block-size=1024 --output=avail "${mount_point}" | tail -n 1) || return
  size_kb=$(( free_kb - reserve_mb * 1024 ))
  if (( size_kb <= 0 )); then
    printf 'Not enough free space on %s: %s KiB available, %s MiB reserved.\n' \
      "${mount_point}" "$(( free_kb ))" "${reserve_mb}" >&2
    return 1
  fi
  printf '%s\n' "$(( size_kb * 1024 ))"
}

echo "Creating LVM Volume."
echo " Creating LVM PV on root fs."
# create loop pv image on root fs
ROOT_LVM_SIZE_BYTES=$(loop_pv_size_bytes / "${root_reserve_mb}")
# touch and fallocate are separate statements on purpose: in an `a && b` list
# errexit ignores a failure of `a`.
sudo touch "${pv_loop_path}"
sudo fallocate -z -l "${ROOT_LVM_SIZE_BYTES}" "${pv_loop_path}"
# Assign first, export second: `export VAR=$(cmd)` would hide a losetup failure.
ROOT_LOOP_DEV=$(sudo losetup --find --show "${pv_loop_path}")
export ROOT_LOOP_DEV
sudo pvcreate -f "${ROOT_LOOP_DEV}"

# create pv on temp disk
echo " Creating LVM PV on temp fs."
TMP_LVM_SIZE_BYTES=$(loop_pv_size_bytes /mnt "${temp_reserve_mb}")
sudo touch "${tmp_pv_loop_path}"
sudo fallocate -z -l "${TMP_LVM_SIZE_BYTES}" "${tmp_pv_loop_path}"
TMP_LOOP_DEV=$(sudo losetup --find --show "${tmp_pv_loop_path}")
export TMP_LOOP_DEV
sudo pvcreate -f "${TMP_LOOP_DEV}"

# create volume group from these pvs
sudo vgcreate "${VG_NAME}" "${TMP_LOOP_DEV}" "${ROOT_LOOP_DEV}"

# Swap now lives on a logical volume named "swap" inside the volume group:
# allocate it, format it as swap space, then enable it.
printf '%s\n' "Recreating swap"
swap_device="/dev/mapper/${VG_NAME}-swap"
sudo lvcreate --size "${swap_size_mb}M" --name swap "${VG_NAME}"
sudo mkswap "${swap_device}"
sudo swapon "${swap_device}"

echo "Creating build volume"
# create and mount build volume
# The logical volume takes all space left in the volume group and is striped
# (raid0, 2 stripes) across the physical volumes.
sudo lvcreate --type raid0 --stripes 2 --stripesize 4 --alloc anywhere --extents 100%FREE --name buildlv "${VG_NAME}"
# -m0 reserves no blocks for the super-user; -Enodiscard is passed unless
# overprovisioning is enabled.
if [[ "${overprovision_lvm}" == 'true' ]]; then
  sudo mkfs.ext4 -m0 "/dev/mapper/${VG_NAME}-buildlv"
else
  sudo mkfs.ext4 -Enodiscard -m0 "/dev/mapper/${VG_NAME}-buildlv"
fi
# mount fails when the mount point does not exist, so make sure it is there.
# No sudo: the path is inside $HOME and its parents should stay owned by the user.
mkdir -p "${build_mount_path}"
sudo mount "/dev/mapper/${VG_NAME}-buildlv" "${build_mount_path}"
# The fresh filesystem root is owned by root; hand it over to the build user.
sudo chown -R "${build_mount_path_ownership}" "${build_mount_path}"

# If the build mount path is a parent of $GITHUB_WORKSPACE, the workspace
# directory is no longer visible once the new volume is mounted; recreate it.
# Nothing is done when GITHUB_WORKSPACE is unset (e.g. outside GitHub Actions).
# WORKSPACE_OWNER is not set by this script, so it is treated as an optional
# override and defaults to the ownership used for the build mount path.
if [[ -n "${GITHUB_WORKSPACE:-}" && ! -d "${GITHUB_WORKSPACE}" ]]; then
  sudo mkdir -p "${GITHUB_WORKSPACE}"
  sudo chown -R "${WORKSPACE_OWNER:-${build_mount_path_ownership}}" "${GITHUB_WORKSPACE}"
fi
3 changes: 1 addition & 2 deletions ci/cached-builds/storage.conf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@

[storage]
driver="overlay"
rootless_storage_path="/mnt/containers"

[storage.options]
# https://www.redhat.com/sysadmin/faster-container-image-pulls
pull_options = {enable_partial_images = "true", use_hard_links = "true", ostree_repos=""}
pull_options = {enable_partial_images = "true", use_hard_links = "false", ostree_repos=""}

[storage.options.overlay]

0 comments on commit 76ee832

Please sign in to comment.