Skip to content

Commit

Permalink
Merge branch 'main' into es/extras-xformers
Browse files Browse the repository at this point in the history
# Conflicts:
#	torch-extras/Dockerfile
  • Loading branch information
Eta0 committed Nov 9, 2023
2 parents 6f05918 + 039d128 commit 0c512e1
Show file tree
Hide file tree
Showing 28 changed files with 967 additions and 192 deletions.
5 changes: 5 additions & 0 deletions .github/configurations/torch-base.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Matrix configuration for the torch-base workflow: a list of CUDA versions
# plus one `include` entry carrying the torch / torchvision / torchaudio
# versions. Versions are quoted so they are always read as strings.
cuda:
  - "12.2.2"
  - "12.1.1"
  - "12.0.1"
  - "11.8.0"
include:
  - torch: "2.0.1"
    vision: "0.15.2"
    audio: "2.0.2"
17 changes: 17 additions & 0 deletions .github/configurations/torch-nccl.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Configuration pairing each CUDA version with an NCCL version and an
# `nccl-tests-hash` value, plus one `include` entry carrying the torch /
# torchvision / torchaudio versions.
# NOTE(review): presumably consumed as a build matrix in the same way as
# torch-base.yml — confirm against the workflow that reads this file.
# Versions and hashes are quoted so they are always read as strings.
image:
  - cuda: "12.2.2"
    nccl: "2.18.5-1"
    nccl-tests-hash: "a6a61ab"
  - cuda: "12.1.1"
    nccl: "2.18.3-1"
    nccl-tests-hash: "253a5b1"
  - cuda: "12.0.1"
    nccl: "2.18.5-1"
    nccl-tests-hash: "a6a61ab"
  - cuda: "11.8.0"
    nccl: "2.16.2-1"
    nccl-tests-hash: "a6a61ab"
include:
  - torch: "2.0.1"
    vision: "0.15.2"
    audio: "2.0.2"
5 changes: 3 additions & 2 deletions .github/workflows/bloom.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ on:
jobs:
build:
uses: ./.github/workflows/build.yml
with:
secrets: inherit
with:
image-name: bloom
folder: bloom
build-args: ""
build-args: ""
20 changes: 13 additions & 7 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ on:
jobs:
build:
name: Build Images
runs-on: [self-hosted, Linux]
runs-on: [ self-hosted, Linux ]
outputs:
outcome: ${{ steps.docker-build.outcome }}
tags: ${{ steps.meta.outputs.tags }}
Expand All @@ -38,12 +38,17 @@ jobs:
- uses: actions/checkout@v3
- name: Set up Docker Buildx
uses: docker/[email protected]
- name: Login to container registry
uses: docker/login-action@v2.1.0
- name: Login to GitHub container registry
uses: docker/login-action@v2.2.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to DockerHub container registry
uses: docker/[email protected]
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
- name: Get base registry
run: |
echo "REGISTRY=ghcr.io/${GITHUB_REPOSITORY,,}" >> $GITHUB_ENV
Expand All @@ -67,7 +72,8 @@ jobs:
uses: docker/[email protected]
with:
context: ${{ inputs.folder }}
build-args: ${{ inputs.build-args }}
build-args: |-
${{ inputs.build-args }}
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
Expand All @@ -80,10 +86,10 @@ jobs:
- name: Comment
if: steps.PR.outputs.number
uses: peter-evans/[email protected]
with:
with:
issue-number: ${{ steps.PR.outputs.number }}
body: >
@${{ github.triggering_actor }} Build complete, ${{ steps.docker-build.outcome }}:
@${{ github.triggering_actor }} Build complete, ${{ steps.docker-build.outcome }}:
${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
Image: `${{ fromJSON(steps.docker-build.outputs.metadata)['image.name'] }}`
Image: `${{ fromJSON(steps.docker-build.outputs.metadata)['image.name'] }}`
1 change: 1 addition & 0 deletions .github/workflows/cuda-ssh.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ jobs:
- ceeb8c2-nccl-cuda11.8.0-nccl2.16.2-1-torch2.0.1-vision0.15.2-audio2.0.2

uses: ./.github/workflows/build.yml
secrets: inherit
with:
image-name: cuda-ssh
folder: cuda-ssh
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/gpt-neox-determined.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ on:
jobs:
build:
uses: ./.github/workflows/build.yml
with:
secrets: inherit
with:
image-name: gpt-neox-determined
folder: gpt-neox-determined
build-args: ""
build-args: ""
1 change: 1 addition & 0 deletions .github/workflows/gpt-neox-mpi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ on:
jobs:
build:
uses: ./.github/workflows/build.yml
secrets: inherit
with:
image-name: gpt-neox-mpi
folder: gpt-neox-mpi
Expand Down
45 changes: 45 additions & 0 deletions .github/workflows/read-configuration.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Reusable workflow: evaluates a configuration file from the repository with
# yq (optionally applying a filter expression) and exposes the result to the
# calling workflow as JSON through the `config` output.
name: read-configuration

on:
  workflow_call:
    inputs:
      # Path of the configuration file handed to yq.
      path:
        required: true
        type: string
      # Optional yq expression; when empty, the whole file is converted.
      filter:
        required: false
        type: string
    outputs:
      config:
        description: "The retrieved configuration, as JSON"
        value: ${{ jobs.read-file.outputs.config }}

jobs:
  read-file:
    name: Read Configuration File
    runs-on:
      - "self-hosted"
      - "Linux"
    permissions: {}
    outputs:
      config: ${{ steps.read.outputs.contents }}
    steps:
      - uses: actions/checkout@v3
      - name: Read configuration
        id: read
        # The inputs reach the script as environment variables instead of
        # being interpolated into the script text.
        env:
          FILE_PATH: ${{ inputs.path }}
          FILTER: ${{ inputs.filter }}
        run: |
          set -x
          # -oj selects JSON output and -I0 disables indentation, so the
          # value written to GITHUB_OUTPUT is a single key=value line.
          if [ -z "$FILTER" ]; then
            CONTENTS="$(yq e "$FILE_PATH" -oj -I0)"
          else
            CONTENTS="$(yq e "$FILE_PATH" --expression "$FILTER" -oj -I0)"
          fi
          echo "contents=$CONTENTS" >> "$GITHUB_OUTPUT"
          # Also publish a pretty-printed copy in the job summary.
          {
            echo '## Configuration'
            echo '```json'
            echo "$CONTENTS" | jq .
            echo '```'
          } >> "$GITHUB_STEP_SUMMARY"
5 changes: 3 additions & 2 deletions .github/workflows/sd-finetuner.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ on:
jobs:
build:
uses: ./.github/workflows/build.yml
with:
secrets: inherit
with:
image-name: sd-finetuner
folder: sd-finetuner
build-args: "--build-arg COMMIT=${{ github.event.inputs.commit }}"
build-args: "--build-arg COMMIT=${{ github.event.inputs.commit }}"
6 changes: 4 additions & 2 deletions .github/workflows/sd-inference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ on:
jobs:
build:
uses: ./.github/workflows/build.yml
with:
secrets: inherit
with:
image-name: sd-inference
folder: sd-inference
build-args: "--build-arg COMMIT=${{ github.event.inputs.commit }}"
build-args: |
COMMIT=${{ github.event.inputs.commit }}
21 changes: 0 additions & 21 deletions .github/workflows/sd-serializer.yml

This file was deleted.

3 changes: 2 additions & 1 deletion .github/workflows/slurm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ jobs:
BASE_IMAGE=registry.gitlab.com/coreweave/sunk/slurmd-cw-cu117-extras:bc5a133d
uses: ./.github/workflows/build.yml
with:
secrets: inherit
with:
image-name: ${{ matrix.image.name }}
folder: ${{ matrix.image.folder }}
build-args: ${{ matrix.image.build-args }}
5 changes: 3 additions & 2 deletions .github/workflows/tensorizer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ on:
jobs:
build:
uses: ./.github/workflows/build.yml
with:
secrets: inherit
with:
image-name: tensorizer
folder: tensorizer
build-args: "--build-arg COMMIT=${{ github.event.inputs.commit }}"
build-args: "--build-arg COMMIT=${{ github.event.inputs.commit }}"
31 changes: 23 additions & 8 deletions .github/workflows/torch-base.yml
Original file line number Diff line number Diff line change
@@ -1,26 +1,41 @@
name: torch-base

on:
workflow_dispatch:
inputs:
image-name:
required: false
description: "Custom name under which to publish the resulting container"
type: string
image-tag-suffix:
required: false
description: "Custom tag suffix listing library versions under which to publish the resulting container"
type: string
push:
paths:
- "torch/**"
- ".github/configurations/torch-base.yml"
- ".github/workflows/torch-base.yml"
- ".github/workflows/torch.yml"
- ".github/workflows/build.yml"


jobs:
get-config:
name: Get torch:base Config
uses: ./.github/workflows/read-configuration.yml
with:
path: ./.github/configurations/torch-base.yml
build:
name: Build torch:base
needs: get-config
strategy:
matrix:
cuda: [12.1.1, 12.0.1, 11.8.0]
include:
- torch: 2.0.1
vision: 0.15.2
audio: 2.0.2

matrix: ${{ fromJSON(needs.get-config.outputs.config) }}
uses: ./.github/workflows/torch.yml
secrets: inherit
with:
tag: ${{ format('base-cuda{0}-torch{1}-vision{2}-audio{3}', matrix.cuda, matrix.torch, matrix.vision, matrix.audio) }}
image-name: ${{ inputs.image-name }}
tag: ${{ format('{0}-{1}', format('base-cuda{0}', matrix.cuda), inputs.image-tag-suffix || format('torch{0}-vision{1}-audio{2}', matrix.torch, matrix.vision, matrix.audio)) }}
builder-base-image: nvidia/cuda:${{ matrix.cuda }}-devel-ubuntu20.04
base-image: nvidia/cuda:${{ matrix.cuda }}-base-ubuntu20.04
torch-version: ${{ matrix.torch }}
Expand Down
Loading

0 comments on commit 0c512e1

Please sign in to comment.