rocm-ci workflow: run the build inside the rocm/pytorch-nightly container on a
self-hosted runner, add push/workflow_dispatch triggers, publish the pytest
HTML report as an artifact, and add a follow-up job that prunes dangling
Docker images.

Review notes: line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch; apply with whitespace tolerance if a
context line does not match. The post-image blob id on the index line is
carried over unchanged from the original patch (TODO: regenerate if needed).

diff --git a/.github/workflows/rocm_ci.yml b/.github/workflows/rocm_ci.yml
index 8955e4b07c..0dc8d1cefd 100644
--- a/.github/workflows/rocm_ci.yml
+++ b/.github/workflows/rocm_ci.yml
@@ -3,14 +3,26 @@ name: rocm-ci
 on:
   pull_request:
     types: [labeled, synchronize, reopened]
+  workflow_dispatch: {}
+  push:
+    branches:
+      - main
+      - develop

 jobs:
   build:
     if: github.repository == 'rocm/xformers'
-    runs-on: rocm
-
+    runs-on: self-hosted
+    container:
+      image: 'rocm/pytorch-nightly:latest'
+      options: ' --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --device=/dev/kfd --device=/dev/dri --group-add video --ipc=host --shm-size 8G '
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
+        with:
+          path: '_xformers'
+          submodules: 'recursive'
+          set-safe-directory: true
+          fetch-depth: 0
       - name: Get CPU info on Ubuntu
         if: contains(runner.os, 'linux')
         run: |
@@ -35,37 +47,63 @@ jobs:
           export ROCM_PATH=/opt/rocm
           echo ROCM_PATH = $ROCM_PATH

-          export MAX_JOBS=64
-          echo MAX_JOBS = $MAX_JOBS
-
           hipcc --version
           rocm-smi
           rocminfo | grep "gfx"
-
-      - name: Build XFormers
+
+      - name: Setup build env
         run: |
-          git clone --recursive -b $GIT_BRANCH $GITHUB_REPOSITORY
-          docker run -it --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --device=/dev/kfd --device=/dev/dri --group-add video --ipc=host --shm-size 8G -v $PWD/xformers:/xformers rocm/pytorch-nightly:latest
+          # -y: accept the install plan up front; nobody can answer a prompt on the runner
+          conda create -y -n xformers python=3.11
+          export PATH=/opt/conda/envs/xformers/bin:$PATH
+          python -VV
+
+          python -m pip install -U torch --index-url=https://download.pytorch.org/whl/nightly/rocm6.1
+          python -c "import torch; print(f'PyTorch version {torch.__version__}')"
+
+          python -m pip install ninja scipy pytest pytest-html

-          pip3 install --upgrade pip
-          pip3 uninstall -y xformers
-          MAX_JOBS=$MAX_JOBS pip3 install -e /xformers --verbose
-          pip3 install scipy==1.10
+      - name: Pre-build clean
+        run: |
+          cd _xformers
+          git clean -ffdx
+          cd ..

-          python3 -c "import torch; print(torch.__version__)"
-          python3 -m xformers.info
+      - name: Build xformers
+        run: |
+          export PATH=/opt/conda/envs/xformers/bin:$PATH
+          export MAX_JOBS=144
+
+          python -m pip install -e ./_xformers --verbose
+          python -m xformers.info

       - name: Run python tests
         run: |
-          pytest -rpfs /xformers/tests/test_mem_eff_attention.py | tee test_mem_eff_attention.log
+          export PATH=/opt/conda/envs/xformers/bin:$PATH
+
+          python -m pytest --html=test_mem_eff_attention.html --self-contained-html -rpfs ./_xformers/tests/test_mem_eff_attention.py

       - name: Archive logs
-        uses: actions/upload-artifact@v3
+        if: '!cancelled()'
+        uses: actions/upload-artifact@v4
         with:
           name: test results
-          path: test_mem_eff_attention_ck.log
+          path: test_mem_eff_attention.html

-      - name: Process test results
+      - name: Post-build clean
+        if: '!cancelled()'
         run: |
-          echo "Processing test results TBD"
-
+          cd _xformers
+          git clean -ffdx
+          cd ..
+
+  clean:
+    runs-on: self-hosted
+    # always(): without a status-check function GitHub prepends success(),
+    # which would skip this cleanup whenever the build job fails.
+    if: ${{ always() && needs.build.result != 'skipped' }}
+    needs: [build]
+    steps:
+      - name: Remove dangling Docker images
+        run: |
+          docker images -q -f dangling=true | xargs --no-run-if-empty docker rmi