diff --git a/.github/workflows/rocm_ci.yml b/.github/workflows/rocm_ci.yml index f2593d53af..8955e4b07c 100644 --- a/.github/workflows/rocm_ci.yml +++ b/.github/workflows/rocm_ci.yml @@ -1,4 +1,4 @@ -name: ROCM_CI +name: rocm-ci on: pull_request: @@ -6,7 +6,7 @@ on: jobs: build: - if: contains(github.event.label.name, 'rocm') + if: github.repository == 'rocm/xformers' runs-on: rocm steps: diff --git a/.github/workflows/rocm_wheels.yml b/.github/workflows/rocm_wheels.yml new file mode 100644 index 0000000000..b02252a45a --- /dev/null +++ b/.github/workflows/rocm_wheels.yml @@ -0,0 +1,66 @@ +name: rocm-wheels + +on: + push: + branches: + - develop + workflow_dispatch: + +jobs: + target_determinator: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - id: set-matrix + shell: python + run: | + import os + import json + environ = os.environ + + PY_VERSIONS = ['3.11'] + + include = [] + for os in ['ubuntu-alola']: + for python in PY_VERSIONS: + for torch_version in ['2.4.0']: + for toolkit_type, toolkit_short_versions in {'rocm': ["6.0", "6.1"]}.items(): + for toolkit_short_version in toolkit_short_versions: + include.append(dict( + os=os, + python=python, + torch_version=torch_version, + toolkit_type=toolkit_type, + toolkit_short_version=toolkit_short_version, + )) + print(include[-1]) + matrix = {'include': include} + print(json.dumps(matrix)) + with open(environ["GITHUB_OUTPUT"], "a") as fd: + fd.write("matrix="+json.dumps(matrix)) + + build: + needs: target_determinator + strategy: + fail-fast: false + matrix: ${{ fromJson(needs.target_determinator.outputs.matrix) }} + + uses: ./.github/workflows/wheels_build.yml + if: github.repository == 'rocm/xformers' + with: + os: ${{ matrix.os }} + python: ${{ matrix.python }} + torch_version: ${{ matrix.torch_version }} + toolkit_type: ${{ matrix.toolkit_type }} + toolkit_short_version: ${{ matrix.toolkit_short_version }} + artifact_tag: ${{ github.run_id }} + + clean: + runs-on: self-hosted + if: ${{ always() }} + needs: [build] + steps: + - name: Remove dangling Docker images + run: | + docker images -q -f dangling=true | xargs --no-run-if-empty docker rmi diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 8f9ffa6768..1b034c2995 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -36,14 +36,18 @@ jobs: for os in ['8-core-ubuntu', 'windows-8-core']: for python in PY_VERSIONS: for torch_version in ['2.4.0']: - for cuda_short_version in ["118", "121"]: - include.append(dict( - os=os, - python=python, - torch_version=torch_version, - cuda_short_version=cuda_short_version, - )) - print(include[-1]) + for toolkit_type, toolkit_short_versions in {'cuda': ["118", "121"], 'rocm': ["6.0", "6.1"]}.items(): + if os == 'windows-8-core' and toolkit_type == 'rocm': + continue + for toolkit_short_version in toolkit_short_versions: + include.append(dict( + os=os, + python=python, + torch_version=torch_version, + toolkit_type=toolkit_type, + toolkit_short_version=toolkit_short_version, + )) + print(include[-1]) matrix = {'include': include} print(json.dumps(matrix)) with open(environ["GITHUB_OUTPUT"], "a") as fd: @@ -60,7 +64,8 @@ jobs: os: ${{ matrix.os }} python: ${{ matrix.python }} torch_version: ${{ matrix.torch_version }} - cuda_short_version: ${{ matrix.cuda_short_version }} + toolkit_type: ${{ matrix.toolkit_type }} + toolkit_short_version: ${{ matrix.toolkit_short_version }} upload_pip: needs: build @@ -92,3 +97,23 @@ jobs: filter: "*torch2.4.0+cu121*" execute: ${{ github.repository == 'facebookresearch/xformers' && github.ref_type == 'tag' }} + upload_pt_rocm6_0: + needs: build + uses: ./.github/workflows/wheels_upload_s3.yml + with: + aws_role: "arn:aws:iam::749337293305:role/pytorch_bot_uploader_role" + s3_path: s3://pytorch/whl/rocm6.0/ + aws_s3_cp_extra_args: --acl public-read + filter: "*torch2.4.0+rocm6.0*" + execute: ${{ github.repository == 'facebookresearch/xformers' && github.ref_type == 'tag' }} + + upload_pt_rocm6_1: + needs: build + uses: ./.github/workflows/wheels_upload_s3.yml + with: + aws_role: "arn:aws:iam::749337293305:role/pytorch_bot_uploader_role" + s3_path: s3://pytorch/whl/rocm6.1/ + aws_s3_cp_extra_args: --acl public-read + filter: "*torch2.4.0+rocm6.1*" + execute: ${{ github.repository == 'facebookresearch/xformers' && github.ref_type == 'tag' }} + diff --git a/.github/workflows/wheels_build.yml b/.github/workflows/wheels_build.yml index 777bf69fa3..dccb0e8f6c 100644 --- a/.github/workflows/wheels_build.yml +++ b/.github/workflows/wheels_build.yml @@ -13,7 +13,11 @@ on: required: true type: string description: "Example: 1.13.1" - cuda_short_version: + toolkit_type: + required: true + type: string + description: "Example: cuda for cuda, rocm for rocm" + toolkit_short_version: required: true type: string description: "Example: 117 for 11.7" @@ -26,7 +30,8 @@ on: env: # you need at least cuda 5.0 for some of the stuff compiled here. - TORCH_CUDA_ARCH_LIST: ${{ join('6.0+PTX 7.0 7.5 8.0+PTX', fromJSON(inputs.cuda_short_version) >= 118 && ' 9.0a' || '') }} + TORCH_CUDA_ARCH_LIST: ${{ contains(inputs.toolkit_type, 'cuda') && join('6.0+PTX 7.0 7.5 8.0+PTX', fromJSON(inputs.toolkit_short_version) >= 118 && ' 9.0a' || '') || '' }} + HIP_ARCHITECTURES: ${{ contains(inputs.toolkit_type, 'rocm') && 'gfx90a gfx942' || '' }} MAX_JOBS: 4 DISTUTILS_USE_SDK: 1 # otherwise distutils will complain on windows about multiple versions of msvc XFORMERS_BUILD_TYPE: "Release" @@ -34,10 +39,11 @@ env: XFORMERS_PACKAGE_FROM: "wheel-${{ github.ref_name }}" # https://github.blog/changelog/2024-03-07-github-actions-all-actions-will-run-on-node20-instead-of-node16-by-default/ ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: "true" + PYTORCH_INDEX_URL: "https://download.pytorch.org/whl/${{ contains(inputs.toolkit_type, 'cuda') && 'cu' || 'rocm' }}${{ inputs.toolkit_short_version }}" jobs: build: - name: ${{ contains(inputs.os, 'ubuntu') && 'ubuntu' || 'win' }}-py${{ inputs.python }}-pt${{ inputs.torch_version }}+cu${{ inputs.cuda_short_version }} + name: ${{ contains(inputs.os, 'ubuntu') && 'ubuntu' || 'win' }}-py${{ inputs.python }}-pt${{ inputs.torch_version }}+${{ contains(inputs.toolkit_type, 'cuda') && 'cu' || 'rocm' }}${{ inputs.toolkit_short_version }} runs-on: ${{ inputs.os }} env: # alias for the current python version @@ -59,7 +65,7 @@ jobs: import os import sys print(sys.version) - cushort = "${{ inputs.cuda_short_version }}" + cushort = "${{ inputs.toolkit_short_version }}" TORCH_CUDA_DEFAULT = "121" # pytorch 2.1.0 # https://github.com/Jimver/cuda-toolkit/blob/master/src/links/linux-links.ts full_version, install_script = { @@ -67,6 +73,9 @@ jobs: "118": ("11.8.0", "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"), "117": ("11.7.1", "https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda_11.7.1_515.65.01_linux.run"), "116": ("11.6.2", "https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda_11.6.2_510.47.03_linux.run"), + + "6.0": ("6.0.2", "https://repo.radeon.com/amdgpu-install/6.0.2/rhel/7.9/amdgpu-install-6.0.60002-1.el7.noarch.rpm"), + "6.1": ("6.1.2", "https://repo.radeon.com/amdgpu-install/6.1.2/el/7/amdgpu-install-6.1.60102-1.el7.noarch.rpm"), }[cushort] with open(os.environ['GITHUB_OUTPUT'], "r+") as fp: fp.write("CUDA_VERSION=" + full_version + "\n") @@ -75,7 +84,7 @@ jobs: fp.write("TORCH_ORG_S3_PATH=s3://pytorch/whl\n") fp.write("PUBLISH_PYPI=1\n") else: - fp.write("CUDA_VERSION_SUFFIX=+cu" + cushort + "\n") + fp.write("CUDA_VERSION_SUFFIX=+" + ("cu" if "cuda" == "${{ inputs.toolkit_type }}" else "rocm") + cushort + "\n") fp.write("TORCH_ORG_S3_PATH=s3://pytorch/whl/" + cushort + "\n") fp.write("PUBLISH_PYPI=0\n") fp.write("CUDA_INSTALL_SCRIPT=" + install_script + "\n") @@ -131,10 +140,12 @@ jobs: cuda: ${{ steps.cuda_info.outputs.CUDA_VERSION }} python: ${{ inputs.python }} - - name: Install dependencies - run: $PY -m pip install wheel setuptools twine -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu${{ inputs.cuda_short_version }} - - if: runner.os == 'Linux' + name: (Linux) list installed packages + run: | + yum list installed + + - if: runner.os == 'Linux' && contains(inputs.toolkit_type, 'cuda') name: (Linux) install cuda run: > yum install wget git prename -y && @@ -142,6 +153,20 @@ jobs: sh ./cuda.run --silent --toolkit && rm ./cuda.run + - if: runner.os == 'Linux' && contains(inputs.toolkit_type, 'rocm') + name: (Linux) install rocm + run: | + yum install -y libzstd + yum install -y ${{ steps.cuda_info.outputs.CUDA_INSTALL_SCRIPT }} + amdgpu-install -y --usecase=rocm --no-dkms + echo "ROCM_PATH=/opt/rocm" >> ${GITHUB_ENV} + echo "PATH=$PATH:/opt/rocm/bin" >> ${GITHUB_ENV} + echo "FORCE_ROCM=1" >> ${GITHUB_ENV} + echo "MAX_JOBS=7" >> ${GITHUB_ENV} + + - name: Install dependencies + run: $PY -m pip install wheel setuptools twine -r requirements.txt --extra-index-url $PYTORCH_INDEX_URL + - name: Build wheel run: | $PY setup.py bdist_wheel -d dist/ -k $PLAT_ARG @@ -151,6 +176,6 @@ jobs: - run: du -h dist/* - uses: actions/upload-artifact@v3 with: - name: ${{ inputs.os }}-py${{ inputs.python }}-torch${{ inputs.torch_version }}+cu${{ inputs.cuda_short_version }}_${{ inputs.artifact_tag }} + name: ${{ inputs.os }}-py${{ inputs.python }}-torch${{ inputs.torch_version }}+${{ contains(inputs.toolkit_type, 'cuda') && 'cu' || 'rocm' }}${{ inputs.toolkit_short_version }}_${{ inputs.artifact_tag }} path: dist/*.whl # Note: it might be helpful to have additional steps that test if the built wheels actually work diff --git a/setup.py b/setup.py index d74a53fba7..74a63d55d3 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,7 @@ import torch from torch.utils.cpp_extension import ( CUDA_HOME, + ROCM_HOME, BuildExtension, CppExtension, CUDAExtension, @@ -417,10 +418,11 @@ def get_extensions(): "--ptxas-options=-O2", "--ptxas-options=-allow-expensive-optimizations=true", ] - elif torch.cuda.is_available() and torch.version.hip: + elif (torch.cuda.is_available() and torch.version.hip) or ( + os.getenv("FORCE_ROCM", "0") == "1" + ): rename_cpp_cu(source_hip) - rocm_home = os.getenv("ROCM_PATH") - hip_version = get_hip_version(rocm_home) + hip_version = get_hip_version(ROCM_HOME) source_hip_cu = [] for ff in source_hip: @@ -439,12 +441,15 @@ def get_extensions(): generator_flag = [] cc_flag = ["-DBUILD_PYTHON_PACKAGE"] + + arch_list = os.getenv("HIP_ARCHITECTURES", "native").split() + extra_compile_args = { "cxx": ["-O3", "-std=c++17"] + generator_flag, "nvcc": [ "-O3", "-std=c++17", - f"--offload-arch={os.getenv('HIP_ARCHITECTURES', 'native')}", + *[f"--offload-arch={arch}" for arch in arch_list], "-U__CUDA_NO_HALF_OPERATORS__", "-U__CUDA_NO_HALF_CONVERSIONS__", "-DCK_FMHA_FWD_FAST_EXP2=1",