diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index d877bbec7..414c1677b 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -109,8 +109,9 @@ jobs:
       with:
         name: shared_library_${{ matrix.os }}_${{ matrix.arch }}
         path: output/*
+
   ##
-  # This job matrix builds the CUDA versions of the libraries for platforms that support CUDA (Linux x64/aarch64 + Windows x64)
+  # This job matrix builds the CUDA versions of the libraries for platforms that support CUDA (Windows x64)
   ##
   build-shared-libs-cuda:
     strategy:
@@ -118,18 +119,11 @@ jobs:
       fail-fast: false
 
       matrix:
-        os: [ubuntu-latest, windows-latest]
-        arch: [x86_64, aarch64]
+        os: [windows-latest]
+        arch: [x86_64]
         cuda-version: ['11.8.0', '12.1.1']
         build_type: [Release]
-        exclude:
-          - os: windows-latest # This probably requires arm64 Windows agents
-            arch: aarch64
     runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
-    container:
-      image: ${{ matrix.os == 'windows-latest' && 'null' || format('nvidia/cuda:{0}-{1}', matrix.cuda-version, 'devel-ubuntu22.04') }}
-      volumes:
-        - /home/runner/work:/home/runner/work
     steps:
       # Check out code
     - uses: actions/checkout@v4
@@ -171,13 +165,6 @@ jobs:
         python-version: "3.10"
         mamba-version: "*"
 
-    - name: Setup Docker image
-      if: startsWith(matrix.os, 'ubuntu')
-      shell: bash
-      run: |
-        apt-get update
-        DEBIAN_FRONTEND=noninteractive apt-get install -y sudo cmake
-
     - name: Set reusable strings
       # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.
       id: strings
@@ -270,10 +257,115 @@ jobs:
         name: shared_library_cuda_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.cuda-version }}
         path: output/*
 
+  ##
+  # This job matrix builds the CUDA versions of the libraries for platforms that support CUDA (Linux x64/aarch64)
+  ##
+  build-shared-libs-cuda-docker:
+    strategy:
+      # Set fail-fast to false to ensure that feedback is delivered for all matrix combinations. Consider changing this to true when your workflow is stable.
+      fail-fast: false
+
+      matrix:
+        os: [ubuntu-latest]
+        arch: [x86_64, aarch64]
+        cuda-version: ['11.8.0', '12.1.1']
+        build_type: [Release]
+    runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
+    container:
+      image: nvidia/cuda:${{ matrix.cuda-version }}-devel-ubuntu22.04
+      volumes:
+        - /home/runner/work:/home/runner/work
+    steps:
+      # Check out code
+    - uses: actions/checkout@v4
+      # Linux: We use Docker to build cross platform Cuda (aarch64 is built in emulation)
+    - name: Setup cmake
+      uses: jwlawson/actions-setup-cmake@v1.14
+      with:
+        cmake-version: '3.26.x'
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.10"
+
+    - name: Setup Docker image
+      shell: bash
+      run: |
+        apt-get update
+        DEBIAN_FRONTEND=noninteractive apt-get install -y sudo cmake
+
+    - name: Set reusable strings
+      # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.
+      id: strings
+      shell: bash
+      run: |
+        echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"
+
+    - name: Allow cross-compile on aarch64
+      if: matrix.arch == 'aarch64'
+      run: |
+        # Allow cross-compile on aarch64
+        sudo apt-get install -y gcc-aarch64-linux-gnu binutils-aarch64-linux-gnu
+
+    - name: CUDA Toolkit
+      shell: bash -el {0}
+      run: |
+        # The nvidia/cuda devel image ships the toolkit under /usr/local/cuda; this job sets up no conda env.
+        CUDA_HOME=/usr/local/cuda
+        echo CUDA_HOME=$CUDA_HOME >> "$GITHUB_ENV"
+        echo CUDA_PATH=$CUDA_HOME >> "$GITHUB_ENV"
+        echo CXX_COMPILER=g++ >> "$GITHUB_ENV"
+
+        nvcc --version
+
+    - name: Prep build
+      run: python -m pip install cmake==3.27.9 ninja
+
+    # TODO: the following steps (CUDA, NOBLASLT, CPU) could be moved to the matrix, so they're built in parallel
+
+    - name: Configure CUDA
+      run: >
+        cmake -B ${{ steps.strings.outputs.build-output-dir }}
+        -G Ninja ${{ env.DCMAKE_CUDA_COMPILER }}
+        -DCMAKE_CXX_COMPILER=${{ env.CXX_COMPILER }}
+        -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
+        -DCOMPUTE_CAPABILITY="50;52;60;61;62;70;72;75;80;86;87;89;90"
+        -DCOMPUTE_BACKEND=cuda
+        -S ${{ github.workspace }}
+
+    - name: Build CUDA
+      run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }}
+
+    - name: Configure NOBLASLT
+      run: >
+        cmake -B ${{ steps.strings.outputs.build-output-dir }}
+        -G Ninja ${{ env.DCMAKE_CUDA_COMPILER }}
+        -DCMAKE_CXX_COMPILER=${{ env.CXX_COMPILER }}
+        -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
+        -DCOMPUTE_CAPABILITY="50;52;60;61;62;70;72;75;80;86;87;89;90"
+        -DCOMPUTE_BACKEND=cuda
+        -DNO_CUBLASLT=ON
+        -S ${{ github.workspace }}
+
+    - name: Build NOBLASLT
+      run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }}
+
+    - name: Copy libraries
+      shell: bash
+      run: |
+        mkdir -p output/${{ matrix.os }}/${{ matrix.arch }}
+        ( shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} output/${{ matrix.os }}/${{ matrix.arch }}/ )
+    - name: Upload build artifact
+      uses: actions/upload-artifact@v4
+      with:
+        name: shared_library_cuda_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.cuda-version }}
+        path: output/*
+
   build-wheels:
     needs:
     - build-shared-libs
     - build-shared-libs-cuda
+    - build-shared-libs-cuda-docker
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]