Skip to content

Commit

Permalink
Merge pull request #3111 from autonomys/improve-gpu-support
Browse files Browse the repository at this point in the history
Improve GPU support
  • Loading branch information
nazar-pc authored Oct 9, 2024
2 parents 3cda331 + 8f4a907 commit 6c0c789
Show file tree
Hide file tree
Showing 10 changed files with 58 additions and 89 deletions.
52 changes: 21 additions & 31 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,13 @@ jobs:
sub-packages: '["nvcc", "cudart"]'
if: runner.os == 'Linux' || runner.os == 'Windows'

# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# - name: Configure ROCm cache (Windows)
# uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
# id: rocm-cache
# with:
# path: C:\Program Files\AMD\ROCm
# key: ${{ runner.os }}-rocm
# if: runner.os == 'Windows'
- name: Configure ROCm cache (Windows)
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
id: rocm-cache
with:
path: C:\Program Files\AMD\ROCm
key: ${{ runner.os }}-rocm
if: runner.os == 'Windows'

- name: ROCm toolchain
run: |
Expand All @@ -126,19 +125,13 @@ jobs:
sudo ldconfig
if: runner.os == 'Linux'

# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# - name: ROCm toolchain
# run: |
# $ErrorActionPreference = "Stop"
# Invoke-WebRequest -Uri https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe -OutFile "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
# Start-Process "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe" -ArgumentList '-install' -NoNewWindow -Wait
# Remove-Item "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
# if: runner.os == 'Windows' && steps.rocm-cache.outputs.cache-hit != 'true'
#
# - name: ROCm toolchain environment (Windows)
# run: |
# Add-Content $env:GITHUB_PATH "C:\Program Files\AMD\ROCm\6.1\bin"
# if: runner.os == 'Windows'
- name: ROCm toolchain
run: |
$ErrorActionPreference = "Stop"
Invoke-WebRequest -Uri https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe -OutFile "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
Start-Process "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe" -ArgumentList '-install' -NoNewWindow -Wait
Remove-Item "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
if: runner.os == 'Windows' && steps.rocm-cache.outputs.cache-hit != 'true'

- name: Configure cache
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
Expand All @@ -161,20 +154,17 @@ jobs:
if: runner.os == 'Linux' || runner.os == 'Windows'

- name: cargo clippy (ROCm)
env:
NVCC: off
run: |
cargo -Zgitoxide -Zgit clippy --locked --all-targets --features rocm -- -D warnings
if: runner.os == 'Linux'

# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# - name: cargo clippy (ROCm)
# env:
# NVCC: off
# HIPCC: hipcc.bin.exe
# run: |
# cargo -Zgitoxide -Zgit clippy --locked --all-targets --features rocm -- -D warnings
# if: runner.os == 'Windows'
- name: cargo clippy (ROCm)
env:
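# `PROGRA~1` is the Windows 8.3 short name of `Program Files`; it is used instead of the long name presumably because the space in the full path breaks the `hipcc` invocation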
HIPCC: C:\PROGRA~1\AMD\ROCm\6.1\bin\hipcc.bin.exe
run: |
cargo -Zgitoxide -Zgit clippy --locked --all-targets --features rocm -- -D warnings
if: runner.os == 'Windows'

cargo-docs:
runs-on: ${{ fromJson(github.repository_owner == 'autonomys' && '["self-hosted", "ubuntu-20.04-x86-64"]' || '"ubuntu-22.04"') }}
Expand Down
65 changes: 25 additions & 40 deletions .github/workflows/snapshot-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -170,14 +170,13 @@ jobs:
sub-packages: '["nvcc", "cudart"]'
if: runner.os == 'Linux' || runner.os == 'Windows'

# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# - name: Configure ROCm cache (Windows)
# uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
# id: rocm-cache
# with:
# path: C:\Program Files\AMD\ROCm
# key: ${{ runner.os }}-rocm
# if: runner.os == 'Windows'
- name: Configure ROCm cache (Windows)
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
id: rocm-cache
with:
path: C:\Program Files\AMD\ROCm
key: ${{ runner.os }}-rocm
if: runner.os == 'Windows'

- name: ROCm toolchain
run: |
Expand All @@ -193,19 +192,13 @@ jobs:
# TODO: ROCm packages are only available for x86-64 for now
if: runner.os == 'Linux' && startsWith(matrix.build.target, 'x86_64')

# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# - name: ROCm toolchain
# run: |
# $ErrorActionPreference = "Stop"
# Invoke-WebRequest -Uri https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe -OutFile "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
# Start-Process "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe" -ArgumentList '-install' -NoNewWindow -Wait
# Remove-Item "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
# if: runner.os == 'Windows' && steps.rocm-cache.outputs.cache-hit != 'true'
#
# - name: ROCm toolchain environment (Windows)
# run: |
# Add-Content $env:GITHUB_PATH "C:\Program Files\AMD\ROCm\6.1\bin"
# if: runner.os == 'Windows'
- name: ROCm toolchain
run: |
$ErrorActionPreference = "Stop"
Invoke-WebRequest -Uri https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe -OutFile "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
Start-Process "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe" -ArgumentList '-install' -NoNewWindow -Wait
Remove-Item "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
if: runner.os == 'Windows' && steps.rocm-cache.outputs.cache-hit != 'true'

- name: Configure cache
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
Expand All @@ -222,26 +215,21 @@ jobs:
cargo -Zgitoxide -Zgit build --locked -Z build-std --target ${{ matrix.build.target }} --profile production --bin subspace-farmer
if: runner.os == 'macOS' || !startsWith(matrix.build.target, 'x86_64')

# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# # ROCm can't be enabled together with CUDA for now
# - name: Build farmer (ROCm, Windows)
# env:
# NVCC: off
# HIPCC: hipcc.bin.exe
# run: |
# cargo -Zgitoxide -Zgit build --locked -Z build-std --target ${{ matrix.build.target }} --profile production --bin subspace-farmer --features rocm
# move ${{ env.PRODUCTION_TARGET }}/subspace-farmer.exe ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe
# # TODO: ROCm packages are only available for x86-64 for now
# if: runner.os == 'Windows' && startsWith(matrix.build.target, 'x86_64')
# ROCm can't be enabled together with CUDA for now
- name: Build farmer (ROCm, Windows)
env:
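# `PROGRA~1` is the Windows 8.3 short name of `Program Files`; it is used instead of the long name presumably because the space in the full path breaks the `hipcc` invocation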
HIPCC: C:\PROGRA~1\AMD\ROCm\6.1\bin\hipcc.bin.exe
run: |
cargo -Zgitoxide -Zgit build --locked -Z build-std --target ${{ matrix.build.target }} --profile production --bin subspace-farmer --features rocm
move ${{ env.PRODUCTION_TARGET }}/subspace-farmer.exe ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe
if: runner.os == 'Windows' && startsWith(matrix.build.target, 'x86_64')

# ROCm can't be enabled together with CUDA for now
- name: Build farmer (ROCm, Ubuntu)
env:
NVCC: off
run: |
cargo -Zgitoxide -Zgit build --locked -Z build-std --target ${{ matrix.build.target }} --profile production --bin subspace-farmer --features rocm
mv ${{ env.PRODUCTION_TARGET }}/subspace-farmer ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm
# TODO: ROCm packages are only available for x86-64 for now
if: runner.os == 'Linux' && startsWith(matrix.build.target, 'x86_64')

- name: Build farmer
Expand Down Expand Up @@ -291,8 +279,7 @@ jobs:
- name: Sign Application (Windows)
run: |
AzureSignTool sign --azure-key-vault-url "${{ secrets.AZURE_KEY_VAULT_URI }}" --azure-key-vault-client-id "${{ secrets.AZURE_CLIENT_ID }}" --azure-key-vault-client-secret "${{ secrets.AZURE_CLIENT_SECRET }}" --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" --azure-key-vault-certificate "${{ secrets.AZURE_CERT_NAME }}" --file-digest sha512 --timestamp-rfc3161 http://timestamp.digicert.com -v "${{ env.PRODUCTION_TARGET }}/subspace-farmer.exe"
# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# AzureSignTool sign --azure-key-vault-url "${{ secrets.AZURE_KEY_VAULT_URI }}" --azure-key-vault-client-id "${{ secrets.AZURE_CLIENT_ID }}" --azure-key-vault-client-secret "${{ secrets.AZURE_CLIENT_SECRET }}" --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" --azure-key-vault-certificate "${{ secrets.AZURE_CERT_NAME }}" --file-digest sha512 --timestamp-rfc3161 http://timestamp.digicert.com -v "${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe"
AzureSignTool sign --azure-key-vault-url "${{ secrets.AZURE_KEY_VAULT_URI }}" --azure-key-vault-client-id "${{ secrets.AZURE_CLIENT_ID }}" --azure-key-vault-client-secret "${{ secrets.AZURE_CLIENT_SECRET }}" --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" --azure-key-vault-certificate "${{ secrets.AZURE_CERT_NAME }}" --file-digest sha512 --timestamp-rfc3161 http://timestamp.digicert.com -v "${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe"
AzureSignTool sign --azure-key-vault-url "${{ secrets.AZURE_KEY_VAULT_URI }}" --azure-key-vault-client-id "${{ secrets.AZURE_CLIENT_ID }}" --azure-key-vault-client-secret "${{ secrets.AZURE_CLIENT_SECRET }}" --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" --azure-key-vault-certificate "${{ secrets.AZURE_CERT_NAME }}" --file-digest sha512 --timestamp-rfc3161 http://timestamp.digicert.com -v "${{ env.PRODUCTION_TARGET }}/subspace-node.exe"
# Allow code signing to fail on non-release builds and in non-subspace repos (forks)
continue-on-error: ${{ github.repository_owner != 'autonomys' || github.event_name != 'push' || github.ref_type != 'tag' }}
Expand All @@ -308,7 +295,6 @@ jobs:
- name: Prepare executables for uploading (Ubuntu, ROCm)
run: |
mv ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm executables/subspace-farmer-rocm-${{ matrix.build.suffix }}
# TODO: ROCm packages are only available for x86-64 for now
if: runner.os == 'Linux' && startsWith(matrix.build.target, 'x86_64')

- name: Prepare executables for uploading (macOS)
Expand All @@ -327,8 +313,7 @@ jobs:
run: |
mkdir executables
move ${{ env.PRODUCTION_TARGET }}/subspace-farmer.exe executables/subspace-farmer-${{ matrix.build.suffix }}.exe
# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# move ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe executables/subspace-farmer-rocm-${{ matrix.build.suffix }}.exe
move ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe executables/subspace-farmer-rocm-${{ matrix.build.suffix }}.exe
move ${{ env.PRODUCTION_TARGET }}/subspace-node.exe executables/subspace-node-${{ matrix.build.suffix }}.exe
if: runner.os == 'Windows'

Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Dockerfile-bootstrap-node
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ RUN \
if [ $BUILDARCH != "riscv64" ] && [ $TARGETARCH = "riscv64" ]; then \
export RUSTFLAGS="$RUSTFLAGS -C linker=riscv64-linux-gnu-gcc" \
; fi && \
if [ $TARGETARCH = "amd64" ] && [ $RUSTFLAGS = ""]; then \
if [ $TARGETARCH = "amd64" ] && [ "$RUSTFLAGS" = "" ]; then \
export RUSTFLAGS="-C target-cpu=skylake" \
; fi && \
RUSTC_TARGET_ARCH=$(echo $TARGETARCH | sed "s/amd64/x86_64/g" | sed "s/arm64/aarch64/g" | sed "s/riscv64/riscv64gc/g") && \
Expand Down
5 changes: 2 additions & 3 deletions Dockerfile-farmer
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ RUN \
ldconfig \
; fi

# TODO: Remove `NVCC=off` hack once `sppark` has proper features for CUDA and ROCm
# ROCm is only used on x86-64 since ROCm packages are not available for other architectures
RUN \
if [ $BUILDARCH != "arm64" ] && [ $TARGETARCH = "arm64" ]; then \
Expand All @@ -107,13 +106,13 @@ RUN \
if [ $BUILDARCH != "riscv64" ] && [ $TARGETARCH = "riscv64" ]; then \
export RUSTFLAGS="$RUSTFLAGS -C linker=riscv64-linux-gnu-gcc" \
; fi && \
if [ $TARGETARCH = "amd64" ] && [ $RUSTFLAGS = "" ]; then \
if [ $TARGETARCH = "amd64" ] && [ "$RUSTFLAGS" = "" ]; then \
export RUSTFLAGS="-C target-cpu=skylake" \
; fi && \
export PATH=/usr/local/cuda/bin${PATH:+:${PATH}} && \
RUSTC_TARGET_ARCH=$(echo $TARGETARCH | sed "s/amd64/x86_64/g" | sed "s/arm64/aarch64/g" | sed "s/riscv64/riscv64gc/g") && \
if [ $BUILDARCH = "amd64" ] && [ $TARGETARCH = "amd64" ]; then \
NVCC=off /root/.cargo/bin/cargo -Zgitoxide -Zgit build \
/root/.cargo/bin/cargo -Zgitoxide -Zgit build \
--locked \
-Z build-std \
--profile $PROFILE \
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile-node
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ RUN \
if [ $BUILDARCH != "riscv64" ] && [ $TARGETARCH = "riscv64" ]; then \
export RUSTFLAGS="$RUSTFLAGS -C linker=riscv64-linux-gnu-gcc" \
; fi && \
if [ $TARGETARCH = "amd64" ] && [ $RUSTFLAGS = ""]; then \
if [ $TARGETARCH = "amd64" ] && [ "$RUSTFLAGS" = "" ]; then \
export RUSTFLAGS="-C target-cpu=skylake" \
; fi && \
RUSTC_TARGET_ARCH=$(echo $TARGETARCH | sed "s/amd64/x86_64/g" | sed "s/arm64/aarch64/g" | sed "s/riscv64/riscv64gc/g") && \
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile-runtime
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ RUN \
if [ $BUILDARCH != "riscv64" ] && [ $TARGETARCH = "riscv64" ]; then \
export RUSTFLAGS="$RUSTFLAGS -C linker=riscv64-linux-gnu-gcc" \
; fi && \
if [ $TARGETARCH = "amd64" ] && [ $RUSTFLAGS = ""]; then \
if [ $TARGETARCH = "amd64" ] && [ "$RUSTFLAGS" = "" ]; then \
export RUSTFLAGS="-C target-cpu=skylake" \
; fi && \
RUSTC_TARGET_ARCH=$(echo $TARGETARCH | sed "s/amd64/x86_64/g" | sed "s/arm64/aarch64/g" | sed "s/riscv64/riscv64gc/g") && \
Expand Down
9 changes: 4 additions & 5 deletions shared/subspace-proof-of-space-gpu/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ include = [
[dependencies]
blst = { version = "0.3.13", optional = true }
rust-kzg-blst = { git = "https://github.com/grandinetech/rust-kzg", rev = "6c8fcc623df3d7e8c0f30951a49bfea764f90bf4", default-features = false, optional = true }
# TODO: Fork with ROCm support, switch to upstream once `rocm` branch from `https://github.com/dot-asm/sppark` is upstreamed
sppark = { version = "0.1.8", git = "https://github.com/autonomys/sppark", rev = "71c49160d7aa24f92c20592d2d26ef16f5400a04", optional = true }
# TODO: Fork with ROCm support, switch to upstream once `rocm` branch from `https://github.com/dot-asm/sppark` + https://github.com/dot-asm/sppark/pull/2 are upstreamed
sppark = { version = "0.1.8", git = "https://github.com/autonomys/sppark", rev = "b2a181eb99c8200f1a604f04122551ea39fbf63f", optional = true }
subspace-core-primitives = { version = "0.1.0", path = "../../crates/subspace-core-primitives", default-features = false, optional = true }
subspace-kzg = { version = "0.1.0", path = "../subspace-kzg", optional = true }

Expand All @@ -30,15 +30,14 @@ cc = "1.1.23"

[features]
# Only Volta+ architectures are supported (GeForce RTX 16xx consumer GPUs and newer)
cuda = ["_gpu"]
cuda = ["_gpu", "sppark/cuda"]
# TODO: ROCm can't be enabled at the same time as the `cuda` feature at the moment
# Seems to support RDNA 2+, at least on Linux
rocm = ["_gpu"]
rocm = ["_gpu", "sppark/rocm"]
# Internal feature, shouldn't be used directly
_gpu = [
"dep:blst",
"dep:rust-kzg-blst",
"dep:sppark",
"dep:subspace-core-primitives",
"dep:subspace-kzg",
]
5 changes: 0 additions & 5 deletions shared/subspace-proof-of-space-gpu/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,3 @@ For other operating systems/platforms check official documentation: <https://doc
### ROCm

For AMD/ROCm support follow their official documentation: <https://rocm.docs.amd.com/en/latest/>

For compilation `NVCC=off` environment variable must be additionally used:
```bash
NVCC=off cargo build
```
3 changes: 2 additions & 1 deletion shared/subspace-proof-of-space-gpu/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ fn main() {
}

if cfg!(feature = "rocm") {
println!("cargo::rerun-if-env-changed=HIPCC");

let mut hipcc = cc::Build::new();
hipcc.compiler(env::var("HIPCC").unwrap_or("hipcc".to_string()));
hipcc.cpp(true);
Expand Down Expand Up @@ -88,5 +90,4 @@ fn main() {
}

println!("cargo::rerun-if-changed=src");
println!("cargo::rerun-if-env-changed=CXXFLAGS");
}

0 comments on commit 6c0c789

Please sign in to comment.