Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve GPU support #3111

Merged
merged 3 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 21 additions & 31 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,13 @@ jobs:
sub-packages: '["nvcc", "cudart"]'
if: runner.os == 'Linux' || runner.os == 'Windows'

# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# - name: Configure ROCm cache (Windows)
# uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
# id: rocm-cache
# with:
# path: C:\Program Files\AMD\ROCm
# key: ${{ runner.os }}-rocm
# if: runner.os == 'Windows'
- name: Configure ROCm cache (Windows)
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
id: rocm-cache
with:
path: C:\Program Files\AMD\ROCm
key: ${{ runner.os }}-rocm
if: runner.os == 'Windows'

- name: ROCm toolchain
run: |
Expand All @@ -126,19 +125,13 @@ jobs:
sudo ldconfig
if: runner.os == 'Linux'

# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# - name: ROCm toolchain
# run: |
# $ErrorActionPreference = "Stop"
# Invoke-WebRequest -Uri https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe -OutFile "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
# Start-Process "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe" -ArgumentList '-install' -NoNewWindow -Wait
# Remove-Item "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
# if: runner.os == 'Windows' && steps.rocm-cache.outputs.cache-hit != 'true'
#
# - name: ROCm toolchain environment (Windows)
# run: |
# Add-Content $env:GITHUB_PATH "C:\Program Files\AMD\ROCm\6.1\bin"
# if: runner.os == 'Windows'
- name: ROCm toolchain
run: |
$ErrorActionPreference = "Stop"
Invoke-WebRequest -Uri https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe -OutFile "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
Start-Process "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe" -ArgumentList '-install' -NoNewWindow -Wait
Remove-Item "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
if: runner.os == 'Windows' && steps.rocm-cache.outputs.cache-hit != 'true'

- name: Configure cache
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
Expand All @@ -161,20 +154,17 @@ jobs:
if: runner.os == 'Linux' || runner.os == 'Windows'

- name: cargo clippy (ROCm)
env:
NVCC: off
run: |
cargo -Zgitoxide -Zgit clippy --locked --all-targets --features rocm -- -D warnings
if: runner.os == 'Linux'

# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# - name: cargo clippy (ROCm)
# env:
# NVCC: off
# HIPCC: hipcc.bin.exe
# run: |
# cargo -Zgitoxide -Zgit clippy --locked --all-targets --features rocm -- -D warnings
# if: runner.os == 'Windows'
- name: cargo clippy (ROCm)
env:
# `PROGRA~1` is the 8.3 short name of `Program Files`; presumably used because a space in the compiler path breaks the `hipcc` invocation on Windows
HIPCC: C:\PROGRA~1\AMD\ROCm\6.1\bin\hipcc.bin.exe
run: |
cargo -Zgitoxide -Zgit clippy --locked --all-targets --features rocm -- -D warnings
if: runner.os == 'Windows'

cargo-docs:
runs-on: ${{ fromJson(github.repository_owner == 'autonomys' && '["self-hosted", "ubuntu-20.04-x86-64"]' || '"ubuntu-22.04"') }}
Expand Down
65 changes: 25 additions & 40 deletions .github/workflows/snapshot-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -170,14 +170,13 @@ jobs:
sub-packages: '["nvcc", "cudart"]'
if: runner.os == 'Linux' || runner.os == 'Windows'

# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# - name: Configure ROCm cache (Windows)
# uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
# id: rocm-cache
# with:
# path: C:\Program Files\AMD\ROCm
# key: ${{ runner.os }}-rocm
# if: runner.os == 'Windows'
- name: Configure ROCm cache (Windows)
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
id: rocm-cache
with:
path: C:\Program Files\AMD\ROCm
key: ${{ runner.os }}-rocm
if: runner.os == 'Windows'

- name: ROCm toolchain
run: |
Expand All @@ -193,19 +192,13 @@ jobs:
# TODO: ROCm packages are only available for x86-64 for now
if: runner.os == 'Linux' && startsWith(matrix.build.target, 'x86_64')

# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# - name: ROCm toolchain
# run: |
# $ErrorActionPreference = "Stop"
# Invoke-WebRequest -Uri https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe -OutFile "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
# Start-Process "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe" -ArgumentList '-install' -NoNewWindow -Wait
# Remove-Item "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
# if: runner.os == 'Windows' && steps.rocm-cache.outputs.cache-hit != 'true'
#
# - name: ROCm toolchain environment (Windows)
# run: |
# Add-Content $env:GITHUB_PATH "C:\Program Files\AMD\ROCm\6.1\bin"
# if: runner.os == 'Windows'
- name: ROCm toolchain
run: |
$ErrorActionPreference = "Stop"
Invoke-WebRequest -Uri https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe -OutFile "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
Start-Process "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe" -ArgumentList '-install' -NoNewWindow -Wait
Remove-Item "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
if: runner.os == 'Windows' && steps.rocm-cache.outputs.cache-hit != 'true'

- name: Configure cache
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
Expand All @@ -222,26 +215,21 @@ jobs:
cargo -Zgitoxide -Zgit build --locked -Z build-std --target ${{ matrix.build.target }} --profile production --bin subspace-farmer
if: runner.os == 'macOS' || !startsWith(matrix.build.target, 'x86_64')

# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# # ROCm can't be enabled together with CUDA for now
# - name: Build farmer (ROCm, Windows)
# env:
# NVCC: off
# HIPCC: hipcc.bin.exe
# run: |
# cargo -Zgitoxide -Zgit build --locked -Z build-std --target ${{ matrix.build.target }} --profile production --bin subspace-farmer --features rocm
# move ${{ env.PRODUCTION_TARGET }}/subspace-farmer.exe ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe
# # TODO: ROCm packages are only available for x86-64 for now
# if: runner.os == 'Windows' && startsWith(matrix.build.target, 'x86_64')
# ROCm can't be enabled together with CUDA for now
- name: Build farmer (ROCm, Windows)
env:
# `PROGRA~1` is the 8.3 short name of `Program Files`; presumably used because a space in the compiler path breaks the `hipcc` invocation on Windows
HIPCC: C:\PROGRA~1\AMD\ROCm\6.1\bin\hipcc.bin.exe
run: |
cargo -Zgitoxide -Zgit build --locked -Z build-std --target ${{ matrix.build.target }} --profile production --bin subspace-farmer --features rocm
move ${{ env.PRODUCTION_TARGET }}/subspace-farmer.exe ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe
if: runner.os == 'Windows' && startsWith(matrix.build.target, 'x86_64')

# ROCm can't be enabled together with CUDA for now
- name: Build farmer (ROCm, Ubuntu)
env:
NVCC: off
run: |
cargo -Zgitoxide -Zgit build --locked -Z build-std --target ${{ matrix.build.target }} --profile production --bin subspace-farmer --features rocm
mv ${{ env.PRODUCTION_TARGET }}/subspace-farmer ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm
# TODO: ROCm packages are only available for x86-64 for now
if: runner.os == 'Linux' && startsWith(matrix.build.target, 'x86_64')

- name: Build farmer
Expand Down Expand Up @@ -291,8 +279,7 @@ jobs:
- name: Sign Application (Windows)
run: |
AzureSignTool sign --azure-key-vault-url "${{ secrets.AZURE_KEY_VAULT_URI }}" --azure-key-vault-client-id "${{ secrets.AZURE_CLIENT_ID }}" --azure-key-vault-client-secret "${{ secrets.AZURE_CLIENT_SECRET }}" --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" --azure-key-vault-certificate "${{ secrets.AZURE_CERT_NAME }}" --file-digest sha512 --timestamp-rfc3161 http://timestamp.digicert.com -v "${{ env.PRODUCTION_TARGET }}/subspace-farmer.exe"
# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# AzureSignTool sign --azure-key-vault-url "${{ secrets.AZURE_KEY_VAULT_URI }}" --azure-key-vault-client-id "${{ secrets.AZURE_CLIENT_ID }}" --azure-key-vault-client-secret "${{ secrets.AZURE_CLIENT_SECRET }}" --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" --azure-key-vault-certificate "${{ secrets.AZURE_CERT_NAME }}" --file-digest sha512 --timestamp-rfc3161 http://timestamp.digicert.com -v "${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe"
AzureSignTool sign --azure-key-vault-url "${{ secrets.AZURE_KEY_VAULT_URI }}" --azure-key-vault-client-id "${{ secrets.AZURE_CLIENT_ID }}" --azure-key-vault-client-secret "${{ secrets.AZURE_CLIENT_SECRET }}" --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" --azure-key-vault-certificate "${{ secrets.AZURE_CERT_NAME }}" --file-digest sha512 --timestamp-rfc3161 http://timestamp.digicert.com -v "${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe"
AzureSignTool sign --azure-key-vault-url "${{ secrets.AZURE_KEY_VAULT_URI }}" --azure-key-vault-client-id "${{ secrets.AZURE_CLIENT_ID }}" --azure-key-vault-client-secret "${{ secrets.AZURE_CLIENT_SECRET }}" --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" --azure-key-vault-certificate "${{ secrets.AZURE_CERT_NAME }}" --file-digest sha512 --timestamp-rfc3161 http://timestamp.digicert.com -v "${{ env.PRODUCTION_TARGET }}/subspace-node.exe"
# Allow code signing to fail on non-release builds (anything other than a tag push) and in repos outside the `autonomys` organization (forks)
continue-on-error: ${{ github.repository_owner != 'autonomys' || github.event_name != 'push' || github.ref_type != 'tag' }}
Expand All @@ -308,7 +295,6 @@ jobs:
- name: Prepare executables for uploading (Ubuntu, ROCm)
run: |
mv ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm executables/subspace-farmer-rocm-${{ matrix.build.suffix }}
# TODO: ROCm packages are only available for x86-64 for now
if: runner.os == 'Linux' && startsWith(matrix.build.target, 'x86_64')

- name: Prepare executables for uploading (macOS)
Expand All @@ -327,8 +313,7 @@ jobs:
run: |
mkdir executables
move ${{ env.PRODUCTION_TARGET }}/subspace-farmer.exe executables/subspace-farmer-${{ matrix.build.suffix }}.exe
# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# move ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe executables/subspace-farmer-rocm-${{ matrix.build.suffix }}.exe
move ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe executables/subspace-farmer-rocm-${{ matrix.build.suffix }}.exe
move ${{ env.PRODUCTION_TARGET }}/subspace-node.exe executables/subspace-node-${{ matrix.build.suffix }}.exe
if: runner.os == 'Windows'

Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Dockerfile-bootstrap-node
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ RUN \
if [ $BUILDARCH != "riscv64" ] && [ $TARGETARCH = "riscv64" ]; then \
export RUSTFLAGS="$RUSTFLAGS -C linker=riscv64-linux-gnu-gcc" \
; fi && \
if [ $TARGETARCH = "amd64" ] && [ $RUSTFLAGS = ""]; then \
if [ $TARGETARCH = "amd64" ] && [ "$RUSTFLAGS" = ""]; then \
export RUSTFLAGS="-C target-cpu=skylake" \
; fi && \
RUSTC_TARGET_ARCH=$(echo $TARGETARCH | sed "s/amd64/x86_64/g" | sed "s/arm64/aarch64/g" | sed "s/riscv64/riscv64gc/g") && \
Expand Down
5 changes: 2 additions & 3 deletions Dockerfile-farmer
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ RUN \
ldconfig \
; fi

# TODO: Remove `NVCC=off` hack once `sppark` has proper features for CUDA and ROCm
# ROCm is only used on x86-64 since ROCm packages are not available for other architectures
RUN \
if [ $BUILDARCH != "arm64" ] && [ $TARGETARCH = "arm64" ]; then \
Expand All @@ -107,13 +106,13 @@ RUN \
if [ $BUILDARCH != "riscv64" ] && [ $TARGETARCH = "riscv64" ]; then \
export RUSTFLAGS="$RUSTFLAGS -C linker=riscv64-linux-gnu-gcc" \
; fi && \
if [ $TARGETARCH = "amd64" ] && [ $RUSTFLAGS = "" ]; then \
if [ $TARGETARCH = "amd64" ] && [ "$RUSTFLAGS" = "" ]; then \
export RUSTFLAGS="-C target-cpu=skylake" \
; fi && \
export PATH=/usr/local/cuda/bin${PATH:+:${PATH}} && \
RUSTC_TARGET_ARCH=$(echo $TARGETARCH | sed "s/amd64/x86_64/g" | sed "s/arm64/aarch64/g" | sed "s/riscv64/riscv64gc/g") && \
if [ $BUILDARCH = "amd64" ] && [ $TARGETARCH = "amd64" ]; then \
NVCC=off /root/.cargo/bin/cargo -Zgitoxide -Zgit build \
/root/.cargo/bin/cargo -Zgitoxide -Zgit build \
--locked \
-Z build-std \
--profile $PROFILE \
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile-node
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ RUN \
if [ $BUILDARCH != "riscv64" ] && [ $TARGETARCH = "riscv64" ]; then \
export RUSTFLAGS="$RUSTFLAGS -C linker=riscv64-linux-gnu-gcc" \
; fi && \
if [ $TARGETARCH = "amd64" ] && [ $RUSTFLAGS = ""]; then \
if [ $TARGETARCH = "amd64" ] && [ "$RUSTFLAGS" = ""]; then \
export RUSTFLAGS="-C target-cpu=skylake" \
; fi && \
RUSTC_TARGET_ARCH=$(echo $TARGETARCH | sed "s/amd64/x86_64/g" | sed "s/arm64/aarch64/g" | sed "s/riscv64/riscv64gc/g") && \
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile-runtime
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ RUN \
if [ $BUILDARCH != "riscv64" ] && [ $TARGETARCH = "riscv64" ]; then \
export RUSTFLAGS="$RUSTFLAGS -C linker=riscv64-linux-gnu-gcc" \
; fi && \
if [ $TARGETARCH = "amd64" ] && [ $RUSTFLAGS = ""]; then \
if [ $TARGETARCH = "amd64" ] && [ "$RUSTFLAGS" = ""]; then \
export RUSTFLAGS="-C target-cpu=skylake" \
; fi && \
RUSTC_TARGET_ARCH=$(echo $TARGETARCH | sed "s/amd64/x86_64/g" | sed "s/arm64/aarch64/g" | sed "s/riscv64/riscv64gc/g") && \
Expand Down
9 changes: 4 additions & 5 deletions shared/subspace-proof-of-space-gpu/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ include = [
[dependencies]
blst = { version = "0.3.13", optional = true }
rust-kzg-blst = { git = "https://github.com/grandinetech/rust-kzg", rev = "6c8fcc623df3d7e8c0f30951a49bfea764f90bf4", default-features = false, optional = true }
# TODO: Fork with ROCm support, switch to upstream once `rocm` branch from `https://github.com/dot-asm/sppark` is upstreamed
sppark = { version = "0.1.8", git = "https://github.com/autonomys/sppark", rev = "71c49160d7aa24f92c20592d2d26ef16f5400a04", optional = true }
# TODO: Fork with ROCm support, switch to upstream once the `rocm` branch from `https://github.com/dot-asm/sppark` and `https://github.com/dot-asm/sppark/pull/2` are upstreamed
sppark = { version = "0.1.8", git = "https://github.com/autonomys/sppark", rev = "b2a181eb99c8200f1a604f04122551ea39fbf63f", optional = true }
subspace-core-primitives = { version = "0.1.0", path = "../../crates/subspace-core-primitives", default-features = false, optional = true }
subspace-kzg = { version = "0.1.0", path = "../subspace-kzg", optional = true }

Expand All @@ -30,15 +30,14 @@ cc = "1.1.23"

[features]
# Only Volta+ architectures are supported (GeForce GTX 16xx consumer GPUs and newer)
cuda = ["_gpu"]
cuda = ["_gpu", "sppark/cuda"]
# TODO: ROCm can't be enabled at the same time as the `cuda` feature at the moment
# Seems to support RDNA 2+, at least on Linux
rocm = ["_gpu"]
rocm = ["_gpu", "sppark/rocm"]
# Internal feature, shouldn't be used directly
_gpu = [
"dep:blst",
"dep:rust-kzg-blst",
"dep:sppark",
"dep:subspace-core-primitives",
"dep:subspace-kzg",
]
5 changes: 0 additions & 5 deletions shared/subspace-proof-of-space-gpu/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,3 @@ For other operating systems/platforms check official documentation: <https://doc
### ROCm

For AMD/ROCm support follow their official documentation: <https://rocm.docs.amd.com/en/latest/>

For compilation `NVCC=off` environment variable must be additionally used:
```bash
NVCC=off cargo build
```
3 changes: 2 additions & 1 deletion shared/subspace-proof-of-space-gpu/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ fn main() {
}

if cfg!(feature = "rocm") {
println!("cargo::rerun-if-env-changed=HIPCC");

let mut hipcc = cc::Build::new();
hipcc.compiler(env::var("HIPCC").unwrap_or("hipcc".to_string()));
hipcc.cpp(true);
Expand Down Expand Up @@ -88,5 +90,4 @@ fn main() {
}

println!("cargo::rerun-if-changed=src");
println!("cargo::rerun-if-env-changed=CXXFLAGS");
}
Loading