From 41f0ee576cd1660dfd860bfcc171f54c4d954523 Mon Sep 17 00:00:00 2001
From: "lcy.seso"
Date: Fri, 13 Sep 2024 04:04:21 +0000
Subject: [PATCH] add ncu tests.

Add one Nsight Compute (ncu) profiling script per implementation under
artifacts/. Each script runs ncu on a benchmark and redirects the CSV
output to a file under logs/ in the current working directory.

---
 artifacts/run_all_ncu_cutlass.sh | 35 +++++++++++++++++++++++++++++++++++
 artifacts/run_all_ncu_flash2.sh  | 28 ++++++++++++++++++++++++++++
 artifacts/run_all_ncu_ft.sh      | 29 +++++++++++++++++++++++++++++
 artifacts/run_all_ncu_pt.sh      | 22 ++++++++++++++++++++++
 artifacts/run_all_ncu_triton.sh  | 35 +++++++++++++++++++++++++++++++++++
 5 files changed, 149 insertions(+)
 create mode 100755 artifacts/run_all_ncu_cutlass.sh
 create mode 100755 artifacts/run_all_ncu_flash2.sh
 create mode 100755 artifacts/run_all_ncu_ft.sh
 create mode 100755 artifacts/run_all_ncu_pt.sh
 create mode 100755 artifacts/run_all_ncu_triton.sh

diff --git a/artifacts/run_all_ncu_cutlass.sh b/artifacts/run_all_ncu_cutlass.sh
new file mode 100755
--- /dev/null
+++ b/artifacts/run_all_ncu_cutlass.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Profile the CUTLASS fused multi-head attention example with Nsight Compute
+# (ncu) and write the CSV output to logs/cutlass_attention_ncu.csv.
+#
+# Run from the directory that contains the cutlass/ checkout: all relative
+# paths below are resolved against the current working directory.
+# Set NCU_DIR to override the directory that holds the ncu binary.
+ncu_dir="${NCU_DIR:-/home/sosp/env/spack/opt/spack/linux-ubuntu22.04-zen2/gcc-11.4.0/cuda-12.4.0-ypujjdfaen2zwiplopzke4ud33wddscv/bin}"
+
+root_dir=$(pwd)
+log_dir="$root_dir/logs"
+exe_path="cutlass/build/examples/41_fused_multi_head_attention/41_fused_multi_head_attention_fixed_seqlen"
+
+nheads=8
+batch_size=32
+head_size=128
+length=1024
+
+# The output redirection below fails if the log directory does not exist.
+mkdir -p "$log_dir"
+
+if [ ! -f cutlass_attn ]; then
+    ln -s "$exe_path" cutlass_attn
+fi
+
+# 1. ncu test the mha benchmark
+echo "NCU profiling mha benchmark"
+"$ncu_dir/ncu" --section "MemoryWorkloadAnalysis" \
+    --csv --set full cutlass_attn --nheads="$nheads" \
+    --batch_size="$batch_size" \
+    --head_size="$head_size" \
+    --head_size_v="$head_size" \
+    --seq_length="$length" \
+    --seq_length_kv="$length" \
+    --causal=false > "$log_dir/cutlass_attention_ncu.csv"
diff --git a/artifacts/run_all_ncu_flash2.sh b/artifacts/run_all_ncu_flash2.sh
new file mode 100755
--- /dev/null
+++ b/artifacts/run_all_ncu_flash2.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Profile the baseline multi-head attention script (test_pt_model.py) with
+# Nsight Compute (ncu) and write the CSV output to
+# logs/flash2_attention_ncu.csv.
+#
+# Run from the directory that contains the FractalTensor/ checkout, or set
+# stack_rnn_path to the directory that contains it.
+# Set NCU_DIR to override the directory that holds the ncu binary.
+ncu_dir="${NCU_DIR:-/home/sosp/env/spack/opt/spack/linux-ubuntu22.04-zen2/gcc-11.4.0/cuda-12.4.0-ypujjdfaen2zwiplopzke4ud33wddscv/bin}"
+
+root_dir=$(pwd)
+log_dir="$root_dir/logs"
+benchmark_dir="FractalTensor/benchmarks"
+mha_dir="$benchmark_dir/multi-head_attention/baseline"
+
+# stack_rnn_path is not assigned anywhere in this script; take it from the
+# environment when set and otherwise fall back to the current directory so
+# the script path does not collapse to "/FractalTensor/...".
+stack_rnn_path="${stack_rnn_path:-$root_dir}"
+
+# The output redirection below fails if the log directory does not exist.
+mkdir -p "$log_dir"
+
+# 1. ncu test the mha benchmark
+echo "NCU profiling mha benchmark"
+"$ncu_dir/ncu" --section "MemoryWorkloadAnalysis" \
+    --csv --set full python3 "$stack_rnn_path/$mha_dir/test_pt_model.py" \
+    > "$log_dir/flash2_attention_ncu.csv"
diff --git a/artifacts/run_all_ncu_ft.sh b/artifacts/run_all_ncu_ft.sh
new file mode 100755
--- /dev/null
+++ b/artifacts/run_all_ncu_ft.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Profile the FractalTensor multi-head attention and BigBird benchmark
+# executables with Nsight Compute (ncu) and write the CSV output to
+# logs/ft_attention_ncu.csv and logs/ft_bigbird_ncu.csv.
+#
+# Run from the directory that contains the FractalTensor/ checkout: all
+# relative paths below are resolved against the current working directory.
+# Set NCU_DIR to override the directory that holds the ncu binary.
+ncu_dir="${NCU_DIR:-/home/sosp/env/spack/opt/spack/linux-ubuntu22.04-zen2/gcc-11.4.0/cuda-12.4.0-ypujjdfaen2zwiplopzke4ud33wddscv/bin}"
+
+root_dir=$(pwd)
+log_dir="$root_dir/logs"
+benchmark_dir="FractalTensor/benchmarks"
+
+# The output redirections below fail if the log directory does not exist.
+mkdir -p "$log_dir"
+
+# 1. ncu test the mha benchmark
+echo "NCU profiling mha benchmark"
+mha_dir="$benchmark_dir/multi-head_attention/fractaltensor/build"
+mha_exe="$mha_dir/main"
+"$ncu_dir/ncu" --section "MemoryWorkloadAnalysis" \
+    --csv --set full "$mha_exe" > "$log_dir/ft_attention_ncu.csv"
+
+# 2. ncu test the bigbird benchmark
+bigbird_dir="$benchmark_dir/blocked_sparse_attention/fractaltensor/build"
+echo "NCU profiling BigBird benchmark"
+"$ncu_dir/ncu" --section "MemoryWorkloadAnalysis" \
+    --csv --set full "$bigbird_dir/bigbird" > "$log_dir/ft_bigbird_ncu.csv"
diff --git a/artifacts/run_all_ncu_pt.sh b/artifacts/run_all_ncu_pt.sh
new file mode 100755
--- /dev/null
+++ b/artifacts/run_all_ncu_pt.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Profile the PyTorch BigBird benchmark with Nsight Compute (ncu) and write
+# the CSV output to logs/pt_bigbird_ncu.csv.
+#
+# Run from the directory that contains the FractalTensor/ checkout: all
+# relative paths below are resolved against the current working directory.
+# Set NCU_DIR to override the directory that holds the ncu binary.
+ncu_dir="${NCU_DIR:-/home/sosp/env/spack/opt/spack/linux-ubuntu22.04-zen2/gcc-11.4.0/cuda-12.4.0-ypujjdfaen2zwiplopzke4ud33wddscv/bin}"
+
+root_dir=$(pwd)
+log_dir="$root_dir/logs"
+benchmark_dir="FractalTensor/benchmarks"
+bigbird_dir="$benchmark_dir/blocked_sparse_attention/pytorch"
+
+# The output redirection below fails if the log directory does not exist.
+mkdir -p "$log_dir"
+
+# 1. ncu test the bigbird benchmark
+echo "NCU profiling BigBird benchmark"
+"$ncu_dir/ncu" --section "MemoryWorkloadAnalysis" \
+    --csv --set full python3 "$bigbird_dir/main.py" \
+    > "$log_dir/pt_bigbird_ncu.csv"
diff --git a/artifacts/run_all_ncu_triton.sh b/artifacts/run_all_ncu_triton.sh
new file mode 100755
--- /dev/null
+++ b/artifacts/run_all_ncu_triton.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Profile the Triton multi-head attention and BigBird benchmarks with Nsight
+# Compute (ncu) and write the CSV output to logs/triton_attention_ncu.csv
+# and logs/triton_bigbird_ncu.csv.
+#
+# Run from the directory that contains the FractalTensor/ checkout, or set
+# stack_rnn_path to the directory that contains it (used by step 1 only).
+# Set NCU_DIR to override the directory that holds the ncu binary.
+ncu_dir="${NCU_DIR:-/home/sosp/env/spack/opt/spack/linux-ubuntu22.04-zen2/gcc-11.4.0/cuda-12.4.0-ypujjdfaen2zwiplopzke4ud33wddscv/bin}"
+
+root_dir=$(pwd)
+log_dir="$root_dir/logs"
+benchmark_dir="FractalTensor/benchmarks"
+mha_dir="$benchmark_dir/multi-head_attention/baseline"
+
+# stack_rnn_path is not assigned anywhere in this script; take it from the
+# environment when set and otherwise fall back to the current directory so
+# the script path does not collapse to "/FractalTensor/...".
+stack_rnn_path="${stack_rnn_path:-$root_dir}"
+
+# The output redirections below fail if the log directory does not exist.
+mkdir -p "$log_dir"
+
+# 1. ncu test the mha benchmark
+echo "NCU profiling mha benchmark"
+"$ncu_dir/ncu" --section "MemoryWorkloadAnalysis" \
+    --csv --set full python3 "$stack_rnn_path/$mha_dir/test_triton_model.py" \
+    > "$log_dir/triton_attention_ncu.csv"
+
+bigbird_dir="$benchmark_dir/blocked_sparse_attention/triton"
+# 2. ncu test the bigbird benchmark
+echo "NCU profiling BigBird benchmark"
+"$ncu_dir/ncu" --section "MemoryWorkloadAnalysis" \
+    --csv --set full python3 "$bigbird_dir/main.py" \
+    --default_test > "$log_dir/triton_bigbird_ncu.csv"