From 41f0ee576cd1660dfd860bfcc171f54c4d954523 Mon Sep 17 00:00:00 2001
From: "lcy.seso" <lcy.seso@gmail.com>
Date: Fri, 13 Sep 2024 04:04:21 +0000
Subject: [PATCH] add ncu tests.

---
 artifacts/run_all_ncu_cutlass.sh | 26 ++++++++++++++++++++++++++
 artifacts/run_all_ncu_flash2.sh  | 13 +++++++++++++
 artifacts/run_all_ncu_ft.sh      | 19 +++++++++++++++++++
 artifacts/run_all_ncu_pt.sh      | 15 +++++++++++++++
 artifacts/run_all_ncu_triton.sh  | 20 ++++++++++++++++++++
 5 files changed, 93 insertions(+)
 create mode 100644 artifacts/run_all_ncu_cutlass.sh
 create mode 100755 artifacts/run_all_ncu_flash2.sh
 create mode 100755 artifacts/run_all_ncu_ft.sh
 create mode 100755 artifacts/run_all_ncu_pt.sh
 create mode 100755 artifacts/run_all_ncu_triton.sh

diff --git a/artifacts/run_all_ncu_cutlass.sh b/artifacts/run_all_ncu_cutlass.sh
new file mode 100644
index 000000000..e7644b680
--- /dev/null
+++ b/artifacts/run_all_ncu_cutlass.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Profile the CUTLASS fused multi-head attention example with Nsight Compute.
+ncu_dir="/home/sosp/env/spack/opt/spack/linux-ubuntu22.04-zen2/gcc-11.4.0/cuda-12.4.0-ypujjdfaen2zwiplopzke4ud33wddscv/bin"
+root_dir=$(pwd)
+log_dir="$root_dir/logs"
+exe_path="cutlass/build/examples/41_fused_multi_head_attention/41_fused_multi_head_attention_fixed_seqlen"
+
+nheads=8
+batch_size=32
+head_size=128
+length=1024
+
+# The output redirection below fails if the log directory is missing.
+mkdir -p "$log_dir"
+# -sf replaces a dangling link, which a plain `ln -s` cannot overwrite.
+if [ ! -f cutlass_attn ]; then
+    ln -sf "$exe_path" cutlass_attn
+fi
+
+echo "NCU profiling mha benchmark"
+# "./" makes the link resolve from the working directory, not via PATH.
+"$ncu_dir/ncu" --section "MemoryWorkloadAnalysis" --csv --set full \
+    ./cutlass_attn --nheads="$nheads" --batch_size="$batch_size" \
+    --head_size="$head_size" --head_size_v="$head_size" \
+    --seq_length="$length" --seq_length_kv="$length" \
+    --causal=false > "$log_dir/cutlass_attention_ncu.csv"
\ No newline at end of file
diff --git a/artifacts/run_all_ncu_flash2.sh b/artifacts/run_all_ncu_flash2.sh
new file mode 100755
index 000000000..c2f8de1d6
--- /dev/null
+++ b/artifacts/run_all_ncu_flash2.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+ncu_dir="/home/sosp/env/spack/opt/spack/linux-ubuntu22.04-zen2/gcc-11.4.0/cuda-12.4.0-ypujjdfaen2zwiplopzke4ud33wddscv/bin"
+
+root_dir=$(pwd)
+log_dir="$root_dir/logs"
+benchmark_dir="FractalTensor/benchmarks"
+mha_dir="$benchmark_dir/multi-head_attention/baseline"
+
+# 1. ncu test the mha benchmark
+echo "NCU profiling mha benchmark"
+$ncu_dir/ncu --section "MemoryWorkloadAnalysis" \
+    --csv --set full python3 $stack_rnn_path/$mha_dir/test_pt_model.py > $log_dir/flash2_attention_ncu.csv
+
diff --git a/artifacts/run_all_ncu_ft.sh b/artifacts/run_all_ncu_ft.sh
new file mode 100755
index 000000000..fad045c60
--- /dev/null
+++ b/artifacts/run_all_ncu_ft.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Profile the FractalTensor MHA and BigBird executables with Nsight Compute.
+ncu_dir="/home/sosp/env/spack/opt/spack/linux-ubuntu22.04-zen2/gcc-11.4.0/cuda-12.4.0-ypujjdfaen2zwiplopzke4ud33wddscv/bin"
+root_dir=$(pwd)
+log_dir="$root_dir/logs"
+benchmark_dir="FractalTensor/benchmarks"
+mkdir -p "$log_dir" # the redirections below need this directory to exist
+
+# 1. ncu test the mha benchmark
+echo "NCU profiling mha benchmark"
+mha_exe="$benchmark_dir/multi-head_attention/fractaltensor/build/main"
+"$ncu_dir/ncu" --section "MemoryWorkloadAnalysis" \
+    --csv --set full "$mha_exe" > "$log_dir/ft_attention_ncu.csv"
+
+# 2. ncu test the bigbird benchmark
+bigbird_dir="$benchmark_dir/blocked_sparse_attention/fractaltensor/build"
+echo "NCU profiling BigBird benchmark"
+"$ncu_dir/ncu" --section "MemoryWorkloadAnalysis" \
+    --csv --set full "$bigbird_dir/bigbird" > "$log_dir/ft_bigbird_ncu.csv"
\ No newline at end of file
diff --git a/artifacts/run_all_ncu_pt.sh b/artifacts/run_all_ncu_pt.sh
new file mode 100755
index 000000000..7cc9e65ae
--- /dev/null
+++ b/artifacts/run_all_ncu_pt.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+ncu_dir="/home/sosp/env/spack/opt/spack/linux-ubuntu22.04-zen2/gcc-11.4.0/cuda-12.4.0-ypujjdfaen2zwiplopzke4ud33wddscv/bin"
+
+root_dir=$(pwd)
+log_dir="$root_dir/logs"
+benchmark_dir="FractalTensor/benchmarks"
+mha_dir="$benchmark_dir/multi-head_attention/baseline"
+
+bigbird_dir="$benchmark_dir/blocked_sparse_attention/pytorch"
+
+# 2. ncu test the bigbird benchmark
+echo "NCU profiling BigBird benchmark"
+$ncu_dir/ncu --section "MemoryWorkloadAnalysis" \
+    --csv --set full python3 $bigbird_dir/main.py > $log_dir/pt_bigbird_ncu.csv
+
diff --git a/artifacts/run_all_ncu_triton.sh b/artifacts/run_all_ncu_triton.sh
new file mode 100755
index 000000000..adf355544
--- /dev/null
+++ b/artifacts/run_all_ncu_triton.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Profile the Triton MHA and BigBird benchmark scripts with Nsight Compute.
+ncu_dir="/home/sosp/env/spack/opt/spack/linux-ubuntu22.04-zen2/gcc-11.4.0/cuda-12.4.0-ypujjdfaen2zwiplopzke4ud33wddscv/bin"
+root_dir=$(pwd)
+log_dir="$root_dir/logs"
+benchmark_dir="FractalTensor/benchmarks"
+mha_dir="$benchmark_dir/multi-head_attention/baseline"
+bigbird_dir="$benchmark_dir/blocked_sparse_attention/triton"
+mkdir -p "$log_dir" # the redirections below need this directory to exist
+
+# 1. MHA: stack_rnn_path is never set here; when unset, fall back to $root_dir.
+echo "NCU profiling mha benchmark"
+"$ncu_dir/ncu" --section "MemoryWorkloadAnalysis" --csv --set full \
+    python3 "${stack_rnn_path:-$root_dir}/$mha_dir/test_triton_model.py" > "$log_dir/triton_attention_ncu.csv"
+
+# 2. ncu test the bigbird benchmark
+echo "NCU profiling BigBird benchmark"
+"$ncu_dir/ncu" --section "MemoryWorkloadAnalysis" \
+    --csv --set full python3 "$bigbird_dir/main.py" \
+    --default_test > "$log_dir/triton_bigbird_ncu.csv"
\ No newline at end of file