From 59197341ef3e57bfb77ebbcc259429d296a3f5b6 Mon Sep 17 00:00:00 2001 From: kings177 Date: Fri, 16 Aug 2024 18:24:41 -0300 Subject: [PATCH 1/4] add dynamic shared mem allocation for hvm.cu --- build.rs | 26 +++++++++++++++++++++++++- src/get_shared_mem.cu | 24 ++++++++++++++++++++++++ src/hvm.cu | 10 ++++++++-- 3 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 src/get_shared_mem.cu diff --git a/build.rs b/build.rs index dd72c946..4ffac5b4 100644 --- a/build.rs +++ b/build.rs @@ -1,3 +1,5 @@ +use std::process::Command; + fn main() { let cores = num_cpus::get(); let tpcl2 = (cores as f64).log2().floor() as u32; @@ -6,6 +8,8 @@ fn main() { println!("cargo:rerun-if-changed=src/hvm.c"); println!("cargo:rerun-if-changed=src/run.cu"); println!("cargo:rerun-if-changed=src/hvm.cu"); + println!("cargo:rerun-if-changed=src/get_shared_mem.cu"); + println!("cargo:rerun-if-changed=src/shared_mem_config.h"); println!("cargo:rustc-link-arg=-rdynamic"); match cc::Build::new() @@ -23,13 +27,33 @@ fn main() { } // Builds hvm.cu - if std::process::Command::new("nvcc").arg("--version").stdout(std::process::Stdio::null()).stderr(std::process::Stdio::null()).status().is_ok() { + if Command::new("nvcc").arg("--version").stdout(std::process::Stdio::null()).stderr(std::process::Stdio::null()).status().is_ok() { if let Ok(cuda_path) = std::env::var("CUDA_HOME") { println!("cargo:rustc-link-search=native={}/lib64", cuda_path); } else { println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64"); } + // Compile get_shared_mem.cu + if let Ok(output) = Command::new("nvcc") + .args(&["src/get_shared_mem.cu", "-o", "get_shared_mem"]) + .output() + .and_then(|_| Command::new("./get_shared_mem").output()) { + if output.status.success() { + let shared_mem_str = String::from_utf8_lossy(&output.stdout).trim().to_string(); + std::fs::write("src/shared_mem_config.h", format!("#define HVM_SHARED_MEM {}", shared_mem_str)) + .expect("Failed to write shared_mem_config.h"); + println!("cargo:warning=Shared memory size: {}", shared_mem_str); + } else { + println!("cargo:warning=\x1b[1m\x1b[31mWARNING: Failed to get shared memory size. Using default value.\x1b[0m"); + } + } else { + println!("cargo:warning=\x1b[1m\x1b[31mWARNING: Failed to compile or run get_shared_mem.cu. Using default shared memory value.\x1b[0m"); + } + + // Clean up temporary executable + let _ = std::fs::remove_file("get_shared_mem"); + cc::Build::new() .cuda(true) .file("src/run.cu") diff --git a/src/get_shared_mem.cu b/src/get_shared_mem.cu new file mode 100644 index 00000000..6105d8b4 --- /dev/null +++ b/src/get_shared_mem.cu @@ -0,0 +1,24 @@ +#include +#include + +int main() { + int device = 0; + cudaDeviceProp prop; + cudaGetDeviceProperties(&prop, device); + + size_t sharedMemPerBlock = prop.sharedMemPerBlock; + int maxSharedMemPerBlockOptin; + cudaDeviceGetAttribute(&maxSharedMemPerBlockOptin, cudaDevAttrMaxSharedMemoryPerBlockOptin, device); + + size_t maxSharedMem = (sharedMemPerBlock > (size_t)maxSharedMemPerBlockOptin) ? sharedMemPerBlock : (size_t)maxSharedMemPerBlockOptin; + + // Subtract 3KB (3072 bytes) from the max shared memory as is allocated somewhere else + maxSharedMem -= 3072; + + // Calculate the hex value + unsigned int hexValue = (unsigned int)(maxSharedMem / 12); + + printf("0x%X", hexValue); + + return 0; +} diff --git a/src/hvm.cu b/src/hvm.cu index ffa3401b..d0c9fe6d 100644 --- a/src/hvm.cu +++ b/src/hvm.cu @@ -126,9 +126,15 @@ struct RBag { Pair lo_buf[RLEN]; }; +#include "shared_mem_config.h" + +#ifndef HVM_SHARED_MEM +#define HVM_SHARED_MEM 0x2000 // Default value +#endif + // Local Net -const u32 L_NODE_LEN = 0x2000; -const u32 L_VARS_LEN = 0x2000; +const u32 L_NODE_LEN = HVM_SHARED_MEM; +const u32 L_VARS_LEN = HVM_SHARED_MEM; struct LNet { Pair node_buf[L_NODE_LEN]; Port vars_buf[L_VARS_LEN]; From d5e326131f7909663228256f4e07847dc371ae84 Mon Sep 17 00:00:00 2001 From: kings177 Date: Fri, 16 Aug 2024 18:28:17 -0300 Subject: [PATCH 2/4] .gitignore autogenerated cuda headers --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 2b9560b3..f2c20d3c 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,6 @@ examples/**/*.cu # nix-direnv /.direnv/ /.envrc + +# cuda header +src/shared_mem_config.h \ No newline at end of file From 1579e64c8d82ef47210241c83e9fceb9697d39d6 Mon Sep 17 00:00:00 2001 From: kings177 Date: Fri, 16 Aug 2024 18:39:03 -0300 Subject: [PATCH 3/4] remove unnecessary print line --- build.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/build.rs b/build.rs index 4ffac5b4..b20a72c0 100644 --- a/build.rs +++ b/build.rs @@ -43,7 +43,6 @@ fn main() { let shared_mem_str = String::from_utf8_lossy(&output.stdout).trim().to_string(); std::fs::write("src/shared_mem_config.h", format!("#define HVM_SHARED_MEM {}", shared_mem_str)) .expect("Failed to write shared_mem_config.h"); - println!("cargo:warning=Shared memory size: {}", shared_mem_str); } else { println!("cargo:warning=\x1b[1m\x1b[31mWARNING: Failed to get shared memory size. Using default value.\x1b[0m"); } From 2fa9f5967962bfc21a7c2540b762a8725f473ba5 Mon Sep 17 00:00:00 2001 From: kings177 Date: Mon, 19 Aug 2024 20:04:13 -0300 Subject: [PATCH 4/4] directly pass the value of shared mem to a define method --- .gitignore | 3 --- build.rs | 28 +++++++++++++++------------- src/hvm.cu | 5 ++--- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index f2c20d3c..2b9560b3 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,3 @@ examples/**/*.cu # nix-direnv /.direnv/ /.envrc - -# cuda header -src/shared_mem_config.h \ No newline at end of file diff --git a/build.rs b/build.rs index b20a72c0..31b8e748 100644 --- a/build.rs +++ b/build.rs @@ -9,7 +9,6 @@ fn main() { println!("cargo:rerun-if-changed=src/run.cu"); println!("cargo:rerun-if-changed=src/hvm.cu"); println!("cargo:rerun-if-changed=src/get_shared_mem.cu"); - println!("cargo:rerun-if-changed=src/shared_mem_config.h"); println!("cargo:rustc-link-arg=-rdynamic"); match cc::Build::new() @@ -35,28 +34,31 @@ fn main() { } // Compile get_shared_mem.cu - if let Ok(output) = Command::new("nvcc") + let shared_mem_value = Command::new("nvcc") .args(&["src/get_shared_mem.cu", "-o", "get_shared_mem"]) .output() - .and_then(|_| Command::new("./get_shared_mem").output()) { - if output.status.success() { - let shared_mem_str = String::from_utf8_lossy(&output.stdout).trim().to_string(); - std::fs::write("src/shared_mem_config.h", format!("#define HVM_SHARED_MEM {}", shared_mem_str)) - .expect("Failed to write shared_mem_config.h"); - } else { - println!("cargo:warning=\x1b[1m\x1b[31mWARNING: Failed to get shared memory size. Using default value.\x1b[0m"); - } - } else { - println!("cargo:warning=\x1b[1m\x1b[31mWARNING: Failed to compile or run get_shared_mem.cu. Using default shared memory value.\x1b[0m"); - } + .and_then(|_| Command::new("./get_shared_mem").output()) + .ok() + .and_then(|output| if output.status.success() { + Some(String::from_utf8_lossy(&output.stdout).trim().to_string()) + } else { + None + }) + .unwrap_or_else(|| { + println!("cargo:warning=\x1b[1m\x1b[31mWARNING: Failed to get shared memory size. Using default value.\x1b[0m"); + "0x2000".to_string() + }); // Clean up temporary executable let _ = std::fs::remove_file("get_shared_mem"); + println!("cargo:warning=\x1b[1m\x1b[33mShared memory size set to: {}\x1b[0m", shared_mem_value); + cc::Build::new() .cuda(true) .file("src/run.cu") .define("IO", None) + .define("HVM_SHARED_MEM", Some(shared_mem_value.as_str())) .flag("-diag-suppress=177") // variable was declared but never referenced .flag("-diag-suppress=550") // variable was set but never used .flag("-diag-suppress=20039") // a __host__ function redeclared with __device__, hence treated as a __host__ __device__ function diff --git a/src/hvm.cu b/src/hvm.cu index d0c9fe6d..e8a9b8b3 100644 --- a/src/hvm.cu +++ b/src/hvm.cu @@ -126,10 +126,9 @@ struct RBag { Pair lo_buf[RLEN]; }; -#include "shared_mem_config.h" - +// Default value for shared memory (96KB) #ifndef HVM_SHARED_MEM -#define HVM_SHARED_MEM 0x2000 // Default value +#define HVM_SHARED_MEM 0x2000 #endif // Local Net