diff --git a/build.rs b/build.rs
index dd72c946..31b8e748 100644
--- a/build.rs
+++ b/build.rs
@@ -1,3 +1,41 @@
+use std::path::PathBuf;
+use std::process::Command;
+
+/// Local net length used when the GPU cannot be probed; mirrors the default in `src/hvm.cu`.
+const DEFAULT_SHARED_MEM: &str = "0x2000";
+
+/// Compiles and runs `src/get_shared_mem.cu`, returning the hex length it prints.
+///
+/// Returns `None` if `OUT_DIR` is unset, `nvcc` fails, the probe exits with an
+/// error, or its output is not a non-zero hex literal.
+fn probe_shared_mem() -> Option<String> {
+  // Build inside OUT_DIR so the source tree is never written to.
+  let exe = PathBuf::from(std::env::var_os("OUT_DIR")?).join("get_shared_mem");
+
+  let compiled = Command::new("nvcc")
+    .arg("src/get_shared_mem.cu")
+    .arg("-o")
+    .arg(&exe)
+    .output()
+    .ok()?;
+  if !compiled.status.success() {
+    return None;
+  }
+
+  let probe = Command::new(&exe).output();
+  // Best-effort cleanup of the temporary executable.
+  let _ = std::fs::remove_file(&exe);
+  let probe = probe.ok()?;
+  if !probe.status.success() {
+    return None;
+  }
+
+  // Only forward a well-formed, non-zero hex literal to the CUDA build.
+  let value = String::from_utf8_lossy(&probe.stdout).trim().to_string();
+  let len = u32::from_str_radix(value.strip_prefix("0x")?, 16).ok()?;
+  if len > 0 { Some(value) } else { None }
+}
+
 fn main() {
   let cores = num_cpus::get();
   let tpcl2 = (cores as f64).log2().floor() as u32;
@@ -6,6 +44,7 @@ fn main() {
   println!("cargo:rerun-if-changed=src/hvm.c");
   println!("cargo:rerun-if-changed=src/run.cu");
   println!("cargo:rerun-if-changed=src/hvm.cu");
+  println!("cargo:rerun-if-changed=src/get_shared_mem.cu");
   println!("cargo:rustc-link-arg=-rdynamic");
 
   match cc::Build::new()
@@ -23,17 +62,26 @@ fn main() {
   }
 
   // Builds hvm.cu
-  if std::process::Command::new("nvcc").arg("--version").stdout(std::process::Stdio::null()).stderr(std::process::Stdio::null()).status().is_ok() {
+  if Command::new("nvcc").arg("--version").stdout(std::process::Stdio::null()).stderr(std::process::Stdio::null()).status().is_ok() {
     if let Ok(cuda_path) = std::env::var("CUDA_HOME") {
       println!("cargo:rustc-link-search=native={}/lib64", cuda_path);
     } else {
       println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64");
     }
 
+    // Size the local net from the GPU's shared memory; fall back to the default if probing fails
+    let shared_mem_value = probe_shared_mem().unwrap_or_else(|| {
+      println!("cargo:warning=\x1b[1m\x1b[31mWARNING: Failed to get shared memory size. Using default value.\x1b[0m");
+      DEFAULT_SHARED_MEM.to_string()
+    });
+
+    println!("cargo:warning=\x1b[1m\x1b[33mShared memory size set to: {}\x1b[0m", shared_mem_value);
+
     cc::Build::new()
       .cuda(true)
       .file("src/run.cu")
       .define("IO", None)
+      .define("HVM_SHARED_MEM", Some(shared_mem_value.as_str()))
       .flag("-diag-suppress=177") // variable was declared but never referenced
       .flag("-diag-suppress=550") // variable was set but never used
       .flag("-diag-suppress=20039") // a __host__ function redeclared with __device__, hence treated as a __host__ __device__ function
diff --git a/src/get_shared_mem.cu b/src/get_shared_mem.cu
new file mode 100644
index 00000000..6105d8b4
--- /dev/null
+++ b/src/get_shared_mem.cu
@@ -0,0 +1,34 @@
+#include <stdio.h>
+#include <cuda_runtime.h>
+
+// Prints, as a hex literal, how many 12-byte entries fit in the shared memory
+// of device 0. Exits with a non-zero status if that cannot be determined.
+int main() {
+  int device = 0;
+  cudaDeviceProp prop;
+  if (cudaGetDeviceProperties(&prop, device) != cudaSuccess) {
+    return 1;
+  }
+
+  size_t sharedMemPerBlock = prop.sharedMemPerBlock;
+  int maxSharedMemPerBlockOptin = 0;
+  if (cudaDeviceGetAttribute(&maxSharedMemPerBlockOptin, cudaDevAttrMaxSharedMemoryPerBlockOptin, device) != cudaSuccess || maxSharedMemPerBlockOptin < 0) {
+    // Opt-in limit unavailable: fall back to the default per-block limit
+    maxSharedMemPerBlockOptin = 0;
+  }
+
+  size_t maxSharedMem = (sharedMemPerBlock > (size_t)maxSharedMemPerBlockOptin) ? sharedMemPerBlock : (size_t)maxSharedMemPerBlockOptin;
+
+  // Reserve 3KB (3072 bytes) of the max shared memory, which is allocated somewhere else
+  if (maxSharedMem <= 3072) {
+    return 1;
+  }
+  maxSharedMem -= 3072;
+
+  // Calculate the hex value
+  unsigned int hexValue = (unsigned int)(maxSharedMem / 12);
+
+  printf("0x%X", hexValue);
+
+  return 0;
+}
diff --git a/src/hvm.cu b/src/hvm.cu
index ffa3401b..e8a9b8b3 100644
--- a/src/hvm.cu
+++ b/src/hvm.cu
@@ -126,9 +126,14 @@ struct RBag {
   Pair lo_buf[RLEN];
 };
 
+// Default local net length: 0x2000 entries (96KB of shared memory at 12 bytes per entry)
+#ifndef HVM_SHARED_MEM
+#define HVM_SHARED_MEM 0x2000
+#endif
+
 // Local Net
-const u32 L_NODE_LEN = 0x2000;
-const u32 L_VARS_LEN = 0x2000;
+const u32 L_NODE_LEN = HVM_SHARED_MEM;
+const u32 L_VARS_LEN = HVM_SHARED_MEM;
 struct LNet {
   Pair node_buf[L_NODE_LEN];
   Port vars_buf[L_VARS_LEN];