diff --git a/crates/llama_cpp_sys/build.rs b/crates/llama_cpp_sys/build.rs index 0d38b5c..34de349 100644 --- a/crates/llama_cpp_sys/build.rs +++ b/crates/llama_cpp_sys/build.rs @@ -430,7 +430,7 @@ fn compile_cuda(cx: &mut Build, cxx: &mut Build, featless_cxx: Build) -> &'stati let mut nvcc = featless_cxx; nvcc.cuda(true) .flag("--forward-unknown-to-host-compiler") - .flag("-arch=native") + .flag("-arch=all") .define("K_QUANTS_PER_ITERATION", Some("2")) .define("GGML_CUDA_PEER_MAX_BATCH_SIZE", Some("128"));