From e9e586b82ef00d0eb9acf8d4a4d622b1f2bf5391 Mon Sep 17 00:00:00 2001 From: Aaron Siddhartha Mondal Date: Tue, 28 May 2024 08:54:06 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=8D=A6=20Bring=20CUDA=20setup=20in=20sync?= =?UTF-8?q?=20with=20recent=20nixpkgs=20(#242)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a subtle bug. The CUDA_TOOLKIT changed on the nix side and no longer contained the actual cuda driver. This lead to only the stubs being added to the runpath of CUDA executables. The stubs are only meant to be used at link time but not functional during runtime. This change prepends the runpath with the correct CUDA driver. Also removes the now obsolete `LL_CUDA_RUNTIME` flag. --- docs/reference/toolchain.md | 17 ++++++++--------- ll/BUILD.bazel | 6 ------ ll/args.bzl | 19 ++++++++++--------- ll/attributes.bzl | 6 ------ ll/toolchain.bzl | 1 - modules/defaultLlEnv.nix | 1 - modules/rules_ll.nix | 2 -- 7 files changed, 18 insertions(+), 34 deletions(-) diff --git a/docs/reference/toolchain.md b/docs/reference/toolchain.md index f7607f2e..023b129c 100644 --- a/docs/reference/toolchain.md +++ b/docs/reference/toolchain.md @@ -6,14 +6,14 @@ This file declares the `ll_toolchain` rule. ## `ll_toolchain` -
ll_toolchain(name, LL_AMD_INCLUDES, LL_AMD_LIBRARIES, LL_CFLAGS, LL_CUDA_DRIVER, LL_CUDA_RUNTIME,
-             LL_CUDA_TOOLKIT, LL_DYNAMIC_LINKER, LL_LDFLAGS, address_sanitizer, archiver,
-             bitcode_linker, builtin_includes, c_driver, clang_tidy, clang_tidy_runner,
-             compiler_runtime, cov, cpp_abihdrs, cpp_abilib, cpp_driver, cpp_stdhdrs, cpp_stdlib,
-             hip_libraries, hip_runtime, leak_sanitizer, linker, linker_wrapper, llvm_project_deps,
-             machine_code_tool, memory_sanitizer, objcopy, offload_bundler, offload_packager, opt,
-             profdata, profile, rocm_device_libs, symbolizer, thread_sanitizer,
-             undefined_behavior_sanitizer, unwind_library)
+
ll_toolchain(name, LL_AMD_INCLUDES, LL_AMD_LIBRARIES, LL_CFLAGS, LL_CUDA_DRIVER, LL_CUDA_TOOLKIT,
+             LL_DYNAMIC_LINKER, LL_LDFLAGS, address_sanitizer, archiver, bitcode_linker,
+             builtin_includes, c_driver, clang_tidy, clang_tidy_runner, compiler_runtime, cov,
+             cpp_abihdrs, cpp_abilib, cpp_driver, cpp_stdhdrs, cpp_stdlib, hip_libraries, hip_runtime,
+             leak_sanitizer, linker, linker_wrapper, llvm_project_deps, machine_code_tool,
+             memory_sanitizer, objcopy, offload_bundler, offload_packager, opt, profdata, profile,
+             rocm_device_libs, symbolizer, thread_sanitizer, undefined_behavior_sanitizer,
+             unwind_library)
`attributes` @@ -24,7 +24,6 @@ This file declares the `ll_toolchain` rule. | `LL_AMD_LIBRARIES` | Label, optional, defaults to None.

Link search paths for dependencies making use of AMD toolchains.

Affects the `hip_amdgpu` toolchain. | | `LL_CFLAGS` | Label, optional, defaults to None.

Arbitrary flags added to all compile actions. | | `LL_CUDA_DRIVER` | Label, optional, defaults to None.

The path to the CUDA driver.

Affects the `cuda_nvptx` and `hip_nvptx` toolchains. | -| `LL_CUDA_RUNTIME` | Label, optional, defaults to None.

The path to the CUDA runtime.

Affects the `cuda_nvptx` and `hip_nvptx` toolchains. | | `LL_CUDA_TOOLKIT` | Label, optional, defaults to None.

The path to the CUDA toolkit.

Affects the `cuda_nvptx` and `hip_nvptx` toolchains. | | `LL_DYNAMIC_LINKER` | Label, optional, defaults to None.

The linker from the glibc we compile and link against. | | `LL_LDFLAGS` | Label, optional, defaults to None.

Arbitrary flags added to all link actions. | diff --git a/ll/BUILD.bazel b/ll/BUILD.bazel index ce730671..a8244a83 100644 --- a/ll/BUILD.bazel +++ b/ll/BUILD.bazel @@ -94,7 +94,6 @@ string_flag( "LL_AMD_INCLUDES", "LL_AMD_LIBRARIES", "LL_CUDA_TOOLKIT", - "LL_CUDA_RUNTIME", "LL_CUDA_DRIVER", # Unset values default to an empty string. @@ -125,11 +124,6 @@ ll_toolchain( ":cuda_nvptx": ":LL_CUDA_DRIVER", "//conditions:default": "LL_UNSET", }), - LL_CUDA_RUNTIME = select({ - ":hip_nvptx": ":LL_CUDA_RUNTIME", - ":cuda_nvptx": ":LL_CUDA_RUNTIME", - "//conditions:default": "LL_UNSET", - }), LL_CUDA_TOOLKIT = select({ ":hip_nvptx": ":LL_CUDA_TOOLKIT", ":cuda_nvptx": ":LL_CUDA_TOOLKIT", diff --git a/ll/args.bzl b/ll/args.bzl index 4766470a..9a168d88 100644 --- a/ll/args.bzl +++ b/ll/args.bzl @@ -484,15 +484,16 @@ def link_executable_args(ctx, in_files, out_file, mode): "cuda_nvptx", "hip_nvptx", ]: - for location in [toolchain.LL_CUDA_TOOLKIT, toolchain.LL_CUDA_RUNTIME]: - if location != "": - args.add(location, format = "-rpath=%s/lib") - args.add(location, format = "-L%s/lib") - - # TODO: Not pretty. With the right nix packages we can probably - # do this more elegantly. - args.add(location, format = "-rpath=%s/lib/stubs") - args.add(location, format = "-L%s/lib/stubs") + # Both the CUDA driver and the CUDA toolkit contain `libcuda.so`. + # Link against `/lib/libcuda.so` at build time, but make + # sure that `/lib/libcuda.so` takes precedence at runtime. + if toolchain.LL_CUDA_DRIVER != "": + args.add(toolchain.LL_CUDA_DRIVER, format = "-rpath=%s/lib") + + if toolchain.LL_CUDA_TOOLKIT != "": + args.add(toolchain.LL_CUDA_TOOLKIT, format = "-rpath=%s/lib") + args.add(toolchain.LL_CUDA_TOOLKIT, format = "-L%s/lib") + args.add(toolchain.LL_CUDA_TOOLKIT, format = "-L%s/lib/stubs") args.add("-lcuda") args.add("-lcudart_static") diff --git a/ll/attributes.bzl b/ll/attributes.bzl index 2daa33dc..2ad865eb 100644 --- a/ll/attributes.bzl +++ b/ll/attributes.bzl @@ -589,12 +589,6 @@ LL_TOOLCHAIN_ATTRS = { Affects the `cuda_nvptx` and `hip_nvptx` toolchains. """, ), - "LL_CUDA_RUNTIME": attr.label( - doc = """The path to the CUDA runtime. - - Affects the `cuda_nvptx` and `hip_nvptx` toolchains. - """, - ), "LL_CUDA_DRIVER": attr.label( doc = """The path to the CUDA driver. diff --git a/ll/toolchain.bzl b/ll/toolchain.bzl index a28531b6..461afa98 100644 --- a/ll/toolchain.bzl +++ b/ll/toolchain.bzl @@ -70,7 +70,6 @@ def _ll_toolchain_impl(ctx): LL_AMD_INCLUDES = ctx.attr.LL_AMD_INCLUDES[BuildSettingInfo].value, LL_AMD_LIBRARIES = ctx.attr.LL_AMD_LIBRARIES[BuildSettingInfo].value, LL_CUDA_TOOLKIT = ctx.attr.LL_CUDA_TOOLKIT[BuildSettingInfo].value, - LL_CUDA_RUNTIME = ctx.attr.LL_CUDA_RUNTIME[BuildSettingInfo].value, LL_CUDA_DRIVER = ctx.attr.LL_CUDA_DRIVER[BuildSettingInfo].value, ), ] diff --git a/modules/defaultLlEnv.nix b/modules/defaultLlEnv.nix index 03c2e703..e52dcd29 100644 --- a/modules/defaultLlEnv.nix +++ b/modules/defaultLlEnv.nix @@ -74,6 +74,5 @@ in # Flags for CUDA dependencies. "LL_CUDA_TOOLKIT=${lib.strings.optionalString pkgs.config.cudaSupport "${cudatoolkit}"}" - "LL_CUDA_RUNTIME=${lib.strings.optionalString pkgs.config.cudaSupport "${cudatoolkit.lib}"}" "LL_CUDA_DRIVER=${lib.strings.optionalString pkgs.config.cudaSupport "${nvidia_x11}"}" ] diff --git a/modules/rules_ll.nix b/modules/rules_ll.nix index 1e68080a..c2bab93d 100644 --- a/modules/rules_ll.nix +++ b/modules/rules_ll.nix @@ -49,9 +49,7 @@ in - `LL_DYNAMIC_LINKER` - `LL_AMD_INCLUDES` - `LL_AMD_LIBRARIES` - - `LL_AMD_RPATHS` - `LL_CUDA_TOOLKIT` - - `LL_CUDA_RUNTIME` - `LL_CUDA_DRIVER` Attempting to set any other value will result in Bazel errors.