Implement hermetic cuda usage across TF projects.

PiperOrigin-RevId: 616865795
openxla · Mar 28, 2024 · 3359d97 · 3359d97
1 parent cf3ccda
commit 3359d97
Show file tree

Hide file tree

Showing 46 changed files with 2,271 additions and 238 deletions.
diff --git a/.bazelrc b/.bazelrc
@@ -232,7 +232,6 @@ build:cuda --@local_config_cuda//:enable_cuda
 # CUDA: This config refers to building CUDA op kernels with clang.
 build:cuda_clang --config=cuda
 # Enable TensorRT optimizations https://developer.nvidia.com/tensorrt
-build:cuda_clang --config=tensorrt
 build:cuda_clang --action_env=TF_CUDA_CLANG="1"
 build:cuda_clang --@local_config_cuda//:cuda_compiler=clang
 # Select supported compute capabilities (supported graphics cards).
@@ -247,12 +246,10 @@ build:cuda_clang --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_8
 
 # Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
 build:cuda_clang_official --config=cuda_clang
-build:cuda_clang_official --action_env=TF_CUDA_VERSION="12"
+build:cuda_clang_official --action_env=TF_CUDA_VERSION="12.3"
 build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8"
-build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.3"
 build:cuda_clang_official --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
 build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-17/bin/clang"
-build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
 build:cuda_clang_official --crosstool_top="@sigbuild-r2.16-clang_config_cuda//crosstool:toolchain"
 
 # Build with nvcc for CUDA and clang for host
@@ -533,9 +530,7 @@ build:rbe_linux_cuda --config=rbe_linux_cpu
 # For Remote build execution -- GPU configuration
 build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1
 build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.16-clang_config_cuda"
-build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.16-clang_config_tensorrt"
 build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.16-clang_config_nccl"
-test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
 
 build:rbe_linux_cuda_nvcc --config=rbe_linux_cuda
 build:rbe_linux_cuda_nvcc --config=nvcc_clang
@@ -629,7 +624,6 @@ build:release_cpu_linux_base --repo_env=BAZEL_COMPILER="/usr/lib/llvm-17/bin/cla
 # Test-related settings below this point.
 test:release_linux_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true
 test:release_linux_base --local_test_jobs=HOST_CPUS
-test:release_linux_base --test_env=LD_LIBRARY_PATH
 # Give only the list of failed tests at the end of the log
 test:release_linux_base --test_summary=short
 
@@ -641,7 +635,6 @@ build:release_gpu_linux --config=release_cpu_linux
 # Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
 # Note that linux cpu and cuda builds share the same toolchain now.
 build:release_gpu_linux --config=cuda_clang_official
-test:release_gpu_linux --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
 # Local test jobs has to be 4 because parallel_gpu_execute is fragile, I think
 test:release_gpu_linux --test_timeout=300,450,1200,3600 --local_test_jobs=4 --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute
 
@@ -672,9 +665,6 @@ build:unsupported_gpu_linux --config=unsupported_cpu_linux
 build:unsupported_gpu_linux --action_env=TF_CUDA_VERSION="11"
 build:unsupported_gpu_linux --action_env=TF_CUDNN_VERSION="8"
 build:unsupported_gpu_linux --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_50,sm_60,sm_70,sm_75,compute_80"
-build:unsupported_gpu_linux --config=tensorrt
-build:unsupported_gpu_linux --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-11.2"
-build:unsupported_gpu_linux --action_env=LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.1/lib64:/usr/local/tensorrt/lib"
 build:unsupported_gpu_linux --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
 build:unsupported_gpu_linux --crosstool_top=@ubuntu20.04-gcc9_manylinux2014-cuda11.2-cudnn8.1-tensorrt7.2_config_cuda//crosstool:toolchain
 

diff --git a/third_party/repo.bzl b/third_party/repo.bzl
@@ -158,3 +158,72 @@ tf_vendored = repository_rule(
         "relpath": attr.string(),
     },
 )
+
+# Returns the last path component of `url` with the archive suffix cut off.
+# The trailing len(archive_suffix) characters are dropped unconditionally;
+# nothing checks that `url` actually ends with `archive_suffix`.
+def _get_archive_name(url, archive_suffix = ".tar.xz"):
+    # rfind yields -1 when `url` has no "/", so the slice then starts at 0.
+    last_slash_index = url.rfind("/")
+    return url[last_slash_index + 1:-len(archive_suffix)]
+
+# Repository rule implementation: when TF_CUDA_VERSION is set, downloads and
+# extracts one CUDA redistributable archive into the repository; in every
+# case it then writes the repository's BUILD file, either from
+# `build_template` (with a version substitution) or from `build_file`.
+def _cuda_http_archive_impl(repository_ctx):
+    # _get_env_var is defined elsewhere in this file; the result is treated
+    # below as a possibly-empty version string.
+    cuda_version = _get_env_var(repository_ctx, "TF_CUDA_VERSION")
+    archive_version = ""
+    # A bare major version "12" is treated as an alias for "12.3".
+    if cuda_version == "12":
+        cuda_version = "12.3"
+    if cuda_version:
+        # Download archive only when GPU config is used.
+        # Maps the host architecture reported by Bazel to the name used in the
+        # archive URL. Any architecture other than these two fails the lookup.
+        arch_dict = {"amd64": "x86_64", "aarch64": "sbsa"}
+        arch = repository_ctx.os.arch
+        archive_arch_suffix = arch_dict[arch]
+        # Both lookups fail if the requested version is not present in the
+        # dictionaries supplied to the rule.
+        archive_version = repository_ctx.attr.version_dict[cuda_version]
+        # Note: the checksum key uses the raw os.arch value (e.g.
+        # "12.3-amd64"), not the translated URL suffix.
+        sha256 = repository_ctx.attr.sha256_dict["{cuda_version}-{arch}".format(cuda_version = cuda_version, arch = arch)]
+
+        # The format of the url_template should be the following:
+        # https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/linux-{arch}/cuda_nvtx-linux-{arch}-{version}-archive.tar.xz
+        url = repository_ctx.attr.url_template.format(arch = archive_arch_suffix, version = archive_version)
+
+        # File name from the URL with the ".<type>" extension removed.
+        archive_name = _get_archive_name(url, "." + repository_ctx.attr.type)
+
+        # Archives of type "whl" are saved with a ".zip" extension; every other
+        # type keeps its own extension. Presumably this lets extract()
+        # recognise the wheel as a zip archive (TODO confirm).
+        repository_ctx.download(
+            url = tf_mirror_urls(url),
+            output = archive_name + "." + ("zip" if repository_ctx.attr.type == "whl" else repository_ctx.attr.type),
+            sha256 = sha256,
+        )
+        # Unless `strip_prefix` is given, the top-level directory to strip is
+        # taken to be the archive name itself.
+        repository_ctx.extract(
+            archive = archive_name + "." + ("zip" if repository_ctx.attr.type == "whl" else repository_ctx.attr.type),
+            stripPrefix = repository_ctx.attr.strip_prefix if repository_ctx.attr.strip_prefix else archive_name,
+        )
+    if repository_ctx.attr.build_template:
+        # Only the text before the first "." of the archive version is used;
+        # it is the empty string when no CUDA version was requested.
+        version = archive_version.split(".")[0] if archive_version else ""
+        repository_ctx.file("version.txt", version)
+        repository_ctx.template(
+            "BUILD",
+            repository_ctx.attr.build_template,
+            {"%{version}": version},
+        )
+    else:
+        # No template: the BUILD file is a verbatim copy of `build_file`.
+        repository_ctx.file(
+            "BUILD",
+            repository_ctx.read(repository_ctx.attr.build_file),
+        )
+
+# Repository rule backed by _cuda_http_archive_impl. Attribute meanings below
+# are taken from how the implementation reads them.
+_cuda_http_archive = repository_rule(
+    implementation = _cuda_http_archive_impl,
+    attrs = {
+        # Checksums keyed by "<cuda_version>-<os.arch>", e.g. "12.3-amd64".
+        "sha256_dict": attr.string_dict(mandatory = True),
+        # Archive version keyed by CUDA version, e.g. "12.3".
+        "version_dict": attr.string_dict(mandatory = True),
+        # URL with "{arch}" and "{version}" placeholders.
+        "url_template": attr.string(mandatory = True),
+        # Archive extension without the leading dot; "whl" is stored as ".zip".
+        "type": attr.string(default = "tar.xz"),
+        # BUILD template receiving the "%{version}" substitution; when unset,
+        # `build_file` is copied verbatim instead.
+        "build_template": attr.label(),
+        "build_file": attr.label(),
+        # Overrides the directory prefix stripped on extraction; defaults to
+        # the archive name when empty.
+        "strip_prefix": attr.string(),
+    },
+    # Declares the environment variable the implementation reads.
+    environ = ["TF_CUDA_VERSION"],
+)
+
+# Public entry point: instantiates the _cuda_http_archive repository rule
+# with the given name and dictionaries. Any extra keyword arguments (e.g.
+# type, build_template, build_file, strip_prefix) are forwarded unchanged.
+def cuda_http_archive(name, sha256_dict, version_dict, url_template, **kwargs):
+    _cuda_http_archive(
+        name = name,
+        sha256_dict = sha256_dict,
+        url_template = url_template,
+        version_dict = version_dict,
+        **kwargs
+    )
diff --git a/third_party/tsl/.bazelrc b/third_party/tsl/.bazelrc
@@ -232,7 +232,6 @@ build:cuda --@local_config_cuda//:enable_cuda
 # CUDA: This config refers to building CUDA op kernels with clang.
 build:cuda_clang --config=cuda
 # Enable TensorRT optimizations https://developer.nvidia.com/tensorrt
-build:cuda_clang --config=tensorrt
 build:cuda_clang --action_env=TF_CUDA_CLANG="1"
 build:cuda_clang --@local_config_cuda//:cuda_compiler=clang
 # Select supported compute capabilities (supported graphics cards).
@@ -247,12 +246,10 @@ build:cuda_clang --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_8
 
 # Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
 build:cuda_clang_official --config=cuda_clang
-build:cuda_clang_official --action_env=TF_CUDA_VERSION="12"
+build:cuda_clang_official --action_env=TF_CUDA_VERSION="12.3"
 build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8"
-build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.3"
 build:cuda_clang_official --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
 build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-17/bin/clang"
-build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
 build:cuda_clang_official --crosstool_top="@sigbuild-r2.16-clang_config_cuda//crosstool:toolchain"
 
 # Build with nvcc for CUDA and clang for host
@@ -533,9 +530,7 @@ build:rbe_linux_cuda --config=rbe_linux_cpu
 # For Remote build execution -- GPU configuration
 build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1
 build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.16-clang_config_cuda"
-build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.16-clang_config_tensorrt"
 build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.16-clang_config_nccl"
-test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
 
 build:rbe_linux_cuda_nvcc --config=rbe_linux_cuda
 build:rbe_linux_cuda_nvcc --config=nvcc_clang
@@ -629,7 +624,6 @@ build:release_cpu_linux_base --repo_env=BAZEL_COMPILER="/usr/lib/llvm-17/bin/cla
 # Test-related settings below this point.
 test:release_linux_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true
 test:release_linux_base --local_test_jobs=HOST_CPUS
-test:release_linux_base --test_env=LD_LIBRARY_PATH
 # Give only the list of failed tests at the end of the log
 test:release_linux_base --test_summary=short
 
@@ -641,7 +635,6 @@ build:release_gpu_linux --config=release_cpu_linux
 # Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
 # Note that linux cpu and cuda builds share the same toolchain now.
 build:release_gpu_linux --config=cuda_clang_official
-test:release_gpu_linux --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
 # Local test jobs has to be 4 because parallel_gpu_execute is fragile, I think
 test:release_gpu_linux --test_timeout=300,450,1200,3600 --local_test_jobs=4 --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute
 
@@ -672,9 +665,6 @@ build:unsupported_gpu_linux --config=unsupported_cpu_linux
 build:unsupported_gpu_linux --action_env=TF_CUDA_VERSION="11"
 build:unsupported_gpu_linux --action_env=TF_CUDNN_VERSION="8"
 build:unsupported_gpu_linux --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_50,sm_60,sm_70,sm_75,compute_80"
-build:unsupported_gpu_linux --config=tensorrt
-build:unsupported_gpu_linux --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-11.2"
-build:unsupported_gpu_linux --action_env=LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.1/lib64:/usr/local/tensorrt/lib"
 build:unsupported_gpu_linux --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
 build:unsupported_gpu_linux --crosstool_top=@ubuntu20.04-gcc9_manylinux2014-cuda11.2-cudnn8.1-tensorrt7.2_config_cuda//crosstool:toolchain
 

diff --git a/third_party/tsl/opensource_only.files b/third_party/tsl/opensource_only.files
@@ -21,22 +21,40 @@ third_party/git/BUILD.tpl:
 third_party/git/BUILD:
 third_party/git/git_configure.bzl:
 third_party/gpus/BUILD:
+third_party/gpus/compiler_common_tools.bzl:
 third_party/gpus/crosstool/BUILD.rocm.tpl:
 third_party/gpus/crosstool/BUILD.tpl:
 third_party/gpus/crosstool/BUILD:
 third_party/gpus/crosstool/LICENSE:
 third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl:
 third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl:
 third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl:
+third_party/gpus/cuda/BUILD.hermetic.tpl:
 third_party/gpus/cuda/BUILD.tpl:
 third_party/gpus/cuda/BUILD.windows.tpl:
 third_party/gpus/cuda/BUILD:
 third_party/gpus/cuda/LICENSE:
 third_party/gpus/cuda/build_defs.bzl.tpl:
+third_party/gpus/cuda/cuda_cccl.BUILD:
 third_party/gpus/cuda/cuda_config.h.tpl:
 third_party/gpus/cuda/cuda_config.py.tpl:
+third_party/gpus/cuda/cuda_cublas.BUILD.tpl:
+third_party/gpus/cuda/cuda_cudart.BUILD.tpl:
+third_party/gpus/cuda/cuda_cudnn.BUILD.tpl:
+third_party/gpus/cuda/cuda_cufft.BUILD.tpl:
+third_party/gpus/cuda/cuda_cupti.BUILD.tpl:
+third_party/gpus/cuda/cuda_curand.BUILD.tpl:
+third_party/gpus/cuda/cuda_cusolver.BUILD.tpl:
+third_party/gpus/cuda/cuda_cusparse.BUILD.tpl:
+third_party/gpus/cuda/cuda_nccl.BUILD:
+third_party/gpus/cuda/cuda_nvcc.BUILD:
+third_party/gpus/cuda/cuda_nvjitlink.BUILD.tpl:
+third_party/gpus/cuda/cuda_nvml.BUILD:
+third_party/gpus/cuda/cuda_nvprune.BUILD:
+third_party/gpus/cuda/cuda_nvtx.BUILD:
 third_party/gpus/cuda_configure.bzl:
 third_party/gpus/find_cuda_config:.py
+third_party/gpus/hermetic_cuda_configure.bzl:
 third_party/gpus/rocm/BUILD.tpl:
 third_party/gpus/rocm/BUILD:
 third_party/gpus/rocm/build_defs.bzl.tpl:
@@ -61,6 +79,7 @@ third_party/nccl/archive.BUILD:
 third_party/nccl/archive.patch:
 third_party/nccl/build_defs.bzl.tpl:
 third_party/nccl/generated_names.bzl.tpl:
+third_party/nccl/hermetic_nccl_configure.bzl:
 third_party/nccl/nccl_configure.bzl:
 third_party/nccl/system.BUILD.tpl:
 third_party/nvtx/BUILD: