Skip to content

Commit

Permalink
Introduce rocm-only tag and remove if_rocm_is_configured
Browse files Browse the repository at this point in the history
This replaces all `if_rocm_is_configured` guards in `stream_executor/rocm/...` with a filtering tag `rocm-only`. The CUDA build on the CI gets adjusted to skip those targets.

This uncovered some additional problems that get fixed as well:

- A wrong library name for the hipfft library in the Bazel CUDA configuration
- An incorrect test case in the RocmVersionParser test, which until now had not been run anywhere.
- Missing tags for the platform alias targets in `stream_executor/BUILD`

PiperOrigin-RevId: 678548961
  • Loading branch information
beckerhe authored and Google-ML-Automation committed Sep 25, 2024
1 parent 9788cfa commit 3b050e2
Show file tree
Hide file tree
Showing 10 changed files with 373 additions and 186 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/bazel_dependency_violations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
dependency-violations:
strategy:
matrix:
tag: [gpu, cuda-only]
tag: [gpu, cuda-only, rocm-only]
name: no-${{ matrix.tag }}-targets-in-cpu-build
runs-on: ubuntu-22.04
defaults:
Expand Down
4 changes: 2 additions & 2 deletions build_tools/ci/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,9 @@ def nvidia_gpu_build_with_compute_capability(
image_url=_DEFAULT_IMAGE,
target_patterns=_XLA_DEFAULT_TARGET_PATTERNS,
configs=configs,
test_tag_filters=("-no_oss", "requires-gpu-nvidia", "gpu")
test_tag_filters=("-no_oss", "requires-gpu-nvidia", "gpu", "-rocm-only")
+ extra_gpu_tags,
build_tag_filters=("-no_oss", "requires-gpu-nvidia", "gpu"),
build_tag_filters=("-no_oss", "requires-gpu-nvidia", "gpu", "-rocm-only"),
options={
"run_under": "//tools/ci_build/gpu_build:parallel_gpu_execute",
"repo_env": f"TF_CUDA_COMPUTE_CAPABILITIES={compute_capability/10}",
Expand Down
2 changes: 1 addition & 1 deletion build_tools/ci/golden_commands.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ $KOKORO_ARTIFACTS_DIR/github/xla/.kokoro/generate_index_html.sh index.html
nvidia-smi
parallel --ungroup --retries 3 --delay 15 docker pull ::: gcr.io/tensorflow-sigs/build:latest-python3.11
docker run --detach --name=xla_ci --rm --interactive --tty --volume=./github:/github --workdir=/github/xla gcr.io/tensorflow-sigs/build:latest-python3.11 bash
docker exec xla_ci bazel test --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-amd --config=warnings --config=rbe_linux_cuda_nvcc --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --@cuda_driver//:enable_forward_compatibility=true --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/... //build_tools/... @tsl//tsl/...
docker exec xla_ci bazel test --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-amd --config=warnings --config=rbe_linux_cuda_nvcc --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --@cuda_driver//:enable_forward_compatibility=true --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/... //build_tools/... @tsl//tsl/...
docker exec xla_ci bazel analyze-profile profile.json.gz
docker stop xla_ci
# END BuildType.GPU
Expand Down
8 changes: 8 additions & 0 deletions build_tools/dependencies/aspects.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,11 @@ validate_cuda_only_tag = aspect(
implementation = _cuda_only_tag_violation_aspect_impl,
attr_aspects = ["deps"],
)

# Aspect implementation for the `rocm-only` tag check.
# Forwards `target` and `ctx` unchanged to the shared
# `_dependency_violation_aspect_impl` helper, fixing the tag name to
# "rocm-only", and returns whatever that helper returns.
def _rocm_only_tag_violation_aspect_impl(target, ctx):
return _dependency_violation_aspect_impl(target, ctx, "rocm-only")

# Aspect that runs the `rocm-only` tag check implemented by
# `_rocm_only_tag_violation_aspect_impl`.
# `attr_aspects = ["deps"]` makes the aspect propagate along the `deps`
# attribute of the targets it visits.
validate_rocm_only_tag = aspect(
implementation = _rocm_only_tag_violation_aspect_impl,
attr_aspects = ["deps"],
)
1 change: 1 addition & 0 deletions build_tools/lint/tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
"gpu": "Catch-all tag for targets that should be built/tested on GPU CI",
"cpu": "Catch-all tag for targets that should be built/tested on CPU CI.",
"cuda-only": "Targets that require the CUDA backend to be enabled.",
"rocm-only": "Targets that require the ROCm backend to be enabled.",
# Below tags are generated by `xla_test`.
"broken": "Test will be marked with other tags to disable in `xla_test`.",
"xla_interpreter": "Uses interpreter backend.",
Expand Down
2 changes: 1 addition & 1 deletion third_party/tsl/third_party/gpus/rocm_configure.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ def _create_dummy_repository(repository_ctx):
"%{hipblas_lib}": _lib_name("hipblas"),
"%{miopen_lib}": _lib_name("miopen"),
"%{rccl_lib}": _lib_name("rccl"),
"%{hipfft_or_rocfft}": _lib_name("hipfft"),
"%{hipfft_or_rocfft}": "hipfft",
"%{hipfft_or_rocfft_lib}": _lib_name("hipfft"),
"%{hiprand_lib}": _lib_name("hiprand"),
"%{hipsparse_lib}": _lib_name("hipsparse"),
Expand Down
28 changes: 19 additions & 9 deletions xla/backends/profiler/gpu/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,6 @@ tsl_gpu_library(
name = "device_tracer",
srcs = tf_additional_device_tracer_srcs(),
copts = tf_profiler_copts() + tsl_copts(),
cuda_deps = [
":cupti_buffer_events",
":cupti_collector",
":cupti_tracer",
":cupti_wrapper",
":rocm_collector",
":rocm_tracer",
],
deps = [
":cupti_utils",
"//xla/tsl/util:env_var",
Expand All @@ -47,7 +39,17 @@ tsl_gpu_library(
"@tsl//tsl/profiler/lib:profiler_interface",
"@tsl//tsl/profiler/protobuf:xplane_proto_cc",
"@tsl//tsl/profiler/utils:time_utils",
],
] + if_cuda([
# keep sorted
":cupti_buffer_events",
":cupti_collector",
":cupti_tracer",
":cupti_wrapper",
]) + if_rocm([
# keep sorted
":rocm_collector",
":rocm_tracer",
]),
alwayslink = 1,
)

Expand Down Expand Up @@ -218,6 +220,10 @@ tsl_gpu_library(
srcs = if_rocm(["rocm_collector.cc"]),
hdrs = if_rocm(["rocm_collector.h"]),
copts = tf_profiler_copts() + tsl_copts(),
tags = [
"gpu",
"rocm-only",
],
visibility = ["//visibility:public"],
deps = [
"//xla/stream_executor/rocm:roctracer_wrapper",
Expand Down Expand Up @@ -253,6 +259,10 @@ tsl_gpu_library(
srcs = if_rocm(["rocm_tracer.cc"]),
hdrs = if_rocm(["rocm_tracer.h"]),
copts = tf_profiler_copts() + tsl_copts(),
tags = [
"gpu",
"rocm-only",
],
visibility = ["//visibility:public"],
deps = [
":rocm_collector",
Expand Down
11 changes: 11 additions & 0 deletions xla/stream_executor/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -925,14 +925,25 @@ xla_cc_test(
# Alias pointing at the CUDA runtime target in //xla/stream_executor/cuda.
# Tagged `cuda-only` (requires the CUDA backend to be enabled) and `gpu`
# (catch-all tag for targets built/tested on GPU CI).
alias(
name = "cuda_platform",
actual = "//xla/stream_executor/cuda:all_runtime",
tags = [
"cuda-only",
"gpu",
],
)

# Alias pointing at the ROCm runtime target in //xla/stream_executor/rocm.
# Tagged `rocm-only` (requires the ROCm backend to be enabled) and `gpu`
# (catch-all tag for targets built/tested on GPU CI). The `rocm-only` tag
# lets builds that filter with `-rocm-only` skip this alias.
alias(
name = "rocm_platform",
actual = "//xla/stream_executor/rocm:all_runtime",
tags = [
"gpu",
"rocm-only",
],
)

# Alias pointing at the SYCL runtime target in //xla/stream_executor/sycl.
# Only the catch-all `gpu` tag is applied here; no backend-specific
# "-only" tag is set on this alias.
alias(
name = "sycl_platform",
actual = "//xla/stream_executor/sycl:all_runtime",
tags = [
"gpu",
],
)
Loading

0 comments on commit 3b050e2

Please sign in to comment.