diff --git a/rvs/conf/MI210/babel.conf b/rvs/conf/MI210/babel.conf new file mode 100644 index 00000000..6602ad93 --- /dev/null +++ b/rvs/conf/MI210/babel.conf @@ -0,0 +1,51 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# BABEL test +# +# Preconditions: +# Set device to all. 
If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# all the GPUs IDs separated by white space (e.g.: device: 50599 3245) +# Set parallel execution to true or false (this configuration uses true) +# Set buffer size to reflect the buffer you want to test +# Set run count to 1 (test will run once) +# + +actions: +- name: babel-256MiB + device: all + module: babel # Name of the module + parallel: true # Parallel true or false + count: 1 # Number of times you want to repeat the test from the beginning (a clean start every time) + num_iter: 5000 # Number of iterations; this many kernels are launched simultaneously to stress the system + array_size: 268435456 # Buffer size the test operates on; this is 256 MiB + test_type: 1 # type of test, 1: Float, 2: Double, 3: Triad float, 4: Triad double + mibibytes: true # mebibytes (MiB) or megabytes (MB), true for MiB + o/p_csv: false # o/p as csv file + subtest: 5 # 1: copy 2: copy+mul 3: copy+mul+add 4: copy+mul+add+triad 5: copy+mul+add+triad+dot + dwords_per_lane: 4 # Number of dwords per lane + chunks_per_block: 4 # Number of chunks per block + diff --git a/rvs/conf/MI210/gpup_single.conf b/rvs/conf/MI210/gpup_single.conf new file mode 100644 index 00000000..d0f9386f --- /dev/null +++ b/rvs/conf/MI210/gpup_single.conf @@ -0,0 +1,174 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# Run test with testscript or binary: +# +# Using Testscript - +# cd /opt/rocm/share/rocm-validation-suite/testscripts +# sudo ./gpup.new.sh +# +# Using Binary - +# cd /opt/rocm/share/rocm-validation-suite/conf +# cd /opt/rocm/bin +# sudo ./rvs -c /opt/rocm/share/rocm-validation-suite/conf/gpup_single.conf +# +# Note: Paths may vary with the ROCm version or ROCm installation path. 
+ +# GPUP test #1 +# +# Preconditions: +# all AMD compatible GPUs +# all types of devices +# all gpu properties, all io_links properties +# +# Expected result: +# Test passes with displaying all properties values for any GPUs + +actions: +- name: RVS-GPUP-TC1 + device: all + module: gpup + properties: + all: + io_links-properties: + all: + +# GPUP test #2 +# +# Preconditions: +# all AMD compatible GPUs +# all types of devices +# no regular expressions +# only a subset of gpu properties, only a subset of io_link properties +# +# Expected result: +# Test passes with displaying subsets of properties and io_link properties values for any GPUs + +- name: RVS-GPUP-TC2 + device: all + module: gpup + properties: + simd_count: + mem_banks_count: + io_links_count: + vendor_id: + location_id: + max_engine_clk_ccompute: + io_links-properties: + version_major: + type: + version_major: # NOTE(review): duplicate of the version_major key above in this mapping; confirm whether another io_link property was intended + version_minor: + node_from: + node_to: + recommended_transfer_size: + flags: + +# GPUP test #3 +# +# Preconditions: +# only a subset of AMD compatible GPUs (device filtering) +# all types of devices +# all gpu properties, all io_link properties +# +# Expected result: +# Test passes with displaying all properties and io_link properties values for subset of GPUs +# +# Note: +# Testing specific device, if device numbers are changed in system it should be changed in the test + +- name: RVS-GPUP-TC3 + device: all + module: gpup + properties: + all: + io_links-properties: + all: + +# GPUP test #4 +# +# Preconditions: +# all AMD compatible GPUs +# a given device type (deviceid filtering), this must be filled based on deviceid in sysfs/ ./rvs -g. 
+# Default is 0=> no filtering +# all gpu properties, all io_link properties +# +# Expected result: +# Test passes with displaying all properties and io_link properties values for all GPUs and given deviceid + +- name: RVS-GPUP-TC4 + device: all + module: gpup + deviceid: 0 + properties: + all: + io_links-properties: + all: + +# GPUP test #5 +# +# Preconditions: +# only a subset of AMD compatible GPUs (device filtering) +# a given device type (deviceid filtering) this must be filled based on deviceid in sysfs/ ./rvs -g +# Default is 0=> no filtering +# all gpu properties, all io_link properties +# +# Expected result: +# Test passes with displaying all properties and io_link properties values for subset of GPUs and given deviceid +# +# Note: +# Testing specific device, if device numbers are changed in system it should be changed in the test + +- name: RVS-GPUP-TC5 + device: all + module: gpup + deviceid: 0 + properties: + all: + io_links-properties: + all: + +# GPUP test #6 +# +# Preconditions: +# only a subset of AMD compatible GPUs (device filtering) +# a given device type (deviceid filtering) this must be filled based on deviceid in sysfs/ ./rvs -g +# Default is 0=> no filtering +# only a subset of gpu properties, only a subset of io_link properties +# +# Expected result: +# Test passes with displaying subset of properties and io_link properties values for subset of GPUs and given deviceid +# +# Note: +# Testing specific device, if device numbers are changed in system it should be changed in the test + +- name: RVS-GPUP-TC6 + device: all + module: gpup + deviceid: 0 + properties: + mem_banks_count: + io_links-properties: + version_major: diff --git a/rvs/conf/MI210/gst_single.conf b/rvs/conf/MI210/gst_single.conf new file mode 100644 index 00000000..b2fc16ec --- /dev/null +++ b/rvs/conf/MI210/gst_single.conf @@ -0,0 +1,130 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, 
Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + + + +# GST test +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# all the GPUs IDs separated by white space +# Set parallel execution to false +# Set matrix_size to 8640 (for Vega 10 cards). 
For Vega 20, the recommended matrix_size is 8640 +# Set run count to 2 (each test will run twice) +# Set copy_matrix to false (the matrices will be copied to GPUs only once) +# +# Run test with: +# cd bin +# sudo ./rvs -c conf/gst_1.conf -d 3 +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU achieves 5000 gflops +# in maximum 7 seconds and then the GPU sustains the gflops +# for the rest of the test duration (total duration is 18 seconds). +# A single Gflops violation (with a 7% tolerance) is allowed. +# FALSE otherwise + +actions: +- name: gpustress-41000-fp32-false + device: all + module: gst + parallel: false + count: 1 + duration: 10000 + copy_matrix: false + target_stress: 41000 + matrix_size_a: 28000 + matrix_size_b: 28000 + matrix_size_c: 28000 + data_type: fp32_r + lda: 28000 + ldb: 28000 + ldc: 28000 + blas_source: hipblaslt + +- name: gpustress-3000-dgemm-false + device: all + module: gst + parallel: false + count: 1 + #hot_calls: 1000 + duration: 15000 + copy_matrix: false + target_stress: 30000 + matrix_size_a: 8192 + matrix_size_b: 8192 + matrix_size_c: 8192 + matrix_init: trig + ops_type: dgemm + lda: 8192 + ldb: 8192 + ldc: 8192 + +- name: gst-8096-150000-fp16 + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + copy_matrix: false + target_stress: 150000 + matrix_size_a: 8096 + matrix_size_b: 8096 + matrix_size_c: 8096 + data_type: fp16_r + lda: 8096 + ldb: 8096 + ldc: 8096 + ldd: 8096 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + blas_source: hipblaslt + +- name: gst-1215Tflops-4K4K8K-rand-fp8 + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 500 + copy_matrix: false + target_stress: 160000 + matrix_size_a: 8192 + matrix_size_b: 8192 + matrix_size_c: 8192 + matrix_init: rand + data_type: i8_r + lda: 8192 + ldb: 8192 + ldc: 8192 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + blas_source: hipblaslt + diff --git 
a/rvs/conf/MI210/iet_single.conf b/rvs/conf/MI210/iet_single.conf new file mode 100644 index 00000000..5f279ebc --- /dev/null +++ b/rvs/conf/MI210/iet_single.conf @@ -0,0 +1,146 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. 
+# # +# ############################################################################### + +actions: +- name: action_1 + device: all + module: iet + parallel: true + count: 1 + wait: 100 + duration: 50000 + ramp_interval: 5000 + sample_interval: 700 + log_interval: 700 + max_violations: 1 + target_power: 300 + tolerance: 0.06 + matrix_size: 8640 + ops_type: dgemm + +- name: action_2 + device: all + module: iet + parallel: true + count: 1 + wait: 100 + duration: 50000 + ramp_interval: 5000 + sample_interval: 1500 + log_interval: 2000 + max_violations: 1 + target_power: 300 + tolerance: 0.2 + matrix_size: 8640 + ops_type: dgemm + +- name: action_3 + device: all + module: iet + parallel: false + count: 1 + wait: 100 + duration: 50000 + ramp_interval: 5000 + sample_interval: 500 + log_interval: 500 + max_violations: 1 + target_power: 300 + tolerance: 0.1 + matrix_size: 8640 + ops_type: dgemm + +# IET test +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# Set parallel execution to true +# Set matrix_size to 8640 (for Vega 10 cards). For Vega 20, the recommended matrix_size is 8640 +# Set run count to 2 (each test will run twice) +# +# Run test with: +# cd bin +# sudo ./rvs -c conf/iet4.conf -d 3 +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU power reaches 150W +# in maximum 5 seconds and then the GPU sustains the same power +# for the rest of the test duration (total duration is 10 seconds). +# A single power violation (with a 10% tolerance) is allowed. 
+# FALSE otherwise + +- name: action_4 + device: all + module: iet + parallel: true + count: 1 + wait: 100 + duration: 50000 + ramp_interval: 5000 + sample_interval: 500 + log_interval: 500 + max_violations: 1 + target_power: 300 + tolerance: 0.1 + matrix_size: 8640 + ops_type: sgemm + +# IET test +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# Set parallel execution to false +# Set matrix_size to 8640 (for Vega 10 cards). For Vega 20, the recommended matrix_size is 8640 +# Set run count to 2 (each test will run twice) +# +# Run test with: +# cd bin +# sudo ./rvs -c conf/iet5.conf -d 3 +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU power reaches 50W +# in maximum 5 seconds and then the GPU sustains the same power +# for the rest of the test duration (total duration is 10 seconds). +# A single power violation (with a 10% tolerance) is allowed. +# FALSE otherwise + +- name: action_5 + device: all + module: iet + parallel: false + count: 1 + wait: 100 + duration: 50000 + ramp_interval: 5000 + sample_interval: 1500 + log_interval: 2000 + max_violations: 1 + target_power: 300 + tolerance: 0.1 + matrix_size: 8640 + ops_type: sgemm + diff --git a/rvs/conf/MI210/pbqt_single.conf b/rvs/conf/MI210/pbqt_single.conf new file mode 100644 index 00000000..d89c152d --- /dev/null +++ b/rvs/conf/MI210/pbqt_single.conf @@ -0,0 +1,182 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. 
+# # +# ############################################################################### + +actions: +- name: action_1 + device: all + module: pbqt + log_interval: 800 + duration: 5000 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + block_size: 1000000 2000000 10000000 + device_id: all + +- name: action_2 + device: all + module: pbqt + log_interval: 1000 + count: 3 + duration: 10000 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + device_id: all + +- name: action_3 + device: all + module: pbqt + log_interval: 800 + duration: 4000 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + device_id: all + +- name: action_4 + device: all + module: pbqt + log_interval: 1000 + duration: 5000 + count: 1 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + device_id: all + +- name: action_5 + device: all + module: pbqt + log_interval: 800 + duration: 4000 + count: 1 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + device_id: all + +- name: action_6 + device: all + module: pbqt + log_interval: 800 + duration: 8000 + count: 1 + peers: all + test_bandwidth: true + bidirectional: false + parallel: false + device_id: all + +- name: action_7 + device: all + module: pbqt + peers: all + count: 1 + test_bandwidth: false + device_id: all + +- name: action_8 + device: all + module: pbqt + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + device_id: all + +- name: action_9 + device: all + module: pbqt + log_interval: 500 + duration: 1000 + peers: all + test_bandwidth: true + bidirectional: false + parallel: true + device_id: all + +- name: action_10 + device: all + module: pbqt + log_interval: 500 + duration: 1000 + peers: all + peer_device_id: all + test_bandwidth: true + bidirectional: false + parallel: true + +- name: action_11 + device: all + module: pbqt + log_interval: 0 + duration: 10000 + peers: all + peer_device_id: all + 
test_bandwidth: true + bidirectional: true + parallel: false + device_id: all + +- name: action_12 + device: all + module: pbqt + log_interval: 0 + duration: 1000 + count: 3 + wait: 1000 + peers: all + peer_device_id: all + test_bandwidth: true + bidirectional: true + parallel: true + +- name: action_13 + device: all + module: pbqt + log_interval: 1000 + duration: 10000 + peers: all + device_id: all + peer_device_id: all + test_bandwidth: true + bidirectional: true + parallel: true + +- name: action_14 + device: all + module: pbqt + log_interval: 500 + duration: 10000 + peers: all + test_bandwidth: true + bidirectional: true + device_id: all diff --git a/rvs/conf/MI210/pebb_single.conf b/rvs/conf/MI210/pebb_single.conf new file mode 100644 index 00000000..787f286e --- /dev/null +++ b/rvs/conf/MI210/pebb_single.conf @@ -0,0 +1,236 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# PEBB test #1 +# +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. host to device +# +# Run test with: +# cd bin +# ./rvs -c conf/pebb_test1.conf -d 3 +# + + +actions: +- name: h2d-sequential-51MB + device: all + module: pebb + log_interval: 800 + duration: 50000 + device_to_host: true + host_to_device: true + parallel: true + block_size: 51200000 + link_type: 2 # PCIe + + +# PEBB test #2 +# +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. device to host +# +# Run test with : +# cd bin +# ./rvs -c conf/pebb_test2.conf -d 3 +# + + +- name: d2h-sequential-51MB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + parallel: true + block_size: 51200000 + link_type: 2 # PCIe + + + +# PEBB test #3 +# +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. bidirectional +# +# Run test with: +# cd bin +# ./rvs -c conf/pebb_test3.conf -d 3 +# + +- name: h2d-d2h-sequential-51MB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + parallel: true + block_size: 51200000 + link_type: 2 # PCIe + + + + +# PEBB test #4 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. host to device +# 4. parallel transfers +# 5. 
random block sizes +# Run test with: +# cd bin +# ./rvs -c conf/pebb_test4.conf -d 3 + +- name: h2d-parallel-xMB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + parallel: true + link_type: 2 # PCIe + + +# PEBB test #5 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. device to host +# 4. parallel transfers +# 5. random block sizes +# Run test with: +# cd bin +# ./rvs -c conf/pebb_test5.conf -d 3 + +- name: d2h-parallel-xMB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + parallel: true + link_type: 2 # PCIe + + + +# PEBB test #6 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. bidirectional +# 4. parallel transfers +# 5. random block sizes +# Run test with: +# cd bin +# ./rvs -c conf/pebb_test6.conf -d 3 + +- name: h2d-d2h-xMB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + parallel: true + link_type: 2 # PCIe + + +# PEBB test #7 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. host to device +# 4. parallel transfers +# 5. back-to-back 51MB +# Run test with: +# cd bin +# ./rvs -c conf/pebb_test7.conf -d 3 + +- name: h2d-b2b-51MB + device: all + module: pebb + log_interval: 800 + duration: 34000 + device_to_host: false + host_to_device: true + b2b_block_size: 51200 + parallel: false + link_type: 2 # PCIe + + +# PEBB test #8 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. host-to-device and device-to-host +# 4. parallel back-to-back transfers +# 5. 
back-to-back 51MB +# Run test with: +# cd bin +# ./rvs -c conf/pebb_test8.conf -d 3 + +- name: d2h-b2b-51MB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + b2b_block_size: 51200 + parallel: true + link_type: 2 # PCIe + +# PEBB test #9 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. bidirectional +# 4. PCIe only +# 5. parallel back-to-back transfers +# Run test with: +# cd bin +# ./rvs -c conf/pebb_test9.conf -d 3 + +- name: h2d-d2h-b2b-51MB + device: all + module: pebb + log_interval: 800 + duration: 34000 + device_to_host: true + host_to_device: true + b2b_block_size: 51200 + parallel: false + link_type: 2 # PCIe diff --git a/rvs/conf/MI210/tst_single.conf b/rvs/conf/MI210/tst_single.conf new file mode 100644 index 00000000..e408c565 --- /dev/null +++ b/rvs/conf/MI210/tst_single.conf @@ -0,0 +1,91 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# TST test +# +# Preconditions: +# Set device to all and execution as sequential. +# Workload set as dgemm operations with matrix size as 8640. +# Throttle temperature set as 100 degree celsius. +# +# Run test with: +# ./rvs -c conf/tst.conf -d 3 +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU junction temperature +# reaches the target temperature. If it reaches the throttle temperature +# during test duration is also monitored. +# +actions: +- name: action_1 + device: all + device_index: all + module: tst + parallel: false + count: 1 + wait: 100 + duration: 30000 + ramp_interval: 10000 + sample_interval: 2000 + log_interval: 2000 + max_violations: 1 + throttle_temp: 100 + target_temp: 50 + tolerance: 0.06 + matrix_size: 8640 + ops_type: dgemm + +# TST test +# +# Preconditions: +# Set device to all and execution in parallel. +# Workload set as dgemm operations with matrix size as 8640. +# Throttle temperature set as 100 degree celsius. +# +# Run test with: +# ./rvs -c conf/tst.conf -d 3 +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU junction temperature +# reaches the target temperature. If it reaches the throttle temperature +# during test duration is also monitored. +# +- name: action_2 + device: all + device_index: all + module: tst + parallel: true + count: 1 + wait: 100 + duration: 50000 + ramp_interval: 5000 + sample_interval: 700 + log_interval: 700 + target_temp: 50 + throttle_temp: 100 + tolerance: 0.06 + matrix_size: 8640 + ops_type: sgemm +