From 21b4d85d0982d738361dc82ad381cc884a2279dc Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Tue, 16 Apr 2024 17:01:02 +0200 Subject: [PATCH 1/5] Update the IOR check to fetch source from GitHub Signed-off-by: Theofilos Manitaras --- checks/system/io/ior_check.py | 84 ++++++++++++++++++++++++----------- 1 file changed, 58 insertions(+), 26 deletions(-) diff --git a/checks/system/io/ior_check.py b/checks/system/io/ior_check.py index ad571c919..04bc21b75 100644 --- a/checks/system/io/ior_check.py +++ b/checks/system/io/ior_check.py @@ -1,4 +1,4 @@ -# Copyright 2016-2023 Swiss National Supercomputing Centre (CSCS/ETH Zurich) +# Copyright 2016-2024 Swiss National Supercomputing Centre (CSCS/ETH Zurich) # ReFrame Project Developers. See the top-level LICENSE file for details. # # SPDX-License-Identifier: BSD-3-Clause @@ -10,7 +10,47 @@ import reframe.utility.sanity as sn -class IorCheck(rfm.RegressionTest): +class fetch_ior_benchmarks(rfm.RunOnlyRegressionTest): + descr = 'Fetch IOR benchmarks' + version = variable(str, value='4.0.0') + executable = 'wget' + executable_opts = [ + f'https://github.com/hpc/ior/releases/download/{version}/ior-{version}.tar.gz' # noqa: E501 + ] + + @sanity_function + def validate_download(self): + return sn.assert_eq(self.job.exitcode, 0) + + +class build_ior_benchmarks(rfm.CompileOnlyRegressionTest): + descr = 'Build IOR benchmarks' + build_system = 'Autotools' + build_prefix = variable(str) + ior_benchmarks = fixture(fetch_ior_benchmarks, scope='session') + + # Build on the remote system for consistency + build_locally = False + + @run_before('compile') + def prepare_build(self): + tarball = f'ior-{self.ior_benchmarks.version}.tar.gz' + self.build_prefix = tarball[:-7] + fullpath = os.path.join(self.ior_benchmarks.stagedir, tarball) + self.prebuild_cmds = [ + f'cp {fullpath} {self.stagedir}', + f'tar xzf {tarball}', + f'cd {self.build_prefix}' + ] + + @sanity_function + def validate_build(self): + # If compilation fails, the 
test would fail in any case, so nothing to + # further validate here. + return True + + +class IorCheck(rfm.RunOnlyRegressionTest): base_dir = parameter(['/capstor/scratch/cscs', '/scratch/snx3000tds', '/scratch/snx3000', @@ -18,8 +58,9 @@ class IorCheck(rfm.RegressionTest): '/users']) username = getpass.getuser() time_limit = '5m' - maintainers = ['SO', 'GLR'] - tags = {'ops', 'production', 'external-resources'} + ior_binaries = fixture(build_ior_benchmarks, scope='environment') + maintainers = ['SO', 'TM'] + tags = {'ops', 'production'} @run_after('init') def set_description(self): @@ -108,20 +149,11 @@ def set_valid_systems(self): self.num_tasks = self.fs[self.base_dir][cur_sys].get('num_tasks', 1) self.num_tasks_per_node = tpn - self.sourcesdir = os.path.join(self.current_system.resourcesdir, 'IOR') - @run_after('init') def load_cray_module(self): if self.current_system.name in ['eiger', 'pilatus']: self.modules = ['cray'] - @run_before('compile') - def prepare_build(self): - self.build_system = 'Make' - self.build_system.options = ['posix', 'mpiio'] - self.build_system.max_concurrency = 1 - self.num_gpus_per_node = 0 - @run_before('run') def prepare_run(self): # Default umask is 0022, which generates file permissions -rw-r--r-- @@ -131,14 +163,22 @@ def prepare_run(self): test_file = os.path.join(test_dir, f'.ior.{self.current_partition.name}') self.prerun_cmds = [f'mkdir -p {test_dir}'] - self.executable = os.path.join('src', 'C', 'IOR') + self.executable = os.path.join( + self.ior_binaries.stagedir, + self.ior_binaries.build_prefix, + 'src', 'ior' + ) # executable options depends on the file system block_size = self.fs[self.base_dir]['ior_block_size'] access_type = self.fs[self.base_dir]['ior_access_type'] - self.executable_opts = ['-B', '-F', '-C ', '-Q 1', '-t 4m', '-D 30', + self.executable_opts = ['-F', '-C ', '-Q 1', '-t 4m', '-D 30', '-b', block_size, '-a', access_type, - '-o', test_file] + '-o', test_file, '--posix.odirect'] + + @sanity_function + def 
assert_finished(self): + return sn.assert_found(r'^Finished\s+:', self.stdout) @rfm.simple_test @@ -146,15 +186,11 @@ class IorWriteCheck(IorCheck): executable_opts += ['-w', '-k'] tags |= {'write'} - @sanity_function - def assert_output(self): - return sn.assert_found(r'^Max Write: ', self.stdout) - @run_after('init') def set_perf_patterns(self): self.perf_patterns = { 'write_bw': sn.extractsingle( - r'^Max Write:\s+(?P<write_bw>\S+) MiB/sec', self.stdout, + r'^Operation(.*\n)*^write\s+(?P<write_bw>\S+)', self.stdout, 'write_bw', float) } @@ -164,15 +200,11 @@ class IorReadCheck(IorCheck): executable_opts += ['-r'] tags |= {'read'} - @sanity_function - def assert_output(self): - return sn.assert_found(r'^Max Read: ', self.stdout) - @run_after('init') def set_perf_patterns(self): self.perf_patterns = { 'read_bw': sn.extractsingle( - r'^Max Read:\s+(?P<read_bw>\S+) MiB/sec', self.stdout, + r'^Operation(.*\n)*^read\s+(?P<read_bw>\S+)', self.stdout, 'read_bw', float) } From 208d2ac3f74764e47512ebdf6f43a4e8c1595793 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Tue, 16 Apr 2024 17:12:30 +0200 Subject: [PATCH 2/5] Add the 'cray' module for eiger/pilatus Signed-off-by: Theofilos Manitaras --- checks/system/io/ior_check.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/checks/system/io/ior_check.py b/checks/system/io/ior_check.py index 04bc21b75..8d48e61e6 100644 --- a/checks/system/io/ior_check.py +++ b/checks/system/io/ior_check.py @@ -32,6 +32,11 @@ class build_ior_benchmarks(rfm.CompileOnlyRegressionTest): # Build on the remote system for consistency build_locally = False + @run_after('init') + def load_cray_module(self): + if self.current_system.name in ['eiger', 'pilatus']: + self.modules = ['cray'] + @run_before('compile') def prepare_build(self): tarball = f'ior-{self.ior_benchmarks.version}.tar.gz' From b082b389d88b926249178af8279894e52dc1d1ed Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Tue, 16 Apr 2024 17:18:07 +0200 Subject: [PATCH 3/5] Address PR comments
Signed-off-by: Theofilos Manitaras --- checks/system/io/ior_check.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/checks/system/io/ior_check.py b/checks/system/io/ior_check.py index 8d48e61e6..bf57cabfe 100644 --- a/checks/system/io/ior_check.py +++ b/checks/system/io/ior_check.py @@ -1,4 +1,4 @@ -# Copyright 2016-2024 Swiss National Supercomputing Centre (CSCS/ETH Zurich) +# Copyright 2016 Swiss National Supercomputing Centre (CSCS/ETH Zurich) # ReFrame Project Developers. See the top-level LICENSE file for details. # # SPDX-License-Identifier: BSD-3-Clause @@ -48,10 +48,10 @@ def prepare_build(self): f'cd {self.build_prefix}' ] + # FIXME this will not be needed in a ReFrame release including: + # https://github.com/reframe-hpc/reframe/pull/3157 @sanity_function def validate_build(self): - # If compilation fails, the test would fail in any case, so nothing to - # further validate here. return True From cffeedcba328be98aea4d13245578f34188a9189 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 17 Apr 2024 11:45:06 +0200 Subject: [PATCH 4/5] Fix CPU threads with Slurm 23.02.7 --- checks/apps/namd/namd_check.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/checks/apps/namd/namd_check.py b/checks/apps/namd/namd_check.py index 2e438ad96..be06d5c06 100644 --- a/checks/apps/namd/namd_check.py +++ b/checks/apps/namd/namd_check.py @@ -77,6 +77,12 @@ def setup_parallel_run(self): self.num_tasks = 16 self.num_tasks_per_node = 1 + # Fix threads per task on Pilatus with Slurm 23.02.7 + if self.current_system.name in ['pilatus']: + self.env_vars = { + 'SRUN_CPUS_PER_TASK': '2' + } + @run_before('compile') def prepare_build(self): # Reset sources dir relative to the SCS apps prefix From e4a624e27ff5adf3eb235c3fa35bd11d7516914d Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 17 Apr 2024 12:27:53 +0200 Subject: [PATCH 5/5] Set cpus per tasks to work with Slurm 23.02.7 --- checks/apps/namd/namd_check.py | 9 ++------- 1 file changed, 2 insertions(+), 
7 deletions(-) diff --git a/checks/apps/namd/namd_check.py b/checks/apps/namd/namd_check.py index be06d5c06..3d981153f 100644 --- a/checks/apps/namd/namd_check.py +++ b/checks/apps/namd/namd_check.py @@ -58,6 +58,7 @@ def setup_parallel_run(self): # On Eiger a no-smp NAMD version is the default if self.current_system.name in ['eiger', 'pilatus']: self.executable_opts = ['+idlepoll', 'stmv.namd'] + self.num_cpus_per_task = 2 else: self.executable_opts = ['+idlepoll', '+ppn 71', 'stmv.namd'] self.num_cpus_per_task = 72 @@ -77,12 +78,6 @@ def setup_parallel_run(self): self.num_tasks = 16 self.num_tasks_per_node = 1 - # Fix threads per task on Pilatus with Slurm 23.02.7 - if self.current_system.name in ['pilatus']: - self.env_vars = { - 'SRUN_CPUS_PER_TASK': '2' - } - @run_before('compile') def prepare_build(self): # Reset sources dir relative to the SCS apps prefix @@ -128,7 +123,7 @@ def set_reference(self): self.reference = { 'daint:mc': {'days_ns': (0.425, None, 0.10, 'days/ns')}, 'eiger:mc': {'days_ns': (0.057, None, 0.05, 'days/ns')}, - 'pilatus:mc': {'days_ns': (0.054, None, 0.05, 'days/ns')} + 'pilatus:mc': {'days_ns': (0.054, None, 0.10, 'days/ns')} } @performance_function('days/ns')