From 0e6050fa1010df8809618b1b68399500b59750e5 Mon Sep 17 00:00:00 2001 From: Meyer Zinn Date: Tue, 2 Apr 2024 23:51:35 +0000 Subject: [PATCH] tacc notes --- README_TACC.md | 28 +++---- galois | 2 +- include/scea/stats.hpp | 14 ++-- scripts/tacc_edit_scalability_friendster.sh | 86 +++++++++++++++++++++ scripts/tacc_edit_scalability_rmat18.sh | 86 +++++++++++++++++++++ scripts/tacc_env.sh | 10 +++ 6 files changed, 201 insertions(+), 25 deletions(-) create mode 100755 scripts/tacc_edit_scalability_friendster.sh create mode 100644 scripts/tacc_edit_scalability_rmat18.sh create mode 100644 scripts/tacc_env.sh diff --git a/README_TACC.md b/README_TACC.md index 9fbb30e..4a51305 100644 --- a/README_TACC.md +++ b/README_TACC.md @@ -86,8 +86,8 @@ module load impi/19.0.9 module load python3/3.9.7 module load boost-mpi/1.72 -export LLVM_DIR="$SCRATCH/llvm-project/build/cmake/modules/CMakeFiles/" -export fmt_DIR="$SCRATCH/fmt/build/" +export LLVM_DIR="$WORK/llvm-project/build/cmake/modules/CMakeFiles/" +export fmt_DIR="$WORK/fmt/build/" ``` ## Building Dependencies @@ -104,16 +104,10 @@ files then you need to clone and download your dependencies manually. For Galois you need [llvm](https://github.com/llvm/llvm-project) and [fmt](https://github.com/fmtlib/fmt). -*Note:* `llvm` is a very heavyweight library that will take hours to pull -and build. Galois really only uses this library for parsing arguments in -programs and it would probably be faster to remove Galois' dependency on -`llvm` and instead use `boost` to parse arguments than to build `llvm`. -But it is easier to just build `llvm` and burn a few hours. - The following will build `fmt` properly: ```shell -cd $SCRATCH +cd $WORK git clone https://github.com/fmtlib/fmt cd fmt cmake -B build @@ -121,19 +115,18 @@ cd build make -j4 ``` -If you are still reading this I can see you decided to build `llvm` anyways, -all I can say is good luck. 
`llvm` needs to be built with certain `cmake` -flags in order to work with Galois, the following sequence will build `llvm` -properly: +`llvm` needs to be built with certain `cmake` flags in order to work with +Galois; the following sequence will build `llvm` properly: ```shell -cd $SCRATCH +idev -m 120 # enter an interactive dev machine to use more threads +cd $WORK git clone https://github.com/llvm/llvm-project cd llvm-project cmake -S llvm -B build -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_RTTI=ON \ -DLLVM_ENABLE_ZSTD=OFF -DLLVM_ENABLE_ZLIB=OFF -DLLVM_ENABLE_TERMINFO=OFF cd build -make -j4 +make -j"$(nproc)" ``` ## Building Galois @@ -142,7 +135,7 @@ After you have built Galois' dependencies the process is more familiar. The important bits are to set env vars to tell `cmake` where to find your prebuilt dependencies. For example: -`export LLVM_DIR="$SCRATCH/llvm-project/build/cmake/modules/CMakeFiles/"`. +`export LLVM_DIR="$WORK/llvm-project/build/cmake/modules/CMakeFiles/"`. It is recommended to define the env vars as part of your TACC environment script for ease of use. @@ -151,10 +144,11 @@ version or compiler, then `cmake` will cache this choice. In order to rebuild with a different library version or compiler you must remove the file `build/CMakeCache.txt` and then rebuild with the proper settings. + Another caviat is with `llvm`: my builds were unable to find the header files for `llvm` despite `cmake` finding the dependency. In order to resolve this I used the following `cmake` hack to include them directly: -`target_include_directories( PRIVATE $SCRATCH/llvm-project/llvm/include)`. +`target_include_directories( PRIVATE $WORK/llvm-project/llvm/include)`. Note that this does not break builds for different machines since `cmake` will simply ignore paths that do not exist on the current filesystem.
diff --git a/galois b/galois index 5d4ca96..b07bab0 160000 --- a/galois +++ b/galois @@ -1 +1 @@ -Subproject commit 5d4ca9654981869c8fbe8dd4c12634b5a2d4deb8 +Subproject commit b07bab051ad40fd39320bd94266de694b28901ef diff --git a/include/scea/stats.hpp b/include/scea/stats.hpp index 09119d0..b320d4d 100644 --- a/include/scea/stats.hpp +++ b/include/scea/stats.hpp @@ -126,13 +126,13 @@ class ScopeBenchmarker { uint64_t instructions = instructionsEvent.readValue(); uint64_t max_rss = getMaxRSS(); - std::cout << "Benchmark results for " << scopeName << ":\n"; - std::cout << "Duration: " << timer.getDurationNano() << " nanoseconds\n"; - std::cout << "Max RSS: " << max_rss << " KB\n"; - std::cout << "Cache Miss Rate: " - << (static_cast(cacheMisses) / cacheReferences) * 100 - << "%\n"; - std::cout << "Instructions: " << instructions << "\n"; + std::cout << "Benchmark results for " << scopeName << ":" << std::endl // + << "Duration: " << timer.getDurationNano() << " nanoseconds" + << std::endl // + << "Max RSS: " << max_rss << " KB" << std::endl // + << "Cache Misses: " << cacheMisses << std::endl // + << "Cache References: " << cacheReferences << std::endl // + << "Instructions: " << instructions << std::endl; } }; diff --git a/scripts/tacc_edit_scalability_friendster.sh b/scripts/tacc_edit_scalability_friendster.sh new file mode 100755 index 0000000..679add0 --- /dev/null +++ b/scripts/tacc_edit_scalability_friendster.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +# SPDX-License-Identifier: BSD-2-Clause +# Copyright (c) 2023. University of Texas at Austin. All rights reserved. 
+ +GRAPH_PATH="${GRAPH_PATH:-$SCRATCH/graphs/friendster_batched_11.txt}" +GRAPH_NUM_VERTICES="${GRAPH_NUM_VERTICES:-124836180}" + +# required sbatch parameters + +# note that TIME is in the format H:MM:SS + +# note that the bash paradigm `HOSTS="${HOSTS:-default}"` +# means: set the env var HOSTS equal to $HOSTS if HOSTS is +# already set; if it is not set then set `HOSTS=default` +HOSTS="${HOSTS:-1}" +PROCS="${PROCS:-1}" +TIME="${TIME:-1:00:00}" +QUEUE="${QUEUE:-normal}" +JOBS="${JOBS:-graph-log-sketch-run}" +OUTS="${OUTS:-graph-log-sketch}" + +ENV=${WORK}/scea/graph-log-sketch/scripts/tacc_env.sh + +# These variables are not necessary but recommended for ease of use +# The data directory is helpful for storing outputs and is recommended +# but not necessary +BUILD="${BUILD:-$WORK/scea/graph-log-sketch/build}" +DATA="${DATA:-$SCRATCH/scea/graph-log-sketch/data/friendster}" + +# Create the data directory, if it does not yet exist: +mkdir -p "$DATA" + +# print statements to validate input, can and should +# be extended with application parameters +echo $HOSTS +echo $PROCS +echo $GRAPH_PATH +echo $TIME +echo $DATA +echo $QUEUE +echo $JOBS + +for algo in bfs tc; do + for nthreads in 8 16 32 64; do + for graph in lscsr lccsr adj; do + # JOBN is parameterized with every loop variable (algo, threads, graph) + # so that jobs never share a job name or overwrite each other's output + JOBN=${DATA}/${JOBS}_${HOSTS}_${PROCS}_a=${algo}_t=${nthreads}_g=${graph} + echo "Submitting job: $JOBN" + + # start of job that runs on the supercomputer + sbatch <<-EOT + #!/bin/bash + + # special arguments passed to sbatch here instead of by command line + # the mail arguments are optional and are just there to send you email + # notifications when your jobs are scheduled and complete + #SBATCH -J ${JOBN} + #SBATCH -o ${JOBN}.out + #SBATCH -e ${JOBN}.err + #SBATCH -t ${TIME} + #SBATCH -N ${HOSTS} + #SBATCH -n ${PROCS} + #SBATCH --mail-type=none + #SBATCH --mail-user=meyer.zinn@utexas.edu + #SBATCH -p ${QUEUE} + + # ensure the
proper runtime environment is set + module purge + . ${ENV} + + # actually run the equivalent of $\(mpirun) the $\(--) ensures arguments + # are passed to your executable and not $\(ibrun) + ibrun -- ${BUILD}/microbench/edit-scalability \ + --algo $algo \ + --bfs-src 101 \ + --graph $graph \ + --ingest-threads $nthreads \ + --algo-threads $nthreads \ + --input-file $GRAPH_PATH \ + --num-vertices $GRAPH_NUM_VERTICES + EOT + done + done +done diff --git a/scripts/tacc_edit_scalability_rmat18.sh b/scripts/tacc_edit_scalability_rmat18.sh new file mode 100644 index 0000000..4599c11 --- /dev/null +++ b/scripts/tacc_edit_scalability_rmat18.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +# SPDX-License-Identifier: BSD-2-Clause +# Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +GRAPH_PATH="${GRAPH_PATH:-$SCRATCH/graphs/rmat18_nV262144_randomized_20.txt}" +GRAPH_NUM_VERTICES="${GRAPH_NUM_VERTICES:-262145}" + +# required sbatch parameters + +# note that TIME is in the format H:MM:SS + +# note that the bash paradigm `HOSTS="${HOSTS:-default}"` +# means: set the env var HOSTS equal to $HOSTS if HOSTS is +# already set; if it is not set then set `HOSTS=default` +HOSTS="${HOSTS:-1}" +PROCS="${PROCS:-1}" +TIME="${TIME:-1:00:00}" +QUEUE="${QUEUE:-normal}" +JOBS="${JOBS:-graph-log-sketch-run-rmat18}" +OUTS="${OUTS:-graph-log-sketch-rmat18}" + +ENV=${WORK}/scea/graph-log-sketch/scripts/tacc_env.sh + +# These variables are not necessary but recommended for ease of use +# The data directory is helpful for storing outputs and is recommended +# but not necessary +BUILD="${BUILD:-$WORK/scea/graph-log-sketch/build}" +DATA="${DATA:-$SCRATCH/scea/graph-log-sketch/data/rmat18}" + +# Create the data directory, if it does not yet exist: +mkdir -p "$DATA" + +# print statements to validate input, can and should +# be extended with application parameters +echo $HOSTS +echo $PROCS +echo $GRAPH_PATH +echo $TIME +echo $DATA +echo $QUEUE +echo $JOBS + +for algo in bfs tc; do + for nthreads in 8 16
32 64; do + for graph in lscsr lccsr adj; do + # JOBN is parameterized with every loop variable (algo, threads, graph) + # so that jobs never share a job name or overwrite each other's output + JOBN=${DATA}/${JOBS}_${HOSTS}_${PROCS}_a=${algo}_t=${nthreads}_g=${graph} + echo "Submitting job: $JOBN" + + # start of job that runs on the supercomputer + sbatch <<-EOT + #!/bin/bash + + # special arguments passed to sbatch here instead of by command line + # the mail arguments are optional and are just there to send you email + # notifications when your jobs are scheduled and complete + #SBATCH -J ${JOBN} + #SBATCH -o ${JOBN}.out + #SBATCH -e ${JOBN}.err + #SBATCH -t ${TIME} + #SBATCH -N ${HOSTS} + #SBATCH -n ${PROCS} + #SBATCH --mail-type=none + #SBATCH --mail-user=meyer.zinn@utexas.edu + #SBATCH -p ${QUEUE} + + # ensure the proper runtime environment is set + module purge + . ${ENV} + + # actually run the equivalent of $\(mpirun) the $\(--) ensures arguments + # are passed to your executable and not $\(ibrun) + ibrun -- ${BUILD}/microbench/edit-scalability \ + --algo $algo \ + --bfs-src 101 \ + --graph $graph \ + --ingest-threads $nthreads \ + --algo-threads $nthreads \ + --input-file $GRAPH_PATH \ + --num-vertices $GRAPH_NUM_VERTICES + EOT + done + done +done diff --git a/scripts/tacc_env.sh b/scripts/tacc_env.sh new file mode 100644 index 0000000..3a5a861 --- /dev/null +++ b/scripts/tacc_env.sh @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: BSD-2-Clause +# Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +module load intel/19.1.1 +module load impi/19.0.9 +module load python3/3.9.7 +module load boost-mpi/1.72 + +export LLVM_DIR="$WORK/llvm-project/build/cmake/modules/CMakeFiles/" +export fmt_DIR="$WORK/fmt/build/"