From e3677bb9d32eeb7718fb65473dbcfd1d28061c82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Janal=C3=ADk?= Date: Wed, 2 Oct 2024 15:04:08 +0200 Subject: [PATCH 1/6] Add simple MPI tests - Communicator of the correct size - Point to point communication works between nodes --- ci/templates/test_image.yml.j2 | 23 ++++++++++ test/mpi_simple/Dockerfile.mpi_simple | 34 ++++++++++++++ test/mpi_simple/mpi_send_recv.c | 66 +++++++++++++++++++++++++++ 3 files changed, 123 insertions(+) create mode 100644 test/mpi_simple/Dockerfile.mpi_simple create mode 100644 test/mpi_simple/mpi_send_recv.c diff --git a/ci/templates/test_image.yml.j2 b/ci/templates/test_image.yml.j2 index 6667dd1..fd79559 100644 --- a/ci/templates/test_image.yml.j2 +++ b/ci/templates/test_image.yml.j2 @@ -45,3 +45,26 @@ test_job_test_image_gromacs-{{os}}{{osver}}-spack{{spackver}}-{{archstr[arch]}}: {% if arch == "rocm" -%} - echo "TODO Lets ignore ROCm for now" {%- endif %} + +##### mpi_simple + +build_job_test_image_mpi_simple-{{os}}{{osver}}-spack{{spackver}}-{{archstr[arch]}}: + stage: build_test_image + extends: + - {{container_builder[arch]}} + variables: + DOCKERFILE: "test/mpi_simple/Dockerfile.mpi_simple" + PERSIST_IMAGE_NAME: "{{registry_path}}/test-mpi_simple:spack{{spackver}}-{{os}}{{osver}}-{{archstr[arch]}}" + DOCKER_BUILD_ARGS: '["BUILDIMG={{registry_build_image_name_tag}}", "RUNTIMEIMG={{registry_runtime_image_name_tag}}", "TARGET={{spack_target}}"]' + +test_job_test_image_mpi_simple-{{os}}{{osver}}-spack{{spackver}}-{{archstr[arch]}}: + stage: test_test_image + extends: + - {{container_runner[arch]}} + image: "{{registry_path}}/test-mpi_simple:spack{{spackver}}-{{os}}{{osver}}-{{archstr[arch]}}" + script: + - hostname + - /src/mpi_send_recv + variables: + SLURM_JOB_NUM_NODES: 2 + SLURM_NTASKS: 2 diff --git a/test/mpi_simple/Dockerfile.mpi_simple b/test/mpi_simple/Dockerfile.mpi_simple new file mode 100644 index 0000000..c70cf6b --- /dev/null +++ b/test/mpi_simple/Dockerfile.mpi_simple @@ 
-0,0 +1,34 @@ +ARG BUILDIMG +FROM $BUILDIMG as builder + +RUN apt-get update \ + && env DEBIAN_FRONTEND=noninteractive TZ=Europe/Zurich apt-get -yqq install --no-install-recommends build-essential + +ARG TARGET +RUN spack-install-helper \ + "$TARGET" + + +# copy only relevant parts to the final container +ARG RUNTIMEIMG +FROM $RUNTIMEIMG + +# it is important to keep the paths, otherwise your installation is broken +# all these paths are created with the above `spack-install-helper` invocation +COPY --from=builder /opt/spack-environment /opt/spack-environment +COPY --from=builder /opt/software /opt/software +COPY --from=builder /opt/._view /opt/._view +COPY --from=builder /etc/profile.d/z10_spack_environment.sh /etc/profile.d/z10_spack_environment.sh + +# Some boilerplate to get all paths correctly - fix_spack_install is part of the base image +# and makes sure that all important things are being correctly setup +RUN fix_spack_install + +RUN apt-get update \ + && env DEBIAN_FRONTEND=noninteractive TZ=Europe/Zurich apt-get -yqq install --no-install-recommends build-essential + +# Finally do what we need +COPY . /src + +RUN cd /src \ + && mpicc mpi_send_recv.c -o mpi_send_recv diff --git a/test/mpi_simple/mpi_send_recv.c b/test/mpi_simple/mpi_send_recv.c new file mode 100644 index 0000000..2c38903 --- /dev/null +++ b/test/mpi_simple/mpi_send_recv.c @@ -0,0 +1,66 @@ +/**************************************************************** + * * + * This file has been written as a sample solution to an * + * exercise in a course given at the CSCS Summer School. * + * It is made freely available with the understanding that * + * every copy of this file must include this header and that * + * CSCS take no responsibility for the use of the enclosed * + * teaching material. * + * * + * Purpose: A program to try MPI_Comm_size and MPI_Comm_rank. 
* + * * + * Contents: C-Source * + ****************************************************************/ + +#include +#include +#include + + +int main(int argc, char *argv[]) +{ + /* declare any variables you need */ + int rank; + int size; + int number; + MPI_Status status; + + MPI_Init(&argc, &argv); + + /* Get the rank of each process */ + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + /* Get the size of the communicator */ + MPI_Comm_size(MPI_COMM_WORLD, &size); + + /* Write code such that every process writes its rank and the size of the communicator, + * but only process 0 prints "hello world*/ + printf("Process %d out of %d.\n", rank, size); + + /* Check communicator size */ + if (size != 2) { + printf("Error: Proces %d: communicator size %d, expected 2.\n", rank, size); + MPI_Finalize(); + exit(1); + } + + /* Check send, receive */ + if (rank == 0) { + number = 42; + printf("Process %d: sending number %d\n", rank, number); + MPI_Send(&number, 1, MPI_INT, 1, 0, MPI_COMM_WORLD); + } + + if (rank == 1) { + MPI_Recv(&number, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status); + printf("Process %d: received number %d\n", rank, number); + if (number != 42) { + printf("Error: Process %d: received %d, expected 42.\n", rank, number); + MPI_Finalize(); + exit(1); + } + } + + MPI_Finalize(); + return 0; +} From bf8e9c30c4cb328e0f24644123ddbebf99f59a6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Janal=C3=ADk?= Date: Thu, 10 Oct 2024 18:40:20 +0200 Subject: [PATCH 2/6] Add MPI test: osu-micro-benchmarks --- ci/templates/test_image.yml.j2 | 20 +++--- ...rfile.mpi_simple => Dockerfile.mpi-osu_bw} | 13 +--- test/mpi_simple/mpi_send_recv.c | 66 ------------------- 3 files changed, 15 insertions(+), 84 deletions(-) rename test/{mpi_simple/Dockerfile.mpi_simple => Dockerfile.mpi-osu_bw} (78%) delete mode 100644 test/mpi_simple/mpi_send_recv.c diff --git a/ci/templates/test_image.yml.j2 b/ci/templates/test_image.yml.j2 index fd79559..905e89d 100644 --- a/ci/templates/test_image.yml.j2 +++ 
b/ci/templates/test_image.yml.j2 @@ -46,25 +46,29 @@ test_job_test_image_gromacs-{{os}}{{osver}}-spack{{spackver}}-{{archstr[arch]}}: - echo "TODO Lets ignore ROCm for now" {%- endif %} -##### mpi_simple +##### mpi-osu_bw -build_job_test_image_mpi_simple-{{os}}{{osver}}-spack{{spackver}}-{{archstr[arch]}}: +build_job_test_image_mpi-osu_bw-{{os}}{{osver}}-spack{{spackver}}-{{archstr[arch]}}: stage: build_test_image extends: - {{container_builder[arch]}} variables: - DOCKERFILE: "test/mpi_simple/Dockerfile.mpi_simple" - PERSIST_IMAGE_NAME: "{{registry_path}}/test-mpi_simple:spack{{spackver}}-{{os}}{{osver}}-{{archstr[arch]}}" + DOCKERFILE: "test/Dockerfile.mpi-osu_bw" + PERSIST_IMAGE_NAME: "{{registry_path}}/test-mpi-osu_bw:spack{{spackver}}-{{os}}{{osver}}-{{archstr[arch]}}" DOCKER_BUILD_ARGS: '["BUILDIMG={{registry_build_image_name_tag}}", "RUNTIMEIMG={{registry_runtime_image_name_tag}}", "TARGET={{spack_target}}"]' -test_job_test_image_mpi_simple-{{os}}{{osver}}-spack{{spackver}}-{{archstr[arch]}}: +test_job_test_image_mpi-osu_bw-{{os}}{{osver}}-spack{{spackver}}-{{archstr[arch]}}: stage: test_test_image extends: - {{container_runner[arch]}} - image: "{{registry_path}}/test-mpi_simple:spack{{spackver}}-{{os}}{{osver}}-{{archstr[arch]}}" + image: "{{registry_path}}/test-mpi-osu_bw:spack{{spackver}}-{{os}}{{osver}}-{{archstr[arch]}}" script: - - hostname - - /src/mpi_send_recv + - osu_bw | tee osu_bw-out.txt + - SIZE=4194304 + - BW_THRESHOLD=20000 + - BW=$(awk "/^$SIZE/ {print \$2}" osu_bw-out.txt) + - RES=$(bc <<< "$BW > $BW_THRESHOLD") + - test $RES -eq 1 # bc: true 1 false 0, bash: true 0 false 1 variables: SLURM_JOB_NUM_NODES: 2 SLURM_NTASKS: 2 diff --git a/test/mpi_simple/Dockerfile.mpi_simple b/test/Dockerfile.mpi-osu_bw similarity index 78% rename from test/mpi_simple/Dockerfile.mpi_simple rename to test/Dockerfile.mpi-osu_bw index c70cf6b..a5fe836 100644 --- a/test/mpi_simple/Dockerfile.mpi_simple +++ b/test/Dockerfile.mpi-osu_bw @@ -6,7 +6,9 @@ RUN apt-get 
update \ ARG TARGET RUN spack-install-helper \ - "$TARGET" + "$TARGET" \ + osu-micro-benchmarks \ + bc # copy only relevant parts to the final container @@ -23,12 +25,3 @@ COPY --from=builder /etc/profile.d/z10_spack_environment.sh /etc/profile.d/z10_s # Some boilerplate to get all paths correctly - fix_spack_install is part of the base image # and makes sure that all important things are being correctly setup RUN fix_spack_install - -RUN apt-get update \ - && env DEBIAN_FRONTEND=noninteractive TZ=Europe/Zurich apt-get -yqq install --no-install-recommends build-essential - -# Finally do what we need -COPY . /src - -RUN cd /src \ - && mpicc mpi_send_recv.c -o mpi_send_recv diff --git a/test/mpi_simple/mpi_send_recv.c b/test/mpi_simple/mpi_send_recv.c deleted file mode 100644 index 2c38903..0000000 --- a/test/mpi_simple/mpi_send_recv.c +++ /dev/null @@ -1,66 +0,0 @@ -/**************************************************************** - * * - * This file has been written as a sample solution to an * - * exercise in a course given at the CSCS Summer School. * - * It is made freely available with the understanding that * - * every copy of this file must include this header and that * - * CSCS take no responsibility for the use of the enclosed * - * teaching material. * - * * - * Purpose: A program to try MPI_Comm_size and MPI_Comm_rank. 
* - * * - * Contents: C-Source * - ****************************************************************/ - -#include -#include -#include - - -int main(int argc, char *argv[]) -{ - /* declare any variables you need */ - int rank; - int size; - int number; - MPI_Status status; - - MPI_Init(&argc, &argv); - - /* Get the rank of each process */ - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - /* Get the size of the communicator */ - MPI_Comm_size(MPI_COMM_WORLD, &size); - - /* Write code such that every process writes its rank and the size of the communicator, - * but only process 0 prints "hello world*/ - printf("Process %d out of %d.\n", rank, size); - - /* Check communicator size */ - if (size != 2) { - printf("Error: Proces %d: communicator size %d, expected 2.\n", rank, size); - MPI_Finalize(); - exit(1); - } - - /* Check send, receive */ - if (rank == 0) { - number = 42; - printf("Process %d: sending number %d\n", rank, number); - MPI_Send(&number, 1, MPI_INT, 1, 0, MPI_COMM_WORLD); - } - - if (rank == 1) { - MPI_Recv(&number, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status); - printf("Process %d: received number %d\n", rank, number); - if (number != 42) { - printf("Error: Process %d: received %d, expected 42.\n", rank, number); - MPI_Finalize(); - exit(1); - } - } - - MPI_Finalize(); - return 0; -} From 1f81d1fcf03b248b2e92bb8844341a7d2d2c758d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Janal=C3=ADk?= Date: Wed, 16 Oct 2024 23:31:18 +0200 Subject: [PATCH 3/6] Fix osu-micro-benchmarks configure on CUDA --- test/Dockerfile.mpi-osu_bw | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/Dockerfile.mpi-osu_bw b/test/Dockerfile.mpi-osu_bw index a5fe836..74f2d86 100644 --- a/test/Dockerfile.mpi-osu_bw +++ b/test/Dockerfile.mpi-osu_bw @@ -4,6 +4,9 @@ FROM $BUILDIMG as builder RUN apt-get update \ && env DEBIAN_FRONTEND=noninteractive TZ=Europe/Zurich apt-get -yqq install --no-install-recommends build-essential +# Fix error in ./configure: cannot link with -lcuda +RUN (test 
-f /usr/local/cuda/lib64/stubs/libcuda.so && ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/libcuda.so) || true + ARG TARGET RUN spack-install-helper \ "$TARGET" \ From 6396c31614888a8345c684f3487bc450403537ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Janal=C3=ADk?= Date: Tue, 22 Oct 2024 14:03:07 +0200 Subject: [PATCH 4/6] Add comments --- ci/templates/test_image.yml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/templates/test_image.yml.j2 b/ci/templates/test_image.yml.j2 index 905e89d..5cabf01 100644 --- a/ci/templates/test_image.yml.j2 +++ b/ci/templates/test_image.yml.j2 @@ -67,7 +67,7 @@ test_job_test_image_mpi-osu_bw-{{os}}{{osver}}-spack{{spackver}}-{{archstr[arch] - SIZE=4194304 - BW_THRESHOLD=20000 - BW=$(awk "/^$SIZE/ {print \$2}" osu_bw-out.txt) - - RES=$(bc <<< "$BW > $BW_THRESHOLD") + - RES=$(bc <<< "$BW > $BW_THRESHOLD") # use bc because bash cannot compare floating point numbers - test $RES -eq 1 # bc: true 1 false 0, bash: true 0 false 1 variables: SLURM_JOB_NUM_NODES: 2 From e1dde5d20d00925b3da61f224736cb677d1fb9dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Janal=C3=ADk?= Date: Tue, 22 Oct 2024 15:02:05 +0200 Subject: [PATCH 5/6] Update ln command --- test/Dockerfile.mpi-osu_bw | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/Dockerfile.mpi-osu_bw b/test/Dockerfile.mpi-osu_bw index 74f2d86..0345659 100644 --- a/test/Dockerfile.mpi-osu_bw +++ b/test/Dockerfile.mpi-osu_bw @@ -5,7 +5,12 @@ RUN apt-get update \ && env DEBIAN_FRONTEND=noninteractive TZ=Europe/Zurich apt-get -yqq install --no-install-recommends build-essential # Fix error in ./configure: cannot link with -lcuda -RUN (test -f /usr/local/cuda/lib64/stubs/libcuda.so && ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/libcuda.so) || true +RUN TARGET=/usr/local/cuda/lib64/stubs/libcuda.so; \ + LINK_NAME=/usr/local/cuda/lib64/libcuda.so; \ + if test -f $TARGET && test ! 
-f $LINK_NAME; \ + then \ + ln -s $TARGET $LINK_NAME; \ + fi ARG TARGET RUN spack-install-helper \ From a5049d99f6b4379ad39f5277f26a7cabc72f3fe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Janal=C3=ADk?= Date: Thu, 7 Nov 2024 21:09:29 +0100 Subject: [PATCH 6/6] Dummy commit to trigger pipeline --- ci/templates/test_image.yml.j2 | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/templates/test_image.yml.j2 b/ci/templates/test_image.yml.j2 index 5cabf01..4df2461 100644 --- a/ci/templates/test_image.yml.j2 +++ b/ci/templates/test_image.yml.j2 @@ -63,6 +63,7 @@ test_job_test_image_mpi-osu_bw-{{os}}{{osver}}-spack{{spackver}}-{{archstr[arch] - {{container_runner[arch]}} image: "{{registry_path}}/test-mpi-osu_bw:spack{{spackver}}-{{os}}{{osver}}-{{archstr[arch]}}" script: + - hostname - osu_bw | tee osu_bw-out.txt - SIZE=4194304 - BW_THRESHOLD=20000