# Patch: build and test DLA-Future directly on top of the dependency image.
#
# What the patch does:
#   * Renames the CI image variables: BUILD_IMAGE -> DEPS_IMAGE (image with the
#     spack-built dependencies) and DEPLOY_IMAGE -> DLAF_IMAGE (image with
#     DLA-Future built on top of it).
#   * Turns deploy.Dockerfile and codecov.Dockerfile into single-stage builds
#     "FROM $DEPS_IMAGE": the libtree based "prune and bundle" step, the MKL and
#     rocBLAS bundling, the second stage and the ld.so.conf entry are removed.
#   * Moves the runtime apt packages (glibc-tools, strace, ...) into
#     build.Dockerfile and drops the libtree download.
#   * Deletes the CMakeFiles directories after the build and builds the
#     miniapps out of source in ${BUILD_MINIAPP}.
#
# Review fixes applied on top of the originally recorded patch:
#   1. ci/common-ci.yml: "DEPS_IMAGE" is forwarded through DOCKER_BUILD_ARGS.
#      "BUILD_IMAGE" was removed from the list without a replacement, although
#      both Dockerfiles now start with "ARG DEPS_IMAGE" / "FROM $DEPS_IMAGE".
#   2. ci/common-ci.yml: after_script uses $DLAF_IMAGE; it still referenced
#      $DEPLOY_IMAGE, which the job definitions in this patch no longer set.
#   3. ci/docker/build.Dockerfile: ${EXTRA_APTGET} stays in the apt-get install
#      list; "ARG EXTRA_APTGET" is still declared and the clang18 job still
#      passes "clang-18 libomp-18-dev" through it.
#
# NOTE(review): upload_codecov is no longer copied next to mpi-ctest and
#   check-threads in codecov.Dockerfile - confirm nothing calls it via PATH.
# NOTE(review): ARG BUILD_MINIAPP is declared but not used in
#   codecov.Dockerfile - presumably kept for symmetry with deploy.Dockerfile.
# NOTE(review): leading whitespace of the hunk lines was reconstructed; apply
#   with a whitespace-tolerant option (e.g. git apply --ignore-whitespace).
#   The post-image blob ids on the "index" lines of common-ci.yml and
#   build.Dockerfile predate fixes 1-3.
diff --git a/ci/common-ci.yml b/ci/common-ci.yml
index 97ae84ae0c..2747ec40ef 100644
--- a/ci/common-ci.yml
+++ b/ci/common-ci.yml
@@ -24,8 +24,8 @@ stages:
     - TAG_REPO=`find $SPACK_DLAF_REPO -type f -exec sha256sum {} \; | sha256sum - | head -c 16`
     - TAG_ENVIRONMENT=`cat $SPACK_ENVIRONMENT $COMMON_SPACK_ENVIRONMENT | sha256sum | head -c 16`
     - TAG=${TAG_IMAGE}-${TAG_APTGET}-${TAG_COMPILER}-MKL${USE_MKL}-${TAG_DOCKERFILE}-${TAG_SPACK}-${TAG_REPO}-${TAG_ENVIRONMENT}
-    - export PERSIST_IMAGE_NAME=$BUILD_IMAGE:$TAG
-    - echo "BUILD_IMAGE=$PERSIST_IMAGE_NAME" > build.env
+    - export PERSIST_IMAGE_NAME=$DEPS_IMAGE:$TAG
+    - echo "DEPS_IMAGE=$PERSIST_IMAGE_NAME" > build.env
     - echo "USE_MKL=$USE_MKL" >> build.env
     - echo "USE_ROCBLAS=$USE_ROCBLAS" >> build.env
     - echo "USE_CODECOV=$USE_CODECOV" >> build.env
@@ -39,7 +39,6 @@ stages:
     SPACK_DLAF_REPO: ./spack
     DOCKER_BUILD_ARGS: '[
       "BASE_IMAGE",
-      "BUILDKIT_INLINE_CACHE=1",
       "SPACK_SHA",
       "EXTRA_APTGET",
       "COMPILER",
@@ -71,21 +70,15 @@ stages:
   after_script:
-    - podman run -v $PWD/ci/ctest_to_gitlab.sh:/ctest_to_gitlab.sh $DEPLOY_IMAGE /ctest_to_gitlab.sh "$DEPLOY_IMAGE" "$USE_CODECOV" "$THREADS_PER_NODE" "$SLURM_CONSTRAINT" > pipeline.yml
+    - podman run -v $PWD/ci/ctest_to_gitlab.sh:/ctest_to_gitlab.sh $DLAF_IMAGE /ctest_to_gitlab.sh "$DLAF_IMAGE" "$USE_CODECOV" "$THREADS_PER_NODE" "$SLURM_CONSTRAINT" > pipeline.yml
   variables:
-    PERSIST_IMAGE_NAME: $DEPLOY_IMAGE
+    PERSIST_IMAGE_NAME: $DLAF_IMAGE
     DOCKER_BUILD_ARGS: '[
-      "BUILD_IMAGE",
-      "DEPLOY_BASE_IMAGE",
-      "EXTRA_APTGET_DEPLOY",
+      "DEPS_IMAGE",
       "PIP_OPTS",
-      "USE_MKL",
-      "USE_ROCBLAS",
       "NUM_PROCS=$NUM_CORES_BUILD_DLAF"
     ]'
     # default configuration variables
     # can be overwritten in the configuration as needed
     DOCKERFILE: ci/docker/deploy.Dockerfile
-    DEPLOY_BASE_IMAGE: docker.io/ubuntu:24.04
-    EXTRA_APTGET_DEPLOY: ""
     PIP_OPTS: ""
   artifacts:
     paths:
diff --git a/ci/cpu/clang18_release.yml b/ci/cpu/clang18_release.yml
index 6a01bc5a01..db61babf2b 100644
--- a/ci/cpu/clang18_release.yml
+++ b/ci/cpu/clang18_release.yml
@@ -6,9 +6,9 @@ cpu clang18 release deps:
   variables:
     EXTRA_APTGET: "clang-18 libomp-18-dev"
     COMPILER: clang@18
-    USE_MKL: "ON"
     SPACK_ENVIRONMENT: ci/docker/release-cpu.yaml
-    BUILD_IMAGE: $CSCS_REGISTRY_PATH/cpu-clang18-release/build
+    USE_MKL: "ON"
+    DEPS_IMAGE: $CSCS_REGISTRY_PATH/cpu-clang18-release/deps
 
 cpu clang18 release build:
   extends:
@@ -17,7 +17,7 @@ cpu clang18 release build:
   needs:
     - cpu clang18 release deps
   variables:
-    DEPLOY_IMAGE: $CSCS_REGISTRY_PATH/cpu-clang18-release/deploy:$CI_COMMIT_SHA
+    DLAF_IMAGE: $CSCS_REGISTRY_PATH/cpu-clang18-release/dlaf:$CI_COMMIT_SHA
 
 cpu clang18 release test:
   extends: .run_common
diff --git a/ci/cuda/gcc11_release.yml b/ci/cuda/gcc11_release.yml
index 7d47f16d09..40d2b20bf2 100644
--- a/ci/cuda/gcc11_release.yml
+++ b/ci/cuda/gcc11_release.yml
@@ -6,10 +6,9 @@ cuda gcc11 release deps:
   variables:
     BASE_IMAGE: docker.io/nvidia/cuda:11.7.1-devel-ubuntu22.04
     COMPILER: gcc@11
-    CXXSTD: 17
     SPACK_ENVIRONMENT: ci/docker/release-cuda.yaml
     USE_MKL: "ON"
-    BUILD_IMAGE: $CSCS_REGISTRY_PATH/cuda-gcc11-release/build
+    DEPS_IMAGE: $CSCS_REGISTRY_PATH/cuda-gcc11-release/deps
 
 cuda gcc11 release build:
   extends:
@@ -18,8 +17,7 @@ cuda gcc11 release build:
   needs:
     - cuda gcc11 release deps
   variables:
-    DEPLOY_BASE_IMAGE: docker.io/ubuntu:22.04
-    DEPLOY_IMAGE: $CSCS_REGISTRY_PATH/cuda-gcc11-release/deploy:$CI_COMMIT_SHA
+    DLAF_IMAGE: $CSCS_REGISTRY_PATH/cuda-gcc11-release/dlaf:$CI_COMMIT_SHA
 
 cuda gcc11 release test:
   extends: .run_common
diff --git a/ci/docker/build.Dockerfile b/ci/docker/build.Dockerfile
index fadb655dd0..397c6a7660 100644
--- a/ci/docker/build.Dockerfile
+++ b/ci/docker/build.Dockerfile
@@ -6,28 +6,29 @@ FROM $BASE_IMAGE
 LABEL com.jfrog.artifactory.retention.maxDays="21"
 
 ENV DEBIAN_FRONTEND=noninteractive \
-    PATH="$PATH:/opt/spack/bin:/opt/libtree" \
+    PATH="$PATH:/opt/spack/bin" \
     SPACK_COLOR=always
 SHELL ["/bin/bash", "-c"]
 
 ARG EXTRA_APTGET
+# python is needed for spack and fastcov
+# codecov upload needs curl + ca-certificates
+# glibc-tools is needed for libSegFault on ubuntu > 22.04
+# jq, strace are needed for check-threads
+# tzdata is needed to print correct time
 RUN apt-get -yqq update && \
     apt-get -yqq install --no-install-recommends \
     software-properties-common \
     build-essential gfortran \
     autoconf automake libssl-dev ninja-build pkg-config \
     ${EXTRA_APTGET} \
     gawk \
     python3 python3-setuptools \
     git tar wget curl ca-certificates gpg-agent jq tzdata \
+    glibc-tools strace \
     patchelf unzip file gnupg2 libncurses-dev && \
     rm -rf /var/lib/apt/lists/*
 
-# Install libtree for packaging
-RUN mkdir -p /opt/libtree && \
-    curl -Lfso /opt/libtree/libtree https://github.com/haampie/libtree/releases/download/v2.0.0/libtree_x86_64 && \
-    chmod +x /opt/libtree/libtree
-
 # Install MKL and remove static libs (to keep image smaller)
 ARG USE_MKL=ON
 ARG MKL_VERSION=2024.0
diff --git a/ci/docker/codecov.Dockerfile b/ci/docker/codecov.Dockerfile
index 3673b680ac..b9a7c9dcad 100644
--- a/ci/docker/codecov.Dockerfile
+++ b/ci/docker/codecov.Dockerfile
@@ -1,18 +1,17 @@
-ARG BUILD_IMAGE
-ARG DEPLOY_BASE_IMAGE
+ARG DEPS_IMAGE
+FROM $DEPS_IMAGE
 
-# This is the folder where the project is built
+LABEL com.jfrog.artifactory.retention.maxDays="7"
+LABEL com.jfrog.artifactory.retention.maxCount="10"
+
+# Directory where the project is built
 ARG BUILD=/DLA-Future-build
-# This is where we copy the sources to
+# Directory where the miniapps are built as separate project
+ARG BUILD_MINIAPP=/DLA-Future-miniapp-build
+# Directory where the sources are copied to
 ARG SOURCE=/DLA-Future
-# Where a bunch of shared libs live
-ARG DEPLOY=/root/DLA-Future.bundle
-
-FROM $BUILD_IMAGE as builder
-
-ARG BUILD
-ARG SOURCE
-ARG DEPLOY
+# Directory for some helper executables
+ARG BIN=/DLA-Future-build/bin
 
 # Build DLA-Future
 COPY . ${SOURCE}
@@ -28,83 +27,27 @@ RUN spack repo rm --scope site dlaf && \
     spack repo add ${SOURCE}/spack && \
     spack -e ci develop --no-clone --path ${SOURCE} --build-directory ${BUILD} dla-future@master && \
     spack -e ci concretize -f && \
-    spack -e ci --config "config:flags:keep_werror:all" install --jobs ${NUM_PROCS} --keep-stage --verbose
-
-# Prune and bundle binaries
-RUN mkdir ${BUILD}-tmp && cd ${BUILD} && \
-    export TEST_BINARIES=`PATH=${SOURCE}/ci:$PATH ctest --show-only=json-v1 | jq '.tests | map(.command | .[] | select(contains("check-threads") | not)) | .[]' | tr -d \"` && \
-    echo "Binary sizes:" && \
-    ls -lh ${TEST_BINARIES} && \
-    ls -lh src/lib* && \
-    libtree -d ${DEPLOY} ${TEST_BINARIES} && \
-    rm -rf ${DEPLOY}/usr/bin && \
-    libtree -d ${DEPLOY} $(which ctest gcov addr2line) && \
-    cp -L ${SOURCE}/ci/{mpi-ctest,check-threads,upload_codecov} ${DEPLOY}/usr/bin && \
-    echo "$TEST_BINARIES" | xargs -I{file} find -samefile {file} -exec cp --parents '{}' ${BUILD}-tmp ';' && \
-    find '(' -name CTestTestfile.cmake -o -iname "*.gcno" ')' -exec cp --parents '{}' ${BUILD}-tmp ';' && \
-    rm -rf ${BUILD} && \
-    mv ${BUILD}-tmp ${BUILD} && \
-    rm -rf ${SOURCE}/.git
+    spack -e ci --config "config:flags:keep_werror:all" install --jobs ${NUM_PROCS} --keep-stage --verbose && \
+    find ${BUILD} -name CMakeFiles -exec rm -rf {} +
 
-# Deploy Extra RocBlas files separately.
-ARG USE_ROCBLAS=OFF
-RUN mkdir ${DEPLOY}/usr/lib/rocblas; \
-    if [ "$USE_ROCBLAS" = "ON" ]; then \
-      cp -r `spack -e ci location -i rocblas`/lib/rocblas/library ${DEPLOY}/usr/lib/rocblas ; \
-    fi
+RUN mkdir -p ${BIN} && cp -L ${SOURCE}/ci/{mpi-ctest,check-threads} ${BIN}
 
-# Multistage build, this is the final small image
-FROM $DEPLOY_BASE_IMAGE
-
-# set jfrog autoclean policy
-LABEL com.jfrog.artifactory.retention.maxDays="7"
-LABEL com.jfrog.artifactory.retention.maxCount="10"
-
-ENV DEBIAN_FRONTEND noninteractive
-
-ARG BUILD
-ARG SOURCE
-ARG DEPLOY
-
-ARG EXTRA_APTGET_DEPLOY
 ARG PIP_OPTS
-# python is needed for fastcov
 # pip is needed only to install fastcov (it is removed with
 # its dependencies after fastcov installation)
-# codecov upload needs curl + ca-certificates
-# glibc-tools is needed for libSegFault on ubuntu:22.04
-# jq, strace are needed for check-threads
-# tzdata is needed to print correct time
 RUN apt-get update -qq && \
-    apt-get install -qq -y --no-install-recommends \
-      ${EXTRA_APTGET_DEPLOY} \
-      python3 python3-pip \
-      curl \
-      ca-certificates \
-      glibc-tools jq strace \
-      tzdata && \
+    apt-get install -qq -y --no-install-recommends python3-pip && \
     pip install ${PIP_OPTS} fastcov && \
     apt-get autoremove -qq -y python3-pip && \
     apt-get clean
 
-# Copy the executables and the codecov gcno files
-COPY --from=builder ${BUILD} ${BUILD}
-COPY --from=builder ${DEPLOY} ${DEPLOY}
-
-# Copy the source files into the image as well.
-# This is necessary for code coverage of MPI tests: gcov has to have write temporary
-# data into the source folder. In distributed applications we can therefore not mount
-# the git repo folder at runtime in the container, because it is shared and would
-# cause race conditions in gcov.
-COPY --from=builder ${SOURCE} ${SOURCE}
-
 RUN cd /usr/local/bin && \
     curl -Ls https://codecov.io/bash > codecov.sh && \
     echo "f0e7a3ee76a787c37aa400cf44aee0c9b473b2fa79092edfb36d1faa853bbe23  codecov.sh" | sha256sum --check --quiet && \
     chmod +x codecov.sh
 
 # Make it easy to call our binaries.
-ENV PATH="${DEPLOY}/usr/bin:$PATH"
+ENV PATH="${BIN}:$PATH"
 ENV NVIDIA_VISIBLE_DEVICES all
 ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
 ENV NVIDIA_REQUIRE_CUDA "cuda>=10.2"
@@ -115,6 +58,4 @@ ENV ENABLE_COVERAGE="YES"
 # Automatically print stacktraces on segfault
 ENV LD_PRELOAD=/lib/x86_64-linux-gnu/libSegFault.so
 
-RUN echo "${DEPLOY}/usr/lib/" > /etc/ld.so.conf.d/dlaf.conf && ldconfig
-
 WORKDIR ${BUILD}
diff --git a/ci/docker/deploy.Dockerfile b/ci/docker/deploy.Dockerfile
index a500dd918e..42a11b2264 100644
--- a/ci/docker/deploy.Dockerfile
+++ b/ci/docker/deploy.Dockerfile
@@ -1,18 +1,17 @@
-ARG BUILD_IMAGE
-ARG DEPLOY_BASE_IMAGE
+ARG DEPS_IMAGE
+FROM $DEPS_IMAGE
 
-# This is the folder where the project is built
+LABEL com.jfrog.artifactory.retention.maxDays="7"
+LABEL com.jfrog.artifactory.retention.maxCount="10"
+
+# Directory where the project is built
 ARG BUILD=/DLA-Future-build
-# This is where we copy the sources to
+# Directory where the miniapps are built as separate project
+ARG BUILD_MINIAPP=/DLA-Future-miniapp-build
+# Directory where the sources are copied to
 ARG SOURCE=/DLA-Future
-# Where a bunch of shared libs live
-ARG DEPLOY=/root/DLA-Future.bundle
-
-FROM $BUILD_IMAGE as builder
-
-ARG BUILD
-ARG SOURCE
-ARG DEPLOY
+# Directory for some helper executables
+ARG BIN=/DLA-Future-build/bin
 
 # Build DLA-Future
 COPY . ${SOURCE}
@@ -25,87 +24,18 @@ RUN spack repo rm --scope site dlaf && \
     spack repo add ${SOURCE}/spack && \
     spack -e ci develop --no-clone --path ${SOURCE} --build-directory ${BUILD} dla-future@master && \
     spack -e ci concretize -f && \
-    spack -e ci --config "config:flags:keep_werror:all" install --jobs ${NUM_PROCS} --keep-stage --verbose
+    spack -e ci --config "config:flags:keep_werror:all" install --jobs ${NUM_PROCS} --keep-stage --verbose && \
+    find ${BUILD} -name CMakeFiles -exec rm -rf {} +
 
 # Test deployment with miniapps as independent project
-RUN pushd ${SOURCE}/miniapp && \
-    mkdir build-miniapps && cd build-miniapps && \
+RUN mkdir ${BUILD_MINIAPP} && cd ${BUILD_MINIAPP} && \
     spack -e ci build-env dla-future@master -- \
-    bash -c "cmake -DCMAKE_PREFIX_PATH=`spack -e ci location -i dla-future` .. && make -j ${NUM_PROCS}" && \
-    popd
-
-# Prune and bundle binaries
-RUN mkdir ${BUILD}-tmp && cd ${BUILD} && \
-    export TEST_BINARIES=`PATH=${SOURCE}/ci:$PATH ctest --show-only=json-v1 | jq '.tests | map(.command | .[] | select(contains("check-threads") | not)) | .[]' | tr -d \"` && \
-    LIBASAN=$(find /usr/lib -name libclang_rt.asan-x86_64.so) && \
-    if [[ -n "${LIBASAN}" ]]; then export LD_LIBRARY_PATH=$(dirname ${LIBASAN}):${LD_LIBRARY_PATH}; fi && \
-    echo "Binary sizes:" && \
-    ls -lh ${TEST_BINARIES} && \
-    ls -lh src/lib* && \
-    libtree -d ${DEPLOY} ${TEST_BINARIES} && \
-    rm -rf ${DEPLOY}/usr/bin && \
-    libtree -d ${DEPLOY} $(which ctest addr2line) && \
-    cp -L ${SOURCE}/ci/{mpi-ctest,check-threads} ${DEPLOY}/usr/bin && \
-    echo "$TEST_BINARIES" | xargs -I{file} find -samefile {file} -exec cp --parents '{}' ${BUILD}-tmp ';' && \
-    find -name CTestTestfile.cmake -exec cp --parents '{}' ${BUILD}-tmp ';' && \
-    rm -rf ${BUILD} && \
-    mv ${BUILD}-tmp ${BUILD}
-
-# Deploy MKL separately, since it dlopen's some libs
-ARG USE_MKL=ON
-RUN if [ "$USE_MKL" = "ON" ]; then \
-      export MKL_LIB=$(dirname $(find $(spack location -i intel-oneapi-mkl) -name libmkl_core.so)) && \
-      libtree -d ${DEPLOY} \
-      ${MKL_LIB}/libmkl_avx2.so.2 \
-      ${MKL_LIB}/libmkl_avx512.so.2 \
-      ${MKL_LIB}/libmkl_core.so \
-      ${MKL_LIB}/libmkl_def.so.2 \
-      ${MKL_LIB}/libmkl_intel_thread.so \
-      ${MKL_LIB}/libmkl_mc3.so.2 \
-      ${MKL_LIB}/libmkl_sequential.so \
-      ${MKL_LIB}/libmkl_tbb_thread.so \
-      ${MKL_LIB}/libmkl_vml_avx2.so.2 \
-      ${MKL_LIB}/libmkl_vml_avx512.so.2 \
-      ${MKL_LIB}/libmkl_vml_cmpt.so.2 \
-      ${MKL_LIB}/libmkl_vml_def.so.2 \
-      ${MKL_LIB}/libmkl_vml_mc3.so.2 ; \
-    fi
-
-# Deploy Extra RocBlas files separately.
-ARG USE_ROCBLAS=OFF
-RUN mkdir ${DEPLOY}/usr/lib/rocblas; \
-    if [ "$USE_ROCBLAS" = "ON" ]; then \
-      cp -r `spack -e ci location -i rocblas`/lib/rocblas/library ${DEPLOY}/usr/lib/rocblas ; \
-    fi
+    bash -c "cmake -DCMAKE_PREFIX_PATH=`spack -e ci location -i dla-future` ${SOURCE}/miniapp && make -j ${NUM_PROCS}"
 
-# Multistage build, this is the final small image
-FROM $DEPLOY_BASE_IMAGE
-
-# set jfrog autoclean policy
-LABEL com.jfrog.artifactory.retention.maxDays="7"
-LABEL com.jfrog.artifactory.retention.maxCount="10"
-
-ENV DEBIAN_FRONTEND noninteractive
-
-ARG BUILD
-ARG DEPLOY
-
-ARG EXTRA_APTGET_DEPLOY
-# glibc-tools is needed for libSegFault on ubuntu:22.04
-# jq, strace are needed for check-threads
-# tzdata is needed to print correct time
-RUN apt-get update -qq && \
-    apt-get install -qq -y --no-install-recommends \
-      ${EXTRA_APTGET_DEPLOY} \
-      glibc-tools jq strace \
-      tzdata && \
-    rm -rf /var/lib/apt/lists/*
-
-COPY --from=builder ${BUILD} ${BUILD}
-COPY --from=builder ${DEPLOY} ${DEPLOY}
+RUN mkdir -p ${BIN} && cp -L ${SOURCE}/ci/{mpi-ctest,check-threads} ${BIN}
 
 # Make it easy to call our binaries.
-ENV PATH="${DEPLOY}/usr/bin:$PATH"
+ENV PATH="${BIN}:$PATH"
 ENV NVIDIA_VISIBLE_DEVICES all
 ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
 ENV NVIDIA_REQUIRE_CUDA "cuda>=10.2"
@@ -113,6 +43,4 @@ ENV NVIDIA_REQUIRE_CUDA "cuda>=10.2"
 # Automatically print stacktraces on segfault
 ENV LD_PRELOAD=/lib/x86_64-linux-gnu/libSegFault.so
 
-RUN echo "${DEPLOY}/usr/lib/" > /etc/ld.so.conf.d/dlaf.conf && ldconfig
-
 WORKDIR ${BUILD}