Skip to content

Commit

Permalink
#2730 Fix issues with LFRic OMP offloading and add it to the integrat…
Browse files Browse the repository at this point in the history
…ion tests
  • Loading branch information
sergisiso committed Oct 3, 2024
1 parent fcc09be commit ae26a8b
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 73 deletions.
52 changes: 52 additions & 0 deletions .github/workflows/lfric_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,58 @@ jobs:
pip install .[test]
pip install jinja2
# PSyclone, compile and run MetOffice gungho_model on GPU
- name: LFRic GungHo with OpenMP offload
run: |
# Set up environment
source /apps/spack/psyclone-spack/spack-repo/share/spack/setup-env.sh
spack load lfric-build-environment%nvhpc
source .runner_venv/bin/activate
export PSYCLONE_LFRIC_DIR=${GITHUB_WORKSPACE}/examples/lfric/scripts
export PSYCLONE_CONFIG_FILE=${PSYCLONE_LFRIC_DIR}/KGOs/lfric_psyclone.cfg
# The LFRic source must be patched to workaround bugs in the NVIDIA
# compiler's namelist handling.
rm -rf ${HOME}/LFRic/gpu_build
mkdir -p ${HOME}/LFRic/gpu_build
cp -r ${HOME}/LFRic/lfric_apps_${LFRIC_APPS_REV} ${HOME}/LFRic/gpu_build/lfric_apps
cp -r ${HOME}/LFRic/lfric_core_50869 ${HOME}/LFRic/gpu_build/lfric
cd ${HOME}/LFRic/gpu_build
patch -p1 < ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_${LFRIC_APPS_REV}_nvidia.patch
# Update the compiler definitions to build for GPU
cp ${PSYCLONE_LFRIC_DIR}/KGOs/nvfortran_acc.mk lfric/infrastructure/build/fortran/nvfortran.mk
cp ${PSYCLONE_LFRIC_DIR}/KGOs/nvc++.mk lfric/infrastructure/build/cxx/.
# Update the PSyclone commands to ensure transformed kernels are written
# to working directory.
cp ${PSYCLONE_LFRIC_DIR}/KGOs/psyclone.mk lfric/infrastructure/build/psyclone/.
# Update dependencies.sh to point to our patched lfric core.
sed -i -e 's/export lfric_core_sources=.*$/export lfric_core_sources\=\/home\/gh_runner\/LFRic\/gpu_build\/lfric/' lfric_apps/dependencies.sh
export LFRIC_DIR=${HOME}/LFRic/gpu_build/lfric_apps
export OPT_DIR=${LFRIC_DIR}/applications/gungho_model/optimisation/psyclone-test
cd ${LFRIC_DIR}
# PSyclone scripts must now be under 'optimisation' and be called 'global.py'
mkdir -p ${OPT_DIR}
cp ${PSYCLONE_LFRIC_DIR}/gpu_offloading.py ${OPT_DIR}/global.py
# Clean previous version and compile again
rm -rf applications/gungho_model/working
OFFLOAD_USING_OMP=true ./build/local_build.py -a gungho_model -p psyclone-test
cd applications/gungho_model/example
cp ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gungho_configuration_4its.nml configuration.nml
mpirun -n 1 ../bin/gungho_model configuration.nml |& tee output.txt
python ${PSYCLONE_LFRIC_DIR}/compare_ouput.py ${PSYCLONE_LFRIC_DIR}/KGOs/lfric_gungho_configuration_4its_checksums.txt gungho_model-checksums.txt
cat timer.txt
export VAR_TIME=$(grep "gungho_model" timer.txt | cut -d'|' -f5)
export VAR_HALOS=$(grep "gungho_model" halo_calls_counter.txt | cut -d'|' -f5)
echo $GITHUB_REF_NAME $GITHUB_SHA $VAR_TIME $VAR_HALOS >> ${HOME}/store_results/lfric_omp_performance_history
${HOME}/mongosh-2.1.1-linux-x64/bin/mongosh \
"mongodb+srv://cluster0.x8ncpxi.mongodb.net/PerformanceMonitoring" \
--quiet --apiVersion 1 --username ${{ secrets.MONGODB_USERNAME }} \
--password ${{ secrets.MONGODB_PASSWORD }} \
--eval 'db.GitHub_CI.insertOne({branch_name: "'"$GITHUB_REF_NAME"'", commit: "'"$GITHUB_SHA"'",
github_job: "'"$GITHUB_RUN_ID"'"-"'"$GITHUB_RUN_ATTEMPT"'",
ci_test: "LFRic OpenMP offloading", lfric_apps_version: '"$LFRIC_APPS_REV"', system: "GlaDos",
compiler:"spack-nvhpc-24.5", date: new Date(), elapsed_time: '"$VAR_TIME"',
num_of_halo_exchanges: '"$VAR_HALOS"'})'
# PSyclone, compile and run MetOffice gungho_model on GPU
- name: LFRic GungHo with OpenACC offload
run: |
Expand Down
59 changes: 0 additions & 59 deletions examples/lfric/scripts/KGOs/lfric_3269_nvidia.patch
Original file line number Diff line number Diff line change
Expand Up @@ -57,65 +57,6 @@ index 19c9cff9..b5cd3014 100644
$(call MESSAGE,Compiled,$<)


diff --git a/lfric/infrastructure/build/cxx/nvc++.mk b/lfric/infrastructure/build/cxx/nvc++.mk
new file mode 100644
index 00000000..13b17a10
--- /dev/null
+++ b/lfric/infrastructure/build/cxx/nvc++.mk
@@ -0,0 +1,9 @@
+##############################################################################
+# (c) Crown copyright 2017 Met Office. All rights reserved.
+# The file LICENCE, distributed with this code, contains details of the terms
+# under which the code may be used.
+##############################################################################
+
+$(info ** Chosen NVC++ compiler)
+
+CXX_RUNTIME_LIBRARY=stdc++
diff --git a/lfric/infrastructure/build/fortran/nvfortran.mk b/lfric/infrastructure/build/fortran/nvfortran.mk
new file mode 100644
index 00000000..cfed52c1
--- /dev/null
+++ b/lfric/infrastructure/build/fortran/nvfortran.mk
@@ -0,0 +1,38 @@
+##############################################################################
+# Copyright (c) 2017, Met Office, on behalf of HMSO and Queen's Printer
+# For further details please refer to the file LICENCE.original which you
+# should have received as part of this distribution.
+##############################################################################
+# Various things specific to the Portland Fortran compiler.
+##############################################################################
+#
+# This macro is evaluated now (:= syntax) so it may be used as many times as
+# desired without wasting time rerunning it.
+#
+F_MOD_DESTINATION_ARG = -module$(SPACE)
+OPENMP_ARG = -mp
+
+FFLAGS_COMPILER =
+FFLAGS_NO_OPTIMISATION = -O0
+FFLAGS_SAFE_OPTIMISATION = -O2
+FFLAGS_RISKY_OPTIMISATION = -O4
+FFLAGS_DEBUG = -g -traceback
+FFLAGS_RUNTIME = -Mchkptr -Mchkstk
+# Option for checking code meets Fortran standard (not available for PGI)
+FFLAGS_FORTRAN_STANDARD =
+
+LDFLAGS_COMPILER = -g
+
+FPP = cpp -traditional-cpp
+FPPFLAGS = -P
+FC = mpif90
+
+# FS#34981 (nvbug 4648082)
+science/src/um/src/atmosphere/large_scale_precipitation/ls_ppnc.o: private FFLAGS_RUNTIME = -Mchkstk
+
+# FS#35751
+mesh/create_mesh_mod.o: private FFLAGS_RUNTIME = -Mchkstk
+
+# 24.3
+science/src/socrates/src/cosp_github/subsample_and_optics_example/optics/quickbeam_optics/optics_lib.o: private FFLAGS_SAFE_OPTIMISATION = -O1
+science/src/socrates/src/cosp_github/subsample_and_optics_example/optics/quickbeam_optics/optics_lib.o: private FFLAGS_RISKY_OPTIMISATION = -O1
diff --git a/lfric/infrastructure/build/tools/DependencyRules b/lfric/infrastructure/build/tools/DependencyRules
index 9d4db390..e37384fc 100755
--- a/lfric/infrastructure/build/tools/DependencyRules
Expand Down
34 changes: 25 additions & 9 deletions examples/lfric/scripts/gpu_offloading.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,10 @@ def trans(psy):

if OFFLOAD_USING_OMP:
# Use OpenMP offloading
loop_offloading_trans = OMPLoopTrans()
loop_offloading_trans.omp_directive = "teamsdistributeparalleldo"
loop_offloading_trans = OMPLoopTrans(
omp_directive="teamsdistributeparalleldo",
omp_schedule="none"
)
kernels_trans = None
gpu_region_trans = OMPTargetTrans()
gpu_annotation_trans = OMPDeclareTargetTrans()
Expand Down Expand Up @@ -133,12 +135,16 @@ def trans(psy):
print(f"Failed to annotate '{kern.name}' with "
f"GPU-enabled directive due to:\n"
f"{err.value}")
# For annotated or inlined kernels we could attempt to
# provide compile-time dimensions for the temporary
# arrays and convert unsupported intrinsics to code.

# Add GPU offloading to loops unless they are over colours or are null.
schedule = invoke.schedule
for loop in schedule.walk(Loop):
if offload and all(kern.name.lower() not in failed_to_offload for
kern in loop.kernels()):
kernel_names = [k.name.lower() for k in loop.kernels()]
if offload and all(name not in failed_to_offload for name in
kernel_names):
try:
if loop.loop_type == "colours":
pass
Expand All @@ -151,13 +157,23 @@ def trans(psy):
loop, options={"independent": True})
gpu_region_trans.apply(loop.ancestor(Directive))
if loop.loop_type == "dof":
if kernels_trans:
# Loops over dofs can contain reductions, so we
# don't add loop parallelism (is not supported yet)
# but we can add 'kernel' parallelism if available
# Loops over dofs can contain reductions that ...
if OFFLOAD_USING_OMP:
# with loop offloading will be detected by the
# dependency analysis and raise TransformationErrors
loop_offloading_trans.apply(
loop, options={"independent": True})
gpu_region_trans.apply(loop.ancestor(Directive))
elif kernels_trans:
# if kernel offloading is available it should
# manage them
kernels_trans.apply(loop)
# Alternatively we could use loop parallelism with
# reduction clauses
print(f"Successfully offloaded loop with {kernel_names}")
except TransformationError as err:
print(f"Failed to offload loop because: {err}")
print(f"Failed to offload loop with {kernel_names} "
f"because: {err}")

# Apply OpenMP thread parallelism for any kernels we've not been able
# to offload to GPU.
Expand Down
3 changes: 2 additions & 1 deletion src/psyclone/f2pygen.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ class OMPDirective(Directive):
'''
def __init__(self, root, line, position, dir_type):
self._types = ["parallel do", "parallel", "do", "master", "single",
"taskloop", "taskwait", "declare", "target"]
"taskloop", "taskwait", "declare", "target", "teams",
"teams distribute parallel do"]
self._positions = ["begin", "end"]

super(OMPDirective, self).__init__(root, line, position, dir_type)
Expand Down
33 changes: 29 additions & 4 deletions src/psyclone/psyir/nodes/omp_directives.py
Original file line number Diff line number Diff line change
Expand Up @@ -1442,7 +1442,8 @@ def gen_code(self, parent):
for call in reprod_red_call_list:
call.reduction_sum_loop(parent)

self.gen_post_region_code(parent)
if not self.ancestor(OMPRegionDirective):
self.gen_post_region_code(parent)

def lower_to_language_level(self):
'''
Expand Down Expand Up @@ -2302,7 +2303,7 @@ def gen_code(self, parent):
# Add directive to the f2pygen tree
parent.add(
DirectiveGen(
parent, "omp", "begin", "parallel do", ", ".join(
parent, "omp", "begin", self.begin_string()[4:], " ".join(
text for text in [default_str, private_str, fprivate_str,
schedule_str, self._reduction_string()]
if text)))
Expand All @@ -2312,10 +2313,24 @@ def gen_code(self, parent):

# make sure the directive occurs straight after the loop body
position = parent.previous_loop()
parent.add(DirectiveGen(parent, *self.end_string().split()),

# DirectiveGen only accepts 3 terms, e.g. "omp end loop", so for longer
# directives, e.g. "omp end teams distribute parallel do", we split them
# between arguments and content (which is an additional string appended
# at the end)
terms = self.end_string().split()
if len(terms) > 3:
arguments = terms[:3]
content = " ".join(terms[3:])
else:
arguments = terms
content = ""

parent.add(DirectiveGen(parent, *arguments, content=content),
position=["after", position])

self.gen_post_region_code(parent)
if not self.ancestor(OMPRegionDirective):
self.gen_post_region_code(parent)

def lower_to_language_level(self):
'''
Expand Down Expand Up @@ -2415,6 +2430,16 @@ def gen_code(self, parent):
# Generate the code for this Directive
parent.add(DirectiveGen(parent, "omp", "begin", "target"))

# Generate the code for all of this node's children
for child in self.dir_body:
child.gen_code(parent)

# Generate the end code for this node
parent.add(DirectiveGen(parent, "omp", "end", "target", ""))

if not self.ancestor(OMPRegionDirective):
self.gen_post_region_code(parent)


class OMPLoopDirective(OMPRegionDirective):
''' Class for the !$OMP LOOP directive that specifies that the iterations
Expand Down

0 comments on commit ae26a8b

Please sign in to comment.